315 lines
11 KiB
Python
Executable File
315 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3.5
|
|
|
|
# You are free to use and/or change this code for
|
|
# your own needs.
|
|
|
|
# Original code (c)2018 Jan Lerking
|
|
# Program to test various scenarios of
|
|
# single thread, multi thread, pool and process
|
|
|
|
import os
|
|
import sys
|
|
import multiprocessing as mp
|
|
import time
|
|
import io
|
|
import cpuinfo
|
|
import queue
|
|
import threading
|
|
import time
|
|
import random
|
|
import string
|
|
import shutil
|
|
from tkinter import Tk, ttk, Label, Button, LabelFrame
|
|
from tkinter import filedialog, Entry, Checkbutton
|
|
from tkinter import Grid, StringVar, DoubleVar
|
|
from tkinter import N, E, S, W
|
|
from tkinter import DISABLED, NORMAL
|
|
from os import errno
|
|
import os
|
|
from os.path import expanduser
|
|
import platform
|
|
class file_progress:
    """Tk panel that reports overall progress of a header translation run.

    The constructor only builds the 'Progress' frame with the total-progress
    label and progress bar.

    NOTE(review): select_sourcedir() and select_destdir() configure widgets
    (destlabel, destentry, destdir_button, infofiles, infofolders, incchkbox,
    translate_button, cfilelabel) that the visible __init__ never creates, so
    they raise AttributeError until those widgets are added.
    """

    def __init__(self, master):
        """Create the Tk variables and lay out the progress widgets in *master*."""
        self.sourcedir = StringVar()
        self.destdir = StringVar()
        self.addinc = StringVar()
        self.cfilevar = DoubleVar()
        self.totalvar = 0
        self.filecnt = 0
        self.infofolder = 'Number of folders: 0'
        self.infofile = 'Number of headers: 0'
        self.currentfile = 'Current file: '
        self.totprogress = 'Total progress: '
        self.sourcedir.set('Select source directory!')
        self.destdir.set('Select destination directory!')

        self.master = master
        self.master.title('File progress')
        self.master.grid_columnconfigure(1, weight=1)

        self.frame = LabelFrame(master, text='Progress')
        self.frame.grid(row=0, column=0, columnspan=3, sticky=N+S+E+W, padx=5, pady=5)
        self.frame.grid_columnconfigure(1, weight=1)

        self.totallabel = Label(self.frame, text=self.totprogress)
        self.totallabel.grid(row=8, column=0, sticky=W, padx=5, pady=5)
        self.totallabel.config(state=DISABLED)

        self.totalprogress = ttk.Progressbar(self.frame, orient='horizontal', mode='determinate')
        self.totalprogress.grid(row=9, column=0, columnspan=3, sticky=N+S+E+W, padx=5, pady=5)
        # NOTE(review): totalvar is a plain int here, not a Tk variable.
        self.totalprogress.config(variable=self.totalvar, maximum=self.filecnt)

    def select_sourcedir(self, sourcedir):
        """Ask for the source directory and show its header and folder counts.

        *sourcedir* is the Tk variable that receives the chosen path.
        Nothing changes when the dialog is cancelled.
        """
        chosen = filedialog.askdirectory()
        if not chosen:
            # A cancelled dialog returns ''; abspath('') would silently turn
            # that into the current working directory.
            return
        chosen = os.path.abspath(chosen)
        # Remember the choice on the owning window (the script passes its Tk
        # root as master) instead of reaching for a module-level global.
        self.master.directory = chosen
        sourcedir.set(chosen)
        filecnt = sourcedir_filecnt(chosen)
        if filecnt > 0:
            tempstr = 'Number of headers: '+str(filecnt)
            temptot = 'Total progress: 0 of '+str(filecnt)
            print ('Source directory: ', sourcedir.get())
            self.destlabel.config(state=NORMAL)
            self.destentry.config(state=NORMAL)
            self.destdir_button.config(state=NORMAL)
            self.infofiles.config(text=tempstr)
            self.totallabel.config(text=temptot)
            self.filecnt = filecnt
            # NOTE(review): sourcedir_foldercnt is not defined in the visible
            # part of this file.
            foldercnt = sourcedir_foldercnt(chosen)
            if foldercnt > 0:
                tempstr = 'Number of folders: '+str(foldercnt)
                self.infofolders.config(text=tempstr)

    def select_destdir(self, destdir):
        """Ask for the destination directory and enable the remaining controls.

        *destdir* is the Tk variable that receives the chosen path.
        Nothing changes when the dialog is cancelled.
        """
        chosen = filedialog.askdirectory()
        self.master.directory = chosen
        if not chosen:
            return
        destdir.set(chosen)
        print ('Destination directory: ', destdir.get())
        self.incchkbox.config(state=NORMAL)
        self.infofolders.config(state=NORMAL)
        self.infofiles.config(state=NORMAL)
        self.translate_button.config(state=NORMAL)
        self.cfilelabel.config(state=NORMAL)
        self.totallabel.config(state=NORMAL)

    def translate(self, destdir, sourcedir, addinc):
        """Process the collected headers from the source into the destination.

        *destdir*, *sourcedir* and *addinc* are Tk variables. When *addinc*
        holds 'yes', output goes to an 'include' sub-directory of the
        destination, which is created if needed and written back to *destdir*.
        """
        doinc = addinc.get()
        dest = destdir.get()
        source = sourcedir.get()
        if doinc == 'yes':
            dest = dest+'/include'
            print(os.path.exists(os.path.dirname(dest)))
            # Create the include directory itself (not only its parent);
            # exist_ok makes a concurrent creation harmless.
            os.makedirs(dest, exist_ok=True)
            destdir.set(dest)
        print ('Destination directory: ', destdir.get())
        process_files(source, dest)

    def cfileprogress_update(self, cnt):
        """Store *cnt* in the current-file progress variable."""
        self.cfilevar.set(cnt)

    def currentfile_update(self, current):
        """Set the current-file caption text to name *current*."""
        self.currentfile = 'Current file: '+current
|
|
|
|
# Start the GUI only when this file is run as a script. multiprocessing's
# spawn start method re-imports the main module in every worker process, and
# an unguarded Tk() / mainloop() here would open a window in each of them.
if __name__ == '__main__':
    root = Tk()
    root.update()
    #root.minsize(350, 210)
    #width = (root.winfo_screenwidth()/2)-(350/2)
    #height = (root.winfo_screenheight()/2)-(210/2)
    #root.geometry('+%d+%d' % (width, height))
    root.resizable(False, False)
    f_progress = file_progress(root)
    # NOTE(review): mainloop() blocks until the window is closed, and the
    # functions below are only defined after it returns.
    root.mainloop()

cpu_info = cpuinfo.get_cpu_info()
num_cores = mp.cpu_count()
print(num_cores)

# State shared by the benchmark functions below.
filelist = []                   # header paths collected by sourcedir_filecnt()
exitFlag = 0                    # set to 1 to make worker threads leave their loop
queueLock = threading.Lock()
workQueue = queue.Queue(0)      # maxsize 0: unbounded
threads = []                    # every worker thread started by multi_thread()
|
|
class myThread(threading.Thread):
    """Worker thread that runs process_data() on the queue it was given."""

    def __init__(self, threadID, name, q):
        """Record the numeric id, the display name and the work queue."""
        super().__init__()
        self.q = q
        self.name = name
        self.threadID = threadID

    def run(self):
        """Announce start, drain the queue via process_data(), announce exit."""
        label = self.name
        print("Starting {}".format(label))
        process_data(label, self.q)
        print("Exiting {}".format(self.name))
|
|
|
|
def process_data(threadName, q):
    """Take file paths from *q* and process them until ``exitFlag`` is set.

    threadName -- label used in the progress message.
    q          -- queue of file paths to hand to process_file().

    Items are taken with the queue's own thread-safe get(), so the file is
    processed without holding any lock and workers can overlap. The one
    second timeout bounds how long an idle worker takes to notice exitFlag.
    """
    while not exitFlag:
        try:
            data = q.get(timeout=1)
        except queue.Empty:
            # Nothing to do yet; go round again to re-check exitFlag.
            continue
        process_file(data)
        print ("%s processing %s" % (threadName, data))
|
|
|
|
def sourcedir_filecnt(sourcedir):
    """Count the '.h' files below *sourcedir* and record their paths.

    The walk includes sub-directories and matches the extension without
    regard to case. The module-level ``filelist`` is emptied first and then
    filled with one '<folder>/<file>' entry per match, so it always describes
    the most recent scan only and agrees with the returned count.

    Returns the number of header files found.
    """
    global filelist
    # Clear in place so repeated scans do not pile up duplicate entries.
    del filelist[:]
    for folderName, subfolders, files in os.walk(sourcedir):
        for file in files:
            if file.lower().endswith('.h'):
                filelist.append(folderName+'/'+file)
    return len(filelist)
|
|
|
|
# Encodings tried, in this order, when reading a header file.
_CANDIDATE_ENCODINGS = (
    'utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii',
    'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
    'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856',
    'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865',
    'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006',
    'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
    'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004',
    'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp',
    'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3',
    'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4',
    'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
    'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab',
    'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek',
    'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154',
    'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be',
    'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig',
)


def process_files(sourcedir, destdir):
    """Process every path in the module-level ``filelist``.

    sourcedir -- root the collected paths live under.
    destdir   -- root the '.inc' copies are written under.

    Both directories are handed on to process_file(), so the caller's choice
    is used rather than whatever the module-level defaults happen to be.
    """
    for header in filelist:
        process_file(header, sourcedir, destdir)


def _read_text(path):
    """Return the text of *path*, decoded with the first encoding that works."""
    last_error = None
    for e in _CANDIDATE_ENCODINGS:
        try:
            # The with-block closes the handle even when decoding fails.
            with io.open(path, 'r', encoding=e) as fh:
                return fh.read()
        except UnicodeDecodeError as err:
            last_error = err
            print('got unicode error with %s , trying different encoding' % e)
    raise last_error


def process_file(data, source_root=None, dest_root=None):
    """Copy the header file *data* to a '.inc' file under the destination root.

    data        -- path of the file to read.
    source_root -- leading part of *data* to replace; defaults to the
                   module-level ``sourcedir``.
    dest_root   -- replacement for *source_root*; defaults to the
                   module-level ``destdir``.

    The output path is *data* with its extension changed to '.inc' and the
    first occurrence of *source_root* replaced by *dest_root*. Missing output
    directories are created. The text is written with the platform default
    encoding.

    Raises UnicodeDecodeError if no candidate encoding can decode the file.
    """
    if source_root is None:
        source_root = sourcedir
    if dest_root is None:
        dest_root = destdir

    text = _read_text(data)

    outputfile = os.path.splitext(data)[0]+'.inc'
    # Replace only the first occurrence so a path that happens to contain the
    # source root again further down is left alone.
    outputfile = str(outputfile).replace(source_root, dest_root, 1)

    out_dir = os.path.dirname(outputfile)
    if out_dir:
        # exist_ok covers another worker creating the directory first.
        os.makedirs(out_dir, exist_ok=True)

    with open(outputfile, "w") as newfile:
        newfile.write(text)
|
|
|
|
def single_thread():
    """Time one sequential pass over the headers below ``sourcedir``.

    Collects the header list, prints how many files were found, processes
    them one after another and prints the elapsed wall-clock time.
    """
    started = time.time()
    header_count = sourcedir_filecnt(sourcedir)
    print('Files in: {}'.format(header_count))
    process_files(sourcedir, destdir)
    elapsed = time.time()-started
    print('Single thread process time: {}'.format(elapsed))
|
|
|
|
def multi_thread():
    """Time processing of the headers below ``sourcedir`` with five threads.

    Starts five myThread workers on the shared ``workQueue``, queues every
    collected path, waits for the queue to drain, tells the workers to stop
    and joins them. Prints the file count and the elapsed wall-clock time.
    """
    global exitFlag

    t1 = time.time()
    # A previous run leaves exitFlag at 1; without this reset the new workers
    # would leave their loop before taking any work.
    exitFlag = 0
    cnt = sourcedir_filecnt(sourcedir)
    threadList = ["thread 1", "thread 2", "thread 3", "thread 4", "thread 5"]
    started = []

    # Create new threads
    for threadID, tName in enumerate(threadList):
        thread = myThread(threadID, tName, workQueue)
        thread.start()
        threads.append(thread)
        started.append(thread)

    # Fill the queue
    with queueLock:
        for word in filelist:
            workQueue.put(word)

    # Wait for queue to empty; sleep between checks instead of spinning so
    # this thread does not compete with the workers for the interpreter.
    while not workQueue.empty():
        time.sleep(0.01)

    # Notify threads it's time to exit
    exitFlag = 1

    # Wait for the threads started by this call to complete
    for t in started:
        t.join()
    print ("Exiting Main Thread")
    print('Files in: '+str(cnt))
    print('Multi thread process time: '+str(time.time()-t1))
|
|
|
|
def async_process(num):
    """Time processing of the headers below ``sourcedir`` with a process pool.

    num -- number of worker processes in the pool.

    Collects the header list, maps process_file() over it, then prints the
    file count and the elapsed wall-clock time. The pool is always closed and
    joined, so no worker processes are left behind, even if a task raises.
    """
    t1 = time.time()
    cnt = sourcedir_filecnt(sourcedir)

    pool = mp.Pool(processes=num)
    try:
        pool.map(process_file, filelist)
    finally:
        pool.close()
        pool.join()

    print('Files in: '+str(cnt))
    print('Multi ('+str(num)+') processes time: '+str(time.time()-t1))
|
|
|
|
def cleanup(dest):
    """Remove the directory tree *dest*; do nothing if it is already gone.

    Raises OSError for any other failure (for example a permission error).
    """
    try:
        shutil.rmtree(dest)
    except FileNotFoundError:
        # Only a missing *dest* is acceptable; anything still on disk means
        # the removal really failed part-way.
        if os.path.lexists(dest):
            raise
|
|
|
|
# Source and destination roots used by the benchmark functions. They stay at
# module level because process_file() reads them as globals, including inside
# pool worker processes.
#sourcedir = 'C:/Users/dksojlg/Documents/gtk+-3.22.26'
sourcedir = '/usr/include'
#destdir = 'C:/Users/dksojlg/Documents/include'
destdir = '/data_2/include'

# Run the benchmark only when executed as a script: multiprocessing's spawn
# start method re-imports this module in each worker, and an unguarded call
# here would start a new pool from inside every worker.
if __name__ == '__main__':
    #print(cpu_info)
    #single_thread() #2543 files - Single thread process time: 1.3732633590698242 sec.
    #os.rmdir(destdir)
    #multi_thread() #2543 files - Synchronized 5 threads process time: 80.25179100036621 sec.
    #os.rmdir(destdir)
    async_process(16) #2543 files - 2 processes, process time: 1.2379400730133057 sec.
    #cleanup(destdir)
    #async_process(4) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
    #cleanup(destdir)
    #async_process(6) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
    #cleanup(destdir)
    #async_process(8) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
    #cleanup(destdir)
|