Files
h2inc-old/multiprocessing_test.py

315 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3.5
# You are free to use and/or change this code for
# your own needs.
# Original code (c)2018 Jan Lerking
# Program to test various scenarios of
# single thread, multi thread, pool and process
import os
import sys
import multiprocessing as mp
import time
import io
import cpuinfo
import queue
import threading
import time
import random
import string
import shutil
from tkinter import Tk, ttk, Label, Button, LabelFrame
from tkinter import filedialog, Entry, Checkbutton
from tkinter import Grid, StringVar, DoubleVar
from tkinter import N, E, S, W
from tkinter import DISABLED, NORMAL
from os import errno
import os
from os.path import expanduser
import platform
class file_progress:
    """Tkinter panel showing the progress of a header-translation run.

    The constructor builds a 'Progress' frame holding a total-progress label
    and a determinate progress bar.  The remaining methods are callbacks for
    choosing the source/destination directories, starting the translation and
    updating the progress state.

    NOTE(review): several callbacks configure widgets (destlabel, destentry,
    destdir_button, infofiles, infofolders, incchkbox, translate_button,
    cfilelabel) that are not created in __init__ as shown here, and
    select_sourcedir calls sourcedir_foldercnt(), which is not defined in the
    visible part of the file -- confirm they exist elsewhere.
    """

    def __init__(self, master):
        """Create the state variables and lay out the widgets on *master*."""
        self.sourcedir = StringVar()
        self.destdir = StringVar()
        self.addinc = StringVar()
        self.cfilevar = DoubleVar()
        # NOTE(review): totalvar is a plain int, not a Tk variable, yet it is
        # handed to the progress bar's 'variable' option below -- confirm.
        self.totalvar = 0
        self.filecnt = 0
        self.infofolder = 'Number of folders: 0'
        self.infofile = 'Number of headers: 0'
        self.currentfile = 'Current file: '
        self.totprogress = 'Total progress: '
        self.sourcedir.set('Select source directory!')
        self.destdir.set('Select destination directory!')

        self.master = master
        self.master.title('File progress')
        self.master.grid_columnconfigure(1, weight=1)

        self.frame = LabelFrame(master, text='Progress')
        self.frame.grid(row=0, column=0, columnspan=3, sticky=N+S+E+W, padx=5, pady=5)
        self.frame.grid_columnconfigure(1, weight=1)

        self.totallabel = Label(self.frame, text=self.totprogress)
        self.totallabel.grid(row=8, column=0, sticky=W, padx=5, pady=5)
        self.totallabel.config(state=DISABLED)

        self.totalprogress = ttk.Progressbar(self.frame, orient='horizontal', mode='determinate')
        self.totalprogress.grid(row=9, column=0, columnspan=3, sticky=N+S+E+W, padx=5, pady=5)
        self.totalprogress.config(variable=self.totalvar, maximum=self.filecnt)

    def select_sourcedir(self, sourcedir):
        """Ask for the source directory and show how many headers it holds.

        sourcedir -- Tk variable that receives the chosen absolute path.
        """
        chosen = filedialog.askdirectory()
        # A cancelled dialog yields an empty string; abspath('') would turn
        # that into the current working directory, so test before converting.
        root.directory = os.path.abspath(chosen) if chosen else ''
        if not root.directory:
            return
        sourcedir.set(root.directory)
        filecnt = sourcedir_filecnt(root.directory)
        if filecnt > 0:
            tempstr = 'Number of headers: '+str(filecnt)
            temptot = 'Total progress: 0 of '+str(filecnt)
            print ('Source directory: ', sourcedir.get())
            self.destlabel.config(state=NORMAL)
            self.destentry.config(state=NORMAL)
            self.destdir_button.config(state=NORMAL)
            self.infofiles.config(text=tempstr)
            self.totallabel.config(text=temptot)
            self.filecnt = filecnt
            foldercnt = sourcedir_foldercnt(root.directory)
            if foldercnt > 0:
                tempstr = 'Number of folders: '+str(foldercnt)
                self.infofolders.config(text=tempstr)

    def select_destdir(self, destdir):
        """Ask for the destination directory and enable the remaining controls.

        destdir -- Tk variable that receives the chosen path.
        """
        root.directory = filedialog.askdirectory()
        if not root.directory:
            return
        destdir.set(root.directory)
        print ('Destination directory: ', destdir.get())
        self.incchkbox.config(state=NORMAL)
        self.infofolders.config(state=NORMAL)
        self.infofiles.config(state=NORMAL)
        self.translate_button.config(state=NORMAL)
        self.cfilelabel.config(state=NORMAL)
        self.totallabel.config(state=NORMAL)

    def translate(self, destdir, sourcedir, addinc):
        """Create the destination directory and process every collected header.

        destdir   -- Tk variable holding the destination path; updated when an
                     'include' sub-directory is appended.
        sourcedir -- Tk variable holding the source path.
        addinc    -- Tk variable; the value 'yes' appends '/include' to the
                     destination.

        Raises OSError if the destination directory cannot be created.
        """
        doinc = addinc.get()
        dest = destdir.get()
        source = sourcedir.get()
        if doinc == 'yes':
            dest = dest+'/include'
        # exist_ok makes this safe if the directory appears concurrently and
        # avoids the errno check (os.errno no longer exists in Python 3.7+).
        os.makedirs(dest, exist_ok=True)
        destdir.set(dest)
        print ('Destination directory: ', destdir.get())
        process_files(source, dest)

    def cfileprogress_update(self, cnt):
        """Store *cnt* in the current-file progress variable."""
        # Set the DoubleVar rather than replacing it with a bare number.
        self.cfilevar.set(cnt)

    def currentfile_update(self, current):
        """Remember the name of the file currently being processed."""
        self.currentfile = 'Current file: '+current
# Build the Tk root window, attach the progress panel and run the GUI.
# NOTE: root.mainloop() blocks until the window is closed, so every
# module-level statement after it only runs once the GUI has exited.
root = Tk()
root.update()
#root.minsize(350, 210)
#width = (root.winfo_screenwidth()/2)-(350/2)
#height = (root.winfo_screenheight()/2)-(210/2)
#root.geometry('+%d+%d' % (width, height))
# Fixed-size window: resizing is disabled in both directions.
root.resizable(False, False)
f_progress = file_progress(root)
root.mainloop()
# CPU details from the third-party cpuinfo package; only referenced by a
# commented-out print further down the file.
cpu_info = cpuinfo.get_cpu_info()
num_cores = mp.cpu_count()
print(num_cores)
# Paths of the '.h' files collected by sourcedir_filecnt().
filelist = []
# Set to 1 by multi_thread() to tell the worker threads to leave their loop.
exitFlag = 0
# Guards access to workQueue in process_data() and multi_thread().
queueLock = threading.Lock()
# A maxsize of 0 means the queue is unbounded.
workQueue = queue.Queue(0)
# Every worker thread started by multi_thread().
threads = []
class myThread(threading.Thread):
    """Worker thread that drains a queue through process_data()."""

    def __init__(self, threadID, name, q):
        """Record the numeric id, the thread name and the queue to consume."""
        super().__init__()
        self.threadID, self.name, self.q = threadID, name, q

    def run(self):
        """Announce start, consume the queue, then announce exit."""
        print("Starting %s" % self.name)
        process_data(self.name, self.q)
        print("Exiting %s" % self.name)
def process_data(threadName, q):
    """Worker loop: take file paths from *q* and process them until told to stop.

    threadName -- label used in the progress message.
    q          -- queue of file paths to hand to process_file().

    The loop ends once the module-level exitFlag becomes true.  When the queue
    is empty the worker sleeps for one second before polling again.
    """
    while not exitFlag:
        try:
            # Hold the lock only while dequeuing.  multi_thread() holds the
            # same lock while it fills the queue; keeping process_file()
            # outside of it lets the workers run concurrently and guarantees
            # the lock is released even if processing raises.
            with queueLock:
                data = q.get_nowait()
        except queue.Empty:
            time.sleep(1)
            continue
        process_file(data)
        print ("%s processing %s" % (threadName, data))
def sourcedir_filecnt(sourcedir):
    """Return the number of '.h' files in *sourcedir*, including subdirectories.

    The match on the '.h' suffix is case-insensitive.  As a side effect the
    module-level ``filelist`` is replaced with the paths that were found, so
    scanning again (or scanning another directory) no longer leaves stale or
    duplicated entries behind.
    """
    global filelist
    # Paths are joined with '/' exactly as before: process_file() maps them to
    # the destination tree by comparing them against the source root string.
    headers = [
        folderName + '/' + name
        for folderName, _subfolders, names in os.walk(sourcedir)
        for name in names
        if name.lower().endswith('.h')
    ]
    filelist = headers
    return len(headers)
def process_files(sourcedir, destdir):
    """Process every path in the module-level ``filelist``.

    sourcedir -- root of the scanned tree; the leading part of each path that
                 is swapped for *destdir*.
    destdir   -- root under which the '.inc' files are written.
    """
    for header in filelist:
        process_file(header, sourcedir, destdir)


def _read_header_text(path):
    """Return the text of *path*, decoded as UTF-8 or, failing that, Latin-1."""
    try:
        with io.open(path, 'r', encoding='utf-8') as fh:
            return fh.read()
    except UnicodeDecodeError:
        print('got unicode error with %s , trying different encoding' % 'utf-8')
    # Latin-1 maps every possible byte to a character, so this cannot raise a
    # decode error; any further fallback encoding would never be reached.
    with io.open(path, 'r', encoding='latin-1') as fh:
        return fh.read()


def process_file(data, source_root=None, dest_root=None):
    """Copy the text of header *data* to a '.inc' file in the destination tree.

    data        -- path of the input file.
    source_root -- leading part of *data* to replace; defaults to the
                   module-level ``sourcedir``.
    dest_root   -- replacement for *source_root*; defaults to the module-level
                   ``destdir``.

    The output path is *data* with its extension changed to '.inc' and its
    *source_root* prefix replaced by *dest_root*.  Missing directories are
    created.  Raises OSError if the input cannot be read or the output cannot
    be created.
    """
    if source_root is None:
        source_root = sourcedir
    if dest_root is None:
        dest_root = destdir

    text = _read_header_text(data)

    outputfile = os.path.splitext(data)[0]+'.inc'
    # Swap only the leading source root; a blanket str.replace would also
    # rewrite any later occurrence of the same text inside the path.
    if outputfile.startswith(source_root):
        outputfile = dest_root + outputfile[len(source_root):]

    out_dir = os.path.dirname(outputfile)
    if out_dir:
        # exist_ok covers the race where another worker creates it first.
        os.makedirs(out_dir, exist_ok=True)
    with open(outputfile, "w") as newfile:
        newfile.write(text)
def single_thread():
    """Time a scan of sourcedir followed by sequential processing of the files found."""
    started = time.time()
    header_count = sourcedir_filecnt(sourcedir)
    print('Files in: {}'.format(header_count))
    process_files(sourcedir, destdir)
    elapsed = time.time() - started
    print('Single thread process time: {}'.format(elapsed))
def multi_thread(num_threads=5):
    """Time processing of the headers under sourcedir with worker threads.

    num_threads -- number of myThread workers to start (default 5, named
                   'thread 1' .. 'thread N').

    The workers are started first, the shared workQueue is filled with the
    collected file paths, and once the queue has drained the workers are told
    to stop through exitFlag and joined.
    """
    global exitFlag
    # Reset the stop flag: after an earlier run it is still 1, and new workers
    # would quit immediately while this function waits forever for the queue
    # to empty.
    exitFlag = 0
    t1 = time.time()
    cnt = sourcedir_filecnt(sourcedir)

    # Create new threads
    workers = []
    for threadID in range(num_threads):
        worker = myThread(threadID, "thread %d" % (threadID + 1), workQueue)
        worker.start()
        threads.append(worker)
        workers.append(worker)

    # Fill the queue; 'with' releases the lock even if put() raises.
    with queueLock:
        for path in filelist:
            workQueue.put(path)

    # Wait for queue to empty.  Sleep between polls instead of spinning so the
    # main thread does not starve the workers.
    while not workQueue.empty():
        time.sleep(0.01)

    # Notify threads it's time to exit
    exitFlag = 1

    # Wait for all threads to complete
    for worker in workers:
        worker.join()
    print ("Exiting Main Thread")
    print('Files in: '+str(cnt))
    print('Multi thread process time: '+str(time.time()-t1))
def async_process(num):
    """Time processing of the headers under sourcedir with a process pool.

    num -- number of worker processes in the pool.
    """
    t1 = time.time()
    cnt = sourcedir_filecnt(sourcedir)
    # The context manager shuts the pool down when the block exits, so the
    # worker processes are not left behind.  map() only returns after every
    # file has been processed.
    with mp.Pool(processes=num) as pool:
        pool.map(process_file, filelist)
    print('Files in: '+str(cnt))
    print('Multi ('+str(num)+') processes time: '+str(time.time()-t1))
def cleanup(dest):
    """Recursively delete the directory tree rooted at *dest*."""
    shutil.rmtree(path=dest)
# Benchmark configuration and invocation.
#sourcedir = 'C:/Users/dksojlg/Documents/gtk+-3.22.26'
# Directory tree that is scanned for '.h' files.
sourcedir = '/usr/include'
#destdir = 'C:/Users/dksojlg/Documents/include'
# Root that replaces sourcedir in the path of each generated '.inc' file.
destdir = '/data_2/include'
#print(cpu_info)
# Enable one benchmark at a time; the trailing comments record timings noted
# by the original author.
#single_thread() #2543 files - Single thread process time: 1.3732633590698242 sec.
#os.rmdir(destdir)
#multi_thread() #2543 files - Synchronized 5 threads process time: 80.25179100036621 sec.
#os.rmdir(destdir)
async_process(16) #2543 files - 2 processes, process time: 1.2379400730133057 sec.
#cleanup(destdir)
#async_process(4) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
#cleanup(destdir)
#async_process(6) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
#cleanup(destdir)
#async_process(8) #2543 files - 2 processes, process time: 2.6622860431671143 sec.
#cleanup(destdir)