# (c) 2018 Jan Lerking
# Python parser for c header files.
# Used for creating corresponding NASM include files.
import os
import sys
import io
import errno
import multiprocessing
import threading
import time
import keywords
keywords.init()
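# NOTE (assumption): 'keywords' is a companion module of this project.
# From its use in the parser class below, keywords.init() is expected to
# populate two dicts, keywords.preprocessor_directives and keywords.regular,
# mapping C tokens to tag strings such as 'PREPROCESS'.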
num_cores = multiprocessing.cpu_count()
work_queue = multiprocessing.JoinableQueue()
signal_queue = multiprocessing.JoinableQueue()
test_folder = "/usr/include"
filelist = []
folderlist = []
processes = []
threads = []

class Listener:
    def __init__(self, queue):
        self.signal_queue = queue

    def go(self):
        print("Listener has started")
        finished = 0
        while True:
            # Listen for results on the queue and process them accordingly.
            # Workers post plain strings on the signal queue.
            data = self.signal_queue.get()
            print(data)
            if data == "work_update":
                print("Workers are working.")
                self.signal_queue.task_done()
            elif data == "work_finished":
                print("Workers are finished.")
                self.signal_queue.task_done()
                finished += 1
                # Exit once every worker has reported in.
                if finished == num_cores:
                    return
            else:
                self.signal_queue.task_done()

class Worker:
    def __init__(self, sq, wq):
        self.signal_queue = sq
        self.work_queue = wq

    def go(self):
        while True:
            task = self.work_queue.get()
            # A None task is the sentinel telling this worker to shut down.
            if task is None:
                self.work_queue.task_done()
                self.signal_queue.put("work_finished")
                return
            time.sleep(0.1)
            self.work_queue.task_done()
            print(self.work_queue.qsize())
            self.signal_queue.put("work_update")

class parser:
    def __init__(self, file):
        self.prep = keywords.preprocessor_directives
        self.reg = keywords.regular
        self.file = file
        self.blockcomment = False

    def get_token(self, keyword):
        token = ""
        if keyword in self.prep:
            token = self.prep[keyword]
        if keyword in self.reg:
            token = self.reg[keyword]
        return token

    def parse_preprocess(self, token):
        return False

    # Converts a word into a 'key : value' pair.
    def tokenize_word(self, word):
        token = {}
        if word in self.prep:
            token[word] = self.prep[word]
        if word in self.reg:
            token[word] = self.reg[word]
        return token

    # Creates a list of elements per line in file,
    # then adds this list as a single element in a global tuple.
    def parseline(self, line):
        tupline = []
        words = line.split()
        for w in words:
            token = self.tokenize_word(w)
            if 'PREPROCESS' in token.values():
                self.parse_preprocess(token)
        return tupline
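
# Minimal usage sketch for the parser class (the header name is
# hypothetical; parseline currently returns an empty tupline, so this
# only exercises tokenization of each line):
#   p = parser('example.h')
#   with io.open('example.h', 'r', encoding='utf-8') as src:
#       for line in src:
#           p.parseline(line)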

def get_script_path():
    return sys.path[0]

def sourcedir_filecnt(sourcedir):
    ### Return the number of files, ending with '.h', in sourcedir - including subdirectories ###
    cnt = 0
    global filelist
    for folderName, subfolders, files in os.walk(sourcedir):
        for file in files:
            if file.lower().endswith('.h'):
                cnt += 1
                filelist += [os.path.join(folderName, file)]
    return cnt
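
# Example: sourcedir_filecnt('/usr/include') walks the tree once, appends
# every header path to the global filelist, and returns the header count
# (the driver code below does this via test_folder).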

def sourcedir_foldercnt(sourcedir):
    # Return the number of folders containing '*.h' files
    # in 'sourcedir' - including subdirectories.
    cnt = 0
    global folderlist
    # os.walk already descends into subdirectories, so no explicit
    # recursion is needed here.
    for folderName, subfolders, files in os.walk(sourcedir):
        tempf = [file for file in files if file.lower().endswith('.h')]
        if tempf:
            cnt = cnt + 1
            folderlist += [folderName]
    return cnt

def process_files(gui, source, dest):
    # sourcedir/destdir are read by process_file inside the pool workers;
    # this relies on fork-style process creation (the default on Linux)
    # inheriting module globals.
    global sourcedir
    global destdir
    sourcedir = source
    destdir = dest
    pool = multiprocessing.Pool(processes=num_cores)
    pool.map(process_file, filelist)
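
# Example call (sketch): 'gui' is unused by this function and can be None;
# the destination directory below is only an assumed placeholder.
#   process_files(None, '/usr/include', '/tmp/nasm-include')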

def process_file(data):
    outfile = ''
    inputfile = data
    encodings = ['utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii',
                 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
                 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856',
                 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865',
                 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006',
                 'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
                 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004',
                 'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp',
                 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3',
                 'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4',
                 'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
                 'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab',
                 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek',
                 'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154',
                 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be',
                 'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig']
    # Try each encoding in turn until the whole file decodes cleanly.
    fh = None
    for e in encodings:
        try:
            fh = io.open(data, 'r', encoding=e)
            fh.readlines()
            fh.seek(0)
        except UnicodeDecodeError:
            print('got unicode error with %s , trying different encoding' % e)
            if fh:
                fh.close()
            fh = None
        else:
            break
    if fh is None:
        # No encoding worked; skip this file.
        return
    for lines in fh:
        outfile = outfile + lines
    fh.close()
    # Mirror the source tree under destdir, swapping '.h' for '.inc'.
    outputfile = os.path.splitext(inputfile)[0] + '.inc'
    outputfile = str(outputfile).replace(sourcedir, destdir)
    if not os.path.exists(os.path.dirname(outputfile)):
        try:
            os.makedirs(os.path.dirname(outputfile))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    newfile = open(outputfile, "w")
    newfile.write(outfile)
    newfile.close()

# Guard the driver code so worker processes importing this module
# do not re-run it.
if __name__ == '__main__':
    print(test_folder)
    print('Number of *.h files in directory: ', sourcedir_filecnt(test_folder))
    print('Number of processor cores: ', num_cores)
    print("Creating Listener")
    listener = Listener(signal_queue)
    print("Starting Listener")
    thread = threading.Thread(target=listener.go, args=())
    thread.start()
    threads.append(thread)
    print(threads)
    print("Creating work queue")
    for i in filelist:
        work_queue.put(i)
    print(work_queue.qsize())
    for i in range(num_cores):
        w = Worker(signal_queue, work_queue)
        p = multiprocessing.Process(target=w.go, args=())
        p.daemon = True
        p.start()
        processes.append(p)
    print(processes)
    # One None sentinel per worker tells each one to shut down.
    for i in range(num_cores):
        work_queue.put(None)
    print('Items on work queue: ', work_queue.qsize())
    work_queue.join()
    print('Items on signal queue: ', signal_queue.qsize())
    signal_queue.join()
    print("Closing down workers")
    for p in processes:
        print("Closing down:", p)
        p.join()
    print(processes)
    print("Closing down Listener")
    for t in threads:
        t.join()
    print(threads)