Files
h2inc-old/parser.py
2018-07-19 11:11:15 +02:00

150 lines
5.3 KiB
Python

# (c) 2018 Jan Lerking
# Python parser for C header files.
# Used for creating corresponding NASM include files.
import keywords
import os
import sys
import multiprocessing as mp
# Initialise the keywords module before anything reads its tables.
# NOTE(review): presumably this populates keywords.preprocessor_directives and
# keywords.regular, which the parser class reads -- confirm in the keywords module.
keywords.init()
# CPU count reported by multiprocessing; used as the pool size in process_files().
num_cores = mp.cpu_count()
# Placeholder; reassigned to the script directory at the bottom of this file.
test_folder = ""
# Paths of the '.h' files found by sourcedir_filecnt(); process_files() maps over this list.
filelist = []
class worker:
    """Container class; at present it only hosts the nested ``parser`` class."""

    class parser:
        """Word-level tokenizer for the lines of a C header file.

        Words are looked up in two keyword tables taken from the ``keywords``
        module: ``preprocessor_directives`` and ``regular``.
        """

        def __init__(self, file):
            """Remember *file* and bind the two keyword tables.

            Both tables are assumed to be mappings of keyword -> token type
            (for example ``'PREPROCESS'``) -- TODO confirm against the
            keywords module.
            """
            self.prep = keywords.preprocessor_directives
            self.reg = keywords.regular
            self.file = file
            self.blockcomment = False

        def get_token(self, keyword):
            """Return the token type registered for *keyword*, or ``""``.

            When the keyword is present in both tables the entry from the
            regular table wins, because it is looked up last.
            """
            token = ""
            # The tables are containers, so they are subscripted; the previous
            # code tried to call them like functions.
            if keyword in self.prep:
                token = self.prep[keyword]
            if keyword in self.reg:
                token = self.reg[keyword]
            return token

        def parse_preprocess(self, token):
            """Placeholder for preprocessor handling; always returns False."""
            return False

        # Converts a word into a 'key : value' pair.
        def tokenize_word(self, word):
            """Return ``{word: token_type}`` for a known word, else ``{}``."""
            token = {}
            # Store the pair as a dict item. Assigning to ``token.keys`` /
            # ``token.values`` raised AttributeError on a plain dict.
            if word in self.prep:
                token[word] = self.prep[word]
            if word in self.reg:
                token[word] = self.reg[word]
            return token

        # Creates a list of elements per line in file,
        # then adding this list as a single element in a global tuple
        def parseline(self, line):
            """Tokenize every whitespace-separated word of *line*.

            Tokens whose type is ``'PREPROCESS'`` are handed to
            ``parse_preprocess``.

            NOTE(review): the returned list is never filled, exactly as in
            the previous version; collecting the tokens looks intended but
            is not implemented yet.
            """
            tupline = []
            for w in line.split():
                token = self.tokenize_word(w)
                # A dict has no ``value`` attribute; inspect its values instead.
                if 'PREPROCESS' in token.values():
                    self.parse_preprocess(token)
            return tupline
def get_script_path():
    """Return the first entry of ``sys.path``.

    That entry is typically the directory of the script that started the
    interpreter.
    """
    search_path = sys.path
    return search_path[0]
def sourcedir_filecnt(sourcedir):
    """Count the '.h' files below *sourcedir*, subdirectories included.

    The suffix match is case-insensitive. As a side effect the path of each
    match (folder + '/' + file name) is appended to the global ``filelist``.
    """
    global filelist
    headers = [
        folderName + '/' + name
        for folderName, _subfolders, names in os.walk(sourcedir)
        for name in names
        if name.lower().endswith('.h')
    ]
    # Touch the global only when something was found, so a walk without
    # matches leaves it completely alone.
    if headers:
        filelist += headers
    return len(headers)
def sourcedir_foldercnt(sourcedir):
    """Count the folders below *sourcedir* that contain at least one '.h' file.

    *sourcedir* itself and all of its subdirectories are examined; the suffix
    match is case-insensitive. Each matching folder is appended to the global
    ``folderlist`` and the global counter ``cnt`` is incremented for it.

    Returns the value of the global ``cnt``, which keeps accumulating across
    calls just like ``folderlist`` does.
    """
    global cnt
    global folderlist
    # Neither global is initialised at module level, so create them on first
    # use; previously the first increment raised NameError.
    module_state = globals()
    module_state.setdefault('cnt', 0)
    module_state.setdefault('folderlist', [])
    # os.walk already descends into every subdirectory. The former explicit
    # recursion on bare subfolder names resolved them against the current
    # working directory, which either found nothing or counted folders twice.
    for folderName, _subfolders, files in os.walk(sourcedir):
        if any(name.lower().endswith('.h') for name in files):
            cnt += 1
            folderlist.append(folderName)
    return cnt
def _init_worker(source, dest):
    """Publish the source and destination roots as globals inside a pool worker."""
    global sourcedir
    global destdir
    sourcedir = source
    destdir = dest


def process_files(gui, source, dest):
    """Convert every path in the global ``filelist`` using a process pool.

    gui    -- accepted for the caller's convenience; not used here.
    source -- root folder of the input files; stored in the global ``sourcedir``.
    dest   -- root folder for the output; stored in the global ``destdir``.

    ``process_file`` reads ``sourcedir`` and ``destdir`` as globals, so they
    are set in this process and, through the pool initializer, in every
    worker. Without the initializer, workers created with the 'spawn' start
    method would never see the values assigned here.
    """
    global sourcedir
    global destdir
    sourcedir = source
    destdir = dest
    # The context manager shuts the pool down once map() has returned, so the
    # worker processes are no longer left behind.
    with mp.Pool(processes=num_cores,
                 initializer=_init_worker,
                 initargs=(source, dest)) as pool:
        pool.map(process_file, filelist)
# Encodings tried, in this order, when reading a header file.
# 'latin-1' maps every possible byte, so in practice nothing after it is
# reached; the list is kept in its original order.
_CANDIDATE_ENCODINGS = (
    'utf-8', 'latin-1', 'windows-1250', 'windows-1252', 'ascii',
    'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
    'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856',
    'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865',
    'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006',
    'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
    'cp1255', 'cp1256', 'cp1257', 'cp1258', 'cp65001', 'euc-jp', 'euc-jis-2004',
    'euc-jisx0213', 'euc-kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022-jp',
    'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 'iso2022-jp-3',
    'iso2022-jp-ext', 'iso2022-kr', 'iso8859-2', 'iso8859-3', 'iso8859-4',
    'iso8859-5', 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
    'iso8859-11', 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 'johab',
    'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'mac-cyrillic', 'mac-greek',
    'mac-iceland', 'mac-latin2', 'mac-roman', 'mac-turkish', 'ptcp154',
    'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 'utf-32', 'utf-32-be',
    'utf-32-le', 'utf-16', 'utf-16-be', 'utf-16-le', 'utf-7', 'utf-8-sig',
)


def _read_text(path):
    """Return the text of *path*, decoded with the first encoding that fits."""
    last_error = None
    for e in _CANDIDATE_ENCODINGS:
        try:
            # 'with' closes the handle even when decoding fails part-way.
            with open(path, 'r', encoding=e) as fh:
                return fh.read()
        except UnicodeDecodeError as exc:
            last_error = exc
            print('got unicode error with %s , trying different encoding' % e)
    raise last_error


def process_file(data):
    """Copy the header file *data* to its '.inc' counterpart under ``destdir``.

    The output path is the input path with its extension replaced by '.inc'
    and every occurrence of the global ``sourcedir`` replaced by the global
    ``destdir``. Missing output folders are created. The text is currently
    written unchanged, using the platform default encoding.

    Raises UnicodeDecodeError if no candidate encoding can decode the file.
    """
    # Uses the built-in open() and makedirs(exist_ok=True) because this module
    # never imports 'io' or 'errno', which the previous code relied on.
    text = _read_text(data)
    outputfile = os.path.splitext(data)[0] + '.inc'
    outputfile = str(outputfile).replace(sourcedir, destdir)
    target_dir = os.path.dirname(outputfile)
    # A bare file name has no directory part and makedirs('') would fail.
    # exist_ok covers another worker creating the same folder first.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    with open(outputfile, "w") as newfile:
        newfile.write(text)
# Import-time smoke run: these statements execute whenever the module is loaded.
# Use the script directory (sys.path[0]) as the folder to scan.
test_folder = get_script_path()
print(test_folder)
# sourcedir_filecnt() also appends every '.h' path it finds to the global filelist.
print('Number of *.h files in directory: ',sourcedir_filecnt(test_folder))
print(num_cores)