228 lines
8.4 KiB
Python
228 lines
8.4 KiB
Python
"""Contains class PARSER"""
|
|
from itertools import count
|
|
|
|
# Element type definitions used during the parse process.
ELEMENT_TYPE_PREPROCESS = 1
ELEMENT_TYPE_REGULAR = 2

# Token vocabulary recognised by the parser.  The reserved-word token
# names from RESERVED are appended to this list further down the module.
TOKENS = [
    'TOKEN_CSTART', 'TOKEN_CMID', 'TOKEN_CEND', 'TOKEN_RPAREN',
    'TOKEN_LPAREN', 'TOKEN_ENDLINE', 'TOKEN_RETVAL', 'TOKEN_PREPROCESS',
    'TOKEN_ID', 'TOKEN_PLUS', 'TOKEN_MINUS', 'TOKEN_DIV', 'TOKEN_MULT',
    'TOKEN_ASSIGN', 'TOKEN_EQUAL', 'TOKEN_LBRACE', 'TOKEN_RBRACE',
    'TOKEN_COMMA', 'TOKEN_SEMICOLON', 'TOKEN_LANGLE', 'TOKEN_RANGLE',
    'TOKEN_POINTER',
]
|
|
|
|
# C reserved words mapped to their token names.  Every token name is
# simply the upper-cased keyword, so the table is generated; insertion
# order matches the original hand-written dict.
RESERVED = {kw: kw.upper() for kw in (
    'auto', 'break', 'case', 'char', 'const', 'continue', 'default', 'do',
    'int', 'long', 'register', 'return', 'short', 'signed', 'sizeof',
    'static', 'struct', 'switch', 'typedef', 'union', 'unsigned', 'void',
    'volatile', 'while', 'double', 'else', 'enum', 'extern', 'float',
    'for', 'goto', 'if',
)}
|
|
|
|
# Every C preprocessor directive tokenizes to the same token name, so the
# table is built with dict.fromkeys; key order matches the original dict.
PREPROCESSOR_DIRECTIVES = dict.fromkeys((
    '#include', '#define', '#undef', '#if', '#ifdef', '#ifndef', '#error',
    '__FILE__', '__LINE__', '__DATE__', '__TIME__', '__TIMESTAMP__',
    'pragma', '#', '##', '#endif',
), 'TOKEN_PREPROCESS')
|
|
|
|
# Plain-string lookup table mapping C punctuation lexemes to token names.
# Keys are matched literally against whitespace-split words in
# tokenizer(), so they must NOT be regex-escaped: a key such as '\+'
# contains a literal backslash and could never match the C token '+'.
# The previously escaped keys for + ( ) < > are fixed below.
REGULAR = {
    '/*': 'TOKEN_CSTART',
    '*/': 'TOKEN_CEND',
    # A bare '*' word is treated as the middle marker of a multi-line
    # comment, not as multiplication.
    '*': 'TOKEN_CMID',
    '=': 'TOKEN_ASSIGN',
    '==': 'TOKEN_EQUAL',
    '{': 'TOKEN_LBRACE',
    '}': 'TOKEN_RBRACE',
    '+': 'TOKEN_PLUS',        # was '\+' (never matched)
    '-': 'TOKEN_MINUS',
    # Legacy regex-escaped key kept as-is: de-escaping it would collide
    # with the '*' -> TOKEN_CMID entry above and change comment handling,
    # so TOKEN_MULT is only produced for a literal backslash-star word.
    r'\*': 'TOKEN_MULT',
    '/': 'TOKEN_DIV',
    '(': 'TOKEN_LPAREN',      # was '\(' (never matched)
    ')': 'TOKEN_RPAREN',      # was '\)' (never matched)
    ',': 'TOKEN_COMMA',
    ';': 'TOKEN_SEMICOLON',
    '<': 'TOKEN_LANGLE',      # was '\<' (never matched)
    '>': 'TOKEN_RANGLE',      # was '\>' (never matched)
}
|
|
|
|
# C preprocessor directive -> NASM preprocessor directive translation.
# Identity mappings are listed explicitly so every directive seen by the
# tokenizer has a defined translation.
NASM_PREPROCESS_DIRECTIVES = {
    '#include': '%include',
    '#define': '%define',
    '#undef': '%undef',
    '#if': '%if',
    '#ifdef': '%ifdef',
    '#ifndef': '%ifndef',
    '#endif': '%endif',
    '#error': '%error',
    '__FILE__': '__FILE__',
    '__LINE__': '__LINE__',
    '__DATE__': '__DATE__',
    '__TIME__': '__TIME__',
    '__TIMESTAMP__': '__TIMESTAMP__',
    'pragma': 'pragma',
    '#': '#',
    '##': '##',
}
|
|
|
|
NASM_ENUM = "EQU"
|
|
|
|
NASM_REGULAR = {'/*' : ';', '*' : ';', '*/' : ''}
|
|
|
|
TOKENS += RESERVED.values()
|
|
|
|
COMMENT_SINGLE_LINE = 0
|
|
COMMENT_MULTI_LINE = 1
|
|
|
|
class PARSEOBJECT:
    """Base class that turns tokenized C header lines into NASM source.

    The parse pipeline is parseheader() -> analyzer()/tokenizer() per
    line, then parsetokens() to translate the tokenized lines (comments,
    preprocessor directives and enum typedefs) into NASM syntax.
    """

    # Class-level pass counter; inc_passes() advances it and mirrors the
    # current value into self.passes.
    _passes = count(0)

    def __init__(self):
        # Tokens of the line currently being parsed.
        self.parseline = []
        # Translated output lines for the whole file.
        self.parsefile = []
        # Number of input lines processed so far.
        self.passes = 0

    def parse_reset(self):
        """Reset all per-file parser state before a new parse run."""
        self.parseline = []
        self.parsefile = []
        # NOTE(review): this creates an *instance* attribute shadowing the
        # class-level _passes counter, restarting the count at 0 —
        # presumably intentional so each run counts from 0; confirm.
        self._passes = count(0)
        self.inside_comment = False
        self.inside_typedef = False
        self.typedef_enum = False
        self.enum_begin = False

    def inc_passes(self):
        """Advance the pass counter and store the new value in self.passes."""
        self.passes = next(self._passes)

    def parseheader(self, fl):
        """Parse an iterable of C header lines; return the NASM translation.

        fl: iterable of strings, one C header line each.
        Returns the list of translated lines (also kept in self.parsefile).
        """
        tempfile = []
        self.parse_reset()
        for l in fl:
            # Tokenize the line into an alternating [TOKEN, word, ...] list.
            analyzed_line = self.analyzer(l)
            tempfile.append(analyzed_line)
            self.inc_passes()
        self.parsefile = self.parsetokens(tempfile)
        return self.parsefile

    def parseinclude(self, data):
        """Rewrite a C include target to its NASM equivalent.

        '<name.h>' becomes '"name.inc"'; a bare 'name.h' becomes
        '"name.inc"' as well.
        """
        tempstr = str(data)
        if tempstr.startswith('<'):
            tempstr = tempstr.replace('<', '"')
            tempstr = tempstr.replace('.h>', '.inc"')
        if tempstr.endswith('.h'):
            tempstr = '"'+tempstr
            # replace() rewrites the first '.h' occurrence; for a name
            # ending in '.h' that is normally the extension itself.
            tempstr = tempstr.replace('.h', '.inc"')
        return tempstr

    def tokenizer(self, w):
        """Return the token name for word w, or False when w is no token."""
        token = ""
        if w in PREPROCESSOR_DIRECTIVES:
            token = PREPROCESSOR_DIRECTIVES.get(w)
            return token
        if w in REGULAR:
            token = REGULAR.get(w)
            return token
        return False

    def analyzer(self, ln):
        """Split line ln on whitespace and tokenize each word.

        Known tokens are emitted as the pair [TOKEN_NAME, word];
        unknown words are passed through unchanged.
        """
        analysed = []
        word = [w for w in ln.split()]
        for w in word:
            t = self.tokenizer(w)
            if t == False:
                analysed.append(w)
                continue
            else:
                analysed.append(t)
                analysed.append(w)
        return analysed

    def parsetokens(self, fl):
        """Translate the tokenized lines fl into NASM output lines.

        Comments become ';' comments, non-enum typedefs are emitted as
        commented-out lines, enum typedef members become EQU constants,
        and preprocessor directives are mapped to their NASM forms.
        Indentation of this method was reconstructed from a
        whitespace-mangled source; the spots marked NOTE(review) should
        be double-checked against the original file.
        """
        templine = []
        tempfile = []
        # Value assigned to the next auto-numbered enum member.
        enum_cnt = 0

        for l in fl:
            templine = []
            tempstr = ""
            if len(l) == 0:
                # NOTE(review): templine is never appended to tempfile
                # here, so empty input lines are dropped from the output —
                # confirm whether tempfile.append(templine) was intended.
                templine.append("\n")
                continue
            if l[0] == "TOKEN_CSTART" or l[0] == "TOKEN_CMID" or l[0] == "TOKEN_CEND":
                self.inside_comment = True
                tempfile.append(self.parse_comment(l))
                continue
            if l[0] == "TYPEDEF" or l[0] == "typedef":
                # Record typedef/enum state; non-enum typedefs are kept as
                # NASM comments ('; ' prefix), enum typedefs are handled
                # by the enum translation below on the following lines.
                self.parse_typedef(l)
                if self.typedef_enum == False:
                    templine.append("; ")
                    for e in l:
                        templine.append(e)
                    tempfile.append(templine)
                continue
            if l[0] == "TOKEN_PREPROCESS":
                tempfile.append(self.parse_preprocess(l))
                continue
            if self.inside_typedef == True:
                if self.typedef_enum == True:
                    if l[0] == "TOKEN_LBRACE" and len(l) == 2:
                        # Opening '{' of the enum body.
                        self.enum_begin = True
                        continue
                    if len(l) == 1:
                        # Plain enum member, with or without a trailing
                        # comma: emit 'NAME<TAB>EQU<TAB><value>' and
                        # auto-increment the counter.
                        if l[0].endswith(","):
                            tempstr = l[0]
                            templine.append(tempstr[:-1]+"\t")
                            templine.append("EQU\t")
                            templine.append(str(enum_cnt)+"\n")
                            tempfile.append(templine)
                            enum_cnt += 1
                            continue
                        else:
                            templine.append(l[0]+"\t")
                            templine.append("EQU\t")
                            templine.append(str(enum_cnt)+"\n")
                            tempfile.append(templine)
                            enum_cnt += 1
                            continue
                        continue
                    if len(l) == 3:
                        # Three-element member line: l[2] is adopted as
                        # the explicit EQU value.
                        if l[0].endswith(","):
                            tempstr = l[0]
                            # NOTE(review): enum_cnt becomes a string here
                            # and is never incremented afterwards, so a
                            # following auto-numbered member would misuse
                            # it — confirm against the original.
                            enum_cnt = l[2]
                            templine.append(tempstr[:-1]+"\t")
                            templine.append("EQU"+"\t")
                            templine.append(enum_cnt+"\n")
                            tempfile.append(templine)
                            continue
                        continue
                    if l[0] == "TOKEN_RBRACE" and len(l) == 3:
                        # Closing '} name;' of the enum: reset all enum
                        # state.  NOTE(review): under this reconstructed
                        # nesting a len(l) == 3 line is consumed by the
                        # branch above, which would make this branch
                        # unreachable — verify the original indentation.
                        self.enum_begin = False
                        self.typedef_enum = False
                        self.inside_typedef = False
                        enum_cnt = 0
                        continue
                    continue
                continue
        return tempfile

    def parse_typedef(self, l):
        """Scan a typedef line, setting inside_typedef / typedef_enum."""
        # NOTE(review): templine is assigned but never used — looks like
        # leftover scaffolding.
        templine = []
        for w in l:
            if w == "TYPEDEF" or w == "typedef":
                self.inside_typedef = True
                continue
            if w == "ENUM" or w == "enum":
                self.typedef_enum = True
                continue

    def parse_comment(self, l):
        """Translate a tokenized comment line into NASM comment syntax."""
        templine = []
        for w in l:
            # Token names are markers only; drop them, keep the words.
            if w in TOKENS:
                continue
            # Map C comment markers ('/*', '*', '*/') to NASM's ';'.
            if w in NASM_REGULAR:
                templine.append(NASM_REGULAR.get(w))
                continue
            templine.append(w)
        return templine

    def parse_preprocess(self, l):
        """Translate a tokenized preprocessor line into NASM directives."""
        newline = []
        for w in l:
            # Token names are markers only; skip them.
            if w in TOKENS:
                continue
            # '#include' -> '%include', '#define' -> '%define', etc.
            if w in PREPROCESSOR_DIRECTIVES:
                newline.append(NASM_PREPROCESS_DIRECTIVES.get(w))
                continue
            # '<file.h>' include targets become '"file.inc"'.
            if w.startswith("<"):
                newline.append(self.parseinclude(w))
                continue
            if w in NASM_REGULAR:
                newline.append(NASM_REGULAR.get(w))
                continue
            newline.append(w)
        return newline
|
|
|
|
|
|
class PARSER(PARSEOBJECT):
    """Concrete parser: a PARSEOBJECT with a unique id and tuple buffers."""

    # Class-wide counters: _ids hands each instance a unique id; _passes
    # shadows the base-class counter so PARSER instances are numbered
    # independently of PARSEOBJECT's own pass counting.
    _ids = count(0)
    _passes = count(0)

    def __init__(self):
        # Bug fix: the base initializer was never called, so a fresh
        # PARSER lacked parseline/parsefile until parse_reset() ran.
        super().__init__()
        self.id = next(self._ids)
        self.tupline = []
        self.tupfile = []
        # Preserve the original behaviour: passes reflects this instance's
        # position in the class-wide _passes counter (overriding the base
        # class's initial 0).
        self.passes = next(self._passes)