Module parser

Expand source code
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import print_function
import sys, os, re

# Our modules
import stack, lexer

'''
The parser needs several variables to operate.
  Quick summary of variables:
     Token definitions, Lexer tokens, Parser functions,
      Parser states, Parse state table.
See pangparser.py for documentation and examples.
'''

# Quick intro: The lexer creates a stack of tokens. The parser scans
# the tokens, and walks the state machine for matches. If a match
# is encountered, the parser calls the function in the state table,
# and / or changes state. Reduce is called after the state has been
# successfully digested. For more info see lex / yacc literature.

_gl_cnt = 0
def unique():
    """Return the next value of the module-wide counter.

    The counter advances by 10 on every call, so successive results
    are distinct for the lifetime of the module.
    """
    global _gl_cnt
    _gl_cnt = _gl_cnt + 10
    return _gl_cnt

# This variable controls the display of the default action.
# The default action is executed when there is no rule for the
# expression. Mostly useful for debugging the grammar.
# It is read by Parse.parse_item() when no parse table row matched.

_show_default_action = False

# May be redefined, included here for required initial states:
# Each state is a two-element list [numeric id, name]. Parse keys its
# lookup dictionary on the name (element 1) and the older table scan
# compares the numeric id (element 0).

# ANYSTATE is not referenced elsewhere in this listing.
ANYSTATE    = [-2, "anystate"]
# REDUCE and IGNORE are compared against a row's next-state slot (index 5).
REDUCE      = [-1, "reduce"]
# The ids below come from unique(), so they depend on call order.
IGNORE      = [unique(), "ignore"]
# INIT is the state every Parse instance starts in.
INIT        = [unique(), "init"]

# ------------------------------------------------------------------------
# This parser creates no error conditions. Bad for languages, good
# for text parsing. Warnings can be generated by enabling the
# 'show_default' action.
# The parser is not fully recursive, so states need to be nested by
# hand. The flat parser is an advantage for text processing.

class Parse():
    """Table-driven, flat state-machine parser over a lexer token stream.

    Construction builds a two-level lookup (state name -> token id ->
    parse table row) from the module-level ``parsetable`` and then
    immediately consumes every token delivered by *xstack*.

    Parse table rows are indexed positionally, as used by this class:
      pt[0]  a single state ``[id, name]``, or None
      pt[1]  list of states, used when pt[0] is None
      pt[2]  token id to match (when falsy, pt[3] is used instead)
      pt[3]  iterable of token ids (a token class), may be None
      pt[4]  action callable ``fn(parser, tt, row)``, or None
      pt[5]  next state, or REDUCE / IGNORE
      pt[6]  continuation flag stored together with the new state

    NOTE(review): ``parsetable`` and ``self.reduce`` are not defined in
    this listing; presumably the grammar module supplies them (see
    pangparser.py) -- confirm before relying on REDUCE rows.
    """

    def __init__(self, data, xstack, pvg = None):
        """Build the lookup dictionary, then parse all tokens.

        data   -- text that the token match objects index into
        xstack -- token source; get2() returns the next token and a
                  falsy value when the stream is exhausted
        pvg    -- optional options object; its show_state and
                  show_parse attributes enable diagnostic printing
        """
        self.fstack = stack.Stack()
        self.fsm = INIT
        self.contflag = 0
        self.pvg = pvg
        self.pardict = {}

        # Create parse dictionary. A row applies either to its single
        # state (pt[0]) or to every state listed in pt[1].
        for pt in parsetable:
            states = [pt[0]] if pt[0] is not None else pt[1]
            for state in states:
                dd = self.pardict.setdefault(state[1], {})
                if pt[2]:
                    dd[pt[2]] = pt[:]
                else:
                    self.add_class(dd, pt)

        while True:
            tt = xstack.get2()  # Get next token
            if not tt:
                break
            self.parse_item2(data, tt)

    def add_class(self, dd, pt):
        """Register a copy of row *pt* in *dd* under each token id in pt[3].

        A row whose class slot is None contributes nothing instead of
        raising TypeError.
        """
        for aa in pt[3] or ():
            dd[aa] = pt[:]

    def _flag(self, name):
        """Return the named option from pvg; off when pvg or the flag is absent."""
        return getattr(self.pvg, name, False)

    def _apply(self, tt, item):
        """Run the action of row *item* and perform its state transition."""
        if item[4] is not None:
            item[4](self, tt, item)

        if item[5] == REDUCE:
            # This is an actionless reduce ... rare
            self.reduce(tt)
        elif item[5] == IGNORE:
            pass
        else:
            # Remember where we came from so popstate() can return to it.
            self.fstack.push([self.fsm, self.contflag, tt, self.strx])
            self.fsm = item[5]
            self.contflag = item[6]

    # This is the new routine, dictionary driven
    # About ten times as fast

    def parse_item2(self, data, tt):
        """Dispatch one token through the dictionary built in __init__.

        tt is ``[token, match]``: token[0] is the token id used as the
        lookup key and match provides start() / end() offsets into
        *data*. Tokens without a matching state or row are skipped;
        nothing is raised for them.
        """
        mmm = tt[1]
        self.strx = data[mmm.start():mmm.end()]
        if self._flag("show_state"):
            print("state:", self.fsm, "str:", "'" + self.strx + "' token:", tt[0])

        try:
            curr = self.pardict[self.fsm[1]]
        except KeyError:
            print("no state on", tt[0], self.strx)
            return      # no table for this state, nothing to look up

        try:
            item = curr[tt[0][0]]
        except KeyError:
            if self._flag("show_parse"):
                # Show context; clamp so a match near the start of the
                # data does not produce a negative (wrapping) index.
                bbb = max(0, mmm.start() - 5)
                eee = mmm.end() + 5
                cont = data[bbb:mmm.start()] + "'" + self.strx + "'" + \
                        data[mmm.end():eee]

                print("no key on", tt[0], cont)
            return

        self._apply(tt, item)

    # This is the old routine
    def parse_item(self, data, tt):
        """Dispatch one token by scanning ``parsetable`` linearly.

        The first row whose state and token (or token class) match is
        applied. When no row matches, a notice is printed if the
        module flag _show_default_action is set.
        """
        mmm = tt[1]
        self.strx = data[mmm.start():mmm.end()]

        # Scan parse table:
        for pt in parsetable:
            if pt[0] is None:
                if self.fsm not in pt[1]:
                    continue
            elif pt[0][0] != self.fsm[0]:
                continue

            # See if we have a class match
            classmatch = pt[3] is not None and tt[0][0] in pt[3]

            if classmatch or tt[0][0] == pt[2]:
                self._apply(tt, pt)
                # Done working, next token
                break
        else:
            if _show_default_action:
                print(" default action on",  tt[0], "'" + self.strx + "'", \
                "Pos:", mmm.start())

    def popstate(self):
        """Restore the state most recently pushed on the state stack.

        The saved token and string are stored in self.ttt and self.stry.
        """
        self.fsm, self.contflag, self.ttt, self.stry = self.fstack.pop()

# Import-only module: running it directly just prints a notice.
if __name__ == "__main__":
    print("This module was not meant to operate as main.")

# EOF

Functions

def unique()
Expand source code
def unique():
    """Return the next value of the module-wide counter.

    The counter advances by 10 on every call, so successive results
    are distinct for the lifetime of the module.
    """
    global _gl_cnt
    _gl_cnt = _gl_cnt + 10
    return _gl_cnt

Classes

class Parse (data, xstack, pvg=None)
Expand source code
class Parse():
    """Table-driven, flat state-machine parser over a lexer token stream.

    Construction builds a two-level lookup (state name -> token id ->
    parse table row) from the module-level ``parsetable`` and then
    immediately consumes every token delivered by *xstack*.

    Parse table rows are indexed positionally, as used by this class:
      pt[0]  a single state ``[id, name]``, or None
      pt[1]  list of states, used when pt[0] is None
      pt[2]  token id to match (when falsy, pt[3] is used instead)
      pt[3]  iterable of token ids (a token class), may be None
      pt[4]  action callable ``fn(parser, tt, row)``, or None
      pt[5]  next state, or REDUCE / IGNORE
      pt[6]  continuation flag stored together with the new state

    NOTE(review): ``parsetable`` and ``self.reduce`` are not defined in
    this listing; presumably the grammar module supplies them (see
    pangparser.py) -- confirm before relying on REDUCE rows.
    """

    def __init__(self, data, xstack, pvg = None):
        """Build the lookup dictionary, then parse all tokens.

        data   -- text that the token match objects index into
        xstack -- token source; get2() returns the next token and a
                  falsy value when the stream is exhausted
        pvg    -- optional options object; its show_state and
                  show_parse attributes enable diagnostic printing
        """
        self.fstack = stack.Stack()
        self.fsm = INIT
        self.contflag = 0
        self.pvg = pvg
        self.pardict = {}

        # Create parse dictionary. A row applies either to its single
        # state (pt[0]) or to every state listed in pt[1].
        for pt in parsetable:
            states = [pt[0]] if pt[0] is not None else pt[1]
            for state in states:
                dd = self.pardict.setdefault(state[1], {})
                if pt[2]:
                    dd[pt[2]] = pt[:]
                else:
                    self.add_class(dd, pt)

        while True:
            tt = xstack.get2()  # Get next token
            if not tt:
                break
            self.parse_item2(data, tt)

    def add_class(self, dd, pt):
        """Register a copy of row *pt* in *dd* under each token id in pt[3].

        A row whose class slot is None contributes nothing instead of
        raising TypeError.
        """
        for aa in pt[3] or ():
            dd[aa] = pt[:]

    def _flag(self, name):
        """Return the named option from pvg; off when pvg or the flag is absent."""
        return getattr(self.pvg, name, False)

    def _apply(self, tt, item):
        """Run the action of row *item* and perform its state transition."""
        if item[4] is not None:
            item[4](self, tt, item)

        if item[5] == REDUCE:
            # This is an actionless reduce ... rare
            self.reduce(tt)
        elif item[5] == IGNORE:
            pass
        else:
            # Remember where we came from so popstate() can return to it.
            self.fstack.push([self.fsm, self.contflag, tt, self.strx])
            self.fsm = item[5]
            self.contflag = item[6]

    # This is the new routine, dictionary driven
    # About ten times as fast

    def parse_item2(self, data, tt):
        """Dispatch one token through the dictionary built in __init__.

        tt is ``[token, match]``: token[0] is the token id used as the
        lookup key and match provides start() / end() offsets into
        *data*. Tokens without a matching state or row are skipped;
        nothing is raised for them.
        """
        mmm = tt[1]
        self.strx = data[mmm.start():mmm.end()]
        if self._flag("show_state"):
            print("state:", self.fsm, "str:", "'" + self.strx + "' token:", tt[0])

        try:
            curr = self.pardict[self.fsm[1]]
        except KeyError:
            print("no state on", tt[0], self.strx)
            return      # no table for this state, nothing to look up

        try:
            item = curr[tt[0][0]]
        except KeyError:
            if self._flag("show_parse"):
                # Show context; clamp so a match near the start of the
                # data does not produce a negative (wrapping) index.
                bbb = max(0, mmm.start() - 5)
                eee = mmm.end() + 5
                cont = data[bbb:mmm.start()] + "'" + self.strx + "'" + \
                        data[mmm.end():eee]

                print("no key on", tt[0], cont)
            return

        self._apply(tt, item)

    # This is the old routine
    def parse_item(self, data, tt):
        """Dispatch one token by scanning ``parsetable`` linearly.

        The first row whose state and token (or token class) match is
        applied. When no row matches, a notice is printed if the
        module flag _show_default_action is set.
        """
        mmm = tt[1]
        self.strx = data[mmm.start():mmm.end()]

        # Scan parse table:
        for pt in parsetable:
            if pt[0] is None:
                if self.fsm not in pt[1]:
                    continue
            elif pt[0][0] != self.fsm[0]:
                continue

            # See if we have a class match
            classmatch = pt[3] is not None and tt[0][0] in pt[3]

            if classmatch or tt[0][0] == pt[2]:
                self._apply(tt, pt)
                # Done working, next token
                break
        else:
            if _show_default_action:
                print(" default action on",  tt[0], "'" + self.strx + "'", \
                "Pos:", mmm.start())

    def popstate(self):
        """Restore the state most recently pushed on the state stack.

        The saved token and string are stored in self.ttt and self.stry.
        """
        self.fsm, self.contflag, self.ttt, self.stry = self.fstack.pop()

Methods

def add_class(self, dd, pt)
Expand source code
def add_class(self, dd, pt):
    """Register a copy of row *pt* in *dd* under each token id in pt[3].

    dd -- per-state dictionary mapping token id -> parse table row
    pt -- parse table row; pt[3] is its token class (iterable of ids)

    A row whose class slot is None contributes nothing instead of
    raising TypeError.
    """
    for aa in pt[3] or ():
        dd[aa] = pt[:]
def parse_item(self, data, tt)
Expand source code
def parse_item(self, data, tt):
    """Dispatch one token by scanning ``parsetable`` linearly (old routine).

    tt is ``[token, match]``: token[0] is the token id and match
    provides start() / end() offsets into *data*. The first row whose
    state and token (or token class) match is applied: its action
    (row[4]) is called, then row[5] selects reduce, ignore or a
    transition to a new state. When no row matches, a notice is
    printed if the module flag _show_default_action is set.

    NOTE(review): ``parsetable`` and ``self.reduce`` are not defined in
    this listing -- confirm where they come from.
    """
    mmm = tt[1]
    self.strx = data[mmm.start():mmm.end()]

    # Scan parse table:
    for pt in parsetable:
        # A row names either one state (pt[0]) or a list of states (pt[1]).
        if pt[0] is None:
            if self.fsm not in pt[1]:
                continue
        elif pt[0][0] != self.fsm[0]:
            continue

        # See if we have a class match
        classmatch = pt[3] is not None and tt[0][0] in pt[3]

        if classmatch or tt[0][0] == pt[2]:
            if pt[4] is not None:
                pt[4](self, tt, pt)

            if pt[5] == REDUCE:
                # This is an actionless reduce ... rare
                self.reduce(tt)
            elif pt[5] == IGNORE:
                pass
            else:
                # Save the current state so popstate() can return to it.
                self.fstack.push([self.fsm, self.contflag, tt, self.strx])
                self.fsm = pt[5]
                self.contflag = pt[6]
            # Done working, next token
            break
    else:
        # Loop finished without a break: no row matched this token.
        if _show_default_action:
            print(" default action on",  tt[0], "'" + self.strx + "'", \
            "Pos:", mmm.start())
def parse_item2(self, data, tt)
Expand source code
def parse_item2(self, data, tt):
    """Dispatch one token through the dictionary-driven state table.

    tt is ``[token, match]``: token[0] is the token id used as the
    lookup key and match provides start() / end() offsets into *data*.
    The row found under the current state name and token id has its
    action (row[4]) called, then row[5] selects reduce, ignore or a
    transition to a new state. Tokens without a matching state or row
    are skipped; nothing is raised for them.

    NOTE(review): ``self.reduce`` is not defined in this listing --
    confirm where it comes from.
    """
    mmm = tt[1]
    self.strx = data[mmm.start():mmm.end()]

    # pvg is optional (None by default); a missing flag reads as off.
    show_state = getattr(self.pvg, "show_state", False)
    show_parse = getattr(self.pvg, "show_parse", False)

    if show_state:
        print("state:", self.fsm, "str:", "'" + self.strx + "' token:", tt[0])

    try:
        curr = self.pardict[self.fsm[1]]
    except KeyError:
        print("no state on", tt[0], self.strx)
        return      # no table for this state, nothing to look up

    try:
        item = curr[tt[0][0]]
    except KeyError:
        if show_parse:
            # Show context; clamp so a match near the start of the
            # data does not produce a negative (wrapping) index.
            bbb = max(0, mmm.start() - 5)
            eee = mmm.end() + 5
            cont = data[bbb:mmm.start()] + "'" + self.strx + "'" + \
                    data[mmm.end():eee]

            print("no key on", tt[0], cont)
        return

    if item[4] is not None:
        item[4](self, tt, item)

    if item[5] == REDUCE:
        # This is an actionless reduce ... rare
        self.reduce(tt)
    elif item[5] == IGNORE:
        pass
    else:
        # Save the current state so popstate() can return to it.
        self.fstack.push([self.fsm, self.contflag, tt, self.strx])
        self.fsm = item[5]
        self.contflag = item[6]
def popstate(self)
Expand source code
def popstate(self):
    """Restore the state most recently pushed on the state stack.

    The popped entry is unpacked into the current state, the
    continuation flag, and the saved token / string (self.ttt and
    self.stry).
    """
    saved = self.fstack.pop()
    self.fsm, self.contflag, self.ttt, self.stry = saved