# XPath expression parser.  This module follows the layout of a Yapps-style
# generated recursive-descent parser: the Scanner/Parser base classes and the
# _peek()/_scan() helpers come from the runtime in xpath.yappsrt, and the X.*
# classes are the AST nodes defined in xpath.expr.

import xpath.expr as X
from xpath.yappsrt import *
from string import *
import re

class XPathScanner(Scanner):
    # Token patterns, as (token name, compiled regex) pairs, used by the
    # Yapps Scanner runtime.
    patterns = [
        ("r'\\:'", re.compile('\\:')),
        ("r'node\\s*\\('", re.compile('node\\s*\\(')),
        ("r'text\\s*\\('", re.compile('text\\s*\\(')),
        ("r'comment\\s*\\('", re.compile('comment\\s*\\(')),
        ("r'processing-instruction\\s*\\('", re.compile('processing-instruction\\s*\\(')),
        ("r'\\,'", re.compile('\\,')),
        ("r'\\.'", re.compile('\\.')),
        ("r'\\$'", re.compile('\\$')),
        ("r'\\)'", re.compile('\\)')),
        ("r'\\('", re.compile('\\(')),
        ("r'\\]'", re.compile('\\]')),
        ("r'\\['", re.compile('\\[')),
        ("r'\\*'", re.compile('\\*')),
        ("r':'", re.compile(':')),
        ("r'\\.\\.'", re.compile('\\.\\.')),
        ("r'@'", re.compile('@')),
        ("r'::'", re.compile('::')),
        ("r'\\/\\/'", re.compile('\\/\\/')),
        ("r'\\/'", re.compile('\\/')),
        ("r'\\-'", re.compile('\\-')),
        ("'\\|'", re.compile('\\|')),
        ("r'and'", re.compile('and')),
        ("r'or'", re.compile('or')),
        ('\\s+', re.compile('\\s+')),
        ('END', re.compile('$')),
        ('FORWARD_AXIS_NAME', re.compile('child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace')),
        ('REVERSE_AXIS_NAME', re.compile('parent|preceding-sibling|preceding|ancestor-or-self|ancestor')),
        ('NCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*(?!\\()')),
        ('FUNCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*')),
        ('DQUOTE', re.compile('\\"(?:[^\\"])*\\"')),
        ('SQUOTE', re.compile("\\'(?:[^\\'])*\\'")),
        ('NUMBER', re.compile('((\\.[0-9]+)|([0-9]+(\\.[0-9]*)?))([eE][\\+\\-]?[0-9]+)?')),
        ('EQ_COMP', re.compile('\\!?\\=')),
        ('REL_COMP', re.compile('[\\<\\>]\\=?')),
        ('ADD_COMP', re.compile('[\\+\\-]')),
        ('MUL_COMP', re.compile('\\*|div|mod')),
    ]

    def __init__(self, str):
        Scanner.__init__(self, None, ['\\s+'], str)

class XPath(Parser):
    # One method per grammar production.  _peek() looks at the next token
    # (restricted to the listed candidates) without consuming it; _scan()
    # consumes a token of the given type.

    def XPath(self):
        Expr = self.Expr()
        END = self._scan('END')
        return Expr

    def Expr(self):
        OrExpr = self.OrExpr()
        return OrExpr

    def OrExpr(self):
        AndExpr = self.AndExpr()
        Expr = AndExpr
        while self._peek("r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'or'":
            self._scan("r'or'")
            AndExpr = self.AndExpr()
            Expr = X.OrExpr('or', Expr, AndExpr)
        return Expr

    def AndExpr(self):
        EqualityExpr = self.EqualityExpr()
        Expr = EqualityExpr
        while self._peek("r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'and'":
            self._scan("r'and'")
            EqualityExpr = self.EqualityExpr()
            Expr = X.AndExpr('and', Expr, EqualityExpr)
        return Expr

    def EqualityExpr(self):
        RelationalExpr = self.RelationalExpr()
        Expr = RelationalExpr
        while self._peek('EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'EQ_COMP':
            EQ_COMP = self._scan('EQ_COMP')
            RelationalExpr = self.RelationalExpr()
            Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr)
        return Expr

    def RelationalExpr(self):
        AdditiveExpr = self.AdditiveExpr()
        Expr = AdditiveExpr
        while self._peek('REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'REL_COMP':
            REL_COMP = self._scan('REL_COMP')
            AdditiveExpr = self.AdditiveExpr()
            Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr)
        return Expr

    def AdditiveExpr(self):
        MultiplicativeExpr = self.MultiplicativeExpr()
        Expr = MultiplicativeExpr
        while self._peek('ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'ADD_COMP':
            ADD_COMP = self._scan('ADD_COMP')
            MultiplicativeExpr = self.MultiplicativeExpr()
            Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr)
        return Expr

    def MultiplicativeExpr(self):
        UnionExpr = self.UnionExpr()
        Expr = UnionExpr
        while self._peek('MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'MUL_COMP':
            MUL_COMP = self._scan('MUL_COMP')
            UnionExpr = self.UnionExpr()
            Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr)
        return Expr

    def UnionExpr(self):
        UnaryExpr = self.UnaryExpr()
        Expr = UnaryExpr
        while self._peek("'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "'\\|'":
            self._scan("'\\|'")
            UnaryExpr = self.UnaryExpr()
            Expr = X.UnionExpr('|', Expr, UnaryExpr)
        return Expr

    def UnaryExpr(self):
        _token_ = self._peek("r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ == "r'\\-'":
            self._scan("r'\\-'")
            ValueExpr = self.ValueExpr()
            return X.NegationExpr(ValueExpr)
        else:
            ValueExpr = self.ValueExpr()
            return ValueExpr

    def ValueExpr(self):
        PathExpr = self.PathExpr()
        return PathExpr

    def PathExpr(self):
        _token_ = self._peek("r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ == "r'\\/'":
            # Absolute path: '/' followed by an optional relative path.
            self._scan("r'\\/'")
            path = None
            if self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME', "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") not in ["'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'"]:
                RelativePathExpr = self.RelativePathExpr()
                path = RelativePathExpr
            return X.AbsolutePathExpr(path)
        elif _token_ == "r'\\/\\/'":
            # '//' abbreviation: prepend a descendant-or-self step.
            self._scan("r'\\/\\/'")
            RelativePathExpr = self.RelativePathExpr()
            step = X.AxisStep('descendant-or-self')
            RelativePathExpr.steps.insert(0, step)
            return X.AbsolutePathExpr(RelativePathExpr)
        else:
            RelativePathExpr = self.RelativePathExpr()
            return RelativePathExpr

    def RelativePathExpr(self):
        StepExpr = self.StepExpr()
        steps = [StepExpr]
        while self._peek("r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") in ["r'\\/'", "r'\\/\\/'"]:
            _token_ = self._peek("r'\\/'", "r'\\/\\/'")
            if _token_ == "r'\\/'":
                self._scan("r'\\/'")
            else: # == "r'\\/\\/'"
                self._scan("r'\\/\\/'")
                steps.append(X.AxisStep('descendant-or-self'))
            StepExpr = self.StepExpr()
            steps.append(StepExpr)
        return X.PathExpr(steps)

    def StepExpr(self):
        _token_ = self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE']:
            AxisStep = self.AxisStep()
            return AxisStep
        else:
            FilterExpr = self.FilterExpr()
            return FilterExpr

    def AxisStep(self):
        _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ not in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]:
            ForwardStep = self.ForwardStep()
            step = ForwardStep
        else: # in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]
            ReverseStep = self.ReverseStep()
            step = ReverseStep
        expr = X.AxisStep(*step)
        if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
            PredicateList = self.PredicateList()
            expr = X.PredicateList(expr, PredicateList, step[0])
        return expr

    def ForwardStep(self):
        _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ == 'FORWARD_AXIS_NAME':
            ForwardAxis = self.ForwardAxis()
            NodeTest = self.NodeTest()
            return [ForwardAxis, NodeTest]
        else:
            AbbrevForwardStep = self.AbbrevForwardStep()
            return AbbrevForwardStep

    def ForwardAxis(self):
        FORWARD_AXIS_NAME = self._scan('FORWARD_AXIS_NAME')
        self._scan("r'::'")
        return FORWARD_AXIS_NAME

    def AbbrevForwardStep(self):
        # '@' abbreviates the attribute axis; the default axis is child.
        axis = 'child'
        if self._peek("r'@'", "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME') == "r'@'":
            self._scan("r'@'")
            axis = 'attribute'
        NodeTest = self.NodeTest()
        return [axis, NodeTest]

    def ReverseStep(self):
        _token_ = self._peek('REVERSE_AXIS_NAME', "r'\\.\\.'")
        if _token_ == 'REVERSE_AXIS_NAME':
            ReverseAxis = self.ReverseAxis()
            NodeTest = self.NodeTest()
            return [ReverseAxis, NodeTest]
        else: # == "r'\\.\\.'"
            AbbrevReverseStep = self.AbbrevReverseStep()
            return AbbrevReverseStep

    def ReverseAxis(self):
        REVERSE_AXIS_NAME = self._scan('REVERSE_AXIS_NAME')
        self._scan("r'::'")
        return REVERSE_AXIS_NAME

    def AbbrevReverseStep(self):
        # '..' abbreviates a step along the parent axis.
        self._scan("r'\\.\\.'")
        return ['parent', None]

    def NodeTest(self):
        _token_ = self._peek("r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME')
        if _token_ not in ["r'\\*'", 'NCNAME']:
            KindTest = self.KindTest()
            return KindTest
        else: # in ["r'\\*'", 'NCNAME']
            NameTest = self.NameTest()
            return NameTest

    def NameTest(self):
        prefix = None
        WildcardOrNCName = self.WildcardOrNCName()
        localpart = WildcardOrNCName
        if self._peek("r':'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r':'":
            self._scan("r':'")
            WildcardOrNCName = self.WildcardOrNCName()
            prefix = localpart
            localpart = WildcardOrNCName
        return X.NameTest(prefix, localpart)

    def WildcardOrNCName(self):
        _token_ = self._peek("r'\\*'", 'NCNAME')
        if _token_ == "r'\\*'":
            self._scan("r'\\*'")
            return '*'
        else: # == 'NCNAME'
            NCNAME = self._scan('NCNAME')
            return NCNAME

    def FilterExpr(self):
        PrimaryExpr = self.PrimaryExpr()
        if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
            PredicateList = self.PredicateList()
            PrimaryExpr = X.PredicateList(PrimaryExpr, PredicateList)
        return PrimaryExpr

    def PredicateList(self):
        Predicate = self.Predicate()
        predicates = [Predicate]
        while self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
            Predicate = self.Predicate()
            predicates.append(Predicate)
        return predicates

    def Predicate(self):
        self._scan("r'\\['")
        Expr = self.Expr()
        self._scan("r'\\]'")
        return Expr

    def PrimaryExpr(self):
        _token_ = self._peek("r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE')
        if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME']:
            Literal = self.Literal()
            return X.LiteralExpr(Literal)
        elif _token_ == "r'\\$'":
            VariableReference = self.VariableReference()
            return VariableReference
        elif _token_ == "r'\\('":
            self._scan("r'\\('")
            Expr = self.Expr()
            self._scan("r'\\)'")
            return Expr
        elif _token_ == "r'\\.'":
            ContextItemExpr = self.ContextItemExpr()
            return ContextItemExpr
        else: # == 'FUNCNAME'
            FunctionCall = self.FunctionCall()
            return FunctionCall

    def VariableReference(self):
        self._scan("r'\\$'")
        QName = self.QName()
        return X.VariableReference(*QName)

    def ContextItemExpr(self):
        # '.' abbreviates a step along the self axis.
        self._scan("r'\\.'")
        return X.AxisStep('self')

    def FunctionCall(self):
        FUNCNAME = self._scan('FUNCNAME')
        self._scan("r'\\('")
        args = []
        if self._peek("r'\\,'", "r'\\)'", "r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('", "r'\\*'", 'NCNAME') not in ["r'\\,'", "r'\\)'"]:
            Expr = self.Expr()
            args.append(Expr)
            while self._peek("r'\\,'", "r'\\)'") == "r'\\,'":
                self._scan("r'\\,'")
                Expr = self.Expr()
                args.append(Expr)
        self._scan("r'\\)'")
        return X.Function(FUNCNAME, args)

    def KindTest(self):
        _token_ = self._peek("r'processing-instruction\\s*\\('", "r'comment\\s*\\('", "r'text\\s*\\('", "r'node\\s*\\('")
        if _token_ == "r'processing-instruction\\s*\\('":
            PITest = self.PITest()
            return PITest
        elif _token_ == "r'comment\\s*\\('":
            CommentTest = self.CommentTest()
            return CommentTest
        elif _token_ == "r'text\\s*\\('":
            TextTest = self.TextTest()
            return TextTest
        else: # == "r'node\\s*\\('"
            AnyKindTest = self.AnyKindTest()
            return AnyKindTest

    def PITest(self):
        self._scan("r'processing-instruction\\s*\\('")
        name = None
        if self._peek('NCNAME', "r'\\)'", 'DQUOTE', 'SQUOTE') != "r'\\)'":
            _token_ = self._peek('NCNAME', 'DQUOTE', 'SQUOTE')
            if _token_ == 'NCNAME':
                NCNAME = self._scan('NCNAME')
                name = NCNAME
            else: # in ['DQUOTE', 'SQUOTE']
                StringLiteral = self.StringLiteral()
                name = StringLiteral
        self._scan("r'\\)'")
        return X.PITest(name)

    def CommentTest(self):
        self._scan("r'comment\\s*\\('")
        self._scan("r'\\)'")
        return X.CommentTest()

    def TextTest(self):
        self._scan("r'text\\s*\\('")
        self._scan("r'\\)'")
        return X.TextTest()

    def AnyKindTest(self):
        self._scan("r'node\\s*\\('")
        self._scan("r'\\)'")
        return X.AnyKindTest()

    def Literal(self):
        _token_ = self._peek('NUMBER', 'DQUOTE', 'SQUOTE')
        if _token_ == 'NUMBER':
            NumericLiteral = self.NumericLiteral()
            return NumericLiteral
        else: # in ['DQUOTE', 'SQUOTE']
            StringLiteral = self.StringLiteral()
            return StringLiteral

    def NumericLiteral(self):
        NUMBER = self._scan('NUMBER')
        return float(NUMBER)

    def StringLiteral(self):
        # Strip the surrounding quotes from the matched token.
        _token_ = self._peek('DQUOTE', 'SQUOTE')
        if _token_ == 'DQUOTE':
            DQUOTE = self._scan('DQUOTE')
            return DQUOTE[1:-1]
        else: # == 'SQUOTE'
            SQUOTE = self._scan('SQUOTE')
            return SQUOTE[1:-1]

    def QName(self):
        NCNAME = self._scan('NCNAME')
        name = NCNAME
        if self._peek("r'\\:'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\:'":
            self._scan("r'\\:'")
            NCNAME = self._scan('NCNAME')
            return (name, NCNAME)
        return (None, name)


def parse(rule, text):
    # Parse 'text' starting from the named grammar rule (e.g. 'XPath').
    P = XPath(XPathScanner(text))
    return wrap_error_reporter(P, rule)

if __name__ == '__main__':
    from sys import argv, stdin
    if len(argv) >= 2:
        if len(argv) >= 3:
            f = open(argv[2], 'r')
        else:
            f = stdin
        print parse(argv[1], f.read())
    else:
        print 'Args: <rule> [<filename>]'
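
# Usage sketch (added for illustration, not part of the generated parser):
# the module can also be driven programmatically by calling parse() with a
# start-rule name and an expression string.  The import path 'xpath.parser'
# below is an assumption about how this file is packaged; the returned value
# is whatever AST node the start rule builds from the classes in xpath.expr,
# and wrap_error_reporter from xpath.yappsrt handles syntax errors.
#
#     from xpath.parser import parse
#     ast = parse('XPath', '//chapter[@id="intro"]/title')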