diff --git a/pyulib/setup.py b/pyulib/setup.py
index 70c905a..0af8d30 100755
--- a/pyulib/setup.py
+++ b/pyulib/setup.py
@@ -88,6 +88,7 @@ addp('ulib.ext.tarfile', ['README.txt'])
addp('ulib.ext.web')
addp('ulib.ext.web.wsgiserver', ['LICENSE.txt'])
addp('ulib.ext.web.contrib')
+addp('ulib.ext.xpath')
addp('ulib.formats')
addp('ulib.gae')
addp('ulib.json')
diff --git a/pyulib/src/ulib/ext/xpath/README.rst b/pyulib/src/ulib/ext/xpath/README.rst
new file mode 100644
index 0000000..0425286
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/README.rst
@@ -0,0 +1,297 @@
+:mod:`xpath` --- XPath Queries For DOM Trees
+============================================
+The :mod:`xpath` module is a pure Python implementation of the XPath query
+language, operating on DOM documents. It supports most of XPath 1.0, with
+the following exceptions:
+
+* The namespace axis is not supported.
+* The ``round()`` function rounds halfway cases away from zero (``round(-1.5)`` is ``-2``), not towards positive infinity.
+
+The following XPath 2.0 features are supported:
+
+* A default namespace may be supplied in the expression context.
+* Node tests may have a wildcard namespace. (e.g., ``*:name``.)
+
+This module provides the following functions for evaluating XPath expressions:
+
+.. function:: find(expr, node, [\**kwargs])
+
+ Evaluate the XPath expression *expr* with *node* as the context node,
+ and return:
+
+ * ``True`` or ``False``, when the expression has a boolean result.
+ * A :class:`float`, when the expression has a numeric result.
+ * A :class:`unicode`, when the expression has a string result.
+ * A list of :class:`xml.dom.Node`, when the expression has a
+ node-set result.
+
+.. function:: findnode(expr, node, [\**kwargs])
+
+ Evaluate the XPath expression *expr* with *node* as the context node,
+ and return a single node. If the result of the expression is a non-empty
+ node-set, return the first node in the set. If the result is an empty
+ node-set, return ``None``. If the result is not a node-set, raise
+ :exc:`XPathTypeError`.
+
+.. function:: findvalue(expr, node, [\**kwargs])
+
+ Evaluate the XPath expression *expr* with *node* as the context node,
+ and return the string-value of the result. If the result is an empty
+ node-set, return ``None`` instead.
+
+.. function:: findvalues(expr, node, [\**kwargs])
+
+ Evaluate the XPath expression *expr* with *node* as the context node,
+ and return a list of the string-values of the resulting node-set. If
+ the result is not a node-set, raise :exc:`XPathTypeError`.
+
+The above functions take the following optional keyword arguments
+defining the evaluation context:
+
+*context*
+ A :class:`XPathContext` object containing the evaluation context. It
+ is legal to supply both a context object and additional arguments
+ extending its contents.
+
+*default_namespace*
+ The default namespace URI, which will be used for any unqualified name
+ in the XPath expression.
+
+*namespaces*
+ A mapping of prefixes to namespace URIs.
+
+*variables*
+ A mapping of variable names to values. To map a variable in a specific
+ namespace, use a two element tuple of the (namespace URI, name) as the key.
+
+Additional keyword arguments will be used as variable bindings.
+
+Basic Queries
+-------------
+The examples in this section use this XML document: ::
+
+ <doc>
+   <item name="python" />
+   <item name="parrot" />
+ </doc>
+
+Select the ``item`` elements in a document: ::
+
+ >>> xpath.find('//item', doc)
+ [<DOM Element: item at 0x...>, <DOM Element: item at 0x...>]
+
+Select the ``name`` attribute of the first item element (note that this returns
+a list of Attr nodes): ::
+
+ >>> xpath.find('//item[1]/@name', doc)
+ [<xml.dom.minidom.Attr instance at 0x...>]
+
+Select the string-value of the ``name`` attribute of the last item element: ::
+
+ >>> xpath.findvalue('//item[last()]/@name', doc)
+ u'parrot'
+
+Select the first item element with a ``name`` attribute that starts with "p": ::
+
+ >>> xpath.findnode('//item[starts-with(@name,"p")]', doc)
+ <DOM Element: item at 0x...>
+
+Namespaces
+----------
+The examples in this section use this XML document: ::
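+
+ <doc xmlns="http://flying.example.org/" xmlns:circus="http://circus.example.org/">
+   <item>python</item>
+   <circus:item>parrot</circus:item>
+ </doc>
+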
+The *namespaces* argument to the evaluation functions provides a dictionary
+of prefixes to namespace URIs. Prefixed QNames in expressions will be
+expanded according to this mapping.
+
+To select the string-values of the ``item`` elements in the
+"\http://circus.example.org/" namespace: ::
+
+ >>> xpath.findvalues('//prefix:item', doc,
+ ... namespaces={'prefix':'http://circus.example.org/'})
+ [u'parrot']
+
+The *default_namespace* argument provides a namespace URI that will be
+used for any unprefixed QName appearing in a position where an element
+name is expected. (Default namespaces are a feature of XPath 2.0.)
+
+To select the string-values of the ``item`` elements in the
+"\http://flying.example.org/" namespace: ::
+
+ >>> xpath.findvalues('//item', doc,
+ ... default_namespace='http://flying.example.org/')
+ [u'python']
+
+When a *default_namespace* argument is not provided, the default namespace
+is that of the document element. When a *namespaces* argument is not
+provided, the prefix declarations consist of all prefixes defined on the
+document element.
+
+To select the string values of all the ``item`` elements: ::
+
+ >>> xpath.findvalues('//item | //circus:item', doc)
+ [u'python', u'parrot']
+
+The :mod:`xpath` module supports wildcard matches against both the prefix
+and local name. (XPath 1.0 only supports wildcard matches against the local
+name; XPath 2.0 adds support for wildcard matches against the prefix.)
+
+To select all children of the document element, regardless of namespace: ::
+
+ >>> xpath.find('/*:*/*:*', doc)
+ [<DOM Element: item at 0x...>, <DOM Element: circus:item at 0x...>]
+
+Variables
+---------
+The examples in this section use this XML document: ::
+
+ <doc>
+   <item id="1">python</item>
+   <item id="2">parrot</item>
+ </doc>
+
+XPath variables may be passed to the evaluation functions as keyword
+arguments: ::
+
+ >>> xpath.findvalue('//item[@id = $id]', doc, id=2)
+ u'parrot'
+
+It is also possible to pass a dictionary of variables to an evaluation
+function with the *variables* keyword argument: ::
+
+ >>> xpath.findvalue('//item[@id = $id]', doc, variables={'id':1})
+ u'python'
+
+To define a variable within a specific namespace, use a tuple of
+``(namespace-URI, local-name)`` as the key in the variable dictionary: ::
+
+ >>> variables = { ('http://python.example.org/', 'id') : 1 }
+ >>> namespaces = { 'python' : 'http://python.example.org/' }
+ >>> xpath.findvalue('//item[@id = $python:id]', doc,
+ ... variables=variables, namespaces=namespaces)
+ u'python'
+
+Compiled Expression Objects
+---------------------------
+.. class:: XPath(expr)
+
+ An expression object which contains a compiled form of the XPath
+ expression *expr*.
+
+ Under most circumstances, it is not necessary to directly use this class,
+ since the :func:`find` et al. functions cache compiled expressions.
+
+ .. method:: find(node, [\**kwargs])
+ findnode(node, [\**kwargs])
+ findvalue(node, [\**kwargs])
+ findvalues(node, [\**kwargs])
+
+ These methods are identical to the functions of the same name.
+
+Create and use a compiled expression: ::
+
+ >>> expr = xpath.XPath('//text()')
+ >>> print expr
+ /descendant-or-self::node()/child::text()
+ >>> expr.find(doc)
+ []
+
+Expression Context Objects
+--------------------------
+.. class:: XPathContext([document,] [\**kwargs])
+
+ The static context of an XPath expression. Context objects may be
+ created with the same keyword arguments accepted by the expression
+ evaluation functions.
+
+ The *document* argument may contain a DOM node. If provided, the
+ default namespace and namespace declarations will be initialized from
+ the document element of this node.
+
+ The context contains the following attributes and methods:
+
+ .. attribute:: default_namespace
+
+ The default namespace URI.
+
+ .. attribute:: namespaces
+
+ The mapping of prefixes to namespace URIs.
+
+ .. attribute:: variables
+
+ The mapping of variables to values. The keys of this map may
+ be either strings for variables with no namespace, or
+ (namespaceURI, name) tuples for variables contained in a
+ namespace.
+
+ .. method:: find(expr, node, [\**kwargs])
+ findnode(expr, node, [\**kwargs])
+ findvalue(expr, node, [\**kwargs])
+ findvalues(expr, node, [\**kwargs])
+
+ Evaluate *expr* in the context with *node* as the context node.
+ *expr* may be either a string or a :class:`XPath` object.
+
+Create and use an evaluation context: ::
+
+ >>> context = xpath.XPathContext()
+ >>> context.namespaces['py'] = 'http://python.example.org/'
+ >>> context.variables['min'] = 4
+ >>> context.findvalues('//item[@id>=$min and @id<=$max]', doc, max=6)
+ [u'4', u'5', u'6']
+
+Exceptions
+----------
+This module defines the following exceptions:
+
+.. exception:: XPathError
+
+ Base exception class used for all XPath exceptions.
+
+.. exception:: XPathNotImplementedError
+
+ Raised when an XPath expression contains a feature of XPath which
+ has not been implemented.
+
+.. exception:: XPathParseError
+
+ Raised when an XPath expression could not be parsed.
+
+.. exception:: XPathTypeError
+
+ Raised when an XPath expression is found to contain a type error.
+ For example, the expression "string()/node()" contains a type error
+ because the "string()" function does not return a node-set.
+
+.. exception:: XPathUnknownFunctionError
+
+ Raised when an XPath expression contains a function that has no
+ binding in the expression context.
+
+.. exception:: XPathUnknownPrefixError
+
+ Raised when an XPath expression contains a QName with a namespace
+ prefix that has no corresponding namespace declaration in the expression
+ context.
+
+.. exception:: XPathUnknownVariableError
+
+ Raised when an XPath expression contains a variable that has no
+ binding in the expression context.
+
+References
+----------
+.. seealso::
+
+ `XML Path Language (XPath) Version 1.0 <http://www.w3.org/TR/xpath>`_
+ The W3C recommendation upon which this module is based.
+
+ `XML Path Language (XPath) 2.0 <http://www.w3.org/TR/xpath20/>`_
+ Second version of XPath, mostly unsupported by this module.
diff --git a/pyulib/src/ulib/ext/xpath/__init__.py b/pyulib/src/ulib/ext/xpath/__init__.py
new file mode 100644
index 0000000..6cfabb2
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/__init__.py
@@ -0,0 +1,23 @@
+import exceptions
+
+from _xpath import api, XPathContext, XPath
+from exceptions import *
+
+# Public API: the four query helpers and the two classes defined/imported
+# here, extended with every non-underscore name found in the ``exceptions``
+# module imported above (presumably the sibling exceptions.py via Python 2
+# implicit relative import -- confirm).
+__all__ = ['find', 'findnode', 'findvalue', 'findvalues', 'XPathContext', 'XPath']
+__all__.extend((x for x in dir(exceptions) if not x.startswith('_')))
+
+@api
+def find(expr, node, **kwargs):
+ """Evaluate *expr* with *node* as the context node and return the result.
+
+ *expr* is resolved through ``XPath.get`` (a string or an ``XPath``
+ object); *kwargs* are forwarded unchanged to ``XPath.find``.
+ """
+ return XPath.get(expr).find(node, **kwargs)
+
+@api
+def findnode(expr, node, **kwargs):
+ """Evaluate *expr* against *node* and return a single node.
+
+ Delegates to ``XPath.findnode`` after resolving *expr* through
+ ``XPath.get``; *kwargs* are forwarded unchanged.
+ """
+ return XPath.get(expr).findnode(node, **kwargs)
+
+@api
+def findvalue(expr, node, **kwargs):
+ """Evaluate *expr* against *node* and return a single value.
+
+ Delegates to ``XPath.findvalue`` after resolving *expr* through
+ ``XPath.get``; *kwargs* are forwarded unchanged.
+ """
+ return XPath.get(expr).findvalue(node, **kwargs)
+
+@api
+def findvalues(expr, node, **kwargs):
+ """Evaluate *expr* against *node* and return a list of string-values.
+
+ Delegates to ``XPath.findvalues`` after resolving *expr* through
+ ``XPath.get``; *kwargs* are forwarded unchanged.
+ """
+ return XPath.get(expr).findvalues(node, **kwargs)
diff --git a/pyulib/src/ulib/ext/xpath/_xpath.py b/pyulib/src/ulib/ext/xpath/_xpath.py
new file mode 100644
index 0000000..d1c48bc
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/_xpath.py
@@ -0,0 +1,143 @@
+import expr as E
+import parser as P
+import yappsrt as Y
+
+from exceptions import *
+
+def api(f):
+ """Decorator for functions and methods that are part of the external
+ module API and that can throw XPathError exceptions.
+
+ The call stack for these exceptions can be very large, and not very
+ interesting to the user. This decorator rethrows XPathErrors to
+ trim the stack.
+
+ The returned wrapper forwards all positional and keyword arguments to
+ *f*, returns its result, and copies ``__name__`` and ``__doc__`` from *f*.
+ Exceptions other than XPathError pass through untouched.
+
+ """
+ def api_function(*args, **kwargs):
+ try:
+ return f(*args, **kwargs)
+ except XPathError, e:
+ # Re-raise the caught instance explicitly; per the docstring this is
+ # what shortens the traceback shown to the caller.
+ raise e
+ api_function.__name__ = f.__name__
+ api_function.__doc__ = f.__doc__
+ return api_function
+
+class XPathContext(object):
+ def __init__(self, document=None, **kwargs):
+ self.default_namespace = None
+ self.namespaces = {}
+ self.variables = {}
+
+ if document is not None:
+ if document.nodeType != document.DOCUMENT_NODE:
+ document = document.ownerDocument
+ if document.documentElement is not None:
+ attrs = document.documentElement.attributes
+ for attr in (attrs.item(i) for i in xrange(attrs.length)):
+ if attr.name == 'xmlns':
+ self.default_namespace = attr.value
+ elif attr.name.startswith('xmlns:'):
+ self.namespaces[attr.name[6:]] = attr.value
+
+ self.update(**kwargs)
+
+ def clone(self):
+ dup = XPathContext()
+ dup.default_namespace = self.default_namespace
+ dup.namespaces.update(self.namespaces)
+ dup.variables.update(self.variables)
+ return dup
+
+ def update(self, default_namespace=None, namespaces=None,
+ variables=None, **kwargs):
+ if default_namespace is not None:
+ self.default_namespace = default_namespace
+ if namespaces is not None:
+ self.namespaces = namespaces
+ if variables is not None:
+ self.variables = variables
+ self.variables.update(kwargs)
+
+ @api
+ def find(self, expr, node, **kwargs):
+ return XPath.get(expr).find(node, context=self, **kwargs)
+
+ @api
+ def findnode(self, expr, node, **kwargs):
+ return XPath.get(expr).findnode(node, context=self, **kwargs)
+
+ @api
+ def findvalue(self, expr, node, **kwargs):
+ return XPath.get(expr).findvalue(node, context=self, **kwargs)
+
+ @api
+ def findvalues(self, expr, node, **kwargs):
+ return XPath.get(expr).findvalues(node, context=self, **kwargs)
+
+class XPath():
+ _max_cache = 100
+ _cache = {}
+
+ def __init__(self, expr):
+ """Init docs.
+ """
+ try:
+ parser = P.XPath(P.XPathScanner(str(expr)))
+ self.expr = parser.XPath()
+ except Y.SyntaxError, e:
+ raise XPathParseError(str(expr), e.pos, e.msg)
+
+ @classmethod
+ def get(cls, s):
+ if isinstance(s, cls):
+ return s
+ try:
+ return cls._cache[s]
+ except KeyError:
+ if len(cls._cache) > cls._max_cache:
+ cls._cache.clear()
+ expr = cls(s)
+ cls._cache[s] = expr
+ return expr
+
+ @api
+ def find(self, node, context=None, **kwargs):
+ if context is None:
+ context = XPathContext(node, **kwargs)
+ elif kwargs:
+ context = context.clone()
+ context.update(**kwargs)
+ return self.expr.evaluate(node, 1, 1, context)
+
+ @api
+ def findnode(self, node, context=None, **kwargs):
+ result = self.find(node, context, **kwargs)
+ if not E.nodesetp(result):
+ raise XPathTypeError("expression is not a node-set")
+ if len(result) == 0:
+ return None
+ return result[0]
+
+ @api
+ def findvalue(self, node, context=None, **kwargs):
+ result = self.find(node, context, **kwargs)
+ if E.nodesetp(result):
+ if len(result) == 0:
+ return None
+ result = E.string(result)
+ return result
+
+ @api
+ def findvalues(self, node, context=None, **kwargs):
+ result = self.find(node, context, **kwargs)
+ if not E.nodesetp(result):
+ raise XPathTypeError("expression is not a node-set")
+ return [E.string_value(x) for x in result]
+
+ def __repr__(self):
+ return '%s.%s(%s)' % (self.__class__.__module__,
+ self.__class__.__name__,
+ repr(str(self.expr)))
+
+ def __str__(self):
+ return str(self.expr)
diff --git a/pyulib/src/ulib/ext/xpath/exceptions.py b/pyulib/src/ulib/ext/xpath/exceptions.py
new file mode 100644
index 0000000..1597670
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/exceptions.py
@@ -0,0 +1,49 @@
+
+class XPathError(Exception):
+ """Base exception class used for all XPath exceptions.
+
+ Every other exception class in this module derives from it.
+ """
+
+class XPathNotImplementedError(XPathError):
+ """Raised when an XPath expression contains a feature of XPath which
+ has not been implemented (for example, the namespace axis).
+
+ """
+
+class XPathParseError(XPathError):
+ """Raised when an XPath expression could not be parsed.
+
+ Attributes:
+ expr -- the expression text that failed to parse.
+ pos -- offset into the expression where the error was detected.
+ message -- the message supplied by the caller.
+ """
+
+ def __init__(self, expr, pos, message):
+ XPathError.__init__(self)
+ self.expr = expr
+ self.pos = pos
+ self.message = message
+
+ def __str__(self):
+ # Render the expression on one line with a caret under position `pos`.
+ # NOTE(review): `message` is stored but not included in this text.
+ return ("Syntax error:\n" +
+ self.expr.replace("\n", " ") + "\n" +
+ ("-" * self.pos) + "^")
+
+class XPathTypeError(XPathError):
+ """Raised when an XPath expression is found to contain a type error.
+ For example, the expression "string()/node()" contains a type error
+ because the "string()" function does not return a node-set.
+
+ Also raised when a function is called with too few or too many arguments.
+
+ """
+
+class XPathUnknownFunctionError(XPathError):
+ """Raised when an XPath expression contains a function that has no
+ binding in the expression context.
+
+ The error is raised when the function call node is constructed.
+
+ """
+
+class XPathUnknownPrefixError(XPathError):
+ """Raised when an XPath expression contains a QName with a namespace
+ prefix that has no corresponding namespace declaration in the expression
+ context.
+
+ The exception argument is the unknown prefix.
+
+ """
+
+class XPathUnknownVariableError(XPathError):
+ """Raised when an XPath expression contains a variable that has no
+ binding in the expression context.
+
+ The exception argument is the variable reference as written ("$name").
+
+ """
diff --git a/pyulib/src/ulib/ext/xpath/expr.py b/pyulib/src/ulib/ext/xpath/expr.py
new file mode 100644
index 0000000..4f7550a
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/expr.py
@@ -0,0 +1,896 @@
+from __future__ import division
+from itertools import *
+import math
+import operator
+import re
+import xml.dom
+import weakref
+
+from exceptions import *
+
+
+#
+# Data model functions.
+#
+
+def string_value(node):
+ """Compute the string-value of a node.
+
+ Documents and elements yield the concatenated data of every descendant
+ whose nodeType is TEXT_NODE; attributes yield their value; processing
+ instructions, comments and text nodes yield their data. Any other node
+ type falls through and returns None.
+ """
+ if (node.nodeType == node.DOCUMENT_NODE or
+ node.nodeType == node.ELEMENT_NODE):
+ s = u''
+ # `axes` is the module-level table of axis functions.
+ for n in axes['descendant'](node):
+ if n.nodeType == n.TEXT_NODE:
+ s += n.data
+ return s
+
+ elif node.nodeType == node.ATTRIBUTE_NODE:
+ return node.value
+
+ elif (node.nodeType == node.PROCESSING_INSTRUCTION_NODE or
+ node.nodeType == node.COMMENT_NODE or
+ node.nodeType == node.TEXT_NODE):
+ return node.data
+
+def document_order(node):
+ """Compute a document order value for the node.
+
+ cmp(document_order(a), document_order(b)) will return -1, 0, or 1 if
+ a is before, identical to, or after b in the document respectively.
+
+ We represent document order as a list of sibling indexes. That is,
+ the third child of the document node has an order of [2]. The first
+ child of that node has an order of [2,0].
+
+ Attributes have a sibling index of -1 (coming before all children of
+ their node) and are further ordered by name--e.g., [2,0,-1,'href'].
+
+ The key is rebuilt on every call by recursing up to the root and
+ counting previous siblings at each level; a new list is returned.
+
+ """
+
+ # Attributes: parent-order + [-1, attribute-name]
+ if node.nodeType == node.ATTRIBUTE_NODE:
+ order = document_order(node.ownerElement)
+ order.extend((-1, node.name))
+ return order
+
+ # The document root (hopefully): []
+ # Any node without a parent ends the recursion here, attached or not.
+ if node.parentNode is None:
+ return []
+
+ # Determine which child this is of its parent.
+ sibpos = 0
+ sib = node
+ while sib.previousSibling is not None:
+ sibpos += 1
+ sib = sib.previousSibling
+
+ # Order: parent-order + [sibling-position]
+ order = document_order(node.parentNode)
+ order.append(sibpos)
+ return order
+
+#
+# Type functions, operating on the various XPath types.
+#
+# Internally, we use the following representations:
+# nodeset - list of DOM tree nodes in document order
+# string - str or unicode
+# boolean - bool
+# number - int or float
+#
+
+def nodeset(v):
+ """Convert a value to a nodeset.
+
+ No conversion actually takes place: *v* is returned unchanged when
+ nodesetp() accepts it, otherwise XPathTypeError is raised.
+ """
+ if not nodesetp(v):
+ raise XPathTypeError, "value is not a node-set"
+ return v
+
+def nodesetp(v):
+ """Return true iff 'v' is a node-set."""
+ if isinstance(v, list):
+ return True
+
+def string(v):
+ """Convert a value to a string."""
+ if nodesetp(v):
+ if not v:
+ return u''
+ return string_value(v[0])
+ elif numberp(v):
+ if v == float('inf'):
+ return u'Infinity'
+ elif v == float('-inf'):
+ return u'-Infinity'
+ elif int(v) == v and v <= 0xffffffff:
+ v = int(v)
+ elif str(v) == 'nan':
+ return u'NaN'
+ return unicode(v)
+ elif booleanp(v):
+ return u'true' if v else u'false'
+ return v
+
+def stringp(v):
+ """Return true iff 'v' is a string (str or unicode, via basestring)."""
+ return isinstance(v, basestring)
+
+def boolean(v):
+ """Convert a value to a boolean.
+
+ A node-set is true when non-empty, a number when it is neither zero
+ nor NaN, and a string when it is not empty. Any other value (normally
+ an existing bool) is returned unchanged.
+ """
+ if nodesetp(v):
+ return len(v) > 0
+ elif numberp(v):
+ # `v != v` holds only for NaN.
+ if v == 0 or v != v:
+ return False
+ return True
+ elif stringp(v):
+ return v != ''
+ return v
+
+def booleanp(v):
+ """Return true iff 'v' is a boolean (an instance of bool)."""
+ return isinstance(v, bool)
+
+def number(v):
+ """Convert a value to a number.
+
+ A node-set is first reduced to a string with string(); the value is
+ then passed to float(). Text that float() rejects yields NaN.
+
+ NOTE(review): only ValueError is handled, and whatever float() accepts
+ is accepted here -- confirm that matches the intended XPath lexical rules.
+ """
+ if nodesetp(v):
+ v = string(v)
+ try:
+ return float(v)
+ except ValueError:
+ return float('NaN')
+
+def numberp(v):
+ """Return true iff 'v' is a number.
+
+ bool is excluded explicitly because it is a subclass of int.
+ """
+ return (not(isinstance(v, bool)) and
+ (isinstance(v, int) or isinstance(v, float)))
+
+class Expr(object):
+ """Abstract base class for XPath expressions."""
+
+ def evaluate(self, node, pos, size, context):
+ """Evaluate the expression.
+
+ The context node, context position, and context size are passed as
+ arguments.
+
+ Returns an XPath value: a nodeset, string, boolean, or number.
+
+ This base implementation has no body beyond the docstring;
+ subclasses provide the behaviour.
+
+ """
+
+class BinaryOperatorExpr(Expr):
+ """Base class for all binary operators.
+
+ Stores the operator token and both operand expressions. The default
+ evaluate() evaluates both operands and hands the two values to
+ operate(), which subclasses are expected to supply.
+ """
+
+ def __init__(self, op, left, right):
+ self.op = op
+ self.left = left
+ self.right = right
+
+ def evaluate(self, node, pos, size, context):
+ # Subclasses either override evaluate() or implement operate().
+ return self.operate(self.left.evaluate(node, pos, size, context),
+ self.right.evaluate(node, pos, size, context))
+
+ def __str__(self):
+ # Fully parenthesised rendering: "(left op right)".
+ return '(%s %s %s)' % (self.left, self.op, self.right)
+
+class AndExpr(BinaryOperatorExpr):
+ """ and """
+
+ def evaluate(self, node, pos, size, context):
+ # Note that XPath boolean operations short-circuit.
+ return (boolean(self.left.evaluate(node, pos, size, context) and
+ boolean(self.right.evaluate(node, pos, size, context))))
+
+class OrExpr(BinaryOperatorExpr):
+ """ or """
+
+ def evaluate(self, node, pos, size, context):
+ # Note that XPath boolean operations short-circuit.
+ return (boolean(self.left.evaluate(node, pos, size, context) or
+ boolean(self.right.evaluate(node, pos, size, context))))
+
+class EqualityExpr(BinaryOperatorExpr):
+ """ = , != , etc."""
+
+ operators = {
+ '=' : operator.eq,
+ '!=' : operator.ne,
+ '<=' : operator.le,
+ '<' : operator.lt,
+ '>=' : operator.ge,
+ '>' : operator.gt,
+ }
+
+ def operate(self, a, b):
+ if nodesetp(a):
+ for node in a:
+ if self.operate(string_value(node), b):
+ return True
+ return False
+
+ if nodesetp(b):
+ for node in b:
+ if self.operate(a, string_value(node)):
+ return True
+ return False
+
+ if self.op in ('=', '!='):
+ if booleanp(a) or booleanp(b):
+ convert = boolean
+ elif numberp(a) or numberp(b):
+ convert = number
+ else:
+ convert = string
+ else:
+ convert = number
+
+ a, b = convert(a), convert(b)
+ return self.operators[self.op](a, b)
+
+def divop(x, y):
+ try:
+ return x / y
+ except ZeroDivisionError:
+ if x == 0 and y == 0:
+ return float('nan')
+ if x < 0:
+ return float('-inf')
+ return float('inf')
+
+class ArithmeticalExpr(BinaryOperatorExpr):
+ """ + , - , etc."""
+
+ # Note that we must use math.fmod for the correct modulo semantics.
+ operators = {
+ '+' : operator.add,
+ '-' : operator.sub,
+ '*' : operator.mul,
+ 'div' : divop,
+ 'mod' : math.fmod
+ }
+
+ def operate(self, a, b):
+ return self.operators[self.op](number(a), number(b))
+
+class UnionExpr(BinaryOperatorExpr):
+ """Node-set union (the | operator)."""
+
+ def operate(self, a, b):
+ # Both operands must already be node-sets; nothing is converted.
+ if not nodesetp(a) or not nodesetp(b):
+ raise XPathTypeError("union operand is not a node-set")
+
+ # Need to sort the result to preserve document order.
+ # set() removes nodes present in both operands.
+ return sorted(set(chain(a, b)), key=document_order)
+
+class NegationExpr(Expr):
+ """Unary minus: the operand is converted with number() and negated."""
+
+ def __init__(self, expr):
+ self.expr = expr
+
+ def evaluate(self, node, pos, size, context):
+ return -number(self.expr.evaluate(node, pos, size, context))
+
+ def __str__(self):
+ return '(-%s)' % self.expr
+
+class LiteralExpr(Expr):
+ """Literals--either numbers or strings.
+
+ Evaluation returns the stored literal unchanged.
+ """
+
+ def __init__(self, literal):
+ self.literal = literal
+
+ def evaluate(self, node, pos, size, context):
+ return self.literal
+
+ def __str__(self):
+ if stringp(self.literal):
+ # Use double quotes only when the text contains a single quote.
+ # NOTE(review): a literal containing both quote characters is
+ # rendered without any escaping.
+ if "'" in self.literal:
+ return '"%s"' % self.literal
+ else:
+ return "'%s'" % self.literal
+ return string(self.literal)
+
+class VariableReference(Expr):
+ """Variable references.
+
+ prefix -- namespace prefix of the variable name, or None.
+ name -- local name of the variable.
+ """
+
+ def __init__(self, prefix, name):
+ self.prefix = prefix
+ self.name = name
+
+ def evaluate(self, node, pos, size, context):
+ # Prefixed variables are looked up under a (namespaceURI, name) key,
+ # unprefixed ones under the bare name.
+ try:
+ if self.prefix is not None:
+ try:
+ namespaceURI = context.namespaces[self.prefix]
+ except KeyError:
+ raise XPathUnknownPrefixError(self.prefix)
+ return context.variables[(namespaceURI, self.name)]
+ else:
+ return context.variables[self.name]
+ except KeyError:
+ # Only a failed lookup in context.variables reaches this handler.
+ raise XPathUnknownVariableError(str(self))
+
+ def __str__(self):
+ if self.prefix is None:
+ return '$%s' % self.name
+ else:
+ return '$%s:%s' % (self.prefix, self.name)
+
+class Function(Expr):
+ """XPath function call.
+
+ The XPath function name is mapped to a method called ``f_<name>`` (with
+ '-' replaced by '_'); that method becomes this instance's evaluate().
+ Unknown names and wrong argument counts are rejected at construction.
+ """
+
+ def __init__(self, name, args):
+ self.name = name
+ self.args = args
+ # Bind the implementing method as this instance's evaluate().
+ self.evaluate = getattr(self, 'f_%s' % name.replace('-', '_'), None)
+ if self.evaluate is None:
+ raise XPathUnknownFunctionError, 'unknown function "%s()"' % name
+
+ # minargs/maxargs are attached to the method by the decorator below.
+ if len(self.args) < self.evaluate.minargs:
+ raise XPathTypeError, 'too few arguments for "%s()"' % name
+ if (self.evaluate.maxargs is not None and
+ len(self.args) > self.evaluate.maxargs):
+ raise XPathTypeError, 'too many arguments for "%s()"' % name
+
+ #
+ # XPath functions are implemented by methods of the Function class.
+ #
+ # A method implementing an XPath function is decorated with the function
+ # decorator, and receives the evaluated function arguments as positional
+ # parameters.
+ #
+
+ def function(minargs, maxargs, implicit=False, first=False, convert=None):
+ """Function decorator.
+
+ minargs -- Minimum number of arguments taken by the function.
+ maxargs -- Maximum number of arguments taken by the function
+ (None for no upper limit).
+ implicit -- True for functions which operate on a nodeset consisting
+ of the current context node when passed no argument.
+ (e.g., string() and number().)
+ first -- True for functions whose first argument must be a nodeset;
+ it is replaced by the first node of that set, or by None
+ when the set is empty.
+ convert -- When non-None, a function used to filter function arguments.
+ """
+ def decorator(f):
+ def new_f(self, node, pos, size, context):
+ if implicit and len(self.args) == 0:
+ args = [[node]]
+ else:
+ args = [x.evaluate(node, pos, size, context)
+ for x in self.args]
+ if first:
+ args[0] = nodeset(args[0])
+ if len(args[0]) > 0:
+ args[0] = args[0][0]
+ else:
+ args[0] = None
+ if convert is not None:
+ args = [convert(x) for x in args]
+ return f(self, node, pos, size, context, *args)
+
+ new_f.minargs = minargs
+ new_f.maxargs = maxargs
+ new_f.__name__ = f.__name__
+ new_f.__doc__ = f.__doc__
+ return new_f
+ return decorator
+
+ # Node Set Functions
+
+ @function(0, 0)
+ def f_last(self, node, pos, size, context):
+ return size
+
+ @function(0, 0)
+ def f_position(self, node, pos, size, context):
+ return pos
+
+ @function(1, 1, convert=nodeset)
+ def f_count(self, node, pos, size, context, nodes):
+ return len(nodes)
+
+ @function(1, 1)
+ def f_id(self, node, pos, size, context, arg):
+ # A node-set argument contributes one id per node's string-value.
+ if nodesetp(arg):
+ ids = (string_value(x) for x in arg)
+ else:
+ ids = [string(arg)]
+ if node.nodeType != node.DOCUMENT_NODE:
+ node = node.ownerDocument
+ # Drop ids for which getElementById returned nothing.
+ return list(filter(None, (node.getElementById(id) for id in ids)))
+
+ @function(0, 1, implicit=True, first=True)
+ def f_local_name(self, node, pos, size, context, argnode):
+ if argnode is None:
+ return ''
+ if (argnode.nodeType == argnode.ELEMENT_NODE or
+ argnode.nodeType == argnode.ATTRIBUTE_NODE):
+ return argnode.localName
+ elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
+ return argnode.target
+ return ''
+
+ @function(0, 1, implicit=True, first=True)
+ def f_namespace_uri(self, node, pos, size, context, argnode):
+ if argnode is None:
+ return ''
+ # NOTE(review): namespaceURI is returned as-is; if the DOM reports None
+ # for nodes without a namespace this returns None, not '' -- confirm.
+ return argnode.namespaceURI
+
+ @function(0, 1, implicit=True, first=True)
+ def f_name(self, node, pos, size, context, argnode):
+ if argnode is None:
+ return ''
+ if argnode.nodeType == argnode.ELEMENT_NODE:
+ return argnode.tagName
+ elif argnode.nodeType == argnode.ATTRIBUTE_NODE:
+ return argnode.name
+ elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
+ return argnode.target
+ return ''
+
+ # String Functions
+
+ @function(0, 1, implicit=True, convert=string)
+ def f_string(self, node, pos, size, context, arg):
+ return arg
+
+ @function(2, None, convert=string)
+ def f_concat(self, node, pos, size, context, *args):
+ return ''.join((x for x in args))
+
+ @function(2, 2, convert=string)
+ def f_starts_with(self, node, pos, size, context, a, b):
+ return a.startswith(b)
+
+ @function(2, 2, convert=string)
+ def f_contains(self, node, pos, size, context, a, b):
+ return b in a
+
+ @function(2, 2, convert=string)
+ def f_substring_before(self, node, pos, size, context, a, b):
+ try:
+ return a[0:a.index(b)]
+ except ValueError:
+ return ''
+
+ @function(2, 2, convert=string)
+ def f_substring_after(self, node, pos, size, context, a, b):
+ try:
+ return a[a.index(b)+len(b):]
+ except ValueError:
+ return ''
+
+ @function(2, 3)
+ def f_substring(self, node, pos, size, context, s, start, count=None):
+ # Positions are 1-based; start and count are rounded before use.
+ s = string(s)
+ start = round(number(start))
+ if start != start:
+ # Catch NaN
+ return ''
+
+ if count is None:
+ end = len(s) + 1
+ else:
+ end = start + round(number(count))
+ if end != end:
+ # Catch NaN
+ return ''
+ if end > len(s):
+ end = len(s)+1
+
+ if start < 1:
+ start = 1
+ if start > len(s):
+ return ''
+ if end <= start:
+ return ''
+ return s[int(start)-1:int(end)-1]
+
+ @function(0, 1, implicit=True, convert=string)
+ def f_string_length(self, node, pos, size, context, s):
+ return len(s)
+
+ @function(0, 1, implicit=True, convert=string)
+ def f_normalize_space(self, node, pos, size, context, s):
+ return re.sub(r'\s+', ' ', s.strip())
+
+ @function(3, 3, convert=lambda x: unicode(string(x)))
+ def f_translate(self, node, pos, size, context, s, source, target):
+ # str.translate() and unicode.translate() are completely different.
+ # The translate() arguments are coerced to unicode.
+ table = {}
+ # The first occurrence of a source character wins.
+ for schar, tchar in izip(source, target):
+ schar = ord(schar)
+ if schar not in table:
+ table[schar] = tchar
+ # Source characters with no counterpart in target are deleted.
+ if len(source) > len(target):
+ for schar in source[len(target):]:
+ schar = ord(schar)
+ if schar not in table:
+ table[schar] = None
+ return s.translate(table)
+
+ # Boolean functions
+
+ @function(1, 1, convert=boolean)
+ def f_boolean(self, node, pos, size, context, b):
+ return b
+
+ @function(1, 1, convert=boolean)
+ def f_not(self, node, pos, size, context, b):
+ return not b
+
+ @function(0, 0)
+ def f_true(self, node, pos, size, context):
+ return True
+
+ @function(0, 0)
+ def f_false(self, node, pos, size, context):
+ return False
+
+ @function(1, 1, convert=string)
+ def f_lang(self, node, pos, size, context, s):
+ # Case-insensitive match against xml:lang, allowing a '-' suffix.
+ # NOTE(review): indentation is not visible in this view; the `break`
+ # presumably sits inside the hasAttribute branch so that only the
+ # nearest xml:lang is consulted -- confirm.
+ s = s.lower()
+ for n in axes['ancestor-or-self'](node):
+ if n.nodeType == n.ELEMENT_NODE and n.hasAttribute('xml:lang'):
+ lang = n.getAttribute('xml:lang').lower()
+ if s == lang or lang.startswith(s + u'-'):
+ return True
+ break
+ return False
+
+ # Number functions
+
+ @function(0, 1, implicit=True, convert=number)
+ def f_number(self, node, pos, size, context, n):
+ return n
+
+ @function(1, 1, convert=nodeset)
+ def f_sum(self, node, pos, size, context, nodes):
+ return sum((number(string_value(x)) for x in nodes))
+
+ @function(1, 1, convert=number)
+ def f_floor(self, node, pos, size, context, n):
+ return math.floor(n)
+
+ @function(1, 1, convert=number)
+ def f_ceiling(self, node, pos, size, context, n):
+ return math.ceil(n)
+
+ @function(1, 1, convert=number)
+ def f_round(self, node, pos, size, context, n):
+ # XXX round(-0.0) should be -0.0, not 0.0.
+ # XXX round(-1.5) should be -1.0, not -2.0.
+ # The built-in round() is used as-is; the cases above are known gaps.
+ return round(n)
+
+ def __str__(self):
+ return '%s(%s)' % (self.name, ', '.join((str(x) for x in self.args)))
+
+#
+# XPath axes.
+#
+
+# Dictionary of all axis functions, keyed by XPath axis name
+# (e.g. 'descendant', 'ancestor-or-self'). Filled in by make_axes().
+axes = {}
+
+def axisfn(reverse=False, principal_node_type=xml.dom.Node.ELEMENT_NODE):
+ """Axis function decorator.
+
+ An axis function will take a node as an argument and return a sequence
+ over the nodes along an XPath axis. Axis functions have two extra
+ attributes indicating the axis direction and principal node type.
+
+ reverse -- stored on the function as ``reverse``.
+ principal_node_type -- stored on the function as ``principal_node_type``.
+
+ The function itself is returned unwrapped; only its attributes change.
+ """
+ def decorate(f):
+ # Rename to the XPath spelling, e.g. following_sibling -> following-sibling.
+ f.__name__ = f.__name__.replace('_', '-')
+ f.reverse = reverse
+ f.principal_node_type = principal_node_type
+ return f
+ return decorate
+
+def make_axes():
+ """Define functions to walk each of the possible XPath axes.
+
+ Every local function defined below is registered in the module-level
+ ``axes`` dict under its (hyphenated) ``__name__``.
+ """
+
+ @axisfn()
+ def child(node):
+ return node.childNodes
+
+ @axisfn()
+ def descendant(node):
+ for child in node.childNodes:
+ for node in descendant_or_self(child):
+ yield node
+
+ @axisfn()
+ def parent(node):
+ if node.parentNode is not None:
+ yield node.parentNode
+
+ @axisfn(reverse=True)
+ def ancestor(node):
+ while node.parentNode is not None:
+ node = node.parentNode
+ yield node
+
+ @axisfn()
+ def following_sibling(node):
+ while node.nextSibling is not None:
+ node = node.nextSibling
+ yield node
+
+ @axisfn(reverse=True)
+ def preceding_sibling(node):
+ while node.previousSibling is not None:
+ node = node.previousSibling
+ yield node
+
+ @axisfn()
+ def following(node):
+ # Walk the later siblings (with their subtrees) at each level, then
+ # climb to the parent and repeat.
+ while node is not None:
+ while node.nextSibling is not None:
+ node = node.nextSibling
+ for n in descendant_or_self(node):
+ yield n
+ node = node.parentNode
+
+ @axisfn(reverse=True)
+ def preceding(node):
+ # Mirror image of following(): earlier siblings and their subtrees,
+ # each subtree emitted in reverse order.
+ while node is not None:
+ while node.previousSibling is not None:
+ node = node.previousSibling
+ # Could be more efficient here.
+ for n in reversed(list(descendant_or_self(node))):
+ yield n
+ node = node.parentNode
+
+ @axisfn(principal_node_type=xml.dom.Node.ATTRIBUTE_NODE)
+ def attribute(node):
+ if node.attributes is not None:
+ return (node.attributes.item(i)
+ for i in xrange(node.attributes.length))
+ return ()
+
+ @axisfn()
+ def namespace(node):
+ raise XPathNotImplementedError("namespace axis is not implemented")
+
+ @axisfn()
+ def self(node):
+ yield node
+
+ @axisfn()
+ def descendant_or_self(node):
+ yield node
+ for child in node.childNodes:
+ for node in descendant_or_self(child):
+ yield node
+
+ @axisfn(reverse=True)
+ def ancestor_or_self(node):
+ return chain([node], ancestor(node))
+
+ # Place each axis function defined here into the 'axes' dict.
+ for axis in locals().values():
+ axes[axis.__name__] = axis
+
+# Populate the axes table once, at import time.
+make_axes()
+
+def merge_into_nodeset(target, source):
+ """Place all the nodes from the source node-set into the target
+ node-set, preserving document order. Both node-sets must be in
+ document order to begin with.
+
+ """
+ if len(target) == 0:
+ target.extend(source)
+ return
+
+ source = [n for n in source if n not in target]
+ if len(source) == 0:
+ return
+
+ # If the last node in the target set comes before the first node in the
+ # source set, then we can just concatenate the sets. Otherwise, we
+ # will need to sort. (We could also check to see if the last node in
+ # the source set comes before the first node in the target set, but this
+ # situation is very unlikely in practice.)
+ if document_order(target[-1]) < document_order(source[0]):
+ target.extend(source)
+ else:
+ target.extend(source)
+ target.sort(key=document_order)
+
+class AbsolutePathExpr(Expr):
+ """Absolute location paths."""
+
+ def __init__(self, path):
+ self.path = path
+
+ def evaluate(self, node, pos, size, context):
+ if node.nodeType != node.DOCUMENT_NODE:
+ node = node.ownerDocument
+ if self.path is None:
+ return [node]
+ return self.path.evaluate(node, 1, 1, context)
+
+ def __str__(self):
+ return '/%s' % (self.path or '')
+
+class PathExpr(Expr):
+ """Location path expressions."""
+
+ def __init__(self, steps):
+ self.steps = steps
+
+ def evaluate(self, node, pos, size, context):
+ # The first step in the path is evaluated in the current context.
+ # If this is the only step in the path, the return value is
+ # unimportant. If there are other steps, however, it must be a
+ # node-set.
+ result = self.steps[0].evaluate(node, pos, size, context)
+ if len(self.steps) > 1 and not nodesetp(result):
+ raise XPathTypeError("path step is not a node-set")
+
+ # Subsequent steps are evaluated for each node in the node-set
+ # resulting from the previous step.
+ for step in self.steps[1:]:
+ aggregate = []
+ for i in xrange(len(result)):
+ nodes = step.evaluate(result[i], i+1, len(result), context)
+ if not nodesetp(nodes):
+ raise XPathTypeError("path step is not a node-set")
+ merge_into_nodeset(aggregate, nodes)
+ result = aggregate
+
+ return result
+
+ def __str__(self):
+ return '/'.join((str(s) for s in self.steps))
+
+class PredicateList(Expr):
+ """A list of predicates.
+
+ Predicates are handled as an expression wrapping the expression
+ filtered by the predicates.
+
+ """
+ def __init__(self, expr, predicates, axis='child'):
+ self.predicates = predicates
+ self.expr = expr
+ self.axis = axes[axis]
+
+ def evaluate(self, node, pos, size, context):
+ result = self.expr.evaluate(node, pos, size, context)
+ if not nodesetp(result):
+ raise XPathTypeError("predicate input is not a node-set")
+
+ if self.axis.reverse:
+ result.reverse()
+
+ for pred in self.predicates:
+ match = []
+ for i, node in izip(count(1), result):
+ r = pred.evaluate(node, i, len(result), context)
+
+ # If a predicate evaluates to a number, select the node
+ # with that position. Otherwise, select nodes for which
+ # the boolean value of the predicate is true.
+ if numberp(r):
+ if r == i:
+ match.append(node)
+ elif boolean(r):
+ match.append(node)
+ result = match
+
+ if self.axis.reverse:
+ result.reverse()
+
+ return result
+
+ def __str__(self):
+ s = str(self.expr)
+ if '/' in s:
+ s = '(%s)' % s
+ return s + ''.join(('[%s]' % x for x in self.predicates))
+
+class AxisStep(Expr):
+ """One step in a location path expression."""
+
+ def __init__(self, axis, test=None, predicates=None):
+ if test is None:
+ test = AnyKindTest()
+ self.axis = axes[axis]
+ self.test = test
+
+ def evaluate(self, node, pos, size, context):
+ match = []
+ for n in self.axis(node):
+ if self.test.match(n, self.axis, context):
+ match.append(n)
+
+ if self.axis.reverse:
+ match.reverse()
+
+ return match
+
+ def __str__(self):
+ return '%s::%s' % (self.axis.__name__, self.test)
+
+#
+# Node tests.
+#
+
+class Test(object):
+ """Abstract base class for node tests."""
+
+ def match(self, node, axis, context):
+ """Return True if 'node' matches the test along 'axis'."""
+
+class NameTest(object):
+ def __init__(self, prefix, localpart):
+ self.prefix = prefix
+ self.localName = localpart
+ if self.prefix == None and self.localName == '*':
+ self.prefix = '*'
+
+ def match(self, node, axis, context):
+ if node.nodeType != axis.principal_node_type:
+ return False
+
+ if self.prefix != '*':
+ namespaceURI = None
+ if self.prefix is not None:
+ try:
+ namespaceURI = context.namespaces[self.prefix]
+ except KeyError:
+ raise XPathUnknownPrefixError(self.prefix)
+ elif axis.principal_node_type == node.ELEMENT_NODE:
+ namespaceURI = context.default_namespace
+ if namespaceURI != node.namespaceURI:
+ return False
+ if self.localName != '*':
+ if self.localName != node.localName:
+ return False
+ return True
+
+ def __str__(self):
+ if self.prefix is not None:
+ return '%s:%s' % (self.prefix, self.localName)
+ else:
+ return self.localName
+
+class PITest(object):
+ def __init__(self, name=None):
+ self.name = name
+
+ def match(self, node, axis, context):
+ return (node.nodeType == node.PROCESSING_INSTRUCTION_NODE and
+ (self.name is None or node.target == self.name))
+
+ def __str__(self):
+ if self.name is None:
+ name = ''
+ elif "'" in self.name:
+ name = '"%s"' % self.name
+ else:
+ name = "'%s'" % self.name
+ return 'processing-instruction(%s)' % name
+
+class CommentTest(object):
+ def match(self, node, axis, context):
+ return node.nodeType == node.COMMENT_NODE
+
+ def __str__(self):
+ return 'comment()'
+
+class TextTest(object):
+ def match(self, node, axis, context):
+ return node.nodeType == node.TEXT_NODE
+
+ def __str__(self):
+ return 'text()'
+
+class AnyKindTest(object):
+ def match(self, node, axis, context):
+ return True
+
+ def __str__(self):
+ return 'node()'
diff --git a/pyulib/src/ulib/ext/xpath/parser.g b/pyulib/src/ulib/ext/xpath/parser.g
new file mode 100644
index 0000000..df75bb1
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/parser.g
@@ -0,0 +1,252 @@
+import expr as X
+from yappsrt import *
+
+%%
+
+parser XPath:
+ # Yapps grammar for XPath expressions. The semantic actions in {{ }} build
+ # expression and node-test objects from the expr module, imported as X.
+ option: 'no-support-module'
+
+ ignore: r'\s+'
+ token END: r'$'
+
+ token FORWARD_AXIS_NAME:
+ r'child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace'
+ token REVERSE_AXIS_NAME:
+ r'parent|preceding-sibling|preceding|ancestor-or-self|ancestor'
+
+ # Dire hack here, since yapps2 has only one token of lookahead: NCNAME
+ # does not match when followed by an open paren.
+ token NCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*(?!\()'
+ token FUNCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*'
+
+ token DQUOTE: r'\"(?:[^\"])*\"'
+ token SQUOTE: r"\'(?:[^\'])*\'"
+ token NUMBER: r'((\.[0-9]+)|([0-9]+(\.[0-9]*)?))([eE][\+\-]?[0-9]+)?'
+ token EQ_COMP: r'\!?\='
+ token REL_COMP: r'[\<\>]\=?'
+ token ADD_COMP: r'[\+\-]'
+ token MUL_COMP: r'\*|div|mod'
+
+ # Entry point: a complete expression followed by end of input.
+ rule XPath:
+ Expr END {{ return Expr }}
+
+ rule Expr:
+ OrExpr {{ return OrExpr }}
+
+ # The binary-operator rules below all fold left: each repetition wraps
+ # the expression built so far as the left operand of a new node.
+ rule OrExpr:
+ AndExpr {{ Expr = AndExpr }}
+ (
+ r'or' AndExpr
+ {{ Expr = X.OrExpr('or', Expr, AndExpr) }}
+ )* {{ return Expr }}
+
+ rule AndExpr:
+ EqualityExpr {{ Expr = EqualityExpr }}
+ (
+ r'and' EqualityExpr
+ {{ Expr = X.AndExpr('and', Expr, EqualityExpr) }}
+ )* {{ return Expr }}
+
+ rule EqualityExpr:
+ RelationalExpr {{ Expr = RelationalExpr }}
+ (
+ EQ_COMP
+ RelationalExpr
+ {{ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr) }}
+ )* {{ return Expr }}
+
+ # NOTE: relational comparisons are built as X.EqualityExpr nodes too,
+ # with the REL_COMP token text as the operator.
+ rule RelationalExpr:
+ AdditiveExpr {{ Expr = AdditiveExpr }}
+ (
+ REL_COMP
+ AdditiveExpr
+ {{ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr) }}
+ )* {{ return Expr }}
+
+ rule AdditiveExpr:
+ MultiplicativeExpr {{ Expr = MultiplicativeExpr }}
+ (
+ ADD_COMP
+ MultiplicativeExpr
+ {{ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr) }}
+ )* {{ return Expr }}
+
+ rule MultiplicativeExpr:
+ UnionExpr {{ Expr = UnionExpr }}
+ (
+ MUL_COMP
+ UnionExpr
+ {{ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr) }}
+ )* {{ return Expr }}
+
+ rule UnionExpr:
+ UnaryExpr {{ Expr = UnaryExpr }}
+ (
+ '\|' UnaryExpr
+ {{ Expr = X.UnionExpr('|', Expr, UnaryExpr) }}
+ )* {{ return Expr }}
+
+ # At most one leading minus is accepted here.
+ rule UnaryExpr:
+ r'\-' ValueExpr {{ return X.NegationExpr(ValueExpr) }}
+ | ValueExpr {{ return ValueExpr }}
+
+ rule ValueExpr:
+ PathExpr {{ return PathExpr }}
+
+ # A leading '//' is expanded into an explicit descendant-or-self step
+ # inserted in front of the relative path.
+ rule PathExpr:
+ r'\/' {{ path = None }}
+ [
+ RelativePathExpr {{ path = RelativePathExpr }}
+ ] {{ return X.AbsolutePathExpr(path) }}
+ | r'\/\/' RelativePathExpr
+ {{ step = X.AxisStep('descendant-or-self') }}
+ {{ RelativePathExpr.steps.insert(0, step) }}
+ {{ return X.AbsolutePathExpr(RelativePathExpr) }}
+ | RelativePathExpr {{ return RelativePathExpr }}
+
+ # An inner '//' likewise contributes a descendant-or-self step between
+ # the two steps it separates.
+ rule RelativePathExpr:
+ StepExpr {{ steps = [StepExpr] }}
+ (
+ (
+ r'\/'
+ | r'\/\/'
+ {{ steps.append(X.AxisStep('descendant-or-self')) }}
+ )
+ StepExpr {{ steps.append(StepExpr) }}
+ )*
+ {{ return X.PathExpr(steps) }}
+
+ rule StepExpr:
+ AxisStep {{ return AxisStep }}
+ | FilterExpr {{ return FilterExpr }}
+
+ # ForwardStep and ReverseStep return [axis name, node test]; the axis
+ # name is also handed to the predicate list wrapped around the step.
+ rule AxisStep:
+ (
+ ForwardStep {{ step = ForwardStep }}
+ | ReverseStep {{ step = ReverseStep }}
+ ) {{ expr = X.AxisStep(*step) }}
+ [
+ PredicateList
+ {{ expr = X.PredicateList(expr, PredicateList, step[0]) }}
+ ]
+ {{ return expr }}
+
+ rule ForwardStep:
+ ForwardAxis NodeTest {{ return [ForwardAxis, NodeTest] }}
+ | AbbrevForwardStep {{ return AbbrevForwardStep }}
+
+ rule ForwardAxis:
+ FORWARD_AXIS_NAME r'::' {{ return FORWARD_AXIS_NAME }}
+
+ # No axis means 'child'; a leading '@' selects the attribute axis.
+ rule AbbrevForwardStep:
+ {{ axis = 'child' }}
+ [
+ r'@' {{ axis = 'attribute' }}
+ ]
+ NodeTest {{ return [axis, NodeTest] }}
+
+ rule ReverseStep:
+ ReverseAxis NodeTest {{ return [ReverseAxis, NodeTest] }}
+ | AbbrevReverseStep {{ return AbbrevReverseStep }}
+
+ rule ReverseAxis:
+ REVERSE_AXIS_NAME r'::' {{ return REVERSE_AXIS_NAME }}
+
+ # '..' is the parent axis with no node test.
+ rule AbbrevReverseStep:
+ r'\.\.' {{ return ['parent', None] }}
+
+ rule NodeTest:
+ KindTest {{ return KindTest }}
+ | NameTest {{ return NameTest }}
+
+ rule NameTest:
+ # We also support the XPath 2.0 wildcard-prefix form, *:name.
+ # The first name read is the local part unless a ':' follows, in which
+ # case it becomes the prefix.
+ {{ prefix = None }}
+ WildcardOrNCName {{ localpart = WildcardOrNCName }}
+ [
+ r':' WildcardOrNCName {{ prefix = localpart }}
+ {{ localpart = WildcardOrNCName }}
+ ]
+ {{ return X.NameTest(prefix, localpart) }}
+
+ rule WildcardOrNCName:
+ r'\*' {{ return '*' }}
+ | NCNAME {{ return NCNAME }}
+
+ rule FilterExpr:
+ PrimaryExpr
+ [
+ PredicateList
+ {{ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList) }}
+ ] {{ return PrimaryExpr }}
+
+ # Returns a plain Python list of predicate expressions (one or more).
+ rule PredicateList:
+ Predicate {{ predicates = [Predicate] }}
+ (
+ Predicate {{ predicates.append(Predicate) }}
+ )* {{ return predicates }}
+
+ rule Predicate:
+ r'\[' Expr r'\]' {{ return Expr }}
+
+ rule PrimaryExpr:
+ Literal {{ return X.LiteralExpr(Literal) }}
+ | VariableReference {{ return VariableReference }}
+ | r'\(' Expr r'\)' {{ return Expr }}
+ | ContextItemExpr {{ return ContextItemExpr }}
+ | FunctionCall {{ return FunctionCall }}
+
+ rule VariableReference:
+ r'\$' QName
+ {{ return X.VariableReference(*QName) }}
+
+ # '.' is represented as a step along the self axis.
+ rule ContextItemExpr:
+ r'\.' {{ return X.AxisStep('self') }}
+
+ rule FunctionCall:
+ FUNCNAME r'\(' {{ args = [] }}
+ [
+ Expr {{ args.append(Expr) }}
+ (
+ r'\,' Expr {{ args.append(Expr) }}
+ )*
+ ] r'\)' {{ return X.Function(FUNCNAME, args) }}
+
+ rule KindTest:
+ PITest {{ return PITest }}
+ | CommentTest {{ return CommentTest }}
+ | TextTest {{ return TextTest }}
+ | AnyKindTest {{ return AnyKindTest }}
+
+ # The optional target may be written as a bare name or a string literal.
+ rule PITest:
+ r'processing-instruction' {{ name = None }}
+ r'\(' [
+ NCNAME {{ name = NCNAME }}
+ | StringLiteral {{ name = StringLiteral }}
+ ] r'\)' {{ return X.PITest(name) }}
+
+ rule CommentTest:
+ r'comment' r'\(' r'\)' {{ return X.CommentTest() }}
+
+ rule TextTest:
+ r'text' r'\(' r'\)' {{ return X.TextTest() }}
+
+ rule AnyKindTest:
+ r'node' r'\(' r'\)' {{ return X.AnyKindTest() }}
+
+ rule Literal:
+ NumericLiteral {{ return NumericLiteral }}
+ | StringLiteral {{ return StringLiteral }}
+
+ rule NumericLiteral:
+ NUMBER {{ return float(NUMBER) }}
+
+ # The surrounding quote characters are stripped from the token text.
+ rule StringLiteral:
+ DQUOTE {{ return DQUOTE[1:-1] }}
+ | SQUOTE {{ return SQUOTE[1:-1] }}
+
+ # Returns a (prefix, local name) tuple; the prefix is None when absent.
+ rule QName:
+ NCNAME {{ name = NCNAME }}
+ [
+ r'\:' NCNAME {{ return (name, NCNAME) }}
+ ] {{ return (None, name) }}
diff --git a/pyulib/src/ulib/ext/xpath/parser.py b/pyulib/src/ulib/ext/xpath/parser.py
new file mode 100644
index 0000000..bb673f9
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/parser.py
@@ -0,0 +1,420 @@
+import expr as X
+from yappsrt import *
+
+
+from string import *
+import re
+
+class XPathScanner(Scanner):  # Scanner preloaded with the XPath token patterns.
+ patterns = [  # (terminal name, compiled regex) pairs
+ ("r'\\:'", re.compile('\\:')),
+ ("r'node'", re.compile('node')),
+ ("r'text'", re.compile('text')),
+ ("r'comment'", re.compile('comment')),
+ ("r'processing-instruction'", re.compile('processing-instruction')),
+ ("r'\\,'", re.compile('\\,')),
+ ("r'\\.'", re.compile('\\.')),
+ ("r'\\$'", re.compile('\\$')),
+ ("r'\\)'", re.compile('\\)')),
+ ("r'\\('", re.compile('\\(')),
+ ("r'\\]'", re.compile('\\]')),
+ ("r'\\['", re.compile('\\[')),
+ ("r'\\*'", re.compile('\\*')),
+ ("r':'", re.compile(':')),
+ ("r'\\.\\.'", re.compile('\\.\\.')),
+ ("r'@'", re.compile('@')),
+ ("r'::'", re.compile('::')),
+ ("r'\\/\\/'", re.compile('\\/\\/')),
+ ("r'\\/'", re.compile('\\/')),
+ ("r'\\-'", re.compile('\\-')),
+ ("'\\|'", re.compile('\\|')),
+ ("r'and'", re.compile('and')),
+ ("r'or'", re.compile('or')),
+ ('\\s+', re.compile('\\s+')),
+ ('END', re.compile('$')),
+ ('FORWARD_AXIS_NAME', re.compile('child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace')),
+ ('REVERSE_AXIS_NAME', re.compile('parent|preceding-sibling|preceding|ancestor-or-self|ancestor')),
+ ('NCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*(?!\\()')),
+ ('FUNCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*')),
+ ('DQUOTE', re.compile('\\"(?:[^\\"])*\\"')),
+ ('SQUOTE', re.compile("\\'(?:[^\\'])*\\'")),
+ ('NUMBER', re.compile('((\\.[0-9]+)|([0-9]+(\\.[0-9]*)?))([eE][\\+\\-]?[0-9]+)?')),
+ ('EQ_COMP', re.compile('\\!?\\=')),
+ ('REL_COMP', re.compile('[\\<\\>]\\=?')),
+ ('ADD_COMP', re.compile('[\\+\\-]')),
+ ('MUL_COMP', re.compile('\\*|div|mod')),
+ ]
+ def __init__(self, str):  # str: the expression text to scan
+ Scanner.__init__(self,None,['\\s+'],str)  # None: keep the class-level patterns; whitespace is the only ignored terminal
+
+class XPath(Parser):  # Recursive-descent XPath parser: one method per grammar rule, choosing branches with a one-token _peek.
+ def XPath(self):  # XPath ::= Expr END
+ Expr = self.Expr()
+ END = self._scan('END')
+ return Expr
+
+ def Expr(self):  # Expr ::= OrExpr
+ OrExpr = self.OrExpr()
+ return OrExpr
+
+ def OrExpr(self):  # OrExpr ::= AndExpr ('or' AndExpr)*
+ AndExpr = self.AndExpr()
+ Expr = AndExpr
+ while self._peek("r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'or'":
+ self._scan("r'or'")
+ AndExpr = self.AndExpr()
+ Expr = X.OrExpr('or', Expr, AndExpr)
+ return Expr
+
+ def AndExpr(self):  # AndExpr ::= EqualityExpr ('and' EqualityExpr)*
+ EqualityExpr = self.EqualityExpr()
+ Expr = EqualityExpr
+ while self._peek("r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'and'":
+ self._scan("r'and'")
+ EqualityExpr = self.EqualityExpr()
+ Expr = X.AndExpr('and', Expr, EqualityExpr)
+ return Expr
+
+ def EqualityExpr(self):  # EqualityExpr ::= RelationalExpr (EQ_COMP RelationalExpr)*
+ RelationalExpr = self.RelationalExpr()
+ Expr = RelationalExpr
+ while self._peek('EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'EQ_COMP':
+ EQ_COMP = self._scan('EQ_COMP')
+ RelationalExpr = self.RelationalExpr()
+ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr)
+ return Expr
+
+ def RelationalExpr(self):  # RelationalExpr ::= AdditiveExpr (REL_COMP AdditiveExpr)*; also builds X.EqualityExpr nodes
+ AdditiveExpr = self.AdditiveExpr()
+ Expr = AdditiveExpr
+ while self._peek('REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'REL_COMP':
+ REL_COMP = self._scan('REL_COMP')
+ AdditiveExpr = self.AdditiveExpr()
+ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr)
+ return Expr
+
+ def AdditiveExpr(self):  # AdditiveExpr ::= MultiplicativeExpr (ADD_COMP MultiplicativeExpr)*
+ MultiplicativeExpr = self.MultiplicativeExpr()
+ Expr = MultiplicativeExpr
+ while self._peek('ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'ADD_COMP':
+ ADD_COMP = self._scan('ADD_COMP')
+ MultiplicativeExpr = self.MultiplicativeExpr()
+ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr)
+ return Expr
+
+ def MultiplicativeExpr(self):  # MultiplicativeExpr ::= UnionExpr (MUL_COMP UnionExpr)*
+ UnionExpr = self.UnionExpr()
+ Expr = UnionExpr
+ while self._peek('MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'MUL_COMP':
+ MUL_COMP = self._scan('MUL_COMP')
+ UnionExpr = self.UnionExpr()
+ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr)
+ return Expr
+
+ def UnionExpr(self):  # UnionExpr ::= UnaryExpr ('|' UnaryExpr)*
+ UnaryExpr = self.UnaryExpr()
+ Expr = UnaryExpr
+ while self._peek("'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "'\\|'":
+ self._scan("'\\|'")
+ UnaryExpr = self.UnaryExpr()
+ Expr = X.UnionExpr('|', Expr, UnaryExpr)
+ return Expr
+
+ def UnaryExpr(self):  # UnaryExpr ::= '-' ValueExpr | ValueExpr
+ _token_ = self._peek("r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ == "r'\\-'":
+ self._scan("r'\\-'")
+ ValueExpr = self.ValueExpr()
+ return X.NegationExpr(ValueExpr)
+ else:
+ ValueExpr = self.ValueExpr()
+ return ValueExpr
+
+ def ValueExpr(self):  # ValueExpr ::= PathExpr
+ PathExpr = self.PathExpr()
+ return PathExpr
+
+ def PathExpr(self):  # PathExpr ::= '/' [RelativePathExpr] | '//' RelativePathExpr | RelativePathExpr
+ _token_ = self._peek("r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ == "r'\\/'":
+ self._scan("r'\\/'")
+ path = None
+ if self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME', "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") not in ["'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'"]:
+ RelativePathExpr = self.RelativePathExpr()
+ path = RelativePathExpr
+ return X.AbsolutePathExpr(path)
+ elif _token_ == "r'\\/\\/'":
+ self._scan("r'\\/\\/'")
+ RelativePathExpr = self.RelativePathExpr()
+ step = X.AxisStep('descendant-or-self')
+ RelativePathExpr.steps.insert(0, step)
+ return X.AbsolutePathExpr(RelativePathExpr)
+ else:
+ RelativePathExpr = self.RelativePathExpr()
+ return RelativePathExpr
+
+ def RelativePathExpr(self):  # RelativePathExpr ::= StepExpr (('/' | '//') StepExpr)*
+ StepExpr = self.StepExpr()
+ steps = [StepExpr]
+ while self._peek("r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") in ["r'\\/'", "r'\\/\\/'"]:
+ _token_ = self._peek("r'\\/'", "r'\\/\\/'")
+ if _token_ == "r'\\/'":
+ self._scan("r'\\/'")
+ else:# == "r'\\/\\/'"
+ self._scan("r'\\/\\/'")
+ steps.append(X.AxisStep('descendant-or-self'))
+ StepExpr = self.StepExpr()
+ steps.append(StepExpr)
+ return X.PathExpr(steps)
+
+ def StepExpr(self):  # StepExpr ::= AxisStep | FilterExpr
+ _token_ = self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE']:
+ AxisStep = self.AxisStep()
+ return AxisStep
+ else:
+ FilterExpr = self.FilterExpr()
+ return FilterExpr
+
+ def AxisStep(self):  # AxisStep ::= (ForwardStep | ReverseStep) [PredicateList]
+ _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ not in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]:
+ ForwardStep = self.ForwardStep()
+ step = ForwardStep
+ else:# in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]
+ ReverseStep = self.ReverseStep()
+ step = ReverseStep
+ expr = X.AxisStep(*step)
+ if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
+ PredicateList = self.PredicateList()
+ expr = X.PredicateList(expr, PredicateList, step[0])
+ return expr
+
+ def ForwardStep(self):  # ForwardStep ::= ForwardAxis NodeTest | AbbrevForwardStep; returns [axis, test]
+ _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ == 'FORWARD_AXIS_NAME':
+ ForwardAxis = self.ForwardAxis()
+ NodeTest = self.NodeTest()
+ return [ForwardAxis, NodeTest]
+ else:
+ AbbrevForwardStep = self.AbbrevForwardStep()
+ return AbbrevForwardStep
+
+ def ForwardAxis(self):  # ForwardAxis ::= FORWARD_AXIS_NAME '::'
+ FORWARD_AXIS_NAME = self._scan('FORWARD_AXIS_NAME')
+ self._scan("r'::'")
+ return FORWARD_AXIS_NAME
+
+ def AbbrevForwardStep(self):  # AbbrevForwardStep ::= ['@'] NodeTest; axis is 'attribute' with '@', else 'child'
+ axis = 'child'
+ if self._peek("r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') == "r'@'":
+ self._scan("r'@'")
+ axis = 'attribute'
+ NodeTest = self.NodeTest()
+ return [axis, NodeTest]
+
+ def ReverseStep(self):  # ReverseStep ::= ReverseAxis NodeTest | AbbrevReverseStep; returns [axis, test]
+ _token_ = self._peek('REVERSE_AXIS_NAME', "r'\\.\\.'")
+ if _token_ == 'REVERSE_AXIS_NAME':
+ ReverseAxis = self.ReverseAxis()
+ NodeTest = self.NodeTest()
+ return [ReverseAxis, NodeTest]
+ else:# == "r'\\.\\.'"
+ AbbrevReverseStep = self.AbbrevReverseStep()
+ return AbbrevReverseStep
+
+ def ReverseAxis(self):  # ReverseAxis ::= REVERSE_AXIS_NAME '::'
+ REVERSE_AXIS_NAME = self._scan('REVERSE_AXIS_NAME')
+ self._scan("r'::'")
+ return REVERSE_AXIS_NAME
+
+ def AbbrevReverseStep(self):  # AbbrevReverseStep ::= '..'
+ self._scan("r'\\.\\.'")
+ return ['parent', None]
+
+ def NodeTest(self):  # NodeTest ::= KindTest | NameTest
+ _token_ = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
+ if _token_ not in ["r'\\*'", 'NCNAME']:
+ KindTest = self.KindTest()
+ return KindTest
+ else:# in ["r'\\*'", 'NCNAME']
+ NameTest = self.NameTest()
+ return NameTest
+
+ def NameTest(self):  # NameTest ::= WildcardOrNCName [':' WildcardOrNCName]; the first name becomes the prefix when ':' follows
+ prefix = None
+ WildcardOrNCName = self.WildcardOrNCName()
+ localpart = WildcardOrNCName
+ if self._peek("r':'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r':'":
+ self._scan("r':'")
+ WildcardOrNCName = self.WildcardOrNCName()
+ prefix = localpart
+ localpart = WildcardOrNCName
+ return X.NameTest(prefix, localpart)
+
+ def WildcardOrNCName(self):  # WildcardOrNCName ::= '*' | NCNAME
+ _token_ = self._peek("r'\\*'", 'NCNAME')
+ if _token_ == "r'\\*'":
+ self._scan("r'\\*'")
+ return '*'
+ else:# == 'NCNAME'
+ NCNAME = self._scan('NCNAME')
+ return NCNAME
+
+ def FilterExpr(self):  # FilterExpr ::= PrimaryExpr [PredicateList]
+ PrimaryExpr = self.PrimaryExpr()
+ if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
+ PredicateList = self.PredicateList()
+ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList)
+ return PrimaryExpr
+
+ def PredicateList(self):  # PredicateList ::= Predicate+; returns a list of expressions
+ Predicate = self.Predicate()
+ predicates = [Predicate]
+ while self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
+ Predicate = self.Predicate()
+ predicates.append(Predicate)
+ return predicates
+
+ def Predicate(self):  # Predicate ::= '[' Expr ']'
+ self._scan("r'\\['")
+ Expr = self.Expr()
+ self._scan("r'\\]'")
+ return Expr
+
+ def PrimaryExpr(self):  # PrimaryExpr ::= Literal | VariableReference | '(' Expr ')' | ContextItemExpr | FunctionCall
+ _token_ = self._peek("r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE')
+ if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME']:
+ Literal = self.Literal()
+ return X.LiteralExpr(Literal)
+ elif _token_ == "r'\\$'":
+ VariableReference = self.VariableReference()
+ return VariableReference
+ elif _token_ == "r'\\('":
+ self._scan("r'\\('")
+ Expr = self.Expr()
+ self._scan("r'\\)'")
+ return Expr
+ elif _token_ == "r'\\.'":
+ ContextItemExpr = self.ContextItemExpr()
+ return ContextItemExpr
+ else:# == 'FUNCNAME'
+ FunctionCall = self.FunctionCall()
+ return FunctionCall
+
+ def VariableReference(self):  # VariableReference ::= '$' QName
+ self._scan("r'\\$'")
+ QName = self.QName()
+ return X.VariableReference(*QName)
+
+ def ContextItemExpr(self):  # ContextItemExpr ::= '.'; built as a step on the self axis
+ self._scan("r'\\.'")
+ return X.AxisStep('self')
+
+ def FunctionCall(self):  # FunctionCall ::= FUNCNAME '(' [Expr (',' Expr)*] ')'
+ FUNCNAME = self._scan('FUNCNAME')
+ self._scan("r'\\('")
+ args = []
+ if self._peek("r'\\,'", "r'\\)'", "r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') not in ["r'\\,'", "r'\\)'"]:
+ Expr = self.Expr()
+ args.append(Expr)
+ while self._peek("r'\\,'", "r'\\)'") == "r'\\,'":
+ self._scan("r'\\,'")
+ Expr = self.Expr()
+ args.append(Expr)
+ self._scan("r'\\)'")
+ return X.Function(FUNCNAME, args)
+
+ def KindTest(self):  # KindTest ::= PITest | CommentTest | TextTest | AnyKindTest
+ _token_ = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'")
+ if _token_ == "r'processing-instruction'":
+ PITest = self.PITest()
+ return PITest
+ elif _token_ == "r'comment'":
+ CommentTest = self.CommentTest()
+ return CommentTest
+ elif _token_ == "r'text'":
+ TextTest = self.TextTest()
+ return TextTest
+ else:# == "r'node'"
+ AnyKindTest = self.AnyKindTest()
+ return AnyKindTest
+
+ def PITest(self):  # PITest ::= 'processing-instruction' '(' [NCNAME | StringLiteral] ')'
+ self._scan("r'processing-instruction'")
+ name = None
+ self._scan("r'\\('")
+ if self._peek('NCNAME', "r'\\)'", 'DQUOTE', 'SQUOTE') != "r'\\)'":
+ _token_ = self._peek('NCNAME', 'DQUOTE', 'SQUOTE')
+ if _token_ == 'NCNAME':
+ NCNAME = self._scan('NCNAME')
+ name = NCNAME
+ else:# in ['DQUOTE', 'SQUOTE']
+ StringLiteral = self.StringLiteral()
+ name = StringLiteral
+ self._scan("r'\\)'")
+ return X.PITest(name)
+
+ def CommentTest(self):  # CommentTest ::= 'comment' '(' ')'
+ self._scan("r'comment'")
+ self._scan("r'\\('")
+ self._scan("r'\\)'")
+ return X.CommentTest()
+
+ def TextTest(self):  # TextTest ::= 'text' '(' ')'
+ self._scan("r'text'")
+ self._scan("r'\\('")
+ self._scan("r'\\)'")
+ return X.TextTest()
+
+ def AnyKindTest(self):  # AnyKindTest ::= 'node' '(' ')'
+ self._scan("r'node'")
+ self._scan("r'\\('")
+ self._scan("r'\\)'")
+ return X.AnyKindTest()
+
+ def Literal(self):  # Literal ::= NumericLiteral | StringLiteral
+ _token_ = self._peek('NUMBER', 'DQUOTE', 'SQUOTE')
+ if _token_ == 'NUMBER':
+ NumericLiteral = self.NumericLiteral()
+ return NumericLiteral
+ else:# in ['DQUOTE', 'SQUOTE']
+ StringLiteral = self.StringLiteral()
+ return StringLiteral
+
+ def NumericLiteral(self):  # NumericLiteral ::= NUMBER; returned as a float
+ NUMBER = self._scan('NUMBER')
+ return float(NUMBER)
+
+ def StringLiteral(self):  # StringLiteral ::= DQUOTE | SQUOTE; the enclosing quotes are stripped
+ _token_ = self._peek('DQUOTE', 'SQUOTE')
+ if _token_ == 'DQUOTE':
+ DQUOTE = self._scan('DQUOTE')
+ return DQUOTE[1:-1]
+ else:# == 'SQUOTE'
+ SQUOTE = self._scan('SQUOTE')
+ return SQUOTE[1:-1]
+
+ def QName(self):  # QName ::= NCNAME [':' NCNAME]; returns (prefix, name), prefix None when absent
+ NCNAME = self._scan('NCNAME')
+ name = NCNAME
+ if self._peek("r'\\:'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\:'":
+ self._scan("r'\\:'")
+ NCNAME = self._scan('NCNAME')
+ return (name, NCNAME)
+ return (None, name)
+
+
+def parse(rule, text):
+ P = XPath(XPathScanner(text))
+ return wrap_error_reporter(P, rule)
+
+if __name__ == '__main__':
+ from sys import argv, stdin
+ if len(argv) >= 2:
+ if len(argv) >= 3:
+ f = open(argv[2],'r')
+ else:
+ f = stdin
+ print parse(argv[1], f.read())
+ else: print 'Args: []'
diff --git a/pyulib/src/ulib/ext/xpath/yappsrt.py b/pyulib/src/ulib/ext/xpath/yappsrt.py
new file mode 100644
index 0000000..c8d8933
--- /dev/null
+++ b/pyulib/src/ulib/ext/xpath/yappsrt.py
@@ -0,0 +1,174 @@
+# Yapps 2.0 Runtime
+#
+# This module is needed to run generated parsers.
+
+from string import join, count, find, rfind
+import re
+
+class SyntaxError(Exception):  # NOTE: this name shadows the builtin SyntaxError within this module.
+ """When we run into an unexpected token, this is the exception to use"""
+ def __init__(self, pos=-1, msg="Bad Token"):  # pos: offset into the scanned input (default -1); msg: description
+ Exception.__init__(self)  # no arguments are passed on, so the details live only in .pos and .msg
+ self.pos = pos
+ self.msg = msg
+ def __repr__(self):
+ if self.pos < 0: return "#"  # NOTE(review): a bare "#" placeholder looks truncated — verify against upstream Yapps
+ else: return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg)
+
+class NoMoreTokens(Exception):  # Raised by Scanner.token() when the requested token index is not available.
+ """Another exception object, for when we run out of tokens"""
+ pass
+
+class Scanner:  # Regex-driven tokenizer; tokens are produced lazily and cached in self.tokens.
+ def __init__(self, patterns, ignore, input):
+ """Patterns is [(terminal,regex)...]
+ Ignore is [terminal,...];
+ Input is a string"""
+ self.tokens = []  # cached tokens, each a tuple (start, end, terminal, text)
+ self.restrictions = []  # restrictions[i] is the restriction passed to the scan that produced tokens[i]
+ self.input = input
+ self.pos = 0  # offset in self.input where the next scan starts
+ self.ignore = ignore
+ # The stored patterns are pairs (terminal name, compiled
+ # regex). If the patterns variable passed in to the
+ # constructor is None, we assume that the class already has a
+ # proper .patterns list constructed
+ if patterns is not None:
+ self.patterns = []
+ for k, r in patterns:
+ self.patterns.append( (k, re.compile(r)) )
+
+ def token(self, i, restrict=0):
+ """Get the i'th token, and if i is one past the end, then scan
+ for another token; restrict is a list of tokens that
+ are allowed, or 0 for any token."""
+ if i == len(self.tokens): self.scan(restrict)  # scan on demand, only for the next unseen index
+ if i < len(self.tokens):
+ # A cached token is reusable only if every terminal wanted now was allowed when it was scanned
+ if restrict and self.restrictions[i]:
+ for r in restrict:
+ if r not in self.restrictions[i]:
+ raise NotImplementedError("Unimplemented: restriction set changed")
+ return self.tokens[i]
+ raise NoMoreTokens()  # i is still out of range (too far ahead, or the scan appended nothing)
+
+ def __repr__(self):  # builds and returns the listing; nothing is printed here
+ """Print the last 10 tokens that have been scanned in"""
+ output = ''
+ for t in self.tokens[-10:]:
+ output = '%s\n (@%s) %s = %s' % (output,t[0],t[2],repr(t[3]))  # one line per token: (@start) terminal = text
+ return output
+
+ def scan(self, restrict):
+ """Should scan another token and add it to the list, self.tokens,
+ and add the restriction to self.restrictions"""
+ # Keep looking for a token, ignoring any in self.ignore
+ while 1:
+ # Search the patterns for the longest match, with earlier
+ # tokens in the list having preference
+ best_match = -1  # length of the longest match so far; -1 means no match yet
+ best_pat = '(error)'  # placeholder terminal name meaning "nothing matched"
+ for p, regexp in self.patterns:
+ # Skip terminals the restriction does not allow; terminals in self.ignore are always tried
+ if restrict and p not in restrict and p not in self.ignore:
+ continue
+ m = regexp.match(self.input, self.pos)  # anchored match starting at the current offset
+ if m and len(m.group(0)) > best_match:  # strict ">" keeps the earlier pattern on equal length
+ # We got a match that's better than the previous one
+ best_pat = p
+ best_match = len(m.group(0))
+
+ # If we didn't find anything, raise an error
+ if best_pat == '(error)' and best_match < 0:
+ msg = "Bad Token"
+ if restrict:
+ msg = "Trying to find one of "+join(restrict,", ")
+ raise SyntaxError(self.pos, msg)  # reports the offset where scanning failed
+
+ # If we found something that isn't to be ignored, return it
+ if best_pat not in self.ignore:
+ # Create a token with this data
+ token = (self.pos, self.pos+best_match, best_pat,
+ self.input[self.pos:self.pos+best_match])
+ self.pos = self.pos + best_match
+ # Only add this token if it differs from the last recorded one
+ # (to prevent looping)
+ if not self.tokens or token != self.tokens[-1]:
+ self.tokens.append(token)
+ self.restrictions.append(restrict)
+ return
+ else:
+ # This token should be ignored ..
+ self.pos = self.pos + best_match  # skip past the ignored text and try again
+
+class Parser:  # Token cursor over a Scanner; presumably the base class of generated parsers — confirm
+ def __init__(self, scanner):
+ self._scanner = scanner
+ self._pos = 0  # index of the next token to consume
+
+ def _peek(self, *types):
+ """Returns the token type for lookahead; if there are any args
+ then the list of args is the set of token types to allow"""
+ tok = self._scanner.token(self._pos, types)  # self._pos is not advanced here
+ return tok[2]  # third field of the token tuple is the terminal name
+
+ def _scan(self, type):
+ """Returns the matched text, and moves to the next token"""
+ tok = self._scanner.token(self._pos, [type])
+ if tok[2] != type:
+ raise SyntaxError(tok[0], 'Trying to find '+type)  # tok[0] is the token's start offset
+ self._pos = 1+self._pos
+ return tok[3]  # fourth field is the matched text
+
+
+
+def print_error(input, err, scanner):  # input: text that was parsed; err: object with .pos and .msg; scanner: printed as-is at the end
+ """This is a really dumb long function to print error messages nicely."""
+ p = err.pos
+ # Figure out the line number
+ line = count(input[:p], '\n')  # number of newlines before the error position (0-based line index)
+ print err.msg+" on line "+repr(line+1)+":"
+ # Now try printing part of the line
+ text = input[max(p-80, 0):p+80]  # window of at most 80 characters on each side of the error
+ p = p - max(p-80, 0)  # make p relative to the start of the window
+
+ # Strip to the left
+ i = rfind(text[:p], '\n')
+ j = rfind(text[:p], '\r')
+ if i < 0 or (0 <= j < i): i = j  # NOTE(review): when both are found this keeps the smaller index; the larger one is the nearer line break — confirm intent
+ if 0 <= i < p:
+ p = p - i - 1
+ text = text[i+1:]
+
+ # Strip to the right
+ i = find(text,'\n', p)
+ j = find(text,'\r', p)
+ if i < 0 or (0 <= j < i): i = j  # use whichever line break comes first after p
+ if i >= 0:
+ text = text[:i]
+
+ # Now shorten the text
+ while len(text) > 70 and p > 60:
+ # Cut off 10 chars
+ text = "..." + text[10:]
+ p = p - 7  # net shift: 10 characters removed, 3-character "..." prepended
+
+ # Now print the string, along with an indicator
+ print '> ',text
+ print '> ',' '*p + '^'  # caret placed p columns in
+ print 'List of nearby tokens:', scanner
+
+def wrap_error_reporter(parser, rule):  # Call parser.<rule>() and print, rather than propagate, SyntaxError / NoMoreTokens.
+ return_value = None  # returned unchanged if the rule call raises one of the handled errors
+ try:
+ return_value = getattr(parser, rule)()  # `rule` is the name of a method on `parser`
+ except SyntaxError, s:  # the SyntaxError class defined in this module, not the builtin
+ input = parser._scanner.input
+ try:
+ print_error(input, s, parser._scanner)
+ except ImportError:  # fallback: one-line report without the source excerpt
+ print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n')
+ except NoMoreTokens:
+ print 'Could not complete parsing; stopped around here:'
+ print parser._scanner  # Scanner.__repr__ lists the most recently scanned tokens
+ return return_value