importation modification initiale de py-dom-xpath
This commit is contained in:
parent
e9b455de2a
commit
10dc28f319
|
@ -88,6 +88,7 @@ addp('ulib.ext.tarfile', ['README.txt'])
|
||||||
addp('ulib.ext.web')
|
addp('ulib.ext.web')
|
||||||
addp('ulib.ext.web.wsgiserver', ['LICENSE.txt'])
|
addp('ulib.ext.web.wsgiserver', ['LICENSE.txt'])
|
||||||
addp('ulib.ext.web.contrib')
|
addp('ulib.ext.web.contrib')
|
||||||
|
addp('ulib.ext.xpath')
|
||||||
addp('ulib.formats')
|
addp('ulib.formats')
|
||||||
addp('ulib.gae')
|
addp('ulib.gae')
|
||||||
addp('ulib.json')
|
addp('ulib.json')
|
||||||
|
|
|
@ -0,0 +1,297 @@
|
||||||
|
:mod:`xpath` --- XPath Queries For DOM Trees
|
||||||
|
============================================
|
||||||
|
The :mod:`xpath` module is a pure Python implementation of the XPath query
|
||||||
|
language, operating on DOM documents. It supports most of XPath 1.0, with
|
||||||
|
the following exceptions:
|
||||||
|
|
||||||
|
* The namespace axis is not supported.
|
||||||
|
* The ``round()`` function rounds toward 0, not towards positive infinity.
|
||||||
|
|
||||||
|
The following XPath 2.0 features are supported:
|
||||||
|
|
||||||
|
* A default namespace may be supplied in the expression context.
|
||||||
|
* Node tests may have a wildcard namespace. (e.g., ``*:name``.)
|
||||||
|
|
||||||
|
This module provides the following functions for evaluating XPath expressions:
|
||||||
|
|
||||||
|
.. function:: find(expr, node, [\**kwargs])
|
||||||
|
|
||||||
|
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||||
|
and return:
|
||||||
|
|
||||||
|
* ``True`` or ``False``, when the expression has a boolean result.
|
||||||
|
* A :class:`float`, when the expression has a numeric result.
|
||||||
|
* A :class:`unicode`, when the expression has a string result.
|
||||||
|
* A list of :class:`xml.dom.Node`, when the expression has a
|
||||||
|
node-set result.
|
||||||
|
|
||||||
|
.. function:: findnode(expr, node, [\**kwargs])
|
||||||
|
|
||||||
|
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||||
|
and return a single node. If the result of the expression is a non-empty
|
||||||
|
node-set, return the first node in the set. If the result is an empty
|
||||||
|
node-set, return ``None``. If the result is not a node-set, raise
|
||||||
|
:exc:`XPathTypeError`.
|
||||||
|
|
||||||
|
.. function:: findvalue(expr, node, [\**kwargs])
|
||||||
|
|
||||||
|
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||||
|
and return the string-value of the result. If the result is an empty
|
||||||
|
node-set, return ``None`` instead.
|
||||||
|
|
||||||
|
.. function:: findvalues(expr, node, [\**kwargs])
|
||||||
|
|
||||||
|
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||||
|
and return a list of the string-values of the resulting node-set. If
|
||||||
|
the result is not a node-set, raise :exc:`XPathTypeError`.
|
||||||
|
|
||||||
|
The above functions take the following optional keyword arguments
|
||||||
|
defining the evaluation context:
|
||||||
|
|
||||||
|
*context*
|
||||||
|
A :class:`XPathContext` object containing the evaluation context. It
|
||||||
|
is legal to supply both a context object and additional arguments
|
||||||
|
extending its contents.
|
||||||
|
|
||||||
|
*default_namespace*
|
||||||
|
The default namespace URI, which will be used for any unqualified name
|
||||||
|
in the XPath expression.
|
||||||
|
|
||||||
|
*namespaces*
|
||||||
|
A mapping of prefixes to namespace URIs.
|
||||||
|
|
||||||
|
*variables*
|
||||||
|
A mapping of variable names to values. To map a variable in a specific
|
||||||
|
namespace, use a two element tuple of the (namespace URI, name) as the key.
|
||||||
|
|
||||||
|
Additional keyword arguments will be used as variable bindings.
|
||||||
|
|
||||||
|
Basic Queries
|
||||||
|
-------------
|
||||||
|
The examples in this section use this XML document: ::
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<item name="python" />
|
||||||
|
<item name="parrot" />
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
Select all ``item`` elements in a document: ::
|
||||||
|
|
||||||
|
>>> xpath.find('//item', doc)
|
||||||
|
[<DOM Element: item at 0x474468>, <DOM Element: item at 0x27d7d8>]
|
||||||
|
|
||||||
|
Select the ``name`` attribute of the first item element (note that this returns
|
||||||
|
a list of Attr nodes): ::
|
||||||
|
|
||||||
|
>>> xpath.find('//item[1]/@name', doc)
|
||||||
|
[<xml.dom.minidom.Attr instance at 0x474300>]
|
||||||
|
|
||||||
|
Select the string-value of the ``name`` attribute of the last item element: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalue('//item[last()]/@name', doc)
|
||||||
|
u'parrot'
|
||||||
|
|
||||||
|
Select the first item element with a ``name`` attribute that starts with "p": ::
|
||||||
|
|
||||||
|
>>> xpath.findnode('//item[starts-with(@name,"p")]', doc)
|
||||||
|
<DOM Element: item at 0x474468>
|
||||||
|
|
||||||
|
Namespaces
|
||||||
|
----------
|
||||||
|
The examples in this section use this XML document: ::
|
||||||
|
|
||||||
|
<doc xmlns="http://flying.example.org/"
|
||||||
|
xmlns:circus="http://circus.example.org/">
|
||||||
|
<item>python</item>
|
||||||
|
<circus:item>parrot</circus:item>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
The *namespaces* argument to the evaluation functions provides a dictionary
|
||||||
|
of prefixes to namespace URIs. Prefixed QNames in expressions will be
|
||||||
|
expanded according to this mapping.
|
||||||
|
|
||||||
|
To select the string-values of the ``item`` elements in the
|
||||||
|
"\http://circus.example.org/" namespace: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalues('//prefix:item', doc,
|
||||||
|
... namespaces={'prefix':'http://circus.example.org/'})
|
||||||
|
[u'parrot']
|
||||||
|
|
||||||
|
The *default_namespace* argument provides a namespace URI that will be
|
||||||
|
used for any unprefixed QName appearing in a position where an element
|
||||||
|
name is expected. (Default namespaces are a feature of XPath 2.0.)
|
||||||
|
|
||||||
|
To select the string-values of the ``item`` elements in the
|
||||||
|
"\http://flying.example.org/" namespace: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalues('//item', doc,
|
||||||
|
... default_namespace='http://flying.example.org/')
|
||||||
|
[u'python']
|
||||||
|
|
||||||
|
When a *default_namespace* argument is not provided, the default namespace
|
||||||
|
is that of the document element. When a *namespaces* argument is not
|
||||||
|
provided, the prefix declarations consist of all prefixes defined on the
|
||||||
|
document element.
|
||||||
|
|
||||||
|
To select the string values of all the ``item`` elements: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalues('//item | //circus:item', doc)
|
||||||
|
[u'python', u'parrot']
|
||||||
|
|
||||||
|
The :mod:`xpath` module supports wildcard matches against both the prefix
|
||||||
|
and local name. (XPath 1.0 only supports wildcard matches against the local
|
||||||
|
name; XPath 2.0 adds support for wildcard matches against the prefix.)
|
||||||
|
|
||||||
|
To select all children of the document element, regardless of namespace: ::
|
||||||
|
|
||||||
|
>>> xpath.find('/*:*/*:*', doc)
|
||||||
|
[<DOM Element: item at 0x474d00>, <DOM Element: circus:item at 0x4743a0>]
|
||||||
|
|
||||||
|
Variables
|
||||||
|
---------
|
||||||
|
The examples in this section use this XML document: ::
|
||||||
|
|
||||||
|
<doc>
|
||||||
|
<item id="1">python</item>
|
||||||
|
<item id="2">parrot</item>
|
||||||
|
</doc>
|
||||||
|
|
||||||
|
XPath variables may be passed to the evaluation functions as keyword
|
||||||
|
arguments: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalue('//item[@id = $id]', doc, id=2)
|
||||||
|
u'parrot'
|
||||||
|
|
||||||
|
It is also possible to pass a dictionary of variables to an evaluation
|
||||||
|
function with the *variables* keyword argument: ::
|
||||||
|
|
||||||
|
>>> xpath.findvalue('//item[@id = $id]', doc, variables={'id':1})
|
||||||
|
u'python'
|
||||||
|
|
||||||
|
To define a variable within a specific namespace, use a tuple of
|
||||||
|
``(namespace-URI, local-name)`` as the key in the variable dictionary: ::
|
||||||
|
|
||||||
|
>>> variables = { ('http://python.example.org/', 'id') : 1 }
|
||||||
|
>>> namespaces = { 'python' : 'http://python.example.org/' }
|
||||||
|
>>> xpath.findvalue('//item[@id = $python:id]', doc,
|
||||||
|
... variables=variables, namespaces=namespaces)
|
||||||
|
u'python'
|
||||||
|
|
||||||
|
Compiled Expression Objects
|
||||||
|
---------------------------
|
||||||
|
.. class:: XPath(expr)
|
||||||
|
|
||||||
|
An expression object which contains a compiled form of the XPath
|
||||||
|
expression *expr*.
|
||||||
|
|
||||||
|
Under most circumstances, it is not necessary to directly use this class,
|
||||||
|
since the :func:`find` et al. functions cache compiled expressions.
|
||||||
|
|
||||||
|
.. method:: find(node, [\**kwargs])
|
||||||
|
findnode(node, [\**kwargs])
|
||||||
|
findvalue(node, [\**kwargs])
|
||||||
|
findvalues(node, [\**kwargs])
|
||||||
|
|
||||||
|
These methods are identical to the functions of the same name.
|
||||||
|
|
||||||
|
Create and use a compiled expression: ::
|
||||||
|
|
||||||
|
>>> expr = xpath.XPath('//text()')
|
||||||
|
>>> print expr
|
||||||
|
/descendant-or-self::node()/child::text()
|
||||||
|
>>> expr.find(doc)
|
||||||
|
[<DOM Text node "Monty">]
|
||||||
|
|
||||||
|
Expression Context Objects
|
||||||
|
--------------------------
|
||||||
|
.. class:: XPathContext([document,] [\**kwargs])
|
||||||
|
|
||||||
|
The static context of an XPath expression. Context objects may be
|
||||||
|
created with the same keyword arguments accepted by the expression
|
||||||
|
evaluation functions.
|
||||||
|
|
||||||
|
The *document* argument may contain a DOM node. If provided, the
|
||||||
|
default namespace and namespace declarations will be initialized from
|
||||||
|
the document element of this node.
|
||||||
|
|
||||||
|
The context contains the following attributes and methods:
|
||||||
|
|
||||||
|
.. attribute:: default_namespace
|
||||||
|
|
||||||
|
The default namespace URI.
|
||||||
|
|
||||||
|
.. attribute:: namespaces
|
||||||
|
|
||||||
|
The mapping of prefixes to namespace URIs.
|
||||||
|
|
||||||
|
.. attribute:: variables
|
||||||
|
|
||||||
|
The mapping of variables to values. The keys of this map may
|
||||||
|
be either strings for variables with no namespace, or
|
||||||
|
(namespaceURI, name) tuples for variables contained in a
|
||||||
|
namespace.
|
||||||
|
|
||||||
|
.. method:: find(expr, node, [\**kwargs])
|
||||||
|
findnode(expr, node, [\**kwargs])
|
||||||
|
findvalue(expr, node, [\**kwargs])
|
||||||
|
findvalues(expr, node, [\**kwargs])
|
||||||
|
|
||||||
|
Evaluate *expr* in the context with *node* as the context node.
|
||||||
|
*expr* may be either a string or a :class:`XPath` object.
|
||||||
|
|
||||||
|
Create and use an evaluation context: ::
|
||||||
|
|
||||||
|
>>> context = xpath.XPathContext()
|
||||||
|
>>> context.namespaces['py'] = 'http://python.example.org/'
|
||||||
|
>>> context.variables['min'] = 4
|
||||||
|
>>> context.findvalues('//item[@id>=$min and @id<=$max]', doc, max=6)
|
||||||
|
[u'4', u'5', u'6']
|
||||||
|
|
||||||
|
Exceptions
|
||||||
|
----------
|
||||||
|
This module defines the following exceptions:
|
||||||
|
|
||||||
|
.. exception:: XPathError
|
||||||
|
|
||||||
|
Base exception class used for all XPath exceptions.
|
||||||
|
|
||||||
|
.. exception:: XPathNotImplementedError
|
||||||
|
|
||||||
|
Raised when an XPath expression contains a feature of XPath which
|
||||||
|
has not been implemented.
|
||||||
|
|
||||||
|
.. exception:: XPathParseError
|
||||||
|
|
||||||
|
Raised when an XPath expression could not be parsed.
|
||||||
|
|
||||||
|
.. exception:: XPathTypeError
|
||||||
|
|
||||||
|
Raised when an XPath expression is found to contain a type error.
|
||||||
|
For example, the expression "string()/node()" contains a type error
|
||||||
|
because the "string()" function does not return a node-set.
|
||||||
|
|
||||||
|
.. exception:: XPathUnknownFunctionError
|
||||||
|
|
||||||
|
Raised when an XPath expression contains a function that has no
|
||||||
|
binding in the expression context.
|
||||||
|
|
||||||
|
.. exception:: XPathUnknownPrefixError
|
||||||
|
|
||||||
|
Raised when an XPath expression contains a QName with a namespace
|
||||||
|
prefix that has no corresponding namespace declaration in the expression
|
||||||
|
context.
|
||||||
|
|
||||||
|
.. exception:: XPathUnknownVariableError
|
||||||
|
|
||||||
|
Raised when an XPath expression contains a variable that has no
|
||||||
|
binding in the expression context.
|
||||||
|
|
||||||
|
References
|
||||||
|
----------
|
||||||
|
.. seealso::
|
||||||
|
|
||||||
|
`XML Path Language (XPath) Version 1.0 <http://www.w3.org/TR/xpath>`_
|
||||||
|
The W3C recommendation upon which this module is based.
|
||||||
|
|
||||||
|
`XML Path Language (XPath) 2.0 <http://www.w3.org/TR/xpath20/>`_
|
||||||
|
Second version of XPath, mostly unsupported by this module.
|
|
@ -0,0 +1,23 @@
|
||||||
|
import exceptions
|
||||||
|
|
||||||
|
from _xpath import api, XPathContext, XPath
|
||||||
|
from exceptions import *
|
||||||
|
|
||||||
|
# Public API: the four query helpers, the two core classes, and every
# non-underscore name found in the exceptions module.
__all__ = [
    'find',
    'findnode',
    'findvalue',
    'findvalues',
    'XPathContext',
    'XPath',
]
__all__.extend(name for name in dir(exceptions) if not name.startswith('_'))
|
||||||
|
|
||||||
|
@api
def find(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node.

    *expr* is resolved through ``XPath.get`` and the keyword arguments are
    forwarded unchanged to the resulting expression object's ``find``
    method, whose result is returned.
    """
    compiled = XPath.get(expr)
    return compiled.find(node, **kwargs)
|
||||||
|
|
||||||
|
@api
def findnode(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node via ``findnode``.

    *expr* is resolved through ``XPath.get`` and the keyword arguments are
    forwarded unchanged to the resulting expression object's ``findnode``
    method, whose result is returned.
    """
    compiled = XPath.get(expr)
    return compiled.findnode(node, **kwargs)
|
||||||
|
|
||||||
|
@api
def findvalue(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node via ``findvalue``.

    *expr* is resolved through ``XPath.get`` and the keyword arguments are
    forwarded unchanged to the resulting expression object's ``findvalue``
    method, whose result is returned.
    """
    compiled = XPath.get(expr)
    return compiled.findvalue(node, **kwargs)
|
||||||
|
|
||||||
|
@api
def findvalues(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node via ``findvalues``.

    *expr* is resolved through ``XPath.get`` and the keyword arguments are
    forwarded unchanged to the resulting expression object's ``findvalues``
    method, whose result is returned.
    """
    compiled = XPath.get(expr)
    return compiled.findvalues(node, **kwargs)
|
|
@ -0,0 +1,143 @@
|
||||||
|
import expr as E
|
||||||
|
import parser as P
|
||||||
|
import yappsrt as Y
|
||||||
|
|
||||||
|
from exceptions import *
|
||||||
|
|
||||||
|
def api(f):
    """Decorator for functions and methods that are part of the external
    module API and that can throw XPathError exceptions.

    The call stack for these exceptions can be very large, and not very
    interesting to the user.  This decorator rethrows XPathErrors to
    trim the stack.

    The wrapper forwards all positional and keyword arguments to *f* and
    copies *f*'s ``__name__`` and ``__doc__`` onto itself.

    """
    def api_function(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except XPathError as e:
            # Re-raise from this frame rather than letting the original
            # propagate.  ("as" replaces the comma form, which is a syntax
            # error on Python 3 and is accepted from Python 2.6 onward.)
            raise e
    api_function.__name__ = f.__name__
    api_function.__doc__ = f.__doc__
    return api_function
|
||||||
|
|
||||||
|
class XPathContext(object):
    """Evaluation context for XPath expressions.

    Holds three attributes:

    default_namespace -- namespace URI, or None.
    namespaces        -- mapping of prefixes to namespace URIs.
    variables         -- mapping of variable keys to values.
    """

    def __init__(self, document=None, **kwargs):
        """Create a context, optionally seeded from a DOM node.

        When *document* is given and is not a document node, its
        ``ownerDocument`` is used instead.  The ``xmlns`` and ``xmlns:*``
        attributes of the document element initialise the default namespace
        and the prefix map.  The keyword arguments are then applied through
        update().
        """
        self.default_namespace = None
        self.namespaces = {}
        self.variables = {}

        if document is not None:
            if document.nodeType != document.DOCUMENT_NODE:
                document = document.ownerDocument
            # ownerDocument may be None for a node that is not attached to
            # any document; in that case there is nothing to read.
            root = None if document is None else document.documentElement
            if root is not None:
                attrs = root.attributes
                for attr in (attrs.item(i) for i in xrange(attrs.length)):
                    if attr.name == 'xmlns':
                        self.default_namespace = attr.value
                    elif attr.name.startswith('xmlns:'):
                        # Strip the leading "xmlns:" to obtain the prefix.
                        self.namespaces[attr.name[6:]] = attr.value

        self.update(**kwargs)

    def clone(self):
        """Return a new context with copies of both mappings."""
        dup = XPathContext()
        dup.default_namespace = self.default_namespace
        dup.namespaces.update(self.namespaces)
        dup.variables.update(self.variables)
        return dup

    def update(self, default_namespace=None, namespaces=None,
               variables=None, **kwargs):
        """Override parts of the context.

        Arguments that are not None replace the corresponding attribute.
        Any additional keyword arguments are merged into the variable
        mapping as extra bindings.
        """
        if default_namespace is not None:
            self.default_namespace = default_namespace
        if namespaces is not None:
            self.namespaces = namespaces
        if variables is not None:
            # Copy when extra bindings will be merged in, so the mapping
            # supplied by the caller is not modified as a side effect.
            self.variables = dict(variables) if kwargs else variables
        self.variables.update(kwargs)

    @api
    def find(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* with this context via ``XPath.find``."""
        return XPath.get(expr).find(node, context=self, **kwargs)

    @api
    def findnode(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* with this context via ``XPath.findnode``."""
        return XPath.get(expr).findnode(node, context=self, **kwargs)

    @api
    def findvalue(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* with this context via ``XPath.findvalue``."""
        return XPath.get(expr).findvalue(node, context=self, **kwargs)

    @api
    def findvalues(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* with this context via ``XPath.findvalues``."""
        return XPath.get(expr).findvalues(node, context=self, **kwargs)
|
||||||
|
|
||||||
|
class XPath():
    """A parsed XPath expression with evaluation helpers."""

    # Upper bound on the class-level cache of parsed expressions; once the
    # cache holds more entries than this it is emptied.
    _max_cache = 100
    _cache = {}

    def __init__(self, expr):
        """Parse *expr* (converted with ``str``) into an expression tree.

        Raises XPathParseError when the parser reports a syntax error.
        """
        try:
            parser = P.XPath(P.XPathScanner(str(expr)))
            self.expr = parser.XPath()
        except Y.SyntaxError as e:
            # "as" replaces the comma form, which is a syntax error on
            # Python 3 and is accepted from Python 2.6 onward.
            raise XPathParseError(str(expr), e.pos, e.msg)

    @classmethod
    def get(cls, s):
        """Return an expression object for *s*.

        An instance of this class is returned unchanged; anything else is
        looked up in the class cache and parsed on a miss.
        """
        if isinstance(s, cls):
            return s
        try:
            return cls._cache[s]
        except KeyError:
            if len(cls._cache) > cls._max_cache:
                cls._cache.clear()
            expr = cls(s)
            cls._cache[s] = expr
            return expr

    @api
    def find(self, node, context=None, **kwargs):
        """Evaluate the expression with *node* as the context node.

        Without *context*, a fresh XPathContext is built from *node* and the
        keyword arguments.  With *context* and keyword arguments, a clone of
        the context is updated so the caller's context is left untouched.
        The expression is evaluated with context position and size 1.
        """
        if context is None:
            context = XPathContext(node, **kwargs)
        elif kwargs:
            context = context.clone()
            context.update(**kwargs)
        return self.expr.evaluate(node, 1, 1, context)

    @api
    def findnode(self, node, context=None, **kwargs):
        """Return the first node of the result, or None when it is empty.

        Raises XPathTypeError when the result is not a node-set.
        """
        result = self.find(node, context, **kwargs)
        if not E.nodesetp(result):
            raise XPathTypeError("expression is not a node-set")
        if len(result) == 0:
            return None
        return result[0]

    @api
    def findvalue(self, node, context=None, **kwargs):
        """Return the result of the expression as a single value.

        A non-empty node-set is converted with ``E.string``; an empty
        node-set yields None; any other result is returned as evaluated.
        """
        result = self.find(node, context, **kwargs)
        if E.nodesetp(result):
            if len(result) == 0:
                return None
            result = E.string(result)
        return result

    @api
    def findvalues(self, node, context=None, **kwargs):
        """Return the string-value of every node in the resulting node-set.

        Raises XPathTypeError when the result is not a node-set.
        """
        result = self.find(node, context, **kwargs)
        if not E.nodesetp(result):
            raise XPathTypeError("expression is not a node-set")
        return [E.string_value(x) for x in result]

    def __repr__(self):
        return '%s.%s(%s)' % (self.__class__.__module__,
                              self.__class__.__name__,
                              repr(str(self.expr)))

    def __str__(self):
        return str(self.expr)
|
|
@ -0,0 +1,49 @@
|
||||||
|
|
||||||
|
class XPathError(Exception):
    """Common base class of every exception defined for XPath handling."""
|
||||||
|
|
||||||
|
class XPathNotImplementedError(XPathError):
    """Signals that an expression uses an XPath feature which has not been
    implemented.

    """
|
||||||
|
|
||||||
|
class XPathParseError(XPathError):
    """Signals that an XPath expression could not be parsed.

    Attributes: ``expr`` (the expression text), ``pos`` (offset used to
    place the caret marker) and ``message`` (the parser's message).
    """

    def __init__(self, expr, pos, message):
        XPathError.__init__(self)
        self.expr = expr
        self.pos = pos
        self.message = message

    def __str__(self):
        # Show the expression on one line with a caret under the offending
        # position.
        flattened = self.expr.replace("\n", " ")
        pointer = ("-" * self.pos) + "^"
        return "Syntax error:\n" + flattened + "\n" + pointer
|
||||||
|
|
||||||
|
class XPathTypeError(XPathError):
    """Signals a type error inside an XPath expression.

    The expression "string()/node()" is one example: "string()" does not
    return a node-set, so it cannot be used as a path step.

    """
|
||||||
|
|
||||||
|
class XPathUnknownFunctionError(XPathError):
    """Signals that an expression calls a function which has no binding in
    the expression context.

    """
|
||||||
|
|
||||||
|
class XPathUnknownPrefixError(XPathError):
    """Signals that an expression contains a QName whose namespace prefix
    has no corresponding namespace declaration in the expression context.

    """
|
||||||
|
|
||||||
|
class XPathUnknownVariableError(XPathError):
    """Signals that an expression references a variable which has no
    binding in the expression context.

    """
|
|
@ -0,0 +1,896 @@
|
||||||
|
from __future__ import division
|
||||||
|
from itertools import *
|
||||||
|
import math
|
||||||
|
import operator
|
||||||
|
import re
|
||||||
|
import xml.dom
|
||||||
|
import weakref
|
||||||
|
|
||||||
|
from exceptions import *
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Data model functions.
|
||||||
|
#
|
||||||
|
|
||||||
|
def string_value(node):
    """Return the XPath string-value of a DOM node.

    Document and element nodes yield the concatenated data of their
    descendant text nodes; attributes yield their value; processing
    instructions, comments and text nodes yield their data.  Any other node
    type yields None.
    """
    kind = node.nodeType

    if kind in (node.DOCUMENT_NODE, node.ELEMENT_NODE):
        return u''.join(child.data
                        for child in axes['descendant'](node)
                        if child.nodeType == child.TEXT_NODE)

    if kind == node.ATTRIBUTE_NODE:
        return node.value

    if kind in (node.PROCESSING_INSTRUCTION_NODE,
                node.COMMENT_NODE,
                node.TEXT_NODE):
        return node.data

    return None
|
||||||
|
|
||||||
|
def document_order(node):
    """Return a sort key that places nodes in document order.

    The key is the list of sibling indexes on the path from the root down
    to the node: the third child of the document node has the key [2], and
    that node's first child has [2, 0].  The root itself (a node without a
    parent) has the empty key.

    An attribute's key is its owner element's key followed by -1 and the
    attribute name (e.g., [2, 0, -1, 'href']), so attributes come before
    all children of their element and are ordered by name among themselves.

    """
    suffix = []
    if node.nodeType == node.ATTRIBUTE_NODE:
        suffix = [-1, node.name]
        node = node.ownerElement

    # Walk up to the root, recording each node's position among its
    # siblings; the positions are collected leaf-first and reversed below.
    path = []
    while node.parentNode is not None:
        index = 0
        earlier = node.previousSibling
        while earlier is not None:
            index += 1
            earlier = earlier.previousSibling
        path.append(index)
        node = node.parentNode

    path.reverse()
    path.extend(suffix)
    return path
|
||||||
|
|
||||||
|
#
|
||||||
|
# Type functions, operating on the various XPath types.
|
||||||
|
#
|
||||||
|
# Internally, we use the following representations:
|
||||||
|
# nodeset - list of DOM tree nodes in document order
|
||||||
|
# string - str or unicode
|
||||||
|
# boolean - bool
|
||||||
|
# number - int or float
|
||||||
|
#
|
||||||
|
|
||||||
|
def nodeset(v):
    """Return *v* unchanged if it is a node-set.

    Raises XPathTypeError otherwise; no conversion to a node-set is
    attempted.
    """
    if not nodesetp(v):
        # Call-style raise: valid on Python 2 and 3, and consistent with
        # the other XPathTypeError raises in this package.
        raise XPathTypeError("value is not a node-set")
    return v
|
||||||
|
|
||||||
|
def nodesetp(v):
    """Return True iff 'v' is a node-set (represented as a list).

    Always returns a real bool; non-lists yield False rather than None.
    """
    return isinstance(v, list)
|
||||||
|
|
||||||
|
def string(v):
    """Convert a value to a string.

    * node-set: string-value of its first node, or u'' when empty.
    * number: u'NaN', u'Infinity' or u'-Infinity' for the special values;
      integral values up to 0xffffffff are printed without a fractional
      part; everything else uses unicode().
    * boolean: u'true' or u'false'.
    * anything else is returned unchanged.
    """
    if nodesetp(v):
        if not v:
            return u''
        return string_value(v[0])
    elif numberp(v):
        # NaN must be tested first: int(NaN) raises ValueError, so the
        # integral check below would never let a later NaN test run.
        if v != v:
            return u'NaN'
        elif v == float('inf'):
            return u'Infinity'
        elif v == float('-inf'):
            return u'-Infinity'
        elif int(v) == v and v <= 0xffffffff:
            v = int(v)
        return unicode(v)
    elif booleanp(v):
        return u'true' if v else u'false'
    return v
|
||||||
|
|
||||||
|
def stringp(v):
    """Tell whether 'v' is a string (any ``basestring`` instance)."""
    is_text = isinstance(v, basestring)
    return is_text
|
||||||
|
|
||||||
|
def boolean(v):
    """Convert a value to a boolean.

    A node-set is true when non-empty, a number when it is neither zero nor
    NaN, and a string when it is non-empty.  Any other value is returned
    unchanged.
    """
    if nodesetp(v):
        return len(v) > 0
    if numberp(v):
        # "v != v" holds only for NaN.
        return not (v == 0 or v != v)
    if stringp(v):
        return v != ''
    return v
|
||||||
|
|
||||||
|
def booleanp(v):
    """Tell whether 'v' is a boolean (a ``bool`` instance)."""
    is_bool = isinstance(v, bool)
    return is_bool
|
||||||
|
|
||||||
|
def number(v):
    """Convert a value to a number.

    A node-set is first reduced to a string with string(); the value is
    then passed to float().  A value float() rejects with ValueError
    becomes NaN.
    """
    source = string(v) if nodesetp(v) else v
    try:
        return float(source)
    except ValueError:
        return float('NaN')
|
||||||
|
|
||||||
|
def numberp(v):
    """Tell whether 'v' is a number: an int or float, but not a bool."""
    if isinstance(v, bool):
        # bool is a subclass of int, so it has to be excluded explicitly.
        return False
    return isinstance(v, (int, float))
|
||||||
|
|
||||||
|
class Expr(object):
    """Abstract base class of all XPath expression nodes."""

    def evaluate(self, node, pos, size, context):
        """Evaluate the expression and return its XPath value.

        node    -- the context node.
        pos     -- the context position.
        size    -- the context size.
        context -- the expression context.

        The result is a nodeset, string, boolean, or number.  This base
        implementation does nothing.

        """
|
||||||
|
|
||||||
|
class BinaryOperatorExpr(Expr):
    """Common base of every two-operand operator expression."""

    def __init__(self, op, left, right):
        self.op = op
        self.left = left
        self.right = right

    def evaluate(self, node, pos, size, context):
        """Evaluate both operands, left first, and combine them.

        Subclasses either override evaluate() or supply operate(a, b).
        """
        operate = self.operate
        lhs = self.left.evaluate(node, pos, size, context)
        rhs = self.right.evaluate(node, pos, size, context)
        return operate(lhs, rhs)

    def __str__(self):
        return '(%s %s %s)' % (self.left, self.op, self.right)
|
||||||
|
|
||||||
|
class AndExpr(BinaryOperatorExpr):
    """<x> and <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the XPath conjunction of both operands as a bool.

        Each operand is converted with boolean() *before* Python's ``and``
        is applied, so XPath truth rules (e.g. NaN is false) are used for
        the left operand rather than Python truthiness.  The right operand
        is evaluated only when the left one is true.
        """
        # Note that XPath boolean operations short-circuit.
        return (boolean(self.left.evaluate(node, pos, size, context)) and
                boolean(self.right.evaluate(node, pos, size, context)))
|
||||||
|
|
||||||
|
class OrExpr(BinaryOperatorExpr):
    """<x> or <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the XPath disjunction of both operands as a bool.

        Each operand is converted with boolean() *before* Python's ``or``
        is applied, so XPath truth rules (e.g. NaN is false) are used for
        the left operand rather than Python truthiness.  The right operand
        is evaluated only when the left one is false.
        """
        # Note that XPath boolean operations short-circuit.
        return (boolean(self.left.evaluate(node, pos, size, context)) or
                boolean(self.right.evaluate(node, pos, size, context)))
|
||||||
|
|
||||||
|
class EqualityExpr(BinaryOperatorExpr):
    """<x> = <y>, <x> != <y>, etc."""

    # Maps the XPath comparison operator to its Python implementation.
    operators = {
        '='  : operator.eq,
        '!=' : operator.ne,
        '<=' : operator.le,
        '<'  : operator.lt,
        '>=' : operator.ge,
        '>'  : operator.gt,
    }

    def operate(self, a, b):
        """Compare two XPath values and return a bool.

        * node-set vs boolean: the node-set is converted with boolean()
          and the two booleans are compared.
        * node-set vs anything else: true if the comparison holds for the
          string-value of at least one node.
        * '=' and '!=': both sides are compared as booleans if either is a
          boolean, else as numbers if either is a number, else as strings.
        * relational operators: both sides are compared as numbers.
        """
        # A node-set compared with a boolean is judged as a whole, not
        # node by node.
        if nodesetp(a) and booleanp(b):
            a = boolean(a)
        elif nodesetp(b) and booleanp(a):
            b = boolean(b)

        if nodesetp(a):
            for node in a:
                if self.operate(string_value(node), b):
                    return True
            return False

        if nodesetp(b):
            for node in b:
                if self.operate(a, string_value(node)):
                    return True
            return False

        if self.op in ('=', '!='):
            if booleanp(a) or booleanp(b):
                convert = boolean
            elif numberp(a) or numberp(b):
                convert = number
            else:
                convert = string
        else:
            convert = number

        a, b = convert(a), convert(b)
        return self.operators[self.op](a, b)
|
||||||
|
|
||||||
|
def divop(x, y):
    """Divide *x* by *y* without raising on a zero divisor.

    When Python raises ZeroDivisionError the result is chosen as follows:

    * NaN if *x* is zero or NaN;
    * otherwise an infinity whose sign is the product of the signs of *x*
      and of the (possibly negative) zero *y*.
    """
    try:
        return x / y
    except ZeroDivisionError:
        if x == 0 or x != x:
            # 0 div 0 and NaN div 0 are both NaN ("x != x" detects NaN).
            return float('nan')
        # copysign recovers the sign of a negative zero divisor.
        negative = (x < 0) != (math.copysign(1.0, y) < 0)
        return float('-inf') if negative else float('inf')
|
||||||
|
|
||||||
|
def _modop(x, y):
    """Return math.fmod(x, y), or NaN where fmod raises ValueError.

    math.fmod raises ValueError for a zero divisor and for an infinite
    dividend; a NaN result is substituted instead of raising.
    """
    try:
        return math.fmod(x, y)
    except ValueError:
        return float('nan')


class ArithmeticalExpr(BinaryOperatorExpr):
    """<x> + <y>, <x> - <y>, etc."""

    # Note that we must use math.fmod for the correct modulo semantics
    # (wrapped by _modop so that it never raises).
    operators = {
        '+'   : operator.add,
        '-'   : operator.sub,
        '*'   : operator.mul,
        'div' : divop,
        'mod' : _modop
    }

    def operate(self, a, b):
        """Convert both operands with number() and apply the operator."""
        return self.operators[self.op](number(a), number(b))
|
||||||
|
|
||||||
|
class UnionExpr(BinaryOperatorExpr):
    """<x> | <y>"""

    def operate(self, a, b):
        """Return the duplicate-free union of two node-sets.

        Raises XPathTypeError when either operand is not a node-set.
        """
        if not (nodesetp(a) and nodesetp(b)):
            raise XPathTypeError("union operand is not a node-set")

        # Merging through a set drops duplicates but loses ordering, so the
        # result is sorted back into document order.
        merged = set(a)
        merged.update(b)
        return sorted(merged, key=document_order)
|
||||||
|
|
||||||
|
class NegationExpr(Expr):
    """- <x>"""

    def __init__(self, expr):
        self.expr = expr

    def evaluate(self, node, pos, size, context):
        """Evaluate the operand, convert it with number() and negate it."""
        operand = self.expr.evaluate(node, pos, size, context)
        return -number(operand)

    def __str__(self):
        return '(-%s)' % self.expr
|
||||||
|
|
||||||
|
class LiteralExpr(Expr):
    """A literal value: either a number or a string."""

    def __init__(self, literal):
        self.literal = literal

    def evaluate(self, node, pos, size, context):
        """Return the literal itself; the context is not consulted."""
        return self.literal

    def __str__(self):
        value = self.literal
        if not stringp(value):
            return string(value)
        # Use double quotes when the text contains a single quote, and
        # single quotes otherwise.
        quote = '"' if "'" in value else "'"
        return '%s%s%s' % (quote, value, quote)
|
||||||
|
|
||||||
|
class VariableReference(Expr):
    """A reference to a variable, optionally qualified by a prefix."""

    def __init__(self, prefix, name):
        self.prefix = prefix
        self.name = name

    def evaluate(self, node, pos, size, context):
        """Look the variable up in ``context.variables``.

        An unprefixed variable is keyed by its name; a prefixed one by the
        tuple (namespace URI, name), with the URI taken from
        ``context.namespaces``.

        Raises XPathUnknownPrefixError for an undeclared prefix and
        XPathUnknownVariableError for an unbound variable.
        """
        if self.prefix is None:
            key = self.name
        else:
            try:
                uri = context.namespaces[self.prefix]
            except KeyError:
                raise XPathUnknownPrefixError(self.prefix)
            key = (uri, self.name)

        try:
            return context.variables[key]
        except KeyError:
            raise XPathUnknownVariableError(str(self))

    def __str__(self):
        if self.prefix is not None:
            return '$%s:%s' % (self.prefix, self.name)
        return '$%s' % self.name
|
||||||
|
|
||||||
|
class Function(Expr):
|
||||||
|
"""Functions."""
|
||||||
|
|
||||||
|
def __init__(self, name, args):
    """Bind an XPath function call to the method implementing it.

    name -- The XPath function name; hyphens map to underscores and the
            result is looked up as an 'f_<name>' attribute of this object.
    args -- The list of argument expressions.

    Raises XPathUnknownFunctionError when no implementation exists, and
    XPathTypeError when the argument count falls outside the minargs /
    maxargs bounds recorded on the implementation.

    """
    self.name = name
    self.args = args

    # The implementation replaces 'evaluate' on this instance.
    method_name = 'f_%s' % name.replace('-', '_')
    self.evaluate = getattr(self, method_name, None)
    if self.evaluate is None:
        raise XPathUnknownFunctionError('unknown function "%s()"' % name)

    argc = len(self.args)
    if argc < self.evaluate.minargs:
        raise XPathTypeError('too few arguments for "%s()"' % name)

    # A maxargs of None means the function is variadic.
    limit = self.evaluate.maxargs
    if limit is not None and argc > limit:
        raise XPathTypeError('too many arguments for "%s()"' % name)
|
||||||
|
|
||||||
|
#
|
||||||
|
# XPath functions are implemented by methods of the Function class.
|
||||||
|
#
|
||||||
|
# A method implementing an XPath function is decorated with the function
|
||||||
|
# decorator, and receives the evaluated function arguments as positional
|
||||||
|
# parameters.
|
||||||
|
#
|
||||||
|
|
||||||
|
def function(minargs, maxargs, implicit=False, first=False, convert=None):
    """Decorator for methods that implement XPath functions.

    The wrapped method is called with the usual evaluation arguments
    (node, pos, size, context) followed by the evaluated XPath arguments.

    minargs  -- Minimum number of arguments taken by the function.
    maxargs  -- Maximum number of arguments taken by the function, or None
                for no upper bound.
    implicit -- True for functions which operate on a node-set consisting
                of the current context node when passed no argument.
                (e.g., string() and number().)
    first    -- True to replace the first argument by the first node of
                its node-set value, or by None when that node-set is
                empty.
    convert  -- When non-None, a function applied to every argument before
                the wrapped method is called.

    """
    def decorator(f):
        def new_f(self, node, pos, size, context):
            if implicit and len(self.args) == 0:
                values = [[node]]
            else:
                values = []
                for argument in self.args:
                    values.append(argument.evaluate(node, pos, size, context))

            if first:
                candidates = nodeset(values[0])
                if len(candidates) > 0:
                    values[0] = candidates[0]
                else:
                    values[0] = None

            if convert is not None:
                values = [convert(value) for value in values]

            return f(self, node, pos, size, context, *values)

        # Function.__init__ reads these bounds to validate the call.
        new_f.minargs = minargs
        new_f.maxargs = maxargs
        new_f.__name__ = f.__name__
        new_f.__doc__ = f.__doc__
        return new_f

    return decorator
|
||||||
|
|
||||||
|
# Node Set Functions
|
||||||
|
|
||||||
|
@function(0, 0)
|
||||||
|
def f_last(self, node, pos, size, context):
    """XPath last(): return the context size passed in as 'size'."""
    return size
|
||||||
|
|
||||||
|
@function(0, 0)
|
||||||
|
def f_position(self, node, pos, size, context):
    """XPath position(): return the context position passed in as 'pos'."""
    return pos
|
||||||
|
|
||||||
|
@function(1, 1, convert=nodeset)
|
||||||
|
def f_count(self, node, pos, size, context, nodes):
    """XPath count(): return the number of items in 'nodes'."""
    return len(nodes)
|
||||||
|
|
||||||
|
@function(1, 1)
|
||||||
|
def f_id(self, node, pos, size, context, arg):
    """XPath id(): select elements by unique ID.

    When 'arg' is a node-set, the string-value of each of its nodes is
    used; otherwise 'arg' is converted with string().  Every resulting
    string is split into whitespace-separated tokens, and each token is
    looked up with getElementById() on the document owning the context
    node.

    Returns a list of the distinct elements found, in document order.

    """
    if nodesetp(arg):
        values = [string_value(x) for x in arg]
    else:
        values = [string(arg)]

    # IDs are resolved against the document, not the context node.
    if node.nodeType != node.DOCUMENT_NODE:
        node = node.ownerDocument

    found = []
    for value in values:
        # XPath requires the string to be treated as a list of ID tokens,
        # so id('a b') selects both 'a' and 'b'.
        for token in value.split():
            element = node.getElementById(token)
            # A node-set holds each node once, even if its ID was
            # mentioned several times.
            if element is not None and element not in found:
                found.append(element)

    found.sort(key=document_order)
    return found
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, first=True)
|
||||||
|
def f_local_name(self, node, pos, size, context, argnode):
    """XPath local-name(): return the local part of a node's name.

    Elements and attributes yield their localName, processing
    instructions yield their target, and every other node (or a missing
    node) yields the empty string.

    """
    if argnode is None:
        return ''

    kind = argnode.nodeType
    if kind in (argnode.ELEMENT_NODE, argnode.ATTRIBUTE_NODE):
        return argnode.localName
    if kind == argnode.PROCESSING_INSTRUCTION_NODE:
        return argnode.target
    return ''
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, first=True)
|
||||||
|
def f_namespace_uri(self, node, pos, size, context, argnode):
    """XPath namespace-uri(): return the namespace URI of a node's name.

    Returns the empty string when there is no node or when the node has
    no namespace URI, so the result is always a string.

    """
    if argnode is None:
        return ''
    # DOM reports "no namespace" as None; XPath wants an empty string.
    return argnode.namespaceURI or ''
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, first=True)
|
||||||
|
def f_name(self, node, pos, size, context, argnode):
    """XPath name(): return the qualified name of a node.

    Elements yield their tagName, attributes their name, processing
    instructions their target; every other node (or a missing node)
    yields the empty string.

    """
    if argnode is None:
        return ''

    kind = argnode.nodeType
    if kind == argnode.ELEMENT_NODE:
        return argnode.tagName
    if kind == argnode.ATTRIBUTE_NODE:
        return argnode.name
    if kind == argnode.PROCESSING_INSTRUCTION_NODE:
        return argnode.target
    return ''
|
||||||
|
|
||||||
|
# String Functions
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, convert=string)
|
||||||
|
def f_string(self, node, pos, size, context, arg):
    """XPath string(): return the argument as received.

    The conversion itself is done by the 'convert' hook of the function
    decorator applied to this method, before this body runs.

    """
    return arg
|
||||||
|
|
||||||
|
@function(2, None, convert=string)
|
||||||
|
def f_concat(self, node, pos, size, context, *args):
    """XPath concat(): return all arguments joined into one string."""
    return ''.join(args)
|
||||||
|
|
||||||
|
@function(2, 2, convert=string)
|
||||||
|
def f_starts_with(self, node, pos, size, context, a, b):
    """XPath starts-with(): return True when string 'a' begins with 'b'."""
    return a[:len(b)] == b
|
||||||
|
|
||||||
|
@function(2, 2, convert=string)
|
||||||
|
def f_contains(self, node, pos, size, context, a, b):
    """XPath contains(): return True when string 'b' occurs in 'a'."""
    return a.find(b) >= 0
|
||||||
|
|
||||||
|
@function(2, 2, convert=string)
|
||||||
|
def f_substring_before(self, node, pos, size, context, a, b):
    """XPath substring-before(): the part of 'a' before the first 'b'.

    Returns the empty string when 'b' does not occur in 'a'.

    """
    where = a.find(b)
    if where < 0:
        return ''
    return a[:where]
|
||||||
|
|
||||||
|
@function(2, 2, convert=string)
|
||||||
|
def f_substring_after(self, node, pos, size, context, a, b):
    """XPath substring-after(): the part of 'a' after the first 'b'.

    Returns the empty string when 'b' does not occur in 'a'.

    """
    where = a.find(b)
    if where < 0:
        return ''
    return a[where + len(b):]
|
||||||
|
|
||||||
|
@function(2, 3)
|
||||||
|
def f_substring(self, node, pos, size, context, s, start, count=None):
    """XPath substring(): return part of a string.

    s     -- The value to slice; converted with string().
    start -- 1-based position of the first character; converted with
             number() and rounded.
    count -- Optional number of characters; converted with number() and
             rounded.  When omitted the slice runs to the end of 's'.

    Returns the empty string when either bound is NaN or when the
    requested range does not overlap the string.

    """
    s = string(s)
    # NOTE(review): this is Python's round(), which does not round
    # negative halves the way XPath round() does (see f_round) -- confirm
    # whether that matters for negative start positions.
    start = round(number(start))
    if start != start:
        # Catch NaN
        return ''

    if count is None:
        # One past the last character, in 1-based positions.
        end = len(s) + 1
    else:
        end = start + round(number(count))
        if end != end:
            # Catch NaN
            return ''
        if end > len(s):
            # Clamp so that int(end) below cannot overflow on infinity.
            end = len(s)+1

    if start < 1:
        start = 1
    if start > len(s):
        return ''
    if end <= start:
        return ''
    # Convert the 1-based, end-exclusive bounds to a Python slice.
    return s[int(start)-1:int(end)-1]
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, convert=string)
|
||||||
|
def f_string_length(self, node, pos, size, context, s):
    """XPath string-length(): return len() of the string 's'."""
    return len(s)
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, convert=string)
|
||||||
|
def f_normalize_space(self, node, pos, size, context, s):
    """XPath normalize-space(): trim and collapse whitespace in 's'.

    Leading and trailing whitespace is removed and every internal run of
    whitespace is replaced by a single space.  Only the four XML
    whitespace characters (space, tab, carriage return, line feed) count
    as whitespace; other characters such as form feed or no-break space
    are left untouched.

    """
    xml_space = ' \t\r\n'
    return re.sub('[ \t\r\n]+', ' ', s.strip(xml_space))
|
||||||
|
|
||||||
|
@function(3, 3, convert=lambda x: unicode(string(x)))
|
||||||
|
def f_translate(self, node, pos, size, context, s, source, target):
|
||||||
|
# str.translate() and unicode.translate() are completely different.
|
||||||
|
# The translate() arguments are coerced to unicode.
|
||||||
|
table = {}
|
||||||
|
for schar, tchar in izip(source, target):
|
||||||
|
schar = ord(schar)
|
||||||
|
if schar not in table:
|
||||||
|
table[schar] = tchar
|
||||||
|
if len(source) > len(target):
|
||||||
|
for schar in source[len(target):]:
|
||||||
|
schar = ord(schar)
|
||||||
|
if schar not in table:
|
||||||
|
table[schar] = None
|
||||||
|
return s.translate(table)
|
||||||
|
|
||||||
|
# Boolean functions
|
||||||
|
|
||||||
|
@function(1, 1, convert=boolean)
|
||||||
|
def f_boolean(self, node, pos, size, context, b):
    """XPath boolean(): return the argument as received.

    The conversion itself is done by the 'convert' hook of the function
    decorator applied to this method, before this body runs.

    """
    return b
|
||||||
|
|
||||||
|
@function(1, 1, convert=boolean)
|
||||||
|
def f_not(self, node, pos, size, context, b):
    """XPath not(): return the logical negation of 'b'."""
    return not b
|
||||||
|
|
||||||
|
@function(0, 0)
|
||||||
|
def f_true(self, node, pos, size, context):
    """XPath true(): always return True."""
    return True
|
||||||
|
|
||||||
|
@function(0, 0)
|
||||||
|
def f_false(self, node, pos, size, context):
    """XPath false(): always return False."""
    return False
|
||||||
|
|
||||||
|
@function(1, 1, convert=string)
|
||||||
|
def f_lang(self, node, pos, size, context, s):
    """XPath lang(): test the language of the context node.

    Walks the ancestor-or-self axis from the context node to the first
    element carrying an 'xml:lang' attribute.  Returns True when that
    attribute equals 's', or starts with 's' followed by a hyphen,
    comparing case-insensitively.  Returns False otherwise, including
    when no such element exists.

    """
    wanted = s.lower()
    for candidate in axes['ancestor-or-self'](node):
        if candidate.nodeType != candidate.ELEMENT_NODE:
            continue
        if not candidate.hasAttribute('xml:lang'):
            continue
        # Only the nearest declaration counts; stop here either way.
        declared = candidate.getAttribute('xml:lang').lower()
        return wanted == declared or declared.startswith(wanted + u'-')
    return False
|
||||||
|
|
||||||
|
# Number functions
|
||||||
|
|
||||||
|
@function(0, 1, implicit=True, convert=number)
|
||||||
|
def f_number(self, node, pos, size, context, n):
    """XPath number(): return the argument as received.

    The conversion itself is done by the 'convert' hook of the function
    decorator applied to this method, before this body runs.

    """
    return n
|
||||||
|
|
||||||
|
@function(1, 1, convert=nodeset)
|
||||||
|
def f_sum(self, node, pos, size, context, nodes):
    """XPath sum(): add up the numeric values of the nodes in 'nodes'.

    Each node contributes number() of its string-value.

    """
    total = 0
    for item in nodes:
        total = total + number(string_value(item))
    return total
|
||||||
|
|
||||||
|
@function(1, 1, convert=number)
|
||||||
|
def f_floor(self, node, pos, size, context, n):
    """XPath floor(): return math.floor() of the number 'n'."""
    return math.floor(n)
|
||||||
|
|
||||||
|
@function(1, 1, convert=number)
|
||||||
|
def f_ceiling(self, node, pos, size, context, n):
    """XPath ceiling(): return math.ceil() of the number 'n'."""
    return math.ceil(n)
|
||||||
|
|
||||||
|
@function(1, 1, convert=number)
|
||||||
|
def f_round(self, node, pos, size, context, n):
    """XPath round(): return the integer closest to 'n', as a float.

    Follows the XPath 1.0 rules rather than Python's round():

    * Halves round toward positive infinity (round(-1.5) is -1.0).
    * NaN and the infinities are returned unchanged.
    * Positive and negative zero are returned unchanged.
    * Values below zero but not below -0.5 give negative zero.

    """
    if n - n != 0:
        # Only NaN and the infinities fail this test.
        return n
    if n == 0:
        # Hands back the argument itself so the sign of zero survives.
        return n

    # Compare the fractional part instead of computing floor(n + 0.5):
    # the addition can itself round up (e.g. for 0.49999999999999994).
    lower = math.floor(n)
    if n - lower >= 0.5:
        lower += 1

    result = float(lower)
    if result == 0 and n < 0:
        return -0.0
    return result
|
||||||
|
|
||||||
|
def __str__(self):
    """Render the call as 'name(arg, arg, ...)'."""
    rendered = [str(argument) for argument in self.args]
    return '%s(%s)' % (self.name, ', '.join(rendered))
|
||||||
|
|
||||||
|
#
|
||||||
|
# XPath axes.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Dictionary of all axis functions.
|
||||||
|
axes = {}
|
||||||
|
|
||||||
|
def axisfn(reverse=False, principal_node_type=xml.dom.Node.ELEMENT_NODE):
    """Decorator marking a function as an XPath axis walker.

    An axis function takes a node and returns a sequence over the nodes
    along one XPath axis.  The decorator rewrites the function's __name__
    so that underscores become hyphens, and attaches two attributes:

    reverse             -- The direction flag given to the decorator.
    principal_node_type -- The principal node type given to the decorator.

    The function object itself is returned, not a wrapper.

    """
    def decorate(f):
        f.__name__ = '-'.join(f.__name__.split('_'))
        setattr(f, 'reverse', reverse)
        setattr(f, 'principal_node_type', principal_node_type)
        return f
    return decorate
|
||||||
|
|
||||||
|
def make_axes():
    """Define functions to walk each of the possible XPath axes.

    Every nested function takes a DOM node and returns or yields the
    nodes along one axis.  The axisfn decorator renames each function to
    its hyphenated axis name and records its direction and principal node
    type; the loop at the end stores all of them in the module-level
    'axes' dictionary under that name.

    """

    @axisfn()
    def child(node):
        return node.childNodes

    @axisfn()
    def descendant(node):
        # Every child subtree in turn; 'node' itself is not included.
        for child in node.childNodes:
            for node in descendant_or_self(child):
                yield node

    @axisfn()
    def parent(node):
        if node.parentNode is not None:
            yield node.parentNode

    @axisfn(reverse=True)
    def ancestor(node):
        # Nearest ancestor first.
        while node.parentNode is not None:
            node = node.parentNode
            yield node

    @axisfn()
    def following_sibling(node):
        while node.nextSibling is not None:
            node = node.nextSibling
            yield node

    @axisfn(reverse=True)
    def preceding_sibling(node):
        # Nearest sibling first.
        while node.previousSibling is not None:
            node = node.previousSibling
            yield node

    @axisfn()
    def following(node):
        # Subtrees of the later siblings of the node, then of each
        # ancestor's later siblings.  The ancestors themselves are never
        # yielded.
        while node is not None:
            while node.nextSibling is not None:
                node = node.nextSibling
                for n in descendant_or_self(node):
                    yield n
            node = node.parentNode

    @axisfn(reverse=True)
    def preceding(node):
        # Mirror image of 'following': subtrees of the earlier siblings,
        # each one walked back to front.
        while node is not None:
            while node.previousSibling is not None:
                node = node.previousSibling
                # Could be more efficient here.
                for n in reversed(list(descendant_or_self(node))):
                    yield n
            node = node.parentNode

    @axisfn(principal_node_type=xml.dom.Node.ATTRIBUTE_NODE)
    def attribute(node):
        # Nodes whose 'attributes' is None contribute nothing.
        if node.attributes is not None:
            return (node.attributes.item(i)
                    for i in xrange(node.attributes.length))
        return ()

    @axisfn()
    def namespace(node):
        raise XPathNotImplementedError("namespace axis is not implemented")

    @axisfn()
    def self(node):
        yield node

    @axisfn()
    def descendant_or_self(node):
        # The node first, then each child subtree in turn.
        yield node
        for child in node.childNodes:
            for node in descendant_or_self(child):
                yield node

    @axisfn(reverse=True)
    def ancestor_or_self(node):
        return chain([node], ancestor(node))

    # Place each axis function defined here into the 'axes' dict.
    # This relies on the axis functions being the only local names bound
    # at this point, and on axisfn having set each __name__ to the
    # hyphenated axis name.
    for axis in locals().values():
        axes[axis.__name__] = axis
|
||||||
|
|
||||||
|
make_axes()
|
||||||
|
|
||||||
|
def merge_into_nodeset(target, source):
    """Place all the nodes from the source node-set into the target
    node-set, preserving document order.  Both node-sets must be in
    document order to begin with.

    target -- List of nodes, modified in place.
    source -- Sequence of nodes to add; nodes already present in 'target'
              are skipped.

    Returns None.

    """
    if len(target) == 0:
        target.extend(source)
        return

    # Test membership against a set rather than the list, so that merging
    # many node-sets into one does not rescan 'target' for every node.
    present = set(target)
    source = [n for n in source if n not in present]
    if len(source) == 0:
        return

    # If the last node in the target set comes before the first node in the
    # source set, then we can just concatenate the sets.  Otherwise, we
    # will need to sort.  (We could also check to see if the last node in
    # the source set comes before the first node in the target set, but this
    # situation is very unlikely in practice.)
    in_order = document_order(target[-1]) < document_order(source[0])
    target.extend(source)
    if not in_order:
        target.sort(key=document_order)
|
||||||
|
|
||||||
|
class AbsolutePathExpr(Expr):
    """Absolute location paths: a path evaluated from the document node."""

    def __init__(self, path):
        # The relative path following the leading '/', or None for a
        # bare '/'.
        self.path = path

    def evaluate(self, node, pos, size, context):
        """Evaluate the path with the owning document as context node.

        Returns a one-element list holding the document node when there
        is no relative path.

        """
        root = node
        if root.nodeType != root.DOCUMENT_NODE:
            root = root.ownerDocument
        if self.path is not None:
            # The document is the only node in its context.
            return self.path.evaluate(root, 1, 1, context)
        return [root]

    def __str__(self):
        return '/%s' % (self.path or '')
|
||||||
|
|
||||||
|
class PathExpr(Expr):
    """Location path expressions: a sequence of steps joined by '/'."""

    def __init__(self, steps):
        # Step expressions, leftmost first.
        self.steps = steps

    def evaluate(self, node, pos, size, context):
        """Evaluate the steps from left to right.

        The first step runs in the caller's context.  Every later step is
        run once per node produced by the previous step, and the results
        are merged into a single node-set in document order.

        Raises XPathTypeError when a step that feeds, or is, a later step
        does not produce a node-set.

        """
        steps = self.steps

        # With a single step the result may be of any type; with more,
        # it has to be a node-set so the next step has something to walk.
        current = steps[0].evaluate(node, pos, size, context)
        if len(steps) > 1 and not nodesetp(current):
            raise XPathTypeError("path step is not a node-set")

        for step in steps[1:]:
            merged = []
            total = len(current)
            for index, item in enumerate(current):
                # Context positions are 1-based.
                found = step.evaluate(item, index + 1, total, context)
                if not nodesetp(found):
                    raise XPathTypeError("path step is not a node-set")
                merge_into_nodeset(merged, found)
            current = merged

        return current

    def __str__(self):
        return '/'.join([str(step) for step in self.steps])
|
||||||
|
|
||||||
|
class PredicateList(Expr):
    """A list of predicates.

    Predicates are handled as an expression wrapping the expression
    filtered by the predicates.

    """

    def __init__(self, expr, predicates, axis='child'):
        # expr is the filtered expression; axis names the axis whose
        # direction decides how context positions are counted.
        self.predicates = predicates
        self.expr = expr
        self.axis = axes[axis]

    def evaluate(self, node, pos, size, context):
        """Evaluate the wrapped expression and filter it.

        Each predicate is applied in turn to the survivors of the previous
        one.  For a reverse axis the nodes are numbered back to front
        while filtering, and the original order is restored afterwards.

        Raises XPathTypeError when the wrapped expression does not produce
        a node-set.

        """
        result = self.expr.evaluate(node, pos, size, context)
        if not nodesetp(result):
            raise XPathTypeError("predicate input is not a node-set")

        backwards = self.axis.reverse
        if backwards:
            result.reverse()

        for predicate in self.predicates:
            survivors = []
            total = len(result)
            position = 0
            for candidate in result:
                position += 1
                outcome = predicate.evaluate(candidate, position, total,
                                             context)

                # A numeric predicate selects the node at that position;
                # anything else is judged by its boolean value.
                if numberp(outcome):
                    keep = (outcome == position)
                else:
                    keep = boolean(outcome)
                if keep:
                    survivors.append(candidate)
            result = survivors

        if backwards:
            result.reverse()

        return result

    def __str__(self):
        text = str(self.expr)
        if '/' in text:
            text = '(%s)' % text
        return text + ''.join(['[%s]' % p for p in self.predicates])
|
||||||
|
|
||||||
|
class AxisStep(Expr):
    """One step in a location path expression: an axis plus a node test."""

    def __init__(self, axis, test=None, predicates=None):
        # 'predicates' is accepted but not used by this class.
        if test is None:
            # Without an explicit test every node on the axis matches.
            test = AnyKindTest()
        self.axis = axes[axis]
        self.test = test

    def evaluate(self, node, pos, size, context):
        """Return the nodes along the axis from 'node' that pass the test.

        Nodes are collected in the order the axis produces them; for a
        reverse axis the collected list is then reversed.

        """
        axis = self.axis
        match = [n for n in axis(node) if self.test.match(n, axis, context)]
        if axis.reverse:
            match.reverse()
        return match

    def __str__(self):
        return '%s::%s' % (self.axis.__name__, self.test)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Node tests.
|
||||||
|
#
|
||||||
|
|
||||||
|
class Test(object):
    """Abstract base class for node tests.

    This class only documents the interface: its match() has no body
    beyond the docstring.

    """

    def match(self, node, axis, context):
        """Return True if 'node' matches the test along 'axis'."""
|
||||||
|
|
||||||
|
class NameTest(object):
    """Node test matching nodes by name.

    Either part of the name may be the wildcard '*'.  A test built with
    no prefix and a wildcard local part is treated as '*:*'.

    """

    def __init__(self, prefix, localpart):
        self.prefix = prefix
        self.localName = localpart
        if self.prefix is None and self.localName == '*':
            # A bare '*' matches names in any namespace.
            self.prefix = '*'

    def match(self, node, axis, context):
        """Return True if 'node' matches this name along 'axis'.

        Only nodes of the axis' principal node type can match.  Unless the
        prefix is a wildcard, the node's namespace URI must equal the URI
        bound to the prefix in context.namespaces; an unprefixed test
        expects context.default_namespace on an element axis and no
        namespace otherwise.  Unless the local part is a wildcard, it must
        equal the node's localName.

        Raises XPathUnknownPrefixError when the prefix is not declared in
        the context.

        """
        if node.nodeType != axis.principal_node_type:
            return False

        if self.prefix != '*':
            namespaceURI = None
            if self.prefix is not None:
                try:
                    namespaceURI = context.namespaces[self.prefix]
                except KeyError:
                    raise XPathUnknownPrefixError(self.prefix)
            elif axis.principal_node_type == node.ELEMENT_NODE:
                # The default namespace applies to element names only.
                namespaceURI = context.default_namespace
            if namespaceURI != node.namespaceURI:
                return False

        if self.localName != '*':
            if self.localName != node.localName:
                return False

        return True

    def __str__(self):
        if self.prefix is not None:
            return '%s:%s' % (self.prefix, self.localName)
        else:
            return self.localName
|
||||||
|
|
||||||
|
class PITest(object):
    """Node test matching processing instructions, optionally by target."""

    def __init__(self, name=None):
        # The required target, or None to accept any target.
        self.name = name

    def match(self, node, axis, context):
        """Return True for a processing instruction with a fitting target."""
        if node.nodeType != node.PROCESSING_INSTRUCTION_NODE:
            return False
        return self.name is None or node.target == self.name

    def __str__(self):
        target = self.name
        if target is None:
            argument = ''
        elif "'" in target:
            # Switch to double quotes when the target holds a single one.
            argument = '"%s"' % target
        else:
            argument = "'%s'" % target
        return 'processing-instruction(%s)' % argument
|
||||||
|
|
||||||
|
class CommentTest(object):
    """Node test for comment(): matches comment nodes."""

    def match(self, node, axis, context):
        """Return True if 'node' is a comment node; 'axis' is ignored."""
        return node.nodeType == node.COMMENT_NODE

    def __str__(self):
        return 'comment()'
|
||||||
|
|
||||||
|
class TextTest(object):
    """Node test for text(): matches text nodes."""

    def match(self, node, axis, context):
        """Return True if 'node' is a text node; 'axis' is ignored."""
        return node.nodeType == node.TEXT_NODE

    def __str__(self):
        return 'text()'
|
||||||
|
|
||||||
|
class AnyKindTest(object):
    """Node test for node(): matches every node."""

    def match(self, node, axis, context):
        """Return True unconditionally."""
        return True

    def __str__(self):
        return 'node()'
|
|
@ -0,0 +1,252 @@
|
||||||
|
import expr as X
|
||||||
|
from yappsrt import *
|
||||||
|
|
||||||
|
%%
|
||||||
|
|
||||||
|
parser XPath:
|
||||||
|
option: 'no-support-module'
|
||||||
|
|
||||||
|
ignore: r'\s+'
|
||||||
|
token END: r'$'
|
||||||
|
|
||||||
|
token FORWARD_AXIS_NAME:
|
||||||
|
r'child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace'
|
||||||
|
token REVERSE_AXIS_NAME:
|
||||||
|
r'parent|preceding-sibling|preceding|ancestor-or-self|ancestor'
|
||||||
|
|
||||||
|
# Dire hack here, since yapps2 has only one token of lookahead: NCNAME
|
||||||
|
# does not match when followed by an open paren.
|
||||||
|
token NCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*(?!\()'
|
||||||
|
token FUNCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*'
|
||||||
|
|
||||||
|
token DQUOTE: r'\"(?:[^\"])*\"'
|
||||||
|
token SQUOTE: r"\'(?:[^\'])*\'"
|
||||||
|
token NUMBER: r'((\.[0-9]+)|([0-9]+(\.[0-9]*)?))([eE][\+\-]?[0-9]+)?'
|
||||||
|
token EQ_COMP: r'\!?\='
|
||||||
|
token REL_COMP: r'[\<\>]\=?'
|
||||||
|
token ADD_COMP: r'[\+\-]'
|
||||||
|
token MUL_COMP: r'\*|div|mod'
|
||||||
|
|
||||||
|
rule XPath:
|
||||||
|
Expr END {{ return Expr }}
|
||||||
|
|
||||||
|
rule Expr:
|
||||||
|
OrExpr {{ return OrExpr }}
|
||||||
|
|
||||||
|
rule OrExpr:
|
||||||
|
AndExpr {{ Expr = AndExpr }}
|
||||||
|
(
|
||||||
|
r'or' AndExpr
|
||||||
|
{{ Expr = X.OrExpr('or', Expr, AndExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule AndExpr:
|
||||||
|
EqualityExpr {{ Expr = EqualityExpr }}
|
||||||
|
(
|
||||||
|
r'and' EqualityExpr
|
||||||
|
{{ Expr = X.AndExpr('and', Expr, EqualityExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule EqualityExpr:
|
||||||
|
RelationalExpr {{ Expr = RelationalExpr }}
|
||||||
|
(
|
||||||
|
EQ_COMP
|
||||||
|
RelationalExpr
|
||||||
|
{{ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule RelationalExpr:
|
||||||
|
AdditiveExpr {{ Expr = AdditiveExpr }}
|
||||||
|
(
|
||||||
|
REL_COMP
|
||||||
|
AdditiveExpr
|
||||||
|
{{ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule AdditiveExpr:
|
||||||
|
MultiplicativeExpr {{ Expr = MultiplicativeExpr }}
|
||||||
|
(
|
||||||
|
ADD_COMP
|
||||||
|
MultiplicativeExpr
|
||||||
|
{{ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule MultiplicativeExpr:
|
||||||
|
UnionExpr {{ Expr = UnionExpr }}
|
||||||
|
(
|
||||||
|
MUL_COMP
|
||||||
|
UnionExpr
|
||||||
|
{{ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule UnionExpr:
|
||||||
|
UnaryExpr {{ Expr = UnaryExpr }}
|
||||||
|
(
|
||||||
|
'\|' UnaryExpr
|
||||||
|
{{ Expr = X.UnionExpr('|', Expr, UnaryExpr) }}
|
||||||
|
)* {{ return Expr }}
|
||||||
|
|
||||||
|
rule UnaryExpr:
|
||||||
|
r'\-' ValueExpr {{ return X.NegationExpr(ValueExpr) }}
|
||||||
|
| ValueExpr {{ return ValueExpr }}
|
||||||
|
|
||||||
|
rule ValueExpr:
|
||||||
|
PathExpr {{ return PathExpr }}
|
||||||
|
|
||||||
|
rule PathExpr:
|
||||||
|
r'\/' {{ path = None }}
|
||||||
|
[
|
||||||
|
RelativePathExpr {{ path = RelativePathExpr }}
|
||||||
|
] {{ return X.AbsolutePathExpr(path) }}
|
||||||
|
| r'\/\/' RelativePathExpr
|
||||||
|
{{ step = X.AxisStep('descendant-or-self') }}
|
||||||
|
{{ RelativePathExpr.steps.insert(0, step) }}
|
||||||
|
{{ return X.AbsolutePathExpr(RelativePathExpr) }}
|
||||||
|
| RelativePathExpr {{ return RelativePathExpr }}
|
||||||
|
|
||||||
|
rule RelativePathExpr:
|
||||||
|
StepExpr {{ steps = [StepExpr] }}
|
||||||
|
(
|
||||||
|
(
|
||||||
|
r'\/'
|
||||||
|
| r'\/\/'
|
||||||
|
{{ steps.append(X.AxisStep('descendant-or-self')) }}
|
||||||
|
)
|
||||||
|
StepExpr {{ steps.append(StepExpr) }}
|
||||||
|
)*
|
||||||
|
{{ return X.PathExpr(steps) }}
|
||||||
|
|
||||||
|
rule StepExpr:
|
||||||
|
AxisStep {{ return AxisStep }}
|
||||||
|
| FilterExpr {{ return FilterExpr }}
|
||||||
|
|
||||||
|
rule AxisStep:
|
||||||
|
(
|
||||||
|
ForwardStep {{ step = ForwardStep }}
|
||||||
|
| ReverseStep {{ step = ReverseStep }}
|
||||||
|
) {{ expr = X.AxisStep(*step) }}
|
||||||
|
[
|
||||||
|
PredicateList
|
||||||
|
{{ expr = X.PredicateList(expr, PredicateList, step[0]) }}
|
||||||
|
]
|
||||||
|
{{ return expr }}
|
||||||
|
|
||||||
|
rule ForwardStep:
|
||||||
|
ForwardAxis NodeTest {{ return [ForwardAxis, NodeTest] }}
|
||||||
|
| AbbrevForwardStep {{ return AbbrevForwardStep }}
|
||||||
|
|
||||||
|
rule ForwardAxis:
|
||||||
|
FORWARD_AXIS_NAME r'::' {{ return FORWARD_AXIS_NAME }}
|
||||||
|
|
||||||
|
rule AbbrevForwardStep:
|
||||||
|
{{ axis = 'child' }}
|
||||||
|
[
|
||||||
|
r'@' {{ axis = 'attribute' }}
|
||||||
|
]
|
||||||
|
NodeTest {{ return [axis, NodeTest] }}
|
||||||
|
|
||||||
|
rule ReverseStep:
|
||||||
|
ReverseAxis NodeTest {{ return [ReverseAxis, NodeTest] }}
|
||||||
|
| AbbrevReverseStep {{ return AbbrevReverseStep }}
|
||||||
|
|
||||||
|
rule ReverseAxis:
|
||||||
|
REVERSE_AXIS_NAME r'::' {{ return REVERSE_AXIS_NAME }}
|
||||||
|
|
||||||
|
rule AbbrevReverseStep:
|
||||||
|
r'\.\.' {{ return ['parent', None] }}
|
||||||
|
|
||||||
|
rule NodeTest:
|
||||||
|
KindTest {{ return KindTest }}
|
||||||
|
| NameTest {{ return NameTest }}
|
||||||
|
|
||||||
|
rule NameTest:
|
||||||
|
# We also support the XPath 2.0 <name>:*.
|
||||||
|
{{ prefix = None }}
|
||||||
|
WildcardOrNCName {{ localpart = WildcardOrNCName }}
|
||||||
|
[
|
||||||
|
r':' WildcardOrNCName {{ prefix = localpart }}
|
||||||
|
{{ localpart = WildcardOrNCName }}
|
||||||
|
]
|
||||||
|
{{ return X.NameTest(prefix, localpart) }}
|
||||||
|
|
||||||
|
rule WildcardOrNCName:
|
||||||
|
r'\*' {{ return '*' }}
|
||||||
|
| NCNAME {{ return NCNAME }}
|
||||||
|
|
||||||
|
rule FilterExpr:
|
||||||
|
PrimaryExpr
|
||||||
|
[
|
||||||
|
PredicateList
|
||||||
|
{{ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList) }}
|
||||||
|
] {{ return PrimaryExpr }}
|
||||||
|
|
||||||
|
rule PredicateList:
|
||||||
|
Predicate {{ predicates = [Predicate] }}
|
||||||
|
(
|
||||||
|
Predicate {{ predicates.append(Predicate) }}
|
||||||
|
)* {{ return predicates }}
|
||||||
|
|
||||||
|
rule Predicate:
|
||||||
|
r'\[' Expr r'\]' {{ return Expr }}
|
||||||
|
|
||||||
|
rule PrimaryExpr:
|
||||||
|
Literal {{ return X.LiteralExpr(Literal) }}
|
||||||
|
| VariableReference {{ return VariableReference }}
|
||||||
|
| r'\(' Expr r'\)' {{ return Expr }}
|
||||||
|
| ContextItemExpr {{ return ContextItemExpr }}
|
||||||
|
| FunctionCall {{ return FunctionCall }}
|
||||||
|
|
||||||
|
rule VariableReference:
|
||||||
|
r'\$' QName
|
||||||
|
{{ return X.VariableReference(*QName) }}
|
||||||
|
|
||||||
|
rule ContextItemExpr:
|
||||||
|
r'\.' {{ return X.AxisStep('self') }}
|
||||||
|
|
||||||
|
rule FunctionCall:
|
||||||
|
FUNCNAME r'\(' {{ args = [] }}
|
||||||
|
[
|
||||||
|
Expr {{ args.append(Expr) }}
|
||||||
|
(
|
||||||
|
r'\,' Expr {{ args.append(Expr) }}
|
||||||
|
)*
|
||||||
|
] r'\)' {{ return X.Function(FUNCNAME, args) }}
|
||||||
|
|
||||||
|
rule KindTest:
|
||||||
|
PITest {{ return PITest }}
|
||||||
|
| CommentTest {{ return CommentTest }}
|
||||||
|
| TextTest {{ return TextTest }}
|
||||||
|
| AnyKindTest {{ return AnyKindTest }}
|
||||||
|
|
||||||
|
rule PITest:
|
||||||
|
r'processing-instruction' {{ name = None }}
|
||||||
|
r'\(' [
|
||||||
|
NCNAME {{ name = NCNAME }}
|
||||||
|
| StringLiteral {{ name = StringLiteral }}
|
||||||
|
] r'\)' {{ return X.PITest(name) }}
|
||||||
|
|
||||||
|
rule CommentTest:
|
||||||
|
r'comment' r'\(' r'\)' {{ return X.CommentTest() }}
|
||||||
|
|
||||||
|
rule TextTest:
|
||||||
|
r'text' r'\(' r'\)' {{ return X.TextTest() }}
|
||||||
|
|
||||||
|
rule AnyKindTest:
|
||||||
|
r'node' r'\(' r'\)' {{ return X.AnyKindTest() }}
|
||||||
|
|
||||||
|
rule Literal:
|
||||||
|
NumericLiteral {{ return NumericLiteral }}
|
||||||
|
| StringLiteral {{ return StringLiteral }}
|
||||||
|
|
||||||
|
rule NumericLiteral:
|
||||||
|
NUMBER {{ return float(NUMBER) }}
|
||||||
|
|
||||||
|
rule StringLiteral:
|
||||||
|
DQUOTE {{ return DQUOTE[1:-1] }}
|
||||||
|
| SQUOTE {{ return SQUOTE[1:-1] }}
|
||||||
|
|
||||||
|
rule QName:
|
||||||
|
NCNAME {{ name = NCNAME }}
|
||||||
|
[
|
||||||
|
r'\:' NCNAME {{ return (name, NCNAME) }}
|
||||||
|
] {{ return (None, name) }}
|
|
@ -0,0 +1,420 @@
|
||||||
|
import expr as X
|
||||||
|
from yappsrt import *
|
||||||
|
|
||||||
|
|
||||||
|
from string import *
|
||||||
|
import re
|
||||||
|
|
||||||
|
class XPathScanner(Scanner):
|
||||||
|
patterns = [
|
||||||
|
("r'\\:'", re.compile('\\:')),
|
||||||
|
("r'node'", re.compile('node')),
|
||||||
|
("r'text'", re.compile('text')),
|
||||||
|
("r'comment'", re.compile('comment')),
|
||||||
|
("r'processing-instruction'", re.compile('processing-instruction')),
|
||||||
|
("r'\\,'", re.compile('\\,')),
|
||||||
|
("r'\\.'", re.compile('\\.')),
|
||||||
|
("r'\\$'", re.compile('\\$')),
|
||||||
|
("r'\\)'", re.compile('\\)')),
|
||||||
|
("r'\\('", re.compile('\\(')),
|
||||||
|
("r'\\]'", re.compile('\\]')),
|
||||||
|
("r'\\['", re.compile('\\[')),
|
||||||
|
("r'\\*'", re.compile('\\*')),
|
||||||
|
("r':'", re.compile(':')),
|
||||||
|
("r'\\.\\.'", re.compile('\\.\\.')),
|
||||||
|
("r'@'", re.compile('@')),
|
||||||
|
("r'::'", re.compile('::')),
|
||||||
|
("r'\\/\\/'", re.compile('\\/\\/')),
|
||||||
|
("r'\\/'", re.compile('\\/')),
|
||||||
|
("r'\\-'", re.compile('\\-')),
|
||||||
|
("'\\|'", re.compile('\\|')),
|
||||||
|
("r'and'", re.compile('and')),
|
||||||
|
("r'or'", re.compile('or')),
|
||||||
|
('\\s+', re.compile('\\s+')),
|
||||||
|
('END', re.compile('$')),
|
||||||
|
('FORWARD_AXIS_NAME', re.compile('child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace')),
|
||||||
|
('REVERSE_AXIS_NAME', re.compile('parent|preceding-sibling|preceding|ancestor-or-self|ancestor')),
|
||||||
|
('NCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*(?!\\()')),
|
||||||
|
('FUNCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*')),
|
||||||
|
('DQUOTE', re.compile('\\"(?:[^\\"])*\\"')),
|
||||||
|
('SQUOTE', re.compile("\\'(?:[^\\'])*\\'")),
|
||||||
|
('NUMBER', re.compile('((\\.[0-9]+)|([0-9]+(\\.[0-9]*)?))([eE][\\+\\-]?[0-9]+)?')),
|
||||||
|
('EQ_COMP', re.compile('\\!?\\=')),
|
||||||
|
('REL_COMP', re.compile('[\\<\\>]\\=?')),
|
||||||
|
('ADD_COMP', re.compile('[\\+\\-]')),
|
||||||
|
('MUL_COMP', re.compile('\\*|div|mod')),
|
||||||
|
]
|
||||||
|
def __init__(self, str):
|
||||||
|
Scanner.__init__(self,None,['\\s+'],str)
|
||||||
|
|
||||||
|
# Lookahead sets handed to ``_peek``.  The order of the names is kept exactly
# as the generated parser listed them, because the scanner quotes the set in
# its "Trying to find one of ..." error message.
_AFTER_OR = ("r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'")
_AFTER_AND = ("r'and'",) + _AFTER_OR
_AFTER_EQ = ('EQ_COMP',) + _AFTER_AND
_AFTER_REL = ('REL_COMP',) + _AFTER_EQ
_AFTER_ADD = ('ADD_COMP',) + _AFTER_REL
_AFTER_MUL = ('MUL_COMP',) + _AFTER_ADD
_AFTER_UNION = ("'\\|'",) + _AFTER_MUL
_PATH_SEPARATORS = ("r'\\/'", "r'\\/\\/'")
_AFTER_STEP = _PATH_SEPARATORS + _AFTER_UNION
_AFTER_PRIMARY = ("r'\\['",) + _AFTER_STEP
_NAME_TEST_FOLLOW = ("r':'",) + _AFTER_PRIMARY
_QNAME_FOLLOW = ("r'\\:'",) + _AFTER_PRIMARY

_KIND_TEST_START = ("r'processing-instruction'", "r'comment'", "r'text'",
                    "r'node'")
_NAME_TEST_START = ("r'\\*'", 'NCNAME')
_NODE_TEST_START = _KIND_TEST_START + _NAME_TEST_START
_ABBREV_FORWARD_START = ("r'@'",) + _NODE_TEST_START
_FORWARD_START = ('FORWARD_AXIS_NAME',) + _ABBREV_FORWARD_START
_REVERSE_START = ('REVERSE_AXIS_NAME', "r'\\.\\.'")
_AXIS_STEP_START = (('FORWARD_AXIS_NAME', "r'@'") + _REVERSE_START
                    + _NODE_TEST_START)

_STRING_START = ('DQUOTE', 'SQUOTE')
_LITERAL_START = ('NUMBER',) + _STRING_START
_PRIMARY_START = ("r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME') + _LITERAL_START
_STEP_START = (("r'\\('", 'FORWARD_AXIS_NAME', "r'@'") + _REVERSE_START
               + ("r'\\$'", "r'\\.'", 'FUNCNAME') + _LITERAL_START
               + _NODE_TEST_START)
_PATH_START = _PATH_SEPARATORS + _STEP_START
_UNARY_START = ("r'\\-'",) + _PATH_START
_ROOT_PATH_FOLLOW = _STEP_START + _AFTER_UNION

_ARG_DELIMS = ("r'\\,'", "r'\\)'")
_CALL_ARGS_START = _ARG_DELIMS + _UNARY_START


class XPath(Parser):
    """Recursive-descent parser for XPath expressions.

    Each method parses one grammar rule, consuming tokens through the
    inherited ``_peek``/``_scan`` helpers, and returns either a node built
    from the ``X`` namespace or a plain Python value (string, float, list
    or tuple) that the calling rule assembles into a node.
    """

    def XPath(self):
        """Parse a whole expression, which must be followed by ``END``."""
        tree = self.Expr()
        self._scan('END')
        return tree

    def Expr(self):
        """Parse an expression; this rule simply delegates to ``OrExpr``."""
        return self.OrExpr()

    def OrExpr(self):
        """Parse ``AndExpr ('or' AndExpr)*`` into left-nested ``X.OrExpr``."""
        result = self.AndExpr()
        while self._peek(*_AFTER_OR) == "r'or'":
            self._scan("r'or'")
            rhs = self.AndExpr()
            result = X.OrExpr('or', result, rhs)
        return result

    def AndExpr(self):
        """Parse ``EqualityExpr ('and' EqualityExpr)*`` into ``X.AndExpr``."""
        result = self.EqualityExpr()
        while self._peek(*_AFTER_AND) == "r'and'":
            self._scan("r'and'")
            rhs = self.EqualityExpr()
            result = X.AndExpr('and', result, rhs)
        return result

    def EqualityExpr(self):
        """Parse ``RelationalExpr (EQ_COMP RelationalExpr)*``."""
        result = self.RelationalExpr()
        while self._peek(*_AFTER_EQ) == 'EQ_COMP':
            operator = self._scan('EQ_COMP')
            rhs = self.RelationalExpr()
            result = X.EqualityExpr(operator, result, rhs)
        return result

    def RelationalExpr(self):
        """Parse ``AdditiveExpr (REL_COMP AdditiveExpr)*``.

        The comparison is built with ``X.EqualityExpr``, carrying the
        scanned relational operator text as its first argument.
        """
        result = self.AdditiveExpr()
        while self._peek(*_AFTER_REL) == 'REL_COMP':
            operator = self._scan('REL_COMP')
            rhs = self.AdditiveExpr()
            result = X.EqualityExpr(operator, result, rhs)
        return result

    def AdditiveExpr(self):
        """Parse ``MultiplicativeExpr (ADD_COMP MultiplicativeExpr)*``."""
        result = self.MultiplicativeExpr()
        while self._peek(*_AFTER_ADD) == 'ADD_COMP':
            operator = self._scan('ADD_COMP')
            rhs = self.MultiplicativeExpr()
            result = X.ArithmeticalExpr(operator, result, rhs)
        return result

    def MultiplicativeExpr(self):
        """Parse ``UnionExpr (MUL_COMP UnionExpr)*``."""
        result = self.UnionExpr()
        while self._peek(*_AFTER_MUL) == 'MUL_COMP':
            operator = self._scan('MUL_COMP')
            rhs = self.UnionExpr()
            result = X.ArithmeticalExpr(operator, result, rhs)
        return result

    def UnionExpr(self):
        """Parse ``UnaryExpr ('|' UnaryExpr)*`` into ``X.UnionExpr``."""
        result = self.UnaryExpr()
        while self._peek(*_AFTER_UNION) == "'\\|'":
            self._scan("'\\|'")
            rhs = self.UnaryExpr()
            result = X.UnionExpr('|', result, rhs)
        return result

    def UnaryExpr(self):
        """Parse a value, wrapping it in ``X.NegationExpr`` after a minus."""
        if self._peek(*_UNARY_START) != "r'\\-'":
            return self.ValueExpr()
        self._scan("r'\\-'")
        operand = self.ValueExpr()
        return X.NegationExpr(operand)

    def ValueExpr(self):
        """Parse a value expression; this rule delegates to ``PathExpr``."""
        return self.PathExpr()

    def PathExpr(self):
        """Parse an absolute (``/...``, ``//...``) or relative path."""
        lead = self._peek(*_PATH_START)
        if lead == "r'\\/'":
            self._scan("r'\\/'")
            # A lone '/' is allowed: the relative part is only parsed when
            # the next token is not one that may follow a complete path.
            path = None
            if self._peek(*_ROOT_PATH_FOLLOW) not in _AFTER_UNION:
                path = self.RelativePathExpr()
            return X.AbsolutePathExpr(path)
        if lead == "r'\\/\\/'":
            self._scan("r'\\/\\/'")
            relative = self.RelativePathExpr()
            # '//' is expanded into a leading descendant-or-self step.
            step = X.AxisStep('descendant-or-self')
            relative.steps.insert(0, step)
            return X.AbsolutePathExpr(relative)
        return self.RelativePathExpr()

    def RelativePathExpr(self):
        """Parse steps separated by ``/`` or ``//`` into ``X.PathExpr``."""
        steps = [self.StepExpr()]
        while self._peek(*_AFTER_STEP) in _PATH_SEPARATORS:
            separator = self._peek(*_PATH_SEPARATORS)
            if separator == "r'\\/'":
                self._scan("r'\\/'")
            else:
                self._scan("r'\\/\\/'")
                # '//' contributes an extra descendant-or-self step.
                steps.append(X.AxisStep('descendant-or-self'))
            steps.append(self.StepExpr())
        return X.PathExpr(steps)

    def StepExpr(self):
        """Parse one step: a filter expression or an axis step."""
        if self._peek(*_STEP_START) in _PRIMARY_START:
            return self.FilterExpr()
        return self.AxisStep()

    def AxisStep(self):
        """Parse a forward or reverse step with optional predicates."""
        if self._peek(*_AXIS_STEP_START) in _REVERSE_START:
            step = self.ReverseStep()
        else:
            step = self.ForwardStep()
        expr = X.AxisStep(*step)
        if self._peek(*_AFTER_PRIMARY) == "r'\\['":
            predicates = self.PredicateList()
            # step[0] is the axis name produced by the step rule.
            expr = X.PredicateList(expr, predicates, step[0])
        return expr

    def ForwardStep(self):
        """Return ``[axis, node_test]`` for a forward step."""
        if self._peek(*_FORWARD_START) != 'FORWARD_AXIS_NAME':
            return self.AbbrevForwardStep()
        axis = self.ForwardAxis()
        test = self.NodeTest()
        return [axis, test]

    def ForwardAxis(self):
        """Consume ``FORWARD_AXIS_NAME '::'`` and return the axis name."""
        axis_name = self._scan('FORWARD_AXIS_NAME')
        self._scan("r'::'")
        return axis_name

    def AbbrevForwardStep(self):
        """Return ``[axis, node_test]`` for an abbreviated forward step.

        The axis is ``'attribute'`` after a leading ``@`` and ``'child'``
        otherwise.
        """
        axis = 'child'
        if self._peek(*_ABBREV_FORWARD_START) == "r'@'":
            self._scan("r'@'")
            axis = 'attribute'
        test = self.NodeTest()
        return [axis, test]

    def ReverseStep(self):
        """Return ``[axis, node_test]`` for a reverse step."""
        if self._peek(*_REVERSE_START) != 'REVERSE_AXIS_NAME':
            return self.AbbrevReverseStep()
        axis = self.ReverseAxis()
        test = self.NodeTest()
        return [axis, test]

    def ReverseAxis(self):
        """Consume ``REVERSE_AXIS_NAME '::'`` and return the axis name."""
        axis_name = self._scan('REVERSE_AXIS_NAME')
        self._scan("r'::'")
        return axis_name

    def AbbrevReverseStep(self):
        """Consume ``..`` and return ``['parent', None]``."""
        self._scan("r'\\.\\.'")
        return ['parent', None]

    def NodeTest(self):
        """Parse a name test (``*`` or a name) or a kind test."""
        if self._peek(*_NODE_TEST_START) in _NAME_TEST_START:
            return self.NameTest()
        return self.KindTest()

    def NameTest(self):
        """Parse ``local`` or ``prefix:local`` into ``X.NameTest``."""
        prefix = None
        localpart = self.WildcardOrNCName()
        if self._peek(*_NAME_TEST_FOLLOW) == "r':'":
            self._scan("r':'")
            second = self.WildcardOrNCName()
            # What was read first turns out to be the prefix.
            prefix = localpart
            localpart = second
        return X.NameTest(prefix, localpart)

    def WildcardOrNCName(self):
        """Return ``'*'`` for a wildcard, else the scanned ``NCNAME`` text."""
        if self._peek(*_NAME_TEST_START) == "r'\\*'":
            self._scan("r'\\*'")
            return '*'
        return self._scan('NCNAME')

    def FilterExpr(self):
        """Parse a primary expression with optional predicates."""
        primary = self.PrimaryExpr()
        if self._peek(*_AFTER_PRIMARY) == "r'\\['":
            predicates = self.PredicateList()
            primary = X.PredicateList(primary, predicates)
        return primary

    def PredicateList(self):
        """Parse one or more ``[...]`` predicates and return them as a list."""
        predicates = [self.Predicate()]
        while self._peek(*_AFTER_PRIMARY) == "r'\\['":
            predicates.append(self.Predicate())
        return predicates

    def Predicate(self):
        """Parse ``'[' Expr ']'`` and return the inner expression."""
        self._scan("r'\\['")
        inner = self.Expr()
        self._scan("r'\\]'")
        return inner

    def PrimaryExpr(self):
        """Parse a variable, parenthesised expression, ``.``, call or literal."""
        kind = self._peek(*_PRIMARY_START)
        if kind == "r'\\$'":
            return self.VariableReference()
        if kind == "r'\\('":
            self._scan("r'\\('")
            inner = self.Expr()
            self._scan("r'\\)'")
            return inner
        if kind == "r'\\.'":
            return self.ContextItemExpr()
        if kind == 'FUNCNAME':
            return self.FunctionCall()
        # Anything else in the lookahead set is a number or a string.
        literal = self.Literal()
        return X.LiteralExpr(literal)

    def VariableReference(self):
        """Parse ``'$' QName`` into ``X.VariableReference``."""
        self._scan("r'\\$'")
        qname = self.QName()
        return X.VariableReference(*qname)

    def ContextItemExpr(self):
        """Consume ``.`` and return a ``self`` axis step."""
        self._scan("r'\\.'")
        return X.AxisStep('self')

    def FunctionCall(self):
        """Parse ``FUNCNAME '(' args ')'`` into ``X.Function``."""
        name = self._scan('FUNCNAME')
        self._scan("r'\\('")
        args = []
        if self._peek(*_CALL_ARGS_START) not in _ARG_DELIMS:
            first = self.Expr()
            args.append(first)
        while self._peek(*_ARG_DELIMS) == "r'\\,'":
            self._scan("r'\\,'")
            following = self.Expr()
            args.append(following)
        self._scan("r'\\)'")
        return X.Function(name, args)

    def KindTest(self):
        """Dispatch to the kind test named by the next token."""
        kind = self._peek(*_KIND_TEST_START)
        if kind == "r'processing-instruction'":
            return self.PITest()
        if kind == "r'comment'":
            return self.CommentTest()
        if kind == "r'text'":
            return self.TextTest()
        return self.AnyKindTest()

    def PITest(self):
        """Parse ``processing-instruction(...)`` into ``X.PITest``.

        The optional argument is a bare name or a quoted string; ``None``
        is passed when the parentheses are empty.
        """
        self._scan("r'processing-instruction'")
        name = None
        self._scan("r'\\('")
        if self._peek('NCNAME', "r'\\)'", 'DQUOTE', 'SQUOTE') != "r'\\)'":
            if self._peek('NCNAME', 'DQUOTE', 'SQUOTE') == 'NCNAME':
                name = self._scan('NCNAME')
            else:
                name = self.StringLiteral()
        self._scan("r'\\)'")
        return X.PITest(name)

    def CommentTest(self):
        """Parse ``comment()`` into ``X.CommentTest``."""
        self._scan("r'comment'")
        self._scan("r'\\('")
        self._scan("r'\\)'")
        return X.CommentTest()

    def TextTest(self):
        """Parse ``text()`` into ``X.TextTest``."""
        self._scan("r'text'")
        self._scan("r'\\('")
        self._scan("r'\\)'")
        return X.TextTest()

    def AnyKindTest(self):
        """Parse ``node()`` into ``X.AnyKindTest``."""
        self._scan("r'node'")
        self._scan("r'\\('")
        self._scan("r'\\)'")
        return X.AnyKindTest()

    def Literal(self):
        """Return the value of a numeric or string literal."""
        if self._peek(*_LITERAL_START) == 'NUMBER':
            return self.NumericLiteral()
        return self.StringLiteral()

    def NumericLiteral(self):
        """Scan a ``NUMBER`` token and return it as a float."""
        return float(self._scan('NUMBER'))

    def StringLiteral(self):
        """Scan a quoted string and return it without its two quotes."""
        if self._peek(*_STRING_START) == 'DQUOTE':
            quoted = self._scan('DQUOTE')
        else:
            quoted = self._scan('SQUOTE')
        return quoted[1:-1]

    def QName(self):
        """Parse ``name`` or ``prefix:name`` and return ``(prefix, name)``.

        The prefix is ``None`` when no colon follows the first name.
        """
        first = self._scan('NCNAME')
        if self._peek(*_QNAME_FOLLOW) != "r'\\:'":
            return (None, first)
        self._scan("r'\\:'")
        second = self._scan('NCNAME')
        return (first, second)
|
||||||
|
|
||||||
|
|
||||||
|
def parse(rule, text):
    """Parse *text* starting at the grammar rule named *rule*.

    A fresh scanner and parser are built for every call.  The result is
    whatever ``wrap_error_reporter`` returns for that parser and rule.
    """
    return wrap_error_reporter(XPath(XPathScanner(text)), rule)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line driver: <rule> [<filename>].  The text to parse is read
    # from the named file when one is given, otherwise from standard input.
    from sys import argv, stdin
    if len(argv) >= 2:
        if len(argv) >= 3:
            f = open(argv[2], 'r')
            try:
                text = f.read()
            finally:
                # The original never closed this handle.
                f.close()
        else:
            text = stdin.read()
        print(parse(argv[1], text))
    else:
        print('Args: <rule> [<filename>]')
|
|
@ -0,0 +1,174 @@
|
||||||
|
# Yapps 2.0 Runtime
|
||||||
|
#
|
||||||
|
# This module is needed to run generated parsers.
|
||||||
|
|
||||||
|
from string import join, count, find, rfind
|
||||||
|
import re
|
||||||
|
|
||||||
|
class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use.

    Attributes:
        pos -- character offset of the error in the input, or a negative
               number when no position is known (the default is -1)
        msg -- short description of the problem
    """

    def __init__(self, pos=-1, msg="Bad Token"):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg

    def __repr__(self):
        if self.pos < 0:
            return "#<syntax-error>"
        return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg)

    def __str__(self):
        # Exception.__init__ receives no arguments above, so the inherited
        # str() would be empty and an uncaught error would print no detail.
        return self.__repr__()
|
||||||
|
|
||||||
|
class NoMoreTokens(Exception):
    """Signals that a token was requested but none are left to hand out."""
|
||||||
|
|
||||||
|
class Scanner:
    """Regex-based tokenizer that produces tokens on demand.

    Tokens are 4-tuples ``(start, end, terminal, text)`` collected in
    ``self.tokens``; ``self.restrictions`` records, index for index, the
    restriction set that was in force when each token was scanned.
    """

    def __init__(self, patterns, ignore, input):
        """Prepare to scan *input*.

        patterns -- list of ``(terminal, regex source)`` pairs, or None when
                    the class already provides a ready ``.patterns`` list of
                    ``(terminal, compiled regex)`` pairs
        ignore   -- list of terminals that are matched but never returned
        input    -- the string to tokenize
        """
        self.tokens = []
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        if patterns is not None:
            self.patterns = [(terminal, re.compile(source))
                             for terminal, source in patterns]

    def token(self, i, restrict=0):
        """Return the i'th token, scanning one more if i is one past the end.

        *restrict* is a list of the terminals that are allowed, or 0 for
        any terminal.  Raises NoMoreTokens when token *i* is not available
        and NotImplementedError when *restrict* names a terminal that was
        not allowed when token *i* was originally scanned.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i >= len(self.tokens):
            raise NoMoreTokens()
        # An already-scanned token may only be re-read with a restriction
        # set contained in the one it was scanned with.
        earlier = self.restrictions[i]
        if restrict and earlier:
            for terminal in restrict:
                if terminal not in earlier:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.tokens[i]

    def __repr__(self):
        """Describe the last 10 tokens that have been scanned in."""
        return ''.join(['\n (@%s) %s = %s' % (tok[0], tok[2], repr(tok[3]))
                        for tok in self.tokens[-10:]])

    def scan(self, restrict):
        """Scan one more token and record it together with *restrict*.

        Ignored terminals are skipped.  Raises SyntaxError when nothing
        matches at the current position.
        """
        while 1:
            start = self.pos
            # Longest match wins; on a tie the pattern listed first is kept
            # because only a strictly longer match replaces the best one.
            best_length = -1
            best_terminal = '(error)'
            for terminal, regexp in self.patterns:
                # Terminals outside the restriction are skipped, except the
                # ignored ones, which are always tried.
                if restrict and terminal not in restrict and terminal not in self.ignore:
                    continue
                found = regexp.match(self.input, start)
                if found is None:
                    continue
                length = len(found.group(0))
                if length > best_length:
                    best_terminal = terminal
                    best_length = length

            if best_terminal == '(error)' and best_length < 0:
                msg = "Bad Token"
                if restrict:
                    msg = "Trying to find one of " + ", ".join(restrict)
                raise SyntaxError(start, msg)

            stop = start + best_length
            if best_terminal in self.ignore:
                # Skip the ignored text and look again.
                self.pos = stop
                continue

            token = (start, stop, best_terminal, self.input[start:stop])
            self.pos = stop
            # A token identical to the previous one is not added again
            # (this prevents looping).
            if not self.tokens or token != self.tokens[-1]:
                self.tokens.append(token)
                self.restrictions.append(restrict)
            return
|
||||||
|
|
||||||
|
class Parser:
|
||||||
|
def __init__(self, scanner):
|
||||||
|
self._scanner = scanner
|
||||||
|
self._pos = 0
|
||||||
|
|
||||||
|
def _peek(self, *types):
|
||||||
|
"""Returns the token type for lookahead; if there are any args
|
||||||
|
then the list of args is the set of token types to allow"""
|
||||||
|
tok = self._scanner.token(self._pos, types)
|
||||||
|
return tok[2]
|
||||||
|
|
||||||
|
def _scan(self, type):
|
||||||
|
"""Returns the matched text, and moves to the next token"""
|
||||||
|
tok = self._scanner.token(self._pos, [type])
|
||||||
|
if tok[2] != type:
|
||||||
|
raise SyntaxError(tok[0], 'Trying to find '+type)
|
||||||
|
self._pos = 1+self._pos
|
||||||
|
return tok[3]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def print_error(input, err, scanner):
    """Print a syntax error with an excerpt of the input and a caret.

    input   -- the complete text that was being parsed
    err     -- the error; its ``pos`` (character offset into *input*) and
               ``msg`` attributes are used
    scanner -- printed last, after "List of nearby tokens:"
    """
    p = err.pos
    # Figure out the line number
    line = input[:p].count('\n')
    print(err.msg + " on line " + repr(line + 1) + ":")

    # Work on a window of at most 80 characters either side of the error,
    # and make p relative to that window.
    window_start = max(p - 80, 0)
    text = input[window_start:p + 80]
    p = p - window_start

    # Strip to the left: cut after the LAST line break before the error.
    # The original picked the earlier of the last '\n' and the last '\r',
    # which left a line break in the excerpt and shifted the caret on
    # input with "\r\n" (or mixed) line endings.
    before = text[:p]
    i = max(before.rfind('\n'), before.rfind('\r'))
    if 0 <= i < p:
        p = p - i - 1
        text = text[i + 1:]

    # Strip to the right: cut at the FIRST line break at or after the error.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text: each pass drops 10 characters and adds "...",
    # so the caret moves left by 7.
    while len(text) > 70 and p > 60:
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator.  Each line is built
    # as one string, keeping the spacing the print statements produced.
    print('>  ' + text)
    print('>  ' + ' ' * p + '^')
    print('List of nearby tokens: ' + str(scanner))
|
||||||
|
|
||||||
|
def wrap_error_reporter(parser, rule):
|
||||||
|
return_value = None
|
||||||
|
try:
|
||||||
|
return_value = getattr(parser, rule)()
|
||||||
|
except SyntaxError, s:
|
||||||
|
input = parser._scanner.input
|
||||||
|
try:
|
||||||
|
print_error(input, s, parser._scanner)
|
||||||
|
except ImportError:
|
||||||
|
print 'Syntax Error',s.msg,'on line',1+count(input[:s.pos], '\n')
|
||||||
|
except NoMoreTokens:
|
||||||
|
print 'Could not complete parsing; stopped around here:'
|
||||||
|
print parser._scanner
|
||||||
|
return return_value
|
Loading…
Reference in New Issue