importation modification initiale de py-dom-xpath
This commit is contained in:
parent
e9b455de2a
commit
10dc28f319
|
@ -88,6 +88,7 @@ addp('ulib.ext.tarfile', ['README.txt'])
|
|||
addp('ulib.ext.web')
|
||||
addp('ulib.ext.web.wsgiserver', ['LICENSE.txt'])
|
||||
addp('ulib.ext.web.contrib')
|
||||
addp('ulib.ext.xpath')
|
||||
addp('ulib.formats')
|
||||
addp('ulib.gae')
|
||||
addp('ulib.json')
|
||||
|
|
|
@ -0,0 +1,297 @@
|
|||
:mod:`xpath` --- XPath Queries For DOM Trees
|
||||
============================================
|
||||
The :mod:`xpath` module is a pure Python implementation of the XPath query
|
||||
language, operating on DOM documents. It supports most of XPath 1.0, with
|
||||
the following exceptions:
|
||||
|
||||
* The namespace axis is not supported.
|
||||
* The ``round()`` function rounds toward 0, not towards positive infinity.
|
||||
|
||||
The following XPath 2.0 features are supported:
|
||||
|
||||
* A default namespace may be supplied in the expression context.
|
||||
* Node tests may have a wildcard namespace. (e.g., ``*:name``.)
|
||||
|
||||
This module provides the following functions for evaluating XPath expressions:
|
||||
|
||||
.. function:: find(expr, node, [\**kwargs])
|
||||
|
||||
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||
and return:
|
||||
|
||||
* ``True`` or ``False``, when the expression has a boolean result.
|
||||
* A :class:`float`, when the expression has a numeric result.
|
||||
* A :class:`unicode`, when the expression has a string result.
|
||||
* A list of :class:`xml.dom.Node`, when the expression has a
|
||||
node-set result.
|
||||
|
||||
.. function:: findnode(expr, node, [\**kwargs])
|
||||
|
||||
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||
and return a single node. If the result of the expression is a non-empty
|
||||
node-set, return the first node in the set. If the result is an empty
|
||||
node-set, return ``None``. If the result is not a node-set, raise
|
||||
:exc:`XPathTypeError`.
|
||||
|
||||
.. function:: findvalue(expr, node, [\**kwargs])
|
||||
|
||||
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||
and return the string-value of the result. If the result is an empty
|
||||
node-set, return ``None`` instead.
|
||||
|
||||
.. function:: findvalues(expr, node, [\**kwargs])
|
||||
|
||||
Evaluate the XPath expression *expr* with *node* as the context node,
|
||||
and return a list of the string-values of the resulting node-set. If
|
||||
the result is not a node-set, raise :exc:`XPathTypeError`.
|
||||
|
||||
The above functions take the following optional keyword arguments
|
||||
defining the evaluation context:
|
||||
|
||||
*context*
|
||||
A :class:`XPathContext` object containing the evaluation context. It
|
||||
is legal to supply both a context object and additional arguments
|
||||
extending its contents.
|
||||
|
||||
*default_namespace*
|
||||
The default namespace URI, which will be used for any unqualified name
|
||||
in the XPath expression.
|
||||
|
||||
*namespaces*
|
||||
A mapping of prefixes to namespace URIs.
|
||||
|
||||
*variables*
|
||||
A mapping of variable names to values. To map a variable in a specific
|
||||
namespace, use a two element tuple of the (namespace URI, name) as the key.
|
||||
|
||||
Additional keyword arguments will be used as variable bindings.
|
||||
|
||||
Basic Queries
|
||||
-------------
|
||||
The examples in this section use this XML document: ::
|
||||
|
||||
<doc>
|
||||
<item name="python" />
|
||||
<item name="parrot" />
|
||||
</doc>
|
||||
|
||||
Select the ``item`` elements in a document: ::
|
||||
|
||||
>>> xpath.find('//item', doc)
|
||||
[<DOM Element: item at 0x474468>, <DOM Element: item at 0x27d7d8>]
|
||||
|
||||
Select the ``name`` attribute of the first item element (note that this returns
|
||||
a list of Attr nodes): ::
|
||||
|
||||
>>> xpath.find('//item[1]/@name', doc)
|
||||
[<xml.dom.minidom.Attr instance at 0x474300>]
|
||||
|
||||
Select the string-value of the ``name`` attribute of the last item element: ::
|
||||
|
||||
>>> xpath.findvalue('//item[last()]/@name', doc)
|
||||
u'parrot'
|
||||
|
||||
Select the first item element with a ``name`` attribute that starts with "p": ::
|
||||
|
||||
>>> xpath.findnode('//item[starts-with(@name,"p")]', doc)
|
||||
<DOM Element: item at 0x474468>
|
||||
|
||||
Namespaces
|
||||
----------
|
||||
The examples in this section use this XML document: ::
|
||||
|
||||
<doc xmlns="http://flying.example.org/"
|
||||
xmlns:circus="http://circus.example.org/">
|
||||
<item>python</item>
|
||||
<circus:item>parrot</circus:item>
|
||||
</doc>
|
||||
|
||||
The *namespaces* argument to the evaluation functions provides a dictionary
|
||||
of prefixes to namespace URIs. Prefixed QNames in expressions will be
|
||||
expanded according to this mapping.
|
||||
|
||||
To select the string-values of the ``item`` elements in the
|
||||
"\http://circus.example.org/" namespace: ::
|
||||
|
||||
>>> xpath.findvalues('//prefix:item', doc,
|
||||
... namespaces={'prefix':'http://circus.example.org/'})
|
||||
[u'parrot']
|
||||
|
||||
The *default_namespace* argument provides a namespace URI that will be
|
||||
used for any unprefixed QName appearing in a position where an element
|
||||
name is expected. (Default namespaces are a feature of XPath 2.0.)
|
||||
|
||||
To select the string-values of the ``item`` elements in the
|
||||
"\http://flying.example.org/" namespace: ::
|
||||
|
||||
>>> xpath.findvalues('//item', doc,
|
||||
... default_namespace='http://flying.example.org/')
|
||||
[u'python']
|
||||
|
||||
When a *default_namespace* argument is not provided, the default namespace
|
||||
is that of the document element. When a *namespaces* argument is not
|
||||
provided, the prefix declarations consist of all prefixes defined on the
|
||||
document element.
|
||||
|
||||
To select the string values of all the ``item`` elements: ::
|
||||
|
||||
>>> xpath.findvalues('//item | //circus:item', doc)
|
||||
[u'python', u'parrot']
|
||||
|
||||
The :mod:`xpath` module supports wildcard matches against both the prefix
|
||||
and local name. (XPath 1.0 only supports wildcard matches against the local
|
||||
name; XPath 2.0 adds support for wildcard matches against the prefix.)
|
||||
|
||||
To select all children of the document element, regardless of namespace: ::
|
||||
|
||||
>>> xpath.find('/*:*/*:*', doc)
|
||||
[<DOM Element: item at 0x474d00>, <DOM Element: circus:item at 0x4743a0>]
|
||||
|
||||
Variables
|
||||
---------
|
||||
The examples in this section use this XML document: ::
|
||||
|
||||
<doc>
|
||||
<item id="1">python</item>
|
||||
<item id="2">parrot</item>
|
||||
</doc>
|
||||
|
||||
XPath variables may be passed to the evaluation functions as keyword
|
||||
arguments: ::
|
||||
|
||||
>>> xpath.findvalue('//item[@id = $id]', doc, id=2)
|
||||
u'parrot'
|
||||
|
||||
It is also possible to pass a dictionary of variables to an evaluation
|
||||
function with the *variables* keyword argument: ::
|
||||
|
||||
>>> xpath.findvalue('//item[@id = $id]', doc, variables={'id':1})
|
||||
u'python'
|
||||
|
||||
To define a variable within a specific namespace, use a tuple of
|
||||
``(namespace-URI, local-name)`` as the key in the variable dictionary: ::
|
||||
|
||||
>>> variables = { ('http://python.example.org/', 'id') : 1 }
|
||||
>>> namespaces = { 'python' : 'http://python.example.org/' }
|
||||
>>> xpath.findvalue('//item[@id = $python:id]', doc,
|
||||
... variables=variables, namespaces=namespaces)
|
||||
u'python'
|
||||
|
||||
Compiled Expression Objects
|
||||
---------------------------
|
||||
.. class:: XPath(expr)
|
||||
|
||||
An expression object which contains a compiled form of the XPath
|
||||
expression *expr*.
|
||||
|
||||
Under most circumstances, it is not necessary to directly use this class,
|
||||
since the :func:`find` et al. functions cache compiled expressions.
|
||||
|
||||
.. method:: find(node, [\**kwargs])
|
||||
findnode(node, [\**kwargs])
|
||||
findvalue(node, [\**kwargs])
|
||||
findvalues(node, [\**kwargs])
|
||||
|
||||
These methods are identical to the functions of the same name.
|
||||
|
||||
Create and use a compiled expression: ::
|
||||
|
||||
>>> expr = xpath.XPath('//text()')
|
||||
>>> print expr
|
||||
/descendant-or-self::node()/child::text()
|
||||
>>> expr.find(doc)
|
||||
[<DOM Text node "Monty">]
|
||||
|
||||
Expression Context Objects
|
||||
--------------------------
|
||||
.. class:: XPathContext([document,] [\**kwargs])
|
||||
|
||||
The static context of an XPath expression. Context objects may be
|
||||
created with the same keyword arguments accepted by the expression
|
||||
evaluation functions.
|
||||
|
||||
The *document* argument may contain a DOM node. If provided, the
|
||||
default namespace and namespace declarations will be initialized from
|
||||
the document element of this node.
|
||||
|
||||
The context contains the following attributes and methods:
|
||||
|
||||
.. attribute:: default_namespace
|
||||
|
||||
The default namespace URI.
|
||||
|
||||
.. attribute:: namespaces
|
||||
|
||||
The mapping of prefixes to namespace URIs.
|
||||
|
||||
.. attribute:: variables
|
||||
|
||||
The mapping of variables to values. The keys of this map may
|
||||
be either strings for variables with no namespace, or
|
||||
(namespaceURI, name) tuples for variables contained in a
|
||||
namespace.
|
||||
|
||||
.. method:: find(expr, node, [\**kwargs])
|
||||
findnode(expr, node, [\**kwargs])
|
||||
findvalue(expr, node, [\**kwargs])
|
||||
findvalues(expr, node, [\**kwargs])
|
||||
|
||||
Evaluate *expr* in the context with *node* as the context node.
|
||||
*expr* may be either a string or a :class:`XPath` object.
|
||||
|
||||
Create and use an evaluation context: ::
|
||||
|
||||
>>> context = xpath.XPathContext()
|
||||
>>> context.namespaces['py'] = 'http://python.example.org/'
|
||||
>>> context.variables['min'] = 4
|
||||
>>> context.findvalues('//item[@id>=$min and @id<=$max]', doc, max=6)
|
||||
[u'4', u'5', u'6']
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
This module defines the following exceptions:
|
||||
|
||||
.. exception:: XPathError
|
||||
|
||||
Base exception class used for all XPath exceptions.
|
||||
|
||||
.. exception:: XPathNotImplementedError
|
||||
|
||||
Raised when an XPath expression contains a feature of XPath which
|
||||
has not been implemented.
|
||||
|
||||
.. exception:: XPathParseError
|
||||
|
||||
Raised when an XPath expression could not be parsed.
|
||||
|
||||
.. exception:: XPathTypeError
|
||||
|
||||
Raised when an XPath expression is found to contain a type error.
|
||||
For example, the expression "string()/node()" contains a type error
|
||||
because the "string()" function does not return a node-set.
|
||||
|
||||
.. exception:: XPathUnknownFunctionError
|
||||
|
||||
Raised when an XPath expression contains a function that has no
|
||||
binding in the expression context.
|
||||
|
||||
.. exception:: XPathUnknownPrefixError
|
||||
|
||||
Raised when an XPath expression contains a QName with a namespace
|
||||
prefix that has no corresponding namespace declaration in the expression
|
||||
context.
|
||||
|
||||
.. exception:: XPathUnknownVariableError
|
||||
|
||||
Raised when an XPath expression contains a variable that has no
|
||||
binding in the expression context.
|
||||
|
||||
References
|
||||
----------
|
||||
.. seealso::
|
||||
|
||||
`XML Path Language (XPath) Version 1.0 <http://www.w3.org/TR/xpath>`_
|
||||
The W3C recommendation upon which this module is based.
|
||||
|
||||
`XML Path Language (XPath) 2.0 <http://www.w3.org/TR/xpath20/>`_
|
||||
Second version of XPath, mostly unsupported by this module.
|
|
@ -0,0 +1,23 @@
|
|||
import exceptions
|
||||
|
||||
from _xpath import api, XPathContext, XPath
|
||||
from exceptions import *
|
||||
|
||||
# Names exported by a star-import: the four evaluation helpers, the two
# public classes, and every name in the exceptions module that does not
# start with an underscore.
__all__ = [
    'find',
    'findnode',
    'findvalue',
    'findvalues',
    'XPathContext',
    'XPath',
]
__all__.extend(name for name in dir(exceptions) if not name.startswith('_'))
|
||||
|
||||
@api
def find(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node and return the result.

    *expr* is resolved through ``XPath.get()``; all keyword arguments are
    forwarded unchanged to the resulting object's ``find()`` method.

    """
    compiled = XPath.get(expr)
    return compiled.find(node, **kwargs)
|
||||
|
||||
@api
def findnode(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node and return one node.

    *expr* is resolved through ``XPath.get()``; all keyword arguments are
    forwarded unchanged to the resulting object's ``findnode()`` method.

    """
    compiled = XPath.get(expr)
    return compiled.findnode(node, **kwargs)
|
||||
|
||||
@api
def findvalue(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node and return its value.

    *expr* is resolved through ``XPath.get()``; all keyword arguments are
    forwarded unchanged to the resulting object's ``findvalue()`` method.

    """
    compiled = XPath.get(expr)
    return compiled.findvalue(node, **kwargs)
|
||||
|
||||
@api
def findvalues(expr, node, **kwargs):
    """Evaluate *expr* with *node* as the context node and return a list
    of values.

    *expr* is resolved through ``XPath.get()``; all keyword arguments are
    forwarded unchanged to the resulting object's ``findvalues()`` method.

    """
    compiled = XPath.get(expr)
    return compiled.findvalues(node, **kwargs)
|
|
@ -0,0 +1,143 @@
|
|||
import expr as E
|
||||
import parser as P
|
||||
import yappsrt as Y
|
||||
|
||||
from exceptions import *
|
||||
|
||||
def api(f):
    """Decorator for functions and methods that are part of the external
    module API and that can throw XPathError exceptions.

    The call stack for these exceptions can be very large, and not very
    interesting to the user.  The wrapper catches XPathError and raises
    the same exception object again from its own frame, which is how the
    stack is meant to be trimmed.

    The wrapper takes over the name, docstring and other metadata of *f*
    through functools.wraps.

    """
    # Imported here so the decorator does not rely on a module-level import.
    from functools import wraps

    @wraps(f)
    def api_function(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except XPathError as e:
            # Deliberately "raise e" rather than a bare "raise": the
            # exception is raised anew from this frame.
            raise e
    return api_function
|
||||
|
||||
class XPathContext(object):
    """The static context used when evaluating XPath expressions.

    Attributes:

    default_namespace -- The default namespace URI, or None.
    namespaces        -- A dict mapping prefixes to namespace URIs.
    variables         -- A dict mapping variable names (or
                         (namespace URI, name) tuples) to values.

    """

    def __init__(self, document=None, **kwargs):
        """Create a context, optionally seeded from a DOM node.

        document -- A DOM node.  When it is not a document node its
                    ownerDocument is used.  The ``xmlns`` attribute of the
                    document element becomes the default namespace and each
                    ``xmlns:<prefix>`` attribute becomes a prefix binding.
        kwargs   -- Passed to update() after the document has been read.

        """
        self.default_namespace = None
        self.namespaces = {}
        self.variables = {}

        if document is not None:
            if document.nodeType != document.DOCUMENT_NODE:
                document = document.ownerDocument
            # Guard against a node that reports no owner document.
            root = None if document is None else document.documentElement
            if root is not None:
                attrs = root.attributes
                for i in range(attrs.length):
                    attr = attrs.item(i)
                    if attr.name == 'xmlns':
                        self.default_namespace = attr.value
                    elif attr.name.startswith('xmlns:'):
                        # Strip the leading 'xmlns:' to get the prefix.
                        self.namespaces[attr.name[6:]] = attr.value

        self.update(**kwargs)

    def clone(self):
        """Return a new context holding copies of this context's
        namespace and variable dicts."""
        dup = XPathContext()
        dup.default_namespace = self.default_namespace
        dup.namespaces.update(self.namespaces)
        dup.variables.update(self.variables)
        return dup

    def update(self, default_namespace=None, namespaces=None,
               variables=None, **kwargs):
        """Modify the context in place.

        default_namespace -- When not None, replaces the default namespace.
        namespaces        -- When not None, replaces the prefix mapping.
        variables         -- When not None, replaces the variable mapping.
        kwargs            -- Added to the variable mapping as bindings.

        The supplied mappings are copied, so the caller's objects are
        never modified by this context (in particular not by the keyword
        bindings merged in below).

        """
        if default_namespace is not None:
            self.default_namespace = default_namespace
        if namespaces is not None:
            self.namespaces = dict(namespaces)
        if variables is not None:
            self.variables = dict(variables)
        self.variables.update(kwargs)

    @api
    def find(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* using this context; see XPath.find()."""
        return XPath.get(expr).find(node, context=self, **kwargs)

    @api
    def findnode(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* using this context; see
        XPath.findnode()."""
        return XPath.get(expr).findnode(node, context=self, **kwargs)

    @api
    def findvalue(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* using this context; see
        XPath.findvalue()."""
        return XPath.get(expr).findvalue(node, context=self, **kwargs)

    @api
    def findvalues(self, expr, node, **kwargs):
        """Evaluate *expr* on *node* using this context; see
        XPath.findvalues()."""
        return XPath.get(expr).findvalues(node, context=self, **kwargs)
|
||||
|
||||
class XPath(object):
    """A compiled XPath expression.

    Instances obtained through get() are kept in a class-level cache keyed
    by the expression passed in.

    """

    # Upper bound on the number of cached expressions; the cache is
    # emptied completely once this many entries are stored.
    _max_cache = 100
    _cache = {}

    def __init__(self, expr):
        """Parse *expr* (converted with str()).

        Raises XPathParseError when the parser reports a syntax error.

        """
        try:
            parser = P.XPath(P.XPathScanner(str(expr)))
            self.expr = parser.XPath()
        except Y.SyntaxError as e:
            raise XPathParseError(str(expr), e.pos, e.msg)

    @classmethod
    def get(cls, s):
        """Return an expression object for *s*.

        *s* is returned as is when it already is an instance of this
        class; otherwise it is looked up in the cache and compiled on a
        miss.

        """
        if isinstance(s, cls):
            return s
        try:
            return cls._cache[s]
        except KeyError:
            # Make room before inserting so that the cache never holds
            # more than _max_cache entries.
            if len(cls._cache) >= cls._max_cache:
                cls._cache.clear()
            expr = cls(s)
            cls._cache[s] = expr
            return expr

    @api
    def find(self, node, context=None, **kwargs):
        """Evaluate the expression with *node* as the context node.

        context -- An XPathContext.  When None, one is built from *node*
                   and *kwargs*.  When given together with *kwargs*, a
                   clone is updated so the caller's context is untouched.

        The expression is evaluated with context position 1 and context
        size 1.

        """
        if context is None:
            context = XPathContext(node, **kwargs)
        elif kwargs:
            context = context.clone()
            context.update(**kwargs)
        return self.expr.evaluate(node, 1, 1, context)

    @api
    def findnode(self, node, context=None, **kwargs):
        """Return the first node of the resulting node-set, or None when
        the node-set is empty.

        Raises XPathTypeError when the result is not a node-set.

        """
        result = self.find(node, context, **kwargs)
        if not E.nodesetp(result):
            raise XPathTypeError("expression is not a node-set")
        if len(result) == 0:
            return None
        return result[0]

    @api
    def findvalue(self, node, context=None, **kwargs):
        """Return the value of the expression.

        An empty node-set gives None, a non-empty node-set is converted
        with E.string(), and any other result is returned unchanged.

        """
        result = self.find(node, context, **kwargs)
        if E.nodesetp(result):
            if len(result) == 0:
                return None
            result = E.string(result)
        return result

    @api
    def findvalues(self, node, context=None, **kwargs):
        """Return the list of string-values of the resulting node-set.

        Raises XPathTypeError when the result is not a node-set.

        """
        result = self.find(node, context, **kwargs)
        if not E.nodesetp(result):
            raise XPathTypeError("expression is not a node-set")
        return [E.string_value(x) for x in result]

    def __repr__(self):
        return '%s.%s(%s)' % (self.__class__.__module__,
                              self.__class__.__name__,
                              repr(str(self.expr)))

    def __str__(self):
        return str(self.expr)
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
class XPathError(Exception):
    """Base exception class used for all XPath exceptions."""


class XPathNotImplementedError(XPathError):
    """Raised when an XPath expression contains a feature of XPath which
    has not been implemented.

    """


class XPathParseError(XPathError):
    """Raised when an XPath expression could not be parsed.

    Attributes:

    expr    -- The text of the expression.
    pos     -- Offset within the expression; used to place the caret in
               the string form of the exception.
    message -- The message supplied by the caller.

    """

    def __init__(self, expr, pos, message):
        # Pass the arguments on to the base class so that ``args`` is
        # populated: repr() then shows them, and pickle/copy can rebuild
        # the exception by calling the class with ``args``.
        XPathError.__init__(self, expr, pos, message)
        self.expr = expr
        self.pos = pos
        self.message = message

    def __str__(self):
        # The expression is shown on a single line with a caret line
        # underneath pointing at ``pos``.
        return ("Syntax error:\n" +
                self.expr.replace("\n", " ") + "\n" +
                ("-" * self.pos) + "^")


class XPathTypeError(XPathError):
    """Raised when an XPath expression is found to contain a type error.
    For example, the expression "string()/node()" contains a type error
    because the "string()" function does not return a node-set.

    """


class XPathUnknownFunctionError(XPathError):
    """Raised when an XPath expression contains a function that has no
    binding in the expression context.

    """


class XPathUnknownPrefixError(XPathError):
    """Raised when an XPath expression contains a QName with a namespace
    prefix that has no corresponding namespace declaration in the expression
    context.

    """


class XPathUnknownVariableError(XPathError):
    """Raised when an XPath expression contains a variable that has no
    binding in the expression context.

    """
|
|
@ -0,0 +1,896 @@
|
|||
from __future__ import division
|
||||
from itertools import *
|
||||
import math
|
||||
import operator
|
||||
import re
|
||||
import xml.dom
|
||||
import weakref
|
||||
|
||||
from exceptions import *
|
||||
|
||||
|
||||
#
|
||||
# Data model functions.
|
||||
#
|
||||
|
||||
def string_value(node):
    """Return the string-value of a DOM node.

    Document and element nodes give the concatenated data of their
    descendant text nodes, attribute nodes give their value, and
    processing-instruction, comment and text nodes give their data.
    Any other node type gives None.

    """
    kind = node.nodeType

    if kind in (node.DOCUMENT_NODE, node.ELEMENT_NODE):
        return u''.join(child.data
                        for child in axes['descendant'](node)
                        if child.nodeType == child.TEXT_NODE)

    if kind == node.ATTRIBUTE_NODE:
        return node.value

    if kind in (node.PROCESSING_INSTRUCTION_NODE,
                node.COMMENT_NODE,
                node.TEXT_NODE):
        return node.data

    return None
|
||||
|
||||
def document_order(node):
    """Return a sort key that orders nodes by document position.

    The key is the list of sibling indexes leading from the root to the
    node: a node without a parent has the key [], its third child has
    [2], and the first child of that child has [2, 0].

    An attribute gets the key of its owner element followed by -1 (which
    places it before all children of that element) and the attribute
    name, e.g. [2, 0, -1, 'href'].

    """
    if node.nodeType == node.ATTRIBUTE_NODE:
        return document_order(node.ownerElement) + [-1, node.name]

    parent = node.parentNode
    if parent is None:
        # No parent: this is (hopefully) the document root.
        return []

    # Count the siblings that precede this node.
    index = 0
    earlier = node.previousSibling
    while earlier is not None:
        index += 1
        earlier = earlier.previousSibling

    return document_order(parent) + [index]
|
||||
|
||||
#
|
||||
# Type functions, operating on the various XPath types.
|
||||
#
|
||||
# Internally, we use the following representations:
|
||||
# nodeset - list of DOM tree nodes in document order
|
||||
# string - str or unicode
|
||||
# boolean - bool
|
||||
# number - int or float
|
||||
#
|
||||
|
||||
def nodeset(v):
    """Convert a value to a nodeset.

    Returns *v* unchanged when nodesetp() accepts it, and raises
    XPathTypeError otherwise.

    """
    if not nodesetp(v):
        # Call-style raise: same behaviour as the old "raise X, msg" form
        # but also valid on newer Python versions.
        raise XPathTypeError("value is not a node-set")
    return v
|
||||
|
||||
def nodesetp(v):
    """Return True iff 'v' is a node-set (represented as a list),
    and False otherwise."""
    return isinstance(v, list)
|
||||
|
||||
def string(v):
    """Convert a value to a string.

    A node-set gives the string-value of its first node (u'' when empty).
    A number gives u'NaN', u'Infinity' or u'-Infinity' for the special
    values, and otherwise its unicode() form, printed without a
    fractional part when it is integral and not above 0xffffffff.
    A boolean gives u'true' or u'false'.  Anything else is returned
    unchanged.

    """
    if nodesetp(v):
        if not v:
            return u''
        return string_value(v[0])
    elif numberp(v):
        # NaN is the only value unequal to itself.  It must be handled
        # before int(v) below, which raises ValueError for NaN.
        if v != v:
            return u'NaN'
        if v == float('inf'):
            return u'Infinity'
        if v == float('-inf'):
            return u'-Infinity'
        if int(v) == v and v <= 0xffffffff:
            v = int(v)
        return unicode(v)
    elif booleanp(v):
        return u'true' if v else u'false'
    return v
|
||||
|
||||
def stringp(v):
    """Tell whether 'v' is a string, i.e. an instance of basestring."""
    is_text = isinstance(v, basestring)
    return is_text
|
||||
|
||||
def boolean(v):
    """Convert a value to a boolean.

    A node-set is true when it is non-empty, a number when it is neither
    zero nor NaN, and a string when it is not ''.  Any other value is
    returned unchanged.

    """
    if nodesetp(v):
        return len(v) > 0
    if numberp(v):
        # "v != v" holds only for NaN.
        return not (v == 0 or v != v)
    if stringp(v):
        return v != ''
    return v
|
||||
|
||||
def booleanp(v):
    """Tell whether 'v' is a boolean, i.e. an instance of bool."""
    is_bool = isinstance(v, bool)
    return is_bool
|
||||
|
||||
def number(v):
    """Convert a value to a number.

    A node-set is first converted with string().  The value is then passed
    to float(); when float() raises ValueError the result is NaN.

    """
    source = string(v) if nodesetp(v) else v
    try:
        return float(source)
    except ValueError:
        return float('NaN')
|
||||
|
||||
def numberp(v):
    """Tell whether 'v' is a number: an int or a float, but not a bool."""
    # bool is a subclass of int, so it has to be excluded explicitly.
    return isinstance(v, (int, float)) and not isinstance(v, bool)
|
||||
|
||||
class Expr(object):
    """Abstract base class of every node in a parsed XPath expression."""

    def evaluate(self, node, pos, size, context):
        """Evaluate the expression and return its XPath value.

        node    -- The context node.
        pos     -- The context position.
        size    -- The context size.
        context -- The expression context.

        The value is a nodeset, a string, a boolean, or a number.  This
        base implementation has no body and returns None.

        """
|
||||
|
||||
class BinaryOperatorExpr(Expr):
    """Common base of the expressions built from a binary operator.

    Stores the operator text and both operand expressions.

    """

    def __init__(self, op, left, right):
        self.op, self.left, self.right = op, left, right

    def evaluate(self, node, pos, size, context):
        """Evaluate both operands and combine them with operate().

        A subclass must either provide operate(a, b) or replace this
        method.

        """
        operate = self.operate
        lhs = self.left.evaluate(node, pos, size, context)
        rhs = self.right.evaluate(node, pos, size, context)
        return operate(lhs, rhs)

    def __str__(self):
        parts = (self.left, self.op, self.right)
        return '(%s %s %s)' % parts
|
||||
|
||||
class AndExpr(BinaryOperatorExpr):
    """<x> and <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the conjunction of both operands converted with boolean().

        The right operand is only evaluated when the left one is true
        (XPath boolean operations short-circuit).

        """
        # Convert each operand with boolean() *before* testing it.  Python
        # truthiness of the raw value is not the same thing: NaN is truthy
        # in Python but converts to false.
        if not boolean(self.left.evaluate(node, pos, size, context)):
            return False
        return boolean(self.right.evaluate(node, pos, size, context))
|
||||
|
||||
class OrExpr(BinaryOperatorExpr):
    """<x> or <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the disjunction of both operands converted with boolean().

        The right operand is only evaluated when the left one is false
        (XPath boolean operations short-circuit).

        """
        # Convert each operand with boolean() *before* testing it.  Python
        # truthiness of the raw value is not the same thing: NaN is truthy
        # in Python but converts to false, so it must not short-circuit.
        if boolean(self.left.evaluate(node, pos, size, context)):
            return True
        return boolean(self.right.evaluate(node, pos, size, context))
|
||||
|
||||
class EqualityExpr(BinaryOperatorExpr):
    """<x> = <y>, <x> != <y>, etc."""

    # Maps the operator text to the function that compares two converted
    # operands.
    operators = {
        '='  : operator.eq,
        '!=' : operator.ne,
        '<=' : operator.le,
        '<'  : operator.lt,
        '>=' : operator.ge,
        '>'  : operator.gt,
    }

    def operate(self, a, b):
        """Compare two XPath values and return a boolean.

        * Node-set against boolean: the node-set is converted with
          boolean() and the two booleans are compared (XPath 1.0,
          section 3.4).
        * Node-set against anything else: true when the comparison holds
          for the string-value of at least one node.
        * For '=' and '!=', both operands are converted to booleans if
          either is a boolean, else to numbers if either is a number,
          else to strings.
        * For the ordering operators both operands are converted to
          numbers.

        """
        # A node-set compared with a boolean is judged as a whole, not
        # node by node; otherwise an empty node-set could never equal
        # false().
        if nodesetp(a) and booleanp(b):
            a = boolean(a)
        elif booleanp(a) and nodesetp(b):
            b = boolean(b)

        if nodesetp(a):
            for node in a:
                if self.operate(string_value(node), b):
                    return True
            return False

        if nodesetp(b):
            for node in b:
                if self.operate(a, string_value(node)):
                    return True
            return False

        if self.op in ('=', '!='):
            if booleanp(a) or booleanp(b):
                convert = boolean
            elif numberp(a) or numberp(b):
                convert = number
            else:
                convert = string
        else:
            convert = number

        a, b = convert(a), convert(b)
        return self.operators[self.op](a, b)
|
||||
|
||||
def divop(x, y):
    """Divide *x* by *y* without raising on a zero divisor.

    When the division raises ZeroDivisionError the IEEE 754 result is
    returned instead: NaN when *x* is zero or NaN, otherwise an infinity
    whose sign is the product of the signs of *x* and *y* (so that a
    negative-zero divisor flips the sign).

    """
    try:
        return x / y
    except ZeroDivisionError:
        # "x != x" holds only for NaN.
        if x != x or x == 0:
            return float('nan')
        # copysign() exposes the sign of a zero, which "<" cannot see.
        if math.copysign(1.0, x) != math.copysign(1.0, y):
            return float('-inf')
        return float('inf')
|
||||
|
||||
def _modop(x, y):
    """Return math.fmod(x, y), or NaN where fmod() raises ValueError
    (e.g. a zero divisor or an infinite dividend)."""
    try:
        return math.fmod(x, y)
    except ValueError:
        return float('nan')


class ArithmeticalExpr(BinaryOperatorExpr):
    """<x> + <y>, <x> - <y>, etc."""

    # Maps the operator text to its implementation.
    # Note that we must use math.fmod for the correct modulo semantics;
    # it is wrapped so that an undefined result is NaN, not an exception.
    operators = {
        '+'   : operator.add,
        '-'   : operator.sub,
        '*'   : operator.mul,
        'div' : divop,
        'mod' : _modop,
    }

    def operate(self, a, b):
        """Convert both operands with number() and apply the operator."""
        return self.operators[self.op](number(a), number(b))
|
||||
|
||||
class UnionExpr(BinaryOperatorExpr):
    """<x> | <y>"""

    def operate(self, a, b):
        """Return the distinct nodes of both node-sets, sorted with
        document_order() as the key.

        Raises XPathTypeError unless both operands are node-sets.

        """
        if not (nodesetp(a) and nodesetp(b)):
            raise XPathTypeError("union operand is not a node-set")

        # The set drops duplicates but loses the ordering, so the result
        # has to be sorted back into document order.
        distinct = set(chain(a, b))
        return sorted(distinct, key=document_order)
|
||||
|
||||
class NegationExpr(Expr):
    """- <x>"""

    def __init__(self, expr):
        self.expr = expr

    def evaluate(self, node, pos, size, context):
        """Evaluate the operand, convert it with number() and negate it."""
        operand = self.expr.evaluate(node, pos, size, context)
        return -number(operand)

    def __str__(self):
        return '(-%s)' % (self.expr,)
|
||||
|
||||
class LiteralExpr(Expr):
    """A literal value in an expression: a number or a string."""

    def __init__(self, literal):
        self.literal = literal

    def evaluate(self, node, pos, size, context):
        """Return the stored literal; the context is not consulted."""
        return self.literal

    def __str__(self):
        value = self.literal
        if not stringp(value):
            return string(value)
        # Use double quotes when the text itself contains a single quote.
        quote = '"' if "'" in value else "'"
        return '%s%s%s' % (quote, value, quote)
|
||||
|
||||
class VariableReference(Expr):
    """A reference to a variable, optionally qualified by a prefix."""

    def __init__(self, prefix, name):
        self.prefix = prefix
        self.name = name

    def evaluate(self, node, pos, size, context):
        """Look the variable up in ``context.variables``.

        Without a prefix the key is the variable name.  With a prefix the
        key is the tuple (namespace URI, name), the URI being taken from
        ``context.namespaces``.

        Raises XPathUnknownPrefixError when the prefix has no binding and
        XPathUnknownVariableError when the variable has none.

        """
        if self.prefix is None:
            key = self.name
        else:
            try:
                key = (context.namespaces[self.prefix], self.name)
            except KeyError:
                raise XPathUnknownPrefixError(self.prefix)

        try:
            return context.variables[key]
        except KeyError:
            raise XPathUnknownVariableError(str(self))

    def __str__(self):
        if self.prefix is not None:
            return '$%s:%s' % (self.prefix, self.name)
        return '$%s' % self.name
|
||||
|
||||
class Function(Expr):
|
||||
"""Functions."""
|
||||
|
||||
def __init__(self, name, args):
|
||||
self.name = name
|
||||
self.args = args
|
||||
self.evaluate = getattr(self, 'f_%s' % name.replace('-', '_'), None)
|
||||
if self.evaluate is None:
|
||||
raise XPathUnknownFunctionError, 'unknown function "%s()"' % name
|
||||
|
||||
if len(self.args) < self.evaluate.minargs:
|
||||
raise XPathTypeError, 'too few arguments for "%s()"' % name
|
||||
if (self.evaluate.maxargs is not None and
|
||||
len(self.args) > self.evaluate.maxargs):
|
||||
raise XPathTypeError, 'too many arguments for "%s()"' % name
|
||||
|
||||
#
|
||||
# XPath functions are implemented by methods of the Function class.
|
||||
#
|
||||
# A method implementing an XPath function is decorated with the function
|
||||
# decorator, and receives the evaluated function arguments as positional
|
||||
# parameters.
|
||||
#
|
||||
|
||||
def function(minargs, maxargs, implicit=False, first=False, convert=None):
|
||||
"""Function decorator.
|
||||
|
||||
minargs -- Minimum number of arguments taken by the function.
|
||||
maxargs -- Maximum number of arguments taken by the function.
|
||||
implicit -- True for functions which operate on a nodeset consisting
|
||||
of the current context node when passed no argument.
|
||||
(e.g., string() and number().)
|
||||
convert -- When non-None, a function used to filter function arguments.
|
||||
"""
|
||||
def decorator(f):
|
||||
def new_f(self, node, pos, size, context):
|
||||
if implicit and len(self.args) == 0:
|
||||
args = [[node]]
|
||||
else:
|
||||
args = [x.evaluate(node, pos, size, context)
|
||||
for x in self.args]
|
||||
if first:
|
||||
args[0] = nodeset(args[0])
|
||||
if len(args[0]) > 0:
|
||||
args[0] = args[0][0]
|
||||
else:
|
||||
args[0] = None
|
||||
if convert is not None:
|
||||
args = [convert(x) for x in args]
|
||||
return f(self, node, pos, size, context, *args)
|
||||
|
||||
new_f.minargs = minargs
|
||||
new_f.maxargs = maxargs
|
||||
new_f.__name__ = f.__name__
|
||||
new_f.__doc__ = f.__doc__
|
||||
return new_f
|
||||
return decorator
|
||||
|
||||
# Node Set Functions
|
||||
|
||||
@function(0, 0)
|
||||
def f_last(self, node, pos, size, context):
|
||||
return size
|
||||
|
||||
@function(0, 0)
|
||||
def f_position(self, node, pos, size, context):
|
||||
return pos
|
||||
|
||||
@function(1, 1, convert=nodeset)
|
||||
def f_count(self, node, pos, size, context, nodes):
|
||||
return len(nodes)
|
||||
|
||||
@function(1, 1)
|
||||
def f_id(self, node, pos, size, context, arg):
|
||||
if nodesetp(arg):
|
||||
ids = (string_value(x) for x in arg)
|
||||
else:
|
||||
ids = [string(arg)]
|
||||
if node.nodeType != node.DOCUMENT_NODE:
|
||||
node = node.ownerDocument
|
||||
return list(filter(None, (node.getElementById(id) for id in ids)))
|
||||
|
||||
@function(0, 1, implicit=True, first=True)
|
||||
def f_local_name(self, node, pos, size, context, argnode):
|
||||
if argnode is None:
|
||||
return ''
|
||||
if (argnode.nodeType == argnode.ELEMENT_NODE or
|
||||
argnode.nodeType == argnode.ATTRIBUTE_NODE):
|
||||
return argnode.localName
|
||||
elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
|
||||
return argnode.target
|
||||
return ''
|
||||
|
||||
@function(0, 1, implicit=True, first=True)
|
||||
def f_namespace_uri(self, node, pos, size, context, argnode):
|
||||
if argnode is None:
|
||||
return ''
|
||||
return argnode.namespaceURI
|
||||
|
||||
@function(0, 1, implicit=True, first=True)
|
||||
def f_name(self, node, pos, size, context, argnode):
|
||||
if argnode is None:
|
||||
return ''
|
||||
if argnode.nodeType == argnode.ELEMENT_NODE:
|
||||
return argnode.tagName
|
||||
elif argnode.nodeType == argnode.ATTRIBUTE_NODE:
|
||||
return argnode.name
|
||||
elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
|
||||
return argnode.target
|
||||
return ''
|
||||
|
||||
# String Functions
|
||||
|
||||
@function(0, 1, implicit=True, convert=string)
def f_string(self, node, pos, size, context, arg):
    """XPath ``string()``: return the argument as already converted by the decorator."""
    return arg
|
||||
|
||||
@function(2, None, convert=string)
def f_concat(self, node, pos, size, context, *args):
    """XPath ``concat()``: join all the string arguments end to end."""
    return ''.join(args)
|
||||
|
||||
@function(2, 2, convert=string)
def f_starts_with(self, node, pos, size, context, a, b):
    """XPath ``starts-with()``: report whether *a* begins with *b*."""
    has_prefix = a.startswith(b)
    return has_prefix
|
||||
|
||||
@function(2, 2, convert=string)
def f_contains(self, node, pos, size, context, a, b):
    """XPath ``contains()``: report whether *b* occurs anywhere inside *a*."""
    found = b in a
    return found
|
||||
|
||||
@function(2, 2, convert=string)
def f_substring_before(self, node, pos, size, context, a, b):
    """XPath ``substring-before()``: return the part of *a* before the
    first occurrence of *b*, or ``''`` when *b* does not occur in *a*."""
    where = a.find(b)
    if where < 0:
        return ''
    return a[:where]
|
||||
|
||||
@function(2, 2, convert=string)
def f_substring_after(self, node, pos, size, context, a, b):
    """XPath ``substring-after()``: return the part of *a* after the
    first occurrence of *b*, or ``''`` when *b* does not occur in *a*."""
    where = a.find(b)
    if where < 0:
        return ''
    return a[where + len(b):]
|
||||
|
||||
@function(2, 3)
def f_substring(self, node, pos, size, context, s, start, count=None):
    """XPath ``substring()``: return part of a string.

    *s* is converted to a string, *start* and *count* to numbers.  The
    result holds the characters whose 1-based position ``p`` satisfies
    ``round(start) <= p < round(start) + round(count)``, or every character
    from ``round(start)`` onward when *count* is omitted.  A NaN start or
    end position gives the empty string.

    Rounding follows XPath (ties go toward positive infinity), not Python's
    ``round()``, so ``substring("12345", -0.5, 2)`` is ``"1"``.
    """
    def xpath_round(value):
        # NaN and the infinities round to themselves.
        if value != value or value in (float('inf'), float('-inf')):
            return value
        lower = math.floor(value)
        if value - lower >= 0.5:
            return float(lower) + 1.0
        return float(lower)

    s = string(s)
    length = len(s)

    start = xpath_round(number(start))
    if start != start:
        # Catch NaN
        return ''

    if count is None:
        end = length + 1
    else:
        end = start + xpath_round(number(count))
        if end != end:
            # Catch NaN (also the result of -infinity + infinity)
            return ''
        if end > length:
            # Clamp so that an infinite end can be converted with int().
            end = length + 1

    if start < 1:
        start = 1
    if start > length or end <= start:
        return ''
    return s[int(start)-1:int(end)-1]
|
||||
|
||||
@function(0, 1, implicit=True, convert=string)
def f_string_length(self, node, pos, size, context, s):
    """XPath ``string-length()``: return the number of characters in *s*."""
    length = len(s)
    return length
|
||||
|
||||
@function(0, 1, implicit=True, convert=string)
def f_normalize_space(self, node, pos, size, context, s):
    """XPath ``normalize-space()``: strip the ends of *s* and replace each
    run of whitespace inside it with a single space."""
    trimmed = s.strip()
    return re.sub(r'\s+', ' ', trimmed)
|
||||
|
||||
@function(3, 3, convert=lambda x: unicode(string(x)))
def f_translate(self, node, pos, size, context, s, source, target):
    """XPath ``translate()``: replace or delete characters of *s*.

    Each character of *source* maps to the character at the same index in
    *target*; characters of *source* with no counterpart in *target* are
    deleted.  When a character occurs more than once in *source*, only its
    first occurrence counts.
    """
    # The arguments are coerced to unicode by the decorator because
    # unicode.translate() takes a table keyed by code point.
    mapping = {}
    available = len(target)
    for index, char in enumerate(source):
        code = ord(char)
        if code in mapping:
            continue
        if index < available:
            mapping[code] = target[index]
        else:
            mapping[code] = None
    return s.translate(mapping)
|
||||
|
||||
# Boolean functions
|
||||
|
||||
@function(1, 1, convert=boolean)
def f_boolean(self, node, pos, size, context, b):
    """XPath ``boolean()``: return the argument as already converted by the decorator."""
    return b
|
||||
|
||||
@function(1, 1, convert=boolean)
def f_not(self, node, pos, size, context, b):
    """XPath ``not()``: return the logical negation of the argument."""
    negated = not b
    return negated
|
||||
|
||||
@function(0, 0)
def f_true(self, node, pos, size, context):
    """XPath ``true()``: always return ``True``."""
    return True
|
||||
|
||||
@function(0, 0)
def f_false(self, node, pos, size, context):
    """XPath ``false()``: always return ``False``."""
    return False
|
||||
|
||||
@function(1, 1, convert=string)
def f_lang(self, node, pos, size, context, s):
    """XPath ``lang()``: test the language of the context node.

    Walks the context node and its ancestors; the first element that
    carries an ``xml:lang`` attribute decides the answer.  The comparison
    ignores case and accepts either an exact match or *s* followed by a
    ``-`` suffix.  Returns ``False`` when no such attribute is found.
    """
    wanted = s.lower()
    for candidate in axes['ancestor-or-self'](node):
        if candidate.nodeType != candidate.ELEMENT_NODE:
            continue
        if not candidate.hasAttribute('xml:lang'):
            continue
        declared = candidate.getAttribute('xml:lang').lower()
        # Only the nearest declaration counts, so answer right away.
        return wanted == declared or declared.startswith(wanted + u'-')
    return False
|
||||
|
||||
# Number functions
|
||||
|
||||
@function(0, 1, implicit=True, convert=number)
def f_number(self, node, pos, size, context, n):
    """XPath ``number()``: return the argument as already converted by the decorator."""
    return n
|
||||
|
||||
@function(1, 1, convert=nodeset)
def f_sum(self, node, pos, size, context, nodes):
    """XPath ``sum()``: add up the numeric values of the nodes.

    Each node's string-value is converted with ``number``.  The sum starts
    from ``0.0`` so that an empty node-set yields a float, like every other
    numeric result.
    """
    return sum((number(string_value(x)) for x in nodes), 0.0)
|
||||
|
||||
@function(1, 1, convert=number)
def f_floor(self, node, pos, size, context, n):
    """XPath ``floor()``: return ``math.floor`` of the argument."""
    lowered = math.floor(n)
    return lowered
|
||||
|
||||
@function(1, 1, convert=number)
def f_ceiling(self, node, pos, size, context, n):
    """XPath ``ceiling()``: return ``math.ceil`` of the argument."""
    raised = math.ceil(n)
    return raised
|
||||
|
||||
@function(1, 1, convert=number)
def f_round(self, node, pos, size, context, n):
    """XPath ``round()``: return the integer closest to the argument.

    Follows XPath 1.0 rather than Python's ``round()``:

    * ties go toward positive infinity, so ``round(-1.5)`` is ``-1.0``;
    * NaN, the infinities and values that are already integral (including
      both zeros) are returned unchanged;
    * a negative argument that rounds to zero gives negative zero.
    """
    if n != n or n in (float('inf'), float('-inf')):
        return n
    lower = math.floor(n)
    if lower == n:
        # Already integral; this also preserves the sign of -0.0.
        return n
    if n - lower >= 0.5:
        result = float(lower) + 1.0
    else:
        result = float(lower)
    if result == 0 and n < 0:
        # Values in [-0.5, 0) round to negative zero.
        return -result
    return result
|
||||
|
||||
def __str__(self):
    """Render the call as ``name(arg, arg, ...)``."""
    rendered = [str(arg) for arg in self.args]
    return '%s(%s)' % (self.name, ', '.join(rendered))
|
||||
|
||||
#
|
||||
# XPath axes.
|
||||
#
|
||||
|
||||
# Dictionary of all axis functions.
|
||||
axes = {}
|
||||
|
||||
def axisfn(reverse=False, principal_node_type=xml.dom.Node.ELEMENT_NODE):
    """Build a decorator that marks a function as an XPath axis.

    The decorated function is returned unchanged apart from three
    attributes: its ``__name__`` has underscores replaced by hyphens (the
    XPath spelling of the axis), ``reverse`` records the axis direction and
    ``principal_node_type`` records the axis' principal node type.

    """
    def annotate(axis_function):
        xpath_name = axis_function.__name__.replace('_', '-')
        axis_function.__name__ = xpath_name
        axis_function.reverse = reverse
        axis_function.principal_node_type = principal_node_type
        return axis_function
    return annotate
|
||||
|
||||
def make_axes():
    """Define one walker per XPath axis and register each in ``axes``.

    Every walker takes a node and returns an iterable over the nodes along
    that axis.  Walkers are registered under the hyphenated name assigned
    by ``axisfn``.
    """

    @axisfn()
    def child(node):
        return node.childNodes

    @axisfn()
    def descendant(node):
        for kid in node.childNodes:
            for found in descendant_or_self(kid):
                yield found

    @axisfn()
    def parent(node):
        above = node.parentNode
        if above is not None:
            yield above

    @axisfn(reverse=True)
    def ancestor(node):
        current = node.parentNode
        while current is not None:
            yield current
            current = current.parentNode

    @axisfn()
    def following_sibling(node):
        current = node.nextSibling
        while current is not None:
            yield current
            current = current.nextSibling

    @axisfn(reverse=True)
    def preceding_sibling(node):
        current = node.previousSibling
        while current is not None:
            yield current
            current = current.previousSibling

    @axisfn()
    def following(node):
        # For the node and then each ancestor in turn, emit the subtrees of
        # all later siblings.
        while node is not None:
            sibling = node.nextSibling
            while sibling is not None:
                for found in descendant_or_self(sibling):
                    yield found
                node = sibling
                sibling = node.nextSibling
            node = node.parentNode

    @axisfn(reverse=True)
    def preceding(node):
        # Mirror image of 'following': earlier siblings' subtrees, each
        # emitted back to front.
        while node is not None:
            sibling = node.previousSibling
            while sibling is not None:
                # Could be more efficient here.
                subtree = list(descendant_or_self(sibling))
                for found in reversed(subtree):
                    yield found
                node = sibling
                sibling = node.previousSibling
            node = node.parentNode

    @axisfn(principal_node_type=xml.dom.Node.ATTRIBUTE_NODE)
    def attribute(node):
        if node.attributes is None:
            return ()
        return (node.attributes.item(i)
                for i in xrange(node.attributes.length))

    @axisfn()
    def namespace(node):
        raise XPathNotImplementedError("namespace axis is not implemented")

    @axisfn()
    def self(node):
        yield node

    @axisfn()
    def descendant_or_self(node):
        yield node
        for kid in node.childNodes:
            for found in descendant_or_self(kid):
                yield found

    @axisfn(reverse=True)
    def ancestor_or_self(node):
        return chain([node], ancestor(node))

    # Place each axis function defined here into the 'axes' dict.
    for axis in (child, descendant, parent, ancestor, following_sibling,
                 preceding_sibling, following, preceding, attribute,
                 namespace, self, descendant_or_self, ancestor_or_self):
        axes[axis.__name__] = axis
|
||||
|
||||
make_axes()
|
||||
|
||||
def merge_into_nodeset(target, source):
    """Add the nodes of *source* to *target* in place.

    Nodes already present in a non-empty *target* are not added again, and
    the merged list is kept in document order.  Both node-sets must
    already be in document order when this is called.

    """
    if not target:
        target.extend(source)
        return

    fresh = [n for n in source if n not in target]
    if not fresh:
        return

    # When everything new comes after the current last node, appending is
    # enough; otherwise the combined list has to be sorted.  (The opposite
    # case, all new nodes coming first, is too rare to special-case.)
    already_ordered = document_order(target[-1]) < document_order(fresh[0])
    target.extend(fresh)
    if not already_ordered:
        target.sort(key=document_order)
|
||||
|
||||
class AbsolutePathExpr(Expr):
    """A location path that starts at the document root.

    ``path`` is the relative path evaluated from the root, or ``None`` for
    the bare ``/`` expression.
    """

    def __init__(self, path):
        self.path = path

    def evaluate(self, node, pos, size, context):
        """Evaluate the path against the document that owns *node*."""
        if node.nodeType == node.DOCUMENT_NODE:
            root = node
        else:
            root = node.ownerDocument
        if self.path is None:
            return [root]
        # The root is the only context node: position 1 of 1.
        return self.path.evaluate(root, 1, 1, context)

    def __str__(self):
        tail = self.path or ''
        return '/%s' % tail
|
||||
|
||||
class PathExpr(Expr):
    """A location path: a sequence of steps applied one after another."""

    def __init__(self, steps):
        self.steps = steps

    def evaluate(self, node, pos, size, context):
        """Evaluate every step in turn and return the final result.

        Raises XPathTypeError when a step that feeds, or follows, another
        step does not produce a node-set.
        """
        # The first step runs in the caller's context.  Its result may be
        # any type when it is the only step, but must be a node-set when
        # more steps follow.
        later_steps = self.steps[1:]
        current = self.steps[0].evaluate(node, pos, size, context)
        if later_steps and not nodesetp(current):
            raise XPathTypeError("path step is not a node-set")

        # Each later step runs once per node of the previous result, and
        # the per-node results are merged into one node-set.
        for step in later_steps:
            merged = []
            total = len(current)
            for index, item in enumerate(current):
                found = step.evaluate(item, index + 1, total, context)
                if not nodesetp(found):
                    raise XPathTypeError("path step is not a node-set")
                merge_into_nodeset(merged, found)
            current = merged

        return current

    def __str__(self):
        return '/'.join([str(step) for step in self.steps])
|
||||
|
||||
class PredicateList(Expr):
    """An expression filtered by one or more predicates.

    The instance wraps the filtered expression; *axis* names the axis whose
    direction decides how positions are counted.

    """
    def __init__(self, expr, predicates, axis='child'):
        self.predicates = predicates
        self.expr = expr
        self.axis = axes[axis]

    def evaluate(self, node, pos, size, context):
        """Evaluate the wrapped expression and apply each predicate in turn.

        Raises XPathTypeError when the wrapped expression does not yield a
        node-set.
        """
        candidates = self.expr.evaluate(node, pos, size, context)
        if not nodesetp(candidates):
            raise XPathTypeError("predicate input is not a node-set")

        # On a reverse axis the list is flipped while filtering, so that
        # positions are counted along the axis, and flipped back afterwards.
        backwards = self.axis.reverse
        if backwards:
            candidates.reverse()

        for pred in self.predicates:
            kept = []
            total = len(candidates)
            for index, candidate in enumerate(candidates):
                position = index + 1
                outcome = pred.evaluate(candidate, position, total, context)

                # A numeric predicate selects the node at that position;
                # any other predicate selects nodes for which its boolean
                # value is true.
                if numberp(outcome):
                    if outcome == position:
                        kept.append(candidate)
                elif boolean(outcome):
                    kept.append(candidate)
            candidates = kept

        if backwards:
            candidates.reverse()

        return candidates

    def __str__(self):
        inner = str(self.expr)
        if '/' in inner:
            inner = '(%s)' % inner
        suffix = ''.join(['[%s]' % pred for pred in self.predicates])
        return inner + suffix
|
||||
|
||||
class AxisStep(Expr):
    """A single step of a location path: an axis plus a node test."""

    def __init__(self, axis, test=None, predicates=None):
        # 'predicates' is accepted but not used by this class.
        self.axis = axes[axis]
        if test is None:
            self.test = AnyKindTest()
        else:
            self.test = test

    def evaluate(self, node, pos, size, context):
        """Return the nodes along the axis from *node* that pass the test."""
        selected = [n for n in self.axis(node)
                    if self.test.match(n, self.axis, context)]

        # Flip the order in which a reverse axis yielded its nodes.
        if self.axis.reverse:
            selected.reverse()

        return selected

    def __str__(self):
        return '%s::%s' % (self.axis.__name__, self.test)
|
||||
|
||||
#
|
||||
# Node tests.
|
||||
#
|
||||
|
||||
class Test(object):
    """Abstract base class describing the node-test interface."""

    def match(self, node, axis, context):
        """Return True if 'node' matches the test along 'axis'.

        This base implementation does nothing and returns ``None``.
        """
        return None
|
||||
|
||||
class NameTest(object):
    """Node test that matches nodes by namespace and local name.

    Either part may be the wildcard ``'*'``.  A bare ``*`` (no prefix) is
    stored with a wildcard prefix as well.
    """

    def __init__(self, prefix, localpart):
        if prefix is None and localpart == '*':
            prefix = '*'
        self.prefix = prefix
        self.localName = localpart

    def match(self, node, axis, context):
        """Return True when *node* has the axis' principal node type and
        the expected namespace and local name.

        Raises XPathUnknownPrefixError when the prefix is not declared in
        ``context.namespaces``.
        """
        if node.nodeType != axis.principal_node_type:
            return False

        if self.prefix != '*':
            if self.prefix is not None:
                try:
                    expected = context.namespaces[self.prefix]
                except KeyError:
                    raise XPathUnknownPrefixError(self.prefix)
            elif axis.principal_node_type == node.ELEMENT_NODE:
                # The default namespace is applied to unprefixed names on
                # element axes only.
                expected = context.default_namespace
            else:
                expected = None
            if expected != node.namespaceURI:
                return False

        return self.localName == '*' or self.localName == node.localName

    def __str__(self):
        if self.prefix is None:
            return self.localName
        return '%s:%s' % (self.prefix, self.localName)
|
||||
|
||||
class PITest(object):
    """Node test for processing instructions, optionally by target name."""

    def __init__(self, name=None):
        self.name = name

    def match(self, node, axis, context):
        """Return True for a processing instruction whose target equals
        ``name``, or for any processing instruction when ``name`` is None."""
        if node.nodeType != node.PROCESSING_INSTRUCTION_NODE:
            return False
        return self.name is None or node.target == self.name

    def __str__(self):
        if self.name is None:
            quoted = ''
        elif "'" in self.name:
            # Use double quotes when the name itself holds a single quote.
            quoted = '"%s"' % self.name
        else:
            quoted = "'%s'" % self.name
        return 'processing-instruction(%s)' % quoted
|
||||
|
||||
class CommentTest(object):
    """Node test for ``comment()``."""

    def match(self, node, axis, context):
        """Return True when *node* is a comment node."""
        is_comment = node.nodeType == node.COMMENT_NODE
        return is_comment

    def __str__(self):
        return 'comment()'
|
||||
|
||||
class TextTest(object):
    """Node test for ``text()``."""

    def match(self, node, axis, context):
        """Return True when *node* is a text node.

        CDATA sections count as text: the XPath data model has no separate
        node type for them, while DOM reports them as CDATA_SECTION_NODE.
        """
        return node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)

    def __str__(self):
        return 'text()'
|
||||
|
||||
class AnyKindTest(object):
    """Node test for ``node()``: accepts every node."""

    def match(self, node, axis, context):
        """Return True regardless of the node, axis or context."""
        return True

    def __str__(self):
        return 'node()'
|
|
@ -0,0 +1,252 @@
|
|||
import expr as X
|
||||
from yappsrt import *
|
||||
|
||||
%%
|
||||
|
||||
parser XPath:
|
||||
option: 'no-support-module'
|
||||
|
||||
ignore: r'\s+'
|
||||
token END: r'$'
|
||||
|
||||
token FORWARD_AXIS_NAME:
|
||||
r'child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace'
|
||||
token REVERSE_AXIS_NAME:
|
||||
r'parent|preceding-sibling|preceding|ancestor-or-self|ancestor'
|
||||
|
||||
# Dire hack here, since yapps2 has only one token of lookahead: NCNAME
|
||||
# does not match when followed by an open paren.
|
||||
token NCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*(?!\()'
|
||||
token FUNCNAME: r'[a-zA-Z_][a-zA-Z0-9_\-\.\w]*'
|
||||
|
||||
token DQUOTE: r'\"(?:[^\"])*\"'
|
||||
token SQUOTE: r"\'(?:[^\'])*\'"
|
||||
token NUMBER: r'((\.[0-9]+)|([0-9]+(\.[0-9]*)?))([eE][\+\-]?[0-9]+)?'
|
||||
token EQ_COMP: r'\!?\='
|
||||
token REL_COMP: r'[\<\>]\=?'
|
||||
token ADD_COMP: r'[\+\-]'
|
||||
token MUL_COMP: r'\*|div|mod'
|
||||
|
||||
rule XPath:
|
||||
Expr END {{ return Expr }}
|
||||
|
||||
rule Expr:
|
||||
OrExpr {{ return OrExpr }}
|
||||
|
||||
rule OrExpr:
|
||||
AndExpr {{ Expr = AndExpr }}
|
||||
(
|
||||
r'or' AndExpr
|
||||
{{ Expr = X.OrExpr('or', Expr, AndExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule AndExpr:
|
||||
EqualityExpr {{ Expr = EqualityExpr }}
|
||||
(
|
||||
r'and' EqualityExpr
|
||||
{{ Expr = X.AndExpr('and', Expr, EqualityExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule EqualityExpr:
|
||||
RelationalExpr {{ Expr = RelationalExpr }}
|
||||
(
|
||||
EQ_COMP
|
||||
RelationalExpr
|
||||
{{ Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule RelationalExpr:
|
||||
AdditiveExpr {{ Expr = AdditiveExpr }}
|
||||
(
|
||||
REL_COMP
|
||||
AdditiveExpr
|
||||
{{ Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule AdditiveExpr:
|
||||
MultiplicativeExpr {{ Expr = MultiplicativeExpr }}
|
||||
(
|
||||
ADD_COMP
|
||||
MultiplicativeExpr
|
||||
{{ Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule MultiplicativeExpr:
|
||||
UnionExpr {{ Expr = UnionExpr }}
|
||||
(
|
||||
MUL_COMP
|
||||
UnionExpr
|
||||
{{ Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule UnionExpr:
|
||||
UnaryExpr {{ Expr = UnaryExpr }}
|
||||
(
|
||||
'\|' UnaryExpr
|
||||
{{ Expr = X.UnionExpr('|', Expr, UnaryExpr) }}
|
||||
)* {{ return Expr }}
|
||||
|
||||
rule UnaryExpr:
|
||||
r'\-' ValueExpr {{ return X.NegationExpr(ValueExpr) }}
|
||||
| ValueExpr {{ return ValueExpr }}
|
||||
|
||||
rule ValueExpr:
|
||||
PathExpr {{ return PathExpr }}
|
||||
|
||||
rule PathExpr:
|
||||
r'\/' {{ path = None }}
|
||||
[
|
||||
RelativePathExpr {{ path = RelativePathExpr }}
|
||||
] {{ return X.AbsolutePathExpr(path) }}
|
||||
| r'\/\/' RelativePathExpr
|
||||
{{ step = X.AxisStep('descendant-or-self') }}
|
||||
{{ RelativePathExpr.steps.insert(0, step) }}
|
||||
{{ return X.AbsolutePathExpr(RelativePathExpr) }}
|
||||
| RelativePathExpr {{ return RelativePathExpr }}
|
||||
|
||||
rule RelativePathExpr:
|
||||
StepExpr {{ steps = [StepExpr] }}
|
||||
(
|
||||
(
|
||||
r'\/'
|
||||
| r'\/\/'
|
||||
{{ steps.append(X.AxisStep('descendant-or-self')) }}
|
||||
)
|
||||
StepExpr {{ steps.append(StepExpr) }}
|
||||
)*
|
||||
{{ return X.PathExpr(steps) }}
|
||||
|
||||
rule StepExpr:
|
||||
AxisStep {{ return AxisStep }}
|
||||
| FilterExpr {{ return FilterExpr }}
|
||||
|
||||
rule AxisStep:
|
||||
(
|
||||
ForwardStep {{ step = ForwardStep }}
|
||||
| ReverseStep {{ step = ReverseStep }}
|
||||
) {{ expr = X.AxisStep(*step) }}
|
||||
[
|
||||
PredicateList
|
||||
{{ expr = X.PredicateList(expr, PredicateList, step[0]) }}
|
||||
]
|
||||
{{ return expr }}
|
||||
|
||||
rule ForwardStep:
|
||||
ForwardAxis NodeTest {{ return [ForwardAxis, NodeTest] }}
|
||||
| AbbrevForwardStep {{ return AbbrevForwardStep }}
|
||||
|
||||
rule ForwardAxis:
|
||||
FORWARD_AXIS_NAME r'::' {{ return FORWARD_AXIS_NAME }}
|
||||
|
||||
rule AbbrevForwardStep:
|
||||
{{ axis = 'child' }}
|
||||
[
|
||||
r'@' {{ axis = 'attribute' }}
|
||||
]
|
||||
NodeTest {{ return [axis, NodeTest] }}
|
||||
|
||||
rule ReverseStep:
|
||||
ReverseAxis NodeTest {{ return [ReverseAxis, NodeTest] }}
|
||||
| AbbrevReverseStep {{ return AbbrevReverseStep }}
|
||||
|
||||
rule ReverseAxis:
|
||||
REVERSE_AXIS_NAME r'::' {{ return REVERSE_AXIS_NAME }}
|
||||
|
||||
rule AbbrevReverseStep:
|
||||
r'\.\.' {{ return ['parent', None] }}
|
||||
|
||||
rule NodeTest:
|
||||
KindTest {{ return KindTest }}
|
||||
| NameTest {{ return NameTest }}
|
||||
|
||||
rule NameTest:
|
||||
# We also support the XPath 2.0 <name>:*.
|
||||
{{ prefix = None }}
|
||||
WildcardOrNCName {{ localpart = WildcardOrNCName }}
|
||||
[
|
||||
r':' WildcardOrNCName {{ prefix = localpart }}
|
||||
{{ localpart = WildcardOrNCName }}
|
||||
]
|
||||
{{ return X.NameTest(prefix, localpart) }}
|
||||
|
||||
rule WildcardOrNCName:
|
||||
r'\*' {{ return '*' }}
|
||||
| NCNAME {{ return NCNAME }}
|
||||
|
||||
rule FilterExpr:
|
||||
PrimaryExpr
|
||||
[
|
||||
PredicateList
|
||||
{{ PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList) }}
|
||||
] {{ return PrimaryExpr }}
|
||||
|
||||
rule PredicateList:
|
||||
Predicate {{ predicates = [Predicate] }}
|
||||
(
|
||||
Predicate {{ predicates.append(Predicate) }}
|
||||
)* {{ return predicates }}
|
||||
|
||||
rule Predicate:
|
||||
r'\[' Expr r'\]' {{ return Expr }}
|
||||
|
||||
rule PrimaryExpr:
|
||||
Literal {{ return X.LiteralExpr(Literal) }}
|
||||
| VariableReference {{ return VariableReference }}
|
||||
| r'\(' Expr r'\)' {{ return Expr }}
|
||||
| ContextItemExpr {{ return ContextItemExpr }}
|
||||
| FunctionCall {{ return FunctionCall }}
|
||||
|
||||
rule VariableReference:
|
||||
r'\$' QName
|
||||
{{ return X.VariableReference(*QName) }}
|
||||
|
||||
rule ContextItemExpr:
|
||||
r'\.' {{ return X.AxisStep('self') }}
|
||||
|
||||
rule FunctionCall:
|
||||
FUNCNAME r'\(' {{ args = [] }}
|
||||
[
|
||||
Expr {{ args.append(Expr) }}
|
||||
(
|
||||
r'\,' Expr {{ args.append(Expr) }}
|
||||
)*
|
||||
] r'\)' {{ return X.Function(FUNCNAME, args) }}
|
||||
|
||||
rule KindTest:
|
||||
PITest {{ return PITest }}
|
||||
| CommentTest {{ return CommentTest }}
|
||||
| TextTest {{ return TextTest }}
|
||||
| AnyKindTest {{ return AnyKindTest }}
|
||||
|
||||
rule PITest:
|
||||
r'processing-instruction' {{ name = None }}
|
||||
r'\(' [
|
||||
NCNAME {{ name = NCNAME }}
|
||||
| StringLiteral {{ name = StringLiteral }}
|
||||
] r'\)' {{ return X.PITest(name) }}
|
||||
|
||||
rule CommentTest:
|
||||
r'comment' r'\(' r'\)' {{ return X.CommentTest() }}
|
||||
|
||||
rule TextTest:
|
||||
r'text' r'\(' r'\)' {{ return X.TextTest() }}
|
||||
|
||||
rule AnyKindTest:
|
||||
r'node' r'\(' r'\)' {{ return X.AnyKindTest() }}
|
||||
|
||||
rule Literal:
|
||||
NumericLiteral {{ return NumericLiteral }}
|
||||
| StringLiteral {{ return StringLiteral }}
|
||||
|
||||
rule NumericLiteral:
|
||||
NUMBER {{ return float(NUMBER) }}
|
||||
|
||||
rule StringLiteral:
|
||||
DQUOTE {{ return DQUOTE[1:-1] }}
|
||||
| SQUOTE {{ return SQUOTE[1:-1] }}
|
||||
|
||||
rule QName:
|
||||
NCNAME {{ name = NCNAME }}
|
||||
[
|
||||
r'\:' NCNAME {{ return (name, NCNAME) }}
|
||||
] {{ return (None, name) }}
|
|
@ -0,0 +1,420 @@
|
|||
import expr as X
|
||||
from yappsrt import *
|
||||
|
||||
|
||||
from string import *
|
||||
import re
|
||||
|
||||
class XPathScanner(Scanner):
|
||||
patterns = [
|
||||
("r'\\:'", re.compile('\\:')),
|
||||
("r'node'", re.compile('node')),
|
||||
("r'text'", re.compile('text')),
|
||||
("r'comment'", re.compile('comment')),
|
||||
("r'processing-instruction'", re.compile('processing-instruction')),
|
||||
("r'\\,'", re.compile('\\,')),
|
||||
("r'\\.'", re.compile('\\.')),
|
||||
("r'\\$'", re.compile('\\$')),
|
||||
("r'\\)'", re.compile('\\)')),
|
||||
("r'\\('", re.compile('\\(')),
|
||||
("r'\\]'", re.compile('\\]')),
|
||||
("r'\\['", re.compile('\\[')),
|
||||
("r'\\*'", re.compile('\\*')),
|
||||
("r':'", re.compile(':')),
|
||||
("r'\\.\\.'", re.compile('\\.\\.')),
|
||||
("r'@'", re.compile('@')),
|
||||
("r'::'", re.compile('::')),
|
||||
("r'\\/\\/'", re.compile('\\/\\/')),
|
||||
("r'\\/'", re.compile('\\/')),
|
||||
("r'\\-'", re.compile('\\-')),
|
||||
("'\\|'", re.compile('\\|')),
|
||||
("r'and'", re.compile('and')),
|
||||
("r'or'", re.compile('or')),
|
||||
('\\s+', re.compile('\\s+')),
|
||||
('END', re.compile('$')),
|
||||
('FORWARD_AXIS_NAME', re.compile('child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace')),
|
||||
('REVERSE_AXIS_NAME', re.compile('parent|preceding-sibling|preceding|ancestor-or-self|ancestor')),
|
||||
('NCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*(?!\\()')),
|
||||
('FUNCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*')),
|
||||
('DQUOTE', re.compile('\\"(?:[^\\"])*\\"')),
|
||||
('SQUOTE', re.compile("\\'(?:[^\\'])*\\'")),
|
||||
('NUMBER', re.compile('((\\.[0-9]+)|([0-9]+(\\.[0-9]*)?))([eE][\\+\\-]?[0-9]+)?')),
|
||||
('EQ_COMP', re.compile('\\!?\\=')),
|
||||
('REL_COMP', re.compile('[\\<\\>]\\=?')),
|
||||
('ADD_COMP', re.compile('[\\+\\-]')),
|
||||
('MUL_COMP', re.compile('\\*|div|mod')),
|
||||
]
|
||||
def __init__(self, str):
|
||||
Scanner.__init__(self,None,['\\s+'],str)
|
||||
|
||||
class XPath(Parser):
|
||||
def XPath(self):
|
||||
Expr = self.Expr()
|
||||
END = self._scan('END')
|
||||
return Expr
|
||||
|
||||
def Expr(self):
|
||||
OrExpr = self.OrExpr()
|
||||
return OrExpr
|
||||
|
||||
def OrExpr(self):
|
||||
AndExpr = self.AndExpr()
|
||||
Expr = AndExpr
|
||||
while self._peek("r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'or'":
|
||||
self._scan("r'or'")
|
||||
AndExpr = self.AndExpr()
|
||||
Expr = X.OrExpr('or', Expr, AndExpr)
|
||||
return Expr
|
||||
|
||||
def AndExpr(self):
|
||||
EqualityExpr = self.EqualityExpr()
|
||||
Expr = EqualityExpr
|
||||
while self._peek("r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'and'":
|
||||
self._scan("r'and'")
|
||||
EqualityExpr = self.EqualityExpr()
|
||||
Expr = X.AndExpr('and', Expr, EqualityExpr)
|
||||
return Expr
|
||||
|
||||
def EqualityExpr(self):
|
||||
RelationalExpr = self.RelationalExpr()
|
||||
Expr = RelationalExpr
|
||||
while self._peek('EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'EQ_COMP':
|
||||
EQ_COMP = self._scan('EQ_COMP')
|
||||
RelationalExpr = self.RelationalExpr()
|
||||
Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr)
|
||||
return Expr
|
||||
|
||||
def RelationalExpr(self):
|
||||
AdditiveExpr = self.AdditiveExpr()
|
||||
Expr = AdditiveExpr
|
||||
while self._peek('REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'REL_COMP':
|
||||
REL_COMP = self._scan('REL_COMP')
|
||||
AdditiveExpr = self.AdditiveExpr()
|
||||
Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr)
|
||||
return Expr
|
||||
|
||||
def AdditiveExpr(self):
|
||||
MultiplicativeExpr = self.MultiplicativeExpr()
|
||||
Expr = MultiplicativeExpr
|
||||
while self._peek('ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'ADD_COMP':
|
||||
ADD_COMP = self._scan('ADD_COMP')
|
||||
MultiplicativeExpr = self.MultiplicativeExpr()
|
||||
Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr)
|
||||
return Expr
|
||||
|
||||
def MultiplicativeExpr(self):
|
||||
UnionExpr = self.UnionExpr()
|
||||
Expr = UnionExpr
|
||||
while self._peek('MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'MUL_COMP':
|
||||
MUL_COMP = self._scan('MUL_COMP')
|
||||
UnionExpr = self.UnionExpr()
|
||||
Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr)
|
||||
return Expr
|
||||
|
||||
def UnionExpr(self):
|
||||
UnaryExpr = self.UnaryExpr()
|
||||
Expr = UnaryExpr
|
||||
while self._peek("'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "'\\|'":
|
||||
self._scan("'\\|'")
|
||||
UnaryExpr = self.UnaryExpr()
|
||||
Expr = X.UnionExpr('|', Expr, UnaryExpr)
|
||||
return Expr
|
||||
|
||||
def UnaryExpr(self):
|
||||
_token_ = self._peek("r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
|
||||
if _token_ == "r'\\-'":
|
||||
self._scan("r'\\-'")
|
||||
ValueExpr = self.ValueExpr()
|
||||
return X.NegationExpr(ValueExpr)
|
||||
else:
|
||||
ValueExpr = self.ValueExpr()
|
||||
return ValueExpr
|
||||
|
||||
def ValueExpr(self):
|
||||
PathExpr = self.PathExpr()
|
||||
return PathExpr
|
||||
|
||||
def PathExpr(self):
|
||||
_token_ = self._peek("r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
|
||||
if _token_ == "r'\\/'":
|
||||
self._scan("r'\\/'")
|
||||
path = None
|
||||
if self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME', "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") not in ["'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'"]:
|
||||
RelativePathExpr = self.RelativePathExpr()
|
||||
path = RelativePathExpr
|
||||
return X.AbsolutePathExpr(path)
|
||||
elif _token_ == "r'\\/\\/'":
|
||||
self._scan("r'\\/\\/'")
|
||||
RelativePathExpr = self.RelativePathExpr()
|
||||
step = X.AxisStep('descendant-or-self')
|
||||
RelativePathExpr.steps.insert(0, step)
|
||||
return X.AbsolutePathExpr(RelativePathExpr)
|
||||
else:
|
||||
RelativePathExpr = self.RelativePathExpr()
|
||||
return RelativePathExpr
|
||||
|
||||
def RelativePathExpr(self):
|
||||
StepExpr = self.StepExpr()
|
||||
steps = [StepExpr]
|
||||
while self._peek("r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") in ["r'\\/'", "r'\\/\\/'"]:
|
||||
_token_ = self._peek("r'\\/'", "r'\\/\\/'")
|
||||
if _token_ == "r'\\/'":
|
||||
self._scan("r'\\/'")
|
||||
else:# == "r'\\/\\/'"
|
||||
self._scan("r'\\/\\/'")
|
||||
steps.append(X.AxisStep('descendant-or-self'))
|
||||
StepExpr = self.StepExpr()
|
||||
steps.append(StepExpr)
|
||||
return X.PathExpr(steps)
|
||||
|
||||
def StepExpr(self):
|
||||
_token_ = self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
|
||||
if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE']:
|
||||
AxisStep = self.AxisStep()
|
||||
return AxisStep
|
||||
else:
|
||||
FilterExpr = self.FilterExpr()
|
||||
return FilterExpr
|
||||
|
||||
def AxisStep(self):
|
||||
_token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
|
||||
if _token_ not in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]:
|
||||
ForwardStep = self.ForwardStep()
|
||||
step = ForwardStep
|
||||
else:# in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]
|
||||
ReverseStep = self.ReverseStep()
|
||||
step = ReverseStep
|
||||
expr = X.AxisStep(*step)
|
||||
if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
|
||||
PredicateList = self.PredicateList()
|
||||
expr = X.PredicateList(expr, PredicateList, step[0])
|
||||
return expr
|
||||
|
||||
def ForwardStep(self):
|
||||
_token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
|
||||
if _token_ == 'FORWARD_AXIS_NAME':
|
||||
ForwardAxis = self.ForwardAxis()
|
||||
NodeTest = self.NodeTest()
|
||||
return [ForwardAxis, NodeTest]
|
||||
else:
|
||||
AbbrevForwardStep = self.AbbrevForwardStep()
|
||||
return AbbrevForwardStep
|
||||
|
||||
def ForwardAxis(self):
|
||||
FORWARD_AXIS_NAME = self._scan('FORWARD_AXIS_NAME')
|
||||
self._scan("r'::'")
|
||||
return FORWARD_AXIS_NAME
|
||||
|
||||
def AbbrevForwardStep(self):
|
||||
axis = 'child'
|
||||
if self._peek("r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') == "r'@'":
|
||||
self._scan("r'@'")
|
||||
axis = 'attribute'
|
||||
NodeTest = self.NodeTest()
|
||||
return [axis, NodeTest]
|
||||
|
||||
def ReverseStep(self):
|
||||
_token_ = self._peek('REVERSE_AXIS_NAME', "r'\\.\\.'")
|
||||
if _token_ == 'REVERSE_AXIS_NAME':
|
||||
ReverseAxis = self.ReverseAxis()
|
||||
NodeTest = self.NodeTest()
|
||||
return [ReverseAxis, NodeTest]
|
||||
else:# == "r'\\.\\.'"
|
||||
AbbrevReverseStep = self.AbbrevReverseStep()
|
||||
return AbbrevReverseStep
|
||||
|
||||
def ReverseAxis(self):
|
||||
REVERSE_AXIS_NAME = self._scan('REVERSE_AXIS_NAME')
|
||||
self._scan("r'::'")
|
||||
return REVERSE_AXIS_NAME
|
||||
|
||||
def AbbrevReverseStep(self):
|
||||
self._scan("r'\\.\\.'")
|
||||
return ['parent', None]
|
||||
|
||||
def NodeTest(self):
    """Parse a node test: a name test for ``*``/NCNAME, else a kind test."""
    lookahead = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
    if lookahead in ("r'\\*'", 'NCNAME'):
        return self.NameTest()
    # Any other permitted lookahead starts a kind test.
    return self.KindTest()
|
||||
|
||||
def NameTest(self):
    """Parse ``name`` or ``prefix:name`` and return ``X.NameTest(prefix, local)``.

    Either part may be the ``*`` wildcard.  The prefix is None when no
    colon follows the first part.
    """
    first = self.WildcardOrNCName()
    if self._peek("r':'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") != "r':'":
        # Unprefixed name: the only part read is the local part.
        return X.NameTest(None, first)
    self._scan("r':'")
    second = self.WildcardOrNCName()
    # With a colon, the first part turns out to have been the prefix.
    return X.NameTest(first, second)
|
||||
|
||||
def WildcardOrNCName(self):
    """Consume ``*`` or an NCNAME; return '*' or the scanned name text."""
    star = "r'\\*'"
    if self._peek("r'\\*'", 'NCNAME') != star:
        return self._scan('NCNAME')
    self._scan(star)
    return '*'
|
||||
|
||||
def FilterExpr(self):
    """Parse a primary expression optionally followed by predicates.

    Returns the primary expression itself, or it wrapped in
    ``X.PredicateList`` when a ``[`` follows.
    """
    primary = self.PrimaryExpr()
    if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") != "r'\\['":
        return primary
    predicates = self.PredicateList()
    return X.PredicateList(primary, predicates)
|
||||
|
||||
def PredicateList(self):
    """Parse one or more consecutive predicates and return them as a list."""
    collected = [self.Predicate()]
    # Keep consuming predicates for as long as another '[' is next.
    while self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
        collected.append(self.Predicate())
    return collected
|
||||
|
||||
def Predicate(self):
    """Parse ``[ Expr ]`` and return the parsed inner expression."""
    opening, closing = "r'\\['", "r'\\]'"
    self._scan(opening)
    condition = self.Expr()
    self._scan(closing)
    return condition
|
||||
|
||||
def PrimaryExpr(self):
    """Parse a primary expression, dispatching on the lookahead token.

    ``$`` starts a variable reference, ``.`` the context item, FUNCNAME a
    function call and ``(`` a parenthesised expression.  Any other
    permitted token is a literal, returned wrapped in ``X.LiteralExpr``.
    """
    lookahead = self._peek("r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE')
    if lookahead == "r'\\$'":
        return self.VariableReference()
    if lookahead == "r'\\.'":
        return self.ContextItemExpr()
    if lookahead == 'FUNCNAME':
        return self.FunctionCall()
    if lookahead == "r'\\('":
        # Parenthesised expression: the parentheses themselves are dropped.
        self._scan("r'\\('")
        inner = self.Expr()
        self._scan("r'\\)'")
        return inner
    literal = self.Literal()
    return X.LiteralExpr(literal)
|
||||
|
||||
def VariableReference(self):
    """Parse ``$qname`` and return ``X.VariableReference(prefix, name)``."""
    self._scan("r'\\$'")
    qualified = self.QName()
    # QName yields a (prefix, name) pair, expanded into the two arguments.
    return X.VariableReference(*qualified)
|
||||
|
||||
def ContextItemExpr(self):
    """Consume ``.`` and return it as a step along the 'self' axis."""
    dot = "r'\\.'"
    self._scan(dot)
    return X.AxisStep('self')
|
||||
|
||||
def FunctionCall(self):
    """Parse ``name(arg, ...)`` and return ``X.Function(name, args)``.

    ``args`` is the list of parsed argument expressions, possibly empty.
    """
    comma, closing = "r'\\,'", "r'\\)'"
    function_name = self._scan('FUNCNAME')
    self._scan("r'\\('")
    arguments = []
    # Optional first argument: present unless a ',' or ')' comes next.
    if self._peek("r'\\,'", "r'\\)'", "r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') not in [comma, closing]:
        arguments.append(self.Expr())
    # Each further argument is introduced by a comma.
    while self._peek("r'\\,'", "r'\\)'") == comma:
        self._scan(comma)
        arguments.append(self.Expr())
    self._scan(closing)
    return X.Function(function_name, arguments)
|
||||
|
||||
def KindTest(self):
    """Parse a kind test by delegating to the rule chosen by the lookahead."""
    rule_for_token = {
        "r'processing-instruction'": 'PITest',
        "r'comment'": 'CommentTest',
        "r'text'": 'TextTest',
    }
    lookahead = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'")
    # Anything not listed above falls through to the node() test.
    rule_name = rule_for_token.get(lookahead, 'AnyKindTest')
    return getattr(self, rule_name)()
|
||||
|
||||
def PITest(self):
    """Parse ``processing-instruction(...)`` and return ``X.PITest(name)``.

    ``name`` is the optional argument, given either as an NCNAME or as a
    string literal; it is None when the parentheses are empty.
    """
    self._scan("r'processing-instruction'")
    self._scan("r'\\('")
    target = None
    if self._peek('NCNAME', "r'\\)'", 'DQUOTE', 'SQUOTE') != "r'\\)'":
        if self._peek('NCNAME', 'DQUOTE', 'SQUOTE') == 'NCNAME':
            target = self._scan('NCNAME')
        else:
            # Quoted form of the target name.
            target = self.StringLiteral()
    self._scan("r'\\)'")
    return X.PITest(target)
|
||||
|
||||
def CommentTest(self):
    """Consume ``comment ( )`` and return ``X.CommentTest()``."""
    for terminal in ("r'comment'", "r'\\('", "r'\\)'"):
        self._scan(terminal)
    return X.CommentTest()
|
||||
|
||||
def TextTest(self):
    """Consume ``text ( )`` and return ``X.TextTest()``."""
    for terminal in ("r'text'", "r'\\('", "r'\\)'"):
        self._scan(terminal)
    return X.TextTest()
|
||||
|
||||
def AnyKindTest(self):
    """Consume ``node ( )`` and return ``X.AnyKindTest()``."""
    for terminal in ("r'node'", "r'\\('", "r'\\)'"):
        self._scan(terminal)
    return X.AnyKindTest()
|
||||
|
||||
def Literal(self):
    """Parse a literal: numeric when the lookahead is NUMBER, else a string."""
    lookahead = self._peek('NUMBER', 'DQUOTE', 'SQUOTE')
    if lookahead != 'NUMBER':
        # DQUOTE or SQUOTE
        return self.StringLiteral()
    return self.NumericLiteral()
|
||||
|
||||
def NumericLiteral(self):
    """Consume a NUMBER token and return its value as a float."""
    digits = self._scan('NUMBER')
    value = float(digits)
    return value
|
||||
|
||||
def StringLiteral(self):
    """Consume a quoted string token and return its text without the quotes."""
    if self._peek('DQUOTE', 'SQUOTE') == 'DQUOTE':
        terminal = 'DQUOTE'
    else:
        terminal = 'SQUOTE'
    quoted = self._scan(terminal)
    # Drop the first and last characters (the surrounding quote marks).
    return quoted[1:-1]
|
||||
|
||||
def QName(self):
    """Parse ``name`` or ``prefix:name`` and return a ``(prefix, name)`` tuple.

    The prefix is None when no colon follows the first NCNAME.
    """
    colon = "r'\\:'"
    first = self._scan('NCNAME')
    if self._peek("r'\\:'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") != colon:
        return (None, first)
    self._scan(colon)
    local = self._scan('NCNAME')
    return (first, local)
|
||||
|
||||
|
||||
def parse(rule, text):
    """Parse *text* starting at grammar rule *rule*.

    Builds an XPath parser over an XPathScanner for *text* and runs it
    through wrap_error_reporter, whose result is returned.
    """
    scanner = XPathScanner(text)
    parser = XPath(scanner)
    return wrap_error_reporter(parser, rule)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line driver: parse a file (or stdin) starting at a given rule.
    from sys import argv, stdin
    if len(argv) < 2:
        print('Args: <rule> [<filename>]')
    elif len(argv) >= 3:
        f = open(argv[2], 'r')
        try:
            # Read everything up front so the handle is closed before
            # parsing starts, even if reading fails.
            text = f.read()
        finally:
            f.close()
        print(parse(argv[1], text))
    else:
        # No filename given: take the input text from stdin.
        print(parse(argv[1], stdin.read()))
|
|
@ -0,0 +1,174 @@
|
|||
# Yapps 2.0 Runtime
|
||||
#
|
||||
# This module is needed to run generated parsers.
|
||||
|
||||
from string import join, count, find, rfind
|
||||
import re
|
||||
|
||||
class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use.

    Note that within this module the name shadows the builtin SyntaxError.

    Attributes:
        pos -- character offset of the error in the input; negative when
               no position is known (the default is -1)
        msg -- human-readable description (default "Bad Token")
    """

    def __init__(self, pos=-1, msg="Bad Token"):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg

    def __repr__(self):
        if self.pos < 0:
            return "#<syntax-error>"
        return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg)

    # Exception.__init__ is called without arguments, so the inherited
    # str() would be empty; reuse the repr so that an uncaught or logged
    # error still shows the position and message.
    __str__ = __repr__
|
||||
|
||||
class NoMoreTokens(Exception):
    """Exception used when a token is requested but none is left to return."""
|
||||
|
||||
class Scanner:
    """Regex-based tokenizer that scans its input lazily, one token at a time.

    Each scanned token is a tuple ``(start, end, terminal, text)`` stored in
    ``self.tokens``; ``self.restrictions[i]`` records the restriction that
    was in effect when token ``i`` was scanned.
    """

    def __init__(self, patterns, ignore, input):
        """Patterns is [(terminal,regex)...]
        Ignore is [terminal,...];
        Input is a string"""
        self.tokens = []
        self.restrictions = []
        self.input = input
        self.pos = 0
        self.ignore = ignore
        # The stored patterns are pairs (terminal, compiled regex).  If the
        # patterns argument is None, we assume that the class already has a
        # proper .patterns list constructed.
        if patterns is not None:
            self.patterns = []
            for k, r in patterns:
                self.patterns.append((k, re.compile(r)))

    def token(self, i, restrict=0):
        """Get the i'th token, and if i is one past the end, then scan
        for another token; restrict is a list of tokens that
        are allowed, or 0 for any token.

        Raises NoMoreTokens when token i is still unavailable, and
        NotImplementedError when an already scanned token is requested
        with a restriction containing terminals that were not allowed
        when it was scanned.
        """
        if i == len(self.tokens):
            self.scan(restrict)
        if i < len(self.tokens):
            # Make sure the restriction is more restricted
            if restrict and self.restrictions[i]:
                for r in restrict:
                    if r not in self.restrictions[i]:
                        raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def __repr__(self):
        """Print the last 10 tokens that have been scanned in"""
        output = ''
        for t in self.tokens[-10:]:
            output = '%s\n (@%s) %s = %s' % (output, t[0], t[2], repr(t[3]))
        return output

    def scan(self, restrict):
        """Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions.

        Raises SyntaxError when no pattern matches at the current position.
        """
        # Keep looking for a token, ignoring any in self.ignore
        while 1:
            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            for p, regexp in self.patterns:
                # Skip terminals that are excluded by the restriction;
                # ignorable terminals are always tried.
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and len(m.group(0)) > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = len(m.group(0))

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = "Bad Token"
                if restrict:
                    # str.join replaces the deprecated string.join function
                    msg = "Trying to find one of " + ", ".join(restrict)
                raise SyntaxError(self.pos, msg)

            # If we found something that isn't to be ignored, return it
            if best_pat not in self.ignore:
                # Create a token with this data
                token = (self.pos, self.pos + best_match, best_pat,
                         self.input[self.pos:self.pos + best_match])
                self.pos = self.pos + best_match
                # Only add this token if it's not in the list
                # (to prevent looping)
                if not self.tokens or token != self.tokens[-1]:
                    self.tokens.append(token)
                    self.restrictions.append(restrict)
                return
            else:
                # This token should be ignored ..
                self.pos = self.pos + best_match
|
||||
|
||||
class Parser:
    """Base class for generated parsers: a cursor over a scanner's tokens."""

    def __init__(self, scanner):
        self._scanner, self._pos = scanner, 0

    def _peek(self, *types):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.token(self._pos, types)[2]

    def _scan(self, type):
        """Returns the matched text, and moves to the next token"""
        found = self._scanner.token(self._pos, [type])
        if found[2] == type:
            self._pos += 1
            return found[3]
        # The token at the cursor is of a different type than requested.
        raise SyntaxError(found[0], 'Trying to find '+type)
|
||||
|
||||
|
||||
|
||||
def print_error(input, err, scanner):
    """Print a syntax error with the offending line and a caret under it.

    input   -- the complete text that was being parsed
    err     -- the error; its ``pos`` (character offset) and ``msg``
               attributes are read
    scanner -- printed last as the list of nearby tokens
    """
    p = err.pos
    # Figure out the line number (newlines before the error, 1-based)
    line = input[:p].count('\n')
    print(err.msg + " on line " + repr(line + 1) + ":")

    # Work on a window of at most 80 characters either side of the error
    # and make p relative to that window.
    window_start = max(p - 80, 0)
    text = input[window_start:p + 80]
    p = p - window_start

    # Strip to the left: cut after the LAST line break before the error.
    # Taking the later of '\n' and '\r' matters for CRLF input, where
    # cutting at the '\r' would leave a stray '\n' at the start of the
    # displayed line and shift the caret.
    before = text[:p]
    i = max(before.rfind('\n'), before.rfind('\r'))
    if 0 <= i < p:
        p = p - i - 1
        text = text[i + 1:]

    # Strip to the right: cut at the FIRST line break after the error.
    i = text.find('\n', p)
    j = text.find('\r', p)
    if i < 0 or (0 <= j < i):
        i = j
    if i >= 0:
        text = text[:i]

    # Now shorten the text
    while len(text) > 70 and p > 60:
        # Cut off 10 chars and put a 3-char ellipsis in their place, so the
        # caret position moves left by 7.
        text = "..." + text[10:]
        p = p - 7

    # Now print the string, along with an indicator
    print('>  ' + text)
    print('>  ' + ' ' * p + '^')
    print('List of nearby tokens: ' + str(scanner))
|
||||
|
||||
def wrap_error_reporter(parser, rule):
    """Run ``parser.<rule>()`` and report parse failures by printing them.

    Returns whatever the rule method returns.  When the rule raises
    SyntaxError or NoMoreTokens, a diagnostic is printed instead and None
    is returned; other exceptions propagate.
    """
    # Local import: used only to fetch the active exception in a way that
    # does not depend on the version-specific ``except X, e`` syntax.
    import sys
    return_value = None
    try:
        return_value = getattr(parser, rule)()
    except SyntaxError:
        s = sys.exc_info()[1]
        input = parser._scanner.input
        try:
            print_error(input, s, parser._scanner)
        except ImportError:
            # Fallback one-line report if print_error raised ImportError.
            print('Syntax Error %s on line %s'
                  % (s.msg, 1 + input[:s.pos].count('\n')))
    except NoMoreTokens:
        print('Could not complete parsing; stopped around here:')
        print(parser._scanner)
    return return_value
|
Loading…
Reference in New Issue