904 lines
27 KiB
Python
904 lines
27 KiB
Python
|
from __future__ import division
|
||
|
from itertools import *
|
||
|
import math
|
||
|
import operator
|
||
|
import re
|
||
|
import xml.dom
|
||
|
import weakref
|
||
|
|
||
|
from xpath.exceptions import *
|
||
|
import xpath
|
||
|
|
||
|
|
||
|
#
|
||
|
# Data model functions.
|
||
|
#
|
||
|
|
||
|
def string_value(node):
    """Return the XPath string-value of a DOM node.

    Document and element nodes yield the concatenation of the character
    data of every text and CDATA-section descendant, in the order the
    descendant axis produces them.  Attribute nodes yield their value;
    processing instructions, comments and text nodes yield their data;
    CDATA sections yield their node value.  Any other node type falls
    through and yields None.
    """
    kind = node.nodeType

    if kind in (node.DOCUMENT_NODE, node.ELEMENT_NODE):
        pieces = []
        for descendant in axes['descendant'](node):
            if descendant.nodeType == descendant.TEXT_NODE:
                pieces.append(descendant.data)
            elif descendant.nodeType == descendant.CDATA_SECTION_NODE:
                pieces.append(descendant.nodeValue)
        return u''.join(pieces)

    if kind == node.ATTRIBUTE_NODE:
        return node.value

    if kind in (node.PROCESSING_INSTRUCTION_NODE,
                node.COMMENT_NODE,
                node.TEXT_NODE):
        return node.data

    if kind == node.CDATA_SECTION_NODE:
        return node.nodeValue

    return None
|
||
|
|
||
|
def document_order(node):
    """Return a sort key that places nodes in document order.

    The key is the list of sibling indexes leading from the root down to
    the node: the root itself (any node without a parent) is [], its
    third child is [2], and that child's first child is [2, 0].  Comparing
    two keys orders the corresponding nodes as they appear in the document.

    An attribute is keyed as its owner element's key followed by -1 and
    the attribute name (e.g. [2, 0, -1, 'href']), so attributes sort
    before every child of their element and among themselves by name.

    A fresh list is returned on every call.
    """
    if node.nodeType == node.ATTRIBUTE_NODE:
        return document_order(node.ownerElement) + [-1, node.name]

    parent = node.parentNode
    if parent is None:
        # The document root (or a detached node).
        return []

    # Count how many siblings precede this node under its parent.
    index = 0
    cursor = node.previousSibling
    while cursor is not None:
        index += 1
        cursor = cursor.previousSibling

    return document_order(parent) + [index]
|
||
|
|
||
|
#
|
||
|
# Type functions, operating on the various XPath types.
|
||
|
#
|
||
|
# Internally, we use the following representations:
|
||
|
# nodeset - list of DOM tree nodes in document order
|
||
|
# string - str or unicode
|
||
|
# boolean - bool
|
||
|
# number - int or float
|
||
|
#
|
||
|
|
||
|
def nodeset(v):
    """Return 'v' unchanged if it is a node-set.

    Node-sets cannot be produced by conversion from any other XPath type,
    so this only validates its argument.

    Raises XPathTypeError if 'v' is not a node-set.
    """
    if not nodesetp(v):
        # Call-style raise: same behaviour as the old "raise X, msg" form,
        # and consistent with the other raise statements in this module.
        raise XPathTypeError("value is not a node-set")
    return v
|
||
|
|
||
|
def nodesetp(v):
    """Return True iff 'v' is a node-set, False otherwise.

    Node-sets are represented internally as Python lists of DOM nodes.
    """
    # Always return a real boolean rather than falling through with None.
    return isinstance(v, list)
|
||
|
|
||
|
def string(v):
    """Convert an XPath value to its string form.

    A node-set becomes the string-value of its first node (or the empty
    string when it has no nodes).  Numbers are rendered as 'Infinity',
    '-Infinity' or 'NaN' for the special values, without a fractional
    part when they are integral and no larger than 0xffffffff, and with
    the default unicode() rendering otherwise.  Booleans become 'true'
    or 'false'.  Anything else is returned unchanged.
    """
    if nodesetp(v):
        return string_value(v[0]) if v else u''

    if numberp(v):
        if v == float('inf'):
            return u'Infinity'
        if v == float('-inf'):
            return u'-Infinity'
        if str(v) == 'nan':
            return u'NaN'
        if int(v) == v and v <= 0xffffffff:
            # Drop the trailing ".0" from integral values.
            v = int(v)
        return unicode(v)

    if booleanp(v):
        return u'true' if v else u'false'

    return v
|
||
|
|
||
|
def stringp(v):
    """Return True iff 'v' is a string (byte string or unicode)."""
    try:
        string_types = basestring
    except NameError:
        # Interpreters without the 'basestring' builtin have one text type.
        string_types = str
    return isinstance(v, string_types)
|
||
|
|
||
|
def boolean(v):
    """Convert an XPath value to a boolean.

    A node-set is true when it is non-empty, a number when it is neither
    zero nor NaN, and a string when it is non-empty.  Any other value
    (normally an existing boolean) is returned unchanged.
    """
    if nodesetp(v):
        return len(v) > 0
    if numberp(v):
        # 'v != v' holds only for NaN.
        return not (v == 0 or v != v)
    if stringp(v):
        return v != ''
    return v
|
||
|
|
||
|
def booleanp(v):
    """Return True iff 'v' is an XPath boolean (a Python bool)."""
    # bool has exactly two instances, so identity checks are sufficient.
    return v is True or v is False
|
||
|
|
||
|
def number(v):
    """Convert an XPath value to a number (a float).

    A node-set is first converted to a string.  The value is then parsed
    with float(); text that float() rejects yields NaN.
    """
    value = string(v) if nodesetp(v) else v
    try:
        return float(value)
    except ValueError:
        return float('NaN')
|
||
|
|
||
|
def numberp(v):
    """Return True iff 'v' is an XPath number (an int or float).

    Booleans are excluded even though bool is a subclass of int.
    """
    if isinstance(v, bool):
        return False
    return isinstance(v, (int, float))
|
||
|
|
||
|
class Expr(object):
    """Common base class of every XPath expression node."""

    def evaluate(self, node, pos, size, context):
        """Evaluate this expression and return its XPath value.

        node    -- the context node
        pos     -- the context position
        size    -- the context size
        context -- the evaluation context object

        Subclasses return a nodeset, string, boolean, or number.  This
        base implementation does nothing and returns None.
        """
        return None
|
||
|
|
||
|
class BinaryOperatorExpr(Expr):
    """Shared behaviour for expressions of the form <left> <op> <right>."""

    def __init__(self, op, left, right):
        """Store the operator token and the two operand expressions."""
        self.op = op
        self.left = left
        self.right = right

    def evaluate(self, node, pos, size, context):
        """Evaluate both operands, left first, and combine them.

        Subclasses either implement operate(a, b) to combine the two
        operand values, or override evaluate() entirely.
        """
        lhs = self.left.evaluate(node, pos, size, context)
        rhs = self.right.evaluate(node, pos, size, context)
        return self.operate(lhs, rhs)

    def __str__(self):
        """Render the expression fully parenthesised."""
        return '(%s %s %s)' % (self.left, self.op, self.right)
|
||
|
|
||
|
class AndExpr(BinaryOperatorExpr):
    """<x> and <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the XPath conjunction of the two operands.

        Each operand is converted with boolean() before the two are
        combined, so values such as NaN (which Python treats as truthy
        but XPath treats as false) are handled by XPath rules.  The right
        operand is not evaluated when the left one is false.
        """
        # Python's 'and' short-circuits, matching XPath's evaluation rule.
        return (boolean(self.left.evaluate(node, pos, size, context)) and
                boolean(self.right.evaluate(node, pos, size, context)))
|
||
|
|
||
|
class OrExpr(BinaryOperatorExpr):
    """<x> or <y>"""

    def evaluate(self, node, pos, size, context):
        """Return the XPath disjunction of the two operands.

        Each operand is converted with boolean() before the two are
        combined, so a left operand such as NaN (truthy in Python, false
        in XPath) no longer hides the right operand.  The right operand
        is not evaluated when the left one is true.
        """
        # Python's 'or' short-circuits, matching XPath's evaluation rule.
        return (boolean(self.left.evaluate(node, pos, size, context)) or
                boolean(self.right.evaluate(node, pos, size, context)))
|
||
|
|
||
|
class EqualityExpr(BinaryOperatorExpr):
    """<x> = <y>, <x> != <y>, etc."""

    # Maps each comparison token to the function applied to the converted
    # operands.
    operators = {
        '='  : operator.eq,
        '!=' : operator.ne,
        '<=' : operator.le,
        '<'  : operator.lt,
        '>=' : operator.ge,
        '>'  : operator.gt,
    }

    def operate(self, a, b):
        """Compare two XPath values and return a boolean.

        A node-set compared with a boolean is first converted as a whole
        with boolean().  Any other comparison involving a node-set is true
        if it holds for the string-value of at least one node.  For '='
        and '!=' the operands are compared as booleans if either is a
        boolean, else as numbers if either is a number, else as strings;
        the relational operators always compare numbers.
        """
        # XPath 1.0 section 3.4: node-set versus boolean compares the
        # boolean with boolean(node-set), not with each node's value.
        if nodesetp(a) and booleanp(b):
            a = boolean(a)
        elif nodesetp(b) and booleanp(a):
            b = boolean(b)

        if nodesetp(a):
            for node in a:
                if self.operate(string_value(node), b):
                    return True
            return False

        if nodesetp(b):
            for node in b:
                if self.operate(a, string_value(node)):
                    return True
            return False

        if self.op in ('=', '!='):
            if booleanp(a) or booleanp(b):
                convert = boolean
            elif numberp(a) or numberp(b):
                convert = number
            else:
                convert = string
        else:
            convert = number

        a, b = convert(a), convert(b)
        return self.operators[self.op](a, b)
|
||
|
|
||
|
def divop(x, y):
    """Divide x by y using XPath (IEEE 754) rules for a zero divisor.

    Instead of raising ZeroDivisionError, a zero or NaN dividend yields
    NaN, and any other dividend yields an infinity whose sign is the
    product of the signs of the dividend and of the (possibly negative)
    zero divisor.
    """
    try:
        return x / y
    except ZeroDivisionError:
        # 0 div 0 and NaN div 0 are both NaN ('x != x' holds only for NaN).
        if x == 0 or x != x:
            return float('nan')
        # copysign exposes the sign of a -0.0 divisor, which '<' cannot.
        negative = (x < 0) != (math.copysign(1.0, y) < 0)
        return float('-inf') if negative else float('inf')
|
||
|
|
||
|
def _modop(x, y):
    """Return the XPath remainder of x by y, or NaN where it is undefined."""
    try:
        # math.fmod gives the result the sign of the dividend, as XPath
        # requires (Python's % operator follows the divisor instead).
        return math.fmod(x, y)
    except ValueError:
        # fmod rejects a zero divisor and an infinite dividend; XPath
        # defines both results as NaN rather than an error.
        return float('nan')


class ArithmeticalExpr(BinaryOperatorExpr):
    """<x> + <y>, <x> - <y>, etc."""

    # Maps each arithmetic token to the function applied to the operands
    # after they have been converted to numbers.
    operators = {
        '+'   : operator.add,
        '-'   : operator.sub,
        '*'   : operator.mul,
        'div' : divop,
        'mod' : _modop,
    }

    def operate(self, a, b):
        """Convert both operands to numbers and apply the operator."""
        return self.operators[self.op](number(a), number(b))
|
||
|
|
||
|
class UnionExpr(BinaryOperatorExpr):
    """<x> | <y>"""

    def operate(self, a, b):
        """Return the union of two node-sets in document order.

        Raises XPathTypeError unless both operands are node-sets.
        """
        if not (nodesetp(a) and nodesetp(b)):
            raise XPathTypeError("union operand is not a node-set")

        # Deduplicate first, then sort to restore document order.
        distinct = set(chain(a, b))
        return sorted(distinct, key=document_order)
|
||
|
|
||
|
class NegationExpr(Expr):
    """- <x>"""

    def __init__(self, expr):
        """Store the operand expression to be negated."""
        self.expr = expr

    def evaluate(self, node, pos, size, context):
        """Evaluate the operand, convert it to a number, and negate it."""
        operand = self.expr.evaluate(node, pos, size, context)
        return -number(operand)

    def __str__(self):
        """Render the negation fully parenthesised."""
        return '(-%s)' % self.expr
|
||
|
|
||
|
class LiteralExpr(Expr):
    """Literals--either numbers or strings."""

    def __init__(self, literal):
        """Store the literal value."""
        self.literal = literal

    def evaluate(self, node, pos, size, context):
        """Return the literal value; the context is ignored."""
        return self.literal

    def __str__(self):
        """Render the literal as XPath source text.

        Strings are wrapped in single quotes, or in double quotes when
        they contain a single quote.  Other values are rendered with
        string().
        """
        value = self.literal
        if not stringp(value):
            return string(value)
        template = '"%s"' if "'" in value else "'%s'"
        return template % value
|
||
|
|
||
|
class VariableReference(Expr):
    """Variable references."""

    def __init__(self, prefix, name):
        """Store the optional namespace prefix and the variable name."""
        self.prefix = prefix
        self.name = name

    def evaluate(self, node, pos, size, context):
        """Look the variable up in context.variables and return its value.

        An unprefixed variable is keyed by its name; a prefixed one is
        keyed by (namespace URI, name), with the URI taken from
        context.namespaces.

        Raises XPathUnknownPrefixError if the prefix is not declared and
        XPathUnknownVariableError if the variable is not bound.
        """
        try:
            if self.prefix is None:
                return context.variables[self.name]

            try:
                uri = context.namespaces[self.prefix]
            except KeyError:
                raise XPathUnknownPrefixError(self.prefix)
            return context.variables[(uri, self.name)]
        except KeyError:
            raise XPathUnknownVariableError(str(self))

    def __str__(self):
        """Render the reference as '$name' or '$prefix:name'."""
        if self.prefix is not None:
            return '$%s:%s' % (self.prefix, self.name)
        return '$%s' % self.name
|
||
|
|
||
|
def _xpath_round(n):
    """Round a float the way the XPath 1.0 round() function does.

    The result is the integer closest to 'n'; ties go to the integer
    closest to positive infinity.  NaN, the infinities and both zeros are
    returned unchanged, and values in [-0.5, 0) give negative zero.
    """
    if n != n or n == 0 or n in (float('inf'), float('-inf')):
        return n
    if -0.5 <= n < 0:
        # Multiplying a negative number by 0.0 produces -0.0.
        return n * 0.0
    lower = float(math.floor(n))
    # 'n - lower' is exact, unlike 'n + 0.5', so values just below a tie
    # are not rounded up by accident.
    if n - lower >= 0.5:
        return lower + 1.0
    return lower


class Function(Expr):
    """A call to one of the XPath core library functions.

    The XPath function 'some-name' is implemented by the method
    'f_some_name'.  Constructing a Function looks the method up by name
    and installs it as the instance's evaluate(), after checking the
    number of arguments against the limits recorded by the 'function'
    decorator.
    """

    def __init__(self, name, args):
        """Bind the named XPath function to a list of argument expressions.

        Raises XPathUnknownFunctionError if no such function exists and
        XPathTypeError if the number of arguments is out of range.
        """
        self.name = name
        self.args = args
        self.evaluate = getattr(self, 'f_%s' % name.replace('-', '_'), None)
        if self.evaluate is None:
            raise XPathUnknownFunctionError('unknown function "%s()"' % name)

        if len(self.args) < self.evaluate.minargs:
            raise XPathTypeError('too few arguments for "%s()"' % name)
        if (self.evaluate.maxargs is not None and
            len(self.args) > self.evaluate.maxargs):
            raise XPathTypeError('too many arguments for "%s()"' % name)

    #
    # XPath functions are implemented by methods of the Function class.
    #
    # A method implementing an XPath function is decorated with the function
    # decorator, and receives the evaluated function arguments as positional
    # parameters.
    #

    def function(minargs, maxargs, implicit=False, first=False, convert=None):
        """Function decorator.

        minargs -- Minimum number of arguments taken by the function.
        maxargs -- Maximum number of arguments taken by the function,
                   or None for no upper limit.
        implicit -- True for functions which operate on a nodeset consisting
                    of the current context node when passed no argument.
                    (e.g., string() and number().)
        first -- True for functions whose first argument must be a
                 node-set and which operate on its first node only; the
                 method receives that node, or None for an empty node-set.
        convert -- When non-None, a function used to filter function arguments.
        """
        def decorator(f):
            def new_f(self, node, pos, size, context):
                if implicit and len(self.args) == 0:
                    args = [[node]]
                else:
                    args = [x.evaluate(node, pos, size, context)
                            for x in self.args]
                if first:
                    args[0] = nodeset(args[0])
                    if len(args[0]) > 0:
                        args[0] = args[0][0]
                    else:
                        args[0] = None
                if convert is not None:
                    args = [convert(x) for x in args]
                return f(self, node, pos, size, context, *args)

            # Argument limits are read back by Function.__init__.
            new_f.minargs = minargs
            new_f.maxargs = maxargs
            new_f.__name__ = f.__name__
            new_f.__doc__ = f.__doc__
            return new_f
        return decorator

    # Node Set Functions

    @function(0, 0)
    def f_last(self, node, pos, size, context):
        """last(): the context size."""
        return size

    @function(0, 0)
    def f_position(self, node, pos, size, context):
        """position(): the context position."""
        return pos

    @function(1, 1, convert=nodeset)
    def f_count(self, node, pos, size, context, nodes):
        """count(node-set): the number of nodes in the node-set."""
        return len(nodes)

    @function(1, 1)
    def f_id(self, node, pos, size, context, arg):
        """id(object): the elements whose ID is one of the given tokens.

        For a node-set argument the string-value of each node is used;
        any other argument is converted to a string.  Each string is
        split into whitespace-separated tokens, each token is looked up
        with getElementById() on the owning document, and the distinct
        elements found are returned in document order.
        """
        if nodesetp(arg):
            values = [string_value(x) for x in arg]
        else:
            values = [string(arg)]
        if node.nodeType != node.DOCUMENT_NODE:
            node = node.ownerDocument

        found = []
        for value in values:
            for token in value.split():
                element = node.getElementById(token)
                if element is not None and element not in found:
                    found.append(element)
        # Node-sets are kept in document order throughout this module.
        found.sort(key=document_order)
        return found

    @function(0, 1, implicit=True, first=True)
    def f_local_name(self, node, pos, size, context, argnode):
        """local-name(node-set?): local part of the first node's name."""
        if argnode is None:
            return ''
        if (argnode.nodeType == argnode.ELEMENT_NODE or
            argnode.nodeType == argnode.ATTRIBUTE_NODE):
            return argnode.localName
        elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
            return argnode.target
        return ''

    @function(0, 1, implicit=True, first=True)
    def f_namespace_uri(self, node, pos, size, context, argnode):
        """namespace-uri(node-set?): namespace URI of the first node.

        Returns the empty string for an empty node-set and for a node
        that has no namespace URI.
        """
        if argnode is None:
            return ''
        # The DOM reports "no namespace" as None; XPath wants a string.
        return argnode.namespaceURI or ''

    @function(0, 1, implicit=True, first=True)
    def f_name(self, node, pos, size, context, argnode):
        """name(node-set?): qualified name of the first node."""
        if argnode is None:
            return ''
        if argnode.nodeType == argnode.ELEMENT_NODE:
            return argnode.tagName
        elif argnode.nodeType == argnode.ATTRIBUTE_NODE:
            return argnode.name
        elif argnode.nodeType == argnode.PROCESSING_INSTRUCTION_NODE:
            return argnode.target
        return ''

    # String Functions

    @function(0, 1, implicit=True, convert=string)
    def f_string(self, node, pos, size, context, arg):
        """string(object?): the argument converted to a string."""
        return arg

    @function(2, None, convert=string)
    def f_concat(self, node, pos, size, context, *args):
        """concat(string, string, string*): the arguments joined together."""
        return ''.join(args)

    @function(2, 2, convert=string)
    def f_starts_with(self, node, pos, size, context, a, b):
        """starts-with(a, b): true if 'a' begins with 'b'."""
        return a.startswith(b)

    @function(2, 2, convert=string)
    def f_contains(self, node, pos, size, context, a, b):
        """contains(a, b): true if 'b' occurs within 'a'."""
        return b in a

    @function(2, 2, convert=string)
    def f_substring_before(self, node, pos, size, context, a, b):
        """substring-before(a, b): the part of 'a' before the first 'b'.

        Returns the empty string when 'b' does not occur in 'a'.
        """
        try:
            return a[0:a.index(b)]
        except ValueError:
            return ''

    @function(2, 2, convert=string)
    def f_substring_after(self, node, pos, size, context, a, b):
        """substring-after(a, b): the part of 'a' after the first 'b'.

        Returns the empty string when 'b' does not occur in 'a'.
        """
        try:
            return a[a.index(b)+len(b):]
        except ValueError:
            return ''

    @function(2, 3)
    def f_substring(self, node, pos, size, context, s, start, count=None):
        """substring(s, start, count?): a slice of 's' by 1-based position.

        'start' and 'count' are converted to numbers and rounded by the
        XPath round() rules; a NaN start or end gives the empty string.
        """
        s = string(s)
        start = _xpath_round(number(start))
        if start != start:
            # Catch NaN
            return ''

        if count is None:
            end = len(s) + 1
        else:
            end = start + _xpath_round(number(count))
            if end != end:
                # Catch NaN
                return ''
            if end > len(s):
                end = len(s)+1

        if start < 1:
            start = 1
        if start > len(s):
            return ''
        if end <= start:
            return ''
        # Positions are 1-based with an exclusive end; slices are 0-based.
        return s[int(start)-1:int(end)-1]

    @function(0, 1, implicit=True, convert=string)
    def f_string_length(self, node, pos, size, context, s):
        """string-length(string?): the number of characters in the string."""
        return len(s)

    @function(0, 1, implicit=True, convert=string)
    def f_normalize_space(self, node, pos, size, context, s):
        """normalize-space(string?): trim and collapse runs of whitespace."""
        return re.sub(r'\s+', ' ', s.strip())

    @function(3, 3, convert=lambda x: unicode(string(x)))
    def f_translate(self, node, pos, size, context, s, source, target):
        """translate(s, source, target): map or delete characters of 's'.

        Each character of 'source' is replaced by the character at the
        same position in 'target', or removed when 'target' is shorter.
        Only the first occurrence of a character in 'source' counts.
        """
        # str.translate() and unicode.translate() are completely different.
        # The translate() arguments are coerced to unicode.
        table = {}
        for schar, tchar in izip(source, target):
            schar = ord(schar)
            if schar not in table:
                table[schar] = tchar
        if len(source) > len(target):
            # Source characters with no counterpart are deleted.
            for schar in source[len(target):]:
                schar = ord(schar)
                if schar not in table:
                    table[schar] = None
        return s.translate(table)

    # Boolean functions

    @function(1, 1, convert=boolean)
    def f_boolean(self, node, pos, size, context, b):
        """boolean(object): the argument converted to a boolean."""
        return b

    @function(1, 1, convert=boolean)
    def f_not(self, node, pos, size, context, b):
        """not(boolean): the negation of the argument."""
        return not b

    @function(0, 0)
    def f_true(self, node, pos, size, context):
        """true(): the boolean true."""
        return True

    @function(0, 0)
    def f_false(self, node, pos, size, context):
        """false(): the boolean false."""
        return False

    @function(1, 1, convert=string)
    def f_lang(self, node, pos, size, context, s):
        """lang(string): test the language of the context node.

        The nearest ancestor-or-self element carrying an xml:lang
        attribute decides the result: true if its value equals 's' or
        starts with 's' followed by '-', ignoring case.
        """
        s = s.lower()
        for n in axes['ancestor-or-self'](node):
            if n.nodeType == n.ELEMENT_NODE and n.hasAttribute('xml:lang'):
                lang = n.getAttribute('xml:lang').lower()
                if s == lang or lang.startswith(s + u'-'):
                    return True
                # Only the nearest xml:lang declaration applies.
                break
        return False

    # Number functions

    @function(0, 1, implicit=True, convert=number)
    def f_number(self, node, pos, size, context, n):
        """number(object?): the argument converted to a number."""
        return n

    @function(1, 1, convert=nodeset)
    def f_sum(self, node, pos, size, context, nodes):
        """sum(node-set): the sum of the nodes' numeric string-values."""
        return sum((number(string_value(x)) for x in nodes))

    @function(1, 1, convert=number)
    def f_floor(self, node, pos, size, context, n):
        """floor(number): the largest integer not greater than 'n'."""
        return math.floor(n)

    @function(1, 1, convert=number)
    def f_ceiling(self, node, pos, size, context, n):
        """ceiling(number): the smallest integer not less than 'n'."""
        return math.ceil(n)

    @function(1, 1, convert=number)
    def f_round(self, node, pos, size, context, n):
        """round(number): the integer closest to 'n'.

        Ties round towards positive infinity (round(-1.5) is -1), and
        negative zero is preserved.
        """
        return _xpath_round(n)

    def __str__(self):
        """Render the call as 'name(arg, arg, ...)'."""
        return '%s(%s)' % (self.name, ', '.join((str(x) for x in self.args)))
|
||
|
|
||
|
#
|
||
|
# XPath axes.
|
||
|
#
|
||
|
|
||
|
# Registry mapping each XPath axis name (e.g. 'following-sibling') to the
# function that walks that axis.
axes = {}

def axisfn(reverse=False, principal_node_type=xml.dom.Node.ELEMENT_NODE):
    """Build a decorator that marks a function as an XPath axis walker.

    An axis function takes a node and returns a sequence of the nodes
    along one axis.  The decorator modifies the function in place: it
    rewrites its __name__ to the hyphenated XPath spelling and attaches
    two attributes, 'reverse' (the axis direction) and
    'principal_node_type' (the node type matched by name tests).
    """
    def decorate(f):
        f.__name__ = '-'.join(f.__name__.split('_'))
        f.principal_node_type = principal_node_type
        f.reverse = reverse
        return f
    return decorate
|
||
|
|
||
|
def make_axes():
    """Define one walker per XPath axis and register each in 'axes'.

    Every walker takes a DOM node and returns an iterable of nodes.
    Walkers marked reverse=True produce nodes in reverse document order.
    """

    @axisfn()
    def child(node):
        return node.childNodes

    @axisfn()
    def descendant(node):
        for kid in node.childNodes:
            for found in descendant_or_self(kid):
                yield found

    @axisfn()
    def parent(node):
        if node.parentNode is not None:
            yield node.parentNode

    @axisfn(reverse=True)
    def ancestor(node):
        current = node
        while current.parentNode is not None:
            current = current.parentNode
            yield current

    @axisfn()
    def following_sibling(node):
        current = node
        while current.nextSibling is not None:
            current = current.nextSibling
            yield current

    @axisfn(reverse=True)
    def preceding_sibling(node):
        current = node
        while current.previousSibling is not None:
            current = current.previousSibling
            yield current

    @axisfn()
    def following(node):
        # Walk the later siblings (with their subtrees) at this level,
        # then repeat one level up, until there is no parent left.
        current = node
        while current is not None:
            while current.nextSibling is not None:
                current = current.nextSibling
                for found in descendant_or_self(current):
                    yield found
            current = current.parentNode

    @axisfn(reverse=True)
    def preceding(node):
        # Mirror image of 'following': earlier siblings, each subtree
        # emitted back to front, then the same one level up.
        current = node
        while current is not None:
            while current.previousSibling is not None:
                current = current.previousSibling
                # Could be more efficient here.
                subtree = list(descendant_or_self(current))
                for found in reversed(subtree):
                    yield found
            current = current.parentNode

    @axisfn(principal_node_type=xml.dom.Node.ATTRIBUTE_NODE)
    def attribute(node):
        if node.attributes is None:
            return ()
        return (node.attributes.item(i)
                for i in xrange(node.attributes.length))

    @axisfn()
    def namespace(node):
        raise XPathNotImplementedError("namespace axis is not implemented")

    @axisfn()
    def self(node):
        yield node

    @axisfn()
    def descendant_or_self(node):
        yield node
        for kid in node.childNodes:
            for found in descendant_or_self(kid):
                yield found

    @axisfn(reverse=True)
    def ancestor_or_self(node):
        return chain([node], ancestor(node))

    # Register every walker under its hyphenated XPath name.
    walkers = (child, descendant, parent, ancestor, following_sibling,
               preceding_sibling, following, preceding, attribute,
               namespace, self, descendant_or_self, ancestor_or_self)
    for walker in walkers:
        axes[walker.__name__] = walker

make_axes()
|
||
|
|
||
|
def merge_into_nodeset(target, source):
    """Add the nodes of 'source' to 'target' in place, keeping document order.

    Both node-sets must already be in document order.  Nodes of 'source'
    that are already present in 'target' are skipped.  Nothing is returned;
    'target' is modified.
    """
    if not target:
        target.extend(source)
        return

    additions = [n for n in source if n not in target]
    if not additions:
        return

    # When every new node follows the last node already collected, plain
    # concatenation keeps the order and no sort is needed.  (The opposite
    # case -- all new nodes precede the target -- is not special-cased
    # because it is very unlikely in practice.)
    already_ordered = (document_order(target[-1]) <
                       document_order(additions[0]))
    target.extend(additions)
    if not already_ordered:
        target.sort(key=document_order)
|
||
|
|
||
|
class AbsolutePathExpr(Expr):
    """Absolute location paths."""

    def __init__(self, path):
        """Store the relative path that follows the leading '/', or None."""
        self.path = path

    def evaluate(self, node, pos, size, context):
        """Evaluate the path starting from the document node.

        The document is the context node itself when that is a document
        node, and its ownerDocument otherwise.  With no relative path the
        result is a node-set holding just the document.
        """
        if node.nodeType == node.DOCUMENT_NODE:
            root = node
        else:
            root = node.ownerDocument

        if self.path is None:
            return [root]
        return self.path.evaluate(root, 1, 1, context)

    def __str__(self):
        """Render the path with its leading slash."""
        return '/%s' % (self.path or '')
|
||
|
|
||
|
class PathExpr(Expr):
    """Location path expressions."""

    def __init__(self, steps):
        """Store the ordered list of step expressions."""
        self.steps = steps

    def evaluate(self, node, pos, size, context):
        """Evaluate the steps left to right and return the final value.

        The first step runs in the caller's context.  Each later step
        runs once per node produced by the step before it, with that node
        as context node, and the per-node results are merged in document
        order.

        Raises XPathTypeError if a step that feeds, or is, a later step
        does not produce a node-set.
        """
        # A single-step path may yield any XPath value; only when more
        # steps follow must the first result be a node-set.
        current = self.steps[0].evaluate(node, pos, size, context)
        if len(self.steps) > 1 and not nodesetp(current):
            raise XPathTypeError("path step is not a node-set")

        for step in self.steps[1:]:
            total = len(current)
            merged = []
            for index, origin in enumerate(current):
                found = step.evaluate(origin, index + 1, total, context)
                if not nodesetp(found):
                    raise XPathTypeError("path step is not a node-set")
                merge_into_nodeset(merged, found)
            current = merged

        return current

    def __str__(self):
        """Render the steps joined by '/'."""
        return '/'.join((str(s) for s in self.steps))
|
||
|
|
||
|
class PredicateList(Expr):
    """An expression filtered by a sequence of predicates.

    The predicates wrap the expression whose node-set result they filter.
    """

    def __init__(self, expr, predicates, axis='child'):
        """Store the filtered expression, its predicates and the axis.

        'axis' names the axis function whose direction decides the order
        in which positions are counted.
        """
        self.predicates = predicates
        self.expr = expr
        self.axis = axes[axis]

    def evaluate(self, node, pos, size, context):
        """Evaluate the expression and apply each predicate in turn.

        Positions are counted along the axis direction, so for a reverse
        axis the node-set is reversed before filtering and reversed back
        afterwards.  A predicate that evaluates to a number keeps the
        node at that position; any other result keeps the nodes for which
        its boolean value is true.

        Raises XPathTypeError if the expression is not a node-set.
        """
        result = self.expr.evaluate(node, pos, size, context)
        if not nodesetp(result):
            raise XPathTypeError("predicate input is not a node-set")

        if self.axis.reverse:
            result.reverse()

        for pred in self.predicates:
            total = len(result)
            kept = []
            for position, candidate in enumerate(result, 1):
                outcome = pred.evaluate(candidate, position, total, context)
                if numberp(outcome):
                    # Numeric predicate: shorthand for a position test.
                    if outcome == position:
                        kept.append(candidate)
                elif boolean(outcome):
                    kept.append(candidate)
            result = kept

        if self.axis.reverse:
            result.reverse()

        return result

    def __str__(self):
        """Render the expression followed by its bracketed predicates.

        The expression is parenthesised when its text contains a '/'.
        """
        text = str(self.expr)
        if '/' in text:
            text = '(%s)' % text
        return text + ''.join(('[%s]' % x for x in self.predicates))
|
||
|
|
||
|
class AxisStep(Expr):
    """One step in a location path expression."""

    def __init__(self, axis, test=None, predicates=None):
        """Store the axis function and node test for this step.

        'axis' is an axis name looked up in 'axes'.  'test' defaults to a
        test matching any node.  'predicates' is accepted but not used by
        this class.
        """
        self.axis = axes[axis]
        self.test = AnyKindTest() if test is None else test

    def evaluate(self, node, pos, size, context):
        """Return the nodes along the axis that pass the node test.

        The result is in document order: nodes collected along a reverse
        axis are flipped before being returned.
        """
        selected = [candidate for candidate in self.axis(node)
                    if self.test.match(candidate, self.axis, context)]

        if self.axis.reverse:
            selected.reverse()

        return selected

    def __str__(self):
        """Render the step as 'axis::test'."""
        return '%s::%s' % (self.axis.__name__, self.test)
|
||
|
|
||
|
#
|
||
|
# Node tests.
|
||
|
#
|
||
|
|
||
|
class Test(object):
    """Interface implemented by every node test."""

    def match(self, node, axis, context):
        """Return True if 'node' matches the test along 'axis'.

        This base implementation does nothing and returns None.
        """
        return None
|
||
|
|
||
|
class NameTest(object):
    """Node test matching nodes by (optionally prefixed) name.

    Either part may be the wildcard '*'.  An unprefixed '*' is stored
    with prefix '*' so that it matches names in any namespace.
    """

    def __init__(self, prefix, localpart):
        """Store the prefix (or None) and local part of the name."""
        self.prefix = prefix
        self.localName = localpart
        if self.prefix is None and self.localName == '*':
            self.prefix = '*'

    def match(self, node, axis, context):
        """Return True if 'node' matches this name along 'axis'.

        The node must be of the axis's principal node type.  Unless the
        prefix is '*', the node's namespace URI must equal the URI bound
        to the prefix in context.namespaces; for an unprefixed name that
        is context.default_namespace on element axes and None otherwise.
        Unless the local part is '*', it must equal the node's localName.

        Raises XPathUnknownPrefixError if the prefix is not declared.
        """
        if node.nodeType != axis.principal_node_type:
            return False

        if self.prefix != '*':
            if self.prefix is not None:
                try:
                    expected_uri = context.namespaces[self.prefix]
                except KeyError:
                    raise XPathUnknownPrefixError(self.prefix)
            elif axis.principal_node_type == node.ELEMENT_NODE:
                expected_uri = context.default_namespace
            else:
                expected_uri = None
            if expected_uri != node.namespaceURI:
                return False

        return self.localName == '*' or self.localName == node.localName

    def __str__(self):
        """Render the test as 'prefix:local', or 'local' with no prefix."""
        if self.prefix is None:
            return self.localName
        return '%s:%s' % (self.prefix, self.localName)
|
||
|
|
||
|
class PITest(object):
    """Node test matching processing instructions, optionally by target."""

    def __init__(self, name=None):
        """Store the required target name, or None to accept any target."""
        self.name = name

    def match(self, node, axis, context):
        """Return True if 'node' is a processing instruction with the
        required target (any target when no name was given)."""
        if node.nodeType != node.PROCESSING_INSTRUCTION_NODE:
            return False
        return self.name is None or node.target == self.name

    def __str__(self):
        """Render the test as XPath source text.

        The target is wrapped in single quotes, or in double quotes when
        it contains a single quote.
        """
        if self.name is None:
            quoted = ''
        else:
            template = '"%s"' if "'" in self.name else "'%s'"
            quoted = template % self.name
        return 'processing-instruction(%s)' % quoted
|
||
|
|
||
|
class CommentTest(object):
    """Node test matching comment nodes."""

    def match(self, node, axis, context):
        """Return True if 'node' is a comment; axis and context are unused."""
        kind = node.nodeType
        return kind == node.COMMENT_NODE

    def __str__(self):
        """Render the test as XPath source text."""
        return 'comment()'
|
||
|
|
||
|
class TextTest(object):
    """Node test matching text nodes and CDATA sections."""

    def match(self, node, axis, context):
        """Return True if 'node' is a text node or a CDATA section;
        axis and context are unused."""
        if node.nodeType == node.TEXT_NODE:
            return True
        return node.nodeType == node.CDATA_SECTION_NODE

    def __str__(self):
        """Render the test as XPath source text."""
        return 'text()'
|
||
|
|
||
|
class AnyKindTest(object):
    """Node test matching every node, whatever its kind."""

    def match(self, node, axis, context):
        """Return True unconditionally; all arguments are unused."""
        return True

    def __str__(self):
        """Render the test as XPath source text."""
        return 'node()'
|