nutools/lib/ulib/support/xpathtool.py

159 lines
6.5 KiB
Python
Executable File

#!/usr/bin/env python
# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
u"""Ce script permet d'obtenir ou de modifier un élément identifié par une expression XPATH
"""
import os, sys, re, types, tempfile, codecs, shutil
from os import path
sys.path.insert(0, path.join(path.dirname(__file__), 'python'))
from xml.dom import Node, minidom
import xpath
def get_text(node, as_xml=False):
    """Return a printable string for one item of an XPath result.

    ``node`` may be a DOM node, a unicode string, or any other value
    produced by the expression (number, boolean...).

    * element or document node: its serialisation (``toxml``) when
      ``as_xml`` is true, otherwise the value computed by
      ``xpath.expr.string_value``, encoded in UTF-8;
    * attribute node: its value, encoded in UTF-8;
    * text, CDATA, comment or processing-instruction node: its data,
      encoded in UTF-8;
    * unicode string: the string, encoded in UTF-8;
    * anything else: ``str(node)``.

    ``as_xml`` only affects element and document nodes. The function never
    returns None, so the caller can print the result as is.
    """
    if isinstance(node, Node):
        kind = node.nodeType
        if kind in (Node.ELEMENT_NODE, Node.DOCUMENT_NODE):
            if as_xml:
                return node.toxml("utf-8")
            return xpath.expr.string_value(node).encode("utf-8")
        if kind == Node.ATTRIBUTE_NODE:
            return node.value.encode("utf-8")
        if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE,
                    Node.COMMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE):
            # All of these carry their content in the ``data`` attribute.
            return node.data.encode("utf-8")
        # Any other node kind falls through to the generic str() below
        # instead of implicitly returning None.
    elif isinstance(node, type(u"")):
        return node.encode("utf-8")
    return str(node)
def set_text(node, value, doc):
    """Set, or remove when ``value`` is None, the text designated by ``node``.

    ``node`` is a DOM node selected by the XPath expression, ``value`` the
    new unicode text (or None to unset), and ``doc`` the owning document,
    used to create new text nodes.

    * element: only its first child is considered. With a value, a new text
      node replaces a leading text/CDATA child, or is inserted before any
      other first child, or appended to an empty element. Without a value,
      a leading text/CDATA child is removed; other children are kept.
    * attribute: the value is updated, or the attribute is removed from its
      owner element when ``value`` is None.
    * text/CDATA: the data is updated, or the node is removed from its
      parent when ``value`` is None.

    Raises ValueError if ``node`` is not a DOM node or is of another kind.
    """
    if not isinstance(node, Node):
        raise ValueError("L'expression ne désigne pas un noeud XML")

    kind = node.nodeType
    chardata_kinds = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

    if kind == Node.ELEMENT_NODE:
        first = node.firstChild
        first_is_text = first is not None and first.nodeType in chardata_kinds
        if value is None:
            if first_is_text:
                node.removeChild(first)
        else:
            new_text = doc.createTextNode(value)
            if first is None:
                node.appendChild(new_text)
            elif first_is_text:
                node.replaceChild(new_text, first)
            else:
                node.insertBefore(new_text, first)
    elif kind == Node.ATTRIBUTE_NODE:
        if value is not None:
            node.value = value
        else:
            # An attribute has no parentNode, but minidom records the element
            # that carries it in ownerElement; use it to drop the attribute.
            owner = node.ownerElement
            if owner is not None:
                owner.removeAttributeNode(node)
    elif kind in chardata_kinds:
        if value is not None:
            node.data = value
        else:
            node.parentNode.removeChild(node)
    else:
        raise ValueError("Type de noeud non supporté: %s" % node.nodeType)
# A step named "parent" (at the start or after a "/") followed by a predicate.
RE_PARENT0 = re.compile(r'(^|/)parent\[')
# A step named "parent" that ends the expression or is followed by "/".
RE_PARENT1 = re.compile(r'(^|/)parent(?=/|$)')


def py_dom_xpath_compat(expr):
    """Rewrite steps named ``parent`` into ``*[local-name()='parent']``.

    Only whole steps are rewritten: ``parent`` must start the expression or
    follow a ``/``, and be followed by ``[``, ``/`` or the end of the
    expression. Other names (``parents``, ``grandparent``...) are untouched.
    NOTE(review): presumably needed because py-dom-xpath does not accept
    ``parent`` as a plain element name -- confirm against the library.

    An existing predicate is kept as a separate, chained predicate
    (``parent[2]`` -> ``*[local-name()='parent'][2]``). Merging it with
    ``and`` would turn a positional predicate into a boolean test and would
    mis-bind predicates containing ``or``.
    """
    expr = RE_PARENT0.sub(r"\1*[local-name()='parent'][", expr)
    expr = RE_PARENT1.sub(r"\1*[local-name()='parent']", expr)
    return expr
def run_xpathtool():
    """Command-line entry point: print, test or modify nodes selected by XPATH.

    The first positional argument is the XPATH expression; unless
    ``--no-compat`` is given it is first rewritten by py_dom_xpath_compat().
    The remaining positional arguments are interpreted according to the mode:

    * get (``-g``, or implied when no VALUE is given): ``XPATH [INPUT]``.
      Prints one line per selected node and exits with status 0 if at least
      one node was found, 1 otherwise.
    * exist (``-t``): ``XPATH [INPUT]``. Same exit status as get, but prints
      nothing.
    * set (``-s``, or implied when a VALUE is given):
      ``XPATH VALUE [INPUT [OUTPUT]]``. Updates every selected node with
      set_text() and writes the whole document to OUTPUT.

    INPUT and OUTPUT may also be given with ``-f`` and ``-o``; ``-`` or a
    missing INPUT means standard input. OUTPUT defaults to standard output
    when reading standard input, and to the input file otherwise.

    Raises ValueError when no XPATH expression is given.
    """
    from optparse import OptionParser
    OP = OptionParser(usage=u"\n\t%prog -g XPATH [INPUT [OUTPUT]]\n\t%prog -s XPATH VALUE [INPUT [OUTPUT]]", description=__doc__)
    OP.add_option('-f', '--input', dest='inf',
                  help=u"Spécifier le fichier en entrée")
    OP.add_option('-o', '--output', dest='outf',
                  help=u"Spécifier le fichier en sortie")
    OP.add_option('-g', '--get', dest='mode', action='store_const', const='get',
                  help=u"Forcer l'affichage de la valeur. "
                  + u"Par défaut, ce mode est sélectionné s'il n'y a aucun argument après XPATH")
    OP.add_option('-t', '--exist', dest='mode', action='store_const', const='exist',
                  help=u"Tester l'existence du chemin spécifié.")
    OP.add_option('-s', '--set', dest='mode', action='store_const', const='set',
                  help=u"Forcer la modification de la valeur. "
                  + u"Par défaut, ce mode est sélectionné s'il y a un argument VALUE après XPATH")
    OP.add_option('-x', '--as-xml', dest='as_xml', action='store_true',
                  help=u"Retourner le résultat de l'expression en XML")
    OP.add_option('--no-compat', dest='compat', action='store_false', default=True,
                  help=u"Ne pas transfomer certaines expression en un équivalent compatible avec py-dom-xpath. "
                  + u"Par exemple, par défaut \"/parent\" est transformé en \"/*[local-name()='parent']\". "
                  + u"Cette option désactive ce comportement.")
    o, args = OP.parse_args()
    inf = o.inf
    outf = o.outf
    mode = o.mode
    as_xml = o.as_xml
    compat = o.compat

    if not args:
        raise ValueError("Vous devez spécifier l'expression XPATH")
    expr = args[0]
    if compat:
        expr = py_dom_xpath_compat(expr)

    # Work out mode, value, input and output from what follows XPATH.
    value = None
    args = args[1:]
    count = len(args)
    if mode is None:
        if count == 0:
            # xpathtool.py XPATH
            mode = 'get'
        elif inf is None:
            if count == 1:
                # xpathtool.py XPATH INPUT
                mode = 'get'
                inf = args[0]
            else:
                # xpathtool.py XPATH VALUE INPUT [OUTPUT]
                mode = 'set'
                value = args[0]
                inf = args[1]
                if count > 2:
                    outf = args[2]
        else:
            # xpathtool.py XPATH VALUE -f INPUT
            mode = 'set'
            value = args[0]
    elif mode in ('get', 'exist'):
        # xpathtool.py -g|-t XPATH [INPUT]
        # Both read-only modes accept the input file as a positional argument.
        if inf is None and count > 0:
            inf = args[0]
    elif mode == 'set':
        if count > 0:
            value = args[0]
        if inf is None:
            # xpathtool.py -s XPATH VALUE [INPUT [OUTPUT]]
            if count > 1:
                inf = args[1]
            if count > 2:
                outf = args[2]

    # "-" designates the standard streams.
    if inf == '-':
        inf = None
    if outf == '-':
        outf = sys.stdout
    if inf is None:
        inf = sys.stdin
        if outf is None:
            outf = sys.stdout
    elif outf is None:
        # Modify the input file in place by default.
        outf = inf

    doc = minidom.parse(inf)
    if mode == 'get' or mode == 'exist':
        nodes = xpath.find(expr, doc)
        if mode == 'get':
            for node in nodes:
                # write() rather than the print statement: same output, and
                # the line remains valid syntax for Python 3 tooling.
                sys.stdout.write("%s\n" % get_text(node, as_xml))
        if nodes:
            r = 0
        else:
            r = 1
        sys.exit(r)
    elif mode == 'set':
        # Command-line arguments are byte strings; the DOM expects unicode.
        if value is not None and not isinstance(value, type(u"")):
            value = value.decode("utf-8")
        for node in xpath.find(expr, doc):
            set_text(node, value, doc)
        if isinstance(outf, types.StringTypes):
            # Serialise to a temporary file first and copy it over the target
            # only once the document has been written completely.
            fd, tmpf = tempfile.mkstemp()
            try:
                os.close(fd)
                out = codecs.open(tmpf, "w", "utf-8")
                try:
                    doc.writexml(out, encoding="utf-8")
                finally:
                    out.close()
                shutil.copyfile(tmpf, outf)
            finally:
                os.remove(tmpf)
        else:
            # writexml() emits unicode; encode it explicitly, as is done for
            # files, so that non-ASCII content does not fail when the stream
            # has no encoding (e.g. stdout redirected to a pipe).
            doc.writexml(codecs.getwriter("utf-8")(outf), encoding="utf-8")
# Run the command-line tool only when this file is executed as a script;
# importing the module just makes its functions available.
if __name__ == '__main__':
    run_xpathtool()