#!/usr/bin/env python
# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8

# NOTE: the module docstring below is shown to the user as the --help
# description (see ``description=__doc__`` in run_xpathtool), so it is runtime
# text and is kept verbatim.
u"""Ce script permet d'obtenir ou de modifier un élément identifié par une expression XPATH
"""

import os, sys, re, types, tempfile, codecs, shutil
from os import path
# Make the modules in the 'python' directory next to this script importable
# (presumably where the 'xpath' package imported below lives -- TODO confirm).
sys.path.insert(0, path.join(path.dirname(__file__), 'python'))
from xml.dom import Node, minidom

import xpath

# Node types holding plain character content that set_text() may replace.
_TEXT_TYPES = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
# Node types whose content is exposed through the ``data`` attribute.
_DATA_TYPES = _TEXT_TYPES + (Node.COMMENT_NODE,
                             Node.PROCESSING_INSTRUCTION_NODE)


def get_text(node, as_xml=False):
    """Return one XPath result item as a UTF-8 encoded byte string.

    node -- a DOM node, a unicode string, or any other value (converted
        with str()).
    as_xml -- when true, an element node is serialized as XML markup instead
        of being reduced to its XPath string value.

    Elements, attributes, text, CDATA, comments and processing instructions
    yield their textual content; any other DOM node (e.g. the document
    itself) is serialized as XML rather than yielding None.
    """
    if isinstance(node, Node):
        node_type = node.nodeType
        if node_type == Node.ELEMENT_NODE:
            if as_xml:
                return node.toxml("utf-8")
            return xpath.expr.string_value(node).encode("utf-8")
        if node_type == Node.ATTRIBUTE_NODE:
            return node.value.encode("utf-8")
        if node_type in _DATA_TYPES:
            return node.data.encode("utf-8")
        # Remaining node types have no single text value; serialize them so
        # the caller always receives a string.
        return node.toxml("utf-8")
    if isinstance(node, types.UnicodeType):
        return node.encode("utf-8")
    return str(node)


def set_text(node, value, doc):
    """Replace (or delete, when value is None) the text designated by node.

    node -- the DOM node to modify.
    value -- the new unicode text, or None to delete the existing content.
    doc -- the owning document, used to create new text nodes.

    * Element: only its first child is considered. A leading text/CDATA child
      is replaced (or removed); otherwise the new text is inserted first.
    * Attribute: the value is replaced, or the attribute is removed from its
      owner element.
    * Text/CDATA: the data is replaced, or the node is removed from its
      parent.

    Raises ValueError if node is not a DOM node or has an unsupported type.
    """
    if not isinstance(node, Node):
        raise ValueError("L'expression ne désigne pas un noeud XML")

    node_type = node.nodeType
    if node_type == Node.ELEMENT_NODE:
        first_child = node.firstChild
        has_leading_text = (first_child is not None
                            and first_child.nodeType in _TEXT_TYPES)
        if value is None:
            if has_leading_text:
                node.removeChild(first_child)
        else:
            text_node = doc.createTextNode(value)
            if first_child is None:
                node.appendChild(text_node)
            elif has_leading_text:
                node.replaceChild(text_node, first_child)
            else:
                node.insertBefore(text_node, first_child)
    elif node_type == Node.ATTRIBUTE_NODE:
        if value is not None:
            node.value = value
        else:
            # An attribute has no parentNode, but minidom records the element
            # carrying it in ownerElement.
            owner = getattr(node, "ownerElement", None)
            if owner is not None:
                owner.removeAttributeNode(node)
    elif node_type in _TEXT_TYPES:
        if value is not None:
            node.data = value
        else:
            node.parentNode.removeChild(node)
    else:
        raise ValueError("Type de noeud non supporté: %s" % node.nodeType)


# A location step named exactly 'parent', followed by a predicate ...
RE_PARENT0 = re.compile(r'(^|/)parent\[')
# ... or standing alone (followed by '/' or the end of the expression).
RE_PARENT1 = re.compile(r'(^|/)parent(?=/|$)')


def py_dom_xpath_compat(expr):
    """Rewrite steps selecting elements named 'parent' for py-dom-xpath.

    "parent" becomes "*[local-name()='parent']" and "parent[cond]" becomes
    "*[local-name()='parent' and cond]". Presumably needed because
    py-dom-xpath does not accept a bare 'parent' name test -- TODO confirm.
    """
    expr = RE_PARENT0.sub(r"\1*[local-name()='parent' and ", expr)
    expr = RE_PARENT1.sub(r"\1*[local-name()='parent']", expr)
    return expr


def _as_items(result):
    """Return an XPath result as a list, wrapping a scalar in a 1-item list."""
    if isinstance(result, (list, tuple)):
        return result
    return [result]


def _write_document(doc, outf):
    """Serialize doc as UTF-8 XML to outf (a file name or a writable stream)."""
    if isinstance(outf, types.StringTypes):
        # Serialize to a temporary file first: the destination may be the
        # input file itself and must not be truncated if serialization fails.
        fd, tmpf = tempfile.mkstemp()
        try:
            os.close(fd)
            out = codecs.open(tmpf, "w", "utf-8")
            try:
                doc.writexml(out, encoding="utf-8")
            finally:
                out.close()
            shutil.copyfile(tmpf, outf)
        finally:
            os.remove(tmpf)
    else:
        # Encode explicitly so the bytes match the declared encoding even when
        # the stream (e.g. a piped stdout) has no encoding of its own.
        doc.writexml(codecs.getwriter("utf-8")(outf), encoding="utf-8")


def run_xpathtool():
    """Command-line entry point: get, test or set the target of an XPath.

    Supported invocations:
        xpathtool.py [-g] XPATH [INPUT]
        xpathtool.py -t XPATH [INPUT]
        xpathtool.py [-s] XPATH VALUE [INPUT [OUTPUT]]
    INPUT and OUTPUT may also be given with -f / -o; '-' or no INPUT means
    standard input. In set mode the output defaults to the input file, or to
    standard output when reading standard input.

    The 'get' and 'exist' modes terminate through sys.exit() with status 0
    when the expression matched something and 1 otherwise.

    Raises ValueError when no XPATH expression is given.
    """
    from optparse import OptionParser
    parser = OptionParser(usage=u"\n\t%prog -g XPATH [INPUT [OUTPUT]]\n\t%prog -s XPATH VALUE [INPUT [OUTPUT]]", description=__doc__)
    parser.add_option('-f', '--input', dest='inf',
                      help=u"Spécifier le fichier en entrée")
    parser.add_option('-o', '--output', dest='outf',
                      help=u"Spécifier le fichier en sortie")
    parser.add_option('-g', '--get', dest='mode', action='store_const', const='get',
                      help=u"Forcer l'affichage de la valeur. "
                      + u"Par défaut, ce mode est sélectionné s'il n'y a aucun argument après XPATH")
    parser.add_option('-t', '--exist', dest='mode', action='store_const', const='exist',
                      help=u"Tester l'existence du chemin spécifié.")
    parser.add_option('-s', '--set', dest='mode', action='store_const', const='set',
                      help=u"Forcer la modification de la valeur. "
                      + u"Par défaut, ce mode est sélectionné s'il y a un argument VALUE après XPATH")
    parser.add_option('-x', '--as-xml', dest='as_xml', action='store_true',
                      help=u"Retourner le résultat de l'expression en XML")
    parser.add_option('--no-compat', dest='compat', action='store_false', default=True,
                      help=u"Ne pas transfomer certaines expression en un équivalent compatible avec py-dom-xpath. "
                      + u"Par exemple, par défaut \"/parent\" est transformé en \"/*[local-name()='parent']\". "
                      + u"Cette option désactive ce comportement.")
    o, args = parser.parse_args()
    inf = o.inf
    outf = o.outf
    mode = o.mode
    as_xml = o.as_xml
    compat = o.compat

    if not args:
        raise ValueError("Vous devez spécifier l'expression XPATH")
    expr = args[0]
    if compat:
        expr = py_dom_xpath_compat(expr)

    value = None
    args = args[1:]
    count = len(args)
    if mode is None:
        # No explicit mode: guess it from the number of remaining arguments.
        if count == 0:
            # xpathtool.py XPATH
            mode = 'get'
        elif inf is None:
            if count == 1:
                # xpathtool.py XPATH INPUT
                mode = 'get'
                inf = args[0]
            else:
                # xpathtool.py XPATH VALUE INPUT [OUTPUT]
                mode = 'set'
                value = args[0]
                inf = args[1]
                if count > 2:
                    outf = args[2]
        else:
            # xpathtool.py XPATH VALUE -f INPUT
            mode = 'set'
            value = args[0]
    elif mode in ('get', 'exist'):
        # xpathtool.py -g XPATH [INPUT]   /   xpathtool.py -t XPATH [INPUT]
        if inf is None and count > 0:
            inf = args[0]
    elif mode == 'set':
        if count > 0:
            value = args[0]
        if inf is None:
            # xpathtool.py -s XPATH VALUE [INPUT [OUTPUT]]
            if count > 1:
                inf = args[1]
            if count > 2:
                outf = args[2]

    # Resolve '-' and missing file names to the standard streams.
    if inf == '-':
        inf = None
    if outf == '-':
        outf = sys.stdout
    if inf is None:
        inf = sys.stdin
        if outf is None:
            outf = sys.stdout
    elif outf is None:
        # Modify the input file in place.
        outf = inf

    doc = minidom.parse(inf)
    if mode == 'get' or mode == 'exist':
        result = xpath.find(expr, doc)
        if mode == 'get':
            # The result may be a scalar (string, number, boolean) rather
            # than a node list; it must then be printed as a single item.
            for item in _as_items(result):
                sys.stdout.write(get_text(item, as_xml) + "\n")
        if result:
            status = 0
        else:
            status = 1
        sys.exit(status)
    elif mode == 'set':
        if value is not None and not isinstance(value, types.UnicodeType):
            # Command-line arguments are byte strings; the DOM expects unicode.
            value = unicode(value, "utf-8")
        for node in _as_items(xpath.find(expr, doc)):
            set_text(node, value, doc)
        _write_document(doc, outf)


if __name__ == '__main__':
    run_xpathtool()