supervivencia en python usando xml con namespaces

En Python hay dos librerías que se usan extensamente: xml, que viene en el core de Python, y lxml, una librería externa que provee algunas cosas que xml no.

Vamos a manejar tres clases o conceptos: ElementTree, Element y ElementPath, y vamos a jugar siempre con que nuestro xml tiene namespaces, ya que tooooodo el uso actual de xml de servicios soap se hace con namespaces, excepto los ficheros de configuración de java

<tag attributo="value">texto</tag>

Element es un tag, y tiene nombre (tag, tagname o como quieras llamarlo), atributos y texto

En estos ejemplos se realiza una acción y se dumpea la salida para comprobar lo que estamos haciendo

import xml.etree.ElementTree as ET

# crear nodos <root><body></body></root>
>>> root = ET.Element('root') >>> body = ET.SubElement(root, 'body') >>> ET.dump(root)

# asignar atributos a un tag
# <root><body env="devel"></body></root>
>>> root = ET.Element('root') >>> body = ET.SubElement(root, 'body', {'env':'devel'}) >>> ET.dump(root) <root><body env="devel" /></root>

# otras formas de manejar los attributos
>>> body.attrib['env'] 'devel' >>> body.attrib['env'] = 'foo' >>> body.attrib['env'] 'foo'

# texto dentro de un Element
>>> body.text = 'this is the text' >>> ET.dump(body) <body env="foo">this is the text</body>
# importar de fichero
>>> import xml.etree.ElementTree as ET >>> tree = ET.parse('country_data.xml') >>> root = tree.getroot()

# importar de string
>>> xml_string = "<root><body></body></root>" >>> root = ET.fromstring(xml_string) >>> ET.dump(root) <root><body /></root>
# namespaces

source = """<?xml version="1.0" encoding="utf-8"?> <soap:Envelope xmlns:cwmp="urn:dslforum-org:cwmp-1-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:soap-enc="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <soap:Header> <cwmp:ID soap:mustUnderstand="1">irrelevant_request_id</cwmp:ID> <cwmp:HoldRequests soap:mustUnderstand="1">0</cwmp:HoldRequests> </soap:Header> <soap:Body> </soap:Body> </soap:Envelope>"""

>>> xml = ET.fromstring(source)
>>> xml
<Element '{http://schemas.xmlsoap.org/soap/envelope/}Envelope' at 0x7f8ddb53d950>

Dos cosas a tener en cuenta: NO puede haber un salto de línea entre las comillas de apertura y la declaración ("""<?xml…); si lo hay, se genera la excepción "XML or text declaration not at start of entity"

source = """ <?xml version="1.0" encoding="utf-8"?> <soap:Envelope xmlns:cwmp="urn:dslforum-org:cwmp-1-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:soap-enc="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <soap:Header> <cwmp:ID soap:mustUnderstand="1">irrelevant_request_id</cwmp:ID> <cwmp:HoldRequests soap:mustUnderstand="1">0</cwmp:HoldRequests> </soap:Header> <soap:Body> </soap:Body> </soap:Envelope>"""

>>> import xml.etree.ElementTree as ET
>>> xml = ET.fromstring(source)
>>> xml = ET.fromstring(source)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1311, in XML
    parser.feed(text)
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1653, in feed
    self._raiseerror(v)
  File "/usr/lib/python2.7/xml/etree/ElementTree.py", line 1517, in _raiseerror
    raise err
xml.etree.ElementTree.ParseError: XML or text declaration not at start of entity: line 2, column 0
#Usando namespaces los tags se nombran así
{url}TagName

Con lo que las búsquedas xpath se hacen buscando por {url}TagName. Para buscar <cwmp:ID> lo tendremos que hacer de la siguiente manera, pero antes hay que registrar las urls

Podemos usar find o findall para buscar uno o todos los nodos que cumplan un pattern xpath
source = """<?xml version="1.0" encoding="utf-8"?> <soap:Envelope xmlns:cwmp="urn:dslforum-org:cwmp-1-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:soap-enc="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <soap:Header> <cwmp:ID soap:mustUnderstand="1">irrelevant_request_id</cwmp:ID> <cwmp:HoldRequests soap:mustUnderstand="1">0</cwmp:HoldRequests> </soap:Header> <soap:Body> </soap:Body> </soap:Envelope>"""

>>> xml = ET.fromstring(source)
# como tenemos namespaces esta consulta no funcionará
>>> xml.find('ID')
# esta debería funcionar, pero tampoco funciona porque find con un tag a secas solo busca entre los hijos directos del nodo actual y no en todo el árbol
>>> xml.find('{urn:dslforum-org:cwmp-1-2}ID')
# buscamos con find desde cualquier nodo con una busqueda al estilo xpath
>>> xml.find('.//{urn:dslforum-org:cwmp-1-2}ID') <Element '{urn:dslforum-org:cwmp-1-2}ID' at 0x7f234c675c50>

# es posible usar etiquetas para los namespaces
>>> ns = {'cwmp': 'urn:dslforum-org:cwmp-1-2', 'soap': 'http://schemas.xmlsoap.org/soap/envelope/'} >>> xml.find('.//cwmp:ID', ns) <Element '{urn:dslforum-org:cwmp-1-2}ID' at 0x7f234c675c50>

# findall devolverá una lista con los resultados, o una lista vacía si no hay resultado
>>> xml.findall('.//{urn:dslforum-org:cwmp-1-2}ID', ns) [<Element '{urn:dslforum-org:cwmp-1-2}ID' at 0x7f234c675c50>]
# pip install lxml
# from lxml import etree

# crear nodos <root><body></body></root>
>>> root = etree.Element('root') >>> root.append(etree.Element('body')) >>> etree.tostring(root) '<root><body/></root>' >>> etree.tostring(root, pretty_print=True) '<root>\n <body/>\n</root>\n'

# asignar atributos a un tag

>>> root = etree.Element('root') >>> root.append(etree.Element('body', env='devel')) >>> etree.tostring(root) '<root><body env="devel"/></root>' >>> etree.tostring(root, pretty_print=True) '<root>\n  <body env="devel"/>\n</root>\n'

# manejar atributos de forma cómoda
>>> root = etree.Element('root') >>> root.append(etree.Element('body', my_attribute='my_value')) >>> attributes = root[0].attrib >>> attributes['new_attr'] = '12345' >>> etree.tostring(root) '<root><body my_attribute="my_value" new_attr="12345"/></root>'
# manejar texto
>>> from lxml import etree >>> root = etree.Element('root') >>> root.append(etree.Element('body', my_attribute='my_value')) >>> root.text = 'this is the text' >>> etree.tostring(root) '<root>this is the text<body my_attribute="my_value"/></root>'

# importar de fichero
>>> from lxml import etree >>> tree = etree.parse('country_data.xml') >>> root = tree.getroot()

# importar de string
>>> xml_string = "<root><body></body></root>" >>> root = ET.fromstring(xml_string) >>> ET.dump(root) <root><body /></root>

# namespaces

>>> source = """<?xml version="1.0" encoding="utf-8"?> <soap:Envelope xmlns:cwmp="urn:dslforum-org:cwmp-1-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/" xmlns:soap-enc="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsd="http://www.w3.org/2001/XMLSchema"> <soap:Header> <cwmp:ID soap:mustUnderstand="1">irrelevant_request_id</cwmp:ID> <cwmp:HoldRequests soap:mustUnderstand="1">0</cwmp:HoldRequests> </soap:Header> <soap:Body> </soap:Body> </soap:Envelope>""" >>> root = ET.fromstring(source)
# Buscando nodos: como en xml, hay que poner {namespace}Tag

# nótese que el {namespace} debe ser exacto al encabezado o no funcionará la búsqueda
>>> print(root.findall('{http://schemas.xmlsoap.org/soap/envelope}Header')) [] >>> print(root.findall('{http://schemas.xmlsoap.org/soap/envelope/}Header')) [<Element '{http://schemas.xmlsoap.org/soap/envelope/}Header' at 0x7f234c675e90>]
# para poder obtener los nsmap de un xml debemos usar objectify, hay dos Element

http://lxml.de/api/xml.etree.ElementTree.Element-class.html
http://lxml.de/api/lxml.etree.ElementBase-class.html

>>> root = objectify.fromstring(source) >>> root.nsmap {'cwmp': 'urn:dslforum-org:cwmp-1-2', 'soap-enc': 'http://schemas.xmlsoap.org/soap/encoding/', 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', 'xsd': 'http://www.w3.org/2001/XMLSchema'}
# printea con cabecera xml usando xml_declaration=True
root = etree.Element('root') etree.tostring(root, encoding="utf-8" , xml_declaration=True) "<?xml version='1.0' encoding='utf-8'?>\n<root/>"

# Se pueden crear a mano elementos con namespaces usando ElementMaker, pero el trabajo es muy extenso y el código no es muy legible; mi recomendación es usar templates de string, importarlos y usarlos en vez de generarlos a mano.

from lxml import etree
from lxml.builder import ElementMaker
soap = ElementMaker(namespace="http://schemas.xmlsoap.org/soap/envelope/", nsmap={'soap' : "http://schemas.xmlsoap.org/soap/envelope/"})

# Extras : xmlwitch
xmlwitch sólo lee y no escribe, pero lo hace de manera muy cómoda

supervivencia en python usando xml con namespaces

Comentarios

Deja una respuesta