.


:




:

































 

 

 

 


XML




XML , , . XML . Etree .

>>> import xml.etree.ElementTree as etree
>>> tree = etree.parse('examples/feed.xml')
>>> root = tree.getroot()
>>> root.findall('{http://www.w3.org/2005/Atom}entry') ①
[<Element {http://www.w3.org/2005/Atom}entry at e2b4e0>, <Element {http://www.w3.org/2005/Atom}entry at e2b510>, <Element {http://www.w3.org/2005/Atom}entry at e2b540>]
>>> root.tag
'{http://www.w3.org/2005/Atom}feed'
>>> root.findall('{http://www.w3.org/2005/Atom}feed') ②
[]
>>> root.findall('{http://www.w3.org/2005/Atom}author') ③
[]

① findall() . ( .)

② ( ) findall(). . ? , . feed feed, .

③ . XML author; , ( entry). author (direct children) ; ( ). author , .

>>> tree.findall('{http://www.w3.org/2005/Atom}entry') ①
[<Element {http://www.w3.org/2005/Atom}entry at e2b4e0>, <Element {http://www.w3.org/2005/Atom}entry at e2b510>, <Element {http://www.w3.org/2005/Atom}entry at e2b540>]
>>> tree.findall('{http://www.w3.org/2005/Atom}author') ②
[]

① tree ( etree.parse()) . tree.getroot().findall().

② , , author . ? , tree.getroot().findall('{http://www.w3.org/2005/Atom}author'), author, . author ; entry. , .

findall() find() . .

>>> entries = tree.findall('{http://www.w3.org/2005/Atom}entry') ①
>>> len(entries)
3
>>> title_element = entries[0].find('{http://www.w3.org/2005/Atom}title') ②
>>> title_element.text
'Dive into history, 2009 edition'
>>> foo_element = entries[0].find('{http://www.w3.org/2005/Atom}foo') ③
>>> foo_element
>>> type(foo_element)
<class 'NoneType'>

① findall() atom:entry.

② find() ElementTree .

③ foo , find() None.

find(). ElementTree False (. if len(element) 0). if element.find('...') , find() ; ! find() if element.find('...') is not None.

, . , .

>>> all_links = tree.findall('//{http://www.w3.org/2005/Atom}link') ①
>>> all_links
[<Element {http://www.w3.org/2005/Atom}link at e181b0>, <Element {http://www.w3.org/2005/Atom}link at e2b570>, <Element {http://www.w3.org/2005/Atom}link at e2b480>, <Element {http://www.w3.org/2005/Atom}link at e2b5a0>]
>>> all_links[0].attrib ②
{'href': 'http://diveintomark.org/', 'type': 'text/html', 'rel': 'alternate'}
>>> all_links[1].attrib ③
{'href': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition', 'type': 'text/html', 'rel': 'alternate'}
>>> all_links[2].attrib
{'href': 'http://diveintomark.org/archives/2009/03/21/accessibility-is-a-harsh-mistress', 'type': 'text/html', 'rel': 'alternate'}
>>> all_links[3].attrib
{'href': 'http://diveintomark.org/archives/2008/12/18/give-part-1-container-formats', 'type': 'text/html', 'rel': 'alternate'}

① //{http://www.w3.org/2005/Atom}link . // . // , . , .

② . , , html .

③ entry. entry link. findall() , link.

, findall() ElementTree , . ElementTree XPath. XPath W3C XML . ElementTree XPath . , XPath. XML API ElementTree XPath.

LXML

lxml libxml2. API ElementTree, XPath 1.0 . Windows ; Linux (, yum apt-get). lxml .

>>> from lxml import etree ①
>>> tree = etree.parse('examples/feed.xml') ②
>>> root = tree.getroot() ③
>>> root.findall('{http://www.w3.org/2005/Atom}entry') ④
[<Element {http://www.w3.org/2005/Atom}entry at e2b4e0>, <Element {http://www.w3.org/2005/Atom}entry at e2b510>, <Element {http://www.w3.org/2005/Atom}entry at e2b540>]

① lxml API ElementTree.

② parse(): ElementTree.

③ getroot(): .

④ findall(): .

XML lxml ElementTree. API ElementTree , lxml , , ElementTree.

try:
    from lxml import etree
except ImportError:
    import xml.etree.ElementTree as etree

, lxml ElementTree: findall() .

>>> import lxml.etree ①
>>> tree = lxml.etree.parse('examples/feed.xml')
>>> tree.findall('//{http://www.w3.org/2005/Atom}*[@href]') ②
[<Element {http://www.w3.org/2005/Atom}link at eeb8a0>, <Element {http://www.w3.org/2005/Atom}link at eeb990>, <Element {http://www.w3.org/2005/Atom}link at eeb960>, <Element {http://www.w3.org/2005/Atom}link at eeb9c0>]
>>> tree.findall("//{http://www.w3.org/2005/Atom}*[@href='http://diveintomark.org/']") ③
[<Element {http://www.w3.org/2005/Atom}link at eeb930>]
>>> NS = '{http://www.w3.org/2005/Atom}'
>>> tree.findall('//{NS}author[{NS}uri]'.format(NS=NS)) ④
[<Element {http://www.w3.org/2005/Atom}author at eeba80>, <Element {http://www.w3.org/2005/Atom}author at eebba0>]

① lxml.etree ( etree: from lxml import etree) , lxml.

② Atom ( ), href. // , . {http://www.w3.org/2005/Atom} Atom. * . [@href] href.

③ Atom href http://diveintomark.org/.

④ ( ) Atom author Atom uri. 2 author: entry. entry author name, uri.

? lxml XPath 1.0. XPath, . XPath lxml.

>>> import lxml.etree
>>> tree = lxml.etree.parse('examples/feed.xml')
>>> NSMAP = {'atom': 'http://www.w3.org/2005/Atom'} ①
>>> entries = tree.xpath("//atom:category[@term='accessibility']/..", ②
...     namespaces=NSMAP)
>>> entries ③
[<Element {http://www.w3.org/2005/Atom}entry at e2b630>]
>>> entry = entries[0]
>>> entry.xpath('./atom:title/text()', namespaces=NSMAP) ④
['Accessibility is a harsh mistress']

① XPath , . Python.

② XPath . category ( Atom) - term='accessibility'. , . /.. ? , . , entry <category term='accessibility'>.

③ xpath() ElementTree. entry term='accessibility'.

④ XPath . , DOM XML , (nodes). , . XPath . : text() title (atom:title) (./).

XML

ElementTree XML , .

>>> import xml.etree.ElementTree as etree
>>> new_feed = etree.Element('{http://www.w3.org/2005/Atom}feed', ①
...     attrib={'{http://www.w3.org/XML/1998/namespace}lang': 'en'}) ②
>>> print(etree.tostring(new_feed)) ③
<ns0:feed xmlns:ns0='http://www.w3.org/2005/Atom' xml:lang='en'/>

① Element. ( ). feed Atom. XML.

② attrib. , ElementTree {_}_.

③ tostring() ElementTree.

new_feed? ElementTree XML , . XML xmlns='http://www.w3.org/2005/Atom'. (, Atom), , , (<feed>, <link>, <entry>). , .

XML XML . DOM

<ns0:feed xmlns:ns0='http://www.w3.org/2005/Atom' xml:lang='en'/>

<feed xmlns='http://www.w3.org/2005/Atom' xml:lang='en'/>

, . ns0: , 4 × 79 + 4 , 320 . UTF-8 320 . ( gzip 21 ; 21 21 ). , , Atom, , .

lxml: ElementTree lxml .

>>> import lxml.etree
>>> NSMAP = {None: 'http://www.w3.org/2005/Atom'} ①
>>> new_feed = lxml.etree.Element('feed', nsmap=NSMAP) ②
>>> print(lxml.etree.tounicode(new_feed)) ③
<feed xmlns='http://www.w3.org/2005/Atom'/>
>>> new_feed.set('{http://www.w3.org/XML/1998/namespace}lang', 'en') ④
>>> print(lxml.etree.tounicode(new_feed))
<feed xmlns='http://www.w3.org/2005/Atom' xml:lang='en'/>

① . ; - . None .

② lxml nsmap, .

③ , Atom feed .

④ , xml:lang. set(), . : ElementTree . ( ElementTree. lxml ElementTree nsmap .)

? , . .

>>> title = lxml.etree.SubElement(new_feed, 'title', ①
...     attrib={'type':'html'}) ②
>>> print(lxml.etree.tounicode(new_feed)) ③
<feed xmlns='http://www.w3.org/2005/Atom' xml:lang='en'><title type='html'/></feed>
>>> title.text = 'dive into &hellip;' ④
>>> print(lxml.etree.tounicode(new_feed)) ⑤
<feed xmlns='http://www.w3.org/2005/Atom' xml:lang='en'><title type='html'>dive into &amp;hellip;</title></feed>
>>> print(lxml.etree.tounicode(new_feed, pretty_print=True)) ⑥
<feed xmlns='http://www.w3.org/2005/Atom' xml:lang='en'>
<title type='html'>dive into &amp;hellip;</title>
</feed>

① SubElement. ( new_feed) . , .

② . , - .

③ , title Atom feed. title , lxml />.

④ , .text.

⑤ title . < ', escape-. lxml .

⑥ (pretty printing), . lxml (insignificant whitespace) XML .

, , xmlwitch, Python with XML .




:


: 2016-11-18; !; : 898 |


:

:

, ,
==> ...

1303 - | 1261 -


© 2015-2024 lektsii.org - -

: 0.031 .