python:twistedlxmlandre
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| python:twistedlxmlandre [2014/07/26 09:22] – [re.search, re.match and re.findall] admin | python:twistedlxmlandre [2022/10/29 16:15] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 240: | Line 240: | ||
| * ElementTree: | * ElementTree: | ||
| * HTMLElement: | * HTMLElement: | ||
| - | ==== Create Etree from xml and html ==== | + | ==== Parsing |
| refer: http:// | refer: http:// | ||
| etree.parse returns an **lxml.etree._ElementTree** object | etree.parse returns an **lxml.etree._ElementTree** object | ||
| Line 421: | Line 421: | ||
| result = etree.tostring(tree.getroot(), | result = etree.tostring(tree.getroot(), | ||
| print(result) | print(result) | ||
| + | </ | ||
| + | ==== Build xml using Etree ==== | ||
| + | * Build xml using xml.etree.ElementTree:< | ||
| + | from xml.etree import ElementTree as ET | ||
| + | ''' | ||
| + | <?xml version=" | ||
| + | < | ||
| + | <country name=" | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | <country name=" | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | ''' | ||
| + | data = ET.Element(' | ||
| + | |||
| + | country1 = ET.SubElement(data, | ||
| + | rank1 = ET.SubElement(country1, | ||
| + | rank1.text = ' | ||
| + | year1 = ET.SubElement(country1, | ||
| + | year1.text = ' | ||
| + | |||
| + | country2 = ET.SubElement(data, | ||
| + | rank2 = ET.SubElement(country2, | ||
| + | rank2.text = ' | ||
| + | year2 = ET.SubElement(country2, | ||
| + | year2.text = ' | ||
| + | print ET.tostring(data) | ||
| + | </ | ||
| + | < | ||
| + | </ | ||
| + | * Build xml using lxml.etree:< | ||
| + | from lxml import etree as ET | ||
| + | ''' | ||
| + | <?xml version=" | ||
| + | < | ||
| + | <country name=" | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | <country name=" | ||
| + | < | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | ''' | ||
| + | data = ET.Element(' | ||
| + | |||
| + | country1 = ET.SubElement(data, | ||
| + | rank1 = ET.SubElement(country1, | ||
| + | rank1.text = ' | ||
| + | year1 = ET.SubElement(country1, | ||
| + | year1.text = ' | ||
| + | |||
| + | country2 = ET.SubElement(data, | ||
| + | rank2 = ET.SubElement(country2, | ||
| + | rank2.text = ' | ||
| + | year2 = ET.SubElement(country2, | ||
| + | year2.text = ' | ||
| + | print ET.tostring(data) | ||
| + | </ | ||
| + | < | ||
| </ | </ | ||
| ==== Custom Functions ==== | ==== Custom Functions ==== | ||
| Line 461: | Line 526: | ||
| tree.write(' | tree.write(' | ||
| </ | </ | ||
| - | |||
| ===== re Package(Regular Expression) ===== | ===== re Package(Regular Expression) ===== | ||
| + | To use the re package, we need to import it:<code python> | ||
| + | import re | ||
| + | </ | ||
| ==== Regular Expression Language ==== | ==== Regular Expression Language ==== | ||
| A regular expression (abbreviated regex or regexp) is a sequence of characters that forms a search pattern\\ | A regular expression (abbreviated regex or regexp) is a sequence of characters that forms a search pattern\\ | ||
| - | refer: http:// | + | refer: |
| + | * http:// | ||
| + | * python: https:// | ||
| **Match Character** | **Match Character** | ||
| Line 507: | Line 576: | ||
| </ | </ | ||
| === re.findall === | === re.findall === | ||
| - | * re.findall: The findall() is probably the single most powerful function in the re module< | + | findall: The findall() is probably the single most powerful function in the re module |
| + | - Example 1: <code python> | ||
| str = ' | str = ' | ||
| Line 516: | Line 586: | ||
| # do something with each found email string | # do something with each found email string | ||
| print email | print email | ||
| - | </ | + | </ |
| + | * [\w\.-]+ => Begin with one or multiple(sign: | ||
| + | * @[\w\.-]+ => followed by the character @ and then one or more characters from the group: [word, **.** , **-**] | ||
| + | - Example 2: <code python> | ||
| # Open file | # Open file | ||
| f = open(' | f = open(' | ||
| Line 523: | Line 596: | ||
| strings = re.findall(r' | strings = re.findall(r' | ||
| </ | </ | ||
| - | === re.search, re.match | + | === re.search, re.match === |
| * re.search: The re.search() method takes a regular expression pattern and a string and searches for that pattern within the string< | * re.search: The re.search() method takes a regular expression pattern and a string and searches for that pattern within the string< | ||
| str = 'an example word: | str = 'an example word: | ||
| Line 546: | Line 619: | ||
| PHP FOUND | PHP FOUND | ||
| C++ NOT FOUND | C++ NOT FOUND | ||
| + | </ | ||
| + | * re.search and re.match< | ||
| + | import re | ||
| + | |||
| + | text = "The Attila the Hun Show" | ||
| + | |||
| + | # a single character | ||
| + | m = re.match(" | ||
| + | if m: print repr(" | ||
| + | |||
| + | # any string of characters | ||
| + | m = re.match(" | ||
| + | if m: print repr(" | ||
| + | |||
| + | # a string of letters (at least one) | ||
| + | m = re.match(" | ||
| + | if m: print repr(" | ||
| + | |||
| + | # a string of digits | ||
| + | m = re.match(" | ||
| + | if m: print repr(" | ||
| + | |||
| + | </ | ||
| + | ' | ||
| + | ' | ||
| + | ' | ||
| + | </ | ||
| + | * re.search and re.match:< | ||
| + | import re | ||
| + | |||
| + | print ' | ||
| + | text =" | ||
| + | |||
| + | print " | ||
| + | m = re.match(" | ||
| + | if m: | ||
| + | print m.group(1, 2, 3) | ||
| + | |||
| + | print " | ||
| + | s = re.search(" | ||
| + | if s: | ||
| + | print s.group(1, 2, 3) | ||
| + | |||
| + | print ' | ||
| + | text =" | ||
| + | print " | ||
| + | m = re.match(" | ||
| + | if m: | ||
| + | print m.group(1, 2, 3) | ||
| + | |||
| + | print " | ||
| + | s = re.search(" | ||
| + | if s: | ||
| + | print s.group(1, 2, 3) | ||
| + | |||
| + | </ | ||
| + | ********************************** | ||
| + | match1: | ||
| + | (' | ||
| + | search1: | ||
| + | (' | ||
| + | ********************************** | ||
| + | match2: | ||
| + | search2: | ||
| + | (' | ||
| </ | </ | ||
| === re.sub and re.compile === | === re.sub and re.compile === | ||
| Line 554: | Line 692: | ||
| text2 = re.sub(" | text2 = re.sub(" | ||
| print text2 | print text2 | ||
| + | </ | ||
| + | Python for beginner is a very good website | ||
| </ | </ | ||
| * Here is another example (taken from Google's Python class) which searches for all the email addresses, and changes them to keep the user (\1) but have yo-yo-dyne.com as the host.< | * Here is another example (taken from Google's Python class) which searches for all the email addresses, and changes them to keep the user (\1) but have yo-yo-dyne.com as the host.< | ||
| Line 562: | Line 702: | ||
| ## 1 is group(1), 2 group(2) in the replacement | ## 1 is group(1), 2 group(2) in the replacement | ||
| - | print re.sub(r' | + | print re.sub(r' |
| ## purple alice@yo-yo-dyne.com, | ## purple alice@yo-yo-dyne.com, | ||
| + | </ | ||
| + | purple alice@yo-yo-dyne.com, | ||
| </ | </ | ||
| * re.compile: With the re.compile() function we can compile a pattern into a pattern object, which has methods for various operations such as searching for pattern matches or performing string substitutions. | * re.compile: With the re.compile() function we can compile a pattern into a pattern object, which has methods for various operations such as searching for pattern matches or performing string substitutions. | ||
python/twistedlxmlandre.1406366565.txt.gz · Last modified: (external edit)
