python:compare
Differences
This shows you the differences between two versions of the page.
Both sides previous revision · Previous revision · Next revision | Previous revision · Last revision · Both sides next revision | ||
python:compare [2014/07/22 07:09] – [Finding Matching String with SequenceMatcher] admin | python:compare [2014/07/22 14:10] – [lxml.html.diff for comparing HTML files] admin | ||
---|---|---|---|
Line 8: | Line 8: | ||
* **get_matching_blocks** | * **get_matching_blocks** | ||
The function **get_opcodes** uses the functions above for parsing\\ | The function **get_opcodes** uses the functions above for parsing\\ | ||
- | Create SequenceMatcher with input is **string | + | Create SequenceMatcher with input are two **strings |
=== match ratio === | === match ratio === | ||
* Calculate match ratio of two strings:< | * Calculate match ratio of two strings:< | ||
Line 92: | Line 92: | ||
a[9] and b[9] match for 0 elements | a[9] and b[9] match for 0 elements | ||
</ | </ | ||
+ | === get_opcodes === | ||
+ | <code python> | ||
+ | import difflib | ||
+ | import sys | ||
+ | |||
+ | a = """ | ||
+ | abc pq | ||
+ | ef abc | ||
+ | mn | ||
+ | """ | ||
+ | b = """ | ||
+ | ef | ||
+ | mn | ||
+ | """ | ||
+ | print 'a = ', a | ||
+ | print 'b = ', b | ||
+ | seq = difflib.SequenceMatcher(None, | ||
+ | print ' | ||
+ | for tag, alo, ahi, blo, bhi in seq.get_opcodes(): | ||
+ | print '- ', tag, alo, ahi, blo, bhi, ':' | ||
+ | print ' | ||
+ | for i in range(alo, ahi): | ||
+ | sys.stdout.writelines(a[i]) | ||
+ | print ' | ||
+ | for i in range(blo, bhi): | ||
+ | sys.stdout.writelines(b[i]) | ||
+ | result = list(difflib.ndiff(a, | ||
+ | print ' | ||
+ | print ' | ||
+ | sys.stdout.writelines(result) | ||
+ | </ | ||
+ | a = [' abcd\n', | ||
+ | b = ['abcd abcd\n', | ||
+ | ******************************* | ||
+ | - replace 0 3 0 2 : | ||
+ | --from: | ||
+ | abcd | ||
+ | abc pq | ||
+ | ef abc | ||
+ | --to: | ||
+ | abcd abcd | ||
+ | ef | ||
+ | - equal 3 4 2 3 : | ||
+ | --from: | ||
+ | mn | ||
+ | --to: | ||
+ | mn | ||
+ | ******************************* | ||
+ | normal diff: | ||
+ | - abcd | ||
+ | + abcd abcd | ||
+ | ? ++++ | ||
+ | + ef | ||
+ | - abc pq | ||
+ | - ef abc | ||
+ | mn | ||
+ | </ | ||
=== Match string with multilines === | === Match string with multilines === | ||
<code python> | <code python> | ||
Line 123: | Line 180: | ||
rate = seq.ratio() * 100 | rate = seq.ratio() * 100 | ||
print ' | print ' | ||
+ | print ' | ||
+ | print ' | ||
print ' | print ' | ||
for block in seq2.get_matching_blocks(): | for block in seq2.get_matching_blocks(): | ||
Line 133: | Line 192: | ||
result = list(d.compare(a, | result = list(d.compare(a, | ||
print ' | print ' | ||
- | sys.stdout.writelines(result) | + | sys.stdout.writelines(result)</ |
- | </ | + | |
************************* | ************************* | ||
rate1: | rate1: | ||
Line 158: | Line 216: | ||
<<<< | <<<< | ||
************************* | ************************* | ||
+ | rate2: | ||
+ | longest_match2: | ||
matching blocks2: | matching blocks2: | ||
a[3] and b[2] match for 1 elements | a[3] and b[2] match for 1 elements | ||
Line 171: | Line 231: | ||
- abc pq | - abc pq | ||
- ef abc | - ef abc | ||
- | mn</ | + | mn |
+ | </ | ||
=== SequenceMatcher with files === | === SequenceMatcher with files === | ||
<code python> | <code python> | ||
Line 310: | Line 371: | ||
* etree for parsing HTML | * etree for parsing HTML | ||
Examples for lxml.html.diff: | Examples for lxml.html.diff: | ||
- | * Simple diff HTML:< | + | * Simple diff:<code python> |
+ | from os import path | ||
+ | import sys, re | ||
+ | from lxml.html import diff, etree, HTMLParser | ||
+ | import codecs | ||
+ | import StringIO | ||
+ | doc1 = '''< | ||
+ | <div class=" | ||
+ | <a title=" | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <div class=" | ||
+ | <a class=" | ||
+ | <i class=" | ||
+ | < | ||
+ | </ | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | doc2 = '''< | ||
+ | <div class=" | ||
+ | <a title=" | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | <div class=" | ||
+ | <a class=" | ||
+ | <i class=" | ||
+ | < | ||
+ | </ | ||
+ | </ | ||
+ | </ | ||
+ | diffcontent = diff.htmldiff(doc1, | ||
+ | diffcontent = codecs.encode(diffcontent, | ||
+ | print diffcontent | ||
+ | </ | ||
+ | <div class=" | ||
+ | </ | ||
+ | * diff 2 HTML files:<code python> | ||
from os import path | from os import path | ||
import sys, re | import sys, re |
python/compare.txt · Last modified: 2022/10/29 16:15 by 127.0.0.1