python:compare
Differences
This shows you the differences between two versions of the page.
| Both sides previous revision / Previous revision / Next revision | Previous revision | ||
| python:compare [2014/07/22 07:10] – [Finding Matching String with SequenceMatcher] admin | python:compare [2022/10/29 16:15] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 8: | Line 8: | ||
| * **get_matching_blocks** | * **get_matching_blocks** | ||
| The function **get_opcodes** uses the functions above for parsing\\ | The function **get_opcodes** uses the functions above for parsing\\ | ||
| - | Create SequenceMatcher with input is 2 **strings or 2 lists** | + | Create SequenceMatcher with input are two **strings or two lists** |
| === match ratio === | === match ratio === | ||
| * Calculate match ratio of two strings:< | * Calculate match ratio of two strings:< | ||
| Line 92: | Line 92: | ||
| a[9] and b[9] match for 0 elements | a[9] and b[9] match for 0 elements | ||
| </ | </ | ||
| + | === get_opcodes === | ||
| + | <code python> | ||
| + | import difflib | ||
| + | import sys | ||
| + | |||
| + | a = """ | ||
| + | abc pq | ||
| + | ef abc | ||
| + | mn | ||
| + | """ | ||
| + | b = """ | ||
| + | ef | ||
| + | mn | ||
| + | """ | ||
| + | print 'a = ', a | ||
| + | print 'b = ', b | ||
| + | seq = difflib.SequenceMatcher(None, | ||
| + | print ' | ||
| + | for tag, alo, ahi, blo, bhi in seq.get_opcodes(): | ||
| + | print '- ', tag, alo, ahi, blo, bhi, ':' | ||
| + | print ' | ||
| + | for i in range(alo, ahi): | ||
| + | sys.stdout.writelines(a[i]) | ||
| + | print ' | ||
| + | for i in range(blo, bhi): | ||
| + | sys.stdout.writelines(b[i]) | ||
| + | result = list(difflib.ndiff(a, | ||
| + | print ' | ||
| + | print ' | ||
| + | sys.stdout.writelines(result) | ||
| + | </ | ||
| + | a = [' abcd\n', | ||
| + | b = ['abcd abcd\n', | ||
| + | ******************************* | ||
| + | - replace 0 3 0 2 : | ||
| + | --from: | ||
| + | abcd | ||
| + | abc pq | ||
| + | ef abc | ||
| + | --to: | ||
| + | abcd abcd | ||
| + | ef | ||
| + | - equal 3 4 2 3 : | ||
| + | --from: | ||
| + | mn | ||
| + | --to: | ||
| + | mn | ||
| + | ******************************* | ||
| + | normal diff: | ||
| + | - abcd | ||
| + | + abcd abcd | ||
| + | ? ++++ | ||
| + | + ef | ||
| + | - abc pq | ||
| + | - ef abc | ||
| + | mn | ||
| + | </ | ||
| === Match string with multilines === | === Match string with multilines === | ||
| <code python> | <code python> | ||
| Line 123: | Line 180: | ||
| rate = seq.ratio() * 100 | rate = seq.ratio() * 100 | ||
| print ' | print ' | ||
| + | print ' | ||
| + | print ' | ||
| print ' | print ' | ||
| for block in seq2.get_matching_blocks(): | for block in seq2.get_matching_blocks(): | ||
| Line 133: | Line 192: | ||
| result = list(d.compare(a, | result = list(d.compare(a, | ||
| print ' | print ' | ||
| - | sys.stdout.writelines(result) | + | sys.stdout.writelines(result)</ |
| - | </ | + | |
| ************************* | ************************* | ||
| rate1: | rate1: | ||
| Line 158: | Line 216: | ||
| <<<< | <<<< | ||
| ************************* | ************************* | ||
| + | rate2: | ||
| + | longest_match2: | ||
| matching blocks2: | matching blocks2: | ||
| a[3] and b[2] match for 1 elements | a[3] and b[2] match for 1 elements | ||
| Line 171: | Line 231: | ||
| - abc pq | - abc pq | ||
| - ef abc | - ef abc | ||
| - | mn</ | + | mn |
| + | </ | ||
| === SequenceMatcher with files === | === SequenceMatcher with files === | ||
| <code python> | <code python> | ||
| Line 310: | Line 371: | ||
| * etree for parsing HTML | * etree for parsing HTML | ||
| Examples for lxml.html.diff: | Examples for lxml.html.diff: | ||
| - | * Simple diff HTML:< | + | * Simple diff:<code python> |
| + | from os import path | ||
| + | import sys, re | ||
| + | from lxml.html import diff, etree, HTMLParser | ||
| + | import codecs | ||
| + | import StringIO | ||
| + | doc1 = '''< | ||
| + | <div class=" | ||
| + | <a title=" | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | <div class=" | ||
| + | <a class=" | ||
| + | <i class=" | ||
| + | < | ||
| + | </ | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | doc2 = '''< | ||
| + | <div class=" | ||
| + | <a title=" | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | <div class=" | ||
| + | <a class=" | ||
| + | <i class=" | ||
| + | < | ||
| + | </ | ||
| + | </ | ||
| + | </ | ||
| + | diffcontent = diff.htmldiff(doc1, | ||
| + | diffcontent = codecs.encode(diffcontent, | ||
| + | print diffcontent | ||
| + | </ | ||
| + | <div class=" | ||
| + | </ | ||
| + | * diff 2 HTML files:<code python> | ||
| from os import path | from os import path | ||
| import sys, re | import sys, re | ||
python/compare.1406013001.txt.gz · Last modified: (external edit)
