User Tools

Site Tools


python:compare

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
python:compare [2014/07/22 07:10] – [Finding Matching String with SequenceMatcher] admin | python:compare [2022/10/29 16:15] (current) – external edit 127.0.0.1
Line 92: Line 92:
 a[9] and b[9] match for 0 elements a[9] and b[9] match for 0 elements
 </code> **a[0] and b[4] match for 5 elements:** 5 elements from a[0] are ' abcd' and 5 elements from b[4] are ' abcd' </code> **a[0] and b[4] match for 5 elements:** 5 elements from a[0] are ' abcd' and 5 elements from b[4] are ' abcd'
 +=== get_opcodes ===
 +<code python>
 +import difflib
 +import sys
 + 
 +a = """ abcd
 +abc pq
 +ef abc
 +mn
 +""".splitlines(1)
 +b = """abcd abcd
 +ef
 +mn
 +""".splitlines(1)
 +print 'a = ', a
 +print 'b = ', b
 +seq = difflib.SequenceMatcher(None, a, b)
 +print '*******************************'
 +for tag, alo, ahi, blo, bhi in seq.get_opcodes():
 +    print '- ', tag, alo, ahi, blo, bhi, ':'
 +    print '--from:'
 +    for i in range(alo, ahi):
 +        sys.stdout.writelines(a[i])
 +    print '--to:'
 +    for i in range(blo, bhi):
 +        sys.stdout.writelines(b[i])
 +result = list(difflib.ndiff(a, b))
 +print '*******************************'
 +print 'normal diff:'
 +sys.stdout.writelines(result)
 +</code>output:<code>
 +a =  [' abcd\n', 'abc pq\n', 'ef abc\n', 'mn\n']
 +b =  ['abcd abcd\n', 'ef\n', 'mn\n']
 +*******************************
 +-  replace 0 3 0 2 :
 +--from:
 + abcd
 +abc pq
 +ef abc
 +--to:
 +abcd abcd
 +ef
 +-  equal 3 4 2 3 :
 +--from:
 +mn
 +--to:
 +mn
 +*******************************
 +normal diff:
 +-  abcd
 ++ abcd abcd
 +? ++++
 ++ ef
 +- abc pq
 +- ef abc
 +  mn
 +</code>
 === Match string with multilines === === Match string with multilines ===
 <code python> <code python>
Line 123: Line 180:
 rate = seq.ratio() * 100 rate = seq.ratio() * 100
 print '*************************' print '*************************'
 +print 'rate2: ',rate
 +print 'longest_match2: ', seq2.find_longest_match(0, 4, 0, 3)
 print 'matching blocks2:' print 'matching blocks2:'
 for block in seq2.get_matching_blocks(): for block in seq2.get_matching_blocks():
Line 133: Line 192:
 result = list(d.compare(a, b)) result = list(d.compare(a, b))
 print 'normal diff:' print 'normal diff:'
-sys.stdout.writelines(result) +sys.stdout.writelines(result)</code>output:<code>
-</code>output:<code>+
 ************************* *************************
 rate1:  60.0 rate1:  60.0
Line 158: Line 216:
 <<<< <<<<
 ************************* *************************
 +rate2:  60.0
 +longest_match2:  Match(a=3, b=2, size=1)
 matching blocks2: matching blocks2:
 a[3] and b[2] match for 1 elements a[3] and b[2] match for 1 elements
Line 171: Line 231:
 - abc pq - abc pq
 - ef abc - ef abc
-  mn</code>+  mn 
 +</code>
 === SequenceMatcher with files === === SequenceMatcher with files ===
 <code python> <code python>
Line 310: Line 371:
   * etree for parsing HTML   * etree for parsing HTML
 Examples for lxml.html.diff: Examples for lxml.html.diff:
-  * Simple diff HTML:<code python>+  * Simple diff:<code python> 
 +from os import path 
 +import sys, re 
 +from lxml.html import diff, etree, HTMLParser 
 +import codecs 
 +import StringIO 
 +doc1 = '''<div class="cart-button"> 
 +<div class="cart"> 
 +    <a title="Add to cart" data-id="35;" class="button addToCart-1 ">  
 +        <span>Add to cart</span> 
 +    </a> 
 +</div>  
 +<div class="wishlist"> 
 +    <a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('35');"> 
 +    <i class="icon-star"></i> 
 +    <span>Add to Wish List</span> 
 +    </a> 
 +    <b>simple</b> 
 +</div> 
 +</div>''' 
 +doc2 = '''<div class="cart-button"> 
 +<div class="cart"> 
 +    <a title="Add to cart" data-id="35;" class="button addToCart-1 ">  
 +        <span>Add to cart</span> 
 +    </a> 
 +</div>  
 +<div class="wishlist"> 
 +    <a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('30');"> 
 +    <i class="icon-star"></i> 
 +    <span>Add to Wish List change</span> 
 +    </a> 
 +</div> 
 +</div>''' 
 +diffcontent = diff.htmldiff(doc1, doc2) 
 +diffcontent = codecs.encode(diffcontent, 'utf-8')
 +print diffcontent 
 +</code>output:<code html> 
 +<div class="cart-button"><div class="cart"><a title="Add to cart" data-id="35;" class="button addToCart-1 "><span>Add to cart</span> </a> </div> <div class="wishlist"><a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('30');"><i class="icon-star"></i> <span>Add to Wish List <ins>change</ins> </span> </a> <del><b>simple</b></del> </div> </div> 
 +</code> 
 +  * diff 2 HTML files:<code python>
 from os import path from os import path
 import sys, re import sys, re
python/compare.1406013017.txt.gz · Last modified: 2022/10/29 16:15 (external edit)