User Tools

Site Tools


python:compare

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
python:compare [2014/07/21 23:19] – [diff 2 files] adminpython:compare [2022/10/29 16:15] (current) – external edit 127.0.0.1
Line 7: Line 7:
   * **find_longest_match**   * **find_longest_match**
   * **get_matching_blocks**   * **get_matching_blocks**
-The function **get_opcodes** uses the functions above for parsing+The function **get_opcodes** uses the functions above for parsing\\ 
 +Create a SequenceMatcher whose input is two **strings or two lists**
 === match ratio === === match ratio ===
   * Calculate match ratio of two strings:<code python>   * Calculate match ratio of two strings:<code python>
Line 91: Line 92:
 a[9] and b[9] match for 0 elements a[9] and b[9] match for 0 elements
 </code> **a[0] and b[4] match for 5 elements:** 5 elements from a[0] are ' abcd' and 5 elements from b[4] are ' abcd' </code> **a[0] and b[4] match for 5 elements:** 5 elements from a[0] are ' abcd' and 5 elements from b[4] are ' abcd'
-=== Math string with multilines ===+=== get_opcodes ===
 <code python> <code python>
 import difflib import difflib
 +import sys
 + 
 +a = """ abcd
 +abc pq
 +ef abc
 +mn
 +""".splitlines(1)
 +b = """abcd abcd
 +ef
 +mn
 +""".splitlines(1)
 +print 'a = ', a
 +print 'b = ', b
 +seq = difflib.SequenceMatcher(None, a, b)
 +print '*******************************'
 +for tag, alo, ahi, blo, bhi in seq.get_opcodes():
 +    print '- ', tag, alo, ahi, blo, bhi, ':'
 +    print '--from:'
 +    for i in range(alo, ahi):
 +        sys.stdout.writelines(a[i])
 +    print '--to:'
 +    for i in range(blo, bhi):
 +        sys.stdout.writelines(b[i])
 +result = list(difflib.ndiff(a, b))
 +print '*******************************'
 +print 'normal diff:'
 +sys.stdout.writelines(result)
 +</code>output:<code>
 +a =  [' abcd\n', 'abc pq\n', 'ef abc\n', 'mn\n']
 +b =  ['abcd abcd\n', 'ef\n', 'mn\n']
 +*******************************
 +-  replace 0 3 0 2 :
 +--from:
 + abcd
 +abc pq
 +ef abc
 +--to:
 +abcd abcd
 +ef
 +-  equal 3 4 2 3 :
 +--from:
 +mn
 +--to:
 +mn
 +*******************************
 +normal diff:
 +-  abcd
 ++ abcd abcd
 +? ++++
 ++ ef
 +- abc pq
 +- ef abc
 +  mn
 +</code>
 +=== Match string with multilines ===
 +<code python>
 +import difflib
 +import sys
    
 a = """ abcd  a = """ abcd 
 abc pq abc pq
 ef abc ef abc
 +mn
 """ """
 b = """abcd abcd b = """abcd abcd
 ef ef
 +mn
 """ """
    
 seq = difflib.SequenceMatcher(None, a, b) seq = difflib.SequenceMatcher(None, a, b)
 rate = seq.ratio() * 100 rate = seq.ratio() * 100
-print 'rate: ',rate +print '*************************' 
-print 'longest_match: ', seq.find_longest_match(0, 20, 0, 9) +print 'rate1: ',rate 
-print 'matching blocks:'+print 'longest_match1: ', seq.find_longest_match(0, 20, 0, 9) 
 +print 'matching blocks1:'
 for block in seq.get_matching_blocks(): for block in seq.get_matching_blocks():
     print "a[%d] and b[%d] match for %d elements" % block     print "a[%d] and b[%d] match for %d elements" % block
     print '>>>>', a[block[0]:(block[0] + block[2])]     print '>>>>', a[block[0]:(block[0] + block[2])]
-    print '<<<<', b[block[1]:(block[1] + block[2])]</code>output:<code> +    print '<<<<', b[block[1]:(block[1] + block[2])] 
-rate:  52.9411764706 + 
-longest_match:  Match(a=0, b=4, size=5) +a = a.splitlines(1) 
-matching blocks:+b = b.splitlines(1) 
 +seq2 = difflib.SequenceMatcher(None, a, b) 
 +rate = seq.ratio() * 100 
 +print '*************************' 
 +print 'rate2: ',rate 
 +print 'longest_match2: ', seq2.find_longest_match(0, 4, 0, 3) 
 +print 'matching blocks2:' 
 +for block in seq2.get_matching_blocks(): 
 +    print "a[%d] and b[%d] match for %d elements" % block 
 +    print '>>>>', a[block[0]:(block[0] + block[2])] 
 +    print '<<<<', b[block[1]:(block[1] + block[2])] 
 + 
 + 
 +d = difflib.Differ() 
 +result = list(d.compare(a, b)) 
 +print 'normal diff:' 
 +sys.stdout.writelines(result)</code>output:<code> 
 +************************* 
 +rate1:  60.0 
 +longest_match1:  Match(a=0, b=4, size=5) 
 +matching blocks1:
 a[0] and b[4] match for 5 elements a[0] and b[4] match for 5 elements
 >>>>  abcd >>>>  abcd
Line 123: Line 205:
 <<<< <<<<
 ef ef
-a[20] and b[12] match for 1 elements+a[20] and b[12] match for 4 elements
 >>>> >>>>
 +mn
  
 <<<< <<<<
 +mn
  
-a[21] and b[13] match for 0 elements+a[24] and b[16] match for 0 elements
 >>>> >>>>
 <<<< <<<<
 +*************************
 +rate2:  60.0
 +longest_match2:  Match(a=3, b=2, size=1)
 +matching blocks2:
 +a[3] and b[2] match for 1 elements
 +>>>> ['mn\n']
 +<<<< ['mn\n']
 +a[4] and b[3] match for 0 elements
 +>>>> []
 +<<<< []
 +normal diff:
 ++ abcd abcd
 ++ ef
 +-  abcd
 +- abc pq
 +- ef abc
 +  mn
 </code> </code>
 === SequenceMatcher with files === === SequenceMatcher with files ===
Line 260: Line 361:
     doc2 = content.splitlines(1)     doc2 = content.splitlines(1)
  
-with open('login.html', 'wb') as f: 
-    for line in doc2: 
-        f.writelines(line)         
 result = difflib.ndiff(doc1, doc2) result = difflib.ndiff(doc1, doc2)
 with open('compare.html', 'wb') as f: with open('compare.html', 'wb') as f:
     for line in result:     for line in result:
         f.writelines(line)         f.writelines(line)
-with open('index.html', 'wb') as f: +</code> 
-    for line in doc1: +==== lxml.html.diff for comparing HTML files ==== 
-        f.writelines(line)+xml.html.diff using 2 basic libraries: 
 +  * difflib for comparing 2 files 
 +  * etree for parsing HTML 
 +Examples for lxml.html.diff: 
 +  * Simple diff:<code python> 
 +from os import path 
 +import sys, re 
 +from lxml.html import diff, etree, HTMLParser 
 +import codecs 
 +import StringIO 
 +doc1 = '''<div class="cart-button"> 
 +<div class="cart"> 
 +    <a title="Add to cart" data-id="35;" class="button addToCart-1 ">  
 +        <span>Add to cart</span> 
 +    </a> 
 +</div>  
 +<div class="wishlist"> 
 +    <a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('35');"> 
 +    <i class="icon-star"></i> 
 +    <span>Add to Wish List</span> 
 +    </a> 
 +    <b>simple</b> 
 +</div> 
 +</div>''' 
 +doc2 = '''<div class="cart-button"> 
 +<div class="cart"> 
 +    <a title="Add to cart" data-id="35;" class="button addToCart-1 ">  
 +        <span>Add to cart</span> 
 +    </a> 
 +</div>  
 +<div class="wishlist"> 
 +    <a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('30');"> 
 +    <i class="icon-star"></i> 
 +    <span>Add to Wish List change</span> 
 +    </a> 
 +</div> 
 +</div>''' 
 +diffcontent = diff.htmldiff(doc1, doc2) 
 +diffcontent = codecs.encode(diffcontent, 'utf-8') 
 +print diffcontent 
 +</code>output:<code html> 
 +<div class="cart-button"><div class="cart"><a title="Add to cart" data-id="35;" class="button addToCart-1 "><span>Add to cart</span> </a> </div> <div class="wishlist"><a class="tooltip-1" title="Add to Wish List" onclick="addToWishList('30');"><i class="icon-star"></i> <span>Add to Wish List <ins>change</ins> </span> </a> <del><b>simple</b></del> </div> </div> 
 +</code> 
 +  * diff 2 HTML files:<code python> 
 +from os import path 
 +import sys, re 
 +from lxml.html import diff 
 +import codecs 
 +  
 +INPUT_DIR = 'opencart_47066' 
 +htmlfile1 = path.join(INPUT_DIR, 'index.html') 
 +htmlfile2 = path.join(INPUT_DIR, 'index.php@route=account%2Flogin.html') 
 +with open(htmlfile1, 'r') as f: 
 +    content = f.read() 
 +    doc1 = content 
 +with open(htmlfile2, 'r') as f: 
 +    content = f.read() 
 +    doc2 = content 
 +diffcontent = diff.htmldiff(doc1, doc2) 
 +diffcontent = codecs.encode(diffcontent, 'utf-8') 
 +print diffcontent 
 </code> </code>
 ===== filecmp ===== ===== filecmp =====
python/compare.1405984751.txt.gz · Last modified: 2022/10/29 16:15 (external edit)