User Tools

Site Tools


python:exelprocess

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
python:exelprocess [2020/01/19 15:20] – [Using sheet(xlrd.sheet.Sheet)] adminpython:exelprocess [2022/10/29 16:15] (current) – external edit 127.0.0.1
Line 1: Line 1:
 ====== Exel Processing ====== ====== Exel Processing ======
 +===== Function in Exel =====
 +Trước khi xử lý các hàm tìm kiếm Vlookup and Hlookup, If trong exel cần phải format dữ liệu và sort trước -> Lúc đó hàm xử lý mới chạy đúng
 +VLOOKUP($B2,omni!$A$1:$C$4778,3,FALSE) -> Tìm chính xác nhờ tuỳ chọn **FALSE**
 ===== Install ===== ===== Install =====
   * Install on windows:<code bat>   * Install on windows:<code bat>
Line 11: Line 14:
 pip install xlutils pip install xlutils
 </code> </code>
-===== using xlrd =====+===== Using xlrd =====
 Basic APIs: Basic APIs:
   * **xlrd.open_workbook('adminup/shop.xlsx')** return **xlrd.book.Book** object   * **xlrd.open_workbook('adminup/shop.xlsx')** return **xlrd.book.Book** object
Line 95: Line 98:
     print sheet_info.dump()             print sheet_info.dump()        
 </code> </code>
-===== Parse Shop.xls ====+==== Parse xls file ====
 <code python> <code python>
 import xlrd import xlrd
- +from xlrd.book import Book 
-def parseshopxls(filename): +from xlrd.sheet import Sheet 
-    shop_info +nhapkhaufile = 'nhapkhau.xlsx
-                 'config_name':'',#Cen cua hang +xuatkhaufile = 'xuatkhau.xlsx
-                 'config_owner':'',#Chu cua hang +constantsfile = 'constants.xlsx
-                 'config_address':'',#Dia chi +def parsexls(filename):
-                 'config_email':'',#Email +
-                 'config_telephone':'',#Dien thoai +
-                 'config_title':'',#Tieu de +
-                 'config_meta_description':'',#Mo ta tu khoa +
-                 } +
-    shop_cat = []  +
-    shop_products = []+
     xlsfile = xlrd.open_workbook(filename)     xlsfile = xlrd.open_workbook(filename)
-    sheet_info = None 
-    sheet_cat = None 
-    sheet_products = None 
     for sheet in xlsfile.sheets():     for sheet in xlsfile.sheets():
-        if sheet.name == u'Th\xf4ng tin shop': +        n_cells = sheet.ncols              
-            sheet_info sheet +        for curr_row in range(sheet.nrows): 
-        elif sheet.name == u'Danh m\u1ee5c':  +            curr_cell = 0 
-            sheet_cat = sheet +            while curr_cell < n_cells: 
-        elif sheet.name == u'S\u1EA3n ph\u1EA9m': +                cell_type = sheet.cell_type(curr_row, curr_cell) 
-            sheet_products = sheet +                if cell_type == xlrd.XL_CELL_EMPTY: 
-    '''get shop_info''' +                    curr_cell += 1 
-    n_cells = sheet_info.ncols              +                    continue 
-    for curr_row in range(sheet_info.nrows): +                value = sheet.cell_value(curr_row, curr_cell)
-        curr_cell = 0 +
-        while curr_cell < n_cells: +
-            cell_type = sheet_info.cell_type(curr_row, curr_cell) +
-            if cell_type == xlrd.XL_CELL_EMPTY:+
                 curr_cell += 1                 curr_cell += 1
-                continue +                print value 
-            value = sheet_info.cell_value(curr_row, curr_cell) +parsexls(constantsfile 
-            if value == u'T\xean c\u1eeda h\xe0ng': +
-                curr_cell += 1 +
-                shop_info['config_name'] = sheet_info.cell_value(curr_row, curr_cell) +
-            elif value == u'Ch\u1ee7 c\u1eeda h\xe0ng': +
-                curr_cell += 1 +
-                shop_info['config_owner'] = sheet_info.cell_value(curr_row, curr_cell) +
-            elif value == u'\u0110\u1ecba ch\u1ec9': +
-                curr_cell += 1 +
-                shop_info['config_address'] = sheet_info.cell_value(curr_row, curr_cell) +
-            elif value == u'Email': +
-                curr_cell += 1 +
-                shop_info['config_email'] = sheet_info.cell_value(curr_row, curr_cell)         +
-            elif value == u'\u0110i\u1ec7n tho\u1ea1i': +
-                curr_cell += 1 +
-                shop_info['config_telephone'] = int(sheet_info.cell_value(curr_row, curr_cell)) +
-            curr_cell += 1 +
-    '''get shop_cat'''          +
-    n_cells = sheet_cat.ncols +
-    for curr_row in range(sheet_cat.nrows): +
-        curr_cell = 0 +
-        while curr_cell < n_cells: +
-            cell_type = sheet_cat.cell_type(curr_row, curr_cell) +
-            if cell_type == xlrd.XL_CELL_EMPTY: +
-                curr_cell += 1 +
-                continue +
-            value = sheet_cat.cell_value(curr_row, curr_cell) +
-            if value != u'T\xean danh m\u1ee5c': +
-                shop_cat.append(value) +
-            curr_cell += 1 +
-    '''get shop_products'''          +
-    n_cells = sheet_products.ncols +
-    first_data_col = -1 +
-    is_first_data_row = True +
-    for curr_row in range(sheet_products.nrows): +
-        curr_cell = 0 +
-        while curr_cell < n_cells: +
-            cell_type = sheet_products.cell_type(curr_row, curr_cell) +
-            if cell_type == xlrd.XL_CELL_EMPTY: +
-                curr_cell += 1 +
-                continue  +
-            if first_data_col < 0: +
-                first_data_col = curr_cell +
-            data_col = curr_cell - first_data_col +
-            value = sheet_products.cell_value(curr_row, curr_cell)          +
-            if data_col == 0: +
-                product_info = {} +
-                product_info['name'] = value +
-            elif data_col == 1: +
-                product_info['model'] = value             +
-            elif data_col == 2: +
-                if cell_type == xlrd.XL_CELL_NUMBER: +
-                    value = int(value) +
-                product_info['price'] = value +
-            elif data_col == 3: +
-                product_info['meta_description'] = value +
-            elif data_col == 4: +
-                product_info['description'] = value +
-            elif data_col == 5: +
-                product_info['product_category'] = value +
-            elif data_col == 6: +
-                product_info['image'] = value +
-                if is_first_data_row: +
-                    is_first_data_row = False +
-                else: +
-                    shop_products.append(product_info) +
-            curr_cell += 1 +
-    return shop_info, shop_cat, shop_products          +
-if __name__ == '__main__': +
-    shop_info, shop_cat, shop_products = parseshopxls('adminup/shop.xlsx'+
-    print shop_info +
-    print shop_cat  +
-    for product in shop_products: +
-        print '****************************' +
-        print product    +
 </code> </code>
 +==== Error Parse UTF-8 file ====
 +<code>
 +UnicodeDecodeError: 'utf16' codec can't decode bytes in position 3438-3439: unexpected end of data
 +</code> => Check exel format in encoding(not utf-8)(May be exel file was not stored as encoding utf-8, It can be saved as utf16 default encoding) 
 +You can debug for information:<code python>
 +xlsfile = xlrd.open_workbook(filename,verbosity=3)
 +</code>output:<code>
 +BOF: op=0x0809 vers=0x0600 stream=0x0005 buildid=14420 buildyr=1997 -> BIFF80
 +CODEPAGE: codepage 1200 -> encoding 'utf_16_le'
 +DATEMODE: datemode 0
 +</code> => default encoding: **'utf_16_le'**
 +
 +**Fix**: Lỗi do file exel, mở lại file exel và lưu lại sẽ hết lỗi
 +===== Using xlwt =====
python/exelprocess.txt · Last modified: 2022/10/29 16:15 by 127.0.0.1