python:exelprocess
This is an old revision of the document!
Table of Contents
Exel Processing
Install
- Install on windows:
easy_install.exe xlrd easy_install.exe xlwt easy_install.exe xlutils
- Install on linux:
pip install xlrd pip install xlwt pip install xlutils
using xlrd
Basic APIs:
- xlrd.open_workbook('adminup/shop.xlsx') return xlrd.book.Book object
Example codes:
Open Workbook and get sheets
- Simple Open workbook
import xlrd xlsfile = xlrd.open_workbook('adminup/shop.xlsx') print xlsfile
output:
<xlrd.book.Book object at 0x023263B0>
- Get sheets:
import xlrd from xlrd.book import Book xlsfile = xlrd.open_workbook('adminup/shop.xlsx') print xlsfile.sheets()
output:
[<xlrd.sheet.Sheet object at 0x02256530>, <xlrd.sheet.Sheet object at 0x022564D0>, <xlrd.sheet.Sheet object at 0x022569F0>]
Using sheet(xlrd.sheet.Sheet)
- dump sheet to get basic informations
import xlrd from xlrd.book import Book from xlrd.sheet import Sheet xlsfile = xlrd.open_workbook('adminup/shop.xlsx') for sheet in xlsfile.sheets(): print '******************************' sheet.dump()
output for sheet1:
_cell_types: <type 'list'>, len = 8 _cell_values: <type 'list'>, len = 8 _dimncols: 4 _dimnrows: 8 _first_full_rowx: 3 name: u'Th\xf4ng tin shop' ncols: 4 nrows: 8 number: 0
output for sheet2:
_cell_types: <type 'list'>, len = 6 _cell_values: <type 'list'>, len = 6 _dimncols: 3 _dimnrows: 54 _first_full_rowx: 2 name: u'Danh m\u1ee5c' ncols: 3 nrows: 6 number: 1
- Get sheet by name: Using http://www.rapidmonkey.com/unicodeconverter/ to convert utf-8 to ASCII(Unicode Escaped) Content
import xlrd from xlrd.book import Book from xlrd.sheet import Sheet xlsfile = xlrd.open_workbook('adminup/shop.xlsx') sheet_info = xlsfile.sheet_by_name(u'Th\xf4ng tin shop') sheet_cat = xlsfile.sheet_by_name(u'Danh m\u1ee5c') sheet_products = xlsfile.sheet_by_name(u'S\u1EA3n ph\u1EA9m')
Or
import xlrd from xlrd.book import Book from xlrd.sheet import Sheet xlsfile = xlrd.open_workbook('adminup/shop.xlsx') sheet_info = None sheet_cat = None sheet_products = None for sheet in xlsfile.sheets(): if sheet.name == u'Th\xf4ng tin shop': sheet_info = sheet elif sheet.name == u'Danh m\u1ee5c': sheet_cat = sheet elif sheet.name == u'S\u1EA3n ph\u1EA9m': sheet_products = sheet if sheet_info is not None: print sheet_info.dump()
Parse Shop.xls
import xlrd def parseshopxls(filename): shop_info = { 'config_name':'',#Cen cua hang 'config_owner':'',#Chu cua hang 'config_address':'',#Dia chi 'config_email':'',#Email 'config_telephone':'',#Dien thoai 'config_title':'',#Tieu de 'config_meta_description':'',#Mo ta tu khoa } shop_cat = [] shop_products = [] xlsfile = xlrd.open_workbook(filename) sheet_info = None sheet_cat = None sheet_products = None for sheet in xlsfile.sheets(): if sheet.name == u'Th\xf4ng tin shop': sheet_info = sheet elif sheet.name == u'Danh m\u1ee5c': sheet_cat = sheet elif sheet.name == u'S\u1EA3n ph\u1EA9m': sheet_products = sheet '''get shop_info''' n_cells = sheet_info.ncols for curr_row in range(sheet_info.nrows): curr_cell = 0 while curr_cell < n_cells: cell_type = sheet_info.cell_type(curr_row, curr_cell) if cell_type == xlrd.XL_CELL_EMPTY: curr_cell += 1 continue value = sheet_info.cell_value(curr_row, curr_cell) if value == u'T\xean c\u1eeda h\xe0ng': curr_cell += 1 shop_info['config_name'] = sheet_info.cell_value(curr_row, curr_cell) elif value == u'Ch\u1ee7 c\u1eeda h\xe0ng': curr_cell += 1 shop_info['config_owner'] = sheet_info.cell_value(curr_row, curr_cell) elif value == u'\u0110\u1ecba ch\u1ec9': curr_cell += 1 shop_info['config_address'] = sheet_info.cell_value(curr_row, curr_cell) elif value == u'Email': curr_cell += 1 shop_info['config_email'] = sheet_info.cell_value(curr_row, curr_cell) elif value == u'\u0110i\u1ec7n tho\u1ea1i': curr_cell += 1 shop_info['config_telephone'] = int(sheet_info.cell_value(curr_row, curr_cell)) curr_cell += 1 '''get shop_cat''' n_cells = sheet_cat.ncols for curr_row in range(sheet_cat.nrows): curr_cell = 0 while curr_cell < n_cells: cell_type = sheet_cat.cell_type(curr_row, curr_cell) if cell_type == xlrd.XL_CELL_EMPTY: curr_cell += 1 continue value = sheet_cat.cell_value(curr_row, curr_cell) if value != u'T\xean danh m\u1ee5c': shop_cat.append(value) curr_cell += 1 '''get shop_products''' n_cells = sheet_products.ncols first_data_col = -1 is_first_data_row = True for curr_row in range(sheet_products.nrows): curr_cell = 0 while curr_cell < n_cells: cell_type = sheet_products.cell_type(curr_row, curr_cell) if cell_type == xlrd.XL_CELL_EMPTY: curr_cell += 1 continue if first_data_col < 0: first_data_col = curr_cell data_col = curr_cell - first_data_col value = sheet_products.cell_value(curr_row, curr_cell) if data_col == 0: product_info = {} product_info['name'] = value elif data_col == 1: product_info['model'] = value elif data_col == 2: if cell_type == xlrd.XL_CELL_NUMBER: value = int(value) product_info['price'] = value elif data_col == 3: product_info['meta_description'] = value elif data_col == 4: product_info['description'] = value elif data_col == 5: product_info['product_category'] = value elif data_col == 6: product_info['image'] = value if is_first_data_row: is_first_data_row = False else: shop_products.append(product_info) curr_cell += 1 return shop_info, shop_cat, shop_products if __name__ == '__main__': shop_info, shop_cat, shop_products = parseshopxls('adminup/shop.xlsx') print shop_info print shop_cat for product in shop_products: print '****************************' print product
python/exelprocess.1579447224.txt.gz · Last modified: 2022/10/29 16:15 (external edit)