User Tools

Site Tools


python:exelprocess

This is an old revision of the document!


Exel Processing

Install

  • Install on windows:
    easy_install.exe xlrd
    easy_install.exe xlwt
    easy_install.exe xlutils
  • Install on linux:
    pip install xlrd
    pip install xlwt
    pip install xlutils

using xlrd

Basic APIs:

  • xlrd.open_workbook('adminup/shop.xlsx') return xlrd.book.Book object

Example codes:

Open Workbook and get sheets

  • Simple Open workbook
    import xlrd
     
    xlsfile = xlrd.open_workbook('adminup/shop.xlsx')
    print xlsfile

    output:

    <xlrd.book.Book object at 0x023263B0>
  • Get sheets:
    import xlrd
    from xlrd.book import Book
     
    xlsfile = xlrd.open_workbook('adminup/shop.xlsx')
    print xlsfile.sheets()

    output:

    [<xlrd.sheet.Sheet object at 0x02256530>, <xlrd.sheet.Sheet object at 0x022564D0>, <xlrd.sheet.Sheet object at 0x022569F0>]

Using sheet(xlrd.sheet.Sheet)

  • dump sheet to get basic informations
    import xlrd
    from xlrd.book import Book
    from xlrd.sheet import Sheet
     
    xlsfile = xlrd.open_workbook('adminup/shop.xlsx')
    for sheet in xlsfile.sheets():
        print '******************************'
        sheet.dump()

    output for sheet1:

    _cell_types: <type 'list'>, len = 8
    _cell_values: <type 'list'>, len = 8
    _dimncols: 4
    _dimnrows: 8
    _first_full_rowx: 3
    
    name: u'Th\xf4ng tin shop'
    ncols: 4
    nrows: 8
    number: 0
    

    output for sheet2:

    _cell_types: <type 'list'>, len = 6
    _cell_values: <type 'list'>, len = 6
    _dimncols: 3
    _dimnrows: 54
    _first_full_rowx: 2
    
    name: u'Danh m\u1ee5c'
    ncols: 3
    nrows: 6
    number: 1
  • Get sheet by name: Using http://www.rapidmonkey.com/unicodeconverter/ to convert utf-8 to ASCII(Unicode Escaped) Content
    import xlrd
    from xlrd.book import Book
    from xlrd.sheet import Sheet
     
    xlsfile = xlrd.open_workbook('adminup/shop.xlsx')
    sheet_info = xlsfile.sheet_by_name(u'Th\xf4ng tin shop')
    sheet_cat = xlsfile.sheet_by_name(u'Danh m\u1ee5c')
    sheet_products = xlsfile.sheet_by_name(u'S\u1EA3n ph\u1EA9m')

    Or

    import xlrd
    from xlrd.book import Book
    from xlrd.sheet import Sheet
     
    xlsfile = xlrd.open_workbook('adminup/shop.xlsx')
    sheet_info = None
    sheet_cat = None
    sheet_products = None
    for sheet in xlsfile.sheets():
        if sheet.name == u'Th\xf4ng tin shop':
            sheet_info = sheet
        elif sheet.name == u'Danh m\u1ee5c':
            sheet_cat = sheet
        elif sheet.name == u'S\u1EA3n ph\u1EA9m':
            sheet_products = sheet
    if sheet_info is not None:
        print sheet_info.dump()        

Parse Shop.xls

import xlrd
 
def parseshopxls(filename):
    shop_info = {
                 'config_name':'',#Cen cua hang
                 'config_owner':'',#Chu cua hang
                 'config_address':'',#Dia chi
                 'config_email':'',#Email
                 'config_telephone':'',#Dien thoai
                 'config_title':'',#Tieu de
                 'config_meta_description':'',#Mo ta tu khoa
                 }
    shop_cat = [] 
    shop_products = []
    xlsfile = xlrd.open_workbook(filename)
    sheet_info = None
    sheet_cat = None
    sheet_products = None
    for sheet in xlsfile.sheets():
        if sheet.name == u'Th\xf4ng tin shop':
            sheet_info = sheet
        elif sheet.name == u'Danh m\u1ee5c': 
            sheet_cat = sheet
        elif sheet.name == u'S\u1EA3n ph\u1EA9m':
            sheet_products = sheet
    '''get shop_info'''
    n_cells = sheet_info.ncols             
    for curr_row in range(sheet_info.nrows):
        curr_cell = 0
        while curr_cell < n_cells:
            cell_type = sheet_info.cell_type(curr_row, curr_cell)
            if cell_type == xlrd.XL_CELL_EMPTY:
                curr_cell += 1
                continue
            value = sheet_info.cell_value(curr_row, curr_cell)
            if value == u'T\xean c\u1eeda h\xe0ng':
                curr_cell += 1
                shop_info['config_name'] = sheet_info.cell_value(curr_row, curr_cell)
            elif value == u'Ch\u1ee7 c\u1eeda h\xe0ng':
                curr_cell += 1
                shop_info['config_owner'] = sheet_info.cell_value(curr_row, curr_cell)
            elif value == u'\u0110\u1ecba ch\u1ec9':
                curr_cell += 1
                shop_info['config_address'] = sheet_info.cell_value(curr_row, curr_cell)
            elif value == u'Email':
                curr_cell += 1
                shop_info['config_email'] = sheet_info.cell_value(curr_row, curr_cell)        
            elif value == u'\u0110i\u1ec7n tho\u1ea1i':
                curr_cell += 1
                shop_info['config_telephone'] = int(sheet_info.cell_value(curr_row, curr_cell))
            curr_cell += 1
    '''get shop_cat'''         
    n_cells = sheet_cat.ncols
    for curr_row in range(sheet_cat.nrows):
        curr_cell = 0
        while curr_cell < n_cells:
            cell_type = sheet_cat.cell_type(curr_row, curr_cell)
            if cell_type == xlrd.XL_CELL_EMPTY:
                curr_cell += 1
                continue
            value = sheet_cat.cell_value(curr_row, curr_cell)
            if value != u'T\xean danh m\u1ee5c':
                shop_cat.append(value)
            curr_cell += 1
    '''get shop_products'''         
    n_cells = sheet_products.ncols
    first_data_col = -1
    is_first_data_row = True
    for curr_row in range(sheet_products.nrows):
        curr_cell = 0
        while curr_cell < n_cells:
            cell_type = sheet_products.cell_type(curr_row, curr_cell)
            if cell_type == xlrd.XL_CELL_EMPTY:
                curr_cell += 1
                continue 
            if first_data_col < 0:
                first_data_col = curr_cell
            data_col = curr_cell - first_data_col
            value = sheet_products.cell_value(curr_row, curr_cell)         
            if data_col == 0:
                product_info = {}
                product_info['name'] = value
            elif data_col == 1:
                product_info['model'] = value            
            elif data_col == 2:
                if cell_type == xlrd.XL_CELL_NUMBER:
                    value = int(value)
                product_info['price'] = value
            elif data_col == 3:
                product_info['meta_description'] = value
            elif data_col == 4:
                product_info['description'] = value
            elif data_col == 5:
                product_info['product_category'] = value
            elif data_col == 6:
                product_info['image'] = value
                if is_first_data_row:
                    is_first_data_row = False
                else:
                    shop_products.append(product_info)
            curr_cell += 1
    return shop_info, shop_cat, shop_products         
if __name__ == '__main__':
    shop_info, shop_cat, shop_products = parseshopxls('adminup/shop.xlsx')
    print shop_info
    print shop_cat 
    for product in shop_products:
        print '****************************'
        print product    
python/exelprocess.1579447224.txt.gz · Last modified: 2022/10/29 16:15 (external edit)