使用xlrd将xlsx文件导入Python

3
尝试将.xlsx文件导入Python。我已经搜索了谷歌并找到了一些代码,可以用于.csv文件,但需要用于.xlsx文件。因此,我基本上拼凑在一起,并希望它能工作。非常感谢任何和所有的帮助!**添加了我的整个代码和数据文件的片段。我需要它能够导入.xlsx并执行数据。
import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

worksheet = workbook.get_sheet(0, update_pos=True)

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
unspsc_req_count = 0
msds_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0


for row_num, row in enumerate(worksheet):
    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])
    else:
        hq_line, part_no, part_class, appl_req, appl_count, intr_req, 
        intr_count, oe_intr_req, has_oe_intr, has_attr_editor, 
        has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, 
        has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, 
        has_unspsc, attr_count, attrval_count, valid_part = row

    if valid_part == 'YES':
        valid_count += 1

        # Required Parts Count
        if appl_req == 'YES':
            app_req_count += 1
        if intr_req == 'YES':
            intr_req_count += 1
        if oe_intr_req == 'YES':
            oe_intr_req_count += 1
        if part_img_req == 'YES':
            part_img_req_count += 1
        if upc_req == 'YES':
            upc_req_count += 1
        if msds_req == 'YES':
            msds_req_count += 1

        # Missing Data Counts
        if appl_req == 'YES' and appl_count == '0':
            missing_app_count += 1
        if intr_req == 'YES' and intr_count == '0':
            missing_intr_count += 1
        if oe_intr_req == 'YES' and has_oe_intr == '0':
             missing_oe_intr_count += 1
        if has_mpcc == 'NO':
             missing_mpcc_count += 1
        if has_attr_values == 'NO':
            missing_attr_values_count += 1
        if has_part_img == 'NO':
            missing_part_img_count += 1
        if upc_req == 'YES' and has_upc == '0':
            missing_upc_count += 1
        if warr_req == 'YES' and has_warr_txt == 'NO':
            missing_warr_text_count += 1
        if warr_req == 'YES' and has_warr_pdf == 'NO':
            missing_warr_pdf_count += 1
        if has_unspsc == 'NO':
            missing_unspsc_count += 1
        if msds_req == 'YES' and has_msds == 'NO':
            missing_msds_count += 1

# Statements for Required Counts
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count))
application_required = ('Application Records Required: ', 
'{:,}'.format(app_req_count))
interchange_required = ('Interchange Records Required: ', 
'{:,}'.format(intr_req_count))
oe_interchange_required = ('OE Interchange Records Required: ', 
'{:,}'.format(oe_intr_req_count))
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count))  # Every valid part 
requires a MPCC
attributes = ('Attributes Required: ', '{:,}'.format(valid_count))  # Every 
valid part requires attributes
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count))
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count))
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count))  # 
Require warranty text/pdf on all parts
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count))  # Require 
UNSPSC Codes for all parts
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count))

# Statements for Missing Counts
missing_applications = ('Missing Applications: ', 
'{:,}'.format(missing_app_count))
missing_interchange = ('Missing Interchange: ', 
'{:,}'.format(missing_intr_count))
missing_oe_interchange = ('Missing OE Interchange: ', 
'{:,}'.format(missing_oe_intr_count))
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count))
missing_attributes = ('Missing Attributes: ', 
'{:,}'.format(missing_attr_values_count))
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count))
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count))
missing_warranty_text = ('Missing Warranty Text: ', 
'{:,}'.format(missing_warr_text_count))
missing_warranty_pdf = ('Missing Warranty PDF: ', 
'{:,}'.format(missing_warr_pdf_count))
missing_unspsc = ('Missing UNSPSC Code: ', 
'{:,}'.format(missing_unspsc_count))
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count))

# CSV Output
report_card.append(valid_parts)
report_card.append(application_required)
report_card.append(interchange_required)
report_card.append(oe_interchange_required)
report_card.append(mpcc)
report_card.append(attributes)
report_card.append(image_required)
report_card.append(upc)
report_card.append(warranties)
report_card.append(unspsc)
report_card.append(msds)
report_card.append(missing_applications)
report_card.append(missing_interchange)
report_card.append(missing_oe_interchange)
report_card.append(missing_mpcc)
report_card.append(missing_attributes)
report_card.append(missing_image)
report_card.append(missing_UPC)
report_card.append(missing_warranty_text)
report_card.append(missing_warranty_pdf)
report_card.append(missing_unspsc)
report_card.append(missing_msds)

for row in header:
    print(row)

for x in report_card:
    print(x)



with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)

CSV文件:

Supplier Line:,,Gates Rubber - Denver (GAT),,,,,,,,,,,,,,,,,,,,,,
Summary:,,Parts HQ Abbr,,,,,,,,,,,,,,,,,,,,,,
ACCT No:,,40013586,,,,,,,,,,,,,,,,,,,,,,
RecCount:,,10221,,,,,,,,,,,,,,,,,,,,,,
Applicable Date:,,"June 14, 2017 (Wednesday)",,,,,,,,,,,,,,,,,,,,,,
,,,,,,,,,,,,,,,,,,,,,,,,
HQ Line,Part No,Part Class,Appl Req,Appl Count ,Intr Req,Intr Count ,OE Intr Req,Has OE Intr,Has Attr Editor, Has Attr Values,Part IMG Req,Has Part IMG,Has MPCC,Warr Req,Has Warr TXT,Has Warr PDF,MSDS Req,Has MSDS,UPC Req,Has UPC,Has UNSPSC,Attr Count ,AttrVal Count ,Valid Part
GAT,'27210',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27211',O,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,14,YES
GAT,'27212',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27213',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES
GAT,'27220',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES
GAT,'27221',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES

我知道我的代码出了问题,因为枚举函数在导入 .csv 文件时可以正常工作,但现在它无法将 book 识别为可迭代对象。 - Sam Russo
如果您编辑问题并包含XLSX文件的小样本,将会很有帮助。 - Martin Evans
如果所有其他方法都失败了,将你的xlsx文件另存为CSV格式,然后使用它。 - boardrider
这是针对大量 .xlsx 文件的工作流程。逐个打开和保存不够高效。 - Sam Russo
1
如果Excel是一个矩阵,你一定要看看 Pandas http://pandas.pydata.org/,特别是读取Excel函数 https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html - Anton vBR
显示剩余2条评论
3个回答

2

由于它是一个Excel工作簿对象,而不仅仅是.CSV文件,所以您的book对象由多个表格组成。因此,首先您必须获取要处理的表格,然后需要通过调用sheet.get_rows()来获取行。

下面的代码应该可以工作,我在本地测试过。

import xlrd

book = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0

sheet = book.sheets()[0]
for row_num, row in enumerate(sheet.get_rows()):
    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])

print(header)

1
这个脚本可以将Excel数据表转换为字典列表:
import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx', on_demand = True)
worksheet = workbook.sheet_by_index(0)
first_row = [] # The row where we stock the name of the column
for col in range(worksheet.ncols):
    first_row.append( worksheet.cell_value(0,col) )
# transform the workbook to a list of dictionaries
data =[]
for row in range(1, worksheet.nrows):
    elm = {}
    for col in range(worksheet.ncols):
        elm[first_row[col]]=worksheet.cell_value(row,col)
    data.append(elm)
print data

你也可以使用 Pandas
from pandas import *
xls = ExcelFile('GAT_US_PartReview_2017-06-23.xlsx')
df = xls.parse(xls.sheet_names[0])
print df.to_dict()

0

访问 xlrd 库的代码需要进行以下几处修复:

import xlrd

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx')
worksheet = workbook.sheet_by_index(0)

header = []
report_card = []

# Count Stmnts for Required Data
valid_count = 0
app_req_count = 0
intr_req_count = 0
oe_intr_req_count = 0
part_img_req_count = 0
upc_req_count = 0
unspsc_req_count = 0
msds_req_count = 0

# Count Stmts for Missing Data
missing_app_count = 0
missing_intr_count = 0
missing_oe_intr_count = 0
missing_mpcc_count = 0
missing_attr_values_count = 0
missing_part_img_count = 0
missing_upc_count = 0
missing_warr_text_count = 0
missing_warr_pdf_count = 0
missing_unspsc_count = 0
missing_msds_count = 0

for row_num in range(worksheet.nrows):
    row = worksheet.row_values(row_num)

    if row_num <= 4:
        #  print(row)  # Print out the header
        header.append([row[0], row[2]])
    else:
        hq_line, part_no, part_class, appl_req, appl_count, intr_req, \
        intr_count, oe_intr_req, has_oe_intr, has_attr_editor, \
        has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, \
        has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, \
        has_unspsc, attr_count, attrval_count, valid_part = row

        if valid_part == 'YES':
            valid_count += 1

            # Required Parts Count
            if appl_req == 'YES':
                app_req_count += 1
            if intr_req == 'YES':
                intr_req_count += 1
            if oe_intr_req == 'YES':
                oe_intr_req_count += 1
            if part_img_req == 'YES':
                part_img_req_count += 1
            if upc_req == 'YES':
                upc_req_count += 1
            if msds_req == 'YES':
                msds_req_count += 1

            # Missing Data Counts
            if appl_req == 'YES' and appl_count == '0':
                missing_app_count += 1
            if intr_req == 'YES' and intr_count == '0':
                missing_intr_count += 1
            if oe_intr_req == 'YES' and has_oe_intr == '0':
                 missing_oe_intr_count += 1
            if has_mpcc == 'NO':
                 missing_mpcc_count += 1
            if has_attr_values == 'NO':
                missing_attr_values_count += 1
            if has_part_img == 'NO':
                missing_part_img_count += 1
            if upc_req == 'YES' and has_upc == '0':
                missing_upc_count += 1
            if warr_req == 'YES' and has_warr_txt == 'NO':
                missing_warr_text_count += 1
            if warr_req == 'YES' and has_warr_pdf == 'NO':
                missing_warr_pdf_count += 1
            if has_unspsc == 'NO':
                missing_unspsc_count += 1
            if msds_req == 'YES' and has_msds == 'NO':
                missing_msds_count += 1

# Statements for Required Counts
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count))
application_required = ('Application Records Required: ', '{:,}'.format(app_req_count))
interchange_required = ('Interchange Records Required: ', '{:,}'.format(intr_req_count))
oe_interchange_required = ('OE Interchange Records Required: ', '{:,}'.format(oe_intr_req_count))
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count))  # Every valid part requires a MPCC
attributes = ('Attributes Required: ', '{:,}'.format(valid_count))  # Every valid part requires attributes
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count))
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count))
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count))  # Require warranty text/pdf on all parts
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count))  # Require UNSPSC Codes for all parts
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count))

# Statements for Missing Counts
missing_applications = ('Missing Applications: ', '{:,}'.format(missing_app_count))
missing_interchange = ('Missing Interchange: ', '{:,}'.format(missing_intr_count))
missing_oe_interchange = ('Missing OE Interchange: ', '{:,}'.format(missing_oe_intr_count))
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count))
missing_attributes = ('Missing Attributes: ', '{:,}'.format(missing_attr_values_count))
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count))
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count))
missing_warranty_text = ('Missing Warranty Text: ', '{:,}'.format(missing_warr_text_count))
missing_warranty_pdf = ('Missing Warranty PDF: ', '{:,}'.format(missing_warr_pdf_count))
missing_unspsc = ('Missing UNSPSC Code: ', '{:,}'.format(missing_unspsc_count))
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count))

# CSV Output
report_card.append(valid_parts)
report_card.append(application_required)
report_card.append(interchange_required)
report_card.append(oe_interchange_required)
report_card.append(mpcc)
report_card.append(attributes)
report_card.append(image_required)
report_card.append(upc)
report_card.append(warranties)
report_card.append(unspsc)
report_card.append(msds)
report_card.append(missing_applications)
report_card.append(missing_interchange)
report_card.append(missing_oe_interchange)
report_card.append(missing_mpcc)
report_card.append(missing_attributes)
report_card.append(missing_image)
report_card.append(missing_UPC)
report_card.append(missing_warranty_text)
report_card.append(missing_warranty_pdf)
report_card.append(missing_unspsc)
report_card.append(missing_msds)

for row in header:
    print(row)

for x in report_card:
    print(x)

with open('Report_Card.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(header)
    writer.writerows(report_card)        

根据您提供的数据,这将为您生成一个名为Report_Card.csv的文件,其内容如下:
Supplier Line:  Gates Rubber - Denver (GAT)
Summary:    Parts HQ Abbr
ACCT No:    40013586
RecCount:   10221
Applicable Date:    June 14, 2017 (Wednesday)
Number of Valid Parts:  6
Application Records Required:   0
Interchange Records Required:   6
OE Interchange Records Required:    6
MPCC Required:  6
Attributes Required:    6
Image Required:     6
UPC Requited:   6
Warranty Text/PDF Required:     6
UNSPSC Code Required:   6
MSDS Required:  0
Missing Applications:   0
Missing Interchange:    0
Missing OE Interchange:     0
Missing MPCC:   0
Missing Attributes:     0
Missing Image:  0
Missing UPC:    0
Missing Warranty Text:  0
Missing Warranty PDF:   0
Missing UNSPSC Code:    0
Missing MSDS:   0

你的代码肯定需要重新考虑。与其有那么多变量,比如每列一个变量,不如考虑使用类似于Python Counter() 的东西,它像字典一样工作。使用列表或字典将使您的代码更简单。

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接