我正在使用Visual Studio 2008,并需要使用Open XML SDK 2.0从Excel表格创建一个DataTable
。我需要使用表格的第一行创建DataTable
列,并用其余值完成它。
有没有人有示例代码或可以帮助我完成此操作的链接?
我正在使用Visual Studio 2008,并需要使用Open XML SDK 2.0从Excel表格创建一个DataTable
。我需要使用表格的第一行创建DataTable
列,并用其余值完成它。
有没有人有示例代码或可以帮助我完成此操作的链接?
我认为这应该能够满足您的要求。另一个函数存在的目的是为了处理如果您在列标题中有共享字符串,我假设您确实有这种情况。不确定这是否完美,但我希望它能帮到您。
static void Main(string[] args)
{
DataTable dt = new DataTable();
using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"..\..\example.xlsx", false))
{
WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild<SheetData>();
IEnumerable<Row> rows = sheetData.Descendants<Row>();
foreach (Cell cell in rows.ElementAt(0))
{
dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
}
foreach (Row row in rows) //this will also include your header row...
{
DataRow tempRow = dt.NewRow();
for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
}
dt.Rows.Add(tempRow);
}
}
dt.Rows.RemoveAt(0); //...so i'm taking it out here.
}
public static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
}
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System.Data;
public static DataTable ReadExcelToDataTable(string fileName)
{
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fileName, false))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
IEnumerable sheets = spreadsheetDocument.WorkbookPart.Workbook.GetFirstChild().Elements();
string relationshipId = sheets.First().Id.Value;
WorksheetPart worksheetPart = (WorksheetPart)spreadsheetDocument.WorkbookPart.GetPartById(relationshipId);
Worksheet workSheet = worksheetPart.Worksheet;
SheetData sheetData = workSheet.GetFirstChild();
IEnumerable rows = sheetData.Descendants();
DataTable dataTable = new DataTable();
foreach (Cell cell in rows.ElementAt(0))
{
dataTable.Columns.Add(GetCellValue(spreadsheetDocument, cell));
}
foreach (Row row in rows.Skip(1))
{
DataRow dataRow = dataTable.NewRow();
for (int i = 0; i < row.Descendants().Count(); i++)
{
dataRow[i] = GetCellValue(spreadsheetDocument, row.Descendants().ElementAt(i));
}
dataTable.Rows.Add(dataRow);
}
return dataTable;
}
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
string value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
}
else
{
return value;
}
} | |
- Rahul NikatetempRow
中连续得到A1、D1和G1。此外,在某些情况下,“CellValue”为null,在“GetCellValue”的第二行会出现异常。 - Andrew嗨,上面的代码除了一个变更外都运行良好。
请将下面这行代码替换掉
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
随着
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
如果您使用(i-1),它会抛出异常:
specified argument was out of the range of valid values. parameter name index.
这个解决方案适用于没有空单元格的电子表格。
要处理空单元格,您需要替换此行:
tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i-1));
使用类似以下的方式:
Cell cell = row.Descendants<Cell>().ElementAt(i);
int index = CellReferenceToIndex(cell);
tempRow[index] = GetCellValue(spreadSheetDocument, cell);
并添加此方法:
private static int CellReferenceToIndex(Cell cell)
{
int index = -1;
string reference = cell.CellReference.ToString().ToUpper();
foreach (char ch in reference)
{
if (Char.IsLetter(ch))
{
int value = (int)ch - (int)'A';
index = (index + 1) * 26 + value;
}
else
return index;
}
return index;
}
(index == 0) ? value : ((index + 1) * 26) + value
=> 对于以 A
开头的多字符索引,如 AA
、AB
、AAC
等,将会返回错误的结果,因为 A
的值将被转换为 0
而不是 26 的倍数。 - Lanorkin这是我完整的解决方案,其中还考虑了空单元格。
public static class ExcelHelper
{
//To get the value of the cell, even it's empty. Unable to use loop by index
private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, string cellColumnReference)
{
Cell theCell = null;
string value = "";
foreach (Cell cell in theCells)
{
if (cell.CellReference.Value.StartsWith(cellColumnReference))
{
theCell = cell;
break;
}
}
if (theCell != null)
{
value = theCell.InnerText;
// If the cell represents an integer number, you are done.
// For dates, this code returns the serialized value that represents the date. The code handles strings and
// Booleans individually. For shared strings, the code looks up the corresponding value in the shared string table. For Booleans, the code converts the value into the words TRUE or FALSE.
if (theCell.DataType != null)
{
switch (theCell.DataType.Value)
{
case CellValues.SharedString:
// For shared strings, look up the value in the shared strings table.
var stringTable = wbPart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
// If the shared string table is missing, something is wrong. Return the index that is in the cell. Otherwise, look up the correct text in the table.
if (stringTable != null)
{
value = stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
}
break;
case CellValues.Boolean:
switch (value)
{
case "0":
value = "FALSE";
break;
default:
value = "TRUE";
break;
}
break;
}
}
}
return value;
}
private static string GetCellValue(WorkbookPart wbPart, List<Cell> theCells, int index)
{
return GetCellValue(wbPart, theCells, GetExcelColumnName(index));
}
private static string GetExcelColumnName(int columnNumber)
{
int dividend = columnNumber;
string columnName = String.Empty;
int modulo;
while (dividend > 0)
{
modulo = (dividend - 1) % 26;
columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
dividend = (int)((dividend - modulo) / 26);
}
return columnName;
}
//Only xlsx files
public static DataTable GetDataTableFromExcelFile(string filePath, string sheetName = "")
{
DataTable dt = new DataTable();
try
{
using (SpreadsheetDocument document = SpreadsheetDocument.Open(filePath, false))
{
WorkbookPart wbPart = document.WorkbookPart;
IEnumerable<Sheet> sheets = document.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
string sheetId = sheetName != "" ? sheets.Where(q => q.Name == sheetName).First().Id.Value : sheets.First().Id.Value;
WorksheetPart wsPart = (WorksheetPart)wbPart.GetPartById(sheetId);
SheetData sheetdata = wsPart.Worksheet.Elements<SheetData>().FirstOrDefault();
int totalHeaderCount = sheetdata.Descendants<Row>().ElementAt(0).Descendants<Cell>().Count();
//Get the header
for (int i = 1; i <= totalHeaderCount; i++)
{
dt.Columns.Add(GetCellValue(wbPart, sheetdata.Descendants<Row>().ElementAt(0).Elements<Cell>().ToList(), i));
}
foreach (Row r in sheetdata.Descendants<Row>())
{
if (r.RowIndex > 1)
{
DataRow tempRow = dt.NewRow();
//Always get from the header count, because the index of the row changes where empty cell is not counted
for (int i = 1; i <= totalHeaderCount; i++)
{
tempRow[i - 1] = GetCellValue(wbPart, r.Elements<Cell>().ToList(), i);
}
dt.Rows.Add(tempRow);
}
}
}
}
catch (Exception ex)
{
}
return dt;
}
}
ExcelUtility.cs
using System.Data;
using System.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
namespace Core_Excel.Utilities
{
static class ExcelUtility
{
public static DataTable Read(string path)
{
var dt = new DataTable();
using (var ssDoc = SpreadsheetDocument.Open(path, false))
{
var sheets = ssDoc.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
var relationshipId = sheets.First().Id.Value;
var worksheetPart = (WorksheetPart) ssDoc.WorkbookPart.GetPartById(relationshipId);
var workSheet = worksheetPart.Worksheet;
var sheetData = workSheet.GetFirstChild<SheetData>();
var rows = sheetData.Descendants<Row>().ToList();
foreach (var row in rows) //this will also include your header row...
{
var tempRow = dt.NewRow();
var colCount = row.Descendants<Cell>().Count();
foreach (var cell in row.Descendants<Cell>())
{
var index = GetIndex(cell.CellReference);
// Add Columns
for (var i = dt.Columns.Count; i <= index; i++)
dt.Columns.Add();
tempRow[index] = GetCellValue(ssDoc, cell);
}
dt.Rows.Add(tempRow);
}
}
return dt;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
var stringTablePart = document.WorkbookPart.SharedStringTablePart;
var value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
return stringTablePart.SharedStringTable.ChildElements[int.Parse(value)].InnerText;
return value;
}
public static int GetIndex(string name)
{
if (string.IsNullOrWhiteSpace(name))
return -1;
int index = 0;
foreach (var ch in name)
{
if (char.IsLetter(ch))
{
int value = ch - 'A' + 1;
index = value + index * 26;
}
else
break;
}
return index - 1;
}
}
}
用法:
var path = "D:\\Documents\\test.xlsx";
var dt = ExcelUtility.Read(path);
那就尽情享受吧!
Public Shared Function ExcelToDataTable(filename As String) As DataTable
Try
Dim dt As New DataTable()
Using doc As SpreadsheetDocument = SpreadsheetDocument.Open(filename, False)
Dim workbookPart As WorkbookPart = doc.WorkbookPart
Dim sheets As IEnumerable(Of Sheet) = doc.WorkbookPart.Workbook.GetFirstChild(Of Sheets)().Elements(Of Sheet)()
Dim relationshipId As String = sheets.First().Id.Value
Dim worksheetPart As WorksheetPart = DirectCast(doc.WorkbookPart.GetPartById(relationshipId), WorksheetPart)
Dim workSheet As Worksheet = worksheetPart.Worksheet
Dim sheetData As SheetData = workSheet.GetFirstChild(Of SheetData)()
Dim rows As IEnumerable(Of Row) = sheetData.Descendants(Of Row)()
For Each cell As Cell In rows.ElementAt(0)
dt.Columns.Add(GetCellValue(doc, cell))
Next
For Each row As Row In rows
'this will also include your header row...
Dim tempRow As DataRow = dt.NewRow()
For i As Integer = 0 To row.Descendants(Of Cell)().Count() - 1
tempRow(i) = GetCellValue(doc, row.Descendants(Of Cell)().ElementAt(i))
Next
dt.Rows.Add(tempRow)
Next
End Using
dt.Rows.RemoveAt(0)
Return dt
Catch ex As Exception
Throw ex
End Try
End Function
Public Shared Function GetCellValue(document As SpreadsheetDocument, cell As Cell) As String
Try
If IsNothing(cell.CellValue) Then
Return ""
End If
Dim value As String = cell.CellValue.InnerXml
If cell.DataType IsNot Nothing AndAlso cell.DataType.Value = CellValues.SharedString Then
Dim stringTablePart As SharedStringTablePart = document.WorkbookPart.SharedStringTablePart
Return stringTablePart.SharedStringTable.ChildElements(Int32.Parse(value)).InnerText
Else
Return value
End If
Catch ex As Exception
Return ""
End Try
End Function
我知道这个帖子已经很久了。然而,上面的解决方案都没有真正适用于我,包括空单元格问题等。
我在GitHub上找到了一个非常好的'MIT'许可证解决方案: https://github.com/ExcelDataReader/ExcelDataReader 这对我的C#和VBnet应用程序都有效。 VBNET的示例调用(c#的示例代码在GitHub上):
Using stream As FileStream = New FileStream(DataPath & "\" & fName.Name, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)
Using reader As IExcelDataReader = ExcelReaderFactory.CreateReader(stream)
ds = reader.AsDataSet(New ExcelDataSetConfiguration() With {
.UseColumnDataType = False,
.ConfigureDataTable = Function(tableReader) New ExcelDataTableConfiguration() With {
.UseHeaderRow = True
}
})
End Using
End Using
结果是一个数据集,其中每个工作簿中的表格都有一个表。
我真的很喜欢自己编译C#中的dll,而不是使用现成的dll。这样我就可以控制我向客户提供什么。
using System;
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
namespace myNamespace
{
static class ExcelUtility
{
// SS Note: isHeaderOnTopRow functionality is to set column names as the first row of 'sheet'
public static DataTable[] Read(string path, bool isHeaderOnTopRow = false)
{
try
{
using (var ssDoc = SpreadsheetDocument.Open(path, false))
{
var sheets = ssDoc.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
DataTable[] dtArray = new DataTable[sheets.ToList().Count];
int counti = 0;
foreach (Sheet sheet in sheets)
{
var dt = new DataTable();
var relationshipId = sheet.Id.Value;
var worksheetPart = (WorksheetPart)ssDoc.WorkbookPart.GetPartById(relationshipId);
var workSheet = worksheetPart.Worksheet;
var sheetData = workSheet.GetFirstChild<SheetData>();
var rows = sheetData.Descendants<Row>().ToList();
int rowIndex = 0;
foreach (var row in rows) //this will also include your header row...
{
var tempRow = dt.NewRow();
var colCount = row.Descendants<Cell>().Count();
int colIndex = 0;
foreach (var cell in row.Descendants<Cell>())
{
var index = GetIndex(cell.CellReference);
// SS Note: ADDED next line as we were getting cell.CellReference (or index) as -1 in our provided xlsx file.
index = (index < 0 ? colIndex++ : index);
// Add Columns
for (var i = dt.Columns.Count; i <= index; i++)
dt.Columns.Add();
if (isHeaderOnTopRow && rowIndex == 0)
{
string heading = GetCellValue(ssDoc, cell);
heading = (heading.Length > 0 ? heading : $"Column{index + 1}");
dt.Columns[index].ColumnName = heading;
}
else
{
tempRow[index] = GetCellValue(ssDoc, cell);
}
}
if (rowIndex > 0 || isHeaderOnTopRow == false)
{
dt.Rows.Add(tempRow);
}
rowIndex++;
}
dtArray[counti++] = dt;
}
return dtArray;
}
}
catch (Exception e)
{
Console.WriteLine(e);
}
return null;
}
private static string GetCellValue(SpreadsheetDocument document, Cell cell)
{
var stringTablePart = document.WorkbookPart.SharedStringTablePart;
var value = cell.CellValue.InnerXml;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
return stringTablePart.SharedStringTable.ChildElements[int.Parse(value)].InnerText;
return value;
}
public static int GetIndex(string name)
{
if (string.IsNullOrWhiteSpace(name))
return -1;
int index = 0;
foreach (var ch in name)
{
if (char.IsLetter(ch))
{
int value = ch - 'A' + 1;
index = value + index * 26;
}
else
break;
}
return index - 1;
}
public static void ExportDataSet(DataSet ds, string destination)
{
try
{
using (var workbook = SpreadsheetDocument.Create(destination, DocumentFormat.OpenXml.SpreadsheetDocumentType.Workbook))
{
var workbookPart = workbook.AddWorkbookPart();
workbook.WorkbookPart.Workbook = new DocumentFormat.OpenXml.Spreadsheet.Workbook();
workbook.WorkbookPart.Workbook.Sheets = new DocumentFormat.OpenXml.Spreadsheet.Sheets();
foreach (System.Data.DataTable table in ds.Tables)
{
var sheetPart = workbook.WorkbookPart.AddNewPart<WorksheetPart>();
var sheetData = new DocumentFormat.OpenXml.Spreadsheet.SheetData();
sheetPart.Worksheet = new DocumentFormat.OpenXml.Spreadsheet.Worksheet(sheetData);
DocumentFormat.OpenXml.Spreadsheet.Sheets sheets = workbook.WorkbookPart.Workbook.GetFirstChild<DocumentFormat.OpenXml.Spreadsheet.Sheets>();
string relationshipId = workbook.WorkbookPart.GetIdOfPart(sheetPart);
uint sheetId = 1;
if (sheets.Elements<DocumentFormat.OpenXml.Spreadsheet.Sheet>().Count() > 0)
{
sheetId =
sheets.Elements<DocumentFormat.OpenXml.Spreadsheet.Sheet>().Select(s => s.SheetId.Value).Max() + 1;
}
DocumentFormat.OpenXml.Spreadsheet.Sheet sheet = new DocumentFormat.OpenXml.Spreadsheet.Sheet() { Id = relationshipId, SheetId = sheetId, Name = table.TableName };
sheets.Append(sheet);
DocumentFormat.OpenXml.Spreadsheet.Row headerRow = new DocumentFormat.OpenXml.Spreadsheet.Row();
List<String> columns = new List<string>();
foreach (System.Data.DataColumn column in table.Columns)
{
columns.Add(column.ColumnName);
DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(column.ColumnName);
headerRow.AppendChild(cell);
}
sheetData.AppendChild(headerRow);
foreach (System.Data.DataRow dsrow in table.Rows)
{
DocumentFormat.OpenXml.Spreadsheet.Row newRow = new DocumentFormat.OpenXml.Spreadsheet.Row();
foreach (String col in columns)
{
DocumentFormat.OpenXml.Spreadsheet.Cell cell = new DocumentFormat.OpenXml.Spreadsheet.Cell();
cell.DataType = DocumentFormat.OpenXml.Spreadsheet.CellValues.String;
cell.CellValue = new DocumentFormat.OpenXml.Spreadsheet.CellValue(dsrow[col].ToString()); //
newRow.AppendChild(cell);
}
sheetData.AppendChild(newRow);
}
}
}
}
catch (Exception e)
{
Console.WriteLine(e);
}
}
public static void saveDataTablesToExcel(DataTable[] dataTables, string saveToFilePath)
{
// Create a DataSet
DataSet dataSet = new DataSet("Tables");
// We can add multiple DataTable to DataSet
foreach (DataTable dt in dataTables)
{
dataSet.Tables.Add(dt);
}
ExportDataSet(dataSet, saveToFilePath);
}
}
}
使用方法:
// save three datatables in xlsx file
DataTable[] dataTables = new DataTable[3];
dataTables[0] = firstDataTable;
dataTables[1] = secondDataTable;
dataTables[2] = thirdDataTable;
string fileName = "saved.xlsx";
saveDataTablesToExcel(dataTables, $"{ExcelFileSaveFolder}{fileName}");
// retrieve data from first sheet and set it to 'returnTable'
DataTable returnTable = null;
var path = $"{ExcelFileSaveFolder}{fileName}";
DataTable[] getDataTables = ExcelUtility.Read(path, true);
if (getDataTables != null && getDataTables.Length > 0)
returnTable = getDataTables[0];
如果行的值为null或为空,则获取值错误。
如果所有列都填充了数据,则它是有效的。但可能并非所有行都是如此。