(1) OleDb: 用这种方法读取Excel速度还是非常的快的,但这种方式读取数据的时候不太灵活,不过可以在 DataTable 中对数据进行一些删减修改
为江川等地区用户提供了全套网页设计制作服务,及江川网站建设行业解决方案。主营业务为成都做网站、网站设计、江川网站设计,以传统方式定制建设网站,并提供域名空间备案等一条龙服务,秉承以专业、用心的态度为用户提供真诚的服务。我们深信只要达到每一位用户的要求,就会得到认可,从而选择与我们长期合作。这样,我们也可以走得更远!
这种方式将Excel作为一个数据源,直接用Sql语句获取数据。所以读取之前要知道此次要读取的Sheet名(当然也可以用序号,类似dt.Rows[0][0],这样倒是不需要知道Sheet名)。
?
// Pick the OLE DB provider from the file extension: Jet 4.0 for ".xls", ACE 12.0 for anything else.
if (fileType == ".xls")
{
    connStr = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + fileName + ";" + ";Extended Properties=\"Excel 8.0;HDR=YES;IMEX=1\"";
}
else
{
    connStr = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source=" + fileName + ";" + ";Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\"";
}
OleDbConnection conn = new OleDbConnection(connStr);
// The schema query below needs an open connection.
conn.Open();
// Each row of the schema table describes one table-like object; TABLE_NAME holds its name.
DataTable dtSheetName = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
以上是读取Excel的Sheet名,xls和xlsx的连接字符串也不一样的,可以根据文件的后缀来区别。这里需要注意的一点,Excel里面只有一个Sheet,但通过这种方式读取Sheet可能会大于一个。原因已经有人在别的网站说过了,偷一下懒O(∩_∩)O,下面文段来自【cdwolfling】
【在使用过程中发现取出的Sheet和实际excel不一致, 会多出不少。目前总结后有两种情况:
1. 取出的名称中,包括了XL命名管理器中的名称(参见XL2007的公式--命名管理器, 快捷键Ctrl+F3);
2. 取出的名称中,包括了FilterDatabase后缀的, 这是XL用来记录Filter范围的, 参见http://www.mrexcel.com/forum/showthread.php?t=27225;
对于第一点比较简单, 删除已有命名管理器中的内容即可;第二点处理起来比较麻烦, Filter删除后这些名称依然保留着,简单的做法是新增sheet然后将原sheet Copy进去】
---------------------------------
但实际情况并不能为每个Excel做以上检查。【cdwolfling】也给出了过滤的方案,但还是有点问题,本人补充了一点。总之先看代码吧
for (int i = 0; i < dtSheetName.Rows.Count; i++)
{
    // ...
    SheetName = (string)dtSheetName.Rows[i]["TABLE_NAME"];
    // Skip entries that contain "$" but do not end with it once the
    // surrounding single quotes are stripped; those are not real sheet names.
    if (SheetName.Contains("$") && !SheetName.Replace("'", "").EndsWith("$"))
    {
        continue;
    }
    // Query and fill using the same name that was just validated.
    da.SelectCommand = new OleDbCommand(String.Format(sql_F, SheetName), conn);
    DataSet dsItem = new DataSet();
    da.Fill(dsItem, SheetName);
    // ...
}
因为读取出来的无效SheetName一般情况下最后一个字符都不会是$。如果SheetName有一些特殊符号,读取出来的SheetName会自动加上单引号,比如在Excel中将SheetName编辑成:MySheet(1),此时读取出来的SheetName就为:'MySheet(1)$',所以判断最后一个字符是不是$之前需要先过滤一下单引号。
优点:读取方式简单、读取速度快
缺点:除了读取过程不太灵活之外,这种读取方式还有个弊端,就是当Excel数据量很大时会非常占用内存,内存不够时会抛出内存溢出的异常。
不过一般情况下还是非常不错的
读取Excel完整代码:
/// <summary>
/// Reads the sheets of an Excel file into a DataSet through OLE DB, one DataTable per sheet.
/// </summary>
/// <param name="filePath">
/// Path of the Excel file. An extension of ".xls" selects the Jet 4.0 provider;
/// any other extension selects the ACE 12.0 provider.
/// </param>
/// <returns>
/// A DataSet whose tables are named after the sheet names reported by the provider,
/// or null when the path has no extension. If reading fails part-way, the error is
/// written to the trace listeners and the tables loaded so far are returned.
/// </returns>
public static DataSet ToDataTable(string filePath)
{
    string fileType = System.IO.Path.GetExtension(filePath);
    if (string.IsNullOrEmpty(fileType))
    {
        return null;
    }

    string connStr;
    if (fileType == ".xls")
    {
        connStr = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + filePath + ";" + ";Extended Properties=\"Excel 8.0;HDR=YES;IMEX=1\"";
    }
    else
    {
        connStr = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source=" + filePath + ";" + ";Extended Properties=\"Excel 12.0;HDR=YES;IMEX=1\"";
    }

    const string sql_F = "Select * FROM [{0}]";
    DataSet ds = new DataSet();
    try
    {
        // The using blocks dispose the connection and adapter on every path,
        // including when construction or Open() fails.
        using (OleDbConnection conn = new OleDbConnection(connStr))
        using (OleDbDataAdapter da = new OleDbDataAdapter())
        {
            conn.Open();

            // Each row of the schema table describes one table-like object in the workbook.
            DataTable dtSheetName = conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" });
            if (dtSheetName == null)
            {
                return ds;
            }

            foreach (DataRow schemaRow in dtSheetName.Rows)
            {
                string sheetName = (string)schemaRow["TABLE_NAME"];

                // Skip entries that contain "$" but do not end with it once the
                // surrounding single quotes are stripped; those are not real sheets.
                if (sheetName.Contains("$") && !sheetName.Replace("'", "").EndsWith("$"))
                {
                    continue;
                }

                using (OleDbCommand command = new OleDbCommand(String.Format(sql_F, sheetName), conn))
                {
                    da.SelectCommand = command;
                    // Fill creates a table named after the sheet directly in the result set.
                    da.Fill(ds, sheetName);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best-effort read: keep returning what was loaded, but do not hide the failure.
        System.Diagnostics.Trace.TraceError("ToDataTable failed for '{0}': {1}", filePath, ex);
    }

    return ds;
}
#p#
(2):Com组件的方式读取Excel
这种方式需要先引用 Microsoft.Office.Interop.Excel 。首先说下这种方式的优缺点
优点:可以非常灵活的读取Excel中的数据
缺点:如果是Web站点部署在IIS上,还需要服务器已安装Excel,有时候还需要为IIS配置权限。最重要的一点,因为是基于单元格方式读取的,所以读取速度很慢(曾做过试验,读取千行、200多列的文件,直接读取耗时15分钟。即使采用多线程分段读取来提高CPU的利用率也需要8分钟。PS:CPU I3)
需要读取大文件的童鞋们慎重。
附上单线程和多线程读取类:
- public class ExcelOptions
- {
- private Stopwatch wath = new Stopwatch();
/// <summary>
/// Reads the first worksheet of an Excel workbook into a DataTable through COM automation.
/// </summary>
/// <param name="excelFilePath">Path of the workbook to open.</param>
/// <returns>
/// A table with one string column per header cell in row 1 (scanning stops at the first
/// blank header cell) and one row per used worksheet row from row 2 onward;
/// null when the worksheet is unavailable or reading fails.
/// </returns>
public System.Data.DataTable GetExcelData(string excelFilePath)
{
    object oMissiong = System.Reflection.Missing.Value;
    Excel.Application app = new Excel.Application();
    Excel.Workbook workbook = null;
    Excel.Sheets sheets = null;
    Excel.Worksheet worksheet = null;
    System.Data.DataTable dt = new System.Data.DataTable();
    wath.Start();
    try
    {
        workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
        sheets = workbook.Worksheets;
        worksheet = (Excel.Worksheet)sheets.get_Item(1); // first worksheet
        if (worksheet == null)
        {
            return null;
        }

        int iRowCount = worksheet.UsedRange.Rows.Count;
        int iColCount = worksheet.UsedRange.Columns.Count;

        // Build the columns from row 1, stopping at the first blank header cell.
        int ColumnID = 1;
        Excel.Range range = (Excel.Range)worksheet.Cells[1, 1];
        while (range.Text.ToString().Trim() != "")
        {
            DataColumn dc = new DataColumn();
            dc.DataType = typeof(string);
            dc.ColumnName = range.Text.ToString().Trim();
            dt.Columns.Add(dc);
            range = (Excel.Range)worksheet.Cells[1, ++ColumnID];
        }

        // The used range can be wider than the header run; reading past the last
        // header column would index a column the table does not have.
        int dataColCount = Math.Min(iColCount, dt.Columns.Count);

        for (int iRow = 2; iRow <= iRowCount; iRow++)
        {
            DataRow dr = dt.NewRow();
            for (int iCol = 1; iCol <= dataColCount; iCol++)
            {
                range = (Excel.Range)worksheet.Cells[iRow, iCol];
                // Empty cells become "", otherwise the cell's displayed text is stored.
                dr[iCol - 1] = (range.Value2 == null) ? "" : range.Text.ToString();
            }
            dt.Rows.Add(dr);
        }

        return dt;
    }
    catch (Exception ex)
    {
        // Callers receive null on failure; trace the cause so it is not lost.
        System.Diagnostics.Trace.TraceError("GetExcelData failed for '{0}': {1}", excelFilePath, ex);
        return null;
    }
    finally
    {
        wath.Stop();
        try
        {
            // workbook is still null when Open() failed, so guard before closing.
            if (workbook != null)
            {
                workbook.Close(false, oMissiong, oMissiong);
            }
            app.Workbooks.Close();
        }
        finally
        {
            // Release every COM wrapper held here and always quit Excel,
            // even if closing the workbook threw.
            if (worksheet != null)
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(worksheet);
                worksheet = null;
            }
            if (sheets != null)
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
                sheets = null;
            }
            if (workbook != null)
            {
                System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
                workbook = null;
            }
            app.Quit();
            System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            app = null;
            // Collect the remaining Range wrappers created by the Cells[...] lookups.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
}
- ///
- /// 使用COM,多线程读取Excel(1 主线程、4 副线程)
- ///
- /// 路径
- ///
DataTabel - public System.Data.DataTable ThreadReadExcel(string excelFilePath)
- {
- Excel.Application app = new Excel.Application();
- Excel.Sheets sheets = null;
- Excel.Workbook workbook = null;
- object oMissiong = System.Reflection.Missing.Value;
- System.Data.DataTable dt = new System.Data.DataTable();
- wath.Start();
- try
- {
- if (app == null)
- {
- return null;
- }
- workbook = app.Workbooks.Open(excelFilePath, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong, oMissiong);
- //将数据读入到DataTable中——Start
- sheets = workbook.Worksheets;
- Excel.Worksheet worksheet = (Excel.Worksheet)sheets.get_Item(1);//读取***张表
- if (worksheet == null)
- return null;
- string cellContent;
- int iRowCount = worksheet.UsedRange.Rows.Count;
- int iColCount = worksheet.UsedRange.Columns.Count;
- Excel.Range range;
- //负责列头Start
- DataColumn dc;
- int ColumnID = 1;
- range = (Excel.Range)worksheet.Cells[1, 1];
- //while (range.Text.ToString().Trim() != "")
- while (iColCount >= ColumnID)
- {
- dc = new DataColumn();
- dc.DataType = System.Type.GetType("System.String");
- string strNewColumnName = range.Text.ToString().Trim();
- if (strNewColumnName.Length == 0) strNewColumnName = "_1";
- //判断列名是否重复
- for (int i = 1; i < ColumnID; i++)
- {
- if (dt.Columns[i - 1].ColumnName == strNewColumnName)
- strNewColumnName = strNewColumnName + "_1";
- }
- dc.ColumnName = strNewColumnName;
- dt.Columns.Add(dc);
- range = (Excel.Range)worksheet.Cells[1, ++ColumnID];
- }
- //End
- //数据大于500条,使用多进程进行读取数据
- if (iRowCount - 1 > 500)
- {
- //开始多线程读取数据
- //新建线程
- int b2 = (iRowCount - 1) / 10;
- DataTable dt1 = new DataTable("dt1");
- dt1 = dt.Clone();
- SheetOptions sheet1thread = new SheetOptions(worksheet, iColCount, 2, b2 + 1, dt1);
- Thread othread1 = new Thread(new ThreadStart(sheet1thread.SheetToDataTable));
- othread1.Start();
- //阻塞 1 毫秒,保证***个读取 dt1
- Thread.Sleep(1);
- DataTable dt2 = new DataTable("dt2");
- dt2 = dt.Clone();
- SheetOptions sheet2thread = new SheetOptions(worksheet, iColCount, b2 + 2, b2 * 2 + 1, dt2);
- Thread othread2 = new Thread(new ThreadStart(sheet2thread.SheetToDataTable));
- othread2.Start();
- DataTable dt3 = new DataTable("dt3");
- dt3 = dt.Clone();
- SheetOptions sheet3thread = new SheetOptions(worksheet, iColCount, b2 * 2 + 2, b2 * 3 + 1, dt3);
- Thread othread3 = new Thread(new ThreadStart(sheet3thread.SheetToDataTable));
- othread3.Start();
- DataTable dt4 = new DataTable("dt4");
- dt4 = dt.Clone();
- SheetOptions sheet4thread = new SheetOptions(worksheet, iColCount, b2 * 3 + 2, b2 * 4 + 1, dt4);
- Thread othread4 = new Thread(new ThreadStart(sheet4thread.SheetToDataTable));
- othread4.Start();
- //主线程读取剩余数据
- for (int iRow = b2 * 4 + 2; iRow <= iRowCount; iRow++)
- {
- DataRow dr = dt.NewRow();
- for (int iCol = 1; iCol <= iColCount; iCol++)
- {
- range = (Excel.Range)worksheet.Cells[iRow, iCol];
- cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
- dr[iCol - 1] = cellContent;
- }
- dt.Rows.Add(dr);
- }
- othread1.Join();
- othread2.Join();
- othread3.Join();
- othread4.Join();
- //将多个线程读取出来的数据追加至 dt1 后面
- foreach (DataRow dr in dt.Rows)
- dt1.Rows.Add(dr.ItemArray);
- dt.Clear();
- dt.Dispose();
- foreach (DataRow dr in dt2.Rows)
- dt1.Rows.Add(dr.ItemArray);
- dt2.Clear();
- dt2.Dispose();
- foreach (DataRow dr in dt3.Rows)
- dt1.Rows.Add(dr.ItemArray);
- dt3.Clear();
- dt3.Dispose();
- foreach (DataRow dr in dt4.Rows)
- dt1.Rows.Add(dr.ItemArray);
- dt4.Clear();
- dt4.Dispose();
- return dt1;
- }
- else
- {
- for (int iRow = 2; iRow <= iRowCount; iRow++)
- {
- DataRow dr = dt.NewRow();
- for (int iCol = 1; iCol <= iColCount; iCol++)
- {
- range = (Excel.Range)worksheet.Cells[iRow, iCol];
- cellContent = (range.Value2 == null) ? "" : range.Text.ToString();
- dr[iCol - 1] = cellContent;
- }
- dt.Rows.Add(dr);
- }
- }
- wath.Stop();
- TimeSpan ts = wath.Elapsed;
- //将数据读入到DataTable中——End
- return dt;
- }
- catch
- {
- return null;
- }
- finally
- {
- workbook.Close(false, oMissiong, oMissiong);
- System.Runtime.InteropServices.Marshal.ReleaseComObject(workbook);
- System.Runtime.InteropServices.Marshal.ReleaseComObject(sheets);
- workbook = null;
- app.Workbooks.Close();
- app.Quit();
- System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
- app = null;
-  
分享文章:C#读取Excel几种方法的体会
链接分享:http://www.mswzjz.cn/qtweb/news37/203837.html攀枝花网站建设、攀枝花网站运维推广公司-贝锐智能,是专注品牌与效果的网络营销公司;服务项目有等
声明:本网站发布的内容(图片、视频和文字)以用户投稿、用户转载内容为主,如果涉及侵权请尽快告知,我们将会在第一时间删除。文章观点不代表本网站立场,如需处理请联系客服。电话:028-86922220;邮箱:631063699@qq.com。内容未经允许不得转载,或转载时需注明来源: 贝锐智能