使用NPOI读取Word、Excel文档内容

使用NPOI读取Excel的例子很多,读取Word的例子不多。

Excel的解析方式有多种,可以使用ODBC查询,把Excel作为一个数据集对待。也可以使用文档结构模型的方式进行解析,即解析Workbook(工作簿)、Sheet、Row、Column。

Word的解析比较复杂,因为Word的文档结构模型定义较为复杂。解析Word或者Excel,关键是理解Word、Excel的文档对象模型。

Word、Excel文档对象模型的解析,可以通过COM接口调用,此类方式使用较广。(可以录制宏代码,然后替换为对应的语言)

也可以使用XML模型解析,尤其是对于2007、2010版本的文档的解析。

using NPOI.POIFS.FileSystem;

using NPOI.SS.UserModel;

using NPOI.XSSF.UserModel;

using NPOI.XWPF.UserModel;

using System;

using System.Collections.Generic;

using System.Configuration;

using System.IO;

using System.Text;

namespace eyuan

{

/// <summary>
/// Extracts plain text from Excel (.xlsx) workbooks and Word (.docx) documents
/// through the NPOI XSSF and XWPF object models.
/// </summary>
public static class NOPIHandler
{
    /// <summary>
    /// Reads the text of every cell in a workbook, grouped by sheet and then by row.
    /// </summary>
    /// <param name="fileName">Path of the workbook file to read.</param>
    /// <returns>
    /// One entry per sheet; each sheet is a list of rows and each row is a list of
    /// cell texts (rows may differ in length). The list is empty when the workbook
    /// could not be opened; that failure is reported through LogHandler.LogWrite.
    /// </returns>
    public static List<List<List<string>>> ReadExcel(string fileName)
    {
        List<List<List<string>>> workBookContent = new List<List<List<string>>>();

        XSSFWorkbook workbook = OpenWorkbook(fileName);
        if (workbook == null)
        {
            // The open failure has already been logged; return the empty result
            // instead of dereferencing a null workbook.
            return workBookContent;
        }

        // Sheet indexes start at 0.
        int sheetsCount = workbook.NumberOfSheets;
        for (int sheetIndex = 0; sheetIndex < sheetsCount; sheetIndex++)
        {
            workBookContent.Add(ReadSheetRows(workbook.GetSheetAt(sheetIndex)));
        }

        return workBookContent;
    }

    /// <summary>
    /// Flattens a workbook into a single string using the separators configured in
    /// the application settings.
    /// </summary>
    /// <param name="fileName">Path of the workbook file to read.</param>
    /// <returns>
    /// The cell texts of each row joined with the "ExcelCellSeparator" setting, each
    /// row followed by "ExcelRowSeparator" and each sheet followed by
    /// "ExcelSheetSeparator".
    /// </returns>
    public static string ReadExcelText(string fileName)
    {
        string cellSeparator = ConfigurationManager.AppSettings["ExcelCellSeparator"];
        string rowSeparator = ConfigurationManager.AppSettings["ExcelRowSeparator"];
        string sheetSeparator = ConfigurationManager.AppSettings["ExcelSheetSeparator"];

        StringBuilder sbFileText = new StringBuilder();
        foreach (List<List<string>> sheetContent in ReadExcel(fileName))
        {
            foreach (List<string> rowContent in sheetContent)
            {
                sbFileText.Append(string.Join(cellSeparator, rowContent.ToArray()));
                sbFileText.Append(rowSeparator);
            }

            sbFileText.Append(sheetSeparator);
        }

        return sbFileText.ToString();
    }

    /// <summary>
    /// Reads the content of a Word document as text.
    /// </summary>
    /// <param name="fileName">Path of the Word document to read.</param>
    /// <returns>
    /// The captured sections in this order: headers, footers, tables, images, body
    /// paragraphs. Headers, footers, tables and images are included only when the
    /// matching "CaptureWord*" application setting equals "true"; body paragraphs are
    /// always included. Each section is wrapped in "Capture ... Begin" /
    /// "Capture ... End" marker lines. An empty string is returned when the document
    /// could not be opened; that failure is reported through LogHandler.LogWrite.
    /// </returns>
    public static string ReadWordText(string fileName)
    {
        XWPFDocument document = OpenDocument(fileName);
        if (document == null)
        {
            // The open failure has already been logged; return an empty text
            // instead of dereferencing a null document.
            return string.Empty;
        }

        StringBuilder sbFileText = new StringBuilder();

        if (IsSettingEnabled("CaptureWordHeader"))
        {
            AppendHeaders(document, sbFileText);
        }

        if (IsSettingEnabled("CaptureWordFooter"))
        {
            AppendFooters(document, sbFileText);
        }

        if (IsSettingEnabled("CaptureWordTable"))
        {
            AppendTables(document, sbFileText);
        }

        if (IsSettingEnabled("CaptureWordImage"))
        {
            AppendImages(document, sbFileText);
        }

        AppendParagraphs(document, sbFileText);

        return sbFileText.ToString();
    }

    /// <summary>
    /// Opens an .xlsx workbook; logs the error and returns null when that fails.
    /// </summary>
    private static XSSFWorkbook OpenWorkbook(string fileName)
    {
        try
        {
            using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            {
                return new XSSFWorkbook(file);
            }
        }
        catch (Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
            return null;
        }
    }

    /// <summary>
    /// Opens a .docx document; logs the error and returns null when that fails.
    /// </summary>
    private static XWPFDocument OpenDocument(string fileName)
    {
        try
        {
            using (FileStream file = new FileStream(fileName, FileMode.Open, FileAccess.Read))
            {
                return new XWPFDocument(file);
            }
        }
        catch (Exception e)
        {
            LogHandler.LogWrite(string.Format("文件{0}打开失败,错误:{1}", fileName, e.ToString()));
            return null;
        }
    }

    /// <summary>
    /// Collects the cell texts of every row stored in a sheet.
    /// </summary>
    private static List<List<string>> ReadSheetRows(ISheet sheet)
    {
        List<List<string>> sheetContent = new List<List<string>>();
        if (sheet.PhysicalNumberOfRows == 0)
        {
            return sheetContent;
        }

        // Walk the row indexes between the first and last stored row instead of
        // counting 0..PhysicalNumberOfRows-1: stored rows are not necessarily
        // contiguous, and an index with no row must be skipped rather than
        // dereferenced.
        int lastRowIndex = sheet.LastRowNum;
        for (int rowIndex = sheet.FirstRowNum; rowIndex <= lastRowIndex; rowIndex++)
        {
            IRow row = sheet.GetRow(rowIndex);
            if (row == null)
            {
                continue;
            }

            sheetContent.Add(ReadRowCells(row));
        }

        return sheetContent;
    }

    /// <summary>
    /// Collects the text of the cells of one row; a null cell is reported as "NIL".
    /// </summary>
    private static List<string> ReadRowCells(IRow row)
    {
        List<string> rowContent = new List<string>();

        // Read row.Cells once; the number of cells can differ from row to row.
        foreach (ICell cell in row.Cells)
        {
            rowContent.Add(cell == null ? "NIL" : cell.ToString());
        }

        return rowContent;
    }

    /// <summary>
    /// Tells whether the given application setting is exactly the string "true".
    /// </summary>
    private static bool IsSettingEnabled(string settingName)
    {
        return ConfigurationManager.AppSettings[settingName] == "true";
    }

    /// <summary>
    /// Appends the text of every header of the document, one per line.
    /// </summary>
    private static void AppendHeaders(XWPFDocument document, StringBuilder sbFileText)
    {
        sbFileText.AppendLine("Capture Header Begin");
        foreach (XWPFHeader xwpfHeader in document.HeaderList)
        {
            sbFileText.AppendLine(xwpfHeader.Text);
        }

        sbFileText.AppendLine("Capture Header End");
    }

    /// <summary>
    /// Appends the text of every footer of the document, one per line.
    /// </summary>
    private static void AppendFooters(XWPFDocument document, StringBuilder sbFileText)
    {
        sbFileText.AppendLine("Capture Footer Begin");
        foreach (XWPFFooter xwpfFooter in document.FooterList)
        {
            sbFileText.AppendLine(xwpfFooter.Text);
        }

        sbFileText.AppendLine("Capture Footer End");
    }

    /// <summary>
    /// Appends the text of every table: each cell is followed by the
    /// "WordTableCellSeparator" setting, each row by "WordTableRowSeparator" and each
    /// table by "WordTableSeparator".
    /// </summary>
    private static void AppendTables(XWPFDocument document, StringBuilder sbFileText)
    {
        string cellSeparator = ConfigurationManager.AppSettings["WordTableCellSeparator"];
        string rowSeparator = ConfigurationManager.AppSettings["WordTableRowSeparator"];
        string tableSeparator = ConfigurationManager.AppSettings["WordTableSeparator"];

        sbFileText.AppendLine("Capture Table Begin");
        foreach (XWPFTable table in document.Tables)
        {
            foreach (XWPFTableRow row in table.Rows)
            {
                foreach (XWPFTableCell cell in row.GetTableCells())
                {
                    sbFileText.Append(cell.GetText());
                    sbFileText.Append(cellSeparator);
                }

                sbFileText.Append(rowSeparator);
            }

            sbFileText.Append(tableSeparator);
        }

        sbFileText.AppendLine("Capture Table End");
    }

    /// <summary>
    /// Writes every picture of the document to its own file and appends the path of
    /// each written file, one per line.
    /// </summary>
    private static void AppendImages(XWPFDocument document, StringBuilder sbFileText)
    {
        // Format template for the output path; {0} receives
        // "<new guid>_<picture file name>.<suggested extension>".
        string imageFileNameTemplate = ConfigurationManager.AppSettings["CaptureWordImageFileName"];

        sbFileText.AppendLine("Capture Image Begin");
        foreach (XWPFPictureData pictureData in document.AllPictures)
        {
            string picExtName = pictureData.suggestFileExtension();
            string picFileName = pictureData.GetFileName();
            byte[] picFileContent = pictureData.GetData();

            // The GUID prefix gives every exported picture a distinct file name.
            string picTempName = string.Format(
                imageFileNameTemplate,
                Guid.NewGuid().ToString() + "_" + picFileName + "." + picExtName);

            File.WriteAllBytes(picTempName, picFileContent);

            sbFileText.AppendLine(picTempName);
        }

        sbFileText.AppendLine("Capture Image End");
    }

    /// <summary>
    /// Appends the text of every body paragraph, one per line.
    /// </summary>
    private static void AppendParagraphs(XWPFDocument document, StringBuilder sbFileText)
    {
        sbFileText.AppendLine("Capture Paragraph Begin");
        foreach (XWPFParagraph paragraph in document.Paragraphs)
        {
            sbFileText.AppendLine(paragraph.ParagraphText);
        }

        sbFileText.AppendLine("Capture Paragraph End");
    }
}

}

以上是 使用NPOI读取Word、Excel文档内容 的全部内容, 来源链接: utcz.com/z/361251.html

回到顶部