我试图提取Word文档中的所有单词。我可以按以下方式一次性完成...
// Opens the sample document and walks every word in it. For each word, every suffix of the
// word's text is stored in charLocation, keyed by the word's first-character column number
// plus the offset at which that suffix is found inside the word.
// NOTE(review): doc, wordPosition, index and charLocation are declared outside this snippet.
// NOTE(review): nothing here closes the document or quits Word — confirm that happens elsewhere.
Word.Application word = new Word.Application();
doc = word.Documents.Open(@"C:\SampleText.doc");
doc.Activate();
foreach (Word.Range docRange in doc.Words) // enumerates every word in the document
{
    // Range.Text is a COM property call; read it once per word instead of once per substring.
    string wordText = docRange.Text;

    // Skip ranges with no text so Trim/Substring below cannot throw.
    if (string.IsNullOrEmpty(wordText))
    {
        continue;
    }

    // One suffix per offset within the trimmed length. Sort key: the suffix itself when shorter
    // than three characters, otherwise the suffix with up to two characters removed at index 2.
    IEnumerable<string> sortedSubstrings = Enumerable.Range(0, wordText.Trim().Length)
        .Select(i => wordText.Substring(i))
        .OrderBy(s => s.Length < 3 ? s : s.Remove(2, Math.Min(s.Length - 2, 2)));

    wordPosition =
        (int)docRange.get_Information(
            Microsoft.Office.Interop.Word.WdInformation.wdFirstCharacterColumnNumber);

    foreach (var substring in sortedSubstrings)
    {
        // Ordinal search: the culture-sensitive default may ignore some characters and
        // report an offset that does not match the raw character position.
        // NOTE(review): IndexOf returns the FIRST occurrence, so a suffix that also appears
        // earlier in the word maps to that earlier offset — confirm this is intended.
        index = wordText.IndexOf(substring, StringComparison.Ordinal) + wordPosition;
        charLocation[index] = substring;
    }
}
然而,我更希望能够逐行加载文档……这样做可行吗?
我可以按段落加载文档,但无法遍历每个段落来提取其中的所有单词。
// Walks the document paragraph by paragraph and, within each paragraph, word by word.
// For each word, every suffix of the word's text is stored in charLocation, keyed by the
// word's first-character column number plus the offset at which that suffix is found.
// NOTE(review): doc, wordPosition, index and charLocation are declared outside this snippet.
foreach (Word.Paragraph para in doc.Paragraphs)
{
    // A Paragraph is not enumerable itself; its words are exposed through para.Range.Words.
    foreach (Word.Range docRange in para.Range.Words)
    {
        // Range.Text is a COM property call; read it once per word instead of once per substring.
        string wordText = docRange.Text;

        // Skip ranges with no text so Trim/Substring below cannot throw.
        if (string.IsNullOrEmpty(wordText))
        {
            continue;
        }

        // One suffix per offset within the trimmed length. Sort key: the suffix itself when
        // shorter than three characters, otherwise the suffix with up to two characters
        // removed at index 2.
        IEnumerable<string> sortedSubstrings = Enumerable.Range(0, wordText.Trim().Length)
            .Select(i => wordText.Substring(i))
            .OrderBy(s => s.Length < 3 ? s : s.Remove(2, Math.Min(s.Length - 2, 2)));

        wordPosition =
            (int)docRange.get_Information(
                Microsoft.Office.Interop.Word.WdInformation.wdFirstCharacterColumnNumber);

        foreach (var substring in sortedSubstrings)
        {
            // Ordinal search: the culture-sensitive default may ignore some characters and
            // report an offset that does not match the raw character position.
            // NOTE(review): IndexOf returns the FIRST occurrence, so a suffix that also appears
            // earlier in the word maps to that earlier offset — confirm this is intended.
            index = wordText.IndexOf(substring, StringComparison.Ordinal) + wordPosition;
            charLocation[index] = substring;
        }
    }
}