我尝试使用以下正则表达式来分割文本文件中的数据,但在测试过程中发现了一个奇怪的错误 - 即使是非常简单的文件也会被错误地分割。以下是用于说明这种行为的示例代码:
const string line = "511525,3122,9,39,2007,9,39,3127,9,39,\" -49,368.11 \",\"-32,724.16\",2,1,\" 2,347.91 \", - ,\" 2,234.17 \", - ,2.2,1.143,2,1.24,FALSE,1,2,0,311,511625";
const string pattern = ",(?=([^\"]*\"[^\"]*\")*[^\"]*$)";
Console.WriteLine();
Console.WriteLine("SPLIT");
var splitted = Regex.Split(line, pattern, RegexOptions.Compiled);
foreach (var s in splitted)
{
Console.WriteLine(s);
}
Console.WriteLine();
Console.WriteLine("REPLACE");
var replaced = Regex.Replace(line, pattern, "!" , RegexOptions.Compiled);
Console.WriteLine(replaced);
Console.WriteLine();
Console.WriteLine("MATCH");
var matches = Regex.Matches(line, pattern);
foreach (Match match in matches)
{
Console.WriteLine(match.Index);
}
因此,正如您所看到的,split方法是唯一会产生意外结果(它在无效位置上拆分!)的方法!Matches
和Replace
都会给出完全正确的结果。我甚至尝试在RegexBuddy中测试提到的正则表达式,它显示与Regex.Matches
相同的匹配项!我是否漏掉了什么,还是Split
方法中存在错误?
控制台输出:
SPLIT
511525
, - ," 2,234.17 "
3122
, - ," 2,234.17 "
9
, - ," 2,234.17 "
39
, - ," 2,234.17 "
2007
, - ," 2,234.17 "
9
, - ," 2,234.17 "
39
, - ," 2,234.17 "
3127
, - ," 2,234.17 "
9
, - ," 2,234.17 "
39
, - ," 2,234.17 "
" -49,368.11 "
, - ," 2,234.17 "
"-32,724.16"
, - ," 2,234.17 "
2
, - ," 2,234.17 "
1
, - ," 2,234.17 "
" 2,347.91 "
- ," 2,234.17 "
-
" 2,234.17 "
" 2,234.17 "
-
2.2
1.143
2
1.24
FALSE
1
2
0
311
511625
REPLACE
511525!3122!9!39!2007!9!39!3127!9!39!" -49,368.11 "!"-32,724.16"!2!1!" 2,347.91 "! - !" 2,234.17 "! - !2.2!1.143!2!1.24!FALSE!1!2!0!311!511625
MATCH
6
11
13
16
21
23
26
31
33
36
51
64
66
68
81
87
100
106
110
116
118
123
129
131
133
135
139