这个答案已经完整并且适用于区分大小写模式,但不适用于不区分大小写模式,而且可能存在其他错误,因为它没有进行很好的单元测试,并且可能可以进一步优化,例如我重复了本地函数__SameChar而没有使用比较函数回调,这样会更快,实际上,允许用户为所有这些传递比较函数对于想要提供一些额外逻辑(某些语言的等价Unicode字形集)的Unicode用户来说是很好的。基于Dorin Dominica的代码,我构建了以下内容。
function _FindStringBoyer(const sString, sPattern: string;
const bCaseSensitive: Boolean = True; const fromPos: Integer = 1): Integer;
function __SameChar(StringIndex, PatternIndex: Integer): Boolean;
begin
if bCaseSensitive then
Result := (sString[StringIndex] = sPattern[PatternIndex])
else
Result := (CompareText(sString[StringIndex], sPattern[PatternIndex]) = 0);
end;
var
SkipTable: array [Char] of Integer;
LengthPattern: Integer;
LengthString: Integer;
Index: Integer;
kIndex: Integer;
LastMarker: Integer;
Large: Integer;
chPattern: Char;
begin
if fromPos < 1 then
raise Exception.CreateFmt('Invalid search start position: %d.', [fromPos]);
LengthPattern := Length(sPattern);
LengthString := Length(sString);
for chPattern := Low(Char) to High(Char) do
SkipTable[chPattern] := LengthPattern;
for Index := 1 to LengthPattern -1 do
SkipTable[sPattern[Index]] := LengthPattern - Index;
Large := LengthPattern + LengthString + 1;
LastMarker := SkipTable[sPattern[LengthPattern]];
SkipTable[sPattern[LengthPattern]] := Large;
Index := fromPos + LengthPattern -1;
Result := 0;
while Index <= LengthString do begin
repeat
Index := Index + SkipTable[sString[Index]];
until Index > LengthString;
if Index <= Large then
Break
else
Index := Index - Large;
kIndex := 1;
while (kIndex < LengthPattern) and __SameChar(Index - kIndex, LengthPattern - kIndex) do
Inc(kIndex);
if kIndex = LengthPattern then begin
Result := Index - kIndex + 1;
Index := Index + LengthPattern;
exit;
end else begin
if __SameChar(Index, LengthPattern) then
Index := Index + LastMarker
else
Index := Index + SkipTable[sString[Index]];
end;
end;
end;
function _StringCountBoyer(const aSourceString, aFindString : String; Const CaseSensitive : Boolean = TRUE) : Integer;
var
foundPos:Integer;
fromPos:Integer;
Limit:Integer;
guard:Integer;
SkipTable: array [Char] of Integer;
LengthPattern: Integer;
LengthString: Integer;
Index: Integer;
kIndex: Integer;
LastMarker: Integer;
Large: Integer;
chPattern: Char;
function __SameChar(StringIndex, PatternIndex: Integer): Boolean;
begin
if CaseSensitive then
Result := (aSourceString[StringIndex] = aFindString[PatternIndex])
else
Result := (CompareText(aSourceString[StringIndex], aFindString[PatternIndex]) = 0);
end;
begin
result := 0;
foundPos := 1;
fromPos := 1;
Limit := Length(aSourceString);
guard := Length(aFindString);
Index := 0;
LengthPattern := Length(aFindString);
LengthString := Length(aSourceString);
for chPattern := Low(Char) to High(Char) do
SkipTable[chPattern] := LengthPattern;
for Index := 1 to LengthPattern -1 do
SkipTable[aFindString[Index]] := LengthPattern - Index;
Large := LengthPattern + LengthString + 1;
LastMarker := SkipTable[aFindString[LengthPattern]];
SkipTable[aFindString[LengthPattern]] := Large;
while (foundPos>=1) and (fromPos < Limit) and (Index<Limit) do begin
Index := fromPos + LengthPattern -1;
if Index>Limit then
break;
kIndex := 0;
while Index <= LengthString do begin
repeat
Index := Index + SkipTable[aSourceString[Index]];
until Index > LengthString;
if Index <= Large then
Break
else
Index := Index - Large;
kIndex := 1;
while (kIndex < LengthPattern) and __SameChar(Index - kIndex, LengthPattern - kIndex) do
Inc(kIndex);
if kIndex = LengthPattern then begin
Index := Index + LengthPattern;
fromPos := Index;
Inc(Result);
break;
end else begin
if __SameChar(Index, LengthPattern) then
Index := Index + LastMarker
else
Index := Index + SkipTable[aSourceString[Index]];
end;
end;
end;
end;
这是一个非常好的算法,因为:
- 使用这种方法计算字符串Y中子串X的实例要快得多,效果惊人。
- 仅替换Pos(),_FindStringBoyer()比由FastCode项目人员贡献给Delphi的纯汇编版本的Pos()更快,而如果你需要不区分大小写,则可以想象当我们不必在100兆字节的字符串上调用UpperCase时,性能提升会有多大。(好吧,你的字符串不会那么大。但是,高效算法是一件美妙的事情。)
好的,我用Boyer-Moore风格编写了一个字符串替换函数:
function _StringReplaceBoyer(const aSourceString, aFindString,aReplaceString : String; Flags: TReplaceFlags) : String;
var
errors:Integer;
fromPos:Integer;
Limit:Integer;
guard:Integer;
SkipTable: array [Char] of Integer;
LengthPattern: Integer;
LengthString: Integer;
Index: Integer;
kIndex: Integer;
LastMarker: Integer;
Large: Integer;
chPattern: Char;
CaseSensitive:Boolean;
foundAt:Integer;
lastFoundAt:Integer;
copyStartsAt:Integer;
copyLen:Integer;
function __SameChar(StringIndex, PatternIndex: Integer): Boolean;
begin
if CaseSensitive then
Result := (aSourceString[StringIndex] = aFindString[PatternIndex])
else
Result := (CompareText(aSourceString[StringIndex], aFindString[PatternIndex]) = 0);
end;
begin
result := '';
lastFoundAt := 0;
fromPos := 1;
errors := 0;
CaseSensitive := rfIgnoreCase in Flags;
Limit := Length(aSourceString);
guard := Length(aFindString);
Index := 0;
LengthPattern := Length(aFindString);
LengthString := Length(aSourceString);
for chPattern := Low(Char) to High(Char) do
SkipTable[chPattern] := LengthPattern;
for Index := 1 to LengthPattern -1 do
SkipTable[aFindString[Index]] := LengthPattern - Index;
Large := LengthPattern + LengthString + 1;
LastMarker := SkipTable[aFindString[LengthPattern]];
SkipTable[aFindString[LengthPattern]] := Large;
while (fromPos>=1) and (fromPos <= Limit) and (Index<=Limit) do begin
Index := fromPos + LengthPattern -1;
if Index>Limit then
break;
kIndex := 0;
foundAt := 0;
while Index <= LengthString do begin
repeat
Index := Index + SkipTable[aSourceString[Index]];
until Index > LengthString;
if Index <= Large then
Break
else
Index := Index - Large;
kIndex := 1;
while (kIndex < LengthPattern) and __SameChar(Index - kIndex, LengthPattern - kIndex) do
Inc(kIndex);
if kIndex = LengthPattern then begin
foundAt := Index - kIndex + 1;
Index := Index + LengthPattern;
fromPos := (foundAt+LengthPattern);
if lastFoundAt=0 then begin
copyStartsAt := 1;
copyLen := foundAt-copyStartsAt;
end else begin
copyStartsAt := lastFoundAt+LengthPattern;
copyLen := foundAt-copyStartsAt;
end;
if (copyLen<=0)or(copyStartsAt<=0) then begin
Inc(errors);
end;
Result := Result + Copy(aSourceString, copyStartsAt, copyLen ) + aReplaceString;
lastFoundAt := foundAt;
if not (rfReplaceAll in Flags) then
fromPos := 0;
break;
end else begin
if __SameChar(Index, LengthPattern) then
Index := Index + LastMarker
else
Index := Index + SkipTable[aSourceString[Index]];
end;
end;
end;
if (lastFoundAt=0) then
begin
Result := aSourceString;
end
else
if (lastFoundAt+LengthPattern < Limit) then begin
copyStartsAt := lastFoundAt+LengthPattern;
copyLen := Limit;
Result := Result + Copy(aSourceString, copyStartsAt, copyLen );
end;
end;
好的,问题是:这个堆栈占用了多少空间:
var
skiptable : array [Char] of Integer; // 65536*4 bytes stack usage on Unicode delphi
再见CPU地狱,你好栈地狱。如果我使用动态数组,那么我必须在运行时调整它的大小。因此,这个东西基本上很快,因为计算机上的虚拟内存系统不会在堆栈中增加256K而感到困扰,但这并不总是最优的代码。尽管如此,我的电脑对于像这样的大堆栈内容并不会有任何反应。这不会成为Delphi标准库默认或赢得任何快速编码挑战,因为它占用了太多空间。我认为,重复搜索是应该将上述代码编写为类的情况,而跳跃表应该是该类中的数据字段。然后,您可以一次构建Boyer-Moore表,并随着时间的推移,如果字符串不变,则重复使用该对象进行快速查找。