我尝试改进一份解析文本文件(1.5GB)的旧例程。这个例程相当愚笨,它类似这样构建字符串:
s:= s+ buff[i];
于是,我想到TStringBuilder会带来显著的速度提升。但实际上,它比原来的方法慢了114%。
因此,我自己编写了一个StringBuilder。在4MB字符串上进行的实验中,它的速度比经典的 s:= s + chr 快
184.82倍(是的,184倍!),甚至比TStringBuilder还要快。
测试结果:
经典的 s:= s + c
时间:8502毫秒
{ Baseline benchmark: copies the text returned by ReadFile(File4MB) into a
  second string one character at a time using plain concatenation, then
  writes the elapsed GetTickCount difference (milliseconds) to the Log control. }
procedure TfrmTester.btnClassicClick(Sender: TObject);
var
  Source: string;
  Accum: string;
  StartTick: Cardinal;
  Idx: Integer;
begin
  Source := ReadFile(File4MB);
  Accum := '';   // managed strings already start empty; spelled out for clarity

  // Only the concatenation loop is timed; reading the file is excluded.
  StartTick := GetTickCount;
  for Idx := 1 to Length(Source) do
    Accum := Accum + Source[Idx];   // the s:= s + c pattern being measured

  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');
end;
预缓冲
时间:
BuffSize= 10000
BuffSize= 100000
BuffSize= 1000000
代码:
{ Benchmark of the pre-buffered approach: the target string is grown in
  BuffSize-character steps and each character is stored by index instead of
  being concatenated. Logs the elapsed GetTickCount difference and reports
  'FAILED!' if the rebuilt string differs from the input. }
procedure TfrmTester.btnBufferedClick(Sender: TObject);
var
  Source, Built: string;
  StartTick: Cardinal;
  Capacity, WritePos, Idx: Integer;
begin
  Source := ReadFile(File4MB);
  StartTick := GetTickCount;

  Capacity := 0;
  WritePos := 1;
  for Idx := 1 to Length(Source) do
  begin
    // Every allocated slot is used: extend the target by one more chunk.
    if Capacity < Idx then
    begin
      SetLength(Built, Capacity + BuffSize);
      Capacity := Length(Built);
    end;

    Built[WritePos] := Source[Idx];
    Inc(WritePos);
  end;

  // Cut off the unused remainder of the last chunk.
  SetLength(Built, WritePos - 1);

  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');

  // Sanity check: the copy must be identical to the source text.
  if Built <> Source then
    Log.Lines.Add('FAILED!');
end;
预缓冲(封装为类)
时间:
BuffSize= 10000
BuffSize= 100000
BuffSize= 1000000
代码:
{ Benchmark of the TCStringBuff class: feeds every character of the text
  returned by ReadFile(File4MB) through AddChar, fetches the assembled string
  with GetResult, logs the elapsed GetTickCount difference and reports
  'FAILED!' if the result differs from the input. }
procedure TfrmTester.btnBuffClassClick(Sender: TObject);
var
  Builder: TCStringBuff;
  Source, Built: string;
  StartTick: Cardinal;
  Idx: Integer;
begin
  Source := ReadFile(File4MB);
  StartTick := GetTickCount;

  // Construction and destruction of the builder are part of the timed span.
  Builder := TCStringBuff.Create(BuffSize);
  try
    for Idx := 1 to Length(Source) do
      Builder.AddChar(Source[Idx]);
    Built := Builder.GetResult;
  finally
    FreeAndNil(Builder);
  end;

  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');

  // Sanity check: the builder output must equal the source text.
  if Built <> Source then
    Log.Lines.Add('FAILED!');
end;
这是该类:
{ Creates an empty string buffer.

  aBuffSize is the number of characters by which AddChar enlarges the
  internal string each time it runs out of room (default 10000). Values
  below 1 are replaced by the default step of 10000: with a non-positive
  step AddChar's SetLength call would not gain any room and the following
  s[marker] store would write past the end of the string. }
constructor TCStringBuff.Create(aBuffSize: Integer= 10000);
begin
  inherited Create;

  if aBuffSize < 1
  then BuffSize:= 10000
  else BuffSize:= aBuffSize;

  marker:= 1;       // 1-based index of the next character slot to write
  CurBuffLen:= 0;   // nothing allocated yet; the first AddChar allocates
  inp:= 1;          // 1-based ordinal of the next character to be added
end;
{ Returns the characters added so far and leaves the buffer empty and ready
  for reuse.

  The internal string is first trimmed to the number of characters actually
  written (marker-1), dropping the unused part of the last chunk. After the
  result is handed out, the position and capacity fields are reset to the
  values the constructor assigns. Without this reset a later AddChar would
  see a stale CurBuffLen, skip the growth step and store into the
  now-empty string, and a second GetResult would size the string from a
  stale marker. }
function TCStringBuff.GetResult: string;
begin
  SetLength(s, marker-1);
  Result:= s;

  // Return to the freshly constructed state.
  s:= '';
  marker:= 1;
  CurBuffLen:= 0;
  inp:= 1;
end;
{ Appends Ch to the buffer. When the ordinal of the incoming character (inp)
  exceeds the recorded capacity (CurBuffLen), the internal string is first
  lengthened by BuffSize characters; Ch is then stored at index marker and
  both counters advance by one. }
procedure TCStringBuff.AddChar(Ch: Char);
begin
  // Out of room: lengthen the internal string by one more chunk.
  if CurBuffLen < inp then
  begin
    SetLength(s, CurBuffLen + BuffSize);
    CurBuffLen := Length(s);
  end;

  s[marker] := Ch;
  Inc(inp);
  Inc(marker);
end;
结论:
如果您处理的是大型字符串(超过10K),请停止使用 s:= s + c。即使字符串很小,只要这种操作执行得非常频繁(例如,某个函数只对小字符串做一些处理,却被频繁调用),这一建议也可能同样适用。
_
PS:您还可以查看此链接:https://www.delphitools.info/2013/10/30/efficient-string-building-in-delphi/2/