在处理相对较大的文本文件时,我注意到了一个奇怪的现象:异步读写实际上比同步读写更慢。
例如,执行以下示例代码:
// Start both writes first, without awaiting either one, so the two operations can overlap.
var res1 = File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file{0}.txt", i), lines);
var res2 = File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file{0}_bck.txt", i), lines);
// Only now wait for each of the two writes to finish.
await res1;
await res2;
实际上比下面的同步写法要慢得多:
// Synchronous counterpart: the second write starts only after the first call has returned.
File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file{0}.txt", i), lines);
File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file{0}_bck.txt", i), lines);
理论上第一种方法应该更快,因为在第一个写入完成之前应该已经开始了第二个写入。对于15~25MB的文件(10秒 vs 20秒),性能差异约为100%。
我注意到ReadAllLines和ReadAllLinesAsync也有相同的行为。
更新 0:主要思路是,在 TestFileWriteXXX 函数执行完成时,所有文件都应已处理完毕。
Task.WhenAll(allTasks1); // Calling WhenAll without awaiting it (as written here) is not a valid option
更新 1:我增加了使用线程进行读写的版本,性能提升了 50%。以下是完整示例:
更新 2:我更新了代码,以消除生成缓冲区所带来的开销。
// Loop bound used by every benchmark variant: each one processes indices 0 .. MaxAttempts - 1.
const int MaxAttempts = 5;
/// <summary>
/// Entry point: runs the sequential, threaded and async benchmark variants in that order,
/// then blocks on <c>Console.ReadLine()</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
TestFileWrite();
TestFileWriteViaThread();
// TestFileWriteAsync is declared async void, so it cannot be awaited here; the call returns
// as soon as the method reaches its first await that does not complete synchronously.
TestFileWriteAsync();
// NOTE(review): besides waiting for user input, this appears to be what keeps the process
// alive until TestFileWriteAsync has finished and printed its timing - confirm.
Console.ReadLine();
}
/// <summary>
/// Sequential baseline: removes leftover files, then calls <c>TestFileWriteInt</c> for the
/// indices 0 .. MaxAttempts - 1 one after another on the calling thread and prints the
/// elapsed wall-clock time.
/// </summary>
private static void TestFileWrite()
{
    Clear();

    // The clock is started before the banner is written, so the banner output is measured too.
    Stopwatch timer = Stopwatch.StartNew();
    Console.WriteLine( "Begin TestFileWrite");

    int attempt = 0;
    while (attempt < MaxAttempts)
    {
        TestFileWriteInt(attempt);
        ++attempt;
    }

    // Elapsed is sampled while the stopwatch is still running; it is never stopped.
    TimeSpan elapsed = timer.Elapsed;
    // hh:mm:ss.cc, where cc is hundredths of a second (milliseconds integer-divided by 10).
    string formatted = string.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine("TestFileWrite took: " + formatted);
}
/// <summary>
/// Threaded variant: removes leftover files, starts one dedicated thread per index
/// (0 .. MaxAttempts - 1), each running <c>TestFileWriteInt</c>, waits for all of them
/// and prints the elapsed wall-clock time.
/// </summary>
private static void TestFileWriteViaThread()
{
    Clear();

    // The clock is started before the banner is written, so the banner output is measured too.
    Stopwatch timer = Stopwatch.StartNew();
    Console.WriteLine("Begin TestFileWriteViaThread");

    Thread[] workers = new Thread[MaxAttempts];
    for (int slot = 0; slot < workers.Length; slot++)
    {
        // TestFileWriteInt takes an object parameter, so the index is handed over via Start(object).
        Thread worker = new Thread(TestFileWriteInt);
        worker.Start(slot);
        workers[slot] = worker;
    }

    // Block until every worker has finished, in the order the threads were started.
    foreach (Thread worker in workers)
    {
        worker.Join();
    }

    // Elapsed is sampled while the stopwatch is still running; it is never stopped.
    TimeSpan elapsed = timer.Elapsed;
    // hh:mm:ss.cc, where cc is hundredths of a second (milliseconds integer-divided by 10).
    string formatted = string.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine("TestFileWriteViaThread took: " + formatted);
}
/// <summary>
/// One unit of synchronous work: writes the generated lines to the primary file on C: and to
/// the backup file on F:, then reads the primary file back twice.
/// </summary>
/// <param name="oIndex">
/// Boxed <c>int</c> index used in the file names. The parameter is typed as <c>object</c>,
/// which lets the method be passed to the <c>Thread</c> constructor and receive its argument
/// through <c>Thread.Start(object)</c>.
/// </param>
private static void TestFileWriteInt(object oIndex)
{
    int index = (int)oIndex;
    List<string> lines = GenerateLines(index);

    string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", index);
    string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", index);

    File.WriteAllLines(primaryPath, lines);
    File.WriteAllLines(backupPath, lines);

    // Both reads target the primary file; the backup copy is never read back.
    // The results are not used afterwards - only the I/O itself matters here.
    string[] firstRead = File.ReadAllLines(primaryPath);
    string[] secondRead = File.ReadAllLines(primaryPath);
    // Disabled extra step:
    // File.WriteAllLines(string.Format(@"C:\Projects\DelMee\file_test{0}.txt", index), secondRead);
}
/// <summary>
/// Async variant: for each index 0 .. MaxAttempts - 1 it starts the primary (C:) and backup (F:)
/// writes together and awaits both, then starts two reads of the primary file together and
/// awaits both. Indices are processed one after another. Prints the elapsed wall-clock time.
/// </summary>
/// <remarks>
/// The method is <c>async void</c>: callers get control back at the first await that does not
/// complete synchronously and have no task to wait on.
/// </remarks>
private static async void TestFileWriteAsync()
{
    Clear();
    Console.WriteLine("Begin TestFileWriteAsync ");

    // Unlike the other variants, the clock starts after the banner has been written.
    Stopwatch timer = Stopwatch.StartNew();

    int attempt = 0;
    while (attempt < MaxAttempts)
    {
        List<string> lines = GenerateLines(attempt);
        string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", attempt);
        string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", attempt);

        // Kick off both writes before awaiting either of them.
        Task[] writes =
        {
            File.WriteAllLinesAsync(primaryPath, lines),
            File.WriteAllLinesAsync(backupPath, lines),
        };
        await Task.WhenAll(writes);

        // Both reads target the primary file; the backup copy is never read back.
        Task<string[]>[] reads =
        {
            File.ReadAllLinesAsync(primaryPath),
            File.ReadAllLinesAsync(primaryPath),
        };
        await Task.WhenAll(reads);
        // Disabled extra step:
        // await File.WriteAllLinesAsync(string.Format(@"C:\Projects\DelMee\file_test{0}.txt", attempt), reads[0].Result);

        ++attempt;
    }

    timer.Stop();
    TimeSpan elapsed = timer.Elapsed;
    // hh:mm:ss.cc, where cc is hundredths of a second (milliseconds integer-divided by 10).
    string formatted = string.Format(
        "{0:00}:{1:00}:{2:00}.{3:00}",
        elapsed.Hours,
        elapsed.Minutes,
        elapsed.Seconds,
        elapsed.Milliseconds / 10);
    Console.WriteLine("TestFileWriteAsync took: " + formatted);
}
/// <summary>
/// Deletes the primary (C:) and backup (F:) files for the indices 0 through 14 so that a
/// benchmark run does not start with files left over from a previous one.
/// </summary>
private static void Clear()
{
    // Number of index slots that get cleaned up.
    const int slotCount = 15;

    int slot = 0;
    while (slot != slotCount)
    {
        string primaryPath = string.Format(@"C:\Projects\DelMee\file{0}.txt", slot);
        string backupPath = string.Format(@"F:\Projects\DelMee\file{0}_bck.txt", slot);

        System.IO.File.Delete(primaryPath);
        System.IO.File.Delete(backupPath);

        slot++;
    }
}
// Payload shared by all benchmark variants: a single string of 25 * 1024 * 1024 'a' characters,
// built once by the static field initializer so that generating it is not part of any measurement.
static string buffer = new string('a', 25 * 1024 * 1024);

/// <summary>
/// Returns a new list whose only element is the shared payload string.
/// </summary>
/// <param name="i">Index of the file being generated; not used by this implementation.</param>
/// <returns>A fresh single-element list referencing the shared <c>buffer</c> string.</returns>
private static List<string> GenerateLines(int i)
{
    List<string> lines = new List<string>();
    lines.Add(buffer);
    return lines;
}
结果如下:
TestFileWrite took: 00:00:03.50
TestFileWriteViaThread took: 00:00:01.63
TestFileWriteAsync took: 00:00:06.78
硬件环境:8 核 CPU;C 盘和 F 盘是两块不同的 850 EVO 固态硬盘,分别接在不同的 SATA 接口上。
更新 3 - 结论:看起来 File.WriteAllLinesAsync 在处理大量数据刷新的场景时表现良好。正如下面的回答所指出的,最好直接使用 FileStream。不过,异步操作仍然比顺序(同步)访问慢。
目前最快的方法仍然是使用多线程。