使用C# Parallel.ForEach循环处理SFTP文件,但未处理下载

3

我正在使用版本为2016的Renci SSH.NET包。我正在从外部服务器下载文件。通常情况下,我可以每6秒下载一个文件,当你有成千上万个文件时这很糟糕。最近我尝试将foreach循环更改为Parallel.ForEach。这样做将下载文件的时间缩短到了1.5秒。但是当我检查这些文件时,它们都是0 KB,所以没有下载任何内容。并行循环有什么问题吗?我是C#的新手,正在尝试提高下载速度。

Parallel.ForEach(summary.RemoteFiles, (f, loopstate) =>
{
    //Are we still connected? If not, reestablish a connection for up to a max of "MaxReconnectAttempts" 
    if (!sftp.IsConnected)
    {
        int maxAttempts = Convert.ToInt32(ConfigurationManager.AppSettings["MaxReconnectAttempts"]);

        StatusUpdate(this, new Types.StatusUpdateEventArgs() { message = "SFTP Service has been connected from remote system, attempting to reconnect (" + sftpConnInfo.Host + ":" + sftpConnInfo.Port.ToString() + remotePath + " - Attempt 1 of " + maxAttempts.ToString() + ")", Location = locationName });

        for (int attempts = 1; attempts <= maxAttempts; attempts++)
        {
            sftp.Connect();

            if (sftp.IsConnected)
            {
                StatusUpdate(this, new Types.StatusUpdateEventArgs() { message = "SFTP Service - Connection reestablished (" + remotePath + ")", Location = locationName });
                break;
            }
            else
            {
                if ((attempts + 1) <= maxAttempts)
                {
                    StatusUpdate(this, new Types.StatusUpdateEventArgs() { message = "SFTP Service still disconnected from remote system, preparing another reconnect attempt (" + sftpConnInfo.Host + ":" + sftpConnInfo.Port.ToString() + remotePath + " - Attempt " + (attempts + 1).ToString() + " of " + maxAttempts.ToString() + ")", Location = locationName });
                    System.Threading.Thread.Sleep(2000);
                }
                else
                {
                    //Max reconnect attempts reached - end the session and ensure the appropriate "failure" workflow is triggered
                    connectionLost = true;
                }
            }
        }
    }

    if (connectionLost)
        loopstate.Break();
       // break;


    totalFileCount++;
    try
    {
      if (!System.IO.File.Exists(localSaveLocation + f.FileName))

        {
            System.Diagnostics.Debug.WriteLine("\tDownloading file " + totalFileCount.ToString() + "(" + f.FileName + ")");

            System.IO.Stream localFile = System.IO.File.OpenWrite(localSaveLocation + f.FileName);
            //Log remote file name, local file name, date/time start
            start = DateTime.Now;
            sftp.DownloadFile(f.FullName, localFile);
            end = DateTime.Now;

            //Log remote file name, local file name, date/time complete (increment the "successful" downloads by 1)
            timeElapsed = end.Subtract(start);
            runningSeconds += timeElapsed.TotalSeconds;
            runningAvg = runningSeconds / Convert.ToDouble(totalFileCount);
            estimatedSecondsRemaining = (summary.RemoteFiles.Count - totalFileCount) * runningAvg;

            elapsedTimeString = timeElapsed.TotalSeconds.ToString("#.####") + " seconds";
            System.Diagnostics.Debug.WriteLine("\tCompleted downloading file in " + elapsedTimeString + " " + "(" + f.FileName + ")");
            downloadedFileCount++;
            ProcessFileComplete(this, new Types.ProcessFileCompleteEventArgs() { downloadSuccessful = true, elapsedTime = timeElapsed.TotalSeconds, fileName = f.FileName, fullLocalPath = localSaveLocation + f.FileName, Location = locationName, FilesDownloaded = totalFileCount, FilesRemaining = (summary.RemoteFiles.Count - totalFileCount), AvgSecondsPerDownload = runningAvg, TotalSecondsElapsed = runningSeconds, EstimatedTimeRemaining = TimeSpan.FromSeconds(estimatedSecondsRemaining) });

            f.FileDownloaded = true;

            if (deleteAfterDownload)
                sftp.DeleteFile(f.FullName);
        }
        else
        {
            System.Diagnostics.Debug.WriteLine("\tFile " + totalFileCount.ToString() + "(" + f.FileName + ") already exists locally");
            downloadedFileCount++;

            ProcessFileComplete(this, new Types.ProcessFileCompleteEventArgs() { downloadSuccessful = true, elapsedTime = 0, fileName = f.FileName + " (File already exists locally)", fullLocalPath = localSaveLocation + f.FileName, Location = locationName, FilesDownloaded = totalFileCount, FilesRemaining = (summary.RemoteFiles.Count - totalFileCount), AvgSecondsPerDownload = runningAvg, TotalSecondsElapsed = runningSeconds, EstimatedTimeRemaining = TimeSpan.FromSeconds(estimatedSecondsRemaining) });
            f.FileDownloaded = true;

            if (deleteAfterDownload)
                sftp.DeleteFile(f.FullName);
        }
    }
    catch (System.Exception ex)
    {
       // We log stuff here
    }

}); 
1个回答

5
我不确定你为什么会得到空文件。我怀疑是因为你没有关闭localFile流。
尽管如此,即使你的代码能够工作,如果你在下载时使用同一个连接,你几乎不会获得任何性能优势,因为SFTP传输往往受网络延迟或CPU限制。你需要使用多个连接来克服这个问题。
请参阅我的Server Fault上有关影响SFTP传输速度的因素的答案
实现一些连接池并每次选择一个空闲连接。
简单示例:
var clients = new ConcurrentBag<SftpClient>();

var opts = new ParallelOptions { MaxDegreeOfParallelism = maxConnections };

Parallel.ForEach(files, opts, (f, loopstate) => {
    if (!clients.TryTake(out var client))
    {
        client = new SftpClient(hostName, userName, password);
        client.Connect();
    }

    string localPath = Path.Combine(destPath, f.Name);
    Console.WriteLine(
        "Thread {0}, Connection {1}, File {2} => {3}",
        Thread.CurrentThread.ManagedThreadId, client.GetHashCode(),
        f.FullName, localPath);

    using (var stream = File.Create(localPath))
    {
        client.DownloadFile(f.FullName, stream);
    }

    clients.Add(client);
});

Console.WriteLine("Closing {0} connections", clients.Count);

foreach (var client in clients)
{
    client.Dispose();
}

另一种方法是启动固定数量的线程,每个线程使用一个连接,并从队列中选择文件。

有关实现示例,请参见我的WinSCP .NET程序集文章:
自动化SFTP/FTP协议上的并行连接传输


关于FTP的类似问题:
使用FluentFTP并发地从FTP下载多个文件,带有最大值限制


网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接