使用异步任务的BlockingCollection

4
我正在尝试正确地模拟多线程单生产者/多消费者场景,其中消费者可以请求生产者获取一个项目,但生产者需要执行耗时操作来生成它(比如执行查询或打印文档)。
我的目标是确保没有消费者可以同时向生产者请求生成一个项目。在我的实际用例中,生产者是硬件控制器,必须确保只有一个请求被发送到硬件。其他并发请求必须最终等待或被拒绝(我知道如何拒绝它们,所以让我们集中精力让它们等待)。
我希望生产者和每个消费者在不同的线程中运行。 使用只有“BlockingCollection”可能无法得到干净的代码。我不得不与“SemaphoreSlim”一起使用,否则消费者可能会遇到竞争条件。 我认为这应该可以工作(事实上,在我所有的测试中都很好地工作),即使我不确定100%。 这是我的程序:
生产者:
class Producer : IDisposable
{
    //Explicit waiting item => I feel this should not be there
    private SemaphoreSlim _semaphore;

    private BlockingCollection<Task<string>> _collection;

    public Producer()
    {
        _collection = new BlockingCollection<Task<string>>(new ConcurrentQueue<Task<string>>(), 1);
        _semaphore = new SemaphoreSlim(1, 1);
    }

    public void Start()
    {
        Task consumer = Task.Factory.StartNew(() =>
        {
            try
            {
                while (!_collection.IsCompleted)
                {
                    Task<string> current = _collection.Take();
                    current.RunSynchronously(); //Is this bad?

                    //Signal the long running operation has ended => This is what I'm not happy about
                    _semaphore.Release();
                }
            }
            catch (InvalidOperationException)
            {
                Console.WriteLine("Adding was compeleted!");
            }
        });
    }

    public string GetRandomString(string consumerName)
    {
        Task<string> task = new Task<string>(() =>
        {
            //Simulate long running operation
            Thread.Sleep(100);
            return GetRandomString();
        });

        _collection.Add(task);

        //Wait for long running operation to complete => This is what I'm not happy about
        _semaphore.Wait();

        Console.WriteLine("Producer produced {0} by {1} request", task.Result, consumerName);
        return task.Result;
    }

    public void Dispose()
    {
        _collection.CompleteAdding();
    }

    private string GetRandomString()
    {
        var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        var random = new Random();
        var result = new string(Enumerable
            .Repeat(chars, 8)
            .Select(s => s[random.Next(s.Length)])
            .ToArray());
        return result;
    }
}

消费者:

class Consumer
{
    Producer _producer;
    string _name;

    public Consumer(
        Producer producer,
        string name)
    {
        _producer = producer;
        _name = name;
    }

    public string GetOrderedString()
    {
        string produced = _producer.GetRandomString(_name);
        return String.Join(String.Empty, produced.OrderBy(c => c));
    }
}

控制台应用程序:

class Program
{
    static void Main(string[] args)
    {
        int consumerNumber = 5;
        int reps = 10;

        Producer prod = new Producer();
        prod.Start();

        Task[] consumers = new Task[consumerNumber];

        for (var cConsumers = 0; cConsumers < consumerNumber; cConsumers++)
        {
            Consumer consumer = new Consumer(prod, String.Format("Consumer{0}", cConsumers + 1));

            Task consumerTask = Task.Factory.StartNew((consumerIndex) =>
            {
                int cConsumerNumber = (int)consumerIndex;
                for (var counter = 0; counter < reps; counter++)
                {
                    string data = consumer.GetOrderedString();
                    Console.WriteLine("Consumer{0} consumed {1} at iteration {2}", cConsumerNumber, data, counter + 1);
                }
            }, cConsumers + 1);

            consumers[cConsumers] = consumerTask;
        }

        Task continuation = Task.Factory.ContinueWhenAll(consumers, (c) =>
        {
            prod.Dispose();
            Console.WriteLine("Producer/Consumer ended");
            Console.ReadLine();
        });

        continuation.Wait();
    }
}

我关心的是这是否是正确解决问题的方式,或者你们有其他最佳实践可以建议。
我已经谷歌搜索并尝试了不同的想法,但每个例子都假设生产者能够立即在请求后生产物品...这在现实世界的应用程序中相当罕见:)
非常感谢任何帮助。

3
你应该使用TPL Dataflow。 - i3arnon
1
你想要实现什么目标并不是很清楚,能否再详细说明一下? - Evk
@i3arnon 我会去看一下,但据我所知TPL Dataflow是适用于.Net >= 4.5的。有没有适用于4.0的东西? - andreapier
嗯,我把它看作是一个按需制造者... 但最后你可能是对的... 我不太确定这是否是最好的方法。然而,如果有人知道如何解决这个问题,那会很有帮助。 - andreapier
我看了一下Tpl Dataflow...我认为这是处理我的情况的正确方式,而不必编写大量样板(可能有错误)代码。感谢大家的建议! - andreapier
显示剩余4条评论
2个回答

2

如果我理解正确,您希望确保所谓的“生产者”一次只处理一个任务。然后,通过对您的代码进行轻微修改,您可以像这样实现:

internal class Producer : IDisposable {
    private readonly BlockingCollection<RandomStringRequest> _collection;

    public Producer() {
        _collection = new BlockingCollection<RandomStringRequest>(new ConcurrentQueue<RandomStringRequest>());
    }

    public void Start() {
        Task consumer = Task.Factory.StartNew(() => {
            try {
                foreach (var request in _collection.GetConsumingEnumerable()) {
                    Thread.Sleep(100); // long work
                    request.SetResult(GetRandomString());
                }
            }
            catch (InvalidOperationException) {
                Console.WriteLine("Adding was compeleted!");
            }
        });
    }

    public RandomStringRequest GetRandomString(string consumerName) {
        var request = new RandomStringRequest();
        _collection.Add(request);
        return request;            
    }

    public void Dispose() {
        _collection.CompleteAdding();
    }

    private string GetRandomString() {
        var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        var random = new Random();
        var result = new string(Enumerable
            .Repeat(chars, 8)
            .Select(s => s[random.Next(s.Length)])
            .ToArray());
        return result;
    }
}

internal class RandomStringRequest : IDisposable {
    private string _result;
    private ManualResetEvent _signal;

    public RandomStringRequest() {
        _signal = new ManualResetEvent(false);
    }

    public void SetResult(string result) {
        _result = result;
        _signal.Set();
    }

    public string GetResult() {
        _signal.WaitOne();
        return _result;
    }

    public bool TryGetResult(TimeSpan timeout, out string result) {
        result = null;
        if (_signal.WaitOne(timeout)) {
            result = _result;
            return true;
        }
        return false;
    }

    public void Dispose() {
        _signal.Dispose();
    }
}

internal class Consumer {
    private Producer _producer;
    private string _name;

    public Consumer(
        Producer producer,
        string name) {
        _producer = producer;
        _name = name;
    }

    public string GetOrderedString() {
        using (var request = _producer.GetRandomString(_name)) {
            // wait here for result to be prepared
            var produced = request.GetResult();
            return String.Join(String.Empty, produced.OrderBy(c => c));
        }
    }
}

请注意,生产者是单线程的,并且它使用 GetConsumingEnumerable。此外,没有信号量和任务。相反,RandomStringRequest 被返回给消费者,当调用 GetResult 或 TryGetResult 时,它将等待生产者生成结果(或超时)。您可能还希望在某些地方传递 CancellationTokens(例如到 GetConsumingEnumerable)。

谢谢,非常聪明的例子!出于清晰起见,我没有传递任何CancellationToken。 - andreapier

1
我认为信号量并不是真正必要的。您的所有需求应该已经通过Task和可用的并发集合得到满足。
我试图创建一个小的示例代码。不幸的是,我仍然没有完全理解异步/等待,所以也许这也是在您的情况下可能有所帮助的领域(如果您的真实任务主要是I/O限制而不是CPU限制)。
但是正如您已经看到的,不需要使用信号量或类似的东西。所有这些事情都由提供的类完成(例如对BlockingCollection.GetConsumingEnumerable()的调用)。
因此,希望这能有所帮助。

Program.cs

private static void Main(string[] args)
{
    var producer = new Producer();
    var consumer = new Consumer(producer.Workers);

    consumer.Start();
    producer.Start();

    Console.ReadKey();
}

Producer.cs

public class Producer : IDisposable
{
    private CancellationTokenSource _Cts;
    private Random _Random = new Random();
    private int _WorkCounter = 0;
    private BlockingCollection<Task<String>> _Workers;
    private Task _WorkProducer;

    public Producer()
    {
        _Workers = new BlockingCollection<Task<String>>();
    }

    public IEnumerable<Task<String>> Workers
    {
        get { return _Workers.GetConsumingEnumerable(); }
    }

    public void Dispose()
    {
        Stop();
    }

    public void Start()
    {
        if (_Cts != null)
            throw new InvalidOperationException("Producer has already been started.");

        _Cts = new CancellationTokenSource();
        _WorkProducer = Task.Factory.StartNew(() => Run(_Cts.Token));
    }

    public void Stop()
    {
        var cts = _Cts;

        if (cts != null)
        {
            cts.Cancel();
            cts.Dispose();
            _Cts = null;
        }

        _WorkProducer.Wait();
    }

    private String GetRandomString()
    {
        var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        var result = new String(Enumerable
            .Repeat(chars, 8)
            .Select(s => s[_Random.Next(s.Length)])
            .ToArray());

        return result;
    }

    private void Run(CancellationToken token)
    {
        while (!token.IsCancellationRequested)
        {
            var worker = StartNewWorker();
            _Workers.Add(worker);
            Task.Delay(100);
        }

        _Workers.CompleteAdding();
        _Workers = new BlockingCollection<Task<String>>();
    }

    private Task<String> StartNewWorker()
    {
        return Task.Factory.StartNew<String>(Worker);
    }

    private String Worker()
    {
        var workerId = Interlocked.Increment(ref _WorkCounter);
        var neededTime = TimeSpan.FromSeconds(_Random.NextDouble() * 5);
        Console.WriteLine("Worker " + workerId + " starts in " + neededTime);
        Task.Delay(neededTime).Wait();
        var result = GetRandomString();
        Console.WriteLine("Worker " + workerId + " finished with " + result);

        return result;
    }
}

Consumer.cs

public class Consumer
{
    private Task _Consumer;
    private IEnumerable<Task<String>> _Workers;

    public Consumer(IEnumerable<Task<String>> workers)
    {
        if (workers == null)
            throw new ArgumentNullException("workers");

        _Workers = workers;
    }

    public void Start()
    {
        var consumer = _Consumer;

        if (consumer == null
            || consumer.IsCompleted)
        {
            _Consumer = Task.Factory.StartNew(Run);
        }
    }

    private void Run()
    {
        foreach (var worker in _Workers)
        {
            var result = worker.Result;
            Console.WriteLine("Received result " + result);
        }
    }
}

不错的例子。我想你可以将其缩短为.Take()方法而不是GetConsumingEnumerable()。我猜问问题的人认为调用.Take()的函数将立即返回一个新值...他实际上试图重新实现BlockingCollection - Alex F
@Jasper 我不认为那是真的... 你为什么认为我没有理解返回结果和返回将完成结果的任务之间的区别? - andreapier

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接