将分割调整算法拆分为两个步骤

Question

将分割调整算法拆分为两个步骤

c#algorithmimage-processingimageprocessor

26

我编写了以下的缩放算法，可以正确地缩放图像。但由于每次循环都需要迭代权重数组，速度太慢了。

我相当确定我应该能够将算法拆分为两个步骤，就像您使用双通道高斯模糊一样，这将大大减少操作复杂性并提高性能。不幸的是，我无法使它正常工作。有人能帮忙吗？

Parallel.For(
    startY,
    endY,
    y =>
    {
        if (y >= targetY && y < targetBottom)
        {
            Weight[] verticalValues = this.verticalWeights[y].Values;

            for (int x = startX; x < endX; x++)
            {
                Weight[] horizontalValues = this.horizontalWeights[x].Values;

                // Destination color components
                Color destination = new Color();

                // This is where there is too much operation complexity.
                foreach (Weight yw in verticalValues)
                {
                    int originY = yw.Index;

                    foreach (Weight xw in horizontalValues)
                    {
                        int originX = xw.Index;
                        Color sourceColor = Color.Expand(source[originX, originY]);
                        float weight = yw.Value * xw.Value;
                        destination += sourceColor * weight;
                    }
                }

                destination = Color.Compress(destination);
                target[x, y] = destination;
            }
        }
    });

权重和索引计算如下。每个维度都有一个：

/// <summary>
/// Computes the weights to apply at each pixel when resizing.
/// </summary>
/// <param name="destinationSize">The destination section size.</param>
/// <param name="sourceSize">The source section size.</param>
/// <returns>
/// The <see cref="T:Weights[]"/>.
/// </returns>
private Weights[] PrecomputeWeights(int destinationSize, int sourceSize)
{
    IResampler sampler = this.Sampler;
    float ratio = sourceSize / (float)destinationSize;
    float scale = ratio;

    // When shrinking, broaden the effective kernel support so that we still
    // visit every source pixel.
    if (scale < 1)
    {
        scale = 1;
    }

    float scaledRadius = (float)Math.Ceiling(scale * sampler.Radius);
    Weights[] result = new Weights[destinationSize];

    // Make the weights slices, one source for each column or row.
    Parallel.For(
        0,
        destinationSize,
        i =>
            {
                float center = ((i + .5f) * ratio) - 0.5f;
                int start = (int)Math.Ceiling(center - scaledRadius);

                if (start < 0)
                {
                    start = 0;
                }

                int end = (int)Math.Floor(center + scaledRadius);

                if (end > sourceSize)
                {
                    end = sourceSize;

                    if (end < start)
                    {
                        end = start;
                    }
                }

                float sum = 0;
                result[i] = new Weights();

                List<Weight> builder = new List<Weight>();
                for (int a = start; a < end; a++)
                {
                    float w = sampler.GetValue((a - center) / scale);

                    if (w < 0 || w > 0)
                    {
                        sum += w;
                        builder.Add(new Weight(a, w));
                    }
                }

                // Normalise the values
                if (sum > 0 || sum < 0)
                {
                    builder.ForEach(w => w.Value /= sum);
                }

                result[i].Values = builder.ToArray();
                result[i].Sum = sum;
            });

    return result;
}

/// <summary>
/// Represents the weight to be added to a scaled pixel.
/// </summary>
protected class Weight
{
    /// <summary>
    /// The pixel index.
    /// </summary>
    public readonly int Index;

    /// <summary>
    /// Initializes a new instance of the <see cref="Weight"/> class.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="value">The value.</param>
    public Weight(int index, float value)
    {
        this.Index = index;
        this.Value = value;
    }

    /// <summary>
    /// Gets or sets the result of the interpolation algorithm.
    /// </summary>
    public float Value { get; set; }
}

/// <summary>
/// Represents a collection of weights and their sum.
/// </summary>
protected class Weights
{
    /// <summary>
    /// Gets or sets the values.
    /// </summary>
    public Weight[] Values { get; set; }

    /// <summary>
    /// Gets or sets the sum.
    /// </summary>
    public float Sum { get; set; }
}

每个 IResampler 根据给定的索引提供相应的权重系列。双三次重采样器的工作原理如下。

/// <summary>
/// The function implements the bicubic kernel algorithm W(x) as described on
/// <see href="https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm">Wikipedia</see>
/// A commonly used algorithm within imageprocessing that preserves sharpness better than triangle interpolation.
/// </summary>
public class BicubicResampler : IResampler
{
    /// <inheritdoc/>
    public float Radius => 2;

    /// <inheritdoc/>
    public float GetValue(float x)
    {
        // The coefficient.
        float a = -0.5f;

        if (x < 0)
        {
            x = -x;
        }

        float result = 0;

        if (x <= 1)
        {
            result = (((1.5f * x) - 2.5f) * x * x) + 1;
        }
        else if (x < 2)
        {
            result = (((((a * x) + 2.5f) * x) - 4) * x) + 2;
        }

        return result;
    }
}

这是一个关于现有算法调整大小的图片示例。输出结果正确（请注意，银色光泽得以保留）。

原始图片。

使用双三次重采样器将图像缩小了一半。

这段代码是我正在编写的一个更大的库的一部分，旨在将图像处理添加到corefx中。

- James South

@Spektre，我已经在问题中添加了更多信息。如果您需要其他任何信息，请告诉我。 - James South

由于这是双三次插值（而不是我最初从您的代码中假设的任意卷积，但并没有深入研究），因此无法通过递归细分来解决。相反，可以将其分解为2x1D问题，就像您打算的那样，但我不确定它是否比直接进行2D重采样更快（在现代计算机上）。必须对其进行一些测试，但在周一之前没有时间。 - Spektre

是的，双三次插值算法是我使用的众多算法之一。如果您能进行一些测试，那就太好了。当我将高斯模糊算法切换到2x1D时，性能得到了很大提升，所以希望这次也能如此。周一完全没问题，周末不用工作 :) - James South

1

高斯从2D到2x1D的分离更好，因为它是2D积分。双三次滤波不是积分，所以不要期望有如此高的加速。此外，高斯1D有时可以通过递归细分类似于FFT算法将其从O(n)加速到O(log(n))...在较小区域上进行n x积分更快... - Spektre

@Spektre 真糟糕，我还期望有好消息。380ms 对我来说太慢了。 "什么多项式" 对不起，我不确定你的意思。 - James South

显示剩余6条评论

3个回答

1

从一个抽象的角度来看（不了解图像处理），我认为你正在多次计算权重和源颜色的值（在最内部的foreach循环中，每当相同的索引对再次出现时）;是否可以提前预先计算它们呢？您需要为HorizontalWeight和VerticalWeight矩阵计算“直接乘积”矩阵（仅将每个索引对（x，y）的值相乘），并且还可以提前将Color.Expand应用于源。这些任务可以并行完成，“直接乘积”（抱歉，我不知道这个野兽的正确名称）应该在许多库中可用。

- Daniel

我想我明白你的意思，谢谢。我会尝试使用那种方法。 - James South

0

好的，这是我的做法。

关键是先只调整图像的宽度，保持高度与原始图像相同。我们将结果像素存储在临时图像中。

然后就是将该图像缩小到我们的最终输出大小。

正如您所看到的，我们不再在每个像素上迭代两个重量集合。尽管必须两次遍历外部像素循环，但算法的操作速度要快得多，在我的测试图像上平均快了约25％。

// Interpolate the image using the calculated weights.
// First process the columns.
Parallel.For(
    0,
    sourceBottom,
    y =>
    {
        for (int x = startX; x < endX; x++)
        {
            Weight[] horizontalValues = this.HorizontalWeights[x].Values;

            // Destination color components
            Color destination = new Color();

            foreach (Weight xw in horizontalValues)
            {
                int originX = xw.Index;
                Color sourceColor = Color.Expand(source[originX, y]);
                destination += sourceColor * xw.Value;
            }

            destination = Color.Compress(destination);
            this.firstPass[x, y] = destination;
        }
    });

// Now process the rows.
Parallel.For(
    startY,
    endY,
    y =>
    {
        if (y >= targetY && y < targetBottom)
        {
            Weight[] verticalValues = this.VerticalWeights[y].Values;

            for (int x = startX; x < endX; x++)
            {
                // Destination color components
                Color destination = new Color();

                foreach (Weight yw in verticalValues)
                {
                    int originY = yw.Index;
                    int originX = x;
                    Color sourceColor = Color.Expand(this.firstPass[originX, originY]);
                    destination += sourceColor * yw.Value;
                }

                destination = Color.Compress(destination);
                target[x, y] = destination;
            }
        }
    });

- James South

大的速度提升是当您专门读取连续的内存区域时。为了做到这一点，您必须将矩阵转置两次 - 一次在缩放X之后，然后在缩放Y之后。好处是您可以在写操作期间进行转置，如果该内存区域没有读取依赖关系，则CPU可以异步执行。 - Lilith River

这将简化您的代码 - 您现在只需要知道如何在一个方向上进行缩放，并且每次写入时始终进行转置。您调用Scale1D两次，每次传递不同的宽度/高度和加权结构。现在我只在C中测试过这个，所以数组边界检查可能会减慢速度，但我认为底层的缓存一致性优势也应该在CLR JIT中起作用。仅使用写入时转置，连续读取的方法似乎提供了接近一个数量级的性能增益。 - Lilith River

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Micromega · Accepted Answer

你可以尝试使用加权的沃罗诺伊图。随机选择一组点并计算沃罗诺伊图。使用劳埃德算法平滑多边形，并插值多边形的颜色。使用权重计算加权沃罗诺伊图。例如，沃罗诺伊点阵和马赛克: http://www.mrl.nyu.edu/~ajsecord/stipples.html 和 http://www.evilmadscientist.com/2012/stipplegen-weighted-voronoi-stippling-and-tsp-paths-in-processing/。