首先,我想为您提供一些背景信息。
我有两种图像需要合并。第一张图片是背景图像,格式为8BppGrey,分辨率为320x240。第二张图片是前景图像,格式为32BppRGBA,分辨率为64x48。
更新 MVP的github存储库在问题底部。
为了实现这一点,我使用双线性插值将第二个图像调整大小到与第一个图像相同的大小,然后使用混合将两个图像合并成一个图像。只有当第二个图像的alpha值大于0时才会进行混合。
我需要尽快完成它,所以我的想法是将调整大小和合并/混合过程结合起来。
为了实现这一点,我使用writeablebitmapex repository中的调整大小函数,并添加了合并/混合功能。
一切都按预期工作,但我想减少执行时间。
这是当前的调试时间:
// CPU: Intel(R) Core(TM) i7-4810MQ CPU @ 2.80GHz
MediaServer: Execution time in c++ 5 ms
MediaServer: Resizing took 4 ms.
MediaServer: Execution time in c++ 5 ms
MediaServer: Resizing took 5 ms.
MediaServer: Execution time in c++ 4 ms
MediaServer: Resizing took 4 ms.
MediaServer: Execution time in c++ 3 ms
MediaServer: Resizing took 3 ms.
MediaServer: Execution time in c++ 4 ms
MediaServer: Resizing took 4 ms.
MediaServer: Execution time in c++ 5 ms
MediaServer: Resizing took 4 ms.
MediaServer: Execution time in c++ 6 ms
MediaServer: Resizing took 6 ms.
MediaServer: Execution time in c++ 3 ms
MediaServer: Resizing took 3 ms.
我有没有机会增加调整大小/合并/混合过程的性能并降低执行时间?
是否有一些部分可以并行化处理?
我是否有机会使用一些处理器功能?
嵌套循环是一个巨大的性能损失,但我不知道如何编写更好的代码。
我希望整个过程达到1或2毫秒。这可行吗?
下面是我使用的修改后的Visual C++函数:
- pd是我用于在WPF中显示结果的可写位图的后备缓冲区。我使用的格式是默认的32BppRGBA。
- pixels是64x48 32BppRGBA图像的int[]数组
- widthSource和heightSource是像素图像的大小
- 宽度和高度是输出图像的目标大小
- baseImage是320x240 8BppGray图像的int[]数组
VC++代码:
unsigned int Resize(int* pd, int* pixels, int widthSource, int heightSource, int width, int height, byte* baseImage)
{
unsigned int start = clock();
float xs = (float)widthSource / width;
float ys = (float)heightSource / height;
float fracx, fracy, ifracx, ifracy, sx, sy, l0, l1, rf, gf, bf;
int c, x0, x1, y0, y1;
byte c1a, c1r, c1g, c1b, c2a, c2r, c2g, c2b, c3a, c3r, c3g, c3b, c4a, c4r, c4g, c4b;
byte a, r, g, b;
// Bilinear
int srcIdx = 0;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
sx = x * xs;
sy = y * ys;
x0 = (int)sx;
y0 = (int)sy;
// Calculate coordinates of the 4 interpolation points
fracx = sx - x0;
fracy = sy - y0;
ifracx = 1.0f - fracx;
ifracy = 1.0f - fracy;
x1 = x0 + 1;
if (x1 >= widthSource)
{
x1 = x0;
}
y1 = y0 + 1;
if (y1 >= heightSource)
{
y1 = y0;
}
// Read source color
c = pixels[y0 * widthSource + x0];
c1a = (byte)(c >> 24);
c1r = (byte)(c >> 16);
c1g = (byte)(c >> 8);
c1b = (byte)(c);
c = pixels[y0 * widthSource + x1];
c2a = (byte)(c >> 24);
c2r = (byte)(c >> 16);
c2g = (byte)(c >> 8);
c2b = (byte)(c);
c = pixels[y1 * widthSource + x0];
c3a = (byte)(c >> 24);
c3r = (byte)(c >> 16);
c3g = (byte)(c >> 8);
c3b = (byte)(c);
c = pixels[y1 * widthSource + x1];
c4a = (byte)(c >> 24);
c4r = (byte)(c >> 16);
c4g = (byte)(c >> 8);
c4b = (byte)(c);
// Calculate colors
// Alpha
l0 = ifracx * c1a + fracx * c2a;
l1 = ifracx * c3a + fracx * c4a;
a = (byte)(ifracy * l0 + fracy * l1);
// Write destination
if (a > 0)
{
// Red
l0 = ifracx * c1r + fracx * c2r;
l1 = ifracx * c3r + fracx * c4r;
rf = ifracy * l0 + fracy * l1;
// Green
l0 = ifracx * c1g + fracx * c2g;
l1 = ifracx * c3g + fracx * c4g;
gf = ifracy * l0 + fracy * l1;
// Blue
l0 = ifracx * c1b + fracx * c2b;
l1 = ifracx * c3b + fracx * c4b;
bf = ifracy * l0 + fracy * l1;
// Cast to byte
float alpha = a / 255.0f;
r = (byte)((rf * alpha) + (baseImage[srcIdx] * (1.0f - alpha)));
g = (byte)((gf * alpha) + (baseImage[srcIdx] * (1.0f - alpha)));
b = (byte)((bf * alpha) + (baseImage[srcIdx] * (1.0f - alpha)));
pd[srcIdx++] = (255 << 24) | (r << 16) | (g << 8) | b;
}
else
{
// Alpha, Red, Green, Blue
pd[srcIdx++] = (255 << 24) | (baseImage[srcIdx] << 16) | (baseImage[srcIdx] << 8) | baseImage[srcIdx];
}
}
}
unsigned int end = clock() - start;
return end;
}
{{链接1:Github仓库}}