

性能分析器(Profiler)显示,程序总运行时间的 50% 都花在这个函数内部。应该如何优化它?这个函数把 BMP 的 24 位颜色格式转换为 YUV。谢谢!


/*
 * Integer RGB -> Y/U/V conversion helpers.
 *
 * Y_FROM_RGB divides the weighted sum by 256 (>> 8) and adds 16.
 * V_FROM_RGB and U_FROM_RGB divide by 1024 (>> 10) and add 128; the caller in
 * this file hands them the SUM of four pixels, so the extra factor of 4 in the
 * shift averages the 2x2 block.
 *
 * The parameter names are misleading: the FIRST argument (_r_) gets the
 * weights 25 / -18 / 112 and the THIRD argument (_b_) gets 66 / 112 / -38.
 * The call sites in this file pass (B, G, R), so the red value ends up with
 * the weight 66 in Y.
 *
 * Every argument and every expansion is fully parenthesized so the macros can
 * be used with compound arguments and inside larger expressions.
 *
 * NOTE(review): the rounding term 128 is half of 256, not half of 1024, so
 * U/V are effectively truncated rather than rounded to nearest - confirm that
 * this is intended.
 * NOTE(review): for U/V the weighted sum can be negative; right-shifting a
 * negative int is implementation-defined in C (arithmetic shift on common
 * compilers).
 */
#define Y_FROM_RGB(_r_,_g_,_b_) (((  66 * (_b_) + 129 * (_g_) +  25 * (_r_) + 128) >> 8) + 16)
#define V_FROM_RGB(_r_,_g_,_b_) ((( 112 * (_b_) -  94 * (_g_) -  18 * (_r_) + 128) >> 10) + 128)
#define U_FROM_RGB(_r_,_g_,_b_) ((( -38 * (_b_) -  74 * (_g_) + 112 * (_r_) + 128) >> 10) + 128)

/**
 * \brief
 * Converts 24 bit image to YCrCb image channels
 * \param source
 * Source 24bit image pointer
 * \param source_width
 * Source image width
 * \param dest_Y
 * destination image Y component pointer
 * \param dest_scan_size_Y
 * destination image Y component line size
 * \param dest_U
 * destination image U component pointer
 * \param dest_scan_size_U
 * destination image U component line size
 * \param dest_V
 * destination image V component pointer
 * \param dest_scan_size_V
 * destination image V component line size
 * \param dest_width
 * Destination image width = source_width
 * \param dest_height
 * Destination image height = source image height
 *
 * Convert 24 bit image (source) with width (source_width)
 * to YCrCb image channels (dest_Y, dest_U, dest_V) with size (dest_width)x(dest_height), and line size
 * (dest_scan_size_Y, dest_scan_size_U, dest_scan_size_V) (in bytes)
 */
/**
 * \brief
 * Splits a packed 24-bit image into separate Y, U and V planes, producing one
 * U and one V sample per 2x2 block of source pixels.
 *
 * Source rows are handled in pairs. For each pair, the first row yields the Y
 * samples of that row plus one U/V sample per two pixels (computed from the
 * sum of the 2x2 block spanning both rows); the second row yields Y samples
 * only. Only 2 * (dest_width / 2) pixels per row and 2 * (dest_height / 2)
 * rows are processed, so a trailing odd column or odd row is left unwritten.
 *
 * \param source            Source image, 3 bytes per pixel; consecutive rows
 *                          are source_width * 3 bytes apart
 * \param source_width      Source image width in pixels
 * \param dest_Y            Destination Y plane
 * \param dest_scan_size_Y  Distance in bytes between Y plane rows
 * \param dest_U            Destination U plane
 * \param dest_scan_size_U  Distance in bytes between U plane rows
 * \param dest_V            Destination V plane
 * \param dest_scan_size_V  Distance in bytes between V plane rows
 * \param dest_width        Number of pixels to convert per row
 * \param dest_height       Number of rows to convert
 */
void ImageConvert_24_YUV420P(unsigned char * source, int source_width,
                            unsigned char * dest_Y, int dest_scan_size_Y,
                            unsigned char * dest_U, int dest_scan_size_U,
                            unsigned char * dest_V, int dest_scan_size_V,
                            int dest_width, int dest_height)
{
  const int source_scan_size = source_width * 3;
  const int half_width = dest_width / 2;
  const int half_height = dest_height / 2;

  for (int y = 0; y < half_height; y++)
  {
    /* First row of the pair: Y for every pixel, U/V for every 2x2 block. */
    const unsigned char *top = source;
    const unsigned char *bottom = source + source_scan_size;
    unsigned char *out_Y = dest_Y;
    unsigned char *out_U = dest_U;
    unsigned char *out_V = dest_V;

    for (int x = 0; x < half_width; x++)
    {
      int R = top[0];
      int G = top[1];
      int B = top[2];

      /* The macros are called with (B, G, R), matching their weight order. */
      out_Y[0] = Y_FROM_RGB(B, G, R);

      int R1 = top[3];
      int G1 = top[4];
      int B1 = top[5];

      out_Y[1] = Y_FROM_RGB(B1, G1, R1);

      /* Accumulate the four pixels of the 2x2 block; the U/V macros shift by
       * 10 instead of 8, which divides the sum by four. */
      R += R1 + bottom[0] + bottom[3];
      G += G1 + bottom[1] + bottom[4];
      B += B1 + bottom[2] + bottom[5];

      *out_V = V_FROM_RGB(B, G, R);
      *out_U = U_FROM_RGB(B, G, R);

      top += 6;
      bottom += 6;
      out_Y += 2;
      out_U += 1;
      out_V += 1;
    }

    /* Advance all planes; U and V move once per pair of source rows. */
    source += source_scan_size;
    dest_Y += dest_scan_size_Y;
    dest_U += dest_scan_size_U;
    dest_V += dest_scan_size_V;

    /* Second row of the pair: Y only, its chroma was already accumulated. */
    const unsigned char *row = source;
    unsigned char *out_row_Y = dest_Y;

    for (int x = 0; x < 2 * half_width; x++)
    {
      int R = row[0];
      int G = row[1];
      int B = row[2];

      *out_row_Y = Y_FROM_RGB(B, G, R);

      row += 3;
      out_row_Y += 1;
    }

    source += source_scan_size;
    dest_Y += dest_scan_size_Y;
  }
}

内存占用在这里是否是个问题?如果不是,能否用机器字长的整数(而不是字节)来存放数据? - Simon
这里内存不是问题。数据可以用整数表示。 - artur_i_am

// Excerpt of the luma-only row loop from ImageConvert_24_YUV420P.
// Each iteration handles a pair of pixels; the excerpt stops after loading
// the second pixel of the pair.
for (int x = 0; x < half_width; x ++) 
  // First pixel of the pair: three consecutive source bytes.
  int R = source_scan[0]; 
  int G = source_scan[1]; 
  int B = source_scan[2]; 

  // Arguments are passed as (B, G, R).
  int Y = Y_FROM_RGB(B, G, R); 

  // Store one Y byte, then step to the next source pixel and output byte.
  *dest_scan_Y = Y; 
  source_scan += 3; 
  dest_scan_Y += 1; 

  // Second pixel of the pair.
  R = source_scan[0]; 
  G = source_scan[1]; 
  B = source_scan[2]; 





ARM Cortex-A8 拥有 NEON 技术,它支持 SIMD。你应该能在 ARM 网站上找到更多信息。



在我使用__restrict限定符声明变量后,代码变得更慢了 :) - artur_i_am
@artur_i_am:听起来你需要向编译器供应商报告一个 bug! - caf

网页内容由stack overflow 提供, 点击上面的