Libav AVFrame to OpenCV Mat and back to AVPacket

I am new to libav and I am writing a video manipulation software whose core is opencv. My pipeline is:

1. Read a video packet
2. Decode the packet into an AVFrame
3. Convert the AVFrame to a cv::Mat
4. Process the Mat
5. Convert the cv::Mat back to an AVFrame
6. Encode the AVFrame into an AVPacket
7. Write the packet
8. Go back to step 1

I read the dranger tutorial at http://dranger.com/ffmpeg/tutorial01.html and also used the decoding/encoding example. I can read a video, extract its frames, and convert them to cv::Mat. My problem is converting the cv::Mat back to an AVFrame and encoding it into an AVPacket.
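For reference, the AVFrame-to-cv::Mat direction that already works for me is roughly the following (a minimal sketch; pFrameRGB is assumed to be a BGR24 frame filled by sws_scale, as in the full code below):

// Minimal sketch of the direction that already works: wrap a BGR24 AVFrame
// in a cv::Mat header (no copy; the Mat does not own the pixel data).
// Assumes pFrameRGB was filled by sws_scale with AV_PIX_FMT_BGR24 and
// matches the decoder's width/height.
cv::Mat avframe_bgr_to_mat(AVFrame *pFrameRGB, int width, int height)
{
    return cv::Mat(height, width, CV_8UC3,
                   pFrameRGB->data[0], pFrameRGB->linesize[0]);
}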
Please help me with this code:
int main(int argc, char **argv)
{
AVOutputFormat *ofmt = NULL;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVPacket pkt;
AVCodecContext    *pCodecCtx = NULL;
AVCodec           *pCodec = NULL;
AVFrame           *pFrame = NULL;
AVFrame           *pFrameRGB = NULL;
int videoStream=-1;
int audioStream=-1;
int               frameFinished;
int               numBytes;
uint8_t           *buffer = NULL;
struct SwsContext *sws_ctx = NULL;
FrameManipulation *mal_frame;

const char *in_filename, *out_filename;
int ret, i;
if (argc < 3) {

    printf("usage: %s input output\n"
           "API example program to remux a media file with libavformat and libavcodec.\n"
           "The output format is guessed according to the file extension.\n"
           "\n", argv[0]);
    return 1;
}
in_filename  = argv[1];
out_filename = argv[2];
av_register_all();
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
    fprintf(stderr, "Could not open input file '%s'", in_filename);
    goto end;
}

if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
    fprintf(stderr, "Failed to retrieve input stream information");
    goto end;
}

av_dump_format(ifmt_ctx, 0, in_filename, 0);
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);

if (!ofmt_ctx) {
    fprintf(stderr, "Could not create output context\n");
    ret = AVERROR_UNKNOWN;
    goto end;
}

ofmt = ofmt_ctx->oformat;

for (i = 0; i < ifmt_ctx->nb_streams; i++) {
    AVStream *in_stream = ifmt_ctx->streams[i];
    AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);

    if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO &&
       videoStream < 0) {
           videoStream=i;
    }

    if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO &&
       audioStream < 0) {
            audioStream=i;
    }

    if (!out_stream) {
        fprintf(stderr, "Failed allocating output stream\n");
        ret = AVERROR_UNKNOWN;
        goto end;
    }

    ret = avcodec_copy_context(out_stream->codec, in_stream->codec);

    if (ret < 0) {
        fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
        goto end;
    }

    out_stream->codec->codec_tag = 0;

    if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
       out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

pCodec=avcodec_find_decoder(ifmt_ctx->streams[videoStream]->codec->codec_id);
pCodecCtx = avcodec_alloc_context3(pCodec);

if(avcodec_copy_context(pCodecCtx, ifmt_ctx->streams[videoStream]->codec) != 0) {
  fprintf(stderr, "Couldn't copy codec context");
  return -1; // Error copying codec context
}

// Open codec
 if(avcodec_open2(pCodecCtx, pCodec, NULL)<0)
   return -1; // Could not open codec

// Allocate video frame
 pFrame=av_frame_alloc();

 // Allocate an AVFrame structure
 pFrameRGB=av_frame_alloc();

 // Determine required buffer size and allocate buffer
 numBytes=avpicture_get_size(AV_PIX_FMT_RGB24, ifmt_ctx->streams[videoStream]->codec->width,
                 ifmt_ctx->streams[videoStream]->codec->height);

 buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));

 // Assign appropriate parts of buffer to image planes in pFrameRGB
 // Note that pFrameRGB is an AVFrame, but AVFrame is a superset
 // of AVPicture
 avpicture_fill((AVPicture *)pFrameRGB, buffer, AV_PIX_FMT_BGR24,
        ifmt_ctx->streams[videoStream]->codec->width, ifmt_ctx->streams[videoStream]->codec->height);

 av_dump_format(ofmt_ctx, 0, out_filename, 1);

 if (!(ofmt->flags & AVFMT_NOFILE)) {
    ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
    if (ret < 0) {
        fprintf(stderr, "Could not open output file '%s'", out_filename);
        goto end;
    }
}

ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
    fprintf(stderr, "Error occurred when opening output file\n");
    goto end;
}

// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
// of AVPicture

avpicture_fill((AVPicture *)pFrameRGB, buffer, AV_PIX_FMT_BGR24,
                   ifmt_ctx->streams[videoStream]->codec->width,
                   ifmt_ctx->streams[videoStream]->codec->height);

// initialize SWS context for software scaling
sws_ctx = sws_getContext(
             ifmt_ctx->streams[videoStream]->codec->width,
             ifmt_ctx->streams[videoStream]->codec->height,
             ifmt_ctx->streams[videoStream]->codec->pix_fmt,
             ifmt_ctx->streams[videoStream]->codec->width,
             ifmt_ctx->streams[videoStream]->codec->height,
             AV_PIX_FMT_BGR24,
             SWS_BICUBIC,
             NULL,
             NULL,
             NULL
             );
// Loop through packets
while (1) {

    AVStream *in_stream, *out_stream;
    ret = av_read_frame(ifmt_ctx, &pkt);
    if(pkt.stream_index==videoStream)

     // Decode video frame
      avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &pkt);

      if(frameFinished) {
                sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
                pFrame->linesize, 0, pCodecCtx->height,
                pFrameRGB->data, pFrameRGB->linesize);
                cv::Mat img= mal_frame->process(
                          pFrameRGB,pFrame->width,pFrame->height);
/* My problem is Here ------------*/
    

    avpicture_fill((AVPicture*)pFrameRGB, 
                     img.data, 
                     PIX_FMT_BGR24, 
                     outStream->codec->width, 
                     outStream->codec->height);
    
    pFrameRGB->width =  ifmt_ctx->streams[videoStream]->codec->width;
    pFrameRGB->height = ifmt_ctx->streams[videoStream]->codec->height;
    
            avcodec_encode_video2(ifmt_ctx->streams[videoStream]->codec , 
                                                     &pkt , pFrameRGB , &gotPacket);
/*
I get this error
[swscaler @ 0x14b58a0] bad src image pointers
[swscaler @ 0x14b58a0] bad src image pointers
*/

/* My Problem Ends here ---------- */
               
    }

    if (ret < 0)
        break;

    in_stream  = ifmt_ctx->streams[pkt.stream_index];
    out_stream = ofmt_ctx->streams[pkt.stream_index];

    //log_packet(ifmt_ctx, &pkt, "in");

    /* copy packet */
    pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base,
                               AV_ROUND_NEAR_INF);
    pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base,
                               AV_ROUND_NEAR_INF);
    pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
    pkt.pos = -1;
    log_packet(ofmt_ctx, &pkt, "out");

    ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
    if (ret < 0) {
        fprintf(stderr, "Error muxing packet\n");
        break;
    }

    av_free_packet(&pkt);
}

av_write_trailer(ofmt_ctx);

end:
avformat_close_input(&ifmt_ctx);

/* close output */
if (ofmt_ctx && !(ofmt->flags & AVFMT_NOFILE))
    avio_closep(&ofmt_ctx->pb);

avformat_free_context(ofmt_ctx);

if (ret < 0 && ret != AVERROR_EOF) {
    return 1;
}

return 0;

}

When I run this code, I get an unknown fatal error in this part:
   /* My problem is Here ------------*/
    

    avpicture_fill((AVPicture*)pFrameRGB, 
                     img.data, 
                     PIX_FMT_BGR24, 
                     outStream->codec->width, 
                     outStream->codec->height);
    
    pFrameRGB->width =  ifmt_ctx->streams[videoStream]->codec->width;
    pFrameRGB->height = ifmt_ctx->streams[videoStream]->codec->height;
    
            avcodec_encode_video2(ifmt_ctx->streams[videoStream]->codec , 
                                                     &pkt , pFrameRGB , &gotPacket);
/*
I get this error
[swscaler @ 0x14b58a0] bad src image pointers
[swscaler @ 0x14b58a0] bad src image pointers
*/

/* My Problem Ends here ---------- */

What I want is to convert the cv::Mat back into an AVFrame and encode that AVFrame into an AVPacket. I really appreciate your help.


Why do it by hand when OpenCV supports libav as a decoding/encoding backend? - aram
@Aram opencv's VideoWriter does not include the audio stream when it encodes video. I don't want to add an extra stage to my video processing. - Davood Falahati
Which version of ffmpeg/libav are you using? And which opencv version? What is that FrameManipulation struct? You have to provide a verifiable example we can run. I notice you are using the remuxing example, not decoding_encoding, and it is quite old; many of those APIs are deprecated. - halfelf
I use ffmpeg 2.8 and opencv 3.2. FrameManipulation subtracts some objects; it does not change the frame's color space or size. Think of it as something like imblur. @halfelf - Davood Falahati
2 Answers

After reading some examples, reading the source code, and with help from some people, I managed to get the code working. I used the transcoding and encoding examples and mixed them together. Here is my code.
Here are the key points: 1- You should use libswscale to convert the AVFrame into the format required to feed it into an openCV Mat. To do that we define:
struct SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(pCodecCtx->width,
             pCodecCtx->height,
             pCodecCtx->pix_fmt,
             pCodecCtx->width,
             pCodecCtx->height,
             AV_PIX_FMT_BGR24,
             SWS_BICUBIC,
             NULL,
             NULL,
             NULL
             );
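The destination frame that sws_scale writes into is not shown above; a minimal sketch of allocating it, using the same (now deprecated) avpicture_* helpers as the rest of this code, might look like this:

// Sketch only (not part of the original answer): allocate the BGR24
// destination frame that sws_scale fills and that the cv::Mat will wrap.
AVFrame *pFrameRGB = av_frame_alloc();
int numBytes = avpicture_get_size(AV_PIX_FMT_BGR24,
                                  pCodecCtx->width, pCodecCtx->height);
uint8_t *buffer = (uint8_t *)av_malloc(numBytes * sizeof(uint8_t));
avpicture_fill((AVPicture *)pFrameRGB, buffer, AV_PIX_FMT_BGR24,
               pCodecCtx->width, pCodecCtx->height);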

To convert OpenCV's Mat format back to the AVFrame format, you need swscale again, this time to convert OpenCV's BGR frame format to YUV. So I do this:

 struct SwsContext *sws_ctx_bgr_yuv = NULL;
 sws_ctx_bgr_yuv = sws_getContext(pCodecCtx->width,
                                 pCodecCtx->height,
                                 AV_PIX_FMT_BGR24,
                                 pCodecCtx->width,
                                 pCodecCtx->height,
                                 pCodecCtx->pix_fmt //AV_PIX_FMT_YUV420p
                                 ,0,0,NULL,NULL);

And here is the frame reading/decoding/encoding loop:

while (1) {
    if ((ret = av_read_frame(ifmt_ctx, &packet)) < 0)
        break;
    stream_index = packet.stream_index;
    type = ifmt_ctx->streams[packet.stream_index]->codec->codec_type;
    av_log(NULL, AV_LOG_DEBUG, "Demuxer gave frame of stream_index %u\n",
            stream_index);
    if (filter_ctx[stream_index].filter_graph) {
        av_log(NULL, AV_LOG_DEBUG, "Going to reencode&filter the frame\n");
        frame = av_frame_alloc();
        if (!frame) {
            ret = AVERROR(ENOMEM);
            break;
        }
        av_packet_rescale_ts(&packet,
                             ifmt_ctx->streams[stream_index]->time_base,
                             ifmt_ctx->streams[stream_index]->codec->time_base);
        dec_func = (type == AVMEDIA_TYPE_VIDEO) ? avcodec_decode_video2 :
            avcodec_decode_audio4;
        ret = dec_func(ifmt_ctx->streams[stream_index]->codec, frame,
                &got_frame, &packet);
        if (ret < 0) {
            av_frame_free(&frame);
            av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
            break;
        }
        if (got_frame) {
            if(stream_index==video_index){

                sws_scale(sws_ctx, (uint8_t const * const *)frame->data,
                      frame->linesize, 0, pCodecCtx->height,
                      pFrameRGB->data, pFrameRGB->linesize);
    /*------------------------------------------------------------------
     * AVFrame converts to an opencv Mat
     *------------------------------------------------------------------*/
                cv::Mat img(frame->height,frame->width,CV_8UC3,pFrameRGB->data[0]);
                img=manipulate_image(img); // this is an opencv Mat; do whatever you want, but don't change its dimensions or format
                // manipulate_image can be considered as simple as blurring
                const int stride[] = {img.step[0]};
                /* opencv Mat converts back to AVFrame */
                sws_scale(sws_ctx_bgr_yuv, &img.data, stride, 0, img.rows, frame->data, frame->linesize);

            }
            frame->pts = av_frame_get_best_effort_timestamp(frame);
    /* AVFrame re-encodes to AVPacket and will be sent to encoder */
            ret = filter_encode_write_frame(frame, stream_index);
            av_frame_free(&frame);

            if (ret < 0)
                goto end;
        } else {
            av_frame_free(&frame);
        }
    } else {
        /* remux this frame without reencoding */
        av_packet_rescale_ts(&packet,
                             ifmt_ctx->streams[stream_index]->time_base,
                             ofmt_ctx->streams[stream_index]->time_base);
        ret = av_interleaved_write_frame(ofmt_ctx, &packet);
        if (ret < 0)
            goto end;
    }
    av_free_packet(&packet);
}
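filter_encode_write_frame comes from FFmpeg's transcoding example; the encoding core of it, stripped of the filter-graph handling, looks roughly like this (a sketch only, using the same deprecated avcodec_encode_video2 API as the rest of this code; enc_ctx stands for the output stream's encoder context):

/* Sketch: encode one AVFrame into an AVPacket and mux it.
 * enc_ctx is the encoder context of the output video stream. */
static int encode_write_frame_sketch(AVFormatContext *ofmt_ctx,
                                     AVCodecContext *enc_ctx,
                                     unsigned int stream_index,
                                     AVFrame *frame)
{
    AVPacket enc_pkt;
    int got_packet = 0;

    av_init_packet(&enc_pkt);
    enc_pkt.data = NULL;   /* let the encoder allocate the payload */
    enc_pkt.size = 0;

    int ret = avcodec_encode_video2(enc_ctx, &enc_pkt, frame, &got_packet);
    if (ret < 0 || !got_packet)
        return ret;

    enc_pkt.stream_index = stream_index;
    av_packet_rescale_ts(&enc_pkt, enc_ctx->time_base,
                         ofmt_ctx->streams[stream_index]->time_base);
    return av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
}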

If possible, please add the conversion code to your answer instead of only a link. - Micka
@Micka, do you mean all of it? I mean, I don't want to make this post any longer than it already is. - Davood Falahati
@DavoodFalahati The relevant parts. If it's 648, then that's it. That's better than someone needing this answer a year from now, clicking the link, and landing on a 404 page. - Martin Dawson


Here is a C++-based approach for converting between cv::Mat and AVFrame. It is based on some code I found and debugged. Note that it only works for 8-bit, 3-channel images, but that can be changed by altering AV_PIX_FMT_BGR24 in both functions.

Hope this helps.

AVFrame cvmat_to_avframe(cv::Mat* frame)
{

        AVFrame dst;
        cv::Size frameSize = frame->size();
        AVCodec *encoder = avcodec_find_encoder(AV_CODEC_ID_RAWVIDEO);
        AVFormatContext* outContainer = avformat_alloc_context();
        AVStream *outStream = avformat_new_stream(outContainer, encoder);
        avcodec_get_context_defaults3(outStream->codec, encoder);

        outStream->codec->pix_fmt = AV_PIX_FMT_BGR24;
        outStream->codec->width = frame->cols;
        outStream->codec->height = frame->rows;
        avpicture_fill((AVPicture*)&dst, frame->data, AV_PIX_FMT_BGR24, outStream->codec->width, outStream->codec->height);
        dst.width = frameSize.width;
        dst.height = frameSize.height;

        return dst;
}


cv::Mat avframe_to_cvmat(AVFrame *frame)
{
        AVFrame dst;
        cv::Mat m;

        memset(&dst, 0, sizeof(dst));

        int w = frame->width, h = frame->height;
        m = cv::Mat(h, w, CV_8UC3);
        dst.data[0] = (uint8_t *)m.data;
        avpicture_fill( (AVPicture *)&dst, dst.data[0], AV_PIX_FMT_BGR24, w, h);

        struct SwsContext *convert_ctx=NULL;
        enum AVPixelFormat src_pixfmt = AV_PIX_FMT_BGR24;
        enum AVPixelFormat dst_pixfmt = AV_PIX_FMT_BGR24;
        convert_ctx = sws_getContext(w, h, src_pixfmt, w, h, dst_pixfmt,
                        SWS_FAST_BILINEAR, NULL, NULL, NULL);

        sws_scale(convert_ctx, frame->data, frame->linesize, 0, h,
                        dst.data, dst.linesize);
        sws_freeContext(convert_ctx);

        return m;
}
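A quick usage sketch of these two helpers (my own illustration, not part of the original answer); GaussianBlur merely stands in for whatever OpenCV processing you need:

// Hypothetical usage of the two helpers above (illustration only).
// 'bgr_frame' is assumed to already be in AV_PIX_FMT_BGR24 -- if your
// decoder outputs YUV, convert it with sws_scale first, as in the other answer.
void process_one_frame(AVFrame *bgr_frame)
{
        cv::Mat img = avframe_to_cvmat(bgr_frame);      // copies the pixels into a new Mat
        cv::GaussianBlur(img, img, cv::Size(5, 5), 0);  // example OpenCV processing step
        AVFrame out = cvmat_to_avframe(&img);           // wraps img.data, no copy
        // 'out' references img.data, so keep 'img' alive until 'out' has been
        // consumed (e.g. scaled to YUV and handed to the encoder).
        (void)out;
}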
