FFmpeg - 音频编码产生额外噪音

Question

FFmpeg - 音频编码产生额外噪音

3

我正在尝试使用FFmpeg将一个视频（在这种情况下是MP4）复制为另一个MP4。这样做是为了让我掌握视频的解码/编码流程，以便之后在这个流程中加入其他处理。我的代码基本上需要获取一个视频文件，解码视频和音频流，并将视频和音频流编码到输出视频文件中。

截至目前，我的代码仅适用于输入文件的视频流。输出文件的视频部分与输入文件的视频部分完全相同。然而，音频部分并不是。输出的音频部分包含原始音频，但有噪声。可以想象成有人在麦克风里大喊或者当音频太响时扬声器无法处理。

我处理视频和音频流的解码/编码过程方式相同，只是AVCodecContext设置有所不同（视频->帧速率、宽度、高度等；音频->采样率、通道等）。

这是我目前正在使用的代码：

视频结构体：

// Shared state for the decode -> re-encode pipeline: the input and output
// containers, the codecs in use, and one decoder plus one encoder context
// for each of the video and audio streams.
typedef struct Video {
    AVFormatContext* inputContext; // Source container; opened and probed in openVideo()
    AVFormatContext* outputContext; // Destination container; allocated in openVideo()
    AVCodec* videoCodec; // Video codec; set to the decoder by prepareStreamInfo()
    AVCodec* audioCodec; // Audio codec; the decoder at first, overwritten with the encoder in prepareAudioOutStream()
    AVStream* inputStream; // Scratch pointer: input stream currently being handled (reassigned in findStreams()/encodeAudio())
    AVStream* outputStream; // Scratch pointer: output stream currently being handled (reassigned in findStreams()/encodeAudio())
    AVCodecContext* videoCodecContext_I; // Video decoder context (input side)
    AVCodecContext* audioCodecContext_I; // Audio decoder context (input side)
    AVCodecContext* videoCodecContext_O; // Video encoder context (output side)
    AVCodecContext* audioCodecContext_O; // Audio encoder context (output side)
    int videoStream; // Index of the video stream within inputContext->streams
    int audioStream; // Index of the audio stream within inputContext->streams; also used to index outputContext->streams
} Video;

处理编码/解码的主要代码（我仅包含音频部分，因为视频部分相同）：

// Opens `filename` as the input container, probes its streams, and allocates
// an output container context for `outputFile`.
//
// On success video->inputContext and video->outputContext are both set.
// Returns 0 on success, or -1 after printing which step failed.
int openVideo(Video* video, char* filename, char* outputFile) {
    AVFormatContext** in = &video->inputContext;
    AVFormatContext** out = &video->outputContext;

    *in = avformat_alloc_context();
    if (*in == NULL) {
        printf("[ERROR] Failed to allocate input format context\n");
        return -1;
    }

    int status = avformat_open_input(in, filename, NULL, NULL);
    if (status < 0) {
        printf("[ERROR] Could not open the input file\n");
        return -1;
    }

    status = avformat_find_stream_info(*in, NULL);
    if (status < 0) {
        printf("[ERROR] Failed to retrieve input stream info\n");
        return -1;
    }

    // The return code is not inspected here; a NULL context is what signals failure.
    avformat_alloc_output_context2(out, NULL, NULL, outputFile);
    if (*out == NULL) {
        printf("[ERROR] Failed to create output context\n");
        return -1;
    }

    printf("[OPEN] Video %s opened\n", filename);
    return 0;
}

// Finds and opens a decoder for `stream`.
//
// *codec receives the decoder matching the stream's codec id and
// *codecContext receives a newly allocated context, filled from the stream's
// codec parameters and opened with that decoder.
// Returns 0 on success, or -1 after printing which step failed.
int prepareStreamInfo(AVCodecContext** codecContext, AVCodec** codec, AVStream* stream) {
    *codec = avcodec_find_decoder(stream->codecpar->codec_id);
    if (!*codec) {
        printf("[ERROR] Failed to find input codec\n");
        return -1;
    }
    *codecContext = avcodec_alloc_context3(*codec);
    // Check the allocated context itself; the out-parameter `codecContext`
    // is never NULL here, so testing it would miss an allocation failure.
    if (!*codecContext) {
        printf("[ERROR] Failed to allocate memory for input codec context\n");
        return -1;
    }
    if (avcodec_parameters_to_context(*codecContext, stream->codecpar) < 0) {
        printf("[ERROR] Failed to fill input codec context\n");
        return -1;
    }
    if (avcodec_open2(*codecContext, *codec, NULL) < 0) {
        printf("[ERROR] Failed to open input codec\n");
        return -1;
    }
    return 0;
}

// Opens the input/output containers, opens a decoder for each video and audio
// input stream, and creates one output stream per input stream with the
// input's codec parameters copied over. Finally opens the output file for
// writing unless the output format has AVFMT_NOFILE set.
//
// Returns 0 on success, or -1 after printing which step failed.
// NOTE(review): the "not found" checks rely on videoStream/audioStream having
// been preset to -1 before this call (presumably by initVideo) — confirm.
int findStreams(Video* video, char* filename, char* outputFile) {
    if (openVideo(video, filename, outputFile) < 0) {
        printf("[ERROR] Video %s failed to open\n", filename);
        return -1;
    }
    // nb_streams is unsigned, so use an unsigned index to avoid a mixed-sign comparison.
    for (unsigned int i = 0; i < video->inputContext->nb_streams; i++) {
        video->inputStream = video->inputContext->streams[i];
        if (video->inputStream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            video->videoStream = (int)i;
            if (prepareStreamInfo(&(video->videoCodecContext_I), &(video->videoCodec), video->inputStream) < 0) {
                printf("[ERROR] Could not prepare video stream information\n");
                return -1;
            }
        } else if (video->inputStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            video->audioStream = (int)i;
            if (prepareStreamInfo(&(video->audioCodecContext_I), &(video->audioCodec), video->inputStream) < 0) {
                printf("[ERROR] Could not prepare audio stream information\n");
                return -1;
            }
        }
        // Every input stream gets an output stream at the same index, so
        // input stream indices can be reused on the output side.
        video->outputStream = avformat_new_stream(video->outputContext, NULL);
        if (!video->outputStream) {
            printf("[ERROR] Failed allocating output stream\n");
            return -1;
        }
        if (avcodec_parameters_copy(video->outputStream->codecpar, video->inputStream->codecpar) < 0) {
            printf("[ERROR] Failed to copy codec parameters\n");
            return -1;
        }
    }
    if (video->videoStream == -1) {
        printf("[ERROR] Video stream for %s not found\n", filename);
        return -1;
    }
    if (video->audioStream == -1) {
        printf("[ERROR] Audio stream for %s not found\n", filename);
        return -1;
    }
    if (!(video->outputContext->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&(video->outputContext->pb), outputFile, AVIO_FLAG_WRITE) < 0) {
            printf("Could not open output file %s", outputFile);
            return -1;
        }
    }
    return 0;
}

// Creates and opens the "mp2" audio encoder and copies its parameters into
// the audio output stream.
//
// Channel count, sample rate and bit rate are taken from the audio decoder
// context; the sample format is the first one the encoder lists.
// Returns 0 on success, or -1 after printing which step failed.
int prepareAudioOutStream(Video* video) {
    video->audioCodec = avcodec_find_encoder_by_name("mp2");
    if (!video->audioCodec) {
        printf("[ERROR] Failed to find audio output codec\n");
        return -1;
    }
    video->audioCodecContext_O = avcodec_alloc_context3(video->audioCodec);
    if (!video->audioCodecContext_O) {
        printf("[ERROR] Failed to allocate memory for audio output codec context\n");
        return -1;
    }
    video->audioCodecContext_O->channels = video->audioCodecContext_I->channels;
    video->audioCodecContext_O->channel_layout = av_get_default_channel_layout(video->audioCodecContext_O->channels);
    video->audioCodecContext_O->sample_rate = video->audioCodecContext_I->sample_rate;
    // NOTE(review): this is the encoder's preferred sample format, which may
    // differ from the format of the decoded frames. If it does, frames likely
    // need to be resampled before encoding — confirm against the decoder's
    // sample_fmt.
    video->audioCodecContext_O->sample_fmt = video->audioCodec->sample_fmts[0];
    video->audioCodecContext_O->bit_rate = video->audioCodecContext_I->bit_rate;
    // An encoder's time base must be set by the caller; the decoder context's
    // time_base is not guaranteed to hold a usable value, so use one tick per
    // audio sample instead of copying it.
    video->audioCodecContext_O->time_base.num = 1;
    video->audioCodecContext_O->time_base.den = video->audioCodecContext_O->sample_rate;
    video->audioCodecContext_O->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;
    if (avcodec_open2(video->audioCodecContext_O, video->audioCodec, NULL) < 0) {
        printf("[ERROR] Failed to open audio output codec\n");
        return -1;
    }
    if (avcodec_parameters_from_context(getAudioStream(video)->codecpar, video->audioCodecContext_O) < 0) {
        printf("[ERROR] Failed to fill audio stream\n");
        return -1;
    }
    return 0;
}

// Sends one audio packet to the decoder and passes every frame the decoder
// produces to encodeAudio().
//
// `frame` is reused as the receive buffer and unreferenced after each
// successfully encoded frame.
// Returns 0 once the decoder has no more frames for this packet, the
// decoder's negative error code if sending or receiving fails, or -1 if
// encoding a frame fails.
int decodeAudio(Video* video, AVPacket* packet, AVFrame* frame) {
    AVCodecContext* decoder = video->audioCodecContext_I;

    int status = avcodec_send_packet(decoder, packet);
    if (status < 0) {
        printf("[ERROR] Failed to send audio packet to decoder\n");
        return status;
    }

    for (;;) {
        status = avcodec_receive_frame(decoder, frame);
        // EAGAIN: decoder needs more input; EOF: decoder is drained.
        if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) {
            return 0;
        }
        if (status < 0) {
            printf("[ERROR] Failed to receive audio frame from decoder\n");
            return status;
        }

        // Do stuff and encode
        if (encodeAudio(video, frame) < 0) {
            printf("[ERROR] Failed to encode new audio\n");
            return -1;
        }
        av_frame_unref(frame);
    }
}

// Sends one audio frame to the encoder (or NULL to flush it) and writes every
// packet the encoder produces to the output container.
//
// Packet timestamps are rescaled from the audio input stream's time base to
// the matching output stream's time base before writing.
// Returns 0 on success, -1 if the packet cannot be allocated, or the negative
// FFmpeg error code if sending, receiving or writing fails. The temporary
// packet is released on every path.
int encodeAudio(Video* video, AVFrame* frame) {
    AVPacket* packet = av_packet_alloc();
    if (!packet) {
        printf("[ERROR] Could not allocate memory for audio output packet\n");
        return -1;
    }
    int result = 0;
    int response = avcodec_send_frame(video->audioCodecContext_O, frame);
    if (response < 0) {
        printf("[ERROR] Failed to send audio frame for encoding\n");
        result = response;
    }
    while (response >= 0) {
        response = avcodec_receive_packet(video->audioCodecContext_O, packet);
        // EAGAIN: encoder needs more input; EOF: encoder is fully flushed.
        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF) {
            break;
        } else if (response < 0) {
            printf("[ERROR] Failed to receive audio packet from encoder\n");
            result = response;
            break;
        }
        packet->stream_index = video->audioStream;
        video->inputStream = getAudioStream(video);
        video->outputStream = video->outputContext->streams[packet->stream_index];
        packet->pts = av_rescale_q_rnd(packet->pts, video->inputStream->time_base, video->outputStream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
        packet->dts = av_rescale_q_rnd(packet->dts, video->inputStream->time_base, video->outputStream->time_base, AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX);
        packet->duration = av_rescale_q(packet->duration, video->inputStream->time_base, video->outputStream->time_base);
        packet->pos = -1;

        response = av_interleaved_write_frame(video->outputContext, packet);
        if (response < 0) {
            printf("[ERROR] Failed to write audio packet\n");
            // Report the failure to the caller instead of returning success.
            result = response;
            break;
        }
    }
    // Single exit point so the packet is never leaked on an error path.
    av_packet_unref(packet);
    av_packet_free(&packet);
    return result;
}

// Main transcode loop: prepares both output encoders, reads every packet from
// the input, routes it to decodeVideo() or decodeAudio() by stream index,
// then flushes both encoders and writes the container trailer.
//
// `packet` and `frame` are caller-allocated scratch buffers; the packet is
// unreferenced after each read, including when decoding fails.
// Returns 0 on success, or -1 if an argument is NULL, an encoder cannot be
// prepared, a packet fails to decode/encode, or flushing/finalising the
// output fails.
int readFrames(Video* video, AVPacket* packet, AVFrame* frame) {
    if (!packet) {
        printf("[ERROR] Packet not allocated to be read\n");
        return -1;
    }
    if (!frame) {
        printf("[ERROR] Frame not allocated to be read\n");
        return -1;
    }
    if (prepareVideoOutStream(video) < 0) {
        printf("[ERROR] Failed to prepare output video stream\n");
        return -1;
    }
    if (prepareAudioOutStream(video) < 0) {
        printf("[ERROR] Failed to prepare output audio stream\n");
        return -1;
    }
    int frameNum = 0;
    while (av_read_frame(video->inputContext, packet) >= 0) {
        printf("[READ] Reading frame %i\n", frameNum);
        int failed = 0;
        if (packet->stream_index == video->videoStream) {
            if (decodeVideo(video, packet, frame) < 0) {
                printf("[ERROR] Failed to decode and encode video\n");
                failed = 1;
            }
        } else if (packet->stream_index == video->audioStream) {
            if (decodeAudio(video, packet, frame) < 0) {
                printf("[ERROR] Failed to decode and encode audio\n");
                failed = 1;
            }
        }
        // Release the packet's data before any early return.
        av_packet_unref(packet);
        if (failed) {
            return -1;
        }
        frameNum++;
    }
    // Flush encoders and finalise the file. All three steps are attempted
    // even if an earlier one fails, but any failure is reported to the caller.
    int result = 0;
    if (encodeVideo(video, NULL) < 0) {
        printf("[ERROR] Failed to flush video encoder\n");
        result = -1;
    }
    if (encodeAudio(video, NULL) < 0) {
        printf("[ERROR] Failed to flush audio encoder\n");
        result = -1;
    }
    if (av_write_trailer(video->outputContext) < 0) {
        printf("[ERROR] Failed to write output trailer\n");
        result = -1;
    }
    return result;
}

运行所有函数的主方法：

// Entry point: transcodes the file named by argv[1] into the file named by
// argv[2].
//
// Returns 0 on success, or -1 if the arguments are missing or any stage
// (allocation, stream discovery, header writing, transcoding) fails.
int main(int argc, char* argv[]) {
    // argv[1] and argv[2] are read below, so both must be present.
    if (argc < 3) {
        printf("[ERROR] Usage: %s <input file> <output file>\n", argc > 0 ? argv[0] : "program");
        return -1;
    }
    Video* video = malloc(sizeof *video);
    if (!video) {
        printf("[ERROR] Could not allocate memory for video\n");
        return -1;
    }
    initVideo(video);
    if (findStreams(video, argv[1], argv[2]) < 0) {
        printf("[ERROR] Could not find streams\n");
        return -1;
    }

    // NOTE(review): the header is written here, before readFrames() sets up
    // the output encoders via prepareVideoOutStream()/prepareAudioOutStream().
    // Stream parameters changed after this point may not be reflected in the
    // written header — confirm the intended ordering.
    AVDictionary* dic = NULL;
    if (avformat_write_header(video->outputContext, &dic) < 0) {
        printf("[ERROR] Error while writing header to output file\n");
        return -1;
    }
    // Allocation failures are not checked here: readFrames() rejects NULL
    // for either argument.
    AVFrame* frame = av_frame_alloc();
    AVPacket* packet = av_packet_alloc();
    int result = readFrames(video, packet, frame);
    if (result < 0) {
        printf("[ERROR] Failed to read and write new video\n");
    }
    // Both free functions accept a pointer to NULL, so this is safe even if
    // an allocation above failed.
    av_frame_free(&frame);
    av_packet_free(&packet);
    if (result < 0) {
        return -1;
    }
    freeVideo(video); // Frees all codecs and contexts and the video
    return 0;
}

我尝试将我的代码布局排列得整齐，这样可以从上到下阅读而不需要向上滚动。

我意识到在复制视频时，我只需将AVPacket传递给输出文件的写操作即可，但我想将来能够使用AVFrame，所以我写成了这种方式。我有一种感觉，我的音频行为问题是由于prepareAudioOutStream()函数中的音频输出AVCodecContext引起的。

阅读FFmpeg文档以及其他在线资源对此问题并没有什么帮助。我肯定是漏掉了某些东西（或者有一些不必要的东西），所以任何指向正确方向的信息都将很有帮助。

谢谢。

- user3208915

2个回答

2

感谢Alexis（上文提到的人），我能够找出问题所在。我意识到需要重新采样音频，以便在编码时匹配。

我基本上需要创建一个SwrContext并使用swr_init和swr_convert_frame函数对音频进行重新采样。这里我参考了一些代码：https://ffmpeg.org/doxygen/trunk/transcode_aac_8c-example.html 另外，请确保为重新采样的帧设置channel_layout、sample_rate和format；否则，您将无法转换该帧。

- user3208915

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Alexis Nealon · Accepted Answer

我是一名音频工程师，不是一名程序员，但我希望这可能有所帮助。可能发生的情况是你的位深度被截断了；例如24位音频被截断为16位，这会听起来失真和嘈杂。每截去一个最高有效位，就会削减6dB的余量。这会增加噪音，并且随着被截去的有效位增多，一个响亮而清晰的正弦波会逐渐变成失真的方波。

在重新编码过程中检查位深度选项。可能是你的编码器有位深度限制。检查源位深度和重新编码的位深度，看看有什么区别。您可以使用VLC媒体播放器进行此操作。

还建议在编码之前留出一些余量（至少0.1 dB）。预编码音频可能已经达到最大值，因此重新编码可能会添加一些轻微的失真。

更多信息请参见：

通过截断减少采样位深度

https://www.apple.com/itunes/docs/apple-digital-masters.pdf