vs版本:vs2017
ffmpeg版本信息如下:
E:\learn\ffmpeg\ffmpeg_x264_static\bin>ffmpeg
ffmpeg version N-102642-g864d1ef2fc Copyright © 2000-2021 the FFmpeg developers
built with Microsoft ® C/C++ Optimizing Compiler Version 19.16.27045 for x64
configuration: --toolchain=msvc --arch=x86_64 --disable-debug --enable-gpl --enable-libx264 --extra-cflags=-I/usr/local/x264/include --extra-ldflags='-LIBPATH:/usr/local/x264/lib' --prefix=/home/ffmpeg_x264_static
libavutil 57. 0.100 / 57. 0.100
libavcodec 59. 1.100 / 59. 1.100
libavformat 59. 2.101 / 59. 2.101
libavdevice 59. 0.100 / 59. 0.100
libavfilter 8. 0.101 / 8. 0.101
libswscale 6. 0.100 / 6. 0.100
libswresample 4. 0.100 / 4. 0.100
libpostproc 56. 0.100 / 56. 0.100
现在我用ffmpeg录制了32秒的本地系统声音(音乐),采样率为48000,声道数为2,channel_layout为AV_CH_LAYOUT_STEREO,sample_fmt为AV_SAMPLE_FMT_FLTP,现在需要重采样,采样率需要变为44100,其余的保持不变。
为此写了如下代码:
#include <Windows.h>
#include <conio.h>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
// Demuxer context for the source file (opened in OpenAudioFileCapture) and
// muxer context for the output file (created in OpenOutPut).
AVFormatContext *pFormatCtx_InputAudio = NULL, *pFormatCtx_OutputAudio = NULL;
// Decoder context for input stream 0.
AVCodecContext *pReadCodecContext = NULL;
// Index of the audio stream inside the output file; OpenOutPut sets it to 0.
int AudioIndex_file;
// Encoder context and encoder used for the output audio stream.
AVCodecContext *pCodecEncodeCtx_Audio = NULL;
AVCodec *pCodecEncode_Audio = NULL;
// FIFO of decoded samples: written by AudioFileCapThreadProc, read by main.
// It stays NULL until the capture thread has decoded its first frame.
AVAudioFifo *fifo_audio_mic = NULL;
// Cleared by the capture thread when av_read_frame reports end of file;
// main polls it to decide when to stop.
// NOTE(review): plain bool shared between threads without synchronization.
bool bCap = true;
// Guards the read/write calls on fifo_audio_mic.
CRITICAL_SECTION AudioSection;
// Thread entry point that reads and decodes the input file.
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam);
static char *dup_wchar_to_utf8(const wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *)av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
int OpenAudioFileCapture()
{
//查找输入方式
const AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
//以Direct Show的方式打开设备,并将 输入方式 关联到格式上下文
const char * psDevName = dup_wchar_to_utf8(L"E:/learn/ffmpeg/ffmpeg_x264_static/bin/FfmpegAudioTest-48000.mp4");
if (avformat_open_input(&pFormatCtx_InputAudio, psDevName, NULL, NULL) < 0)
{
printf("Couldn't open input stream.(无法打开音频输入流)\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx_InputAudio, NULL) < 0)
{
return -1;
}
if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
{
printf("Couldn't find video stream information.(无法获取音频流信息)\n");
return -1;
}
const AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_InputAudio->streams[0]->codecpar->codec_id);
pReadCodecContext = avcodec_alloc_context3(tmpCodec);
//pReadCodecContext->sample_rate = select_sample_rate(tmpCodec);
pReadCodecContext->sample_rate = 48000;
pReadCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
pReadCodecContext->channels = av_get_channel_layout_nb_channels(pReadCodecContext->channel_layout);
pReadCodecContext->sample_fmt = (AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format;
//pReadCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
if (0 > avcodec_open2(pReadCodecContext, tmpCodec, NULL))
{
printf("can not find or open audio decoder!\n");
}
return 0;
}
int OpenOutPut()
{
AVStream *pAudioStream = NULL;
const char *outFileName = "FfmpegAudioTest.mp4";
avformat_alloc_output_context2(&pFormatCtx_OutputAudio, NULL, NULL, outFileName);
if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
AVCodecContext *pOutputCodecCtx;
pAudioStream = avformat_new_stream(pFormatCtx_OutputAudio, NULL);
AudioIndex_file = 0;
pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(pFormatCtx_OutputAudio->oformat->audio_codec);
pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
if (!pCodecEncodeCtx_Audio) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
//pCodecEncodeCtx_Audio->codec_id = pFormatCtx_Out->oformat->audio_codec;
pCodecEncodeCtx_Audio->sample_fmt = pCodecEncode_Audio->sample_fmts ? pCodecEncode_Audio->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
pCodecEncodeCtx_Audio->bit_rate = 64000;
pCodecEncodeCtx_Audio->sample_rate = 44100;
if (pCodecEncode_Audio->supported_samplerates) {
pCodecEncodeCtx_Audio->sample_rate = pCodecEncode_Audio->supported_samplerates[0];
for (int i = 0; pCodecEncode_Audio->supported_samplerates[i]; i++) {
if (pCodecEncode_Audio->supported_samplerates[i] == 44100)
pCodecEncodeCtx_Audio->sample_rate = 44100;
}
}
pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
if (pCodecEncode_Audio->channel_layouts) {
pCodecEncodeCtx_Audio->channel_layout = pCodecEncode_Audio->channel_layouts[0];
for (int i = 0; pCodecEncode_Audio->channel_layouts[i]; i++) {
if (pCodecEncode_Audio->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
AVRational timeBase;
timeBase.den = pCodecEncodeCtx_Audio->sample_rate;
timeBase.num = 1;
pAudioStream->time_base = timeBase;
if (avcodec_open2(pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
{
//编码器打开失败,退出程序
return -1;
}
}
if (!(pFormatCtx_OutputAudio->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&pFormatCtx_OutputAudio->pb, outFileName, AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
return -1;
}
}
avcodec_parameters_from_context(pAudioStream->codecpar, pCodecEncodeCtx_Audio);
if (avformat_write_header(pFormatCtx_OutputAudio, NULL) < 0)
{
printf("can not write the header of the output file!\n");
return -1;
}
return 0;
}
// Entry point of the first (single-FIFO) version.
// Sets up a 48000 Hz -> 44100 Hz stereo FLTP resampler, opens input and
// output, starts the capture thread, then loops: take one block of decoded
// samples from fifo_audio_mic, resample it, encode it and write the packet.
// Finally writes the trailer and closes both files. Returns 0, or -1 if
// opening the input or output fails.
int main(int argc, char *argv[])
{
int ret = 0;
avdevice_register_all();
struct SwrContext *audio_convert_ctx;
audio_convert_ctx = swr_alloc();
/* set options */
av_opt_set_int(audio_convert_ctx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_int(audio_convert_ctx, "in_sample_rate", 48000, 0);
av_opt_set_sample_fmt(audio_convert_ctx, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_int(audio_convert_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_int(audio_convert_ctx, "out_sample_rate", 44100, 0);
av_opt_set_sample_fmt(audio_convert_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
// NOTE(review): the result is stored but never checked; ret is overwritten
// further down by the encoder calls.
ret = swr_init(audio_convert_ctx);
if (OpenAudioFileCapture() < 0)
{
return -1;
}
if (OpenOutPut() < 0)
{
return -1;
}
InitializeCriticalSection(&AudioSection);
CreateThread(NULL, 0, AudioFileCapThreadProc, 0, 0, NULL);
// Counts the packets written so far; used to derive timestamps.
int AudioFrameIndex_mic = 0;
while (1)
{
// The FIFO is created by the capture thread; until then this loop spins
// without sleeping.
if (NULL == fifo_audio_mic)
{
continue;
}
// Proceed only once a full block (the output stream's frame_size, or 1024
// if that is not set) is available.
if (av_audio_fifo_size(fifo_audio_mic) >=
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
{
// frame_mic receives the block read from the FIFO (resampler input).
// Its layout and format are taken from the OUTPUT stream, its sample rate
// from the INPUT stream.
AVFrame *frame_mic = NULL;
frame_mic = av_frame_alloc();
frame_mic->nb_samples = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024;
frame_mic->channel_layout = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->channel_layout;
frame_mic->format = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->format;
//frame_mic->sample_rate = pFormatCtx_Out->streams[AudioIndex_mic]->codecpar->sample_rate;
frame_mic->sample_rate = pFormatCtx_InputAudio->streams[0]->codecpar->sample_rate;
av_frame_get_buffer(frame_mic, 0);
EnterCriticalSection(&AudioSection);
int readcount = av_audio_fifo_read(fifo_audio_mic, (void **)frame_mic->data,
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
LeaveCriticalSection(&AudioSection);
AVPacket pkt_out_mic = { 0 };
int got_picture_mic = -1;
pkt_out_mic.data = NULL;
pkt_out_mic.size = 0;
frame_mic->pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
// frame_mic_encode receives the resampler output and is sent to the encoder.
AVFrame *frame_mic_encode = NULL;
frame_mic_encode = av_frame_alloc();
//int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);
int iDelaySamples = 0;
//int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, frame_mic->sample_rate, pCodecEncodeCtx_Audio->sample_rate, AVRounding(1));
// Number of output samples for this block: input count scaled by
// out_rate / in_rate, rounded up.
int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, pCodecEncodeCtx_Audio->sample_rate, frame_mic->sample_rate, AV_ROUND_UP);
// NOTE(review): the frame is declared with the encoder's full frame_size,
// but swr_convert below is asked for at most dst_nb_samples and its return
// value (nb) is not used. The rest of the buffer is not written here, yet
// the whole frame is encoded.
frame_mic_encode->nb_samples = pCodecEncodeCtx_Audio->frame_size;
frame_mic_encode->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
frame_mic_encode->format = pCodecEncodeCtx_Audio->sample_fmt;
frame_mic_encode->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
av_frame_get_buffer(frame_mic_encode, 0);
//uint8_t *audio_buf = NULL;
// Plane pointers of the output frame (two planes are used).
uint8_t *audio_buf[2] = { 0 };
audio_buf[0] = (uint8_t *)frame_mic_encode->data[0];
audio_buf[1] = (uint8_t *)frame_mic_encode->data[1];
int nb = swr_convert(audio_convert_ctx, audio_buf, dst_nb_samples, (const uint8_t**)frame_mic->data, frame_mic->nb_samples);
ret = avcodec_send_frame(pCodecEncodeCtx_Audio, frame_mic_encode);
ret = avcodec_receive_packet(pCodecEncodeCtx_Audio, &pkt_out_mic);
// NOTE(review): this continue skips the two av_frame_free calls, the index
// increment and the exit test below.
if (ret == AVERROR(EAGAIN))
{
continue;
}
av_frame_free(&frame_mic);
av_frame_free(&frame_mic_encode);
{
pkt_out_mic.stream_index = AudioIndex_file;
//pkt_out_mic.pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
//pkt_out_mic.dts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
//pkt_out_mic.duration = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
// Timestamps advance by dst_nb_samples per packet.
pkt_out_mic.pts = AudioFrameIndex_mic * dst_nb_samples;
pkt_out_mic.dts = AudioFrameIndex_mic * dst_nb_samples;
pkt_out_mic.duration = dst_nb_samples;
int ret2 = av_interleaved_write_frame(pFormatCtx_OutputAudio, &pkt_out_mic);
av_packet_unref(&pkt_out_mic);
}
AudioFrameIndex_mic++;
// Stop once the capture thread has cleared bCap and less than one block is
// left. This test is only reached after a packet has been written.
if (!bCap)
{
if (av_audio_fifo_size(fifo_audio_mic) < 1024)
{
break;
}
}
}
}
av_write_trailer(pFormatCtx_OutputAudio);
avio_close(pFormatCtx_OutputAudio->pb);
avformat_free_context(pFormatCtx_OutputAudio);
if (pFormatCtx_InputAudio != NULL)
{
avformat_close_input(&pFormatCtx_InputAudio);
pFormatCtx_InputAudio = NULL;
}
// Wait for a key press before exiting.
getchar();
return 0;
}
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam)
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
AVPacket packet = { 0 };
int ret = 0;
int iCount = 0;
while (bCap)
{
av_packet_unref(&packet);
ret = av_read_frame(pFormatCtx_InputAudio, &packet);
if (ret == AVERROR_EOF)
{
bCap = false;
break;
}
iCount++;
ret = avcodec_send_packet(pReadCodecContext, &packet);
if (ret >= 0)
{
ret = avcodec_receive_frame(pReadCodecContext, pFrame);
if (ret == AVERROR(EAGAIN))
{
break;
}
else if (ret == AVERROR_EOF)
{
return 0;
}
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
if (NULL == fifo_audio_mic)
{
fifo_audio_mic = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format, pFormatCtx_InputAudio->streams[0]->codecpar->channels, 3000 * pFrame->nb_samples);
}
int buf_space = av_audio_fifo_space(fifo_audio_mic);
if (av_audio_fifo_space(fifo_audio_mic) >= pFrame->nb_samples)
{
EnterCriticalSection(&AudioSection);
ret = av_audio_fifo_write(fifo_audio_mic, (void **)pFrame->data, pFrame->nb_samples);
LeaveCriticalSection(&AudioSection);
}
int samples = av_audio_fifo_size(fifo_audio_mic);
av_packet_unref(&packet);
}
}
return 0;
}
其中线程AudioFileCapThreadProc用于读取本地mp4文件,ret == AVERROR_EOF表示已经读到文件尾。
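然后主线程读取队列fifo_audio_mic里面的数据,每次读取1024个采样点(frame_size),再用swr_convert进行重采样。原来每次读取的1024个采样点,经过重采样后变为941个(1024 × 44100 / 48000,向上取整)。但送入编码器的帧仍然声明有1024个采样点(frame_mic_encode->nb_samples = frame_size),其中只有前941个是重采样得到的有效数据,因此录制后的文件虽然也是32秒,却伴有滋滋声。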
注意,int iDelaySamples = 0;本人采取的是0,如果是下面这样,就会出现问题,具体原因,我也回答不上来。
int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);
就上面产生的滋滋声,本人查了一些csdn,其中重点参考了如下博客:
https://blog.youkuaiyun.com/qq_42152681/article/details/113064018
找到了下面的一段话:
我重采样后得到的采样点数是941,不是1024。所以本人改造时,没有把这941个采样点的音频数据直接编码并调用av_interleaved_write_frame写文件,而是先送入第二个队列,等积累够1024个采样点时再编码、写文件,结果ok,代码如下:
#include <Windows.h>
#include <conio.h>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif
// Demuxer context for the source file (opened in OpenAudioFileCapture) and
// muxer context for the output file (created in OpenOutPut).
AVFormatContext *pFormatCtx_InputAudio = NULL, *pFormatCtx_OutputAudio = NULL;
// Decoder context for input stream 0.
AVCodecContext *pReadCodecContext = NULL;
// Index of the audio stream inside the output file; OpenOutPut sets it to 0.
int AudioIndex_file;
// Encoder context and encoder used for the output audio stream.
AVCodecContext *pCodecEncodeCtx_Audio = NULL;
AVCodec *pCodecEncode_Audio = NULL;
// FIFO of decoded samples: written by AudioFileCapThreadProc, read by
// AudioFileResampleThreadProc.
AVAudioFifo *fifo_audio_inner = NULL;
// FIFO of resampled samples: written by AudioFileResampleThreadProc, read by
// main. Both FIFOs stay NULL until the capture thread has decoded a frame.
AVAudioFifo *fifo_audio_resample = NULL;
// Resampler, configured in main and used by the resample thread.
struct SwrContext *audio_convert_ctx = NULL;
// Cleared by the capture thread when av_read_frame reports end of file;
// polled by the other threads to decide when to stop.
// NOTE(review): plain bool shared between threads without synchronization.
bool bCap = true;
// Guards the read/write calls on fifo_audio_inner.
CRITICAL_SECTION AudioSection;
// Guards the read/write calls on fifo_audio_resample.
CRITICAL_SECTION AudioResampleSection;
// Thread entry point that reads and decodes the input file.
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam);
// Thread entry point that resamples decoded audio.
DWORD WINAPI AudioFileResampleThreadProc(LPVOID lpParam);
static char *dup_wchar_to_utf8(const wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *)av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
int OpenAudioFileCapture()
{
//查找输入方式
const AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
//以Direct Show的方式打开设备,并将 输入方式 关联到格式上下文
const char * psDevName = dup_wchar_to_utf8(L"E:/learn/ffmpeg/ffmpeg_x264_static/bin/FfmpegAudioTest-48000.mp4");
if (avformat_open_input(&pFormatCtx_InputAudio, psDevName, NULL, NULL) < 0)
{
printf("Couldn't open input stream.(无法打开音频输入流)\n");
return -1;
}
if (avformat_find_stream_info(pFormatCtx_InputAudio, NULL) < 0)
{
return -1;
}
if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
{
printf("Couldn't find video stream information.(无法获取音频流信息)\n");
return -1;
}
const AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_InputAudio->streams[0]->codecpar->codec_id);
pReadCodecContext = avcodec_alloc_context3(tmpCodec);
//pReadCodecContext->sample_rate = select_sample_rate(tmpCodec);
pReadCodecContext->sample_rate = 48000;
pReadCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
pReadCodecContext->channels = av_get_channel_layout_nb_channels(pReadCodecContext->channel_layout);
pReadCodecContext->sample_fmt = (AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format;
//pReadCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;
if (0 > avcodec_open2(pReadCodecContext, tmpCodec, NULL))
{
printf("can not find or open audio decoder!\n");
}
return 0;
}
int OpenOutPut()
{
AVStream *pAudioStream = NULL;
const char *outFileName = "FfmpegAudioTest.mp4";
avformat_alloc_output_context2(&pFormatCtx_OutputAudio, NULL, NULL, outFileName);
if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
{
AVCodecContext *pOutputCodecCtx;
pAudioStream = avformat_new_stream(pFormatCtx_OutputAudio, NULL);
AudioIndex_file = 0;
pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(pFormatCtx_OutputAudio->oformat->audio_codec);
pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
if (!pCodecEncodeCtx_Audio) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
//pCodecEncodeCtx_Audio->codec_id = pFormatCtx_Out->oformat->audio_codec;
pCodecEncodeCtx_Audio->sample_fmt = pCodecEncode_Audio->sample_fmts ? pCodecEncode_Audio->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
pCodecEncodeCtx_Audio->bit_rate = 64000;
pCodecEncodeCtx_Audio->sample_rate = 44100;
if (pCodecEncode_Audio->supported_samplerates) {
pCodecEncodeCtx_Audio->sample_rate = pCodecEncode_Audio->supported_samplerates[0];
for (int i = 0; pCodecEncode_Audio->supported_samplerates[i]; i++) {
if (pCodecEncode_Audio->supported_samplerates[i] == 44100)
pCodecEncodeCtx_Audio->sample_rate = 44100;
}
}
pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
if (pCodecEncode_Audio->channel_layouts) {
pCodecEncodeCtx_Audio->channel_layout = pCodecEncode_Audio->channel_layouts[0];
for (int i = 0; pCodecEncode_Audio->channel_layouts[i]; i++) {
if (pCodecEncode_Audio->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
}
}
pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
AVRational timeBase;
timeBase.den = pCodecEncodeCtx_Audio->sample_rate;
timeBase.num = 1;
pAudioStream->time_base = timeBase;
if (avcodec_open2(pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
{
//编码器打开失败,退出程序
return -1;
}
}
if (!(pFormatCtx_OutputAudio->oformat->flags & AVFMT_NOFILE))
{
if (avio_open(&pFormatCtx_OutputAudio->pb, outFileName, AVIO_FLAG_WRITE) < 0)
{
printf("can not open output file handle!\n");
return -1;
}
}
avcodec_parameters_from_context(pAudioStream->codecpar, pCodecEncodeCtx_Audio);
if (avformat_write_header(pFormatCtx_OutputAudio, NULL) < 0)
{
printf("can not write the header of the output file!\n");
return -1;
}
return 0;
}
int main(int argc, char *argv[])
{
int ret = 0;
avdevice_register_all();
audio_convert_ctx = swr_alloc();
/* set options */
av_opt_set_int(audio_convert_ctx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_int(audio_convert_ctx, "in_sample_rate", 48000, 0);
av_opt_set_sample_fmt(audio_convert_ctx, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
av_opt_set_int(audio_convert_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
av_opt_set_int(audio_convert_ctx, "out_sample_rate", 44100, 0);
av_opt_set_sample_fmt(audio_convert_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
ret = swr_init(audio_convert_ctx);
if (OpenAudioFileCapture() < 0)
{
return -1;
}
if (OpenOutPut() < 0)
{
return -1;
}
InitializeCriticalSection(&AudioSection);
InitializeCriticalSection(&AudioResampleSection);
CreateThread(NULL, 0, AudioFileCapThreadProc, 0, 0, NULL);
CreateThread(NULL, 0, AudioFileResampleThreadProc, 0, 0, NULL);
int AudioFrameIndex_mic = 0;
while (1)
{
if (NULL == fifo_audio_inner)
{
continue;
}
if (NULL == fifo_audio_resample)
{
continue;
}
if (av_audio_fifo_size(fifo_audio_resample) >=
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
{
AVFrame *frame_resample = NULL;
frame_resample = av_frame_alloc();
frame_resample->nb_samples = pCodecEncodeCtx_Audio->frame_size;
frame_resample->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
frame_resample->format = pCodecEncodeCtx_Audio->sample_fmt;
frame_resample->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
av_frame_get_buffer(frame_resample, 0);
EnterCriticalSection(&AudioResampleSection);
int readcount = av_audio_fifo_read(fifo_audio_resample, (void **)frame_resample->data,
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
LeaveCriticalSection(&AudioResampleSection);
AVPacket pkt_out_resample = { 0 };
pkt_out_resample.data = NULL;
pkt_out_resample.size = 0;
frame_resample->pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
ret = avcodec_send_frame(pCodecEncodeCtx_Audio, frame_resample);
ret = avcodec_receive_packet(pCodecEncodeCtx_Audio, &pkt_out_resample);
if (ret == AVERROR(EAGAIN))
{
continue;
}
av_frame_free(&frame_resample);
{
pkt_out_resample.stream_index = AudioIndex_file;
//pkt_out_mic.pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
//pkt_out_mic.dts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
//pkt_out_mic.duration = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
pkt_out_resample.pts = AudioFrameIndex_mic * 1024;
pkt_out_resample.dts = AudioFrameIndex_mic * 1024;
pkt_out_resample.duration = 1024;
int ret2 = av_interleaved_write_frame(pFormatCtx_OutputAudio, &pkt_out_resample);
av_packet_unref(&pkt_out_resample);
}
AudioFrameIndex_mic++;
if (!bCap)
{
if (av_audio_fifo_size(fifo_audio_resample) < 1024)
{
break;
}
}
}
}
av_write_trailer(pFormatCtx_OutputAudio);
avio_close(pFormatCtx_OutputAudio->pb);
avformat_free_context(pFormatCtx_OutputAudio);
if (pFormatCtx_InputAudio != NULL)
{
avformat_close_input(&pFormatCtx_InputAudio);
pFormatCtx_InputAudio = NULL;
}
getchar();
return 0;
}
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam)
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
AVPacket packet = { 0 };
int ret = 0;
int iCount = 0;
while (bCap)
{
av_packet_unref(&packet);
ret = av_read_frame(pFormatCtx_InputAudio, &packet);
if (ret == AVERROR_EOF)
{
bCap = false;
break;
}
iCount++;
ret = avcodec_send_packet(pReadCodecContext, &packet);
if (ret >= 0)
{
ret = avcodec_receive_frame(pReadCodecContext, pFrame);
if (ret == AVERROR(EAGAIN))
{
break;
}
else if (ret == AVERROR_EOF)
{
return 0;
}
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
if (NULL == fifo_audio_inner)
{
fifo_audio_inner = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format, pFormatCtx_InputAudio->streams[0]->codecpar->channels, 3000 * pFrame->nb_samples);
}
if (NULL == fifo_audio_resample)
{
fifo_audio_resample = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_OutputAudio->streams[0]->codecpar->format, pFormatCtx_OutputAudio->streams[0]->codecpar->channels, 3000 * pFrame->nb_samples);
}
int buf_space = av_audio_fifo_space(fifo_audio_inner);
if (av_audio_fifo_space(fifo_audio_inner) >= pFrame->nb_samples)
{
EnterCriticalSection(&AudioSection);
ret = av_audio_fifo_write(fifo_audio_inner, (void **)pFrame->data, pFrame->nb_samples);
LeaveCriticalSection(&AudioSection);
}
int samples = av_audio_fifo_size(fifo_audio_inner);
av_packet_unref(&packet);
}
}
return 0;
}
DWORD WINAPI AudioFileResampleThreadProc(LPVOID lpParam)
{
AVFrame *pFrame;
pFrame = av_frame_alloc();
AVPacket packet = { 0 };
int ret = 0;
int iCount = 0;
while (1)
{
if (fifo_audio_inner == NULL)
{
continue;
}
if (av_audio_fifo_size(fifo_audio_inner) >=
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
{
AVFrame *frame_mic = NULL;
frame_mic = av_frame_alloc();
frame_mic->nb_samples = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024;
frame_mic->channel_layout = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->channel_layout;
frame_mic->format = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->format;
//frame_mic->sample_rate = pFormatCtx_Out->streams[AudioIndex_mic]->codecpar->sample_rate;
frame_mic->sample_rate = pFormatCtx_InputAudio->streams[0]->codecpar->sample_rate;
av_frame_get_buffer(frame_mic, 0);
EnterCriticalSection(&AudioSection);
int readcount = av_audio_fifo_read(fifo_audio_inner, (void **)frame_mic->data,
(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
LeaveCriticalSection(&AudioSection);
AVFrame *frame_mic_encode = NULL;
frame_mic_encode = av_frame_alloc();
//int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);
int iDelaySamples = 0;
//int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, frame_mic->sample_rate, pCodecEncodeCtx_Audio->sample_rate, AVRounding(1));
int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, pCodecEncodeCtx_Audio->sample_rate, frame_mic->sample_rate, AV_ROUND_UP);
frame_mic_encode->nb_samples = pCodecEncodeCtx_Audio->frame_size;
frame_mic_encode->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
frame_mic_encode->format = pCodecEncodeCtx_Audio->sample_fmt;
frame_mic_encode->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
av_frame_get_buffer(frame_mic_encode, 0);
//uint8_t *audio_buf = NULL;
uint8_t *audio_buf[2] = { 0 };
audio_buf[0] = (uint8_t *)frame_mic_encode->data[0];
audio_buf[1] = (uint8_t *)frame_mic_encode->data[1];
int nb = swr_convert(audio_convert_ctx, audio_buf, dst_nb_samples, (const uint8_t**)frame_mic->data, frame_mic->nb_samples);
//if (av_audio_fifo_space(fifo_audio_resample) >= pFrame->nb_samples)
{
EnterCriticalSection(&AudioResampleSection);
ret = av_audio_fifo_write(fifo_audio_resample, (void **)frame_mic_encode->data, dst_nb_samples);
LeaveCriticalSection(&AudioResampleSection);
}
if (!bCap)
{
if (av_audio_fifo_size(fifo_audio_inner) < 1024)
{
break;
}
}
}
}
return 0;
}