ffmpeg音频重采样

最新推荐文章于 2024-08-04 00:19:33 发布

原创最新推荐文章于 2024-08-04 00:19:33 发布 · 1k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#音视频

博客主要介绍了使用ffmpeg进行音频重采样时遇到的滋滋声问题。作者发现，由于原始音频帧经过重采样后不等于目标帧大小，直接写入文件会导致失真。解决方案是将不足一帧的数据暂存，积累到足够一帧后再写入文件，从而消除了滋滋声。

vs版本：vs2017
ffmpeg版本信息如下：
E:\learn\ffmpeg\ffmpeg_x264_static\bin>ffmpeg
ffmpeg version N-102642-g864d1ef2fc Copyright © 2000-2021 the FFmpeg developers
built with Microsoft ® C/C++ Optimizing Compiler Version 19.16.27045 for x64
configuration: --toolchain=msvc --arch=x86_64 --disable-debug --enable-gpl --enable-libx264 --extra-cflags=-I/usr/local/x264/include --extra-ldflags=’-LIBPATH:/usr/local/x264/lib’ --prefix=/home/ffmpeg_x264_static
libavutil 57. 0.100 / 57. 0.100
libavcodec 59. 1.100 / 59. 1.100
libavformat 59. 2.101 / 59. 2.101
libavdevice 59. 0.100 / 59. 0.100
libavfilter 8. 0.101 / 8. 0.101
libswscale 6. 0.100 / 6. 0.100
libswresample 4. 0.100 / 4. 0.100
libpostproc 56. 0.100 / 56. 0.100

现在我用ffmpeg录制了32秒的本地系统声音(音乐)，采样率为48000，声道数为2，channel_layout为AV_CH_LAYOUT_STEREO，sample_fmt为AV_SAMPLE_FMT_FLTP，现在需要重采样，采样率需要变为44100，其余的保持不变。

为此写了如下代码：

#include <Windows.h>
#include <conio.h>

#ifdef	__cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif




// Demuxer context of the source file and muxer context of the output file.
AVFormatContext	*pFormatCtx_InputAudio = NULL, *pFormatCtx_OutputAudio = NULL;
// Decoder context opened in OpenAudioFileCapture().
AVCodecContext *pReadCodecContext = NULL;

// Index of the audio stream inside the output file (set in OpenOutPut()).
int AudioIndex_file;

// Encoder context and encoder chosen in OpenOutPut().
AVCodecContext	*pCodecEncodeCtx_Audio = NULL;
AVCodec			*pCodecEncode_Audio = NULL;


// Decoded samples queued by AudioFileCapThreadProc and consumed by main().
// Created lazily by the reader thread, so it stays NULL until the first frame is decoded.
AVAudioFifo		*fifo_audio_mic = NULL;


// true while the reader thread is still supplying data; cleared by that thread.
bool bCap = true;
// Guards reads from and writes to fifo_audio_mic.
CRITICAL_SECTION AudioSection;

// Reader thread entry point (defined after main()).
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam);



static char *dup_wchar_to_utf8(const wchar_t *w)
{
	char *s = NULL;
	int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
	s = (char *)av_malloc(l);
	if (s)
		WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
	return s;
}





int OpenAudioFileCapture()
{
	//查找输入方式
	const AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");

	//以Direct Show的方式打开设备，并将 输入方式 关联到格式上下文
	const char * psDevName = dup_wchar_to_utf8(L"E:/learn/ffmpeg/ffmpeg_x264_static/bin/FfmpegAudioTest-48000.mp4");
	if (avformat_open_input(&pFormatCtx_InputAudio, psDevName, NULL, NULL) < 0)
	{
		printf("Couldn't open input stream.（无法打开音频输入流）\n");
		return -1;
	}

	if (avformat_find_stream_info(pFormatCtx_InputAudio, NULL) < 0)
	{
		return -1;
	}

	if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
	{
		printf("Couldn't find video stream information.（无法获取音频流信息）\n");
		return -1;
	}


	const AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_InputAudio->streams[0]->codecpar->codec_id);

	pReadCodecContext = avcodec_alloc_context3(tmpCodec);

	//pReadCodecContext->sample_rate = select_sample_rate(tmpCodec);
	pReadCodecContext->sample_rate = 48000;
	pReadCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
	pReadCodecContext->channels = av_get_channel_layout_nb_channels(pReadCodecContext->channel_layout);

	pReadCodecContext->sample_fmt = (AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format;
	//pReadCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;

	if (0 > avcodec_open2(pReadCodecContext, tmpCodec, NULL))
	{
		printf("can not find or open audio decoder!\n");
	}


	return 0;
}


int OpenOutPut()
{
	AVStream *pAudioStream = NULL;
	const char *outFileName = "FfmpegAudioTest.mp4";
	avformat_alloc_output_context2(&pFormatCtx_OutputAudio, NULL, NULL, outFileName);


	if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
	{
		AVCodecContext *pOutputCodecCtx;
		pAudioStream = avformat_new_stream(pFormatCtx_OutputAudio, NULL);

		AudioIndex_file = 0;

		pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(pFormatCtx_OutputAudio->oformat->audio_codec);

		pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
		if (!pCodecEncodeCtx_Audio) {
			fprintf(stderr, "Could not alloc an encoding context\n");
			exit(1);
		}


		//pCodecEncodeCtx_Audio->codec_id = pFormatCtx_Out->oformat->audio_codec;
		pCodecEncodeCtx_Audio->sample_fmt = pCodecEncode_Audio->sample_fmts ? pCodecEncode_Audio->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
		pCodecEncodeCtx_Audio->bit_rate = 64000;
		pCodecEncodeCtx_Audio->sample_rate = 44100;
		if (pCodecEncode_Audio->supported_samplerates) {
			pCodecEncodeCtx_Audio->sample_rate = pCodecEncode_Audio->supported_samplerates[0];
			for (int i = 0; pCodecEncode_Audio->supported_samplerates[i]; i++) {
				if (pCodecEncode_Audio->supported_samplerates[i] == 44100)
					pCodecEncodeCtx_Audio->sample_rate = 44100;
			}
		}

		pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
		pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
		if (pCodecEncode_Audio->channel_layouts) {
			pCodecEncodeCtx_Audio->channel_layout = pCodecEncode_Audio->channel_layouts[0];
			for (int i = 0; pCodecEncode_Audio->channel_layouts[i]; i++) {
				if (pCodecEncode_Audio->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
					pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
			}
		}
		pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);


		AVRational timeBase;
		timeBase.den = pCodecEncodeCtx_Audio->sample_rate;
		timeBase.num = 1;
		pAudioStream->time_base = timeBase;

		if (avcodec_open2(pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
		{
			//编码器打开失败，退出程序
			return -1;
		}
	}

	if (!(pFormatCtx_OutputAudio->oformat->flags & AVFMT_NOFILE))
	{
		if (avio_open(&pFormatCtx_OutputAudio->pb, outFileName, AVIO_FLAG_WRITE) < 0)
		{
			printf("can not open output file handle!\n");
			return -1;
		}
	}

	avcodec_parameters_from_context(pAudioStream->codecpar, pCodecEncodeCtx_Audio);

	if (avformat_write_header(pFormatCtx_OutputAudio, NULL) < 0)
	{
		printf("can not write the header of the output file!\n");
		return -1;
	}

	return 0;
}


// Entry point of the first (noisy) version discussed in the article:
// reads decoded samples from fifo_audio_mic, resamples each chunk from
// 48000 Hz to 44100 Hz and encodes the result straight away.
// NOTE(review): this listing is kept exactly as published because the text
// after it analyses the audible noise it produces.
int main(int argc, char *argv[])
{
	int ret = 0;


	avdevice_register_all();

	struct SwrContext *audio_convert_ctx;
	audio_convert_ctx = swr_alloc();


	/* set options */
	// Input side of the resampler: stereo, 48000 Hz, planar float (hard-coded).
	av_opt_set_int(audio_convert_ctx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(audio_convert_ctx, "in_sample_rate", 48000, 0);
	av_opt_set_sample_fmt(audio_convert_ctx, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);

	// Output side: same layout and format, 44100 Hz.
	av_opt_set_int(audio_convert_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(audio_convert_ctx, "out_sample_rate", 44100, 0);
	av_opt_set_sample_fmt(audio_convert_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);

	// The result of swr_init() is stored but not checked.
	ret = swr_init(audio_convert_ctx);

	if (OpenAudioFileCapture() < 0)
	{
		return -1;
	}

	if (OpenOutPut() < 0)
	{
		return -1;
	}


	InitializeCriticalSection(&AudioSection);

	// The reader thread decodes the file and fills fifo_audio_mic.
	CreateThread(NULL, 0, AudioFileCapThreadProc, 0, 0, NULL);

	// Counts frames that made it to the muxer; used to derive timestamps.
	int AudioFrameIndex_mic = 0;

	while (1)
	{
		// Spin (without sleeping) until the reader thread has created the FIFO.
		if (NULL == fifo_audio_mic)
		{
			continue;
		}
		// Proceed only when one full chunk (output frame_size, or 1024 when
		// that is not set) is queued. The size is read outside the lock.
		if (av_audio_fifo_size(fifo_audio_mic) >=
			(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
		{
			// frame_mic receives the source-rate samples taken from the FIFO.
			AVFrame *frame_mic = NULL;
			frame_mic = av_frame_alloc();

			// Layout and format are copied from the OUTPUT stream; only the
			// sample rate comes from the input stream.
			frame_mic->nb_samples = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024;
			frame_mic->channel_layout = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->channel_layout;
			frame_mic->format = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->format;
			//frame_mic->sample_rate = pFormatCtx_Out->streams[AudioIndex_mic]->codecpar->sample_rate;
			frame_mic->sample_rate = pFormatCtx_InputAudio->streams[0]->codecpar->sample_rate;
			av_frame_get_buffer(frame_mic, 0);

			EnterCriticalSection(&AudioSection);
			int readcount = av_audio_fifo_read(fifo_audio_mic, (void **)frame_mic->data,
				(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
			LeaveCriticalSection(&AudioSection);

			AVPacket pkt_out_mic = { 0 };

			int got_picture_mic = -1;
			pkt_out_mic.data = NULL;
			pkt_out_mic.size = 0;

			// Set on the source frame only; the frame handed to the encoder
			// below (frame_mic_encode) never gets a pts.
			frame_mic->pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;


			// frame_mic_encode receives the resampled data and goes to the encoder.
			AVFrame *frame_mic_encode = NULL;
			frame_mic_encode = av_frame_alloc();

			//int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);
			int iDelaySamples = 0;
			//int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, frame_mic->sample_rate, pCodecEncodeCtx_Audio->sample_rate, AVRounding(1));
			// Input sample count scaled by out_rate / in_rate, rounded up.
			int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, pCodecEncodeCtx_Audio->sample_rate, frame_mic->sample_rate, AV_ROUND_UP);


			// The frame is declared to hold a full encoder frame (frame_size
			// samples), while the conversion below is asked for at most
			// dst_nb_samples; the remainder of the buffer is never written here.
			frame_mic_encode->nb_samples = pCodecEncodeCtx_Audio->frame_size;
			frame_mic_encode->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
			frame_mic_encode->format = pCodecEncodeCtx_Audio->sample_fmt;
			frame_mic_encode->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
			av_frame_get_buffer(frame_mic_encode, 0);


			//uint8_t *audio_buf = NULL;
			// Two plane pointers: one per channel of the planar stereo output.
			uint8_t *audio_buf[2] = { 0 };
			audio_buf[0] = (uint8_t *)frame_mic_encode->data[0];
			audio_buf[1] = (uint8_t *)frame_mic_encode->data[1];

			// nb (the count returned by swr_convert) is not used afterwards.
			int nb = swr_convert(audio_convert_ctx, audio_buf, dst_nb_samples, (const uint8_t**)frame_mic->data, frame_mic->nb_samples);

			ret = avcodec_send_frame(pCodecEncodeCtx_Audio, frame_mic_encode);

			ret = avcodec_receive_packet(pCodecEncodeCtx_Audio, &pkt_out_mic);
			// When the encoder has no packet yet, this jumps back to the top of
			// the loop: both frames stay allocated, the frame index is not
			// advanced and the exit check below is skipped.
			if (ret == AVERROR(EAGAIN))
			{
				continue;
			}
			av_frame_free(&frame_mic);
			av_frame_free(&frame_mic_encode);
			{
				pkt_out_mic.stream_index = AudioIndex_file;
				//pkt_out_mic.pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
				//pkt_out_mic.dts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
				//pkt_out_mic.duration = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;

				// Timestamps advance by dst_nb_samples per packet, not by the
				// frame_size the encoder was actually given.
				pkt_out_mic.pts = AudioFrameIndex_mic * dst_nb_samples;
				pkt_out_mic.dts = AudioFrameIndex_mic * dst_nb_samples;
				pkt_out_mic.duration = dst_nb_samples;

				int ret2 = av_interleaved_write_frame(pFormatCtx_OutputAudio, &pkt_out_mic);
				av_packet_unref(&pkt_out_mic);
			}
			AudioFrameIndex_mic++;

			// Exit once the reader has stopped and less than one chunk is left.
			// This check is only reached after a packet has been written.
			if (!bCap)
			{
				if (av_audio_fifo_size(fifo_audio_mic) < 1024)
				{
					break;
				}
			}
		}
	}

	av_write_trailer(pFormatCtx_OutputAudio);

	avio_close(pFormatCtx_OutputAudio->pb);
	avformat_free_context(pFormatCtx_OutputAudio);

	if (pFormatCtx_InputAudio != NULL)
	{
		avformat_close_input(&pFormatCtx_InputAudio);
		pFormatCtx_InputAudio = NULL;
	}
	// Keeps the console window open until a key is pressed.
	getchar();

	return 0;
}


/**
 * Reader thread: demuxes and decodes the input file and queues every
 * decoded sample in fifo_audio_mic.
 *
 * The FIFO is created when the first frame has been decoded. bCap is
 * cleared on every way out of the function so the consumer waiting on it
 * can always finish.
 *
 * @param lpParam  unused
 * @return always 0
 */
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam)
{
	(void)lpParam;

	AVFrame *decoded = av_frame_alloc();
	AVPacket *packet = av_packet_alloc();
	bool inputDone = (decoded == NULL || packet == NULL);

	while (bCap && !inputDone)
	{
		int ret = av_read_frame(pFormatCtx_InputAudio, packet);
		if (ret < 0)
		{
			// End of file or read error: switch the decoder to draining so
			// the frames it still buffers are not lost.
			inputDone = true;
			ret = avcodec_send_packet(pReadCodecContext, NULL);
		}
		else if (packet->stream_index != 0)
		{
			// Only stream 0 was opened for decoding.
			av_packet_unref(packet);
			continue;
		}
		else
		{
			ret = avcodec_send_packet(pReadCodecContext, packet);
			av_packet_unref(packet);
		}
		if (ret < 0)
		{
			continue;	// packet rejected by the decoder: skip it
		}

		// One packet may yield zero, one or several frames.
		for (;;)
		{
			ret = avcodec_receive_frame(pReadCodecContext, decoded);
			if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
			{
				break;	// needs more input / fully drained
			}
			if (ret < 0)
			{
				fprintf(stderr, "Error during decoding\n");
				inputDone = true;
				break;
			}

			if (NULL == fifo_audio_mic)
			{
				fifo_audio_mic = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format, pFormatCtx_InputAudio->streams[0]->codecpar->channels, 3000 * decoded->nb_samples);
				if (NULL == fifo_audio_mic)
				{
					fprintf(stderr, "Could not allocate the audio FIFO\n");
					av_frame_unref(decoded);
					inputDone = true;
					break;
				}
			}

			// Wait for the consumer instead of dropping the frame when the
			// FIFO is full. An empty FIFO always accepts the write (it grows
			// if a single frame exceeds its capacity).
			for (;;)
			{
				bool written = false;
				EnterCriticalSection(&AudioSection);
				if (av_audio_fifo_space(fifo_audio_mic) >= decoded->nb_samples ||
					av_audio_fifo_size(fifo_audio_mic) == 0)
				{
					ret = av_audio_fifo_write(fifo_audio_mic, (void **)decoded->extended_data, decoded->nb_samples);
					written = true;
				}
				LeaveCriticalSection(&AudioSection);
				if (written)
				{
					break;
				}
				Sleep(1);
			}
			av_frame_unref(decoded);

			if (ret < 0)
			{
				fprintf(stderr, "Could not queue decoded samples\n");
				inputDone = true;
				break;
			}
		}
	}

	av_packet_free(&packet);
	av_frame_free(&decoded);

	// Tell the consumer that no more data will arrive.
	bCap = false;
	return 0;
}

其中线程AudioFileCapThreadProc用于读取本地mp4文件，ret == AVERROR_EOF表示已经读到文件尾。

然后主线程读取队列 fifo_audio_mic 里的数据，每次读取1024个采样点(frame_size)，再用 swr_convert 进行重采样。1024个采样点从48000重采样到44100后约为 1024×44100/48000≈941 个，但代码仍把 nb_samples 设为 frame_size(1024) 的整帧送给编码器，帧里多出来的部分并不是有效音频。结果录制后的文件也是32秒，但是伴有滋滋声。
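注意，int iDelaySamples = 0; 本人采取的是0，如果换成下面这样，就会出现问题，具体原因本人没有确认。一个可能的原因（待验证）是：swr_get_delay 的返回值每次调用都可能不同，dst_nb_samples 也就随之变化，而代码里包的 pts 是用 AudioFrameIndex_mic * dst_nb_samples 计算的，时间戳因此不再均匀递增。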
int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);

就上面产生的滋滋声，本人查了一些csdn，其中重点参考了如下博客：
https://blog.csdn.net/qq_42152681/article/details/113064018

找到了下面的一段话：
在这里插入图片描述
我重采样后，每次得到的采样点数是941，不是1024。所以本人改造时，没有把这941个采样点的音频数据直接编码并调用av_interleaved_write_frame写文件，而是先送入第二个队列，等积累够1024个采样点(一整帧)时再编码写文件，结果ok，代码如下：

#include <Windows.h>
#include <conio.h>

#ifdef	__cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif




// Demuxer context of the source file and muxer context of the output file.
AVFormatContext	*pFormatCtx_InputAudio = NULL, *pFormatCtx_OutputAudio = NULL;
// Decoder context opened in OpenAudioFileCapture().
AVCodecContext *pReadCodecContext = NULL;

// Index of the audio stream inside the output file (set in OpenOutPut()).
int AudioIndex_file;

// Encoder context and encoder chosen in OpenOutPut().
AVCodecContext	*pCodecEncodeCtx_Audio = NULL;
AVCodec			*pCodecEncode_Audio = NULL;


// Decoded source-rate samples: written by AudioFileCapThreadProc, read by AudioFileResampleThreadProc.
AVAudioFifo		*fifo_audio_inner = NULL;
// Resampled samples: written by AudioFileResampleThreadProc, read by main().
AVAudioFifo		*fifo_audio_resample = NULL;

// Resampler shared between main() (setup) and the resample thread (conversion).
struct SwrContext *audio_convert_ctx = NULL;


// true while the reader thread is still supplying data; cleared by that thread.
bool bCap = true;
// Guards fifo_audio_inner.
CRITICAL_SECTION AudioSection;

// Guards fifo_audio_resample.
CRITICAL_SECTION AudioResampleSection;

// Thread entry points (defined after main()).
DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam);
DWORD WINAPI AudioFileResampleThreadProc(LPVOID lpParam);




static char *dup_wchar_to_utf8(const wchar_t *w)
{
	char *s = NULL;
	int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
	s = (char *)av_malloc(l);
	if (s)
		WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
	return s;
}





int OpenAudioFileCapture()
{
	//查找输入方式
	const AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");

	//以Direct Show的方式打开设备，并将 输入方式 关联到格式上下文
	const char * psDevName = dup_wchar_to_utf8(L"E:/learn/ffmpeg/ffmpeg_x264_static/bin/FfmpegAudioTest-48000.mp4");
	if (avformat_open_input(&pFormatCtx_InputAudio, psDevName, NULL, NULL) < 0)
	{
		printf("Couldn't open input stream.（无法打开音频输入流）\n");
		return -1;
	}

	if (avformat_find_stream_info(pFormatCtx_InputAudio, NULL) < 0)
	{
		return -1;
	}

	if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
	{
		printf("Couldn't find video stream information.（无法获取音频流信息）\n");
		return -1;
	}


	const AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_InputAudio->streams[0]->codecpar->codec_id);

	pReadCodecContext = avcodec_alloc_context3(tmpCodec);

	//pReadCodecContext->sample_rate = select_sample_rate(tmpCodec);
	pReadCodecContext->sample_rate = 48000;
	pReadCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
	pReadCodecContext->channels = av_get_channel_layout_nb_channels(pReadCodecContext->channel_layout);

	pReadCodecContext->sample_fmt = (AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format;
	//pReadCodecContext->sample_fmt = AV_SAMPLE_FMT_FLTP;

	if (0 > avcodec_open2(pReadCodecContext, tmpCodec, NULL))
	{
		printf("can not find or open audio decoder!\n");
	}


	return 0;
}


int OpenOutPut()
{
	AVStream *pAudioStream = NULL;
	const char *outFileName = "FfmpegAudioTest.mp4";
	avformat_alloc_output_context2(&pFormatCtx_OutputAudio, NULL, NULL, outFileName);


	if (pFormatCtx_InputAudio->streams[0]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
	{
		AVCodecContext *pOutputCodecCtx;
		pAudioStream = avformat_new_stream(pFormatCtx_OutputAudio, NULL);

		AudioIndex_file = 0;

		pCodecEncode_Audio = (AVCodec *)avcodec_find_encoder(pFormatCtx_OutputAudio->oformat->audio_codec);

		pCodecEncodeCtx_Audio = avcodec_alloc_context3(pCodecEncode_Audio);
		if (!pCodecEncodeCtx_Audio) {
			fprintf(stderr, "Could not alloc an encoding context\n");
			exit(1);
		}


		//pCodecEncodeCtx_Audio->codec_id = pFormatCtx_Out->oformat->audio_codec;
		pCodecEncodeCtx_Audio->sample_fmt = pCodecEncode_Audio->sample_fmts ? pCodecEncode_Audio->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
		pCodecEncodeCtx_Audio->bit_rate = 64000;
		pCodecEncodeCtx_Audio->sample_rate = 44100;
		if (pCodecEncode_Audio->supported_samplerates) {
			pCodecEncodeCtx_Audio->sample_rate = pCodecEncode_Audio->supported_samplerates[0];
			for (int i = 0; pCodecEncode_Audio->supported_samplerates[i]; i++) {
				if (pCodecEncode_Audio->supported_samplerates[i] == 44100)
					pCodecEncodeCtx_Audio->sample_rate = 44100;
			}
		}

		pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);
		pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
		if (pCodecEncode_Audio->channel_layouts) {
			pCodecEncodeCtx_Audio->channel_layout = pCodecEncode_Audio->channel_layouts[0];
			for (int i = 0; pCodecEncode_Audio->channel_layouts[i]; i++) {
				if (pCodecEncode_Audio->channel_layouts[i] == AV_CH_LAYOUT_STEREO)
					pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO;
			}
		}
		pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(pCodecEncodeCtx_Audio->channel_layout);


		AVRational timeBase;
		timeBase.den = pCodecEncodeCtx_Audio->sample_rate;
		timeBase.num = 1;
		pAudioStream->time_base = timeBase;

		if (avcodec_open2(pCodecEncodeCtx_Audio, pCodecEncode_Audio, 0) < 0)
		{
			//编码器打开失败，退出程序
			return -1;
		}
	}

	if (!(pFormatCtx_OutputAudio->oformat->flags & AVFMT_NOFILE))
	{
		if (avio_open(&pFormatCtx_OutputAudio->pb, outFileName, AVIO_FLAG_WRITE) < 0)
		{
			printf("can not open output file handle!\n");
			return -1;
		}
	}

	avcodec_parameters_from_context(pAudioStream->codecpar, pCodecEncodeCtx_Audio);

	if (avformat_write_header(pFormatCtx_OutputAudio, NULL) < 0)
	{
		printf("can not write the header of the output file!\n");
		return -1;
	}

	return 0;
}


int main(int argc, char *argv[])
{
	int ret = 0;


	avdevice_register_all();

	audio_convert_ctx = swr_alloc();


	/* set options */
	av_opt_set_int(audio_convert_ctx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(audio_convert_ctx, "in_sample_rate", 48000, 0);
	av_opt_set_sample_fmt(audio_convert_ctx, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);

	av_opt_set_int(audio_convert_ctx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
	av_opt_set_int(audio_convert_ctx, "out_sample_rate", 44100, 0);
	av_opt_set_sample_fmt(audio_convert_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);

	ret = swr_init(audio_convert_ctx);

	if (OpenAudioFileCapture() < 0)
	{
		return -1;
	}

	if (OpenOutPut() < 0)
	{
		return -1;
	}


	InitializeCriticalSection(&AudioSection);
	InitializeCriticalSection(&AudioResampleSection);

	CreateThread(NULL, 0, AudioFileCapThreadProc, 0, 0, NULL);

	CreateThread(NULL, 0, AudioFileResampleThreadProc, 0, 0, NULL);

	int AudioFrameIndex_mic = 0;

	while (1)
	{
		if (NULL == fifo_audio_inner)
		{
			continue;
		}
		if (NULL == fifo_audio_resample)
		{
			continue;
		}
		if (av_audio_fifo_size(fifo_audio_resample) >=
			(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
		{
			AVFrame *frame_resample = NULL;
			frame_resample = av_frame_alloc();

			frame_resample->nb_samples = pCodecEncodeCtx_Audio->frame_size;
			frame_resample->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
			frame_resample->format = pCodecEncodeCtx_Audio->sample_fmt;
			frame_resample->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
			av_frame_get_buffer(frame_resample, 0);

			EnterCriticalSection(&AudioResampleSection);
			int readcount = av_audio_fifo_read(fifo_audio_resample, (void **)frame_resample->data,
				(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
			LeaveCriticalSection(&AudioResampleSection);

			AVPacket pkt_out_resample = { 0 };
			pkt_out_resample.data = NULL;
			pkt_out_resample.size = 0;

			frame_resample->pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;

			ret = avcodec_send_frame(pCodecEncodeCtx_Audio, frame_resample);
			ret = avcodec_receive_packet(pCodecEncodeCtx_Audio, &pkt_out_resample);
			if (ret == AVERROR(EAGAIN))
			{
				continue;
			}
			
			av_frame_free(&frame_resample);
			{
				pkt_out_resample.stream_index = AudioIndex_file;
				//pkt_out_mic.pts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
				//pkt_out_mic.dts = AudioFrameIndex_mic * pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;
				//pkt_out_mic.duration = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size;

				pkt_out_resample.pts = AudioFrameIndex_mic * 1024;
				pkt_out_resample.dts = AudioFrameIndex_mic * 1024;
				pkt_out_resample.duration = 1024;

				int ret2 = av_interleaved_write_frame(pFormatCtx_OutputAudio, &pkt_out_resample);
				av_packet_unref(&pkt_out_resample);
			}
			AudioFrameIndex_mic++;

			if (!bCap)
			{
				if (av_audio_fifo_size(fifo_audio_resample) < 1024)
				{
					break;
				}
			}
		}
	}

	av_write_trailer(pFormatCtx_OutputAudio);

	avio_close(pFormatCtx_OutputAudio->pb);
	avformat_free_context(pFormatCtx_OutputAudio);

	if (pFormatCtx_InputAudio != NULL)
	{
		avformat_close_input(&pFormatCtx_InputAudio);
		pFormatCtx_InputAudio = NULL;
	}
	getchar();

	return 0;
}


DWORD WINAPI AudioFileCapThreadProc(LPVOID lpParam)
{
	AVFrame *pFrame;
	pFrame = av_frame_alloc();

	AVPacket packet = { 0 };
	int ret = 0;
	int iCount = 0;

	while (bCap)
	{
		av_packet_unref(&packet);

		ret = av_read_frame(pFormatCtx_InputAudio, &packet);

		if (ret == AVERROR_EOF)
		{
			bCap = false;
			break;
		}
		iCount++;
		ret = avcodec_send_packet(pReadCodecContext, &packet);
		if (ret >= 0)
		{
			ret = avcodec_receive_frame(pReadCodecContext, pFrame);
			if (ret == AVERROR(EAGAIN))
			{
				break;
			}
			else if (ret == AVERROR_EOF)
			{
				return 0;
			}
			else if (ret < 0) {
				fprintf(stderr, "Error during decoding\n");
				exit(1);
			}

			if (NULL == fifo_audio_inner)
			{
				fifo_audio_inner = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_InputAudio->streams[0]->codecpar->format, pFormatCtx_InputAudio->streams[0]->codecpar->channels, 3000 * pFrame->nb_samples);
			}

			if (NULL == fifo_audio_resample)
			{
				fifo_audio_resample = av_audio_fifo_alloc((AVSampleFormat)pFormatCtx_OutputAudio->streams[0]->codecpar->format, pFormatCtx_OutputAudio->streams[0]->codecpar->channels, 3000 * pFrame->nb_samples);
			}

			int buf_space = av_audio_fifo_space(fifo_audio_inner);
			if (av_audio_fifo_space(fifo_audio_inner) >= pFrame->nb_samples)
			{
				EnterCriticalSection(&AudioSection);
				ret = av_audio_fifo_write(fifo_audio_inner, (void **)pFrame->data, pFrame->nb_samples);
				LeaveCriticalSection(&AudioSection);
			}

			int samples = av_audio_fifo_size(fifo_audio_inner);

			av_packet_unref(&packet);
		}

	}

	return 0;
}



DWORD WINAPI AudioFileResampleThreadProc(LPVOID lpParam)
{
	AVFrame *pFrame;
	pFrame = av_frame_alloc();

	AVPacket packet = { 0 };
	int ret = 0;
	int iCount = 0;

	while (1)
	{
		if (fifo_audio_inner == NULL)
		{
			continue;
		}

		if (av_audio_fifo_size(fifo_audio_inner) >=
			(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024))
		{
			AVFrame *frame_mic = NULL;
			frame_mic = av_frame_alloc();

			frame_mic->nb_samples = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024;
			frame_mic->channel_layout = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->channel_layout;
			frame_mic->format = pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->format;
			//frame_mic->sample_rate = pFormatCtx_Out->streams[AudioIndex_mic]->codecpar->sample_rate;
			frame_mic->sample_rate = pFormatCtx_InputAudio->streams[0]->codecpar->sample_rate;
			av_frame_get_buffer(frame_mic, 0);

			EnterCriticalSection(&AudioSection);
			int readcount = av_audio_fifo_read(fifo_audio_inner, (void **)frame_mic->data,
				(pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size > 0 ? pFormatCtx_OutputAudio->streams[AudioIndex_file]->codecpar->frame_size : 1024));
			LeaveCriticalSection(&AudioSection);

			AVFrame *frame_mic_encode = NULL;
			frame_mic_encode = av_frame_alloc();

			//int iDelaySamples = swr_get_delay(audio_convert_ctx, frame_mic->sample_rate);
			int iDelaySamples = 0;
			//int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, frame_mic->sample_rate, pCodecEncodeCtx_Audio->sample_rate, AVRounding(1));
			int dst_nb_samples = av_rescale_rnd(iDelaySamples + frame_mic->nb_samples, pCodecEncodeCtx_Audio->sample_rate, frame_mic->sample_rate, AV_ROUND_UP);


			frame_mic_encode->nb_samples = pCodecEncodeCtx_Audio->frame_size;
			frame_mic_encode->channel_layout = pCodecEncodeCtx_Audio->channel_layout;
			frame_mic_encode->format = pCodecEncodeCtx_Audio->sample_fmt;
			frame_mic_encode->sample_rate = pCodecEncodeCtx_Audio->sample_rate;
			av_frame_get_buffer(frame_mic_encode, 0);


			//uint8_t *audio_buf = NULL;
			uint8_t *audio_buf[2] = { 0 };
			audio_buf[0] = (uint8_t *)frame_mic_encode->data[0];
			audio_buf[1] = (uint8_t *)frame_mic_encode->data[1];

			int nb = swr_convert(audio_convert_ctx, audio_buf, dst_nb_samples, (const uint8_t**)frame_mic->data, frame_mic->nb_samples);

			

			//if (av_audio_fifo_space(fifo_audio_resample) >= pFrame->nb_samples)
			{
				EnterCriticalSection(&AudioResampleSection);
				ret = av_audio_fifo_write(fifo_audio_resample, (void **)frame_mic_encode->data, dst_nb_samples);
				LeaveCriticalSection(&AudioResampleSection);
			}

			if (!bCap)
			{
				if (av_audio_fifo_size(fifo_audio_inner) < 1024)
				{
					break;
				}
			}

		}

	}

	return 0;
}