In an earlier post I showed how to use FFmpeg to record the microphone and the PC's internal audio (e.g. a song being played back) and mix the two. This post goes one step further: after mixing, the audio is muxed together with the captured desktop video.
The overall approach is as follows. Four threads are created:
HANDLE hThreadAudio = CreateThread(NULL, 0, AudioCapThreadProc, 0, 0, NULL);
HANDLE hThreadAudioMic = CreateThread(NULL, 0, AudioMicCapThreadProc, 0, 0, NULL);
HANDLE hThreadAudioMix = CreateThread(NULL, 0, AudioMixThreadProc, 0, 0, NULL);
HANDLE hThreadVideo = CreateThread(NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
Thread hThreadAudio captures the PC's internal (loopback) audio, and thread hThreadAudioMic captures the local microphone. Thread hThreadAudioMix is dedicated to mixing those two audio streams, and thread hThreadVideo grabs the desktop video. The main thread then merges the mixed audio and the video data into one file.
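The merge step follows the usual FFmpeg interleaving pattern: compare the current video and audio timestamps and write whichever stream lags behind. A minimal sketch of that loop (the globals bCap, cur_pts_v, cur_pts_a and the stream indices come from the listing further below; the pop/encode/write steps are summarized as comments):
while (bCap)
{
    AVRational tb_v = pFormatCtx_Out->streams[iVideoStreamIndex]->time_base;
    AVRational tb_a = pFormatCtx_Out->streams[iAudioStreamIndex]->time_base;
    if (av_compare_ts(cur_pts_v, tb_v, cur_pts_a, tb_a) <= 0)
    {
        // Video lags behind: pop one raw frame from fifo_video, encode it,
        // write it with av_interleaved_write_frame(), then advance cur_pts_v.
    }
    else
    {
        // Audio lags behind: pop mixed samples from fifo_audio_mix,
        // encode and write them, then advance cur_pts_a.
    }
}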
Next, look at the following four queue variables:
AVFifoBuffer *fifo_video = NULL;
AVAudioFifo *fifo_audio = NULL;
AVAudioFifo *fifo_audio_mic = NULL;
/// fifo_audio_mix is the queue the mixed output of the two audio streams is pushed into, which makes the subsequent audio/video synchronization easier
AVAudioFifo *fifo_audio_mix = NULL;
fifo_video is the queue fed by the video frames that thread hThreadVideo captures; fifo_audio is fed by the internal PC audio captured by hThreadAudio; fifo_audio_mic is fed by the microphone audio captured by hThreadAudioMic; and fifo_audio_mix is the queue the mixed internal-plus-microphone audio is pushed into.
The main thread pulls video data from fifo_video and mixed audio from fifo_audio_mix, then writes both to the output file.
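Every queue is shared between a producer thread and a consumer, so each access is guarded by its matching CRITICAL_SECTION. A minimal sketch of the pattern on the internal-audio queue (the frame variable and frame_size are placeholder names for this sketch):
// Producer side (in AudioCapThreadProc): append the captured samples.
EnterCriticalSection(&AudioSection);
av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
LeaveCriticalSection(&AudioSection);
// Consumer side (in AudioMixThreadProc): wait until a full frame of
// samples has accumulated, then pop exactly that many.
if (av_audio_fifo_size(fifo_audio) >= frame_size)
{
    EnterCriticalSection(&AudioSection);
    av_audio_fifo_read(fifo_audio, (void **)frame->data, frame_size);
    LeaveCriticalSection(&AudioSection);
}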
As for audio formats: both hThreadAudio and hThreadAudioMic capture AV_SAMPLE_FMT_S16, so the mixing is also done in AV_SAMPLE_FMT_S16. When finally writing to the file, the samples must be converted to AV_SAMPLE_FMT_FLTP.
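A minimal sketch of that final S16-to-FLTP conversion with libswresample (the 44.1 kHz stereo parameters and the in_frame/out_frame names are assumptions for illustration):
// Convert interleaved S16 samples to planar float before encoding.
SwrContext *swr = swr_alloc_set_opts(NULL,
    AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_FLTP, 44100,  // output: what the encoder expects
    AV_CH_LAYOUT_STEREO, AV_SAMPLE_FMT_S16,  44100,  // input: the mixed audio
    0, NULL);
swr_init(swr);
// in_frame holds mixed S16 samples; out_frame is an FLTP frame sized to
// the encoder's frame_size. Both are placeholder names.
swr_convert(swr, out_frame->data, out_frame->nb_samples,
    (const uint8_t **)in_frame->data, in_frame->nb_samples);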
The full code is below:
// FfmpegAudioTest.cpp : This file contains the "main" function. Program execution begins and ends there.
//
#include <Windows.h>
#include <conio.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"
#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "SDL.h"
#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#pragma comment(lib, "SDL2.lib")
#ifdef __cplusplus
};
#endif
AVFormatContext *pFormatCtx_Audio = NULL;
AVFormatContext *pFormatCtx_AudioMic = NULL;
AVFormatContext *pFormatCtx_Out = NULL;
AVFormatContext *pFormatCtx_Video = NULL;
AVCodecContext *pReadCodecCtx_Video = NULL;
AVCodecContext *pReadCodecCtx_Audio = NULL;
AVCodecContext *pReadCodecCtx_AudioMic = NULL;
AVCodec *pReadCodec_Video = NULL;
int iVideoStreamIndex = 0;
int iAudioStreamIndex = 1;
AVCodecContext *pCodecEncodeCtx_Video = NULL;
AVCodecContext *pCodecEncodeCtx_Audio = NULL;
AVCodec *pCodecEncode_Audio = NULL;
AVFifoBuffer *fifo_video = NULL;
AVAudioFifo *fifo_audio = NULL;
AVAudioFifo *fifo_audio_mic = NULL;
/// fifo_audio_mix is the queue the mixed output of the two audio streams is pushed into, which makes the subsequent audio/video synchronization easier
AVAudioFifo *fifo_audio_mix = NULL;
SwsContext *img_convert_ctx = NULL;
SwrContext *audio_convert_ctx = NULL;
uint8_t *picture_buf = NULL, *frame_buf = NULL;
bool bCap = true;
int AudioFrameIndex_mix = 0;
int64_t cur_pts_v = 0;
int64_t cur_pts_a = 0;
int yuv420_frame_size = 0;
AVFilterGraph* _filter_graph = NULL;
AVFilterContext* _filter_ctx_src_inner = NULL;
AVFilterContext* _filter_ctx_src_mic = NULL;
AVFilterContext* _filter_ctx_sink = NULL;
CRITICAL_SECTION VideoSection;
CRITICAL_SECTION AudioSection;
CRITICAL_SECTION AudioSection_mic;
CRITICAL_SECTION AudioSection_mix;
DWORD WINAPI AudioCapThreadProc(LPVOID lpParam);
DWORD WINAPI AudioMicCapThreadProc(LPVOID lpParam);
DWORD WINAPI AudioMixThreadProc(LPVOID lpParam);
DWORD WINAPI ScreenCapThreadProc(LPVOID lpParam);
typedef struct BufferSourceContext {
const AVClass *bscclass;
AVFifoBuffer *fifo;
AVRational time_base; ///< time_base to set in the output link
AVRational frame_rate; ///< frame_rate to set in the output link
unsigned nb_failed_requests;
unsigned warning_limit;
/* video only */
int w, h;
enum AVPixelFormat pix_fmt;
AVRational pixel_aspect;
char *sws_param;
AVBufferRef *hw_frames_ctx;
/* audio only */
int sample_rate;
enum AVSampleFormat sample_fmt;
int channels;
uint64_t channel_layout;
char *channel_layout_str;
int got_format_from_params;
int eof;
} BufferSourceContext;
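// Note: the struct above mirrors the internal BufferSourceContext defined in
// libavfilter/buffersrc.c (its first member renamed because "class" is a C++
// keyword). Duplicating it lets this code peek at the abuffer filter's
// internal FIFO, so the layout must match the FFmpeg version being linked.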
static char *dup_wchar_to_utf8(const wchar_t *w)
{
char *s = NULL;
int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
s = (char *)av_malloc(l);
if (s)
WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
return s;
}
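/* Usage sketch (the device name below is hypothetical): dshow device names
 * on a Chinese Windows system are wide strings, so convert them to UTF-8
 * before passing them to avformat_open_input():
 *   AVInputFormat *ifmt = av_find_input_format("dshow");
 *   char *device_name = dup_wchar_to_utf8(L"audio=麦克风 (Realtek High Definition Audio)");
 *   avformat_open_input(&pFormatCtx_AudioMic, device_name, ifmt, NULL);
 */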
/* pick the supported sample rate closest to 44100 Hz */
static int select_sample_rate(const AVCodec *codec)
{
const int *p;
int best_samplerate = 0;
if (!codec->supported_samplerates)
return 44100;
p = codec->supported_samplerates;
while (*p) {
if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate))
best_samplerate = *p;
p++;
}
return best_samplerate;
}
/* select layout with the highest channel count */
static uint64_t select_channel_layout(const AVCodec *codec)
{
const uint64_t *p;
uint64_t best_ch_layout = 0;
int best_nb_channels = 0;
if (!codec->channel_layouts)
return AV_CH_LAYOUT_STEREO;
p = codec->channel_layouts;
while (*p) {
int nb_channels = av_get_channel_layout_nb_channels(*p);
if (nb_channels > best_nb_channels) {
best_ch_layout = *p;
best_nb_channels = nb_channels;
}
p++;
}
return best_ch_layout;
}
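/* These two helpers mirror FFmpeg's encode_audio example and are used when
 * configuring the output audio encoder, e.g. (assumed usage):
 *   pCodecEncodeCtx_Audio->sample_rate    = select_sample_rate(pCodecEncode_Audio);
 *   pCodecEncodeCtx_Audio->channel_layout = select_channel_layout(pCodecEncode_Audio);
 */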
int InitFilter(const char* filter_desc)
{
char args_inner[512];
const char* pad_name_inner = "in0";
char args_mic[512];
const char* pad_name_mic = "in1";
const AVFilter* filter_src_spk = avfilter_get_by_name("abuffer");
const AVFilter* filter_src_mic = avfilter_get_by_name("abuffer");
const AVFilter* filter_sink = avfilter_get_by_name("abuffersink");
AVFilterInOut* filter_output_inner = avfilter_inout_alloc();
AVFilterInOut* filter_output_mic = avfilter_inout_alloc();
AVFilterInOut* filter_input = avfilter_inout_alloc();
_filter_graph = avfilter_graph_alloc();
sprintf_s(args_inner, sizeof(args_inner), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
pReadCodecCtx_Audio->time_base.num,
pReadCodecCtx_Audio->time_base.den,
pReadCodecCtx_Audio->sample_rate,
av_get_sample_fmt_name((AVSampleFormat)pReadCodecCtx_Audio->sample_fmt),
pReadCodecCtx_Audio->channel_layout);
sprintf_s(args_mic, sizeof(args_mic), "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%I64x",
pReadCodecCtx_AudioMic->time_base.num,
pReadCodecCtx_AudioMic->time_base.den,
pReadCodecCtx_AudioMic->sample_rate,
av_get_sample_fmt_name((AVSampleFormat)pReadCodecCtx_AudioMic->sample_fmt),
pReadCodecCtx_AudioMic->channel_layout);
int ret = 0;
ret = avfilter_graph_create_filter(&_filter_ctx_src_inner, filter_src_spk, pad_name_inner, args_inner, NULL, _filter_graph);
if (ret < 0)
{
    printf("Filter: failed to create filter in0\n");
    return -1;
}
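The rest of InitFilter wires the two sources and the sink into the amix graph. A minimal sketch of the usual wiring, assuming filter_desc is something like "[in0][in1]amix=inputs=2:duration=first:dropout_transition=3[out]" (av_opt_set_int_list additionally needs libavutil/opt.h):
ret = avfilter_graph_create_filter(&_filter_ctx_src_mic, filter_src_mic, pad_name_mic, args_mic, NULL, _filter_graph);
if (ret < 0)
{
    printf("Filter: failed to create filter in1\n");
    return -1;
}
ret = avfilter_graph_create_filter(&_filter_ctx_sink, filter_sink, "out", NULL, NULL, _filter_graph);
if (ret < 0)
{
    printf("Filter: failed to create filter out\n");
    return -1;
}
// Mix in S16 to match the capture format.
AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE };
av_opt_set_int_list(_filter_ctx_sink, "sample_fmts", sample_fmts, AV_SAMPLE_FMT_NONE, AV_OPT_SEARCH_CHILDREN);
// Chain the two source pads and the sink pad, then let
// avfilter_graph_parse_ptr() connect them to the amix description.
filter_output_inner->name = av_strdup(pad_name_inner);
filter_output_inner->filter_ctx = _filter_ctx_src_inner;
filter_output_inner->pad_idx = 0;
filter_output_inner->next = filter_output_mic;
filter_output_mic->name = av_strdup(pad_name_mic);
filter_output_mic->filter_ctx = _filter_ctx_src_mic;
filter_output_mic->pad_idx = 0;
filter_output_mic->next = NULL;
filter_input->name = av_strdup("out");
filter_input->filter_ctx = _filter_ctx_sink;
filter_input->pad_idx = 0;
filter_input->next = NULL;
AVFilterInOut* filter_outputs = filter_output_inner;
ret = avfilter_graph_parse_ptr(_filter_graph, filter_desc, &filter_input, &filter_outputs, NULL);
if (ret < 0)
    return -1;
ret = avfilter_graph_config(_filter_graph, NULL);
if (ret < 0)
    return -1;
return 0;
}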
