ffmpeg录制桌面视频和麦克风音频(音视频同步)

最新推荐文章于 2025-10-07 10:14:17 发布

原创

最新推荐文章于 2025-10-07 10:14:17 发布 · 2.4k 阅读

16 ·

CC 4.0 BY-SA版权

文章标签：

#ffmpeg

本文介绍使用FFmpeg实现音视频同步录制的方法，通过整合桌面录制和麦克风录音功能，实现高质量的音视频同步输出。文中详细介绍了如何配置FFmpeg版本、设置参数及编码格式等关键步骤。

vs版本:2017
ffmpeg版本号：
ffmpeg version N-102642-g864d1ef2fc Copyright © 2000-2021 the FFmpeg developers
built with gcc 8.1.0 (x86_64-win32-seh-rev0, Built by MinGW-W64 project)
configuration: --arch=x86_64 --prefix=/home/ffmpeg_static_x64 --disable-debug
libavutil 57. 0.100 / 57. 0.100
libavcodec 59. 1.100 / 59. 1.100
libavformat 59. 2.101 / 59. 2.101
libavdevice 59. 0.100 / 59. 0.100
libavfilter 8. 0.101 / 8. 0.101
libswscale 6. 0.100 / 6. 0.100
libswresample 4. 0.100 / 4. 0.100

关于ffmpeg的lib和dll，本人已在csdn上上传了相关资源，可以免费下载。

本人之前写过ffmpeg录制麦克风声音和ffmpeg录制桌面(队列方式)

现在将两者合入到一起，实现音视频同步，完整代码下面有给出，这里不做赘述，关于代码的理解，读者可以参看我前两篇博客。

半年前，我在写音视频同步的时候，参考过其他人的博客，其中最重要的一篇是《ffmpeg实现录屏+录音》，结果短时间内未能成功，继而做其他事情去了。

如今半年过去，当我重新开始学ffmpeg时，我首先参考的是ffmpeg自带的demo，位于doc/examples文件夹下面，其中有个muxing.c，这是一个关于音视频混合的例子，比较简单，容易懂。而半年前看的别人的博客，对应的ffmpeg版本比较旧，旧版本的ffmpeg里面有很多不建议使用（deprecated）的变量和方法，学习复杂度会更高。新版本将deprecated的函数和变量去掉了，降低了学习的复杂性。

将新版本ffmpeg里面的demo看完后，再看《ffmpeg实现录屏+录音》，就会觉得帮助不小。多说一句，对于视频和音频的平面(planar)和打包(packed)模式，建议大家先了解一下，这对理解音视频至关重要。

// FfmpegAudioTest.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//

#include <Windows.h>
#include <conio.h>

#ifdef	__cplusplus
extern "C"
{
   
   
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/imgutils.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")

	//#pragma comment(lib, "avfilter.lib")
	//#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif




// File-scope state for the recorder. Everything below is initialised to
// NULL/0 here; most of the code that fills it in is outside this view, so
// roles noted below are inferred from names unless stated otherwise.

// Audio-side format context plus the output format context
// (presumably microphone input and muxer output — TODO confirm).
AVFormatContext	*pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;
AVCodecContext *pReadCodecContext_Audio = NULL;

// Video input context; OpenVideoCapture() opens it on the gdigrab "desktop" device.
AVFormatContext	*pFormatCtx_Video = NULL;
AVCodecContext	*pReadCodecCtx_Video = NULL;
AVCodec			*pReadCodec_Video = NULL;

// Stream indices (presumably indices into the output context's streams — verify).
int VideoIndex = 0;
int AudioIndex_mic = 0;

// Audio encoder context and codec.
AVCodecContext	*pCodecEncodeCtx_Audio = NULL;
AVCodec			*pCodecEncode_Audio = NULL;

// Video encoder context and codec.
AVCodecContext	*pCodecEncodeCtx_Video = NULL;
AVCodec			*pCodecEncode_Video = NULL;


// FIFOs for video data and microphone audio samples
// (presumably filled by the capture threads declared below — TODO confirm).
AVFifoBuffer	*fifo_video = NULL;
AVAudioFifo		*fifo_audio_mic = NULL;

// Resampler and scaler contexts for audio and image conversion.
SwrContext *audio_convert_ctx = NULL;
SwsContext *img_convert_ctx = NULL;
// NOTE(review): unit of frame_size (bytes vs. samples) is not visible here.
int frame_size = 0;

uint8_t *picture_buf = NULL, *frame_buf = NULL;

int iPicCount = 0;

// Current video / audio timestamps (presumably used to interleave the two
// streams when writing — confirm against the muxing loop).
int64_t cur_pts_v = 0;
int64_t cur_pts_a = 0;


// Win32 critical sections; presumably guard fifo_video and fifo_audio_mic
// respectively — verify against the thread procedures.
CRITICAL_SECTION VideoSection;
CRITICAL_SECTION AudioSection;

// Thread entry points (Win32 thread-procedure signature), defined elsewhere in the file.
DWORD WINAPI AudioMicCapThreadProc(LPVOID lpParam);
DWORD WINAPI ScreenCapThreadProc(LPVOID lpParam);

/*
 * Convert a NUL-terminated wide-character string to UTF-8.
 *
 * @param w  wide string to convert.
 * @return   buffer obtained from av_malloc() holding the UTF-8 text
 *           (NUL-terminated), or NULL if the required size cannot be
 *           determined or the allocation fails. The caller owns the buffer.
 */
static char *dup_wchar_to_utf8(const wchar_t *w)
{
	char *s = NULL;
	/* Sizing pass: with a zero-length output buffer the call only reports
	 * the number of bytes needed (terminator included, since the input
	 * length is given as -1). */
	int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);

	/* A result of 0 means the conversion failed; bail out instead of
	 * handing back a buffer that was never written or terminated. */
	if (l <= 0)
		return NULL;

	s = (char *)av_malloc(l);
	if (s)
		WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
	return s;
}


/*
 * Pick the sample rate the encoder supports that lies closest to 44100 Hz.
 *
 * Returns 44100 when the codec publishes no list of supported rates.
 * When two rates are equally close, the one listed first is kept.
 * The list is read up to its terminating 0 entry.
 */
static int select_sample_rate(const AVCodec *codec)
{
	const int target_rate = 44100;
	const int *rate = codec->supported_samplerates;
	int chosen = 0;

	if (rate == NULL)
		return target_rate;

	for (; *rate != 0; ++rate) {
		/* chosen == 0 only before the first entry has been looked at */
		if (chosen == 0 || abs(target_rate - *rate) < abs(target_rate - chosen))
			chosen = *rate;
	}
	return chosen;
}




/* select layout with the highest channel count */
static int select_channel_layout(const AVCodec *codec)
{
   
   
	const uint64_t *p;
	uint64_t best_ch_layout = 0;
	int best_nb_channels = 0;

	if (!codec->channel_layouts)
		return AV_CH_LAYOUT_STEREO;

	p = codec->channel_layouts;
	while (*p) {
   
   
		int nb_channels = av_get_channel_layout_nb_channels(*p);

		if (nb_channels > best_nb_channels) {
   
   
			best_ch_layout = *p;
			best_nb_channels = nb_channels;
		}
		p++;
	}
	return best_ch_layout;
}


int OpenVideoCapture()
{
   
   
	const AVInputFormat *ifmt = av_find_input_format("gdigrab");
	//这里可以加参数打开，例如可以指定采集帧率
	AVDictionary *options = NULL;
	av_dict_set(&options, "framerate", "25", NULL);
	av_dict_set(&options, "probesize", "50000000", NULL);
	//av_dict_set(&options,"offset_x","20",0);
	//The distance from the top edge of the screen or desktop
	//av_dict_set(&options,"offset_y","40",0);
	//Video frame size. The default is to capture the full screen
	//av_dict_set(&options,"video_size","320x240",0);
	if (avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options) != 0)
	{
   
   
		printf("Couldn't open input stream.（无法打开视频输入流）\n");
		return -

最低0.47元/天解锁文章

8 条评论

会说书的铲子 2022.09.25
博主您好，我将原代码中main改成muxing线程之后，在mfc按钮中启动muxing线程，然后会出现录制60秒但视频只剩下40秒的情况，结果测试其他时间，都会只剩下3分之二。想问问这个问题会是因为我多写了一个线程出来吗？
- 会说书的铲子回复tusong86 2022.09.26
  好的大佬，我已经在学习了，感谢大佬的回答
- tusong86回复会说书的铲子 2022.09.26
  目前用ffmpeg自带的gdigrab抓图确实有问题，所以不要用ffmpeg里面自带的gdigrab抓图，尝试自己用gdi抓图。本人其他博客里面也有介绍如何自己调用操作系统gdi抓图，这样可以控制节奏
- 会说书的铲子回复会说书的铲子 2022.09.25
  后面发现就算不将原代码中的main改成线程也会出现这个情况。我将原代码的main写成muxing函数，然后在另一个cpp的main函数中调用，还是会出现只剩下3分之二的情况，这是为啥勒

arible 2022.09.20
博主有测试过吗，我这边测试效果是有点快进的感觉
- arible回复tusong86 2022.09.27
  感谢大佬，已经看过那篇博客了，录制成功！
- tusong86回复arible 2022.09.20
  这个博客上面用的是ffmpeg里面自带的gdigrab抓图，抓图的速度不好控制，会产生速度问题；这个问题，后面有解决过，自己gdi抓图就行，不要用ffmpeg自带的抓；我有写相关的博客，你可以找到