ffmpeg录制桌面视频和系统内部声音(音视频同步)

最新推荐文章于 2025-10-22 09:46:51 发布

原创

最新推荐文章于 2025-10-22 09:46:51 发布 · 3.5k 阅读

12 ·

CC 4.0 BY-SA版权

文章标签：

#音视频

本文介绍使用FFmpeg录制电脑内部声音的方法，包括安装所需软件screencapturerecorder，利用虚拟音频捕获设备录制系统声音，以及通过代码实现音视频同步录制的技术细节。

本文抓取的是电脑内部声音，需要先安装软件screen capture recorder，这个软件大小有50M，太大，安装后，里面有一个脚本文件，如下所示：
在这里插入图片描述
打开这个文件，可以看到如下内容：

这个文件比较小，只有59k，对其进行regsvr32后，用ffmpeg -list_devices true -f dshow -i dummy查看支持的设备列表，会发现多个virtual-audio-capturer。所以大家以后需要做产品录制系统声音时，只需要将这个dll拿出来，安装包里面注册下就行，不需要额外安装screen capture recorder。

本人所写的音视频抓取和同步的文章已经比较多了，这里单独列出来，是出于以下原因：
1.前面的代码写的比较粗糙，变量命名不规范
2.音视频同步时，会将抓取到的音频和视频的pts做比对，以确定什么时候写音频、什么时候写视频。而音视频是两个独立的通道，所以我想测试一下：如果录制2分钟，期间只把视频写入文件，音频只写入内存队列；等2分钟过后视频已经写完了，再把内存中的音频写入文件，这样音视频能否保持同步。

为便于参考，本人先写个例子，此处将代码贴出来，然后在此基础上就第二点进行验证。
在这里插入图片描述
这上面工程名字起的不对，应该叫FfmpegVideoInnerAudioTest更准确些，main函数所在文件FfmpegVideoFirstInnerAudioSecondTest的内容如下：

#include "ULinkRecord.h"
#include <stdio.h>
#include <conio.h>


int main()
{
   
   
	ULinkRecord cULinkRecord;

	cULinkRecord.SetRecordPath("E:\\learn\\ffmpeg\\FfmpegTest\\x64\\Release");

	RECT rect;
	rect.left = 0;
	rect.top = 0;
	rect.right = 1920;
	rect.bottom = 1080;

	cULinkRecord.SetRecordRect(rect);

	cULinkRecord.StartRecord();

	Sleep(120000);

	printf("begin StopRecord\n");
	cULinkRecord.StopRecord();
	printf("end StopRecord\n");
	return 0;
}

ULinkRecord.h的内容如下：

#pragma once

#include <string>
#include <Windows.h>

#ifdef	__cplusplus
extern "C"
{
   
   
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"
#include "libavutil/avutil.h"
#include "libavutil/fifo.h"
#include "libavutil/frame.h"
#include "libavutil/imgutils.h"

#include "libavfilter/avfilter.h"
#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"


#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")


#ifdef __cplusplus
};
#endif

// Recorder object used by the screen + system-audio capture sample.
// Only the declaration lives here; what each method does is defined in
// ULinkRecord.cpp.
//
// The object holds Win32 CRITICAL_SECTION members, thread HANDLEs and raw
// FFmpeg pointers, so it is explicitly non-copyable: a CRITICAL_SECTION must
// not be copied or moved, and a copy would alias every raw pointer/handle.
class ULinkRecord
{
public:
	ULinkRecord();
	~ULinkRecord();

	// Non-copyable (see class comment).
	ULinkRecord(const ULinkRecord&) = delete;
	ULinkRecord& operator=(const ULinkRecord&) = delete;
public:
	// Sets the recording path from a C string.
	void SetRecordPath(const char* pRecordPath);
	// Sets the screen rectangle to record.
	void SetRecordRect(RECT rectRecord);
	// Starts recording. NOTE(review): the meaning of the int result is defined
	// in the .cpp — confirm the success value before checking it.
	int StartRecord();
	// Stops recording.
	void StopRecord();
	// Sets the mute flag.
	void SetMute(bool bMute);
private:
	// Setup/teardown helpers; int results follow the .cpp's convention.
	int OpenAudioInnerCapture();
	int OpenOutPut();
	void Clear();
private:
	// Each static *Proc has the Win32 thread-routine signature and is paired
	// with a member function of the matching name; presumably lpParam carries
	// `this` — verify in the .cpp.
	static DWORD WINAPI AudioInnerCaptureProc(LPVOID lpParam);
	void AudioInnerCapture();

	static DWORD WINAPI AudioInnerResampleProc(LPVOID lpParam);
	void AudioInnerResample();

	static DWORD WINAPI ScreenCaptureProc(LPVOID lpParam);
	void ScreenCapture();

	static DWORD WINAPI ScreenAudioMixProc(LPVOID lpParam);
	void ScreenAudioMix();
private:
	std::string m_strRecordPath;
	std::string m_strFilePrefix;
private:
	// Locks; presumably one per queue below — which lock guards which queue
	// is established in the .cpp.
	CRITICAL_SECTION m_csVideoSection;
	CRITICAL_SECTION m_csAudioInnerSection;
	CRITICAL_SECTION m_csAudioInnerResampleSection;
	CRITICAL_SECTION m_csAudioMicSection;
	CRITICAL_SECTION m_csAudioMixSection;

	// In-memory queues for video and audio data.
	AVFifoBuffer *m_pVideoFifo = NULL;
	AVAudioFifo *m_pAudioInnerFifo = NULL;
	AVAudioFifo *m_pAudioInnerResampleFifo = NULL;

	// Output container context and the audio input (capture) context.
	AVFormatContext *m_pFormatCtx_Out = NULL;
	AVFormatContext	*m_pFormatCtx_AudioInner = NULL;

	// Codec contexts/codec on the read side.
	AVCodecContext *m_pReadCodecCtx_AudioInner = NULL;
	AVCodecContext *m_pReadCodecCtx_AudioMic = NULL;
	AVCodec *m_pReadCodec_Video = NULL;

	// Encoder contexts/codec on the write side.
	AVCodecContext	*m_pCodecEncodeCtx_Video = NULL;
	AVCodecContext	*m_pCodecEncodeCtx_Audio = NULL;
	AVCodec			*m_pCodecEncode_Audio = NULL;

	// libswresample contexts.
	SwrContext *m_pAudioInnerResampleCtx = NULL;
	SwrContext *m_pAudioConvertCtx = NULL;

	int m_iVideoStreamIndex = 0;
	int m_iAudioStreamIndex = 0;
	bool m_bRecord = false;	// recording flag, initially false
	bool m_bMute = true;	// mute flag, initially true

	// Handles for the four worker threads declared above.
	HANDLE m_hAudioInnerCapture = NULL;
	HANDLE m_hAudioInnerResample = NULL;
	HANDLE m_hScreenCapture = NULL;
	HANDLE m_hScreenAudioMix = NULL;

	int m_iYuv420FrameSize = 0;

	// Recording region (position and size).
	int m_iRecordPosX = 0;
	int m_iRecordPosY = 0;
	int m_iRecordWidth = 0;
	int m_iRecordHeight = 0;

	int m_iFrameNumber = 0;
};

ULinkRecord.cpp的内容如下：

#include "ULinkRecord.h"
#include "log/log.h"
#include "appfun/appfun.h"

#include "CaptureScreen.h"



/*
 * Local declaration of a buffer-source filter context.
 * NOTE(review): this looks like a copy of FFmpeg's private
 * BufferSourceContext from libavfilter/buffersrc.c — presumably so this file
 * can read a buffersrc filter's private data. The field order and types must
 * then match the linked FFmpeg version exactly; confirm before upgrading FFmpeg.
 */
typedef struct BufferSourceContext {
   
   
	const AVClass    *bscclass;
	AVFifoBuffer     *fifo;
	AVRational        time_base;     ///< time_base to set in the output link
	AVRational        frame_rate;    ///< frame_rate to set in the output link
	unsigned          nb_failed_requests;
	unsigned          warning_limit;

	/* video only */
	int               w, h;
	enum AVPixelFormat  pix_fmt;
	AVRational        pixel_aspect;
	char              *sws_param;

	AVBufferRef *hw_frames_ctx;

	/* audio only */
	int sample_rate;
	enum AVSampleFormat sample_fmt;
	int channels;
	uint64_t channel_layout;
	char    *channel_layout_str;

	int got_format_from_params;
	int eof;
} BufferSourceContext;


/*
 * Convert a NUL-terminated wide string to a newly allocated UTF-8 string.
 *
 * w: wide-character input, handed to WideCharToMultiByte with length -1
 *    (i.e. treated as NUL-terminated).
 *
 * Returns a buffer obtained from av_malloc() (the caller releases it), or
 * NULL when the size query fails or the allocation fails.
 */
static char *dup_wchar_to_utf8(const wchar_t *w)
{
	char *s = NULL;
	/* Size query: with a zero-length output buffer the call only reports the
	 * number of bytes required, terminator included. */
	int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);

	/* 0 signals a failed conversion; without this check the function would
	 * hand back a buffer that was never written or NUL-terminated. */
	if (l <= 0)
		return NULL;

	s = (char *)av_malloc(l);
	if (s)
		WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
	return s;
}


/*
 * Choose the sample rate from codec->supported_samplerates that is closest
 * to 44100 (the earliest entry wins on a tie).
 *
 * Returns 44100 when the codec publishes no list. The list is walked until
 * its 0 terminator; a list whose first entry is 0 yields 0.
 */
static int select_sample_rate(const AVCodec *codec)
{
	const int *rate = codec->supported_samplerates;
	int chosen = 0;

	if (!rate)
		return 44100;

	for (; *rate; rate++) {
		/* chosen == 0 means nothing has been picked yet. */
		if (chosen == 0 || abs(44100 - *rate) < abs(44100 - chosen))
			chosen = *rate;
	}
	return chosen;
}




/*
 * Pick the entry of codec->channel_layouts with the most channels (the
 * earliest entry wins on a tie).
 *
 * Returns AV_CH_LAYOUT_STEREO when the codec publishes no list. The list is
 * walked until its 0 terminator.
 *
 * NOTE(review): layouts are 64-bit masks but the return type is int, so the
 * result is narrowed on return — confirm every layout used fits in an int.
 */
static int select_channel_layout(const AVCodec *codec)
{
	const uint64_t *layout = codec->channel_layouts;
	uint64_t widest_layout = 0;
	int widest_count = 0;

	if (!layout)
		return AV_CH_LAYOUT_STEREO;

	for (; *layout; layout++) {
		int count = av_get_channel_layout_nb_channels(*layout);

		if (count > widest_count) {
			widest_layout = *layout;
			widest_count = count;
		}
	}
	return widest_layout;
}


/*
 * Clamp x to the range [min_val, max_val].
 *
 * The upper bound is tested first, so if the bounds are inverted
 * (min_val > max_val) any x above max_val yields max_val.
 */
unsigned char clip_value(unsigned char x, unsigned char min_val, unsigned char  max_val) {
	if (x > max_val)
		return max_val;
	return (x < min_val) ? min_val : x;
}

//RGB to YUV420
bool RGB24_TO_YUV420(unsigned char *RgbBuf, int w, int h, unsigned char *yuvBuf)
{
   
   
	unsigned char*ptrY, *ptrU, *ptrV, *ptrRGB;
	memset(yuvBuf, 0, w*h * 3 / 2);
	ptrY = yuvBuf;
	ptrU = yuvBuf + w * h;
	ptrV = ptrU + (w*h * 1 / 4);
	unsigned char y, u, v, r, g, b;
	for (int j = h - 1; j >= 0; j--) {
   
   
		ptrRGB = RgbBuf + w * j * 3;
		for (int i = 0; i < w; i++) {
   
   

			b = *(ptrRGB++);
			g = *(ptrRGB++);
			r = *(ptrRGB++);


			y = (unsigned char)((66 * r + 129 * g + 25 * b + 128) >> 8) + 16;
			u = (unsigned char)((-38 * r - 74 * g + 112 * b + 128) >> 8) + 128;
			v = (unsigned char)((

最低0.47元/天解锁文章

5 条评论

会说书的铲子 2022.10.08
大佬这个如何只抓取某个窗口呢求解答
- 会说书的铲子回复tusong86 2022.10.09
  好的，谢谢大佬
- tusong86回复会说书的铲子 2022.10.08
  这个由hwnd得到hdc，然后由hdc创建位图，网上比较多，你找下

会说书的铲子 2022.09.27
大佬，我看您的这个工程就是使用自己抓屏来录屏，然后我就运行了这个工程看看能不能解决我之前遇到的那个问题，在原本的10帧率是正常的，但是我修改为25帧率了之后，就只能录一半时长，我追踪了下录屏里面frame的抓取，发现无论我修改为几帧，screenCapture线程都只能抓到一秒10帧。我通过修改 460行的frameRate为25 以及 screenCapture里面延时计算为*40，最后录出来视频是25帧，但程序运行的时候只能抓10帧左右。因为我自己编写过只有录屏的来测试，发现我电脑是可以抓到25帧率的，但这个项目不行，是我的修改不对还是说确实是因为多线程的原因导致录屏线程无法抓到足够的帧率呢？