ffmpeg 音频解码二

最新推荐文章于 2025-03-20 15:49:32 发布

lhuann_

最新推荐文章于 2025-03-20 15:49:32 发布

阅读量1.1k

点赞数 2

分类专栏：音视频文章标签：音视频

本文链接：https://blog.youkuaiyun.com/qq_35543489/article/details/121176263

版权

音视频专栏收录该内容

10 篇文章

订阅专栏

1. ffmpeg 视频解码一
 2. ffmpeg 视频解码二
 3. ffmpeg 音频解码一
 4. ffmpeg 音频解码二
 5. ffmpeg 音视频解码
 6. ffmpeg 视频编码一
 7. ffmpeg 视频编码一（精简版）
8. ffmpeg 视频编码二（基于 libswscale 转换视频）
9. ffmpeg 过滤器libavfilter的使用
 10. ffmpeg 视频编码三（基于 libavfilter 转换视频）

前言

这是 ffmpeg 音频解码的第二篇。这篇主要使用 ffmpeg 的解封装 API（libavformat）读取数据并完成解码，不再像上篇那样使用解析器（parser）来解析数据。

音频的基本信息这里不概述，具体可以参照 ffmpeg 音频解码一

流程图

在这里插入图片描述
代码流程即如流程图所示，下面讲解一下当中部分函数的作用。

avformat_open_input
打开输入文件，并读取文件头相关信息
avformat_find_stream_info
读取媒体文件信息。
av_find_best_stream
获取音频流序号（因为文件当中可能既有音频，也有视频、字幕等流，我们这里使用这个函数获取音频流的序号）。
avcodec_find_decoder
获取解码器
avcodec_parameters_to_context
我们自己构建的解码器并没有设置一些解码相关的参数，此时我们拷贝音频流的参数到里面即可。
av_read_frame
从输入文件当中读取一个数据包（AVPacket，即一帧压缩数据），不用和上篇一样，还需要我们使用解析器把数据解析成一帧。
avcodec_send_packet
把我们刚刚读取到的数据包发送到解码器做解码。
avcodec_receive_frame
获取解码之后的数据。

源码


#pragma once
#define __STDC_CONSTANT_MACROS
#define _CRT_SECURE_NO_WARNINGS

extern "C"
{
#include <libavformat/avformat.h>
#include "libavcodec/avcodec.h"
}


using namespace std;

#define INPUT_FILE_NAME "lh_online.aac"
#define OUTPUT_FILE_NAME "lh_online.pcm"


/*
 * Look up the raw-audio format name (as understood by `ffplay -f`) that
 * corresponds to a packed sample format, choosing the big- or little-endian
 * spelling for the host via AV_NE.
 *
 * fmt         out: set to the format name on success, NULL on failure
 * sample_fmt  sample format to look up; only the packed formats listed in
 *             the table below are recognised
 *
 * Returns 0 on success, -1 (after logging an error) when the format is not
 * in the table.
 */
static int get_format_from_sample_fmt(const char** fmt,
	enum AVSampleFormat sample_fmt)
{
	struct sample_fmt_entry {
		enum AVSampleFormat sample_fmt;
		const char* fmt_be;
		const char* fmt_le;
	};
	static const struct sample_fmt_entry known_formats[] = {
		{ AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
		{ AV_SAMPLE_FMT_S16, "s16be", "s16le" },
		{ AV_SAMPLE_FMT_S32, "s32be", "s32le" },
		{ AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
		{ AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
	};
	const struct sample_fmt_entry* cur = known_formats;
	const struct sample_fmt_entry* const last = known_formats + FF_ARRAY_ELEMS(known_formats);

	*fmt = NULL;

	/* Linear scan: the table is tiny, the first match wins. */
	while (cur != last) {
		if (cur->sample_fmt == sample_fmt) {
			*fmt = AV_NE(cur->fmt_be, cur->fmt_le);
			return 0;
		}
		++cur;
	}

	av_log(NULL, AV_LOG_ERROR, "sample format %s is not supported as output format\n", av_get_sample_fmt_name(sample_fmt));
	return -1;
}

/*
 * Feed one packet to the decoder and append every frame it produces to
 * `ofile` as interleaved (packed) raw PCM.
 *
 * dec_ctx  opened decoder context
 * frame    caller-owned frame reused to receive decoded output
 * pkt      packet to decode, or NULL to put the decoder into draining mode
 * ofile    output stream opened for binary writing
 *
 * Any decoding or write error is logged and terminates the process with
 * exit(1); the function returns normally once the decoder asks for more
 * input (EAGAIN) or reports end of stream (EOF).
 */
static void decode(AVCodecContext* dec_ctx, AVFrame* frame, AVPacket* pkt,
	FILE* ofile)
{
	int i, ch;
	int ret, data_size;
	size_t packed_bytes;

	ret = avcodec_send_packet(dec_ctx, pkt);
	if (ret < 0) {
		av_log(NULL, AV_LOG_ERROR, "Error sending a packet for decoding。\n");
		exit(1);
	}

	/* One packet may yield zero, one or several frames. */
	while (ret >= 0) {
		ret = avcodec_receive_frame(dec_ctx, frame);
		if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
			return;
		else if (ret < 0) {
			av_log(NULL, AV_LOG_ERROR, "Error receive a frame for decoding.\n");
			exit(1);
		}

		printf("frame_number: %d \n", dec_ctx->frame_number);

		/* Size in bytes of one sample of one channel.
		 * av_get_bytes_per_sample() reports an unknown format as 0,
		 * not as a negative value, so 0 must be rejected as well. */
		data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
		if (data_size <= 0) {
			av_log(NULL, AV_LOG_ERROR, "Failed to calculate data size.\n");
			exit(1);
		}

		if (av_sample_fmt_is_planar(dec_ctx->sample_fmt)) {
			/* Planar: one plane per channel, so interleave by hand.
			 * extended_data is used because data[] only holds the
			 * first AV_NUM_DATA_POINTERS planes. */
			for (i = 0; i < frame->nb_samples; i++) {
				for (ch = 0; ch < dec_ctx->channels; ch++) {
					if (fwrite(frame->extended_data[ch] + data_size * i, 1, data_size, ofile) != (size_t)data_size) {
						av_log(NULL, AV_LOG_ERROR, "Failed to write decoded samples.\n");
						exit(1);
					}
				}
			}
		}
		else {
			/* Packed: every channel is already interleaved in plane 0;
			 * the other plane pointers are NULL and must not be read. */
			packed_bytes = (size_t)frame->nb_samples * (size_t)dec_ctx->channels * (size_t)data_size;
			if (fwrite(frame->extended_data[0], 1, packed_bytes, ofile) != packed_bytes) {
				av_log(NULL, AV_LOG_ERROR, "Failed to write decoded samples.\n");
				exit(1);
			}
		}
	}
}

/*
 * Decode the audio stream of INPUT_FILE_NAME into raw interleaved PCM written
 * to OUTPUT_FILE_NAME, then print the stream parameters and the ffplay
 * command needed to play the result.
 *
 * Command-line arguments are not used. Returns 0 on success and 1 on any
 * failure detected here; every resource acquired so far is released on both
 * paths. (decode() itself terminates the process with exit(1) on decoding
 * errors.)
 */
int main(int argc, char* argv[])
{
	const AVCodec* codec = NULL;
	AVFormatContext* fmt_ctx = NULL;
	AVCodecContext* c = NULL;
	AVStream* st = NULL;
	AVFrame* frame = NULL;
	AVPacket* pkt = NULL;
	FILE* ofile = NULL;
	int stream_index;
	int exit_code = 1;
	enum AVSampleFormat sfmt;
	const char* fmt = NULL;

	(void)argc;
	(void)argv;

	/* Open the input file; this also allocates fmt_ctx. */
	if (avformat_open_input(&fmt_ctx, INPUT_FILE_NAME, NULL, NULL) < 0) {
		av_log(NULL, AV_LOG_ERROR, "Could not open source file %s.\n", INPUT_FILE_NAME);
		goto end;
	}

	/* Probe the container for stream information. */
	if (avformat_find_stream_info(fmt_ctx, NULL) < 0) {
		av_log(NULL, AV_LOG_ERROR, "Could not find stream information.\n");
		goto end;
	}

	/* Pick the audio stream; only audio is decoded here. */
	stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
	if (stream_index < 0) {
		av_log(NULL, AV_LOG_ERROR, "Cannot find stream index\n");
		goto end;
	}

	st = fmt_ctx->streams[stream_index];

	/* The decoder is selected from the stream's codec id rather than hard-coded. */
	codec = avcodec_find_decoder(st->codecpar->codec_id);
	if (!codec) {
		av_log(NULL, AV_LOG_ERROR, "Codec not found.\n");
		goto end;
	}

	/* Allocate the decoder context. */
	c = avcodec_alloc_context3(codec);
	if (!c) {
		av_log(NULL, AV_LOG_ERROR, "Could not allocate audio codec context.\n");
		goto end;
	}

	/* Copy the stream's codec parameters into the decoder context. */
	if (avcodec_parameters_to_context(c, st->codecpar) < 0) {
		av_log(NULL, AV_LOG_ERROR, "Failed to copy audio codec parameters to decoder context\n");
		goto end;
	}

	/* Open the decoder. */
	if (avcodec_open2(c, codec, NULL) < 0) {
		av_log(NULL, AV_LOG_ERROR, "Could not open codec.\n");
		goto end;
	}

	pkt = av_packet_alloc();
	if (!pkt) {
		av_log(NULL, AV_LOG_ERROR, "Could not allocate packet.\n");
		goto end;
	}

	frame = av_frame_alloc();
	if (!frame) {
		av_log(NULL, AV_LOG_ERROR, "Could not allocate frame.\n");
		goto end;
	}

	/* Open the output file. */
	ofile = fopen(OUTPUT_FILE_NAME, "wb+");
	if (!ofile) {
		av_log(NULL, AV_LOG_ERROR, "Could not open %s.\n", OUTPUT_FILE_NAME);
		goto end;
	}

	/* Read packets from the container until it is exhausted. */
	while (av_read_frame(fmt_ctx, pkt) >= 0) {
		/* Only packets of the selected audio stream are decoded. */
		if (pkt->stream_index == stream_index) {
			decode(c, frame, pkt, ofile);
		}
		av_packet_unref(pkt);
	}

	/* Flush: a NULL packet drains the frames still buffered in the decoder. */
	decode(c, frame, NULL, ofile);

	/* Raw PCM carries no header, so print the parameters a player needs. */
	/* Channel count */
	printf("channels: %d \n", c->channels);
	/* Sample rate */
	printf("sample_rate: %d  \n", c->sample_rate);
	/* Size in bytes of one audio frame */
	printf("buffer: %d  \n", av_samples_get_buffer_size(NULL, c->channels, c->frame_size, c->sample_fmt, 1));
	/* Sample format */
	sfmt = c->sample_fmt;
	printf("sample_fmt: %s  \n", av_get_sample_fmt_name(sfmt));
	/* decode() writes interleaved samples, so report the packed equivalent
	 * of a planar format. */
	if (av_sample_fmt_is_planar(sfmt)) {
		sfmt = av_get_packed_sample_fmt(sfmt);
	}
	if (get_format_from_sample_fmt(&fmt, sfmt) < 0) {
		av_log(NULL, AV_LOG_ERROR, "Could not get format %s.\n", av_get_sample_fmt_name(sfmt));
		goto end;
	}

	/* Print the playback command. */
	printf("Play the output audio file with the command:\n"
		"ffplay -f %s -ac %d -ar %d %s\n",
		fmt, c->channels, c->sample_rate, OUTPUT_FILE_NAME);

	exit_code = 0;

end:
	/* Release everything; the FFmpeg free/close helpers accept NULL handles. */
	if (ofile && fclose(ofile) != 0) {
		/* fclose flushes buffered output, so a failure means data was lost. */
		av_log(NULL, AV_LOG_ERROR, "Could not close %s.\n", OUTPUT_FILE_NAME);
		exit_code = 1;
	}

	avcodec_free_context(&c);
	av_frame_free(&frame);
	av_packet_free(&pkt);
	avformat_close_input(&fmt_ctx);

	return exit_code;
}

执行过程中的一些信息如下：
在这里插入图片描述
可见待解码文件是一个有2个声道，采样率为44100HZ，采样格式为fltp的文件，共有1478帧。
到此，我们的音频就解码完成了。
接下来使用命令播放我们解码出来的音频试试:

ffplay -f f32le -ac 2 -ar 44100 lh_online.pcm

结果：
在这里插入图片描述
此时你应该听到和aac音频文件一样的声音。
到此，音频解码的部分就结束了。
接下来将写从一个包含音视频的文件拆分音频和视频到单独的文件中，之后是重采样，编码，音视频推流等。