FFmpeg --18-pcm编码为aac

原创已于 2025-12-25 13:34:16 修改 · 1.3k 阅读

9 ·

CC 4.0 BY-SA版权

文章标签：

#pcm #ffmpeg

于 2024-03-14 10:41:01 首次发布

FFmpeg从放弃到入门专栏收录该内容

36 篇文章

订阅专栏

基本概念

从本地文件读取PCM数据进行AAC格式编码，然后将编码后的AAC数据存储到本地文件。

PCM样本格式：未经压缩的音频采样数据裸流
参数：

Sample Rate : 采样频率
Sample Size : 量化位数
Number of Channels : 通道个数
Sign : 表示样本数据是否是有符号位
Byte Ordering : 字节序
Integer Or Floating Point : 整型或浮点型

流程

请添加图片描述

功能特性

核心功能

从本地文件读取PCM音频数据
支持多种PCM格式输入（S16LE, F32LE等）
编码输出为AAC格式音频
支持ADTS头部生成

支持的编码器

默认AAC编码器 (aac)
- 输入格式：AV_SAMPLE_FMT_FLTP（平面浮点）
- 需要格式转换
libfdk_aac编码器
- 输入格式：AV_SAMPLE_FMT_S16（有符号16位整数）
- 质量更高的编码器

配置参数

采样率：48000 Hz
声道数：2（立体声）
声道布局：AV_CH_LAYOUT_STEREO
比特率：128 kbps
Profile：AAC Low Complexity

代码结构分析

主要函数

static int check_sample_fmt() // 检查采样格式支持
static int check_sample_rate() // 检查采样率支持
static int check_channel_layout() // 检查声道布局支持
static int encode() // 执行音频帧编码
void f32le_convert_to_fltp() // F32LE到FLTP格式转换
static void get_adts_header() // 生成AAC ADTS头部

关键流程

初始化阶段

解析命令行参数
查找并初始化编码器
配置编码器参数
打开输入输出文件

编码循环

读取PCM数据 → 格式转换 → 填充AVFrame → 编码 → 写入文件

清理阶段

冲刷编码器
释放资源
关闭文件

编码器支持矩阵

编码器	输入PCM格式	输出格式	特点
aac	AV_SAMPLE_FMT_FLTP	原始AAC	FFmpeg内置编码器
libfdk_aac	AV_SAMPLE_FMT_S16	原始AAC	高质量第三方编码器

参数配置详情

音频参数配置

codec_ctx->bit_rate = 128*1024;          // target bit rate in bits per second (131072, i.e. 128 Kibit/s rather than 128000)
codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;  // stereo channel layout
codec_ctx->sample_rate = 48000;          // 48 kHz sample rate
codec_ctx->channels = 2;                 // two channels
codec_ctx->profile = FF_PROFILE_AAC_LOW; // AAC Low Complexity profile
codec_ctx->flags = AV_CODEC_FLAG_GLOBAL_HEADER; // sets the global-header flag (plain assignment, so any other flag is cleared); encode() writes an ADTS header per packet when this flag is set

帧参数配置

frame->nb_samples = codec_ctx->frame_size;    // samples per channel in one frame, taken from the encoder
frame->format = codec_ctx->sample_fmt;        // sample format, same as the encoder's
frame->channel_layout = codec_ctx->channel_layout; // channel layout, same as the encoder's

编码器兼容性检查

采样格式检查

/*
 * Check whether an encoder lists a given sample format as supported.
 *
 * codec:      encoder description to query.
 * sample_fmt: sample format the caller intends to feed to the encoder.
 *
 * Returns 1 when the format appears in codec->sample_fmts, or when the
 * encoder publishes no list at all (NULL pointer, so there is nothing to
 * reject against); returns 0 when a list exists and the format is not in it.
 */
static int check_sample_fmt(const AVCodec *codec, enum AVSampleFormat sample_fmt)
{
    const enum AVSampleFormat *p = codec->sample_fmts;

    /* The list is optional; walking a NULL pointer would crash. */
    if (!p) {
        return 1;
    }

    /* The list is terminated by AV_SAMPLE_FMT_NONE. */
    for (; *p != AV_SAMPLE_FMT_NONE; p++) {
        if (*p == sample_fmt) {
            return 1;
        }
    }
    return 0;
}

采样率检查

/*
 * Check whether an encoder lists a given sample rate as supported.
 * Every rate visited during the search is printed to stdout.
 *
 * codec:       encoder description to query.
 * sample_rate: sample rate in Hz the caller intends to use.
 *
 * Returns 1 when the rate appears in codec->supported_samplerates, or when
 * the encoder publishes no list at all (NULL pointer, so there is nothing to
 * reject against); returns 0 when a list exists and the rate is not in it.
 */
static int check_sample_rate(const AVCodec *codec, const int sample_rate)
{
    const int *p = codec->supported_samplerates;

    /* The list is optional; walking a NULL pointer would crash. */
    if (!p) {
        return 1;
    }

    /* The list is terminated by a 0 entry. */
    for (; *p != 0; p++) {
        printf("%s support %dhz\n", codec->name, *p);
        if (*p == sample_rate) {
            return 1;
        }
    }
    return 0;
}

音频格式转换

F32LE到FLTP转换

/*
 * De-interleave packed stereo float samples into planar layout.
 *
 * f32le:      input, 2 * nb_samples floats ordered L,R,L,R,...
 * fltp:       output, 2 * nb_samples floats; the first nb_samples receive the
 *             left channel, the following nb_samples the right channel.
 * nb_samples: number of samples per channel.
 */
void f32le_convert_to_fltp(float *f32le, float *fltp, int nb_samples)
{
    float *src = f32le;
    float *left = fltp;                 /* start of the left plane */
    float *right = fltp + nb_samples;   /* right plane follows the left one */
    int remaining = nb_samples;

    while (remaining-- > 0) {
        *left++ = *src++;               /* even input slots: left channel */
        *right++ = *src++;              /* odd input slots: right channel */
    }
}

格式说明
F32LE: 交错排列的32位浮点 [L,R,L,R,L,R,…]
FLTP: 平面排列的32位浮点 [L,L,L,…] + [R,R,R,…]

ADTS头部生成

/*
 * Fill adts_header with an ADTS header for one AAC packet.
 * This is an excerpt: most sample-rate cases and header fields are elided.
 *
 * ctx:         encoder context; its sample_rate selects the frequency index.
 * adts_header: output buffer (callers in this article pass a 7-byte array).
 * aac_length:  callers pass the packet size; its use is in the elided part.
 */
static void get_adts_header(AVCodecContext *ctx, uint8_t *adts_header, int aac_length)
{
    // Map the sample rate to a frequency index; rates not listed keep index 0
    uint8_t freq_idx = 0;
    switch (ctx->sample_rate) {
        case 96000: freq_idx = 0; break;
        case 48000: freq_idx = 3; break;
        case 44100: freq_idx = 4; break;
        // ... other sample rates (elided)
    }
    
    // ADTS header layout
    adts_header[0] = 0xFF;  // sync word
    adts_header[1] = 0xF1;  // rest of the sync word + version bits
    // ... remaining fields (elided)
}

核心编码循环

/*
 * Condensed view of the encode step: send one frame to the encoder and write
 * every packet it produces to the output file.
 *
 * ctx:    opened encoder context.
 * frame:  frame to encode.
 * pkt:    caller-owned packet reused for each received packet.
 * output: destination stream.
 *
 * Returns 0 when the encoder needs more input or is drained, -1 when sending
 * the frame or receiving a packet fails.
 */
static int encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt, FILE *output)
{
    int ret;

    // Hand the frame to the encoder
    ret = avcodec_send_frame(ctx, frame);
    if (ret < 0) {
        return -1;
    }

    // Drain every packet that is currently available
    for (;;) {
        ret = avcodec_receive_packet(ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            break;
        }
        if (ret < 0) {
            return -1;  // real error: pkt holds nothing that may be written
        }

        // With the global-header flag set, a 7-byte ADTS header is written
        // in front of each packet
        if (ctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
            uint8_t aac_header[7];
            get_adts_header(ctx, aac_header, pkt->size);
            fwrite(aac_header, 1, 7, output);
        }

        // Write the AAC payload, then release the packet's data
        fwrite(pkt->data, 1, pkt->size, output);
        av_packet_unref(pkt);
    }
    return 0;
}

内存管理与资源分配

资源分配序列

// 1. Allocate the encoder context
codec_ctx = avcodec_alloc_context3(codec);

// 2. Allocate the packet and the frame
pkt = av_packet_alloc();
frame = av_frame_alloc();

// 3. Set the frame parameters, then allocate the frame's buffers
frame->nb_samples = codec_ctx->frame_size;
frame->format = codec_ctx->sample_fmt;
frame->channel_layout = codec_ctx->channel_layout;
ret = av_frame_get_buffer(frame, 0);

// 4. Allocate the PCM buffers (one frame's worth of bytes each)
pcm_buf = (uint8_t *)malloc(frame_bytes);
pcm_temp_buf = (uint8_t *)malloc(frame_bytes);

资源释放序列

// Release everything in the reverse order of allocation
free(pcm_temp_buf);
free(pcm_buf);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);

错误处理机制

编码状态检查

// Send the frame; any negative result is reported as an error
ret = avcodec_send_frame(ctx, frame);
if (ret < 0) {
    fprintf(stderr, "Error sending the frame to the encoder\n");
    return -1;
}

// Receive packets until the encoder has none left to hand out
while (ret >= 0) {
    ret = avcodec_receive_packet(ctx, pkt);
    if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
        return 0;  // normal: more input needed, or encoder fully drained
    } else if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame\n");
        return -1;  // error
    }
    // handle the encoded packet here ...
}

文件操作检查

// Open the input PCM file for binary reading; report the path and exit if that fails
infile = fopen(in_pcm_file, "rb");
if (!infile) {
    fprintf(stderr, "Could not open %s\n", in_pcm_file);
    exit(1);
}

性能优化特性

帧重用机制

// Make sure the frame can be written before it is refilled; a failure is only logged here
ret = av_frame_make_writable(frame);
if(ret != 0)
    printf("av_frame_make_writable failed, ret = %d\n", ret);

批量数据处理

// Bytes in one frame: bytes per sample * channel count * samples per channel
int frame_bytes = av_get_bytes_per_sample(frame->format) 
                * frame->channels 
                * frame->nb_samples;

内存预分配

// Allocate the PCM buffers once up front so they are not reallocated per frame
uint8_t *pcm_buf = (uint8_t *)malloc(frame_bytes);
uint8_t *pcm_temp_buf = (uint8_t *)malloc(frame_bytes);

code(核心部分)

/*
 * Encode one frame (or flush the encoder) and append every resulting packet
 * to the output file.
 *
 * ctx:    opened encoder context.
 * frame:  frame to encode; main() passes NULL to flush the encoder.
 * pkt:    caller-owned packet reused for each received packet; its data is
 *         released again before this function returns.
 * output: destination stream.
 *
 * Returns 0 once the encoder asks for more input or reports end of stream,
 * -1 on an encoder error or a short write.
 */
static int encode(AVCodecContext *ctx, AVFrame *frame, AVPacket *pkt, FILE *output)
{
    int ret;

    /* send the frame for encoding */
    ret = avcodec_send_frame(ctx, frame);
    if (ret < 0) {
        fprintf(stderr, "Error sending the frame to the encoder\n");
        return -1;
    }

    for (;;) {
        int write_failed = 0;

        ret = avcodec_receive_packet(ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            return 0;
        }
        if (ret < 0) {
            fprintf(stderr, "Error encoding audio frame\n");
            return -1;
        }

        if (ctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
            /* With this flag set, a 7-byte ADTS header is written in front
             * of every packet. */
            uint8_t aac_header[7];
            get_adts_header(ctx, aac_header, pkt->size);
            if (fwrite(aac_header, 1, sizeof aac_header, output) != sizeof aac_header) {
                fprintf(stderr, "fwrite aac_header failed\n");
                write_failed = 1;
            }
        }

        /* pkt->size is a signed int; convert once so the comparison with
         * fwrite's size_t result is not a mixed-sign comparison. */
        if (!write_failed) {
            size_t payload = (size_t)pkt->size;
            if (fwrite(pkt->data, 1, payload, output) != payload) {
                fprintf(stderr, "fwrite aac data failed\n");
                write_failed = 1;
            }
        }

        /* Release the packet's data on every path, including write errors. */
        av_packet_unref(pkt);
        if (write_failed) {
            return -1;
        }
    }
}

int main(int argc, char **argv)
{
    char *in_pcm_file = NULL;
    char *out_aac_file = NULL;
    FILE *infile = NULL;
    FILE *outfile = NULL;
    const AVCodec *codec = NULL;
    AVCodecContext *codec_ctx= NULL;
    AVFrame *frame = NULL;
    AVPacket *pkt = NULL;
    int ret = 0;
    int force_codec = 0;     // 强制使用指定的编码
    char *codec_name = NULL;

    in_pcm_file = argv[1];      // 输入PCM文件
    out_aac_file = argv[2];     // 输出的AAC文件

    enum AVCodecID codec_id = AV_CODEC_ID_AAC;

    codec_ctx = avcodec_alloc_context3(codec);
    if (!codec_ctx) {
        fprintf(stderr, "Could not allocate audio codec context\n");
        exit(1);
    }
    codec_ctx->codec_id = codec_id;
    codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
    codec_ctx->bit_rate = 128*1024;
    codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
    codec_ctx->sample_rate    = 48000; //48000;
    codec_ctx->channels       = av_get_channel_layout_nb_channels(codec_ctx->channel_layout);
    codec_ctx->profile = FF_PROFILE_AAC_LOW;    //

    if(strcmp(codec->name, "aac") == 0) {
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    } else if(strcmp(codec->name, "libfdk_aac") == 0) {
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_S16;
    } else {
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
    }
    /* 检测支持采样格式支持情况 */
    check_sample_fmt(codec, codec_ctx->sample_fmt);
    check_sample_rate(codec, codec_ctx->sample_rate);
    check_channel_layout(codec, codec_ctx->channel_layout);
    
    codec_ctx->flags = AV_CODEC_FLAG_GLOBAL_HEADER;  //ffmpeg默认的aac是不带adts，而fdk_aac默认带adts，这里我们强制不带
    /* 将编码器上下文和编码器进行关联 */
    avcodec_open2(codec_ctx, codec, NULL);
    
    // 打开输入和输出文件
    infile = fopen(in_pcm_file, "rb");
    outfile = fopen(out_aac_file, "wb");

    /* packet for holding encoded output */
    pkt = av_packet_alloc();

    /* frame containing input raw audio */
    frame = av_frame_alloc();
  
    // 设置frame参数
    frame->nb_samples     = codec_ctx->frame_size;
    frame->format         = codec_ctx->sample_fmt;
    frame->channel_layout = codec_ctx->channel_layout;
    frame->channels = av_get_channel_layout_nb_channels(frame->channel_layout);
   
    
    /* 为frame分配buffer */
    av_frame_get_buffer(frame, 0);
   
    // 计算出每一帧的数据 单个采样点的字节 * 通道数目 * 每帧采样点数量
    int frame_bytes = av_get_bytes_per_sample(frame->format) \
            * frame->channels \
            * frame->nb_samples;
    
    uint8_t *pcm_buf = (uint8_t *)malloc(frame_bytes);
    uint8_t *pcm_temp_buf = (uint8_t *)malloc(frame_bytes);
    int64_t pts = 0;
    
    printf("start enode\n");
    for (;;) {
        memset(pcm_buf, 0, frame_bytes);
        size_t read_bytes = fread(pcm_buf, 1, frame_bytes, infile);
        if(read_bytes <= 0) {
            printf("read file finish\n");
            break;
        }

        /* 确保该frame可写, 如果编码器内部保持了内存参考计数，则需要重新拷贝一个备份
            目的是新写入的数据和编码器保存的数据不能产生冲突
        */
        av_frame_make_writable(frame);
      
        if(AV_SAMPLE_FMT_S16 == frame->format) {
            // 将读取到的PCM数据填充到frame去
            ret = av_samples_fill_arrays(frame->data, frame->linesize,
                                   pcm_buf, frame->channels,
                                   frame->nb_samples, frame->format, 0);
        } else {
            // 将读取到的PCM数据填充到frame去
            // 将本地的f32le packed模式的数据转为float palanar
            memset(pcm_temp_buf, 0, frame_bytes);
            f32le_convert_to_fltp((float *)pcm_buf, (float *)pcm_temp_buf, frame->nb_samples);
            ret = av_samples_fill_arrays(frame->data, frame->linesize,
                                   pcm_temp_buf, frame->channels,
                                   frame->nb_samples, frame->format, 0);
        }
        // 设置pts
        pts += frame->nb_samples;
        frame->pts = pts;       // 使用采样率作为pts的单位，具体换算成秒 pts*1/采样率
        ret = encode(codec_ctx, frame, pkt, outfile);
        if(ret < 0) {
            printf("encode failed\n");
            break;
        }
    }

    /* 冲刷编码器 */
    encode(codec_ctx, NULL, pkt, outfile);

    // 关闭文件
    fclose(infile);
    fclose(outfile);

    // 释放内存
    if(pcm_buf) {
        free(pcm_buf);
    }
    if (pcm_temp_buf) {
        free(pcm_temp_buf);
    }
    av_frame_free(&frame);
    av_packet_free(&pkt);
    avcodec_free_context(&codec_ctx);
    printf("main finish, please enter Enter and exit\n");
    getchar();
    return 0;
}