FFmpeg --16-音频重采样分析

原创已于 2025-11-19 17:24:03 修改 · 784 阅读

11 ·

CC 4.0 BY-SA版权

文章标签：

#音视频 #ffmpeg

于 2024-03-12 11:22:33 首次发布

FFmpeg从放弃到入门专栏收录该内容

36 篇文章

订阅专栏

本文详细解释了音频的基本概念，包括比特率、采样频率、采样精度和通道数，以及音频重采样的重要性。主要介绍了FFmpeg中的SwrContext结构和其相关函数，如设置参数、初始化、转换音频数据和释放资源的过程。

文章目录

音频重采样

为什么重采样？
格式匹配：原始PCM是S16格式，但AAC编码器通常需要FLTP（浮点平面）格式

重采样改变了什么？

采样率：采样点数量和采样频率改变，播放时间不变。
采样格式： S16整型 → FLTP浮点
存储方式：交错存储（packed）→ 平面存储（planar）。注意这属于采样格式的一部分，而不是声道布局；声道布局指的是单声道、立体声、5.1 等声道排布。
声道数：单声道->双声道

FFmpeg重采样步骤：

// 初始化重采样上下文（包含采样率转换）
SwrContext *swr = swr_alloc();
av_opt_set_int(swr, "in_sample_rate", 44100, 0);
av_opt_set_int(swr, "out_sample_rate", 48000, 0);
av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);

// 执行重采样（同时改变采样率和格式）
swr_convert(swr, output, output_samples, input, input_samples);

重采样名词

采样点：
对声音波形在某一时刻的一次采样值；若干个采样点组成一帧
1024个采样点是AAC的一个编码帧

采样频率
定义：每秒钟采样的点数
常见值：8kHz(电话)、44.1kHz(CD)、48kHz(视频)、96kHz(高清音频)

采样精度
定义：每个样本点的量化位数
常见值：8bit、16bit、24bit、32bit(float)
精度越高，动态范围越大，音质越好

比特率
定义：每秒传输的比特数，单位bps
计算公式（未压缩PCM）：比特率 = 采样频率 × 采样精度 × 通道数
示例：44.1kHz × 16bit × 2声道 = 1411.2 kbps

帧 (Frame)：
定义：每次编码的采样单元
典型值：
MP3: 1152个采样点
AAC: 1024个采样点
OPUS: 可变（帧长2.5ms~60ms），48kHz下常用20ms，即960个采样点（10ms时为480个采样点）

帧长 (Frame Duration)
计算公式：帧长 = 每帧采样点数 / 采样频率
示例：AAC@44.1kHz → 1024/44100 ≈ 23.2ms

音频参数

数据格式：

enum AVSampleFormat {
    AV_SAMPLE_FMT_U8,    // 8位无符号
    AV_SAMPLE_FMT_S16,   // 16位有符号  
    AV_SAMPLE_FMT_S32,   // 32位有符号
    AV_SAMPLE_FMT_FLT,   // 32位浮点
    AV_SAMPLE_FMT_DBL,   // 64位浮点
    // 对应的平面格式...
};

通道数 (Channels)
单声道(Mono)：1个声道
立体声(Stereo)：2个声道
环绕声：5.1、7.1等多声道

PCM S16格式 vs FLTP格式的区别?

特性	S16	FLTP
数据类型	16位整数	32位浮点数
精度	固定点，65536个级别	浮点，更高动态范围
动态范围	约96dB	超过120dB
存储方式	交错存储	平面存储
内存占用	较小	较大（2倍）
处理效率	整数运算快	浮点运算，适合DSP
量化误差	有量化噪声	几乎无量化误差

原始PCM读取（S16格式）：

// 读取交错存储的S16数据
fread(pcm_frame_buf, 1, pcm_frame_size, in_pcm_fd);
// 数据布局：[L0][R0][L1][R1][L2][R2]...

重采样到FLTP：

// 转换为平面浮点格式
AVFrame *fltp_frame = AllocFltpPcmFrame(pcm_channels, audio_encoder.GetFrameSize());
audio_resampler.ResampleFromS16ToFLTP(pcm_frame_buf, fltp_frame);
// 数据布局：
// 平面0：[L0][L1][L2]... (左声道)
// 平面1：[R0][R1][R2]... (右声道)

S16转FLTP的数值映射：

// S16: -32768 到 +32767
// FLTP: -1.0 到 +1.0

int16_t s16_sample = 16384;  // 约0.5幅度
float fltp_sample = s16_sample / 32768.0f;  // = 0.5f

code

输入和输出：

// 输入参数设置
int64_t src_ch_layout = AV_CH_LAYOUT_STEREO;  // 输入立体声
int src_rate = 48000;                         // 输入48kHz
enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL; // 输入双精度浮点

// 输出参数设置  
int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;  // 输出立体声
int dst_rate = 44100;                         // 输出44.1kHz
enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16; // 输出16位有符号整数

重采样计算

// 关键计算公式
int64_t delay = swr_get_delay(swr_ctx, src_rate);
dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);

公式解析：
输出样本数 = (重采样器延迟 + 输入样本数) × (输出采样率 / 输入采样率)

具体计算：
输入：1024个样本，48kHz采样率
输出采样率：44.1kHz
比例因子：44100/48000 = 0.91875
理论输出样本数：1024 × 0.91875 ≈ 940.8

采样转换比：输出采样率 / 输入采样率
输出的采样点：

// For the 48 kHz -> 44.1 kHz conversion
double ratio = (double)dst_rate / src_rate;  // 44100.0 / 48000.0 = 0.91875

// Output count for 1024 input samples. A plain (int) conversion of
// 1024 * ratio would truncate 940.8 to 940, so round up explicitly,
// matching av_rescale_rnd(..., AV_ROUND_UP).
int expected_output = (int)((1024LL * dst_rate + src_rate - 1) / src_rate);  // 940.8 -> 941

音频时间：

// 输入音频时间
double input_time = src_nb_samples / (double)src_rate;  // 1024/48000 ≈ 21.33ms

// 输出音频时间  
double output_time = ret / (double)dst_rate;  // 941/44100 ≈ 21.34ms

// 可以看到时间基本保持一致

采样格式转换

// 从双精度浮点(-1.0到1.0)转换为16位有符号整数(-32768到32767)
AV_SAMPLE_FMT_DBL → AV_SAMPLE_FMT_S16

// 量化过程：sample × 32768.0，四舍五入后裁剪到 [-32768, 32767] → int16_t

声道布局保持

// 保持立体声布局；DBL 与 S16 都是交错（packed）格式，排列方式不变，只有每个采样的位宽改变
AV_CH_LAYOUT_STEREO = (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT)

PTS(呈现时间戳)计算
在音视频处理中，PTS需要根据采样率重新计算：

// 输入PTS计算
input_pts = samples_processed * 1000000 / src_rate;  // 微秒

// 输出PTS计算  
output_pts = samples_output * 1000000 / dst_rate;    // 微秒

时间同步保证
虽然采样率改变，但音频内容的时间长度应该保持不变：

输入时间 = 1024样本 / 48000Hz = 21.333ms
输出时间 = 941样本 / 44100Hz = 21.338ms
微小的差异来自输出样本数必须取整：理论值 940.8 被向上取整为 941 个样本。

重采样核心步骤

// 创建重采样上下文
swr_ctx = swr_alloc();

// 设置参数
av_opt_set_int(swr_ctx, "in_channel_layout", src_ch_layout, 0);
av_opt_set_int(swr_ctx, "in_sample_rate", src_rate, 0);
// ... 其他参数

// 初始化
swr_init(swr_ctx);

// 循环转换
while (t < 10) {
    // 生成输入数据
    fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);
    
    // 计算输出样本数
    dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
    
    // 执行重采样
    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
}

采样格式定义：

struct sample_fmt_entry {  
    enum AVSampleFormat sample_fmt;  // FFmpeg采样格式枚举  
    const char *fmt_be, *fmt_le;     // 大端和小端的格式字符串  
} sample_fmt_entries[] = {  
    { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },    // 无符号8位，无字节序  
    { AV_SAMPLE_FMT_S16, "s16be", "s16le" },    // 有符号16位，分字节序  
    { AV_SAMPLE_FMT_S32, "s32be", "s32le" },    // 有符号32位，分字节序  
    { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },    // 单精度浮点，分字节序  
    { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },    // 双精度浮点，分字节序  
};

缓冲区管理：

// 动态调整输出缓冲区大小
if (dst_nb_samples > max_dst_nb_samples) {
    av_freep(&dst_data[0]);
    ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                           dst_nb_samples, dst_sample_fmt, 1);
    max_dst_nb_samples = dst_nb_samples;
}

延迟处理：

// 获取重采样器内部延迟
int64_t delay = swr_get_delay(swr_ctx, src_rate);

完整代码

/*
 * Copyright (c) 2012 Stefano Sabatini
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/**
 * @example resampling_audio.c
 * libswresample API use example.
 */

#include <libavutil/opt.h>
#include <libavutil/channel_layout.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>

/**
 * Look up the raw-audio format name that corresponds to a sample format.
 *
 * Each table entry carries a big-endian and a little-endian name; AV_NE()
 * selects one of the two.
 *
 * @param fmt         receives the format name, or NULL when no entry matches
 * @param sample_fmt  sample format to look up
 * @return 0 on success, AVERROR(EINVAL) if sample_fmt is not in the table
 */
static int get_format_from_sample_fmt(const char **fmt,
                                      enum AVSampleFormat sample_fmt)
{
    struct sample_fmt_entry {
        enum AVSampleFormat sample_fmt;
        const char *fmt_be;
        const char *fmt_le;
    };
    static const struct sample_fmt_entry table[] = {
        { AV_SAMPLE_FMT_U8,  "u8",    "u8"    },
        { AV_SAMPLE_FMT_S16, "s16be", "s16le" },
        { AV_SAMPLE_FMT_S32, "s32be", "s32le" },
        { AV_SAMPLE_FMT_FLT, "f32be", "f32le" },
        { AV_SAMPLE_FMT_DBL, "f64be", "f64le" },
    };
    const struct sample_fmt_entry *const table_end = table + FF_ARRAY_ELEMS(table);
    const struct sample_fmt_entry *entry;

    *fmt = NULL;

    for (entry = table; entry != table_end; entry++) {
        if (entry->sample_fmt != sample_fmt)
            continue;
        *fmt = AV_NE(entry->fmt_be, entry->fmt_le);
        return 0;
    }

    /* No table entry matched the requested format. */
    fprintf(stderr,
            "Sample format %s not supported as output format\n",
            av_get_sample_fmt_name(sample_fmt));
    return AVERROR(EINVAL);
}

/**
 * Fill dst buffer with nb_samples, generated starting from t. 交错模式的
 */
static void fill_samples(double *dst, int nb_samples, int nb_channels, int sample_rate, double *t)
{
    int i, j;
    double tincr = 1.0 / sample_rate, *dstp = dst;
    const double c = 2 * M_PI * 440.0;

    /* generate sin tone with 440Hz frequency and duplicated channels */
    for (i = 0; i < nb_samples; i++) {
        *dstp = sin(c * *t);
        for (j = 1; j < nb_channels; j++)
            dstp[j] = dstp[0];
        dstp += nb_channels;
        *t += tincr;
    }
}

int main(int argc, char **argv)
{
    // 输入参数
    int64_t src_ch_layout = AV_CH_LAYOUT_STEREO;
    int src_rate = 48000;
    enum AVSampleFormat src_sample_fmt = AV_SAMPLE_FMT_DBL;
    int src_nb_channels = 0;
    uint8_t **src_data = NULL;  // 二级指针
    int src_linesize;
    int src_nb_samples = 1024;


    // 输出参数
    int64_t dst_ch_layout = AV_CH_LAYOUT_STEREO;
    int dst_rate = 44100;
    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
    int dst_nb_channels = 0;
    uint8_t **dst_data = NULL;  //二级指针
    int dst_linesize;
    int dst_nb_samples;
    int max_dst_nb_samples;

    // 输出文件
    const char *dst_filename = NULL;    // 保存输出的pcm到本地，然后播放验证
    FILE *dst_file;


    int dst_bufsize;
    const char *fmt;

    // 重采样实例
    struct SwrContext *swr_ctx;

    double t;
    int ret;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s output_file\n"
                        "API example program to show how to resample an audio stream with libswresample.\n"
                        "This program generates a series of audio frames, resamples them to a specified "
                        "output format and rate and saves them to an output file named output_file.\n",
                argv[0]);
        exit(1);
    }
    dst_filename = argv[1];

    dst_file = fopen(dst_filename, "wb");
    if (!dst_file) {
        fprintf(stderr, "Could not open destination file %s\n", dst_filename);
        exit(1);
    }

    // 创建重采样器
    /* create resampler context */
    swr_ctx = swr_alloc();
    if (!swr_ctx) {
        fprintf(stderr, "Could not allocate resampler context\n");
        ret = AVERROR(ENOMEM);
        goto end;
    }

    // 设置重采样参数
    /* set options */
    // 输入参数
    av_opt_set_int(swr_ctx, "in_channel_layout",    src_ch_layout, 0);
    av_opt_set_int(swr_ctx, "in_sample_rate",       src_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", src_sample_fmt, 0);
    // 输出参数
    av_opt_set_int(swr_ctx, "out_channel_layout",    dst_ch_layout, 0);
    av_opt_set_int(swr_ctx, "out_sample_rate",       dst_rate, 0);
    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", dst_sample_fmt, 0);

    // 初始化重采样
    /* initialize the resampling context */
    if ((ret = swr_init(swr_ctx)) < 0) {
        fprintf(stderr, "Failed to initialize the resampling context\n");
        goto end;
    }

    /* allocate source and destination samples buffers */
    // 计算出输入源的通道数量
    src_nb_channels = av_get_channel_layout_nb_channels(src_ch_layout);
    // 给输入源分配内存空间
    ret = av_samples_alloc_array_and_samples(&src_data, &src_linesize, src_nb_channels,
                                             src_nb_samples, src_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        goto end;
    }

    /* compute the number of converted samples: buffering is avoided
     * ensuring that the output buffer will contain at least all the
     * converted input samples */
    // 计算输出采样数量
    max_dst_nb_samples = dst_nb_samples =
            av_rescale_rnd(src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);

    /* buffer is going to be directly written to a rawaudio file, no alignment */
    dst_nb_channels = av_get_channel_layout_nb_channels(dst_ch_layout);
    // 分配输出缓存内存
    ret = av_samples_alloc_array_and_samples(&dst_data, &dst_linesize, dst_nb_channels,
                                             dst_nb_samples, dst_sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        goto end;
    }

    t = 0;
    do {
        /* generate synthetic audio */
        // 生成输入源
        fill_samples((double *)src_data[0], src_nb_samples, src_nb_channels, src_rate, &t);

        /* compute destination number of samples */
        int64_t delay = swr_get_delay(swr_ctx, src_rate);
        dst_nb_samples = av_rescale_rnd(delay + src_nb_samples, dst_rate, src_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            av_freep(&dst_data[0]);
            ret = av_samples_alloc(dst_data, &dst_linesize, dst_nb_channels,
                                   dst_nb_samples, dst_sample_fmt, 1);
            if (ret < 0)
                break;
            max_dst_nb_samples = dst_nb_samples;
        }
        //        int fifo_size = swr_get_out_samples(swr_ctx,src_nb_samples);
        //        printf("fifo_size:%d\n", fifo_size);
        //        if(fifo_size < 1024)
        //            continue;

        /* convert to destination format */
        //        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, (const uint8_t **)src_data, src_nb_samples);
        if (ret < 0) {
            fprintf(stderr, "Error while converting\n");
            goto end;
        }
        dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                                 ret, dst_sample_fmt, 1);
        if (dst_bufsize < 0) {
            fprintf(stderr, "Could not get sample buffer size\n");
            goto end;
        }
        printf("t:%f in:%d out:%d\n", t, src_nb_samples, ret);
        fwrite(dst_data[0], 1, dst_bufsize, dst_file);
    } while (t < 10);

    ret = swr_convert(swr_ctx, dst_data, dst_nb_samples, NULL, 0);
    if (ret < 0) {
        fprintf(stderr, "Error while converting\n");
        goto end;
    }
    dst_bufsize = av_samples_get_buffer_size(&dst_linesize, dst_nb_channels,
                                             ret, dst_sample_fmt, 1);
    if (dst_bufsize < 0) {
        fprintf(stderr, "Could not get sample buffer size\n");
        goto end;
    }
    printf("flush in:%d out:%d\n", 0, ret);
    fwrite(dst_data[0], 1, dst_bufsize, dst_file);


    if ((ret = get_format_from_sample_fmt(&fmt, dst_sample_fmt)) < 0)
        goto end;
    fprintf(stderr, "Resampling succeeded. Play the output file with the command:\n"
                    "ffplay -f %s -channel_layout %"PRId64" -channels %d -ar %d %s\n",
            fmt, dst_ch_layout, dst_nb_channels, dst_rate, dst_filename);

end:
    fclose(dst_file);

    if (src_data)
        av_freep(&src_data[0]);
    av_freep(&src_data);

    if (dst_data)
        av_freep(&dst_data[0]);
    av_freep(&dst_data);

    swr_free(&swr_ctx);
    return ret < 0;
}