深入浅出FFMPEG

最新推荐文章于 2024-06-12 16:01:28 发布

cuijinquan

最新推荐文章于 2024-06-12 16:01:28 发布

阅读量604

点赞数

分类专栏： FFmpeg

FFmpeg 专栏收录该内容

63 篇文章

订阅专栏

====== 深入浅出FFMPEG ======
===== 数字媒体处理的基本流程 =====
===== 认识FFMPEG =====
FFMPEG堪称自由软件中最完备的一套多媒体支持库，它几乎实现了所有当下常见的数据封装格式、多媒体传输协议以及音视频编解码器。因此，对于从事多媒体技术开发的工程师来说，深入研究FFMPEG成为一门必不可少的工作，可以这样说，FFMPEG之于多媒体开发工程师的重要性正如kernel之于嵌入式系统工程师一般。

几个小知识：
  * FFMPEG项目是由法国人Fabrice Bellard发起的，此人也是著名的CPU模拟器项目QEMU的发起者，同时还是[[http://bellard.org/pi/pi2700e9/announce.html|圆周率算法纪录]]的保持者。
  * FF是Fast Forward的意思，翻译成中文是“快进”。
  * [[http://bits.ohloh.net/attachments/2230/ffmpeg-logo-square_med.png|FFMPEG的LOGO]]是一个"Z字扫描"示意图，Z字扫描用于将图像的二维频域数据一维化，同时保证了一维化的数据具备良好的统计特性，从而提高其后要进行的一维熵编码的效率。

===== 一个简单的测试程序 =====
<code c>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "libavutil/avstring.h"
#include "libavformat/avformat.h"
#include "libavdevice/avdevice.h"
#include "libavcodec/opt.h"
#include "libswscale/swscale.h"

#define DECODED_AUDIO_BUFFER_SIZE            192000

/**
 * Command-line settings of the test program, filled in by parse_options().
 */
struct options
{
    int streamId;        /* -s<n>: index of the stream to process, -1 if not given */
    int frames;          /* -f<n>: number of packets to read, -1 means no limit */
    int nodec;           /* -ndec: non-zero disables decoding */
    int64_t lstart;      /* -k<n>: timestamp handed to av_seek_frame, -1 if not given */
    char finput[256];    /* input file name (argv[1]) */
    char foutput1[256];  /* -o<name>: "<name>.mpg", receives the packet payloads */
    char foutput2[256];  /* -o<name>: "<name>.raw", receives the decoded data */
};

/*
 * Write name followed by suffix into dst (capacity cap, always NUL-terminated).
 * Returns 0 on success; on truncation dst is reset to "" and -1 is returned.
 */
static int build_name(char *dst, size_t cap, const char *name, const char *suffix)
{
    int n = snprintf(dst, cap, "%s%s", name, suffix);

    if (n < 0 || (size_t)n >= cap)
    {
        dst[0] = 0;
        return -1;
    }
    return 0;
}

/**
 * Parse the command line into *opts.
 *
 * argv[1] is the input file; every following argument must have the form
 * "-<letter><value>" with no space in between (for example "-s0", "-f100",
 * "-k5000", "-oout", "-ndec").
 *
 * @param opts  receives the parsed settings; untouched when argc < 2
 * @param argc  argument count as passed to main
 * @param argv  argument vector as passed to main
 * @return 0 on success, -1 when the input file is missing, an argument is
 *         malformed or unknown, or a file name does not fit into the
 *         fixed-size buffers of struct options
 */
int parse_options(struct options *opts, int argc, char** argv)
{
    int optidx;
    char *optstr;

    if (argc < 2) return -1;

    opts->streamId = -1;
    opts->lstart = -1;
    opts->frames = -1;
    opts->foutput1[0] = 0;
    opts->foutput2[0] = 0;
    opts->nodec = 0;

    /* Bounded copy: an over-long path is rejected instead of smashing the stack. */
    if (build_name(opts->finput, sizeof opts->finput, argv[1], "") < 0) return -1;

    optidx = 2;
    while (optidx < argc)
    {
        optstr = argv[optidx++];
        if (*optstr++ != '-') return -1;
        switch (*optstr++)
        {
        case 's':  //< stream id
            opts->streamId = atoi(optstr);
            break;
        case 'f':  //< frames
            opts->frames = atoi(optstr);
            break;
        case 'k':  //< skipped
            opts->lstart = atoll(optstr);
            break;
        case 'o':  //< output
            /* The extension must fit as well, hence the check on both names. */
            if (build_name(opts->foutput1, sizeof opts->foutput1, optstr, ".mpg") < 0 ||
                build_name(opts->foutput2, sizeof opts->foutput2, optstr, ".raw") < 0)
            {
                opts->foutput1[0] = 0;
                opts->foutput2[0] = 0;
                return -1;
            }
            break;
        case 'n': //decoding and output options
            if (strcmp("dec", optstr) == 0)
                opts->nodec = 1;
            break;
        default:
            return -1;
        }
    }

    return 0;
}

/**
 * Print the program banner and the command-line synopsis to stdout.
 *
 * @param program  name the program was started with; inserted into the usage line
 */
void show_help(char* program)
{
    fputs("Simple FFMPEG test program\n", stdout);
    fprintf(stdout, "Usage: %s inputfile [-sstreamid [-fframes] [-kskipped] [-ooutput_filename(without extension)]]\n", program);
}

/**
 * Log sink installed with av_log_set_callback(): formats each message onto
 * stdout. Neither the context pointer nor the level is consulted.
 */
static void log_callback(void* ptr, int level, const char* fmt, va_list vl)
{
    (void)ptr;
    (void)level;
    vprintf(fmt, vl);
}

#include <sys/ioctl.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/soundcard.h>

#define OSS_DEVICE "/dev/dsp0"

struct audio_dsp
{
    int audio_fd;
    int channels;
    int format;
    int speed;
};
/**
 * Translate an FFmpeg sample format into the matching OSS AFMT_* constant.
 *
 * @param format  sample format reported by the decoder
 * @return AFMT_S16_LE for SAMPLE_FMT_S16; AFMT_U8 for SAMPLE_FMT_U8 and for
 *         every other value
 */
int map_formats(enum SampleFormat format)
{
    if (format == SAMPLE_FMT_S16)
    {
        return AFMT_S16_LE;
    }

    /* SAMPLE_FMT_U8 and all remaining formats share the 8-bit fallback. */
    return AFMT_U8;
}
/**
 * Apply the sample format, channel count and sample rate stored in *dsp to
 * the opened audio device, in that order.
 *
 * Each ioctl receives a pointer to the corresponding field, so the driver may
 * write a value back into dsp->format, dsp->channels and dsp->speed.
 *
 * @param dsp  device settings; dsp->audio_fd must be an open descriptor
 * @return 0 on success, -1 if the descriptor is invalid or any ioctl fails
 *         (a message naming the failing setting is printed)
 */
int set_audio(struct audio_dsp* dsp)
{
    if (dsp->audio_fd == -1)
    {
        printf("Invalid audio dsp id!\n");
        return -1;
    }

    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_SETFMT, &dsp->format))
    {
        printf("Failed to set dsp format!\n");
        return -1;
    }

    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_CHANNELS, &dsp->channels))
    {
        /* Previously reported as a "format" failure, which hid the real cause. */
        printf("Failed to set dsp channels!\n");
        return -1;
    }

    if (-1 == ioctl(dsp->audio_fd, SNDCTL_DSP_SPEED, &dsp->speed))
    {
        printf("Failed to set dsp speed!\n");
        return -1;
    }
    return 0;
}

/**
 * Send a block of PCM data to the audio device.
 *
 * write() may accept fewer bytes than requested; the call is repeated until
 * the whole block has been handed over, so no audio is silently dropped.
 *
 * @param dsp   audio device; dsp->audio_fd must be an open descriptor
 * @param buf   PCM data in the format configured through set_audio()
 * @param size  number of bytes in buf
 * @return 0 when all bytes were written (or size is 0), -1 on an invalid
 *         descriptor, a negative size or a write failure
 */
int play_pcm(struct audio_dsp* dsp, unsigned char *buf, int size)
{
    if (dsp->audio_fd == -1)
    {
        printf("Invalid audio dsp id!\n");
        return -1;
    }

    if (size < 0)
    {
        printf("Failed to write audio dsp!\n");
        return -1;
    }

    while (size > 0)
    {
        ssize_t written = write(dsp->audio_fd, buf, (size_t)size);

        /* A zero return for a non-empty request would loop forever; treat it as an error. */
        if (written <= 0)
        {
            printf("Failed to write audio dsp!\n");
            return -1;
        }
        buf += written;
        size -= (int)written;
    }

    return 0;
}

#include <linux/fb.h>
#include <sys/mman.h>

#define FB_DEVICE "/dev/fb0"

/* Picture layouts accepted by show_picture(). */
enum pic_format
{
    /* Planar YUV 4:2:0; show_picture() compares this value against PIX_FMT_YUV420P. */
    eYUV_420_Planer,
};
/* State of the framebuffer output, set up by open_video() and released by close_video(). */
struct video_fb
{
    int video_fd;                      /* descriptor of FB_DEVICE; -1 is treated as "not open" */
    struct fb_var_screeninfo vinfo;    /* variable screen info: resolution, offsets, bits per pixel */
    struct fb_fix_screeninfo finfo;    /* fixed screen info: line_length is used as the row stride */
    unsigned char *fbp;                /* start of the mmap'ed framebuffer memory */
    AVFrame *frameRGB;                 /* frame holding the RGB32 picture copied to the screen */
    /* Top-left corner, in pixels, at which pictures are drawn. */
    struct
    {
        int x;
        int y;
    } video_pos;
};

/**
 * Open the framebuffer device, query its geometry, map its memory and
 * allocate the RGB helper frame.
 *
 * On failure everything acquired so far is released again and fb->video_fd
 * is left at -1, so a later close_video() call is harmless.
 *
 * @param fb  state to initialise
 * @param x   horizontal position, in pixels, where pictures will be drawn
 * @param y   vertical position, in pixels, where pictures will be drawn
 * @return 0 on success;
 *         -1 the device could not be opened,
 *         -2 the screen information could not be read,
 *         -3 the framebuffer could not be mapped,
 *         -4 (x, y) lies outside the visible screen,
 *         -5 the RGB frame could not be allocated
 */
int open_video(struct video_fb *fb, int x, int y)
{
    size_t screensize;
    int err;

    fb->fbp = NULL;
    fb->frameRGB = NULL;

    /* The mapping below is PROT_READ|PROT_WRITE + MAP_SHARED, which needs a read/write descriptor. */
    fb->video_fd = open(FB_DEVICE, O_RDWR);
    if (fb->video_fd == -1) return -1;

    /* Fixed info goes to finfo, variable info to vinfo (both used to land in finfo). */
    if (ioctl(fb->video_fd, FBIOGET_FSCREENINFO, &fb->finfo) ||
        ioctl(fb->video_fd, FBIOGET_VSCREENINFO, &fb->vinfo))
    {
        err = -2;
        goto fail_close;
    }

    printf("video device: resolution %ux%u, %ubpp\n", fb->vinfo.xres, fb->vinfo.yres, fb->vinfo.bits_per_pixel);

    /* Same size formula as close_video() uses for munmap(). */
    screensize = fb->vinfo.xres * fb->vinfo.yres * fb->vinfo.bits_per_pixel / 8;
    fb->fbp = (unsigned char *) mmap(0, screensize, PROT_READ|PROT_WRITE, MAP_SHARED, fb->video_fd, 0);
    if (fb->fbp == (unsigned char *) MAP_FAILED)
    {
        fb->fbp = NULL;
        err = -3;
        goto fail_close;
    }

    if (x < 0 || y < 0 || (unsigned int)x >= fb->vinfo.xres || (unsigned int)y >= fb->vinfo.yres)
    {
        err = -4;
        goto fail_unmap;
    }
    fb->video_pos.x = x;
    fb->video_pos.y = y;

    fb->frameRGB = avcodec_alloc_frame();
    if (!fb->frameRGB)
    {
        err = -5;
        goto fail_unmap;
    }

    return 0;

fail_unmap:
    munmap(fb->fbp, screensize);
    fb->fbp = NULL;
fail_close:
    close(fb->video_fd);
    fb->video_fd = -1;
    return err;
}

/**
 * Convert a decoded picture to RGB32 and copy it into the framebuffer at the
 * position chosen in open_video(). The picture is clipped to the visible
 * screen and to the mapped framebuffer memory.
 *
 * @param fb      framebuffer state initialised by open_video()
 * @param frame   decoded picture
 * @param width   picture width in pixels
 * @param height  picture height in pixels
 * @param format  layout of frame; only eYUV_420_Planer is drawn, any other
 *                value is ignored and reported as success
 * @return 0 on success;
 *         -1 the framebuffer is not open,
 *         -2 the drawing position lies outside the screen,
 *         -3 the picture size is invalid or the colour conversion failed,
 *         -4 the framebuffer is not 32 bits per pixel or the picture does not
 *            fit into the mapped memory
 */
int show_picture(struct video_fb *fb, AVFrame *frame, int width, int height, enum pic_format format)
{
    struct SwsContext *sws;
    uint8_t *rgb;
    unsigned char *dest;
    unsigned char *src;
    size_t first, mapped, row_bytes, fit;
    int rgb_size;
    int rows;
    int ret = 0;
    int i;

    if (fb->video_fd == -1 || !fb->fbp || !fb->frameRGB) return -1;
    if ((fb->video_pos.x >= fb->vinfo.xres) || (fb->video_pos.y >= fb->vinfo.yres)) return -2;
    if (width <= 0 || height <= 0) return -3;

    if (fb->video_pos.x + width > fb->vinfo.xres)
    {
        width = fb->vinfo.xres - fb->video_pos.x;
    }
    if (fb->video_pos.y + height > fb->vinfo.yres)
    {
        height = fb->vinfo.yres - fb->video_pos.y;
    }

    if (format != eYUV_420_Planer) return 0;

    /* The row copy below moves 4 bytes per pixel, so only 32bpp screens are supported. */
    if (fb->vinfo.bits_per_pixel != 32) return -4;

    /*
     * open_video() maps xres * yres * bits_per_pixel / 8 bytes. Rows are
     * addressed with finfo.line_length and the x/y offsets, so make sure every
     * copied row stays inside that mapping.
     */
    row_bytes = (size_t)width * 4;
    mapped = fb->vinfo.xres * fb->vinfo.yres * fb->vinfo.bits_per_pixel / 8;
    first = (size_t)(fb->video_pos.x + fb->vinfo.xoffset) * (fb->vinfo.bits_per_pixel / 8)
          + (size_t)(fb->video_pos.y + fb->vinfo.yoffset) * fb->finfo.line_length;
    if (fb->finfo.line_length == 0 || first > mapped || row_bytes > mapped - first) return -4;
    fit = (mapped - first - row_bytes) / fb->finfo.line_length + 1;
    if ((size_t)height > fit)
    {
        height = (int)fit;
    }

    /* avcodec_alloc_frame() attaches no pixel memory; provide a buffer for the RGB picture. */
    rgb_size = avpicture_get_size(PIX_FMT_RGB32, width, height);
    if (rgb_size <= 0) return -3;
    rgb = av_malloc(rgb_size);
    if (!rgb) return -3;
    avpicture_fill((AVPicture *)fb->frameRGB, rgb, PIX_FMT_RGB32, width, height);

    sws = sws_getContext(width, height, PIX_FMT_YUV420P, width, height, PIX_FMT_RGB32, SWS_FAST_BILINEAR, NULL, NULL, NULL);
    if (!sws)
    {
        ret = -3;
        goto done;
    }

    /* sws_scale() returns the number of output rows; a positive value means success. */
    rows = sws_scale(sws, frame->data, frame->linesize, 0, height, fb->frameRGB->data, fb->frameRGB->linesize);
    sws_freeContext(sws);
    if (rows <= 0)
    {
        ret = -3;
        goto done;
    }
    if (rows > height)
    {
        rows = height;
    }

    src = fb->frameRGB->data[0];
    dest = fb->fbp + first;
    for (i = 0; i < rows; i++)
    {
        memcpy(dest, src, row_bytes);
        src += fb->frameRGB->linesize[0];
        dest += fb->finfo.line_length;
    }

done:
    /* The temporary buffer goes away; do not leave a dangling pointer in the frame. */
    fb->frameRGB->data[0] = NULL;
    av_free(rgb);
    return ret;
}

/**
 * Unmap the framebuffer memory and close the device. Does nothing when the
 * device is not open. fb->frameRGB is not released by this function.
 *
 * @param fb  framebuffer state; fb->video_fd is set to -1 afterwards
 */
void close_video(struct video_fb *fb)
{
    size_t mapped_len;

    if (fb->video_fd == -1)
    {
        return;
    }

    mapped_len = fb->vinfo.xres * fb->vinfo.yres * fb->vinfo.bits_per_pixel / 8;
    munmap(fb->fbp, mapped_len);
    close(fb->video_fd);
    fb->video_fd = -1;
}

int main(int argc, char **argv)
{
    AVFormatContext* pCtx = 0;
    AVCodecContext *pCodecCtx = 0;
    AVCodec *pCodec = 0;
    AVPacket packet;
    AVFrame *pFrame = 0;
    FILE *fpo1 = NULL;
    FILE *fpo2 = NULL;
    int nframe;
    int err;
    int got_picture;
    int picwidth, picheight, linesize;
    unsigned char *pBuf;
    int16_t *pSndBuf = NULL;
    int i;
    int64_t timestamp;
    struct options opt;
    int usefo = 0;
    struct audio_dsp dsp;

    av_register_all();

    av_log_set_callback(log_callback);
    av_log_set_level(50);

    if (parse_options(&opt, argc, argv) < 0 || (strlen(opt.finput) == 0))
    {
        show_help(argv[0]);
        return 0;
    }


    err = av_open_input_file(&pCtx, opt.finput, 0, 0, 0);
    if (err < 0)
    {
        printf("\n->(av_open_input_file)\tERROR:\t%d\n", err);
        goto fail;
    }
    err = av_find_stream_info(pCtx);
    if (err < 0)
    {
        printf("\n->(av_find_stream_info)\tERROR:\t%d\n", err);
        goto fail;
    }
    if (opt.streamId < 0)
    {
        dump_format(pCtx, 0, pCtx->filename, 0);
        goto fail;
    }
    else
    {
    }

    if (strlen(opt.foutput1) && strlen(opt.foutput2))
    {
        fpo1 = fopen(opt.foutput1, "wb");
        fpo2 = fopen(opt.foutput2, "wb");
        if (!fpo1 || !fpo2)
        {
            printf("\n->error opening output files\n");
            goto fail;
        }
        usefo = 1;
    }
    else
    {
        usefo = 0;
    }

    if (opt.streamId >= pCtx->nb_streams)
    {
        printf("\n->StreamId\tERROR\n");
        goto fail;
    }

    if (opt.lstart > 0)
    {
        err = av_seek_frame(pCtx, opt.streamId, opt.lstart, AVSEEK_FLAG_ANY);
        if (err < 0)
        {
            printf("\n->(av_seek_frame)\tERROR:\t%d\n", err);
            goto fail;
        }
    }


    pCodecCtx = pCtx->streams[opt.streamId]->codec;
    pCodec = avcodec_find_decoder(pCodecCtx->codec_id);
    if (!pCodec)
    {
        printf("\n->can not find codec!\n");
        goto fail;
    }
    err = avcodec_open(pCodecCtx, pCodec);
    if (err < 0)
    {
        printf("\n->(avcodec_open)\tERROR:\t%d\n", err);
        goto fail;
    }
    pSndBuf = (int16_t *) malloc(DECODED_AUDIO_BUFFER_SIZE);
    if (!pSndBuf)
    {
        printf("\n->no memory\n");
        goto fail;
    }
    pFrame = avcodec_alloc_frame();


    if (!opt.nodec)
    {
        dsp.audio_fd = open(OSS_DEVICE, O_WRONLY);
        if (dsp.audio_fd == -1)
        {
            printf("\n-> can not open audio device\n");
            goto fail;
        }
        dsp.channels = pCodecCtx->channels;
        dsp.speed = pCodecCtx->sample_rate;
        dsp.format = map_formats(pCodecCtx->sample_fmt);
        if (set_audio(&dsp) < 0)
        {
            printf("\n-> can not set audio device\n");
            goto fail;
        }
    }

    nframe = 0;
    while(nframe < opt.frames || opt.frames == -1)
    {
        err = av_read_frame(pCtx, &packet);
        if (err < 0)
        {
            printf("\n->(av_read_frame)\tERROR:\t%d\n", err);
            break;
        }
        timestamp = av_rescale_q(packet.dts, pCtx->streams[packet.stream_index]->time_base, (AVRational){1, AV_TIME_BASE});
        printf("\nFrame No ] stream#%d\tsize mB, timestamp:%6lld, dts:%6lld, pts:%6lld, ", nframe++, packet.stream_index, packet.size,
               timestamp, packet.dts, packet.pts);

        if (packet.stream_index == opt.streamId)
        {
#if 0
            for (i = 0; i < packet.size; i++)
            {
                if (i == 0) printf("\n pktdata: ");
                printf("%2x  ", packet.data[i]);
            }
            printf("\n");
#endif
            if (usefo)
            {
                fwrite(packet.data, packet.size, 1, fpo1);
                fflush(fpo1);
            }

            if (pCtx->streams[opt.streamId]->codec->codec_type == CODEC_TYPE_VIDEO && !opt.nodec)
            {
                picheight = pCtx->streams[opt.streamId]->codec->height;
                picwidth = pCtx->streams[opt.streamId]->codec->width;

                avcodec_decode_video2(pCodecCtx, pFrame, &got_picture, &packet);
                if (got_picture)
                {
                    printf("[Video: type %d]", pFrame->pict_type);
                    if (pCtx->streams[opt.streamId]->codec->pix_fmt == PIX_FMT_YUV420P && usefo)
                    {
                        linesize = pFrame->linesize[0];
                        pBuf = pFrame->data[0];
                        for (i = 0; i < picheight; i++)
                        {
                            fwrite(pBuf, picwidth, 1, fpo2);
                            pBuf += linesize;
                        }

                        linesize = pFrame->linesize[1];
                        pBuf = pFrame->data[1];
                        for (i = 0; i < picheight/2; i++)
                        {
                            fwrite(pBuf, picwidth/2, 1, fpo2);
                            pBuf += linesize;
                        }

                        linesize = pFrame->linesize[2];
                        pBuf = pFrame->data[2];
                        for (i = 0; i < picheight/2; i++)
                        {
                            fwrite(pBuf, picwidth/2, 1, fpo2);
                            pBuf += linesize;
                        }
                        fflush(fpo2);
                    }
                    else
                    {

                    }
                }

                av_free_packet(&packet);
            }
            else if (pCtx->streams[opt.streamId]->codec->codec_type == CODEC_TYPE_AUDIO && !opt.nodec)
            {
                int size = DECODED_AUDIO_BUFFER_SIZE;

                avcodec_decode_audio3(pCodecCtx, pSndBuf, &size, &packet);
                printf("[Audio: ] Samples]", size);
                if (usefo)
                {
                    fwrite(pSndBuf, size/2, 1, fpo2);
                    fflush(fpo2);
                }
                else
                {
                    play_pcm(&dsp, pSndBuf, size);
                }
            }
        }
    }

fail:
    if (pSndBuf)
    {
        free(pSndBuf);
    }
    if (pCtx)
    {
        av_close_input_file(pCtx);
    }
    if (fpo1)
    {
        fclose(fpo1);
    }
    if (fpo2)
    {
        fclose(fpo2);
    }
    if (!pFrame)
    {
        av_free(pFrame);
    }
    if (!usefo && (dsp.audio_fd != -1))
    {
        close(dsp.audio_fd);
    }
    return 0;
}
</code>
这一小段代码可以实现的功能包括：
  * 打开一个多媒体文件并获取基本的媒体信息。
  * 获取解码器句柄。
  * 根据给定的时间标签进行一个跳转。
  * 读取数据帧。
  * 解码音频帧或者视频帧。
  * 关闭多媒体文件。
这些功能足以支持一个功能强大的多媒体播放器，因为最复杂的解复用、解码、数据分析过程已经在FFMpeg内部实现了，需要关注的仅剩同步问题。

===== 用户接口 =====
==== 数据结构====
=== 基本概念 ===
编解码器、数据帧、媒体流和容器是数字媒体处理系统的四个基本概念，它们的关系如下图所示：

首先需要统一术语：
  * 容器／文件（Container/File）：即特定格式的多媒体文件。
  * 媒体流（Stream）：指时间轴上的一段连续数据，如一段声音数据，一段视频数据或一段字幕数据，可以是压缩的，也可以是非压缩的，压缩的数据需要关联特定的编解码器。
  * 数据帧／数据包（Frame/Packet）：通常，一个媒体流由大量的数据帧组成，对于压缩数据，帧对应着编解码器的最小处理单元。通常，分属于不同媒体流的数据帧交错复用于容器之中，参见[[tech:multimedia:interleave|交错]]。
  * 编解码器：编解码器以帧为单位实现压缩数据和原始数据之间的相互转换。

在FFMPEG中，使用AVFormatContext、AVStream、AVPacket、AVCodecContext及AVCodec来抽象这几个基本要素：
=== AVPacket ===
AVPacket定义在avcodec.h中，如下：
<code c>
/* Holds demuxed, not yet decoded media data together with its timing information. */
typedef struct AVPacket {
    /* Presentation timestamp, in units of the owning stream's time base. */
    int64_t pts;
    /* Decoding timestamp, in units of the owning stream's time base. */
    int64_t dts;
    uint8_t *data;                          /* data buffer */
    int   size;                             /* length of the data buffer */
    int   stream_index;                     /* index of the stream this packet belongs to */
    int   flags;                            /* flag bits; the lowest bit set marks a key frame */
    /* Duration of the data, in units of the owning stream's time base. */
    int   duration;
    void  (*destruct)(struct AVPacket *);   /* function used to release the data buffer */
    void  *priv;                            /* NOTE(review): purpose not described in this excerpt */
    int64_t pos;                            ///< byte position in stream, -1 if unknown

    /* NOTE(review): not described in this article; see the FFmpeg header for its meaning. */
    int64_t convergence_duration;
} AVPacket;
</code>
FFMPEG使用AVPacket来暂存解复用之后、解码之前的媒体数据（一个音/视频帧、一个字幕包等）及附加信息（解码时间戳、显示时间戳、时长等）。其中：
  * dts表示解码时间戳，pts表示显示时间戳，它们的单位是所属媒体流的时间基准。
  * stream_index给出所属媒体流的编号；
  * data为数据缓冲区指针，size为长度；
  * duration为数据的时长，也是以所属媒体流的时间基准为单位；
  * pos表示该数据在媒体流中的字节偏移量；
  * destruct为用于释放数据缓冲区的函数指针；
  * flags为标志域，其中，最低位置1表示该数据是一个关键帧。
AVPacket结构本身只是个容器，它使用data成员指向实际的数据缓冲区，这个缓冲区可以通过av_new_packet创建，可以通过av_dup_packet拷贝，也可以由FFMPEG的API产生（如av_read_frame），使用之后需要通过调用av_free_packet释放。av_free_packet调用的是结构体本身的destruct函数，它的值有两种情况：1)av_destruct_packet_nofree或0；2)av_destruct_packet，其中，前者仅仅是将data和size的值清0而已，后者才会真正地释放缓冲区。FFMPEG内部使用AVPacket结构建立缓冲区装载数据，同时提供destruct函数，如果FFMPEG打算自己维护缓冲区，则将destruct设为av_destruct_packet_nofree，用户调用av_free_packet清理缓冲区时并不能够将其释放；如果FFMPEG不会再使用该缓冲区，则将destruct设为av_destruct_packet，表示它能够被释放。对于缓冲区不能够被释放的AVPacket，用户在使用之前最好调用av_dup_packet进行缓冲区的克隆，将其转化为缓冲区能够被释放的AVPacket，以免对缓冲区的不当占用造成异常错误。而av_dup_packet会为destruct指针为av_destruct_packet_nofree的AVPacket新建一个缓冲区，然后将原缓冲区的数据拷贝至新缓冲区，置data的值为新缓冲区的地址，同时设destruct指针为av_destruct_packet。

=== 时间信息 ===
时间信息用于实现多媒体同步。

同步的目的在于展示多媒体信息时，能够保持媒体对象之间固有的时间关系。同步有两类，一类是流内同步，其主要任务是保证单个媒体流内的时间关系，以满足感知要求，如按照规定的帧率播放一段视频；另一类是流间同步，主要任务是保证不同媒体流之间的时间关系，如音频和视频之间的关系（lipsync）。

对于固定速率的媒体，如固定帧率的视频或固定比特率的音频，可以将时间信息（帧率或比特率）置于文件首部（header），如[[tech:multimedia:digital_media#容器|AVI的hdrl List、MP4的moov box]]，还有一种相对复杂的方案是将时间信息嵌入媒体流的内部，如MPEG TS和Real video，这种方案可以处理变速率的媒体，亦可有效避免同步过程中的时间漂移。

FFMPEG会为每一个数据包打上时间标签，以更有效地支持上层应用的同步机制。时间标签有两种，一种是DTS，称为解码时间标签，另一种是PTS，称为显示时间标签。对于声音来说，这两个时间标签是相同的，但对于某些视频编码格式，由于采用了双向预测技术，会造成DTS和PTS的不一致。

无双向预测帧的情况：
  图像类型: I   P   P   P   P   P   P ...  I   P   P
  DTS:     0   1   2   3   4   5   6...  100 101 102
  PTS:     0   1   2   3   4   5   6...  100 101 102
有双向预测帧的情况：
  图像类型: I   P   B   B   P   B   B ...  I   P   B
  DTS:     0   1   2   3   4   5   6 ...  100 101 102
  PTS:     0   3   1   2   6   4   5 ...  100 103 101
对于存在双向预测帧的情况，通常要求解码器对图像重排序，以保证输出的图像顺序为显示顺序：
  解码器输入：I   P   B   B   P   B   B
   (DTS)     0   1   2   3   4   5   6
   (PTS)     0   3   1   2   6   4   5
  解码器输出：X   I   B   B   P   B   B   P
   (PTS)     X   0   1   2   3   4   5   6

**时间信息的获取：**

通过调用av_find_stream_info，多媒体应用可以从AVFormatContext对象中拿到媒体文件的时间信息：主要是总时间长度和开始时间，此外还有与时间信息相关的比特率和文件大小。其中时间信息的单位是AV_TIME_BASE：微秒。
<code c>
/* Excerpt: only the time-related members discussed in this section are shown. */
typedef struct AVFormatContext {

    ......

    /* Start time of the media, in AV_TIME_BASE units (microseconds). */
    int64_t start_time;
    /* Total duration of the media, in AV_TIME_BASE units (microseconds). */
    int64_t duration;
    /* Size of the file. */
    int64_t file_size;
    /* Bit rate of the media. */
    int bit_rate;

    ......

} AVFormatContext;
</code>
以上4个成员变量都是只读的，基于FFMpeg的中间件需要将其封装到某个接口中，如：
<code c>
LONG GetDuratioin(IntfX*);
LONG GetStartTime(IntfX*);
LONG GetFileSize(IntfX*);
LONG GetBitRate(IntfX*);
</code>

==== APIs ====
=== av_open_input_file ===
<code c>
int av_open_input_file(AVFormatContext **ic_ptr, const char *filename,
                       AVInputFormat *fmt,
                       int buf_size,
                       AVFormatParameters *ap);
</code>
av_open_input_file完成两个任务：
  - 打开一个文件或URL，基于字节流的底层输入模块得到初始化。
  - 解析多媒体文件或多媒体流的头信息，创建AVFormatContext结构并填充其中的关键字段，依次为各个原始流建立AVStream结构。
一个多媒体文件或多媒体流与其包含的原始流的关系如下：
  多媒体文件/多媒体流 (movie.mkv)
    原始流 1  (h.264 video)
    原始流 2  (aac audio for Chinese)
    原始流 3  (aac audio for english)
    原始流 4  (Chinese Subtitle)
    原始流 5  (English Subtitle)
    ...
关于输入参数：
  * ic_ptr，这是一个指向指针的指针，用于返回av_open_input_file内部构造的一个AVFormatContext结构体。
  * fmt，用于显式指定输入文件的格式，如果设为空则自动判断其输入格式。
  * buf_size，设置基于字节流的底层输入模块的缓冲区大小，如果为0则使用缺省值32768。
  * ap，用于显式指定输入文件的一些媒体参数，可以设为0（不指定）。
这个函数通过解析多媒体文件或流的头信息及其他辅助数据，能够获取足够多的关于文件、流和编解码器的信息，但由于任何一种多媒体格式提供的信息都是有限的，而且不同的多媒体内容制作软件对头信息的设置不尽相同，此外这些软件在产生多媒体内容时难免会引入一些错误，因此这个函数并不保证能够获取所有需要的信息，在这种情况下，则需要考虑另一个函数：av_find_stream_info。
=== av_find_stream_info ===
<code c>
int av_find_stream_info(AVFormatContext *ic);
</code>
这个函数主要用于获取必要的编解码器参数，设置到ic->streams[i]->codec中。

首先必须得到各媒体流对应编解码器的类型和id，这是两个定义在avutil.h和avcodec.h中的枚举：
<code>
/* Kind of data carried by a media stream. */
enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,   /* type could not be determined */
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,
    AVMEDIA_TYPE_NB              /* presumably the number of media types, not a type itself - confirm in the header */
};
/* Identifies a codec; excerpt, the remaining entries are elided with "...". */
enum CodecID {
    CODEC_ID_NONE,


    CODEC_ID_MPEG1VIDEO,
    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
    CODEC_ID_MPEG2VIDEO_XVMC,
    CODEC_ID_H261,
    CODEC_ID_H263,
    ...
};
</code>
通常，如果某种媒体格式具备完备而正确的头信息，调用av_open_input_file即可以得到这两个参数，但若是因某种原因av_open_input_file无法获取它们，这一任务将由av_find_stream_info完成。

其次还要获取各媒体流对应编解码器的时间基准。

此外，对于音频编解码器，还需要得到：
  - 采样率，
  - 声道数，
  - 位宽，
  - 帧长度（对于某些编解码器是必要的），
对于视频编解码器，则是：
  - 图像大小，
  - 色彩空间及格式，

=== av_seek_frame ===
<code c>
int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int flags);
</code>
这个函数用于实现对媒体文件的随机访问，支持以下三种方式：
  * 基于时间的随机访问：具体而言就是将媒体文件读写指针定位到某个给定的时间点上，则之后调用av_read_frame时能够读到时间标签等于给定时间点的媒体数据，通常用于实现媒体播放器的快进、快退等功能。
  * 基于文件偏移的随机访问：相当于普通文件的seek函数，timestamp也成为文件的偏移量。
  * 基于帧号的随机访问：timestamp为要访问的媒体数据的帧号。

关于参数：
  * s：是个AVFormatContext指针，就是av_open_input_file返回的那个结构。
  * stream_index：指定媒体流，如果是基于时间的随机访问，则第三个参数timestamp将以此媒体流的时间基准为单位；如果设为负数，则相当于不指定具体的媒体流，FFMPEG会按照特定的算法寻找缺省的媒体流，此时，timestamp的单位为AV_TIME_BASE（微秒）。
  * timestamp：时间标签，单位取决于其他参数。
  * flags：定位方式。AVSEEK_FLAG_BYTE表示基于字节偏移，AVSEEK_FLAG_FRAME表示基于帧号，其它表示基于时间。

=== av_read_frame ===
<code c>
int av_read_frame(AVFormatContext *s, AVPacket *pkt);
</code>