221

ijkplayer框架简析 — FFmpeg中重要结构体

 5 years ago
source link: http://yydcdut.com/2019/02/02/ijkplayer-ffmpeg-relation/?amp%3Butm_medium=referral
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

A complete, cross-platform solution to record, convert and stream audio and video.

FFmpeg 是一个多媒体框架,能够处理解编码、转码、mux、demux、流、滤镜和播放。它支持很多格式,且具有高度的移植性。

类库

libavformat - 用于解析和生成音视频格式

libavcodec - 编解码音视频

libavutil - 工具库

libswscale - 提供比例缩放、色彩映射转换、图像颜色空间或格式转换的功能

libswresample - 音频重采样,采样格式转换和混合等

libavfilter - 滤波器,如宽高比裁剪、格式化、非格式化、伸缩

libpostproc - 后期效果处理,如图像的去块效应等

libavdevice - 硬件采集、加速、显示

结构体之间关系

  • 协议(http, rtsp, rtmp, mms)

    AVIOContext,URLProtocol,URLContext 主要存储视音频使用的协议的类型以及状态。URLProtocol 存储输入视音频使用的封装格式;每种协议都对应一个URLProtocol结构 (注意:FFMPEG中文件也被当做一种协议 “file” )

  • 封装(flv, avi, rmvb, mp4)

    AVFormatContext 主要存储视音频封装格式中包含的信息;AVInputFormat 存储输入视音频使用的封装格式。每种视音频封装格式都对应一个 AVInputFormat 结构

  • 编解码(h264, mpeg2, aac, mp3)

    每个 AVStream 存储一个视频/音频流的相关数据;每个 AVStream 对应一个 AVCodecContext,存储该视频 / 音频流使用解码方式的相关数据;每个 AVCodecContext 中对应一个 AVCodec,包含该视频/音频对应的解码器。每种解码器都对应一个 AVCodec 结构

  • 数据

    视频的话,每个结构一般是存一帧 (音频可能有好几帧)

    解码前数据:AVPacket;解码后数据:AVFrame

ijkplayer_ffmpeg_%20relation.png

结构体

网络协议

AVIOContext

管理输入输出数据的结构体, avformat_open_input() 中进行初始化

typedef struct AVIOContext{
    const AVClass *av_class;
    unsigned char *buffer;  /**< Start of the buffer. */
    int buffer_size;        /**< Maximum buffer size */
    unsigned char *buf_ptr; /**< Current position in the buffer */
    unsigned char *buf_end; /**< End of the data */
    int (*read_packet)(void *opaque, uint8_t *buf, int buf_size);
    int (*write_packet)(void *opaque, uint8_t *buf, int buf_size);
    int64_t (*seek)(void *opaque, int64_t offset, int whence);
    int64_t pos;            /**< position in the file of the current buffer */
    int must_flush;         /**< true if the next seek should flush */
    int eof_reached;        /**< true if eof reached */
    int write_flag;         /**< true if open for writing */
    int max_packet_size;
    unsigned long checksum;
    unsigned char *checksum_ptr;
    unsigned long (*update_checksum)(unsigned long checksum, const uint8_t *buf, unsigned int size);
    int error;              /**< contains the error code or 0 if no error happened */
    int (*read_pause)(void *opaque, int pause);
    int64_t (*read_seek)(void *opaque, int stream_index,
                         int64_t timestamp, int flags);
    int seekable;
    int64_t maxsize;
    int direct;
    int64_t bytes_read;
    int seek_count;
    int writeout_count;
    int orig_buffer_size;
    int short_seek_threshold;
}
  • unsigned char *buffer : 缓存开始位置
  • int buffer_size : 缓存大小(默认32768)
  • unsigned char *buf_ptr : 当前指针读取到的位置
  • unsigned char *buf_end : 缓存结束的位置
  • void *opaque : URLContext 结构体

在解码的情况下,buffer 用于存储 ffmpeg 读入的数据。例如打开一个视频文件的时候,先把数据从硬盘读入buffer,然后在送给解码器用于解码。

URLProtocol

述了音视频数据传输所使用的协议,每种传输协议 (例如 HTTP、RTMP) 等,都会对应一个 URLProtocol 结构

typedef struct URLProtocol {
	const char *name;
	int (*url_open)(URLContext *h, const char *url, int flags);
	int (*url_read)(URLContext *h, unsigned char *buf, int size);
	int (*url_write)(URLContext *h, const unsigned char *buf, int size);
	int64_t (*url_seek)(URLContext *h, int64_t pos, int whence);
	int (*url_close)(URLContext *h);
	struct URLProtocol *next;
	int (*url_read_pause)(URLContext *h, int pause);
	int64_t (*url_read_seek)(URLContext *h, int stream_index,
		int64_t timestamp, int flags);
	int (*url_get_file_handle)(URLContext *h);
	int priv_data_size;
	const AVClass *priv_data_class;
	int flags;
	int (*url_check)(URLContext *h, int mask);
}
  • const char *name : 存储协议的名称,每一种输入协议都对应这样一个结构体

    URLProtocol ff_rtmp_protocol = {
        .name                = "rtmp",
        .url_open            = rtmp_open,
        .url_read            = rtmp_read,
        .url_write           = rtmp_write,
        .url_close           = rtmp_close,
        .url_read_pause      = rtmp_read_pause,
        .url_read_seek       = rtmp_read_seek,
        .url_get_file_handle = rtmp_get_file_handle,
        .priv_data_size      = sizeof(RTMP),
        .flags               = URL_PROTOCOL_FLAG_NETWORK,
    };
    

    等号右边的函数是完成具体读写功能的函数。可以看一下 file 协议的几个函数(file.c)

    static int file_read(URLContext *h, unsigned char *buf, int size)
    {
        int fd = (intptr_t) h->priv_data;
        int r = read(fd, buf, size);
        return (-1 == r)?AVERROR(errno):r;
    }
     
    static int file_write(URLContext *h, const unsigned char *buf, int size)
    {
        int fd = (intptr_t) h->priv_data;
        int r = write(fd, buf, size);
        return (-1 == r)?AVERROR(errno):r;
    }
     
    static int file_get_handle(URLContext *h)
    {
        return (intptr_t) h->priv_data;
    }
     
    static int file_check(URLContext *h, int mask)
    {
        struct stat st;
        int ret = stat(h->filename, &st);
        if (ret < 0)
            return AVERROR(errno);
     
        ret |= st.st_mode&S_IRUSR ? mask&AVIO_FLAG_READ  : 0;
        ret |= st.st_mode&S_IWUSR ? mask&AVIO_FLAG_WRITE : 0;
     
        return ret;
    }
     
    #if CONFIG_FILE_PROTOCOL
     
    static int file_open(URLContext *h, const char *filename, int flags)
    {
        int access;
        int fd;
     
        av_strstart(filename, "file:", &filename);
     
        if (flags & AVIO_FLAG_WRITE && flags & AVIO_FLAG_READ) {
            access = O_CREAT | O_TRUNC | O_RDWR;
        } else if (flags & AVIO_FLAG_WRITE) {
            access = O_CREAT | O_TRUNC | O_WRONLY;
        } else {
            access = O_RDONLY;
        }
    #ifdef O_BINARY
        access |= O_BINARY;
    #endif
        fd = open(filename, access, 0666);
        if (fd == -1)
            return AVERROR(errno);
        h->priv_data = (void *) (intptr_t) fd;
        return 0;
    }
     
    /* XXX: use llseek */
    static int64_t file_seek(URLContext *h, int64_t pos, int whence)
    {
        int fd = (intptr_t) h->priv_data;
        if (whence == AVSEEK_SIZE) {
            struct stat st;
            int ret = fstat(fd, &st);
            return ret < 0 ? AVERROR(errno) : st.st_size;
        }
        return lseek(fd, pos, whence);
    }
     
    static int file_close(URLContext *h)
    {
        int fd = (intptr_t) h->priv_data;
        return close(fd);
    }
    

URLContext

封装了协议对象及协议操作对象

封装格式

AVFormatContext

描述了媒体文件的构成及基本信息,是统领全局的基本结构体,贯穿程序始终,很多函数都要用它作为参数

typedef struct AVFormatContext {
	const AVClass *av_class;
	struct AVInputFormat *iformat;
	struct AVOutputFormat *oformat;
	void *priv_data;
	AVIOContext *pb;
	int ctx_flags;
	unsigned int nb_streams;
	AVStream **streams;
	char filename[1024];
	int64_t start_time;
	int64_t duration;
	int bit_rate;
    unsigned int packet_size;
    int max_delay;
    int flags;
    const uint8_t *key;
    int keylen;

    unsigned int nb_programs;
    AVProgram **programs;

    enum AVCodecID video_codec_id;
    enum AVCodecID audio_codec_id;
    enum AVCodecID subtitle_codec_id;

    unsigned int max_index_size;
    unsigned int max_picture_buffer;
    unsigned int nb_chapters;
    AVChapter **chapters;
    AVDictionary *metadata;
    int64_t start_time_realtime;
    int fps_probe_size;
    int error_recognition;
    AVIOInterruptCB interrupt_callback;
    int64_t max_interleave_delta;
    int strict_std_compliance;
    int event_flags;
    int max_ts_probe;
    int avoid_negative_ts;
    int ts_id;
    int audio_preload;
    int max_chunk_duration;
    int max_chunk_size;
    int use_wallclock_as_timestamps;
    int avio_flags;
    enum AVDurationEstimationMethod duration_estimation_method;
    int64_t skip_initial_bytes;
    unsigned int correct_ts_overflow;
    int seek2any;
    int probe_score;
    int format_probesize;
    char *codec_whitelist;
    char *format_whitelist;
    AVFormatInternal *internal;
    int io_repositioned;
    AVCodec *video_codec;
    AVCodec *audio_codec;
    AVCodec *subtitle_codec;
    AVCodec *data_codec;
    int metadata_header_padding;
    void *opaque;
    av_format_control_message control_message_cb;
    int64_t output_ts_offset;
    uint8_t *dump_separator;
    enum AVCodecID data_codec_id;
    int (*open_cb)(struct AVFormatContext *s, AVIOContext **p, const char *url, int flags, const AVIOInterruptCB *int_cb, AVDictionary **options);	
}
  • struct AVInputFormat *iformat : 输入数据的封装格式,由 avformat_open_input 设置,仅仅在 Demuxing 使用。
  • struct AVOutputFormat *oformat : 输出数据的封装格式,必须由使用者在 avformat_write_header 前设置,由 Muxing 使用
  • priv_data : 在 muxing 中,由 avformat_write_header 设置;在 demuxing 中,由 avformat_open_input 设置
  • AVIOContext *pb : 输入数据的缓存。如 果 iformat/oformat.flags 设置为 AVFMT_NOFILE 的话,该字段不需要设置。对于 Demuxing ,需要在 avformat_open_input 前设置,或由 avformat_open_input 设置;对于 Muxing,在 avformat_write_header 前设置
  • ctx_flags : 码流的信息,表明码流属性的的信号。由 libavformat 设置,例如 AVFMTCTX_NOHEADER
  • nb_streams : 指 AVFormatContext.streams 的数量,必须由 avformat_new_stream 设置
  • AVStream **streams : 文件中所有码流的列表,新的码流创建使用 avformat_new_stream 函数。Demuxing 中,码流由 avformat_open_input 创建。 如果 AVFMTCTX_NOHEADER 被设置,新的码流可以出现在 av_read_frame 中。Muxing 中,码流在 avformat_write_header 之前由用户创建,它的释放是由 avformat_free_context 完成的
  • filename : 输入或输出的文件名,Demuxing 中由 avformat_open_input 设置,Muxing 中在使用 avformat_write_header 前由调用者设置
  • int64_t duration : 码流的时长
  • bit_rate : 比特率
  • AVDictionary *metadata : 元数据,适用于整个文件。通过 av_dict_get() 函数获得视频的原数据

AVInputFormat

解复用器对象,每种作为输入的封装格式 (例如 FLVMP4TS 等) 对应一个该结构体

AVOutputFormat

复用器对象,每种作为输出的封装格式(例如 FLV , MP4TS 等)对应一个该结构体

AVStream

用于描述一个视频 / 音频流的相关数据信息

编解码

AVCodecContext

描述编解码器上下文的数据结构,包含了众多编解码器需要的参数信息

typedef struct AVCodecContext{
	const AVClass *av_class;
	int log_level_offset;
	enum AVMediaType codec_type;
	const struct AVCodec *codec;
	enum AVCodecID     codec_id;
	unsigned int codec_tag;
	void *priv_data;
	struct AVCodecInternal *internal;
	void *opaque;
	int bit_rate;
	int bit_rate_tolerance;
	int global_quality;
	int compression_level;
	int flags;
	int flags2;
	uint8_t *extradata;
	int extradata_size;
	AVRational time_base;
	int ticks_per_frame;
	int delay;
	int width, height;
	int coded_width, coded_height;
	int gop_size;
	enum AVPixelFormat pix_fmt;
	void (*draw_horiz_band)(struct AVCodecContext *s,
                            const AVFrame *src, int offset[AV_NUM_DATA_POINTERS],
                            int y, int type, int height);
	enum AVPixelFormat (*get_format)(struct AVCodecContext *s, const enum AVPixelFormat * fmt);
	int max_b_frames;
	float b_quant_factor;
	int b_frame_strategy;
	float b_quant_offset;
	int has_b_frames;
	int mpeg_quant; 		/*decoding: unused*/
	float i_quant_factor; 	/*decoding: unused*/
	float i_quant_offset; 	/*decoding: unused*/
	float lumi_masking;		/*decoding: unused*/
	float temporal_cplx_masking; /*decoding: unused*/
	float spatial_cplx_masking;  /*decoding: unused*/
	float p_masking;		/*decoding: unused*/
	float dark_masking;		/*decoding: unused*/
	int slice_count;
	int prediction_method;	/*decoding: unused*/
	int *slice_offset;
	AVRational sample_aspect_ratio;
	int me_cmp;				/*decoding: unused*/
	int me_sub_cmp;			/*decoding: unused*/
	int mb_cmp;				/*decoding: unused*/
    ...
}AVCodecContext;
  • AVMediaType codec_type : 编解码器的类型,如音频、视频、字幕
  • AVCdec *codec : 采用的解码器 AVCodec
  • int bit_rate : 平均比特率
  • uint8_t *extradata; int extradata_size : 针对特定编码器包含的附加信息(例如对于H.264解码器来说,存储SPS,PPS等)
  • AVRational time_base : 根据该参数,可以把PTS转化为实际的时间(单位为秒s)
  • int width, height : 视频的宽高
  • int refs : 运动估计参考帧的个数
  • int sample_rate : 采样率
  • int channels : 声道数
  • enum AVSampleFormat sample_fmt : 采样格式
  • int profile : 型(H.264里面就有,其他编码标准应该也有)
  • int level : 级(和profile差不太多)

AVCodecContext 使用 avcodec_alloc_context3 分配,该函数除了分配 AVCodecContext 外,还会初始化默认的字段。分配的内存必须通过 avcodec_free_context 释放。 AVCodecContext 中很多的参数是编码的时候使用的,而不是解码的时候使用的

avcodec_register_all();
...
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if(!codec)
    exit(1);

context = avcodec_alloc_context3(codec);

if(avcodec_open2(context, codec, opts) < 0)
    exit(1);

codec_type(编解码器类型)

enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,  ///< Usually treated as AVMEDIA_TYPE_DATA
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,          ///< Opaque data information usually continuous
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,    ///< Opaque data information usually sparse
    AVMEDIA_TYPE_NB
};

sample_fmt(音频采样格式)

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
 
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
 
    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

profile

#define FF_PROFILE_UNKNOWN -99
#define FF_PROFILE_RESERVED -100
 
#define FF_PROFILE_AAC_MAIN 0
#define FF_PROFILE_AAC_LOW  1
#define FF_PROFILE_AAC_SSR  2
#define FF_PROFILE_AAC_LTP  3
#define FF_PROFILE_AAC_HE   4
#define FF_PROFILE_AAC_HE_V2 28
#define FF_PROFILE_AAC_LD   22
#define FF_PROFILE_AAC_ELD  38
 
#define FF_PROFILE_DTS         20
#define FF_PROFILE_DTS_ES      30
#define FF_PROFILE_DTS_96_24   40
#define FF_PROFILE_DTS_HD_HRA  50
#define FF_PROFILE_DTS_HD_MA   60
 
#define FF_PROFILE_MPEG2_422    0
#define FF_PROFILE_MPEG2_HIGH   1
#define FF_PROFILE_MPEG2_SS     2
#define FF_PROFILE_MPEG2_SNR_SCALABLE  3
#define FF_PROFILE_MPEG2_MAIN   4
#define FF_PROFILE_MPEG2_SIMPLE 5
 
#define FF_PROFILE_H264_CONSTRAINED  (1<<9)  // 8+1; constraint_set1_flag
#define FF_PROFILE_H264_INTRA        (1<<11) // 8+3; constraint_set3_flag
 
#define FF_PROFILE_H264_BASELINE             66
#define FF_PROFILE_H264_CONSTRAINED_BASELINE (66|FF_PROFILE_H264_CONSTRAINED)
#define FF_PROFILE_H264_MAIN                 77
#define FF_PROFILE_H264_EXTENDED             88
#define FF_PROFILE_H264_HIGH                 100
#define FF_PROFILE_H264_HIGH_10              110
#define FF_PROFILE_H264_HIGH_10_INTRA        (110|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_HIGH_422             122
#define FF_PROFILE_H264_HIGH_422_INTRA       (122|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_HIGH_444             144
#define FF_PROFILE_H264_HIGH_444_PREDICTIVE  244
#define FF_PROFILE_H264_HIGH_444_INTRA       (244|FF_PROFILE_H264_INTRA)
#define FF_PROFILE_H264_CAVLC_444            44
 
#define FF_PROFILE_VC1_SIMPLE   0
#define FF_PROFILE_VC1_MAIN     1
#define FF_PROFILE_VC1_COMPLEX  2
#define FF_PROFILE_VC1_ADVANCED 3
 
#define FF_PROFILE_MPEG4_SIMPLE                     0
#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE            1
#define FF_PROFILE_MPEG4_CORE                       2
#define FF_PROFILE_MPEG4_MAIN                       3
#define FF_PROFILE_MPEG4_N_BIT                      4
#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE           5
#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION      6
#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE     7
#define FF_PROFILE_MPEG4_HYBRID                     8
#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME         9
#define FF_PROFILE_MPEG4_CORE_SCALABLE             10
#define FF_PROFILE_MPEG4_ADVANCED_CODING           11
#define FF_PROFILE_MPEG4_ADVANCED_CORE             12
#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
#define FF_PROFILE_MPEG4_SIMPLE_STUDIO             14
#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE           15

AVCodec

编解码器对象,每种编解码格式 (例如 H.264、AAC 等)对应一个该结构体;每个 AVCodecContext 中含有一个 AVCodec

typedef struct AVCodec{
	const char *name;
    const char *long_name;
    enum AVMediaType type;
    enum AVCodecID id;
    int capabilities;
    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
    const enum AVPixelFormat *pix_fmts;     ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
    const enum AVSampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
    const uint64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
    uint8_t max_lowres;                     ///< maximum value for lowres supported by the decoder, no direct access, use av_codec_get_max_lowres()
    const AVClass *priv_class;              ///< AVClass for the private context
    const AVProfile *profiles;              ///< array of recognized profiles, or NULL if unknown, array is terminated by {FF_PROFILE_UNKNOWN}
    int priv_data_size;
    struct AVCodec *next;
    int (*init_thread_copy)(AVCodecContext *);
    int (*update_thread_context)(AVCodecContext *dst, const AVCodecContext *src);
    const AVCodecDefault *defaults;
    void (*init_static_data)(struct AVCodec *codec);

    int (*init)(AVCodecContext *);
    int (*encode_sub)(AVCodecContext *, uint8_t *buf, int buf_size,
    int (*encode2)(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame,
                   int *got_packet_ptr);
    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
    int (*close)(AVCodecContext *);
    void (*flush)(AVCodecContext *);
    int caps_internal;
}
  • name : 具体的 CODEC 的名称的简短描述,比如 “HEVC”/“H264” 等
  • long_name : CODEC 名称的详细描述,比如 “HEVC (High Efficiency Video Coding)”
  • type : 媒体类型的字段,它是 enum 型的,表示视频、音频、字幕等,比如AVMEDIA_TYPE_VIDEO、AVMEIDA_TYPE_AUDIO
  • id : 唯一标识的 CODEC 类型,比如 AV_CODEC_ID_HEVC
  • supported_framerates : 支持的视频帧率的数组,以{0,0}作为结束
  • pix_fmts : 编解码器支持的图像格式的数组,以 -1 作为结束
  • profiles : 编解码器支持的 Profile,以 HEVC 为例,包含 “Main” / “Main10” / “Main Still Picture”
  • supported_samplerates : 支持的音频采样率
  • sample_fmts : 支持的音频采样格式
  • channel_layouts : 支持的音频声道数
  • priv_data_size : 私有数据的大小

enum AVMediaType type

enum AVMediaType {
    AVMEDIA_TYPE_UNKNOWN = -1,  ///< Usually treated as AVMEDIA_TYPE_DATA
    AVMEDIA_TYPE_VIDEO,
    AVMEDIA_TYPE_AUDIO,
    AVMEDIA_TYPE_DATA,          ///< Opaque data information usually continuous
    AVMEDIA_TYPE_SUBTITLE,
    AVMEDIA_TYPE_ATTACHMENT,    ///< Opaque data information usually sparse
    AVMEDIA_TYPE_NB
}

enum AVCodecID id

enum AVCodecID {
    AV_CODEC_ID_NONE,
 
    /* video codecs */
    AV_CODEC_ID_MPEG1VIDEO,
    AV_CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
    AV_CODEC_ID_MPEG2VIDEO_XVMC,
    AV_CODEC_ID_H261,
    AV_CODEC_ID_H263,
    AV_CODEC_ID_RV10,
    AV_CODEC_ID_RV20,
    AV_CODEC_ID_MJPEG,
    AV_CODEC_ID_MJPEGB,
    AV_CODEC_ID_LJPEG,
    AV_CODEC_ID_SP5X,
    AV_CODEC_ID_JPEGLS,
    AV_CODEC_ID_MPEG4,
    AV_CODEC_ID_RAWVIDEO,
    AV_CODEC_ID_MSMPEG4V1,
    AV_CODEC_ID_MSMPEG4V2,
    AV_CODEC_ID_MSMPEG4V3,
    AV_CODEC_ID_WMV1,
    AV_CODEC_ID_WMV2,
    AV_CODEC_ID_H263P,
    AV_CODEC_ID_H263I,
    AV_CODEC_ID_FLV1,
    AV_CODEC_ID_SVQ1,
    AV_CODEC_ID_SVQ3,
    AV_CODEC_ID_DVVIDEO,
    AV_CODEC_ID_HUFFYUV,
    AV_CODEC_ID_CYUV,
    AV_CODEC_ID_H264,
    ...
}

const enum AVPixelFormat *pix_fmts

enum AVPixelFormat {
    AV_PIX_FMT_NONE = -1,
    AV_PIX_FMT_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
    AV_PIX_FMT_YUYV422,   ///< packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr
    AV_PIX_FMT_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
    AV_PIX_FMT_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
    AV_PIX_FMT_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
    AV_PIX_FMT_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
    AV_PIX_FMT_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
    AV_PIX_FMT_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
    AV_PIX_FMT_GRAY8,     ///<        Y        ,  8bpp
    AV_PIX_FMT_MONOWHITE, ///<        Y        ,  1bpp, 0 is white, 1 is black, in each byte pixels are ordered from the msb to the lsb
    AV_PIX_FMT_MONOBLACK, ///<        Y        ,  1bpp, 0 is black, 1 is white, in each byte pixels are ordered from the msb to the lsb
    AV_PIX_FMT_PAL8,      ///< 8 bit with PIX_FMT_RGB32 palette
    AV_PIX_FMT_YUVJ420P,  ///< planar YUV 4:2:0, 12bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV420P and setting color_range
    AV_PIX_FMT_YUVJ422P,  ///< planar YUV 4:2:2, 16bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV422P and setting color_range
    AV_PIX_FMT_YUVJ444P,  ///< planar YUV 4:4:4, 24bpp, full scale (JPEG), deprecated in favor of PIX_FMT_YUV444P and setting color_range
    AV_PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing
    AV_PIX_FMT_XVMC_MPEG2_IDCT,
    ...
}

const enum AVSampleFormat *sample_fmts

enum AVSampleFormat {
    AV_SAMPLE_FMT_NONE = -1,
    AV_SAMPLE_FMT_U8,          ///< unsigned 8 bits
    AV_SAMPLE_FMT_S16,         ///< signed 16 bits
    AV_SAMPLE_FMT_S32,         ///< signed 32 bits
    AV_SAMPLE_FMT_FLT,         ///< float
    AV_SAMPLE_FMT_DBL,         ///< double
 
    AV_SAMPLE_FMT_U8P,         ///< unsigned 8 bits, planar
    AV_SAMPLE_FMT_S16P,        ///< signed 16 bits, planar
    AV_SAMPLE_FMT_S32P,        ///< signed 32 bits, planar
    AV_SAMPLE_FMT_FLTP,        ///< float, planar
    AV_SAMPLE_FMT_DBLP,        ///< double, planar
 
    AV_SAMPLE_FMT_NB           ///< Number of sample formats. DO NOT USE if linking dynamically
};

每一个编解码器对应一个 AVCodec 该结构体

AVCodec ff_h264_decoder = {
    .name           = "h264",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
                      CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
    .priv_class     = &h264_class,
};

AVCodecParameters

编解码参数,每个 AVStream 中都含有一个 AVCodecParameters,用来存放当前流的编解码参数

数据

AVPacket

存放编码后、解码前的压缩数据,即 ES 数据

typedef struct AVPacket{
	AVBufferRef *buf;
	int64_t      pts;
	int64_t      dts;
	uint8_t    *data;
	int         size;
	int stream_index;
	int        flags;
	AVPacketSideData *side_data;
	int side_data_elems;
	int   duration;
	int64_t pos;
	int64_t convergence_duration;
}
  • pts: 显示时间戳,它的单位是 AVStream->time_base;如果在文件中没有保存这个值,它被设置为 AV_NOPTS_VALUE。由于图像显示不可能早于图像解压,因此 PTS 必须比 DTS(解码时间戳)大或者相等。某些文件格式中可能会使用 PTS/DTS 表示其他含义,此时时间戳必须转为真正的时间戳才能保存到 AVPacket 结构中
  • dts : 解码时间戳,它的单位是 AVStream->time_base,表示压缩视频解码的时间,如果文件中没有保存该值,它被设置为 AV_NOPTS_VALUE
  • data : 指向真正的压缩编码的数据
  • size : 表示该 AVPacket 结构中 data 字段所指向的压缩数据的大小
  • stream_index : 标识该 AVPacket 结构所属的视频流或音频流
  • duration : 该 AVPacket 包以 AVStream->time_base 为单位,所持续的时间,0 表示未知,或者为显示时间戳的差值(next_pts - this pts)
  • pos : 表示该 AVPacket 数据在媒体中的位置,即字节偏移量

AVFrame

存放编码前、解码后的原始数据,如 YUV 格式的视频数据或 PCM 格式的音频数据等

AVFrame 结构体必须使用 av_frame_alloc() 分配,注意该函数只是分配了 AVFrame 结构本身,它的 data 区域要用其他方式管理;该结构体的释放要用 av_frame_free()

AVFrame 结构体通常只需分配一次,之后即可通过保存不同的数据来重复多次使用,比如一个 AVFrame 结构可以保存从解码器中解码出的多帧数据。此时,就可以使用 av_frame_unref() 释放任何由 Frame 保存的参考帧并还原回最原始的状态。

typedef struct AVFrame{
	uint8_t *data[AV_NUM_DATA_POINTERS];
	int linesize[AV_NUM_DATA_POINTERS];
	uint8_t **extended_data;
	int width, height;
	int nb_samples; /* number of audio samples(per channel) described by this frame */
	int format;
	int key_frame; /* 1->keyframe, 0->not*/
	enum AVPictureType pict_type;
	AVRational sample_aspect_ratio;
	int64_t pts;
	int64_t pkt_pts;
	int64_t pkt_dts;
	int coded_picture_number;
	int display_picture_number;
	int quality;
	void *opaque; /* for some private data of the user */
	uint64_t error[AV_NUM_DATA_POINTERS];
	int repeat_pict;
	int interlaced_frame;
	int top_field_first;	/* If the content is interlaced, is top field displayed first */
	int palette_has_changed;
    int64_t reordered_opaque;
    int sample_rate;    /*Sample rate of the audio data*/
    uint64_t channel_layout; /*channel layout of the audio data*/
    AVBufferRef *buf[AV_NUM_DATA_POINTERS];
    AVBufferRef **extended_buf;
    int nb_exteneded_buf;
    AVFrameSideData **side_data;
    int nb_side_data;

    int flags;
    enum AVColorRange color_range;
    enum AVColorPrimaries color_primaries;
    enum AVColorTransferCharacteristic color_trc;
    enum AVColorSpace colorspace;
    enum AVChromaLocation chroma_location;

    int64_t best_effort_timestamp;
    int64_t pkt_pos;
    int64_t pkt_duration;
    AVDictionary *metadata;
    int decode _error_flags;

    int channels;
    int pkt_size;
    AVBufferRef *qp_table_buf;
}
avcodec_align_dimensions2()

pict_type

enum AVPictureType {
    AV_PICTURE_TYPE_NONE = 0, ///< Undefined
    AV_PICTURE_TYPE_I,     ///< Intra
    AV_PICTURE_TYPE_P,     ///< Predicted
    AV_PICTURE_TYPE_B,     ///< Bi-dir predicted
    AV_PICTURE_TYPE_S,     ///< S(GMC)-VOP MPEG4
    AV_PICTURE_TYPE_SI,    ///< Switching Intra
    AV_PICTURE_TYPE_SP,    ///< Switching Predicted
    AV_PICTURE_TYPE_BI,    ///< BI type
};

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK