SRS 保存 AAC 音频文件主要是通过调用 SrsAacTransmuxer::write_audio() 实现的,函数定义如下:
int SrsAacTransmuxer::write_audio(int64_t timestamp, char* data, int size)
{
int ret = ERROR_SUCCESS;
srs_assert(data);
timestamp &= 0x7fffffff;
SrsBuffer* stream = tag_stream;
if ((ret = stream->initialize(data, size)) != ERROR_SUCCESS) {
return ret;
}
// audio decode
if (!stream->require(1)) {
ret = ERROR_AAC_DECODE_ERROR;
srs_error("aac decode audio sound_format failed. ret=%d", ret);
return ret;
}
//先解析音频Tag头(此处尚未读到AAC Sequence header,需根据后面的AACPacketType判断):
//SoundFormat 4bits | SoundRate 2bits | SoundSize 1bit | SoundType 1bit | AACPacketType 8bits |
// @see: E.4.2 Audio Tags, video_file_format_spec_v10_1.pdf, page 76
int8_t sound_format = stream->read_1bytes();//读取SoundFormat
//int8_t sound_type = sound_format & 0x01;
//int8_t sound_size = (sound_format >> 1) & 0x01;
//int8_t sound_rate = (sound_format >> 2) & 0x03;
sound_format = (sound_format >> 4) & 0x0f;
if ((SrsAudioCodecId)sound_format != SrsAudioCodecIdAAC) {//根据SoundFormat判断是否为AAC
ret = ERROR_AAC_DECODE_ERROR;
srs_error("aac required, format=%d. ret=%d", sound_format, ret);
return ret;
}
if (!stream->require(1)) {
ret = ERROR_AAC_DECODE_ERROR;
srs_error("aac decode aac_packet_type failed. ret=%d", ret);
return ret;
}
SrsAudioAacFrameTrait aac_packet_type = (SrsAudioAacFrameTrait)stream->read_1bytes();//读取包类型
if (aac_packet_type == SrsAudioAacFrameTraitSequenceHeader) { //根据包类型判断后面的数据是否为AudioSpecificConfig
//接着从音频TAG中提取2字节的AUDIO SPECIFIC CONFIG,后面会将其转换成7字节的ADTS HEADER
//AudioSpecificConfig(2byte)结构定义:
//|audioObjectType:5bit|samplingFrequencyIndex:4bit|channelConfiguration:4bit|frameLengthFlag:1bit |dependsOnCoreCoder:1bit|extensionFlag:1bit|
// AudioSpecificConfig
// 1.6.2.1 AudioSpecificConfig, in ISO_IEC_14496-3-AAC-2001.pdf, page 33.
//
// only need to decode the first 2bytes:
// audioObjectType, 5bits.
// samplingFrequencyIndex, aac_sample_rate, 4bits.
// channelConfiguration, aac_channels, 4bits
if (!stream->require(2)) {
ret = ERROR_AAC_DECODE_ERROR;
srs_error("aac decode sequence header failed. ret=%d", ret);
return ret;
}
int8_t audioObjectType = stream->read_1bytes();
aac_sample_rate = stream->read_1bytes();
aac_channels = (aac_sample_rate >> 3) & 0x0f;
aac_sample_rate = ((audioObjectType << 1) & 0x0e) | ((aac_sample_rate >> 7) & 0x01);
audioObjectType = (audioObjectType >> 3) & 0x1f;
aac_object = (SrsAacObjectType)audioObjectType;
got_sequence_header = true;
return ret;
}
if (!got_sequence_header) {
ret = ERROR_AAC_DECODE_ERROR;
srs_error("aac no sequence header. ret=%d", ret);
return ret;
}
// the left is the aac raw frame data.
int16_t aac_raw_length = stream->size() - stream->pos();
// write the ADTS header.
// @see ISO_IEC_14496-3-AAC-2001.pdf, page 75,
// 1.A.2.2 Audio_Data_Transport_Stream frame, ADTS
// @see https://github.com/ossrs/srs/issues/212#issuecomment-64145885
// byte_alignment()
// adts_fixed_header:
// 12bits syncword,
// 16bits left.
// adts_variable_header:
// 28bits
// 12+16+28=56bits
// adts_error_check:
// 16bits if protection_absent == 0 (CRC present)
// 56+16=72bits
// if protection_absent:
// require(7bytes)=56bits
// else
// require(9bytes)=72bits