1. Introduction
Use FFmpeg to extract the audio data from an MP4 file and then play it. The basic approach is as follows (a minimal sketch of the core decode loop follows the list):
(1) Open the MP4 file with avformat_open_input and obtain the AVFormatContext
(2) Call avformat_find_stream_info to populate the stream information in the AVFormatContext
(3) Iterate over nb_streams to find the index of the audio stream
(4) Find a decoder with avcodec_find_decoder
(5) Allocate a decoder context with avcodec_alloc_context3
(6) Configure the context with avcodec_parameters_to_context
(7) Open the decoder
(8) Read audio packets with av_read_frame
(9) Send each audio packet to the decoder with avcodec_send_packet
(10) Fetch the decoded data with avcodec_receive_frame
(11) Resample the audio
(12) Buffer the PCM in a cache
(13) Feed the PCM to AudioTrack for playback
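The heart of steps (8)-(10) is FFmpeg's standard send/receive decoding loop. Here is a minimal sketch of just that loop, with error handling omitted; fmt_ctx, dec_ctx and audio_stream_index stand in for the contexts set up in the full listing below:
AVPacket *pkt = av_packet_alloc();
AVFrame *frame = av_frame_alloc();
while (av_read_frame(fmt_ctx, pkt) >= 0) {                    // (8) read the next packet
    if (pkt->stream_index == audio_stream_index) {
        avcodec_send_packet(dec_ctx, pkt);                    // (9) feed the packet to the decoder
        while (avcodec_receive_frame(dec_ctx, frame) == 0) {  // (10) collect decoded frames
            // (11)-(12) resample the frame and buffer the PCM here
        }
    }
    av_packet_unref(pkt);
}
av_packet_free(&pkt);
av_frame_free(&frame);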
2. Create a .h and .cpp for the audio/video separation feature
AudioVideoSeparator.h:
//
// Created by 13658 on 2024/9/13.
//
#ifndef FFMPEGTESTPRJ_AUDIOVIDEOSEPARATOR_H
#define FFMPEGTESTPRJ_AUDIOVIDEOSEPARATOR_H
#include <string>
#include <list>
using namespace std;
namespace xpg {

class AudioVideoSeparator {
public:
    AudioVideoSeparator();
    ~AudioVideoSeparator();

    string getTestString();
    int process(const char *argv);
    int getPcmData(char* data, int dataLen);

private:
    bool start_;
    std::list<std::pair<char*, int>> pcmData_;
    int curDataPushOffset_ = 0;
    int curDataPopOffset_ = 0;
    int frameLen_ = 0;
    string srcFilePath_{"/storage/emulated/0/Pictures/Screenshots/Record_2024-09-05-22-35-25.mp4"};
    string destFilePath_{"/storage/emulated/0/Pictures/Screenshots/Record_2024-09-11.mp3"};
};

} // xpg
#endif //FFMPEGTESTPRJ_AUDIOVIDEOSEPARATOR_H
AudioVideoSeparator.cpp:
//
// Created by 13658 on 2024/9/13.
//
#include "AudioVideoSeparator.h"
#include <stdio.h>
#include <string.h> // for memcpy() in getPcmData()
extern "C" { // this extern "C" block is required: the FFmpeg headers are C headers
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswresample/swresample.h"
#include "libavutil/channel_layout.h"
#include "libavutil/frame.h"
#include "libavutil/mathematics.h"
#include "libavutil/mem.h"
#include "libavutil/samplefmt.h"
}
namespace xpg {
// Manually controls whether H.264 avcC bitstreams are converted to Annex B:
// 1: convert avcC to Annex B
// 0: keep the avcC bitstream unchanged
#define H264_AnnexB 0
#define MAX_PCM_DATA_LEN (925*4*500) // ~20 s of PCM at a 48 kHz sample rate
AudioVideoSeparator::AudioVideoSeparator() {
    start_ = false;
}

AudioVideoSeparator::~AudioVideoSeparator() {
    // free any PCM buffers that were queued but never consumed
    while (!pcmData_.empty()) {
        av_free(pcmData_.front().first);
        pcmData_.pop_front();
    }
}

string AudioVideoSeparator::getTestString() {
    return string("AudioVideoSeparator::getTestString") + avcodec_configuration();
}
int AudioVideoSeparator::process(const char *argv) {
    AVFormatContext *pFormatCtx = NULL;
    //const char* filePath = "/storage/emulated/0/DCIM/Camera/VID20240922135029.mp4";
    //const char* filePath = "/storage/emulated/0/tmp/ffmpegTest/it_is_realme.mp3";
    // use the path passed in by the caller; fall back to the default test file
    const char* filePath = (argv != NULL) ? argv : srcFilePath_.c_str();
    if (avformat_open_input(&pFormatCtx, filePath, NULL, NULL) != 0) {
        fprintf(stderr, "Error: Couldn't open file\n");
        return -1;
    }
    if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
        fprintf(stderr, "Error: Couldn't find stream information\n");
        avformat_close_input(&pFormatCtx);
        return -1;
    }
    // find the first audio stream
    int audio_stream_index = -1;
    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audio_stream_index = i;
            break;
        }
    }
    if (audio_stream_index == -1) {
        fprintf(stderr, "Error: Couldn't find audio stream\n");
        avformat_close_input(&pFormatCtx);
        return -1;
    }
    // set up the decoder for the audio stream
    AVCodecParameters *codecpar = pFormatCtx->streams[audio_stream_index]->codecpar;
    const AVCodec *codec = avcodec_find_decoder(codecpar->codec_id);
    AVCodecContext *codecCtx = avcodec_alloc_context3(codec);
    avcodec_parameters_to_context(codecCtx, codecpar);
    if (avcodec_open2(codecCtx, codec, NULL) < 0) {
        fprintf(stderr, "Error: Couldn't open decoder\n");
        avcodec_free_context(&codecCtx);
        avformat_close_input(&pFormatCtx);
        return -1;
    }
    AVPacket *packet = av_packet_alloc();
    AVFrame *frame = av_frame_alloc();
    // resample from the decoder's output format to 44.1 kHz stereo S16,
    // which is what AudioTrack is configured for on the Java side
    SwrContext* swr_ctx = NULL;
    AVChannelLayout ch_layout_out;
    av_channel_layout_default(&ch_layout_out, 2); // stereo output layout
    int ret = swr_alloc_set_opts2(&swr_ctx,
            &ch_layout_out, AV_SAMPLE_FMT_S16, 44100,
            &codecCtx->ch_layout, codecCtx->sample_fmt, codecCtx->sample_rate,
            0, NULL);
    if (ret < 0) {
        return -1;
    }
    ret = swr_init(swr_ctx);
    if (ret < 0) {
        return -1;
    }
    while (av_read_frame(pFormatCtx, packet) >= 0) {
        if (packet->stream_index == audio_stream_index) {
            avcodec_send_packet(codecCtx, packet);
            while (avcodec_receive_frame(codecCtx, frame) == 0) {
                uint8_t *output = NULL;
                // worst-case number of output samples for this frame
                int out_samples = av_rescale_rnd(
                        swr_get_delay(swr_ctx, codecCtx->sample_rate) + frame->nb_samples,
                        44100, codecCtx->sample_rate, AV_ROUND_UP);
                av_samples_alloc(&output, NULL, 2, out_samples, AV_SAMPLE_FMT_S16, 0);
                out_samples = swr_convert(swr_ctx, &output, out_samples,
                        (const uint8_t **)frame->extended_data, frame->nb_samples);
                if (out_samples > 0) {
                    // 2 channels * 2 bytes per S16 sample = 4 bytes per output sample
                    pcmData_.push_back(std::make_pair((char*)output, out_samples * 4));
                } else {
                    av_free(output);
                }
            }
        }
        av_packet_unref(packet);
    }
    av_packet_free(&packet);
    av_frame_free(&frame);
    swr_free(&swr_ctx);
    avcodec_free_context(&codecCtx);
    avformat_close_input(&pFormatCtx);
    return 0;
}
int AudioVideoSeparator::getPcmData(char* data, int dataLen) {
    // return 0 when nothing is buffered so the caller can stop its read loop
    if (pcmData_.empty()) {
        return 0;
    }
    if (!data || dataLen < pcmData_.front().second) {
        return -1;
    }
    int pcmLen = pcmData_.front().second;
    memcpy(data, pcmData_.front().first, pcmLen);
    av_free(pcmData_.front().first); // buffer was allocated with av_samples_alloc()
    pcmData_.pop_front();
    return pcmLen;
}
} // xpg
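A caveat about the listing above: process() pushes into pcmData_ from the extraction call while getPcmData() pops from the playback loop, and std::list is not thread-safe. If the two ever run concurrently, the queue needs a lock. Below is a minimal sketch of a lock-protected PCM queue; the PcmQueue class is an illustration, not part of the project code:
#include <cstring>
#include <list>
#include <mutex>
#include <utility>
extern "C" {
#include "libavutil/mem.h" // av_free() for buffers from av_samples_alloc()
}

// Lock-protected variant of the pcmData_ queue: push() would be called from the
// decode loop in process(), pop() from the playback thread via getPcmData().
class PcmQueue {
public:
    void push(char* buf, int len) {
        std::lock_guard<std::mutex> lock(mutex_);
        queue_.emplace_back(buf, len);
    }
    // Copies the oldest buffer into dst and returns its byte length,
    // or 0 if nothing is buffered or dst is too small.
    int pop(char* dst, int dstLen) {
        std::lock_guard<std::mutex> lock(mutex_);
        if (queue_.empty() || dstLen < queue_.front().second) return 0;
        int len = queue_.front().second;
        std::memcpy(dst, queue_.front().first, len);
        av_free(queue_.front().first);
        queue_.pop_front();
        return len;
    }
private:
    std::mutex mutex_;
    std::list<std::pair<char*, int>> queue_;
};
In the demo as written, extraction finishes before playback starts, so the single-threaded version works; the lock only matters once the two operations can overlap.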
3. Add the JNI interfaces
Add a button that calls a JNI method to trigger the audio extraction and caching:
xpg::AudioVideoSeparator* audioVideoSeparator = new xpg::AudioVideoSeparator();
extern "C" JNIEXPORT jstring JNICALL
Java_com_example_ffmpegtestprj_MainActivity_fetchPcmFromMp4(
JNIEnv* env,
jobject /* this */) {
int ret = audioVideoSeparator->process("/storage/emulated/0/Pictures/Screenshots/Record_2024-09-05-22-35-25.mp4");
std::string resultStr;
if(0 == ret){
resultStr = "OK";
}else{
resultStr = "fail";
}
return env->NewStringUTF(resultStr.c_str());
}
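The file path is hardcoded both here and inside process(). If you would rather pick the file on the Java side, the JNI method can take a jstring and forward it; a sketch under that assumption (the fetchPcmFromPath name and the matching Java declaration are hypothetical):
extern "C" JNIEXPORT jstring JNICALL
Java_com_example_ffmpegtestprj_MainActivity_fetchPcmFromPath(
        JNIEnv* env,
        jobject /* this */, jstring javaPath) {
    const char* path = env->GetStringUTFChars(javaPath, NULL);
    int ret = audioVideoSeparator->process(path); // process() must actually use its argument
    env->ReleaseStringUTFChars(javaPath, path);
    return env->NewStringUTF(ret == 0 ? "OK" : "fail");
}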
Add another button that calls a JNI method to fetch the PCM data:
extern "C" JNIEXPORT jint JNICALL
Java_com_example_ffmpegtestprj_MainActivity_getPcm(
JNIEnv* env,
jobject instance, jbyteArray javaArray, jint arrayLen) {
// 获取原始数组
jbyte *cArray = env->GetByteArrayElements(javaArray, NULL);
return audioVideoSeparator->getPcmData((char*)cArray, arrayLen);
}
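If you prefer not to pin the Java array, an equivalent approach is to copy through a temporary native buffer and write it back with SetByteArrayRegion(); a drop-in alternative sketch for the function above:
extern "C" JNIEXPORT jint JNICALL
Java_com_example_ffmpegtestprj_MainActivity_getPcm(
        JNIEnv* env,
        jobject instance, jbyteArray javaArray, jint arrayLen) {
    std::vector<char> buf(arrayLen); // temporary native buffer; needs #include <vector>
    int pcmLen = audioVideoSeparator->getPcmData(buf.data(), arrayLen);
    if (pcmLen > 0) {
        env->SetByteArrayRegion(javaArray, 0, pcmLen, (const jbyte*)buf.data());
    }
    return pcmLen;
}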
4. Modify MainActivity.java
Fetch the PCM data from the native cache through the JNI interface and feed it to AudioTrack for playback. Note that the loop below blocks until the PCM cache is drained, so startPlayPcm() should run on a worker thread rather than the UI thread:
private void startPlayPcm() {
    int sampleRateInHz = 44100;
    int channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
    int audioFormat = AudioFormat.ENCODING_PCM_16BIT;
    int bufferSizeInBytes = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
    AudioTrack audioTrack = new AudioTrack(
            AudioManager.STREAM_MUSIC,   // audio stream type
            sampleRateInHz,              // sample rate: 44100 Hz, matching the resampler output
            channelConfig,               // channel configuration: stereo, matching the resampler output
            audioFormat,                 // sample format: 16-bit PCM
            bufferSizeInBytes,           // buffer size in bytes
            AudioTrack.MODE_STREAM       // streaming mode
    );
    if (audioTrack.getState() != AudioTrack.STATE_UNINITIALIZED) {
        audioTrack.play();
    }
    byte[] data = new byte[bufferSizeInBytes];
    while (true) {
        int pcmLen = getPcm(data, bufferSizeInBytes);
        if (pcmLen > 0) {
            audioTrack.write(data, 0, pcmLen);
        } else {
            break;
        }
    }
    if (audioTrack.getState() != AudioTrack.STATE_UNINITIALIZED) {
        audioTrack.stop();
        audioTrack.release();
    }
}
5. Results
Writing the extracted audio to a file did not work (the app only has write permission for certain directories, such as /storage/emulated/0), and audio playback cannot be shown in a screenshot, so I will simply note here that the extracted audio plays back correctly.
6. Pitfalls
(1) The PCM format a phone plays is generally not the same as the PCM that FFmpeg decodes, so the decoded audio has to be resampled before playback.
(2) In my tests, the resampler failed to initialize when the input and output sample rates were set to the same value, so the two rates were kept different here.