【入门到精通】鸿蒙next开发：音视频合成功能解读

往期鸿蒙5.0全套实战文章必看：（文中附带全栈鸿蒙5.0学习资料）

音视频合成功能

场景描述

类似音视频配音功能，适用于给视频配音、配乐。

场景1：输入一个视频文件和一个音频文件，将它们合成1个视频文件，要求音频文件合成到视频指定的时间范围。

场景2：输入一个视频文件和多个音频文件，将它们合成1个视频文件，要求将多个音频文件合成到视频指定的时间范围。1、多个音频文件串行合成。2、多个音频文件并行合成。备注：多个音频文件编码类型要一致，还要确保封装格式是支持的。

方案描述

1、TS侧通过XComponentController组件控制器来调用NDK侧的合成和播放方法，支持动态配置音频播放的时间点。

2、NDK侧收到合成请求后，读取resources/rawfile目录中的音视频输入文件保存到配置中，同时创建封装后的输出文件。

3、NDK侧开始合成：创建音频解封装器和视频解封装器，音频和视频放在两个子线程中分开处理。流程：原始音频(多个) -->解封装 -->封装(输出文件)。原始视频 -->解封装 -->封装(输出文件)。通过修改pts的值可以实现音频文件合成到视频指定的时间范围。

4、NDK侧收到播放请求后，从配置（AppConfig）中读取合成后输出文件的路径，打开该文件进行播放。流程：解封装-->音频+视频。音频-->解码-->OH_AudioRenderer播放。视频-->解码-->Surface模式给到XComponent送显。

场景实现

场景一：输入一个视频文件和一个音频文件，将它们合成1个视频文件，要求音频文件合成到视频指定的时间范围。

核心代码

1、TS侧通过XComponentController组件控制器来调用NDK侧的合成和播放方法，支持动态配置音频播放的时间点。

/**
 * Builds the page: an XComponent surface used for playback, a row holding the
 * combination (mux) toggle button and the audio-delay input, and a play toggle button.
 */
build() {
  Column() {
    Column() {
      XComponent({
        id: 'xcomponentId',
        type: XComponentType.SURFACE,
        libraryname: 'entry',
        controller: this.mXComponentController
      })
        .onLoad((xComponentContext) => {
          // Keep the native context so the buttons below can call into the NDK side.
          this.xComponentContext = xComponentContext as XComponentContext;
          // NOTE(review): surfaceWidth is fed from PREVIEW_HEIGHT and surfaceHeight from
          // PREVIEW_WIDTH; presumably intentional for a rotated surface - confirm.
          this.mXComponentController.setXComponentSurfaceRect({
            surfaceWidth: Constants.PREVIEW_HEIGHT,
            surfaceHeight: Constants.PREVIEW_WIDTH
          })
        })
        .onDestroy(() => {
          console.log('onDestroy');
        })
        .id('xcomponent')
        .margin(5)
        .layoutWeight(1)
    }
    .layoutWeight(1)
    .width('90%')

    Blank(10)
    Row() {
      // Empty column used purely as a horizontal spacer.
      Column() {
      }.width('95')
      Button(this.buttonCombination)
        .onClick(() => {
          if (this.buttonCombination == this.START_COMBINATION) {
            this.buttonCombination = this.STOP_COMBINATION
            if (this.xComponentContext) {
              // Lock the delay input while a combination is running.
              this.textFocusAble = false;
              this.xComponentContext.StartCombination(getContext(this).resourceManager, this.audioDelay);
              this.startCheck();
            }
          } else {
            this.buttonCombination = this.START_COMBINATION
            if (this.xComponentContext) {
              this.stopCheck();
              this.xComponentContext.StopCombination();
              this.textFocusAble = true;
            }
          }
        })
      TextInput({placeholder:'音频延时'})
        // The value is a number of seconds, so restrict the keyboard to digits.
        .type(InputType.Number)
        .maxLength(3)
        .width('25%')
        .onChange((value: string) => {
          // Only a finite, positive whole number of seconds is kept; empty, non-numeric
          // or negative input falls back to 0 so the native side never sees NaN or a
          // negative delay.
          const seconds: number = Number(value);
          this.audioDelay = (Number.isFinite(seconds) && seconds > 0) ? Math.floor(seconds) : 0;
        })
        .focusable(this.textFocusAble)
    }

    Blank(10)
    Button(this.buttonPlayer)
      .onClick(() => {
        if (this.buttonPlayer == this.START_PLAYER) {
          this.buttonPlayer = this.STOP_PLAYER
          if (this.xComponentContext) {
            this.xComponentContext.StartPlayer();
          }
        } else {
          this.buttonPlayer = this.START_PLAYER
          if (this.xComponentContext) {
            this.xComponentContext.StopPlayer();
          }
        }
      })
  }
  .width('100%')
  .height('100%')
}

2、NDK侧收到合成请求后，读取resources/rawfile目录中的音视频输入文件，将其文件描述符信息保存到配置（AppConfig）中，同时创建封装后的输出文件。

/**
 * Resolves the rawfile input(s) of the given type to file descriptors and stores them in AppConfig.
 *
 * For RES_TYPE_AUDIO_IN all AUDIO_FILES_COUNT entries are processed; every other type has one entry.
 * An entry that cannot be opened or described is logged and skipped, so nothing is stored for it.
 *
 * @param env     N-API environment. Kept for interface compatibility; the file names come straight
 *                from AppConfig, so no JS value conversion is needed.
 * @param ResMmgr Native resource manager used to open the rawfile entries.
 * @param type    Which kind of input resource to load.
 */
void PluginRender::ReadFileData(napi_env env, NativeResourceManager *ResMmgr, RES_TYPE type)
{
  (void)env;
  if (ResMmgr == nullptr) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender", "ReadFileData: resource manager is null");
    return;
  }

  const int32_t fileCount = (type == RES_TYPE::RES_TYPE_AUDIO_IN) ? AUDIO_FILES_COUNT : 1;
  for (int32_t i = 0; i < fileCount; ++i) {
    const std::string &filename = AppConfig::GetInstance().GetResDir(type, i);

    // Open the rawfile entry; without a valid handle there is nothing to describe.
    RawFile *rawFile = OH_ResourceManager_OpenRawFile(ResMmgr, filename.c_str());
    if (rawFile == nullptr) {
      OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender", "OH_ResourceManager_OpenRawFile failed");
      continue;
    }

    // Fetch the {fd, start, length} descriptor, then close the rawfile handle right away;
    // only the descriptor values are kept.
    RawFileDescriptor descriptor {};
    const bool described = OH_ResourceManager_GetRawFileDescriptor(rawFile, descriptor);
    OH_ResourceManager_CloseRawFile(rawFile);
    if (!described) {
      OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender",
        "OH_ResourceManager_GetRawFileDescriptor failed");
      continue;
    }

    // Persist the descriptor in the shared configuration for the combination step.
    FdInfo info;
    info.inputFd = descriptor.fd;
    info.inputFileOffset = descriptor.start;
    info.inputFileSize = descriptor.length;
    AppConfig::GetInstance().SetFileData(type, info, i);
  }
}
 
/**
 * N-API entry point: StartCombination(resourceManager, audioDelaySeconds).
 *
 * Loads the audio and video rawfile inputs into AppConfig, stores the audio delay and
 * starts the combination on the PluginRender instance bound to this callback.
 *
 * @param env  N-API environment.
 * @param info Callback info; args[0] is the JS resourceManager, args[1] the audio delay in seconds.
 * @return Always nullptr (no value is handed back to JS).
 */
napi_value PluginRender::StartCombination(napi_env env, napi_callback_info info)
{
  PluginRender *render = GetPluginRender(env, info);
  if (render == nullptr) {
    return nullptr;
  }

  size_t argc = 2;
  napi_value args[2] = { nullptr };
  if (napi_get_cb_info(env, info, &argc, args, nullptr, nullptr) != napi_ok || argc < 1) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender",
      "StartCombination: failed to read arguments");
    return nullptr;
  }

  // Obtain the native resourceManager object from the JS one.
  NativeResourceManager *mNativeResMgr = OH_ResourceManager_InitNativeResourceManager(env, args[0]);
  if (mNativeResMgr == nullptr) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender",
      "StartCombination: init native resource manager failed");
    return nullptr;
  }

  // Read the audio/video input files, then release the resourceManager object.
  ReadFileData(env, mNativeResMgr, RES_TYPE::RES_TYPE_AUDIO_IN);
  ReadFileData(env, mNativeResMgr, RES_TYPE::RES_TYPE_VIDEO_IN);
  OH_ResourceManager_ReleaseNativeResourceManager(mNativeResMgr);

  // Audio delay in seconds. A missing, non-numeric or negative argument falls back to 0
  // instead of leaving the value uninitialised.
  int32_t delaySeconds = 0;
  if (napi_get_value_int32(env, args[1], &delaySeconds) != napi_ok || delaySeconds < 0) {
    delaySeconds = 0;
  }
  AppConfig::GetInstance().SetAudioDelay(delaySeconds);

  // Start the combination.
  render->StartCombination();
  return nullptr;
}

3、NDK侧开始合成：创建音频解封装器和视频解封装器，音频和视频放在两个子线程中分开处理。

void PluginRender::StartCombination(void) 
{ 
  SampleInfo sampleInfo; 
  // 合成后输出文件 
  int32_t outputFd = 
  open(AppConfig::GetInstance().GetResDir(RES_TYPE::RES_TYPE_VIDEO_OUT).c_str(), O_RDWR | O_CREAT, 0777); 
  sampleInfo.outputFd = outputFd; 
 
  // 视频输入文件 
  AppConfig::GetInstance().GetFileData(RES_TYPE::RES_TYPE_VIDEO_IN, sampleInfo.videoFd); 
  // 音频输入文件 
  for (int i = 0; i < AUDIO_FILES_COUNT; ++i) { 
  AppConfig::GetInstance().GetFileData(RES_TYPE::RES_TYPE_AUDIO_IN, sampleInfo.audioFd[i], i); 
} 
 
  // 开始合成 
  int32_t ret = Recorder::GetInstance().Start(sampleInfo); 
  if (ret != AVCODEC_SAMPLE_ERR_OK) { 
    return; 
  } 
} 
/**
 * Creates the video/audio demuxers and the muxer, then launches the two worker threads
 * that copy samples from the inputs into the output file.
 *
 * If any creation/configuration step fails, everything created so far is dropped again so
 * that a later Start() is not rejected as "Already started".
 *
 * @param sampleInfo Input/output descriptors for this run; copied into sampleInfo_.
 * @return AVCODEC_SAMPLE_ERR_OK on success; AVCODEC_SAMPLE_ERR_ERROR when already started;
 *         otherwise the error code of the step that failed.
 */
int32_t Recorder::Start(SampleInfo &sampleInfo)
{
  std::lock_guard<std::mutex> lock(mutex_);

  CHECK_AND_RETURN_RET_LOG(!isStarted_, AVCODEC_SAMPLE_ERR_ERROR, "Already started.");
  for (int32_t i = 0; i < AUDIO_FILES_COUNT; ++i) {
    CHECK_AND_RETURN_RET_LOG(demuxer_audio[i] == nullptr, AVCODEC_SAMPLE_ERR_ERROR,
      "Already started audio demuxer.");
  }
  CHECK_AND_RETURN_RET_LOG(demuxer_video == nullptr, AVCODEC_SAMPLE_ERR_ERROR, "Already started video demuxer.");
  CHECK_AND_RETURN_RET_LOG(muxer_ == nullptr, AVCODEC_SAMPLE_ERR_ERROR, "Already started muxer_.");

  sampleInfo_ = sampleInfo;

  // Rolls back a half-finished start. No worker thread exists yet at any point where this
  // is used, so the objects can simply be dropped.
  const auto abortStart = [this](int32_t errorCode) {
    muxer_.reset();
    for (int32_t i = 0; i < AUDIO_FILES_COUNT; ++i) {
      demuxer_audio[i].reset();
    }
    demuxer_video.reset();
    return errorCode;
  };

  // Demuxers: one for the video input and one per audio input.
  demuxer_video = std::make_unique<Demuxer>();
  int32_t ret = demuxer_video->Create(VIDEO_TYPE, sampleInfo_);
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    AVCODEC_SAMPLE_LOGE("Create demuxer_video failed");
    return abortStart(ret);
  }
  for (int32_t i = 0; i < AUDIO_FILES_COUNT; ++i) {
    demuxer_audio[i] = std::make_unique<Demuxer>();
    ret = demuxer_audio[i]->Create(AUDIO_TYPE, sampleInfo_, i);
    if (ret != AVCODEC_SAMPLE_ERR_OK) {
      AVCODEC_SAMPLE_LOGE("Create demuxer_audio failed");
      return abortStart(ret);
    }
  }

  // Muxer writing both tracks into the output file.
  muxer_ = std::make_unique<Muxer>();
  ret = muxer_->Create(sampleInfo_.outputFd);
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    AVCODEC_SAMPLE_LOGE("Create muxer with fd(%{public}d) failed", sampleInfo_.outputFd);
    return abortStart(ret);
  }
  ret = muxer_->Config(sampleInfo_);
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    AVCODEC_SAMPLE_LOGE("Recorder muxer config failed");
    return abortStart(ret);
  }
  ret = muxer_->Start();
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    AVCODEC_SAMPLE_LOGE("Muxer start failed");
    return abortStart(ret);
  }

  // The flag is raised before the threads start because both loops poll it.
  // std::make_unique never yields nullptr, so no null check follows.
  isStarted_ = true;
  videoDemuxerThread_ = std::make_unique<std::thread>(&Recorder::VideoProcessThread, this);
  audioDemuxerThread_ = std::make_unique<std::thread>(&Recorder::AudioProcessThread, this);

  releaseThread_ = nullptr;
  AVCODEC_SAMPLE_LOGI("Succeed");
  return AVCODEC_SAMPLE_ERR_OK;
}
 
void Recorder::VideoProcessThread() 
{ 
  OH_AVBuffer *buffer = OH_AVBuffer_Create(sampleInfo_.videoWidth * sampleInfo_.videoHeight * 3 >> 1); 
  OH_AVCodecBufferAttr attr; 
  while (true) { 
    CHECK_AND_BREAK_LOG(isStarted_, "Work done, VideoDemuxerThread out"); 
    demuxer_video->ReadSample(demuxer_video->GetVideoTrackId(), reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
    // 从封装器中读取结束 
    if (attr.flags == OH_AVCodecBufferFlags::AVCODEC_BUFFER_FLAGS_EOS) { 
      videoEnd = true; 
      break; 
    } 
    // 封装视频 
    muxer_->WriteSampleVideo(reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
  } 
 
  OH_AVBuffer_Destroy(buffer); 
} 
 
void Recorder::AudioProcessThread() 
{ 
  OH_AVBuffer *buffer = OH_AVBuffer_Create(sampleInfo_.videoWidth * sampleInfo_.videoHeight * 3 >> 1); 
  OH_AVCodecBufferAttr attr; 
  int32_t lastPts = AppConfig::GetInstance().GetAudioDelay() * 1000 * 1000; 
  int32_t nextPts = 0; 
  // 封装音频 
  int32_t fileIndex = 0; // 代表第几个音频文件 
  while(fileIndex < AUDIO_FILES_COUNT){ 
    demuxer_audio[fileIndex]->ReadSample(demuxer_audio[fileIndex]->GetAudioTrackId(), 
      reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
    if (attr.flags == OH_AVCodecBufferFlags::AVCODEC_BUFFER_FLAGS_EOS) { 
      audioEnd[fileIndex++] = true; 
      lastPts = nextPts; 
    } else { 
      attr.pts += lastPts; 
      nextPts = attr.pts; 
      muxer_->WriteSampleAudio(reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
    } 
    CHECK_AND_BREAK_LOG(isStarted_, "Work done, AudioDemuxerThread out"); 
  } 
  OH_AVBuffer_Destroy(buffer); 
}

4、NDK侧收到播放请求后，从配置（AppConfig）中读取合成后输出文件的路径，打开该文件进行播放。

/**
 * Opens the combined output file and starts playing it on the XComponent's native window.
 *
 * Returns early (after logging) when the file cannot be stat'ed or opened, or when the
 * player fails to initialise.
 */
void PluginRender::StartPlayer(void)
{
  const std::string &playerRoot = AppConfig::GetInstance().GetResDir(RES_TYPE::RES_TYPE_VIDEO_OUT);

  // Query the size before opening, so the failure path has no descriptor to leak.
  struct stat fileStatus {};
  if (stat(playerRoot.c_str(), &fileStatus) != 0) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender", "StartPlayer: get stat failed");
    return;
  }
  const int64_t fileSize = static_cast<int64_t>(fileStatus.st_size);

  const int32_t inputFd = open(playerRoot.c_str(), O_RDONLY);
  if (inputFd < 0) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender", "StartPlayer: open file failed");
    return;
  }

  SampleInfo sampleInfo;
  sampleInfo.videoFd.inputFd = inputFd;
  sampleInfo.videoFd.inputFileOffset = 0;
  sampleInfo.videoFd.inputFileSize = fileSize;
  sampleInfo.window = nativeWindow_;  // render straight into the XComponent's NativeWindow

  int32_t ret = Player::GetInstance().Init(sampleInfo);
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    // NOTE(review): inputFd is not closed here; confirm whether Player releases it after a failed Init().
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender",
      "StartPlayer: player init failed, ret=%{public}d", ret);
    return;
  }

  ret = Player::GetInstance().Start();
  if (ret != AVCODEC_SAMPLE_ERR_OK) {
    OH_LOG_Print(LOG_APP, LOG_ERROR, LOG_PRINT_DOMAIN, "PluginRender",
      "StartPlayer: player start failed, ret=%{public}d", ret);
  }
}
 
/**
 * Starts playback: starts the video decoder and its input/output threads when a video
 * decode context exists, then the audio decoder, its threads and the audio renderer when
 * an audio decode context exists.
 *
 * All required components are validated before anything is started, so a missing audio
 * component is reported before the video side has been launched.
 *
 * @return AVCODEC_SAMPLE_ERR_OK on success; AVCODEC_SAMPLE_ERR_ERROR when already started
 *         or not initialised; otherwise the error code of the decoder that failed to start.
 */
int32_t Player::Start()
{
  std::lock_guard<std::mutex> lock(mutex_);
  CHECK_AND_RETURN_RET_LOG(!isStarted_, AVCODEC_SAMPLE_ERR_ERROR, "Already started.");
  CHECK_AND_RETURN_RET_LOG(demuxer_video != nullptr && videoDecoder_ != nullptr, AVCODEC_SAMPLE_ERR_ERROR,
    "Not initialized: demuxer or video decoder is null.");
  if (audioDecContext_) {
    CHECK_AND_RETURN_RET_LOG(audioDecoder_ != nullptr && audioRenderer_ != nullptr, AVCODEC_SAMPLE_ERR_ERROR,
      "Not initialized: audio decoder or audio renderer is null.");
  }

  int32_t ret;
  if (videoDecContext_) {
    ret = videoDecoder_->Start();
    CHECK_AND_RETURN_RET_LOG(ret == AVCODEC_SAMPLE_ERR_OK, ret, "Decoder start failed");
    // std::make_unique never yields nullptr, so the threads need no null check.
    isStarted_ = true;
    videoDecInputThread_ = std::make_unique<std::thread>(&Player::VideoDecInputThread, this);
    videoDecOutputThread_ = std::make_unique<std::thread>(&Player::VideoDecOutputThread, this);
  }

  if (audioDecContext_) {
    ret = audioDecoder_->Start();
    // NOTE(review): when this fails the video side may already be running; confirm the
    // caller stops the player in that case.
    CHECK_AND_RETURN_RET_LOG(ret == AVCODEC_SAMPLE_ERR_OK, ret, "Audio Decoder start failed");
    isStarted_ = true;
    audioDecInputThread_ = std::make_unique<std::thread>(&Player::AudioDecInputThread, this);
    audioDecOutputThread_ = std::make_unique<std::thread>(&Player::AudioDecOutputThread, this);

    // Clear the playback cache, then start audio rendering.
    audioDecContext_->CodecUserCache_.ClearCache();
    audioRenderer_->AudioRendererStart();
  }

  AVCODEC_SAMPLE_LOGI("Succeed");
  doneCond_.notify_all();
  return AVCODEC_SAMPLE_ERR_OK;
}

场景二：输入一个视频文件和多个音频文件，将它们合成1个视频文件，要求将多个音频文件合成到视频指定的时间范围。

1、多个音频文件串行合成。

可以实现，在如下配置文件中可设置音频文件数和音频源文件，多个音频文件可串行合入。

#ifndef APP_CONFIG_H 
#define APP_CONFIG_H 
 
#include <cstdint> 
#include <string> 
 
  // Number of audio input files that take part in one combination.
  constexpr int32_t AUDIO_FILES_COUNT{3};
 
// Kinds of resources handled by AppConfig.
enum class RES_TYPE {
  RES_TYPE_AUDIO_IN,  // audio input file(s)
  RES_TYPE_VIDEO_IN,  // video input file
  RES_TYPE_VIDEO_OUT  // combined output file
};
 
// Location of one input inside an open file: descriptor, start offset and length.
struct FdInfo {
  int32_t inputFd{-1};         // file descriptor; -1 until one has been assigned
  int64_t inputFileOffset{0};  // offset of the data within the descriptor
  int64_t inputFileSize{0};    // length of the data
};
 
/**
 * Process-wide configuration singleton: audio delay, names of the rawfile inputs with
 * their resolved descriptors, and the path of the combined output file.
 */
class AppConfig {
public:
  // Returns the single instance, created on first use.
  static AppConfig &GetInstance()
  {
    static AppConfig config_;
    return config_;
  }

  // The singleton must never be copied.
  AppConfig(const AppConfig &) = delete;
  AppConfig &operator=(const AppConfig &) = delete;

  int32_t GetAudioDelay();
  void SetAudioDelay(int32_t value);

  const std::string &GetResDir(RES_TYPE type, int32_t index = 0);

  void SetFileData(RES_TYPE type, FdInfo& data, int32_t index = 0);
  void GetFileData(RES_TYPE type, FdInfo& data, int32_t index = 0);

private:
  AppConfig() = default;
  ~AppConfig() = default;

  int32_t audioDelay_ = 0;  // seconds of the combined video that play before the audio starts

  // Audio/video input files, located in the rawfile directory.
  std::string videoInName = "video.mp4";
  FdInfo videoInData;
  std::string audioInName[AUDIO_FILES_COUNT] = {
    "boisterous.wav",
    "boisterous.wav",
    "boisterous.wav"
  };
  FdInfo audioInData[AUDIO_FILES_COUNT];

  // Combined output file, located in the application sandbox directory.
  std::string videoOut = "/data/storage/el2/base/haps/entry/files/recorder01.mp4";
};
 
#endif  // APP_CONFIG_H

音频多文件合成核心逻辑。

void Recorder::AudioProcessThread() 
{ 
  OH_AVBuffer *buffer = OH_AVBuffer_Create(sampleInfo_.videoWidth * sampleInfo_.videoHeight * 3 >> 1); 
  OH_AVCodecBufferAttr attr; 
  int32_t lastPts = AppConfig::GetInstance().GetAudioDelay() * 1000 * 1000; 
  int32_t nextPts = 0; 
  // 封装音频 
  int32_t fileIndex = 0; // 代表第几个音频文件 
  while(fileIndex < AUDIO_FILES_COUNT){ 
    demuxer_audio[fileIndex]->ReadSample(demuxer_audio[fileIndex]->GetAudioTrackId(), 
      reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
    if (attr.flags == OH_AVCodecBufferFlags::AVCODEC_BUFFER_FLAGS_EOS) { 
      audioEnd[fileIndex++] = true; 
      lastPts = nextPts; 
    } else { 
      attr.pts += lastPts; 
      nextPts = attr.pts; 
      muxer_->WriteSampleAudio(reinterpret_cast<OH_AVBuffer *>(buffer), attr); 
    } 
    CHECK_AND_BREAK_LOG(isStarted_, "Work done, AudioDemuxerThread out"); 
  } 
  OH_AVBuffer_Destroy(buffer); 
}

2、多个音频文件并行合成。

封装器虽然可以创建多个音频轨，但是播放时播放器默认只会选择一个音频轨播放。所以，要想实现并行合成后播放，只能混音，即多个音频文件先混音成一个文件。但是框架目前没有提供实现该能力的系统API，只能通过FFmpeg等三方库来实现。