import Foundation
import AVFoundation
import Accelerate
@objc protocol MicrophoneSpectrumDelegate: AnyObject {
func microphone(_ microphone: MicrophoneSpectrum, didGenerateSpectrum spectra: [[Float]])
}
struct FrequencyBand {
let lower: Float
let upper: Float
}
@objcMembers
class MicrophoneSpectrum: NSObject {
weak var delegate: MicrophoneSpectrumDelegate?
let engine = AVAudioEngine()
let inputNode: AVAudioInputNode
let mixerNode = AVAudioMixerNode()
let playerNode = AVAudioPlayerNode() // bridge node that isolates the hardware format from the tap format
var bufferSize: Int = 1024 {
didSet {
bufferSize = nearestPowerOfTwo(bufferSize)
setupAudioTap()
analyzer = RealtimeAnalyzer(fftSize: bufferSize)
}
}
var analyzer: RealtimeAnalyzer!
private var hardwareFormat: AVAudioFormat!
private var targetFormat: AVAudioFormat!
override init() {
inputNode = engine.inputNode
bufferSize = 1024
analyzer = RealtimeAnalyzer(fftSize: bufferSize)
super.init()
setupAudioEngine()
}
func start() {
AVAudioSession.sharedInstance().requestRecordPermission { [weak self] granted in
guard let self = self, granted else { return }
DispatchQueue.global().async {
do {
try self.engine.start()
self.playerNode.play() // start the bridge node
print("Engine started")
} catch {
print("Engine failed to start: \(error)")
}
}
}
}
func stop() {
engine.stop()
playerNode.stop()
mixerNode.removeTap(onBus: 0)
}
}
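// Usage sketch (illustrative; `viewController` and `spectrumView` are assumed
// names, not part of this file). The analyzer delivers [[Float]] band energies
// to its delegate on the main queue:
//
//   let microphone = MicrophoneSpectrum()
//   microphone.delegate = viewController // conforms to MicrophoneSpectrumDelegate
//   microphone.bufferSize = 2048 // rounded to a power of two by didSet
//   microphone.start() // requests mic permission, then starts the engine
//
//   func microphone(_ microphone: MicrophoneSpectrum, didGenerateSpectrum spectra: [[Float]]) {
//       spectrumView.spectra = spectra
//   }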
extension MicrophoneSpectrum {
private func setupAudioEngine() {
do {
let session = AVAudioSession.sharedInstance()
try session.setCategory(.playAndRecord, mode: .measurement)
try session.setActive(true)
// 1. Query the hardware input format (read-only)
hardwareFormat = inputNode.outputFormat(forBus: 0)
logFormat("Hardware input format", format: hardwareFormat)
// 2. Define the target format: keep the hardware sample rate, change only the layout
targetFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: hardwareFormat.sampleRate, // must match the hardware sample rate exactly
channels: 1,
interleaved: false
)!
logFormat("Target processing format", format: targetFormat)
// 3. Node chain: input → mixer (hardware format) → player (bridge) → output (target format)
engine.attach(mixerNode)
engine.attach(playerNode)
// Input chain: connect input → mixer in the hardware format
engine.connect(inputNode, to: mixerNode, format: hardwareFormat)
// Processing chain: connect player → output in the target format
let outputNode = engine.outputNode
engine.connect(playerNode, to: outputNode, format: targetFormat)
// 4. Capture hardware-format buffers from the mixer; the player re-renders them in the target format
mixerNode.installTap(
onBus: 0,
bufferSize: AVAudioFrameCount(bufferSize),
format: hardwareFormat
) { [weak self] buffer, _ in
guard let self = self else { return }
self.playerNode.scheduleBuffer(buffer) // the player performs the format conversion
}
engine.prepare()
setupAudioTap() // tap the player output (already in the target format)
} catch {
print("引擎配置失败:\(error)")
}
}
// Key point: install the tap on playerNode's output, where buffers are already in the target format
private func setupAudioTap() {
guard let format = targetFormat else { return }
playerNode.removeTap(onBus: 0)
playerNode.installTap(
onBus: 0,
bufferSize: AVAudioFrameCount(bufferSize),
format: format
) { [weak self] (buffer: AVAudioPCMBuffer, _) in
guard let self = self, self.engine.isRunning else { return }
// Final check: ensure the tap format matches the target format exactly
guard buffer.format.commonFormat == format.commonFormat,
buffer.format.sampleRate == format.sampleRate,
buffer.format.channelCount == format.channelCount else {
print("格式不匹配!预期:\(format),实际:\(buffer.format)")
return
}
let validFrameLength = min(buffer.frameLength, AVAudioFrameCount(self.bufferSize))
buffer.frameLength = validFrameLength
DispatchQueue.global().async {
let spectra = self.analyzer.analyse(with: buffer)
DispatchQueue.main.async {
self.delegate?.microphone(self, didGenerateSpectrum: spectra)
}
}
}
logFormat("Tap实际格式", format: format)
}
private func logFormat(_ label: String, format: AVAudioFormat) {
print("\(label):")
print(" 采样率:\(format.sampleRate)")
print(" 通道数:\(format.channelCount)")
print(" 数据格式:\(format.commonFormat)")
print(" 交错模式:\(format.isInterleaved)")
}
private func nearestPowerOfTwo(_ value: Int) -> Int {
// Same rounding as Int.nextPowerOfTwo at the bottom of this file
return value.nextPowerOfTwo
}
}
// Spectrum analyzer class (unchanged)
@objcMembers
class RealtimeAnalyzer: NSObject {
var fftSize: Int
lazy var fftSetup = vDSP_create_fftsetup(vDSP_Length(Int(round(log2(Double(fftSize))))), FFTRadix(kFFTRadix2))
public var frequencyBands: Int = 32
public var startFrequency: Float = 100
public var endFrequency: Float = 18000
lazy var bands: [FrequencyBand] = {
var bands = [FrequencyBand]()
guard endFrequency > startFrequency, frequencyBands > 0 else { return bands }
let n = log2(endFrequency/startFrequency) / Float(frequencyBands)
var currentLower = startFrequency
for i in 1...frequencyBands {
let currentUpper = currentLower * powf(2, n)
let adjustedUpper = i == frequencyBands ? endFrequency : currentUpper
bands.append(FrequencyBand(lower: currentLower, upper: adjustedUpper))
currentLower = currentUpper
}
return bands
}()
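// Worked example: with startFrequency = 100, endFrequency = 18000, and
// frequencyBands = 32, n = log2(18000/100) / 32 ≈ 0.234, so each band's upper
// bound is its lower bound × 2^0.234 ≈ ×1.176. The first band covers roughly
// 100 to 117.6 Hz and every subsequent band is about 17.6% wider than the
// previous one, matching the logarithmic way pitch is perceived.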
private var spectrumBuffer = [[Float]]()
private let spectrumQueue = DispatchQueue(label: "com.analyzer.spectrumQueue")
public var spectrumSmooth: Float = 0.5 {
didSet { spectrumSmooth = max(0.0, min(1.0, spectrumSmooth)) }
}
init(fftSize: Int) {
self.fftSize = fftSize.nextPowerOfTwo
super.init()
}
deinit {
vDSP_destroy_fftsetup(fftSetup)
}
func analyse(with buffer: AVAudioPCMBuffer) -> [[Float]] {
guard buffer.format.sampleRate > 0, let _ = buffer.floatChannelData else { return [] }
let channelsAmplitudes = fft(buffer)
let aWeights = createFrequencyWeights(for: buffer)
guard !channelsAmplitudes.isEmpty, aWeights.count == channelsAmplitudes[0].count else { return [] }
let result = spectrumQueue.sync {
// Fixed: band → bands
if spectrumBuffer.count != channelsAmplitudes.count {
spectrumBuffer = channelsAmplitudes.map { _ in Array(repeating: 0, count: bands.count) }
}
for (index, amplitudes) in channelsAmplitudes.enumerated() {
guard index < spectrumBuffer.count, amplitudes.count == aWeights.count else { continue }
let weightedAmplitudes = amplitudes.enumerated().map { $0.1 * aWeights[$0.0] }
let bandWidth = Float(buffer.format.sampleRate) / Float(fftSize)
// Fixed: band → bands
var spectrum = bands.enumerated().map { (i, band) -> Float in
guard i < weightedAmplitudes.count else { return 0.0 }
return findMaxAmplitude(for: band, in: weightedAmplitudes, with: bandWidth) * 5
}
spectrum = highlightWaveform(spectrum: spectrum)
guard spectrum.count == spectrumBuffer[index].count else { continue }
spectrumBuffer[index] = zip(spectrumBuffer[index], spectrum).map {
$0.0 * spectrumSmooth + $0.1 * (1 - spectrumSmooth)
}
}
return spectrumBuffer
}
return result
}
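// The zip/map above implements an exponential moving average:
// displayed = previous * spectrumSmooth + new * (1 - spectrumSmooth).
// With spectrumSmooth = 0.5, a band jumping from 0 to a steady 1 renders as
// 0.5, 0.75, 0.875, ... over successive frames, damping frame-to-frame flicker.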
private func fft(_ buffer: AVAudioPCMBuffer) -> [[Float]] {
var amplitudes = [[Float]]()
guard let floatChannelData = buffer.floatChannelData else { return amplitudes }
let channelCount = Int(buffer.format.channelCount)
let frameCount = buffer.frameLength
guard frameCount <= UInt32(fftSize) else { return amplitudes }
// Copy the samples into zero-padded Swift arrays. (Keeping raw pointers to
// short-lived locals, as an earlier revision did, is undefined behavior.)
var channels = [[Float]]()
if buffer.format.isInterleaved {
// De-interleave: sample i of channel c lives at index i * channelCount + c
let interleavedData = UnsafeBufferPointer(start: floatChannelData[0], count: Int(frameCount) * channelCount)
for channel in 0..<channelCount {
var data = [Float](repeating: 0, count: fftSize)
for i in 0..<Int(frameCount) {
data[i] = interleavedData[i * channelCount + channel]
}
channels.append(data)
}
} else {
for channel in 0..<channelCount {
var data = [Float](repeating: 0, count: fftSize)
memcpy(&data, floatChannelData[channel], Int(frameCount) * MemoryLayout<Float>.stride)
channels.append(data)
}
}
for channel in channels {
// Apply a Hann window to reduce spectral leakage
var window = [Float](repeating: 0, count: fftSize)
vDSP_hann_window(&window, vDSP_Length(fftSize), Int32(vDSP_HANN_NORM))
var windowed = [Float](repeating: 0, count: fftSize)
vDSP_vmul(channel, 1, window, 1, &windowed, 1, vDSP_Length(fftSize))
// Split-complex working buffers for the in-place real FFT
let realp = UnsafeMutablePointer<Float>.allocate(capacity: fftSize / 2)
let imagp = UnsafeMutablePointer<Float>.allocate(capacity: fftSize / 2)
defer { realp.deallocate(); imagp.deallocate() }
var fftInOut = DSPSplitComplex(realp: realp, imagp: imagp)
// Pack the real signal into vDSP's split-complex layout
windowed.withUnsafeBytes {
vDSP_ctoz($0.bindMemory(to: DSPComplex.self).baseAddress!, 2, &fftInOut, 1, vDSP_Length(fftSize / 2))
}
let log2n = vDSP_Length(round(log2(Double(fftSize))))
vDSP_fft_zrip(fftSetup!, &fftInOut, 1, log2n, FFTDirection(FFT_FORWARD))
fftInOut.imagp[0] = 0 // discard the Nyquist term packed into imagp[0]
var normFactor = Float(1.0 / Double(fftSize))
vDSP_vsmul(fftInOut.realp, 1, &normFactor, fftInOut.realp, 1, vDSP_Length(fftSize / 2))
vDSP_vsmul(fftInOut.imagp, 1, &normFactor, fftInOut.imagp, 1, vDSP_Length(fftSize / 2))
var amp = [Float](repeating: 0, count: fftSize / 2)
vDSP_zvabs(&fftInOut, 1, &amp, 1, vDSP_Length(fftSize / 2))
amp[0] /= 2 // the DC bin is doubled by zrip's packing
amplitudes.append(amp)
}
return amplitudes
}
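// Quick sanity check (illustrative numbers): each bin spans
// sampleRate / fftSize Hz, so at 44100 Hz with fftSize = 1024 a pure 1 kHz
// sine should peak near bin round(1000 / 43.07) ≈ 23 of each returned row:
//
//   let bin = Int(round(1000.0 / (44100.0 / 1024.0))) // ≈ 23
//   assert(amplitudes[0][bin] > amplitudes[0][bin + 10])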
private func findMaxAmplitude(for band: FrequencyBand, in amplitudes: [Float], with bandWidth: Float) -> Float {
guard band.lower <= band.upper, bandWidth > 0 else { return 0 }
let start = max(0, min(Int(round(band.lower / bandWidth)), amplitudes.count - 1))
let end = max(0, min(Int(round(band.upper / bandWidth)), amplitudes.count - 1))
guard start <= end else { return 0 }
return amplitudes[start...end].max() ?? 0
}
private func createFrequencyWeights(for buffer: AVAudioPCMBuffer) -> [Float] {
let bins = fftSize / 2
let sampleRate = buffer.format.sampleRate
let deltaF = Float(sampleRate) / Float(fftSize)
var weights = [Float](repeating: 0, count: bins)
for i in 0..<bins {
let f = Float(i) * deltaF
guard f > 0 else { continue }
let f2 = f * f
let c1 = powf(12194.217, 2)
let c2 = powf(20.598997, 2)
let c3 = powf(107.65265, 2)
let c4 = powf(737.86223, 2)
let num = c1 * f2 * f2
let den = (f2 + c2) * sqrtf((f2 + c3) * (f2 + c4)) * (f2 + c1)
weights[i] = den == 0 ? 0 : 1.2589 * num / den
}
return weights
}
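// The constants above are the IEC A-weighting magnitude response:
// R_A(f) = 12194^2 f^4 / ((f^2 + 20.6^2) * sqrt((f^2 + 107.7^2)(f^2 + 737.9^2)) * (f^2 + 12194^2)),
// and the 1.2589 factor is 10^(2/20), which cancels the -2.00 dB the raw
// curve has at 1 kHz, so the weight is about 1.0 at the bin nearest 1 kHz.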
private func highlightWaveform(spectrum: [Float]) -> [Float] {
let weights: [Float] = [1, 2, 3, 5, 3, 2, 1]
let total = Float(weights.reduce(0, +))
let offset = weights.count / 2
guard !spectrum.isEmpty else { return [] }
var result = [Float]()
let safeOffset = min(offset, spectrum.count)
result.append(contentsOf: spectrum[0..<safeOffset])
let maxIndex = spectrum.count - offset
guard maxIndex > offset else {
return spectrum
}
for i in offset..<maxIndex {
let window = (0..<weights.count).map {
let idx = i - offset + $0
return idx < spectrum.count ? spectrum[idx] : 0
}
result.append(zip(window, weights).map { $0 * $1 }.reduce(0, +) / total)
}
let remainingStart = max(spectrum.count - offset, offset)
if remainingStart < spectrum.count {
result.append(contentsOf: spectrum[remainingStart..<spectrum.count])
}
return result
}
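// highlightWaveform(spectrum:) is a 7-tap weighted moving average with kernel
// [1, 2, 3, 5, 3, 2, 1] / 17. Worked example: deep inside a long array, a lone
// 1.0 spreads into 1/17, 2/17, 3/17, 5/17, 3/17, 2/17, 1/17 across its
// neighborhood, rounding isolated spikes into the smooth "hills" the view draws.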
}
extension Int {
var nextPowerOfTwo: Int {
guard self > 1 else { return 1 }
return 1 << (Int(log2(Double(self - 1))) + 1)
}
}
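// Examples: 1000.nextPowerOfTwo == 1024, 1024.nextPowerOfTwo == 1024,
// 1025.nextPowerOfTwo == 2048. MicrophoneSpectrum.nearestPowerOfTwo applies
// the same rounding to bufferSize before it reaches the FFT.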
import UIKit
@objcMembers class SpectrumView: UIView {
// Bar width (slightly wider, for stronger visual presence)
var barWidth: CGFloat = 5.0 {
didSet {
if isDefaultState {
barHeight = barWidth
}
setNeedsLayout()
}
}
// Bar spacing (kept small for a denser layout)
var space: CGFloat = 0.3 {
didSet {
setNeedsLayout()
}
}
// Base height (minimum height in the idle state)
var barHeight: CGFloat = 5.0
// Border width (thicker so the gray border stands out)
var borderWidth: CGFloat = 1.0
// Fill color (slightly darker than the light-gray background, for contrast)
var fillColor: UIColor = UIColor(red: 0.6, green: 0.6, blue: 0.6, alpha: 1.0) {
didSet {
barLayers.forEach { $0.fillColor = fillColor.cgColor }
}
}
// Border color (medium gray, clearly visible)
var borderColor: UIColor = UIColor(red: 0.4, green: 0.4, blue: 0.4, alpha: 1.0) {
didSet {
barLayers.forEach { $0.strokeColor = borderColor.cgColor }
}
}
// Light-gray background
override var backgroundColor: UIColor? {
didSet {
// Force a light-gray background regardless of external writes
super.backgroundColor = UIColor(red: 0.9, green: 0.9, blue: 0.9, alpha: 1.0)
}
}
// All bar layers
private var barLayers = [CAShapeLayer]()
// Whether the view is in its idle (no-data) state
private var isDefaultState: Bool = true
// Spectrum data
var spectra: [[Float]]? {
didSet {
updateBars()
}
}
// Amplitude gain (raised so changes read more dramatically)
var amplitudeScale: CGFloat = 6.0 // raised from 1.5
// Maximum bar height cap (generous, allowing tall bars)
private var maxBarHeight: CGFloat {
return bounds.height * 0.95 // 95% of the view height
}
// Number of idle-state dots (matches the data-state layout)
private let defaultDotCount = 24
override init(frame: CGRect) {
super.init(frame: frame)
setupView()
}
required init?(coder aDecoder: NSCoder) {
super.init(coder: aDecoder)
setupView()
}
private func setupView() {
// Force the light-gray background
backgroundColor = UIColor(red: 0.9, green: 0.9, blue: 0.9, alpha: 1.0)
layer.contentsScale = UIScreen.main.scale
barHeight = barWidth
}
private func updateBars() {
// Remove the old layers
barLayers.forEach { $0.removeFromSuperlayer() }
barLayers.removeAll()
// Validate the data; fall back to the idle dots when there is none.
// (Note: the microphone pipeline above delivers a single mono channel,
// so requiring two channels here would leave the view permanently idle.)
guard let spectra = spectra, !spectra.isEmpty else {
drawDefaultDots()
return
}
guard bounds.height > 0, bounds.width > 0 else { return }
let centerY = bounds.height / 2
isDefaultState = true
// Use the second channel when present, otherwise the first; take absolute
// values to avoid negative amplitudes
let amplitudes = (spectra.count > 1 ? spectra[1] : spectra[0]).map { abs($0) }
let barCount = amplitudes.count
// Peak amplitude, used to adapt the sensitivity
let maxAmplitude = amplitudes.max() ?? 0
// Total content width: all bars plus the gaps between them
let totalBarWidth = CGFloat(barCount) * barWidth
let totalSpaceWidth = CGFloat(barCount - 1) * space
let totalContentWidth = totalBarWidth + totalSpaceWidth
// Starting X offset that centers the content
let startX = (bounds.width - totalContentWidth) / 2
// Never start past the left edge
let safeStartX = max(0, startX)
// Draw the bars
for (i, amplitude) in amplitudes.enumerated() {
// Low threshold, so the view leaves the idle state early
if amplitude > 0.01 {
isDefaultState = false
}
// X position of this bar (left to right, centered as a whole)
let x = safeStartX + CGFloat(i) * (barWidth + space)
// Stop once we would cross the right edge
guard x + barWidth <= bounds.width else { break }
// Bar height from the amplitude
let barHeight = calculateBarHeight(
amplitude: amplitude,
maxAmplitude: maxAmplitude
)
let y = centerY - barHeight / 2 // vertically centered
let barLayer = createBarLayer(
x: x,
y: y,
width: barWidth,
height: barHeight
)
layer.addSublayer(barLayer)
barLayers.append(barLayer)
}
}
// Draw the idle dots (static state), centered like the data layout
private func drawDefaultDots() {
guard bounds.height > 0, bounds.width > 0 else { return }
isDefaultState = true
let centerY = bounds.height / 2
barHeight = barWidth
// Total content width: all dots plus the gaps between them
let totalBarWidth = CGFloat(defaultDotCount) * barWidth
let totalSpaceWidth = CGFloat(defaultDotCount - 1) * space
let totalContentWidth = totalBarWidth + totalSpaceWidth
// Starting X offset that centers the content
let startX = (bounds.width - totalContentWidth) / 2
let safeStartX = max(0, startX)
// Same centered, left-to-right layout as the data state
for i in 0..<defaultDotCount {
let x = safeStartX + CGFloat(i) * (barWidth + space)
guard x + barWidth <= bounds.width else { break }
let dotLayer = createBarLayer(
x: x,
y: centerY - barHeight / 2,
width: barWidth,
height: barHeight
)
layer.addSublayer(dotLayer)
barLayers.append(dotLayer)
}
}
// Create a bar layer (rounded corners plus a short animation)
private func createBarLayer(x: CGFloat, y: CGFloat, width: CGFloat, height: CGFloat) -> CAShapeLayer {
let layer = CAShapeLayer()
layer.contentsScale = UIScreen.main.scale
// A corner radius of half the width keeps the bars pill-shaped
let cornerRadius = width * 0.5
let rect = CGRect(x: x, y: y, width: width, height: height)
layer.path = UIBezierPath(roundedRect: rect, cornerRadius: cornerRadius).cgPath
// Styling
layer.fillColor = fillColor.cgColor
layer.strokeColor = borderColor.cgColor
layer.lineWidth = borderWidth
// Short duration so the bars respond quickly
let animation = CABasicAnimation(keyPath: "path")
animation.duration = 0.08 // shortened from 0.1
animation.timingFunction = CAMediaTimingFunction(name: .easeOut)
layer.add(animation, forKey: "heightAnimation")
return layer
}
// Core height calculation (amplifies the amplitude contribution)
private func calculateBarHeight(amplitude: Float, maxAmplitude: Float) -> CGFloat {
// 1. A higher base height keeps every bar visible
let baseHeight: CGFloat = 8.0 // raised from 6.0
// 2. Dynamic height derived from the amplitude
let dynamicHeight: CGFloat
if maxAmplitude < 0.1 {
// Boost weak signals with an extra gain factor
dynamicHeight = CGFloat(amplitude) * maxBarHeight * amplitudeScale * 3 // raised from 2x to 3x
} else {
// Normal signals keep the standard gain
dynamicHeight = CGFloat(amplitude) * maxBarHeight * amplitudeScale
}
// 3. Total = base + dynamic
let totalHeight = baseHeight + dynamicHeight
// 4. Clamp so the bar stays inside the view
return min(totalHeight, maxBarHeight)
}
override func layoutSubviews() {
super.layoutSubviews()
updateBars()
}
}
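// Usage sketch (illustrative; the frame values are assumptions). The view
// shows idle dots until data arrives, then draws one bar per frequency band:
//
//   let spectrumView = SpectrumView(frame: CGRect(x: 0, y: 0, width: 320, height: 60))
//   spectrumView.barWidth = 5
//   spectrumView.space = 2
//   spectrumView.spectra = latestSpectra // [[Float]] from MicrophoneSpectrumDelegate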
//
// QDRealTimeRecognizeViewController.m
// QCloudSDKDemo
//
// Created by Sword on 2019/4/12.
// Copyright © 2019 Tencent. All rights reserved.
//
#import "QDRealTimeRecognizeVC.h"
#import <AVFoundation/AVFoundation.h>
#import "UIView+Toast.h"
#import <QCloudRealTime/QCloudRealTimeRecognizer.h>
#import <QCloudRealTime/QCloudConfig.h>
#import <QCloudRealTime/QCloudRealTimeResult.h>
#import "Channel-Swift.h"
@interface QDRealTimeRecognizeVC ()<QCloudRealTimeRecognizerDelegate,MicrophoneSpectrumDelegate>
#pragma mark - Core properties
@property (weak, nonatomic) IBOutlet UIView *contentBgView;
@property (weak, nonatomic) IBOutlet NSLayoutConstraint *contentViewHeight;//530-600
@property (weak, nonatomic) IBOutlet UILabel *aleartLabel;
@property (weak, nonatomic) IBOutlet SpectrumView *spectrumView; // waveform animation view
@property (nonatomic,strong) MicrophoneSpectrum *spectrumAnalyzer; // spectrum analyzer
@property (nonatomic, strong) QCloudRealTimeRecognizer *realTimeRecognizer; // real-time recognition manager
@property (nonatomic, assign) BOOL isRecording; // recording state flag
@property (nonatomic, assign) float currentVolume; // current volume
#pragma mark - UI components
@property (weak, nonatomic) IBOutlet UITextView *recognizedTextView; // recognized text display
@property (weak, nonatomic) IBOutlet UISwitch *volumeDetectSwitch; // volume detection switch
@property (weak, nonatomic) IBOutlet UISwitch *silenceDetectEndSwitch; // stop-on-silence switch
@property (weak, nonatomic) IBOutlet UIButton *recognizeButton; // start/stop button
@property (weak, nonatomic) IBOutlet UILabel *volumeLabel; // volume label
@end
@implementation QDRealTimeRecognizeVC
#pragma mark - Lifecycle
- (void)viewDidLoad {
[super viewDidLoad];
[self initMicrophone]; // set up the spectrum analyzer
[self setupUI]; // set up the UI
}
- (void)viewDidAppear:(BOOL)animated {
[super viewDidAppear:animated];
[self configureAudioSession]; // configure the audio session
}
- (void)viewWillDisappear:(BOOL)animated {
[super viewWillDisappear:animated];
[self stopRecognizeIfNeeded]; // stop recognition when the page disappears
}
#pragma mark - Initialization
/** Set up the UI */
- (void)setupUI {
// Semi-transparent dimming background
self.view.backgroundColor = [[UIColor blackColor] colorWithAlphaComponent:0.3];
// 1. Constrain contentBgView with Auto Layout
self.contentBgView.translatesAutoresizingMaskIntoConstraints = NO;
// Pin to the bottom, stretch left/right, fixed height of 530
[NSLayoutConstraint activateConstraints:@[
[self.contentBgView.bottomAnchor constraintEqualToAnchor:self.view.bottomAnchor],
[self.contentBgView.leftAnchor constraintEqualToAnchor:self.view.leftAnchor],
[self.contentBgView.rightAnchor constraintEqualToAnchor:self.view.rightAnchor],
[self.contentBgView.heightAnchor constraintEqualToConstant:530]
]];
// 2. Round only the top two corners
self.contentBgView.layer.cornerRadius = 20;
self.contentBgView.layer.maskedCorners = kCALayerMinXMinYCorner | kCALayerMaxXMinYCorner; // the top corners use MinY
self.contentBgView.clipsToBounds = YES; // required for the corners to clip
// 3. Start position: just below the bottom edge of the screen
self.contentBgView.transform = CGAffineTransformMakeTranslation(0, 530);
// 4. Slide-in animation back to the resting position
[UIView animateWithDuration:0.3 animations:^{
self.contentBgView.transform = CGAffineTransformIdentity;
}];
[self onRecognizeButtonTouched]; // auto-start recognition on load
}
// Set up the spectrum analyzer
-(void)initMicrophone{
self.spectrumAnalyzer = [[MicrophoneSpectrum alloc] init];
// Receive spectrum data as the analyzer's delegate
self.spectrumAnalyzer.delegate = self;
// Optional: adjust the buffer size (must be a power of two, e.g. 512, 1024, 2048)
self.spectrumAnalyzer.bufferSize = 1024;
// Default spectrum-view parameters
self.spectrumView.space = 5.0; // bar spacing
}
/** Configure the audio session (play-and-record) */
- (void)configureAudioSession {
NSError *error = nil;
AVAudioSession *session = [AVAudioSession sharedInstance];
// Play-and-record: the engine taps the mic and also drives its output bridge
[session setCategory:AVAudioSessionCategoryPlayAndRecord error:&error];
if (error) {
NSLog(@"Audio session configuration error: %@", error.localizedDescription);
[self.view makeToast:error.localizedDescription duration:2 position:CSToastPositionCenter];
return;
}
[session setActive:YES error:&error];
[self.spectrumAnalyzer start];
}
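// Sketch (an assumption, not part of the original flow): verify the input
// format is valid before starting the engine. An inactive session or missing
// mic permission yields a 0 Hz / 0-channel format, which triggers the
// connect() assertion quoted at the end of this file.
//
//   AVAudioFormat *inputFormat = [self.spectrumAnalyzer.engine.inputNode outputFormatForBus:0];
//   if (inputFormat.sampleRate == 0 || inputFormat.channelCount == 0) {
//       NSLog(@"Invalid input format; check session state and mic permission");
//   }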
#pragma mark - Layout
- (void)viewDidLayoutSubviews {
[super viewDidLayoutSubviews];
// Give the spectrum view a default height if it has none yet
if (CGRectGetHeight(self.spectrumView.bounds) == 0) {
CGRect frame = self.spectrumView.frame;
frame.size.height = 60; // total view height
self.spectrumView.frame = frame;
}
// Total number of bars
CGFloat totalBarCount = self.spectrumAnalyzer.analyzer.frequencyBands;
// Total spacing based on the view's space property (+1 leaves a margin)
CGFloat totalSpace = self.spectrumView.space * (totalBarCount + 1);
CGFloat availableWidth = CGRectGetWidth(self.spectrumView.bounds) - totalSpace;
// Keep the bar width within the available space (optional tweak)
if (availableWidth > 0) {
CGFloat maxPossibleBarWidth = availableWidth / self.spectrumAnalyzer.analyzer.frequencyBands;
// Cap the width without overriding a smaller user-set value
if (self.spectrumView.barWidth > maxPossibleBarWidth) {
self.spectrumView.barWidth = maxPossibleBarWidth;
}
}
}
#pragma mark - Recognition control
/** Toggle recognition based on the current state */
- (void)toggleRecognize {
if (self.isRecording) {
[self stopRecognize];
} else {
[self startRecognize];
}
}
/** Start recognition */
- (void)startRecognize {
// Create the recognizer on first use
if (!self.realTimeRecognizer) {
[self setupRealTimeRecognizer];
}
// Start recognizing
[self.realTimeRecognizer start];
self.recognizedTextView.text = @""; // clear previous results
self.spectrumView.hidden = NO;
// [self.spectrumAnalyzer start]; // start spectrum capture
}
/** Stop recognition */
- (void)stopRecognize {
[self.realTimeRecognizer stop];
[self.spectrumAnalyzer stop]; // stop monitoring
self.spectrumView.hidden = YES;
// Deactivate the audio session
[[AVAudioSession sharedInstance] setActive:NO
withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
error:nil];
}
/** Create and configure the real-time recognizer */
- (void)setupRealTimeRecognizer {
// 1. Auth and base parameters
QCloudConfig *config = [self createQCloudConfig];
// 2. Create the recognizer (uses the built-in recorder)
self.realTimeRecognizer = [[QCloudRealTimeRecognizer alloc] initWithConfig:config];
self.realTimeRecognizer.delegate = self;
}
/** Build the Tencent Cloud config (auth + parameters) */
- (QCloudConfig *)createQCloudConfig {
QCloudConfig *config = nil;
// Permanent-key auth (no token)
if ([kQDToken isEqualToString:@""]) {
config = [[QCloudConfig alloc] initWithAppId:kQDAppId
secretId:kQDSecretId
secretKey:kQDSecretKey
projectId:[kQDProjectId integerValue]];
} else {
// Temporary-key auth (with token)
config = [[QCloudConfig alloc] initWithAppId:kQDAppId
secretId:kQDSecretId
secretKey:kQDSecretKey
token:kQDToken
projectId:[kQDProjectId integerValue]];
}
// Recognition parameters
config.sliceTime = 40; // audio slice length (40 ms)
config.enableDetectVolume = self.volumeDetectSwitch.on; // volume detection
config.endRecognizeWhenDetectSilence = self.silenceDetectEndSwitch.on; // stop on detected silence
config.endRecognizeWhenDetectSilenceAutoStop = YES; // auto-stop on silence
config.silenceDetectDuration = 3.0; // silence timeout (3 s)
config.requestTimeout = 10; // request timeout (10 s)
config.engineType = @"16k_zh"; // engine model (16 kHz Mandarin, required!)
config.reinforceHotword = 1; // hotword reinforcement
config.noiseThreshold = 0.5; // noise threshold
config.compression = YES; // audio compression (helps on weak networks)
[config setApiParam:@"hotword_list" value:@"腾讯云|10,语音识别|5,ASR|11"]; // hotword list
return config;
}
/** Stop recognition if needed (page disappearing, etc.) */
- (void)stopRecognizeIfNeeded {
if (self.isRecording) {
[self stopRecognize];
}
[self.spectrumAnalyzer stop]; // stop spectrum capture
}
#pragma mark - UI updates
/** Update the button title from the recording state */
- (void)updateButtonTitle {
NSString *title = self.isRecording ? @"Stop" : @"Start";
[self.recognizeButton setTitle:title forState:UIControlStateNormal];
}
/** Update the volume label */
- (void)updateVolumeLabelWithVolume:(float)volume min:(float)min max:(float)max {
if (self.volumeDetectSwitch.on) {
self.volumeLabel.text = [NSString stringWithFormat:@"Volume: %.2f (%.2f-%.2f)", volume, min, max];
} else {
self.volumeLabel.text = @"Volume detection off";
}
}
#pragma mark - QCloudRealTimeRecognizerDelegate
/** Recording-started callback */
- (void)realTimeRecognizerDidStartRecord:(QCloudRealTimeRecognizer *)recorder error:(NSError *)error {
if (!error) {
self.isRecording = YES;
[self updateButtonTitle];
self.currentVolume = 0;
} else {
NSLog(@"录音启动失败: %@", error.localizedDescription);
[self.view makeToast:error.localizedDescription duration:2 position:CSToastPositionCenter];
}
}
/** Recording-stopped callback */
- (void)realTimeRecognizerDidStopRecord:(QCloudRealTimeRecognizer *)recorder {
_isRecording = NO;
[self.spectrumAnalyzer stop]; // stop spectrum capture
self.spectrumView.hidden = YES;
[self updateButtonTitle];
}
/** Partial (intermediate) recognition result callback */
- (void)realTimeRecognizerOnSliceRecognize:(QCloudRealTimeRecognizer *)recognizer result:(QCloudRealTimeResult *)result {
if (result.code == 0) {
self.recognizedTextView.text = result.recognizedText;
}
}
- (void)realTimeRecognizerDidFinish:(QCloudRealTimeRecognizer *)recorder result:(NSString *)result
{
NSLog(@"realTimeRecognizerDidFinish:%@", result);
}
/** Volume update callback */
- (void)realTimeRecognizerDidUpdateVolumeDB:(QCloudRealTimeRecognizer *)recognizer volume:(float)volume {
static float minVolume = MAXFLOAT;
static float maxVolume = 0;
self.currentVolume = volume;
minVolume = MIN(minVolume, volume);
maxVolume = MAX(maxVolume, volume);
[self updateVolumeLabelWithVolume:volume min:minVolume max:maxVolume];
}
/** Recognition error callback */
- (void)realTimeRecognizerDidError:(QCloudRealTimeRecognizer *)recognizer result:(QCloudRealTimeResult *)result {
NSString *errorMsg = result.clientErrCode != QCloudRealTimeClientErrCode_Success ?
result.clientErrMessage : result.jsonText;
NSLog(@"识别错误: %@", errorMsg);
self.recognizedTextView.text = [NSString stringWithFormat:@"错误: %@", errorMsg];
[self.view makeToast:errorMsg duration:2 position:CSToastPositionCenter];
}
/** Recognition flow started callback */
- (void)realTimeRecognizerOnFlowRecognizeStart:(QCloudRealTimeRecognizer *)recognizer voiceId:(NSString *)voiceId seq:(NSInteger)seq {
NSLog(@"Recognition flow started - voiceId: %@, seq: %ld", voiceId, (long)seq);
}
- (void)realTimeRecognizerOnSegmentSuccessRecognize:(nonnull QCloudRealTimeRecognizer *)recognizer result:(nonnull QCloudRealTimeResult *)result {
QCloudRealTimeResultResponse *currentResult = [result.resultList firstObject];
NSLog(@"realTimeRecognizerOnSegmentSuccessRecognize:%@ index:%ld", currentResult.voiceTextStr, currentResult.index);
}
-(void)realTimeRecognizerOnSliceDetectTimeOut{
NSLog(@"realTimeRecognizerOnSliceDetectTimeOut: silence timeout detected");
// Fired when QCloudConfig.endRecognizeWhenDetectSilence is enabled and silence exceeds the timeout.
// When QCloudConfig.endRecognizeWhenDetectSilenceAutoStop is also enabled (the default), recognition stops along with this callback.
}
#pragma mark - Event handling
/** Start/stop button tapped */
- (void)onRecognizeButtonTouched {
[self toggleRecognize];
}
/** Cancel button tapped */
- (IBAction)onCancelButtonTouched:(UIButton *)sender {
if (self.realTimeRecognizer) {
[self.realTimeRecognizer cancel];
}
}
/** Volume detection switch toggled (editable only while stopped) */
- (IBAction)onVolumeDetectSwitchChanged:(UISwitch *)sender {
if (self.isRecording) {
sender.on = !sender.on; // force revert
[self.view makeToast:@"Cannot change while recognizing" duration:1.5 position:CSToastPositionCenter];
}
}
/** Stop-on-silence switch toggled (editable only while stopped) */
- (IBAction)onSilenceDetectSwitchChanged:(UISwitch *)sender {
if (self.isRecording) {
sender.on = !sender.on; // force revert
[self.view makeToast:@"Cannot change while recognizing" duration:1.5 position:CSToastPositionCenter];
} else {
self.realTimeRecognizer = nil; // recreate the recognizer so the new setting takes effect
}
}
#pragma mark - MicrophoneSpectrumDelegate
- (void)microphone:(MicrophoneSpectrum *)microphone didGenerateSpectrum:(NSArray<NSArray<NSNumber *> *> *)spectra{
// spectra holds one Float array per channel: one energy value (roughly 0-1) per frequency band
dispatch_async(dispatch_get_main_queue(), ^{
self.spectrumView.spectra = spectra;
});
}
#pragma mark - UI controls
// Close button: slide the panel out, then dismiss
- (IBAction)closeBtnClicked:(UIButton *)sender {
[self stopRecognizeIfNeeded];
[UIView animateWithDuration:0.3 animations:^{
self.contentBgView.transform = CGAffineTransformMakeTranslation(0, 530);
} completion:^(BOOL finished) {
[self dismissViewControllerAnimated:NO completion:nil];
}];
}
@end
engine.connect(inputNode, to: mixerNode, format: hardwareFormat)
Crashes with Thread 1: "required condition is false: IsFormatSampleRateAndChannelCountValid(format)"
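This assertion fires when the format passed to connect(_:to:format:) reports a zero sample rate or zero channel count. With inputNode that usually means the AVAudioSession was not active (or microphone permission had not been granted) when inputNode.outputFormat(forBus: 0) was read, so the node had no valid hardware format yet. A minimal defensive sketch, assuming that cause (verify against your session setup; not a confirmed fix for this exact project):

// Inside setupAudioEngine(), after session.setActive(true):
let inputFormat = inputNode.outputFormat(forBus: 0)
guard inputFormat.sampleRate > 0, inputFormat.channelCount > 0 else {
    print("Invalid input format \(inputFormat); activate the session and obtain mic permission before wiring the graph")
    return
}
engine.connect(inputNode, to: mixerNode, format: inputFormat)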