用ESP32打造一个可以将一切声音变成音乐的合成器

原创已于 2025-11-01 12:49:15 修改 · 129 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#算法

于 2025-11-01 12:46:45 首次发布

部署运行你感兴趣的模型镜像

以下是基于ESP32-S3和ESP-IDF平台的PO-35风格音频合成器实现方案，包含C语言核心逻辑与Python辅助脚本的完整代码：

音频输入处理（C语言）

#include "driver/i2s.h"

#define SAMPLE_RATE 44100
#define I2S_PORT I2S_NUM_0

/**
 * Install the I2S driver on I2S_PORT in master/receive mode and route the
 * bit-clock, word-select and data-in signals to GPIO 15/16/17.
 *
 * The port is configured for 16-bit samples at SAMPLE_RATE, left channel
 * only, with 8 DMA buffers of 1024 entries each. No event queue is requested.
 * Return codes of the driver calls are not inspected.
 */
void mic_init() {
    /* Receive-side driver configuration; unnamed fields stay zero. */
    const i2s_config_t rx_cfg = {
        .mode                 = I2S_MODE_MASTER | I2S_MODE_RX,
        .sample_rate          = SAMPLE_RATE,
        .bits_per_sample      = I2S_BITS_PER_SAMPLE_16BIT,
        .channel_format       = I2S_CHANNEL_FMT_ONLY_LEFT,
        .communication_format = I2S_COMM_FORMAT_STAND_I2S,
        .dma_buf_count        = 8,
        .dma_buf_len          = 1024,
        .use_apll             = false,
        .intr_alloc_flags     = ESP_INTR_FLAG_LEVEL1
    };

    /* Only input-side pins are assigned; the data-out pin is left untouched. */
    const i2s_pin_config_t mic_pins = {
        .bck_io_num   = GPIO_NUM_15,
        .ws_io_num    = GPIO_NUM_16,
        .data_in_num  = GPIO_NUM_17,
        .data_out_num = I2S_PIN_NO_CHANGE
    };

    /* Queue size 0 and a NULL queue handle: no driver event queue is used. */
    i2s_driver_install(I2S_PORT, &rx_cfg, 0, NULL);
    i2s_set_pin(I2S_PORT, &mic_pins);
}

/**
 * Read `frames` 16-bit samples from I2S_PORT into `buffer`, blocking with
 * portMAX_DELAY.
 *
 * If the driver delivers fewer bytes than requested (including the case where
 * the read fails and nothing is delivered), the unread tail of `buffer` is
 * filled with zeros so the caller never analyses stale or uninitialised data.
 *
 * @param buffer  Destination for `frames` samples; ignored if NULL.
 * @param frames  Number of int16_t samples to read; 0 is a no-op.
 */
void capture_audio(int16_t* buffer, size_t frames) {
    if (!buffer || frames == 0) {
        return;
    }

    size_t bytes_read = 0;
    i2s_read(I2S_PORT, buffer, frames * sizeof *buffer, &bytes_read, portMAX_DELAY);

    /* Integer division drops a trailing partial sample, which is then zeroed too. */
    for (size_t i = bytes_read / sizeof *buffer; i < frames; i++) {
        buffer[i] = 0;
    }
}

音高检测算法

#include <math.h>

/**
 * Estimate the dominant pitch of a mono 16-bit PCM buffer by autocorrelation.
 *
 * For every candidate lag in [20, 500) samples the buffer is correlated with
 * a copy of itself shifted by that lag; the lag with the largest positive
 * correlation is taken as the period. With SAMPLE_RATE at 44100 this covers
 * roughly 88 Hz to 2205 Hz.
 *
 * @param samples  PCM samples; may be NULL (treated as "no pitch").
 * @param len      Number of samples in `samples`.
 * @return SAMPLE_RATE / best_lag in Hz, or 0 when no lag produced a positive
 *         correlation or the buffer is too short to test any lag.
 */
float detect_pitch(const int16_t *samples, size_t len) {
    const size_t min_lag = 20;
    const size_t max_lag = 500;

    /* A buffer no longer than the shortest lag has no overlap to correlate. */
    if (!samples || len <= min_lag) {
        return 0.0f;
    }

    /*
     * Never test a lag >= len: `len - lag` is unsigned and would wrap around,
     * sending the inner loop far past the end of the buffer.
     */
    const size_t lag_end = (len < max_lag) ? len : max_lag;

    float max_corr = 0.0f;
    size_t best_lag = 0;

    for (size_t lag = min_lag; lag < lag_end; lag++) {
        const size_t overlap = len - lag;
        float corr = 0.0f;

        for (size_t i = 0; i < overlap; i++) {
            /* int16 * int16 always fits in 32 bits. */
            corr += (float)((int32_t)samples[i] * (int32_t)samples[i + lag]);
        }

        if (corr > max_corr) {
            max_corr = corr;
            best_lag = lag;
        }
    }

    return (best_lag > 0) ? (float)SAMPLE_RATE / (float)best_lag : 0.0f;
}

量化音阶处理

/* One octave of the C major scale (C4 through B4), in Hz. */
const float SCALE_NOTES[] = {261.63, 293.66, 329.63, 349.23, 392.00, 440.00, 493.88}; // C Major

/**
 * Snap a frequency to the nearest entry of SCALE_NOTES.
 *
 * "Nearest" is measured on a logarithmic (musical-interval) axis. Instead of
 * evaluating |ln(freq / note)| per note, the loop compares
 * max(freq / note, note / freq): both orderings are identical because ln is
 * monotonic, and the ratio form needs no transcendental call.
 *
 * @param freq  Input frequency in Hz.
 * @return The closest table entry, or 0 when `freq` is not a positive number
 *         (zero, negative or NaN).
 */
float quantize_to_scale(float freq) {
    /* Written as a negated comparison so that NaN is rejected as well. */
    if (!(freq > 0.0f)) {
        return 0.0f;
    }

    const size_t note_count = sizeof SCALE_NOTES / sizeof SCALE_NOTES[0];
    double min_ratio = INFINITY;
    float closest_note = 0.0f;

    for (size_t i = 0; i < note_count; i++) {
        const double note = SCALE_NOTES[i];
        /* Interval size as a ratio >= 1, regardless of direction. */
        const double ratio = (freq >= note) ? freq / note : note / freq;

        if (ratio < min_ratio) {
            min_ratio = ratio;
            closest_note = SCALE_NOTES[i];
        }
    }
    return closest_note;
}

波形合成引擎

#include "driver/dac.h"

/* Waveform shapes selectable in generate_wave(). */
typedef enum {
    WAVE_SINE   = 0,  /* sine wave */
    WAVE_SQUARE = 1,  /* square wave */
    WAVE_SAW    = 2   /* sawtooth wave */
} wave_type_t;

/**
 * Synthesize `duration_ms` milliseconds of a basic waveform at `freq` Hz and
 * write it to I2S_PORT as 16-bit samples, blocking with portMAX_DELAY.
 *
 * The oscillator keeps its position as a fraction of one cycle in [0, 1) and
 * wraps it after every sample. This keeps the sawtooth inside the int16 range
 * (an unwrapped, ever-growing phase overflows after the first cycle) and
 * avoids the precision loss of feeding a large float phase to sin().
 *
 * The function returns without producing output when the duration rounds to
 * zero samples, `freq` is not finite, or the sample buffer cannot be allocated.
 *
 * @param freq         Tone frequency in Hz.
 * @param type         Waveform shape; unknown values produce silence.
 * @param duration_ms  Tone length in milliseconds.
 */
void generate_wave(float freq, wave_type_t type, size_t duration_ms) {
    /* 64-bit intermediate: SAMPLE_RATE * duration_ms can exceed 32 bits. */
    const size_t samples = (size_t)((uint64_t)SAMPLE_RATE * duration_ms / 1000u);
    if (samples == 0 || !isfinite(freq)) {
        return;
    }
    if (samples > SIZE_MAX / sizeof(int16_t)) {
        return;  /* byte count would overflow size_t */
    }

    int16_t *buffer = malloc(samples * sizeof *buffer);
    if (!buffer) {
        return;
    }

    const float step = freq / (float)SAMPLE_RATE;  /* cycles advanced per sample */
    float cycle = 0.0f;                            /* position within the current cycle */

    for (size_t i = 0; i < samples; i++) {
        const float phase = 2.0f * (float)M_PI * cycle;

        switch (type) {
            case WAVE_SINE:
                buffer[i] = (int16_t)(INT16_MAX * sinf(phase));
                break;
            case WAVE_SQUARE:
                buffer[i] = (sinf(phase) > 0.0f) ? INT16_MAX : -INT16_MAX;
                break;
            case WAVE_SAW:
                /* Ramp from -1 to +1 across one cycle. */
                buffer[i] = (int16_t)(INT16_MAX * (2.0f * cycle - 1.0f));
                break;
            default:
                buffer[i] = 0;
                break;
        }

        /* Advance and wrap; floorf also handles steps >= 1 and negative steps. */
        cycle += step;
        cycle -= floorf(cycle);
    }

    size_t bytes_written = 0;
    i2s_write(I2S_PORT, buffer, samples * sizeof *buffer, &bytes_written, portMAX_DELAY);
    free(buffer);
}

Python配置脚本（microphone_calibration.py）

import numpy as np
from scipy.fft import rfft, rfftfreq
import sounddevice as sd

def find_peak_frequency(samples, sr):
    """Return the frequency (Hz) of the strongest non-DC component of a real signal.

    The magnitude spectrum is computed with a real FFT. The DC bin (0 Hz) is
    excluded from the search, because any constant offset in the signal would
    otherwise dominate the spectrum and make the function report 0 Hz.

    Args:
        samples: 1-D sequence of real-valued samples.
        sr: Sample rate in Hz.

    Returns:
        The frequency of the largest-magnitude bin above 0 Hz. For a
        single-sample input the only bin is DC and 0.0 is returned.

    Raises:
        ValueError: if ``samples`` is empty.
    """
    n = len(samples)
    if n == 0:
        raise ValueError("samples must not be empty")
    magnitude = np.abs(rfft(samples))
    magnitude[0] = 0.0  # ignore the DC component
    xf = rfftfreq(n, 1 / sr)
    return xf[np.argmax(magnitude)]

def calibrate_mic(duration=3, samplerate=44100):
    """Record from the default input device and report its strongest frequency.

    Records one channel for ``duration`` seconds via ``sounddevice``, waits for
    the recording to finish, then passes the first channel to
    ``find_peak_frequency``. The value printed as "fundamental frequency" is
    that spectral peak.

    Args:
        duration: Recording length in seconds.
        samplerate: Sample rate in Hz, used for both recording and analysis.

    Returns:
        The frequency returned by ``find_peak_frequency``, in Hz.
    """
    print("Recording...")
    recording = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1)
    sd.wait()
    freq = find_peak_frequency(recording[:, 0], samplerate)
    print(f"Detected fundamental frequency: {freq:.2f} Hz")
    return freq

主控制逻辑

/**
 * Application entry point: capture audio, detect its pitch, snap it to the
 * scale and play the result as a 500 ms square wave, forever.
 *
 * Each iteration reads CAPTURE_FRAMES samples, runs detect_pitch() and
 * quantize_to_scale(), plays a tone only when a note was found, then sleeps
 * for 100 ms. The function returns early if the capture buffer cannot be
 * allocated.
 */
void app_main(void) {
    enum { CAPTURE_FRAMES = 4096 };

    mic_init();

    /*
     * NOTE(review): mic_init() has already installed a driver on I2S_PORT, and
     * this block installs a second one on the same port without checking the
     * result. The legacy ESP-IDF driver presumably rejects a second install,
     * which would leave the port RX-only; no data-out pin is assigned anywhere
     * in this file either. Confirm on hardware, then move playback to a
     * separate port or configure one full-duplex port.
     */
    i2s_config_t i2s_out_config = {
        .mode = I2S_MODE_MASTER | I2S_MODE_TX,
        .sample_rate = SAMPLE_RATE,
        .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
        .channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
        .communication_format = I2S_COMM_FORMAT_STAND_I2S,
        .dma_buf_count = 8,
        .dma_buf_len = 1024,
        .use_apll = false
    };
    i2s_driver_install(I2S_PORT, &i2s_out_config, 0, NULL);

    int16_t *capture_buf = malloc(CAPTURE_FRAMES * sizeof *capture_buf);
    if (!capture_buf) {
        return;  /* nothing useful can run without the capture buffer */
    }

    while (1) {
        capture_audio(capture_buf, CAPTURE_FRAMES);
        float detected = detect_pitch(capture_buf, CAPTURE_FRAMES);
        float quantized = quantize_to_scale(detected);

        /* quantize_to_scale() returns 0 when no pitch was detected. */
        if (quantized > 0) {
            generate_wave(quantized, WAVE_SQUARE, 500);
        }
        vTaskDelay(pdMS_TO_TICKS(100));
    }
}

实现说明：

音频采集使用I2S接口配置为16位44.1kHz单声道输入
音高检测采用自相关算法寻找周期性特征
音阶量化使用对数距离计算最近音阶频率
合成引擎支持三种基本波形生成
Python脚本用于开发阶段的麦克风校准和算法调优

扩展建议：

增加低通滤波器平滑音高检测结果
实现ADSR包络控制合成音色
添加节奏量化功能（如1/16音符量化）
通过蓝牙MIDI扩展控制接口

您可能感兴趣的与本文相关的镜像

Python3.11

Conda

Python

Python 是一种高级、解释型、通用的编程语言，以其简洁易读的语法而闻名，适用于广泛的应用，包括Web开发、数据分析、人工智能和自动化脚本