pyaudio:录制并保存系统声音+绘制频谱

本文介绍了如何在Ubuntu系统上通过apt和conda安装PyAudio库,如何使用PyAudio录制音频并保存为wav文件,以及如何结合PyAudio和PyQtGraph实时绘制音频频谱(包括一个置顶的悬浮窗口版本)。
部署运行你感兴趣的模型镜像

pyaudio INSTALL

  • sudo apt install python3-pyaudio
$ sudo apt install python3-pyaudio 
正在读取软件包列表... 完成
正在分析软件包的依赖关系树... 完成
正在读取状态信息... 完成                 
有一些软件包无法被安装。如果您用的是 unstable 发行版,这也许是
因为系统无法达到您要求的状态造成的。该版本中可能会有一些您需要的软件
包尚未被创建或是它们已被从新到(Incoming)目录移出。
下列信息可能会对解决问题有所帮助:

下列软件包有未满足的依赖关系:
 python3-pyaudio : 依赖: python3 (< 3.6) 但是 3.10.6-1~22.04 正要被安装
E: 无法修正错误,因为您要求某些软件包保持现状,就是它们破坏了软件包间的依赖关系。
  • conda install PyAudio
$ conda install PyAudio #这个并不需要python3 (< 3.6)
Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/pdd/anaconda3/envs/myaudio

  added / updated specs:
    - pyaudio


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    portaudio-19.6.0           |       h7b6447c_4         114 KB  defaults
    pyaudio-0.2.11             |   py35h14c3975_1          64 KB  defaults
    ------------------------------------------------------------
                                           Total:         178 KB

The following NEW packages will be INSTALLED:

  portaudio          anaconda/pkgs/main/linux-64::portaudio-19.6.0-h7b6447c_4 None
  pyaudio            anaconda/pkgs/main/linux-64::pyaudio-0.2.11-py35h14c3975_1 None


Proceed ([y]/n)? y


Downloading and Extracting Packages
portaudio-19.6.0     | 114 KB    | ####################################################################################################### | 100% 
pyaudio-0.2.11       | 64 KB     | ####################################################################################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Retrieving notices: ...working... done

使用pyaudio保存音频到wav文件

import pyaudio
import wave

def record_and_save_wav(filename, duration=5, sample_rate=44100, channels=2, format=pyaudio.paInt16, chunk=1024):
    """Record audio from a PyAudio input stream and save it as a WAV file.

    The stream is opened without an explicit device index, so PyAudio
    chooses the input device.

    Args:
        filename: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.
        channels: Number of channels to capture.
        format: PyAudio sample-format constant (e.g. ``pyaudio.paInt16``).
        chunk: Maximum number of frames fetched per ``stream.read`` call;
            also used as ``frames_per_buffer`` when opening the stream.
    """
    p = pyaudio.PyAudio()
    frames = []

    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(format)

        # Open the audio stream
        stream = p.open(format=format,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            print("Recording...")

            # Record exactly sample_rate * duration frames; the last read is
            # shortened so the tail of the recording is not truncated to a
            # whole number of chunks.
            remaining = int(sample_rate * duration)
            while remaining > 0:
                wanted = min(chunk, remaining)
                frames.append(stream.read(wanted))
                remaining -= wanted

            print("Finished recording.")
        finally:
            # Stop and close the stream even if reading failed
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded audio as a WAV file
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

# Output file name and recording length (in seconds) for this demo run
filename = "output.wav"
record_and_save_wav(filename, duration=5)

使用pyaudio绘制频谱

在这里插入图片描述

# 代码来自https://github.com/sbarratt/spectrum-analyzer/blob/master/sa.py,略有修改
import pyaudio
import struct
import math
import sys
import numpy as np
import IPython as ipy

import pyqtgraph as pg
from PyQt5 import QtWidgets  
from pyqtgraph.Qt import QtGui, QtCore

# Audio format (check Audio MIDI Setup if on Mac): 16-bit samples,
# RATE in Hz, CHANNELS interleaved channels per frame
FORMAT = pyaudio.paInt16
RATE = 44100
CHANNELS = 2

# Plot range on the frequency axis is [-RANGE, RANGE]; when left as None it
# falls back to RATE/2, i.e. the Nyquist frequency
RANGE = None
if not RANGE:
	RANGE = RATE/2

# How much data goes into each FFT: block length in seconds, and the
# corresponding number of frames per block (RATE * seconds, truncated)
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

# Which channel to analyze when CHANNELS != 1: "l" takes the even-indexed
# (left) samples, any other value takes the odd-indexed (right) samples
LR = "l"

class SpectrumAnalyzer():
	"""Live spectrum display.

	Reads blocks of 16-bit audio from a PyAudio input stream, computes the
	FFT magnitude of one channel and plots it in a pyqtgraph window.
	"""

	def __init__(self):
		"""Create the PyAudio instance, open the input stream and build the UI."""
		self.pa = pyaudio.PyAudio()
		self.initMicrophone()
		self.initUI()

	def find_input_device(self):
		"""Return the index of a device whose name contains "mic" or "input".

		The match is a case-insensitive substring test. When several devices
		match, the last one wins; when none matches, None is returned.
		"""
		device_index = None
		for i in range(self.pa.get_device_count()):
			name = self.pa.get_device_info_by_index(i)["name"].lower()
			# Substring match: real device names look like "USB Microphone",
			# so comparing the whole name against the keywords never matches.
			if any(keyword in name for keyword in ("mic", "input")):
				device_index = i
		return device_index

	def initMicrophone(self):
		"""Open the input stream on the device chosen by find_input_device()."""
		device_index = self.find_input_device()

		self.stream = self.pa.open(
			format=FORMAT,
			channels=CHANNELS,
			rate=RATE,
			input=True,
			input_device_index=device_index,
			frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
		)

	def readData(self):
		"""Read one block from the stream and return one channel as a NumPy array.

		The raw bytes are unpacked as native 16-bit signed integers (struct
		format "h"), which matches FORMAT = paInt16. With CHANNELS == 1 all
		samples are returned; otherwise the even-indexed samples are returned
		when LR == 'l' and the odd-indexed samples for any other value.
		"""
		block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
		# Two bytes per sample; integer division keeps the count an int
		count = len(block) // 2
		shorts = struct.unpack("%dh" % count, block)
		if CHANNELS == 1:
			return np.array(shorts)
		if LR == 'l':
			return np.array(shorts[::2])
		return np.array(shorts[1::2])

	def initUI(self):
		"""Build the main window containing a single spectrum plot."""
		self.app = QtWidgets.QApplication([])
		# Use the setter: quitOnLastWindowClosed() only reads the property
		self.app.setQuitOnLastWindowClosed(True)

		self.mainWindow = QtWidgets.QMainWindow()
		self.mainWindow.setWindowTitle("Spectrum Analyzer")
		self.mainWindow.resize(800,300)
		self.centralWid = QtWidgets.QWidget()
		self.mainWindow.setCentralWidget(self.centralWid)
		self.lay = QtWidgets.QVBoxLayout()
		self.centralWid.setLayout(self.lay)

		self.specWid = pg.PlotWidget(name="spectrum")
		self.specItem = self.specWid.getPlotItem()
		self.specItem.setMouseEnabled(y=False)
		self.specItem.setYRange(0,1000)
		self.specItem.setXRange(-RANGE,RANGE, padding=0)

		self.specAxis = self.specItem.getAxis("bottom")
		self.specAxis.setLabel("Frequency [Hz]")
		self.lay.addWidget(self.specWid)

		self.mainWindow.show()
		self.app.aboutToQuit.connect(self.close)

	def close(self):
		"""Stop and close the stream, terminate PyAudio, then exit the process."""
		try:
			self.stream.stop_stream()
			self.stream.close()
		finally:
			# Release PyAudio even if stopping or closing the stream failed
			self.pa.terminate()
		sys.exit()

	def get_spectrum(self, data):
		"""Return (frequencies, magnitudes) of the FFT of `data` as two lists.

		Frequencies come from np.fft.fftfreq with sample spacing 1/RATE;
		magnitudes are |FFT(data)| / N. Both are passed through fftshift, so
		negative frequencies come first and zero frequency sits in the middle.
		An empty input yields two empty lists.
		"""
		N = data.shape[0]
		if N == 0:
			# Nothing to transform; avoids dividing by zero below
			return [], []
		T = 1.0/RATE
		Pxx = np.fft.fftshift((1./N)*np.fft.fft(data))
		f = np.fft.fftshift(np.fft.fftfreq(N,T))

		return f.tolist(), (np.absolute(Pxx)).tolist()

	def mainLoop(self):
		"""Endlessly read a block, plot its spectrum and process Qt events."""
		while 1:
			# Sometimes Input overflowed because of mouse events, ignore this
			try:
				data = self.readData()
			except IOError:
				continue
			f, Pxx = self.get_spectrum(data)
			self.specItem.plot(x=f,y=Pxx, clear=True)
			QtWidgets.QApplication.processEvents()

# Script entry point: constructing the analyzer opens the audio stream and
# shows the window; mainLoop() then keeps reading and plotting.
if __name__ == '__main__':
	sa = SpectrumAnalyzer()
	sa.mainLoop()

使用pyaudio绘制频谱+悬浮窗口

  • 在 setWindowFlags 中加入 Qt.WindowStaysOnTopHint 标志,确保窗口保持在其他窗口之上(Qt.FramelessWindowHint 用于去掉窗口边框)。这样,即使焦点切换到其他窗口,悬浮窗口仍然会保持在顶部。

在这里插入图片描述

import pyaudio
import struct
import math
import sys
import numpy as np
import IPython as ipy

import pyqtgraph as pg
from PyQt5 import QtWidgets  
from pyqtgraph.Qt import QtGui, QtCore
from PyQt5.QtCore import Qt

# Audio format (check Audio MIDI Setup if on Mac): 16-bit samples,
# RATE in Hz, CHANNELS interleaved channels per frame
FORMAT = pyaudio.paInt16
RATE = 44100
CHANNELS = 2

# Plot range on the frequency axis is [-RANGE, RANGE]; when left as None it
# falls back to RATE/2, i.e. the Nyquist frequency
RANGE = None
if not RANGE:
	RANGE = RATE/2

# How much data goes into each FFT: block length in seconds, and the
# corresponding number of frames per block (RATE * seconds, truncated)
INPUT_BLOCK_TIME = 0.05
INPUT_FRAMES_PER_BLOCK = int(RATE*INPUT_BLOCK_TIME)

# Which channel to analyze when CHANNELS != 1: "l" takes the even-indexed
# (left) samples, any other value takes the odd-indexed (right) samples
LR = "l"

class SpectrumAnalyzer():
	"""Live spectrum display in a frameless, always-on-top window.

	Reads blocks of 16-bit audio from a PyAudio input stream, computes the
	FFT magnitude of one channel and plots it in a pyqtgraph window.
	"""

	def __init__(self):
		"""Create the PyAudio instance, open the input stream and build the UI."""
		self.pa = pyaudio.PyAudio()
		self.initMicrophone()
		self.initUI()

	def find_input_device(self):
		"""Return the index of a device whose name contains "mic" or "input".

		The match is a case-insensitive substring test. When several devices
		match, the last one wins; when none matches, None is returned.
		"""
		device_index = None
		for i in range(self.pa.get_device_count()):
			name = self.pa.get_device_info_by_index(i)["name"].lower()
			# Substring match: real device names look like "USB Microphone",
			# so comparing the whole name against the keywords never matches.
			if any(keyword in name for keyword in ("mic", "input")):
				device_index = i
		return device_index

	def initMicrophone(self):
		"""Open the input stream on the device chosen by find_input_device()."""
		device_index = self.find_input_device()

		self.stream = self.pa.open(
			format=FORMAT,
			channels=CHANNELS,
			rate=RATE,
			input=True,
			input_device_index=device_index,
			frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
		)

	def readData(self):
		"""Read one block from the stream and return one channel as a NumPy array.

		The raw bytes are unpacked as native 16-bit signed integers (struct
		format "h"), which matches FORMAT = paInt16. With CHANNELS == 1 all
		samples are returned; otherwise the even-indexed samples are returned
		when LR == 'l' and the odd-indexed samples for any other value.
		"""
		block = self.stream.read(INPUT_FRAMES_PER_BLOCK)
		# Two bytes per sample; integer division keeps the count an int
		count = len(block) // 2
		shorts = struct.unpack("%dh" % count, block)
		if CHANNELS == 1:
			return np.array(shorts)
		if LR == 'l':
			return np.array(shorts[::2])
		return np.array(shorts[1::2])

	def initUI(self):
		"""Build a small frameless, always-on-top window with one spectrum plot."""
		self.app = QtWidgets.QApplication([])
		# Use the setter: quitOnLastWindowClosed() only reads the property
		self.app.setQuitOnLastWindowClosed(True)

		self.mainWindow = QtWidgets.QMainWindow()
		# No window frame, and stay above other windows even without focus
		self.mainWindow.setWindowFlags(Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
		self.mainWindow.setWindowTitle("Spectrum Analyzer")
		# Position (100, 100), size 300 x 200
		self.mainWindow.setGeometry(100, 100, 300, 200)
		self.centralWid = QtWidgets.QWidget()
		self.mainWindow.setCentralWidget(self.centralWid)
		self.lay = QtWidgets.QVBoxLayout()
		self.centralWid.setLayout(self.lay)

		self.specWid = pg.PlotWidget(name="spectrum")
		self.specItem = self.specWid.getPlotItem()
		self.specItem.setMouseEnabled(y=False)
		self.specItem.setYRange(0,1000)
		self.specItem.setXRange(-RANGE,RANGE, padding=0)

		self.specAxis = self.specItem.getAxis("bottom")
		self.specAxis.setLabel("Frequency [Hz]")
		self.lay.addWidget(self.specWid)

		self.mainWindow.show()
		self.app.aboutToQuit.connect(self.close)

	def close(self):
		"""Stop and close the stream, terminate PyAudio, then exit the process."""
		try:
			self.stream.stop_stream()
			self.stream.close()
		finally:
			# Release PyAudio even if stopping or closing the stream failed
			self.pa.terminate()
		sys.exit()

	def get_spectrum(self, data):
		"""Return (frequencies, magnitudes) of the FFT of `data` as two lists.

		Frequencies come from np.fft.fftfreq with sample spacing 1/RATE;
		magnitudes are |FFT(data)| / N. Both are passed through fftshift, so
		negative frequencies come first and zero frequency sits in the middle.
		An empty input yields two empty lists.
		"""
		N = data.shape[0]
		if N == 0:
			# Nothing to transform; avoids dividing by zero below
			return [], []
		T = 1.0/RATE
		Pxx = np.fft.fftshift((1./N)*np.fft.fft(data))
		f = np.fft.fftshift(np.fft.fftfreq(N,T))

		return f.tolist(), (np.absolute(Pxx)).tolist()

	def mainLoop(self):
		"""Endlessly read a block, plot its spectrum and process Qt events."""
		while 1:
			# Sometimes Input overflowed because of mouse events, ignore this
			try:
				data = self.readData()
			except IOError:
				continue
			f, Pxx = self.get_spectrum(data)
			self.specItem.plot(x=f,y=Pxx, clear=True)
			QtWidgets.QApplication.processEvents()

# Script entry point: constructing the analyzer opens the audio stream and
# shows the window; mainLoop() then keeps reading and plotting.
if __name__ == '__main__':
	sa = SpectrumAnalyzer()
	sa.mainLoop()

CG

您可能感兴趣的与本文相关的镜像

Python3.8

Python3.8

Conda
Python

Python 是一种高级、解释型、通用的编程语言,以其简洁易读的语法而闻名,适用于广泛的应用,包括Web开发、数据分析、人工智能和自动化脚本

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值