wave convert

本文介绍如何使用C#读取音频文件并将其转换为适用于DirectSound播放的格式。通过复制原始音频数据来实现音频文件的扩展,并利用DirectSound API创建音频缓冲区,最终保存为WAV格式。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 

// Read the raw source bytes. File.ReadAllBytes opens, fully reads, and
// closes the file — this fixes two defects in the original: the
// FileStream/BinaryReader were never disposed (resource leak), and the
// return value of BinaryReader.Read (bytes actually read) was ignored.
byte[] sbuf = File.ReadAllBytes(@"c:/x/t.txt.bak");

// Stretch the data by repeating every source byte `times` times
// (crude sample-duplication "resampling" of the raw audio bytes).
int times = 2;
byte[] buf = new byte[sbuf.Length * times];
for (int i = 0; i < sbuf.Length; i++)
{
    for (int n = 0; n < times; n++)
    {
        buf[i * times + n] = sbuf[i];
    }
}

// Wrap the stretched bytes in a DirectSound secondary buffer and
// persist it to disk as a WAV file.
DxVBLibA.DirectSoundSecondaryBuffer8 dsToneBuffer =
    WavUtility.CreateWav(this.Handle.ToInt32(), buf);
dsToneBuffer.SaveToFile(@"c:/x/wavtest.wav");
MessageBox.Show("done");

 

======================

 

using

System;

using

System.Collections.Generic;

using

System.Text;

 

namespace WaveConvert
{
    /// <summary>
    /// Utility for packaging raw sample bytes into a DirectSound secondary
    /// buffer, which the caller can then play or persist to disk as a WAV
    /// file via <c>SaveToFile</c>.
    /// </summary>
    public class WavUtility
    {
        const int SRATE = 44100;  // Sampling rate (Hz)
        const double DUR = 1;     // Tone duration — not used by CreateWav
        const double FREQ = 500;  // Tone frequency — not used by CreateWav

        /// <summary>
        /// Creates and fills a DirectSound secondary buffer containing
        /// <paramref name="orgdata"/> framed by a fixed envelope, laid out as:
        /// a 0..254 byte ramp (255 bytes), 100 bytes of 0xFF, the payload,
        /// then a final 100 bytes of 0xFF — 455 extra bytes in total.
        /// </summary>
        /// <param name="hwnd">Window handle passed to SetCooperativeLevel.</param>
        /// <param name="orgdata">Raw sample bytes to embed in the buffer.</param>
        /// <returns>The filled secondary buffer.</returns>
        public static DxVBLibA.DirectSoundSecondaryBuffer8 CreateWav(int hwnd, byte[] orgdata)
        {
            // Assemble the framed buffer: ramp | 0xFF pad | payload | 0xFF pad.
            byte[] data = new byte[orgdata.Length + 455];
            for (int i = 0; i < 255; i++)
            {
                data[i] = (byte)i;
            }
            for (int i = 255; i < 100 + 255; i++)
            {
                data[i] = byte.MaxValue;
            }
            for (int i = data.Length - 100; i < data.Length; i++)
            {
                data[i] = byte.MaxValue;
            }
            // Payload occupies [355, data.Length - 100); it abuts the trailing
            // 0xFF pad exactly, so nothing written above is overwritten.
            Array.Copy(orgdata, 0, data, 355, orgdata.Length);

            DxVBLibA.DirectX8 DX = new DxVBLibA.DirectX8();
            DxVBLibA.DirectSound8 DS = DX.DirectSoundCreate(string.Empty);
            DS.SetCooperativeLevel(hwnd, DxVBLibA.CONST_DSSCLFLAGS.DSSCL_NORMAL);

            DxVBLibA.DSBUFFERDESC desc = new DxVBLibA.DSBUFFERDESC();

            // wFormatTag: waveform-audio format type. For one- or two-channel
            // PCM data this must be WAVE_FORMAT_PCM (see Mmreg.h for the full
            // registered list).
            desc.fxFormat.nFormatTag = (short)DxVBLibA.CONST_DSOUND.WAVE_FORMAT_PCM;

            // cbSize: bytes of extra format info appended to WAVEFORMATEX.
            // Ignored for WAVE_FORMAT_PCM, so zero here.
            desc.fxFormat.nSize = 0;
            desc.fxFormat.lExtra = 0;

            // nChannels: 1 = mono, 2 = stereo.
            desc.fxFormat.nChannels = 1;

            // nSamplesPerSec: sample rate in Hz (common PCM values: 8000,
            // 11025, 22050, 44100).
            desc.fxFormat.lSamplesPerSec = SRATE;

            // wBitsPerSample: for WAVE_FORMAT_PCM this must be 8 or 16.
            // NOTE(review): the envelope written above is one byte per
            // element, which looks like 8-bit sample data, yet the format
            // declares 16 bits per sample (two bytes per sample) — confirm
            // the declared format actually matches the data layout.
            desc.fxFormat.nBitsPerSample = 16;

            // nBlockAlign: minimum atomic unit of data; for PCM it must equal
            // nChannels * wBitsPerSample / 8.
            desc.fxFormat.nBlockAlign = (short)(desc.fxFormat.nChannels * desc.fxFormat.nBitsPerSample / 8);

            // nAvgBytesPerSec: for PCM, nSamplesPerSec * nBlockAlign.
            desc.fxFormat.lAvgBytesPerSec = desc.fxFormat.lSamplesPerSec * desc.fxFormat.nBlockAlign;

            desc.lFlags = 0;
            desc.lBufferBytes = data.Length;

            DxVBLibA.DirectSoundSecondaryBuffer8 dsToneBuffer = DS.CreateSoundBuffer(ref desc);

            // Pin the managed array so DirectSound can read it through a raw
            // pointer. The handle is released in a finally block so it is not
            // leaked if WriteBuffer throws (the original leaked it on failure).
            System.Runtime.InteropServices.GCHandle hmem =
                System.Runtime.InteropServices.GCHandle.Alloc(
                    data, System.Runtime.InteropServices.GCHandleType.Pinned);
            try
            {
                dsToneBuffer.WriteBuffer(0, data.Length, hmem.AddrOfPinnedObject(),
                    DxVBLibA.CONST_DSBLOCKFLAGS.DSBLOCK_DEFAULT);
            }
            finally
            {
                hmem.Free();
            }
            return dsToneBuffer;
        }
    }
}

不需要accept_wave函数伪代码,只需要逐行解释:
def accept_wave(self, wave):
    """Accept one chunk of raw little-endian 16-bit PCM bytes and return
    fbank features for it, carrying streaming state across calls.

    State read/updated on self:
      - self.wave_remained: samples held over until enough arrive for a frame
      - self.feature_remained: trailing frames kept for context expansion
      - self.feats_ctx_offset: phase offset carried across downsampling

    Returns None when the accumulated wave is still too short to produce
    features; otherwise a 2-D float tensor of (possibly context-expanded,
    possibly downsampled) fbank frames.
    """
    assert isinstance(wave, bytes), \
        "please make sure the input format is bytes(raw PCM)"
    # convert bytes into float32
    # (each sample is 2 bytes, '<h' = little-endian signed 16-bit)
    data = []
    for i in range(0, len(wave), 2):
        value = struct.unpack('<h', wave[i:i + 2])[0]
        data.append(value)
    # here we don't divide 32768.0,
    # because kaldi.fbank accept original input
    wave = np.array(data)
    # prepend samples left over from the previous chunk
    wave = np.append(self.wave_remained, wave)
    # Not enough samples yet to emit features: stash and wait for more.
    # NOTE(review): the threshold multiplies frame samples by right_context
    # — presumably to guarantee right-context frames exist; confirm intent.
    if wave.size < (self.frame_length * self.sample_rate / 1000) \
            * self.right_context:
        self.wave_remained = wave
        return None
    wave_tensor = torch.from_numpy(wave).float().to(self.device)
    wave_tensor = wave_tensor.unsqueeze(0)  # add a channel dimension
    feats = kaldi.fbank(wave_tensor,
                        num_mel_bins=self.num_mel_bins,
                        frame_length=self.frame_length,
                        frame_shift=self.frame_shift,
                        dither=0,
                        energy_floor=0.0,
                        sample_frequency=self.sample_rate)
    # update wave remained: keep the samples not yet consumed by a full
    # frame shift so the next chunk starts exactly where this one ended
    feat_len = len(feats)
    frame_shift = int(self.frame_shift / 1000 * self.sample_rate)
    self.wave_remained = wave[feat_len * frame_shift:]
    if self.context_expansion:
        assert feat_len > self.right_context, \
            "make sure each chunk feat length is large than right context."
        # pad feats with remained feature from last chunk
        if self.feature_remained is None:
            # first chunk:
            # pad first frame at the beginning,
            # replicate just support last dimension, so we do transpose.
            feats_pad = F.pad(feats.T, (self.left_context, 0),
                              mode='replicate').T
        else:
            feats_pad = torch.cat((self.feature_remained, feats))
        # number of frames for which a full context window is available
        # NOTE(review): uses right_context + right_context here, while the
        # state update below uses left_context + right_context — looks
        # asymmetric; confirm this is intentional and not a typo.
        ctx_frm = feats_pad.shape[0] - (self.right_context +
                                        self.right_context)
        ctx_win = (self.left_context + self.right_context + 1)
        ctx_dim = feats.shape[1] * ctx_win
        feats_ctx = torch.zeros(ctx_frm, ctx_dim, dtype=torch.float32)
        # each output frame = concatenation of ctx_win consecutive frames
        for i in range(ctx_frm):
            feats_ctx[i] = torch.cat(tuple(
                feats_pad[i:i + ctx_win])).unsqueeze(0)
        # update feature remained, and feats
        self.feature_remained = \
            feats[-(self.left_context + self.right_context):]
        feats = feats_ctx.to(self.device)
    if self.downsampling > 1:
        # carry the subsampling phase across chunk boundaries so frames
        # are taken every `downsampling` steps globally, not per chunk
        last_remainder = 0 if self.feats_ctx_offset == 0 \
            else self.downsampling - self.feats_ctx_offset
        remainder = (feats.size(0) + last_remainder) % self.downsampling
        feats = feats[self.feats_ctx_offset::self.downsampling, :]
        self.feats_ctx_offset = remainder \
            if remainder == 0 else self.downsampling - remainder
    return feats
最新发布
07-10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值