1. **返回值**([sweepsine.py#L1285](sweepsine.py#L1285)):
```python
return pcss # 返回 list[list[t_wave]]
```
**数据形式**:
```
pcss = [
[pc_00, pc_01, ..., pc_0N], # 第1个仰角的所有方位角
[pc_10, pc_11, ..., pc_1N], # 第2个仰角的所有方位角
...
]
```
每个 `pc` 是一个 `t_wave` 对象,包含:
- `waves[0]`: 该方向的IR波形
- `lag`: 时钟漂移补偿值
- `pos`: [方位角, 仰角, 距离]
**wave_to_clips() 的输出总结**:
- 输入:长录音文件
- 输出:`pcss`(二维列表),每个元素是一个方向的IR片段
- 形状:`[n_elevations][n_azimuths]`,如 `[7][72]` = 504个方向
def wave_to_clips(wave, fs, clipwaves,
n_sweeps, seconds_per_sweep, seconds_before_first_sweep,
invsweep, invsync,
hp_fc, lp_fc, pw_order,
azimuths, elevations, degree_offset, direction,
plot, plotshow = True, final = False, beep = None, check_lag = True):
"""
Extract decoded wave clips from a wave file of sine sweeps. Each wave clip is a t_wave object hosting
a sine sweep clipped from *wave*, an impulse response decoded from that sine sweep, and a lag value
indicating the temporal alignment of the hosted waves.
Parameters
----------
wave : array or string
Waveforms [t, ch] of sine sweeps, or path to file containing such sweeps.
fs : int
Sample rate of the sweep.
clipwaves : int
Number of wave contents to collect. 1 for decwave only; 2 for decwave and wave.
n_sweeps : array
Number of sweep groups and of sweeps per group, organized as array (#g, #s/g).
seconds_per_sweep : array
Number of seconds per sweep group and per sweep, as array (spg, spsw).
invsync : array
Inversed sync signal for timing purpose.
plot : binary
Specifies whether to show plots for visual inspection of correctness.
plotshow : binary
Specifies whether to call plt.show() within this function to render plots.
final : binary
Specifies whether to return the last event (IR/sweep), which can be beyond n_sweeps.
beep : array
Specifies the sync signal used in recording. Specifying this signal triggers calculation of
local-remote delays, i.e. delay of capturing remote source compared to local source.
Returns
-------
pcs : list[list[t_wave]]
List of groups of wave clips.
For unmentioned parameters see `wave_to_sofa`.
"""
n_groups = len(n_sweeps)
# Parameter normalization
# elevations: sequence of elevations corresponding to each group
# azimuths: sequence of subsequences of azimuths, each subsequence corresponding to one elevation
if elevations is None:
elevations = np.linspace(-90, 90, n_groups, endpoint=False) + 90 / n_groups
elif np.size(elevations) == 1:
elevations = np.atleast_1d(elevations)
if azimuths is None:
azimuths = [(direction * np.linspace(0, 360, n_sw + 1, endpoint=True) + degree_offset) % 360 for n_sw in n_sweeps]
assert len(elevations) == n_groups
for az, n_sw in zip(azimuths, n_sweeps):
assert len(az) == n_sw + 1
if isinstance(wave, str):
wavename = wave
decwavename = wavename[:-4] + '.dec.wav'
decwaveexists = os.path.isfile(decwavename)
decsyncname = wavename[:-4] + '.decs.wav'
decsyncexists = os.path.isfile(decsyncname)
if clipwaves > 1 or beep is not None or not decwaveexists or (invsync is not None and not decsyncexists):
# Recording of sine sweeps in all directions
wave, wav_fs = sf.read(wavename, always_2d=True)
if wav_fs != fs:
wave = soxr.resample(wave, wav_fs, fs)
else:
wave = None
else:
wavename = None
# LP filter for delay estimation
lpsos = scipy.signal.butter(6, lp_fc, btype='lowpass', output='sos', fs=fs)
samples_per_sweep = seconds_per_sweep * fs
lenivs = len(invsweep)
hfs = fs // 2
# Suppress low end: may or may not be useful.
if hp_fc > 0 and wave is not None:
hp = scipy.signal.butter(6, hp_fc, btype='highpass', output='sos', fs=fs)
wave = scipy.signal.sosfiltfilt(hp, wave, axis=0)
# Decode whole wave using inverse sweep
dec_sync_amp = 1 / 100
if wavename is not None:
# decwavename = wavename[:-4] + '.dec.wav'
if decwaveexists:#os.path.isfile(decwavename):
dec_wave, dec_fs = sf.read(decwavename, always_2d=True)
assert dec_fs == fs
else:
dec_wave = np.array([scipy.signal.fftconvolve(w, invsweep) for w in wave.T]).T[lenivs - 1:]
sf.write(decwavename, data=dec_wave, samplerate=fs, subtype='FLOAT')
if invsync is not None:
# decsyncname = wavename[:-4] + '.decs.wav'
if decsyncexists:#os.path.isfile(decsyncname):
dec_sync, decs_fs = sf.read(decsyncname, always_2d=True)
assert decs_fs == fs
else:
dec_sync = np.array([scipy.signal.fftconvolve(w, invsync['wave']) for w in wave.T]).T[len(invsync['wave']) - 1:] * (dec_sync_amp * invsync['gain'])
sf.write(decsyncname, data=dec_sync, samplerate=fs, subtype='FLOAT')
else:
dec_wave = np.array([scipy.signal.fftconvolve(w, invsweep) for w in wave.T]).T[lenivs - 1:]
if invsync is not None:
dec_sync = np.array([scipy.signal.fftconvolve(w, invsync['wave']) for w in wave.T]).T[len(invsync['wave']) - 1:] * (dec_sync_amp * invsync['gain'])
print('Value range of dec_wave:', np.min(dec_wave), np.max(dec_wave))
print('Value range of dec_sync:', np.min(dec_sync), np.max(dec_sync))
dec_wave_max = np.max(np.abs(dec_wave), axis=0) # [ch]
# Locate the first IR at the earliest time of reaching 1/4 of the channel-wize max magnitude in
# decoded wave, using *seconds_before_first_sweep* as clue if provided.
if seconds_before_first_sweep is not None:
sbf = int(fs * seconds_before_first_sweep) # sbf for "samples_before_first_sweep"
p0 = get_onset_th(dec_wave[sbf : sbf + fs], 0.1, t_axis=0) + sbf #np.argmin(np.abs(dec_wave[sbf : sbf + fs]) < dec_wave_max / 4, axis=0) + sbf
else:
p0 = get_onset_th(dec_wave, 0.1, t_axis=0) #np.argmin(np.abs(dec_wave) < dec_wave_max / 4, axis=0) # [ch]
if final:
assert n_groups == 1
# Infer the position of the final IR without compensation for a clocking lag. This is always
# multiple samples_per_sweep's from p1, and pinpoints an impulse in dec_wave. We scan backwards
# from the last of such positions until an impulse is located, whereupon ip3 counts the number
# of sweeps between p1 and p3.
ip3 = (len(dec_wave) - lenivs - hfs - p0[0]) // samples_per_sweep[-1]
p3 = p0 + ip3 * samples_per_sweep[-1]
while not np.all([np.any(np.abs(dw[p - hfs : p + hfs]) > dw_max / 10) for dw, p, dw_max in zip(dec_wave.T, p3, dec_wave_max)]):
ip3 = ip3 - 1
p3 = p0 + ip3 * samples_per_sweep[-1]
if plot:
fig, ax = plt.subplots()
fig.suptitle("Locating first and last IRs")
# ax.plot(np.arange(len(dec_wave)), dec_wave)
ax.wax = WaveAxe(ax, colors=['k', 'yellow'], connect_events=False)
ax.wax.draw_wave(dec_wave[:, 0], type='auto', blocksize=1024, blockoffst=1024)
# ax.plot(dec_wave)
if final:
ax.plot([p3 - hfs, p3 + hfs, p3 + hfs, p3 - hfs, p3 - hfs],
[dec_wave_max, dec_wave_max, -dec_wave_max, -dec_wave_max, dec_wave_max], ls=':')
# Plot main timestamps and do sanity check
for iel, elevation in enumerate(elevations):
# Infer the position of the 0th and n_sweep'th IR, without compensation for a clocking lag
p1 = p0 + np.sum(samples_per_sweep[:iel])#iel * samples_per_sweep[0]
p2 = p1 + n_sweeps[iel] * samples_per_sweep[-1]#n_sweeps[-1] * samples_per_sweep[-1] # [ch]
# Visual inspection: check if the first and last IRs are correctly identified
if plot:
ax.plot([p1 - hfs, p1 + hfs, p1 + hfs, p1 - hfs, p1 - hfs],
[dec_wave_max, dec_wave_max, -dec_wave_max, -dec_wave_max, dec_wave_max], ls=':')
ax.plot([p2 - hfs, p2 + hfs, p2 + hfs, p2 - hfs, p2 - hfs],
[dec_wave_max, dec_wave_max, -dec_wave_max, -dec_wave_max, dec_wave_max], ls=':')
# Sanity check: the magnitude goes above 1/10 the channel's max in every channel near p2, the inferred position of the n_sweep'th IR
assert np.all([np.any(np.abs(dw[p - fs // 2 : p + fs // 2]) > dw_max / 10) for dw, p, dw_max in zip(dec_wave.T, p2, dec_wave_max)])
if plotshow: plt.show()
# Collect clips
pcss = []
lag_acc = 0
for iel, elevation in enumerate(elevations):
# Infer the position of the 0th and n_sweep'th IR, without compensation for a clocking lag
p1 = p0 + np.sum(samples_per_sweep[:iel])#iel * samples_per_sweep[0]
p2 = p1 + n_sweeps[iel] * samples_per_sweep[-1]#n_sweeps[-1] * samples_per_sweep[-1] # [ch]
# Collect wave pieces every samples_per_sweep into t_wave objects, specifying dec_wave (i.e. the
# IR) as waves[0]
pcs = []
for iaz, az in enumerate(azimuths[iel]):
t0 = p1[0] + iaz * samples_per_sweep[-1]
dec_wave_i = dec_wave[t0 - hfs // 2 : t0 + hfs].T
wave_i = wave[t0 - hfs // 2 : t0 + hfs + lenivs].T if wave is not None else None
pc = t_wave(waves=[dec_wave_i, wave_i] if clipwaves == 2 else [dec_wave_i],
lag=0, lpsos=lpsos, pw_order=pw_order, pos=np.array([az, elevation, 1]))
# if invsync is not None:
# sync_time = []
# for oc in invsync['occurence']:
# ti = int(t0 + oc * fs)
# lagi = int(lag_acc + lag * iaz / n_sweeps[-1])
# pki = get_onset_peak(dec_sync[ti + lagi - 256 : ti + lagi + 1024], th=.25) + lagi - 256
# sync_time.append(pki)
# pc.sync_time = np.array(sync_time)
if beep is not None:
# Calculate local-remote delays, i.e. delay of receiving remote sync signal compared to
# receiving local sync signal.
# Identify local sync as -2s ~ -1.1s from main impulse
bbl = wave[p1[0] + iaz * samples_per_sweep[-1] - 2 * fs : p1[0] + iaz * samples_per_sweep[-1] - 11 * fs // 10]
# Identify remote sync as -1s ~ -0.1s from main impulse.
bbr = wave[p1[0] + iaz * samples_per_sweep[-1] - 1 * fs : p1[0] + iaz * samples_per_sweep[-1] - fs // 10]
# Calculate prewhitening filters for each channel based on local sync signal
blpwf = [librosa.lpc(bl, order=15) for bl in bbl.T]
# Prewhiten local and remote syncs
blpw = [scipy.signal.lfilter(bp, [1], bl)[15:] for bp, bl in zip(blpwf, bbl.T)]
brpw = [scipy.signal.lfilter(bp, [1], br)[15:] for bp, br in zip(blpwf, bbr.T)]
# Calculate delay from prewhitened sync signals
pc.LRd = [np.argmax(scipy.signal.fftconvolve(br, bl[::-1])) - (len(bl) - 1) for bl, br in zip(blpw, brpw)]
# cLRd = np.argmax(np.sum([scipy.signal.fftconvolve(br, bl[::-1]) for bl, br in zip(bbl.T, bbr.T)], axis=0)) - (len(bbl) - 1)
# pc.LRd = [cLRd] + [np.argmax(scipy.signal.fftconvolve(br, bl[::-1])) - (len(bl) - 1) for bl, br in zip(bbl.T, bbr.T)]
pcs.append(pc)
if beep is not None:
# Post-processing local-remote delays based on a "good" value range within *tol* points from
# the median value of all delays: those falling out of the good range are recalculated within
# the range, assuming the x-correlation function must have a local maximum within the range.
mLRd = int(np.median([pc.LRd for pc in pcs]))
tol = 36
for i, pc in enumerate(pcs):
if np.any(np.abs(np.array(pc.LRd) - mLRd) >= tol):
bbl = wave[p1[0] + iaz * samples_per_sweep[-1] - 2 * fs : p1[0] + iaz * samples_per_sweep[-1] - 11 * fs // 10]
bbr = wave[p1[0] + iaz * samples_per_sweep[-1] - 1 * fs : p1[0] + iaz * samples_per_sweep[-1] - fs // 10]
for j, d in enumerate(pc.LRd):
if d <= mLRd - tol or d >= mLRd + tol:
blpwf = librosa.lpc(bbl[:, j], order=15)
blpw = scipy.signal.lfilter(blpwf, [1], bbl[:, j])[15:]
brpw = scipy.signal.lfilter(blpwf, [1], bbr[:, j])[15:]
pc.LRd[j] = np.argmax(scipy.signal.fftconvolve(brpw, blpw[::-1])[len(blpw) - 1 + mLRd - tol : len(blpw) - 1 + mLRd + tol]) + mLRd - tol
# pcs = [t_wave(waves=[dec_wave[p1[0] + i * samples_per_sweep - hfs // 2 : p1[0] + i * samples_per_sweep + hfs].T,
# wave[p1[0] + i * samples_per_sweep - hfs // 2 : p1[0] + i * samples_per_sweep + hfs + lenivs].T],
# lag=0, lpsos=lpsos, pw_order=pw_order, pos=np.array([az, el, 1]))
# for i, (az, el) in enumerate(zip(azimuth, elevation))]
# Find the lag over n_sweeps IRs by comparing the first (0th) and last (n_sweeps-th) IRs
if check_lag:
lag = pcs[0].delay_of(pcs[n_sweeps[iel]], wid=0, ch=0, use_pw=False)
print('Lag of', lag, 'points in', n_sweeps[iel] * seconds_per_sweep[-1], 'seconds at', fs, 'Hz.')
else:
lag = 0
print('Lag compensation disabled.')
for iaz, (az, pc) in enumerate(zip(azimuths[iel], pcs)):
# Fill in lags of wave pieces
pc.lag = lag_acc + lag * iaz / n_sweeps[iel]
if invsync is not None:
# The sync signal is repeated several times given by invsync['occurence'] relative to
# the start of the main sweep. Deviations from that position are primarily due to different
# distances from microphones to speakers. 256 points covers roughly 1.8m distance difference
# at 48kHz, which should be more than enough to enclose the dec_sync peak.
t0 = p1[0] + iaz * samples_per_sweep[-1]
sync_time = []
for oc in invsync['occurence']:
ti = int(t0 + oc * fs)
pki = get_onset_peak(dec_sync[ti + int(pc.lag) - 256 : ti + int(pc.lag) + 1024], th=.25) + int(pc.lag) - 256
# It is observed that the first reflection can get too strong in one channel and
# consequently the sync_time is pinned onto reflection rather than onset. Here we
# check for detected onsets away from the median and recalculate them from a close
# region that should exclude the first reflection.
mid_pki = int(np.median(pki))
assert np.count_nonzero(np.abs(pki - mid_pki) > fs * 0.14 / 341) <= 1
for i, pk in enumerate(pki):
# Number of time points for sound travelling 0.14m
if np.abs(pk - mid_pki) > fs * 0.14 / 341:
# if np.any(np.abs(pki - mid_pki) > fs * 0.14 / 341):
# Number of time points for sound travelling 0.3m
sfr = int(fs * 0.3 / 341)
sto = int(fs * 0.3 / 341)
pki[i] = get_onset_peak(dec_sync[ti + int(pc.lag) + mid_pki - sfr : ti + int(pc.lag) + mid_pki + sto, i], th=.25) + int(pc.lag) + mid_pki - sfr
sync_time.append(pki)
pc.sync_time = np.array(sync_time) - pc.lag # [speaker, microphone]
lag_acc = lag_acc + lag * samples_per_sweep[iel] / (samples_per_sweep[-1] * n_sweeps[iel])
pcss.append(pcs)
if final:
# Lag of the final impulse is linearly extrapolated from that of p2. There's no accumulated
# lag involved since n_groups is 1.
pc = t_wave(waves=[dec_wave[p3[0] - hfs // 2 : p3[0] + hfs].T, wave[p3[0] - hfs : p3[0] + hfs + lenivs].T] if clipwaves == 2 else [dec_wave[p3[0] - hfs // 2 : p3[0] + hfs].T],
lag=lag * ip3 / n_sweeps[iel], lpsos=lpsos, pw_order=pw_order, pos=np.array([azimuths[0][0], 0, 1]))
if invsync is not None:
sync_time = []
for oc in invsync['occurence']:
ti = int(p3[0] + oc * fs)
pki = get_onset_peak(dec_sync[ti + int(pc.lag) - 256 : ti + int(pc.lag) + 1024], th=.25) + int(pc.lag) - 256
sync_time.append(pki)
pc.sync_time = np.array(sync_time) - pc.lag
if beep is not None:
# Calculate local-remote delays, and do so within the good range
bbl = wave[p3[0] - 2 * fs : p3[0] - fs * 11 // 10]
bbr = wave[p3[0] - 1 * fs : p3[0] - fs // 10]
blpwf = [librosa.lpc(bl, order=15) for bl in bbl.T]
blpw = [scipy.signal.lfilter(bp, [1], bl)[15:] for bp, bl in zip(blpwf, bbl.T)]
brpw = [scipy.signal.lfilter(bp, [1], br)[15:] for bp, br in zip(blpwf, bbr.T)]
pc.LRd = [np.argmax(scipy.signal.fftconvolve(br, bl[::-1])[len(bl) - 1 + mLRd - tol : len(bl) - 1 + mLRd + tol]) + mLRd - tol for bl, br in zip(blpw, brpw)]
# cLRd = np.argmax(np.sum([scipy.signal.fftconvolve(br, bl[::-1]) for bl, br in zip(bbl.T, bbr.T)], axis=0)) - (len(bbl) - 1)
# pc.LRd = [cLRd] + [np.argmax(scipy.signal.fftconvolve(br, bl[::-1])) - (len(bl) - 1) for bl, br in zip(bbl.T, bbr.T)]
pcss[-1].append(pc)
return pcss
这个pcss到底是什么形式,(二维列表)为什么每个元素是一个方向的IR片段形式