% TODO:
% [ ] waterfall sounds
% [X] tweak CB bandwidths at LF to be wider
+% [ ] consider a floor in mask to interpolate missing bits
+% [ ] some sort of filter on min amp/diff from previous peak, e.g. vk5qi:161, 172
+% + min spacing in bark? Make larger at higher freq?
+% + or some other measure, like making sure choice minimises MSE
% [ ] way to output at various processing steps, like PF initial mask, pre PF
% [ ] BPF at all?
% [ ] phase model? Fit LPC, just swing phase at peaks? Try no phase tweaks
1;
-% model mask using just a few samples of critical band filter centre frequencies
+% Create a "decimated mask" model using just a few samples of
+% critical band filter centre frequencies. For voiced speech,
+% we fit the amplitude of these samples to a straight line.
+% TODO:
+% [ ] track down bg noises on vk5qi and kristoff
+% [ ] return data for plotting, like slope m
+% [ ] quantise m
-function [newmaskdB local_maxima_sort] = make_newmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz)
+function [decmaskdB local_maxima_sort] = make_decmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz)
- % find local maxima and sort in descending order of magnitude
+ % band pass filter: limit search to 250 to 3800 Hz
+
+ m_st = max(1,floor((pi*250/4000)/Wo));
+ m_en = floor((pi*3800/4000)/Wo);
+
+ % We start off by assuming that the local maxima in the masking
+ % curve are the centres of the samples we want to keep.
local_maxima = [];
- if maskdB(1) > maskdB(2)
- local_maxima = [local_maxima; AmdB(1) 1];
+ if maskdB(m_st) > maskdB(m_st+1)
+ local_maxima = [local_maxima; AmdB(m_st) m_st];
end
- for m=2:L-1
+ for m=m_st+1:m_en-1
if (maskdB(m-1) < maskdB(m)) && (maskdB(m) > maskdB(m+1))
local_maxima = [local_maxima; AmdB(m) m];
end
end
[nlm tmp] = size(local_maxima);
+
+ % Occasionally there are no local maxima so pop one in
+
if nlm == 0
- local_maxima = [AmdB(1) 1];
+ local_maxima = [AmdB(m_st) m_st];
nlm = 1;
end
- local_maxima_sort = flipud(sortrows(local_maxima,1));
+
+ % fit a straight line to the amplitudes of our candidate samples,
+ % this will help us later when we code and transmit the amplitude
+ % of each sample
+
+ if nlm > 1
+ [m b] = linreg(local_maxima(:,2), local_maxima(:,1), nlm);
+ local_maxima_fit = local_maxima(:,2)*m + b;
+ else
+ local_maxima_fit = local_maxima(1,1);
+ end
+
+ % Remove any outliers to the straight line fit: Sometimes local
+ % maxima appear in an antiformant regions, say if F1 and F2 are a
+ % long way apart. After a straight line fit the anti-format
+ % amplitide sample will be way off the straight line, which will
+ % cause a spike of spectral energy right where we don't want it -
+ % in the middle of an antiformat. So lets test the fit of each
+ % sample, and only include those that work well with the straight
+ % line fit. For voiced frames, m < 0. For UV frames, we don't
+ % care about the straight line fit as unvoiced speech is all over
+ % the place in amplitude anyway.
+
+ local_maxima2 = [];
+ for i=1:nlm
+ if (local_maxima_fit(i) - local_maxima(i,1) < 6) || (m > 0)
+ local_maxima2 = [local_maxima2; local_maxima(i,1) local_maxima(i,2)];
+ end
+ end
+
+ % now sort and keep the top 4 samples
+
+ local_maxima_sort = flipud(sortrows(local_maxima2,1));
+ [nlm tmp] = size(local_maxima_sort);
+ nlm = min(nlm,4);
+ local_maxima_sort = local_maxima_sort(1:nlm,:);
- % construct new mask from a small subset of sorted local maxima,
- % this is the highly compressed model of the amplitude envelope
- % that we send to the decoder
+ % fit straight line again, this time with outliers removed
- masker_amps_dB = local_maxima_sort(1:min(4,nlm),1);
- masker_freqs_kHz = local_maxima_sort(1:min(4,nlm),2)*Wo*4/pi;
- newmaskdB = determine_mask(masker_amps_dB, masker_freqs_kHz, mask_sample_freqs_kHz);
+ [m b] = linreg(local_maxima_sort(:,2), local_maxima_sort(:,1), nlm);
+ masker_amps_dB = local_maxima_sort(:,2)*m + b;
+ masker_freqs_kHz = local_maxima_sort(:,2)*Wo*4/pi;
+ %masker_amps_dB = local_maxima_sort(:,1);
+ %masker_freqs_kHz = local_maxima_sort(:,2)*Wo*4/pi;
+
+ % and construct new, decimated mask using our small set of
+ % samples, with amplitudes fitted to a linear line
+
+ decmaskdB = determine_mask(masker_amps_dB, masker_freqs_kHz, mask_sample_freqs_kHz);
endfunction
grid
endfunction
+
+% produce a scatter diagram of amplitudes
+
+function amp_scatter(samname)
+
+ model_name = strcat(samname,"_model.txt");
+ model = load(model_name);
+ [frames nc] = size(model);
+ max_amp = 80;
+
+ Am_out_name = sprintf("%s_am.out", samname);
+ freqs = [];
+ ampsdB = [];
+
+ for f=1:frames
+
+ L = min([model(f,2) max_amp-1]);
+ Wo = model(f,1);
+ Am = model(f,3:(L+2));
+ AmdB = 20*log10(Am);
+
+ maskdB = mask_model(AmdB, Wo, L);
+ mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
+ [newmaskdB local_maxima] = make_newmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
+
+ [nlm tmp] = size(local_maxima);
+ freqs = [freqs (local_maxima(1:min(4,nlm),2)*Wo*4000/pi)'];
+ an_ampsdB = local_maxima(1:min(4,nlm),1)';
+ ampsdB = [ampsdB an_ampsdB-mean(an_ampsdB)];
+ end
+
+ figure(1)
+ plot(freqs, ampsdB,'+');
+ figure(2)
+ subplot(211)
+ hist(freqs,20)
+ subplot(212)
+ hist(ampsdB,20)
+endfunction
+
+%amp_scatter("../build_linux/src/all")
function newamp_batch(samname)
newamp;
-
+ more off;
+
model_name = strcat(samname,"_model.txt");
model = load(model_name);
[frames nc] = size(model);
maskdB = mask_model(AmdB, Wo, L);
mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
- [newmaskdB local_maxima] = make_newmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
+ [newmaskdB local_maxima] = make_decmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
[nlm tmp] = size(local_maxima);
non_masked_m = local_maxima(1:min(4,nlm),2);
- maskdB_pf = newmaskdB;
+
+ % post filter - bump up samples by 6dB, reduce mask by same level to normalise gain
+
+ maskdB_pf = newmaskdB - 6;
maskdB_pf(non_masked_m) = maskdB_pf(non_masked_m) + 6;
-
+
+ if 0
+ % Early work as per blog post part 1
+ % Attempt 1
+ maskdB_pf = zeros(1,L);
+ maskdB_pf(non_masked_m) = maskdB(non_masked_m);
+ % Attempt 2
+ %maskdB_pf = maskdB;
+ % Attempt 3
+ %maskdB_pf = maskdB;
+ %maskdB_pf(non_masked_m) += 6;
+ end
+
Am_ = zeros(1,max_amp);
Am_(2:L) = 10 .^ (maskdB_pf(1:L-1)/20);
axis([1 length(s) -20000 20000]);
figure(2);
+ clf;
Wo = model(f,1);
L = model(f,2);
Am = model(f,3:(L+2));
plot((1:L)*Wo*4000/pi, AmdB,";Am;r");
axis([1 4000 0 80]);
hold on;
+ plot((1:L)*Wo*4000/pi, AmdB,";Am;r+");
plot((0:255)*4000/256, Sw(f,:),";Sw;");
[maskdB Am_freqs_kHz] = mask_model(AmdB, Wo, L);
- plot(Am_freqs_kHz*1000, maskdB, 'g');
+ %plot(Am_freqs_kHz*1000, maskdB, 'g');
- % Analysis by synthesis ---------------------------------------
+ % optionally show harmonics that are not masked
- mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
- [newmaskdB local_maxima] = make_newmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
+ not_masked_m = find(maskdB < AmdB);
+ if 0
+ plot(not_masked_m*Wo*4000/pi, 70*ones(1,length(not_masked_m)), 'bk+');
+ end
+
+ % optionally plot synthesised spectrum (early simple model)
- plot(local_maxima(:,2)*Wo*4000/pi, 70*ones(1,length(local_maxima)), 'r+');
- plot(mask_sample_freqs_kHz*1000, newmaskdB, 'm');
+ if 0
+ AmdB_ = maskdB;
+ AmdB_(not_masked_m) += 6;
+ plot(Am_freqs_kHz*1000, AmdB_, 'g');
+ plot(Am_freqs_kHz*1000, AmdB_, 'g+');
+ end
+
+ % estimate low rate samples
+
+ mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
+ [decmaskdB local_maxima] = make_decmask(maskdB, AmdB, Wo, L, mask_sample_freqs_kHz);
+
+ [nlm tmp] = size(local_maxima(:,2));
+ nlm = min(nlm,4);
+ tonef_kHz = local_maxima(1:nlm,2)*Wo*4/pi;
+ toneamp_dB = local_maxima(1:nlm,1);
+ plot(tonef_kHz*1000, 70*ones(1,nlm), 'bk+');
+ plot(mask_sample_freqs_kHz*1000, decmaskdB, 'm');
+
+ % fit a line to amplitudes
+
+ %[m b] = linreg(tonef_kHz, toneamp_dB, nlm);
+ %plot(tonef_kHz*1000, tonef_kHz*m + b, "bk");
+ %plot(tonef_kHz*1000, 60 + toneamp_dB - (tonef_kHz*m + b), "r+");
% optionally plot all masking curves