% quantisation
if nargin == 4
- [res dk_ vq_ind] = mbest(vq, dk, 4);
- [D1_ D1_ind] = quantise(0:(2000/15):2500, D1);
+ [res tmp vq_ind] = mbest(vq, dk, 4);
+ [tmp D1_ind] = quantise(0:(2000/15):2500, D1);
ind = [vq_ind D1_ind];
+ [dk_ D1_] = index_to_params(ind, vq);
printf(" vq: %4.1f D1: %4.1f\n", std(dk_ - dk), D1_- D1);
else
dk_ = dk;
D1_ = D1;
end
+ maskdB_ = params_to_mask(L, k, dk_, D1_);
+
+endfunction
+
+
+function amodel = post_filter(amodel)
+  % Apply the amplitude post filter to one frame of model parameters.
+  %
+  % amodel is a single model row: amodel(2) holds the harmonic count and
+  % the harmonic amplitudes start at amodel(3).  At most max_amp-1
+  % harmonics are processed.  The filtered amplitudes are written back
+  % into the same positions and the updated row is returned.
+
+  max_amp = 80;
+  n_harm = min([amodel(2) max_amp-1]);
+  harm_idx = 3:(n_harm+2);
+
+  % work in the dB domain
+  in_dB = 20*log10(amodel(harm_idx));
+
+  % scaling the dB values by 1.5 widens the level differences between
+  % harmonics; the constant offset then restores the original peak level
+  out_dB = 1.5*in_dB;
+  out_dB = out_dB + (max(in_dB) - max(out_dB));
+
+  % back to linear amplitudes
+  amodel(harm_idx) = 10 .^ (out_dB(1:n_harm)/20);
+endfunction
+
+
+function [dk_ D1_] = index_to_params(ind, vq)
+  % Rebuild the quantised parameters dk_ and D1_ from a vector of indexes.
+  %
+  % vq is indexed as vq(row, :, stage).  ind(1:stages) selects one row
+  % from each stage; the selected rows are summed to give dk_.
+  % ind(stages+1) indexes the table 0:(2000/15):2500 to give D1_.
+
+  [n_rows, order, stages] = size(vq);
+
+  % D1 comes straight out of its lookup table
+  D1_levels = 0:(2000/15):2500;
+  D1_ = D1_levels(ind(stages+1));
+
+  % dk_ is the sum of the selected entry from every codebook stage
+  dk_ = zeros(1, order);
+  for stage = 1:stages
+    dk_ = dk_ + vq(ind(stage), :, stage);
+  end
+endfunction
+
+
+% decoder side
+
+function maskdB_ = params_to_mask(L, k, dk_, D1_)
+
+ anchor = floor(7*L/8);
+
% convert quantised dk back to rate L magnitude spectrum
Dk_ = fft(dk_);
D_ = zeros(1,L);
- D_(1) = D1; % energy seprately quantised
+ D_(1) = D1_; % energy seprately quantised
D_(2:k-1) = Dk_(2:k-1);
D_(L-k+1:L) = Dk_(k+1:2*k);
d_ = ifft(D_); % back to spectrum at rate L
ypts = [ maskdB_(anchor-1) maskdB_(anchor) (maskdB_(anchor)-10)];
mask_pp = splinefit(xpts, ypts, 1);
maskdB_ = [maskdB_(1:anchor) ppval(mask_pp, anchor+1:L)];
-
endfunction
+function index = encode_log_Wo(Wo, bits)
+  % Quantise Wo to an integer index on a logarithmic scale.
+  %
+  % Wo    value to quantise; the quantiser range is 2*pi/160 .. 2*pi/20
+  % bits  number of bits, giving 2^bits levels
+  %
+  % Returns index in the range 0 .. 2^bits-1.  Values of Wo outside the
+  % quantiser range are clamped to the first or last index.
+
+  Wo_levels = 2.^bits;
+  Wo_min = 2*pi/160;
+  Wo_max = 2*pi/20;
+
+  % position of Wo inside the range on a log10 scale: 0 at Wo_min, 1 at
+  % Wo_max (not called "norm", which would shadow the built-in norm())
+  log_pos = (log10(Wo) - log10(Wo_min))/(log10(Wo_max) - log10(Wo_min));
+
+  % round to the nearest level, then clamp to the valid index range
+  index = floor(Wo_levels * log_pos + 0.5);
+  index = max(index, 0);
+  index = min(index, Wo_levels-1);   % trailing ; keeps the console quiet
+endfunction
+
+
+function Wo = decode_log_Wo(index, bits)
+  % Convert a quantiser index back to Wo.
+  %
+  % The range 2*pi/160 .. 2*pi/20 is divided into 2^bits equal steps on
+  % a log10 scale; index 0 decodes to 2*pi/160 and each increment of
+  % index moves one step up the log scale.
+
+  Wo_min = 2*pi/160;
+  Wo_max = 2*pi/20;
+  n_levels = 2.^bits;
+
+  log_lo = log10(Wo_min);
+  log_step = (log10(Wo_max) - log_lo)/n_levels;
+
+  % step along the log scale, then return to the linear domain
+  Wo = 10 .^ (log_lo + log_step*index);
+endfunction
function tp = est_pf_locations(maskdB_)
faw = fopen(Aw_out_name,"wb");
end
- fam = fopen(Am_out_name,"wb");
- if synth_phase
- faw = fopen(Aw_out_name,"wb");
- end
-
if vq_en
load vq;
end
+
% encoder loop ------------------------------------------------------
sd_sum = 0;
D1_log = [D1_log D1];
end
end
- %AmdB_(1:L/8) = maskdB(1:L/8);
- AmdB_pf = AmdB_*1.5;
- AmdB_pf += max(AmdB_) - max(AmdB_pf);
sd_sum += std(maskdB - AmdB_);
- %AmdB_pf = AmdB_;
Am_ = zeros(1,max_amp);
- Am_ = 10 .^ (AmdB_pf(1:L-1)/20);
+ Am_ = 10 .^ (AmdB_(1:L-1)/20);
model_(f,3:(L+1)) = Am_;
end
- % decoder loop -----------------------------------------------------
+ if train == 0
+ decode_model(model_, Am_out_name, Aw_out_name, synth_phase, dec_in_time);
+ end
+
+ printf("\nsd_sum: %5.2f\n", sd_sum/frames);
+ printf("\n");
+endfunction
- if train
- % short circuit decoder
- frames = 0;
+
+% generate array of indexes
+% convert to bits and save to file of one char/bit
+% function to encode and save to file of bits
+% function to decode from file of bits
+% quantise Wo
+% move voicing bit through, move Wo through, maybe just load Wo and L and v?
+
+function decode_model(model_, Am_out_name, Aw_out_name, synth_phase, dec_in_time)
+ max_amp = 80;
+
+ fam = fopen(Am_out_name,"wb");
+ if synth_phase
+ faw = fopen(Aw_out_name,"wb");
end
+ % decoder loop -----------------------------------------------------
+
+ [frames tmp] = size(model_);
for f=1:frames
%printf("frame: %d\n", f);
L = min([model_(f,2) max_amp-1]);
Wo = model_(f,1);
Am_ = model_(f,3:(L+2));
+ AmdB_ = 20*log10(Am_);
+ sample_freqs_kHz = (1:L)*Wo*4/pi;
- maskdB_ = 20*log10(Am_);
- mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
+ % run post filter ahead of time so dec in time has post filtered frames to work with
+
+ if f+3 <= frames
+ model_(f+3,:) = post_filter(model_(f+3,:));
+ end
if dec_in_time
% decimate mask samples in time
decimate = 4;
- maskdB_ = decimate_frame_rate(model_, decimate, f, frames, mask_sample_freqs_kHz);
-
- if 0
- a_non_masked_m = est_pf_locations(maskdB_);
- end
-
- if 0
-
- left_f = decimate*floor((f-1)/decimate)+1;
- right_f = left_f + decimate;
-
- m = max(find(non_masked_m(left_f,:) > 0));
- a_non_masked_m = non_masked_m(left_f,1:m);
-
- % now convert these to m on current frame
-
- left_L = min([model_(left_f,2) max_amp-1]);
- a_non_masked_m = round(a_non_masked_m*L/left_L);
- end
- else
- % read non-masked (PF freqs) from analysis stage
- % number of non-masked samples is variable when not using AbyS,
- % but fixed when using AbyS
-
- m = max(find(non_masked_m(f,:) > 0));
- a_non_masked_m = non_masked_m(f,1:m);
- end
-
- % post filter - bump up samples by 6dB, reduce mask by same level to normalise gain
-
- if (postfilter == 1) || (postfilter == 2)
-
- % Apply post filter - enhances formants, suppresses others, as pe Part 1 blog
- % Pretty simple but makes a big difference
-
- maskdB_pf = maskdB_ - 6;
- maskdB_pf(a_non_masked_m) = maskdB_pf(a_non_masked_m) + 6;
-
- Am_ = zeros(1,max_amp);
- Am_ = 10 .^ (maskdB_pf(1:L-1)/20);
- model_(f,3:(L+1)) = Am_;
- else
- maskdB_pf = maskdB_;
+ AmdB_ = decimate_frame_rate(model_, decimate, f, frames, sample_freqs_kHz);
end
Am_ = zeros(1,max_amp);
- Am_(2:L) = 10 .^ (maskdB_pf(1:L-1)/20); % C array doesnt use A[0]
+ Am_(2:L) = 10 .^ (AmdB_(1:L-1)/20); % C array doesnt use A[0]
fwrite(fam, Am_, "float32");
if synth_phase
% synthesis phase spectra from magnitiude spectra using minimum phase techniques
fft_enc = 512;
- model_(f,3:(L+2)) = 10 .^ (maskdB_(1:L)/20);
+ model_(f,3:(L+2)) = 10 .^ (AmdB_(1:L)/20);
phase = determine_phase(model_, f);
assert(length(phase) == fft_enc);
Aw = zeros(1, fft_enc*2);
end
fclose(fam);
- if synth_phase
- fclose(faw);
- end
-
- printf("\nsd_sum: %5.2f\n", sd_sum/frames);
- printf("\n");
+ fclose(faw);
endfunction