melvq;
-function [maskdB_ maskdB_cyclic Dabs dk_ D1 ind] = decimate_in_freq(maskdB, cyclic=1, k=7, vq)
-
- L = length(maskdB);
+function [maskdB_ maskdB_cyclic Dabs dk_ D1_ ind] = decimate_in_freq(maskdB, cyclic=1, k=7, vq)
% Lets try to come up with a smoothed, cyclic model. Replace
% points from 3500 Hz to 4000Hz with a sequence that joins without
% it more cyclical and make the DFT happier, less high freq
% energy. Yes, happier is an extremely technical term.
+ L = length(maskdB);
anchor = floor(7*L/8);
xpts = [ anchor-1 anchor L+1 L+2];
ypts = [ maskdB(anchor-1) maskdB(anchor) maskdB(1) maskdB(2)];
% Now DFT, truncating DFT coeffs to undersample
if cyclic
- D = fft(maskdB_cyclic);
+ D = fft(maskdB_cyclic)/L;
else
- D = fft(maskdB);
+ D = fft(maskdB)/L;
end
Dabs = abs(D); % this returned for plotting
- D1 = D(1); % pass energy back for training
% truncate D to rate k, convert to 2k length real vector for quantisation and transmission
- % note we remove DC term as this is the frame energy that we quantise elsewhere
Dk = [0 D(2:k-1) real(D(k)) D(L-k+1:L)];
- dk = real(ifft(Dk)); % Q: why is there any imag part at all?
-
+ dk = real(ifft(Dk));
+ D1 = D(1);
+
% quantisation
if nargin == 4
[res tmp vq_ind] = mbest(vq, dk, 4);
- [tmp D1_ind] = quantise(0:(2500/15):2500, D1);
+ D1_tab = 0:(60/15):60;
+ assert(length(D1_tab) == 16);
+ [tmp D1_ind] = quantise(D1_tab, D1);
ind = [vq_ind D1_ind];
[dk_ D1_] = index_to_params(ind, vq);
- D1_ = D1;
- %printf(" vq: %4.1f D1: %4.1f\n", std(dk_ - dk), D1_- D1);
+ %std(dk_ - dk)
else
dk_ = dk;
D1_ = D1;
end
- maskdB_ = params_to_mask(L, k, dk_, D1_);
-
-endfunction
-
+ if 0
+ % convert quantised dk back to rate L magnitude spectrum
-function amodel = post_filter(amodel)
- max_amp = 80;
+ Dk_ = fft(dk_);
+ D_ = zeros(1,L);
+ D_(1) = D1_; % lets assume energy comes through separately
+ D_(2:k-1) = Dk_(2:k-1);
+ D_(L-k+1:L) = Dk_(k+1:2*k);
+ d_ = L*ifft(D_); % back to spectrum at rate L
+ maskdB_ = real(d_);
+
+ % Finally fix up last 500Hz, taper down 10dB at 4000Hz
- % post filter
+ xpts = [ anchor-1 anchor L];
+ ypts = [ maskdB_(anchor-1) maskdB_(anchor) (maskdB_(anchor)-10)];
+ mask_pp = splinefit(xpts, ypts, 1);
+ maskdB_ = [maskdB_(1:anchor) ppval(mask_pp, anchor+1:L)];
+ %printf("two masks: %f\n", std(maskdB_-maskdB1_));
+ end
- L = min([amodel(2) max_amp-1]);
- Wo = amodel(1);
- Am_ = amodel(3:(L+2));
- AmdB_ = 20*log10(Am_);
- AmdB_pf = AmdB_*1.5;
- AmdB_pf += max(AmdB_) - max(AmdB_pf);
- amodel(3:(L+2)) = 10 .^ (AmdB_pf(1:L)/20);
+ maskdB_ = params_to_mask(L, k, dk_, D1_);
endfunction
+
function [dk_ D1_] = index_to_params(ind, vq)
[Nvec order stages] = size(vq);
dk_ = zeros(1,order);
for s=1:stages
dk_ = dk_ + vq(ind(s),:,s);
end
- D1_tab = 0:(2500/15):2500;
+ D1_tab = 0:(60/15):60;
D1_ = D1_tab(ind(stages+1));
endfunction
D_(1) = D1_; % energy seperately quantised
D_(2:k-1) = Dk_(2:k-1);
D_(L-k+1:L) = Dk_(k+1:2*k);
- d_ = ifft(D_); % back to spectrum at rate L
+ d_ = L*ifft(D_); % back to spectrum at rate L
maskdB_ = real(d_);
% Finally fix up last 500Hz, taper down 10dB at 4000Hz
endfunction
-function index = encode_log_Wo(Wo, bits)
- Wo_levels = 2.^bits;
- Wo_min = 2*pi/160;
- Wo_max = 2*pi/20;
-
- norm = (log10(Wo) - log10(Wo_min))/(log10(Wo_max) - log10(Wo_min));
- index = floor(Wo_levels * norm + 0.5);
- index = max(index, 0);
- index = min(index, Wo_levels-1);
-endfunction
-
-
-function Wo = decode_log_Wo(index, bits)
- Wo_levels = 2.^bits;
- Wo_min = 2*pi/160;
- Wo_max = 2*pi/20;
-
- step = (log10(Wo_max) - log10(Wo_min))/Wo_levels;
- Wo = log10(Wo_min) + step*index;
-
- Wo = 10 .^ Wo;
-endfunction
-
-
function tp = est_pf_locations(maskdB_)
% find turning points - used for finding PF freqs when we decimate in time
% decimate frame rate of mask, use linear interpolation in the log domain
-function [maskdB_ Wo L] = decimate_frame_rate(model, decimate, f, frames, mask_sample_freqs_kHz)
+function maskdB_ = decimate_frame_rate(model, decimate, f, frames, mask_sample_freqs_kHz)
max_amp = 80;
+ Wo = model(f,1);
+ L = min([model(f,2) max_amp]);
+
% determine frames that bracket the one we need to interp
left_f = decimate*floor((f-1)/decimate)+1;
left_fraction = 1 - mod((f-1),decimate)/decimate;
right_fraction = 1 - left_fraction;
- % printf("f: %d left_f: %d right_f: %d left_fraction: %3.2f right_fraction: %3.2f \n", f, left_f, right_f, left_fraction, right_fraction)
+ printf("f: %d left_f: %d right_f: %d left_fraction: %f right_fraction: %f \n", f, left_f, right_f, left_fraction, right_fraction)
% fit splines to left and right masks
right_AmdB = 20*log10(model(right_f,3:(right_L+2)));
right_mask_sample_freqs_kHz = (1:right_L)*right_Wo*4/pi;
- % printf(" right_Wo: %f left_Wo: %f right_L: %d left_L %d\n",right_Wo,left_Wo,right_L,left_L);
-
maskdB_left_pp = splinefit(left_mask_sample_freqs_kHz, left_AmdB, left_L);
maskdB_right_pp = splinefit(right_mask_sample_freqs_kHz, right_AmdB, right_L);
% determine mask for left and right frames, sampling at Wo for this frame
- Wo = left_fraction*left_Wo + right_fraction*right_Wo;
- L = floor(pi/Wo);
- %Wo = model(f,1); L = model(f,2);
-
mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
maskdB_left = ppval(maskdB_left_pp, mask_sample_freqs_kHz);
maskdB_right = ppval(maskdB_right_pp, mask_sample_freqs_kHz);
title('y-int');
endfunction
-
-
-% Given a matrix with indexes on each row, convert to a bit stream and
-% write to file. We only write every 4th frame due to DIT
-
-function write_bit_stream_file(fn, ind_log, bits_per_param)
- fbit = fopen(fn,"wb");
- decimate = 4;
-
- % take a row of quantiser indexes, convert to bits, save to file
-
- [frames nind] = size(ind_log);
- for f=1:decimate:frames
- frame_of_bits = [];
- arow = ind_log(f,:);
- for i=1:nind
- %printf("i: %d bits_per_param: %d\n", i, bits_per_param(i));
- some_bits = index_to_bits(arow(i), bits_per_param(i));
- frame_of_bits = [frame_of_bits some_bits];
- end
- fwrite(fbit, frame_of_bits, "uchar");
- end
- fclose(fbit);
-endfunction
-
-
-% Decode from a bit stream file
-
-function decode_bit_stream_file(samname, bits_per_param, ber_mask, ber)
- max_amp = 80;
- nc = max_amp + 3;
- load vq;
- [tmp1 k2 tmp2] = size(vq);
- k = k2/2;
- rand("seed", 0);
- f = 1;
-
- bit_stream_name = strcat(samname,".bit");
- fbit = fopen(bit_stream_name, "rb");
-
- model_= []; log_v = []; ind_log = [];
- nind = length(bits_per_param);
- bits_per_frame = sum(bits_per_param);
-
- % read a frame, decode to indexes, fill in model_ array
-
- [frame nread] = fread(fbit, sum(bits_per_param), "uchar");
- while (nread == bits_per_frame)
-
- % optionally add bit errors
-
- if nargin == 4
- error_pattern = bitand(rand(bits_per_frame,1) < ber, ber_mask);
- frame = bitxor(frame, error_pattern);
- end
-
- % read a frame, convert to indexes
-
- nbit = 1;
- ind = [];
- for i=1:nind
- field = frame(nbit:nbit+bits_per_param(i)-1);
- nbit += bits_per_param(i);
- ind = [ind bits_to_index(field, bits_per_param(i))];
- end
- ind_log = [ind_log; ind];
- printf("f: %d\n", f);
- ind
- if 0
- if sum(error_pattern)
- printf(" Error f: %d\n", f);
- %if f == 169
- % ind(1) = 50;
- %end
- end
- end
-
- % convert index to model parameters
-
- amodel_ = zeros(1,nc);
- amodel_(1) = Wo_ = decode_log_Wo(ind(1), 6);
- L_ = floor(pi/Wo_);
- amodel_(2) = L_ = min([L_ max_amp-1]);
-
- [dk_ D1_] = index_to_params(ind(3:5)+1, vq);
- AmdB_ = params_to_mask(L_, k, dk_, D1_);
-
- Am_ = zeros(1,max_amp);
- Am_ = 10 .^ (AmdB_(1:L_)/20);
- amodel_(3:(L_+2)) = Am_;
- model_ = [ model_; amodel_; zeros(3,nc)];
- f+=4;
-
- log_v = [log_v ind(2)];
-
- % read next frame
-
- [frame nread] = fread(fbit, sum(bits_per_param), "uchar");
- endwhile
-
- % decode entire array of model parameters
-
- decode_model(model_, samname, 1, 1);
-
- % save voicing file
-
- v_out_name = sprintf("%s_v.txt", samname);
- fv = fopen(v_out_name,"wt");
- for f=1:length(log_v)
- for i=1:4
- fprintf(fv,"%d\n",log_v(f));
- end
- end
- fclose(fv);
-
-endfunction
-
-
-% convert index to binary bits
-
-function bits = index_to_bits(value, numbits)
- levels = 2.^numbits;
- bits = zeros(1, numbits);
- for b=1:numbits
- bits(b) = bitand(value,2^(numbits-b)) != 0;
- end
-end
-
-
-function value = bits_to_index(bits, numbits)
- value = 2.^(numbits-1:-1:0) * bits;
-endfunction
-
-
-function decode_model(model_, samname, synth_phase, dec_in_time)
- max_amp = 80;
-
- Am_out_name = sprintf("%s_am.out", samname);
- Aw_out_name = sprintf("%s_aw.out", samname);
- Wo_out_name = sprintf("%s_Wo.out", samname);
- fam = fopen(Am_out_name,"wb");
- fWo = fopen(Wo_out_name,"wb");
- if synth_phase
- faw = fopen(Aw_out_name,"wb");
- end
-
- % decoder loop -----------------------------------------------------
-
- [frames tmp] = size(model_);
-
- for f=1:frames
- L = min([model_(f,2) max_amp-1]);
- Wo = model_(f,1);
-
- Am_ = model_(f,3:(L+2));
- AmdB_ = 20*log10(Am_);
- sample_freqs_kHz = (1:L)*Wo*4/pi;
- printf("frame: %d Fo: %f L: %d\n", f, Wo*4000/pi, L);
-
- % run post filter ahead of time so dec in time has post filtered frames to work with
-
- if f+4 <= frames
- model_(f+4,:) = post_filter(model_(f+4,:));
- % printf("pf on %d\n", f+4);
- end
-
- if dec_in_time
- % decimate mask samples in time
-
- decimate = 4;
- [AmdB_ Wo L] = decimate_frame_rate(model_, decimate, f, frames, sample_freqs_kHz);
- end
-
- Am_ = zeros(1,max_amp);
- Am_(2:L) = 10 .^ (AmdB_(1:L-1)/20); % C array doesnt use A[0]
- fwrite(fam, Am_, "float32");
- fwrite(fWo, Wo, "float32");
-
- if synth_phase
-
- % synthesis phase spectra from magnitiude spectra using minimum phase techniques
-
- fft_enc = 512;
- model_(f,1) = Wo;
- model_(f,2) = L;
- model_(f,3:(L+2)) = 10 .^ (AmdB_(1:L)/20);
- phase = determine_phase(model_, f);
- assert(length(phase) == fft_enc);
- Aw = zeros(1, fft_enc*2);
- Aw(1:2:fft_enc*2) = cos(phase);
- Aw(2:2:fft_enc*2) = -sin(phase);
-
- fwrite(faw, Aw, "float32");
- end
- end
-
- fclose(fam);
- fclose(fWo);
- fclose(faw);
-endfunction
-
-
-% work out a gradient m and y intercept c to map x to y using
-% a least sqaures fit, y_ = m*x + c
-
-function [m c] = map_vector(x, y)
- L = length(x);
- num = sum(x.*y) - sum(y)*sum(x)/L;
- den = sum(x.*x) - sum(x)*sum(x)/L;
- m = num/den;
- c = (sum(y) - m*sum(x))/L;
-endfunction
-
-
-% for each target
-% run thru each cb entry
-% find m and c
-% measure MSE
-% record best
-
-function [best_i best_mse best_vec] = search_linear_fit(target, vq)
- [rows cols] = size(vq);
- best_mse = 1E32;
- best_i = 1;
- for i=1:rows
- [m c] = map_vector(vq(i,:),target);
- error = m*vq(i,:) + c - target;
- mse = sum(error.^2);
- if mse < best_mse
- best_mse = mse;
- best_i = i;
- best_vec = m*vq(i,:) + c;
- best_m = m; best_c = c;
- end
- end
- printf("best_m: %f best_c: %f\n", best_m, best_c);
-endfunction
-
% $ cd ~/codec2-dev/octave
% octave:14> newamp_batch("../build_linux/src/hts1a")
% ~/codec2-dev/build_linux/src$ ./c2sim ../../raw/hts1a.raw --amread hts1a_am.out -o - | play -t raw -r 8000 -s -2 -
-%
-
-% Generate voicing file:
-% $ ./c2sim ../../raw/hts2a.raw --dump hts2a --lpc 10 --phase0 --dump_pitch_e hts2a_pitche.txt
-
-% Bit stream decode:
-% $ ./c2sim ../../raw/hts2a.raw --amread hts2a_am.out --awread hts2a_aw.out --Woread hts2a_Wo.out --phase0 --postfilter -o - | play -t raw -r 8000 -s -2 -
+% Or with a little more processing:
+% codec2-dev/build_linux/src$ ./c2sim ../../raw/hts2a.raw --amread hts2a_am.out --awread hts2a_aw.out --phase0 --postfilter --Woread hts2a_Wo.out -o - | play -q -t raw -r 8000 -s -2 -
% process a whole file and write results
more off;
max_amp = 80;
+ k = 10;
+ decimate = 4;
dec_in_freq = 1;
- postfilter = 0;
- dec_in_time = 0;
+ dec_in_time = 1;
synth_phase = 1;
vq_en = 1;
- D_log = []; dk_log = []; D1_log = []; ind_log = [];
+ dk_log = []; D1_log = [];
train = 0;
- fully_quant = 0;
- Wo_quant = 0;
- diff_freq = 0;
+ Wo_quant = 1;
+ ind_log = [];
model_name = strcat(samname,"_model.txt");
model = load(model_name);
[frames nc] = size(model);
- %frames = 5;
-
- voicing_name = strcat(samname,"_pitche.txt");
- if exist(voicing_name, "file") == 2
- pitche = load(voicing_name);
- voicing = pitche(:, 3);
- end
model_ = zeros(frames, nc);
non_masked_m = zeros(frames,max_amp);
faw = fopen(Aw_out_name,"wb");
end
+ fam = fopen(Am_out_name,"wb");
+ if synth_phase
+ faw = fopen(Aw_out_name,"wb");
+ end
+
+ Wo_out_name = sprintf("%s_Wo.out", samname);
+ fWo = fopen(Wo_out_name,"wb");
+
+ voicing_name = strcat(samname,"_pitche.txt");
+ voicing = zeros(1,frames);
+ if exist(voicing_name, "file") == 2
+ pitche = load(voicing_name);
+ voicing = pitche(:, 3);
+ end
+
if vq_en
load vq;
- load d20_vq;
end
- load d20_vq;
-
- prev_dk_ = zeros(1,2*10);
% encoder loop ------------------------------------------------------
sd_sum = 0;
for f=1:frames
printf("%d ", f);
- Wo = model(f,1); L = model(f,2);
- model_(f,3:(L+2)) = Am = model(f,3:(L+2));
- AmdB = 20*log10(Am);
+ Wo = model(f,1);
+ L = min([model(f,2) max_amp-1]);
- if Wo_quant || fully_quant
+ if Wo_quant
ind_Wo = encode_log_Wo(Wo, 6);
- model_(f,1) = Wo_ = decode_log_Wo(ind_Wo, 6);
- L_ = floor(pi/Wo);
- model_(f,2) = L_ = min([L_ max_amp-1]);
- else
- model_(f,1) = Wo_ = Wo;
- model_(f,2) = L_ = L;
+ Wo = decode_log_Wo(ind_Wo, 6);
+ L = floor(pi/Wo);
+ L = min([L max_amp-1]);
end
+ model_(f,1) = Wo;
+ model_(f,2) = L;
+ model_(f,3:(L+2)) = Am = model(f,3:(L+2));
+
+ AmdB = 20*log10(Am);
+
% find mask
- mask_sample_freqs_kHz = (1:L_)*Wo_*4/pi;
- maskdB = mask_model(AmdB, Wo_, L_);
- maskdB_ = maskdB;
- AmdB_ = AmdB;
+ mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
+ maskdB = mask_model(AmdB, Wo, L);
+ maskdB_ = maskdB;
if dec_in_freq
if vq_en
- [AmdB_ tmp1 Dabs dk_ D1 ind_vq] = decimate_in_freq(maskdB, 1, 10, vq);
+ [maskdB_ tmp1 D dk_ D1_ ind_vq] = decimate_in_freq(maskdB, 1, k, vq);
else
- [AmdB_ tmp1 D dk_ D1] = decimate_in_freq(maskdB, 0, 10);
+ [maskdB_ tmp1 D dk_ D1_] = decimate_in_freq(maskdB, 1, k);
end
if train
dk_log = [dk_log; dk_];
- D1_log = [D1_log D1];
+ D1_log = [D1_log; D1_];
end
+ end
+ %maskdB_pf = maskdB_*1.5;
+ %maskdB_pf += max(maskdB_) - max(maskdB_pf);
+
+ % log info for bit stream
- if diff_freq
- diff_dk = dk_ - 0.9*prev_dk_;
- [res tmp vq_ind] = mbest(d20_vq, diff_dk, 4);
- %printf("vq_ind: %d\n", vq_ind);
- dk_d = 0.9*prev_dk_ + tmp;
- prev_dk_ = dk_d;
- AmdB_ = params_to_mask(L, 10, dk_d, D1);
- end
+ ind_log = [ind_log; ind_Wo voicing(f) (ind_vq-1) 0];
+
+ %sd_sum += sum(maskdB_ - maskdB);
+
+ Am_ = zeros(1,max_amp);
+ Am_ = 10 .^ (maskdB_(1:L)/20);
+ model_(f,3:(L+2)) = Am_;
+ end
+
+ bit_stream_name = strcat(samname,".bit");
+ bits_per_param = [6 1 8 8 4 1];
+ write_bit_stream_file(bit_stream_name, ind_log, bits_per_param);
+
+ % read in bit stream and convert to ind_log[]
+
+ ind_log = [];
+ fbit = fopen(bit_stream_name, "rb");
+ bits_per_frame = sum(bits_per_param);
+ nind = length(bits_per_param);
+
+ [frame nread] = fread(fbit, sum(bits_per_param), "uchar");
+ while (nread == bits_per_frame)
+ % read a frame, convert to indexes
+
+ nbit = 1;
+ ind = [];
+ for i=1:nind
+ field = frame(nbit:nbit+bits_per_param(i)-1);
+ nbit += bits_per_param(i);
+ ind = [ind bits_to_index(field, bits_per_param(i))];
end
- sd_sum += std(maskdB - AmdB_);
+ ind_log = [ind_log; ind];
+ [frame nread] = fread(fbit, sum(bits_per_param), "uchar");
+ endwhile
+ fclose(fbit);
- if fully_quant
- if vq_en
- ind_log = [ind_log; ind_Wo voicing(f) (ind_vq-1) 0];
- end
+ % convert ind_log to modem params
+
+ frames = 4*length(ind_log);
+ model_ = zeros(frames, max_amp+2);
+ v = zeros(frames,1);
+
+ fdec = 1;
+ for f=1:4:frames
+ ind_Wo = ind_log(fdec,1);
+
+ Wo = decode_log_Wo(ind_Wo, 6);
+ L = floor(pi/Wo);
+ L = min([L max_amp-1]);
+ model_(f,1) = Wo;
+ model_(f,2) = L;
+
+ v1 = ind_log(fdec,2);
+ if (fdec+1) < length(ind_log)
+ v5 = ind_log(fdec+1,2);
+ else
+ v5 = 0;
+ end
+ v(f:f+3) = est_voicing_bits(v1, v5);
+
+ ind_vq = ind_log(fdec,3:5) + 1;
+ [dk_ D1_] = index_to_params(ind_vq, vq);
+ maskdB_ = params_to_mask(L, k, dk_, D1_);
+ Am_ = zeros(1,max_amp);
+ Am_ = 10 .^ (maskdB_(1:L)/20);
+ model_(f,3:(L+2)) = Am_;
+
+ fdec += 1;
+ end
+
+ % decoder loop -----------------------------------------------------
+
+ if train
+ % short circuit decoder
+ frames = 0;
+ end
+
+ % run post filter ahead of time so dec in time has post filtered frames to work with
+
+ for f=1:frames
+ model_(f,:) = post_filter(model_(f,:));
+ end
+
+ for f=1:frames
+ %printf("frame: %d\n", f);
+ L = min([model_(f,2) max_amp-1]);
+ Wo = model_(f,1);
+ Am_ = model_(f,3:(L+2));
+
+ maskdB_ = 20*log10(Am_);
+
+ if dec_in_time
+ % decimate mask samples in time
+
+ [maskdB_ Wo L] = decimate_frame_rate2(model_, decimate, f, frames);
+ model_(f,1) = Wo;
+ model_(f,2) = L;
end
Am_ = zeros(1,max_amp);
- Am_ = 10 .^ (AmdB_(1:L_)/20);
- model_(f,3:(L_+2)) = Am_;
+ Am_(2:L) = 10 .^ (maskdB_(1:L-1)/20); % C array doesnt use A[0]
+ fwrite(fam, Am_, "float32");
+ fwrite(fWo, Wo, "float32");
+
+ if synth_phase
+
+ % synthesis phase spectra from magnitiude spectra using minimum phase techniques
+
+ fft_enc = 512;
+ model_(f,3:(L+2)) = 10 .^ (maskdB_(1:L)/20);
+ phase = determine_phase(model_, f);
+ assert(length(phase) == fft_enc);
+ Aw = zeros(1, fft_enc*2);
+ Aw(1:2:fft_enc*2) = cos(phase);
+ Aw(2:2:fft_enc*2) = -sin(phase);
+ fwrite(faw, Aw, "float32");
+ end
+ end
+
+ fclose(fam);
+ fclose(fWo);
+ if synth_phase
+ fclose(faw);
end
+
+ if exist(voicing_name, "file") == 2
+ % save voicing file
- if train == 0
- if fully_quant
- bit_stream_name = strcat(samname,".bit");
- bits_per_param = [6 1 8 8 4 1];
- write_bit_stream_file(bit_stream_name, ind_log, bits_per_param);
- decode_bit_stream_file(samname, bits_per_param);
- else
- decode_model(model_, samname, synth_phase, dec_in_time);
+ v_out_name = sprintf("%s_v.txt", samname);
+ fv = fopen(v_out_name,"wt");
+ for f=1:length(v)
+ fprintf(fv,"%d\n", v(f));
end
+ fclose(fv);
end
printf("\nsd_sum: %5.2f\n", sd_sum/frames);
printf("\n");
endfunction
+
+% decimate frame rate of mask, use linear interpolation in the log domain
+
+function [maskdB_ Wo L] = decimate_frame_rate2(model, decimate, f, frames)
+ max_amp = 80;
+
+ % determine frames that bracket the one we need to interp
+
+ left_f = decimate*floor((f-1)/decimate)+1;
+ right_f = left_f + decimate;
+ if right_f > frames
+ right_f = left_f;
+ end
+
+ % determine fraction of each frame to use
+
+ left_fraction = 1 - mod((f-1),decimate)/decimate;
+ right_fraction = 1 - left_fraction;
+
+ % printf("f: %d left_f: %d right_f: %d left_fraction: %3.2f right_fraction: %3.2f \n", f, left_f, right_f, left_fraction, right_fraction)
+
+ % fit splines to left and right masks
+
+ left_Wo = model(left_f,1);
+ left_L = min([model(left_f,2) max_amp]);
+ left_AmdB = 20*log10(model(left_f,3:(left_L+2)));
+ left_mask_sample_freqs_kHz = (1:left_L)*left_Wo*4/pi;
+
+ right_Wo = model(right_f,1);
+ right_L = min([model(right_f,2) max_amp]);
+ right_AmdB = 20*log10(model(right_f,3:(right_L+2)));
+ right_mask_sample_freqs_kHz = (1:right_L)*right_Wo*4/pi;
+
+ % printf(" right_Wo: %f left_Wo: %f right_L: %d left_L %d\n",right_Wo,left_Wo,right_L,left_L);
+ printf("%f %f\n", left_AmdB(left_L), right_AmdB(right_L));
+
+ maskdB_left_pp = splinefit(left_mask_sample_freqs_kHz, left_AmdB, left_L);
+ maskdB_right_pp = splinefit(right_mask_sample_freqs_kHz, right_AmdB, right_L);
+
+ % determine mask for left and right frames, sampling at Wo for this frame
+
+ Wo = left_fraction*left_Wo + right_fraction*right_Wo;
+ L = floor(pi/Wo);
+ %Wo = model(f,1); L = model(f,2);
+
+ mask_sample_freqs_kHz = (1:L)*Wo*4/pi;
+ maskdB_left = ppval(maskdB_left_pp, mask_sample_freqs_kHz);
+ maskdB_right = ppval(maskdB_right_pp, mask_sample_freqs_kHz);
+
+ maskdB_ = left_fraction*maskdB_left + right_fraction*maskdB_right;
+endfunction
+
+function amodel = post_filter(amodel)
+ max_amp = 80;
+
+ % post filter
+
+ L = min([amodel(2) max_amp-1]);
+ Wo = amodel(1);
+ Am_ = amodel(3:(L+2));
+ AmdB_ = 20*log10(Am_);
+ AmdB_pf = AmdB_*1.5;
+ AmdB_pf += max(AmdB_) - max(AmdB_pf);
+ amodel(3:(L+2)) = 10 .^ (AmdB_pf(1:L)/20);
+endfunction
+
+
+function index = encode_log_Wo(Wo, bits)
+ Wo_levels = 2.^bits;
+ Wo_min = 2*pi/160;
+ Wo_max = 2*pi/20;
+
+ norm = (log10(Wo) - log10(Wo_min))/(log10(Wo_max) - log10(Wo_min));
+ index = floor(Wo_levels * norm + 0.5);
+ index = max(index, 0);
+ index = min(index, Wo_levels-1);
+endfunction
+
+
+function Wo = decode_log_Wo(index, bits)
+ Wo_levels = 2.^bits;
+ Wo_min = 2*pi/160;
+ Wo_max = 2*pi/20;
+
+ step = (log10(Wo_max) - log10(Wo_min))/Wo_levels;
+ Wo = log10(Wo_min) + step*index;
+
+ Wo = 10 .^ Wo;
+endfunction
+
+% Given a matrix with indexes on each row, convert to a bit stream and
+% write to file. We only write every 4th frame due to DIT
+
+function write_bit_stream_file(fn, ind_log, bits_per_param)
+ fbit = fopen(fn,"wb");
+ decimate = 4;
+
+ % take a row of quantiser indexes, convert to bits, save to file
+
+ [frames nind] = size(ind_log);
+ for f=1:decimate:frames
+ frame_of_bits = [];
+ arow = ind_log(f,:);
+ for i=1:nind
+ %printf("i: %d bits_per_param: %d\n", i, bits_per_param(i));
+ some_bits = index_to_bits(arow(i), bits_per_param(i));
+ frame_of_bits = [frame_of_bits some_bits];
+ end
+ fwrite(fbit, frame_of_bits, "uchar");
+ end
+ fclose(fbit);
+endfunction
+
+
+% convert index to binary bits
+
+function bits = index_to_bits(value, numbits)
+ levels = 2.^numbits;
+ bits = zeros(1, numbits);
+ for b=1:numbits
+ bits(b) = bitand(value,2^(numbits-b)) != 0;
+ end
+end
+
+
+function value = bits_to_index(bits, numbits)
+ value = 2.^(numbits-1:-1:0) * bits;
+endfunction
+
+
+% determine 4 voicing bits based on 2 decimated voicing bits
+
+function [v] = est_voicing_bits(v1, v5)
+ if v1 == v5
+ v(1:4) = v1;
+ else
+ v(1:2) = v1;
+ v(3:4) = v5;
+ end
+endfunction
plot_spectrum = 1;
dec_in_freq = 1;
dec_in_time = 0;
- vq_en = 1;
- mask_en = 1;
- k = 10;
+ vq_en = 0;
+ mask_en = 0;
% load up text files dumped from c2sim ---------------------------------------
[frames tmp] = size(model);
load vq;
- load d20_vq;
-
- prev_dk_ = zeros(1,2*k);
% Keyboard loop --------------------------------------------------------------
- key = ' ';
+ k = ' ';
do
figure(1);
clf;
%a_non_masked_m = find(AmdB > maskdB);
%maskdB = maskdB - 6;
%maskdB(a_non_masked_m) = maskdB(a_non_masked_m) + 6;
- plot(Am_freqs_kHz*1000, maskdB, ';mask;g');
+ %plot(Am_freqs_kHz*1000, maskdB, ';mask;g');
if mask_en
- AmdB_ = AmdB = maskdB;
+ AmdB_ = maskdB;
else
- AmdB_ = AmdB = AmdB;
+ AmdB_ = AmdB;
end
if dec_in_freq
[tmp1 tmp2 D] = decimate_in_freq(AmdB, 0);
- [AmdB_ AmdB_cyclic D_cyclic dk D1] = decimate_in_freq(AmdB_, 1, k);
if vq_en
- [AmdB_ AmdB_cyclic D_cyclic dk_ D1_ ind] = decimate_in_freq(AmdB, 1, k, vq);
- plot(Am_freqs_kHz*1000, AmdB_, ';vq;c');
+ [AmdB_ AmdB_cyclic D_cyclic dk_] = decimate_in_freq(AmdB, 1, 10, vq);
+ else
+ [AmdB_ AmdB_cyclic D_cyclic dk_] = decimate_in_freq(AmdB_, 1, 10);
end
- % experimental differential in time
- % get mask from 20ms ago (two frames), VQ delta, put back together.
-
- diff_dk = dk - prev_dk_;
- [res tmp vq_ind] = mbest(d20_vq, diff_dk, 1);
- dk_d = prev_dk_ + tmp;
- prev_dk_ = dk_d;
- AmdB_d_ = params_to_mask(L, k, dk_d, D1);
- plot(Am_freqs_kHz*1000, AmdB_d_, ';vq diff;bk');
-
- %plot(Am_freqs_kHz*1000, AmdB_cyclic, ';mask cyclic;b');
- %AmdB_pf = AmdB_*1.5;
- %AmdB_pf += max(AmdB_) - max(AmdB_pf);
- %plot(Am_freqs_kHz*1000, AmdB_, ';ind vq;g');
-
- % option decode from indexes, used to test effect of bit errors on Wo
-
- if 0
- Wo_ = pi*169/4000;
- L_ = floor(pi/Wo_);
- if vq_en
- [dk_ D1_] = index_to_params(ind, vq);
- end
- maskdB_ = params_to_mask(L_, k, dk_, D1_);
- plot((1:L_)*Wo_*4000/pi, maskdB_, ';ind vq;b-+');
- end
+ plot(Am_freqs_kHz*1000, AmdB_cyclic, ';mask cyclic;b');
+ plot(Am_freqs_kHz*1000, AmdB_, ';mask trunc;c');
+ AmdB_pf = AmdB_*(1.5);
+ AmdB_pf += mean(AmdB) - mean(AmdB_pf);
+ %max(AmdB_pf)-max(AmdB_)
+ %AmdB_pf -= max(AmdB_pf)-max(AmdB_);
end
- axis([0 4000 00 80]);
+ %AmdB_pf = AmdB_*(1.5);
+ %AmdB_pf += mean(AmdB) - mean(AmdB_pf);
+ AmdB_pf = AmdB_;
+ plot(Am_freqs_kHz*1000, AmdB_pf, ';mask trunc pf;g');
% Optional decimated parameters
% need to general model_ parameters either side
if dec_in_time
decimate = 4;
- model_ = set_up_model_(model, f, decimate, vq_en, vq);
- [maskdB_dit Wo_ L_] = decimate_frame_rate(model_, decimate, f, frames, Am_freqs_kHz);
- plot((1:L_)*Wo_*4000/pi, maskdB_dit, ';mask dit;b');
+ model_ = set_up_model_(model, f, decimate, vq_en, vq);
+ maskdB_dit = decimate_frame_rate(model_, decimate, f, frames, Am_freqs_kHz);
+ plot(Am_freqs_kHz*1000, maskdB_dit, ';mask dit;b');
end
hold off;
figure(5)
clf
- stem(dk,'b');
- hold on;
- stem(dk_,'g');
- hold off;
+ stem(dk_)
end
% interactive menu ------------------------------------------
printf("\rframe: %d menu: n-next b-back q-quit m-mask_en", f);
fflush(stdout);
- key = kbhit();
+ k = kbhit();
- if (key == 'm')
+ if (k == 'm')
if mask_en
mask_en = 0;
else
mask_en = 1;
end
endif
- if (key == 'n')
+ if (k == 'n')
f = f + 1;
endif
- if (key == 'b')
+ if (k == 'b')
f = f - 1;
endif
- until (key == 'q')
+ until (k == 'q')
printf("\n");
endfunction
right_f = left_f + decimate;
model_(left_f,:) = set_up_maskdB_(model, left_f, vq_en, vq);
- model_(left_f,:) = post_filter(model_(left_f,:));
model_(right_f,:) = set_up_maskdB_(model, right_f, vq_en, vq);
- model_(right_f,:) = post_filter(model_(right_f,:));
model_(f,1) = model(f,1); % Wo
model_(f,2) = model(f,2); % L
AmdB = 20*log10(Am);
[maskdB Am_freqs_kHz] = mask_model(AmdB, Wo, L);
+ a_non_masked_m = find(AmdB > maskdB);
+ maskdB = maskdB - 6;
+ maskdB(a_non_masked_m) = maskdB(a_non_masked_m) + 6;
+ if 0
if vq_en
- maskdB_ = decimate_in_freq(maskdB, 1, 10, vq);
+ maskdB_ = decimate_in_freq(maskdB, 1, 7, vq);
else
- maskdB_ = decimate_in_freq(maskdB, 1, 10);
+ maskdB_ = decimate_in_freq(maskdB, 1);
+ end
end
maskdB_ = maskdB;