From aee0f395a0e4f74e314010719bbb59d804a03a3f Mon Sep 17 00:00:00 2001 From: drowe67 Date: Sat, 31 Dec 2016 00:43:21 +0000 Subject: [PATCH] first pass at decoder refactored to run frame by frame git-svn-id: https://svn.code.sf.net/p/freetel/code@2950 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/octave/newamp1_batch.m | 113 +++++++++++++++++++++++++++- codec2-dev/octave/newamp1_compare.m | 14 ++-- 2 files changed, 120 insertions(+), 7 deletions(-) diff --git a/codec2-dev/octave/newamp1_batch.m b/codec2-dev/octave/newamp1_batch.m index d002712c..90346ee6 100644 --- a/codec2-dev/octave/newamp1_batch.m +++ b/codec2-dev/octave/newamp1_batch.m @@ -72,7 +72,8 @@ function surface = newamp1_batch(input_prefix, output_prefix) %model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing); [model_ voicing_ indexes] = experiment_rate_K_dec(model, voicing); % encoder/decoder, lets toss away results except for indexes - [model_ voicing_] = model_from_indexes(indexes); % decoder uses just indexes, outputs vecs for synthesis + %[model_ voicing_] = model_from_indexes(indexes); % decoder uses just indexes, outputs vecs for synthesis + [model_ voicing_] = model_from_indexes_fbf(indexes); % decoder uses just indexes, outputs vecs for synthesis %model_ = experiment_dec_linear(model_); %model_ = experiment_energy_rate_linear(model, 1, 0); @@ -411,6 +412,116 @@ function [model_ voicing_] = model_from_indexes(indexes) endfunction +% --------------------------------------------------------------------------------------- +% Stand alone decoder that takes indexes and creates model_, just like +% model_from_indexes above. This version is refactored to perform frame by frame +% processing, as a stepping stone to C. 
+
+function [model_ voicing_] = model_from_indexes_fbf(indexes)
+  % indexes: one row per frame.  Cols 1 and 2 are the two VQ stage indexes
+  % (used 1-based), col 3 is the energy quantiser index (used 0-based) and
+  % col 4 is the Wo/voicing index (0 means unvoiced).  Only rows 1, 1+M,
+  % 1+2M, ... are read; the frames in between are interpolated.
+  % Returns model_ (col 1 = Wo, col 2 = L, after a pass through
+  % resample_rate_L) and voicing_, a 1 x frames vector of 0/1 flags.
+  max_amp = 80;  K = 20;  M = 4;
+
+  frames = size(indexes, 1);
+  model = model_ = zeros(frames, max_amp+3);
+  sample_freqs_kHz = mel_sample_freqs_kHz(K);
+  energy_q = 10 + 40/16*(0:15);   % 16 energy levels: 10, 12.5, ..., 47.5
+
+  melvq;               % presumably a script that defines the VQ helpers - TODO confirm
+  load train_120_vq;   % expected to provide the train_120_vq codebook indexed below
+
+  surface_no_mean_ = zeros(frames,K);
+  surface_ = zeros(frames, K);
+  interpolated_surface_ = zeros(frames, K);
+  voicing = zeros(1, frames);
+  voicing_ = zeros(1, frames);
+
+  for f=1:M:frames
+    % decode vector quantised surface: sum the two VQ stages, post filter,
+    % then add the decoded mean back on
+    surface_no_mean_(f,:) = train_120_vq(indexes(f,1),:,1) + train_120_vq(indexes(f,2),:,2);
+    surface_no_mean_(f,:) = post_filter(surface_no_mean_(f,:), sample_freqs_kHz, 1.5);
+    mean_f_ = energy_q(indexes(f,3)+1);
+    surface_(f,:) = surface_no_mean_(f,:) + mean_f_;
+
+    % break into segments of M frames.  We have 2 samples spaced M apart
+    % and interpolate the rest.
+
+    if f > M
+      left_vec = surface_(f-M,:);
+      right_vec = surface_(f,:);
+      sample_points = [f-M f];
+      resample_points = f-M:f-1;
+      for k=1:K
+        interpolated_surface_(resample_points,k) = interp_linear(sample_points, [left_vec(k) right_vec(k)], resample_points);
+      end
+    end
+
+    % recover Wo and voicing; index 0 selects the unvoiced default Wo
+    if indexes(f,4) == 0
+      voicing(f) = 0;
+      model(f,1) = 2*pi/100;
+    else
+      voicing(f) = 1;
+      model(f,1) = decode_log_Wo(indexes(f,4), 6);
+    end
+
+    if f > M
+      Wo1 = model(f-M,1);
+      Wo2 = model(f,1);
+      [Wo_ avoicing_] = interp_Wo_v(Wo1, Wo2, voicing(f-M), voicing(f), M);
+      model_(f-M:f-1,1) = Wo_;
+      voicing_(f-M:f-1) = avoicing_;
+      model_(f-M:f-1,2) = floor(pi ./ model_(f-M:f-1,1)); % calculate L for each interpolated Wo
+    end
+
+  end
+
+  % set end frames to something sensible.  The start is clamped to row 1 so
+  % that an input of M frames or fewer does not index row 0 or below.
+  tail = max(1, frames-M):frames;
+  model_(tail,1) = pi/100;
+  model_(tail,2) = floor(pi ./ model_(tail,1));
+
+  model_ = resample_rate_L(model_, interpolated_surface_, sample_freqs_kHz);
+
+endfunction
+
+
+function [Wo_ voicing_] = interp_Wo_v(Wo1, Wo2, voicing1, voicing2, M)
+  % Interpolate Wo and voicing across the M frames that start at the left
+  % sample (Wo1, voicing1) and stop one frame before the right sample (Wo2,
+  % voicing2).  M is optional and defaults to 4.  Returns two 1 x M rows.
+  %   both unvoiced : default Wo (2*pi/100) and voicing 0 throughout
+  %   one end voiced: the half next to the voiced end takes its Wo and is
+  %                   flagged voiced; the other half is default Wo, unvoiced
+  %   both voiced   : Wo from interp_linear over sample points 1 and M+1
+  if nargin < 5
+    M = 4;
+  end
+  half = floor(M/2);        % floor keeps the split an integer index for odd M
+  Wo_unvoiced = 2*pi/100;
+
+  Wo_ = Wo_unvoiced*ones(1,M);
+  voicing_ = zeros(1,M);
+
+  if voicing1 && voicing2
+    Wo_(1:M) = interp_linear([1 M+1], [Wo1 Wo2], 1:M);
+    voicing_(1:M) = 1;
+  elseif voicing1
+    Wo_(1:half) = Wo1;
+    voicing_(1:half) = 1;
+  elseif voicing2
+    Wo_(half+1:M) = Wo2;
+    voicing_(half+1:M) = 1;
+  end
+endfunction
+
+
 % ---------------------------------------------------------------------------------------
 % Various experiments tried during development
 
diff --git a/codec2-dev/octave/newamp1_compare.m b/codec2-dev/octave/newamp1_compare.m
index 497c7aaa..eac9ecde 100644
--- a/codec2-dev/octave/newamp1_compare.m
+++ b/codec2-dev/octave/newamp1_compare.m
@@ -15,9 +15,11 @@ function newamp1_compare(prefixa, prefixb)
   b = load_params(prefixb, frames);
 
   check(a.Am, b.Am, 'Am');
-  check(a.Aw, b.Aw, 'Aw');
+  check(a.Hm, b.Hm, 'Hm');
   check(a.Wo, b.Wo, 'Wo');
   check(a.v, b.v, 'v');
+
+  figure(1); clf; plot(a.v); hold on; plot(a.v-b.v,'r'); hold off;
 endfunction
 
 
@@ -29,22 +31,22 @@ function params = load_params(prefix, frames)
   fam = fopen(Am_out_name,"rb");
   Wo_out_name = sprintf("%s_Wo.out", prefix);
   fWo = fopen(Wo_out_name,"rb");
-  Aw_out_name = sprintf("%s_aw.out", prefix);
-  faw = fopen(Aw_out_name,"rb");
+  Hm_out_name = sprintf("%s_hm.out", prefix);
+  fhm = fopen(Hm_out_name,"rb");
 
   % load up values from binary files
 
   params.Am = zeros(frames, max_amp);
   params.Wo = zeros(frames, 1);
   params.v = zeros(frames, 1);
-  params.Aw = zeros(frames, fft_enc);
+  params.Hm = zeros(frames, max_amp);
   for f=1:frames
     params.Am(f,:) = fread(fam, max_amp, "float32");
params.Wo(f) = fread(fWo, 1, "float32"); - params.Aw(f,:) = fread(faw, fft_enc, "float32"); + params.Hm(f,:) = fread(fhm, max_amp, "float32"); end - fclose(fam); fclose(fWo); fclose(faw); + fclose(fam); fclose(fWo); fclose(fhm); % voicing is a text file -- 2.25.1