From 7471ab54009599de5804c67ab54e6741b39fde67 Mon Sep 17 00:00:00 2001 From: drowe67 Date: Thu, 15 Dec 2016 21:03:03 +0000 Subject: [PATCH] fbf working set up for mel vq, getting some gd results with train.spc git-svn-id: https://svn.code.sf.net/p/freetel/code@2934 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/octave/newamp.m | 88 ++++++++++++++++++-- codec2-dev/octave/newamp1_batch.m | 130 +++++++----------------------- codec2-dev/octave/newamp1_fbf.m | 81 ++++++++++++++----- 3 files changed, 172 insertions(+), 127 deletions(-) diff --git a/codec2-dev/octave/newamp.m b/codec2-dev/octave/newamp.m index 722d419b..f9bee1b3 100644 --- a/codec2-dev/octave/newamp.m +++ b/codec2-dev/octave/newamp.m @@ -263,7 +263,7 @@ function maskdB = resonator_fast(freq_tone_kHz, mask_sample_freqs_kHz) endfunction -% Alternative mask function that uses parabolas for fast computetion. +% Alternative mask function that uses parabolas for fast computation function maskdB = parabolic_resonator(freq_tone_kHz, mask_sample_freqs_kHz) @@ -323,7 +323,7 @@ endfunction % decimate frame rate of mask, use linear interpolation in the log domain -function maskdB_ = decimate_frame_rate(model, decimate, f, frames, mask_sample_freqs_kHz) +function maskdB_ = decimate_frame_rate(model, decimate, f, frames) max_amp = 80; Wo = model(f,1); @@ -342,7 +342,7 @@ function maskdB_ = decimate_frame_rate(model, decimate, f, frames, mask_sample_f left_fraction = 1 - mod((f-1),decimate)/decimate; right_fraction = 1 - left_fraction; - printf("f: %d left_f: %d right_f: %d left_fraction: %f right_fraction: %f \n", f, left_f, right_f, left_fraction, right_fraction) + printf("f: %d left_f: %d right_f: %d left_fraction: %3.2f right_fraction: %3.2f \n", f, left_f, right_f, left_fraction, right_fraction) % fit splines to left and right masks @@ -359,8 +359,8 @@ function maskdB_ = decimate_frame_rate(model, decimate, f, frames, mask_sample_f % determine mask for left and right frames, sampling at Wo for this frame 
sample_freqs_kHz = (1:L)*Wo*4/pi; - maskdB_left = interp1(left_sample_freqs_kHz, left_AmdB, sample_freqs_kHz); - maskdB_right = interp1(right_sample_freqs_kHz, right_AmdB, sample_freqs_kHz); + maskdB_left = interp1(left_sample_freqs_kHz, left_AmdB, sample_freqs_kHz, "extrap"); + maskdB_right = interp1(right_sample_freqs_kHz, right_AmdB, sample_freqs_kHz, "extrap"); maskdB_ = left_fraction*maskdB_left + right_fraction*maskdB_right; endfunction @@ -916,7 +916,7 @@ function [AmdB_ residual fvec fvec_ amps] = piecewise_model(AmdB, Wo, vq, vq_m) amp(3) = AmdB(mx_ind); AmdB_ = max(AmdB_, parabolic_resonator(mx_ind*Wo*4/pi, mask_sample_freqs_kHz) + amp(3)); fr3 = mx_ind*Wo*4/pi; - + % 4th resonator fmin = fr3 - 0.300; @@ -1014,3 +1014,79 @@ function lmin = abys(AmdB_, AmdB, Wo, L, mask_sample_freqs_kHz) plot(mask_sample_freqs_kHz*1000, e) endfunction + +% Non linear sampling of frequency axis, reducing the "rate" is a +% first step before VQ + +function mel = ftomel(fHz) + mel = floor(2595*log10(1+fHz/700)+0.5); +endfunction + + +function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K) + [frames nc] = size(model); + mel_start = ftomel(200); mel_end = ftomel(3700); + step = (mel_end-mel_start)/(K-1); + mel = mel_start:step:mel_end; + rate_K_sample_freqs_Hz = 700*((10 .^ (mel/2595)) - 1); + rate_K_sample_freqs_kHz = rate_K_sample_freqs_Hz/1000; + + rate_K_surface = resample_const_rate_f(model, rate_K_sample_freqs_kHz); +endfunction + + +% Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K. This can be viewed +% as a 3D surface with time, freq, and amplitude axis. 
+ +function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f(model, rate_K_sample_freqs_kHz) + + % convert rate L=pi/Wo amplitude samples to fixed rate K + + max_amp = 80; + [frames col] = size(model); + K = length(rate_K_sample_freqs_kHz); + rate_K_surface = zeros(frames, K); + + + for f=1:frames + Wo = model(f,1); + L = min([model(f,2) max_amp-1]); + Am = model(f,3:(L+2)); + AmdB = 20*log10(Am); + %pre = 10*log10((1:L)*Wo*4/(pi*0.3)); + %AmdB += pre; + + % clip between peak and peak -50dB, to reduce dynamic range + + AmdB_peak = max(AmdB); + AmdB(find(AmdB < (AmdB_peak-50))) = AmdB_peak-50; + + rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi; + + rate_K_surface(f,:) = interp1(rate_L_sample_freqs_kHz, AmdB, rate_K_sample_freqs_kHz, "spline", "extrap"); + + %printf("\r%d/%d", f, frames); + end + %printf("\n"); +endfunction + + +% Take a rate K surface and convert back to time varying rate L + +function [model_ AmdB_] = resample_rate_L(model, rate_K_surface, rate_K_sample_freqs_kHz) + max_amp = 80; + [frames col] = size(model); + + model_ = zeros(frames, max_amp+3); + for f=1:frames + Wo = model(f,1); + L = model(f,2); + rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi; + + % back down to rate L + + AmdB_ = interp1(rate_K_sample_freqs_kHz, rate_K_surface(f,:), rate_L_sample_freqs_kHz, "spline", 0); + + model_(f,1) = Wo; model_(f,2) = L; model_(f,3:(L+2)) = 10 .^ (AmdB_(1:L)/20); + end +endfunction diff --git a/codec2-dev/octave/newamp1_batch.m b/codec2-dev/octave/newamp1_batch.m index 8b294385..c63641b6 100644 --- a/codec2-dev/octave/newamp1_batch.m +++ b/codec2-dev/octave/newamp1_batch.m @@ -43,9 +43,18 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ %model_ = experiment_filter(model); %model_ = experiment_filter_dec_filter(model); - [model_ surface] = experiment_mel_freq(model, 0); + + [model_ surface] = experiment_mel_freq(model, 1); + + % extract energy + + % interpolate + + model_ = experiment_dec_linear(model_); + + % add 
energy back in + %[model_ surface] = experiment_mel_diff_freq(model, 0); - %model_ = experiment_dec_linear(model_); %[model_ rate_K_surface] = experiment_closed_loop_mean(model); % ---------------------------------------------------- @@ -70,8 +79,9 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ Am_ = zeros(1,max_amp); Am_(2:L) = Am(1:L-1); - % post filter, boosts higher amplitudes more than lower, improving - % shape of formants and reducing muffling. Note energy normalisation + % optional post filter on linear {Am}, boosts higher amplitudes more than lower, + % improving shape of formants and reducing muffling. Note energy + % normalisation if postfilter e1 = sum(Am_(2:L).^2); @@ -90,43 +100,28 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ endfunction -% Non linear sampling of frequency axis, reducing the "rate" is a -% first step before VQ - -function mel = ftomel(fHz) - mel = floor(2595*log10(1+fHz/700)+0.5); -endfunction - - -function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K) - [frames nc] = size(model); - mel_start = ftomel(200); mel_end = ftomel(3700); - step = (mel_end-mel_start)/(K-1); - mel = mel_start:step:mel_end; - rate_K_sample_freqs_Hz = 700*((10 .^ (mel/2595)) - 1); - rate_K_sample_freqs_kHz = rate_K_sample_freqs_Hz/1000; - - rate_K_surface = resample_const_rate_f(model, rate_K_sample_freqs_kHz); -endfunction +% experiment to resample freq axis on mel scale, then optionally vq -function [model_ rate_K_surface] = experiment_mel_freq(model, vq_en=0) +function [model_ rate_K_surface] = experiment_mel_freq(model, vq_en=0, plots=1) [frames nc] = size(model); K = 20; - [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K); + [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model, K); - figure(1); clf; mesh(rate_K_surface); + if plots + figure(1); clf; mesh(rate_K_surface); + end + + for f=1:frames + 
mean_f(f) = mean(rate_K_surface(f,:)); + rate_K_surface_no_mean(f,:) = rate_K_surface(f,:) - mean_f(f); + end if vq_en melvq; - load surface_vq; m=5; - - for f=1:frames - mean_f(f) = mean(rate_K_surface(f,:)); - rate_K_surface_no_mean(f,:) = rate_K_surface(f,:) - mean_f(f); - end - - [res rate_K_surface_ ind] = mbest(surface_vq, rate_K_surface_no_mean, m); + load train_120_vq; m=5; + + [res rate_K_surface_ ind] = mbest(train_120_vq, rate_K_surface_no_mean, m); % pf, needs some energy equalisation, does gd things for hts1a rate_K_surface_ *= 1.2; @@ -149,9 +144,10 @@ function [model_ rate_K_surface] = experiment_mel_freq(model, vq_en=0) model_ = resample_rate_L(model, rate_K_surface, rate_K_sample_freqs_kHz); - %figure(2); clf; mesh(model_); + if plots + figure(2); clf; mesh(rate_K_surface_no_mean); + end - for f=1:frames rate_K_surface(f,:) -= mean(rate_K_surface(f,:)); end @@ -430,70 +426,6 @@ endfunction -% Resample Am from time-varying rate L=floor(pi/Wo) to fixed rate K. This can be viewed -% as a 3D surface with time, freq, and ampitude axis. 
- -function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f(model, K=50) - - % convert rate L=pi/Wo amplitude samples to fixed rate K - - max_amp = 80; - [frames col] = size(model); - rate_K_sample_freqs_kHz = (1:K)*4/K; - rate_K_surface = zeros(frames, K); - - for f=1:frames - Wo = model(f,1); - L = min([model(f,2) max_amp-1]); - Am = model(f,3:(L+2)); - AmdB = 20*log10(Am); - rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi; - - rate_K_surface(f,:) = interp1(rate_L_sample_freqs_kHz, AmdB, rate_K_sample_freqs_kHz, "spline", "extrap"); - end -endfunction - - -function [rate_K_surface rate_K_sample_freqs_kHz] = resample_const_rate_f(model, rate_K_sample_freqs_kHz) - - % convert rate L=pi/Wo amplitude samples to fixed rate K - - max_amp = 80; - [frames col] = size(model); - K = length(rate_K_sample_freqs_kHz); - rate_K_surface = zeros(frames, K); - - for f=1:frames - Wo = model(f,1); - L = min([model(f,2) max_amp-1]); - Am = model(f,3:(L+2)); - AmdB = 20*log10(Am); - rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi; - - rate_K_surface(f,:) = interp1(rate_L_sample_freqs_kHz, AmdB, rate_K_sample_freqs_kHz, "spline", 0); - end -endfunction - - -% Take a rate K surface and convert back to time varying rate L - -function model_ = resample_rate_L(model, rate_K_surface, rate_K_sample_freqs_kHz) - max_amp = 80; - [frames col] = size(model); - - model_ = zeros(frames, max_amp+3); - for f=1:frames-1 - Wo = model(f,1); - L = min(pi/Wo, max_amp-1); - rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi; - - % back down to rate L - - AmdB_ = interp1(rate_K_sample_freqs_kHz, rate_K_surface(f,:), rate_L_sample_freqs_kHz, "spline", 0); - - model_(f,1) = Wo; model_(f,2) = L; model_(f,3:(L+2)) = 10 .^ (AmdB_(1:L)/20); - end -endfunction % early test, devised to test rate K<->L changes along frequency axis diff --git a/codec2-dev/octave/newamp1_fbf.m b/codec2-dev/octave/newamp1_fbf.m index 1cc8e4ca..a80aa1da 100644 --- a/codec2-dev/octave/newamp1_fbf.m +++ b/codec2-dev/octave/newamp1_fbf.m @@ -19,9 
+19,10 @@ function newamp1_fbf(samname, f=10) newamp; more off; - quant_en = 0; + quant_en = 0; pf_en = 0; + melvq; - load vq; + K=20; load train_120_vq; m=5; % load up text files dumped from c2sim --------------------------------------- @@ -40,7 +41,6 @@ function newamp1_fbf(samname, f=10) figure(1); clf; s = [ Sn(2*f-1,:) Sn(2*f,:) ]; - size(s); plot(s); axis([1 length(s) -20000 20000]); title('Time Domain Speech'); @@ -51,45 +51,68 @@ function newamp1_fbf(samname, f=10) AmdB = 20*log10(Am); Am_freqs_kHz = (1:L)*Wo*4/pi; - #{ - [maskdB Am_freqs_kHz] = mask_model(AmdB, Wo, L); - AmdB_ = maskdB; - [mx mx_ind] = max(AmdB_); - AmdB_(mx_ind) += 6; - #} + % plots for mel sampling - if quant_en - [AmdB_ residual fvec fvec_] = piecewise_model(AmdB, Wo, vq, 2); + [rate_K_vec rate_K_sample_freqs_kHz] = resample_const_rate_f_mel(model(f,:), K); + + mean_f = mean(rate_K_vec); + rate_K_vec_no_mean = rate_K_vec - mean_f; + + if quant_en == 2 + [res rate_K_vec_no_mean_ ind] = mbest(train_120_vq, rate_K_vec_no_mean, m); else - [AmdB_ residual fvec] = piecewise_model(AmdB, Wo); + rate_K_vec_no_mean_ = rate_K_vec_no_mean; end + if pf_en + % pf, needs some energy equalisation, does gd things for hts1a + rate_K_vec_no_mean_ *= 1.2; + end + + rate_K_vec_ = rate_K_vec_no_mean_ + mean_f; + [model_ AmdB_] = resample_rate_L(model(f,:), rate_K_vec_, rate_K_sample_freqs_kHz); + + % plots ---------------------------------- + figure(2); clf; - title('Frequency Domain'); + title('Frequency Domain 1'); axis([1 4000 -20 80]); hold on; - plot((1:L)*Wo*4000/pi, AmdB,";Am;r+-"); - plot(Am_freqs_kHz*1000, AmdB_, ';model;c'); - plot(fvec*1000, 60*ones(1,4), ';fvec;go'); - if quant_en - plot(fvec_*1000, 60*ones(1,4), ';fvec q;ro'); + plot((1:L)*Wo*4000/pi, AmdB,";Am;b+-"); + plot(rate_K_sample_freqs_kHz*1000, rate_K_vec, ';rate K mel;g+-'); + if quant_en >= 1 + plot((1:L)*Wo*4000/pi, AmdB_,";Am quant;k+-"); + end + if quant_en == 2 + plot(rate_K_sample_freqs_kHz*1000, rate_K_vec_, ';rate K mel 
quant;r+-'); end hold off; + figure(3); + clf; + title('Frequency Domain 2'); + axis([1 4000 -80 80]); + hold on; + plot((1:L)*Wo*4000/pi, AmdB,";Am;b+-"); + plot(rate_K_sample_freqs_kHz*1000, rate_K_vec_no_mean, ';rate K mel no mean;g+-'); + if quant_en == 2 + plot(rate_K_sample_freqs_kHz*1000, rate_K_vec_no_mean_, ';rate K mel no mean quant;r+-'); + end + hold off; + % interactive menu ------------------------------------------ - printf("\rframe: %d menu: n-next b-back q-quit m-quant_en", f); + printf("\rframe: %d menu: n-next b-back q-quit m-quant_en[%d]", f, quant_en); fflush(stdout); k = kbhit(); if (k == 'm') - if quant_en + quant_en++; + if quant_en > 2 quant_en = 0; - else - quant_en = 1; end endif if (k == 'n') @@ -104,3 +127,17 @@ function newamp1_fbf(samname, f=10) endfunction +#{ Piecewise model stuff, organise later if rqd + [maskdB Am_freqs_kHz] = mask_model(AmdB, Wo, L); + AmdB_ = maskdB; + [mx mx_ind] = max(AmdB_); + AmdB_(mx_ind) += 6; + + + if quant_en + [AmdB_ residual fvec fvec_] = piecewise_model(AmdB, Wo, vq, 1); + else + [AmdB_ residual fvec] = piecewise_model(AmdB, Wo); + end + fvec +#} -- 2.25.1