From 0f540a51fb740cc47d1b1932e8b1e76c4ec0c1ad Mon Sep 17 00:00:00 2001 From: drowe67 Date: Tue, 20 Dec 2016 05:34:29 +0000 Subject: [PATCH] chased down a bug with interpolated Wo and vocicing, no looking gd for hts1a and hts2a git-svn-id: https://svn.code.sf.net/p/freetel/code@2937 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/octave/newamp1_batch.m | 128 ++++++++++++++++++++++++++++-- codec2-dev/src/c2sim.c | 47 ++++++++++- 2 files changed, 166 insertions(+), 9 deletions(-) diff --git a/codec2-dev/octave/newamp1_batch.m b/codec2-dev/octave/newamp1_batch.m index 52479994..04c3d334 100644 --- a/codec2-dev/octave/newamp1_batch.m +++ b/codec2-dev/octave/newamp1_batch.m @@ -38,7 +38,7 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ model_name = strcat(samname,"_model.txt"); model = load(model_name); - [frames nc] = size(model) + [frames nc] = size(model); voicing_name = strcat(samname,"_pitche.txt"); voicing = zeros(1,frames); @@ -54,8 +54,9 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ %model_ = experiment_filter_dec_filter(model); %[model_ surface] = experiment_mel_freq(model, 1, 1, voicing); - model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing); - + %model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing); + [model_ voicing_] = experiment_rate_K_dec(model, voicing); + %model_ = experiment_dec_linear(model_); %model_ = experiment_energy_rate_linear(model, 1, 0); @@ -122,6 +123,18 @@ function surface = newamp1_batch(samname, optional_Am_out_name, optional_Aw_out_ if synth_phase fclose(faw); end + + % save voicing file + + if exist("voicing_", "var") + v_out_name = sprintf("%s_v.txt", samname); + fv = fopen(v_out_name,"wt"); + for f=1:length(voicing_) + fprintf(fv,"%d\n", voicing_(f)); + end + fclose(fv); + end + printf("\n") endfunction @@ -312,9 +325,112 @@ function model_ = experiment_dec_linear(model) endfunction -% Experimental AbyS decimator that chooses best frames to match 
surface -% based on AbyS approach. Can apply post filter at different points, -% and optionally do fixed decimation, at rate K. +% Linear decimator/interpolator that operates at rate K, includes VQ, post filter, and Wo/E +% quantisation. Evolved from abys decimator below. + +function [model_ voicing_ ] = experiment_rate_K_dec(model, voicing) + max_amp = 80; + [frames nc] = size(model); + model_ = zeros(frames, max_amp+3); + + M = 8; + + % create frames x K surface. TODO make all of this operate frame by + % frame, or at least M/2=4 frames rather than one big chunk + + K = 20; + [surface sample_freqs_kHz] = resample_const_rate_f_mel(model, K); + target_surface = surface; + + % VQ rate K surface. TODO: If we are decimating by M/2=4 we really + % only need to do this every 4th frame. + + melvq; + load train_120_vq; m=5; + + for f=1:frames + mean_f(f) = mean(surface(f,:)); + surface_no_mean(f,:) = surface(f,:) - mean_f(f); + end + + [res surface_no_mean_ ind] = mbest(train_120_vq, surface_no_mean, m); + + for f=1:frames + surface_no_mean_(f,:) = post_filter(surface_no_mean_(f,:), sample_freqs_kHz, 1.5); + end + + surface_ = zeros(frames, K); + for f=1:frames + surface_(f,:) = surface_no_mean_(f,:) + mean_f(f); + end + + % break into segments of M frames. We have 3 samples in M frame + % segment spaced M/2 apart and interpolate the rest. This evolved + % from AbyS scheme below but could be simplified to simple linear + % interpolation, or using 3 or 4 points but shift of M/2=4 frames. 
+ + interpolated_surface_ = zeros(frames, K); + for f=1:M:frames-M + left_vec = surface_(f,:); + m = f+M/2; + centre_vec = surface_(m,:); + right_vec = surface_(f+M,:); + sample_points = [f m f+M]; + resample_points = f:f+M-1; + for k=1:K + interpolated_surface_(resample_points,k) = interp1(sample_points, [left_vec(k) centre_vec(k) right_vec(k)], resample_points, "spline", 0); + end + end + + % break into M/2 segments for purposes of Wo interpolation + + voicing_ = zeros(1, frames); + for f=1:M/2:frames-M/2 + + if !voicing(f) && !voicing(f+M/2) + model_(f:f+M/2-1,1) = 2*pi/100; + end + + if voicing(f) && !voicing(f+M/2) + model_(f:f+M/4-1,1) = model(f,1); + model_(f+M/4:f+M/2-1,1) = 2*pi/100; + voicing_(f:f+M/4-1) = 1; + end + + if !voicing(f) && voicing(f+M/2) + model_(f:f+M/4-1,1) = 2*pi/100; + model_(f+M/4:f+M/2-1,1) = model(f+M/2,1); + voicing_(f+M/4:f+M/2-1) = 1; + end + + if voicing(f) && voicing(f+M/2) + Wo_samples = [model(f,1) model(f+M/2,1)]; + model_(f:f+M/2-1,1) = interp1([f f+M/2], Wo_samples, f:f+M/2-1, "linear", 0); + voicing_(f:f+M/2-1) = 1; + end + + printf("f: %d f+M/2: %d Wo: %f %f (%f %%) v: %d %d \n", f, f+M/2, model(f,1), model(f+M/2,1), 100*abs(model(f,1) - model_(f+M/2,1))/model(f,1), voicing(f), voicing(f+M/2)); + for i=f:f+M/2-1 + printf(" f: %d v: %d v_: %d Wo: %f Wo_: %f\n", i, voicing(i), voicing_(i), model(i,1), model_(i,1)); + end + end + model_(frames-M/2:frames,1) = pi/100; % set end frames to something sensible + + voicing_ = voicing; + model_(:,1) = model(:,1); + %model_(221:225,1) = model(221:225,1); + %model_(223:224,1) = model(223:224,1); + model_(:,2) = floor(pi ./ model_(:,1)); % calculate L for each interpolated Wo + model_ = resample_rate_L(model_, interpolated_surface_, sample_freqs_kHz); + +endfunction + + +% Experimental AbyS decimator that chooses best frames to match +% surface based on AbyS approach. Can apply post filter at different +% points, and optionally do fixed decimation, at rate K. 
Didn't +% produce anything spectacular in AbyS mode, suggest another look with +% some sort of fbf display to see what's going on internally. function model_ = experiment_dec_abys(model, M=8, vq_en=0, pf_en=1, fixed_dec=0, voicing) max_amp = 80; diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 4bd7c6ea..9a15be9b 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -53,6 +53,7 @@ void synth_one_frame(codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain); void print_help(const struct option *long_options, int num_opts, char* argv[]); +static void ear_protection(float in_out[], int n); /*---------------------------------------------------------------------------*\ @@ -547,8 +548,8 @@ int main(int argc, char *argv[]) //printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced, // model.Wo, prev_uq_Wo); #ifdef DUMP - dump_Sw_(Sw_); - dump_Ew(Ew); + //dump_Sw_(Sw_); + //dump_Ew(Ew); dump_snr(snr); #endif @@ -770,6 +771,7 @@ int main(int argc, char *argv[]) if (Woread) { int ret = fread(&model.Wo, sizeof(float), 1, fWo); + model.L = floor(PI/model.Wo); assert(ret == 1); } @@ -825,7 +827,7 @@ int main(int argc, char *argv[]) #endif } - /* optionally rewad in Aw FFT vector, we really only care about the phase + /* optionally read in Aw FFT vector, we really only care about the phase of each entry, used for reading in phases generated by Octave */ if (awread) { @@ -837,6 +839,7 @@ int main(int argc, char *argv[]) assert(ret == FFT_ENC); } + fprintf(stderr, "frame: %d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced); if (phase0) phase_synth_zero_order(fft_fwd_cfg, &model_dec[i], ex_phase, Aw); if (postfilt) @@ -944,3 +947,41 @@ void print_help(const struct option* long_options, int num_opts, char* argv[]) exit(1); } +/*---------------------------------------------------------------------------*\ + + FUNCTION....: ear_protection() + 
AUTHOR......: David Rowe + DATE CREATED: Nov 7 2012 + + Limits output level to protect ears when there are bit errors or the input + is overdriven. This doesn't correct or mask bit errors, just reduces the + worst of their damage. + +\*---------------------------------------------------------------------------*/ + +static void ear_protection(float in_out[], int n) { + float max_sample, over, gain; + int i; + + /* find maximum sample in frame */ + + max_sample = 0.0; + for(i=0; i max_sample) + max_sample = in_out[i]; + + /* determine how far above set point */ + + over = max_sample/30000.0; + + /* If we are x dB over set point we reduce level by 2x dB, this + attenuates major excursions in amplitude (likely to be caused + by bit errors) more than smaller ones */ + + if (over > 1.0) { + gain = 1.0/(over*over); + //fprintf(stderr, "gain: %f\n", gain); + for(i=0; i