From 245403ef64dd22ddcc33d6a4ce79a0fe2d02436f Mon Sep 17 00:00:00 2001 From: drowe67 Date: Thu, 12 Jan 2017 21:59:43 +0000 Subject: [PATCH] debugging phase on decode using c2sim git-svn-id: https://svn.code.sf.net/p/freetel/code@2962 01035d8c-6547-0410-b346-abe4f91aad63 --- codec2-dev/octave/tnewamp1.m | 243 +++++++---------------------------- codec2-dev/src/c2sim.c | 76 ++++------- 2 files changed, 74 insertions(+), 245 deletions(-) diff --git a/codec2-dev/octave/tnewamp1.m b/codec2-dev/octave/tnewamp1.m index 7e684709..34c87476 100644 --- a/codec2-dev/octave/tnewamp1.m +++ b/codec2-dev/octave/tnewamp1.m @@ -26,6 +26,12 @@ octave:1> tnewamp1("../build_linux/src/hts1a") + 5/ Optionally listen to output + + ~/codec2-dev/build_linux/src$ ./c2sim ../../raw/hts1a.raw --phase0 --postfilter \ + --amread hts1a_am.out --hmread hts1a_hm.out \ + --Woread hts1a_Wo.out --hand_voicing hts1a_v.txt -o - \ + | play -q -t raw -r 8000 -s -2 - #} @@ -92,9 +98,11 @@ function tnewamp1(input_prefix) % break into segments of M frames. We have 2 samples spaced M apart % and interpolate the rest. - Nfft_phase = 128; + Nfft_phase = 128; % note this needs to be 512 (FFT_ENC in codec2 if using --awread) + % with --hmread 128 is preferred as less memory/CPU model_ = zeros(frames, max_amp+2); voicing_ = zeros(1,frames); + Aw = zeros(frames, Nfft_phase); H = zeros(frames, max_amp); model_(1,1) = Wo_left = 2*pi/100; voicing_left = 0; @@ -133,10 +141,10 @@ function tnewamp1(input_prefix) for k=f-M:f-1 model_(k,:) = resample_rate_L(model_(k,:), interpolated_surface_(k,:), sample_freqs_kHz); - phase = determine_phase(model_, k, Nfft_phase); + Aw(k,:) = determine_phase(model_, k, Nfft_phase); for m=1:model_(k,2) b = round(m*model_(k,1)*Nfft_phase/(2*pi)); % map harmonic centre to DFT bin - H(k,m) = exp(-j*phase(b+1)); + H(k,m) = exp(j*Aw(k, b+1)); end end end @@ -148,24 +156,12 @@ function tnewamp1(input_prefix) left_vec = right_vec; end - - %model_(1,1:77) - %model__c(1,1:77) - %sum(model_(1,1:77)-model__c(1,1:77)) - %[mx mxi] = max(model_(1,1:77)-model__c(1,1:77)) - - %interpolated_surface_(1,:) - %interpolated_surface__c(1,:) - %sum(interpolated_surface_(1,:) - interpolated_surface__c(1,:)) - - - %Hm(2,:) - Hm_c(2,:) - figure(1); + figure(1); clf; mesh(angle(H)); - figure(2); + figure(2); clf; mesh(angle(H_c)); - figure(3); + figure(3); clf; mesh(abs(H - H_c)); check(rate_K_surface, rate_K_surface_c, 'rate_K_surface', 0.01); @@ -177,7 +173,20 @@ function tnewamp1(input_prefix) check(model_(:,3:max_amp+2), model__c(:,3:max_amp+2), 'rate L Am surface ', 0.1); check(H, H_c, 'phase surface'); - #{ + % Save to disk to check synthesis is OK with c2sim + + output_prefix = input_prefix; + Am_out_name = sprintf("%s_am.out", output_prefix); + fam = fopen(Am_out_name,"wb"); + + Wo_out_name = sprintf("%s_Wo.out", output_prefix); + fWo = fopen(Wo_out_name,"wb"); + + Aw_out_name = sprintf("%s_aw.out", output_prefix); + faw = fopen(Aw_out_name,"wb"); + + Hm_out_name = sprintf("%s_hm.out", output_prefix); + fhm = fopen(Hm_out_name,"wb"); for f=1:frames printf("%d ", f); @@ -188,193 +197,37 @@ function tnewamp1(input_prefix) Am_ = zeros(1,max_amp); Am_(2:L) = Am(1:L-1); - % optional post filter on linear {Am}, boosts higher amplitudes more than lower, - % improving shape of formants and reducing muffling. Note energy - % normalisation - - if postfilter - e1 = sum(Am_(2:L).^2); - Am_(2:L) = Am_(2:L) .^ 1.5; - e2 = sum(Am_(2:L).^2); - Am_(2:L) *= sqrt(e1/e2); - end - fwrite(fam, Am_, "float32"); fwrite(fWo, Wo, "float32"); - if synth_phase + % Note we send opposite phase as c2sim expects phase of LPC + % analysis filter, just a convention based on historical + % development of Codec 2 - % synthesis phase spectra from magnitiude spectra using minimum phase techniques + Aw1 = zeros(1, Nfft_phase*2); + Aw1(1:2:Nfft_phase*2) = cos(Aw(f,:)); + Aw1(2:2:Nfft_phase*2) = -sin(Aw(f,:)); + fwrite(faw, Aw1, "float32"); - fft_enc = 128; - phase = determine_phase(model_, f, fft_enc); - assert(length(phase) == fft_enc); - %Aw = zeros(1, fft_enc*2); - %Aw(1:2:fft_enc*2) = cos(phase); - %Aw(2:2:fft_enc*2) = -sin(phase); + Hm = zeros(1, 2*max_amp); + for m=1:L + Hm(2*m+1) = real(H(f,m)); + Hm(2*m+2) = imag(H(f,m)); + end + fwrite(fhm, Hm, "float32"); + end - % sample phase at centre of each harmonic, not 1st entry Hm[1] in octave Hm[0] in C - % is not used + fclose(fam); fclose(fWo); fclose(faw); fclose(fhm); - Hm = zeros(1, 2*max_amp); - for m=1:L - b = round(m*Wo*fft_enc/(2*pi)); - Hm(2*m) = cos(phase(b)); - Hm(2*m+1) = -sin(phase(b)); - end - fwrite(fhm, Hm, "float32"); - end + v_out_name = sprintf("%s_v.txt", output_prefix); + fv = fopen(v_out_name,"wt"); + for f=1:length(voicing__c) + fprintf(fv,"%d\n", voicing__c(f)); end - - #} + fclose(fv); printf("\n") endfunction -% ----------------------------------------------------------------------------------------- -% Linear decimator/interpolator that operates at rate K, includes VQ, post filter, and Wo/E -% quantisation. Evolved from abys decimator below. Simulates the entire encoder/decoder. - -function [model_ voicing_ indexes] = experiment_rate_K_dec(model, voicing) - max_amp = 80; - [frames nc] = size(model); - model_ = zeros(frames, max_amp+3); - indexes = zeros(frames,4); - - M = 4; - - % create frames x K surface. TODO make all of this operate frame by - % frame, or at least M/2=4 frames rather than one big chunk - - K = 20; - [surface sample_freqs_kHz] = resample_const_rate_f_mel(model, K); - target_surface = surface; - - figure(1); - mesh(surface); - - % VQ rate K surface. TODO: If we are decimating by M/2=4 we really - % only need to do this every 4th frame. - - melvq; - load train_120_vq; m=5; - - for f=1:frames - mean_f(f) = mean(surface(f,:)); - surface_no_mean(f,:) = surface(f,:) - mean_f(f); - end - figure(2); - mesh(surface_no_mean); - - [res surface_no_mean_ ind] = mbest(train_120_vq, surface_no_mean, m); - indexes(:,1:2) = ind; - - for f=1:frames - surface_no_mean_(f,:) = post_filter(surface_no_mean_(f,:), sample_freqs_kHz, 1.5); - end - figure(3); - mesh(surface_no_mean_); - - surface_ = zeros(frames, K); - energy_q = 10 + 40/16*(0:15); - for f=1:frames - [mean_f_ indx] = quantise(energy_q, mean_f(f)); - indexes(f,3) = indx - 1; - %mean_f_ = mean_f(f); - surface_(f,:) = surface_no_mean_(f,:) + mean_f_; - end - - figure(); - mesh(surface_); - - % break into segments of M frames. We have 3 samples in M frame - % segment spaced M/2 apart and interpolate the rest. This evolved - % from AbyS scheme below but could be simplified to simple linear - % interpolation, or using 3 or 4 points but shift of M/2=4 frames. - - interpolated_surface_ = zeros(frames, K); - for f=1:M:frames-M - left_vec = surface_(f,:); - right_vec = surface_(f+M,:); - sample_points = [f f+M]; - resample_points = f:f+M-1; - for k=1:K - interpolated_surface_(resample_points,k) = interp_linear(sample_points, [left_vec(k) right_vec(k)], resample_points); - end - end - - % break into segments for purposes of Wo interpolation - - for f=1:M:frames - % quantise Wo - - % UV/V flag is coded using a zero index for Wo, this means we need to - % adjust Wo index slightly for the lowest Wo V frames - - if voicing(f) - index = encode_log_Wo(model(f,1), 6); - if index == 0 - index = 1; - end - indexes(f,4) = index; - voicing(f) = 1; - model_(f,1) = decode_log_Wo(indexes(f,4), 6); - else - indexes(f,4) = 0; - voicing(f) = 0; - model_(f,1) = 2*pi/100; - end - end - - - voicing_ = zeros(1, frames); - for f=1:M:frames-M - - Wo1_ = model_(f,1); - Wo2_ = model_(f+M,1); - - % uncomment to use unquantised values - %Wo1_ = model(f,1); - %Wo2_ = model(f+M,1); - - if !voicing(f) && !voicing(f+M) - model_(f:f+M-1,1) = 2*pi/100; - end - - if voicing(f) && !voicing(f+M) - model_(f:f+M/2-1,1) = Wo1_; - model_(f+M/2:f+M-1,1) = 2*pi/100; - voicing_(f:f+M/2-1) = 1; - end - - if !voicing(f) && voicing(f+M) - model_(f:f+M/2-1,1) = 2*pi/100; - model_(f+M/2:f+M-1,1) = Wo2_; - voicing_(f+M/2:f+M-1) = 1; - end - - if voicing(f) && voicing(f+M) - Wo_samples = [Wo1_ Wo2_]; - model_(f:f+M-1,1) = interp1([f f+M], Wo_samples, f:f+M-1, "linear", 0); - voicing_(f:f+M-1) = 1; - end - - #{ - printf("f: %d f+M/2: %d Wo: %f %f (%f %%) v: %d %d \n", f, f+M/2, model(f,1), model(f+M/2,1), 100*abs(model(f,1) - model(f+M/2,1))/model(f,1), voicing(f), voicing(f+M/2)); - for i=f:f+M/2-1 - printf(" f: %d v: %d v_: %d Wo: %f Wo_: %f\n", i, voicing(i), voicing_(i), model(i,1), model_(i,1)); - end - #} - end - model_(frames-M:frames,1) = pi/100; % set end frames to something sensible - - % enable these to use original (non interpolated) voicing and Wo - %voicing_ = voicing; - %model_(:,1) = model(:,1); - - model_(:,2) = floor(pi ./ model_(:,1)); % calculate L for each interpolated Wo - model_ = resample_rate_L(model_, interpolated_surface_, sample_freqs_kHz); - -endfunction diff --git a/codec2-dev/src/c2sim.c b/codec2-dev/src/c2sim.c index 9a4e1eca..f77543d4 100644 --- a/codec2-dev/src/c2sim.c +++ b/codec2-dev/src/c2sim.c @@ -53,7 +53,6 @@ void synth_one_frame(codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain); void print_help(const struct option *long_options, int num_opts, char* argv[]); -static void ear_protection(float in_out[], int n); /*---------------------------------------------------------------------------*\ @@ -145,6 +144,8 @@ int main(int argc, char *argv[]) float lspmelvq_mse = 0.0; int amread, Woread; FILE *fam = NULL, *fWo = NULL; + int awread; + FILE *faw = NULL; int hmread; FILE *fhm = NULL; @@ -183,6 +184,7 @@ int main(int argc, char *argv[]) { "bpfb", no_argument, &bpfb_en, 1 }, { "amread", required_argument, &amread, 1 }, { "hmread", required_argument, &hmread, 1 }, + { "awread", required_argument, &awread, 1 }, { "Woread", required_argument, &Woread, 1 }, #ifdef DUMP { "dump", required_argument, &dump, 1 }, @@ -299,6 +301,12 @@ int main(int argc, char *argv[]) optarg, strerror(errno)); exit(1); } + } else if(strcmp(long_options[option_index].name, "awread") == 0) { + if ((faw = fopen(optarg,"rb")) == NULL) { + fprintf(stderr, "Error opening float Aw file: %s: %s.\n", + optarg, strerror(errno)); + exit(1); + } } else if(strcmp(long_options[option_index].name, "dump_pitch_e") == 0) { if ((fjvm = fopen(optarg,"wt")) == NULL) { fprintf(stderr, "Error opening pitch & energy dump file: %s: %s.\n", @@ -572,7 +580,8 @@ int main(int argc, char *argv[]) codec2_fft(fft_fwd_cfg, a, Aw); if (hand_voicing) { - fscanf(fvoicing,"%d\n",&model.voiced); + int ret = fscanf(fvoicing,"%d\n",&model.voiced); + assert(ret == 1); } } @@ -828,17 +837,22 @@ int main(int argc, char *argv[]) #endif } - /* optionally read in Aw FFT vector, we really only care about the phase - of each entry, used for reading in phases generated by Octave */ - - if (hmread) { - int ret = fread(&H, sizeof(float), MAX_AMP, fhm); - assert(ret == MAX_AMP); - } - - //fprintf(stderr, "frame: %d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced); if (phase0) { - sample_phase(&model_dec[i], H, Aw); + /* optionally read in Aw, replacing values generated using LPC */ + + if (awread) { + int ret = fread(Aw, sizeof(COMP), FFT_ENC, faw); + assert(ret == FFT_ENC); + } + + /* optionally read in Hm directly, bypassing sampling of Aw[] */ + + if (hmread) { + int ret = fread(H, sizeof(COMP), MAX_AMP, fhm); + assert(ret == MAX_AMP); + } else { + sample_phase(&model_dec[i], H, Aw); + } phase_synth_zero_order(&model_dec[i], ex_phase, H); } if (postfilt) @@ -946,41 +960,3 @@ void print_help(const struct option* long_options, int num_opts, char* argv[]) exit(1); } -/*---------------------------------------------------------------------------*\ - - FUNCTION....: ear_protection() - AUTHOR......: David Rowe - DATE CREATED: Nov 7 2012 - - Limits output level to protect ears when there are bit errors or the input - is overdriven. This doesn't correct or mask bit errors, just reduces the - worst of their damage. - -\*---------------------------------------------------------------------------*/ - -static void ear_protection(float in_out[], int n) { - float max_sample, over, gain; - int i; - - /* find maximum sample in frame */ - - max_sample = 0.0; - for(i=0; i max_sample) - max_sample = in_out[i]; - - /* determine how far above set point */ - - over = max_sample/30000.0; - - /* If we are x dB over set point we reduce level by 2x dB, this - attenuates major excursions in amplitude (likely to be caused - by bit errors) more than smaller ones */ - - if (over > 1.0) { - gain = 1.0/(over*over); - //fprintf(stderr, "gain: %f\n", gain); - for(i=0; i