model_name = strcat(samname,"_model.txt");
model = load(model_name);
- [frames nc] = size(model)
+ [frames nc] = size(model);
voicing_name = strcat(samname,"_pitche.txt");
voicing = zeros(1,frames);
%model_ = experiment_filter_dec_filter(model);
%[model_ surface] = experiment_mel_freq(model, 1, 1, voicing);
- model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing);
-
+ %model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing);
+ [model_ voicing_] = experiment_rate_K_dec(model, voicing);
+
%model_ = experiment_dec_linear(model_);
%model_ = experiment_energy_rate_linear(model, 1, 0);
if synth_phase
fclose(faw);
end
+
+ % save voicing file
+
+ if exist("voicing_", "var")
+ v_out_name = sprintf("%s_v.txt", samname);
+ fv = fopen(v_out_name,"wt");
+ for f=1:length(voicing_)
+ fprintf(fv,"%d\n", voicing_(f));
+ end
+ fclose(fv);
+ end
+
printf("\n")
endfunction
endfunction
-% Experimental AbyS decimator that chooses best frames to match surface
-% based on AbyS approach. Can apply post filter at different points,
-% and optionally do fixed decimation, at rate K.
+% Linear decimator/interpolator that operates at rate K, includes VQ, post filter, and Wo/E
+% quantisation. Evolved from the AbyS decimator below.
+
+function [model_ voicing_ ] = experiment_rate_K_dec(model, voicing)
+  % Usage: [model_ voicing_] = experiment_rate_K_dec(model, voicing)
+  %
+  % Resamples the model amplitudes to a rate K surface, vector quantises and
+  % post filters that surface, decimates/interpolates it in time over
+  % segments of M frames, then converts the result back to a model matrix
+  % using resample_rate_L().
+  %
+  % Inputs:
+  %   model   - one row per frame; column 1 is read as Wo.  The meaning of
+  %             the other columns is defined by the helper functions called
+  %             below (TODO confirm against resample_const_rate_f_mel).
+  %   voicing - per-frame voicing flags, indexed 1..frames, non-zero == voiced
+  %
+  % Outputs:
+  %   model_   - value returned by resample_rate_L(), built from a
+  %              frames x (max_amp+3) matrix with Wo in column 1 and L in
+  %              column 2
+  %   voicing_ - per-frame voicing flags (currently a copy of voicing, see
+  %              the NOTE near the end of this function)
+
+  max_amp = 80;
+  [frames nc] = size(model);
+  model_ = zeros(frames, max_amp+3);
+
+  M = 8;
+
+  % create frames x K surface.  TODO make all of this operate frame by
+  % frame, or at least M/2=4 frames rather than one big chunk
+
+  K = 20;
+  [surface sample_freqs_kHz] = resample_const_rate_f_mel(model, K);
+  target_surface = surface;
+
+  % VQ rate K surface.  TODO: If we are decimating by M/2=4 we really
+  % only need to do this every 4th frame.
+
+  melvq;
+  load train_120_vq; m=5;
+
+  % remove the per-frame mean before VQ; it is added back after the
+  % post filter.  Preallocate so the arrays are not grown inside the loop.
+
+  mean_f = zeros(1, frames);
+  surface_no_mean = zeros(frames, columns(surface));
+  for f=1:frames
+    mean_f(f) = mean(surface(f,:));
+    surface_no_mean(f,:) = surface(f,:) - mean_f(f);
+  end
+
+  [res surface_no_mean_ ind] = mbest(train_120_vq, surface_no_mean, m);
+
+  for f=1:frames
+    surface_no_mean_(f,:) = post_filter(surface_no_mean_(f,:), sample_freqs_kHz, 1.5);
+  end
+
+  surface_ = zeros(frames, K);
+  for f=1:frames
+    surface_(f,:) = surface_no_mean_(f,:) + mean_f(f);
+  end
+
+  % break into segments of M frames.  We have 3 samples in M frame
+  % segment spaced M/2 apart and interpolate the rest.  This evolved
+  % from AbyS scheme below but could be simplified to simple linear
+  % interpolation, or using 3 or 4 points but shift of M/2=4 frames.
+  %
+  % Frames after the last complete segment are left at the zeros
+  % interpolated_surface_ was initialised with.
+
+  interpolated_surface_ = zeros(frames, K);
+  for f=1:M:frames-M
+    left_vec = surface_(f,:);
+    mid = f+M/2;                 % centre frame of this segment
+    centre_vec = surface_(mid,:);
+    right_vec = surface_(f+M,:);
+    sample_points = [f mid f+M];
+    resample_points = f:f+M-1;
+    for k=1:K
+      interpolated_surface_(resample_points,k) = interp1(sample_points, [left_vec(k) centre_vec(k) right_vec(k)], resample_points, "spline", 0);
+    end
+  end
+
+  % break into M/2 segments for purposes of Wo interpolation.  The
+  % voicing at each end of the segment selects one of four cases; a
+  % fixed Wo of 2*pi/100 is used for the unvoiced part of a segment.
+
+  voicing_ = zeros(1, frames);
+  for f=1:M/2:frames-M/2
+
+    % both ends unvoiced: whole segment unvoiced
+
+    if !voicing(f) && !voicing(f+M/2)
+      model_(f:f+M/2-1,1) = 2*pi/100;
+    end
+
+    % voiced -> unvoiced: first half holds the left Wo, second half unvoiced
+
+    if voicing(f) && !voicing(f+M/2)
+      model_(f:f+M/4-1,1) = model(f,1);
+      model_(f+M/4:f+M/2-1,1) = 2*pi/100;
+      voicing_(f:f+M/4-1) = 1;
+    end
+
+    % unvoiced -> voiced: first half unvoiced, second half takes the right Wo
+
+    if !voicing(f) && voicing(f+M/2)
+      model_(f:f+M/4-1,1) = 2*pi/100;
+      model_(f+M/4:f+M/2-1,1) = model(f+M/2,1);
+      voicing_(f+M/4:f+M/2-1) = 1;
+    end
+
+    % both ends voiced: linearly interpolate Wo across the segment
+
+    if voicing(f) && voicing(f+M/2)
+      Wo_samples = [model(f,1) model(f+M/2,1)];
+      model_(f:f+M/2-1,1) = interp1([f f+M/2], Wo_samples, f:f+M/2-1, "linear", 0);
+      voicing_(f:f+M/2-1) = 1;
+    end
+
+    % debug: the percentage compares the two original Wo samples at the
+    % segment ends.  model_(f+M/2,1) must not be used here as it belongs to
+    % the next segment and has not been written yet (it would always give
+    % 100%).
+
+    printf("f: %d f+M/2: %d Wo: %f %f (%f %%) v: %d %d \n", f, f+M/2, model(f,1), model(f+M/2,1), 100*abs(model(f,1) - model(f+M/2,1))/model(f,1), voicing(f), voicing(f+M/2));
+    for i=f:f+M/2-1
+      printf("  f: %d v: %d v_: %d Wo: %f Wo_: %f\n", i, voicing(i), voicing_(i), model(i,1), model_(i,1));
+    end
+  end
+  model_(frames-M/2:frames,1) = pi/100; % set end frames to something sensible
+
+  % NOTE(review): the next two assignments replace the interpolated voicing
+  % and Wo with the original, unprocessed values, so the Wo interpolation
+  % above currently only affects the debug printout.  Presumably a
+  % temporary experiment switch - confirm before relying on voicing_.
+
+  voicing_ = voicing;
+  model_(:,1) = model(:,1);
+  %model_(221:225,1) = model(221:225,1);
+  %model_(223:224,1) = model(223:224,1);
+  model_(:,2) = floor(pi ./ model_(:,1)); % calculate L for each interpolated Wo
+  model_ = resample_rate_L(model_, interpolated_surface_, sample_freqs_kHz);
+
+endfunction
+
+
+% Experimental AbyS decimator that chooses best frames to match
+% surface based on AbyS approach. Can apply post filter at different
+% points, and optionally do fixed decimation, at rate K. Didn't
+% produce anything spectacular in AbyS mode, suggest another look with
+% some sort of fbf display to see what's going on internally.
function model_ = experiment_dec_abys(model, M=8, vq_en=0, pf_en=1, fixed_dec=0, voicing)
max_amp = 80;
void synth_one_frame(codec2_fftr_cfg fftr_inv_cfg, short buf[], MODEL *model, float Sn_[], float Pn[], int prede, float *de_mem, float gain);
void print_help(const struct option *long_options, int num_opts, char* argv[]);
+static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
//printf("snr %3.2f v: %d Wo: %f prev_Wo: %f\n", snr, model.voiced,
// model.Wo, prev_uq_Wo);
#ifdef DUMP
- dump_Sw_(Sw_);
- dump_Ew(Ew);
+ //dump_Sw_(Sw_);
+ //dump_Ew(Ew);
dump_snr(snr);
#endif
if (Woread) {
int ret = fread(&model.Wo, sizeof(float), 1, fWo);
+ model.L = floor(PI/model.Wo);
assert(ret == 1);
}
#endif
}
- /* optionally rewad in Aw FFT vector, we really only care about the phase
+ /* optionally read in Aw FFT vector, we really only care about the phase
of each entry, used for reading in phases generated by Octave */
if (awread) {
assert(ret == FFT_ENC);
}
+ fprintf(stderr, "frame: %d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced);
if (phase0)
phase_synth_zero_order(fft_fwd_cfg, &model_dec[i], ex_phase, Aw);
if (postfilt)
exit(1);
}
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: ear_protection()
+ AUTHOR......: David Rowe
+ DATE CREATED: Nov 7 2012
+
+ Limits output level to protect ears when there are bit errors or the input
+ is overdriven. This doesn't correct or mask bit errors, just reduces the
+ worst of their damage.
+
+\*---------------------------------------------------------------------------*/
+
+static void ear_protection(float in_out[], int n) {
+    /* in_out: frame of n samples, attenuated in place when its peak
+       magnitude exceeds the 30000.0 set point; left untouched otherwise.
+       n: number of samples in in_out[]. */
+
+    float max_sample, over, gain, mag;
+    int   i;
+
+    /* find maximum sample magnitude in frame.  Negative excursions must
+       be considered as well as positive ones, otherwise a frame that is
+       only overdriven on the negative side is never attenuated */
+
+    max_sample = 0.0;
+    for(i=0; i<n; i++) {
+        mag = (in_out[i] < 0.0) ? -in_out[i] : in_out[i];
+        if (mag > max_sample)
+            max_sample = mag;
+    }
+
+    /* determine how far above set point */
+
+    over = max_sample/30000.0;
+
+    /* If we are x dB over set point we reduce level by 2x dB, this
+       attenuates major excursions in amplitude (likely to be caused
+       by bit errors) more than smaller ones */
+
+    if (over > 1.0) {
+        gain = 1.0/(over*over);
+        //fprintf(stderr, "gain: %f\n", gain);
+        for(i=0; i<n; i++)
+            in_out[i] *= gain;
+    }
+}