% TODO: we may be able to sample at a lower rate, like mWo
% but start with something that works
-function [phase Gdbfk s Aw] = determine_phase(model, f, ak)
- Nfft = 512; % FFT size to use
+function [phase Gdbfk s Aw] = determine_phase(model, f, Nfft=512, ak)
Fs = 8000;
max_amp = 80;
L = min([model(f,2) max_amp-1]);
% optional input of aks for testing
- if nargin == 3
+ if nargin == 4
Aw = 1 ./ fft(ak,Nfft);
Gdbfk = 20*log10(abs(Aw(1:Nfft/2+1)));
end
- [phase s] = mag_to_phase(Gdbfk);
+ [phase s] = mag_to_phase(Gdbfk, Nfft);
endfunction
for f=1:frames
Wo = model(f,1);
L = model(f,2);
- f
rate_L_sample_freqs_kHz = (1:L)*Wo*4/pi;
% back down to rate L
The newamp1_xxx scripts have evolved to (i) resample {Am} using a
mel frequency axis, (ii) 2 stage VQ the mean removed vector. Seems to work
- OK at 700 bit/s.
+ OK at 700 bit/s, comparable to 1300.
Usage:
%[model_ surface] = experiment_mel_freq(model, 1, 1, voicing);
%model_ = experiment_dec_abys(model, 8, 1, 1, 1, voicing);
- [model_ voicing_ indexes] = experiment_rate_K_dec(model, voicing); % encoder, toss away results except for indexes
- [model_ voicing_] = model_from_indexes(indexes); % decoder uses just indexes, outputs vecs for synthesis
+ [model_ voicing_ indexes] = experiment_rate_K_dec(model, voicing); % encoder/decoder, lets toss away results except for indexes
+ [model_ voicing_] = model_from_indexes(indexes); % decoder uses just indexes, outputs vecs for synthesis
%model_ = experiment_dec_linear(model_);
%model_ = experiment_energy_rate_linear(model, 1, 0);
fWo = fopen(Wo_out_name,"wb");
if synth_phase
- Aw_out_name = sprintf("%s_aw.out", output_prefix);
- faw = fopen(Aw_out_name,"wb");
+ Hm_out_name = sprintf("%s_hm.out", output_prefix);
+ fhm = fopen(Hm_out_name,"wb");
end
for f=1:frames
% synthesis phase spectra from magnitiude spectra using minimum phase techniques
- fft_enc = 512;
- phase = determine_phase(model_, f);
+ fft_enc = 128;
+ phase = determine_phase(model_, f, fft_enc);
assert(length(phase) == fft_enc);
- Aw = zeros(1, fft_enc*2);
- Aw(1:2:fft_enc*2) = cos(phase);
- Aw(2:2:fft_enc*2) = -sin(phase);
- fwrite(faw, Aw, "float32");
+ %Aw = zeros(1, fft_enc*2);
+ %Aw(1:2:fft_enc*2) = cos(phase);
+ %Aw(2:2:fft_enc*2) = -sin(phase);
+
+ % sample phase at centre of each harmonic, not 1st entry Hm[1] in octave Hm[0] in C
+ % is not used
+
+ Hm = zeros(1, 2*max_amp);
+ for m=1:L
+ b = round(m*Wo*fft_enc/(2*pi));
+ Hm(2*m) = cos(phase(b));
+ Hm(2*m+1) = -sin(phase(b));
+ end
+ fwrite(fhm, Hm, "float32");
end
end
fclose(fam);
fclose(fWo);
if synth_phase
- fclose(faw);
+ fclose(fhm);
end
% save voicing file
% Linear decimator/interpolator that operates at rate K, includes VQ, post filter, and Wo/E
% quantisation. Evolved from abys decimator below. Simulates the entire encoder/decoder.
-#{
- todo:
- [ ] M=8 nomenclature, make it closer to existing 25ms C modes
- [ ] frame by frame-ish operation, close to C implementations
-#}
-
function [model_ voicing_ indexes] = experiment_rate_K_dec(model, voicing)
max_amp = 80;
[frames nc] = size(model);
% Experimental AbyS decimator that chooses best frames to match
% surface based on AbyS approach. Can apply post filter at different
% points, and optionally do fixed decimation, at rate K. Didn't
-% produce anything spectacular in AbyS mode, suggest anotehr look with
+% produce anything spectacular in AbyS mode, suggest another look with
% some sort of fbf display to see what's going on internally.
function model_ = experiment_dec_abys(model, M=8, vq_en=0, pf_en=1, fixed_dec=0, voicing)
My original idea was to used a 3-4 "resonators" to construct a
piecewise model of the spectrum. Kind of got distracted by the
surface and mel sampling that ended up working OK. This method was
- working OK, soem issues with backgorund noise but rather easy to
+ working OK, soem issues with background noise but rather easy to
quantise.
todo: get this working again
function newamp1_fbf(samname, f=10)
newamp;
more off;
- quant_en = 0; pf_en = 0;
+ quant_en = 0; pf_en = 0; plot_phase = 1;
melvq;
K=20; load train_120_vq; m=5;
pitche = load(voicing_name);
voicing = pitche(:, 3);
end
- size(voicing)
% Keyboard loop --------------------------------------------------------------
end
hold off;
+ if plot_phase
+ phase_512 = determine_phase(model_, 1, 512);
+ phase_128 = determine_phase(model_, 1, 128);
+ figure(4); clf;
+ plot((1:512/2)*8000/512, phase_512(1:256), ';512;b');
+ hold on;
+ plot((1:128/2)*8000/128, phase_128(1:64), ';128;g');
+ hold off;
+ end
+
% interactive menu ------------------------------------------
printf("\rframe: %d menu: n-next b-back q-quit m-quant_en[%d] p-pf[%d]", f, quant_en, pf_en);
float lspmelvq_mse = 0.0;
int amread, Woread;
FILE *fam = NULL, *fWo = NULL;
- int awread;
- FILE *faw = NULL;
+ int hmread;
+ FILE *fhm = NULL;
char* opt_string = "ho:";
struct option long_options[] = {
{ "bpf", no_argument, &bpf_en, 1 },
{ "bpfb", no_argument, &bpfb_en, 1 },
{ "amread", required_argument, &amread, 1 },
- { "awread", required_argument, &awread, 1 },
+ { "hmread", required_argument, &hmread, 1 },
{ "Woread", required_argument, &Woread, 1 },
#ifdef DUMP
{ "dump", required_argument, &dump, 1 },
};
int num_opts=sizeof(long_options)/sizeof(struct option);
COMP Aw[FFT_ENC];
+ COMP H[MAX_AMP];
for(i=0; i<M_PITCH; i++) {
Sn[i] = 1.0;
optarg, strerror(errno));
exit(1);
}
- } else if(strcmp(long_options[option_index].name, "awread") == 0) {
- if ((faw = fopen(optarg,"rb")) == NULL) {
- fprintf(stderr, "Error opening float Aw file: %s: %s.\n",
+ } else if(strcmp(long_options[option_index].name, "hmread") == 0) {
+ if ((fhm = fopen(optarg,"rb")) == NULL) {
+ fprintf(stderr, "Error opening float Hm file: %s: %s.\n",
optarg, strerror(errno));
exit(1);
}
/* optionally read in Aw FFT vector, we really only care about the phase
of each entry, used for reading in phases generated by Octave */
- if (awread) {
- int ret = fread(Aw, sizeof(COMP), FFT_ENC, faw);
- //for(int j=0; j<10; j++) {
- // fprintf(stderr, "%f %f\n", Aw[j].real, Aw[j].imag);
- //}
- //exit(0);
- assert(ret == FFT_ENC);
+ if (hmread) {
+ int ret = fread(&H, sizeof(float), MAX_AMP, fhm);
+ assert(ret == MAX_AMP);
}
//fprintf(stderr, "frame: %d Wo: %f L: %d v: %d\n", frames, model_dec[i].Wo, model_dec[i].L, model_dec[i].voiced);
- if (phase0)
- phase_synth_zero_order(fft_fwd_cfg, &model_dec[i], ex_phase, Aw);
+ if (phase0) {
+ sample_phase(&model_dec[i], H, Aw);
+ phase_synth_zero_order(&model_dec[i], ex_phase, H);
+ }
if (postfilt)
postfilter(&model_dec[i], &bg_est);
synth_one_frame(fftr_inv_cfg, buf, &model_dec[i], Sn_, Pn, prede, &de_mem, gain);
PROFILE_SAMPLE(phase_start);
- phase_synth_zero_order(c2->fft_fwd_cfg, model, &c2->ex_phase, Aw);
+ COMP H[MAX_AMP];
+ sample_phase(model, H, Aw);
+ phase_synth_zero_order(model, &c2->ex_phase, H);
PROFILE_SAMPLE_AND_LOG(pf_start, phase_start, " phase_synth");
#include <string.h>
#include <stdlib.h>
+/*---------------------------------------------------------------------------*\
+
+ sample_phase()
+
+ Samples phase at centre of each harmonic from and array of FFT_ENC
+ DFT samples.
+
+\*---------------------------------------------------------------------------*/
+
+void sample_phase(MODEL *model, COMP H[], COMP A[])
+{
+ int m, b;
+ float phi_, r;
+
+ r = TWO_PI/(FFT_ENC);
+
+ /* Sample phase at harmonics */
+
+ for(m=1; m<=model->L; m++) {
+ b = (int)(m*model->Wo/r + 0.5);
+ phi_ = -atan2f(A[b].imag, A[b].real);
+ H[m].real = cosf(phi_);
+ H[m].imag = sinf(phi_);
+ }
+}
+
/*---------------------------------------------------------------------------*\
parameters are required apart from the SNR (which can be reduced to a
1 bit V/UV decision per frame).
- The phase of each harmonic is modelled as the phase of a LPC
- synthesis filter excited by an impulse. Unlike the first order
- model the position of the impulse is not transmitted, so we create
- an excitation pulse train using a rule based approach.
+ The phase of each harmonic is modelled as the phase of a synthesis
+ filter excited by an impulse. In many Codec 2 modes the synthesis
+ filter is a LPC filter. Unlike the first order model the position
+ of the impulse is not transmitted, so we create an excitation pulse
+ train using a rule based approach.
Consider a pulse train with a pulse starting time n=0, with pulses
repeated at a rate of Wo, the fundamental frequency. A pulse train
- If there are voicing errors, the speech can sound clicky or
staticy. If V speech is mistakenly declared UV, this model tends to
synthesise impulses or clicks, as there is usually very little shift or
- dispersion through the LPC filter.
+ dispersion through the LPC synthesis filter.
- When combined with LPC amplitude modelling there is an additional
drop in quality. I am not sure why, theory is interformant energy
\*---------------------------------------------------------------------------*/
void phase_synth_zero_order(
- codec2_fft_cfg fft_fwd_cfg,
MODEL *model,
- float *ex_phase, /* excitation phase of fundamental */
- COMP A[]
+ float *ex_phase, /* excitation phase of fundamental */
+ COMP H[] /* L synthesis filter freq domain samples */
+
)
{
- int m, b;
- float phi_, new_phi, r;
+ int m;
+ float new_phi;
COMP Ex[MAX_AMP+1]; /* excitation samples */
COMP A_[MAX_AMP+1]; /* synthesised harmonic samples */
- COMP H[MAX_AMP+1]; /* LPC freq domain samples */
-
- r = TWO_PI/(FFT_ENC);
-
- /* Sample phase at harmonics */
-
- for(m=1; m<=model->L; m++) {
- b = (int)(m*model->Wo/r + 0.5);
- phi_ = -atan2f(A[b].imag, A[b].real);
- H[m].real = cosf(phi_);
- H[m].imag = sinf(phi_);
- }
/*
Update excitation fundamental phase track, this sets the position
#include "codec2_fft.h"
#include "comp.h"
-void phase_synth_zero_order(codec2_fft_cfg fft_dec_cfg,
- MODEL *model,
- float *ex_phase,
- COMP A[]);
+void sample_phase(MODEL *model, COMP filter_phase[], COMP A[]);
+void phase_synth_zero_order(MODEL *model, float *ex_phase, COMP filter_phase[]);
#endif