#include "codec2_internal.h"
#include "machdep.h"
#include "bpf.h"
+#include "bpfb.h"
/*---------------------------------------------------------------------------*\
void codec2_decode_1200(struct CODEC2 *c2, short speech[], const unsigned char * bits);
void codec2_encode_700(struct CODEC2 *c2, unsigned char * bits, short speech[]);
void codec2_decode_700(struct CODEC2 *c2, short speech[], const unsigned char * bits);
+void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[]);
+void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits);
static void ear_protection(float in_out[], int n);
/*---------------------------------------------------------------------------*\
(mode == CODEC2_MODE_1400) ||
(mode == CODEC2_MODE_1300) ||
(mode == CODEC2_MODE_1200) ||
- (mode == CODEC2_MODE_700)
+ (mode == CODEC2_MODE_700) ||
+ (mode == CODEC2_MODE_700B)
);
c2->mode = mode;
for(i=0; i<M; i++)
return NULL;
}
- c2->gray = 1;
+ if (mode == CODEC2_MODE_700B)
+ c2->gray = 0; // natural binary better for trellis decoding (hopefully added later)
+ else
+ c2->gray = 1;
c2->lpc_pf = 1; c2->bass_boost = 1; c2->beta = LPCPF_BETA; c2->gamma = LPCPF_GAMMA;
return 48;
if (c2->mode == CODEC2_MODE_700)
return 28;
+ if (c2->mode == CODEC2_MODE_700B)
+ return 28;
return 0; /* shouldn't get here */
}
return 320;
if (c2->mode == CODEC2_MODE_700)
return 320;
+ if (c2->mode == CODEC2_MODE_700B)
+ return 320;
return 0; /* shouldnt get here */
}
(c2->mode == CODEC2_MODE_1400) ||
(c2->mode == CODEC2_MODE_1300) ||
(c2->mode == CODEC2_MODE_1200) ||
- (c2->mode == CODEC2_MODE_700)
+ (c2->mode == CODEC2_MODE_700) ||
+ (c2->mode == CODEC2_MODE_700B)
);
if (c2->mode == CODEC2_MODE_3200)
codec2_encode_1200(c2, bits, speech);
if (c2->mode == CODEC2_MODE_700)
codec2_encode_700(c2, bits, speech);
+ if (c2->mode == CODEC2_MODE_700B)
+ codec2_encode_700b(c2, bits, speech);
}
void codec2_decode(struct CODEC2 *c2, short speech[], const unsigned char *bits)
(c2->mode == CODEC2_MODE_1400) ||
(c2->mode == CODEC2_MODE_1300) ||
(c2->mode == CODEC2_MODE_1200) ||
- (c2->mode == CODEC2_MODE_700)
+ (c2->mode == CODEC2_MODE_700) ||
+ (c2->mode == CODEC2_MODE_700B)
);
if (c2->mode == CODEC2_MODE_3200)
codec2_decode_1200(c2, speech, bits);
if (c2->mode == CODEC2_MODE_700)
codec2_decode_700(c2, speech, bits);
+ if (c2->mode == CODEC2_MODE_700B)
+ codec2_decode_700b(c2, speech, bits);
}
}
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_encode_700b
+ AUTHOR......: David Rowe
+ DATE CREATED: August 2015
+
+ Version b of 700 bit/s codec. After some experiments over the air I
+ was unhappy with the rate 700 codec so spent a few weeks
+ trying to improve the speech quality. This version uses a wider BPF
+ and vector quantised mel-lsps.
+
+ Encodes 320 speech samples (40ms of speech) into 28 bits.
+
+ The codec2 algorithm actually operates internally on 10ms (80
+ sample) frames, so we run the encoding algorithm four times:
+
+ frame 0: nothing
+ frame 1: nothing
+ frame 2: nothing
+ frame 3: voicing bit, 5 bit scalar Wo and 3 bit E, 18 bit LSP MEL VQ,
+ 1 spare
+
+ The bit allocation is:
+
+ Parameter frames 1-3 frame 4 Total
+ -----------------------------------------------------------
+ Harmonic magnitudes (LSPs) 0 18 18
+ Energy 0 3 3
+ log Wo 0 5 5
+ Voicing 0 1 1
+ spare 0 1 1
+ TOTAL 0 28 28
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_encode_700b(struct CODEC2 *c2, unsigned char * bits, short speech[])
+{
+ MODEL model;
+ float lsps[LPC_ORD_LOW];
+ float mel[LPC_ORD_LOW];
+ float mel_[LPC_ORD_LOW];
+ float ak[LPC_ORD_LOW+1];
+ float e, f;
+ int indexes[3];
+ int Wo_index, e_index, i;
+ unsigned int nbit = 0;
+ float bpf_out[4*N];
+ short bpf_speech[4*N];
+ int spare = 0;
+
+ assert(c2 != NULL);
+
+ memset(bits, '\0', ((codec2_bits_per_frame(c2) + 7) / 8)); /* clear entire packed output frame before packing */
+
+ /* band pass filter */
+
+ for(i=0; i<BPF_N; i++) /* shift filter memory: keep last BPF_N samples from previous call */
+ c2->bpf_buf[i] = c2->bpf_buf[4*N+i];
+ for(i=0; i<4*N; i++) /* append new 40ms (4*N samples) of input speech */
+ c2->bpf_buf[BPF_N+i] = speech[i];
+ inverse_filter(&c2->bpf_buf[BPF_N], bpfb, 4*N, bpf_out, BPF_N); /* FIR filter with bpfb (the wider 700B BPF) coefficients */
+ for(i=0; i<4*N; i++)
+ bpf_speech[i] = bpf_out[i]; /* NOTE(review): float->short truncation, no rounding or clipping — confirm intended */
+
+ /* frame 1 --------------------------------------------------------*/
+
+ analyse_one_frame(c2, &model, bpf_speech); /* frames 1-3 only advance analysis state; no bits are sent for them */
+
+ /* frame 2 --------------------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &bpf_speech[N]);
+
+ /* frame 3 --------------------------------------------------------*/
+
+ analyse_one_frame(c2, &model, &bpf_speech[2*N]);
+
+ /* frame 4: - voicing, scalar Wo & E, VQ mel LSPs -----------------*/
+
+ analyse_one_frame(c2, &model, &bpf_speech[3*N]);
+ pack(bits, &nbit, model.voiced, 1); /* 1 voicing bit */
+ Wo_index = encode_log_Wo(model.Wo, 5); /* 5 bit log-scaled pitch (Wo) */
+ pack_natural_or_gray(bits, &nbit, Wo_index, 5, c2->gray);
+
+ e = speech_to_uq_lsps(lsps, ak, c2->Sn, c2->w, LPC_ORD_LOW); /* unquantised LSPs (radians) and frame energy */
+ e_index = encode_energy(e, 3); /* 3 bit energy */
+ pack_natural_or_gray(bits, &nbit, e_index, 3, c2->gray);
+
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ f = (4000.0/PI)*lsps[i]; /* LSP radians -> Hz (Fs/2 = 4000 Hz) */
+ mel[i] = floor(2595.0*log10(1.0 + f/700.0) + 0.5); /* Hz -> mel, rounded to nearest integer */
+ }
+ lspmelvq_mbest_encode(indexes, mel, mel_, LPC_ORD_LOW, 5); /* multi-stage mel-LSP VQ, mbest search depth 5 */
+
+ for(i=0; i<3; i++) {
+ pack_natural_or_gray(bits, &nbit, indexes[i], lspmelvq_cb_bits(i), c2->gray); /* 3 VQ stage indexes, 18 bits total */
+ }
+
+ pack_natural_or_gray(bits, &nbit, spare, 1, c2->gray); /* 1 spare bit, always 0 */
+
+ assert(nbit == (unsigned)codec2_bits_per_frame(c2)); /* bit budget must come to exactly 28 */
+}
+
+
+/*---------------------------------------------------------------------------*\
+
+ FUNCTION....: codec2_decode_700b
+ AUTHOR......: David Rowe
+ DATE CREATED: August 2015
+
+ Decodes frames of 28 bits into 320 samples (40ms) of speech.
+
+\*---------------------------------------------------------------------------*/
+
+void codec2_decode_700b(struct CODEC2 *c2, short speech[], const unsigned char * bits)
+{
+ MODEL model[4];
+ int indexes[3];
+ float mel[LPC_ORD_LOW];
+ float lsps[4][LPC_ORD_LOW];
+ int Wo_index, e_index;
+ float e[4];
+ float snr, f_;
+ float ak[4][LPC_ORD_LOW+1];
+ int i,j;
+ unsigned int nbit = 0;
+ float weight;
+ COMP Aw[FFT_ENC];
+
+ assert(c2 != NULL);
+
+ /* only need to zero these out due to (unused) snr calculation */
+
+ for(i=0; i<4; i++)
+ for(j=1; j<=MAX_AMP; j++)
+ model[i].A[j] = 0.0;
+
+ /* unpack bits from channel ------------------------------------*/
+
+ model[3].voiced = unpack(bits, &nbit, 1); /* unpack order must mirror codec2_encode_700b's pack order */
+ model[0].voiced = model[1].voiced = model[2].voiced = model[3].voiced; /* one voicing bit covers all 4 frames */
+
+ Wo_index = unpack_natural_or_gray(bits, &nbit, 5, c2->gray);
+ model[3].Wo = decode_log_Wo(Wo_index, 5);
+ model[3].L = PI/model[3].Wo; /* harmonic count below Fs/2 for this Wo */
+
+ e_index = unpack_natural_or_gray(bits, &nbit, 3, c2->gray);
+ e[3] = decode_energy(e_index, 3);
+
+ for(i=0; i<3; i++) {
+ indexes[i] = unpack_natural_or_gray(bits, &nbit, lspmelvq_cb_bits(i), c2->gray); /* 3 VQ stage indexes */
+ }
+
+ lspmelvq_decode(indexes, mel, LPC_ORD_LOW); /* reconstruct quantised mel-domain LSPs */
+
+ #define MEL_ROUND 10
+ for(i=1; i<LPC_ORD_LOW; i++) { /* force a minimum MEL_ROUND gap between adjacent mel LSPs (keeps LSPs ordered/stable) */
+ if (mel[i] <= mel[i-1]+MEL_ROUND) {
+ mel[i]+=MEL_ROUND/2; /* push the pair apart symmetrically */
+ mel[i-1]-=MEL_ROUND/2;
+ i = 1; /* restart scan after an adjustment; loop's i++ resumes checking from pair (1,2) — NOTE(review): confirm pair (0,1) never needs a re-check */
+ }
+ }
+
+ for(i=0; i<LPC_ORD_LOW; i++) {
+ f_ = 700.0*( pow(10.0, (float)mel[i]/2595.0) - 1.0); /* mel -> Hz: inverse of the encoder's 2595*log10(1+f/700) mapping */
+ lsps[3][i] = f_*(PI/4000.0); /* Hz -> LSP radians (Fs/2 = 4000 Hz) */
+ //printf("lsps[3][%d] %f\n", i, lsps[3][i]);
+ }
+
+ /* interpolate ------------------------------------------------*/
+
+ /* LSPs, Wo, and energy are sampled every 40ms so we interpolate
+ the 3 frames in between, then recover spectral amplitudes */
+
+ for(i=0, weight=0.25; i<3; i++, weight += 0.25) { /* weights 0.25, 0.5, 0.75 between previous and current decoded params */
+ interpolate_lsp_ver2(&lsps[i][0], c2->prev_lsps_dec, &lsps[3][0], weight, LPC_ORD_LOW);
+ interp_Wo2(&model[i], &c2->prev_model_dec, &model[3], weight);
+ e[i] = interp_energy2(c2->prev_e_dec, e[3],weight);
+ }
+ for(i=0; i<4; i++) { /* synthesise each 10ms (N sample) sub-frame */
+ lsp_to_lpc(&lsps[i][0], &ak[i][0], LPC_ORD_LOW);
+ aks_to_M2(c2->fft_fwd_cfg, &ak[i][0], LPC_ORD_LOW, &model[i], e[i], &snr, 0, 0,
+ c2->lpc_pf, c2->bass_boost, c2->beta, c2->gamma, Aw); /* recover harmonic magnitudes from LPCs + energy */
+ apply_lpc_correction(&model[i]);
+ synthesise_one_frame(c2, &speech[N*i], &model[i], Aw);
+ }
+
+ #ifdef DUMP
+ dump_lsp_(&lsps[3][0]);
+ dump_ak_(&ak[3][0], LPC_ORD_LOW);
+ dump_model(&model[3]);
+ if (c2->softdec)
+ dump_softdec(c2->softdec, nbit);
+ #endif
+
+ /* update memories for next frame ----------------------------*/
+
+ c2->prev_model_dec = model[3];
+ c2->prev_e_dec = e[3];
+ for(i=0; i<LPC_ORD_LOW; i++)
+ c2->prev_lsps_dec[i] = lsps[3][i];
+}
+
+
/*---------------------------------------------------------------------------*\
FUNCTION....: synthesise_one_frame()
case CODEC2_MODE_700:
return 26; // bits 26 and 27 are spare
break;
+ case CODEC2_MODE_700B:
+ return 26; // bits 26 and 27 are spare
+ break;
}
return -1;