nin = next_nin;
 
-  % nin = M;  % when debugging good idea to uncomment this to "open loop"
+  %nin = M;  % when debugging good idea to uncomment this to "open loop"
 
   channel = [channel real(tx_fdm)];
   channel_count += M;
     rx_fdm_fcorr(i) = rx_fdm(i)*foff_phase_rect;
   end
 
-  rx_baseband = fdm_downconvert(rx_fdm_fcorr, nin);
+if 1
+  % more memory efficient but more complex
+  rx_filt = down_convert_and_rx_filter(rx_fdm_fcorr, nin);
+else
+  rx_baseband = fdm_downconvert(rx_fdm_corr,nin);
   rx_baseband_log = [rx_baseband_log rx_baseband];
-
   rx_filt = rx_filter(rx_baseband, nin);
+end
+
   rx_filt_log = [rx_filt_log rx_filt];
 
   [rx_symbols rx_timing env] = rx_est_timing(rx_filt, nin);
 plot_sig_and_error(10, 212, sync_log, sync_log - sync_log_c, 'Sync & Freq Est Coarse(0) Fine(1)', [1 frames -1.5 1.5] )
 
 c=1;
+if 0
 plot_sig_and_error(11, 211, real(rx_baseband_log(c,:)), real(rx_baseband_log(c,:) - rx_baseband_log_c(c,:)), 'Rx baseband real' )
 plot_sig_and_error(11, 212, imag(rx_baseband_log(c,:)), imag(rx_baseband_log(c,:) - rx_baseband_log_c(c,:)), 'Rx baseband imag' )
+end
 
 plot_sig_and_error(12, 211, real(rx_filt_log(c,:)), real(rx_filt_log(c,:) - rx_filt_log_c(c,:)), 'Rx filt real' )
 plot_sig_and_error(12, 212, imag(rx_filt_log(c,:)), imag(rx_filt_log(c,:) - rx_filt_log_c(c,:)), 'Rx filt imag' )
 check(foff_coarse_log, foff_coarse_log_c, 'foff_coarse');
 check(foff_fine_log, foff_fine_log_c, 'foff_fine');
 check(foff_log, foff_log_c, 'foff');
-check(rx_baseband_log, rx_baseband_log_c, 'rx baseband');
+%check(rx_baseband_log, rx_baseband_log_c, 'rx baseband');
 check(rx_filt_log, rx_filt_log_c, 'rx filt');
 check(env_log, env_log_c, 'env');
 check(rx_timing_log, rx_timing_log_c, 'rx_timing');
 
     float  clock_offset;               /* Estimated tx/rx sample clock offset in ppm         */
 };
 
-struct FDMDV * CODEC2_WIN32SUPPORT fdmdv_create(int Nc);
-void           CODEC2_WIN32SUPPORT fdmdv_destroy(struct FDMDV *fdmdv_state);
-void           CODEC2_WIN32SUPPORT fdmdv_use_old_qpsk_mapping(struct FDMDV *fdmdv_state);
-int            CODEC2_WIN32SUPPORT fdmdv_bits_per_frame(struct FDMDV *fdmdv_state);
-float          CODEC2_WIN32SUPPORT fdmdv_get_fsep(struct FDMDV *fdmdv_state);
-void           CODEC2_WIN32SUPPORT fdmdv_set_fsep(struct FDMDV *fdmdv_state, float fsep);
-
-void           CODEC2_WIN32SUPPORT fdmdv_mod(struct FDMDV *fdmdv_state, COMP tx_fdm[], int tx_bits[], int *sync_bit);
-void           CODEC2_WIN32SUPPORT fdmdv_demod(struct FDMDV *fdmdv_state, int rx_bits[], int *reliable_sync_bit, COMP rx_fdm[], int *nin);
+struct FDMDV * fdmdv_create(int Nc);
+void           fdmdv_destroy(struct FDMDV *fdmdv_state);
+void           fdmdv_use_old_qpsk_mapping(struct FDMDV *fdmdv_state);
+int            fdmdv_bits_per_frame(struct FDMDV *fdmdv_state);
+float          fdmdv_get_fsep(struct FDMDV *fdmdv_state);
+void           fdmdv_set_fsep(struct FDMDV *fdmdv_state, float fsep);
+
+void           fdmdv_mod(struct FDMDV *fdmdv_state, COMP tx_fdm[], int tx_bits[], int *sync_bit);
+void           fdmdv_demod(struct FDMDV *fdmdv_state, int rx_bits[], int *reliable_sync_bit, COMP rx_fdm[], int *nin);
     
-void           CODEC2_WIN32SUPPORT fdmdv_get_test_bits(struct FDMDV *fdmdv_state, int tx_bits[]);
-int            CODEC2_WIN32SUPPORT fdmdv_error_pattern_size(struct FDMDV *fdmdv_state);
-void           CODEC2_WIN32SUPPORT fdmdv_put_test_bits(struct FDMDV *f, int *sync, short error_pattern[], int *bit_errors, int *ntest_bits, int rx_bits[]);
+void           fdmdv_get_test_bits(struct FDMDV *fdmdv_state, int tx_bits[]);
+int            fdmdv_error_pattern_size(struct FDMDV *fdmdv_state);
+void           fdmdv_put_test_bits(struct FDMDV *f, int *sync, short error_pattern[], int *bit_errors, int *ntest_bits, int rx_bits[]);
     
-void           CODEC2_WIN32SUPPORT fdmdv_get_demod_stats(struct FDMDV *fdmdv_state, struct FDMDV_STATS *fdmdv_stats);
-void           CODEC2_WIN32SUPPORT fdmdv_get_rx_spectrum(struct FDMDV *fdmdv_state, float mag_dB[], COMP rx_fdm[], int nin);
+void           fdmdv_get_demod_stats(struct FDMDV *fdmdv_state, struct FDMDV_STATS *fdmdv_stats);
+void           fdmdv_get_rx_spectrum(struct FDMDV *fdmdv_state, float mag_dB[], COMP rx_fdm[], int nin);
 
-void           CODEC2_WIN32SUPPORT fdmdv_8_to_48(float out48k[], float in8k[], int n);
-void           CODEC2_WIN32SUPPORT fdmdv_48_to_8(float out8k[], float in48k[], int n);
+void           fdmdv_8_to_48(float out48k[], float in8k[], int n);
+void           fdmdv_48_to_8(float out8k[], float in48k[], int n);
 
-void           CODEC2_WIN32SUPPORT fdmdv_freq_shift(COMP rx_fdm_fcorr[], COMP rx_fdm[], float foff, COMP *foff_phase_rect, int nin);
+void           fdmdv_freq_shift(COMP rx_fdm_fcorr[], COMP rx_fdm[], float foff, COMP *foff_phase_rect, int nin);
 
 /* debug/development function(s) */
 
-void CODEC2_WIN32SUPPORT fdmdv_dump_osc_mags(struct FDMDV *f);
+void fdmdv_dump_osc_mags(struct FDMDV *f);
  
 #ifdef __cplusplus
 }
 
 
 \*---------------------------------------------------------------------------*/
 
-struct FDMDV * CODEC2_WIN32SUPPORT fdmdv_create(int Nc)
+struct FDMDV * fdmdv_create(int Nc)
 {
     struct FDMDV *f;
     int           c, i, k;
            f->tx_filter_memory[c][k].imag = 0.0;
        }
 
-       for(k=0; k<NFILTER; k++) {
-           f->rx_filter_memory[c][k].real = 0.0;
-           f->rx_filter_memory[c][k].imag = 0.0;
-       }
-
        /* Spread initial FDM carrier phase out as far as possible.
            This helped PAPR for a few dB.  We don't need to adjust rx
            phase as DQPSK takes care of that. */
     fdmdv_set_fsep(f, FSEP);
     f->freq[Nc].real = cos(2.0*PI*FDMDV_FCENTRE/FS);
     f->freq[Nc].imag = sin(2.0*PI*FDMDV_FCENTRE/FS);
+    f->freq_pol[Nc]  = 2.0*PI*FDMDV_FCENTRE/FS;
 
     /* Generate DBPSK pilot Look Up Table (LUT) */
 
     f->foff_phase_rect.real = 1.0;
     f->foff_phase_rect.imag = 0.0;
 
+    for(i=0; i<NFILTER+M; i++) {
+        f->rx_fdm_mem[i].real = 0.0; 
+        f->rx_fdm_mem[i].imag = 0.0; 
+    }
+
     f->fest_state = 0;
     f->sync = 0;
     f->timer = 0;
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_destroy(struct FDMDV *fdmdv)
+void fdmdv_destroy(struct FDMDV *fdmdv)
 {
     assert(fdmdv != NULL);
     KISS_FFT_FREE(fdmdv->fft_pilot_cfg);
 }
 
 
-void CODEC2_WIN32SUPPORT fdmdv_use_old_qpsk_mapping(struct FDMDV *fdmdv) {
+void fdmdv_use_old_qpsk_mapping(struct FDMDV *fdmdv) {
     fdmdv->old_qpsk_mapping = 1;  
 }
 
 
-int CODEC2_WIN32SUPPORT fdmdv_bits_per_frame(struct FDMDV *fdmdv)
+int fdmdv_bits_per_frame(struct FDMDV *fdmdv)
 {
     return (fdmdv->Nc * NB);
 }
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_get_test_bits(struct FDMDV *f, int tx_bits[])
+void fdmdv_get_test_bits(struct FDMDV *f, int tx_bits[])
 {
     int i;
     int bits_per_frame = fdmdv_bits_per_frame(f);
     }
  }
 
-float CODEC2_WIN32SUPPORT fdmdv_get_fsep(struct FDMDV *f)
+float fdmdv_get_fsep(struct FDMDV *f)
 {
     return f->fsep;
 }
 
-void CODEC2_WIN32SUPPORT fdmdv_set_fsep(struct FDMDV *f, float fsep) {
+void fdmdv_set_fsep(struct FDMDV *f, float fsep) {
     int   c;
     float carrier_freq;
 
        carrier_freq = (-f->Nc/2 + c)*f->fsep + FDMDV_FCENTRE;
        f->freq[c].real = cos(2.0*PI*carrier_freq/FS);
        f->freq[c].imag = sin(2.0*PI*carrier_freq/FS);
+       f->freq_pol[c]  = 2.0*PI*carrier_freq/FS;
     }
 
     for(c=f->Nc/2; c<f->Nc; c++) {
        carrier_freq = (-f->Nc/2 + c + 1)*f->fsep + FDMDV_FCENTRE;
        f->freq[c].real = cos(2.0*PI*carrier_freq/FS);
        f->freq[c].imag = sin(2.0*PI*carrier_freq/FS);
+       f->freq_pol[c]  = 2.0*PI*carrier_freq/FS;
     }
 }
 
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_mod(struct FDMDV *fdmdv, COMP tx_fdm[], 
-                                  int tx_bits[], int *sync_bit)
+void fdmdv_mod(struct FDMDV *fdmdv, COMP tx_fdm[], int tx_bits[], int *sync_bit)
 {
     COMP          tx_symbols[NC+1];
     COMP          tx_baseband[NC+1][M];
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_freq_shift(COMP rx_fdm_fcorr[], COMP rx_fdm[], float foff, 
-                                          COMP *foff_phase_rect, int nin)
+void fdmdv_freq_shift(COMP rx_fdm_fcorr[], COMP rx_fdm[], float foff, 
+                      COMP *foff_phase_rect, int nin)
 {
     COMP  foff_rect;
     float mag;
     assert(j <= (P+1)); /* check for any over runs */
 }
 
+/*---------------------------------------------------------------------------*\
+                                                       
+  FUNCTION....: down_convert_and_rx_filter()        
+  AUTHOR......: David Rowe                           
+  DATE CREATED: 30/6/2014
+
+  Combined down convert and rx filter, more memory efficient but less
+  intuitive design.  
+
+  Depending on the number of input samples to the demod nin, we
+  produce P-1, P (usually), or P+1 filtered samples at rate P.  nin is
+  occasionally adjusted to compensate for timing slips due to
+  different tx and rx sample clocks.
+
+\*---------------------------------------------------------------------------*/
+
+/*
+TODO:
+  [ ] windback phase at init time, incl cconj
+*/
+
+void down_convert_and_rx_filter(COMP rx_filt[NC+1][P+1], int Nc, COMP rx_fdm[], 
+                                COMP rx_fdm_mem[], COMP phase_rx[], COMP freq[], 
+                                float freq_pol[], int nin)
+{
+    int i,k,m,c,st,N;
+    float windback_phase, mag;
+    COMP  windback_phase_rect;
+    COMP  rx_baseband[NFILTER+M];
+
+    /* update memory of rx_fdm */
+
+    for(i=0; i<NFILTER+M-nin; i++)
+        rx_fdm_mem[i] = rx_fdm_mem[i+nin];
+    for(i=NFILTER+M-nin, k=0; i<NFILTER+M; i++, k++)
+        rx_fdm_mem[i] = rx_fdm[k];
+
+    for(c=0; c<Nc+1; c++) {
+
+        /*
+          now downconvert using current freq offset to get Nfilter+nin
+          baseband samples.
+     
+                     Nfilter             nin
+          |--------------------------|---------|
+                                      | 
+                                  phase_rx(c)
+     
+          This means winding phase(c) back from this point to ensure
+          phase continuity.
+        */
+
+        windback_phase           = -freq_pol[c]*NFILTER;
+        windback_phase_rect.real = cos(windback_phase);
+        windback_phase_rect.imag = sin(windback_phase);
+        phase_rx[c]              = cmult(phase_rx[c],windback_phase_rect);
+    
+        /* down convert all samples in buffer */
+
+        st  = NFILTER+M-1;    /* end of buffer                  */
+        st -= nin-1;          /* first new sample               */
+        st -= NFILTER;        /* first sample used in filtering */
+        
+        for(i=st; i<NFILTER+M; i++) {
+            phase_rx[c]    = cmult(phase_rx[c], freq[c]);
+            rx_baseband[i] = cmult(rx_fdm_mem[i],cconj(phase_rx[c]));
+        }
+ 
+        /* now we can filter this carrier's P symbols */
+
+        N=M/P;
+        for(i=0, k=0; i<nin; i+=N, k++) {
+ 
+           rx_filt[c][k].real = 0.0; rx_filt[c][k].imag = 0.0;
+            
+            for(m=0; m<NFILTER; m++) 
+                rx_filt[c][k] = cadd(rx_filt[c][k], fcmult(gt_alpha5_root[m], rx_baseband[st+i+m]));
+        }
+
+        /* normalise digital oscilators as the magnitude can drift over time */
+
+        mag = cabsolute(phase_rx[c]);
+       phase_rx[c].real /= mag;          
+       phase_rx[c].imag /= mag;
+         
+       //printf("phase_rx[%d] = %f %f\n", c, phase_rx[c].real, phase_rx[c].imag);
+    }
+}
+
 /*---------------------------------------------------------------------------*\
                                                        
   FUNCTION....: rx_est_timing()             
 
 // returns number of shorts in error_pattern[], one short per error
 
-int CODEC2_WIN32SUPPORT fdmdv_error_pattern_size(struct FDMDV *f) {
+int fdmdv_error_pattern_size(struct FDMDV *f) {
     return f->ntest_bits;
 }
 
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_put_test_bits(struct FDMDV *f, int *sync, short error_pattern[],
-                                            int *bit_errors, int *ntest_bits, 
-                                            int rx_bits[])
+void fdmdv_put_test_bits(struct FDMDV *f, int *sync, short error_pattern[],
+                        int *bit_errors, int *ntest_bits, int rx_bits[])
 {
     int   i,j;
     float ber;
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_demod(struct FDMDV *fdmdv, int rx_bits[], 
-                                    int *reliable_sync_bit, COMP rx_fdm[], int *nin)
+void fdmdv_demod(struct FDMDV *fdmdv, int rx_bits[], 
+                int *reliable_sync_bit, COMP rx_fdm[], int *nin)
 {
     float         foff_coarse, foff_fine;
     COMP          rx_fdm_fcorr[M+M/P];
-    COMP          rx_baseband[NC+1][M+M/P];
     COMP          rx_filt[NC+1][P+1];
     COMP          rx_symbols[NC+1];
     float         env[NT*P];
        
     /* baseband processing */
 
-    fdm_downconvert(rx_baseband, fdmdv->Nc, rx_fdm_fcorr, fdmdv->phase_rx, fdmdv->freq, *nin);
-    rx_filter(rx_filt, fdmdv->Nc, rx_baseband, fdmdv->rx_filter_memory, *nin);
+    down_convert_and_rx_filter(rx_filt, fdmdv->Nc, rx_fdm, fdmdv->rx_fdm_mem, fdmdv->phase_rx, fdmdv->freq, 
+                               fdmdv->freq_pol, *nin);
     fdmdv->rx_timing = rx_est_timing(rx_symbols, fdmdv->Nc, rx_filt, fdmdv->rx_filter_mem_timing, env, *nin);   
     
     /* Adjust number of input samples to keep timing within bounds */
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_get_demod_stats(struct FDMDV *fdmdv, 
-                                              struct FDMDV_STATS *fdmdv_stats)
+void fdmdv_get_demod_stats(struct FDMDV *fdmdv, struct FDMDV_STATS *fdmdv_stats)
 {
     int   c;
 
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_8_to_48(float out48k[], float in8k[], int n)
+void fdmdv_8_to_48(float out48k[], float in8k[], int n)
 {
     int i,j,k,l;
 
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_48_to_8(float out8k[], float in48k[], int n)
+void fdmdv_48_to_8(float out8k[], float in48k[], int n)
 {
     int i,j;
 
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_get_rx_spectrum(struct FDMDV *f, float mag_spec_dB[], 
+void fdmdv_get_rx_spectrum(struct FDMDV *f, float mag_spec_dB[], 
                                               COMP rx_fdm[], int nin) 
 {
     int   i,j;
 
 \*---------------------------------------------------------------------------*/
 
-void CODEC2_WIN32SUPPORT fdmdv_dump_osc_mags(struct FDMDV *f) 
+void fdmdv_dump_osc_mags(struct FDMDV *f) 
 {
     int   i;
 
 
 
 #define NT                       5  /* number of symbols we estimate timing over                            */
 #define P                        4  /* oversample factor used for initial rx symbol filtering               */
-#define NFILTERTIMING (M+NFILTER+M) /* filter memory used for resampling after timing estimation            */
 
 #define NPILOT_LUT                 (4*M)    /* number of pilot look up table samples                 */
 #define NPILOTCOEFF                   30    /* number of FIR filter coeffs in LP filter              */
 
     /* Modulator */
 
-    int  old_qpsk_mapping;
-    int  tx_pilot_bit;
-    COMP prev_tx_symbols[NC+1];
-    COMP tx_filter_memory[NC+1][NSYM];
-    COMP phase_tx[NC+1];
-    COMP freq[NC+1];
- 
+    int   old_qpsk_mapping;
+    int   tx_pilot_bit;
+    COMP  prev_tx_symbols[NC+1];
+    COMP  tx_filter_memory[NC+1][NSYM];
+    COMP  phase_tx[NC+1];
+    COMP  freq[NC+1];
+    float freq_pol[NC+1];
+  
     /* Pilot generation at demodulator */
 
     COMP pilot_lut[NPILOT_LUT];
     
     /* Demodulator */
 
+    COMP  rx_fdm_mem[NFILTER+M]; 
     COMP  phase_rx[NC+1];
-    COMP  rx_filter_memory[NC+1][NFILTER];
     COMP  rx_filter_mem_timing[NC+1][NT*P];
-    COMP  rx_baseband_mem_timing[NC+1][NFILTERTIMING];
     float rx_timing;
     COMP  phase_difference[NC+1];
     COMP  prev_rx_symbols[NC+1];
 
     float fft_buf[2*FDMDV_NSPEC];
     kiss_fft_cfg fft_cfg;             
- };
+};
 
 /*---------------------------------------------------------------------------*\
                                                                              
 void lpf_peak_pick(float *foff, float *max, COMP pilot_baseband[], COMP pilot_lpf[], kiss_fft_cfg fft_pilot_cfg, COMP S[], int nin);
 void fdm_downconvert(COMP rx_baseband[NC+1][M+M/P], int Nc, COMP rx_fdm[], COMP phase_rx[], COMP freq[], int nin);
 void rx_filter(COMP rx_filt[NC+1][P+1], int Nc, COMP rx_baseband[NC+1][M+M/P], COMP rx_filter_memory[NC+1][NFILTER], int nin);
+void down_convert_and_rx_filter(COMP rx_filt[NC+1][P+1], int Nc, COMP rx_fdm[], 
+                                COMP rx_fdm_mem[], COMP phase_rx[], COMP freq[], 
+                                float freq_pol[], int nin);
 float rx_est_timing(COMP  rx_symbols[], int Nc, 
-                  COMP  rx_filt[NC+1][P+1], 
-                  COMP  rx_baseband[NC+1][M+M/P], 
-                  COMP  rx_filter_mem_timing[NC+1][NT*P], 
-                  float env[],
-                  COMP  rx_baseband_mem_timing[NC+1][NFILTERTIMING], 
-                  int   nin);   
+                   COMP  rx_filt[NC+1][P+1], 
+                   COMP  rx_filter_mem_timing[NC+1][NT*P], 
+                   float env[],
+                   int   nin);  
 float qpsk_to_bits(int rx_bits[], int *sync_bit, int Nc, COMP phase_difference[], COMP prev_rx_symbols[], COMP rx_symbols[], int old_qpsk_mapping);
 void snr_update(float sig_est[], float noise_est[], int Nc, COMP phase_difference[]);
 int freq_state(int *reliable_sync_bit, int sync_bit, int *state, int *timer, int *sync_mem);