diff --git a/misc/CMakeLists.txt b/misc/CMakeLists.txt
index 4db39a80f..55728b427 100644
--- a/misc/CMakeLists.txt
+++ b/misc/CMakeLists.txt
@@ -28,5 +28,8 @@ target_link_libraries(timpulse m)
 add_executable(vq_mbest vq_mbest.c)
 target_link_libraries(vq_mbest codec2)
 
+add_executable(vq_binary_switch vq_binary_switch.c)
+target_link_libraries(vq_binary_switch m)
+
 add_executable(pre pre.c)
 target_link_libraries(pre codec2)
diff --git a/misc/vq_binary_switch.c b/misc/vq_binary_switch.c
new file mode 100644
index 000000000..e61aa0996
--- /dev/null
+++ b/misc/vq_binary_switch.c
@@ -0,0 +1,296 @@
+/*
+  vq_binary_switch.c
+  David Rowe Dec 2021
+
+  C implementation of [1], that re-arranges VQ indexes so they are robust to single
+  bit errors.
+
+  [1] Psuedo Gray Coding, Zeger & Gersho 1990 
+*/
+
+#include <assert.h>
+#include <getopt.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "mbest.h"
+
+#define MAX_DIM     20
+#define MAX_ENTRIES 4096
+
+// equation (33) of [1], total cost of all hamming distance 1 vectors of vq index k
+float cost_of_distance_one(float *vq, int n, int dim, float *prob, int k, int st, int en, int verbose) {
+  int log2N = log2(n);
+  float c = 0.0;
+  for (int b=0; b<log2N; b++) {
+    unsigned int index_neighbour = k ^ (1<<b);
+    float dist = 0.0;
+    for(int i=st; i<=en; i++)
+      dist += pow(vq[k*dim+i] - vq[index_neighbour*dim+i], 2.0);
+    c += prob[k]*dist;
+    if (verbose)
+      printf("k: %d b: %d index_neighbour: %d dist: %f prob: %f c: %f \n", k, b, index_neighbour, dist, prob[k], c);
+  }
+  return c;
+}
+
+// equation (39) of [1]
+float distortion_of_current_mapping(float *vq, int n, int dim, float *prob, int st, int en) {
+  float d = 0.0;
+  for(int k=0; k<n; k++)
+    d += cost_of_distance_one(vq, n, dim, prob, k, st, en, 0);
+  return d;
+}
+ 
+// we sort the cost array c[], returning the indexes of sorted elements
+float c[MAX_ENTRIES];
+
+/* Note how the compare function compares the values of the
+ * array to be sorted. The passed value to this function
+ * by `qsort' are actually the `idx' array elements.
+ */
+int compare_increase (const void * a, const void * b) {
+    int aa = *((int *) a), bb = *((int *) b);
+    if (c[aa] < c[bb]) {
+        return 1;
+    } else if (c[aa] == c[bb]) {
+        return 0;
+    } else {
+        return -1;
+     }
+}
+
+void sort_c(int *idx, const size_t n) {
+    for (size_t i=0; i<n; i++) idx[i] = i;
+    qsort(idx, n, sizeof(int), compare_increase);
+}
+
+void swap(float *vq, int dim, float *prob, int index1, int index2) {
+  float tmp[dim];
+  for(int i=0; i<dim; i++) tmp[i] = vq[index1*dim+i];
+  for(int i=0; i<dim; i++) vq[index1*dim+i] = vq[index2*dim+i];
+  for(int i=0; i<dim; i++) vq[index2*dim+i] = tmp[i];
+
+  tmp[0] = prob[index1];
+  prob[index1] = prob[index2];
+  prob[index2] = tmp[0];
+}
+
+int main(int argc, char *argv[]) {
+    float vq[MAX_DIM*MAX_ENTRIES];
+    int   dim = MAX_DIM;
+    int   max_iter = INT_MAX;
+    int   st = -1;
+    int   en = -1;
+    int   verbose = 0;
+    int   n = 0;
+    int   fast_en = 0;
+    char  prob_fn[80]="";
+    
+    int o = 0; int opt_idx = 0;
+    while (o != -1) {
+       static struct option long_opts[] = {
+           {"prob",    required_argument, 0, 'p'},
+           {"st",      required_argument, 0, 't'},
+           {"en",      required_argument, 0, 'e'},
+	   {0, 0, 0, 0}
+        };
+        o = getopt_long(argc,argv,"hd:m:vt:e:n:fp:",long_opts,&opt_idx);
+        switch (o) {
+	case 'd':
+	    dim = atoi(optarg);
+	    assert(dim <= MAX_DIM);
+	    break; 
+        case 'm':
+            max_iter = atoi(optarg);
+            break;
+        case 't':
+            st = atoi(optarg);
+            break;
+        case 'e':
+            en = atoi(optarg);
+            break;
+        case 'f':
+	    fast_en = 1;
+            break;
+        case 'n':
+            n = atoi(optarg);
+            break;
+        case 'p':
+	    strcpy(prob_fn,optarg);
+            break;	    
+        case 'v':
+            verbose = 1;
+            break;
+	help:
+	    fprintf(stderr, "\n");
+            fprintf(stderr, "usage: %s -d dimension [-m max_iterations -v --st Kst --en Ken -n nVQ] vq_in.f32 vq_out.f32\n", argv[0]);
+	    fprintf(stderr, "\n");
+            fprintf(stderr, "-n nVQ           Run with just the first nVQ entries of the VQ\n");
+            fprintf(stderr, "--st Kst         Start vector element for error calculation (default 0)\n");
+            fprintf(stderr, "--en Ken         End vector element for error calculation (default K-1)\n");
+            fprintf(stderr, "--prob probFile  f32 file of probabilities for each VQ element (default 1.0)\n");
+            fprintf(stderr, "-v               verbose\n");
+            exit(1);
+        }
+    }
+
+    int dx = optind;
+    if ((argc - dx) < 2) {
+        fprintf(stderr, "Too few arguments\n");
+	goto help;
+    }
+    if (dim == 0) goto help;
+
+    /* default to measuring error on entire vector */
+    if (st == -1) st = 0; 
+    if (en == -1) en = dim-1;
+
+    /* load VQ quantiser file --------------------*/
+
+    fprintf(stderr, "loading %s ... ", argv[dx]);
+    FILE *fq=fopen(argv[dx], "rb");
+    if (fq == NULL) {
+      fprintf(stderr, "Couldn't open: %s\n", argv[dx]);
+      exit(1);
+    }
+
+    if (n==0) {
+      /* count how many entries m of dimension k are in this VQ file */
+      float dummy[dim];
+      while (fread(dummy, sizeof(float), dim, fq) == (size_t)dim)
+	n++;
+      assert(n <= MAX_ENTRIES);
+      fprintf(stderr, "%d entries of vectors width %d\n", n, dim);
+
+      rewind(fq);
+    }
+
+    /* load VQ into memory */
+    int nrd = fread(vq, sizeof(float), n*dim, fq);
+    assert(nrd == n*dim);
+    fclose(fq);
+   
+    /* set probability of each vector to 1.0 as default */
+    float prob[n];
+    for(int l=0; l<n; l++) prob[l] = 1.0;
+    if (strlen(prob_fn)) {
+      fprintf(stderr, "Reading probability file: %s\n", prob_fn);
+      FILE *fp = fopen(prob_fn,"rb");
+      assert(fp != NULL);
+      int nrd = fread(prob, sizeof(float), n, fp);
+      assert(nrd == n);
+      fclose(fp);
+      float sum = 0.0;
+      for(int l=0; l<n; l++) sum += prob[l];
+      fprintf(stderr, "sum = %f\n", sum);
+    }
+    
+    int iteration = 0;
+    int i = 0;
+    int finished = 0;
+    int switches = 0;
+    int log2N = log2(n);
+    float distortion0 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+    fprintf(stderr, "distortion0: %f\n", distortion0);
+
+    while(!finished) {
+
+      // generate a list A(i) of which vectors have the largest cost of bit errors
+      for(int k=0; k<n; k++) {
+	c[k] = cost_of_distance_one(vq, n, dim, prob, k, st, en, verbose);
+      }      
+      int A[n];
+      sort_c(A, n);
+      
+      // Try switching each vector with A(i)
+      float best_delta = 0; int best_j = 0;
+      for(int j=1; j<n; j++) {
+	float distortion1, distortion2, delta = 0.0;
+	
+	// we can't switch with ourself
+	if (j != A[i]) {
+	  if (fast_en) {
+	    // subtract just those contributions to delta that will change
+	    delta -= cost_of_distance_one(vq, n, dim, prob, A[i], st, en, verbose);
+	    delta -= cost_of_distance_one(vq, n, dim, prob, j, st, en, verbose);
+	    for (int b=0; b<log2N; b++) {
+	      unsigned int index_neighbour;
+	      index_neighbour = A[i] ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta -= cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	      index_neighbour = j ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta -= cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	    }
+	  }
+	  else
+	    distortion1 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+
+	  // switch vq entries A(i) and j
+	  swap(vq, dim, prob, A[i], j);
+
+	  if (fast_en) {
+	    // add just those contributions to delta that will change
+	    delta += cost_of_distance_one(vq, n, dim, prob, A[i], st, en, verbose);
+	    delta += cost_of_distance_one(vq, n, dim, prob, j, st, en, verbose);
+	    for (int b=0; b<log2N; b++) {
+	      unsigned int index_neighbour;
+	      index_neighbour = A[i] ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta += cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	      index_neighbour = j ^ (1<<b);
+	      if ((index_neighbour != j) && (index_neighbour != A[i]))
+		   delta += cost_of_distance_one(vq, n, dim, prob, index_neighbour, st, en, verbose);
+	    }
+	  }
+	  else {
+	    distortion2 = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+	    delta = distortion2 - distortion1;
+	  }
+
+	  if (delta < 0.0) {
+	    if (fabs(delta) > best_delta) {
+	      best_delta = fabs(delta);
+	      best_j = j;
+	    }
+	  }
+	  // unswitch
+	  swap(vq, dim, prob, A[i], j);
+	}
+      } //next j
+
+      // printf("best_delta: %f best_j: %d\n", best_delta, best_j);
+      if (best_delta == 0.0) {
+	// Hmm, no improvement, lets try the next vector in the sorted cost list
+	if (i == n-1) finished = 1; else i++;
+      } else {
+	// OK keep the switch that minimised the distortion
+	swap(vq, dim, prob, A[i], best_j);
+	switches++;
+
+	// save results
+	FILE *fq=fopen(argv[dx+1], "wb");
+	if (fq == NULL) {
+	    fprintf(stderr, "Couldn't open: %s\n", argv[dx+1]);
+	    exit(1);
+	}
+	int nwr = fwrite(vq, sizeof(float), n*dim, fq);
+	assert(nwr == n*dim);
+	fclose(fq);
+	
+	// set up for next iteration
+	iteration++;
+	float distortion = distortion_of_current_mapping(vq, n, dim, prob, st, en);
+	fprintf(stderr, "it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
+	                distortion/distortion0, i, switches);
+	if (iteration >= max_iter) finished = 1;
+	i = 0;
+      }
+    }
+    
+    return 0;
+}
+
diff --git a/misc/vq_mbest.c b/misc/vq_mbest.c
index bef2e19d2..5df2440ab 100644
--- a/misc/vq_mbest.c
+++ b/misc/vq_mbest.c
@@ -39,22 +39,24 @@ int main(int argc, char *argv[]) {
     int   st = -1;
     int   en = -1;
     int   num = INT_MAX;
+    int   output_vec_usage = 0;
     
     int o = 0; int opt_idx = 0;
     while (o != -1) {
        static struct option long_opts[] = {
-	   {"k",       required_argument, 0, 'k'},
-	   {"quant",   required_argument, 0, 'q'},
-	   {"mbest",   required_argument, 0, 'm'},
-	   {"lower",   required_argument, 0, 'l'},
-	   {"verbose", required_argument, 0, 'v'},
-           {"st",      required_argument, 0, 't'},
-           {"en",      required_argument, 0, 'e'},
-           {"num",     required_argument, 0, 'n'},
+	   {"k",         required_argument, 0, 'k'},
+	   {"quant",     required_argument, 0, 'q'},
+	   {"mbest",     required_argument, 0, 'm'},
+	   {"lower",     required_argument, 0, 'l'},
+	   {"verbose",   required_argument, 0, 'v'},
+           {"st",        required_argument, 0, 't'},
+           {"en",        required_argument, 0, 'e'},
+           {"num",       required_argument, 0, 'n'},
+           {"vec_usage", no_argument, 0, 'u'},
 	   {0, 0, 0, 0}
         };
 
-        o = getopt_long(argc,argv,"hk:q:m:vt:e:n:",long_opts,&opt_idx);
+        o = getopt_long(argc,argv,"hk:q:m:vt:e:n:u",long_opts,&opt_idx);
         switch (o) {
 	case 'k':
 	    k = atoi(optarg);
@@ -111,7 +113,10 @@ int main(int argc, char *argv[]) {
         case 'e':
             en = atoi(optarg);
             break;
-        case 'v':
+        case 'u':
+            output_vec_usage = 1;
+            break;
+	case 'v':
             verbose = 1;
             break;
 	help:
@@ -125,7 +130,8 @@ int main(int argc, char *argv[]) {
             fprintf(stderr, "--st    Kst              start vector element for error calculation (default 0)\n");
             fprintf(stderr, "--en    Ken              end vector element for error calculation (default K-1)\n");
 	    fprintf(stderr, "--num   numToProcess     number of vectors to quantise (default to EOF)\n");
-	    fprintf(stderr, "-v                       Verbose\n");
+	    fprintf(stderr, "--vec_usage              Output a record of how many times each vector is used\n");
+ 	    fprintf(stderr, "-v                       Verbose\n");
             exit(1);
         }
     }
@@ -137,7 +143,8 @@ int main(int argc, char *argv[]) {
     if (st == -1) st = 0; 
     if (en == -1) en = k-1;
 
-    int   indexes[num_stages], nvecs = 0;
+    int   indexes[num_stages], nvecs = 0; int vec_usage[m[0]];
+    for(int i=0; i<m[0]; i++) vec_usage[i] = 0;
     float target[k], quantised[k];
     float sqe = 0.0;
     while(fread(&target, sizeof(float), k, stdin) && (nvecs < num)) {
@@ -161,8 +168,17 @@ int main(int argc, char *argv[]) {
 	}
 	fwrite(&quantised, sizeof(float), k, stdout);
 	nvecs++;
+	// count number f time each vector is used (just for first stage)
+	vec_usage[indexes[0]]++;
     }
+
     fprintf(stderr, "%4.2f\n", sqe/(nvecs*(en-st+1)));
+
+    if (output_vec_usage) {
+      for(int i=0; i<m[0]; i++)
+	fprintf(stderr, "%d\n", vec_usage[i]);
+    }
+
     return 0;
 }
 
@@ -200,7 +216,6 @@ void quant_pred_mbest(float vec_out[],
         index[i] = 0;
     }
 
-    
     for(i=0; i<st; i++)
         w[i] = 0.0;
     for(i=st; i<=en; i++)
diff --git a/misc/vqtrain.c b/misc/vqtrain.c
index 9659ce412..7060f12b7 100644
--- a/misc/vqtrain.c
+++ b/misc/vqtrain.c
@@ -76,7 +76,7 @@ int main(int argc, char *argv[]) {
     long   J;		/* number of vectors in training set		*/
     long   i,j;
     long   ind;	     	/* index of current vector			*/
-    float  e;           /* sqaured error for current vector             */
+    float  e;           /* squared error for current vector             */
     float  se;		/* squared error for this iteration		*/
     float  var,var_1;	/* current and previous iterations distortion	*/
     float  delta;	/* improvement in distortion 			*/
@@ -131,12 +131,11 @@ int main(int argc, char *argv[]) {
     }
     int dx = optind;
 
-    //fprintf(stderr, "argc: %d dx: %d\n", argc, dx);
     if ((argc - dx) < 4) {
         fprintf(stderr, "Too few arguments\n");
     helpmsg:
         fprintf(stderr, "usage: %s [Options] TrainFile.f32 K(dimension) M(codebook size) VQFile.f32\n", argv[0]);
-        fprintf(stderr, "  -r --residual VQResidualErrorFile.f32usage\n");
+        fprintf(stderr, "  -r --residual VQResidualErrorFile.f32\n");
         fprintf(stderr, "  -s --stop StopDelta\n");
         fprintf(stderr, "     --st   Kst        start vector element for error calculation (default 0)\n");
         fprintf(stderr, "     --en   Ken        end vector element for error calculation (default K-1)\n");
@@ -164,7 +163,7 @@ int main(int argc, char *argv[]) {
     cb = (float*)malloc(sizeof(float)*k*m);
     cent = (float*)malloc(sizeof(float)*k*m);
     n = (long*)malloc(sizeof(long)*m);
-    if (cb == NULL || cb == NULL || cent == NULL || vec == NULL) {
+    if (vec == NULL || cb == NULL || cent == NULL || n == NULL) {
 	printf("Error in malloc.\n");
 	exit(1);
     }
@@ -187,7 +186,7 @@ int main(int argc, char *argv[]) {
         assert(ret == k);
         quantise(cb, vec, k, 1, st, en, &e, &se);
     }
-    var = se/(J*k);
+    var = se/(J*(en-st+1));
     printf("\r  It: 0, var: %f sd: %f\n", var, sqrt(var));
 
     /* set up initial codebook state from samples of training set */
@@ -222,13 +221,11 @@ int main(int argc, char *argv[]) {
 	    ind = quantise(cb, vec, k, m, st, en, &e, &se);
 	    n[ind]++;
 	    acc(&cent[ind*k], vec, k);
-            //if (i < 100)
-            //    printf("e: %f sqrt(e/k): %f sd: %f noutliers: %ld\n", e, sqrt(e/k), sd, noutliers[0]);
-            if (sqrt(e/k) > 1.0) noutliers[0]++;
-            if (sqrt(e/k) > 2.0) noutliers[1]++;
-            if (sqrt(e/k) > 3.0) noutliers[2]++;
+            if (sqrt(e/(en-st+1)) > 1.0) noutliers[0]++;
+            if (sqrt(e/(en-st+1)) > 2.0) noutliers[1]++;
+            if (sqrt(e/(en-st+1)) > 3.0) noutliers[2]++;
 	}
-	var = se/(J*k);
+	var = se/(J*(en-st+1));
 	delta = (var_1-var)/var;
         int n_min = J;
         int n_max = 0;
@@ -289,7 +286,7 @@ int main(int argc, char *argv[]) {
 
 \*-----------------------------------------------------------------------*/
 
-/*---------------------------------------------------------------------------*\
+/*-----------------------------------------------------------------------*\
 
 	FUNCTION....: zero()
 
@@ -298,7 +295,7 @@ int main(int argc, char *argv[]) {
 
 	Zeros a vector of length k.
 
-\*---------------------------------------------------------------------------*/
+\*-----------------------------------------------------------------------*/
 
 void zero(float v[], int k)
 /*  float  v[];		ptr to start of vector		*/
@@ -358,7 +355,6 @@ void norm(float v[], int k, long n)
 /*---------------------------------------------------------------------------*\
 
 	FUNCTION....: quantise()
-
 	AUTHOR......: David Rowe
 	DATE CREATED: 23/2/95
 
diff --git a/octave/ldpcut.m b/octave/ldpcut.m
index b05c803f8..e83642f51 100644
--- a/octave/ldpcut.m
+++ b/octave/ldpcut.m
@@ -71,7 +71,6 @@
 
     tx_bits = [];
     tx_symbols = [];
-    rx_symbols = [];
 
     % Encode a bunch of frames
 
diff --git a/octave/save_f32.m b/octave/save_f32.m
new file mode 100644
index 000000000..62214d47a
--- /dev/null
+++ b/octave/save_f32.m
@@ -0,0 +1,12 @@
+% save_f32.m
+% David Rowe Sep 2021
+%
+% save a matrix to .f32 binary files in row-major order
+
+function save_f32(fn, m)
+  f=fopen(fn,"wb");
+  [r c] = size(m);
+  mlinear = reshape(m', 1, r*c);
+  fwrite(f, mlinear, 'float32');
+  fclose(f);
+endfunction
diff --git a/octave/trellis.m b/octave/trellis.m
index a11c0bd56..501e728eb 100644
--- a/octave/trellis.m
+++ b/octave/trellis.m
@@ -397,16 +397,15 @@
 % Simulations ---------------------------------------------------------------------
 
 % top level function to set up and run a test
-function results = test_trellis(nframes=100, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0)
+function [results target_] = test_trellis(target_fn, nframes=100, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0)
   K = 20; K_st=2+1; K_en=16+1;
-  vq_fn = "../build_linux/vq_stage1.f32";
+  vq_fn = "../build_linux/vq_stage1_bs004.f32";
   vq_output_fn = "../build_linux/all_speech_8k_test.f32";
-  target_fn = "../build_linux/all_speech_8k_lim.f32";
   
   % load VQ
   vq = load_f32(vq_fn, K);
   [vq_size tmp] = size(vq);
-  vq = vq(:,K_st:K_en);
+  vqsub = vq(:,K_st:K_en);
   
   % load file of VQ-ed vectors to train up SD PDF estimator
   [sd_table h_table] = vq_hist(vq_output_fn, dec);
@@ -415,16 +414,35 @@
   target = load_f32(target_fn, K);
 
   % limit test to the first nframes vectors
-  target = target(1:dec:dec*nframes,K_st:K_en);
+  if nframes != -1
+    last = nframes;
+  else
+    last = length(target);
+  end
+  target = target(1:dec:last,K_st:K_en);
   
   % run a test
   EbNo=10^(EbNodB/10);
-  results = run_test(target, vq, sd_table, h_table, ntxcw, nstages, EbNo, verbose);
+  results = run_test(target, vqsub, sd_table, h_table, ntxcw, nstages, EbNo, verbose);
   if verbose
     for f=2:nframes-1
       printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
     end
-  end  
+  end
+
+  % return full band vq-ed vectors
+  target_ = zeros(last,K);
+  target_(1:dec:last,:) = vq(results.rx_indexes+1,:);
+  
+  % use linear interpolation to restore original frame rate
+  for f=1:dec:last-dec
+    prev = f; next = f + dec;
+    for g=prev+1:next-1
+      cnext = (g-prev)/dec; cprev = 1 - cnext;
+      target_(g,:) = cprev*target_(prev,:) + cnext*target_(next,:);
+      %printf("f: %d g: %d cprev: %f cnext: %f\n", f, g, cprev, cnext);
+    end
+  end
 endfunction
 
 % Plot histograms of SD at different decimations in time
@@ -511,11 +529,13 @@ function test_vq(vq_fn)
 endfunction
 
 % generate sets of curves
-function run_curves(frames=100, dec=1)
+function [EbNodB rms_sd] = run_curves(frames=100, dec=1, nstages=5)
   results_log = [];
-  EbNodB = [1 2 3 4];
+  EbNodB = [0 1 2 3 4 5];
+  target_fn = "../build_linux/all_speech_8k_lim.f32";
+
   for i=1:length(EbNodB)
-    results = test_trellis(frames, dec, ntxcw=8, nstages=3, EbNodB(i), verbose=0);
+    results = test_trellis(target_fn, frames, dec, ntxcw=8, nstages, EbNodB(i), verbose=0);
     results_log = [results_log results];
   end
   for i=1:length(results_log)
@@ -523,46 +543,51 @@ function run_curves(frames=100, dec=1)
     ber_vanilla(i) = results_log(i).ber_vanilla;
     per(i) = results_log(i).per;
     per_vanilla(i) = results_log(i).per_vanilla;
-    mse_noerrors(i) = sqrt(results_log(i).mse_noerrors);
-    mse(i) = sqrt(results_log(i).mse);
-    mse_vanilla(i) = sqrt(results_log(i).mse_vanilla);
+    rms_sd_noerrors(i) = sqrt(results_log(i).mse_noerrors);
+    rms_sd(i) = sqrt(results_log(i).mse);
+    rms_sd_vanilla(i) = sqrt(results_log(i).mse_vanilla);
   end
 
   figure(1); clf; semilogy(EbNodB, ber_vanilla, "r+-;uncoded;"); hold on;
   semilogy(EbNodB, ber, "g+-;trellis;"); hold off;
-  grid; title(sprintf("BER dec=%d nstages=%d",dec,nstages));
+  grid('minor'); title(sprintf("BER dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_ber.png",dec));
   
   figure(2); clf; semilogy(EbNodB, per_vanilla, "r+-;uncoded;"); hold on;
   semilogy(EbNodB, per, "g+-;trellis;");
-  grid; title(sprintf("PER dec=%d nstages=%d",dec,nstages));
+  grid('minor'); title(sprintf("PER dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_per.png",dec));
 
-  figure(3); clf; plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
-  plot(EbNodB, mse_vanilla, "r+-;uncoded;");
-  plot(EbNodB, mse, "g+-;trellis;"); hold off;
-  grid; title(sprintf("RMS SD dec=%d nstages=%d",dec,nstages));
+  figure(3); clf; plot(EbNodB, rms_sd_noerrors, "b+-;no errors;"); hold on;
+  plot(EbNodB, rms_sd_vanilla, "r+-;uncoded;");
+  plot(EbNodB, rms_sd, "g+-;trellis;"); hold off;
+  grid('minor'); title(sprintf("RMS SD dec=%d nstages=%d",dec,nstages));
   print("-dpng", sprintf("trellis_dec_%d_rms_sd.png",dec));
 endfunction
 
+function vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
+  [results target_] = test_trellis(in_fn, nframes=-1, dec, ntxcw=8, nstages=3, EbNodB, verbose=0);
+  save_f32(out_fn, target_);
+endfunction
+
 % -------------------------------------------------------------------
 
-graphics_toolkit ("gnuplot");
 more off;
 randn('state',1);
 
 % uncomment one of the below to run a test or simulation
 
 % These two tests show where we are at:
-%test_trellis(nframes=600, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
-%test_trellis(nframes=600, dec=4, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=600, dec=1, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=600, dec=4, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 
-run_curves(600,1)
-run_curves(600,2)
-run_curves(600,4)
+%run_curves(600,1)
+%run_curves(600,2)
+%run_curves(600,4)
+%[EbNodB rms_sd] = run_curves(30*100,3,3)
 
-%test_trellis(nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
-%test_trellis(nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=200, dec=1, ntxcw=1, nstages=3, EbNodB=3, verbose=0);
+%test_trellis(target_fn, nframes=100, dec=2, ntxcw=8, nstages=3, EbNodB=3, verbose=0);
 %test_vq("../build_linux/vq_stage1.f32");
 %vq_hist_dec("../build_linux/all_speech_8k_test.f32");
 %test_single
diff --git a/octave/trellis_dec3_nstage3.txt b/octave/trellis_dec3_nstage3.txt
new file mode 100644
index 000000000..b4d8e98e5
--- /dev/null
+++ b/octave/trellis_dec3_nstage3.txt
@@ -0,0 +1,15 @@
+# Created by Octave 5.2.0, Sun Sep 12 12:26:33 2021 ACST <david@boomer>
+# name: EbNodB
+# type: matrix
+# rows: 1
+# columns: 6
+ 0 1 2 3 4 5
+
+
+# name: rms_sd
+# type: matrix
+# rows: 1
+# columns: 6
+ 7.7412523956816406 6.4841339000579836 5.8017966072531637 4.9470616511274006 4.3575512816148612 3.5924646693565796
+
+
diff --git a/octave/vq_700c_eq.m b/octave/vq_700c_eq.m
index 6da4b8447..3e57b9afa 100644
--- a/octave/vq_700c_eq.m
+++ b/octave/vq_700c_eq.m
@@ -164,14 +164,6 @@ function vq_700c_plots(fn_array)
   eq2 /= (ntargets*nvq);
 endfunction
 
-function save_f32(fn, m)
-  f=fopen(fn,"wb");
-  [r c] = size(m);
-  mlinear = reshape(m', 1, r*c);
-  fwrite(f, mlinear, 'float32');
-  fclose(f);
-endfunction
-
 function [targets e] = load_targets(fn_target_f32)
   nb_features = 41;
   K = 20;
diff --git a/octave/vq_binary_switch.m b/octave/vq_binary_switch.m
new file mode 100644
index 000000000..1a63de53e
--- /dev/null
+++ b/octave/vq_binary_switch.m
@@ -0,0 +1,210 @@
+% vq_binary_switch.m
+% David Rowe Sep 2021
+%
+% Experiments in making VQs robust to bit errors, this is an Octave
+% implementation of [1].
+%
+% [1] Psuedo Gray Coding, Zeger & Gersho 1990
+
+1;
+
+% returns indexes of hamming distance 1 neighbours
+function index_neighbours = distance_one_neighbours(N,k)
+  log2N = log2(N);
+  index_neighbours = [];
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    index_neighbours = [index_neighbours index_neighbour];
+  end
+end
+
+% equation (33) of [1], for hamming distance 1
+function c = cost_of_distance_one(vq, prob, k, verbose=0)
+  [N K] = size(vq);
+  log2N = log2(N);
+  c = 0;
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    diff = vq(k,:) - vq(index_neighbour, :);
+    dist = sum(diff*diff');
+    c += prob(k)*dist;
+    if verbose
+      printf("k: %d b: %d index_neighbour: %d dist: %f prob: %f c: %f \n", k, b, index_neighbour, dist, prob(k), c);
+    end
+  end
+endfunction
+
+% equation (39) of [1]
+function d = distortion_of_current_mapping(vq, prob, verbose=0)
+  [N K] = size(vq);
+  
+  d = 0;
+  for k=1:N
+    c = cost_of_distance_one(vq, prob, k);
+    d += c;
+    if verbose
+      printf("k: %2d c: %f d: %f\n", k, c, d);
+    end
+  end
+endfunction
+
+function [vq distortion] = binary_switching(vq, prob, max_iteration, fast_en=1)
+  [N K] = size(vq);
+  iteration = 0;
+  i = 1;
+  finished = 0;
+  switches = 0;
+  distortion0 = distortion_of_current_mapping(vq, prob)
+  
+  while !finished
+
+    % generate a list A(i) of which vectors have the largest cost of bit errors
+    c = zeros(1,N);
+    for k=1:N
+      c(k) = cost_of_distance_one(vq, prob, k);
+    end
+    [tmp A] = sort(c,"descend");
+    
+    % Try switching each vector with A(i)
+    best_delta = 0;
+    for j=2:N
+      % we can't switch with ourself
+      if j != A(i)
+        if fast_en
+          delta = -cost_of_distance_one(vq, prob, A(i)) - cost_of_distance_one(vq, prob, j);
+	  n1 = [distance_one_neighbours(N,A(i)) distance_one_neighbours(N,j)];
+	  n1(n1 == A(i)) = [];
+	  n1(n1 == j) = [];
+	  for l=1:length(n1)
+	    delta -= cost_of_distance_one(vq, prob, n1(l));
+	  end
+        else
+	  distortion1 = distortion_of_current_mapping(vq, prob);
+	end
+	
+        % switch vq entries A(i) and j
+	tmp = vq(A(i),:);
+	vq(A(i),:) = vq(j,:);
+	vq(j,:) = tmp;
+		
+        if fast_en
+	  delta += cost_of_distance_one(vq, prob, A(i)) + cost_of_distance_one(vq, prob, j);
+	  for l=1:length(n1)
+	    delta += cost_of_distance_one(vq, prob, n1(l));
+	  end
+	else
+	  distortion2 = distortion_of_current_mapping(vq, prob);
+          delta = distortion2 - distortion1;
+	end
+	
+	if delta < 0
+	  if abs(delta) > best_delta
+	    best_delta = abs(delta);
+	    best_j = j;
+	  end
+	end
+	
+	% unswitch
+	tmp = vq(A(i),:);
+	vq(A(i),:) = vq(j,:);
+	vq(j,:) = tmp;
+      end
+    end % next j
+
+    % printf("best_delta: %f best_j: %d\n", best_delta, best_j);
+    if best_delta == 0
+      % Hmm, no improvement, lets try the next vector in the sorted cost list
+      if i == N
+        finished = 1;
+      else
+        i++;
+      end
+    else
+      % OK keep the switch that minimised the distortion
+ 
+      tmp = vq(A(i),:);
+      vq(A(i),:) = vq(best_j,:);
+      vq(best_j,:) = tmp;
+      switches++;
+
+      % set up for next iteration
+      iteration++;
+      distortion = distortion_of_current_mapping(vq, prob);
+      printf("it: %3d dist: %f %3.2f i: %3d sw: %3d\n", iteration, distortion,
+             distortion/distortion0, i, switches);
+      if iteration >= max_iteration, finished = 1, end
+      i = 1;
+     end
+
+  end
+
+endfunction
+
+% return indexes of hamming distance one vectors
+function ind = neighbour_indexes(vq, k)
+  [N K] = size(vq);
+  log2N = log2(N);
+  ind = [];
+  for b=0:log2N-1
+    index_neighbour = bitxor(k-1,2.^b) + 1;
+    ind = [ind index_neighbour];
+  end
+endfunction
+
+function test_binary_switch
+  vq1 = [1 1; -1 1; -1 -1; 1 -1];
+  %f=fopen("vq1.f32","wb"); fwrite(f, vq1, 'float32'); fclose(f);
+  [vq2 distortion] = binary_switching(vq1, ones(1,4), 10);
+  % algorithm should put hamming distance 1 neighbours in adjacent quadrants
+  distance_to_closest_neighbours = 2;
+  % there are two hamming distance 1 neighbours
+  target_distortion = 2^2*distance_to_closest_neighbours*length(vq1);
+  assert(target_distortion == distortion);
+  printf("test_binary_switch OK!\n");
+endfunction
+
+function test_fast
+  N=16;     % Number of VQ codebook vectors
+  K=2;      % Vector length
+  Ntrain=10000;
+
+  training_data = randn(Ntrain,K);
+  [idx vq1] = kmeans(training_data, N);
+  f=fopen("vq1.f32","wb");
+  for r=1:rows(vq1)
+    fwrite(f,vq1(r,:),"float32");
+  end
+  fclose(f);
+  [vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 0);
+  [vq3 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, fast_en = 1);
+  assert(vq2 == vq3);  
+  printf("test_fast OK!\n");
+endfunction
+
+function demo
+  N=16;     % Number of VQ codebook vectors
+  K=2;      % Vector length
+  Ntrain=10000;
+  training_data = randn(Ntrain,K);
+  [idx vq1] = kmeans(training_data, N);
+  [vq2 distortion] = binary_switching(vq1, [1 ones(1,N-1)], 1000, 1);
+
+  figure(1); clf; plot(training_data(:,1), training_data(:,2),'+');
+  hold on;
+  plot(vq1(:,1), vq1(:,2),'og','linewidth', 2);
+  plot(vq2(:,1), vq2(:,2),'or','linewidth', 2);
+
+  % plot hamming distance 1 neighbours
+  k = 1;
+  ind = neighbour_indexes(vq2, k);
+  for i=1:length(ind)
+    plot([vq2(k,1) vq2(ind(i),1)],[vq2(k,2) vq2(ind(i),2)],'r-','linewidth', 2);
+  end
+  hold off;
+endfunction
+
+pkg load statistics
+%test_binary_switch;
+test_fast;
+%demo
+
diff --git a/octave/vq_compare.m b/octave/vq_compare.m
new file mode 100644
index 000000000..5527b3c2d
--- /dev/null
+++ b/octave/vq_compare.m
@@ -0,0 +1,349 @@
+% vq_compare.m
+% David Rowe Sep 2021
+%
+% Compare the Eb/No performance of Vector Quantisers (robustness to bit errors) using
+% Spectral Distortion (SD) measure.
+
+#{
+  usage:
+
+  1. Generate the initial VQ (vq_stage1.f32) and input test vector file (all_speech_8k_lim.f32):
+ 
+    cd codec2/build_linux
+    ../script/train_trellis.sh
+
+  2. Run the Psuedo-Gray binary switch tool to optimise the VQ against single bit errors:
+
+    ./misc/vq_binary_switch -d 20 vq_stage1.f32 vq_stage1_bs001.f32 -m 5000 --st 2 --en 16 -f
+
+    This can take a while, but if you ctrl-C at any time it will have saved the most recent optimised VQ.
+   
+  3. Run this script to compare the two VQs:
+
+    octave:34> vq_compare
+#}
+
+
+function vq_compare(action="run_curves", vq_fn, dec=1, EbNodB=3, in_fn, out_fn)
+  more off;
+  randn('state',1);
+  graphics_toolkit("gnuplot");
+  
+  if strcmp(action, "run_curves")
+    run_curves(30*100);
+  end
+  if strcmp(action, "vq_file")
+    vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
+  end
+endfunction
+
+
+% -------------------------------------------------------------------
+
+% converts a decimal value to a soft dec binary value
+function c = dec2sd(dec, nbits)
+    
+    % convert to binary
+
+    c = zeros(1,nbits);
+    for j=0:nbits-1
+      mask = 2.^j;
+      if bitand(dec,mask)
+        c(nbits-j) = 1;
+      end
+    end
+
+    % map to +/- 1
+
+    c = -1 + 2*c;
+endfunction
+
+% fast version of vector quantiser
+function [indexes target_] = vector_quantiser_fast(vq, target, verbose=1)
+  [vq_size K] = size(vq);
+  [ntarget tmp] = size(target);
+  target_ = zeros(ntarget,K);
+  indexes = zeros(1,ntarget);
+
+  % pre-compute energy of each VQ vector
+  vqsq = zeros(vq_size,1);
+  for i=1:vq_size
+    vqsq(i) = vq(i,:)*vq(i,:)';
+  end
+
+  % use efficient matrix multiplies to search for best match to target
+  for i=1:ntarget
+    best_e = 1E32;
+    e = vqsq - 2*(vq * target(i,:)');
+    [best_e best_ind] = min(e);
+    if verbose printf("best_e: %f best_ind: %d\n", best_e, best_ind), end;
+    target_(i,:) = vq(best_ind,:); indexes(i) = best_ind;
+  end
+endfunction
+
+
+% VQ a target sequence of frames then run a test using vanilla uncoded/trellis decoder
+function results = run_test(target, vq, EbNo, verbose)
+  [frames tmp]      = size(target);
+  [vq_length tmp]   = size(vq);
+  nbits             = log2(vq_length);
+  nerrors           = 0;
+  tbits             = 0;
+  nframes           = 0;
+  nper              = 0;
+  
+  % Vector Quantise target vectors sequence
+  [tx_indexes target_ ] = vector_quantiser_fast(vq, target, verbose);
+  % use convention of indexes starting from 0
+  tx_indexes -= 1; 
+  %  mean SD of VQ with no errors
+  diff = target - target_;
+  mse_noerrors = mean(diff(:).^2);
+  
+  % construct tx symbol codewords from VQ indexes
+  tx_codewords = zeros(frames, nbits);
+  for f=1:frames
+    tx_codewords(f,:) = dec2sd(tx_indexes(f), nbits);
+  end
+
+  rx_codewords = tx_codewords + randn(frames, nbits)*sqrt(1/(2*EbNo));
+  rx_indexes = zeros(1,frames);
+
+  for f=1:frames
+    tx_bits        = tx_codewords(f,:) > 0;
+    rx_bits        = rx_codewords(f,:) > 0;
+    rx_indexes(f)  = sum(rx_bits .* 2.^(nbits-1:-1:0));
+    errors         = sum(xor(tx_bits, rx_bits));
+    nerrors       += errors;
+    if errors nper++;, end
+    tbits += nbits;
+    nframes++;
+  end
+
+  EbNodB = 10*log10(EbNo);
+  target_ = vq(rx_indexes+1,:);
+  diff = target - target_;
+  mse = mean(diff(:).^2);
+  printf("Eb/No: %3.2f dB nframes: %3d nerrors: %4d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
+         EbNodB, nframes, nerrors, nerrors/tbits, nper/nframes, mse_noerrors, mse);
+  results.ber = nerrors/tbits;	 
+  results.per = nper/nframes;	 
+  results.mse_noerrors = mse_noerrors;
+  results.mse = mse;
+  results.tx_indexes = tx_indexes;
+  results.rx_indexes = rx_indexes;
+endfunction
+
+% VQ a target sequence of frames then run a test using a LDPC code
+function results = run_test_ldpc(target, vq, EbNo, verbose)
+  [frames tmp]      = size(target);
+  [vq_length tmp]   = size(vq);
+  nbits             = log2(vq_length);
+  nerrors           = 0;
+  tbits             = 0;
+  nframes           = 0;
+  nper              = 0;
+  
+  % init LDPC code
+  mod_order = 4; bps = 2;
+  modulation = 'QPSK';
+  mapping = 'gray';
+  max_iterations = 100; demod_type = 0; decoder_type = 0;
+  ldpc; init_cml('~/cml/');
+  tempStruct = load("HRA_56_56.txt");
+  b = fieldnames(tempStruct);
+  ldpcArrayName = b{1,1};
+  % extract the array from the struct
+  HRA = tempStruct.(ldpcArrayName);
+  [code_param framesize rate] = ldpc_init_user(HRA, modulation, mod_order, mapping);
+
+  % set up noise
+  EbNodB = 10*log10(EbNo);
+  EsNodB = EbNodB + 10*log10(rate) + 10*log10(bps);
+  EsNo = 10^(EsNodB/10);
+  variance = 1/EsNo;
+
+  % Vector Quantise target vectors sequence
+  [tx_indexes target_ ] = vector_quantiser_fast(vq, target, verbose);
+  % use convention of indexes starting from 0
+  tx_indexes -= 1; 
+  %  mean SD of VQ with no errors
+  diff = target - target_;
+  mse_noerrors = mean(diff(:).^2);
+
+  % construct tx frames x nbit matrix using VQ indexes
+  tx_bits = zeros(frames, nbits);
+  for f=1:frames
+    tx_bits(f,:) = dec2sd(tx_indexes(f), nbits) > 0;
+  end
+
+  % find a superframe size, that has an integer number of nbits and data_bits_per_frame frames
+  bits_per_superframe = nbits;
+  while mod(bits_per_superframe,nbits) || mod(bits_per_superframe,code_param.data_bits_per_frame)
+    bits_per_superframe += nbits;
+  end
+
+  Nsuperframes = floor(frames*nbits/bits_per_superframe);
+  Nldpc_codewords = Nsuperframes*bits_per_superframe/code_param.data_bits_per_frame;
+  frames = Nsuperframes*bits_per_superframe/nbits;
+  %printf("bits_per_superframe: %d Nldpc_codewords: %d frames: %d\n", bits_per_superframe, Nldpc_codewords, frames);
+  
+  % reshape tx_bits matrix into Nldpc_codewords x data_bits_per_frame
+  tx_bits = tx_bits(1:frames,:);
+  tx_bits_ldpc = reshape(tx_bits',code_param.data_bits_per_frame, Nldpc_codewords)';
+
+  % modulate tx symbols
+  tx_symbols = [];
+  for nn=1:Nldpc_codewords
+    [tx_codeword atx_symbols] = ldpc_enc(tx_bits_ldpc(nn,:), code_param);
+    tx_symbols = [tx_symbols atx_symbols];
+  end
+  
+  noise = sqrt(variance*0.5)*(randn(1,length(tx_symbols)) + j*randn(1,length(tx_symbols)));
+  rx_symbols = tx_symbols+noise;
+  
+  % LDPC decode  
+  for nn = 1:Nldpc_codewords
+    st = (nn-1)*code_param.coded_syms_per_frame + 1;
+    en = (nn)*code_param.coded_syms_per_frame;
+
+    arx_codeword = ldpc_dec(code_param, max_iterations, demod_type, decoder_type, rx_symbols(st:en), EsNo, ones(1,code_param.coded_syms_per_frame));
+    rx_bits_ldpc(nn,:) = arx_codeword(1:code_param.data_bits_per_frame);
+  end
+  
+  % reshape rx_bits_ldpc matrix into frames x nbits
+  rx_bits = reshape(rx_bits_ldpc',nbits,frames)';
+
+  rx_indexes = tx_indexes;
+  for f=1:frames
+    rx_indexes(f)  = sum(rx_bits(f,:) .* 2.^(nbits-1:-1:0));
+    errors         = sum(xor(tx_bits(f,:), rx_bits(f,:)));
+    nerrors       += errors;
+    if errors nper++;, end
+    tbits += nbits;
+    nframes++;
+  end
+
+  EbNodB = 10*log10(EbNo);
+  target_ = vq(rx_indexes+1,:);
+  diff = target - target_;
+  mse = mean(diff(:).^2);
+  printf("Eb/No: %3.2f dB nframes: %4d nerrors: %4d BER: %4.3f PER: %3.2f mse: %3.2f %3.2f\n", 
+         EbNodB, nframes, nerrors, nerrors/tbits, nper/nframes, mse_noerrors, mse);
+  results.ber = nerrors/tbits;	 
+  results.per = nper/nframes;	 
+  results.mse = mse;
+  results.tx_indexes = tx_indexes;
+  results.rx_indexes = rx_indexes;
+endfunction
+
+% Simulations ---------------------------------------------------------------------
+
+% top level function to set up and run a test with a specific vq
+function [results target_] = run_test_vq(vq_fn, target_fn, nframes=100, dec=1, EbNodB=3, ldpc_en=0, verbose=0)
+  K = 20; K_st=2+1; K_en=16+1;
+  
+  % load VQ
+  vq = load_f32(vq_fn, K);
+  [vq_size tmp] = size(vq);
+  vqsub = vq(:,K_st:K_en);
+  
+  % load sequence of target vectors we wish to VQ
+  target = load_f32(target_fn, K);
+
+  % limit test to the first nframes vectors
+  if nframes != -1
+    last = nframes;
+  else
+    last = length(target);
+  end
+  target = target(1:dec:last, K_st:K_en);
+  
+  % run a test
+  EbNo=10^(EbNodB/10);
+  if ldpc_en
+    results = run_test_ldpc(target, vqsub, EbNo, verbose);
+  else
+    results = run_test(target, vqsub, EbNo, verbose);
+  end
+  if verbose
+    for f=2:nframes-1
+      printf("f: %03d tx_index: %04d rx_index: %04d\n", f,  results.tx_indexes(f), results.rx_indexes(f));
+    end
+  end
+
+  % return full band vq-ed vectors
+  target_ = zeros(last,K);
+  target_(1:dec:last,:) = vq(results.rx_indexes+1,:);
+  
+  % use linear interpolation to restore original frame rate
+  for f=1:dec:last-dec
+    prev = f; next = f + dec;
+    for g=prev+1:next-1
+      cnext = (g-prev)/dec; cprev = 1 - cnext;
+      target_(g,:) = cprev*target_(prev,:) + cnext*target_(next,:);
+      %printf("f: %d g: %d cprev: %f cnext: %f\n", f, g, cprev, cnext);
+    end
+  end
+endfunction
+
+% generate sets of curves
+function run_curves(frames=100, dec=1)
+  target_fn = "../build_linux/all_speech_8k_lim.f32";
+  EbNodB = 0:5;
+
+  results1_ldpc_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=1, verbose=0);
+    results1_ldpc_log = [results1_ldpc_log results];
+  end
+  results4_ldpc_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=1, verbose=0);
+    results4_ldpc_log = [results4_ldpc_log results];
+  end
+
+  results1_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=0, verbose=0);
+    results1_log = [results1_log results];
+  end
+  results4_log = [];
+  for i=1:length(EbNodB)
+    results = run_test_vq("../build_linux/vq_stage1_bs004.f32", target_fn, frames, dec, EbNodB(i), ldpc_en=0, verbose=0);
+    results4_log = [results4_log results];
+  end
+  for i=1:length(results1_log)
+    ber(i) = results1_log(i).ber;
+    per(i) = results1_log(i).per;
+    mse_noerrors(i) = sqrt(results1_log(i).mse_noerrors);
+    mse_vq1(i) = sqrt(results1_log(i).mse);
+    mse_vq4(i) = sqrt(results4_log(i).mse);
+    mse_vq1_ldpc(i) = sqrt(results1_ldpc_log(i).mse);
+    mse_vq4_ldpc(i) = sqrt(results4_ldpc_log(i).mse);
+  end
+
+  figure(1); clf;
+  semilogy(EbNodB, ber, 'g+-;ber;','linewidth', 2); hold on;
+  semilogy(EbNodB, per, 'b+-;per;','linewidth', 2);
+  grid('minor'); xlabel('Eb/No(dB)');
+  hold off;
+  
+  figure(2); clf;
+  plot(EbNodB, mse_noerrors, "b+-;no errors;"); hold on;
+  plot(EbNodB, mse_vq1, "g+-;vanilla AWGN;");
+  plot(EbNodB, mse_vq4, "b+-;binary switch;");
+  plot(EbNodB, mse_vq1_ldpc, "r+-;ldpc (112,56);");
+  plot(EbNodB, mse_vq4_ldpc, "k+-;binary switch ldpc (112,56);");
+  load trellis_dec3_nstage3.txt
+  plot(EbNodB, rms_sd, "c+-;binary switch trellis dec3;");  
+  hold off; grid; title("RMS SD (dB)"); xlabel('Eb/No(dB)');
+endfunction
+
+
+function vq_file(vq_fn, dec, EbNodB, in_fn, out_fn)
+  [results target_] = run_test_vq(vq_fn, in_fn, nframes=-1, dec, EbNodB, verbose=0);
+  save_f32(out_fn, target_);
+endfunction
+
+
diff --git a/script/subsetvq.sh b/script/subsetvq.sh
new file mode 100755
index 000000000..562a4e821
--- /dev/null
+++ b/script/subsetvq.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+# subsetvq.sh
+# David Rowe August 2021
+#
+# Script to support:
+# 1. Subset VQ training and listening
+# 1. Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
+# 2. VQ sorting/optimisation experiments, octave/vq_compare.m
+
+TRAIN=~/Downloads/all_speech_8k.sw
+CODEC2_PATH=$HOME/codec2
+PATH=$PATH:$CODEC2_PATH/build_linux/src:$CODEC2_PATH/build_linux/misc
+K=20
+Kst=2
+Ken=16
+
+# train a new VQ and generate quantised training material
+function train() {
+  fullfile=$TRAIN
+  filename=$(basename -- "$fullfile")
+  extension="${filename##*.}"
+  filename="${filename%.*}"
+  
+  c2sim $fullfile --rateK --rateKout ${filename}.f32
+  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
+  octave -p ${CODEC2_PATH}/octave -qf
+  vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
+
+  # VQ the training file
+  cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
+}
+
+function listen_vq() {
+  vq_fn=$1
+  dec=$2
+  EbNodB=$3
+  fullfile=$4
+  filename=$(basename -- "$fullfile")
+  extension="${filename##*.}"
+  filename="${filename%.*}"
+  
+  fullfile_out=$5
+  do_trellis=$6
+  sox_options='-t raw -e signed-integer -b 16' 
+  sox $fullfile $sox_options - | c2sim - --rateK --rateKout ${filename}.f32
+  
+  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
+  octave -p ${CODEC2_PATH}/octave -qf
+
+  if [ "$do_trellis" -eq 0 ]; then
+     echo "pkg load statistics; vq_compare(action='vq_file', '${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+     octave -p ${CODEC2_PATH}/octave -qf
+  else
+     echo "pkg load statistics; trellis; vq_file('${vq_fn}', ${dec}, ${EbNodB}, '${filename}_lim.f32', '${filename}_test.f32'); quit" \ |
+     octave -p ${CODEC2_PATH}/octave -qf
+  fi
+  
+  if [ "$fullfile_out" = "aplay" ]; then
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | aplay -f S16_LE
+  else
+     sox $fullfile $sox_options - | c2sim - --rateK --rateKin ${filename}_test.f32 -o - | sox -t .s16 -r 8000 -c 1 - ${fullfile_out}
+  fi
+     
+}
+
+function print_help {
+    echo
+    echo "Trellis/VQ optimisation support script"
+    echo
+    echo "  usage ./train_trellis.sh [-x] [-t] [-v vq.f32 in.wav out.wav] [-e EbNodB] [-d dec]"
+    echo
+    echo "    -x                         debug mode; trace script execution"
+    echo "    -t                         train VQ and generate a fully quantised version of training vectors"
+    echo "    -v  vq.f32 in.wav out.wav  synthesise an output file out.wav from in.raw, using the VQ vq.f32"
+    echo "    -v  vq.f32 in.wav aplay    synthesise output, play immediately using aplay, using the VQ vq.f32"
+    echo "    -e  EbNodB                 Eb/No in dB for AWGn channel simulation (error insertion)"
+    echo "    -d  dec                    decimation/interpolation rate"
+    echo "    -r                         use trellis decoder"
+    echo
+    exit
+}
+
+# command line arguments to select function
+
+if [ $# -lt 1 ]; then
+    print_help
+fi
+
+do_train=0
+do_vq=0
+do_trellis=0
+EbNodB=100
+dec=1
+POSITIONAL=()
+while [[ $# -gt 0 ]]
+do
+key="$1"
+case $key in
+    -x)
+        set -x	
+        shift
+    ;;
+    -t)
+        do_train=1
+	shift
+    ;;
+    -v)
+        do_vq=1
+	vq_fn="$2"
+	in_wav="$3"
+	out_wav="$4"
+	shift
+	shift
+	shift
+	shift
+    ;;
+    -r)
+        do_trellis=1
+	shift
+    ;;
+    -d)
+	dec="$2"
+	shift
+	shift	
+	;;
+    -e)
+	EbNodB="$2"
+	shift
+	shift	
+	;;
+    -h)
+        print_help	
+    ;;
+    *)
+    POSITIONAL+=("$1") # save it in an array for later
+    shift
+    ;;
+esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+if [ $do_train -eq 1 ]; then
+    train
+fi
+
+if [ $do_vq -eq 1 ]; then
+  listen_vq ${vq_fn} ${dec} ${EbNodB} ${in_wav} ${out_wav} ${do_trellis}
+fi
diff --git a/script/train_trellis.sh b/script/train_trellis.sh
deleted file mode 100755
index e2bd77740..000000000
--- a/script/train_trellis.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash -x
-# train_trellis.sh
-# David Rowe August 2021
-#
-# Training Trellis Vector Quantiser for Codec 2 newamp1, supports octave/trellis.m
-
-TRAIN=~/Downloads/all_speech_8k.sw
-CODEC2_PATH=$HOME/codec2
-PATH=$PATH:$CODEC2_PATH/build_linux/src:$CODEC2_PATH/build_linux/misc
-K=20
-Kst=2
-Ken=16
-
-# train a new VQ and generate quantised training material
-function train() {
-  fullfile=$TRAIN
-  filename=$(basename -- "$fullfile")
-  extension="${filename##*.}"
-  filename="${filename%.*}"
-  
-  c2sim $fullfile --rateK --rateKout ${filename}.f32
-  echo "ratek=load_f32('../build_linux/${filename}.f32',20); vq_700c_eq; ratek_lim=limit_vec(ratek, 0, 40); save_f32('../build_linux/${filename}_lim.f32', ratek_lim); quit" | \
-  octave -p ${CODEC2_PATH}/octave -qf
-  #vqtrain ${filename}_lim.f32 $K 4096 vq_stage1.f32 -s 1e-3 --st $Kst --en $Ken
-  cat ${filename}_lim.f32 | vq_mbest --st $Kst --en $Ken -k $K -q vq_stage1.f32 > ${filename}_test.f32
-}
-
-# choose which function to run here
-train