singaxiong · xuchenglin28 · Jan 9, 2017 · Jan 16, 2017 · Feb 8, 2017 · Feb 8, 2017
diff --git a/graph/B_ConcatRealImag.m b/graph/B_ConcatRealImag.m
@@ -0,0 +1,13 @@
+function grad = B_ConcatRealImag(prev_layer, curr_layer, future_layers)
+% B_ConcatRealImag  Backward pass paired with F_ConcatRealImag.
+%   The gradient returned by GetFutureGrad is split at the middle of its
+%   first dimension: the top half becomes the real part and the bottom half
+%   the imaginary part of the returned gradient. prev_layer is not used.
+
+stackedGrad = GetFutureGrad(future_layers, curr_layer);
+halfRows = size(stackedGrad, 1) / 2;
+topHalf = stackedGrad(1:halfRows, :, :);
+bottomHalf = stackedGrad(halfRows+1:end, :, :);
+grad = topHalf + sqrt(-1) * bottomHalf;
+
+end
diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m
@@ -140,14 +140,20 @@
             [layer{i}.a, layer{i}.validFrameMask] = F_comp_gcc(prev_layers{1}, layer{i});
         case 'stft'
             [layer{i}.a, layer{i}.validFrameMask] = F_stft(prev_layers{1}, layer{i});
-
+        case 'spatialcov'
+            [layer{i}.a, layer{i}.validFrameMask] = F_SpatialCov(prev_layers{1}, layer{i});       % do not support variable length yet
         case 'spatialcovmask'
-            layer{i}.a = F_SpatialCovMask(prev_layers, layer{i});       % do not support variable length yet
+            [layer{i}.a, layer{i}.validFrameMask] = F_SpatialCovMask(prev_layers, layer{i});       % do not support variable length yet
         case 'spatialcovsplitmask'
             layer{i}.a = F_SpatialCovSplitMask(prev_layers, layer{i});       % do not support variable length yet
         case 'mvdr_spatialcov'
             layer{i} = F_MVDR_spatialCov(prev_layers{1}, layer{i});       % do not support variable length yet
-
+        case 'extspatialcovfeat'
+            layer{i}.a = F_ExtSpatialCovFeat(prev_layers{1}, layer{i});   % extract up triangle real and imag parts, diagonal part from spatial cov
+        case 'spatialnorm'
+            layer{i}.a = F_SpatialNorm(prev_layers{1}, layer{i});
+        case 'concatrealimag'
+            layer{i}.a = F_ConcatRealImag(prev_layers{1}); 
         case 'cov'
             layer{i}.a = F_cov(prev_layers{1}.a);       % do not support variable length yet
         case 'logdet'
@@ -247,7 +253,7 @@
             if isfield(layer{i}, 'mask')        % the mask defines what values can be tuned and what cannot be tuned. 
                 tmp = tmp .* layer{i}.mask;
             end
-			cost_func.cost = cost_func.cost + 0.5* L2weight * sum(sum(tmp.*tmp));
+			cost_func.cost = cost_func.cost + 0.5* L2weight * sum(sum(real(tmp.*conj(tmp))));
 		end
     end
 end
@@ -453,6 +459,8 @@
             layer{i}.grad = B_inner_product_normalized(prev_layers, future_layers);
         case 'concatenate'
             layer{i}.grad = B_concatenate(prev_layers, layer{i}, future_layers);
+        case 'concatrealimag'
+            layer{i}.grad = B_ConcatRealImag(prev_layers, layer{i}, future_layers);
 
         otherwise
             fprintf('Error: unknown output node type %s!\n', layer{i}.name);

diff --git a/graph/DNN_update.m b/graph/DNN_update.m
@@ -30,7 +30,7 @@
         end
     end
 
-    if para.NET.gradientClipThreshold > 0
+    if para.NET.gradientClipThreshold > 0 && isreal(grad_W)
         grad_W = max(-para.NET.gradientClipThreshold, grad_W);
         grad_W = min(para.NET.gradientClipThreshold, grad_W);
     end
@@ -65,7 +65,7 @@
         layer{Lidx(1)}.W = layer{Lidx(1)}.W - update{i}.W;
     end
 
-    if para.NET.weight_clip
+    if para.NET.weight_clip && isreal(layer{Lidx(1)}.W)
         % sometimes the weight will explode, so we need to add a limit to the value of the weights, e.g. +-10
         layer{Lidx(1)}.W = max(-para.NET.weight_clip,layer{Lidx(1)}.W);
         layer{Lidx(1)}.W = min(para.NET.weight_clip,layer{Lidx(1)}.W);

diff --git a/graph/F_ConcatRealImag.m b/graph/F_ConcatRealImag.m
@@ -0,0 +1,7 @@
+function output = F_ConcatRealImag(prev_layer)
+% F_ConcatRealImag  Stack real(prev_layer.a) on top of imag(prev_layer.a)
+% along the first dimension, so the output has twice as many rows as the input.
+activation = prev_layer.a;
+output = cat(1, real(activation), imag(activation));
+
+end
diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m
@@ -0,0 +1,160 @@
+function feat = F_ExtSpatialCovFeat(prev_layer, curr_layer)
+% F_ExtSpatialCovFeat  Real-valued features from normalized spatial covariances.
+%   prev_layer.a is reshaped as nCh x nCh x nBin x nf (x N). Each entry is
+%   divided by the mean of the nCh diagonal entries of its bin and frame.
+%   feat stacks, along dim 1: real parts of the selected off-diagonal entries,
+%   their imaginary parts (first and last bin skipped, except when bin
+%   averaging is active), and, if scm_select_diag, the real diagonal entries.
+%   curr_layer: nCh, nBin; optional scm_select ('uptriangle' (default) or
+%   'row'), scm_select_diag (default 1), scm_select_bin (default 0; needs
+%   scm_bin_shift and only takes effect when N > 1). An unknown scm_select
+%   raises an error. prev_layer.validFrameMask is only used when N > 1.
+covMat = prev_layer.a;
+prev_mask = prev_layer.validFrameMask;
+nCh = curr_layer.nCh;
+nBin = curr_layer.nBin;
+[~, nf, N] = size(covMat);
+
+if isfield(curr_layer, 'scm_select')
+    scm_select = curr_layer.scm_select;
+else
+    scm_select = 'uptriangle';
+end
+if isfield(curr_layer, 'scm_select_diag')
+    scm_select_diag = curr_layer.scm_select_diag;
+else
+    scm_select_diag = 1;
+end
+if isfield(curr_layer, 'scm_select_bin')
+    scm_select_bin = curr_layer.scm_select_bin;
+    scm_bin_shift = curr_layer.scm_bin_shift;
+else
+    scm_select_bin = 0;
+end
+
+if N == 1
+
+    % normalize the cov matrix by the mean of its diagonal elements, which removes
+    % the effect of spectral power and only retains the phase information
+    dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin));
+    dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index
+    diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:), nCh, nBin, nf), 1));
+    if nf ==1
+        diag_mean = diag_mean.';
+    end
+    normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf), [3 4 1 2]), 1./diag_mean);
+    normCovMat = reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf);
+
+    % get the upper triangle off-diagonal elements which are complex-valued
+    if strcmpi(scm_select, 'uptriangle')
+        selectMat = triu(ones(nCh, nCh),1); % 1. upper triangle
+    elseif strcmpi(scm_select, 'row')
+        selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row
+    else
+        error('F_ExtSpatialCovFeat:unknownSelect', 'Error: unknown scm feature select type: %s', lower(scm_select));
+    end
+
+    dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin));
+    dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1);
+    real_part = real(normCovMat(dimSelectIdx2,:));
+    % imag_part = imag(normCovMat(dimSelectIdx2,:));
+    % the first and last frequency bins are skipped for the imaginary part
+    dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1)));
+    dimSelectIdx3 = find(reshape(dimSelectMask3, numel(dimSelectMask3),1) == 1);
+    imag_part = imag(normCovMat(dimSelectIdx3,:));
+
+    % get the diagonal elements which are real values
+    % diag_part = covMat(dimSelectIdx1,:);
+    % diag_part = log(max(eps,abs(diag_part)));
+    if scm_select_diag
+        diag_part = real(normCovMat(dimSelectIdx1,:));
+    end
+else
+    % reduce the bins: moving average over scm_bin_shift bins, keeping every scm_bin_shift-th result
+    if scm_select_bin
+        covMat1 = reshape(covMat, nCh^2, nBin, nf, N);
+        covMat2 = reshape(permute(covMat1, [1 3 4 2]), nCh^2*nf*N, nBin);
+        covMat3 = conv2(covMat2, ones(1,scm_bin_shift, 'like', covMat2(1))/scm_bin_shift, 'valid');
+        covMat4 = covMat3(:, 1:scm_bin_shift:end);
+        nBin = size(covMat4, 2);
+        covMat = reshape(permute(reshape(covMat4, nCh^2, nf, N, nBin), [1 4 2 3]), nCh^2*nBin, nf, N);
+
+    end
+
+    % normalize the cov matrix by the mean of its diagonal elements, which removes
+    % the effect of spectral power and only retains the phase information
+    dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin));
+    dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index
+    diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:,:), nCh, nBin, nf, N), 1));
+    if nf ==1
+        diag_mean = reshape(diag_mean, size(diag_mean,1), 1, size(diag_mean, 2));
+    end
+    % padded minibatch frames are all zero, so their diagonal mean is zero; -1e10 is added where prev_mask is set to avoid dividing by zero
+    diag_mean1 = permute(bsxfun(@plus, permute(diag_mean, [2 3 1]), -1e10.*prev_mask), [3 1 2]);
+    normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf, N), [3 4 5 1 2]), 1./diag_mean1);
+    normCovMat = reshape(permute(normCovMat, [4 5 1 2 3]), nCh^2*nBin, nf, N);
+
+    % (an earlier variant, since removed, applied the bin averaging after this normalization instead)
+
+    % get the upper triangle off-diagonal elements which are complex-valued
+    if strcmpi(scm_select, 'uptriangle')
+        selectMat = triu(ones(nCh, nCh),1); % 1. upper triangle
+    elseif strcmpi(scm_select, 'row')
+        selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row
+    else
+        error('F_ExtSpatialCovFeat:unknownSelect', 'Error: unknown scm feature select type: %s', lower(scm_select));
+    end
+
+    dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin));
+    dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1);
+    real_part = real(normCovMat(dimSelectIdx2,:,:));
+    % imag_part = imag(normCovMat(dimSelectIdx2,:));
+    % the first and last frequency bins are skipped for the imaginary part unless bins were averaged
+    if scm_select_bin
+        dimSelectMask3 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin));
+    else
+        dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1)));
+    end
+    dimSelectIdx3 = find(reshape(dimSelectMask3, numel(dimSelectMask3),1) == 1);
+    imag_part = imag(normCovMat(dimSelectIdx3,:,:));
+
+    % get the diagonal elements which are real values
+    if scm_select_diag
+        dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin));
+        dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1);
+        diag_part = real(normCovMat(dimSelectIdx1,:,:));
+    end
+end
+
+% get the final feature vector
+if scm_select_diag
+    feat = [real_part; imag_part; diag_part];
+else
+    feat = [real_part; imag_part];
+end
+% real_part = reshape(real_part, 7, 257, nf, N);
+% imag_part = reshape(imag_part, 7, 255, nf, N);
+% real_part = real_part(:, 6:5:end,:,:);
+% imag_part = imag_part(:, 5:5:end,:,:);
+% 
+% feat = [reshape(real_part, 7*51, nf, N); reshape(imag_part, 7*51, nf, N)];
+
+
+% covMat = reshape(covMat(:,:,:), nCh, nCh, nBin, nf, N);
+% covMatCell = num2cell(covMat, [1 2]);
+% omegaTau = cellfun(@GetPrincVec, covMatCell, 'UniformOutput', 0);
+% output = permute(cell2mat(omegaTau), [1 3 4 5 2]);
+% 
+% feat = output(2:8, 5:5:end, :,:);
+% [d1,d2,d3,d4] = size(feat);
+% feat = reshape(feat, d1*d2, d3, d4);
+
+end
+
+function omegaTau = GetPrincVec(A)
+% Phase angles of the eigenvector of A whose eigenvalue max() selects, relative to its first entry.
+[eigVecs, eigVals] = eig(A);
+[~, top] = max(diag(eigVals));
+princ = eigVecs(:, top);
+omegaTau = gather(angle(princ / princ(1)));
+end
diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m
@@ -1,21 +1,92 @@
-function output = F_SpatialCov(input_layer, curr_layer)
+function [output, mask] = F_SpatialCov(input_layer, curr_layer)
+% F_SpatialCov  Spatial covariance of input_layer.a (D x T x N, D = nBin*nCh); output has nCh^2*nBin rows.
+%   With winSize == 0 or T <= winSize the per-frame products are averaged over all T frames (nf = 1);
+%   otherwise they are summed (not averaged) over winSize frames, every winShift frames (nf windows).
+%   mask is nf x N; 1 marks output frames past the last complete window (set only if N > 1 with windowing).
 
 input = input_layer.a;
 [D,T,N] = size(input);
 
 curr_layer = SetDefaultValue(curr_layer, 'winSize', 0);
 curr_layer = SetDefaultValue(curr_layer, 'winShift', 1);
 
+if T <= curr_layer.winSize
+    windowSize = 0;
+    windowShift = 1;
+else
+    windowSize = curr_layer.winSize;
+    windowShift = curr_layer.winShift;
+end
+
 nBin = length(curr_layer.freqBin);
 nCh = D/nBin;
 
+input2 = reshape(input, nBin, nCh, T, N);
+
+if windowSize == 0
+    nf = 1;
+else
+    nf = fix((T-windowSize+windowShift)/windowShift);
+end
+mask = zeros(nf, N, 'like', real(input2(1)));
+
 if N==1
-    input2 = reshape(input, nBin, nCh, T,N);
-    R = ComplexSpectrum2SpatialCov(input2, curr_layer.winSize, curr_layer.winShift);
-    output = permute(R, [3 1 2 4]);
-    output = reshape(output, nBin*nCh^2, size(output,4),N);
+    % outProdND is defined elsewhere; presumably the per-bin, per-frame channel outer product - TODO confirm
+    X2 = permute(input2, [2 1 3]);
+    XX = outProdND(X2);
+    XX2 = reshape(XX, nCh^2*nBin, T);
+
+    if windowSize == 0
+        output = squeeze(mean(XX2, 2));
+    else
+%         idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T];
+%         SCM = conv2(XX2, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid');
+        SCM = conv2(XX2, ones(1,windowSize, class(gather(input2))), 'valid');
+        output = SCM(:, 1:windowShift:end);
+    end
+
 else
-    % to be implemented
+    X2 = permute(input2, [2 1 3 4]);
+    XX = outProdND(X2);
+    XX2 = reshape(XX, nCh^2*nBin, T, N);
+
+    if windowSize == 0
+        output = mean(XX2, 2);
+    else
+% %         idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T];
+%         XX3 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T);
+%         SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid');
+%         output = SCM(:, 1:windowShift:end);
+%         output = permute(reshape(output, nCh^2*nBin, N, size(output, 2)), [1 3 2]);
+
+%         % Version 1
+%         prev_mask = input_layer.validFrameMask;
+%         output = zeros(nCh^2*nBin, nf, N, 'like', XX2);
+%         for i=1:N
+%             idx = find(prev_mask(:,i) == 0, 1, 'last');
+%             idx2 = fix((idx-windowSize+windowShift)/windowShift);
+%             XX3 = squeeze(XX2(:,1:idx,i));
+%             SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid');
+%             output(:, 1:idx2, i) = SCM(:, 1:windowShift:end);
+%             mask(idx2+1:end, i) = 1;
+%         end
+        % Version 2, much faster. idx: last frame with prev_mask == 0 per sequence (0 if there is none);
+        % idx2: number of complete windows, clamped at 0 so short sequences cannot produce negative indices
+        prev_mask = input_layer.validFrameMask;
+        idx = arrayfun(@(x) max([0; find(gather(prev_mask(:,x)) == 0, 1, 'last')]), 1:size(prev_mask,2));
+        idx2 = arrayfun(@(x) max(0, fix((idx(x)-windowSize+windowShift)/windowShift)), 1:length(idx));
+        XX31 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T);
+%         SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid');
+        SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2))), 'valid');
+        output1 = SCM1(:, 1:windowShift:end);
+        output2 = permute(reshape(output1, nCh^2*nBin, N, size(output1, 2)), [1 3 2]);
+        output = zeros(nCh^2*nBin, nf, N, 'like', XX2);
+        for i = 1:N
+            output(:, 1:idx2(i), i) = output2(:, 1:idx2(i), i);
+            mask(idx2(i)+1:end, i) = 1;
+        end
+
+    end
 end
 
-end
+end