From 9361cca3d51443b9f6bb56af60155dc3691d11ab Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Mon, 9 Jan 2017 17:53:07 +0800 Subject: [PATCH 01/17] revise F_tdoa2weight with current code style by passing (input_layer, curr_layer) --- graph/F_tdoa2weight.m | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/graph/F_tdoa2weight.m b/graph/F_tdoa2weight.m index 7529aa9..8568860 100644 --- a/graph/F_tdoa2weight.m +++ b/graph/F_tdoa2weight.m @@ -1,8 +1,10 @@ -function output = F_tdoa2weight(input, freq_bin) +function output = F_tdoa2weight(input_layer, curr_layer) % assume input is an array of time delay of C microphone channels. % freq_bin is an array of center frequencies of N FFT bins. +input = input_layer.a; +freq_bin = curr_layer.freq_bin; [D,T,N] = size(input); nCh = D+1; delay = [zeros(1,T); input]; From 1400475d2118724b4213434ee4bbdfa2f6b95d4c Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Thu, 9 Feb 2017 11:38:02 +0800 Subject: [PATCH 02/17] Implement the moving window average of F_SpatialCovMask --- graph/F_SpatialCovMask.m | 11 ++++++++++- signal/gmm/ComputeWinCovMask.m | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 signal/gmm/ComputeWinCovMask.m diff --git a/graph/F_SpatialCovMask.m b/graph/F_SpatialCovMask.m index 45473e0..9568e74 100644 --- a/graph/F_SpatialCovMask.m +++ b/graph/F_SpatialCovMask.m @@ -8,6 +8,7 @@ if isfield(curr_layer, 'windowSize') windowSize = curr_layer.windowSize; + shift = fix(windowSize/2); else windowSize = 0; end @@ -57,7 +58,15 @@ scm_noise2 = reshape(scm_noise, nCh^2*D, 1, N); output = [scm_speech2; scm_noise2]; else % online mode, estiamte covariance matrices for a sliding window of frames. - % to be implemented. + % to be implemented. 
+ % frame number after moving window + nf = fix((T-windowSize+shift)/shift); + mask2 = permute(mask, [4 2 1 3]); + scm_speech = ComputeWinCovMask(data, mask2, windowSize, shift); + scm_noise = ComputeWinCovMask(data, 1-mask2, windowSize, shift); + scm_speech2 = reshape(scm_speech, nCh^2*D, nf, N); + scm_noise2 = reshape(scm_noise, nCh^2*D, nf, N); + output = [scm_speech2; scm_noise2]; end diff --git a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m new file mode 100644 index 0000000..606fdaf --- /dev/null +++ b/signal/gmm/ComputeWinCovMask.m @@ -0,0 +1,18 @@ +function winCovMat = ComputeWinCovMask(data, mask, winsize, shift) + +weight = sqrt(bsxfun(@times, mask, 1./sum(mask))); +data_scaled = bsxfun(@times, data, weight); + +covMat = outProdND(data_scaled); + +% convert the stft frame based Cov to winsize based Cov +[nch, ~, nf_stft, nbin] = size(covMat); +nf = fix((nf_stft-winsize+shift)/shift); +winCovMat = zeros(nf, nch*nch*nbin*winsize); +covMat = reshape(permute(covMat, [1 2 4 3]), 1, nch*nch*nbin*nf_stft); +indf = shift*(0:(nf-1)).'; +inds = (1:nch*nch*nbin*winsize); +winCovMat(:) = covMat(indf(:,ones(1,nch*nch*nbin*winsize))+inds(ones(nf,1),:)); +winCovMat = permute(reshape(winCovMat, nf, nch, nch, nbin, winsize), [2 3 4 5 1]); +winCovMat = squeeze(sum(winCovMat, 4)./winsize); +end \ No newline at end of file From 1f6a39ddc7853b9e6af2384962b92ab1f5ddaa1d Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Thu, 9 Feb 2017 14:23:00 +0800 Subject: [PATCH 03/17] fix index bug in ComputeWinCovMask --- signal/gmm/ComputeWinCovMask.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m index 606fdaf..5e9f630 100644 --- a/signal/gmm/ComputeWinCovMask.m +++ b/signal/gmm/ComputeWinCovMask.m @@ -10,7 +10,7 @@ nf = fix((nf_stft-winsize+shift)/shift); winCovMat = zeros(nf, nch*nch*nbin*winsize); covMat = reshape(permute(covMat, [1 2 4 3]), 1, nch*nch*nbin*nf_stft); -indf = 
shift*(0:(nf-1)).'; +indf = nch*nch*nbin*shift*(0:(nf-1)).'; inds = (1:nch*nch*nbin*winsize); winCovMat(:) = covMat(indf(:,ones(1,nch*nch*nbin*winsize))+inds(ones(nf,1),:)); winCovMat = permute(reshape(winCovMat, nf, nch, nch, nbin, winsize), [2 3 4 5 1]); From 24b513c5815530520f5bae184cfb4bfd95031aa5 Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Thu, 16 Feb 2017 20:05:53 +0800 Subject: [PATCH 04/17] Add F_SpatialCov and F_ExtSpatialCovFeat for SCM based TDOA estimation --- graph/DNN_Cost10.m | 6 ++- graph/F_ExtSpatialCovFeat.m | 96 ++++++++++++++++++++++++++++++++++ graph/F_SpatialCov.m | 60 ++++++++++++++++++--- graph/F_SpatialCovMask.m | 28 +++++++--- signal/feature/sfft_multi.m | 6 +-- signal/gmm/ComputeWinCovMask.m | 43 +++++++++++---- 6 files changed, 209 insertions(+), 30 deletions(-) create mode 100644 graph/F_ExtSpatialCovFeat.m diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m index ee405f1..b18566d 100644 --- a/graph/DNN_Cost10.m +++ b/graph/DNN_Cost10.m @@ -140,14 +140,16 @@ [layer{i}.a, layer{i}.validFrameMask] = F_comp_gcc(prev_layers{1}, layer{i}); case 'stft' [layer{i}.a, layer{i}.validFrameMask] = F_stft(prev_layers{1}, layer{i}); - + case 'spatialcov' + [layer{i}.a, layer{i}.validFrameMask] = F_SpatialCov(prev_layers{1}, layer{i}); % do not support variable length yet case 'spatialcovmask' layer{i}.a = F_SpatialCovMask(prev_layers, layer{i}); % do not support variable length yet case 'spatialcovsplitmask' layer{i}.a = F_SpatialCovSplitMask(prev_layers, layer{i}); % do not support variable length yet case 'mvdr_spatialcov' layer{i} = F_MVDR_spatialCov(prev_layers{1}, layer{i}); % do not support variable length yet - + case 'extspatialcovfeat' + layer{i}.a = F_ExtSpatialCovFeat(prev_layers{1}, layer{i}); % extract up triangle real and imag parts, diagonal part from spatial cov case 'cov' layer{i}.a = F_cov(prev_layers{1}.a); % do not support variable length yet case 'logdet' diff --git a/graph/F_ExtSpatialCovFeat.m 
b/graph/F_ExtSpatialCovFeat.m new file mode 100644 index 0000000..01c76d0 --- /dev/null +++ b/graph/F_ExtSpatialCovFeat.m @@ -0,0 +1,96 @@ +function feat = F_ExtSpatialCovFeat(prev_layer, curr_layer) + +covMat = prev_layer.a; +prev_mask = prev_layer.validFrameMask; +nCh = curr_layer.nCh; +nBin = curr_layer.nBin; +[~, nf, N] = size(covMat); + +if N == 1 + % normalize the cov matrix by their diagonal elements, remove the effect of + % spectral power and only retains the phase information + dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); + dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index + diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:), nCh, nBin, nf), 1)); + if nf ==1 + diag_mean = reshape(diag_mean, size(diag_mean,1), 1, size(diag_mean, 2)); + end + normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf), [3 4 1 2]), 1./diag_mean); + normCovMat = reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf); + + % get the upper triangle off-diagonal elements which are complex-valued + % selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle + selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. 
first row + + dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); + dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); + real_part = real(normCovMat(dimSelectIdx2,:)); + % imag_part = imag(normCovMat(dimSelectIdx2,:)); + % for freq bin 1 and 257, no imag part + dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1))); + dimSelectIdx3 = find(reshape(dimSelectMask3, numel(dimSelectMask3),1) == 1); + imag_part = imag(normCovMat(dimSelectIdx3,:)); + + % get the diagonal elements which are real values + % diag_part = covMat(dimSelectIdx1,:); + % diag_part = log(max(eps,abs(diag_part))); + diag_part = real(normCovMat(dimSelectIdx1,:)); +else + % normalize the cov matrix by their diagonal elements, remove the effect of + % spectral power and only retains the phase information + dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); + dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index + diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:,:), nCh, nBin, nf, N), 1)); + if nf ==1 + diag_mean = reshape(diag_mean, size(diag_mean,1), 1, size(diag_mean, 2)); + end + % minibatch padding makes some frames zero, mean of that still be zero, can not be divided. + diag_mean1 = permute(bsxfun(@plus, permute(diag_mean, [2 3 1]), -1e10.*prev_mask), [3 1 2]); + normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf, N), [3 4 5 1 2]), 1./diag_mean1); + normCovMat = reshape(permute(normCovMat, [4 5 1 2 3]), nCh^2*nBin, nf, N); + + % get the upper triangle off-diagonal elements which are complex-valued + selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle +% selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. 
first row + + dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); + dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); + real_part = real(normCovMat(dimSelectIdx2,:,:)); + % imag_part = imag(normCovMat(dimSelectIdx2,:)); + % for freq bin 1 and 257, no imag part + dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1))); + dimSelectIdx3 = find(reshape(dimSelectMask3, numel(dimSelectMask3),1) == 1); + imag_part = imag(normCovMat(dimSelectIdx3,:,:)); + + % get the diagonal elements which are real values + diag_part = real(normCovMat(dimSelectIdx1,:,:)); +end + +% get the final feature vector +feat = [real_part; imag_part; diag_part]; +% real_part = reshape(real_part, 7, 257, nf, N); +% imag_part = reshape(imag_part, 7, 255, nf, N); +% real_part = real_part(:, 6:5:end,:,:); +% imag_part = imag_part(:, 5:5:end,:,:); +% +% feat = [reshape(real_part, 7*51, nf, N); reshape(imag_part, 7*51, nf, N)]; + + +% covMat = reshape(covMat(:,:,:), nCh, nCh, nBin, nf, N); +% covMatCell = num2cell(covMat, [1 2]); +% omegaTau = cellfun(@GetPrincVec, covMatCell, 'UniformOutput', 0); +% output = permute(cell2mat(omegaTau), [1 3 4 5 2]); +% +% feat = output(2:8, 5:5:end, :,:); +% [d1,d2,d3,d4] = size(feat); +% feat = reshape(feat, d1*d2, d3, d4); + +end + +function omegaTau = GetPrincVec(A) +[V,D] = eig(A); +D = diag(D); +[~, idx] = max(D); +ev = V(:,idx); +omegaTau = gather(angle(ev/ev(1))); +end diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 5c8e67c..4ece5b9 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -1,4 +1,4 @@ -function output = F_SpatialCov(input_layer, curr_layer) +function [output, mask] = F_SpatialCov(input_layer, curr_layer) input = input_layer.a; [D,T,N] = size(input); @@ -9,13 +9,59 @@ nBin = length(curr_layer.freqBin); nCh = D/nBin; +input2 = reshape(input, nBin, nCh, T, N); + +if curr_layer.winSize == 0 + nf = 1; +else + nf = 
fix((T-curr_layer.winSize+curr_layer.winShift)/curr_layer.winShift); +end +mask = zeros(nf, N, 'like', real(input2)); + if N==1 - input2 = reshape(input, nBin, nCh, T,N); - R = ComplexSpectrum2SpatialCov(input2, curr_layer.winSize, curr_layer.winShift); - output = permute(R, [3 1 2 4]); - output = reshape(output, nBin*nCh^2, size(output,4),N); +% R = ComplexSpectrum2SpatialCov(input2, curr_layer.winSize, curr_layer.winShift); +% % output = permute(R, [3 1 2 4]); +% % output = reshape(output, nBin*nCh^2, size(output,4),N); +% output = reshape(R, nCh^2*nBin, size(R,4),N); + + X2 = permute(input2, [2 1 3]); + XX = outProdND(X2); + XX2 = reshape(XX, nCh^2*nBin, T); + + if curr_layer.winSize == 0 + output = squeeze(mean(XX2, 2)); + else +% idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T]; + SCM = conv2(XX2, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); + output = SCM(:, 1:curr_layer.winShift:end); + end + else - % to be implemented + X2 = permute(input2, [2 1 3 4]); + XX = outProdND(X2); + XX2 = reshape(XX, nCh^2*nBin, T, N); + + if curr_layer.winSize == 0 + output = mean(XX2, 2); + else +% % idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T]; +% XX3 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T); +% SCM = conv2(XX3, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); +% output = SCM(:, 1:curr_layer.winShift:end); +% output = permute(reshape(output, nCh^2*nBin, N, size(output, 2)), [1 3 2]); + + prev_mask = input_layer.validFrameMask; + output = zeros(nCh^2*nBin, nf, N, 'like', XX2); + for i=1:N + idx = find(prev_mask(:,i) == 0, 1, 'last'); + idx2 = fix((idx-curr_layer.winSize+curr_layer.winShift)/curr_layer.winShift); + XX3 = squeeze(XX2(:,1:idx,i)); + SCM = conv2(XX3, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); + output(:, 1:idx2, i) = SCM(:, 1:curr_layer.winShift:end); + mask(idx2+1:end, i) = 1; + end + + end end -end \ No newline at end of file +end diff --git 
a/graph/F_SpatialCovMask.m b/graph/F_SpatialCovMask.m index 9568e74..add3831 100644 --- a/graph/F_SpatialCovMask.m +++ b/graph/F_SpatialCovMask.m @@ -13,6 +13,12 @@ windowSize = 0; end +if isfield(curr_layer, 'speechOnly') + speechOnly = curr_layer.speechOnly; +else + speechOnly = false; +end + [D,T,N] = size(mask); [D2,T,N] = size(data); nCh = D2/D; @@ -51,22 +57,30 @@ mask2 = permute(mask, [4 2 1 3]); scm_speech = ComputeCovMask(data, mask2); - scm_noise = ComputeCovMask(data, 1-mask2); + if ~speechOnly + scm_noise = ComputeCovMask(data, 1-mask2); + end end scm_speech2 = reshape(scm_speech, nCh^2*D, 1, N); - scm_noise2 = reshape(scm_noise, nCh^2*D, 1, N); - output = [scm_speech2; scm_noise2]; + if speechOnly + output = scm_speech2; + else + scm_noise2 = reshape(scm_noise, nCh^2*D, 1, N); + output = [scm_speech2; scm_noise2]; + end else % online mode, estiamte covariance matrices for a sliding window of frames. % to be implemented. % frame number after moving window nf = fix((T-windowSize+shift)/shift); mask2 = permute(mask, [4 2 1 3]); scm_speech = ComputeWinCovMask(data, mask2, windowSize, shift); - scm_noise = ComputeWinCovMask(data, 1-mask2, windowSize, shift); - scm_speech2 = reshape(scm_speech, nCh^2*D, nf, N); - scm_noise2 = reshape(scm_noise, nCh^2*D, nf, N); - output = [scm_speech2; scm_noise2]; + if speechOnly + output = scm_speech; + else + scm_noise = ComputeWinCovMask(data, 1-mask2, windowSize, shift); + output = [scm_speech; scm_noise]; + end end diff --git a/signal/feature/sfft_multi.m b/signal/feature/sfft_multi.m index 635983d..3847484 100644 --- a/signal/feature/sfft_multi.m +++ b/signal/feature/sfft_multi.m @@ -17,9 +17,9 @@ useGPU = 0; end -if exist('doDithering')==0 || length(doDithering)==0 - x = x + randn(size(x))/2^32; -end +% if exist('doDithering')==0 || length(doDithering)==0 +% x = x + randn(size(x))/2^32; +% end % produce the hamming windowm if exist('window_type')==0 || length(window_type)==0 diff --git 
a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m index 5e9f630..d119160 100644 --- a/signal/gmm/ComputeWinCovMask.m +++ b/signal/gmm/ComputeWinCovMask.m @@ -1,18 +1,39 @@ function winCovMat = ComputeWinCovMask(data, mask, winsize, shift) - +[nch, nf_stft, nbin, N] = size(data); weight = sqrt(bsxfun(@times, mask, 1./sum(mask))); data_scaled = bsxfun(@times, data, weight); covMat = outProdND(data_scaled); +if N == 1 + covMat1 = reshape(permute(covMat, [1 2 4 3]), nch^2*nbin, nf_stft); +else + covMat1 = reshape(permute(covMat, [1 2 4 3 5]), nch^2*nbin, nf_stft, N); +end + +% % Version 1: fast, but consume memory when winsize is large +% nf = fix((nf_stft-winsize+shift)/shift); +% covMat2 = ExpandContext_v2(covMat1, 0:winsize-1); +% nf_idx = 1:shift:nf_stft-winsize+1; +% covMat3 = covMat2(:, nf_idx, :); +% covMat3 = reshape(covMat3, nch^2*nbin, winsize, nf, N); +% winCovMat = squeeze(mean(covMat3, 2)); + +% Version 2: less fast than version 1 +SCM1 = conv2(covMat1, ones(1,winsize, class(gather(covMat)))/winsize, 'valid'); +winCovMat = SCM1(:, 1:shift:end); + +% % Version 3: slowest in repmat and not support multiple sentences +% if IsInGPU(data) +% winCovMat11 = gpuArray.zeros(nf, nch*nch*nbin*winsize); +% else +% winCovMat11 = zeros(nf, nch*nch*nbin*winsize); +% end +% covMat11 = reshape(permute(covMat, [1 2 4 3]), 1, nch*nch*nbin*nf_stft); +% indf = nch*nch*nbin*shift*(0:(nf-1)).'; +% inds = (1:nch*nch*nbin*winsize); +% % winCovMat(:) = covMat(indf(:,ones(1,nch*nch*nbin*winsize))+inds(ones(nf,1),:)); % slow +% winCovMat11(:) = covMat11(repmat(indf,1,nch*nch*nbin*winsize)+repmat(inds,nf,1)); +% winCovMat11 = permute(reshape(winCovMat11, nf, nch*nch*nbin, winsize), [2 3 1]); +% winCovMat = squeeze(mean(winCovMat11, 2)); -% convert the stft frame based Cov to winsize based Cov -[nch, ~, nf_stft, nbin] = size(covMat); -nf = fix((nf_stft-winsize+shift)/shift); -winCovMat = zeros(nf, nch*nch*nbin*winsize); -covMat = reshape(permute(covMat, [1 2 4 3]), 1, 
nch*nch*nbin*nf_stft); -indf = nch*nch*nbin*shift*(0:(nf-1)).'; -inds = (1:nch*nch*nbin*winsize); -winCovMat(:) = covMat(indf(:,ones(1,nch*nch*nbin*winsize))+inds(ones(nf,1),:)); -winCovMat = permute(reshape(winCovMat, nf, nch, nch, nbin, winsize), [2 3 4 5 1]); -winCovMat = squeeze(sum(winCovMat, 4)./winsize); end \ No newline at end of file From 794c7d5f7108fa3e79ef0829f6ea2b31623a34c6 Mon Sep 17 00:00:00 2001 From: ellenrw Date: Mon, 20 Feb 2017 15:33:34 +0800 Subject: [PATCH 05/17] Fix a bug in F_ExtSpatialCovFeat when N==1 --- graph/F_ExtSpatialCovFeat.m | 6 +++--- graph/F_SpatialCovMask.m | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m index 01c76d0..487273a 100644 --- a/graph/F_ExtSpatialCovFeat.m +++ b/graph/F_ExtSpatialCovFeat.m @@ -13,14 +13,14 @@ dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:), nCh, nBin, nf), 1)); if nf ==1 - diag_mean = reshape(diag_mean, size(diag_mean,1), 1, size(diag_mean, 2)); + diag_mean = diag_mean.'; end normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf), [3 4 1 2]), 1./diag_mean); normCovMat = reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf); % get the upper triangle off-diagonal elements which are complex-valued - % selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle - selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row + selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle +% selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. 
first row dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); diff --git a/graph/F_SpatialCovMask.m b/graph/F_SpatialCovMask.m index add3831..84e6d91 100644 --- a/graph/F_SpatialCovMask.m +++ b/graph/F_SpatialCovMask.m @@ -26,6 +26,10 @@ data = permute(data, [2 3 1 4]); % data = abs(data); +if T <= windowSize + windowSize = 0; +end + if windowSize == 0 % utterance mode, estimate two spatial covariance matrixes for each utterance, one is speech and the other is noise. if 0 % for loop version if IsInGPU(data) From 209d0691b81ce45999e15ee084371ca5e0137355 Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Mon, 20 Feb 2017 15:38:38 +0800 Subject: [PATCH 06/17] commit for merge new version with fix bugs in F_ExtSpatialCovFeat --- graph/F_ExtSpatialCovFeat.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m index 01c76d0..d1e146c 100644 --- a/graph/F_ExtSpatialCovFeat.m +++ b/graph/F_ExtSpatialCovFeat.m @@ -19,8 +19,8 @@ normCovMat = reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf); % get the upper triangle off-diagonal elements which are complex-valued - % selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle - selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row + selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle + % selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. 
first row dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); From b40c503ba9d24d424a69b8749212f934c0e88919 Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Mon, 20 Feb 2017 16:13:11 +0800 Subject: [PATCH 07/17] Fix problems caused by frame less than window size in F_SpatialCov --- graph/F_SpatialCov.m | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 4ece5b9..623e6f1 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -6,20 +6,28 @@ curr_layer = SetDefaultValue(curr_layer, 'winSize', 0); curr_layer = SetDefaultValue(curr_layer, 'winShift', 1); +if T <= curr_layer.winSize + windowSize = 0; + windowShift = 1; +else + windowSize = curr_layer.winSize; + windowShift = curr_layer.winShift; +end + nBin = length(curr_layer.freqBin); nCh = D/nBin; input2 = reshape(input, nBin, nCh, T, N); -if curr_layer.winSize == 0 +if windowSize == 0 nf = 1; else - nf = fix((T-curr_layer.winSize+curr_layer.winShift)/curr_layer.winShift); + nf = fix((T-windowSize+windowShift)/windowShift); end mask = zeros(nf, N, 'like', real(input2)); if N==1 -% R = ComplexSpectrum2SpatialCov(input2, curr_layer.winSize, curr_layer.winShift); +% R = ComplexSpectrum2SpatialCov(input2, windowSize, windowShift); % % output = permute(R, [3 1 2 4]); % % output = reshape(output, nBin*nCh^2, size(output,4),N); % output = reshape(R, nCh^2*nBin, size(R,4),N); @@ -28,12 +36,12 @@ XX = outProdND(X2); XX2 = reshape(XX, nCh^2*nBin, T); - if curr_layer.winSize == 0 + if windowSize == 0 output = squeeze(mean(XX2, 2)); else % idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T]; - SCM = conv2(XX2, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); - output = SCM(:, 1:curr_layer.winShift:end); + SCM = conv2(XX2, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + output = SCM(:, 
1:windowShift:end); end else @@ -41,23 +49,23 @@ XX = outProdND(X2); XX2 = reshape(XX, nCh^2*nBin, T, N); - if curr_layer.winSize == 0 + if windowSize == 0 output = mean(XX2, 2); else % % idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T]; % XX3 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T); -% SCM = conv2(XX3, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); -% output = SCM(:, 1:curr_layer.winShift:end); +% SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); +% output = SCM(:, 1:windowShift:end); % output = permute(reshape(output, nCh^2*nBin, N, size(output, 2)), [1 3 2]); prev_mask = input_layer.validFrameMask; output = zeros(nCh^2*nBin, nf, N, 'like', XX2); for i=1:N idx = find(prev_mask(:,i) == 0, 1, 'last'); - idx2 = fix((idx-curr_layer.winSize+curr_layer.winShift)/curr_layer.winShift); + idx2 = fix((idx-windowSize+windowShift)/windowShift); XX3 = squeeze(XX2(:,1:idx,i)); - SCM = conv2(XX3, ones(1,curr_layer.winSize, class(gather(input2)))/curr_layer.winSize, 'valid'); - output(:, 1:idx2, i) = SCM(:, 1:curr_layer.winShift:end); + SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + output(:, 1:idx2, i) = SCM(:, 1:windowShift:end); mask(idx2+1:end, i) = 1; end From 359635abc91ce7ec92edfe61072063a91c957978 Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Tue, 21 Feb 2017 11:13:02 +0800 Subject: [PATCH 08/17] Add scm feat select type in F_ExtSpatialCovFeat --- graph/F_ExtSpatialCovFeat.m | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m index 487273a..a4c48c2 100644 --- a/graph/F_ExtSpatialCovFeat.m +++ b/graph/F_ExtSpatialCovFeat.m @@ -5,6 +5,7 @@ nCh = curr_layer.nCh; nBin = curr_layer.nBin; [~, nf, N] = size(covMat); +scm_select = curr_layer.scm_select; if N == 1 % normalize the cov matrix by their diagonal elements, remove the effect of @@ -19,8 +20,13 @@ normCovMat = 
reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf); % get the upper triangle off-diagonal elements which are complex-valued - selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle -% selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row + if strcmpi(scm_select, 'uptriangle') + selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle + elseif strcmpi(scm_select, 'row') + selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row + else + fprintf('Error: unknown scm feature select type: %s', lower(scm_select)) + end dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); @@ -50,8 +56,13 @@ normCovMat = reshape(permute(normCovMat, [4 5 1 2 3]), nCh^2*nBin, nf, N); % get the upper triangle off-diagonal elements which are complex-valued - selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle -% selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. first row + if strcmpi(scm_select, 'uptriangle') + selectMat = triu(ones(nCh, nCh),1); % 1. up-trialgle + elseif strcmpi(scm_select, 'row') + selectMat = zeros(nCh, nCh); selectMat(1,2:end) = ones(1, nCh-1); % 2. 
first row + else + fprintf('Error: unknown scm feature select type: %s', lower(scm_select)) + end dimSelectMask2 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); dimSelectIdx2 = find(reshape(dimSelectMask2, numel(dimSelectMask2),1) == 1); From 26522c106e23140a5faf15ae6736ac0fc4f5458d Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Mon, 27 Feb 2017 14:24:08 +0800 Subject: [PATCH 09/17] add bin selection to reduce feature dims --- graph/F_ExtSpatialCovFeat.m | 48 +++++++++++++++++++++++++++++++++---- graph/F_SpatialCov.m | 29 ++++++++++++++++------ 2 files changed, 66 insertions(+), 11 deletions(-) diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m index a4c48c2..037b9c9 100644 --- a/graph/F_ExtSpatialCovFeat.m +++ b/graph/F_ExtSpatialCovFeat.m @@ -6,8 +6,12 @@ nBin = curr_layer.nBin; [~, nf, N] = size(covMat); scm_select = curr_layer.scm_select; +scm_select_diag = curr_layer.scm_select_diag; +scm_select_bin = curr_layer.scm_select_bin; +scm_bin_shift = curr_layer.scm_bin_shift; if N == 1 + % normalize the cov matrix by their diagonal elements, remove the effect of % spectral power and only retains the phase information dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); @@ -40,8 +44,21 @@ % get the diagonal elements which are real values % diag_part = covMat(dimSelectIdx1,:); % diag_part = log(max(eps,abs(diag_part))); - diag_part = real(normCovMat(dimSelectIdx1,:)); + if scm_select_diag + diag_part = real(normCovMat(dimSelectIdx1,:)); + end else + % select 1 bin by average every scm_bin_shift bins + if scm_select_bin + covMat1 = reshape(covMat, nCh^2, nBin, nf, N); + covMat2 = reshape(permute(covMat1, [1 3 4 2]), nCh^2*nf*N, nBin); + covMat3 = conv2(covMat2, ones(1,scm_bin_shift, 'like', covMat2(1))/scm_bin_shift, 'valid'); + covMat4 = covMat3(:, 1:scm_bin_shift:end); + nBin = size(covMat4, 2); + covMat = reshape(permute(reshape(covMat4, nCh^2, nf, N, nBin), [1 4 2 3]), nCh^2*nBin, nf, N); + + end + % normalize the cov 
matrix by their diagonal elements, remove the effect of % spectral power and only retains the phase information dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); @@ -55,6 +72,17 @@ normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf, N), [3 4 5 1 2]), 1./diag_mean1); normCovMat = reshape(permute(normCovMat, [4 5 1 2 3]), nCh^2*nBin, nf, N); +% % select 1 bin by average every scm_bin_shift bins +% if scm_select_bin +% normCovMat1 = reshape(normCovMat, nCh^2, nBin, nf, N); +% normCovMat2 = reshape(permute(normCovMat1, [1 3 4 2]), nCh^2*nf*N, nBin); +% normCovMat3 = conv2(normCovMat2, ones(1,scm_bin_shift, 'like', normCovMat2(1))/scm_bin_shift, 'valid'); +% normCovMat4 = normCovMat3(:, 1:scm_bin_shift:end); +% nBin = size(normCovMat4, 2); +% normCovMat = reshape(permute(reshape(normCovMat4, nCh^2, nf, N, nBin), [1 4 2 3]), nCh^2*nBin, nf, N); +% +% end + % get the upper triangle off-diagonal elements which are complex-valued if strcmpi(scm_select, 'uptriangle') selectMat = triu(ones(nCh, nCh),1); % 1. 
up-trialgle @@ -69,16 +97,28 @@ real_part = real(normCovMat(dimSelectIdx2,:,:)); % imag_part = imag(normCovMat(dimSelectIdx2,:)); % for freq bin 1 and 257, no imag part - dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1))); + if scm_select_bin + dimSelectMask3 = bsxfun(@times, selectMat, ones(nCh, nCh, nBin)); + else + dimSelectMask3 = bsxfun(@times, selectMat, cat(3,zeros(nCh, nCh, 1), ones(nCh, nCh, nBin-2), zeros(nCh, nCh, 1))); + end dimSelectIdx3 = find(reshape(dimSelectMask3, numel(dimSelectMask3),1) == 1); imag_part = imag(normCovMat(dimSelectIdx3,:,:)); % get the diagonal elements which are real values - diag_part = real(normCovMat(dimSelectIdx1,:,:)); + if scm_select_diag + dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); + dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); + diag_part = real(normCovMat(dimSelectIdx1,:,:)); + end end % get the final feature vector -feat = [real_part; imag_part; diag_part]; +if scm_select_diag + feat = [real_part; imag_part; diag_part]; +else + feat = [real_part; imag_part]; +end % real_part = reshape(real_part, 7, 257, nf, N); % imag_part = reshape(imag_part, 7, 255, nf, N); % real_part = real_part(:, 6:5:end,:,:); diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 623e6f1..d55fb53 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -58,15 +58,30 @@ % output = SCM(:, 1:windowShift:end); % output = permute(reshape(output, nCh^2*nBin, N, size(output, 2)), [1 3 2]); +% % Version 1 +% prev_mask = input_layer.validFrameMask; +% output = zeros(nCh^2*nBin, nf, N, 'like', XX2); +% for i=1:N +% idx = find(prev_mask(:,i) == 0, 1, 'last'); +% idx2 = fix((idx-windowSize+windowShift)/windowShift); +% XX3 = squeeze(XX2(:,1:idx,i)); +% SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); +% output(:, 1:idx2, i) = SCM(:, 1:windowShift:end); +% mask(idx2+1:end, i) = 1; +% end + + % 
Version 2, much fast prev_mask = input_layer.validFrameMask; + idx = arrayfun(@(x) find(gather(prev_mask(:,x)) == 0, 1, 'last'), 1:size(prev_mask,2)); + idx2 = arrayfun(@(x) fix((idx(x)-windowSize+windowShift)/windowShift), 1:length(idx)); + XX31 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T); + SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + output1 = SCM1(:, 1:windowShift:end); + output2 = permute(reshape(output1, nCh^2*nBin, N, size(output1, 2)), [1 3 2]); output = zeros(nCh^2*nBin, nf, N, 'like', XX2); - for i=1:N - idx = find(prev_mask(:,i) == 0, 1, 'last'); - idx2 = fix((idx-windowSize+windowShift)/windowShift); - XX3 = squeeze(XX2(:,1:idx,i)); - SCM = conv2(XX3, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); - output(:, 1:idx2, i) = SCM(:, 1:windowShift:end); - mask(idx2+1:end, i) = 1; + for i = 1:N + output(:, 1:idx2(i), i) = output2(:, 1:idx2(i), i); + mask(idx2(i)+1:end, i) = 1; end end From 651850914ce364d1ffbdf30b6ff43b224017b6ee Mon Sep 17 00:00:00 2001 From: ellenrw Date: Mon, 27 Feb 2017 14:31:29 +0800 Subject: [PATCH 10/17] Make Mask based SCM with window fast --- graph/DNN_Cost10.m | 2 +- graph/F_SpatialCov.m | 4 +- graph/F_SpatialCovMask.m | 84 ++++++++++++++++---------------- signal/gmm/ComputeCovMask.m | 19 ++++++-- signal/gmm/ComputeWinCovMask.m | 88 ++++++++++++++++++++++------------ 5 files changed, 116 insertions(+), 81 deletions(-) diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m index b18566d..9e8d3c3 100644 --- a/graph/DNN_Cost10.m +++ b/graph/DNN_Cost10.m @@ -143,7 +143,7 @@ case 'spatialcov' [layer{i}.a, layer{i}.validFrameMask] = F_SpatialCov(prev_layers{1}, layer{i}); % do not support variable length yet case 'spatialcovmask' - layer{i}.a = F_SpatialCovMask(prev_layers, layer{i}); % do not support variable length yet + [layer{i}.a, layer{i}.validFrameMask] = F_SpatialCovMask(prev_layers, layer{i}); % do not support variable length yet case 'spatialcovsplitmask' layer{i}.a = 
F_SpatialCovSplitMask(prev_layers, layer{i}); % do not support variable length yet case 'mvdr_spatialcov' diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 623e6f1..f7187ec 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -24,7 +24,7 @@ else nf = fix((T-windowSize+windowShift)/windowShift); end -mask = zeros(nf, N, 'like', real(input2)); +mask = zeros(nf, N, 'like', real(input2(1))); if N==1 % R = ComplexSpectrum2SpatialCov(input2, windowSize, windowShift); @@ -59,7 +59,7 @@ % output = permute(reshape(output, nCh^2*nBin, N, size(output, 2)), [1 3 2]); prev_mask = input_layer.validFrameMask; - output = zeros(nCh^2*nBin, nf, N, 'like', XX2); + output = zeros(nCh^2*nBin, nf, N, 'like', XX2(1)); for i=1:N idx = find(prev_mask(:,i) == 0, 1, 'last'); idx2 = fix((idx-windowSize+windowShift)/windowShift); diff --git a/graph/F_SpatialCovMask.m b/graph/F_SpatialCovMask.m index 84e6d91..4db29ad 100644 --- a/graph/F_SpatialCovMask.m +++ b/graph/F_SpatialCovMask.m @@ -1,14 +1,22 @@ -% Estimate spatial covariance matrix for sentences using a mask. The mask +% Estimate spatial covariance matrix for sentences using a speechMask. The speechMask % specifies speech presense probability at all time frequency locations, % with a 1 means speech present and 0 means speech absent. 
% -function output = F_SpatialCovMask(prev_layers, curr_layer) -mask = prev_layers{1}.a; +function [output, uttMask] = F_SpatialCovMask(prev_layers, curr_layer) +speechMask = prev_layers{1}.a; data = prev_layers{2}.a; +prev_mask =prev_layers{2}.validFrameMask; -if isfield(curr_layer, 'windowSize') - windowSize = curr_layer.windowSize; - shift = fix(windowSize/2); +[D,T,N] = size(speechMask); +[D2,T,N] = size(data); +nCh = D2/D; +data = reshape(data, D, nCh, T, N); +data = permute(data, [2 3 1 4]); +% data = abs(data); + +if isfield(curr_layer, 'winSize') && T > curr_layer.winSize + windowSize = curr_layer.winSize; + windowShift = curr_layer.winShift; else windowSize = 0; end @@ -19,16 +27,12 @@ speechOnly = false; end -[D,T,N] = size(mask); -[D2,T,N] = size(data); -nCh = D2/D; -data = reshape(data, D, nCh, T, N); -data = permute(data, [2 3 1 4]); -% data = abs(data); - -if T <= windowSize - windowSize = 0; -end +% if windowSize == 0 +% nf = 1; +% else +% nf = fix((T-windowSize+windowShift)/windowShift); +% end +% uttMask = zeros(nf, N, 'like', real(data(1))); if windowSize == 0 % utterance mode, estimate two spatial covariance matrixes for each utterance, one is speech and the other is noise. 
if 0 % for loop version @@ -38,54 +42,50 @@ else scm_speech = zeros(nCh, nCh, D, N); scm_noise = zeros(nCh, nCh, D, N); - end + end for d=1:D for n=1:N for t=1:T - scm_speech(:,:,d,n) = scm_speech(:,:,d,n) + mask(d,t,n) * data(:,t,d) * data(:,t,d)'; - scm_noise(:,:,d,n) = scm_noise(:,:,d,n) + (1-mask(d,t,n)) * data(:,t,d) * data(:,t,d)'; + scm_speech(:,:,d,n) = scm_speech(:,:,d,n) + speechMask(d,t,n) * data(:,t,d) * data(:,t,d)'; + scm_noise(:,:,d,n) = scm_noise(:,:,d,n) + (1-speechMask(d,t,n)) * data(:,t,d) * data(:,t,d)'; end - scm_speech(:,:,d,n) = scm_speech(:,:,d,n) / sum(mask(d,:,n)); - scm_noise(:,:,d,n) = scm_noise(:,:,d,n) / (T-sum(mask(d,:,n))); + scm_speech(:,:,d,n) = scm_speech(:,:,d,n) / sum(speechMask(d,:,n)); + scm_noise(:,:,d,n) = scm_noise(:,:,d,n) / (T-sum(speechMask(d,:,n))); end end - else % vectorized + else +% vectorized: version 1 % data_cell = num2cell(data, [1]); -% mask_cell = num2cell(permute(mask, [3 2 1]), [1]); +% mask_cell = num2cell(permute(speechMask, [3 2 1]), [1]); % scm_speech_cell = cellfun(@(x,y) (reshape(x*y*y',nCh^2,1)), mask_cell, data_cell, 'UniformOutput', 0); % scm_noise_cell = cellfun(@(x,y) (reshape((1-x)*y*y',nCh^2,1)), mask_cell, data_cell, 'UniformOutput', 0); % scm_speech = reshape(sum(cell2mat(scm_speech_cell),2),nCh,nCh,D); -% scm_speech = bsxfun(@times, scm_speech, permute(1./sum(mask,2), [3 2 1])); +% scm_speech = bsxfun(@times, scm_speech, permute(1./sum(speechMask,2), [3 2 1])); % scm_noise = reshape(sum(cell2mat(scm_noise_cell),2),nCh,nCh,D); -% scm_noise = bsxfun(@times, scm_noise, permute(1./sum(1-mask,2), [3 2 1])); - - mask2 = permute(mask, [4 2 1 3]); +% scm_noise = bsxfun(@times, scm_noise, permute(1./sum(1-speechMask,2), [3 2 1])); +% + % version 2 + mask2 = permute(speechMask, [4 2 1 3]); scm_speech = ComputeCovMask(data, mask2); - if ~speechOnly + if speechOnly + output = scm_speech; + else scm_noise = ComputeCovMask(data, 1-mask2); + output = [scm_speech; scm_noise]; end end - - scm_speech2 = 
reshape(scm_speech, nCh^2*D, 1, N); - if speechOnly - output = scm_speech2; - else - scm_noise2 = reshape(scm_noise, nCh^2*D, 1, N); - output = [scm_speech2; scm_noise2]; - end -else % online mode, estiamte covariance matrices for a sliding window of frames. + uttMask = zeros(1, N, 'like', real(data(1))); +else % online mode, estiamte covariance matrices for a sliding window of frames. % to be implemented. % frame number after moving window - nf = fix((T-windowSize+shift)/shift); - mask2 = permute(mask, [4 2 1 3]); - scm_speech = ComputeWinCovMask(data, mask2, windowSize, shift); + mask2 = permute(speechMask, [4 2 1 3]); + [scm_speech, uttMask] = ComputeWinCovMask(data, mask2, prev_mask, windowSize, windowShift); if speechOnly output = scm_speech; else - scm_noise = ComputeWinCovMask(data, 1-mask2, windowSize, shift); + scm_noise = ComputeWinCovMask(data, 1-mask2, prev_mask, windowSize, windowShift); output = [scm_speech; scm_noise]; end end - end diff --git a/signal/gmm/ComputeCovMask.m b/signal/gmm/ComputeCovMask.m index 4d3c09f..1be3d07 100644 --- a/signal/gmm/ComputeCovMask.m +++ b/signal/gmm/ComputeCovMask.m @@ -5,12 +5,21 @@ % feature vector to the covariance matrix % function covMat = ComputeCovMask(data, mask) - +[nCh, ~, nBin, N] = size(data); weight = sqrt(bsxfun(@times, mask, 1./sum(mask))); data_scaled = bsxfun(@times, data, weight); -data_cell = num2cell(data_scaled, [1 2]); % convert to cell array and call cellfun for speed -tmp = cellfun(@(x) gather(x*x'), data_cell, 'UniformOutput', 0); -covMat = cell2mat(tmp); -% covMat = cell2mat_gpu(tmp); + +% % version 1 +% data_cell = num2cell(data_scaled, [1 2]); % convert to cell array and call cellfun for speed +% tmp = cellfun(@(x) gather(x*x'), data_cell, 'UniformOutput', 0); +% covMat = cell2mat(tmp); +% % covMat = cell2mat_gpu(tmp); +% covMat = reshape(covMat, nCh^2*nBin, 1, N); + +% version 2 + +covMat1 = outProdND(data_scaled); +covMat2 = squeeze(mean(covMat1, 3)); +covMat = reshape(covMat2, 
nCh^2*nBin, 1, N); end diff --git a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m index d119160..d4382a5 100644 --- a/signal/gmm/ComputeWinCovMask.m +++ b/signal/gmm/ComputeWinCovMask.m @@ -1,39 +1,65 @@ -function winCovMat = ComputeWinCovMask(data, mask, winsize, shift) -[nch, nf_stft, nbin, N] = size(data); +function [winCovMat, winMask] = ComputeWinCovMask(data, mask, prev_mask, windowSize, windowShift) +[nCh, nf_stft, nBin, N] = size(data); weight = sqrt(bsxfun(@times, mask, 1./sum(mask))); data_scaled = bsxfun(@times, data, weight); covMat = outProdND(data_scaled); + +nf = fix((nf_stft-windowSize+windowShift)/windowShift); +winMask = zeros(nf, N, 'like', real(covMat(1))); + if N == 1 - covMat1 = reshape(permute(covMat, [1 2 4 3]), nch^2*nbin, nf_stft); + covMat1 = reshape(permute(covMat, [1 2 4 3]), nCh^2*nBin, nf_stft); + % % Version 1: fast, but consume memory when windowSize is large +% nf = fix((nf_stft-windowSize+windowShift)/windowShift); +% covMat2 = ExpandContext_v2(covMat1, 0:windowSize-1); +% nf_idx = 1:windowShift:nf_stft-windowSize+1; +% covMat3 = covMat2(:, nf_idx, :); +% covMat3 = reshape(covMat3, nCh^2*nBin, windowSize, nf, N); +% winCovMat = squeeze(mean(covMat3, 2)); +% + % Version 2: less fast than version 1 + SCM1 = conv2(covMat1, ones(1,windowSize, class(gather(covMat)))/windowSize, 'valid'); + winCovMat = SCM1(:, 1:windowShift:end); +% +% % Version 3: slowest in repmat and not support multiple sentences +% if IsInGPU(data) +% winCovMat11 = gpuArray.zeros(nf, nCh*nCh*nBin*windowSize); +% else +% winCovMat11 = zeros(nf, nCh*nCh*nBin*windowSize); +% end +% covMat11 = reshape(permute(covMat, [1 2 4 3]), 1, nCh*nCh*nBin*nf_stft); +% indf = nCh*nCh*nBin*windowShift*(0:(nf-1)).'; +% inds = (1:nCh*nCh*nBin*windowSize); +% % winCovMat(:) = covMat(indf(:,ones(1,nCh*nCh*nBin*windowSize))+inds(ones(nf,1),:)); % slow +% winCovMat11(:) = covMat11(repmat(indf,1,nCh*nCh*nBin*windowSize)+repmat(inds,nf,1)); +% winCovMat11 = 
permute(reshape(winCovMat11, nf, nCh*nCh*nBin, windowSize), [2 3 1]); +% winCovMat = squeeze(mean(winCovMat11, 2)); else - covMat1 = reshape(permute(covMat, [1 2 4 3 5]), nch^2*nbin, nf_stft, N); +% % version 1 +% covMat1 = reshape(permute(covMat, [1 2 4 3 5]), nCh^2*nBin, nf_stft, N); +% winCovMat = zeros(nCh^2*nBin, nf, N, 'like', covMat1(1)); +% for i=1:N +% idx = find(prev_mask(:,i) == 0, 1, 'last'); +% idx2 = fix((idx-windowSize+windowShift)/windowShift); +% covMat2 = squeeze(covMat1(:,1:idx,i)); +% SCM = conv2(covMat2, ones(1,windowSize, 'like', covMat1(1))/windowSize, 'valid'); +% winCovMat(:, 1:idx2, i) = SCM(:, 1:windowShift:end); +% winMask(idx2+1:end, i) = 1; +% end + + % Version 2, much fast + covMat2 = reshape(permute(covMat, [1 2 4 5 3]), nCh^2*nBin*N, nf_stft); + idx = arrayfun(@(x) find(gather(prev_mask(:,x)) == 0, 1, 'last'), 1:size(prev_mask,2)); + idx2 = arrayfun(@(x) fix((idx(x)-windowSize+windowShift)/windowShift), 1:length(idx)); + covMat3 = conv2(covMat2, ones(1,windowSize, 'like', covMat2(1))/windowSize, 'valid'); + winCovMat1 = covMat3(:, 1:windowShift:end); + winCovMat2 = permute(reshape(winCovMat1, nCh^2*nBin, N, size(winCovMat1, 2)), [1 3 2]); + winCovMat = zeros(nCh^2*nBin, nf, N, 'like', winCovMat2(1)); + for i = 1:N + winCovMat(:, 1:idx2(i), i) = winCovMat2(:, 1:idx2(i), i); + winMask(idx2(i)+1:end, i) = 1; + end + end - -% % Version 1: fast, but consume memory when winsize is large -% nf = fix((nf_stft-winsize+shift)/shift); -% covMat2 = ExpandContext_v2(covMat1, 0:winsize-1); -% nf_idx = 1:shift:nf_stft-winsize+1; -% covMat3 = covMat2(:, nf_idx, :); -% covMat3 = reshape(covMat3, nch^2*nbin, winsize, nf, N); -% winCovMat = squeeze(mean(covMat3, 2)); - -% Version 2: less fast than version 1 -SCM1 = conv2(covMat1, ones(1,winsize, class(gather(covMat)))/winsize, 'valid'); -winCovMat = SCM1(:, 1:shift:end); - -% % Version 3: slowest in repmat and not support multiple sentences -% if IsInGPU(data) -% winCovMat11 = gpuArray.zeros(nf, 
nch*nch*nbin*winsize); -% else -% winCovMat11 = zeros(nf, nch*nch*nbin*winsize); -% end -% covMat11 = reshape(permute(covMat, [1 2 4 3]), 1, nch*nch*nbin*nf_stft); -% indf = nch*nch*nbin*shift*(0:(nf-1)).'; -% inds = (1:nch*nch*nbin*winsize); -% % winCovMat(:) = covMat(indf(:,ones(1,nch*nch*nbin*winsize))+inds(ones(nf,1),:)); % slow -% winCovMat11(:) = covMat11(repmat(indf,1,nch*nch*nbin*winsize)+repmat(inds,nf,1)); -% winCovMat11 = permute(reshape(winCovMat11, nf, nch*nch*nbin, winsize), [2 3 1]); -% winCovMat = squeeze(mean(winCovMat11, 2)); - end \ No newline at end of file From 2831f5aae5a2a9c0675d24327d261a859cb8d925 Mon Sep 17 00:00:00 2001 From: ellenrw Date: Tue, 28 Feb 2017 19:31:16 +0800 Subject: [PATCH 11/17] Make F_ExtSpatialCovFeat compatible --- graph/F_ExtSpatialCovFeat.m | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/graph/F_ExtSpatialCovFeat.m b/graph/F_ExtSpatialCovFeat.m index 037b9c9..3f282ae 100644 --- a/graph/F_ExtSpatialCovFeat.m +++ b/graph/F_ExtSpatialCovFeat.m @@ -5,10 +5,23 @@ nCh = curr_layer.nCh; nBin = curr_layer.nBin; [~, nf, N] = size(covMat); -scm_select = curr_layer.scm_select; -scm_select_diag = curr_layer.scm_select_diag; -scm_select_bin = curr_layer.scm_select_bin; -scm_bin_shift = curr_layer.scm_bin_shift; + +if isfield(curr_layer, 'scm_select') + scm_select = curr_layer.scm_select; +else + scm_select = 'uptriangle'; +end +if isfield(curr_layer, 'scm_select_diag') + scm_select_diag = curr_layer.scm_select_diag; +else + scm_select_diag = 1; +end +if isfield(curr_layer, 'scm_select_bin') + scm_select_bin = curr_layer.scm_select_bin; + scm_bin_shift = curr_layer.scm_bin_shift; +else + scm_select_bin = 0; +end if N == 1 From eacb694c372efa52906ddefd414930c00a727c9d Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Sun, 5 Mar 2017 12:48:52 +0800 Subject: [PATCH 12/17] Implement PCA to reduce dimensions of SCM --- graph/F_ConcatRealImag.m | 7 ++++++ graph/F_SpatialNorm.m | 38
++++++++++++++++++++++++++++ prototypes/computePCA.m | 45 ++++++++++++++++++++++++++++++++++ signal/gmm/ComputeWinCovMask.m | 1 + 4 files changed, 91 insertions(+) create mode 100644 graph/F_ConcatRealImag.m create mode 100644 graph/F_SpatialNorm.m create mode 100644 prototypes/computePCA.m diff --git a/graph/F_ConcatRealImag.m b/graph/F_ConcatRealImag.m new file mode 100644 index 0000000..045f504 --- /dev/null +++ b/graph/F_ConcatRealImag.m @@ -0,0 +1,7 @@ +function output = F_ConcatRealImag(prev_layer) + +covMat = prev_layer.a; + +output = [real(covMat); imag(covMat)]; + +end diff --git a/graph/F_SpatialNorm.m b/graph/F_SpatialNorm.m new file mode 100644 index 0000000..ac5c0ed --- /dev/null +++ b/graph/F_SpatialNorm.m @@ -0,0 +1,38 @@ +function normCovMat = F_SpatialNorm(prev_layer, curr_layer) + +covMat = prev_layer.a; +prev_mask = prev_layer.validFrameMask; +nCh = curr_layer.nCh; +nBin = curr_layer.nBin; +[~, nf, N] = size(covMat); + +if N == 1 + + % normalize the cov matrix by their diagonal elements, remove the effect of + % spectral power and only retains the phase information + dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); + dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index + diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:), nCh, nBin, nf), 1)); + if nf ==1 + diag_mean = diag_mean.'; + end + normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf), [3 4 1 2]), 1./diag_mean); + normCovMat = reshape(permute(normCovMat, [3 4 1 2]), nCh^2*nBin, nf); + +else + % normalize the cov matrix by their diagonal elements, remove the effect of + % spectral power and only retains the phase information + dimSelectMask1 = bsxfun(@times, eye(nCh, nCh), ones(nCh, nCh, nBin)); + dimSelectIdx1 = find(reshape(dimSelectMask1, numel(dimSelectMask1),1) == 1); % diag elements index + diag_mean = squeeze(mean(reshape(covMat(dimSelectIdx1,:,:), nCh, nBin, nf, N), 1)); + if nf ==1 + 
diag_mean = reshape(diag_mean, size(diag_mean,1), 1, size(diag_mean, 2)); + end + % minibatch padding makes some frames zero, mean of that still be zero, can not be divided. + diag_mean1 = permute(bsxfun(@plus, permute(diag_mean, [2 3 1]), -1e10.*prev_mask), [3 1 2]); + normCovMat = bsxfun(@times, permute(reshape(covMat, nCh, nCh, nBin, nf, N), [3 4 5 1 2]), 1./diag_mean1); + normCovMat = reshape(permute(normCovMat, [4 5 1 2 3]), nCh^2*nBin, nf, N); + +end + +end diff --git a/prototypes/computePCA.m b/prototypes/computePCA.m new file mode 100644 index 0000000..a4b961f --- /dev/null +++ b/prototypes/computePCA.m @@ -0,0 +1,45 @@ + +function [W, b] = computePCA(Visible, nUttUsed, para, layer) +if exist('nUttUsed')==0 || length(nUttUsed)==0 + nUttUsed = 500; +end +nUtt = length(Visible(1).data); +if nUtt>nUttUsed + step = ceil(nUtt/nUttUsed); + for i=1:length(Visible) + Visible(i).data = Visible(i).data(1:step:end); + end +end + +para.out_layer_idx = length(layer); +para.output = 'dummy'; +para = ParseOptions2(para); +output = FeatureTree2(Visible, para, layer); + +if para.NET.variableLengthMinibatch + for i=1:length(output) + featTmp = gather(output{i}{1}); + [featTmp2, mask, variableLength] = ExtractVariableLengthTrajectory(featTmp); + feat{i} = cell2mat(featTmp2); + end +else + for i=1:length(output) + feat{i} = gather(output{i}{1}); + [D,T,N] = size(feat{i}); + if N>1 + feat{i} = reshape(feat{i},D,T*N); + end + end +end +feat = cell2mat(feat); + +% [coeff, scores, latent] = princomp(feat','econ'); +% tmp=cumsum(latent)./sum(latent); +coeff = princomp(feat','econ'); +% coeff = princomp(feat'); +W = coeff(:,1:para.topology.pcaDim)'; +b = -W*mean(feat,2); +% plot(b) +% std(b) + +end diff --git a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m index d4382a5..e8a8320 100644 --- a/signal/gmm/ComputeWinCovMask.m +++ b/signal/gmm/ComputeWinCovMask.m @@ -10,6 +10,7 @@ if N == 1 covMat1 = reshape(permute(covMat, [1 2 4 3]), nCh^2*nBin, nf_stft); +% covMat1 = 
repmat(mean(covMat1,2), 1, size(covMat1, 2)); % % Version 1: fast, but consume memory when windowSize is large % nf = fix((nf_stft-windowSize+windowShift)/windowShift); % covMat2 = ExpandContext_v2(covMat1, 0:windowSize-1); From 418d60278c895ed080dd990ec1543ec728caa61c Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Sun, 5 Mar 2017 13:06:37 +0800 Subject: [PATCH 13/17] Add some layer in DNN_Cost10 --- graph/DNN_Cost10.m | 4 ++++ graph/F_SpatialCov.m | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m index 9e8d3c3..fc9c717 100644 --- a/graph/DNN_Cost10.m +++ b/graph/DNN_Cost10.m @@ -150,6 +150,10 @@ layer{i} = F_MVDR_spatialCov(prev_layers{1}, layer{i}); % do not support variable length yet case 'extspatialcovfeat' layer{i}.a = F_ExtSpatialCovFeat(prev_layers{1}, layer{i}); % extract up triangle real and imag parts, diagonal part from spatial cov + case 'spatialnorm' + layer{i}.a = F_SpatialNorm(prev_layers{1}, layer{i}); + case 'concatrealimag' + layer{i}.a = F_ConcatRealImag(prev_layers{1}); case 'cov' layer{i}.a = F_cov(prev_layers{1}.a); % do not support variable length yet case 'logdet' diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 2875560..62d253f 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -40,7 +40,8 @@ output = squeeze(mean(XX2, 2)); else % idx = [ones(1,half_ctx) 1:T ones(1,half_ctx)*T]; - SCM = conv2(XX2, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); +% SCM = conv2(XX2, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + SCM = conv2(XX2, ones(1,windowSize, class(gather(input2))), 'valid'); output = SCM(:, 1:windowShift:end); end @@ -75,7 +76,8 @@ idx = arrayfun(@(x) find(gather(prev_mask(:,x)) == 0, 1, 'last'), 1:size(prev_mask,2)); idx2 = arrayfun(@(x) fix((idx(x)-windowSize+windowShift)/windowShift), 1:length(idx)); XX31 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T); - SCM1 = conv2(XX31, ones(1,windowSize, 
class(gather(input2)))/windowSize, 'valid'); +% SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2))), 'valid'); output1 = SCM1(:, 1:windowShift:end); output2 = permute(reshape(output1, nCh^2*nBin, N, size(output1, 2)), [1 3 2]); output = zeros(nCh^2*nBin, nf, N, 'like', XX2); From 15d558b5f1ca3338e5e2623ace44dc3c0bdd0e01 Mon Sep 17 00:00:00 2001 From: ellenrw Date: Sun, 5 Mar 2017 13:07:57 +0800 Subject: [PATCH 14/17] do nothing --- graph/F_SpatialCov.m | 3 ++- signal/gmm/ComputeWinCovMask.m | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/graph/F_SpatialCov.m b/graph/F_SpatialCov.m index 2875560..71e4da9 100644 --- a/graph/F_SpatialCov.m +++ b/graph/F_SpatialCov.m @@ -75,7 +75,8 @@ idx = arrayfun(@(x) find(gather(prev_mask(:,x)) == 0, 1, 'last'), 1:size(prev_mask,2)); idx2 = arrayfun(@(x) fix((idx(x)-windowSize+windowShift)/windowShift), 1:length(idx)); XX31 = reshape(permute(XX2, [1 3 2]), nCh^2*nBin*N, T); - SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); +% SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2)))/windowSize, 'valid'); + SCM1 = conv2(XX31, ones(1,windowSize, class(gather(input2))), 'valid'); output1 = SCM1(:, 1:windowShift:end); output2 = permute(reshape(output1, nCh^2*nBin, N, size(output1, 2)), [1 3 2]); output = zeros(nCh^2*nBin, nf, N, 'like', XX2); diff --git a/signal/gmm/ComputeWinCovMask.m b/signal/gmm/ComputeWinCovMask.m index d4382a5..a1848f0 100644 --- a/signal/gmm/ComputeWinCovMask.m +++ b/signal/gmm/ComputeWinCovMask.m @@ -19,7 +19,8 @@ % winCovMat = squeeze(mean(covMat3, 2)); % % Version 2: less fast than version 1 - SCM1 = conv2(covMat1, ones(1,windowSize, class(gather(covMat)))/windowSize, 'valid'); +% SCM1 = conv2(covMat1, ones(1,windowSize, class(gather(covMat)))/windowSize, 'valid'); + SCM1 = conv2(covMat1, ones(1,windowSize, class(gather(covMat))), 'valid'); winCovMat = 
SCM1(:, 1:windowShift:end); % % % Version 3: slowest in repmat and not support multiple sentences @@ -52,7 +53,8 @@ covMat2 = reshape(permute(covMat, [1 2 4 5 3]), nCh^2*nBin*N, nf_stft); idx = arrayfun(@(x) find(gather(prev_mask(:,x)) == 0, 1, 'last'), 1:size(prev_mask,2)); idx2 = arrayfun(@(x) fix((idx(x)-windowSize+windowShift)/windowShift), 1:length(idx)); - covMat3 = conv2(covMat2, ones(1,windowSize, 'like', covMat2(1))/windowSize, 'valid'); +% covMat3 = conv2(covMat2, ones(1,windowSize, 'like', covMat2(1))/windowSize, 'valid'); + covMat3 = conv2(covMat2, ones(1,windowSize, 'like', covMat2(1)), 'valid'); winCovMat1 = covMat3(:, 1:windowShift:end); winCovMat2 = permute(reshape(winCovMat1, nCh^2*nBin, N, size(winCovMat1, 2)), [1 3 2]); winCovMat = zeros(nCh^2*nBin, nf, N, 'like', winCovMat2(1)); From f593c442f05d41a7064ee6e5d06f99c08527bba1 Mon Sep 17 00:00:00 2001 From: xuchenglin28 Date: Mon, 6 Mar 2017 11:56:43 +0800 Subject: [PATCH 15/17] save pca coeff in computePCA --- prototypes/computePCA.m | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/prototypes/computePCA.m b/prototypes/computePCA.m index a4b961f..a8fbb26 100644 --- a/prototypes/computePCA.m +++ b/prototypes/computePCA.m @@ -35,11 +35,23 @@ % [coeff, scores, latent] = princomp(feat','econ'); % tmp=cumsum(latent)./sum(latent); -coeff = princomp(feat','econ'); -% coeff = princomp(feat'); +% coeff = princomp(feat','econ'); +% W = coeff(:,1:para.topology.pcaDim)'; +% b = -W*mean(feat,2); +% +% [coeff1, scores1, latent1] = pca(feat'); +% cov1 = cov(feat'); +% [V1,D1] = eig(cov1); +% D2 = diag(D1); +% Run PCA; idx is the number of leading components whose cumulative variance ratio first reaches 0.95. +fprintf('Load %d utts feats, begin pca ...', nUttUsed); +[coeff, ~, latent] = pca(feat'); +tmp=cumsum(latent)./sum(latent); +idx = find(tmp >= 0.95, 1); +fprintf('End of PCA, select %d can cover 95 percent\n', idx); W = coeff(:,1:para.topology.pcaDim)'; b = -W*mean(feat,2); -% plot(b) -% std(b) + +save(['PCA_U' num2str(nUttUsed) '_W_B.mat'], 'W', 'b'); end From
546c38993a0145e527395547c9cebced589c2638 Mon Sep 17 00:00:00 2001 From: ellenrw Date: Mon, 6 Mar 2017 13:53:39 +0800 Subject: [PATCH 16/17] add backpropagation of concatRealImag --- graph/B_ConcatRealImag.m | 13 +++++++++++++ graph/DNN_Cost10.m | 2 ++ prototypes/computePCA.m | 5 +++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 graph/B_ConcatRealImag.m diff --git a/graph/B_ConcatRealImag.m b/graph/B_ConcatRealImag.m new file mode 100644 index 0000000..ecdd06f --- /dev/null +++ b/graph/B_ConcatRealImag.m @@ -0,0 +1,13 @@ +function grad = B_ConcatRealImag(prev_layer, curr_layer, future_layers) + +future_grad = GetFutureGrad(future_layers, curr_layer); + +[D, T, N] = size(future_grad); +j = sqrt(-1); + +realpart = future_grad(1:D/2,:,:); +imagpart = future_grad(D/2+1:end,:,:); + +grad = realpart + j*imagpart; + +end \ No newline at end of file diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m index fc9c717..fa64e63 100644 --- a/graph/DNN_Cost10.m +++ b/graph/DNN_Cost10.m @@ -459,6 +459,8 @@ layer{i}.grad = B_inner_product_normalized(prev_layers, future_layers); case 'concatenate' layer{i}.grad = B_concatenate(prev_layers, layer{i}, future_layers); + case 'concatrealimag' + layer{i}.grad = B_ConcatRealImag(prev_layers, layer{i}, future_layers); otherwise fprintf('Error: unknown output node type %s!\n', layer{i}.name); diff --git a/prototypes/computePCA.m b/prototypes/computePCA.m index a8fbb26..e16c9d5 100644 --- a/prototypes/computePCA.m +++ b/prototypes/computePCA.m @@ -49,9 +49,10 @@ tmp=cumsum(latent)./sum(latent); idx = find(tmp >= 0.95, 1); fprintf('End of PCA, select %d can cover 95 percent\n', idx); -W = coeff(:,1:para.topology.pcaDim)'; +% W = coeff(:,1:para.topology.pcaDim)'; +W = coeff(:,1:min(1000, size(coeff,2)))'; b = -W*mean(feat,2); -save(['PCA_U' num2str(nUttUsed) '_W_B.mat'], 'W', 'b'); +save(['PCA_U' num2str(nUttUsed) '_W_B.mat'], 'W', 'b', 'latent'); end From 20ad720145444477d983ee839b422958979c9e6b Mon Sep 17 00:00:00 2001 From: xuchenglin28
Date: Mon, 6 Mar 2017 14:52:16 +0800 Subject: [PATCH 17/17] Modify DNN_Cost and DNN_update to support complex-valued grads. Problems occur in grad_clip and weight clip --- graph/DNN_Cost10.m | 2 +- graph/DNN_update.m | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/graph/DNN_Cost10.m b/graph/DNN_Cost10.m index fa64e63..019f9e7 100644 --- a/graph/DNN_Cost10.m +++ b/graph/DNN_Cost10.m @@ -253,7 +253,7 @@ if isfield(layer{i}, 'mask') % the mask defines what values can be tuned and what cannot be tuned. tmp = tmp .* layer{i}.mask; end - cost_func.cost = cost_func.cost + 0.5* L2weight * sum(sum(tmp.*tmp)); + cost_func.cost = cost_func.cost + 0.5* L2weight * sum(sum(real(tmp.*conj(tmp)))); end end end diff --git a/graph/DNN_update.m b/graph/DNN_update.m index 01d0685..84f1512 100644 --- a/graph/DNN_update.m +++ b/graph/DNN_update.m @@ -30,7 +30,7 @@ end end - if para.NET.gradientClipThreshold > 0 + if para.NET.gradientClipThreshold > 0 && isreal(grad_W) grad_W = max(-para.NET.gradientClipThreshold, grad_W); grad_W = min(para.NET.gradientClipThreshold, grad_W); end @@ -65,7 +65,7 @@ layer{Lidx(1)}.W = layer{Lidx(1)}.W - update{i}.W; end - if para.NET.weight_clip + if para.NET.weight_clip && isreal(layer{Lidx(1)}.W) % sometimes the weight will explode, so we need to add a limit to the value of the weights, e.g. +-10 layer{Lidx(1)}.W = max(-para.NET.weight_clip,layer{Lidx(1)}.W); layer{Lidx(1)}.W = min(para.NET.weight_clip,layer{Lidx(1)}.W);