From e30fbafaa631fd21e05f797904bdcf65f082e6b3 Mon Sep 17 00:00:00 2001
From: Alberto Carlevaro <99175531+AlbiCarle@users.noreply.github.com>
Date: Tue, 8 Feb 2022 15:12:50 +0100
Subject: [PATCH] Add files via upload

---
 KernelMatrix.m        |  17 ++++
 MixGauss.m            |  29 +++++++++
 OptimiseParam_NSVDD.m |  69 +++++++++++++++++++++
 RadiusReductionSVDD.m |  46 ++++++++++++++
 SVDD_N1C_TEST.m       |  17 ++++++
 SVDD_N1C_TRAINING.m   |  87 ++++++++++++++++++++++++++
 SquareDist.m          |   9 +++
 TestObject_N.m        |  36 +++++++++++
 ZeroFPR_SVDD.m        |  95 +++++++++++++++++++++++++++++
 Zero_FPR_exe.m        | 139 ++++++++++++++++++++++++++++++++++++++++++
 calcErr.m             |   4 ++
 plotSVDD.m            |  71 +++++++++++++++++++++
 12 files changed, 619 insertions(+)
 create mode 100644 KernelMatrix.m
 create mode 100644 MixGauss.m
 create mode 100644 OptimiseParam_NSVDD.m
 create mode 100644 RadiusReductionSVDD.m
 create mode 100644 SVDD_N1C_TEST.m
 create mode 100644 SVDD_N1C_TRAINING.m
 create mode 100644 SquareDist.m
 create mode 100644 TestObject_N.m
 create mode 100644 ZeroFPR_SVDD.m
 create mode 100644 Zero_FPR_exe.m
 create mode 100644 calcErr.m
 create mode 100644 plotSVDD.m

diff --git a/KernelMatrix.m b/KernelMatrix.m
new file mode 100644
index 0000000..4dc59da
--- /dev/null
+++ b/KernelMatrix.m
@@ -0,0 +1,17 @@
+function K = KernelMatrix(X1, X2, kernel, param)
+
+% Usage: K = KernelMatrix(X1, X2, kernel, param)
+% X1 and X2 are the two collections of points on which to compute the Gram matrix
+
+% kernel = 'linear', 'polynomial', 'gaussian'
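+%
+% EXAMPLE (illustrative sketch; the data matrix and the parameter values
+% below are arbitrary choices, not values required by this code):
+%   X  = randn(100,2);
+%   K  = KernelMatrix(X, X, 'gaussian', 0.5);   % 100x100 Gaussian Gram matrix
+%   Kp = KernelMatrix(X, X, 'polynomial', 3);   % cubic polynomial kernel
+%   Kl = KernelMatrix(X, X, '', []);            % empty kernel name falls back to 'linear'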
+    if numel(kernel) == 0
+        kernel = 'linear';
+    end
+    if isequal(kernel, 'linear')
+        K = X1*X2';
+    elseif isequal(kernel, 'polynomial')
+        K = (1 + X1*X2').^param;
+    elseif isequal(kernel, 'gaussian')
+        K = exp(-1/(2*param^2)*SquareDist(X1,X2));
+    end
+end
diff --git a/MixGauss.m b/MixGauss.m
new file mode 100644
index 0000000..d892084
--- /dev/null
+++ b/MixGauss.m
@@ -0,0 +1,29 @@
+function [X, Y] = MixGauss(means, sigmas, n)
+
+% Usage: [X, Y] = MixGauss(means, sigmas, n)
+
+% EXAMPLE: [X, Y] = MixGauss([[0;0],[1;1]],[0.5,0.25],1000);
+% generates a 2D dataset with two classes: the first centered at (0,0)
+% with standard deviation 0.5, the second centered at (1,1) with standard
+% deviation 0.25; each class contains 1000 points.
+
+d = size(means,1);
+p = size(means,2);
+
+X = [];
+Y = [];
+for i = 1:p
+    m = means(:,i);
+    S = sigmas(i);
+    Xi = zeros(n,d);
+    Yi = zeros(n,1);
+    for j = 1:n
+        x = S*randn(d,1) + m;
+        Xi(j,:) = x;
+        Yi(j) = i-1;   % class labels are 0, 1, ..., p-1
+    end
+    X = [X; Xi];
+    Y = [Y; Yi];
+end
diff --git a/OptimiseParam_NSVDD.m b/OptimiseParam_NSVDD.m
new file mode 100644
index 0000000..1d9c98a
--- /dev/null
+++ b/OptimiseParam_NSVDD.m
@@ -0,0 +1,69 @@
+function [s, Vm, Vs, Tm, Ts] = OptimiseParam_NSVDD(X, Y, kernel, perc, nrip, intKerPar, C1, C2)
+
+% Usage:
+% [s, Vm, Vs, Tm, Ts] = OptimiseParam_NSVDD(X, Y, kernel, perc, nrip, intKerPar, C1, C2)
+% X: training set
+% Y: labels of the training set
+% kernel: 'linear', 'gaussian', 'polynomial'
+% perc: fraction of the dataset to be used for validation
+% nrip: number of repetitions of the test for each parameter
+% intKerPar: list of kernel parameters to try
+% C1, C2: SVDD weights
+%
+% Output:
+% s: the kernel parameter that minimizes the median of the validation error
+% Vm, Vs: median and standard deviation of the validation error for each parameter
+% Tm, Ts: median and standard deviation of the training error for each parameter
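+%
+% EXAMPLE (sketch; the grid of Gaussian widths, perc = 0.3 and nrip = 10
+% are illustrative choices, while C1 = 0.5 and C2 = 1 mirror Zero_FPR_exe.m):
+%   intKerPar = [0.1 0.25 0.5 1 2 5];
+%   [s, Vm, Vs, Tm, Ts] = OptimiseParam_NSVDD(Xtr, Ytr, 'gaussian', 0.3, 10, intKerPar, 0.5, 1);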
+
+    nKerPar = numel(intKerPar);
+
+    n = size(X,1);
+    ntr = ceil(n*(1-perc));
+
+    tmn = zeros(nKerPar, nrip);
+    vmn = zeros(nKerPar, nrip);
+
+    for rip = 1:nrip
+        % random training/validation split
+        I = randperm(n);
+        Xtr = X(I(1:ntr),:);
+        Ytr = Y(I(1:ntr),:);
+        Xvl = X(I(ntr+1:end),:);
+        Yvl = Y(I(ntr+1:end),:);
+
+        is = 0;
+        for param = intKerPar
+            is = is + 1;
+
+            [alpha, Rsquared,~,~,~] = ...
+                SVDD_N1C_TRAINING(Xtr, Ytr, kernel, param, C1, C2, 'off');
+
+            tmn(is, rip) = ...
+                calcErr(SVDD_N1C_TEST(Xtr, Ytr, alpha, Xtr, kernel, param, Rsquared), Ytr);
+            vmn(is, rip) = ...
+                calcErr(SVDD_N1C_TEST(Xtr, Ytr, alpha, Xvl, kernel, param, Rsquared), Yvl);
+
+        end
+
+        disp(['Opt_iter ', num2str(rip)]);
+
+    end
+
+    Tm = median(tmn,2);
+    Ts = std(tmn,0,2);
+    Vm = median(vmn,2);
+    Vs = std(vmn,0,2);
+
+    [row, ~] = find(Vm <= min(Vm));
+
+    s = intKerPar(row(1));
+
+end
+
+function err = calcErr(T, Y)
+    n = size(T,1);
+    err = sum(T~=Y)/n;
+end
diff --git a/RadiusReductionSVDD.m b/RadiusReductionSVDD.m
new file mode 100644
index 0000000..740e5c0
--- /dev/null
+++ b/RadiusReductionSVDD.m
@@ -0,0 +1,46 @@
+function R_star = RadiusReductionSVDD(Xtr, Ytr, alpha, Xts, kernel, param, Rsquared, treshold)
+
+% RadiusReductionSVDD
+
+% Usage: R_star = RadiusReductionSVDD(Xtr, Ytr, alpha, Xts, kernel, param, Rsquared, treshold)
+
+% Xtr: training set
+% Ytr: labels of the training set
+% alpha: Lagrange multipliers of the SVDD
+% Xts: test set (assumed to be labelled by Ytr, e.g. Xts = Xtr)
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+% Rsquared: squared radius of the SVDD
+% treshold: percentage of FP to be achieved (threshold)
+
+maxiter = 1000;
+
+i = 1;
+
+Rsq = Rsquared;
+
+% Shrink the squared radius until the false-positive rate drops below the
+% threshold (the multiplicative factor 0.99 is an assumed shrinking rate).
+while(i < maxiter)
+
+    y = SVDD_N1C_TEST(Xtr, Ytr, alpha, Xts, kernel, param, Rsq);
+
+    FP = sum(y == +1 & Ytr == -1);
+    FPR = FP/nnz(Ytr == -1);
+
+    if(FPR <= treshold)
+        break;
+    end
+
+    Rsq = 0.99*Rsq;
+
+    i = i + 1;
+
+    disp(['RadiusReduction iter --> ', num2str(i)])
+
+end
+
+R_star = Rsq;
diff --git a/SVDD_N1C_TEST.m b/SVDD_N1C_TEST.m
new file mode 100644
index 0000000..98d520a
--- /dev/null
+++ b/SVDD_N1C_TEST.m
@@ -0,0 +1,17 @@
+function y = SVDD_N1C_TEST(Xtr, Ytr, alpha, Xts, kernel, param, Rsquared)
+
+% SVDD_N1C_TEST
+% Usage: y = SVDD_N1C_TEST(Xtr, Ytr, alpha, Xts, kernel, param, Rsquared)
+
+% Xtr: training set
+% Ytr: labels of the training set
+% alpha: Lagrange multipliers of the SVDD
+% Xts: test set
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+% Rsquared: squared radius of the SVDD
+
+% Returns +1 for points inside the sphere (target class), -1 outside.
+
+    Tts = TestObject_N(Xtr, Ytr, alpha, Xts, kernel, param);
+    y = sign(Rsquared-Tts);
+
+end
diff --git a/SVDD_N1C_TRAINING.m b/SVDD_N1C_TRAINING.m
new file mode 100644
index 0000000..6780b3d
--- /dev/null
+++ b/SVDD_N1C_TRAINING.m
@@ -0,0 +1,87 @@
+function [alpha, Rsquared, a, SV, YSV] = SVDD_N1C_TRAINING(Xtr, Ytr, kernel, param, C1, C2, qdprg_opts)
+
+% SVDD_N1C_TRAINING
+% Usage: [alpha, Rsquared, a, SV, YSV] = SVDD_N1C_TRAINING(Xtr, Ytr, kernel, param, C1, C2, qdprg_opts)
+
+% Xtr: training set
+% Ytr: labels of the training set
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+% C1, C2: SVDD weights
+% qdprg_opts: 'on'/'off', display of the quadprog optimization information
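+%
+% EXAMPLE (sketch; the values mirror the demo in Zero_FPR_exe.m, and the
+% returned y is +1 inside the sphere, -1 outside):
+%   [alpha, Rsquared, a, SV, YSV] = SVDD_N1C_TRAINING(Xtr, Ytr, 'gaussian', 1, 0.5, 1, 'off');
+%   y = SVDD_N1C_TEST(Xtr, Ytr, alpha, Xtr, 'gaussian', 1, Rsquared);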
+
+if(size(Ytr(Ytr==-1),1)==0)
+
+    disp('Error: there must be a target class and a negative class')
+
+else
+    n = size(Xtr,1);
+
+    if (isequal(kernel,'linear') || isequal(kernel,'polynomial'))
+
+        % shift and row-normalize the data for the non-translation-invariant kernels
+        Ztr = Xtr+10;
+        Ztr = normalize(Ztr, 2, 'norm', 2);
+
+    else
+
+        Ztr = Xtr;
+
+    end
+
+    K = KernelMatrix(Ztr, Ztr, kernel, param);
+
+    % Computing alpha by maximizing
+    %   L = sum_i alpha_i*(x_i*x_i) - sum_l alpha_l*(x_l*x_l)
+    %       - sum_(i,j) alpha_i*alpha_j*(x_i*x_j)
+    %       + 2*sum_(l,j) alpha_l*alpha_j*(x_l*x_j)
+    %       - sum_(l,m) alpha_l*alpha_m*(x_l*x_m)
+    % with quadprog, which minimizes 1/2*x'*H*x + f'*x
+
+    H = Ytr*Ytr'.*K;
+    H = H+H';
+    f = -Ytr.*diag(K);   % minus sign because quadprog minimizes -L
+
+    lb = zeros(n,1);
+    ub = ones(n,1);
+    ub(Ytr==-1,1) = C2;
+    ub(Ytr==+1,1) = C1;
+    Aeq = ones(1,n);
+    Aeq(1,Ytr==-1) = -1;
+    Aeq(1,Ytr==+1) = +1;
+    beq = 1;
+
+    if isequal(qdprg_opts,'on')
+        options = optimset('Display', 'on');
+    elseif isequal(qdprg_opts,'off')
+        options = optimset('Display', 'off');
+    end
+
+    alpha = quadprog(H,f,[],[],Aeq,beq,lb,ub,[],options);
+
+    % Center
+
+    a = alpha'*(Ytr.*Xtr);
+
+    % Support Vectors: multipliers strictly between the bounds,
+    % 0 < |alpha_i| < C, up to the numerical tolerance inc
+
+    inc = 1E-5;
+
+    idxSV = find(abs(alpha) > inc & abs(alpha) < ub - inc);
+
+    SV = Xtr(idxSV,:);
+    YSV = Ytr(idxSV,:);
+
+    % Squared radius: kernel distance from the center to a support vector
+
+    if(size(SV,1)>0)
+
+        iSV = randperm(size(SV,1),1);
+
+        x_s = SV(iSV,:);
+
+        Rsquared = TestObject_N(Xtr, Ytr, alpha, x_s, kernel, param);
+    else
+        Rsquared = 0;
+    end
+end
diff --git a/SquareDist.m b/SquareDist.m
new file mode 100644
index 0000000..5e21e82
--- /dev/null
+++ b/SquareDist.m
@@ -0,0 +1,9 @@
+function D = SquareDist(X1, X2)
+% D(i,j) = ||X1(i,:) - X2(j,:)||^2, via ||u-v||^2 = ||u||^2 + ||v||^2 - 2*u'*v
+    n = size(X1,1);
+    m = size(X2,1);
+
+    sq1 = sum(X1.*X1,2);
+    sq2 = sum(X2.*X2,2);
+
+    D = sq1*ones(1,m) + ones(n,1)*sq2' - 2*(X1*X2');
+end
diff --git a/TestObject_N.m b/TestObject_N.m
new file mode 100644
index 0000000..c4b29d5
--- /dev/null
+++ b/TestObject_N.m
@@ -0,0 +1,36 @@
+function T = TestObject_N(Xtr, Ytr, alpha, Z, kernel, param)
+
+% TestObject_N
+% Usage: T = TestObject_N(Xtr, Ytr, alpha, Z, kernel, param)
+
+% Xtr: training set
+% Ytr: labels of the training set
+% alpha: Lagrange multipliers of the SVDD
+% Z: test objects, one per row
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+
+% Returns the squared kernel distance of each row of Z from the SVDD center.
+
+alf_i = alpha(Ytr==+1,1);   % multipliers of the target class
+alf_l = alpha(Ytr==-1,1);   % multipliers of the negative class
+
+X_i = Xtr(Ytr==+1,:);
+X_l = Xtr(Ytr==-1,:);
+
+K_i = KernelMatrix(X_i, X_i, kernel, param);
+K_l = KernelMatrix(X_l, X_l, kernel, param);
+
+Zker = KernelMatrix(Z, Z, kernel, param);
+Kz = diag(Zker);
+
+KZX_i = KernelMatrix(Z, X_i, kernel, param);
+KZX_l = KernelMatrix(Z, X_l, kernel, param);
+
+KX_lX_i = KernelMatrix(X_l, X_i, kernel, param);
+
+T = Kz - 2*(KZX_i*alf_i - KZX_l*alf_l) ...
+    + alf_i'*K_i*alf_i - 2*alf_l'*KX_lX_i*alf_i + alf_l'*K_l*alf_l;
+
+end
diff --git a/ZeroFPR_SVDD.m b/ZeroFPR_SVDD.m
new file mode 100644
index 0000000..bf6c2fe
--- /dev/null
+++ b/ZeroFPR_SVDD.m
@@ -0,0 +1,95 @@
+function [X_star, Y_star, alpha_star, Rsquared_star, ...
+    a_star, SV_star, YSV_star, param_star] = ...
+    ZeroFPR_SVDD(X, Y, alpha, Rsquared, kernel, param, C1, C2, treshold, param_opt)
+
+% ZeroFPR_SVDD
+
+% Usage: [X_star, Y_star, alpha_star, Rsquared_star, ...
+%         a_star, SV_star, YSV_star, param_star] = ...
+%         ZeroFPR_SVDD(X, Y, alpha, Rsquared, kernel, param, C1, C2, treshold, param_opt)
+
+% X: training set
+% Y: labels of the training set
+% alpha: Lagrange multipliers of the SVDD
+% Rsquared: squared radius of the SVDD
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+% C1, C2: SVDD weights
+% treshold: percentage of FP to be achieved (threshold)
+% param_opt: 'Y' if a kernel-parameter re-optimization is required
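+%
+% EXAMPLE (sketch, with the demo values from Zero_FPR_exe.m: a Gaussian
+% kernel with param = 1, C1 = 0.5, C2 = 1, treshold = 0.3 and parameter
+% re-optimization enabled):
+%   [X_star, Y_star, alpha_star, Rsquared_star, a_star, SV_star, YSV_star, param_star] = ...
+%       ZeroFPR_SVDD(Xtr, Ytr, alpha, Rsquared, 'gaussian', 1, 0.5, 1, 0.3, 'Y');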
+
+maxiter = 1000;
+
+i = 0;
+
+m = size(X,2);
+
+y_i = SVDD_N1C_TEST(X, Y, alpha, X, kernel, param, Rsquared);
+
+%FPR_old=0;
+
+% Cleaning loop (assumed rule, consistent with the description in
+% Zero_FPR_exe.m): at each iteration the false positives of the current
+% model are removed from the training set and the SVDD is retrained,
+% until the false-positive rate drops below the threshold.
+
+X_i = X;
+Y_i = Y;
+param_star = param;
+
+[alpha, Rsquared_i, a_i, SV_i, YSV_i] = ...
+    SVDD_N1C_TRAINING(X_i, Y_i, kernel, param_star, C1, C2, 'off');
+
+while(i < maxiter)
+
+    FP = (y_i == +1 & Y_i == -1);
+    FPR_i = sum(FP)/max(nnz(Y_i == -1), 1);
+
+    if(FPR_i <= treshold)
+        break;
+    end
+
+    % remove the false positives found at this iteration
+    X_i = X_i(~FP,:);
+    Y_i = Y_i(~FP,:);
+
+    if isequal(param_opt,'Y')
+        % assumed search grid around the current kernel parameter
+        param_star = OptimiseParam_NSVDD(X_i, Y_i, kernel, 0.3, 3, ...
+            param*[0.5 1 2], C1, C2);
+    end
+
+    [alpha, Rsquared_i, a_i, SV_i, YSV_i] = ...
+        SVDD_N1C_TRAINING(X_i, Y_i, kernel, param_star, C1, C2, 'off');
+
+    y_i = SVDD_N1C_TEST(X_i, Y_i, alpha, X_i, kernel, param_star, Rsquared_i);
+
+    i = i + 1;
+
+    disp(['iter ', num2str(i), ' --> FPR = ', num2str(FPR_i)])
+
+end
+
+X_star = X_i;
+Y_star = Y_i;
+alpha_star = alpha;
+
+Rsquared_star = Rsquared_i;
+
+a_star = a_i;
+
+SV_star = SV_i;
+YSV_star = YSV_i;
diff --git a/Zero_FPR_exe.m b/Zero_FPR_exe.m
new file mode 100644
index 0000000..1b87791
--- /dev/null
+++ b/Zero_FPR_exe.m
@@ -0,0 +1,139 @@
+%% Example of ZeroFPR_SVDD
+% This script shows how the ZeroFPR_SVDD algorithm works.
+% Given a training set X labelled with a target class (+1)
+% and a negative class (-1), the algorithm cleans the SVDD
+% classification until a threshold on the percentage of
+% False Positives (FP) is reached.
+
+clc; clear all; close all; %#ok
+
+% Create the dataset
+
+N1 = 1000; % number of target points
+N2 = 100;  % number of negative points
+
+X1 = MixGauss([1;1],[1,1],N1); % target class
+X2 = MixGauss([1;1],[2,2],N2); % negative class
+
+Xtr = [X1; X2];
+
+Y1 = ones(N1,1);
+Y2 = -ones(N2,1);
+
+Ytr = [Y1; Y2];
+
+ir = randperm(size(Xtr, 1));
+Xtr = Xtr(ir,:);
+Ytr = Ytr(ir);
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+figure(1)
+
+gscatter(Xtr(:,1), Xtr(:,2), Ytr, 'br'); % display the data
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% Classification via SVDD
+
+N1 = nnz(Ytr(:,1)==+1);
+N2 = nnz(Ytr(:,1)==-1);
+
+C1 = 0.5;
+C2 = 1; % if the kernel is linear, choose C2 = 1/N2;
+kernel = 'gaussian';
+
+param = 1;
+
+[alpha, Rsquared, a, SV, YSV] = ...
+    SVDD_N1C_TRAINING(Xtr, Ytr, kernel, param, C1, C2, 'on');
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% Display the classification
+
+plotSVDD(Xtr, Ytr, Xtr, Ytr, SV, YSV, kernel, param, alpha, Rsquared, a, 2);
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% Display the metrics
+
+y = SVDD_N1C_TEST(Xtr, Ytr, alpha, Xtr, kernel, param, Rsquared);
+
+P = nnz(Ytr(:,1)==+1);
+N = nnz(Ytr(:,1)==-1);
+
+Y = [y Ytr];
+
+TN = sum(Y(:,1)==-1 & Y(:,2)==-1);
+FN = sum(Y(:,1)==-1 & Y(:,2)==+1);
+TP = sum(Y(:,1)==+1 & Y(:,2)==+1);
+FP = sum(Y(:,1)==+1 & Y(:,2)==-1);
+
+FNR_start = FN/P;
+FPR_start = FP/N;
+
+ACC_start = (TP+TN)/(P+N);
+
+F1_start = 2*TP/(2*TP+FP+FN);
+
+PPV_start = TP/(TP+FP);
+NPV_start = TN/(TN+FN);
+
+TotalN_start = TP+FP;
+
+%figure(3)
+
+%cm = confusionchart(Ytr, y);
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% ZeroFPR_SVDD algorithm
+
+treshold = 0.3; % FPR threshold
+
+[X_star, Y_star, alpha_star, Rsquared_star, ...
+    a_star, SV_star, YSV_star, param_star] = ...
+    ZeroFPR_SVDD(Xtr, Ytr, alpha, Rsquared, kernel, param, C1, C2, treshold, 'Y');
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% Display the classification after the algorithm cleaning
+
+plotSVDD(X_star, Y_star, Xtr, Ytr, SV_star, YSV_star, kernel, param_star, alpha_star, Rsquared_star, a_star, 4);
+
+%%%%%%%%%%%%%% XXXX %%%%%%%%%%%%%%%%%%%%%%
+
+% Display the metrics after the cleaning
+
+y = SVDD_N1C_TEST(X_star, Y_star, alpha_star, Xtr, kernel, param_star, Rsquared_star);
+
+P = nnz(Ytr(:,1)==+1);
+N = nnz(Ytr(:,1)==-1);
+
+Y = [y Ytr];
+
+TN = sum(Y(:,1)==-1 & Y(:,2)==-1);
+FN = sum(Y(:,1)==-1 & Y(:,2)==+1);
+TP = sum(Y(:,1)==+1 & Y(:,2)==+1);
+FP = sum(Y(:,1)==+1 & Y(:,2)==-1);
+
+FNR_star = FN/P;
+FPR_star = FP/N;
+
+ACC_star = (TP+TN)/(P+N);
+
+F1_star = 2*TP/(2*TP+FP+FN);
+
+PPV_star = TP/(TP+FP);
+NPV_star = TN/(TN+FN);
+
+TotalN_star = TP+FP;
+
+%figure(5)
+
+%cm = confusionchart(Ytr, y);
diff --git a/calcErr.m b/calcErr.m
new file mode 100644
index 0000000..f5fb6dd
--- /dev/null
+++ b/calcErr.m
@@ -0,0 +1,4 @@
+function err = calcErr(T, Y)
+% fraction of entries where the prediction T differs from the label Y
+    n = size(T,1);
+    err = sum(T~=Y)/n;
+end
diff --git a/plotSVDD.m b/plotSVDD.m
new file mode 100644
index 0000000..dc925ba
--- /dev/null
+++ b/plotSVDD.m
@@ -0,0 +1,71 @@
+function [] = plotSVDD(Xtr, Ytr, Xts, Yts, SV, YSV, kernel, param, alpha, Rsquared, a, ind)
+
+% plotSVDD
+% Usage: plotSVDD(Xtr, Ytr, Xts, Yts, SV, YSV, kernel, param, alpha, Rsquared, a, ind)
+
+% Xtr: training set
+% Ytr: labels of the training set
+% Xts: test set
+% Yts: labels of the test set
+% SV: Support Vectors of the training set
+% YSV: labels of the SVs
+% kernel: 'linear', 'gaussian', 'polynomial'
+% param: kernel parameter
+% alpha: Lagrange multipliers of the SVDD
+% Rsquared: squared radius of the SVDD
+% a: center of the SVDD
+% ind: number of the figure, figure(ind)
+
+figure(ind)
+
+if isequal(kernel, 'linear')
+
+    % in the linear case the boundary is the circle of radius R around a
+    R = sqrt(Rsquared);
+
+    th = 0:pi/50:2*pi;
+    xunit = R*cos(th) + a(1,1);
+    yunit = R*sin(th) + a(1,2);
+    plot(xunit, yunit, 'linewidth', 1, 'Color', 'g');
+    axis equal;
+
+    hold on
+
+else
+
+    % otherwise the boundary is drawn as a contour of the decision function
+    % evaluated on a dimGrid x dimGrid grid
+
+    dimGrid = 90;
+
+    [K1, Z1] = meshgrid(linspace(min(Xtr(:,1))-1, max(Xtr(:,1))+1, dimGrid),...
+        linspace(min(Xtr(:,2))-1, max(Xtr(:,2))+1, dimGrid));
+
+    x = linspace(min(Xtr(:,1))-1, max(Xtr(:,1))+1, dimGrid);
+    y = linspace(min(Xtr(:,2))-1, max(Xtr(:,2))+1, dimGrid);
+
+    K1 = K1(:); Z1 = Z1(:);
+    E = [K1 Z1];
+
+    target = SVDD_N1C_TEST(Xtr, Ytr, alpha, E, kernel, param, Rsquared);
+
+    % map the predictions to two levels so that a single contour separates them
+    target(target~=1) = 2;
+    target(target==1) = -1;
+
+    axis equal
+    contour(x, y, reshape(target, numel(y), numel(x)), [0.9999 0.9999], ...
+        'linecolor', 'g', 'LineWidth', 1);
+
+end
+
+hold on
+gscatter(Xts(:,1), Xts(:,2), Yts, 'br', '.', [8 8]);
+hold on
+gscatter(SV(:,1), SV(:,2), YSV, 'br', '*', [5 5]);
+if all(YSV == ones(size(SV,1),1))
+    % all the support vectors belong to the target class: draw them in red
+    hold on
+    gscatter(SV(:,1), SV(:,2), YSV, 'r', '*', [5 5]);
+end
+xlabel('$x_1$', 'Interpreter', 'Latex')
+ylabel('$x_2$', 'Interpreter', 'Latex')
+hold off
+
+legend('off')
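+
+% EXAMPLE (sketch; these are the arguments of the first plotting call in
+% Zero_FPR_exe.m, drawing the boundary of the trained SVDD in figure 2):
+%   plotSVDD(Xtr, Ytr, Xtr, Ytr, SV, YSV, 'gaussian', 1, alpha, Rsquared, a, 2);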