From f5db1999cc9ed122e91aff1674d3db51ab9edc5f Mon Sep 17 00:00:00 2001
From: Fabian Prasser
Date: Thu, 2 Jan 2025 23:35:39 +0100
Subject: [PATCH] Add lowest risk

Add a minimum probability of re-identification (rM) to RiskSummary and
expose it through getLowestRisk(). For both the journalist and the
prosecutor model, rM is computed as 1 / (size of the largest class seen
while iterating over the sample), mirroring how rB is derived from the
smallest class.

---
 .../arx/risk/RiskModelSampleSummary.java      | 50 +++++++++++++++----
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/src/main/org/deidentifier/arx/risk/RiskModelSampleSummary.java b/src/main/org/deidentifier/arx/risk/RiskModelSampleSummary.java
index b5435c213..d11972b83 100644
--- a/src/main/org/deidentifier/arx/risk/RiskModelSampleSummary.java
+++ b/src/main/org/deidentifier/arx/risk/RiskModelSampleSummary.java
@@ -49,9 +49,10 @@ public static class JournalistRisk extends RiskSummary {
          * @param rA
          * @param rB
          * @param rC
+         * @param rM
          */
-        protected JournalistRisk(double t, double rA, double rB, double rC) {
-            super(t, rA, rB, rC);
+        protected JournalistRisk(double t, double rA, double rB, double rC, double rM) {
+            super(t, rA, rB, rC, rM);
         }
     }
 
@@ -94,9 +95,10 @@ public static class ProsecutorRisk extends RiskSummary {
          * @param rA
          * @param rB
          * @param rC
+         * @param rM
          */
-        protected ProsecutorRisk(double t,double rA, double rB, double rC) {
-            super(t, rA, rB, rC);
+        protected ProsecutorRisk(double t,double rA, double rB, double rC, double rM) {
+            super(t, rA, rB, rC, rM);
         }
     }
     /**
@@ -112,6 +114,8 @@ public static class RiskSummary {
         private final double rA;
         /** Maximum probability of re-identification*/
         private final double rB;
+        /** Minimum probability of re-identification*/
+        private final double rM;
         /** Proportion of records that can be re-identified on average*/
         private final double rC;
 
@@ -121,12 +125,14 @@ public static class RiskSummary {
          * @param rA
          * @param rB
          * @param rC
+         * @param rM
          */
-        protected RiskSummary(double t, double rA, double rB, double rC) {
+        protected RiskSummary(double t, double rA, double rB, double rC, double rM) {
             this.t = t;
             this.rA = rA;
             this.rB = rB;
             this.rC = rC;
+            this.rM = rM;
         }
 
         /**
@@ -152,6 +158,14 @@ public double getEffectiveRiskThreshold() {
         public double getHighestRisk() {
             return Double.isNaN(rB) ? 0d : rB;
         }
+
+        /**
+         * Minimum probability of re-identification
+         * @return
+         */
+        public double getLowestRisk() {
+            return Double.isNaN(rM) ? 0d : rM;
+        }
 
         /**
          * Proportion of records with risk above threshold
@@ -219,8 +233,8 @@ public RiskModelSampleSummary(DataHandleInternal handle,
             population = sample;
         }
         if (sample.size() == 0) {
-            this.prosecutorRisk = new ProsecutorRisk(threshold, 0d, 0d, 0d);
-            this.journalistRisk = new JournalistRisk(threshold, 0d, 0d, 0d);
+            this.prosecutorRisk = new ProsecutorRisk(threshold, 0d, 0d, 0d, 0d);
+            this.journalistRisk = new JournalistRisk(threshold, 0d, 0d, 0d, 0d);
             this.marketerRisk = new MarketerRisk(0d);
         } else {
             this.prosecutorRisk = getProsecutorRisk(population, sample, 0.9d, stop, progress);
@@ -359,12 +373,14 @@ private JournalistRisk getJournalistRisk(Groupify population,
         // Init
         double rA = 0d;
         double rB = 0d;
+        double rM = 0d;
         double rC = 0d;
         double rC1 = 0d;
         double rC2 = 0d;
         double numRecordsInSample = 0d;
         double numClassesInSample = 0d;
         double smallestClassSizeInPopulation = Integer.MAX_VALUE;
+        double largestClassSizeInPopulation = Integer.MIN_VALUE;
         int maxindex = sample.size();
         int index = 0;
 
@@ -393,6 +409,10 @@ private JournalistRisk getJournalistRisk(Groupify population,
             if (groupSizeInPopulation < smallestClassSizeInPopulation) {
                 smallestClassSizeInPopulation = groupSizeInPopulation;
             }
+            // Compute rM
+            if (groupSizeInPopulation > largestClassSizeInPopulation) {
+                largestClassSizeInPopulation = groupSizeInPopulation;
+            }
             // Compute rC
             numClassesInSample++;
             numRecordsInSample += groupSizeInSample;
@@ -411,6 +431,9 @@ private JournalistRisk getJournalistRisk(Groupify population,
 
         // Compute rB: smallest class is first class in the histogram
         rB = 1d / smallestClassSizeInPopulation;
+
+        // Compute rM
+        rM = 1d / largestClassSizeInPopulation;
 
         // Compute rC
         rC1 = numClassesInSample / rC1;
@@ -418,7 +441,7 @@ private JournalistRisk getJournalistRisk(Groupify population,
         rC = Math.max(rC1, rC2);
 
         // Return
-        return new JournalistRisk(threshold, rA, rB, rC);
+        return new JournalistRisk(threshold, rA, rB, rC, rM);
     }
 
     /**
@@ -495,10 +518,12 @@ private ProsecutorRisk getProsecutorRisk(Groupify population,
         // Init
         double rA = 0d;
         double rB = 0d;
+        double rM = 0d;
         double rC = 0d;
         double numRecords = 0d;
         double numClasses = 0d;
         double smallestClassSize = Integer.MAX_VALUE;
+        double largestClassSize = Integer.MIN_VALUE;
         int maxindex = sample.size();
         int index = 0;
 
@@ -521,6 +546,10 @@ private ProsecutorRisk getProsecutorRisk(Groupify population,
             if (groupSize < smallestClassSize) {
                 smallestClassSize = groupSize;
             }
+            // Compute rM
+            if (groupSize > largestClassSize) {
+                largestClassSize = groupSize;
+            }
             // Compute rC
             numClasses++;
             numRecords += groupSize;
@@ -538,10 +567,13 @@ private ProsecutorRisk getProsecutorRisk(Groupify population,
         // Compute rB: smallest class is first class in the histogram
         rB = 1d / smallestClassSize;
 
+        // Compute rM
+        rM = 1d / largestClassSize;
+
         // Compute rC
         rC = numClasses / numRecords;
 
         // Return
-        return new ProsecutorRisk(threshold, rA, rB, rC);
+        return new ProsecutorRisk(threshold, rA, rB, rC, rM);
     }
 }