From 3cdf05a1e0e90db98c3bea16dd007ef66fda03a5 Mon Sep 17 00:00:00 2001 From: Dhruv Gupta Date: Tue, 6 Jan 2026 01:27:18 +0530 Subject: [PATCH 1/2] Added AhoCorasick Algorithm --- .../algorithms/strings/AhoCorasick.java | 133 ++++++++++++++++++ .../algorithms/strings/AhoCorasickTest.java | 72 ++++++++++ 2 files changed, 205 insertions(+) create mode 100644 src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java create mode 100644 src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java diff --git a/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java new file mode 100644 index 000000000..32e04452c --- /dev/null +++ b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java @@ -0,0 +1,133 @@ +package com.williamfiset.algorithms.strings; + +import java.util.*; + +/** + * Aho–Corasick algorithm + *

+ * Multi-pattern string matching in O(n + m + z):
+ * n = text length, m = total pattern length, z = number of matches.
+ * <p>
+ * Typical use cases: spam filters, IDS, virus scanning, keyword detection. + */ +public class AhoCorasick { + + private final Node root = new Node(); + private boolean built = false; + + // Example usage + public static void main(String[] args) { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("he"); + ac.addPattern("she"); + ac.addPattern("his"); + ac.addPattern("hers"); + + ac.build(); + + String text = "ushers"; + List matches = ac.search(text); + + for (Match m : matches) { + System.out.println( + "Match \"" + text.substring(m.index, m.index + m.length) + + "\" at index " + m.index); + } + } + + /** + * Insert a pattern (call build() once all patterns are added). + */ + public void addPattern(String word) { + built = false; + Node node = root; + for (char c : word.toCharArray()) { + Node next = node.children.get(c); + if (next == null) { + next = new Node(); + node.children.put(c, next); + } + node = next; + } + node.output.add(word); + } + + /** + * Build failure links (BFS). Must be called before search(). + */ + public void build() { + Queue q = new ArrayDeque(); + + // Level 1 → fail points to root + for (Node child : root.children.values()) { + child.fail = root; + q.add(child); + } + + while (!q.isEmpty()) { + Node node = q.remove(); + + for (Map.Entry entry : node.children.entrySet()) { + char c = entry.getKey(); + Node nxt = entry.getValue(); + q.add(nxt); + + Node f = node.fail; + while (f != null && !f.children.containsKey(c)) { + f = f.fail; + } + + nxt.fail = (f == null) ? root : f.children.get(c); + nxt.output.addAll(nxt.fail.output); // inherit matches + } + } + + built = true; + } + + /** + * Search text and return a list of matches (index, length). 
+ */ + public List search(String text) { + if (!built) + throw new IllegalStateException("Call build() before search()."); + + List results = new ArrayList(); + Node node = root; + + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + + while (node != null && !node.children.containsKey(c)) { + node = node.fail; + } + + if (node == null) node = root; + else node = node.children.get(c); + + for (String w : node.output) { + results.add(new Match(i - w.length() + 1, w.length())); + } + } + + return results; + } + + // Match result + public static class Match { + public final int index; + public final int length; + + Match(int index, int length) { + this.index = index; + this.length = length; + } + } + + // Trie node + private static class Node { + Map children = new HashMap<>(); + Node fail; + List output = new ArrayList<>(); + } +} diff --git a/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java new file mode 100644 index 000000000..18b6e1c73 --- /dev/null +++ b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java @@ -0,0 +1,72 @@ +package com.williamfiset.algorithms.strings; + + +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + + +public class AhoCorasickTest { + + @Test + public void basicMatching() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("he"); + ac.addPattern("she"); + ac.addPattern("his"); + ac.addPattern("hers"); + ac.build(); + + List matches = ac.search("ushers"); + + // Expected: "she", "he", "hers" + assertEquals(3, matches.size()); + + assertEquals(1, matches.get(0).index); + assertEquals(3, matches.get(0).length); + + assertEquals(2, matches.get(1).index); + assertEquals(2, matches.get(1).length); + + assertEquals(2, matches.get(2).index); + assertEquals(4, matches.get(2).length); + } + + @Test + public void overlappingPatterns() { + AhoCorasick ac 
= new AhoCorasick(); + ac.addPattern("aba"); + ac.addPattern("ba"); + ac.build(); + + List matches = ac.search("ababa"); + + assertEquals(4, matches.size()); + + assertEquals(0, matches.get(0).index); // "aba" + assertEquals(1, matches.get(1).index); // "ba" + assertEquals(2, matches.get(2).index); // "aba" + assertEquals(3, matches.get(3).index); // "ba" + } + + @Test + public void noMatches() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("cat"); + ac.addPattern("dog"); + ac.build(); + + List matches = ac.search("aaaaa"); + assertTrue(matches.isEmpty()); + } + + @Test + public void searchWithoutBuildThrows() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("abc"); + + assertThrows(IllegalStateException.class, () -> ac.search("abc")); + } +} From 996f97dc0d6fe1e359e13c2ef409a1fcfd05b722 Mon Sep 17 00:00:00 2001 From: Dhruv Gupta Date: Tue, 6 Jan 2026 14:29:59 +0530 Subject: [PATCH 2/2] Fixed spotlesscheck --- .../algorithms/strings/AhoCorasick.java | 186 +++++++++--------- .../algorithms/strings/AhoCorasickTest.java | 125 ++++++------ 2 files changed, 150 insertions(+), 161 deletions(-) diff --git a/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java index 32e04452c..32716d496 100644 --- a/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java +++ b/src/main/java/com/williamfiset/algorithms/strings/AhoCorasick.java @@ -4,130 +4,122 @@ /** * Aho–Corasick algorithm - *

- * Multi-pattern string matching in O(n + m + z):
- * n = text length, m = total pattern length, z = number of matches.
- * <p>
- * Typical use cases: spam filters, IDS, virus scanning, keyword detection.
+ *
+ * <p>Multi-pattern string matching in O(n + m + z): n = text length, m = total pattern length, z =
+ * number of matches.
+ *
+ * <p>
Typical use cases: spam filters, IDS, virus scanning, keyword detection. */ public class AhoCorasick { - private final Node root = new Node(); - private boolean built = false; + private final Node root = new Node(); + private boolean built = false; - // Example usage - public static void main(String[] args) { - AhoCorasick ac = new AhoCorasick(); - ac.addPattern("he"); - ac.addPattern("she"); - ac.addPattern("his"); - ac.addPattern("hers"); + // Example usage + public static void main(String[] args) { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("he"); + ac.addPattern("she"); + ac.addPattern("his"); + ac.addPattern("hers"); - ac.build(); + ac.build(); - String text = "ushers"; - List matches = ac.search(text); + String text = "ushers"; + List matches = ac.search(text); - for (Match m : matches) { - System.out.println( - "Match \"" + text.substring(m.index, m.index + m.length) + - "\" at index " + m.index); - } + for (Match m : matches) { + System.out.println( + "Match \"" + text.substring(m.index, m.index + m.length) + "\" at index " + m.index); } - - /** - * Insert a pattern (call build() once all patterns are added). - */ - public void addPattern(String word) { - built = false; - Node node = root; - for (char c : word.toCharArray()) { - Node next = node.children.get(c); - if (next == null) { - next = new Node(); - node.children.put(c, next); - } - node = next; - } - node.output.add(word); + } + + /** Insert a pattern (call build() once all patterns are added). */ + public void addPattern(String word) { + built = false; + Node node = root; + for (char c : word.toCharArray()) { + Node next = node.children.get(c); + if (next == null) { + next = new Node(); + node.children.put(c, next); + } + node = next; } + node.output.add(word); + } - /** - * Build failure links (BFS). Must be called before search(). 
- */ - public void build() { - Queue q = new ArrayDeque(); - - // Level 1 → fail points to root - for (Node child : root.children.values()) { - child.fail = root; - q.add(child); - } + /** Build failure links (BFS). Must be called before search(). */ + public void build() { + Queue q = new ArrayDeque(); - while (!q.isEmpty()) { - Node node = q.remove(); + // Level 1 → fail points to root + for (Node child : root.children.values()) { + child.fail = root; + q.add(child); + } - for (Map.Entry entry : node.children.entrySet()) { - char c = entry.getKey(); - Node nxt = entry.getValue(); - q.add(nxt); + while (!q.isEmpty()) { + Node node = q.remove(); - Node f = node.fail; - while (f != null && !f.children.containsKey(c)) { - f = f.fail; - } + for (Map.Entry entry : node.children.entrySet()) { + char c = entry.getKey(); + Node nxt = entry.getValue(); + q.add(nxt); - nxt.fail = (f == null) ? root : f.children.get(c); - nxt.output.addAll(nxt.fail.output); // inherit matches - } + Node f = node.fail; + while (f != null && !f.children.containsKey(c)) { + f = f.fail; } - built = true; + nxt.fail = (f == null) ? root : f.children.get(c); + nxt.output.addAll(nxt.fail.output); // inherit matches + } } - /** - * Search text and return a list of matches (index, length). - */ - public List search(String text) { - if (!built) - throw new IllegalStateException("Call build() before search()."); + built = true; + } - List results = new ArrayList(); - Node node = root; + /** Search text and return a list of matches (index, length). 
*/ + public List search(String text) { + if (!built) throw new IllegalStateException("Call build() before search()."); - for (int i = 0; i < text.length(); i++) { - char c = text.charAt(i); + List results = new ArrayList(); + Node node = root; - while (node != null && !node.children.containsKey(c)) { - node = node.fail; - } + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); - if (node == null) node = root; - else node = node.children.get(c); + while (node != null && !node.children.containsKey(c)) { + node = node.fail; + } - for (String w : node.output) { - results.add(new Match(i - w.length() + 1, w.length())); - } - } + if (node == null) node = root; + else node = node.children.get(c); - return results; + for (String w : node.output) { + results.add(new Match(i - w.length() + 1, w.length())); + } } - // Match result - public static class Match { - public final int index; - public final int length; + return results; + } - Match(int index, int length) { - this.index = index; - this.length = length; - } - } + // Match result + public static class Match { + public final int index; + public final int length; - // Trie node - private static class Node { - Map children = new HashMap<>(); - Node fail; - List output = new ArrayList<>(); + Match(int index, int length) { + this.index = index; + this.length = length; } + } + + // Trie node + private static class Node { + Map children = new HashMap<>(); + Node fail; + List output = new ArrayList<>(); + } } diff --git a/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java index 18b6e1c73..f656cecb5 100644 --- a/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java +++ b/src/test/java/com/williamfiset/algorithms/strings/AhoCorasickTest.java @@ -1,72 +1,69 @@ package com.williamfiset.algorithms.strings; - -import org.junit.jupiter.api.Test; - -import java.util.List; - import static 
org.junit.jupiter.api.Assertions.*; +import java.util.List; +import org.junit.jupiter.api.Test; public class AhoCorasickTest { - @Test - public void basicMatching() { - AhoCorasick ac = new AhoCorasick(); - ac.addPattern("he"); - ac.addPattern("she"); - ac.addPattern("his"); - ac.addPattern("hers"); - ac.build(); - - List matches = ac.search("ushers"); - - // Expected: "she", "he", "hers" - assertEquals(3, matches.size()); - - assertEquals(1, matches.get(0).index); - assertEquals(3, matches.get(0).length); - - assertEquals(2, matches.get(1).index); - assertEquals(2, matches.get(1).length); - - assertEquals(2, matches.get(2).index); - assertEquals(4, matches.get(2).length); - } - - @Test - public void overlappingPatterns() { - AhoCorasick ac = new AhoCorasick(); - ac.addPattern("aba"); - ac.addPattern("ba"); - ac.build(); - - List matches = ac.search("ababa"); - - assertEquals(4, matches.size()); - - assertEquals(0, matches.get(0).index); // "aba" - assertEquals(1, matches.get(1).index); // "ba" - assertEquals(2, matches.get(2).index); // "aba" - assertEquals(3, matches.get(3).index); // "ba" - } - - @Test - public void noMatches() { - AhoCorasick ac = new AhoCorasick(); - ac.addPattern("cat"); - ac.addPattern("dog"); - ac.build(); - - List matches = ac.search("aaaaa"); - assertTrue(matches.isEmpty()); - } - - @Test - public void searchWithoutBuildThrows() { - AhoCorasick ac = new AhoCorasick(); - ac.addPattern("abc"); - - assertThrows(IllegalStateException.class, () -> ac.search("abc")); - } + @Test + public void basicMatching() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("he"); + ac.addPattern("she"); + ac.addPattern("his"); + ac.addPattern("hers"); + ac.build(); + + List matches = ac.search("ushers"); + + // Expected: "she", "he", "hers" + assertEquals(3, matches.size()); + + assertEquals(1, matches.get(0).index); + assertEquals(3, matches.get(0).length); + + assertEquals(2, matches.get(1).index); + assertEquals(2, matches.get(1).length); + + 
assertEquals(2, matches.get(2).index); + assertEquals(4, matches.get(2).length); + } + + @Test + public void overlappingPatterns() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("aba"); + ac.addPattern("ba"); + ac.build(); + + List matches = ac.search("ababa"); + + assertEquals(4, matches.size()); + + assertEquals(0, matches.get(0).index); // "aba" + assertEquals(1, matches.get(1).index); // "ba" + assertEquals(2, matches.get(2).index); // "aba" + assertEquals(3, matches.get(3).index); // "ba" + } + + @Test + public void noMatches() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("cat"); + ac.addPattern("dog"); + ac.build(); + + List matches = ac.search("aaaaa"); + assertTrue(matches.isEmpty()); + } + + @Test + public void searchWithoutBuildThrows() { + AhoCorasick ac = new AhoCorasick(); + ac.addPattern("abc"); + + assertThrows(IllegalStateException.class, () -> ac.search("abc")); + } }