From cc7dc913db46b3d44a25dd7c58c8113c31129ee5 Mon Sep 17 00:00:00 2001 From: Splat Date: Wed, 11 Feb 2026 13:30:09 -0500 Subject: [PATCH] Thread strategyName from the typogenerator library through to results.json and the frontend UI, replacing the client-side heuristic that attempted to infer variant class from Levenshtein edit distance. Added a candidate struct to carry the strategy name through the worker pipeline and a Strategy field on the Output struct. On the frontend, removed classifyVariant(), levenshtein(), and a duplicate shannonEntropy() definition. The variant filter dropdown is now dynamically populated from actual strategy names in the data. Also fixed a runtime bug where parseList() was called but never defined (replaced with splitAny). --- lib/typo/typo.go | 2 -- main.go | 18 +++++++++---- site/home.html | 7 ----- site/js/state.js | 38 ++++++++++----------------- site/js/utilities.js | 61 -------------------------------------------- 5 files changed, 27 insertions(+), 99 deletions(-) diff --git a/lib/typo/typo.go b/lib/typo/typo.go index 1a17fdc..e4762eb 100644 --- a/lib/typo/typo.go +++ b/lib/typo/typo.go @@ -40,8 +40,6 @@ func Generate(domain string, cfg []strategy.Strategy, logger slog.Logger) ([]typ } } - // TODO: Issue #15 here the strategy name is preserved. - // On the return pass that through to the HTTPResult object for results storing results, err := typogenerator.Fuzz(sld, cfg...) if err != nil { return results, err diff --git a/main.go b/main.go index 92aeb62..de1751f 100644 --- a/main.go +++ b/main.go @@ -16,9 +16,16 @@ import ( "time" ) +// candidate pairs a permutation with the strategy that generated it. +type candidate struct { + Permutation string + StrategyName string +} + // Output is the shape of what is returned to the results.json and thus site type Output struct { Domain string `json:"domain"` + Strategy string `json:"strategy"` Resolvable bool `json:"resolvable"` HasMail bool `json:"has_mail"` DNS verify.DNSResult `json:"dns"` @@ -90,7 +97,7 @@ func main() { ctx := context.Background() - in := make(chan string) + in := make(chan candidate) out := make(chan Output) var wg sync.WaitGroup @@ -98,19 +105,20 @@ func main() { wg.Add(1) go func() { defer wg.Done() - for d := range in { + for c := range in { for _, tld := range tldsOverride { - v, err := verify.VerifyDomain(ctx, d+"."+tld, vCfg) + v, err := verify.VerifyDomain(ctx, c.Permutation+"."+tld, vCfg) if err != nil { continue } - // Simple triage: only emit domains that show signs of being “real” + // Simple triage: only emit domains that show signs of being "real" if !v.Resolvable && !v.HasMail { continue } out <- Output{ Domain: v.ASCII, + Strategy: c.StrategyName, Resolvable: v.Resolvable, HasMail: v.HasMail, DNS: v.DNS, @@ -125,7 +133,7 @@ func main() { go func() { for _, d := range candidates { for _, p := range d.Permutations { - in <- p // the actual typo permutation + in <- candidate{Permutation: p, StrategyName: d.StrategyName} } } close(in) diff --git a/site/home.html b/site/home.html index 7064f1f..d47a431 100644 --- a/site/home.html +++ b/site/home.html @@ -63,13 +63,6 @@

Filters

diff --git a/site/js/state.js b/site/js/state.js index e962b31..840e222 100644 --- a/site/js/state.js +++ b/site/js/state.js @@ -51,17 +51,15 @@ function normalizeRecord(r){ .concat(dns.AAAA||[]) .filter(Boolean); - const v = classifyVariant(CFG.baseDomain, d); // TODO: this doens't seem to do anything + const cand = parseDomainParts(d); const scored = scoreRecord(r, CFG); return { _raw:r, domain:d, resolvable: !!r.resolvable, - variantClass: v.variantClass, - tld: v.tld || "", - editDistance: v.editDistance, - tldOnly: v.tldOnly, + variantClass: r.strategy || "unknown", + tld: cand.tld || "", score: scored.score, tags: scored.tags, ips: ips.join(" "), @@ -126,7 +124,7 @@ function sortView(){ VIEW.sort((a,b)=>{ let av = a[k], bv = b[k]; - if(k==="score" || k==="httpStatusCode" || k==="editDistance"){ + if(k==="score" || k==="httpStatusCode"){ av = Number(av||0); bv = Number(bv||0); return (av-bv)*mul; } @@ -158,7 +156,7 @@ function render(){ tr.appendChild(dom); const vc = document.createElement("td"); - vc.innerHTML = `${r.variantClass}${r.editDistance!==null?`d=${r.editDistance}`:""}`; + vc.innerHTML = `${r.variantClass}`; tr.appendChild(vc); const tld = document.createElement("td"); @@ -209,6 +207,13 @@ function render(){ tb.appendChild(tr); } + // update variant dropdown + const variants = Array.from(new Set(RAW.map(r=>r.variantClass).filter(Boolean))).sort(); + const vSel = $("variantFilter"); + const vCurrent = vSel.value; + vSel.innerHTML = '' + variants.map(v=>``).join(""); + vSel.value = variants.includes(vCurrent) ? vCurrent : ""; + // update TLD dropdown const tlds = Array.from(new Set(RAW.map(r=>r.tld).filter(Boolean))).sort(); const sel = $("tldFilter"); @@ -371,7 +376,7 @@ function fingerprintIndicators(r){ } // Sinkhole IP indicator - const sinkholes = parseList($("sinkholeIps").value); + const sinkholes = splitAny($("sinkholeIps").value); if(sinkholes.length && r.ips){ const hit = sinkholes.find(ip => (" "+r.ips+" ").includes(ip)); if(hit) out.push(`sinkhole-hit Matches sinkhole IP: ${escapeHtml(hit)}.`); @@ -380,7 +385,7 @@ function fingerprintIndicators(r){ // TLS issuer familiarity / entropy heuristic (only if issuer present) const issuer = safe(r.tlsIssuer); if(issuer){ - const known = parseList($("knownIssuers").value).some(k => issuer.toLowerCase().includes(k.toLowerCase())); + const known = splitAny($("knownIssuers").value).some(k => issuer.toLowerCase().includes(k.toLowerCase())); if(!known){ out.push(`unfamiliar-issuer TLS issuer not in known list.`); } @@ -398,21 +403,6 @@ function fingerprintIndicators(r){ return out; } -// Shannon entropy of a string (for heuristic use only). -function shannonEntropy(str){ - const s = (str||""); - if(!s) return 0; - const freq = new Map(); - for(const ch of s){ - freq.set(ch, (freq.get(ch)||0)+1); - } - let ent = 0; - for(const [_, count] of freq){ - const p = count / s.length; - ent -= p * Math.log2(p); - } - return ent; -} function renderGroups(){ const byVariant = {}; const byTld = {}; diff --git a/site/js/utilities.js b/site/js/utilities.js index 0914a88..e0b69d8 100644 --- a/site/js/utilities.js +++ b/site/js/utilities.js @@ -39,67 +39,6 @@ function registrableHint(domain){ return parts.slice(-2).join("."); } -function levenshtein(a,b){ - a = a||""; b = b||""; - const n=a.length, m=b.length; - const dp = Array.from({length:n+1}, ()=>Array(m+1).fill(0)); - for(let i=0;i<=n;i++) dp[i][0]=i; - for(let j=0;j<=m;j++) dp[0][j]=j; - for(let i=1;i<=n;i++){ - for(let j=1;j<=m;j++){ - const cost = a[i-1]===b[j-1]?0:1; - dp[i][j] = Math.min( - dp[i-1][j]+1, - dp[i][j-1]+1, - dp[i-1][j-1]+cost - ); - // transposition (Damerau-lite) - if(i>1 && j>1 && a[i-1]===b[j-2] && a[i-2]===b[j-1]){ - dp[i][j] = Math.min(dp[i][j], dp[i-2][j-2]+1); - } - } - } - return dp[n][m]; -} - -function classifyVariant(baseDomain, candidateDomain){ - const base = parseDomainParts(baseDomain||""); - const cand = parseDomainParts(candidateDomain||""); - if(!baseDomain) return {variantClass:"unknown", editDistance:null, tld:cand.tld, tldOnly:false}; - - // Compare SLDs only; treat TLD-only separately - const baseSLD = base.sld; - const candSLD = cand.sld; - const tldOnly = (baseSLD === candSLD) && (base.tld !== cand.tld); - const dist = levenshtein(baseSLD, candSLD); - - if(tldOnly) return {variantClass:"tld", editDistance:0, tld:cand.tld, tldOnly:true}; - - // quick classifiers for single-edit categories - if(dist === 1){ - if(baseSLD.length + 1 === candSLD.length) return {variantClass:"insert", editDistance:1, tld:cand.tld, tldOnly:false}; - if(baseSLD.length - 1 === candSLD.length) return {variantClass:"delete", editDistance:1, tld:cand.tld, tldOnly:false}; - if(baseSLD.length === candSLD.length) return {variantClass:"substitute", editDistance:1, tld:cand.tld, tldOnly:false}; - } - - // transpose check: distance 1 with same length often captures adjacent transpositions already, but be explicit - if(baseSLD.length === candSLD.length){ - let diffs = []; - for(let i=0;i2) break; - } - if(diffs.length===2){ - const [i,j]=diffs; - if(j===i+1 && baseSLD[i]===candSLD[j] && baseSLD[j]===candSLD[i]){ - return {variantClass:"transpose", editDistance:dist, tld:cand.tld, tldOnly:false}; - } - } - } - - return {variantClass: dist<=2 ? "other" : "other", editDistance:dist, tld:cand.tld, tldOnly:false}; -} - function scoreRecord(r, cfg){ const sinkholes = cfg.sinkholes; const indicators = cfg.indicators;