generated from ubiquity/ts-template
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #149 from sshivaditya2019/devsim
Client Side Searching
- Loading branch information
Showing 9 changed files with 458 additions and 26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,9 @@ static/dist | |
|
||
cypress/screenshots | ||
cypress/videos | ||
|
||
# Wrangler | ||
.wrangler | ||
|
||
# vscode | ||
.vscode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
import { GitHubIssue } from "./github-types"; | ||
import { TaskManager } from "./task-manager"; | ||
import { SearchResult, SearchWeights, SearchConfig } from "./types/search-types"; | ||
import { SearchScorer } from "./search/search-scorer"; | ||
|
||
/**
 * Client-side search over the issues known to a {@link TaskManager}.
 *
 * Each issue is scored on four components (title, body, fuzzy, meta) by a
 * {@link SearchScorer}; the components are combined with the fixed weights in
 * `_weights`. Prefixing the query with `?` turns the fuzzy component on.
 */
export class IssueSearch {
  /** Relative contribution of each score component to the total (sums to 1). */
  private readonly _weights: SearchWeights = {
    title: 0.375,
    body: 0.25,
    fuzzy: 0.25,
    meta: 0.125,
  };

  /** Tuning values handed to the scorer. */
  private readonly _config: SearchConfig = {
    fuzzySearchThreshold: 0.7,
    exactMatchBonus: 1.0,
    fuzzyMatchWeight: 0.7,
  };

  private readonly _searchScorer: SearchScorer;

  /** Issue id -> pre-computed lower-cased "title body labels" text. */
  private _searchableIssues: Map<number, string> = new Map();

  constructor(private _taskManager: TaskManager) {
    this._searchScorer = new SearchScorer(this._config);
  }

  /**
   * Replaces the search index with the given issues.
   *
   * @param issues Issues to index; any previously indexed issues are dropped.
   */
  public async initializeIssues(issues: GitHubIssue[]) {
    this._searchableIssues.clear();
    for (const issue of issues) {
      this._searchableIssues.set(issue.id, this._getSearchableContent(issue));
    }
  }

  /**
   * Scores every indexed issue against the query.
   *
   * - A leading `?` enables fuzzy matching and is stripped from the query.
   * - An empty query marks every indexed issue visible with score 1.
   * - An indexed issue the task manager can no longer resolve is returned as
   *   not visible with score 0.
   *
   * @param searchText Raw text typed by the user.
   * @returns One result per indexed issue id.
   */
  public search(searchText: string): Map<number, SearchResult> {
    const results = new Map<number, SearchResult>();

    let filterText = searchText.toLowerCase().trim();
    const isFuzzySearchEnabled = filterText.startsWith("?");
    if (isFuzzySearchEnabled) {
      filterText = filterText.slice(1).trim();
    }

    if (!filterText) {
      for (const id of this._searchableIssues.keys()) {
        results.set(id, this._createEmptyResult());
      }
      return results;
    }

    const searchTerms = this._preprocessSearchTerms(filterText);

    for (const issueId of this._searchableIssues.keys()) {
      const issue = this._taskManager.getGitHubIssueById(issueId);
      results.set(issueId, issue ? this._calculateIssueRelevance(issue, searchTerms, isFuzzySearchEnabled) : this._createEmptyResult(false));
    }

    // NOTE(review): the returned value is not used by this method.
    this._calculateNDCGScore(results);
    return results;
  }

  /**
   * Computes the weighted relevance of a single issue.
   *
   * @param issue Issue to score.
   * @param searchTerms Lower-cased, non-empty query terms.
   * @param enableFuzzy Whether the fuzzy component is evaluated (otherwise it is 0).
   * @returns The result with match details filled in by the scorer.
   */
  private _calculateIssueRelevance(issue: GitHubIssue, searchTerms: string[], enableFuzzy: boolean): SearchResult {
    const matchDetails = {
      titleMatches: [] as string[],
      bodyMatches: [] as string[],
      labelMatches: [] as string[],
      numberMatch: false,
      fuzzyMatches: [] as Array<{
        original: string;
        matched: string;
        score: number;
      }>,
    };

    // Only a missing cache entry should trigger a rebuild, hence `??` rather than `||`.
    const searchableContent = this._searchableIssues.get(issue.id) ?? this._getSearchableContent(issue);

    const scores = {
      title: this._searchScorer.calculateTitleScore(issue, searchTerms, matchDetails),
      body: this._searchScorer.calculateBodyScore(issue, searchTerms, matchDetails),
      fuzzy: enableFuzzy ? this._searchScorer.calculateFuzzyScore(searchableContent, searchTerms, matchDetails) : 0,
      meta: this._searchScorer.calculateMetaScore(issue, searchTerms, matchDetails),
    };

    // Explicit property access keeps the sum type-checked without a `keyof` assertion.
    const totalScore =
      scores.title * this._weights.title +
      scores.body * this._weights.body +
      scores.fuzzy * this._weights.fuzzy +
      scores.meta * this._weights.meta;

    const isVisible = totalScore > 0 || matchDetails.numberMatch;

    return {
      visible: isVisible,
      score: isVisible ? totalScore : 0,
      matchDetails,
    };
  }

  /**
   * Computes a normalized DCG over the visible results' scores.
   *
   * NOTE(review): the scores are ranked in descending order before the gain is
   * computed, which is also the ideal ordering. DCG therefore equals IDCG and
   * the value is 1 whenever the gain is non-zero. Confirm what ranking this
   * was meant to evaluate.
   *
   * @returns 0 when there are no visible results or the gain is 0, otherwise dcg / idcg.
   */
  private _calculateNDCGScore(results: Map<number, SearchResult>): number {
    const scores = Array.from(results.values())
      .filter((r) => r.visible)
      .map((r) => r.score)
      .sort((a, b) => b - a);

    if (scores.length === 0) return 0;

    const dcg = scores.reduce((sum, score, index) => sum + (Math.pow(2, score) - 1) / Math.log2(index + 2), 0);

    // `scores` is already in descending order, so re-sorting a copy to build
    // the ideal ranking would reproduce the same sequence.
    const idcg = dcg;

    return idcg === 0 ? 0 : dcg / idcg;
  }

  /** Splits the query on whitespace into lower-cased, non-empty terms. */
  private _preprocessSearchTerms(searchText: string): string[] {
    return searchText
      .split(/\s+/)
      .filter(Boolean)
      .map((term) => term.toLowerCase());
  }

  /**
   * Builds the lower-cased text used for fuzzy matching: the title, the body
   * with URLs removed, and the names of object-shaped labels.
   */
  private _getSearchableContent(issue: GitHubIssue): string {
    // Strips anything starting with http://, https:// or www. up to the next whitespace.
    const removeUrls = (text: string): string => text.replace(/(?:https?:\/\/|www\.)[^\s]+/g, "");

    const title = issue.title;
    const body = removeUrls(issue.body ?? "");
    const labels = issue.labels?.map((l) => (typeof l === "object" && l.name ? l.name : "")).join(" ") ?? "";

    return `${title} ${body} ${labels}`.toLowerCase();
  }

  /**
   * Creates a result with no match details.
   *
   * @param visible Whether the issue should be shown; visible results get score 1, hidden ones 0.
   */
  private _createEmptyResult(visible: boolean = true): SearchResult {
    return {
      visible,
      score: visible ? 1 : 0,
      matchDetails: {
        titleMatches: [],
        bodyMatches: [],
        labelMatches: [],
        numberMatch: false,
        fuzzyMatches: [],
      },
    };
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
import { GitHubIssue } from "../github-types"; | ||
import { SearchConfig, SearchResult } from "../types/search-types"; | ||
import { StringSimilarity } from "./string-similarity"; | ||
|
||
/**
 * Computes the individual relevance components (title, body, meta, fuzzy) of
 * an issue for a list of search terms.
 *
 * Every method records what it matched in the `matchDetails` object passed in
 * by the caller. Terms are compared against lower-cased issue text, so callers
 * are expected to pass lower-cased terms.
 */
export class SearchScorer {
  constructor(private _config: SearchConfig) {}

  /**
   * Scores term occurrences in the issue title.
   *
   * Each term contained in the title earns `exactMatchBonus` plus a boost for
   * every title word that starts with it. A multi-term query that appears
   * verbatim (joined by single spaces) earns one extra point.
   *
   * @returns Score capped at 3.
   */
  public calculateTitleScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    const title = issue.title.toLowerCase();
    const words = title.split(/\s+/);
    let score = 0;

    for (const term of searchTerms) {
      if (!title.includes(term)) continue;

      matchDetails.titleMatches.push(term);
      score += this._config.exactMatchBonus;

      for (const word of words) {
        if (word.startsWith(term)) {
          score += this._wordStartBoost(term, word);
        }
      }
    }

    if (searchTerms.length > 1 && title.includes(searchTerms.join(" "))) {
      score += 1;
    }
    return Math.min(score, 3);
  }

  /**
   * Scores term occurrences in the issue body.
   *
   * Per term: the summed word-start boosts (capped at 1) plus 0.5 for every
   * fenced code block that contains the term.
   *
   * @returns Score capped at 2.
   */
  public calculateBodyScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    const body = (issue.body || "").toLowerCase();
    const words = body.split(/\s+/);
    // The fenced blocks do not depend on the term, so extract them once.
    // `body` is already lower-cased, so the blocks are too.
    const codeBlocks = body.match(/```[\s\S]*?```/g) || [];
    let score = 0;

    for (const term of searchTerms) {
      let termScore = 0;
      for (const word of words) {
        if (word.startsWith(term)) {
          termScore += this._wordStartBoost(term, word);
        }
      }

      if (termScore > 0) {
        matchDetails.bodyMatches.push(term);
        score += Math.min(termScore, 1);
      }

      for (const block of codeBlocks) {
        if (block.includes(term)) {
          score += 0.5;
        }
      }
    }
    return Math.min(score, 2);
  }

  /**
   * Scores matches against the issue number and label names.
   *
   * An all-digit term equal to the issue number earns 2 and sets
   * `matchDetails.numberMatch`. Every numeric term is considered, so a query
   * such as "12 34" still matches issue #34. Each (term, label) pair where the
   * label name contains the term earns 0.8 if the name starts with the term,
   * otherwise 0.5.
   *
   * @returns Uncapped score.
   */
  public calculateMetaScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;

    const issueNumber = issue.number.toString();
    if (searchTerms.some((term) => /^\d+$/.test(term) && term === issueNumber)) {
      matchDetails.numberMatch = true;
      score += 2;
    }

    if (issue.labels) {
      for (const term of searchTerms) {
        for (const label of issue.labels) {
          if (typeof label !== "object" || !label.name) continue;

          const labelName = label.name.toLowerCase();
          if (!labelName.includes(term)) continue;

          matchDetails.labelMatches.push(label.name);
          score += labelName.startsWith(term) ? 0.8 : 0.5;
        }
      }
    }

    return score;
  }

  /**
   * Scores approximate matches between the terms and the words of `content`.
   *
   * For each term the best word is the one with the highest
   * (similarity + word-start boost) above `fuzzySearchThreshold`. That score
   * is multiplied by `exp(fuzzyMatchWeight)` when the word starts with the
   * term, otherwise by `fuzzyMatchWeight`.
   *
   * @param content Text to search (tokenized internally).
   * @returns Score capped at 2.
   */
  public calculateFuzzyScore(content: string, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    const contentWords = this._tokenizeContent(content);
    let score = 0;

    for (const searchTerm of searchTerms) {
      let bestMatch = {
        word: "",
        score: 0,
        isWordStart: false,
      };

      for (const word of contentWords) {
        const similarity = StringSimilarity.calculate(searchTerm, word);
        const isWordStart = word.startsWith(searchTerm);
        const adjustedScore = similarity + (isWordStart ? this._wordStartBoost(searchTerm, word) : 0);

        if (adjustedScore > this._config.fuzzySearchThreshold && adjustedScore > bestMatch.score) {
          bestMatch = { word, score: adjustedScore, isWordStart };
        }
      }

      if (bestMatch.score > 0) {
        matchDetails.fuzzyMatches.push({
          original: searchTerm,
          matched: bestMatch.word,
          score: bestMatch.score,
        });

        score += bestMatch.isWordStart ? bestMatch.score * Math.exp(this._config.fuzzyMatchWeight) : bestMatch.score * this._config.fuzzyMatchWeight;
      }
    }

    return Math.min(score, 2);
  }

  /** Boost for `word` starting with `term`: e^(-term.length / word.length). */
  private _wordStartBoost(term: string, word: string): number {
    return Math.exp(-term.length / word.length);
  }

  /** Lower-cases, replaces non-word characters with spaces, and keeps words longer than two characters. */
  private _tokenizeContent(content: string): string[] {
    return content
      .toLowerCase()
      .replace(/[^\w\s]/g, " ")
      .split(/\s+/)
      .filter((word) => word.length > 2);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/**
 * Normalized Levenshtein similarity between two strings.
 */
export class StringSimilarity {
  /**
   * Returns a similarity in [0, 1]: 1 minus the edit distance divided by the
   * longer string's length. Two empty strings are considered identical (1).
   *
   * Strings are compared by Unicode code point, so a character outside the
   * Basic Multilingual Plane (e.g. an emoji) counts as one symbol rather than
   * two UTF-16 code units. Combining sequences are not merged.
   *
   * @param str1 First string.
   * @param str2 Second string.
   * @returns 1 for identical strings, 0 when every symbol differs.
   */
  public static calculate(str1: string, str2: string): number {
    const source = Array.from(str1);
    const target = Array.from(str2);

    const maxLen = Math.max(source.length, target.length);
    if (maxLen === 0) return 1.0;

    // Reference the class by name rather than `this`, so the method still
    // works when passed around detached (e.g. as a callback).
    const distance = StringSimilarity._calculateLevenshteinDistance(source, target);
    return 1 - distance / maxLen;
  }

  /**
   * Levenshtein distance (insert / delete / substitute, each cost 1) between
   * two symbol sequences, using two rolling rows instead of a full matrix.
   */
  private static _calculateLevenshteinDistance(source: readonly string[], target: readonly string[]): number {
    if (source.length === 0) return target.length;
    if (target.length === 0) return source.length;

    // previous[i] = distance between source[0..i) and target[0..j-1)
    let previous: number[] = Array.from({ length: source.length + 1 }, (_, i) => i);
    let current: number[] = new Array<number>(source.length + 1).fill(0);

    for (let j = 1; j <= target.length; j++) {
      current[0] = j;
      for (let i = 1; i <= source.length; i++) {
        const substitutionCost = source[i - 1] === target[j - 1] ? 0 : 1;
        current[i] = Math.min(
          current[i - 1] + 1, // insertion
          previous[i] + 1, // deletion
          previous[i - 1] + substitutionCost // substitution
        );
      }
      [previous, current] = [current, previous];
    }

    // After the final swap the last computed row lives in `previous`.
    return previous[source.length];
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import { GitHubIssue } from "../github-types"; | ||
import { taskManager } from "../home"; | ||
|
||
/**
 * Runs the shared issue searcher and returns the matching issues, best match first.
 *
 * Results with a score of 0 are dropped, the rest are ordered by descending
 * score, and ids the task manager cannot resolve are skipped.
 *
 * @param filterText Raw search text entered by the user.
 * @returns Matching issues ordered by descending relevance score.
 */
export function filterIssuesBySearch(filterText: string) {
  const scoredEntries = Array.from(taskManager.issueSearcher.search(filterText).entries());

  // Keep only positive scores, then rank the survivors from highest to lowest.
  const matches = scoredEntries.filter((entry) => entry[1].score > 0);
  matches.sort((left, right) => right[1].score - left[1].score);

  // Map the ranked ids back to issues, discarding ids that no longer resolve.
  const rankedIssues = matches.map((entry) => taskManager.getGitHubIssueById(entry[0]));
  return rankedIssues.filter((issue): issue is GitHubIssue => issue !== undefined);
}
Oops, something went wrong.