Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Client Side Searching #149

Merged
merged 19 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ static/dist

cypress/screenshots
cypress/videos

# Wrangler
.wrangler

# vscode
.vscode
157 changes: 157 additions & 0 deletions src/home/issues-search.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import { GitHubIssue } from "./github-types";
import { TaskManager } from "./task-manager";
import { SearchResult, SearchWeights, SearchConfig } from "./types/search-types";
import { SearchScorer } from "./search/search-scorer";

/**
 * Client-side relevance search over GitHub issues.
 *
 * Each issue receives a title, body, fuzzy and meta (issue number / labels)
 * partial score from {@link SearchScorer}; the partial scores are combined
 * into one weighted total that decides both visibility and ranking.
 */
export class IssueSearch {
  /** Relative contribution of each partial score to an issue's total score. */
  private readonly _weights: SearchWeights = {
    title: 0.375,
    body: 0.25,
    fuzzy: 0.25,
    meta: 0.125,
  };

  /** Tuning values handed to the scorer. */
  private readonly _config: SearchConfig = {
    fuzzySearchThreshold: 0.7,
    exactMatchBonus: 1.0,
    fuzzyMatchWeight: 0.7,
  };

  private readonly _searchScorer: SearchScorer;

  /** Lower-cased "title body labels" text per issue id, consumed by fuzzy matching. */
  private _searchableIssues: Map<number, string> = new Map();

  constructor(private _taskManager: TaskManager) {
    this._searchScorer = new SearchScorer(this._config);
  }

  /**
   * Rebuilds the searchable-text cache from scratch for the given issues.
   *
   * @param issues Issues whose searchable text should be cached, keyed by `issue.id`.
   */
  public async initializeIssues(issues: GitHubIssue[]): Promise<void> {
    this._searchableIssues.clear();
    for (const issue of issues) {
      this._searchableIssues.set(issue.id, this._getSearchableContent(issue));
    }
  }

  /**
   * Scores every requested issue against the search text.
   *
   * A leading `?` enables fuzzy matching and is stripped from the query.
   * When the remaining query is empty, every issue is returned visible with a
   * score of 1. Ids that the task manager cannot resolve are returned hidden
   * with a score of 0.
   *
   * @param searchText Raw text typed by the user.
   * @param issueIds Ids of the issues to evaluate.
   * @returns One result per requested id.
   */
  public search(searchText: string, issueIds: number[]): Map<number, SearchResult> {
    const results = new Map<number, SearchResult>();

    let filterText = searchText.toLowerCase().trim();
    const isFuzzySearchEnabled = filterText.startsWith("?");
    if (isFuzzySearchEnabled) {
      filterText = filterText.slice(1).trim();
    }

    if (!filterText) {
      for (const id of issueIds) {
        results.set(id, this._createEmptyResult());
      }
      return results;
    }

    const searchTerms = this._preprocessSearchTerms(filterText);

    for (const issueId of issueIds) {
      const issue = this._taskManager.getGitHubIssueById(issueId);
      results.set(issueId, issue ? this._calculateIssueRelevance(issue, searchTerms, isFuzzySearchEnabled) : this._createEmptyResult(false));
    }

    return results;
  }

  /**
   * Computes the weighted relevance of a single issue.
   *
   * @param issue Issue to score.
   * @param searchTerms Lower-cased, whitespace-separated query terms.
   * @param enableFuzzy Whether the fuzzy partial score is computed; it is 0 otherwise.
   * @returns Visibility, total score (0 when hidden) and the collected match details.
   */
  private _calculateIssueRelevance(issue: GitHubIssue, searchTerms: string[], enableFuzzy: boolean): SearchResult {
    // Filled in by the scorer calls below.
    const matchDetails: SearchResult["matchDetails"] = {
      titleMatches: [],
      bodyMatches: [],
      labelMatches: [],
      numberMatch: false,
      fuzzyMatches: [],
    };

    // Fall back to building the text on the fly for issues that were never cached.
    const searchableContent = this._searchableIssues.get(issue.id) ?? this._getSearchableContent(issue);

    const titleScore = this._searchScorer.calculateTitleScore(issue, searchTerms, matchDetails);
    const bodyScore = this._searchScorer.calculateBodyScore(issue, searchTerms, matchDetails);
    const fuzzyScore = enableFuzzy ? this._searchScorer.calculateFuzzyScore(searchableContent, searchTerms, matchDetails) : 0;
    const metaScore = this._searchScorer.calculateMetaScore(issue, searchTerms, matchDetails);

    const totalScore =
      titleScore * this._weights.title + bodyScore * this._weights.body + fuzzyScore * this._weights.fuzzy + metaScore * this._weights.meta;

    const isVisible = totalScore > 0 || matchDetails.numberMatch;

    return {
      visible: isVisible,
      score: isVisible ? totalScore : 0,
      matchDetails,
    };
  }

  /** Splits the query on whitespace into non-empty, lower-cased terms. */
  private _preprocessSearchTerms(searchText: string): string[] {
    return searchText
      .split(/\s+/)
      .filter(Boolean)
      .map((term) => term.toLowerCase());
  }

  /**
   * Builds the lower-cased "title body labels" text for an issue.
   * URLs are stripped from the body; only object labels with a name contribute.
   */
  private _getSearchableContent(issue: GitHubIssue): string {
    const removeUrls = (text: string): string => text.replace(/(?:https?:\/\/|http?:\/\/|www\.)[^\s]+/g, "");

    const title = issue.title;
    const body = removeUrls(issue.body || "");
    // `typeof null === "object"`, so guard against null before reading `name`.
    const labels = issue.labels?.map((l) => (typeof l === "object" && l !== null && l.name ? l.name : "")).join(" ") || "";

    return `${title} ${body} ${labels}`.toLowerCase();
  }

  /**
   * Creates a result with no match details.
   *
   * @param visible Whether the issue should be shown; visible results score 1, hidden ones 0.
   */
  private _createEmptyResult(visible: boolean = true): SearchResult {
    return {
      visible,
      score: visible ? 1 : 0,
      matchDetails: {
        titleMatches: [],
        bodyMatches: [],
        labelMatches: [],
        numberMatch: false,
        fuzzyMatches: [],
      },
    };
  }
}
146 changes: 146 additions & 0 deletions src/home/search/search-scorer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import { GitHubIssue } from "../github-types";
import { SearchConfig, SearchResult } from "../types/search-types";
import { StringSimilarity } from "./string-similarity";

/**
 * Computes the partial relevance scores (title, body, meta, fuzzy) of an issue
 * for a list of search terms, recording what matched in `matchDetails`.
 *
 * All methods expect the search terms to be lower-cased already; issue text is
 * lower-cased here before comparison.
 */
export class SearchScorer {
  constructor(private _config: SearchConfig) {}

  /**
   * Scores how well the terms match the issue title.
   *
   * Every term contained in the title earns `exactMatchBonus`, plus
   * `exp(-term.length / word.length)` for each title word that starts with the
   * term. One extra point is added when the title contains all terms joined by
   * single spaces.
   *
   * @returns The title score, capped at 3.
   */
  public calculateTitleScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const title = issue.title.toLowerCase();
    const words = title.split(/\s+/);

    for (const term of searchTerms) {
      if (!title.includes(term)) {
        continue;
      }

      matchDetails.titleMatches.push(term);
      score += this._config.exactMatchBonus;

      // Prefix boost: close to 1 when the term is a short prefix of the word,
      // falling to e^-1 when the term is the whole word.
      for (const word of words) {
        if (word.startsWith(term)) {
          score += Math.exp(-term.length / word.length);
        }
      }
    }

    if (searchTerms.length > 1 && title.includes(searchTerms.join(" "))) {
      score += 1;
    }
    return Math.min(score, 3);
  }

  /**
   * Scores how well the terms match the issue body.
   *
   * Per term, body words starting with the term contribute
   * `exp(-term.length / word.length)` each (at most 1 per term in total), and
   * every fenced code block containing the term adds 0.5.
   *
   * @returns The body score, capped at 2.
   */
  public calculateBodyScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const body = (issue.body ?? "").toLowerCase();
    const words = body.split(/\s+/);
    // The fenced blocks do not depend on the term, so extract them once.
    // They come from the lower-cased body and need no further case folding.
    const codeBlocks = body.match(/```[\s\S]*?```/g) ?? [];

    for (const term of searchTerms) {
      let termScore = 0;
      for (const word of words) {
        if (word.startsWith(term)) {
          termScore += Math.exp(-term.length / word.length);
        }
      }

      if (termScore > 0) {
        matchDetails.bodyMatches.push(term);
        score += Math.min(termScore, 1);
      }

      for (const block of codeBlocks) {
        if (block.includes(term)) {
          score += 0.5;
        }
      }
    }
    return Math.min(score, 2);
  }

  /**
   * Scores matches against the issue number and label names.
   *
   * Adds 2 (and sets `numberMatch`) when any all-digit term equals the issue
   * number. For every term/label pair, adds 0.8 when the label name starts
   * with the term, or 0.5 when it merely contains it.
   *
   * @returns The uncapped meta score.
   */
  public calculateMetaScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;

    // Check every numeric term, not just the first one, so "12 34" finds issue #34.
    const issueNumber = issue.number.toString();
    if (searchTerms.some((term) => /^\d+$/.test(term) && term === issueNumber)) {
      matchDetails.numberMatch = true;
      score += 2;
    }

    if (issue.labels) {
      for (const term of searchTerms) {
        for (const label of issue.labels) {
          // `typeof null === "object"`, so guard against null before reading `name`.
          if (typeof label !== "object" || label === null || !label.name) {
            continue;
          }
          const labelName = label.name.toLowerCase();
          if (!labelName.includes(term)) {
            continue;
          }
          matchDetails.labelMatches.push(label.name);
          score += labelName.startsWith(term) ? 0.8 : 0.5;
        }
      }
    }

    return score;
  }

  /**
   * Scores approximate matches between the terms and the words of `content`.
   *
   * For each term, the best word is the one with the highest
   * `similarity + prefix boost` above `fuzzySearchThreshold`. That best score
   * is multiplied by `exp(fuzzyMatchWeight)` when the word starts with the
   * term, or by `fuzzyMatchWeight` otherwise.
   *
   * @param content Text to search; tokenized into words longer than two characters.
   * @returns The fuzzy score, capped at 2.
   */
  public calculateFuzzyScore(content: string, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const contentWords = this._tokenizeContent(content);

    for (const searchTerm of searchTerms) {
      let bestWord = "";
      let bestScore = 0;
      let bestIsWordStart = false;

      for (const word of contentWords) {
        const similarity = StringSimilarity.calculate(searchTerm, word);
        const isWordStart = word.startsWith(searchTerm);
        const prefixBoost = isWordStart ? Math.exp(-searchTerm.length / word.length) : 0;
        const adjustedScore = similarity + prefixBoost;

        if (adjustedScore > this._config.fuzzySearchThreshold && adjustedScore > bestScore) {
          bestWord = word;
          bestScore = adjustedScore;
          bestIsWordStart = isWordStart;
        }
      }

      if (bestScore > 0) {
        matchDetails.fuzzyMatches.push({
          original: searchTerm,
          matched: bestWord,
          score: bestScore,
        });

        score += bestIsWordStart ? bestScore * Math.exp(this._config.fuzzyMatchWeight) : bestScore * this._config.fuzzyMatchWeight;
      }
    }

    return Math.min(score, 2);
  }

  /** Lower-cases the text, replaces punctuation with spaces and keeps words longer than two characters. */
  private _tokenizeContent(content: string): string[] {
    return content
      .toLowerCase()
      .replace(/[^\w\s]/g, " ")
      .split(/\s+/)
      .filter((word) => word.length > 2);
  }
}
31 changes: 31 additions & 0 deletions src/home/search/string-similarity.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/** Normalised string similarity based on Levenshtein edit distance. */
export class StringSimilarity {
  /**
   * Returns a similarity in [0, 1]: 1 for identical strings (including two
   * empty strings), decreasing as more edits are needed relative to the
   * longer string's length.
   */
  public static calculate(str1: string, str2: string): number {
    const longest = Math.max(str1.length, str2.length);
    if (longest === 0) {
      return 1.0;
    }

    const edits = this._calculateLevenshteinDistance(str1, str2);
    return 1 - edits / longest;
  }

  /**
   * Computes the number of single-character insertions, deletions and
   * substitutions needed to turn one string into the other.
   */
  private static _calculateLevenshteinDistance(str1: string, str2: string): number {
    const rowCount = str2.length + 1;
    const colCount = str1.length + 1;

    // table[row][col] = distance between the first `col` chars of str1 and the
    // first `row` chars of str2. Row 0 and column 0 are the base cases.
    const table: number[][] = Array.from({ length: rowCount }, (_unusedRow, row) =>
      Array.from({ length: colCount }, (_unusedCol, col) => (row === 0 ? col : col === 0 ? row : 0))
    );

    for (let row = 1; row < rowCount; row++) {
      for (let col = 1; col < colCount; col++) {
        const substitutionCost = str1[col - 1] === str2[row - 1] ? 0 : 1;
        const viaLeft = table[row][col - 1] + 1;
        const viaAbove = table[row - 1][col] + 1;
        const viaDiagonal = table[row - 1][col - 1] + substitutionCost;
        table[row][col] = Math.min(viaLeft, viaAbove, viaDiagonal);
      }
    }

    return table[rowCount - 1][colCount - 1];
  }
}
Loading
Loading