Skip to content

Commit

Permalink
fix: updated regex to better match urls
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Nov 1, 2024
1 parent 25e06c2 commit 0035740
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 147 deletions.
4 changes: 2 additions & 2 deletions src/home/issues-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export class IssueSearch {
public search(searchText: string, issueIds: number[]): Map<number, SearchResult> {
let filterText = searchText.toLowerCase().trim();
const results = new Map<number, SearchResult>();
const isFuzzySearchEnabled = filterText.startsWith('?');
const isFuzzySearchEnabled = filterText.startsWith("?");

if (isFuzzySearchEnabled) {
filterText = filterText.slice(1).trim();
Expand Down Expand Up @@ -131,7 +131,7 @@ export class IssueSearch {
private _getSearchableContent(issue: GitHubIssue): string {
// Remove URLs from the content
/**
 * Strips URL-like tokens from `text`.
 *
 * A token is removed when it starts with `http://`, `https://` or `www.` and
 * extends up to (not including) the next whitespace character. Surrounding
 * whitespace is left untouched, so removing a URL between two spaces leaves
 * both spaces in place.
 *
 * @param text - Raw text that may contain links.
 * @returns `text` with every URL-like token replaced by the empty string.
 */
const removeUrls = (text: string): string => {
  // `https?` already covers plain `http`, so a separate `http?://` alternative
  // would only add matches for the malformed scheme `htt://`.
  return text.replace(/(?:https?:\/\/|www\.)[^\s]+/g, "");
};

const title = issue.title;
Expand Down
272 changes: 127 additions & 145 deletions src/home/search/search-scorer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,162 +3,144 @@ import { SearchConfig, SearchResult } from "../types/search-types";
import { StringSimilarity } from "./string-similarity";

/**
 * Computes relevance scores for an issue against a list of search terms.
 *
 * Each `calculate*Score` method returns a numeric score and, as a side effect,
 * records what matched in the `matchDetails` object passed by the caller.
 * Callers are expected to pass terms already lower-cased: titles, bodies and
 * label names are lower-cased here before comparison, the terms are not.
 */
export class SearchScorer {
  constructor(private readonly _config: SearchConfig) {}

  /**
   * Scores how well the issue title matches the search terms.
   *
   * @param issue - Issue whose `title` is inspected.
   * @param searchTerms - Terms to look for in the title.
   * @param matchDetails - Receives each matching term in `titleMatches`.
   * @returns The accumulated score, capped at 3.
   */
  public calculateTitleScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const title = issue.title.toLowerCase();
    const words = title.split(/\s+/);

    searchTerms.forEach((term) => {
      if (title.includes(term)) {
        matchDetails.titleMatches.push(term);
        score += this._config.exactMatchBonus;

        // Extra boost for every title word that begins with the term:
        // e^(-x), where x is the term length divided by the word length.
        words.forEach((word) => {
          if (word.startsWith(term)) {
            const positionBoost = Math.exp(-term.length / word.length);
            score += positionBoost;
          }
        });
      }
    });

    // Bonus when the terms appear in the title as one contiguous phrase.
    if (searchTerms.length > 1 && title.includes(searchTerms.join(" "))) {
      score += 1;
    }
    return Math.min(score, 3);
  }

  /**
   * Scores how well the issue body matches the search terms.
   *
   * @param issue - Issue whose `body` is inspected; a missing body is treated as empty.
   * @param searchTerms - Terms to look for at the start of body words.
   * @param matchDetails - Receives each matching term in `bodyMatches`.
   * @returns The accumulated score, capped at 2.
   */
  public calculateBodyScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const body = (issue.body || "").toLowerCase();
    const words = body.split(/\s+/);
    // The fenced code blocks do not depend on the term, so extract them once.
    // `body` is already lower-cased, hence so is every block.
    const codeBlocks = body.match(/```[\s\S]*?```/g) || [];

    searchTerms.forEach((term) => {
      let termScore = 0;
      words.forEach((word) => {
        if (word.startsWith(term)) {
          // e^(-x), where x is the term length divided by the word length.
          const positionBoost = Math.exp(-term.length / word.length);
          termScore += positionBoost;
        }
      });

      if (termScore > 0) {
        matchDetails.bodyMatches.push(term);
        // A single term contributes at most 1 from word-start matches.
        score += Math.min(termScore, 1);
      }

      // Flat bonus for each fenced code block that contains the term.
      codeBlocks.forEach((block) => {
        if (block.includes(term)) {
          score += 0.5;
        }
      });
    });
    return Math.min(score, 2);
  }

  /**
   * Scores matches against issue metadata: the issue number and label names.
   *
   * @param issue - Issue whose `number` and `labels` are inspected.
   * @param searchTerms - Terms to compare; the first all-digit term is compared with the issue number.
   * @param matchDetails - `numberMatch` is set on a number hit; matching label names are appended to `labelMatches`.
   * @returns The accumulated score (not capped).
   */
  public calculateMetaScore(issue: GitHubIssue, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const numberTerm = searchTerms.find((term) => /^\d+$/.test(term));
    if (numberTerm && issue.number.toString() === numberTerm) {
      matchDetails.numberMatch = true;
      score += 2;
    }
    if (issue.labels) {
      searchTerms.forEach((term) => {
        issue.labels?.forEach((label) => {
          // Only object labels with a non-empty name are considered.
          if (typeof label === "object" && label.name) {
            const labelName = label.name.toLowerCase();
            if (labelName.includes(term)) {
              matchDetails.labelMatches.push(label.name);
              // A label that starts with the term is worth more than one
              // that merely contains it.
              if (labelName.startsWith(term)) {
                score += 0.8;
              } else {
                score += 0.5;
              }
            }
          }
        });
      });
    }

    return score;
  }

  /**
   * Scores approximate matches between the search terms and words in `content`.
   *
   * For each term, the best-scoring content word is selected using
   * `StringSimilarity.calculate` plus a word-start boost; a word only
   * qualifies when its adjusted score exceeds `fuzzySearchThreshold`.
   *
   * @param content - Text to tokenize and compare against.
   * @param searchTerms - Terms to match approximately.
   * @param matchDetails - Receives one `fuzzyMatches` entry per term that found a qualifying word.
   * @returns The accumulated weighted score, capped at 2.
   */
  public calculateFuzzyScore(content: string, searchTerms: string[], matchDetails: SearchResult["matchDetails"]): number {
    let score = 0;
    const contentWords = this._tokenizeContent(content);

    searchTerms.forEach((searchTerm) => {
      let bestMatch = {
        word: "",
        score: 0,
        isWordStart: false,
      };

      contentWords.forEach((word) => {
        const similarity = StringSimilarity.calculate(searchTerm, word);
        const isWordStart = word.startsWith(searchTerm);

        // Words that begin with the term get an additional e^(-x) boost,
        // where x is the term length divided by the word length.
        const positionBoost = isWordStart ? Math.exp(-searchTerm.length / word.length) : 0;
        const adjustedScore = similarity + positionBoost;

        if (adjustedScore > this._config.fuzzySearchThreshold && adjustedScore > bestMatch.score) {
          bestMatch = {
            word,
            score: adjustedScore,
            isWordStart,
          };
        }
      });

      if (bestMatch.score > 0) {
        matchDetails.fuzzyMatches.push({
          original: searchTerm,
          matched: bestMatch.word,
          score: bestMatch.score,
        });

        // Word-start matches are multiplied by e^fuzzyMatchWeight, all other
        // matches by fuzzyMatchWeight itself.
        const finalScore = bestMatch.isWordStart ? bestMatch.score * Math.exp(this._config.fuzzyMatchWeight) : bestMatch.score * this._config.fuzzyMatchWeight;

        score += finalScore;
      }
    });

    return Math.min(score, 2);
  }

  /**
   * Splits `content` into lower-cased words for fuzzy comparison.
   *
   * Characters that are neither word characters nor whitespace are replaced
   * by spaces before splitting, and words of two characters or fewer are dropped.
   */
  private _tokenizeContent(content: string): string[] {
    return content
      .toLowerCase()
      .replace(/[^\w\s]/g, " ")
      .split(/\s+/)
      .filter((word) => word.length > 2);
  }
}

0 comments on commit 0035740

Please sign in to comment.