Skip to content

Commit

Permalink
✨ improve: sort search results by textual similarity; resolve #37
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit b0afbb01381f42590b43e5095c4ef043803e57e9
Author: Huey <hello@huey.xyz>
Date:   Sat Jun 12 13:18:11 2021 +0800

    ⚡improve: sort search results using Ratcliff/Obershelp

commit 2ae77404f2296055059d90f2c1efdacb33a2e34d
Author: Huey <hello@huey.xyz>
Date:   Sat Jun 12 12:03:45 2021 +0800

    SG: return only matching citations
  • Loading branch information
hueyy committed Jun 12, 2021
1 parent a3a3011 commit 1d55392
Show file tree
Hide file tree
Showing 13 changed files with 81 additions and 32 deletions.
15 changes: 13 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "clerkent",
"version": "2.5.0",
"version": "2.6.0",
"private": true,
"description": "quick search for international caselaw and legislation",
"repository": "https://github.com/lacuna-technologies/clerkent.git",
Expand Down Expand Up @@ -47,6 +47,7 @@
"axios": "^0.21.1",
"axios-cache-adapter": "^2.7.3",
"cheerio": "^1.0.0-rc.9",
"gestalt-pattern-matcher": "^0.0.12",
"memoizee": "^0.4.15",
"qs": "^6.10.1",
"react": "^17.0.2",
Expand Down
2 changes: 1 addition & 1 deletion src/utils/Finder/CaseCitationFinder/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ export const sortCitationsByVolume = (abbrsList, citationsArray: any[], attribut
abbrsList,
citationsArray.map(c => c[attribute]),
).map(c => citationsArray.find(v => v[attribute] === c))
}
}
12 changes: 8 additions & 4 deletions src/utils/scraper/AU/AU.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Logger from '../../Logger'
import Constants from '../../Constants'
import { sortAUCitations } from '../../Finder/CaseCitationFinder/AU'
import Helpers from '../../Helpers'
import { sortByNameSimilarity } from '../utils'

const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
try {
Expand All @@ -16,10 +17,13 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.AU.id)

return sortAUCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
)
return sortByNameSimilarity(
caseName,
sortAUCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
}
Expand Down
12 changes: 8 additions & 4 deletions src/utils/scraper/CA/CA.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Logger from '../../Logger'
import Constants from '../../Constants'
import { sortCACitations } from '../../Finder/CaseCitationFinder/CA'
import Helpers from '../../Helpers'
import { sortByNameSimilarity } from '../utils'

const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
try {
Expand All @@ -16,10 +17,13 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.CA.id)

return sortCACitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
)
return sortByNameSimilarity(
caseName,
sortCACitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
}
Expand Down
8 changes: 6 additions & 2 deletions src/utils/scraper/EU/EU.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type Law from '../../../types/Law'
import Constants from '../../Constants'
import Helpers from '../../Helpers'
import Logger from '../../Logger'
import { sortByNameSimilarity } from '../utils'

const getLegislation = EURLex.getLegislation

Expand All @@ -17,7 +18,10 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.EU.id)

return Helpers.uniqueBy(results, `citation`)
return sortByNameSimilarity(
caseName,
Helpers.uniqueBy(results, `citation`),
)
} catch (error) {
Logger.error(error)
}
Expand All @@ -30,7 +34,7 @@ const getCaseByCitation = async (citation: string, court: string): Promise<Law.C
try {
return await option.getCaseByCitation(citation)
} catch (error) {
console.error(error)
Logger.error(error)
}
}
return []
Expand Down
12 changes: 8 additions & 4 deletions src/utils/scraper/HK/HK.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import Logger from '../../Logger'
import Constants from '../../Constants'
import { sortHKCitations } from '../../Finder/CaseCitationFinder/HK'
import Helpers from '../../Helpers'
import { sortByNameSimilarity } from '../utils'

const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
try {
Expand All @@ -19,10 +20,13 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.HK.id)

return sortHKCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
)
return sortByNameSimilarity(
caseName,
sortHKCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
}
Expand Down
10 changes: 7 additions & 3 deletions src/utils/scraper/NZ/NZ.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Logger from '../../Logger'
import Helpers from '../../Helpers'
import Constants from '../../Constants'
import { sortNZCitations } from '../../Finder/CaseCitationFinder/NZ'
import { sortByNameSimilarity } from '../utils'

const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
try {
Expand All @@ -16,9 +17,12 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.NZ.id)

return sortNZCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
return sortByNameSimilarity(
caseName,
sortNZCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
Expand Down
10 changes: 7 additions & 3 deletions src/utils/scraper/SG/SG.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import Helpers from '../../Helpers'
import Logger from '../../Logger'
import Constants from '../../Constants'
import { sortSGCitations } from '../../Finder/CaseCitationFinder/SG'
import { sortByNameSimilarity } from '../utils'

const getLegislation = SSO.getLegislation

Expand All @@ -21,9 +22,12 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.SG.id)

return sortSGCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
return sortByNameSimilarity(
caseName,
sortSGCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
Expand Down
6 changes: 5 additions & 1 deletion src/utils/scraper/SG/SGSC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,18 @@ const parseCase = ($: cheerio.Root, cheerioElement: cheerio.Element): Law.Case =
}
}

/**
 * Removes leading zeros from the final space-separated number of a citation,
 * e.g. `[2021] SGCA 005` becomes `[2021] SGCA 5`.
 *
 * Only the trailing token is touched, and only when it is a run of zeros
 * followed by a number starting with a non-zero digit. Digits after that first
 * non-zero digit may themselves be zero (`0105` becomes `105`); an all-zero
 * token such as ` 0` is left unchanged.
 *
 * @param citation - the citation string to normalise
 * @returns the citation with leading zeros stripped from its trailing number
 */
const trimLeadingPageZeros = (citation: string): string => citation.replace(/ 0+([1-9]\d*)$/, ` $1`)

const getCaseByCitation = async (citation: string): Promise<Law.Case[]> => {
const { data } = await Request.get(getSearchResults(citation))
const $ = cheerio.load(data)

const results = $(`.judgmentpage`)
.map((_, element) => parseCase($, element))
.get()
.filter((match: Law.Case)=> match.citation.toLowerCase() === citation.toLowerCase())
.filter(({ citation: scrapedCitation })=> (
trimLeadingPageZeros(scrapedCitation).toLowerCase() === citation.toLowerCase()
))
Logger.log(`SGSC scrape results`, results)
return results
}
Expand Down
5 changes: 1 addition & 4 deletions src/utils/scraper/SG/SLW.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ const getCaseByCitation = async (citation: string): Promise<Law.Case[]> => {

const results = data
.map(([name, link]) => parseCase(name, link))
.filter(({ citation: scrapedCitation }) => (
Helpers.isCitationValid(scrapedCitation) &&
citation === scrapedCitation
))
.filter(({ citation: scrapedCitation }) => citation.toLowerCase() === scrapedCitation.toLowerCase())
Logger.log(`SLW scrape results`, results)
return results
}
Expand Down
10 changes: 7 additions & 3 deletions src/utils/scraper/UK/UK.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Logger from '../../Logger'
import Helpers from '../../Helpers'
import { sortUKCitations } from '../../Finder/CaseCitationFinder/UK'
import Constants from '../../Constants'
import { sortByNameSimilarity } from '../utils'

const getLegislation = LegislationGovUk.getLegislation
const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
Expand All @@ -18,9 +19,12 @@ const getCaseByName = async (caseName: string): Promise<Law.Case[]> => {
.flatMap(({ value }: PromiseFulfilledResult<Law.Case[]>) => value)
.filter(({ jurisdiction }) => jurisdiction === Constants.JURISDICTIONS.UK.id)

return sortUKCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
return sortByNameSimilarity(
caseName,
sortUKCitations(
Helpers.uniqueBy(results, `citation`),
`citation`,
),
)
} catch (error) {
Logger.error(error)
Expand Down
8 changes: 8 additions & 0 deletions src/utils/scraper/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import GestaltSimilarity from 'gestalt-pattern-matcher'
import Law from '../../types/Law'

/**
 * Orders cases by how closely their `name` matches the search query, with the
 * highest `GestaltSimilarity(query, name)` score first.
 *
 * The input array is not mutated; a new array is returned. Each case is scored
 * once up front, so the similarity function is not re-run for both operands on
 * every comparison the sort makes.
 *
 * @param query - the case name the user searched for
 * @param cases - the cases to rank
 * @returns a new array containing the same cases, most similar name first
 */
export const sortByNameSimilarity = (query: string, cases: Law.Case[]): Law.Case[] => (
  cases
    .map(currentCase => ({
      currentCase,
      similarity: GestaltSimilarity(query, currentCase.name),
    }))
    // `map` above already produced a fresh array, so sorting it in place is safe
    .sort((a, b) => b.similarity - a.similarity)
    .map(({ currentCase }) => currentCase)
)

0 comments on commit 1d55392

Please sign in to comment.