Skip to content

Commit

Permalink
fix(rules): Better Redirect Rules (#1256)
Browse files Browse the repository at this point in the history
* fix(mv3): 🔧 Modifying the default local redirect behaviour.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(mv3): 🔧 Modifying the default local redirect behaviour.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(mv3): 🐛 Making rules less greedy

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(mv3): ✨ Dynamic Rules for subdomain gateways.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(types): Adding ambient types for is-ipfs.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(test):

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(test): helper

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* feat(mv3): less greedy rules

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* feat: Adding simpler regex for redirects from similar namespaces.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(lint): 🚨 Warnings

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* feat(mv3): Better Default Rules (#1260)

* refactor(mv3): blockOrRequest code

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* refactor(mv3): Port Logic for Default Rules is more robust.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* feat(test): Adding tests for default rule logic.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

---------

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* Update add-on/src/lib/redirect-handler/blockOrObserve.ts

* fix(docs): ✏️ Adding comments

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* refactor(regexfilters): Better Structure and Readability (#1261)

* refactor(regexFilters): ✨ Adding a base class for regexFilters.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* refactor(regexFilters): ♻️ Moving subdomain filter to a subclass

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* refactor(regexFilters): ♻️ Moving namespace filter to a subclass

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* refactor(regexFilters): ♻️ Moving common filter to a subclass

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* feat(regexFilters): ✨ Hooking Up All together

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(lint): ✏️ Lint

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(regexFilters): ✏️ Updating message.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(rename): ✏️ CommonPatterRedirectRegexFilter -> CommonPatternRedirectRegexFilter

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(regexFilters): ♻️ Refactor to remove call to super

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix: make _canHandle private

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix: ⚡ Fix math.min on every loop.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

---------

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(mv3): no blanket redirect for subdomains without namespaces.

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

* fix(lint): unused import

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>

---------

Signed-off-by: Nishant Arora <1895906+whizzzkid@users.noreply.github.com>
  • Loading branch information
whizzzkid authored Aug 27, 2023
1 parent 47ea944 commit 3013400
Show file tree
Hide file tree
Showing 13 changed files with 409 additions and 131 deletions.
104 changes: 104 additions & 0 deletions add-on/src/lib/redirect-handler/baseRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
 * Input shared by all regex filters: the URL that was requested and the URL it was redirected to.
 */
export interface IRegexFilter {
  originUrl: string
  redirectUrl: string
}

/**
 * Result produced by a regex filter: the pattern to match and the substitution applied on a match.
 */
export interface IFilter {
  regexFilter: string
  regexSubstitution: string
}

/**
 * Base class for all regex filters.
 *
 * The constructor parses both URLs, derives their namespaces, calls `computeFilter()` (overridden by
 * subclasses to populate `regexFilter`, `regexSubstitution` and `canHandle`) and then normalizes the
 * scheme of the resulting `regexFilter`.
 */
export class RegexFilter {
  readonly _redirectUrl: string
  readonly _originUrl: string
  readonly originURL: URL
  readonly redirectURL: URL
  readonly originNS: string
  readonly redirectNS: string
  // by default we cannot handle the request.
  private _canHandle = false
  regexFilter!: string
  regexSubstitution!: string

  /**
   * @param originUrl URL string of the original request; must be parseable by `URL`.
   * @param redirectUrl URL string of the redirect target; must be parseable by `URL`.
   * @throws TypeError when either URL cannot be parsed.
   * @throws Error when `computeFilter` has not been overridden by a subclass.
   */
  constructor ({ originUrl, redirectUrl }: IRegexFilter) {
    this._originUrl = originUrl
    this._redirectUrl = redirectUrl
    this.originURL = new URL(this._originUrl)
    this.redirectURL = new URL(this._redirectUrl)
    this.redirectNS = this.computeNamespaceFromUrl(this.redirectURL)
    this.originNS = this.computeNamespaceFromUrl(this.originURL)
    // subclasses read the fields assigned above, so this has to run last.
    this.computeFilter()
    this.normalizeRegexFilter()
  }

  /**
   * Getter for the originUrl provided at construction.
   */
  get originUrl (): string {
    return this._originUrl
  }

  /**
   * Getter for the redirectUrl provided at construction.
   */
  get redirectUrl (): string {
    return this._redirectUrl
  }

  /**
   * Getter for the canHandle flag.
   */
  get canHandle (): boolean {
    return this._canHandle
  }

  /**
   * Setter for the canHandle flag.
   */
  set canHandle (value: boolean) {
    this._canHandle = value
  }

  /**
   * Getter for the filter. This is the regex filter and substitution.
   *
   * @throws Error when the filter did not flag itself as able to handle the redirect.
   */
  get filter (): IFilter {
    if (!this.canHandle) {
      throw new Error('Cannot handle this request')
    }

    return {
      regexFilter: this.regexFilter,
      regexSubstitution: this.regexSubstitution
    }
  }

  /**
   * Compute the regex filter and substitution.
   * This is the main method that needs to be implemented by subclasses.
   */
  computeFilter (): void {
    throw new Error('Method not implemented.')
  }

  /**
   * Normalize the regex filter so that it matches both `http` and `https`.
   * This is a helper method that can be used by subclasses; calling it more than once is harmless.
   *
   * Only the scheme at the very start of the pattern (optionally preceded by the `^` anchor) is rewritten.
   * A global replace would also rewrite "http" inside hostnames or paths (e.g. `httpbin.org` would become
   * `https?bin.org`) and thereby loosen the rule.
   */
  normalizeRegexFilter (): void {
    this.regexFilter = this.regexFilter.replace(/^(\^?)https?\??/i, '$1https?')
  }

  /**
   * Compute the namespace from the URL. This finds the first path segment that is followed by a slash.
   * e.g. http://<gateway>/<namespace>/path/to/file/or/cid
   *
   * @param url URL
   * @returns the lower-cased segment, or an empty string when there is none.
   */
  computeNamespaceFromUrl ({ pathname }: URL): string {
    // regex to match the first path segment.
    return (/\/([^/]+)\//i.exec(pathname)?.[1] ?? '').toLowerCase()
  }
}
76 changes: 40 additions & 36 deletions add-on/src/lib/redirect-handler/blockOrObserve.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,24 @@
import debug from 'debug'
import browser from 'webextension-polyfill'
import { CompanionState } from '../../types/companion.js'
import { IFilter, IRegexFilter, RegexFilter } from './baseRegexFilter.js'
import { CommonPatternRedirectRegexFilter } from './commonPatternRedirectRegexFilter.js'
import { NamespaceRedirectRegexFilter } from './namespaceRedirectRegexFilter.js'
import { SubdomainRedirectRegexFilter } from './subdomainRedirectRegexFilter.js'

// this won't work in webworker context. Needs to be enabled manually
// https://github.com/debug-js/debug/issues/916
const log = debug('ipfs-companion:redirect-handler:blockOrObserve')
log.error = debug('ipfs-companion:redirect-handler:blockOrObserve:error')

export const DEFAULT_NAMESPACES = new Set(['ipfs', 'ipns'])

export const GLOBAL_STATE_CHANGE = 'GLOBAL_STATE_CHANGE'
export const GLOBAL_STATE_OPTION_CHANGE = 'GLOBAL_STATE_OPTION_CHANGE'
export const DELETE_RULE_REQUEST = 'DELETE_RULE_REQUEST'
export const DELETE_RULE_REQUEST_SUCCESS = 'DELETE_RULE_REQUEST_SUCCESS'

// We need to match the rest of the URL, so we can use a wildcard.
export const RULE_REGEX_ENDING = '((?:[^\\.]|$).*)$'

interface regexFilterMap {
Expand All @@ -21,6 +29,7 @@ interface regexFilterMap {
interface redirectHandlerInput {
originUrl: string
redirectUrl: string
getPort: (state: CompanionState) => string
}

type messageToSelfType = typeof GLOBAL_STATE_CHANGE | typeof GLOBAL_STATE_OPTION_CHANGE | typeof DELETE_RULE_REQUEST
Expand All @@ -29,6 +38,8 @@ interface messageToSelf {
value?: string | Record<string, unknown>
}

export const defaultNSRegexStr = `(${[...DEFAULT_NAMESPACES].join('|')})`

// We need to check if the browser supports the declarativeNetRequest API.
// TODO: replace with check for `Blocking` in `chrome.webRequest.OnBeforeRequestOptions`
// which is currently a bug https://bugs.chromium.org/p/chromium/issues/detail?id=1427952
Expand Down Expand Up @@ -75,11 +86,18 @@ const savedRegexFilters: Map<string, regexFilterMap> = new Map()
const DEFAULT_LOCAL_RULES: redirectHandlerInput[] = [
{
originUrl: 'http://127.0.0.1',
redirectUrl: 'http://localhost'
redirectUrl: 'http://localhost',
getPort: ({ gwURLString }): string => new URL(gwURLString).port
},
{
originUrl: 'http://[::1]',
redirectUrl: 'http://localhost'
redirectUrl: 'http://localhost',
getPort: ({ gwURLString }): string => new URL(gwURLString).port
},
{
originUrl: 'http://localhost',
redirectUrl: 'http://127.0.0.1',
getPort: ({ apiURL }): string => new URL(apiURL).port
}
]

Expand All @@ -101,7 +119,7 @@ export function isLocalHost (url: string): boolean {
* @param str URL string to escape
* @returns
*/
function escapeURLRegex (str: string): string {
export function escapeURLRegex (str: string): string {
// these characters are allowed in the URL, but not in the regex.
// eslint-disable-next-line no-useless-escape
const ALLOWED_CHARS_URL_REGEX = /([:\/\?#\[\]@!$&'\(\ )\*\+,;=\-_\.~])/g
Expand All @@ -115,38 +133,24 @@ function escapeURLRegex (str: string): string {
* @param redirectUrl
* @returns
*/
function constructRegexFilter ({ originUrl, redirectUrl }: redirectHandlerInput): {
regexSubstitution: string
regexFilter: string
} {
// We can traverse the URL from the end, and find the first character that is different.
let commonIdx = 1
while (commonIdx < Math.min(originUrl.length, redirectUrl.length)) {
if (originUrl[originUrl.length - commonIdx] !== redirectUrl[redirectUrl.length - commonIdx]) {
break
function constructRegexFilter ({ originUrl, redirectUrl }: IRegexFilter): IFilter {
// the order is very important here, because we want to match the best possible filter.
const filtersToTryInOrder: Array<typeof RegexFilter> = [
SubdomainRedirectRegexFilter,
NamespaceRedirectRegexFilter,
CommonPatternRedirectRegexFilter
]

for (const Filter of filtersToTryInOrder) {
const filter = new Filter({ originUrl, redirectUrl })
if (filter.canHandle) {
return filter.filter
}
commonIdx += 1
}

// We can now construct the regex filter and substitution.
let regexSubstitution = redirectUrl.slice(0, redirectUrl.length - commonIdx + 1) + '\\1'
// We need to escape the characters that are allowed in the URL, but not in the regex.
const regexFilterFirst = escapeURLRegex(originUrl.slice(0, originUrl.length - commonIdx + 1))
// We need to match the rest of the URL, so we can use a wildcard.
const RULE_REGEX_ENDING = '((?:[^\\.]|$).*)$'
let regexFilter = `^${regexFilterFirst}${RULE_REGEX_ENDING}`.replace(/https?/ig, 'https?')

// This method does not parse:
// originUrl: "https://awesome.ipfs.io/"
// redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
// that ends up with capturing all urls which we do not want.
if (regexFilter === `^https?\\:\\/${RULE_REGEX_ENDING}`) {
const subdomain = new URL(originUrl).hostname
regexFilter = `^https?\\:\\/\\/${escapeURLRegex(subdomain)}${RULE_REGEX_ENDING}`
regexSubstitution = regexSubstitution.replace('\\1', `/${subdomain}\\1`)
}

return { regexSubstitution, regexFilter }
// This return only satisfies the compiler; it should never be reached because
// CommonPatternRedirectRegexFilter can always handle the request.
return new CommonPatternRedirectRegexFilter({ originUrl, redirectUrl }).filter
}

// If the browser supports the declarativeNetRequest API, we can block the request.
Expand Down Expand Up @@ -248,10 +252,10 @@ async function reconcileRulesAndRemoveOld (state: CompanionState): Promise<void>
}

// make sure that the default rules are added.
for (const { originUrl, redirectUrl } of DEFAULT_LOCAL_RULES) {
const { port } = new URL(state.gwURLString)
const regexFilter = `^${escapeURLRegex(`${originUrl}:${port}`)}(.*)$`
const regexSubstitution = `${redirectUrl}:${port}\\1`
for (const { originUrl, redirectUrl, getPort } of DEFAULT_LOCAL_RULES) {
const port = getPort(state)
const regexFilter = `^${escapeURLRegex(`${originUrl}:${port}`)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`
const regexSubstitution = `${redirectUrl}:${port}/\\1/\\2`

if (!savedRegexFilters.has(regexFilter)) {
// We need to add the new rule.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { RegexFilter } from './baseRegexFilter.js'
import { RULE_REGEX_ENDING, escapeURLRegex } from './blockOrObserve.js'

/**
 * Fallback filter: strips the suffix shared by both URLs and maps the remaining prefixes onto each other.
 * Handles redirects like:
 * origin: '^https?\\:\\/\\/awesome\\.ipfs\\.io\\/(.*)'
 * destination: 'http://localhost:8081/ipns/awesome.ipfs.io/$1'
 */
export class CommonPatternRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // Last resort in the chain: this filter accepts every redirect.
    this.canHandle = true

    const origin = this.originUrl
    const redirect = this.redirectUrl
    const shortest = Math.min(origin.length, redirect.length)

    // Walk both URLs backwards until the characters stop matching.
    let fromEnd = 1
    for (; fromEnd < shortest; fromEnd += 1) {
      if (origin[origin.length - fromEnd] !== redirect[redirect.length - fromEnd]) {
        break
      }
    }

    // Everything in front of the shared suffix is static, the suffix itself becomes capture group 1.
    const redirectPrefix = redirect.slice(0, redirect.length - fromEnd + 1)
    this.regexSubstitution = `${redirectPrefix}\\1`
    // URL characters that carry a meaning in a regex have to be escaped.
    const originPrefix = escapeURLRegex(origin.slice(0, origin.length - fromEnd + 1))
    this.regexFilter = `^${originPrefix}${RULE_REGEX_ENDING}`
    // normalizing makes the pattern accept both http and https.
    this.normalizeRegexFilter()

    // A pair such as
    //   originUrl: "https://awesome.ipfs.io/"
    //   redirectUrl: "http://localhost:8081/ipns/awesome.ipfs.io/"
    // leaves nothing but the scheme in front of the shared suffix, which would capture all urls.
    const catchAllFilter = `^https?\\:\\/${RULE_REGEX_ENDING}`
    if (this.regexFilter !== catchAllFilter) {
      return
    }

    // Pin the rule to the origin hostname and carry that hostname over into the substitution.
    const { hostname } = new URL(origin)
    this.regexFilter = `^https?\\:\\/\\/${escapeURLRegex(hostname)}${RULE_REGEX_ENDING}`
    this.regexSubstitution = this.regexSubstitution.replace('\\1', `/${hostname}\\1`)
  }
}
25 changes: 25 additions & 0 deletions add-on/src/lib/redirect-handler/namespaceRedirectRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import { RegexFilter } from './baseRegexFilter.js'
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js'

/**
 * Handles namespace redirects like:
 * origin: '^https?\\:\\/\\/ipfs\\.io\\/(ipfs|ipns)\\/(.*)'
 * destination: 'http://localhost:8080/\\1/\\2'
 *
 * The redirect is only claimed (`canHandle`) when both URLs use the same default namespace, the origin
 * carries no `uri` search param, and whatever follows the namespace is identical in both URLs. Only then
 * can the captured remainder (group 2) be reused verbatim in the substitution.
 */
export class NamespaceRedirectRegexFilter extends RegexFilter {
  computeFilter (): void {
    // if the namespaces are the same, we can generate simpler regex.
    // The only value that needs special handling is the `uri` param.
    // A redirect like
    // https://ipfs.io/ipfs/QmZMxU -> http://localhost:8080/ipfs/QmZMxU
    const sameDefaultNamespace = DEFAULT_NAMESPACES.has(this.originNS) &&
      DEFAULT_NAMESPACES.has(this.redirectNS) &&
      this.originNS === this.redirectNS &&
      this.originURL.searchParams.get('uri') == null

    const originMarker = `/${this.originNS}/`
    const redirectMarker = `/${this.redirectNS}/`
    const originIdx = this.originUrl.indexOf(originMarker)
    const redirectIdx = this.redirectUrl.indexOf(redirectMarker)

    // Always assign both fields: the base class normalizes `regexFilter` even when we decline.
    const originFirst = originIdx === -1 ? this.originUrl : this.originUrl.slice(0, originIdx)
    this.regexFilter = `^${escapeURLRegex(originFirst)}\\/${defaultNSRegexStr}\\/${RULE_REGEX_ENDING}`
    this.regexSubstitution = this.redirectUrl
    this.canHandle = false

    // The namespace is computed from the lower-cased pathname, so it may be missing from the raw strings.
    if (!sameDefaultNamespace || originIdx === -1 || redirectIdx === -1) {
      return
    }

    const originLast = this.originUrl.slice(originIdx + originMarker.length)
    const redirectFirst = this.redirectUrl.slice(0, redirectIdx)
    const redirectLast = this.redirectUrl.slice(redirectIdx + redirectMarker.length)

    // Searching the whole redirect URL for the origin remainder is unsafe: an empty remainder matches at
    // index 0, a short one can match inside the host, and a missing one leaves a static target behind a
    // dynamic filter. Requiring identical remainders lets us build the substitution structurally instead.
    if (originLast !== redirectLast) {
      return
    }

    this.regexSubstitution = `${redirectFirst}/\\1/\\2`
    this.canHandle = true
  }
}
70 changes: 70 additions & 0 deletions add-on/src/lib/redirect-handler/subdomainRedirectRegexFilter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { IRegexFilter, RegexFilter } from './baseRegexFilter.js'
import { DEFAULT_NAMESPACES, RULE_REGEX_ENDING, defaultNSRegexStr, escapeURLRegex } from './blockOrObserve.js'

/**
 * Handles subdomain redirects like:
 * origin: '^https?\\:\\/\\/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy\\.ipfs\\.dweb\\.link'
 * destination: 'http://localhost:8080/ipfs/bafybeigfejjsuq5im5c3w3t3krsiytszhfdc4v5myltcg4myv2n2w6jumy'
 *
 * The generated filter captures the subdomains in front of the namespace (group 1), the namespace label
 * (group 2) and the rest of the URL (group 3). The redirect is only claimed (`canHandle`) when the redirect
 * URL has the shape `<base>/<namespace>/<subdomains><origin path>`, so that all three groups can be mapped.
 */
export class SubdomainRedirectRegexFilter extends RegexFilter {
  constructor ({ originUrl, redirectUrl }: IRegexFilter) {
    super({ originUrl, redirectUrl })
  }

  computeFilter (): void {
    // Static defaults; the base class normalizes `regexFilter` even when we decline.
    this.regexSubstitution = this.redirectUrl
    this.regexFilter = this.originUrl
    // Only relevant when the namespace is missing from the origin path but present in the redirect path.
    if (DEFAULT_NAMESPACES.has(this.originNS) || !DEFAULT_NAMESPACES.has(this.redirectNS)) {
      return
    }

    this.regexFilter = `^${escapeURLRegex(this.regexFilter)}`
    this.normalizeRegexFilter()

    // tld and root are known, we are just interested in the remainder of URL.
    const [tld, root, ...urlParts] = this.originURL.hostname.split('.').reverse()
    // labels that stay literal in the filter, grows while we walk towards the left-most label.
    const staticUrlParts = [root, tld]
    // regex to match the start of the URL, this remains common.
    const commonStaticUrlStart = escapeURLRegex(`^${this.originURL.protocol}//`)

    // going through the subdomains (right to left) to find a namespace.
    // this does not work for subdomains where namespace is not provided.
    // e.g. https://helia-identify.on.fleek.co/
    // e.g. https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.on.fleek.co/
    while (urlParts.length > 0) {
      // get the urlPart at the 0th index and remove it from the array.
      const subdomainPart = urlParts.shift() as string

      if (!DEFAULT_NAMESPACES.has(subdomainPart)) {
        // till we find a namespace, we keep adding subdomains to the staticUrlParts.
        staticUrlParts.unshift(subdomainPart)
        continue
      }

      // We found a namespace, everything left of it is dynamic (CID or name).
      // e.g https://bafybeib3bzis4mejzsnzsb65od3rnv5ffit7vsllratddjkgfgq4wiamqu.ipfs.dweb.link
      const dynamicHost = urlParts.reverse().join('.')
      const redirectSegment = `/${subdomainPart}/${dynamicHost}`
      const segmentIdx = this.redirectUrl.indexOf(redirectSegment)
      // Nothing in front of the namespace, or the redirect does not contain `/<namespace>/<subdomains>`:
      // we cannot map the capture groups, so leave the redirect to another filter.
      if (dynamicHost === '' || segmentIdx === -1) {
        break
      }

      const redirectBase = this.redirectUrl.slice(0, segmentIdx)
      const redirectTail = this.redirectUrl.slice(segmentIdx + redirectSegment.length)
      const { pathname, search, hash } = this.originURL
      const pathWithSearch = pathname + search
      // The remainder of the redirect has to be the origin path, otherwise group 3 cannot replace it.
      // A redirect without trailing slash is accepted for the root path.
      const tailIsOriginPath = redirectTail === pathWithSearch ||
        redirectTail === pathWithSearch + hash ||
        (pathWithSearch === '/' && redirectTail === '')
      if (!tailIsOriginPath) {
        break
      }

      // this is computed from the static labels collected so far.
      const commonStaticUrlEnd = `\\.${escapeURLRegex(staticUrlParts.join('.'))}\\/${RULE_REGEX_ENDING}`
      // NOTE(review): `(.*?)` is not restricted to hostname characters, consider tightening it.
      this.regexFilter = `${commonStaticUrlStart}(.*?)\\.${defaultNSRegexStr}${commonStaticUrlEnd}`
      // The filter consumes the `/` after the host outside of group 3, so it is always re-added here.
      this.regexSubstitution = `${redirectBase}/\\2/\\1/\\3`
      // we constructed a filter with dynamic parts, instead of the static one we started with.
      this.canHandle = true
      // no need to continue, we found a namespace.
      break
    }
  }
}
3 changes: 3 additions & 0 deletions add-on/src/types/global.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Ambient type declaration for the `is-ipfs` package; only the `cid` function is declared here.
declare module 'is-ipfs' {
// Declared as taking a string value and returning a boolean.
function cid (value: string): boolean
}
Loading

0 comments on commit 3013400

Please sign in to comment.