From 09dd68936da7c7f54977bd2b659c74d83e31d809 Mon Sep 17 00:00:00 2001 From: uri Date: Thu, 25 Jan 2024 16:04:21 +0200 Subject: [PATCH] improve-regexp --- src/index.ts | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/index.ts b/src/index.ts index bbc724d..67e2c4a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -261,7 +261,10 @@ export const stringToRegexp = (x: string) => export const caseInsensitive = addFlag("i"); export const regExpOr = (x: RegExp, y: RegExp) => - new RegExp(`(?:${x.source}|${y.source})`, combineFlags(x, y)); + new RegExp( + `(?:${bracketIfNeeded(x.source)}|${bracketIfNeeded(y.source)})`, + combineFlags(x, y), + ); export const selectionGroup = (x: RegExp) => new RegExp(`(${x.source})`, x.flags); @@ -277,13 +280,21 @@ const optional = (x: RegExp) => export const zeroOrMore = (x: RegExp) => new RegExp(`${bracketIfNeeded(x.source)}*`, x.flags); + export const oneOrMore = (x: RegExp) => new RegExp(`${bracketIfNeeded(x.source)}+`, x.flags); + export const globalize = addFlag("g"); -const personName = [zeroOrMore(/'?[A-Z][\w-]*\.?'?\s/), /[\w-]+/].reduce( - concatRegexp, -); +const speakerTitle = [/ms\./, /mrs\./, /mr\./, /dr\./] + .map(caseInsensitive) + .reduce(regExpOr); + +const personName = [ + optional(concatRegexp(speakerTitle, /\s/)), + zeroOrMore(/'?[A-Z][\w-]*\.?'?\s/), + /[\w-]+/, +].reduce(concatRegexp); const hyphen = /[―-]/; @@ -295,7 +306,10 @@ const speaker = globalize( const speakerInEnd = [hyphen, /\s*/, personName, /$/].reduce(concatRegexp); -const splitSentences = split(/(?=[!.])/); +export const negativeLookBehind = (x: RegExp) => + new RegExp(`(?