From fd84fddcc06456a4edb44a7c77833b7936cfb555 Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 2 Oct 2023 17:22:29 -0700 Subject: [PATCH] refactor(matchers): remove NfaMatcher BREAKING CHANGE: The NfaMatcher class has been removed. Use the RegExpMatcher instead. --- docs/reference/README.md | 149 +++++++++--------- docs/reference/classes/DataSet.md | 47 +++--- docs/reference/classes/ParserError.md | 18 +-- docs/reference/classes/PhraseBuilder.md | 26 +-- docs/reference/classes/RegExpMatcher.md | 54 +++---- docs/reference/classes/TextCensor.md | 14 +- docs/reference/enums/SyntaxKind.md | 14 +- docs/reference/interfaces/BlacklistedTerm.md | 2 +- .../CollapseDuplicatesTransformerOptions.md | 6 +- docs/reference/interfaces/LiteralNode.md | 2 +- docs/reference/interfaces/MatchPayload.md | 6 +- docs/reference/interfaces/Matcher.md | 13 +- docs/reference/interfaces/OptionalNode.md | 2 +- docs/reference/interfaces/ParsedPattern.md | 4 +- docs/reference/interfaces/PhraseContainer.md | 8 +- ...sedCollapseDuplicatesTransformerOptions.md | 2 +- .../interfaces/RegExpMatcherOptions.md | 12 +- 17 files changed, 195 insertions(+), 184 deletions(-) diff --git a/docs/reference/README.md b/docs/reference/README.md index 5265257..d712059 100644 --- a/docs/reference/README.md +++ b/docs/reference/README.md @@ -72,7 +72,7 @@ obscenity ### CensorContext -Ƭ **CensorContext**: [`MatchPayload`](interfaces/MatchPayload.md) & { `input`: `string` ; `overlapsAtEnd`: `boolean` ; `overlapsAtStart`: `boolean` } +Ƭ **CensorContext**: [`MatchPayload`](interfaces/MatchPayload.md) & { `input`: `string` ; `overlapsAtEnd`: `boolean` ; `overlapsAtStart`: `boolean` } Context passed to [[TextCensorStrategy | text censoring strategies]]. @@ -80,7 +80,7 @@ Context passed to [[TextCensorStrategy | text censoring strategies]]. [src/censor/TextCensor.ts:104](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/TextCensor.ts#L104) -___ +--- ### CharacterMapping @@ -94,11 +94,11 @@ should be a set of characters that map to the transformed character. [src/transformer/remap-characters/index.ts:60](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/remap-characters/index.ts#L60) -___ +--- ### EnglishProfaneWord -Ƭ **EnglishProfaneWord**: ``"abbo"`` \| ``"abeed"`` \| ``"africoon"`` \| ``"anal"`` \| ``"anus"`` \| ``"arabush"`` \| ``"arse"`` \| ``"ass"`` \| ``"bastard"`` \| ``"bestiality"`` \| ``"bitch"`` \| ``"blowjob"`` \| ``"boob"`` \| ``"boonga"`` \| ``"buttplug"`` \| ``"chingchong"`` \| ``"chink"`` \| ``"cock"`` \| ``"cuck"`` \| ``"cum"`` \| ``"cunt"`` \| ``"deepthroat"`` \| ``"dick"`` \| ``"dildo"`` \| ``"doggystyle"`` \| ``"double penetration"`` \| ``"ejaculate"`` \| ``"fag"`` \| ``"felch"`` \| ``"fellatio"`` \| ``"finger bang"`` \| ``"fisting"`` \| ``"fuck"`` \| ``"gangbang"`` \| ``"handjob"`` \| ``"hentai"`` \| ``"hooker"`` \| ``"incest"`` \| ``"jerk off"`` \| ``"jizz"`` \| ``"lubejob"`` \| ``"masturbate"`` \| ``"nigger"`` \| ``"orgasm"`` \| ``"orgy"`` \| ``"penis"`` \| ``"porn"`` \| ``"pussy"`` \| ``"rape"`` \| ``"retard"`` \| ``"scat"`` \| ``"semen"`` \| ``"sex"`` \| ``"shit"`` \| ``"slut"`` \| ``"tit"`` \| ``"tranny"`` \| ``"vagina"`` \| ``"whore"`` +Ƭ **EnglishProfaneWord**: `"abbo"` \| `"abeed"` \| `"africoon"` \| `"anal"` \| `"anus"` \| `"arabush"` \| `"arse"` \| `"ass"` \| `"bastard"` \| `"bestiality"` \| `"bitch"` \| `"blowjob"` \| `"boob"` \| `"boonga"` \| `"buttplug"` \| `"chingchong"` \| `"chink"` \| `"cock"` \| `"cuck"` \| `"cum"` \| `"cunt"` \| `"deepthroat"` \| `"dick"` \| `"dildo"` \| `"doggystyle"` \| `"double penetration"` \| `"ejaculate"` \| `"fag"` \| `"felch"` \| `"fellatio"` \| `"finger bang"` \| `"fisting"` \| `"fuck"` \| `"gangbang"` \| `"handjob"` \| `"hentai"` \| `"hooker"` \| `"incest"` \| `"jerk off"` \| `"jizz"` \| `"lubejob"` \| `"masturbate"` \| `"nigger"` \| `"orgasm"` \| `"orgy"` \| `"penis"` \| `"porn"` \| `"pussy"` \| `"rape"` \| `"retard"` \| `"scat"` \| `"semen"` \| `"sex"` \| `"shit"` \| `"slut"` \| `"tit"` \| `"tranny"` \| `"vagina"` \| `"whore"` All the profane words that are included in the [[englishDataset | english dataset]] by default. @@ -106,25 +106,25 @@ All the profane words that are included in the [[englishDataset | english datase [src/preset/english.ts:383](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/preset/english.ts#L383) -___ +--- ### MatchPayloadWithPhraseMetadata -Ƭ **MatchPayloadWithPhraseMetadata**<`MetadataType`\>: [`MatchPayload`](interfaces/MatchPayload.md) & { `phraseMetadata?`: `MetadataType` } +Ƭ **MatchPayloadWithPhraseMetadata**<`MetadataType`\>: [`MatchPayload`](interfaces/MatchPayload.md) & { `phraseMetadata?`: `MetadataType` } Extends the default match payload by adding phrase metadata. #### Type parameters -| Name | -| :------ | +| Name | +| :------------- | | `MetadataType` | #### Defined in [src/dataset/DataSet.ts:190](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L190) -___ +--- ### Node @@ -136,7 +136,7 @@ All the possible kinds of nodes. [src/pattern/Nodes.ts:24](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L24) -___ +--- ### TextCensorStrategy @@ -151,8 +151,8 @@ replacement string. ##### Parameters -| Name | Type | -| :------ | :------ | +| Name | Type | +| :---- | :----------------------------------------- | | `ctx` | [`CensorContext`](README.md#censorcontext) | ##### Returns @@ -167,7 +167,7 @@ replacement string. ### englishDataset -• `Const` **englishDataset**: [`DataSet`](classes/DataSet.md)<{ `originalWord`: [`EnglishProfaneWord`](README.md#englishprofaneword) }\> +• `Const` **englishDataset**: [`DataSet`](classes/DataSet.md)<{ `originalWord`: [`EnglishProfaneWord`](README.md#englishprofaneword) }\> A dataset of profane English words. @@ -224,7 +224,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. [src/preset/english.ts:103](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/preset/english.ts#L103) -___ +--- ### englishRecommendedBlacklistMatcherTransformers @@ -237,11 +237,11 @@ A set of transformers to be used when matching blacklisted patterns with the [src/preset/english.ts:13](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/preset/english.ts#L13) -___ +--- ### englishRecommendedTransformers -• `Const` **englishRecommendedTransformers**: `Pick`<[`RegExpMatcherOptions`](interfaces/RegExpMatcherOptions.md), ``"blacklistMatcherTransformers"`` \| ``"whitelistMatcherTransformers"``\> +• `Const` **englishRecommendedTransformers**: `Pick`<[`RegExpMatcherOptions`](interfaces/RegExpMatcherOptions.md), `"blacklistMatcherTransformers"` \| `"whitelistMatcherTransformers"`\> Recommended transformers to be used with the [[englishDataset | english word dataset]] and the [[RegExpMatcher]]. @@ -250,7 +250,7 @@ dataset]] and the [[RegExpMatcher]]. [src/preset/english.ts:48](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/preset/english.ts#L48) -___ +--- ### englishRecommendedWhitelistMatcherTransformers @@ -263,11 +263,11 @@ A set of transformers to be used when matching whitelisted terms with the [src/preset/english.ts:36](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/preset/english.ts#L36) -___ +--- ### version -• `Const` **version**: ``"0.1.0"`` +• `Const` **version**: `"0.1.0"` The current version of the library, formatted as `MAJOR.MINOR.PATCH`. @@ -299,8 +299,8 @@ const matcher = new RegExpMatcher({ #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :--------- | :----------------------------------------------- | :----------------------- | | `patterns` | [`ParsedPattern`](interfaces/ParsedPattern.md)[] | List of parsed patterns. | #### Returns @@ -314,7 +314,7 @@ to the [[RegExpMatcher]]. [src/matcher/BlacklistedTerm.ts:37](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/BlacklistedTerm.ts#L37) -___ +--- ### asteriskCensorStrategy @@ -341,7 +341,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:71](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L71) -___ +--- ### collapseDuplicatesTransformer @@ -389,8 +389,8 @@ const matcher = new RegExpMatcher({ ..., blacklistMatcherTransformers: [transfor #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :------------------------------------------------------------------------------------------- | :--------------------------- | | `options` | [`CollapseDuplicatesTransformerOptions`](interfaces/CollapseDuplicatesTransformerOptions.md) | Options for the transformer. | #### Returns @@ -404,38 +404,38 @@ A container holding the transformer, which can then be passed to the [src/transformer/collapse-duplicates/index.ts:46](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/collapse-duplicates/index.ts#L46) -___ +--- ### compareMatchByPositionAndId -▸ **compareMatchByPositionAndId**(`a`, `b`): ``0`` \| ``1`` \| ``-1`` +▸ **compareMatchByPositionAndId**(`a`, `b`): `0` \| `1` \| `-1` Compares two match payloads. If the first match payload's start index is less than the second's, `-1` is - returned; +returned; If the second match payload's start index is less than the first's, `1` is - returned; +returned; If the first match payload's end index is less than the second's, `-1` is - returned; +returned; If the second match payload's end index is less than the first's, `1` is - returned; +returned; If the first match payload's term ID is less than the second's, `-1` is - returned; +returned; If the first match payload's term ID is equal to the second's, `0` is - returned; +returned; Otherwise, `1` is returned. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | -| `a` | [`MatchPayload`](interfaces/MatchPayload.md) | First match payload. | -| `b` | [`MatchPayload`](interfaces/MatchPayload.md) | Second match payload. | +| Name | Type | Description | +| :--- | :------------------------------------------- | :-------------------- | +| `a` | [`MatchPayload`](interfaces/MatchPayload.md) | First match payload. | +| `b` | [`MatchPayload`](interfaces/MatchPayload.md) | Second match payload. | #### Returns -``0`` \| ``1`` \| ``-1`` +`0` \| `1` \| `-1` The result of the comparison: -1 if the first should sort lower than the second, 0 if they are the same, and 1 if the second should sort lower @@ -445,7 +445,7 @@ than the first. [src/matcher/MatchPayload.ts:57](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/MatchPayload.ts#L57) -___ +--- ### fixedCharCensorStrategy @@ -465,8 +465,8 @@ const censor = new TextCensor().setStrategy(strategy); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :----- | :------- | :------------------------------------------------------------------------------------------------------------------------------------------- | | `char` | `string` | String that represents the code point which should be used when generating the replacement string. Must be exactly one code point in length. | #### Returns @@ -479,7 +479,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:134](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L134) -___ +--- ### fixedPhraseCensorStrategy @@ -509,8 +509,8 @@ const censor = new TextCensor().setStrategy(strategy); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------- | :------- | :------------------------- | | `phrase` | `string` | Replacement phrase to use. | #### Returns @@ -523,7 +523,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:115](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L115) -___ +--- ### grawlixCensorStrategy @@ -552,7 +552,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:89](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L89) -___ +--- ### keepEndCensorStrategy @@ -572,8 +572,8 @@ const censor = new TextCensor().setStrategy(strategy); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------------- | :--------------------------------------------------- | :-------------------------------------------------------------------------------- | | `baseStrategy` | [`TextCensorStrategy`](README.md#textcensorstrategy) | Strategy to extend. It will be used to produce the start of the generated string. | #### Returns @@ -586,7 +586,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:51](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L51) -___ +--- ### keepStartCensorStrategy @@ -609,7 +609,9 @@ const censor = new TextCensor().setStrategy(strategy); ```typescript // Since keepEndCensorStrategy() returns another text censoring strategy, you can use it // as the base strategy to pass to keepStartCensorStrategy(). -const strategy = keepStartCensorStrategy(keepEndCensorStrategy(asteriskCensorStrategy())); +const strategy = keepStartCensorStrategy( + keepEndCensorStrategy(asteriskCensorStrategy()), +); const censor = new TextCensor().setStrategy(strategy); // Before: 'fuck you' // After: 'f**k you' @@ -617,8 +619,8 @@ const censor = new TextCensor().setStrategy(strategy); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------------- | :--------------------------------------------------- | :------------------------------------------------------------------------------ | | `baseStrategy` | [`TextCensorStrategy`](README.md#textcensorstrategy) | Strategy to extend. It will be used to produce the end of the generated string. | #### Returns @@ -631,7 +633,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:28](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L28) -___ +--- ### parseRawPattern @@ -651,8 +653,8 @@ pattern. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :------- | :------------------- | | `pattern` | `string` | The string to parse. | #### Returns @@ -666,7 +668,7 @@ The parsed pattern, which can then be used with the [src/pattern/Pattern.ts:130](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Pattern.ts#L130) -___ +--- ### pattern @@ -696,6 +698,7 @@ However, there are several constructs that have special meaning: A word character is an lower-case or upper-case ASCII alphabet character or an ASCII digit. + - In a literal, a backslash may be used to **escape** one of the meta-characters mentioned above so that it does match literally: `\\[` matches `[`, and does not mark the start of an optional expression. @@ -720,6 +723,7 @@ However, there are several constructs that have special meaning: - `d?ude` matches `d`, then any character, then `ude`. All of the following strings are matched by this pattern: + - `dyude` - `d;ude` - `d!ude` @@ -728,6 +732,7 @@ However, there are several constructs that have special meaning: string `hello`. The set of strings it matches is equal to the union of the set of strings that the two patterns `hello` and `h?ello` match. All of the following strings are matched by this pattern: + - `hello` - `h!ello` - `h;ello` @@ -783,10 +788,10 @@ using a template tag. #### Parameters -| Name | Type | -| :------ | :------ | -| `strings` | `TemplateStringsArray` | -| `...expressions` | `unknown`[] | +| Name | Type | +| :--------------- | :--------------------- | +| `strings` | `TemplateStringsArray` | +| `...expressions` | `unknown`[] | #### Returns @@ -799,7 +804,7 @@ The parsed pattern, which can then be used with the [src/pattern/Pattern.ts:106](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Pattern.ts#L106) -___ +--- ### randomCharFromSetCensorStrategy @@ -819,8 +824,8 @@ const censor = new TextCensor().setStrategy(strategy); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :------- | :-------------------------------------------------------------------------------------------- | | `charset` | `string` | Set of characters from which the replacement string should be constructed. Must not be empty. | #### Returns @@ -833,7 +838,7 @@ A [[TextCensorStrategy]] for use with the [[TextCensor]]. [src/censor/BuiltinStrategies.ts:155](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/BuiltinStrategies.ts#L155) -___ +--- ### remapCharactersTransformer @@ -873,13 +878,13 @@ const matcher = new RegExpMatcher({ ..., blacklistMatcherTransformers: [transfor **`See`** - - [[resolveConfusablesTransformer| Transformer that handles confusable Unicode characters]] - - [[resolveLeetSpeakTransformer | Transformer that handles leet-speak]] +- [[resolveConfusablesTransformer| Transformer that handles confusable Unicode characters]] +- [[resolveLeetSpeakTransformer | Transformer that handles leet-speak]] #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :----------------------------------------------- | :------------------------------------------------- | | `mapping` | [`CharacterMapping`](README.md#charactermapping) | A map/object mapping certain characters to others. | #### Returns @@ -893,7 +898,7 @@ A container holding the transformer, which can then be passed to the [src/transformer/remap-characters/index.ts:38](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/remap-characters/index.ts#L38) -___ +--- ### resolveConfusablesTransformer @@ -926,7 +931,7 @@ A container holding the transformer, which can then be passed to the [src/transformer/resolve-confusables/index.ts:22](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/resolve-confusables/index.ts#L22) -___ +--- ### resolveLeetSpeakTransformer @@ -960,7 +965,7 @@ A container holding the transformer, which can then be passed to the [src/transformer/resolve-leetspeak/index.ts:23](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/resolve-leetspeak/index.ts#L23) -___ +--- ### skipNonAlphabeticTransformer @@ -1002,7 +1007,7 @@ A container holding the transformer, which can then be passed to the [src/transformer/skip-non-alphabetic/index.ts:31](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/skip-non-alphabetic/index.ts#L31) -___ +--- ### toAsciiLowerCaseTransformer diff --git a/docs/reference/classes/DataSet.md b/docs/reference/classes/DataSet.md index 56140c4..d8aa7ea 100644 --- a/docs/reference/classes/DataSet.md +++ b/docs/reference/classes/DataSet.md @@ -7,8 +7,8 @@ associating metadata with them. ## Type parameters -| Name | Description | -| :------ | :------ | +| Name | Description | +| :------------- | :----------------------------------------------------------------------------- | | `MetadataType` | Metadata type for phrases. Note that the metadata type is implicitly nullable. | ## Table of contents @@ -33,8 +33,8 @@ associating metadata with them. #### Type parameters -| Name | -| :------ | +| Name | +| :------------- | | `MetadataType` | ## Methods @@ -53,8 +53,8 @@ const customDataset = new DataSet().addAll(englishDataset); #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------ | :--------------------------------------- | :------------- | | `other` | [`DataSet`](DataSet.md)<`MetadataType`\> | Other dataset. | #### Returns @@ -65,7 +65,7 @@ const customDataset = new DataSet().addAll(englishDataset); [src/dataset/DataSet.ts:29](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L29) -___ +--- ### addPhrase @@ -77,17 +77,20 @@ Adds a phrase to this dataset. ```typescript const data = new DataSet<{ originalWord: string }>() - .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'fuck' }) - .addPattern(pattern`fuck`) - .addPattern(pattern`f[?]ck`) - .addWhitelistedTerm('Afck')) + .addPhrase((phrase) => + phrase + .setMetadata({ originalWord: 'fuck' }) + .addPattern(pattern`fuck`) + .addPattern(pattern`f[?]ck`) + .addWhitelistedTerm('Afck'), + ) .build(); ``` #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :--- | :------------------------------------------------------------------------------------------------------------------------ | :--------------------------------------------------------------------------------------------------------- | | `fn` | (`builder`: [`PhraseBuilder`](PhraseBuilder.md)<`MetadataType`\>) => [`PhraseBuilder`](PhraseBuilder.md)<`MetadataType`\> | A function that takes a [[PhraseBuilder]], adds patterns/whitelisted terms/metadata to it, and returns it. | #### Returns @@ -98,11 +101,11 @@ const data = new DataSet<{ originalWord: string }>() [src/dataset/DataSet.ts:75](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L75) -___ +--- ### build -▸ **build**(): `Pick`<[`RegExpMatcherOptions`](../interfaces/RegExpMatcherOptions.md), ``"blacklistedTerms"`` \| ``"whitelistedTerms"``\> +▸ **build**(): `Pick`<[`RegExpMatcherOptions`](../interfaces/RegExpMatcherOptions.md), `"blacklistedTerms"` \| `"whitelistedTerms"`\> Returns the dataset in a format suitable for usage with the [[RegExpMatcher]]. @@ -118,13 +121,13 @@ const matcher = new RegExpMatcher({ #### Returns -`Pick`<[`RegExpMatcherOptions`](../interfaces/RegExpMatcherOptions.md), ``"blacklistedTerms"`` \| ``"whitelistedTerms"``\> +`Pick`<[`RegExpMatcherOptions`](../interfaces/RegExpMatcherOptions.md), `"blacklistedTerms"` \| `"whitelistedTerms"`\> #### Defined in [src/dataset/DataSet.ts:118](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L118) -___ +--- ### getPayloadWithPhraseMetadata @@ -144,8 +147,8 @@ const phraseMetadata = matchesWithPhraseMetadata[0].phraseMetadata; #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :---------------------------------------------- | :---------------------- | | `payload` | [`MatchPayload`](../interfaces/MatchPayload.md) | Original match payload. | #### Returns @@ -156,7 +159,7 @@ const phraseMetadata = matchesWithPhraseMetadata[0].phraseMetadata; [src/dataset/DataSet.ts:94](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L94) -___ +--- ### removePhrasesIf @@ -174,8 +177,8 @@ const customDataset = new DataSet<{ originalWord: string }>() #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :---------- | :---------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------- | | `predicate` | (`phrase`: [`PhraseContainer`](../interfaces/PhraseContainer.md)<`MetadataType`\>) => `boolean` | A predicate that determines whether or not a phrase should be removed. Return `true` to remove, `false` to keep. | #### Returns diff --git a/docs/reference/classes/ParserError.md b/docs/reference/classes/ParserError.md index 85b8e29..4d7cc1c 100644 --- a/docs/reference/classes/ParserError.md +++ b/docs/reference/classes/ParserError.md @@ -32,11 +32,11 @@ Custom error thrown by the parser when syntactical errors are detected. #### Parameters -| Name | Type | -| :------ | :------ | +| Name | Type | +| :-------- | :------- | | `message` | `string` | -| `line` | `number` | -| `column` | `number` | +| `line` | `number` | +| `column` | `number` | #### Overrides @@ -59,7 +59,7 @@ Note that surrogate pairs are counted as 1 column wide, not 2. [src/pattern/ParserError.ts:16](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/ParserError.ts#L16) -___ +--- ### line @@ -71,7 +71,7 @@ The line on which the error occurred (one-based). [src/pattern/ParserError.ts:10](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/ParserError.ts#L10) -___ +--- ### message @@ -85,11 +85,11 @@ Error.message node_modules/.pnpm/typescript@5.2.2/node_modules/typescript/lib/lib.es5.d.ts:1068 -___ +--- ### name -• `Readonly` **name**: ``"ParserError"`` +• `Readonly` **name**: `"ParserError"` #### Overrides @@ -99,7 +99,7 @@ Error.name [src/pattern/ParserError.ts:5](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/ParserError.ts#L5) -___ +--- ### stack diff --git a/docs/reference/classes/PhraseBuilder.md b/docs/reference/classes/PhraseBuilder.md index a085dc3..0344144 100644 --- a/docs/reference/classes/PhraseBuilder.md +++ b/docs/reference/classes/PhraseBuilder.md @@ -6,8 +6,8 @@ Builder for phrases. ## Type parameters -| Name | -| :------ | +| Name | +| :------------- | | `MetadataType` | ## Table of contents @@ -31,8 +31,8 @@ Builder for phrases. #### Type parameters -| Name | -| :------ | +| Name | +| :------------- | | `MetadataType` | ## Methods @@ -45,8 +45,8 @@ Associates a pattern with this phrase. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :------------------------------------------------ | :-------------- | | `pattern` | [`ParsedPattern`](../interfaces/ParsedPattern.md) | Pattern to add. | #### Returns @@ -57,7 +57,7 @@ Associates a pattern with this phrase. [src/dataset/DataSet.ts:149](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L149) -___ +--- ### addWhitelistedTerm @@ -67,8 +67,8 @@ Associates a whitelisted pattern with this phrase. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :----- | :------- | :----------------------- | | `term` | `string` | Whitelisted term to add. | #### Returns @@ -79,7 +79,7 @@ Associates a whitelisted pattern with this phrase. [src/dataset/DataSet.ts:159](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L159) -___ +--- ### build @@ -96,7 +96,7 @@ Builds the phrase, returning a [[PhraseContainer]] for use with the [src/dataset/DataSet.ts:178](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L178) -___ +--- ### setMetadata @@ -106,8 +106,8 @@ Associates some metadata with this phrase. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :---------- | :------------- | :--------------- | | `metadata?` | `MetadataType` | Metadata to use. | #### Returns diff --git a/docs/reference/classes/RegExpMatcher.md b/docs/reference/classes/RegExpMatcher.md index 659aa1c..49e94b7 100644 --- a/docs/reference/classes/RegExpMatcher.md +++ b/docs/reference/classes/RegExpMatcher.md @@ -43,12 +43,12 @@ const matcher = new RegExpMatcher({ ```typescript // Simple matcher that only has blacklisted patterns. const matcher = new RegExpMatcher({ - blacklistedTerms: assignIncrementingIds([ - pattern`fuck`, - pattern`f?uck`, // wildcards (?) - pattern`bitch`, - pattern`b[i]tch` // optionals ([i] matches either "i" or "") - ]), + blacklistedTerms: assignIncrementingIds([ + pattern`fuck`, + pattern`f?uck`, // wildcards (?) + pattern`bitch`, + pattern`b[i]tch`, // optionals ([i] matches either "i" or "") + ]), }); // Check whether some string matches any of the patterns. @@ -60,18 +60,18 @@ const doesMatch = matcher.hasMatch('fuck you bitch'); ```typescript // A more advanced example, with transformers and whitelisted terms. const matcher = new RegExpMatcher({ - blacklistedTerms: [ - { id: 1, pattern: pattern`penis` }, - { id: 2, pattern: pattern`fuck` }, - ], - whitelistedTerms: ['pen is'], - blacklistMatcherTransformers: [ - resolveConfusablesTransformer(), // '🅰' => 'a' - resolveLeetSpeakTransformer(), // '$' => 's' - foldAsciiCharCaseTransformer(), // case insensitive matching - skipNonAlphabeticTransformer(), // 'f.u...c.k' => 'fuck' - collapseDuplicatesTransformer(), // 'aaaa' => 'a' - ], + blacklistedTerms: [ + { id: 1, pattern: pattern`penis` }, + { id: 2, pattern: pattern`fuck` }, + ], + whitelistedTerms: ['pen is'], + blacklistMatcherTransformers: [ + resolveConfusablesTransformer(), // '🅰' => 'a' + resolveLeetSpeakTransformer(), // '$' => 's' + foldAsciiCharCaseTransformer(), // case insensitive matching + skipNonAlphabeticTransformer(), // 'f.u...c.k' => 'fuck' + collapseDuplicatesTransformer(), // 'aaaa' => 'a' + ], }); // Output all matches. @@ -80,8 +80,8 @@ console.log(matcher.getAllMatches('fu.....uuuuCK the pen is mightier than the sw #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :-------- | :-------------------------------------------------------------- | :-------------- | | `options` | [`RegExpMatcherOptions`](../interfaces/RegExpMatcherOptions.md) | Options to use. | #### Defined in @@ -102,10 +102,10 @@ which is typically more efficient. #### Parameters -| Name | Type | Default value | Description | -| :------ | :------ | :------ | :------ | -| `input` | `string` | `undefined` | Text to find profanities in. | -| `sorted` | `boolean` | `false` | Whether the resulting list of matches should be sorted using [[compareMatchByPositionAndId]]. Defaults to `false`. | +| Name | Type | Default value | Description | +| :------- | :-------- | :------------ | :----------------------------------------------------------------------------------------------------------------- | +| `input` | `string` | `undefined` | Text to find profanities in. | +| `sorted` | `boolean` | `false` | Whether the resulting list of matches should be sorted using [[compareMatchByPositionAndId]]. Defaults to `false`. | #### Returns @@ -123,7 +123,7 @@ otherwise, their order is unspecified. [src/matcher/regexp/RegExpMatcher.ts:86](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/regexp/RegExpMatcher.ts#L86) -___ +--- ### hasMatch @@ -136,8 +136,8 @@ checking the result, though it depends on the implementation. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------ | :------- | :------------- | | `input` | `string` | Text to check. | #### Returns diff --git a/docs/reference/classes/TextCensor.md b/docs/reference/classes/TextCensor.md index d17fd82..b9d5b9f 100644 --- a/docs/reference/classes/TextCensor.md +++ b/docs/reference/classes/TextCensor.md @@ -45,9 +45,9 @@ replaced, following the rules below: #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | -| `input` | `string` | Input text. | +| Name | Type | Description | +| :-------- | :------------------------------------------------ | :----------------- | +| `input` | `string` | Input text. | | `matches` | [`MatchPayload`](../interfaces/MatchPayload.md)[] | A list of matches. | #### Returns @@ -60,7 +60,7 @@ The censored text. [src/censor/TextCensor.ts:66](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/censor/TextCensor.ts#L66) -___ +--- ### setStrategy @@ -72,6 +72,7 @@ replacement text for regions of the text that should be censored. The default censoring strategy is the [[grawlixCensorStrategy]], generating text like `$%@*`. There are several other built-in strategies available: + - [[keepStartCensorStrategy]] - extends another strategy and keeps the first character matched, e.g. `f***`. - [[keepEndCensorStrategy]] - extends another strategy and keeps the last @@ -86,6 +87,7 @@ create your own. To ease creation of common censoring strategies, we provide a number of utility functions: + - [[fixedPhraseCensorStrategy]] - generates a fixed phrase, e.g. `fudge`. - [[fixedCharCensorStrategy]] - generates replacement strings constructed from the character given, repeated as many times as needed. @@ -94,8 +96,8 @@ utility functions: #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :--------- | :------------------------------------------------------ | :------------------------------ | | `strategy` | [`TextCensorStrategy`](../README.md#textcensorstrategy) | Text censoring strategy to use. | #### Returns diff --git a/docs/reference/enums/SyntaxKind.md b/docs/reference/enums/SyntaxKind.md index 3b468c1..2085eda 100644 --- a/docs/reference/enums/SyntaxKind.md +++ b/docs/reference/enums/SyntaxKind.md @@ -17,37 +17,37 @@ An enumeration of the kinds of nodes there are. ### BoundaryAssertion -• **BoundaryAssertion** = ``3`` +• **BoundaryAssertion** = `3` #### Defined in [src/pattern/Nodes.ts:33](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L33) -___ +--- ### Literal -• **Literal** = ``2`` +• **Literal** = `2` #### Defined in [src/pattern/Nodes.ts:32](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L32) -___ +--- ### Optional -• **Optional** = ``0`` +• **Optional** = `0` #### Defined in [src/pattern/Nodes.ts:30](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L30) -___ +--- ### Wildcard -• **Wildcard** = ``1`` +• **Wildcard** = `1` #### Defined in diff --git a/docs/reference/interfaces/BlacklistedTerm.md b/docs/reference/interfaces/BlacklistedTerm.md index 8008a67..670e32b 100644 --- a/docs/reference/interfaces/BlacklistedTerm.md +++ b/docs/reference/interfaces/BlacklistedTerm.md @@ -23,7 +23,7 @@ The identifier of the pattern; should be unique across all patterns. [src/matcher/BlacklistedTerm.ts:10](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/BlacklistedTerm.ts#L10) -___ +--- ### pattern diff --git a/docs/reference/interfaces/CollapseDuplicatesTransformerOptions.md b/docs/reference/interfaces/CollapseDuplicatesTransformerOptions.md index ca2cc4a..fe17f17 100644 --- a/docs/reference/interfaces/CollapseDuplicatesTransformerOptions.md +++ b/docs/reference/interfaces/CollapseDuplicatesTransformerOptions.md @@ -32,14 +32,14 @@ threshold to be `2`, with the effect of leaving `book` unchanged. **`Default`** ```ts -new Map() +new Map(); ``` #### Defined in [src/transformer/collapse-duplicates/index.ts:91](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/collapse-duplicates/index.ts#L91) -___ +--- ### defaultThreshold @@ -54,7 +54,7 @@ would be transformed to `aa`. **`Default`** ```ts -1 +1; ``` #### Defined in diff --git a/docs/reference/interfaces/LiteralNode.md b/docs/reference/interfaces/LiteralNode.md index b6a7d57..bc82db7 100644 --- a/docs/reference/interfaces/LiteralNode.md +++ b/docs/reference/interfaces/LiteralNode.md @@ -23,7 +23,7 @@ The code points that this literal matches. [src/pattern/Nodes.ts:63](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L63) -___ +--- ### kind diff --git a/docs/reference/interfaces/MatchPayload.md b/docs/reference/interfaces/MatchPayload.md index 399f956..cbed842 100644 --- a/docs/reference/interfaces/MatchPayload.md +++ b/docs/reference/interfaces/MatchPayload.md @@ -31,7 +31,7 @@ then this points to the index of the low surrogate. [src/matcher/MatchPayload.ts:16](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/MatchPayload.ts#L16) -___ +--- ### matchLength @@ -43,7 +43,7 @@ Total number of of code points that matched. [src/matcher/MatchPayload.ts:21](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/MatchPayload.ts#L21) -___ +--- ### startIndex @@ -55,7 +55,7 @@ Start index of the match, inclusive. [src/matcher/MatchPayload.ts:26](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/MatchPayload.ts#L26) -___ +--- ### termId diff --git a/docs/reference/interfaces/Matcher.md b/docs/reference/interfaces/Matcher.md index ead9e9c..af82e3d 100644 --- a/docs/reference/interfaces/Matcher.md +++ b/docs/reference/interfaces/Matcher.md @@ -6,6 +6,7 @@ Searches for blacklisted terms in text, ignoring parts matched by whitelisted terms. See: + - [[RegExpMatcher]] for an implementation using regular expressions. ## Implemented by @@ -33,9 +34,9 @@ which is typically more efficient. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | -| `input` | `string` | Text to find profanities in. | +| Name | Type | Description | +| :-------- | :-------- | :----------------------------------------------------------------------------------------------------------------- | +| `input` | `string` | Text to find profanities in. | | `sorted?` | `boolean` | Whether the resulting list of matches should be sorted using [[compareMatchByPositionAndId]]. Defaults to `false`. | #### Returns @@ -50,7 +51,7 @@ otherwise, their order is unspecified. [src/matcher/Matcher.ts:25](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/Matcher.ts#L25) -___ +--- ### hasMatch @@ -63,8 +64,8 @@ checking the result, though it depends on the implementation. #### Parameters -| Name | Type | Description | -| :------ | :------ | :------ | +| Name | Type | Description | +| :------ | :------- | :------------- | | `input` | `string` | Text to check. | #### Returns diff --git a/docs/reference/interfaces/OptionalNode.md b/docs/reference/interfaces/OptionalNode.md index a8fc9c9..4fc1bea 100644 --- a/docs/reference/interfaces/OptionalNode.md +++ b/docs/reference/interfaces/OptionalNode.md @@ -24,7 +24,7 @@ would be a literal node with the value `abc`. [src/pattern/Nodes.ts:44](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L44) -___ +--- ### kind diff --git a/docs/reference/interfaces/ParsedPattern.md b/docs/reference/interfaces/ParsedPattern.md index 29f3881..7fc9f45 100644 --- a/docs/reference/interfaces/ParsedPattern.md +++ b/docs/reference/interfaces/ParsedPattern.md @@ -24,7 +24,7 @@ A list of nodes which make up the pattern. [src/pattern/Nodes.ts:8](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L8) -___ +--- ### requireWordBoundaryAtEnd @@ -36,7 +36,7 @@ Whether the pattern requires a word boundary at the end. [src/pattern/Nodes.ts:13](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/pattern/Nodes.ts#L13) -___ +--- ### requireWordBoundaryAtStart diff --git a/docs/reference/interfaces/PhraseContainer.md b/docs/reference/interfaces/PhraseContainer.md index fc810d2..4467ed3 100644 --- a/docs/reference/interfaces/PhraseContainer.md +++ b/docs/reference/interfaces/PhraseContainer.md @@ -6,8 +6,8 @@ Represents a phrase. ## Type parameters -| Name | -| :------ | +| Name | +| :------------- | | `MetadataType` | ## Table of contents @@ -30,7 +30,7 @@ Metadata associated with this phrase. [src/dataset/DataSet.ts:204](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L204) -___ +--- ### patterns @@ -42,7 +42,7 @@ Patterns associated with this phrase. [src/dataset/DataSet.ts:209](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/dataset/DataSet.ts#L209) -___ +--- ### whitelistedTerms diff --git a/docs/reference/interfaces/ProcessedCollapseDuplicatesTransformerOptions.md b/docs/reference/interfaces/ProcessedCollapseDuplicatesTransformerOptions.md index c9ede12..e0a0ba6 100644 --- a/docs/reference/interfaces/ProcessedCollapseDuplicatesTransformerOptions.md +++ b/docs/reference/interfaces/ProcessedCollapseDuplicatesTransformerOptions.md @@ -19,7 +19,7 @@ [src/transformer/collapse-duplicates/index.ts:68](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/transformer/collapse-duplicates/index.ts#L68) -___ +--- ### defaultThreshold diff --git a/docs/reference/interfaces/RegExpMatcherOptions.md b/docs/reference/interfaces/RegExpMatcherOptions.md index 2edad81..7bf791d 100644 --- a/docs/reference/interfaces/RegExpMatcherOptions.md +++ b/docs/reference/interfaces/RegExpMatcherOptions.md @@ -28,14 +28,14 @@ Transformers will be applied in the order they appear. **`Default`** ```ts -[] +[]; ``` #### Defined in [src/matcher/regexp/RegExpMatcher.ts:220](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/regexp/RegExpMatcher.ts#L220) -___ +--- ### blacklistedTerms @@ -47,7 +47,7 @@ A list of blacklisted terms. [src/matcher/regexp/RegExpMatcher.ts:225](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/regexp/RegExpMatcher.ts#L225) -___ +--- ### whitelistMatcherTransformers @@ -62,14 +62,14 @@ Transformers will be applied in the order they appear. **`Default`** ```ts -[] +[]; ``` #### Defined in [src/matcher/regexp/RegExpMatcher.ts:236](https://github.com/jo3-l/obscenity/blob/1a1f44e/src/matcher/regexp/RegExpMatcher.ts#L236) -___ +--- ### whitelistedTerms @@ -86,7 +86,7 @@ than the sword.` **`Default`** ```ts -[] +[]; ``` #### Defined in