Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 121 additions & 18 deletions src/components/wiki.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { BotComponent } from "../bot-component.js";
import { Wheatley } from "../wheatley.js";
import { EarlyReplyMode, TextBasedCommandBuilder } from "../command-abstractions/text-based-command-builder.js";
import { TextBasedCommand } from "../command-abstractions/text-based-command.js";
import { index_of_first_unescaped } from "../utils/strings.js";

export const wiki_dir = "wiki/articles";

Expand Down Expand Up @@ -46,7 +47,6 @@ enum parse_state {

const image_regex = /!\[[^\]]*]\(([^)]*)\)/;
const reference_definition_regex = /\s*\[([^\]]*)]: (.+)/;
const reference_link_regex = /\[([^\]]*)]\[([^\]]*)]/g;

class ArticleParser {
private readonly aliases = new Set<string>();
Expand Down Expand Up @@ -216,7 +216,7 @@ class ArticleParser {
}

/**
* Substitutes placeholders such as <br> or reference-style links in the
* Substitutes placeholders such as `<br>` or reference-style links in the
* string, but only outside inline code.
* @param line the line, possibly containing backticks for inline code
*/
Expand All @@ -226,24 +226,124 @@ class ArticleParser {
}
let result = "";
let piece = "";
let in_inline_code = false;
let prev = "";
for (const c of line) {
if (c === "`") {
if (in_inline_code) {
result += piece + c;
piece = "";
let after_escape = false;
for (let i = 0; i < line.length; i++) {
const c = line[i];
if (after_escape) {
// Re-inserting the backslash may seem pointless, but remember that we are passing
// this Markdown through to Discord, which needs the same escapes as we do.
// We still need to be escape-aware so that we know where inline code begins/ends.
piece += "\\";
piece += c;
after_escape = false;
} else if (c === "\\") {
after_escape = true;
} else if (c === "`") {
result += this.substitute_placeholders_no_code(piece);
const end_of_inline_code = index_of_first_unescaped(line, "`", i + 1);
if (end_of_inline_code === null) {
return result + line.substring(i);
}
result += line.substring(i, end_of_inline_code + 1);
i = end_of_inline_code;
piece = "";
} else if (c === "[") {
const masked_link = this.substitute_placeholders_in_link_mask(line, i);
if (masked_link.consumed_length === 0) {
piece += c;
} else {
result += this.substitute_placeholders_no_code(piece);
piece = c;
result += masked_link.result;
piece = "";
// -1 because i is already incremented unconditionally on each iteration
i += masked_link.consumed_length - 1;
}
in_inline_code = prev !== "\\" ? !in_inline_code : in_inline_code;
} else {
piece += c;
}
prev = c;
}
return result + (in_inline_code ? piece : this.substitute_placeholders_no_code(piece));
return result + this.substitute_placeholders_no_code(piece);
}

/**
 * Substitutes placeholders in what could be a link mask, such as `[mask][ref]`.
 * Note that the mask can contain inline code, and may contain nested square brackets.
 *
 * Three forms are recognized once the bracket opened at `start` is closed:
 * - `[mask](url)` is passed through verbatim,
 * - `[mask][ref]` is rewritten to `[mask](url)` using the referenced link for `ref`, and
 * - `[mask]` is treated like `[mask][mask]`.
 *
 * If the text turns out not to be a link mask (e.g. `[bla.`), nothing is consumed, so
 * that the caller can keep processing the text as usual.
 * @param line the line to process
 * @param start the start index on the line; `line[start]` shall be `[`
 * @return An object containing
 * - the `result` string (may be empty string) of the substitution, and
 * - the `consumed_length` within the original string (may be zero).
 * @throws an assertion error if `line[start]` is not `[`, if a `[mask](` or `[mask][` is
 * not terminated on this line, or if the reference lookup fails
 */
private substitute_placeholders_in_link_mask(line: string, start: number): { result: string; consumed_length: number } {
    assert(line[start] === "[");

    let after_escape = false;
    let square_brackets_level = 0;

    for (let i = start; i < line.length; i++) {
        if (after_escape) {
            // The escaped character has no structural meaning, whatever it is.
            after_escape = false;
        } else if (line[i] === "\\") {
            after_escape = true;
        } else if (line[i] === "`") {
            // Skip over inline code entirely, so brackets inside it are not counted.
            const end_of_inline_code = index_of_first_unescaped(line, "`", i + 1);
            if (end_of_inline_code === null) {
                // Unterminated masked link with inline code, such as [`abc
                return { result: "", consumed_length: 0 };
            }
            i = end_of_inline_code;
        } else if (line[i] === "[") {
            square_brackets_level++;
        } else if (line[i] === "]") {
            // The scan starts on a "[", so the level is at least one whenever a "]" is seen.
            assert(square_brackets_level > 0);
            if (--square_brackets_level > 0) {
                // This "]" only closes a nested bracket; the mask is still open.
                continue;
            }
            // Links of the form [mask](url).
            // Discord supports these natively, so no processing is required.
            if (line[i + 1] === "(") {
                const end = index_of_first_unescaped(line, ")", i + 1);
                assert(end !== null, "Masked link with unterminated URL found");
                return {
                    result: line.substring(start, end + 1),
                    consumed_length: end + 1 - start,
                };
            }

            // The mask including its surrounding square brackets.
            const mask = line.substring(start, i + 1);

            // Links of the form [mask][ref], transformed into [mask](url).
            if (line[i + 1] === "[") {
                const end = index_of_first_unescaped(line, "]", i + 2);
                // We could technically allow this and just render it as [mask][,
                // or try to interpret it like [mask][mask][.
                // However, such links are almost certainly unintentional, so let's error.
                // The same reasoning applies to [mask]( above.
                assert(end !== null, "Masked link with unterminated reference found");
                const ref = line.substring(i + 2, end);
                return {
                    result: `${mask}(${this.get_referenced_link(ref)})`,
                    consumed_length: end + 1 - start,
                };
            }

            // Links of the form [mask], equivalent to [mask][mask].
            const ref = line.substring(start + 1, i);
            return {
                result: `${mask}(${this.get_referenced_link(ref)})`,
                consumed_length: i + 1 - start,
            };
        }
    }

    // Reaching this point implies that we have an unterminated mask on the current line,
    // i.e. an opening [ with no closing ].
    // We could error, but this isn't fatal and may even be intentional if the user simply
    // wants to put things into square brackets, spanning a few lines.
    return { result: "", consumed_length: 0 };
}

private substitute_emojis(str: string) {
Expand All @@ -257,23 +357,26 @@ class ArticleParser {
/**
* Substitutes placeholders in a string with no backticks, i.e. no
* possibility of having inline code.
* Masked links should also have been processed at an earlier stage.
* @param str the string to substitute in
*/
private substitute_placeholders_no_code(str: string): string {
const freestanding_result = this.substitute_emojis(str)
.replace(/<br>\n|<br\/>\n/, "\n")
.replaceAll(/<br>|<br\/>/g, "\n")
.replaceAll(reference_link_regex, (_, text: string, ref: string) => {
assert(this.reference_definitions.has(ref), "Unknown reference in reference-style link");
return `[${text}](${this.reference_definitions.get(ref)})`;
});
.replaceAll(/<br>|<br\/>/g, "\n");
return this.wheatley.freestanding
? freestanding_result
: freestanding_result
.replaceAll(/#resources(?![a-zA-Z0-9_])/g, `<#${this.wheatley.channels.resources.id}>`)
.replaceAll(/#rules(?![a-zA-Z0-9_])/g, `<#${this.wheatley.channels.rules.id}>`);
}

/**
 * Resolves the name of a reference-style link to its target.
 * @param ref the reference name, i.e. the `ref` part of `[mask][ref]`
 * @returns the value stored in `reference_definitions` under `ref`
 * @throws an assertion error if nothing is stored under `ref`
 */
private get_referenced_link(ref: string): string {
    const { reference_definitions } = this;
    const target = reference_definitions.get(ref);
    // A missing definition is treated as an authoring error rather than rendered as-is.
    assert(target !== undefined, `Unknown reference "${ref}" in reference-style link`);
    return target;
}

private collect_references(lines: string[]) {
for (const line of lines) {
if (line.match(reference_definition_regex)) {
Expand Down
26 changes: 26 additions & 0 deletions src/utils/strings.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as Discord from "discord.js";
import XXH from "xxhashjs";
import { strict as assert } from "assert";
import { DAY, HOUR, MINUTE, MONTH, YEAR } from "../common.js";
import { round, unwrap } from "./misc.js";
import { remove } from "./arrays.js";
Expand Down Expand Up @@ -137,3 +138,28 @@ export function debug_unicode(str: string) {
/**
 * Shortens a string to at most `length` UTF-16 code units.
 * Strings that already fit are returned unchanged. Longer strings are cut and terminated
 * with `...`, so that the result is exactly `length` long.
 * If `length` is too small to fit the ellipsis (less than 3), the string is simply cut
 * to `length` without an ellipsis; a negative `length` yields an empty string.
 * @param str the string to shorten
 * @param length the maximum length of the result
 * @returns the possibly shortened string
 */
export function truncate(str: string, length: number): string {
    if (str.length <= length) {
        return str;
    }
    const ellipsis = "...";
    if (length < ellipsis.length) {
        // A negative slice end would count from the end of the string and produce a result
        // longer than requested, so fall back to a hard cut.
        return str.slice(0, Math.max(length, 0));
    }
    return str.slice(0, length - ellipsis.length) + ellipsis;
}

/**
 * Finds the first occurrence of the character `c` in `str` that is not escaped,
 * i.e. not directly preceded by an active backslash (`\`).
 * A backslash always escapes the character that follows it, including another backslash.
 * For example, when searching for `"` in `\""`, the result is `2`, the index of the second `"`.
 * @param str the string to search in
 * @param c the single-character string to search for
 * @param start the index at which the search begins, `0` by default
 * @return the index of the first unescaped occurrence of `c`, or `null` if there is none
 */
export function index_of_first_unescaped(str: string, c: string, start: number = 0) {
    assert(c.length === 1, "terminator must be single character");

    let cursor = start;
    while (cursor < str.length) {
        const current = str[cursor];
        if (current === "\\") {
            // Jump over the backslash together with the character it escapes.
            cursor += 2;
            continue;
        }
        if (current === c) {
            return cursor;
        }
        cursor++;
    }
    return null;
}