-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfileformat.ts
64 lines (59 loc) · 2.02 KB
/
fileformat.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import type { Predicate } from "@core/unknownutil/type";
import { isLiteralOneOf } from "@core/unknownutil/is/literal-one-of";
/**
* Name of a line-ending convention handled by this module:
* `"unix"` (`\n`), `"dos"` (`\r\n`) or `"mac"` (`\r`).
*/
export type FileFormat = "unix" | "dos" | "mac";
/**
* Type predicate for {@link FileFormat}.
*
* Built by passing the three literals `"unix"`, `"dos"` and `"mac"` to
* `isLiteralOneOf`, so it is meant to accept exactly those strings.
*/
export const isFileFormat: Predicate<FileFormat> = isLiteralOneOf(
["unix", "dos", "mac"] as const,
);
/**
 * Line terminator associated with each file format name.
 */
const fileFormatDelimiters = {
  unix: "\n",
  dos: "\r\n",
  mac: "\r",
};

/**
 * Split `text` into lines, treating it as a POSIX "Text File".
 *
 * @param text The text to split.
 * @param fileformat Selects the line terminator used as the separator.
 * @returns The lines without their terminators. A single trailing empty
 *   entry (produced when the text ends with the terminator, or is empty)
 *   is dropped.
 */
export function splitText(text: string, fileformat: FileFormat): string[] {
  const lines = text.split(fileFormatDelimiters[fileformat]);

  // POSIX defines a line as including its terminating newline, so a
  // well-formed text file ends with the terminator and `split` yields one
  // extra empty string at the end. That entry is not a line; discard it.
  //
  // > 3.206 Line
  // > A sequence of zero or more non- <newline> characters plus a
  // > terminating <newline> character.
  // >
  // > 3.403 Text File
  // > A file that contains characters organized into zero or more lines.
  //
  // https://pubs.opengroup.org/onlinepubs/9699919799/
  if (lines[lines.length - 1] === "") {
    lines.pop();
  }
  return lines;
}
/**
 * Detect the file format of `text`, limited to the candidates in `fileformats`.
 *
 * Candidates are tried in a fixed priority order, independent of their
 * position in `fileformats`:
 *
 * 1. `"dos"`  - the text contains `\r\n` and no `\n` that lacks a preceding `\r`.
 * 2. `"unix"` - the text contains `\n`.
 * 3. `"mac"`  - the text contains `\r`.
 *
 * @param text The text to inspect.
 * @param fileformats The formats that may be returned; the first entry is
 *   used as the fallback when nothing matches.
 * @returns The detected format, or `fileformats[0]` (which is `undefined`
 *   for an empty list) when no candidate matches.
 */
export function findFileFormat(
  text: string,
  fileformats: readonly FileFormat[],
): FileFormat | undefined {
  // A "bare" LF is one not preceded by CR. The negative lookbehind also
  // matches an LF at the very start of the text, which a `[^\r]\n` pattern
  // would miss because it needs a character in front of the LF.
  if (
    fileformats.includes("dos") &&
    /\r\n/.test(text) &&
    !/(?<!\r)\n/.test(text)
  ) {
    return "dos";
  }
  if (fileformats.includes("unix") && /\n/.test(text)) {
    return "unix";
  }
  if (fileformats.includes("mac") && /\r/.test(text)) {
    return "mac";
  }
  // No line terminator matched any candidate: fall back to the first one.
  return fileformats[0];
}