Skip to content

Commit 49e1743

Browse files
authored
Asserter#generateTypeChecks: Ignore RestElements for ObjectPattern (#178)
* Asserter#generateTypeChecks: Ignore RestElements for ObjectPattern * src-transpiler/Asserter.js: Handle RestElement in ArrayPattern * Add unit tests
1 parent cda8b69 commit 49e1743

File tree

6 files changed

+239
-4
lines changed

6 files changed

+239
-4
lines changed

src-transpiler/Asserter.js

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,12 @@ class Asserter extends Stringifier {
439439
const tmp = JSON.stringify(templates, null, 2).replaceAll('\n', '\n' + spaces);
440440
out += `\n${spaces}const rtiTemplates = ${tmp};`;
441441
}
442+
function newlineBeforeFirst() {
443+
if (first) {
444+
out += '\n';
445+
first = false;
446+
}
447+
}
442448
//out += `${spaces}/*${spaces} node.type=${node.type}\n${spaces}
443449
// ${JSON.stringify(jsdoc)}\n${parent}\n${spaces}*/\n`;
444450
for (let name in params) {
@@ -476,11 +482,19 @@ class Asserter extends Stringifier {
476482
if (param.left.type === 'ArrayPattern' && type.type === 'array') {
477483
// Add a type assertion for each element of the ArrayPattern
478484
for (const element of param.left.elements) {
485+
if (element.type === 'RestElement') {
486+
// Case for:
487+
// function test([a, b, ...rest] = [1, 2, 3]) {return rest;}
488+
/** @todo We could typecheck the rest element too */
489+
// console.log("Ignore RestElement", element);
490+
continue;
491+
}
479492
if (element.type !== 'Identifier') {
480493
this.warn('Only Identifier case handled right now');
481494
continue;
482495
}
483496
const t = JSON.stringify(type.elementType, null, 2).replaceAll('\n', '\n' + spaces);
497+
newlineBeforeFirst();
484498
if (templates) {
485499
out += `${spaces}if (!inspectTypeWithTemplates(${element.name}, ${t}, '${loc}', '${nameFancy}', rtiTemplates)) {\n`;
486500
} else {
@@ -493,6 +507,11 @@ class Asserter extends Stringifier {
493507
if (type.type === 'object') {
494508
// Add a type assertion for each property of the ObjectPattern
495509
for (const property of param.left.properties) {
510+
if (property.type !== 'ObjectProperty') {
511+
// E.g. case of RestElement, {a, b, c, ...kwargs}
512+
// console.log("Ignore property", property);
513+
continue;
514+
}
496515
if (property.key.type !== 'Identifier') {
497516
this.warn('ObjectPattern> Only Identifier case handled right now');
498517
continue;
@@ -512,6 +531,7 @@ class Asserter extends Stringifier {
512531
continue;
513532
}
514533
const t = JSON.stringify(subType, null, 2).replaceAll('\n', '\n' + spaces);
534+
newlineBeforeFirst();
515535
if (templates) {
516536
out += `${spaces}if (!inspectTypeWithTemplates(${keyName}, ${t}, '${loc}', '${nameFancy}', rtiTemplates)) {\n`;
517537
} else {
@@ -555,10 +575,7 @@ class Asserter extends Stringifier {
555575
) {
556576
prevCheck = 'arguments.length !== 0 && ';
557577
}
558-
if (first) {
559-
out += '\n';
560-
first = false;
561-
}
578+
newlineBeforeFirst();
562579
if (templates) {
563580
out += `${spaces}if (${prevCheck}!inspectTypeWithTemplates(${name}, ${t}, '${loc}', '${nameFancy}', rtiTemplates)) {\n`;
564581
} else {

test/typechecking.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@
103103
"input": "./test/typechecking/simple-ArrayPattern-input.mjs",
104104
"output": "./test/typechecking/simple-ArrayPattern-output.mjs"
105105
},
106+
{
107+
"input": "./test/typechecking/simple-ArrayPattern-rest-input.mjs",
108+
"output": "./test/typechecking/simple-ArrayPattern-rest-output.mjs"
109+
},
106110
{
107111
"input": "./test/typechecking/simple-ObjectMethod-computed-input.mjs",
108112
"output": "./test/typechecking/simple-ObjectMethod-computed-output.mjs"
@@ -119,6 +123,10 @@
119123
"input": "./test/typechecking/simple-ObjectPattern-input.mjs",
120124
"output": "./test/typechecking/simple-ObjectPattern-output.mjs"
121125
},
126+
{
127+
"input": "./test/typechecking/simple-ObjectPattern-rest-input.mjs",
128+
"output": "./test/typechecking/simple-ObjectPattern-rest-output.mjs"
129+
},
122130
{
123131
"input": "./test/typechecking/simple-ObjectPattern-typedef-input.mjs",
124132
"output": "./test/typechecking/simple-ObjectPattern-typedef-output.mjs"
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
2+
* @param {number[]} arr - The number array.
3+
* @returns {number[]} The return value.
4+
*/
5+
function test([a, b, ...rest] = [1, 2, 3]) {
6+
return rest;
7+
}
8+
const ret = test([10, 20]);
9+
console.log('ret', ret); // Quiz for you!
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/**
2+
* @param {number[]} arr - The number array.
3+
* @returns {number[]} The return value.
4+
*/
5+
function test([a, b, ...rest] = [1, 2, 3]) {
6+
if (!inspectType(a, "number", 'test', 'arr')) {
7+
youCanAddABreakpointHere();
8+
}
9+
if (!inspectType(b, "number", 'test', 'arr')) {
10+
youCanAddABreakpointHere();
11+
}
12+
return rest;
13+
}
14+
const ret = test([10, 20]);
15+
console.log('ret', ret); // Quiz for you!
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
class Tokenizer {
2+
/**
3+
* Converts a list of message objects with `"role"` and `"content"` keys to a list of token
4+
* ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
5+
* determine the format and control tokens to use when converting. When chat_template is None, it will fall back
6+
* to the default_chat_template specified at the class level.
7+
*
8+
* See [here](https://huggingface.co/docs/transformers/chat_templating) for more information.
9+
*
10+
* **Example:** Applying a chat template to a conversation.
11+
*
12+
* ```javascript
13+
* import { AutoTokenizer } from "@xenova/transformers";
14+
*
15+
* const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
16+
*
17+
* const chat = [
18+
* { "role": "user", "content": "Hello, how are you?" },
19+
* { "role": "assistant", "content": "I'm doing great. How can I help you today?" },
20+
* { "role": "user", "content": "I'd like to show off how chat templating works!" },
21+
* ]
22+
*
23+
* const text = tokenizer.apply_chat_template(chat, { tokenize: false });
24+
* // "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
25+
*
26+
* const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
27+
* // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]
28+
* ```
29+
*
30+
* @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys.
31+
* @param {Object} options An optional object containing the following properties:
32+
* @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If
33+
* this is not passed, the model's default chat template will be used instead.
34+
* @param {boolean} [options.add_generation_prompt=false] Whether to end the prompt with the token(s) that indicate
35+
* the start of an assistant message. This is useful when you want to generate a response from the model.
36+
* Note that this argument will be passed to the chat template, and so it must be supported in the
37+
* template for this argument to have any effect.
38+
* @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string.
39+
* @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false.
40+
* @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.
41+
* @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
42+
* If not specified, the tokenizer's `max_length` attribute will be used as a default.
43+
* @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
44+
* @param {Object} [options.tokenizer_kwargs={}] Additional options to pass to the tokenizer.
45+
* @returns {string | Tensor | number[]| number[][]} The tokenized output.
46+
*/
47+
apply_chat_template(conversation, {
48+
chat_template = null,
49+
add_generation_prompt = false,
50+
tokenize = true,
51+
padding = false,
52+
truncation = false,
53+
max_length = null,
54+
return_tensor = true,
55+
tokenizer_kwargs = {},
56+
...kwargs
57+
} = {}) {
58+
return kwargs;
59+
}
60+
}
61+
const tokenizer = new Tokenizer();
62+
tokenizer.apply_chat_template([1, 2, 3], {
63+
chat_template: "nope",
64+
lol: 123,
65+
});
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
class Tokenizer {
2+
/**
3+
* Converts a list of message objects with `"role"` and `"content"` keys to a list of token
4+
* ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
5+
* determine the format and control tokens to use when converting. When chat_template is None, it will fall back
6+
* to the default_chat_template specified at the class level.
7+
*
8+
* See [here](https://huggingface.co/docs/transformers/chat_templating) for more information.
9+
*
10+
* **Example:** Applying a chat template to a conversation.
11+
*
12+
* ```javascript
13+
* import { AutoTokenizer } from "@xenova/transformers";
14+
*
15+
* const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
16+
*
17+
* const chat = [
18+
* { "role": "user", "content": "Hello, how are you?" },
19+
* { "role": "assistant", "content": "I'm doing great. How can I help you today?" },
20+
* { "role": "user", "content": "I'd like to show off how chat templating works!" },
21+
* ]
22+
*
23+
* const text = tokenizer.apply_chat_template(chat, { tokenize: false });
24+
* // "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
25+
*
26+
* const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
27+
* // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]
28+
* ```
29+
*
30+
* @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys.
31+
* @param {Object} options An optional object containing the following properties:
32+
* @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If
33+
* this is not passed, the model's default chat template will be used instead.
34+
* @param {boolean} [options.add_generation_prompt=false] Whether to end the prompt with the token(s) that indicate
35+
* the start of an assistant message. This is useful when you want to generate a response from the model.
36+
* Note that this argument will be passed to the chat template, and so it must be supported in the
37+
* template for this argument to have any effect.
38+
* @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string.
39+
* @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false.
40+
* @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.
41+
* @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
42+
* If not specified, the tokenizer's `max_length` attribute will be used as a default.
43+
* @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
44+
* @param {Object} [options.tokenizer_kwargs={}] Additional options to pass to the tokenizer.
45+
* @returns {string | Tensor | number[]| number[][]} The tokenized output.
46+
*/
47+
apply_chat_template(conversation, {
48+
chat_template = null,
49+
add_generation_prompt = false,
50+
tokenize = true,
51+
padding = false,
52+
truncation = false,
53+
max_length = null,
54+
return_tensor = true,
55+
tokenizer_kwargs = {},
56+
...kwargs
57+
} = {}) {
58+
if (!inspectType(conversation, {
59+
"type": "array",
60+
"elementType": "Message",
61+
"optional": false
62+
}, 'Tokenizer#apply_chat_template', 'conversation')) {
63+
youCanAddABreakpointHere();
64+
}
65+
if (!inspectType(chat_template, {
66+
"type": "string",
67+
"optional": true
68+
}, 'Tokenizer#apply_chat_template', 'options')) {
69+
youCanAddABreakpointHere();
70+
}
71+
if (!inspectType(add_generation_prompt, {
72+
"type": "boolean",
73+
"optional": true
74+
}, 'Tokenizer#apply_chat_template', 'options')) {
75+
youCanAddABreakpointHere();
76+
}
77+
if (!inspectType(tokenize, {
78+
"type": "boolean",
79+
"optional": true
80+
}, 'Tokenizer#apply_chat_template', 'options')) {
81+
youCanAddABreakpointHere();
82+
}
83+
if (!inspectType(padding, {
84+
"type": "boolean",
85+
"optional": true
86+
}, 'Tokenizer#apply_chat_template', 'options')) {
87+
youCanAddABreakpointHere();
88+
}
89+
if (!inspectType(truncation, {
90+
"type": "boolean",
91+
"optional": true
92+
}, 'Tokenizer#apply_chat_template', 'options')) {
93+
youCanAddABreakpointHere();
94+
}
95+
if (!inspectType(max_length, {
96+
"type": "number",
97+
"optional": true
98+
}, 'Tokenizer#apply_chat_template', 'options')) {
99+
youCanAddABreakpointHere();
100+
}
101+
if (!inspectType(return_tensor, {
102+
"type": "boolean",
103+
"optional": true
104+
}, 'Tokenizer#apply_chat_template', 'options')) {
105+
youCanAddABreakpointHere();
106+
}
107+
if (!inspectType(tokenizer_kwargs, {
108+
"type": "object",
109+
"optional": true
110+
}, 'Tokenizer#apply_chat_template', 'options')) {
111+
youCanAddABreakpointHere();
112+
}
113+
return kwargs;
114+
}
115+
}
116+
registerClass(Tokenizer);
117+
const tokenizer = new Tokenizer();
118+
tokenizer.apply_chat_template([1, 2, 3], {
119+
chat_template: "nope",
120+
lol: 123,
121+
});

0 commit comments

Comments (0)