From 79ed2daf0d1b8e94415e99d34302fadbccf7a477 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Mon, 4 Jun 2018 16:33:10 -0700 Subject: [PATCH] Syntax: More stringent escape handling. This makes a few changes to strings and byte/char literals: - Allow _ in unicode escapes. - Reject byte/char literals with too many chars. - Reject non-ascii in byte literals. - Reject ascii escape with >0x7f. - Reject bad escape sequences. cc #284 --- RustEnhanced.sublime-syntax | 64 ++++++++++++++++++----- syntax_test_rust.rs | 101 ++++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 14 deletions(-) diff --git a/RustEnhanced.sublime-syntax b/RustEnhanced.sublime-syntax index adc156c8..40d57c5f 100644 --- a/RustEnhanced.sublime-syntax +++ b/RustEnhanced.sublime-syntax @@ -8,7 +8,7 @@ scope: source.rust variables: identifier: '(?:[[:alpha:]][_[:alnum:]]*|_[_[:alnum:]]+)' escaped_byte: '\\([nrt0\"''\\]|x\h{2})' - escaped_char: '\\([nrt0\"''\\]|x\h{2}|u\{\h{1,6}\})' + escaped_char: '\\([nrt0\"''\\]|x[0-7]\h|u\{\h_*(?:\h_*){,5}\})' int_suffixes: '[iu](?:8|16|32|64|128|size)' float_suffixes: 'f(32|64)' contexts: @@ -902,22 +902,35 @@ contexts: - include: char - include: byte - escaped-byte: - - match: '{{escaped_byte}}' - scope: constant.character.escape.rust - byte: - - match: "(b)(')(?=([^'\\\\]|{{escaped_byte}})')" + - match: "(b)(')" captures: 1: storage.type.string.rust 2: punctuation.definition.string.begin.rust push: - meta_include_prototype: false - meta_scope: string.quoted.single.rust - - match: \' - scope: punctuation.definition.string.end.rust + # ASCII except ', \, \n, \r or \t + - match: '[\x00-\x08\x0b-\x0c\x0e-\x26\x28-\x5b\x5d-\x7f]' + set: byte-tail + # Don't mark entire file invalid while writing, even though this is + # not valid syntax. + - match: '\n' pop: true - - include: escaped-byte + - match: '{{escaped_byte}}' + scope: constant.character.escape.rust + set: byte-tail + - match: '' + set: byte-tail + + byte-tail: + - match: "'" + scope: string.quoted.single.rust punctuation.definition.string.end.rust + pop: true + - match: '\n' + pop: true + - match: '.' + scope: invalid.illegal.byte.rust byte-string: - match: '(b)(")' @@ -930,7 +943,10 @@ contexts: - match: '"' scope: punctuation.definition.string.end.rust pop: true - - include: escaped-byte + - match: '{{escaped_byte}}' + scope: constant.character.escape.rust + - match: '\\.' + scope: invalid.illegal.character.escape.rust raw-byte-string: - match: (br)(#*)" @@ -947,16 +963,36 @@ contexts: escaped-char: - match: '{{escaped_char}}' scope: constant.character.escape.rust + - match: '\\u{[^}]*}' + scope: invalid.illegal.character.escape.rust + - match: '\\.' + scope: invalid.illegal.character.escape.rust char: - - match: "'(?=([^'\\\\]|{{escaped_char}})')" + - match: "'" scope: punctuation.definition.string.begin.rust push: - meta_scope: string.quoted.single.rust - - match: \' - scope: punctuation.definition.string.end.rust + - match: "[^'\\\\\n\r\t]" + set: char-tail + # Don't mark entire file invalid while writing, even though this is + # not valid syntax. + - match: '\n' pop: true - - include: escaped-char + - match: '{{escaped_char}}' + scope: constant.character.escape.rust + set: char-tail + - match: '' + set: char-tail + + char-tail: + - match: "'" + scope: string.quoted.single.rust punctuation.definition.string.end.rust + pop: true + - match: '\n' + pop: true + - match: '.' + scope: invalid.illegal.char.rust string: - match: '"' diff --git a/syntax_test_rust.rs b/syntax_test_rust.rs index 19411025..3f821f88 100644 --- a/syntax_test_rust.rs +++ b/syntax_test_rust.rs @@ -66,6 +66,107 @@ let raw_bytes = br#"This won't escape anything either \x01 \""#; // ^^ storage.type // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ string.quoted.double - constant.character.escape +let b_simple = b'a'; +// ^^^^ string.quoted.single +// ^ storage.type.string +// ^ punctuation.definition.string.begin +// ^ punctuation.definition.string.end +// ^ punctuation.terminator +let b_newline = b'\n'; +// ^^^^^ string.quoted.single +// ^^ string.quoted.single constant.character.escape +let b_nul = b'\0'; +// ^^ string.quoted.single constant.character.escape +let b_back = b'\\'; +// ^^ string.quoted.single constant.character.escape +let b_quote = b'\''; +// ^^ string.quoted.single constant.character.escape +let b_esc_nul = b'\x00'; +// ^^^^ string.quoted.single constant.character.escape +let b_esc_255 = b'\xff'; +// ^^^^ string.quoted.single constant.character.escape +let b_esc_inv = b'\a'; +// ^^ invalid.illegal.byte +// ^ string.quoted.single punctuation.definition.string.end +let b_inv_len = b'abc'; +// ^ string.quoted.single +// ^^ invalid.illegal.byte +// ^ string.quoted.single punctuation.definition.string.end +let b_inv_uni = b'♥'; +// ^ invalid.illegal.byte +// ^ string.quoted.single punctuation.definition.string.end +let b_inv_empty = b''; +// ^^^ string.quoted.single +// ^ punctuation.definition.string.begin +// ^ punctuation.definition.string.end +let b_unclosed1 = b' +// Avoid error on entire file. +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ comment.line.double-slash - invalid - string + +let bs_newline = b"abc\n"; +// ^^^^^^^^ string.quoted.double +// ^ punctuation.definition.string.begin +// ^^ constant.character.escape +// ^ punctuation.definition.string.end +// ^ punctuation.terminator +let bs_nul = b"abc\0"; +// ^^ string.quoted.double constant.character.escape +let bs_esc_nul = b"abc\x00"; +// ^^^^ string.quoted.double constant.character.escape +let bs_esc_255 = b"abc\xff"; +// ^^^^ string.quoted.double constant.character.escape +let bs_esc_inv = b"abc\a"; +// ^^ string.quoted.double invalid.illegal.character.escape +// ^ string.quoted.double punctuation.definition.string.end - invalid + +let char_newline = '\n'; +// ^^^^ string.quoted.single +// ^ punctuation.definition.string.begin +// ^^ constant.character.escape +// ^ punctuation.definition.string.end +// ^ punctuation.terminator +let char_nul = '\0'; +// ^^ string.quoted.single constant.character.escape +let char_extra_inv = 'ab'; +// ^ string.quoted.single +// ^ invalid.illegal.char +// ^ string.quoted.single punctuation.definition.string.end +let char_ascii_esc_nul = '\x00'; +// ^^^^ string.quoted.single constant.character.escape +let char_ascii_esc_127 = '\x7f'; +// ^^^^ string.quoted.single constant.character.escape +let char_ascii_inv_255 = '\xff'; +// ^^^^ invalid.illegal.char +let char_uni_esc = '\u{3b1}'; +// ^^^^^^^ string.quoted.single constant.character.escape +let char_uni_esc_empty = '\u{}'; +// ^^^^ invalid.illegal.char +let char_uni_esc_under_start = '\u{_1_}'; +// ^^^^^^^ invalid.illegal.char +let char_uni_esc_under1 = '\u{1_}'; +// ^^^^^^ string.quoted.single constant.character.escape +let char_uni_esc_under2 = '\u{1_2__3___}'; +// ^^^^^^^^^^^^^ string.quoted.single constant.character.escape +let char_uni_esc_under3 = '\u{10__FFFF}'; +// ^^^^^^^^^^^^ string.quoted.single constant.character.escape +let char_uni_esc_extra = '\u{1234567}'; +// ^^^^^^^^^^^ invalid.illegal.char + +let s_ascii_inv_255 = "\xff"; +// ^^ string.quoted.double invalid.illegal.character.escape +let s_uni_esc_empty = "\u{}"; +// ^^^^ string.quoted.double invalid.illegal.character.escape +let s_uni_esc_under_start = "\u{_1_}"; +// ^^^^^^^ string.quoted.double invalid.illegal.character.escape +let s_uni_esc_under1 = "\u{1_}"; +// ^^^^^^ string.quoted.double constant.character.escape +let s_uni_esc_under2 = "\u{1_2__3___}"; +// ^^^^^^^^^^^^^ string.quoted.double constant.character.escape +let s_uni_esc_under3 = "\u{10__FFFF}"; +// ^^^^^^^^^^^^ string.quoted.double constant.character.escape +let s_uni_esc_extra = "\u{1234567}"; +// ^^^^^^^^^^^ string.quoted.double invalid.illegal.character.escape + 0; // <- constant.numeric.integer.decimal 1_000u32;