Skip to content

Commit

Permalink
Implemented Swift lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
vallentin committed Jul 4, 2023
1 parent 444586d commit 5640751
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 32 deletions.
2 changes: 2 additions & 0 deletions any-lexer/src/lexers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod jsonc;
mod python;
mod rust;
mod scss;
mod swift;

pub use self::c::*;
pub use self::cpp::*;
Expand All @@ -15,3 +16,4 @@ pub use self::jsonc::*;
pub use self::python::*;
pub use self::rust::*;
pub use self::scss::*;
pub use self::swift::*;
122 changes: 122 additions & 0 deletions any-lexer/src/lexers/swift.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use text_scanner::ext::{SwiftScannerExt, SwiftStrExt};
use text_scanner::Scanner;

use crate::{impl_lexer_from_scanner, ScanToken, ScannerExt, TokenSpan};

/// Token kinds produced by the Swift lexer.
///
/// Each variant names the rule in `SwiftToken::scan_token` that matched
/// the token's text.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum SwiftToken {
/// A non-empty run of whitespace.
Space,
/// A line comment, as matched by `scan_swift_line_comment`.
LineComment,
/// A block comment, as matched by `scan_swift_block_comment`.
BlockComment,
/// An identifier that is not a `nil` literal, boolean literal or keyword.
Ident,
/// An attribute name, as matched by `scan_swift_attribute_name`.
AttribName,
/// An identifier for which `is_swift_keyword` holds.
Keyword,
/// An operator, as matched by `scan_swift_operator`.
Punct,
/// A delimiter, as matched by `scan_swift_delimiter`.
Delim,
/// An identifier for which `is_swift_nil_literal` holds.
Nil,
/// An identifier for which `is_swift_boolean_literal` holds.
Boolean,
/// An integer literal.
Int,
/// A floating-point literal.
Float,
/// A string literal, as matched by `scan_swift_string_literal`.
String,
/// A regex literal, as matched by `scan_swift_regex_literal`.
Regex,
/// A single character that none of the other rules matched.
///
/// Given valid Swift code, then this variant should never be encountered.
/// If it is encountered, then check if an issue has already been submitted,
/// otherwise please [submit an issue].
///
/// [submit an issue]: https://github.com/vallentin/colorblast/issues
Unknown,
}

impl ScanToken for SwiftToken {
    /// Scans the next token starting at the scanner's current position.
    ///
    /// Rules are tried in a fixed order and the first one that matches wins:
    /// whitespace, line/block comments, identifiers (further classified as
    /// `nil`, boolean, keyword or plain identifier), attribute names, numeric
    /// literals, string literals, regex literals, operators and delimiters.
    /// If nothing matches, a single character is consumed and reported as
    /// [`SwiftToken::Unknown`], so every character of the input ends up in
    /// exactly one token.
    ///
    /// Returns `None` when `scanner.next()` fails after no rule matched
    /// (presumably only at the end of the input).
    fn scan_token<'text>(scanner: &mut Scanner<'text>) -> Option<(Self, TokenSpan<'text>)> {
        let (r, _s) = scanner.skip_whitespace();
        if !r.is_empty() {
            return Some((Self::Space, scanner.span(r)));
        }

        if let Ok((r, _s)) = scanner.scan_swift_line_comment() {
            return Some((Self::LineComment, scanner.span(r)));
        }
        if let Ok((r, _s)) = scanner.scan_swift_block_comment() {
            return Some((Self::BlockComment, scanner.span(r)));
        }

        if let Ok((r, ident)) = scanner.scan_swift_identifier() {
            // Literal-like words and keywords are scanned as identifiers
            // first and then reclassified; the order of the checks matters.
            let tok = if ident.is_swift_nil_literal() {
                Self::Nil
            } else if ident.is_swift_boolean_literal() {
                Self::Boolean
            } else if ident.is_swift_keyword() {
                Self::Keyword
            } else {
                Self::Ident
            };
            return Some((tok, scanner.span(r)));
        }
        if let Ok((r, _c)) = scanner.scan_swift_attribute_name() {
            return Some((Self::AttribName, scanner.span(r)));
        }

        if let Ok((r, text)) = scanner.scan_swift_float_literal() {
            // The float scanner also accepts plain integers, so the matched
            // text decides the token kind. A literal is floating-point when it
            // has a fraction (`.`) or an exponent. The exponent marker is
            // `e`/`E` for decimal literals but `p`/`P` for hexadecimal ones,
            // where `e`/`E` are ordinary digits.
            let unsigned = text.strip_prefix('-').unwrap_or(text);
            let exponent_markers: &[char] = if unsigned.starts_with("0x") {
                &['p', 'P']
            } else {
                &['e', 'E']
            };
            let tok = if text.contains('.') || unsigned.contains(exponent_markers) {
                Self::Float
            } else {
                Self::Int
            };
            return Some((tok, scanner.span(r)));
        }
        if let Ok((r, _s)) = scanner.scan_swift_int_literal() {
            return Some((Self::Int, scanner.span(r)));
        }

        if let Ok((r, _s)) = scanner.scan_swift_string_literal() {
            return Some((Self::String, scanner.span(r)));
        }
        if let Ok((r, _c)) = scanner.scan_swift_regex_literal() {
            return Some((Self::Regex, scanner.span(r)));
        }

        if let Ok((r, _c)) = scanner.scan_swift_operator() {
            return Some((Self::Punct, scanner.span(r)));
        }
        if let Ok((r, _c)) = scanner.scan_swift_delimiter() {
            return Some((Self::Delim, scanner.span(r)));
        }

        // Fallback: consume one character so the lexer always makes progress.
        let (r, _c) = scanner.next().ok()?;
        Some((Self::Unknown, scanner.span(r)))
    }
}

/// Swift lexer producing [`SwiftToken`]s.
///
/// Wraps a [`Scanner`] over the borrowed input text.
///
/// **Note:** Cloning `SwiftLexer` is essentially a copy, as it just contains
/// a `&str` and a `usize` for its `cursor`. However, `Copy` is not
/// implemented, to avoid accidentally copying immutable `SwiftLexer`s.
#[derive(Clone, Debug)]
pub struct SwiftLexer<'text> {
// Scanner over the input text; created in `SwiftLexer::new`.
scanner: Scanner<'text>,
}

impl<'text> SwiftLexer<'text> {
    /// Creates a lexer over `text`, backed by a freshly constructed
    /// [`Scanner`] for that text.
    #[inline]
    pub fn new(text: &'text str) -> Self {
        let scanner = Scanner::new(text);
        Self { scanner }
    }
}

// Generates the lexer plumbing for `SwiftLexer` from its `scanner` field and
// `SwiftToken` -- presumably including the iteration over `(token, span)`
// pairs that the test below relies on; see `impl_lexer_from_scanner!` to confirm.
impl_lexer_from_scanner!('text, SwiftLexer<'text>, SwiftToken, scanner);

#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trip check: concatenating the text of every span yielded by
    /// `SwiftLexer` must reproduce the input exactly.
    #[test]
    fn test_swift_lexer_spans() {
        // The fixture is deliberately Rust source rather than Swift: the test
        // only verifies that no character is dropped or duplicated, not that
        // the tokens are classified correctly.
        let input = include_str!("../../../text-scanner/src/ext/rust.rs");

        let mut output = String::new();
        SwiftLexer::new(input)
            .into_iter()
            .for_each(|(_tok, span)| output.push_str(span.as_str()));

        assert_eq!(input, output);
    }
}
101 changes: 69 additions & 32 deletions text-scanner/src/ext/swift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ pub const SWIFT_OPERATORS: &[&str] = &[
".",
".&", ".|", ".^",
".&=", ".|=", ".^=",
",", ";",
"#", "\\",
// "is", "as", "as?", "as!",
];

Expand Down Expand Up @@ -243,8 +245,8 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
'?' => {
_ = scanner.accept_char('?');
}
// :
':' => {}
// : ; ,
':' | ';' | ',' | '#' | '\\' => {}
_ => return Err(scanner.ranged_text(first)),
}
Ok(())
Expand Down Expand Up @@ -292,6 +294,7 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
#[inline]
fn scan_swift_int_dec_literal(&mut self) -> ScannerResult<'text, &'text str> {
self.scan_with(|scanner| {
_ = scanner.accept_char('-');
scanner.accept_if_ext(char::is_ascii_digit)?;
scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
Ok(())
Expand All @@ -304,6 +307,7 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
#[inline]
fn scan_swift_int_hex_literal(&mut self) -> ScannerResult<'text, &'text str> {
self.scan_with(|scanner| {
_ = scanner.accept_char('-');
scanner.accept_char('0')?;
scanner.accept_char('x')?;

Expand All @@ -322,6 +326,7 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
#[inline]
fn scan_swift_int_oct_literal(&mut self) -> ScannerResult<'text, &'text str> {
self.scan_with(|scanner| {
_ = scanner.accept_char('-');
scanner.accept_char('0')?;
scanner.accept_char('o')?;

Expand All @@ -340,6 +345,7 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
#[inline]
fn scan_swift_int_bin_literal(&mut self) -> ScannerResult<'text, &'text str> {
self.scan_with(|scanner| {
_ = scanner.accept_char('-');
scanner.accept_char('0')?;
scanner.accept_char('b')?;

Expand Down Expand Up @@ -368,7 +374,8 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
scanner.scan_swift_int_dec_literal()?;

if scanner.accept_char('.').is_ok() {
scanner.scan_swift_int_dec_literal()?;
scanner.accept_if_ext(char::is_ascii_digit)?;
scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
}

if scanner.accept_char_any(&['e', 'E']).is_ok() {
Expand All @@ -387,6 +394,7 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
// - https://docs.swift.org/swift-book/documentation/the-swift-programming-language/thebasics#Floating-Point-Numbers
fn scan_swift_float_hex_literal(&mut self) -> ScannerResult<'text, &'text str> {
self.scan_with(|scanner| {
_ = scanner.accept_char('-');
scanner.scan_swift_int_hex_literal()?;

if scanner.accept_char('.').is_ok() {
Expand All @@ -397,7 +405,8 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {
scanner.accept_char_any(&['p', 'P'])?;

_ = scanner.accept_char_any(&['+', '-']);
scanner.scan_swift_int_dec_literal()?;
scanner.accept_if_ext(char::is_ascii_digit)?;
scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));

Ok(())
})
Expand Down Expand Up @@ -431,38 +440,44 @@ impl<'text> SwiftScannerExt<'text> for Scanner<'text> {

break;
}
Ok((_r, '\\')) => {
if let Ok((_, '(')) = scanner.next() {
let mut nested = 0;
loop {
scanner.skip_until_char_any(&['(', ')', '"', '\\']);
match scanner.next() {
Ok((_r, '(')) => {
nested += 1;
}
Ok((_r, ')')) => {
if nested == 0 {
continue 'scan;
Ok((r, '\\')) => {
let inner_hashes = scanner.skip_while_char('#').0.len();
if hashes == inner_hashes {
if let Ok((_, '(')) = scanner.next() {
let mut nested = 0;
loop {
scanner.skip_until_char_any(&['(', ')', '"', '\\']);
match scanner.next() {
Ok((_r, '(')) => {
nested += 1;
}
nested -= 1;
}
Ok((r, '"')) => {
scanner.cursor = r.start;
scanner.scan_swift_string_literal()?;
}
Ok((_r, '\\')) => {
// Skip the next character as it is escaped
// Note: Technically any character is not valid
_ = scanner.next();
Ok((_r, ')')) => {
if nested == 0 {
continue 'scan;
}
nested -= 1;
}
Ok((r, '"')) => {
scanner.cursor = r.start;
scanner.scan_swift_string_literal()?;
}
Ok((_r, '\\')) => {
// Skip the next character as it is escaped
// Note: Technically any character is not valid
_ = scanner.next();
}
Ok(_) => unreachable!(),
Err(_) => break,
}
Ok(_) => unreachable!(),
Err(_) => break,
}
} else {
scanner.cursor = r.end;

// Skip the next character as it is escaped
// Note: Technically any character is not valid
_ = scanner.next();
}
}
// else
// Skip the next character as it is escaped
// Note: Technically any character is not valid
}
Ok(_) => unreachable!(),
Err(_) => break,
Expand Down Expand Up @@ -657,13 +672,17 @@ mod tests {
fn test_swift_int_dec_literals() {
let cases = [
"0",
"-0",
"2",
"00",
"0000",
"-0000",
"0_00_0",
"0_00_0__",
"-0_00_0__",
"2147_483_648",
"2147483648",
"-2147483648",
];

assert_valid_cases!(scan_swift_int_dec_literal, cases);
Expand All @@ -675,7 +694,7 @@ mod tests {

#[test]
fn test_swift_int_dec_literals_invalid() {
let cases = ["_0", "_10", "+1", "-123"];
let cases = ["_0", "_10"];

assert_invalid_cases!(scan_swift_int_dec_literal, cases);
assert_invalid_cases!(scan_swift_int_literal, cases);
Expand All @@ -684,6 +703,11 @@ mod tests {
#[test]
fn test_swift_int_hex_literals() {
let cases = [
"0x0",
"-0x0",
"0xf",
"0xF",
"-0xF",
"0xDada_Cafe",
"0x00_FF__00_FF",
"0x100000000",
Expand All @@ -706,9 +730,12 @@ mod tests {
#[test]
fn test_swift_int_oct_literals() {
let cases = [
"0o0",
"-0o0",
"0o372",
"0o777",
"0o177_7777_7777",
"-0o177_7777_7777",
"0o200_0000_0000",
"0o377_7777_7777",
"0o7_7777_7777_7777_7777_7777",
Expand All @@ -728,8 +755,10 @@ mod tests {
let cases = [
"0b0",
"0b1",
"-0b1",
//
"0b0111_1111_1111_1111_1111_1111_1111_1111",
"-0b0111_1111_1111_1111_1111_1111_1111_1111",
"0b1000_0000_0000_0000_0000_0000_0000_0000",
"0b1111_1111_1111_1111_1111_1111_1111_1111",
"0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111",
Expand All @@ -749,10 +778,12 @@ mod tests {
let cases = [
"0",
"1",
"-1",
"123",
"1.5",
"2.0",
"3.14",
"-3.14",
"1e1",
"1e137",
"1e+15",
Expand All @@ -761,6 +792,7 @@ mod tests {
//
"000123.456",
"1_000_000",
"-1_000_000",
"1_000_000.000_000_1",
];

Expand All @@ -776,6 +808,7 @@ mod tests {
let cases = [
"0x0.0p0",
"0xC.3p0",
"-0xC.3p0",
"0xF.Fp9",
"0xFFFF.FFFFp9999",
"0xFFFF.FFFFp+9999",
Expand Down Expand Up @@ -817,6 +850,10 @@ mod tests {
r#""1 2 \((""))""#,
r#""1 2 \(("\""))""#,
r#""1 2 \(("\("3\"4")"))""#,
//
r#""\(multiplier) times 2.5 is \(Double(multiplier) * 2.5)""#,
r###"#"Write an interpolated string in Swift using \(multiplier."#"###,
r###"#"6 times 7 is \#(6 * 7)."#"###,
];

assert_valid_cases!(scan_swift_string_literal, cases);
Expand Down

0 comments on commit 5640751

Please sign in to comment.