diff --git a/lib/core/src/errors.ri b/lib/core/src/errors.ri
index 2e33ec71d..f38579bc6 100644
--- a/lib/core/src/errors.ri
+++ b/lib/core/src/errors.ri
@@ -81,3 +81,44 @@ pub struct ReadFailedError < Throwable {
         return self.msg;
     }
 }
+
+/// Thrown when a string is not a well-formed integer (empty, bad digit, misplaced underscore).
+#[boxed]
+pub struct InvalidSyntaxError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+/// Thrown when the requested base is neither 0 nor in the range 2 to 36.
+#[boxed]
+pub struct InvalidBaseError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+/// Thrown when the requested bit size is outside the range 0 to 64.
+#[boxed]
+pub struct InvalidBitSizeError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+/// Thrown when the parsed value does not fit in the requested bit size.
+#[boxed]
+pub struct ValueOutOfRangeError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
diff --git a/lib/core/src/parse_int.ri b/lib/core/src/parse_int.ri
new file mode 100644
index 000000000..77f300a63
--- /dev/null
+++ b/lib/core/src/parse_int.ri
@@ -0,0 +1,225 @@
+// Copyright (C) 2023 The Rivet Developers. All rights reserved.
+// Use of this source code is governed by an MIT license that can
+// be found in the LICENSE file.
+
+/// Returns a byte that equals a lower-case ASCII letter if and only if `c`
+/// is that letter in either case (the 0x20 case bit is forced on).
+#[inline]
+func lower(c: uint8) -> uint8 {
+    return c | (b'x' - b'X');
+}
+
+extend string {
+    /// This is like `parse_int` but for unsigned numbers.
+    /// A sign prefix is not permitted.
+    ///
+    /// Throws `InvalidSyntaxError`, `InvalidBaseError`, `InvalidBitSizeError`
+    /// or `ValueOutOfRangeError` when the string cannot be converted.
+    pub func parse_uint(self, mut base: int32, mut bit_size: uint) -> !uint {
+        if self == "" {
+            throw InvalidSyntaxError("empty string");
+        }
+        s := self;
+        base0 := base == 0;
+
+        mut start_index: uint := 0;
+        match {
+            (base >= 2 and base <= 36) -> { /* valid base; nothing to do */ },
+            (base == 0) -> {
+                // look for octal, hex prefix.
+                base = 10;
+                if s[0] == b'0' {
+                    match {
+                        (s.len >= 3 and lower(s[1]) == b'b') -> {
+                            base = 2;
+                            start_index = 2;
+                        },
+                        (s.len >= 3 and lower(s[1]) == b'o') -> {
+                            base = 8;
+                            start_index = 2;
+                        },
+                        (s.len >= 3 and lower(s[1]) == b'x') -> {
+                            base = 16;
+                            start_index = 2;
+                        },
+                        // manage leading zeros in decimal base's numbers
+                        s.len >= 2 and (s[1] >= b'0' and s[1] <= b'9') -> {
+                            base = 10;
+                            start_index = 1;
+                        },
+                        else -> {
+                            base = 8;
+                            start_index = 1;
+                        }
+                    }
+                }
+            },
+            else -> throw InvalidBaseError("invalid base {}".fmt(base))
+        }
+
+        if bit_size == 0 {
+            bit_size = int.bits();
+        } else if bit_size < 0 or bit_size > 64 {
+            throw InvalidBitSizeError("invalid bit size {}".fmt(bit_size));
+        }
+
+        // Cutoff is the smallest number such that `cutoff * base > uint.MAX`;
+        // once `n` reaches it, `n * base` can no longer be represented.
+        cutoff: uint := (uint.MAX / @as(uint, base)) + 1;
+
+        max_val: uint := if bit_size == 64 {
+            uint.MAX
+        } else {
+            (@as(uint, 1) << bit_size) - 1
+        };
+        mut underscores := false;
+        mut n: uint := 0;
+
+        mut i: uint := start_index;
+        while i < s.len : i += 1 {
+            c := s[i];
+            cl := lower(c);
+
+            mut d: uint8 := 0;
+            match {
+                (c == b'_' and base0) -> {
+                    underscores = true;
+                    continue;
+                },
+                (b'0' <= c and c <= b'9') -> d = c - b'0',
+                // letters follow the decimal digits: 'a'/'A' is 10 ... 'z'/'Z' is 35.
+                (b'a' <= cl and cl <= b'z') -> d = cl - b'a' + 10,
+                else -> throw InvalidSyntaxError("invalid syntax")
+            }
+
+            if d >= @as(uint8, base) {
+                throw InvalidSyntaxError("invalid syntax");
+            }
+
+            if n >= cutoff {
+                // `n * base` overflows
+                throw ValueOutOfRangeError("value out of range");
+            }
+            n *= @as(uint, base);
+
+            n1 := n + @as(uint, d);
+            if n1 < n or n1 > max_val {
+                // `n + d` overflows
+                throw ValueOutOfRangeError("value out of range");
+            }
+            n = n1;
+        }
+
+        if underscores and !underscore_ok(s) {
+            throw InvalidSyntaxError("invalid syntax");
+        }
+
+        return n;
+    }
+
+    /// Interprets a string `s` in the given `base` (0, 2 to 36) and bit size
+    /// (0 to 64) and returns the corresponding value i.
+    ///
+    /// The string may begin with a leading sign: "+" or "-".
+    ///
+    /// If the base argument is 0, the true base is implied by the string's
+    /// prefix following the sign (if present): 2 for "0b", 8 for "0o", 16 for
+    /// "0x", and 10 otherwise. A leading "0" followed by a decimal digit is
+    /// read as decimal with leading zeros; any other leading "0" selects base
+    /// 8. Also, for argument base 0 only, underscore characters are permitted
+    /// as defined by the Rivet syntax for integer literals.
+    ///
+    /// The `bit_size` argument specifies the integer type that the result must
+    /// fit into. Bit sizes 0, 8, 16, 32, and 64 correspond to int, int8, int16,
+    /// int32, and int64. If `bit_size` is below 0 or above 64, an error is
+    /// returned.
+    pub func parse_int(self, base: int32, mut bit_size: uint) -> !int {
+        if self == "" {
+            throw InvalidSyntaxError("invalid syntax");
+        }
+
+        // pick off leading sign.
+        mut s0 := self;
+        mut neg := false;
+        if self[0] == b'+' {
+            s0 = self[1..];
+        } else if self[0] == b'-' {
+            neg = true;
+            s0 = self[1..];
+        }
+
+        // convert unsigned and check range.
+        un := s0.parse_uint(base, bit_size)!;
+        if bit_size == 0 {
+            bit_size = int.bits();
+        }
+
+        // largest magnitude is 2^(bit_size - 1); the parentheses make the
+        // shift amount explicit instead of relying on operator precedence.
+        cutoff := @as(uint, 1) << (bit_size - 1);
+        if !neg and un >= cutoff {
+            throw ValueOutOfRangeError("value out of range");
+        }
+        if neg and un > cutoff {
+            throw ValueOutOfRangeError("value out of range");
+        }
+
+        return if neg { -@as(int, un) } else { @as(int, un) };
+    }
+}
+
+/// Reports whether the underscores in `s_` are allowed.
+/// Checking them in this one function lets all the parsers skip over them simply.
+/// Underscore must appear only between digits or between a base prefix and a digit.
+func underscore_ok(s_: string) -> bool {
+    // saw tracks the last character (class) we saw:
+    //   ^ for beginning of number,
+    //   0 for a digit or base prefix,
+    //   _ for an underscore,
+    //   ! for none of the above.
+    mut saw := b'^';
+    mut i: uint := 0;
+    mut s := s_;
+
+    // optional sign.
+    if s.len >= 1 and (s[0] == b'-' or s[0] == b'+') {
+        s = s[1..];
+    }
+
+    // optional base prefix.
+    mut hex := false;
+    if s.len >= 2 and s[i] == b'0' and (
+        lower(s[1]) == b'b' or lower(s[1]) == b'o' or lower(s[1]) == b'x'
+    ) {
+        // base prefix counts as a digit for "underscore as digit separator"
+        i = 2;
+        saw = b'0';
+        hex = lower(s[1]) == b'x';
+    }
+
+    // number proper.
+    while i < s.len : i += 1 {
+        // digits are always okay.
+        if (b'0' <= s[i] and s[i] <= b'9') or
+            (hex and b'a' <= lower(s[i]) and lower(s[i]) <= b'f') {
+            saw = b'0';
+            continue;
+        }
+        // underscore must follow digit.
+        if s[i] == b'_' {
+            if saw != b'0' {
+                return false;
+            }
+            saw = b'_';
+            continue;
+        }
+        // underscore must also be followed by digit.
+        if saw == b'_' {
+            return false;
+        }
+        // saw non-digit, non-underscore.
+        saw = b'!';
+    }
+
+    return saw != b'_';
+}
diff --git a/lib/core/src/string_to.c.ri b/lib/core/src/string_to.c.ri
new file mode 100644
index 000000000..628ab1579
--- /dev/null
+++ b/lib/core/src/string_to.c.ri
@@ -0,0 +1,71 @@
+// Copyright (C) 2023 The Rivet Developers. All rights reserved.
+// Use of this source code is governed by an MIT license that can
+// be found in the LICENSE file.
+
+extend string {
+    /// Returns `true` only when the string is exactly "true".
+    #[inline]
+    pub func to_bool(self) -> bool {
+        return self == "true";
+    }
+
+    /// Equivalent to `parse_int(self, 0, 8)`, converted to type `int8`.
+    #[inline]
+    pub func to_int8(self) -> !int8 {
+        return @as(int8, self.parse_int(0, 8)!);
+    }
+
+    /// Equivalent to `parse_int(self, 0, 16)`, converted to type `int16`.
+    #[inline]
+    pub func to_int16(self) -> !int16 {
+        return @as(int16, self.parse_int(0, 16)!);
+    }
+
+    /// Equivalent to `parse_int(self, 0, 32)`, converted to type `int32`.
+    #[inline]
+    pub func to_int32(self) -> !int32 {
+        return @as(int32, self.parse_int(0, 32)!);
+    }
+
+    /// Equivalent to `parse_int(self, 0, 64)`, converted to type `int64`.
+    #[inline]
+    pub func to_int64(self) -> !int64 {
+        return @as(int64, self.parse_int(0, 64)!);
+    }
+
+    /// Equivalent to `parse_int(self, 0, int.bits())`, converted to type `int`.
+    #[inline]
+    pub func to_int(self) -> !int {
+        return self.parse_int(0, int.bits())!;
+    }
+
+    /// Equivalent to `self.parse_uint(0, 8)`, converted to type `uint8`.
+    #[inline]
+    pub func to_uint8(self) -> !uint8 {
+        return @as(uint8, self.parse_uint(0, 8)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 16)`, converted to type `uint16`.
+    #[inline]
+    pub func to_uint16(self) -> !uint16 {
+        return @as(uint16, self.parse_uint(0, 16)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 32)`, converted to type `uint32`.
+    #[inline]
+    pub func to_uint32(self) -> !uint32 {
+        return @as(uint32, self.parse_uint(0, 32)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 64)`, converted to type `uint64`.
+    #[inline]
+    pub func to_uint64(self) -> !uint64 {
+        return @as(uint64, self.parse_uint(0, 64)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, uint.bits())`, converted to type `uint`.
+    #[inline]
+    pub func to_uint(self) -> !uint {
+        return self.parse_uint(0, uint.bits())!;
+    }
+}
\ No newline at end of file
diff --git a/lib/core/tests/string_test.ri b/lib/core/tests/string_test.ri
index 26fee29e8..d94d3906b 100644
--- a/lib/core/tests/string_test.ri
+++ b/lib/core/tests/string_test.ri
@@ -109,3 +109,66 @@ test "string.split_into_lines()" {
         @assert(line == line_content or line == "");
     }
 }
+
+test "string.to_int32()" {
+    @assert("16".to_int32()! == 16);
+    @assert("+16".to_int32()! == 16);
+    @assert("-16".to_int32()! == -16);
+    @assert("0xff".to_int32()! == 255);
+
+    // invalid strings
+    @assert(if _ := "".to_int32() {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "str".to_int32() {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "string_longer_than_10_chars".to_int32() {
+        false
+    } else {
+        true
+    });
+}
+
+test "string.parse_int()" {
+    // different bases
+    @assert("16".parse_int(16, 0)! == 0x16);
+    @assert("16".parse_int(8, 0)! == 0o16);
+    @assert("11".parse_int(2, 0)! == 3);
+
+    // different bit sizes
+    @assert("127".parse_int(10, 8)! == 127);
+    @assert("32767".parse_int(10, 16)! == 32767);
+    @assert("2147483647".parse_int(10, 32)! == 2147483647);
+    @assert("9223372036854775807".parse_int(10, 64)! == 9223372036854775807);
+    @assert("baobab".parse_int(36, 64)! == 683058467);
+
+    // letter digits are case-insensitive and start at 10
+    @assert("ff".parse_int(16, 0)! == 255);
+    @assert("FF".parse_int(16, 0)! == 255);
+    @assert("z".parse_int(36, 0)! == 35);
+
+    // signed range limits
+    @assert("-128".parse_int(10, 8)! == -128);
+    @assert(if _ := "128".parse_int(10, 8) {
+        false
+    } else {
+        true
+    });
+
+    // invalid bit sizes
+    @assert(if _ := "123".parse_int(10, -1) {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "123".parse_int(10, 65) {
+        false
+    } else {
+        true
+    });
+}