diff --git a/lib/core/src/bool.ri b/lib/core/src/bool.ri
index 0ea68af09..a32accdda 100644
--- a/lib/core/src/bool.ri
+++ b/lib/core/src/bool.ri
@@ -3,6 +3,16 @@
 // be found in the LICENSE file.
 
 extend bool < Stringable {
+    #[inline]
+    pub func to_int(self) -> int {
+        return if self { 1 } else { 0 };
+    }
+
+    #[inline]
+    pub func to_uint(self) -> uint {
+        return if self { 1 } else { 0 };
+    }
+
     #[inline]
     pub func to_string(&self) -> string {
         return if self.* { "true" } else { "false" };
diff --git a/lib/core/src/errors.ri b/lib/core/src/errors.ri
index 2e33ec71d..f38579bc6 100644
--- a/lib/core/src/errors.ri
+++ b/lib/core/src/errors.ri
@@ -81,3 +81,40 @@ pub struct ReadFailedError < Throwable {
         return self.msg;
     }
 }
+
+#[boxed]
+pub struct InvalidSyntaxError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+#[boxed]
+pub struct InvalidBaseError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+#[boxed]
+pub struct InvalidBitSizeError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
+#[boxed]
+pub struct ValueOutOfRangeError < Throwable {
+    msg: string;
+
+    pub func to_string(self) -> string {
+        return self.msg;
+    }
+}
+
diff --git a/lib/core/src/parse_int.ri b/lib/core/src/parse_int.ri
new file mode 100644
index 000000000..77f300a63
--- /dev/null
+++ b/lib/core/src/parse_int.ri
@@ -0,0 +1,215 @@
+// Copyright (C) 2023 The Rivet Developers. All rights reserved.
+// Use of this source code is governed by an MIT license that can
+// be found in the LICENSE file.
+
+#[inline]
+func lower(c: uint8) -> uint8 {
+    return c | (b'x' - b'X');
+}
+
+extend string {
+    /// This is like `parse_int` but for unsigned numbers.
+    /// A sign prefix is not permitted.
+    pub func parse_uint(self, mut base: int32, mut bit_size: uint) -> !uint {
+        if self == "" {
+            throw InvalidSyntaxError("empty string");
+        }
+        s := self;
+        base0 := base == 0;
+
+        mut start_index: uint := 0;
+        match {
+            (base >= 2 and base <= 36) -> { /* valid base; nothing to do */ },
+            (base == 0) -> {
+                // look for binary, octal, hex prefix.
+                base = 10;
+                if s[0] == b'0' {
+                    match {
+                        (s.len >= 3 and lower(s[1]) == b'b') -> {
+                            base = 2;
+                            start_index = 2;
+                        },
+                        (s.len >= 3 and lower(s[1]) == b'o') -> {
+                            base = 8;
+                            start_index = 2;
+                        },
+                        (s.len >= 3 and lower(s[1]) == b'x') -> {
+                            base = 16;
+                            start_index = 2;
+                        },
+                        // manage leading zeros in decimal base's numbers
+                        s.len >= 2 and (s[1] >= b'0' and s[1] <= b'9') -> {
+                            base = 10;
+                            start_index = 1;
+                        },
+                        else -> {
+                            base = 8;
+                            start_index = 1;
+                        }
+                    }
+                }
+            },
+            else -> throw InvalidBaseError("invalid base {}".fmt(base))
+        }
+
+        if bit_size == 0 {
+            bit_size = int.bits();
+        } else if bit_size < 0 or bit_size > 64 {
+            throw InvalidBitSizeError("invalid bit size {}".fmt(bit_size));
+        }
+
+        // Cutoff is the smallest number such that `cutoff * base > uint.MAX`.
+        // Any `n >= cutoff` would therefore overflow when multiplied by `base`.
+        cutoff: uint := (uint.MAX / @as(uint, base)) + 1;
+
+        max_val: uint := if bit_size == 64 {
+            uint.MAX
+        } else {
+            (@as(uint, 1) << bit_size) - 1
+        };
+        mut underscores := false;
+        mut n: uint := 0;
+
+        mut i: uint := start_index;
+        while i < s.len : i += 1 {
+            c := s[i];
+            cl := lower(c);
+
+            mut d: uint8 := 0;
+            match {
+                (c == b'_' and base0) -> {
+                    underscores = true;
+                    continue;
+                },
+                (b'0' <= c and c <= b'9') -> d = c - b'0',
+                (b'a' <= cl and cl <= b'z') -> d = cl - b'a' + 10,
+                else -> throw InvalidSyntaxError("invalid syntax")
+            }
+
+            if d >= @as(uint8, base) {
+                throw InvalidSyntaxError("invalid syntax");
+            }
+
+            if n >= cutoff {
+                // `n * base` overflows
+                throw ValueOutOfRangeError("value out of range");
+            }
+            n *= @as(uint, base);
+
+            n1 := n + @as(uint, d);
+            if n1 < n or n1 > max_val {
+                // `n + d` overflows
+                throw ValueOutOfRangeError("value out of range");
+            }
+            n = n1;
+        }
+
+        if underscores and !underscore_ok(s) {
+            throw InvalidSyntaxError("invalid syntax");
+        }
+
+        return n;
+    }
+
+    /// Interprets a string `s` in the given `base` (0, 2 to 36) and bit size
+    /// (0 to 64) and returns the corresponding value i.
+    ///
+    /// The string may begin with a leading sign: "+" or "-".
+    ///
+    /// If the base argument is 0, the true base is implied by the string's
+    /// prefix following the sign (if present): 2 for "0b", 8 for "0o", 16 for
+    /// "0x"; "0" followed by a decimal digit is decimal with the leading zero
+    /// skipped, any other "0" prefix is octal, and the base is 10 otherwise.
+    /// For base 0 only, underscores are permitted as in Rivet integer literals.
+    ///
+    /// The `bit_size` argument specifies the integer type that the result must
+    /// fit into. Bit sizes 0, 8, 16, 32, and 64 correspond to int, int8, int16,
+    /// int32, and int64. If `bit_size` is above 64, an error is returned.
+    pub func parse_int(self, base: int32, mut bit_size: uint) -> !int {
+        if self == "" {
+            throw InvalidSyntaxError("invalid syntax");
+        }
+
+        // pick off leading sign.
+        mut s0 := self;
+        mut neg := false;
+        if self[0] == b'+' {
+            s0 = self[1..];
+        } else if self[0] == b'-' {
+            neg = true;
+            s0 = self[1..];
+        }
+
+        // convert unsigned and check range.
+        un := s0.parse_uint(base, bit_size)!;
+        if bit_size == 0 {
+            bit_size = int.bits();
+        }
+
+        cutoff := @as(uint, 1) << (bit_size - 1);
+        if !neg and un >= cutoff {
+            throw ValueOutOfRangeError("value out of range");
+        }
+        if neg and un > cutoff {
+            throw ValueOutOfRangeError("value out of range");
+        }
+
+        return if neg { -@as(int, un) } else { @as(int, un) };
+    }
+}
+
+/// Reports whether the underscores in `s_` are allowed.
+/// Checking them in this one function lets all the parsers skip over them simply.
+/// Underscore must appear only between digits or between a base prefix and a digit.
+func underscore_ok(s_: string) -> bool {
+    // saw tracks the last character (class) we saw:
+    // ^ for beginning of number,
+    // 0 for a digit or base prefix,
+    // _ for an underscore,
+    // ! for none of the above.
+    mut saw := b'^';
+    mut i: uint := 0;
+    mut s := s_;
+
+    // optional sign.
+    if s.len >= 1 and (s[0] == b'-' or s[0] == b'+') {
+        s = s[1..];
+    }
+
+    // optional base prefix.
+    mut hex := false;
+    if s.len >= 2 and s[i] == b'0' and (
+        lower(s[1]) == b'b' or lower(s[1]) == b'o' or lower(s[1]) == b'x'
+    ) {
+        // base prefix counts as a digit for "underscore as digit separator"
+        i = 2;
+        saw = b'0';
+        hex = lower(s[1]) == b'x';
+    }
+
+    // number proper.
+    while i < s.len : i += 1 {
+        // digits are always okay.
+        if (b'0' <= s[i] and s[i] <= b'9') or
+            (hex and b'a' <= lower(s[i]) and lower(s[i]) <= b'f') {
+            saw = b'0';
+            continue;
+        }
+        // underscore must follow digit.
+        if s[i] == b'_' {
+            if saw != b'0' {
+                return false;
+            }
+            saw = b'_';
+            continue;
+        }
+        // underscore must also be followed by digit.
+        if saw == b'_' {
+            return false;
+        }
+        // saw non-digit, non-underscore.
+        saw = b'!';
+    }
+
+    return saw != b'_';
+}
diff --git a/lib/core/src/string_to.c.ri b/lib/core/src/string_to.c.ri
new file mode 100644
index 000000000..628ab1579
--- /dev/null
+++ b/lib/core/src/string_to.c.ri
@@ -0,0 +1,70 @@
+// Copyright (C) 2023 The Rivet Developers. All rights reserved.
+// Use of this source code is governed by an MIT license that can
+// be found in the LICENSE file.
+
+extend string {
+    #[inline]
+    pub func to_bool(self) -> bool {
+        return self == "true";
+    }
+
+    /// Equivalent to `self.parse_int(0, 8)`, converted to type `int8`.
+    #[inline]
+    pub func to_int8(self) -> !int8 {
+        return @as(int8, self.parse_int(0, 8)!);
+    }
+
+    /// Equivalent to `self.parse_int(0, 16)`, converted to type `int16`.
+    #[inline]
+    pub func to_int16(self) -> !int16 {
+        return @as(int16, self.parse_int(0, 16)!);
+    }
+
+    /// Equivalent to `self.parse_int(0, 32)`, converted to type `int32`.
+    #[inline]
+    pub func to_int32(self) -> !int32 {
+        return @as(int32, self.parse_int(0, 32)!);
+    }
+
+    /// Equivalent to `self.parse_int(0, 64)`, converted to type `int64`.
+    #[inline]
+    pub func to_int64(self) -> !int64 {
+        return @as(int64, self.parse_int(0, 64)!);
+    }
+
+    /// Equivalent to `self.parse_int(0, int.bits())`, converted to type `int`.
+    #[inline]
+    pub func to_int(self) -> !int {
+        return self.parse_int(0, int.bits())!;
+    }
+
+    /// Equivalent to `self.parse_uint(0, 8)`, converted to type `uint8`.
+    #[inline]
+    pub func to_uint8(self) -> !uint8 {
+        return @as(uint8, self.parse_uint(0, 8)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 16)`, converted to type `uint16`.
+    #[inline]
+    pub func to_uint16(self) -> !uint16 {
+        return @as(uint16, self.parse_uint(0, 16)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 32)`, converted to type `uint32`.
+    #[inline]
+    pub func to_uint32(self) -> !uint32 {
+        return @as(uint32, self.parse_uint(0, 32)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, 64)`, converted to type `uint64`.
+    #[inline]
+    pub func to_uint64(self) -> !uint64 {
+        return @as(uint64, self.parse_uint(0, 64)!);
+    }
+
+    /// Equivalent to `self.parse_uint(0, uint.bits())`, converted to type `uint`.
+    #[inline]
+    pub func to_uint(self) -> !uint {
+        return self.parse_uint(0, uint.bits())!;
+    }
+}
diff --git a/lib/core/tests/string_test.ri b/lib/core/tests/string_test.ri
index 26fee29e8..d94d3906b 100644
--- a/lib/core/tests/string_test.ri
+++ b/lib/core/tests/string_test.ri
@@ -109,3 +109,52 @@ test "string.split_into_lines()" {
         @assert(line == line_content or line == "");
     }
 }
+
+test "string.to_int32()" {
+    @assert("16".to_int32()! == 16);
+    @assert("+16".to_int32()! == 16);
+    @assert("-16".to_int32()! == -16);
+
+    // invalid strings
+    @assert(if _ := "".to_int32() {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "str".to_int32() {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "string_longer_than_10_chars".to_int32() {
+        false
+    } else {
+        true
+    });
+}
+
+test "string.parse_int()" {
+    // different bases
+    @assert("16".parse_int(16, 0)! == 0x16);
+    @assert("16".parse_int(8, 0)! == 0o16);
+    @assert("11".parse_int(2, 0)! == 3);
+
+    // different bit sizes
+    @assert("127".parse_int(10, 8)! == 127);
+    @assert("32767".parse_int(10, 16)! == 32767);
+    @assert("2147483647".parse_int(10, 32)! == 2147483647);
+    @assert("9223372036854775807".parse_int(10, 64)! == 9223372036854775807);
+    @assert("baobab".parse_int(36, 64)! == 683058467);
+
+    // invalid bit sizes
+    @assert(if _ := "123".parse_int(10, -1) {
+        false
+    } else {
+        true
+    });
+    @assert(if _ := "123".parse_int(10, 65) {
+        false
+    } else {
+        true
+    });
+}
diff --git a/lib/rivet/src/codegen/exprs.ri b/lib/rivet/src/codegen/exprs.ri
index d5e80cdf4..a42e7491c 100644
--- a/lib/rivet/src/codegen/exprs.ri
+++ b/lib/rivet/src/codegen/exprs.ri
@@ -13,7 +13,7 @@ extend Codegen {
             .Paren as paren -> self.gen_expr(paren.expr),
             //.Ident -> {},
             .BoolLiteral as bool_lit -> .Const(
-                .IntConst(conv.bool_to_int(bool_lit.value)), self.bool_t
+                .IntConst(bool_lit.value.to_int()), self.bool_t
             ),
             .IntegerLiteral as int_lit -> .Const(
                 .IntConst(conv.string_to_int(int_lit.value) catch @unreachable()),
diff --git a/lib/rivet/src/utils/mod.ri b/lib/rivet/src/utils/mod.ri
index d1071beb7..15b2ec87c 100644
--- a/lib/rivet/src/utils/mod.ri
+++ b/lib/rivet/src/utils/mod.ri
@@ -102,7 +102,8 @@ pub func green(msg: string) -> string {
 }
 
 /// Rounds the number `n` up to the next mult
-/// NOTE: `multiple` must be a power of 2.#[inline]
+/// NOTE: `multiple` must be a power of 2.
+#[inline]
 pub func round_up(n: uint, multiple: uint) -> uint {
     n_ := @as(int, n);
     multiple_ := @as(int, multiple);
diff --git a/lib/std/src/conv/bool_to.ri b/lib/std/src/conv/bool_to.ri
deleted file mode 100644
index 0618c6122..000000000
--- a/lib/std/src/conv/bool_to.ri
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright (C) 2023 The Rivet Developers. All rights reserved.
-// Use of this source code is governed by an MIT license that can
-// be found in the LICENSE file.
-
-#[inline]
-pub func bool_to_uint(b: bool) -> uint {
-    return if b { 1 } else { 0 };
-}
-
-#[inline]
-pub func bool_to_int(b: bool) -> int {
-    return if b { 1 } else { 0 };
-}
diff --git a/lib/std/src/fs/Path.ri b/lib/std/src/fs/Path.ri
index a8225ccc1..effc71dba 100644
--- a/lib/std/src/fs/Path.ri
+++ b/lib/std/src/fs/Path.ri
@@ -6,7 +6,6 @@
 import c/libc;
 import core;
 import ../mem;
-import ../conv;
 
 pub struct Path {
     pub alias MAX_LEN := libc.MAX_PATH_LEN;
@@ -318,7 +317,7 @@ pub struct Path {
             }
             prev_sep := Self.is_separator(prev_path[prev_path.len - 1]);
             this_sep := Self.is_separator(this_path[0]);
-            sum += conv.bool_to_uint(!prev_sep and !this_sep);
+            sum += (!prev_sep and !this_sep).to_uint();
             sum += if prev_sep and this_sep { this_path.len - 1 } else { this_path.len };
             prev_path = this_path;
         }