-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(core): add `string.parse_int()` and `string.parse_uint()`
Remove `std/conv`, use `string` methods instead.
- Loading branch information
Showing
4 changed files
with
371 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
// Copyright (C) 2023 The Rivet Developers. All rights reserved. | ||
// Use of this source code is governed by an MIT license that can | ||
// be found in the LICENSE file. | ||
|
||
/// Returns `c` with the ASCII case bit (`b'x' - b'X'`) set.
/// For ASCII letters this yields the lower-case letter; decimal digits are
/// returned unchanged because they already have that bit set.
#[inline]
func lower(c: uint8) -> uint8 {
    case_bit := b'x' - b'X';
    return c | case_bit;
}
|
||
extend string {
    /// Parses `self` as an unsigned integer in the given `base` (0, 2 to 36)
    /// that must fit into `bit_size` bits (0 to 64).
    ///
    /// This is like `parse_int` but for unsigned numbers: a sign prefix is
    /// not permitted.
    ///
    /// If `base` is 0, the real base is taken from the string's prefix:
    /// 2 for "0b", 8 for "0o", 16 for "0x". A leading "0" directly followed
    /// by a decimal digit is read as a decimal number with leading zeros; any
    /// other leading "0" selects base 8. Without a leading "0" the base is 10.
    /// For base 0 only, underscores are accepted as digit separators and are
    /// validated with `underscore_ok`.
    ///
    /// A `bit_size` of 0 means `uint.bits()`.
    ///
    /// Throws:
    /// - `InvalidSyntaxError` if the string is empty or contains a character
    ///   that is not a valid digit for the base (or a misplaced underscore).
    /// - `InvalidBaseError` if `base` is not 0 or in the range 2 to 36.
    /// - `InvalidBitSizeError` if `bit_size` is above 64.
    /// - `ValueOutOfRangeError` if the value does not fit into `bit_size` bits.
    pub func parse_uint(self, mut base: int32, mut bit_size: uint) -> !uint {
        if self == "" {
            throw InvalidSyntaxError("empty string");
        }
        s := self;
        // Underscores are only tolerated when the base is auto-detected.
        base0 := base == 0;

        // Index of the first digit, i.e. just past any base prefix.
        mut start_index: uint := 0;
        match {
            (base >= 2 and base <= 36) -> { /* valid base; nothing to do */ },
            (base == 0) -> {
                // look for a binary, octal or hex prefix.
                base = 10;
                if s[0] == b'0' {
                    match {
                        (s.len >= 3 and lower(s[1]) == b'b') -> {
                            base = 2;
                            start_index = 2;
                        },
                        (s.len >= 3 and lower(s[1]) == b'o') -> {
                            base = 8;
                            start_index = 2;
                        },
                        (s.len >= 3 and lower(s[1]) == b'x') -> {
                            base = 16;
                            start_index = 2;
                        },
                        // manage leading zeros in decimal base's numbers
                        (s.len >= 2 and (s[1] >= b'0' and s[1] <= b'9')) -> {
                            base = 10;
                            start_index = 1;
                        },
                        else -> {
                            base = 8;
                            start_index = 1;
                        }
                    }
                }
            },
            else -> throw InvalidBaseError("invalid base {}".fmt(base))
        }

        // `bit_size` is unsigned, so only the upper bound needs checking.
        if bit_size == 0 {
            bit_size = uint.bits();
        } else if bit_size > 64 {
            throw InvalidBitSizeError("invalid bit size {}".fmt(bit_size));
        }

        // `cutoff` is the smallest number such that `cutoff * base > uint.MAX`.
        cutoff: uint := (uint.MAX / @as(uint, base)) + 1;

        // Largest value representable in `bit_size` bits.
        max_val: uint := if bit_size == 64 {
            uint.MAX
        } else {
            (@as(uint, 1) << bit_size) - 1
        };
        mut underscores := false;
        mut n: uint := 0;

        mut i: uint := start_index;
        while i < s.len : i += 1 {
            c := s[i];
            cl := lower(c);

            // Numeric value of the current digit.
            mut d: uint8 := 0;
            match {
                (c == b'_' and base0) -> {
                    // Skip now; positions are validated once after the loop.
                    underscores = true;
                    continue;
                },
                (b'0' <= c and c <= b'9') -> d = c - b'0',
                // Letters continue after the decimal digits: 'a' is 10 ... 'z' is 35.
                (b'a' <= cl and cl <= b'z') -> d = cl - b'a' + 10,
                else -> throw InvalidSyntaxError("invalid syntax")
            }

            if d >= @as(uint8, base) {
                throw InvalidSyntaxError("invalid syntax");
            }

            if n >= cutoff {
                // `n * base` overflows
                throw ValueOutOfRangeError("value out of range");
            }
            n *= @as(uint, base);

            n1 := n + @as(uint, d);
            if n1 < n or n1 > max_val {
                // `n + d` overflows
                throw ValueOutOfRangeError("value out of range");
            }
            n = n1;
        }

        if underscores and !underscore_ok(s) {
            throw InvalidSyntaxError("invalid syntax");
        }

        return n;
    }

    /// Interprets the string in the given `base` (0, 2 to 36) and bit size
    /// (0 to 64) and returns the corresponding signed value.
    ///
    /// The string may begin with a leading sign: "+" or "-".
    ///
    /// If the base argument is 0, the true base is implied by the string's
    /// prefix following the sign (if present): 2 for "0b", 8 for "0o",
    /// 16 for "0x", and 10 otherwise. A leading "0" directly followed by a
    /// decimal digit is read as decimal with leading zeros; any other leading
    /// "0" selects base 8. Also, for argument base 0 only, underscore
    /// characters are permitted as defined by the Rivet syntax for integer
    /// literals.
    ///
    /// The `bit_size` argument specifies the integer type that the result must
    /// fit into. Bit sizes 0, 8, 16, 32, and 64 correspond to int, int8, int16,
    /// int32, and int64. If `bit_size` is above 64, an error is thrown.
    ///
    /// Throws `InvalidSyntaxError` for an empty string, forwards every error
    /// thrown by `parse_uint`, and throws `ValueOutOfRangeError` when the
    /// magnitude does not fit into a signed integer of `bit_size` bits.
    pub func parse_int(self, base: int32, mut bit_size: uint) -> !int {
        if self == "" {
            throw InvalidSyntaxError("invalid syntax");
        }

        // pick off leading sign.
        mut s0 := self;
        mut neg := false;
        if self[0] == b'+' {
            s0 = self[1..];
        } else if self[0] == b'-' {
            neg = true;
            s0 = self[1..];
        }

        // convert unsigned and check range.
        un := s0.parse_uint(base, bit_size)!;
        if bit_size == 0 {
            bit_size = int.bits();
        }

        // Magnitude of the smallest negative value: 2^(bit_size - 1).
        // Parenthesized so the result does not depend on operator precedence.
        cutoff := @as(uint, 1) << (bit_size - 1);
        if !neg and un >= cutoff {
            throw ValueOutOfRangeError("value out of range");
        }
        if neg and un > cutoff {
            throw ValueOutOfRangeError("value out of range");
        }

        return if neg { -@as(int, un) } else { @as(int, un) };
    }
}
|
||
/// Reports whether every underscore in `s_` sits in an allowed position.
/// Doing the validation here, in one place, lets the parsers simply skip
/// underscores while reading digits.
/// An underscore is allowed only between two digits, or between a base
/// prefix ("0b", "0o", "0x") and a digit.
func underscore_ok(s_: string) -> bool {
    // Drop an optional leading sign.
    mut text := s_;
    if text.len >= 1 and (text[0] == b'-' or text[0] == b'+') {
        text = text[1..];
    }

    // `prev` is the class of the previously seen character:
    // ^ for beginning of number,
    // 0 for a digit or base prefix,
    // _ for an underscore,
    // ! for none of the above.
    mut prev := b'^';
    mut pos: uint := 0;
    mut is_hex := false;

    // Optional base prefix; it counts as a digit for the purpose of
    // "underscore as digit separator".
    if text.len >= 2 and text[0] == b'0' {
        marker := lower(text[1]);
        if marker == b'b' or marker == b'o' or marker == b'x' {
            pos = 2;
            prev = b'0';
            is_hex = marker == b'x';
        }
    }

    // Number proper.
    while pos < text.len : pos += 1 {
        ch := text[pos];
        if (b'0' <= ch and ch <= b'9') or
            (is_hex and b'a' <= lower(ch) and lower(ch) <= b'f') {
            // Digits are always okay.
            prev = b'0';
        } else if ch == b'_' {
            // An underscore must follow a digit.
            if prev != b'0' {
                return false;
            }
            prev = b'_';
        } else if prev == b'_' {
            // An underscore must also be followed by a digit.
            return false;
        } else {
            // Neither digit nor underscore.
            prev = b'!';
        }
    }

    // A trailing underscore is not allowed.
    return prev != b'_';
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// Copyright (C) 2023 The Rivet Developers. All rights reserved. | ||
// Use of this source code is governed by an MIT license that can | ||
// be found in the LICENSE file. | ||
|
||
extend string {
    /// Returns `true` only when the string is exactly "true".
    #[inline]
    pub func to_bool(self) -> bool {
        is_true := self == "true";
        return is_true;
    }

    /// Equivalent to `self.parse_int(0, 8)`, converted to type `int8`.
    #[inline]
    pub func to_int8(self) -> !int8 {
        parsed := self.parse_int(0, 8)!;
        return @as(int8, parsed);
    }

    /// Equivalent to `self.parse_int(0, 16)`, converted to type `int16`.
    #[inline]
    pub func to_int16(self) -> !int16 {
        parsed := self.parse_int(0, 16)!;
        return @as(int16, parsed);
    }

    /// Equivalent to `self.parse_int(0, 32)`, converted to type `int32`.
    #[inline]
    pub func to_int32(self) -> !int32 {
        parsed := self.parse_int(0, 32)!;
        return @as(int32, parsed);
    }

    /// Equivalent to `self.parse_int(0, 64)`, converted to type `int64`.
    #[inline]
    pub func to_int64(self) -> !int64 {
        parsed := self.parse_int(0, 64)!;
        return @as(int64, parsed);
    }

    /// Equivalent to `self.parse_int(0, int.bits())`; the result is already
    /// of type `int`.
    #[inline]
    pub func to_int(self) -> !int {
        width := int.bits();
        return self.parse_int(0, width)!;
    }

    /// Equivalent to `self.parse_uint(0, 8)`, converted to type `uint8`.
    #[inline]
    pub func to_uint8(self) -> !uint8 {
        parsed := self.parse_uint(0, 8)!;
        return @as(uint8, parsed);
    }

    /// Equivalent to `self.parse_uint(0, 16)`, converted to type `uint16`.
    #[inline]
    pub func to_uint16(self) -> !uint16 {
        parsed := self.parse_uint(0, 16)!;
        return @as(uint16, parsed);
    }

    /// Equivalent to `self.parse_uint(0, 32)`, converted to type `uint32`.
    #[inline]
    pub func to_uint32(self) -> !uint32 {
        parsed := self.parse_uint(0, 32)!;
        return @as(uint32, parsed);
    }

    /// Equivalent to `self.parse_uint(0, 64)`, converted to type `uint64`.
    #[inline]
    pub func to_uint64(self) -> !uint64 {
        parsed := self.parse_uint(0, 64)!;
        return @as(uint64, parsed);
    }

    /// Equivalent to `self.parse_uint(0, uint.bits())`; the result is already
    /// of type `uint`.
    #[inline]
    pub func to_uint(self) -> !uint {
        width := uint.bits();
        return self.parse_uint(0, width)!;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters