Skip to content

Commit

Permalink
feat(core): add string.parse_int() and string.parse_uint()
Browse files Browse the repository at this point in the history
Remove `std/conv`, use `string` methods instead.
  • Loading branch information
StunxFS committed Dec 2, 2023
1 parent faa6fa0 commit c4246b1
Show file tree
Hide file tree
Showing 4 changed files with 371 additions and 0 deletions.
37 changes: 37 additions & 0 deletions lib/core/src/errors.ri
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,40 @@ pub struct ReadFailedError < Throwable {
return self.msg;
}
}

/// `Throwable` raised when a string does not have valid number syntax.
///
/// `string.parse_uint()` throws it for an empty string, for a character that
/// is not a digit, for a digit that is not valid in the selected base and for
/// misplaced underscores; `string.parse_int()` throws it for an empty string.
#[boxed]
pub struct InvalidSyntaxError < Throwable {
// Message describing the failure; returned verbatim by `to_string`.
msg: string;

/// Returns the message stored in `msg`.
pub func to_string(self) -> string {
return self.msg;
}
}

/// `Throwable` raised by `string.parse_uint()` when the requested base is
/// neither 0 nor in the range 2 to 36.
#[boxed]
pub struct InvalidBaseError < Throwable {
// Message describing the failure; returned verbatim by `to_string`.
msg: string;

/// Returns the message stored in `msg`.
pub func to_string(self) -> string {
return self.msg;
}
}

/// `Throwable` raised by `string.parse_uint()` when the requested bit size is
/// greater than 64.
#[boxed]
pub struct InvalidBitSizeError < Throwable {
// Message describing the failure; returned verbatim by `to_string`.
msg: string;

/// Returns the message stored in `msg`.
pub func to_string(self) -> string {
return self.msg;
}
}

/// `Throwable` raised by `string.parse_uint()` and `string.parse_int()` when
/// the parsed value does not fit into the requested bit size.
#[boxed]
pub struct ValueOutOfRangeError < Throwable {
// Message describing the failure; returned verbatim by `to_string`.
msg: string;

/// Returns the message stored in `msg`.
pub func to_string(self) -> string {
return self.msg;
}
}

215 changes: 215 additions & 0 deletions lib/core/src/parse_int.ri
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Copyright (C) 2023 The Rivet Developers. All rights reserved.
// Use of this source code is governed by an MIT license that can
// be found in the LICENSE file.

/// Folds an ASCII letter to lower case by setting its case bit.
///
/// `lower(c)` is a lower-case letter if and only if `c` is that letter or its
/// upper-case equivalent, so `lower(c) == b'x'` replaces
/// `c == b'x' or c == b'X'`. Bytes that are not letters may be mapped to other
/// non-letter bytes.
#[inline]
func lower(c: uint8) -> uint8 {
    // The distance between an upper-case letter and its lower-case form is
    // the single bit that distinguishes the two cases.
    case_bit := b'x' - b'X';
    return c | case_bit;
}

extend string {
/// Parses the string as an unsigned integer in the given `base` and returns
/// its value. This is like `parse_int` but for unsigned numbers: a sign
/// prefix is not permitted.
///
/// `base` must be 0 or in the range 2 to 36. With base 0 the real base is
/// taken from the prefix: 2 for "0b", 8 for "0o", 16 for "0x" and 10
/// otherwise; a "0" followed by decimal digits is read as a decimal number
/// with leading zeros. Underscores are accepted as digit separators only when
/// `base` is 0. Letter digits are case-insensitive: 'a' is 10 ... 'z' is 35.
///
/// `bit_size` is the width (0 to 64) the result must fit into; 0 selects
/// `int.bits()`.
///
/// Throws:
/// - `InvalidSyntaxError` for an empty string, a character that is not a
///   digit of the selected base, or misplaced underscores.
/// - `InvalidBaseError` when `base` is not 0 and not in 2 to 36.
/// - `InvalidBitSizeError` when `bit_size` is greater than 64.
/// - `ValueOutOfRangeError` when the value does not fit into `bit_size` bits.
pub func parse_uint(self, mut base: int32, mut bit_size: uint) -> !uint {
    if self == "" {
        throw InvalidSyntaxError("empty string");
    }
    s := self;
    // remember that the caller asked for prefix detection: `base` itself is
    // overwritten below, and underscores are only legal in that mode.
    base0 := base == 0;

    // index of the first digit, i.e. just past any base prefix.
    mut start_index: uint := 0;
    match {
        (base >= 2 and base <= 36) -> { /* valid base; nothing to do */ },
        (base == 0) -> {
            // look for a binary, octal or hex prefix.
            base = 10;
            if s[0] == b'0' {
                match {
                    (s.len >= 3 and lower(s[1]) == b'b') -> {
                        base = 2;
                        start_index = 2;
                    },
                    (s.len >= 3 and lower(s[1]) == b'o') -> {
                        base = 8;
                        start_index = 2;
                    },
                    (s.len >= 3 and lower(s[1]) == b'x') -> {
                        base = 16;
                        start_index = 2;
                    },
                    // manage leading zeros in decimal base's numbers
                    (s.len >= 2 and (s[1] >= b'0' and s[1] <= b'9')) -> {
                        base = 10;
                        start_index = 1;
                    },
                    // a lone "0", or "0" followed by something that is not a
                    // decimal digit: skip the zero and continue in base 8.
                    else -> {
                        base = 8;
                        start_index = 1;
                    }
                }
            }
        },
        else -> throw InvalidBaseError("invalid base {}".fmt(base))
    }

    // `bit_size` is unsigned, so only the upper bound needs checking.
    if bit_size == 0 {
        bit_size = int.bits();
    } else if bit_size > 64 {
        throw InvalidBitSizeError("invalid bit size {}".fmt(bit_size));
    }

    // Cutoff is the smallest number such that `cutoff * base > uint.MAX`.
    cutoff: uint := (uint.MAX / @as(uint, base)) + 1;

    // Largest value representable in `bit_size` bits; 64 is special-cased so
    // the shift amount always stays below the width of `uint`.
    max_val: uint := if bit_size == 64 {
        uint.MAX
    } else {
        (@as(uint, 1) << bit_size) - 1
    };
    mut underscores := false;
    mut n: uint := 0;

    mut i: uint := start_index;
    while i < s.len : i += 1 {
        c := s[i];
        cl := lower(c);

        // numeric value of the current digit.
        mut d: uint8 := 0;
        match {
            (c == b'_' and base0) -> {
                // placement is validated once, after the loop.
                underscores = true;
                continue;
            },
            (b'0' <= c and c <= b'9') -> d = c - b'0',
            // letters continue after the decimal digits: 'a' is 10, 'z' is 35.
            (b'a' <= cl and cl <= b'z') -> d = cl - b'a' + 10,
            else -> throw InvalidSyntaxError("invalid syntax")
        }

        if d >= @as(uint8, base) {
            throw InvalidSyntaxError("invalid syntax");
        }

        if n >= cutoff {
            // `n * base` overflows
            throw ValueOutOfRangeError("value out of range");
        }
        n *= @as(uint, base);

        n1 := n + @as(uint, d);
        if n1 < n or n1 > max_val {
            // `n + d` overflows
            throw ValueOutOfRangeError("value out of range");
        }
        n = n1;
    }

    if underscores and !underscore_ok(s) {
        throw InvalidSyntaxError("invalid syntax");
    }

    return n;
}

/// Interprets the string in the given `base` (0, 2 to 36) and `bit_size`
/// (0 to 64) and returns the corresponding signed value.
///
/// The string may begin with a leading sign: "+" or "-".
///
/// If the base argument is 0, the true base is implied by the string's
/// prefix following the sign (if present): 2 for "0b", 8 for "0o", 16 for
/// "0x", and 10 otherwise; a "0" followed by decimal digits is read as a
/// decimal number with leading zeros. Also, for argument base 0 only,
/// underscore characters are permitted as digit separators.
///
/// The `bit_size` argument specifies the integer type that the result must
/// fit into. Bit size 0 selects `int.bits()`; bit sizes 8, 16, 32 and 64
/// correspond to int8, int16, int32 and int64.
///
/// Throws `InvalidSyntaxError` for an empty string and
/// `ValueOutOfRangeError` when the value does not fit into a signed integer
/// of `bit_size` bits. Every error thrown by `parse_uint` (invalid syntax,
/// base, bit size or range) is propagated unchanged.
pub func parse_int(self, base: int32, mut bit_size: uint) -> !int {
    if self == "" {
        throw InvalidSyntaxError("invalid syntax");
    }

    // pick off leading sign.
    mut s0 := self;
    mut neg := false;
    if self[0] == b'+' {
        s0 = self[1..];
    } else if self[0] == b'-' {
        neg = true;
        s0 = self[1..];
    }

    // convert unsigned and check range.
    un := s0.parse_uint(base, bit_size)!;
    if bit_size == 0 {
        bit_size = int.bits();
    }

    // cutoff is 2^(bit_size - 1): one past the largest positive value and the
    // magnitude of the smallest negative value. The parentheses are explicit
    // so the result does not depend on the relative precedence of `<<` and `-`.
    cutoff := @as(uint, 1) << (bit_size - 1);
    if !neg and un >= cutoff {
        throw ValueOutOfRangeError("value out of range");
    }
    if neg and un > cutoff {
        throw ValueOutOfRangeError("value out of range");
    }

    return if neg { -@as(int, un) } else { @as(int, un) };
}
}

/// Reports whether every underscore in `s_` sits in an allowed position.
///
/// An underscore is allowed only between two digits, or between a base prefix
/// ("0b", "0o", "0x", any case) and a digit. Keeping this check in a single
/// place lets the parsers simply skip underscores while accumulating digits.
func underscore_ok(s_: string) -> bool {
    // drop an optional sign so the prefix test below starts at index 0.
    mut digits := s_;
    if digits.len >= 1 and (digits[0] == b'-' or digits[0] == b'+') {
        digits = digits[1..];
    }

    // `prev` is the class of the last character seen:
    //   ^ beginning of the number,
    //   0 a digit or a base prefix,
    //   _ an underscore,
    //   ! anything else.
    mut prev := b'^';
    mut pos: uint := 0;
    mut is_hex := false;

    // optional base prefix: it counts as a digit for the "underscore as
    // digit separator" rule, so "0x_1f" is accepted.
    if digits.len >= 2 and digits[0] == b'0' {
        marker := lower(digits[1]);
        if marker == b'b' or marker == b'o' or marker == b'x' {
            pos = 2;
            prev = b'0';
            is_hex = marker == b'x';
        }
    }

    // number proper.
    while pos < digits.len : pos += 1 {
        c := digits[pos];
        is_digit := (b'0' <= c and c <= b'9') or
            (is_hex and b'a' <= lower(c) and lower(c) <= b'f');

        if is_digit {
            // digits are always okay.
            prev = b'0';
        } else if c == b'_' {
            // an underscore must directly follow a digit.
            if prev != b'0' {
                return false;
            }
            prev = b'_';
        } else if prev == b'_' {
            // an underscore must also be followed by a digit.
            return false;
        } else {
            // neither digit nor underscore.
            prev = b'!';
        }
    }

    // a trailing underscore is not followed by a digit.
    return prev != b'_';
}
70 changes: 70 additions & 0 deletions lib/core/src/string_to.c.ri
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (C) 2023 The Rivet Developers. All rights reserved.
// Use of this source code is governed by an MIT license that can
// be found in the LICENSE file.

extend string {
    /// Returns `true` only when the string is exactly "true"; every other
    /// string yields `false`.
    #[inline]
    pub func to_bool(self) -> bool {
        return self == "true";
    }

    /// Parses the string with `self.parse_int(0, 8)` and converts the result
    /// to `int8`. Parsing errors are propagated.
    #[inline]
    pub func to_int8(self) -> !int8 {
        parsed := self.parse_int(0, 8)!;
        return @as(int8, parsed);
    }

    /// Parses the string with `self.parse_int(0, 16)` and converts the result
    /// to `int16`. Parsing errors are propagated.
    #[inline]
    pub func to_int16(self) -> !int16 {
        parsed := self.parse_int(0, 16)!;
        return @as(int16, parsed);
    }

    /// Parses the string with `self.parse_int(0, 32)` and converts the result
    /// to `int32`. Parsing errors are propagated.
    #[inline]
    pub func to_int32(self) -> !int32 {
        parsed := self.parse_int(0, 32)!;
        return @as(int32, parsed);
    }

    /// Parses the string with `self.parse_int(0, 64)` and converts the result
    /// to `int64`. Parsing errors are propagated.
    #[inline]
    pub func to_int64(self) -> !int64 {
        parsed := self.parse_int(0, 64)!;
        return @as(int64, parsed);
    }

    /// Parses the string with `self.parse_int(0, int.bits())` and returns the
    /// result as `int`. Parsing errors are propagated.
    #[inline]
    pub func to_int(self) -> !int {
        parsed := self.parse_int(0, int.bits())!;
        return parsed;
    }

    /// Parses the string with `self.parse_uint(0, 8)` and converts the result
    /// to `uint8`. Parsing errors are propagated.
    #[inline]
    pub func to_uint8(self) -> !uint8 {
        parsed := self.parse_uint(0, 8)!;
        return @as(uint8, parsed);
    }

    /// Parses the string with `self.parse_uint(0, 16)` and converts the result
    /// to `uint16`. Parsing errors are propagated.
    #[inline]
    pub func to_uint16(self) -> !uint16 {
        parsed := self.parse_uint(0, 16)!;
        return @as(uint16, parsed);
    }

    /// Parses the string with `self.parse_uint(0, 32)` and converts the result
    /// to `uint32`. Parsing errors are propagated.
    #[inline]
    pub func to_uint32(self) -> !uint32 {
        parsed := self.parse_uint(0, 32)!;
        return @as(uint32, parsed);
    }

    /// Parses the string with `self.parse_uint(0, 64)` and converts the result
    /// to `uint64`. Parsing errors are propagated.
    #[inline]
    pub func to_uint64(self) -> !uint64 {
        parsed := self.parse_uint(0, 64)!;
        return @as(uint64, parsed);
    }

    /// Parses the string with `self.parse_uint(0, uint.bits())` and returns
    /// the result as `uint`. Parsing errors are propagated.
    #[inline]
    pub func to_uint(self) -> !uint {
        parsed := self.parse_uint(0, uint.bits())!;
        return parsed;
    }
}
49 changes: 49 additions & 0 deletions lib/core/tests/string_test.ri
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,52 @@ test "string.split_into_lines()" {
@assert(line == line_content or line == "");
}
}

// Checks `string.to_int32()` on plain and signed decimal input and on input
// that must be rejected.
test "string.to_int32()" {
// unsigned, explicitly positive and negative decimal numbers
@assert("16".to_int32()! == 16);
@assert("+16".to_int32()! == 16);
@assert("-16".to_int32()! == -16);

// invalid strings: each conversion must take the `else` (error) branch
// empty input
@assert(if _ := "".to_int32() {
false
} else {
true
});
// letters only, no digits
@assert(if _ := "str".to_int32() {
false
} else {
true
});
// long non-numeric input
@assert(if _ := "string_longer_than_10_chars".to_int32() {
false
} else {
true
});
}

// Checks `string.parse_int()` with explicit bases, with the largest value of
// every supported bit size, and with bit sizes that must be rejected.
test "string.parse_int()" {
    // different bases
    @assert("16".parse_int(16, 0)! == 0x16);
    @assert("16".parse_int(8, 0)! == 0o16);
    @assert("11".parse_int(2, 0)! == 3);

    // letter digits continue after '9': 'a' is 10, 'f' is 15, 'z' is 35
    @assert("ff".parse_int(16, 0)! == 0xff);
    @assert("FF".parse_int(16, 0)! == 0xff);
    @assert("z".parse_int(36, 0)! == 35);

    // different bit sizes
    @assert("127".parse_int(10, 8)! == 127);
    @assert("32767".parse_int(10, 16)! == 32767);
    @assert("2147483647".parse_int(10, 32)! == 2147483647);
    @assert("9223372036854775807".parse_int(10, 64)! == 9223372036854775807);
    // b=11, a=10, o=24 in base 36:
    // 11*36^5 + 10*36^4 + 24*36^3 + 11*36^2 + 10*36 + 11
    @assert("baobab".parse_int(36, 64)! == 683058467);

    // invalid bit sizes
    @assert(if _ := "123".parse_int(10, -1) {
        false
    } else {
        true
    });
    @assert(if _ := "123".parse_int(10, 65) {
        false
    } else {
        true
    });
}

0 comments on commit c4246b1

Please sign in to comment.