Skip to content

Commit

Permalink
hex, octal, and binary literals for integers.
Browse files Browse the repository at this point in the history
Note that they can't be used on the command line since SnarkVM
does the parsing there.
  • Loading branch information
mikebenfield committed Oct 10, 2024
1 parent 8854b56 commit 5a0f39c
Show file tree
Hide file tree
Showing 71 changed files with 1,211 additions and 70 deletions.
93 changes: 93 additions & 0 deletions compiler/ast/src/expressions/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,96 @@ impl Node for Literal {
}
}
}

/// Adapter around a borrowed [`Literal`] whose `fmt::Display` implementation
/// renders integer literals in decimal, whatever radix they were written in.
struct DisplayDecimal<'a>(&'a Literal);

impl Literal {
/// Returns a value that displays this literal with any integer rendered in
/// decimal, regardless of the radix in which it was parsed.
///
/// In particular this is useful for outputting .aleo files.
///
/// The returned value borrows `self`, hence the `'_` bound on the return type.
pub fn display_decimal(&self) -> impl '_ + fmt::Display {
// Wrap the borrowed literal; the conversion itself happens when it is formatted.
DisplayDecimal(self)
}
}

impl fmt::Display for DisplayDecimal<'_> {
    /// Writes the wrapped literal, converting radix-prefixed integers to decimal.
    ///
    /// Integers whose text starts with `0x`, `0o`, or `0b` (optionally preceded
    /// by `-`) are parsed and re-emitted in decimal followed by their type.
    /// Integers without such a prefix are written exactly as stored. Every
    /// other kind of literal is written with its usual suffix or quoting.
    ///
    /// # Panics
    ///
    /// Panics if a radix-prefixed integer cannot be parsed as an `i128` (when
    /// it starts with `-`) or as a `u128` (otherwise).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.0 {
            Literal::Integer(type_, value, _, _) => {
                // Look past an optional leading minus sign to find the radix prefix.
                let negative = value.starts_with('-');
                let magnitude = value.strip_prefix('-').unwrap_or(value.as_str());
                let has_radix_prefix = ["0x", "0o", "0b"].iter().any(|&prefix| magnitude.starts_with(prefix));

                if has_radix_prefix {
                    // Underscores are only visual separators; drop them before parsing.
                    let string = value.replace('_', "");
                    // The widest signed/unsigned types hold any value of the narrower ones.
                    if negative {
                        let v = i128::from_str_by_radix(&string).expect("Failed to parse integer?");
                        write!(f, "{v}{type_}")
                    } else {
                        let v = u128::from_str_by_radix(&string).expect("Failed to parse integer?");
                        write!(f, "{v}{type_}")
                    }
                } else {
                    // It's already decimal: emit the text as written.
                    write!(f, "{value}{type_}")
                }
            }
            Literal::Address(address, _, _) => write!(f, "{address}"),
            Literal::Boolean(boolean, _, _) => write!(f, "{boolean}"),
            Literal::Field(field, _, _) => write!(f, "{field}field"),
            Literal::Group(group) => write!(f, "{group}group"),
            Literal::Scalar(scalar, _, _) => write!(f, "{scalar}scalar"),
            Literal::String(string, _, _) => write!(f, "\"{string}\""),
        }
    }
}

/// Parses integer literals of any primitive integer type generically.
///
/// The literal may optionally start with a `-`, optionally followed by a radix
/// prefix: `0x` (hexadecimal), `0o` (octal), or `0b` (binary). Without a radix
/// prefix the literal is parsed as decimal.
pub trait FromStrRadix: Sized {
    /// Parses `src` according to its (optional) sign and radix prefix.
    ///
    /// # Errors
    ///
    /// Returns a [`std::num::ParseIntError`] if `src` is empty, contains a
    /// digit that is invalid for its radix, places a sign after the radix
    /// prefix (e.g. `0x-5`), or denotes a value that does not fit in `Self`.
    fn from_str_by_radix(src: &str) -> Result<Self, std::num::ParseIntError>;
}

macro_rules! implement_from_str_radix {
    ($($ty:ident)*) => {
        $(
            impl FromStrRadix for $ty {
                fn from_str_by_radix(src: &str) -> Result<Self, std::num::ParseIntError> {
                    // Split off an optional leading minus sign.
                    let (negative, unsigned) = match src.strip_prefix('-') {
                        Some(rest) => (true, rest),
                        None => (false, src),
                    };

                    // Detect a radix prefix and isolate the digits following it.
                    let prefixed = [("0x", 16u32), ("0o", 8u32), ("0b", 2u32)]
                        .iter()
                        .find_map(|&(prefix, radix)| unsigned.strip_prefix(prefix).map(|digits| (digits, radix)));

                    match prefixed {
                        // No radix prefix: plain decimal, sign included.
                        None => Self::from_str_radix(src, 10),
                        // std's `from_str_radix` accepts a leading `+`/`-`, which would make
                        // malformed input such as `0x-5` parse as `-5`. Parsing the whole
                        // string as decimal instead is guaranteed to fail on the prefix
                        // letter, yielding an ordinary `ParseIntError`.
                        Some((digits, _))
                            if matches!(digits.as_bytes().first(), Some(b'+') | Some(b'-')) =>
                        {
                            Self::from_str_radix(src, 10)
                        }
                        // We have to remove the radix prefix and put the `-` back in to use
                        // std's parsing (needed so that e.g. `-0x80` fits in an `i8`).
                        Some((digits, radix)) if negative => {
                            let mut signed = String::with_capacity(digits.len() + 1);
                            signed.push('-');
                            signed.push_str(digits);
                            Self::from_str_radix(&signed, radix)
                        }
                        Some((digits, radix)) => Self::from_str_radix(digits, radix),
                    }
                }
            }
        )*
    };
}

implement_from_str_radix! { u8 u16 u32 u64 u128 i8 i16 i32 i64 i128 }
22 changes: 11 additions & 11 deletions compiler/ast/src/value/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
// You should have received a copy of the GNU General Public License
// along with the Leo library. If not, see <https://www.gnu.org/licenses/>.

use crate::{GroupLiteral, Identifier, IntegerType, Literal, NodeID, Type};
use crate::{FromStrRadix as _, GroupLiteral, Identifier, IntegerType, Literal, NodeID, Type};

use leo_errors::{FlattenError, LeoError, Result, type_name};
use leo_span::{Span, Symbol};
Expand Down Expand Up @@ -874,16 +874,16 @@ impl TryFrom<&Literal> for Value {
Literal::Integer(integer_type, raw_string, span, _) => {
let string = raw_string.replace('_', "");
match integer_type {
IntegerType::U8 => Self::U8(string.parse()?, *span),
IntegerType::U16 => Self::U16(string.parse()?, *span),
IntegerType::U32 => Self::U32(string.parse()?, *span),
IntegerType::U64 => Self::U64(string.parse()?, *span),
IntegerType::U128 => Self::U128(string.parse()?, *span),
IntegerType::I8 => Self::I8(string.parse()?, *span),
IntegerType::I16 => Self::I16(string.parse()?, *span),
IntegerType::I32 => Self::I32(string.parse()?, *span),
IntegerType::I64 => Self::I64(string.parse()?, *span),
IntegerType::I128 => Self::I128(string.parse()?, *span),
IntegerType::U8 => Self::U8(u8::from_str_by_radix(&string)?, *span),
IntegerType::U16 => Self::U16(u16::from_str_by_radix(&string)?, *span),
IntegerType::U32 => Self::U32(u32::from_str_by_radix(&string)?, *span),
IntegerType::U64 => Self::U64(u64::from_str_by_radix(&string)?, *span),
IntegerType::U128 => Self::U128(u128::from_str_by_radix(&string)?, *span),
IntegerType::I8 => Self::I8(i8::from_str_by_radix(&string)?, *span),
IntegerType::I16 => Self::I16(i16::from_str_by_radix(&string)?, *span),
IntegerType::I32 => Self::I32(i32::from_str_by_radix(&string)?, *span),
IntegerType::I64 => Self::I64(i64::from_str_by_radix(&string)?, *span),
IntegerType::I128 => Self::I128(i128::from_str_by_radix(&string)?, *span),
}
}
})
Expand Down
11 changes: 11 additions & 0 deletions compiler/parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,17 @@ impl<N: Network> ParserContext<'_, N> {
let full_span = span + suffix_span;
let assert_no_whitespace = |x| assert_no_whitespace(span, suffix_span, &value, x);
match self.eat_any(INT_TYPES).then_some(&self.prev_token.token) {
// Hex, octal, binary literal on a noninteger is an error.
Some(Token::Field) | Some(Token::Group) | Some(Token::Scalar)
if value.starts_with("0x")
|| value.starts_with("0o")
|| value.starts_with("0b")
|| value.starts_with("-0x")
|| value.starts_with("-0o")
|| value.starts_with("-0b") =>
{
return Err(ParserError::hexbin_literal_nonintegers(span).into());
}
// Literal followed by `field`, e.g., `42field`.
Some(Token::Field) => {
assert_no_whitespace("field")?;
Expand Down
37 changes: 27 additions & 10 deletions compiler/parser/src/tokenizer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,25 +160,42 @@ impl Token {
/// If there is input but no integer, this function returns the tuple consisting of
/// length 0 and a dummy integer token that contains an empty string.
/// However, this function is always called when the next character is a digit.
/// This function eats a sequence of one or more digits and underscores
/// (starting from a digit, as explained above, given when it is called),
/// This function eats a sequence representing a decimal numeral, a hex
/// numeral (beginning with `0x`), an octal numeral (beginning with `0o`),
/// or a binary numeral (beginning with `0b`), optionally including underscores,
/// which corresponds to a numeral in the ABNF grammar.
fn eat_integer(input: &mut Peekable<impl Iterator<Item = char>>) -> Result<(usize, Token)> {
if input.peek().is_none() {
return Err(ParserError::lexer_empty_input().into());
}

if !input.peek().unwrap().is_ascii_digit() {
return Ok((0, Token::Integer("".into())));
}

let mut int = String::new();

// Note that it is still impossible to have a number that starts with an `_` because eat_integer is only called when the first character is a digit.
while let Some(c) = input.next_if(|c| c.is_ascii_digit() || *c == '_') {
if c == '0' && matches!(input.peek(), Some('x')) {
int.push(c);
int.push(input.next().unwrap());
return Err(ParserError::lexer_hex_number_provided(int).into());
}
let first = input.next().unwrap();
int.push(first);
if first == '0' && (input.peek() == Some(&'x') || input.peek() == Some(&'o') || input.peek() == Some(&'b')) {
int.push(input.next().unwrap());
}

// Allow only uppercase hex digits, so as not to interfere with parsing the `field` suffix.
int.extend(input.take_while(|&c| c.is_ascii_digit() || c == '_' || c.is_ascii_uppercase()));

let (s, radix) = if let Some(s) = int.strip_prefix("0x") {
(s, 16)
} else if let Some(s) = int.strip_prefix("0o") {
(s, 8)
} else if let Some(s) = int.strip_prefix("0b") {
(s, 2)
} else {
(int.as_str(), 10)
};

int.push(c);
if let Some(c) = s.chars().find(|&c| c != '_' && !c.is_digit(radix)) {
return Err(ParserError::wrong_digit_for_radix(c, radix, int).into());
}

Ok((int.len(), Token::Integer(int)))
Expand Down
4 changes: 3 additions & 1 deletion compiler/passes/src/code_generation/visit_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ impl<'a> CodeGenerator<'a> {
}

fn visit_value(&mut self, input: &'a Literal) -> (String, String) {
(format!("{input}"), String::new())
// AVM can only parse decimal numbers.
let decimal_input = input.display_decimal();
(format!("{decimal_input}"), String::new())
}

fn visit_locator(&mut self, input: &'a LocatorExpression) -> (String, String) {
Expand Down
5 changes: 2 additions & 3 deletions compiler/passes/src/type_checking/check_expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use leo_span::{Span, Symbol, sym};
use snarkvm::console::network::Network;

use itertools::Itertools;
use std::str::FromStr;

fn return_incorrect_type(t1: Option<Type>, t2: Option<Type>, expected: &Option<Type>) -> Option<Type> {
match (t1, t2) {
Expand Down Expand Up @@ -883,9 +882,9 @@ impl<'a, N: Network> ExpressionVisitor<'a> for TypeChecker<'a, N> {
}

fn visit_literal(&mut self, input: &'a Literal, expected: &Self::AdditionalInput) -> Self::Output {
fn parse_integer_literal<I: FromStr>(handler: &Handler, raw_string: &str, span: Span, type_string: &str) {
fn parse_integer_literal<I: FromStrRadix>(handler: &Handler, raw_string: &str, span: Span, type_string: &str) {
let string = raw_string.replace('_', "");
if string.parse::<I>().is_err() {
if I::from_str_by_radix(&string).is_err() {
handler.emit_err(TypeCheckerError::invalid_int_value(string, type_string, span));
}
}
Expand Down
15 changes: 15 additions & 0 deletions errors/src/errors/parser/parser_errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ create_messages!(
}

/// When a hex number is provided.
// TODO This error is unused. Remove it in a future version.
@backtraced
lexer_hex_number_provided {
args: (input: impl Display),
Expand Down Expand Up @@ -350,4 +351,18 @@ create_messages!(
msg: "Each member declaration in a struct or record must be followed by a comma (except the last).",
help: None,
}

@formatted
hexbin_literal_nonintegers {
args: (),
msg: format!("Hex, octal, and binary literals may only be used for integer types."),
help: None,
}

@backtraced
wrong_digit_for_radix {
args: (digit: char, radix: u32, token: String),
msg: format!("Digit {digit} invalid in radix {radix} (token {token})."),
help: None,
}
);
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/i128/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: b8fa9541b26d44c8348e7ded98e6bd1b235ee9c02e632192fb2da00234e84c5c
type_checked_symbol_table: 9cc9c032ecfa8ab95f117b6c2f6b5f77335cdc444604a28ddca7ea204c620277
unrolled_symbol_table: 9cc9c032ecfa8ab95f117b6c2f6b5f77335cdc444604a28ddca7ea204c620277
initial_ast: 1e34f3c30849e79eeba6b5ca17e438efb5bf24fa3ceb50ddc00133a8474be73c
unrolled_ast: 1e34f3c30849e79eeba6b5ca17e438efb5bf24fa3ceb50ddc00133a8474be73c
ssa_ast: 6926cc34fac90a4bed0c819030cf61f0f614e539e4b85b63c2abb3b8776cf532
flattened_ast: 108e01348ff9fd5dbbc00cc94208f98c5a5782a86ddf452ee59602c5bfe046f4
destructured_ast: d05df9936ea9ab1e577e8d4a14046ccd75af35b76dab552119728a2d240ed83e
inlined_ast: d05df9936ea9ab1e577e8d4a14046ccd75af35b76dab552119728a2d240ed83e
dce_ast: d05df9936ea9ab1e577e8d4a14046ccd75af35b76dab552119728a2d240ed83e
bytecode: 7b4ac23af746311a65be3314562eb9247cacd4b29ed1ea20d04b0dc8c06eef19
errors: ""
warnings: ""
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/i16/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: 29029f1a16e25967a35a6ed07eb857a7a14c2aff01cbe5cc53065fb37de75e33
type_checked_symbol_table: b18c7816d649a45de2cb8867c4d7f791b48da710b8a68be9ad2e81a8acebbf09
unrolled_symbol_table: b18c7816d649a45de2cb8867c4d7f791b48da710b8a68be9ad2e81a8acebbf09
initial_ast: 40da0941a9022d4da4860586a31ab26d1062f58eb5c254a8729160cc34d62f30
unrolled_ast: 40da0941a9022d4da4860586a31ab26d1062f58eb5c254a8729160cc34d62f30
ssa_ast: 10914c235af1410bcc77ba6b7ef1378d414b655086650d6b6814b5adfa29cb17
flattened_ast: 042a582d89960ccf8683decbf3a0b3b44c66d53f8922ef878ae83317c16eb777
destructured_ast: f9434ee8cb2104ba7085d97a57f5e9ff6c25a30064fb1b7cfebcd6c1726b54d6
inlined_ast: f9434ee8cb2104ba7085d97a57f5e9ff6c25a30064fb1b7cfebcd6c1726b54d6
dce_ast: f9434ee8cb2104ba7085d97a57f5e9ff6c25a30064fb1b7cfebcd6c1726b54d6
bytecode: abbe9790219732acac590659c2aa27351a46df65ca5cc03c9def1889d5642d2d
errors: ""
warnings: ""
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/i32/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: 64448615fb54198853ec0db1342703f538e30c529c17dc1de8ce21e73cb9aec0
type_checked_symbol_table: f91e02cc50da5608d6a4c2ddbb06a08d8887d7a271437ccc1f4f2dc7df4feb8b
unrolled_symbol_table: f91e02cc50da5608d6a4c2ddbb06a08d8887d7a271437ccc1f4f2dc7df4feb8b
initial_ast: d6e00a4cb2abea938e9eb0af35e058f5eb749a8d97dc56e434553511cfbf5bdc
unrolled_ast: d6e00a4cb2abea938e9eb0af35e058f5eb749a8d97dc56e434553511cfbf5bdc
ssa_ast: 9c01e57aa0ee03cf03b7da15f64c40a6fce773dac8afda6e8722f5d8d16d7730
flattened_ast: 5222a287ea1a208cb950a77630e32621375e0f9d1c78ee74828482c1a3d2d5fa
destructured_ast: 4dd127a21d0013d590115c8f373787c105d754b97ec0fc5e5014c7ddb32b6011
inlined_ast: 4dd127a21d0013d590115c8f373787c105d754b97ec0fc5e5014c7ddb32b6011
dce_ast: 4dd127a21d0013d590115c8f373787c105d754b97ec0fc5e5014c7ddb32b6011
bytecode: 60f3f7cf4996cce21e8854a05cd0018dbbc6f3836e82f110a7327a48d826604a
errors: ""
warnings: ""
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/i64/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: d2e895a60db1d09efc2efbc085307b6c978f705bc7b83dcaaaec16237b67878e
type_checked_symbol_table: 7933630b1e312d9c233d37513c560c18cadd072f2cead39e6b3d4de920808607
unrolled_symbol_table: 7933630b1e312d9c233d37513c560c18cadd072f2cead39e6b3d4de920808607
initial_ast: a60390cab967e18cfdb89052fca822bd3a0fe13862413bc7aa12c7c7f968325e
unrolled_ast: a60390cab967e18cfdb89052fca822bd3a0fe13862413bc7aa12c7c7f968325e
ssa_ast: c0e2803b8c6ad33be5427dc1b78c3c6b92c770ce5836c0a4f78ae8d3e46b86eb
flattened_ast: e67049b2d3a44fb83a96aaf3c96e133fbaebd0d811626d42c820a24679c5ae69
destructured_ast: 56c0713c15d43bba27dbcc87637b33c3aff98fa3c7c07e3ba77c38d9166eac81
inlined_ast: 56c0713c15d43bba27dbcc87637b33c3aff98fa3c7c07e3ba77c38d9166eac81
dce_ast: 56c0713c15d43bba27dbcc87637b33c3aff98fa3c7c07e3ba77c38d9166eac81
bytecode: a5e5d9c09c3ad932e4e2c7e04964675bfc54a27e86729fa99520a13c79cbdc81
errors: ""
warnings: ""
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/i8/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: 353cdbed822b512ef65bfb0724f108688f363f7e2a7c8ea3719bd71c4975aa48
type_checked_symbol_table: dc71d504735e6f386b32e9e3e51be6a8f59b661143983031532ba5ce27037597
unrolled_symbol_table: dc71d504735e6f386b32e9e3e51be6a8f59b661143983031532ba5ce27037597
initial_ast: 5403d97f87cbb0fcfe6f7a526d4f798caec145ec178e4779b9c710b52bb08ffd
unrolled_ast: 5403d97f87cbb0fcfe6f7a526d4f798caec145ec178e4779b9c710b52bb08ffd
ssa_ast: 3ab6fca47f539d5a00a8b4228c98a4d1dcbc2a568ba673d26d619cf1746607aa
flattened_ast: dbc980ee51c6a3b45d97a4b732f7c8b4c28655b83955417b2160fd3f8875c2c9
destructured_ast: 10545e3b44c9d34d4dd57f0028b1c3c7740d29f8623b71c27481e41d65cdd7c3
inlined_ast: 10545e3b44c9d34d4dd57f0028b1c3c7740d29f8623b71c27481e41d65cdd7c3
dce_ast: 10545e3b44c9d34d4dd57f0028b1c3c7740d29f8623b71c27481e41d65cdd7c3
bytecode: 576d91d751607b6f8045f511d39072799e174dd899e9962b1dadc94843836620
errors: ""
warnings: ""
18 changes: 18 additions & 0 deletions tests/expectations/compiler/integers/u128/hex_and_bin.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
namespace: Compile
expectation: Pass
outputs:
- - compile:
- initial_symbol_table: d900dfe2093ee00a1f8dbfe32766f6714c19e7aa625af411c14f3e1e4ace62d5
type_checked_symbol_table: 7f9f635729dc71d9463d8a943f79216857dd19cea5770713ab474a9f37aa7604
unrolled_symbol_table: 7f9f635729dc71d9463d8a943f79216857dd19cea5770713ab474a9f37aa7604
initial_ast: ea2d4e0f8cad656cba8604d765d5094b4a72aa2b5ce13057b8700a669117f279
unrolled_ast: ea2d4e0f8cad656cba8604d765d5094b4a72aa2b5ce13057b8700a669117f279
ssa_ast: 72472d8b6907245f07f890263095ffcac1e321306511a59ceb34dd8139c28265
flattened_ast: 2c68ce2e5dc1d0a10cb5d89602ca4f51a6d300b42e8fe23eb08163b7d1f3eaca
destructured_ast: 04e6742406ee38042a22e02f0eb9cb3012758c9c3f4d32ef67e976f15c958283
inlined_ast: 04e6742406ee38042a22e02f0eb9cb3012758c9c3f4d32ef67e976f15c958283
dce_ast: 04e6742406ee38042a22e02f0eb9cb3012758c9c3f4d32ef67e976f15c958283
bytecode: 82dd565867bf6030a6f9522fb0cd8e746d4c9ab5cd126fee6bf24b3f3ee210c6
errors: ""
warnings: ""
5 changes: 5 additions & 0 deletions tests/expectations/compiler/integers/u128/hex_min_fail.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
namespace: Compile
expectation: Fail
outputs:
- "Error [ETYC0372008]: The value -0x1 is not a valid `u128`\n --> compiler-test:5:23\n |\n 5 | let a: u128 = -0x1u128;\n | ^^^^^^^^\nError [ETYC0372083]: A program must have at least one transition function.\n --> compiler-test:1:1\n |\n 1 | \n 2 | \n 3 | program test.aleo { \n | ^^^^^^^^^^^^\n"
Loading

0 comments on commit 5a0f39c

Please sign in to comment.