diff --git a/Cargo.toml b/Cargo.toml index a7c980d..e6b8266 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cexpr" -version = "0.3.6" +version = "0.4.0" edition = "2018" authors = ["Jethro Beekman "] license = "Apache-2.0/MIT" @@ -13,7 +13,7 @@ keywords = ["C","expression","parser"] travis-ci = { repository = "jethrogb/rust-cexpr" } [dependencies] -nom = {version = "^4", features = ["verbose-errors"] } +nom = "5" [dev-dependencies] clang-sys = ">= 0.13.0, < 0.29.0" diff --git a/src/expr.rs b/src/expr.rs index 3bf3563..b1fbfb2 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -27,8 +27,13 @@ use std::ops::{ }; use crate::literal::{self, CChar}; -use crate::nom_crate::*; use crate::token::{Kind as TokenKind, Token}; +use crate::ToCexprResult; +use nom::branch::alt; +use nom::combinator::{complete, map, map_opt}; +use nom::multi::{fold_many0, many0, separated_list}; +use nom::sequence::{delimited, pair, preceded}; +use nom::*; /// Expression parser/evaluator that supports identifiers. #[derive(Debug)] @@ -38,10 +43,12 @@ pub struct IdentifierParser<'ident> { #[derive(Copy, Clone)] struct PRef<'a>(&'a IdentifierParser<'a>); -pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error>; +/// A shorthand for the type of cexpr expression evaluation results. +pub type CResult<'a, R> = IResult<&'a [Token], R, crate::Error<&'a [Token]>>; /// The result of parsing a literal or evaluating an expression. #[derive(Debug, Clone, PartialEq)] +#[allow(missing_docs)] pub enum EvalResult { Int(Wrapping), Float(f64), @@ -53,6 +60,7 @@ pub enum EvalResult { macro_rules! 
result_opt ( (fn $n:ident: $e:ident -> $t:ty) => ( #[allow(dead_code)] + #[allow(clippy::wrong_self_convention)] fn $n(self) -> Option<$t> { if let EvalResult::$e(v) = self { Some(v) @@ -69,6 +77,7 @@ impl EvalResult { result_opt!(fn as_char: Char -> CChar); result_opt!(fn as_str: Str -> Vec); + #[allow(clippy::wrong_self_convention)] fn as_numeric(self) -> Option { match self { EvalResult::Int(_) | EvalResult::Float(_) => Some(self), @@ -88,89 +97,76 @@ impl From> for EvalResult { // =========================================== macro_rules! exact_token ( - ($i:expr, $k:ident, $c:expr) => ({ - if $i.is_empty() { - let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size($c.len()))); + ($k:ident, $c:expr) => ({ + move |input: &[Token]| { + if input.is_empty() { + let res: CResult<'_, &[u8]> = Err(crate::nom::Err::Incomplete(Needed::Size($c.len()))); res } else { - if $i[0].kind==TokenKind::$k && &$i[0].raw[..]==$c { - Ok((&$i[1..], &$i[0].raw[..])) + if input[0].kind==TokenKind::$k && &input[0].raw[..]==$c { + Ok((&input[1..], &input[0].raw[..])) } else { - Err(crate::nom_crate::Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::ExactToken(TokenKind::$k,$c))))) + Err(crate::nom::Err::Error((input, crate::ErrorKind::ExactToken(TokenKind::$k,$c)).into())) } } + } }); ); macro_rules! 
typed_token ( - ($i:expr, $k:ident) => ({ - if $i.is_empty() { - let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size(1))); + ($k:ident) => ({ + move |input: &[Token]| { + if input.is_empty() { + let res: CResult<'_, &[u8]> = Err(nom::Err::Incomplete(Needed::Size(1))); res } else { - if $i[0].kind==TokenKind::$k { - Ok((&$i[1..], &$i[0].raw[..])) + if input[0].kind==TokenKind::$k { + Ok((&input[1..], &input[0].raw[..])) } else { - Err(Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::TypedToken(TokenKind::$k))))) + Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::$k)).into())) } } + } }); ); -#[allow(unused_macros)] -macro_rules! any_token ( - ($i:expr,) => ({ - if $i.is_empty() { - let res: CResult<'_, &Token> = Err(::nom_crate::Err::Incomplete(Needed::Size(1))); - res - } else { - Ok((&$i[1..], &$i[0])) - } - }); -); - -macro_rules! p ( - ($i:expr, $c:expr) => (exact_token!($i,Punctuation,$c.as_bytes())) -); +#[allow(dead_code)] +fn any_token(input: &[Token]) -> CResult<'_, &Token> { + if input.is_empty() { + Err(crate::nom::Err::Incomplete(Needed::Size(1))) + } else { + Ok((&input[1..], &input[0])) + } +} -macro_rules! one_of_punctuation ( - ($i:expr, $c:expr) => ({ - if $i.is_empty() { - let min = $c.iter().map(|opt|opt.len()).min().expect("at least one option"); - let res: CResult<'_, &[u8]> = Err(crate::nom_crate::Err::Incomplete(Needed::Size(min))); - res - } else { - if $i[0].kind==TokenKind::Punctuation && $c.iter().any(|opt|opt.as_bytes()==&$i[0].raw[..]) { - Ok((&$i[1..], &$i[0].raw[..])) - } else { - const VALID_VALUES: &'static [&'static str] = &$c; - Err(Err::Error(error_position!($i, ErrorKind::Custom(crate::Error::ExactTokens(TokenKind::Punctuation,VALID_VALUES))))) - } - } - }); -); +fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { + exact_token!(Punctuation, c.as_bytes()) +} -/// equivalent to nom's complete! 
macro, but adds the custom error type -#[macro_export] -macro_rules! comp ( - ($i:expr, $submac:ident!( $($args:tt)* )) => ( - { - use crate::nom_crate::lib::std::result::Result::*; - use crate::nom_crate::{Err,ErrorKind}; - - let i_ = $i.clone(); - match $submac!(i_, $($args)*) { - Err(Err::Incomplete(_)) => { - Err(Err::Error(error_position!($i, ErrorKind::Complete::))) - }, - rest => rest - } - } - ); - ($i:expr, $f:expr) => ( - comp!($i, call!($f)); - ); -); +fn one_of_punctuation(c: &'static [&'static str]) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { + move |input| { + if input.is_empty() { + let min = c + .iter() + .map(|opt| opt.len()) + .min() + .expect("at least one option"); + Err(crate::nom::Err::Incomplete(Needed::Size(min))) + } else if input[0].kind == TokenKind::Punctuation + && c.iter().any(|opt| opt.as_bytes() == &input[0].raw[..]) + { + Ok((&input[1..], &input[0].raw[..])) + } else { + Err(crate::nom::Err::Error( + ( + input, + crate::ErrorKind::ExactTokens(TokenKind::Punctuation, c), + ) + .into(), + )) + } + } +} // ================================================== // ============= Numeric expressions ================ @@ -297,120 +293,121 @@ fn unary_op(input: (&[u8], EvalResult)) -> Option { } } -macro_rules! 
numeric ( - ($i:expr, $submac:ident!( $($args:tt)* )) => (map_opt!($i,$submac!($($args)*),EvalResult::as_numeric)); - ($i:expr, $f:expr ) => (map_opt!($i,call!($f),EvalResult::as_numeric)); -); +fn numeric, F>( + f: F, +) -> impl Fn(I) -> nom::IResult +where + F: Fn(I) -> nom::IResult, +{ + nom::combinator::map_opt(f, EvalResult::as_numeric) +} impl<'a> PRef<'a> { - method!(unary,&[Token],EvalResult,crate::Error>, mut self, - alt!( - delimited!(p!("("),call_m!(self.numeric_expr),p!(")")) | - numeric!(call_m!(self.literal)) | - numeric!(call_m!(self.identifier)) | - map_opt!(pair!(one_of_punctuation!(["+", "-", "~"]),call_m!(self.unary)),unary_op) - ) - ); + fn unary(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + alt(( + delimited(p("("), |i| self.numeric_expr(i), p(")")), + numeric(|i| self.literal(i)), + numeric(|i| self.identifier(i)), + map_opt( + pair(one_of_punctuation(&["+", "-", "~"][..]), |i| self.unary(i)), + unary_op, + ), + ))(input) + } - method!(mul_div_rem,&[Token],EvalResult,crate::Error>, mut self, - do_parse!( - acc: call_m!(self.unary) >> - res: fold_many0!( - pair!(comp!(one_of_punctuation!(["*", "/", "%"])), call_m!(self.unary)), - acc, - |mut acc, (op, val): (&[u8], EvalResult)| { - match op[0] as char { - '*' => acc *= &val, - '/' => acc /= &val, - '%' => acc %= &val, - _ => unreachable!() - }; - acc - } - ) >> (res) - ) - ); + fn mul_div_rem(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.unary(input)?; + fold_many0( + pair(complete(one_of_punctuation(&["*", "/", "%"][..])), |i| { + self.unary(i) + }), + acc, + |mut acc, (op, val): (&[u8], EvalResult)| { + match op[0] as char { + '*' => acc *= &val, + '/' => acc /= &val, + '%' => acc %= &val, + _ => unreachable!(), + }; + acc + }, + )(input) + } - method!(add_sub,&[Token],EvalResult,crate::Error>, mut self, - do_parse!( - acc: call_m!(self.mul_div_rem) >> - res: fold_many0!( - pair!(comp!(one_of_punctuation!(["+", "-"])), call_m!(self.mul_div_rem)), 
- acc, - |mut acc, (op, val): (&[u8], EvalResult)| { - match op[0] as char { - '+' => acc += &val, - '-' => acc -= &val, - _ => unreachable!() - }; - acc - } - ) >> (res) - ) - ); + fn add_sub(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.mul_div_rem(input)?; + fold_many0( + pair(complete(one_of_punctuation(&["+", "-"][..])), |i| { + self.mul_div_rem(i) + }), + acc, + |mut acc, (op, val): (&[u8], EvalResult)| { + match op[0] as char { + '+' => acc += &val, + '-' => acc -= &val, + _ => unreachable!(), + }; + acc + }, + )(input) + } - method!(shl_shr,&[Token],EvalResult,crate::Error>, mut self, - numeric!(do_parse!( - acc: call_m!(self.add_sub) >> - res: fold_many0!( - pair!(comp!(one_of_punctuation!(["<<", ">>"])), call_m!(self.add_sub)), - acc, - |mut acc, (op, val): (&[u8], EvalResult)| { - match op { - b"<<" => acc <<= &val, - b">>" => acc >>= &val, - _ => unreachable!() - }; - acc - } - ) >> (res) - )) - ); + fn shl_shr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.add_sub(input)?; + numeric(fold_many0( + pair(complete(one_of_punctuation(&["<<", ">>"][..])), |i| { + self.add_sub(i) + }), + acc, + |mut acc, (op, val): (&[u8], EvalResult)| { + match op { + b"<<" => acc <<= &val, + b">>" => acc >>= &val, + _ => unreachable!(), + }; + acc + }, + ))(input) + } - method!(and,&[Token],EvalResult,crate::Error>, mut self, - numeric!(do_parse!( - acc: call_m!(self.shl_shr) >> - res: fold_many0!( - preceded!(comp!(p!("&")), call_m!(self.shl_shr)), - acc, - |mut acc, val: EvalResult| { - acc &= &val; - acc - } - ) >> (res) - )) - ); + fn and(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.shl_shr(input)?; + numeric(fold_many0( + preceded(complete(p("&")), |i| self.shl_shr(i)), + acc, + |mut acc, val: EvalResult| { + acc &= &val; + acc + }, + ))(input) + } - method!(xor,&[Token],EvalResult,crate::Error>, mut self, - numeric!(do_parse!( - acc: call_m!(self.and) >> - res: 
fold_many0!( - preceded!(comp!(p!("^")), call_m!(self.and)), - acc, - |mut acc, val: EvalResult| { - acc ^= &val; - acc - } - ) >> (res) - )) - ); + fn xor(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.and(input)?; + numeric(fold_many0( + preceded(complete(p("^")), |i| self.and(i)), + acc, + |mut acc, val: EvalResult| { + acc ^= &val; + acc + }, + ))(input) + } - method!(or,&[Token],EvalResult,crate::Error>, mut self, - numeric!(do_parse!( - acc: call_m!(self.xor) >> - res: fold_many0!( - preceded!(comp!(p!("|")), call_m!(self.xor)), - acc, - |mut acc, val: EvalResult| { - acc |= &val; - acc - } - ) >> (res) - )) - ); + fn or(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + let (input, acc) = self.xor(input)?; + numeric(fold_many0( + preceded(complete(p("|")), |i| self.xor(i)), + acc, + |mut acc, val: EvalResult| { + acc |= &val; + acc + }, + ))(input) + } #[inline(always)] - fn numeric_expr(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) { + fn numeric_expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { self.or(input) } } @@ -420,89 +417,87 @@ impl<'a> PRef<'a> { // ======================================================= impl<'a> PRef<'a> { - fn identifier(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) { - ( - self, - match input.split_first() { - None => Err(Err::Incomplete(Needed::Size(1))), - Some(( - &Token { - kind: TokenKind::Identifier, - ref raw, - }, - rest, - )) => { - if let Some(r) = self.identifiers.get(&raw[..]) { - Ok((rest, r.clone())) - } else { - Err(Err::Error(error_position!( - input, - ErrorKind::Custom(crate::Error::UnknownIdentifier) - ))) - } + fn identifier(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + match input.split_first() { + None => Err(Err::Incomplete(Needed::Size(1))), + Some(( + &Token { + kind: TokenKind::Identifier, + ref raw, + }, + rest, + )) => { + if let Some(r) = self.identifiers.get(&raw[..]) { + Ok((rest, r.clone())) + } else { + 
Err(Err::Error( + (input, crate::ErrorKind::UnknownIdentifier).into(), + )) } - Some(_) => Err(Err::Error(error_position!( - input, - ErrorKind::Custom(crate::Error::TypedToken(TokenKind::Identifier)) - ))), - }, - ) - } - - fn literal(self, input: &[Token]) -> (Self, CResult<'_, EvalResult>) { - ( - self, - match input.split_first() { - None => Err(Err::Incomplete(Needed::Size(1))), - Some(( - &Token { - kind: TokenKind::Literal, - ref raw, - }, - rest, - )) => match literal::parse(raw) { - Ok((_, result)) => Ok((rest, result)), - _ => Err(Err::Error(error_position!( - input, - ErrorKind::Custom(crate::Error::InvalidLiteral) - ))), + } + Some(_) => Err(Err::Error( + (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(), + )), + } + } + + fn literal(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + match input.split_first() { + None => Err(Err::Incomplete(Needed::Size(1))), + Some(( + &Token { + kind: TokenKind::Literal, + ref raw, }, - Some(_) => Err(Err::Error(error_position!( - input, - ErrorKind::Custom(crate::Error::TypedToken(TokenKind::Literal)) - ))), + rest, + )) => match literal::parse(raw) { + Ok((_, result)) => Ok((rest, result)), + _ => Err(Err::Error((input, crate::ErrorKind::InvalidLiteral).into())), }, - ) + Some(_) => Err(Err::Error( + (input, crate::ErrorKind::TypedToken(TokenKind::Literal)).into(), + )), + } } - method!(string,&[Token],Vec,crate::Error>, mut self, - alt!( - map_opt!(call_m!(self.literal),EvalResult::as_str) | - map_opt!(call_m!(self.identifier),EvalResult::as_str) - ) - ); + fn string(self, input: &'_ [Token]) -> CResult<'_, Vec> { + alt(( + map_opt(|i| self.literal(i), EvalResult::as_str), + map_opt(|i| self.identifier(i), EvalResult::as_str), + ))(input) + .to_cexpr_result() + } // "string1" "string2" etc... 
- method!(concat_str,&[Token],EvalResult,crate::Error>, mut self, - map!( - pair!(call_m!(self.string),many0!(comp!(call_m!(self.string)))), - |(first,v)| Vec::into_iter(v).fold(first,|mut s,elem|{Vec::extend_from_slice(&mut s,Vec::::as_slice(&elem));s}).into() - ) - ); + fn concat_str(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + map( + pair(|i| self.string(i), many0(complete(|i| self.string(i)))), + |(first, v)| { + Vec::into_iter(v) + .fold(first, |mut s, elem| { + Vec::extend_from_slice(&mut s, Vec::::as_slice(&elem)); + s + }) + .into() + }, + )(input) + .to_cexpr_result() + } - method!(expr,&[Token],EvalResult,crate::Error>, mut self, - alt!( - call_m!(self.numeric_expr) | - delimited!(p!("("),call_m!(self.expr),p!(")")) | - call_m!(self.concat_str) | - call_m!(self.literal) | - call_m!(self.identifier) - ) - ); + fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { + alt(( + |i| self.numeric_expr(i), + delimited(p("("), |i| self.expr(i), p(")")), + |i| self.concat_str(i), + |i| self.literal(i), + |i| self.identifier(i), + ))(input) + .to_cexpr_result() + } - method!(macro_definition,&[Token],(&[u8],EvalResult),crate::Error>, mut self, - pair!(typed_token!(Identifier),call_m!(self.expr)) - ); + fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { + pair(typed_token!(Identifier), |i| self.expr(i))(input) + } } impl<'a> ::std::ops::Deref for PRef<'a> { @@ -521,9 +516,7 @@ impl<'ident> IdentifierParser<'ident> { /// a known identifier is encountered during parsing, it is substituted /// for the value specified. pub fn new(identifiers: &HashMap, EvalResult>) -> IdentifierParser<'_> { - IdentifierParser { - identifiers: identifiers, - } + IdentifierParser { identifiers } } /// Parse and evalute an expression of a list of tokens. 
@@ -531,7 +524,7 @@ impl<'ident> IdentifierParser<'ident> { /// Returns an error if the input is not a valid expression or if the token /// stream contains comments, keywords or unknown identifiers. pub fn expr<'a>(&self, input: &'a [Token]) -> CResult<'a, EvalResult> { - self.as_ref().expr(input).1 + self.as_ref().expr(input) } /// Parse and evaluate a macro definition from of a list of tokens. @@ -555,7 +548,7 @@ impl<'ident> IdentifierParser<'ident> { /// #define NEGATIVE_THREE(IDENTIFIER) -3 /// ``` pub fn macro_definition<'a>(&self, input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> { - crate::assert_full_parse(self.as_ref().macro_definition(input).1) + crate::assert_full_parse(self.as_ref().macro_definition(input)) } } @@ -563,7 +556,7 @@ impl<'ident> IdentifierParser<'ident> { /// /// Returns an error if the input is not a valid expression or if the token /// stream contains comments, keywords or identifiers. -pub fn expr<'a>(input: &'a [Token]) -> CResult<'a, EvalResult> { +pub fn expr(input: &[Token]) -> CResult<'_, EvalResult> { IdentifierParser::new(&HashMap::new()).expr(input) } @@ -575,11 +568,10 @@ pub fn expr<'a>(input: &'a [Token]) -> CResult<'a, EvalResult> { /// Returns an error if the replacement is not a valid expression, if called /// on a function-like macro, or if the token stream contains comments, /// keywords or identifiers. -pub fn macro_definition<'a>(input: &'a [Token]) -> CResult<'a, (&'a [u8], EvalResult)> { +pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { IdentifierParser::new(&HashMap::new()).macro_definition(input) } -named_attr!( /// Parse a functional macro declaration from a list of tokens. /// /// Returns the identifier for the macro and the argument list (in order). 
The @@ -619,13 +611,13 @@ named_attr!( /// let (_, evaluated) = assert_full_parse(IdentifierParser::new(&idents).expr(expr)).unwrap(); /// assert_eq!(evaluated, EvalResult::Str(b"testsuffix".to_vec())); /// ``` -,pub fn_macro_declaration<&[Token],(&[u8],Vec<&[u8]>),crate::Error>, - pair!( - typed_token!(Identifier), - delimited!( - p!("("), - separated_list!(p!(","), typed_token!(Identifier)), - p!(")") - ) - ) -); +pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> { + pair( + typed_token!(Identifier), + delimited( + p("("), + separated_list(p(","), typed_token!(Identifier)), + p(")"), + ), + )(input) +} diff --git a/src/lib.rs b/src/lib.rs index 3458523..84e1e83 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,25 +5,29 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +//! A C expression parser and evaluator. +//! +//! This crate provides methods for parsing and evaluating simple C expressions. In general, the +//! crate can handle most arithmetic expressions that would appear in macros or the definition of +//! constants, as well as string and character constants. +//! +//! The main entry point is [`token::parse`], which parses a byte string and returns its +//! evaluated value. #![warn(rust_2018_idioms)] +#![warn(missing_docs)] #![allow(deprecated)] -#[macro_use] -extern crate nom as nom_crate; - pub mod nom { //! nom's result types, re-exported. 
- pub use crate::nom_crate::{Err, ErrorKind, IResult, Needed}; + pub use nom::{error::ErrorKind, Err, IResult, Needed}; } pub mod expr; pub mod literal; pub mod token; -use crate::nom::*; - -#[derive(Debug)] /// Parsing errors specific to C parsing -pub enum Error { +#[derive(Debug)] +pub enum ErrorKind { /// Expected the specified token ExactToken(token::Kind, &'static [u8]), /// Expected one of the specified tokens @@ -39,35 +43,98 @@ pub enum Error { InvalidLiteral, /// A full parse was requested, but data was left over after parsing finished. Partial, + /// An error occurred in an underlying nom parser. + Parser(nom::ErrorKind), } -impl From for Error { +impl From for ErrorKind { + fn from(k: nom::ErrorKind) -> Self { + ErrorKind::Parser(k) + } +} + +impl From for ErrorKind { fn from(_: u32) -> Self { - Error::InvalidLiteral + ErrorKind::InvalidLiteral } } -macro_rules! identity ( - ($i:expr,$e:expr) => ($e); -); +/// Parsing errors specific to C parsing. +/// +/// This is a superset of `(I, nom::ErrorKind)` that includes the additional errors specified by +/// [`ErrorKind`]. +#[derive(Debug)] +pub struct Error { + /// The remainder of the input stream at the time of the error. + pub input: I, + /// The error that occurred. 
+ pub error: ErrorKind, +} + +impl From<(I, nom::ErrorKind)> for Error { + fn from(e: (I, nom::ErrorKind)) -> Self { + Self::from((e.0, ErrorKind::from(e.1))) + } +} + +impl From<(I, ErrorKind)> for Error { + fn from(e: (I, ErrorKind)) -> Self { + Self { + input: e.0, + error: e.1, + } + } +} + +impl ::nom::error::ParseError for Error { + fn from_error_kind(input: I, kind: nom::ErrorKind) -> Self { + Self { + input, + error: kind.into(), + } + } + + fn append(_: I, _: nom::ErrorKind, other: Self) -> Self { + other + } +} + +// in lieu of https://github.com/Geal/nom/issues/1010 +trait ToCexprResult { + fn to_cexpr_result(self) -> nom::IResult>; +} +impl ToCexprResult for nom::IResult +where + Error: From, +{ + fn to_cexpr_result(self) -> nom::IResult> { + match self { + Ok(v) => Ok(v), + Err(nom::Err::Incomplete(n)) => Err(nom::Err::Incomplete(n)), + Err(nom::Err::Error(e)) => Err(nom::Err::Error(e.into())), + Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(e.into())), + } + } +} /// If the input result indicates a succesful parse, but there is data left, /// return an `Error::Partial` instead. 
-pub fn assert_full_parse(result: IResult<&[I], O, E>) -> IResult<&[I], O, crate::Error> +pub fn assert_full_parse<'i, I: 'i, O, E>( + result: nom::IResult<&'i [I], O, E>, +) -> nom::IResult<&'i [I], O, Error<&'i [I]>> where - Error: From, + Error<&'i [I]>: From, { - match fix_error!((), crate::Error, identity!(result)) { + match result.to_cexpr_result() { Ok((rem, output)) => { - if rem.len() == 0 { + if rem.is_empty() { Ok((rem, output)) } else { - Err(Err::Error(error_position!( - rem, - ErrorKind::Custom(crate::Error::Partial) - ))) + Err(nom::Err::Error((rem, ErrorKind::Partial).into())) } } - r => r, + Err(nom::Err::Incomplete(n)) => Err(nom::Err::Incomplete(n)), + Err(nom::Err::Failure(e)) => Err(nom::Err::Failure(e)), + Err(nom::Err::Error(e)) => Err(nom::Err::Error(e)), } } diff --git a/src/literal.rs b/src/literal.rs index 7fd8bf5..39f07be 100644 --- a/src/literal.rs +++ b/src/literal.rs @@ -39,9 +39,17 @@ use std::char; use std::str::{self, FromStr}; -use crate::nom_crate::*; +use nom::branch::alt; +use nom::bytes::complete::is_not; +use nom::bytes::complete::tag; +use nom::character::complete::{char, one_of}; +use nom::combinator::{complete, map, map_opt, opt, recognize}; +use nom::multi::{fold_many0, many0, many1, many_m_n}; +use nom::sequence::{delimited, pair, preceded, terminated, tuple}; +use nom::*; use crate::expr::EvalResult; +use crate::ToCexprResult; #[derive(Debug, Copy, Clone, PartialEq, Eq)] /// Representation of a C character @@ -80,56 +88,61 @@ impl Into> for CChar { } /// ensures the child parser consumes the whole input -#[macro_export] -macro_rules! 
full ( - ($i: expr, $submac:ident!( $($args:tt)* )) => ( - { - use crate::nom_crate::lib::std::result::Result::*; - let res = $submac!($i, $($args)*); - match res { - Ok((i, o)) => if i.len() == 0 { - Ok((i, o)) - } else { - Err(crate::nom_crate::Err::Error(error_position!(i, crate::nom_crate::ErrorKind::Custom(42)))) - }, - r => r, - } - } - ); - ($i:expr, $f:ident) => ( - full!($i, call!($f)); - ); -); - -// ==================================================== -// ======== macros that shouldn't be necessary ======== -// ==================================================== - -macro_rules! force_type ( - ($input:expr,IResult<$i:ty,$o:ty,$e:ty>) => (Err::<($i,$o),Err<$i,$e>>(crate::nom_crate::Err::Error(error_position!($input, ErrorKind::Fix)))) -); +pub fn full, F>( + f: F, +) -> impl Fn(I) -> nom::IResult +where + I: nom::InputLength, + F: Fn(I) -> nom::IResult, +{ + move |input| { + let res = f(input); + match res { + Ok((i, o)) => { + if i.input_len() == 0 { + Ok((i, o)) + } else { + Err(nom::Err::Error((i, nom::error::ErrorKind::Complete.into()))) + } + } + r => r, + } + } +} // ================================= // ======== matching digits ======== // ================================= -macro_rules! byte ( - ($i:expr, $($p: pat)|* ) => ({ - match $i.split_first() { - $(Some((&c @ $p,rest)))|* => Ok::<(&[_],u8),crate::nom_crate::Err<&[_],u32>>((rest,c)), - Some(_) => Err(crate::nom_crate::Err::Error(error_position!($i, ErrorKind::OneOf))), - None => Err(crate::nom_crate::Err::Incomplete(Needed::Size(1))), - } - }) -); - -named!(binary, byte!(b'0'..=b'1')); -named!(octal, byte!(b'0'..=b'7')); -named!(decimal, byte!(b'0'..=b'9')); -named!( - hexadecimal, - byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F') -); +macro_rules! 
byte { + ($($p: pat)|* ) => {{ + fn parser(i: &[u8]) -> crate::nom::IResult<&[u8], u8> { + match i.split_first() { + $(Some((&c @ $p,rest)))|* => Ok((rest,c)), + Some(_) => Err(nom::Err::Error((i, nom::error::ErrorKind::OneOf))), + None => Err(nom::Err::Incomplete(Needed::Size(1))), + } + } + + parser + }} +} + +fn binary(i: &[u8]) -> nom::IResult<&[u8], u8> { + byte!(b'0'..=b'1')(i) +} + +fn octal(i: &[u8]) -> nom::IResult<&[u8], u8> { + byte!(b'0'..=b'7')(i) +} + +fn decimal(i: &[u8]) -> nom::IResult<&[u8], u8> { + byte!(b'0'..=b'9')(i) +} + +fn hexadecimal(i: &[u8]) -> nom::IResult<&[u8], u8> { + byte!(b'0' ..= b'9' | b'a' ..= b'f' | b'A' ..= b'F')(i) +} // ======================================== // ======== characters and strings ======== @@ -166,62 +179,60 @@ fn c_unicode_escape(n: Vec) -> Option { .map(CChar::Char) } -named!( - escaped_char, - preceded!( - complete!(char!('\\')), - alt_complete!( - map!(one_of!(r#"'"?\"#), CChar::Char) - | map!(one_of!("abfnrtv"), escape2char) - | map_opt!(many_m_n!(1, 3, octal), |v| c_raw_escape(v, 8)) - | map_opt!( - preceded!(char!('x'), many1!(hexadecimal)), - |v| c_raw_escape(v, 16) - ) - | map_opt!( - preceded!(char!('u'), many_m_n!(4, 4, hexadecimal)), - c_unicode_escape - ) - | map_opt!( - preceded!(char!('U'), many_m_n!(8, 8, hexadecimal)), - c_unicode_escape - ) - ) - ) -); - -named!( - c_width_prefix, - alt!(tag!("u8") | tag!("u") | tag!("U") | tag!("L")) -); - -named!( - c_char, - delimited!( - terminated!(opt!(c_width_prefix), char!('\'')), - alt!(escaped_char | map!(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from)), - char!('\'') - ) -); - -named!( - c_string>, - delimited!( - alt!(preceded!(c_width_prefix, char!('"')) | char!('"')), - fold_many0!( - alt!( - map!(escaped_char, |c: CChar| c.into()) - | map!(is_not!([b'\\', b'"']), |c: &[u8]| c.into()) +fn escaped_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { + preceded( + char('\\'), + alt(( + map(one_of(r#"'"?\"#), CChar::Char), + map(one_of("abfnrtv"), 
escape2char), + map_opt(many_m_n(1, 3, octal), |v| c_raw_escape(v, 8)), + map_opt(preceded(char('x'), many1(hexadecimal)), |v| { + c_raw_escape(v, 16) + }), + map_opt( + preceded(char('u'), many_m_n(4, 4, hexadecimal)), + c_unicode_escape, + ), + map_opt( + preceded(char('U'), many_m_n(8, 8, hexadecimal)), + c_unicode_escape, ), + )), + )(i) +} + +fn c_width_prefix(i: &[u8]) -> nom::IResult<&[u8], &[u8]> { + alt((tag("u8"), tag("u"), tag("U"), tag("L")))(i) +} + +fn c_char(i: &[u8]) -> nom::IResult<&[u8], CChar> { + delimited( + terminated(opt(c_width_prefix), char('\'')), + alt(( + escaped_char, + map(byte!(0 ..= 91 /* \=92 */ | 93 ..= 255), CChar::from), + )), + char('\''), + )(i) +} + +fn c_string(i: &[u8]) -> nom::IResult<&[u8], Vec> { + delimited( + alt((preceded(c_width_prefix, char('"')), char('"'))), + fold_many0( + alt(( + map(escaped_char, |c: CChar| c.into()), + map(is_not([b'\\', b'"']), |c: &[u8]| c.into()), + )), Vec::new(), |mut v: Vec, res: Vec| { v.extend_from_slice(&res); v - } + }, ), - char!('"') - ) -); + char('"'), + )(i) +} // ================================ // ======== parse integers ======== @@ -241,100 +252,110 @@ fn take_ul(input: &[u8]) -> IResult<&[u8], &[u8]> { } } -named!( - c_int, - map!( - terminated!( - alt_complete!( - map_opt!(preceded!(tag!("0x"), many1!(complete!(hexadecimal))), |v| { +fn c_int(i: &[u8]) -> nom::IResult<&[u8], i64> { + map( + terminated( + alt(( + map_opt(preceded(tag("0x"), many1(complete(hexadecimal))), |v| { c_int_radix(v, 16) - }) | map_opt!(preceded!(tag!("0X"), many1!(complete!(hexadecimal))), |v| { + }), + map_opt(preceded(tag("0X"), many1(complete(hexadecimal))), |v| { c_int_radix(v, 16) - }) | map_opt!(preceded!(tag!("0b"), many1!(complete!(binary))), |v| { + }), + map_opt(preceded(tag("0b"), many1(complete(binary))), |v| { c_int_radix(v, 2) - }) | map_opt!(preceded!(tag!("0B"), many1!(complete!(binary))), |v| { + }), + map_opt(preceded(tag("0B"), many1(complete(binary))), |v| { c_int_radix(v, 2) - }) 
| map_opt!(preceded!(char!('0'), many1!(complete!(octal))), |v| { + }), + map_opt(preceded(char('0'), many1(complete(octal))), |v| { c_int_radix(v, 8) - }) | map_opt!(many1!(complete!(decimal)), |v| c_int_radix(v, 10)) - | force_type!(IResult<_, _, u32>) - ), - opt!(take_ul) + }), + map_opt(many1(complete(decimal)), |v| c_int_radix(v, 10)), + |input| Err(crate::nom::Err::Error((input, crate::nom::ErrorKind::Fix))), + )), + opt(take_ul), ), - |i| i as i64 - ) -); + |i| i as i64, + )(i) +} // ============================== // ======== parse floats ======== // ============================== -named!(float_width, complete!(byte!(b'f' | b'l' | b'F' | b'L'))); -named!( - float_exp<(Option, Vec)>, - preceded!( +fn float_width(i: &[u8]) -> nom::IResult<&[u8], u8> { + nom::combinator::complete(byte!(b'f' | b'l' | b'F' | b'L'))(i) +} + +fn float_exp(i: &[u8]) -> nom::IResult<&[u8], (Option, Vec)> { + preceded( byte!(b'e' | b'E'), - pair!(opt!(byte!(b'-' | b'+')), many1!(complete!(decimal))) - ) -); - -named!( - c_float, - map_opt!( - alt!( - terminated!( - recognize!(tuple!( - many1!(complete!(decimal)), + pair(opt(byte!(b'-' | b'+')), many1(complete(decimal))), + )(i) +} + +fn c_float(i: &[u8]) -> nom::IResult<&[u8], f64> { + map_opt( + alt(( + terminated( + recognize(tuple(( + many1(complete(decimal)), byte!(b'.'), - many0!(complete!(decimal)) - )), - opt!(float_width) - ) | terminated!( - recognize!(tuple!( - many0!(complete!(decimal)), + many0(complete(decimal)), + ))), + opt(float_width), + ), + terminated( + recognize(tuple(( + many0(complete(decimal)), byte!(b'.'), - many1!(complete!(decimal)) - )), - opt!(float_width) - ) | terminated!( - recognize!(tuple!( - many0!(complete!(decimal)), - opt!(byte!(b'.')), - many1!(complete!(decimal)), - float_exp - )), - opt!(float_width) - ) | terminated!( - recognize!(tuple!( - many1!(complete!(decimal)), - opt!(byte!(b'.')), - many0!(complete!(decimal)), - float_exp - )), - opt!(float_width) - ) | 
terminated!(recognize!(many1!(complete!(decimal))), float_width) - ), - |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()) - ) -); + many1(complete(decimal)), + ))), + opt(float_width), + ), + terminated( + recognize(tuple(( + many0(complete(decimal)), + opt(byte!(b'.')), + many1(complete(decimal)), + float_exp, + ))), + opt(float_width), + ), + terminated( + recognize(tuple(( + many1(complete(decimal)), + opt(byte!(b'.')), + many0(complete(decimal)), + float_exp, + ))), + opt(float_width), + ), + terminated(recognize(many1(complete(decimal))), float_width), + )), + |v| str::from_utf8(v).ok().and_then(|i| f64::from_str(i).ok()), + )(i) +} // ================================ // ======== main interface ======== // ================================ -named!(one_literal<&[u8],EvalResult,crate::Error>, - fix_error!(crate::Error,alt_complete!( - map!(full!(c_char),EvalResult::Char) | - map!(full!(c_int),|i|EvalResult::Int(::std::num::Wrapping(i))) | - map!(full!(c_float),EvalResult::Float) | - map!(full!(c_string),EvalResult::Str) - )) -); +fn one_literal(input: &[u8]) -> nom::IResult<&[u8], EvalResult, crate::Error<&[u8]>> { + alt(( + map(full(c_char), EvalResult::Char), + map(full(c_int), |i| EvalResult::Int(::std::num::Wrapping(i))), + map(full(c_float), EvalResult::Float), + map(full(c_string), EvalResult::Str), + ))(input) + .to_cexpr_result() +} /// Parse a C literal. /// /// The input must contain exactly the representation of a single literal /// token, and in particular no whitespace or sign prefixes. -pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error> { +pub fn parse(input: &[u8]) -> IResult<&[u8], EvalResult, crate::Error<&[u8]>> { crate::assert_full_parse(one_literal(input)) } diff --git a/src/token.rs b/src/token.rs index d88242c..dbc5949 100644 --- a/src/token.rs +++ b/src/token.rs @@ -10,6 +10,7 @@ //! This is designed to map onto a libclang CXToken. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] +#[allow(missing_docs)] pub enum Kind { Punctuation, Keyword, @@ -18,16 +19,19 @@ pub enum Kind { Comment, } +/// A single token in a C expression. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token { + /// The type of this token. pub kind: Kind, + /// The bytes that make up the token. pub raw: Box<[u8]>, } impl<'a> From<(Kind, &'a [u8])> for Token { fn from((kind, value): (Kind, &'a [u8])) -> Token { Token { - kind: kind, + kind, raw: value.to_owned().into_boxed_slice(), } } diff --git a/tests/clang.rs b/tests/clang.rs index 78f4f7d..b2484f0 100644 --- a/tests/clang.rs +++ b/tests/clang.rs @@ -235,7 +235,7 @@ fn file_visit_macros, Vec)>( let tu = { let index = clang_createIndex(true as _, false as _); let cfile = ffi::CString::new(file).unwrap(); - let mut tu = mem::uninitialized(); + let mut tu = mem::MaybeUninit::uninit(); assert!( clang_parseTranslationUnit2( index, @@ -245,12 +245,12 @@ fn file_visit_macros, Vec)>( ptr::null_mut(), 0, CXTranslationUnit_DetailedPreprocessingRecord, - &mut tu + &mut *tu.as_mut_ptr() ) == CXError_Success, "Failure reading test case {}", file ); - tu + tu.assume_init() }; visit_children(clang_getTranslationUnitCursor(tu), |cur, _parent| { if cur.kind == CXCursor_MacroDefinition { @@ -308,8 +308,8 @@ fn fix_bug_9069() -> bool { token_sets[0] != token_sets[1] } - use std::sync::atomic::{AtomicBool, Ordering, ATOMIC_BOOL_INIT}; - use std::sync::{Once, ONCE_INIT}; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::Once; static CHECK_FIX: Once = Once::new(); static FIX: AtomicBool = AtomicBool::new(false);