From 6693d9394439be9ce57a11812689cde99fcd27a9 Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Sat, 19 Aug 2023 14:35:24 +0100 Subject: [PATCH] Interning Orchid string literals --- src/parse/lexer.rs | 6 ++-- src/representations/literal.rs | 6 ++-- src/representations/mod.rs | 2 ++ src/representations/string.rs | 55 ++++++++++++++++++++++++++++++++++ src/systems/cast_exprinst.rs | 6 ++-- src/systems/io/bindings.rs | 5 ++-- src/systems/io/instances.rs | 2 +- src/systems/stl/conv.rs | 9 +++--- src/systems/stl/panic.rs | 8 +++-- src/systems/stl/str.rs | 26 ++++++++-------- 10 files changed, 96 insertions(+), 29 deletions(-) create mode 100644 src/representations/string.rs diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs index 07f1716..5734edd 100644 --- a/src/parse/lexer.rs +++ b/src/parse/lexer.rs @@ -185,12 +185,12 @@ fn paren_parser(lp: char, rp: char) -> impl SimpleParser { just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp))) } -pub fn literal_parser() -> impl SimpleParser { +pub fn literal_parser<'a>(ctx: impl Context + 'a) -> impl SimpleParser + 'a { choice(( // all ints are valid floats so it takes precedence number::int_parser().map(Literal::Uint), number::float_parser().map(Literal::Num), - string::str_parser().map(Literal::Str), + string::str_parser().map(move |s| Literal::Str(ctx.interner().i(&s).into())), )) } @@ -229,7 +229,7 @@ pub fn lexer<'a>( just(':').to(Lexeme::Type), just('\n').to(Lexeme::BR), just('.').to(Lexeme::Dot), - literal_parser().map(Lexeme::Literal), + literal_parser(ctx.clone()).map(Lexeme::Literal), name::name_parser(&all_ops).map({ let ctx = ctx.clone(); move |n| Lexeme::Name(ctx.interner().i(&n)) diff --git a/src/representations/literal.rs b/src/representations/literal.rs index ec77a6a..2293029 100644 --- a/src/representations/literal.rs +++ b/src/representations/literal.rs @@ -2,6 +2,8 @@ use std::fmt::Debug; use ordered_float::NotNan; +use super::OrcString; + /// Exact values read from the AST which have a shared meaning recognized by all /// external functions #[derive(Clone, PartialEq, Eq, Hash)] @@ -11,7 +13,7 @@ pub enum Literal { /// An unsigned integer; a size, index or pointer Uint(u64), /// A utf-8 character sequence - Str(String), + Str(OrcString), } impl Debug for Literal { @@ -36,6 +38,6 @@ impl From for Literal { } impl From for Literal { fn from(value: String) -> Self { - Self::Str(value) + Self::Str(value.into()) } } diff --git a/src/representations/mod.rs b/src/representations/mod.rs index d4832bb..93d2fad 100644 --- a/src/representations/mod.rs +++ b/src/representations/mod.rs @@ -13,8 +13,10 @@ pub mod primitive; pub mod project; pub mod sourcefile; pub mod tree; +mod string; pub use const_tree::{from_const_tree, ConstTree}; +pub use string::OrcString; pub use literal::Literal; pub use location::Location; pub use namelike::{NameLike, Sym, VName}; diff --git a/src/representations/string.rs b/src/representations/string.rs new file mode 100644 index 0000000..78e8d05 --- /dev/null +++ b/src/representations/string.rs @@ -0,0 +1,55 @@ +use std::hash::Hash; +use std::ops::Deref; +use std::rc::Rc; + +use crate::Tok; + +#[derive(Clone, Debug, Eq)] +pub enum OrcString { + Interned(Tok), + Runtime(Rc), +} + +impl OrcString { + pub fn get_string(&self) -> String { + self.as_str().to_owned() + } +} + +impl Deref for OrcString { + type Target = String; + + fn deref(&self) -> &Self::Target { + match self { + Self::Interned(t) => t, + Self::Runtime(r) => r, + } + } +} + +impl Hash for OrcString { + fn hash(&self, state: &mut H) { + self.as_str().hash(state) + } +} + +impl From for OrcString { + fn from(value: String) -> Self { + Self::Runtime(Rc::new(value)) + } +} + +impl From> for OrcString { + fn from(value: Tok) -> Self { + Self::Interned(value) + } +} + +impl PartialEq for OrcString { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Interned(t1), Self::Interned(t2)) => t1 == t2, + _ => **self == **other, + } + } +} diff --git a/src/systems/cast_exprinst.rs b/src/systems/cast_exprinst.rs index d711b24..cb36e7f 100644 --- a/src/systems/cast_exprinst.rs +++ b/src/systems/cast_exprinst.rs @@ -8,7 +8,7 @@ use super::assertion_error::AssertionError; use crate::foreign::{Atomic, ExternError}; use crate::interpreted::Clause; use crate::representations::interpreted::ExprInst; -use crate::representations::Literal; +use crate::representations::{Literal, OrcString}; use crate::Primitive; /// Tries to cast the [ExprInst] as a [Literal], calls the provided function on @@ -25,7 +25,7 @@ pub fn with_lit( /// Like [with_lit] but also unwraps [Literal::Str] pub fn with_str( x: &ExprInst, - predicate: impl FnOnce(&String) -> Result>, + predicate: impl FnOnce(&OrcString) -> Result>, ) -> Result> { with_lit(x, |l| { if let Literal::Str(s) = l { @@ -93,7 +93,7 @@ impl TryFrom<&ExprInst> for Literal { } } -impl TryFrom<&ExprInst> for String { +impl TryFrom<&ExprInst> for OrcString { type Error = Rc; fn try_from(value: &ExprInst) -> Result { diff --git a/src/systems/io/bindings.rs b/src/systems/io/bindings.rs index 5769e00..64890cb 100644 --- a/src/systems/io/bindings.rs +++ b/src/systems/io/bindings.rs @@ -4,6 +4,7 @@ use super::instances::{ }; use crate::foreign::cps_box::init_cps; use crate::foreign::{Atom, Atomic}; +use crate::representations::OrcString; use crate::systems::stl::Binary; use crate::systems::RuntimeError; use crate::{ast, define_fn, ConstTree, Interner, Primitive}; @@ -53,9 +54,9 @@ define_fn! { define_fn! { WriteStr { stream: SinkHandle, - string: String + string: OrcString } => Ok(init_cps(3, IOCmdHandlePack { - cmd: WriteCmd::WStr(string.clone()), + cmd: WriteCmd::WStr(string.get_string()), handle: *stream, })) } diff --git a/src/systems/io/instances.rs b/src/systems/io/instances.rs index 0f0391a..26e2c09 100644 --- a/src/systems/io/instances.rs +++ b/src/systems/io/instances.rs @@ -105,7 +105,7 @@ impl IOHandler for (ExprInst, ExprInst) { ReadResult::RBin(_, Ok(bytes)) => call(succ, vec![Binary(Arc::new(bytes)).atom_cls().wrap()]).wrap(), ReadResult::RStr(_, Ok(text)) => - call(succ, vec![Literal::Str(text).into()]).wrap(), + call(succ, vec![Literal::Str(text.into()).into()]).wrap(), } } } diff --git a/src/systems/stl/conv.rs b/src/systems/stl/conv.rs index 9bc6d75..b1a83c9 100644 --- a/src/systems/stl/conv.rs +++ b/src/systems/stl/conv.rs @@ -4,6 +4,7 @@ use ordered_float::NotNan; use super::ArithmeticError; use crate::foreign::ExternError; use crate::interner::Interner; +use crate::interpreted::Clause; use crate::parse::{float_parser, int_parser}; use crate::systems::cast_exprinst::with_lit; use crate::systems::AssertionError; @@ -43,10 +44,10 @@ define_fn! { /// Convert a literal to a string using Rust's conversions for floats, chars and /// uints respectively ToString = |x| with_lit(x, |l| Ok(match l { - Literal::Uint(i) => i.to_string(), - Literal::Num(n) => n.to_string(), - Literal::Str(s) => s.clone(), - })).map(|s| Literal::Str(s).into()) + Literal::Uint(i) => Literal::Str(i.to_string().into()), + Literal::Num(n) => Literal::Str(n.to_string().into()), + s@Literal::Str(_) => s.clone(), + })).map(Clause::from) } pub fn conv(i: &Interner) -> ConstTree { diff --git a/src/systems/stl/panic.rs b/src/systems/stl/panic.rs index 431a825..55ee24b 100644 --- a/src/systems/stl/panic.rs +++ b/src/systems/stl/panic.rs @@ -1,4 +1,5 @@ use std::fmt::Display; +use std::rc::Rc; use crate::foreign::ExternError; use crate::systems::cast_exprinst::with_str; @@ -6,7 +7,7 @@ use crate::{define_fn, ConstTree, Interner}; /// An unrecoverable error in Orchid land. Because Orchid is lazy, this only /// invalidates expressions that reference the one that generated it. -pub struct OrchidPanic(String); +pub struct OrchidPanic(Rc); impl Display for OrchidPanic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -18,7 +19,10 @@ impl ExternError for OrchidPanic {} define_fn! { /// Takes a message, returns an [ExternError] unconditionally. - Panic = |x| with_str(x, |s| Err(OrchidPanic(s.clone()).into_extern())) + Panic = |x| with_str(x, |s| { + let msg = Rc::new(s.get_string()); + Err(OrchidPanic(msg).into_extern()) + }) } pub fn panic(i: &Interner) -> ConstTree { diff --git a/src/systems/stl/str.rs b/src/systems/stl/str.rs index 8ea267e..9ed956c 100644 --- a/src/systems/stl/str.rs +++ b/src/systems/stl/str.rs @@ -1,6 +1,7 @@ use unicode_segmentation::UnicodeSegmentation; use crate::interner::Interner; +use crate::representations::OrcString; use crate::systems::cast_exprinst::with_str; use crate::systems::codegen::{orchid_opt, tuple}; use crate::systems::RuntimeError; @@ -9,15 +10,16 @@ use crate::{define_fn, ConstTree, Literal}; define_fn! {expr=x in /// Append a string to another - pub Concatenate { a: String, b: String } - => Ok(Literal::Str(a.to_owned() + b).into()) + pub Concatenate { a: OrcString, b: OrcString } + => Ok(Literal::Str((a.get_string() + b.as_str()).into()).into()) } define_fn! {expr=x in - pub Slice { s: String, i: u64, len: u64 } => { - let graphs = s.graphemes(true); + pub Slice { s: OrcString, i: u64, len: u64 } => { + let graphs = s.as_str().graphemes(true); if *i == 0 { - Ok(Literal::Str(graphs.take(*len as usize).collect()).into()) + let orc_str = graphs.take(*len as usize).collect::().into(); + Ok(Literal::Str(orc_str).into()) } else { let mut prefix = graphs.skip(*i as usize - 1); if prefix.next().is_none() { @@ -27,10 +29,9 @@ define_fn! {expr=x in ) } else { let mut count = 0; - let ret = prefix - .take(*len as usize) + let ret = (prefix.take(*len as usize)) .map(|x| { count+=1; x }) - .collect(); + .collect::().into(); if count == *len { Ok(Literal::Str(ret).into()) } else { @@ -45,15 +46,16 @@ define_fn! {expr=x in } define_fn! {expr=x in - pub Find { haystack: String, needle: String } => { - let found = iter_find(haystack.graphemes(true), needle.graphemes(true)); + pub Find { haystack: OrcString, needle: OrcString } => { + let haystack_graphs = haystack.as_str().graphemes(true); + let found = iter_find(haystack_graphs, needle.as_str().graphemes(true)); Ok(orchid_opt(found.map(|x| Literal::Uint(x as u64).into()))) } } define_fn! {expr=x in - pub Split { s: String, i: u64 } => { - let mut graphs = s.graphemes(true); + pub Split { s: OrcString, i: u64 } => { + let mut graphs = s.as_str().graphemes(true); let a = graphs.by_ref().take(*i as usize).collect::(); let b = graphs.collect::(); Ok(tuple(vec![a.into(), b.into()]))