Skip to content

Commit

Permalink
Merge pull request #61 from jank-lang/interp-syntax
Browse files Browse the repository at this point in the history
Normalize interpolation syntax to ~{}
  • Loading branch information
jeaye authored Mar 15, 2024
2 parents 46ea760 + 4bfd17f commit 77a6655
Show file tree
Hide file tree
Showing 17 changed files with 659 additions and 362 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ add_library(
src/cpp/jank/util/cli.cpp
src/cpp/jank/util/mapped_file.cpp
src/cpp/jank/util/scope_exit.cpp
src/cpp/jank/util/escape.cpp
src/cpp/jank/profile/time.cpp
src/cpp/jank/read/lex.cpp
src/cpp/jank/read/parse.cpp
Expand Down
2 changes: 1 addition & 1 deletion include/cpp/jank/option.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ namespace jank
return *reinterpret_cast<T const *>(data) != *reinterpret_cast<T const *>(rhs.data);
}

return true;
return false;
}

constexpr native_bool operator==(option<T> const &rhs) const
Expand Down
4 changes: 3 additions & 1 deletion include/cpp/jank/read/lex.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ namespace jank::read::lex

struct token
{
token() = default;
token(token_kind const k);
token(size_t const p, token_kind const k);
token(size_t const p, token_kind const k, native_integer const);
Expand All @@ -51,6 +52,7 @@ namespace jank::read::lex
token(size_t const p, size_t const s, token_kind const k, native_integer const);
token(size_t const p, size_t const s, token_kind const k, native_real const);
token(size_t const p, size_t const s, token_kind const k, native_persistent_string_view const);
token(size_t const p, size_t const s, token_kind const k, char const * const);
token(size_t const p, size_t const s, token_kind const k, native_bool const);

native_bool operator==(token const &rhs) const;
Expand All @@ -67,7 +69,7 @@ namespace jank::read::lex
static constexpr size_t ignore_pos{ std::numeric_limits<size_t>::max() };
size_t pos{ ignore_pos };
size_t size{ 1 };
token_kind kind;
token_kind kind{ token_kind::eof };
boost::variant<no_data, native_integer, native_real, native_persistent_string_view, native_bool>
data;
};
Expand Down
13 changes: 11 additions & 2 deletions include/cpp/jank/read/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,16 @@ namespace jank::read::parse
{
struct processor
{
/* TODO: none instead of nullptr. */
using object_result = result<runtime::object_ptr, error>;
/* Bundles a parsed object with two lexer tokens recorded alongside it. An
 * option of this type is what object_result carries on success. */
struct object_source_info
{
/* Equality and inequality against another object_source_info. These are only
 * declared here; the definitions live elsewhere. */
native_bool operator==(object_source_info const &rhs) const;
native_bool operator!=(object_source_info const &rhs) const;

/* The parsed object. Value-initialized when not explicitly set. */
runtime::object_ptr ptr{};
/* NOTE(review): presumably the first and last tokens of the form which
 * produced ptr -- confirm against the parser implementation. */
lex::token start, end;
};

using object_result = result<option<object_source_info>, error>;

struct iterator
{
Expand Down Expand Up @@ -60,6 +68,7 @@ namespace jank::read::parse
runtime::context &rt_ctx;
lex::processor::iterator token_current, token_end;
option<lex::token_kind> expected_closer;
lex::token latest_token;
/* Whether or not the next form is considered quoted. */
native_bool quoted{};
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
#pragma once

/* This provides a fmt extension for escaping strings and wrapping them in
* quotes. It's largely adapted from here:
* https://github.com/fmtlib/fmt/issues/825#issuecomment-1227501168 */
namespace jank::codegen
namespace jank::util
{
/* This provides a fmt extension for escaping strings and wrapping them in
* quotes. It's largely adapted from here:
* https://github.com/fmtlib/fmt/issues/825#issuecomment-1227501168
*
* Usage just looks like:
* fmt::format("{}", util::escaped_quoted_view(s))
*/
template <typename S = native_persistent_string_view>
struct escape_view
{
Expand Down Expand Up @@ -50,16 +54,22 @@ namespace jank::codegen
};

constexpr escape_view<native_persistent_string_view>
escaped(native_persistent_string_view const &sv, char const q = '"', char const e = '\\')
escaped_quoted_view(native_persistent_string_view const &sv,
char const q = '"',
char const e = '\\')
{
return escape_view<native_persistent_string_view>{ sv, q, e };
}

/* These provide normal escaping/unescaping, with no quoting. */
string_result<native_transient_string> unescape(native_transient_string const &input);
native_transient_string escape(native_transient_string const &input);
}

template <typename S>
struct fmt::formatter<jank::codegen::escape_view<S>>
struct fmt::formatter<jank::util::escape_view<S>>
{
using V = jank::codegen::escape_view<S>;
using V = jank::util::escape_view<S>;

template <typename C>
constexpr auto parse(C &ctx)
Expand All @@ -68,7 +78,7 @@ struct fmt::formatter<jank::codegen::escape_view<S>>
}

template <typename C>
auto format(jank::codegen::escape_view<S> const &s, C &ctx)
auto format(jank::util::escape_view<S> const &s, C &ctx)
{
return s.copy(ctx.out());
}
Expand Down
64 changes: 39 additions & 25 deletions src/cpp/jank/analyze/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace jank::analyze
{
return err(parse_current->expect_err_move());
}
fn.push_back(parse_current->expect_ok());
fn.push_back(parse_current->expect_ok().unwrap().ptr);
}
auto fn_list(make_box<runtime::obj::persistent_list>(fn.rbegin(), fn.rend()));
return analyze(fn_list, expression_type::expression);
Expand Down Expand Up @@ -957,54 +957,68 @@ namespace jank::analyze
/* native/raw expressions are broken up into chunks of either literal C++ code or
* interpolated jank code, the latter needing to also be analyzed. */
decltype(expr::native_raw<expression>::chunks) chunks;
/* TODO: Just use } for end and rely on token parsing info for when that is.
* This requires storing line/col start/end meta in each object. */
constexpr native_persistent_string_view interp_start{ "#{" }, interp_end{ "}#" };
for(size_t it{}; it != native_persistent_string::npos;)
constexpr native_persistent_string_view interp_start{ "~{" };
for(size_t it{}; it < code_str->data.size();)
{
auto const next_start(code_str->data.find(interp_start.data(), it));
if(next_start == native_persistent_string::npos)
auto const next_interp(code_str->data.find(interp_start.data(), it));
if(next_interp == native_persistent_string::npos)
{
/* This is the final chunk. */
chunks.emplace_back(native_persistent_string_view{ code_str->data.data() + it });
break;
}
auto const next_end(code_str->data.find(interp_end.data(), next_start));
if(next_end == native_persistent_string::npos)
{
return err(
error{ fmt::format("no matching {} found for native/raw interpolation", interp_end) });
}

/* Once we've found the start of an interpolation, we begin lexing/parsing at that
* spot, so we can get a jank value. */
read::lex::processor l_prc{
{code_str->data.data() + next_start + interp_start.size(),
next_end - next_start - interp_end.size()}
{code_str->data.data() + next_interp + interp_start.size(),
code_str->data.data() + code_str->data.size()}
};
read::parse::processor p_prc{ rt_ctx, l_prc.begin(), l_prc.end() };
auto parsed_it(p_prc.begin());
if(parsed_it->is_err())
auto parsed_obj(p_prc.next());
if(parsed_obj.is_err())
{
return parsed_obj.expect_err_move();
}
else if(parsed_obj.expect_ok().is_none())
{
return parsed_it->expect_err_move();
return err(error{ next_interp + interp_start.size(), "invalid native/raw interpolation" });
}
auto result(
analyze(parsed_it->expect_ok(), current_frame, expression_type::expression, fn_ctx, true));

/* We get back an AST expression and keep track of it as a chunk for later codegen. */
auto result(analyze(parsed_obj.expect_ok().unwrap().ptr,
current_frame,
expression_type::expression,
fn_ctx,
true));
if(result.is_err())
{
return result.expect_err_move();
}

if(next_start - it > 0)
/* C++ code before the next interpolation. */
if(next_interp - it > 0)
{
chunks.emplace_back(
native_persistent_string_view{ code_str->data.data() + it, next_start - it });
native_persistent_string_view{ code_str->data.data() + it, next_interp - it });
}
chunks.emplace_back(result.expect_ok());
it = next_end + interp_end.size();

if(++parsed_it != p_prc.end())
/* The next token needs to be a }, to match our original ~{. If it's not, either multiple
* forms were included in the interpolation or there is no closing }. We don't know for
* sure. */
auto const next_token(*p_prc.token_current);
if(next_token.is_err())
{
return next_token.expect_err();
}
else if(next_token.expect_ok().kind != read::lex::token_kind::close_curly_bracket)
{
return err(error{ "invalid native/raw: only one expression per interpolation" });
return err(error{
"invalid native/raw interpolation: ~{ must be followed by a single form and then a }" });
}
it = next_interp + interp_start.size() + next_token.expect_ok().pos
+ next_token.expect_ok().size;
}

return make_box<expression>(expr::native_raw<expression>{
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/jank/codegen/processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <jank/runtime/obj/number.hpp>
#include <jank/runtime/util.hpp>
#include <jank/codegen/processor.hpp>
#include <jank/codegen/escape.hpp>
#include <jank/util/escape.hpp>
#include <jank/detail/to_runtime_data.hpp>

/* The strategy for codegen to C++ is quite simple. Codegen always happens on a
Expand Down Expand Up @@ -181,7 +181,7 @@ namespace jank::codegen
{
fmt::format_to(inserter,
"jank::make_box<jank::runtime::obj::persistent_string>({})",
escaped(typed_o->data));
util::escaped_quoted_view(typed_o->data));
}
else if constexpr(std::same_as<T, runtime::obj::persistent_vector>)
{
Expand Down
8 changes: 8 additions & 0 deletions src/cpp/jank/read/lex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@ namespace jank::read
{
}

/* Builds a token from a position, size, kind, and a C string payload. The
 * pointer is wrapped in a native_persistent_string_view before being stored in
 * the data variant, so it is held as the string alternative.
 *
 * NOTE(review): this assumes d is a non-null, NUL-terminated string which
 * outlives the token, since only a view is stored -- confirm against callers. */
token::token(size_t const p, size_t const s, token_kind const k, char const * const d)
: pos{ p }
, size{ s }
, kind{ k }
, data{ native_persistent_string_view{ d } }
{
}

token::token(size_t const p, size_t const s, token_kind const k, native_bool const d)
: pos{ p }
, size{ s }
Expand Down
Loading

0 comments on commit 77a6655

Please sign in to comment.