Add support for shebangs (#236)

* support shebang comments; tests
* use 'const'
* format
* use auto and direct initialization
* more tests for shebang comments
* format
jank-lang · Feb 11, 2025 · 56f1417 · 56f1417
1 parent 5d79266
commit 56f1417
Show file tree

Hide file tree

Showing 2 changed files with 192 additions and 0 deletions.
diff --git a/compiler+runtime/src/cpp/jank/read/lex.cpp b/compiler+runtime/src/cpp/jank/read/lex.cpp
@@ -1047,6 +1047,37 @@ namespace jank::read
                                      token_kind::reader_macro_conditional });
                   }
                 }
+              case '!': // Shebang-style comment: "#!" through the end of the line becomes one comment token.
+                {
+                  while(true) // Advance until end of input or a newline.
+                  {
+                    auto const oc(peek());
+                    if(oc.is_err()) // peek() failed; presumably end of input (TODO confirm).
+                    {
+                      break;
+                    }
+                    auto const c(oc.expect_ok().character);
+                    if(c == '\n') // The newline ends the comment and is not part of its text.
+                    {
+                      break;
+                    }
+
+                    ++pos;
+                  }
+
+                  ++pos; // One more step so pos - token_start - 2 counts the characters after "#!" (assumes pos was on '!' on entry; TODO confirm).
+                  if(pos == token_start + 2) // Nothing follows "#!" on this line.
+                  {
+                    return ok(token{ token_start, 1, token_kind::comment, ""sv }); // Empty comment: second field is 1, text is empty.
+                  }
+                  else
+                  {
+                    auto const length{ pos - token_start - 2 }; // Character count after the two-character "#!" prefix.
+                    native_persistent_string_view const comment{ file.data() + token_start + 2, // View starts just after "#!".
+                                                                 length };
+                    return ok(token{ token_start, length, token_kind::comment, comment }); // Second field is the text length, not the full "#!..." span.
+                  }
+                }
               default:
                 break;
             }

diff --git a/compiler+runtime/test/cpp/jank/read/lex.cpp b/compiler+runtime/test/cpp/jank/read/lex.cpp
@@ -1592,6 +1592,167 @@ namespace jank::read::lex
         }
       }
 
+      SUBCASE("Shebang Comments") // "#!" starts a comment that runs to the end of the line.
+      {
+        SUBCASE("Empty") // Bare "#!": empty text, and the second field is expected to be 1.
+        {
+          processor p{ "#!" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::comment, ""sv }
+          }));
+        }
+
+        SUBCASE("Empty multi-line") // A newline ends the comment, so the second "#!" (offset 3) is its own token.
+        {
+          processor p{ "#!\n#!" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::comment, ""sv },
+                  { 3, 1, token_kind::comment, ""sv }
+          }));
+        }
+
+        SUBCASE("Non-empty") // Text excludes the "#!" prefix; the second field equals the text length (3).
+        {
+          processor p{ "#!foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 3, token_kind::comment, "foo"sv },
+          }));
+        }
+
+        SUBCASE("Multiple on same line") // A later "#!" on the same line is just comment text.
+        {
+          processor p{ "#!foo #!bar" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 9, token_kind::comment, "foo #!bar"sv }
+          }));
+        }
+
+        SUBCASE("Multiple on same line; last is empty") // A trailing "#!" is kept verbatim in the text.
+        {
+          processor p{ "#!foo #!" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 6, token_kind::comment, "foo #!"sv }
+          }));
+        }
+
+        SUBCASE("Multiple #! in a row") // Only the first "#!" opens the comment; the rest is text.
+        {
+          processor p{ "#!#!#!foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 7, token_kind::comment, "#!#!foo"sv }
+          }));
+        }
+
+        SUBCASE("Expressions before") // Preceding tokens lex normally; the comment's first field is the offset of '#'.
+        {
+          processor p{ "1 2 #!foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::integer,     1ll },
+                  { 2, 1, token_kind::integer,     2ll },
+                  { 4, 3, token_kind::comment, "foo"sv }
+          }));
+        }
+
+        SUBCASE("Expressions before and after") // Lexing resumes after the newline: the integer 2 is at offset 8.
+        {
+          processor p{ "1 #!foo\n2" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::integer,     1ll },
+                  { 2, 3, token_kind::comment, "foo"sv },
+                  { 8, 1, token_kind::integer,     2ll }
+          }));
+        }
+
+        SUBCASE("Multiple lines starting with #!") // Each line yields its own comment token.
+        {
+          processor p{ "#!foo\n#!bar" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 3, token_kind::comment, "foo"sv },
+                  { 6, 3, token_kind::comment, "bar"sv },
+          }));
+        }
+
+        SUBCASE("Double #") // The first '#' is expected as a reader_macro token; "#!foo" then starts at offset 1.
+        {
+          processor p{ "##!foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::reader_macro },
+                  { 1, 3, token_kind::comment, "foo"sv },
+          }));
+        }
+
+        SUBCASE("Double !") // The extra '!' is part of the comment text.
+        {
+          processor p{ "#!!foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 4, token_kind::comment, "!foo"sv },
+          }));
+        }
+
+        SUBCASE("Hash Bang Hash") // A '#' after "#!" is part of the comment text.
+        {
+          processor p{ "#!#foo" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 4, token_kind::comment, "#foo"sv },
+          }));
+        }
+
+        SUBCASE("Don't parse list") // Forms inside the comment are not lexed into tokens.
+        {
+          processor p{ "#!(+ 1 1)" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 7, token_kind::comment, "(+ 1 1)"sv },
+          }));
+        }
+
+        SUBCASE("Don't parse string") // A quoted string inside the comment is kept as raw text, quotes included.
+        {
+          processor p{ "#!\"foo\"" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 5, token_kind::comment, "\"foo\""sv },
+          }));
+        }
+
+        SUBCASE("Shebang in list") // The comment swallows the closing paren, so no close-paren token is expected.
+        {
+          processor p{ "(#!)" };
+          native_vector<result<token, error>> const tokens(p.begin(), p.end());
+          CHECK(tokens
+                == make_tokens({
+                  { 0, 1, token_kind::open_paren },
+                  { 1, 1, token_kind::comment, ")"sv },
+          }));
+        }
+      }
+
       SUBCASE("Conditional")
       {
         SUBCASE("Empty")