diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..a0a49d6109 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +# Before applying suggested PRs, make sure that the new versions of any +# updated actions are allowed in +# https://github.com/organizations/geany/settings/actions +# Versions are pinned and restricted for security reasons. +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 37a58f48bb..c63e7848da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ on: # cancel already running builds of the same branch or pull request concurrency: - group: ci-${{ github.head_ref }} || concat(${{ github.ref }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true env: @@ -45,7 +45,7 @@ jobs: CXX: ccache g++ steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # create and use a timestamp for the cache key: GH Actions will never update a cache # only use an existing cache item or create a new one. To use an existing cache *and* @@ -56,7 +56,7 @@ jobs: run: echo "timestamp=$(date +%Y-%m-%d-%H-%M)" >> $GITHUB_OUTPUT - name: Configure ccache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.CCACHE_DIR }} key: ${{ runner.os }}-${{ github.job }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }} @@ -117,7 +117,7 @@ jobs: CXX: ccache g++ steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # create and use a timestamp for the cache key: GH Actions will never update a cache # only use an existing cache item or create a new one. 
To use an existing cache *and* @@ -128,7 +128,7 @@ jobs: run: echo "timestamp=$(date +%Y-%m-%d-%H-%M)" >> $GITHUB_OUTPUT - name: Configure ccache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ${{ env.CCACHE_DIR }} key: ${{ runner.os }}-${{ github.job }}-ccache-${{ steps.ccache_cache_timestamp.outputs.timestamp }} @@ -183,10 +183,10 @@ jobs: steps: - name: Checkout Geany - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Checkout Build Scripts - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: master repository: geany/infrastructure @@ -212,7 +212,7 @@ jobs: env | sort - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ${{ env.DOCKER_REGISTRY }} username: ${{ github.actor }} @@ -243,7 +243,7 @@ jobs: bash start_build.sh --log-to-stdout --mingw64 --geany --geany-source "${{ github.workspace }}" - name: Archive Geany Installer - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: geany-installer-ci-${{ env.SHORT_SHA }}-pr${{ env.GITHUB_PULL_REQUEST_OR_REF }} retention-days: 30 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..aa013a21e3 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,80 @@ +name: "CodeQL" + +on: + workflow_dispatch: + push: + branches: [ "master" ] + paths: + - '**.c' + - '**.cpp' + - '**.h' + - '**.hpp' + - '**.py' + - '!**.yml' + - '**/codeql.yml' + pull_request: + branches: [ "master" ] + paths: + - '**.c' + - '**.cpp' + - '**.h' + - '**.hpp' + - '**.py' + - '!**.yml' + - '**/codeql.yml' + schedule: + - cron: '15 20 15 * *' + + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + analyze: + name: Analyze + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 
+ permissions: + # required for all workflows + security-events: write + + # only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + language: [ 'c-cpp', 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + + - name: Install dependencies + run: | + sudo apt-get update -qq + sudo apt-get install --assume-yes --no-install-recommends \ + ccache \ + gettext autopoint \ + libtool \ + libgtk-3-dev \ + doxygen \ + python3-docutils \ + python3-lxml \ + rst2pdf + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/README.md b/README.md index d1a6eb61b9..f8d3677016 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ as possible from a special Desktop Environment like KDE or GNOME. So it is using only the GTK+ toolkit and therefore you need only the GTK+ runtime libraries to run Geany. +<a href="doc/images/main_window.png"> + <img src="doc/images/main_window.png" width="750" + alt="Screenshot of Geany showing the main user interface"> +</a> + ## Features diff --git a/ctags/Makefile.am b/ctags/Makefile.am index e12bd1b3da..ee2e283102 100644 --- a/ctags/Makefile.am +++ b/ctags/Makefile.am @@ -85,6 +85,7 @@ parsers = \ parsers/pascal.c \ parsers/perl.c \ parsers/perl.h \ + parsers/perl6.c \ parsers/php.c \ parsers/powershell.c \ parsers/python.c \ diff --git a/ctags/parsers/perl6.c b/ctags/parsers/perl6.c new file mode 100644 index 0000000000..e28c194c32 --- /dev/null +++ b/ctags/parsers/perl6.c @@ -0,0 +1,338 @@ +/* + * perl6.c -- Perl6 parser. 
+ * Author: Dmitri Tikhonov + * + * This is a very basic Perl 6 parser. It does not know how to: + * - skip POD; + * - skip multiline comments; + * - skip quoted strings; + * - generate fully-qualified tags. + * + * This source code is released for free distribution under the terms of + * the GNU General Public License version 2 or (at your option) any later version. + */ + +#include "general.h" /* must always come first */ + +#include +#include + +#include "debug.h" +#include "entry.h" +#include "parse.h" +#include "read.h" +#include "routines.h" +#include "selectors.h" +#include "vstring.h" + +enum perl6Kind { + K_NONE = -1, + K_CLASS, + K_GRAMMAR, + K_METHOD, + K_MODULE, + K_PACKAGE, + K_ROLE, + K_RULE, + K_SUBMETHOD, + K_SUBROUTINE, + K_TOKEN, +}; + +static kindDefinition perl6Kinds[] = { + [K_CLASS] = { true, 'c', "class", "classes" }, + [K_GRAMMAR] = { true, 'g', "grammar", "grammars" }, + [K_METHOD] = { true, 'm', "method", "methods" }, + [K_MODULE] = { true, 'o', "module", "modules" }, + [K_PACKAGE] = { true, 'p', "package", "packages" }, + [K_ROLE] = { true, 'r', "role", "roles" }, + [K_RULE] = { true, 'u', "rule", "rules" }, + [K_SUBMETHOD] = { true, 'b', "submethod", "submethods" }, + [K_SUBROUTINE] = { true, 's', "subroutine", "subroutines" }, + [K_TOKEN] = { true, 't', "token", "tokens" }, +}; + +enum token { + T_CLASS, + T_GRAMMAR, + T_METHOD, + T_MODULE, + T_MULTI, + T_MY, + T_OUR, + T_PACKAGE, + T_PROTO, + T_ROLE, + T_RULE, + T_SUB, + T_SUBMETHOD, + T_UNIT, + T_TOKEN, +}; + +static const enum perl6Kind token2kind[] = { + [T_CLASS] = K_CLASS, + [T_GRAMMAR] = K_GRAMMAR, + [T_METHOD] = K_METHOD, + [T_MODULE] = K_MODULE, + [T_MULTI] = K_SUBROUTINE, + [T_MY] = K_NONE, + [T_OUR] = K_NONE, + [T_PACKAGE] = K_PACKAGE, + [T_PROTO] = K_NONE, + [T_ROLE] = K_ROLE, + [T_RULE] = K_RULE, + [T_SUB] = K_SUBROUTINE, + [T_SUBMETHOD] = K_SUBMETHOD, + [T_UNIT] = K_NONE, + [T_TOKEN] = K_TOKEN, +}; + +#define STRLEN(s) (sizeof(s) - 1) +#define STREQN(s, token) (0 == 
strncmp(s, token, STRLEN(token))) + +static enum token +matchToken (const char *s, int len) +{ + switch (len) { + case 2: + if (STREQN(s, "my")) return T_MY; + break; + case 3: + switch (s[0]) { + case 'o': + if (STREQN(s, "our")) return T_OUR; + break; + case 's': + if (STREQN(s, "sub")) return T_SUB; + break; + } + break; + case 4: + switch (s[1]) { + case 'o': + if (STREQN(s, "role")) return T_ROLE; + break; + case 'u': + if (STREQN(s, "rule")) return T_RULE; + break; + case 'n': + if (STREQN(s, "unit")) return T_UNIT; + break; + } + break; + case 5: + switch (s[0]) { + case 'c': + if (STREQN(s, "class")) return T_CLASS; + break; + case 'm': + if (STREQN(s, "multi")) return T_MULTI; + break; + case 'p': + if (STREQN(s, "proto")) return T_PROTO; + break; + case 't': + if (STREQN(s, "token")) return T_TOKEN; + break; + } + break; + case 6: + switch (s[1]) { + case 'e': + if (STREQN(s, "method")) return T_METHOD; + break; + case 'o': + if (STREQN(s, "module")) return T_MODULE; + break; + } + break; + case 7: + switch (s[0]) { + case 'g': + if (STREQN(s, "grammar")) return T_GRAMMAR; + break; + case 'p': + if (STREQN(s, "package")) return T_PACKAGE; + break; + } + break; + case 9: + if (STREQN(s, "submethod")) return T_SUBMETHOD; + break; + } + return -1; +} + +static const int validPerl6Identifier[0x100] = { +/* r!perl -e "print qq([(int)'\$_'] = 1,\n)for a..z,A..Z,0..9,':','-','_'"|fmt + */ + [(int)'a'] = 1, [(int)'b'] = 1, [(int)'c'] = 1, [(int)'d'] = 1, + [(int)'e'] = 1, [(int)'f'] = 1, [(int)'g'] = 1, [(int)'h'] = 1, + [(int)'i'] = 1, [(int)'j'] = 1, [(int)'k'] = 1, [(int)'l'] = 1, + [(int)'m'] = 1, [(int)'n'] = 1, [(int)'o'] = 1, [(int)'p'] = 1, + [(int)'q'] = 1, [(int)'r'] = 1, [(int)'s'] = 1, [(int)'t'] = 1, + [(int)'u'] = 1, [(int)'v'] = 1, [(int)'w'] = 1, [(int)'x'] = 1, + [(int)'y'] = 1, [(int)'z'] = 1, [(int)'A'] = 1, [(int)'B'] = 1, + [(int)'C'] = 1, [(int)'D'] = 1, [(int)'E'] = 1, [(int)'F'] = 1, + [(int)'G'] = 1, [(int)'H'] = 1, [(int)'I'] = 1, 
[(int)'J'] = 1, + [(int)'K'] = 1, [(int)'L'] = 1, [(int)'M'] = 1, [(int)'N'] = 1, + [(int)'O'] = 1, [(int)'P'] = 1, [(int)'Q'] = 1, [(int)'R'] = 1, + [(int)'S'] = 1, [(int)'T'] = 1, [(int)'U'] = 1, [(int)'V'] = 1, + [(int)'W'] = 1, [(int)'X'] = 1, [(int)'Y'] = 1, [(int)'Z'] = 1, + [(int)'0'] = 1, [(int)'1'] = 1, [(int)'2'] = 1, [(int)'3'] = 1, + [(int)'4'] = 1, [(int)'5'] = 1, [(int)'6'] = 1, [(int)'7'] = 1, + [(int)'8'] = 1, [(int)'9'] = 1, [(int)':'] = 1, [(int)'-'] = 1, + [(int)'_'] = 1, +}; + +static const int validMethodPrefix[0x100] = { + [(int)'!'] = 1, [(int)'^'] = 1, +}; + +static const int kindMayHaveMethodPrefix = (1 << K_SUBMETHOD) | + (1 << K_METHOD) ; + +/* Trim the identifier at *ps (len bytes): skip one leading '!' or '^' for method and submethod kinds, updating *ps, and return + * the length of the valid portion with trailing ':' characters removed. If the returned value is zero, the + * identifier is invalid. + */ +static int +trimIdentifier (enum perl6Kind kind, const char **ps, int len) +{ + Assert(len > 0); + const char *const end = *ps + len; + const char *s = *ps; + /* Trim the front if possible; index via unsigned char so bytes >= 0x80 cannot become negative indices: */ + s += (kindMayHaveMethodPrefix & (1 << kind)) && + validMethodPrefix[(unsigned char)*s]; + /* Record the start of identifier: */ + *ps = s; + /* Continuous string of valid characters (same unsigned char indexing as above): */ + while (s < end && validPerl6Identifier[(unsigned char)*s]) + ++s; + /* sub multi infix:<...> -- we want the "infix" only */ + while (s - *ps > 0 && ':' == s[-1]) + --s; + /* It's ok if this is zero: */ + return s - *ps; +} + +struct p6Ctx { + enum token tokens[128 /* unlikely to need more than this */]; + unsigned int n_tokens; + vString *name; + const char *line; /* Saved from readLineFromInputFile() */ +}; + +static void +makeTag (struct p6Ctx *ctx, int kind, const char *name, int len) +{ + tagEntryInfo entry; + vStringNCopyS(ctx->name, name, len); + initTagEntry(&entry, vStringValue(ctx->name), kind); + makeTagEntry(&entry); +} + +static void +possiblyMakeTag (struct p6Ctx *ctx, const char *s, int len) +{ + Assert(ctx->n_tokens > 0); + enum perl6Kind kind = token2kind[ 
ctx->tokens[ctx->n_tokens - 1] ]; + if (K_NONE != kind && perl6Kinds[kind].enabled + && (len = trimIdentifier(kind, &s, len)) > 0) + makeTag(ctx, kind, s, len); +} + +static void +initP6Ctx (struct p6Ctx *ctx) +{ + ctx->n_tokens = 0; + ctx->name = vStringNew(); + ctx->line = NULL; +} + +static void +deinitP6Ctx (struct p6Ctx *ctx) +{ + vStringDelete(ctx->name); +} + +/* Read next contiguous sequence of non-whitespace characters, store + * the address in `ptok', and return its length. Return value of zero + * means EOF. + * Anything after a ',' or ';' delimiter, or from a token-initial '#', is discarded up to the end of the line. + * TODO: Currently, POD and multi-line comments are not handled. + */ +static int +getNonSpaceStr (struct p6Ctx *ctx, const char **ptok) +{ + const char *s = ctx->line; + if (!s) { +next_line: + s = (const char *) readLineFromInputFile(); + if (!s) + return 0; /* EOF */ + } + while (*s && isspace((unsigned char) *s)) /* Skip whitespace; the cast keeps bytes >= 0x80 in isspace()'s valid domain */ + ++s; + if ('#' == *s) + goto next_line; + int non_white_len = strcspn(s, ",; \t"); + if (non_white_len) { + ctx->line = s + non_white_len; /* Save state */ + *ptok = s; + return non_white_len; + } else + goto next_line; +} + +static void +findPerl6Tags (void) +{ + struct p6Ctx ctx; + +#define RESET_TOKENS() do { ctx.n_tokens = 0; } while (0) + +#define PUSH_TOKEN(_t_) do { \ + if (ctx.n_tokens < ARRAY_SIZE(ctx.tokens)) { \ + ctx.tokens[ ctx.n_tokens ] = _t_; \ + ++ctx.n_tokens; \ + } else { \ + Assert(!"Token stack overflown: this is quite odd"); \ + RESET_TOKENS(); \ + } \ +} while (0) + + initP6Ctx(&ctx); + + const char *s; + int len; + + while ((len = getNonSpaceStr(&ctx, &s)) > 0) { + enum token token = matchToken(s, len); + if ((int) token >= 0) { + PUSH_TOKEN(token); + } else if (ctx.n_tokens > 0) { + possiblyMakeTag(&ctx, s, len); + RESET_TOKENS(); + } + } + + deinitP6Ctx(&ctx); +} + +parserDefinition * +Perl6Parser (void) +{ + static const char *const extensions[] = { "p6", "pm6", "pm", "pl6", NULL }; + static selectLanguage selectors [] = { selectByPickingPerlVersion, + NULL }; + parserDefinition* def = 
parserNew("Perl6"); + def->kindTable = perl6Kinds; + def->kindCount = ARRAY_SIZE(perl6Kinds); + def->extensions = extensions; + def->parser = findPerl6Tags; + def->selectLanguage = selectors; + return def; +} diff --git a/data/Makefile.am b/data/Makefile.am index 082e92c56c..15d75adf72 100644 --- a/data/Makefile.am +++ b/data/Makefile.am @@ -64,6 +64,7 @@ filetypes_dist = \ filedefs/filetypes.po \ filedefs/filetypes.powershell \ filedefs/filetypes.r \ + filedefs/filetypes.raku \ filedefs/filetypes.restructuredtext \ filedefs/filetypes.ruby \ filedefs/filetypes.rust \ diff --git a/data/filedefs/filetypes.raku b/data/filedefs/filetypes.raku new file mode 100644 index 0000000000..a291a94f53 --- /dev/null +++ b/data/filedefs/filetypes.raku @@ -0,0 +1,90 @@ +# For complete documentation of this file, please see Geany's main documentation +[styling] +# Edit these in the colorscheme .conf file instead +default=default +error=error +commentline=comment_line +commentembed=comment +pod=comment_doc +character=character +heredoc_qq=here_doc +string=string_1 +string_q=string_2 +string_qq=string_2 +string_q_lang=string_2 +string_var=string_2 +regex=regex +regex_var=identifier_2 +adverb=default +number=number_1 +preprocessor=preprocessor +operator=operator +word=keyword_1 +function=function +identifier=identifier +typedef=default +mu=default +positional=default +associative=default +callable=default +grammar=default +class=class + +[keywords] +keywords=BEGIN CATCH CHECK CONTROL END ENTER EVAL FIRST INIT KEEP LAST LEAVE NEXT POST PRE START TEMP UNDO after also andthen as async augment bag before but category circumfix class cmp complex constant contend default defer div does dynamic else elsif enum eq eqv extra fail fatal ff fff for gather gcd ge given grammar gt handles has if infix is lcm le leave leg let lift loop lt macro make maybe method mix mod module multi ne not o only oo or orelse orwith postcircumfix postfix prefix proto regex repeat require return-rw returns role rule 
size_t slang start str submethod subset supersede take temp term token trusts try unit unless until when where while with without x xor xx +functions=ACCEPTS AT-KEY EVALFILE EXISTS-KEY Filetests IO STORE abs accept acos acosec acosech acosh acotan acotanh alarm and antipairs asec asech asin asinh atan atan2 atanh base bind binmode bless break caller ceiling chars chdir chmod chomp chop chr chroot chrs cis close closedir codes comb conj connect contains continue cos cosec cosech cosh cotan cotanh crypt dbm defined die do dump each elems eof exec exists exit exp expmod fc fcntl fileno flat flip flock floor fmt fork formats functions get getc getpeername getpgrp getppid getpriority getsock gist glob gmtime goto grep hyper import index int invert ioctl is-prime iterator join keyof keys kill kv last lazy lc lcfirst lines link list listen local localtime lock log log10 lsb lstat map match mkdir msb msg my narrow new next no of open ord ords our pack package pairs path pick pipe polymod pop pos pred print printf prototype push quoting race rand read readdir readline readlink readpipe recv redo ref rename requires reset return reverse rewinddir rindex rmdir roots round samecase say scalar sec sech seek seekdir select semctl semget semop send set setpgrp setpriority setsockopt shift shm shutdown sign sin sinh sleep sockets sort splice split sprintf sqrt srand stat state study sub subst substr substr-rw succ symlink sys syscall system syswrite tan tanh tc tclc tell telldir tie time times trans trim trim-leading trim-trailing truncate uc ucfirst unimatch uniname uninames uniprop uniprops unival unlink unpack unpolar unshift untie use utime values wait waitpid wantarray warn wordcase words write +types_basic=AST Any Block Bool CallFrame Callable Code Collation Compiler Complex ComplexStr Cool CurrentThreadScheduler Date DateTime Dateish Distribution Distribution::Hash Distribution::Locally Distribution::Path Duration Encoding Encoding::Registry Endian FatRat ForeignCode 
HyperSeq HyperWhatever Instant Int IntStr Junction Label Lock::Async Macro Method Mu Nil Num NumStr Numeric ObjAt Parameter Perl PredictiveIterator Proxy RaceSeq Rat RatStr Rational Real Routine Routine::WrapHandle Scalar Sequence Signature Str StrDistance Stringy Sub Submethod Telemetry Telemetry::Instrument::Thread Telemetry::Instrument::ThreadPool Telemetry::Instrument::Usage Telemetry::Period Telemetry::Sampler UInt ValueObjAt Variable Version Whatever WhateverCode atomicint bit bool buf buf1 buf16 buf2 buf32 buf4 buf64 buf8 int int1 int16 int2 int32 int4 int64 int8 long longlong num num32 num64 rat rat1 rat16 rat2 rat32 rat4 rat64 rat8 uint uint1 uint16 uint2 uint32 uint4 uint64 uint8 utf16 utf32 utf8 +types_composite=Array Associative Bag BagHash Baggy Blob Buf Capture Enumeration Hash Iterable Iterator List Map Mix MixHash Mixy NFC NFD NFKC NFKD Pair Positional PositionalBindFailover PseudoStash QuantHash Range Seq Set SetHash Setty Slip Stash Uni utf8 +types_domain=Attribute Cancellation Channel CompUnit CompUnit::Repository CompUnit::Repository::FileSystem CompUnit::Repository::Installation Distro Grammar IO IO::ArgFiles IO::CatHandle IO::Handle IO::Notification IO::Path IO::Path::Cygwin IO::Path::QNX IO::Path::Unix IO::Path::Win32 IO::Pipe IO::Socket IO::Socket::Async IO::Socket::INET IO::Spec IO::Spec::Cygwin IO::Spec::QNX IO::Spec::Unix IO::Spec::Win32 IO::Special Kernel Lock Match Order Pod::Block Pod::Block::Code Pod::Block::Comment Pod::Block::Declarator Pod::Block::Named Pod::Block::Para Pod::Block::Table Pod::Defn Pod::FormattingCode Pod::Heading Pod::Item Proc Proc::Async Promise Regex Scheduler Semaphore Supplier Supplier::Preserving Supply Systemic Tap Thread ThreadPoolScheduler VM +types_exceptions=Backtrace Backtrace::Frame CX::Done CX::Emit CX::Last CX::Next CX::Proceed CX::Redo CX::Return CX::Succeed CX::Take CX::Warn Exception Failure X::AdHoc X::Anon::Augment X::Anon::Multi X::Assignment::RO X::Attribute::NoPackage X::Attribute::Package 
X::Attribute::Required X::Attribute::Undeclared X::Augment::NoSuchType X::Bind X::Bind::NativeType X::Bind::Slice X::Caller::NotDynamic X::Channel::ReceiveOnClosed X::Channel::SendOnClosed X::Comp X::Composition::NotComposable X::Constructor::Positional X::Control X::ControlFlow X::ControlFlow::Return X::DateTime::TimezoneClash X::Declaration::Scope X::Declaration::Scope::Multi X::Does::TypeObject X::Dynamic::NotFound X::Eval::NoSuchLang X::Export::NameClash X::IO X::IO::Chdir X::IO::Chmod X::IO::Copy X::IO::Cwd X::IO::Dir X::IO::DoesNotExist X::IO::Link X::IO::Mkdir X::IO::Move X::IO::Rename X::IO::Rmdir X::IO::Symlink X::IO::Unlink X::Inheritance::NotComposed X::Inheritance::Unsupported X::Method::InvalidQualifier X::Method::NotFound X::Method::Private::Permission X::Method::Private::Unqualified X::Mixin::NotComposable X::NYI X::NoDispatcher X::Numeric::Real X::OS X::Obsolete X::OutOfRange X::Package::Stubbed X::Parameter::Default X::Parameter::MultipleTypeConstraints X::Parameter::Placeholder X::Parameter::Twigil X::Parameter::WrongOrder X::Phaser::Multiple X::Phaser::PrePost X::Placeholder::Block X::Placeholder::Mainline X::Pod X::Proc::Async X::Proc::Async::AlreadyStarted X::Proc::Async::BindOrUse X::Proc::Async::CharsOrBytes X::Proc::Async::MustBeStarted X::Proc::Async::OpenForWriting X::Proc::Async::TapBeforeSpawn X::Proc::Unsuccessful X::Promise::CauseOnlyValidOnBroken X::Promise::Vowed X::Redeclaration X::Role::Initialization X::Scheduler::CueInNaNSeconds X::Seq::Consumed X::Sequence::Deduction X::Signature::NameClash X::Signature::Placeholder X::Str::Numeric X::StubCode X::Syntax X::Syntax::Augment::WithoutMonkeyTyping X::Syntax::Comment::Embedded X::Syntax::Confused X::Syntax::InfixInTermPosition X::Syntax::Malformed X::Syntax::Missing X::Syntax::NegatedPair X::Syntax::NoSelf X::Syntax::Number::RadixOutOfRange X::Syntax::P5 X::Syntax::Perl5Var X::Syntax::Regex::Adverb X::Syntax::Regex::SolitaryQuantifier X::Syntax::Reserved X::Syntax::Self::WithoutObject 
X::Syntax::Signature::InvocantMarker X::Syntax::Term::MissingInitializer X::Syntax::UnlessElse X::Syntax::Variable::Match X::Syntax::Variable::Numeric X::Syntax::Variable::Twigil X::Temporal X::Temporal::InvalidFormat X::TypeCheck X::TypeCheck::Assignment X::TypeCheck::Binding X::TypeCheck::Return X::TypeCheck::Splice X::Undeclared +adverbs=D a array b backslash c closure delete double exec exists f function h hash heredoc k kv p q qq quotewords s scalar single sym to v val w words ww x + +[lexer_properties] +styling.within.preprocessor=1 + +[settings] +# default extension used when saving files +extension=raku + +# MIME type +mime_type=text/x-perl6 + +# these characters define word boundaries when making selections and searching +# using word matching options +#wordchars=_.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + +# single comments, like # in this file +comment_single=# +# multiline comments +#comment_open=#`(( +#comment_close=)) + +# set to false if a comment character/string should start at column 0 of a line, true uses any +# indentation of the line, e.g. 
setting to true causes the following on pressing CTRL+d +# #command_example(); +# setting to false would generate this +# # command_example(); +# This setting works only for single line comments +comment_use_indent=true + +# context action command (please see Geany's main documentation for details) +context_action_cmd= + +[indentation] +#width=4 +# 0 is spaces, 1 is tabs, 2 is tab & spaces +#type=1 + +[build_settings] +# %f will be replaced by the complete filename +# %e will be replaced by the filename without extension +# (use only one of it at one time) + +compiler=raku -c "%f" + +run_cmd=raku "%f" + +# Parse syntax check error messages and warnings, examples: +# Alphanumeric character is not allowed as a delimiter +# at foo.raku:1 +error_regex=.*at (.+):([0-9]+).* diff --git a/data/filetype_extensions.conf b/data/filetype_extensions.conf index a3c28b5545..bea8cacafe 100644 --- a/data/filetype_extensions.conf +++ b/data/filetype_extensions.conf @@ -64,6 +64,7 @@ Python=*.py;*.pyw;SConstruct;SConscript;wscript; PowerShell=*.ps1;*.psm1; reStructuredText=*.rest;*.reST;*.rst; R=*.R;*.r; +Raku=*.p6;*.pl6;*.pm6;*.t6;*.raku;*.rakumod;*.rakutest; Rust=*.rs; Ruby=*.rb;*.rhtml;*.ruby;*.gemspec;Gemfile;rakefile;Rakefile; Scala=*.scala;*.scl; diff --git a/meson.build b/meson.build index 100d774772..394ef619ac 100644 --- a/meson.build +++ b/meson.build @@ -280,6 +280,7 @@ lexilla = static_library('lexilla', 'scintilla/lexilla/lexers/LexProps.cxx', 'scintilla/lexilla/lexers/LexPython.cxx', 'scintilla/lexilla/lexers/LexR.cxx', + 'scintilla/lexilla/lexers/LexRaku.cxx', 'scintilla/lexilla/lexers/LexRuby.cxx', 'scintilla/lexilla/lexers/LexRust.cxx', 'scintilla/lexilla/lexers/LexSmalltalk.cxx', @@ -662,6 +663,7 @@ ctags = static_library('ctags', 'ctags/parsers/pascal.c', 'ctags/parsers/perl.c', 'ctags/parsers/perl.h', + 'ctags/parsers/perl6.c', 'ctags/parsers/php.c', 'ctags/parsers/powershell.c', 'ctags/parsers/python.c', diff --git a/scintilla/Makefile.am b/scintilla/Makefile.am 
index bf430016bc..7270e69661 100644 --- a/scintilla/Makefile.am +++ b/scintilla/Makefile.am @@ -50,6 +50,7 @@ lexilla/lexers/LexProps.cxx \ lexilla/lexers/LexPython.cxx \ lexilla/lexers/LexPO.cxx \ lexilla/lexers/LexR.cxx \ +lexilla/lexers/LexRaku.cxx \ lexilla/lexers/LexRuby.cxx \ lexilla/lexers/LexRust.cxx \ lexilla/lexers/LexSmalltalk.cxx \ diff --git a/scintilla/lexilla/lexers/LexRaku.cxx b/scintilla/lexilla/lexers/LexRaku.cxx new file mode 100644 index 0000000000..8284ebcaa2 --- /dev/null +++ b/scintilla/lexilla/lexers/LexRaku.cxx @@ -0,0 +1,1637 @@ +/** @file LexRaku.cxx + ** Lexer for Raku + ** + ** Copyright (c) 2019 Mark Reay + **/ +// Copyright 1998-2005 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. + +/* + * Raku (Perl6) Lexer for Scintilla + * --------------------------------- + * --------------------------------- + * 06-Dec-2019: More Unicode support: + * - Added a full scope of allowed numbers and letters + * 29-Nov-2019: More highlighting / implemented basic folding: + * - Operators (blanket cover, no sequence checking) + * - Class / Grammar name highlighting + * - Folding: + * - Comments: line / multi-line + * - POD sections + * - Code blocks {} + * 26-Nov-2019: Basic syntax highlighting covering the following: + * - Comments, both line and embedded (multi-line) + * - POD, no inline highlighting as yet... 
+ * - Heredoc block string, with variable highlighting (with qq) + * - Strings, with variable highlighting (with ") + * - Q Language, including adverbs (also basic q and qq) + * - Regex, including adverbs + * - Numbers + * - Bareword / identifiers + * - Types + * - Variables: mu, positional, associative, callable + * TODO: + * - POD inline + * - Better operator sequence coverage + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "WordList.h" +#include "LexAccessor.h" +#include "StyleContext.h" +#include "CharacterSet.h" +#include "CharacterCategory.h" +#include "LexerModule.h" +#include "OptionSet.h" +#include "DefaultLexer.h" + +using namespace Scintilla; +using namespace Lexilla; + +namespace { // anonymous namespace to isolate any name clashes +/*----------------------------------------------------------------------------* + * --- DEFINITIONS: OPTIONS / CONSTANTS --- + *----------------------------------------------------------------------------*/ + +// Number types +#define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot +#define RAKUNUM_OCTAL 2 +#define RAKUNUM_FLOAT_EXP 3 // exponent part only +#define RAKUNUM_HEX 4 // may be a hex float +#define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings +#define RAKUNUM_VECTOR 6 +#define RAKUNUM_V_VECTOR 7 +#define RAKUNUM_VERSION 8 // can contain multiple '.'s +#define RAKUNUM_BAD 9 + +// Regex / Q string types +#define RAKUTYPE_REGEX_NORM 0 // 0 char ident +#define RAKUTYPE_REGEX_S 1 // order is significant: +#define RAKUTYPE_REGEX_M 2 // 1 char ident +#define RAKUTYPE_REGEX_Y 3 // 1 char ident +#define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers +#define RAKUTYPE_REGEX_RX 5 // 2 char ident +#define RAKUTYPE_REGEX_TR 6 // 2 char ident +#define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_? 
+#define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote > +#define RAKUTYPE_STR_Q 9 // 1 char ident +#define RAKUTYPE_STR_QX 10 // 2 char ident +#define RAKUTYPE_STR_QW 11 // 2 char ident +#define RAKUTYPE_STR_QQ 12 // 2 char ident +#define RAKUTYPE_STR_QQX 13 // 3 char ident +#define RAKUTYPE_STR_QQW 14 // 3 char ident +#define RAKUTYPE_STR_QQWW 15 // 4 char ident + +// Delimiter types +#define RAKUDELIM_BRACKET 0 // bracket: regex, Q language +#define RAKUDELIM_QUOTE 1 // quote: normal string + +// rakuWordLists: keywords as defined in config +const char *const rakuWordLists[] = { + "Keywords and identifiers", + "Functions", + "Types basic", + "Types composite", + "Types domain-specific", + "Types exception", + "Adverbs", + nullptr, +}; + +// Options and defaults +struct OptionsRaku { + bool fold; + bool foldCompact; + bool foldComment; + bool foldCommentMultiline; + bool foldCommentPOD; + OptionsRaku() { + fold = true; + foldCompact = false; + foldComment = true; + foldCommentMultiline = true; + foldCommentPOD = true; + } +}; + +// init options and words +struct OptionSetRaku : public OptionSet { + OptionSetRaku() { + DefineProperty("fold", &OptionsRaku::fold); + DefineProperty("fold.comment", &OptionsRaku::foldComment); + DefineProperty("fold.compact", &OptionsRaku::foldCompact); + + DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline, + "Set this property to 0 to disable folding multi-line comments when fold.comment=1."); + DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD, + "Set this property to 0 to disable folding POD comments when fold.comment=1."); + + // init word lists + DefineWordListSets(rakuWordLists); + } +}; + +// Delimiter pair +struct DelimPair { + int opener; // opener char + int closer[2]; // closer chars + bool interpol; // can variables be interpolated? 
+ short count; // delimiter char count + DelimPair() { + opener = 0; + closer[0] = 0; + closer[1] = 0; + interpol = false; + count = 0; + } + bool isCloser(int ch) const { + return ch == closer[0] || ch == closer[1]; + } +}; + +/*----------------------------------------------------------------------------* + * --- FUNCTIONS --- + *----------------------------------------------------------------------------*/ + +/* + * IsANewLine + * - returns true if this is a new line char + */ +constexpr bool IsANewLine(int ch) noexcept { + return ch == '\r' || ch == '\n'; +} + +/* + * IsAWhitespace + * - returns true if this is a whitespace (or newline) char + */ +bool IsAWhitespace(int ch) noexcept { + return IsASpaceOrTab(ch) || IsANewLine(ch); +} + +/* + * IsAlphabet + * - returns true if this is an alphabetical char + */ +constexpr bool IsAlphabet(int ch) noexcept { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); +} + +/* + * IsCommentLine + * - returns true if this is a comment line + * - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED + * modified from: LexPerl.cxx + */ +bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) { + Sci_Position pos = styler.LineStart(line); + Sci_Position eol_pos = styler.LineStart(line + 1) - 1; + for (Sci_Position i = pos; i < eol_pos; i++) { + char ch = styler[i]; + int style = styler.StyleAt(i); + if (type == SCE_RAKU_COMMENTEMBED) { + if (i == (eol_pos - 1) && style == type) + return true; + } else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED + if (ch == '#' && style == type && styler[i+1] != '`' ) + return true; + else if (!IsASpaceOrTab(ch)) + return false; + } + } + return false; +} + +/* + * ContainsQTo + * - returns true if this range contains ":to" in style SCE_RAKU_ADVERB indicating the start + * of a SCE_RAKU_HEREDOC_Q or SCE_RAKU_HEREDOC_QQ. 
+ */ +bool ContainsQTo(Sci_Position start, Sci_Position end, LexAccessor &styler) { + std::string adverb; + for (Sci_Position i = start; i < end; i++) { + if (styler.StyleAt(i) == SCE_RAKU_ADVERB) { + adverb.push_back(styler[i]); + } + } + return adverb.find(":to") != std::string::npos; +} + +/* + * GetBracketCloseChar + * - returns the end bracket char: opposite of start + * - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section) + * - Categories are general matches for valid BiDi types + * - Most closer chars are opener + 1 + */ +int GetBracketCloseChar(const int ch) noexcept { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + case ccSm: + switch (ch) { + case 0x3C: return 0x3E; // LESS-THAN SIGN + case 0x2208: return 0x220B; // ELEMENT OF + case 0x2209: return 0x220C; // NOT AN ELEMENT OF + case 0x220A: return 0x220D; // SMALL ELEMENT OF + case 0x2215: return 0x29F5; // DIVISION SLASH + case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO + case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH + case 0x22A6: return 0x2ADE; // ASSERTION + case 0x22A8: return 0x2AE4; // TRUE + case 0x22A9: return 0x2AE3; // FORCES + case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE + case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE + case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE + case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE + case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR + case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR + case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN + } + break; + case ccPs: + switch (ch) { + case 0x5B: return 0x5D; // LEFT SQUARE BRACKET + case 0x7B: return 0x7D; // LEFT CURLY BRACKET + case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER + case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + case 
0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET + case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET + } + break; + case ccPi: + break; + default: return 0; + } + return ch + 1; +} + +/* + * IsValidQuoteOpener + * - + */ +bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept { + dp.closer[0] = 0; + dp.closer[1] = 0; + dp.interpol = true; + if (type == RAKUDELIM_QUOTE) { + switch (ch) { + // Opener Closer Description + case '\'': dp.closer[0] = '\''; // APOSTROPHE + dp.interpol = false; + break; + case '"': dp.closer[0] = '"'; // QUOTATION MARK + break; + case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK + dp.interpol = false; + break; + case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK + break; + case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK + break; + case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK + dp.closer[1] = 0x201D; + break; + case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET + dp.interpol = false; + break; + default: return false; + } + } else if (type == RAKUDELIM_BRACKET) { + dp.closer[0] = GetBracketCloseChar(ch); + } + dp.opener = ch; + dp.count = 1; + return dp.closer[0] > 0; +} + +/* + * IsBracketOpenChar + * - true if this is a valid start bracket character + */ +bool IsBracketOpenChar(int ch) noexcept { + return GetBracketCloseChar(ch) > 0; +} + +/* + * IsValidRegOrQAdjacent + * - returns true if ch is a valid character to put directly after Q / q + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidRegOrQAdjacent(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' ); +} + +/* + * IsValidRegOrQPrecede + * - returns true if ch is a valid preceding character to put directly before Q / q + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidRegOrQPrecede(int ch) noexcept { + return !(IsAlphaNumeric(ch) || 
ch == '_'); +} + +/* + * MatchCharInRange + * - returns true if the mach character is found in range (of length) + * - ignoreDelim (default false) + */ +bool MatchCharInRange(StyleContext &sc, const Sci_Position length, + const int match, bool ignoreDelim = false) { + Sci_Position len = 0; + int chPrev = sc.chPrev; + while (++len < length) { + const int ch = sc.GetRelativeCharacter(len); + if (ch == match && (ignoreDelim || chPrev != '\\')) + return true; + } + return false; +} + +/* + * PrevNonWhitespaceChar + * - returns the last non-whitespace char + */ +int PrevNonWhitespaceChar(StyleContext &sc) { + Sci_Position rel = 0; + Sci_Position max_back = 0 - sc.currentPos; + while (--rel > max_back) { + const int ch = sc.GetRelativeCharacter(rel); + if (!IsAWhitespace(ch)) + return ch; + } + return 0; // no matching char +} + +/* + * IsQLangStartAtScPos + * - returns true if this is a valid Q Language sc position + * - ref: https://docs.raku.org/language/quoting + * - Q :adverb :adverb //; + * - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /; + */ +bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) { + const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); + const int chFw2 = sc.GetRelativeCharacter(2); + const int chFw3 = sc.GetRelativeCharacter(3); + type = -1; + if (IsValidRegOrQPrecede(sc.chPrev)) { + if (sc.ch == 'Q' && valid_adj) { + type = RAKUTYPE_QLANG; + } else if (sc.ch == 'q') { + switch (sc.chNext) { + case 'x': + type = RAKUTYPE_STR_QX; + break; + case 'w': + type = RAKUTYPE_STR_QW; + break; + case 'q': + if (chFw2 == 'x') { + type = RAKUTYPE_STR_QQX; + } else if (chFw2 == 'w') { + if (chFw3 == 'w') { + type = RAKUTYPE_STR_QQWW; + } else { + type = RAKUTYPE_STR_QQW; + } + } else { + type = RAKUTYPE_STR_QQ; + } + break; + default: + type = RAKUTYPE_STR_Q; + } + } else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) { + type = RAKUTYPE_STR_WQ; // < word quote > + } + } + return type >= 0; +} + +/* + * IsRegexStartAtScPos 
+ * - returns true if this is a valid Regex sc position + * - ref: https://docs.raku.org/language/regexes + * - Regex: (rx/s/m/tr/y) :adverb /:adverb /; + * - regex R :adverb //; + * - /:adverb /; + */ +bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) { + const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); + type = -1; + if (IsValidRegOrQPrecede(sc.chPrev)) { + switch (sc.ch) { + case 'r': + if (sc.chNext == 'x') + type = RAKUTYPE_REGEX_RX; + break; + case 't': + case 'T': + if (sc.chNext == 'r' || sc.chNext == 'R') + type = RAKUTYPE_REGEX_TR; + break; + case 'm': + if (valid_adj) + type = RAKUTYPE_REGEX_M; + break; + case 's': + case 'S': + if (valid_adj) + type = RAKUTYPE_REGEX_S; + break; + case 'y': + if (valid_adj) + type = RAKUTYPE_REGEX_Y; + break; + case '/': + if (set.Contains(PrevNonWhitespaceChar(sc))) + type = RAKUTYPE_REGEX_NORM; + } + } + return type >= 0; +} + +/* + * IsValidIdentPrecede + * - returns if ch is a valid preceding char to put directly before an identifier + */ +bool IsValidIdentPrecede(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%'); +} + +/* + * IsValidDelimiter + * - returns if ch is a valid delimiter (most chars are valid) + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidDelimiter(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == ':'); +} + +/* + * GetDelimiterCloseChar + * - returns the corresponding close char for a given delimiter (could be the same char) + */ +int GetDelimiterCloseChar(int ch) noexcept { + int ch_end = GetBracketCloseChar(ch); + if (ch_end == 0 && IsValidDelimiter(ch)) { + ch_end = ch; + } + return ch_end; +} + +/* + * GetRepeatCharCount + * - returns the occurrence count of match + */ +Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) { + Sci_Position cnt = 0; + while (cnt < length) { + if (sc.GetRelativeCharacter(cnt) != chMatch) { + break; + } + cnt++; + } 
+ return cnt; +} + +/* + * LengthToDelimiter + * - returns the length until the end of a delimited string section + * - Ignores nested delimiters (if opener != closer) + * - no trailing char after last closer (default false) + */ +Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp, + Sci_Position length, bool noTrailing = false) { + short cnt_open = 0; // count open bracket + short cnt_close = 0; // count close bracket + bool is_escape = false; // has been escaped using '\'? + Sci_Position len = 0; // count characters + int chOpener = dp.opener; // look for nested opener / closer + if (dp.opener == dp.closer[0]) + chOpener = 0; // no opening delimiter (no nesting possible) + + while (len < length) { + const int chPrev = sc.GetRelativeCharacter(len - 1); + const int ch = sc.GetRelativeCharacter(len); + const int chNext = sc.GetRelativeCharacter(len+1); + + if (cnt_open == 0 && cnt_close == dp.count) { + return len; // end condition has been met + } else if (is_escape) { + is_escape = false; + } else if (ch == '\\') { + is_escape = true; + } else { + if (ch == chOpener) { + cnt_open++; // open nested bracket + } else if (dp.isCloser(ch)) { + if ( cnt_open > 0 ) { + cnt_open--; // close nested bracket + } else if (dp.count > 1 && cnt_close < (dp.count - 1)) { + if (cnt_close > 1) { + if (dp.isCloser(chPrev)) { + cnt_close++; + } else { // reset if previous char was not close + cnt_close = 0; + } + } else { + cnt_close++; + } + } else if (!noTrailing || (IsAWhitespace(chNext))) { + cnt_close++; // found last close + if (cnt_close > 1 && !dp.isCloser(chPrev)) { + cnt_close = 0; // reset if previous char was not close + } + } else { + cnt_close = 0; // non handled close: reset + } + } else if (IsANewLine(ch)) { + cnt_open = 0; // reset after each line + cnt_close = 0; + } + } + len++; + } + return -1; // end condition has NOT been met +} + +/* + * LengthToEndHeredoc + * - returns the length until the end of a heredoc section + * - delimiter string MUST 
begin on a new line + */ +Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler, + const Sci_Position length, const char *delim) { + bool on_new_ln = false; + int i = 0; // str index + for (int n = 0; n < length; n++) { + const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0); + if (on_new_ln) { + if (delim[i] == '\0') + return n; // at end of str, match found! + if (ch != delim[i++]) + i = 0; // no char match, reset 'i'ndex + } + if (i == 0) // detect new line + on_new_ln = IsANewLine(ch); + } + return -1; // no match found +} + +/* + * LengthToNextChar + * - returns the length until the next character + */ +Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) { + Sci_Position len = 0; + while (++len < length) { + const int ch = sc.GetRelativeCharacter(len); + if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) { + break; + } + } + return len; +} + +/* + * GetRelativeString + * - gets a relative string and sets it in &str + * - resets string before setting + */ +void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length, + std::string &str) { + Sci_Position pos = offset; + str.clear(); + while (pos < length) { + str += sc.GetRelativeCharacter(pos++); + } +} + +} // end anonymous namespace + +/*----------------------------------------------------------------------------* + * --- class: LexerRaku --- + *----------------------------------------------------------------------------*/ +//class LexerRaku : public ILexerWithMetaData { +class LexerRaku : public DefaultLexer { + CharacterSet setWord; + CharacterSet setSigil; + CharacterSet setTwigil; + CharacterSet setOperator; + CharacterSet setSpecialVar; + WordList regexIdent; // identifiers that specify a regex + OptionsRaku options; // Options from config + OptionSetRaku osRaku; + WordList keywords; // Word Lists from config + WordList functions; + WordList typesBasic; + WordList typesComposite; + WordList typesDomainSpecific; + WordList typesExceptions; + 
WordList adverbs; + +public: + // Defined as explicit, so that constructor can not be copied + explicit LexerRaku() : + DefaultLexer("raku", SCLEX_RAKU), + setWord(CharacterSet::setAlphaNum, "-_", 0x80), + setSigil(CharacterSet::setNone, "$&%@"), + setTwigil(CharacterSet::setNone, "!*.:<=?^~"), + setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"), + setSpecialVar(CharacterSet::setNone, "_/!") { + regexIdent.Set("regex rule token"); + } + // Deleted so LexerRaku objects can not be copied. + LexerRaku(const LexerRaku &) = delete; + LexerRaku(LexerRaku &&) = delete; + void operator=(const LexerRaku &) = delete; + void operator=(LexerRaku &&) = delete; + virtual ~LexerRaku() { + } + void SCI_METHOD Release() noexcept override { + delete this; + } + int SCI_METHOD Version() const noexcept override { + return lvRelease5; + } + const char *SCI_METHOD PropertyNames() override { + return osRaku.PropertyNames(); + } + int SCI_METHOD PropertyType(const char *name) override { + return osRaku.PropertyType(name); + } + const char *SCI_METHOD DescribeProperty(const char *name) override { + return osRaku.DescribeProperty(name); + } + Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; + const char *SCI_METHOD PropertyGet(const char *key) override { + return osRaku.PropertyGet(key); + } + const char *SCI_METHOD DescribeWordListSets() override { + return osRaku.DescribeWordListSets(); + } + Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; + void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + + static ILexer5 *LexerFactoryRaku() { + return new LexerRaku(); + } + +protected: + bool IsOperatorChar(const int ch); + bool IsWordChar(const int ch, bool allowNumber = true); + bool IsWordStartChar(const int ch); + bool IsNumberChar(const int ch, int base = 10); + 
bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, + int &type, const DelimPair &dp); + void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState); + bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, + WordList &wordsAdverbs, DelimPair &dp); + Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length, + char *s, const int size, Sci_Position offset = 0); +}; + +/*----------------------------------------------------------------------------* + * --- METHODS: LexerRaku --- + *----------------------------------------------------------------------------*/ + +/* + * LexerRaku::IsOperatorChar + * - Test for both ASCII and Unicode operators + * see: https://docs.raku.org/language/unicode_entry + */ +bool LexerRaku::IsOperatorChar(const int ch) { + if (ch > 0x7F) { + switch (ch) { + // Unicode ASCII Equiv. + case 0x2208: // (elem) + case 0x2209: // !(elem) + case 0x220B: // (cont) + case 0x220C: // !(cont) + case 0x2216: // (-) + case 0x2229: // (&) + case 0x222A: // (|) + case 0x2282: // (<) + case 0x2283: // (>) + case 0x2284: // !(<) + case 0x2285: // !(>) + case 0x2286: // (<=) + case 0x2287: // (>=) + case 0x2288: // !(<=) + case 0x2289: // !(>=) + case 0x228D: // (.) 
+ case 0x228E: // (+) + case 0x2296: // (^) + return true; + } + } + return setOperator.Contains(ch); +} + +/* + * LexerRaku::IsWordChar + * - Test for both ASCII and Unicode identifier characters + * see: https://docs.raku.org/language/unicode_ascii + * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + * FIXME: *still* may not contain all valid characters + */ +bool LexerRaku::IsWordChar(const int ch, bool allowNumber) { + // Unicode numbers should not appear in word identifiers + if (ch > 0x7F) { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + // Letters + case ccLu: + case ccLl: + case ccLt: + case ccLm: + case ccLo: + return true; + default: + return false; + } + } else if (allowNumber && IsADigit(ch)) { + return true; // an ASCII number type + } + return setWord.Contains(ch); +} + +/* + * LexerRaku::IsWordStartChar + * - Test for both ASCII and Unicode identifier "start / first" characters + */ +bool LexerRaku::IsWordStartChar(const int ch) { + return ch != '-' && IsWordChar(ch, false); // no numbers allowed +} + +/* + * LexerRaku::IsNumberChar + * - Test for both ASCII and Unicode identifier number characters + * see: https://docs.raku.org/language/unicode_ascii + * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + * FILTERED by Unicode letters that are NUMBER + * and NOT PARENTHESIZED or CIRCLED + * FIXME: *still* may not contain all valid number characters + */ +bool LexerRaku::IsNumberChar(const int ch, int base) { + if (ch > 0x7F) { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + // Numbers + case ccNd: + case ccNl: + case ccNo: + return true; + default: + return false; + } + } + return IsADigit(ch, base); +} + +/* + * LexerRaku::PropertySet + * - + */ +Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) { + if (osRaku.PropertySet(&options, key, val)) + return 0; + return -1; +} + +/* + * LexerRaku::WordListSet + * - + */ +Sci_Position 
SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) { + WordList *wordListN = nullptr; + switch (n) { + case 0: + wordListN = &keywords; + break; + case 1: + wordListN = &functions; + break; + case 2: + wordListN = &typesBasic; + break; + case 3: + wordListN = &typesComposite; + break; + case 4: + wordListN = &typesDomainSpecific; + break; + case 5: + wordListN = &typesExceptions; + break; + case 6: + wordListN = &adverbs; + break; + } + Sci_Position firstModification = -1; + if (wordListN) { + WordList wlNew; + wlNew.Set(wl); + if (*wordListN != wlNew) { + wordListN->Set(wl); + firstModification = 0; + } + } + return firstModification; +} + +/* + * LexerRaku::ProcessRegexTwinCapture + * - processes the transition between a regex pair (two sets of delimiters) + * - moves to first new delimiter, if a bracket + * - returns true when valid delimiter start found (if bracket) + */ +bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, + int &type, const DelimPair &dp) { + + if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) { + type = -1; // clear type + + // move past chRegQClose if it was the previous char + if (dp.isCloser(sc.chPrev)) + sc.Forward(); + + // no processing needed for non-bracket + if (dp.isCloser(dp.opener)) + return true; + + // move to next opening bracket + const Sci_Position len = LengthToNextChar(sc, length); + if (sc.GetRelativeCharacter(len) == dp.opener) { + sc.Forward(len); + return true; + } + } + return false; +} + +/* + * LexerRaku::ProcessStringVars + * - processes a string and highlights any valid variables + */ +void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) { + const int state = sc.state; + for (Sci_Position pos = 0; pos < length; pos++) { + if (sc.state == varState && !IsWordChar(sc.ch)) { + sc.SetState(state); + } else if (sc.chPrev != '\\' + && (sc.ch == '$' || sc.ch == '@') + && IsWordStartChar(sc.chNext)) { + 
sc.SetState(varState); + } + sc.Forward(); // Next character + } +} +/* + * LexerRaku::ProcessValidRegQlangStart + * - processes a section of the document range from after a Regex / Q delimiter + * - returns true on success + * - sets: adverbs, chOpen, chClose, chCount + * ref: https://docs.raku.org/language/regexes + */ +bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, + WordList &wordsAdverbs, DelimPair &dp) { + Sci_Position startPos = sc.currentPos; + Sci_Position startLen = length; + const int target_state = sc.state; + int state = SCE_RAKU_DEFAULT; + std::string str; + + // find our opening delimiter (and occurrences) / save any adverbs + dp.opener = 0; // adverbs can be after the first delimiter + bool got_all_adverbs = false; // in Regex statements + bool got_ident = false; // regex can have an identifier: 'regex R' + sc.SetState(state); // set state default to avoid pre-highlights + while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) { + + // move to the next non-space character + const bool was_space = IsAWhitespace(sc.ch); + if (!got_all_adverbs && was_space) { + sc.Forward(LengthToNextChar(sc, length)); + } + length = startLen - (sc.currentPos - startPos); // update length remaining + + // parse / eat an identifier (if type == RAKUTYPE_REGEX) + if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) { + + // eat identifier / account for special adverb :sym + bool got_sym = false; + while (sc.More()) { + sc.SetState(SCE_RAKU_IDENTIFIER); + while (sc.More() && (IsAlphaNumeric(sc.chNext) + || sc.chNext == '_' || sc.chNext == '-')) { + sc.Forward(); + } + sc.Forward(); + if (got_sym && sc.ch == '>') { + sc.SetState(SCE_RAKU_OPERATOR); // '>' + sc.Forward(); + break; + } else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) { + sc.SetState(SCE_RAKU_ADVERB); // ':sym' + sc.Forward(4); + sc.SetState(SCE_RAKU_OPERATOR); // '<' + sc.Forward(); + got_sym = true; + } else { + break; + } 
+ } + sc.SetState(state); + got_ident = true; + } + + // parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim + // >= RAKUTYPE_QLANG only has adverbs before delim + else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident) + && !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) { + sc.SetState(SCE_RAKU_ADVERB); + while (IsAlphaNumeric(sc.chNext) && sc.More()) { + sc.Forward(); + str += sc.ch; + } + str += ' '; + sc.Forward(); + sc.SetState(state); + } + + // find starting delimiter + else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch)) + && IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are) + sc.SetState((state = target_state));// start state here... + dp.opener = sc.ch; // this is our delimiter, get count + if (type < RAKUTYPE_QLANG) // type is Regex + dp.count = 1; // has only one delimiter + else + dp.count = GetRepeatCharCount(sc, dp.opener, length); + sc.Forward(dp.count); + } + + // we must have all the adverbs by now... + else { + if (got_all_adverbs) + break; // prevent infinite loop: occurs on missing open char + got_all_adverbs = true; + } + } + + // set word list / find a valid closing delimiter (or bomb!) + wordsAdverbs.Set(str.c_str()); + dp.closer[0] = GetDelimiterCloseChar(dp.opener); + dp.closer[1] = 0; // no other closer char + return dp.closer[0] > 0; +} + +/* + * LexerRaku::LengthToNonWordChar + * - returns the length until the next non "word" character: AlphaNum + '_' + * - also sets all the parsed chars in 's' + */ +Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length, + char *s, const int size, Sci_Position offset) { + Sci_Position len = 0; + Sci_Position max_length = size < length ? 
size : length; + while (len <= max_length) { + const int ch = sc.GetRelativeCharacter(len + offset); + if (!IsWordChar(ch)) { + s[len] = '\0'; + break; + } + s[len] = ch; + len++; + } + s[len + 1] = '\0'; + return len; +} + +/* + * LexerRaku::Lex + * - Main lexer method + */ +void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { + LexAccessor styler(pAccess); + DelimPair dpEmbeded; // delimiter pair: embedded comments + DelimPair dpString; // delimiter pair: string + DelimPair dpRegQ; // delimiter pair: Regex / Q Lang + std::string hereDelim; // heredoc delimiter (if in heredoc) + int hereState = 0; // heredoc state to use (Q / QQ) + int numState = 0; // number state / type + short cntDecimal = 0; // number decimal count + std::string wordLast; // last word seen + std::string identLast; // last identifier seen + std::string adverbLast; // last (single) adverb seen + WordList lastAdverbs; // last adverbs seen + Sci_Position len; // temp length value + char s[100]; // temp char string + int typeDetect = -1; // temp type detected (for regex and Q lang) + Sci_Position lengthToEnd; // length until the end of range + + // Backtrack to safe start position before complex quoted elements + + Sci_PositionU newStartPos = startPos; + if (initStyle != SCE_RAKU_DEFAULT) { + // Backtrack to last SCE_RAKU_DEFAULT or 0 + while (newStartPos > 0) { + newStartPos--; + if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT) + break; + } + // Backtrack to start of line before SCE_RAKU_HEREDOC_Q? 
+ if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) { + if (newStartPos > 0) { + newStartPos = styler.LineStart(styler.GetLine(newStartPos)); + } + } + } else { + const Sci_Position line = styler.GetLine(newStartPos); + if (line > 0) { + // If the previous line is a start of a q or qq heredoc, backtrack to start of line + const Sci_Position startPreviousLine = styler.LineStart(line-1); + if (ContainsQTo(startPreviousLine, newStartPos, styler)) { + newStartPos = startPreviousLine; + } + } + } + + + // Re-calculate (any) changed startPos, length and initStyle state + if (newStartPos < startPos) { + initStyle = SCE_RAKU_DEFAULT; + length += startPos - newStartPos; + startPos = newStartPos; + } + + // init StyleContext + StyleContext sc(startPos, length, initStyle, styler); + + // StyleContext Loop + for (; sc.More(); sc.Forward()) { + lengthToEnd = (length - (sc.currentPos - startPos)); // end of range + + /* *** Determine if the current state should terminate ************** * + * Everything within the 'switch' statement processes characters up + * until the end of a syntax highlight section / state. + * ****************************************************************** */ + switch (sc.state) { + case SCE_RAKU_OPERATOR: + sc.SetState(SCE_RAKU_DEFAULT); + break; // FIXME: better valid operator sequences needed? 
+ case SCE_RAKU_COMMENTLINE: + if (IsANewLine(sc.ch)) { + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_COMMENTEMBED: + if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) { + sc.Forward(len); // Move to end delimiter + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_POD: + if (sc.atLineStart && sc.Match("=end pod")) { + sc.Forward(8); + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_STRING: + + // Process the string for variables: move to end delimiter + if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) { + if (dpString.interpol) { + ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_STRING_Q: + case SCE_RAKU_STRING_QQ: + case SCE_RAKU_STRING_Q_LANG: + + // No string: previous char was the delimiter + if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { + sc.SetState(SCE_RAKU_DEFAULT); + } + + // Process the string for variables: move to end delimiter + else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { + + // set (any) heredoc delimiter string + if (lastAdverbs.InList("to")) { + GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim); + hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state + } + + // select variable identifiers + if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) { + ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_HEREDOC_Q: + case SCE_RAKU_HEREDOC_QQ: + if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) { + // select variable identifiers + if (sc.state == SCE_RAKU_HEREDOC_QQ) { + 
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + hereDelim.clear(); // clear heredoc delimiter + break; + case SCE_RAKU_REGEX: + // account for typeDetect = RAKUTYPE_REGEX_S/TR/Y + while (sc.state == SCE_RAKU_REGEX) { + + // No string: previous char was the delimiter + if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { + if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) + continue; + sc.SetState(SCE_RAKU_DEFAULT); + break; + } + + // Process the string for variables: move to end delimiter + else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { + ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR); + if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) + continue; + sc.SetState(SCE_RAKU_DEFAULT); + break; + } else { + sc.Forward(lengthToEnd); // no end delimiter found + break; + } + } + break; + case SCE_RAKU_NUMBER: + if (sc.ch == '.') { + if (sc.chNext == '.') { // '..' is an operator + sc.SetState(SCE_RAKU_OPERATOR); + sc.Forward(); + if (sc.chNext == '.') // '...' 
is also an operator + sc.Forward(); + break; + } else if (numState > RAKUNUM_FLOAT_EXP + && (cntDecimal < 1 || numState == RAKUNUM_VERSION)) { + cntDecimal++; + sc.Forward(); + } else { + sc.SetState(SCE_RAKU_DEFAULT); + break; // too many decimal places + } + } + switch (numState) { + case RAKUNUM_BINARY: + if (!IsNumberChar(sc.ch, 2)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_OCTAL: + if (!IsNumberChar(sc.ch, 8)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_HEX: + if (!IsNumberChar(sc.ch, 16)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_DECIMAL: + case RAKUNUM_VERSION: + if (!IsNumberChar(sc.ch)) + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_WORD: + case SCE_RAKU_FUNCTION: + case SCE_RAKU_TYPEDEF: + case SCE_RAKU_ADVERB: + sc.SetState(SCE_RAKU_DEFAULT); + break; + case SCE_RAKU_MU: + case SCE_RAKU_POSITIONAL: + case SCE_RAKU_ASSOCIATIVE: + case SCE_RAKU_CALLABLE: + case SCE_RAKU_IDENTIFIER: + case SCE_RAKU_GRAMMAR: + case SCE_RAKU_CLASS: + sc.SetState(SCE_RAKU_DEFAULT); + break; + } + + /* *** Determine if a new state should be entered ******************* * + * Everything below here identifies the beginning of a state, all or part + * of the characters within this state are processed here, the rest are + * completed above in the terminate state section. 
+ * ****************************************************************** */ + if (sc.state == SCE_RAKU_DEFAULT) { + + // --- Single line comment + if (sc.ch == '#') { + sc.SetState(SCE_RAKU_COMMENTLINE); + } + + // --- POD block + else if (sc.atLineStart && sc.Match("=begin pod")) { + sc.SetState(SCE_RAKU_POD); + sc.Forward(10); + } + + // --- String (normal) + else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) { + sc.SetState(SCE_RAKU_STRING); + } + + // --- String (Q Language) ---------------------------------------- + // - https://docs.raku.org/language/quoting + // - Q :adverb :adverb //; + // - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //; + else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) { + int state = SCE_RAKU_STRING_Q_LANG; + Sci_Position forward = 1; // single char ident (default) + if (typeDetect > RAKUTYPE_QLANG) { + state = SCE_RAKU_STRING_Q; + if (typeDetect == RAKUTYPE_STR_WQ) + forward = 0; // no char ident + } + if (typeDetect > RAKUTYPE_STR_Q) { + if (typeDetect == RAKUTYPE_STR_QQ) + state = SCE_RAKU_STRING_QQ; + forward++; // two char ident + } + if (typeDetect > RAKUTYPE_STR_QQ) + forward++; // three char ident + if (typeDetect == RAKUTYPE_STR_QQWW) + forward++; // four char ident + + // Proceed: check for a valid character after statement + if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) { + sc.SetState(state); + sc.Forward(forward); + lastAdverbs.Clear(); + + // Process: adverbs / opening delimiter / adverbs after delim + if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, + lastAdverbs, dpRegQ)) + sc.SetState(state); + } + } + + // --- Regex (rx/s/m/tr/y) ---------------------------------------- + // - https://docs.raku.org/language/regexes + else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) { + if (typeDetect == -1) { // must be a regex identifier word + wordLast.clear(); + typeDetect = RAKUTYPE_REGEX; + } + 
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM) + if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX) + forward++; // single char ident + if (typeDetect > RAKUTYPE_REGEX) + forward++; // two char ident + + // Proceed: check for a valid character after statement + if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) { + sc.SetState(SCE_RAKU_REGEX); + sc.Forward(forward); + lastAdverbs.Clear(); + + // Process: adverbs / opening delimiter / adverbs after delim + if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, + lastAdverbs, dpRegQ)) + sc.SetState(SCE_RAKU_REGEX); + } + } + + // --- Numbers ---------------------------------------------------- + else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch) + || (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) { + numState = RAKUNUM_DECIMAL; // default: decimal (base 10) + cntDecimal = 0; + sc.SetState(SCE_RAKU_NUMBER); + if (sc.ch == 'v') // forward past 'v' + sc.Forward(); + if (wordLast == "use") { // package version number + numState = RAKUNUM_VERSION; + } else if (sc.ch == '0') { // other type of number + switch (sc.chNext) { + case 'b': // binary (base 2) + numState = RAKUNUM_BINARY; + break; + case 'o': // octal (base 8) + numState = RAKUNUM_OCTAL; + break; + case 'x': // hexadecimal (base 16) + numState = RAKUNUM_HEX; + } + if (numState != RAKUNUM_DECIMAL) + sc.Forward(); // forward to number type char + } + } + + // --- Keywords / functions / types / barewords ------------------- + else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev)) + && IsWordStartChar(sc.ch)) { + len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)); + if (keywords.InList(s)) { + sc.SetState(SCE_RAKU_WORD); // Keywords + } else if(functions.InList(s)) { + sc.SetState(SCE_RAKU_FUNCTION); // Functions + } else if(typesBasic.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic) + } else 
if(typesComposite.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite) + } else if(typesDomainSpecific.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific) + } else if(typesExceptions.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions) + } else { + if (wordLast == "class") + sc.SetState(SCE_RAKU_CLASS); // a Class ident + else if (wordLast == "grammar") + sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident + else + sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword + identLast = s; // save identifier + } + if (adverbLast == "sym") { // special adverb ":sym" + sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier + identLast = s; // save identifier + } + if (sc.state != SCE_RAKU_IDENTIFIER) + wordLast = s; // save word + sc.Forward(len - 1); // ...forward past word + } + + // --- Adverbs ---------------------------------------------------- + else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) { + len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1); + if (adverbs.InList(s)) { + sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':') + adverbLast = s; // save word + sc.Forward(len); // ...forward past word (less offset: 1) + } + } + + // --- Identifiers: $mu / @positional / %associative / &callable -- + // see: https://docs.raku.org/language/variables + else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext) + || setSpecialVar.Contains(sc.chNext) + || IsWordStartChar(sc.chNext))) { + + // State based on sigil + switch (sc.ch) { + case '$': sc.SetState(SCE_RAKU_MU); + break; + case '@': sc.SetState(SCE_RAKU_POSITIONAL); + break; + case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE); + break; + case '&': sc.SetState(SCE_RAKU_CALLABLE); + } + const int state = sc.state; + sc.Forward(); + char ch_delim = 0; + if (setSpecialVar.Contains(sc.ch) + && !setWord.Contains(sc.chNext)) { // Process Special Var + ch_delim = -1; + } else if (setTwigil.Contains(sc.ch)) { // Process Twigil + sc.SetState(SCE_RAKU_OPERATOR); + 
if (sc.ch == '<' && setWord.Contains(sc.chNext)) + ch_delim = '>'; + sc.Forward(); + sc.SetState(state); + } + + // Process (any) identifier + if (ch_delim >= 0) { + sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1); + if (ch_delim > 0 && sc.chNext == ch_delim) { + sc.Forward(); + sc.SetState(SCE_RAKU_OPERATOR); + } + identLast = s; // save identifier + } + } + + // --- Operators -------------------------------------------------- + else if (IsOperatorChar(sc.ch)) { + // FIXME: better valid operator sequences needed? + sc.SetState(SCE_RAKU_OPERATOR); + } + + // --- Heredoc: begin --------------------------------------------- + else if (!hereDelim.empty() && sc.atLineEnd) { + if (IsANewLine(sc.ch)) + sc.Forward(); // skip a possible CRLF situation + sc.SetState(hereState); + } + + // Reset words: on operator semi-colon OR '}' (end of statement) + if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) { + wordLast.clear(); + identLast.clear(); + adverbLast.clear(); + } + } + + /* *** Determine if an "embedded comment" is to be entered ********** * + * This type of embedded comment section, or multi-line comment comes + * after a normal comment has begun... e.g: #`[ ... ] + * ****************************************************************** */ + else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') { + if (IsBracketOpenChar(sc.chNext)) { + sc.Forward(); // Condition met for "embedded comment" + dpEmbeded.opener = sc.ch; + + // Find the opposite (termination) closing bracket (if any) + dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener); + if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment" + + // Find multiple opening character occurrence + dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd); + sc.SetState(SCE_RAKU_COMMENTEMBED); + sc.Forward(dpEmbeded.count - 1); // incremented in the next loop + } + } + } + } + + // And we're done... 
+ sc.Complete(); +} + +/* + * LexerRaku::Fold + * - Main fold method: sets fold levels for '{' / '}' blocks and, per the fold options, embedded comments, line comments and POD + * NOTE: although Raku uses and supports UNICODE characters, we're only looking + * at normal chars here, using 'SafeGetCharAt' - for folding purposes + * that is all we need. + */ +#define RAKU_HEADFOLD_SHIFT 4 +#define RAKU_HEADFOLD_MASK 0xF0 +void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { + + // init LexAccessor / return if fold option is off + if (!options.fold) return; + LexAccessor styler(pAccess); + + // init char and line positions + const Sci_PositionU endPos = startPos + length; + Sci_Position lineCurrent = styler.GetLine(startPos); + + // Backtrack to last SCE_RAKU_DEFAULT line + if (startPos > 0 && lineCurrent > 0) { + while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) { + lineCurrent--; + startPos = styler.LineStart(lineCurrent); + } + lineCurrent = styler.GetLine(startPos); + } + Sci_PositionU lineStart = startPos; + Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1); + + // init line folding level + int levelPrev = SC_FOLDLEVELBASE; + if (lineCurrent > 0) + levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; + int levelCurrent = levelPrev; + + // init char and style variables + char chNext = styler[startPos]; + int stylePrev = styler.StyleAt(startPos - 1); + int styleNext = styler.StyleAt(startPos); + int styleNextStartLine = styler.StyleAt(lineStartNext); + int visibleChars = 0; + bool wasCommentMulti = false; + + // main loop + for (Sci_PositionU i = startPos; i < endPos; i++) { + + // next char, style and flags + const char ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + const int style = styleNext; + styleNext = styler.StyleAt(i + 1); + const bool atEOL = i == (lineStartNext - 1); + const bool atLineStart = i == lineStart; + + // --- Comments / Multi-line / POD ------------------------------------ + if (options.foldComment) { + + // Multi-line + if
(options.foldCommentMultiline) { + if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`' + && styleNextStartLine == SCE_RAKU_COMMENTEMBED) { + levelCurrent++; + wasCommentMulti = true; // don't confuse line comments + } else if (style == SCE_RAKU_COMMENTEMBED && atLineStart + && styleNextStartLine != SCE_RAKU_COMMENTEMBED) { + levelCurrent--; + } + } + + // Line comments + if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE + && IsCommentLine(lineCurrent, styler)) { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent + 1, styler)) + levelCurrent--; + } + + // POD + if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) { + if (styler.Match(i, "=begin")) + levelCurrent++; + else if (styler.Match(i, "=end")) + levelCurrent--; + } + } + + // --- Code block ----------------------------------------------------- + if (style == SCE_RAKU_OPERATOR) { + if (ch == '{') { + if (levelCurrent < levelPrev) levelPrev--; + levelCurrent++; + } else if (ch == '}') { + levelCurrent--; + } + } + + // --- at end of line / range / apply fold ---------------------------- + if (atEOL) { + int level = levelPrev; + + // set level flags + level |= levelCurrent << 16; + if (visibleChars == 0 && options.foldCompact) + level |= SC_FOLDLEVELWHITEFLAG; + if ((levelCurrent > levelPrev) && (visibleChars > 0)) + level |= SC_FOLDLEVELHEADERFLAG; + if (level != styler.LevelAt(lineCurrent)) { + styler.SetLevel(lineCurrent, level); + } + lineCurrent++; + lineStart = lineStartNext; + lineStartNext = styler.LineStart(lineCurrent + 1); + styleNextStartLine = styler.StyleAt(lineStartNext); + levelPrev = levelCurrent; + visibleChars = 0; + wasCommentMulti = false; + } + + // increment visibleChars / set previous char + if (!isspacechar(ch)) + visibleChars++; + stylePrev = style; + } + + // Done: set real level of the
next line + int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; + styler.SetLevel(lineCurrent, levelPrev | flagsNext); +} + +/*----------------------------------------------------------------------------* + * --- Scintilla: LexerModule --- + *----------------------------------------------------------------------------*/ + +LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists); diff --git a/scintilla/lexilla/src/Lexilla.cxx b/scintilla/lexilla/src/Lexilla.cxx index d66b3ff482..57535b8f9f 100644 --- a/scintilla/lexilla/src/Lexilla.cxx +++ b/scintilla/lexilla/src/Lexilla.cxx @@ -210,6 +210,7 @@ static void AddGeanyLexers() &lmProps, &lmPython, &lmR, + &lmRaku, &lmRuby, &lmRust, &lmSmalltalk, diff --git a/scintilla/scintilla_changes.patch b/scintilla/scintilla_changes.patch index e76ed257bb..99124497b0 100644 --- a/scintilla/scintilla_changes.patch +++ b/scintilla/scintilla_changes.patch @@ -109,6 +109,7 @@ index cd4b23617..af4a73db4 100644 + &lmProps, + &lmPython, + &lmR, ++ &lmRaku, + &lmRuby, + &lmRust, + &lmSmalltalk, diff --git a/scripts/gtk-bundle-from-msys2.sh b/scripts/gtk-bundle-from-msys2.sh index 40c865ab8f..e7cd1c5b09 100644 --- a/scripts/gtk-bundle-from-msys2.sh +++ b/scripts/gtk-bundle-from-msys2.sh @@ -211,8 +211,8 @@ cleanup_unnecessary_files() { echo "Cleanup unnecessary files" # cleanup temporary files rm -rf var/cache/fontconfig - rmdir var/cache - rmdir var + test -d var/cache && rmdir var/cache + test -d var && rmdir var # cleanup development and other unnecessary files rm -rf include rm -rf lib/cmake diff --git a/src/editor.c b/src/editor.c index d29b105448..5d5eefa084 100644 --- a/src/editor.c +++ b/src/editor.c @@ -1306,6 +1306,7 @@ static gboolean lexer_has_braces(ScintillaObject *sci) case SCLEX_PERL: case SCLEX_TCL: case SCLEX_R: + case SCLEX_RAKU: case SCLEX_RUST: return TRUE; default: diff --git a/src/filetypes.c b/src/filetypes.c index 35c91a603e..46c51b519c 100644 --- a/src/filetypes.c +++
b/src/filetypes.c @@ -188,6 +188,7 @@ static void init_builtin_filetypes(void) FT_INIT( SMALLTALK, NONE, "Smalltalk", NULL, SOURCE_FILE, SCRIPT ); FT_INIT( JULIA, JULIA, "Julia", NULL, SOURCE_FILE, SCRIPT ); FT_INIT( AU3, AUTOIT, "AutoIt", NULL, SCRIPT, SCRIPT ); + FT_INIT( RAKU, RAKU, "Raku", NULL, SOURCE_FILE, SCRIPT ); } @@ -603,6 +604,8 @@ static GeanyFiletype *find_shebang(const gchar *utf8_filename, const gchar *line { "sh", GEANY_FILETYPES_SH }, { "bash", GEANY_FILETYPES_SH }, { "dash", GEANY_FILETYPES_SH }, + { "raku", GEANY_FILETYPES_RAKU }, + { "perl6", GEANY_FILETYPES_RAKU }, { "perl", GEANY_FILETYPES_PERL }, { "python", GEANY_FILETYPES_PYTHON }, { "php", GEANY_FILETYPES_PHP }, diff --git a/src/filetypes.h b/src/filetypes.h index 77d6c78b76..61b572f50d 100644 --- a/src/filetypes.h +++ b/src/filetypes.h @@ -109,6 +109,7 @@ typedef enum GEANY_FILETYPES_SMALLTALK, GEANY_FILETYPES_JULIA, GEANY_FILETYPES_AU3, + GEANY_FILETYPES_RAKU, /* ^ append items here */ GEANY_MAX_BUILT_IN_FILETYPES /* Don't use this, use filetypes_array->len instead */ } diff --git a/src/highlighting.c b/src/highlighting.c index 9cc5bf2b2c..b2d5608b57 100644 --- a/src/highlighting.c +++ b/src/highlighting.c @@ -1043,6 +1043,7 @@ void highlighting_init_styles(guint filetype_idx, GKeyFile *config, GKeyFile *co init_styleset_case(POWERSHELL); init_styleset_case(PYTHON); init_styleset_case(R); + init_styleset_case(RAKU); init_styleset_case(RUBY); init_styleset_case(RUST); init_styleset_case(SH); @@ -1135,6 +1136,7 @@ void highlighting_set_styles(ScintillaObject *sci, GeanyFiletype *ft) styleset_case(POWERSHELL); styleset_case(PYTHON); styleset_case(R); + styleset_case(RAKU); styleset_case(RUBY); styleset_case(RUST); styleset_case(SH); @@ -1483,6 +1485,16 @@ gboolean highlighting_is_string_style(gint lexer, gint style) case SCLEX_R: return (style == SCE_R_STRING); + case SCLEX_RAKU: + return (style == SCE_RAKU_CHARACTER || + style == SCE_RAKU_HEREDOC_Q || + style == SCE_RAKU_HEREDOC_QQ || + 
style == SCE_RAKU_STRING || + style == SCE_RAKU_STRING_Q || + style == SCE_RAKU_STRING_QQ || + style == SCE_RAKU_STRING_Q_LANG || + style == SCE_RAKU_REGEX); + case SCLEX_RUBY: return (style == SCE_RB_CHARACTER || style == SCE_RB_STRING || @@ -1701,6 +1713,11 @@ gboolean highlighting_is_comment_style(gint lexer, gint style) case SCLEX_PERL: return (style == SCE_PL_COMMENTLINE); + case SCLEX_RAKU: + return (style == SCE_RAKU_COMMENTLINE || + style == SCE_RAKU_COMMENTEMBED || + style == SCE_RAKU_POD); + case SCLEX_PROPERTIES: return (style == SCE_PROPS_COMMENT); diff --git a/src/highlightingmappings.h b/src/highlightingmappings.h index b65aa8390c..3aca535607 100644 --- a/src/highlightingmappings.h +++ b/src/highlightingmappings.h @@ -1399,6 +1399,53 @@ static const HLKeyword highlighting_keywords_R[] = #define highlighting_properties_R EMPTY_PROPERTIES +/* Raku */ +#define highlighting_lexer_RAKU SCLEX_RAKU +static const HLStyle highlighting_styles_RAKU[] = +{ + { SCE_RAKU_DEFAULT, "default", FALSE }, + { SCE_RAKU_ERROR, "error", FALSE }, + { SCE_RAKU_COMMENTLINE, "commentline", FALSE }, + { SCE_RAKU_COMMENTEMBED, "commentembed", FALSE }, + { SCE_RAKU_POD, "pod", FALSE }, + { SCE_RAKU_CHARACTER, "character", FALSE }, + { SCE_RAKU_HEREDOC_Q, "heredoc_q", FALSE }, + { SCE_RAKU_HEREDOC_QQ, "heredoc_qq", FALSE }, + { SCE_RAKU_STRING, "string", FALSE }, + { SCE_RAKU_STRING_Q, "string_q", FALSE }, + { SCE_RAKU_STRING_QQ, "string_qq", FALSE }, + { SCE_RAKU_STRING_Q_LANG, "string_q_lang", FALSE }, + { SCE_RAKU_STRING_VAR, "string_var", FALSE }, + { SCE_RAKU_REGEX, "regex", FALSE }, + { SCE_RAKU_REGEX_VAR, "regex_var", FALSE }, + { SCE_RAKU_ADVERB, "adverb", FALSE }, + { SCE_RAKU_NUMBER, "number", FALSE }, + { SCE_RAKU_PREPROCESSOR, "preprocessor", FALSE }, + { SCE_RAKU_OPERATOR, "operator", FALSE }, + { SCE_RAKU_WORD, "word", FALSE }, + { SCE_RAKU_FUNCTION, "function", FALSE }, + { SCE_RAKU_IDENTIFIER, "identifier", FALSE }, + { SCE_RAKU_TYPEDEF, "typedef", FALSE }, + { 
SCE_RAKU_MU, "mu", FALSE }, + { SCE_RAKU_POSITIONAL, "positional", FALSE }, + { SCE_RAKU_ASSOCIATIVE, "associative", FALSE }, + { SCE_RAKU_CALLABLE, "callable", FALSE }, + { SCE_RAKU_GRAMMAR, "grammar", FALSE }, + { SCE_RAKU_CLASS, "class", FALSE } +}; +static const HLKeyword highlighting_keywords_RAKU[] = +{ + { 0, "keywords", FALSE }, + { 1, "functions", FALSE }, + { 2, "types_basic", FALSE }, + { 3, "types_composite", FALSE }, + { 4, "types_domain", FALSE }, + { 5, "types_exceptions", FALSE }, + { 6, "adverbs", FALSE }, +}; +#define highlighting_properties_RAKU EMPTY_PROPERTIES + + /* Ruby */ #define highlighting_lexer_RUBY SCLEX_RUBY static const HLStyle highlighting_styles_RUBY[] = diff --git a/src/tagmanager/tm_parser.c b/src/tagmanager/tm_parser.c index e0b7916914..c120b6117b 100644 --- a/src/tagmanager/tm_parser.c +++ b/src/tagmanager/tm_parser.c @@ -1093,6 +1093,27 @@ static TMParserMapGroup group_AUTOIT[] = { {N_("Variables"), TM_ICON_VAR, tm_tag_variable_t}, }; +static TMParserMapEntry map_RAKU[] = { + {'c', tm_tag_class_t}, // class + {'g', tm_tag_struct_t}, // grammar + {'m', tm_tag_method_t}, // method + {'o', tm_tag_namespace_t}, // module + {'p', tm_tag_package_t}, // package + {'r', tm_tag_class_t}, // role + {'u', tm_tag_variable_t}, // rule + {'b', tm_tag_method_t}, // submethod + {'s', tm_tag_function_t}, // subroutine + {'t', tm_tag_variable_t}, // token +}; +static TMParserMapGroup group_RAKU[] = { + {N_("Packages / Modules"), TM_ICON_NAMESPACE, tm_tag_package_t | tm_tag_namespace_t}, + {N_("Classes / Roles"), TM_ICON_CLASS, tm_tag_class_t}, + {N_("Grammars"), TM_ICON_STRUCT, tm_tag_struct_t}, + {N_("Methods"), TM_ICON_METHOD, tm_tag_method_t}, + {N_("Subroutines"), TM_ICON_METHOD, tm_tag_function_t}, + {N_("Rules / Tokens"), TM_ICON_VAR, tm_tag_variable_t}, +}; + typedef struct { TMParserMapEntry *entries; @@ -1165,6 +1186,7 @@ static TMParserMap parser_map[] = { MAP_ENTRY(TYPESCRIPT), MAP_ENTRY(BATCH), MAP_ENTRY(AUTOIT), + MAP_ENTRY(RAKU), 
}; /* make sure the parser map is consistent and complete */ G_STATIC_ASSERT(G_N_ELEMENTS(parser_map) == TM_PARSER_COUNT); diff --git a/src/tagmanager/tm_parser.h b/src/tagmanager/tm_parser.h index 405f4feb7d..7672880932 100644 --- a/src/tagmanager/tm_parser.h +++ b/src/tagmanager/tm_parser.h @@ -119,6 +119,7 @@ enum TM_PARSER_TYPESCRIPT, TM_PARSER_BATCH, TM_PARSER_AUTOIT, + TM_PARSER_RAKU, TM_PARSER_COUNT }; diff --git a/src/tagmanager/tm_parsers.h b/src/tagmanager/tm_parsers.h index e7b89f0ff7..535a496ee1 100644 --- a/src/tagmanager/tm_parsers.h +++ b/src/tagmanager/tm_parsers.h @@ -73,6 +73,7 @@ LispParser, \ TypeScriptParser, \ DosBatchParser, \ - AutoItParser + AutoItParser, \ + Perl6Parser #endif diff --git a/tests/ctags/Makefile.am b/tests/ctags/Makefile.am index a973eadd03..8bdea43729 100644 --- a/tests/ctags/Makefile.am +++ b/tests/ctags/Makefile.am @@ -317,6 +317,7 @@ test_sources = \ simple.pl \ simple.ps1 \ simple.py \ + simple.raku \ simple.rb \ simple.rst \ simple.sh \ diff --git a/tests/ctags/simple.raku b/tests/ctags/simple.raku new file mode 100644 index 0000000000..44d5a57769 --- /dev/null +++ b/tests/ctags/simple.raku @@ -0,0 +1,142 @@ +my class JSONPrettyActions { + method TOP($/) { + make $/.values.[0].ast; + }; + method object($/) { + make $.ast.hash.item; + } + + method pairlist($/) { + make $>>.ast.flat; + } + + method pair($/) { + make $.ast => $.ast; + } + + method array($/) { + make $.ast.item; + } + + method arraylist($/) { + make [$>>.ast]; + } + + method string($/) { + make $0.elems == 1 + ?? ($0[0]. || $0[0].).ast + !! join '', $0.list.map({ (. 
|| .).ast }); + } + method value:sym($/) { make +$/.Str } + method value:sym($/) { make $.ast } + method value:sym($/) { make Bool::True } + method value:sym($/) { make Bool::False } + method value:sym($/) { make Any } + method value:sym($/) { make $.ast } + method value:sym($/) { make $.ast } + + method str($/) { make ~$/ } + + my %esc = '\\' => "\\", + '/' => "/", + 'b' => "\b", + 'n' => "\n", + 't' => "\t", + 'f' => "\f", + 'r' => "\r", + '"' => "\""; + method str_escape($/) { + make $ ?? chr(:16($.join)) !! %esc.AT-KEY(~$/); + } +} + +my grammar JSONPrettyGrammar { + token TOP { ^ \s* [ | ] \s* $ } + rule object { '{' ~ '}' } + rule pairlist { * % \, } + rule pair { ':' } + rule array { '[' ~ ']' } + rule arraylist { * % [ \, ] } + + proto token value {*}; + token value:sym { + '-'? + [ 0 | <[1..9]> <[0..9]>* ] + [ \. <[0..9]>+ ]? + [ <[eE]> [\+|\-]? <[0..9]>+ ]? + } + token value:sym { }; + token value:sym { }; + token value:sym { }; + token value:sym { }; + token value:sym { }; + token value:sym { } + + token string { + \" ~ \" ( | \\ )* + } + + token str { + <-["\\\t\n]>+ + } + + token str_escape { + <["\\/bfnrt]> | u **4 + } +} + +proto sub to-json($, :$indent = 0, :$first = 0) {*} + +multi sub to-json(Version:D $v, :$indent = 0, :$first = 0) { to-json(~$v, :$indent, :$first) } +multi sub to-json(Real:D $d, :$indent = 0, :$first = 0) { (' ' x $first) ~ ~$d } +multi sub to-json(Bool:D $d, :$indent = 0, :$first = 0) { (' ' x $first) ~ ($d ?? 'true' !! 'false') } +multi sub to-json(Str:D $d, :$indent = 0, :$first = 0) { + (' ' x $first) ~ '"' + ~ $d.trans(['"', '\\', "\b", "\f", "\n", "\r", "\t"] + => ['\"', '\\\\', '\b', '\f', '\n', '\r', '\t'])\ + .subst(/<-[\c32..\c126]>/, { ord(~$_).fmt('\u%04x') }, :g) + ~ '"' +} +multi sub to-json(Positional:D $d, :$indent = 0, :$first = 0) { + (' ' x $first) ~ "\[" + ~ ($d ?? $d.map({ "\n" ~ to-json($_, :indent($indent + 2), :first($indent + 2)) }).join(",") ~ "\n" ~ (' ' x $indent) !! 
' ') + ~ ']'; +} +multi sub to-json(Associative:D $d, :$indent = 0, :$first = 0) { + (' ' x $first) ~ "\{" + ~ ($d ?? $d.map({ "\n" ~ to-json(.key, :first($indent + 2)) ~ ' : ' ~ to-json(.value, :indent($indent + 2)) }).join(",") ~ "\n" ~ (' ' x $indent) !! ' ') + ~ '}'; +} + +multi sub to-json(Mu:U $, :$indent = 0, :$first = 0) { 'null' } +multi sub to-json(Mu:D $s, :$indent = 0, :$first = 0) { + die "Can't serialize an object of type " ~ $s.WHAT.perl +} + +sub from-json($text) { + my $a = JSONPrettyActions.new(); + my $o = JSONPrettyGrammar.parse($text, :actions($a)); + $o.ast; +} + +class Area { + has $.size; + submethod BUILD(:$x, :$y, :$z) { + $!size = $x * $y * $z; + } +} + +role Notable { + has Str $.notes is rw; + + multi method notes() { "$!notes\n" }; + multi method notes( Str $note ) { $!notes ~= "$note\n" ~ ⲧ }; +} + +module M { + sub greeting ($name = 'Camelia') { "Greetings, $name!" } + our sub loud-greeting (--> Str) { greeting().uc } + sub friendly-greeting is export { greeting('friend') } +} + +package Foo:ver<0> {}; \ No newline at end of file diff --git a/tests/ctags/simple.raku.tags b/tests/ctags/simple.raku.tags new file mode 100644 index 0000000000..7440464e82 --- /dev/null +++ b/tests/ctags/simple.raku.tags @@ -0,0 +1,68 @@ +AreaÌ1Ö0 +class: Area +BUILDÌ128Ö0 +method: BUILD +Foo:verÌ512Ö0 +package: Foo:ver +JSONPrettyActionsÌ1Ö0 +class: JSONPrettyActions +JSONPrettyGrammarÌ2048Ö0 +struct: JSONPrettyGrammar +MÌ256Ö0 +namespace: M +NotableÌ1Ö0 +class: Notable +TOPÌ128Ö0 +method: TOP +TOPÌ16384Ö0 +variable: TOP +arrayÌ128Ö0 +method: array +arrayÌ16384Ö0 +variable: array +arraylistÌ128Ö0 +method: arraylist +arraylistÌ16384Ö0 +variable: arraylist +friendly-greetingÌ16Ö0 +function: friendly-greeting +from-jsonÌ16Ö0 +function: from-json +greetingÌ16Ö0 +function: greeting +loud-greetingÌ16Ö0 +function: loud-greeting +notesÌ128Ö0 +method: notes +objectÌ128Ö0 +method: object +objectÌ16384Ö0 +variable: object +pairÌ128Ö0 +method: pair +pairÌ16384Ö0 
+variable: pair +pairlistÌ128Ö0 +method: pairlist +pairlistÌ16384Ö0 +variable: pairlist +strÌ128Ö0 +method: str +strÌ16384Ö0 +variable: str +str_escapeÌ128Ö0 +method: str_escape +str_escapeÌ16384Ö0 +variable: str_escape +stringÌ128Ö0 +method: string +stringÌ16384Ö0 +variable: string +to-jsonÌ16Ö0 +function: to-json +valueÌ16384Ö0 +variable: value +value:symÌ128Ö0 +method: value:sym +value:symÌ16384Ö0 +variable: value:sym diff --git a/tests/meson.build b/tests/meson.build index bd975327ea..74cc7b322a 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -314,6 +314,7 @@ ctags_tests = files([ 'ctags/simple.pl.tags', 'ctags/simple.ps1.tags', 'ctags/simple.py.tags', + 'ctags/simple.raku.tags', 'ctags/simple.rb.tags', 'ctags/simple.rst.tags', 'ctags/simple.sh.tags',