From 78dbc585748dc7794f7a801466511fa699a83783 Mon Sep 17 00:00:00 2001 From: notfoundzzz Date: Fri, 23 Jan 2026 16:22:38 +0800 Subject: [PATCH 1/4] =?UTF-8?q?[209=5F7]=20=E4=BF=AE=E5=A4=8Dbash=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E6=A8=A1=E5=BC=8F=E4=B8=8B=E6=96=87=E4=BB=B6=E5=90=8D?= =?UTF-8?q?=E8=B7=AF=E5=BE=84=E7=AD=89=E7=9A=84=E9=94=99=E8=AF=AF=E9=AB=98?= =?UTF-8?q?=E4=BA=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TeXmacs/tests/tmu/209_7.tmu | 16 +- devel/209_7.md | 65 ++++++- src/Data/Parser/keyword_parser.cpp | 44 ++++- src/Data/Parser/keyword_parser.hpp | 2 + src/Data/Parser/number_parser.cpp | 270 ++++++++++++++++++++++++-- src/Data/Parser/number_parser.hpp | 5 + src/System/Language/prog_language.cpp | 7 + 7 files changed, 386 insertions(+), 23 deletions(-) diff --git a/TeXmacs/tests/tmu/209_7.tmu b/TeXmacs/tests/tmu/209_7.tmu index 8aa92041c7..076e56f79d 100644 --- a/TeXmacs/tests/tmu/209_7.tmu +++ b/TeXmacs/tests/tmu/209_7.tmu @@ -1,4 +1,4 @@ -> +> > @@ -26,6 +26,20 @@ \; + # --------- file/url---------- + + tar xzvf nvim-linux-x86_64.tar.gz + + cd ~/.local/share/nvim/lazy + + cat chapter-3.1.tmu + + git clone git@gitee.com:XmacsLabs/mogan.git + + curl wttr.in + + \; + # ---------- globals ---------- NAME="Mogan" diff --git a/devel/209_7.md b/devel/209_7.md index 7a3d24e746..c8b71ba3e8 100644 --- a/devel/209_7.md +++ b/devel/209_7.md @@ -15,16 +15,73 @@ +## 2026/01/23 修复bash代码模式下文件名路径等的错误高亮 +### What + +修复了 Bash代码模式下,路径、文件名、URL、连字符等场景中命令名和数字被错误高亮的问题,包括: + +- 路径 / 文件名中的命令误高亮(abc.git、abc/git、nvim-linux.taz) +- URL 中的命令误高亮(git clone git@gitee.com:xxx.git) +- 文件名中的数字误高亮(209.in、209_9.tmu、file-1.txt) +- 负数与算术表达式高亮异常(-1、1-2) + +同时保证独立命令、独立数字及算术表达式仍能正确高亮,仅影响 Bash 语言。 + + +### How + +#### 1. 关键字解析器边界检查 + +文件:src/Data/Parser/keyword_parser.cpp + +- 增加 check_path_boundaries 开关 +- 在 can_parse() 中补充边界判断: + - 禁止在路径、URL、单词字符、连字符、下划线后匹配关键字 + - 禁止关键字后直接连接路径符号或连字符 + +避免 abc.git、abc/git、nvim-linux.taz 等误高亮。 + + +#### 2. 数字解析器边界检查 + +文件:src/Data/Parser/number_parser.cpp + +- 增加 check_path_boundaries 开关 +- 在 can_parse() 中: + - 禁止路径、URL、单词字符前匹配数字 + - 对连字符进行智能判断(区分 file-1 与 -1) +- 在 do_parse() 中: + - 避免 1-file.txt 误解析 + - 保留 1-2 等算术表达式 + +同时将 parse\_\* 的严格检查限制为仅在边界模式下生效,避免影响其他语言。 + + +#### 3. Bash 专用启用 + +文件:src/System/Language/prog_language.cpp + +- 仅为 Bash 启用关键字与数字解析器的边界检查 +- 其他语言保持原有行为 + +### Why + +- 避免路径、文件名、URL 中的命令误高亮 +- 避免文件名中的数字被错误识别 +- 保证负数与算术表达式正常显示 +- 保持 Bash 代码可读性 +- 限定修改范围,避免影响其他语言 + ## 2025/01/22 ### What 补充了bash-lang.scm中语法高亮的定义,增加了 -- **外部命令高亮(external_command)** - - GNU coreutils - - 常用开发工具,如 `git`、`gh`、`code`、`claude` +- **外部命令高亮(external_command)** + - GNU coreutils + - 常用开发工具,如 `git`、`gh`、`code`、`claude` - 常用 alias(如 `ll`、`gco`) -## 2025/01/20 +## 2025/01/20 ### What 为Mogan STEM添加Bash代码的语法高亮支持,包括语言定义文件和样式包。 diff --git a/src/Data/Parser/keyword_parser.cpp b/src/Data/Parser/keyword_parser.cpp index 541d42cb4d..7ee3a96d8d 100644 --- a/src/Data/Parser/keyword_parser.cpp +++ b/src/Data/Parser/keyword_parser.cpp @@ -18,10 +18,11 @@ #include "tree_helper.hpp" keyword_parser_rep::keyword_parser_rep () { - current_keyword= ""; - keyword_group = hashmap (); - extra_chars = array (); - start_chars = array (); + current_keyword = ""; + keyword_group = hashmap (); + extra_chars = array (); + start_chars = array (); + check_path_boundaries= false; } void @@ -54,10 +55,43 @@ read_keyword (string s, int& i, string& result, array extras, bool keyword_parser_rep::can_parse (string s, int pos) { + // Check that the preceding character is not a word character if path boundary + // checking is enabled + if (check_path_boundaries && pos > 0) { + char prev= s[pos - 1]; + if (is_alpha (prev) || is_digit (prev) || contains (prev, extra_chars) || + contains (prev, start_chars)) { + return false; + } + // Additionally, avoid matching keywords after '.' or '/' (common in paths) + // Also avoid matching after '@' (email/git user separator) and ':' + // (URL/Windows drive separator) Also avoid matching after '-' and '_' + // (common in filenames) + if (prev == '.' || prev == '/' || prev == '\\' || prev == '@' || + prev == ':' || prev == '-' || prev == '_') { + return false; + } + } + string word; bool hit= read_keyword (s, pos, word, extra_chars, start_chars) && keyword_group->contains (word); - if (hit) current_keyword= word; + + if (hit) { + // Avoid matching keywords before '.' or '/' (common in paths) if path + // boundary checking is enabled Also avoid matching before '@' (email/git + // user separator) and ':' (URL/Windows drive separator) Also avoid matching + // before '-' and '_' (common in filenames) + if (check_path_boundaries && pos < N (s)) { + char next= s[pos]; + if (next == '.' || next == '/' || next == '\\' || next == '@' || + next == ':' || next == '-' || next == '_') { + return false; + } + } + current_keyword= word; + } + return hit; } diff --git a/src/Data/Parser/keyword_parser.hpp b/src/Data/Parser/keyword_parser.hpp index e5fbc0aa24..45f7cb07d7 100644 --- a/src/Data/Parser/keyword_parser.hpp +++ b/src/Data/Parser/keyword_parser.hpp @@ -37,6 +37,7 @@ class keyword_parser_rep : public parser_rep { void use_keywords_of_lang (string lang_code); void insert_start_char (char start_char); void insert_extra_char (char extra_char); + void set_check_path_boundaries (bool check) { check_path_boundaries= check; } private: void do_parse (string s, int& pos); @@ -44,6 +45,7 @@ class keyword_parser_rep : public parser_rep { string current_keyword; array extra_chars; array start_chars; + bool check_path_boundaries; }; #endif diff --git a/src/Data/Parser/number_parser.cpp b/src/Data/Parser/number_parser.cpp index 0cb33f519f..ae9361c923 100644 --- a/src/Data/Parser/number_parser.cpp +++ b/src/Data/Parser/number_parser.cpp @@ -16,32 +16,140 @@ number_parser_rep::number_parser_rep () : PREFIX_0B ("prefix_0b"), PREFIX_0O ("prefix_0o"), PREFIX_0X ("prefix_0x"), PREFIX_HASH ("prefix_#"), NO_SUFFIX_WITH_BOX ("no_suffix_with_box"), SCIENTIFIC_NOTATION ("sci_notation") { - separator= '\0'; + separator = '\0'; + check_path_boundaries= false; } void number_parser_rep::parse_binary (string s, int& pos) { - while (pos < N (s) && (is_binary_digit (s[pos]) || is_separator (s[pos]))) - pos++; + while (pos < N (s)) { + if (is_binary_digit (s[pos])) { + pos++; + } + else if (is_separator (s[pos])) { + // Separator handling depends on path boundary checking + if (check_path_boundaries) { + // With path boundary checking: separator must be followed by a binary + // digit + if (pos + 1 < N (s) && is_binary_digit (s[pos + 1])) { + pos++; + } + else { + break; + } + } + else { + // Without path boundary checking: allow separator as before + pos++; + } + } + else { + break; + } + } } void number_parser_rep::parse_octal (string s, int& pos) { - while (pos < N (s) && (is_octal_digit (s[pos]) || is_separator (s[pos]))) - pos++; + while (pos < N (s)) { + if (is_octal_digit (s[pos])) { + pos++; + } + else if (is_separator (s[pos])) { + // Separator handling depends on path boundary checking + if (check_path_boundaries) { + // With path boundary checking: separator must be followed by an octal + // digit + if (pos + 1 < N (s) && is_octal_digit (s[pos + 1])) { + pos++; + } + else { + break; + } + } + else { + // Without path boundary checking: allow separator as before + pos++; + } + } + else { + break; + } + } } void number_parser_rep::parse_hex (string s, int& pos) { - while (pos < N (s) && (is_hex_digit (s[pos]) || is_separator (s[pos]))) - pos++; + while (pos < N (s)) { + if (is_hex_digit (s[pos])) { + pos++; + } + else if (is_separator (s[pos])) { + // Separator handling depends on path boundary checking + if (check_path_boundaries) { + // With path boundary checking: separator must be followed by a hex + // digit + if (pos + 1 < N (s) && is_hex_digit (s[pos + 1])) { + pos++; + } + else { + break; + } + } + else { + // Without path boundary checking: allow separator as before + pos++; + } + } + else { + break; + } + } } void number_parser_rep::parse_decimal (string s, int& pos) { - while (pos < N (s) && - (is_digit (s[pos]) || is_separator (s[pos]) || s[pos] == '.')) - pos++; + while (pos < N (s)) { + if (is_digit (s[pos])) { + pos++; + } + else if (is_separator (s[pos])) { + // Separator handling depends on path boundary checking + if (check_path_boundaries) { + // With path boundary checking: separator must be followed by a digit + if (pos + 1 < N (s) && is_digit (s[pos + 1])) { + pos++; + } + else { + break; + } + } + else { + // Without path boundary checking: allow separator as before + pos++; + } + } + else if (s[pos] == '.') { + // Decimal point handling depends on path boundary checking + if (check_path_boundaries) { + // With path boundary checking: decimal point must be followed by a + // digit + if (pos + 1 < N (s) && is_digit (s[pos + 1])) { + pos++; + } + else { + break; + } + } + else { + // Without path boundary checking: allow decimal point as before + pos++; + } + } + else { + break; + } + } } bool @@ -64,6 +172,38 @@ number_parser_rep::can_parse_prefix_x (string s, int pos) { bool number_parser_rep::can_parse (string s, int pos) { + // Check that the preceding character is not a word character if path boundary + // checking is enabled + if (check_path_boundaries && pos > 0) { + char prev= s[pos - 1]; + if (is_alpha (prev) || is_digit (prev) || prev == '_') { + return false; + } + // Additionally, avoid matching numbers after '.' or '/' (common in paths) + // Also avoid matching after '@' (email/git user separator) and ':' + // (URL/Windows drive separator) Also avoid matching after '_' (common in + // filenames) For '-', only avoid matching if it's likely a path separator + // (preceded by a word character) + if (prev == '.' || prev == '/' || prev == '\\' || prev == '@' || + prev == ':' || prev == '_') { + return false; + } + if (prev == '-') { + // Check if '-' is likely a path separator (preceded by a letter or + // underscore) If not, it might be a minus sign for negative numbers + // Numbers before '-' are allowed for arithmetic expressions + if (pos > 1) { + char prev2= s[pos - 2]; + if (is_alpha (prev2) || prev2 == '_') { + return false; // '-' is preceded by letter or underscore, likely path + // separator + } + } + // '-' at start of string or not preceded by letter/underscore, could be + // minus sign Don't return false, allow parsing to continue + } + } + // check on len >= 3 if (pos + 2 < N (s)) { if (can_parse_prefix_b (s, pos) || can_parse_prefix_x (s, pos) || @@ -80,6 +220,7 @@ number_parser_rep::can_parse (string s, int pos) { void number_parser_rep::do_parse (string s, int& pos) { + int orig_pos= pos; if (pos >= N (s)) return; if (!is_digit (s[pos]) && !(prefix_hash () && s[pos] == '#') && @@ -97,17 +238,95 @@ number_parser_rep::do_parse (string s, int& pos) { if (can_parse_prefix_b (s, pos)) { pos+= 2; parse_binary (s, pos); - if (no_suffix_with_box ()) return; + if (no_suffix_with_box ()) { + // Check path boundaries if enabled + if (check_path_boundaries && pos < N (s)) { + char next= s[pos]; + // Check if next character is a path separator + if (next == '.' || next == '/' || next == '\\' || next == '@' || + next == ':' || next == '_') { + pos= orig_pos; + } + else if (next == '-') { + // Check if '-' is likely a path separator (followed by a letter or + // underscore) If followed by letter or underscore, it might be a path + // separator Numbers after '-' are allowed for negative numbers and + // arithmetic expressions + if (pos + 1 < N (s)) { + char next2= s[pos + 1]; + if (is_alpha (next2) || next2 == '_') { + // '-' followed by letter or underscore, likely path separator + pos= orig_pos; + } + // Otherwise, '-' might be a minus sign in arithmetic expression or + // part of a number + } + } + } + return; + } } if (can_parse_prefix_o (s, pos)) { pos+= 2; parse_octal (s, pos); - if (no_suffix_with_box ()) return; + if (no_suffix_with_box ()) { + // Check path boundaries if enabled + if (check_path_boundaries && pos < N (s)) { + char next= s[pos]; + // Check if next character is a path separator + if (next == '.' || next == '/' || next == '\\' || next == '@' || + next == ':' || next == '_') { + pos= orig_pos; + } + else if (next == '-') { + // Check if '-' is likely a path separator (followed by a letter or + // underscore) If followed by letter or underscore, it might be a path + // separator Numbers after '-' are allowed for negative numbers and + // arithmetic expressions + if (pos + 1 < N (s)) { + char next2= s[pos + 1]; + if (is_alpha (next2) || next2 == '_') { + // '-' followed by letter or underscore, likely path separator + pos= orig_pos; + } + // Otherwise, '-' might be a minus sign in arithmetic expression or + // part of a number + } + } + } + return; + } } if (can_parse_prefix_x (s, pos)) { pos+= 2; parse_hex (s, pos); - if (no_suffix_with_box ()) return; + if (no_suffix_with_box ()) { + // Check path boundaries if enabled + if (check_path_boundaries && pos < N (s)) { + char next= s[pos]; + // Check if next character is a path separator + if (next == '.' || next == '/' || next == '\\' || next == '@' || + next == ':' || next == '_') { + pos= orig_pos; + } + else if (next == '-') { + // Check if '-' is likely a path separator (followed by a letter or + // underscore) If followed by letter or underscore, it might be a path + // separator Numbers after '-' are allowed for negative numbers and + // arithmetic expressions + if (pos + 1 < N (s)) { + char next2= s[pos + 1]; + if (is_alpha (next2) || next2 == '_') { + // '-' followed by letter or underscore, likely path separator + pos= orig_pos; + } + // Otherwise, '-' might be a minus sign in arithmetic expression or + // part of a number + } + } + } + return; + } } parse_decimal (s, pos); @@ -118,6 +337,31 @@ number_parser_rep::do_parse (string s, int& pos) { parse_decimal (s, pos); } suffix_parser.parse (s, pos); + + // Check path boundaries if enabled + if (check_path_boundaries && pos < N (s)) { + char next= s[pos]; + // Check if next character is a path separator + if (next == '.' || next == '/' || next == '\\' || next == '@' || + next == ':' || next == '_') { + pos= orig_pos; + } + else if (next == '-') { + // Check if '-' is likely a path separator (followed by a letter or + // underscore) If followed by letter or underscore, it might be a path + // separator Numbers after '-' are allowed for negative numbers and + // arithmetic expressions + if (pos + 1 < N (s)) { + char next2= s[pos + 1]; + if (is_alpha (next2) || next2 == '_') { + // '-' followed by letter or underscore, likely path separator + pos= orig_pos; + } + // Otherwise, '-' might be a minus sign in arithmetic expression or part + // of a number + } + } + } } void diff --git a/src/Data/Parser/number_parser.hpp b/src/Data/Parser/number_parser.hpp index 4dc159e2e3..5998e85853 100644 --- a/src/Data/Parser/number_parser.hpp +++ b/src/Data/Parser/number_parser.hpp @@ -89,6 +89,10 @@ class number_parser_rep : public parser_rep { return separator != '\0' && separator == param; } + inline void set_check_path_boundaries (bool check) { + check_path_boundaries= check; + } + void use_fortran_style (); void use_r_style (); @@ -96,6 +100,7 @@ class number_parser_rep : public parser_rep { char separator; hashset bool_features; keyword_parser_rep suffix_parser; + bool check_path_boundaries; void do_parse (string s, int& pos); diff --git a/src/System/Language/prog_language.cpp b/src/System/Language/prog_language.cpp index a971b257dd..710a427f81 100644 --- a/src/System/Language/prog_language.cpp +++ b/src/System/Language/prog_language.cpp @@ -49,6 +49,13 @@ prog_language_rep::prog_language_rep (string name) tree preprocessor_config= get_parser_config (name, "preprocessor"); customize_preprocessor (preprocessor_config); + + // Enable path boundary checking for bash to avoid highlighting commands in + // paths + if (name == "bash") { + keyword_parser.set_check_path_boundaries (true); + number_parser.set_check_path_boundaries (true); + } } tree From c577f37a5f25b01048fdeceeab7aaa3a82ddf209 Mon Sep 17 00:00:00 2001 From: notfoundzzz Date: Mon, 26 Jan 2026 17:07:24 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20Bash=20=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E6=A8=A1=E5=BC=8F=E4=B8=8B=20#=20=E8=A2=AB=E8=AF=AF?= =?UTF-8?q?=E8=AF=86=E5=88=AB=E4=B8=BA=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TeXmacs/tests/tmu/209_7.tmu | 23 +++++++ devel/209_7.md | 87 ++++++++++++++++++++++++++- src/System/Language/prog_language.cpp | 17 ++++++ 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/TeXmacs/tests/tmu/209_7.tmu b/TeXmacs/tests/tmu/209_7.tmu index 076e56f79d..b899454d4f 100644 --- a/TeXmacs/tests/tmu/209_7.tmu +++ b/TeXmacs/tests/tmu/209_7.tmu @@ -26,6 +26,29 @@ \; +<<<<<<< Updated upstream +======= + # --------- comment---------- + + if [[ $# -gt 0 ]]; then + + \ \ \ \ echo "参数个数: $#" + + \ \ \ \ echo "参数列表: $@" + + else + + \ \ \ \ echo "未传入任何参数" + + fi + + \; + + count=${#backups[@]} + + \; + +>>>>>>> Stashed changes # --------- file/url---------- tar xzvf nvim-linux-x86_64.tar.gz diff --git a/devel/209_7.md b/devel/209_7.md index c8b71ba3e8..edb2665bf5 100644 --- a/devel/209_7.md +++ b/devel/209_7.md @@ -13,6 +13,90 @@ - 高亮功能:TeXmacs/plugins/bash/progs/code/bash-lang.scm - 文档:TeXmacs/plugins/bash/doc/bash.en.tmu +## 2026/01/26 修复 Bash 代码模式下 `#` 被误识别为注释 + + +**特殊参数/参数展开中的 `#` 被错误识别为注释起始符** + + - `$#`(参数个数)中的 `#` 被渲染为注释颜色 + - `${#backups[@]}`(数组长度)中的 `#` 被渲染为注释颜色 + +### Why + +Bash 中的 `#` 同时出现在注释、特殊参数和参数展开语法中,一律按注释处理会导致误导。 + +### How + +#### 1. 修复注释识别逻辑(仅针对 Bash) + +文件:`src/System/Language/prog_language.cpp:312-335` + +- 在 `get_color()` 函数中增加 Bash 特殊处理规则: + - 仅当 `#` 位于行首或前面为空白字符时,才识别为注释 + - `$#` 与 `${#...}` 中的 `#` 不再被识别为注释 + +覆盖用例: + +- `$#` +- `${#var}` +- `${#arr[@]}` + +## 2026/01/23 修复bash代码模式下文件名路径等的错误高亮 +### What + +修复了 Bash代码模式下,路径、文件名、URL、连字符等场景中命令名和数字被错误高亮的问题,包括: + +- 路径 / 文件名中的命令误高亮(abc.git、abc/git、nvim-linux.taz) +- URL 中的命令误高亮(git clone git@gitee.com:xxx.git) +- 文件名中的数字误高亮(209.in、209_9.tmu、file-1.txt) +- 负数与算术表达式高亮异常(-1、1-2) + +同时保证独立命令、独立数字及算术表达式仍能正确高亮,仅影响 Bash 语言。 + + +### How + +#### 1. 关键字解析器边界检查 + +文件:src/Data/Parser/keyword_parser.cpp + +- 增加 check_path_boundaries 开关 +- 在 can_parse() 中补充边界判断: + - 禁止在路径、URL、单词字符、连字符、下划线后匹配关键字 + - 禁止关键字后直接连接路径符号或连字符 + +避免 abc.git、abc/git、nvim-linux.taz 等误高亮。 + + +#### 2. 数字解析器边界检查 + +文件:src/Data/Parser/number_parser.cpp + +- 增加 check_path_boundaries 开关 +- 在 can_parse() 中: + - 禁止路径、URL、单词字符前匹配数字 + - 对连字符进行智能判断(区分 file-1 与 -1) +- 在 do_parse() 中: + - 避免 1-file.txt 误解析 + - 保留 1-2 等算术表达式 + +同时将 parse\_\* 的严格检查限制为仅在边界模式下生效,避免影响其他语言。 + + +#### 3. Bash 专用启用 + +文件:src/System/Language/prog_language.cpp + +- 仅为 Bash 启用关键字与数字解析器的边界检查 +- 其他语言保持原有行为 + +### Why + +- 避免路径、文件名、URL 中的命令误高亮 +- 避免文件名中的数字被错误识别 +- 保证负数与算术表达式正常显示 +- 保持 Bash 代码可读性 +- 限定修改范围,避免影响其他语言 ## 2026/01/23 修复bash代码模式下文件名路径等的错误高亮 @@ -88,5 +172,4 @@ ### Why 满足用户的插入Bash代码需求 -关联 issue #2607 - +关联 issue #2607 \ No newline at end of file diff --git a/src/System/Language/prog_language.cpp b/src/System/Language/prog_language.cpp index 710a427f81..1515fb223b 100644 --- a/src/System/Language/prog_language.cpp +++ b/src/System/Language/prog_language.cpp @@ -320,6 +320,23 @@ prog_language_rep::get_color (tree t, int start, int end) { int pos= 0; while (pos <= start) { if (inline_comment_parser.can_parse (s, pos)) { + // Special handling for bash: # is only a comment at start of line or + // after whitespace + if (lan_name == "bash" && pos < N (s) && s[pos] == '#') { + bool is_comment_start= true; + if (pos > 0) { + char prev= s[pos - 1]; + // Check if previous character is whitespace + if (!is_space (prev)) { + is_comment_start= false; + } + } + // pos == 0 means start of line, which is a valid comment start + if (!is_comment_start) { + pos++; + continue; + } + } return decode_color (lan_name, encode_color ("comment")); } pos++; From d10bae00925e79d105830bf7e711db12e43bca46 Mon Sep 17 00:00:00 2001 From: Hongli Cha <128913486+notfoundzzz@users.noreply.github.com> Date: Wed, 28 Jan 2026 13:49:46 +0800 Subject: [PATCH 3/4] Add configurable inline comment whitespace rule --- TeXmacs/plugins/bash/progs/code/bash-lang.scm | 1 + src/Data/Parser/inline_comment_parser.cpp | 11 +++++++- src/Data/Parser/inline_comment_parser.hpp | 4 ++- src/System/Language/prog_language.cpp | 25 ++++++------------- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/TeXmacs/plugins/bash/progs/code/bash-lang.scm b/TeXmacs/plugins/bash/progs/code/bash-lang.scm index e98ca6e042..30916604b2 100644 --- a/TeXmacs/plugins/bash/progs/code/bash-lang.scm +++ b/TeXmacs/plugins/bash/progs/code/bash-lang.scm @@ -163,6 +163,7 @@ (tm-define (parser-feature lan key) (:require (and (== lan "bash") (== key "comment"))) `(,(string->symbol key) + (space_before "true") (inline "#"))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/Data/Parser/inline_comment_parser.cpp b/src/Data/Parser/inline_comment_parser.cpp index d867c4091f..6b497b9979 100644 --- a/src/Data/Parser/inline_comment_parser.cpp +++ b/src/Data/Parser/inline_comment_parser.cpp @@ -14,6 +14,7 @@ inline_comment_parser_rep::inline_comment_parser_rep () { m_starts= array (); + m_require_space_before= false; } void @@ -21,10 +22,18 @@ inline_comment_parser_rep::set_starts (const array& p_starts) { m_starts= p_starts; } +void +inline_comment_parser_rep::set_require_space_before (bool require) { + m_require_space_before= require; +} + bool inline_comment_parser_rep::can_parse (string s, int pos) { if (pos >= N (s)) return false; if (N (m_starts) == 0) return false; + if (m_require_space_before && pos > 0 && !is_space (s[pos - 1])) { + return false; + } int i= 0; while (i < N (m_starts)) { @@ -40,4 +49,4 @@ inline_comment_parser_rep::can_parse (string s, int pos) { void inline_comment_parser_rep::do_parse (string s, int& pos) { pos= N (s); -} \ No newline at end of file +} diff --git a/src/Data/Parser/inline_comment_parser.hpp b/src/Data/Parser/inline_comment_parser.hpp index 3c0d6e5794..29cbe3349d 100644 --- a/src/Data/Parser/inline_comment_parser.hpp +++ b/src/Data/Parser/inline_comment_parser.hpp @@ -21,11 +21,13 @@ class inline_comment_parser_rep : public parser_rep { string get_parser_name () { return "inline_comment_parser"; } void set_starts (const array& p_starts); + void set_require_space_before (bool require); bool can_parse (string s, int pos); private: array m_starts; + bool m_require_space_before; void do_parse (string s, int& pos); }; -#endif \ No newline at end of file +#endif diff --git a/src/System/Language/prog_language.cpp b/src/System/Language/prog_language.cpp index 1515fb223b..ec2f07f42d 100644 --- a/src/System/Language/prog_language.cpp +++ b/src/System/Language/prog_language.cpp @@ -204,6 +204,7 @@ prog_language_rep::customize_string (tree config) { void prog_language_rep::customize_comment (tree config) { + bool require_space_before= false; for (int i= 0; i < N (config); i++) { tree feature= config[i]; string label = get_label (feature); @@ -214,7 +215,14 @@ prog_language_rep::customize_comment (tree config) { } inline_comment_parser.set_starts (inline_comment_starts); } + else if (label == "space_before") { + for (int j= 0; j < N (feature); j++) { + string key= get_label (feature[j]); + if (key == "true") require_space_before= true; + } + } } + inline_comment_parser.set_require_space_before (require_space_before); } void @@ -320,23 +328,6 @@ prog_language_rep::get_color (tree t, int start, int end) { int pos= 0; while (pos <= start) { if (inline_comment_parser.can_parse (s, pos)) { - // Special handling for bash: # is only a comment at start of line or - // after whitespace - if (lan_name == "bash" && pos < N (s) && s[pos] == '#') { - bool is_comment_start= true; - if (pos > 0) { - char prev= s[pos - 1]; - // Check if previous character is whitespace - if (!is_space (prev)) { - is_comment_start= false; - } - } - // pos == 0 means start of line, which is a valid comment start - if (!is_comment_start) { - pos++; - continue; - } - } return decode_color (lan_name, encode_color ("comment")); } pos++; From f3db825a9c6f77bfb9d2b143c9466295774d1207 Mon Sep 17 00:00:00 2001 From: Hongli Cha <128913486+notfoundzzz@users.noreply.github.com> Date: Wed, 28 Jan 2026 13:53:38 +0800 Subject: [PATCH 4/4] Add configurable inline-comment whitespace rule --- TeXmacs/plugins/bash/progs/code/bash-lang.scm | 2 +- src/System/Language/prog_language.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/TeXmacs/plugins/bash/progs/code/bash-lang.scm b/TeXmacs/plugins/bash/progs/code/bash-lang.scm index 30916604b2..be9ac55eca 100644 --- a/TeXmacs/plugins/bash/progs/code/bash-lang.scm +++ b/TeXmacs/plugins/bash/progs/code/bash-lang.scm @@ -163,7 +163,7 @@ (tm-define (parser-feature lan key) (:require (and (== lan "bash") (== key "comment"))) `(,(string->symbol key) - (space_before "true") + (bool_features "space_before") (inline "#"))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/System/Language/prog_language.cpp b/src/System/Language/prog_language.cpp index ec2f07f42d..27c136f970 100644 --- a/src/System/Language/prog_language.cpp +++ b/src/System/Language/prog_language.cpp @@ -215,6 +215,12 @@ prog_language_rep::customize_comment (tree config) { } inline_comment_parser.set_starts (inline_comment_starts); } + else if (label == "bool_features") { + for (int j= 0; j < N (feature); j++) { + string key= get_label (feature[j]); + if (key == "space_before") require_space_before= true; + } + } else if (label == "space_before") { for (int j= 0; j < N (feature); j++) { string key= get_label (feature[j]);