16
16
import java .util .Map ;
17
17
import java .util .Set ;
18
18
import java .util .TreeMap ;
19
+ import java .util .function .Predicate ;
20
+ import java .util .regex .Matcher ;
21
+ import java .util .regex .Pattern ;
19
22
import java .util .stream .Collectors ;
20
23
21
24
import org .eclipse .xtext .Grammar ;
22
25
import org .eclipse .xtext .GrammarUtil ;
23
26
import org .eclipse .xtext .TerminalRule ;
24
27
25
28
import com .google .common .base .Joiner ;
29
+ import com .google .common .collect .Iterables ;
26
30
import com .google .gson .annotations .Expose ;
27
31
28
32
/**
35
39
*/
36
40
public class TextMateGrammar {
37
41
42
+ private static final String ANY_OTHER = "ANY_OTHER" ;
43
+
38
44
@ Expose private final List <TextMateRule > patterns ;
39
45
@ Expose private String scopeName ;
40
46
@ Expose private Map <String , TextMateRule > repository ;
@@ -125,8 +131,7 @@ protected TextMateGrammar init(Grammar grammar) {
125
131
}
126
132
TextMateGrammar result = new TextMateGrammar ();
127
133
result .setScopeName (scopeName );
128
- TextMateRule keywords = getKeywordControlRule (grammar , ignoreCase );
129
- result .addRule (keywords );
134
+ result .addRule (getKeywordControlRule (grammar , ignoreCase ));
130
135
131
136
Set <String > seenTerminalRules = new HashSet <>();
132
137
for (TextMateRule pattern : patterns ) {
@@ -143,7 +148,7 @@ protected TextMateGrammar init(Grammar grammar) {
143
148
if (inferPatterns ) {
144
149
List <TerminalRule > terminals = GrammarUtil .allTerminalRules (grammar )
145
150
.stream ()
146
- .filter (r -> !r .isFragment ())
151
+ .filter (r -> !r .isFragment () && ! r . getName (). equals ( ANY_OTHER ) )
147
152
.collect (Collectors .toList ());
148
153
for (TerminalRule terminal : terminals ) {
149
154
if (!seenTerminalRules .add (terminal .getName ())) {
@@ -154,6 +159,14 @@ protected TextMateGrammar init(Grammar grammar) {
154
159
auto .init (grammar , ignoreCase , generator ).ifPresent (result ::addRule );
155
160
}
156
161
}
162
+
163
+ result .addRule (getPunctuationRule (grammar , ignoreCase ));
164
+ // invalid rule must be last, otherwise it prevents other rules from matching
165
+ if (inferPatterns && GrammarUtil .findRuleForName (grammar , ANY_OTHER ) != null ) {
166
+ AutoRule auto = newAutoRule ();
167
+ auto .setTerminalRule (ANY_OTHER );
168
+ auto .init (grammar , ignoreCase , generator ).ifPresent (result ::addRule );
169
+ }
157
170
return result ;
158
171
}
159
172
@@ -164,24 +177,52 @@ protected AutoRule newAutoRule() {
164
177
protected String getLanguageName (Grammar grammar ) {
165
178
return GrammarUtil .getSimpleName (grammar ).toLowerCase (Locale .ROOT );
166
179
}
167
-
180
+
168
181
protected TextMateRule getKeywordControlRule (Grammar grammar , boolean ignoreCase ) {
182
+ return createKeywordRule (grammar , "keyword.control" , keyword -> keyword .matches ("\\ w+" ), ignoreCase );
183
+ }
184
+
185
+ protected TextMateRule getPunctuationRule (Grammar grammar , boolean ignoreCase ) {
186
+ return createKeywordRule (grammar , "punctuation" , keyword -> !keyword .matches ("\\ w+" ), ignoreCase );
187
+ }
188
+
189
+ protected TextMateRule createKeywordRule (Grammar grammar , String namePrefix , Predicate <String > filter , boolean ignoreCase ) {
169
190
StringBuilder matchBuilder = new StringBuilder ();
170
191
if (ignoreCase ) {
171
192
matchBuilder .append ("(?i)" );
172
193
}
173
- matchBuilder .append ("\\ b (" );
194
+ matchBuilder .append ("(" );
174
195
List <String > allKeywords = GrammarUtil .getAllKeywords (grammar )
175
196
.stream ()
176
- .filter (s -> s . matches ( " \\ w+" ) )
177
- .sorted (Comparator . naturalOrder () )
178
- .collect ( Collectors . toList () );
179
- matchBuilder . append ( Joiner .on ("|" ).join ( allKeywords ));
180
- matchBuilder .append (")\\ b " );
197
+ .filter (filter )
198
+ .sorted ()
199
+ .toList ();
200
+ Joiner .on ("|" ).appendTo ( matchBuilder , Iterables . transform ( allKeywords , this :: escapeAndAddWordBoundaries ));
201
+ matchBuilder .append (")" );
181
202
MatchRule result = new MatchRule ();
182
- result .setName ("keyword.control ." + getLanguageName (grammar ));
203
+ result .setName (namePrefix + " ." + getLanguageName (grammar ));
183
204
result .setMatch (matchBuilder .toString ());
184
205
return result ;
185
206
}
186
207
208
+ private static final Pattern START_IS_LETTER = Pattern .compile ("^\\ w" );
209
+ private static final Pattern END_IS_LETTER = Pattern .compile ("\\ w$" );
210
+ protected String escapeAndAddWordBoundaries (String token ) {
211
+ StringBuilder result = new StringBuilder ();
212
+ if (START_IS_LETTER .matcher (token ).find ()) {
213
+ result .append ("\\ b" );
214
+ }
215
+ result .append (escapeForRegex (token ));
216
+ if (END_IS_LETTER .matcher (token ).find ()) {
217
+ result .append ("\\ b" );
218
+ }
219
+ return result .toString ();
220
+ }
221
+
222
+ private static final Pattern REGEX_CONTROL_CHARS = Pattern .compile ("[\\ \\ ^$.*+?()\\ [\\ ]{}|]" );
223
+ private static String escapeForRegex (String input ) {
224
+ Matcher matcher = REGEX_CONTROL_CHARS .matcher (input );
225
+ return matcher .replaceAll (match -> Matcher .quoteReplacement ("\\ " + match .group ()));
226
+ }
227
+
187
228
}
0 commit comments