Skip to content

Commit c9e91e2

Browse files
Add a modern Tree-sitter PHP grammar
This one’s got all the frills, including injections into HTML documents and a PHPDoc grammar. As part of this change, we're also migrating to `web-tree-sitter` version 0.20.8 with some customizations. The PR I submitted at tree-sitter/tree-sitter#2795 is landed on this fork, though if the same issues get fixed in a different way on the source, I'll adopt that approach as well. The PHPDoc parser needed another external added.
1 parent ffb754e commit c9e91e2

17 files changed

+1887
-982
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Root grammar for PHP files. The document as a whole is scoped as
# `text.html.php` and parsed with the `tree-sitter-php` parser.
name: 'PHP'
2+
scopeName: 'text.html.php'
3+
type: 'modern-tree-sitter'
4+
parser: 'tree-sitter-php'
5+
6+
# Language names matched by this pattern select this grammar for injections.
injectionRegex: 'php|PHP'
7+
8+
# Paths to the compiled parser and the query files used by this grammar.
treeSitter:
9+
grammar: 'tree-sitter/tree-sitter-php.wasm'
10+
highlightsQuery: 'tree-sitter/queries/highlights.scm'
11+
tagsQuery: 'tree-sitter/queries/tags.scm'
12+
foldsQuery: 'tree-sitter/queries/folds.scm'
13+
indentsQuery: 'tree-sitter/queries/indents.scm'
14+
15+
# File extensions associated with this grammar.
fileTypes: [
16+
'aw'
17+
'ctp'
18+
'inc'
19+
'install'
20+
'module'
21+
'php'
22+
'php_cs'
23+
'php3'
24+
'php4'
25+
'php5'
26+
'phpt'
27+
'phtml'
28+
'profile'
29+
]
30+
31+
# Matches a first line that is either a shebang ending in `php` (with an
# optional trailing digit) or an opening `<?php`, `<?=`, or bare `<?` tag.
firstLineRegex: "^\\#!.*(?:\\s|\\/)php\\d?(?:$|\\s)|^\\s*<\\?(php|=|\\s|$)"
32+
33+
# Matches an opening `<?php`, `<?=`, or bare `<?` tag anywhere in the content.
contentRegex: "<\\?(php|=|\\s|$)"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Internal grammar that shares the `tree-sitter-php` parser but carries the
# `source.php` scope name.
scopeName: 'source.php'
2+
type: 'modern-tree-sitter'
3+
parser: 'tree-sitter-php'
4+
5+
# Give it a precise injectionRegex that won't get accidentally matched with
6+
# anything. This grammar only exists as a way to apply the `source.php` scope.
7+
injectionRegex: '^(internal-php)$'
8+
9+
treeSitter:
10+
grammar: 'tree-sitter/tree-sitter-php.wasm'
11+
# NOTE(review): presumably a placeholder query with no captures, since this
# grammar is only used for its scope name — confirm against `empty.scm`.
highlightsQuery: 'tree-sitter/queries/empty.scm'
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
# Grammar for PHPDoc comment blocks, parsed with `tree-sitter-phpdoc`.
scopeName: 'comment.block.documentation.phpdoc.php'
3+
type: 'modern-tree-sitter'
4+
parser: 'tree-sitter-phpdoc'
5+
6+
# Anchored on both ends, so only the exact names `phpdoc` and `PHPDoc` match.
injectionRegex: '^(phpdoc|PHPDoc)$'
7+
8+
treeSitter:
9+
# Records the repository and exact commit the parser was taken from.
parserSource: 'github:claytonrcarter/tree-sitter-phpdoc#915a527d5aafa81b31acf67fab31b0ac6b6319c0'
10+
grammar: 'tree-sitter/tree-sitter-phpdoc.wasm'
11+
highlightsQuery: 'tree-sitter/queries/phpdoc/highlights.scm'
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
; placeholder
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
;
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
; Placeholder

packages/language-php/grammars/tree-sitter/queries/highlights.scm

Lines changed: 444 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
; Indentation hints for PHP.

; Opening brackets indent the following line; closing brackets dedent.
["{" "(" "["] @indent
["}" ")" "]"] @dedent

; A colon opens a block in PHP's alternative control-structure syntax
; (`if (...):`, `foreach (...):`, `while (...):`, …).
":" @indent

; Each `end*` keyword of the alternative syntax closes the block opened by the
; colon. `endwhile` must be listed alongside the others; without it a
; `while (...):` block would indent and never dedent.
["endif" "endfor" "endforeach" "endwhile" "enddeclare" "endswitch"] @dedent
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
2+
; Scope the opening delimiter of the doc comment: the capture on `document` is
; adjusted around the first match of `/**` at the start of the text.
((document) @punctuation.definition.begin.comment.phpdoc.php
  (#set! adjust.startAndEndAroundFirstMatchOf "^/\\*\\*"))

; Scope the closing delimiter (`*/`, optionally preceded by one more `*`) at
; the end of the text.
((document) @punctuation.definition.end.comment.phpdoc.php
  (#set! adjust.startAndEndAroundFirstMatchOf "(?:\\*)?\\*/$"))

; Tag names such as `@param` and `@return`.
(tag_name) @storage.type.class.phpdoc.php

; Type lists that appear inside a tag.
(tag (type_list) @entity.other.type.instance.phpdoc.php)

(variable_name) @variable.other.phpdoc.php

(uri) @markup.underline.link.phpdoc.php

; Braces delimiting inline tags. These captures were previously spelled
; `punctation`, which does not match the `punctuation` scope used elsewhere.
(inline_tag "{" @punctuation.definition.tag.begin.brace.curly.phpdoc.php)
(inline_tag "}" @punctuation.definition.tag.end.brace.curly.phpdoc.php)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
2+
; Namespaces are tagged as modules, named by their `namespace_name`.
(namespace_definition
3+
name: (namespace_name) @name) @module
4+
5+
; Interfaces and traits both receive the `definition.interface` tag.
(interface_declaration
6+
name: (name) @name) @definition.interface
7+
8+
(trait_declaration
9+
name: (name) @name) @definition.interface
10+
11+
(class_declaration
12+
name: (name) @name) @definition.class
13+
14+
; Names listed in a class's interface clause are tagged as implementations.
(class_interface_clause [(name) (qualified_name)] @name) @impl
15+
16+
; Properties are tagged as fields, named by the inner `name` of the variable.
(property_declaration
17+
(property_element (variable_name (name) @name))) @definition.field
18+
19+
; Free functions and methods share the `definition.function` tag.
(function_definition
20+
name: (name) @name) @definition.function
21+
22+
(method_declaration
23+
name: (name) @name) @definition.function
24+
25+
; Object creation is tagged as a class reference, whether the class is given
; as a qualified name or as a variable.
(object_creation_expression
26+
[
27+
(qualified_name (name) @name)
28+
(variable_name (name) @name)
29+
]) @reference.class
30+
31+
; Function calls, static (scoped) calls and member calls are call references.
(function_call_expression
32+
function: [
33+
(qualified_name (name) @name)
34+
(variable_name (name)) @name
35+
]) @reference.call
36+
37+
(scoped_call_expression
38+
name: (name) @name) @reference.call
39+
40+
(member_call_expression
41+
name: (name) @name) @reference.call
Binary file not shown.
Binary file not shown.

packages/language-php/lib/main.js

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
exports.activate = function () {
2+
3+
// Here's how we handle the mixing of PHP and HTML:
4+
//
5+
// * The root language layer uses the `tree-sitter-php` parser, and handles
6+
// all tasks: highlighting, indentation, folds.
7+
// * HTML gets injected into all `text` nodes.
8+
// * A corresponding PHP injection point is created for non-text nodes and
9+
// represents all non-HTML regions of the buffer.
10+
//
11+
// We do it this way so that we can limit the scope `source.php` to only
12+
// those ranges identified by the injection. The way the `tree-sitter-php`
13+
// tree is organized rules out all other options for adding scope boundaries
14+
// to embedded PHP ranges. And injections enjoy an ability that query
15+
// captures do not: they are allowed to apply their base scope to arbitrary
16+
// ranges of the tree. If we can describe the ranges in the `content`
17+
// callback, we can scope each range the way we want.
18+
//
19+
// This may seem like an odd thing to do, but it's critical for us to know
20+
// when we're in `source.php` and when we aren't. For instance, nearly all of
21+
// the snippets in `language-php` are valid only in one mode or the other.
22+
//
23+
// This means that, technically speaking, the PHP injection layer doesn't
24+
// have any tasks, and doesn't even need to do any parsing. If injections had
25+
// the option of re-using another layer's tree, we'd want to do that, but
26+
// right now there's not a need for such a feature.
27+
28+
atom.grammars.addInjectionPoint('text.html.php', {
29+
type: 'program',
30+
language: () => 'html',
31+
content(node) {
32+
return node.descendantsOfType('text');
33+
},
34+
35+
// We don't need a base scope for this injection because the whole file is
36+
// already scoped as `text.html.php`. The PHP embeds add a `source.php`
37+
// scope, but still has `text.html.php` as the root. This is how the TM
38+
// grammar works, so we're replicating it here.
39+
languageScope: null
40+
});
41+
42+
atom.grammars.addInjectionPoint('text.html.php', {
43+
type: 'program',
44+
language() {
45+
return 'internal-php';
46+
},
47+
content(node) {
48+
let results = [];
49+
// At the top level we should ignore `text` nodes, since they're just
50+
// HTML. We should also ignore the middle children of
51+
// `text_interpolation` nodes (also `text`), but we need to include their
52+
// first and last children, which correspond to `?>` and `<?php`.
53+
//
54+
// In practice, it seems that `text` is always a child of the root except
55+
// inside of `text_interpolation`, and `text_interpolation` is always a
56+
// child of the root. The only exceptions I've noticed are when the tree
57+
// is in an error state, so they may not be worth worrying about.
58+
for (let child of node.children) {
59+
if (child.type === 'text') { continue; }
60+
if (child.type === 'text_interpolation') {
61+
for (let grandchild of child.children) {
62+
if (grandchild.type === 'text') { continue; }
63+
results.push(grandchild);
64+
}
65+
continue;
66+
}
67+
results.push(child);
68+
}
69+
return results;
70+
},
71+
includeChildren: true,
72+
newlinesBetween: true,
73+
includeAdjacentWhitespace: true
74+
});
75+
76+
const TODO_PATTERN = /\b(TODO|FIXME|CHANGED|XXX|IDEA|HACK|NOTE|REVIEW|NB|BUG|QUESTION|COMBAK|TEMP|DEBUG|OPTIMIZE|WARNING)\b/;
77+
const HYPERLINK_PATTERN = /\bhttps?:/
78+
79+
function isPhpDoc(node) {
80+
let { text } = node;
81+
return text.startsWith('/**') && !text.startsWith('/***')
82+
}
83+
84+
atom.grammars.addInjectionPoint('text.html.php', {
85+
type: 'comment',
86+
language: (node) => {
87+
return TODO_PATTERN.test(node.text) ? 'todo' : undefined;
88+
},
89+
content: (node) => node,
90+
languageScope: null
91+
});
92+
93+
for (let type of ['comment', 'string_value']) {
94+
atom.grammars.addInjectionPoint('text.html.php', {
95+
type,
96+
language(node) {
97+
// PHPDoc can parse URLs better than we can.
98+
if (isPhpDoc(node)) return undefined;
99+
return HYPERLINK_PATTERN.test(node.text) ?
100+
'hyperlink' : undefined;
101+
},
102+
content: (node) => node,
103+
languageScope: null
104+
});
105+
}
106+
107+
atom.grammars.addInjectionPoint('text.html.php', {
108+
type: 'heredoc',
109+
language(node) {
110+
let id = node.firstNamedChild;
111+
if (id.type !== 'heredoc_start') { return null; }
112+
console.log('returning heredoc name:', id.text);
113+
return id.text;
114+
},
115+
content(node) {
116+
let body = node.children.find(c => c.type === 'heredoc_body');
117+
let results = body.children.filter(c => c.type === 'string_value');
118+
return results;
119+
}
120+
});
121+
122+
atom.grammars.addInjectionPoint('text.html.php', {
123+
type: 'comment',
124+
language(node) {
125+
if (isPhpDoc(node)) {
126+
return 'phpdoc';
127+
}
128+
},
129+
content(node) { return node; }
130+
});
131+
132+
};

packages/language-php/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
{
22
"name": "language-php",
33
"version": "0.48.1",
4+
"main": "lib/main",
45
"description": "PHP language support in Atom",
56
"engines": {
67
"atom": "*",
7-
"node": "*"
8+
"node": ">=12"
89
},
910
"repository": "https://github.com/pulsar-edit/pulsar",
1011
"license": "MIT"

src/wasm-tree-sitter-language-mode.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3005,7 +3005,7 @@ class LanguageLayer {
30053005

30063006
this.tree = null;
30073007
this.lastSyntaxTree = null;
3008-
this.temporaryTrees = null;
3008+
this.temporaryTrees = [];
30093009

30103010
while (trees.length > 0) {
30113011
let tree = trees.pop();
@@ -4049,7 +4049,7 @@ class NodeRangeSet {
40494049
}
40504050

40514051
getRanges(buffer) {
4052-
const previousRanges = this.previous && this.previous.getRanges(buffer);
4052+
const previousRanges = this.previous?.getRanges(buffer);
40534053
let result = [];
40544054

40554055
for (const node of this.nodes) {
@@ -4192,7 +4192,7 @@ class NodeRangeSet {
41924192
// For injection points with `newlinesBetween` enabled, ensure that a
41934193
// newline is included between each disjoint range.
41944194
_ensureNewline(buffer, newRanges, startIndex, startPosition) {
4195-
const lastRange = newRanges[newRanges.length - 1];
4195+
const lastRange = last(newRanges);
41964196
if (lastRange && lastRange.endPosition.row < startPosition.row) {
41974197
newRanges.push({
41984198
startPosition: new Point(
@@ -4289,7 +4289,7 @@ class RangeList {
42894289
}
42904290

42914291
insertOrdered(newRange) {
4292-
let index = this.ranges.findIndex((r, i) => {
4292+
let index = this.ranges.findIndex(r => {
42934293
return r.start.compare(newRange.start) > 0;
42944294
});
42954295
this.ranges.splice(index, 0, newRange);

0 commit comments

Comments
 (0)