Skip to content

Commit 60f9535

Browse files
committed
lexers: Integrate new lexers with the rest of Elixir
1 parent 9b0ca9a commit 60f9535

File tree

7 files changed

+186
-60
lines changed

7 files changed

+186
-60
lines changed

elixir/filters/__init__.py

Lines changed: 50 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,51 @@
1-
from typing import List
2-
3-
from .utils import Filter, FilterContext
4-
from .projects import project_filters, default_filters
5-
6-
# Returns a list of applicable filters for project_name under provided filter context
7-
# Returns a list of applicable filters for project_name under provided filter context
def get_filters(ctx: FilterContext, project_name: str) -> List[Filter]:
    """Instantiate and return the filters that apply to a project.

    Looks up project_name in project_filters (falling back to
    default_filters), instantiates each entry, and keeps only the
    filters whose check_if_applies(ctx) is true.

    Each entry in the filter list may be either:
      * a (class, kwargs) two-element tuple — instantiated as cls(**kwargs)
      * a plain class — instantiated with no arguments

    Raises ValueError for any other entry shape.
    """
    filter_classes = project_filters.get(project_name, default_filters)
    filters = []

    for filter_cls in filter_classes:
        # isinstance instead of `type(x) == tuple` / `type(x) == type`:
        # accepts tuple subclasses (e.g. NamedTuple) and classes that use
        # a custom metaclass (e.g. ABCs), which exact-type comparison rejects.
        if isinstance(filter_cls, tuple) and len(filter_cls) == 2:
            cls, kwargs = filter_cls
            filters.append(cls(**kwargs))
        elif isinstance(filter_cls, type):
            filters.append(filter_cls())
        else:
            raise ValueError(f"Invalid filter: {filter_cls}, " \
                "should be either a two element tuple or a type. " \
                "Make sure project_filters in project.py is valid.")

    return [f for f in filters if f.check_if_applies(ctx)]
1+
from .ident import IdentFilter

from .cppinc import CppIncFilter
from .cpppathinc import CppPathIncFilter

from .defconfig import DefConfigIdentsFilter
from .configin import ConfigInFilter

from .kconfig import KconfigFilter
from .kconfigidents import KconfigIdentsFilter

from .dtsi import DtsiFilter
from .dtscompdocs import DtsCompDocsFilter
from .dtscompcode import DtsCompCodeFilter
from .dtscompdts import DtsCompDtsFilter

from .makefileo import MakefileOFilter
from .makefiledtb import MakefileDtbFilter
from .makefiledir import MakefileDirFilter
from .makefilesubdir import MakefileSubdirFilter
from .makefilefile import MakefileFileFilter
from .makefilesrctree import MakefileSrcTreeFilter
# NOTE: duplicate `from .makefilesubdir import MakefileSubdirFilter` removed —
# it appeared twice in the original import list.


# List of filters applied to all projects
default_filters = [
    DtsCompCodeFilter,
    DtsCompDtsFilter,
    DtsCompDocsFilter,
    IdentFilter,
    CppIncFilter,
]

# List of filters for Kconfig files
common_kconfig_filters = [
    KconfigFilter,
    KconfigIdentsFilter,
    DefConfigIdentsFilter,
]

# List of filters for Makefiles
common_makefile_filters = [
    MakefileOFilter,
    MakefileDtbFilter,
    MakefileDirFilter,
    MakefileFileFilter,
    MakefileSubdirFilter,
    MakefileSrcTreeFilter,
]
2351

elixir/lexers/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .lexers import *

# Default mapping from filename pattern to lexer class.
# Keys are regexes matched (with re.match) against the lower-cased file
# path; the first matching entry wins, so order matters.  Projects may
# override this via a 'lexers' entry in projects.py.
# NOTE(review): re.match only anchors at the start — patterns like
# r'.*\.s' also prefix-match e.g. '.sh' paths; confirm whether
# re.fullmatch semantics were intended.
default_lexers = {
    r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer,
    r'makefile\..*': MakefileLexer,
    r'.*\.dts(i)?': DTSLexer,
    r'.*\.s': GasLexer,
    r'kconfig.*': KconfigLexer,  # TODO negative lookahead for .rst
}
10+

elixir/project_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .filters.utils import Filter, FilterContext
55
from .filters import default_filters
66
from .projects import projects
7+
from .lexers import default_lexers
78

89
# Returns a list of applicable filters for project_name under provided filter context
910
def get_filters(ctx: FilterContext, project_name: str) -> List[Filter]:
@@ -28,3 +29,19 @@ def get_filters(ctx: FilterContext, project_name: str) -> List[Filter]:
2829

2930
return [f for f in filters if f.check_if_applies(ctx)]
3031

32+
def get_lexer(path: str, project_name: str):
    """Pick a lexer factory for a file of a project.

    Uses the project's 'lexers' mapping from projects.py when present,
    otherwise default_lexers.  Patterns are tried in mapping order
    against the lower-cased path; the first re.match wins.

    Returns a single-argument callable (code -> lexer instance), or
    None when no pattern matches — callers are expected to check for
    None and fall back to plain, untokenized output.
    """
    project_config = projects.get(project_name)
    if project_config is None or 'lexers' not in project_config:
        lexers = default_lexers
    else:
        lexers = project_config['lexers']

    path = path.lower()
    # NOTE(review): re.match anchors only at the start, so e.g. r'.*\.s'
    # also matches '.sh' paths by prefix — confirm whether fullmatch
    # semantics were intended before tightening.
    for regex, lexer in lexers.items():
        if re.match(regex, path):
            # isinstance instead of `type(lexer) == tuple` so tuple
            # subclasses are accepted too.
            if isinstance(lexer, tuple):
                # (class, kwargs) entry: bind the configuration kwargs.
                lexer_cls, kwargs = lexer
                return lambda code: lexer_cls(code, **kwargs)
            else:
                return lambda code: lexer(code)

    # Explicit None for "no lexer configured for this path".
    return None
47+

elixir/projects.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from .filters import *
2+
from collections import OrderedDict
3+
from .filters import *
4+
from .lexers import *
25

36
# Dictionary of custom per-projects settings.
47
# filters:
@@ -48,6 +51,29 @@
4851
# Our solution is to ignore all includes in such paths
4952
(CppPathIncFilter, {"path_exceptions": {'^/include/uapi/.*'}}),
5053
],
54+
'lexers': OrderedDict({
55+
r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer,
56+
r'makefile\..*': MakefileLexer,
57+
r'.*\.dts(i)?': DTSLexer,
58+
r'kconfig.*': KconfigLexer, #TODO negative lookahead for .rst
59+
60+
r'/arch/alpha/.*\.s': (GasLexer, {"arch": "alpha"}),
61+
r'/arch/arc/.*\.s': (GasLexer, {"arch": "arc"}),
62+
r'/arch/arm/.*\.s': (GasLexer, {"arch": "arm32"}),
63+
r'/arch/csky/.*\.s': (GasLexer, {"arch": "csky"}),
64+
r'/arch/m68k/.*\.s': (GasLexer, {"arch": "m68k"}),
65+
r'/arch/microblaze/.*\.s': (GasLexer, {"arch": "microblaze"}),
66+
r'/arch/mips/.*\.s': (GasLexer, {"arch": "mips"}),
67+
r'/arch/openrisc/.*\.s': (GasLexer, {"arch": "openrisc"}),
68+
r'/arch/parisc/.*\.s': (GasLexer, {"arch": "parisc"}),
69+
r'/arch/s390/.*\.s': (GasLexer, {"arch": "s390"}),
70+
r'/arch/sh/.*\.s': (GasLexer, {"arch": "sh"}),
71+
r'/arch/sparc/.*\.s': (GasLexer, {"arch": "sparc"}),
72+
r'/arch/um/.*\.s': (GasLexer, {"arch": "x86"}),
73+
r'/arch/x86/.*\.s': (GasLexer, {"arch": "x86"}),
74+
r'/arch/xtensa/.*\.s': (GasLexer, {"arch": "xtensa"}),
75+
r'.*\.s': GasLexer,
76+
}),
5177
},
5278
'qemu': {
5379
'filters': [
@@ -63,6 +89,24 @@
6389
CppPathIncFilter,
6490
*common_makefile_filters,
6591
],
92+
'lexers': OrderedDict({
93+
r'.*\.(c|h|cpp|hpp|c++|cxx|cc)': CLexer,
94+
r'makefile\..*': MakefileLexer,
95+
r'.*\.dts(i)?': DTSLexer,
96+
r'kconfig.*': KconfigLexer, #TODO negative lookahead for .rst
97+
98+
r'/arch/arc/.*\.s': (GasLexer, {"arch": "arc"}),
99+
r'/arch/arm/.*\.s': (GasLexer, {"arch": "arm32"}),
100+
r'/arch/m68k/.*\.s': (GasLexer, {"arch": "m68k"}),
101+
r'/arch/microblaze/.*\.s': (GasLexer, {"arch": "microblaze"}),
102+
r'/arch/mips/.*\.s': (GasLexer, {"arch": "mips"}),
103+
r'/arch/riscv/.*\.s': (GasLexer, {"arch": "riscv"}),
104+
r'/arch/sh/.*\.s': (GasLexer, {"arch": "sh"}),
105+
r'/arch/x86/.*\.s': (GasLexer, {"arch": "x86"}),
106+
r'/arch/sandbox/.*\.s': (GasLexer, {"arch": "x86"}),
107+
r'/arch/xtensa/.*\.s': (GasLexer, {"arch": "xtensa"}),
108+
r'.*\.s': GasLexer,
109+
}),
66110
},
67111
'uclibc-ng': {
68112
'filters': [

elixir/query.py

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
from .lib import script, scriptLines, decode
2222
from . import lib
2323
from . import data
24-
import os
24+
from .lexers import TokenType
25+
import os, sys
2526
from collections import OrderedDict
2627
from urllib import parse
2728

@@ -172,29 +173,38 @@ def query(self, cmd, *args):
172173

173174
version = args[0]
174175
path = args[1]
176+
lexer = args[2]
175177

176178
filename = os.path.basename(path)
177179
family = lib.getFileFamily(filename)
178180

179-
if family != None:
181+
if family is not None and lexer is not None:
180182
buffer = BytesIO()
181-
tokens = self.scriptLines('tokenize-file', version, path, family)
182-
even = True
183+
code = self.get_file_raw(version, path)
183184

184185
prefix = b''
185186
if family == 'K':
186187
prefix = b'CONFIG_'
187188

188-
for tok in tokens:
189-
even = not even
190-
tok2 = prefix + tok
191-
if (even and self.db.defs.exists(tok2) and
192-
(lib.compatibleFamily(self.db.defs.get(tok2).get_families(), family) or
193-
lib.compatibleMacro(self.db.defs.get(tok2).get_macros(), family))):
194-
tok = b'\033[31m' + tok2 + b'\033[0m'
195-
else:
196-
tok = lib.unescape(tok)
197-
buffer.write(tok)
189+
for token_type, token, _, line in lexer(code).lex():
190+
token = token.encode()
191+
192+
if token_type == TokenType.ERROR:
193+
print("error token: ", token, token_type, filename, line, file=sys.stderr)
194+
elif token_type == TokenType.IDENTIFIER:
195+
token_with_prefix = prefix + token
196+
token_in_db = self.db.defs.exists(token_with_prefix)
197+
if token_in_db:
198+
compatible = \
199+
lib.compatibleFamily(self.db.defs.get(token_with_prefix).get_families(), family) or \
200+
lib.compatibleMacro(self.db.defs.get(token_with_prefix).get_macros(), family)
201+
202+
if compatible:
203+
buffer.write(b'\033[31m' + token_with_prefix + b'\033[0m')
204+
continue
205+
206+
buffer.write(token)
207+
198208
return decode(buffer.getvalue())
199209
else:
200210
return decode(self.script('get-file', version, path))

elixir/web.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
from .lib import validFamily
3535
from .query import Query, SymbolInstance
36-
from .project_utils import get_filters
36+
from .project_utils import get_filters, get_lexer
3737
from .filters.utils import FilterContext
3838
from .autocomplete import AutocompleteResource
3939
from .api import ApiIdentGetterResource
@@ -485,7 +485,8 @@ def format_code(filename, code):
485485
# version: requested version of the project
486486
# path: path to the file in the repository
487487
def generate_source(q, project, version, path):
488-
code = q.query('file', version, path)
488+
lexer = get_lexer(path, project)
489+
code = q.query('file', version, path, lexer)
489490

490491
_, fname = os.path.split(path)
491492
_, extension = os.path.splitext(fname)

update.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,16 @@
2222
# Throughout, an "idx" is the sequential number associated with a blob.
2323
# This is different from that blob's Git hash.
2424

25+
import sys
2526
from sys import argv
2627
from threading import Thread, Lock, Event, Condition
2728

29+
from elixir.lexers import TokenType
2830
import elixir.lib as lib
2931
from elixir.lib import script, scriptLines
3032
import elixir.data as data
3133
from elixir.data import PathList
34+
from elixir.project_utils import get_lexer
3235
from find_compatible_dts import FindCompatibleDTS
3336

3437
verbose = False
@@ -56,6 +59,7 @@
5659
bindings_idxes = [] # DT bindings documentation files
5760
idx_key_mod = 1000000
5861
defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key.
62+
file_paths = {}
5963

6064
tags_done = False # True if all tags have been added to new_idxes
6165

@@ -163,7 +167,7 @@ def run(self):
163167
progress('vers: Thread finished', index)
164168

165169
def update_versions(self, tag):
166-
global blobs_lock
170+
global blobs_lock, file_paths
167171

168172
# Get blob hashes and associated file paths
169173
blobs = scriptLines('list-blobs', '-p', tag)
@@ -174,12 +178,14 @@ def update_versions(self, tag):
174178
with blobs_lock:
175179
idx = db.blob.get(hash)
176180
buf.append((idx, path))
181+
file_paths[idx] = path
177182

178183
buf = sorted(buf)
179184
obj = PathList()
180185
for idx, path in buf:
181186
obj.append(idx, path)
182187

188+
183189
# Store DT bindings documentation files to parse them later
184190
if path[:33] == b'Documentation/devicetree/bindings':
185191
bindings_idxes.append(idx)
@@ -275,6 +281,7 @@ def run(self):
275281

276282
new_idxes[self.index][1].wait() # Make sure the tag is ready
277283
new_idxes[self.index][2].wait() # Make sure UpdateDefs processed the tag
284+
new_idxes[self.index][4].wait() # Tell that UpdateVersions processed the tag
278285

279286
with tags_refs_lock:
280287
tags_refs[0] += 1
@@ -288,45 +295,53 @@ def run(self):
288295
progress('refs: Thread ' + str(tags_refs[1]) + '/' + str(self.inc) + ' finished', tags_refs[0])
289296

290297
def update_references(self, idxes):
291-
global hash_file_lock, defs_lock, refs_lock, tags_refs
298+
global hash_file_lock, defs_lock, refs_lock, tags_refs, file_paths
292299

293300
for idx in idxes:
294301
if idx % 1000 == 0: progress('refs: ' + str(idx), tags_refs[0])
295302

296303
with hash_file_lock:
297304
hash = db.hash.get(idx)
298-
filename = db.file.get(idx)
305+
filename = file_paths[idx].decode()
299306

300307
family = lib.getFileFamily(filename)
301308
if family == None: continue
302309

310+
lexer = get_lexer(filename, project)
311+
if lexer is None:
312+
continue
313+
314+
try:
315+
code = script('get-blob', hash).decode()
316+
except UnicodeDecodeError:
317+
code = script('get-blob', hash).decode('raw_unicode_escape')
318+
303319
prefix = b''
304320
# Kconfig values are saved as CONFIG_<value>
305321
if family == 'K':
306322
prefix = b'CONFIG_'
307323

308-
tokens = scriptLines('tokenize-file', '-b', hash, family)
309-
even = True
310-
line_num = 1
311324
idents = {}
312325
with defs_lock:
313-
for tok in tokens:
314-
even = not even
315-
if even:
316-
tok = prefix + tok
317-
318-
if (db.defs.exists(tok) and
319-
not ( (idx*idx_key_mod + line_num) in defs_idxes and
320-
defs_idxes[idx*idx_key_mod + line_num] == tok ) and
321-
(family != 'M' or tok.startswith(b'CONFIG_'))):
322-
# We only index CONFIG_??? in makefiles
323-
if tok in idents:
324-
idents[tok] += ',' + str(line_num)
325-
else:
326-
idents[tok] = str(line_num)
326+
for token_type, token, _, line in lexer(code).lex():
327+
if token_type == TokenType.ERROR:
328+
print("error token: ", token, token_type, filename, line, file=sys.stderr)
329+
continue
327330

328-
else:
329-
line_num += tok.count(b'\1')
331+
token = prefix + token.encode()
332+
333+
if token_type != TokenType.IDENTIFIER:
334+
continue
335+
336+
if (db.defs.exists(token) and
337+
not ( (idx*idx_key_mod + line) in defs_idxes and
338+
defs_idxes[idx*idx_key_mod + line] == token ) and
339+
(family != 'M' or token.startswith(b'CONFIG_'))):
340+
# We only index CONFIG_??? in makefiles
341+
if token in idents:
342+
idents[token] += ',' + str(line)
343+
else:
344+
idents[token] = str(line)
330345

331346
with refs_lock:
332347
for ident, lines in idents.items():
@@ -579,6 +594,7 @@ def progress(msg, current):
579594
for tag in scriptLines('list-tags'):
580595
if not db.vers.exists(tag):
581596
tag_buf.append(tag)
597+
break
582598

583599
num_tags = len(tag_buf)
584600
project = lib.currentProject()

0 commit comments

Comments
 (0)