2222# Throughout, an "idx" is the sequential number associated with a blob.
2323# This is different from that blob's Git hash.
2424
25+ import sys
2526from sys import argv
2627from threading import Thread , Lock , Event , Condition
2728
29+ from elixir .lexers import TokenType
2830import elixir .lib as lib
2931from elixir .lib import script , scriptLines
3032import elixir .data as data
3133from elixir .data import PathList
34+ from elixir .project_utils import get_lexer
3235from find_compatible_dts import FindCompatibleDTS
3336
3437verbose = False
5659bindings_idxes = [] # DT bindings documentation files
5760idx_key_mod = 1000000
5861defs_idxes = {} # Idents definitions stored with (idx*idx_key_mod + line) as the key.
62+ file_paths = {}
5963
6064tags_done = False # True if all tags have been added to new_idxes
6165
@@ -163,7 +167,7 @@ def run(self):
163167 progress ('vers: Thread finished' , index )
164168
165169 def update_versions (self , tag ):
166- global blobs_lock
170+ global blobs_lock , file_paths
167171
168172 # Get blob hashes and associated file paths
169173 blobs = scriptLines ('list-blobs' , '-p' , tag )
@@ -174,12 +178,14 @@ def update_versions(self, tag):
174178 with blobs_lock :
175179 idx = db .blob .get (hash )
176180 buf .append ((idx , path ))
181+ file_paths [idx ] = path
177182
178183 buf = sorted (buf )
179184 obj = PathList ()
180185 for idx , path in buf :
181186 obj .append (idx , path )
182187
188+
183189 # Store DT bindings documentation files to parse them later
184190 if path [:33 ] == b'Documentation/devicetree/bindings' :
185191 bindings_idxes .append (idx )
@@ -275,6 +281,7 @@ def run(self):
275281
276282 new_idxes [self .index ][1 ].wait () # Make sure the tag is ready
277283 new_idxes [self .index ][2 ].wait () # Make sure UpdateDefs processed the tag
284+ new_idxes [self .index ][4 ].wait () # Make sure UpdateVersions processed the tag
278285
279286 with tags_refs_lock :
280287 tags_refs [0 ] += 1
@@ -288,45 +295,53 @@ def run(self):
288295 progress ('refs: Thread ' + str (tags_refs [1 ]) + '/' + str (self .inc ) + ' finished' , tags_refs [0 ])
289296
290297 def update_references (self , idxes ):
291- global hash_file_lock , defs_lock , refs_lock , tags_refs
298+ global hash_file_lock , defs_lock , refs_lock , tags_refs , file_paths
292299
293300 for idx in idxes :
294301 if idx % 1000 == 0 : progress ('refs: ' + str (idx ), tags_refs [0 ])
295302
296303 with hash_file_lock :
297304 hash = db .hash .get (idx )
298- filename = db . file . get ( idx )
305+ filename = file_paths [ idx ]. decode ( )
299306
300307 family = lib .getFileFamily (filename )
301308 if family == None : continue
302309
310+ lexer = get_lexer (filename , project )
311+ if lexer is None :
312+ continue
313+
314+ try :
315+ code = script ('get-blob' , hash ).decode ()
316+ except UnicodeDecodeError :
317+ code = script ('get-blob' , hash ).decode ('raw_unicode_escape' )
318+
303319 prefix = b''
304320 # Kconfig values are saved as CONFIG_<value>
305321 if family == 'K' :
306322 prefix = b'CONFIG_'
307323
308- tokens = scriptLines ('tokenize-file' , '-b' , hash , family )
309- even = True
310- line_num = 1
311324 idents = {}
312325 with defs_lock :
313- for tok in tokens :
314- even = not even
315- if even :
316- tok = prefix + tok
317-
318- if (db .defs .exists (tok ) and
319- not ( (idx * idx_key_mod + line_num ) in defs_idxes and
320- defs_idxes [idx * idx_key_mod + line_num ] == tok ) and
321- (family != 'M' or tok .startswith (b'CONFIG_' ))):
322- # We only index CONFIG_??? in makefiles
323- if tok in idents :
324- idents [tok ] += ',' + str (line_num )
325- else :
326- idents [tok ] = str (line_num )
326+ for token_type , token , _ , line in lexer (code ).lex ():
327+ if token_type == TokenType .ERROR :
328+ print ("error token: " , token , token_type , filename , line , file = sys .stderr )
329+ continue
327330
328- else :
329- line_num += tok .count (b'\1 ' )
331+ token = prefix + token .encode ()
332+
333+ if token_type != TokenType .IDENTIFIER :
334+ continue
335+
336+ if (db .defs .exists (token ) and
337+ not ( (idx * idx_key_mod + line ) in defs_idxes and
338+ defs_idxes [idx * idx_key_mod + line ] == token ) and
339+ (family != 'M' or token .startswith (b'CONFIG_' ))):
340+ # We only index CONFIG_??? in makefiles
341+ if token in idents :
342+ idents [token ] += ',' + str (line )
343+ else :
344+ idents [token ] = str (line )
330345
331346 with refs_lock :
332347 for ident , lines in idents .items ():
@@ -579,6 +594,7 @@ def progress(msg, current):
579594for tag in scriptLines ('list-tags' ):
580595 if not db .vers .exists (tag ):
581596 tag_buf .append (tag )
597+ break
582598
583599num_tags = len (tag_buf )
584600project = lib .currentProject ()
0 commit comments