Skip to content

Commit fa71488

Browse files
authored
Also retrieve file's package when fast-scanning for imports (#209)
This augments the experimental fast-scanner `imports.ScanForImports` to also return the package name declared in each scanned file. Because it now extracts more than just imports, it has been renamed to `fastscan.Scan`.
1 parent f7f6094 commit fa71488

File tree

3 files changed

+59
-23
lines changed

3 files changed

+59
-23
lines changed

internal/benchmarks/benchmark_test.go

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ import (
4848
"github.com/bufbuild/protocompile/internal/protoc"
4949
"github.com/bufbuild/protocompile/linker"
5050
"github.com/bufbuild/protocompile/parser"
51-
"github.com/bufbuild/protocompile/parser/imports"
51+
"github.com/bufbuild/protocompile/parser/fastscan"
5252
"github.com/bufbuild/protocompile/protoutil"
5353
"github.com/bufbuild/protocompile/reporter"
5454
)
@@ -356,15 +356,15 @@ func benchmarkGoogleapisProtoparse(b *testing.B, factory func() *protoparse.Pars
356356
}
357357
}
358358

359-
func BenchmarkGoogleapisScanImports(b *testing.B) {
359+
func BenchmarkGoogleapisFastScan(b *testing.B) {
360360
par := runtime.GOMAXPROCS(-1)
361361
cpus := runtime.NumCPU()
362362
if par > cpus {
363363
par = cpus
364364
}
365365
type entry struct {
366-
filename string
367-
imports []string
366+
filename string
367+
scanResult fastscan.Result
368368
}
369369
for i := 0; i < b.N; i++ {
370370
workCh := make(chan string, par)
@@ -405,24 +405,23 @@ func BenchmarkGoogleapisScanImports(b *testing.B) {
405405
return ctx.Err()
406406
}
407407
r, err := os.Open(filename)
408-
var imps []string
409408
if err != nil {
410409
return err
411410
}
412-
imps, err = imports.ScanForImports(r)
411+
res, err := fastscan.Scan(r)
413412
_ = r.Close()
414413
if err != nil {
415414
return err
416415
}
417416
select {
418-
case resultsCh <- entry{filename: filename, imports: imps}:
417+
case resultsCh <- entry{filename: filename, scanResult: res}:
419418
case <-ctx.Done():
420419
return ctx.Err()
421420
}
422421
}
423422
})
424423
}
425-
results := make(map[string][]string, len(googleapisSources))
424+
results := make(map[string]fastscan.Result, len(googleapisSources))
426425
grp.Go(func() error {
427426
// accumulator
428427
for {
@@ -431,7 +430,7 @@ func BenchmarkGoogleapisScanImports(b *testing.B) {
431430
if !ok {
432431
return nil
433432
}
434-
results[entry.filename] = entry.imports
433+
results[entry.filename] = entry.scanResult
435434
case <-ctx.Done():
436435
return ctx.Err()
437436
}

parser/imports/fast_imports.go renamed to parser/fastscan/fastscan.go

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
package imports
15+
package fastscan
1616

1717
import (
1818
"io"
@@ -26,23 +26,38 @@ var closeSymbol = map[tokenType]tokenType{
2626
openAngleToken: closeAngleToken,
2727
}
2828

29-
// ScanForImports scans the given reader, which should contain Protobuf source, and
30-
// returns the set of imports declared in the file. It returns an error if there is
29+
// Result is the result of scanning a Protobuf source file. It contains the
30+
// information extracted from the file.
31+
type Result struct {
32+
PackageName string
33+
Imports []string
34+
}
35+
36+
// Scan scans the given reader, which should contain Protobuf source, and
37+
// returns the set of imports declared in the file. The result also contains the
38+
// value of any package declaration in the file. It returns an error if there is
3139
// an I/O error reading from r. In the event of such an error, it will still return
32-
// a slice of imports that contains as many imports as were found before the I/O
33-
// error occurred.
34-
func ScanForImports(r io.Reader) ([]string, error) {
35-
var imports []string
40+
// a result that contains as much information as was found before the I/O error
41+
// occurred.
42+
func Scan(r io.Reader) (Result, error) {
43+
var res Result
44+
45+
var currentImport []string // if non-nil, parsing an import statement
46+
var packageComponents []string // if non-nil, parsing a package statement
47+
48+
// current stack of open blocks -- those starting with {, [, (, or < for
49+
// which we haven't yet encountered the closing }, ], ), or >
3650
var contextStack []tokenType
37-
var currentImport []string
51+
declarationStart := true
52+
3853
lexer := newLexer(r)
3954
for {
4055
token, text, err := lexer.Lex()
4156
if err != nil {
42-
return imports, err
57+
return res, err
4358
}
4459
if token == eofToken {
45-
return imports, nil
60+
return res, nil
4661
}
4762

4863
if currentImport != nil {
@@ -51,12 +66,26 @@ func ScanForImports(r io.Reader) ([]string, error) {
5166
currentImport = append(currentImport, text.(string))
5267
default:
5368
if len(currentImport) > 0 {
54-
imports = append(imports, strings.Join(currentImport, ""))
69+
res.Imports = append(res.Imports, strings.Join(currentImport, ""))
5570
}
5671
currentImport = nil
5772
}
5873
}
5974

75+
if packageComponents != nil {
76+
switch token {
77+
case identifierToken:
78+
packageComponents = append(packageComponents, text.(string))
79+
case periodToken:
80+
packageComponents = append(packageComponents, ".")
81+
default:
82+
if len(packageComponents) > 0 {
83+
res.PackageName = strings.Join(packageComponents, "")
84+
}
85+
packageComponents = nil
86+
}
87+
}
88+
6089
switch token {
6190
case openParenToken, openBraceToken, openBracketToken, openAngleToken:
6291
contextStack = append(contextStack, closeSymbol[token])
@@ -65,9 +94,15 @@ func ScanForImports(r io.Reader) ([]string, error) {
6594
contextStack = contextStack[:len(contextStack)-1]
6695
}
6796
case identifierToken:
68-
if text == "import" && len(contextStack) == 0 {
69-
currentImport = []string{}
97+
if declarationStart && len(contextStack) == 0 {
98+
if text == "import" {
99+
currentImport = []string{}
100+
} else if text == "package" {
101+
packageComponents = []string{}
102+
}
70103
}
71104
}
105+
106+
declarationStart = token == closeBraceToken || token == semicolonToken
72107
}
73108
}

parser/imports/lexer.go renamed to parser/fastscan/lexer.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
package imports
15+
package fastscan
1616

1717
import (
1818
"bufio"
@@ -56,6 +56,8 @@ const (
5656
closeBraceToken = tokenType('}')
5757
closeBracketToken = tokenType(']')
5858
closeAngleToken = tokenType('>')
59+
periodToken = tokenType('.')
60+
semicolonToken = tokenType(';')
5961
)
6062

6163
type runeReader struct {

0 commit comments

Comments
 (0)