From cc28d26d20e532d04a2fc6c1d461c8b5cdd09d73 Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Wed, 23 Oct 2024 14:59:54 +0700 Subject: [PATCH] manifest-verify: parallel etag fetch (#12421) --- .github/workflows/manifest.yml | 18 +---------- erigon-lib/downloader/snaptype/files.go | 2 +- erigon-lib/downloader/webseed.go | 42 ++++++++++++++++--------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/.github/workflows/manifest.yml b/.github/workflows/manifest.yml index 7e65c0f619b..ee14b9f153e 100644 --- a/.github/workflows/manifest.yml +++ b/.github/workflows/manifest.yml @@ -20,22 +20,7 @@ on: workflow_dispatch: jobs: - # check-snap-modifications: - # runs-on: ubuntu-24.04 - # outputs: - # modified: ${{ steps.check-modified.outputs.modified }} - # - # steps: - # - uses: actions/checkout@v4 - # with: - # fetch-depth: 2 # Ensures we fetch enough history to compare - # - # - name: Is erigontech/erigon-snapshot updated in go.mod # if not, pipeline should exit because grep exit code >0 when no match - # run: | - # git diff HEAD~1 HEAD -- go.mod | grep 'github.com/erigontech/erigon-snapshot' - ManifestCheck: - # needs: check-snap-modifications if: github.event.pull_request.draft == false runs-on: ubuntu-24.04 @@ -43,8 +28,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: - go-version: '1.22' - - run: sudo apt update && sudo apt install build-essential + go-version: '1.23' - run: make downloader - run: echo $ModModified - run: ./build/bin/downloader manifest-verify --chain mainnet diff --git a/erigon-lib/downloader/snaptype/files.go b/erigon-lib/downloader/snaptype/files.go index b835167e123..26cebf23e70 100644 --- a/erigon-lib/downloader/snaptype/files.go +++ b/erigon-lib/downloader/snaptype/files.go @@ -214,7 +214,7 @@ func AllV3Extensions() []string { } func IsSeedableExtension(name string) bool { - for _, ext := range append(SeedableV2Extensions(), SeedableV3Extensions()...) { + for _, ext := range append(AllV2Extensions(), AllV3Extensions()...) { if strings.HasSuffix(name, ext) { return true } diff --git a/erigon-lib/downloader/webseed.go b/erigon-lib/downloader/webseed.go index 69a9298768b..f7fca735c98 100644 --- a/erigon-lib/downloader/webseed.go +++ b/erigon-lib/downloader/webseed.go @@ -164,29 +164,43 @@ func (d *WebSeeds) checkHasTorrents(manifestResponse snaptype.WebSeedsFromProvid } func (d *WebSeeds) fetchFileEtags(ctx context.Context, manifestResponse snaptype.WebSeedsFromProvider) (tags map[string]string, invalidTags, etagFetchFailed []string, err error) { + lock := sync.Mutex{} etagFetchFailed = make([]string, 0) tags = make(map[string]string) invalidTagsMap := make(map[string]string) + eg := errgroup.Group{} + eg.SetLimit(100) for name, wurl := range manifestResponse { + name, wurl := name, wurl u, err := url.Parse(wurl) if err != nil { return nil, nil, nil, fmt.Errorf("webseed.fetchFileEtags: %w", err) } - md5Tag, err := d.retrieveFileEtag(ctx, u) - if err != nil { - if errors.Is(err, ErrInvalidEtag) { - invalidTagsMap[name] = md5Tag - continue - } - if errors.Is(err, ErrEtagNotFound) { - etagFetchFailed = append(etagFetchFailed, name) - continue + eg.Go(func() error { + md5Tag, err := d.retrieveFileEtag(ctx, u) + + lock.Lock() + defer lock.Unlock() + if err != nil { + d.logger.Debug("[snapshots.webseed] get file ETag", "err", err, "url", u.String()) + if errors.Is(err, ErrInvalidEtag) { + invalidTagsMap[name] = md5Tag + return nil + } + if errors.Is(err, ErrEtagNotFound) { + etagFetchFailed = append(etagFetchFailed, name) + return nil + } + return fmt.Errorf("webseed.fetchFileEtags: %w", err) } - d.logger.Debug("[snapshots.webseed] get file ETag", "err", err, "url", u.String()) - return nil, nil, nil, fmt.Errorf("webseed.fetchFileEtags: %w", err) - } - tags[name] = md5Tag + tags[name] = md5Tag + return nil + }) + + } + if err := eg.Wait(); err != nil { + return nil, nil, nil, err } invalidTags = make([]string, 0) @@ -522,7 +536,7 @@ func (d *WebSeeds) retrieveManifest(ctx context.Context, webSeedProviderUrl *url d.logger.Debug("[snapshots.webseed] empty line in manifest.txt", "webseed", webSeedProviderUrl.String(), "lineNum", fi) } continue - case "manifest.txt": + case "manifest.txt", "node.txt": continue default: response[trimmed] = webSeedProviderUrl.JoinPath(trimmed).String()