Patch summary (free text ahead of the first "diff --git" header):
  1. bump-agent-versions workflow: adds a manual trigger and explicit token
     permissions, drops the Vault-based helper actions, and posts Slack
     notifications through slackapi/slack-github-action.
  2. Adds the changelog fragment for the delayed-enrollment fix.
  3. tryDelayEnroll (run.go): retries enrollment until it succeeds or the
     context is cancelled instead of returning on the first error.
NOTE(review): the removed and re-added "- if: ${{ failure() }}" lines in the
workflow read the same here, so they presumably differed only in whitespace;
confirm against the target tree before applying.
NOTE(review): the fragment's blob id on its "index" line predates the
`component` value and comment wording used below.

diff --git a/.github/workflows/bump-agent-versions.yml b/.github/workflows/bump-agent-versions.yml
index b8114c0f423..b517178d0f3 100644
--- a/.github/workflows/bump-agent-versions.yml
+++ b/.github/workflows/bump-agent-versions.yml
@@ -2,21 +2,17 @@
 name: update-agent-versions
 
 on:
+  workflow_dispatch:
   schedule:
     - cron: "0 0 * * *"
 
 jobs:
   update_versions:
     runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
     steps:
-      - name: Setup Git
-        uses: elastic/apm-pipeline-library/.github/actions/setup-git@current
-
-      - uses: elastic/apm-pipeline-library/.github/actions/github-token@current
-        with:
-          url: ${{ secrets.VAULT_ADDR }}
-          roleId: ${{ secrets.VAULT_ROLE_ID }}
-          secretId: ${{ secrets.VAULT_SECRET_ID }}
 
       - name: Checkout
         uses: actions/checkout@v4
@@ -42,24 +38,49 @@ jobs:
       - name: Update versions
         id: update
         env:
-          GH_TOKEN: ${{ env.GITHUB_TOKEN }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: ./.github/workflows/bump-agent-versions.sh
 
-      - if: ${{ failure() }}
-        uses: elastic/apm-pipeline-library/.github/actions/slack-message@current
+      - if: ${{ failure() }}
+        uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0
         with:
-          url: ${{ secrets.VAULT_ADDR }}
-          roleId: ${{ secrets.VAULT_ROLE_ID }}
-          secretId: ${{ secrets.VAULT_SECRET_ID }}
-          message: ":traffic_cone: Elastic Agent version update failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
-          channel: "#ingest-notifications"
+          channel-id: '#ingest-notifications'
+          payload: |
+            {
+              "text": "${{ env.SLACK_MESSAGE }}",
+              "blocks": [
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "${{ env.SLACK_MESSAGE }}"
+                  }
+                }
+              ]
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          SLACK_MESSAGE: ":traffic_cone: Elastic Agent version update failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
 
       # if a PR was created as a result of this job, we notify on the Slack channel
       - if: ${{ startsWith(steps.update.outputs.pr, 'https') }}
-        uses: elastic/apm-pipeline-library/.github/actions/slack-message@current
+        uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0
         with:
-          url: ${{ secrets.VAULT_ADDR }}
-          roleId: ${{ secrets.VAULT_ROLE_ID }}
-          secretId: ${{ secrets.VAULT_SECRET_ID }}
-          message: "Update for Elastic Agent versions has been created: ${{ steps.update.outputs.pr }}"
-          channel: "#ingest-notifications"
+          channel-id: '#ingest-notifications'
+          payload: |
+            {
+              "text": "${{ env.SLACK_MESSAGE }}",
+              "blocks": [
+                {
+                  "type": "section",
+                  "text": {
+                    "type": "mrkdwn",
+                    "text": "${{ env.SLACK_MESSAGE }}"
+                  }
+                }
+              ]
+            }
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          SLACK_MESSAGE: "Update for Elastic Agent versions has been created: ${{ steps.update.outputs.pr }}"
+
diff --git a/changelog/fragments/1715266989-Make-delayed-enrollment-try-indefinitely.yaml b/changelog/fragments/1715266989-Make-delayed-enrollment-try-indefinitely.yaml
new file mode 100644
index 00000000000..5c6a7000763
--- /dev/null
+++ b/changelog/fragments/1715266989-Make-delayed-enrollment-try-indefinitely.yaml
@@ -0,0 +1,32 @@
+# Kind can be one of:
+# - breaking-change: a change to previously-documented behavior
+# - deprecation: functionality that is being removed in a later release
+# - bug-fix: fixes a problem in a previous version
+# - enhancement: extends functionality but does not break or fix existing behavior
+# - feature: new functionality
+# - known-issue: problems that we are aware of in a given version
+# - security: impacts on the security of a product or a user’s deployment.
+# - upgrade: important information for someone upgrading from a prior version
+# - other: does not fit into any of the other categories
+kind: bug-fix
+
+# Change summary; an 80ish-character description of the change.
+summary: Make delayed enrollment try indefinitely
+
+# Long description; in case the summary is not enough to describe the change
+# this field accommodates a description without length limits.
+# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
+#description:
+
+# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
+component: elastic-agent
+
+# PR URL; optional; the PR number that added the changeset.
+# If not present, it is automatically filled by the tooling finding the PR where this changelog fragment has been added.
+# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
+# Please provide it if you are adding a fragment for a different PR.
+pr: https://github.com/elastic/elastic-agent/pull/4727
+
+# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
+# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
+issue: https://github.com/elastic/elastic-agent/issues/4716
diff --git a/internal/pkg/agent/cmd/run.go b/internal/pkg/agent/cmd/run.go
index a1864dff7dc..de181923cec 100644
--- a/internal/pkg/agent/cmd/run.go
+++ b/internal/pkg/agent/cmd/run.go
@@ -547,9 +547,18 @@ func tryDelayEnroll(ctx context.Context, logger *logger.Logger, cfg *configurati
 	if err != nil {
 		return nil, err
 	}
-	err = c.Execute(ctx, cli.NewIOStreams())
-	if err != nil {
-		return nil, err
+	// perform the enrollment in a loop, it should keep trying to enroll no matter what
+	// the enrollCmd has built in backoff so no need to wrap this in its own backoff as well
+	for {
+		if ctx.Err() != nil {
+			return nil, ctx.Err()
+		}
+		err = c.Execute(ctx, cli.NewIOStreams())
+		if err == nil {
+			// enrollment was successful
+			break
+		}
+		logger.Error(fmt.Errorf("failed to perform delayed enrollment (will try again): %w", err))
 	}
 	err = os.Remove(enrollPath)
 	if err != nil {