From 0357473d97cb5c0b357360472cb4a201f4bf2c55 Mon Sep 17 00:00:00 2001 From: Victor Lyuboslavsky Date: Tue, 5 Nov 2024 11:02:44 -0600 Subject: [PATCH] Fully deleting pending host. (#23503) (#23536) #23204 When deleting Pending hosts, using the standard `ds.DeleteHosts` method. This seems cleaner and more scalable than trying to handle every host table in cleanups cron. # Checklist for submitter - [x] Added/updated tests - [x] Manual QA for all new/changed functionality (cherry picked from commit 9e1c451e2b012915e09542cebb146fc704e2e054) --- server/datastore/mysql/apple_mdm.go | 35 ++++--- server/datastore/mysql/hosts.go | 107 ++++++++++++++------- server/datastore/mysql/hosts_test.go | 37 +++++++ server/service/integration_mdm_dep_test.go | 15 +++ 4 files changed, 147 insertions(+), 47 deletions(-) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index b19566decb56..37d8dfe61611 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -1527,23 +1527,36 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui } selectStmt, selectArgs, err := sqlx.In(` - SELECT h.id + SELECT h.id, hmdm.enrollment_status FROM hosts h JOIN host_dep_assignments hdep ON h.id = hdep.host_id + LEFT JOIN host_mdm hmdm ON h.id = hmdm.host_id WHERE hdep.abm_token_id = ? AND h.hardware_serial IN (?)`, abmTokenID, serials) if err != nil { return ctxerr.Wrap(ctx, err, "building IN statement for selecting host IDs") } return ds.withTx(ctx, func(tx sqlx.ExtContext) error { - var hostIDs []uint - if err = sqlx.SelectContext(ctx, tx, &hostIDs, selectStmt, selectArgs...); err != nil { + type hostWithEnrollmentStatus struct { + ID uint `db:"id"` + EnrollmentStatus *string `db:"enrollment_status"` + } + var hosts []hostWithEnrollmentStatus + if err = sqlx.SelectContext(ctx, tx, &hosts, selectStmt, selectArgs...); err != nil { return ctxerr.Wrap(ctx, err, "selecting host IDs") } - if len(hostIDs) == 0 { + if len(hosts) == 0 { // Nothing to delete. Hosts may have already been transferred to another ABM. return nil } + var hostIDs []uint + var hostIDsPending []uint + for _, host := range hosts { + hostIDs = append(hostIDs, host.ID) + if host.EnrollmentStatus != nil && *host.EnrollmentStatus == "Pending" { + hostIDsPending = append(hostIDsPending, host.ID) + } + } stmt, args, err := sqlx.In(` UPDATE host_dep_assignments @@ -1558,17 +1571,11 @@ func (ds *Datastore) DeleteHostDEPAssignments(ctx context.Context, abmTokenID ui // If pending host is no longer in ABM, we should delete it because it will never enroll in Fleet. // If the host is later re-added to ABM, it will be re-created. - deletePendingStmt, args, err := sqlx.In(` - DELETE h, hmdm FROM hosts h - JOIN host_mdm hmdm ON h.id = hmdm.host_id - WHERE h.id IN (?) AND hmdm.enrollment_status = 'Pending'`, hostIDs) - if err != nil { - return ctxerr.Wrap(ctx, err, "building delete IN statement") - } - if _, err := tx.ExecContext(ctx, deletePendingStmt, args...); err != nil { - return ctxerr.Wrap(ctx, err, "deleting pending hosts by host_id") + if len(hostIDsPending) == 0 { + return nil } - return nil + + return deleteHosts(ctx, tx, hostIDsPending) }) } diff --git a/server/datastore/mysql/hosts.go b/server/datastore/mysql/hosts.go index c2305dfe97ba..4a9af648f668 100644 --- a/server/datastore/mysql/hosts.go +++ b/server/datastore/mysql/hosts.go @@ -29,6 +29,7 @@ import ( var ( hostIssuesInsertBatchSize = 10000 hostIssuesUpdateFailingPoliciesBatchSize = 10000 + hostsDeleteBatchSize = 5000 ) // A large number of hosts could be changing teams at once, so we need to batch this operation to prevent excessive locks @@ -565,56 +566,87 @@ var additionalHostRefsSoftDelete = map[string]string{ } func (ds *Datastore) DeleteHost(ctx context.Context, hid uint) error { + return ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { + return deleteHosts(ctx, tx, []uint{hid}) + }) +} + +func deleteHosts(ctx context.Context, tx sqlx.ExtContext, hostIDs []uint) error { + if len(hostIDs) == 0 { + return nil + } delHostRef := func(tx sqlx.ExtContext, table string) error { - _, err := tx.ExecContext(ctx, fmt.Sprintf(`DELETE FROM %s WHERE host_id=?`, table), hid) + stmt, args, err := sqlx.In(fmt.Sprintf("DELETE FROM %s WHERE host_id IN (?)", table), hostIDs) + if err != nil { + return ctxerr.Wrapf(ctx, err, "building delete statement for %s", table) + } + _, err = tx.ExecContext(ctx, stmt, args...) if err != nil { - return ctxerr.Wrapf(ctx, err, "deleting %s for host %d", table, hid) + return ctxerr.Wrapf(ctx, err, "deleting %s for hosts %v", table, hostIDs) } return nil } // load just the host uuid for the MDM tables that rely on this to be cleared. - var hostUUID string - if err := ds.writer(ctx).GetContext(ctx, &hostUUID, `SELECT uuid FROM hosts WHERE id = ?`, hid); err != nil { - return ctxerr.Wrapf(ctx, err, "get uuid for host %d", hid) + var hostUUIDs []string + stmt, args, err := sqlx.In(`SELECT uuid FROM hosts WHERE id IN (?)`, hostIDs) + if err != nil { + return ctxerr.Wrapf(ctx, err, "building select statement for host uuids") + } + if err := sqlx.SelectContext(ctx, tx, &hostUUIDs, stmt, args...); err != nil { + return ctxerr.Wrapf(ctx, err, "get uuid for hosts %v", hostIDs) } - return ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { - _, err := tx.ExecContext(ctx, `DELETE FROM hosts WHERE id = ?`, hid) + stmt, args, err = sqlx.In(`DELETE FROM hosts WHERE id IN (?)`, hostIDs) + if err != nil { + return ctxerr.Wrapf(ctx, err, "building delete statement for hosts %v", hostIDs) + } + _, err = tx.ExecContext(ctx, stmt, args...) + if err != nil { + return ctxerr.Wrapf(ctx, err, "delete hosts") + } + + for _, table := range hostRefs { + err := delHostRef(tx, table) if err != nil { - return ctxerr.Wrapf(ctx, err, "delete host") + return err } + } - for _, table := range hostRefs { - err := delHostRef(tx, table) + stmt, args, err = sqlx.In(`DELETE FROM pack_targets WHERE type = ? AND target_id IN (?)`, fleet.TargetHost, hostIDs) + if err != nil { + return ctxerr.Wrapf(ctx, err, "building delete statement for pack_targets for hosts %v", hostIDs) + } + _, err = tx.ExecContext(ctx, stmt, args...) + if err != nil { + return ctxerr.Wrapf(ctx, err, "deleting pack_targets for hosts %v", hostIDs) + } + + // no point trying the uuid-based tables if the host's uuid is missing + if len(hostUUIDs) != 0 { + for table, col := range additionalHostRefsByUUID { + stmt, args, err := sqlx.In(fmt.Sprintf("DELETE FROM `%s` WHERE `%s` IN (?)", table, col), hostUUIDs) if err != nil { - return err + return ctxerr.Wrapf(ctx, err, "building delete statement for %s for hosts %v", table, hostUUIDs) + } + if _, err := tx.ExecContext(ctx, stmt, args...); err != nil { + return ctxerr.Wrapf(ctx, err, "deleting %s for host uuids %v", table, hostUUIDs) } } + } - _, err = tx.ExecContext(ctx, `DELETE FROM pack_targets WHERE type = ? AND target_id = ?`, fleet.TargetHost, hid) + // perform the soft-deletion of host-referencing tables + for table, col := range additionalHostRefsSoftDelete { + stmt, args, err := sqlx.In(fmt.Sprintf("UPDATE `%s` SET `%s` = NOW() WHERE host_id IN (?)", table, col), hostIDs) if err != nil { - return ctxerr.Wrapf(ctx, err, "deleting pack_targets for host %d", hid) + return ctxerr.Wrapf(ctx, err, "building update statement for %s for hosts %v", table, hostIDs) } - - // no point trying the uuid-based tables if the host's uuid is missing - if hostUUID != "" { - for table, col := range additionalHostRefsByUUID { - if _, err := tx.ExecContext(ctx, fmt.Sprintf("DELETE FROM `%s` WHERE `%s`=?", table, col), hostUUID); err != nil { - return ctxerr.Wrapf(ctx, err, "deleting %s for host uuid %s", table, hostUUID) - } - } - } - - // perform the soft-deletion of host-referencing tables - for table, col := range additionalHostRefsSoftDelete { - if _, err := tx.ExecContext(ctx, fmt.Sprintf("UPDATE `%s` SET `%s` = NOW() WHERE host_id=?", table, col), hid); err != nil { - return ctxerr.Wrapf(ctx, err, "soft-deleting %s for host id %d", table, hid) - } + if _, err := tx.ExecContext(ctx, stmt, args...); err != nil { + return ctxerr.Wrapf(ctx, err, "soft-deleting %s for host ids %v", table, hostIDs) } + } - return nil - }) + return nil } func (ds *Datastore) Host(ctx context.Context, id uint) (*fleet.Host, error) { @@ -2941,9 +2973,18 @@ func (ds *Datastore) TotalAndUnseenHostsSince(ctx context.Context, teamID *uint, } func (ds *Datastore) DeleteHosts(ctx context.Context, ids []uint) error { - for _, id := range ids { - if err := ds.DeleteHost(ctx, id); err != nil { - return ctxerr.Wrapf(ctx, err, "delete host %d", id) + for i := 0; i < len(ids); i += hostsDeleteBatchSize { + start := i + end := i + hostsDeleteBatchSize + if end > len(ids) { + end = len(ids) + } + idsBatch := ids[start:end] + err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { + return deleteHosts(ctx, tx, idsBatch) + }) + if err != nil { + return err } } return nil diff --git a/server/datastore/mysql/hosts_test.go b/server/datastore/mysql/hosts_test.go index d87a880552a0..61899bd7decb 100644 --- a/server/datastore/mysql/hosts_test.go +++ b/server/datastore/mysql/hosts_test.go @@ -748,6 +748,43 @@ func testHostsDelete(t *testing.T, ds *Datastore) { _, err = ds.Host(context.Background(), host.ID) assert.NotNil(t, err) + + originalHostDeleteBatchSize := hostsDeleteBatchSize + hostsDeleteBatchSize = 2 + t.Cleanup(func() { + hostsDeleteBatchSize = originalHostDeleteBatchSize + }) + + // Delete nothing -- no-op + require.NoError(t, ds.DeleteHosts(context.Background(), nil)) + + numHosts := 5 + hosts := make([]*fleet.Host, numHosts) + for i := 0; i < numHosts; i++ { + hosts[i], err = ds.NewHost(context.Background(), &fleet.Host{ + DetailUpdatedAt: time.Now(), + LabelUpdatedAt: time.Now(), + PolicyUpdatedAt: time.Now(), + SeenTime: time.Now(), + NodeKey: ptr.String(fmt.Sprint(i)), + UUID: fmt.Sprint(i), + Hostname: fmt.Sprintf("foo.local.%d", i), + }) + require.NoError(t, err) + require.NotNil(t, hosts[i]) + } + var hostIDs []uint + for _, h := range hosts { + hostIDs = append(hostIDs, h.ID) + } + + // Delete all hosts + require.NoError(t, ds.DeleteHosts(context.Background(), hostIDs)) + // Make sure each host is deleted + for _, h := range hosts { + _, err = ds.Host(context.Background(), h.ID) + assert.NotNil(t, err) + } } func listHostsCheckCount(t *testing.T, ds *Datastore, filter fleet.TeamFilter, opt fleet.HostListOptions, expectedCount int) []*fleet.Host { diff --git a/server/service/integration_mdm_dep_test.go b/server/service/integration_mdm_dep_test.go index bdb365b9e7b0..a8ac0d5f9ab8 100644 --- a/server/service/integration_mdm_dep_test.go +++ b/server/service/integration_mdm_dep_test.go @@ -782,10 +782,13 @@ func (s *integrationMDMTestSuite) TestDEPProfileAssignment() { s.DoJSON("POST", "/api/latest/fleet/teams", team, http.StatusOK, &createTeamResp) require.NotZero(t, createTeamResp.Team.ID) team = createTeamResp.Team + var device1ID uint for _, h := range listHostsRes.Hosts { if h.HardwareSerial == devices[1].SerialNumber { err = s.ds.AddHostsToTeam(ctx, &team.ID, []uint{h.ID}) require.NoError(t, err) + device1ID = h.ID + break } } @@ -923,6 +926,12 @@ func (s *integrationMDMTestSuite) TestDEPProfileAssignment() { {SerialNumber: deletedSerial, Model: "MacBook Mini", OS: "osx", OpType: "deleted", OpDate: time.Now().Add(2 * time.Second)}, } profileAssignmentReqs = []profileAssignmentReq{} + // Check that host display name is present for the device to be deleted; later we will check that it has been deleted + mysql.ExecAdhocSQL(t, s.ds, func(q sqlx.ExtContext) error { + var dest uint + return sqlx.GetContext(ctx, q, &dest, + "SELECT 1 FROM host_display_names WHERE host_id = ?", device1ID) + }) s.runDEPSchedule() // all hosts should be returned from the hosts endpoint listHostsRes = listHostsResponse{} @@ -942,6 +951,12 @@ func (s *integrationMDMTestSuite) TestDEPProfileAssignment() { gotSerials = append(gotSerials, req.Devices...) } assert.ElementsMatch(t, []string{addedModifiedDeletedSerial, deletedAddedSerial}, gotSerials) + // Check that host display name was deleted + mysql.ExecAdhocSQL(t, s.ds, func(q sqlx.ExtContext) error { + var dest uint + return sqlx.GetContext(ctx, q, &dest, + "SELECT 1 FROM host_display_names WHERE NOT EXISTS (SELECT 1 FROM host_display_names WHERE host_id = ?)", device1ID) + }) // delete all MDM info mysql.ExecAdhocSQL(t, s.ds, func(q sqlx.ExtContext) error {