From 15f951560d1111d2d74a783748cf5aff2f5226cc Mon Sep 17 00:00:00 2001 From: Michael Hollister Date: Thu, 9 May 2024 15:47:32 -0500 Subject: [PATCH] Add support for batch record purging --- CHANGELOG.md | 9 +++++---- api/custom/purge.go | 34 +++++++++++++++++++++++----------- docs/admin.md | 4 +++- tasks/task_runner/purge.go | 14 ++++++++++---- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f20db813..6094d1b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to this project will be documented in this file. -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * New datastore option to ignore Redis cache when downloading media served by a `publicBaseUrl`. This can help ensure more requests get redirected to the CDN. * `HEAD /download` is now supported, as per [MSC4120](https://github.com/matrix-org/matrix-spec-proposals/pull/4120). +* The `POST /_matrix/media/unstable/admin/purge//` endpoint now supports batch purging of media ids. ### Fixed @@ -109,13 +110,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * IPFS support has been removed due to maintenance burden. * Exports initiated through the admin API no longer support `?include_data=false`. Exports will always contain data. -* Server-side blurhash calculation has been removed. Clients and bridges already calculate blurhashes locally where applicable. +* Server-side blurhash calculation has been removed. Clients and bridges already calculate blurhashes locally where applicable. 
### Changed * **Mandatory configuration change**: You must add datastore IDs to your datastore configuration, as matrix-media-repo will no longer manage datastores for you. * If compiling `matrix-media-repo`, note that new external dependencies are required. See [the docs](https://docs.t2bot.io/matrix-media-repo/v1.3.3/installing/method/compilation.html). - * Docker images already contain these dependencies. + * Docker images already contain these dependencies. * Datastores no longer use the `enabled` flag set on them. Use `forKinds: []` instead to disable a datastore's usage. * Per-user upload quotas now do not allow users to exceed the maximum values, even by 1 byte. Previously, users could exceed the limits by a little bit. * Updated to Go 1.19, then Go 1.20 in the same release cycle. @@ -355,7 +356,7 @@ a large database (more than about 100k uploaded files), run the following steps user is `media`, then run: ```sql ALTER TABLE user_stats OWNER TO media; - ALTER FUNCTION track_update_user_media() OWNER TO media; + ALTER FUNCTION track_update_user_media() OWNER TO media; ``` ### Added diff --git a/api/custom/purge.go b/api/custom/purge.go index ce786c4a..50b0cbd2 100644 --- a/api/custom/purge.go +++ b/api/custom/purge.go @@ -53,6 +53,7 @@ func PurgeIndividualRecord(r *http.Request, rctx rcontext.RequestContext, user _ server := _routers.GetParam("server", r) mediaId := _routers.GetParam("mediaId", r) + additionalMediaIds := r.URL.Query()["id"] if !_routers.ServerNameRegex.MatchString(server) { return _responses.BadRequest("invalid server ID") @@ -63,12 +64,23 @@ func PurgeIndividualRecord(r *http.Request, rctx rcontext.RequestContext, user _ "mediaId": mediaId, }) - _, err := task_runner.PurgeMedia(rctx, authCtx, &task_runner.QuarantineThis{ + records := make([]*task_runner.QuarantineThis, 0) + records = append(records, &task_runner.QuarantineThis{ Single: &task_runner.QuarantineRecord{ Origin: server, MediaId: mediaId, }, }) + for _, id := range 
additionalMediaIds { + records = append(records, &task_runner.QuarantineThis{ + Single: &task_runner.QuarantineRecord{ + Origin: server, + MediaId: id, + }, + }) + } + + _, err := task_runner.PurgeMedia(rctx, authCtx, records) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() @@ -101,9 +113,9 @@ func PurgeQuarantined(r *http.Request, rctx rcontext.RequestContext, user _apime return _responses.InternalServerError("error fetching media records") } - mxcs, err := task_runner.PurgeMedia(rctx, authCtx, &task_runner.QuarantineThis{ + mxcs, err := task_runner.PurgeMedia(rctx, authCtx, []*task_runner.QuarantineThis{{ DbMedia: affected, - }) + }}) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() @@ -154,9 +166,9 @@ func PurgeOldMedia(r *http.Request, rctx rcontext.RequestContext, user _apimeta. return _responses.InternalServerError("error fetching media records") } - mxcs, err := task_runner.PurgeMedia(rctx, &task_runner.PurgeAuthContext{}, &task_runner.QuarantineThis{ + mxcs, err := task_runner.PurgeMedia(rctx, &task_runner.PurgeAuthContext{}, []*task_runner.QuarantineThis{{ DbMedia: records, - }) + }}) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() @@ -211,9 +223,9 @@ func PurgeUserMedia(r *http.Request, rctx rcontext.RequestContext, user _apimeta return _responses.InternalServerError("error fetching media records") } - mxcs, err := task_runner.PurgeMedia(rctx, authCtx, &task_runner.QuarantineThis{ + mxcs, err := task_runner.PurgeMedia(rctx, authCtx, []*task_runner.QuarantineThis{{ DbMedia: records, - }) + }}) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() @@ -284,9 +296,9 @@ func PurgeRoomMedia(r *http.Request, rctx rcontext.RequestContext, user _apimeta mxcs = append(mxcs, allMedia.RemoteMxcs...) 
} - mxcs2, err := task_runner.PurgeMedia(rctx, authCtx, &task_runner.QuarantineThis{ + mxcs2, err := task_runner.PurgeMedia(rctx, authCtx, []*task_runner.QuarantineThis{{ MxcUris: mxcs, - }) + }}) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() @@ -338,9 +350,9 @@ func PurgeDomainMedia(r *http.Request, rctx rcontext.RequestContext, user _apime return _responses.InternalServerError("error fetching media records") } - mxcs, err := task_runner.PurgeMedia(rctx, authCtx, &task_runner.QuarantineThis{ + mxcs, err := task_runner.PurgeMedia(rctx, authCtx, []*task_runner.QuarantineThis{{ DbMedia: records, - }) + }}) if err != nil { if errors.Is(err, common.ErrWrongUser) { return _responses.AuthFailed() diff --git a/docs/admin.md b/docs/admin.md index 2dfa26c7..3295d58b 100644 --- a/docs/admin.md +++ b/docs/admin.md @@ -44,7 +44,7 @@ URL: `POST /_matrix/media/unstable/admin/purge/quarantined?access_token=your_acc This will delete all media that has previously been quarantined, local or remote. If called by a homeserver administrator (who is not a repository administrator), only content quarantined for their domain will be purged. -#### Purge individual record +#### Purge individual record (batch of records / single record) URL: `POST /_matrix/media/unstable/admin/purge//?access_token=your_access_token` @@ -52,6 +52,8 @@ URL: `POST /_matrix/media/unstable/admin/purge//?access_token= This will delete the media record, regardless of it being local or remote. Can be called by homeserver administrators and the uploader to delete it. +For a batch of records, use the same endpoint as above, but specify one or more `?id=` query parameters. 
+ #### Purge media uploaded by user URL: `POST /_matrix/media/unstable/admin/purge/user/?before_ts=1234567890&access_token=your_access_token` (`before_ts` is in milliseconds) diff --git a/tasks/task_runner/purge.go b/tasks/task_runner/purge.go index 4574a5cb..a1758da8 100644 --- a/tasks/task_runner/purge.go +++ b/tasks/task_runner/purge.go @@ -30,10 +30,16 @@ func (c *PurgeAuthContext) canAffect(media *database.DbMedia) bool { return true } -func PurgeMedia(ctx rcontext.RequestContext, authContext *PurgeAuthContext, toHandle *QuarantineThis) ([]string, error) { - records, err := resolveMedia(ctx, "", toHandle) - if err != nil { - return nil, err +func PurgeMedia(ctx rcontext.RequestContext, authContext *PurgeAuthContext, toHandles []*QuarantineThis) ([]string, error) { + records := make([]*database.DbMedia, 0) + + for _, toHandle := range toHandles { + record, err := resolveMedia(ctx, "", toHandle) + if err != nil { + return nil, err + } + + records = append(records, record...) } // Check auth on all records before actually processing them