diff --git a/cmd/admin-datasource-rehash.go b/cmd/admin-datasource-rehash.go
new file mode 100644
index 0000000000..3dfa807a7c
--- /dev/null
+++ b/cmd/admin-datasource-rehash.go
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2019-2021. Abstrium SAS
+ * This file is part of Pydio Cells.
+ *
+ * Pydio Cells is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Pydio Cells is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with Pydio Cells. If not, see .
+ *
+ * The latest code can be found at .
+ */
+
+package cmd
+
+import (
+	"os"
+	"path"
+
+	"github.com/manifoldco/promptui"
+	"github.com/spf13/cobra"
+	"google.golang.org/protobuf/types/known/anypb"
+
+	"github.com/pydio/cells/v4/common"
+	"github.com/pydio/cells/v4/common/client/grpc"
+	"github.com/pydio/cells/v4/common/proto/jobs"
+	"github.com/pydio/cells/v4/common/proto/service"
+	"github.com/pydio/cells/v4/common/proto/tree"
+	"github.com/pydio/cells/v4/common/utils/uuid"
+)
+
+var (
+	rehashDsName         string
+	rehashPath           string
+	rehashUserName       string
+	rehashMaxConcurrency int
+	rehashForceRecompute bool
+	rehashTimeout        string
+)
+
+// dsRehashCmd posts a scheduler job running the "actions.tree.cells-hash" action
+// on the files of a datasource, optionally restricted to a sub-folder (--path).
+var dsRehashCmd = &cobra.Command{
+	Use:   "rehash",
+	Short: "Trigger rehash for a structured datasource",
+	Long: `
+DESCRIPTION
+
+  Look up for files where x-cells-hash is missing and recompute them. This operation is launched in scheduler and can take
+  some time (and CPU).
+
+EXAMPLES
+
+  1. To trigger the rehashing of "pydiods1" datasource:
+  $ ` + os.Args[0] + ` admin datasource rehash --datasource=pydiods1
+
+  2. Process only the folder/subfolder data :
+  $ ` + os.Args[0] + ` admin datasource rehash --datasource=pydiods1 --path=folder/subfolder
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		if rehashDsName == "" || rehashUserName == "" {
+			cmd.Println("Please provide at least a datasource name (--datasource) and an admin user name")
+			cmd.Help()
+			return
+		}
+
+		params := map[string]string{}
+		if rehashForceRecompute {
+			params["forceRecompute"] = "true"
+		}
+		ap, err := anypb.New(&tree.Query{
+			Type:       tree.NodeType_LEAF,
+			PathPrefix: []string{path.Join(rehashDsName, rehashPath)},
+		})
+		if err != nil {
+			cmd.Println(promptui.IconBad + " [ERROR] " + err.Error())
+			return
+		}
+
+		jobClient := jobs.NewJobServiceClient(grpc.GetClientConnFromCtx(ctx, common.ServiceJobs))
+		job := &jobs.Job{
+			ID:             uuid.New(),
+			Owner:          rehashUserName,
+			Label:          "Recompute Cells Hash",
+			AutoStart:      true,
+			AutoClean:      true,
+			MaxConcurrency: int32(rehashMaxConcurrency),
+			Actions: []*jobs.Action{
+				{
+					ID:         "actions.tree.cells-hash",
+					Parameters: params,
+					NodesSelector: &jobs.NodesSelector{
+						Query: &service.Query{SubQueries: []*anypb.Any{ap}},
+						Label: "Files selection",
+					},
+				},
+			},
+			Timeout: rehashTimeout,
+		}
+
+		if _, err := jobClient.PutJob(ctx, &jobs.PutJobRequest{Job: job}); err != nil {
+			cmd.Println(promptui.IconBad + " [ERROR] " + err.Error())
+		} else {
+			cmd.Println(promptui.IconGood + " [SUCCESS] Posted job for recomputing hashes on all files")
+		}
+
+	},
+}
+
+func init() {
+	dsRehashCmd.PersistentFlags().StringVarP(&rehashDsName, "datasource", "d", "pydiods1", "Name of datasource to process")
+	dsRehashCmd.PersistentFlags().StringVarP(&rehashUserName, "username", "u", "", "Username under which the job will be executed (generally admin)")
+	dsRehashCmd.PersistentFlags().StringVarP(&rehashPath, "path", "p", "", "Restrict operation to a specific folder")
+	dsRehashCmd.PersistentFlags().StringVarP(&rehashTimeout, "timeout", "t", "30m", "Maximum job duration")
+	dsRehashCmd.PersistentFlags().IntVarP(&rehashMaxConcurrency, "concurrency", "c", 10, "Maximum concurrency for computing files hashes")
+	dsRehashCmd.PersistentFlags().BoolVarP(&rehashForceRecompute, "force", "f", false, "Force recomputing hash even if it already exists")
+	DataSourceCmd.AddCommand(dsRehashCmd)
+}
diff --git a/scheduler/actions/tree/cells-hash.go b/scheduler/actions/tree/cells-hash.go
index 7357c74e1e..1401bbfab8 100644
--- a/scheduler/actions/tree/cells-hash.go
+++ b/scheduler/actions/tree/cells-hash.go
@@ -23,6 +23,8 @@ package tree
 import (
 	"context"
 	"encoding/hex"
+	"io"
+
 	"github.com/pydio/cells/v4/common"
 	"github.com/pydio/cells/v4/common/client/grpc"
 	"github.com/pydio/cells/v4/common/forms"
@@ -34,7 +36,6 @@ import (
 	"github.com/pydio/cells/v4/common/utils/hasher/simd"
 	"github.com/pydio/cells/v4/scheduler/actions"
 	"github.com/pydio/cells/v4/scheduler/actions/tools"
-	"io"
 )
 
 var (
@@ -43,24 +44,40 @@ var (
 
 type CellsHashAction struct {
 	tools.ScopedRouterConsumer
+	forceRecompute string
 }
 
 func (c *CellsHashAction) GetDescription(lang ...string) actions.ActionDescription {
 	return actions.ActionDescription{
 		ID:                cellsHashActionName,
-		Label:             "Compute Internal Hash",
+		Label:             "Compute Hash",
 		Icon:              "pound-box",
 		Category:          actions.ActionCategoryTree,
 		Description:       "Compute file signature using Cells internal algorithm",
-		InputDescription:  "Multiple selection of files or folders",
-		OutputDescription: "Updated selection of files or folders",
+		InputDescription:  "Multiple selection of files",
+		OutputDescription: "Updated selection of files",
 		SummaryTemplate:   "",
-		HasForm:           false,
+		HasForm:           true,
 	}
 }
 
 func (c *CellsHashAction) GetParametersForm() *forms.Form {
-	return &forms.Form{}
+	return &forms.Form{
+		Groups: []*forms.Group{
+			{
+				Fields: []forms.Field{
+					&forms.FormField{
+						Name:        "forceRecompute",
+						Type:        forms.ParamBool,
+						Label:       "Force Recompute",
+						Description: "Recompute X-Cells-Hash even if it already exists",
+						Default:     false,
+						Mandatory:   false,
+					},
+				},
+			},
+		},
+	}
 }
 
 // GetName returns this action unique identifier
@@ -70,6 +87,9 @@ func (c *CellsHashAction) GetName() string {
 
 // Init passes parameters to the action
 func (c *CellsHashAction) Init(job *jobs.Job, action *jobs.Action) error {
+	if b, o := action.Parameters["forceRecompute"]; o {
+		c.forceRecompute = b
+	}
 	return nil
 }
 
@@ -80,6 +100,8 @@ func (c *CellsHashAction) Run(ctx context.Context, channels *actions.RunnableCha
 		return input.WithIgnore(), nil // Ignore
 	}
 
+	forceRecompute, _ := jobs.EvaluateFieldBool(ctx, input, c.forceRecompute)
+
 	ct, cli, e := c.GetHandler(ctx)
 	if e != nil {
 		return input.WithError(e), e
@@ -88,11 +110,19 @@ func (c *CellsHashAction) Run(ctx context.Context, channels *actions.RunnableCha
 	mc := tree.NewNodeReceiverClient(grpc.GetClientConnFromCtx(c.GetRuntimeContext(), common.ServiceMeta))
 	var outnodes []*tree.Node
 	for _, node := range input.Nodes {
-		resp, er := cli.ReadNode(ctx, &tree.ReadNodeRequest{Node: node})
-		if er != nil {
-			return input.WithError(er), er
+		if node.Etag == "" {
+			// Reload node if necessary
+			resp, er := cli.ReadNode(ctx, &tree.ReadNodeRequest{Node: node})
+			if er != nil {
+				return input.WithError(er), er
+			}
+			node = resp.GetNode()
+		}
+		if !forceRecompute && node.GetStringMeta(common.MetaNamespaceHash) != "" {
+			// Meta already exists, do not recompute
+			continue
 		}
-		rc, er := cli.GetObject(ctx, resp.GetNode(), &models.GetRequestData{Length: resp.Node.GetSize()})
+		rc, er := cli.GetObject(ctx, node, &models.GetRequestData{Length: node.GetSize()})
 		if er != nil {
 			return input.WithError(er), er
 		}
@@ -103,16 +133,16 @@ func (c *CellsHashAction) Run(ctx context.Context, channels *actions.RunnableCha
 		}
 		rc.Close()
 		hash := hex.EncodeToString(bh.Sum(nil))
-		n := resp.Node.Clone()
+		n := node.Clone()
 		n.MetaStore = make(map[string]string)
 		n.MustSetMeta(common.MetaNamespaceHash, hash)
 		if _, er = mc.UpdateNode(ctx, &tree.UpdateNodeRequest{From: n, To: n}); er != nil {
 			return input.WithError(er), er
 		} else {
-			log.TasksLogger(ctx).Info("Successfully updated hash on node " + n.GetPath() + ": " + hash)
+			log.TasksLogger(ctx).Info("Computed hash for " + n.GetPath() + ": " + hash)
 		}
-		resp.Node.MustSetMeta(common.MetaNamespaceHash, hash)
-		outnodes = append(outnodes, resp.Node)
+		node.MustSetMeta(common.MetaNamespaceHash, hash)
+		outnodes = append(outnodes, node)
 	}
 
 	// Reset