forked from starlinglab/integrity-v2
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[preprocessor/folder] Add folder preprocessor (#6)
* [preprocessor/folder] add folder scanner with config and hashing
* [preprocessor/folder] add file watcher
* [preprocessor/folder] add post to webhook client
* [preprocessor/folder] change attribute key name to snake case
* [preprocessor/folder] use net/http for media_type detection
* [preprocessor/folder] add preprocessorfolder to main.go
* [preprocessor/folder] add date_created to file metadata
* [preprocessor/folder] update argument for PostFileToWebHook
* [preprocessor/folder] add function comments
* [preprocessor/folder] change date_created to time_created
* [preprocessor/folder] optimize memory usage for hash calculation
* [preprocessor/folder] update function naming, comment and package name
* [preprocessor/folder] fix lint error
* [preprocessor/folder] ensure utc timestamp
* [webhook] calculate file hashes
* [preprocessor/folder] use filepath.WalkDir instead of filepath.Walk
* [preprocessor/folder] use log instead of fmt
* [preprocessor/folder] change fsnotify to rjeczalik/notify
* [preprocessor/folder] throw error on sync folder not set and unify log format
* [preprocessor/folder] run file watcher handler in coroutine
- Loading branch information
1 parent
66adbf5
commit e6e42f5
Showing
10 changed files
with
235 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
package main | ||
|
||
import ( | ||
"os" | ||
|
||
folder_preprocessor "github.com/starlinglab/integrity-v2/preprocessor/folder" | ||
"github.com/starlinglab/integrity-v2/util" | ||
) | ||
|
||
func main() { | ||
util.Fatal(folder_preprocessor.Run(os.Args[1:])) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package folder | ||
|
||
import ( | ||
"fmt" | ||
"net/http" | ||
"os" | ||
"path/filepath" | ||
"slices" | ||
"time" | ||
|
||
"github.com/starlinglab/integrity-v2/config" | ||
"github.com/starlinglab/integrity-v2/webhook" | ||
) | ||
|
||
// getFileMetadata opens the file at filePath and returns a map of attributes
// describing it:
//
//   - "media_type": content type sniffed by http.DetectContentType from up to
//     the first 512 bytes of the file
//   - "file_name": base name of the file
//   - "last_modified": modification time, UTC, RFC 3339
//   - "time_created": currently also populated from the modification time
//
// An empty regular file is valid input: no bytes are read and the media type
// is whatever http.DetectContentType reports for empty data. An error is
// returned if the file cannot be opened, stat'ed or read.
func getFileMetadata(filePath string) (map[string]any, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	fileInfo, err := file.Stat()
	if err != nil {
		return nil, err
	}

	// http.DetectContentType considers at most the first 512 bytes.
	buffer := make([]byte, 512)
	n := 0
	// Reading from an empty file reports io.EOF, which must not be treated as
	// a failure; skip the read when a regular file has nothing to sniff.
	if !fileInfo.Mode().IsRegular() || fileInfo.Size() > 0 {
		n, err = file.Read(buffer)
		if err != nil {
			return nil, err
		}
	}
	mediaType := http.DetectContentType(buffer[:n])

	// Both timestamps are derived from the same modification time, so format
	// it once.
	modTime := fileInfo.ModTime().UTC().Format(time.RFC3339)

	return map[string]any{
		"media_type":    mediaType,
		"file_name":     fileInfo.Name(),
		"last_modified": modTime,
		"time_created":  modTime,
	}, nil
}
|
||
// handleNewFile posts a new file and its metadata to the webhook server, | ||
// and returns the CID of the file according to the server. | ||
func handleNewFile(filePath string) (string, error) { | ||
metadata, err := getFileMetadata(filePath) | ||
if err != nil { | ||
return "", fmt.Errorf("error getting metadata for file %s: %v", filePath, err) | ||
} | ||
resp, err := webhook.PostFileToWebHook(filePath, metadata, webhook.PostGenericWebhookOpt{}) | ||
if err != nil { | ||
return "", fmt.Errorf("error posting metadata for file %s: %v", filePath, err) | ||
} | ||
return resp.Cid, nil | ||
} | ||
|
||
// shouldIncludeFile reports whether the file should be included in the processing | ||
func shouldIncludeFile(fileName string) bool { | ||
whiteListExtension := config.GetConfig().FolderPreprocessor.FileExtensions | ||
if fileName[0] == '.' { | ||
return false | ||
} | ||
fileExt := filepath.Ext(fileName) | ||
if fileExt == ".partial" { | ||
return false | ||
} | ||
if slices.Contains(whiteListExtension, fileExt) { | ||
return true | ||
} | ||
return false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package folder | ||
|
||
import ( | ||
"fmt" | ||
"log" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/rjeczalik/notify" | ||
"github.com/starlinglab/integrity-v2/config" | ||
) | ||
|
||
// scanSyncDirectory scans a path under the sync directory and returns a list of files | ||
func scanSyncDirectory(subPath string) ([]string, error) { | ||
scanRoot := config.GetConfig().FolderPreprocessor.SyncFolderRoot | ||
if scanRoot == "" { | ||
return nil, fmt.Errorf("sync folder root not set") | ||
} | ||
scanPath := filepath.Join(scanRoot, subPath) | ||
fileList := []string{} | ||
err := filepath.WalkDir(scanPath, func(path string, info os.DirEntry, err error) error { | ||
if err != nil { | ||
return err | ||
} | ||
if shouldIncludeFile(info.Name()) { | ||
fileList = append(fileList, path) | ||
log.Println("found: " + path) | ||
return nil | ||
} | ||
return nil | ||
}) | ||
return fileList, err | ||
} | ||
|
||
func Run(args []string) error { | ||
// Scan whole sync directory | ||
fileList, err := scanSyncDirectory("") | ||
if err != nil { | ||
return err | ||
} | ||
for _, filePath := range fileList { | ||
cid, err := handleNewFile(filePath) | ||
if err != nil { | ||
log.Println(err) | ||
} else { | ||
log.Printf("file %s uploaded to webhook with CID %s\n", filePath, cid) | ||
} | ||
} | ||
|
||
// Init directory watcher | ||
c := make(chan notify.EventInfo, 1) | ||
scanRoot := config.GetConfig().FolderPreprocessor.SyncFolderRoot | ||
err = notify.Watch(scanRoot+"/...", c, notify.Create, notify.Rename) | ||
if err != nil { | ||
return err | ||
} | ||
defer notify.Stop(c) | ||
|
||
for { | ||
ei := <-c | ||
event := ei.Event() | ||
if event == notify.Rename || event == notify.Create { | ||
go func() { | ||
filePath := ei.Path() | ||
file, err := os.Open(filePath) | ||
if err != nil { | ||
// File may be moved away for notify.Rename | ||
return | ||
} | ||
fileInfo, err := file.Stat() | ||
if err != nil { | ||
log.Println("error getting file info:", err) | ||
return | ||
} | ||
if shouldIncludeFile(fileInfo.Name()) { | ||
cid, err := handleNewFile(filePath) | ||
if err != nil { | ||
log.Println(err) | ||
} else { | ||
log.Printf("file %s uploaded to webhook with CID %s\n", filePath, cid) | ||
} | ||
} | ||
file.Close() | ||
}() | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters