Skip to content

Commit

Permalink
Merge pull request #8 from penwern/fix-multipart-checksums
Browse files Browse the repository at this point in the history
Fix multipart checksums
  • Loading branch information
Sunday-Crunk authored Nov 20, 2024
2 parents 1546c7f + 45208ae commit afc7ab6
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 83 deletions.
110 changes: 70 additions & 40 deletions src/js/core/ChecksumValidation.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,67 +2,98 @@ import CurateWorkerManager from "./WorkerManager.js";

window.addEventListener("load", () => {
(async () => {
// Wait until UploaderModel is defined
while (typeof UploaderModel === "undefined") {
await new Promise((resolve) => setTimeout(resolve, 100)); // Wait for 100ms before checking again
await new Promise((resolve) => setTimeout(resolve, 100));
}

const workerManager = new CurateWorkerManager();
console.log("WorkerManager initialized");

// Save the original uploadPresigned method to call it later
const originalUploadPresigned =
UploaderModel.UploadItem.prototype.uploadPresigned;

// Override the uploadPresigned method
// Helper function to convert hex string to bytes
/**
 * Convert a hexadecimal string (e.g. an MD5 hex digest) into raw bytes.
 *
 * @param {string} hex - Even-length string of hex digits.
 * @returns {Uint8Array} Decoded bytes (hex.length / 2 entries).
 */
function hexToBytes(hex) {
  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < hex.length; i += 2) {
    // slice() replaces the deprecated substr(); radix 16 made explicit.
    bytes[i / 2] = Number.parseInt(hex.slice(i, i + 2), 16);
  }
  return bytes;
}

// Override uploadPresigned so that, once an upload reports "loaded", we
// compute the file's checksum — a single hash for small files, or an
// S3-style multipart checksum ("<md5-of-part-digests>-<partCount>") for
// files above the multipart threshold — then trigger server-side validation.
UploaderModel.UploadItem.prototype.uploadPresigned = function () {
  // Run the original upload first; we only piggyback on its status events.
  const originalPromise = originalUploadPresigned.apply(this, arguments);
  const multipartThreshold = PydioApi.getMultipartPartSize();

  const observer = async (status) => {
    if (status !== "loaded") return;

    // Detach this observer so it fires at most once and doesn't leak.
    // indexOf/splice is used deliberately: splicing inside forEach (as the
    // previous code did) mutates the array mid-iteration and can skip entries.
    const idx = this._observers.status.indexOf(observer);
    if (idx !== -1) this._observers.status.splice(idx, 1);

    try {
      let finalChecksum;
      if (this._file.size > multipartThreshold) {
        console.log(
          "File exceeds multipart threshold, generating part checksums"
        );
        const partSize = multipartThreshold;
        const parts = Math.ceil(this._file.size / partSize);
        // Concatenated binary digests of each part (MD5 digest is 16 bytes).
        const partDigests = new Uint8Array(16 * parts);

        for (let i = 0; i < parts; i++) {
          const start = i * partSize;
          const end = Math.min(start + partSize, this._file.size);
          const blob = this._file.slice(start, end);
          const checksumData = await workerManager.generateChecksum(blob);
          // Store this part's digest at its slot in the concatenated array.
          partDigests.set(hexToBytes(checksumData.hash), i * 16);
        }

        // Final multipart checksum: hash of the concatenated part digests,
        // suffixed with the part count (mirrors S3 multipart ETag format).
        const finalChecksumData = await workerManager.generateChecksum(
          new Blob([partDigests])
        );
        finalChecksum = `${finalChecksumData.hash}-${parts}`;

        console.log("Generated multipart checksum:", finalChecksum);
      } else {
        console.log(
          "File below multipart threshold, generating single checksum"
        );
        const checksumData = await workerManager.generateChecksum(this._file);
        finalChecksum = checksumData.hash;
      }

      // Give the server time to settle stats before polling; the wait scales
      // with file size, clamped to the 500ms–5s range.
      const delay = Math.min(5000, Math.max(500, this._file.size * 0.01));
      setTimeout(() => {
        const p = this._targetNode._path;
        const pathSuffix = p.endsWith("/") ? "" : "/";
        const parentLabelPart = this._parent._label
          ? `${this._parent._label}/`
          : "";
        const filename = `${Curate.workspaces.getOpenWorkspace()}${p}${pathSuffix}${parentLabelPart}${
          this._label
        }`;
        fetchCurateStats(filename, finalChecksum, 0);
      }, delay);
    } catch (error) {
      console.error("Checksum generation failed:", error);
    }
  };

  // Subscribe to the upload's status updates.
  this._observers.status.push(observer);

  return originalPromise;
};

// Function to fetch stats from Curate API
function fetchCurateStats(filePath, expectedChecksum, retryCount) {
Curate.api
.fetchCurate("/a/tree/stats", "POST", {
Expand All @@ -81,14 +112,13 @@ window.addEventListener("load", () => {
});
}

// Function to validate the checksum
function validateChecksum(node, expectedChecksum, filePath, retryCount) {
const maxRetries = 3; // Set maximum retries to 3
const maxRetries = 3;
if (node.Etag === "temporary" && retryCount < maxRetries) {
console.log("Checksum temporary. Retrying...");
setTimeout(() => {
fetchCurateStats(filePath, expectedChecksum, retryCount + 1);
}, 2000); // Retry after 2 seconds
}, 2000);
} else if (node.Etag === expectedChecksum) {
console.log("Checksum validation passed.");
updateMetaField(
Expand Down
109 changes: 66 additions & 43 deletions src/js/core/WorkerManager.js
Original file line number Diff line number Diff line change
@@ -1,69 +1,92 @@
/**
 * Manages a small pool of hash web-workers that compute file checksums.
 *
 * Tasks are queued via generateChecksum(); workers are created lazily up to
 * `poolSize` and torn down automatically once the queue drains and no task
 * remains in flight.
 */
class CurateWorkerManager {
  /**
   * @param {number} [poolSize=5] - Maximum number of concurrent workers.
   */
  constructor(poolSize = 5) {
    this.poolSize = poolSize;
    this.workers = new Map(); // workerId -> Worker
    this.taskQueue = []; // tasks waiting for a free worker
    this.currentTasks = new Map(); // workerId -> in-flight task
  }

  /**
   * Spawn a new hash worker, wire up its message/error handlers, and
   * register it in the pool.
   *
   * @returns {string} The new worker's id.
   */
  initWorker() {
    const worker = new Worker("/workers/hashWorker.js");
    const workerId = crypto.randomUUID();

    worker.onmessage = (event) => {
      if (event.data.status === "complete") {
        const currentTask = this.currentTasks.get(workerId);
        if (currentTask) {
          currentTask.resolve({
            file: currentTask.file,
            hash: event.data.hash,
            name: currentTask.file.name,
          });
          this.currentTasks.delete(workerId);
        }
        this.processNextTask(workerId, worker);
      }
    };

    worker.onerror = (event) => {
      const currentTask = this.currentTasks.get(workerId);
      if (currentTask) {
        // Reject with an Error object (not a bare string) so callers get a
        // stack trace and can use instanceof checks.
        currentTask.reject(new Error("Worker error: " + event.message));
        this.currentTasks.delete(workerId);
      }
      this.processNextTask(workerId, worker);
    };

    this.workers.set(workerId, worker);
    return workerId;
  }

  /**
   * Queue a checksum computation for a File/Blob.
   *
   * @param {Blob} file - The data to hash.
   * @returns {Promise<{file: Blob, hash: string, name: string}>}
   */
  generateChecksum(file) {
    return new Promise((resolve, reject) => {
      const task = { file, resolve, reject };
      this.taskQueue.push(task);

      // Grow the pool only when there are more queued tasks than workers
      // and we haven't reached the pool size limit.
      if (
        this.taskQueue.length > this.workers.size &&
        this.workers.size < this.poolSize
      ) {
        const workerId = this.initWorker();
        this.processNextTask(workerId, this.workers.get(workerId));
      } else if (this.workers.size > 0) {
        // Otherwise hand the task to the first idle worker, if any;
        // busy workers will pull from the queue as they finish.
        for (const [workerId, worker] of this.workers) {
          if (!this.currentTasks.has(workerId)) {
            this.processNextTask(workerId, worker);
            break;
          }
        }
      }
    });
  }

  /**
   * Give `worker` the next queued task, or tear the pool down once both the
   * queue and the in-flight map are empty.
   *
   * @param {string} workerId - Id of the worker that became available.
   * @param {Worker} worker - The worker instance itself.
   */
  processNextTask(workerId, worker) {
    if (this.taskQueue.length > 0) {
      const task = this.taskQueue.shift();
      this.currentTasks.set(workerId, task);
      worker.postMessage({ file: task.file, msg: "begin hash" });
    } else if (this.currentTasks.size === 0) {
      // No more tasks in queue and no running tasks - cleanup workers
      this.cleanupWorkers();
    }
  }

  /** Terminate every pooled worker and empty the pool. */
  cleanupWorkers() {
    // values() - the worker ids are not needed here (the previous
    // destructuring left an unused binding).
    for (const worker of this.workers.values()) {
      worker.terminate();
    }
    this.workers.clear();
  }

  /** Hard stop: kill all workers and drop any queued or in-flight tasks. */
  terminate() {
    this.cleanupWorkers();
    this.taskQueue = [];
    this.currentTasks.clear();
  }
}

export default CurateWorkerManager;

0 comments on commit afc7ab6

Please sign in to comment.