From 433a233148dde2b2a602e84f7bbd26523783ee6f Mon Sep 17 00:00:00 2001 From: Jake L Date: Sat, 28 Sep 2024 03:23:28 -0400 Subject: [PATCH] Update default size threshold to 2048 bytes (#53) Update the default deduplication size threshold from 1024 to 2048 bytes. The idea is to prevent small payloads from being written as revisit records, as revisit records usually have a large playback cost. 2,048 bytes is seen as a better default for the time being. --- client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client.go b/client.go index 620a4f4..72134e0 100644 --- a/client.go +++ b/client.go @@ -98,9 +98,9 @@ func NewWARCWritingHTTPClient(HTTPClientSettings HTTPClientSettings) (httpClient httpClient.dedupeOptions = HTTPClientSettings.DedupeOptions httpClient.dedupeHashTable = new(sync.Map) - // Set default deduplication threshold to 1024 bytes + // Set default deduplication threshold to 2048 bytes if httpClient.dedupeOptions.SizeThreshold == 0 { - httpClient.dedupeOptions.SizeThreshold = 1024 + httpClient.dedupeOptions.SizeThreshold = 2048 } // Configure HTTP status code skipping (usually 429)