From dda5a334e2e68df0052a517af81884766badc0b0 Mon Sep 17 00:00:00 2001 From: NRK Date: Mon, 10 Jun 2024 22:36:09 +0000 Subject: [PATCH] clipmenud: add option to deduplicate entries uses a linear search to find the duplicate and a memmove to move it over to the end. not particularly efficient since each snip is 256 bytes while we're only interested in the first 8. and so iterating over it linearly like this isn't very cache friendly. the memmove worst case can also be around 250KiB with the default config. Closes: https://github.com/cdown/clipmenu/issues/224 --- src/clipmenud.c | 2 +- src/config.c | 1 + src/config.h | 1 + src/store.c | 34 +++++++++++++++++++++++++++------- src/store.h | 4 ++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/clipmenud.c b/src/clipmenud.c index 3a1f718..2d50b7f 100644 --- a/src/clipmenud.c +++ b/src/clipmenud.c @@ -217,7 +217,7 @@ static uint64_t store_clip(char *text) { dbg("Possible partial of last clip, replacing\n"); expect(cs_replace(&cs, CS_ITER_NEWEST_FIRST, 0, text, &hash) == 0); } else { - expect(cs_add(&cs, text, &hash) == 0); + expect(cs_add(&cs, text, &hash, cfg.deduplicate) == 0); } if (last_text) { diff --git a/src/config.c b/src/config.c index bcbcefa..6d6852c 100644 --- a/src/config.c +++ b/src/config.c @@ -269,6 +269,7 @@ int config_setup_internal(FILE *file, struct config *cfg) { {"max_clips_batch", "CM_MAX_CLIPS_BATCH", &cfg->max_clips_batch, convert_positive_int, "100", 0}, {"oneshot", "CM_ONESHOT", &cfg->oneshot, convert_positive_int, "0", 0}, + {"deduplicate", NULL, &cfg->deduplicate, convert_bool, "1", 0}, {"own_clipboard", "CM_OWN_CLIPBOARD", &cfg->own_clipboard, convert_bool, "0", 0}, {"selections", "CM_SELECTIONS", &cfg->selections, convert_selections, diff --git a/src/config.h b/src/config.h index e464ab2..bbda550 100644 --- a/src/config.h +++ b/src/config.h @@ -43,6 +43,7 @@ struct config { int max_clips; int max_clips_batch; int oneshot; + bool deduplicate; bool own_clipboard; struct 
selection *owned_selections; struct selection *selections; diff --git a/src/store.c b/src/store.c index eac472d..ccffeaf 100644 --- a/src/store.c +++ b/src/store.c @@ -397,6 +397,7 @@ static int _must_use_ _nonnull_ cs_snip_add(struct clip_store *cs, /** * Add content to the content directory using the hash as the filename. + * Returns 1 if the entry was duplicate and no new entry was inserted. * * @cs: The clip store to operate on * @hash: The hash of the content to add @@ -404,7 +405,8 @@ static int _must_use_ _nonnull_ cs_snip_add(struct clip_store *cs, */ static int _must_use_ _nonnull_ cs_content_add(struct clip_store *cs, uint64_t hash, - const char *content) { + const char *content, + bool deduplicate) { bool dupe = false; char dir_path[CS_HASH_STR_MAX]; @@ -421,7 +423,7 @@ static int _must_use_ _nonnull_ cs_content_add(struct clip_store *cs, char base_file_path[PATH_MAX]; snprintf(base_file_path, sizeof(base_file_path), "%s/1", dir_path); - if (dupe) { + if (dupe && !deduplicate) { // This clip already exists, just create a link for refcounting struct stat st; if (fstatat(cs->content_dir_fd, base_file_path, &st, 0) < 0) { @@ -437,6 +439,23 @@ static int _must_use_ _nonnull_ cs_content_add(struct clip_store *cs, } return 0; + } else if (dupe && deduplicate) { + _drop_(cs_unref) struct ref_guard guard = cs_ref(cs); + if (guard.status < 0) { + return guard.status; + } + + for (int i = 0; i < (int)cs->local_nr_snips; ++i) { + if (cs->snips[i].hash == hash) { + // move cs->snips[i] to the end of the array + struct cs_snip tmp = cs->snips[i]; + memmove(cs->snips + i, cs->snips + i + 1, + (cs->local_nr_snips - (i + 1)) * sizeof(*cs->snips)); + cs->snips[cs->local_nr_snips - 1] = tmp; + return 1; + } + } + return -ENOENT; } // This is a new clip @@ -532,12 +551,13 @@ int cs_content_get(struct clip_store *cs, uint64_t hash, * @content: The content to add * @out_hash: Output for the generated hash, or NULL */ -int cs_add(struct clip_store *cs, const char *content, 
uint64_t *out_hash) { +int cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash, + bool deduplicate) { uint64_t hash = djb64_hash(content); char line[CS_SNIP_LINE_SIZE]; size_t nr_lines = first_line(content, line); - int ret = cs_content_add(cs, hash, content); + int ret = cs_content_add(cs, hash, content, deduplicate); if (ret < 0) { return ret; } @@ -546,7 +566,7 @@ int cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash) { *out_hash = hash; } - return cs_snip_add(cs, hash, line, nr_lines); + return (ret == 1) ? 0 : cs_snip_add(cs, hash, line, nr_lines); } /** @@ -767,8 +787,8 @@ int cs_replace(struct clip_store *cs, enum cs_iter_direction direction, size_t nr_lines = first_line(content, line); uint64_t hash = djb64_hash(content); cs_snip_update(snip, hash, line, nr_lines); - ret = cs_content_add(cs, hash, content); - if (ret) { + ret = cs_content_add(cs, hash, content, 0); + if (ret < 0) { return ret; } if (out_hash) { diff --git a/src/store.h b/src/store.h index a1bbfaf..5796ff1 100644 --- a/src/store.h +++ b/src/store.h @@ -154,8 +154,8 @@ void drop_cs_content_unmap(struct cs_content *content); void drop_cs_destroy(struct clip_store *cs); int _must_use_ _nonnull_ cs_content_get(struct clip_store *cs, uint64_t hash, struct cs_content *content); -int _must_use_ _nonnull_n_(1) - cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash); +int _must_use_ _nonnull_n_(1) cs_add(struct clip_store *cs, const char *content, + uint64_t *out_hash, bool deduplicate); bool _must_use_ _nonnull_ cs_snip_iter(struct ref_guard *guard, enum cs_iter_direction direction, struct cs_snip **snip);