Skip to content

clipmenud: add option to deduplicate entries #227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/clipmenud.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,9 @@ static uint64_t store_clip(struct clip_text *ct) {
dbg("Possible partial of last clip, replacing\n");
expect(cs_replace(&cs, CS_ITER_NEWEST_FIRST, 0, ct->data, &hash) == 0);
} else {
expect(cs_add(&cs, ct->data, &hash) == 0);
expect(cs_add(&cs, ct->data, &hash,
cfg.deduplicate ? CS_DUPE_KEEP_LAST : CS_DUPE_KEEP_ALL) ==
0);
}

free_clip_text(&last_text);
Expand Down
2 changes: 2 additions & 0 deletions src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,8 @@ int config_setup_internal(FILE *file, struct config *cfg) {
{"max_clips_batch", "CM_MAX_CLIPS_BATCH", &cfg->max_clips_batch,
convert_positive_int, "100", 0},
{"oneshot", "CM_ONESHOT", &cfg->oneshot, convert_positive_int, "0", 0},
{"deduplicate", "CM_DEDUPLICATE", &cfg->deduplicate, convert_bool, "0",
0},
{"own_clipboard", "CM_OWN_CLIPBOARD", &cfg->own_clipboard, convert_bool,
"0", 0},
{"selections", "CM_SELECTIONS", &cfg->selections, convert_selections,
Expand Down
1 change: 1 addition & 0 deletions src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct config {
int max_clips;
int max_clips_batch;
int oneshot;
bool deduplicate;
bool own_clipboard;
struct selection *owned_selections;
struct selection *selections;
Expand Down
51 changes: 39 additions & 12 deletions src/store.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,18 +401,20 @@ static int _must_use_ _nonnull_ cs_snip_add(struct clip_store *cs,
* @cs: The clip store to operate on
* @hash: The hash of the content to add
* @content: The content to add to the file
* @dupe_policy: If set to CS_DUPE_KEEP_LAST, returns -EEXIST when trying to
* insert a duplicate entry.
*/
static int _must_use_ _nonnull_ cs_content_add(struct clip_store *cs,
uint64_t hash,
const char *content) {
static int _must_use_ _nonnull_
cs_content_add(struct clip_store *cs, uint64_t hash, const char *content,
enum cs_dupe_policy dupe_policy) {
bool dupe = false;

char dir_path[CS_HASH_STR_MAX];
snprintf(dir_path, sizeof(dir_path), PRI_HASH, hash);

int ret = mkdirat(cs->content_dir_fd, dir_path, 0700);
if (ret < 0) {
if (errno != EEXIST) {
if (errno != EEXIST || dupe_policy == CS_DUPE_KEEP_LAST) {
return negative_errno();
}
dupe = true;
Expand Down Expand Up @@ -525,28 +527,53 @@ int cs_content_get(struct clip_store *cs, uint64_t hash,
return 0;
}

/**
* Move the entry with the specified hash to the newest slot.
*
* @cs: The clip store to operate on
* @hash: The hash of the entry to move
*/
static int cs_make_newest(struct clip_store *cs, uint64_t hash) {
_drop_(cs_unref) struct ref_guard guard = cs_ref(cs);
if (guard.status < 0) {
return guard.status;
}

for (int i = 0; i < (int)cs->local_nr_snips; ++i) {
if (cs->snips[i].hash == hash) {
struct cs_snip tmp = cs->snips[i];
memmove(cs->snips + i, cs->snips + i + 1,
(cs->local_nr_snips - (i + 1)) * sizeof(*cs->snips));
cs->snips[cs->local_nr_snips - 1] = tmp;
return 0;
}
}
die("unreachable");
}

/**
* Add a new content entry to the clip store and content directory.
*
* @cs: The clip store to operate on
* @content: The content to add
* @out_hash: Output for the generated hash, or NULL
* @dupe_policy: Policy to use for duplicate entries
*/
int cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash) {
int cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash,
enum cs_dupe_policy dupe_policy) {
uint64_t hash = djb64_hash(content);
char line[CS_SNIP_LINE_SIZE];
size_t nr_lines = first_line(content, line);

int ret = cs_content_add(cs, hash, content);
if (ret < 0) {
return ret;
}

if (out_hash) {
*out_hash = hash;
}

return cs_snip_add(cs, hash, line, nr_lines);
int ret = cs_content_add(cs, hash, content, dupe_policy);
if (ret == -EEXIST && dupe_policy == CS_DUPE_KEEP_LAST) {
return cs_make_newest(cs, hash);
}
return ret ? ret : cs_snip_add(cs, hash, line, nr_lines);
}

/**
Expand Down Expand Up @@ -767,7 +794,7 @@ int cs_replace(struct clip_store *cs, enum cs_iter_direction direction,
size_t nr_lines = first_line(content, line);
uint64_t hash = djb64_hash(content);
cs_snip_update(snip, hash, line, nr_lines);
ret = cs_content_add(cs, hash, content);
ret = cs_content_add(cs, hash, content, CS_DUPE_KEEP_ALL);
if (ret) {
return ret;
}
Expand Down
14 changes: 13 additions & 1 deletion src/store.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,17 @@ enum cs_remove_action {
CS_ACTION_STOP = BIT(2),
};

/**
 * Policy applied when the content being added duplicates an existing entry.
 *
 * @CS_DUPE_KEEP_ALL: Keep all duplicate entries.
 * @CS_DUPE_KEEP_LAST: Only keep the newest, do not insert duplicate entries.
 */
enum cs_dupe_policy {
    CS_DUPE_KEEP_ALL = 0,
    CS_DUPE_KEEP_LAST = 1,
};

struct ref_guard _must_use_ _nonnull_ cs_ref(struct clip_store *cs);
void _nonnull_ cs_unref(struct clip_store *cs);
void _nonnull_ drop_cs_unref(struct ref_guard *guard);
Expand All @@ -156,7 +167,8 @@ void drop_cs_destroy(struct clip_store *cs);
int _must_use_ _nonnull_ cs_content_get(struct clip_store *cs, uint64_t hash,
struct cs_content *content);
int _must_use_ _nonnull_n_(1)
cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash);
cs_add(struct clip_store *cs, const char *content, uint64_t *out_hash,
enum cs_dupe_policy dupe_policy);
bool _must_use_ _nonnull_ cs_snip_iter(struct ref_guard *guard,
enum cs_iter_direction direction,
struct cs_snip **snip);
Expand Down
88 changes: 81 additions & 7 deletions tests/test_store.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#undef NDEBUG

#include <assert.h>
#include <dirent.h>
#include <errno.h>
Expand Down Expand Up @@ -151,7 +153,7 @@ static void add_ten_snips(struct clip_store *cs) {
for (char i = 0; i < 10; i++) {
char num[8];
snprintf(num, sizeof(num), "%d", i);
int ret = cs_add(cs, num, NULL);
int ret = cs_add(cs, num, NULL, CS_DUPE_KEEP_ALL);
assert(ret == 0);
}
}
Expand Down Expand Up @@ -203,7 +205,7 @@ static bool test__cs_add(void) {
snprintf(num, sizeof(num), "%d", i);

uint64_t hash;
int ret = cs_add(&cs, num, &hash);
int ret = cs_add(&cs, num, &hash, CS_DUPE_KEEP_ALL);
t_assert(ret == 0);

_drop_(cs_content_unmap) struct cs_content content;
Expand Down Expand Up @@ -358,7 +360,7 @@ static bool test__cs_add__exceeds_snip_line_size(void) {
memset(long_content, 'A', sizeof(long_content));
long_content[sizeof(long_content) - 1] = '\0';

int ret = cs_add(&cs, long_content, NULL);
int ret = cs_add(&cs, long_content, NULL, CS_DUPE_KEEP_ALL);
t_assert(ret == 0);

struct cs_snip *snip = NULL;
Expand Down Expand Up @@ -396,13 +398,13 @@ static bool test__cs_add__around_alloc_batch_threshold(void) {
_drop_(teardown_test) struct clip_store cs = setup_test();

for (size_t i = 0; i < CS_SNIP_ALLOC_BATCH - 1; i++) {
int ret = cs_add(&cs, "test content", NULL);
int ret = cs_add(&cs, "test content", NULL, CS_DUPE_KEEP_ALL);
assert(ret == 0);
}
t_assert(cs.header->nr_snips == CS_SNIP_ALLOC_BATCH - 1);

/* Add one more entry to exceed the batch threshold */
t_assert(cs_add(&cs, "test content", NULL) == 0);
t_assert(cs_add(&cs, "test content", NULL, CS_DUPE_KEEP_ALL) == 0);
t_assert(cs.header->nr_snips == CS_SNIP_ALLOC_BATCH);
t_assert(cs.header->nr_snips_alloc >= CS_SNIP_ALLOC_BATCH);

Expand All @@ -414,7 +416,7 @@ static bool test__cs_trim__no_remove_when_still_referenced(void) {

uint64_t hash;
for (size_t i = 0; i < 2; i++) {
int ret = cs_add(&cs, "test content", &hash);
int ret = cs_add(&cs, "test content", &hash, CS_DUPE_KEEP_ALL);
t_assert(ret == 0);
}

Expand Down Expand Up @@ -547,7 +549,7 @@ static bool test__synchronisation(void) {
t_assert(ret == 0);

uint64_t hash;
ret = cs_add(&cs1, "test content", &hash);
ret = cs_add(&cs1, "test content", &hash, CS_DUPE_KEEP_ALL);
t_assert(ret == 0);

bool found = false;
Expand Down Expand Up @@ -578,6 +580,75 @@ static bool test__synchronisation(void) {
return true;
}

/* With CS_DUPE_KEEP_ALL, adding the same content twice must report the same
 * hash both times and leave two snips in the store. */
static bool test__cs_add__dupe_keep_all(void) {
    _drop_(teardown_test) struct clip_store cs = setup_test();

    uint64_t hashes[2];
    for (size_t i = 0; i < 2; i++) {
        int rc = cs_add(&cs, "duplicate", &hashes[i], CS_DUPE_KEEP_ALL);
        t_assert(rc == 0);
    }
    t_assert(hashes[0] == hashes[1]);
    t_assert(cs.header->nr_snips == 2);

    /* Walk both snips from the oldest end and check each one's hash. */
    _drop_(cs_unref) struct ref_guard guard = cs_ref(&cs);
    struct cs_snip *cur = NULL;
    for (size_t i = 0; i < 2; i++) {
        bool advanced = cs_snip_iter(&guard, CS_ITER_OLDEST_FIRST, &cur);
        t_assert(advanced == true);
        t_assert(cur->hash == hashes[i]);
    }

    return true;
}

/* With CS_DUPE_KEEP_LAST, repeatedly adding the same content must succeed
 * every time, never grow the store past one snip, and always report the same
 * hash. */
static bool test__cs_add__dupe_keep_last(void) {
    _drop_(teardown_test) struct clip_store cs = setup_test();

    uint64_t hashes[3];
    for (size_t i = 0; i < 3; i++) {
        int rc = cs_add(&cs, "duplicate", &hashes[i], CS_DUPE_KEEP_LAST);
        t_assert(rc == 0);
        t_assert(cs.header->nr_snips == 1);
    }
    t_assert(hashes[0] == hashes[1]);
    t_assert(hashes[0] == hashes[2]);

    return true;
}

/* After re-adding existing content with CS_DUPE_KEEP_LAST, ensure the
 * duplicate is moved to the newest slot, no new snip is created, and the
 * other entries keep their relative order. */
static bool test__cs_add__dupe_keep_last_with_multiple_entries(void) {
    _drop_(teardown_test) struct clip_store cs = setup_test();

    uint64_t hash_a, hash_dup, hash_b;
    int ret = cs_add(&cs, "A", &hash_a, CS_DUPE_KEEP_ALL);
    t_assert(ret == 0);
    ret = cs_add(&cs, "duplicate", &hash_dup, CS_DUPE_KEEP_ALL);
    t_assert(ret == 0);
    ret = cs_add(&cs, "B", &hash_b, CS_DUPE_KEEP_ALL);
    t_assert(ret == 0);
    t_assert(cs.header->nr_snips == 3);

    /* Now add a duplicate entry with KEEP_LAST which should move the duplicate
     * to the newest slot instead of inserting a fourth snip. Initialised to 0
     * so a missing write shows up as a failed assertion rather than an
     * uninitialised read. */
    uint64_t hash_readd = 0;
    ret = cs_add(&cs, "duplicate", &hash_readd, CS_DUPE_KEEP_LAST);
    t_assert(ret == 0);
    t_assert(hash_readd == hash_dup);
    t_assert(cs.header->nr_snips == 3);

    /* Stored oldest-to-newest the snips were A, duplicate, B; after the move
     * they should be A, B, duplicate. Walk newest first and check all three so
     * the "other entries remain in order" claim is actually verified. */
    const uint64_t expected[] = {hash_dup, hash_b, hash_a};
    _drop_(cs_unref) struct ref_guard guard = cs_ref(&cs);
    struct cs_snip *snip = NULL;
    for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); i++) {
        bool iter_ret = cs_snip_iter(&guard, CS_ITER_NEWEST_FIRST, &snip);
        t_assert(iter_ret == true);
        t_assert(snip->hash == expected[i]);
    }

    return true;
}

int main(void) {
t_run(test__cs_init);
t_run(test__cs_init__bad_size);
Expand All @@ -602,6 +673,9 @@ int main(void) {
t_run(test__first_line__no_final_newline);
t_run(test__first_line__ignore_blank_lines);
t_run(test__first_line__unicode);
t_run(test__cs_add__dupe_keep_all);
t_run(test__cs_add__dupe_keep_last);
t_run(test__cs_add__dupe_keep_last_with_multiple_entries);

return 0;
}