diff --git a/README.md b/README.md index cb6880d..87e3b35 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ available extremely fast. - ranks addresses explicitly emailed by you higher - configurable output via go templates - uses the most frequent non-empty display name for each email +- display name can be Unicode-normalized (diacritics removed) for search purposes - filters common "no reply" addresses, additional filters can be added via regexes - normalizes emails to lower case - ability to add additional email addresses from a command @@ -76,6 +77,7 @@ Available keys: ``` Address Name + NormalizedName: same as Name, but Unicode-normalized (diacritics removed) Names Class FrequencyRank @@ -139,7 +141,7 @@ addresses = [ ] filters = ["@spam.(com|org)"] outputpath = "~/.mail/addressbook" -template = "{{.Address}}\t{{.Name}}" +template = "{{.Address}}\t{{.Name}}\t{{.NormalizedName}}" ``` ## Integration @@ -152,11 +154,18 @@ Put something like this in your aerc config (using your favourite grep): address-book-cmd="ugrep -jP -m 100 --color=never %s /home/[myuser]/.cache/maildir-rank-addr/addressbook.tsv" ``` -(`-j` is smart case insensitive, and needs to be combined with `-P` for UTF-8) +(`-j` is smart case insensitive, and needs to be combined with `-P` for UTF-8). + +Since aerc only uses the first two of the tab-separated columns, any other +column can be added to help with search or to combine with external tools. For +example, adding `NormalizedName` as the third column will allow you to type +"arpad" and still find and use the entries for both "Árpád X", who spells his +name with accents, and "Arpad Y", who uses the plain ASCII spelling. Note that `address-book-cmd` is not executed in the shell, so you need to hard code the path without shell expansion. 
+ # Behind the scenes ## Ranking diff --git a/data.go b/data.go index 3e01c3a..0cb43ab 100644 --- a/data.go +++ b/data.go @@ -7,15 +7,16 @@ import ( ) type AddressData struct { - Address string - Names []string - Class int - FrequencyRank int - RecencyRank int - TotalRank int - ClassCount [3]int - ClassDate [3]int64 - Name string + Address string + Names []string + Class int + FrequencyRank int + RecencyRank int + TotalRank int + ClassCount [3]int + ClassDate [3]int64 + Name string + NormalizedName string } type Config struct { diff --git a/ranking.go b/ranking.go index b8b7022..2470456 100644 --- a/ranking.go +++ b/ranking.go @@ -3,6 +3,10 @@ package main import ( "sort" "strings" + "unicode" + + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" ) func getMostFrequent(names []string) string { @@ -31,11 +35,18 @@ func getMostFrequent(names []string) string { return lastname } +func isMn(r rune) bool { + return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks +} + func normalizeAddressNames(aD AddressData) AddressData { if aD.Name != "" { return aD } aD.Name = getMostFrequent(aD.Names) + t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC) + normStr, _, _ := transform.String(t, aD.Name) + aD.NormalizedName = normStr return aD }