Skip to content

Commit

Permalink
⚡ Benchmark using the default blake hash
Browse files Browse the repository at this point in the history
  • Loading branch information
hardbyte committed Apr 24, 2023
1 parent 188bb18 commit b5b169c
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 3 deletions.
74 changes: 74 additions & 0 deletions clkhash/data/randomnames-schema-v2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"version": 3,
"clkConfig": {
"l": 1024,
"kdf": {
"type": "HKDF",
"hash": "SHA256",
"salt": "SCbL2zHNnmsckfzchsNkZY9XoHk96P/G5nUBrM7ybymlEFsMV6PAeDZCNp3rfNUPCtLDMOGQHG4pCQpfhiHCyA==",
"info": "c2NoZW1hX2V4YW1wbGU=",
"keySize": 64
}
},
"features": [
{
"identifier": "INDEX",
"ignored": true
},
{
"identifier": "NAME freetext",
"format": {
"type": "string",
"encoding": "utf-8",
"case": "mixed",
"minLength": 3
},
"hashing": {
"comparison": {
"type": "ngram",
"n": 2
},
"strategy": {
"bitsPerToken": 15
},
"hash": {"type": "blakeHash"}
}
},
{
"identifier": "DOB YYYY/MM/DD",
"format": {
"type": "date",
"description": "Numbers separated by slashes, in the year, month, day order",
"format": "%Y/%m/%d"
},
"hashing": {
"comparison": {
"type": "ngram",
"n": 1,
"positional": true
},
"strategy": {
"bitsPerToken": 30
},
"hash": {"type": "blakeHash"}
}
},
{
"identifier": "GENDER M or F",
"format": {
"type": "enum",
"values": ["M", "F"]
},
"hashing": {
"comparison": {
"type": "ngram",
"n": 1
},
"strategy": {
"bitsPerToken": 60
},
"hash": {"type": "blakeHash"}
}
}
]
}
2 changes: 1 addition & 1 deletion clkhash/randomnames.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class NameList:
""" Randomly generated PII records.
"""

randomname_schema_bytes = pkgutil.get_data('clkhash', 'data/randomnames-schema.json')
randomname_schema_bytes = pkgutil.get_data('clkhash', 'data/randomnames-schema-v2.json')
if randomname_schema_bytes is None:
raise Exception("Couldn't locate package data. Please file a bug report.")
randomname_schema = json.loads(randomname_schema_bytes.decode())
Expand Down
4 changes: 2 additions & 2 deletions tests/test_e2e_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ class TestV2(unittest.TestCase):
def test_compare_v1_v2_and_v3(self):
pii = randomnames.NameList(100).names
schema_v3 = randomnames.NameList.SCHEMA
# this v2 schema should be equivalent to the above v3 schema
# this v2 schema is no longer equivalent to the v3 schema, because the v3 schema now uses the blakeHash hash type
schema_v2 = _test_schema('randomnames-schema-v2.json')
schema_v1 = _test_schema('randomnames-schema-v1.json')
secret = 'secret'
for clkv1, clkv2, clkv3 in zip(clk.generate_clks(pii, schema_v1, secret),
clk.generate_clks(pii, schema_v2, secret),
clk.generate_clks(pii, schema_v3, secret)):
self.assertEqual(clkv1, clkv2)
self.assertEqual(clkv1, clkv3)
self.assertNotEqual(clkv1, clkv3)

def test_compare_strategies(self):
def mkSchema(hashing_properties):
Expand Down

0 comments on commit b5b169c

Please sign in to comment.