Skip to content

Commit 3ce2808

Browse files
committed
added longest_prefix() method and corresponding tests
1 parent 239a9ae commit 3ce2808

File tree

3 files changed

+78
-2
lines changed

3 files changed

+78
-2
lines changed

src/dawg.pyx

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,24 @@ cdef class DAWG:
253253

254254
return res
255255

256+
def longest_prefix(self, unicode key):
    '''
    Return the longest key of this DAWG that is a prefix of the ``key``.

    Raises ``KeyError`` if no key of this DAWG is a prefix of the ``key``.
    '''
    cdef BaseType index = self.dct.root()
    # Walk the encoded bytes, not the code points: ``ch`` is a byte-sized
    # CharType, so a non-ASCII code point cannot be followed directly, and
    # ``pos`` has to be an offset into the same byte sequence that is sliced
    # below.
    # NOTE(review): assumes keys are stored UTF-8 encoded -- confirm
    # against the code that builds the dictionary.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1      # length of the prefix consumed so far, in bytes
    cdef int lastpos = 0  # length of the longest matching key; 0 == none yet
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break
        if self.dct.has_value(index):
            lastpos = pos
        pos += 1

    # ``lastpos == 0`` doubles as the "nothing found" marker, so a
    # zero-length prefix is never reported.
    if not lastpos:
        raise KeyError("No prefix found")
    return b_key[:lastpos].decode('utf8')
273+
256274
def iterprefixes(self, unicode key):
257275
'''
258276
Return a generator with keys of this DAWG that are prefixes of the ``key``.
@@ -802,7 +820,28 @@ cdef class BytesDAWG(CompletionDAWG):
802820
"""
803821
return self._similar_item_values(0, key, self.dct.root(), replaces)
804822

823+
def longest_prefix(self, unicode key):
    """
    Return a ``(prefix, values)`` tuple for the longest key of this DAWG
    that is a prefix of the ``key``; ``values`` is whatever
    ``self._value_for_index`` returns for that key.

    Raises ``KeyError`` if no key of this DAWG is a prefix of the ``key``.
    """
    cdef BaseType index = self.dct.root()
    cdef BaseType tmp
    cdef BaseType lastindex
    # Walk the encoded bytes, not the code points: ``ch`` is a byte-sized
    # CharType, so a non-ASCII code point cannot be followed directly, and
    # ``pos`` has to be an offset into the same byte sequence that is sliced
    # below.
    # NOTE(review): assumes keys are stored UTF-8 encoded -- confirm
    # against the code that builds the dictionary.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1      # length of the prefix consumed so far, in bytes
    cdef int lastpos = 0  # length of the longest matching key; 0 == none yet
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        # Probe on a copy so that ``index`` keeps pointing at the key
        # itself: a key is present when the payload separator can be
        # followed from the current state.
        tmp = index
        if self.dct.Follow(self._c_payload_separator, &tmp):
            lastpos = pos
            lastindex = tmp
        pos += 1

    # ``lastindex`` is only assigned together with ``lastpos``, so it is
    # never read uninitialised.
    if not lastpos:
        raise KeyError("No prefix found")
    return b_key[:lastpos].decode('utf8'), self._value_for_index(lastindex)
806845

807846
cdef class RecordDAWG(BytesDAWG):
808847
"""
@@ -904,6 +943,26 @@ cdef class IntDAWG(DAWG):
904943
cpdef int b_get_value(self, bytes key):
905944
return self.dct.Find(key)
906945

946+
def longest_prefix(self, unicode key):
    """
    Return a ``(prefix, value)`` tuple for the longest key of this DAWG
    that is a prefix of the ``key``.

    Raises ``KeyError`` if no key of this DAWG is a prefix of the ``key``.
    """
    cdef BaseType index = self.dct.root()
    cdef BaseType lastindex
    # Walk the encoded bytes, not the code points: ``ch`` is a byte-sized
    # CharType, so a non-ASCII code point cannot be followed directly, and
    # ``pos`` has to be an offset into the same byte sequence that is sliced
    # below.
    # NOTE(review): assumes keys are stored UTF-8 encoded -- confirm
    # against the code that builds the dictionary.
    cdef bytes b_key = <bytes>key.encode('utf8')
    cdef int pos = 1      # length of the prefix consumed so far, in bytes
    cdef int lastpos = 0  # length of the longest matching key; 0 == none yet
    cdef CharType ch

    for ch in b_key:
        if not self.dct.Follow(ch, &index):
            break

        if self.dct.has_value(index):
            lastpos = pos
            lastindex = index
        pos += 1

    # ``lastindex`` is only assigned together with ``lastpos``, so it is
    # never read uninitialised.
    if not lastpos:
        raise KeyError("No prefix found")
    return b_key[:lastpos].decode('utf8'), self.dct.value(lastindex)
907966

908967
# FIXME: code duplication.
909968
cdef class IntCompletionDAWG(CompletionDAWG):

tests/test_dawg.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,12 @@ def test_unicode_sorting(self):
8383
# if data is sorted according to unicode rules.
8484
dawg.DAWG([key1, key2])
8585

86-
86+
def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key that starts the query."""
    words = dawg.DAWG(["a", "as", "asdf"])
    # (query, expected longest stored prefix)
    cases = (
        ("a", "a"),
        ("as", "as"),
        ("asd", "as"),
        ("asdf", "asdf"),
    )
    for query, expected in cases:
        assert words.longest_prefix(query) == expected
8792

8893
class TestIntDAWG(object):
8994

@@ -148,6 +153,13 @@ def test_int_value_ranges(self):
148153
with pytest.raises(OverflowError):
149154
self.IntDAWG({'f': 2**32-1})
150155

156+
def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key together with its value."""
    # NOTE(review): this builds dawg.IntDAWG directly instead of
    # self.IntDAWG, so subclasses that override the ``IntDAWG`` attribute
    # re-run this test against IntDAWG itself -- confirm this is intended.
    numbered = dawg.IntDAWG([("a", 1), ("as", 2), ("asdf", 3)])
    # (query, expected (prefix, value) pair)
    cases = (
        ("a", ("a", 1)),
        ("as", ("as", 2)),
        ("asd", ("as", 2)),
        ("asdf", ("asdf", 3)),
    )
    for query, expected in cases:
        assert numbered.longest_prefix(query) == expected
162+
151163

152164
class TestIntCompletionDAWG(TestIntDAWG):
153165
IntDAWG = dawg.IntCompletionDAWG # checks that all tests for IntDAWG pass

tests/test_payload_dawg.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,12 @@ def test_build_error(self):
8383
with pytest.raises(dawg.Error):
8484
self.dawg(payload_separator=b'f')
8585

86-
86+
def test_longest_prefix(self):
    """longest_prefix() returns the longest stored key and all of its payloads."""
    items = [("a", b"a1"), ("a", b"a2"), ("as", b"as"), ("asdf", b"asdf")]
    payloads = dawg.BytesDAWG(items)
    # (query, expected (prefix, list of payloads) pair)
    cases = (
        ("a", ("a", [b"a1", b"a2"])),
        ("as", ("as", [b"as"])),
        ("asd", ("as", [b"as"])),
        ("asdf", ("asdf", [b"asdf"])),
    )
    for query, expected in cases:
        assert payloads.longest_prefix(query) == expected
8792

8893
class TestRecordDAWG(object):
8994

0 commit comments

Comments
 (0)