Skip to content

Commit

Permalink
python: format
Browse files Browse the repository at this point in the history
  • Loading branch information
kloetzl committed Jul 9, 2024
1 parent ab1d68d commit 47285a1
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 186 deletions.
1 change: 1 addition & 0 deletions Makefile.Maintainer
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ format:
clang-format -i test/*.cxx test/*.h
clang-format -i rttest/*.c rttest/*.cxx
clang-format -i examples/*.c examples/*.cxx
ruff format python/libdna

builddir:
meson $@
Expand Down
24 changes: 12 additions & 12 deletions python/examples/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@


def main():
    """Print a random DNA sequence preceded by a ``>rnd seed=...`` header line.

    Command-line options:
        -s  integer seed for the generator (default 1729)
        -l  integer length of the sequence to emit (default 80)
    """
    parser = argparse.ArgumentParser(
        prog="random", description="Create a random string of DNA"
    )
    parser.add_argument("-s", type=int, help="seed", default=1729)
    parser.add_argument("-l", type=int, help="length", default=80)

    args = parser.parse_args()
    seed = args.s
    length = args.l

    # `{seed=}` expands to `seed=<value>`, so the header records the seed used.
    print(f">rnd {seed=}")
    print(dna4.random(length, seed))


if __name__ == "__main__":
    main()
97 changes: 48 additions & 49 deletions python/libdna/dna.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,61 @@
# SPDX-License-Identifier: MIT
# Copyright 2024 (C) Fabian Klötzl


def version() -> int:
    """Return the library version number (currently always 0)."""
    return 0


def _uint64(value: int) -> int:
return value & 0xffffffffFFFFFFFF
return value & 0xFFFFFFFFFFFFFFFF


def ihash(value: int) -> int:
    """Hash an integer to a 64-bit value using a fixed sequence of mixing steps.

    The input is first reduced to 64 bits. Every multiply, add and left shift
    is wrapped with ``_uint64`` so the arithmetic behaves like unsigned 64-bit
    integer math. ``ihash_invert`` applies the reverse steps.
    """
    key = _uint64(value)
    key = _uint64(key * 0x7B215EEDEC9E1967)
    key ^= key >> 37
    key = _uint64(key + 0x16C5C874EA637686)
    key ^= _uint64(key << 11)
    key = _uint64(key * 0xFF7DBF225491D985)
    key ^= key >> 15
    key = _uint64(key * 0x3D5FE41DE2EA4E4F)
    key ^= key >> 19
    key = _uint64(key + 0x44EE2CFCCA48954E)
    key ^= key >> 23
    key = _uint64(key * 0x4F7659E92097460B)
    key ^= key >> 29
    return key


def ihash_invert(value: int) -> int:
    """Recover the 64-bit input of ``ihash`` from its output.

    The steps of ``ihash`` are undone in reverse order:

    * ``x ^= x >> s`` / ``x ^= x << s`` is undone by re-applying the shift
      to a running estimate until every bit has been reconstructed;
    * ``x += c`` is undone by subtracting ``c``;
    * ``x *= m`` is undone by multiplying with a paired constant, which is
      expected to be the inverse of ``m`` modulo 2**64.

    The argument is reduced to 64 bits first, mirroring ``ihash``.
    """
    key = _uint64(value)

    # Undo `key ^= key >> 29`.
    tmp = key ^ (key >> 29)
    tmp = key ^ (tmp >> 29)
    key = key ^ (tmp >> 29)
    # Undo the multiply by 0x4F7659E92097460B.
    key = _uint64(key * 0x66E53F4C3FFE95A3)

    # Undo `key ^= key >> 23`.
    tmp = key ^ (key >> 23)
    tmp = key ^ (tmp >> 23)
    key = key ^ (tmp >> 23)
    key = _uint64(key - 0x44EE2CFCCA48954E)

    # Undo `key ^= key >> 19`.
    tmp = key ^ (key >> 19)
    tmp = key ^ (tmp >> 19)
    tmp = key ^ (tmp >> 19)
    key = key ^ (tmp >> 19)
    # Undo the multiply by 0x3D5FE41DE2EA4E4F.
    key = _uint64(key * 0xF304917F13FE08AF)

    # Undo `key ^= key >> 15`.
    tmp = key ^ (key >> 15)
    tmp = key ^ (tmp >> 15)
    tmp = key ^ (tmp >> 15)
    key = key ^ (tmp >> 15)
    # Undo the multiply by 0xFF7DBF225491D985.
    key = _uint64(key * 0x398B71BE7AAE374D)

    # Undo `key ^= key << 11`. Bits above 64 pile up here; they are discarded
    # by the `_uint64` around the subtraction that follows.
    tmp = key ^ (key << 11)
    tmp = key ^ (tmp << 11)
    tmp = key ^ (tmp << 11)
    tmp = key ^ (tmp << 11)
    tmp = key ^ (tmp << 11)
    tmp = key ^ (tmp << 11)
    key = key ^ (tmp << 11)
    key = _uint64(key - 0x16C5C874EA637686)

    # `key ^= key >> 37` is its own inverse on 64 bits (2 * 37 >= 64).
    key ^= key >> 37
    # Undo the multiply by 0x7B215EEDEC9E1967.
    key = _uint64(key * 0xEE84443B086EF257)
    return key
99 changes: 45 additions & 54 deletions python/libdna/dna4.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,75 @@

_comp = str.maketrans("ACGT", "TGCA")

_NOISE1 = 0xb5297a4d # 0b1011'0101'0010'1001'0111'1010'0100'1101
_NOISE2 = 0x68e31da4 # 0b0110'1000'1110'0011'0001'1101'1010'0100
_NOISE3 = 0x1b56c4e9 # 0b0001'1011'0101'0110'1100'0100'1110'1001
_NOISE4 = 0xaaea97a5 # determined by fair dice roll
_NOISE1 = 0xB5297A4D # 0b1011'0101'0010'1001'0111'1010'0100'1101
_NOISE2 = 0x68E31DA4 # 0b0110'1000'1110'0011'0001'1101'1010'0100
_NOISE3 = 0x1B56C4E9 # 0b0001'1011'0101'0110'1100'0100'1110'1001
_NOISE4 = 0xAAEA97A5 # determined by fair dice roll

_str2int = {"A": 0, "C": 1, "G": 2, "T": 3}
_int2str = "ACGT"


def _uint32(value: int) -> int:
return value & 0xffffffff
return value & 0xFFFFFFFF


def _squirrel3(n: int, seed: int) -> int:
    """Mix position *n* with *seed* into a 32-bit noise value.

    Every step is wrapped with ``_uint32`` so the arithmetic behaves like
    unsigned 32-bit integer math. The same ``(n, seed)`` pair always yields
    the same result.
    """
    n = _uint32(n * _NOISE1)
    n = _uint32(n + seed)
    n = _uint32(n ^ (n >> 8))
    n = _uint32(n + _NOISE2)
    n = _uint32(n ^ (n << 13))
    n = _uint32(n * _NOISE3)
    n = _uint32(n ^ (n >> 17))
    return n


def _int2chars(value: int) -> str:
    """Turn an integer into four nucleotides.

    One nucleotide is taken from the two lowest bits of each of the four
    low-order bytes of *value*, starting with the least significant byte.
    """
    a = _int2str[value & 3]
    value >>= 8
    b = _int2str[value & 3]
    value >>= 8
    c = _int2str[value & 3]
    value >>= 8
    d = _int2str[value & 3]
    return a + b + c + d


def revcomp(seq: str) -> str:
    """Compute the reverse complement.

    The sequence is reversed and each of A, C, G, T is swapped for its
    complement. Characters not in the ``_comp`` table pass through unchanged.
    """
    return seq[::-1].translate(_comp)


def random(length: int, seed: int) -> str:
    """Generate a string of random nucleotides.

    The output is deterministic: the same *length* and *seed* always give the
    same sequence of *length* characters.
    """
    # Each noise value yields four characters; the extra word covers lengths
    # that are not a multiple of four, and the final slice trims the excess.
    length4 = length // 4 + 1
    # Scramble the user-supplied seed once before using it for every position.
    seed = _squirrel3(seed, _NOISE4)
    ints = (_squirrel3(value, seed) for value in range(length4))
    chars = (_int2chars(value) for value in ints)
    return "".join(chars)[:length]


def count_mismatches_rc(seq1: str, seq2: str) -> int:
    """Compute the number of mismatches between one sequence and the revcomp of another.

    Positions are compared pairwise with ``zip``, so the comparison stops at
    the end of the shorter sequence.
    """
    rc = revcomp(seq2)
    return sum(a != b for a, b in zip(seq1, rc))


def pack_2bits(kmer: str) -> int:
    """Pack a kmer into an int using two bits per nucleotide.

    Only the first 32 characters are packed (32 * 2 = 64 bits); anything
    beyond that is ignored. The first nucleotide ends up in the most
    significant bit pair.

    Raises:
        KeyError: if a packed character is not one of A, C, G, T.
    """
    value = 0
    for c in kmer[:32]:
        value <<= 2
        value |= _str2int[c]
    return value


def unpack_2bits(packed: int, k: int) -> str:
    """Unpack a kmer of length *k* from an integer using two bits per nucleotide.

    The lowest bit pair of *packed* becomes the last character, so the string
    is filled from its end towards its start.
    """
    seq = ["_"] * k
    while k:
        seq[k - 1] = _int2str[packed & 3]
        packed >>= 2
        k -= 1
    return "".join(seq)
64 changes: 29 additions & 35 deletions python/libdna/dnax.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,71 +5,65 @@


def count(seq: str) -> Counter[str]:
    """Count the occurrences of each character in *seq*."""
    return Counter(seq)


def count_mismatches(seq1: str, seq2: str) -> int:
    """Compute the number of mismatches between two sequences.

    Positions are compared pairwise with ``zip``, so the comparison stops at
    the end of the shorter sequence; surplus characters are not counted.
    """
    return sum(a != b for a, b in zip(seq1, seq2))


def _replace(from_: str, to: str) -> str:
import re
exclude = re.compile(f"[^{from_}]")
tab = str.maketrans(from_, to)
import re

return lambda seq: exclude.sub("", seq).translate(tab)
exclude = re.compile(f"[^{from_}]")
tab = str.maketrans(from_, to)

return lambda seq: exclude.sub("", seq).translate(tab)


# Keeps a/c/g/t/u in either case, upper-casing them and turning U into T.
_extract_dna4 = _replace("acgtACGTuU", "ACGTACGTTT")


def extract_dna4(seq: str) -> str:
    """Extract the subsequence of only canonical nucleotides.

    Lower-case letters are upper-cased, ``U``/``u`` becomes ``T``, and every
    other character is removed.
    """
    return _extract_dna4(seq)


def _first_pos_where(it) -> int:
for i, a in enumerate(it):
if a:
return i
m = i
return m + 1
for i, a in enumerate(it):
if a:
return i
m = i
return m + 1


def find_first_mismatch(seq1: str, seq2: str) -> int:
    """Give the index of the first mismatch between two sequences.

    Positions are compared pairwise with ``zip``, so only the overlap of the
    two sequences is examined. If the overlap is non-empty and contains no
    mismatch, its length is returned.
    """
    return _first_pos_where(a != b for a, b in zip(seq1, seq2))


def find_first_of(seq: str, chars) -> int:
    """Give the index of the first character matching the given set.

    *chars* may be any container supporting ``in``. If a non-empty *seq*
    contains no matching character, ``len(seq)`` is returned.
    """
    return _first_pos_where(c in chars for c in seq)


def find_first_not_of(seq: str, chars) -> int:
    """Give the index of the first character not matching the given set.

    *chars* may be any container supporting ``in``. If every character of a
    non-empty *seq* is in *chars*, ``len(seq)`` is returned.
    """
    return _first_pos_where(c not in chars for c in seq)


def replace(seq: str, from_: str, to: str) -> str:
    """Replace all occurrences of one set of characters with another set of characters.

    Each character of *from_* is mapped to the character at the same position
    in *to*. Characters of *seq* that do not occur in *from_* are removed
    rather than kept, because the work is delegated to ``_replace``.
    """
    return _replace(from_, to)(seq)


# Complement table covering the nucleotide codes a, b, c, d, g, h, k, m, n,
# r, s, t, u, v, w, y in both cases; characters outside it are dropped.
_revcomp = _replace(
    "abcdghkmnrstuvwyABCDGHKMNRSTUVWY", "tvghcdmknysaabwrTVGHCDMKNYSAABWR"
)


def revcomp(seq: str) -> str:
    """Compute the reverse complement.

    The sequence is reversed and each character is replaced by its complement
    from the ``_revcomp`` table, preserving case. Characters that are not in
    the table are removed from the result.
    """
    return _revcomp(seq[::-1])
5 changes: 2 additions & 3 deletions python/libdna/test_dna.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,5 @@


def test_basic():
    """ihash_invert must undo ihash, including at the 64-bit boundaries."""
    a = ihash(1729)
    assert ihash_invert(a) == 1729

    for value in (0, 1, 2**63, 2**64 - 1):
        assert ihash_invert(ihash(value)) == value
Loading

0 comments on commit 47285a1

Please sign in to comment.