Skip to content

Commit 976ee4d

Browse files
authored
Merge pull request #278 from UC-Davis-molecular-computing/dev
Dev
2 parents c274ec0 + 19975dd commit 976ee4d

File tree

2 files changed

+141
-34
lines changed

2 files changed

+141
-34
lines changed

scadnano/scadnano.py

Lines changed: 79 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
# needed to use forward annotations: https://docs.python.org/3/whatsnew/3.7.html#whatsnew37-pep563
5454
from __future__ import annotations
5555

56-
__version__ = "0.18.2" # version line; WARNING: do not remove or change this line or comment
56+
__version__ = "0.18.3" # version line; WARNING: do not remove or change this line or comment
5757

5858
import collections
5959
import dataclasses
@@ -1123,11 +1123,17 @@ class ModificationInternal(Modification):
11231123
"""Internal modification of DNA sequence, e.g., biotin or Cy3."""
11241124

11251125
allowed_bases: Optional[AbstractSet[str]] = None
1126-
"""If None, then this is an internal modification that goes between bases.
1126+
"""
1127+
If None, then this is an internal modification that goes between bases.
1128+
In this case, the key :data:`Strand.modifications_int` specifying the position of the internal
1129+
modification is interpreted to mean that the modification goes *after* the base at that position.
1130+
(For example, this is the parameter `idx` in :meth:`StrandBuilder.with_modification_internal`.)
1131+
11271132
If instead it is a set of bases, then this is an internal modification that attaches to a base,
11281133
and this lists the allowed bases for this internal modification to be placed at.
11291134
For example, internal biotins for IDT must be at a T. If any base is allowed, it should be
1130-
``['A','C','G','T']``."""
1135+
``{'A','C','G','T'}``.
1136+
"""
11311137

11321138
def __post_init__(self) -> None:
11331139
super().__post_init__()
@@ -2128,6 +2134,43 @@ def strand(self) -> Strand:
21282134
raise ValueError('_parent_strand has not yet been set')
21292135
return self._parent_strand
21302136

2137+
def idt_dna_sequence(self) -> Optional[str]:
    """
    :return:
        IDT DNA sequence of this :any:`Domain`, or ``None`` if no DNA sequence has been assigned.
        The difference between this and the field :data:`Domain.dna_sequence` is that this
        will add internal modification codes.
    :raises IllegalDesignError:
        if an internal modification that has both ``idt_text`` and ``allowed_bases`` set
        is at a position whose base is not one of its ``allowed_bases``
    """
    if self.dna_sequence is None:
        return None

    strand = self.strand()

    # Keys of strand.modifications_int are positions along the whole strand, so positions local
    # to this domain are shifted by the total DNA length of the domains that come before it.
    len_dna_prior = 0
    for domain in strand.domains:
        if domain is self:
            break
        len_dna_prior += domain.dna_length()

    new_seq_list: List[str] = []
    for pos, base in enumerate(self.dna_sequence):
        new_seq_list.append(base)
        strand_pos = pos + len_dna_prior
        if strand_pos not in strand.modifications_int:
            continue  # no internal modification at this position

        mod = strand.modifications_int[strand_pos]
        if mod.idt_text is None:
            continue  # modification has no IDT code to write; keep the bare base

        if mod.allowed_bases is None:
            # modification goes between bases: write it after the base at this position
            new_seq_list.append(mod.idt_text)
            continue

        # modification attaches to a base: the IDT code replaces that base
        if base not in mod.allowed_bases:
            # sorted so that the message does not depend on set iteration order
            msg = (f'internal modification {mod} can only replace one of these bases: '
                   f'{",".join(sorted(mod.allowed_bases))}, '
                   f'but the base at position {strand_pos} is {base}')
            raise IllegalDesignError(msg)
        new_seq_list[-1] = mod.idt_text

    return ''.join(new_seq_list)
2173+
21312174
def set_name(self, name: str) -> None:
21322175
"""Sets name of this :any:`Domain`."""
21332176
self.name = name
@@ -3042,8 +3085,8 @@ def _most_recently_added_substrand_is_extension_3p(self) -> bool:
30423085

30433086
def update_to(self, offset: int) -> StrandBuilder:
30443087
"""
3045-
Like :py:meth:`StrandBuilder.to`, but changes the current offset without creating
3046-
a new :any:`Domain`. So unlike :py:meth:`StrandBuilder.to`, several consecutive calls to
3088+
Like :meth:`StrandBuilder.to`, but changes the current offset without creating
3089+
a new :any:`Domain`. So unlike :meth:`StrandBuilder.to`, several consecutive calls to
30473090
:meth:`StrandBuilder.update_to` are equivalent to only making the final call.
30483091
30493092
Generally there's no point in calling :meth:`StrandBuilder.update_to` in one line of code.
@@ -3146,19 +3189,19 @@ def with_modification_3p(self, mod: Modification3Prime) -> StrandBuilder:
31463189
self._strand.set_modification_3p(mod)
31473190
return self
31483191

3149-
def with_modification_internal(self, idx: int, mod: ModificationInternal, warn_on_no_dna: bool) \
3150-
-> StrandBuilder:
3192+
def with_modification_internal(self, idx: int, mod: ModificationInternal,
3193+
warn_no_dna: bool = True) -> StrandBuilder:
31513194
"""
31523195
Sets Strand being built to have given internal modification.
31533196
31543197
:param idx: idx along DNA sequence of internal modification
31553198
:param mod: internal modification
3156-
:param warn_on_no_dna: whether to print warning to screen if DNA has not been assigned
3199+
:param warn_no_dna: whether to print warning to screen if DNA has not been assigned
31573200
:return: self
31583201
"""
31593202
if self._strand is None:
31603203
raise ValueError('no Strand created yet; make at least one domain first')
3161-
self._strand.set_modification_internal(idx, mod, warn_on_no_dna)
3204+
self._strand.set_modification_internal(idx, mod, warn_no_dna)
31623205
return self
31633206

31643207
def with_color(self, color: Color) -> StrandBuilder:
@@ -3469,15 +3512,15 @@ class Strand(_JSONSerializable):
34693512
"""
34703513

34713514
domains: List[Union[Domain, Loopout, Extension]]
3472-
""":any:`Domain`'s (or :any:`Loopout`'s) composing this Strand.
3515+
""":any:`Domain`'s (or :any:`Loopout`'s or :any:`Extension`'s) composing this :any:`Strand`.
34733516
Each :any:`Domain` is contiguous on a single :any:`Helix`
34743517
and could be either single-stranded or double-stranded,
3475-
whereas each :any:`Loopout` is single-stranded and has no associated :any:`Helix`."""
3518+
whereas each :any:`Loopout` and :any:`Extension` is single-stranded and has no associated :any:`Helix`."""
34763519

34773520
circular: bool = False
34783521
"""If True, this :any:`Strand` is circular and has no 5' or 3' end. Although there is still a
34793522
first and last :any:`Domain`, we interpret there to be a crossover from the 3' end of the last domain
3480-
to the 5' end of the first domain, and any circular permutation of :py:data:`Strand.domains`
3523+
to the 5' end of the first domain, and any circular permutation of :data:`Strand.domains`
34813524
should result in a functionally equivalent :any:`Strand`. It is illegal to have a
34823525
:any:`Modification5Prime` or :any:`Modification3Prime` on a circular :any:`Strand`."""
34833526

@@ -3489,12 +3532,12 @@ def dna_sequence(self) -> Optional[str]:
34893532
34903533
Note that this does not include any IDT codes for :any:`Modification`'s.
34913534
To include those call :meth:`Strand.idt_dna_sequence`."""
3492-
sequence = ''
3535+
sequence_list = []
34933536
for domain in self.domains:
34943537
if domain.dna_sequence is None:
34953538
return None
3496-
sequence += domain.dna_sequence
3497-
return sequence
3539+
sequence_list.append(domain.dna_sequence)
3540+
return ''.join(sequence_list)
34983541

34993542
color: Optional[Color] = None
35003543
"""Color to show this strand in the main view. If not specified in the constructor,
@@ -4217,8 +4260,12 @@ def _ensure_domains_nonoverlapping(self) -> None:
42174260
f'\n{d1}'
42184261
f'\n{d2}')
42194262

4220-
def idt_dna_sequence(self) -> str:
4263+
def idt_dna_sequence(self, domain_delimiter: str = '') -> str:
42214264
"""
4265+
:param domain_delimiter:
4266+
string to put in between DNA sequences of each domain, and between 5'/3' modifications and DNA.
4267+
Note that the delimiter is not put between internal modifications and the next base(s)
4268+
in the same domain.
42224269
:return: DNA sequence as it needs to be typed to order from IDT, with
42234270
:py:data:`Modification5Prime`'s,
42244271
:py:data:`Modification3Prime`'s,
@@ -4232,27 +4279,17 @@ def idt_dna_sequence(self) -> str:
42324279
raise ValueError('DNA sequence has not been assigned yet')
42334280

42344281
ret_list: List[str] = []
4282+
42354283
if self.modification_5p is not None and self.modification_5p.idt_text is not None:
42364284
ret_list.append(self.modification_5p.idt_text)
42374285

4238-
for offset, base in enumerate(self.dna_sequence):
4239-
ret_list.append(base)
4240-
if offset in self.modifications_int: # if internal mod attached to base, replace base
4241-
mod = self.modifications_int[offset]
4242-
if mod.idt_text is not None:
4243-
if mod.allowed_bases is not None:
4244-
if base not in mod.allowed_bases:
4245-
msg = f'internal modification {mod} can only replace one of these bases: ' \
4246-
f'{",".join(mod.allowed_bases)}, but the base at offset {offset} is {base}'
4247-
raise IllegalDesignError(msg)
4248-
ret_list[-1] = mod.idt_text # replace base with modified base
4249-
else:
4250-
ret_list.append(mod.idt_text) # append modification between two bases
4286+
for substrand in self.domains:
4287+
ret_list.append(substrand.idt_dna_sequence())
42514288

42524289
if self.modification_3p is not None and self.modification_3p.idt_text is not None:
42534290
ret_list.append(self.modification_3p.idt_text)
42544291

4255-
return ''.join(ret_list)
4292+
return domain_delimiter.join(ret_list)
42564293

42574294
def no_modifications_version(self) -> Strand:
42584295
"""
@@ -7063,7 +7100,8 @@ def move_strands_on_helices(self, delta: int) -> None:
70637100
self._check_strands_reference_helices_legally()
70647101

70657102
def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = True,
7066-
domain: Union[Domain, Loopout, Extension] = None, check_length: bool = False) -> None:
7103+
domain: Union[Domain, Loopout, Extension, None] = None,
7104+
check_length: bool = False) -> None:
70677105
"""
70687106
Assigns `sequence` as DNA sequence of `strand`.
70697107
@@ -7172,6 +7210,7 @@ def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = Tr
71727210

71737211
def to_idt_bulk_input_format(self,
71747212
delimiter: str = ',',
7213+
domain_delimiter: str = '',
71757214
key: Optional[KeyFunction[Strand]] = None,
71767215
warn_duplicate_name: bool = False,
71777216
only_strands_with_idt: bool = False,
@@ -7203,7 +7242,7 @@ def to_idt_bulk_input_format(self,
72037242
scale = default_idt_scale
72047243
purification = default_idt_purification
72057244
idt_lines.append(delimiter.join(
7206-
[strand.idt_export_name(), strand.idt_dna_sequence(),
7245+
[strand.idt_export_name(), strand.idt_dna_sequence(domain_delimiter=domain_delimiter),
72077246
scale, purification]
72087247
))
72097248

@@ -7293,6 +7332,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
72937332
key: Optional[KeyFunction[Strand]] = None,
72947333
extension: Optional[str] = None,
72957334
delimiter: str = ',',
7335+
domain_delimiter: str = '',
72967336
warn_duplicate_name: bool = True,
72977337
only_strands_with_idt: bool = False,
72987338
export_scaffold: bool = False,
@@ -7321,7 +7361,12 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
73217361
:param extension:
73227362
alternate filename extension to use (instead of idt)
73237363
:param delimiter:
7324-
is the symbol to delimit the four IDT fields name,sequence,scale,purification.
7364+
symbol to delimit the four IDT fields name,sequence,scale,purification.
7365+
:param domain_delimiter:
7366+
This is placed between the DNA sequences of adjacent domains on a strand, and between 5'/3' modifications and the DNA. For instance, IDT
7367+
(Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) ignores spaces,
7368+
so setting `domain_delimiter` to ``' '`` will insert a space between adjacent domains while
7369+
remaining readable by IDT's website.
73257370
:param warn_duplicate_name:
73267371
if ``True`` prints a warning when two different :any:`Strand`'s have the same
73277372
:data:`IDTFields.name` and the same :data:`Strand.dna_sequence`. An :any:`IllegalDesignError` is
@@ -7344,6 +7389,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
73447389
'_nomods' appended to it.
73457390
"""
73467391
contents = self.to_idt_bulk_input_format(delimiter=delimiter,
7392+
domain_delimiter=domain_delimiter,
73477393
key=key,
73487394
warn_duplicate_name=warn_duplicate_name,
73497395
only_strands_with_idt=only_strands_with_idt,

tests/scadnano_tests.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def test_strand__multiple_strands_overlap_no_error(self) -> None:
482482
design = self.design_6helix
483483
design.draw_strand(0, 0).to(10).cross(1).to(0) \
484484
.as_scaffold() \
485-
.with_modification_internal(5, mod.cy3_int, warn_on_no_dna=False)
485+
.with_modification_internal(5, mod.cy3_int, warn_no_dna=False)
486486
design.draw_strand(0, 10).to(0).cross(1).to(10).with_modification_5p(mod.biotin_5p)
487487
expected_strand0 = sc.Strand([
488488
sc.Domain(0, True, 0, 10),
@@ -1098,6 +1098,67 @@ def _get_names_idt(design: sc.Design, key: sc.KeyFunction[sc.Strand]) -> str:
10981098
names_joined = ''.join(names)
10991099
return names_joined
11001100

1101+
def test_domain_delimiters(self) -> None:
    """IDT export puts ``domain_delimiter`` between the sequences of adjacent domains."""
    strand_name = 's1'
    design = sc.Design(
        helices=[sc.Helix(max_offset=100) for _ in range(6)],
        strands=[],
        grid=sc.square,
    )

    # three-domain strand, one DNA sequence assigned per domain
    (design.draw_strand(0, 0).move(5).with_domain_sequence('AAAAA')
     .cross(1).move(-5).with_domain_sequence('CCCCC')
     .cross(2).move(5).with_domain_sequence('GGGGG')
     .with_name(strand_name))

    expected_content = f'{strand_name},AAAAA CCCCC GGGGG,25nm,STD'
    actual_content = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ')
    self.assertEqual(expected_content, actual_content)
1111+
1112+
def test_domain_delimiters_modifications(self) -> None:
1113+
strand_name = 's1'
1114+
mod_5 = sc.Modification5Prime(display_text='B', idt_text='/5Biosg/')
1115+
mod_3 = sc.Modification3Prime(display_text='Cy3', idt_text='/3Cy3Sp/')
1116+
mod_i = sc.ModificationInternal(display_text='B', idt_text='/iBiodT/', allowed_bases={'T'})
1117+
1118+
helices = [sc.Helix(max_offset=100) for _ in range(6)]
1119+
design = sc.Design(helices=helices, strands=[], grid=sc.square)
1120+
1121+
(design.draw_strand(0, 0)
1122+
.move(5).with_domain_sequence('AAAAA')
1123+
.cross(1).move(-5).with_domain_sequence('CCCCT')
1124+
.cross(2).move(5).with_domain_sequence('GGGGG')
1125+
.with_name(strand_name)
1126+
.with_modification_5p(mod_5)
1127+
.with_modification_internal(9, mod_i)
1128+
.with_modification_3p(mod_3)
1129+
)
1130+
1131+
strand = design.strands[0]
1132+
strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ')
1133+
self.assertEqual('/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/', strand_idt_dna_sequence)
1134+
1135+
idt_content = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ')
1136+
self.assertEqual(f'{strand_name};/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/;25nm;STD',
1137+
idt_content)
1138+
1139+
def test_domain_delimiters_internal_nonbase_modifications(self) -> None:
1140+
strand_name = 's1'
1141+
mod_i = sc.ModificationInternal(display_text='9C', idt_text='/iSp9/')
1142+
1143+
helices = [sc.Helix(max_offset=100) for _ in range(6)]
1144+
design = sc.Design(helices=helices, strands=[], grid=sc.square)
1145+
1146+
(design.draw_strand(0, 0)
1147+
.move(5).with_domain_sequence('AAAAA')
1148+
.cross(1).move(-5).with_domain_sequence('CCCCT')
1149+
.cross(2).move(5).with_domain_sequence('GGGGG')
1150+
.with_name(strand_name)
1151+
.with_modification_internal(8, mod_i)
1152+
)
1153+
1154+
strand = design.strands[0]
1155+
strand_idt_dna_sequence = strand.idt_dna_sequence(domain_delimiter=' ')
1156+
self.assertEqual('AAAAA CCCC/iSp9/T GGGGG', strand_idt_dna_sequence)
1157+
1158+
idt_content = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ')
1159+
self.assertEqual(f'{strand_name};AAAAA CCCC/iSp9/T GGGGG;25nm;STD',
1160+
idt_content)
1161+
11011162
def test_to_idt_bulk_input_format__row_major_5p(self) -> None:
11021163
key = sc.strand_order_key_function(column_major=False, strand_order=sc.StrandOrder.five_prime)
11031164
names_joined = self._get_names_idt(self.design_6h, key)

0 commit comments

Comments
 (0)