Skip to content

Commit 91936b2

Browse files
authored
Merge pull request #277 from UC-Davis-molecular-computing/276-customize-delimiter-between-domains-in-exported-dna-sequences
276 customize delimiter between domains in exported dna sequences
2 parents 301edc4 + 93431b4 commit 91936b2

File tree

2 files changed

+110
-32
lines changed

2 files changed

+110
-32
lines changed

scadnano/scadnano.py

Lines changed: 71 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,7 +1127,7 @@ class ModificationInternal(Modification):
11271127
If instead it is a list of bases, then this is an internal modification that attaches to a base,
11281128
and this lists the allowed bases for this internal modification to be placed at.
11291129
For example, internal biotins for IDT must be at a T. If any base is allowed, it should be
1130-
``['A','C','G','T']``."""
1130+
``{'A','C','G','T'}``."""
11311131

11321132
def __post_init__(self) -> None:
11331133
super().__post_init__()
@@ -2128,6 +2128,43 @@ def strand(self) -> Strand:
21282128
raise ValueError('_parent_strand has not yet been set')
21292129
return self._parent_strand
21302130

2131+
def idt_dna_sequence(self) -> Optional[str]:
    """
    :return:
        IDT DNA sequence of this :any:`Domain`, or ``None`` if no DNA sequence has been assigned.
        The difference between this and the field :data:`Domain.dna_sequence` is that this
        will add internal modification codes.
    :raises IllegalDesignError:
        if an internal modification that restricts its allowed bases is attached to a base
        that is not one of them
    """
    if self.dna_sequence is None:
        return None

    strand = self.strand()

    # Internal modifications are keyed by position along the whole strand, so count
    # the bases of every domain that precedes this one on the strand.
    len_dna_prior = 0
    for domain in strand.domains:
        if domain is self:
            break
        len_dna_prior += domain.dna_length()

    modifications_int = strand.modifications_int
    new_seq_list = []
    for strand_pos, base in enumerate(self.dna_sequence, start=len_dna_prior):
        mod = modifications_int.get(strand_pos)
        if mod is None or mod.idt_text is None:
            # no modification here, or one with no IDT code: emit the plain base
            new_seq_list.append(base)
        elif mod.allowed_bases is None:
            # modification goes between two bases: keep the base and append the code after it
            new_seq_list.append(base)
            new_seq_list.append(mod.idt_text)
        elif base in mod.allowed_bases:
            # modification is attached to the base: the code replaces the base
            new_seq_list.append(mod.idt_text)
        else:
            # sort so the message is deterministic even when allowed_bases is a set
            msg = (f'internal modification {mod} can only replace one of these bases: '
                   f'{",".join(sorted(mod.allowed_bases))}, '
                   f'but the base at position {strand_pos} is {base}')
            raise IllegalDesignError(msg)

    return ''.join(new_seq_list)
2167+
21312168
def set_name(self, name: str) -> None:
21322169
"""Sets name of this :any:`Domain`."""
21332170
self.name = name
@@ -3042,8 +3079,8 @@ def _most_recently_added_substrand_is_extension_3p(self) -> bool:
30423079

30433080
def update_to(self, offset: int) -> StrandBuilder:
30443081
"""
3045-
Like :py:meth:`StrandBuilder.to`, but changes the current offset without creating
3046-
a new :any:`Domain`. So unlike :py:meth:`StrandBuilder.to`, several consecutive calls to
3082+
Like :meth:`StrandBuilder.to`, but changes the current offset without creating
3083+
a new :any:`Domain`. So unlike :meth:`StrandBuilder.to`, several consecutive calls to
30473084
:meth:`StrandBuilder.update_to` are equivalent to only making the final call.
30483085
30493086
Generally there's no point in calling :meth:`StrandBuilder.update_to` in one line of code.
@@ -3146,19 +3183,19 @@ def with_modification_3p(self, mod: Modification3Prime) -> StrandBuilder:
31463183
self._strand.set_modification_3p(mod)
31473184
return self
31483185

3149-
def with_modification_internal(self, idx: int, mod: ModificationInternal, warn_on_no_dna: bool) \
3150-
-> StrandBuilder:
3186+
def with_modification_internal(self, idx: int, mod: ModificationInternal,
3187+
warn_no_dna: bool = True) -> StrandBuilder:
31513188
"""
31523189
Sets Strand being built to have given internal modification.
31533190
31543191
:param idx: idx along DNA sequence of internal modification
31553192
:param mod: internal modification
3156-
:param warn_on_no_dna: whether to print warning to screen if DNA has not been assigned
3193+
:param warn_no_dna: whether to print warning to screen if DNA has not been assigned
31573194
:return: self
31583195
"""
31593196
if self._strand is None:
31603197
raise ValueError('no Strand created yet; make at least one domain first')
3161-
self._strand.set_modification_internal(idx, mod, warn_on_no_dna)
3198+
self._strand.set_modification_internal(idx, mod, warn_no_dna)
31623199
return self
31633200

31643201
def with_color(self, color: Color) -> StrandBuilder:
@@ -3469,15 +3506,15 @@ class Strand(_JSONSerializable):
34693506
"""
34703507

34713508
domains: List[Union[Domain, Loopout, Extension]]
3472-
""":any:`Domain`'s (or :any:`Loopout`'s) composing this Strand.
3509+
""":any:`Domain`'s (or :any:`Loopout`'s or :any:`Extension`'s) composing this :any:`Strand`.
34733510
Each :any:`Domain` is contiguous on a single :any:`Helix`
34743511
and could be either single-stranded or double-stranded,
3475-
whereas each :any:`Loopout` is single-stranded and has no associated :any:`Helix`."""
3512+
whereas each :any:`Loopout` and :any:`Extension` is single-stranded and has no associated :any:`Helix`."""
34763513

34773514
circular: bool = False
34783515
"""If True, this :any:`Strand` is circular and has no 5' or 3' end. Although there is still a
34793516
first and last :any:`Domain`, we interpret there to be a crossover from the 3' end of the last domain
3480-
to the 5' end of the first domain, and any circular permutation of :py:data:`Strand.domains`
3517+
to the 5' end of the first domain, and any circular permutation of :data:`Strand.domains`
34813518
should result in a functionally equivalent :any:`Strand`. It is illegal to have a
34823519
:any:`Modification5Prime` or :any:`Modification3Prime` on a circular :any:`Strand`."""
34833520

@@ -3489,12 +3526,12 @@ def dna_sequence(self) -> Optional[str]:
34893526
34903527
Note that this does not include any IDT codes for :any:`Modification`'s.
34913528
To include those call :meth:`Strand.idt_dna_sequence`."""
3492-
sequence = ''
3529+
sequence_list = []
34933530
for domain in self.domains:
34943531
if domain.dna_sequence is None:
34953532
return None
3496-
sequence += domain.dna_sequence
3497-
return sequence
3533+
sequence_list.append(domain.dna_sequence)
3534+
return ''.join(sequence_list)
34983535

34993536
color: Optional[Color] = None
35003537
"""Color to show this strand in the main view. If not specified in the constructor,
@@ -4217,8 +4254,12 @@ def _ensure_domains_nonoverlapping(self) -> None:
42174254
f'\n{d1}'
42184255
f'\n{d2}')
42194256

4220-
def idt_dna_sequence(self) -> str:
4257+
def idt_dna_sequence(self, domain_delimiter: str = '') -> str:
42214258
"""
4259+
:param domain_delimiter:
4260+
string to put in between DNA sequences of each domain, and between 5'/3' modifications and DNA.
4261+
Note that the delimiter is not put between internal modifications and the next base(s)
4262+
in the same domain.
42224263
:return: DNA sequence as it needs to be typed to order from IDT, with
42234264
:py:data:`Modification5Prime`'s,
42244265
:py:data:`Modification3Prime`'s,
@@ -4232,27 +4273,17 @@ def idt_dna_sequence(self) -> str:
42324273
raise ValueError('DNA sequence has not been assigned yet')
42334274

42344275
ret_list: List[str] = []
4276+
42354277
if self.modification_5p is not None and self.modification_5p.idt_text is not None:
42364278
ret_list.append(self.modification_5p.idt_text)
42374279

4238-
for offset, base in enumerate(self.dna_sequence):
4239-
ret_list.append(base)
4240-
if offset in self.modifications_int: # if internal mod attached to base, replace base
4241-
mod = self.modifications_int[offset]
4242-
if mod.idt_text is not None:
4243-
if mod.allowed_bases is not None:
4244-
if base not in mod.allowed_bases:
4245-
msg = f'internal modification {mod} can only replace one of these bases: ' \
4246-
f'{",".join(mod.allowed_bases)}, but the base at offset {offset} is {base}'
4247-
raise IllegalDesignError(msg)
4248-
ret_list[-1] = mod.idt_text # replace base with modified base
4249-
else:
4250-
ret_list.append(mod.idt_text) # append modification between two bases
4280+
for substrand in self.domains:
4281+
ret_list.append(substrand.idt_dna_sequence())
42514282

42524283
if self.modification_3p is not None and self.modification_3p.idt_text is not None:
42534284
ret_list.append(self.modification_3p.idt_text)
42544285

4255-
return ''.join(ret_list)
4286+
return domain_delimiter.join(ret_list)
42564287

42574288
def no_modifications_version(self) -> Strand:
42584289
"""
@@ -7063,7 +7094,8 @@ def move_strands_on_helices(self, delta: int) -> None:
70637094
self._check_strands_reference_helices_legally()
70647095

70657096
def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = True,
7066-
domain: Union[Domain, Loopout, Extension] = None, check_length: bool = False) -> None:
7097+
domain: Union[Domain, Loopout, Extension, None] = None,
7098+
check_length: bool = False) -> None:
70677099
"""
70687100
Assigns `sequence` as DNA sequence of `strand`.
70697101
@@ -7172,6 +7204,7 @@ def assign_dna(self, strand: Strand, sequence: str, assign_complement: bool = Tr
71727204

71737205
def to_idt_bulk_input_format(self,
71747206
delimiter: str = ',',
7207+
domain_delimiter: str = '',
71757208
key: Optional[KeyFunction[Strand]] = None,
71767209
warn_duplicate_name: bool = False,
71777210
only_strands_with_idt: bool = False,
@@ -7203,7 +7236,7 @@ def to_idt_bulk_input_format(self,
72037236
scale = default_idt_scale
72047237
purification = default_idt_purification
72057238
idt_lines.append(delimiter.join(
7206-
[strand.idt_export_name(), strand.idt_dna_sequence(),
7239+
[strand.idt_export_name(), strand.idt_dna_sequence(domain_delimiter=domain_delimiter),
72077240
scale, purification]
72087241
))
72097242

@@ -7293,6 +7326,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
72937326
key: Optional[KeyFunction[Strand]] = None,
72947327
extension: Optional[str] = None,
72957328
delimiter: str = ',',
7329+
domain_delimiter: str = '',
72967330
warn_duplicate_name: bool = True,
72977331
only_strands_with_idt: bool = False,
72987332
export_scaffold: bool = False,
@@ -7321,7 +7355,12 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
73217355
:param extension:
73227356
alternate filename extension to use (instead of idt)
73237357
:param delimiter:
7324-
is the symbol to delimit the four IDT fields name,sequence,scale,purification.
7358+
symbol to delimit the four IDT fields name,sequence,scale,purification.
7359+
:param domain_delimiter:
7360+
String placed between the DNA sequences of adjacent domains on a strand (and between any 5'/3' modification and the adjacent DNA). For instance, IDT
7361+
(Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) ignores spaces,
7362+
so setting `domain_delimiter` to ``' '`` will insert a space between adjacent domains while
7363+
remaining readable by IDT's website.
73257364
:param warn_duplicate_name:
73267365
if ``True`` prints a warning when two different :any:`Strand`'s have the same
73277366
:data:`IDTFields.name` and the same :data:`Strand.dna_sequence`. An :any:`IllegalDesignError` is
@@ -7344,6 +7383,7 @@ def write_idt_bulk_input_file(self, *, directory: str = '.', filename: str = Non
73447383
'_nomods' appended to it.
73457384
"""
73467385
contents = self.to_idt_bulk_input_format(delimiter=delimiter,
7386+
domain_delimiter=domain_delimiter,
73477387
key=key,
73487388
warn_duplicate_name=warn_duplicate_name,
73497389
only_strands_with_idt=only_strands_with_idt,

tests/scadnano_tests.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def test_strand__multiple_strands_overlap_no_error(self) -> None:
482482
design = self.design_6helix
483483
design.draw_strand(0, 0).to(10).cross(1).to(0) \
484484
.as_scaffold() \
485-
.with_modification_internal(5, mod.cy3_int, warn_on_no_dna=False)
485+
.with_modification_internal(5, mod.cy3_int, warn_no_dna=False)
486486
design.draw_strand(0, 10).to(0).cross(1).to(10).with_modification_5p(mod.biotin_5p)
487487
expected_strand0 = sc.Strand([
488488
sc.Domain(0, True, 0, 10),
@@ -1098,6 +1098,44 @@ def _get_names_idt(design: sc.Design, key: sc.KeyFunction[sc.Strand]) -> str:
10981098
names_joined = ''.join(names)
10991099
return names_joined
11001100

1101+
def test_domain_delimiters(self) -> None:
    """Check that ``domain_delimiter`` separates the domain sequences in the IDT bulk export."""
    strand_name = 's1'
    design = sc.Design(
        helices=[sc.Helix(max_offset=100) for _ in range(6)],
        strands=[],
        grid=sc.square,
    )

    # build one strand out of three domains, each with its own 5-base sequence
    builder = design.draw_strand(0, 0)
    builder = builder.move(5).with_domain_sequence('AAAAA')
    builder = builder.cross(1).move(-5).with_domain_sequence('CCCCC')
    builder = builder.cross(2).move(5).with_domain_sequence('GGGGG')
    builder.with_name(strand_name)

    expected = f'{strand_name},AAAAA CCCCC GGGGG,25nm,STD'
    actual = design.to_idt_bulk_input_format(delimiter=',', domain_delimiter=' ')
    self.assertEqual(expected, actual)
1111+
1112+
def test_domain_delimiters_modifications(self) -> None:
    """Check ``domain_delimiter`` placement when the strand has 5', internal and 3' modifications."""
    strand_name = 's1'
    five_prime = sc.Modification5Prime(display_text='B', idt_text='/5Biosg/')
    three_prime = sc.Modification3Prime(display_text='Cy3', idt_text='/3Cy3Sp/')
    internal = sc.ModificationInternal(display_text='B', idt_text='/iBiodT/', allowed_bases={'T'})

    design = sc.Design(
        helices=[sc.Helix(max_offset=100) for _ in range(6)],
        strands=[],
        grid=sc.square,
    )

    # three domains; the internal modification at strand index 9 lands on the final T of 'CCCCT'
    builder = design.draw_strand(0, 0)
    builder = builder.move(5).with_domain_sequence('AAAAA')
    builder = builder.cross(1).move(-5).with_domain_sequence('CCCCT')
    builder = builder.cross(2).move(5).with_domain_sequence('GGGGG')
    builder = builder.with_name(strand_name)
    builder = builder.with_modification_5p(five_prime)
    builder = builder.with_modification_internal(9, internal)
    builder.with_modification_3p(three_prime)

    # the delimiter appears between domains and next to the 5'/3' codes,
    # but not around the internal modification inside a domain
    only_strand = design.strands[0]
    sequence_from_strand = only_strand.idt_dna_sequence(domain_delimiter=' ')
    self.assertEqual('/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/', sequence_from_strand)

    bulk_export = design.to_idt_bulk_input_format(delimiter=';', domain_delimiter=' ')
    expected_export = f'{strand_name};/5Biosg/ AAAAA CCCC/iBiodT/ GGGGG /3Cy3Sp/;25nm;STD'
    self.assertEqual(expected_export, bulk_export)
1138+
11011139
def test_to_idt_bulk_input_format__row_major_5p(self) -> None:
11021140
key = sc.strand_order_key_function(column_major=False, strand_order=sc.StrandOrder.five_prime)
11031141
names_joined = self._get_names_idt(self.design_6h, key)

0 commit comments

Comments
 (0)