Skip to content

Commit

Permalink
Merge pull request #24 from JonathanDZiegler/fix_bug_in_TRRMSADataset
Browse files (browse the repository at this point in the history)
Fix bug in trrmsa dataset
  • Loading branch information
yangkky authored Nov 13, 2023
2 parents 418223d + cf61dd1 commit 32e3bd8
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions evodiff/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def __getitem__(self, idx):
slice_start = 0
seq_len = msa_seq_len

- sliced_msa = msa[:, slice_start: slice_start + self.max_seq_len]
+ sliced_msa = msa[:, slice_start: slice_start + seq_len]
anchor_seq = sliced_msa[0] # This is the query sequence in MSA

sliced_msa = [list(seq) for seq in sliced_msa if (list(set(seq)) != [self.tokenizer.alphabet.index(GAP)])]
Expand All @@ -271,7 +271,7 @@ def __getitem__(self, idx):
anchor_seq = np.expand_dims(anchor_seq, axis=0)
output = np.concatenate((anchor_seq, sliced_msa[random_idx]), axis=0)
elif self.selection_type == 'non-random':
- output = sliced_msa[:64]
+ output = sliced_msa[:self.n_sequences]
elif self.selection_type == "MaxHamming":
output = [list(anchor_seq)]
msa_subset = sliced_msa[1:]
Expand Down

0 comments on commit 32e3bd8

Please sign in to comment.