-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/nirvana 3.2.5 improvements (#608)
* Bugfix/nm 022148 set 1333 483 (#586) * Fixed the gene models for NM_022148.2 on chrX and chrY. Cleaned up some code in Transcript.cs. Added some more debugging methods to TranscriptValidator. * Fixed the gene model for NM_001220765.1. * Fixed NM_001220768.1 gene model. * Added introns for NM_001220768.1 * Fixed NM_006060.4 * Fixed NM_001220767.1 * Fixed NM_001220770.1 * Fixed NM_001220766.1 * Fixed NM_001220769.1 * Fixed NM_001220776.1 * Fixed NM_001220775.1 * Added support for updating the gene intervals after transcript correction. This unfortunately affects all intervals in Nirvana. * Added more tests to the TranscriptValidator * Fixed NM_001220771.1 * Verified NM_001220772.1 * Verified NM_001220774.1 * Updated NM_000314.4 * Re-sort the transcripts after updating coordinates. * Fixed unit tests * Updated the version number * Disabled debug testing. * Updated transcript models again using ANNOTATION_RELEASE.105 * Reverted interval (transcript & gene coordinates) setting logic since it's no longer needed. * Revert re-sorting after reading the transcripts from the cache. * Removed trailing commas and redundant using directives. * Added poly-A tails to NM_002834.3, NM_014953.3, NM_016507.2 * Improved the sequence comparison logic. Including all TSO500 transcripts, not just canonical. Added poly(A) issue detection. Fixed poly(A) issues in the transcripts. * Fixing line endings and file permissions * Fixed all the known poly(A) and substitution issues in the TSO500 transcripts * Separated amino acid abbreviations into a separate, dedicated class. Added AminoAcidEdit class. Added mocked data to unit tests. Replaced Selenocysteines with AminoAcidEdits in Transcript class. Updated NuGet dependencies. Updated version to 3.2.5. Got rid of the unnecessary SequenceChange object. Simplified the stop codon constant in AminoAcids. Updated the transcripts definitions for NM_001012288.1, NM_001012288.2, NM_022148.3, NM_003954.3, and NM_003954.4. 
Defined AminoAcidEdits for our non-AUG transcripts. * Retired old AminoAcids class in favor of the optimized AminoAcid class. Created two static amino acid implementations for standard and mitochondrial codons. Removed SequenceChange to prevent unnecessary object creation. * Added support for AA edits in TranslateBases2. * Updated TranslateBases and Translate to accept an AA start position and AA edits. Updated HGVS p. algorithms to use the AA edits. * Fixed AA edits for NM_001317010.1. * Updated AWS NuGet packages. Improved GetCoveredCdsAndProteinPositions to partially fix covered CDS calculation. Replaced ? AA in GetChangesAfterFrameshift with X. * Updated HGVS p. notation to support p.Met1? syntax instead of p.? when there's a variant affecting the translation initiation codon. * Optimized reverse complement handling in SequenceUtilities.cs. Improved cDNA mapped position calculation for insertions. Removed default allele arguments from GetHgvscAnnotation. Improved transcript ref allele creation. Streamlined CDS and cDNA sequence creation and gap detection. Removed SequenceUtilities.GetSubSubstring. * Limited output to only one AA abbreviation in HGVS p. notation for disrupted translation initiation codons. * Added a span version of TranslateBases for creating the alternate AA sequence when frameshifts and extensions are encountered. Created a span-version of the bi-directional trimmer. Created a new implementation of the HGVS protein algorithm. Added a unit test (Extension_via_frameshift) that currently fails the legacy HGVS p. GetProteinChange method. * Fixed support for extensions via insertions. Added code for FindFirstChangeAfterFrameshift. Added more HGVS p. unit tests for different extension variants. * Added code to automatically add stop codons to the AA sequences. * Added Sequence to ISequence. Fixed a bug in GetInsertionCdnaPositions that affected transcripts on the reverse strand. Added support for computing the extended CDS end & protein end positions. 
Added better HGVS p. support for start lost variants, extensions, and terminal codon variants. * Updated the protein sequences that were highlighted by ValidateTranscripts. * Added ability to identify silent stop codon mutations after frameshift or extension. * Fixed issue that caused frameshifts and extensions in HGVS p. to specify 0 AAs after the original terminal codon. * Added code to NormalizeAminoAcids to remove extra AA past the stop codon. This prevents overtrimming. Removed reference bases from HGVS g. notation.
- Loading branch information
Stromberg, Michael
authored and
GitHub Enterprise
committed
Aug 5, 2021
1 parent
06038b2
commit aba8f0a
Showing
178 changed files
with
14,612 additions
and
12,264 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,27 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
<OutputPath>..\bin\$(Configuration)</OutputPath> | ||
<DebugType>Full</DebugType> | ||
</PropertyGroup> | ||
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" /> | ||
<ItemGroup> | ||
<Content Include="CacheUtils.dll.gene.json"> | ||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | ||
</Content> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<PackageReference Include="Microsoft.Extensions.Configuration" Version="2.2.0" /> | ||
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="2.2.0" /> | ||
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="2.2.0" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<ProjectReference Include="..\CommandLine\CommandLine.csproj" /> | ||
<ProjectReference Include="..\Compression\Compression.csproj" /> | ||
<ProjectReference Include="..\ReferenceUtils\ReferenceUtils.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation\VariantAnnotation.csproj" /> | ||
<ProjectReference Include="..\Vcf\Vcf.csproj" /> | ||
</ItemGroup> | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<!-- Executable project (OutputType Exe) targeting netcoreapp2.1; build output is written to ..\bin\$(Configuration). -->
<!-- NOTE(review): presumably the CacheUtils project, judging by the CacheUtils.dll.gene.json content item below; confirm against the file path. -->
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
<OutputPath>..\bin\$(Configuration)</OutputPath> | ||
<DebugType>Full</DebugType> | ||
</PropertyGroup> | ||
<!-- Pulls in CommonAssemblyInfo.props from the VariantAnnotation folder (presumably shared assembly/version metadata; confirm). -->
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" /> | ||
<ItemGroup> | ||
<!-- PreserveNewest: the JSON file is copied to the output directory only when it is newer than the copy already there. -->
<Content Include="CacheUtils.dll.gene.json"> | ||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | ||
</Content> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<!-- The three Microsoft.Extensions.Configuration packages are kept on the same version (5.0.0). -->
<PackageReference Include="Microsoft.Extensions.Configuration" Version="5.0.0" /> | ||
<PackageReference Include="Microsoft.Extensions.Configuration.Binder" Version="5.0.0" /> | ||
<PackageReference Include="Microsoft.Extensions.Configuration.Json" Version="5.0.0" /> | ||
</ItemGroup> | ||
<ItemGroup> | ||
<!-- Sibling projects in the same solution folder that this project references. -->
<ProjectReference Include="..\CommandLine\CommandLine.csproj" /> | ||
<ProjectReference Include="..\Compression\Compression.csproj" /> | ||
<ProjectReference Include="..\ReferenceUtils\ReferenceUtils.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation\VariantAnnotation.csproj" /> | ||
<ProjectReference Include="..\Vcf\Vcf.csproj" /> | ||
</ItemGroup> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,13 @@ | ||
| ||
using System; | ||
using Genome; | ||
|
||
namespace CacheUtils.TranscriptCache | ||
{ | ||
/// <summary>
/// ISequence implementation that carries no real bases: Substring ignores its offset
/// and answers every request with a run of 'N' characters of the requested length.
/// </summary>
public sealed class NSequence : ISequence | ||
{ | ||
// NOTE(review): this row and the next both declare Length. They look like the removed
// and added sides of one diff row, so only one can exist in the compiled file.
// Either form reports a fixed length of 1000.
public int Length { get; } = 1000; | ||
public int Length => 1000; | ||
/// <summary>Returns a string of <paramref name="length"/> 'N' characters; <paramref name="offset"/> is not used.</summary>
public string Substring(int offset, int length) => new string('N', length); | ||
/// <summary>Always null; this type supplies no cytogenetic bands.</summary>
public Band[] CytogeneticBands => null; | ||
/// <summary>Not supported; reading this property throws NotImplementedException.</summary>
public string Sequence => throw new NotImplementedException(); | ||
} | ||
// NOTE(review): three closing braces follow two opening ones. The last two look like the
// old and new sides of the final diff row (end-of-file newline change); confirm.
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,13 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<PropertyGroup> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
<OutputPath>..\bin\$(Configuration)</OutputPath> | ||
<DebugType>Full</DebugType> | ||
</PropertyGroup> | ||
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" /> | ||
<ItemGroup> | ||
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" /> | ||
<ProjectReference Include="..\OptimizedCore\OptimizedCore.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" /> | ||
</ItemGroup> | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
<!-- Project targeting netcoreapp2.1 with build output in ..\bin\$(Configuration). No OutputType is set in this file. -->
<PropertyGroup> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
<OutputPath>..\bin\$(Configuration)</OutputPath> | ||
<DebugType>Full</DebugType> | ||
</PropertyGroup> | ||
<!-- Pulls in CommonAssemblyInfo.props from the VariantAnnotation folder (presumably shared assembly/version metadata; confirm). -->
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" /> | ||
<ItemGroup> | ||
<!-- Sibling projects in the same solution folder that this project references. -->
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" /> | ||
<ProjectReference Include="..\OptimizedCore\OptimizedCore.csproj" /> | ||
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" /> | ||
</ItemGroup> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,49 @@ | ||
using System; | ||
using System.Runtime.InteropServices; | ||
using Compression.Utilities; | ||
|
||
namespace Compression.Algorithms | ||
{ | ||
/// <summary>
/// ICompressionAlgorithm implementation that forwards compression and decompression
/// to the QuickLZ entry points exported by the native "BlockCompression" library.
/// </summary>
public sealed class QuickLZ : ICompressionAlgorithm | ||
{ | ||
// Extra bytes added to the source length when sizing a compression destination buffer.
private const int CompressionOverhead = 400; | ||
|
||
/// <summary>
/// Calls LibraryUtilities.CheckLibrary() at construction time.
/// NOTE(review): presumably verifies that the native library is available; confirm.
/// </summary>
public QuickLZ() => LibraryUtilities.CheckLibrary(); | ||
|
||
/// <summary>
/// Hands the buffers to the native QuickLzCompress call and returns its result.
/// </summary>
/// <returns>The int returned by the native call (presumably the compressed size; confirm).</returns>
/// <exception cref="InvalidOperationException">
/// The destination is null or shorter than GetCompressedBufferBounds(srcLength).
/// </exception>
public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) | ||
{ | ||
// The size check uses the actual array length; destLength is only passed through to native code.
if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) | ||
{ | ||
throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); | ||
} | ||
|
||
return SafeNativeMethods.QuickLzCompress(source, srcLength, destination, destLength); | ||
} | ||
|
||
/// <summary>
/// Hands the buffers to the native QuickLzDecompress call and returns its result.
/// srcLength is accepted but not forwarded to the native call.
/// </summary>
/// <returns>The int returned by the native call (presumably the decompressed size; confirm).</returns>
/// <exception cref="InvalidOperationException">The destination is null.</exception>
public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) | ||
{ | ||
// Only null is rejected here; the destination size is not validated on this path.
if (destination == null) | ||
{ | ||
throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); | ||
} | ||
|
||
return SafeNativeMethods.QuickLzDecompress(source, destination, destLength); | ||
} | ||
|
||
/// <summary>
/// Asks the native library for the decompressed size of <paramref name="source"/> and
/// narrows the returned long to int. srcLength is not used.
/// </summary>
public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.qlz_size_decompressed(source); | ||
|
||
/// <summary>Returns srcLength plus the fixed CompressionOverhead (400).</summary>
public int GetCompressedBufferBounds(int srcLength) => srcLength + CompressionOverhead; | ||
|
||
// P/Invoke declarations; all three bind to "BlockCompression" using the Cdecl calling convention.
private static class SafeNativeMethods | ||
{ | ||
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] | ||
public static extern long qlz_size_decompressed(byte[] bytes); | ||
|
||
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] | ||
public static extern int QuickLzCompress(byte[] source, int sourceLen, byte[] destination, int destinationLen); | ||
|
||
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] | ||
public static extern int QuickLzDecompress(byte[] source, byte[] destination, int destinationLen); | ||
} | ||
} | ||
} | ||
using System; | ||
using System.Runtime.InteropServices; | ||
using Compression.Utilities; | ||
|
||
namespace Compression.Algorithms
{
    /// <summary>
    /// QuickLZ block compression backed by the native "BlockCompression" library.
    /// Every operation is a thin wrapper over a P/Invoke entry point.
    /// </summary>
    public sealed class QuickLZ : ICompressionAlgorithm
    {
        // Head-room added on top of the source length when sizing a compression buffer.
        private const int CompressionOverhead = 400;

        /// <summary>Runs LibraryUtilities.CheckLibrary() before the instance is used.</summary>
        public QuickLZ()
        {
            LibraryUtilities.CheckLibrary();
        }

        /// <summary>
        /// Compresses through the native QuickLzCompress entry point.
        /// </summary>
        /// <returns>Whatever the native call returns.</returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="destination"/> is null or shorter than
        /// <see cref="GetCompressedBufferBounds"/> for <paramref name="srcLength"/>.
        /// </exception>
        public int Compress(byte[] source, int srcLength, byte[] destination, int destLength)
        {
            // The array's real length is what gets validated; destLength is only forwarded.
            bool destinationIsLargeEnough =
                destination != null && GetCompressedBufferBounds(srcLength) <= destination.Length;

            if (!destinationIsLargeEnough)
            {
                throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer");
            }

            int nativeResult = SafeNativeMethods.QuickLzCompress(source, srcLength, destination, destLength);
            return nativeResult;
        }

        /// <summary>
        /// Decompresses through the native QuickLzDecompress entry point.
        /// <paramref name="srcLength"/> is part of the interface but is not forwarded.
        /// </summary>
        /// <returns>Whatever the native call returns.</returns>
        /// <exception cref="InvalidOperationException"><paramref name="destination"/> is null.</exception>
        public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength)
        {
            if (destination != null)
            {
                return SafeNativeMethods.QuickLzDecompress(source, destination, destLength);
            }

            throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer");
        }

        /// <summary>
        /// Queries the native library for the decompressed size of <paramref name="source"/>,
        /// narrowing the 64-bit answer to int. <paramref name="srcLength"/> is not used.
        /// </summary>
        public int GetDecompressedLength(byte[] source, int srcLength)
        {
            long nativeSize = SafeNativeMethods.qlz_size_decompressed(source);
            return (int)nativeSize;
        }

        /// <summary>Source length plus the fixed overhead.</summary>
        public int GetCompressedBufferBounds(int srcLength)
        {
            return srcLength + CompressionOverhead;
        }

        /// <summary>Cdecl P/Invoke bindings into the native library.</summary>
        private static class SafeNativeMethods
        {
            private const string NativeLibrary = "BlockCompression";

            [DllImport(NativeLibrary, CallingConvention = CallingConvention.Cdecl)]
            public static extern long qlz_size_decompressed(byte[] bytes);

            [DllImport(NativeLibrary, CallingConvention = CallingConvention.Cdecl)]
            public static extern int QuickLzCompress(byte[] source, int sourceLen, byte[] destination, int destinationLen);

            [DllImport(NativeLibrary, CallingConvention = CallingConvention.Cdecl)]
            public static extern int QuickLzDecompress(byte[] source, byte[] destination, int destinationLen);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,54 +1,54 @@ | ||
using System; | ||
using System.Runtime.InteropServices; | ||
using Compression.Utilities; | ||
|
||
namespace Compression.Algorithms | ||
{ | ||
/// <summary>
/// ICompressionAlgorithm implementation that forwards compression and decompression
/// to the bgzf_compress / bgzf_decompress entry points of the native "BlockCompression" library.
/// </summary>
public sealed class Zlib : ICompressionAlgorithm | ||
{ | ||
// Compression level handed to the native compressor on every Compress call.
private readonly int _compressionLevel; | ||
|
||
/// <summary>
/// Stores the compression level (default 1) and calls LibraryUtilities.CheckLibrary().
/// NOTE(review): CheckLibrary presumably verifies that the native library is available; confirm.
/// </summary>
public Zlib(int compressionLevel = 1) | ||
{ | ||
_compressionLevel = compressionLevel; | ||
LibraryUtilities.CheckLibrary(); | ||
} | ||
|
||
/// <summary>
/// Hands the buffers and the stored compression level to the native bgzf_compress call.
/// </summary>
/// <returns>The int returned by the native call (presumably the compressed size; confirm).</returns>
/// <exception cref="InvalidOperationException">
/// The destination is null or shorter than GetCompressedBufferBounds(srcLength).
/// </exception>
public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) | ||
{ | ||
// The size check uses the actual array length; destLength is only passed through to native code.
if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) | ||
{ | ||
throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); | ||
} | ||
|
||
// The native signature takes the output buffer first, then the input buffer.
return SafeNativeMethods.bgzf_compress(destination, destLength, source, srcLength, _compressionLevel); | ||
} | ||
|
||
/// <summary>
/// Hands the buffers to the native bgzf_decompress call and returns its result.
/// </summary>
/// <returns>The int returned by the native call (presumably the decompressed size; confirm).</returns>
/// <exception cref="InvalidOperationException">The destination is null.</exception>
public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) | ||
{ | ||
// Only null is rejected here; the destination size is not validated on this path.
if (destination == null) | ||
{ | ||
throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); | ||
} | ||
|
||
// The native signature takes the output buffer first, then the input buffer.
return SafeNativeMethods.bgzf_decompress(destination, destLength, source, srcLength); | ||
} | ||
|
||
/// <summary>
/// Reads the four bytes ending at index srcLength - 1 as a little-endian 32-bit integer.
/// NOTE(review): presumably the uncompressed-size trailer of the compressed block; confirm.
/// </summary>
public int GetDecompressedLength(byte[] source, int srcLength) | ||
{ | ||
int pos = srcLength - 4; | ||
// Lowest-order byte is at pos, highest-order byte at pos + 3.
return source[pos + 3] << 24 | source[pos + 2] << 16 | source[pos + 1] << 8 | source[pos]; | ||
} | ||
|
||
/// <summary>Returns srcLength * 1.06 + 28, truncated to int.</summary>
public int GetCompressedBufferBounds(int srcLength) => (int)(srcLength * 1.06 + 28); | ||
|
||
// P/Invoke declarations; both bind to "BlockCompression" using the Cdecl calling convention.
private static class SafeNativeMethods | ||
{ | ||
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] | ||
public static extern int bgzf_decompress(byte[] uncompressedBlock, int uncompressedSize, byte[] compressedBlock, int compressedSize); | ||
|
||
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] | ||
public static extern int bgzf_compress(byte[] compressedBlock, int compressedLen, byte[] uncompressedBlock, int uncompressedLen, int compressionLevel); | ||
} | ||
} | ||
} | ||
using System; | ||
using System.Runtime.InteropServices; | ||
using Compression.Utilities; | ||
|
||
namespace Compression.Algorithms
{
    /// <summary>
    /// Block compression backed by the bgzf_* entry points of the native
    /// "BlockCompression" library. Every operation is a thin P/Invoke wrapper.
    /// </summary>
    public sealed class Zlib : ICompressionAlgorithm
    {
        // Level passed to the native compressor on each Compress call.
        private readonly int _compressionLevel;

        /// <summary>
        /// Remembers <paramref name="compressionLevel"/> and then runs LibraryUtilities.CheckLibrary().
        /// </summary>
        public Zlib(int compressionLevel = 1)
        {
            _compressionLevel = compressionLevel;
            LibraryUtilities.CheckLibrary();
        }

        /// <summary>
        /// Compresses through the native bgzf_compress entry point using the stored level.
        /// </summary>
        /// <returns>Whatever the native call returns.</returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="destination"/> is null or shorter than
        /// <see cref="GetCompressedBufferBounds"/> for <paramref name="srcLength"/>.
        /// </exception>
        public int Compress(byte[] source, int srcLength, byte[] destination, int destLength)
        {
            // The array's real length is what gets validated; destLength is only forwarded.
            bool destinationIsLargeEnough =
                destination != null && GetCompressedBufferBounds(srcLength) <= destination.Length;

            if (!destinationIsLargeEnough)
            {
                throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer");
            }

            // Native argument order: output buffer first, input buffer second.
            int nativeResult = SafeNativeMethods.bgzf_compress(destination, destLength, source, srcLength, _compressionLevel);
            return nativeResult;
        }

        /// <summary>
        /// Decompresses through the native bgzf_decompress entry point.
        /// </summary>
        /// <returns>Whatever the native call returns.</returns>
        /// <exception cref="InvalidOperationException"><paramref name="destination"/> is null.</exception>
        public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength)
        {
            if (destination != null)
            {
                // Native argument order: output buffer first, input buffer second.
                return SafeNativeMethods.bgzf_decompress(destination, destLength, source, srcLength);
            }

            throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer");
        }

        /// <summary>
        /// Decodes the last four of the first <paramref name="srcLength"/> bytes of
        /// <paramref name="source"/> as a little-endian 32-bit integer.
        /// </summary>
        public int GetDecompressedLength(byte[] source, int srcLength)
        {
            int tailStart = srcLength - 4;

            // Bytes are read from the highest-order one down, as in the single-expression form.
            int highest = source[tailStart + 3] << 24;
            int upperMiddle = source[tailStart + 2] << 16;
            int lowerMiddle = source[tailStart + 1] << 8;
            int lowest = source[tailStart];

            return highest | upperMiddle | lowerMiddle | lowest;
        }

        /// <summary>Source length scaled by 1.06 plus 28, truncated to int.</summary>
        public int GetCompressedBufferBounds(int srcLength)
        {
            return (int)(srcLength * 1.06 + 28);
        }

        /// <summary>Cdecl P/Invoke bindings into the native library.</summary>
        private static class SafeNativeMethods
        {
            private const string NativeLibrary = "BlockCompression";

            [DllImport(NativeLibrary, CallingConvention = CallingConvention.Cdecl)]
            public static extern int bgzf_decompress(byte[] uncompressedBlock, int uncompressedSize, byte[] compressedBlock, int compressedSize);

            [DllImport(NativeLibrary, CallingConvention = CallingConvention.Cdecl)]
            public static extern int bgzf_compress(byte[] compressedBlock, int compressedLen, byte[] uncompressedBlock, int uncompressedLen, int compressionLevel);
        }
    }
}
Oops, something went wrong.