diff --git a/CacheUtils/CacheUtils.csproj b/CacheUtils/CacheUtils.csproj index a80455db..69c228c5 100644 --- a/CacheUtils/CacheUtils.csproj +++ b/CacheUtils/CacheUtils.csproj @@ -1,27 +1,27 @@ - - - Exe - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - PreserveNewest - - - - - - - - - - - - - - - + + + Exe + netcoreapp2.1 + ..\bin\$(Configuration) + Full + + + + + PreserveNewest + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/CacheUtils/IntermediateIO/MutableTranscriptReader.cs b/CacheUtils/IntermediateIO/MutableTranscriptReader.cs index 6344d337..27e02bdd 100644 --- a/CacheUtils/IntermediateIO/MutableTranscriptReader.cs +++ b/CacheUtils/IntermediateIO/MutableTranscriptReader.cs @@ -79,11 +79,7 @@ private void AddMutableContents(MutableTranscript mt) mt.NewStartExonPhase = mt.StartExonPhase < 0 ? (byte)0 : (byte)mt.StartExonPhase; if (mt.CodingRegion == null) return; - - var codingSequence = new CodingSequence(_sequence, mt.CodingRegion, mt.TranscriptRegions, - mt.Gene.OnReverseStrand, mt.NewStartExonPhase, mt.RnaEdits); - - mt.CdsLength = codingSequence.GetCodingSequence().Length; + mt.CdsLength = mt.CodingRegion.CdnaEnd - mt.CodingRegion.CdnaStart + 1; mt.CodingRegion = new CodingRegion(mt.CodingRegion.Start, mt.CodingRegion.End, mt.CodingRegion.CdnaStart, mt.CodingRegion.CdnaEnd, mt.CdsLength); diff --git a/CacheUtils/PredictionCache/PredictionUtilities.cs b/CacheUtils/PredictionCache/PredictionUtilities.cs index a0ae7331..10f7220d 100644 --- a/CacheUtils/PredictionCache/PredictionUtilities.cs +++ b/CacheUtils/PredictionCache/PredictionUtilities.cs @@ -34,7 +34,7 @@ internal static ITranscript UpdatePredictions(this ITranscript t, int siftIndex, return new Transcript(t.Chromosome, t.Start, t.End, t.Id, t.Translation, t.BioType, t.Gene, t.TotalExonLength, t.StartExonPhase, t.IsCanonical, t.TranscriptRegions, t.NumExons, t.MicroRnas, siftIndex, polyphenIndex, t.Source, t.CdsStartNotFound, t.CdsEndNotFound, - t.Selenocysteines, 
t.RnaEdits); + t.RnaEdits); } private static int GetNewIndex(IReadOnlyList oldPredictions, int index, diff --git a/CacheUtils/TranscriptCache/NSequence.cs b/CacheUtils/TranscriptCache/NSequence.cs index 5c32fccf..c9ec5a4c 100644 --- a/CacheUtils/TranscriptCache/NSequence.cs +++ b/CacheUtils/TranscriptCache/NSequence.cs @@ -1,12 +1,13 @@ - +using System; using Genome; namespace CacheUtils.TranscriptCache { public sealed class NSequence : ISequence { - public int Length { get; } = 1000; + public int Length => 1000; public string Substring(int offset, int length) => new string('N', length); public Band[] CytogeneticBands => null; + public string Sequence => throw new NotImplementedException(); } -} +} \ No newline at end of file diff --git a/CacheUtils/TranscriptCache/TranscriptConversionExtensions.cs b/CacheUtils/TranscriptCache/TranscriptConversionExtensions.cs index 7091d5b0..01f7c40c 100644 --- a/CacheUtils/TranscriptCache/TranscriptConversionExtensions.cs +++ b/CacheUtils/TranscriptCache/TranscriptConversionExtensions.cs @@ -28,7 +28,7 @@ private static ITranscript ToTranscript(this MutableTranscript mt) return new Transcript(mt.Chromosome, mt.Start, mt.End, CompactId.Convert(mt.Id, mt.Version), translation, mt.BioType, mt.UpdatedGene, mt.TotalExonLength, mt.NewStartExonPhase, mt.IsCanonical, mt.TranscriptRegions, (ushort) mt.Exons.Length, sortedMicroRnas, mt.SiftIndex, mt.PolyPhenIndex, - mt.Source, mt.CdsStartNotFound, mt.CdsEndNotFound, mt.SelenocysteinePositions, mt.RnaEdits); + mt.Source, mt.CdsStartNotFound, mt.CdsEndNotFound, mt.RnaEdits); } private static ITranslation GetTranslation(ICodingRegion oldCodingRegion, int cdsLength, CompactId proteinId, diff --git a/Cloud/Cloud.csproj b/Cloud/Cloud.csproj index a97cc2e5..abe6aba8 100644 --- a/Cloud/Cloud.csproj +++ b/Cloud/Cloud.csproj @@ -5,7 +5,7 @@ - + diff --git a/CommandLine/CommandLine.csproj b/CommandLine/CommandLine.csproj index 1a27ed6a..1b1b96d0 100644 --- a/CommandLine/CommandLine.csproj +++ 
b/CommandLine/CommandLine.csproj @@ -1,13 +1,13 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + Full + + + + + + + \ No newline at end of file diff --git a/Compression/Algorithms/QuickLZ.cs b/Compression/Algorithms/QuickLZ.cs index 93d2cbf0..e16fc782 100644 --- a/Compression/Algorithms/QuickLZ.cs +++ b/Compression/Algorithms/QuickLZ.cs @@ -1,49 +1,49 @@ -using System; -using System.Runtime.InteropServices; -using Compression.Utilities; - -namespace Compression.Algorithms -{ - public sealed class QuickLZ : ICompressionAlgorithm - { - private const int CompressionOverhead = 400; - - public QuickLZ() => LibraryUtilities.CheckLibrary(); - - public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) - { - throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); - } - - return SafeNativeMethods.QuickLzCompress(source, srcLength, destination, destLength); - } - - public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null) - { - throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); - } - - return SafeNativeMethods.QuickLzDecompress(source, destination, destLength); - } - - public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.qlz_size_decompressed(source); - - public int GetCompressedBufferBounds(int srcLength) => srcLength + CompressionOverhead; - - private static class SafeNativeMethods - { - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern long qlz_size_decompressed(byte[] bytes); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern int QuickLzCompress(byte[] source, int sourceLen, byte[] destination, int destinationLen); 
- - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern int QuickLzDecompress(byte[] source, byte[] destination, int destinationLen); - } - } -} +using System; +using System.Runtime.InteropServices; +using Compression.Utilities; + +namespace Compression.Algorithms +{ + public sealed class QuickLZ : ICompressionAlgorithm + { + private const int CompressionOverhead = 400; + + public QuickLZ() => LibraryUtilities.CheckLibrary(); + + public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) + { + throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); + } + + return SafeNativeMethods.QuickLzCompress(source, srcLength, destination, destLength); + } + + public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null) + { + throw new InvalidOperationException("QuickLZ: Insufficient memory in destination buffer"); + } + + return SafeNativeMethods.QuickLzDecompress(source, destination, destLength); + } + + public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.qlz_size_decompressed(source); + + public int GetCompressedBufferBounds(int srcLength) => srcLength + CompressionOverhead; + + private static class SafeNativeMethods + { + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern long qlz_size_decompressed(byte[] bytes); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern int QuickLzCompress(byte[] source, int sourceLen, byte[] destination, int destinationLen); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern int QuickLzDecompress(byte[] source, byte[] destination, int destinationLen); + } + } +} diff --git 
a/Compression/Algorithms/Zlib.cs b/Compression/Algorithms/Zlib.cs index c07058d4..154aac6c 100644 --- a/Compression/Algorithms/Zlib.cs +++ b/Compression/Algorithms/Zlib.cs @@ -1,54 +1,54 @@ -using System; -using System.Runtime.InteropServices; -using Compression.Utilities; - -namespace Compression.Algorithms -{ - public sealed class Zlib : ICompressionAlgorithm - { - private readonly int _compressionLevel; - - public Zlib(int compressionLevel = 1) - { - _compressionLevel = compressionLevel; - LibraryUtilities.CheckLibrary(); - } - - public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) - { - throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); - } - - return SafeNativeMethods.bgzf_compress(destination, destLength, source, srcLength, _compressionLevel); - } - - public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null) - { - throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); - } - - return SafeNativeMethods.bgzf_decompress(destination, destLength, source, srcLength); - } - - public int GetDecompressedLength(byte[] source, int srcLength) - { - int pos = srcLength - 4; - return source[pos + 3] << 24 | source[pos + 2] << 16 | source[pos + 1] << 8 | source[pos]; - } - - public int GetCompressedBufferBounds(int srcLength) => (int)(srcLength * 1.06 + 28); - - private static class SafeNativeMethods - { - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern int bgzf_decompress(byte[] uncompressedBlock, int uncompressedSize, byte[] compressedBlock, int compressedSize); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern int bgzf_compress(byte[] compressedBlock, int compressedLen, byte[] uncompressedBlock, int uncompressedLen, 
int compressionLevel); - } - } -} +using System; +using System.Runtime.InteropServices; +using Compression.Utilities; + +namespace Compression.Algorithms +{ + public sealed class Zlib : ICompressionAlgorithm + { + private readonly int _compressionLevel; + + public Zlib(int compressionLevel = 1) + { + _compressionLevel = compressionLevel; + LibraryUtilities.CheckLibrary(); + } + + public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) + { + throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); + } + + return SafeNativeMethods.bgzf_compress(destination, destLength, source, srcLength, _compressionLevel); + } + + public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null) + { + throw new InvalidOperationException("Zlib: Insufficient memory in destination buffer"); + } + + return SafeNativeMethods.bgzf_decompress(destination, destLength, source, srcLength); + } + + public int GetDecompressedLength(byte[] source, int srcLength) + { + int pos = srcLength - 4; + return source[pos + 3] << 24 | source[pos + 2] << 16 | source[pos + 1] << 8 | source[pos]; + } + + public int GetCompressedBufferBounds(int srcLength) => (int)(srcLength * 1.06 + 28); + + private static class SafeNativeMethods + { + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern int bgzf_decompress(byte[] uncompressedBlock, int uncompressedSize, byte[] compressedBlock, int compressedSize); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern int bgzf_compress(byte[] compressedBlock, int compressedLen, byte[] uncompressedBlock, int uncompressedLen, int compressionLevel); + } + } +} diff --git a/Compression/Algorithms/Zstandard.cs b/Compression/Algorithms/Zstandard.cs index 954457f2..bfe9eea1 100644 --- 
a/Compression/Algorithms/Zstandard.cs +++ b/Compression/Algorithms/Zstandard.cs @@ -1,54 +1,54 @@ -using System; -using System.Runtime.InteropServices; -using Compression.Utilities; - -namespace Compression.Algorithms -{ - public sealed class Zstandard : ICompressionAlgorithm - { - private readonly int _compressionLevel; - - public Zstandard(int compressionLevel = 17) - { - _compressionLevel = compressionLevel; - LibraryUtilities.CheckLibrary(); - } - - public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) - { - throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); - } - - return (int)SafeNativeMethods.ZSTD_compress(destination, (ulong)destLength, source, (ulong)srcLength, _compressionLevel); - } - - public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null) - { - throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); - } - - return (int)SafeNativeMethods.ZSTD_decompress(destination, (ulong)destLength, source, (ulong)srcLength); - } - - public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.ZSTD_getDecompressedSize(source, srcLength); - - // empirically derived via polynomial regression with additional padding added - public int GetCompressedBufferBounds(int srcLength) => srcLength + 32; - - private static class SafeNativeMethods - { - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern ulong ZSTD_compress(byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, int compressionLevel); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern ulong ZSTD_decompress(byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen); - - 
[DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern ulong ZSTD_getDecompressedSize(byte[] source, int sourceLen); - } - } -} +using System; +using System.Runtime.InteropServices; +using Compression.Utilities; + +namespace Compression.Algorithms +{ + public sealed class Zstandard : ICompressionAlgorithm + { + private readonly int _compressionLevel; + + public Zstandard(int compressionLevel = 17) + { + _compressionLevel = compressionLevel; + LibraryUtilities.CheckLibrary(); + } + + public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null || GetCompressedBufferBounds(srcLength) > destination.Length) + { + throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); + } + + return (int)SafeNativeMethods.ZSTD_compress(destination, (ulong)destLength, source, (ulong)srcLength, _compressionLevel); + } + + public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null) + { + throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); + } + + return (int)SafeNativeMethods.ZSTD_decompress(destination, (ulong)destLength, source, (ulong)srcLength); + } + + public int GetDecompressedLength(byte[] source, int srcLength) => (int)SafeNativeMethods.ZSTD_getDecompressedSize(source, srcLength); + + // empirically derived via polynomial regression with additional padding added + public int GetCompressedBufferBounds(int srcLength) => srcLength + 32; + + private static class SafeNativeMethods + { + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern ulong ZSTD_compress(byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, int compressionLevel); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern ulong ZSTD_decompress(byte[] destination, ulong 
destinationLen, byte[] source, ulong sourceLen); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern ulong ZSTD_getDecompressedSize(byte[] source, int sourceLen); + } + } +} diff --git a/Compression/Algorithms/ZstandardDict.cs b/Compression/Algorithms/ZstandardDict.cs index 471d646a..3220d5ae 100644 --- a/Compression/Algorithms/ZstandardDict.cs +++ b/Compression/Algorithms/ZstandardDict.cs @@ -1,62 +1,62 @@ -using System; -using System.Runtime.InteropServices; -using Compression.Utilities; - -namespace Compression.Algorithms -{ - public sealed class ZstandardDict : ICompressionAlgorithm - { - private readonly IntPtr _compressDict; - private readonly IntPtr _decompressDict; - private readonly IntPtr _compressContext; - private readonly IntPtr _decompressContext; - - public ZstandardDict(int compressionLevel, byte[] dictBuffer) - { - LibraryUtilities.CheckLibrary(); - - _compressDict = SafeNativeMethods.ZSTD_createCDict(dictBuffer, (ulong)dictBuffer.Length, compressionLevel); - _decompressDict = SafeNativeMethods.ZSTD_createDDict(dictBuffer, (ulong)dictBuffer.Length); - _compressContext = SafeNativeMethods.ZSTD_createCCtx(); - _decompressContext = SafeNativeMethods.ZSTD_createDCtx(); - } - - public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null) throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); - return (int)SafeNativeMethods.ZSTD_compress_usingCDict(_compressContext, destination, (ulong)destLength, source, (ulong)srcLength, _compressDict); - } - - public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) - { - if (destination == null) throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); - return (int)SafeNativeMethods.ZSTD_decompress_usingDDict(_decompressContext, destination, (ulong)destLength, source, (ulong)srcLength, _decompressDict); - } 
- - public int GetDecompressedLength(byte[] source, int srcLength) => throw new NotImplementedException(); - - // empirically derived via polynomial regression with additional padding added - public int GetCompressedBufferBounds(int srcLength) => srcLength + 32; - - private static class SafeNativeMethods - { - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern ulong ZSTD_compress_usingCDict(IntPtr cctx, byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, IntPtr cdict); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern ulong ZSTD_decompress_usingDDict(IntPtr dctx, byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, IntPtr ddict); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr ZSTD_createCDict(byte[] dictBuffer, ulong dictSize, int compressionLevel); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr ZSTD_createDDict(byte[] dictBuffer, ulong dictSize); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr ZSTD_createCCtx(); - - [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] - public static extern IntPtr ZSTD_createDCtx(); - } - } -} +using System; +using System.Runtime.InteropServices; +using Compression.Utilities; + +namespace Compression.Algorithms +{ + public sealed class ZstandardDict : ICompressionAlgorithm + { + private readonly IntPtr _compressDict; + private readonly IntPtr _decompressDict; + private readonly IntPtr _compressContext; + private readonly IntPtr _decompressContext; + + public ZstandardDict(int compressionLevel, byte[] dictBuffer) + { + LibraryUtilities.CheckLibrary(); + + _compressDict = SafeNativeMethods.ZSTD_createCDict(dictBuffer, (ulong)dictBuffer.Length, compressionLevel); + 
_decompressDict = SafeNativeMethods.ZSTD_createDDict(dictBuffer, (ulong)dictBuffer.Length); + _compressContext = SafeNativeMethods.ZSTD_createCCtx(); + _decompressContext = SafeNativeMethods.ZSTD_createDCtx(); + } + + public int Compress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null) throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); + return (int)SafeNativeMethods.ZSTD_compress_usingCDict(_compressContext, destination, (ulong)destLength, source, (ulong)srcLength, _compressDict); + } + + public int Decompress(byte[] source, int srcLength, byte[] destination, int destLength) + { + if (destination == null) throw new InvalidOperationException("Zstandard: Insufficient memory in destination buffer"); + return (int)SafeNativeMethods.ZSTD_decompress_usingDDict(_decompressContext, destination, (ulong)destLength, source, (ulong)srcLength, _decompressDict); + } + + public int GetDecompressedLength(byte[] source, int srcLength) => throw new NotImplementedException(); + + // empirically derived via polynomial regression with additional padding added + public int GetCompressedBufferBounds(int srcLength) => srcLength + 32; + + private static class SafeNativeMethods + { + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern ulong ZSTD_compress_usingCDict(IntPtr cctx, byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, IntPtr cdict); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern ulong ZSTD_decompress_usingDDict(IntPtr dctx, byte[] destination, ulong destinationLen, byte[] source, ulong sourceLen, IntPtr ddict); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr ZSTD_createCDict(byte[] dictBuffer, ulong dictSize, int compressionLevel); + + [DllImport("BlockCompression", CallingConvention = 
CallingConvention.Cdecl)] + public static extern IntPtr ZSTD_createDDict(byte[] dictBuffer, ulong dictSize); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr ZSTD_createCCtx(); + + [DllImport("BlockCompression", CallingConvention = CallingConvention.Cdecl)] + public static extern IntPtr ZSTD_createDCtx(); + } + } +} diff --git a/Compression/DataStructures/Block.cs b/Compression/DataStructures/Block.cs index 29c558bc..dbb50655 100644 --- a/Compression/DataStructures/Block.cs +++ b/Compression/DataStructures/Block.cs @@ -1,134 +1,134 @@ -using System; -using System.IO; -using Compression.Algorithms; -using Compression.FileHandling; -using ErrorHandling.Exceptions; - -namespace Compression.DataStructures -{ - public sealed class Block - { - private readonly ICompressionAlgorithm _compressionAlgorithm; - private readonly BlockHeader _header; - - private readonly byte[] _compressedBlock; - private readonly byte[] _uncompressedBlock; - - public long FileOffset { get; private set; } - public int Offset { get; internal set; } - - internal const int DefaultSize = 16777216; - private readonly int _size; - private readonly int _compressedBlockSize; - - public bool IsFull => Offset == _size; - public bool HasMoreData => Offset < _header.NumUncompressedBytes; - - public Block(ICompressionAlgorithm compressionAlgorithm, int size = DefaultSize) - { - _compressionAlgorithm = compressionAlgorithm; - Offset = 0; - _size = size; - - _uncompressedBlock = new byte[_size]; - _compressedBlockSize = compressionAlgorithm.GetCompressedBufferBounds(_size); - _compressedBlock = new byte[_compressedBlockSize]; - _header = new BlockHeader(); - } - - public int CopyTo(byte[] array, int offset, int count) - { - int copyLength = Math.Min(_size - Offset, count); - if (copyLength == 0) return 0; - - Buffer.BlockCopy(array, offset, _uncompressedBlock, Offset, copyLength); - Offset += copyLength; - - return copyLength; - } - - public int 
CopyFrom(byte[] array, int offset, int count) - { - int copyLength = Math.Min(_header.NumUncompressedBytes - Offset, count); - if (copyLength == 0) return 0; - - Buffer.BlockCopy(_uncompressedBlock, Offset, array, offset, copyLength); - Offset += copyLength; - - return copyLength; - } - - public void Write(Stream stream) - { - _header.NumUncompressedBytes = Offset; - - _header.NumCompressedBytes = _compressionAlgorithm.Compress(_uncompressedBlock, _header.NumUncompressedBytes, - _compressedBlock, _compressedBlockSize); - - if (_header.NumCompressedBytes > _header.NumUncompressedBytes) - { - _header.NumCompressedBytes = -1; - _header.Write(stream); - stream.Write(_uncompressedBlock, 0, _header.NumUncompressedBytes); - } - else - { - _header.Write(stream); - stream.Write(_compressedBlock, 0, _header.NumCompressedBytes); - } - - Offset = 0; - } - - public void WriteEof(Stream stream) - { - _header.NumUncompressedBytes = -1; - _header.NumCompressedBytes = -1; - _header.Write(stream); - } - - public int Read(Stream stream) - { - FileOffset = stream.Position; - - _header.Read(stream); - if (_header.IsEmpty) return -1; - - int numBytesRead = _header.NumCompressedBytes == -1 - ? 
ReadUncompressedBlock(stream) - : ReadCompressedBlock(stream); - - Offset = 0; - - return BlockHeader.HeaderSize + numBytesRead; - } - - private int ReadCompressedBlock(Stream stream) - { - int numBytesRead = stream.Read(_compressedBlock, 0, _header.NumCompressedBytes); - if (numBytesRead != _header.NumCompressedBytes) - { - throw new IOException($"Expected {_header.NumCompressedBytes} bytes from the block, but received only {numBytesRead} bytes."); - } - - int numUncompressedBytes = _compressionAlgorithm.Decompress(_compressedBlock, _header.NumCompressedBytes, _uncompressedBlock, _size); - if (numUncompressedBytes != _header.NumUncompressedBytes) - { - throw new CompressionException($"Expected {_header.NumUncompressedBytes} bytes after decompression, but found only {numUncompressedBytes} bytes."); - } - - return numBytesRead; - } - - private int ReadUncompressedBlock(Stream stream) - { - int numBytesRead = stream.Read(_uncompressedBlock, 0, _header.NumUncompressedBytes); - if (numBytesRead != _header.NumUncompressedBytes) - { - throw new IOException($"Expected {_header.NumUncompressedBytes} bytes from the uncompressed block, but received only {numBytesRead} bytes."); - } - - return numBytesRead; - } - } -} +using System; +using System.IO; +using Compression.Algorithms; +using Compression.FileHandling; +using ErrorHandling.Exceptions; + +namespace Compression.DataStructures +{ + public sealed class Block + { + private readonly ICompressionAlgorithm _compressionAlgorithm; + private readonly BlockHeader _header; + + private readonly byte[] _compressedBlock; + private readonly byte[] _uncompressedBlock; + + public long FileOffset { get; private set; } + public int Offset { get; internal set; } + + internal const int DefaultSize = 16777216; + private readonly int _size; + private readonly int _compressedBlockSize; + + public bool IsFull => Offset == _size; + public bool HasMoreData => Offset < _header.NumUncompressedBytes; + + public Block(ICompressionAlgorithm 
compressionAlgorithm, int size = DefaultSize) + { + _compressionAlgorithm = compressionAlgorithm; + Offset = 0; + _size = size; + + _uncompressedBlock = new byte[_size]; + _compressedBlockSize = compressionAlgorithm.GetCompressedBufferBounds(_size); + _compressedBlock = new byte[_compressedBlockSize]; + _header = new BlockHeader(); + } + + public int CopyTo(byte[] array, int offset, int count) + { + int copyLength = Math.Min(_size - Offset, count); + if (copyLength == 0) return 0; + + Buffer.BlockCopy(array, offset, _uncompressedBlock, Offset, copyLength); + Offset += copyLength; + + return copyLength; + } + + public int CopyFrom(byte[] array, int offset, int count) + { + int copyLength = Math.Min(_header.NumUncompressedBytes - Offset, count); + if (copyLength == 0) return 0; + + Buffer.BlockCopy(_uncompressedBlock, Offset, array, offset, copyLength); + Offset += copyLength; + + return copyLength; + } + + public void Write(Stream stream) + { + _header.NumUncompressedBytes = Offset; + + _header.NumCompressedBytes = _compressionAlgorithm.Compress(_uncompressedBlock, _header.NumUncompressedBytes, + _compressedBlock, _compressedBlockSize); + + if (_header.NumCompressedBytes > _header.NumUncompressedBytes) + { + _header.NumCompressedBytes = -1; + _header.Write(stream); + stream.Write(_uncompressedBlock, 0, _header.NumUncompressedBytes); + } + else + { + _header.Write(stream); + stream.Write(_compressedBlock, 0, _header.NumCompressedBytes); + } + + Offset = 0; + } + + public void WriteEof(Stream stream) + { + _header.NumUncompressedBytes = -1; + _header.NumCompressedBytes = -1; + _header.Write(stream); + } + + public int Read(Stream stream) + { + FileOffset = stream.Position; + + _header.Read(stream); + if (_header.IsEmpty) return -1; + + int numBytesRead = _header.NumCompressedBytes == -1 + ? 
ReadUncompressedBlock(stream) + : ReadCompressedBlock(stream); + + Offset = 0; + + return BlockHeader.HeaderSize + numBytesRead; + } + + private int ReadCompressedBlock(Stream stream) + { + int numBytesRead = stream.Read(_compressedBlock, 0, _header.NumCompressedBytes); + if (numBytesRead != _header.NumCompressedBytes) + { + throw new IOException($"Expected {_header.NumCompressedBytes} bytes from the block, but received only {numBytesRead} bytes."); + } + + int numUncompressedBytes = _compressionAlgorithm.Decompress(_compressedBlock, _header.NumCompressedBytes, _uncompressedBlock, _size); + if (numUncompressedBytes != _header.NumUncompressedBytes) + { + throw new CompressionException($"Expected {_header.NumUncompressedBytes} bytes after decompression, but found only {numUncompressedBytes} bytes."); + } + + return numBytesRead; + } + + private int ReadUncompressedBlock(Stream stream) + { + int numBytesRead = stream.Read(_uncompressedBlock, 0, _header.NumUncompressedBytes); + if (numBytesRead != _header.NumUncompressedBytes) + { + throw new IOException($"Expected {_header.NumUncompressedBytes} bytes from the uncompressed block, but received only {numBytesRead} bytes."); + } + + return numBytesRead; + } + } +} diff --git a/Compression/FileHandling/BgzipTextReader.cs b/Compression/FileHandling/BgzipTextReader.cs index bc495cb3..da19b859 100644 --- a/Compression/FileHandling/BgzipTextReader.cs +++ b/Compression/FileHandling/BgzipTextReader.cs @@ -1,42 +1,42 @@ -using System; -using System.IO; -using System.Reflection; -using System.Text; - -namespace Compression.FileHandling -{ - public sealed class BgzipTextReader : IDisposable - { - private readonly bool _leaveOpen; - private readonly StreamReader _reader; - private readonly FieldInfo _charPosInfo; - private readonly FieldInfo _charLenInfo; - - public BgzipTextReader(BlockGZipStream stream, bool leaveOpen = false) - { - _leaveOpen = leaveOpen; - _reader = new StreamReader(stream, Encoding.UTF8, leaveOpen); - - Type 
readerType = _reader.GetType(); - _charPosInfo = readerType.GetField("_charPos", BindingFlags.NonPublic | BindingFlags.Instance); - _charLenInfo = readerType.GetField("_charLen", BindingFlags.NonPublic | BindingFlags.Instance); - } - - public long Position - { - get - { - var bufferPos = (int)_charPosInfo.GetValue(_reader); - var bufferSize = (int)_charLenInfo.GetValue(_reader); - return _reader.BaseStream.Position - bufferSize + bufferPos; - } - } - - public string ReadLine() => _reader.ReadLine(); - - public void Dispose() - { - if (!_leaveOpen) _reader.Dispose(); - } - } +using System; +using System.IO; +using System.Reflection; +using System.Text; + +namespace Compression.FileHandling +{ + public sealed class BgzipTextReader : IDisposable + { + private readonly bool _leaveOpen; + private readonly StreamReader _reader; + private readonly FieldInfo _charPosInfo; + private readonly FieldInfo _charLenInfo; + + public BgzipTextReader(BlockGZipStream stream, bool leaveOpen = false) + { + _leaveOpen = leaveOpen; + _reader = new StreamReader(stream, Encoding.UTF8, leaveOpen); + + Type readerType = _reader.GetType(); + _charPosInfo = readerType.GetField("_charPos", BindingFlags.NonPublic | BindingFlags.Instance); + _charLenInfo = readerType.GetField("_charLen", BindingFlags.NonPublic | BindingFlags.Instance); + } + + public long Position + { + get + { + var bufferPos = (int)_charPosInfo.GetValue(_reader); + var bufferSize = (int)_charLenInfo.GetValue(_reader); + return _reader.BaseStream.Position - bufferSize + bufferPos; + } + } + + public string ReadLine() => _reader.ReadLine(); + + public void Dispose() + { + if (!_leaveOpen) _reader.Dispose(); + } + } } \ No newline at end of file diff --git a/Compression/FileHandling/BgzipTextWriter.cs b/Compression/FileHandling/BgzipTextWriter.cs index 00e33893..19e56fad 100644 --- a/Compression/FileHandling/BgzipTextWriter.cs +++ b/Compression/FileHandling/BgzipTextWriter.cs @@ -1,74 +1,74 @@ -using System; -using System.IO; 
-using System.Text; - -namespace Compression.FileHandling -{ - public sealed class BgzipTextWriter : StreamWriter, IDisposable - { - private readonly BlockGZipStream _stream; - private readonly byte[] _buffer; - private int _bufferIndex; - private const int BufferSize = BlockGZipStream.BlockGZipFormatCommon.BlockSize; - - private static readonly UTF8Encoding Utf8WithoutBom = new UTF8Encoding(false); - - public long Position => _stream.Position + _bufferIndex; - - public BgzipTextWriter(BlockGZipStream stream) : base(stream, Utf8WithoutBom, BufferSize, true) - { - _buffer = new byte[BufferSize]; - _stream = stream; - } - - public override void Flush() - { - if (_bufferIndex == 0) return; - _stream.Write(_buffer, 0, _bufferIndex); - _bufferIndex = 0; - } - - public override void WriteLine() => Write("\n"); - - public override void WriteLine(string value) => Write(value + "\n"); - - public override void Write(string value) - { - if (string.IsNullOrEmpty(value)) return; - var lineBytes = Encoding.UTF8.GetBytes(value); - - if (lineBytes.Length <= BufferSize - _bufferIndex) - { - Array.Copy(lineBytes, 0, _buffer, _bufferIndex, lineBytes.Length); - _bufferIndex += lineBytes.Length; - } - else - { - // fill up the buffer - Array.Copy(lineBytes, 0, _buffer, _bufferIndex, BufferSize - _bufferIndex); - int lineIndex = BufferSize - _bufferIndex; - - // write it out to the stream - _stream.Write(_buffer, 0, BufferSize); - _bufferIndex = 0; - - while (lineIndex + BufferSize <= lineBytes.Length) - { - _stream.Write(lineBytes, lineIndex, BufferSize); - lineIndex += BufferSize; - } - - // the leftover bytes should be saved in buffer - if (lineIndex >= lineBytes.Length) return; - Array.Copy(lineBytes, lineIndex, _buffer, 0, lineBytes.Length - lineIndex); - _bufferIndex = lineBytes.Length - lineIndex; - } - } - - public new void Dispose() - { - Flush(); - _stream.Dispose(); - } - } +using System; +using System.IO; +using System.Text; + +namespace Compression.FileHandling +{ + public 
sealed class BgzipTextWriter : StreamWriter, IDisposable + { + private readonly BlockGZipStream _stream; + private readonly byte[] _buffer; + private int _bufferIndex; + private const int BufferSize = BlockGZipStream.BlockGZipFormatCommon.BlockSize; + + private static readonly UTF8Encoding Utf8WithoutBom = new UTF8Encoding(false); + + public long Position => _stream.Position + _bufferIndex; + + public BgzipTextWriter(BlockGZipStream stream) : base(stream, Utf8WithoutBom, BufferSize, true) + { + _buffer = new byte[BufferSize]; + _stream = stream; + } + + public override void Flush() + { + if (_bufferIndex == 0) return; + _stream.Write(_buffer, 0, _bufferIndex); + _bufferIndex = 0; + } + + public override void WriteLine() => Write("\n"); + + public override void WriteLine(string value) => Write(value + "\n"); + + public override void Write(string value) + { + if (string.IsNullOrEmpty(value)) return; + var lineBytes = Encoding.UTF8.GetBytes(value); + + if (lineBytes.Length <= BufferSize - _bufferIndex) + { + Array.Copy(lineBytes, 0, _buffer, _bufferIndex, lineBytes.Length); + _bufferIndex += lineBytes.Length; + } + else + { + // fill up the buffer + Array.Copy(lineBytes, 0, _buffer, _bufferIndex, BufferSize - _bufferIndex); + int lineIndex = BufferSize - _bufferIndex; + + // write it out to the stream + _stream.Write(_buffer, 0, BufferSize); + _bufferIndex = 0; + + while (lineIndex + BufferSize <= lineBytes.Length) + { + _stream.Write(lineBytes, lineIndex, BufferSize); + lineIndex += BufferSize; + } + + // the leftover bytes should be saved in buffer + if (lineIndex >= lineBytes.Length) return; + Array.Copy(lineBytes, lineIndex, _buffer, 0, lineBytes.Length - lineIndex); + _bufferIndex = lineBytes.Length - lineIndex; + } + } + + public new void Dispose() + { + Flush(); + _stream.Dispose(); + } + } } \ No newline at end of file diff --git a/Compression/FileHandling/BlockGZipStream.cs b/Compression/FileHandling/BlockGZipStream.cs index b648f987..3fa3e13f 100644 --- 
a/Compression/FileHandling/BlockGZipStream.cs +++ b/Compression/FileHandling/BlockGZipStream.cs @@ -1,269 +1,269 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.IO.Compression; -using Compression.Algorithms; -using ErrorHandling.Exceptions; - -namespace Compression.FileHandling -{ - // BGZF/GZIP header (specialized from RFC 1952; little endian): - // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - // | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| - // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ - - // BGZF/GZIP footer: - // +---+---+---+---+---+---+---+---+ - // | CRC| Source len| - // +---+---+---+---+---+---+---+---+ - - public sealed class BlockGZipStream : Stream - { - private readonly byte[] _compressedBlock; - private readonly byte[] _uncompressedBlock; - private int _blockOffset; - private int _blockLength; - private long _blockAddress; - - private readonly bool _isCompressor; - private readonly bool _leaveStreamOpen; - - private readonly string _filePath; - private Stream _stream; - private readonly Zlib _bgzf; - private bool _isDisposed; - - public static class BlockGZipFormatCommon - { - public const int BlockSize = 65280; - public const int MaxBlockSize = 65536; - public const int BlockHeaderLength = 18; - } - - #region Stream - - public override bool CanRead => _stream != null && !_isCompressor && _stream.CanRead; - - public override bool CanWrite => _stream != null && _isCompressor && _stream.CanWrite; - - public override bool CanSeek => _stream != null && !_isCompressor && _stream.CanSeek; - - public override long Length => throw new NotSupportedException(); - - public override long Position - { - get => (_blockAddress << 16) | ((long)_blockOffset & 0xffff); - set => SeekVirtualFilePointer((ulong)value); - } - - public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); - - public override void SetLength(long value) => 
throw new NotSupportedException(); - - public override void Flush() => _stream.Flush(); - - protected override void Dispose(bool disposing) - { - if (_isDisposed) return; - - try - { - if (_isCompressor) - { - Flush(_blockOffset); - - // write an empty block (as EOF marker) - Flush(0); - } - - if (!_leaveStreamOpen) - { - _stream.Dispose(); - _stream = null; - } - - _isDisposed = true; - } - finally - { - base.Dispose(disposing); - } - } - - #endregion - - public BlockGZipStream(Stream stream, CompressionMode compressionMode, bool leaveStreamOpen = false, int compressionLevel = 1) - { - _filePath = "(stream)"; - _leaveStreamOpen = leaveStreamOpen; - _stream = stream; - - // sanity check: make sure the stream exists - if (stream == null) throw new ArgumentNullException(nameof(stream)); - - // sanity check: make sure we can use the stream for reading or writing - _isCompressor = compressionMode == CompressionMode.Compress; - if (_isCompressor && !_stream.CanWrite) throw new CompressionException("A stream lacking write capability was provided to the block GZip compressor."); - if (!_isCompressor && !_stream.CanRead) throw new CompressionException("A stream lacking read capability was provided to the block GZip decompressor."); - - _bgzf = new Zlib(compressionLevel); - _uncompressedBlock = new byte[BlockGZipFormatCommon.MaxBlockSize]; - _compressedBlock = new byte[_bgzf.GetCompressedBufferBounds(BlockGZipFormatCommon.MaxBlockSize)]; - } - - private void Flush(int uncompressedSize) - { - int blockLength = _bgzf.Compress(_uncompressedBlock, uncompressedSize, _compressedBlock, BlockGZipFormatCommon.MaxBlockSize); - _blockOffset = 0; - - _stream.Write(_compressedBlock, 0, blockLength); - _blockAddress = _stream.Position; - } - - private static bool HasValidHeader(int numHeaderBytes, IReadOnlyList header) - { - if (numHeaderBytes != BlockGZipFormatCommon.BlockHeaderLength) return false; - - return header[0] == 31 && - header[1] == 139 && - header[2] == 8 && - (header[3] & 
4) != 0 && - header[12] == 66 && - header[13] == 67; - } - - private void ReadBlock() - { - long blockAddress = _stream.CanSeek ? _stream.Position : 0; - int count = _stream.Read(_compressedBlock, 0, BlockGZipFormatCommon.BlockHeaderLength); - - // handle the case where no data was read - if (count == 0) - { - _blockLength = 0; - return; - } - - // check the header - if (!HasValidHeader(count, _compressedBlock)) - { - throw new CompressionException($"Found an invalid header when reading the GZip block ({_filePath})"); - } - - int blockLength = BitConverter.ToUInt16(_compressedBlock, 16) + 1; - int remaining = blockLength - BlockGZipFormatCommon.BlockHeaderLength; - - count = _stream.Read(_compressedBlock, BlockGZipFormatCommon.BlockHeaderLength, remaining); - - // handle unexpected truncation - if (count != remaining) - { - throw new CompressionException($"Found unexpected truncation when reading the GZip block ({_filePath})"); - } - - count = _bgzf.Decompress(_compressedBlock, blockLength, _uncompressedBlock, BlockGZipFormatCommon.MaxBlockSize); - - if (count < 0) - { - throw new CompressionException($"Encountered an error when uncompressing the GZip block ({_filePath})"); - } - - // Do not reset offset if this read follows a seek - if (_blockLength != 0) _blockOffset = 0; - - _blockAddress = blockAddress; - _blockLength = count; - } - - public override int Read(byte[] buffer, int offset, int count) - { - if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockGZipStream."); - - if (count == 0) return 0; - - var numBytesRead = 0; - int dataOffset = offset; - - while (numBytesRead < count) - { - int numBytesAvailable = _blockLength - _blockOffset; - - if (numBytesAvailable <= 0) - { - ReadBlock(); - numBytesAvailable = _blockLength - _blockOffset; - if (numBytesAvailable <= 0) break; - } - - int copyLength = Math.Min(count - numBytesRead, numBytesAvailable); - Buffer.BlockCopy(_uncompressedBlock, _blockOffset, buffer, 
dataOffset, copyLength); - - _blockOffset += copyLength; - dataOffset += copyLength; - numBytesRead += copyLength; - } - - // ReSharper disable once InvertIf - if (_blockOffset == _blockLength) - { - _blockAddress = _stream.CanSeek ? _stream.Position : 0; - _blockOffset = _blockLength = 0; - } - - return numBytesRead; - } - - public override void Write(byte[] buffer, int offset, int count) - { - if (!_isCompressor) throw new CompressionException("Tried to write data to a decompression BlockGZipStream."); - - var numBytesWritten = 0; - int dataOffset = offset; - - // copy the data to the buffer - while (numBytesWritten < count) - { - int copyLength = Math.Min(BlockGZipFormatCommon.BlockSize - _blockOffset, count - numBytesWritten); - Buffer.BlockCopy(buffer, dataOffset, _uncompressedBlock, _blockOffset, copyLength); - - _blockOffset += copyLength; - dataOffset += copyLength; - numBytesWritten += copyLength; - - if (_blockOffset == BlockGZipFormatCommon.BlockSize) Flush(_blockOffset); - } - } - - private void SeekVirtualFilePointer(ulong virtualPosition) - { - long compressedOffset = GetCompressedOffset(virtualPosition); - int uncompressedOffset = GetUncompressedOffset(virtualPosition); - - // if we're already in the right block, no need to reload buffer. 
- if (_blockAddress != compressedOffset) - { - _blockAddress = compressedOffset; - _stream.Position = _blockAddress; - ReadBlock(); - } - - _blockOffset = uncompressedOffset; - } - - private static long GetCompressedOffset(ulong virtualPosition) - { - unchecked - { - return (long)((virtualPosition >> 16) & 0xFFFFFFFFFFFFL); - } - } - - private static int GetUncompressedOffset(ulong virtualPosition) - { - unchecked - { - return (int)(virtualPosition & 0xffff); - } - } - } -} +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Compression; +using Compression.Algorithms; +using ErrorHandling.Exceptions; + +namespace Compression.FileHandling +{ + // BGZF/GZIP header (specialized from RFC 1952; little endian): + // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + // | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| + // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + + // BGZF/GZIP footer: + // +---+---+---+---+---+---+---+---+ + // | CRC| Source len| + // +---+---+---+---+---+---+---+---+ + + public sealed class BlockGZipStream : Stream + { + private readonly byte[] _compressedBlock; + private readonly byte[] _uncompressedBlock; + private int _blockOffset; + private int _blockLength; + private long _blockAddress; + + private readonly bool _isCompressor; + private readonly bool _leaveStreamOpen; + + private readonly string _filePath; + private Stream _stream; + private readonly Zlib _bgzf; + private bool _isDisposed; + + public static class BlockGZipFormatCommon + { + public const int BlockSize = 65280; + public const int MaxBlockSize = 65536; + public const int BlockHeaderLength = 18; + } + + #region Stream + + public override bool CanRead => _stream != null && !_isCompressor && _stream.CanRead; + + public override bool CanWrite => _stream != null && _isCompressor && _stream.CanWrite; + + public override bool CanSeek => _stream != null && !_isCompressor && _stream.CanSeek; + + public 
override long Length => throw new NotSupportedException(); + + public override long Position + { + get => (_blockAddress << 16) | ((long)_blockOffset & 0xffff); + set => SeekVirtualFilePointer((ulong)value); + } + + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + + public override void SetLength(long value) => throw new NotSupportedException(); + + public override void Flush() => _stream.Flush(); + + protected override void Dispose(bool disposing) + { + if (_isDisposed) return; + + try + { + if (_isCompressor) + { + Flush(_blockOffset); + + // write an empty block (as EOF marker) + Flush(0); + } + + if (!_leaveStreamOpen) + { + _stream.Dispose(); + _stream = null; + } + + _isDisposed = true; + } + finally + { + base.Dispose(disposing); + } + } + + #endregion + + public BlockGZipStream(Stream stream, CompressionMode compressionMode, bool leaveStreamOpen = false, int compressionLevel = 1) + { + _filePath = "(stream)"; + _leaveStreamOpen = leaveStreamOpen; + _stream = stream; + + // sanity check: make sure the stream exists + if (stream == null) throw new ArgumentNullException(nameof(stream)); + + // sanity check: make sure we can use the stream for reading or writing + _isCompressor = compressionMode == CompressionMode.Compress; + if (_isCompressor && !_stream.CanWrite) throw new CompressionException("A stream lacking write capability was provided to the block GZip compressor."); + if (!_isCompressor && !_stream.CanRead) throw new CompressionException("A stream lacking read capability was provided to the block GZip decompressor."); + + _bgzf = new Zlib(compressionLevel); + _uncompressedBlock = new byte[BlockGZipFormatCommon.MaxBlockSize]; + _compressedBlock = new byte[_bgzf.GetCompressedBufferBounds(BlockGZipFormatCommon.MaxBlockSize)]; + } + + private void Flush(int uncompressedSize) + { + int blockLength = _bgzf.Compress(_uncompressedBlock, uncompressedSize, _compressedBlock, BlockGZipFormatCommon.MaxBlockSize); + 
_blockOffset = 0; + + _stream.Write(_compressedBlock, 0, blockLength); + _blockAddress = _stream.Position; + } + + private static bool HasValidHeader(int numHeaderBytes, IReadOnlyList header) + { + if (numHeaderBytes != BlockGZipFormatCommon.BlockHeaderLength) return false; + + return header[0] == 31 && + header[1] == 139 && + header[2] == 8 && + (header[3] & 4) != 0 && + header[12] == 66 && + header[13] == 67; + } + + private void ReadBlock() + { + long blockAddress = _stream.CanSeek ? _stream.Position : 0; + int count = _stream.Read(_compressedBlock, 0, BlockGZipFormatCommon.BlockHeaderLength); + + // handle the case where no data was read + if (count == 0) + { + _blockLength = 0; + return; + } + + // check the header + if (!HasValidHeader(count, _compressedBlock)) + { + throw new CompressionException($"Found an invalid header when reading the GZip block ({_filePath})"); + } + + int blockLength = BitConverter.ToUInt16(_compressedBlock, 16) + 1; + int remaining = blockLength - BlockGZipFormatCommon.BlockHeaderLength; + + count = _stream.Read(_compressedBlock, BlockGZipFormatCommon.BlockHeaderLength, remaining); + + // handle unexpected truncation + if (count != remaining) + { + throw new CompressionException($"Found unexpected truncation when reading the GZip block ({_filePath})"); + } + + count = _bgzf.Decompress(_compressedBlock, blockLength, _uncompressedBlock, BlockGZipFormatCommon.MaxBlockSize); + + if (count < 0) + { + throw new CompressionException($"Encountered an error when uncompressing the GZip block ({_filePath})"); + } + + // Do not reset offset if this read follows a seek + if (_blockLength != 0) _blockOffset = 0; + + _blockAddress = blockAddress; + _blockLength = count; + } + + public override int Read(byte[] buffer, int offset, int count) + { + if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockGZipStream."); + + if (count == 0) return 0; + + var numBytesRead = 0; + int dataOffset = offset; + + while 
(numBytesRead < count) + { + int numBytesAvailable = _blockLength - _blockOffset; + + if (numBytesAvailable <= 0) + { + ReadBlock(); + numBytesAvailable = _blockLength - _blockOffset; + if (numBytesAvailable <= 0) break; + } + + int copyLength = Math.Min(count - numBytesRead, numBytesAvailable); + Buffer.BlockCopy(_uncompressedBlock, _blockOffset, buffer, dataOffset, copyLength); + + _blockOffset += copyLength; + dataOffset += copyLength; + numBytesRead += copyLength; + } + + // ReSharper disable once InvertIf + if (_blockOffset == _blockLength) + { + _blockAddress = _stream.CanSeek ? _stream.Position : 0; + _blockOffset = _blockLength = 0; + } + + return numBytesRead; + } + + public override void Write(byte[] buffer, int offset, int count) + { + if (!_isCompressor) throw new CompressionException("Tried to write data to a decompression BlockGZipStream."); + + var numBytesWritten = 0; + int dataOffset = offset; + + // copy the data to the buffer + while (numBytesWritten < count) + { + int copyLength = Math.Min(BlockGZipFormatCommon.BlockSize - _blockOffset, count - numBytesWritten); + Buffer.BlockCopy(buffer, dataOffset, _uncompressedBlock, _blockOffset, copyLength); + + _blockOffset += copyLength; + dataOffset += copyLength; + numBytesWritten += copyLength; + + if (_blockOffset == BlockGZipFormatCommon.BlockSize) Flush(_blockOffset); + } + } + + private void SeekVirtualFilePointer(ulong virtualPosition) + { + long compressedOffset = GetCompressedOffset(virtualPosition); + int uncompressedOffset = GetUncompressedOffset(virtualPosition); + + // if we're already in the right block, no need to reload buffer. 
+ if (_blockAddress != compressedOffset) + { + _blockAddress = compressedOffset; + _stream.Position = _blockAddress; + ReadBlock(); + } + + _blockOffset = uncompressedOffset; + } + + private static long GetCompressedOffset(ulong virtualPosition) + { + unchecked + { + return (long)((virtualPosition >> 16) & 0xFFFFFFFFFFFFL); + } + } + + private static int GetUncompressedOffset(ulong virtualPosition) + { + unchecked + { + return (int)(virtualPosition & 0xffff); + } + } + } +} diff --git a/Compression/FileHandling/BlockHeader.cs b/Compression/FileHandling/BlockHeader.cs index 5387edf0..69d430d0 100644 --- a/Compression/FileHandling/BlockHeader.cs +++ b/Compression/FileHandling/BlockHeader.cs @@ -1,59 +1,59 @@ -using System.IO; -using ErrorHandling.Exceptions; - -namespace Compression.FileHandling -{ - public sealed class BlockHeader - { - private readonly byte[] _header; - - public const int HeaderSize = 12; - private const int HeaderId = -822411574; // cafeface - - public int NumUncompressedBytes; - public int NumCompressedBytes; - - public bool IsEmpty => NumUncompressedBytes == -1 && NumCompressedBytes == -1; - - public BlockHeader() => _header = new byte[HeaderSize]; - - private int GetInt(int offset) => _header[offset] | _header[offset + 1] << 8 | _header[offset + 2] << 16 | - _header[offset + 3] << 24; - - public void Read(Stream stream) - { - int numBytesRead = stream.Read(_header, 0, HeaderSize); - - if (numBytesRead == 0) - { - NumUncompressedBytes = -1; - NumCompressedBytes = -1; - return; - } - - if (numBytesRead != HeaderSize) throw new IOException($"Expected {HeaderSize} bytes from the block header, but received only {numBytesRead} bytes."); - - int headerId = GetInt(0); - if (headerId != HeaderId) throw new CompressionException($"Expected the header ID ({HeaderId}), but found the following: {headerId}"); - - NumUncompressedBytes = GetInt(4); - NumCompressedBytes = GetInt(8); - } - - private void SetInt(int value, int offset) - { - _header[offset] = 
(byte)value; - _header[offset + 1] = (byte)(value >> 8); - _header[offset + 2] = (byte)(value >> 16); - _header[offset + 3] = (byte)(value >> 24); - } - - public void Write(Stream stream) - { - SetInt(HeaderId, 0); - SetInt(NumUncompressedBytes, 4); - SetInt(NumCompressedBytes, 8); - stream.Write(_header, 0, HeaderSize); - } - } -} +using System.IO; +using ErrorHandling.Exceptions; + +namespace Compression.FileHandling +{ + public sealed class BlockHeader + { + private readonly byte[] _header; + + public const int HeaderSize = 12; + private const int HeaderId = -822411574; // cafeface + + public int NumUncompressedBytes; + public int NumCompressedBytes; + + public bool IsEmpty => NumUncompressedBytes == -1 && NumCompressedBytes == -1; + + public BlockHeader() => _header = new byte[HeaderSize]; + + private int GetInt(int offset) => _header[offset] | _header[offset + 1] << 8 | _header[offset + 2] << 16 | + _header[offset + 3] << 24; + + public void Read(Stream stream) + { + int numBytesRead = stream.Read(_header, 0, HeaderSize); + + if (numBytesRead == 0) + { + NumUncompressedBytes = -1; + NumCompressedBytes = -1; + return; + } + + if (numBytesRead != HeaderSize) throw new IOException($"Expected {HeaderSize} bytes from the block header, but received only {numBytesRead} bytes."); + + int headerId = GetInt(0); + if (headerId != HeaderId) throw new CompressionException($"Expected the header ID ({HeaderId}), but found the following: {headerId}"); + + NumUncompressedBytes = GetInt(4); + NumCompressedBytes = GetInt(8); + } + + private void SetInt(int value, int offset) + { + _header[offset] = (byte)value; + _header[offset + 1] = (byte)(value >> 8); + _header[offset + 2] = (byte)(value >> 16); + _header[offset + 3] = (byte)(value >> 24); + } + + public void Write(Stream stream) + { + SetInt(HeaderId, 0); + SetInt(NumUncompressedBytes, 4); + SetInt(NumCompressedBytes, 8); + stream.Write(_header, 0, HeaderSize); + } + } +} diff --git a/Compression/FileHandling/BlockStream.cs 
b/Compression/FileHandling/BlockStream.cs index a9627809..c363e6cd 100644 --- a/Compression/FileHandling/BlockStream.cs +++ b/Compression/FileHandling/BlockStream.cs @@ -1,175 +1,175 @@ -using System; -using System.IO; -using System.IO.Compression; -using System.Text; -using Compression.Algorithms; -using Compression.DataStructures; -using ErrorHandling.Exceptions; - -namespace Compression.FileHandling -{ - public sealed class BlockStream : Stream - { - private readonly bool _isCompressor; - private readonly bool _leaveStreamOpen; - - private Stream _stream; - private BinaryWriter _writer; - private Action _headerWrite; - - private readonly Block _block; - private bool _foundEof; - private bool _isDisposed; - - #region Stream - - public override bool CanRead => _stream.CanRead; - public override bool CanWrite => _stream.CanWrite; - public override bool CanSeek => _stream.CanSeek; - public override long Length => throw new NotSupportedException(); - public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); - public override void SetLength(long value) => throw new NotSupportedException(); - - public override long Position - { - get => _stream.Position; - set => throw new NotSupportedException(); - } - - public override void Flush() - { - if (_block.Offset > 0) _block.Write(_stream); - } - - protected override void Dispose(bool disposing) - { - if (_isDisposed) return; - - try - { - if (_isCompressor) - { - Flush(); - _block.WriteEof(_stream); - - // update the header - if (_headerWrite != null) - { - _stream.Position = 0; - _headerWrite(_writer); - } - - _writer.Dispose(); - _writer = null; - } - - if (!_leaveStreamOpen) - { - _stream.Dispose(); - _stream = null; - } - - _isDisposed = true; - } - finally - { - base.Dispose(disposing); - } - } - - #endregion - - public BlockStream(ICompressionAlgorithm compressionAlgorithm, Stream stream, CompressionMode compressionMode, - bool leaveStreamOpen = false, int size = 16777216) - { - 
_stream = stream ?? throw new ArgumentNullException(nameof(stream)); - _isCompressor = compressionMode == CompressionMode.Compress; - _leaveStreamOpen = leaveStreamOpen; - _block = new Block(compressionAlgorithm, size); - - // sanity check: make sure we can use the stream for reading or writing - if (_isCompressor && !_stream.CanWrite) throw new ArgumentException("A stream lacking write capability was provided to the block GZip compressor."); - if (!_isCompressor && !_stream.CanRead) throw new ArgumentException("A stream lacking read capability was provided to the block GZip decompressor."); - - if (_isCompressor) _writer = new BinaryWriter(_stream, Encoding.UTF8, true); - } - - public void WriteHeader(Action headerWrite) - { - _headerWrite = headerWrite; - _headerWrite(_writer); - } - - public override int Read(byte[] buffer, int offset, int count) - { - if (_foundEof) return 0; - if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockGZipStream."); - - ValidateParameters(buffer, offset, count); - - var numBytesRead = 0; - int dataOffset = offset; - - while (numBytesRead < count) - { - if (!_block.HasMoreData) - { - int numBytes = _block.Read(_stream); - - if (numBytes == -1) - { - _foundEof = true; - return numBytesRead; - } - } - - int copyLength = _block.CopyFrom(buffer, dataOffset, count - numBytesRead); - - dataOffset += copyLength; - numBytesRead += copyLength; - } - - return numBytesRead; - } - - private void ValidateParameters(byte[] array, int offset, int count) - { - if (array == null) throw new ArgumentNullException(nameof(array)); - if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset)); - if (count < 0) throw new ArgumentOutOfRangeException(nameof(count)); - if (array.Length - offset < count) throw new ArgumentException("Invalid Argument Offset Count"); - } - - public override void Write(byte[] buffer, int offset, int count) - { - if (!_isCompressor) throw new CompressionException("Tried to write 
data to a decompression BlockGZipStream."); - - ValidateParameters(buffer, offset, count); - - var numBytesWritten = 0; - int dataOffset = offset; - - while (numBytesWritten < count) - { - int copyLength = _block.CopyTo(buffer, dataOffset, count - numBytesWritten); - dataOffset += copyLength; - numBytesWritten += copyLength; - if (_block.IsFull) _block.Write(_stream); - } - } - - public (long FileOffset, int InternalOffset) GetBlockPosition() => (_stream.Position, _block.Offset); - - public void SetBlockPosition(long fileOffset, int internalOffset = 0) - { - if (fileOffset != _block.FileOffset) - { - _stream.Position = fileOffset; - _block.Read(_stream); - } - - _foundEof = false; - _block.Offset = internalOffset; - } - } -} +using System; +using System.IO; +using System.IO.Compression; +using System.Text; +using Compression.Algorithms; +using Compression.DataStructures; +using ErrorHandling.Exceptions; + +namespace Compression.FileHandling +{ + public sealed class BlockStream : Stream + { + private readonly bool _isCompressor; + private readonly bool _leaveStreamOpen; + + private Stream _stream; + private BinaryWriter _writer; + private Action _headerWrite; + + private readonly Block _block; + private bool _foundEof; + private bool _isDisposed; + + #region Stream + + public override bool CanRead => _stream.CanRead; + public override bool CanWrite => _stream.CanWrite; + public override bool CanSeek => _stream.CanSeek; + public override long Length => throw new NotSupportedException(); + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + public override void SetLength(long value) => throw new NotSupportedException(); + + public override long Position + { + get => _stream.Position; + set => throw new NotSupportedException(); + } + + public override void Flush() + { + if (_block.Offset > 0) _block.Write(_stream); + } + + protected override void Dispose(bool disposing) + { + if (_isDisposed) return; + + try + { + if 
(_isCompressor) + { + Flush(); + _block.WriteEof(_stream); + + // update the header + if (_headerWrite != null) + { + _stream.Position = 0; + _headerWrite(_writer); + } + + _writer.Dispose(); + _writer = null; + } + + if (!_leaveStreamOpen) + { + _stream.Dispose(); + _stream = null; + } + + _isDisposed = true; + } + finally + { + base.Dispose(disposing); + } + } + + #endregion + + public BlockStream(ICompressionAlgorithm compressionAlgorithm, Stream stream, CompressionMode compressionMode, + bool leaveStreamOpen = false, int size = 16777216) + { + _stream = stream ?? throw new ArgumentNullException(nameof(stream)); + _isCompressor = compressionMode == CompressionMode.Compress; + _leaveStreamOpen = leaveStreamOpen; + _block = new Block(compressionAlgorithm, size); + + // sanity check: make sure we can use the stream for reading or writing + if (_isCompressor && !_stream.CanWrite) throw new ArgumentException("A stream lacking write capability was provided to the block GZip compressor."); + if (!_isCompressor && !_stream.CanRead) throw new ArgumentException("A stream lacking read capability was provided to the block GZip decompressor."); + + if (_isCompressor) _writer = new BinaryWriter(_stream, Encoding.UTF8, true); + } + + public void WriteHeader(Action headerWrite) + { + _headerWrite = headerWrite; + _headerWrite(_writer); + } + + public override int Read(byte[] buffer, int offset, int count) + { + if (_foundEof) return 0; + if (_isCompressor) throw new CompressionException("Tried to read data from a compression BlockGZipStream."); + + ValidateParameters(buffer, offset, count); + + var numBytesRead = 0; + int dataOffset = offset; + + while (numBytesRead < count) + { + if (!_block.HasMoreData) + { + int numBytes = _block.Read(_stream); + + if (numBytes == -1) + { + _foundEof = true; + return numBytesRead; + } + } + + int copyLength = _block.CopyFrom(buffer, dataOffset, count - numBytesRead); + + dataOffset += copyLength; + numBytesRead += copyLength; + } + + return 
numBytesRead; + } + + private void ValidateParameters(byte[] array, int offset, int count) + { + if (array == null) throw new ArgumentNullException(nameof(array)); + if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset)); + if (count < 0) throw new ArgumentOutOfRangeException(nameof(count)); + if (array.Length - offset < count) throw new ArgumentException("Invalid Argument Offset Count"); + } + + public override void Write(byte[] buffer, int offset, int count) + { + if (!_isCompressor) throw new CompressionException("Tried to write data to a decompression BlockGZipStream."); + + ValidateParameters(buffer, offset, count); + + var numBytesWritten = 0; + int dataOffset = offset; + + while (numBytesWritten < count) + { + int copyLength = _block.CopyTo(buffer, dataOffset, count - numBytesWritten); + dataOffset += copyLength; + numBytesWritten += copyLength; + if (_block.IsFull) _block.Write(_stream); + } + } + + public (long FileOffset, int InternalOffset) GetBlockPosition() => (_stream.Position, _block.Offset); + + public void SetBlockPosition(long fileOffset, int internalOffset = 0) + { + if (fileOffset != _block.FileOffset) + { + _stream.Position = fileOffset; + _block.Read(_stream); + } + + _foundEof = false; + _block.Offset = internalOffset; + } + } +} diff --git a/Compression/Utilities/GZipUtilities.cs b/Compression/Utilities/GZipUtilities.cs index 5aba0dca..ffa23aad 100644 --- a/Compression/Utilities/GZipUtilities.cs +++ b/Compression/Utilities/GZipUtilities.cs @@ -1,103 +1,103 @@ -using System; -using System.IO; -using System.IO.Compression; -using Compression.FileHandling; -using ErrorHandling.Exceptions; -using IO; - -namespace Compression.Utilities -{ - public static class GZipUtilities - { - private const int NumHeaderBytes = 18; - - private enum CompressionAlgorithm - { - Uncompressed, - GZip, - BlockGZip - } - - public static StreamReader GetAppropriateStreamReader(string filePath) => 
FileUtilities.GetStreamReader(GetAppropriateReadStream(filePath)); - public static BinaryReader GetAppropriateBinaryReader(string filePath) => new BinaryReader(GetAppropriateReadStream(filePath)); - public static StreamWriter GetStreamWriter(string filePath) => new StreamWriter(GetWriteStream(filePath)); - public static BinaryWriter GetBinaryWriter(string filePath) => new BinaryWriter(GetWriteStream(filePath)); - public static Stream GetWriteStream(string filePath) => new BlockGZipStream(FileUtilities.GetCreateStream(filePath), CompressionMode.Compress); - - private static Stream GetAppropriateStream(Stream stream, CompressionAlgorithm compressionAlgorithm) - { - Stream newStream; - - // ReSharper disable once SwitchStatementMissingSomeCases - switch (compressionAlgorithm) - { - case CompressionAlgorithm.BlockGZip: - newStream = new BlockGZipStream(stream, CompressionMode.Decompress); - break; - case CompressionAlgorithm.GZip: - newStream = new GZipStream(stream, CompressionMode.Decompress); - break; - default: - newStream = stream; - break; - } - - return newStream; - } - //used in custom annotation lambda - public static Stream GetAppropriateStream(PersistentStream pStream) - { - var header = GetHeader(pStream); - var compressionAlgorithm = IdentifyCompressionAlgorithm(header); - pStream.Position = 0; - var appropriateStream = GetAppropriateStream(pStream, compressionAlgorithm); - return appropriateStream; - } - - public static Stream GetAppropriateReadStream(string filePath) - { - var header = GetHeader(PersistentStreamUtils.GetReadStream(filePath)); - var compressionAlgorithm = IdentifyCompressionAlgorithm(header); - var fileStream = PersistentStreamUtils.GetReadStream(filePath); - return GetAppropriateStream(fileStream, compressionAlgorithm); - } - - private static byte[] GetHeader(Stream stream) - { - byte[] header = null; - - try - { - using (var reader = new ExtendedBinaryReader(stream)) - { - header = reader.ReadBytes(NumHeaderBytes); - } - } - catch 
(Exception e) - { - if (e.Message.Contains("because it is being used by another process.")) - { - throw new ProcessLockedFileException(e.Message); - } - } - - return header; - } - - // ReSharper disable once SuggestBaseTypeForParameter - private static CompressionAlgorithm IdentifyCompressionAlgorithm(byte[] header) - { - var result = CompressionAlgorithm.Uncompressed; - if (header == null || header.Length != NumHeaderBytes) return result; - - // check if this is a gzip file - if (header[0] != 31 || header[1] != 139 || header[2] != 8) return result; - result = CompressionAlgorithm.GZip; - - // check if this is a block GZip file - if ((header[3] & 4) != 0 && header[12] == 66 && header[13] == 67) result = CompressionAlgorithm.BlockGZip; - - return result; - } - } +using System; +using System.IO; +using System.IO.Compression; +using Compression.FileHandling; +using ErrorHandling.Exceptions; +using IO; + +namespace Compression.Utilities +{ + public static class GZipUtilities + { + private const int NumHeaderBytes = 18; + + private enum CompressionAlgorithm + { + Uncompressed, + GZip, + BlockGZip + } + + public static StreamReader GetAppropriateStreamReader(string filePath) => FileUtilities.GetStreamReader(GetAppropriateReadStream(filePath)); + public static BinaryReader GetAppropriateBinaryReader(string filePath) => new BinaryReader(GetAppropriateReadStream(filePath)); + public static StreamWriter GetStreamWriter(string filePath) => new StreamWriter(GetWriteStream(filePath)); + public static BinaryWriter GetBinaryWriter(string filePath) => new BinaryWriter(GetWriteStream(filePath)); + public static Stream GetWriteStream(string filePath) => new BlockGZipStream(FileUtilities.GetCreateStream(filePath), CompressionMode.Compress); + + private static Stream GetAppropriateStream(Stream stream, CompressionAlgorithm compressionAlgorithm) + { + Stream newStream; + + // ReSharper disable once SwitchStatementMissingSomeCases + switch (compressionAlgorithm) + { + case 
CompressionAlgorithm.BlockGZip: + newStream = new BlockGZipStream(stream, CompressionMode.Decompress); + break; + case CompressionAlgorithm.GZip: + newStream = new GZipStream(stream, CompressionMode.Decompress); + break; + default: + newStream = stream; + break; + } + + return newStream; + } + //used in custom annotation lambda + public static Stream GetAppropriateStream(PersistentStream pStream) + { + var header = GetHeader(pStream); + var compressionAlgorithm = IdentifyCompressionAlgorithm(header); + pStream.Position = 0; + var appropriateStream = GetAppropriateStream(pStream, compressionAlgorithm); + return appropriateStream; + } + + public static Stream GetAppropriateReadStream(string filePath) + { + var header = GetHeader(PersistentStreamUtils.GetReadStream(filePath)); + var compressionAlgorithm = IdentifyCompressionAlgorithm(header); + var fileStream = PersistentStreamUtils.GetReadStream(filePath); + return GetAppropriateStream(fileStream, compressionAlgorithm); + } + + private static byte[] GetHeader(Stream stream) + { + byte[] header = null; + + try + { + using (var reader = new ExtendedBinaryReader(stream)) + { + header = reader.ReadBytes(NumHeaderBytes); + } + } + catch (Exception e) + { + if (e.Message.Contains("because it is being used by another process.")) + { + throw new ProcessLockedFileException(e.Message); + } + } + + return header; + } + + // ReSharper disable once SuggestBaseTypeForParameter + private static CompressionAlgorithm IdentifyCompressionAlgorithm(byte[] header) + { + var result = CompressionAlgorithm.Uncompressed; + if (header == null || header.Length != NumHeaderBytes) return result; + + // check if this is a gzip file + if (header[0] != 31 || header[1] != 139 || header[2] != 8) return result; + result = CompressionAlgorithm.GZip; + + // check if this is a block GZip file + if ((header[3] & 4) != 0 && header[12] == 66 && header[13] == 67) result = CompressionAlgorithm.BlockGZip; + + return result; + } + } } \ No newline at end of 
file diff --git a/ErrorHandling/ErrorHandling.csproj b/ErrorHandling/ErrorHandling.csproj index 849c542a..b73d07ef 100644 --- a/ErrorHandling/ErrorHandling.csproj +++ b/ErrorHandling/ErrorHandling.csproj @@ -1,8 +1,8 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + Full + + \ No newline at end of file diff --git a/Genome/Genome.csproj b/Genome/Genome.csproj index 9b86c6ee..8456384f 100644 --- a/Genome/Genome.csproj +++ b/Genome/Genome.csproj @@ -3,6 +3,7 @@ netcoreapp2.1 ..\bin\$(Configuration) Full + true diff --git a/Genome/ISequence.cs b/Genome/ISequence.cs index a5b82a22..6e4195dd 100644 --- a/Genome/ISequence.cs +++ b/Genome/ISequence.cs @@ -5,5 +5,6 @@ public interface ISequence int Length { get; } Band[] CytogeneticBands { get; } string Substring(int offset, int length); + string Sequence { get; } } } \ No newline at end of file diff --git a/Genome/SequenceUtilities.cs b/Genome/SequenceUtilities.cs index 47fe5ea1..d7559503 100644 --- a/Genome/SequenceUtilities.cs +++ b/Genome/SequenceUtilities.cs @@ -1,72 +1,68 @@ -using System.Collections.Generic; -using System.Linq; +using System; +using System.Buffers; namespace Genome { - public static class SequenceUtilities - { - #region members + public static class SequenceUtilities + { + private static readonly char[] ReverseComplementLookupTable; - private static readonly char[] ReverseComplementLookupTable; - private static readonly HashSet CanonicalBases; + static SequenceUtilities() + { + // initialize the reverse complement code + const string forwardBases = "ABCDGHKMRTVYabcdghkmrtvy"; + const string reverseBases = "TVGHCDMKYABRTVGHCDMKYABR"; + ReverseComplementLookupTable = new char[256]; - #endregion + for (var i = 0; i < 256; i++) ReverseComplementLookupTable[i] = 'N'; + for (var i = 0; i < forwardBases.Length; i++) + { + ReverseComplementLookupTable[forwardBases[i]] = reverseBases[i]; + } + } - static SequenceUtilities() - { - // initialize the 
reverse complement code - const string forwardBases = "ABCDGHKMRTVYabcdghkmrtvy"; - const string reverseBases = "TVGHCDMKYABRTVGHCDMKYABR"; - ReverseComplementLookupTable = new char[256]; + public static unsafe string GetReverseComplement(string bases) + { + if (bases == null) return null; + if (bases == string.Empty) return string.Empty; - for (var i = 0; i < 256; i++) ReverseComplementLookupTable[i] = 'N'; - for (var i = 0; i < forwardBases.Length; i++) - { - ReverseComplementLookupTable[forwardBases[i]] = reverseBases[i]; - } + ArrayPool charPool = ArrayPool.Shared; + int numBases = bases.Length; - CanonicalBases = new HashSet { 'A', 'C', 'G', 'T', '-' }; - } + char[] reverseChars = charPool.Rent(numBases); - /// - /// returns the reverse complement of the given bases - /// - public static string GetReverseComplement(string bases) - { - // sanity check - if (bases == null) return null; + fixed (char* pBases = bases) + fixed (char* pReverseChars = reverseChars) + { + char* pIn = pBases; + char* pOut = pReverseChars + numBases - 1; - int numBases = bases.Length; - var reverseChars = new char[numBases]; + for (var i = 0; i < numBases; i++) + { + *pOut = ReverseComplementLookupTable[*pIn]; + pOut--; + pIn++; + } + } - for (var i = 0; i < numBases; ++i) - { - reverseChars[i] = ReverseComplementLookupTable[bases[numBases - i - 1]]; - } + var reverseString = new string(reverseChars, 0, numBases); + charPool.Return(reverseChars); - return new string(reverseChars); - } + return reverseString; + } - /// - /// returns true if we have a base other than the 4 standard bases: A, C, G, and T - /// - public static bool HasNonCanonicalBase(string bases) - { - return !string.IsNullOrEmpty(bases) && bases.Any(c => !CanonicalBases.Contains(c)); - } + public static bool HasNonCanonicalBase(string bases) + { + if (bases == null) return false; + ReadOnlySpan baseSpan = bases.AsSpan(); - /// - /// returns the correct start value when retrieving a substring of a substring - /// where 
the top level might be reverse complemented - /// - public static string GetSubSubstring(int seqStart, int seqEnd, bool seqOnReverseStrand, int subStart, int subEnd, ISequence cs) - { - int start = seqOnReverseStrand ? seqEnd - subEnd : seqStart + subStart; + foreach (char b in baseSpan) + { + if (b == 'A' || b == 'C' || b == 'G' || b == 'T' || b == '-') continue; + return true; + } - string precedingBases = cs.Substring(start - 1, subEnd - subStart + 1); - if (seqOnReverseStrand) precedingBases = GetReverseComplement(precedingBases); - - return precedingBases; - } - } + return false; + } + } } \ No newline at end of file diff --git a/IO/IO.csproj b/IO/IO.csproj index 62a1c3cf..6cfb2fa5 100644 --- a/IO/IO.csproj +++ b/IO/IO.csproj @@ -12,7 +12,7 @@ - - + + diff --git a/Jasix/Jasix.csproj b/Jasix/Jasix.csproj index a55e5c26..f917a5e9 100644 --- a/Jasix/Jasix.csproj +++ b/Jasix/Jasix.csproj @@ -6,7 +6,7 @@ Full - + diff --git a/Nirvana.sln b/Nirvana.sln index 9a0e0a99..05a5e753 100644 --- a/Nirvana.sln +++ b/Nirvana.sln @@ -1,142 +1,142 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29201.188 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VariantAnnotation.Interface", "VariantAnnotation.Interface\VariantAnnotation.Interface.csproj", "{248C8736-3A76-4F45-A131-A776BD3257C9}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Nirvana", "Nirvana\Nirvana.csproj", "{84CD8FB5-1071-47D5-AF1A-E028134D3C70}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Vcf", "Vcf\Vcf.csproj", "{0DF48817-8AED-449A-AA87-CB91040D8439}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Compression", "Compression\Compression.csproj", "{8E2CD866-DFCF-4486-A289-32DEFA050E87}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VariantAnnotation", "VariantAnnotation\VariantAnnotation.csproj", 
"{155E28ED-122E-49DD-A8F0-FE3F670073B8}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ErrorHandling", "ErrorHandling\ErrorHandling.csproj", "{A65F4919-CDB8-49C5-ADA4-66055A3F4923}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "UnitTests\UnitTests.csproj", "{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CommandLine", "CommandLine\CommandLine.csproj", "{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CacheUtils", "CacheUtils\CacheUtils.csproj", "{986CF15B-DFAE-4C39-98D0-75A15271B34A}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SAUtils", "SAUtils\SAUtils.csproj", "{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Jasix", "Jasix\Jasix.csproj", "{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Phantom", "Phantom\Phantom.csproj", "{86DB686A-17A8-446B-A2C4-100CD6ADCB39}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "OptimizedCore", "OptimizedCore\OptimizedCore.csproj", "{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Intervals", "Intervals\Intervals.csproj", "{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Variants", "Variants\Variants.csproj", "{0A94104A-71E7-4925-B667-C29C18E3356D}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Genome", "Genome\Genome.csproj", "{3B5C30A5-FBBC-4247-BE62-2B64960213FD}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "IO", "IO\IO.csproj", "{654069F3-3B86-4325-823F-BC78946A26FF}" - ProjectSection(ProjectDependencies) = postProject - {82CA75B3-37DF-40DA-AA1B-70888CF3ED05} = {82CA75B3-37DF-40DA-AA1B-70888CF3ED05} - EndProjectSection -EndProject 
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tabix", "Tabix\Tabix.csproj", "{F337E3F6-72AA-44B4-B11F-D69EE14B6152}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cloud", "Cloud\Cloud.csproj", "{E93914C8-2599-46BE-BE18-6229E53F581B}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReferenceUtils", "ReferenceUtils\ReferenceUtils.csproj", "{234765A8-2B5C-4FD5-ACBA-6D48002E9074}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.Build.0 = Debug|Any CPU - {248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.ActiveCfg = Release|Any CPU - {248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.Build.0 = Release|Any CPU - {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any CPU.Build.0 = Debug|Any CPU - {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.ActiveCfg = Release|Any CPU - {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.Build.0 = Release|Any CPU - {0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.Build.0 = Release|Any CPU - {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.Build.0 = Debug|Any CPU - {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.ActiveCfg = Release|Any CPU - {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.Build.0 = Release|Any CPU - 
{155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.Build.0 = Debug|Any CPU - {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.ActiveCfg = Release|Any CPU - {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.Build.0 = Release|Any CPU - {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.Build.0 = Debug|Any CPU - {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.Build.0 = Release|Any CPU - {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.Build.0 = Release|Any CPU - {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.ActiveCfg = Release|Any CPU - {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.Build.0 = Release|Any CPU - {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.Build.0 = Release|Any CPU - {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.Build.0 = Release|Any CPU - 
{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.Build.0 = Debug|Any CPU - {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.Build.0 = Release|Any CPU - {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Debug|Any CPU.Build.0 = Debug|Any CPU - {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Release|Any CPU.ActiveCfg = Release|Any CPU - {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Release|Any CPU.Build.0 = Release|Any CPU - {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.Build.0 = Debug|Any CPU - {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.ActiveCfg = Release|Any CPU - {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.Build.0 = Release|Any CPU - {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.Build.0 = Debug|Any CPU - {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.ActiveCfg = Release|Any CPU - {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.Build.0 = Release|Any CPU - {0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.Build.0 = Release|Any CPU - {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.Build.0 = Debug|Any CPU - {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.ActiveCfg = Release|Any CPU - {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.Build.0 = Release|Any CPU - 
{654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.Build.0 = Debug|Any CPU - {654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.ActiveCfg = Release|Any CPU - {654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.Build.0 = Release|Any CPU - {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.Build.0 = Debug|Any CPU - {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.ActiveCfg = Release|Any CPU - {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.Build.0 = Release|Any CPU - {E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.Build.0 = Release|Any CPU - {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.Build.0 = Debug|Any CPU - {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.ActiveCfg = Release|Any CPU - {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {BA40BDB6-7E39-4F75-AC8A-EB65F7FC8209} - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.29201.188 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VariantAnnotation.Interface", "VariantAnnotation.Interface\VariantAnnotation.Interface.csproj", "{248C8736-3A76-4F45-A131-A776BD3257C9}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = 
"Nirvana", "Nirvana\Nirvana.csproj", "{84CD8FB5-1071-47D5-AF1A-E028134D3C70}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Vcf", "Vcf\Vcf.csproj", "{0DF48817-8AED-449A-AA87-CB91040D8439}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Compression", "Compression\Compression.csproj", "{8E2CD866-DFCF-4486-A289-32DEFA050E87}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "VariantAnnotation", "VariantAnnotation\VariantAnnotation.csproj", "{155E28ED-122E-49DD-A8F0-FE3F670073B8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ErrorHandling", "ErrorHandling\ErrorHandling.csproj", "{A65F4919-CDB8-49C5-ADA4-66055A3F4923}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTests", "UnitTests\UnitTests.csproj", "{0CB1644A-BEA1-4CF6-AD5F-E544512769C2}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CommandLine", "CommandLine\CommandLine.csproj", "{147C336A-6A6E-43F4-BDDC-8C8B72199C5D}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CacheUtils", "CacheUtils\CacheUtils.csproj", "{986CF15B-DFAE-4C39-98D0-75A15271B34A}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SAUtils", "SAUtils\SAUtils.csproj", "{F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Jasix", "Jasix\Jasix.csproj", "{ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Phantom", "Phantom\Phantom.csproj", "{86DB686A-17A8-446B-A2C4-100CD6ADCB39}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "OptimizedCore", "OptimizedCore\OptimizedCore.csproj", "{76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Intervals", "Intervals\Intervals.csproj", "{82CA75B3-37DF-40DA-AA1B-70888CF3ED05}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Variants", "Variants\Variants.csproj", 
"{0A94104A-71E7-4925-B667-C29C18E3356D}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Genome", "Genome\Genome.csproj", "{3B5C30A5-FBBC-4247-BE62-2B64960213FD}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "IO", "IO\IO.csproj", "{654069F3-3B86-4325-823F-BC78946A26FF}" + ProjectSection(ProjectDependencies) = postProject + {82CA75B3-37DF-40DA-AA1B-70888CF3ED05} = {82CA75B3-37DF-40DA-AA1B-70888CF3ED05} + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tabix", "Tabix\Tabix.csproj", "{F337E3F6-72AA-44B4-B11F-D69EE14B6152}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Cloud", "Cloud\Cloud.csproj", "{E93914C8-2599-46BE-BE18-6229E53F581B}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReferenceUtils", "ReferenceUtils\ReferenceUtils.csproj", "{234765A8-2B5C-4FD5-ACBA-6D48002E9074}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {248C8736-3A76-4F45-A131-A776BD3257C9}.Debug|Any CPU.Build.0 = Debug|Any CPU + {248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.ActiveCfg = Release|Any CPU + {248C8736-3A76-4F45-A131-A776BD3257C9}.Release|Any CPU.Build.0 = Release|Any CPU + {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Debug|Any CPU.Build.0 = Debug|Any CPU + {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.ActiveCfg = Release|Any CPU + {84CD8FB5-1071-47D5-AF1A-E028134D3C70}.Release|Any CPU.Build.0 = Release|Any CPU + {0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0DF48817-8AED-449A-AA87-CB91040D8439}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0DF48817-8AED-449A-AA87-CB91040D8439}.Release|Any CPU.Build.0 = Release|Any CPU + {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Debug|Any CPU.Build.0 = Debug|Any CPU + {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.ActiveCfg = Release|Any CPU + {8E2CD866-DFCF-4486-A289-32DEFA050E87}.Release|Any CPU.Build.0 = Release|Any CPU + {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {155E28ED-122E-49DD-A8F0-FE3F670073B8}.Release|Any CPU.Build.0 = Release|Any CPU + {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A65F4919-CDB8-49C5-ADA4-66055A3F4923}.Release|Any CPU.Build.0 = Release|Any CPU + {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0CB1644A-BEA1-4CF6-AD5F-E544512769C2}.Release|Any CPU.Build.0 = Release|Any CPU + {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {147C336A-6A6E-43F4-BDDC-8C8B72199C5D}.Release|Any CPU.Build.0 = Release|Any CPU + {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {986CF15B-DFAE-4C39-98D0-75A15271B34A}.Release|Any CPU.Build.0 = Release|Any CPU + {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F1F05D39-1BE0-4CFD-AD60-F27FB31D925A}.Release|Any CPU.Build.0 = Release|Any CPU + {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ECC7869C-1B21-42C1-B8BD-4190F15B3B6F}.Release|Any CPU.Build.0 = Release|Any CPU + {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Debug|Any CPU.Build.0 = Debug|Any CPU + {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Release|Any CPU.ActiveCfg = Release|Any CPU + {86DB686A-17A8-446B-A2C4-100CD6ADCB39}.Release|Any CPU.Build.0 = Release|Any CPU + {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Debug|Any CPU.Build.0 = Debug|Any CPU + {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.ActiveCfg = Release|Any CPU + {76FEE3B3-FB8E-4421-A63F-CA659FB1ACA0}.Release|Any CPU.Build.0 = Release|Any CPU + {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Debug|Any CPU.Build.0 = Debug|Any CPU + {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.ActiveCfg = Release|Any CPU + {82CA75B3-37DF-40DA-AA1B-70888CF3ED05}.Release|Any CPU.Build.0 = Release|Any CPU + {0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0A94104A-71E7-4925-B667-C29C18E3356D}.Debug|Any CPU.Build.0 = Debug|Any CPU + 
{0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0A94104A-71E7-4925-B667-C29C18E3356D}.Release|Any CPU.Build.0 = Release|Any CPU + {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3B5C30A5-FBBC-4247-BE62-2B64960213FD}.Release|Any CPU.Build.0 = Release|Any CPU + {654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {654069F3-3B86-4325-823F-BC78946A26FF}.Debug|Any CPU.Build.0 = Debug|Any CPU + {654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.ActiveCfg = Release|Any CPU + {654069F3-3B86-4325-823F-BC78946A26FF}.Release|Any CPU.Build.0 = Release|Any CPU + {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Debug|Any CPU.Build.0 = Debug|Any CPU + {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.ActiveCfg = Release|Any CPU + {F337E3F6-72AA-44B4-B11F-D69EE14B6152}.Release|Any CPU.Build.0 = Release|Any CPU + {E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E93914C8-2599-46BE-BE18-6229E53F581B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E93914C8-2599-46BE-BE18-6229E53F581B}.Release|Any CPU.Build.0 = Release|Any CPU + {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Debug|Any CPU.Build.0 = Debug|Any CPU + {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.ActiveCfg = Release|Any CPU + {234765A8-2B5C-4FD5-ACBA-6D48002E9074}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = 
{BA40BDB6-7E39-4F75-AC8A-EB65F7FC8209} + EndGlobalSection +EndGlobal diff --git a/Nirvana.sln.DotSettings b/Nirvana.sln.DotSettings index ee3e6417..ccb12241 100644 --- a/Nirvana.sln.DotSettings +++ b/Nirvana.sln.DotSettings @@ -3,6 +3,24 @@ WARNING WARNING DO_NOT_SHOW + False + True + True + True + True + True + False + False + True + True + True + True + True + True + True + UseVarWhenEvident + UseVarWhenEvident + UseVarWhenEvident CNV GR IO @@ -11,8 +29,15 @@ SV C:\Users\Michael\AppData\Local\JetBrains\Transient\ReSharperPlatformVs15\v08_b4a306d0\SolutionCaches MarkersAndFullLine + True + True + True + True + True <data /> <data><IncludeFilters /><ExcludeFilters /></data> + True + True True True True @@ -47,9 +72,12 @@ True True True + True True True True + True + True True True True diff --git a/Nirvana/AnnotationResources.cs b/Nirvana/AnnotationResources.cs index 2b1f3a9f..adc86ac1 100644 --- a/Nirvana/AnnotationResources.cs +++ b/Nirvana/AnnotationResources.cs @@ -1,129 +1,128 @@ -using System; -using System.Collections.Generic; -using System.Collections.Immutable; -using System.IO; -using System.Linq; -using Cloud; -using CommandLine.Utilities; -using Genome; -using IO; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.GeneAnnotation; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.Interface.Phantom; -using VariantAnnotation.Interface.Plugins; -using VariantAnnotation.Interface.Positions; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO.Caches; -using VariantAnnotation.Providers; -using VariantAnnotation.SA; -using Vcf; - -namespace Nirvana -{ - public sealed class AnnotationResources : IAnnotationResources - { - private ImmutableDictionary> _variantPositions; - public ISequenceProvider SequenceProvider { get; } - public ITranscriptAnnotationProvider TranscriptAnnotationProvider { get; } - public IAnnotationProvider SaProvider { get; } - public IAnnotationProvider ConservationProvider { get; } - 
public IRefMinorProvider RefMinorProvider { get; } - public IGeneAnnotationProvider GeneAnnotationProvider { get; } - public IPlugin[] Plugins { get; } - public IAnnotator Annotator { get; } - public IRecomposer Recomposer { get; } - public List DataSourceVersions { get; } - public string VepDataVersion { get; } - public long InputStartVirtualPosition { get; set; } - public string AnnotatorVersionTag { get; set; } = "Nirvana " + CommandLineUtilities.Version; - public bool OutputVcf { get; } - public bool OutputGvcf { get; } - public bool ForceMitochondrialAnnotation { get; } - - public AnnotationResources(string refSequencePath, string inputCachePrefix, List saDirectoryPaths, IS3Client s3Client, List annotationsInS3, - string pluginDirectory, bool outputVcf, bool outputGvcf, bool disableRecomposition, - bool forceMitochondrialAnnotation) - { - SequenceProvider = ProviderUtilities.GetSequenceProvider(refSequencePath); - - //read VCF to get positions for all variants - //_variantPositions = vcfStream == null ? 
null : PreLoadUtilities.GetPositions(vcfStream, SequenceProvider.RefNameToChromosome); - //preload annotation providers - var dataAndIndexPaths = new List<(string DataFile, string IndexFile)>(); - - foreach (var saDirectoryPath in saDirectoryPaths) - { - dataAndIndexPaths.AddRange(ProviderUtilities.GetSaDataAndIndexPaths(saDirectoryPath)); - } - - TranscriptAnnotationProvider = ProviderUtilities.GetTranscriptAnnotationProvider(inputCachePrefix, SequenceProvider); - SaProvider = ProviderUtilities.GetNsaProvider(dataAndIndexPaths, s3Client, annotationsInS3); - ConservationProvider = ProviderUtilities.GetConservationProvider(dataAndIndexPaths); - RefMinorProvider = ProviderUtilities.GetRefMinorProvider(dataAndIndexPaths); - GeneAnnotationProvider = ProviderUtilities.GetGeneAnnotationProvider(dataAndIndexPaths); - Plugins = PluginUtilities.LoadPlugins(pluginDirectory); - - Annotator = ProviderUtilities.GetAnnotator(TranscriptAnnotationProvider, SequenceProvider, SaProvider, - ConservationProvider, GeneAnnotationProvider, Plugins); - - Recomposer = disableRecomposition - ? new NullRecomposer() - : Phantom.Recomposer.Recomposer.Create(SequenceProvider, TranscriptAnnotationProvider); - DataSourceVersions = GetDataSourceVersions(Plugins, TranscriptAnnotationProvider, SaProvider, - GeneAnnotationProvider, ConservationProvider).ToList(); - VepDataVersion = TranscriptAnnotationProvider.VepVersion + "." + CacheConstants.DataVersion + "." 
+ - SaCommon.DataVersion; - - OutputVcf = outputVcf; - OutputGvcf = outputGvcf; - ForceMitochondrialAnnotation = forceMitochondrialAnnotation; - } - - private static IEnumerable GetDataSourceVersions(IEnumerable plugins, - params IProvider[] providers) - { - var dataSourceVersions = new List(); - if (plugins != null) foreach (var provider in plugins) if (provider.DataSourceVersions != null) dataSourceVersions.AddRange(provider.DataSourceVersions); - foreach (var provider in providers) if (provider != null) dataSourceVersions.AddRange(provider.DataSourceVersions); - return dataSourceVersions.ToHashSet(new DataSourceVersionComparer()); - } - - public void SingleVariantPreLoad(IPosition position) - { - var chromToPositions = new Dictionary>(); - PreLoadUtilities.UpdateChromToPositions(chromToPositions, position.Chromosome, position.Start, position.RefAllele, position.VcfFields[VcfCommon.AltIndex], SequenceProvider.Sequence); - _variantPositions = chromToPositions.ToImmutableDictionary(); - PreLoad(position.Chromosome); - } - - public void GetVariantPositions(Stream vcfStream, AnnotationRange annotationRange) - { - if (vcfStream == null) - { - _variantPositions = null; - return; - } - - vcfStream.Position = Tabix.VirtualPosition.From(InputStartVirtualPosition).BlockOffset; - _variantPositions = PreLoadUtilities.GetPositions(vcfStream, annotationRange, SequenceProvider).ToImmutableDictionary(); - } - - public void PreLoad(IChromosome chromosome) - { - SequenceProvider.LoadChromosome(chromosome); - //NM_033487.1 - // if (chromosome.Index == 0) - // { - // var start = 1570603; - // var end = 1590558; - // - // var sequence = SequenceProvider.Sequence.Substring(start - 1, end - start + 1); - // Console.WriteLine(sequence); - // } - - if (_variantPositions == null || !_variantPositions.TryGetValue(chromosome, out var positions)) return; - SaProvider?.PreLoad(chromosome, positions); - } - } +using System.Collections.Generic; +using System.Collections.Immutable; +using 
System.IO; +using System.Linq; +using Cloud; +using CommandLine.Utilities; +using Genome; +using IO; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.GeneAnnotation; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.Interface.Phantom; +using VariantAnnotation.Interface.Plugins; +using VariantAnnotation.Interface.Positions; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO.Caches; +using VariantAnnotation.Providers; +using VariantAnnotation.SA; +using Vcf; + +namespace Nirvana +{ + public sealed class AnnotationResources : IAnnotationResources + { + private ImmutableDictionary> _variantPositions; + public ISequenceProvider SequenceProvider { get; } + public ITranscriptAnnotationProvider TranscriptAnnotationProvider { get; } + public IAnnotationProvider SaProvider { get; } + public IAnnotationProvider ConservationProvider { get; } + public IRefMinorProvider RefMinorProvider { get; } + public IGeneAnnotationProvider GeneAnnotationProvider { get; } + public IPlugin[] Plugins { get; } + public IAnnotator Annotator { get; } + public IRecomposer Recomposer { get; } + public List DataSourceVersions { get; } + public string VepDataVersion { get; } + public long InputStartVirtualPosition { get; set; } + public string AnnotatorVersionTag { get; set; } = "Nirvana " + CommandLineUtilities.Version; + public bool OutputVcf { get; } + public bool OutputGvcf { get; } + public bool ForceMitochondrialAnnotation { get; } + + public AnnotationResources(string refSequencePath, string inputCachePrefix, List saDirectoryPaths, IS3Client s3Client, List annotationsInS3, + string pluginDirectory, bool outputVcf, bool outputGvcf, bool disableRecomposition, + bool forceMitochondrialAnnotation) + { + SequenceProvider = ProviderUtilities.GetSequenceProvider(refSequencePath); + + //read VCF to get positions for all variants + //_variantPositions = vcfStream == null ? 
null : PreLoadUtilities.GetPositions(vcfStream, SequenceProvider.RefNameToChromosome); + //preload annotation providers + var dataAndIndexPaths = new List<(string DataFile, string IndexFile)>(); + + foreach (var saDirectoryPath in saDirectoryPaths) + { + dataAndIndexPaths.AddRange(ProviderUtilities.GetSaDataAndIndexPaths(saDirectoryPath)); + } + + TranscriptAnnotationProvider = ProviderUtilities.GetTranscriptAnnotationProvider(inputCachePrefix, SequenceProvider); + SaProvider = ProviderUtilities.GetNsaProvider(dataAndIndexPaths, s3Client, annotationsInS3); + ConservationProvider = ProviderUtilities.GetConservationProvider(dataAndIndexPaths); + RefMinorProvider = ProviderUtilities.GetRefMinorProvider(dataAndIndexPaths); + GeneAnnotationProvider = ProviderUtilities.GetGeneAnnotationProvider(dataAndIndexPaths); + Plugins = PluginUtilities.LoadPlugins(pluginDirectory); + + Annotator = ProviderUtilities.GetAnnotator(TranscriptAnnotationProvider, SequenceProvider, SaProvider, + ConservationProvider, GeneAnnotationProvider, Plugins); + + Recomposer = disableRecomposition + ? new NullRecomposer() + : Phantom.Recomposer.Recomposer.Create(SequenceProvider, TranscriptAnnotationProvider); + DataSourceVersions = GetDataSourceVersions(Plugins, TranscriptAnnotationProvider, SaProvider, + GeneAnnotationProvider, ConservationProvider).ToList(); + VepDataVersion = TranscriptAnnotationProvider.VepVersion + "." + CacheConstants.DataVersion + "." 
+ + SaCommon.DataVersion; + + OutputVcf = outputVcf; + OutputGvcf = outputGvcf; + ForceMitochondrialAnnotation = forceMitochondrialAnnotation; + } + + private static IEnumerable GetDataSourceVersions(IEnumerable plugins, + params IProvider[] providers) + { + var dataSourceVersions = new List(); + if (plugins != null) foreach (var provider in plugins) if (provider.DataSourceVersions != null) dataSourceVersions.AddRange(provider.DataSourceVersions); + foreach (var provider in providers) if (provider != null) dataSourceVersions.AddRange(provider.DataSourceVersions); + return dataSourceVersions.ToHashSet(new DataSourceVersionComparer()); + } + + public void SingleVariantPreLoad(IPosition position) + { + var chromToPositions = new Dictionary>(); + PreLoadUtilities.UpdateChromToPositions(chromToPositions, position.Chromosome, position.Start, position.RefAllele, position.VcfFields[VcfCommon.AltIndex], SequenceProvider.Sequence); + _variantPositions = chromToPositions.ToImmutableDictionary(); + PreLoad(position.Chromosome); + } + + public void GetVariantPositions(Stream vcfStream, AnnotationRange annotationRange) + { + if (vcfStream == null) + { + _variantPositions = null; + return; + } + + vcfStream.Position = Tabix.VirtualPosition.From(InputStartVirtualPosition).BlockOffset; + _variantPositions = PreLoadUtilities.GetPositions(vcfStream, annotationRange, SequenceProvider).ToImmutableDictionary(); + } + + public void PreLoad(IChromosome chromosome) + { + SequenceProvider.LoadChromosome(chromosome); + //NM_033487.1 + // if (chromosome.Index == 0) + // { + // var start = 1570603; + // var end = 1590558; + // + // var sequence = SequenceProvider.Sequence.Substring(start - 1, end - start + 1); + // Console.WriteLine(sequence); + // } + + if (_variantPositions == null || !_variantPositions.TryGetValue(chromosome, out var positions)) return; + SaProvider?.PreLoad(chromosome, positions); + } + } } \ No newline at end of file diff --git a/Nirvana/Nirvana.cs b/Nirvana/Nirvana.cs 
index 7294835d..b7be7db4 100644 --- a/Nirvana/Nirvana.cs +++ b/Nirvana/Nirvana.cs @@ -1,140 +1,140 @@ -using System; -using System.Collections.Generic; -using System.IO.Compression; -using CommandLine.Builders; -using CommandLine.NDesk.Options; -using Compression.FileHandling; -using Compression.Utilities; -using ErrorHandling; -using IO; -using Jasix.DataStructures; -using VariantAnnotation.Interface; -using VariantAnnotation.IO.Caches; -using VariantAnnotation.Providers; -using Vcf; - -namespace Nirvana -{ - public static class Nirvana - { - private static string _inputCachePrefix; - private static readonly List SupplementaryAnnotationDirectories = new List(); - private static string _vcfPath; - private static string _refSequencePath; - private static string _outputFileName; - private static string _pluginDirectory; - - private static bool _vcf; - private static bool _gvcf; - private static bool _forceMitochondrialAnnotation; - private static bool _disableRecomposition; - - private static ExitCodes ProgramExecution() - { - var annotationResources = GetAnnotationResources(); - - string jasixFileName = _outputFileName == "-" ? null : _outputFileName + ".json.gz" + JasixCommons.FileExt; - using (var inputVcfStream = _vcfPath == "-" ? Console.OpenStandardInput() : GZipUtilities.GetAppropriateReadStream(_vcfPath)) - using (var outputJsonStream = _outputFileName == "-" ? Console.OpenStandardOutput() : new BlockGZipStream(FileUtilities.GetCreateStream(_outputFileName + ".json.gz"), CompressionMode.Compress)) - using (var outputJsonIndexStream = jasixFileName == null ? null : FileUtilities.GetCreateStream(jasixFileName)) - using (var outputVcfStream = !_vcf ? null : _outputFileName == "-" ? Console.OpenStandardOutput() : GZipUtilities.GetWriteStream(_outputFileName + ".vcf.gz")) - using (var outputGvcfStream = !_gvcf ? null : _outputFileName == "-" ? 
Console.OpenStandardOutput() : GZipUtilities.GetWriteStream(_outputFileName + ".genome.vcf.gz")) - return StreamAnnotation.Annotate(null, inputVcfStream, outputJsonStream, outputJsonIndexStream, outputVcfStream, - outputGvcfStream, annotationResources, new NullVcfFilter()); - } - - private static AnnotationResources GetAnnotationResources() - { - var annotationResources = new AnnotationResources(_refSequencePath, _inputCachePrefix, SupplementaryAnnotationDirectories, null, null, _pluginDirectory, _vcf, _gvcf, _disableRecomposition, _forceMitochondrialAnnotation); - if (SupplementaryAnnotationDirectories.Count == 0) return annotationResources; - - using (var preloadVcfStream = GZipUtilities.GetAppropriateStream( - new PersistentStream(PersistentStreamUtils.GetReadStream(_vcfPath), - ConnectUtilities.GetFileConnectFunc(_vcfPath), 0))) - { - annotationResources.GetVariantPositions(preloadVcfStream, null); - } - return annotationResources; - } - - public static int Main(string[] args) - { - var ops = new OptionSet - { - { - "cache|c=", - "input cache {prefix}", - v => _inputCachePrefix = v - }, - { - "in|i=", - "input VCF {path}", - v => _vcfPath = v - }, - { - "plugin|p=", - "plugin {directory}", - v => _pluginDirectory = v - }, - { - "gvcf", - "enables genome vcf output", - v => _gvcf = v != null - }, - { - "vcf", - "enables vcf output", - v => _vcf = v != null - }, - { - "out|o=", - "output {file path}", - v => _outputFileName = v - }, - { - "ref|r=", - "input compressed reference sequence {path}", - v => _refSequencePath = v - }, - { - "sd=", - "input supplementary annotation {directory}", - v => SupplementaryAnnotationDirectories.Add(v) - }, - { - "force-mt", - "forces to annotate mitochondrial variants", - v => _forceMitochondrialAnnotation = v != null - }, - { - "disable-recomposition", - "don't recompose function relevant variants", - v => _disableRecomposition = v != null - } - }; - - var exitCode = new ConsoleAppBuilder(args, ops) - .UseVersionProvider(new 
VersionProvider()) - .Parse() - .CheckInputFilenameExists(_vcfPath, "vcf", "--in", true, "-") - //.CheckInputFilenameExists(_vcfPath + ".tbi", "tabix index file", "--in") - .CheckInputFilenameExists(_refSequencePath, "reference sequence", "--ref") - .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), "transcript cache", "--cache") - .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), "SIFT cache", "--cache") - .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), "PolyPhen cache", "--cache") - .HasRequiredParameter(_outputFileName, "output file stub", "--out") - .Enable(_outputFileName == "-", () => - { - _vcf = false; - _gvcf = false; - }) - .DisableOutput(_outputFileName == "-") - .ShowBanner(Constants.Authors) - .ShowHelpMenu("Annotates a set of variants", "-i -c --sd -r -o ") - .ShowErrors() - .Execute(ProgramExecution); - - return (int)exitCode; - } - } +using System; +using System.Collections.Generic; +using System.IO.Compression; +using CommandLine.Builders; +using CommandLine.NDesk.Options; +using Compression.FileHandling; +using Compression.Utilities; +using ErrorHandling; +using IO; +using Jasix.DataStructures; +using VariantAnnotation.Interface; +using VariantAnnotation.IO.Caches; +using VariantAnnotation.Providers; +using Vcf; + +namespace Nirvana +{ + public static class Nirvana + { + private static string _inputCachePrefix; + private static readonly List SupplementaryAnnotationDirectories = new List(); + private static string _vcfPath; + private static string _refSequencePath; + private static string _outputFileName; + private static string _pluginDirectory; + + private static bool _vcf; + private static bool _gvcf; + private static bool _forceMitochondrialAnnotation; + private static bool _disableRecomposition; + + private static ExitCodes ProgramExecution() + { + var annotationResources = GetAnnotationResources(); + + string jasixFileName = _outputFileName == "-" ? 
null : _outputFileName + ".json.gz" + JasixCommons.FileExt; + using (var inputVcfStream = _vcfPath == "-" ? Console.OpenStandardInput() : GZipUtilities.GetAppropriateReadStream(_vcfPath)) + using (var outputJsonStream = _outputFileName == "-" ? Console.OpenStandardOutput() : new BlockGZipStream(FileUtilities.GetCreateStream(_outputFileName + ".json.gz"), CompressionMode.Compress)) + using (var outputJsonIndexStream = jasixFileName == null ? null : FileUtilities.GetCreateStream(jasixFileName)) + using (var outputVcfStream = !_vcf ? null : _outputFileName == "-" ? Console.OpenStandardOutput() : GZipUtilities.GetWriteStream(_outputFileName + ".vcf.gz")) + using (var outputGvcfStream = !_gvcf ? null : _outputFileName == "-" ? Console.OpenStandardOutput() : GZipUtilities.GetWriteStream(_outputFileName + ".genome.vcf.gz")) + return StreamAnnotation.Annotate(null, inputVcfStream, outputJsonStream, outputJsonIndexStream, outputVcfStream, + outputGvcfStream, annotationResources, new NullVcfFilter()); + } + + private static AnnotationResources GetAnnotationResources() + { + var annotationResources = new AnnotationResources(_refSequencePath, _inputCachePrefix, SupplementaryAnnotationDirectories, null, null, _pluginDirectory, _vcf, _gvcf, _disableRecomposition, _forceMitochondrialAnnotation); + if (SupplementaryAnnotationDirectories.Count == 0) return annotationResources; + + using (var preloadVcfStream = GZipUtilities.GetAppropriateStream( + new PersistentStream(PersistentStreamUtils.GetReadStream(_vcfPath), + ConnectUtilities.GetFileConnectFunc(_vcfPath), 0))) + { + annotationResources.GetVariantPositions(preloadVcfStream, null); + } + return annotationResources; + } + + public static int Main(string[] args) + { + var ops = new OptionSet + { + { + "cache|c=", + "input cache {prefix}", + v => _inputCachePrefix = v + }, + { + "in|i=", + "input VCF {path}", + v => _vcfPath = v + }, + { + "plugin|p=", + "plugin {directory}", + v => _pluginDirectory = v + }, + { + "gvcf", + 
"enables genome vcf output", + v => _gvcf = v != null + }, + { + "vcf", + "enables vcf output", + v => _vcf = v != null + }, + { + "out|o=", + "output {file path}", + v => _outputFileName = v + }, + { + "ref|r=", + "input compressed reference sequence {path}", + v => _refSequencePath = v + }, + { + "sd=", + "input supplementary annotation {directory}", + v => SupplementaryAnnotationDirectories.Add(v) + }, + { + "force-mt", + "forces to annotate mitochondrial variants", + v => _forceMitochondrialAnnotation = v != null + }, + { + "disable-recomposition", + "don't recompose function relevant variants", + v => _disableRecomposition = v != null + } + }; + + var exitCode = new ConsoleAppBuilder(args, ops) + .UseVersionProvider(new VersionProvider()) + .Parse() + .CheckInputFilenameExists(_vcfPath, "vcf", "--in", true, "-") + //.CheckInputFilenameExists(_vcfPath + ".tbi", "tabix index file", "--in") + .CheckInputFilenameExists(_refSequencePath, "reference sequence", "--ref") + .CheckInputFilenameExists(CacheConstants.TranscriptPath(_inputCachePrefix), "transcript cache", "--cache") + .CheckInputFilenameExists(CacheConstants.SiftPath(_inputCachePrefix), "SIFT cache", "--cache") + .CheckInputFilenameExists(CacheConstants.PolyPhenPath(_inputCachePrefix), "PolyPhen cache", "--cache") + .HasRequiredParameter(_outputFileName, "output file stub", "--out") + .Enable(_outputFileName == "-", () => + { + _vcf = false; + _gvcf = false; + }) + .DisableOutput(_outputFileName == "-") + .ShowBanner(Constants.Authors) + .ShowHelpMenu("Annotates a set of variants", "-i -c --sd -r -o ") + .ShowErrors() + .Execute(ProgramExecution); + + return (int)exitCode; + } + } } \ No newline at end of file diff --git a/Nirvana/Nirvana.csproj b/Nirvana/Nirvana.csproj index 1a7513c1..92a697af 100644 --- a/Nirvana/Nirvana.csproj +++ b/Nirvana/Nirvana.csproj @@ -1,27 +1,27 @@ - - - Exe - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - - - - - - - - - - - - - - + + + Exe + netcoreapp2.1 + 
..\bin\$(Configuration) + Full + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Nirvana/PluginUtilities.cs b/Nirvana/PluginUtilities.cs index 240af93b..a35b7893 100644 --- a/Nirvana/PluginUtilities.cs +++ b/Nirvana/PluginUtilities.cs @@ -1,41 +1,41 @@ -using System.Composition.Hosting; -using System.IO; -using System.Linq; -using System.Reflection; -using System.Runtime.Loader; -using VariantAnnotation.Interface.Plugins; - -namespace Nirvana -{ - public static class PluginUtilities - { - private const string DllExtension = ".dll"; - private const string ConfigExtension = ".config"; - - public static IPlugin[] LoadPlugins(string pluginDirectory) - { - string executableLocation = Assembly.GetEntryAssembly().Location; - string path = pluginDirectory ?? Path.Combine(Path.GetDirectoryName(executableLocation), "Plugins"); - - if (!Directory.Exists(path)) return null; - - var pluginFileNames = Directory.GetFiles(path, "*.dll", SearchOption.TopDirectoryOnly); - var assemblies = pluginFileNames.Select(AssemblyLoadContext.Default.LoadFromAssemblyPath).ToArray(); - var configuration = new ContainerConfiguration().WithAssemblies(assemblies); - - IPlugin[] plugins; - using (var container = configuration.CreateContainer()) - { - plugins = container.GetExports().ToArray(); - } - - foreach (var plugin in plugins) - { - string configFilePath = Path.Combine(path, plugin.Name + DllExtension + ConfigExtension); - if (!File.Exists(configFilePath)) throw new FileNotFoundException($"Missing expected config file: {configFilePath}!!"); - } - - return plugins; - } - } +using System.Composition.Hosting; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Runtime.Loader; +using VariantAnnotation.Interface.Plugins; + +namespace Nirvana +{ + public static class PluginUtilities + { + private const string DllExtension = ".dll"; + private const string ConfigExtension = ".config"; + + public static IPlugin[] LoadPlugins(string 
pluginDirectory) + { + string executableLocation = Assembly.GetEntryAssembly().Location; + string path = pluginDirectory ?? Path.Combine(Path.GetDirectoryName(executableLocation), "Plugins"); + + if (!Directory.Exists(path)) return null; + + var pluginFileNames = Directory.GetFiles(path, "*.dll", SearchOption.TopDirectoryOnly); + var assemblies = pluginFileNames.Select(AssemblyLoadContext.Default.LoadFromAssemblyPath).ToArray(); + var configuration = new ContainerConfiguration().WithAssemblies(assemblies); + + IPlugin[] plugins; + using (var container = configuration.CreateContainer()) + { + plugins = container.GetExports().ToArray(); + } + + foreach (var plugin in plugins) + { + string configFilePath = Path.Combine(path, plugin.Name + DllExtension + ConfigExtension); + if (!File.Exists(configFilePath)) throw new FileNotFoundException($"Missing expected config file: {configFilePath}!!"); + } + + return plugins; + } + } } \ No newline at end of file diff --git a/Nirvana/PreLoadUtilities.cs b/Nirvana/PreLoadUtilities.cs index 0686f6d0..34d33344 100644 --- a/Nirvana/PreLoadUtilities.cs +++ b/Nirvana/PreLoadUtilities.cs @@ -1,94 +1,94 @@ -using System; -using System.Collections.Generic; -using System.IO; -using CommandLine.Utilities; -using Genome; -using OptimizedCore; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.Interface.Providers; -using Variants; - -namespace Nirvana -{ - public static class PreLoadUtilities - { - public static IDictionary> GetPositions(Stream vcfStream, AnnotationRange annotationRange, ISequenceProvider sequenceProvider) - { - var benchmark = new Benchmark(); - Console.Write("Scanning positions required for SA pre-loading...."); - var chromPositions = new Dictionary>(); - - var refNameToChrom = sequenceProvider.RefNameToChromosome; - IChromosome chromToAnnotate = null; - int endPosition = int.MaxValue; - if (annotationRange != null) - { - chromToAnnotate = ReferenceNameUtilities.GetChromosome(refNameToChrom, 
annotationRange.chromosome); - endPosition = annotationRange.end; - } - - using (var reader = new StreamReader(vcfStream)) - { - string line; - while ((line = reader.ReadLine()) != null) - { - if (!ReachedAnnotationRange(annotationRange, refNameToChrom, line, chromToAnnotate, out var splits, out IChromosome iChrom)) continue; - - int position = int.Parse(splits[VcfCommon.PosIndex]); - if (position > endPosition) break; - - string refAllele = splits[VcfCommon.RefIndex]; - string altAllele = splits[VcfCommon.AltIndex]; - sequenceProvider.LoadChromosome(iChrom); - UpdateChromToPositions(chromPositions, iChrom, position, refAllele, altAllele, sequenceProvider.Sequence); - } - } - - int count = SortPositionsAndGetCount(chromPositions); - - Console.WriteLine($"{count} positions found in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}"); - - return chromPositions; - } - - public static void UpdateChromToPositions(Dictionary> chromPositions, IChromosome chromosome, int position, string refAllele, string altAllele, ISequence refSequence) - { - if (!chromPositions.ContainsKey(chromosome)) chromPositions.Add(chromosome, new List(16 * 1024)); - foreach (string allele in altAllele.OptimizedSplit(',')) - { - if (allele.OptimizedStartsWith('<') || allele.Contains('[') || altAllele.Contains(']')) continue; - - (int shiftedPos, string _, string _) = - VariantUtils.TrimAndLeftAlign(position, refAllele, allele, refSequence); - chromPositions[chromosome].Add(shiftedPos); - } - } - - private static int SortPositionsAndGetCount(Dictionary> chromPositions) - { - var count = 0; - foreach (var positions in chromPositions.Values) - { - positions.Sort(); - count += positions.Count; - } - - return count; - } - - private static bool ReachedAnnotationRange(AnnotationRange annotationRange, IDictionary refNameToChrom, string line, - IChromosome chromToAnnotate, out string[] splits, out IChromosome iChrom) - { - splits = null; - iChrom = null; - if (line.StartsWith('#')) return false; - 
splits = line.Split('\t', 6); - - string chrom = splits[VcfCommon.ChromIndex]; - - if (!refNameToChrom.TryGetValue(chrom, out iChrom)) return false; - if (annotationRange != null && chromToAnnotate != iChrom) return false; - return true; - } - } +using System; +using System.Collections.Generic; +using System.IO; +using CommandLine.Utilities; +using Genome; +using OptimizedCore; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.Interface.Providers; +using Variants; + +namespace Nirvana +{ + public static class PreLoadUtilities + { + public static IDictionary> GetPositions(Stream vcfStream, AnnotationRange annotationRange, ISequenceProvider sequenceProvider) + { + var benchmark = new Benchmark(); + Console.Write("Scanning positions required for SA pre-loading...."); + var chromPositions = new Dictionary>(); + + var refNameToChrom = sequenceProvider.RefNameToChromosome; + IChromosome chromToAnnotate = null; + int endPosition = int.MaxValue; + if (annotationRange != null) + { + chromToAnnotate = ReferenceNameUtilities.GetChromosome(refNameToChrom, annotationRange.chromosome); + endPosition = annotationRange.end; + } + + using (var reader = new StreamReader(vcfStream)) + { + string line; + while ((line = reader.ReadLine()) != null) + { + if (!ReachedAnnotationRange(annotationRange, refNameToChrom, line, chromToAnnotate, out var splits, out IChromosome iChrom)) continue; + + int position = int.Parse(splits[VcfCommon.PosIndex]); + if (position > endPosition) break; + + string refAllele = splits[VcfCommon.RefIndex]; + string altAllele = splits[VcfCommon.AltIndex]; + sequenceProvider.LoadChromosome(iChrom); + UpdateChromToPositions(chromPositions, iChrom, position, refAllele, altAllele, sequenceProvider.Sequence); + } + } + + int count = SortPositionsAndGetCount(chromPositions); + + Console.WriteLine($"{count} positions found in {Benchmark.ToHumanReadable(benchmark.GetElapsedTime())}"); + + return chromPositions; + } + + public static void 
UpdateChromToPositions(Dictionary> chromPositions, IChromosome chromosome, int position, string refAllele, string altAllele, ISequence refSequence) + { + if (!chromPositions.ContainsKey(chromosome)) chromPositions.Add(chromosome, new List(16 * 1024)); + foreach (string allele in altAllele.OptimizedSplit(',')) + { + if (allele.OptimizedStartsWith('<') || allele.Contains('[') || altAllele.Contains(']')) continue; + + (int shiftedPos, string _, string _) = + VariantUtils.TrimAndLeftAlign(position, refAllele, allele, refSequence); + chromPositions[chromosome].Add(shiftedPos); + } + } + + private static int SortPositionsAndGetCount(Dictionary> chromPositions) + { + var count = 0; + foreach (var positions in chromPositions.Values) + { + positions.Sort(); + count += positions.Count; + } + + return count; + } + + private static bool ReachedAnnotationRange(AnnotationRange annotationRange, IDictionary refNameToChrom, string line, + IChromosome chromToAnnotate, out string[] splits, out IChromosome iChrom) + { + splits = null; + iChrom = null; + if (line.StartsWith('#')) return false; + splits = line.Split('\t', 6); + + string chrom = splits[VcfCommon.ChromIndex]; + + if (!refNameToChrom.TryGetValue(chrom, out iChrom)) return false; + if (annotationRange != null && chromToAnnotate != iChrom) return false; + return true; + } + } } \ No newline at end of file diff --git a/Nirvana/ProviderUtilities.cs b/Nirvana/ProviderUtilities.cs index 3f61bd54..376bbdcb 100644 --- a/Nirvana/ProviderUtilities.cs +++ b/Nirvana/ProviderUtilities.cs @@ -1,176 +1,174 @@ -using System; -using System.Collections.Generic; -using System.IO; -using Cloud; -using IO; -using VariantAnnotation; -using CommandLine.Utilities; -using VariantAnnotation.GeneAnnotation; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.GeneAnnotation; -using VariantAnnotation.Interface.Plugins; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.Interface.SA; -using VariantAnnotation.NSA; 
-using VariantAnnotation.Providers; -using VariantAnnotation.SA; - -namespace Nirvana -{ - public static class ProviderUtilities - { - public static IAnnotator GetAnnotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider, - IAnnotationProvider saProviders, IAnnotationProvider conservationProvider, - IGeneAnnotationProvider geneAnnotationProviders, IEnumerable plugins = null) - { - return new Annotator(taProvider, sequenceProvider, saProviders, conservationProvider, - geneAnnotationProviders, plugins); - } - - public static ISequenceProvider GetSequenceProvider(string compressedReferencePath) - { - return new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(compressedReferencePath)); - } - - public static IAnnotationProvider GetConservationProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) - { - if (dataAndIndexFiles == null) return null; - - foreach ((string dataFile, string indexFile) in dataAndIndexFiles) - { - if (dataFile.EndsWith(SaCommon.PhylopFileSuffix)) - return new ConservationScoreProvider(PersistentStreamUtils.GetReadStream(dataFile), PersistentStreamUtils.GetReadStream(indexFile)); - } - - return null; - } - - public static IRefMinorProvider GetRefMinorProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) - { - if (dataAndIndexFiles == null) return null; - - foreach ((string dataFile, string indexFile) in dataAndIndexFiles) - { - if (dataFile.EndsWith(SaCommon.RefMinorFileSuffix)) - return new RefMinorProvider(PersistentStreamUtils.GetReadStream(dataFile), PersistentStreamUtils.GetReadStream(indexFile)); - } - - return null; - } - - public static IGeneAnnotationProvider GetGeneAnnotationProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) - { - if (dataAndIndexFiles == null) return null; - var ngaFiles = new List(); - foreach ((string dataFile, string _) in dataAndIndexFiles) - { - if (dataFile.EndsWith(SaCommon.NgaFileSuffix)) - 
ngaFiles.Add(dataFile); - } - return ngaFiles.Count > 0? new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(ngaFiles)): null; - } - - public static IAnnotationProvider GetNsaProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles, IS3Client s3Client, List annotationsInS3) - { - if (dataAndIndexFiles == null && annotationsInS3 == null) return null; - - var nsaReaders = new List(); - var nsiReaders = new List(); - - if (dataAndIndexFiles != null) GetSaReaders(dataAndIndexFiles, nsaReaders, nsiReaders); - - if (annotationsInS3 != null) GetSaReadersFromS3(s3Client, annotationsInS3, nsaReaders, nsiReaders); - - if (nsaReaders.Count <= 0 && nsiReaders.Count <= 0) return null; - - nsaReaders.Sort((a, b) => string.Compare(a.JsonKey, b.JsonKey, StringComparison.Ordinal)); - nsiReaders.Sort((a, b) => string.Compare(a.JsonKey, b.JsonKey, StringComparison.Ordinal)); - return new NsaProvider(nsaReaders.ToArray(), nsiReaders.ToArray()); - } - - private static void GetSaReadersFromS3(IS3Client s3Client, List annotationsInS3, List nsaReaders, List nsiReaders) - { - foreach (var annotation in annotationsInS3) - { - if (annotation.path.EndsWith(SaCommon.SaFileSuffix)) - nsaReaders.Add(GetNsaReader( - PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, annotation.path, 0), - PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, - annotation.path + SaCommon.IndexSufix, 0))); - else - { - nsiReaders.Add(GetNsiReader( - PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, annotation.path, 0))); - } - } - } - - private static void GetSaReaders(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles, List nsaReaders, List nsiReaders) - { - foreach ((string dataFile, string indexFile) in dataAndIndexFiles) - { - if (dataFile.EndsWith(SaCommon.SaFileSuffix)) - nsaReaders.Add( - GetNsaReader(PersistentStreamUtils.GetReadStream(dataFile), - PersistentStreamUtils.GetReadStream(indexFile)) - ); - if 
(dataFile.EndsWith(SaCommon.SiFileSuffix)) - nsiReaders.Add(GetNsiReader(PersistentStreamUtils.GetReadStream(dataFile))); - } - } - - public static IList<(string dataFile, string indexFile)> GetSaDataAndIndexPaths(string saDirectoryPath) => ConnectUtilities.IsHttpLocation(saDirectoryPath) ? GetSaPathsFromManifest(saDirectoryPath) : GetLocalSaPaths(saDirectoryPath); - - private static IList<(string dataFile, string indexFile)> GetSaPathsFromManifest(string saDirectoryPath) - { - var baseUrl = NirvanaHelper.S3Url; - - var paths = new List<(string, string)>(); - using (var reader = new StreamReader(PersistentStreamUtils.GetReadStream(saDirectoryPath))) - { - string line; - while ((line = reader.ReadLine()) != null) - { - if (line.EndsWith(SaCommon.SiFileSuffix) || line.EndsWith(SaCommon.NgaFileSuffix)) - paths.Add((baseUrl + line, null)); - else paths.Add((baseUrl + line, baseUrl + line + SaCommon.IndexSufix)); - } - } - - return paths.Count > 0 ? paths : null; - } - - private static List<(string, string)> GetLocalSaPaths(string saDirectoryPath) - { - var paths = new List<(string, string)>(); - foreach (var filePath in Directory.GetFiles(saDirectoryPath)) - { - if (filePath.EndsWith(SaCommon.SaFileSuffix) || filePath.EndsWith(SaCommon.PhylopFileSuffix) || - filePath.EndsWith(SaCommon.RefMinorFileSuffix)) - paths.Add((filePath, filePath + SaCommon.IndexSufix)); - - if (filePath.EndsWith(SaCommon.SiFileSuffix) || filePath.EndsWith(SaCommon.NgaFileSuffix)) - paths.Add((filePath, null)); - //skip files with all other extensions - } - - return paths.Count>0? 
paths:null; - } - - public static ITranscriptAnnotationProvider GetTranscriptAnnotationProvider(string path, - ISequenceProvider sequenceProvider) - { - var benchmark = new Benchmark(); - var provider = new TranscriptAnnotationProvider(path, sequenceProvider); - var wallTimeSpan = benchmark.GetElapsedTime(); - Console.WriteLine("Cache Time: {0} ms", wallTimeSpan.TotalMilliseconds); - return provider; - } - - - private static NsaReader GetNsaReader(Stream dataStream, Stream indexStream) => - new NsaReader(new ExtendedBinaryReader(dataStream), indexStream); - - private static NsiReader GetNsiReader(Stream stream) => new NsiReader(stream); - } +using System; +using System.Collections.Generic; +using System.IO; +using Cloud; +using IO; +using VariantAnnotation; +using CommandLine.Utilities; +using VariantAnnotation.GeneAnnotation; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.GeneAnnotation; +using VariantAnnotation.Interface.Plugins; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.Interface.SA; +using VariantAnnotation.NSA; +using VariantAnnotation.Providers; +using VariantAnnotation.SA; + +namespace Nirvana +{ + public static class ProviderUtilities + { + public static IAnnotator GetAnnotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider, + IAnnotationProvider saProviders, IAnnotationProvider conservationProvider, + IGeneAnnotationProvider geneAnnotationProviders, IEnumerable plugins = null) + { + return new Annotator(taProvider, sequenceProvider, saProviders, conservationProvider, + geneAnnotationProviders, plugins); + } + + public static ISequenceProvider GetSequenceProvider(string compressedReferencePath) + { + return new ReferenceSequenceProvider(PersistentStreamUtils.GetReadStream(compressedReferencePath)); + } + + public static IAnnotationProvider GetConservationProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) + { + if (dataAndIndexFiles == null) return null; + + 
foreach ((string dataFile, string indexFile) in dataAndIndexFiles) + { + if (dataFile.EndsWith(SaCommon.PhylopFileSuffix)) + return new ConservationScoreProvider(PersistentStreamUtils.GetReadStream(dataFile), PersistentStreamUtils.GetReadStream(indexFile)); + } + + return null; + } + + public static IRefMinorProvider GetRefMinorProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) + { + if (dataAndIndexFiles == null) return null; + + foreach ((string dataFile, string indexFile) in dataAndIndexFiles) + { + if (dataFile.EndsWith(SaCommon.RefMinorFileSuffix)) + return new RefMinorProvider(PersistentStreamUtils.GetReadStream(dataFile), PersistentStreamUtils.GetReadStream(indexFile)); + } + + return null; + } + + public static IGeneAnnotationProvider GetGeneAnnotationProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles) + { + if (dataAndIndexFiles == null) return null; + var ngaFiles = new List(); + foreach ((string dataFile, string _) in dataAndIndexFiles) + { + if (dataFile.EndsWith(SaCommon.NgaFileSuffix)) + ngaFiles.Add(dataFile); + } + return ngaFiles.Count > 0? 
new GeneAnnotationProvider(PersistentStreamUtils.GetStreams(ngaFiles)): null; + } + + public static IAnnotationProvider GetNsaProvider(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles, IS3Client s3Client, List annotationsInS3) + { + if (dataAndIndexFiles == null && annotationsInS3 == null) return null; + + var nsaReaders = new List(); + var nsiReaders = new List(); + + if (dataAndIndexFiles != null) GetSaReaders(dataAndIndexFiles, nsaReaders, nsiReaders); + + if (annotationsInS3 != null) GetSaReadersFromS3(s3Client, annotationsInS3, nsaReaders, nsiReaders); + + if (nsaReaders.Count <= 0 && nsiReaders.Count <= 0) return null; + + nsaReaders.Sort((a, b) => string.Compare(a.JsonKey, b.JsonKey, StringComparison.Ordinal)); + nsiReaders.Sort((a, b) => string.Compare(a.JsonKey, b.JsonKey, StringComparison.Ordinal)); + return new NsaProvider(nsaReaders.ToArray(), nsiReaders.ToArray()); + } + + private static void GetSaReadersFromS3(IS3Client s3Client, List annotationsInS3, List nsaReaders, List nsiReaders) + { + foreach (var annotation in annotationsInS3) + { + if (annotation.path.EndsWith(SaCommon.SaFileSuffix)) + nsaReaders.Add(GetNsaReader( + PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, annotation.path, 0), + PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, + annotation.path + SaCommon.IndexSufix, 0))); + else + { + nsiReaders.Add(GetNsiReader( + PersistentStreamUtils.GetS3ReadStream(s3Client, annotation.bucketName, annotation.path, 0))); + } + } + } + + private static void GetSaReaders(IEnumerable<(string dataFile, string indexFile)> dataAndIndexFiles, List nsaReaders, List nsiReaders) + { + foreach ((string dataFile, string indexFile) in dataAndIndexFiles) + { + if (dataFile.EndsWith(SaCommon.SaFileSuffix)) + nsaReaders.Add( + GetNsaReader(PersistentStreamUtils.GetReadStream(dataFile), + PersistentStreamUtils.GetReadStream(indexFile)) + ); + if (dataFile.EndsWith(SaCommon.SiFileSuffix)) + 
nsiReaders.Add(GetNsiReader(PersistentStreamUtils.GetReadStream(dataFile))); + } + } + + public static IList<(string dataFile, string indexFile)> GetSaDataAndIndexPaths(string saDirectoryPath) => ConnectUtilities.IsHttpLocation(saDirectoryPath) ? GetSaPathsFromManifest(saDirectoryPath) : GetLocalSaPaths(saDirectoryPath); + + private static IList<(string dataFile, string indexFile)> GetSaPathsFromManifest(string saDirectoryPath) + { + var baseUrl = NirvanaHelper.S3Url; + + var paths = new List<(string, string)>(); + using (var reader = new StreamReader(PersistentStreamUtils.GetReadStream(saDirectoryPath))) + { + string line; + while ((line = reader.ReadLine()) != null) + { + if (line.EndsWith(SaCommon.SiFileSuffix) || line.EndsWith(SaCommon.NgaFileSuffix)) + paths.Add((baseUrl + line, null)); + else paths.Add((baseUrl + line, baseUrl + line + SaCommon.IndexSufix)); + } + } + + return paths.Count > 0 ? paths : null; + } + + private static List<(string, string)> GetLocalSaPaths(string saDirectoryPath) + { + var paths = new List<(string, string)>(); + foreach (var filePath in Directory.GetFiles(saDirectoryPath)) + { + if (filePath.EndsWith(SaCommon.SaFileSuffix) || filePath.EndsWith(SaCommon.PhylopFileSuffix) || + filePath.EndsWith(SaCommon.RefMinorFileSuffix)) + paths.Add((filePath, filePath + SaCommon.IndexSufix)); + + if (filePath.EndsWith(SaCommon.SiFileSuffix) || filePath.EndsWith(SaCommon.NgaFileSuffix)) + paths.Add((filePath, null)); + //skip files with all other extensions + } + + return paths.Count>0? 
paths:null; + } + + public static ITranscriptAnnotationProvider GetTranscriptAnnotationProvider(string path, + ISequenceProvider sequenceProvider) + { + var benchmark = new Benchmark(); + var provider = new TranscriptAnnotationProvider(path, sequenceProvider); + Console.WriteLine("Cache Time: {0}\n", Benchmark.ToHumanReadable(benchmark.GetElapsedTime())); + return provider; + } + + private static NsaReader GetNsaReader(Stream dataStream, Stream indexStream) => + new NsaReader(new ExtendedBinaryReader(dataStream), indexStream); + + private static NsiReader GetNsiReader(Stream stream) => new NsiReader(stream); + } } \ No newline at end of file diff --git a/Nirvana/StreamAnnotation.cs b/Nirvana/StreamAnnotation.cs index 199df69b..124215e2 100644 --- a/Nirvana/StreamAnnotation.cs +++ b/Nirvana/StreamAnnotation.cs @@ -1,141 +1,141 @@ -using System; -using System.IO; -using Compression.FileHandling; -using ErrorHandling; -using ErrorHandling.Exceptions; -using Genome; -using IO; -using VariantAnnotation; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.Interface.Positions; -using VariantAnnotation.IO; -using VariantAnnotation.IO.VcfWriter; -using VariantAnnotation.Logger; -using VariantAnnotation.Utilities; -using Vcf; -using static Vcf.VcfReader; - - -namespace Nirvana -{ - public static class StreamAnnotation - { - public static ExitCodes Annotate(Stream headerStream, Stream inputVcfStream, Stream outputJsonStream, Stream outputJsonIndexStream, - Stream outputVcfStream, Stream outputGvcfStream, AnnotationResources annotationResources, IVcfFilter vcfFilter) - { - - var logger = outputJsonStream is BlockGZipStream ? 
new ConsoleLogger() : (ILogger)new NullLogger(); - var metrics = new PerformanceMetrics(logger); - var vcfConversion = new VcfConversion(); - - using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter)) - using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources, Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false)) - using (var vcfWriter = annotationResources.OutputVcf - ? new LiteVcfWriter(new StreamWriter(outputVcfStream), vcfReader.GetHeaderLines(), annotationResources) - : null) - using (var gvcfWriter = annotationResources.OutputGvcf - ? new LiteVcfWriter(new StreamWriter(outputGvcfStream), vcfReader.GetHeaderLines(), annotationResources) - : null) - { - try - { - CheckGenomeAssembly(annotationResources, vcfReader); - SetMitochondrialAnnotationBehavior(annotationResources, vcfReader); - - int previousChromIndex = -1; - IPosition position; - - while ((position = vcfReader.GetNextPosition()) != null) - { - if (previousChromIndex != position.Chromosome.Index) - annotationResources.PreLoad(position.Chromosome); - previousChromIndex = UpdatePerformanceMetrics(previousChromIndex, position.Chromosome, metrics); - - var annotatedPosition = position.Variants != null ? 
annotationResources.Annotator.Annotate(position) : null; - string json = annotatedPosition?.GetJsonString(); - - GenerateOutput(vcfConversion, jsonWriter, vcfWriter, gvcfWriter, position, annotatedPosition, json); - - metrics.Increment(); - } - - jsonWriter.WriteAnnotatedGenes(annotationResources.Annotator.GetGeneAnnotations()); - - } - catch (Exception e) - { - e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine; - throw; - } - } - - metrics.ShowAnnotationTime(); - - return ExitCodes.Success; - } - - private static void CheckGenomeAssembly(AnnotationResources annotationResources, VcfReader vcfReader) - { - if (vcfReader.InferredGenomeAssembly != GenomeAssembly.Unknown && vcfReader.InferredGenomeAssembly != annotationResources.Annotator.Assembly) - throw new UserErrorException($"A mismatch between genome assemblies was found. The input VCF uses {vcfReader.InferredGenomeAssembly} whereas annotation was configured for {annotationResources.Annotator.Assembly}."); - } - - private static void SetMitochondrialAnnotationBehavior(AnnotationResources annotationResources, VcfReader vcfReader) - { - if (vcfReader.IsRcrsMitochondrion && annotationResources.Annotator.Assembly == GenomeAssembly.GRCh37 - || annotationResources.Annotator.Assembly == GenomeAssembly.GRCh38 - || annotationResources.ForceMitochondrialAnnotation) - annotationResources.Annotator.EnableMitochondrialAnnotation(); - } - - private static void GenerateOutput(VcfConversion vcfConversion, JsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, IPosition position, IAnnotatedPosition annotatedPosition, string json) - { - if (json != null) WriteAnnotatedPosition(annotatedPosition, jsonWriter, vcfWriter, gvcfWriter, json, vcfConversion); - else gvcfWriter?.Write(string.Join("\t", position.VcfFields)); - } - - private static VcfReader GetVcfReader(Stream headerStream, Stream vcfStream, IAnnotationResources annotationResources, - IVcfFilter vcfFilter) - { - var vcfReader = 
FileUtilities.GetStreamReader(vcfStream); - - StreamReader headerReader; - if (headerStream == null) - headerReader = vcfReader; - else - { - headerReader = FileUtilities.GetStreamReader(headerStream); - vcfStream.Position = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition).BlockOffset; - } - - return Create(headerReader, vcfReader, annotationResources.SequenceProvider, annotationResources.RefMinorProvider, annotationResources.Recomposer, vcfFilter); - } - - public static void WriteAnnotatedPosition(IAnnotatedPosition annotatedPosition, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, string jsonOutput, VcfConversion vcfConversion) - { - jsonWriter.WriteJsonEntry(annotatedPosition.Position, jsonOutput); - - if (vcfWriter == null && gvcfWriter == null || annotatedPosition.Position.IsRecomposed) return; - - string vcfLine = vcfConversion.Convert(annotatedPosition); - vcfWriter?.Write(vcfLine); - gvcfWriter?.Write(vcfLine); - } - - private static int UpdatePerformanceMetrics(int previousChromIndex, IChromosome chromosome, - PerformanceMetrics metrics) - { - // ReSharper disable once InvertIf - if (chromosome.Index != previousChromIndex) - { - metrics.StartAnnotatingReference(chromosome); - previousChromIndex = chromosome.Index; - } - - return previousChromIndex; - } - - } +using System; +using System.IO; +using Compression.FileHandling; +using ErrorHandling; +using ErrorHandling.Exceptions; +using Genome; +using IO; +using VariantAnnotation; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.Interface.Positions; +using VariantAnnotation.IO; +using VariantAnnotation.IO.VcfWriter; +using VariantAnnotation.Logger; +using VariantAnnotation.Utilities; +using Vcf; +using static Vcf.VcfReader; + + +namespace Nirvana +{ + public static class StreamAnnotation + { + public static ExitCodes Annotate(Stream headerStream, Stream 
inputVcfStream, Stream outputJsonStream, Stream outputJsonIndexStream, + Stream outputVcfStream, Stream outputGvcfStream, AnnotationResources annotationResources, IVcfFilter vcfFilter) + { + + var logger = outputJsonStream is BlockGZipStream ? new ConsoleLogger() : (ILogger)new NullLogger(); + var metrics = new PerformanceMetrics(logger); + var vcfConversion = new VcfConversion(); + + using (var vcfReader = GetVcfReader(headerStream, inputVcfStream, annotationResources, vcfFilter)) + using (var jsonWriter = new JsonWriter(outputJsonStream, outputJsonIndexStream, annotationResources, Date.CurrentTimeStamp, vcfReader.GetSampleNames(), false)) + using (var vcfWriter = annotationResources.OutputVcf + ? new LiteVcfWriter(new StreamWriter(outputVcfStream), vcfReader.GetHeaderLines(), annotationResources) + : null) + using (var gvcfWriter = annotationResources.OutputGvcf + ? new LiteVcfWriter(new StreamWriter(outputGvcfStream), vcfReader.GetHeaderLines(), annotationResources) + : null) + { + try + { + CheckGenomeAssembly(annotationResources, vcfReader); + SetMitochondrialAnnotationBehavior(annotationResources, vcfReader); + + int previousChromIndex = -1; + IPosition position; + + while ((position = vcfReader.GetNextPosition()) != null) + { + if (previousChromIndex != position.Chromosome.Index) + annotationResources.PreLoad(position.Chromosome); + previousChromIndex = UpdatePerformanceMetrics(previousChromIndex, position.Chromosome, metrics); + + var annotatedPosition = position.Variants != null ? 
annotationResources.Annotator.Annotate(position) : null; + string json = annotatedPosition?.GetJsonString(); + + GenerateOutput(vcfConversion, jsonWriter, vcfWriter, gvcfWriter, position, annotatedPosition, json); + + metrics.Increment(); + } + + jsonWriter.WriteAnnotatedGenes(annotationResources.Annotator.GetGeneAnnotations()); + + } + catch (Exception e) + { + e.Data[ExitCodeUtilities.VcfLine] = vcfReader.VcfLine; + throw; + } + } + + metrics.ShowAnnotationTime(); + + return ExitCodes.Success; + } + + private static void CheckGenomeAssembly(AnnotationResources annotationResources, VcfReader vcfReader) + { + if (vcfReader.InferredGenomeAssembly != GenomeAssembly.Unknown && vcfReader.InferredGenomeAssembly != annotationResources.Annotator.Assembly) + throw new UserErrorException($"A mismatch between genome assemblies was found. The input VCF uses {vcfReader.InferredGenomeAssembly} whereas annotation was configured for {annotationResources.Annotator.Assembly}."); + } + + private static void SetMitochondrialAnnotationBehavior(AnnotationResources annotationResources, VcfReader vcfReader) + { + if (vcfReader.IsRcrsMitochondrion && annotationResources.Annotator.Assembly == GenomeAssembly.GRCh37 + || annotationResources.Annotator.Assembly == GenomeAssembly.GRCh38 + || annotationResources.ForceMitochondrialAnnotation) + annotationResources.Annotator.EnableMitochondrialAnnotation(); + } + + private static void GenerateOutput(VcfConversion vcfConversion, JsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, IPosition position, IAnnotatedPosition annotatedPosition, string json) + { + if (json != null) WriteAnnotatedPosition(annotatedPosition, jsonWriter, vcfWriter, gvcfWriter, json, vcfConversion); + else gvcfWriter?.Write(string.Join("\t", position.VcfFields)); + } + + private static VcfReader GetVcfReader(Stream headerStream, Stream vcfStream, IAnnotationResources annotationResources, + IVcfFilter vcfFilter) + { + var vcfReader = 
FileUtilities.GetStreamReader(vcfStream); + + StreamReader headerReader; + if (headerStream == null) + headerReader = vcfReader; + else + { + headerReader = FileUtilities.GetStreamReader(headerStream); + vcfStream.Position = Tabix.VirtualPosition.From(annotationResources.InputStartVirtualPosition).BlockOffset; + } + + return Create(headerReader, vcfReader, annotationResources.SequenceProvider, annotationResources.RefMinorProvider, annotationResources.Recomposer, vcfFilter); + } + + public static void WriteAnnotatedPosition(IAnnotatedPosition annotatedPosition, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, string jsonOutput, VcfConversion vcfConversion) + { + jsonWriter.WriteJsonEntry(annotatedPosition.Position, jsonOutput); + + if (vcfWriter == null && gvcfWriter == null || annotatedPosition.Position.IsRecomposed) return; + + string vcfLine = vcfConversion.Convert(annotatedPosition); + vcfWriter?.Write(vcfLine); + gvcfWriter?.Write(vcfLine); + } + + private static int UpdatePerformanceMetrics(int previousChromIndex, IChromosome chromosome, + PerformanceMetrics metrics) + { + // ReSharper disable once InvertIf + if (chromosome.Index != previousChromIndex) + { + metrics.StartAnnotatingReference(chromosome); + previousChromIndex = chromosome.Index; + } + + return previousChromIndex; + } + + } } \ No newline at end of file diff --git a/OptimizedCore/OptimizedCore.csproj b/OptimizedCore/OptimizedCore.csproj index 642c26be..43c61d40 100644 --- a/OptimizedCore/OptimizedCore.csproj +++ b/OptimizedCore/OptimizedCore.csproj @@ -1,9 +1,9 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - true - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + Full + true + + + diff --git a/Phantom/Phantom.csproj b/Phantom/Phantom.csproj index 0e27b78e..9295b030 100644 --- a/Phantom/Phantom.csproj +++ b/Phantom/Phantom.csproj @@ -1,23 +1,23 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - default - - - - - - - - - - - - - - - + + + 
netcoreapp2.1 + ..\bin\$(Configuration) + Full + + + default + + + + + + + + + + + + + + + diff --git a/ReferenceUtils/ReferenceUtils.csproj b/ReferenceUtils/ReferenceUtils.csproj index b1377612..a61133f8 100644 --- a/ReferenceUtils/ReferenceUtils.csproj +++ b/ReferenceUtils/ReferenceUtils.csproj @@ -1,16 +1,16 @@ - - - Exe - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - - - - + + + Exe + netcoreapp2.1 + ..\bin\$(Configuration) + Full + + + + + + + + + + diff --git a/SAUtils/SAUtils.csproj b/SAUtils/SAUtils.csproj index 4baf9213..3cc59d28 100644 --- a/SAUtils/SAUtils.csproj +++ b/SAUtils/SAUtils.csproj @@ -1,25 +1,25 @@ - - - Exe - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - - - - - - - - - - - - + + + Exe + netcoreapp2.1 + ..\bin\$(Configuration) + Full + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Tabix/Tabix.csproj b/Tabix/Tabix.csproj index e4fcebd8..2aabd55d 100644 --- a/Tabix/Tabix.csproj +++ b/Tabix/Tabix.csproj @@ -1,11 +1,11 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + + + + + + + diff --git a/UnitTests/Genome/SequenceUtilitiesTests.cs b/UnitTests/Genome/SequenceUtilitiesTests.cs index 0741ab95..cd07845f 100644 --- a/UnitTests/Genome/SequenceUtilitiesTests.cs +++ b/UnitTests/Genome/SequenceUtilitiesTests.cs @@ -1,5 +1,4 @@ using Genome; -using UnitTests.TestDataStructures; using Xunit; namespace UnitTests.Genome @@ -8,7 +7,7 @@ public sealed class SequenceUtilitiesTests { [Theory] [InlineData("ACGTTTGA", "TCAAACGT")] - [InlineData(null, null)] + [InlineData(null, null)] public void GetReverseComplement(string bases, string expectedResult) { var observedResult = SequenceUtilities.GetReverseComplement(bases); @@ -18,20 +17,11 @@ public void GetReverseComplement(string bases, string expectedResult) [Theory] [InlineData("ACGT", false)] [InlineData("ACXT", true)] - [InlineData(null, false)] + [InlineData(null, false)] public void 
HasNonCanonicalBase(string bases, bool expectedResult) { var observedResult = SequenceUtilities.HasNonCanonicalBase(bases); Assert.Equal(expectedResult, observedResult); } - - [Fact] - public void GetSubSubstring() - { - const string expectedResult = "CGTG"; - var sequence = new SimpleSequence("GGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCC"); - var observedResult = SequenceUtilities.GetSubSubstring(4, 10, true, 1, 4, sequence); - Assert.Equal(expectedResult, observedResult); - } } -} +} \ No newline at end of file diff --git a/UnitTests/MockedData/Genes.cs b/UnitTests/MockedData/Genes.cs new file mode 100755 index 00000000..482cbb1f --- /dev/null +++ b/UnitTests/MockedData/Genes.cs @@ -0,0 +1,30 @@ +using UnitTests.TestUtilities; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.DataStructures; + +// ReSharper disable StringLiteralTypo +// ReSharper disable IdentifierTypo +// ReSharper disable InconsistentNaming +namespace UnitTests.MockedData +{ + public static class Genes + { + public static readonly Gene MED8 = new Gene(ChromosomeUtilities.Chr1, 43383908, 43389812, true, "MED8", 19971, + CompactId.Convert("112950"), CompactId.Convert("ENSG00000159479")); + + public static readonly Gene SAMD13 = new Gene(ChromosomeUtilities.Chr1, 84298366, 84389957, false, "SAMD13", + 24582, CompactId.Convert("148418"), CompactId.Convert("ENSG00000203943")); + + public static readonly Gene POTEI = new Gene(ChromosomeUtilities.Chr2, 130459455, 131626428, true, "POTEI", + 37093, CompactId.Convert("653269"), CompactId.Convert("ENSG00000196834")); + + public static readonly Gene PTPN18 = new Gene(ChromosomeUtilities.Chr2, 130356007, 130375409, false, "PTPN18", + 9649, CompactId.Convert("26469"), CompactId.Convert("ENSG00000072135")); + + public static readonly Gene AL078459_1 = new Gene(ChromosomeUtilities.Chr1, 85276715, 85448124, false, + "AL078459.1", -1, CompactId.Empty, CompactId.Convert("ENSG00000223653")); + + public static 
readonly Gene VEGFA = new Gene(ChromosomeUtilities.Chr6, 43737921, 43754224, false, "VEGFA", + 12680, CompactId.Convert("7422"), CompactId.Convert("ENSG00000112715")); + } +} \ No newline at end of file diff --git a/UnitTests/MockedData/RnaEdits.cs b/UnitTests/MockedData/RnaEdits.cs new file mode 100644 index 00000000..68f0319e --- /dev/null +++ b/UnitTests/MockedData/RnaEdits.cs @@ -0,0 +1,13 @@ +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; +// ReSharper disable InconsistentNaming + +namespace UnitTests.MockedData +{ + public static class RnaEdits + { + public static readonly IRnaEdit[] NM_001025366_2 = { + new RnaEdit(3663, 3662, "AAAAAAAAAAAAAAA") + }; + } +} \ No newline at end of file diff --git a/UnitTests/MockedData/TranscriptRegions.cs b/UnitTests/MockedData/TranscriptRegions.cs new file mode 100755 index 00000000..08c3cc9d --- /dev/null +++ b/UnitTests/MockedData/TranscriptRegions.cs @@ -0,0 +1,126 @@ +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; + +// ReSharper disable IdentifierTypo +// ReSharper disable InconsistentNaming +namespace UnitTests.MockedData +{ + public static class TranscriptRegions + { + public static readonly ITranscriptRegion[] ENST00000290663 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 8, 43383917, 43384552, 848, 1483), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 43384553, 43385045, 847, 848), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 43385046, 43385106, 787, 847), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 43385107, 43385977, 786, 787), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 43385978, 43386226, 538, 786), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 43386227, 43386588, 537, 538), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 43386589, 43386670, 456, 537), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 43386671, 43386857, 455, 456), + new 
TranscriptRegion(TranscriptRegionType.Exon, 4, 43386858, 43386998, 315, 455), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 43386999, 43387502, 314, 315), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 43387503, 43387647, 170, 314), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 43387648, 43388309, 169, 170), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 43388310, 43388428, 51, 169), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 43388429, 43389758, 50, 51), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 43389759, 43389808, 1, 50) + }; + + public static readonly ITranscriptRegion[] ENST00000370673 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 84298366, 84298567, 1, 202), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 84298568, 84303202, 202, 203), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 84303203, 84303287, 203, 287), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 84303288, 84325636, 287, 288), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 84325637, 84325748, 288, 399), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 84325749, 84349630, 399, 400), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 84349631, 84350798, 400, 1567) + }; + + public static readonly ITranscriptRegion[] ENST00000615053 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 13, 130463799, 130464144, 1581, 1926), + new TranscriptRegion(TranscriptRegionType.Intron, 12, 130464145, 130465651, 1580, 1581), + new TranscriptRegion(TranscriptRegionType.Exon, 12, 130465652, 130465664, 1568, 1580), + new TranscriptRegion(TranscriptRegionType.Intron, 11, 130465665, 130465666, 1567, 1568), + new TranscriptRegion(TranscriptRegionType.Exon, 11, 130465667, 130465772, 1462, 1567), + new TranscriptRegion(TranscriptRegionType.Intron, 10, 130465773, 130474377, 1461, 1462), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 130474378, 130474534, 1305, 1461), + new 
TranscriptRegion(TranscriptRegionType.Intron, 9, 130474535, 130488188, 1304, 1305), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 130488189, 130488201, 1292, 1304), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 130488202, 130489237, 1291, 1292), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 130489238, 130489279, 1250, 1291), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 130489280, 130490669, 1249, 1250), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 130490670, 130490740, 1179, 1249), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 130490741, 130496551, 1178, 1179), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 130496552, 130496622, 1108, 1178), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 130496623, 130499083, 1107, 1108), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 130499084, 130499221, 970, 1107), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 130499222, 130500535, 969, 970), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 130500536, 130500642, 863, 969), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 130500643, 130503445, 862, 863), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 130503446, 130503619, 689, 862), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 130503620, 130503779, 688, 689), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 130503780, 130503894, 574, 688), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 130503895, 130508714, 573, 574), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 130508715, 130509287, 1, 573) + }; + + public static readonly ITranscriptRegion[] ENST00000347849 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 130356045, 130356200, 1, 156), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 130356201, 130369132, 156, 157), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 130369133, 130369201, 157, 225), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 130369202, 130369764, 225, 226), + 
new TranscriptRegion(TranscriptRegionType.Exon, 3, 130369765, 130369827, 226, 288), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 130369828, 130370047, 288, 289), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 130370048, 130370190, 289, 431), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 130370191, 130370556, 431, 432), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 130370557, 130370623, 432, 498), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 130370624, 130370704, 498, 499), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 130370705, 130370782, 499, 576), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 130370783, 130370874, 576, 577), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 130370875, 130370964, 577, 666), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 130370965, 130371198, 666, 667), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 130371199, 130371287, 667, 755), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 130371288, 130372256, 755, 756), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 130372257, 130372483, 756, 982), + new TranscriptRegion(TranscriptRegionType.Intron, 9, 130372484, 130372872, 982, 983), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 130372873, 130372947, 983, 1057), + new TranscriptRegion(TranscriptRegionType.Intron, 10, 130372948, 130373156, 1057, 1058), + new TranscriptRegion(TranscriptRegionType.Exon, 11, 130373157, 130374571, 1058, 2472) + }; + + public static readonly ITranscriptRegion[] ENST00000427819 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 85276715, 85276797, 1, 83), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 85276798, 85277640, 83, 84), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 85277641, 85277738, 84, 181), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 85277739, 85376765, 181, 182), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 85376766, 85376835, 182, 251), + new 
TranscriptRegion(TranscriptRegionType.Intron, 3, 85376836, 85380373, 251, 252), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 85380374, 85380565, 252, 443), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 85380566, 85398456, 443, 444), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 85398457, 85399963, 444, 1950) + }; + + public static readonly ITranscriptRegion[] NM_001025366_2 = + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 43737946, 43739049, 1, 1104), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 43739050, 43742077, 1104, 1105), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 43742078, 43742129, 1105, 1156), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 43742130, 43745205, 1156, 1157), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 43745206, 43745402, 1157, 1353), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 43745403, 43746196, 1353, 1354), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 43746197, 43746273, 1354, 1430), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 43746274, 43746625, 1430, 1431), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 43746626, 43746655, 1431, 1460), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 43746656, 43748468, 1460, 1461), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 43748469, 43748591, 1461, 1583), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 43748592, 43749692, 1583, 1584), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 43749693, 43749824, 1584, 1715), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 43749825, 43752277, 1715, 1716), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 43752278, 43754224, 1716, 3662) + }; + } +} \ No newline at end of file diff --git a/UnitTests/MockedData/Transcripts.cs b/UnitTests/MockedData/Transcripts.cs new file mode 100755 index 00000000..4c9dd80e --- /dev/null +++ b/UnitTests/MockedData/Transcripts.cs @@ -0,0 +1,45 @@ +using UnitTests.TestUtilities; +using 
VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; + +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming +namespace UnitTests.MockedData +{ + public static class Transcripts + { + // the following 5 transcripts were chosen to stress test our gene fusions: + public static readonly ITranscript ENST00000290663 = new Transcript(ChromosomeUtilities.Chr1, 43383917, + 43389808, CompactId.Convert("ENST00000290663", 10), Translations.ENST00000290663, BioType.protein_coding, + Genes.MED8, 1483, 0, true, TranscriptRegions.ENST00000290663, 8, null, 0, 0, Source.Ensembl, false, false, + null); + + public static readonly ITranscript ENST00000370673 = new Transcript(ChromosomeUtilities.Chr1, 84298366, + 84350798, CompactId.Convert("ENST00000370673", 7), Translations.ENST00000370673, BioType.protein_coding, + Genes.SAMD13, 1567, 0, false, TranscriptRegions.ENST00000370673, 4, null, 0, 0, Source.Ensembl, false, + false, null); + + public static readonly ITranscript ENST00000615053 = new Transcript(ChromosomeUtilities.Chr2, 130463799, + 130509287, CompactId.Convert("ENST00000615053", 3), Translations.ENST00000615053, BioType.protein_coding, + Genes.POTEI, 1926, 0, false, TranscriptRegions.ENST00000615053, 13, null, 0, 0, Source.Ensembl, false, + false, null); + + public static readonly ITranscript ENST00000347849 = new Transcript(ChromosomeUtilities.Chr2, 130356045, + 130374571, CompactId.Convert("ENST00000347849", 7), Translations.ENST00000347849, BioType.protein_coding, + Genes.PTPN18, 2472, 0, false, TranscriptRegions.ENST00000347849, 11, null, 0, 0, Source.Ensembl, false, + false, null); + + // antisense RNA + public static readonly ITranscript ENST00000427819 = new Transcript(ChromosomeUtilities.Chr1, 85276715, + 85399963, CompactId.Convert("ENST00000427819", 5), null, BioType.antisense_RNA, Genes.AL078459_1, 
1950, 0, + false, TranscriptRegions.ENST00000427819, 5, null, 0, 0, Source.Ensembl, false, false, null); + + // used to test non-AUG start codons + public static readonly ITranscript NM_001025366_2 = new Transcript(ChromosomeUtilities.Chr6, 43737946, 43754224, + CompactId.Convert("NM_001025366", 2), Translations.NM_001025366_2, BioType.protein_coding, Genes.VEGFA, + 3662, 0, true, TranscriptRegions.NM_001025366_2, 8, null, 0, 0, Source.RefSeq, false, false, + RnaEdits.NM_001025366_2); + } +} \ No newline at end of file diff --git a/UnitTests/MockedData/Translations.cs b/UnitTests/MockedData/Translations.cs new file mode 100755 index 00000000..dd1568fb --- /dev/null +++ b/UnitTests/MockedData/Translations.cs @@ -0,0 +1,33 @@ +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; + +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +// ReSharper disable InconsistentNaming +namespace UnitTests.MockedData +{ + public static class Translations + { + public static readonly ITranslation ENST00000290663 = new Translation( + new CodingRegion(43384450, 43389764, 45, 950, 906), CompactId.Convert("ENSP00000290663", 6), + "MQREEKQLEASLDALLSQVADLKNSLGSFICKLENEYGRLTWPSVLDSFALLSGQLNTLNKVLKHEKTPLFRNQVIIPLVLSPDRDEDLMRQTEGRVPVFSHEVVPDHLRTKPDPEVEEQEKQLTTDAARIGADAAQKQIQSLNKMCSNLLEKISKEERESESGGLRPNKQTFNPTDTNALVAAVAFGKGLSNWRPSGSSGPGQAGQPGAGTILAGTSGLQQVQMAGAPSQQQPMLSGVQMAQAGQPGKMPSGIKTNIKSASMHPYQRPSCLGFILAIPLRRKVKKLLGQEGKKNAHLQLW"); + + public static readonly ITranslation ENST00000370673 = new Translation( + new CodingRegion(84298558, 84349774, 193, 543, 351), CompactId.Convert("ENSP00000359707", 3), + "MRGVAEVKEPCSLPMLSVDMENKENGSVGVKNSMENGRPPDPADWAVMDVVNYFRTVGFEEQASAFQEQEIDGKSLLLMTRNDVLTGLQLKLGPALKIYEYHVKPLQTKHLKNNSS"); + + public static readonly ITranslation ENST00000615053 = new Translation( + new CodingRegion(130465653, 130509235, 53, 1579, 1527), 
CompactId.Convert("ENSP00000483193", 1), + "MVAEVDSMPAASSVKKPFVLRSKMGKWCRHCFPCCRGSGKSNVGTSGDQDDSTMKTLRSKMGKWCCHCFPCCRGSGKSNVGTSGDHDDSAMKTLRSKMGKWCCHCFPCCRGSGKSNVGAWGDYDDSAFVEPRYHVRREDLDKLHRAAWWGKVARKDLIVMLRDTDVNKQDKQKRTALHLASANGNSGVVKLLLDRRCQLNVLDNKKRTALTKAVQCQEDECALMLLEHGTDPNIPDEYGNTTLHYAIYNEDKLMAKALLLYGADIESKNKHGLTPLLLGVHEQKQQVVKFLIKKKANLNALDRYGRTALILAVCCGSASIVSLLLEQNIDVSSQDLSGQTAREYAVSSHHHVICQLLSDYKEKQMLKISSENSNPEQDLKLTSEEESQRFKGSENSQPEKMSQEPEINKDGDRKVEEEMKKHGSTHVGFPENLTNGATAGNGDDGLIPPRKSRTPESQQFPDTENEEYHSDEQNDTQKQFCEEQNTGILHDEILIHEEKQIEVVENEF"); + + public static readonly ITranslation ENST00000347849 = new Translation( + new CodingRegion(130356108, 130373224, 64, 1125, 1062), CompactId.Convert("ENSP00000310092", 5), + "MSRSLDSARSFLERLEARGGREGAVLAGEFSKRCERYWAQEQEPLQTGLFCITLIKEKWLNEDIMLRTLKVTFQKESRSVYQLQYMSWPDRGVPSSPDHMLAMVEEARRLQGSGPEPLCVHCSAGCGRTGVLCTVDYVRQLLLTQMIPPDFSLFDVVLKMRKQRPAAVQTEEQYRFLYHTVAQMFCSTLQNASPHYQNIKENCAPLYDDALFLRTPQALLAIPRPPGGVLRSISVPGSPGHAMADTYAVVQKRGAPAGAGSGTQTGTGTGTGARSAEEAPLYSKVTPRAQRPGAHAEDARGTLPGRVPADQSPAGSGAYEDVAGGAQTGGLGFNLRIGRPKGPRDPPAEWTRV"); + + public static readonly ITranslation NM_001025366_2 = new Translation( + new CodingRegion(43738444, 43752299, 499, 1737, 1239), CompactId.Convert("NP_001020537", 2), + "LTDRQTDTAPSPSYHLLPGRRRTVDAAASRGQGPEPAPGGGVEGVGARGVALKLFVQLLGCSRFGGAVVRAGEAEPSGAARSASSGREEPQPEEGEEEEEKEEERGPQWRLGARKPGSWTGEAAVCADSAPAARAPQALARASGRGGRVARRGAEESGPPHSPSRRGSASRAGPGRASETMNFLLSWVHWSLALLLYLHHAKWSQAAPMAEGGGQNHHEVVKFMDVYQRSYCHPIETLVDIFQEYPDEIEYIFKPSCVPLMRCGGCCNDEGLECVPTEESNITMQIMRIKPHQGQHIGEMSFLQHNKCECRPKKDRARQEKKSVRGKGKGQKRKRKKSRYKSWSVYVGARCCLMPWSLPGPHPCGPCSERRKHLFVQDPQTCKCSCKNTDSRCKARQLELNERTCRCDKPRR"); + } +} \ No newline at end of file diff --git a/UnitTests/TestDataStructures/SimpleSequence.cs b/UnitTests/TestDataStructures/SimpleSequence.cs index b5f5345e..671f7351 100644 --- a/UnitTests/TestDataStructures/SimpleSequence.cs +++ b/UnitTests/TestDataStructures/SimpleSequence.cs @@ -17,6 +17,8 @@ public 
SimpleSequence(string s, int zeroBasedStartOffset = 0) _zeroBasedStartOffset = zeroBasedStartOffset; _sequence = s; } + + public string Sequence => _sequence; public string Substring(int offset, int length) { diff --git a/UnitTests/UnitTests.csproj b/UnitTests/UnitTests.csproj index 85a08a0f..04d268f1 100644 --- a/UnitTests/UnitTests.csproj +++ b/UnitTests/UnitTests.csproj @@ -1,37 +1,37 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - true - - - - - - - all - runtime; build; native; contentfiles; analyzers - - - - - - - - - - - - - - - - - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + true + + + + + + + all + runtime; build; native; contentfiles; analyzers + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclatureTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclatureTests.cs index 4919df5d..1f941cd8 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclatureTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclatureTests.cs @@ -20,7 +20,7 @@ public HgvsCodingNomenclatureTests() { _forwardTranscript = GetForwardTranscript(); _reverseTranscript = GetReverseTranscript(); - _gapTranscript = GetGapTranscript(); + _gapTranscript = GetGapTranscript(); } internal static ITranscript GetForwardTranscript() @@ -28,11 +28,11 @@ internal static ITranscript GetForwardTranscript() // get info from ENST00000343938.4 var regions = new ITranscriptRegion[] { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 1260147, 1260482, 1, 336), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1260147, 1260482, 1, 336), new TranscriptRegion(TranscriptRegionType.Intron, 1, 1260483, 1262215, 336, 337), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 1262216, 1262412, 337, 533), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1262216, 1262412, 337, 533), new 
TranscriptRegion(TranscriptRegionType.Intron, 2, 1262413, 1262620, 533, 534), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 1262621, 1264277, 534, 2190) + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1262621, 1264277, 534, 2190) }; var translation = new Mock(); @@ -55,7 +55,7 @@ private static ITranscript GetForwardTranscriptWithoutUtr() //ENST00000579622.1 chrX:70361035-70361156, non-coding, forward strand, no utr var regions = new ITranscriptRegion[] { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 70361035,70361156, 1, 122) + new TranscriptRegion(TranscriptRegionType.Exon, 1, 70361035, 70361156, 1, 122) }; var transcript = new Mock(); @@ -68,14 +68,15 @@ private static ITranscript GetForwardTranscriptWithoutUtr() transcript.SetupGet(x => x.TotalExonLength).Returns(122); return transcript.Object; } + internal static ITranscript GetReverseTranscript() { // get info from "ENST00000423372.3 var regions = new ITranscriptRegion[] { - new TranscriptRegion(TranscriptRegionType.Exon, 2, 134901, 135802, 1760, 2661), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 134901, 135802, 1760, 2661), new TranscriptRegion(TranscriptRegionType.Intron, 1, 135803, 137620, 1759, 1760), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 137621, 139379, 1, 1759) + new TranscriptRegion(TranscriptRegionType.Exon, 1, 137621, 139379, 1, 1759) }; var translation = new Mock(); @@ -98,35 +99,35 @@ private static ITranscript GetGapTranscript() //NM_000314.4 var regions = new ITranscriptRegion[] { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623195, 89623860, 1, 666), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, 89624305, 667, 1110), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623195, 89623860, 1, 666), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, 
89624305, 667, 1110), new TranscriptRegion(TranscriptRegionType.Intron, 1, 89624306, 89653781, 1110, 1111), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 89653782, 89653866, 1111, 1195), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 89653782, 89653866, 1111, 1195), new TranscriptRegion(TranscriptRegionType.Intron, 2, 89653867, 89685269, 1195, 1196), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 89685270, 89685314, 1196, 1240), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 89685270, 89685314, 1196, 1240), new TranscriptRegion(TranscriptRegionType.Intron, 3, 89685315, 89690802, 1240, 1241), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 89690803, 89690846, 1241, 1284), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 89690803, 89690846, 1241, 1284), new TranscriptRegion(TranscriptRegionType.Intron, 4, 89690847, 89692769, 1284, 1285), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 89692770, 89693008, 1285, 1523), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 89692770, 89693008, 1285, 1523), new TranscriptRegion(TranscriptRegionType.Intron, 5, 89693009, 89711874, 1523, 1524), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 89711875, 89712016, 1524, 1665), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 89711875, 89712016, 1524, 1665), new TranscriptRegion(TranscriptRegionType.Intron, 6, 89712017, 89717609, 1665, 1666), - new TranscriptRegion(TranscriptRegionType.Exon, 7, 89717610, 89717776, 1666, 1832), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 89717610, 89717776, 1666, 1832), new TranscriptRegion(TranscriptRegionType.Intron, 7, 89717777, 89720650, 1832, 1833), - new TranscriptRegion(TranscriptRegionType.Exon, 8, 89720651, 89720875, 1833, 2057), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 89720651, 89720875, 1833, 2057), new TranscriptRegion(TranscriptRegionType.Intron, 8, 89720876, 89725043, 2057, 2058), - new TranscriptRegion(TranscriptRegionType.Exon, 9, 89725044, 89728532, 2058, 5546) 
+ new TranscriptRegion(TranscriptRegionType.Exon, 9, 89725044, 89728532, 2058, 5546) }; var translation = new Mock(); translation.SetupGet(x => x.CodingRegion).Returns(new CodingRegion(89624227, 89725229, 1032, 2243, 1212)); - - var rnaEdits = new IRnaEdit[3]; - rnaEdits[0] = new RnaEdit(667, 667, null); - rnaEdits[1] = new RnaEdit(707, 707, "C"); + + var rnaEdits = new IRnaEdit[3]; + rnaEdits[0] = new RnaEdit(667, 667, null); + rnaEdits[1] = new RnaEdit(707, 707, "C"); rnaEdits[2] = new RnaEdit(5548, 5547, "AAAAAAAAAAAAAAAAAAAAAAAAAA"); - + var transcript = new Mock(); transcript.SetupGet(x => x.Id).Returns(CompactId.Convert("NM_000314", 4)); transcript.SetupGet(x => x.Chromosome).Returns(ChromosomeUtilities.Chr10); @@ -141,34 +142,38 @@ private static ITranscript GetGapTranscript() } [Theory] - [InlineData(89623861, 89623861, "T", "", "T", VariantType.deletion, null)] - [InlineData(89623861, 89623861, "T", "G", "T", VariantType.SNV, null)] - [InlineData(89623901, 89623901, "G", "C", "C", VariantType.SNV, "NM_000314.4:c.-326=")] - [InlineData(89623901, 89623901, "G", "T", "C", VariantType.SNV, "NM_000314.4:c.-326C>T")] - [InlineData(89623861, 89623863, "TGG", "", "GG", VariantType.deletion, "NM_000314.4:c.-365_-364del")] - [InlineData(89623859, 89623861, "GCT", "", "GC", VariantType.deletion, "NM_000314.4:c.-367_-366del")] - [InlineData(89623860, 89623862, "CTG", "", "CG", VariantType.deletion, "NM_000314.4:c.-366_-365del")] - [InlineData(89624304, 89624308, "CTGTA", "", "CT", VariantType.deletion, "NM_000314.4:c.78_79+3del")] - [InlineData(89624308, 89624310, "ATC", "", "ATC", VariantType.deletion, "NM_000314.4:c.79+3_79+5del")] - public void GetHgvscAnnotation_in_intron_gap_substitution(int variantStart, int variantEnd, string reference, string alt, + [InlineData(89623861, 89623861, "T", "", "T", VariantType.deletion, null)] + [InlineData(89623861, 89623861, "T", "G", "T", VariantType.SNV, null)] + [InlineData(89623901, 89623901, "G", "C", "C", VariantType.SNV, 
"NM_000314.4:c.-326=")] + [InlineData(89623901, 89623901, "G", "T", "C", VariantType.SNV, "NM_000314.4:c.-326C>T")] + [InlineData(89623861, 89623863, "TGG", "", "GG", VariantType.deletion, "NM_000314.4:c.-365_-364del")] + [InlineData(89623859, 89623861, "GCT", "", "GC", VariantType.deletion, "NM_000314.4:c.-367_-366del")] + [InlineData(89623860, 89623862, "CTG", "", "CG", VariantType.deletion, "NM_000314.4:c.-366_-365del")] + [InlineData(89624304, 89624308, "CTGTA", "", "CT", VariantType.deletion, "NM_000314.4:c.78_79+3del")] + [InlineData(89624308, 89624310, "ATC", "", "ATC", VariantType.deletion, "NM_000314.4:c.79+3_79+5del")] + public void GetHgvscAnnotation_in_intron_gap_substitution(int variantStart, int variantEnd, string reference, + string alt, string transcriptRef, VariantType variantType, string expectedHgvsc) { var (startIndex, _) = MappedPositionUtilities.FindRegion(_gapTranscript.TranscriptRegions, variantStart); var (endIndex, _) = MappedPositionUtilities.FindRegion(_gapTranscript.TranscriptRegions, variantEnd); - var variant = new SimpleVariant(ChromosomeUtilities.Chr10, variantStart, variantEnd, reference, alt, variantType); + var variant = new SimpleVariant(ChromosomeUtilities.Chr10, variantStart, variantEnd, reference, alt, + variantType); var observedHgvsc = - HgvsCodingNomenclature.GetHgvscAnnotation(_gapTranscript, variant, null, startIndex, endIndex, transcriptRef); - + HgvsCodingNomenclature.GetHgvscAnnotation(_gapTranscript, variant, null, startIndex, endIndex, + transcriptRef, null); + Assert.Equal(expectedHgvsc, observedHgvsc); } [Fact] public void GetHgvscAnnotation_substitution_in_3UTR() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260247, 1260247, "A", "G", VariantType.SNV); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 0, 0); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260247, 1260247, "A", "G", VariantType.SNV); + var observedHgvsc = + 
HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 0, 0, null, null); Assert.Equal("ENST00000343938.4:c.-311A>G", observedHgvsc); } @@ -176,8 +181,9 @@ public void GetHgvscAnnotation_substitution_in_3UTR() [Fact] public void GetHgvscAnnotation_substitution_in_intron_before_TSS() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262210, 1262210, "C", "G", VariantType.SNV); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 1, 1); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262210, 1262210, "C", "G", VariantType.SNV); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 1, 1, null, null); Assert.Equal("ENST00000343938.4:c.-75-6C>G", observedHgvsc); } @@ -188,8 +194,10 @@ public void GetHgvscAnnotation_insertion_in_coding_region() var sequence = new Mock(); sequence.Setup(x => x.Substring(1262627, 1)).Returns("A"); - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, "", "G", VariantType.insertion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, "", "G", VariantType.insertion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, + null); Assert.Equal("ENST00000343938.4:c.130_131insG", observedHgvsc); } @@ -200,8 +208,10 @@ public void GetHgvscAnnotation_insertion_after_coding_region() var sequence = new Mock(); sequence.Setup(x => x.Substring(1262627, 1)).Returns("A"); - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1263159, 1263158, "", "G", VariantType.insertion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1263159, 1263158, "", "G", 
VariantType.insertion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, + null); Assert.Equal("ENST00000343938.4:c.*15_*16insG", observedHgvsc); } @@ -212,23 +222,26 @@ public void GetHgvscAnnotation_duplication_in_coding_region() var sequence = new Mock(); sequence.Setup(x => x.Substring(1262626, 2)).Returns("TA"); - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, "", "TA", VariantType.insertion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262629, 1262628, "", "TA", + VariantType.insertion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, sequence.Object, 4, 4, null, + null); - Assert.Equal("ENST00000343938.4:c.129_130dupTA", observedHgvsc); + Assert.Equal("ENST00000343938.4:c.129_130dup", observedHgvsc); } [Fact] public void ApplyDuplicationAdjustments_NonCoding_Reverse() { var regions = new ITranscriptRegion[3]; - regions[0] = new TranscriptRegion(TranscriptRegionType.Exon, 2, 20976856, 20977050, 154, 348); + regions[0] = new TranscriptRegion(TranscriptRegionType.Exon, 2, 20976856, 20977050, 154, 348); regions[1] = new TranscriptRegion(TranscriptRegionType.Intron, 1, 20977051, 20977054, 153, 154); - regions[2] = new TranscriptRegion(TranscriptRegionType.Exon, 1, 20977055, 20977207, 1, 153); + regions[2] = new TranscriptRegion(TranscriptRegionType.Exon, 1, 20977055, 20977207, 1, 153); var observedResults = regions.ShiftDuplication(20977006, "AACT", true); - Assert.Equal("AACT", observedResults.RefAllele); + Assert.Equal("AACT", observedResults.RefAllele); Assert.Equal(20977009, observedResults.Start); Assert.Equal(20977006, observedResults.End); } @@ -237,14 +250,16 @@ public void ApplyDuplicationAdjustments_NonCoding_Reverse() public void 
ApplyDuplicationAdjustments_Coding_Forward() { var regions = new ITranscriptRegion[41]; - for (int i = 0; i < 22; i++) regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107000000, 107334926, 1, 1564); - for (int i = 23; i < regions.Length; i++) regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107335162, 108000000, 1662, 1700); + for (int i = 0; i < 22; i++) + regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107000000, 107334926, 1, 1564); + for (int i = 23; i < regions.Length; i++) + regions[i] = new TranscriptRegion(TranscriptRegionType.Exon, 0, 107335162, 108000000, 1662, 1700); regions[21] = new TranscriptRegion(TranscriptRegionType.Intron, 11, 107334926, 107335065, 1565, 1566); - regions[22] = new TranscriptRegion(TranscriptRegionType.Exon, 12, 107335066, 107335161, 1566, 1661); + regions[22] = new TranscriptRegion(TranscriptRegionType.Exon, 12, 107335066, 107335161, 1566, 1661); var observedResults = regions.ShiftDuplication(107335068, "AGTC", false); - Assert.Equal("AGTC", observedResults.RefAllele); + Assert.Equal("AGTC", observedResults.RefAllele); Assert.Equal(107335064, observedResults.Start); Assert.Equal(107335067, observedResults.End); } @@ -252,17 +267,21 @@ public void ApplyDuplicationAdjustments_Coding_Forward() [Fact] public void GetHgvscAnnotation_Deletion_start_before_transcript() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260144, 1260148, "ATGTC", "", VariantType.deletion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260144, 1260148, "ATGTC", "", + VariantType.deletion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0, null, null); Assert.Null(observedHgvsc); } [Fact] - public void GetHgvscAnnotation_Delin_start_from_Exon_end_in_intron() + public void 
GetHgvscAnnotation_Delins_start_from_Exon_end_in_intron() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, "ATGTC", "TG", VariantType.indel); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, "ATGTC", "TG", + VariantType.indel); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3, null, null); Assert.Equal("ENST00000343938.4:c.120_122+2delinsTG", observedHgvsc); } @@ -270,8 +289,10 @@ public void GetHgvscAnnotation_Delin_start_from_Exon_end_in_intron() [Fact] public void GetHgvscAnnotation_inversion_start_from_Exon_end_in_intron() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, "ATGTC", "GACAT", VariantType.MNV); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1262410, 1262414, "ATGTC", "GACAT", + VariantType.MNV); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, 2, 3, null, null); Assert.Equal("ENST00000343938.4:c.120_122+2invATGTC", observedHgvsc); } @@ -279,8 +300,10 @@ public void GetHgvscAnnotation_inversion_start_from_Exon_end_in_intron() [Fact] public void GetHgvscAnnotation_Deletion_end_after_transcript() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260143, 1260148, "ATGTC", "", VariantType.deletion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260143, 1260148, "ATGTC", "", + VariantType.deletion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, 0, null, null); Assert.Null(observedHgvsc); } @@ -288,8 +311,10 @@ public void 
GetHgvscAnnotation_Deletion_end_after_transcript() [Fact] public void GetHgvscAnnotation_Reference_no_hgvsc() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260138, 1260138, "A", "A", VariantType.reference); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, -1); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 1260138, 1260138, "A", "A", + VariantType.reference); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_forwardTranscript, variant, null, -1, -1, null, null); Assert.Null(observedHgvsc); } @@ -297,8 +322,9 @@ public void GetHgvscAnnotation_Reference_no_hgvsc() [Fact] public void GetHgvscAnnotation_substitution_in_intron_of_reverse_gene() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 136000, 136000, "A", "G", VariantType.SNV); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 1, 1); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 136000, 136000, "A", "G", VariantType.SNV); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 1, 1, null, null); Assert.Equal("ENST00000423372.3:c.*910-198T>C", observedHgvsc); } @@ -306,8 +332,9 @@ public void GetHgvscAnnotation_substitution_in_intron_of_reverse_gene() [Fact] public void GetHgvscAnnotation_substitution_after_stopCodon_of_reverse_gene() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 138529, 138529, "A", "G", VariantType.SNV); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 2, -1); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 138529, 138529, "A", "G", VariantType.SNV); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 2, -1, null, null); Assert.Equal("ENST00000423372.3:c.*1T>C", observedHgvsc); } @@ -315,8 +342,11 @@ public void 
GetHgvscAnnotation_substitution_after_stopCodon_of_reverse_gene() [Fact] public void GetHgvscAnnotation_deletion_of_reverse_gene() { - var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 135802, 137619, "ATCGTGGGTTGT", "", VariantType.deletion); - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 0, 1, "ACAACCCACGAT"); + var variant = new SimpleVariant(ChromosomeUtilities.Chr1, 135802, 137619, "ATCGTGGGTTGT", "", + VariantType.deletion); + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(_reverseTranscript, variant, null, 0, 1, "ACAACCCACGAT", + null); Assert.Equal("ENST00000423372.3:c.*909+2_*910del", observedHgvsc); } @@ -325,10 +355,13 @@ public void GetHgvscAnnotation_deletion_of_reverse_gene() public void GetHgvscAnnotation_insertion_at_last_position() { var sequence = new Mock(); - sequence.Setup(x => x.Substring(70361157-12, 12)).Returns("TATATATATATA"); + sequence.Setup(x => x.Substring(70361157 - 12, 12)).Returns("TATATATATATA"); - var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, "", "ACACCAGCAGCA", VariantType.insertion);//right shifted variant - var observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, 0); + var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, "", "ACACCAGCAGCA", + VariantType.insertion); //right shifted variant + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, + 0, null, null); Assert.Equal("ENST00000579622.1:n.122_123insACACCAGCAGCA", observedHgvsc); } @@ -339,10 +372,13 @@ public void GetHgvscAnnotation_duplication_at_last_position() var sequence = new Mock(); sequence.Setup(x => x.Substring(70361156 - 4, 4)).Returns("ACAC"); - var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, "", "ACAC", VariantType.insertion);//right shifted variant - var 
observedHgvsc = HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, 0); + var variant = new SimpleVariant(ChromosomeUtilities.ChrX, 70361157, 70361156, "", "ACAC", + VariantType.insertion); //right shifted variant + var observedHgvsc = + HgvsCodingNomenclature.GetHgvscAnnotation(GetForwardTranscriptWithoutUtr(), variant, sequence.Object, 0, + 0, null, null); - Assert.Equal("ENST00000579622.1:n.119_122dupACAC", observedHgvsc); + Assert.Equal("ENST00000579622.1:n.119_122dup", observedHgvsc); } } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclatureTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclatureTests.cs index 01e8c4b2..1c9c163b 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclatureTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclatureTests.cs @@ -1,6 +1,7 @@ using Moq; using UnitTests.TestDataStructures; using UnitTests.TestUtilities; +using VariantAnnotation.AnnotatedPositions.AminoAcids; using VariantAnnotation.AnnotatedPositions.Transcript; using VariantAnnotation.Caches.DataStructures; using VariantAnnotation.Interface.AnnotatedPositions; @@ -58,7 +59,7 @@ public void GetHgvsProteinAnnotation_substitution() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -72,7 +73,7 @@ public void GetHgvsProteinAnnotation_insertion() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = 
GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -86,7 +87,7 @@ public void GetHgvsProteinAnnotation_duplication_right_shifted() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -100,7 +101,7 @@ public void GetHgvsProteinAnnotation_deletion() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -114,7 +115,7 @@ public void GetHgvsProteinAnnotation_delIns() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, 
null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -128,7 +129,7 @@ public void GetHgvsProteinAnnotation_no_change() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -142,7 +143,7 @@ public void GetHgvsProteinAnnotation_frameshift() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -156,7 +157,7 @@ public void GetHgvsProteinAnnotation_frameshift_stop_gain() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; @@ -170,7 +171,7 @@ public void GetHgvsProteinAnnotation_extension() var refSequence = new SimpleSequence(Enst00000343938GenomicSequence, 1260147 - 1); var transcript = 
GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var hgvspNotation = annotatedTranscript.HgvsProtein; diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinTests.cs new file mode 100644 index 00000000..d1474cfd --- /dev/null +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsProteinTests.cs @@ -0,0 +1,536 @@ +using System; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using Xunit; + +namespace UnitTests.VariantAnnotation.AnnotatedPositions +{ + public sealed class HgvsProteinTests + { + private const string CdsSequence = + "ATGGATGACTCGGAGACAGGTTTCAATCTGAAAGTCGTCCTGGTCAGTTTCAAGCAGTGTCTCGATGAGAAGGAAGAGGTCTTGCTGGACCCCTACATTGCCAGCTGGAAGGGCCTGGTCAGGTTTCTGAACAGCCTGGGCACCATCTTCTCATTCATCTCCAAGGACGTGGTCTCCAAGCTGCGGATCATGGAGCGCCTCAGGGGCGGCCCGCAGAGCGAGCACTACCGCAGCCTGCAGGCCATGGTGGCCCACGAGCTGAGCAACCGGCTGGTGGACCTGGAGCGCCGCTCCCACCACCCGGAGTCTGGCTGCCGGACGGTGCTGCGCCTGCACCGCGCCCTGCACTGGCTGCAGCTGTTCCTGGAGGGCCTGCGTACCAGCCCCGAGGACGCACGCACCTCCGCGCTCTGCGCCGACTCCTACAACGCCTCGCTGGCCGCCTACCACCCCTGGGTCGTGCGCCGCGCCGTCACCGTGGCCTTCTGCACGCTGCCCACACGCGAGGTCTTCCTGGAGGCCATGAACGTGGGGCCCCCGGAGCAGGCCGTGCAGATGCTAGGCGAGGCCCTCCCCTTCATCCAGCGTGTCTACAACGTCTCCCAGAAGCTCTACGCCGAGCACTCCCTGCTGGACCTGCCCTAGGGGCGGGAAGCCAGGGCCGCACCGGCTTTCCTGCTGCAGATCTGGGCTGCGGTGGCCAGGGCCGTGAGTCCCGTGGCAGAGCCTTCTGGGCGCTGCGGGAACAGGAGATCCTCTGTCGCCCCTGTGAGCTGAGCTGGTTAGGAACCACAGACTGTGACAGAGAAGGTGGCGACCAGCCCAGAAGAGGCCCACCCTCTCGGTCCGGAACAAGACGCCTCGGCCACGGCTCCCCCTCGGCCTATTACACGCGTGCGCAGCCAGGCCTCGCCAGGGTGCGGTGCAGAGCAGAGCAGGCAGGGGTGGGGGCCGGGCCTGCAAGAGCCCGAAAGGTCGCCACCCCCTAGCCTGTGGGGTGCATCTGCGAACCAGGGTGAAGTCACAGGTC
CCGGGGTGTGGAGGCTCCATCCTTTCTCCTTTCTGCCAGCCGATGTGTCCTCATCTCAGGCCCGTGCCTGGGACCCCGTGTCTGCCCAGGTGGGCAGCCTTGAGCCCAGGGGACTCAGTGCCCTCCATGCCCTGGCTGGCAGAAACCCTCAACAGCAGTCTGGGCACTGTGGGGCTCTCCCCGCCTCTCCTGCCTTGTTTGCCCCTCAGCGTGCCAGGCAGACTGGGGGCAGGACAGCCGGAAGCTGAGACCAAGGCTCCTCACAGAAGGGCCCAGGAAGTCCCCGCCCTTGGGACAGCCTCCTCCGTAGCCCCTGCACGGCACCAGTTCCCCGAGGGACGCAGCAGGCCGCCTCCCGCAGCGGCCGTGGGTCTGCACAGCCCAGCCCAGCCCAAGGCCCCCAGGAGCTGGGACTCTGCTACACCCAGTGAAATGCTGTGTCCCTTCTCCCCCGTGCCCCTTGATGCCCCCTCCCCACAGTGCTCAGGAGACCCGTGGGGCACGGAACAGGAGGGTCTGGACCCTGTGGCCCAGCCAAAGGCTACCAGACAGCCACAACCAGCCCAGCCACCATCCAGTGCCTGGGGCCTGGCCACTGGCTCTTCACAGTGGACCCCAGCACCTCGGGGTGGCAGAGGGACGGCCCCCACGGCCCAGCAGACATGCGAGCTTCCAGAGTGCAATCTATGTGATGTCTTCCAACGTTAATAAATCACACAGCCTCCCAGGAGGGAGACGCTGGGGTGCAC"; + + private const string AaSequence = + "MDDSETGFNLKVVLVSFKQCLDEKEEVLLDPYIASWKGLVRFLNSLGTIFSFISKDVVSKLRIMERLRGGPQSEHYRSLQAMVAHELSNRLVDLERRSHHPESGCRTVLRLHRALHWLQLFLEGLRTSPEDARTSALCADSYNASLAAYHPWVVRRAVTVAFCTLPTREVFLEAMNVGPPEQAVQMLGEALPFIQRVYNVSQKLYAEHSLLDLP*"; + + [Fact] + public void GetHgvsProteinAnnotation_Substitution() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", "ENST00000343938.4:c.5A>C", + CdsSequence, AaSequence, 5, 5, 2, "D", "A", "C", false, AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Asp2Ala)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Silent_SameAminoAcids() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.10_12delinsAGT", CdsSequence, AaSequence, 10, 12, 4, "S", "S", "AGT", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENST00000343938.4:c.10_12delinsAGT(p.(Ser4=))", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Silent_StopRetained() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001300882.1", + "NM_001313953.1:c.937_938del", CdsSequence, AaSequence, 938, 937, 313, "*", "*", "", false, + 
AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NM_001313953.1:c.937_938del(p.(Ter313=))", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Insertion_InsertStopCodon() + { + const string aaSequence = "MGRPRVRQAWPGCCGHTGRLPAGRGYLASHMCDPAGAELIGDGMSDPTPPSNACTGHIQDPASQRLTWNKSPKSVLVIKKMRDASLLQPFKELCTHLMEENMIVYVEKKVLEDPAIASDESFGAVKKKFCTFREDYDDISNQIDFIICLGGDGTLLYASSLFQGSVPPVMAFHLGSLGFLTPFSFENFQSQVTQVIEGNAAVVLRSRLKVRVVKELRGKKTAVHNGLGENGSQAAGLDMDVGKQAMQYQVLNEVVIDRGPSSYLSNVDVYLDGHLITTVQGDGVIVSTPTGSTAYAAAAGASMIHPNVPAIMITPICPHSLSFRPIVVPAGVELKIMLSPEARNTAWVSFDGRKRQEIRHGDSISITTSCYPLPSICVRDPVSDWFESLAQCLHWNVRKKQAHFEEEEEEEEEG"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001185924.1", + "NM_001198995.1:c.1239_1240insTAG", CdsSequence, aaSequence, 1240, 1241, 414, "", "*", "TAG", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001185924.1:p.(Gly414Ter)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Insertion() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.6_7insTTC", CdsSequence, AaSequence, 7, 6, 3, "", "F", "TTC", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Asp2_Asp3insPhe)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Insertion_SilentTerminalCodon_TrimmedTooMuch() + { + // VID: chr2-48033998-A-AAG (NC_000002.11:g.48033998_48033999insAG), key: NM_001281493.1, expected: NP_001268422.1:p.(Ter1059Ter), actual: NP_001268422.1:p.(Xaa1060GlyfsTer) + const string cdsSequence = + 
"ATGGTGACTGGAAATGGCTCTCTTAAAAGGAAAAGCTCTAGGAAGGAAACGCCCTCAGCCACCAAACAAGCAACTAGCATTTCATCAGAAACCAAGAATACTTTGAGAGCTTTCTCTGCCCCTCAAAATTCTGAATCCCAAGCCCACGTTAGTGGAGGTGGTGATGACAGTAGTCGCCCTACTGTTTGGTATCATGAAACTTTAGAATGGCTTAAGGAGGAAAAGAGAAGAGATGAGCACAGGAGGAGGCCTGATCACCCCGATTTTGATGCATCTACACTCTATGTGCCTGAGGATTTCCTCAATTCTTGTACTCCTGGGATGAGGAAGTGGTGGCAGATTAAGTCTCAGAACTTTGATCTTGTCATCTGTTACAAGGTGGGGAAATTTTATGAGCTGTACCACATGGATGCTCTTATTGGAGTCAGTGAACTGGGGCTGGTATTCATGAAAGGCAACTGGGCCCATTCTGGCTTTCCTGAAATTGCATTTGGCCGTTATTCAGATTCCCTGGTGCAGAAGGGCTATAAAGTAGCACGAGTGGAACAGACTGAGACTCCAGAAATGATGGAGGCACGATGTAGAAAGATGGCACATATATCCAAGTATGATAGAGTGGTGAGGAGGGAGATCTGTAGGATCATTACCAAGGGTACACAGACTTACAGTGTGCTGGAAGGTGATCCCTCTGAGAACTACAGTAAGTATCTTCTTAGCCTCAAAGAAAAAGAGGAAGATTCTTCTGGCCATACTCGTGCATATGGTGTGTGCTTTGTTGATACTTCACTGGGAAAGTTTTTCATAGGTCAGTTTTCAGATGATCGCCATTGTTCGAGATTTAGGACTCTAGTGGCACACTATCCCCCAGTACAAGTTTTATTTGAAAAAGGAAATCTCTCAAAGGAAACTAAAACAATTCTAAAGAGTTCATTGTCCTGTTCTCTTCAGGAAGGTCTGATACCCGGCTCCCAGTTTTGGGATGCATCCAAAACTTTGAGAACTCTCCTTGAGGAAGAATATTTTAGGGAAAAGCTAAGTGATGGCATTGGGGTGATGTTACCCCAGGTGCTTAAAGGTATGACTTCAGAGTCTGATTCCATTGGGTTGACACCAGGAGAGAAAAGTGAATTGGCCCTCTCTGCTCTAGGTGGTTGTGTCTTCTACCTCAAAAAATGCCTTATTGATCAGGAGCTTTTATCAATGGCTAATTTTGAAGAATATATTCCCTTGGATTCTGACACAGTCAGCACTACAAGATCTGGTGCTATCTTCACCAAAGCCTATCAACGAATGGTGCTAGATGCAGTGACATTAAACAACTTGGAGATTTTTCTGAATGGAACAAATGGTTCTACTGAAGGAACCCTACTAGAGAGGGTTGATACTTGCCATACTCCTTTTGGTAAGCGGCTCCTAAAGCAATGGCTTTGTGCCCCACTCTGTAACCATTATGCTATTAATGATCGTCTAGATGCCATAGAAGACCTCATGGTTGTGCCTGACAAAATCTCCGAAGTTGTAGAGCTTCTAAAGAAGCTTCCAGATCTTGAGAGGCTACTCAGTAAAATTCATAATGTTGGGTCTCCCCTGAAGAGTCAGAACCACCCAGACAGCAGGGCTATAATGTATGAAGAAACTACATACAGCAAGAAGAAGATTATTGATTTTCTTTCTGCTCTGGAAGGATTCAAAGTAATGTGTAAAATTATAGGGATCATGGAAGAAGTTGCTGATGGTTTTAAGTCTAAAATCCTTAAGCAGGTCATCTCTCTGCAGACAAAAAATCCTGAAGGTCGTTTTCCTGATTTGACTGTAGAATTGAACCGATGGGATACAGCCTTTGACCATGAAAAGGCTCGAAAGACTGGACTTATTACTCCCAAAGCAGGCTTTGACTCTGATTATGACCAAGCTCTTGCTGACATAAGAGAAAATGAACAGAGCCTCCTGGAATACCTAGAGAAACAGCGCAACAGAATTGGCTGTAGGACCATAGTCT
ATTGGGGGATTGGTAGGAACCGTTACCAGCTGGAAATTCCTGAGAATTTCACCACTCGCAATTTGCCAGAAGAATACGAGTTGAAATCTACCAAGAAGGGCTGTAAACGATACTGGACCAAAACTATTGAAAAGAAGTTGGCTAATCTCATAAATGCTGAAGAACGGAGGGATGTATCATTGAAGGACTGCATGCGGCGACTGTTCTATAACTTTGATAAAAATTACAAGGACTGGCAGTCTGCTGTAGAGTGTATCGCAGTGTTGGATGTTTTACTGTGCCTGGCTAACTATAGTCGAGGGGGTGATGGTCCTATGTGTCGCCCAGTAATTCTGTTGCCGGAAGATACCCCCCCCTTCTTAGAGCTTAAAGGATCACGCCATCCTTGCATTACGAAGACTTTTTTTGGAGATGATTTTATTCCTAATGACATTCTAATAGGCTGTGAGGAAGAGGAGCAGGAAAATGGCAAAGCCTATTGTGTGCTTGTTACTGGACCAAATATGGGGGGCAAGTCTACGCTTATGAGACAGGCTGGCTTATTAGCTGTAATGGCCCAGATGGGTTGTTACGTCCCTGCTGAAGTGTGCAGGCTCACACCAATTGATAGAGTGTTTACTAGACTTGGTGCCTCAGACAGAATAATGTCAGGTGAAAGTACATTTTTTGTTGAATTAAGTGAAACTGCCAGCATACTCATGCATGCAACAGCACATTCTCTGGTGCTTGTGGATGAATTAGGAAGAGGTACTGCAACATTTGATGGGACGGCAATAGCAAATGCAGTTGTTAAAGAACTTGCTGAGACTATAAAATGTCGTACATTATTTTCAACTCACTACCATTCATTAGTAGAAGATTATTCTCAAAATGTTGCTGTGCGCCTAGGACATATGGCATGCATGGTAGAAAATGAATGTGAAGACCCCAGCCAGGAGACTATTACGTTCCTCTATAAATTCATTAAGGGAGCTTGTCCTAAAAGCTATGGCTTTAATGCAGCAAGGCTTGCTAATCTCCCAGAGGAAGTTATTCAAAAGGGACATAGAAAAGCAAGAGAATTTGAGAAGATGAATCAGTCACTACGATTATTTCGGGAAGTTTGCCTGGCTAGTGAAAGGTCAACTGTAGATGCTGAAGCTGTCCATAAATTGCTGACTTTGATTAAGGAATTATAGACTGACTACATTGGAAGCTTTGAGTTGACTTCTGACAAAGGTGGTAAATTCAGACAACATTATGATCTAATAAACTTTATTTTTTAAAAATGAAAAAAAAAAAAAAAAAAAA"; + const string aaSequence = + 
"MVTGNGSLKRKSSRKETPSATKQATSISSETKNTLRAFSAPQNSESQAHVSGGGDDSSRPTVWYHETLEWLKEEKRRDEHRRRPDHPDFDASTLYVPEDFLNSCTPGMRKWWQIKSQNFDLVICYKVGKFYELYHMDALIGVSELGLVFMKGNWAHSGFPEIAFGRYSDSLVQKGYKVARVEQTETPEMMEARCRKMAHISKYDRVVRREICRIITKGTQTYSVLEGDPSENYSKYLLSLKEKEEDSSGHTRAYGVCFVDTSLGKFFIGQFSDDRHCSRFRTLVAHYPPVQVLFEKGNLSKETKTILKSSLSCSLQEGLIPGSQFWDASKTLRTLLEEEYFREKLSDGIGVMLPQVLKGMTSESDSIGLTPGEKSELALSALGGCVFYLKKCLIDQELLSMANFEEYIPLDSDTVSTTRSGAIFTKAYQRMVLDAVTLNNLEIFLNGTNGSTEGTLLERVDTCHTPFGKRLLKQWLCAPLCNHYAINDRLDAIEDLMVVPDKISEVVELLKKLPDLERLLSKIHNVGSPLKSQNHPDSRAIMYEETTYSKKKIIDFLSALEGFKVMCKIIGIMEEVADGFKSKILKQVISLQTKNPEGRFPDLTVELNRWDTAFDHEKARKTGLITPKAGFDSDYDQALADIRENEQSLLEYLEKQRNRIGCRTIVYWGIGRNRYQLEIPENFTTRNLPEEYELKSTKKGCKRYWTKTIEKKLANLINAEERRDVSLKDCMRRLFYNFDKNYKDWQSAVECIAVLDVLLCLANYSRGGDGPMCRPVILLPEDTPPFLELKGSRHPCITKTFFGDDFIPNDILIGCEEEEQENGKAYCVLVTGPNMGGKSTLMRQAGLLAVMAQMGCYVPAEVCRLTPIDRVFTRLGASDRIMSGESTFFVELSETASILMHATAHSLVLVDELGRGTATFDGTAIANAVVKELAETIKCRTLFSTHYHSLVEDYSQNVAVRLGHMACMVENECEDPSQETITFLYKFIKGACPKSYGFNAARLANLPEEVIQKGHRKAREFEKMNQSLRLFREVCLASERSTVDAEAVHKLLTLIKEL*"; + + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001268422.1", + "NM_001281493.1:c.3176_3177insAG", cdsSequence, aaSequence, 3177, 3176, 1059, "*", "*X", "AG", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NM_001281493.1:c.3176_3177insAG(p.(Ter1059=))", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Duplication_RightShifted() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.7_9dup", CdsSequence, AaSequence, 10, 9, 4, "", "D", "GAC", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Asp3dup)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Duplication_MissingOutput() + { + // TODO: We still need to modify FullTranscriptAnnotator to provide this information + // VID: chr1-120057267-G-GAG (NC_000001.10:g.120057267_120057268dup), key: NM_000862.2, expected: 
NP_000853.1:p.(=), actual: + const string cdsSequence = + "ATGACGGGCTGGAGCTGCCTTGTGACAGGAGCAGGAGGGTTTCTGGGACAGAGGATCATCCGCCTCTTGGTGAAGGAGAAGGAGCTGAAGGAGATCAGGGTCTTGGACAAGGCCTTCGGACCAGAATTGAGAGAGGAATTTTCTAAACTCCAGAACAAGACCAAGCTGACAGTGCTGGAAGGAGACATTCTGGATGAGCCATTCCTGAAGAGAGCCTGCCAGGACGTCTCGGTCATCATCCACACCGCCTGTATCATTGATGTCTTCGGTGTCACTCACAGAGAGTCTATCATGAATGTCAATGTGAAAGGTACCCAGCTCCTGTTAGAGGCCTGTGTCCAAGCTAGTGTGCCAGTCTTCATCTACACCAGTAGCATAGAGGTAGCCGGGCCCAACTCCTACAAGGAAATCATCCAGAATGGCCATGAAGAAGAGCCTCTGGAAAACACATGGCCCGCTCCATACCCACACAGCAAAAAGCTTGCTGAGAAGGCTGTACTGGCGGCTAACGGGTGGAATCTGAAAAACGGCGGCACCCTGTACACTTGTGCCTTACGACCCATGTATATCTATGGGGAAGGAAGCCGATTCCTTTCTGCTAGTATAAACGAGGCCCTGAACAACAATGGGATCCTGTCAAGTGTTGGAAAGTTCTCCACTGTTAACCCAGTCTATGTTGGCAATGTGGCCTGGGCCCACATTCTGGCCTTGAGGGCCCTGCAGGACCCCAAGAAGGCCCCAAGCATCCGAGGACAGTTCTACTATATCTCAGATGACACGCCTCACCAAAGCTATGATAACCTTAATTACACCCTGAGCAAAGAGTTCGGCCTCCGCCTTGATTCCAGATGGAGCTTTCCTTTATCCCTGATGTATTGGATTGGCTTCCTGCTGGAAATAGTGAGCTTCCTACTCAGGCCAATTTACACCTATCGACCGCCCTTCAACCGCCACATAGTCACATTGTCAAATAGCGTATTCACCTTCTCTTATAAGAAGGCTCAGCGAGATCTGGCGTATAAGCCACTCTACAGCTGGGAGGAAGCCAAGCAGAAAACGGTGGAGTGGGTTGGTTCCCTTGTGGACCGGCACAAGGAGACCCTGAAGTCCAAGACTCAGTGATTTAAGGATGACAGAGATGTGCATGTGGGTATTGTTAGGAGATGTCATCAAGCTCCACCCTCCTGGCCTCATACAGAAAGTGACAAGGGCACAAGCTCAGGTCCTGCTGCCTCCCTTTCATACAATGGCCAACTTATTGTATTCCTCATGTCATCAAAACCTGCGCAGTCATTGGCCCAACAAGAAGGTTTCTGTCCTAATCATATACCAGAGGAAAGACCATGTGGTTTGCTGTTACCAAATCTCAGTAGCTGATTCTGAACAATTTAGGGACTCTTTTAACTTGAGGGTCGTTTTGACTACTAGAGCTCCATTTCTACTCTTAAATGAGAAAGGATTTCCTTTCTTTTTAATCTTCCATTCCTTCACATAGTTTGATAAAAAGATCAATAAATGTTTGAATGTTTAATGTGAAAAAAAAA"; + const string aaSequence = + 
"MTGWSCLVTGAGGFLGQRIIRLLVKEKELKEIRVLDKAFGPELREEFSKLQNKTKLTVLEGDILDEPFLKRACQDVSVIIHTACIIDVFGVTHRESIMNVNVKGTQLLLEACVQASVPVFIYTSSIEVAGPNSYKEIIQNGHEEEPLENTWPAPYPHSKKLAEKAVLAANGWNLKNGGTLYTCALRPMYIYGEGSRFLSASINEALNNNGILSSVGKFSTVNPVYVGNVAWAHILALRALQDPKKAPSIRGQFYYISDDTPHQSYDNLNYTLSKEFGLRLDSRWSFPLSLMYWIGFLLEIVSFLLRPIYTYRPPFNRHIVTLSNSVFTFSYKKAQRDLAYKPLYSWEEAKQKTVEWVGSLVDRHKETLKSKTQ*"; + + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_000853.1", + "NM_000862.2:c.1121_1122dup", cdsSequence, aaSequence, 1123, 1122, 374, "*", "X", "AG", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NM_000862.2:c.1121_1122dup(p.(Ter374=))", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Deletion() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.10_12del", CdsSequence, AaSequence, 10, 12, 4, "S", "", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Ser4del)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Deletion_StartLost() + { + const string cdsSequence = + 
"ATGCTGAAAAAGCCTCTCTCAGCTGTGACCTGGCTCTGCATTTTCATCGTGGCCTTTGTCAGCCACCCAGCGTGGCTGCAGAAGCTCTCTAAGCACAAGACACCAGCACAGCCACAGCTCAAAGCGGCCAACTGCTGTGAGGAGGTGAAGGAGCTCAAGGCCCAAGTTGCCAACCTTAGCAGCCTGCTGAGTGAACTGAACAAGAAGCAGGAGAGGGACTGGGTCAGCGTGGTCATGCAGGTGATGGAGCTGGAGAGCAACAGCAAGCGCATGGAGTCGCGGCTCACAGATGCTGAGAGCAAGTACTCCGAGATGAACAACCAAATTGACATCATGCAGCTGCAGGCAGCACAGACGGTCACTCAGACCTCCGCAGATGCCATCTACGACTGCTCTTCCCTCTACCAGAAGAACTACCGCATCTCTGGAGTGTATAAGCTTCCTCCTGATGACTTCCTGGGCAGCCCTGAACTGGAGGTGTTCTGTGACATGGAGACTTCAGGCGGAGGCTGGACCATCATCCAGAGACGAAAAAGTGGCCTTGTCTCCTTCTACCGGGACTGGAAGCAGTACAAGCAGGGCTTTGGCAGCATCCGTGGGGACTTCTGGCTGGGGAACGAACACATCCACCGGCTCTCCAGACAGCCAACCCGGCTGCGTGTAGAGATGGAGGACTGGGAGGGCAACCTGCGCTACGCTGAGTATAGCCACTTTGTTTTGGGCAATGAACTCAACAGCTATCGCCTCTTCCTGGGGAACTACACTGGCAATGTGGGGAACGACGCCCTCCAGTATCATAACAACACAGCCTTCAGCACCAAGGACAAGGACAATGACAACTGCTTGGACAAGTGTGCACAGCTCCGCAAAGGTGGCTACTGGTACAACTGCTGCACAGACTCCAACCTCAATGGAGTGTACTACCGCCTGGGTGAGCACAATAAGCACCTGGATGGCATCACCTGGTATGGCTGGCATGGATCTACCTACTCCCTCAAACGGGTGGAGATGAAAATCCGCCCAGAAGACTTCAAGCCTTAAAAGGAGGCTGCCGTGGAGCACGGATACAGAAACTGAGACACGTGGAGACTGGATGAGGGCAGATGAGGACAGGAAGAGAGTGTTAGAAAGGGTAGGACTGAGAAACAGCCTATAATCTCCAAAGAAAGAATAAGTCTCCAAGGAGCACAAAAAAATCATATGTACCAAGGATGTTACAGTAAACAGGATGAACTATTTAAACCCACTGGGTCCTGCCACATCCTTCTCAAGGTGGTAGACTGAGTGGGGTCTCTCTGCCCAAGATCCCTGACATAGCAGTAGCTTGTCTTTTCCACATGATTTGTCTGTGAAAGAAAATAATTTTGAGATCGTTTTATCTATTTTCTCTACGGCTTAGGCTATGTGAGGGCAAAACACAAATCCCTTTGCTAAAAAGAACCATATTATTTTGATTCTCAAAGGATAGGCCTTTGAGTGTTAGAGAAAGGAGTGAAGGAGGCAGGTGGGAAATGGTATTTCTATTTTTAAATCCAGTGAAATTATCTTGAGTCTACACATTATTTTTAAAACACAAAAATTGTTCGGCTGGAACTGACCCAGGCTGGACTTGCGGGGAGGAAACTCCAGGGCACTGCATCTGGCGATCAGACTCTGAGCACTGCCCCTGCTCGCCTTGGTCATGTACAGCACTGAAAGGAATGAAGCACCAGCAGGAGGTGGACAGAGTCTCTCATGGATGCCGGCACAAAACTGCCTTAAAATATTCATAGTTAATACAGGTATATCTATTTTTATTTACTTTGTAAGAAACAAGCTCAAGGAGCTTCCTTTTAAATTTTGTCTGTAGGAAATGGTTGAAAACTGAAGGTAGATGGTGTTATAGTTAATAATAAATGCTGTAAATAAGCATCTCACTTTGTAAAAATAAAATATTGTGGTTTTGTTTTAAACATTCAACGTTTCTTTTCCTTCTACAATAAACACTTTCAAAATGTGA
AAAAAAAAAAAAAAAAA"; + const string aaSequence = + "MLKKPLSAVTWLCIFIVAFVSHPAWLQKLSKHKTPAQPQLKAANCCEEVKELKAQVANLSSLLSELNKKQERDWVSVVMQVMELESNSKRMESRLTDAESKYSEMNNQIDIMQLQAAQTVTQTSADAIYDCSSLYQKNYRISGVYKLPPDDFLGSPELEVFCDMETSGGGWTIIQRRKSGLVSFYRDWKQYKQGFGSIRGDFWLGNEHIHRLSRQPTRLRVEMEDWEGNLRYAEYSHFVLGNELNSYRLFLGNYTGNVGNDALQYHNNTAFSTKDKDNDNCLDKCAQLRKGGYWYNCCTDSNLNGVYYRLGEHNKHLDGITWYGWHGSTYSLKRVEMKIRPEDFKP*"; + + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_066969.1", + "NM_021146.3:c.3_4del", cdsSequence, aaSequence, 3, 4, 1, "ML", "IX", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_066969.1:p.Met1?", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Deletion_TerminalCodon() + { + const string cdsSequence = + "ATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGCTGCTGTGGCACCGCGCCCAGTATGAGCCGCTCTTCCACATGCTCAGTGGCCCCGAGGCCTATGTGTTCACCTGCATCAACCAGACAGCGGAGCAGCAAGAGCTGGAGGACGAGCAACGGCGTCTGTGTGACGTGCAGCCCTTCCTGCCCGTCCTGCGCCTGGTGGCCCGTGAGGGCGACCGCGTGAAGAAGCTCATCAACTCACAGATCAGCCTCCTCATCGGCAAAGGCCTCCACGAGTTTGACTCCTTGTGCGACCCAGAAGTGAACGACTTTCGCGCCAAGATGTGCCAATTCTGCGAGGAGGCGGCCGCCCGCCGGCAGCAGCTGGGCTGGGAGGCCTGGCTGCAGTACAGTTTCCCCCTGCAGCTGGAGCCCTCGGCTCAAACCTGGGGGCCTGGTACCCTGCGGCTCCCGAACCGGGCCCTTCTGGTCAACGTTAAGTTTGAGGGCAGCGAGGAGAGCTTCACCTTCCAGGTGTCCACCAAGGACGTGCCGCTGGCGCTGATGGCCTGTGCCCTGCGGAAGAAGGCCACAGTGTTCCGGCAGCCGCTGGTGGAGCAGCCGGAAGACTACACGCTGCAGGTGAACGGCAGGCATGAGTACCTGTATGGCAGCTACCCGCTCTGCCAGTTCCAGTACATCTGCAGCTGCCTGCACAGTGGGTTGACCCCTCACCTGACCATGGTCCATTCCTCCTCCATCCTCGCCATGCGGGATGAGCAGAGCAACCCTGCCCCCCAGGTCCAGAAACCGCGTGCCAAACCACCTCCCATTCCTGCGAAGAAGCCTTCCTCTGTGTCCCTGTGGTCCCTGGAGCAGCCGTTCCGCATCGAGCTCATCCAGGGCAGCAAAGTGAACGCCGACGAGCGGATGAAGCTGGTGGTGCAGGCCGGGCTTTTCCACGGCAACGAGATGCTGTGCAAGACGGTGTCCAGCTCGGAGGTGAGCGTGTGCTCGGAGCCCGTGTGGAAGCAGCGGCTGGAGTTCGACATCAACATCTGCGACCTGCCCCGCATGGCCCGTCTCTGCTTTGCGCTGTACGCCGTGATCGAGAAAGCCAAGAAGGCTCGCTCCACCAAGAAGAAGT
CCAAGAAGGCGGACTGCCCCATTGCCTGGGCCAACCTCATGCTGTTTGACTACAAGGACCAGCTTAAGACCGGGGAACGCTGCCTCTACATGTGGCCCTCCGTCCCAGATGAGAAGGGCGAGCTGCTGAACCCCACGGGCACTGTGCGCAGTAACCCCAACACGGATAGCGCCGCTGCCCTGCTCATCTGCCTGCCCGAGGTGGCCCCGCACCCCGTGTACTACCCCGCCCTGGAGAAGATCTTGGAGCTGGGGCGACACAGCGAGTGTGTGCATGTCACCGAGGAGGAGCAGCTGCAGCTGCGGGAAATCCTGGAGCGGCGGGGGTCTGGGGAGCTGTATGAGCACGAGAAGGACCTGGTGTGGAAGCTGCGGCATGAAGTCCAGGAGCACTTCCCGGAGGCGCTAGCCCGGCTGCTGCTGGTCACCAAGTGGAACAAGCATGAGGATGTGGCCCAGATGCTCTACCTGCTGTGCTCCTGGCCGGAGCTGCCCGTCCTGAGCGCCCTGGAGCTGCTAGACTTCAGCTTCCCCGATTGCCACGTAGGCTCCTTCGCCATCAAGTCGCTGCGGAAACTGACGGACGATGAGCTGTTCCAGTACCTGCTGCAGCTGGTGCAGGTGCTCAAGTACGAGTCCTACCTGGACTGCGAGCTGACCAAATTCCTGCTGGACCGGGCCCTGGCCAACCGCAAGATCGGCCACTTCCTTTTCTGGCACCTCCGCTCCGAGATGCACGTGCCGTCGGTGGCCCTGCGCTTCGGCCTCATCCTGGAGGCCTACTGCAGGGGCAGCACCCACCACATGAAGGTGCTGATGAAGCAGGGGGAAGCACTGAGCAAACTGAAGGCCCTGAATGACTTCGTCAAGCTGAGCTCTCAGAAGACCCCCAAGCCCCAGACCAAGGAGCTGATGCACTTGTGCATGCGGCAGGAGGCCTACCTAGAGGCCCTCTCCCACCTGCAGTCCCCACTCGACCCCAGCACCCTGCTGGCTGAAGTCTGCGTGGAGCAGTGCACCTTCATGGACTCCAAGATGAAGCCCCTGTGGATCATGTACAGCAACGAGGAGGCAGGCAGCGGCGGCAGCGTGGGCATCATCTTTAAGAACGGGGATGACCTCCGGCAGGACATGCTGACCCTGCAGATGATCCAGCTCATGGACGTCCTGTGGAAGCAGGAGGGGCTGGACCTGAGGATGACCCCCTATGGCTGCCTCCCCACCGGGGACCGCACAGGCCTCATTGAGGTGGTACTCCGTTCAGACACCATCGCCAACATCCAACTCAACAAGAGCAACATGGCAGCCACAGCCGCCTTCAACAAGGATGCCCTGCTCAACTGGCTGAAGTCCAAGAACCCGGGGGAGGCCCTGGATCGAGCCATTGAGGAGTTCACCCTCTCCTGTGCTGGCTATTGTGTGGCCACATATGTGCTGGGCATTGGCGATCGGCACAGCGACAACATCATGATCCGAGAGAGTGGGCAGCTGTTCCACATTGATTTTGGCCACTTTCTGGGGAATTTCAAGACCAAGTTTGGAATCAACCGCGAGCGTGTCCCATTCATCCTCACCTACGACTTTGTCCATGTGATTCAGCAGGGGAAGACTAATAATAGTGAGAAATTTGAACGGTTCCGGGGCTACTGTGAAAGGGCCTACACCATCCTGCGGCGCCACGGGCTTCTCTTCCTCCACCTCTTTGCCCTGATGCGGGCGGCAGGCCTGCCTGAGCTCAGCTGCTCCAAAGACATCCAGTATCTCAAGGACTCCCTGGCACTGGGGAAAACAGAGGAGGAGGCACTGAAGCACTTCCGAGTGAAGTTTAACGAAGCCCTCCGTGAGAGCTGGAAAACCAAAGTGAACTGGCTGGCCCACAACGTGTCCAAAGACAACAGGCAGTAGTGGCTCCTCCCAGCCCTGGGCCCAAGAGGAGGCGGCTGCGGGTCGTGGGGACCAAGCACATTGGTCCTAAAGGGGCTGAAGAGCCTGAACTGCACC
TAACGGGAAAGAACCGACATGGCTGCCTTTTGTTTACACTGGTTATTTATTTATGACTTGAAATAGTTTAAGGAGCTAAACAGCCATAAACGGAAACGCCTCCTTCATGCAGCGGCGGTGCTGGGCCCCCCGAGGCTGCACCTGGCTCTCGGCTGAGGATTGTCACCCCAAGTCTTCCAGCTGGTGGATCTGGGCCCAGCAAAGACTGTTCTCCTCCCGAGGGAACCTTCTTCCCAGGCCTCCCGCCAGACTGCCTGGGTCCTGGCGCCTGGCGGTCACCTGGTGCCTACTGTCCGACAGGATGCCTTGATCCTCGTGCGACCCACCCTGTGTATCCTCCCTAGACTGAGTTCTGGCAGCTCCCCGAGGCAGCCGGGGTACCCTCTAGATTCAGGGATGCTTGCTCTCCACTTTTCAAGTGGGTCTTGGGTACGAGAATTCCCTCATCTTTCTCTACTGTAAAGTGATTTTGTTTGCAGGTAAGAAAATAATAGATGACTCACCACACCTCTACGGCTGGGGAGATCAGGCCCAGCCCCATAAAGGAGAATCTACGCTGGTCCTCAGGACGTGTTAAAGAGATCTGGGCCTCATGTAGCTCACCCCGGTCACGCATGAAGGCAAAAGCAGGTCAGAAGCGAATACTCTGCCATTATCTCAAAAATCTTTTTTTTTTTTTTGAGATGGGGTCTTCCTCTGTTGCCCAGGCTGGAGTGCAGTGGTGCAATCTTGGCTCACTGTAACCTCCGCCTCCCAGGTTCAAGTGATTCTTCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGTGTGCACCACCGTACCCAGCTAATTTTTGTATTTTAGTAGAGACGGGGGTTTCACCATGTTGGCTGGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCACCCGCCTGAGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCGGCCCACTCTGCCATTGTCTAAGCCACCTCTGAAAGCAGGTTTTAACAAAAGGATGAGGCCAGAACTCTTCCAGAACCATCACCTTTGGGAACCTGCTGTGAGAGTGCTGAGGTACCAGAAGTGTGAGAACGAGGGGGCGTGCTGGGATCTTTCTCTCTGACTATACTTAGTTTGAAATGGTGCAGGCTTAGTCTTAAGCCTCCAAAGGCCTGGATTTGAGCAGCTTTAGAAATGCAGGTTCTAGGGCTTCTCCCAGCCTTCAGAAGCCAACTAACTCTGCAGATGGGGCTAGGACTGTGGGCTTTTAGCAGCCCACAGGTGATCCTAACATATCAGGCCATGGACTCAGGACCTGCCCGGTGATGCTGTTGATTTCTCAAAGGTCTTCCAAAACTCAACAGAGCCAGAAGTAGCCGCCCGCTCAGCGGCTCAGGTGCCAGCTCTGTTCTGATTCACCAGGGGTCCGTCAGTAGTCATTGCCACCCGCGGGGCACCTCCCTGGCCACACGCCTGTTCCCAGCAAGTGCTGAAACTCACTAGACCGTCTGCCTGTTTCGAAATGGGGAAAGCCGTGCGTGCGCGTTATTTATTTAAGTGCGCCTGTGTGCGCGGGTGTGGGAGCACACTTTGCAAAGCCACAGCGTTTCTGGTTTTGGGTGTACAGTCTTGTGTGCCTGGCGAGAAGAATATTTTCTATTTTTTTAAGTCATTTCATGTTTCTGTCTGGGGAAGGCAAGTTAGTTAAGTATCACTGATGTGGGTTGAGACCAGCACTCTGTGAAACCTTGAAATGAGAAGTAAAGGCAGATGAAAAGAAAGAAAAAGCCTTTTTATGTTCTTTTATGTTCTCGGCTCAAAAAGAAACAAGGGAGTGTAGGTTTAAAACCAAAACAGGAGAGAAGACAAACCCCGCTCCGGCTGGAGTTAGTTAGAACCAGAACTTTATTGTAGCGGATACACTTTCTGACCTATCATGAGTATACACATCTGCGAAGGGAAACCGCGCGGCGACA"; + const string 
aaSequence = + "MPPGVDCPMEFWTKEENQSVVVDFLLPTGVYLNFPVSRNANLSTIKQLLWHRAQYEPLFHMLSGPEAYVFTCINQTAEQQELEDEQRRLCDVQPFLPVLRLVAREGDRVKKLINSQISLLIGKGLHEFDSLCDPEVNDFRAKMCQFCEEAAARRQQLGWEAWLQYSFPLQLEPSAQTWGPGTLRLPNRALLVNVKFEGSEESFTFQVSTKDVPLALMACALRKKATVFRQPLVEQPEDYTLQVNGRHEYLYGSYPLCQFQYICSCLHSGLTPHLTMVHSSSILAMRDEQSNPAPQVQKPRAKPPPIPAKKPSSVSLWSLEQPFRIELIQGSKVNADERMKLVVQAGLFHGNEMLCKTVSSSEVSVCSEPVWKQRLEFDINICDLPRMARLCFALYAVIEKAKKARSTKKKSKKADCPIAWANLMLFDYKDQLKTGERCLYMWPSVPDEKGELLNPTGTVRSNPNTDSAAALLICLPEVAPHPVYYPALEKILELGRHSECVHVTEEEQLQLREILERRGSGELYEHEKDLVWKLRHEVQEHFPEALARLLLVTKWNKHEDVAQMLYLLCSWPELPVLSALELLDFSFPDCHVGSFAIKSLRKLTDDELFQYLLQLVQVLKYESYLDCELTKFLLDRALANRKIGHFLFWHLRSEMHVPSVALRFGLILEAYCRGSTHHMKVLMKQGEALSKLKALNDFVKLSSQKTPKPQTKELMHLCMRQEAYLEALSHLQSPLDPSTLLAEVCVEQCTFMDSKMKPLWIMYSNEEAGSGGSVGIIFKNGDDLRQDMLTLQMIQLMDVLWKQEGLDLRMTPYGCLPTGDRTGLIEVVLRSDTIANIQLNKSNMAATAAFNKDALLNWLKSKNPGEALDRAIEEFTLSCAGYCVATYVLGIGDRHSDNIMIRESGQLFHIDFGHFLGNFKTKFGINRERVPFILTYDFVHVIQQGKTNNSEKFERFRGYCERAYTILRRHGLLFLHLFALMRAAGLPELSCSKDIQYLKDSLALGKTEEEALKHFRVKFNEALRESWKTKVNWLAHNVSKDNRQ*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_005017.3", + "NM_005026.3:c.164_165del", cdsSequence, aaSequence, 164, 165, 55, "Y", "X", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_005017.3:p.(Tyr55Ter)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Deletion_TerminalCodon_GivenWrongAltAminoAcid() + { + // VID: chr1-8073599-GGG-G (NC_000001.10:g.8073600_8073601del), key: NM_018948.3, expected: NP_061821.1:p.(Pro354Ter), actual: NM_018948.3:c.1060_1061del(p.(Pro354=)) + const string cdsSequence = + 
"ATGTCAATAGCAGGAGTTGCTGCTCAGGAGATCAGAGTCCCATTAAAAACTGGATTTCTACATAATGGCCGAGCCATGGGGAATATGAGGAAGACCTACTGGAGCAGTCGCAGTGAGTTTAAAAACAACTTTTTAAATATTGACCCGATAACCATGGCCTACAGTCTGAACTCTTCTGCTCAGGAGCGCCTAATACCACTTGGGCATGCTTCCAAATCTGCTCCGATGAATGGCCACTGCTTTGCAGAAAATGGTCCATCTCAAAAGTCCAGCTTGCCCCCTCTTCTTATTCCCCCAAGTGAAAACTTGGGACCACATGAAGAGGATCAAGTTGTATGTGGTTTTAAGAAACTCACAGTGAATGGGGTTTGTGCTTCCACCCCTCCACTGACACCCATAAAAAACTCCCCTTCCCTTTTCCCCTGTGCCCCTCTTTGTGAACGGGGTTCTAGGCCTCTTCCACCGTTGCCAATCTCTGAAGCCCTCTCTCTGGATGACACAGACTGTGAGGTGGAATTCCTAACTAGCTCAGATACAGACTTCCTTTTAGAAGACTCTACACTTTCTGATTTCAAATATGATGTTCCTGGCAGGCGAAGCTTCCGTGGGTGTGGACAAATCAACTATGCATATTTTGATACCCCAGCTGTTTCTGCAGCAGATCTCAGCTATGTGTCTGACCAAAATGGAGGTGTCCCAGATCCAAATCCTCCTCCACCTCAGACCCACCGAAGATTAAGAAGGTCTCATTCGGGACCAGCTGGCTCCTTTAACAAGCCAGCCATAAGGATATCCAACTGTTGTATACACAGAGCTTCTCCTAACTCCGATGAAGACAAACCTGAGGTTCCCCCCAGAGTTCCCATACCTCCTAGACCAGTAAAGCCAGATTATAGAAGATGGTCAGCAGAAGTTACTTCGAGCACCTATAGTGATGAAGACAGGCCTCCCAAAGTACCGCCAAGAGAACCTTTGTCACCGAGTAACTCGCGCACACCGAGTCCCAAAAGCCTTCCGTCTTACCTCAATGGGGTCATGCCCCCGACACAGAGCTTTGCCCCTGATCCCAAGTATGTCAGCAGCAAAGCACTGCAAAGACAGAACAGCGAAGGATCTGCCAGTAAGGTTCCTTGCATTCTGCCCATTATTGAAAATGGGAAGAAGGTTAGTTCAACACATTATTACCTACTACCTGAACGACCACCATACCTGGACAAATATGAAAAATTTTTTAGGGAAGCAGAAGAAACAAATGGAGGCGCCCAAATCCAGCCATTACCTGCTGACTGCGGTATATCTTCAGCCACAGAAAAGCCAGACTCAAAAACAAAAATGGATCTGGGTGGCCACGTGAAGCGTAAACATTTATCCTATGTGGTTTCTCCTTAGACCTTGGGGTCATGGTTCAGCAGAGGTTACATAGGAGCAAATGGTTCTCAATTTTCCAGTTTGATTGAAGTGCAGAGAAAAATCCCTTAGATTGCAAAATAAAATAGTTGAACTCTCTGTCTTCATGTGGAAGGTTTAGAGCAGTTGTGAGATGCTGTTATGCTGAGAAACCCTGACTTTGTTAGTGTTGGAAAAAAGTCTTACAAGTCTATAATTTAAAGATGTGATGGTGGGGAGGGGAGGATGGGGAAGCTTTTTATATATGCATACATTACATACCTATATATAAACTTGTGGTATAACCATAGACCATAGCTGCAGGTTAACCAATTAGTTACTATCGTAGAGTAATATATATTCAGAATAATAAACTCAAGCTGGAGAAATGAGTCCTGATAGACTGAAAATTGAGCAAATGGAAGAAGATACAGTATTGTTTAGATCAGAATCATTAAAAAATATTTTTGTTTAGTAAGTTTGAAGATTTCTGGCTTTTAGGCCTTTTCTATTTTGTTCCATTTATTTTTGCAGGCAATCTTTTCCATGGAGGGCAGGGTATCCATTCTTTACCATGGGTGTACCTGCTTAGGTTAAAAATCATACCAAGGCC
TCATACTTCCAGGTTTCATGTTGCGTCTTGTTGAGGGAGGGAGAGCAGGTTACTTGGCAACCATATTGTCACCTGTACCTGTCACACATCTTGAAAAATAAAACGATAATAGAACTAGTGACTAATTTTCCCTTACAGTTCCTGCTTGGTCCCACCCACTGAAGTAGCTCATCGTAGTGCGGGCCGTATTAGAGGCAGTGGGGTACGTTAGACTCAGATGGAAAAGTATTCTAGGTGCCAGTGTTAGGATGTCAGTTTTACAAAATAATGAAGCAATTAGCTATGTGATTGAGAGTTATTGTTTGGGGATGTGTGTTGTGGTTTTGCTTTTTTTTTTTAGACTGTATTAATAAACATACAACACAAGCTGGCCTTGTGTTGCTGGTTCCTATTCAGTATTTCCTGGGGATTGTTTGCTTTTTAAGTAAAACACTTCTGACCCATAGCTCAGTATGTCTGAATTCCAGAGGTCACATCAGCATCTTTCTGCTTTGAAAACTCTCACAGCTGTGGCTGCTTCACTTAGATGCAGTGAGACACATAGTTGGTGTTCCGATTTTCACATCCTTCCATGTATTTATCTTGAAGAGATAAGCACAGAAGAGAAGGTGCTCACTAACAGAGGTACATTACTGCAATGTTCTCTTAACAGTTAAACAAGCTGTTTACAGTTTAAACTGCTGAATATTATTTGAGCTATTTAAAGCTTATTATATTTTAGTATGAACTAAATGAAGGTTAAAACATGCTTAAGAAAAATGCACTGATTTCTGCATTATGTGTACAGTATTGGACAAAGGATTTTATTCATTTTGTTGCATTATTTTGAATATTGTCTTTTCATTTTAATAAAGTTATAATACTTATTTATGATACCATTAAAAAAAAAAAAAAAA"; + const string aaSequence = + "MSIAGVAAQEIRVPLKTGFLHNGRAMGNMRKTYWSSRSEFKNNFLNIDPITMAYSLNSSAQERLIPLGHASKSAPMNGHCFAENGPSQKSSLPPLLIPPSENLGPHEEDQVVCGFKKLTVNGVCASTPPLTPIKNSPSLFPCAPLCERGSRPLPPLPISEALSLDDTDCEVEFLTSSDTDFLLEDSTLSDFKYDVPGRRSFRGCGQINYAYFDTPAVSAADLSYVSDQNGGVPDPNPPPPQTHRRLRRSHSGPAGSFNKPAIRISNCCIHRASPNSDEDKPEVPPRVPIPPRPVKPDYRRWSAEVTSSTYSDEDRPPKVPPREPLSPSNSRTPSPKSLPSYLNGVMPPTQSFAPDPKYVSSKALQRQNSEGSASKVPCILPIIENGKKVSSTHYYLLPERPPYLDKYEKFFREAEETNGGAQIQPLPADCGISSATEKPDSKTKMDLGGHVKRKHLSYVVSP*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_061821.1", + "NM_018948.3:c.1060_1061del", cdsSequence, aaSequence, 1060, 1061, 354, "P", "*", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_061821.1:p.(Pro354Ter)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_DeletionInsertion() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.10_15delinsGAGACA", CdsSequence, AaSequence, 10, 15, 4, "SE", "ET", "GAGACA", + false, AminoAcidCommon.StandardAminoAcids); + 
Assert.Equal("ENSP00000343890.4:p.(Ser4_Glu5delinsGluThr)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_DeletionInsertion_TerminalCodon() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_003001.1", + "NM_003010.3:c.494_495insAG", CdsSequence, AaSequence, 495, 494, 165, "Y", "*X", "AG", + false, AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_003001.1:p.(Tyr165Ter)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaDeletion() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.643_645del", CdsSequence, AaSequence, 643, 645, 215, "*", "", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Ter215GlyextTer43)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaDeletion_CodonBug() + { + const string cdsSequence = + "ATGCTCTTTCAGTCTCACATGGCAACGAAAACAACTTGTATGTCTTCACAAGGATCAGATGATGAACAGATAAAAAGAGAAAACATTCGTTCGTTGACTATGTCTGGCCATGTTGGTTTTGAGAGTTTGCCTGATCAGCTGGTGAACAGATCCATTCAGCAAGGTTTCTGCTTTAATATTCTCTGTGTGGGGGAAACTGGAATTGGAAAATCAACACTGATTGACACATTGTTTAATACTAATTTTGAAGACTATGAATCCTCACATTTTTGCCCAAATGTTAAACTTAAAGCTCAGACATATGAACTCCAGGAAAGTAATGTTCAATTGAAATTGACCATTGTGAATACAGTGGGATTTGGTGACCAAATAAATAAAGAAGAGAGCTACCAACCAATAGTTGACTACATAGATGCTCAGTTTGAGGCCTATCTCCAAGAAGAACTGAAGATTAAGCGTTCTCTCTTTACCTACCATGATTCTCGCATCCATGTGTGTCTCTACTTCATTTCACCGACAGGCCACTCTCTGAAGACACTTGATCTCTTAACCATGAAGAACCTTGACAGCAAGGTAAACATTATACCAGTGATTGCCAAAGCAGATACGGTTTCTAAAACTGAATTACAGAAGTTTAAGATCAAGCTCATGAGTGAATTGGTCAGCAATGGCGTCCAGATATACCAGTTCCCAACGGATGATGACACTATTGCTAAGGTCAACGCTGCAATGAATGGACAGTTGCCGTTTGCTGTTGTGGGAAGTATGGATGAGGTAAAAGTCGGAAACAAGATGGTCAAAGCTCGCCAGTACCCTTGGGGTGTTGTACAAGTGGAAAATGAAAACCACTGTGACTTTGTAAAGCTGCGGGAAATGCTCATTTGTACAAATATGGAGGACCTGCGAGAGCAGACCCATACCAGGCACTATGAGCTTTACAGGCGCTGCAAACTGGAGGAAATGGGCTTTACAGATGTGGGCCCAGAAAACAAGCCAGTCAGTGTTCAAGAGACCTATGAAGCCAAAAGACATGAGTTCCATGGTGAACGTCAGAGGAAGGAAGAAGAA
ATGAAACAGATGTTTGTGCAGCGAGTAAAGGAGAAAGAAGCCATATTGAAAGAAGCTGAGAGAGAGCTACAGGCCAAATTTGAGCACCTTAAGAGACTTCACCAAGAAGAGAGAATGAAGCTTGAAGAAAAGAGAAGACTTTTGGAAGAAGAAATAATTGCTTTCTCTAAAAAGAAAGCTACCTCCGAGATATTTCACAGCCAGTCCTTTCTGGCAACAGGCAGCAACCTGAGGAAGGACAAGGACCGTAAGAAGTAAGAGGCCCAGCCACCCTCCCTGGCAGCCCTGCCCACACTTGTCACATTGCCCTGTCGGATCTTCCTAGCGCTTCTTACTTCTGCTTCCTTATGCTTTGTCTTCCTTCTCCAACTAAATGTCAGCTCCTGGAGAGGAGAGACCTTGTCCCTCTTGTTCACTGCTGTTTCCCCAGCATCCAGAACAGCACCTGGCACATGTTATGTGTTCAGTATTTGCCAAAATGAATGAATGAACAAAATAAAAATACATCACAGTGTGAAAAGGATCAGTTAGCTGGGGGAAAATGGCTGCTCTTATTGAATAGCCCAGCCGTGGCCCTAGTACACAGCTAATGGCTGCTGACCTACCATGAGTTATTTTGAATTTCATGTCTAAATAAAGCTGTGCCCTTTGTTGGGGAATTATAGACATAAGATAGTATTTTATTGAATTAATTCTTAGGTTCAGTTTTTTAGAACAAATTCTACTGATACTGATTTTGTGAGAATTTTTATTTTAATAAGGGAACCAGGCTGTCGATTCGAACTCCTGTGCATTGATGTCAGAGCTTGTGAAACCAATGGTGGACGTAAAGACGCAGAGAAAGGTAGAGAGGGCCACAGTGCAAGGGGGCAGCCATGAGTAGCATGTGGATGATAATCGCAAGGCGTGTAGGTTTGGGGTATGTGTTAAAATCAGTTACCCAAGTTTTAGAAAATAAGTTTATTTTTCCTTTTTGATAATTTCCTTCTGTACTTTGTGTCCCTTGTTTTATTTCTCCCACTTAAGTATCCCTGTCGGTTTAACTATTTGGCTCTTGAGGAAATTCATCATGCCTTGTTCTATTCAGAATGCTCATTTGGGGGACTCTGCCTCAGTTTACTCTAGCTACCTCTGTGCTAGCAGCGTTGCATGCTCCCATGTTCCCCACAGAGCAACACATCTGGTTTAATGCTCTCACTGCATCTATTTCAGACCCATTTTCAGAATATTGCAACTCAACTTTAAAATAGGAAAACATAACCTTGGCCCATTGGTGGCTTTCAATTTTTAAATATGGAGAAGTAAACCACAGGACTTTGGCAGATTTTATTGGCTTATATATTTTACAGGTATTTTTGTTTGTTTGTTTGTTTTTTTGGAAAAGAGGAAGGAATTCTCATGTATGATCTTTCAATGCAAGCTCTCCAAGGGCATGGATCTTTATCTTGTTCATTGATATAGCCATCCCCAGAGCCTAGAGCAGTTGTCTGGTACATAGTAGGCATGCAATTATTTTTGAAAGAATAAATAGTATAAAAGCTGTATTTTATTCAAGCATTGAATTTAGAAAGATAAACTATAAATTTATTACTGCTTTTCAACATGCTTTGGATTTTAAAGCTAATGGATCTTTTATTAATACCTTTTTTCCTTTCATCTTAACCCAGTGTTACTAAAATTAGATTTCCCATTTTTTCCCCTATATGAAAGATAATTTACATTTACCTTGTAAAAATTATCACCCTGCTCCACTTGAGAACCCTGCTGTTGTTTGCAAAATCAGGACCAAGCCCATATTTTTTGAGAGCTCTCTAGAAGAATTTATCTTCAAGAAAATATGGTTTGGTGTTTTTGGTTTTTTCTTTTCTTTTTCAAAGTCCCATGGCCAGATATCCTATAATATACTAGATGCATGTTTGCTAATTTTTACTTGAATTTTTTTTAAATTGTACCAATCAAAAGGTTCTTTTTCTTTCCAGCTCCAATTTTTTGTAAAACAG
AAGTTCCAGAGCACAGAAGGTCATCATCACAAGCAAACTTTATTAAAAAAAAACTAGAAGTGTGCTTTGATTTTGCTGTTATTTGTTTTATCACTTCTATATTTGGTGAACAGCCACAGTTACTGATATTTATGGAAAAGTACTTTCAAGTACAAGGTCAATACATAAGCCAGAGTGAATGATACTACAAGTTGAGCATCTCTAATTCAAAAATCTGAAATCCAGAAGCTTCAAAATCTGAATCTTTTTGAGCACTGACTTGACCCCACAAGTGGAAAATTCCCCACCCGACACCTTTGCTTTCTGATGGTTCAGTTTAAACAGATTTTGTTTCTTGCACAAAATTTTTGTATAAATTACTTTCAGGCTATATGTATAAGGTGGATGTGAAACATGAATTATGTAATTAGAGTCGGGTCCCGTTGTGTATATGCAGATATTCCAAACCTGAAATCCAAAACACTTCTGGTCCCTAGCATTTTGGATAAGGGATACTCAGCTTGTACCTATATATTCATATATATTCACTGTTGTTAGAAATGTTTAAGTTGCTGTTCTGTGATGAATCTAAATCTTTTCTCTTGCTACCAAGCTATTGTCACTGCAGTGCATTATACCAAAGAGCGAAGTCAGTGCCACTGAAAATACAGAACCCATTAATATCGTGGCTATCTGATTACATTTATATTCCAAGATGAACCTTTTTTATATATGCTAAAAATTTTGGGGAATATGTTTTGGGATGTATTATGGAGCTAAAACTCTAACCTCTTAATAGTTTTATAGAACTTAAAAATTTTTTATACAATTACCCAATTGGTGATATGATCTTAAGCTTTTGTGTCAGATTATTTAATATGATGACTTCATGCTTTATTATGCCTTATTATGGCTGACGTATTACTGTGGTGAAACAAAATATCTTTAAAAGTTAAAACATCCAGATATATAAGCTATTTTTTCCTAAGGATAAAGTACCTTTGAGCATGAGTGTATCACAGCTTTCATTAGGAAAACTTTTCATTACATACTTGTTTAAACTCTGTCTTCCAGGGTAAAAATAATAAGGTTGAATCATTTTATTAAAAATACTTTTTAAGAAAATAACTATGAACATCTGAATATTAAAGATATAAAAATGCACATAATTCATATTTCAGGTGGTATTTGCATTCAGTGCCTTACTGGTATTCTCAGAACATTTTAATGATTTCTAACATTTCTTAACAGTCATAGATATATACATTTTCATTTTTTGTACTTGAATATTCTAAATAAAACTGACATTTACTCTTGACAAATAAAACATATATTTACTAAAATGTGTTTAATTTTCCTTTCTGAAAACTCTCATTTTAAAAACGTTCATTTAATTATGTATTTGAATTATTTTGGAGATGAGGTATTTTATGAGTATTTTCAGACAATGAAACTTATTAGTCTGTGTCAGATTCTGAGCAATCATAGAGTCATCTAAGTTGTAAATAAAACCTTGCATAGCACAATT"; + const string aaSequence = + "MLFQSHMATKTTCMSSQGSDDEQIKRENIRSLTMSGHVGFESLPDQLVNRSIQQGFCFNILCVGETGIGKSTLIDTLFNTNFEDYESSHFCPNVKLKAQTYELQESNVQLKLTIVNTVGFGDQINKEESYQPIVDYIDAQFEAYLQEELKIKRSLFTYHDSRIHVCLYFISPTGHSLKTLDLLTMKNLDSKVNIIPVIAKADTVSKTELQKFKIKLMSELVSNGVQIYQFPTDDDTIAKVNAAMNGQLPFAVVGSMDEVKVGNKMVKARQYPWGVVQVENENHCDFVKLREMLICTNMEDLREQTHTRHYELYRRCKLEEMGFTDVGPENKPVSVQETYEAKRHEFHGERQRKEEEMKQMFVQRVKEKEAILKEAERELQAKFEHLKRLHQEERMKLEEKRRLLEEEIIAFSKKKATSEIFHSQSFLATGSNLRKDKDRKK*"; + string 
actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001308442.1", + "NM_001321513.1:c.1324_1326del", cdsSequence, aaSequence, 1324, 1326, 442, "*", "", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001308442.1:p.(Ter442GluextTer75)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaDeletion_MissingOutput() + { + // VID: chr1-16265921-GAG-G (NC_000001.10:g.16265922_16265923del), key: NM_015001.2, expected: NP_055816.2:p.(Ter3665CysextTer2), actual: + const string cdsSequence = + "ATGGTCCGGGAAACCAGGCATCTCTGGGTGGGCAACTTACCCGAGAACGTGCGGGAAGAGAAGATCATCGAGCATTTCAAACGATATGGCCGCGTGGAAAGTGTCAAAATTCTTCCCAAGAGGGGATCTGAAGGAGGAGTGGCTGCCTTTGTGGATTTTGTGGACATCAAAAGTGCACAGAAAGCTCACAACTCGGTCAACAAAATGGGTGACAGAGACCTACGCACGGATTATAATGAACCAGGCACCATCCCGAGTGCTGCTCGGGGATTGGATGATACAGTTTCCATAGCATCTCGTAGTAGAGAGGTTTCTGGGTTCAGAGGAGGTGGTGGAGGGCCTGCTTATGGTCCCCCACCGTCACTTCATGCACGAGAAGGACGTTATGAGCGGAGACTTGATGGGGCTTCAGATAACAGGGAGCGTGCTTATGAACATAGTGCCTATGGACACCATGAACGGGGGACGGGAGGATTTGATCGGACAAGACATTACGATCAGGATTACTATAGAGATCCTCGAGAGCGGACTTTACAACATGGGCTCTATTACGCTTCTCGGAGTCGAAGTCCAAATCGCTTTGATGCTCATGACCCCCGATATGAACCTAGGGCTCGCGAGCAGTTTACACTGCCCAGTGTGGTACACAGGGATATCTACAGGGATGATATTACCCGGGAGGTACGAGGCAGAAGGCCAGAGCGGAATTACCAGCACAGCAGGAGTCGGTCACCACATTCATCCCAGTCTAGAAATCAGTCTCCTCAGAGACTGGCTAGCCAAGCATCTAGACCCACAAGGTCCCCTAGCGGCAGCGGCTCTAGAAGTAGATCCTCCAGTAGTGATTCAATCAGCAGCAGCAGTAGTACCAGCAGTGACAGCAGTGATTCCAGCAGTAGTTCAAGTGATGATTCTCCAGCTCGATCAGTTCAGTCTGCAGCAGTCCCTGCACCCACTTCCCAGTTGCTTTCATCTCTGGAAAAAGATGAGCCCCGTAAAAGTTTTGGCATCAAGGTCCAGAATCTTCCAGTACGCTCTACAGATACAAGCCTTAAAGATGGCCTTTTCCATGAATTTAAGAAATTTGGAAAAGTAACTTCAGTGCAGATACATGGAACTTCAGAAGAGAGGTATGGTCTGGTATTCTTTCGGCAGCAAGAGGACCAAGAAAAAGCCTTGACTGCATCAAAAGGAAAACTTTTCTTTGGCATGCAGATTGAAGTAACAGCATGGATAGGTCCAGAAACAGAAAGTGAAAATGAATTTCGCCCCTTGGATGAAAGGATAGATGAATTTCACCCCAAAGCAACAAGAACTCTCTTTATTGGCAACCTTGAAAAAACCACTACTTACCATGACCTTCGCAACATCTTCCAGCGCTTTGGAGAAATTGTGGATATTGACATTAAGAAAGTAAATGGAGTTCCTCAGTATGCGTTTCTGCAATACTGTGATA
TTGCTAGCGTTTGTAAAGCTATTAAGAAGATGGATGGGGAATATCTTGGAAATAATCGCCTCAAGCTGGGTTTTGGAAAGAGCATGCCTACAAACTGCGTGTGGCTAGATGGGCTTTCTTCGAATGTGTCAGATCAGTATTTAACACGACATTTCTGCCGATATGGGCCTGTGGTAAAGGTGGTGTTTGACCGCTTAAAAGGCATGGCCCTGGTTCTCTACAATGAAATTGAATATGCACAAGCAGCTGTAAAAGAGACCAAAGGGAGGAAAATCGGTGGGAATAAAATTAAGGTGGATTTTGCAAATCGGGAAAGTCAGCTGGCTTTTTATCACTGCATGGAGAAATCTGGTCAAGACATCAGAGACTTTTATGAAATGTTAGCCGAAAGAAGAGAGGAACGAAGGGCATCCTACGACTATAACCAAGATCGTACATATTATGAGAGTGTTCGAACTCCAGGCACTTATCCTGAGGATTCCAGGCGGGACTATCCAGCTCGAGGGAGAGAGTTTTATTCAGAATGGGAAACTTACCAAGGAGACTACTATGAATCACGATACTACGATGATCCTCGGGAATACAGGGATTACAGGAATGATCCTTATGAACAAGATATTAGGGAATATAGTTACAGGCAAAGGGAACGAGAAAGAGAACGTGAAAGATTTGAGTCTGACCGGGACAGAGACCATGAGAGGAGGCCGATTGAACGAAGTCAAAGTCCTGTTCACTTGCGACGTCCACAGAGTCCTGGAGCGTCTCCCTCTCAGGCAGAGAGGTTGCCGAGTGATTCTGAGAGGAGGCTTTACAGCCGATCCTCAGACCGGAGTGGAAGCTGTAGCTCACTCTCCCCTCCAAGATATGAGAAACTGGACAAGTCTCGTTTGGAGCGCTATACAAAAAATGAAAAGACAGATAAAGAACGAACTTTTGATCCGGAGAGAGTGGAGAGAGAGAGACGCTTAATACGGAAGGAAAAAGTGGAAAAGGACAAAACTGACAAGCAGAAACGCAAAGGAAAGGTTCACTCCCCTAGTTCTCAGTCTTCAGAAACGGACCAAGAAAATGAGCGAGAGCAAAGCCCTGAAAAGCCCAGGAGTTGTAATAAACTGAGCAGAGAGAAAGCTGACAAAGAGGGAATAGCGAAAAACCGCCTGGAACTCATGCCTTGCGTGGTTTTGACTCGAGTGAAAGAGAAAGAGGGAAAGGTCATTGACCACACTCCTGTGGAAAAGTTGAAAGCCAAGCTTGATAATGACACTGTCAAATCTTCTGCCCTGGACCAGAAACTTCAGGTCTCTCAGACGGAGCCTGCAAAATCTGACTTGTCTAAACTGGAATCAGTTAGAATGAAAGTACCAAAGGAAAAGGGGCTTTCAAGCCATGTTGAAGTGGTGGAGAAGGAAGGCAGGCTTAAAGCCAGGAAGCACCTCAAGCCTGAGCAGCCTGCAGATGGGGTAAGTGCTGTGGATCTGGAGAAGCTGGAAGCCAGGAAAAGGCGCTTTGCAGATTCCAATTTAAAAGCAGAAAAGCAAAAACCAGAGGTCAAGAAAAGCAGTCCAGAGATGGAGGATGCTCGCGTGCTTTCAAAAAAGCAGCCTGACGTGTCCTCTAGAGAGGTCATTCTGCTGAGGGAAGGAGAGGCTGAAAGAAAGCCTGTGAGGAAAGAAATTCTTAAAAGAGAATCTAAAAAAATCAAACTGGACAGACTTAATACTGTTGCCAGCCCCAAAGACTGTCAGGAGCTTGCCAGTATTTCTGTTGGGTCTGGCTCAAGGCCCAGCTCAGACCTACAAGCAAGACTGGGAGAACTAGCAGGTGAATCTGTGGAAAATCAAGAAGTCCAATCAAAAAAGCCCATTCCCTCAAAACCACAGCTCAAACAGCTGCAGGTATTAGATGATCAAGGACCAGAGAGAGAAGACGTTAGGAAAAACTATTGCAGTCTTCGTGATGAAACACCTGAACGTAAATCAGGCCAAGAGAAATCACATTCA
GTAAATACTGAAGAAAAAATTGGCATTGACATCGATCACACGCAGAGTTACCGAAAACAAATGGAACAGAGTCGTAGGAAACAGCAGATGGAAATGGAAATAGCCAAGTCTGAGAAGTTTGGCAGTCCTAAAAAAGATGTAGATGAATATGAAAGACGTAGCCTCGTTCACGAGGTAGGCAAACCCCCTCAAGATGTCACTGATGACTCTCCTCCTAGCAAAAAGAAAAGGATGGATCATGTCGATTTTGATATCTGCACCAAGCGAGAACGGAATTACAGAAGTTCACGCCAAATCAGCGAAGATTCTGAAAGGACTGGTGGTTCTCCCAGTGTCCGACATGGTTCCTTCCATGAAGATGAGGATCCCATAGGCTCCCCTAGGCTACTGTCAGTAAAAGGGTCTCCTAAAGTAGATGAAAAAGTCCTCCCCTATTCTAACATAACAGTCAGGGAAGAGTCTTTAAAATTTAATCCTTATGATTCTAGCAGGAGAGAACAGATGGCAGATATGGCCAAAATAAAACTATCTGTCTTGAATTCTGAAGATGAACTAAATCGTTGGGACTCTCAGATGAAACAGGATGCTGGCAGATTTGATGTGAGTTTCCCAAACAGCATAATTAAGAGAGATAGCCTTCGAAAAAGGTCTGTACGAGATCTGGAACCTGGTGAGGTGCCTTCTGATTCTGACGAAGATGGTGAACACAAATCCCACTCACCCAGAGCCTCTGCATTATATGAAAGTTCTCGATTGTCTTTTTTATTGAGGGACAGAGAAGACAAGCTACGTGAGCGAGATGAAAGACTCTCTAGTTCTTTAGAAAGGAACAAATTTTACTCTTTTGCATTGGATAAGACAATCACACCAGACACTAAAGCTTTGCTTGAAAGAGCTAAATCCCTCTCTTCATCTCGTGAAGAAAATTGGTCTTTTCTTGATTGGGACTCCCGATTTGCAAATTTTCGAAACAACAAAGATAAAGAAAAGGTTGACTCTGCTCCAAGACCTATTCCATCCTGGTACATGAAAAAGAAGAAAATTAGGACTGATTCAGAAGGGAAAATGGATGATAAGAAAGAGGACCATAAAGAAGAAGAGCAAGAGAGGCAGGAATTGTTTGCTTCTCGTTTTTTACACAGCTCAATCTTTGAACAAGATTCCAAGCGATTGCAGCATCTAGAGAGAAAAGAGGAAGATTCTGACTTCATTTCTGGTAGGATCTATGGGAAGCAGACATCTGAGGGAGCAAACAGCACAACTGATTCCATTCAAGAACCAGTAGTTCTGTTCCATAGCAGATTTATGGAGCTCACACGGATGCAACAGAAAGAAAAAGAAAAAGACCAGAAACCCAAAGAGGTTGAGAAACAGGAAGATACAGAGAATCATCCCAAGACCCCAGAATCTGCTCCTGAGAATAAAGATTCAGAACTGAAAACTCCACCTTCCGTTGGGCCTCCAAGTGTCACAGTCGTAACTCTAGAATCAGCCCCATCAGCACTAGAGAAGACCACTGGTGACAAAACGGTAGAGGCGCCTTTGGTAACAGAAGAGAAGACTGTGGAGCCAGCTACCGTCTCAGAAGAAGCAAAGCCTGCATCTGAACCTGCTCCTGCCCCTGTGGAACAGCTGGAACAAGTAGACCTGCCCCCAGGAGCAGACCCCGATAAAGAAGCTGCCATGATGCCTGCGGGTGTTGAGGAAGGTTCATCAGGTGACCAGCCGCCTTATCTGGATGCCAAGCCTCCAACTCCCGGGGCCTCGTTTTCCCAGGCAGAGAGCAACGTAGATCCAGAGCCTGACAGTACCCAGCCACTTTCAAAACCAGCTCAGAAGTCTGAGGAAGCCAATGAGCCAAAGGCCGAAAAGCCAGACGCCACTGCAGATGCTGAGCCTGATGCAAACCAGAAAGCCGAAGCTGCTCCTGAGTCTCAGCCCCCAGCTTCTGAAGATTTAGAGGTTGATCCTCCAGTTGCTGCAAAGGATAAAAAGCCAAA
CAAAAGCAAGCGTTCAAAGACCCCTGTTCAGGCAGCTGCAGTGAGTATCGTGGAGAAGCCCGTCACAAGGAAGAGTGAGAGGATAGACCGGGAAAAACTCAAGCGGTCCAATTCTCCTCGGGGAGAAGCACAGAAGCTTTTGGAATTGAAGATGGAGGCAGAGAAGATTACAAGGACTGCTTCTAAAAACTCTGCTGCAGACCTTGAACATCCCGAACCAAGTTTGCCTCTCAGCCGAACAAGGCGCCGGAATGTAAGGAGCGTCTATGCAACCATGGGTGACCATGAAAACCGCTCTCCTGTCAAAGAGCCCGTTGAGCAACCAAGAGTGACCAGAAAGAGATTGGAGCGAGAGCTTCAGGAGGCTGCAGCGGTTCCCACCACCCCTCGGAGGGGAAGGCCTCCAAAGACACGCCGGCGAGCCGATGAAGAGGAGGAGAACGAGGCCAAGGAACCTGCAGAAACACTCAAGCCACCTGAGGGATGGCGGTCGCCAAGGTCCCAGAAAACTGCAGCTGGTGGTGGACCCCAAGGGAAAAAGGGAAAAAATGAACCGAAGGTGGATGCTACACGTCCTGAGGCCACCACTGAGGTGGGCCCCCAAATAGGCGTGAAAGAGAGCTCCATGGAACCCAAGGCTGCTGAGGAGGAGGCAGGGAGTGAACAGAAACGTGACAGAAAAGATGCTGGCACAGACAAAAACCCCCCTGAAACCGCCCCTGTTGAAGTTGTAGAGAAAAAACCGGCCCCTGAAAAAAACTCCAAATCAAAGAGAGGAAGATCTCGAAACTCCAGGTTAGCAGTGGACAAATCTGCAAGTCTGAAAAATGTGGATGCTGCTGTCAGTCCCAGGGGGGCTGCAGCACAGGCAGGGGAGAGGGAATCTGGGGTGGTGGCAGTCTCCCCTGAGAAAAGTGAGAGTCCCCAAAAGGAGGATGGTTTATCATCCCAGTTGAAAAGTGATCCAGTTGATCCAGACAAGGAACCAGAGAAAGAAGACGTGTCTGCCTCTGGGCCGTCCCCAGAAGCCACCCAGTTAGCCAAGCAGATGGAGCTGGAGCAGGCCGTGGAACACATCGCAAAGCTCGCTGAGGCCTCTGCCTCTGCTGCCTATAAGGCAGATGCACCAGAGGGCCTTGCCCCAGAGGACAGGGACAAGCCTGCACACCAAGCAAGTGAAACAGAGCTGGCTGCGGCCATCGGCTCCATCATCAATGACATTTCTGGGGAGCCAGAAAACTTCCCAGCACCTCCACCTTATCCTGGAGAATCCCAGACAGATCTGCAACCCCCCGCAGGTGCACAGGCGCTGCAGCCTTCTGAGGAAGGAATGGAGACAGATGAGGCTGTATCTGGCATCCTGGAAACTGAGGCTGCTACAGAATCTTCTAGGCCTCCAGTCAATGCTCCTGACCCCTCAGCCGGCCCAACAGATACCAAGGAAGCCAGAGGAAATAGCAGTGAAACCTCACACTCAGTGCCAGAAGCCAAAGGGTCTAAAGAAGTGGAAGTCACTCTTGTTCGGAAAGACAAAGGGCGCCAGAAAACAACCCGATCACGCCGCAAGCGAAACACAAACAAGAAAGTGGTGGCTCCTGTAGAGAGCCATGTCCCTGAATCCAACCAAGCTCAAGGTGAGAGTCCTGCTGCAAATGAGGGGACAACAGTACAGCACCCCGAAGCCCCACAGGAAGAAAAGCAGAGTGAGAAACCCCATTCCACTCCTCCTCAGTCATGTACTTCTGACCTAAGCAAGATTCCCTCCACAGAGAATTCGTCCCAAGAAATCAGTGTTGAGGAAAGGACTCCAACCAAAGCATCTGTGCCCCCAGACCTTCCCCCACCTCCCCAGCCAGCACCGGTGGATGAGGAGCCTCAAGCCAGGTTCAGGGTGCATTCCATCATTGAAAGTGACCCGGTGACCCCACCCAGCGATCCAAGCATCCCCATACCCACACTGCCTTCTGTAACTGCAGCAAAGCTCTCACCTCCTGTCGCCT
CTGGGGGGATCCCACACCAGAGCCCCCCTACTAAGGTGACAGAGTGGATCACAAGGCAGGAGGAGCCACGGGCTCAGTCTACTCCATCTCCAGCTCTTCCCCCAGACACAAAGGCCTCTGATGTTGACACCAGCTCCAGCACCCTGAGGAAGATTCTCATGGACCCCAAGTATGTGTCTGCCACAAGTGTCACTTCCACAAGTGTCACCACAGCCATTGCAGAGCCTGTCAGTGCTGCCCCTTGCCTACATGAGGCCCCGCCCCCGCCAGTTGACTCTAAAAAGCCTTTAGAAGAAAAAACAGCACCTCCAGTGACAAACAACTCTGAGATACAAGCCTCGGAGGTGCTGGTAGCTGCTGACAAGGAAAAGGTGGCTCCAGTCATTGCTCCCAAAATTACCTCTGTTATTAGCCGGATGCCTGTCAGCATTGACCTGGAAAATTCACAGAAGATAACCTTGGCAAAACCAGCTCCTCAAACCCTCACTGGTCTGGTGAGCGCACTCACTGGCCTGGTGAACGTCTCCCTGGTCCCGGTGAATGCCCTGAAAGGCCCCGTGAAGGGCTCAGTGACCACACTGAAAAGTTTGGTGAGCACCCCTGCTGGGCCCGTGAACGTCCTGAAAGGGCCTGTGAATGTTCTTACGGGGCCAGTGAATGTTCTCACCACTCCAGTGAACGCCACGGTGGGCACAGTGAATGCCGCCCCAGGCACAGTCAATGCCGCTGCGAGTGCAGTGAATGCCACAGCAAGTGCAGTGACCGTCACAGCGGGTGCGGTTACTGCTGCATCTGGTGGTGTAACGGCCACAACAGGCACGGTGACAATGGCAGGGGCAGTGATTGCGCCGTCAACAAAGTGCAAACAGAGAGCGAGTGCTAATGAAAACAGTCGGTTCCACCCAGGGTCCATGCCTGTGATCGACGATCGTCCGGCAGACGCGGGCTCAGGGGCGGGGCTGCGTGTGAACACTTCTGAAGGGGTTGTGCTCCTGAGTTACTCAGGGCAGAAGACCGAAGGCCCACAGCGGATCAGCGCCAAGATCAGCCAGATCCCCCCGGCCAGTGCAATGGACATTGAATTTCAGCAGTCAGTGTCCAAGTCCCAGGTCAAACCTGATTCTGTCACAGCATCGCAGCCTCCATCCAAAGGCCCTCAAGCTCCTGCAGGCTATGCGAACGTGGCCACCCATTCCACGTTGGTACTGACCGCCCAGACATATAATGCCTCTCCTGTGATTTCGTCTGTGAAGGCCGATAGGCCATCCTTGGAGAAGCCCGAGCCCATTCACCTCTCGGTGTCCACGCCTGTCACCCAGGGAGGCACAGTGAAGGTTCTCACCCAGGGGATCAACACACCCCCTGTGCTGGTTCACAACCAGCTGGTCCTCACCCCAAGCATTGTCACCACAAACAAGAAGCTTGCTGACCCCGTCACCCTTAAAATCGAGACCAAGGTCCTTCAGCCGGCCAACCTGGGGTCCACGCTCACGCCCCACCACCCTCCTGCTCTGCCCAGCAAACTGCCTACAGAAGTCAACCATGTCCCCTCGGGGCCCAGCATCCCAGCAGATCGAACTGTCTCCCATTTGGCAGCTGCAAAGCTAGATGCTCATTCTCCTCGACCAAGTGGACCCGGGCCATCCTCATTCCCAAGGGCAAGCCACCCCAGCAGTACTGCATCTACGGCGCTCTCCACCAACGCCACAGTCATGCTGGCTGCAGGCATCCCAGTGCCCCAGTTCATCTCCAGCATCCACCCAGAGCAGTCTGTCATCATGCCACCCCACAGCATCACCCAGACTGTGTCCCTGAGCCACCTCTCCCAGGGCGAGGTGAGAATGAACACTCCCACGCTGCCCAGTATCACCTACAGCATCCGGCCAGAAGCGCTTCACTCTCCTCGGGCTCCGCTGCAGCCCCAGCAAATAGAGGTCAGGGCCCCACAGCGTGCCAGCACCCCGCAGCCAGCCCCAGCTGGTGTGCCTGCACTGGCCTCC
CAGCACCCTCCCGAGGAGGAAGTGCATTATCACCTTCCTGTCGCTCGAGCCACAGCCCCTGTGCAGTCAGAGGTACTAGTCATGCAGTCTGAGTACCGACTGCACCCCTATACTGTGCCACGGGATGTGAGGATCATGGTGCATCCACATGTGACGGCAGTCAGCGAGCAGCCCAGGGCCGCGGATGGGGTGGTGAAGGTGCCACCAGCCAGCAAGGCCCCTCAGCAGCCAGGGAAGGAAGCTGCCAAGACACCAGATGCCAAAGCTGCCCCCACCCCCACCCCTGCCCCCGTCCCTGTCCCTGTCCCCCTTCCTGCCCCTGCTCCTGCCCCTCATGGTGAGGCCCGTATCCTCACAGTTACCCCCAGTAACCAACTCCAGGGGCTGCCTCTGACCCCTCCTGTGGTGGTGACCCATGGGGTGCAGATTGTGCACTCCAGCGGGGAGCTGTTTCAAGAGTACCGGTACGGCGACATCCGCACCTACCACCCCCCGGCCCAGCTCACACACACTCAGTTTCCCGCCGCTTCCTCTGTTGGCCTGCCTTCCCGGACCAAGACAGCTGCTCAGGGCCCTCCTCCTGAAGGTGAGCCCCTGCAGCCTCCTCAGCCTGTGCAGTCCACACAGCCTGCCCAGCCTGCACCACCCTGCCCGCCCTCCCAGCTCGGTCAGCCCGGCCAGCCACCAAGCAGCAAGATGCCTCAAGTGTCCCAGGAGGCAAAGGGGACCCAGACGGGAGTAGAGCAGCCTCGCCTCCCAGCTGGACCTGCAAACAGGCCACCTGAGCCTCACACCCAGGTTCAGAGGGCACAAGCAGAAACAGGCCCGACTTCCTTCCCCTCCCCTGTGTCTGTCTCCATGAAGCCTGACCTTCCAGTCTCTCTTCCCACTCAGACTGCCCCAAAACAGCCGTTGTTTGTCCCAACAACCTCTGGCCCCAGCACCCCACCAGGACTGGTTCTGCCACACACTGAATTCCAGCCAGCCCCCAAACAAGATTCCTCTCCACACCTGACTTCCCAGAGACCCGTGGATATGGTTCAACTTCTGAAGAAGTACCCCATCGTGTGGCAGGGCCTGCTGGCCCTCAAGAATGACACAGCTGCTGTGCAGCTCCACTTCGTCTCTGGCAACAACGTCCTGGCCCATCGGTCCCTGCCCCTTTCTGAAGGAGGGCCCCCACTAAGGATCGCCCAGAGGATGCGGCTGGAGGCAACGCAGCTGGAAGGGGTTGCCCGAAGGATGACGGTGGAGACAGATTACTGTCTGCTGCTGGCTCTGCCCTGTGGCCGTGACCAAGAGGATGTTGTGAGCCAGACCGAGTCCCTCAAGGCTGCCTTCATCACTTACCTGCAGGCCAAGCAGGCGGCAGGGATCATCAACGTTCCCAACCCTGGCTCCAATCAGCCTGCCTACGTGCTGCAGATCTTCCCGCCCTGTGAGTTCTCTGAGAGTCACCTGTCCCGCCTGGCCCCTGACCTCCTTGCCAGCATCTCCAACATCTCTCCCCACCTCATGATTGTCATTGCCTCCGTGTGAGCCACTGAGTGGTTATCACCTCAGTGAATCTTCCCAGGGCTCTGCAGTAAAAACAAAGGACAACCCAGCCAAGCAGAGGAAGAAGCTGCCGAAGGGGACAGACTCCACTGCCAGACGGCCAGCCGTTTGCTGTCCTGCCGCCCGGCTCAGTCGGCCAGACTTCCTCTAGGAGTGGTGCTGCTACCTTGTATGTTTACATAATGCTTTAGCCCAAGGACACATCACCAACCCATGGACTCGCAGACACCGGGGCTGGGTTTCTCTTTCCTCTTTTTGGAGAAAAGGAACAGGGCAGTGGAATGAAAATTTTTTGTTTGTTTGTTTTTAAGAAACAAGAAAACAGAACTGCCTTTGCACTAAATTAGTGACTTGGACTTTTGCCCAGTGAAGACAGGCTGTGACACTCTGGATGTCTTGGTGTGTGTAGACACACATTGCAGACTCTTAACGCAGGAAGGACT
TCAAACTTCTGCTGAGACCTTGGGGTCAAGGAACATTTCATTGGTTTTTTTTGTCCACCCCCATCTCCCTTGCTCATTTGGATGCGTCACCTTAATTCTCCTGCTGCCACCGTCTTTGATTCACCGGGATGTACAGTTTACAGTTGAAGAGCAAACAGAAAGGTTTTCTCTTGGTGGGATATGCAGAACTTGGGATGTGTGTATATATAAATATATAATATATATAAATATATATAATACTGACTTAAAAAATCAAATCCCCCGACATACGTTTTTTTTAATCTGTGCCAAAAATGTGTTTTCAGAGGAAATCTTATTTTCATATTCAGACTTTGTATTGCCCACTCATTTGTATAAGTGCGCTTCGGTACAGCACGGGTCCTGCTCCCGCGATGTGGAAGTGTCACACGGCACCTGTACAAAAAGACTGGCTAACCCCTCTTCCTATTACCTTGATCTCTTCCCCCAACTTCCTAACACTTATTAATTTATGAAACTGTTTTTCTCAGCGCAGTTTTGTTTTGTGTGTCCATTGGATTACAAACTTTATTAAAAAATATAAAACAC"; + const string aaSequence = + "MVRETRHLWVGNLPENVREEKIIEHFKRYGRVESVKILPKRGSEGGVAAFVDFVDIKSAQKAHNSVNKMGDRDLRTDYNEPGTIPSAARGLDDTVSIASRSREVSGFRGGGGGPAYGPPPSLHAREGRYERRLDGASDNRERAYEHSAYGHHERGTGGFDRTRHYDQDYYRDPRERTLQHGLYYASRSRSPNRFDAHDPRYEPRAREQFTLPSVVHRDIYRDDITREVRGRRPERNYQHSRSRSPHSSQSRNQSPQRLASQASRPTRSPSGSGSRSRSSSSDSISSSSSTSSDSSDSSSSSSDDSPARSVQSAAVPAPTSQLLSSLEKDEPRKSFGIKVQNLPVRSTDTSLKDGLFHEFKKFGKVTSVQIHGTSEERYGLVFFRQQEDQEKALTASKGKLFFGMQIEVTAWIGPETESENEFRPLDERIDEFHPKATRTLFIGNLEKTTTYHDLRNIFQRFGEIVDIDIKKVNGVPQYAFLQYCDIASVCKAIKKMDGEYLGNNRLKLGFGKSMPTNCVWLDGLSSNVSDQYLTRHFCRYGPVVKVVFDRLKGMALVLYNEIEYAQAAVKETKGRKIGGNKIKVDFANRESQLAFYHCMEKSGQDIRDFYEMLAERREERRASYDYNQDRTYYESVRTPGTYPEDSRRDYPARGREFYSEWETYQGDYYESRYYDDPREYRDYRNDPYEQDIREYSYRQRERERERERFESDRDRDHERRPIERSQSPVHLRRPQSPGASPSQAERLPSDSERRLYSRSSDRSGSCSSLSPPRYEKLDKSRLERYTKNEKTDKERTFDPERVERERRLIRKEKVEKDKTDKQKRKGKVHSPSSQSSETDQENEREQSPEKPRSCNKLSREKADKEGIAKNRLELMPCVVLTRVKEKEGKVIDHTPVEKLKAKLDNDTVKSSALDQKLQVSQTEPAKSDLSKLESVRMKVPKEKGLSSHVEVVEKEGRLKARKHLKPEQPADGVSAVDLEKLEARKRRFADSNLKAEKQKPEVKKSSPEMEDARVLSKKQPDVSSREVILLREGEAERKPVRKEILKRESKKIKLDRLNTVASPKDCQELASISVGSGSRPSSDLQARLGELAGESVENQEVQSKKPIPSKPQLKQLQVLDDQGPEREDVRKNYCSLRDETPERKSGQEKSHSVNTEEKIGIDIDHTQSYRKQMEQSRRKQQMEMEIAKSEKFGSPKKDVDEYERRSLVHEVGKPPQDVTDDSPPSKKKRMDHVDFDICTKRERNYRSSRQISEDSERTGGSPSVRHGSFHEDEDPIGSPRLLSVKGSPKVDEKVLPYSNITVREESLKFNPYDSSRREQMADMAKIKLSVLNSEDELNRWDSQMKQDAGRFDVSFPNSIIKRDSLRKRSVRDLEPGEVPSDSDEDGEHKSHSPRASALY
ESSRLSFLLRDREDKLRERDERLSSSLERNKFYSFALDKTITPDTKALLERAKSLSSSREENWSFLDWDSRFANFRNNKDKEKVDSAPRPIPSWYMKKKKIRTDSEGKMDDKKEDHKEEEQERQELFASRFLHSSIFEQDSKRLQHLERKEEDSDFISGRIYGKQTSEGANSTTDSIQEPVVLFHSRFMELTRMQQKEKEKDQKPKEVEKQEDTENHPKTPESAPENKDSELKTPPSVGPPSVTVVTLESAPSALEKTTGDKTVEAPLVTEEKTVEPATVSEEAKPASEPAPAPVEQLEQVDLPPGADPDKEAAMMPAGVEEGSSGDQPPYLDAKPPTPGASFSQAESNVDPEPDSTQPLSKPAQKSEEANEPKAEKPDATADAEPDANQKAEAAPESQPPASEDLEVDPPVAAKDKKPNKSKRSKTPVQAAAVSIVEKPVTRKSERIDREKLKRSNSPRGEAQKLLELKMEAEKITRTASKNSAADLEHPEPSLPLSRTRRRNVRSVYATMGDHENRSPVKEPVEQPRVTRKRLERELQEAAAVPTTPRRGRPPKTRRRADEEEENEAKEPAETLKPPEGWRSPRSQKTAAGGGPQGKKGKNEPKVDATRPEATTEVGPQIGVKESSMEPKAAEEEAGSEQKRDRKDAGTDKNPPETAPVEVVEKKPAPEKNSKSKRGRSRNSRLAVDKSASLKNVDAAVSPRGAAAQAGERESGVVAVSPEKSESPQKEDGLSSQLKSDPVDPDKEPEKEDVSASGPSPEATQLAKQMELEQAVEHIAKLAEASASAAYKADAPEGLAPEDRDKPAHQASETELAAAIGSIINDISGEPENFPAPPPYPGESQTDLQPPAGAQALQPSEEGMETDEAVSGILETEAATESSRPPVNAPDPSAGPTDTKEARGNSSETSHSVPEAKGSKEVEVTLVRKDKGRQKTTRSRRKRNTNKKVVAPVESHVPESNQAQGESPAANEGTTVQHPEAPQEEKQSEKPHSTPPQSCTSDLSKIPSTENSSQEISVEERTPTKASVPPDLPPPPQPAPVDEEPQARFRVHSIIESDPVTPPSDPSIPIPTLPSVTAAKLSPPVASGGIPHQSPPTKVTEWITRQEEPRAQSTPSPALPPDTKASDVDTSSSTLRKILMDPKYVSATSVTSTSVTTAIAEPVSAAPCLHEAPPPPVDSKKPLEEKTAPPVTNNSEIQASEVLVAADKEKVAPVIAPKITSVISRMPVSIDLENSQKITLAKPAPQTLTGLVSALTGLVNVSLVPVNALKGPVKGSVTTLKSLVSTPAGPVNVLKGPVNVLTGPVNVLTTPVNATVGTVNAAPGTVNAAASAVNATASAVTVTAGAVTAASGGVTATTGTVTMAGAVIAPSTKCKQRASANENSRFHPGSMPVIDDRPADAGSGAGLRVNTSEGVVLLSYSGQKTEGPQRISAKISQIPPASAMDIEFQQSVSKSQVKPDSVTASQPPSKGPQAPAGYANVATHSTLVLTAQTYNASPVISSVKADRPSLEKPEPIHLSVSTPVTQGGTVKVLTQGINTPPVLVHNQLVLTPSIVTTNKKLADPVTLKIETKVLQPANLGSTLTPHHPPALPSKLPTEVNHVPSGPSIPADRTVSHLAAAKLDAHSPRPSGPGPSSFPRASHPSSTASTALSTNATVMLAAGIPVPQFISSIHPEQSVIMPPHSITQTVSLSHLSQGEVRMNTPTLPSITYSIRPEALHSPRAPLQPQQIEVRAPQRASTPQPAPAGVPALASQHPPEEEVHYHLPVARATAPVQSEVLVMQSEYRLHPYTVPRDVRIMVHPHVTAVSEQPRAADGVVKVPPASKAPQQPGKEAAKTPDAKAAPTPTPAPVPVPVPLPAPAPAPHGEARILTVTPSNQLQGLPLTPPVVVTHGVQIVHSSGELFQEYRYGDIRTYHPPAQLTHTQFPAASSVGLPSRTKTAAQGPPPEGEPLQPPQPVQSTQPAQPAPPCPPSQLGQPGQPPSSKMPQVSQEAKGTQTGV
EQPRLPAGPANRPPEPHTQVQRAQAETGPTSFPSPVSVSMKPDLPVSLPTQTAPKQPLFVPTTSGPSTPPGLVLPHTEFQPAPKQDSSPHLTSQRPVDMVQLLKKYPIVWQGLLALKNDTAAVQLHFVSGNNVLAHRSLPLSEGGPPLRIAQRMRLEATQLEGVARRMTVETDYCLLLALPCGRDQEDVVSQTESLKAAFITYLQAKQAAGIINVPNPGSNQPAYVLQIFPPCEFSESHLSRLAPDLLASISNISPHLMIVIASV*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_055816.2", + "NM_015001.2:c.10995_*1del", cdsSequence, aaSequence, 10995, 10996, 3665, "*", "C", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_055816.2:p.(Ter3665CysextTer2)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaInsertion() + { + const string cdsSequence = + "ATGCCCTCCTGGGCCCTCTTCATGGTCACCTCCTGCCTCCTCCTGGCCCCTCAAAACCTGGCCCAAGTCAGCAGCCAAGATGTCTCCTTGCTGGCATCAGACTCAGAGCCCCTGAAGTGTTTCTCCCGAACATTTGAGGACCTCACTTGCTTCTGGGATGAGGAAGAGGCAGCGCCCAGTGGGACATACCAGCTGCTGTATGCCTACCCGCGGGAGAAGCCCCGTGCTTGCCCCCTGAGTTCCCAGAGCATGCCCCACTTTGGAACCCGATACGTGTGCCAGTTTCCAGACCAGGAGGAAGTGCGTCTCTTCTTTCCGCTGCACCTCTGGGTGAAGAATGTGTTCCTAAACCAGACTCGGACTCAGCGAGTCCTCTTTGTGGACAGTGTAGGCCTGCCGGCTCCCCCCAGTATCATCAAGGCCATGGGTGGGAGCCAGCCAGGGGAACTTCAGATCAGCTGGGAGGAGCCAGCTCCAGAAATCAGTGATTTCCTGAGGTACGAACTCCGCTATGGCCCCAGAGATCCCAAGAACTCCACTGGTCCCACGGTCATACAGCTGATTGCCACAGAAACCTGCTGCCCTGCTCTGCAGAGGCCTCACTCAGCCTCTGCTCTGGACCAGTCTCCATGTGCTCAGCCCACAATGCCCTGGCAAGATGGACCAAAGCAGACCTCCCCAAGTAGAGAAGCTTCAGCTCTGACAGCAGAGGGTGGAAGCTGCCTCATCTCAGGACTCCAGCCTGGCAACTCCTACTGGCTGCAGCTGCGCAGCGAACCTGATGGGATCTCCCTCGGTGGCTCCTGGGGATCCTGGTCCCTCCCTGTGACTGTGGACCTGCCTGGAGATGCAGTGGCACTTGGACTGCAATGCTTTACCTTGGACCTGAAGAATGTTACCTGTCAATGGCAGCAACAGGACCATGCTAGCTCCCAAGGCTTCTTCTACCACAGCAGGGCACGGTGCTGCCCCAGAGACAGGTACCCCATCTGGGAGAACTGCGAAGAGGAAGAGAAAACAAATCCAGGACTACAGACCCCACAGTTCTCTCGCTGCCACTTCAAGTCACGAAATGACAGCATTATTCACATCCTTGTGGAGGTGACCACAGCCCCGGGTACTGTTCACAGCTACCTGGGCTCCCCTTTCTGGATCCACCAGGCTGTGCGCCTCCCCACCCCAAACTTGCACTGGAGGGAGATCTCCAGTGGGCATCTGGAATTGGAGTGGCAGCACCCATCGTCCTGGGCAGCCCAAGAGACCTGTTATCAACTCCGATACACAGGAGAAGGCCATCAGGACTGGAAGGTGCTGGAGCCGCCTCTCGGGGCCCGAGG
AGGGACCCTGGAGCTGCGCCCGCGATCTCGCTACCGTTTACAGCTGCGCGCCAGGCTCAACGGCCCCACCTACCAAGGTCCCTGGAGCTCGTGGTCGGACCCAACTAGGGTGGAGACCGCCACCGAGACCGCCTGGATCTCCTTGGTGACCGCTCTGCATCTAGTGCTGGGCCTCAGCGCCGTCCTGGGCCTGCTGCTGCTGAGGTGGCAGTTTCCTGCACACTACAGGAGACTGAGGCATGCCCTGTGGCCCTCACTTCCAGACCTGCACCGGGTCCTAGGCCAGTACCTTAGGGACACTGCAGCCCTGAGCCCGCCCAAGGCCACAGTCTCAGATACCTGTGAAGAAGTGGAACCCAGCCTCCTTGAAATCCTCCCCAAGTCCTCAGAGAGGACTCCTTTGCCCCTGTGTTCCTCCCAGGCCCAGATGGACTACCGAAGATTGCAGCCTTCTTGCCTGGGGACCATGCCCCTGTCTGTGTGCCCACCCATGGCTGAGTCAGGGTCCTGCTGTACCACCCACATTGCCAACCATTCCTACCTACCACTAAGCTATTGGCAGCAGCCTTGAGGACAGGCTCCTCACTCCCAGTTCCCTGGACAGAGCTAAACTCTCGAGACTTCTCTGTGAACTTCCCTACCCTACCCCCACAACACAAGCACCCCAGACCTCACCTCCATCCCCCTCTGTCTGCCCTCACAATTAGGCTTCATTGCACTGATCTTACTCTACTGCTGCTGACATAAAACCAGGACCCTTTCTCCACAGGCAGGCTCATTTCACTAAGCTCCTCCTTTACTTTCTCTCTCCTCTTTGATGTCAAACGCCTTGAAAACAAGCCTCCACTTCCCCACACTTCCCATTTACTCTTGAGACTACTTCAATTAGTTCCCCTACTACACTTTGCTAGTGAAACTGCCCAGGCAAAGTGCACCTCAAATCTTCTAATTCCAAGATCCAATAGGATCTCGTTAATCATCAGTTCCTTTGATCTCGCTGTAAGATTTGTCAAGGCTGACTACTCACTTCTCCTTTAAATTCTTTCCTACCTTGGTCCTGCCTCTTTGAGTATATTAGTAGGTTTTTTTTATTTGTTTGAGACAGGGTCTCACTCTGTCACCCAGGCTGCAGTGCAATGGCGCGATCTCAGCTCACTGCAACCTCCACCTCCGGGTTCAAGCGATTCTTGTGCCTCGGCCTCCCTAGTAGCTGGGATTACAGGCGCACACCACCACACACAGCTAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAGACGGAGCCTTGCTCTGTTGCCAGACTGGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTGCCTCCCGGGTTCAAGCCATTCTGCCTCAGCCTCCCAAGTAGCTGGGAGTACAGGCGTCTGCCACCATGCCTAATTTTTTTCTATTTTTAGGAGAGACCGGTTTTCACCACGTTGGCCAGGATGGTCTCGATATCCTGATCTCGTGATCCGCCTGCCTCTGCCTCCCAAAGTGCTGGGATTACAGGTGTGACCCACTGCGCACAGCCCCAGCTAATTTTCATATTTTTAGTAGAGACAGGGTTTTGCCATGTTGCCCAGGCTGGTCTTGAACTCCTAACCTCGGGTGATCCACCCACCTTGGCCTCCCAAAGTGTTAGGATTACAGGCATGAGCCACTGCGCCCGGCTGAGTGTACTAGTAGTTAAGAGAATAAACTAGATCTAGAATCAGAGCTGGATTCAATTCCTGTCCTTCACATTTACTAGCTGTGCAACCTTGGGCACATAACTTAATGTCTTTGAGCCTTAGTTTTTTCATCTGTAAAACAGGGATAATAACAGCACCCCATAGAGTTGTGACGAGGATTGAGATAATCTAAGTAAAGCACAGTCCCTAGGACATAGTAAATGATTCATATATCCGAACTACTGTTATAATTATTCCTTCTTACTCTCCTCTTCTAGCATTTCTTCCAATTATTACAGTCCTTCAAGATTC
CATTTCTTAACAGTCTCCAATCCCATCTATTCTCTGCCTTTACTATATGTTGACCATTCCAAAGTTCTTATCTCTAGCTCAGACATCTACTACAGCACTGTGATGCTTTATGCAACTAACTGTTTACATATCTGTCCCCTGCTACTAGATTGTGAGCTCCTTGAGGGAAAGGAACATGATTTATTTGTCCTTTTCCCCCAGCACCTAGAGTAGTGCTTGGTGCATGATAGTAGGCCTTCAATAAATTTTTTCTAAATGAATGA"; + const string aaSequence = + "MPSWALFMVTSCLLLAPQNLAQVSSQDVSLLASDSEPLKCFSRTFEDLTCFWDEEEAAPSGTYQLLYAYPREKPRACPLSSQSMPHFGTRYVCQFPDQEEVRLFFPLHLWVKNVFLNQTRTQRVLFVDSVGLPAPPSIIKAMGGSQPGELQISWEEPAPEISDFLRYELRYGPRDPKNSTGPTVIQLIATETCCPALQRPHSASALDQSPCAQPTMPWQDGPKQTSPSREASALTAEGGSCLISGLQPGNSYWLQLRSEPDGISLGGSWGSWSLPVTVDLPGDAVALGLQCFTLDLKNVTCQWQQQDHASSQGFFYHSRARCCPRDRYPIWENCEEEEKTNPGLQTPQFSRCHFKSRNDSIIHILVEVTTAPGTVHSYLGSPFWIHQAVRLPTPNLHWREISSGHLELEWQHPSSWAAQETCYQLRYTGEGHQDWKVLEPPLGARGGTLELRPRSRYRLQLRARLNGPTYQGPWSSWSDPTRVETATETAWISLVTALHLVLGLSAVLGLLLLRWQFPAHYRRLRHALWPSLPDLHRVLGQYLRDTAALSPPKATVSDTCEEVEPSLLEILPKSSERTPLPLCSSQAQMDYRRLQPSCLGTMPLSVCPPMAESGSCCTTHIANHSYLPLSYWQQP*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_005364.1", + "NM_005373.2:c.1904_1905insAG", cdsSequence, aaSequence, 1905, 1904, 635, "P", "PX", "AG", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_005364.1:p.(Ter636ValextTer46)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaInsertion2() + { + // chr1-158581056-A-AAG + const string cdsSequence = + 
"ATGGAGCAATTTCCAAAGGAAACCGTTGTGGAGAGCAGTGGGCCAAAGGTTTTGGAAACAGCAGAAGAGATCCAGGAGAGGCGTCAGGAAGTGTTGACTCGGTATCAAAGTTTCAAGGAGCGGGTCGCTGAGAGGGGTCAGAAGCTTGAGGATTCCTATCACTTACAAGTTTTCAAGCGAGATGCAGATGATCTGGGGAAGTGGATCATGGAGAAAGTCAATATCTTAACCGATAAGAGCTATGAAGACCCAACTAATATACAGGGGAAATATCAGAAGCATCAATCCCTTGAAGCAGAGGTGCAAACAAAATCAAGACTCATGTCTGAACTGGAAAAAACAAGGGAAGAACGATTTACCATGGGTCATTCTGCCCACGAAGAAACGAAGGCCCATATAGAGGAGCTACGCCACCTGTGGGACCTGCTGTTAGAGCTGACCCTGGAGAAGGGTGACCAGTTGCTGCGGGCCCTGAAGTTCCAGCAGTATGTACAGGAGTGTGCTGACATCTTAGAGTGGATTGGAGACAAGGAGGCTATAGCGACATCAGTGGAGCTAGGTGAAGACTGGGAGCGCACCGAAGTTCTGCATAAGAAATTTGAAGACTTCCAAGTGGAGCTGGTAGCTAAAGAAGGGAGAGTTGTTGAAGTGAACCAATATGCCAATGAGTGTGCCGAGGAAAACCATCCTGACCTACCCTTAATTCAGTCTAAGCAAAATGAGGTGAATGCTGCCTGGGAGCGCCTTCGTGGTTTGGCTCTCCAGAGACAGAAAGCTCTGTCCAATGCTGCAAACTTACAACGATTCAAAAGGGATGTGACTGAAGCCATCCAGTGGATCAAGGAGAAGGAACCTGTACTCACCTCTGAGGACTATGGCAAAGACCTTGTTGCCTCTGAAGGACTGTTTCACAGTCACAAGGGACTTGAGAGAAATCTTGCTGTCATGAGTGACAAGGTGAAGGAGTTATGTGCTAAAGCAGAGAAGCTGACACTTTCCCATCCTTCAGATGCACCTCAGATCCAGGAGATGAAAGAAGATCTGGTCTCCAGCTGGGAGCATATTCGTGCCCTGGCCACCAGCAGATATGAAAAACTGCAGGCTACTTATTGGTACCATCGATTTTCATCTGACTTTGATGAACTCTCAGGCTGGATGAACGAGAAGACTGCTGCGATCAATGCTGATGAGCTGCCAACAGATGTGGCTGGTGGAGAAGTTCTGCTGGACAGGCATCAGCAGCATAAGCATGAGATTGACTCTTACGATGACCGATTTCAATCTGCTGATGAGACTGGTCAAGACCTCGTGAATGCCAATCATGAAGCCTCTGATGAAGTTCGGGAAAAGATGGAAATACTTGACAACAACTGGACTGCCCTGCTGGAACTGTGGGACGAGCGTCATCGTCAGTATGAGCAGTGCTTGGACTTTCATCTCTTCTACAGAGACAGTGAGCAAGTGGACAGTTGGATGAGTAGACAAGAGGCCTTCCTGGAAAACGAGGATCTGGGAAACTCACTGGGCAGTGCAGAAGCCCTTCTTCAGAAGCATGAAGACTTTGAGGAAGCCTTTACTGCCCAGGAAGAGAAGATCATAACTGTAGACAAGACTGCAACCAAATTGATTGGTGATGACCATTATGATTCAGAGAACATCAAGGCTATCCGTGACGGGCTGTTAGCCCGGCGGGATGCCCTACGTGAAAAGGCTGCCACTAGACGTAGATTGCTGAAGGAGTCATTGCTTCTGCAAAAACTGTATGAGGACTCAGATGACCTAAAGAACTGGATCAACAAGAAGAAAAAGTTGGCAGATGATGAAGATTACAAGGACATACAGAACTTGAAGAGCAGGGTTCAAAAGCAGCAAGTCTTTGAAAAGGAGTTGGCAGTTAATAAGACCCAGCTGGAAAACATACAGAAAACTGGCCAAGAGATGATTGAGGGTGGTCACTATGCCTCTGACAATGTGACCACTCGTCTGAGTGAAGTTGCCA
GCCTCTGGGAGGAGTTGCTGGAGGCTACAAAACAGAAAGGGACCCAGTTGCATGAGGCCAACCAGCAGCTGCAATTTGAAAATAATGCAGAAGATTTGCAGCGCTGGCTGGAGGATGTTGAGTGGCAAGTCACCTCTGAGGATTATGGGAAAGGCCTGGCCGAGGTACAGAATCGACTCAGGAAACACGGCCTCCTGGAGTCGGCTGTGGCTGCTCGTCAGGATCAGGTGGATATCCTTACAGACCTGGCTGCATATTTTGAAGAAATAGGCCATCCTGATTCTAAGGATATAAGGGCAAGGCAAGAGTCCTTGGTATGCCGATTTGAAGCTCTGAAAGAGCCACTGGCCACCCGAAAGAAGAAGCTCTTAGACCTTCTCCATCTGCAGCTGATTTGTAGAGACACAGAGGATGAGGAGGCCTGGATCCAAGAGACTGAACCCTCAGCTACTTCCACCTACCTTGGAAAGGACCTGATTGCTTCCAAAAAGCTTCTGAATAGGCATAGAGTCATCCTGGAGAACATTGCCAGCCATGAACCACGCATTCAAGAGATAACAGAAAGGGGAAACAAAATGGTAGAGGAAGGACACTTTGCTGCAGAAGATGTGGCCTCTAGGGTCAAGAGTTTGAACCAGAATATGGAGTCTCTCCGTGCTCGAGCTGCTAGGCGACAAAATGATCTTGAAGCCAATGTCCAGTTCCAGCAGTACCTGGCTGACCTGCATGAAGCAGAAACATGGATCAGAGAGAAGGAACCTATTGTAGATAATACTAACTATGGTGCTGATGAAGAAGCAGCTGGGGCTCTTCTAAAGAAGCATGAGGCCTTTCTATTAGATCTCAATTCATTTGGAGACAGTATGAAAGCTCTGCGGAATCAGGCAAACGCCTGCCAGCAACAACAGGCTGCACCAGTGGAGGGAGTTGCTGGAGAACAAAGGGTCATGGCTTTATATGACTTCCAGGCCCGCAGCCCCCGAGAAGTCACCATGAAGAAAGGTGATGTCTTAACGCTGCTCAGTTCCATCAATAAGGACTGGTGGAAGGTGGAAGCTGCTGATCATCAGGGCATTGTCCCAGCTGTCTATGTCAGAAGACTGGCCCACGATGAGTTCCCGATGCTCCCACAGCGGCGACGAGAAGAGCCAGGAAACATCACCCAGCGCCAGGAGCAGATTGAGAACCAATACCGCTCCCTCTTGGATCGGGCAGAAGAACGCAGACGTCGTCTATTGCAACGTTATAATGAATTTTTATTGGCCTATGAGGCAGGAGACATGCTGGAATGGATTCAAGAGAAAAAGGCAGAAAACACTGGAGTGGAACTAGATGATGTTTGGGAGCTGCAGAAAAAGTTTGATGAGTTCCAAAAGGATTTGAATACCAATGAGCCTCGGCTAAGGGATATCAACAAGGTAGCTGATGATCTACTATTTGAAGGACTTCTAACACCAGAAGGAGCTCAAATCCGGCAGGAATTGAATTCCCGCTGGGGTTCTTTGCAGAGGCTTGCAGATGAACAGCGGCAGCTGCTGGGCAGTGCCCATGCTGTTGAAGTGTTTCACAGAGAAGCAGATGACACGAAGGAGCAGATTGAGAAGAAATGCCAGGCCCTCAGTGCTGCAGACCCTGGCTCAGATCTGTTCAGTGTTCAGGCTCTTCAGCGACGGCATGAGGGCTTTGAAAGGGACCTCGTACCCCTGGGAGATAAGGTGACCATACTGGGGGAGACAGCAGAGCGGCTCAGTGAGTCCCATCCAGATGCCACTGAGGACCTGCAGAGACAGAAAATGGAGCTGAATGAGGCCTGGGAAGACCTGCAGGGGCGTACAAAGGATCGTAAGGAGAGCCTAAATGAGGCCCAGAAATTCTACCTGTTCCTCAGCAAGGCCAGGGATCTGCAGAACTGGATCAGTAGCATTGGTGGCATGGTATCATCACAGGAGCTGGCCGAAGACTTAACTGGCATAGAGATCTTGCTGGAGAGACATCAGGAG
CACCGTGCTGACATGGAGGCAGAGGCTCCCACCTTCCAGGCCTTAGAGGACTTCAGTGCAGAACTTATCGACAGTGGGCACCATGCTAGCCCTGAAATTGAAAAAAAGCTTCAAGCTGTCAAGCTAGAGAGAGATGATTTGGAGAAGGCTTGGGAAAAACGCAAGAAGATCCTAGACCAGTGCCTGGAGTTGCAGATGTTCCAGGGGAACTGTGATCAAGTTGAGAGCTGGATGGTGGCACGTGAGAATTCCCTGAGGTCAGATGACAAAAGTTCCTTAGACAGTCTGGAGGCTTTGATGAAGAAACGGGACGATTTGGACAAAGCAATCACTGCCCAGGAAGGGAAGATCACTGACCTAGAACATTTTGCTGAGAGCCTCATTGCTGATGAACACTATGCCAAAGAAGAGATTGCTACGCGGCTCCAACGTGTACTAGACAGGTGGAAGGCTCTCAAAGCACAACTGATTGATGAGCGGACAAAGCTTGGAGACTATGCCAACCTAAAACAATTCTACCGAGACCTTGAGGAGCTGGAAGAATGGATCAGTGAGATGCTGCCCACAGCCTGTGATGAATCCTACAAAGACGCCACTAACATTCAGAGGAAATACCTGAAACACCAGACCTTTGCACATGAAGTCGATGGCCGATCTGAGCAGGTGCATGGCGTCATCAACCTGGGGAACTCCCTGATTGAGTGTAGCGCTTGTGATGGCAATGAAGAGGCCATGAAGGAGCAACTGGAACAGCTGAAGGAACATTGGGATCATCTGCTTGAGAGAACAAATGACAAAGGGAAGAAGCTCAATGAGGCCAGTCGTCAACAGAGGTTCAACACAAGCATCCGGGACTTTGAGTTCTGGCTCTCAGAGGCAGAGACATTGCTGGCCATGAAAGATCAGGCCAGGGACTTGGCTTCAGCAGGAAACCTACTCAAGAAGCATCAGCTATTGGAGAGAGAGATGTTGGCTCGAGAGGATGCACTCAAGGACCTGAATACATTGGCTGAAGATTTGCTCTCCAGCGGGACTTTCAACGTTGATCAGATTGTGAAGAAAAAAGATAATGTCAACAAGCGTTTCCTGAATGTCCAAGAATTGGCAGCTGCACACCACGAAAAATTGAAAGAGGCCTATGCCTTGTTCCAGTTCTTCCAGGATCTAGATGATGAGGAATCCTGGATAGAGGAGAAGTTGATACGAGTGAGCTCCCAGGACTATGGGAGAGATCTTCAGGGGGTTCAGAACTTGCTGAAGAAGCACAAACGCCTAGAGGGGGAGCTGGTGGCCCATGAGCCTGCCATCCAGAATGTGCTGGATATGGCAGAGAAGCTGAAAGACAAGGCTGCTGTGGGGCAAGAGGAGATCCAGTTGCGGCTGGCTCAGTTTGTTGAACACTGGGAGAAGCTCAAAGAGTTGGCCAAGGCCCGAGGACTTAAGTTGGAAGAATCCCTAGAATACTTGCAATTCATGCAGAATGCTGAGGAAGAGGAAGCTTGGATCAATGAAAAGAATGCTTTGGCTGTCCGAGGAGATTGTGGAGATACATTAGCTGCTACTCAGAGCTTGCTAATGAAGCATGAAGCTTTGGAAAATGACTTTGCTGTCCATGAGACCCGAGTACAAAATGTGTGTGCACAAGGAGAAGACATCCTAAATAAGGTGTTGCAGGAGGAAAGTCAGAACAAAGAGATTTCTTCCAAGATAGAGGCTCTGAATGAAAAGACCCCTTCTCTGGCTAAGGCAATAGCTGCTTGGAAGTTGCAATTGGAAGACGATTATGCCTTTCAGGAATTCAACTGGAAGGCTGATGTGGTAGAGGCTTGGATAGCTGATAAGGAAACAAGCCTAAAGACCAATGGCAATGGTGCAGACCTTGGTGACTTCCTCACTCTTCTGGCAAAACAGGACACTCTGGATGCCAGTCTGCAGAGTTTCCAGCAAGAGAGACTTCCCGAGATCACTGACCTGAAGGACAAACTGATTTCTGCTCAACA
CAACCAGTCTAAAGCCATTGAAGAGCGTTATGCCGCTCTGCTGAAGCGCTGGGAACAGTTGCTGGAAGCCTCGGCAGTCCACAGACAGAAATTGCTGGAGAAACAGCTGCCTCTACAGAAGGCTGAGGACCTGTTCGTGGAATTTGCACATAAGGCTTCAGCTTTGAACAACTGGTGTGAAAAGATGGAAGAAAACTTGTCAGAGCCTGTGCACTGTGTCTCCCTGAATGAAATTCGGCAGCTGCAGAAAGACCATGAGGACTTCTTGGCCTCCCTGGCTAGGGCTCAAGCAGACTTTAAATGTTTGCTGGAGCTAGACCAGCAGATTAAGGCCTTAGGTGTGCCTTCCAGCCCTTATACCTGGTTAACAGTGGAGGTGCTGGAAAGGACCTGGAAGCACCTATCTGACATCATTGAGGAACGGGAGCAGGAGCTGCAAAAGGAAGAGGCAAGACAGGTCAAGAACTTTGAGATGTGTCAGGAGTTTGAACAGAATGCCAGTACCTTCCTTCAATGGATCCTGGAAACCAGGGCTTACTTTCTGGATGGATCATTGCTCAAAGAAACAGGAACTCTGGAATCTCAGCTGGAAGCAAATAAAAGAAAACAGAAGGAGATCCAGGCGATGAAGCGTCAACTAACCAAGATTGTGGACCTGGGGGACAACTTGGAAGACGCTCTGATCCTTGATATCAAATACAGCACCATTGGATTGGCTCAGCAGTGGGACCAGCTCTACCAGCTTGGGTTGCGGATGCAACACAACCTGGAGCAACAGATCCAGGCCAAGGACATCAAAGGTGTGAGTGAAGAGACTCTAAAGGAATTTAGCACAATCTATAAACACTTTGATGAGAATTTGACAGGGCGCCTGACTCACAAAGAGTTCCGGTCCTGCCTGAGAGGACTCAATTACTACTTGCCCATGGTGGAGGAGGATGAACATGAGCCCAAGTTTGAGAAGTTCCTGGATGCTGTGGATCCAGGGAGGAAGGGCTATGTCTCACTGGAGGACTATACTGCTTTCCTGATTGACAAGGAGTCAGAAAACATCAAGTCCAGTGATGAAATAGAGAATGCCTTCCAAGCCCTGGCAGAGGGCAAGTCATATATTACCAAAGAAGACATGAAGCAGGCCCTTACCCCAGAGCAAGTGTCATTCTGTGCCACACATATGCAGCAATATATGGACCCACGGGGTCGAAGCCATCTCTCTGGCTATGACTACGTTGGCTTCACCAATTCCTACTTTGGCAACTAATAAGCAGCTCCTCGTGGATCGTAGAAAATCTTAGTGTCGTGGGAAATTTACTGGGGGGCAAAGAGTACAGGCAAATGTGGAAGATAAAGATGGCCTCGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCTTGTGTTTGTGTGCATATTACATTTATTGTAGGATCTTAAAAAATCTCAAGGGTGGGAGATAGAAAGGTTAATAGAGTTGGAGGAGTGGAAGCTATTTTGTATGCAACTAGTCACTGCTGAGGGGTGTCAAAGTTTCTATTTTTATTTGTTCTGTTTTGCACGTCTTTATCATTTTGCTTTATTCCGATTATAGAATAAAGTAATTCTTTTTAAAAATATTTTTTGGGGCAAAGTTAAGTAAAATGTTGAGCTTCTATATTTCTGGGAACTGTACTCATATAAGAGTGGGCAGCTAATTTTACTGTAAAGAAGGGCCATGGTATAGTAGATAAATAAAATCCAAGGCAATTTTCAAACAATTTTTTTAAACTTTGGAATGTGTTTAAATTTAAATTTGAAAATAAAGATATTTGATTTTCTGGGG"; + const string aaSequence = + 
"MEQFPKETVVESSGPKVLETAEEIQERRQEVLTRYQSFKERVAERGQKLEDSYHLQVFKRDADDLGKWIMEKVNILTDKSYEDPTNIQGKYQKHQSLEAEVQTKSRLMSELEKTREERFTMGHSAHEETKAHIEELRHLWDLLLELTLEKGDQLLRALKFQQYVQECADILEWIGDKEAIATSVELGEDWERTEVLHKKFEDFQVELVAKEGRVVEVNQYANECAEENHPDLPLIQSKQNEVNAAWERLRGLALQRQKALSNAANLQRFKRDVTEAIQWIKEKEPVLTSEDYGKDLVASEGLFHSHKGLERNLAVMSDKVKELCAKAEKLTLSHPSDAPQIQEMKEDLVSSWEHIRALATSRYEKLQATYWYHRFSSDFDELSGWMNEKTAAINADELPTDVAGGEVLLDRHQQHKHEIDSYDDRFQSADETGQDLVNANHEASDEVREKMEILDNNWTALLELWDERHRQYEQCLDFHLFYRDSEQVDSWMSRQEAFLENEDLGNSLGSAEALLQKHEDFEEAFTAQEEKIITVDKTATKLIGDDHYDSENIKAIRDGLLARRDALREKAATRRRLLKESLLLQKLYEDSDDLKNWINKKKKLADDEDYKDIQNLKSRVQKQQVFEKELAVNKTQLENIQKTGQEMIEGGHYASDNVTTRLSEVASLWEELLEATKQKGTQLHEANQQLQFENNAEDLQRWLEDVEWQVTSEDYGKGLAEVQNRLRKHGLLESAVAARQDQVDILTDLAAYFEEIGHPDSKDIRARQESLVCRFEALKEPLATRKKKLLDLLHLQLICRDTEDEEAWIQETEPSATSTYLGKDLIASKKLLNRHRVILENIASHEPRIQEITERGNKMVEEGHFAAEDVASRVKSLNQNMESLRARAARRQNDLEANVQFQQYLADLHEAETWIREKEPIVDNTNYGADEEAAGALLKKHEAFLLDLNSFGDSMKALRNQANACQQQQAAPVEGVAGEQRVMALYDFQARSPREVTMKKGDVLTLLSSINKDWWKVEAADHQGIVPAVYVRRLAHDEFPMLPQRRREEPGNITQRQEQIENQYRSLLDRAEERRRRLLQRYNEFLLAYEAGDMLEWIQEKKAENTGVELDDVWELQKKFDEFQKDLNTNEPRLRDINKVADDLLFEGLLTPEGAQIRQELNSRWGSLQRLADEQRQLLGSAHAVEVFHREADDTKEQIEKKCQALSAADPGSDLFSVQALQRRHEGFERDLVPLGDKVTILGETAERLSESHPDATEDLQRQKMELNEAWEDLQGRTKDRKESLNEAQKFYLFLSKARDLQNWISSIGGMVSSQELAEDLTGIEILLERHQEHRADMEAEAPTFQALEDFSAELIDSGHHASPEIEKKLQAVKLERDDLEKAWEKRKKILDQCLELQMFQGNCDQVESWMVARENSLRSDDKSSLDSLEALMKKRDDLDKAITAQEGKITDLEHFAESLIADEHYAKEEIATRLQRVLDRWKALKAQLIDERTKLGDYANLKQFYRDLEELEEWISEMLPTACDESYKDATNIQRKYLKHQTFAHEVDGRSEQVHGVINLGNSLIECSACDGNEEAMKEQLEQLKEHWDHLLERTNDKGKKLNEASRQQRFNTSIRDFEFWLSEAETLLAMKDQARDLASAGNLLKKHQLLEREMLAREDALKDLNTLAEDLLSSGTFNVDQIVKKKDNVNKRFLNVQELAAAHHEKLKEAYALFQFFQDLDDEESWIEEKLIRVSSQDYGRDLQGVQNLLKKHKRLEGELVAHEPAIQNVLDMAEKLKDKAAVGQEEIQLRLAQFVEHWEKLKELAKARGLKLEESLEYLQFMQNAEEEEAWINEKNALAVRGDCGDTLAATQSLLMKHEALENDFAVHETRVQNVCAQGEDILNKVLQEESQNKEISSKIEALNEKTPSLAKAIAAWKLQLEDDYAFQEFNWKADVVEAWIADKETSLKTNGNGADLGDFLTLLAKQDTLDASLQSFQQERLPEITDLKDKLISAQ
HNQSKAIEERYAALLKRWEQLLEASAVHRQKLLEKQLPLQKAEDLFVEFAHKASALNNWCEKMEENLSEPVHCVSLNEIRQLQKDHEDFLASLARAQADFKCLLELDQQIKALGVPSSPYTWLTVEVLERTWKHLSDIIEEREQELQKEEARQVKNFEMCQEFEQNASTFLQWILETRAYFLDGSLLKETGTLESQLEANKRKQKEIQAMKRQLTKIVDLGDNLEDALILDIKYSTIGLAQQWDQLYQLGLRMQHNLEQQIQAKDIKGVSEETLKEFSTIYKHFDENLTGRLTHKEFRSCLRGLNYYLPMVEEDEHEPKFEKFLDAVDPGRKGYVSLEDYTAFLIDKESENIKSSDEIENAFQALAEGKSYITKEDMKQALTPEQVSFCATHMQQYMDPRGRSHLSGYDYVGFTNSYFGN*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_003117.2", + "NM_003126.2:c.7257_7258insCT", cdsSequence, aaSequence, 7258, 7257, 2420, "", "X", "CT", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_003117.2:p.(Ter2420LeuextTer12)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaInsertion3() + { + // VID: chr1-175914292-T-TAG (NC_000001.10:g.175914293_175914294insGA), key: NM_001001740.3, expected: NP_001001740.1:p.(Ter708TyrextTer19), actual: NP_001001740.1:p.(Ter708TyrfsTer20) + const string cdsSequence = + 
"ATGTCTGGTAGCCGCCAGGCCGGGTCGGGCTCCGCTGGGACAAGCCCCGGGTCCTCGGCGGCCTCCTCGGTGACTTCCGCCTCCTCGTCTTTATCCTCTTCCCCGTCGCCGCCTTCCGTGGCGGTTTCGGCGGCAGCGCTGGTGTCCGGCGGGGTGGCCCAGGCCGCCGGCTCGGGCGGCCTCGGGGGCCCGGTGCGGCCTGTGTTGGTGGCGCCCGCCGTATCGGGTAGCGGCGGCGGGGCGGTGTCCACGGGCCTGTCCCGGCACAGCTGCGCGGCCAGGCCCAGCGCCGGCGTAGGAGGCAGCAGCTCCAGCCTAGGCAGCGGCAGCAGGAAGCGACCTCTCCTCGCCCCCCTCTGCAACGGGCTCATCAACTCCTACGAGGACAAAAGCAACGACTTCGTATGCCCCATCTGCTTTGATATGATTGAAGAAGCATACATGACAAAATGTGGCCACAGCTTTTGCTACAAGTGTATTCATCAGAGTTTGGAGGACAATAATAGATGTCCCAAGTGTAACTATGTTGTGGACAATATTGACCATCTGTATCCTAATTTCTTGGTGAATGAACTCATTCTTAAACAGAAGCAAAGATTTGAGGAAAAGAGGTTCAAATTGGACCACTCAAATGGCCACAGGTGGCAGATATTTCAAGATTGGTTGGGAACTGACCAAGATAACCTTGATTTGGCCAATGTCAATCTTATGTTGGAGTTACTAGTGCAGAAGAAGAAACAACTGGAAGCAGAATCACATGCAGCCCAACTACAGATTCTTATGGAATTCCTCAAGGTTGCAAGAAGAAATAAGAGAGAGGAAATGAGTGGCTTATACTCTCCTGTCAGTGAGGATAGCACAGTGCCTCAATTTGAAGCTCCTTCTCCATCACACAGTAGTATTATTGATTCCACAGAATACAGCCAACCTCCAGGTTTCAGTGGCAGTTCTCAGACAAAGAAACAGCCTTGGTATAATAGCACGTTAGCATCAAGACGAAAACGACTTACTGCTCATTTTGAAGACTTGGAGCAGTGTTACTTTTCTACAAGGATGTCTCGTATCTCAGATGACAGTCGAACTGCAAGCCAGTTGGATGAATTTCAGGAATGCTTGTCCAAGTTTACTCGATATAATTCAGTACGACCTTTAGCCACATTGTCATATGCTAGTGATCTCTATAATGGTTCCAGTATAGTCTCTAGTATTGAATTTGACCGGGATTGTGACTATTTTGCGATTGCTGGAGTTACAAAGAAGATTAAAGTCTATGAATATGACACTGTCATCCAGGATGCAGTGGATATTCATTACCCTGAGAATGAAATGACCTGCAATTCGAAAATCAGCTGTATCAGTTGGAGTAGTTACCATAAGAACCTGTTAGCTAGCAGTGATTATGAAGGCACTGTTATTTTATGGGATGGATTCACAGGACAGAGGTCAAAGGTCTATCAGGAGCATGAGAAGAGGTGTTGGAGTGTTGACTTTAATTTGATGGATCCTAAACTCTTGGCTTCAGGTTCTGATGATGCAAAAGTGAAGCTGTGGTCTACCAATCTAGACAACTCAGTGGCAAGCATTGAGGCAAAGGCTAATGTGTGCTGTGTTAAATTCAGCCCCTCTTCCAGATACCATTTGGCTTTCGGCTGTGCAGATCACTGTGTCCACTACTATGATCTTCGTAACACTAAACAGCCAATCATGGTATTCAAAGGACACCGTAAAGCAGTCTCTTATGCAAAGTTTGTGAGTGGTGAGGAAATTGTCTCTGCCTCAACAGACAGTCAGCTAAAACTGTGGAATGTAGGGAAACCATACTGCCTACGTTCCTTCAAGGGTCATATCAATGAAAAAAACTTTGTAGGCCTGGCTTCCAATGGAGATTATATAGCTTGTGGAAGTGAAAATAACTCTCTCTACCTGTACTATAAAGGACTTTCTAAGACTTTGCTAACTTTTAAGTTTGATACAGTCAAAAGTGTTCTCGACAAAGACC
GAAAAGAAGATGATACAAATGAATTTGTTAGTGCTGTGTGCTGGAGGGCACTACCAGATGGGGAGTCCAATGTGCTGATTGCTGCTAACAGTCAGGGTACAATTAAGGTGCTAGAATTGGTATGAAGGGTTAACTCAAGTCAAATTGTACTTGATCCTGCTGAAATACATCTGCAGCTGACAATGAGAGAAGAAACAGAAAATGTCATGTGATGTCTCTCCCCAAAGTCATCATGGGTTTTGGATTTGTTTTGAATATTTTTTTCTTTTTTTCTTTTCCCTCCTTTATGACCTTTGGGACATTGGGAATACCCAGCCAACTCTCCACCATCAATGTAACTCCATGGACATTGCTGCTCTTGGTGGTGTTATCTAATTTTTGTGATAGGGAAACAAATTCTTTTGAATAAAAATAAATAACAAAACAATAAAAGTTTATTGAGCCACAGTTGAAAAAAAAAAAAAAAAAA"; + const string aaSequence = + "MSGSRQAGSGSAGTSPGSSAASSVTSASSSLSSSPSPPSVAVSAAALVSGGVAQAAGSGGLGGPVRPVLVAPAVSGSGGGAVSTGLSRHSCAARPSAGVGGSSSSLGSGSRKRPLLAPLCNGLINSYEDKSNDFVCPICFDMIEEAYMTKCGHSFCYKCIHQSLEDNNRCPKCNYVVDNIDHLYPNFLVNELILKQKQRFEEKRFKLDHSNGHRWQIFQDWLGTDQDNLDLANVNLMLELLVQKKKQLEAESHAAQLQILMEFLKVARRNKREEMSGLYSPVSEDSTVPQFEAPSPSHSSIIDSTEYSQPPGFSGSSQTKKQPWYNSTLASRRKRLTAHFEDLEQCYFSTRMSRISDDSRTASQLDEFQECLSKFTRYNSVRPLATLSYASDLYNGSSIVSSIEFDRDCDYFAIAGVTKKIKVYEYDTVIQDAVDIHYPENEMTCNSKISCISWSSYHKNLLASSDYEGTVILWDGFTGQRSKVYQEHEKRCWSVDFNLMDPKLLASGSDDAKVKLWSTNLDNSVASIEAKANVCCVKFSPSSRYHLAFGCADHCVHYYDLRNTKQPIMVFKGHRKAVSYAKFVSGEEIVSASTDSQLKLWNVGKPYCLRSFKGHINEKNFVGLASNGDYIACGSENNSLYLYYKGLSKTLLTFKFDTVKSVLDKDRKEDDTNEFVSAVCWRALPDGESNVLIAANSQGTIKVLELV*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001001740.1", + "NM_001001740.3:c.2120_2121insCT", cdsSequence, aaSequence, 2121, 2120, 707, "V", "VX", "CT", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001001740.1:p.(Ter708TyrextTer19)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Extension_ViaSnv() + { + // VID: chr1-40363046-A-C (NC_000001.10:g.40363046A>C), key: NM_001033081.2, expected: NP_001028253.1:p.(Ter365GluextTer18), actual: NP_001028253.1:p.(Ter365Glu) + const string cdsSequence = + 
"ATGGACTACGACTCGTACCAGCACTATTTCTACGACTATGACTGCGGGGAGGATTTCTACCGCTCCACGGCGCCCAGCGAGGACATCTGGAAGAAATTCGAGCTGGTGCCATCGCCCCCCACGTCGCCGCCCTGGGGCTTGGGTCCCGGCGCAGGGGACCCGGCCCCCGGGATTGGTCCCCCGGAGCCGTGGCCCGGAGGGTGCACCGGAGACGAAGCGGAATCCCGGGGCCACTCGAAAGGCTGGGGCAGGAACTACGCCTCCATCATACGCCGTGACTGCATGTGGAGCGGCTTCTCGGCCCGGGAACGGCTGGAGAGAGCTGTGAGCGACCGGCTCGCTCCTGGCGCGCCCCGGGGGAACCCGCCCAAGGCGTCCGCCGCCCCGGACTGCACTCCCAGCCTCGAAGCCGGCAACCCGGCGCCCGCCGCCCCCTGTCCGCTGGGCGAACCCAAGACCCAGGCCTGCTCCGGGTCCGAGAGCCCAAGCGACTCGGAGAATGAAGAAATTGATGTTGTGACAGTAGAGAAGAGGCAGTCTCTGGGTATTCGGAAGCCGGTCACCATCACGGTGCGAGCAGACCCCCTGGATCCCTGCATGAAGCATTTCCACATCTCCATCCATCAGCAACAGCACAACTATGCTGCCCGTTTTCCTCCAGAAAGCTGCTCCCAAGAAGAGGCTTCAGAGAGGGGTCCCCAAGAAGAGGTTCTGGAGAGAGATGCTGCAGGGGAAAAGGAAGATGAGGAGGATGAAGAGATTGTGAGTCCCCCACCTGTAGAAAGTGAGGCTGCCCAGTCCTGCCACCCCAAACCTGTCAGTTCTGATACTGAGGATGTGACCAAGAGGAAGAATCACAACTTCCTGGAGCGCAAGAGGCGGAATGACCTGCGTTCGCGATTCTTGGCGCTGAGGGACCAGGTGCCCACCCTGGCCAGCTGCTCCAAGGCCCCCAAAGTAGTGATCCTAAGCAAGGCCTTGGAATACTTGCAAGCCCTGGTGGGGGCTGAGAAGAGGATGGCTACAGAGAAAAGACAGCTCCGATGCCGGCAGCAGCAGTTGCAGAAAAGAATTGCATACCTCACTGGCTACTAACTGACCAAAAAGCCTGACAGTTCTGTCTTACGAAGACACAAGTTTATTTTTTAACCTCCCTCTCCCCTTTAGTAATTTGCACATTTTGGTTATGGTGGGACAGTCTGGACAGTAGATCCCAGAATGCATTGCAGCCGGTGCACACACAATAAAGGCTTGCATTCTTGGAAACCTTGAAACCCAGCTCTCCCTCTTCCCTGACTCATGGGAGTGCTGTATGTTCTCTGGCGCCTTTGGCTTCCCAGCAGGCAGCTGACTGAGGAGCCTTGGGGTCTGCCTAGCTCACTAGCTCTGAAGAAAAGGCTGACAGATGCTATGCAACAGGTGGTGGATGTTGTCAGGGGCTCCAGCCTGCATGAAATCTCACACTCTGCATGAGCTTTAGGCTAGGAAAGGATGCTCCCAACTGGTGTCTCTGGGGTGATGCAAGGACAGCTGGGCCTGGATGCTCTCCCTGAGGCTCCTTTTTCCAGAAGACACACGAGCTGTCTTGGGTGAAGACAAGCTTGCAGACTTGATCAACATTGACCATTACCTCACTGTCAGACACTTTACAGTAGCCAAGGAGTTGGAAACCTTTATATATTATGATGTTAGCTGACCCCCTTCCTCCCACTCCCAATGCTGCGACCCTGGGAACACTTAAAAAGCTTGGCCTCTAGATTCTTTGTCTCAGAGCCCTCTGGGCTCTCTCCTCTGAGGGAGGGACCTTTCTTTCCTCACAAGGGACTTTTTTGTTCCATTATGCCTTGTTATGCAATGGGCTCTACAGCACCCTTTCCCACAGGTCAGAAATATTTCCCCAAGACACAGGGAAATCGGTCCTAGCCTGGGGCCTGGGGATAGCTTGGAGTCCTGGCCCATGAACTTGATCCCTGCCCAGGTGTTTTCCGAGGGGCACTTG
AGGCCCAGTCTTTTCTCAAGGCAGGTGTAAGACACCTCAGAGGGAGAACTGTACTGCTGCCTCTTTCCCACCTGCCTCATCTCAATCCTTGAGCGGCAAGTTTGAAGTTCTTCTGGAACCATGCAAATCTGTCCTCCTCATGCAATTCCAAGGAGCTTGCTGGCTCTGCAGCCACCCTTGGGCCCCTTCCAGCCTGCCATGAATCAGATATCTTTCCCAGAATCTGGGCGTTTCTGAAGTTTTGGGGAGAGCTGTTGGGACTCATCCAGTGCTCCAGAAGGTGGACTTGCTTCTGGTGGGTTTTAAAGGAGCCTCCAGGAGATATGCTTAGCCAACCATGATGGATTTTACCCCAGCTGGACTCGGCAGCTCCAAGTGGAATCCACGTGCAGCTTCTAGTCTGGGAAAGTCACCCAACCTAGCAGTTGTCATGTGGGTAACCTCAGGCACCTCTAAGCCTGTCCTGGAAGAAGGACCAGCAGCCCCTCCAGAACTCTGCCCAGGACAGCAGGTGCCTGCTGGCTCTGGGTTTGGAAGTTGGGGTGGGTAGGGGGTGGTAAGTACTATATATGGCTCTGGAAAACCAGCTGCTACTTCCAAATCTATTGTCCATAATGGTTTCTTTCTGAGGTTGCTTCTTGGCCTCAGAGGACCCCAGGGGATGTTTGGAAATAGCCTCTCTACCCTTCTGGAGCATGGTTTACAAAAGCCAGCTGACTTCTGGAATTGTCTATGGAGGACAGTTTGGGTGTAGGTTACTGATGTCTCAACTGAATAGCTTGTGTTTTATAAGCTGCTGTTGGCTATTATGCTGGGGGAGTCTTTTTTTTTTATATTGTATTTTTGTATGCCTTTTGCAAAGTGGTGTTAACTGTTTTTGTACAAGGAAAAAAACTCTTGGGGCAATTTCCTGTTGCAAGGGTCTGATTTATTTTGAAAGGCAAGTTCACCTGAAATTTTGTATTTAGTTGTGATTACTGATTGCCTGATTTTAAAATGTTGCCTTCTGGGACATCTTCTAATAAAAGATTTCTCAAACATGTC"; + const string aaSequence = + "MDYDSYQHYFYDYDCGEDFYRSTAPSEDIWKKFELVPSPPTSPPWGLGPGAGDPAPGIGPPEPWPGGCTGDEAESRGHSKGWGRNYASIIRRDCMWSGFSARERLERAVSDRLAPGAPRGNPPKASAAPDCTPSLEAGNPAPAAPCPLGEPKTQACSGSESPSDSENEEIDVVTVEKRQSLGIRKPVTITVRADPLDPCMKHFHISIHQQQHNYAARFPPESCSQEEASERGPQEEVLERDAAGEKEDEEDEEIVSPPPVESEAAQSCHPKPVSSDTEDVTKRKNHNFLERKRRNDLRSRFLALRDQVPTLASCSKAPKVVILSKALEYLQALVGAEKRMATEKRQLRCRQQQLQKRIAYLTGY*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001028253.1", + "NM_001033081.2:c.1093T>G", cdsSequence, aaSequence, 1093, 1093, 365, "*", "E", "G", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001028253.1:p.(Ter365GluextTer18)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_ViaDeletion() + { + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("ENSP00000343890.4", + "ENST00000343938.4:c.10_11del", CdsSequence, AaSequence, 10, 11, 4, "S", "X", "", false, + 
AminoAcidCommon.StandardAminoAcids); + Assert.Equal("ENSP00000343890.4:p.(Ser4GlyfsTer19)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_ViaDeletion2() + { + // VID: chr1-156851289-CAC-C (NC_000001.10:g.156851290_156851291del), key: NM_001007792.1, expected: NP_001007793.1:p.(Arg714CysfsTer117), actual: NP_001007793.1:p.(Arg714CysfsTer?) + const string cdsSequence = + "ATGAAGGAGGCCGCCCTCATCTGCCTGGCACCCTCTGTACCCCCGATCTTGACGGTGAAGTCCTGGGACACCATGCAGTTGCGGGCTGCTAGATCTCGGTGCACAAACTTGTTGGCAGCAAGCTACATCGAGAACCAGCAGCATCTGCAGCATCTGGAGCTCCGTGATCTGAGGGGCCTGGGGGAGCTGAGAAACCTCACCATCGTGAAGAGTGGTCTCCGTTTCGTGGCGCCAGATGCCTTCCATTTCACTCCTCGGCTCAGTCGCCTGAATCTCTCCTTCAACGCTCTGGAGTCTCTCTCCTGGAAAACTGTGCAGGGCCTCTCCTTACAGGAACTGGTCCTGTCGGGGAACCCTCTGCACTGTTCTTGTGCCCTGCGCTGGCTACAGCGCTGGGAGGAGGAGGGACTGGGCGGAGTGCCTGAACAGAAGCTGCAGTGTCATGGGCAAGGGCCCCTGGCCCACATGCCCAATGCCAGCTGTGGTGTGCCCACGCTGAAGGTCCAGGTGCCCAATGCCTCGGTGGATGTGGGGGACGACGTGCTGCTGCGGTGCCAGGTGGAGGGGCGGGGCCTGGAGCAGGCCGGCTGGATCCTCACAGAGCTGGAGCAGTCAGCCACGGTGATGAAATCTGGGGGTCTGCCATCCCTGGGGCTGACCCTGGCCAATGTCACCAGTGACCTCAACAGGAAGAACGTGACGTGCTGGGCAGAGAACGATGTGGGCCGGGCAGAGGTCTCTGTTCAGGTCAACGTCTCCTTCCCGGCCAGTGTGCAGCTGCACACGGCGGTGGAGATGCACCACTGGTGCATCCCCTTCTCTGTGGATGGGCAGCCGGCACCGTCTCTGCGCTGGCTCTTCAATGGCTCCGTGCTCAATGAGACCAGCTTCATCTTCACTGAGTTCCTGGAGCCGGCAGCCAATGAGACCGTGCGGCACGGGTGTCTGCGCCTCAACCAGCCCACCCACGTCAACAACGGCAACTACACGCTGCTGGCTGCCAACCCCTTCGGCCAGGCCTCCGCCTCCATCATGGCTGCCTTCATGGACAACCCTTTCGAGTTCAACCCCGAGGACCCCATCCCTGACACTAACAGCACATCTGGAGACCCGGTGGAGAAGAAGGACGAAACACCTTTTGGGGTCTCGGTGGCTGTGGGCCTGGCCGTCTTTGCCTGCCTCTTCCTTTCTACGCTGCTCCTTGTGCTCAACAAATGTGGACGGAGAAACAAGTTTGGGATCAACCGCCCGGCTGTGCTGGCTCCAGAGGATGGGCTGGCCATGTCCCTGCATTTCATGACATTGGGTGGCAGCTCCCTGTCCCCCACCGAGGGCAAAGGCTCTGGGCTCCAAGGCCACATCATCGAGAACCCACAATACTTCAGTGATGCCTGTGTTCACCACATCAAGCGCCGGGACATCGTGCTCAAGTGGGAGCTGGGGGAGGGCGCCTTTGGGAAGGTCTTCCTTGCTGAGTGCCACAACCTCCTGCCTGAGCAGGACAAGATGCTGGTGGCTGTCAAGGCACTGAAGGAGGCGTCCGAGAGTGCTCGGCAGGACTTCCAGCGTGAGGCTGAGCTGCTCACCATGCTG
CAGCACCAGCACATCGTGCGCTTCTTCGGCGTCTGCACCGAGGGCCGCCCCCTGCTCATGGTCTTTGAGTATATGCGGCACGGGGACCTCAACCGCTTCCTCCGATCCCATGGACCTGATGCCAAGCTGCTGGCTGGTGGGGAGGATGTGGCTCCAGGCCCCCTGGGTCTGGGGCAGCTGCTGGCCGTGGCTAGCCAGGTCGCTGCGGGGATGGTGTACCTGGCGGGTCTGCATTTTGTGCACCGGGACCTGGCCACACGCAACTGTCTAGTGGGCCAGGGACTGGTGGTCAAGATTGGTGATTTTGGCATGAGCAGGGATATCTACAGCACCGACTATTACCGTGTGGGAGGCCGCACCATGCTGCCCATTCGCTGGATGCCGCCCGAGAGCATCCTGTACCGTAAGTTCACCACCGAGAGCGACGTGTGGAGCTTCGGCGTGGTGCTCTGGGAGATCTTCACCTACGGCAAGCAGCCCTGGTACCAGCTCTCCAACACGGAGGCAATCGACTGCATCACGCAGGGACGTGAGTTGGAGCGGCCACGTGCCTGCCCACCAGAGGTCTACGCCATCATGCGGGGCTGCTGGCAGCGGGAGCCCCAGCAACGCCACAGCATCAAGGATGTGCACGCCCGGCTGCAAGCCCTGGCCCAGGCACCTCCTGTCTACCTGGATGTCCTGGGCTAGGGGGCCGGCCCAGGGGCTGGGAGTGGTTAGCCGGAATACTGGGGCCTGCCCTCAGCATCCCCCATAGCTCCCAGCAGCCCCAGGGTGATCTCAAAGTATCTAATTCACCCTCAGCATGTGGGAAGGGACAGGTGGGGGCTGGGAGTAGAGGATGTTCCTGCTTCTCTAGGCAAGGTCCCGTCATAGCAATTATATTTATTATCCCTTGAAAAAAAAAA"; + const string aaSequence = + "MKEAALICLAPSVPPILTVKSWDTMQLRAARSRCTNLLAASYIENQQHLQHLELRDLRGLGELRNLTIVKSGLRFVAPDAFHFTPRLSRLNLSFNALESLSWKTVQGLSLQELVLSGNPLHCSCALRWLQRWEEEGLGGVPEQKLQCHGQGPLAHMPNASCGVPTLKVQVPNASVDVGDDVLLRCQVEGRGLEQAGWILTELEQSATVMKSGGLPSLGLTLANVTSDLNRKNVTCWAENDVGRAEVSVQVNVSFPASVQLHTAVEMHHWCIPFSVDGQPAPSLRWLFNGSVLNETSFIFTEFLEPAANETVRHGCLRLNQPTHVNNGNYTLLAANPFGQASASIMAAFMDNPFEFNPEDPIPDTNSTSGDPVEKKDETPFGVSVAVGLAVFACLFLSTLLLVLNKCGRRNKFGINRPAVLAPEDGLAMSLHFMTLGGSSLSPTEGKGSGLQGHIIENPQYFSDACVHHIKRRDIVLKWELGEGAFGKVFLAECHNLLPEQDKMLVAVKALKEASESARQDFQREAELLTMLQHQHIVRFFGVCTEGRPLLMVFEYMRHGDLNRFLRSHGPDAKLLAGGEDVAPGPLGLGQLLAVASQVAAGMVYLAGLHFVHRDLATRNCLVGQGLVVKIGDFGMSRDIYSTDYYRVGGRTMLPIRWMPPESILYRKFTTESDVWSFGVVLWEIFTYGKQPWYQLSNTEAIDCITQGRELERPRACPPEVYAIMRGCWQREPQQRHSIKDVHARLQALAQAPPVYLDVLG"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001007793.1", + "NM_001007792.1:c.2139_2140del", cdsSequence, aaSequence, 2139, 2140, 714, "R", "X", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001007793.1:p.(Arg714CysfsTer117)", actualResult); + } + + 
[Fact] + public void GetHgvsProteinAnnotation_Frameshift_IncorrectTranslationPeptideSeq() + { + // VID: chr2-42515387-TAT-T (NC_000002.11:g.42515388_42515389del), key: NM_001145076.1, expected: NP_001138548.1:p.(Val324TyrfsTer2), actual: NP_001138548.1:p.(Ile324TyrfsTer2) + const string cdsSequence = + "ATGGACGGTTTCGCCGGCAGTCTCGATGATAGTATTTCTGCTGCAAGTACTTCTGATGTTCAAGATCGCCTGTCAGCTCTTGAGTCACGAGTTCAGCAACAAGAAGATGAAATCACTGTGCTAAAGGCGGCTTTGGCTGATGTTTTGAGGCGTCTTGCAATCTCTGAAGATCATGTGGCCTCAGTGAAAAAATCAGTCTCAAGTAAAGGCCAACCAAGCCCTCGAGCAGTTATTCCCATGTCCTGTATAACCAATGGAAGTGGTGCAAACAGAAAACCAAGTCATACCAGTGCTGTCTCAATTGCAGGAAAAGAAACTCTTTCATCTGCTGCTAAAAGCATAAAACGACCATCACCAGCTGAAAAGTCACATAATTCTTGGGAAAATTCAGATGATAGCCGTAATAAATTGTCGAAAATACCTTCAACACCCAAATTAATACCAAAAGTTACCAAAACTGCAGACAAGCATAAAGATGTCATCATCAACCAAGAAGGAGAATATATTAAAATGTTTATGCGCGGTCGGCCAATTACCATGTTCATTCCTTCCGATGTTGACAACTATGATGACATCAGAACGGAACTGCCTCCTGAGAAGCTCAAACTGGAGTGGGCATATGGTTATCGAGGAAAGGACTGTAGAGCTAATGTTTACCTTCTTCCGACCGGGGAAATAGTTTATTTCATTGCATCAGTAGTAGTACTATTTAATTATGAGGAGAGAACTCAGCGACACTACCTGGGCCATACAGACTGTGTGAAATGCCTTGCTATACATCCTGACAAAATTAGGATTGCAACTGGACAGATAGCTGGCGTGGATAAAGATGGAAGGCCTCTACAACCCCACGTCAGAGTGTGGGATTCTGTTACTCTATCCACACTGCAGATTATTGGACTTGGCACTTTTGAGCGTGGAGTAGGATGCCTGGATTTTTCAAAAGCAGATTCAGGTGTTCATTTATGTGTTATTGATGACTCCAATGAGCATATGCTTACTGTATGGGACTGGCAGAAGAAAGCAAAAGGAGCAGAAATAAAGACAACAAATGAAGTTGTTTTGGCTGTGGAGTTTCACCCAACAGATGCAAATACCATAATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATTCACTAACAAGAAAACAGGGAATTTTTGGGAAATATGAAAAGCCAAAATTTGTGCAGTGTTTAGCATTCTTGGGGAATGGAGATGTTCTTACTGGAGACTCAGGTGGAGTCATGCTTATATGGAGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAGGTGTATATCAAATCAGCAAACAAATCAAAGCTCATGATGGCAGTGTGTTCACACTTTGTCAGATGAGAAATGGGATGTTATTAACTGGAGGAGGGAAAGACAGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGAGGTTCCTGATCAGTATGGCACAATCAGAGCTGTAGCAGAAGGAAAGGCAGATCAATTTTTAGTAGGCACATCACGAAACTTTATTTTACGAGGAACATTTAATGATGGCTTCCAAATAGAAGTACAGGGTCATACAGATGAGCTTTGGGGTCTTGCCACACATCCCTTCAAAGATTTGCTCTTGACATGTGCTCAGGACAGGCAGGTGTGCCTGTGGAACTCAATGGAA
CACAGGCTGGAATGGACCAGGCTGGTAGATGAACCAGGACACTGTGCAGATTTTCATCCAAGTGGCACAGTGGTGGCCATAGGAACGCACTCAGGCAGGTGGTTTGTTCTGGATGCAGAAACCAGAGATCTAGTTTCTATCCACACAGACGGGAATGAACAGCTCTCTGTGATGCGCTACTCAATAGATGGTACCTTCCTGGCTGTAGGATCTCATGACAACTTTATTTACCTCTATGTAGTCTCTGAAAATGGAAGAAAATATAGCAGATATGGAAGGTGCACTGGACATTCCAGCTACATCACACACCTTGACTGGTCCCCAGACAACAAGTATATAATGTCTAACTCGGGAGACTATGAAATATTGTACTGGGACATTCCAAATGGCTGCAAACTAATCAGGAATCGATCGGATTGTAAGGACATTGATTGGACGACATATACCTGTGTGCTAGGATTTCAAGTATTTGGTGTCTGGCCAGAAGGATCTGATGGGACAGATATCAATGCACTGGTGCGATCCCACAATAGAAAGGTGATAGCTGTTGCCGATGACTTTTGTAAAGTCCATCTGTTTCAGTATCCCTGCTCCAAAGCAAAGGCTCCCAGTCACAAGTACAGTGCCCACAGCAGCCATGTCACCAATGTCAGTTTTACTCACAATGACAGTCACCTGATATCAACTGGTGGAAAAGACATGAGCATCATTCAGTGGAAACTTGTGGAAAAGTTATCTTTGCCTCAGAATGAGACTGTAGCGGATACTACTCTAACCAAAGCCCCCGTCTCTTCCACTGAAAGTGTCATCCAATCTAATACTCCCACACCGCCTCCTTCTCAGCCCTTAAATGAGACAGCTGAAGAGGAAAGTAGAATAAGCAGTTCTCCCACACTTCTGGAGAACAGCCTGGAACAAACTGTGGAGCCAAGTGAAGACCACAGCGAGGAGGAGAGTGAAGAGGGCAGCGGAGACCTTGGTGAGCCTCTTTATGAAGAGCCATGCAACGAGATAAGCAAGGAGCAGGCCAAAGCCACCCTTCTGGAGGACCAGCAAGACCCTTCGCCCTCGTCCTAACACCCTGGCTTCAGTGCAACTCTTTTCCTTCAGCTGCATGTGATTTTGTGATAAAGTTCAGGTAACAGGATGGGCAGTGATGGAGAATCACTGTTGATTGAGATTTTGGTTTCCATGTGATTTGTTTTCTTCAATAGTCTTATTTTCAGTCTCTCAAATACAGCCAACTTAAAGTTTTAGTTTGGTGTTTATTGAAAATTAACCAAACTTAATACTAGGAGAAGACTGAATCATTAATGATGTCTCACAAATTACTGTGTACCTAAGTGGTGTGATGTAAATACTGGAAACAAAAACAGCAGTTGCATTGATTTTGAAAACAAACCCCCTTGTTATCTGAACATGTTTTCTTCAGGAACAACCAGAGGTATCACAAACACTGTTACTCATCTACTGGCTCAGACTGTACTACTTTTTTTTTTTTTTTTCCTGAAAAAGAAACCAGAAAAAAATGTACTCTTACTGAGATACCCTCTCACCCCAAATGTGTAATGGAAAATTTTTAATTAAGAAAAACTTCAGTTTTGCCAAGTGCAATGGTGTTGCCTTCTTTAAAAAATGCCGTTTTCTTACACTACCAGTGGATGTCCAGACATGCTCTTAGTCTACTAGAGAGGTGCTGCCTTTTCTAAGTCATAATGAGGAACAGTCCCTTAATTTCTTGTGTGCAACTCTGTTTTATCCTAGAACTAAGAGAGCATTGGTTTGTTAAAGAGCTTTCAATGTATATTAAAACCTTCAATACTCAGAAATGATGGATTCCTCCAAGGAGTCCTTTACTAGCCTAAACATTCTCAAATGTTTGAGATTCAAGTGAATGGAAGGAAAACCACATGCCTTTAAAACTAAACTGTAATAATTACCTGGCTAATTTCAGCTAAGCCTTCATCATAATTTGTTCCCTCAGTAATAGGAGAAATATA
AATACAGTAAGTTTAGATTATTGAATTGGTGCTTGAAATTTATTGGTTTTGTTGTAATTTTATACAGATTATATGAGGGATAAGATACTCATCAAATTGCAAATTCTTTTTTTTACAGAAGTGTGGGTAACAGTCACAGCAGTTTTTTTTACCAACAGCATACTTAACAGACTTGCTGTGTAGCAGTTTTTTTCTGGTGGAGTTGCTGTAAGTCTTGTAAGTCTAATGTGGCTATCCTACTCTTTTGGGCAATGCATGTATTATGCATTGGAAAGGTATTTTTTTTAAGTTCTGTTGGCTAGCTATGGTTTTCAGTACATTTCCTACTTTAAGAGTAATTACTGACAAATATGTATTTCCTATATGTTTATACTTTGATTATAAAAAAGTATTTTGTTTTGATTTTTTAACTTGCTGCATTGTTTTGATACTTTCTATTTTTTTGGTCAAATCATGTTTAGAAACTTTGGATGAGTTAAGAAGTCTTAAGTATGCAGGCGTTTACGTGATTGTGCCATTCCAAAGTGCATCAGAACTGTCATTCCCTTCTAATATCTTCTCAGGAGTAATACAAATCAGGTATTTCATCATCATTTGGTAATATGAAAACTCCAGTGAACTCCCAAGGACATTTACAACATTTATATTCACACGCTGTATGGAAGGGTGTGGGTGTGTGTGAAGGGGCGAGTGGAGACACTGTGTGTATCTCTAGATAAGAAGATATGCACCACGTTGAAAATACTCAGTGTAGATCTCTATGTGTATAGGTATCTGTATATCTTTCCTTTTGTTTACAACTGTTAAAAAACCTCAAAATAGTTCTCTTCAAAAGAAGAGAGATTCCAAGCAACCCATCTTTCTTCAGTATGTATGTTCTGTACATACTTATCGGAGCGCGCCAGTAAGTATCAGGCATATATATCTGTCTGTTAGCAATGATTATTACATCATCAGATCAGCATGTGCTATACTCCCTGCAAGAAATATACTGACATGAACAGGCAGTTCTTGGAGAAGAAAGAGCATTTCTTTAAGTACCTGGGGAATACAGCTCTCAGTGATCAGCAGGGAGTTTATTTGAGGACATCAGTCACCTTTGGGGTTGCCATGTACAATGAGATTTATAATCATGATACTCTTCGGTGGTAGTTTCAAAAGACACTACTAATACGCAGGAAGCGTTCCAGCTATTTAATGCTGGCAACTACTGTTTAATGGTCAGTTAAATCTGTGATAATGGTTGGAAGTGGGTGGGGTTATGAAATTGTAGATGTTTTTAGAAAAACTTGTGAATGAAAATGAATCCAAGTGTTTCATGTGAAGATGTTGAGCCATTGCTATCATGCATTCCTGTCTCATGGCAGAAAATTTTGAAGATTAAAAAATAAAATAATCAAAATGTTTCCTCTTTCTAAAAAAAAAAAAAAAAAA"; + const string aaSequence = + 
"MDGFAGSLDDSISAASTSDVQDRLSALESRVQQQEDEITVLKAALADVLRRLAISEDHVASVKKSVSSKGQPSPRAVIPMSCITNGSGANRKPSHTSAVSIAGKETLSSAAKSIKRPSPAEKSHNSWENSDDSRNKLSKIPSTPKLIPKVTKTADKHKDVIINQEGEYIKMFMRGRPITMFIPSDVDNYDDIRTELPPEKLKLEWAYGYRGKDCRANVYLLPTGEIVYFIASVVVLFNYEERTQRHYLGHTDCVKCLAIHPDKIRIATGQIAGVDKDGRPLQPHVRVWDSVTLSTLQIIGLGTFERGVGCLDFSKADSGVHLCVIDDSNEHMLTVWDWQKKAKGAEIKTTNEVVLAVEFHPTDANTIITCGKSHIFFWTWSGNSLTRKQGIFGKYEKPKFVQCLAFLGNGDVLTGDSGGVMLIWSKTTVEPTPGKGPKGVYQISKQIKAHDGSVFTLCQMRNGMLLTGGGKDRKIILWDHDLNPEREIEVPDQYGTIRAVAEGKADQFLVGTSRNFILRGTFNDGFQIEVQGHTDELWGLATHPFKDLLLTCAQDRQVCLWNSMEHRLEWTRLVDEPGHCADFHPSGTVVAIGTHSGRWFVLDAETRDLVSIHTDGNEQLSVMRYSIDGTFLAVGSHDNFIYLYVVSENGRKYSRYGRCTGHSSYITHLDWSPDNKYIMSNSGDYEILYWDIPNGCKLIRNRSDCKDIDWTTYTCVLGFQVFGVWPEGSDGTDINALVRSHNRKVIAVADDFCKVHLFQYPCSKAKAPSHKYSAHSSHVTNVSFTHNDSHLISTGGKDMSIIQWKLVEKLSLPQNETVADTTLTKAPVSSTESVIQSNTPTPPPSQPLNETAEEESRISSSPTLLENSLEQTVEPSEDHSEEESEEGSGDLGEPLYEEPCNEISKEQAKATLLEDQQDPSPSS**"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001138548.1", + "NM_001145076.1:c.970_971del", cdsSequence, aaSequence, 970, 971, 324, "V", "X", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001138548.1:p.(Val324TyrfsTer2)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_SilentBug() + { + // VID: chr1-8073299-TAC-T (NC_000001.10:g.8073300_8073301del), key: NM_018948.3, expected: NP_061821.1:p.(Arg453GlnfsTer33), actual: NM_018948.3:c.1358_1359del(p.(Arg453=)) + const string cdsSequence = + 
"ATGTCAATAGCAGGAGTTGCTGCTCAGGAGATCAGAGTCCCATTAAAAACTGGATTTCTACATAATGGCCGAGCCATGGGGAATATGAGGAAGACCTACTGGAGCAGTCGCAGTGAGTTTAAAAACAACTTTTTAAATATTGACCCGATAACCATGGCCTACAGTCTGAACTCTTCTGCTCAGGAGCGCCTAATACCACTTGGGCATGCTTCCAAATCTGCTCCGATGAATGGCCACTGCTTTGCAGAAAATGGTCCATCTCAAAAGTCCAGCTTGCCCCCTCTTCTTATTCCCCCAAGTGAAAACTTGGGACCACATGAAGAGGATCAAGTTGTATGTGGTTTTAAGAAACTCACAGTGAATGGGGTTTGTGCTTCCACCCCTCCACTGACACCCATAAAAAACTCCCCTTCCCTTTTCCCCTGTGCCCCTCTTTGTGAACGGGGTTCTAGGCCTCTTCCACCGTTGCCAATCTCTGAAGCCCTCTCTCTGGATGACACAGACTGTGAGGTGGAATTCCTAACTAGCTCAGATACAGACTTCCTTTTAGAAGACTCTACACTTTCTGATTTCAAATATGATGTTCCTGGCAGGCGAAGCTTCCGTGGGTGTGGACAAATCAACTATGCATATTTTGATACCCCAGCTGTTTCTGCAGCAGATCTCAGCTATGTGTCTGACCAAAATGGAGGTGTCCCAGATCCAAATCCTCCTCCACCTCAGACCCACCGAAGATTAAGAAGGTCTCATTCGGGACCAGCTGGCTCCTTTAACAAGCCAGCCATAAGGATATCCAACTGTTGTATACACAGAGCTTCTCCTAACTCCGATGAAGACAAACCTGAGGTTCCCCCCAGAGTTCCCATACCTCCTAGACCAGTAAAGCCAGATTATAGAAGATGGTCAGCAGAAGTTACTTCGAGCACCTATAGTGATGAAGACAGGCCTCCCAAAGTACCGCCAAGAGAACCTTTGTCACCGAGTAACTCGCGCACACCGAGTCCCAAAAGCCTTCCGTCTTACCTCAATGGGGTCATGCCCCCGACACAGAGCTTTGCCCCTGATCCCAAGTATGTCAGCAGCAAAGCACTGCAAAGACAGAACAGCGAAGGATCTGCCAGTAAGGTTCCTTGCATTCTGCCCATTATTGAAAATGGGAAGAAGGTTAGTTCAACACATTATTACCTACTACCTGAACGACCACCATACCTGGACAAATATGAAAAATTTTTTAGGGAAGCAGAAGAAACAAATGGAGGCGCCCAAATCCAGCCATTACCTGCTGACTGCGGTATATCTTCAGCCACAGAAAAGCCAGACTCAAAAACAAAAATGGATCTGGGTGGCCACGTGAAGCGTAAACATTTATCCTATGTGGTTTCTCCTTAGACCTTGGGGTCATGGTTCAGCAGAGGTTACATAGGAGCAAATGGTTCTCAATTTTCCAGTTTGATTGAAGTGCAGAGAAAAATCCCTTAGATTGCAAAATAAAATAGTTGAACTCTCTGTCTTCATGTGGAAGGTTTAGAGCAGTTGTGAGATGCTGTTATGCTGAGAAACCCTGACTTTGTTAGTGTTGGAAAAAAGTCTTACAAGTCTATAATTTAAAGATGTGATGGTGGGGAGGGGAGGATGGGGAAGCTTTTTATATATGCATACATTACATACCTATATATAAACTTGTGGTATAACCATAGACCATAGCTGCAGGTTAACCAATTAGTTACTATCGTAGAGTAATATATATTCAGAATAATAAACTCAAGCTGGAGAAATGAGTCCTGATAGACTGAAAATTGAGCAAATGGAAGAAGATACAGTATTGTTTAGATCAGAATCATTAAAAAATATTTTTGTTTAGTAAGTTTGAAGATTTCTGGCTTTTAGGCCTTTTCTATTTTGTTCCATTTATTTTTGCAGGCAATCTTTTCCATGGAGGGCAGGGTATCCATTCTTTACCATGGGTGTACCTGCTTAGGTTAAAAATCATACCAAGGCC
TCATACTTCCAGGTTTCATGTTGCGTCTTGTTGAGGGAGGGAGAGCAGGTTACTTGGCAACCATATTGTCACCTGTACCTGTCACACATCTTGAAAAATAAAACGATAATAGAACTAGTGACTAATTTTCCCTTACAGTTCCTGCTTGGTCCCACCCACTGAAGTAGCTCATCGTAGTGCGGGCCGTATTAGAGGCAGTGGGGTACGTTAGACTCAGATGGAAAAGTATTCTAGGTGCCAGTGTTAGGATGTCAGTTTTACAAAATAATGAAGCAATTAGCTATGTGATTGAGAGTTATTGTTTGGGGATGTGTGTTGTGGTTTTGCTTTTTTTTTTTAGACTGTATTAATAAACATACAACACAAGCTGGCCTTGTGTTGCTGGTTCCTATTCAGTATTTCCTGGGGATTGTTTGCTTTTTAAGTAAAACACTTCTGACCCATAGCTCAGTATGTCTGAATTCCAGAGGTCACATCAGCATCTTTCTGCTTTGAAAACTCTCACAGCTGTGGCTGCTTCACTTAGATGCAGTGAGACACATAGTTGGTGTTCCGATTTTCACATCCTTCCATGTATTTATCTTGAAGAGATAAGCACAGAAGAGAAGGTGCTCACTAACAGAGGTACATTACTGCAATGTTCTCTTAACAGTTAAACAAGCTGTTTACAGTTTAAACTGCTGAATATTATTTGAGCTATTTAAAGCTTATTATATTTTAGTATGAACTAAATGAAGGTTAAAACATGCTTAAGAAAAATGCACTGATTTCTGCATTATGTGTACAGTATTGGACAAAGGATTTTATTCATTTTGTTGCATTATTTTGAATATTGTCTTTTCATTTTAATAAAGTTATAATACTTATTTATGATACCATTAAAAAAAAAAAAAAAA"; + const string aaSequence = + "MSIAGVAAQEIRVPLKTGFLHNGRAMGNMRKTYWSSRSEFKNNFLNIDPITMAYSLNSSAQERLIPLGHASKSAPMNGHCFAENGPSQKSSLPPLLIPPSENLGPHEEDQVVCGFKKLTVNGVCASTPPLTPIKNSPSLFPCAPLCERGSRPLPPLPISEALSLDDTDCEVEFLTSSDTDFLLEDSTLSDFKYDVPGRRSFRGCGQINYAYFDTPAVSAADLSYVSDQNGGVPDPNPPPPQTHRRLRRSHSGPAGSFNKPAIRISNCCIHRASPNSDEDKPEVPPRVPIPPRPVKPDYRRWSAEVTSSTYSDEDRPPKVPPREPLSPSNSRTPSPKSLPSYLNGVMPPTQSFAPDPKYVSSKALQRQNSEGSASKVPCILPIIENGKKVSSTHYYLLPERPPYLDKYEKFFREAEETNGGAQIQPLPADCGISSATEKPDSKTKMDLGGHVKRKHLSYVVSP*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_061821.1", + "NM_018948.3:c.1358_1359del", cdsSequence, aaSequence, 1358, 1359, 453, "R", "Q", "", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_061821.1:p.(Arg453GlnfsTer33)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_ViaInsertion() + { + const string cdsSequence = + 
"ATGGTGACTGAATTCATTTTTCTGGGTCTCTCTGATTCTCAGGAACTCCAGACCTTCCTATTTATGTTGTTTTTTGTATTCTATGGAGGAATCGTGTTTGGAAACCTTCTTATTGTCATAACAGTGGTATCTGACTCCCACCTTCACTCTCCCATGTACTTCCTGCTAGCCAACCTCTCACTCATTGATCTGTCTCTGTCTTCAGTCACAGCCCCCAAGATGATTACTGACTTTTTCAGCCAGCGCAAAGTCATCTCTTTCAAGGGCTGCCTTGTTCAGATATTTCTCCTTCACTTCTTTGGTGGGAGTGAGATGGTGATCCTCATAGCCATGGGCTTTGACAGATATATAGCAATATGCAAGCCCCTACACTACACTACAATTATGTGTGGCAACGCATGTGTCGGCATTATGGCTGTCACATGGGGAATTGGCTTTCTCCATTCGGTGAGCCAGTTGGCGTTTGCCGTGCACTTACTCTTCTGTGGTCCCAATGAGGTCGATAGTTTTTATTGTGACCTTCCTAGGGTAATCAAACTTGCCTGTACAGATACCTACAGGCTAGATATTATGGTCATTGCTAACAGTGGTGTGCTCACTGTGTGTTCTTTTGTTCTTCTAATCATCTCATACACTATCATCCTAATGACCATCCAGCATCGCCCTTTAGATAAGTCGTCCAAAGCTCTGTCCACTTTGACTGCTCACATTACAGTAGTTCTTTTGTTCTTTGGACCATGTGTCTTTATTTATGCCTGGCCATTCCCCATCAAGTCATTAGATAAATTCCTTGCTGTATTTTATTCTGTGATCACCCCTCTCTTGAACCCAATTATATACACACTGAGGAACAAAGACATGAAGACGGCAATAAGACAGCTGAGAAAATGGGATGCACATTCTAGTGTAAAGTTTTAG"; + const string aaSequence = + "MVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKF"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_001005484.1", + "NM_001005484.1:c.477_478insT", cdsSequence, aaSequence, 478, 477, 160, "", "X", "T", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_001005484.1:p.(Leu160SerfsTer7)", actualResult); + } + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_ViaInsertion_UnknownNumber() + { + const string cdsSequence = + 
"ATGGCAAGGCTGGGAAACTGCAGCCTGACTTGGGCTGCCCTGATCATCCTGCTGCTCCCCGGAAGTCTGGAGGAGTGCGGGCACATCAGTGTCTCAGCCCCCATCGTCCACCTGGGGGATCCCATCACAGCCTCCTGCATCATCAAGCAGAACTGCAGCCATCTGGACCCGGAGCCACAGATTCTGTGGAGACTGGGAGCAGAGCTTCAGCCCGGGGGCAGGCAGCAGCGTCTGTCTGATGGGACCCAGGAATCTATCATCACCCTGCCCCACCTCAACCACACTCAGGCCTTTCTCTCCTGCTGCCTGAACTGGGGCAACAGCCTGCAGATCCTGGACCAGGTTGAGCTGCGCGCAGGCTACCCTCCAGCCATACCCCACAACCTCTCCTGCCTCATGAACCTCACAACCAGCAGCCTCATCTGCCAGTGGGAGCCAGGACCTGAGACCCACCTACCCACCAGCTTCACTCTGAAGAGTTTCAAGAGCCGGGGCAACTGTCAGACCCAAGGGGACTCCATCCTGGACTGCGTGCCCAAGGACGGGCAGAGCCACTGCTGCATCCCACGCAAACACCTGCTGTTGTACCAGAATATGGGCATCTGGGTGCAGGCAGAGAATGCGCTGGGGACCAGCATGTCCCCACAACTGTGTCTTGATCCCATGGATGTTGTGAAACTGGAGCCCCCCATGCTGCGGACCATGGACCCCAGCCCTGAAGCGGCCCCTCCCCAGGCAGGCTGCCTACAGCTGTGCTGGGAGCCATGGCAGCCAGGCCTGCACATAAATCAGAAGTGTGAGCTGCGCCACAAGCCGCAGCGTGGAGAAGCCAGCTGGGCACTGGTGGGCCCCCTCCCCTTGGAGGCCCTTCAGTATGAGCTCTGCGGGCTCCTCCCAGCCACGGCCTACACCCTGCAGATACGCTGCATCCGCTGGCCCCTGCCTGGCCACTGGAGCGACTGGAGCCCCAGCCTGGAGCTGAGAACTACCGAACGGGCCCCCACTGTCAGACTGGACACATGGTGGCGGCAGAGGCAGCTGGACCCCAGGACAGTGCAGCTGTTCTGGAAGCCAGTGCCCCTGGAGGAAGACAGCGGACGGATCCAAGGTTATGTGGTTTCTTGGAGACCCTCAGGCCAGGCTGGGGCCATCCTGCCCCTCTGCAACACCACAGAGCTCAGCTGCACCTTCCACCTGCCTTCAGAAGCCCAGGAGGTGGCCCTTGTGGCCTATAACTCAGCCGGGACCTCTCGTCCCACTCCGGTGGTCTTCTCAGAAAGCAGAGGCCCAGCTCTGACCAGACTCCATGCCATGGCCCGAGACCCTCACAGCCTCTGGGTAGGCTGGGAGCCCCCCAATCCATGGCCTCAGGGCTATGTGATTGAGTGGGGCCTGGGCCCCCCCAGCGCGAGCAATAGCAACAAGACCTGGAGGATGGAACAGAATGGGAGAGCCACGGGGTTTCTGCTGAAGGAGAACATCAGGCCCTTTCAGCTCTATGAGATCATCGTGACTCCCTTGTACCAGGACACCATGGGACCCTCCCAGCATGTCTATGCCTACTCTCAAGAAATGGCTCCCTCCCATGCCCCAGAGCTGCATCTAAAGCACATTGGCAAGACCTGGGCACAGCTGGAGTGGGTGCCTGAGCCCCCTGAGCTGGGGAAGAGCCCCCTTACCCACTACACCATCTTCTGGACCAACGCTCAGAACCAGTCCTTCTCCGCCATCCTGAATGCCTCCTCCCGTGGCTTTGTCCTCCATGGCCTGGAGCCCGCCAGTCTGTATCACATCCACCTCATGGCTGCCAGCCAGGCTGGGGCCACCAACAGTACAGTCCTCACCCTGATGACCTTGACCCCAGAGGGGTCGGAGCTACACATCATCCTGGGCCTGTTCGGCCTCCTGCTGTTGCTCACCTGCCTCTGTGGAACTGCCTGGCTCTGTTGCAGCCCCAACAGGAAGAATCCCCTCTGGCCAAGTGTCCCAGACCCAGCTC
ACAGCAGCCTGGGCTCCTGGGTGCCCACAATCATGGAGGAGGATGCCTTCCAGCTGCCCGGCCTTGGCACGCCACCCATCACCAAGCTCACAGTGCTGGAGGAGGATGAAAAGAAGCCGGTGCCCTGGGAGTCCCATAACAGCTCAGAGACCTGTGGCCTCCCCACTCTGGTCCAGACCTATGTGCTCCAGGGGGACCCAAGAGCAGTTTCCACCCAGCCCCAATCCCAGTCTGGCACCAGCGATCAGGCTGGGCCTCCCAGGCGATCTGCATACTTTAAGGACCAGATCATGCTCCATCCAGCCCCACCCAATGGCCTTTTGTGCTTGTTTCCTATAACTTCAGTATTGTAAACTAGTTTTTGGTTTGCAGTTTTTGTTGTTGTTTATAGACACTCTTGGGTGTAAAAAAAAAAA"; + const string aaSequence = + "MARLGNCSLTWAALIILLLPGSLEECGHISVSAPIVHLGDPITASCIIKQNCSHLDPEPQILWRLGAELQPGGRQQRLSDGTQESIITLPHLNHTQAFLSCCLNWGNSLQILDQVELRAGYPPAIPHNLSCLMNLTTSSLICQWEPGPETHLPTSFTLKSFKSRGNCQTQGDSILDCVPKDGQSHCCIPRKHLLLYQNMGIWVQAENALGTSMSPQLCLDPMDVVKLEPPMLRTMDPSPEAAPPQAGCLQLCWEPWQPGLHINQKCELRHKPQRGEASWALVGPLPLEALQYELCGLLPATAYTLQIRCIRWPLPGHWSDWSPSLELRTTERAPTVRLDTWWRQRQLDPRTVQLFWKPVPLEEDSGRIQGYVVSWRPSGQAGAILPLCNTTELSCTFHLPSEAQEVALVAYNSAGTSRPTPVVFSESRGPALTRLHAMARDPHSLWVGWEPPNPWPQGYVIEWGLGPPSASNSNKTWRMEQNGRATGFLLKENIRPFQLYEIIVTPLYQDTMGPSQHVYAYSQEMAPSHAPELHLKHIGKTWAQLEWVPEPPELGKSPLTHYTIFWTNAQNQSFSAILNASSRGFVLHGLEPASLYHIHLMAASQAGATNSTVLTLMTLTPEGSELHIILGLFGLLLLLTCLCGTAWLCCSPNRKNPLWPSVPDPAHSSLGSWVPTIMEEDAFQLPGLGTPPITKLTVLEEDEKKPVPWESHNSSETCGLPTLVQTYVLQGDPRAVSTQPQSQSGTSDQAGPPRRSAYFKDQIMLHPAPPNGLLCLFPITSVL*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_758519.1", + "NM_172313.2:c.2344_2345insCT", cdsSequence, aaSequence, 2345, 2344, 782, "V", "AX", "CT", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_758519.1:p.(Val782AlafsTer?)", actualResult); + } + + + [Fact] + public void GetHgvsProteinAnnotation_Frameshift_ViaInsertion_SubstitutionBug() + { + // VID: chr1-8073289-G-GAG (NC_000001.10:g.8073289_8073290dup), key: NM_018948.3, expected: NP_061821.1:p.(Tyr458ProfsTer18), actual: NP_061821.1:p.(Ser457Phe) + const string cdsSequence = + 
"ATGTCAATAGCAGGAGTTGCTGCTCAGGAGATCAGAGTCCCATTAAAAACTGGATTTCTACATAATGGCCGAGCCATGGGGAATATGAGGAAGACCTACTGGAGCAGTCGCAGTGAGTTTAAAAACAACTTTTTAAATATTGACCCGATAACCATGGCCTACAGTCTGAACTCTTCTGCTCAGGAGCGCCTAATACCACTTGGGCATGCTTCCAAATCTGCTCCGATGAATGGCCACTGCTTTGCAGAAAATGGTCCATCTCAAAAGTCCAGCTTGCCCCCTCTTCTTATTCCCCCAAGTGAAAACTTGGGACCACATGAAGAGGATCAAGTTGTATGTGGTTTTAAGAAACTCACAGTGAATGGGGTTTGTGCTTCCACCCCTCCACTGACACCCATAAAAAACTCCCCTTCCCTTTTCCCCTGTGCCCCTCTTTGTGAACGGGGTTCTAGGCCTCTTCCACCGTTGCCAATCTCTGAAGCCCTCTCTCTGGATGACACAGACTGTGAGGTGGAATTCCTAACTAGCTCAGATACAGACTTCCTTTTAGAAGACTCTACACTTTCTGATTTCAAATATGATGTTCCTGGCAGGCGAAGCTTCCGTGGGTGTGGACAAATCAACTATGCATATTTTGATACCCCAGCTGTTTCTGCAGCAGATCTCAGCTATGTGTCTGACCAAAATGGAGGTGTCCCAGATCCAAATCCTCCTCCACCTCAGACCCACCGAAGATTAAGAAGGTCTCATTCGGGACCAGCTGGCTCCTTTAACAAGCCAGCCATAAGGATATCCAACTGTTGTATACACAGAGCTTCTCCTAACTCCGATGAAGACAAACCTGAGGTTCCCCCCAGAGTTCCCATACCTCCTAGACCAGTAAAGCCAGATTATAGAAGATGGTCAGCAGAAGTTACTTCGAGCACCTATAGTGATGAAGACAGGCCTCCCAAAGTACCGCCAAGAGAACCTTTGTCACCGAGTAACTCGCGCACACCGAGTCCCAAAAGCCTTCCGTCTTACCTCAATGGGGTCATGCCCCCGACACAGAGCTTTGCCCCTGATCCCAAGTATGTCAGCAGCAAAGCACTGCAAAGACAGAACAGCGAAGGATCTGCCAGTAAGGTTCCTTGCATTCTGCCCATTATTGAAAATGGGAAGAAGGTTAGTTCAACACATTATTACCTACTACCTGAACGACCACCATACCTGGACAAATATGAAAAATTTTTTAGGGAAGCAGAAGAAACAAATGGAGGCGCCCAAATCCAGCCATTACCTGCTGACTGCGGTATATCTTCAGCCACAGAAAAGCCAGACTCAAAAACAAAAATGGATCTGGGTGGCCACGTGAAGCGTAAACATTTATCCTATGTGGTTTCTCCTTAGACCTTGGGGTCATGGTTCAGCAGAGGTTACATAGGAGCAAATGGTTCTCAATTTTCCAGTTTGATTGAAGTGCAGAGAAAAATCCCTTAGATTGCAAAATAAAATAGTTGAACTCTCTGTCTTCATGTGGAAGGTTTAGAGCAGTTGTGAGATGCTGTTATGCTGAGAAACCCTGACTTTGTTAGTGTTGGAAAAAAGTCTTACAAGTCTATAATTTAAAGATGTGATGGTGGGGAGGGGAGGATGGGGAAGCTTTTTATATATGCATACATTACATACCTATATATAAACTTGTGGTATAACCATAGACCATAGCTGCAGGTTAACCAATTAGTTACTATCGTAGAGTAATATATATTCAGAATAATAAACTCAAGCTGGAGAAATGAGTCCTGATAGACTGAAAATTGAGCAAATGGAAGAAGATACAGTATTGTTTAGATCAGAATCATTAAAAAATATTTTTGTTTAGTAAGTTTGAAGATTTCTGGCTTTTAGGCCTTTTCTATTTTGTTCCATTTATTTTTGCAGGCAATCTTTTCCATGGAGGGCAGGGTATCCATTCTTTACCATGGGTGTACCTGCTTAGGTTAAAAATCATACCAAGGCC
TCATACTTCCAGGTTTCATGTTGCGTCTTGTTGAGGGAGGGAGAGCAGGTTACTTGGCAACCATATTGTCACCTGTACCTGTCACACATCTTGAAAAATAAAACGATAATAGAACTAGTGACTAATTTTCCCTTACAGTTCCTGCTTGGTCCCACCCACTGAAGTAGCTCATCGTAGTGCGGGCCGTATTAGAGGCAGTGGGGTACGTTAGACTCAGATGGAAAAGTATTCTAGGTGCCAGTGTTAGGATGTCAGTTTTACAAAATAATGAAGCAATTAGCTATGTGATTGAGAGTTATTGTTTGGGGATGTGTGTTGTGGTTTTGCTTTTTTTTTTTAGACTGTATTAATAAACATACAACACAAGCTGGCCTTGTGTTGCTGGTTCCTATTCAGTATTTCCTGGGGATTGTTTGCTTTTTAAGTAAAACACTTCTGACCCATAGCTCAGTATGTCTGAATTCCAGAGGTCACATCAGCATCTTTCTGCTTTGAAAACTCTCACAGCTGTGGCTGCTTCACTTAGATGCAGTGAGACACATAGTTGGTGTTCCGATTTTCACATCCTTCCATGTATTTATCTTGAAGAGATAAGCACAGAAGAGAAGGTGCTCACTAACAGAGGTACATTACTGCAATGTTCTCTTAACAGTTAAACAAGCTGTTTACAGTTTAAACTGCTGAATATTATTTGAGCTATTTAAAGCTTATTATATTTTAGTATGAACTAAATGAAGGTTAAAACATGCTTAAGAAAAATGCACTGATTTCTGCATTATGTGTACAGTATTGGACAAAGGATTTTATTCATTTTGTTGCATTATTTTGAATATTGTCTTTTCATTTTAATAAAGTTATAATACTTATTTATGATACCATTAAAAAAAAAAAAAAAA"; + const string aaSequence = + "MSIAGVAAQEIRVPLKTGFLHNGRAMGNMRKTYWSSRSEFKNNFLNIDPITMAYSLNSSAQERLIPLGHASKSAPMNGHCFAENGPSQKSSLPPLLIPPSENLGPHEEDQVVCGFKKLTVNGVCASTPPLTPIKNSPSLFPCAPLCERGSRPLPPLPISEALSLDDTDCEVEFLTSSDTDFLLEDSTLSDFKYDVPGRRSFRGCGQINYAYFDTPAVSAADLSYVSDQNGGVPDPNPPPPQTHRRLRRSHSGPAGSFNKPAIRISNCCIHRASPNSDEDKPEVPPRVPIPPRPVKPDYRRWSAEVTSSTYSDEDRPPKVPPREPLSPSNSRTPSPKSLPSYLNGVMPPTQSFAPDPKYVSSKALQRQNSEGSASKVPCILPIIENGKKVSSTHYYLLPERPPYLDKYEKFFREAEETNGGAQIQPLPADCGISSATEKPDSKTKMDLGGHVKRKHLSYVVSP*"; + string actualResult = HgvsProtein.GetHgvsProteinAnnotation("NP_061821.1", + "NM_018948.3:c.1369_1370dup", cdsSequence, aaSequence, 1371, 1370, 457, "S", "SX", "TC", false, + AminoAcidCommon.StandardAminoAcids); + Assert.Equal("NP_061821.1:p.(Tyr458ProfsTer18)", actualResult); + } + + [Fact] + public void GetAltPeptideSequence_SyntheticExample_ExpectedResults() + { + const string cdsSequence = "AATAGGGGGGGGCGTTGCTAACGCC"; + const string altAllele = "TTCG"; + string actualResult = HgvsProtein.GetAltPeptideSequence(cdsSequence, 5, 12, altAllele, + AminoAcidCommon.StandardAminoAcids); + 
Assert.Equal("NIRVANA", actualResult); + } + + [Fact] + public void GetAltPeptideSequence_RealExample_ExpectedResults() + { + const string cdnaSequence = + "GAGGGCGGGGCGAGGGCGGGGCGGTGGGCGGGGACGGGGCCCGCACGGCGGCTACGGCCTAGGTGAGCGGCTCGGACTCGGCGGCCGCACCTGCCCAACCCAACCCGCACGGTCCGGAAGTCGCCGAGGGGCCGGGAGCGGGAGGGGACGTCGTCCTAGAGGGCCGGAGCGGGCGGGCGGCCGAGGACCCGGCTCCCGCGCAGGACGGAGCCGTGGCTCAGGTCGGCCCCTCCCCAACACCACCCCGGGCCTCCGCCCCTTCCTGGGCCTCTCGGTGGAGCAGGGACCCGAACCGGTGCCCATCCAGTCCGGTGCCATCTGAAGCCCCCTTCCCAGAAAATGAGCCACAGAGCAAGCTGACCCCAGCGACACAGCCCCCCAGCCCTACTGTATTTCCGTTCCTATCAAAAAATGGATGACTCGGAGACAGGTTTCAATCTGAAAGTCGTCCTGGTCAGTTTCAAGCAGTGTCTCGATGAGAAGGAAGAGGTCTTGCTGGACCCCTACATTGCCAGCTGGAAGGGCCTGGTCAGGTTTCTGAACAGCCTGGGCACCATCTTCTCATTCATCTCCAAGGACGTGGTCTCCAAGCTGCGGATCATGGAGCGCCTCAGGGGCGGCCCGCAGAGCGAGCACTACCGCAGCCTGCAGGCCATGGTGGCCCACGAGCTGAGCAACCGGCTGGTGGACCTGGAGCGCCGCTCCCACCACCCGGAGTCTGGCTGCCGGACGGTGCTGCGCCTGCACCGCGCCCTGCACTGGCTGCAGCTGTTCCTGGAGGGCCTGCGTACCAGCCCCGAGGACGCACGCACCTCCGCGCTCTGCGCCGACTCCTACAACGCCTCGCTGGCCGCCTACCACCCCTGGGTCGTGCGCCGCGCCGTCACCGTGGCCTTCTGCACGCTGCCCACACGCGAGGTCTTCCTGGAGGCCATGAACGTGGGGCCCCCGGAGCAGGCCGTGCAGATGCTAGGCGAGGCCCTCCCCTTCATCCAGCGTGTCTACAACGTCTCCCAGAAGCTCTACGCCGAGCACTCCCTGCTGGACCTGCCCTAGGGGCGGGAAGCCAGGGCCGCACCGGCTTTCCTGCTGCAGATCTGGGCTGCGGTGGCCAGGGCCGTGAGTCCCGTGGCAGAGCCTTCTGGGCGCTGCGGGAACAGGAGATCCTCTGTCGCCCCTGTGAGCTGAGCTGGTTAGGAACCACAGACTGTGACAGAGAAGGTGGCGACCAGCCCAGAAGAGGCCCACCCTCTCGGTCCGGAACAAGACGCCTCGGCCACGGCTCCCCCTCGGCCTATTACACGCGTGCGCAGCCAGGCCTCGCCAGGGTGCGGTGCAGAGCAGAGCAGGCAGGGGTGGGGGCCGGGCCTGCAAGAGCCCGAAAGGTCGCCACCCCCTAGCCTGTGGGGTGCATCTGCGAACCAGGGTGAAGTCACAGGTCCCGGGGTGTGGAGGCTCCATCCTTTCTCCTTTCTGCCAGCCGATGTGTCCTCATCTCAGGCCCGTGCCTGGGACCCCGTGTCTGCCCAGGTGGGCAGCCTTGAGCCCAGGGGACTCAGTGCCCTCCATGCCCTGGCTGGCAGAAACCCTCAACAGCAGTCTGGGCACTGTGGGGCTCTCCCCGCCTCTCCTGCCTTGTTTGCCCCTCAGCGTGCCAGGCAGACTGGGGGCAGGACAGCCGGAAGCTGAGACCAAGGCTCCTCACAGAAGGGCCCAGGAAGTCCCCGCCCTTGGGACAGCCTCCTCCGTAGCCCCTGCACGGCACCAGTTCCCCGAGGGACGCAGCAGGCCGCCTCCCGCAGCGGCCGTGGGTCTGCACAGCCCAGCCCAGCCCAAGGCCCCC
AGGAGCTGGGACTCTGCTACACCCAGTGAAATGCTGTGTCCCTTCTCCCCCGTGCCCCTTGATGCCCCCTCCCCACAGTGCTCAGGAGACCCGTGGGGCACGGAACAGGAGGGTCTGGACCCTGTGGCCCAGCCAAAGGCTACCAGACAGCCACAACCAGCCCAGCCACCATCCAGTGCCTGGGGCCTGGCCACTGGCTCTTCACAGTGGACCCCAGCACCTCGGGGTGGCAGAGGGACGGCCCCCACGGCCCAGCAGACATGCGAGCTTCCAGAGTGCAATCTATGTGATGTCTTCCAACGTTAATAAATCACACAGCCTCCCAGGAGGGAGACGCTGGGGTGCAC"; + ReadOnlySpan cdsSpan = cdnaSequence.AsSpan().Slice(412 - 1); + + const string altAllele = ""; + string actualResult = + HgvsProtein.GetAltPeptideSequence(cdsSpan, 643, 645, altAllele, AminoAcidCommon.StandardAminoAcids); + Assert.Equal( + "MDDSETGFNLKVVLVSFKQCLDEKEEVLLDPYIASWKGLVRFLNSLGTIFSFISKDVVSKLRIMERLRGGPQSEHYRSLQAMVAHELSNRLVDLERRSHHPESGCRTVLRLHRALHWLQLFLEGLRTSPEDARTSALCADSYNASLAAYHPWVVRRAVTVAFCTLPTREVFLEAMNVGPPEQAVQMLGEALPFIQRVYNVSQKLYAEHSLLDLPGREARAAPAFLLQIWAAVARAVSPVAEPSGRCGNRRSSVAPVS*AG*EPQTVTEKVATSPEEAHPLGPEQDASATAPPRPITRVRSQASPGCGAEQSRQGWGPGLQEPERSPPPSLWGASANQGEVTGPGVWRLHPFSFLPADVSSSQARAWDPVSAQVGSLEPRGLSALHALAGRNPQQQSGHCGALPASPALFAPQRARQTGGRTAGS*DQGSSQKGPGSPRPWDSLLRSPCTAPVPRGTQQAASRSGRGSAQPSPAQGPQELGLCYTQ*NAVSLLPRAP*CPLPTVLRRPVGHGTGGSGPCGPAKGYQTATTSPATIQCLGPGHWLFTVDPSTSGWQRDGPHGPADMRASRVQSM*CLPTLINHTASQEGDAGVH", + actualResult); + } + + [Fact] + public void CountAminoAcidsUntilNextStopCodon_NoOtherStopCodons_ReturnNull() + { + int? actualResult = HgvsProtein.CountAminoAcidsUntilNextStopCodon( + "RHRNRNTQTETNTETQRHRNTQKHRNKHRDTETHRNTETNTETQKHTETQKQTQRHRNTQKHTDRNKHRNTETQKYRNTQKHRNKHRDTETQKHSDAETQQHKHRNTETHRNRNTETNTETQTHRHRETQKHTETLKHSGRCPGCRGSIA", + 37); + Assert.Null(actualResult); + } + + [Fact] + public void CountAminoAcidsUntilNextStopCodon_FoundStopCodon() + { + int? 
actualResult = + HgvsProtein.CountAminoAcidsUntilNextStopCodon( + "MLAEPFNWHVEYAHSGDVLGPSGLPASPGAPGTCLHNPAGSNWGPG*EVLMAGTVPAVPG*", 9); + Assert.Equal(38, actualResult); + } + + [Theory] + [InlineData("KRZ", "K", false)] + [InlineData("KRZ", "", false)] + [InlineData("KRZ", "*", true)] + [InlineData("KRZ", "KR*", true)] + public void IsTruncatedByStop_ExpectedResults(string refAminoAcids, string altAminoAcids, bool expectedResult) + { + bool actualResult = HgvsProtein.IsTruncatedByStop(refAminoAcids, altAminoAcids); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Insertion_NoShift() + { + const string aaSequence = + "MEPPGDWGPPPWRSTPKTDVLRLVLYLTFLGAPCYAPALPSCKEDEYPVGSECCPKCSPGYRVKEACGELTGTVCEPCPPGTYIAHLNGLSKCLQCQMCDPDIGSPCDLRGRGHLEAGAHLSPGRQKGEPDPEVAFESLSAEPVHAANGSVPLEPHARLSMASAPCGQAGLHLRDRADGTPGGRA"; + const string altAaSequence = + "MEPPGETGGLLPGDPPPKPTS*GWCCISPSWEPPATPQLCRPARRTSTQWAPSAAPSAVQVIV*RRPAGS*RAQCVNPALQAPTLPTSMA*ASVCSAKCVTQILVPPVTSGEEVTWRLVPT*VQADRKGNQTQRWPLSH*AQSLSMRPTALSPWSLMPGSAWPVLPAARQDCTCGTGLTAHLGAGPEPTGRHRAGGLAMNRRGSWSALGVHACRLGFGAHTSTCMPSSMPLPSCESTCLLGLRMWGHRWQVRLPSGGAQAQLVPHLHQYLKKWGSHPTCLCHWNGLVCTDGKPVCGVGVWVGTWGEDLPAGPCPGTDSASSASCPSGQKAGFSHQRSPPRTFLQALVPHAALPSLGVPPGLRSSMLGTSGHLVWLSQGFSLAGRPGSSPWPVDAV"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(6, aaSequence, altAaSequence); + + Assert.Equal(6, actualBegin); + Assert.Equal('D', actualRefAminoAcid); + Assert.Equal('E', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Insertion_Shifted() + { + const string aaSequence = + "MEPPGDWGPPPWRSTPKTDVLRLVLYLTFLGAPCYAPALPSCKEDEYPVGSECCPKCSPGYRVKEACGELTGTVCEPCPPGTYIAHLNGLSKCLQCQMCDPDIGSPCDLRGRGHLEAGAHLSPGRQKGEPDPEVAFESLSAEPVHAANGSVPLEPHARLSMASAPCGQAGLHLRDRADGTPGGRA"; + const string altAaSequence = + 
"MEPPGDWGPPPWRSTAPKPTS*GWCCISPSWEPPATPQLCRPARRTSTQWAPSAAPSAVQVIV*RRPAGS*RAQCVNPALQAPTLPTSMA*ASVCSAKCVTQILVPPVTSGEEVTWRLVPT*VQADRKGNQTQRWPLSH*AQSLSMRPTALSPWSLMPGSAWPVLPAARQDCTCGTGLTAHLGAGPEPTGRHRAGGLAMNRRGSWSALGVHACRLGFGAHTSTCMPSSMPLPSCESTCLLGLRMWGHRWQVRLPSGGAQAQLVPHLHQYLKKWGSHPTCLCHWNGLVCTDGKPVCGVGVWVGTWGEDLPAGPCPGTDSASSASCPSGQKAGFSHQRSPPRTFLQALVPHAALPSLGVPPGLRSSMLGTSGHLVWLSQGFSLAGRPGSSPWPVDAV"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(16, aaSequence, altAaSequence); + + Assert.Equal(16, actualBegin); + Assert.Equal('P', actualRefAminoAcid); + Assert.Equal('A', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Insertion_Shifted_BothHaveStopCodon() + { + const string aaSequence = + "MKNVDSDDLVTGTLPKLKSSKEWLEPKPLCFMEVLAKEDTEAAIQSILYKENSVIKELDKYLQHHAFLNARRKEMLYKRWVDCVADPLQKKIIEKVCSHKKIKKRRQGELDGFLKHVNKKGNAFIEHYDPKEYDPFYMSKKDPNFLKVTIPPFHDPLKKAQYDKDNEKRTLLQCETGKIYSIKEFKEVEKVQLHSRFPQISNSRHFITPNEWLKLPTRYIESEFCRRRRLKVKVNFNDCSFDLKPLARAPYLLESQEEEKTVIYKNKGSSFLEREPLCYQEGNNPSAKEAISEGYFSSLSLSQEREEDQDGSPSPRLGLLKLEL*"; + const string altAaSequence = + "MKNVDSDDLVTGTLPKLKSSKEWLEPKPLCFMEVLAKEDTEAAIQSILYKENSVIKELDKYLQHHAFLNARRKEMLYKRWVDCVADPLQKKIIEKVCSHKKIKKRRQGELDGFLKHVNKKGNAFIEHYDPKEYDPFYMSKKDPNFLKVTIPPFHDPLKKAQYDKDNEKRTLLQCETGKIYSIKEFKEVEKVQLHSRFPQISNSRHFITPNEWLKLPTRYIESEFCRRRRLKVKVNFNDCSFDLKPLARAPYLLESQEEEKTVIYKNKGSSFLEREPLCYQEGNNPSAKEAISEGYFSSLSLSQEREEDQDGSPSPRLGLLKLEL*ERRGRFQQPRSTHPDP*LVKGYTKSGCFRGSHLLTGIRVKAKDGTQELSCL*KKSICNELPLIFEFSVCDN*EMAIPCIFPRS*EILLNEPIKIITAPS"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(302, aaSequence, altAaSequence); + + Assert.Equal(325, actualBegin); + Assert.Equal('*', actualRefAminoAcid); + Assert.Equal('*', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Insertion_SameLength_Shifted() + { + const string aaSequence = + 
"MVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKF*"; + const string altAaSequence = + "MVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLSLLWSQ*GR*FLL*PS*GNQTCLYRYLQARYYGHC*QWCAHCVFFCSSNHLIHYHPNDHPASPFR*VVQSSVHFDCSHYSSSFVLWTMCLYLCLAIPHQVIR*IPCCILFCDHPSLEPNYIHTEEQRHEDGNKTAEKMGCTF*CKVL"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(159, aaSequence, altAaSequence); + + Assert.Equal(160, actualBegin); + Assert.Equal('L', actualRefAminoAcid); + Assert.Equal('S', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Deletion_SameLength_NoShift() + { + const string aaSequence = + "MAALPGTVPRMMRPAPGQNYPRTGFPLEVSTPLGQGRVNQLGGVFINGRPLPNHIRHKIVEMAHHGIRPCVISRQLRVSHGCVSKILCRYQETGSIRPGAIGGSKPRQVATPDVEKKIEEYKRENPGMFSWEIRDRLLKDGHCDRSTVPSVSSISRVLRIKFGKKEEEDEADKKEDDGEKKAKHSIDGILGDKGNRLDEGSDVESEPDLPLKRKQRRSRTTFTAEQLEELEKAFERTHYPDIYTREELAQRTKLTEARVQVWFSNRRARWRKQAGANQLAAFNHLLPGGFPPTGMPTLPPYQLPDSTYPTTTISQDGGSTVHRPQPLPPSTMHQGGLAAAAAAADTSSAYGARHSFSSYSDSFMNPAAPSNHMNPVSNGLSPQVMSILGNPSAVPPQPQADFSISPLHGGLDSATSISASCSQRADSIKPGDSLPTSQAYCPPTYSTTGYSVDPVAGYQYGQYGQTAVDYLAKNVSLSTQRRMKLGEHSAVLGLLPVETGQAY"; + const string altAaSequence = + 
"MGPSRHGTENDAAGSGAELPPHGIPFGSVHPAWPRPGQSAGRGLHQWATPA*PHPPQDSGDGPPWHPALCHLPTAACLPRLRLQDSLPLPGDRVHPAWGHRRQQAQTGGDSGCREKD*GVQEGKPRHVQLGDPGQAAEGWAL*PKHCALSEFD*PRAQNQVREERGGG*SGQEGGRRRKEGQTQHRRHPGRQREPAGRGLGCGVGT*PPTEAQAATQSDHIHGRAAGGAGEGL*EDPLPRHIHPRGAGAEDQADRGACAGLVQ*PPRPLA*AGRSQPAGGVQPPSARRLPTHRHAHAAPLPAAGLHLPHHHHLPRWGQHCAPASAPATVHHAPGRAGCSGCSRRHQLCLRSPPQLLQLL*QLHESGGALQPHEPGQQRPVSSGDEHLGQPQCGAPAATG*LLHLPAAWRPGLGHLHLSQLQPAGRLHQARRQPAHLPGLLPTHLQHHRLQRGPRGRLSVRPVRPDCC*LSGQKCEPLHPASHEARGALCCAGTPACGNWPGLL"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(2, aaSequence, altAaSequence); + + Assert.Equal(2, actualBegin); + Assert.Equal('A', actualRefAminoAcid); + Assert.Equal('G', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Deletion_SameLength_Shifted() + { + const string aaSequence = + "MAALPGTVPRMMRPAPGQNYPRTGFPLEVSTPLGQGRVNQLGGVFINGRPLPNHIRHKIVEMAHHGIRPCVISRQLRVSHGCVSKILCRYQETGSIRPGAIGGSKPRQVATPDVEKKIEEYKRENPGMFSWEIRDRLLKDGHCDRSTVPSVSSISRVLRIKFGKKEEEDEADKKEDDGEKKAKHSIDGILGDKGNRLDEGSDVESEPDLPLKRKQRRSRTTFTAEQLEELEKAFERTHYPDIYTREELAQRTKLTEARVQVWFSNRRARWRKQAGANQLAAFNHLLPGGFPPTGMPTLPPYQLPDSTYPTTTISQDGGSTVHRPQPLPPSTMHQGGLAAAAAAADTSSAYGARHSFSSYSDSFMNPAAPSNHMNPVSNGLSPQVMSILGNPSAVPPQPQADFSISPLHGGLDSATSISASCSQRADSIKPGDSLPTSQAYCPPTYSTTGYSVDPVAGYQYGQYGQTAVDYLAKNVSLSTQRRMKLGEHSAVLGLLPVETGQAY"; + const string altAaSequence = + "MAALPGTVPRMMRPAPGQNYPRTGFPLEVSTPLGQGRVNQLGGVFINGRPLPNHIRHKIVEMAHHGIRPCVISRQLRVSHGCVSKILCRYQETGSIRPGAIGGSKPRQVATPDVEKKIEEYKRENPGMFSWEIRDRLLKDGHCDRSTVPSVSSISRVLRIKFGKKEEEDEADKKEDDGEKKAKHSIDGILGDKGNRLDEGSDVESEPDLPLKRKQRRSRTTFTAEQLEELEKAFERTHYPDIYTREELAQRTKLTEARVQVWFSNRRARWRKQAGANQLAAFNHLLPGGFPPTGMPTLPPYQLPDSTYPTTTISQDGGSTVHRPQPLPPSTMHQGGLAAAAAAADTSSAYGARHSFSSYSDSFMNPAAPSNHMNPVSNGLSPQVMSILGNPSAVPPQPQADFSISPLHGGLDSATSISQLQPAGRLHQARRQPAHLPGLLPTHLQHHRLQRGPRGRLSVRPVRPDCC*LSGQKCEPLHPASHEARGALCCAGTPACGNWPGLL"; + + (int actualBegin, char actualRefAminoAcid, char 
actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(418, aaSequence, altAaSequence); + + Assert.Equal(419, actualBegin); + Assert.Equal('A', actualRefAminoAcid); + Assert.Equal('Q', actualAltAminoAcid); + } + + [Fact] + public void FindFirstChangeAfterFrameshift_Deletion_NotShifted() + { + const string aaSequence = + "MLGPAVLGLSLWALLHPGTGAPLCLSQQLRMKGDYVLGGLFPLGEAEEAGLRSRTRPSSPVCTRFSSNGLLWALAMKMAVEEINNKSDLLPGLRLGYDLFDTCSEPVVAMKPSLMFLAKAGSRDIAAYCNYTQYQPRVLAVIGPHSSELAMVTGKFFSFFLMPQVSYGASMELLSARETFPSFFRTVPSDRVQLTAAAELLQEFGWNWVAALGSDDEYGRQGLSIFSALAAARGICIAHEGLVPLPRADDSRLGKVQDVLHQVNQSSVQVVLLFASVHAAHALFNYSISSRLSPKVWVASEAWLTSDLVMGLPGMAQMGTVLGFLQRGAQLHEFPQYVKTHLALATDPAFCSALGEREQGLEEDVVGQRCPQCDCITLQNVSAGLNHHQTFSVYAAVYSVAQALHNTLQCNASGCPAQDPVKPWQLLENMYNLTFHVGGLPLRFDSSGNVDMEYDLKLWVWQGSVPRLHDVGRFNGSLRTERLKIRWHTSDNQKPVSRCSRQCQEGQVRRVKGFHSCCYDCVDCEAGSYRQNPDDIACTFCGQDEWSPERSTRCFRRRSRFLAWGEPAVLLLLLLLSLALGLVLAALGLFVHHRDSPLVQASGGPLACFGLVCLGLVCLSVLLFPGQPSPARCLAQQPLSHLPLTGCLSTLFLQAAEIFVESELPLSWADRLSGCLRGPWAWLVVLLAMLVEVALCTWYLVAFPPEVVTDWHMLPTEALVHCRTRSWVSFGLAHATNATLAFLCFLGTFLVRSQPGCYNRARGLTFAMLAYFITWVSFVPLLANVQVVLRPAVQMGALLLCVLGILAAFHLPRCYLLMRQPGLNTPEFFLGGGPGDAQGQNDGNTGNQGKHE"; + const string altAaSequence = + 
"MLGPAVLGLSLWALLHPGTGAPLCLSQQLRMKGDYVLGGLFPLGEAEEAGLRSRTRPSSPVCTRFSSNGLLWALAMKMAVEEINNKSDLLPGLRLGYDLFDTCSEPVVAMKPSLMFLAKAGSRDIAAYCNYTQYQPRVLAVIGPHSSELAMVTGKFFSFFLMPQVSYGASMELLSARETFPSFFRTVPSDRVQLTAAAELLQEFGWNWVAALGSDDEYGRQGLSIFSALAAARGICIAHEGLVPLPRADDSRLGKVQDVLHQVNQSSVQVVLLFASVHAAHALFNYSISSRLSPKVWVASEAWLTSDLVMGLPGMAQMGTVLGFLQRGAQLHEFPQYVKTHLALATDPAFCSALGEREQGLEEDVVGQRCPQCDCITLQNVSAGLNHHQTFSVYAAVYSVAQALHNTLQCNASGCPAQDPVKPWQLLENMYNLTFHVGGLPLRFDSSGNVDMEYDLKLWVWQGSVPRLHDVGRFNGSLRTERLKIRWHTSDNQKPVSRCSRQCQEGQVRRVKGFHSCCYDCVDCEAGSYRQNPDDIACTFCGQDEWSPERSTRCFRRRSRFLAWGEPAVLLLLLLLSLALGLVLAALGLFTIGTAHWFRPRGGPWPALAWCAWAWSASASSCSLASPALPDAWPSSPCPTSRSRAA*AHSSCRRPRSSWSQNCL*AGQTG*VAACGGPGPGWWCCWPCWWRSHCAPGTWWPSRRRW*RTGTCCPRRRWCTAAHAPGSASA*RTPPMPRWPFSASWALSWCGASRAATTVPVASPLPCWPTSSPGSPLCPSWPMCRWSSGPPCRWAPSCSVSWASWLPSTCPGVTCSCGSQGSTPPSSSWEGALGMPKARMTGTQEIRGNMS"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(591, aaSequence, altAaSequence); + + Assert.Equal(591, actualBegin); + Assert.Equal('V', actualRefAminoAcid); + Assert.Equal('T', actualAltAminoAcid); + } + + [Theory] + [InlineData("MABCEFGH*", 5, 'D', 'E')] + [InlineData("MABCDEFGHIIIKLL*", 15, '*', 'L')] + [InlineData("MABCDEFGHIIIKL*", 15, '*', '*')] + public void FindFirstChangeAfterFrameshift(string altAaSequence, int expectedBegin, char expectedRefAminoAcid, + char expectedAltAminoAcid) + { + const int aaBegin = 4; + const string aaSequence = "MABCDEFGHIIIKL*"; + + (int actualBegin, char actualRefAminoAcid, char actualAltAminoAcid) = + HgvsProtein.FindFirstChangeAfterFrameshift(aaBegin, aaSequence, altAaSequence); + + Assert.Equal(expectedBegin, actualBegin); + Assert.Equal(expectedRefAminoAcid, actualRefAminoAcid); + Assert.Equal(expectedAltAminoAcid, actualAltAminoAcid); + } + } +} \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs 
b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs index 1c9bcee6..0614325c 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsUtilitiesTests.cs @@ -2,6 +2,7 @@ using Moq; using UnitTests.TestDataStructures; using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.AnnotatedPositions.AminoAcids; using VariantAnnotation.Caches.DataStructures; using VariantAnnotation.Interface.AnnotatedPositions; using Xunit; @@ -169,16 +170,17 @@ public void GetChangesAfterFrameshift_FirstAminoAcidIsStop() [Fact] public void GetAltPeptideSequence_Genomic() { - var refSequence = GetGenomicRefSequence(); - var transcript = GetGenomicTranscript(); - const int cdsBegin = 112; - const int cdsEnd = 121; + var refSequence = GetGenomicRefSequence(); + var transcript = GetGenomicTranscript(); + const int cdsBegin = 112; + const int cdsEnd = 121; + const int aaBegin = 38; const string transcriptAltAllele = ""; const string expectedResult = "RHRNRNTQTETNTETQRHRNTQKHRNKHRDTETHRNTETNTETQKHTETQKQTQRHRNTQKHTDRNKHRNTETQKYRNTQKHRNKHRDTETQKHSDAETQQHKHRNTETHRNRNTETNTETQTHRHRETQKHTETLKHSGRCPGCRGSIA"; var observedResult = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, - false); + AminoAcidCommon.StandardAminoAcids, aaBegin); Assert.Equal(expectedResult, observedResult); } diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvscNotationTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvscNotationTests.cs index 848f00dd..ca0c79c0 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvscNotationTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvscNotationTests.cs @@ -50,7 +50,7 @@ public void ToString_one_base_duplication() var hgvsc = new HgvscNotation("T", "T", "NM_012232.1", GenomicChange.Duplication, startPosOff, endPosOff, true); - Assert.Equal("NM_012232.1:c.7dupT", hgvsc.ToString()); + 
Assert.Equal("NM_012232.1:c.7dup", hgvsc.ToString()); } // NM_012232.1:c.6_8dupTGC (multi base duplication) @@ -62,7 +62,7 @@ public void ToString_multi_base_duplication() var hgvsc = new HgvscNotation("TGC", "TGC", "NM_012232.1", GenomicChange.Duplication, startPosOff, endPosOff, true); - Assert.Equal("NM_012232.1:c.6_8dupTGC", hgvsc.ToString()); + Assert.Equal("NM_012232.1:c.6_8dup", hgvsc.ToString()); } // NM_012232.1:c.5756_5757insAGG (multi base insertion) diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsgNotationTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsgNotationTests.cs index b632971b..51031a82 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsgNotationTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvsgNotationTests.cs @@ -19,7 +19,7 @@ public sealed class HgvsgNotationTests [InlineData(10, 12, "GAC", "", VariantType.deletion, "NC_012920.1:g.12_14del")] [InlineData(16, 15, "", "GATA", VariantType.insertion, "NC_012920.1:g.15_16insGATA")] [InlineData(19, 22, "TGAC", "GTCA", VariantType.MNV, "NC_012920.1:g.19_22invTGAC")] - [InlineData(10, 9, "", "GAC", VariantType.insertion, "NC_012920.1:g.12_14dupCGA")] + [InlineData(10, 9, "", "GAC", VariantType.insertion, "NC_012920.1:g.12_14dup")] public void GetNotation_MT(int start,int end,string referenceAllele,string altAllele, VariantType type, string expectedHgvsg) { var simpleVariant = new SimpleVariant(null,start,end,referenceAllele,altAllele,type); diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvspNotationTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvspNotationTests.cs index 92ab197d..a9ada19d 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/HgvspNotationTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/HgvspNotationTests.cs @@ -173,14 +173,14 @@ public void Frameshift_due_to_insertion() //NP_001263627.1:p.(Met1?) 
public void Start_lost_start_equals_end() { - Assert.Equal("NP_001263627.1:p.?", HgvspNotation.GetStartLostNotation("NP_001263627.1", 1, 1, "Met")); + Assert.Equal("NP_001263627.1:p.Met1?", HgvspNotation.GetStartLostNotation("NP_001263627.1", "Met")); } [Fact] //NP_001263627.1:p.(Met1?) public void Start_lost_start_not_equals_end() { - Assert.Equal("NP_001263627.1:p.?", HgvspNotation.GetStartLostNotation("NP_001263627.1", 1, 3, "Met")); + Assert.Equal("NP_001263627.1:p.Met1?", HgvspNotation.GetStartLostNotation("NP_001263627.1", "Met")); } [Fact] diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/ProteinChangeTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/ProteinChangeTests.cs index 1b9d2ac0..9e0e9d91 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/ProteinChangeTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/ProteinChangeTests.cs @@ -66,7 +66,7 @@ public void Extension() Assert.Equal(ProteinChange.Extension, proteinChange); } - + [Fact] public void Duplication() { diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidAbbreviationTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidAbbreviationTests.cs new file mode 100644 index 00000000..552b96b9 --- /dev/null +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidAbbreviationTests.cs @@ -0,0 +1,30 @@ +using System; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using Xunit; + +namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class AminoAcidAbbreviationTests + { + [Fact] + public void ConvertToThreeLetterAbbreviations_ExpectedResults() + { + // https://www.ddbj.nig.ac.jp/ddbj/code-e.html + Assert.Equal("AlaArgAsnAspCysGlnGluGlyHisIleLeuLysMetPheProPylSerSecThrTrpTyrValAsxGlxXaaXleTer", + AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations("ARNDCQEGHILKMFPOSUTWYVBZXJ*")); + } + + [Fact] + public void 
ConvertToThreeLetterAbbreviations_NullOrEmptyInput_ReturnEmpty() + { + Assert.Equal("", AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations(null)); + Assert.Equal("", AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations("")); + } + + [Fact] + public void GetThreeLetterAbbreviation_ThrowException() + { + Assert.Throws(delegate { AminoAcidAbbreviation.GetThreeLetterAbbreviation('a'); }); + } + } +} \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidTests.cs index 3d939b7c..7b1a5785 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcidTests.cs @@ -1,5 +1,5 @@ -using System; -using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.Interface.AnnotatedPositions; using Xunit; namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript @@ -7,98 +7,229 @@ namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript public sealed class AminoAcidTests { [Fact] - public void AddUnknownAminoAcid_identity_seq() + public void AddUnknownAminoAcid_ExpectedResults() { - const string aminoAcids = "*"; - Assert.Equal(aminoAcids, AminoAcids.AddUnknownAminoAcid(aminoAcids)); + const string aminoAcids = "MACGYIL"; + Assert.Equal(aminoAcids + 'X', AminoAcid.AddUnknownAminoAcid(aminoAcids)); } [Fact] - public void AddUnknownAminoAcid_incomplete_peptideSeq() + public void AddUnknownAminoAcid_SameIfStopCodon() { - const string aminoAcids = "MACGYIL"; - Assert.Equal(aminoAcids + 'X', AminoAcids.AddUnknownAminoAcid(aminoAcids)); + const string aminoAcids = "*"; + Assert.Equal(aminoAcids, AminoAcid.AddUnknownAminoAcid(aminoAcids)); } [Fact] - public void Assign_null_or_empty_input() + public void Translate_ExpectedResults() { - var aminoAcids = new 
AminoAcids(true); + (string actualRefAa, string actualAltAa) = AminoAcidCommon.StandardAminoAcids.Translate("TTC", "CTC", null, 1); + Assert.Equal("F", actualRefAa); + Assert.Equal("L", actualAltAa); + } - // null - var aa = aminoAcids.Translate(null, null); - Assert.Equal("", aa.Reference); - Assert.Equal("", aa.Alternate); + [Fact] + public void Translate_NullOrEmptyInput_ReturnEmpty() + { + (string actualRefAa, string actualAltAa) = AminoAcidCommon.StandardAminoAcids.Translate(null, null, null, 1); + Assert.Equal("", actualRefAa); + Assert.Equal("", actualAltAa); - // empty - aa = aminoAcids.Translate("", ""); - Assert.Equal("", aa.Reference); - Assert.Equal("", aa.Alternate); + (actualRefAa, actualAltAa) = AminoAcidCommon.StandardAminoAcids.Translate("", "", null, 1); + Assert.Equal("", actualRefAa); + Assert.Equal("", actualAltAa); } [Fact] - public void Assign_codons_with_N() + public void Translate_NsInInput_ReturnEmpty() { - var aminoAcids = new AminoAcids(true); + (string actualRefAa, string actualAltAa) = AminoAcidCommon.StandardAminoAcids.Translate("ANA", "AAA", null, 1); + Assert.Equal("", actualRefAa); + Assert.Equal("", actualAltAa); - // referenceCodons with "N" - var aa = aminoAcids.Translate("ANA", "AAA"); - Assert.Equal("", aa.Reference); - Assert.Equal("", aa.Alternate); + (actualRefAa, actualAltAa) = AminoAcidCommon.StandardAminoAcids.Translate("AAA", "ANA", null, 1); + Assert.Equal("", actualRefAa); + Assert.Equal("", actualAltAa); + } - // alternateCodons with "N" - aa = aminoAcids.Translate("AAA", "ANA"); - Assert.Equal("", aa.Reference); - Assert.Equal("", aa.Alternate); + [Theory] + [ClassData(typeof(StandardGeneticCodeData))] + public void ConvertTripletToAminoAcid_StandardGeneticCode(char expected, string[] triplets) + { + string expectedResult = expected.ToString(); + foreach (string triplet in triplets) + { + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases(triplet, null, 1, false); + Assert.Equal(expectedResult, 
actualResult); + } } + [Theory] + [ClassData(typeof(VertebrateMitochondrialCodeData))] + public void ConvertTripletToAminoAcid_VertebrateMitochondrialCode(char expected, string[] triplets) + { + string expectedResult = expected.ToString(); + foreach (string triplet in triplets) + { + string actualResult = AminoAcidCommon.MitochondrialAminoAcids.TranslateBases(triplet, null, 1, false); + Assert.Equal(expectedResult, actualResult); + } + } + [Fact] - public void Assign_translate() + public void TranslateBases_ExpectedResults() { - var aminoAcids = new AminoAcids(false); - var aa = aminoAcids.Translate("TTC", "CTC"); - Assert.Equal("F", aa.Reference); - Assert.Equal("L", aa.Alternate); + const string expectedResult = "RAD"; + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases("CGCGCAGAT", null, 1, true); + Assert.Equal(expectedResult, actualResult); } - + [Fact] - public void ConvertAminoAcidToAbbreviation_not_support() + public void TranslateBases2_NoAminoAcidEdits_NoChanges() { - Assert.Throws(delegate + const string expectedResult = "RAD"; + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases("CGCGCAGAT", null, 1, true); + Assert.Equal(expectedResult, actualResult); + } + + [Fact] + public void TranslateBases2_AminoAcidEditBefore_NoChanges() + { + const string expectedResult = "AD"; + + AminoAcidEdit[] aaEdits = { - AminoAcids.ConvertAminoAcidToAbbreviation('a'); - }); + new AminoAcidEdit(1, 'M') + }; + + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases("GCAGAT", aaEdits, 2, true); + Assert.Equal(expectedResult, actualResult); } - + [Fact] - public void ConvertTripletToAminoAcid_mitochondrial_codon() + public void TranslateBases2_AminoAcidEditAfter_NoChanges() { - var aminoAcids = new AminoAcids(true); - Assert.Equal('W', aminoAcids.ConvertTripletToAminoAcid("TGA")); + const string expectedResult = "AD"; + + AminoAcidEdit[] aaEdits = + { + new AminoAcidEdit(4, 'M') + }; + + string actualResult = 
AminoAcidCommon.StandardAminoAcids.TranslateBases("GCAGAT", aaEdits, 2, true); + Assert.Equal(expectedResult, actualResult); } + + [Fact] + public void TranslateBases2_AminoAcidEdits_OverrideTwoAminoAcids() + { + const string expectedResult = "MAT"; + AminoAcidEdit[] aaEdits = + { + new AminoAcidEdit(1, 'M'), + new AminoAcidEdit(3, 'T') + }; + + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases("CGCGCAGAT", aaEdits, 1, true); + Assert.Equal(expectedResult, actualResult); + } + [Fact] - public void GetAbbreviations_null_or_empty_input() + public void TranslateBases2_AminoAcidEdits_WithOffset_OverrideTwoAminoAcids() { - // null - Assert.Equal("", AminoAcids.GetAbbreviations(null)); - // empty - Assert.Equal("", AminoAcids.GetAbbreviations("")); + const string expectedResult = "NI"; + + AminoAcidEdit[] aaEdits = + { + new AminoAcidEdit(2, 'N'), + new AminoAcidEdit(3, 'I') + }; + + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases("GCAGAT", aaEdits, 2, true); + Assert.Equal(expectedResult, actualResult); } - [Fact] - public void GetAbbreviations_string_input() + [Theory] + [InlineData("CGCGCAGA")] + [InlineData("CGCGCAG")] + public void TranslateBases_Incomplete_ExpectedResults(string cdsBases) { - Assert.Equal("AspTyrCys", AminoAcids.GetAbbreviations("DYC")); + const string expectedResult = "RAX"; + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases(cdsBases, null, 1, false); + Assert.Equal(expectedResult, actualResult); + } + + [Theory] + [InlineData("CGCGCAGA")] + [InlineData("CGCGCAG")] + public void TranslateBases_Incomplete_ForceNonTriplet_ExpectedResults(string cdsBases) + { + const string expectedResult = "RA"; + string actualResult = AminoAcidCommon.StandardAminoAcids.TranslateBases(cdsBases, null, 1, true); + Assert.Equal(expectedResult, actualResult); } [Fact] - public void TranslateBases_nulls_input() + public void TranslateBases_NullInput_ReturnNull() { - var aminoAcids = new 
AminoAcids(true); - // null - Assert.Null(aminoAcids.TranslateBases(null, true)); + Assert.Null(AminoAcidCommon.StandardAminoAcids.TranslateBases(null, null, 1, true)); + } + private sealed class StandardGeneticCodeData : TheoryData + { + public StandardGeneticCodeData() + { + Add('A', new[] {"GCT", "GCC", "GCA", "GCG"}); + Add('R', new[] {"CGT", "CGC", "CGA", "CGG", "AGA", "AGG"}); + Add('N', new[] {"AAT", "AAC"}); + Add('D', new[] {"GAT", "GAC"}); + Add('C', new[] {"TGT", "TGC"}); + Add('Q', new[] {"CAA", "CAG"}); + Add('E', new[] {"GAA", "GAG"}); + Add('G', new[] {"GGT", "GGC", "GGA", "GGG"}); + Add('H', new[] {"CAT", "CAC"}); + Add('I', new[] {"ATT", "ATC", "ATA"}); + Add('L', new[] {"CTT", "CTC", "CTA", "CTG", "TTA", "TTG"}); + Add('K', new[] {"AAA", "AAG"}); + Add('M', new[] {"ATG"}); + Add('F', new[] {"TTT", "TTC"}); + Add('P', new[] {"CCT", "CCC", "CCA", "CCG"}); + Add('S', new[] {"TCT", "TCC", "TCA", "TCG", "AGT", "AGC"}); + Add('T', new[] {"ACT", "ACC", "ACA", "ACG"}); + Add('W', new[] {"TGG"}); + Add('Y', new[] {"TAT", "TAC"}); + Add('V', new[] {"GTT", "GTC", "GTA", "GTG"}); + Add('*', new[] {"TAA", "TGA", "TAG"}); + } + } + + private sealed class VertebrateMitochondrialCodeData : TheoryData + { + public VertebrateMitochondrialCodeData() + { + Add('A', new[] {"GCT", "GCC", "GCA", "GCG"}); + Add('R', new[] {"CGT", "CGC", "CGA", "CGG"}); + Add('N', new[] {"AAT", "AAC"}); + Add('D', new[] {"GAT", "GAC"}); + Add('C', new[] {"TGT", "TGC"}); + Add('Q', new[] {"CAA", "CAG"}); + Add('E', new[] {"GAA", "GAG"}); + Add('G', new[] {"GGT", "GGC", "GGA", "GGG"}); + Add('H', new[] {"CAT", "CAC"}); + Add('I', new[] {"ATT", "ATC"}); + Add('L', new[] {"CTT", "CTC", "CTA", "CTG", "TTA", "TTG"}); + Add('K', new[] {"AAA", "AAG"}); + Add('M', new[] {"ATG", "ATA"}); + Add('F', new[] {"TTT", "TTC"}); + Add('P', new[] {"CCT", "CCC", "CCA", "CCG"}); + Add('S', new[] {"TCT", "TCC", "TCA", "TCG", "AGT", "AGC"}); + Add('T', new[] {"ACT", "ACC", "ACA", "ACG"}); + Add('W', new[] 
{"TGG", "TGA"}); + Add('Y', new[] {"TAT", "TAC"}); + Add('V', new[] {"GTT", "GTC", "GTA", "GTG"}); + Add('*', new[] {"TAA", "TAG", "AGA", "AGG"}); + } } } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscriptTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscriptTests.cs index 0e9a6998..d43ea706 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscriptTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscriptTests.cs @@ -1,7 +1,7 @@ using OptimizedCore; using UnitTests.TestDataStructures; using UnitTests.TestUtilities; -using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.AnnotatedPositions.AminoAcids; using VariantAnnotation.TranscriptAnnotation; using Variants; using Xunit; @@ -11,16 +11,16 @@ namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript public sealed class AnnotatedTranscriptTests { [Fact] - public void SerializeJson_NominalUsage() + public void SerializeJson_ExpectedResults() { var variant = new Variant(ChromosomeUtilities.Chr1, 1263141, 1263143, "TAG", "", VariantType.deletion, "1:1263141:1263143", false, false, false, null, null, new AnnotationBehavior(false, false, false, false, false)); var refSequence = new SimpleSequence(HgvsProteinNomenclatureTests.Enst00000343938GenomicSequence, 1260147 - 1); var transcript = HgvsProteinNomenclatureTests.GetMockedTranscriptOnForwardStrand(); - var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, new AminoAcids(false)); + var annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, refSequence, null, null, AminoAcidCommon.StandardAminoAcids); var sb = StringBuilderCache.Acquire(); annotatedTranscript.SerializeJson(sb); - var jsonString = StringBuilderCache.GetStringAndRelease(sb); + string jsonString = 
StringBuilderCache.GetStringAndRelease(sb); Assert.Contains("ENST00000343938.4:p.(Ter215GlyextTer43)", jsonString); } diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequenceTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequenceTests.cs index d42a9f33..94ce0d70 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequenceTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequenceTests.cs @@ -113,9 +113,6 @@ public void With_rnaEdits_reverse_deletion_utr() var expectedCodingSeq = "ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA"; Assert.Equal(expectedCodingSeq, codingSequence.GetCdnaSequence().Substring(codingRegion.CdnaStart-1, codingRegion.Length)); - } - - } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequenceTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequenceTests.cs index 50d6d4dd..2c01a6fb 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequenceTests.cs 
+++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequenceTests.cs @@ -9,830 +9,830 @@ namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript { public sealed class CodingSequenceTests { - [Fact] - public void Create() - { - // ENST00000374673.3 - var sequence = new SimpleSequence( - "GGGGTGTGTCTCCAGGGCCTTCCGCACTCAGCCAGGGAGAGCAAACAAACAGGCTTGGGGGACTGGGGAGGGGGGAAAGCGGAGGGGCAGGGTAGGGGCGGGGCAGGAGTGGAAGGCGGGGCAGGAGCAAGCGGCCTGGGCAGGGCAAGGGGGCCTCAGCTGGACCCTCGGATACTCACGGCAGTTGGCTTCATCAGTTCGGTCCTCACAGTCAAAGTCACCATCGCAGCGCCACAGCTTGAGGGCACAATGTCCATTCCCGCAGGGGAACTCGTTGGGCTCACAGGGTGGCGGGGGGCCTAGGAGACCGGGCAGGGGTCAGCAGCATCCTCCCGGGCCAGCTTCCTGCTCCCCGCACCCACCTGCACCCCTGCCGGTGCGCACCACAGTCTAGCTCATCGCTGCCGTCCTCGCAGTCCTCCTGTCCGTCGCAGAGGTAGTCTCTGGGGATGCAGTGCCCATTGCGGCATGCGGCCTCCTGGGGCCCACAGGGCAGGGGCCTGACGGAACCGGGAAGCAGGGGCTGAGGAGCGTGGGTGACTGGTGGCTGTCGCATGATGGTTGTCTCTGGCCGGGGCGGTAAAGATGTCGTCTCCACAAGGAGAGAGAATGTGGGGCTGATACCCAGGACTGGCTCCTCTGTGGATAGATTCCGCTTGGCATTTGGCAGAAGCAGATGGCTCCTCACCTGCTCCTTGTCCCCAACCCTCCCCAGGCCCACCCTGTACTCCCCAACACCACTCCCTGCCACCCCCTGCCTGGCTCTGTCATCACCCTTCCTATGCCCCCATCCTCTGCCTGCACCAAACCCTCATAGTCCTTGATGGGCTCCAAGACCCAGGTGTAGGACCCTGGCCCTCCCCTGGCACCCAAACCACTCGTGGCCCCGGACATCCCCTCACCACAATTGAGCTCATCAGACATGTCCCTGCAGTCGGGCCGCCGGTCACAGCGATACTCCAGGGCCACACACTCATTGTAGCTGTGGCAGGCAAACTCGGCCTCCGTGCAGGCTCTTGGGAACTGGGGCACTGCAGGTGGAAAGGAAGCAGACTGGAGTCAGAGGCGGCAGGAGGCAGGTGCGGGAAGCTGTAGGTGCTGTGTGGCTGGAGTGGGCTCCAGGGCCCTGTGTCAGGCAGCTCGGTTTCTGGCAGGCACAACGAGGGCAAGCAGCACACACTAGACACATCCACAGCACACGTGGGGCATGGGACATGCGGCAGTGGCCTCCCCCATCTCTAAAACAGACCCCACACACAGTTGACATGCCACACGCATGCAACCACCACACCACACACATGCAGGCCACAGCCTGGCCCAGTGAGGACAAAGAAGGAGGGGAGAAGGGAGTGCCCAGCTGTCTTGGGCTGTGCCCAGCCAGCCATCTTGCCCACACCCTTCTTTCCTCTCCATCCTTTAAAAAATTTTTTTCTCTCTTCTTTTTTATTTTTTTAGAGACAGGGTCTTGCTACGTTGCCCAAGCTGGTCTCGAACTCTTTGCCTCAAGCAATCCTCCCGTCTTGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCCCTGCACCCGGCCTCCTCTCCAACCTTAACTTCTCTAGGAACCTGGCTGGGCCTCGGCCTGGCTTACACTCTCACCTGGTGTCACTGCGACCGCCACAGCGGCCGGCGGGGGTGGGGGGGTCTGT
GCTGGAAAGGAAGATGTGATCAGTGGCTGTTCCACCTGGGAGCCGGGAGCTGAGGGCTGCAGGGCTGGGCCACATTCCACCATCCCTAGCCAGGAGGACTTATTGAAAAGTGAGAGAGGAGGGCTGGACCCCCAGCAGTCTTTAGACCTGGGCCTGATGATGCAGAAGAGCAAGCTTGATCTCTGGGTGCAATAATTAAGGGTTTTTGTTTGTTTGTCTTGTTTTAGAGGCAGGGTTTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCCTAGCTCACTGCAGCCTCAAACTCCTGGGCTCCGGTGATCCTC", - 22213528); - - var codingRegion = new CodingRegion(22213728, 22215214, 1, 538, 538); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 4, 22213728, 22213827, 439, 538), - new TranscriptRegion(TranscriptRegionType.Intron, 3, 22213828, 22213912, 438, 439), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 22213913, 22214167, 184, 438), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 22214166, 22214430, 183, 184), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 22214431, 22214559, 55, 183), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 22214560, 22215160, 54, 55), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 22215161, 22215214, 1, 54) - }; - - const string expectedResults = - "NCAGCACAGACCCCCCCACCCCCGCCGGCCGCTGTGGCGGTCGCAGTGACACCAGTGCCCCAGTTCCCAAGAGCCTGCACGGAGGCCGAGTTTGCCTGCCACAGCTACAATGAGTGTGTGGCCCTGGAGTATCGCTGTGACCGGCGGCCCGACTGCAGGGACATGTCTGATGAGCTCAATTGTGAGGAGCCAGTCCTGGGTATCAGCCCCACATTCTCTCTCCTTGTGGAGACGACATCTTTACCGCCCCGGCCAGAGACAACCATCATGCGACAGCCACCAGTCACCCACGCTCCTCAGCCCCTGCTTCCCGGTTCCGTCAGGCCCCTGCCCTGTGGGCCCCAGGAGGCCGCATGCCGCAATGGGCACTGCATCCCCAGAGACTACCTCTGCGACGGACAGGAGGACTGCGAGGACGGCAGCGATGAGCTAGACTGTGGCCCCCCGCCACCCTGTGAGCCCAACGAGTTCCCCTGCGGGAATGGACATTGTGCCCTCAAGCTGTGGCGCTGCGATGGTGACTTTGACTGTGAGGACCG"; - var codingSequence = new CodingSequence(sequence, codingRegion, regions, true, 1, null); - var observedResults = codingSequence.Substring(0, expectedResults.Length); - - Assert.Equal(expectedResults, observedResults); - } - - [Fact] - public void Length_ReturnTrueLength_WhenGapsArePresent() - { - const int expectedResults = 720; - - var sequence = new NSequence(); - 
var codingRegion = new CodingRegion(10051, 12770, 51, 769, 720); - const byte startExonPhase = 1; - - int naiveCodingRegionLen = codingRegion.CdnaEnd - codingRegion.CdnaStart + 1; - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 10001, 10299, 1, 299), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 10300, 12300, 229, 331), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 12301, 12970, 331, 1000) - }; - - var codingSequence = new CodingSequence(sequence, codingRegion, regions, false, startExonPhase, null); - var observedResults = codingSequence.Length; - - Assert.Equal(expectedResults, observedResults); - Assert.NotEqual(expectedResults, naiveCodingRegionLen); - } - - [Fact] - public void RnaEdits_snv_forward_no_utr() - { - //NR_002754.2 - var genomicSeq = - new SimpleSequence( - "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt", - 11968210); - var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119) - }; - - var rnaEdits = new IRnaEdit[] {new RnaEdit(107, 107, "t")}; - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); - - Assert.Equal( - "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt", - codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_snv_forward_with_utr() - { - //NM_001144032.2 chr1:148644011-148644795 - var genomicSeq = new SimpleSequence( - 
"ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA", - 148644011 - 1); - - var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011, 148644795, 1, 785) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(420, 420, "C"), - new RnaEdit(500, 500, "T"), - new RnaEdit(737, 737, "T") - }; - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); - - Assert.Equal( - "ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA", - codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_snv_reverse_utr() - { - //NM_031947.3, chr5:140682196-140683630 - var genomicSeq = new SimpleSequence( - 
"TGCATGTACACACAAATGGCTTTATGCAAAGGCCCTGACAGAACGATATTTAGTTTTTCAGATTAGGTACATAGGGCCAACCAGCCCACCCTGTACATTCCAGCAAGTGCAAGAGCAGCAACTTTCCTATTTCAATACAATTATGGGCAGAAATTATATGATGTAAAATAGAGGCCCTTCCATAAAGTTAAGATTTAGGGTAGAAGAAGGGAAGATAAAACCAAAATTCCCATGAAGTCAAAATTAGACAGTGGTCTTGTACTCTGCTGAACCCTGTGATGAACTGTAGTCCTCAAACTCATGGACTCGGATCCAGGTTCACCAAGACACTTCAGTATGCTTCCAACTGTTTCATCATCATCTTCCTGCTGTATTCGTAGGCCACAAACAGTGCCCCATTGGCAGGGATTGCTCGAATCATAGTAGCTTTCAGTCCAGAATATAAGGCTACTATTCCTTCATTTCTCACAACACTTAAGAGGGTACCAATAAATCCTGCCTGTTTCCCATACATGGAAAGAACTTGAATTCTGGATTTAATACAATCCACTGGGAACACGACAAGCCACAGGCAAATTCCAGCAACTCCACCACTTAACATCAAATGGACAGGGCCTAGTTCATCTTTTGATCTCCCTGACGCAAAAAACGATCGGCTCAGTTCATAGCCACCAAAGAAAAAGAAATAACCCGGTACTTCTTGAAGTAGAGTACTCGAGAGTCCATGGTAGAAGCCCAAGGGGCCATCCTTTTTAAGGATACCCTTCACGACAGACCAAATTGTATTATGGCTTTTTGCTATCTTCCCTGACATCTCCATTTCATACATGGTCTGTAGCCGGCACTTCACAAGCTCAGTGGGGCAGAGAGCCAGTGCAGCAAATGCAGAGGCGAAGGACCCCGCGGCTGCAGTCTGGAGATCACTCAGCTTTGCCTGCTTGTCCATTCCAGCCACTTTCCTGACAAACTGCTGGCAGAACCCGTAGCACATGAAGAGGACCGAGTTTTCGGCGACGTAGGCCATAAGTGCCGGGCCGGTGCCCTTGTAGAAGCCCCGGAGACCCACTTGGGCGTATGTCTTCAGGAAGCAGTCGGTGAGGCCCTTGTACAGGTCAGGGAACGTCTGCATCTTCACTTTTATTGTGTCGAAGGGCTGCCCAGTCAGTACACACGCTGTCCCCCCTGCGGCCCCCGCTGTGAGGTCGATGGCGGCTTGGATGCCAGGACCGGACTTCATGTTCGCTCACTCGTCTGAGGGTCCCAGTGGAAGGCGACTAACTCCCCAGAGCGTGAGACCGGCTTTTCACGTCCAGCCGCAGCGAGCGCGGGGAATGGAGTTGGGGGTGGTGGGGTGGCTCTACCGCCTGTTCTGGGCTCTCACCCCAGTGCGGGGGAAGCCGCTCAACCCTACGCTCCGCCGCGGGCCGCCCCCTCC", - 140682196 - 1); - - var codingRegion = new CodingRegion(140682527, 140683432, 199, 1104, 906); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 140682196, 140683630, 1, 1435) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(366, 366, "T") - }; - - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); - - Assert.Equal( - 
"ATGAAGTCCGGTCCTGGCATCCAAGCCGCCATCGACCTCACAGCGGGGGCCGCAGGGGGGACAGCGTGTGTACTGACTGGGCAGCCCTTCGACACAATAAAAGTGAAGATGCAGACGTTCCCTGACCTGTACAAGGGCCTCACCGACTGCTTCCTGAAGACATACGCTCAAGTGGGTCTCCGGGGCTTCTACAAGGGCACCGGCCCGGCACTTATGGCCTACGTCGCCGAAAACTCGGTCCTCTTCATGTGCTACGGGTTCTGCCAGCAGTTTGTCAGGAAAGTGGCTGGAATGGACAAGCAGGCAAAGCTGAGTGATCTCCAGACTGCAGCCGCGGGGTCCTTCGCCTCTGCATTTGCTGCACTGGCTCTCTGCCCCACTGAGCTTGTGAAGTGCCGGCTACAGACCATGTATGAAATGGAGATGTCAGGGAAGATAGCAAAAAGCCATAATACAATTTGGTCTGTCGTGAAGGGTATCCTTAAAAAGGATGGCCCCTTGGGCTTCTACCATGGACTCTCGAGTACTCTACTTCAAGAAGTACCGGGTTATTTCTTTTTCTTTGGTGGCTATGAACTGAGCCGATCGTTTTTTGCGTCAGGGAGATCAAAAGATGAACTAGGCCCTGTCCATTTGATGTTAAGTGGTGGAGTTGCTGGAATTTGCCTGTGGCTTGTCGTGTTCCCAGTGGATTGTATTAAATCCAGAATTCAAGTTCTTTCCATGTATGGGAAACAGGCAGGATTTATTGGTACCCTCTTAAGTGTTGTGAGAAATGAAGGAATAGTAGCCTTATATTCTGGACTGAAAGCTACTATGATTCGAGCAATCCCTGCCAATGGGGCACTGTTTGTGGCCTACGAATACAGCAGGAAGATGATGATGAAACAGTTGGAAGCATACTGA", - codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_insertion_in_utr() - { - //NM_080431.4, chrom: chr1:2938046-2939467 - var genomicSeq = new SimpleSequence( - 
"TGGAAGAGGCCTCAGCAGGCCCAGGCCACCTGGAGGGAGAGCAGACCTGCGGCTGAGGATGCAGGGCTCCCGGGCACGGTGCTAGCCCTGCCTTGAGACACCCCGAGAGCTGTGGGAAGAGCTGTGGGATCCCCTATTGCATCACAAAGCGGCCCTGGAGGGCTGGTCTTTATTTTGATGAGGCTGAGAAGGGAAGGCTGCGGGCATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGAAGGCCGCTTCTCGTTGGGTACCGTGGGGGGTGAACCCTAGCCCCAGCTTTGGGAGGATGTTCAATAAAGGACCAATGCCGGAA", - 2938046 - 1); - var codingRegion = new CodingRegion(2938251, 2939384, 206, 1339, 1134); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 2938046, 2939467, 1, 1422) - }; - - var rnaEdits = new IRnaEdit[] {new RnaEdit(1423, 1422, "AAAAAAAAAAAAAAA")}; - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); - - var expectedCodingSeq = - 
"ATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_insertion_reverse_in_utr() - { - //NM_001242659.1, chrom: chr1:1533388-1535476 - var genomicSeq = new SimpleSequence( - 
"TCTGTTGGTCTGAGAATGATGGACATTTAGACACTGGCGCCAGGTTTGCGCCTGACCGGCGCCACGCAGGGGTGGGCGGAGCAAAGACACACAGGTGGGCTACAGGTGTCACACGGCACCAGCCAGGGCCCGGGGTGGCTGGGGTGAGGATGGGTGTTTGGCCAGTGACCAGGAGTCAGGTCAAGTCCAGGTGGTCAGTGCCAGGGGCTCCAGGAGGGGAGGGCAGTGCCATAACCCTCCTGGTGTCCAGCGTCACCAGGCGGTCGTCACAGAAAGCAACCTCGGCCCGGGGCCCGGGTCTGCAGCAGGTGGGCAGGGTCAGCTTTTCTTCCATGGCGGGTGGCATTGTCTGGGCCGGATACTGGCTCTCGACCCCTGGGCATGCAAAGGCTAGGGGTGGTGCTGTCAGTCACACCGTTGCCACCAAGGTCCCCTGGGTCGGCTGAGGCTTGGGATCCAGGCAGCGGTGGAAGGTCGGGCTGCTCAAGGCCGGTATCTAAGCTTCTGCCCTGGGACCCAGTGGTGATGGCCGCCATCTGCCCCATTCCCACAGGGACCTAGTCAGAGGTCGCACACACAAAAGGGGTACCTGGCCCTGGAGAACCACCAGCTGCCCGGGGTCTGAGAACACTCACCCTGGCCGCTGGGCCAGGCCTGCCAGGCTCCCGGCTGGTCCAACACCCTAAACGGTACAGAGCGCTGCAGGCCCTCACCTCATGCTTCTGCAGCGCTTGAGGGTGAAGGTGTCTCCAAGGGGACGCTGGCCAGATGCATGGAGAGGCCGGCCAATCTTAGGGCCACAGACCCCCCCTGGACAGCAGAGGGTGTTGGCTCCTGCAGTGGGCCCGAGATACTAAGGCACGAAGCTAACCTAAGCCCAGTGGGGTGGGGCGGGGCAGGACAGGCTTGGAGAGCTGCGCCCCAGGCCTGCAAAGCACAGTGACCGAGCAATGGCGACGGTCTGTCTGGGACAATTCGGCACAGGATGGAGGTGCGGGGTGAGCCGGAGTGCCCATGGCTCTTGCTGGAAGGGGCTCCATGCCCTGGCCGCCTCTATAAAGGCCTGCGGAGAGCGGGGAGAGCCCTGGATGCGGCTGGCACAGCAGCGCAAGCCCAGGGGCCAATCCGGGGCCAGAGTCTGGGAGTCTGACGCCCGGCTGGAAAGGGCGTGTGATGATGCCAAAGTGCCGGAGCCGTCGCCGGCAGGTCCTCCTCCGCGGGGATCTTAAGGAGGCAGCAGGAATGAGGAGAGGAGAGCGGGCGGAGGACCTGGGAGCTCAGGCGCCCTCAGGCAGGTGGCGCAAAGATGGGCGGGCGGCCTCGCGCTTCAGGGGTGTCTGCGCAGGCCGGGGCGCGCGAGGGCCGGGCGCATGAGGTTCTCGGTGATGTAGGCCACCAGCAGGCAGATGACCACCAGCATGACGCAGATGGAGCCGCCCACCGCCGTCATGGCCACCACGATGTCCTGCATGCCGGCCGGCTCGGCGGTGAACTCCACGCACTCGGCCGGCTCGGGGGTCTCTGGCGCGGCGGCGGCGGGCCCAGCGCGCAGCGGCAGCGGCTGCAGGCACAGGCGGTAGAGGACGCTGTCGTGCACGTCGGGCAGCAGGTAGTCGCGGCAGGAGGCCCCGAGGAGCACGCGCTCGCACGGGAAGCGCGTGTAGGCGCCGCGCCACGAGCAGTTGAGCGCGAAGGCGCGCACGCGGCGCGCGGCGGCCGGGGCCAGGCGCCACTGCAGGAGGACGCTGCGGTTGCGCAGGACGCTGGCGCGCAGGGAGCGGCCGGCCGGGGCGTGCAGCACGCAGCCCGGAGCCTGGCAGCGGAAGCCGCGCGCGGGGCTGCGGAAGCACAGGCGCCCGCCGCCCGCCTCGGGGCCCTCGGGCAGCACCTTGTAGGGGCACCAGGGCGCGTCGGGGGTCGGCTCCCAGCCCGGCGGCGTCGGGGCGGCCGCGGCGCAGGGCGGCGGCGCGCAGGCGGCCAGCAGCAGCAGCAGCGGCGG
GGCGCGCATCCTGCGGCGGGGCCACGGGGCGCGGCGCTGGGTCACGCGGGCCGCGCCGCCGCCGTCCCCGCTGCCCGCTCCCCGCGATCC", - 1533387); - var codingRegion = new CodingRegion(1534715, 1535395, 82, 762, 681); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 1533388, 1535476, 1, 2089) - }; - - var rnaEdits = new IRnaEdit[] {new RnaEdit(2090, 2089, "AAAAAAAAAAAAAAA")}; - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); - - var expectedCodingSeq = - "ATGCGCGCCCCGCCGCTGCTGCTGCTGCTGGCCGCCTGCGCGCCGCCGCCCTGCGCCGCGGCCGCCCCGACGCCGCCGGGCTGGGAGCCGACCCCCGACGCGCCCTGGTGCCCCTACAAGGTGCTGCCCGAGGGCCCCGAGGCGGGCGGCGGGCGCCTGTGCTTCCGCAGCCCCGCGCGCGGCTTCCGCTGCCAGGCTCCGGGCTGCGTGCTGCACGCCCCGGCCGGCCGCTCCCTGCGCGCCAGCGTCCTGCGCAACCGCAGCGTCCTCCTGCAGTGGCGCCTGGCCCCGGCCGCCGCGCGCCGCGTGCGCGCCTTCGCGCTCAACTGCTCGTGGCGCGGCGCCTACACGCGCTTCCCGTGCGAGCGCGTGCTCCTCGGGGCCTCCTGCCGCGACTACCTGCTGCCCGACGTGCACGACAGCGTCCTCTACCGCCTGTGCCTGCAGCCGCTGCCGCTGCGCGCTGGGCCCGCCGCCGCCGCGCCAGAGACCCCCGAGCCGGCCGAGTGCGTGGAGTTCACCGCCGAGCCGGCCGGCATGCAGGACATCGTGGTGGCCATGACGGCGGTGGGCGGCTCCATCTGCGTCATGCTGGTGGTCATCTGCCTGCTGGTGGCCTACATCACCGAGAACCTCATGCGCCCGGCCCTCGCGCGCCCCGGCCTGCGCAGACACCCCTGA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void GetCodingSequence_InsertionGeneModel_InsertionRnaEdit() - { - // NM_019119.4, chr5:140566701-140571111 - var genomicSeq = new SimpleSequence( - 
"AGAATGCTACGGAAGTCCTTGACAAAAAGGAAACACTGAGACAGATGGGCTGAGAAGAAGAGCTGTCGAGTCCCTGATTGGGAAAGGAAAAATTAAAAACCCTAGATCTCTGGTACACATAAGTCTGGGTTTGCGATTGCTATTTGTGCTGGGGCAGTGTGATTGAGACTGACATTGAGGAAAGAAGCAGCTATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGCCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGTC
GTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCAGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTCTCCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGAAAGGAACCCACTTAATAAAGACATTTACTTCTTTAATATATTCTTGTTGGCTAACTAAATTGTGTATGCCCACCACAAAGAAGGTACTATTTTTTGTTTGATTCATCTTCAACTTTGCGTATTATGCTTAACTTCACAAGTTAACTTTTTCTTATTTTGTATCCTGATGAGGCATTTCTTACTAGAATCCCATAAGTGAAATATAATATTTTTCAAAGTTGATATCATTTAAAAATTTTTGGTCGTTTTAAATGTCTTTATTGACTTTAAATTCATTGCCTCTACATTATTCATTAGTTCTTCTTTTCCTAAAACTTTTTACTTGTTAAAATAGTCTGCTGCATGTAATATGTGCTTTTACTATTTGATATTTCTTCTATTTTTCTTTTGAAACCGGTGTTCTTATTGGTTTGCCATCCTTGTTCATTACAACTGTTTTTTGTTTGTTTGTTTGTTTTTTGGTTTGTTTGTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCAGAGTATCTGGGACTACAGTTGCATGTCACCACGTTCGGCTAATTTTTGTATTTTCAGTAGAGACGGGTTTCATCATGGTGGCCAGGATGGTCTATCTCTTGACCTCGTGATCCACCCCACTCAGCCTCCCAAATTGCTGGGATTTACAGGCATGAGCCACCGCACCCAGCCTACAATAATTTTCTTAAACTTTACCTTTTATTTTAAAGTTCTAGTTTCCCGGCATTGATAGTTCCCTATTTGAAATATAATGTTTCTCTTGTAAGTGATATGATAAATAAACCCCTAATTAGCCTTAGAAGAAAAACCACTGCAAGATATTAAGCGTGTGTAAATGGGCTTTAGTCTGGAAACCAAAAAAAAAAAAAAAATTTAGTCATTCTATAGGATCATGTGAAAATATTTAATTTGCTCCTTTTAATTCTGTATAAACAAATCAGAGGTTCCTGAGGTTCCTGTTAAATTTTTAATGGCTAATAGCCCAGTGCCATCCAGTTGAAAAAACAACAGCAATCACAAAGTAGAGGTTTATATTGTGCGGCTTTTATATTCAGCTATTAGAGTGTTATTGGTAGTGTCTAGCCTTTTCCTCCACGACATTCCTTGACTTAATCCATTTGGGCCTATTATAGACAAAATAGAGCTTCTTTCTAGATATAAGGTCTTTGAGGCAGGGCTCAGTGGCTCATTCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACCTTAGGTCACGAGTTTGAGACCAGCCTGACCAACGTTAAGTAACCCCGTCTTTACTAAAAATACAAAATTAGCCAGGCATGGTGGCACAT
GCTTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAAGTTGCTTTGAGCCGAGATTGCACCATTGTACTCCAGCCTGGGCAATAAGAGCAAAACTCCATCAAAATAAAATAAAATAAAATATAAAATAACTTAAAAAGAACTTTGAATAAAATTCTATGAAAAAAGACACTAGAATGCTGTTCTTAATTTTAATAGTGTTAAGATAGGTGTTAGTGTGGTCTGTTCTTTACCTCCCTTTATTTGGTGCAGAGAAGTTAGATCCTGCTAAATTTCAATTAAGAGGGGACCTTAAAATAAGGATCAATCTCTTATTTAACCCTGTAAGTTACTTTAAAGCTAATACAAGAAAAACAAAGACAAGTGAAAGTAAGGAAACAGAAATTGC", - 140566701 - 1); - var codingRegion = new CodingRegion(140566893, 140569285, 193, 2586, 2394); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 140566701, 140568035, 1, 1335), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 140568036, 140571111, 1337, 4412) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(908, 908, "T"), - new RnaEdit(1336, 1335, "A"), - new RnaEdit(2096, 2096, "G") - }; - - const byte startExonPhase = 0; - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); - - const string expectedCodingSeq = 
"ATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGTCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACAATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGTCGTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCGGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTCT
CCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_in_coding_sequence_reverse_insertion() - { - //NM_000682.6, chrom: chr2:96778623-96781984 - var genomicSeq = new SimpleSequence( - "CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTACCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAACAGGCAAAGGGGGAAAGGAGGGCCCAGAGAC
GATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGCTCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCGGCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTGGTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGCTGCCCGGCTCGGCTAGACAAGAGCGTCGCC
CCT", - 96778623 - 1); - var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1353); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1, 998) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(999, 998, "AGAGGAGGA") - }; - - const byte startExonPhase = 0; - const bool onReverseStrand = true; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, - rnaEdits); - - var expectedCodingSeq = - "ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACCCTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACACCATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA"; - - 
Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void With_rnaEdits_snv_mnv() - { - //NM_001242659.1 - var genomicSeq = new SimpleSequence( - "ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCXCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCXAAAGACAGCXGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCXTAATGGCACXGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGXXCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTXXGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA", - 149553002); - var codingRegion = new CodingRegion(149553003, 149553787, 1, 785, 785); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 149553003, 149553787, 1, 785) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(52, 52, "G"), - new RnaEdit(164, 164, "C"), - new RnaEdit(174, 174, "A"), - new RnaEdit(284, 284, "C"), - new RnaEdit(294, 294, "C"), - new RnaEdit(420, 421, "CA"), - new RnaEdit(670, 671, "CT") - }; - - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); - - var expectedCodingSeq = - 
"ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTCTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void With_rnaEdits_deletion() - { - //NM_033089.6, chrom: chr20:278204-280965 - var genomicSeq = new SimpleSequence( - 
"GGAGGATGCTGGGAAGGAGGTAAAATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAAACACCCGCCTGCCTGCCAGGGTGAACACACAGCCAGCTTATCCCTCTTAAGTGCCAAAACTTTTTTTTAAACCATTTTTTATCGTTTTTGAAGGAGATCTTTTTAAAACCTACAAGAGACATCTCTCTATGCCTTCTTAAACCGAGTTTACTCCATTTCAGCCTGTTCTGAATTGGTGACTCTGTCACCAATAACGACTGCGGAGAACTGTAGCGTGCAGATGTGTTGCCCCTCCCTTTTAAAATTTTATTTTCGTTTTTCTATTGGGTATTTGTTTTGTTTCTTGTACTTTTTCTCTCTCTCCTTGCCCCCCTCCCGCCCTCCCCGCCCCATACCTTTTCTTCCCCTGGATTTTCACCCTTTGGGCTGCCTTGCTCATCTTTATGCCCCAGCACTAGGTACGGGGCCCAACACGTGGTAGGCACTCCATCAGTGTTTGCTGAATTGAAAACATTGTTGACTGTGGCTTCTATCAGAGTGTCTACCTTTTGCAGCTCTTCCCCTCCCTCATTTAATTTGCTGCTTTTAATCTACGTGGTCTGAGAATTTGTGAAACCAGTGTTGTTAGAAGTGTATATAATCTGAATCAATAAGCTCTGAATGGTGGCCAAGGGCCTCTCTTATGGCACAAAGATGCATGGACTTCATGACAGCTCTTTTGGTGGCTCAGAAGCCATTTTTTATAGAATCATGGAATCTAGAATATTCCTGCTGGAAAGAACCTGAGAGTTGGTTTGGACCAATTCCCTGGTTTTCCAGC
AGATGAAACAGGCCCAAAGAGGTTAAATGACTGGGTGAAAATCACATAGCTGTCTGGTGCCAGAGCCAGCCTATAGTAGAGTCCCCTGACCCCAAGCCCGGTGCTCATTCCACTACCTCTCACACTTCACAACAATTTCCTCAACACTTGAGGGCCCAGAAAGTCTGATCTCTCCAGAATGATCAGCCCAGAGGAATGCTGAGAAATCACCTGGAGGAGGGAGCAGAAAGAGAAGGTTTTTAAGGAGGGGCTTCTGAATACTTGGGAGATACGGAACGGACCAAGGACCACACTCCAGGGTGCATTCGTTGCTCCCTGGGGCACCACTTCTGGATTACAGTGTGCCAGGTCCTTTGGAGGCCCTACCCCTTCCCCATTCATTGCCACCAGTGAGAAATGGGGGTGCCCCTGTGTAAAGAAACCTACCAAAGGTTTACATTTGCACCTTAGCCTCAATAGCTACGAACCCTAGAGAAGCAGCTAGCTGGAGCTCATGTGCAACTCCTGATTCTCAGGAGAAAGATGGATTTTAACCCAAAATTATGAGTGAGCTGTTAACTCTAAAATGTACTTGGGAGATAGGCCAAGCGAGAGGTCATGGGCCAACTAAGTGTTATCCAGTAGAAAAGACAGTACACTGCTTTTCTTTTAGTGTTTGCTTTTCCTTTGCTATATGTTTTGCTATTTCCTTGTGGCTTAGAATGTAAAATTGATTGTTAAAAGTTTTGTTCTGAATAAATATTTATCTTTTGTATTGCTAAAA", - 278204 - 1); - var codingRegion = new CodingRegion(278228, 279442, 25, 1236, 1212); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 278204, 278687, 1, 484), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 278688, 278690, 484, 485), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 278691, 280965, 485, 2759) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(485, 487, ""), - new RnaEdit(2763, 2762, "AAAAAAAAAAAAAA") - }; - - const byte startExonPhase = 0; - const bool onReverseStrand = false; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, - rnaEdits); - - //The coding sequence from refseq does not have the deletion from rna edit. That was manually inserted. 
- var expectedCodingSeq = - "ATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void With_rnaEdits_reverse_deletion_utr() - { - //NM_001317107.1 chr14:22138125-22139232 - var genomicSeq = new SimpleSequence( - 
"ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA", - 22138125 - 1); - var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562, 22138563, 669, 670), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1, 669) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(905, 905, "T"), - new RnaEdit(796, 796, "C"), - new RnaEdit(679, 679, "A"), - new RnaEdit(670, 671, "") - }; - - const byte startExonPhase = 0; - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); - - var expectedCodingSeq = - 
"ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_deletion_reverse_utr() - { - //NM_001123068.1 chrom: chr1:147954635-147955377 - var genomicSeq = new SimpleSequence( - "TTGTTACTTAGTTTTTATTTCATAATCATAAACTTAACTCAACTCTGCAATCCAGCTAGGCATGGAAGGGAACAAGGAAAACATGGAACCCAAAGGGAACTGCAGCAAGAGCACAAAGATTCTAGGATATTGCAAGCAAATGTGGTGGAGGGGTGCTCTCCTGAGCTACAGAAGGAATGGGTCTGGTGGTGAAAATAAAACACAAGTCAAACTCATTAGAATTGTCCACAGTCAGCAATGGTGATCTTCTTGCTGGTCTTGCTATTCCTGTACCCAAAGTGCTCCATGGCTTCCACAATATTCACACGTTCTTTCACCTTGCCAAAGGCCACATGCTTGCCATCCAACCACTCAGTCTTGGCAGCACAGATGAAAAACTGGGAACCATTTGTGTTGGGTCCAGCATTTGCCATGGACAAGATGCCAGAACCTGTATGCTTTCGGATGAGGTTCTCATCATCAAATTTCTCCCCATAGATGGACTTGTCACCAGTGCCATTATGGCGTGTGAAGTCACCACCCTGACACATAAACCCTGGAATAATTCTGTGAAAGCAGGAACCCTTATAACGAAATCCTTTCTCTCCAGTGCTCAGAGCACGAAAGTTTTCCGCTGTCTTTAGAATCTTGTCTGCAAACAGTTTGATGGAGATGCGGCCCAAGGGCTTGCCGTCGACGGTGATGTCAAAAAAGACGACGGAGTTGACCATGGCTGATAGTACAGGGCTCACAGTGATGGTGGC", - 147954635 - 1); - - //coding region between 34..528 - var 
codingRegion = new CodingRegion(147954850, 147955344, 34, 528, 495); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954635, 147954669, 704, 738), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 147954670, 147954674, 703, 704), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954675, 147955377, 1, 703) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(704, 708, null), - new RnaEdit(378, 379, "CA"), - new RnaEdit(252, 252, "C"), - new RnaEdit(242, 242, "C"), - new RnaEdit(239, 239, "A"), - new RnaEdit(132, 132, "A"), - new RnaEdit(122, 122, "C"), - new RnaEdit(104, 104, "A"), - new RnaEdit(49, 49, "A"), - new RnaEdit(10, 10, "G"), - new RnaEdit(4, 4, "G") - }; - - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); - - var expectedCodingSeq = - "ATGGTCAACTCCGTCATCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACAGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACACCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void RnaEdits_big_insertions_reverse() - { - //Transcript id:NM_032508.3, chrom: chrX:148678216-148713568 - var genomicSeq = new SimpleSequence( - 
"TAAAATGAGGAACCGGTTTATTGAACAGCTTAAGGAGAGCAAAAATAGTGGCTTTAGCTACATTTTTTACACACTGAGCAGGAAAGTCTAAACCATCCCGTTCCCCTGTACCCCAAAGAGAACAGGGCTTGCTGGAGGCCAGTGCCAAGGGCGGAGTCGTGCTCGCAGCAGACTTGAATTAACCCCATGTAGGCCGGCGAGCAGTTGCCCGCGTGAAAACACCACCCTCTTCTCCTGGCTGAGAAGATCAAAGCTCTTTTTTTACCCTCTTTTCAGCAAAGGACCTATTTGTTTTCAGGCAGGAGGATGTTAAACTTGCAGCCTCTGACACACGGTGGAACCTGCAGTGCTTGGAGAAACGGCACGCACACGTGAAAACATCATGCCTACTCCAAAGCCTTCTTGTTGCTGGCAGGAGGGAAGCTTGAGACTTTCCCACGCATAGTCGTGACCCGCGTGGCCGTTTCTGCTCTCAGCAACATTCTCTAGTGTTCCGGCTTCAAGCAGCGCTTGTCAGGTTTGAAGCTAGCCACTATTCTGAGAACGTCAGAAAAGCATGGACCATCTCTTGCTTGGTGTTGCCGTTCTGGCAGTAGCAGCTACTACGTACCTGCACGAGTTCCAGGGCAGAAGTGGCAATGTCCCATGAAGGCGTGGCACCCCACGGGGGGGGGGGGAGTGTGCCACGGGCGTCCACTTCTGCAGCAGAAGGCATGTGCCTACAGCACAAGCTTGTAAAAAAATACTTGAACAGAATATGCTGTACAGAACTAGGGGTTAACACCGCATATGAAGATGCTAAAACATTTGTATAAATACTCTGTATACAAGCATGGAGTCACTCCCGTAGAAAGGGCTCATCCGTGAGGCTATGAAAAACTGCTGTCAGCATGCCCAAAGAGAAACTACTTCCACAGTAGGAACAGAAAAAAGGACTGTGCTGTGTCTAAACACGTGGTGCATCAGAGACATAGTTACAGTTCCTACTGACTGCCCCAGCCACGACCTGGGAGTGCTGAGGACCTGGGAGTGCTCAGCGAGCTGCAGGAGGTCAGCCCTGTGGAGAAATACATTTCTAAACAATACTTTTGATTGGGATTTCAGCACCGTATAGACAGATGTTCCTTCTGGGGGCCTGGCAAGCAGCCATCTCCCAGTGGGTCTGACGGGGAAGAGGGGTACCTGGAGCCCCTCCCAGACAGACGGTAATCCCACCCCTGTTCTCACACTCTTCCTGGCATCCGCATCTGCTGGCACACACCCCCGTCACCTGCCACTTCCGCGTCCCGTCGTGGTGAGTGGCTGATAGGCGCTGGATGCAAACAAGGCATGAGATGGACGTACCTGGAGACCCAGCTCCAGTACTGGTTCTGGTCTGCGGGGTGAACGAGGGGGCAGAGGAAGGCGGAGAGAGTGCGTCCCAGTCCACTTAAGCTCTGTCCCCGGAAGTGGCATCTAATCTGGCATTTCGATATTTAATTTGGGAGGTGGGAGCACATACTTCCCAGGGCTCTGGGTAATGACCACCCTGGCCTTCTTTCGAAACATGGGTGCGATTTTAGGGGGCTCCGGAACTGGGGTCTCTTCGGTTTCTTCATTATCTTCGTGATGGAGATCATAGGAAATGTTTCCATATTCTCGTAGAAATGGGAAGATTTCAAGCAGAAACTGACAGAAATCTTTGCGGATACCAAACCACCCTGAAAAATAAGAATTTTTTATTTCACACACGAGGCTCAACTGACCTTCCTGTTAACTTTCTTTCCGTAACAAGAAGTTTCACTCCTACAATGTCATAACATACTTTATCCAGACTCCTGAGTCACAAAGCCTGAACAGGGCTTGAGTACCCAAAATGGGGAAGAAGTGCAAATGCTAGCTCTGTGGTGCTTGGAGTGGGGTTCCCGGACCGGCAGGGACAGCGTCCACGGGGCCTAGTTAGGGATGCCATTCTCGGGCCCCAGCCCAGACCTCCAGAAACTGAGTCGGGCTAGGGTGG
GCTCCAGCGGTCCCCTTTTCCTGGCCCTTTTGGGATTCTGCTGGATGCCCAAGTTTGAGAACTACTGCTCCAGTGAGTCTCAAAATATCTGTGGTGCGCAGACTACGGTGTCTTCCGCTAATCTTCTCCAGCCAGGATAAACTCATGGATGACAGTGCCACCCAAGAACAAGATTTCTGTCACCCTCTGGAATCCGTGAGGGCGGTAGTCATGCACGGGTCCTGGCCAGGAGGGGGCCTGAACTCATGGAGCCACCTTAAAGCCACTTTCCCAGTCCCACTACTCCTCTCTGTAGGCTACTGGAGTGTCAGCTCGGTGCAAGCCCTCCCTGCTCCCGGGTGCGGGGTAGGGGGCAGAGGCACAAACAGCAAGCACAGCCCGGGCTGCTGGGCTGCAGTGAGGCCCTGCCCCCAAACCCACTGGCTTTCCGAAGGGCAATGCTCTGGGCTTCCGTGCCATGGAGCCCACAGCCTTGCCAGGAAGGCACCCTCTGCAGAGATCGTTTTGGAAGTGTCTGCCTCAGCAAGCAGGTGGAGGGGAATAGAGTGTTAGCAAGGCAAGACAGGCAAGACTCGGGTGATGGCAGCAAGGATATGGGGGAGGCAGAGAGGCCAACAGGGACCTAGGATGAATCCCAGGTTTGGGTGGGAGATGTGGATTTTCCATCAAACCCTCCCGGGCCTGGGAAGAATCTGTCTTGATCCCCATTTTGCAGAGGAGGGAACGGGATCTCTGAGAGGTTGCCTGCCGTGTCTGGTTCTACCTCAAATGGCAGCGTGCACTGCGAGAAAAGTCCCGGTGCAGGCCAGCAGAACACCAGAGTTACGGCATGCCCTTCCCTTAGAAGGTCCCAGAATTTCCTCAGCCCTCACTTTCCCACACAAGCTTCTAAATTGGGGCCCTCGGGGACTCATCCCTTCCTAGACTTCTATCCGCCCCCCCCCCCACTCCCTGGTCCCCCCCCAGACACACACCAAGGACTTCTGAAATGCTGAGTACATACAGTGGTTTCCTCCCTTCTGTCCAAATGTGGTTGCCATCAGCGTGATCAACGAGAGCCAAAGGGGGACAAAGATCGGGATGCTGGAGAAGGCGTTGTGGCCATCCAGTTTGTGAACCAGCAGAATCTAAAGAAAGAGACATAGTCCCGGTTGATGCCAGCACCGAAAATGGGCAGAGGCGGAAGCCAGACTTCATTAGGCAGTTCCTCCCCACCACCCCACCCCCGCGTGAGCTCCCACAAGAGGGAACATCAGCACCGCCAGAAAAAGGCAGGAAACCACCTATCCCTGGGGAAAGCTCGAAATGAGCTTTTATGTCCCTCTTCAGAGCTCGGCAATAGCCTATCCACTTGAAAAGTTCCCAGTGCCAGCAGTTTTATGGCAAACTCCTCCGGGTGTTTGTTCTAAGGAGTCAACAGCTCCCATTCTAGAATTCTCCACGTGACTCCAATACACAAATCTGACATCCCACTCTGCTTTCCCCAGAGTGGAAACTGGAGCCATACAGAGGCACCATGGCTAAAAAGGTGCACTCTTCTCCCTGCCAGCCCCACGTGCTGCCCCCAAGAGAAAGGAAGGATGCTCTCCTTTCACCGAAGCTCCCTCTCGGAGATGGCTGTGTTCTCTCCCCTCTCCTGGAGTGGGCTCACTGTGAGCTCGAGGGACAGAGGCTGCCTTTCTAGGGGTGCAGAATCCTGTCAGGGGAAGCGCAAGCTTCAGGGGCTGAAGAGGCTTCCCGTGGAACGCTTACCTCAAATGTAAGAAGGGGCACGACGATGGTCATCCAGCTCAGGGCCATGGTTATGTGTGTCCTGCGCTGCTCTGCAATCACATCCATAGAGCGCAAGAACAAGACGGACCACACAATGTAGTAGAGGACCACCAGGCACAGAAAGGACATGAGAATCCACAGCGGGACACACACAACCTGGGGGTGGGTGAGAGAACAGCAAGAGAAGTCTCTTTAGAGCTTCCAACCTGGCCTCTGATGGAAGGCAT
CTTTAGCACCTTGCTGTGTCTGTCCAGTTAAGGCGGTCCTTCCCGTGAGCCGAATAAGGACCGTTCCATCTCCCAGGACTGCTGGGAGCATCGCTCAGGACAGAAAAGGTATGGTATGTTCACTATGGGGCCTGCTGCCACCAGGGGACACACACGCTCAGTGAGTCATCAGTCCCTCTTCCTTTGGGTGACAGACAGCCCTGCACCTGGCTCCGCAGCCTCTACTCTTCCAGAGGCCCACTCTCCCACACTCTCTCAGGCTCCTCTAGGTTCTGCTGCCATCACAGCTTCCCGGGAAATGGGACACAACTGTCACCCTGTGCACACACACAAGATCTCACCCCAACAGACTCTCTTCACAGGCAACATTCCCACAACCTGCTGGGGGTACTTTGGCAACACAAATGGGAATGGGCTCCCCAGAAAGTCTGGCTGCCTGGGCTCCTAAGGATCCCTAACCTCACCCCTACCAAGTTAGTGAACTTGGCGGGTTGATGCTGGATACAGGTTGATGCTGGATACGTAGCGCTGCCGGGTCCCCGCCTCCACGGCAAGGGCGCATTCCCAGTATGTCCCTGTCGTACCAGGTAGACCTTGTCTCATCCACACACAAGCCCAGAGGACGAGTTCCGGGGGCGCCACTTGGCCAGGCTCCCCTGTGACACGTCTTCGCCCTCCTGCCCTGCCTCCTGGGACGACACTCCTCCGTTCTCCCTTTTTATTAATTATCTATCATACAGTAGGAAAAGTGACCGTCTTCCTTTGGTGTGAGTTCCCTGAGTCTTCACACAAGTAGATTCGCACAGCCGTTGGCAGGATGCAGAAGAGGTCTGTCACCCTGCAAAACTCTCCGTGCTGTCCCTTCACTATCACACCGTCCCCACCATTAGCCCCGGCAAACACTGATCTGTTCTCTGTCACTGTACTTTTGTCTCTGCTGGAACTTTATGTAGATGGCATCGCGAGACAAGTAACCTGTTGAGACTGGCTTCCCGCCATCCACATAATGTCTCTAATGAGATTCATCCAAGTTGTTCCCTCCCTGTATCCACAGGTCGTTCCCTCTCAGTTCTGAGTGGTATTCCATTGTATGGATGCCTACAGTTTATCTGACTGTCCGCTGAGGGTGGTTTGTGAAAACCAAACAAGGCCGCTATCCAAAATGCAAACAAGACTGCTACAAACACTGGTGTGTGGGTTTCTACAAGGCTGCGCGCTTTCACTTCTCTGGGGGTAAATCTTATACCCAGGAGTGGGGCAGCCAAGTCCCACGGGAAGTGCGCTTTTAACTGCATCAGAGATGGCCAAACCATTTTCTACAGTGCCCGTACCACCTGCCTTCCCGCCAGTAACACTGGAGTGTCCCAGTTCCTCTGCATCCTCCCAGGCACGTGGCCTCGTCAGTGTTGCTGAGTTTCACCATTCTGAAGCCATGTGTTTCGGGCCCTCATCCTGGTTGTAGTTTGTCTTCCCTAACCTGTAATGGCGTTGAGCATCTTTTCCTGTGCTTTTTCGCCATGTGTATATCCCCTTCGCAAATTGTCAACTCTTTTGCCAATTTTTAGGTGTTTCTTTTTGCAGTTTTGAGTTTTTAAGAGTTCTCTGTATGTTCTGGGTGCAAGTCAGTGTTTTGATGTGTGCTTTGCAAATATTTTCTCCCAGTCTGTGGCCTGTCTTCATTTCATTTTAATTTTGAGGAAGTCCAAATTTATGATTTCTCTCTCGTATGGACCATATTAACAGTGCCATTTCTAAGTACTCTCTGCCTAATTGCAAACCCCAAAGATTCTGTCCTATGTTATTTCCTAACAGATCTATAGTTTTACATTTTCTTTTAGATCTATGATTTGAGTTGGCATACGAATTTTACTTCTCCTGACCAGTTCTTGATTGTAGGTTTCTCTGCAGAGTCTATGCACAGCCTTTCTTCCCCGTTCCCCATTCTGTGATGAGATTCTCCTTTTTACTGAAGTTCCCTTCATGGGTGGAATGTTAGATCT
CAATAGGCTTCCTTGTTTTCTCTTGCTCACTATGGGAAACATGTACTCAGACTGCTTCATCAACTGGGATCTACAGAGGATGAAGGGCAGAAAAAATCTTTCTTCTCATTTGTGGAAAGCTCTCAAAATTACTAATCTTTTTTTTCCCTGACAGTATCTTATACGAAAAAATTTGGTCTTGTTTAGATATGTTTCCTTCACGTCACAACAGAAACAGTTTTGAACCCAATGACCATTCTCCAGATACAGCACTGTGAAGTTGTAGGATGAGTGACTGAATATTTTTTATTATGAATGTTTTATCAAAGACTTTGGGCAGAAGTGATTATTCCCATCTTTAAATATGGAGTATACTTAGGTTCCCTTCATTTCTTCTCTGCTCCCAATTCTTTACTATACTTTTCACTTTTTAAGGGTAGCTATATTTAATACATAAAATATATTGTATGCAAAATTATACATCAAACAACAGAGAAAATAAAACCGAACAAAAACACTAGCATGACCTTACCTCCCAGTGGCAATGGAACTCTTCTGCCTTTAGTCTCCATCTTTTTTCCATGCATTTAATAGTGGAATCTATACTGTGTTCTCTAATTTCTACCTTGCCACTTATCTTTCTATCTCTGCATCCATCTACCCATTTATTCACAGTTAATTTCAACCAAATGCCCAGTAACTGAAGTCACAGTTTAGAAGCATGACATAGATGCCACCACCAGCAAGAGTGTAAATGGGTATGGCTTTTTTTTTTTTTCTTTTGAGACAGGGTCTTGCTACATTGCACAGGTTGGTCTCGAACTCCTGAGCTCAAGTGATCAGCTCACCTCGGCCTCCCAAAGTGCTGGGACTGTAAGTGTAAGCCACTGCGCCTGGCCAGGTATGGCTTTTGGAAAAGCAAGTTGGCAGTGCAGTATACGTATATAGGAATCTCAAATAGTTCCCAACCTCTAGCTCAGTAACACTATTTCTGGACTATTTCCTAAGAAAACAACCAAAAAACAAAAGGCAAAAATTTTAATGCATAAACATATATACTGCAGTATGATTTAAAATCATTCAACACTGGCAACAATGGAAATACCATATTTTAGAAATAAGAGGATGGTCTGATACATGCACTTGAAGAATATTTTGTATCAATTAAACTTTAGAAGTCATGTTTATAAAGACCTTTTATTAACATGATAAAATGTTTATGATACAACATTAAAACAAAAAAATCAGGATACAAAATGGTGCACACAGTTATATCCAAACTGTTTGTATAAAACACAGACATAAAAACACTAACCATGTTATCTCCCCATGGTGGGATTATGGGTGACTATTAGGCTATTACTTCTGCTTGTCCGTCTTTTCCAAGCTTTGTACAGTGAATATGAATTACTTTTATAATAAAAAAGAAGTTTATTTAAGGATTTTAAAAGTTACATACAAGCCAGGGCCAGTGGATGATCTTGTCCAGTCTTAAGGCAATGAATATAAACTGGAGAATGTTGACAGAACACAGGATTTCTAACTAAAAATGAAGAGAAGAATCAGTTAAACAAAGTATAATTTGCATTTAATACTGCAGTAATTTGGTTAACACACTAAAAGACAATACACATTATAATACAGTGTAACTTGTATAATATTATATTGCACTGGAAACTCCTGATTTTCGGTACCAGAGGGGCACAGCAGTATTATGGCAAGGGGAAATGGGGCTCAGCCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCTTTCTGGCCTGAGGCCTGAGCACAGATGAGAACCTTGTTTCCAGGTTCACTGGGAACACAGGCTAGCTGCAATAGACCACTAAGCTTCCTTCATGTCCCTACCAACAATACTGTCTACTCGAAACCATCCTTGTGCATCCTTCTTCTCATACCTCGGTTTTGCCCTTGGACCCCACCCCTTCCTTTCAGGCTCCTTGAGGACCTTGTCACATCAAGCCATCAACTGTAGATTCCGT
ACTGTATCTTTAACCCCACCTATTCCATTGATCCCTCCTAGCAGCACAGAAATGACTCCTTCTCATTAAAAACAAAAGCCCCGAAACCAGATAAACCCACCCCATCCTTTGATCCAAGACCCCTCTTAGCCTCAGGTTTCTCTCTGCGAGACAGTAATGAATATGCAAGCCCCTACTATCTCATCTACCATTCCCAGCTTCCTTGTGATCTGGCCTCCACCCCCATGACCACCAGAAGACAGCTTTCATCAAAGTCACCGATAACCGGGAGGCAGCAAAAACACTTCAACTGGCATCCTACTGATACTGTGGATGACTCTTTCCTTTTTGCTCCCCTCCTCCTGGCCATCTAGGATCCCACCATACAGTCTTGGAGCCCTCTGGAGTGTTCTGGTAATTCCTTCTGTCTCCTTTTCCCCCTCCTTAAATGGTGATGCTCCGCAGAGCCACAAACTCAGTCTTCTCTCCCGACTAGGAAGACAGACACACAAACACACATATTTTCACTCACCCGCTGCACCCTGGACTGGGGGAGAAGAAATTTCAACCAGACCCTTGGTCTGCATTATTACCTCCTTCATGGTTAGTTCTCACATCTGTCTCCAACTTAGCCTTCCTGCTGCATAGCAGACCCAGCTGTGCATAGGCCACTCGGCTCTCCTAAAGGCACCTCAAATGAAGCCAGTCCCAAATAGAGATCATTATCGCCTGCACTTAGAACCTTGTATCTCCTCTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTATGGCCTCCCATGTCACTTTCTATGGCCTCCCTTGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTGGAGGCACAACAGTGGTCTCATTTCTCAAGCCATGAGCCCGGGAATCACCCTGCATTCTTTTTCCCACACTGTCACGTTCAGTCACCACATCCTGTCCACTGTAAAGTCCAGATTTCTCCTGAATCCCGTGCCCACTTCCTATCCTGATGATGACTGCCTAGTGAGGCCTTCTTCGGCACCATCCGCCTACCACTTCAGCAACCCCTTACTAACCTCCCGCCTCCAGCCCAAACGCCCTGCAGGCTGCTCCTCACTCTGGTAAAGTACATTCTTGCCCACAAAATTGAAATCTGGGACCAGGCCCAGAGCTTCCCAAAAGTTCTCAGTACATAGGTACATGGGGAATTTAGTAATTCCTCCATAGACCTCTAGGCCAAAAGAAATCCCTAATGTGGCTGGGAATGCCAACAGTTGCACTGATTTAAGTAATTACGTCTAACTTAGTAAGTATTTCTAAGTAGCCACCAGAAAAAATAATTCATATAAATCTAAAGAAAAATGTTAATATTATTCTTAAATAACCAAAACTAATTCCCAGTGGGATGCGTGTGCCTGTCAGGTAGCTCACCATTTCCCACGCCTTGGAATCAGACAAGGTGCTCCCACTCGTTACCTGTTCTTCACCCGGATTTTCACACAGCATTAGCCTTTTTTGTTTTCACAGCAACTGCTGAAAACCCAGCTTCTTAAAGATACGACGTCACTGAAAGGAATGCAGTGTGGCCTAAAAATAAACCTGTAAACTATTTCAAGCTAGTAGTTTGTATGGTGTCCCAACAAATGTCAGGTATTACTGTTTTCCTCAAAATGTCCACTATCCCCTGGTGCCCTGTGAGTGCACTGGGGTGCCTGGGGCACTTCAGCACATGCTCTGTGGGCTGAGGACGTGGCCCCTATTTGCCCCAATCCCTCTGTAGAGGCTCCCTCTCTTCTCAAC
TCCCACAGTTCCTTCCTTTCCACCTTGTGCTCCCGTCGGATCTGAATCGCCACAGTCCACTCAGCTGATGGAGTGTTTCCTGCCTCTAGGCTTCAATGTGTCCCAAAAATGCCATCCCTTCCCTCCCAACACAGGGCTTCCTGGCAGACCCTCAACTCCTCCTTCCCATCTCTGTATGAGCCTACTCCAGACCACCCCCTCACCAACATAGGTACTGTTCTTGCATCACAGGAGGAGGGAGCTCAGCTCCTGGTATGTTGTTTCTTCCAAGGGCAGGAATGCCTAGAGTGTGAATATATGAAAGACTTACTTTCTGTAATTCAGGCCCAATGCAGTCCTAGTCCTTGTATAGTTGTCCCTCAGTATCTGTGGGGGATTGGTTCCAGGACCCCCTGTGGATACCAAAATCCATGGATGCTCAAGTCCCTGATATAAACTGGGAATTGTAGGGAGGGTGAAAGTGGACCATCAGATACTCTCCCTCCACGGCTCACTCCTACCTACCACTATACCCATGCTGTGGCCTTTCCACCAGCATGAAAATCAGGGAATAGCTCCCCTTGTTCAAGGCCAAACCCTTCCTGGTGCTCTAGGTCCCATTCTCCCTGAGTCCCACAGGGCCTTGCTCCATCACCGTCCCTCTGTCTACATACCTTCCCCTCACGCCCATACCTGGGGTCTAGGCTTATATGCCTGACCCACCCTACAGCTGCTATGTTTACTTCCTAAGCCAACTGCAGTCTTCTCCCTTCACTCTTCATCCACACGGCTCAAAACCAGGCATCTACAGCCTCTAGTTCTCCCCTCAACCCATCAGTATTCAGCTTTGGGCCCTCAGCTTCTATGCAGTTATGTAGTTATGTGCCCGGGCTTTAGAGTCGGGCTGACTCAAACGGAATCCTGGTCCTGCCCCTTCACATGTGGCCATGAACAAATGACTTATCTTCTCTGGACCTACCTCACAGAGTTAGTTAGTAAGAAAACTACCACATGTAATGTGCCTCGCACAATGCCTGGCACACAGTAAGTGCTCAATAAACGTTATCTGCAATTACTTTCATTACTATTATTACTAGTCCTGGTATTTTATTCATCTGCATATCCTCTATGCTTAGGGAAAAAGGGCTTGGCATCTAGTAAATACTTGATAAATGTTTATTGAATGAATAAACAAACACAGGGGCACATCAGGATAAGCTAACCAGACAGCAGGGGAGGTGCTAAATCATGGGGTCTGAGGTGGGGAGATGGTCAGTTTTGAGTGTCAACTTGGCTGGGCTATAGTACCCAGTTATTTAATCAAACACTAAGCTTGCTGTTGCAGTGAAGGTACTGACTTTGAATAAAGGAGACTACCCTCCATAGCATGGGTGGGCCTCATGCAATCAGGTGAAGGCCTTAAAAGCAAAAACTGTGGTTTCCTGGAGAGGAAAAAATTCTGTGCCAGGACTGCAGTGTCAACTCCTCCTAGGTCTCCAGCCTGTTCACCTGCCCTGGAGATTTCAGACTTGCCAGCCCCACAGTAATGTGAGCCAGTTCCTTAACTCTCTTTATACATATATCTGTATCTAACCTATCAGTTCTGTTTCTGTCTGATTGATACAGGATGTGAAGCTGGGAGAAGGCTGATGTCCTGGGTGAAAAGCTAGTGTTCTAAGTGAAGAAAGAAAAATTGTTATTTCCATCATCTTTTGTTGCCTTGTTGTCTCATGATGTAGAGTTGGTAATGATCAAGCTCTTCCTAACAAAGGGTAAGAAATTGACATCTGAATAACTGAGCAAAATATTTTACTTTTGAAAACATTCTTTTTTTTTTTTTTTAAGACGGAGTTTCATTCTGTCACCCAGGCTGGAGTGCAATGGTTCAATCTTGGCTCACTGCAACCTCCGCCTCCTGGGTTCAAGAGATTCTCCCGCCTCAGCCTCCGGAGAAGCTGGGATTACTGGCGCATGCCACCACACCCGGCTAATTTTTGTATTATTAGTAGAGACGGGGTTTC
ACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGTGATCCACCTGCCTTGACCTCCCAAAGTGGAAAACATTCTTAAATATATGAAATCTCACCTCTAGTGACCTGTCATGTCGAAAGCCCCAAACGCAAGCTGCAACAGACACCGGGGAAACAAAGAACAGCGGCATGAAGACCAGGAGCCAGAAATGGCTTCCTCTCTCGATTCTGTCACAGACCAGAACTTCAAACATCAACAAGAGCAAGTGGATGCCCACTGCAATCAACATGGCTTTAAACTCCACACACGTTTCTCCTTCTGCTCTAAAAAAGGGAGAGAAGAAGAAAACACCCTCAGTTCAGAATCTCCACTATAAGCAAGCAGTTCAGGGCAAATACCAACTTATATTTATACTTTGAATTTTACTTGAAAATTTGACAAAAGCAAAGGGAAATCAGGTAGAAAGCTAACTTAAACCTAAGCTTTGGTAGGCAATCTCTGAAACATCGAAGAACTACTACATAATACAAAATGAACATTACAACCAAACCAGAATTTAATGTTTTAACCGTATAAGGATATTCTCAAAAGTAATAGCCAGTTCTTATTTCCCTGACAATGTACATAAACACTTCTGTTCACATCTTTAAATTCAACAACAAGAGTTACTTCCAAGATTATTCAAGCTGATTTGCTTCTGCTGCTAAAACCAGGCAAATACCCCTTAAGTCTCATGATCCTCATTTTTCAAGAAACCATGTAAACCACCCTTCACACAATATTATAAAAATAACTCTAGTTCTATGAACAAGTGCCAGTTATATTTCAAGATAGTAGTAACTATTGTTAGGTGCTATTTTTAAATGCAAATAAAACGTATAAATGATTTTCATTTTCCTTTCTATTCCATTAAGATAAATTAACACCTGCAGATGAAAGAGAAAGAAGAACAAGAGTTAAAACTGTTCTCAAACAAAATCAGTTTAATTAGCTAAGTATCATGCACAATAACCTTAACAGATCTACAATTGAGCAATGGTAAGGCCGCTAATCAGGAAAAGGCTCTATAATGCATCTGAAAGGCCTACAATGTTTATTCAAAATACAGATGAACATTTATGATATACATGTATTGTGGGTGACAAATACACCGGAAGTTAGATCATAGAGAAAATGCTATCAGAGGTTATTCCTGTAGGACCCAACCATGTTCCACTGGTTAATGTTAACATGAGAATGACCACGCCTGTACATTCCTTACATTCAACCCCACATACACAATTCCTTTCCTTGCTCAAAACATCTTAAATAAGACCAACAAAGAGAAGTTTGAATATATTCTAAATATCAATTAGTAGAACCTAAATGTTTATTTAACTTTGCATTCTTTGAGAAGCAATTAATATTAGATATCTGAAAATATCTCATAAAAAAATAAAACACATACAGCCACACAGGTCATCAATCTTTCAAAAAAAATCTAAGAACTCTGAAACAGCTATACATGAATGTCCCTCACCTGCCAAGGCTCTCTGTGTAACTGTTTTACAGTTCTTAGACATGTATGTGATATGTAATTTACACAATCTGAATCATTTTCATATTTAGTAAACAAAAATTTAAAAAGTTGATGTAGTGGCCGGACGTGGTGGCTCACACCTGTAATCCCAGCACTTCAGGAGGCCAAGGCGGGTGGATCACCTGATGTCAGGAGTTCAAAACTAGCCTGGCATGGTGAAACCCTATCTCTACTAAAAATACAAAAAATTAGCTGGGCATGGTGGCAGGTGCCTGTAATCCCAGCTACTCAGGAGGCCGAGGCAGGAGAATCACTTGAACCCGGGATGTGGAGGCTGCAGTGGGCCGAGATCACACCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAGGTGCGGTAGTTTCAACTTTACACTTTTCCCACATGAGCAGCTGCCTTCTGGGAATTCCTGTACTCCTCATTT
TCCCAGTGGAGGTTCATAATAGCCTCCCAGTCTTAAGTCCCCCTTTTTCCCTTTATGTAGTTACAGTCTCTGTGGCAGAGAAGGGAAAGCCTCTCGCAGGTCCCAGCAACAAGCAGGTTGCATGCTGGCGTGAGCCAACTCCCAGGAATTTGGGCCAGCGGAGGCATCCAAGAGCAGGGAGGGCAAGAGTTGGAAACGGTAAAGGGTACCCCTCTCCCCTGCCCCCAAAGGCTCTGCTTTCCTTCTCGGCATCCAATCTTTGACTTTCCTCATTCCCCAGCTGCTGTCTCAGGGACTCATGGTCTCCTGGTCAAGCCACCTCCCCTACACTGCTAATAGTCCTAAAGCTCTGGGACTAGGAGGGTGGGACAAGGGGAGCCCCAGTTCCAAAACTGTATTGGAGAAAGATCTTTCATGACCAAACATAATATGAGTGTCTTTTCCTAAAAACAGGGTGGTTTCATGCTGCTTAGTCTAGTATGGCATACCAGTTCTGTATTTTGGGTGCATTTTGGATTATATAGGCATCTGTGAGCTGGCTGGCAACTTACCCCCAAATGGCACTGCTTTTGTAAGAAAATACATACCAAAGACTAAATTTTCAAAAACACAGAAGAGATCTGTTAGCTTATACTATAGTTCTAAGACCCCAGATAGGTAGAAAATAAAATGGTCCTTACTTCATCAAAAGTGAGAAAAGTCAAGATATTGCTCCCTCATGCTAGAGACCAATGGGTTGTATAAAGCAGTATTACCGATATTGAGGATTTCGTGCCCAGACTCCAGTTCCAACTGAGGCTCCAACAATGACCATTAACTTCCACAGCCATATTGGAGCAAAGACAGCCCAGTAACTCCACTGTATGATGCCATCCAAACGAAGGGCCAGCAGCACAGAGAACAGCAGCAGACAGGCATAGATGAGGAATTTACTAGGAGAAAAGTAAAACGATTAAGAAGGATTCACTTTTACAAATATGTGATACTGAAATGGGGAGTAATAAGAGCCACATTTGTCAGCATGTAAAAGGAGTCACTAACTCAATAATCATTTATTGAAAAGGTCTATGGGGCAGACATAATGTGTTGGGAATAAAAGAAACATAAAGAAGACTAAAATAAGTCTCCTGCTTTCCAAGGCTTCCTCATAGAAGGAAGACCACACAGAAACATATAATACAGCACAATGTTTGTGATGAGAGCTTGGAAGAGGAAATACAGACTGTGTCTGAGGAGGCACTCAGAAGCAGAGATGTGGTGACCCTAGAGCTGGTTCCAGAGGGCAAGTAGGAAGCTGCCAGGCAGGAAAGCAAGTGAGATGAGGAAGTATTCCAGGCAGAAGGAACTAGCTATACCAAGACACAGAGACTGGAAAAGGCTGACATGCTCTGAAAATGGTCAAGTTCTATCACTAACTGATTCTATTTCTAAAAAGGCAGCCATCTGTCATATTCATATGGCATGAACATTTTAGTGTATGTATTATACTTTCCATGAATGAATAAATTACACATACACACATGCTCACGTCTCATATAAAAGGGAATTGCTACAGAGGATGTCCTTGAAATAATTAGAAATTATACTCTTGAGGACCTCTATTTCCAGCCTTGACTTAATAATAGGAATATAATTTACCTTCCCGCCTAAATAAGAAGCTTGATACAGTCTACAAAAGAACAGTTTTCAGACATTGACAACAGGCAGTGGAGAACAGGTGAGAAGGAGGAAATAAAGGAGGTAAACCCTACTATTGCCCCAGTTTGCAGATCAGAGGCAGTTTCCAGGCTGCAGCAAGAAAACAGTTAAAACTCAACCCTGTCAATAACTATATTAAATATAAATGGCCAACTGAAAGACAAAGATGATCAGATTGGATAAGTAAGCAAGACAACTATATGCTGTCTGTAAGAATCCCACTTTATCTATCTATAAAGACACAGATAGATTAAAAGCAAAAGGAAAGAAAAAGTTATACCAAATAAACACTAACCAAAAGA
AAGCTGGAATGACTATATTAATATCTGTTTAGTCTTCCATTGCTGCTGTAACAAATTACCACAAACTTAGCAGCTTAAAACAACGTAAATTTATTATCTCACAGTTCTATATGACAAAAGGCGAGACGGGCTTGGTTGGTTTCTCTACTCAGAATCTCACAAGGCTGAAATAAAGATGTCTGTTGGTGGAATGCTTATCAGGGGACTCTGGCAGAATCTACTTCTAAGCTCATTCAGGTTGTTGGCAGAATCCAGTTTCTTGTGGTTGTAGGACTGAGGTATGTGTCTCTTTGCTTGCTGTCACGCAGCAGCTGATCTTGCGATAGTAGGGGCCTCTCCTGGGTCCTTGTAAATAGGCCCCTACATCTCAAAGCCAGTAACAAGCTATAGCATATTCAATCTTTCTCATGCTTGGGATGTTTTCTCACTACTTCTGCCATATCGCTTCTGCTTCCACTGAGAGAAAGTTCTCCGCTTTTAAGAGTTCATGTGATGAAACTGAGTCCACCTGGTTAAGCCAGGCTACTCTCCCTATTTTAAGGTCCATAACTGTGGTTGGTAGGCAGAATTCTAAAGAAGTTTCCCAGGATTCCTGTCCCCTGATTATTCAATCAAACACTAATCTGAGTAATACTGTGAAGGGACTTTGCAGATGGAATTAAGGTTACTAATCAGCTAACTTTACAATAGGAAGATTATACTGGATTATCCAGGTGTGCCCAGTGTAATCCCATAAGCCCTTAAGAAAGCAGAAGAGTAAGTCAGAGAAATGTGGTGGAAGAGAGATGAGGCAGAAGTCAGAGAGATTCCAGACTTGAGAAGGATTCAGCCTGTTACTTCTGGCTTTGAACATGGAGGTAAGGAACCATGAGCCAAGGAATGCAGGCAGGCTTCAGAAGCTGAGAATAACTCGCAGCTGACAGCCAGCAAGGTAAATGGGACCTCAGCCCTACAACCCCAAGGAACTAAATTCTGACAATAGCCCAAATGTGCTTGAAAGCAGATTAATCCCTGGAGGCTCCAGAAAGGAATAGAGCCCTCCTGACACTTTGATTTTGACCCTGTGAAACTAGGCAGAAGACCCATCTGAGTTGTGCTGTACCCGGACTTCTGACCTAAAGAACTGAGAGTAATTTGTCATGGTAACAGCAGAAACGAATGCTAATAAATATAGCTTCAAAGTCCCCTTTGCCATGTAAAATAATAACATATTCACAGGTTTCAGGGCTTAGGGCCTGGGTATCTGTGTATTTGTGGGGGTGGGGCATTCTGCCTACCACAACATAAGACACAGTATATTTTTGAACAAGGACTATTTCCAGGGACAAATAGAGGTAGTTCATAATGATAAAGGGGTCAATTTGTCATATGCCTAATAACAAAGTTTCACAATACATGTAGAAAGTACTGATCAATCTAAAAGGAGAAATAAAAAAATCAAACTGTTATAAATGGAAATTAACATTCCTTTCTTAGTAACTAATAGAACACATAAACAGAAAATTACTAAGGATATATATGATTGTAGCAACACTATCAACCAACTTGACCTAATTAATATTAATGAGTTCCTCCCAACAAAAGCAAAATACAGATTCCTTTCAAACACACACGGAACATTCACCAAGATAGATTGAATTCTGGGCCATAAAACAAAGCTCAACAAATTTAAAAGGACTGAAATCATACAAAGTAAACAAGCACAATGGAGTCAAACTAGAAATCAACAATAGAAAAATATCTGGAAAATTCTCAAAATACTTGAAAATTAAATGCCACACTGCGAAATAATCCATAGGTCAAAGACTATGAAGAAAATTGAAAAATATTTTGGACTAAAGGCAAAAACACAATATACCAAAATTTGTGAGATACACTAAAGCAGTACTTAAGGGAAATTTTAGCATCAAATACTTACATTAGAAAAGATATCAAGTCAATAATCTAAGATTCTATCTTATGAAACTAGAAAGAACATGGAGGTAAGGAACCTCCATGTA
AGAAACGGAAGGAAATTTTTAAAAAGTAAATGGAAAGAAGAAAATGATAAATGTAAGCACATTAATCAATAAAATACAGTAAAAAGGGATTAGAGAAAAAAATCAATGAAACTAAAAGCAGTTTCTTTGAGAAAGTAAGAAAATTGGTAAATCTACAGCCAGAATAATCAGTAAAAAAGAGTAGGCTCAAATTACTAATATCAAGAATGAAAACAGGGATATCACTACAAATCCTAATAATACTAAAGGGATAAGTAGGGGATATTATAAACAACTTTATGCCAGGAAATTTCTTGAAACAAAGACATGGAAACTGCAATTCTAGTTAAAACCTTTTTCAGAAAGAAAATGTCAGGCCATGGCAAAATCTACCAAACATTTAATGAGGAAATACCACCAATTCTTCACAAACTCTTGCAAAAAAGATGAGGGGGGAACATTTCCCAATTTATTTTATGAAGCCAGCATTACCCTGATAACAAAACTGACAAAGAAAGAAAACTACAGACCAATATCTCTCATGAACAGAGATGCAAAAATCCTGAAAAGATTTTAGCCAACTGAATTCAGCAATATATAAGGATACTACATTACGACTAGGTGTGGTTTAGCTAGGATTGCAAGATTGCAATCTTGGATTAACATTTGAAAATCAGTTAGCAACCTTCGTGGTATTAGCAGGCTGAAAAAGAAAAATCATATGATCATCTCAATAGTTGTGTAAAAAGCATTTGACGTAATTTGCTACCCATTCAAGTTAAAAGCGTTAAACAAATTAGGAAAAGAAGAGCATCTATGAAAAACCTGCAGTTATAATGCTTAATGGTGAGAGACTCAATACTTTCCCCTTAAGACAGGAAACACAGCAAGTATGTCCACTTCAACACATCTATTCAACATTGTACTAAAGGTCCTAGCCACAACAATAAGATGATAAAAAGAAATTAAAAGAATATAGTTTTAAAAGAAAGAAGTAAACCTGTCATTATCTATAGGCAACATGATATTCCCTGTAGGAAATCCTAAGTAATCTAAAAAAAAGCTAGTAGATCTAGTAAGTGAATTTAACAAAATTGAAGGACACAAGGTAAATTTACAAAAATTATTGCATTTCTATATAGTAGCTATCACAAATTGGAAACAGAAATTTAAAAATATGTATCATTTACTGCAACTTTCCTATAAATTTATAACTATTCAAAAATTAAAAGTTTATGAAAAAATCCACCATTTACAATAGCATCACAAATATAAAACACTTAGGGATAAATGTAACAACATATATGCAAAACCTATATACTGAAACCTACAAAACACTGGTAGGACTGTGGGGAAGCTAATAAACAACAGAAATTTATTCCTCACAGTTCCAGAGGCTGGAAGTTGGAGATCAGGATGCCAGCATGGCTGGGTTCTGGCGAGGGCTGCCTTCTGGGTGGCAGATGGCAGACTTCTCAATACCCTTCACATGGTGAAAAGAGAGTGAGTTAGCTCTCTGGTCTCTTTTTATAAGGGCACTAGTTCCAACCATGAGGGTTCCACCCTCATAACCTAATCACCTCCCAAATGCCCCACCTCCAAATACCATCACACTGGGGACTAGAGTCAACTTGTGATTTTTAAAGGGACACATTCAGTTCATAACTGCCAAGAAAAATTAAAGATCTAAATAAATGGAGACATATACTGTGTTCATAGAACACTCAATACTGTTAAGATTTACATCCTCTGTGTATTAGTTTCCTATGGGTGCTGTAACAAACTACCACAAAATTGGTGACTTAAAATGGCACACATTTATTATCCTGAGGCCAGAGGTCCAAAAAGGGTTTCACTGGGCTAAAACCAAGATGTCAGCAGGACCTACTCCTTCTGGAGACTCTATGGGAGAATCTATTACTTGTTTTTTTCCAGTTTCCAGAGCTGTATTCCCTTGGCTCATGGCCCCTTTCTCTATCATCAAAACCAGCTGCATAAAATCTTCAAATCTCTGTCTCTGT
TTCCACCACACTGCCTTCTCCTCTTAATATTATCTCCCTCTTTTAAGGGAACCTGTGATTGATTGCATTTATTGCCCCACTTGGATAACCCCATCATCTCAAGATCTTTAACAGGTTCCATGGAAGTGGGTATCTTTGTGGGTCATGATTTAGCCTACCACACACACCAAATTAACCAAATTATTTTAAAGAATCAATGCAATCCCAGCGAAAATTGGGAGCCAGCAGGTTCTTCTGTAGAAATTGACATACTGACTTTAAAAGTCATATGACAGTGCAAAGGACCTAGAATGGGCAAAACAATTCTGAAAAAGAACAAAGTTGGACAATTTATACTACATGCTTTCAAAGCTTACTATAAAGCTACAGTAATAAACACAATGTAGTACTGGCATAAGAATAGATATATAGAACGTACTAGAGAGTTGGAAAACAGACCCATACATATAGAGTCAGTTAATTTTCCACAAAGGTGCCAAAACAACTCAATGGAGAAATAATTATGTTTCAATAAATGGTGGTACAACTGGATAAATGAGGAAAGTACACCTTGACTCTTAACCTTATAACATATACAAAAATTTATGTGACATGGATCATAGACCTAAATGTCATACACAAAACTATAAAATGTCCAAAACACATATGAAAAAAAAATACTTGCAACCTACGGTTAGGCAAACATTTCTTAGATGGGATATGAAATTGGACTTCTCCAAAATGAAAAACTCTTACTCTCCAAAAGATACCTATCATTAACAAAATGACAGCCAAGCCACAAACTGGGAGACCATACTCTGAAAACACGTATCTGACAAAGTAACCTGTATCCAGTATATAAAAAGGACTTTTACAACTAAAAAAATAAGTGAACAACCCGATTTAAAATGGCCAAAAAATTCAGAGACATCCCATCAAAGAAGGCATACAAAAAGCAAATAAGCACACAAAAAATACTCAGTATTTTCAGATGTTAGGACAATGCAAACTAAAATGACAATGAGATAAGATTACACACACACTAGATCTCTTATATATGCTGGTGGGAATGCAAAATAGTACAGCCACTTTAGAAAACATTTTGGAAGTTTCTTATACAGTTAAGTGTATACTTATTCTATGGCCCAGCAATCCCACACCTAGGTATTTTACTCAAGAAAAAGGAAAACGTATGTCCACACAAAGGCCTGTATTCAAATATTCCAAGAAGCTTTATTCATAATTGCCACTGGTAACAACTCACACATCCATAAACTGGTGAATGGTTAACTGAATTTTGATATATCCGTTCAATGGAATACTAATCAGCAGTAAAAAGTAACAAACTATGGATGACAACAACAACATTGATGAATCTAAAATGCACTATAAGTAAAATAATTCAGATGGAAAACACAAGATAGTGTACAATGCTACTTATTTGACATTCTGGAAAGGGCAAAACTCTAGACACAGAAAAACAGATCAGTGCAGTTACAGATAAGGAACAGAAAGCTCAGATGTTAAATAACTTGTCTAAGAGCTCAGAGTTGGGAGTCAAACCATTTGACTCCAAAATCCTTCTAATTGAGTATTATACTATACCGGCTTTACACTGATAAATGAACATGTTTAGAAAAGCATCTGTATAAATATCTGCTGAGCAGCTAACAGGTCTCCACTCGGGAAGATTAAGGATAGTCTTTATACTCTTTTGCTTATGATATCTGAGTTTTCTGTAAGGTACGTGTACTGTTTTTGTACTATGAAAAATAACATAAGAAACTTCGATTGGAAAAAAGGGAGGAGGGTTGTAAGAAGACTGGACCTACAAAATTAAACTTGGAAGCAAGCCTCCCAGAGAGAAAACTACGCTGGGAAGATCATGAGATTCAGAGGGGTTGCTAAAAAAATATACATAGCATCATTGTGTCATGGTATTCTTACCCATAAAGTTAGGGCAATAATATGTACCTCATAAGGTGGCTACGAATATAAGATAATGAGTATAACAAGTGCTT
TGTATAAAGCACTAAATCAGTGGCTCTCAAAGTTTTTAGTATCAAAAATTTTGACGTTTAACTCTTAAAAAGAACTCCTTTTCAGTTCTAACACTTACTGAGGATCCCAAAGAGCTCTGGTTGATATGGGTTATCCCTATTGGTATTTAATATTTACGATACAGTTGTTATTCAAAAAATATCTACTCTTCGAAGGTAATAATAAACACATTACATACTAACATACATAACATTATTTAAAAACATTTCCAAAACCAACACAAATTTAGTGAGAGTAGCGACACTGTTTCTACAGTTTAGCAAAATTTTTAATGTATGATATCATAGAAGACACCTAGATTCTAATATCTATATTTACATTCATTCTGTTGCAATACATTGTTTTGGTTCAAGTATACAAAGAAAATCTGGCCTCACAGAGATCTGTAGTTAGAAAAGAACTATTTTAATAGCTCTTTCAGATAATGGTAGACGTTCTTCAACAGGACACTAAAACTCAACAAGTGGTAGTTTCTTAAAGGTTAACTATGATGTGGAATCTGAAATCGTATCAATGACCTTTTTGTACTCTGCTACACTGAAATCCACTGGTCTCTCTTATACTTTCAATGAATCTTTTATCCCAGCATTATTGTATAATGCAATGTAGGTCGACTGGAAAATATTGGTTTACTGAGTTATAAAGATCTTTCAAATGTTGACACATTTCATATACAATAGCAAAAAAAATCACATTACTTACTATCTTCACCAACTTCATCAGAAATATTTTAATTACTGGGAGGTTGTCAAGCTCATGGTAGCAGATACAACTTTTCAAAAATTCTGGTTTTCCCATGAAAGATCAAATTTTACCATTTACAAAAAATACCATGAGTTGTTTTCCTTGAAGTTTTGCTTCCTGCACTTTCTTTTTTTGTGTGTGTGATAGAAGCATCTTTTATTATAGTATTTTTGTCTTTTTTTTTCTTTTTTTTATTATTATTATACTTTAAGATTTAGGGTACATGTGCACAATGTGCAGGTTAGTTACATATGTATACATGTGCCATGCTGGTGTGCTGCACCCATTAACTCGTCACTTAGCATTAGGTATATCTCCTAATGCTATCCCTCCCCCCAACCCCCACCCCACAACAGTCCCCAGAGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCATCTATGAGTGAGAACATGTGGTGTTTGGTTTTTTGTCCTTGCAATAGTTTACTGAGAATGATGATTTCCAATTTCACCCATGTCCCTACAAAGGACATGAACTCATCATTTTTTATGGCTGTATAGTATTCCATGGTGTATATGTGCCACATTTTCTTGATCCAGTCTATCATTGTTGGACATTTGGGTTGGTTCCAAATCTTTGCTATTGTGAATAGTGCCAGTTAGAATGTCAATCATTAAAAAGTCAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAATAGGAACACTTTTACACTGTTGGTGGGACTGTAAACTAGTTCAACCGTTGTGGAAGTCAGTGTGGCGATTCCTCAGGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTGGGTATATACCCAAAGGACTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGTGGCTTCCTGCATTTTCAAGAAAATGTCTGCCAAACACCATAACACAAATAACCACAGTCTTGTCTGTCAGTTGTCCTTCCAAATAAAAATGATACTCCATGAAAACAGCAGCTTGTAACTCAGGCACACACGTTTTTCCTTGAGTCAACCATTGCACTTCAGCATACAACAGGCCTTTATGCATTCTTCCCATTTCATCACATGGAATATTAAACTAGATGTGCATTCAGGGGTCAAGATTAAATGAGATTAATATTTTTCTGCTTTATCAAGGACATTCGTAAGTGAAGCTGGCATTTTTTTTTTAACTGCAAGTCCAGCACGTGGTGGT
GAAGAATCCAGTGACAGAAGACTAGTGCAGCTGGTGCCACTGTCCTGATTTGTGCTCCAGCACCAGCTGTTTGATGCACTACTTTTGTACCACCAGTGCCAATGTCGACCAAGGCAAAGAATGTCTTAGTATTATAATTTTGACTTTGCAGATACCTGGAAAGGGTCTCAAGGCCCACTCCCAAGGTCTGTGGGAAACAGTCTGATAACCAGCGTACTCAATACACATTAGCTAATATTATTAATACTCGAAAACAAAAACAGGCTTTTATAAGCTGCTACAAAGAAAGAACATTTAAAGAAACCAAGATAAAAATGACTTCATCCCTTAGACTTTGGACAAGAAGAGAGCCTGTGTCAAGGTGATTCACAAGTTAATAGAAATGGAGTTATCACCACAATTACAGTACACAATTAGTAGAGGCAGAAGTTTTCTCTCTTAAAGCAGAGGGAAATATTCCAAAATCTAAAAAAATCAGAACCAATTTGTCAACTAAAGCCTGTGGCTACAAATATAATTACCAGAAACAATGGACTTGAAAACAGACTATTTAAAAAAGAAATTAGTGGATTCAAACATTAACAAATGCTAAGATAATGACGACACAGGATTCATATGTAAATTAGATAACATGTACAGGTAATTTTTATCCTAAAGGATTTTCAGACTATAAATAAAAAGTAAATTGGTGGGGGGGGTGGTAAATGCTGTAAGCTAACTTCATTATCCTCTGTAAAACAGTTTCCTTATTTAATAAAAAAAGAGAGTAGTGGAACTAAAGAAAAGAGAAGTACCAAAATGTTTTAAATGTTGGAAAAGGGATTATTTTATGTGGTTTTTGACATAATTTAAAGGAAACTTAAAATTTTATGTCTATTACATGAAAAATAGAAAACAAGATAAACTTACAAAAGGACCCACAAAATATAATTCATGATTCAAATACTAGGGTTAGAAAAATATACATGAGCTGAATGTTCTCTTATTAATTTCCTATTGCTGCCATTCAAAACTGCCACAAACCTAGTGGTTTAAAAAGCATAAACCTAGTGGTTTAAAAGGCATAACCCTTACAATTCTGGAGGTCAGAGTCCAATATGGGTCTCGCTGGGCTAAACTTGAGGTGTCAGCAGGACTATGCTACTGAAGGCTCTAGGAGAGAATGTTTCTTTGCCTTTTCTAGCTCTAGAGGCTGCCACATTCCTTAGCTCATGGCCCCTTCCTCCATCTTCAAAGTCAACAATGGAGAATGCAGTTCTTCTCATACTGAATCACTCTGACCTCCTTTTCTGTCTCCCTCTTCTATATTTAAGGGCCCTGTAATTACATTGGGCTCAGCCAGAGAATCCAGAATAATCTATTTTAAGGTCAGCTGATGAGCAAACTTAATTCCATCTGCTACCTTAATTCCCCTTTGCCATGTAACAGAACATATTCCCTGGTTCCACGGATTAGGATGTGGACATACTTGGGGGCGTCATTATTCTGCCTACCACAGCTCTGTAAAAAGAAAGGTTGCTTCCTAGATTAAGAAAGTAAACCTGTTACATTCAAGAGTTACGCTTGGAACCAAAGTCAAAAGTCGAACCAACTATCAGAAATCAGATGACAGAGGTTTAGCACGCCTGGATGAAGACAAGAGCAACAATTTACTATCTTTAAAATTAGATAAATTAGATTTCAAAGCCGAAAGTATTAAAATGCTAAAAGGACTAATAAGGAAAACCTAAATAACAAAAACCTAACATGACCATGGAATATGGAAATAATTTTACAATTTTTCTTTTTTAAGGATACACAGAAATATTTTAATTGTGGGCTTCCTCATGCTACTCTTAAATCATGACAGATAAAACAGACAAAGTTCCTAAGGAAAATACGGAAATGAACACAAGAGTAGATTTAAAAAACAGCAAGGTTATATGAAGAGAATTAAGTAGCAGAACGAGAAAATTTAAAAAACAGCAAGCTTATATGAAGATAATTACGTAGCAGAACAA
GAAAACATACTTGCTTTACATATATATAGTTTTTCCAGGGAGAAAAAAAGGATCATGGGGCAGCAATACACAATAAACACACATCTAATTTACAATTGTTAAAAATATTTGAGTGCCTGAAATGTGTGAGATACATAAAAAGGAGCAACAATGCGTTCGCTCAAGATGCTCAGACTAATAGGAACAGAGGGAGATGATGTATCAACAAGTAACTACATATGGTAAACTAGGGGACTTGGAAGATTAATTTCCCTGAGAGAGATGGGAAGAGAAGTGGAATCTGGGCAACAAGTTGGGAATAAGGAATTCCAGACAAAGGGAATAAGATGTTTTCCAATTTTTTAAAAATTATTTTTATTTATTATGGATACAAAACAGTTGTGCCTATTTATGGGGTACACGTGATACTGTGATGCAAGCATACAATGTGTAATGATCAAATCAGAGTAACTGGGATATCCATCACCTCAAGCACTGATCACTTACTTGTGTTAGGCACATTTCAATTCCACTCTTTTAGTTATTTGCAAATATACATTATTAACTATATTTGCCCTCTTGTGCTACCAAACACTAGATTTATTATTTCTATCTAACTGTATTATTGTACCCATTGCTCATCCCCTCTTTATCCTCCCCTCTCCAATTCCAGCAGACTGGAACACACAGAATACTTTCTATAAATCGTGGTGGGTAGGGTGCTGCAGAGACAGGAGAAAGGGTACTGGGGGGAATAAGCCAGAAGTTGGCTGTGGTAACATGGTAGGGGAATGTGAATATCAAGGTAAAGTCCTTGGAATTTATTAATAGGTAATATGGGGCCACTGCAGATTTTCTCAACAAGAGTGACAGAAGATACATATGAGTGTGTAAGGTAGATTGTAATCAGTAGAGACCAGAGGTTGACATTTTATACAATTCATAAAAACACTGACAAAAAATGGATCATTTTCAATACTGATTTGTTAAGCAGTTTAGTAGAGTTACTAAAATACAATTGAAACATAAAAATACCTGGGCAGCTGCCAAAACAAAACTCAAGAGAAAATGTGTTCATTTAAAATGTTTAAGTAAAAGTAGAAAACAAAGAAAAAAAAAAGAGGCAAAAGAAAACTAGTAAACTGAGTTTCTAAGAAATTTGGGGAAAAAGCCAAAACATGAAAATAATAAAACTAAAAGTAGAAATATAGATATAAAAATGAAACTGATCAGTAATCCCAACAGCTAGCTTTTTAAAAAAATTAAACTACAAAGTTGATCAAGTAAAACAAAGTAATACAATCAGTAAGTTCAAAAGATGTAATTCATACACACTCCTAGTTGGGGAAAGAAAGAATAGTATCTTAACTATATCAATATATTTGCAAGCAAAGTCTAAAAGGTGACTGCATAGCCAAAAGGAAATATCAAGCCTGATTTGCAAAAGAATACACAATAATATGCAATAATACAGTGATAAGAAAATGTATTTACAAAAATAACTTATGGTCATAGCCTAAACTACTTTGAGTTACTTTTCAAGAAACATTCAGACCAGATTCCAAGTATAAAAATAGACTGCTTAATTCTCCACAAACCTGGGAGAAATGGGAGGTTCTGGTTTAACATTAATCACTACTTCTTAAAATTCACTTTTCCAGTTACACCTTTAGAATGGATTTATTCCACTCTCATTTGAATCTGTAGTGTTAGTATACGATGAATCACTAAGTAGTGCTACCTGGGCCAAAGCTAGTATCCTCTCTGAAATTTACATGGAGCTTTCTGCTCAGGCTCAAATTCCCTCTCTCCACTTTGCAGGGTTGGGAGAATAGGGAGGGAGAAAAGGAAGAAGGGAGGGAGAGAGAGAATATACTTAGCAAAGGAATGATGACTCTGAATGTAAAAGTTCTAGTATCTGTTTTCTGCATATCATAAGAATGCAGCTGGACTCTTTTTAGACCTATCAGTTTTTTTCCAGTGGAAGCTGCTGGCTTCACTGGAGCACATACACGCGCATT
AACACATGCCTGCATGCACGTACACACGCACAAGCACATACACACACACAAACTTTGGATGGCACATCCCAGTCTAAAGCTTGACAGAATGGCTCCAAATGACAACCTGACATACTCACAAACTATCAAGGGAACTGCCCCTCTTACACCTTATTAACCAAATTTAAAGTCTTTTAGTGGAGACTTTAGCTGGTTTTATTAAGATAAAAAATGTTTAAGAGCATTTAACCAGTAGATACAAAAAAGCACAAAAGCCAAATAAAAGCAGAGCCCAGGCCAGTAGGAAGATCTGCATTTAAGTTCCTCAACTGCCATTCATCAGCGGTGTGGTCTTGGGTGAGTTATTTTCCCCCAGGTCTGTCTCCTCATCTGATTCCTCATATACTGATTCATTAAGACAAGGGTATGTGAAAGCACTGTGTAAATGATACATTTTACCCATTCTAGCTTTAGCAGTATTATGAAAGACAAAAGTTCTGCCACATTGTAGGTAATAATCTCATTTAGCAATCATTGTTGTCACTATTAGGTTGGAGCTGACAAAGTATGAGTCTCCACTTATAAAGCATCTATCTCCAGAGTTCGAGGCTTTTGTCTTAAATTCCAGTCATCTTGTACAGGCATGACATATCAGGCAAAATGATTAGAAAACTCAAGCTCCATATTAAAAAGCTTAACTTCTGGAGTCCTAGGAAACTATCTAAAACTCCTTCAATCCACGGTCTCTCCTGTACAACAAAGACTTCCCAGTGGTAGATGTCTATTTGTACCCAACATCCACTCATTCAGCACGTAATTCAGCACCTCCAAATCCTGGCCCTCAAAGAACACAGCTAAGAACAATGTGTGTGTAATTATCAAGTAATAGGAATGATACTTTTAAAACTGGAAATTATACATTCAAATGAGATTTCTCTCCTTTAACCAGTCCCCTTGGGAGGCAATGCAGTAATTCCAATGGTACTTCATTACTCAAATCATCTTTGAAGCTTTCTTCTTGGAAGCACCTTGAGAACCTGCAGTCTGATCTTCTGACTATCCCAAATGGTGCTAAATTTTCACTGAGGGTGGATTCAAATTTTGGAAATGGCAAACAGTCAGTCAGAGCCAAGGTTAGTGAATAAGATGTGTGATCAAACTAGGTGGAACTATTTTGGTTGAAAATGATAGATGATCATAAAGCAATGAGATGGATCTTCTTATGTGATATGTAAACTGACTTTAAAGGGAATTCCAGATGAGTAACAAGGAGTATGAACAATGGAATAGGTGTATACATTCTCTTTCCCCAAGTAACCTCTTTGAATAACAACACTCATTTGGATGTATAAGCGCCACTAACAGGTTTGTTTTGTTTAATCACTTACAATTTGTAAATAGAGGGTTCCTTAGCATACTGGATCTACAATGTCATGGAGGAAAGGAGTTGTCACTTAACTAGAACTATGGCATGGCACGCAGGAAAGAGCACAGGCTTCATGAGACTGACGTGGATTCGAATGTTGACTTAGATTCTTCCTGCCTATGCAACTTCAGGGAAGTCACAAGCTGTCAAGACTTTAAAATAAGGCTGATACCTAATTTTGCAGGGTTGTTGTAAAGATCATGGATAATGGATGCAAAGCCCCTACATTGGGCCTGGCATGTGGTGGGTACTCAAGAAGTCACAGCTATTATTAACAGCAACATCAGTAAAATCAAGACCATTTTCTGACTGTAGGTGGCTTGAGAGGACAGAACAACAGATAAGCAGTCACTTGAGTGGTAAGTAGTTCATTGACAGTACTGACAGTACACTCGTGCATTAGATACTGCTTTCTTAAAAACAATAACCTGTAAAAACTATTTGCAGTAACTATGTATTTTTAATTCATACGACACATATCATCTGCAGTGCTCATACCACATGGACCTCTTCACTACCTGAACATACCAGGCACTATCATGACCCCATGCCTTTGTGTACTTTGTCCCTTCTGAGATACTGAAAGGGGCCAGCC
CCTCCACACCTGTGGGTATTTCTCGTCAGGTGGGACGAGACTGAGAAAAGAAATAAGACACAGAAACAAAGTATAGAGAAAGAACAGTGGGCCCAGGGGACCGGCACTCAGCATACGGAGGACCCGCACCAGCGCTAGCCTCTGAGTTACCTCAGTATTTACTGATCATTATTTTTACTATCTTAGCGAGGGGAGTGTAGCAGGGCAACAGGTGGGGAGAAGGTCAGCAGGGAAACGTGAGCAAAGGAATCTGTATCATGAATAAGTTCAAGGAAAGGTACTGTGCCTGGATGTGCACGCAGGCTAGATTTATGTTTCTCTTTACCCAAACAACTCAGTGTAGCAAAGAGTAACAGAGCAGTATTGCTGCCAGCATACTTCGCCTCCAGCCACAGGGTGGTTTTCTCCTATCTCAGAATAGAACGAATGGGAATGGTCAGCTTTACACAGAGACATTCCATTCCCAGGGATGAGCAGGAGACAGAAGCCTTCCTCTTATCTCAACTGCAAAGAGGCCCCCCTCTTTCACTACTCCTCCTCAGCACAGACACTTTACGGGTGTCGGGCTGGGGGGTGGTAAGGTCTTTCCTTTCCCACAAGGCCATATCTCAGGCTGTCTCAGTGGGGGGAAACCTTGGACAATACCCAGGCTTTCTTGGGCAGATGTCCCTGCGGCCTTCCGCAGTGCACTGTGTCCCTGGTTAATCGAGAATGGAGAATGGCGATGACTTTTACCAAGCATACTGCCTGCAAACATATTGTTAACAAGGTACATCCTGCACAGCCCTAAATCCATTAAACCTTGATTCATTACAGCACAGGTTTCTGTGAGCACAGGGTTGGGACTAAAGTTACAGGTTAACAGCATCTCAAAGCAGAAACAATTTTTCTTAGTACAGATCAAAATGGAGTTTCTTATGTCTTCCTTTTCTACATAGACACAGTAACAATCTGATCTCTCTCTCTCTTCCTCACAAGGTACCCTTCCTACTCTCTGGTCTGATGACAACCCTACAATTCTAGCTCAAAAGTCTGGCTCAAAAGACTAGCTCAATTCAAAATCTAGCTCAAAAGTCATTTCCACTATAGACTCTTCCCTGCCTGCTCGAGACAGAATTAGTAGCTCTGCCATTTGTACTTCTAAGACATTTGGTTTCTTTCACTGTAATGCCTCATACTCTATTGAATTCTTATTTCCCTTTCATTTCTGCTTCTTCCACTAGGCTTACGCATTGAGAGGAGGACTATATTCCTTTATTTTTGTACTTAGAGCACCTAGTACATCACTTGGCACGAGATAGGAATCTAGATGTTTGAGGAATCAATGTTATAATATACTTTGCAAATAACTGTTAGAGGCCAGGTAGCTGACTTTAGTGAACTATATTACCAGCAATCGCATTCCTTTTTCCTGCTTCAAACAAGAGACAAGATAACTTATATGGACTCCAGAATGTCTTCTGAAGATGGAGCATACAGAAGTCTCCAAACACTACTGATCTCATCAGTACCCCATGCCCACAAGTTGGAACCCCCAGAGCAACACTTCTCAGCCTTTGTTCCAGTCTAGCACACCTGAGGGATACACCACACTCCCATCAGGAGCTCACAACAGATGACTAAGAAGGCCCAGCAGAGTTTTCACAGCCTTCCAGTGCCACATGGCTACCAGGGTGGAAGGGCCTTCTCTGAACCAGCAACTCCCCCAAAGTAGCAGAAAATTTCAAGGCTCCATGGTGAGTGCCGGGAGCCCACCTAATGCTACCTACTACCAGATCCCTACCACCTACAATGCCTCAGAATCAGTTTGCCTACTCTAAGAACAGGGCTCTTTCACTGTTAGGAAGCCTCAGGCATTCAAGGAAGGACTTGTTTGTGGGTCCGGTACATCTGTTTCGGCATGTCTTAACCTGCATAACTAAAAAGCAATTATGAAAAGAAGTTCATGATCAACCAGTTCAGTGCCAGATGGAAATAGGTAGAGTGGGTCAGCTGGCAGC
CCTGTACATCTGAGTGTTGACACTTGTGAATCATTCTCTGTCACTATAGTTTCCAGAGCCTTGCCAACGCAGCAGTGGGTTCAGACTTACCAAGATCCAAAAGGCTGGAAAGGGGATCTGGAGCCATGACCCATGCCTTGGGACCCACCAAACCATGACAGTGACTATTTCTATGACAAAGACATGCTCTTCAAAGTAAATATAGCCATATTCTGCCTTAGTGCCATGTTCTCTCCCATCTCTCCAAAGCAATGGCTGCACTCTGAAGGTGGGAAGAGTGTGAGAAGAAAGAGAACCAGGCCACAGACCACCAGGAAAGCAGTGATTGAAAGCAGCAGCTCTAGATTTATCTTATAGGCTCTGGAGAGTTATTCAGTTATTTACTCCACAAATATTTATTGAGCCAGGTACTATTCCAAGTATGCAGGCTACGATACTGACACAGTCACGGCCTTGACCCCATGGAGCTTAAAGTTTAGTGGGGGAGGCAGGTATTAATCAAATAAGCATCCAGACAGACACAAGGGTACAAATATGCACTATGATGGAGGAGTAGTAAATACTACAGGAACTGTAGAAACAGGGAGCAGTAAATCTGGTCAGGGAAGTCAGGAATGCTTCCCTTAGCAAGTGCCAAGTGAGGATAAATAGGAGTTCACTAGGTCTGGAGGAGAGGAAAAGAGAGGGAAACATTCCTGGTAAAGAGAACAGTGTGTGACAAGGTCCTGGGGTAGGAGGGAGATGTTGAATTGGTTCAAAAATAGAGGCAGGGAGCAAAGCAGTAGCATCAAGGTTTGAGTCCCCAGCATAAGGTAACTTCCTGTGTACACCCCTCAGTTTTCTTTCTCCTATAGAGAAGGCCTTCCTCAGCATGGAAGGAGACTAAAGCACACTGTCAAAAACTAGGAGTGCTCAGGAGAGGTGAAACACTACCAGCTAAAAAAACAAATCTGATTTACCAATAATACCTGGCCAATGTCCTCTTGAAATATCTGTAAAATTAAGGAACTCACTACCTCTGGCAACAATGAATCAGGAAAATGGCACAGTATAAAAGACCTTTAAACTTTTCAGGTAGAAATACATTTTAATGCAGGTAGAATTAAGAGATTGATGAATATGTTGAGAATTACTATAAATCTGCTTAGATACCAGGTATTTCACTGTAATTTCATATACTAATTTTTTTGGGGGGAAAAGGGCTAAAGGAAAAAACGGTTAAAGGTAGTATCAGTGCAGCACTATTTTGCTTTGGCAACTCTGTTAGCCTGTTTGATCACTTAAGGCAGTTTCTTGAGGCTTCTGAATTCTCTGTCACCTTGCAAACAAGTCACTGTAGCTTTCTATTTGTAAGGCACTTTGTAATCAATCCTTAATTGGCCTCCCTACCCCACTGAGAGATTATACTCATTCTACAGATAAAACAATAGATAAGTGAGTTGCCCAAGGGCACAAAACAATTCAGTTTTTACTTAACTTTCTTATCACCATCTCCAAATAGAAGACATACAGGGCTTGTCCAGCCACAGCACCAACCCTACTACCCTGCAGACCAATCTTAACCCCCTGATGCAGCAGAGGAAGGGACTGCAGTTAGTTCTGTTACCTCCTCTATGCCTTCGAAGCCAACAATGATAATGTCTGCACCGTGTTAGAGACACTGGCACTCCTAACCAGAACATGAACTCAAAGTGGAATAAGAGCACATACAAAGTCACAATGACTCACAAGTTAAAATGTCACCATCCCTAGGGAGACTGCACCCTCAACATACAAATACTCCCCTAAAACTTCCTTTCATCCTCAACAAGCAGCCAAGGCATAGGTCCCCAGAGTTGGCCCCCCCGCAGTACAGCACTGCACAGTACTGTAAGGCATCTTAAGTGTACAAGCCCTTCTTCACCCCATGCTGAGACCGCCAGCATCTAAAGCACCTAGCATAGGTTACAGCACACAAATATTTACTGAGCACGTCTTCCCAACTGGCAAATAAGGGAAAC
GGATACTCCTTTCGGAGAGTAGTGCATTTAAGAGATCAGTTAGGCCAACTCCTCATTTTACAGAAGGGCCAAAGAAGGGCAGAGAATCACCCAGTTACATATGGCCTCGTCTCCAATCCAAGTCTTTGGGCTCAGTTTAGTTCCACAAACATTTTATATGTGCCTACTATGTGCCAGATACGGTGGGTGGGAGGCGCTGGGGACACTGAGATGAGCAGGAGATGGTCTCAGCTTCAGGGAGCTCACAGCCTGGGGTAAAGAACAGTTACAAAACAGTCGGGCAAGTGCAAGGACAGGCGCTTGAGTGCTTAGCCAGTGTTTTCTCCACACTGTCCACTGGGTGTTCCCCATCTGGAAAAGAAGGACAGCAATATCTACTTCCTGCAAATAAACTTGTAGTGAAGACTGATGAGAAAATGAGTATGGAAAATGATCAGGCTCACCCAGAAATGCATTATATAAGGTATTGCCATACCCCGACACTGACTCCAAGGATTCAACAGTGTTTCAGGTATCCGTTCAACAAAGAAGCAAACAAGAGAGGGATGGGCATAAACCCAAGGAAAACCCCAAGGACTGCGAGGAACTGGAGGAAACAGCTGGGGTGGGGGTAGGGGTGTTGGGTCGAGTGAGACGGCCCTGACGCGGAGAATGGAGGGCCCGCAGCGGCGCAGAAGAGGATGGAACCGAGACGAAGAAGTTGGGACACCAATGAGGGACAGCAAGCAGAAAAGAATGGGGTTCCCTTGGGGCAGGACGGGGCTCGCGGCCGGGCCCTTCCGGCCGTGGCCGGGCAGGGGCTGAAAGCACCGGGCACGGGAGGAGGAAGCGGGCGGGCGCCGAGGCCGACTGTTTTGCCTGGGGACCGCTTGCACCCGCAGGGAGGCTCGGGCAGGCGCCCGGGTCCTCGGGCTGCAGCATCTCGCCCGCCGTGCCTCCCCGGAGCCGAACACCAGCCCGCGCCCGAGCCCGCAGCGCGGACTCCCGGGGGCGCCAACGACGCCGCCTCACCTCGGGTTGAAGTCCTGGAAGAGGCCCCTCAGGTTCATGGCGGAGAACTTCACCGCGGCGTCCTCCTCCTCCTCCCCCGCACCCCGTGCTGCACAGCCTGCGCCTTACAGCGGGTTCATGGCGCCAGCGCCAGCCGCGTCCACGCTGCTGCTCCCGCTACTGCTGCCGTCCCCGCTGCCGTCGCCGTCGCCGTCGCCGCCGCCGCCGCCGCCCGGAGAAACCTGAGCCACCGCCCCCTGCCCCTCCTTCCGGGCTTCCGTACGAGGGCCGCGCATGCGTCCGGAGCCCCGCCCAGAGCGCTCCTCGCTGGGAGGTCCCCATCCTTGTGTCCGCACGCGACCGG", - 148678216 - 1); - - var codingRegion = new CodingRegion(148679671, 148713263, 333, 1385, 1053); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 7, 148678216, 148679915, 1141, 2840), - new TranscriptRegion(TranscriptRegionType.Intron, 6, 148679916, 148681217, 1140, 1141), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 148681218, 148681341, 1017, 1140), - new TranscriptRegion(TranscriptRegionType.Intron, 5, 148681342, 148681966, 1016, 1017), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 148681967, 148682143, 840, 1016), - new TranscriptRegion(TranscriptRegionType.Intron, 4, 
148682144, 148685652, 839, 840), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 148685653, 148685736, 756, 839), - new TranscriptRegion(TranscriptRegionType.Intron, 3, 148685737, 148690313, 755, 756), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 148690314, 148690521, 548, 755), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 148690522, 148692969, 547, 548), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 148692970, 148693146, 371, 547), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 148693147, 148713225, 370, 371), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 148713226, 148713418, 178, 370), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 148713419, 148713568, 1, 150) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(2814, 2813, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), - new RnaEdit(2227, 2227, "C"), - new RnaEdit(1032, 1032, "T"), - new RnaEdit(917, 917, "G"), - new RnaEdit(151, 150, "GCGGCGGCGGCGGCGGCGGCGGCGGCG") - }; - - const byte startExonPhase = 0; - - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); - - const string expectedCodingSeq = 
"ATGAACCTGAGGGGCCTCTTCCAGGACTTCAACCCGAGTAAATTCCTCATCTATGCCTGTCTGCTGCTGTTCTCTGTGCTGCTGGCCCTTCGTTTGGATGGCATCATACAGTGGAGTTACTGGGCTGTCTTTGCTCCAATATGGCTGTGGAAGTTAATGGTCATTGTTGGAGCCTCAGTTGGAACTGGAGTCTGGGCACGAAATCCTCAATATCGAGCAGAAGGAGAAACGTGTGTGGAGTTTAAAGCCATGTTGATTGCAGTGGGCATCCACTTGCTCTTGTTGATGTTTGAAGTTCTGGTCTGTGACAGAATCGAGAGAGGAAGCCATTTCTGGCTCCTGGTCTTCATGCCGCTGTTCTTTGTTTCCCCGGTGTCTGTTGCAGCTTGCGTTTGGGGCTTTCGACATGACAGGTCACTAGAGTTAGAAATCCTGTGTTCTGTCAACATTCTCCAGTTTATATTCATTGCCTTAAGACTGGACAAGATCATCCACTGGCCCTGGCTTGTTGTGTGTGTCCCGCTGTGGATTCTCATGTCCTTTCTGTGCCTGGTGGTCCTCTACTACATTGTGTGGTCCGTCTTGTTCTTGCGCTCTATGGATGTGATTGCGGAGCAGCGCAGGACACACATAACCATGGCCCTGAGCTGGATGACCATCGTCGTGCCCCTTCTTACATTTGAGATTCTGCTGGTTCACAAACTGGATGGCCACAACGCCTTCTCCTGCATCCCGATCTTTGTCCCCCTTTGGCTCTCGTTGATCACGCTGATGGCAACCACATTTGGACAGAAGGGAGGAAACCACTGGTGGTTTGGTATCCGCAAAGATTTCTGTCAGTTTCTGCTTGAAATCTTCCCATTTCTACGAGAATATGGAAACATTTCCTATGATCTCCATCACGAAGATAATGAAGAAACCGAAGAGACCCCAGTTCCGGAGCCCCCTAAAATCGCACCCATGTTTCGAAAGAAGGCCAGGGTGGTCATTACCCAGAGCCCTGGGAAGTATGTGCTCCCACCTCCCAAATTAAATATCGAAATGCCAGATTAG"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - //NM_017940.4 - [Fact] - public void RnaEdits_big_test_reverse() - { - //Transcript id:NM_017940.4, chrom: chr1: 16888922 - 16940100 - var genomicSeq = new SimpleSequence( - 
"TGATAGGCAAAAGCTTTTAATTGTATAGATTAAAATAACTTTGGACAAAAATTAAAACTCAGGCAGAGAATGTTTTTTTTTTCAACAACACACACTAGCAAAAACAAAGGCACAGTAAACATTGAGGCAGAAAGTTTCCAGCGTAGAGATATGAATATAATAATAGACACAGGCAGGGATGATTAATAAATGATAAAATGTTTACAGGATGATCATTGGAATACAGGACATTTCTAATTTTGAAAACCACCCTCCCAAATACTTCATTATAAGTAAGGTGTCTCTAAAAGGGACAGATCTCCTAGACCCCTCCTTAACCAAGTAACCAGTCCTGATATCATGATAATGCTGATGGACAAACTAGACCTTCTCTGCCCGCAGATGGGCTAAGGTTGGAAACTCACAGCATTGTCTCTGCAGTGTTCCCGGCAAAACGTTTAGGCTGAATTTAATCATGAAGACATTTTCAGACAACTTCAGAATGTAGATCATTGAGCCAGAGAGCTGACCTGTCCTCTATAAACAAGTCCATGTCACCACCATCAATGACAACAACAAAAAGATGAGGAAATATTTGGGGTTCAAAATAACTAAAGAAATGCAGCTATATTATCTTTTTACTTTTTTTGAACCCAAAATATCTCTTCTCCTTTTTGTTGTGTGATTTGTGGTGATATGGACTATGTGAAGGAGACAGGTCAGTTGTCCTGCTCAGTGTTCTACATTCTGCAGTTGTCTGGTAATTACCTCCTATGAAACTCAGGCTAAGCGTTTTCTGCAAGAACATGGCGTTGTTCATATTCTGCACCGGCAGAGTCCTGGGTGACATGCTGTCTCCTGCCAGCGGCTCCTGACTCCTGTTCTCTACAGGATGGAATCGAGAGGAGCAGGGCTAAGGCCTCCCAATGCTGTTTGTCCATCTAGCTGTGGTCTTCCTAAGTACTGACACCAATTGGAGGCTGAAGGACTGTGGCTTCTCTAACCAAAGGAGCCTAGCGGGTTAACAATTGTCAAGAGCAGTTGGTGGTTCTGAAATACAATCCTCAGCCAAGGATCCCTCCTGTGTTAAAGATGGATCAGCTAAAACAATTCAACACTGAAGATACAAAGAATGAGGTTAGGTTCATTGAAACCAGGGTAACACCTTTGGATGAGCTAAACACAAAGATGACACTGACCTTGAGCAGGTATAGAAGCTCAGAGACATGACTGCAAAATGAAATCCCTGAGGAACTTTGTAGCTACCCAGAGATAAGTGGTTCAAATTAAAATGTCTGACTGATCACTCCCGGCATGTGCTGCACAGTTATGTGAACGTGTCACACCTAACTTGGGTCCATTGTCTTCAGACTGAGCACAGGGTGCCACTGGCATGGTCTGAGAATAGGAATAGAGCCATGCCCACTGACCCATCCTATGTCTGGGCTTCCAAATGGAACTATAGTTTCATTCAAATCTTCACGTGCCTATAGGTCCTGCCTGCAGGAATGACATCTCTCGGCTTAGTAAGGGCTGCTTACTGTGGGAATATGACTCCCATCTGGAAGACCAGGTGGAGACTTGTTCCCATCAAAGTAAGAAACCTATTGTCCACGTCAAGGGCGAAGCTGATGTGCTGTTCCTCAAATGAGTAAAACACACTTCTGTAGTGCTGGAATGAGTCAGGTAGTTCAAAGTACATTGACGGAGTCGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTTCCACTTCCATCAGCATGCCGCTGAGCCTGGAAAAGCAGACAAAACTAAAGAAGCAGCCAGGGAAAATCAGACACCACAGAGCCCCACTAGATTTCAGAAGTAACGTAAGGAAGTGGTAAGAAAAGAAAAGGATAGATCCATTAGATCCATTAATGAGGTAAAAAAAAAAAATTATTGCCTTTATGTTGGGATAGAAAAGGGCCAGGTAGAAAACAATGAAAGAGAAAGACAGAGAGACAGAGACAGAGACAGAGACAGAGAGAAA
GTGAGCTAGTGAATTGGCCAGGTGACATACTGGTAAGGGAGTAAAAGGACACTCTGAGTTAGTGCCCTCATGACACACAGCACACTGCGATCATGAAAAGAGTGAGCTCAATAGTTTTCCATAAAATATGCTCAAAATTCGATGCAGTGGCCATGAGAGTACAGCTTTTGAAGTATGGTCATCCTATGGTACGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAATGAAAACCAACAGCAATGACAGTAGGAGTAATTCAGCCTTCGTTGAAAACATGAAATCAAACACACTCTGGTTTCCCTCAATCTGTTGCCTCCAGGTGTTAACACAGAATTAAGCATCCACAATTGCTGAAAGTTACCTGGGGCATGGTGGGTTTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCTCCTTCTTTTCTTCGTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCCCCTTCTTTTCAATTTCTGCAATAAATTCAGACATGGACAGACACATTAAGCTGATTCCCCTACACACATAACAATCCACTGTCTAACCCTCACACAGGGACCTCAGGCTCCTCAGCATAAGAATAGGAGACTGTGAGAGATATATTTCAGGAGGCCTGAAGGCTGGTCATGATAGAAATTCCTCGGTTTTTCTCCCAGAAACTGTGGGTAAAATGTCCCTATTCTAGTAGATCGTTATCCCAATATCATTTGTCCCGAGTTTGTGCAAACAGTTATGCCATATTTTTCCAATCAATTTAAAGCAAATACCCTCAAATGATTTCTAGGAGAAAAACTGCAATATTTAGCCCTGTCTCATCAAATACTCAGATTGTTCATGGTTGTGAGGACTTTAGACACTGAAATTAGAGTGAAAAAGGAAATCTACAAACCCTTGAGTCAAAATCATAGTTCTCTGAATTTGTCACATCTGCCCAGGTCCAATGTCATGAGAGTAGAATCAGAGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCATGCTCACTGACCCATCCCATGTCTGGGCTTCCAGTTAGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCACTGCAGCAATGACATCTCTCAGCTCAGTAATGGCCACTTGGAGCAGGAATATGATCTTTATATGGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCTCCCACGTCAAGAGAAAAGCCAACATGTTTTTCCTCCAATGCATAAAAGGAACTTCCATAGGGCAGGCAGGAGTCAGGCTGTTCAAGACAACTGGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTACTGCCTCCAGCAGCTCCCTGCTGAGCCTGGAAAAGTAGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTACATTTCATGGCTAACATAAGGAACTGTTTAAACAGAAAAAGGACAGATCCATTAATGAGGTAATGAATTATTGCCTTTATGTTGGGATAGACCAGGGCCAGGTAGAAAAGAATGAAAGAGAAAGACAGGGAGAGGGAGAGGGAGAGAGAGACAGAGGAGAAAGTGAGCTCAGCGAATTGGCCGGGTGACACACTGACGAAGGGGTCAAAGGACACTCTGAGTTAGTGCCCTCGGGACACACAGAGAACAGTGATCATGAAAAGAGTGGGCTCAATAATTTTCCATAAACTTGCTTAAGATTCCATGCAGTTGCCATACAGCCTTTGAGGTATGGTCAACCTACAGTAAGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAAGGAAAACCAACAGCAATGTCAGTAGGAGTAATTCAACCTTCGTTGAAAACATGAAATTGAACATACTCTTGTTTTCCCTGGACCTGGCATCTCCAGGTGTCAACACAGAATTAAGCATCCATAATTGCTCAAAGTTACCTGGGGCATGATGGGTCT
TGGTCTTCTTCCACTTCTTGGTACTTTTCAATTTCTGCAATAAGTTCAGACATGGACAGACATATTAAGCTGGTTCTCCTACACACATAACAATCCACTGTCTAATCCTCACGCAGGGACTTCAGGCTCCTCAGCATGAGAATAGGACACTGTGAGAGATCTTCTTCAGGAGGCCTGAAGGCTGATCATGATAGAGATTCCTGGGTTTTTGTCCCAGAAACTGTGGGTAAAATTCCCTATTCTGGTAGATCGTTATCCCAAGATCATTTGTCCCAAGTTTGTGCAAATGGTTATGCCATATTTTTCCAATCGATTTAAAGCAAATGCCCCCAAATGGTTGCTGGGAGAAAAACTGCAATATTCAGCCCTGTCTCATCAAATACTCAGATTCTTCATGGTAGCGAGGATTTTAGATGCTGAAATTAGAGTGAAGGATGAAATCTACAAGATCTACAAAATTGAGACAAAATCAGAGTTGTGTGAATTTGTCACATCTGCCCAGATCCAACATCTTGAGAGTGGGATTAGGGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCCTGCTCACTGACCCATCCCTTGCCTGGGCTTCCAAGTGGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCCCTGTGGCAATGACATCTCTCAGCTCAGTAAGGGCCATTTGCAGTAGGAATATGACCCTAACCAGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCATCCATGTCAAGAGCCCAGCCAACACGCTGTTGCTCCAATATGTAAAAGGCACTTCTGTAGGGCTGGCATGAGTCAGTCAGTTCAAGATAACCTGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCCCTTTCTCATCCAGCAGCTCCCTGCTGAGCCTGGAACAGTGGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTAGATTTCATGGCTAACATAAGGAAGAGTTTGAAAAGAAAAAGGACAGATCCATTAATGAGGTAACAAATTATTGCCTTTATATTGGGATAGACTAGGGCCAGGTAGAAAAGGATGAAAGAGAAAGACACACACACACACACACACACACACACACACACACACACACACACAGAGTGAGCTCAGTGAATTGGCCAGGTGACACACTGATGAGGGAGTCAACGGTCATTCTCTATTTGTGCTCTCAGGACACACAGTGAACAGTGATCATGAAAAGCATGGCCTCAATAATTTTGCATAAAATGTGCTCAAGTTTCCCTGCAGCCACCATGAGAATACAGCTTTTGAGGTATGGTCAACCTTCACTAGGTTAGTAAATGATAAGGGTAGGAAGAAATGGAAACCTAAACATTTACTCTAATGAGAACCAAAAAGCAATGTAGTAGGCATAATTTAGACTTGTCTGACAAGACAAAATCATTATTTTCAGCATGTACTGTTTTCCCTGGACTTGGCATCTCCAGGTGTCAACATCAAATTAACTGTCCACAATTTCTCAGACTCACCTGGGACCTGTTGCCTCTTGGTCCTCCTTTTTCACTTGATCCCACCGATGTCCTGCAAATAAATTCAGATGGGGCCTCTTACATTAAGCAGTTCTTCCTTGCACACAGAAACATTCCTCTGTCCAATCCTAACACAGGTACATCAGTCTGGTCAGTGTGAGAACAGGAGACTTTGAGAGAAATATTCCAGCAGGCCTGAGGTCAAGTCTTGAGAAAACTGGCTTGGGTTCTTTCATGAGCCTTGGGCAAAATTACCCTGTTTTGGAATGTTATCTTCCCTATGTGCTCTGTCCTAGGTTTGTGTACACAAATGAGCAACTTTTTCCCCAATAAATTGTAGGCAAATAGTTCTAACACCTCATAGGAGAGATACTTCAATATTAAGCTTTCTCTCATCAAATACCCAGAATTTGATAGTTTATGAGATTGTGGACACAGAGATTTGAT
GAAGGGGTGCAATGTACCAGCTCTTGAGTCAAAATGAAACTTGGTTCTACACAGAAGCATCAGCTATTATGGCTTTTGTGGGTGAAAAGTCAGCCATTTATCTAGAAAACATACCAGGAACATGACGGACAGATGAGCTAAAGCAAGCGAACTTAGAAGACACAGAAAATGGGAATAAATTCAGTGAAACCTGGGCCACATCTTTCACTGAGAGGTAGACAAGGGTGACACTTGCCTTGGGCAGGTAAAGAACCACACAGACATGCTTTGGGAACAAAACTCATAAGGAATTTTGTAGCTGGCAAGAGACATTTAATTCAGATGAGCTGATCTGACAGACAACTCCTGGTCATGTGCTGCATAGTTTGGTGTGAGCTTGCCACACCTGCCTTGAGTTCAATGTCGTGACAGTCAGTCCAGGTTGGCACGGGCATGGCCTGAGACTAGGAAGAGAGCAAAGCTCACTCACCCACCCCATGCCTGTGCTTCAGACTCGACTCCAGAGTGATTGAAATCTACATTGATATATAGGTTCAGCCCACAGTGATGGCAAATCTCAGCCCAACAAGGGGCACAAGGCCCAAAGATTATGGGGTCTACCTGGGCCATGAACTGGAGCTTTATCACCTTCACAATGGAGTACTCACCGCCTATGTCAACAGCCATGCAGACTTGCTGTTCCTCTAATGAGTGAAATGTGCCGCTGTAAGACTTGTACGAGGCCAACATTTCAGGAGGAATTGAGAGAGTCGAATAACCTTCATCCCAGGACTCCTGGGGGACTTCCTCCTCTTCAGACTCCTGCAGATTCCTGATGAGCCAGGCAGGACAGGGATGATAGAAGATTTAACCAACAGACATTAGACAACAAAACCTCCCAGATGATCTGATGGGAGACAGAATGGAGTGGTCACAGAAACCAAAGGCATTTTTCCTTCAAGAGAAATAAAACTAGCCTTCTAAATACAGGGTGGAGGGTGACTGCTCTGGGGACAGAGCAAAAATGGGCAGCATGTGCTCAGTACATTTGCCACAGATGAGCCAACTCAGGGCACCCAGACTCTCCCTGTAAACTACCATCATGACTTGCAGCACAGAGAACTGACACAGGGCTTCAACTACTTTGCATAAATTGGGTTGAATTTTACATGCAGCATTCAAGTGAAGAGAGTTCTTGACACAGTGCAGACACAGATCTTGTGTATTAAGGGCCCCATTTTCCCAATATTTTGATATAATATATTTACCTTTTCAATTTCTTTTCTTGCAAAAATACTAGCCAACATACTACCAACAGATAGGAAGAAAGCATATATACATCTCTCCCTGGATTTAAACACATGGGAGAGAATAGGCAACACCAAGAAATCCCTGTTTGAGGGTCTGGAGTGGACTTCCAGCAAACTCCAACAGACCTGAAGCTGAGGGACCTGATTGTTAGAAGGAAAACTAACACACAGAAAGGAATAGCATCAACATCAACAAAAAAGACATCCATCCCAAAACCCCATCTGTAGGTCGCCATCATCAAAGACCAAGGGTAGATAAAACCACAAAGGTGGGGAGAAACCAGAGCACAAAAGCTGAAAATTCCAAAAACCTGACATCCCTTCTCCTCCAAAGGATCACAGCTCCTCGCCAGCAATGGAACAAAGCAGGATGGAGAATGACTTTGATGAGCTGACAGAAGTAGGCTTCAGAAAGTCGGTAATAACAAACTTCTCTGAGCTAAAGGAGGATGTGCGAACTCATCGCAAGGAAGCTAAAAACCTTGAAAAAAGATTAGACGAATGGCCAACCAGAATGAACAGTGTAGAGAAGACCTTAAATGACCTGATGGAGCTGAAAACCATGGCACGAGAACTACGTGATGCATGCACAAGCTTCAGTAGTCAATTCGATCAAGTGCAAGAAACGGTATCAGTGATTCAAGATCAAATTAGTGAAATGAAGCGAGAAGAGAAGTTTAGAGAAAAAAGAGTAAAAAGAAATGAACAAGC
CTCCAATAAATATGGGACTATGTGGAAAGACCAAATCTACGTTTGATTGGTGCACTGAAAGTGACGGGGAGAATGGAACCAAGCTGGGAAACATTCTTCAGGATATTATCCAGGAGGACTTCCCCAACCTAGCAAGGAAGGCCAACATTCAAATTCAGGAAACACAGAGAACACCATAAAGATACTCCTCGAGAAGAGCAACCCCAAAACACATAATTGTCAGATTCACCAAGGTTGAAATGAAGGAAAAAATGCTAAGTGCAGCCAGAGAGAAAGGTCGGATTACCCACAAAGGGAAGCCCATCAGACTAGCAGCAGATCTCTTGGCACAAACCCTACAAGCCAGAAGAGAGTGGGAGCAATATTCAACATTCTTTTTTTTTTCCATATGTATAGTTTTCCTTTATTATTTTTTGTGTGTATGTATATATATGTATATATATTTTTCAATACTTTAAGTCTTAGGGTACATGTGCACAACGTGCAGGTTAGTTACATATGTATACATGTCCACATTGGTGTGCTTCACCCATTAACTCATCATTTAACATTAGGTATATCTCCTAATGCTACCCCTCCTCCCTCCCCCCACCCTACAACAGGCCCCAGTGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCACCTGTGAGTAAGAACATGCGGTATTTCGTTTTTTGTCCTTGCGATAGTTTGCTGAGAATGATGGTTTCCAGCTTCATCCATGCCCCTACAAAGGACATGAACTCATCATTTTTTATAGCTGCATAGTATTCCATGTTGTATATGTGCCACATTTTCTTAATCCAGTCTATCATTGCTGGATATTTGGCTTGGTTCCAAGTCTTTGCTATTGTGAATAGTGCCACAATAAACATATGTGTGCATGTGTCTTTACAACAGCATGATTTATAATCCTTTGGGTATACACCCAGTAATGGGATGGCTGGGTCAAATGGTATTTCTAGTTCTAGATCCCTGAGGAATTGCCACACTGTCTTCCACAATCGTTGAACTAGTTTACACTCCCACCAACAGTGTAAAAGTGTTCCTATTTCTCCACATCCTCTCCAGCATCTTCAACATTCTTAAAGAAAAGAATTTTCAACCCAGAATTTCATATCCAGCCAAACAAAGCTTCATAAGTGAAGGAGAAATAAATCCTTTACAGAGAAGCAAATGCTGAGAGATTTTGTCACCACCAGGCCTGCCTTACAAGAGCTCCTAAAGGAAGCACTAAACATGGAAAGGAACAACCGGTACCAGCCACTGCAAAAACATGCCAAACTGTAAAGACCATTGACGCTAGGAAGAAACTGCATCAACTAACGGGCGAAATAACCAGCTAACATCATAACGACAGGCTCAAATTCACACATAACAATATTAACCTTAAATGTAAATGGGCTAAATGCCCCAGTTAAAAAACACAGAATGGCAAATTGGACAAAGAGTCAAGACCCATCAGTGTGCTGTACTCAGGAAACCCATCTCACATGCAGAGACACACATAGGCTCAAAATAAAGGGATGGAGGAAGATCTACCAAGCAAATGGAAAGCAAAAAAATGCAGGGGTTGCAATCCTAGTCTCTGATAAAACAGACTTTAAACCAACAAAGATCAAAAGAGACAAAGAAGGCCACTACATAATGGTAAAGGGATCAATTCAACAAGAAGAGTTAACTATCCTAAATATATATGCACCCTATACGGGAGCACCCAGATTCATAAAGCAAGTCCTGAGAGACCTACAAAGAGATTTAGACTCCACACAATCATAATGGGAGACTTTAACACCCCACTGTCAATATTAGACAGATCAATGAGACAGAAGCTTTACAAGGATATCCAGGACTTGAACTCAGCTCTCCACCAAGCAGACCTAAAAGACATCTACAGAACTCTCCACCCCAAATCAACAGAATATACATTCTTCTCAGCACCACATCACACTTATTCCAAAATTGACCACATAGTTGGAGGT
AAAGCACTCGTCAGCAAATGTAAAAGAATGGAAACCACAACAAACTGTCAGACCACAGTGCAATCAAATTAGAACTCAGGATTAAGAAACTCACTCAAAACCGCACAACTACATGGAAACTGAACAACCTGCTCCTGAATGACTACTGGGAAAATAACAAAATGAAGGCAGAAATAAAGATGTTCTTTGAAACCAATGAGAACAAAGACACAACATACCAGAATCTCTGGGACACATTTAAAGCAATGTGTAGAGGGAAAATTATAGCACTAAATGCCCACAAGAGAAAGCAGAAAAGATCTAAAATTGACACCCTAACATCACAATTAAAATAACTAGAGAAGCAAAGCAAACAAATTCAAAAGCTAGCAGAAGACAAGAAGTAACTAAGATCAGAGCAGAACTAAAGGAGATAGACACACAAAAAACCCTTCAAAAAATCAATGAATCCAGGGCTGGTTTTTTGAAAAGATCAACAAGAAAACCCTGTTTGGCTAGTTCACCTGGCTCATCTGATGGCAAGTTCCTATCTTGAGAGGACTATGAAATTAAAACCAATACAAGTGCCACAAATAACATACAACATTGTAAATCAGCACAATTTGTAGCTGGGTGAATGGAAGAAATAGTTCTATTCATCACTTCCTCATTTTCCCTAAATCTACAATCTCCAGATGTCACTACTGAATTAACAGCCAACAATTCCACAACATTACCTGGGAGACACTGGCCCTTTTTCTTCCTCTTCCTCATCATCACTTTCATTTTCTGTAAATAAATTCAGAGAAGCAGGTCACATTAAGCAATTCATACTTCACATATGACCAAATCACTGTCCAGTCATAGCACAAGGACATAACTATTCTCAGTGCAAGAATAAGGATTCTGACAGGAATATTCTAGGGTGCCCTAGATTAACTTTGGTGAGAATTAGATGACCCTGCTTTCCAGACCCACAGGCCAAAATCTCCCTCTACGTGTAGACCATAATGCCATATTCCCTGCCTGAGTCAAAGTTAAACAAAATTTTTTCCCCAAAAAAATCTCCAAAAATTGGTCCATTTTCTAAGAGTGTTGCTGCAATACGGACTTATATCACCAGATAACATGGACATTAAATGTTTAGAGGCATCTATACATGAAACACACATGATAGATAAATTTGAACAACTCTTGCTTTAAAAAGAATCTGTGATTTGGGAGGCCAAGACAGGTGAATCATTTGAGGTCATGAGTTCAGGACTACCCTGGCCAATATGGGGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGATGTGATGTTGTGCACCTGTGGTCCCAGCAACTCAGGAGGCTGAGGCAGGAGAATCACTTGAATCTGGGAGGCAGAGGTTGCACCAAGCCAAGATGGTGCAACTGCACTCTAGCCTGGGTGACAGAGCAAGACTCCATCGCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCCACGATGCTACAAAGAAACATTGGATCAGCCATTGCATTGACAGGGTGGAGAACCAGGGTCCAGCCTTGCTTTATGGAAATATATCAGCAAAGTAAAGAAGAAAAGTTTCCGTCCTGATTTCAGGGTGACTGTGCAGCTAAGCAAGCTGACTTAAAGGAGATCCGGATGAAAGCTGAGAGCAGTGAAGCCTGGGGAACAATATTTCCAAATACAAAGGCAAGGCTGCCAGCTTCCTGAAACAGGCATAGAAACTCCATGGACATTGTTCAGGGACAGATGACTTAATCACAGATGACAAGAGATACTGAATCGAAGCTAGGAGGCCTGACAGATACTGCCTGTGCACCTCCTGCACTCAGGTGACTATGAGATTGTCACACTTGCCTGGGGTCGAGTAACTTGATACTGGGGACTGGCAGACAAAGGCATGACATTAGCTGAGAAGGACAAAAAAACTCCCTGATATCTGTTTAGAAACCCATCATAGTTTTTTATTCAAATGAATTTGTGTTTATAGAGCCTGTC
TTCAGAGTTTATCTTCCTCAGCCTAGAGAGAGGTATGAGACACAAGGAAAACAGAGGCTACCTGGGATAATGTGTACAGCATCCTCCCATTCAACATGAGAGGATGAGCCAATGAGAGTTGAGTCGACTTTGTCTTCCTCAAATGTGATTTTGGTTTTCCTATGTGGCTGGTTGGAGTCATAAGGGCCATGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTCCTTTTCTTCAGCCTTCGGCATCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGAGAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACATGCAATCCTGTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAACCAGAGAGGAAGAGAGCAGCTGGTGTTCATTGCAGTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTGCAGACATGACACTCGGCACACATAGAGAAACATGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTATATTTCACATACTGTGGCCAAGCAAATGCGGGTTTTTGGCCCATCATAGATGCCAGAGAGGGTGTACCTCCTAGATATTCTTCATATGTTACCATCCATTACTTGTTCCTGAGTATTCAGTGTTACCTGGGGGCAGACGATTTCTGCACTTTCTCAGCCACCTCAACTTGAACATCTTCATCGTCATCGTTGTCATTTTCTGTAAATACAGAAGTGTTCGTTCAGATATTTCCCACTTCACAGTCTGCAAGCACAGTCAGCCCAATGTGCAACAGAGACATGAACATCTAGGCATGGGTCACCGTTCAACTGAAAACTCTCATGTTTTATCTTTAACAGAATGCCCTGGCATGGTTTCCTGATCCATCAGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCAGATCTGATTCCCAGGCACAGGCTTGGTGTCCTGTCACAGTTTGCATTTCAAACCTAATTCTTTCTCTTAGAAGCAGACAAACTTATCCCACAGTCCTCTATGCATCAGAAGATTTCAAGCCTCCAAGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACGAAACGCCACCACATAAAGTGCCTTCTCCAACATCACACGGCGAGGGGCTTCATCTCATTTTGGAAAGCAGTTTTAAGTGTTCCCACATTTGAATGCTTCAGACCCTTGCAAGAGACAATTTGCCATGGAGAGAGAGAAACTCAGGAAAGACAAGTCATTCAGTCACTGACAGTTACTAAGAACATTGCCGAAAAGACACCCTGGGAACCTTCATTCTTAGTCCAGAGCTCTTTTCACTCTAACAAGCCTGCTCCTATCGCAGCCTCCTTCCTGTCCTTTAAAACTAGATAGATGCTGCCTCTTACTCCAAAGACAACCTTCCATCAAGGGAGGAGGGACAATTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCAACTCCGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATAAGCTTAGTGGCAAAAAACACCATTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGTCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTTTGGACAAGGTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTCATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCAGGAGGCATCTCTCCCTTCCCGTAACT
TCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGCTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGGAAGGAATGTCTGTGGCTAAGAGAAAGAATAGAAAATGGTTTACAGGTTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCATTTCAAAAAGACATCCTTTCAGTCCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGATGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTGAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGTTCTTAAGTCTCCCCACTGAGCTGCTGTACTTCAGAGATTTACACACCTGCCCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATTTTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAGAACCTCAATGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGATGGTTTCCCTTTTACTGGGAATTTCAAGGACAAGTATGCAAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATGTTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCTAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTGAGGAAACAGGGACAGAAAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTATCTCCACACATTCTCGGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACATTCTGCTTCTTATTCTTATTTTTATTTATCATTATTAGTATTATTTTTTTAACAGTCTTGCCCTGTCGCCCAGGCTGGAGTGCAATGGCAAAATCTTGGCTCACTG
CAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTTCCCAGGCTGGTCTCAAACTCCTGACCTCGTGCTCTACCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTTTGCATATTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATATTTTGGGACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGCGGCAGCCTTGTCTACTTATTTGAAGATGATGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTATCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTTGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGGTTTCATTTTGCCTTTTTAATTTTTTTCTTTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGAGGCACAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGCGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAACTCCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCAGCCTCCGAAAGTTCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGGG
AGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAACAATATGTGCGTATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGCTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCCGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCACACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTAGACATTTTCATATGTTACCACCCATTACTTGCTCCTGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCCCATTCTCCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTGTGGTTCAGCATTGTACTGAAAACTGTCATGTTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCTCCCCCATCCTGCCACAGATCTGATTCCCAGGAACAGGTTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGTGTCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCCACATCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTGCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTTGTTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGCCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATGG
CCACATATGTGTAGCAGAAAATAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAATAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCAACTTGCAGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCAAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGGTTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAGTGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCCACCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATATTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAAAGGGAACCTCAAGGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAATTTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGTCATGAAGGAAATATGCCCAAATGCTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCCAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTGA
GGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTACCTCCACACATTCTCAGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACACTCTGCTTCTTATTCTTATTTTTATTTATCATTATTATTATTATTATTATTATTATTATTTTTACCAGTCTTGCCCTGTCACCCAGAGTGGAGTGCAATGGCAAAATCTTGGCTCACTGCAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTGGTCTCAAACTCCTGACCTTGTGATCTGCCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCGGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTGTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGGAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTCTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGTGGCAGCCTTGTCTACTTATTTGAAAATGTTGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTGTCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTGGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGATTGCATTTTCCTTTTTCGATATTTTTCTCTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGTGGCGCAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGTGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGCATTTTTAGTAGAGATTGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAGCT
CCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCGGCCTCCCAAAGTGCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGGGAGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAAGAATAACAATATGTGTATATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGTTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCTGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTACACATTTTCATATGTTACCACCCATTACTTGCTCCCGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCACATTCTGCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTATGGTTCAGCACTGTACTGAAAACTGTCATATTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGTCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCACATATCTGATTCCCAGGAACAGGCTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGCATCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCACGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCTACACCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTTCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTTG
TTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGTCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATATGTATAGCAGAAAAAAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAAGAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGCGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAAATTTTATGAGGTCTTTACACTCTTCATACTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCACTGCCTACAGGGCAGAAGCCAGGTCCATCCCAAGGACAAAACTGTCCCCCGTACCAGGCTCTAGGCAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTTATCCTCCAAAATTTAAAGACGAAGAAAGAGAAATTCAAGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAACTCACAGCCCCTGAGGTCTGACTCTGAATGCGGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTGAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTTGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAAT
TTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATATTTTATTAGTATGACAGGCAGCATCAAGATTTAGATTAGTTGTGTTAATTTAGAAACAGCATAAGATTAGTTTGTGTTAATTTAGAAACATCAGAATGAAGAACTAATAGATAGTGTTTACACTGTGCCAATTAATGTTCAAGGAGATTGACAGGAAATACCTCATGTAATTCATTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGATGAGGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCTGAGGGTCCCTGCTTTGCACACTGCACTGCTGCTTCCACACATTCTCGGGTGTGATCTTTCTTCCTCTTTAGGAACAAGAGCCTGTGCACCAGGAAGCAGGACTTCACTCTCACCAAGGTACTCTCTGCTTTTTATTTTTATTTTTGTTTTATTTATCTTTTTGTTTGTTTGTTTTTTGATGAGTCTTGCCCTGTCACCCATGCTGGAGTGCAATAGTGCAATCTTGGCTCACTGCAACATCTGCCTGCTGGGTTCAAAGGATTCTTCTGCCTCAGCCTCCCGATTAGTGGTGATTACAGTTGCCCGCCACGACGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTAGTCTCAAACTCCTCACCTCGTGCTCTGCCCGCCTCAGCCTCCCAAAGTGCTGAGATTACAGGAGTGAGCCACGTTGCACGGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCCTTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTATATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCGTTCTGTGTCTGTGTTAGAAATCAATAACTGTGAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTACAGGAAGGATGAAATTATTTTTGATGGAGAGAGCATTTAGTGTCTCAGAGAGAAGACAGGACATCATTCATCACTTTCATGATGGTGAGCCTATAGATCTTACTGTATTTCTTCTGTCGGTTGGCCAGGAAGCCGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTGTTCTCTGCCAGCTGGGGGCGCAATTTCTCGTTGATTTCTAAAATGTTCGTCTCTGCCTTCTCGCTGGACCAAGGGCCAGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGAGCCAGGGACTGGGGAGAAGAAACCCAAACATATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCAGGACTGAGGGATGTCAGTAACTGAAATTCTTAACTTACTGTTGTGAAAAATGTGATCACTCCCCACAGCACTTTAGGATCCTTCACCACAAAAACAAGGTTCGAGGTGCCTGAACTCAGAGCTGAAAGCACTGCCAGTAGCTCAGACTCTGATAAGAGTGAGGTAGACTGTGGCCAGCGTGCCAGGTAACCGTCTGCAGTTGCAATAACAGAATTAGAAGGTGGGGGTGTCATGGAATCTTAGGAGCCCTGCATTCCAATTGCCCAGGCTTTCCTGAAACACAGGCACCCTAGTCTCACCTGAGG
GTCACCACCAATGGGGATCATTCCTTCAGCATTCACTCTCAGTATTCGTGTACCCTTGTGATGATGCCACAGACCCGTGTCTTTCCCAATACATCTAAGCATATTCCTCACTGTTTATCTCTTGTCTGTACAACATCATCAAGGCAGAAACAGTTTCCCAACAGGTTGTATTTTCTTAATGGTAGTCATGAAGTCACCCCACCTGCTCTCAGTTAAAACAGAGCTTAAGGCCTTTCCACAGGTGTAAGATATCAAACTTTTAGCCTGCCCTGATTTCCTCTGGGTCTTCTGCAGTTTTGTCTGTATCCACTAGAAAGTGAATGAATAATTCATTTGTAAAAAATGTTGTCTTTCCTGTCTCAGTATTCTTCTTGCTGTTTCCCATTGTTATGTTGATTTCTTTTTTCTCACTGGGGCACCATCTTTGCTTTTCATTACACTCTAGACCAGTTTGACATCCCTATGTCCAGAGCTCTTCCTCTATGTGGGTTGATTTGGTTTTTGATGTCACTGAGCGCTACATTTTATACTTGTCACTTATGGATGTCATTCTAGTGTCACAAGAGCTCTTTTCAAGGTATCAAGTGATCAAAATCATTTATATAGAGATCTCCTGAAAACATGTGTGACCATCTATCTTGGGAAGTTTCATAAACCTGATGCTATTTTGTTGTTTCCATTTTGTTTTCCCATATACTGAAAAGAACAGGGCCATGAGCGGTTCTTATGCAATATGGTTTGATATATATTTTGTTGAGATGACCTAACACCATTGATTTTGGGTTGCATTCCACTAACAGAACATGGCAAGATCAAGGTTATGGTCACGGTTGGTTGGTGATCCTCAGTGTTGCAGTAGAAGGTGAGTTTGAGATGAGAGGAATGAGTAGGAAAGAGTGATCCCCTGAACCACCTCCTCGCTTTCTCAGCTTTCACCCCACCTAGGTTTTGTGAGCCTGGAACTTGGGAGACTGTTCTGTAGCCCAGGTCTCCTAAGATTGGCTGCTGGACTTGCCTGAGTTGAGGGTGCGGTGGGTTGACCCTGGGCTGCCCAGCATTCATGTGGTAGTGAAGGAAGGAGGACTGGATCAATCCCATTTCAAAGCATGTCTCTCTGCACTCCACACTGTCCTCCAATGACACTGTAAGGAAACCGCTTTAAGACGTATCAACGGCTTTAAGTAAATGTATTTTCTGGCATCTGGGAGACCTGACATTCTGTGTCATAATGAAAATCTGTCATGTTTCTTTATTTTAAAAATGATAAAACTGCAGGTTCACAGAGTTACATGGCTTACTTGAGGTCACACGGGGATGAGTTTTCAGCACTGCCAATAAAAGCAATCACATGAATTATTCAGTAATTATTCATAGGATCCATATAATTCAGTAAATATTCACATAATTATTTACTAGTTGTTCATTGACCAATTCGTACAAGGCATTTTGCTCAAAACTGTGTTTATATTTGGACATTGTATCTTCATCATAATCCTGTGGTAATGCTGTTATCCGTAAGTAACAGGTAAGAAACCTGAAGAGGAGGGATAGCAAATCATGTATTTGGACATATTTCCTTTTTTTTTTTTTGGTTTTTGTGATGCTGGAAGAATGACCAGAATGAGTCATAGGAAGAGTATACATTCCTGTAGTATTTTCCAGGACAGAGGTGTGACCTCCTAGAGTACTGGGACCAAAATTCCCAAGTGTCTGCAACCTTGCTTTAACAGTATGGGAGATCACCTCTATCACCTGGAATTCCCCTGGAACTCTGGAATATACAAGAGAAGTATGAGACTTGGGTCTTCCCTTGGCTGTGTTTAATTCACTCTTCTATGGAATACCAATGATTCTCACTAAGACTGGCCTTTTCATAAGCACAATGTGCATTTTATGGAGAAGATTTTACACTTTGCTCTATTTAGAAAGAATAAATATGAGCAGTGGTTTAGGTTTTATGCCCTGGACTTAATATGTTTCTGATTCCTGTTTTGAGATTA
AATTCTCATGTAAATAGAAAAATACTTATTATTTCTCATAAGGCCAAGTTTGTTATTAGTTTGAGTTTTTGAAGATGAAGCACAAACTTTTGATTTTATCTTTGTCTGTCTCTGTCAGCGCCACTCGTTGTCTCTCAGTATGACCTGGACTTGCCCCTGCACTTACCCTTGTCCTGCTGAACCATCTCCATGCACTGTCCAATTCCATCAGTGATTCGGGCTCCTTCCAAGGCTCCCTGAAAAGGGCACAGAGATCAGGACATTAGGCACATTCCGGACACAAAGGCAACCCATACTGTAGAGTGGGCAGCTGTGTTTCCACTTCCCTAATATTCCAGTGATGTCCTCAAACTGAAAGGAACACTTTCCCTTTTTAGGGGTCTGTTCTTCATGTCTCAGTGCCTCTGATCTAGTCAACACAACTGTCCTGAATGTGAAAGAACTTGCTAAATTTCTAGTTTCTTGTTAGGTGGCTAAAATAGATTTATAAGACTTCCTTACTTACCCATGACTGCTGAAGTTTGAATTCTTAGCAGTACGATTCGTTTTCTTGTAAGGTGAGCAGCTTAGGAAAGATTGGCCATCTTCCTGTGCAAAAAGAGGCAAACTTAATTTCTACTCAAAGCATGCTTGAATTTGGAATCAGGGCTTCCACTCTTCCGAAGTTGGAGTGTCACTGCGACAGGCATGTGTCCCGAAGGGCTCGTGTCTCTGCTATACTCAAAGTTTAAATGGAGCCCAGCAAGCCAGATGTCCTTTACTTCTAGGTTCCCTCAACAGTTTCTCCTCCGCTTTAGAGACCGCATTGAAAATATTCTTGTTCTGCTGTTGTGTTTTGGCTTTGGAATGATGTGATGCAGCTCAATGGGTCCCACCCCCAACTTGATCAAAGTAAGAAACAGCTGGGAAAGTCAGTGCAAATACAAGTTCATTGTCCTCCTTGCAGGGATTCTGATTCAGAGGGCTCAGGTGGGGCCTGGAATGTTTGTTAACATGACTCAGATGTGCAGTCAATTTGGGGACTCACTGACAGCATTGACCTTACAGTTTATGGGATGATTCTTTCTGTTTGGTGATGAAGAAACTGAGGCACACAGAGTCTGTAACTTGCCCAAGTTCCCCTTGTTGTAAGTCCTGGAGCCAGATCTCAGGTGGACCAGTGCTTCTCTCCCCTATACCTCATTTCTGAGAAAAGGAAATCTTCTGCAATTTGACTTCTTTCATCTAACACATTTCCTCACAACATGCAGCCAGCATCATATTTTGGCCACTTACTATTAAAGTGAGATGCTTTTTTTTTTTTTTTTTTGAGACAGGGTCTTATTCTGTCACCCAGGGTGGAGTGCACTGGTGATTATAGATCACGGCAATCTTGAACTTCTGGGCTCAAGCGATCCTCCTGCCTCAGCTTTCCAAGTAGTTGGAACTATAGGCACACATCACCATTTCTGGCTAATTTTATATTTTTCATAGAGACAAGGTCTTGCTATGTTGCTCAGGCTGGTTTTGAACTTCTGGCCTCAAGCGATCCTCCCACCTAGGCCTCCAAAAGTGCTGGGATTACAGAAGTTAGCCACTGAACCTGGCCCTGAAATGCTTTTATTTCTTTCTTTTTTTTAATGAAAATACTGGACATGGAGATGTGGAAAGACACCTTGCTTTATTACTTTTGTTGTTATTATTATTTCTACAGTAGAATTTATACATCACAAAATTCACCATTTTTAAGCATACATTTCAGTGTCTTTTACCATATTCCAAAACTTTCGCAACCATCGCCACTACCTAATTCCAGAATATTTTCATAATGCCAAAAAGCATGCCTGTACCTATGGGCAGACACTCTCCAATTCCCCCCTTCTTGCGCTCTCTGACAACCACTAATCTACCTTCTCTATATATTGATGTACTTGTTCTGGGCACTTCCTCTATATGGAATAACAAAGTGTGGTATTTTCTATCTGCTTCTTAGAATATTGTTCTCAAGTTTCATCCTTTCT
AGCCTGCGTCAGTACTTCAACTTTTTATGGCCAGATAATATTCCACTATATGGTTATACCACATTTTGTTTATTCATCAACTCATGGTGGTTTAAGATGTTTCCACTTTTTAACTATTAGGAATAATGCTGCTGTGAACAGCTTTGTACAGGTTTTTGAGTGAACATCTGTTTTTCATTTTCTTGGTTATAAACCTAGGAGTGCAATTGCTGCATCATATGTCACTTTATGTTTCACTTTTTGAGGAACTCACACACTGTTTACTAACTTCAGTAGCTATATCATTTTAGATTCCCAATAGTAATATATGAGAATTCCATATTCTCCATCACTTTTGAAACATGTGTTGTCTTTATTTTTTTCTTAAGTCATACTGCTGGGTGTGAAGTGGTATCTCATTTTGGTTTAAATTTACATTTTCCTAATGACGAAAAACATTGAACATCTTTGCATGTGCTTCTTGGCCATTTGTGTGTTTCCTTTAGAGAAACCTCTACTCACAGCTTTTTTTCCCCATTGTTAAATGTGGTTGTCGTTTATTGCTCAGTTATATGAATTCCTTATACACTCTAGGTACTAGACCTGTGTCAAACATACAATTTGGAAATAGTTCTCCCATTATGTGGATTATCTTTTCACTTCCTTGACAGTGTCCTTTGAAGCATACAAGTTTTTTATTTTAATGAAGTCCATTTATCTATTTTTCGGTTGTTTGTGCCTACTTAAAAAATGTCTAATCCAAAATCACAAAGATTTGTACCTAGGTTTCCTTCAAGACATCGTCTTTTGAATGAGAACTTTCCTGGGTTTTAGAGGAGGGTGGACATTGTTTATTGATGCCTCCTGTCCATTACCGATGTTTCTCTTGATTGTTATTCATATGCTCACCACCCCTCCATGGAGCATCCATGGCCTGTGACAGAGCTCTGGGGACTGATATCCTTCCACTGACTTTGGCGCTGGTGAGAGCCCTGGTCATGTGATTCAGCTTGGCCTTAACCCGACCCAGTTGCACATATTCCTCAGGCCCTTTAGAGTTGAAGTCGAGACCTCTCTGAGAACGCTTGCCAGCCCATGCTCTTCTAAGGCTGGAGCAAACTTCCTCCATCTATTCCAGACAGAGGGGACTGCAGGGGTTGGACTCACTCAAGATATCTCTGGTGTTAGAAAGAAGACCTGTTTCAGGCTTTGGGGAAGATTGTTCAATATGAACTAGGTCCTCTCTAATTATTTTTACCGTATGTGTGACTTCTTTCTAGAAACAAGGGAAGAATATTTATGTTAGAACATTTTGTCTATTCTTTGTCAATTGTTGTTTATCTACAATTTTAACATGGATAAAGGAGAGTTCAGTGTCAATATATTCTTAACAACTAATTACGGCTCATGTCCACCGCCATGCGATCATATTTAAATCTGTCAACTATCCTGTTACTTAGGTATTATCCTGTTCCTGATGAGAAAACAAACTCAGAAAGATTGCAAAATTTCCCTAGGTCACAAAACTAGTGAGGAGAGGAGTAAGAATTAGATATCCGTTCCTTTTGGCCTTCAAAGCTAACCTTGTACCATTAGATCAAACTGATTTACATACTTTTGCTGGAATTAGTCTCAGACTTGTGGTTCTCACTTGATTTTCCCAAGGAAACAGTGTGCCACTTTAATATCATTTCAAACTTTGAAATTTAAAACTCTTTTTATTATACTTTTTTGTCTTTGTTCTATTCCGTTGCTTTTGGTTTCTTCTCAACGGATCCCTCTTATTTATATGCTAAATATTTGTTACCTATTTTCTGTCAATTTTCACCTTTTTGAGTGTTTGTTATCTGTCTGTTGTATGCTAACAGTTTTTCACTGAGGTAAAATTTGCGTAGAGTATACTGCAAAAAAACCTAAAGGCACAGCTTAATAAATTTTAATATAATTATAATTGTAAAGTAACACCCAGTTAAAGACAGAGAACATTTTCCCCCATGCCACAAAGTTCTGATGTGGTCCTT
GCCAGTCAATACTCATCCCCCAAATGAAGAATATATTCTGAATGTTGTCACTGCCTTAGCCCCTTTGTGTTGCTGGAAAGGAATACCAGAGGCTGGGTAAGTTATCAAGACAAGAGGTGCCTTTTGCTCATAGTTCTGCAGGCTGTACAAGAAGCATGGCCCCCGCATCTGCTCCTAATGAGGGCCTGAGGCTGCTTCCACTTGCAGCAGAAGGTGAAAAGGAACCAGGGTGTGCAGAGATCATATGGCGAGAGAGGAAGCAAAAGAGAGCAAGGAAAGGTGAGAGGCACTTTTTAATAACCAGCTCCTACAGGAACTAAGAGAGTGAGAATTCACTCACTACCTTCTCCCAGGGTGGGGATTCATCTATTCATGAGGGATCCACTCCCATGACCCAAACACCTCCCATTTACCCCCACCTCCAACACTGGGGACCACATTTGAACATGTGATTTGGAGGGGACCAATATTTAAACTTAGCAGCCACCATAGATTCATTTTGCTTGATCATGTGCTTCATAAAAATGGAATCATTTTGGCTGGGCCTGGTGGCTCATGCCTGTAATCCCAAGACTTTGCAAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGCGTTCAAGACCAGCCTGGCCAACATGGTAAAACCCTGCCTCTACTGAAAATACAAAAAATTAGCCAGGCATGGTGGCCGGTGCCTGTAATCCCAGGCACCAGATATGTACTGGTATCTCATATGTACAGGACATGTACTGGTATCTCATTGTTGTATTGATTGATGTTCCTGATGGCTAAACTGTAGAGCATCTTTTCCTATGCTAATTGACCATTCATGTATCTTCTTTTCTTAAGTACCTATTCAAGTCTTTTGAGAAATTGTTTCATTGTGCTGTTTATCTTATTAAACTTATATATATATACATACATATATATACAAATACACTCTAAAAAACCCCTTTGTTGGAAATAAATATATCTCCTATATTGTGGTTTCTTTTAATGTTCTCTTAATGTTCCCTGTTTGGAGATAACGATAGATAATCTTCAAAAAGGTGAATATACACACCCACACCCACCCACACACATACACACACACACACACACACACACACACACACACGTGAGCCACCGGATCCAGCCTGTTGAATTTATTTCTAAGCACAACATGTATTTAGATGTTACTTGAAATGAAATTGTATTTTTATTTCATTTTCCAAATGCTCATTGCTAATACACAGAAATACAAAAGACTACTTCTATTGAGCTTATATTCTGCAACATTACCAAACTCACTAATTACTTTTGGCAGATTTTTATAGATTTCTAGGATTATTAACATACACAGTCATTATCTGTGAATAAAGACAGCTTCAATTCTTTCTTTTCAATCTTTTCAATACTTTTATTTTTCTTACTTTATTGCATTGATTTAGATCTCTAGTATAATGCTGAATTGAAAGAATAACAACAGATATTCTACTTTTTTCTCTGATTTAATAGAAAAGCATTCAATCCTATGCCATTTAATATAATGTTACCTCTGAGTTTTTTTCAAATCTACCCTTAATAGGGTTGAAAGTGTTGCCTTCTCTTCTTATCATGCTGAGAGTTTTCTGGGGTTTGTTTTTATAAATCATGAAAAAAGTTTTCAATTGTGCCAAATGCTTTTACTGTGTATGACAAGGTAATCATATGGTTTTTCTCTTTTGCCCTGATAATACATAACATTACATTTTCTTAAATATAAAAAAGATTTCTTGAATCAAGCTAGGACAGTTTTTTTAATTATAAACTTTTAACAAATATATTGAAATATAACTTACATGCAATTGAGATGCATGAAAGTGTATAATCATTAAAGTGTATAATTTTAAGAGTTTGAGCACACTATACACGAGTCAAAGAGAAAGGACAGAAAATACTAACGATGGCTCAGCACATGTGGTCTATCTTGCTGAATGCTCTATGTGAGTTTGAGAAGAGTTATTTGTTAGCTGTTCTTA
GATGTATTTTGCTTAAATATCGACCTGGCTAACATGTGTCATTGATTGTGTGAATTAATTTTGTTCTAGTGGGCAGTAAAATTACTGTCTGATCACTTTGGACTTATGTGGACTGGTTCATGTTTTATTACAACGGATTCATGGAAAGCCCACAGCATTTCCCAAGACCCTCTAATTTGGCAGGACTCAATCACCAATCCACCCCTTTGTGAATTTGTCAGGGTTTGCTTTTAGGCTTTAGCAGGTTGGTCTACAATAGGCCTTATTGAAAAGTGTGACACTTATTCCTAAAGCACATCCATTCTAGTGTCTCAGTTGGATACCTGGGTGCTAATGAGGTGTGCATGAGTTCTTCCCACCATGGATGGCAGAAACTCCATCATACATTCCCCAACCCTCCTCCACCTCAAGTACCTCTGGTCCAAACTCAATTTCATAGCAGCCACCCCTCTGTTAAATCTGTTAGTCTTTTCCTTGTGCAGGTAGAGTCCACTCCTTGATAAGTATGCACATGGAACCCCACATAGACTTTGAGAGCTGCACCTTTGATCAGCTGTCTCCTCACTGGTGCCCTGCCCTGCAGATTGCAGTTGCTTCAGCCGTCTTGAACTCTGATCTCTGCCTTCTCAGCTCAGTGAGCTGCCCTGCCCTGAGTGGACTCTAGCTCACTATGCAGCTGCTGAGAAATTCTCCCCAAACAACTAGGAAATCATGGGGCTTCCCCCTTAAGTTTTCTCTTGGACTGCCTGTTGTACACTGCTGAAAACAATTTTACGTTTGTTTATGGAGGCAGGGTTAGTCTGATATGATTTATTCTAACAGACAGAAGCAGAAATCTGTTATACTCTTTTAATTACTGTGTCTTTATAATATTATGGTAGACAGAATCCTAAGATGACCCCCAGTGATCTTTGCTCTTATATAATCACTTCCTCCTGAGTGTAGACAAAGCTACTGAGGAGATGTCACTCCTGTGATTGTGTTACAATTTATGGCAAAAACAAGTTAACAGATGTAATCGAGATCCCAAATCGGTCCAATTTAAGATAGACAGATTATCTGATGAGCTTGACCTAGTGAACGTGAGTTCCTTGGAGGGACTGAGGACTTCCTGGAGAGATGTGAAGTGCAGGAGGGTTTCCATGCAGGGCGATCCTCCTCTGCTGGCTGGAGGAAGCATGCAGTGGGAACATGGGAGGCCTCTAGGAGCAGCGAGAGGCCCCTGGCTGACAGCCAGCAAGAAAACAGAGATCTCAGTCCTACAGTCACAAGGAACTGAACTCAGCTGACAACCTGAGGAAACTTGAGAGGAAGTTCTTCCCCAGAACCTCCAGAAAGAAACCCAGCCTAATTTCAGCCTGTGAGGCCCTGAGAAGAAGACCCAGAGAATCCAGGCCTGAACTTCTGATCTGTGGACACTGCAAGAAAATAAATCATTCTTATTTTACGCCGCTAATGCTTGCAGTAATTTAGTATGCAGCAATAGAAAATTAATACAAATAAAATGGAGAAGGCTTTGGAGTGGGGACAAGAAGGAAACGGTGGGAGAGGGATGCCTGTATGCTGATATGGTTGATGCCTGTATGGTTGAATTGGGTCTACCGTTCCTCATCTAATTAGCTATGGTCTATTAAGGTGCATAGCTACACACAAATATTGGTACTACGTTCAATTCAGAGGAATAAGATATTGCATTCTTGACAGTAGACAAGAACACCCTGAATTTGGGGTCACTGTATCATAAGTCATGTTATCAGGTCCCTCTAGGAAGGCTTAGAGGAAGATTTCCAGGATACACTTGTGACAACATTGAAGGCTTCTTTTTTCCCCAAAGGGACCCGATCTCCCCTCAGTCGAGAAGCTCCAAGTCTCTGAACTGGATGCCAGGTTATAAATTCCCCCTATACTGACTCCATCAGGCTTCTGTCCTCAGAACTAGAGTTTATCAGTAAAAGATAGACTCATGGGAGTCTAGGCATTTATTCTCTTATTTTATATAA
ATCAGTTAATGTGCAGGAACAAAACAGACTTTGAAGAAAGACACTCACAGTTGCCACAGGAAAACACCTTCAACATCCTCATGAGTCATCATGGGTGTTCTGTTGGGAGGACTTGATAGGAGGCTTTCCTCCTCACGGGCTAGTGCAGATCCAGGGGAAATGTCATCAAGTCCTCCATTCGGAGGGTAGCAGCTGAGGCTGCTGATTCGTTAGGCCTCCTGCAGCTGGAGATGCAAGTAGTGCATTTTCATGGCCACCGCAGGGCCCTCAGTTTAGCATTCTTCAGAGCCAGCATCCAACAAGCCACAGAAGCTCTGAGTATTTCCCTTTCTTCAGTCACCCACATAAATGGCTTCAGGGCCTTCTGGGGAAGGCCTGAAGGAAGATTTACAGCATACACTTGTGGCAGCATTGAAGGCTTCACTCTTCCTCAAGGGATCCAATCTCCCCTCAGTCAAGAAGCTCCAGGTATCTGAACTGGATGCCAGGTCATAAATTCCCACTATGGTGACTCCATCAGGTCTCTGTCCTCAGAACTAGAGCTTTTCTAAGTGTAACGTAAGTTGATTTCTTAGTAGATGTCCCATCCATTACATTCCCAGACACCTCACAATGATTCGAATGATTAGTAACCACCACATATCCCTGCCTCTCAGGGAAATCCCTCCCGCCTTGTCTCTAGATGGCCAAGTCCCACGGCCTGTCCTCTACTCTTCCAGAACCCTGTTGTTCTCACTGACAGCAGGGAGGGCAAATCCATGCAGCAGCTCCCGCCATGACCTCCAGCCTGCAGAGGATGGGCGCCACAGGACTTTTAAACGCATGCCGCTGTTCCCCTCACCTGTGCATTTCTTAACGCCTTGGTGAGGAGAATGTCTCTGGATCTTCCTTGATGGGAGCTAAAGGAACAAAGGTAAATAATGCTATGGGACCCACTGAGAACTGGGGCTGTGGAAGAGTGGCCACTGAAGTAATAGACAGATGCAGCTATTGCCAGATACTCAGTGCCAGAGCAGGGAGGGACAGGGAAGAAATACGGACCTCACCTTCCTCTCACTTCCAGGATCCATCGGGGGCCCTCCATTGCTAAACCTAACTAGAAGTGTGCACGCACGGGAGCCAGGGATGCATTCTAGGAGGGACGAGCCCCGAGTGGCATGAGACAGGATGGAAATGAGTGGACAGTGGATCTGTGGGAAGAAGGAGGGGATGTTATGGGAAAACAAAAGGAGAATACTAGCTAAGAACGCTAGGTGACATTAATATTCCGAAGTCTGTGCTCATATTCAGCAAAGAAAGTTCAGCATAAAGCACTAAATAAGGAGTCAAGATATTGTACTTCCAACTGTTGTTCCAACAGCTGTATTATGAAGGGCCACTTTATTTCATGCCTTTCTAATTTGACCTAAAGTGCCAGGTGGCACTGGGGCTGGCACAGCCTTGCTCAATTATGTGTTGCAGAGTACACAGAGACTGCCAGGCTGAGGGAAGATGCAAGAGAATAGAAGAGATGCTCTCAGGGAACAAGAGACCACATGGCCCCAGAGTCAGGGGCAGCATCAGCCACTGTCAGCTGCTCATTTTCCCAGACAGAGCCCACAAGCCTCAGCCATGCTTTGCTTCTGCAAGACGCTTCTTCACCTTTTCAATAAACCTGCCTGAATTTAAGCTGACAGGGTTTATTTCTCCTTCATCATAAATGAAATTCTTCACCACAACAATCTCCAATGAATTTTGGGCACAGCAGGCAGGCCCATTTCTGCTTCTGTTCCACTATCTCTCCTGTAGGTTGAAAAGGAGGAGGTACTGAATTACCTCCAAATGTTCCTCTGGCTCTGATATTCTGTTATTCTGGTTCCTTTTTGGCTACTTTGTTTTTGGTAGCGTGTATCCTAAGGCGTCCAGTTGAACAACTTTTGTCTACTGTGTCCAGGCATTCCTGGTGGTATTTCAGATAAGACTCTCTTGGGTTGCTGAACTCACAACCACTGAACCAATTC
TATGACCATCTGTTTCATGGCCACATGTTTGCTCATTTTATATGTACATAAAGGGAGGGGACAGACAGCAAACTTGCGTGTTACAAATTGTATCATCTTAAAAAGGAAACAAGGCAACACTTTGCAATAAAACCTTAAGATGCATGAAATTTGAGCCTAATGCAATAAAGGATGCCCATAAAATTCTTATCTAAAGAATGTTTCGAAAATTGTTGTACAAGGACATCATCATTTAAAGTGATATGAAGAAACCTTCTCAGCTAAGCATATGGGCTAGATTAGAGAGAAAAATAAAGGACCCATCTCTGCCCTGGAAAAACTGCTGGTAGCATCTTTCAAAAAGCTCTCTGTGTTTGAGTACGCACCTTGATCCATAGGCTCACATTTGATCCCAACTGGCAGCTGCTTCTTGGCATTAACATTGGATTCCCAACTAGTAAATCTTACCAAGATCTGACTTTCTGCAGATATAATATTATTTTGTTTGACCATCCTTATCTTCAAGGGCTACCAAGAAGGAACCAAGAATTTATTTACCTCCCCAAGGGAAAAGGTTTTACCAATGAGACCCTTTCTCACCATGACCCCAGGACCCCATATGCCCTGTTCACTTGAGTGCCCTGTGTGGCCTGATAGAAGCTCATGCTGGTCACAGGATTCCTTATATGACTAGCCTCCTTCCTGAATCCCAATTTCATGGTGGTGGTCATGACAGGTGTCCTGTATCCCATGCTCATGTCCCTGAAGTCACCAGCCTATCTCCAGTTAGAAAAAATTACATGTATATAGAGAGGCCTCTTTGGAAGGAGCAAAAGCTTTCTCACCTTCGTACACTAATGGTTGGAAGGTACAACAGCATATGCACTTTGGGAAAAAATATCTGGCATATTCTTACAGAAACAAACAACTACCTATTCTATGACTCAGTAATTCCTAAGCATTTATCCAAGAGAAACTAAAACCTATGTCCAGAAAATGACTTATACAAGAATGTTCATAGCAGTTTTATTCATAATACAAAAAACTGGAAACATTCAAGTATCTGTCAATACAAGAATGGATCAATAAACTGTGATACACTCATTCCATGGAATGGCTAAAGGAACAAACTGGTGACACACAGAACAACATGGATGAATCTCAAAAACATTTGGAGTGCGATAGAAGCCATACCCAAAAAAGTGTGAGAAAAAAAGATAAATAATAATGGTTCCAAGAAATGCACAGCAGACAGCCCAGAGGCAAAGACCCACAGGACGGCGGGCCGGTCCCAGGCTGTCGATCCTAATTAAGAAACTTCTGCTGGATTTTGCCCAGCTCCATTTCCAAACTATTTTGGGTCAGTGACTTCTTTATCCCTTCCATGTTGCCTCATTTTGAACTAGAATCACTGTAAGTGTTATTCTATGTCTGTCACATCATTCCACAGTAGGGGCAGATAAGCTGTTTAGAATGGCTAAAATTCAAAAAGGTGAACACACCAAATGCTGTCAAAGATGAGGAGCAACCAGAACTTTCCATCGCTAGTGGAAATCAAAAGGGTACAGTCACTTTGGAAAACTTAAGTTCACTCAAAATCCTGCACAGAAGTACTTACAGCAATTTTATTCATCATCGCCAAAACTTGGAAGTGCCCAAGATGTCTTTCACCAAGCGAAAGAATAAACAAACTGTTGTAGCCATACAAGGAAATCTGATTCACTGATTTTAAAAAACAAGTTATCAAGCCATGAAAAGACATGAAGGAACTTAAAGTACATAATGCTAGAAAGAAGCCAGTCTGGAAACCCACATACTGTACCACTCCAACTCTAGGACATTCTTGGAAAGTCAAAAAGATAGAAGTAGTAAAATGGTGAGTGGTTGTCAGGGGTGGAGGAGAGGAGGACGCGTGAAATGGTGAAGCACAGGGAATTTTCAGCAGTGAAACTCTTTCGCATGATGCTGTATTGGGGATTTAGGACATTATGTAATTGCCAAAACCCATAATCTGTGAAACT
CAAAGAATGACCTCTAATGTAAACTATGGACTTTAGTTGATAATGACGTATCAACAGTGGTTCATCAATTGTAATGAATGGACCACACTAATACAACATACTAGTAGGGAAAATTGTGTGCTGGAGGACAGGGGAGCCTAGGAGAACTCTCTGTATTATCCACTCAAGTTTTCTGTAAACCTAGAACTGTTCTAAAAAATAATGTCTATTAACTGTTTTTTTAATTAGGATGCAGCAGCCCCATATCAAGGTTTTGGTGGCATCCTGTAATTGTGTGGTTAGTACTTGGCATTGAAGTGCACCAACCTGGAGTCAGAGCAGTTGGAGATTTCAAGGCCTGTGCCATTTACCTCTAACCCTGGGGTGCCCCTGGAATACAGATAGCAGATCGGTTAAGGAGAAGCAGCCTCAGCAATCTAGACAGTGCAGGTTTCTGGTGAGGACAGGTAAAAACCATCTGGGTGGGCAGAACTTGGTGAAGACCAGAAACCACTGAGACTCAGCAGCTGCCGCAGTGGCACCCACAAATCAAAGGAGGGGGCTGGGAAGAGCTAAGGGCTACTGGATGAGCTCTCTGCCTGCAAGACAGAAGCAGATCCAGAGATTTTGGAAAATAATGTAGGTTTCAGTACAGTGTGATCTCTTCAAAAAAGTAGAGAGAATGAAAAGGAAAGAAAAAGAGAGAGCATGAGAGAGAAAGAAGAAGAAAAGAAGAAAGGAAGAAAGGAAAGAAGGGAGGGAAGGAGGGAAGGAGGAAGGAAGGGAGGGATGGAGGGCGGGTGGGAAGGAGGGAAAGAATAAAAAGAGAGAGAAAGAGAGTTGGAGGGAAGTAGGGAAGGAAGGAAGGAAGGAAGGAAATGAACAAATTTACATGAAGATGAGAACAGTGGGGAAACTTACACCACCAATATTTTCCATTAACAGGAACACGCTAAGTAGTTATTAGAGAAAGACACGCTACTGTAAAACAATATACTGTTTCCATGGGGTACAACAACCCCTTCCTCCTCCTCTGAAACACATTCTATCTCTGGCTCACTGTTGCCAGAGACACTGAGTCTTGTCTTTGGATACGTTCTGGTGCCCACAAGAATGAGATGAGACAGTGGATCCCAGAACACCAGGCCACGAACTTCCCTGTTGCTCCTTGTCCACTCCAGAAGCTACCCAGCTGCAGTTGGGGACCTCAGCCCCTGGGTCTGATGTCATCCATTTGCCTTTCTCAATGGACTTCTCTCCTTGCACTGGCTCCTACTCCCCCAGGACCTGTGGGTGACCACATGAGAAGAACACAAACAGGCCATGCCCCTTTCTTTCTCCCCCTCTCAATGCCTGCAGTAGTGGGTTCCATGGGGTAGTGACCTGAGATTTACTCATTGTGGGGCCTCTAGCCCAGAGCAGGGCCTACTACCTCACAGTCACCCCATGAATGCTCAGTGAAAGAAGACGTCCACCACAAGGTCCTGGGGAACCAAGAATTCCACTGTGGCCCATAAATTCTAAGTCTACAGGATTCTGGAATGGGAGATGGGAAAGGCCTTCAAAAGTGGCCACTTTTAACCCATTATACTGGCAACTGAGCCATGTTTCCCCATCCTGGACACATCCAGAGGGCACTGCCTAAAACCAGACACATCTCCCCACCCAGGACAGTGTAGGAGCCTTAGCCTGGGGGATGCAGGTGGACAGGGAGGGGGTGAGCCACCAAAGCTGAAGAGCAGAAAGCAGGTGAAAGGGGACAGCAGGGTGGAAACAGAGAGAAATGGGGGCAGAGAATGGGGGGTGAGAGGGGAAGAGTGAGGAGAGGGATGCAGATCTAGCTAGTAAGGAAAAGTCCTGGAGAGAACACTGTCCTCTCCTGAAGTAAAATCACTTCTACCTGACCACGGCACTGCAGCTCATGGGCAGCACATGCTGTGGATATTTGTTCATTCATTTAACAAATATTTATTTAATATCTGTTGCATGCCAAGCAAGGCCCTGCAATGTTTAGGGACCTT
GACATCTTCCCTTCACATCTGAGTCATAATACAAAGAGGACTCTCTGACCCCACTGAGCTGGCAATGCCTCGGGATTTTTACCTGTTGGATCTGGCAGCTCTTGATGTCAGCCCACACCATGTGAGGCTGCTCTTGGTGCACCCAATGGGGAAGTTTCTACATCAGGGCCTCGGAGAATCCACTGGAAGCCCTGGACAGTGGGAGTCAGCGGCATCCCCAGTGTGGAGGCCAAGAGCACACAGTGCTTAAGCTCCAGGCACCCTCAGGAGGACGGCAAGGGACAATTGGCTGGTGAGAGCCCGGGTCACCGGGAACCTTCGCCTGGGTCTAAACAGGATTTGCCTTCAGATTGCCTGTGAGATAAAAGAGAGAAATCAAGGTTAACGTTGAGATTTAGGGCTTCGGTAACTTGAAGGATGGAGCTGCCATTTACGGAGACTGGGAAGACCCAGGGAAGAGCAGATTGAAAGGTGGTGGGAACTAGAGGTGGTTGGGTTTCTGTCATATGTAATCAACAGTCCTGACCAGCCTGGGCAACATAGTAAGACCCCGTCTGGGAAAAGAAAAAAGGAAAAATAAGCTGAGCATGGTGGTGCACACTTGTAGTCTCAGCTACTTGGGAGGCTGAGGCAGGAGGATTCCTTGAGCCTTCAGTTAGCGGTTAGTGAGCTATGATGGCACCACTGTACTCCAGCCTGGGGGGAAAAAAATAAAGAGTCCTGACTAAATACTAGAGTAGCCAGGGAAGTTTTCACAAAGTAAGTAATATTTGAGGCAGATCTTAGTGAACAAGAATTCCATTATTTCTGTTAGGGAATTAAGAGAGTGTGGGTGTCGTTAGTTAATGCTTATTAAAGTAGCTTTGGAATCTCATCTACTGGTCTAGCTGGTCTATCTGTACACGTATATTGTATATGCTGTCTCTCTGAGCTTTCGCTAGGTTATGCTACGGTAACAAAAGCCCCAAAATCTTAGCAGCTACACATACGAAGGTTTATTTTTCATTGACATGTCCTTTTATGGCAGGTTGACTGTGACTCTACTCTATACAAGCTACTTTATTTGTTAGATGGTGAAAACTGTGATACTCGGAGGTTGTTGAATATGGTATTAGTATGTTCATTCATTCATTCATTTAAGAAATATTTATTCAATATCTGTTTCATGCCAGGCAAGGTCAAGTACTGAGAATACACTGGTGAATCAAAGAGACAAAATCTCTAATTGCCAGGAGCTTATATTGAAAATCAGATTAAACACATACAAAATCATCATAATAACAACAATGAATACTATATTCATAAATAATAGCTGTAAGAGATTTTAGTACATCTTTTAAATTAGAAAAATATAAAAATTATTAAAACTAAAATGGCCAGGTGTGATGGCTCATGCCTGTGATCCCAACACTTTGGGATGCCAAGGTGGGAGGATCATTTCAGCCCAGGAGTTTGAAACCAGTCTGGGCACTACAGGAAAACCCTGTCTACAAAAAGGAGAAAATTAGCCGGGCACAGTGGTGCATGCCTGTAGACCCAGCTACTAAGGAGGCTGAGGTGGGAGGAGTGCTTGAGCCTGAGAGATCAAGGCTGCAGAGAGCCATGATCATACCACTGCACTCCAGCCTGGGCGACAGAGCGAGACACTGTCTCAAGAAAAAAAAAAAAATTATTTGATGTAGTCCTAAAACTATTATGTAGAATACTATTGTTTATATCACAGCACGTGAGCCCCTTAAATGGCTTAACACTTATTTAGGTATGATCCATAAAGCTTTTCTGGTAATTAAGTATACTTAAGAACAATTAAGTATAAAAGAGTTACTGCCTTGACAGGAAGATTGTAAAAATTTTAAAAAGACAAATAAATAAAAGAGTAAAAACTGTAGCTCTGTGAGGCTCAAATAACATCTAATTCAAGTCACAATGAACATCTAGCAATCATTCTGAACACCATATAATTCACTTAATACGTTTTGCCTGAACACCCAACACATCTG
AATTACCAACACCCATATGTAGCCAAGAAACTGGCAATCATTTATAAATTATCACCTATGACTCCATCTGCTCTACGCACTTATTTTTTAAATTTTATTCATTTATTTATTATTTTTATTTGTTGTAGAGATGGGATCTCACTATGTTACCCAAGTTGGTCCAGAAACAGAAACAGACCCACACTAATTTCATAAATCAGATGACCATACAGTCATTCGATTTATGAAAAAAAGTGCCACATGGTGCGGAAGGAAAAGGATGGTCTTTTCAATAAATGGTGCTGGATCAAGCAGACACATCCATGTAGTAAAAAGTGAATCATAGCCAGGTGGGGTGGCTCACACCTGTAATTCCAGCACTCTGGGAGGCTGAAGCGGGCAGATTACTTGAGCCCAGGAGTTCGAGACCAACCTGGGAAACATGTTGAATCCCCATCTCTACAAAAAATATGAAAATTAGCCAGGCATGGTGGCACATGCCTATAGTCGCAGCTACTCAGGAGGCTGAGGTGGGAGGATCACTTGAGCCAGGAGATGGAGGTTGAGTGAGCTGAGATCCTGCCACCACACTCTAGCCTGGGCAATAATAGACTGAGGCCCTGTCTGAAAAAAAAAAAAAGCAAAAACTAAAATAAAATCGTTATAAGGTTAACACAGAAAAATGTGTTCATACTCTTAGGTTAGGCATTGATTTCTTAAACAGGACACAAAAAACAGTAACCATAAAGGAAAAGATTAATAAAGTATAATTTCATTAAAATGAAGAATCTCAGGCTGGGTGCAGTGGCTCATGCCTGTAATCCCAACCCTTTGGGAGGCCGAGGCAGGTGTATCACTTGAGCCTAGGAATTCCAGACCAGCCTATGCAACGTGGCAAAACCCATCTCTACTAAAAATACAGAAAACAGCTGAGTGTGGTGGTACTCCCCTGTAGGTCCCAGCTACTTGGGGGCTGAGGCAGGGGGATCACCTGAGCCTTGTGAGGTCAAGGTTGCAGTGAGCTGTGATTGTGCCACTGCACTCCAGCCTGGGCGATGGAGTGAGATCCTGTCTCAAAAAGAAAAAAAAAAAAAGAGAATCTCCCTTCATGAAAAAACACCATAAAAGAGTGAAAACGCAAGCTACAGATTGAAAAAAGGGAAATGCAATACATATAAATCCTAGAAAGGAGGCATATCCAGAATAAAGTATTACAAATCAACAGGAAAACAAGCATATCAATGAAAACTGGATAAAAAGATTTAACAGGCACGTCACAAAAGAGGACATATAAATGGCAATAAAAGATACTCAATCTCAATGAAACCACACTGATATATTACTGCACCCCTACTAGAATGGCAAAATAATTTTTAACTGACAGGTATCAGCGAAGATGTGGGGTAACCAGCATATCCCTGCTAAATGGTACAACTACTTTGGGAAAATGTTCAACAATATGTAATACTAAAGTTTTATCATTCATATACCTCTAAAACCAACAATGCCACCCCTACAAATATACCCCAGACTAGTAATGTTCAATTTCTTGATCTGTGGTGGTTCACTTGGTAAAAATTCATTACTTTTTTTTTTTTTTTTTTGAGACAGGGTCTCACTCTGCCATCCAGGTCGGAGTGCACTGCCATGATCACGGCTCACTGCAATCTCAACCTCCCGGGCTCTGGTGATCCTCCCAACTCAGCCTACCGGGTAGCTGGGACTACAGGCACACGCCACCACACACAGCTAACTTCTGTATTTTTAGTAGAGAAAGGGTTTTGCCACATTGCCCAGGCTGGTCTGGAAATCCTGGGCTCAAGTGATCTACCCACCTTGGCGTCCCAAAGTGCTGGGATTACAGGTGTGATCACTGCGCCCGGGCCACCTGCACATGTAAAATTGTGAACTTCTGTATACTTCAGTAACTTTTCCAAGATTTCTTTGACGCAAAGTTCTCAGAAATCTTAAAGCTAGCATTTCAGAATAGAAAAAGTAGCTTCTGGTTCACTAGTGAAATT
TTACCAATAGAATTTAAAAACAAAAAGCTACTAACGCATATCAGCTCAGAACACTACCAGCAGATCTTTTCTTTAACTTCCTGAAGCACTGGGATTCATTCTTTTGGCAAAGAAAGGATGAACAACACTGTAACCCAAAGAAAAGATACCACTGCCAGAAAAGACTTCTTTTCGAAAGCAGCTCTAAGCAAAAGATAGGAGGAAAACAAGGAAGCCAGGCCAAACGTCTTGGTTAACTCTCCGCTGAAAGGACGCCACATGAGATGATCTAAGAAGCCAGCCAGCCAGCCAGACGCAGGGAAATCACAGCAACTCTTTGGAGTGCAAACAGCAACCCCACAATCCAATCTACCCGAAATCCTGCGGTTCATTTGAGGCTTGCCCCGCTAGTCAGGAGGTGATTCAGTGATGGCTACAAATGCTGCTCATGTGCATCCTGGAGCTGGCACACCTGGCTTGCCCATCACCAGCCTGGAGACACCGCCAGGAGCAGAAGCCCGGAGGCCAGTAAAGACCCCAACTTTGCAAGTCAGGGGCGCGAGCGCGCTCGCCTCTCAGGTCCGCAGAGGGAACGGATTTCTGGCCTGGAGGGTGGGGTGCGGGGTCAGTGTCCTCTACAGGATATAGGAGGACGTGCCCCCGAAGCTGCTCCGTCCCTCCACCCCCTGGGATGCCACAGAACACCCGCCAGCGAGTTTCTTCCCCAGCGCCCACGAGAGTTGGGCTGCGGGCGGCAGCGGCAGGCGAAGAATCCAGCGCGGGGAACTCAGGCCCCGGCGGTGCACGACCCCCCACAGCCCCCACCCGCCCCCGCGCTCGCGCAACAAAACTTGCCACGGCCGCGCCTCGACCCAGCTGTGCGCCCGCGGGTCCCGGATTCACCGCCCGCCCAGCCTGGCGCGGCGCCCTCACCTCAGAAACGCTGGGTGGACTTCGCGTAACTTCCCATTCACAGGGCAGCCGGCAGCCGCGCCGCCGCGCCTCGGCCCAGCTCCTGGCGCCGCAGATCGCCCGTCCCGCGTTCCCAAAAGCACCGCGCTCGCTCAGAAGCTCGGGCAGCCTCGCGACCCTCACCTACGCCTCCCAGTACCGCCGCTGTCTCAACCGCCACCCAGCCCCTCGCCTGCGCCTGCGCCTGCAGCCCACTGGCTCCTCAGGATCCCGATGGGCGTGTCAGGAT", - 16888922 - 1); - - //coding region between 34..528 - var codingRegion = new CodingRegion(16890438, 16918516, 1007, 4651, 3645); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 28, 16888922, 16890681, 4408, 6167), - new TranscriptRegion(TranscriptRegionType.Intron, 27, 16890682, 16891301, 4407, 4408), - new TranscriptRegion(TranscriptRegionType.Exon, 27, 16891302, 16891413, 4296, 4407), - new TranscriptRegion(TranscriptRegionType.Intron, 26, 16891414, 16892127, 4295, 4296), - new TranscriptRegion(TranscriptRegionType.Exon, 26, 16892128, 16892302, 4121, 4295), - new TranscriptRegion(TranscriptRegionType.Intron, 25, 16892303, 16893674, 3844, 4121), - new TranscriptRegion(TranscriptRegionType.Exon, 25, 16893675, 16893846, 3673, 3844), - new 
TranscriptRegion(TranscriptRegionType.Intron, 24, 16893847, 16894473, 3672, 3673), - new TranscriptRegion(TranscriptRegionType.Exon, 24, 16894474, 16894525, 3621, 3672), - new TranscriptRegion(TranscriptRegionType.Intron, 23, 16894526, 16895567, 3620, 3621), - new TranscriptRegion(TranscriptRegionType.Exon, 23, 16895568, 16895731, 3457, 3620), - new TranscriptRegion(TranscriptRegionType.Intron, 22, 16895732, 16899636, 3456, 3457), - new TranscriptRegion(TranscriptRegionType.Exon, 22, 16899637, 16899688, 3405, 3456), - new TranscriptRegion(TranscriptRegionType.Intron, 21, 16899689, 16900981, 3404, 3405), - new TranscriptRegion(TranscriptRegionType.Exon, 21, 16900982, 16901187, 3199, 3404), - new TranscriptRegion(TranscriptRegionType.Intron, 20, 16901188, 16901651, 3198, 3199), - new TranscriptRegion(TranscriptRegionType.Exon, 20, 16901652, 16901724, 3126, 3198), - new TranscriptRegion(TranscriptRegionType.Intron, 19, 16901725, 16902761, 3125, 3126), - new TranscriptRegion(TranscriptRegionType.Exon, 19, 16902762, 16902976, 2911, 3125), - new TranscriptRegion(TranscriptRegionType.Intron, 18, 16902977, 16903811, 2910, 2911), - new TranscriptRegion(TranscriptRegionType.Exon, 18, 16903812, 16903914, 2808, 2910), - new TranscriptRegion(TranscriptRegionType.Intron, 17, 16903915, 16905687, 2807, 2808), - new TranscriptRegion(TranscriptRegionType.Exon, 17, 16905688, 16905897, 2598, 2807), - new TranscriptRegion(TranscriptRegionType.Intron, 16, 16905898, 16907239, 2597, 2598), - new TranscriptRegion(TranscriptRegionType.Exon, 16, 16907240, 16907451, 2386, 2597), - new TranscriptRegion(TranscriptRegionType.Intron, 15, 16907452, 16907914, 2385, 2386), - new TranscriptRegion(TranscriptRegionType.Exon, 15, 16907915, 16907987, 2313, 2385), - new TranscriptRegion(TranscriptRegionType.Intron, 14, 16907988, 16909038, 2312, 2313), - new TranscriptRegion(TranscriptRegionType.Exon, 14, 16909039, 16909253, 2098, 2312), - new TranscriptRegion(TranscriptRegionType.Intron, 13, 16909254, 
16910088, 2097, 2098), - new TranscriptRegion(TranscriptRegionType.Exon, 13, 16910089, 16910191, 1995, 2097), - new TranscriptRegion(TranscriptRegionType.Intron, 12, 16910192, 16911983, 1994, 1995), - new TranscriptRegion(TranscriptRegionType.Exon, 12, 16911984, 16912193, 1785, 1994), - new TranscriptRegion(TranscriptRegionType.Intron, 11, 16912194, 16913544, 1784, 1785), - new TranscriptRegion(TranscriptRegionType.Exon, 11, 16913545, 16913756, 1573, 1784), - new TranscriptRegion(TranscriptRegionType.Intron, 10, 16913757, 16914219, 1572, 1573), - new TranscriptRegion(TranscriptRegionType.Exon, 10, 16914220, 16914292, 1500, 1572), - new TranscriptRegion(TranscriptRegionType.Intron, 9, 16914293, 16915343, 1499, 1500), - new TranscriptRegion(TranscriptRegionType.Exon, 9, 16915344, 16915558, 1285, 1499), - new TranscriptRegion(TranscriptRegionType.Intron, 8, 16915559, 16916393, 1284, 1285), - new TranscriptRegion(TranscriptRegionType.Exon, 8, 16916394, 16916496, 1182, 1284), - new TranscriptRegion(TranscriptRegionType.Intron, 7, 16916497, 16918341, 1181, 1182), - new TranscriptRegion(TranscriptRegionType.Exon, 7, 16918342, 16918551, 972, 1181), - new TranscriptRegion(TranscriptRegionType.Intron, 6, 16918552, 16918653, 971, 972), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 16918654, 16918808, 817, 971), - new TranscriptRegion(TranscriptRegionType.Intron, 5, 16918809, 16919935, 816, 817), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 16919936, 16920062, 690, 816), - new TranscriptRegion(TranscriptRegionType.Intron, 4, 16920063, 16921086, 689, 690), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 16921087, 16921156, 620, 689), - new TranscriptRegion(TranscriptRegionType.Intron, 3, 16921157, 16921425, 619, 620), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 16921426, 16921504, 541, 619), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 16921505, 16935002, 540, 541), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 16935003, 16935274, 
269, 540), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 16935275, 16939832, 268, 269), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 16939833, 16940100, 1, 268) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(5892, 5891, "AAAAAAAAAAAAAAAA"), - new RnaEdit(5799, 5799, "T"), - new RnaEdit(5675, 5675, "G"), - new RnaEdit(5655, 5655, "G"), - new RnaEdit(5390, 5390, "G"), - new RnaEdit(5174, 5174, "G"), - new RnaEdit(5150, 5150, "C"), - new RnaEdit(4993, 4993, "A"), - new RnaEdit(4828, 4828, "G"), - new RnaEdit(4683, 4683, "G"), - new RnaEdit(4637, 4637, "G"), - new RnaEdit(4530, 4530, "A"), - new RnaEdit(3845, 3844, - "GAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCC"), - new RnaEdit(3769, 3769, "C"), - new RnaEdit(3554, 3554, "C"), - new RnaEdit(3207, 3207, "A"), - new RnaEdit(3140, 3140, "C"), - new RnaEdit(3136, 3136, "T"), - new RnaEdit(3107, 3107, "T"), - new RnaEdit(3103, 3103, "A"), - new RnaEdit(2993, 2993, "C"), - new RnaEdit(2944, 2944, "G"), - new RnaEdit(2840, 2840, "G"), - new RnaEdit(2810, 2810, "T"), - new RnaEdit(2706, 2707, "GC"), - new RnaEdit(2695, 2695, "T"), - new RnaEdit(2692, 2692, "G"), - new RnaEdit(2509, 2509, "A"), - new RnaEdit(2299, 2299, "A"), - new RnaEdit(2294, 2294, "G"), - new RnaEdit(2290, 2290, "C"), - new RnaEdit(2222, 2222, "G"), - new RnaEdit(2009, 2009, "G"), - new RnaEdit(1964, 1964, "G"), - new RnaEdit(1893, 1894, "GC"), - new RnaEdit(1882, 1882, "T"), - new RnaEdit(1879, 1879, "G"), - new RnaEdit(1696, 1696, "A"), - new RnaEdit(1652, 1652, "G"), - new RnaEdit(1486, 1486, "A"), - new RnaEdit(1481, 1481, "G"), - new RnaEdit(1477, 1477, "C"), - new RnaEdit(1409, 1409, "G"), - new RnaEdit(1405, 1405, "A"), - new RnaEdit(1318, 1318, "G"), - new RnaEdit(1021, 1021, "C"), - new 
RnaEdit(932, 932, "A") - }; - - const byte startExonPhase = 0; - const bool onReverseStrand = true; - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, - rnaEdits); - - var expectedCodingSeq = - "ATGGTGGTATCAGCCGGCCCTTGGTCCAGCGAGAAGGCAGAGACGAACATTTTAGAAATCAACGAGAAATTGCGCCCCCAGCTGGCAGAGAACAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCGACAGAAGAAATACAAGTATGAAGAGTGTAAAGACCTCATAAAATTTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCAGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAGAAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAACTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCCCCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCGAAGACCTCATAAAATCTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGAGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAGAAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAGCTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCCCCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAG
AAATGTTTTGTAACTCAACTGGCCTGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCAAAGACCTCATAAAATCTGTGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCAACTTTTCCAAAAGCTCAGCCCAGAAAATGACAATGATCACGATGAAGATGTTCAAGTTGAGGTGGCTGAGAAAGTGCAGAAATCGTCTGCCCCCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCAGCCACATAGGAAAACCAAAATCACATTTGAGGAAGACAAAGTCGACTCAACTCTCATTGGCTCATCCTCTCATGTTGAATGGGAGGATGCTGTACACATTATCCCAGAAAATGAAAGTGATGATGAGGAAGAGGAAGAAAAAGGGCCAGTGTCTCCCAGGAATCTGCAGGAGTCTGAAGAGGAGGAAGTCCCCCAGGAGTCCTGGGATGAAGGTTATTCGACTCTCTCAATTCCTCCTGAAATGTTGGCCTCGTACCAGTCTTACAGCGGCACATTTCACTCATTAGAGGAACAGCAAGTCTGCATGGCTGTTGACATAGGCGGACATCGGTGGGATCAAGTGAAAAAGGAGGACCAAGAGGCAACAGGTCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGGGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCAGGTTATCTTGAACTGACCGACTCATGCCAGCCCTACAGAAGTGCCTTTTACATATTGGAGCAACAGCGTGTTGGCTGGGCTCTTGACATGGATGAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCCAGGCTCAGCAGGGAGCTGCTGGAGGCAGTAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCCAGTTGTCTTGAACAGCCTGACTCCTGCCTGCCCTATGGAAGTTCCTTTTATGCATTGGAGGAAAAACATGTTGGCTTTTCTCTTGACGTGGGAGAAATTGAAAAGAAGGGGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAACGAAGAAAAGAAGGAGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAAAACCCACCATGCCCCAGGCTCAGCGGCATGCTGATGGAAGTGGAAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCGACTCCGTCAATGTACTTTGAACTACCTGACTCATTCCAGCACTACAGAAGTGTGTTTTACTCATTTGAGGAACAGCACATCAGCTTCGCCCTTGACGTGGACAATAGGTTTCTTACTTTGATGGGAACAAGTCTCCACCTGGTCTTCCAGATGGGAGTCATATTCCCACAGTAA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public 
void RnaEdits_big_test() - { - //NM_005960.1, chrom: chr7:100547052-100611619 - var genomicSeq = new SimpleSequence( - "GCGCTGACGTCTGTCTGTCCGGGTGCAGGGAGAAGGGAGGAAGAGGGGAGAGGTGGGGCGGTGCAAAGGTGAGGCTGTGCTCAGCCCTGACGCTCAGCAAAACCGATAACCAGCACTTTCATTACGTGCACGCCCCAGGGCCACGTCCCTGCCGCTGTCTTGGTCCTGAAGCCTGTTCTGCCCCAGCCCCCTGCCCGCTGGGCCCATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGTAAGGGGGAGAGGCGGAAGGGGGTTGGAGAAAAGCTCCTGATGTGATGTTCCAGGAAAGGGGAGGGAAAAGTGGCTGTAAGGCCTGGGGAGGGGGGATAAGAAGGCACCGCTTGGGGCTCTGGGTGCAGGGAGAACCGAGGCACGGCCTGACTGGGGGAGGGGGCGATGAGGAGAGGTTTCTTCCAGAGCTCCAGGTGCAGGGAAAACCCCGAGGTTGGGAAAGAGTGAGGGAGCTGGGTCTCTGCCACTCTCCACCAAGCACTGAGCAGGTTGCAGCGGCTGAGCCCCAATCTGTATCTGCAGCTGGAGGGTAGAGGGTGGGATTTACGTCTTCCCAGAGCAGTGCCCTTCCTGTCTTGACTCCTTCTGTCACCTGCCTCATGCCCCCAGCTTGAGTGTCCCCTTCACACTGGCCTCTCCCTCCCTGACAGCCCTCTAACTTCTACCCCTGGTCTCGGTCCTCTGGTTTCAGCCTCTCTGCCTTTTGTCCCCCGGCGGCTCCTCCCCAGCTCTGCCGTCACTCTCTTACCCCGGCCAGGGCCCATGTGTCTGGGTACAGCTGTTGGTACCAGGGCCGGGACAGGGAGCTCCTGATGTCCACTTTGCTTCCAAGGGTGCGTCTGAGACTACCCGTCGTGGGGTCCCAGGCTTCTCTCTTCTGCCCTGCAGGAACACCTCGCAATTCCTCTATACTTCTCTTTTCTCTGTACTTCAGTGTCTGCTTCTGATCCCCGATCCCAGGCCACCCAGCCTACAGGCCCATGAGTCCCCTTCTCAGTCACCTCCAGGGCCACATCCTGGAGCCAAGGGCTGTAGCCTGGGGATTCTCATAATCCCTGACCCCACTTCCCTGGCACCCACGAGCTAGGTTGAGACGTGACACCCCAGCTCTCAGCCACAAGATGGGCTGTGCCCGAGGTGAGGGGTAGCAGATCGGGTACTTCCCACTTCCCGTCTGCTGTGGCTGCCTGTCTTCCTTGTCCCTGACACCCCCGACAGCCGGATCTCTGATCCTAACTCTGACAAATTGTGAAATGGGTTGAAATCCACATGCTGGGGTTCATGCTTGTAAACTAATGAATCCCACGGCCAAAAGGGAATAGTATAGAAAAATATGTCTATTTGTGTGATGAACACTCACTGCTAAGCCTTAAGGTCTCCAGAACTCATCACGCCTGACTGCTGAGGTAGCTCCTTCTGGAAGTTTCCTTTTCTATGCTGTCTCTCTGCCTCTTCACCTAGTCCTCACTCCATGCCCTTTGAAGTCATTAGTGTGTGAGCTTAGCCTGTTTCTCTTTGGTGTTCTCTGGGCAGTTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACCATCTCAGCTCACTGCAGCCTCTGCTTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCTACAGGTACACGCCACTACGCCTGGCTAATTTTTGTATTTTGTTTTAGTAGAGAAAGGGTTTCACCATGTTGGTCAGACTGGTCTCGAACTCCTGACCTCAAGTGATCTCCTGCCTTGGCCTTTCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTG
GCCCTCTTTGTTCAGCTTTCCCTGTCTCCTGGTTTTTGTGATGCGCCCCCTGCCAGGACATGGCTGGGTTCTCTCTTTTTCTCTTTGAAAGCGGAGTCAGCCCAGACAGCAGCAGGGTGCCGGGAGAAGCAGTGGTGGGAGCAGAAGGTTAATGGGGGGATGGGAGCACTCCAGGCAGTAGCAGGGGGAGGAGAGGAAGGGGCAGAGGGAGGAGGAGCCTGGGTGTGACCAGAGGAGGGAAGGGAGGGGAGGGAGGCTCTGCCCAGCTCGGCTATATCAGGACAGGAGGACCTGCCATGACAAGGCCAAGGCCCGGTGAGGAATGAGGGCTCCCATGCCCCACTGCTCCCCTAGGAGCAGACAGGCAGTCGTCTCCAGCACCACAAAGCACCCAGCTCCAAGCTGCCTCTGATGCAGGAGTCAGCTGTAATATGCCCTGCCCTCTGTGATGCTGCCTGGAAAATGGGTGAGTGAGTAGCTTACATGAGTGATGTAACAAAATGACCCACGGATTTACCAGTGGATTCCTCTGCTCTGCCGCCAATGCAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCATCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCACCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCATTCACCAATACCAAGACCACCTCACACAGATCTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTGCTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGGAGCTTC
ACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCAACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAAGTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTCTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCTCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATCCCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTACCCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGTTGGACCTTCTGCCTCTCTGTTCCCCTCCTTCCTCCCCTGCAAAATTCCTGTGTCACTGAGGTCAGGCTTTATCCTGAGCTTCCCTTTCTTTCTGTGTTTTCCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGACAGTGTGCTTGCCTTC
CGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGGTGAGTTGCCAGAGCTATGCCTTCTGCACTTCCTCCCACAGGGTGTCACTGACTCTCCCCAGACTTATCCCTCTGTGGGGCCTGGAGGCACCCATGCCTTTTTGCCCGGTCCTTCCCTCCCTGCCATCTCTCCCATGCCCTCCGCTGCCCTGTGTCATGCTCCTCTCCGTCCTCACCCTTAGGAGGTGGCTGGGACTACCCTCCCTCCTGGGCCCATCTCCTGACTTGGGCTGCTTGGAGCTGTATCAGTTTCCAACTGCTGCCGGGCCAACAAACACAAATCTGGCTGCTGGAACAACACGACATTATCATGTTAGAATTCTGTAGATTAGAAGTCTGATGTGGGTGTCACTGGGCTGAAATCAAGGCGTCACCAGGGCTGTGTTGTCTTTCAGCGGCTCCAGGGAAGAATCCATTTTTTTGCCCTTTGCAGCTTCTGGAGCCTCCCACAGCAAGGCTGCATCTCTCTGTGTCTTTCTCCCATAGCCTCATCTCCCTCTAATGAACTCTGGCCTCCTCAATTGCTTCTCCCACTGTTAAGGACCCTTGTGATAACTTTGCCTCCTCCCCAAATAGTCTATGTTAATTTTCTCAAGATCAGCTGATTACGCCGGGCGGGGTGGCTCACACCTGTAATCCCAGCAGTTTGGGAGGCTGAGGGGGTAGGATCACCTGAGGTCAGGAGTTGGAGACTAGCCTGGCCAACATGGTGAAACTGTCTCTACTATTAGTTGGGCATGGTGGCAGATGCCTATAATCCTCACTATTCAGGAGGCTGAGGCAGGAAAATCGCTTGAATCCAGGAGGTGGATATTGCAGTAAAGCGAGATTTCGCCACTGCACACTAGCGTAGATGACAGAGGGAGAGTGAGACTCTGTCTTAAAAGAAAAAAAAAAATCAGCTGATTGTCTTATAATCCCTGCACTTTGGAAGGCCGAGGAGGGAGTATCGCTTGAGGCGAGGAGTTCAGGACCAGCCTGGGCAACACAGCGAGACCCTCATCTCCACAAAAAATTTTAAAAACTTACCTGGGCATGGTGGCTCATGCCTGTGGTCCCAGTTATTTGGAAGGCTGAGGTGGGAGAATCACTTGAGCCTGGGAGTTCAAGGCTGCAGTGAGCTATGATCCCCCTATTGCACTTCAGCCTGGATGACAGAATGAGACCCTATCTCAACAATAAAAAAAAGTTAGGCTGATTAGCAATCTAATTCAATCTGCACCCTTGATCCTCCCTTGCCATGTAGTATAGCATAGTCACAGTTCTGGGGATTAGGACATGGACATCTTCCCACTATGGGGGCAGCCAGGAGGGACCACAGGCTGACCGCTATCTTTCTGCCTGCTTTCACTCATCTCCACACAATTCCTTCCTTCCTCTCGCTCTCTTCTTTCTTTTCTTCTTTTCTTTTGTCTCTTTCTCTTTTCTTTTTTTCTTTCTCTTTTTCTTTCTTTCTTTCTCTTTCTTTTCTTTCCTTTCTTTTCTTCTTTCCCTCATCTTCACACAATGCTTTCTCCCTTTCTTTCTTTCCTTCTTTCTTTCTCTTTCTTTCTTTCTCTCTCTCTCTCTCTCTCTCTTTCTTTCTTTCTTTCTTTCTTTTTCTCTCTCTCTCTCTATCTTTCTTTTTCATTTTCTTGAGAGACAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGATGCAATCTCGGCTCACTACAACCTCCGCCTCCTGAGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGGCATAATGCATCCTGGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTTGGGGTGGTGGTGTTGGTGGCTTCA
GACAAAAGCAGACAGAGAAGTGACTGGGGACATGCATGCTCTGTGTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTGAGGTGTAGTCTCGCACTCTCACCCTGGCTGGAGTGCAATGGTGCGATCTTGGGTCACTGCAACCTCCGCCTCCCGGGTTCACATGATTCTCTTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGTGCACACCATGACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGTTTTCACTATGTTGGCCAGACTGGTCTCGAACTACTGACTTTGTGATCCACCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCGTCTGGGTCCCCTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGCGGAGGCGGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGGGGTGGAGGGGGTACATAAGGAATCACTCCCTGGGTATTTTTTCGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCTCTTCTTCAGCACACTGGAAGGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGATGACTTTGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAAAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCACGTCTAGGGGCGCCCGGTGGATGATGGCTTGATGCAACAAGAAGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGGTAGGCTAGGCGGGTGGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTGGTGGTGGTGGTGTCGGTGGCTTCAGACAAAAGCAGACAGAGAAGTGACTGGGGACATACATGCTCTGTGCAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCACTGTCATCCTGGCTGGAGTGCAATGGCGTGATCTTGGCTCACTGCAACCTCCGCTTCCCAGGTTCACATGATTCTCTTGCCTCAGCCTCCCGAGCAGCTGAGATTACAGGTGCACACCACCACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGGTTTCACTATGTTGGCCAGACTGGTCTCGAACTACTGACTTTGTGATCCGCCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCATCTGGGTCCCCTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGTGGAGGCAGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGAGGGGTGGAGGAGGTACATAAGGAA
TCGCTCCCTGGGTATTTTTTTGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCCCTTCTTCAGCACACTGGAAAGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGGTGACTTCGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAGAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCGCGTCCAGGGGCGCCCGGTGGATGATGGCTTGATGCAACGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGCAAGGTTAGGTGGGTGGATCACCTGAAACCAGTTCAAGACCAGCCTGGGCAACATAGTGAGAACCCATCCCTACAACAATAAAAATAGTAATAATAATAATAATAATAATAAAATGATTATCCAGGCGTGGTAGTGCACACCTGTAGTCCCAGATACTTGGGAGGCTGAGGAGAAAGGATCACTTTAGCCCAGGAGTTGGAGGCTGCAGTGAGCTACAATGATACCACTGCACTCCAGCCTGGGTGACAGCAAGACTTTGTCTCTATAAAACACACAGAGAGAGGAAGTCAATCATGTCAGTCATTCCTTGTCCTGCCTTCCCAGGCAGACCAAGTCAGGAATGCTGGCAGCCCCTTCTGAAAAGGATGCACGTGGCATCCCAACTCATGACCTCTGCCCTCTTTCCCCCTTCTGGTGCACTTTGGGTTGCTTCTGGAGGTGCCCCTCCAAGGACCCATATGTTCCTGGCTGGGGCACTCTCTAAGGCTGTGGACCCCTCAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGATGCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTCCAGAACGCCAGCCAGGATGCGAACAGCTGCCAGGACTCCCAGAGTGAGCCCGGGCTGGAGGGAGGGGCCAGGGCCTGAGGTGTCACCCCAGCCCACTCCAGCTCAGCCAGGGGGCCACTGGACTCAGGTGCCAGCCCTGTGGTACCTCTGGCAGGTTGGGAGAACGGGAATAAGTCTACACACAATGCCATCAAGAGTGGGGCTAGGGAGGGTCTCCCCAGGACCTGGGTACTGGGGAAGAGACCCCCTGATCGTCAGGCTCAGCATTTCCCGATGGCTGAAGACCTCGGATTATTCAGGGGGGATAAGGGAGAGAACAGGAGTCTTCCCCTGTGGCCCCTCCACACTCCCCCAGACGGAGAGAGCCCTCACTGCCCTCCCTGTGCCTATCCTGCTTCCTGGCCCTAACCCCTTGACCTGCCCCGCCCATTCCATCTGTGCCTGTGTTTCCGCAGCCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGAAGGTGAGGGTGGGGTAAAGGGCTGAGTGGTCTCCCATGGCCATGACCCCTGCCACCAGGGACATTTGCCCATTGAAGCCTGTGGGCAGGGAGAGACCTTTGCGGGAGGCAAGTCATGTGGCCTAGGGAGGCTCTTCCTGGCGTTGGTTAGTGGCTTCCACCTGAGGACAGCAGGGGCCACGAGGAGAGGGTGAGGGTGCTGGGGGTGGCCTCCCCTCATCGAATCCCAGGGTCTACCCCACAGCATCCCACCTCGGAAATGGAATCCTCCTCGCGCATATTCAGAGGCACCATTATCAGGCCCCTGAATAGAATGGATGAGGTCCTTGTCTCTGTGCATACCCCTCCCCAACCCCCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCTCTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGCGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTAGAGACGAGCGGTCCCGCGTGTCGGTAAGGCCCCGCTCACCATCAGCATCAGTCGAGCCCCGCC
CACTCATTCTAGGATGAAGCCTAGCCTCACGCGCCGCCCCGGCTCTGCCCCCAGGCCCTACAGTGGAGCCTCGTCCCCAGAGTCCCGCTCCAAGCCCATCCCCGTTGCCCTACAGTGGAGCCCTGCCCTGGAGCTCTGCTCCGTCGCCCTAAAGTGTAGCCCCGCCTCCTTGATGGGGTTGAGTCCAATCCCCTGGTTCTGGGATAGACCCCGCCCACTCATTCTAGGGTGGGGCCCCGCCCCTTCGTTCTAGGGCTGAACCTTGCCCCCTTCTTCTGGGGTGGAGCCCCGCCCCCTTGTTCTAGGGTGGATCCCCGCCCCCTCCTTTTAGGGTGAAGCCCTGCCCACTTGATCTAAAGTGGAATCCCGCCCCCTCACCTAGGGTAGAGCCCCGCCCCCTCGTTCTAGGGTGGAGACCCGTCCGCTTGTTCTACGGTGGATTCCGGCCGCTTGTCTAGGGTGGAACCCCCCAGCTTGCCCTAGGGTGGAACCCCCCCGCTGCCCTAGGCTGGAGCCCCGCCCCCTCACCCGCCCCCGCGGGGCCCAGGTGCACGCGTGGACCCCGAGCCCGGAGGTGAAGAGGGTCTGACCCTGCGATCTCCCGCAGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGCGCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTGAGCGTGCGGGGGGCGGGGCCGGGGGGCGAGGGCAGCCAAGGGGTCCCAGGCGGGCCGGCTCTGTCTGACCGCGCGGCGGCCCCACCTAGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGGTGAGTCCTGCCTCCTGGGGAAGCAGGCAGAGGCTTTCCTGGGCACCACTGCGAGGACAGACGCCCTCCTTGCCTTCCTCGCATTTACTCTGTCCCCCTCTCCCTTCCGTCCCCTCCCTCTCCCCTTCTATTTCTCCGCTCCTCTCTCTCTCTAGACAAGGATACAAATTTCCATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGAGGGGCTAAAGAGGGGGACCCCAAGGAACTCTCCCAGCCTCCATTCCAGAATCCCTCCCCGACCCCCACCAGGGCAGGGAGGGGGCTGGGCTCGGATCAGCAGTGACCTCCCTGTCAGCCCAAACCAGTGGCTCCGCGTTCCCGTCCCTCACTGTGACTCTGACAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGAGCCCTGCGGGGCCCCTTCACCACCCCCTCCGCCCTGCCCCGGACACAAGGGTCTGCATTGCGTCCATTTCAAGAGGTGGCCCCAGGACGCGGGCAGCCCAGGCTCCTGCTGTTCTTGGGCAAGATGAGACTGTTCCCCCAAATCCCATCCTTCTCCTTCCAACTTGGCTGAAACCCACCTGGAGACGCAGTTCACGTCCAGGCTCTTCCACTGTGGAATCTTGGGCAAGTCAGTAACGAGCCTCAGTTTCCTCACCTGCAAAACGGGTACAGCATTCCTGTATGATAGCTCACGCCGTCGTTGTGAAAACCACATAGACTTGGTCAATTCTCGGTCCTACTCTGCCCTCCCGTCTCAGCCCTCGTGTTGCCATTGCCTCTCTCGGATCCTCCAATCCTCACGTCCTTCACCTGGTCTCTGGCCCTGGTTCTTATTTTCTCTCAATTCCCTACTGCCTGTTTCTTACTTTGAACCTGGAGGCAGCCTGCAGCCCCATCCCATCTCCTGCCCTCTCCTGATCTAACTCCCTGCTGCATCTCTTGCTCTCATTCCTTAGACGTCCTCCCCTTTTGACCCCGTTCCTTCATCCATCCTGCACCCCAGTCCCCCAGCCCTA
AATCCTCCCTCCTCTCCTCACATCCTGGTCCCTAGCAAGGTATAGATAGCCTCTGTGTCTTAGGATACCCCGGGTGCTGTTCCCTCGGTCACCCTGTTGCCCAGTTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCCTTTCCCCTTTTGAGCCCGTCCATTCATCGGTTCTGCCCCCGACTCCCCCTGACCTAAATACCCCAGCTCCTAATTCCCCCCTCACCCCGTTGCTCAATTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCTTCTCCCCTTCTGAGCCTGTCCATTCATCGGTGGTTCTGCCCCTACTCCCCCAGCCCTAAATACCCCAGCTGCTGTTCCTCCCCATCACCCAGCCACCGGATTCTCCATTCACCCCTTTCTCTCACCCCTGGAGCCCCGTGGGTGGGGGCAGGGCATGAGTTCCCCAGTCCCCAAGGAAAGGCAGCCCCCTCAGTCTCCCTCCTCCTCATTCCCTTCCATCTCCCTCCCCTCTGCCTTTTAAACCCATCCCCTCCGATTCCCCTCCTCCCCCCTCTCTCCCTGGTGTCAACTCGATTCCTGCGGTAACTCTGAGCCCTGAAATCCTCAGTCTCCTTGGCGGGGAAGATTGGCTTTGGGAACAGGAAGTCGGCACATCTCCAGGTCTCCATGTGCACAATATAGAGTTTATTGTAAAAAGC", - 100547052 - 1); - - var codingRegion = new CodingRegion(100547257, 100610315, 206, 10177, 9972); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 100547052, 100547317, 1, 266), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 100547318, 100549480, 266, 267), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 100549481, 100550652, 267, 1438), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 100550653, 100550655, 1438, 5957), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 100550656, 100550784, 5957, 6085), - new TranscriptRegion(TranscriptRegionType.Intron, 3, 100550785, 100550787, 6085, 6698), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 100550788, 100550814, 6698, 6724), - new TranscriptRegion(TranscriptRegionType.Intron, 4, 100550815, 100550817, 6724, 7115), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 100550818, 100552774, 7115, 9071), - new TranscriptRegion(TranscriptRegionType.Intron, 5, 100552775, 100552880, 9071, 9072), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 100552881, 100553066, 9072, 9257), - new TranscriptRegion(TranscriptRegionType.Intron, 6, 100553067, 100554979, 9257, 9258), - new TranscriptRegion(TranscriptRegionType.Exon, 7, 100554980, 100555095, 9258, 9373), - new 
TranscriptRegion(TranscriptRegionType.Intron, 7, 100555096, 100555514, 9373, 9374), - new TranscriptRegion(TranscriptRegionType.Exon, 8, 100555515, 100555579, 9374, 9438), - new TranscriptRegion(TranscriptRegionType.Intron, 8, 100555580, 100607745, 9438, 9439), - new TranscriptRegion(TranscriptRegionType.Exon, 9, 100607746, 100607894, 9439, 9587), - new TranscriptRegion(TranscriptRegionType.Intron, 9, 100607895, 100608306, 9587, 9588), - new TranscriptRegion(TranscriptRegionType.Exon, 10, 100608307, 100608372, 9588, 9653), - new TranscriptRegion(TranscriptRegionType.Intron, 10, 100608373, 100608728, 9653, 9654), - new TranscriptRegion(TranscriptRegionType.Exon, 11, 100608729, 100608891, 9654, 9816), - new TranscriptRegion(TranscriptRegionType.Intron, 11, 100608892, 100609538, 9816, 9817), - new TranscriptRegion(TranscriptRegionType.Exon, 12, 100609539, 100609712, 9817, 9990), - new TranscriptRegion(TranscriptRegionType.Intron, 12, 100609713, 100609804, 9990, 9991), - new TranscriptRegion(TranscriptRegionType.Exon, 13, 100609805, 100609896, 9991, 10082), - new TranscriptRegion(TranscriptRegionType.Intron, 13, 100609897, 100610051, 10082, 10083), - new TranscriptRegion(TranscriptRegionType.Exon, 14, 100610052, 100610104, 10083, 10135), - new TranscriptRegion(TranscriptRegionType.Intron, 14, 100610105, 100610273, 10135, 10136), - new TranscriptRegion(TranscriptRegionType.Exon, 15, 100610274, 100611004, 10136, 10866), - new TranscriptRegion(TranscriptRegionType.Gap, 15, 100611005, 100611005, 10866, 10867), - new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611006, 100611075, 10867, 10936), - new TranscriptRegion(TranscriptRegionType.Gap, 15, 100611076, 100611076, 10936, 10937), - new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611077, 100611169, 10937, 11029), - new TranscriptRegion(TranscriptRegionType.Gap, 15, 100611170, 100611173, 11029, 11030), - new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611174, 100611176, 11030, 11032), - new 
TranscriptRegion(TranscriptRegionType.Intron, 15, 100611177, 100611299, 11032, 11033), - new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611300, 100611307, 11033, 11040), - new TranscriptRegion(TranscriptRegionType.Gap, 16, 100611308, 100611311, 11040, 11041), - new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611312, 100611428, 11041, 11157), - new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611429, 100611517, 11170, 11258), - new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611518, 100611619, 11260, 11361) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(865, 865, "G"), - new RnaEdit(1214, 1214, "A"), - new RnaEdit(1439, 1438, - "AGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCAGGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCATTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGCCTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCT
GCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACCAGATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCCCCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCAC
CAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCCTACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTTCTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAATCGCAACCACCGAGACCCCC"), - new RnaEdit(1520, 1520, "A"), - new RnaEdit(1549, 1549, "C"), - new RnaEdit(1568, 1567, - "TCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCACCACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTG"), - new RnaEdit(1595, 1594, - 
"AGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGT"), - new RnaEdit(1627, 1627, "G"), - new RnaEdit(1650, 1650, "A"), - new RnaEdit(1702, 1702, "C"), - new RnaEdit(1957, 1957, "T"), - new RnaEdit(2794, 2794, "T"), - new RnaEdit(3028, 3028, "C"), - new RnaEdit(3586, 3586, "C"), - new RnaEdit(4024, 4024, "G"), - new RnaEdit(4044, 4044, "T"), - new RnaEdit(4131, 4131, "C"), - new RnaEdit(4183, 4184, "CT"), - new RnaEdit(4231, 4231, "G"), - new RnaEdit(4273, 4273, "G"), - new RnaEdit(4289, 4289, "A"), - new RnaEdit(4580, 4580, "T"), - new RnaEdit(4956, 4956, "T"), - new RnaEdit(5344, 5344, "A"), - new RnaEdit(5347, 5347, ""), - new RnaEdit(5356, 5356, "T"), - new RnaEdit(5358, 5358, "C"), - new RnaEdit(5360, 5360, "C"), - new RnaEdit(5368, 5368, "C"), - new RnaEdit(5411, 5411, "T"), - new RnaEdit(5413, 5413, "C"), - new RnaEdit(5418, 5418, ""), - new RnaEdit(5421, 5421, "C"), - new RnaEdit(5480, 5480, "G"), - new RnaEdit(5483, 5484, "GT"), - new RnaEdit(5486, 5486, "C"), - new RnaEdit(5492, 5492, "A"), - new RnaEdit(5499, 5499, "T"), - new RnaEdit(5501, 5501, "C"), - new RnaEdit(5505, 5505, "C"), - new RnaEdit(5512, 5515, ""), - new RnaEdit(5527, 5530, ""), - new RnaEdit(5556, 5556, "T"), - new RnaEdit(5558, 5558, "C"), - new RnaEdit(5630, 5630, "T"), - new RnaEdit(5636, 5636, "G"), - new RnaEdit(5640, 5640, "C"), - new RnaEdit(5648, 5647, "TTCTTGCCTCCC"), - new RnaEdit(5691, 5691, "T"), - new RnaEdit(5697, 5697, "T"), - new RnaEdit(5708, 5708, "C"), - new RnaEdit(5711, 5711, "G"), - new RnaEdit(5721, 5721, "A"), - new RnaEdit(5737, 5736, "T"), - new RnaEdit(5750, 5750, "G"), - new RnaEdit(5777, 5777, "G"), - new RnaEdit(5805, 5805, 
"T"), - new RnaEdit(5819, 5819, "C") - }; - - const byte startExonPhase = 0; - const bool onReverseStrand = false; - var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, - rnaEdits); - - var expectedCodingSeq = - "ATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCGTCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACACTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCAGGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCA
TTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGCCTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCTGCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACCAGATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCA
GAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCCCCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCCTACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTTCTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAA
TCGCAACCACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCAATACCAAGACCACCTCACACAGCTCTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCACCACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACGGAGACCACCTCACACAGTGCTCACAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCTGGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCA
ACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAAGTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTTTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCCCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATCCCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTACCCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGCCAGTGTGCTTGCCTTCCGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGATGCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTGCAGAACGCCAGCCAGGATGTGAACAGCTGCCAGGACTCCCAGACCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCCTTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGGGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTGGAGACGAGCGGTCCCACGTGTCGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGC
GCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGACAAGGATACAAATTTCTATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGA"; - - Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); - } - - [Fact] - public void GetCodingSequence_RnaEditInsertion_StartsCds_EndsUtr() - { - // NM_001220765.1, chr7:50344378-50367353 - var genomicSeq = new SimpleSequence( - "CGCGGCGCATCCCAGCCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCACCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACGTGAGTGTTTTCAAATTGAATTTCAATAGGAAAACTTGGGGTAACTGGTGAATTTAAAAAAAAAAAAACACAGTAAAGAAAAGCGGTAAGGTTGGTAGACCCTGGTGTCGCTCAGGTCCGCCTCTCTTTTCTGAGGACAGTGAGAGAGTTCACTTCTGTCAAGCGTCTGTTGCTCTGCACTGTGCCAGCAGGTGCAGGACCAGGCCGACATGGGACACTTCTGAGCAGCCCCGCTGTCACCAGGAGAGGAGTTCTAGCTCCCAACCATATTTAAATTTATGTAGACCTACATATACCCACGGAAGTCAGCCTTTATAAAGTCGTGTGTAAAGAGTTTTCCTTATATTTGAGCCGGGAGCTTTCTTTTTATACTATAAATATGATGAGATCGAGTCTGAACTTAATTTCTGCAAGAGAGGAATTATCCCGGCTTTGAAAAGTTAGTCCTTTTGCTGACCGCAGGTTTGACGCTCAAGTCACCAAACCTTCTCAGGAAAACCCTTAGTAATATTAAGGCATCAGGTTACTTGCGGTTATATTTGAAATGTATTTTAAATATTTGTCAAGCATCGCTGCTGATGCCTAAGGAACCTCGTGAGGGCTTGTTTTTCCTTCTAATTTGGAGGCATCTAATGACCGAAAACCGTAGCGATTCCATAGGGTCTGACCAGGCACAGCTTTCAAATGCAGCTTCCCTCTCTCTAGGGACTGCAGCCCACCCAGACTGAATTTCAATGCGGTGCGCTTTGCTTAGGTTACCCACTCACAATTTCCCACTGCGCCGCAGGCAGTATATTTCAGCTTTGAGATACCTTGTTTTAAAATTCCAGACAAAATGGTGTTGAGGAAATGTCTCCTTACTAGTCCCATCAACTTCTGTTAAAAGAGGAAAATTTATGGAATTTGAAAATACTGCGTATGATATTTAAACTTTCATAGACATTCAAATGCTTTTAAGGCCAGGTTCAATTTGGTTATGAGTCGAGGGGTGGGGGGGACCCACATAGAAATGTCCTGGGTCCTCTTGAGTTTATTTCTTTGTTTGAAGATGTTTGTTCAATGAGTTTTATTGTACTCATCTTTTATATGGAATTTTAAAAAGTAACAATTTCAGTATTATTTATATTAGAATGTGTCAGAATTATTTCCGTGACAAATCAGATCATTTGGGCTATGGCTTAAAATGTACACGAGGCAAATATTCATGACAAGAAGATTCACCTTCTTACGCTGGCATCTTGTAAAATGCAGAACAAGTTAAAGAAATAATGTGTACACATACAAATAATGATGTCACATTAAAAATACTACACTATTCTTGCTTGATGGAATGTATCTGATTTCCAATTTCACCATGAACATATTTCA
TACATTTTTTACATGAAAAAAAACGTGACTCTTAAGTCTCACAGTCAATCAGAGCTGGTGACCAGAACATTTTATTGAACTAAATGGTCATGTTTTCTTCCCCTTTTGTTTCACGGTGAGAGTTGAAGGAAGGAGTTTAGAAACTCTCCAGTACTTGTTTAATTCATCAGTGTTCTAATTAGAGTGGTACCTCTTGGAAAACTACACACCCCCCTAATGCAGAAACATCATAGCAATAATCACCCACCCTCAGGGTCTCCAGGAGACCACAAGGGCTGCAGATAAAAGTCTGGATGTGTTAGGTTTGACCCTTTCGAAGAGTTTTACACAGGCTCCTAAAGAGAAGATCAGCTGTGGCCGTTTGTAGCCATTTCCTTTGTCGAAAAACTAAGATCGCAGTGAATGTATTAGCCAAGAGGTCTAAAGCCCTGTTGTACTGCAGGCCACTGTCTTCCTTGTTTGACTAGAGACTTGGAGTTTGAGAACAGTGGTTCTTTGGTTTGGATACATTTTTTGTTCTTGATTTGGATGTGTGTGTTTCATGCGTGGTTAATATAGCATATTTTCAATATAAATGTCAAAAATTTTGAAATAGGAAAGAACTCTCTATATATTAATGTACTTATACACACACTTCAAGATTATGCATTTATTAACAGATACATGAAATAAATTCCATGTGCATATGCACATATGCACACAGAGCGTGCACACACACAGCATGCACACAGCGTGGAGTGAGAGGCATGGGGCAGTGTGGAAGAGTTTTAACATCAAACAGACCTGAAATGAGTATTAAAGGCCCCCTTTATTTTTAAACTTTTACTAAAACAAGATGGATTTCCCTATGTTATATAATGGTGAATTTTAGGCATAAATAACGTTTTTTGAGTGTTGCATAATTGTACGTATTAATGTAATGTAACTGTGGTTAACGAAGAATTCATCAAGGATATCACTGTTTTGTGGCATTTTTTTTTTCCTCCTCTAATCTTTGGACTTGTGAAATAATTTCACTATGAAATAAATGTTGGTTCTTGTCATATTCTAAGGGAGATTGATGTAAGTGGCTCCACTCCAGCTTACAGAAGGTAAACCACGACCTTTTTGCGTTCTCTGAAAACGCTTGTCTTCCGATGCCTCTGTTTCTAAGACTGACAAGCACTCTGGGGGCACTGTGACGCCTGCTTCTAGCGGCAGAGTTGCTGCAGCTCCTGTCCTGGCTGTGAACATTGTTCTCTCTCTGGTGTCTCTATGTTCATAACTACAGAGACTTCAGCTCTATTCCATTTCATATTTGTGCTGAATAATCATTCCATTTTATGGGAGAAAACACAAGATGTAAAAGCAACAAGTGACCCATCCTTTGAAGCTTACAAGAAGAGAAACATTAATCTATTTCACGTCTTGAAAACAGATCAGTTTTATTTTGCTCAAAAAGGGCACATGTACATTTTTGATCTAGGTCTTAGAAACGTAGAGTTTCAGAGGATCAGCATTATACACACTGTCACACACACACACACTTAAAATTCAGATGAGGAACAAGATAGGAATGAGGTTTTGTTAGGGACGCAGAGCACCTAAAACCAAAGGATATCGACAGTAACAAAGCTGTTTTTACTGTAGTGCTGACTGAACACTCATGCTGGTGTCTTCATGTGGACCATGGCTTTCTTGTATTTCTTTGCAGTTTAATAAATGACTTCATATCTCAGGTTACCTTTCCACATCTCCTGGAATATATGTTTATGTCCTTAAAGTTTCAGTGTCGTCACTTTAGTAGCTTTAGTTTGAGTTTTTAAATGTTTGGTAATATTCCAACAAATATTTTTTAAGACATTATGAAACCTTATGAAGTGCCATATATTACAAGTGAGATAAAACAGCAAGCAAAAGAAGGTTTGCAGAAGGTTTTTAAGTGGCGAAGTGCGGGCCTGCCCATTTTGGTGTCTCCTTGGTGGTTACTCCTGAGAAGGGCCTGGAGGAAGAGCAACTGA
GGCCTAATCTACAGGCAACTGCCAAATTGTTTCAGTTGACGTTTTTCCCTCTCATGTTTGACTATAATAAATAGGTAGTTGCCAGTGGAGCCTTCAGCCAACCACCTGGTAATAAACTGTTAAAAATGGTGCAAACCCTAGGTCACAGGTGTGGGGGCCATTTGTCTTGCCTGTTAACAGGCCTGGCCTTAATTCTTTTCTCCCATGGCCATTTCTGCCTTTGGGGAACTCACAATTCCTGTTGACTAAAAGAGCACCCTTTTCCACCACAAGCCTGACAAATCAGACGTCCACATAATTTCTGAACTCGTTTTGGTTAGGACAGGAAGCACAGGCTCCCTTCCTGTCTGTGTTTTCCTAAGAGAAAACGGTCTTCCCTCCTTTTTTGCATATTTGGCAAGTGGTTCCACCTTTCTCTGCACCCTGGTGGAGTGTGAAGGCAGCAGAGGAACCTTTTGGAGGAGGAAGAGGACACAGAGGCCCTGTAGCCAGGCACCAAGATCCCTCCCAGGTGGCTGGGTCTGAGGGGAACTCCGAGCAGCCCTAGGTCCTCAAAGTCTGGATTTGTGTGGAAAAGGCAGCTCTCACTTGGCCTTGGCGAGGCCTCGGTTGGTTGGTGAGTGCCACACGGTTTCTTTGTGTGCTTGCATGGATTGGAATAGCCATTGTGTTCTTCCGTCTTCCCTGCTGGTGTTTCCACAGTGGGTGGCCTGAGCCCAGAGCAGCTCCCCATATCCCTGTGCAGGCCACCTGTCTCGGGTGATGGAGAGCATCATTATGCTCCGTCTGAACGCTCTGCTTTCGGATGGCCCCATGCTCCACCTCCTGATAGCTCGTGGCGCGGGGCCACGGCTTAACAAATGGCTGAAAATGGGTCCTAATTAGTGGAAAAGTGCTTTCTTCATATTTTCTCACTCGAGTGTGCAGTGATTCATTTTTCTTCTGCAATCAGCTCACTGCTAAAGTAAATCTGACTCTCTTCCCGCCATTGCACACCAAAAGTTAACTCTAATGGGTAGGAGGTTAGGTTTGTTGAGAGAGCAATGCAGTAAAAAGAGGGGATCCAATGTGGTCTTGTCTGTCTGGTCTTCCTTTCTTCGTTTTTTCCTCCCTTGTCTTCTCTGTCATTCCCTTCCCTCCATTTGCCTTGCCTTTCCTGTCCTTCCCTTCCCTTCCTTCCCCTCTTTCTTTCTATAATTGGTGGGGGGTTTGCACAGACTGCCAAAACACTAAGAACTGTGTAAAGTGTTTTTGAATGGCCTTACACATATTGAAGTAGATTTTTATGCTCCATTTTTGAGATCACACACTAAAATCTATACCTTTAAAGCATTTTCTGTTAGTTTGAAACTATTTGAAAATGAACAATGTGGTTTAGATTAGAGTCCTGTTCTGAAGCTAGGAGTTCCACTATGAATATTGATTTATCAGTTTTTGACAAATTTTTGTTGTTATACCAGATTTTCACTGGCAAACCTAGAGCAAATAAAATTCCACATAAGATACTTCCCTAGACCTAATGGGAAAAATGTTTAATTTAGAGTCTTTAGGAGAAATGAGAATGAGGAATTGACCTTTTGTAAGCTTACTTCTGAGGCACTCTGAAGTGTGTTCCAGTGCTTTTAATGGAAACTAGAGAGAGCCAGCAACCCCCTAGTGTGAGCCCCACTTTTAACCGGAAAAAGTGACCTTTTCCTCCTCCTTTGTGCTGAGTTTTGCGTAGGGCAGAAAATTAAGCTGATATTCAAAGAGATTCACTGCAAAAACATATTGATAAATCGTATATTCTATTTCATTAAATTAAAACCATACTGCTAATTATCTCAGGTTGTTAAACATAAGGCAATTAATTATCATTTTAAAAGTTGGTAGGAAGTTGTGAGTACTTTTGCAGTATGAGTGTTTTCCCGCTTTAGTATGAGGTTGTGTATGTTTGCTTGAATTTACAGAATTTTCACTTTAAGAGCAGACAATGTTTTGTTAAAGAAATGAAATTTGCT
AAAAAGGAGCATGTAAAGTGAAACATTAAAAATAAATAATTTCAACTTACTTAAGAGCTGCAGAAAAATCTGATTGCTGTGTTTAAAATGAATTTTCCCACATTTCGCTCTCTTATGGACAGGAGCATTTTCTGTCAGGTTATAAATAAAGACATGCCCATTTTTTGTACCCCCACAAATGAGGAAGTTGTAAGCTCTCTGAGGTTTTACTGATGAGCCCCCTCCCCCTGGGTTTGCATGAAGAGATCATAGGCCACAAATAAAGGACTACAAAATGGGGTCTAAACTATCCTGGTGGGGCCTGATACCCACGTTTCGCATGGACCTTACGATGTGATGAATGGTTTTGGCATGAGTGTCTTAAGAATGCTTCCAGATTCGGGTTACAGGACAGCCAGCGCTGAGCTCCCTATTGCAGAACAAAGTAGGAATCTAGAACTTTCTTGCTAACAGGATCCAGCTAAAACACCAAGTTAGATTCTTAAATGATGTTCTTTTCTGTCATTATTTGATTGTTGTCAGTAGCAGTAATTGTTACCAAGCCATTGATGCTTCTATTCTTCCCTTTGCCCTTCTGAGACACAGCTCATTTTGACTTCAGTGGAACCCCTCGAAGGTGGGGTGATGAGCAAGGTGAATTTTCAAAGTAAAGCTACTAAGAGACCAAACTACAATTTAAGGAACCTGATTTTTGAATCAAATTCCATATACTGTGGGTATAGTTCAACATAGATTAATTTCTTATAGTTATTATGAAAAAAATCTCATCTTGATGATAGCTGATAATTTTGTGGGTGTCGTAAACAAAACAGAGGTCAGAATTCAGTCCCTTGGGGAAAATTTCCAATTAGTAGGAAACCAAGTGGCCTACCTTAGTTTGAAGACACCCATCAGGATGTCTGCACCTTTTCATCCTCTCTGGAGGAAAGACTAAATACCCATTATTGTATATAGGTCAGGCCAAAGCAGCCTTTTATATTGCAAGGAATAAGAGGTAAATAGATATATGTGCAACAATGAATCCCCTAATGTGTTTACTCTAGAACACATGTTCTTTCTGTATTTATATGTAGATTTTGTAGATCTTGTCTTACCACCTGCTAATGGTAGATACTGTATCTAAATAAGTTGAGGAAAATTTATAGTACCTAGGAATGTGTCCTCAGTGGGCCAATCAATCAATCATGACTTCAGGTTATTTTTAATAAATATACACGTATGGGTTCATAAACAATGGGATGTTCTTGTGAAGATCTAAATAATTTTACTTCTTTGGGACTAAATAAAATATAGCTTTTGCCAAATAAACTCACACAAGCACTTATTTTAATAGAAGTCAAATGGCTTTGCAGAAACTTCAGTTTTACAGGTGCATTGTTTGAAATGTTACGGGTATACAAGTGGATTTCTCTATTATGTACAGTGTTAAGTTTGAGTTTCAAAATGTCCACCTGAAATGATTTACTTGTACGTTAAGATAATTTAACTGCTAAGAAGGCAAGATAAAGCATTCTTTGTGACACCATATGGCCTTGCTGAGGGAAAAACTTACTGTTATAAGTTTGTGTTTATCTCTCTTTTTAAAAAAAAATGAAGAAAAAAACGTTTAAAATAATGGGAACACAGCAGTTCCTGGGGTCCTCTGTCTCTTTATCTTATTATAGTAAATTACCAAAAAAATAATGACCTGGGGCATGTCTGTGTGGACCCTTCTTTTAGAGGCAGTTTCTGTGTTTTGTAAAGCTGTAGGTTCTATTTTCATTGCACTTCATATTGCTGCACAGCTCCTGACCATGCATGAAGGTCCTCTGAAATCGGTAAGAGGGCAGAAGAAAATGATTCTAAACTTAGATTTTTTTAACTTAAGTGATGAAGTGTGAAACGCCATTTATATTTGAGGAAGCTACCTAGGAAGTGGCTCATGTCGATGGCCCAAATCAGAAGAGGGCCTGTAAAAGCTTCTATCAATTTTGACTGTGTATGCTTCTACCATGGCGGCT
CAATAAACAGCAGTATTAGTTTAAGAGTGGATGGTACAGTAGTATAGACGGGAAGCCTCTCCTCTCCGTGTGAACCGTGCACCCCTATGAGAGGGTAGAGACAATACAATATGCCTGTAACGTCAGGACAGACAGTCATGGCCAGCTTGAACTCCAGCCCTGGGCTTCTTGCAGCAACAAACGTGAACACAGAGGACTGTCTCCAACTCCACTTTCTCTATTTTTAAAACAACTTTTTGAATACAGTATCTGCCATCTTTTCTTATACCTCACTTTGAAACAGGTGGCTCCACTGTGGCATTTAAAATGTTCTGTTTCTTTTCCCTCTGTATCAAATACCTCTTTACCAAGAAAACATTCAAACAGCATAGTTTTTAACTGTATTTTGAAAGGTTTCCTTAGTTCCCTTTGACCCTTCCTCTTTTGCATATCAGTTCCTGGCCATAAAAATAAAAAATGCTAGGACAGAATTGCACATCTGAGCTGATTTGCCCTCAAAAAGTTTCACAGTGGAACAAACCGCAGGAGGAGTTTTCTGTGGCTCAGTTAAATGTCGGGGGAGGGTGGTGTGAAAGCCAAATTGGATTCCTGCTTTCCTGTTTAAATCTTGTTTTTCATTGTTATTTGCACCAGCAATACTCTGTGGAATAATCATGAAAATGTGTAGATTGGCAGCTAATTTTTGAAAAATGAAAAGAATCAGAAATGAAATAAGAGTGCTCGGAAGTTTTTATGTTCTCTCAACCTGTTTTGTCAAATTGTTACGAAAACCTATAAGGTCTCTTTGACTAGATACAAAGACTTTGCACATTGCCTTAGCTTTCTCTTGAAGCATTTCCTTTTTTAAAATACAGTGTAATTCACAGTGATATGATAGATTTGCAAAAGTAAAATCTACCAGTCTGAAGATGAAAGGACTTGTCTCTTAGCAGGAATAATGGGTTTTATTAAAGAGGTCTGTGACCTAAGGCATTTTAAATAAATTACAGGCTTGGTCCCTGTCTCCCCCATGTATCTACTCCCTTCAATATAAGCATCATTGAGTATTTAAGGAAATAACCCCAAATGTAACTCTAGTGTAGCTTCACTTGTCAGGGAGGAAAAAGTAAATAGCATACATTTGGCCAAATAACCAGAACTTTACTGTAGAAGTTTTATGATGAAATTTGCCTTTAGTGCAGAGTATTACAAAGATCATGTTTAGTTTCTAGCAGTATATAAGTAGCATCCATCCTTATCTGTCATGCATTTGGAGTGTGCGACCCCTGCACTGGGCTGCAACATTCTGATGGGCAAGAGTGCTAGGGAGAAAGAGGCATCACCATCAGACTGCACGGGTTCAAGTGTCAGCTCTGTGGTTGATTAGCTGTGTGACCTGGGGAAAGCTATTTCTCTTAGCCTTGGTTCTCTCATCTATAAAATGGAGATAATGATGCAGATGCCTTGGGTTTAATTGGGAGAGTTAAAGACACATTTACATATTTAGCAAGTAGGTGTTGAATTCTAGCTCTACATTGGACACTATGCCAGGTGCTCAAATAAACAAGTGGACAAGACAGACAACACCCATGGTCTTATGAGGCTTAACCATTTGCCTCTTCAATGCCAGAAACTTAGTAGGTTGATTAGATAAAGCCAGTGAGTACCAGTATCCTTTTCTTTGCAGCCTTTTCCTGGCACACTAAAAATACTCAGTACATATGAAATATCACTGGACAAAGAATCCCCCTTAGAGTACCAGTGGAGAAGGAAGGCATTTGCTTAAAAGCAAACCAACAGAAAGACATTGTAAGGCAGTTGTTTAAGTCTCAGAGACCTATAATTTTTTTCTTTTTTCTTTTTTTTCATCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTGCAAGCTCCACCTTCCGGGTTCATGCCATTCTTCTGCCTCAGCCTCCCAAGTAGCAGAGACTACAGGCGCCCGCCACCACACCTGGCTAATTTTTTGTATTTT
TAGTGGAGACGGGGTTTCGCCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTCATGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACTGGCATGAGCCACCACGCCCGGCAACTACAATTGTTCTTAAAGCTTGTAGAATTACTGTGTGCTACCAACAGACAGGCTAATTTTGAGTGACCCTCAGTACTTTGTACAGTTAATTTGGCACGCTGTGTACTTAGTGGCTTTTTAACAGCTATAAATTTGGGCTGCTAGAAAAGTAGTAAAGTTGTGATTCTTGACAGGCATCTATCTGCATTTTCATTTTTACTTCATTTGTCTAGACTCAGCTTGTCAGAATTATGGAAGAGACTCCTTGTGTCAGGGCAAGCACTGTGAAGAGAGGTATTCACTGTCAGAAAAGAGAGGGGAGCTGGAGGCAGCTCAGAGGCCTGAGACCCGCCTCCACAGGAGCCCCAGCAGGTTCGGTGGAGCTCTGGCCACACTCTCCTTTGGGATGCTGAAGTCAGAATGAGTTCACTTCCCAGCCAGTCTTGCCAAGGCTCCTCACCTGGAAGCAGCAACTGCCCAGGGCTGTTGGATGTTTCTCCCCAGGGGACAGCCAGGTCCCAGTCCCGCCTCGGTGTGGAAGGAGGAAAGGCAGGGTCCAGGAAGCTGTTTCAGGACAGGCCCAAGGTCCCCCAGGGATGCCTTTCAGGGTCAGCGGAGGCTGTAAATCAGCAGGGCCCACACGGCCTGGAAGAGGCCCCTGTGCTGTCGGCTTGCCCGGCTTGCCCGGCTCCTAGTCCGGCTTCTGCTCCTCCTTTGTAAAGTTATGGATATGCTAATAGTTTCCAACTGAGACTAGGAAAGTAAGTCCTACTTGACACTGTTTGGTCAGAAAGAGGGAGAGAAAGGAGAAGGACAGAGAGAGACTGAGAGAGAGACAGTCTCAGACAAAGGGAGACGGAGGGAGGGAGGGAGAGACAGAGAAAGAGATGGGAGGTAGGTGTGGGAGGAGGGAGAGATGCAGAAGGCAGAGGAAAGACAGACAGAGATTTAGACCTCCCAAGTCAGTGAGCAGTCCAGAGTTGGAGTGGAGGGTGCCTGGTGGCTTGTGACTGCAGACTCCACTCCCCGCTCCTAGAGGCACAGCCATGGACAGCTTCTGTCACGTTGGCCCTGCACTTATCTCTGCATCTATTTCCCCTTGTGCAAGATTCAGAACTGCATGCTCCAAAAAAACAATAAAAGCATTCATGTTCATAAGAATTGCACAGGTAAAAGGTAGTTTGCTGATATTGTTGTATTTTTTACTATCGCTTCTTTTAGGTCTTGCCTGAAATTGTTTGGGTTTCCCAGGCAAAGTAGAAAACTGCGGTACGTTTCTGTGAAATAATTATTCCTTCTGGCATCTCCCTTTACAGACCTACTGATCTTGATTTTTCATTTAGGTGAAAGTTTGTGAAAACATGCCATTAGCTTGCTTTGTGATTAACTCCTTTTACTGAATGTGAGCTCCTTTTAAATTGAGGCCATATCAAGCTTAAATTCCATATTTTACCCGGCACTCTGCATTTCTTCCATGTGGGAGAGGAGGGGCTCAGTAAGTGCTTTGTAAAATACACAGCCGAAGTGATGCACGTGCTAACAAAGGAGTGTGACAGGACTTAAGTGCCCTTCTAGACACTTCAGGCTCCCCTTTGTAAGCTGTCTTGGAAGAGGCCACATTTCCTTTCCCTCAAACAGTTTCTCATTGTTTGATTATTCTTTTAGCCTTTCTCTGGAAGCAAAGCCACTTTTACGAGAAAGTCACTGCTTTTTCATCTCAAGAGATGCAAGTTTGGAGTTTGGGGAAGTTTTCAGGTGCCCGTCAAGTCATCCTTTATGATGTCAGACGAGTCAGGCCACAGAATTCACAGGGCTCAGTGCAGACCGAAAACTTGAGGCCTCTTGTTCAGAAATTATTAAAAATTTTGGTGAACATCACCCCAAGCAAAGAGATCCCCTAA
GCACCAGCCCCCAAGCAACTGCACTCATAAGCCCATGAAGCCCCCTGCTGTCAGAAACAATGTGGTTGAAATTGTGTATGCACTTGGAAGTGAGATGGATTGCAAAACACAGGTCTCCATGCTGGGGCAGGAGTGGTGATAGGGCATGGAGTGGAAATGTCCAGCAGGCCCACGTGCGAAAATGCAGAGCTCTCTGGCTCTTGCAGACTTGGCTGCTGACAATAGACGCGCTCCAGGAAGGTGCTCGCTGTGGTGTGATCTGCTGCCCACCCCTAGCTCCCTCCAGGAGACTGGTGCGGGGACTGTTTGCAAATGACTGCAAAAGTAAGAAGGTTCCCACAGAGCAGAGCTTGATTTGGGGACCAGCCGAGGGCAGTTTGTCAGGATTCCGGCTTGAAACTGTTCTCACATCTCACCGCCTGAAAGGACGAGTGTGTCCAGAGGACTTAGCATTGATCACCTCTGTCTCCATGCAGCAAACTCAGAGGCTCAGCCCGCATTCCACTGGAAGGGCGTTTGCCAGTGGTGTTGGTTGGAAGAGCCTTGACTTTGCCTTAGGAAACATCTTTTTTTAAGAATTGAAAATAACTTGAGTATGCAACAGTAGGGCATTTGTTATATAAATTAGTTGACTAGTGTGTAGCCAGTAAAATGATGATGGTGGTGTGTATTTGTTAAATAAAAAGATATGTGTGGTATTAAATTAAAAAATATTTTAAAACAACATATTTGTAATCTGTTTAGTGTCCTCTTTTTGTAAAAAGTACAGAAATAAATATACAGAAAAAATAGTAGTCCTAAGTGGTAGAAATTATGAGCATTTTCTTGCCTTTAAAAAAAGTTGTAAAAGATTGTATCATTTATGTAGCAAAAAGTTTTAAGTCAGCATTCTAAAAATTTCGTGTTGTTATAGTTGCTGTGACAAGATTTAACTTCTGTATGCTTCACCAATCAATACAGAGGTATTTAAGACCCGGTGTGTGATAGGCCGCGCTAAAATACTATACACATCTTCAGAAAACTAGAGAACTAACTTCTAACTTCCTATATTAGTGTGGCACGGCTGTTACAAAGATTTTTCTCATTTGAGTCTATCTTGCTTCTTTATCATTGTTTTGACAGTTTCAGAAGAATCGTGGCTTTTCCCCTTTTTTACAGTAAAGGTACCTGAGACTCTTGACGTATTGCTTTTTGGAAATGCTTGTGCTGGTCACATGCTTGCATCTGGGCTAGTGTGTCTGGCTTCCGTGTGCTGGTGGATGCTTACTCTGTTTTCTGAAATACTTTTTCTGTACAGTGGCCACTAGCTGTACTCCTAAGCCACACACCTACCTTGAAAATTCATGTCACTTTTAGAAATAGATAAAAGCCCCTCCCATCCAGAAAAAGTGACTATCATGTATATCCTCATCATGACTAATACTGATATTCCTGAAATTGAAAATACATATTCCATATGTACCATAAAAGGTATTAAAGATATATGGAGTGATAGATATATTATATATAACACTTCTACCCTCACAGTTTTCAGCCTAATTGAGAGGGTAAGATCCCTGAATCATCCATCAGTTTTTCAGGTCTCTGCTGAAAGCAGGCCACAGCTCAGATCCACACATCTGAACCAGAGACAGAGGTGGCCAAAAATAAAAAGGGGGACAGGGGGACAACCTGGTTTAGAGTCAACAAATAGACTGCATTTTCTGGTTAGTGAAGGAGCTCTCCTGAAAGTCATATACCAGAGCATAAATGAGCAGATTTCCTTGAGGTCACCTTCTGCTGGCCATAGCTTTCTTATCTGTGGAGCTGCCAGCTGTCATCCACTTTGGGGCACCTGAGACTGCCGAGCGGCAGGCCAGGACCCAAGTGCGAAAACACAGAACACCTTTTTGTTTCTACTCCACTGATGCTGGGGTTCTCTCCCTGGTGTTTGTGGCTCGTAGTACACTCTGTGGAACATTCACTATGGTCATCGAAGGGCAGCATCTTCCCAGTTGTTT
CTTTCTTTTCTTTTTTTTTTTTAATTTAAACCGATCTGAGAAGCCAGCCATCTGTCAGCAAAACAGGAAGGCTCGGGCTGTCTCCTGGGCTCGTTTTGCTGCCGTAGTGAGCGTCACTTCTCCCCGTGTAAGAGTGCTGGTGAAGGCTGAGGCAAGGGCCCAGAAAGATTGAGGGACAAAGACAGGAGCGCCCGCATTGCCCATCTGCCAGGCTGGAGGTGTATTCATTATTGATGGAGGTAGTGCAGTTGCTGCTCAGATATGCAGCCCTGCCTGGGTAAATGAGACATTCTTCAGCAAATTGCTTCGTTTTTTGATTGCTGATTGTACGCGTGTCACCAAGCTGACTCAAGGTTCATCGATGCATGCTCAGTAAATTAGAAAGAACATAACTATGGATCAGCCAAGAGAATGAATTCTGTGCCTACAATGACCCAGGGCCATTTAATTTTCTGCTTAATTTTGTTGCAGTCAGTTTGCATTTTGGGTTATTATGCAGTAGGAAATTAACAATAAATAACAAATTTGGTCCTCCTGTGCTTGTAATGATATTTTTATAAATCTTTGTAATGCTGTTTTTAAAAGGATCAAGGTCTGTGCCAGTCTGATACTCCAGCAAGTATGTGAGGAGGAAAATGCATTATTCTTGCTAGATAACCTTGTTGTTAAATAGCATAGGGGTTCTTTATCTCTCTCTCTTTCTCATATCTTATTAGTATTTTTGCTTTAAACTAAAATCCCTTCCTCTCTTTCTCAGATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGTGAGACCTTATGAGATAGCTGTGTGGGAAGTTCATGAGAAAAGCTTCCCTGGGGCCGGAAGTCACAGTGCTTGGTATGCTCATGGGGGAGGAATAGGGGCTATTCTGCAAAAGAAAAGACCATGATGGAATTTGCCTGAGTGTTTCCTTCACCTGTTACAAATTATCTCACTTTGAGCTGAACAGAAAGCCTCCAAGATGAAATTAGTTTTACTGTTAAACTTCAGGAAAAAAAAACGGGAAGAGTTAAATACATTTTTGTACTGTTGGAAGGAAAAATGGCTGATTGGTTTAAAACCCAAACACATGCCAATGATGGTACTTAAAGAGAGAGAGAGAGAGAAGCTTGAAAAACATAATTGTTGGGCACAGTCATGACTGTTTGTTCATTAAGCATGGACACAACATTGCTCCCCTTTGCCATATATCTTTTCAAGCCGTATTGGATATAGCTCTTCTCATCCAGGAGACCCAGGAAGTGGAGAAGTCTGTAGTAGGAAAAGCCTAAGGGTAGGTCACAGACTGTGACCATTTGGCAGCACTGAGGGTGGACGGCGAGCCAGTCCAACAAAACCGCACAGTTCCCCAGTGCATGGACATAGGAAGACAGCTTTCTATCTGGCCCTGTATCCAGAGGCGTCAGCCCCAGTAGCAGCTTTCATGGACTTTGGGGTTTTCGGTATTTCATATTTTTGAGCCTCACAGACTCACAGCCAGCCCCAGAGGCTGACTTATATTTGAGAAAGTTCTCAGTGGCACCTTGCCTTGGCTGAGCGCCCTCGTGTTTTGAAGTTTCTATGGGATTCTACAAGTTGGTGCTCCTGATGAAGACCAGGACCTATGTGTGGCTGCTCCCCTGCTTGGTGGTTTCCCTGGGGAAGGTGCAGGAGAGGATCTTCTGAGTTCCATGGAACTGGAGATAGATCTGCCAATCACAGGCTTCCTTCTCCACCACTCCTCAGCCGCTCTATTCATGTTTCAGATTTTGGACTTAAACTCTCCCAGGTGCAAAGAACAAACAAAAGGCTAGCTTATTTTTCTTTTAGAGTGAGGCTTCGTATTTATTACAATATAATTGCCACATTCTTTGTGTAATTCTCACATTTATATCTTAAATATAATTCTCATGAATGAGAATTATATAATTCTCTTTTTGTATATCATTGAATATTTTCACTTAATTTTTAAT
TTTTTTAATCGTCACAAAATAATTGTGTACATAGACACAAAATAATTGGGTACATAGTGATGTTGTGATATATACAATGTATAGTAATCGGATCAGGTAAATCAGCATATTCATCATCTCAAACATTTATCGTTTCTTTGTATTAGGAACATTCGACATCTTCCTTCTAGCTATTTGAAACTATATATTATTGTTGACTACAGTCATCCTGCAATGGTGTAGAACACTAGAACTTATTCTTCCTACCTAGCTGTAATTTTGTCTCCTTTAACAAATCTCTCCCTATCTTCCACTCCCCCGACCTTTCCAGCCTCTATTAGCCTCTGTCCTACTTTCTACTTATAATGATGACAGCAGCATTTGTTAGTTTCCACATGTGAGTGAGAACATGTGGCTTTTTAACTTTTAGAATGTGGTATTCAGGCACTTCATGGTACAGTTGGTAAAAGTGAAAATGTGTCCAAAAGTTTGTGATTATCTATATAAACAAAAATGGTATAAATACAAATATCAATTTTGCATTGAAGAACTTACCTTAGAGGTATATTCTCACAAGTGCACAGAGCATTTAAGCATTTGTTCACTGCAGCATTGTTATCAGTATTTTAAAACTATGGTACATCCATGTACTTCCACATACAGCTCTTAAAAATAAGGAGGATATGAATGAACTAGTATGAAAAGAAGTCCAAATACATGTGAAAGTGAGAATAGCATGGTTCTGGATGGTATGCAAAGTATGATCTCGTTCTTTTAAAAGAAAATAAATTACATACACATACATATTTTCTATATGCTTGCCCATAACGTTTAGGAAAATTCTTGGGTGATATTTATTAACCTGGACTTCCTCTTGGAAGACTGATGGTAGAAGGAAGGGGACGAGTTAGGGAAGAGGAGGAGAAGGAAAACTTTGCTTTTCATCTTCTACCTTTTAGCATTATTTGAATTTATTTTCCTTAAGCGTTTACTTTGTTTCGTAAACAAAAAAGCACAAAAACAAAAAACGAGTTAAATGGGAAAAAAAGCAGTTTAGCTCTTTATAGCCTCTCATTTGGCTTCGCCAGCCTCTCACTGCAGCCTCAGAGAGCTGGTCTGGGAAACACTGGTAGATGAGGACTGTAATCCTCACTCATGGAAGAGGATCTCATTCACTGGGTTTGCTGACTGTGACTAGAAGTGATTAGGGTGTCAAAAAACCCAAGCATGTTAAAAATTTCCAGAGGCCAAAAAGATGCTTTCATTGTTCTGCTCTTCTTTTCCTTGTCGCTTTCACTTTGGGTAGCTTCTAAATTGGTATTTTGCATGGTGCATTTAAAGAAAATGAGACCCCTTTGGCCAATGCAGGAGTCTACACTCTGATATTCTAGAGTCAAAGCTGAATGCTGACACCTAGGAATTCATCTCTAGAATGTTTATATAAGGAATAGCCCCTCAGTATTCCGATCTCGTATCTTAGTAACGAAACTAACAAAAGCCTGATTCTCCTCTGGTAGTTTTCTTGTCTTTACCATAATACAAAATAAGTAATTTGTTCTGCACCCTGACTGTTCAAAGGATAGGGTAGCTGGGGGCGGGGACAAGAATGGAGACCTTATTACATAAGACTTCCTGAAAAAGGAAACTCTGTTTTTGTTTGAAATGATTTGGTCTGAAATTTAGTTTGTGTACACTTACCAAAGGGATTCCTATTTCTAAAACACTCATACTGCTTTTGATTCCTGTTAACCTTTGAGCACTCTACGTAATGATGAGAGCACTTAAAGAGTCATGTCACTTTTAGTAAAGAATCAAAGGATACTTTTTCTACTTCTTCGAGTTTGATCTCTGCTTCTCCAGTTAAAACCAGTATTTGTTTTTTTCATTTCTAAAGTTGGAAGAAATGACAGTTAGTTATGGCATAAGGATGTACATTTAACCAAATAGGAGTTGACATTCTTGGTAAGAAATCTTACCAAGATTATGTTATAGATTATAAGAAATCTTAACAAGAATATGTT
CCTAAATCATCCTCTTTTCCCATAAAATATTAAAGTATCAGCAATTTCATAGGATTCAACCTAATGTATGCGAAATGCTAGATAAACAGATAAATACTTAATATCTGGCTTTTTTTCAAAGCACTGGGTTATTTGTTCCTTGAGATTTATCCTAAATGTGGGCTATACCCTGGTTTACAGTGTCTCACAGATGTGTAGTAGTAGACACTCCATAAGTGTTTACTGACTTGAATCCACAGGGTACTGAGAAAATGCTACTGATAGACTTGGAGGAGAGCATATCTAAAGCAAGCTACCCTTTCCTTTAGGGCACGTCTCACTAATTCTTTGGGTAAAGCGTATTTTTCTTCCTTTTGTGTTTTTGGCAGTCTTTCCAAAAATACGTGTTATACCTATGCATTATTTTTTGGTTTGGTTTCTAAAGAAAGAGTCAGCCGGTGGGAAAGTGAAGGATGTGGGAACTGAGAGATCTGCATCAGCATCCCACCTCTACCTCCCACGATGGGACCTGAGACAGTTATTTTTGCCTCCTGGACCACTATAGTATCATCTGTAACAGGAGGGACTTGAGCCAGTTGATCTCTAAGGTTCCTCTGGCACCTGTGACCCTAAATAGATATTGGATATTGGTTTAATGCTATTTGTAGTGTGTTTTTTTGGGGATATGGAAACCAGAAGTTTGTTTCCATAAACATAAACATAAACTGTATATATCTAAAGGATATGGAAACCTTTAGATATATATAATCTGCTTACGTAAAGAAGGTTTGTATATATTGCAGTGTCAATGGGAATATTTTATCAAGTTAAGCATAGTAAATCACATTGATTAAATGCTTTGTATTTACCAAACATTACCCAAAGTGTTTTCTCCTTTCAACCTCACAAGGACCCACAGAAGAAAATACAGTTATCATTTCCAACCTGCAGGGAGCTGAGACACAGAGAATTTAAGCAACTGACCGGAAGTCCAACAGGGAGTCAGAGATTGCTCTGGGGTGTGATCCCCACTTGGACCCTAGAGTGGAAGCTTCTCCACTACTTTATAGAGTTGAGATTCTATATTTTGAGCTTGTATTTACCCAGAGAATTATATCCTCTTGGGCAATTGTGTATAATAAAACCTCATGCATTTAGGAGAGGCGGGATGACAGAACTTTGTTGAGTGAATTATAATCTACTTGAGAAATTATTTGCTTACATTTTATAAGCTAATTATACCATATCTCATCCAGTTTTCCCAGAACACTTCTCATAGGTAATGCTTTATTTGAAACATAGGCCATAGGTAAGTTAAGTGTAAATGTGTATTTTTATAATTTAACCAGAAGTTTATTTCATTTTTCTAAATAAGTGAAATTGTATTGCATCTTCTAAATTATTCTATTTAAACACTTGATGTCTTGCTGTCTCCGTCTCTGTGTGTTTGCATGTCATTGTACATGTTCTTAGGAAAAGTGTGGGAGCTTGACGCAATATATACCTTATGTTTCTATGTGCATATAGTTTACCAAATAATACCATAAGTTTACTTAGCATATTAGAATCCATGCACATTATTTTTATTTTATCTTCACCGCAACCCTGTGGGATAGACCAAAATCATGCTTTTCAGCCTCCTTTTTCCACTTGAGGAAAGGAGTCTTAAAAAAGGGACCAGTCTCATGTTCCCATTCGTCTTACAACTAATTGGTCAAGCCAGAAAGCCAGAACTATGTCCTGGGTCACTAACTCCTAGTCACTGTGTGTTAGTATTTGAGATGCCTGTTGGCTTGATTTAGTCATTTATTTTTTAGTGTTTTATAATCCTTGCATACTTTTACATTTTAAATGGTTAACCAGGCAAATTGGTTTAAAATCAGTGCATAAAAATACTGTGCCTATCATGATGGGTTTCATGAAGTGATAACTTTTCATCATGGAGATCCTCAGCTGTCACAGAAGATGAGGGGCCCTGGGTACAGAGGCTCACGTGAGGGATGAAAGTCTCAGCAGCCCGG
ACTTACACTTTGGGGCTTTTAGGCAAATCAGACAACCTCTTAAGAACTATCACTGAGTTCAGGCAAGGCGAGCTTGAATTAACACAGGGCCCTTGGTGGGCATGTGAATATATCTCACTTCACTACCATCCAGTTCTGACTCTTTACTAGATGCCCCTGTACATACCAAGACTGATTTTTTATTCTCCCTTCTCCCCATGTGGTTTCTTCTGCATAGAGAGTTCCTATTGATCAGTCTGACCCATGGTATTTTAGAATTGCGATCCCTACTGTTTCATTATTCCTTTTTCTCCCCCATGTTGAAAAAAATAAATGTCCTGAGATGCAAGATCAGGGACACTGGAGCACTGACATTTAGTTCAGTGCAGGAACTGAAGGCAGATGTAATTCTTAAGAAGCGTACCTGTTATTATGAACCATCCTCAACAAATTGTAGTGGATCTTGTTTTCTCATAGATACAGCAGTTAAATTTTTTAATAAAAGTAACTAAGAGTTATTTGGATGTATTTTAGCATGCACTGAGCGGAAAGTACGACATTTCTTCATTGGGTAAGTCCTGATTCTTTATGATCCTCACTTGGTTCCAGGGCCCCATGCATCTAAGGGTGTCTCAGAGCATCCTGCAGTGCTCCAGCATGATCGCAGGGAAAAGCTATAGGAGGAAAAGAGTCAATAAAGTTTAGTTTCTCAACCTCCCACCTCCACCCCATAATAATGACAGCTGGTTAATCATGAGACGCGTGCACACCCCACACGCCCTGTACATGTTTACTCATTGGGATAGCATGTCAGGCCAGAAGGCTCCATGGTCATTTCTATGAAGGTACTTTAGCAGGTCTTCAAGAAGGCAAGTGGCCTGGGTCCCTGCCTCCCCAAATTGCAAGCTCCCTGCTTTATGTAGGAGACCTATGTGTATATTACAGTTCTGTGTAAGATTATTTTGTTATTCTTACCCCCACACCCACCCCCCAACCCCCCGCTGCCACCAAAAAAAAAAAAAAAAAATTCCTCTGACAACCTTCATAAAGTCCTGGGAGTTTGAACACCATTGCTCTAGGAAGTCATCTTATACAAAAATAAGAGTTGTGAGGTGGTTCATATACCTCCTGCGTTCTCCTATTTGGAGTTTTTCCCCATTTATGAAAGAGGTGAAAACGCTAAGATATTTAGCAATTATTACTTTAAACATTTTCTATTTATAGGCCGGGCGCAGTGGCTCATGCCTGTAATCCCAGCGCTTGGGAGGCCAAGGCAGGCAGATCACGAGGTCAGGAGATCGAGACCATCTTGGCTAACACGGTGAAACCCTGTCTCTACTAAAAAAATACAAAAATTTAGTTGGGCATGGTGGGGGATACCTGTGGTCCCAGCTACTCGGGAGGCTGAGACAGGAGAATGGCTTGAACCTGGGAGGCCGAGCTTGCAGTGAGCCAAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCTGTCTCGAAAAAAAAAAAAAATTCTATTTACAGCAGTGAAAATAGTAGTGACTTAATGCACATTGCCAAGGCTTTAGCATAACATGAACACTTTCACTCAATGTCTCTCTGGCCTTTTGTTTTTCCTTGGGAAATTCTTATAATCCTGCTCCGTCTTTAACTATTCATTTTGTATTGGCTATCCAAATATACCCAATAATGCTCTTTCTGAAAATATGCCAATTGTGGTAATTACAGCTAAGCTGGAATATTAAATTGTGATGTCTGTTTTCCAGAGAATGAAGTAGTATTCCCCAGAGCATAGGCTTGGTGCCTGTGCAGGTTCTATTTTAAATATTCCAGGAAGGGTTGTTTTATATACTGAGGATGATTTTACTGGTCTTGCCAGTCGTCTGAAATGCTGGTATTACTCTTGTGGAAGGTTTATTCAAACAAACAAGGACATTTCACACAATACCTAGTCATGTTTTTCAGACATTTTAATGTTTGGTTCATCATTTGCACACACTCTCAAAAATCTAG
GTTTGTCTATGTGTTCATATCATTTTGCCTGTTGCCAGCTCAGTCAGCAGGCACACTCTCCCAGGCTGTTGCTGTTTTGTTAGACTTCTTCAGGACCTTCATCTAAAATGGTCTTCCACACGTAGCTATACTGCATAAGTTCACATCATCTGTTTCTTGCATGTGGGTTGTGTCTCAACTCAAGTTTAAGTTAGATTTGGAAGGGCGGAAACTATAGGAGTTGCAGCTTCAGTGGAGAAAAGAGCATTTCCTACTAGTTATGGCTTCCCAAGGAAGGTTAGATTCCTCAGAGTAGGAGTGATTCCCCAATGCTAGAACCTTTGGTCAAATATAATTCTAATCCAGTCAAAATAAATACAGGTATTCTGTAAAACCCGATTTCATTTTGTAAATCCTACTTTGTATAGTATAAGCAATTTTTGTATTTGTGTGGATTATATTTTATTTTCCTATTTCAAAGAGAAGAATTTGTATTAGCAGACTCCCTTTGCATGCGGAGAGGGGATCATTTTCCCAGTAGGCATGGGGTTCCCTTCCATTCCTTGTCCAGTCTTCTTTTCCCCACTAAGTTAAGTCAAACTAAGCAGCTGGTAAGATATTCCCTGGTTCTTGCAAAGAAAGTGAGCAGATGGCAGAATGTATAGCTCTAAGCAGAATACCTGGTGTGGTATCCTCAAACACAAATTGACAGGAGGGTGTGGTGTGGCAAGCTCATTGTGGGGGTAAATTGGAATAAGCTTACAGGGGGAAGAGTTGACAAAAGATAGGAAGAACCTTAAAAATATAGATGCCTTTTATGCAGTGATAAAATGTCTAGATATTTATACTGTGGTGATTATTAGGAATATGTGCAAAGATTGGCTATTAGGATGTTCATTACAGTGTTGTTTAATAATTATAAAAGGACAGAAAGCAATGTGGACTCAAAAATAGGAAAAGAATTTAAATAAATCCTAGTGTACCCGTTATACATGAAATTATGGAAATATGACCCTGAGCATGGAAATATGTACATGAGAATGTCTAAAAGCTAGTTCATTTTGAAAAACAAAATAATGTCACCTCATATTATTTATAGTATATAAAGATGATTTTAAGAGTGGCAGTGTCTGGGATTATAGGTGATTGTATTTCTTCCCTTTTGCACATCTATGTTCTCTCATTTGTATTGTGTGGGGAGAAGTGACTTTTTTTATAAAAAGAAAAAGGTATATGCATCCCAGCAGAGAAGCACTGGCTCCACCCAGTACCTGCCTCCTCATGCCACCCTCTCAAGCCAAAAGCCGGGGGAAGCCCAGGCACCTTGACCATGACCGCCCGAGACTCACACTTCTTCTTTCTCATCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGG", - 50344378 - 1); - - // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion - var codingRegion = new CodingRegion(50358658, 50367353, 169, 1602, 1434); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344383, 13, 17), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344384, 50344518, 19, 154), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), - new 
TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, "GAATTCCGGCGT"), - new RnaEdit(6, 5, "A"), - new RnaEdit(16, 16, "T"), - new RnaEdit(97, 97, "C"), - new RnaEdit(316, 315, - "CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAG
AATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAA
TATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCG
AGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") - }; - - string actualCds = - new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence(); - - const string expectedCds = - "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"; - - Assert.Equal(expectedCds, actualCds); - } - - [Fact] - public void GetCodingSequence_RnaEditSnv_StartsUtr() - { - // NM_001135635.1, chr11:65684281-65686531 - var genomicSeq = new SimpleSequence( - 
"TTTTAAAAAACACTCAAGACACAGACCCAAGCCGGGTTTTATTGAAATGCCAGGAGCAGGCACATGTCAAAGTAGCCAAGGAAGGGGGGACAGTGGTACAGGCTGTGTAAGTTGGCAGGGATGGGCAAGCCTCATGTCCATGGTCCTGGCATCCCCTCTGCCAGGGGATAAGTAGGCACAACTACCCTCCCCTCAAAATGGCATGCTCAGGCCAGTGGGGCCCCTACCCCTGGACCATGAAGGCTCCAAGAAGGGCTGGAAGCACTAAGTTTTCTCTCTCCTGAGGGGGAAGGAAAGAAGGGGAGATGCAGGAGGAAGGGGAGGTATAGCGGGGGATGAGCGTTCCAAGAAGTCTCTCCTTCTAGGTGTCTGCACCCAACTCATGGTGCTGGGCAGTGGAGAGGAGCAGCATTACAAAGGGAGGCTGAAGGCTCATCCCTCAGGGAACCGGAGCCCCCCAGCCTGTGGGGCTTGTGTCAGCCCTGAACAGAGGGCAGAAGTTCAAGGGGACTGAAGATGCAGGTAGTTCCCAAGTGACCTAGGAGTCCCCAGAGCTGGGGGGTGTGGCCTTCATAGGACAAGGAGGAAGACAGGAGGATCCAACCCCAGCATGGAGGGGGGAGTGGGCAGTCTCCCCAATTTGGCCCCCCTAGGTCAGTTCCACGTTGTTGGCACGGTCAAGCACTCGGGAGCCACGGGCACTACCCCCAAGCTGGAAACGGCTCTCATAGAGAGTGGGGCAGAGGTGCCAGCGATTGGCCCGGTAGATGCCCAGGTAGGTGTAGACATCAGGCTTGTAGGTGAGCAGGCACTTAATGCCCGCTGCACGGATGGCTGAATCCGCCTCCAGTACACCCAAGCGGTCCGTGAAGTCGTCCGTGTAAACACAGATGACCTGGCGCCCACCCTCCTTGGCACGTGGGCTCACCTTGGCCACCTGAAGCTGGCCTTCAACCACGGCCCGGGCAATGCCAGCCCAGGCGTGGTCCAGCTTGAAGCCCGGTGCCAGATGCATAAGCCACTTGCCCGAGAGCACGTGGTGGGTGATGGCGAGCTGGCGCAGGGTACCCGGTGTGATGGGCCGCCCACTGGTCTGCAGAGCTTCCCAGGCTGCCTGCAGGCCCTGCACGTCCCCGGAGTTGGGGCTGTAGCCCTGCCCATACACTGCAATCCAGCCCACAGGCTCTGAGTTGGGTGAACCGGGGTCCCCATAGCGGGTAACTTGGGATGGTGGGTACTTGGCCAGCCAGGCATCCAGCTCAGTGGCAGGCGTTGTGCGGGCATCAAACACTAGCCAGGGGTCCATGTCAGCTGCCATGGCCTCTGCAGCCAGGTGCTCGGCGGTGAAGCCATCCTCACGGCCACCTGGAGAGCCCTCCTCTTCCAGCTCCTCACCTGGTTCCATCCTGCTGTGAGGGAACCGAGTCAGGGCAGGGTCTGAGACAATAACTACAGATGCCAGGCACTGGATTAAACTGTGGCCTTGAGTAAGAGTTACTGTCGATGCGCCTCAGTTGCCTCATCTTTACAATGGGATAACAACTGTTCCTGTCCCGTAGATCTGCTATGAAAATTAGATGCCTGAGGAGTCAGCGCTCCAGAAGGGTTGCTGCAGTTATTACTATTCTCCTTGACTTACAGAAAAGGAAACTGAGGCTGAGAAAAAGGACTTGCCCAAGGTCACACCTGCAGTGCGTGGCAGGGCCGAAGGGTGAATCCAGGCGTGGGAGCAACCAGCCCCAGCTACACCTCCCGGCCCTGCCAAGGCCCCCTTTTCCTGGCAGGTATCCGGTGCGCTGGCATTTAATAGAGGAACGCAAAGAAGCGCACGTTCGCGCAGCTCCCGAGGCCGGCTCTGTAAGGCCAGGCCTCCCAGGCAGGCGTTATCGGGCCCACTTACAGACGAGGACGCTGAAGTCCAGAGAGGTTACAGGCCGTTCCGAGGCCAATGGGGCGGTTCCCAGACTCGAACCAGGGCTTGTTAGAGCCTGCAGGAGAG
CCAGGCTCCGGCCGTGCCGCGCCCGCCGCCATTAACGCCCACGGGCCCGAGCTGTGCTCCCGCCCCGGCCCTGCCCTGCCCCTCCCGCCGCCCGCAGTCACCTCCGGCCTTCGCTGCGTTCGACGCCGGCCCAGCCCCGGGCCCGGCTCCGCTCCTGCCGTGGCTCCGCGCCACCGCCACCGCGCCCCACCCCCGCCACGGCCGCCGCCGCCGCCGCCGCCATCTTAGCGCCGCGCCACCTCAACAACAACT", - 65684281 - 1); - - // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion - var codingRegion = new CodingRegion(65684930, 65686502, 30, 911, 882); - - var regions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 65684281, 65685689, 152, 1560), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 65685690, 65686380, 151, 152), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 65686381, 65686531, 1, 151) - }; - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(912, 912, "A"), - new RnaEdit(986, 986, "C"), - new RnaEdit(1561, 1560, "AAAAAAAAAAA") - }; - - string actualCds = - new CodingSequence(genomicSeq, codingRegion, regions, true, 0, rnaEdits).GetCodingSequence(); - - const string expectedCds = - "ATGGCGGCGGCGGCGGCGGCGGCCGTGGCGGGGGTGGGGCGCGGTGGCGGTGGCGCGGAGCCACGGCAGGAGCGGAGCCGGGCCCGGGGCTGGGCCGGCGTCGAACGCAGCGAAGGCCGGAGCAGGATGGAACCAGGTGAGGAGCTGGAAGAGGAGGGCTCTCCAGGTGGCCGTGAGGATGGCTTCACCGCCGAGCACCTGGCTGCAGAGGCCATGGCAGCTGACATGGACCCCTGGCTAGTGTTTGATGCCCGCACAACGCCTGCCACTGAGCTGGATGCCTGGCTGGCCAAGTACCCACCATCCCAAGTTACCCGCTATGGGGACCCCGGTTCACCCAACTCAGAGCCTGTGGGCTGGATTGCAGTGTATGGGCAGGGCTACAGCCCCAACTCCGGGGACGTGCAGGGCCTGCAGGCAGCCTGGGAAGCTCTGCAGACCAGTGGGCGGCCCATCACACCGGGTACCCTGCGCCAGCTCGCCATCACCCACCACGTGCTCTCGGGCAAGTGGCTTATGCATCTGGCACCGGGCTTCAAGCTGGACCACGCCTGGGCTGGCATTGCCCGGGCCGTGGTTGAAGGCCAGCTTCAGGTGGCCAAGGTGAGCCCACGTGCCAAGGAGGGTGGGCGCCAGGTCATCTGTGTTTACACGGACGACTTCACGGACCGCTTGGGTGTACTGGAGGCGGATTCAGCCATCCGTGCAGCGGGCATTAAGTGCCTGCTCACCTACAAGCCTGATGTCTACACCTACCTGGGCATCTACCGGGCCAATCGCTGGCACCTCTGCCCCACTCTCTATGAGAGCCGTTTCCAGCTTGGGGGTAGTGCCCGTGGCTCCCGAGTGCTTGACCGTGCCAACAACGTGGAACTGACCTAG"; - - Assert.Equal(expectedCds, actualCds); - } - - 
[Fact] - public void GetCodingSequence_NonZeroStartExonPhase_CdsBeforeFirstExon() - { - // NM_001220775.1, chr7: - var genomicSeq = new SimpleSequence( - "ACTTTAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGTAAGAGTTAAATGTTTGCTGTCTCTTAAAAAAAAACTATGTGGGTGTTTTAGATGCAAGTAGAAATGAGTTGAGGGTGGAAGAAAGGGAAAAAAATCTTATTTTTTCAAAAGGAAAAATTGGTAAGCTTAACATTCCTTAAATATCTTAGAATTTTTTCCAATAAGTATCTTAAAAATAACAAACCTCCCATCAGTTTTTCCTAGATTTGATTTTGCAGCATCTGGGGCCTGCCCTGTGATCTGCCTGTGGACATCGCTCTTAGGGGCGGCTGCACCAGCGTGCACAGGGTGGAGAGTTTGGGCCTGGCTCGTCCGGGGGACACCACACTGCAGGACACTCCAGGCCTGGCCGGCTTCTCAGAGCTTCAGATCCTCATTTTTCATATGAAGCTCCTAATGCTCCCCTTATGGGGGACTCTGAAGGGTTAATGGGAGGAATCATACAGTGACTGACCCCTGAGAAGTGTCCAGTGAAGACAGGGCTTAGCTAGGATTGCTGTTTTGCCTAATGCTCTGCGGGATTAAAAAAAAAGAAGAAGAAGAACAAGACCATTCGTCTCTCTAGGAGCATTGCCCAGAGTAGGTATTAGACACACCAACACCACCATCCAGCCAGACGCTGCAGGGACAGTGAGCCAGGGTCCGAGTGGAAAGGCGCTAGGCTTGGGAACCAGCTCAGAGTCAATACAGAGCCACCGCCACTCACCAACTCTGTCAGCTTAGTAAAATGGCTCTGCCCCTAGAGCCCTGGTTCCATCCTTTAGTATCTCACAGGGTGATTGTGAATATCCCATGACTCCAAGATTGAGAAAACGTTTAGAATCCCTCGGTGTGAAGGTTAACTCTGTCCGGAAAGAGGACCAGTAAAAGCTTCATGAGGCTGAGATGCACTTTGGAAGAGGAATAGAGTTTCAGCACATTCTAGGTGTTGGAGGAATGGGGGAATCTAGGCAGATGTTTAAAATCAATGAGAAACCAGAATGCTGACCATGAGGGTTGGAGTGGGGGCCTAAGGACATGACGGAGGAGCAGGGTGTGTTCCCAGCTTAACTCAGGTACCCATGGGGAAGCAGGAAAAGTGAAGGTGTCCTAGGCAGCTCTGCCACAGGATGAATGGCTTCAGATGCCAGGTGAGCGAGGGACCCTTCATTCAGTCAGCAGGAAAGAAGCACTGGCATATTTTTTATGAGAACAAAGGCTAGGATAGTAAAGACAGCAAGTACCAAAAAATGACTGGAAAAGGGAGACTGTGGAGGCAGTGGCAGCAGGCATGGAAAGAAGGGCTTGTGAAGGGGAAGGGGTGGTGTCAGAGGAACATAGGGCTGGGGGCAGGGATTAGGTGAGGGAAACCATGAGTCACACTGATTCTAGAGTAGTGTGCCCTTGATGAAAAGGATAACACCAGGTTCTAGGAAAAGATGGGGTTCTGTTTTTGACGTGTTGATTTTCAAGGACTTCTGGTGTTTGTGACACATGGGGAAATTGTGGTGGGAGAGAGGTGGGGCCAGAACAGGGGCTGGTGAGGCCAAGGGTCCCAGAGGGCACCTGTTGACCTGCAGGATGACATGAAGGGGGAAGGACAGAGGCAAGGCCAAGTCCTGGGCACCAGCCTCCCTCTTGCAGCTTCAAATAGGGCTCCATTTTGACCTTTTGATTAATTAGAGGTTTGTCATAGGTTGGGGGTTGAGAGGAGCAAGGGAG
AGAAGGATTCAGTGTACAAAAAGAATGAAAGCCACTGGCTGAGCCAGTGGGGAGTTGTCCACACACACATGAGCCTTTGGACCATGAGAACGAGGGAGGCCTTGCCTTCCTGAACGGAGTAGGAGTGAGGTCCTGTGCTGAGCGTAAGCAGTGGGATTCCCACAGCACTGGGCACAGAGCCCACGGGCTGCCTCCTGAGCAGCCAGCATCTGCCTGGGGTGGACACAGTGACAGAGAGATGGGTGGTGACTGGGGTATGGGCAGAGATAAGGCAGCAAGTGTGTGCAAGGGGAGTGAAGGGTTACTGACCTTAAGAAGCAGGGATGGCGTCCTCTGTCAGGTGAGGAGCCTGGAGAATGCTTTGGTGAATGAACGTTTGCAGCCCCTTTTAGCTTTTGGAGACTTGAAACCAAAGGAGAGATTCATCTGTGAAACTCTACTGGAGCCACTCCCCAACCCCCACCCTTGTGAGACCACAATGTGGGCGTTGGCTTGAGATGCTTCTGTGTTAGTAGAAGAAATAAACAACACAGTGCTCTGATGAGGCAAAGCGAAGATGAAAAAGGAGTTCCCAGGGGACATAGTAGGAACAGTGGACGAGGGTAGCAGAAGAGGAGTTTGGAGCAAAAGACTCACAAGCAGCTGCATAATCTGTTGGTGCTTGGCAGTTCATTTGTAAAAATGATGCCTCTTCCTGCCCTAAAATACCTACCTTACCCCCGCTTCAACTTGATGAGATTTCCATCAGTCACTCCCAATGTGTCACAGCTTCTGCAGCCCTAAAATTAAAAGGTGAGTGAGTCTCTGAGGCCCCTCTCCACTTCTCGGATGCTGAGTTTAGCCTTCATGTGAATGTGGAAAGACTAGGAATACAGCTGTTATCACACAAGCTGGCCCAATAGTGGTTCAGTTGAGAGAGCCCCATCCTTCAGAGTCAGCTCCAGCTAGGAGTGACTGGTGGCCTTGAGCATGGTGCTGGGCTTAGTGTTGCCATCTGTGGAATGGGTGTGGGTCTGTTGCCCTGCCTCCTCCCAGAGCTATTCTGAGGCTCAGAAGGGGTGATGGATGTGATGGTGCTCCCAACACTAGAAAGCATCTTAAGAATGTAAGATTTTCATGATGACTGTTGCTCAGAGTGGCTATTATAGTTTTGCTTTATTGTTCTATAACCTATGATTAAAATTTTTACCTTAAACTTTGACGTGAGTGTGAATAAGTATTTGTTTTGCCAGCAACATTCCTCACCACTGGGGCCATTAAAGATCTCCCCCTCTGAGACCATCAAATACAGGTCAACAGGACTGATTAATCTAATTAGAAAAGGGCTTGTATTAAATAGCAATGATAATTGTTGTTTTTAGTCTGTCTGGTGTTTGACTTGGGAACGTTTTTAAAATAGAGAAAAGCACAAAGAGGAAAACAACAATTACCAATATTCCTGCTACCCATTATAATTATCTAGGTATATTTTCTTCTTTTGTAAGAAAAAGAAACCCTGTTATATTGTTAAAATAACACAAAGTTAATATAAAGAATTTTAATGCAAAGATTAATGTTTTCAAATCACCACAAAACCCAACATCCAGAAATTACCAATATTAAAAGTAGAAAAGTATCATTCTAAATATTTTCTGTTGCATATGTATGTGAGTGGATAGGCTGATGAATTAGGTGGATTGATGGATAGGTAAATATGAAATAAATACTTTCATAAATATTCCAACTTATCATACATGCCTTAAATTCAAGAGGTGAAAAAAGACCCAAACAAAACTAGAGAAGCGGCTTATTTTAAATATCCTCTGACATAAAGGAATATTATATTTAAAGGATCCTCTAAGATTAAAAATATGTACTATGAAAAACATTAAGAAATTTGAATTTTTTTTAATCCATTTGTTTCAATTTAAGCAGCATCTACTGGCTCACTGCTTTGAAAAATAAGGACAGTATTCCAGTTCACATTCAGTGTTCCAGTGTTCACATTATCTTATTATTTT
TACATTGTCCAGCTTTGTAATATTCACATTCTATTCTGTAATCATAATTCATAGTAGTTTAGTTATTTATTACTAACTCTATTTAAATAGATTCAAGGATCAGACCCTGCCCTTTTCTTCTTATTTATGTTTATTTTGATTAATCTCTTAATTGATTGGACTTTACATTCAAGCAACTTTTTTAAAAAAAAGTTTCTATAGATGTTCTATTTCTATCATTGTATTGTTTTTGAGGATGTTGGCCTGTTGCCTTTGTATTTGATGAGCATTTTGACAGAGTCTATGGTCTTGGGCCACTCTTTCTTTTTCTCCCTTGAGAACTTTTTAGATTTTGCTGATGGCATTGCTTGTTGAATGTTGCTGTGGAAACATCAAGTCTAGTGTAACTGTTTCTTCTTCAAGGTGATTTGCATTTTATTCCTGAATGCCTGAGGGTTCTTTATTTAACCTTGAAGTTAAATACCCTAATTAGGATGTATCTTGGTCTATTCATTCGGAATAAAAAATTCCTGCCATTTTGTCTAGAGAGTCCCTTTTTTTTCTCTTTATTTCTGGGAAATTCTCTTTTATATAAATATGTTTTGTTCCATCTATTGTGATCTCTGTTGAGGGATACCAGTTGTCCATATGTTAGATAATTTGTCTTCCATATCTGTTAACAGTTCTTAAAGTTTTTTGTTTATTTCTTTGTCTATTTTTACATTTACTCACTGTTCTCTTGTGGTTTTCCTCTGTCAGTAATTTAATTTTTAGTAGTTCCTGTTCTATTACTTGCTATTTTTAATCCATGCATTAATTTTATAATAATATTATTTTGCTCCTTATTTTGTCTCCTGAGACCCGAAATCTCTTTTTTCCTCTTACTCTGTTGCTTTTGCATTTTATTTTGAATACTTTTAAAATTGATTCCATGTTATGAAGCAATTATGAGGCATTTCCTCTCTTGTTGGAATTAACGATTTTTTCCCCTAGGAGGGACTCTATGGTCTGTGTTTTACTTCCTTTCTTCCCCTGTATTTCTAGAAAATATTTTCCTAGTAACCCTGACATTTCTTTTCATCTTGCTTATTCTAGTTGGTCTGATATAGCTTGATTGACATTTCAGCCTTCTTCCCACTATATTTTTTTTTCCTGTGAGAGCTATTGGGTTTTCTAAATCCTGAAAGAATGCCAAAGATGGGGTTGGAGGAGTTTGGTGAGGCAAAGTGCAGCCTTTGTTAAAATACTTTTCCTTTGCTCTCTCTCCCTCATCTGAAATTTAGTTAAATACCCTAAGCCATCAGCACTGTACCTAGTTGGGGAATGCTTTCATCCCCACAGGAGATTCTCTGGGGCTTTGGGCCATCTTCCCCTTCAGTGTAGACCACAGAGGACTTTGCTTCTGTCCCAGGGAGCCCGCAGGGGCTCACTTCTCCATGTTCATCTGATTCTTGTCAGCCAAGGTTTCAAATGCTTTTCTGATCAGAACAGGGAAAAGATACCTATCTGAATCATGTCTTTATAGATATGAGGCTATGAGGGAAAATTCTGAGGTTATTCTTGACTCACACCTAAAGATTTGGAAATGAGATTAGCAGCAAAGCTTTGCCCTACATCTCATGTCAGAATTTTCTGTTTCTTTCTAGTCTTTGAGTGTATGTGTGTTCTCACACACGCCATAATGAAATGCATATTATATATAATTATGTGTATATATAATATTCTATGACTATACATGACATGTTCCTTTAGCTGATTGCTGTTAAGAGAAATTTATAGGTTTTTATTTTTCTTGTTTTGTTGGGTATTAAGGAAGAGAAATTCTATGGTAATTTTCATGTGGCACAGTAATCTGGCATATATGTTGATTTTTTTCCTACACCCATTTGTTGTGATACCAAGTTTGAAAACAACAGATTTCAGTGGTTGCTTGGGAAACCACAGAACCATGACTTGGGGAGAGACAGGATGATTAGGTGGGAAAGCACCCTTTTGGTGGGGCTGTAAAGACTTTTATATTT
AGCAAAATTGGCTACAAAGTCCATTCCCCTCCTTTTCTTGCCTTGATTTGGTAGAGGGATAGACTTGGATACAAACTAGAATGGATTCATTCTTCTCTGGAGTTAGTGTAACAAGACATTTAGCTGCTCAACACAAAAACAGAAACAAAAAAATTGTGTGGTTTCAGCAGTGCTATACAATTACTTTTTCTGACCTTTAATGGAGAGAAACACCACTTCTTTGGTCCCTACCATCAGCTTCATAGGGTTTTCATCCTGTTCTGTTTCTGGGAGGGCGTAACTGGCCATGCACAAGTTTTTTTTCTCTAATCAGAGTATGTGCCACTTCTGACCACCAGTAGATGAAAACGAATGGAAACCAGGCTATTATATGATACATATCCATTACAAAATAAGACATGAAACTCAAAGGTACTTTATGGTATAATGGGGCATATATTCCTGGACAATTCTTAATGGTCACAGATTTTATAAAAGGACTATTAGTAAATGTATGAATTACAGAGTAATTTATCCTTCTGTTAGTAAGAACCAGCTGATGACCTCAGTGTCAGGTGCATCGTGGAAGGTGTTGGGACCTTCCCTTGCCACCACCCTCACCAGCCATCATCAGCCATAACCTGCACATTGGGGAAGTTTTGACTTATCCCTCACTTTTGCCCCTCTTCAAGCTGTTCTTTCCACAGTGAATGAGAAGGCCACTTCTTCCTTCAAACCTTTCAGTGGTTTCCATTTTCCTTTAGACAAAGTCTCTGCCTAGCTGGCCTCTGCCTGCCCCTCCTGCCTACCTCTCGAGCACTGCCCCCACCTAGGGCTCTGGTTCCCCAACCTTCACTCGGTCCTGCCACACCTCCCAGCCCCTTCTCCCTTCAGAACTTTCCTTCTTGTTGTCCCCAACACTGGGACACAAAACCCTCCTTATCAACCCTCCTTATCTGGCTGACTCTTACAAGATCAGAAACCTGTGTAATGCTCTCATGGCACGCTCCCCTTGTCTTCGTGGATTTCTCAGATGGGAAGGAATTATCCATGCAATCACACATAAACTTCTACCTACCCTCCCCTAGTAGCTGTCTGCTGCTAAGGATGGGGACCATTCTCACTTACTCACTGTTCTGTCCCTCTGCCCAGTCCAGATGTGTTGAAGGATGGAAATATACAGAGTAGTGGTAAAATATAAACCGTTCAGACATTCCAAGGATGGGCTCATGTGCTTTGACTCATTAATGTACCACTGCTGAAAACAGAACACAGCCGCAGTCTTGCCAGTAAGAGTGCAGTTACTGTAATTAATGAATTTGCTAATTAAGCCATGATTTCATACTGAACTTATGACCAACATATTGAGAAGGTGTGTCTTCAAGAAAATTTATTTTTTGTATTAAGATATTTACTCCAAAGCTAATTGAAGAAGCCAAATCTAGGCTCTGGTTTCACCATTGCCAGGGAAATGAGCTCATGGACTCCTATGAACTGATGATGTTAGATCAGAAGTTTCTCAAGGCCAGGGCCCAATCACTGCTGAGGCGTCAACAGTAGTTCCTTGTACATCAATAATTCTCATTACTTTTAAAAAATAACAGATGAATAGCAACTATTTTCCCTGTAGCTCCCTTGCTGTGCCTCCTACCCTCCACCACATGTTTCTGGGGAGCCCTGCTTCGGGCCTGCCAACTACAGAGAATTACTTTTGAGTATCCCTTCCACTCTCATCTCAAGACAGAGTTCATCTACCTTTGGGTTATTTGTCAAAAATGTGTCATTTTATTACAAAAAATATACAATCATCATGTATTTTGATTAAATTTTACACTAGATTATTAAAATTATTAAATACAATTATTAAAATTAATAATTTAACATATCACATATTTTAAATATATTGTATATAATGAATAATAATATAATTATTGTCTATTTTAATTCAATAAATGTATAGTAAGTTAGCCAGTTGTAAATTACTGAGAACACTCTACTGAAAAAGCATCATTTCAAATACA
CTATTTAAAATATTAAATGAAATACAATAACATAATTAAACTAATCTTTGGTTCCCCTATTTATGTATTCATTTATCCAACAAAATCTCCTTAAGTGCTTATAATGGGTAGGTCCTGGCTCGGTGTCCCCTAGACAGACGCATGGGCCTTCCCCCAGCCCGTCAGTATGGTGCAGGTGTGATGTGTCCGCAGGTGTGTGTGTATGTGTGCAGGTGTGGGGTCCGCAGGCGTGCTGGGCCCCCAGGCCGTGTTCCCCTTCCCCTCCCCGGTTGTAGATTTCAGCTGTTGCTGCCAGACCTGACCGGTTCCGGAGGTGGCCGCGCCCCACTCACTGTCGCCTGCTTTCCACAGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCT
TTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTG
CCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAACC", - 50459420 - 1); - - var codingRegion = new CodingRegion(50459422, 50468325, 169, 1053, 885); - - var rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG"), - new RnaEdit(4, 3, "C"), - new RnaEdit(5325, 5324, "AAAAAAAAAAAAAAA") - }; - - var regions = new 
ITranscriptRegion[] - { - // insertion - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459424, 204, 206), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459425, 50459561, 208, 343), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 344, 5527) - }; - - string actualCdna = - new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits).GetCdnaSequence(); - const string expectedCdna = - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGA
CACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATA
TAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"; - - 
Assert.Equal(expectedCdna, actualCdna); - - // var cdsTemp = actualCdna.Substring(codingRegion.CdnaStart - 1, codingRegion.Length); - const string expectedCds = - "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"; - - // Assert.Equal(expectedCds, cdsTemp); - - string actualCds = - new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence(); - - Assert.Equal(expectedCds, actualCds); - } + // [Fact] + // public void Create() + // { + // // ENST00000374673.3 + // var sequence = new SimpleSequence( + // 
"GGGGTGTGTCTCCAGGGCCTTCCGCACTCAGCCAGGGAGAGCAAACAAACAGGCTTGGGGGACTGGGGAGGGGGGAAAGCGGAGGGGCAGGGTAGGGGCGGGGCAGGAGTGGAAGGCGGGGCAGGAGCAAGCGGCCTGGGCAGGGCAAGGGGGCCTCAGCTGGACCCTCGGATACTCACGGCAGTTGGCTTCATCAGTTCGGTCCTCACAGTCAAAGTCACCATCGCAGCGCCACAGCTTGAGGGCACAATGTCCATTCCCGCAGGGGAACTCGTTGGGCTCACAGGGTGGCGGGGGGCCTAGGAGACCGGGCAGGGGTCAGCAGCATCCTCCCGGGCCAGCTTCCTGCTCCCCGCACCCACCTGCACCCCTGCCGGTGCGCACCACAGTCTAGCTCATCGCTGCCGTCCTCGCAGTCCTCCTGTCCGTCGCAGAGGTAGTCTCTGGGGATGCAGTGCCCATTGCGGCATGCGGCCTCCTGGGGCCCACAGGGCAGGGGCCTGACGGAACCGGGAAGCAGGGGCTGAGGAGCGTGGGTGACTGGTGGCTGTCGCATGATGGTTGTCTCTGGCCGGGGCGGTAAAGATGTCGTCTCCACAAGGAGAGAGAATGTGGGGCTGATACCCAGGACTGGCTCCTCTGTGGATAGATTCCGCTTGGCATTTGGCAGAAGCAGATGGCTCCTCACCTGCTCCTTGTCCCCAACCCTCCCCAGGCCCACCCTGTACTCCCCAACACCACTCCCTGCCACCCCCTGCCTGGCTCTGTCATCACCCTTCCTATGCCCCCATCCTCTGCCTGCACCAAACCCTCATAGTCCTTGATGGGCTCCAAGACCCAGGTGTAGGACCCTGGCCCTCCCCTGGCACCCAAACCACTCGTGGCCCCGGACATCCCCTCACCACAATTGAGCTCATCAGACATGTCCCTGCAGTCGGGCCGCCGGTCACAGCGATACTCCAGGGCCACACACTCATTGTAGCTGTGGCAGGCAAACTCGGCCTCCGTGCAGGCTCTTGGGAACTGGGGCACTGCAGGTGGAAAGGAAGCAGACTGGAGTCAGAGGCGGCAGGAGGCAGGTGCGGGAAGCTGTAGGTGCTGTGTGGCTGGAGTGGGCTCCAGGGCCCTGTGTCAGGCAGCTCGGTTTCTGGCAGGCACAACGAGGGCAAGCAGCACACACTAGACACATCCACAGCACACGTGGGGCATGGGACATGCGGCAGTGGCCTCCCCCATCTCTAAAACAGACCCCACACACAGTTGACATGCCACACGCATGCAACCACCACACCACACACATGCAGGCCACAGCCTGGCCCAGTGAGGACAAAGAAGGAGGGGAGAAGGGAGTGCCCAGCTGTCTTGGGCTGTGCCCAGCCAGCCATCTTGCCCACACCCTTCTTTCCTCTCCATCCTTTAAAAAATTTTTTTCTCTCTTCTTTTTTATTTTTTTAGAGACAGGGTCTTGCTACGTTGCCCAAGCTGGTCTCGAACTCTTTGCCTCAAGCAATCCTCCCGTCTTGGCCTCCCAAAGTGCTGGGGTTACAGGCGTGAGCCCCTGCACCCGGCCTCCTCTCCAACCTTAACTTCTCTAGGAACCTGGCTGGGCCTCGGCCTGGCTTACACTCTCACCTGGTGTCACTGCGACCGCCACAGCGGCCGGCGGGGGTGGGGGGGTCTGTGCTGGAAAGGAAGATGTGATCAGTGGCTGTTCCACCTGGGAGCCGGGAGCTGAGGGCTGCAGGGCTGGGCCACATTCCACCATCCCTAGCCAGGAGGACTTATTGAAAAGTGAGAGAGGAGGGCTGGACCCCCAGCAGTCTTTAGACCTGGGCCTGATGATGCAGAAGAGCAAGCTTGATCTCTGGGTGCAATAATTAAGGGTTTTTGTTTGTTTGTCTTGTTTTAGAGGCAGGGTTTTGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCATGATCCTAGCTCACTGCAGCCTCAAACTCCTGGGCTCCGGTGATCCT
C", + // 22213528); + // + // var codingRegion = new CodingRegion(22213728, 22215214, 1, 538, 538); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 4, 22213728, 22213827, 439, 538), + // new TranscriptRegion(TranscriptRegionType.Intron, 3, 22213828, 22213912, 438, 439), + // new TranscriptRegion(TranscriptRegionType.Exon, 3, 22213913, 22214167, 184, 438), + // new TranscriptRegion(TranscriptRegionType.Intron, 2, 22214166, 22214430, 183, 184), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 22214431, 22214559, 55, 183), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 22214560, 22215160, 54, 55), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 22215161, 22215214, 1, 54) + // }; + // + // const string expectedResults = + // "NCAGCACAGACCCCCCCACCCCCGCCGGCCGCTGTGGCGGTCGCAGTGACACCAGTGCCCCAGTTCCCAAGAGCCTGCACGGAGGCCGAGTTTGCCTGCCACAGCTACAATGAGTGTGTGGCCCTGGAGTATCGCTGTGACCGGCGGCCCGACTGCAGGGACATGTCTGATGAGCTCAATTGTGAGGAGCCAGTCCTGGGTATCAGCCCCACATTCTCTCTCCTTGTGGAGACGACATCTTTACCGCCCCGGCCAGAGACAACCATCATGCGACAGCCACCAGTCACCCACGCTCCTCAGCCCCTGCTTCCCGGTTCCGTCAGGCCCCTGCCCTGTGGGCCCCAGGAGGCCGCATGCCGCAATGGGCACTGCATCCCCAGAGACTACCTCTGCGACGGACAGGAGGACTGCGAGGACGGCAGCGATGAGCTAGACTGTGGCCCCCCGCCACCCTGTGAGCCCAACGAGTTCCCCTGCGGGAATGGACATTGTGCCCTCAAGCTGTGGCGCTGCGATGGTGACTTTGACTGTGAGGACCG"; + // var codingSequence = new CodingSequence(sequence, codingRegion, regions, true, 1, null); + // var observedResults = codingSequence.Substring(0, expectedResults.Length); + // + // Assert.Equal(expectedResults, observedResults); + // } + // + // [Fact] + // public void Length_ReturnTrueLength_WhenGapsArePresent() + // { + // const int expectedResults = 720; + // + // var sequence = new NSequence(); + // var codingRegion = new CodingRegion(10051, 12770, 51, 769, 720); + // const byte startExonPhase = 1; + // + // int naiveCodingRegionLen = codingRegion.CdnaEnd - codingRegion.CdnaStart + 1; + // + // var regions = new 
ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 10001, 10299, 1, 299), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 10300, 12300, 229, 331), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 12301, 12970, 331, 1000) + // }; + // + // var codingSequence = new CodingSequence(sequence, codingRegion, regions, false, startExonPhase, null); + // var observedResults = codingSequence.Length; + // + // Assert.Equal(expectedResults, observedResults); + // Assert.NotEqual(expectedResults, naiveCodingRegionLen); + // } + // + // [Fact] + // public void RnaEdits_snv_forward_no_utr() + // { + // //NR_002754.2 + // var genomicSeq = + // new SimpleSequence( + // "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgtttattagcagggctt", + // 11968210); + // var codingRegion = new CodingRegion(11968211, 11968329, 1, 119, 119); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 11968211, 11968329, 1, 119) + // }; + // + // var rnaEdits = new IRnaEdit[] {new RnaEdit(107, 107, "t")}; + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); + // + // Assert.Equal( + // "actctggtttctcttcaaatcgtataaatctttcgccttttactaaagatttccgtggagagaaacgagtgtgagtctgaaaccaattttttgaggccttgcgttttttagcagggctt", + // codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_snv_forward_with_utr() + // { + // //NM_001144032.2 chr1:148644011-148644795 + // var genomicSeq = new SimpleSequence( + // 
"ACTATAAAGACAGTGAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGTGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCACGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTTTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATGGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA", + // 148644011 - 1); + // + // var codingRegion = new CodingRegion(148644086, 148644580, 76, 570, 495); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 148644011, 148644795, 1, 785) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(420, 420, "C"), + // new RnaEdit(500, 500, "T"), + // new RnaEdit(737, 737, "T") + // }; + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); + // + // Assert.Equal( + // "ATGGTCAACTCCGTCGTCTTTTTTGAAATCACCAGGGATGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCGCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCGTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA", + // codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_snv_reverse_utr() + // { + // 
//NM_031947.3, chr5:140682196-140683630 + // var genomicSeq = new SimpleSequence( + // "TGCATGTACACACAAATGGCTTTATGCAAAGGCCCTGACAGAACGATATTTAGTTTTTCAGATTAGGTACATAGGGCCAACCAGCCCACCCTGTACATTCCAGCAAGTGCAAGAGCAGCAACTTTCCTATTTCAATACAATTATGGGCAGAAATTATATGATGTAAAATAGAGGCCCTTCCATAAAGTTAAGATTTAGGGTAGAAGAAGGGAAGATAAAACCAAAATTCCCATGAAGTCAAAATTAGACAGTGGTCTTGTACTCTGCTGAACCCTGTGATGAACTGTAGTCCTCAAACTCATGGACTCGGATCCAGGTTCACCAAGACACTTCAGTATGCTTCCAACTGTTTCATCATCATCTTCCTGCTGTATTCGTAGGCCACAAACAGTGCCCCATTGGCAGGGATTGCTCGAATCATAGTAGCTTTCAGTCCAGAATATAAGGCTACTATTCCTTCATTTCTCACAACACTTAAGAGGGTACCAATAAATCCTGCCTGTTTCCCATACATGGAAAGAACTTGAATTCTGGATTTAATACAATCCACTGGGAACACGACAAGCCACAGGCAAATTCCAGCAACTCCACCACTTAACATCAAATGGACAGGGCCTAGTTCATCTTTTGATCTCCCTGACGCAAAAAACGATCGGCTCAGTTCATAGCCACCAAAGAAAAAGAAATAACCCGGTACTTCTTGAAGTAGAGTACTCGAGAGTCCATGGTAGAAGCCCAAGGGGCCATCCTTTTTAAGGATACCCTTCACGACAGACCAAATTGTATTATGGCTTTTTGCTATCTTCCCTGACATCTCCATTTCATACATGGTCTGTAGCCGGCACTTCACAAGCTCAGTGGGGCAGAGAGCCAGTGCAGCAAATGCAGAGGCGAAGGACCCCGCGGCTGCAGTCTGGAGATCACTCAGCTTTGCCTGCTTGTCCATTCCAGCCACTTTCCTGACAAACTGCTGGCAGAACCCGTAGCACATGAAGAGGACCGAGTTTTCGGCGACGTAGGCCATAAGTGCCGGGCCGGTGCCCTTGTAGAAGCCCCGGAGACCCACTTGGGCGTATGTCTTCAGGAAGCAGTCGGTGAGGCCCTTGTACAGGTCAGGGAACGTCTGCATCTTCACTTTTATTGTGTCGAAGGGCTGCCCAGTCAGTACACACGCTGTCCCCCCTGCGGCCCCCGCTGTGAGGTCGATGGCGGCTTGGATGCCAGGACCGGACTTCATGTTCGCTCACTCGTCTGAGGGTCCCAGTGGAAGGCGACTAACTCCCCAGAGCGTGAGACCGGCTTTTCACGTCCAGCCGCAGCGAGCGCGGGGAATGGAGTTGGGGGTGGTGGGGTGGCTCTACCGCCTGTTCTGGGCTCTCACCCCAGTGCGGGGGAAGCCGCTCAACCCTACGCTCCGCCGCGGGCCGCCCCCTCC", + // 140682196 - 1); + // + // var codingRegion = new CodingRegion(140682527, 140683432, 199, 1104, 906); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 140682196, 140683630, 1, 1435) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(366, 366, "T") + // }; + // + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, 
true, startExonPhase, rnaEdits); + // + // Assert.Equal( + // "ATGAAGTCCGGTCCTGGCATCCAAGCCGCCATCGACCTCACAGCGGGGGCCGCAGGGGGGACAGCGTGTGTACTGACTGGGCAGCCCTTCGACACAATAAAAGTGAAGATGCAGACGTTCCCTGACCTGTACAAGGGCCTCACCGACTGCTTCCTGAAGACATACGCTCAAGTGGGTCTCCGGGGCTTCTACAAGGGCACCGGCCCGGCACTTATGGCCTACGTCGCCGAAAACTCGGTCCTCTTCATGTGCTACGGGTTCTGCCAGCAGTTTGTCAGGAAAGTGGCTGGAATGGACAAGCAGGCAAAGCTGAGTGATCTCCAGACTGCAGCCGCGGGGTCCTTCGCCTCTGCATTTGCTGCACTGGCTCTCTGCCCCACTGAGCTTGTGAAGTGCCGGCTACAGACCATGTATGAAATGGAGATGTCAGGGAAGATAGCAAAAAGCCATAATACAATTTGGTCTGTCGTGAAGGGTATCCTTAAAAAGGATGGCCCCTTGGGCTTCTACCATGGACTCTCGAGTACTCTACTTCAAGAAGTACCGGGTTATTTCTTTTTCTTTGGTGGCTATGAACTGAGCCGATCGTTTTTTGCGTCAGGGAGATCAAAAGATGAACTAGGCCCTGTCCATTTGATGTTAAGTGGTGGAGTTGCTGGAATTTGCCTGTGGCTTGTCGTGTTCCCAGTGGATTGTATTAAATCCAGAATTCAAGTTCTTTCCATGTATGGGAAACAGGCAGGATTTATTGGTACCCTCTTAAGTGTTGTGAGAAATGAAGGAATAGTAGCCTTATATTCTGGACTGAAAGCTACTATGATTCGAGCAATCCCTGCCAATGGGGCACTGTTTGTGGCCTACGAATACAGCAGGAAGATGATGATGAAACAGTTGGAAGCATACTGA", + // codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_insertion_in_utr() + // { + // //NM_080431.4, chrom: chr1:2938046-2939467 + // var genomicSeq = new SimpleSequence( + // 
"TGGAAGAGGCCTCAGCAGGCCCAGGCCACCTGGAGGGAGAGCAGACCTGCGGCTGAGGATGCAGGGCTCCCGGGCACGGTGCTAGCCCTGCCTTGAGACACCCCGAGAGCTGTGGGAAGAGCTGTGGGATCCCCTATTGCATCACAAAGCGGCCCTGGAGGGCTGGTCTTTATTTTGATGAGGCTGAGAAGGGAAGGCTGCGGGCATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGAAGGCCGCTTCTCGTTGGGTACCGTGGGGGGTGAACCCTAGCCCCAGCTTTGGGAGGATGTTCAATAAAGGACCAATGCCGGAA", + // 2938046 - 1); + // var codingRegion = new CodingRegion(2938251, 2939384, 206, 1339, 1134); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 2938046, 2939467, 1, 1422) + // }; + // + // var rnaEdits = new IRnaEdit[] {new RnaEdit(1423, 1422, "AAAAAAAAAAAAAAA")}; + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); + // + // var expectedCodingSeq = + // 
"ATGTTTAATCCGCACGCTTTAGACTCCCCGGCTGTGATTTTTGACAATGGCTCGGGGTTCTGCAAAGCGGGCCTGTCTGGGGAGTTTGGACCCCGGCACATGGTCAGCTCCATCGTGGGGCACCTGAAATTCCAGGCTCCCTCAGCAGAGGCCAACCAGAAGAAGTACTTTGTGGGGGAGGAGGCCCTGTACAAGCAGGAGGCCCTGCAGCTGCACTCCCCTTTCGAGCGTGGCCTGATCACAGGGTGGGATGACGTGGAGAGACTCTGGAAGCACCTCTTTGAGTGGGAGCTAGGCGTGAAACCCAGCGACCAGCCCCTGCTTGCAACGGAGCCCTCCCTGAACCCCAGGGAGAACCGTGAGAAGATGGCAGAAGTCATGTTCGAGAACTTCGGCGTGCCCGCTTTCTACCTGTCGGACCAGGCGGTGCTGGCTCTCTACGCCTCTGCCTGTGTCACGGGCCTGGTGGTGGACAGCGGGGATGCGGTCACCTGCACTGTCCCCATCTTTGAGGGTTACTCCCTGCCCCACGCAGTCACCAAGCTCCACGTGGCGGGCAGGGACATCACGGAGCTCCTCATGCAGCTGCTCCTGGCCAGCGGCCACACCTTCCCCTGCCAGCTGGACAAGGGTCTCGTGGACGACATCAAAAAGAAGCTGTGCTACGTGGCCTTGGAGCCCGAGAAGGAGCTTTCCCGGAGGCCGGAGGAGGTCCTGAGGGAGTACAAGCTGCCCGACGGGAACATCATCAGCCTCGGGGACCCGCTGCACCAGGCGCCCGAGGCCCTGTTCGTGCCCCAGCAGCTGGGCAGCCAGAGCCCCGGGCTCTCGAATATGGTCTCCAGCAGCATCACCAAGTGTGATACCGACATCCAGAAGATCCTCTTTGGGGAGATTGTGCTGTCGGGGGGCACTACCCTGTTCCACGGGCTGGATGACCGGCTTCTCAAGGAGCTGGAGCAGCTGGCCTCCAAGGACACCCCCATCAAGATCACGGCTCCCCCCGACCGGTGGTTCTCCACCTGGATTGGAGCCTCCATCGTCACCTCTCTGAGTAGCTTCAAGCAGATGTGGGTCACCGCCGCAGACTTCAAGGAGTTTGGGACCTCCGTGGTGCAGAGAAGATGCTTCTGA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_insertion_reverse_in_utr() + // { + // //NM_001242659.1, chrom: chr1:1533388-1535476 + // var genomicSeq = new SimpleSequence( + // 
"TCTGTTGGTCTGAGAATGATGGACATTTAGACACTGGCGCCAGGTTTGCGCCTGACCGGCGCCACGCAGGGGTGGGCGGAGCAAAGACACACAGGTGGGCTACAGGTGTCACACGGCACCAGCCAGGGCCCGGGGTGGCTGGGGTGAGGATGGGTGTTTGGCCAGTGACCAGGAGTCAGGTCAAGTCCAGGTGGTCAGTGCCAGGGGCTCCAGGAGGGGAGGGCAGTGCCATAACCCTCCTGGTGTCCAGCGTCACCAGGCGGTCGTCACAGAAAGCAACCTCGGCCCGGGGCCCGGGTCTGCAGCAGGTGGGCAGGGTCAGCTTTTCTTCCATGGCGGGTGGCATTGTCTGGGCCGGATACTGGCTCTCGACCCCTGGGCATGCAAAGGCTAGGGGTGGTGCTGTCAGTCACACCGTTGCCACCAAGGTCCCCTGGGTCGGCTGAGGCTTGGGATCCAGGCAGCGGTGGAAGGTCGGGCTGCTCAAGGCCGGTATCTAAGCTTCTGCCCTGGGACCCAGTGGTGATGGCCGCCATCTGCCCCATTCCCACAGGGACCTAGTCAGAGGTCGCACACACAAAAGGGGTACCTGGCCCTGGAGAACCACCAGCTGCCCGGGGTCTGAGAACACTCACCCTGGCCGCTGGGCCAGGCCTGCCAGGCTCCCGGCTGGTCCAACACCCTAAACGGTACAGAGCGCTGCAGGCCCTCACCTCATGCTTCTGCAGCGCTTGAGGGTGAAGGTGTCTCCAAGGGGACGCTGGCCAGATGCATGGAGAGGCCGGCCAATCTTAGGGCCACAGACCCCCCCTGGACAGCAGAGGGTGTTGGCTCCTGCAGTGGGCCCGAGATACTAAGGCACGAAGCTAACCTAAGCCCAGTGGGGTGGGGCGGGGCAGGACAGGCTTGGAGAGCTGCGCCCCAGGCCTGCAAAGCACAGTGACCGAGCAATGGCGACGGTCTGTCTGGGACAATTCGGCACAGGATGGAGGTGCGGGGTGAGCCGGAGTGCCCATGGCTCTTGCTGGAAGGGGCTCCATGCCCTGGCCGCCTCTATAAAGGCCTGCGGAGAGCGGGGAGAGCCCTGGATGCGGCTGGCACAGCAGCGCAAGCCCAGGGGCCAATCCGGGGCCAGAGTCTGGGAGTCTGACGCCCGGCTGGAAAGGGCGTGTGATGATGCCAAAGTGCCGGAGCCGTCGCCGGCAGGTCCTCCTCCGCGGGGATCTTAAGGAGGCAGCAGGAATGAGGAGAGGAGAGCGGGCGGAGGACCTGGGAGCTCAGGCGCCCTCAGGCAGGTGGCGCAAAGATGGGCGGGCGGCCTCGCGCTTCAGGGGTGTCTGCGCAGGCCGGGGCGCGCGAGGGCCGGGCGCATGAGGTTCTCGGTGATGTAGGCCACCAGCAGGCAGATGACCACCAGCATGACGCAGATGGAGCCGCCCACCGCCGTCATGGCCACCACGATGTCCTGCATGCCGGCCGGCTCGGCGGTGAACTCCACGCACTCGGCCGGCTCGGGGGTCTCTGGCGCGGCGGCGGCGGGCCCAGCGCGCAGCGGCAGCGGCTGCAGGCACAGGCGGTAGAGGACGCTGTCGTGCACGTCGGGCAGCAGGTAGTCGCGGCAGGAGGCCCCGAGGAGCACGCGCTCGCACGGGAAGCGCGTGTAGGCGCCGCGCCACGAGCAGTTGAGCGCGAAGGCGCGCACGCGGCGCGCGGCGGCCGGGGCCAGGCGCCACTGCAGGAGGACGCTGCGGTTGCGCAGGACGCTGGCGCGCAGGGAGCGGCCGGCCGGGGCGTGCAGCACGCAGCCCGGAGCCTGGCAGCGGAAGCCGCGCGCGGGGCTGCGGAAGCACAGGCGCCCGCCGCCCGCCTCGGGGCCCTCGGGCAGCACCTTGTAGGGGCACCAGGGCGCGTCGGGGGTCGGCTCCCAGCCCGGCGGCGTCGGGGCGGCCGCGGCGCAGGGCGGCGGCGCGCAGGCGGCCAGCAGCAGCAGCAGCGGCGG
GGCGCGCATCCTGCGGCGGGGCCACGGGGCGCGGCGCTGGGTCACGCGGGCCGCGCCGCCGCCGTCCCCGCTGCCCGCTCCCCGCGATCC", + // 1533387); + // var codingRegion = new CodingRegion(1534715, 1535395, 82, 762, 681); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 1533388, 1535476, 1, 2089) + // }; + // + // var rnaEdits = new IRnaEdit[] {new RnaEdit(2090, 2089, "AAAAAAAAAAAAAAA")}; + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); + // + // var expectedCodingSeq = + // "ATGCGCGCCCCGCCGCTGCTGCTGCTGCTGGCCGCCTGCGCGCCGCCGCCCTGCGCCGCGGCCGCCCCGACGCCGCCGGGCTGGGAGCCGACCCCCGACGCGCCCTGGTGCCCCTACAAGGTGCTGCCCGAGGGCCCCGAGGCGGGCGGCGGGCGCCTGTGCTTCCGCAGCCCCGCGCGCGGCTTCCGCTGCCAGGCTCCGGGCTGCGTGCTGCACGCCCCGGCCGGCCGCTCCCTGCGCGCCAGCGTCCTGCGCAACCGCAGCGTCCTCCTGCAGTGGCGCCTGGCCCCGGCCGCCGCGCGCCGCGTGCGCGCCTTCGCGCTCAACTGCTCGTGGCGCGGCGCCTACACGCGCTTCCCGTGCGAGCGCGTGCTCCTCGGGGCCTCCTGCCGCGACTACCTGCTGCCCGACGTGCACGACAGCGTCCTCTACCGCCTGTGCCTGCAGCCGCTGCCGCTGCGCGCTGGGCCCGCCGCCGCCGCGCCAGAGACCCCCGAGCCGGCCGAGTGCGTGGAGTTCACCGCCGAGCCGGCCGGCATGCAGGACATCGTGGTGGCCATGACGGCGGTGGGCGGCTCCATCTGCGTCATGCTGGTGGTCATCTGCCTGCTGGTGGCCTACATCACCGAGAACCTCATGCGCCCGGCCCTCGCGCGCCCCGGCCTGCGCAGACACCCCTGA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void GetCodingSequence_InsertionGeneModel_InsertionRnaEdit() + // { + // // NM_019119.4, chr5:140566701-140571111 + // var genomicSeq = new SimpleSequence( + // 
"AGAATGCTACGGAAGTCCTTGACAAAAAGGAAACACTGAGACAGATGGGCTGAGAAGAAGAGCTGTCGAGTCCCTGATTGGGAAAGGAAAAATTAAAAACCCTAGATCTCTGGTACACATAAGTCTGGGTTTGCGATTGCTATTTGTGCTGGGGCAGTGTGATTGAGACTGACATTGAGGAAAGAAGCAGCTATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGCCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGTC
GTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCAGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTCTCCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGAAAGGAACCCACTTAATAAAGACATTTACTTCTTTAATATATTCTTGTTGGCTAACTAAATTGTGTATGCCCACCACAAAGAAGGTACTATTTTTTGTTTGATTCATCTTCAACTTTGCGTATTATGCTTAACTTCACAAGTTAACTTTTTCTTATTTTGTATCCTGATGAGGCATTTCTTACTAGAATCCCATAAGTGAAATATAATATTTTTCAAAGTTGATATCATTTAAAAATTTTTGGTCGTTTTAAATGTCTTTATTGACTTTAAATTCATTGCCTCTACATTATTCATTAGTTCTTCTTTTCCTAAAACTTTTTACTTGTTAAAATAGTCTGCTGCATGTAATATGTGCTTTTACTATTTGATATTTCTTCTATTTTTCTTTTGAAACCGGTGTTCTTATTGGTTTGCCATCCTTGTTCATTACAACTGTTTTTTGTTTGTTTGTTTGTTTTTTGGTTTGTTTGTTTTTTTTTTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCGATTCTCCTGCCTCAGCCTCCAGAGTATCTGGGACTACAGTTGCATGTCACCACGTTCGGCTAATTTTTGTATTTTCAGTAGAGACGGGTTTCATCATGGTGGCCAGGATGGTCTATCTCTTGACCTCGTGATCCACCCCACTCAGCCTCCCAAATTGCTGGGATTTACAGGCATGAGCCACCGCACCCAGCCTACAATAATTTTCTTAAACTTTACCTTTTATTTTAAAGTTCTAGTTTCCCGGCATTGATAGTTCCCTATTTGAAATATAATGTTTCTCTTGTAAGTGATATGATAAATAAACCCCTAATTAGCCTTAGAAGAAAAACCACTGCAAGATATTAAGCGTGTGTAAATGGGCTTTAGTCTGGAAACCAAAAAAAAAAAAAAAATTTAGTCATTCTATAGGATCATGTGAAAATATTTAATTTGCTCCTTTTAATTCTGTATAAACAAATCAGAGGTTCCTGAGGTTCCTGTTAAATTTTTAATGGCTAATAGCCCAGTGCCATCCAGTTGAAAAAACAACAGCAATCACAAAGTAGAGGTTTATATTGTGCGGCTTTTATATTCAGCTATTAGAGTGTTATTGGTAGTGTCTAGCCTTTTCCTCCACGACATTCCTTGACTTAATCCATTTGGGCCTATTATAGACAAAATAGAGCTTCTTTCTAGATATAAGGTCTTTGAGGCAGGGCTCAGTGGCTCATTCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACCTTAGGTCACGAGTTTGAGACCAGCCTGACCAACGTTAAGTAACCCCGTCTTTACTAAAAATACAAAATTAGCCAGGCATGGTGGCACAT
GCTTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAAGTTGCTTTGAGCCGAGATTGCACCATTGTACTCCAGCCTGGGCAATAAGAGCAAAACTCCATCAAAATAAAATAAAATAAAATATAAAATAACTTAAAAAGAACTTTGAATAAAATTCTATGAAAAAAGACACTAGAATGCTGTTCTTAATTTTAATAGTGTTAAGATAGGTGTTAGTGTGGTCTGTTCTTTACCTCCCTTTATTTGGTGCAGAGAAGTTAGATCCTGCTAAATTTCAATTAAGAGGGGACCTTAAAATAAGGATCAATCTCTTATTTAACCCTGTAAGTTACTTTAAAGCTAATACAAGAAAAACAAAGACAAGTGAAAGTAAGGAAACAGAAATTGC", + // 140566701 - 1); + // var codingRegion = new CodingRegion(140566893, 140569285, 193, 2586, 2394); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 140566701, 140568035, 1, 1335), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 140568036, 140571111, 1337, 4412) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(908, 908, "T"), + // new RnaEdit(1336, 1335, "A"), + // new RnaEdit(2096, 2096, "G") + // }; + // + // const byte startExonPhase = 0; + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); + // + // const string expectedCodingSeq = 
"ATGAAGACCAGGGGGTTCAGCTTTCCAAGACAAAGGCAAGTCCTGTTTCTTTTTCTTTTCTGGGGAGTGTCCTTGGCAGGTTCTGGGTTTGGACGTTATTCGGTGACTGAGGAAACAGAGAAAGGATCCTTTGTGGTCAATCTGGCAAAGGATCTGGGACTAGCAGAGGGGGAGCTGGCTGCAAGGGGAACCAGGGTGGTTTCCGATGATAACAAACAATACCTGCTCCTGGATTCACATACCGGGAATTTGCTCACAAATGAGAAACTGGACCGAGAGAAGCTGTGTGGCCCTAAAGAGCCCTGTATGCTGTATTTCCAAATTTTAATGGATGATCCCTTTCAGATTTACCGGGCTGAGCTGAGAGTCAGGGATATAAATGATCACTCGCCAGTGTTTCGGCACAAAGAGATGGTCTTAAAAATATCAGAAAATACAGCTGAAGGGACAGCATTTAGACTAGAAAGAGCACAGGATCCAGATGAAGGTCATAACAGTATCCAAAACTACACGATCAGCTCCAACTCTTTTTTCCATATTAAAATTAGTGGCAGTGATGAAGGCATGATATATCCAGAGCTAGTGTTGGACAAAGCACTGGATCGGGAGGAGCAGGAAGAGCTCAGCTTAACCCTCACAGCGCTGGATGGTGGGTCTCCATCCAGGTCTGGGACCTCCACTATACGCATTGTGGTCTTGGATGTCAATGACAATGTCCCACAGTTTGCCCAGGCTCTGTATGAGACCCAGGCTCCAGAAAACAGTCCAGTAGGGTCCCTTATTGTTAAAGTGTCTGCAGGAGATGCAGACTCAGGAGTCAATGCAGAAGTATCCTATTCATTTTTTGATGCTTCTGAAGATATTTTAACAACGTTTCAAATCAATCCTTTTTCTGGGGAAATCTTTCTCAGAGAATTGCTTGATTATGAGTTAGTAAATTCTTACAAAATAAATATACAGGCAATGGACGGCGGAGGCCTTTCTGCAAGATGTACAGTTTTGATAAAAGTATTAGATTCCAATGACAATCCTCCTGAACTGATCATATCATCACTTTCCAACTCTGTTGCTGAAAACTCTCCTGGGATAGTATTGGCTGTTTTTAAGATTAAAGACAGAGACTCCGGAGAAAATGGAAAGACAATTTGCTATGTTCAAGATAATCTGCCTTTTTTTCTGAAACCGTCTGTTGACAATTTTTACATCCTAATGACTGAAGGTGCACTGGACAGAGAGAGCAAAGCTGAGTACAACATCACCATCACCGTCACTGACTTGGGGACACCCAGGCTGAAAACCGAGCACAGCATAACCCTGCAGGTCTCCGACGTCAATGACAACGCCCCCGCCTTCACCCAAACCTCCTACACCCTGTTCGTCCGGGAGAACAACAGCCCCGCCCTGCACATCGGCAGTGTCAGCGCCACAGACAGAGACTCAGGCACCAACGCCCAGGTCACCTACTCGCTGCTGCCGCCCCAGGACCCACACCTGCCCCTCGCCTCCCTGGTCTCCATCAACGCGGACAATGGCCACCTGTTTGCCCTCAGGTCGCTGGACTACGAGGCCCTGCAGGCTTTCGACTTCCGCGTGGGCGCCTCAGACCGCGGCTCCCCGGCTTTGAGCAGCGAGGCGCTGGTGCGCGTACTGGTGCTGGACGCCAACGACAACTCGCCCTTCGTGCTGTACCCGCTGCAGAACGGCTCCGCGCCCTGCACCGAGCTGGTGCCCCGGGCGGCCGAGCCGGGCTACCTGGTGACCAAGGTGGTGGCGGTGGACGGCGACTCGGGCCAGAACGCCTGGCTGTCGTACCAGCTGCTCAAGGCCACGGAGCCCGGGCTGTTCGGTGTGTGGGCGCACAATGGGGAGGTGCGCACCGCCAGGCTGCTGAGCGAGCGCGACGCGGCCAAGCACAGGCTGGTGGTGCTTGTCAAGGACAATGGCGAGCCTCCTCGCTCGGCCACCGCCACGCTGCACGTGCTCCTGGTGGACGGCTTCT
CCCAGCCCTACCTGCCTCTCCCGGAGGCGGCCCCGGCCCAGGCCCAGGCCGACTTGCTCACCGTCTACCTGGTGGTGGCGTTGGCCTCGGTGTCTTCGCTCTTCCTCCTCTCGGTGCTCCTGTTCGTGGCGGTGCGGCTGTGCAGGAGGAGCAGGGCGGCCTCGGTGGGTCGCTGCTCGGTGCCCGAGGGTCCTTTTCCAGGGCATCTGGTGGACGTGAGCGGCACCGGGACCCTGTTCCAGAGCTACCAGTACGAGGTGTGTCTGACTGGAGGTTCAGAGACCGGCGAGTTCAAGTTCTTGAAGCCGATTACCCCCCACCTCCCGCCCCATAGGGGTGGGAAAGAAATAGAGGAAAATTCTACTCTCCCCAATAGCTTTGGATTTAATTATTGA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_in_coding_sequence_reverse_insertion() + // { + // //NM_000682.6, chrom: chr2:96778623-96781984 + // var genomicSeq = new SimpleSequence( + // "CTTATTACAAAATATCCTTTATTGATAAAATAGCTCAGAGTTTAAAAAAAAAAAAAACACCACCTGCATGTCGCAATAAGAGGTCACAGGCAAGAACACTGGGGGTCCCATGGGGCGCACACAAGACCGGCCAGCAGAGGGTCACAGTCAGTCCCTCTCCTGGCCCAGCTCCCCACCACATCCCAGGGCGATACTCTGGCCTCAACAACCCACTGAGGACCAAGCTGGGAAGCCTCCCACACCCCAGGAAGGACTCTTTTTGGTCCCCTCCATTCTCTCTACACCCAGAAAACTCCCTCGGTGCCCTTCCAAATCTAGCAGGTCCATCTGGCCCATTCCCCCGACACCTGCCAAGCTAAGATGCCTACTGGCCCAATGTTGAAGCCAGGCCCTCTCCAAGGGAAGGCCGATAAACCTCCTTTCCACACTTCCAACTGTTCTGGGTGCCAGGTTTTGGGGTGGGACTGAGAACCAGGAAGCAGGGGTCCTCAATGCACAGCCCCATCAGCATTGCGGGGAGCAGCGTGGCTGGGTCCGAGGCAGTCCACAAGCACCCACCTGGGGGGATCAGTTGTGGTTCACAAGGACTCATTTGGGGCTTGGAGACCTGGCCGGGCACTCCAGTGGGAGGCTCCCCTAGGGGCGCACCAGGCTCTGATGCCAGTACCCCACCTGGGGGCGCTGCCACCTGTCACAGGCTCTCATCTTAGACTGTTGCCGAGGTGTGGATATTTTGAGCTGTCTTGGGGAGACAATTTGCCTCCTTGATGACAAAAGACTTATCCCCCACTGGGGAGACCCAAGCCACTAAAAACCCTCTTGGTGTTGCCGGTGAAATGTCGAAACGTTGTCATGTAGCGTAATAACTCAGACCTTTGCAGCCAGAAGAACACATTCTCAAAGAGATCCTTTAACTTGAAATAGTGATTCTGTCTGCCACTCCCGGCTTCCAGTTCGGGGTAGGAATTCACACACCCCAGGGACAGAACAAAAGTCTACAGGAAGACAGGTGGTGGTAAACACAGAGGAAAGGGATTTTTATATCACCATATAATCACATTTTTGGTTCTCTAGTGTGTTCCCCCACAGAGCTCAAAGCTTTCTGCAAAGCCTTTCATCTCCCTGCAGCAAGTAGGCAGTGAGCTATTGTCGCCCCGATTTTTGCAGGGGGTGAATGCCAGTGATCGGGGATCTCCCGTCGAGGCAGAGACCAGGCCTCCAAGACCGCCCCAGCGAGGCATCCACGTGGCCACCCACCTACCGGAGGGGTGCTGGGTAAGGAAGCCGATCCATTGTTCTGGCTTTCAAAGGAACCACAGATCCGAAAAC
AGGCAAAGGGGGAAAGGAGGGCCCAGAGACGATGCCACCCCATAAGCCCCCATCCCAGCGCCTGCCAGGGACCGCGAGTGCCTAGCGTGGGTGATCAGTCTTCGTTTCTTCCTCCCCCTCAGCAGCAGGCCCCACTGGGAAAAGTGGAAGGCTGGCTCCGTGCTCTTTGTGGGTGGGGGGGAGATGAAAAAGAAACGAAAACACCACAAGCAAGTGACCTGCCAGGAACACAAGGTCCTCAAGAAAGGGAAGCCCAGACATTGGTCTGGAGAGCATGGGGCTCTGGGAAGAAAGTGCTCTCTCTTCTCCTGGTCTTGGCTATGTTCCAGAGGATTTGAACCACCTCCATCGGCCTGTGCTCAGGGAGAGGGTGGAGAAGGGGTCCCCCACAGCTAAGCCGGCAAGGGGAAGCTTCACTGGGACCCTTGCTAGCAGCCCCCCTGCCCACCCCTCCCAAGGGGTTCCTAAGATGAGGCCTACAGGATCTGGGCAGGGAGCAGAAAGCCCAGGGGAGGCAGCCACACACAGCAGGGCAAGAAGCAGGGTGACCCCGGCGCCACCGCACCAACCCCACAGGGGCAGCGCAGGCGGGCTCACCAGGCCGTCTGGGTCCACGGGCGGCACAGGATCCTCCGGAAGGCACGGCGGAAGTCCTGGTTGAAGATGGTGTAGATAACAGGGTTCAGTGAGCTGTTGCAGTAGCCGATCCAGAAGAAGAACTGGAAGAGGCCATGGGGCACCTTGCAGTGCTTCGGGCAGATGGCTCCCAGGCTGTAGCTGAAGAAGAAGGGGAACCAGCAGAGCACAAAAACGCCAATGACCACAGCCAGCACGAAGGTGAAGCGCTTCTCCCGGGTCAGCTGCGCCCGTCGACGCCACCACTGCCCACCTATAGCACCCACGCCCCTGCCCAGGAGCACCTGGCCACGTAGGGTGGCCAGCACCCGGGAGCCCTGTGGCTGCTGCAGCGGGGGGCTGCAAGCTGAGGCCGGAGACACTGGCACTGCCTGGGGTTCACACTCTTCCTCCTCCTCCTCCTCCTCTTCAGCTTCATCCTCTGGAGATGCCCCACAAACACCCTCCTTCTGGCCCTGGCCTGAGTTGGGAAGGGCAGCCCAACTGGGTGGCAAGGCCCGGGTCCCAGTATCTTCAGGGGTCTCCCCCTCCTCCTTCTCCCCAGTGGACTTCGAGTGTCCGTTGACCTCTCTGGCAGAAGCCACAGAGGCCAGGGCTGGCAGTTTGGCTGAGGCCAAAGCCCCACCATGGTCGGGTCGGGGCTGCTTGGACTCACCCTGCCCAGGCCCCCCCTTGGCCCTGGGACCTCTGCGGTTGCTGCGTTTGGCGATCAGGTAGATGCGCAGGTAGACAAGGATCATGATGAGGCAAGGAGCAAAGAAAGATCCGATGCTGGAGGCCAGGATGTACCAGGCCTCCTGGTTGAGCTTGCACTGGGGGCGCCCGCGCGGCTGGGGGCCCTGGTCGCCCTTGTAGATGAGGGGCGGCAGCGAGATGACGGCGGCGATGAGCCACACAGTGAGGATGATGCACTTGATGCGGCGCGGGGTGCGCTTGGAGTTGTACTCCAGCGCGCGGCTCACGGCCCAGTAGCGGTCCAGGCTGATGGCGCACAGGTGCACGATGGACGAGGTGCAGAAGAGCACGTCGAGCGCCAGGTACACCTCGCACCACGTGCGCCGGAAGTACCAGTAGCCCAGCAGCTCGTTGGCCAGCGAGAAAGGGATGATGAGCGTGGCCACCAGGATGTCGGCGGCGGCCAGCGACACCAGGAACAGGTTCTGAGGGGCGCGCAGCGAGCGGCTGGTCAACACAGCCAGGATGACCAGAGCGTTGCCGAAGATGGTAAAGAGAATGAGGAAGGTGATGGCCGCCGCTATGGCCGCTGTGGCCTGCACGGAGTAGGGGTCCTGGTGGTCCATGACGGGGCGGGAGGTGGGCAGAGGGAGCGCTGCCCGCCCAGTGCGCACCGTGGACGACAGCGC
TGCCCGGCTCGGCTAGACAAGAGCGTCGCCCCT", + // 96778623 - 1); + // var codingRegion = new CodingRegion(96780545, 96781888, 97, 1449, 1353); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 96778623, 96780986, 1008, 3371), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 96780987, 96781984, 1, 998) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(999, 998, "AGAGGAGGA") + // }; + // + // const byte startExonPhase = 0; + // const bool onReverseStrand = true; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, + // rnaEdits); + // + // var expectedCodingSeq = + // "ATGGACCACCAGGACCCCTACTCCGTGCAGGCCACAGCGGCCATAGCGGCGGCCATCACCTTCCTCATTCTCTTTACCATCTTCGGCAACGCTCTGGTCATCCTGGCTGTGTTGACCAGCCGCTCGCTGCGCGCCCCTCAGAACCTGTTCCTGGTGTCGCTGGCCGCCGCCGACATCCTGGTGGCCACGCTCATCATCCCTTTCTCGCTGGCCAACGAGCTGCTGGGCTACTGGTACTTCCGGCGCACGTGGTGCGAGGTGTACCTGGCGCTCGACGTGCTCTTCTGCACCTCGTCCATCGTGCACCTGTGCGCCATCAGCCTGGACCGCTACTGGGCCGTGAGCCGCGCGCTGGAGTACAACTCCAAGCGCACCCCGCGCCGCATCAAGTGCATCATCCTCACTGTGTGGCTCATCGCCGCCGTCATCTCGCTGCCGCCCCTCATCTACAAGGGCGACCAGGGCCCCCAGCCGCGCGGGCGCCCCCAGTGCAAGCTCAACCAGGAGGCCTGGTACATCCTGGCCTCCAGCATCGGATCTTTCTTTGCTCCTTGCCTCATCATGATCCTTGTCTACCTGCGCATCTACCTGATCGCCAAACGCAGCAACCGCAGAGGTCCCAGGGCCAAGGGGGGGCCTGGGCAGGGTGAGTCCAAGCAGCCCCGACCCGACCATGGTGGGGCTTTGGCCTCAGCCAAACTGCCAGCCCTGGCCTCTGTGGCTTCTGCCAGAGAGGTCAACGGACACTCGAAGTCCACTGGGGAGAAGGAGGAGGGGGAGACCCCTGAAGATACTGGGACCCGGGCCTTGCCACCCAGTTGGGCTGCCCTTCCCAACTCAGGCCAGGGCCAGAAGGAGGGTGTTTGTGGGGCATCTCCAGAGGATGAAGCTGAAGAGGAGGAAGAGGAGGAGGAGGAGGAGGAAGAGTGTGAACCCCAGGCAGTGCCAGTGTCTCCGGCCTCAGCTTGCAGCCCCCCGCTGCAGCAGCCACAGGGCTCCCGGGTGCTGGCCACCCTACGTGGCCAGGTGCTCCTGGGCAGGGGCGTGGGTGCTATAGGTGGGCAGTGGTGGCGTCGACGGGCGCAGCTGACCCGGGAGAAGCGCTTCACCTTCGTGCTGGCTGTGGTCATTGGCGTTTTTGTGCTCTGCTGGTTCCCCTTCTTCTTCAGCTACAGCCTGGGAGCCATCTGCCCGAAGCACTGCAAGGTGCCCCATGGCCTCTTCCAGTTCTTCTTCTGGATCGGCTACTGCAACAGCTCACTGAACCCTGTTATCTACAC
CATCTTCAACCAGGACTTCCGCCGTGCCTTCCGGAGGATCCTGTGCCGCCCGTGGACCCAGACGGCCTGGTGA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void With_rnaEdits_snv_mnv() + // { + // //NM_001242659.1 + // var genomicSeq = new SimpleSequence( + // "ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCXCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCXAAAGACAGCXGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCXTAATGGCACXGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGXXCTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTXXGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA", + // 149553002); + // var codingRegion = new CodingRegion(149553003, 149553787, 1, 785, 785); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 149553003, 149553787, 1, 785) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(52, 52, "G"), + // new RnaEdit(164, 164, "C"), + // new RnaEdit(174, 174, "A"), + // new RnaEdit(284, 284, "C"), + // new RnaEdit(294, 294, "C"), + // new RnaEdit(420, 421, "CA"), + // new RnaEdit(670, 671, "CT") + // }; + // + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, false, startExonPhase, rnaEdits); + // + // var expectedCodingSeq = + // 
"ACTATAAAGACAGTAAAAAGATCAGTGGTTATCTTTGCAGACGCCACCATCGCTGTGAGCCCTGTACTATCAGCCATGGTCAACTCCGTCGTCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACTGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACGCCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAATGAGTTTGACTTGTGTTTTATTTTCACCACCAGACCCATTCCTTCTGTAGCTCAGGAGAGCACCCCTCCACCACATTTGCTTGCAATATCCTAGAATCTCTGTGCTCTTGCTGCAGTTCCCTTTGGGTTCCATGTTTTCCTTGTTCCCTTCCATGCCTAGCTGGATTGCAGAGTTGAGTTAAGTTTATGATTATGAAATAAAAACTAAGTAACAA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void With_rnaEdits_deletion() + // { + // //NM_033089.6, chrom: chr20:278204-280965 + // var genomicSeq = new SimpleSequence( + // 
"GGAGGATGCTGGGAAGGAGGTAAAATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAAACACCCGCCTGCCTGCCAGGGTGAACACACAGCCAGCTTATCCCTCTTAAGTGCCAAAACTTTTTTTTAAACCATTTTTTATCGTTTTTGAAGGAGATCTTTTTAAAACCTACAAGAGACATCTCTCTATGCCTTCTTAAACCGAGTTTACTCCATTTCAGCCTGTTCTGAATTGGTGACTCTGTCACCAATAACGACTGCGGAGAACTGTAGCGTGCAGATGTGTTGCCCCTCCCTTTTAAAATTTTATTTTCGTTTTTCTATTGGGTATTTGTTTTGTTTCTTGTACTTTTTCTCTCTCTCCTTGCCCCCCTCCCGCCCTCCCCGCCCCATACCTTTTCTTCCCCTGGATTTTCACCCTTTGGGCTGCCTTGCTCATCTTTATGCCCCAGCACTAGGTACGGGGCCCAACACGTGGTAGGCACTCCATCAGTGTTTGCTGAATTGAAAACATTGTTGACTGTGGCTTCTATCAGAGTGTCTACCTTTTGCAGCTCTTCCCCTCCCTCATTTAATTTGCTGCTTTTAATCTACGTGGTCTGAGAATTTGTGAAACCAGTGTTGTTAGAAGTGTATATAATCTGAATCAATAAGCTCTGAATGGTGGCCAAGGGCCTCTCTTATGGCACAAAGATGCATGGACTTCATGACAGCTCTTTTGGTGGCTCAGAAGCCATTTTTTATAGAATCATGGAATCTAGAATATTCCTGCTGGAAAGAACCTGAGAGTTGGTTTGGACCAATTCCCTGGTTTTCCAGC
AGATGAAACAGGCCCAAAGAGGTTAAATGACTGGGTGAAAATCACATAGCTGTCTGGTGCCAGAGCCAGCCTATAGTAGAGTCCCCTGACCCCAAGCCCGGTGCTCATTCCACTACCTCTCACACTTCACAACAATTTCCTCAACACTTGAGGGCCCAGAAAGTCTGATCTCTCCAGAATGATCAGCCCAGAGGAATGCTGAGAAATCACCTGGAGGAGGGAGCAGAAAGAGAAGGTTTTTAAGGAGGGGCTTCTGAATACTTGGGAGATACGGAACGGACCAAGGACCACACTCCAGGGTGCATTCGTTGCTCCCTGGGGCACCACTTCTGGATTACAGTGTGCCAGGTCCTTTGGAGGCCCTACCCCTTCCCCATTCATTGCCACCAGTGAGAAATGGGGGTGCCCCTGTGTAAAGAAACCTACCAAAGGTTTACATTTGCACCTTAGCCTCAATAGCTACGAACCCTAGAGAAGCAGCTAGCTGGAGCTCATGTGCAACTCCTGATTCTCAGGAGAAAGATGGATTTTAACCCAAAATTATGAGTGAGCTGTTAACTCTAAAATGTACTTGGGAGATAGGCCAAGCGAGAGGTCATGGGCCAACTAAGTGTTATCCAGTAGAAAAGACAGTACACTGCTTTTCTTTTAGTGTTTGCTTTTCCTTTGCTATATGTTTTGCTATTTCCTTGTGGCTTAGAATGTAAAATTGATTGTTAAAAGTTTTGTTCTGAATAAATATTTATCTTTTGTATTGCTAAAA", + // 278204 - 1); + // var codingRegion = new CodingRegion(278228, 279442, 25, 1236, 1212); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 278204, 278687, 1, 484), + // new TranscriptRegion(TranscriptRegionType.Gap, 1, 278688, 278690, 484, 485), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 278691, 280965, 485, 2759) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(485, 487, ""), + // new RnaEdit(2763, 2762, "AAAAAAAAAAAAAA") + // }; + // + // const byte startExonPhase = 0; + // const bool onReverseStrand = false; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, + // rnaEdits); + // + // //The coding sequence from refseq does not have the deletion from rna edit. That was manually inserted. 
+ // var expectedCodingSeq = + // "ATGGCCACCGGCGGCGGCGCGGAGGAAGAGAGGAAACGGGGGCGGCCGCAGCTTCTGCCCCCCGCGCGGCCCGCGGCCCGGGGCGAGGAGGCCGACGGCGGCCGCGAGAAGATGGGCTGGGCCCAGGTGGTGAAGAATCTAGCCGAGAAGAAGGGCGAATTCCGCGAGCCGCGGCCGCCGCGGCGGGAGGAGGAAAGCGGCGGCGGTGGAGGGAGCGCCGGGCTCGGCGGCCCCGCGGGCCTGGCGGCGCCGGACCTCGGCGACTTCCCACCGGCTGGCCGCGGGGATCCGAAGGGCCGTCGGAGAGATCCGGCCGGCGAGGCGGTGGACCCCCGCAAAAAGAAGGGCGCTGCGGAGGCGGGCAGGAGGAAGAAGGCCGAGGCGGCGGCGGCCGCCATGGCGACCCCGGCCAGGCCCGGCGAGGCCGAGGACGCGGCCGAGCGGCCCCTCCAGGATGAGCCGGCGGCGGCGGCAGGCCCGGGCAAGGGTCGCTTCCTCGTCCGCATCTGTTTCCAGGGAGACGAGGGCGCCTGCCCGACCCGGGACTTCGTGGTAGGAGCGCTTATCCTGCGCTCCATCGGCATGGACCCGAGCGACATCTACGCGGTCATCCAGATCCCGGGCAGCCGCGAATTCGACGTGAGCTTCCGCTCAGCGGAGAAGCTGGCCCTGTTCCTACGCGTCTACGAGGAGAAGCGGGAGCAGGAGGACTGCTGGGAGAACTTTGTGGTGCTGGGGCGGAGCAAGTCCAGCTTGAAGACGCTCTTCATCCTCTTCCGGAACGAGACGGTGGACGTGGAGGACATTGTGACTTGGCTCAAGCGCCACTGCGACGTGCTGGCCGTGCCGGTGAAAGTGACCGACAGGTTTGGGATCTGGACCGGGGAGTACAAATGCGAGATCGAGCTGCGCCAGGGGGAGGGCGGGGTCAGGCACTTGCCAGGGGCCTTCTTCCTGGGGGCCGAGAGGGGCTACAGCTGGTACAAGGGGCAGCCCAAGACATGCTTTAAATGTGGTTCCCGGACCCACATGAGCGGCAGCTGCACGCAGGACAGGTGCTTCAGGTGCGGGGAGGAGGGGCACCTGAGCCCTTACTGCCGGAAGGGCATCGTGTGCAACCTCTGTGGCAAGCGAGGACACGCCTTTGCCCAGTGTCCCAAAGCAGTGCACAATTCCGTGGCAGCTCAGCTAACCGGCGTGGCCGGGCACTAA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void With_rnaEdits_reverse_deletion_utr() + // { + // //NM_001317107.1 chr14:22138125-22139232 + // var genomicSeq = new SimpleSequence( + // 
"ATATGGTATGTAACTTATTCTTTGCAAGGCGCTTCTTTAATTTGGAGCACCACGTATCCTAAGGACGTAGACATTTTCATTTTTCTTCTTTTCTCTCTTTTCTCCCCACTAACTTGTTTAAGGCACTCTTCATTTCTTCATTCCTAAGGGTATAGATAATGGGGTTCAGCAGGGGGGTGACTGCAGTGAAAAACACAGATACTGCCTTGTCCTCTGGGAGGCTGGTGGATGGGCGGGAATAGATGAAGATGCAGTGTCCCAGGAACAGTGTAACTACAGTGAGATGGGCTGCACAGGTGGACAGGGCCTTCCACTTGCCCTTGGAGATCTGCTGCCTCAGACTCACCAGGATGACTGCGTAGGACACCACCAGGACCACAAAACAGACCACGGAGATCAATCCACTGTTGGAGACAATGAGGATCTCAAGGACGTGGGTGTGTCAATGCAGGCCAGCTTGATCACCTGAGGTACATCACAGAAGAAGTTGTCAATCTCATCAGGACCACAGTAGGGCAGCTTGATGGTAAGGGAGGTGAGGGCTATGGAGTGGATGGTCCCTCCTGTCCAGAGGGCCACAGCCAGCAGCACACATACCTTCCAGTTCATCACTATCATGTACTGCAGGGGTTTACAGATGGCCACATACCGATCATAGGCCATGACGGTGAGGAGGAAGATCTCTGTGCAGGCAAAGAGGTGCAGGAAGAACATCTGGGTCACACAGGCATCAAAAGAGATGAGCTTTTCCTCTGACCACACGTCTCTCAGCATCTTGGGGACAGTGACAGTGGAGTGGCAGACATCAATAAAGGACAGGTTGCTGAGGAAGAAATACATGGGAGTATGGAGCCGGTGGTCATAGATAATAGTTATGACAATGAGAACATTCCCAATCAGTGTCAGGACATAAAAAATGAGGAACATGGAAAACATAGCTATCCGTGCCTTATGATTTACAGATAAACCTCTAAGCCGAAAATATGTCACTAAAGAAGTTTGATTGAGTAGGATGGCCTCTTCCATTCTCTTTGTTAGACAACCTGTAAAGAATTAGAAAAAAAGTCTAATATAACACAGTATCTGCATCAATCATTTGGTCATTTAA", + // 22138125 - 1); + // var codingRegion = new CodingRegion(22138201, 22139150, 83, 1030, 948); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138125, 22138561, 670, 1106), + // new TranscriptRegion(TranscriptRegionType.Gap, 1, 22138562, 22138563, 669, 670), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 22138564, 22139232, 1, 669) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(905, 905, "T"), + // new RnaEdit(796, 796, "C"), + // new RnaEdit(679, 679, "A"), + // new RnaEdit(670, 671, "") + // }; + // + // const byte startExonPhase = 0; + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); + // + // var expectedCodingSeq = + // 
"ATGGAAGAGGCCATCCTACTCAATCAAACTTCTTTAGTGACATATTTTCGGCTTAGAGGTTTATCTGTAAATCATAAGGCACGGATAGCTATGTTTTCCATGTTCCTCATTTTTTATGTCCTGACACTGATTGGGAATGTTCTCATTGTCATAACTATTATCTATGACCACCGGCTCCATACTCCCATGTATTTCTTCCTCAGCAACCTGTCCTTTATTGATGTCTGCCACTCCACTGTCACTGTCCCCAAGATGCTGAGAGACGTGTGGTCAGAGGAAAAGCTCATCTCTTTTGATGCCTGTGTGACCCAGATGTTCTTCCTGCACCTCTTTGCCTGCACAGAGATCTTCCTCCTCACCGTCATGGCCTATGATCGGTATGTGGCCATCTGTAAACCCCTGCAGTACATGATAGTGATGAACTGGAAGGTATGTGTGCTGCTGGCTGTGGCCCTCTGGACAGGAGGGACCATCCACTCCATAGCCCTCACCTCCCTTACCATCAAGCTGCCCTACTGTGGTCCTGATGAGATTGACAACTTCTTCTGTGATGTACCTCAGGTGATCAAGCTGGCCTGCATTGACACCCACGTCATTGAGATCCTCATTGTCTCCAACAGTGGATTGATCTCCGTGGTCTGTTTTGTGGTCCTGGTGGTGTCCTACGCAGTCATCCTGGTGAGTCTGAGGCAGCAGATCTCCAAGGGCAAGCGGAAGGCCCTGTCCACCTGTGCAGCCCATCTCACTGTAGTTACACTGTTCCTGGGACACTGCATCTTCATCTATTCCCGCCCATCCACCAGCCTCCCAGAGGACAAGGTAGTATCTGTGTTTTTCACTGCAGTCACCCCCCTGCTGAACCCCATTATCTATACCCTTAGGAATGAAGAAATGAAGAGTGCCTTAAACAAGTTAGTGGGGAGAAAAGAGAGAAAAGAAGAAAAATGA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_deletion_reverse_utr() + // { + // //NM_001123068.1 chrom: chr1:147954635-147955377 + // var genomicSeq = new SimpleSequence( + // "TTGTTACTTAGTTTTTATTTCATAATCATAAACTTAACTCAACTCTGCAATCCAGCTAGGCATGGAAGGGAACAAGGAAAACATGGAACCCAAAGGGAACTGCAGCAAGAGCACAAAGATTCTAGGATATTGCAAGCAAATGTGGTGGAGGGGTGCTCTCCTGAGCTACAGAAGGAATGGGTCTGGTGGTGAAAATAAAACACAAGTCAAACTCATTAGAATTGTCCACAGTCAGCAATGGTGATCTTCTTGCTGGTCTTGCTATTCCTGTACCCAAAGTGCTCCATGGCTTCCACAATATTCACACGTTCTTTCACCTTGCCAAAGGCCACATGCTTGCCATCCAACCACTCAGTCTTGGCAGCACAGATGAAAAACTGGGAACCATTTGTGTTGGGTCCAGCATTTGCCATGGACAAGATGCCAGAACCTGTATGCTTTCGGATGAGGTTCTCATCATCAAATTTCTCCCCATAGATGGACTTGTCACCAGTGCCATTATGGCGTGTGAAGTCACCACCCTGACACATAAACCCTGGAATAATTCTGTGAAAGCAGGAACCCTTATAACGAAATCCTTTCTCTCCAGTGCTCAGAGCACGAAAGTTTTCCGCTGTCTTTAGAATCTTGTCTGCAAACAGTTTGATGGAGATGCGGCCCAAGGGCTTGCCGTCGACGGTGATGTCAAAAAAGACGACGGAGTTGACCATGGCTGATAGTACAGGGCTCACAGTGATGGTGGC", + // 147954635 - 1); + // + // //coding 
region between 34..528 + // var codingRegion = new CodingRegion(147954850, 147955344, 34, 528, 495); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954635, 147954669, 704, 738), + // new TranscriptRegion(TranscriptRegionType.Gap, 1, 147954670, 147954674, 703, 704), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 147954675, 147955377, 1, 703) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(704, 708, null), + // new RnaEdit(378, 379, "CA"), + // new RnaEdit(252, 252, "C"), + // new RnaEdit(242, 242, "C"), + // new RnaEdit(239, 239, "A"), + // new RnaEdit(132, 132, "A"), + // new RnaEdit(122, 122, "C"), + // new RnaEdit(104, 104, "A"), + // new RnaEdit(49, 49, "A"), + // new RnaEdit(10, 10, "G"), + // new RnaEdit(4, 4, "G") + // }; + // + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); + // + // var expectedCodingSeq = + // "ATGGTCAACTCCGTCATCTTTTTTGACATCACCGTCGACGGCAAGCCCTTGGGCCGCATCTCCATCAAACAGTTTGCAGACAAGATTCCAAAGACAGCAGAAAACTTTCGTGCTCTGAGCACTGGAGAGAAAGGATTTCGTTATAAGGGTTCCTGCTTTCACAGAATTATTCCAGGGTTTATGTGTCAGGGTGGTGACTTCACACACCCTAATGGCACCGGTGACAAGTCCATCTATGGGGAGAAATTTGATGATGAGAACCTCATCCGAAAGCATACAGGTTCTGGCATCTTGTCCATGGCAAATGCTGGACCCAACACAAATGGTTCCCAGTTTTTCATCTGCACTGCCAAGACTGAGTGGTTGGATGGCAAGCATGTGGCCTTTGGCAAGGTGAAAGAACGTGTGAATATTGTGGAAGCCATGGAGCACTTTGGGTACAGGAATAGCAAGACCAGCAAGAAGATCACCATTGCTGACTGTGGACAATTCTAA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_big_insertions_reverse() + // { + // //Transcript id:NM_032508.3, chrom: chrX:148678216-148713568 + // var genomicSeq = new SimpleSequence( + // 
"TAAAATGAGGAACCGGTTTATTGAACAGCTTAAGGAGAGCAAAAATAGTGGCTTTAGCTACATTTTTTACACACTGAGCAGGAAAGTCTAAACCATCCCGTTCCCCTGTACCCCAAAGAGAACAGGGCTTGCTGGAGGCCAGTGCCAAGGGCGGAGTCGTGCTCGCAGCAGACTTGAATTAACCCCATGTAGGCCGGCGAGCAGTTGCCCGCGTGAAAACACCACCCTCTTCTCCTGGCTGAGAAGATCAAAGCTCTTTTTTTACCCTCTTTTCAGCAAAGGACCTATTTGTTTTCAGGCAGGAGGATGTTAAACTTGCAGCCTCTGACACACGGTGGAACCTGCAGTGCTTGGAGAAACGGCACGCACACGTGAAAACATCATGCCTACTCCAAAGCCTTCTTGTTGCTGGCAGGAGGGAAGCTTGAGACTTTCCCACGCATAGTCGTGACCCGCGTGGCCGTTTCTGCTCTCAGCAACATTCTCTAGTGTTCCGGCTTCAAGCAGCGCTTGTCAGGTTTGAAGCTAGCCACTATTCTGAGAACGTCAGAAAAGCATGGACCATCTCTTGCTTGGTGTTGCCGTTCTGGCAGTAGCAGCTACTACGTACCTGCACGAGTTCCAGGGCAGAAGTGGCAATGTCCCATGAAGGCGTGGCACCCCACGGGGGGGGGGGGAGTGTGCCACGGGCGTCCACTTCTGCAGCAGAAGGCATGTGCCTACAGCACAAGCTTGTAAAAAAATACTTGAACAGAATATGCTGTACAGAACTAGGGGTTAACACCGCATATGAAGATGCTAAAACATTTGTATAAATACTCTGTATACAAGCATGGAGTCACTCCCGTAGAAAGGGCTCATCCGTGAGGCTATGAAAAACTGCTGTCAGCATGCCCAAAGAGAAACTACTTCCACAGTAGGAACAGAAAAAAGGACTGTGCTGTGTCTAAACACGTGGTGCATCAGAGACATAGTTACAGTTCCTACTGACTGCCCCAGCCACGACCTGGGAGTGCTGAGGACCTGGGAGTGCTCAGCGAGCTGCAGGAGGTCAGCCCTGTGGAGAAATACATTTCTAAACAATACTTTTGATTGGGATTTCAGCACCGTATAGACAGATGTTCCTTCTGGGGGCCTGGCAAGCAGCCATCTCCCAGTGGGTCTGACGGGGAAGAGGGGTACCTGGAGCCCCTCCCAGACAGACGGTAATCCCACCCCTGTTCTCACACTCTTCCTGGCATCCGCATCTGCTGGCACACACCCCCGTCACCTGCCACTTCCGCGTCCCGTCGTGGTGAGTGGCTGATAGGCGCTGGATGCAAACAAGGCATGAGATGGACGTACCTGGAGACCCAGCTCCAGTACTGGTTCTGGTCTGCGGGGTGAACGAGGGGGCAGAGGAAGGCGGAGAGAGTGCGTCCCAGTCCACTTAAGCTCTGTCCCCGGAAGTGGCATCTAATCTGGCATTTCGATATTTAATTTGGGAGGTGGGAGCACATACTTCCCAGGGCTCTGGGTAATGACCACCCTGGCCTTCTTTCGAAACATGGGTGCGATTTTAGGGGGCTCCGGAACTGGGGTCTCTTCGGTTTCTTCATTATCTTCGTGATGGAGATCATAGGAAATGTTTCCATATTCTCGTAGAAATGGGAAGATTTCAAGCAGAAACTGACAGAAATCTTTGCGGATACCAAACCACCCTGAAAAATAAGAATTTTTTATTTCACACACGAGGCTCAACTGACCTTCCTGTTAACTTTCTTTCCGTAACAAGAAGTTTCACTCCTACAATGTCATAACATACTTTATCCAGACTCCTGAGTCACAAAGCCTGAACAGGGCTTGAGTACCCAAAATGGGGAAGAAGTGCAAATGCTAGCTCTGTGGTGCTTGGAGTGGGGTTCCCGGACCGGCAGGGACAGCGTCCACGGGGCCTAGTTAGGGATGCCATTCTCGGGCCCCAGCCCAGACCTCCAGAAACTGAGTCGGGCTAGGGTGG
GCTCCAGCGGTCCCCTTTTCCTGGCCCTTTTGGGATTCTGCTGGATGCCCAAGTTTGAGAACTACTGCTCCAGTGAGTCTCAAAATATCTGTGGTGCGCAGACTACGGTGTCTTCCGCTAATCTTCTCCAGCCAGGATAAACTCATGGATGACAGTGCCACCCAAGAACAAGATTTCTGTCACCCTCTGGAATCCGTGAGGGCGGTAGTCATGCACGGGTCCTGGCCAGGAGGGGGCCTGAACTCATGGAGCCACCTTAAAGCCACTTTCCCAGTCCCACTACTCCTCTCTGTAGGCTACTGGAGTGTCAGCTCGGTGCAAGCCCTCCCTGCTCCCGGGTGCGGGGTAGGGGGCAGAGGCACAAACAGCAAGCACAGCCCGGGCTGCTGGGCTGCAGTGAGGCCCTGCCCCCAAACCCACTGGCTTTCCGAAGGGCAATGCTCTGGGCTTCCGTGCCATGGAGCCCACAGCCTTGCCAGGAAGGCACCCTCTGCAGAGATCGTTTTGGAAGTGTCTGCCTCAGCAAGCAGGTGGAGGGGAATAGAGTGTTAGCAAGGCAAGACAGGCAAGACTCGGGTGATGGCAGCAAGGATATGGGGGAGGCAGAGAGGCCAACAGGGACCTAGGATGAATCCCAGGTTTGGGTGGGAGATGTGGATTTTCCATCAAACCCTCCCGGGCCTGGGAAGAATCTGTCTTGATCCCCATTTTGCAGAGGAGGGAACGGGATCTCTGAGAGGTTGCCTGCCGTGTCTGGTTCTACCTCAAATGGCAGCGTGCACTGCGAGAAAAGTCCCGGTGCAGGCCAGCAGAACACCAGAGTTACGGCATGCCCTTCCCTTAGAAGGTCCCAGAATTTCCTCAGCCCTCACTTTCCCACACAAGCTTCTAAATTGGGGCCCTCGGGGACTCATCCCTTCCTAGACTTCTATCCGCCCCCCCCCCCACTCCCTGGTCCCCCCCCAGACACACACCAAGGACTTCTGAAATGCTGAGTACATACAGTGGTTTCCTCCCTTCTGTCCAAATGTGGTTGCCATCAGCGTGATCAACGAGAGCCAAAGGGGGACAAAGATCGGGATGCTGGAGAAGGCGTTGTGGCCATCCAGTTTGTGAACCAGCAGAATCTAAAGAAAGAGACATAGTCCCGGTTGATGCCAGCACCGAAAATGGGCAGAGGCGGAAGCCAGACTTCATTAGGCAGTTCCTCCCCACCACCCCACCCCCGCGTGAGCTCCCACAAGAGGGAACATCAGCACCGCCAGAAAAAGGCAGGAAACCACCTATCCCTGGGGAAAGCTCGAAATGAGCTTTTATGTCCCTCTTCAGAGCTCGGCAATAGCCTATCCACTTGAAAAGTTCCCAGTGCCAGCAGTTTTATGGCAAACTCCTCCGGGTGTTTGTTCTAAGGAGTCAACAGCTCCCATTCTAGAATTCTCCACGTGACTCCAATACACAAATCTGACATCCCACTCTGCTTTCCCCAGAGTGGAAACTGGAGCCATACAGAGGCACCATGGCTAAAAAGGTGCACTCTTCTCCCTGCCAGCCCCACGTGCTGCCCCCAAGAGAAAGGAAGGATGCTCTCCTTTCACCGAAGCTCCCTCTCGGAGATGGCTGTGTTCTCTCCCCTCTCCTGGAGTGGGCTCACTGTGAGCTCGAGGGACAGAGGCTGCCTTTCTAGGGGTGCAGAATCCTGTCAGGGGAAGCGCAAGCTTCAGGGGCTGAAGAGGCTTCCCGTGGAACGCTTACCTCAAATGTAAGAAGGGGCACGACGATGGTCATCCAGCTCAGGGCCATGGTTATGTGTGTCCTGCGCTGCTCTGCAATCACATCCATAGAGCGCAAGAACAAGACGGACCACACAATGTAGTAGAGGACCACCAGGCACAGAAAGGACATGAGAATCCACAGCGGGACACACACAACCTGGGGGTGGGTGAGAGAACAGCAAGAGAAGTCTCTTTAGAGCTTCCAACCTGGCCTCTGATGGAAGGCAT
CTTTAGCACCTTGCTGTGTCTGTCCAGTTAAGGCGGTCCTTCCCGTGAGCCGAATAAGGACCGTTCCATCTCCCAGGACTGCTGGGAGCATCGCTCAGGACAGAAAAGGTATGGTATGTTCACTATGGGGCCTGCTGCCACCAGGGGACACACACGCTCAGTGAGTCATCAGTCCCTCTTCCTTTGGGTGACAGACAGCCCTGCACCTGGCTCCGCAGCCTCTACTCTTCCAGAGGCCCACTCTCCCACACTCTCTCAGGCTCCTCTAGGTTCTGCTGCCATCACAGCTTCCCGGGAAATGGGACACAACTGTCACCCTGTGCACACACACAAGATCTCACCCCAACAGACTCTCTTCACAGGCAACATTCCCACAACCTGCTGGGGGTACTTTGGCAACACAAATGGGAATGGGCTCCCCAGAAAGTCTGGCTGCCTGGGCTCCTAAGGATCCCTAACCTCACCCCTACCAAGTTAGTGAACTTGGCGGGTTGATGCTGGATACAGGTTGATGCTGGATACGTAGCGCTGCCGGGTCCCCGCCTCCACGGCAAGGGCGCATTCCCAGTATGTCCCTGTCGTACCAGGTAGACCTTGTCTCATCCACACACAAGCCCAGAGGACGAGTTCCGGGGGCGCCACTTGGCCAGGCTCCCCTGTGACACGTCTTCGCCCTCCTGCCCTGCCTCCTGGGACGACACTCCTCCGTTCTCCCTTTTTATTAATTATCTATCATACAGTAGGAAAAGTGACCGTCTTCCTTTGGTGTGAGTTCCCTGAGTCTTCACACAAGTAGATTCGCACAGCCGTTGGCAGGATGCAGAAGAGGTCTGTCACCCTGCAAAACTCTCCGTGCTGTCCCTTCACTATCACACCGTCCCCACCATTAGCCCCGGCAAACACTGATCTGTTCTCTGTCACTGTACTTTTGTCTCTGCTGGAACTTTATGTAGATGGCATCGCGAGACAAGTAACCTGTTGAGACTGGCTTCCCGCCATCCACATAATGTCTCTAATGAGATTCATCCAAGTTGTTCCCTCCCTGTATCCACAGGTCGTTCCCTCTCAGTTCTGAGTGGTATTCCATTGTATGGATGCCTACAGTTTATCTGACTGTCCGCTGAGGGTGGTTTGTGAAAACCAAACAAGGCCGCTATCCAAAATGCAAACAAGACTGCTACAAACACTGGTGTGTGGGTTTCTACAAGGCTGCGCGCTTTCACTTCTCTGGGGGTAAATCTTATACCCAGGAGTGGGGCAGCCAAGTCCCACGGGAAGTGCGCTTTTAACTGCATCAGAGATGGCCAAACCATTTTCTACAGTGCCCGTACCACCTGCCTTCCCGCCAGTAACACTGGAGTGTCCCAGTTCCTCTGCATCCTCCCAGGCACGTGGCCTCGTCAGTGTTGCTGAGTTTCACCATTCTGAAGCCATGTGTTTCGGGCCCTCATCCTGGTTGTAGTTTGTCTTCCCTAACCTGTAATGGCGTTGAGCATCTTTTCCTGTGCTTTTTCGCCATGTGTATATCCCCTTCGCAAATTGTCAACTCTTTTGCCAATTTTTAGGTGTTTCTTTTTGCAGTTTTGAGTTTTTAAGAGTTCTCTGTATGTTCTGGGTGCAAGTCAGTGTTTTGATGTGTGCTTTGCAAATATTTTCTCCCAGTCTGTGGCCTGTCTTCATTTCATTTTAATTTTGAGGAAGTCCAAATTTATGATTTCTCTCTCGTATGGACCATATTAACAGTGCCATTTCTAAGTACTCTCTGCCTAATTGCAAACCCCAAAGATTCTGTCCTATGTTATTTCCTAACAGATCTATAGTTTTACATTTTCTTTTAGATCTATGATTTGAGTTGGCATACGAATTTTACTTCTCCTGACCAGTTCTTGATTGTAGGTTTCTCTGCAGAGTCTATGCACAGCCTTTCTTCCCCGTTCCCCATTCTGTGATGAGATTCTCCTTTTTACTGAAGTTCCCTTCATGGGTGGAATGTTAGATCT
CAATAGGCTTCCTTGTTTTCTCTTGCTCACTATGGGAAACATGTACTCAGACTGCTTCATCAACTGGGATCTACAGAGGATGAAGGGCAGAAAAAATCTTTCTTCTCATTTGTGGAAAGCTCTCAAAATTACTAATCTTTTTTTTCCCTGACAGTATCTTATACGAAAAAATTTGGTCTTGTTTAGATATGTTTCCTTCACGTCACAACAGAAACAGTTTTGAACCCAATGACCATTCTCCAGATACAGCACTGTGAAGTTGTAGGATGAGTGACTGAATATTTTTTATTATGAATGTTTTATCAAAGACTTTGGGCAGAAGTGATTATTCCCATCTTTAAATATGGAGTATACTTAGGTTCCCTTCATTTCTTCTCTGCTCCCAATTCTTTACTATACTTTTCACTTTTTAAGGGTAGCTATATTTAATACATAAAATATATTGTATGCAAAATTATACATCAAACAACAGAGAAAATAAAACCGAACAAAAACACTAGCATGACCTTACCTCCCAGTGGCAATGGAACTCTTCTGCCTTTAGTCTCCATCTTTTTTCCATGCATTTAATAGTGGAATCTATACTGTGTTCTCTAATTTCTACCTTGCCACTTATCTTTCTATCTCTGCATCCATCTACCCATTTATTCACAGTTAATTTCAACCAAATGCCCAGTAACTGAAGTCACAGTTTAGAAGCATGACATAGATGCCACCACCAGCAAGAGTGTAAATGGGTATGGCTTTTTTTTTTTTTCTTTTGAGACAGGGTCTTGCTACATTGCACAGGTTGGTCTCGAACTCCTGAGCTCAAGTGATCAGCTCACCTCGGCCTCCCAAAGTGCTGGGACTGTAAGTGTAAGCCACTGCGCCTGGCCAGGTATGGCTTTTGGAAAAGCAAGTTGGCAGTGCAGTATACGTATATAGGAATCTCAAATAGTTCCCAACCTCTAGCTCAGTAACACTATTTCTGGACTATTTCCTAAGAAAACAACCAAAAAACAAAAGGCAAAAATTTTAATGCATAAACATATATACTGCAGTATGATTTAAAATCATTCAACACTGGCAACAATGGAAATACCATATTTTAGAAATAAGAGGATGGTCTGATACATGCACTTGAAGAATATTTTGTATCAATTAAACTTTAGAAGTCATGTTTATAAAGACCTTTTATTAACATGATAAAATGTTTATGATACAACATTAAAACAAAAAAATCAGGATACAAAATGGTGCACACAGTTATATCCAAACTGTTTGTATAAAACACAGACATAAAAACACTAACCATGTTATCTCCCCATGGTGGGATTATGGGTGACTATTAGGCTATTACTTCTGCTTGTCCGTCTTTTCCAAGCTTTGTACAGTGAATATGAATTACTTTTATAATAAAAAAGAAGTTTATTTAAGGATTTTAAAAGTTACATACAAGCCAGGGCCAGTGGATGATCTTGTCCAGTCTTAAGGCAATGAATATAAACTGGAGAATGTTGACAGAACACAGGATTTCTAACTAAAAATGAAGAGAAGAATCAGTTAAACAAAGTATAATTTGCATTTAATACTGCAGTAATTTGGTTAACACACTAAAAGACAATACACATTATAATACAGTGTAACTTGTATAATATTATATTGCACTGGAAACTCCTGATTTTCGGTACCAGAGGGGCACAGCAGTATTATGGCAAGGGGAAATGGGGCTCAGCCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCCTCTGCTGCCTTGCCCTTTCTGGCCTGAGGCCTGAGCACAGATGAGAACCTTGTTTCCAGGTTCACTGGGAACACAGGCTAGCTGCAATAGACCACTAAGCTTCCTTCATGTCCCTACCAACAATACTGTCTACTCGAAACCATCCTTGTGCATCCTTCTTCTCATACCTCGGTTTTGCCCTTGGACCCCACCCCTTCCTTTCAGGCTCCTTGAGGACCTTGTCACATCAAGCCATCAACTGTAGATTCCGT
ACTGTATCTTTAACCCCACCTATTCCATTGATCCCTCCTAGCAGCACAGAAATGACTCCTTCTCATTAAAAACAAAAGCCCCGAAACCAGATAAACCCACCCCATCCTTTGATCCAAGACCCCTCTTAGCCTCAGGTTTCTCTCTGCGAGACAGTAATGAATATGCAAGCCCCTACTATCTCATCTACCATTCCCAGCTTCCTTGTGATCTGGCCTCCACCCCCATGACCACCAGAAGACAGCTTTCATCAAAGTCACCGATAACCGGGAGGCAGCAAAAACACTTCAACTGGCATCCTACTGATACTGTGGATGACTCTTTCCTTTTTGCTCCCCTCCTCCTGGCCATCTAGGATCCCACCATACAGTCTTGGAGCCCTCTGGAGTGTTCTGGTAATTCCTTCTGTCTCCTTTTCCCCCTCCTTAAATGGTGATGCTCCGCAGAGCCACAAACTCAGTCTTCTCTCCCGACTAGGAAGACAGACACACAAACACACATATTTTCACTCACCCGCTGCACCCTGGACTGGGGGAGAAGAAATTTCAACCAGACCCTTGGTCTGCATTATTACCTCCTTCATGGTTAGTTCTCACATCTGTCTCCAACTTAGCCTTCCTGCTGCATAGCAGACCCAGCTGTGCATAGGCCACTCGGCTCTCCTAAAGGCACCTCAAATGAAGCCAGTCCCAAATAGAGATCATTATCGCCTGCACTTAGAACCTTGTATCTCCTCTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTATGGCCTCCCATGTCACTTTCTATGGCCTCCCTTGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATGGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTTCTATAGCCTCCCATGTCACTTGGAGGCACAACAGTGGTCTCATTTCTCAAGCCATGAGCCCGGGAATCACCCTGCATTCTTTTTCCCACACTGTCACGTTCAGTCACCACATCCTGTCCACTGTAAAGTCCAGATTTCTCCTGAATCCCGTGCCCACTTCCTATCCTGATGATGACTGCCTAGTGAGGCCTTCTTCGGCACCATCCGCCTACCACTTCAGCAACCCCTTACTAACCTCCCGCCTCCAGCCCAAACGCCCTGCAGGCTGCTCCTCACTCTGGTAAAGTACATTCTTGCCCACAAAATTGAAATCTGGGACCAGGCCCAGAGCTTCCCAAAAGTTCTCAGTACATAGGTACATGGGGAATTTAGTAATTCCTCCATAGACCTCTAGGCCAAAAGAAATCCCTAATGTGGCTGGGAATGCCAACAGTTGCACTGATTTAAGTAATTACGTCTAACTTAGTAAGTATTTCTAAGTAGCCACCAGAAAAAATAATTCATATAAATCTAAAGAAAAATGTTAATATTATTCTTAAATAACCAAAACTAATTCCCAGTGGGATGCGTGTGCCTGTCAGGTAGCTCACCATTTCCCACGCCTTGGAATCAGACAAGGTGCTCCCACTCGTTACCTGTTCTTCACCCGGATTTTCACACAGCATTAGCCTTTTTTGTTTTCACAGCAACTGCTGAAAACCCAGCTTCTTAAAGATACGACGTCACTGAAAGGAATGCAGTGTGGCCTAAAAATAAACCTGTAAACTATTTCAAGCTAGTAGTTTGTATGGTGTCCCAACAAATGTCAGGTATTACTGTTTTCCTCAAAATGTCCACTATCCCCTGGTGCCCTGTGAGTGCACTGGGGTGCCTGGGGCACTTCAGCACATGCTCTGTGGGCTGAGGACGTGGCCCCTATTTGCCCCAATCCCTCTGTAGAGGCTCCCTCTCTTCTCAAC
TCCCACAGTTCCTTCCTTTCCACCTTGTGCTCCCGTCGGATCTGAATCGCCACAGTCCACTCAGCTGATGGAGTGTTTCCTGCCTCTAGGCTTCAATGTGTCCCAAAAATGCCATCCCTTCCCTCCCAACACAGGGCTTCCTGGCAGACCCTCAACTCCTCCTTCCCATCTCTGTATGAGCCTACTCCAGACCACCCCCTCACCAACATAGGTACTGTTCTTGCATCACAGGAGGAGGGAGCTCAGCTCCTGGTATGTTGTTTCTTCCAAGGGCAGGAATGCCTAGAGTGTGAATATATGAAAGACTTACTTTCTGTAATTCAGGCCCAATGCAGTCCTAGTCCTTGTATAGTTGTCCCTCAGTATCTGTGGGGGATTGGTTCCAGGACCCCCTGTGGATACCAAAATCCATGGATGCTCAAGTCCCTGATATAAACTGGGAATTGTAGGGAGGGTGAAAGTGGACCATCAGATACTCTCCCTCCACGGCTCACTCCTACCTACCACTATACCCATGCTGTGGCCTTTCCACCAGCATGAAAATCAGGGAATAGCTCCCCTTGTTCAAGGCCAAACCCTTCCTGGTGCTCTAGGTCCCATTCTCCCTGAGTCCCACAGGGCCTTGCTCCATCACCGTCCCTCTGTCTACATACCTTCCCCTCACGCCCATACCTGGGGTCTAGGCTTATATGCCTGACCCACCCTACAGCTGCTATGTTTACTTCCTAAGCCAACTGCAGTCTTCTCCCTTCACTCTTCATCCACACGGCTCAAAACCAGGCATCTACAGCCTCTAGTTCTCCCCTCAACCCATCAGTATTCAGCTTTGGGCCCTCAGCTTCTATGCAGTTATGTAGTTATGTGCCCGGGCTTTAGAGTCGGGCTGACTCAAACGGAATCCTGGTCCTGCCCCTTCACATGTGGCCATGAACAAATGACTTATCTTCTCTGGACCTACCTCACAGAGTTAGTTAGTAAGAAAACTACCACATGTAATGTGCCTCGCACAATGCCTGGCACACAGTAAGTGCTCAATAAACGTTATCTGCAATTACTTTCATTACTATTATTACTAGTCCTGGTATTTTATTCATCTGCATATCCTCTATGCTTAGGGAAAAAGGGCTTGGCATCTAGTAAATACTTGATAAATGTTTATTGAATGAATAAACAAACACAGGGGCACATCAGGATAAGCTAACCAGACAGCAGGGGAGGTGCTAAATCATGGGGTCTGAGGTGGGGAGATGGTCAGTTTTGAGTGTCAACTTGGCTGGGCTATAGTACCCAGTTATTTAATCAAACACTAAGCTTGCTGTTGCAGTGAAGGTACTGACTTTGAATAAAGGAGACTACCCTCCATAGCATGGGTGGGCCTCATGCAATCAGGTGAAGGCCTTAAAAGCAAAAACTGTGGTTTCCTGGAGAGGAAAAAATTCTGTGCCAGGACTGCAGTGTCAACTCCTCCTAGGTCTCCAGCCTGTTCACCTGCCCTGGAGATTTCAGACTTGCCAGCCCCACAGTAATGTGAGCCAGTTCCTTAACTCTCTTTATACATATATCTGTATCTAACCTATCAGTTCTGTTTCTGTCTGATTGATACAGGATGTGAAGCTGGGAGAAGGCTGATGTCCTGGGTGAAAAGCTAGTGTTCTAAGTGAAGAAAGAAAAATTGTTATTTCCATCATCTTTTGTTGCCTTGTTGTCTCATGATGTAGAGTTGGTAATGATCAAGCTCTTCCTAACAAAGGGTAAGAAATTGACATCTGAATAACTGAGCAAAATATTTTACTTTTGAAAACATTCTTTTTTTTTTTTTTTAAGACGGAGTTTCATTCTGTCACCCAGGCTGGAGTGCAATGGTTCAATCTTGGCTCACTGCAACCTCCGCCTCCTGGGTTCAAGAGATTCTCCCGCCTCAGCCTCCGGAGAAGCTGGGATTACTGGCGCATGCCACCACACCCGGCTAATTTTTGTATTATTAGTAGAGACGGGGTTTC
ACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAAGTGATCCACCTGCCTTGACCTCCCAAAGTGGAAAACATTCTTAAATATATGAAATCTCACCTCTAGTGACCTGTCATGTCGAAAGCCCCAAACGCAAGCTGCAACAGACACCGGGGAAACAAAGAACAGCGGCATGAAGACCAGGAGCCAGAAATGGCTTCCTCTCTCGATTCTGTCACAGACCAGAACTTCAAACATCAACAAGAGCAAGTGGATGCCCACTGCAATCAACATGGCTTTAAACTCCACACACGTTTCTCCTTCTGCTCTAAAAAAGGGAGAGAAGAAGAAAACACCCTCAGTTCAGAATCTCCACTATAAGCAAGCAGTTCAGGGCAAATACCAACTTATATTTATACTTTGAATTTTACTTGAAAATTTGACAAAAGCAAAGGGAAATCAGGTAGAAAGCTAACTTAAACCTAAGCTTTGGTAGGCAATCTCTGAAACATCGAAGAACTACTACATAATACAAAATGAACATTACAACCAAACCAGAATTTAATGTTTTAACCGTATAAGGATATTCTCAAAAGTAATAGCCAGTTCTTATTTCCCTGACAATGTACATAAACACTTCTGTTCACATCTTTAAATTCAACAACAAGAGTTACTTCCAAGATTATTCAAGCTGATTTGCTTCTGCTGCTAAAACCAGGCAAATACCCCTTAAGTCTCATGATCCTCATTTTTCAAGAAACCATGTAAACCACCCTTCACACAATATTATAAAAATAACTCTAGTTCTATGAACAAGTGCCAGTTATATTTCAAGATAGTAGTAACTATTGTTAGGTGCTATTTTTAAATGCAAATAAAACGTATAAATGATTTTCATTTTCCTTTCTATTCCATTAAGATAAATTAACACCTGCAGATGAAAGAGAAAGAAGAACAAGAGTTAAAACTGTTCTCAAACAAAATCAGTTTAATTAGCTAAGTATCATGCACAATAACCTTAACAGATCTACAATTGAGCAATGGTAAGGCCGCTAATCAGGAAAAGGCTCTATAATGCATCTGAAAGGCCTACAATGTTTATTCAAAATACAGATGAACATTTATGATATACATGTATTGTGGGTGACAAATACACCGGAAGTTAGATCATAGAGAAAATGCTATCAGAGGTTATTCCTGTAGGACCCAACCATGTTCCACTGGTTAATGTTAACATGAGAATGACCACGCCTGTACATTCCTTACATTCAACCCCACATACACAATTCCTTTCCTTGCTCAAAACATCTTAAATAAGACCAACAAAGAGAAGTTTGAATATATTCTAAATATCAATTAGTAGAACCTAAATGTTTATTTAACTTTGCATTCTTTGAGAAGCAATTAATATTAGATATCTGAAAATATCTCATAAAAAAATAAAACACATACAGCCACACAGGTCATCAATCTTTCAAAAAAAATCTAAGAACTCTGAAACAGCTATACATGAATGTCCCTCACCTGCCAAGGCTCTCTGTGTAACTGTTTTACAGTTCTTAGACATGTATGTGATATGTAATTTACACAATCTGAATCATTTTCATATTTAGTAAACAAAAATTTAAAAAGTTGATGTAGTGGCCGGACGTGGTGGCTCACACCTGTAATCCCAGCACTTCAGGAGGCCAAGGCGGGTGGATCACCTGATGTCAGGAGTTCAAAACTAGCCTGGCATGGTGAAACCCTATCTCTACTAAAAATACAAAAAATTAGCTGGGCATGGTGGCAGGTGCCTGTAATCCCAGCTACTCAGGAGGCCGAGGCAGGAGAATCACTTGAACCCGGGATGTGGAGGCTGCAGTGGGCCGAGATCACACCATTGCACTCCAGCCTGGGCAACAAGAGCGAAACTCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAAAGGTGCGGTAGTTTCAACTTTACACTTTTCCCACATGAGCAGCTGCCTTCTGGGAATTCCTGTACTCCTCATTT
TCCCAGTGGAGGTTCATAATAGCCTCCCAGTCTTAAGTCCCCCTTTTTCCCTTTATGTAGTTACAGTCTCTGTGGCAGAGAAGGGAAAGCCTCTCGCAGGTCCCAGCAACAAGCAGGTTGCATGCTGGCGTGAGCCAACTCCCAGGAATTTGGGCCAGCGGAGGCATCCAAGAGCAGGGAGGGCAAGAGTTGGAAACGGTAAAGGGTACCCCTCTCCCCTGCCCCCAAAGGCTCTGCTTTCCTTCTCGGCATCCAATCTTTGACTTTCCTCATTCCCCAGCTGCTGTCTCAGGGACTCATGGTCTCCTGGTCAAGCCACCTCCCCTACACTGCTAATAGTCCTAAAGCTCTGGGACTAGGAGGGTGGGACAAGGGGAGCCCCAGTTCCAAAACTGTATTGGAGAAAGATCTTTCATGACCAAACATAATATGAGTGTCTTTTCCTAAAAACAGGGTGGTTTCATGCTGCTTAGTCTAGTATGGCATACCAGTTCTGTATTTTGGGTGCATTTTGGATTATATAGGCATCTGTGAGCTGGCTGGCAACTTACCCCCAAATGGCACTGCTTTTGTAAGAAAATACATACCAAAGACTAAATTTTCAAAAACACAGAAGAGATCTGTTAGCTTATACTATAGTTCTAAGACCCCAGATAGGTAGAAAATAAAATGGTCCTTACTTCATCAAAAGTGAGAAAAGTCAAGATATTGCTCCCTCATGCTAGAGACCAATGGGTTGTATAAAGCAGTATTACCGATATTGAGGATTTCGTGCCCAGACTCCAGTTCCAACTGAGGCTCCAACAATGACCATTAACTTCCACAGCCATATTGGAGCAAAGACAGCCCAGTAACTCCACTGTATGATGCCATCCAAACGAAGGGCCAGCAGCACAGAGAACAGCAGCAGACAGGCATAGATGAGGAATTTACTAGGAGAAAAGTAAAACGATTAAGAAGGATTCACTTTTACAAATATGTGATACTGAAATGGGGAGTAATAAGAGCCACATTTGTCAGCATGTAAAAGGAGTCACTAACTCAATAATCATTTATTGAAAAGGTCTATGGGGCAGACATAATGTGTTGGGAATAAAAGAAACATAAAGAAGACTAAAATAAGTCTCCTGCTTTCCAAGGCTTCCTCATAGAAGGAAGACCACACAGAAACATATAATACAGCACAATGTTTGTGATGAGAGCTTGGAAGAGGAAATACAGACTGTGTCTGAGGAGGCACTCAGAAGCAGAGATGTGGTGACCCTAGAGCTGGTTCCAGAGGGCAAGTAGGAAGCTGCCAGGCAGGAAAGCAAGTGAGATGAGGAAGTATTCCAGGCAGAAGGAACTAGCTATACCAAGACACAGAGACTGGAAAAGGCTGACATGCTCTGAAAATGGTCAAGTTCTATCACTAACTGATTCTATTTCTAAAAAGGCAGCCATCTGTCATATTCATATGGCATGAACATTTTAGTGTATGTATTATACTTTCCATGAATGAATAAATTACACATACACACATGCTCACGTCTCATATAAAAGGGAATTGCTACAGAGGATGTCCTTGAAATAATTAGAAATTATACTCTTGAGGACCTCTATTTCCAGCCTTGACTTAATAATAGGAATATAATTTACCTTCCCGCCTAAATAAGAAGCTTGATACAGTCTACAAAAGAACAGTTTTCAGACATTGACAACAGGCAGTGGAGAACAGGTGAGAAGGAGGAAATAAAGGAGGTAAACCCTACTATTGCCCCAGTTTGCAGATCAGAGGCAGTTTCCAGGCTGCAGCAAGAAAACAGTTAAAACTCAACCCTGTCAATAACTATATTAAATATAAATGGCCAACTGAAAGACAAAGATGATCAGATTGGATAAGTAAGCAAGACAACTATATGCTGTCTGTAAGAATCCCACTTTATCTATCTATAAAGACACAGATAGATTAAAAGCAAAAGGAAAGAAAAAGTTATACCAAATAAACACTAACCAAAAGA
AAGCTGGAATGACTATATTAATATCTGTTTAGTCTTCCATTGCTGCTGTAACAAATTACCACAAACTTAGCAGCTTAAAACAACGTAAATTTATTATCTCACAGTTCTATATGACAAAAGGCGAGACGGGCTTGGTTGGTTTCTCTACTCAGAATCTCACAAGGCTGAAATAAAGATGTCTGTTGGTGGAATGCTTATCAGGGGACTCTGGCAGAATCTACTTCTAAGCTCATTCAGGTTGTTGGCAGAATCCAGTTTCTTGTGGTTGTAGGACTGAGGTATGTGTCTCTTTGCTTGCTGTCACGCAGCAGCTGATCTTGCGATAGTAGGGGCCTCTCCTGGGTCCTTGTAAATAGGCCCCTACATCTCAAAGCCAGTAACAAGCTATAGCATATTCAATCTTTCTCATGCTTGGGATGTTTTCTCACTACTTCTGCCATATCGCTTCTGCTTCCACTGAGAGAAAGTTCTCCGCTTTTAAGAGTTCATGTGATGAAACTGAGTCCACCTGGTTAAGCCAGGCTACTCTCCCTATTTTAAGGTCCATAACTGTGGTTGGTAGGCAGAATTCTAAAGAAGTTTCCCAGGATTCCTGTCCCCTGATTATTCAATCAAACACTAATCTGAGTAATACTGTGAAGGGACTTTGCAGATGGAATTAAGGTTACTAATCAGCTAACTTTACAATAGGAAGATTATACTGGATTATCCAGGTGTGCCCAGTGTAATCCCATAAGCCCTTAAGAAAGCAGAAGAGTAAGTCAGAGAAATGTGGTGGAAGAGAGATGAGGCAGAAGTCAGAGAGATTCCAGACTTGAGAAGGATTCAGCCTGTTACTTCTGGCTTTGAACATGGAGGTAAGGAACCATGAGCCAAGGAATGCAGGCAGGCTTCAGAAGCTGAGAATAACTCGCAGCTGACAGCCAGCAAGGTAAATGGGACCTCAGCCCTACAACCCCAAGGAACTAAATTCTGACAATAGCCCAAATGTGCTTGAAAGCAGATTAATCCCTGGAGGCTCCAGAAAGGAATAGAGCCCTCCTGACACTTTGATTTTGACCCTGTGAAACTAGGCAGAAGACCCATCTGAGTTGTGCTGTACCCGGACTTCTGACCTAAAGAACTGAGAGTAATTTGTCATGGTAACAGCAGAAACGAATGCTAATAAATATAGCTTCAAAGTCCCCTTTGCCATGTAAAATAATAACATATTCACAGGTTTCAGGGCTTAGGGCCTGGGTATCTGTGTATTTGTGGGGGTGGGGCATTCTGCCTACCACAACATAAGACACAGTATATTTTTGAACAAGGACTATTTCCAGGGACAAATAGAGGTAGTTCATAATGATAAAGGGGTCAATTTGTCATATGCCTAATAACAAAGTTTCACAATACATGTAGAAAGTACTGATCAATCTAAAAGGAGAAATAAAAAAATCAAACTGTTATAAATGGAAATTAACATTCCTTTCTTAGTAACTAATAGAACACATAAACAGAAAATTACTAAGGATATATATGATTGTAGCAACACTATCAACCAACTTGACCTAATTAATATTAATGAGTTCCTCCCAACAAAAGCAAAATACAGATTCCTTTCAAACACACACGGAACATTCACCAAGATAGATTGAATTCTGGGCCATAAAACAAAGCTCAACAAATTTAAAAGGACTGAAATCATACAAAGTAAACAAGCACAATGGAGTCAAACTAGAAATCAACAATAGAAAAATATCTGGAAAATTCTCAAAATACTTGAAAATTAAATGCCACACTGCGAAATAATCCATAGGTCAAAGACTATGAAGAAAATTGAAAAATATTTTGGACTAAAGGCAAAAACACAATATACCAAAATTTGTGAGATACACTAAAGCAGTACTTAAGGGAAATTTTAGCATCAAATACTTACATTAGAAAAGATATCAAGTCAATAATCTAAGATTCTATCTTATGAAACTAGAAAGAACATGGAGGTAAGGAACCTCCATGTA
AGAAACGGAAGGAAATTTTTAAAAAGTAAATGGAAAGAAGAAAATGATAAATGTAAGCACATTAATCAATAAAATACAGTAAAAAGGGATTAGAGAAAAAAATCAATGAAACTAAAAGCAGTTTCTTTGAGAAAGTAAGAAAATTGGTAAATCTACAGCCAGAATAATCAGTAAAAAAGAGTAGGCTCAAATTACTAATATCAAGAATGAAAACAGGGATATCACTACAAATCCTAATAATACTAAAGGGATAAGTAGGGGATATTATAAACAACTTTATGCCAGGAAATTTCTTGAAACAAAGACATGGAAACTGCAATTCTAGTTAAAACCTTTTTCAGAAAGAAAATGTCAGGCCATGGCAAAATCTACCAAACATTTAATGAGGAAATACCACCAATTCTTCACAAACTCTTGCAAAAAAGATGAGGGGGGAACATTTCCCAATTTATTTTATGAAGCCAGCATTACCCTGATAACAAAACTGACAAAGAAAGAAAACTACAGACCAATATCTCTCATGAACAGAGATGCAAAAATCCTGAAAAGATTTTAGCCAACTGAATTCAGCAATATATAAGGATACTACATTACGACTAGGTGTGGTTTAGCTAGGATTGCAAGATTGCAATCTTGGATTAACATTTGAAAATCAGTTAGCAACCTTCGTGGTATTAGCAGGCTGAAAAAGAAAAATCATATGATCATCTCAATAGTTGTGTAAAAAGCATTTGACGTAATTTGCTACCCATTCAAGTTAAAAGCGTTAAACAAATTAGGAAAAGAAGAGCATCTATGAAAAACCTGCAGTTATAATGCTTAATGGTGAGAGACTCAATACTTTCCCCTTAAGACAGGAAACACAGCAAGTATGTCCACTTCAACACATCTATTCAACATTGTACTAAAGGTCCTAGCCACAACAATAAGATGATAAAAAGAAATTAAAAGAATATAGTTTTAAAAGAAAGAAGTAAACCTGTCATTATCTATAGGCAACATGATATTCCCTGTAGGAAATCCTAAGTAATCTAAAAAAAAGCTAGTAGATCTAGTAAGTGAATTTAACAAAATTGAAGGACACAAGGTAAATTTACAAAAATTATTGCATTTCTATATAGTAGCTATCACAAATTGGAAACAGAAATTTAAAAATATGTATCATTTACTGCAACTTTCCTATAAATTTATAACTATTCAAAAATTAAAAGTTTATGAAAAAATCCACCATTTACAATAGCATCACAAATATAAAACACTTAGGGATAAATGTAACAACATATATGCAAAACCTATATACTGAAACCTACAAAACACTGGTAGGACTGTGGGGAAGCTAATAAACAACAGAAATTTATTCCTCACAGTTCCAGAGGCTGGAAGTTGGAGATCAGGATGCCAGCATGGCTGGGTTCTGGCGAGGGCTGCCTTCTGGGTGGCAGATGGCAGACTTCTCAATACCCTTCACATGGTGAAAAGAGAGTGAGTTAGCTCTCTGGTCTCTTTTTATAAGGGCACTAGTTCCAACCATGAGGGTTCCACCCTCATAACCTAATCACCTCCCAAATGCCCCACCTCCAAATACCATCACACTGGGGACTAGAGTCAACTTGTGATTTTTAAAGGGACACATTCAGTTCATAACTGCCAAGAAAAATTAAAGATCTAAATAAATGGAGACATATACTGTGTTCATAGAACACTCAATACTGTTAAGATTTACATCCTCTGTGTATTAGTTTCCTATGGGTGCTGTAACAAACTACCACAAAATTGGTGACTTAAAATGGCACACATTTATTATCCTGAGGCCAGAGGTCCAAAAAGGGTTTCACTGGGCTAAAACCAAGATGTCAGCAGGACCTACTCCTTCTGGAGACTCTATGGGAGAATCTATTACTTGTTTTTTTCCAGTTTCCAGAGCTGTATTCCCTTGGCTCATGGCCCCTTTCTCTATCATCAAAACCAGCTGCATAAAATCTTCAAATCTCTGTCTCTGT
TTCCACCACACTGCCTTCTCCTCTTAATATTATCTCCCTCTTTTAAGGGAACCTGTGATTGATTGCATTTATTGCCCCACTTGGATAACCCCATCATCTCAAGATCTTTAACAGGTTCCATGGAAGTGGGTATCTTTGTGGGTCATGATTTAGCCTACCACACACACCAAATTAACCAAATTATTTTAAAGAATCAATGCAATCCCAGCGAAAATTGGGAGCCAGCAGGTTCTTCTGTAGAAATTGACATACTGACTTTAAAAGTCATATGACAGTGCAAAGGACCTAGAATGGGCAAAACAATTCTGAAAAAGAACAAAGTTGGACAATTTATACTACATGCTTTCAAAGCTTACTATAAAGCTACAGTAATAAACACAATGTAGTACTGGCATAAGAATAGATATATAGAACGTACTAGAGAGTTGGAAAACAGACCCATACATATAGAGTCAGTTAATTTTCCACAAAGGTGCCAAAACAACTCAATGGAGAAATAATTATGTTTCAATAAATGGTGGTACAACTGGATAAATGAGGAAAGTACACCTTGACTCTTAACCTTATAACATATACAAAAATTTATGTGACATGGATCATAGACCTAAATGTCATACACAAAACTATAAAATGTCCAAAACACATATGAAAAAAAAATACTTGCAACCTACGGTTAGGCAAACATTTCTTAGATGGGATATGAAATTGGACTTCTCCAAAATGAAAAACTCTTACTCTCCAAAAGATACCTATCATTAACAAAATGACAGCCAAGCCACAAACTGGGAGACCATACTCTGAAAACACGTATCTGACAAAGTAACCTGTATCCAGTATATAAAAAGGACTTTTACAACTAAAAAAATAAGTGAACAACCCGATTTAAAATGGCCAAAAAATTCAGAGACATCCCATCAAAGAAGGCATACAAAAAGCAAATAAGCACACAAAAAATACTCAGTATTTTCAGATGTTAGGACAATGCAAACTAAAATGACAATGAGATAAGATTACACACACACTAGATCTCTTATATATGCTGGTGGGAATGCAAAATAGTACAGCCACTTTAGAAAACATTTTGGAAGTTTCTTATACAGTTAAGTGTATACTTATTCTATGGCCCAGCAATCCCACACCTAGGTATTTTACTCAAGAAAAAGGAAAACGTATGTCCACACAAAGGCCTGTATTCAAATATTCCAAGAAGCTTTATTCATAATTGCCACTGGTAACAACTCACACATCCATAAACTGGTGAATGGTTAACTGAATTTTGATATATCCGTTCAATGGAATACTAATCAGCAGTAAAAAGTAACAAACTATGGATGACAACAACAACATTGATGAATCTAAAATGCACTATAAGTAAAATAATTCAGATGGAAAACACAAGATAGTGTACAATGCTACTTATTTGACATTCTGGAAAGGGCAAAACTCTAGACACAGAAAAACAGATCAGTGCAGTTACAGATAAGGAACAGAAAGCTCAGATGTTAAATAACTTGTCTAAGAGCTCAGAGTTGGGAGTCAAACCATTTGACTCCAAAATCCTTCTAATTGAGTATTATACTATACCGGCTTTACACTGATAAATGAACATGTTTAGAAAAGCATCTGTATAAATATCTGCTGAGCAGCTAACAGGTCTCCACTCGGGAAGATTAAGGATAGTCTTTATACTCTTTTGCTTATGATATCTGAGTTTTCTGTAAGGTACGTGTACTGTTTTTGTACTATGAAAAATAACATAAGAAACTTCGATTGGAAAAAAGGGAGGAGGGTTGTAAGAAGACTGGACCTACAAAATTAAACTTGGAAGCAAGCCTCCCAGAGAGAAAACTACGCTGGGAAGATCATGAGATTCAGAGGGGTTGCTAAAAAAATATACATAGCATCATTGTGTCATGGTATTCTTACCCATAAAGTTAGGGCAATAATATGTACCTCATAAGGTGGCTACGAATATAAGATAATGAGTATAACAAGTGCTT
TGTATAAAGCACTAAATCAGTGGCTCTCAAAGTTTTTAGTATCAAAAATTTTGACGTTTAACTCTTAAAAAGAACTCCTTTTCAGTTCTAACACTTACTGAGGATCCCAAAGAGCTCTGGTTGATATGGGTTATCCCTATTGGTATTTAATATTTACGATACAGTTGTTATTCAAAAAATATCTACTCTTCGAAGGTAATAATAAACACATTACATACTAACATACATAACATTATTTAAAAACATTTCCAAAACCAACACAAATTTAGTGAGAGTAGCGACACTGTTTCTACAGTTTAGCAAAATTTTTAATGTATGATATCATAGAAGACACCTAGATTCTAATATCTATATTTACATTCATTCTGTTGCAATACATTGTTTTGGTTCAAGTATACAAAGAAAATCTGGCCTCACAGAGATCTGTAGTTAGAAAAGAACTATTTTAATAGCTCTTTCAGATAATGGTAGACGTTCTTCAACAGGACACTAAAACTCAACAAGTGGTAGTTTCTTAAAGGTTAACTATGATGTGGAATCTGAAATCGTATCAATGACCTTTTTGTACTCTGCTACACTGAAATCCACTGGTCTCTCTTATACTTTCAATGAATCTTTTATCCCAGCATTATTGTATAATGCAATGTAGGTCGACTGGAAAATATTGGTTTACTGAGTTATAAAGATCTTTCAAATGTTGACACATTTCATATACAATAGCAAAAAAAATCACATTACTTACTATCTTCACCAACTTCATCAGAAATATTTTAATTACTGGGAGGTTGTCAAGCTCATGGTAGCAGATACAACTTTTCAAAAATTCTGGTTTTCCCATGAAAGATCAAATTTTACCATTTACAAAAAATACCATGAGTTGTTTTCCTTGAAGTTTTGCTTCCTGCACTTTCTTTTTTTGTGTGTGTGATAGAAGCATCTTTTATTATAGTATTTTTGTCTTTTTTTTTCTTTTTTTTATTATTATTATACTTTAAGATTTAGGGTACATGTGCACAATGTGCAGGTTAGTTACATATGTATACATGTGCCATGCTGGTGTGCTGCACCCATTAACTCGTCACTTAGCATTAGGTATATCTCCTAATGCTATCCCTCCCCCCAACCCCCACCCCACAACAGTCCCCAGAGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCATCTATGAGTGAGAACATGTGGTGTTTGGTTTTTTGTCCTTGCAATAGTTTACTGAGAATGATGATTTCCAATTTCACCCATGTCCCTACAAAGGACATGAACTCATCATTTTTTATGGCTGTATAGTATTCCATGGTGTATATGTGCCACATTTTCTTGATCCAGTCTATCATTGTTGGACATTTGGGTTGGTTCCAAATCTTTGCTATTGTGAATAGTGCCAGTTAGAATGTCAATCATTAAAAAGTCAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAATAGGAACACTTTTACACTGTTGGTGGGACTGTAAACTAGTTCAACCGTTGTGGAAGTCAGTGTGGCGATTCCTCAGGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTGGGTATATACCCAAAGGACTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGTGGCTTCCTGCATTTTCAAGAAAATGTCTGCCAAACACCATAACACAAATAACCACAGTCTTGTCTGTCAGTTGTCCTTCCAAATAAAAATGATACTCCATGAAAACAGCAGCTTGTAACTCAGGCACACACGTTTTTCCTTGAGTCAACCATTGCACTTCAGCATACAACAGGCCTTTATGCATTCTTCCCATTTCATCACATGGAATATTAAACTAGATGTGCATTCAGGGGTCAAGATTAAATGAGATTAATATTTTTCTGCTTTATCAAGGACATTCGTAAGTGAAGCTGGCATTTTTTTTTTAACTGCAAGTCCAGCACGTGGTGGT
GAAGAATCCAGTGACAGAAGACTAGTGCAGCTGGTGCCACTGTCCTGATTTGTGCTCCAGCACCAGCTGTTTGATGCACTACTTTTGTACCACCAGTGCCAATGTCGACCAAGGCAAAGAATGTCTTAGTATTATAATTTTGACTTTGCAGATACCTGGAAAGGGTCTCAAGGCCCACTCCCAAGGTCTGTGGGAAACAGTCTGATAACCAGCGTACTCAATACACATTAGCTAATATTATTAATACTCGAAAACAAAAACAGGCTTTTATAAGCTGCTACAAAGAAAGAACATTTAAAGAAACCAAGATAAAAATGACTTCATCCCTTAGACTTTGGACAAGAAGAGAGCCTGTGTCAAGGTGATTCACAAGTTAATAGAAATGGAGTTATCACCACAATTACAGTACACAATTAGTAGAGGCAGAAGTTTTCTCTCTTAAAGCAGAGGGAAATATTCCAAAATCTAAAAAAATCAGAACCAATTTGTCAACTAAAGCCTGTGGCTACAAATATAATTACCAGAAACAATGGACTTGAAAACAGACTATTTAAAAAAGAAATTAGTGGATTCAAACATTAACAAATGCTAAGATAATGACGACACAGGATTCATATGTAAATTAGATAACATGTACAGGTAATTTTTATCCTAAAGGATTTTCAGACTATAAATAAAAAGTAAATTGGTGGGGGGGGTGGTAAATGCTGTAAGCTAACTTCATTATCCTCTGTAAAACAGTTTCCTTATTTAATAAAAAAAGAGAGTAGTGGAACTAAAGAAAAGAGAAGTACCAAAATGTTTTAAATGTTGGAAAAGGGATTATTTTATGTGGTTTTTGACATAATTTAAAGGAAACTTAAAATTTTATGTCTATTACATGAAAAATAGAAAACAAGATAAACTTACAAAAGGACCCACAAAATATAATTCATGATTCAAATACTAGGGTTAGAAAAATATACATGAGCTGAATGTTCTCTTATTAATTTCCTATTGCTGCCATTCAAAACTGCCACAAACCTAGTGGTTTAAAAAGCATAAACCTAGTGGTTTAAAAGGCATAACCCTTACAATTCTGGAGGTCAGAGTCCAATATGGGTCTCGCTGGGCTAAACTTGAGGTGTCAGCAGGACTATGCTACTGAAGGCTCTAGGAGAGAATGTTTCTTTGCCTTTTCTAGCTCTAGAGGCTGCCACATTCCTTAGCTCATGGCCCCTTCCTCCATCTTCAAAGTCAACAATGGAGAATGCAGTTCTTCTCATACTGAATCACTCTGACCTCCTTTTCTGTCTCCCTCTTCTATATTTAAGGGCCCTGTAATTACATTGGGCTCAGCCAGAGAATCCAGAATAATCTATTTTAAGGTCAGCTGATGAGCAAACTTAATTCCATCTGCTACCTTAATTCCCCTTTGCCATGTAACAGAACATATTCCCTGGTTCCACGGATTAGGATGTGGACATACTTGGGGGCGTCATTATTCTGCCTACCACAGCTCTGTAAAAAGAAAGGTTGCTTCCTAGATTAAGAAAGTAAACCTGTTACATTCAAGAGTTACGCTTGGAACCAAAGTCAAAAGTCGAACCAACTATCAGAAATCAGATGACAGAGGTTTAGCACGCCTGGATGAAGACAAGAGCAACAATTTACTATCTTTAAAATTAGATAAATTAGATTTCAAAGCCGAAAGTATTAAAATGCTAAAAGGACTAATAAGGAAAACCTAAATAACAAAAACCTAACATGACCATGGAATATGGAAATAATTTTACAATTTTTCTTTTTTAAGGATACACAGAAATATTTTAATTGTGGGCTTCCTCATGCTACTCTTAAATCATGACAGATAAAACAGACAAAGTTCCTAAGGAAAATACGGAAATGAACACAAGAGTAGATTTAAAAAACAGCAAGGTTATATGAAGAGAATTAAGTAGCAGAACGAGAAAATTTAAAAAACAGCAAGCTTATATGAAGATAATTACGTAGCAGAACAA
GAAAACATACTTGCTTTACATATATATAGTTTTTCCAGGGAGAAAAAAAGGATCATGGGGCAGCAATACACAATAAACACACATCTAATTTACAATTGTTAAAAATATTTGAGTGCCTGAAATGTGTGAGATACATAAAAAGGAGCAACAATGCGTTCGCTCAAGATGCTCAGACTAATAGGAACAGAGGGAGATGATGTATCAACAAGTAACTACATATGGTAAACTAGGGGACTTGGAAGATTAATTTCCCTGAGAGAGATGGGAAGAGAAGTGGAATCTGGGCAACAAGTTGGGAATAAGGAATTCCAGACAAAGGGAATAAGATGTTTTCCAATTTTTTAAAAATTATTTTTATTTATTATGGATACAAAACAGTTGTGCCTATTTATGGGGTACACGTGATACTGTGATGCAAGCATACAATGTGTAATGATCAAATCAGAGTAACTGGGATATCCATCACCTCAAGCACTGATCACTTACTTGTGTTAGGCACATTTCAATTCCACTCTTTTAGTTATTTGCAAATATACATTATTAACTATATTTGCCCTCTTGTGCTACCAAACACTAGATTTATTATTTCTATCTAACTGTATTATTGTACCCATTGCTCATCCCCTCTTTATCCTCCCCTCTCCAATTCCAGCAGACTGGAACACACAGAATACTTTCTATAAATCGTGGTGGGTAGGGTGCTGCAGAGACAGGAGAAAGGGTACTGGGGGGAATAAGCCAGAAGTTGGCTGTGGTAACATGGTAGGGGAATGTGAATATCAAGGTAAAGTCCTTGGAATTTATTAATAGGTAATATGGGGCCACTGCAGATTTTCTCAACAAGAGTGACAGAAGATACATATGAGTGTGTAAGGTAGATTGTAATCAGTAGAGACCAGAGGTTGACATTTTATACAATTCATAAAAACACTGACAAAAAATGGATCATTTTCAATACTGATTTGTTAAGCAGTTTAGTAGAGTTACTAAAATACAATTGAAACATAAAAATACCTGGGCAGCTGCCAAAACAAAACTCAAGAGAAAATGTGTTCATTTAAAATGTTTAAGTAAAAGTAGAAAACAAAGAAAAAAAAAAGAGGCAAAAGAAAACTAGTAAACTGAGTTTCTAAGAAATTTGGGGAAAAAGCCAAAACATGAAAATAATAAAACTAAAAGTAGAAATATAGATATAAAAATGAAACTGATCAGTAATCCCAACAGCTAGCTTTTTAAAAAAATTAAACTACAAAGTTGATCAAGTAAAACAAAGTAATACAATCAGTAAGTTCAAAAGATGTAATTCATACACACTCCTAGTTGGGGAAAGAAAGAATAGTATCTTAACTATATCAATATATTTGCAAGCAAAGTCTAAAAGGTGACTGCATAGCCAAAAGGAAATATCAAGCCTGATTTGCAAAAGAATACACAATAATATGCAATAATACAGTGATAAGAAAATGTATTTACAAAAATAACTTATGGTCATAGCCTAAACTACTTTGAGTTACTTTTCAAGAAACATTCAGACCAGATTCCAAGTATAAAAATAGACTGCTTAATTCTCCACAAACCTGGGAGAAATGGGAGGTTCTGGTTTAACATTAATCACTACTTCTTAAAATTCACTTTTCCAGTTACACCTTTAGAATGGATTTATTCCACTCTCATTTGAATCTGTAGTGTTAGTATACGATGAATCACTAAGTAGTGCTACCTGGGCCAAAGCTAGTATCCTCTCTGAAATTTACATGGAGCTTTCTGCTCAGGCTCAAATTCCCTCTCTCCACTTTGCAGGGTTGGGAGAATAGGGAGGGAGAAAAGGAAGAAGGGAGGGAGAGAGAGAATATACTTAGCAAAGGAATGATGACTCTGAATGTAAAAGTTCTAGTATCTGTTTTCTGCATATCATAAGAATGCAGCTGGACTCTTTTTAGACCTATCAGTTTTTTTCCAGTGGAAGCTGCTGGCTTCACTGGAGCACATACACGCGCATT
AACACATGCCTGCATGCACGTACACACGCACAAGCACATACACACACACAAACTTTGGATGGCACATCCCAGTCTAAAGCTTGACAGAATGGCTCCAAATGACAACCTGACATACTCACAAACTATCAAGGGAACTGCCCCTCTTACACCTTATTAACCAAATTTAAAGTCTTTTAGTGGAGACTTTAGCTGGTTTTATTAAGATAAAAAATGTTTAAGAGCATTTAACCAGTAGATACAAAAAAGCACAAAAGCCAAATAAAAGCAGAGCCCAGGCCAGTAGGAAGATCTGCATTTAAGTTCCTCAACTGCCATTCATCAGCGGTGTGGTCTTGGGTGAGTTATTTTCCCCCAGGTCTGTCTCCTCATCTGATTCCTCATATACTGATTCATTAAGACAAGGGTATGTGAAAGCACTGTGTAAATGATACATTTTACCCATTCTAGCTTTAGCAGTATTATGAAAGACAAAAGTTCTGCCACATTGTAGGTAATAATCTCATTTAGCAATCATTGTTGTCACTATTAGGTTGGAGCTGACAAAGTATGAGTCTCCACTTATAAAGCATCTATCTCCAGAGTTCGAGGCTTTTGTCTTAAATTCCAGTCATCTTGTACAGGCATGACATATCAGGCAAAATGATTAGAAAACTCAAGCTCCATATTAAAAAGCTTAACTTCTGGAGTCCTAGGAAACTATCTAAAACTCCTTCAATCCACGGTCTCTCCTGTACAACAAAGACTTCCCAGTGGTAGATGTCTATTTGTACCCAACATCCACTCATTCAGCACGTAATTCAGCACCTCCAAATCCTGGCCCTCAAAGAACACAGCTAAGAACAATGTGTGTGTAATTATCAAGTAATAGGAATGATACTTTTAAAACTGGAAATTATACATTCAAATGAGATTTCTCTCCTTTAACCAGTCCCCTTGGGAGGCAATGCAGTAATTCCAATGGTACTTCATTACTCAAATCATCTTTGAAGCTTTCTTCTTGGAAGCACCTTGAGAACCTGCAGTCTGATCTTCTGACTATCCCAAATGGTGCTAAATTTTCACTGAGGGTGGATTCAAATTTTGGAAATGGCAAACAGTCAGTCAGAGCCAAGGTTAGTGAATAAGATGTGTGATCAAACTAGGTGGAACTATTTTGGTTGAAAATGATAGATGATCATAAAGCAATGAGATGGATCTTCTTATGTGATATGTAAACTGACTTTAAAGGGAATTCCAGATGAGTAACAAGGAGTATGAACAATGGAATAGGTGTATACATTCTCTTTCCCCAAGTAACCTCTTTGAATAACAACACTCATTTGGATGTATAAGCGCCACTAACAGGTTTGTTTTGTTTAATCACTTACAATTTGTAAATAGAGGGTTCCTTAGCATACTGGATCTACAATGTCATGGAGGAAAGGAGTTGTCACTTAACTAGAACTATGGCATGGCACGCAGGAAAGAGCACAGGCTTCATGAGACTGACGTGGATTCGAATGTTGACTTAGATTCTTCCTGCCTATGCAACTTCAGGGAAGTCACAAGCTGTCAAGACTTTAAAATAAGGCTGATACCTAATTTTGCAGGGTTGTTGTAAAGATCATGGATAATGGATGCAAAGCCCCTACATTGGGCCTGGCATGTGGTGGGTACTCAAGAAGTCACAGCTATTATTAACAGCAACATCAGTAAAATCAAGACCATTTTCTGACTGTAGGTGGCTTGAGAGGACAGAACAACAGATAAGCAGTCACTTGAGTGGTAAGTAGTTCATTGACAGTACTGACAGTACACTCGTGCATTAGATACTGCTTTCTTAAAAACAATAACCTGTAAAAACTATTTGCAGTAACTATGTATTTTTAATTCATACGACACATATCATCTGCAGTGCTCATACCACATGGACCTCTTCACTACCTGAACATACCAGGCACTATCATGACCCCATGCCTTTGTGTACTTTGTCCCTTCTGAGATACTGAAAGGGGCCAGCC
CCTCCACACCTGTGGGTATTTCTCGTCAGGTGGGACGAGACTGAGAAAAGAAATAAGACACAGAAACAAAGTATAGAGAAAGAACAGTGGGCCCAGGGGACCGGCACTCAGCATACGGAGGACCCGCACCAGCGCTAGCCTCTGAGTTACCTCAGTATTTACTGATCATTATTTTTACTATCTTAGCGAGGGGAGTGTAGCAGGGCAACAGGTGGGGAGAAGGTCAGCAGGGAAACGTGAGCAAAGGAATCTGTATCATGAATAAGTTCAAGGAAAGGTACTGTGCCTGGATGTGCACGCAGGCTAGATTTATGTTTCTCTTTACCCAAACAACTCAGTGTAGCAAAGAGTAACAGAGCAGTATTGCTGCCAGCATACTTCGCCTCCAGCCACAGGGTGGTTTTCTCCTATCTCAGAATAGAACGAATGGGAATGGTCAGCTTTACACAGAGACATTCCATTCCCAGGGATGAGCAGGAGACAGAAGCCTTCCTCTTATCTCAACTGCAAAGAGGCCCCCCTCTTTCACTACTCCTCCTCAGCACAGACACTTTACGGGTGTCGGGCTGGGGGGTGGTAAGGTCTTTCCTTTCCCACAAGGCCATATCTCAGGCTGTCTCAGTGGGGGGAAACCTTGGACAATACCCAGGCTTTCTTGGGCAGATGTCCCTGCGGCCTTCCGCAGTGCACTGTGTCCCTGGTTAATCGAGAATGGAGAATGGCGATGACTTTTACCAAGCATACTGCCTGCAAACATATTGTTAACAAGGTACATCCTGCACAGCCCTAAATCCATTAAACCTTGATTCATTACAGCACAGGTTTCTGTGAGCACAGGGTTGGGACTAAAGTTACAGGTTAACAGCATCTCAAAGCAGAAACAATTTTTCTTAGTACAGATCAAAATGGAGTTTCTTATGTCTTCCTTTTCTACATAGACACAGTAACAATCTGATCTCTCTCTCTCTTCCTCACAAGGTACCCTTCCTACTCTCTGGTCTGATGACAACCCTACAATTCTAGCTCAAAAGTCTGGCTCAAAAGACTAGCTCAATTCAAAATCTAGCTCAAAAGTCATTTCCACTATAGACTCTTCCCTGCCTGCTCGAGACAGAATTAGTAGCTCTGCCATTTGTACTTCTAAGACATTTGGTTTCTTTCACTGTAATGCCTCATACTCTATTGAATTCTTATTTCCCTTTCATTTCTGCTTCTTCCACTAGGCTTACGCATTGAGAGGAGGACTATATTCCTTTATTTTTGTACTTAGAGCACCTAGTACATCACTTGGCACGAGATAGGAATCTAGATGTTTGAGGAATCAATGTTATAATATACTTTGCAAATAACTGTTAGAGGCCAGGTAGCTGACTTTAGTGAACTATATTACCAGCAATCGCATTCCTTTTTCCTGCTTCAAACAAGAGACAAGATAACTTATATGGACTCCAGAATGTCTTCTGAAGATGGAGCATACAGAAGTCTCCAAACACTACTGATCTCATCAGTACCCCATGCCCACAAGTTGGAACCCCCAGAGCAACACTTCTCAGCCTTTGTTCCAGTCTAGCACACCTGAGGGATACACCACACTCCCATCAGGAGCTCACAACAGATGACTAAGAAGGCCCAGCAGAGTTTTCACAGCCTTCCAGTGCCACATGGCTACCAGGGTGGAAGGGCCTTCTCTGAACCAGCAACTCCCCCAAAGTAGCAGAAAATTTCAAGGCTCCATGGTGAGTGCCGGGAGCCCACCTAATGCTACCTACTACCAGATCCCTACCACCTACAATGCCTCAGAATCAGTTTGCCTACTCTAAGAACAGGGCTCTTTCACTGTTAGGAAGCCTCAGGCATTCAAGGAAGGACTTGTTTGTGGGTCCGGTACATCTGTTTCGGCATGTCTTAACCTGCATAACTAAAAAGCAATTATGAAAAGAAGTTCATGATCAACCAGTTCAGTGCCAGATGGAAATAGGTAGAGTGGGTCAGCTGGCAGC
CCTGTACATCTGAGTGTTGACACTTGTGAATCATTCTCTGTCACTATAGTTTCCAGAGCCTTGCCAACGCAGCAGTGGGTTCAGACTTACCAAGATCCAAAAGGCTGGAAAGGGGATCTGGAGCCATGACCCATGCCTTGGGACCCACCAAACCATGACAGTGACTATTTCTATGACAAAGACATGCTCTTCAAAGTAAATATAGCCATATTCTGCCTTAGTGCCATGTTCTCTCCCATCTCTCCAAAGCAATGGCTGCACTCTGAAGGTGGGAAGAGTGTGAGAAGAAAGAGAACCAGGCCACAGACCACCAGGAAAGCAGTGATTGAAAGCAGCAGCTCTAGATTTATCTTATAGGCTCTGGAGAGTTATTCAGTTATTTACTCCACAAATATTTATTGAGCCAGGTACTATTCCAAGTATGCAGGCTACGATACTGACACAGTCACGGCCTTGACCCCATGGAGCTTAAAGTTTAGTGGGGGAGGCAGGTATTAATCAAATAAGCATCCAGACAGACACAAGGGTACAAATATGCACTATGATGGAGGAGTAGTAAATACTACAGGAACTGTAGAAACAGGGAGCAGTAAATCTGGTCAGGGAAGTCAGGAATGCTTCCCTTAGCAAGTGCCAAGTGAGGATAAATAGGAGTTCACTAGGTCTGGAGGAGAGGAAAAGAGAGGGAAACATTCCTGGTAAAGAGAACAGTGTGTGACAAGGTCCTGGGGTAGGAGGGAGATGTTGAATTGGTTCAAAAATAGAGGCAGGGAGCAAAGCAGTAGCATCAAGGTTTGAGTCCCCAGCATAAGGTAACTTCCTGTGTACACCCCTCAGTTTTCTTTCTCCTATAGAGAAGGCCTTCCTCAGCATGGAAGGAGACTAAAGCACACTGTCAAAAACTAGGAGTGCTCAGGAGAGGTGAAACACTACCAGCTAAAAAAACAAATCTGATTTACCAATAATACCTGGCCAATGTCCTCTTGAAATATCTGTAAAATTAAGGAACTCACTACCTCTGGCAACAATGAATCAGGAAAATGGCACAGTATAAAAGACCTTTAAACTTTTCAGGTAGAAATACATTTTAATGCAGGTAGAATTAAGAGATTGATGAATATGTTGAGAATTACTATAAATCTGCTTAGATACCAGGTATTTCACTGTAATTTCATATACTAATTTTTTTGGGGGGAAAAGGGCTAAAGGAAAAAACGGTTAAAGGTAGTATCAGTGCAGCACTATTTTGCTTTGGCAACTCTGTTAGCCTGTTTGATCACTTAAGGCAGTTTCTTGAGGCTTCTGAATTCTCTGTCACCTTGCAAACAAGTCACTGTAGCTTTCTATTTGTAAGGCACTTTGTAATCAATCCTTAATTGGCCTCCCTACCCCACTGAGAGATTATACTCATTCTACAGATAAAACAATAGATAAGTGAGTTGCCCAAGGGCACAAAACAATTCAGTTTTTACTTAACTTTCTTATCACCATCTCCAAATAGAAGACATACAGGGCTTGTCCAGCCACAGCACCAACCCTACTACCCTGCAGACCAATCTTAACCCCCTGATGCAGCAGAGGAAGGGACTGCAGTTAGTTCTGTTACCTCCTCTATGCCTTCGAAGCCAACAATGATAATGTCTGCACCGTGTTAGAGACACTGGCACTCCTAACCAGAACATGAACTCAAAGTGGAATAAGAGCACATACAAAGTCACAATGACTCACAAGTTAAAATGTCACCATCCCTAGGGAGACTGCACCCTCAACATACAAATACTCCCCTAAAACTTCCTTTCATCCTCAACAAGCAGCCAAGGCATAGGTCCCCAGAGTTGGCCCCCCCGCAGTACAGCACTGCACAGTACTGTAAGGCATCTTAAGTGTACAAGCCCTTCTTCACCCCATGCTGAGACCGCCAGCATCTAAAGCACCTAGCATAGGTTACAGCACACAAATATTTACTGAGCACGTCTTCCCAACTGGCAAATAAGGGAAAC
GGATACTCCTTTCGGAGAGTAGTGCATTTAAGAGATCAGTTAGGCCAACTCCTCATTTTACAGAAGGGCCAAAGAAGGGCAGAGAATCACCCAGTTACATATGGCCTCGTCTCCAATCCAAGTCTTTGGGCTCAGTTTAGTTCCACAAACATTTTATATGTGCCTACTATGTGCCAGATACGGTGGGTGGGAGGCGCTGGGGACACTGAGATGAGCAGGAGATGGTCTCAGCTTCAGGGAGCTCACAGCCTGGGGTAAAGAACAGTTACAAAACAGTCGGGCAAGTGCAAGGACAGGCGCTTGAGTGCTTAGCCAGTGTTTTCTCCACACTGTCCACTGGGTGTTCCCCATCTGGAAAAGAAGGACAGCAATATCTACTTCCTGCAAATAAACTTGTAGTGAAGACTGATGAGAAAATGAGTATGGAAAATGATCAGGCTCACCCAGAAATGCATTATATAAGGTATTGCCATACCCCGACACTGACTCCAAGGATTCAACAGTGTTTCAGGTATCCGTTCAACAAAGAAGCAAACAAGAGAGGGATGGGCATAAACCCAAGGAAAACCCCAAGGACTGCGAGGAACTGGAGGAAACAGCTGGGGTGGGGGTAGGGGTGTTGGGTCGAGTGAGACGGCCCTGACGCGGAGAATGGAGGGCCCGCAGCGGCGCAGAAGAGGATGGAACCGAGACGAAGAAGTTGGGACACCAATGAGGGACAGCAAGCAGAAAAGAATGGGGTTCCCTTGGGGCAGGACGGGGCTCGCGGCCGGGCCCTTCCGGCCGTGGCCGGGCAGGGGCTGAAAGCACCGGGCACGGGAGGAGGAAGCGGGCGGGCGCCGAGGCCGACTGTTTTGCCTGGGGACCGCTTGCACCCGCAGGGAGGCTCGGGCAGGCGCCCGGGTCCTCGGGCTGCAGCATCTCGCCCGCCGTGCCTCCCCGGAGCCGAACACCAGCCCGCGCCCGAGCCCGCAGCGCGGACTCCCGGGGGCGCCAACGACGCCGCCTCACCTCGGGTTGAAGTCCTGGAAGAGGCCCCTCAGGTTCATGGCGGAGAACTTCACCGCGGCGTCCTCCTCCTCCTCCCCCGCACCCCGTGCTGCACAGCCTGCGCCTTACAGCGGGTTCATGGCGCCAGCGCCAGCCGCGTCCACGCTGCTGCTCCCGCTACTGCTGCCGTCCCCGCTGCCGTCGCCGTCGCCGTCGCCGCCGCCGCCGCCGCCCGGAGAAACCTGAGCCACCGCCCCCTGCCCCTCCTTCCGGGCTTCCGTACGAGGGCCGCGCATGCGTCCGGAGCCCCGCCCAGAGCGCTCCTCGCTGGGAGGTCCCCATCCTTGTGTCCGCACGCGACCGG", + // 148678216 - 1); + // + // var codingRegion = new CodingRegion(148679671, 148713263, 333, 1385, 1053); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 7, 148678216, 148679915, 1141, 2840), + // new TranscriptRegion(TranscriptRegionType.Intron, 6, 148679916, 148681217, 1140, 1141), + // new TranscriptRegion(TranscriptRegionType.Exon, 6, 148681218, 148681341, 1017, 1140), + // new TranscriptRegion(TranscriptRegionType.Intron, 5, 148681342, 148681966, 1016, 1017), + // new TranscriptRegion(TranscriptRegionType.Exon, 5, 148681967, 148682143, 840, 1016), + // new 
TranscriptRegion(TranscriptRegionType.Intron, 4, 148682144, 148685652, 839, 840), + // new TranscriptRegion(TranscriptRegionType.Exon, 4, 148685653, 148685736, 756, 839), + // new TranscriptRegion(TranscriptRegionType.Intron, 3, 148685737, 148690313, 755, 756), + // new TranscriptRegion(TranscriptRegionType.Exon, 3, 148690314, 148690521, 548, 755), + // new TranscriptRegion(TranscriptRegionType.Intron, 2, 148690522, 148692969, 547, 548), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 148692970, 148693146, 371, 547), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 148693147, 148713225, 370, 371), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 148713226, 148713418, 178, 370), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 148713419, 148713568, 1, 150) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(2814, 2813, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), + // new RnaEdit(2227, 2227, "C"), + // new RnaEdit(1032, 1032, "T"), + // new RnaEdit(917, 917, "G"), + // new RnaEdit(151, 150, "GCGGCGGCGGCGGCGGCGGCGGCGGCG") + // }; + // + // const byte startExonPhase = 0; + // + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, true, startExonPhase, rnaEdits); + // + // const string expectedCodingSeq = 
"ATGAACCTGAGGGGCCTCTTCCAGGACTTCAACCCGAGTAAATTCCTCATCTATGCCTGTCTGCTGCTGTTCTCTGTGCTGCTGGCCCTTCGTTTGGATGGCATCATACAGTGGAGTTACTGGGCTGTCTTTGCTCCAATATGGCTGTGGAAGTTAATGGTCATTGTTGGAGCCTCAGTTGGAACTGGAGTCTGGGCACGAAATCCTCAATATCGAGCAGAAGGAGAAACGTGTGTGGAGTTTAAAGCCATGTTGATTGCAGTGGGCATCCACTTGCTCTTGTTGATGTTTGAAGTTCTGGTCTGTGACAGAATCGAGAGAGGAAGCCATTTCTGGCTCCTGGTCTTCATGCCGCTGTTCTTTGTTTCCCCGGTGTCTGTTGCAGCTTGCGTTTGGGGCTTTCGACATGACAGGTCACTAGAGTTAGAAATCCTGTGTTCTGTCAACATTCTCCAGTTTATATTCATTGCCTTAAGACTGGACAAGATCATCCACTGGCCCTGGCTTGTTGTGTGTGTCCCGCTGTGGATTCTCATGTCCTTTCTGTGCCTGGTGGTCCTCTACTACATTGTGTGGTCCGTCTTGTTCTTGCGCTCTATGGATGTGATTGCGGAGCAGCGCAGGACACACATAACCATGGCCCTGAGCTGGATGACCATCGTCGTGCCCCTTCTTACATTTGAGATTCTGCTGGTTCACAAACTGGATGGCCACAACGCCTTCTCCTGCATCCCGATCTTTGTCCCCCTTTGGCTCTCGTTGATCACGCTGATGGCAACCACATTTGGACAGAAGGGAGGAAACCACTGGTGGTTTGGTATCCGCAAAGATTTCTGTCAGTTTCTGCTTGAAATCTTCCCATTTCTACGAGAATATGGAAACATTTCCTATGATCTCCATCACGAAGATAATGAAGAAACCGAAGAGACCCCAGTTCCGGAGCCCCCTAAAATCGCACCCATGTTTCGAAAGAAGGCCAGGGTGGTCATTACCCAGAGCCCTGGGAAGTATGTGCTCCCACCTCCCAAATTAAATATCGAAATGCCAGATTAG"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // //NM_017940.4 + // [Fact] + // public void RnaEdits_big_test_reverse() + // { + // //Transcript id:NM_017940.4, chrom: chr1: 16888922 - 16940100 + // var genomicSeq = new SimpleSequence( + // 
"TGATAGGCAAAAGCTTTTAATTGTATAGATTAAAATAACTTTGGACAAAAATTAAAACTCAGGCAGAGAATGTTTTTTTTTTCAACAACACACACTAGCAAAAACAAAGGCACAGTAAACATTGAGGCAGAAAGTTTCCAGCGTAGAGATATGAATATAATAATAGACACAGGCAGGGATGATTAATAAATGATAAAATGTTTACAGGATGATCATTGGAATACAGGACATTTCTAATTTTGAAAACCACCCTCCCAAATACTTCATTATAAGTAAGGTGTCTCTAAAAGGGACAGATCTCCTAGACCCCTCCTTAACCAAGTAACCAGTCCTGATATCATGATAATGCTGATGGACAAACTAGACCTTCTCTGCCCGCAGATGGGCTAAGGTTGGAAACTCACAGCATTGTCTCTGCAGTGTTCCCGGCAAAACGTTTAGGCTGAATTTAATCATGAAGACATTTTCAGACAACTTCAGAATGTAGATCATTGAGCCAGAGAGCTGACCTGTCCTCTATAAACAAGTCCATGTCACCACCATCAATGACAACAACAAAAAGATGAGGAAATATTTGGGGTTCAAAATAACTAAAGAAATGCAGCTATATTATCTTTTTACTTTTTTTGAACCCAAAATATCTCTTCTCCTTTTTGTTGTGTGATTTGTGGTGATATGGACTATGTGAAGGAGACAGGTCAGTTGTCCTGCTCAGTGTTCTACATTCTGCAGTTGTCTGGTAATTACCTCCTATGAAACTCAGGCTAAGCGTTTTCTGCAAGAACATGGCGTTGTTCATATTCTGCACCGGCAGAGTCCTGGGTGACATGCTGTCTCCTGCCAGCGGCTCCTGACTCCTGTTCTCTACAGGATGGAATCGAGAGGAGCAGGGCTAAGGCCTCCCAATGCTGTTTGTCCATCTAGCTGTGGTCTTCCTAAGTACTGACACCAATTGGAGGCTGAAGGACTGTGGCTTCTCTAACCAAAGGAGCCTAGCGGGTTAACAATTGTCAAGAGCAGTTGGTGGTTCTGAAATACAATCCTCAGCCAAGGATCCCTCCTGTGTTAAAGATGGATCAGCTAAAACAATTCAACACTGAAGATACAAAGAATGAGGTTAGGTTCATTGAAACCAGGGTAACACCTTTGGATGAGCTAAACACAAAGATGACACTGACCTTGAGCAGGTATAGAAGCTCAGAGACATGACTGCAAAATGAAATCCCTGAGGAACTTTGTAGCTACCCAGAGATAAGTGGTTCAAATTAAAATGTCTGACTGATCACTCCCGGCATGTGCTGCACAGTTATGTGAACGTGTCACACCTAACTTGGGTCCATTGTCTTCAGACTGAGCACAGGGTGCCACTGGCATGGTCTGAGAATAGGAATAGAGCCATGCCCACTGACCCATCCTATGTCTGGGCTTCCAAATGGAACTATAGTTTCATTCAAATCTTCACGTGCCTATAGGTCCTGCCTGCAGGAATGACATCTCTCGGCTTAGTAAGGGCTGCTTACTGTGGGAATATGACTCCCATCTGGAAGACCAGGTGGAGACTTGTTCCCATCAAAGTAAGAAACCTATTGTCCACGTCAAGGGCGAAGCTGATGTGCTGTTCCTCAAATGAGTAAAACACACTTCTGTAGTGCTGGAATGAGTCAGGTAGTTCAAAGTACATTGACGGAGTCGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTTCCACTTCCATCAGCATGCCGCTGAGCCTGGAAAAGCAGACAAAACTAAAGAAGCAGCCAGGGAAAATCAGACACCACAGAGCCCCACTAGATTTCAGAAGTAACGTAAGGAAGTGGTAAGAAAAGAAAAGGATAGATCCATTAGATCCATTAATGAGGTAAAAAAAAAAAATTATTGCCTTTATGTTGGGATAGAAAAGGGCCAGGTAGAAAACAATGAAAGAGAAAGACAGAGAGACAGAGACAGAGACAGAGACAGAGAGAAA
GTGAGCTAGTGAATTGGCCAGGTGACATACTGGTAAGGGAGTAAAAGGACACTCTGAGTTAGTGCCCTCATGACACACAGCACACTGCGATCATGAAAAGAGTGAGCTCAATAGTTTTCCATAAAATATGCTCAAAATTCGATGCAGTGGCCATGAGAGTACAGCTTTTGAAGTATGGTCATCCTATGGTACGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAATGAAAACCAACAGCAATGACAGTAGGAGTAATTCAGCCTTCGTTGAAAACATGAAATCAAACACACTCTGGTTTCCCTCAATCTGTTGCCTCCAGGTGTTAACACAGAATTAAGCATCCACAATTGCTGAAAGTTACCTGGGGCATGGTGGGTTTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCTCCTTCTTTTCTTCGTTGATCTTCTTCCCCTTCTTTTCTTCCCCTTCCCCTTCTTTTCAATTTCTGCAATAAATTCAGACATGGACAGACACATTAAGCTGATTCCCCTACACACATAACAATCCACTGTCTAACCCTCACACAGGGACCTCAGGCTCCTCAGCATAAGAATAGGAGACTGTGAGAGATATATTTCAGGAGGCCTGAAGGCTGGTCATGATAGAAATTCCTCGGTTTTTCTCCCAGAAACTGTGGGTAAAATGTCCCTATTCTAGTAGATCGTTATCCCAATATCATTTGTCCCGAGTTTGTGCAAACAGTTATGCCATATTTTTCCAATCAATTTAAAGCAAATACCCTCAAATGATTTCTAGGAGAAAAACTGCAATATTTAGCCCTGTCTCATCAAATACTCAGATTGTTCATGGTTGTGAGGACTTTAGACACTGAAATTAGAGTGAAAAAGGAAATCTACAAACCCTTGAGTCAAAATCATAGTTCTCTGAATTTGTCACATCTGCCCAGGTCCAATGTCATGAGAGTAGAATCAGAGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCATGCTCACTGACCCATCCCATGTCTGGGCTTCCAGTTAGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCACTGCAGCAATGACATCTCTCAGCTCAGTAATGGCCACTTGGAGCAGGAATATGATCTTTATATGGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCTCCCACGTCAAGAGAAAAGCCAACATGTTTTTCCTCCAATGCATAAAAGGAACTTCCATAGGGCAGGCAGGAGTCAGGCTGTTCAAGACAACTGGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCTCTACTGCCTCCAGCAGCTCCCTGCTGAGCCTGGAAAAGTAGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTACATTTCATGGCTAACATAAGGAACTGTTTAAACAGAAAAAGGACAGATCCATTAATGAGGTAATGAATTATTGCCTTTATGTTGGGATAGACCAGGGCCAGGTAGAAAAGAATGAAAGAGAAAGACAGGGAGAGGGAGAGGGAGAGAGAGACAGAGGAGAAAGTGAGCTCAGCGAATTGGCCGGGTGACACACTGACGAAGGGGTCAAAGGACACTCTGAGTTAGTGCCCTCGGGACACACAGAGAACAGTGATCATGAAAAGAGTGGGCTCAATAATTTTCCATAAACTTGCTTAAGATTCCATGCAGTTGCCATACAGCCTTTGAGGTATGGTCAACCTACAGTAAGTTAGTAAATGATAAGGGGAGGAAGAAATGGAAACCTAAACATCTACTGCAAGGAAAACCAACAGCAATGTCAGTAGGAGTAATTCAACCTTCGTTGAAAACATGAAATTGAACATACTCTTGTTTTCCCTGGACCTGGCATCTCCAGGTGTCAACACAGAATTAAGCATCCATAATTGCTCAAAGTTACCTGGGGCATGATGGGTCT
TGGTCTTCTTCCACTTCTTGGTACTTTTCAATTTCTGCAATAAGTTCAGACATGGACAGACATATTAAGCTGGTTCTCCTACACACATAACAATCCACTGTCTAATCCTCACGCAGGGACTTCAGGCTCCTCAGCATGAGAATAGGACACTGTGAGAGATCTTCTTCAGGAGGCCTGAAGGCTGATCATGATAGAGATTCCTGGGTTTTTGTCCCAGAAACTGTGGGTAAAATTCCCTATTCTGGTAGATCGTTATCCCAAGATCATTTGTCCCAAGTTTGTGCAAATGGTTATGCCATATTTTTCCAATCGATTTAAAGCAAATGCCCCCAAATGGTTGCTGGGAGAAAAACTGCAATATTCAGCCCTGTCTCATCAAATACTCAGATTCTTCATGGTAGCGAGGATTTTAGATGCTGAAATTAGAGTGAAGGATGAAATCTACAAGATCTACAAAATTGAGACAAAATCAGAGTTGTGTGAATTTGTCACATCTGCCCAGATCCAACATCTTGAGAGTGGGATTAGGGTGCCACAGGCATGGCCTGAGACTAGGAAGAGAGCCCTGCTCACTGACCCATCCCTTGCCTGGGCTTCCAAGTGGAACTAGAGTTTCATTCAACCTACATGTGCCTATAGGTCCTCCCTGTGGCAATGACATCTCTCAGCTCAGTAAGGGCCATTTGCAGTAGGAATATGACCCTAACCAGAAGACTCAGTGGATCCTTATCACCTTCATAGAAAGGTACTCACCATCCATGTCAAGAGCCCAGCCAACACGCTGTTGCTCCAATATGTAAAAGGCACTTCTGTAGGGCTGGCATGAGTCAGTCAGTTCAAGATAACCTGAAGGAGTTGAATAACATCTATCCAGTGAGTCCTGCAAGACTTCAGGCCCTTTCTCATCCAGCAGCTCCCTGCTGAGCCTGGAACAGTGGGAAAAAGTAAAGAATAAGCCAGGGGGAATCAGAAACCACACAGCCCCAGCTAGATTTCATGGCTAACATAAGGAAGAGTTTGAAAAGAAAAAGGACAGATCCATTAATGAGGTAACAAATTATTGCCTTTATATTGGGATAGACTAGGGCCAGGTAGAAAAGGATGAAAGAGAAAGACACACACACACACACACACACACACACACACACACACACACACACAGAGTGAGCTCAGTGAATTGGCCAGGTGACACACTGATGAGGGAGTCAACGGTCATTCTCTATTTGTGCTCTCAGGACACACAGTGAACAGTGATCATGAAAAGCATGGCCTCAATAATTTTGCATAAAATGTGCTCAAGTTTCCCTGCAGCCACCATGAGAATACAGCTTTTGAGGTATGGTCAACCTTCACTAGGTTAGTAAATGATAAGGGTAGGAAGAAATGGAAACCTAAACATTTACTCTAATGAGAACCAAAAAGCAATGTAGTAGGCATAATTTAGACTTGTCTGACAAGACAAAATCATTATTTTCAGCATGTACTGTTTTCCCTGGACTTGGCATCTCCAGGTGTCAACATCAAATTAACTGTCCACAATTTCTCAGACTCACCTGGGACCTGTTGCCTCTTGGTCCTCCTTTTTCACTTGATCCCACCGATGTCCTGCAAATAAATTCAGATGGGGCCTCTTACATTAAGCAGTTCTTCCTTGCACACAGAAACATTCCTCTGTCCAATCCTAACACAGGTACATCAGTCTGGTCAGTGTGAGAACAGGAGACTTTGAGAGAAATATTCCAGCAGGCCTGAGGTCAAGTCTTGAGAAAACTGGCTTGGGTTCTTTCATGAGCCTTGGGCAAAATTACCCTGTTTTGGAATGTTATCTTCCCTATGTGCTCTGTCCTAGGTTTGTGTACACAAATGAGCAACTTTTTCCCCAATAAATTGTAGGCAAATAGTTCTAACACCTCATAGGAGAGATACTTCAATATTAAGCTTTCTCTCATCAAATACCCAGAATTTGATAGTTTATGAGATTGTGGACACAGAGATTTGAT
GAAGGGGTGCAATGTACCAGCTCTTGAGTCAAAATGAAACTTGGTTCTACACAGAAGCATCAGCTATTATGGCTTTTGTGGGTGAAAAGTCAGCCATTTATCTAGAAAACATACCAGGAACATGACGGACAGATGAGCTAAAGCAAGCGAACTTAGAAGACACAGAAAATGGGAATAAATTCAGTGAAACCTGGGCCACATCTTTCACTGAGAGGTAGACAAGGGTGACACTTGCCTTGGGCAGGTAAAGAACCACACAGACATGCTTTGGGAACAAAACTCATAAGGAATTTTGTAGCTGGCAAGAGACATTTAATTCAGATGAGCTGATCTGACAGACAACTCCTGGTCATGTGCTGCATAGTTTGGTGTGAGCTTGCCACACCTGCCTTGAGTTCAATGTCGTGACAGTCAGTCCAGGTTGGCACGGGCATGGCCTGAGACTAGGAAGAGAGCAAAGCTCACTCACCCACCCCATGCCTGTGCTTCAGACTCGACTCCAGAGTGATTGAAATCTACATTGATATATAGGTTCAGCCCACAGTGATGGCAAATCTCAGCCCAACAAGGGGCACAAGGCCCAAAGATTATGGGGTCTACCTGGGCCATGAACTGGAGCTTTATCACCTTCACAATGGAGTACTCACCGCCTATGTCAACAGCCATGCAGACTTGCTGTTCCTCTAATGAGTGAAATGTGCCGCTGTAAGACTTGTACGAGGCCAACATTTCAGGAGGAATTGAGAGAGTCGAATAACCTTCATCCCAGGACTCCTGGGGGACTTCCTCCTCTTCAGACTCCTGCAGATTCCTGATGAGCCAGGCAGGACAGGGATGATAGAAGATTTAACCAACAGACATTAGACAACAAAACCTCCCAGATGATCTGATGGGAGACAGAATGGAGTGGTCACAGAAACCAAAGGCATTTTTCCTTCAAGAGAAATAAAACTAGCCTTCTAAATACAGGGTGGAGGGTGACTGCTCTGGGGACAGAGCAAAAATGGGCAGCATGTGCTCAGTACATTTGCCACAGATGAGCCAACTCAGGGCACCCAGACTCTCCCTGTAAACTACCATCATGACTTGCAGCACAGAGAACTGACACAGGGCTTCAACTACTTTGCATAAATTGGGTTGAATTTTACATGCAGCATTCAAGTGAAGAGAGTTCTTGACACAGTGCAGACACAGATCTTGTGTATTAAGGGCCCCATTTTCCCAATATTTTGATATAATATATTTACCTTTTCAATTTCTTTTCTTGCAAAAATACTAGCCAACATACTACCAACAGATAGGAAGAAAGCATATATACATCTCTCCCTGGATTTAAACACATGGGAGAGAATAGGCAACACCAAGAAATCCCTGTTTGAGGGTCTGGAGTGGACTTCCAGCAAACTCCAACAGACCTGAAGCTGAGGGACCTGATTGTTAGAAGGAAAACTAACACACAGAAAGGAATAGCATCAACATCAACAAAAAAGACATCCATCCCAAAACCCCATCTGTAGGTCGCCATCATCAAAGACCAAGGGTAGATAAAACCACAAAGGTGGGGAGAAACCAGAGCACAAAAGCTGAAAATTCCAAAAACCTGACATCCCTTCTCCTCCAAAGGATCACAGCTCCTCGCCAGCAATGGAACAAAGCAGGATGGAGAATGACTTTGATGAGCTGACAGAAGTAGGCTTCAGAAAGTCGGTAATAACAAACTTCTCTGAGCTAAAGGAGGATGTGCGAACTCATCGCAAGGAAGCTAAAAACCTTGAAAAAAGATTAGACGAATGGCCAACCAGAATGAACAGTGTAGAGAAGACCTTAAATGACCTGATGGAGCTGAAAACCATGGCACGAGAACTACGTGATGCATGCACAAGCTTCAGTAGTCAATTCGATCAAGTGCAAGAAACGGTATCAGTGATTCAAGATCAAATTAGTGAAATGAAGCGAGAAGAGAAGTTTAGAGAAAAAAGAGTAAAAAGAAATGAACAAGC
CTCCAATAAATATGGGACTATGTGGAAAGACCAAATCTACGTTTGATTGGTGCACTGAAAGTGACGGGGAGAATGGAACCAAGCTGGGAAACATTCTTCAGGATATTATCCAGGAGGACTTCCCCAACCTAGCAAGGAAGGCCAACATTCAAATTCAGGAAACACAGAGAACACCATAAAGATACTCCTCGAGAAGAGCAACCCCAAAACACATAATTGTCAGATTCACCAAGGTTGAAATGAAGGAAAAAATGCTAAGTGCAGCCAGAGAGAAAGGTCGGATTACCCACAAAGGGAAGCCCATCAGACTAGCAGCAGATCTCTTGGCACAAACCCTACAAGCCAGAAGAGAGTGGGAGCAATATTCAACATTCTTTTTTTTTTCCATATGTATAGTTTTCCTTTATTATTTTTTGTGTGTATGTATATATATGTATATATATTTTTCAATACTTTAAGTCTTAGGGTACATGTGCACAACGTGCAGGTTAGTTACATATGTATACATGTCCACATTGGTGTGCTTCACCCATTAACTCATCATTTAACATTAGGTATATCTCCTAATGCTACCCCTCCTCCCTCCCCCCACCCTACAACAGGCCCCAGTGTGTGATGTTCCCCTTCCTGTGTCCATGTGTTCTCATTGTTCAATTCCCACCTGTGAGTAAGAACATGCGGTATTTCGTTTTTTGTCCTTGCGATAGTTTGCTGAGAATGATGGTTTCCAGCTTCATCCATGCCCCTACAAAGGACATGAACTCATCATTTTTTATAGCTGCATAGTATTCCATGTTGTATATGTGCCACATTTTCTTAATCCAGTCTATCATTGCTGGATATTTGGCTTGGTTCCAAGTCTTTGCTATTGTGAATAGTGCCACAATAAACATATGTGTGCATGTGTCTTTACAACAGCATGATTTATAATCCTTTGGGTATACACCCAGTAATGGGATGGCTGGGTCAAATGGTATTTCTAGTTCTAGATCCCTGAGGAATTGCCACACTGTCTTCCACAATCGTTGAACTAGTTTACACTCCCACCAACAGTGTAAAAGTGTTCCTATTTCTCCACATCCTCTCCAGCATCTTCAACATTCTTAAAGAAAAGAATTTTCAACCCAGAATTTCATATCCAGCCAAACAAAGCTTCATAAGTGAAGGAGAAATAAATCCTTTACAGAGAAGCAAATGCTGAGAGATTTTGTCACCACCAGGCCTGCCTTACAAGAGCTCCTAAAGGAAGCACTAAACATGGAAAGGAACAACCGGTACCAGCCACTGCAAAAACATGCCAAACTGTAAAGACCATTGACGCTAGGAAGAAACTGCATCAACTAACGGGCGAAATAACCAGCTAACATCATAACGACAGGCTCAAATTCACACATAACAATATTAACCTTAAATGTAAATGGGCTAAATGCCCCAGTTAAAAAACACAGAATGGCAAATTGGACAAAGAGTCAAGACCCATCAGTGTGCTGTACTCAGGAAACCCATCTCACATGCAGAGACACACATAGGCTCAAAATAAAGGGATGGAGGAAGATCTACCAAGCAAATGGAAAGCAAAAAAATGCAGGGGTTGCAATCCTAGTCTCTGATAAAACAGACTTTAAACCAACAAAGATCAAAAGAGACAAAGAAGGCCACTACATAATGGTAAAGGGATCAATTCAACAAGAAGAGTTAACTATCCTAAATATATATGCACCCTATACGGGAGCACCCAGATTCATAAAGCAAGTCCTGAGAGACCTACAAAGAGATTTAGACTCCACACAATCATAATGGGAGACTTTAACACCCCACTGTCAATATTAGACAGATCAATGAGACAGAAGCTTTACAAGGATATCCAGGACTTGAACTCAGCTCTCCACCAAGCAGACCTAAAAGACATCTACAGAACTCTCCACCCCAAATCAACAGAATATACATTCTTCTCAGCACCACATCACACTTATTCCAAAATTGACCACATAGTTGGAGGT
AAAGCACTCGTCAGCAAATGTAAAAGAATGGAAACCACAACAAACTGTCAGACCACAGTGCAATCAAATTAGAACTCAGGATTAAGAAACTCACTCAAAACCGCACAACTACATGGAAACTGAACAACCTGCTCCTGAATGACTACTGGGAAAATAACAAAATGAAGGCAGAAATAAAGATGTTCTTTGAAACCAATGAGAACAAAGACACAACATACCAGAATCTCTGGGACACATTTAAAGCAATGTGTAGAGGGAAAATTATAGCACTAAATGCCCACAAGAGAAAGCAGAAAAGATCTAAAATTGACACCCTAACATCACAATTAAAATAACTAGAGAAGCAAAGCAAACAAATTCAAAAGCTAGCAGAAGACAAGAAGTAACTAAGATCAGAGCAGAACTAAAGGAGATAGACACACAAAAAACCCTTCAAAAAATCAATGAATCCAGGGCTGGTTTTTTGAAAAGATCAACAAGAAAACCCTGTTTGGCTAGTTCACCTGGCTCATCTGATGGCAAGTTCCTATCTTGAGAGGACTATGAAATTAAAACCAATACAAGTGCCACAAATAACATACAACATTGTAAATCAGCACAATTTGTAGCTGGGTGAATGGAAGAAATAGTTCTATTCATCACTTCCTCATTTTCCCTAAATCTACAATCTCCAGATGTCACTACTGAATTAACAGCCAACAATTCCACAACATTACCTGGGAGACACTGGCCCTTTTTCTTCCTCTTCCTCATCATCACTTTCATTTTCTGTAAATAAATTCAGAGAAGCAGGTCACATTAAGCAATTCATACTTCACATATGACCAAATCACTGTCCAGTCATAGCACAAGGACATAACTATTCTCAGTGCAAGAATAAGGATTCTGACAGGAATATTCTAGGGTGCCCTAGATTAACTTTGGTGAGAATTAGATGACCCTGCTTTCCAGACCCACAGGCCAAAATCTCCCTCTACGTGTAGACCATAATGCCATATTCCCTGCCTGAGTCAAAGTTAAACAAAATTTTTTCCCCAAAAAAATCTCCAAAAATTGGTCCATTTTCTAAGAGTGTTGCTGCAATACGGACTTATATCACCAGATAACATGGACATTAAATGTTTAGAGGCATCTATACATGAAACACACATGATAGATAAATTTGAACAACTCTTGCTTTAAAAAGAATCTGTGATTTGGGAGGCCAAGACAGGTGAATCATTTGAGGTCATGAGTTCAGGACTACCCTGGCCAATATGGGGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGATGTGATGTTGTGCACCTGTGGTCCCAGCAACTCAGGAGGCTGAGGCAGGAGAATCACTTGAATCTGGGAGGCAGAGGTTGCACCAAGCCAAGATGGTGCAACTGCACTCTAGCCTGGGTGACAGAGCAAGACTCCATCGCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATCCACGATGCTACAAAGAAACATTGGATCAGCCATTGCATTGACAGGGTGGAGAACCAGGGTCCAGCCTTGCTTTATGGAAATATATCAGCAAAGTAAAGAAGAAAAGTTTCCGTCCTGATTTCAGGGTGACTGTGCAGCTAAGCAAGCTGACTTAAAGGAGATCCGGATGAAAGCTGAGAGCAGTGAAGCCTGGGGAACAATATTTCCAAATACAAAGGCAAGGCTGCCAGCTTCCTGAAACAGGCATAGAAACTCCATGGACATTGTTCAGGGACAGATGACTTAATCACAGATGACAAGAGATACTGAATCGAAGCTAGGAGGCCTGACAGATACTGCCTGTGCACCTCCTGCACTCAGGTGACTATGAGATTGTCACACTTGCCTGGGGTCGAGTAACTTGATACTGGGGACTGGCAGACAAAGGCATGACATTAGCTGAGAAGGACAAAAAAACTCCCTGATATCTGTTTAGAAACCCATCATAGTTTTTTATTCAAATGAATTTGTGTTTATAGAGCCTGTC
TTCAGAGTTTATCTTCCTCAGCCTAGAGAGAGGTATGAGACACAAGGAAAACAGAGGCTACCTGGGATAATGTGTACAGCATCCTCCCATTCAACATGAGAGGATGAGCCAATGAGAGTTGAGTCGACTTTGTCTTCCTCAAATGTGATTTTGGTTTTCCTATGTGGCTGGTTGGAGTCATAAGGGCCATGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTCCTTTTCTTCAGCCTTCGGCATCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGAGAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACATGCAATCCTGTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAACCAGAGAGGAAGAGAGCAGCTGGTGTTCATTGCAGTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTGCAGACATGACACTCGGCACACATAGAGAAACATGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTATATTTCACATACTGTGGCCAAGCAAATGCGGGTTTTTGGCCCATCATAGATGCCAGAGAGGGTGTACCTCCTAGATATTCTTCATATGTTACCATCCATTACTTGTTCCTGAGTATTCAGTGTTACCTGGGGGCAGACGATTTCTGCACTTTCTCAGCCACCTCAACTTGAACATCTTCATCGTCATCGTTGTCATTTTCTGTAAATACAGAAGTGTTCGTTCAGATATTTCCCACTTCACAGTCTGCAAGCACAGTCAGCCCAATGTGCAACAGAGACATGAACATCTAGGCATGGGTCACCGTTCAACTGAAAACTCTCATGTTTTATCTTTAACAGAATGCCCTGGCATGGTTTCCTGATCCATCAGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCAGATCTGATTCCCAGGCACAGGCTTGGTGTCCTGTCACAGTTTGCATTTCAAACCTAATTCTTTCTCTTAGAAGCAGACAAACTTATCCCACAGTCCTCTATGCATCAGAAGATTTCAAGCCTCCAAGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACGAAACGCCACCACATAAAGTGCCTTCTCCAACATCACACGGCGAGGGGCTTCATCTCATTTTGGAAAGCAGTTTTAAGTGTTCCCACATTTGAATGCTTCAGACCCTTGCAAGAGACAATTTGCCATGGAGAGAGAGAAACTCAGGAAAGACAAGTCATTCAGTCACTGACAGTTACTAAGAACATTGCCGAAAAGACACCCTGGGAACCTTCATTCTTAGTCCAGAGCTCTTTTCACTCTAACAAGCCTGCTCCTATCGCAGCCTCCTTCCTGTCCTTTAAAACTAGATAGATGCTGCCTCTTACTCCAAAGACAACCTTCCATCAAGGGAGGAGGGACAATTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCAACTCCGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATAAGCTTAGTGGCAAAAAACACCATTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGTCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTTTGGACAAGGTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTCATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCAGGAGGCATCTCTCCCTTCCCGTAACT
TCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGCTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGGAAGGAATGTCTGTGGCTAAGAGAAAGAATAGAAAATGGTTTACAGGTTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCATTTCAAAAAGACATCCTTTCAGTCCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGATGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTGAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGTTCTTAAGTCTCCCCACTGAGCTGCTGTACTTCAGAGATTTACACACCTGCCCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATTTTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAGAACCTCAATGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGATGGTTTCCCTTTTACTGGGAATTTCAAGGACAAGTATGCAAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATGTTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCTAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTGAGGAAACAGGGACAGAAAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTATCTCCACACATTCTCGGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACATTCTGCTTCTTATTCTTATTTTTATTTATCATTATTAGTATTATTTTTTTAACAGTCTTGCCCTGTCGCCCAGGCTGGAGTGCAATGGCAAAATCTTGGCTCACTG
CAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTTCCCAGGCTGGTCTCAAACTCCTGACCTCGTGCTCTACCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTTTGCATATTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATATTTTGGGACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGCGGCAGCCTTGTCTACTTATTTGAAGATGATGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTATCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTTGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGGTTTCATTTTGCCTTTTTAATTTTTTTCTTTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGAGGCACAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGCGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAACTCCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCAGCCTCCGAAAGTTCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGGG
AGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAACAATATGTGCGTATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGCTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCCGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCACACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTAGACATTTTCATATGTTACCACCCATTACTTGCTCCTGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCCCATTCTCCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTGTGGTTCAGCATTGTACTGAAAACTGTCATGTTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGCCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCTCCCCCATCCTGCCACAGATCTGATTCCCAGGAACAGGTTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGTGTCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCATGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCCACATCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTGCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTTGTTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGCCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATGG
CCACATATGTGTAGCAGAAAATAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAATAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCAACTTGCAGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCAAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGGTTCTCACTAAGGGTAAGTGGGGTGGTGATGGCACACCATTTTGAGTATACTGAGTGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCCACCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAGATTTTATGAGGTCTTTGCACTCTTCATATTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCATTGCCTACAGGGCAGGAGCCAGGTCCATCCCAAGGACAAAACTCTCCCCAGTACCAGGGTCTAGACAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTGATCCTCCAAAATTTAGAGATGAAGAAAGGGAACCTCAAGGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAATTCACAGCCCCTGAGGTCTGACTCTGAATGCAGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTAAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTCGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAATTTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGTCATGAAGGAAATATGCCCAAATGCTAATAAAGTTTGTGTTAATTTAGAAACAGCAGAATGAAGAACTAATAGATAGTGTTTACTGTGTGCCAATAAATGTTCTAGGAGATTGACAAGAAATAGCTCATGTAATTCACTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGGTGA
GGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCCCACGGTCTCTGCTCTGTACACTGCACTGCTACCTCCACACATTCTCAGGTGCGATCTTTCTTCCTCTTTAGGAACAAGACTCTGTGCCCCAGGAAGCAGGACTTCACTCTCACCAAGCTACACTCTGCTTCTTATTCTTATTTTTATTTATCATTATTATTATTATTATTATTATTATTATTTTTACCAGTCTTGCCCTGTCACCCAGAGTGGAGTGCAATGGCAAAATCTTGGCTCACTGCAACCTCAGCCTCCTGGGTTCAAAGGATTCTCCTGCCTCAGCCTCCTGAGCAGGGGTGATTACAGTCACCTGCCACCATGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTGGTCTCAAACTCCTGACCTTGTGATCTGCCCGCCTCAGCCTCCCAAAGGGCTGGGATTACAGGAGTGAGCCACCATGCACAGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCGGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCTGTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTACATGGTACAGAGAGGATTCTTGGAAACATGATTGAGCCTCTTGGAGAAAACAGGTCATTCTGTGCCTGTGTCAGAAATCAATAAATGGCAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTCTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTATGGGGAGGATGACATTATTTTAGATGGAGAGAGCACTTAGTTTCTCAGAGAGAAGACAGGACTTCGTTCATCACTTTCGTGATGGTGAGCCTATAGATCTTACTGTATTTGTTCTGCTGGTTGGCCAGGAAGCAGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTCTTCTCTGCCAGCTGGGGATGCAATTTCTCGTTGATTTCTAGAATGTTCATCTCTGCCTTCTCGCTGGACAAAGGGCCGGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGGGCCAGGGACTGGGGAGAAGAAAGGCAAACACATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCACACTGAGGGATGTCAGTGGCAGCCTTGTCTACTTATTTGAAAATGTTGTTTCCCTGGTTTCACTCTTGTCATCTCCAGTCTTGATCTCCTTTAAGTCAACTTGTCTTAGCTATGCAGTCACCTTGAAACCAGGACATAAACACTTCTACACTTTTCTTGCTTATAAGTTTCTATAAAGCAAGGCTGGGCCCTGAGATTTTTACCCCATGAGTGGCCAATGTTTCTGTGTAGCACAAAAGATTGCATTTTCCTTTTTCGATATTTTTCTCTTTTGGTTTTTTGTTTTTTGTTTGAGACGGAGTCTCACTCTGTCACGCAGGCTGCAGTGCAGTGGCGCAATCTCAGCTCACTGCCACCTCTGCCTCCCGGGTTCAAGTGATTCTCATCCCTCAGCCTGCCAAACATCTGGGATTACAAGCGCCAAGTAACATGCCAGCTAATTTTTGCATTTTTAGTAGAGATTGGGTTTCGCCATCTTGGACAGGCTGGTTTCGAACTCCTGACCTCAGGTGTTCCGCCCACCTTGGCCTCCCAAAGTGCTGGGATTAAGATGTGAGCCAGCACCCCCGGTCAGAGACTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCTCTGTCTCCCAGGCTGGAGTGCAGTGGCACAATCTAGGCTCACTGCAAGCT
CCGGTTCCTGGGTTCATGCCATTCTCCTGCCACAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCAACACCGTGCCCAGCTAATTTTTTTTTTTTGTATTTTTAGTAACGACGGGGTTTCACCGTGTTAGCCAGGATGGTCTCGATCTCCTGACCTCGTGATCCACCCGCCCCGGCCTCCCAAAGTGCTGGGATTACATGTGTGAGCCACCGCGCCCGGCCGAGACTTCTTATTAATAGCTAAGACAAGCCAATGAAAAGGAGAGAGAGTCTAGCCTGAGAGGAGTGAACCAGGGTGGGAGGATCGTCTCAGCCGATCCTCCCACCTAAGTCTCCTGAGCAGTTGGGACTAGAGGCACGCAGCACCATGCCTGCCTAATTTTTTGTATTCTTTGTAAAGATGGGTTTCACCATATTGTCCAGGCTGGTCTTCAACTCCTGAACTCAAGTCATCCTCCCACTTGGGCCTTCCAAAGTGCTGTGATTATATGTGTGAGTCACAGAACCTAGCTCCATCCTAGTTTCTGACTAAAAGAATAACAATATGTGTATATACAGCCTGTCCTCAGAATTGATCTTCCATAGCCTAGACAGAGGTATGAGACACAAGGAAAATAGAGGCTACCTGGGAGAATGTTTACAGCATCCTGACATTCATCATGAGAGGATTCTCTGTCTACAACCAGAGTTGAGTTGACTTTGTCTTCCTCAAAGGTGATGTTGATGTTCTTGTGAGGCTGGTTGGAGTCACAAGGGCTGTGGCTATTTGAACAAGTGATGGCACATTCCTCCAGTGAGTCCTCAGGGACTTTGCTTTCTTCAGCCTTCTGCACCTCCCTGATGAGCCAGGTGGGACAGAGATGACAGAAGATTAAACACAGAGGGATTGGACCCCAGGGAGTCCTAGCTGGTTTTGACAGGCGGCATTAAGACAGTGGTCCCAGAAAGCAAAATGGAGGTTCCCTTTAAGGGGGAACAGGCAATCCTCTTCTCTCTGCAACAGAGCATGGCTGCCATGGGAGCCAGAGAGGAAGAGAGCAGCTGGTGTTCAGTGCACTGGACAGATAGGAGCTGAGGAGGATGAAGACTCAGCTATCCCTGTATGGTACAGACATGACACTTGGCACACATAGAGAAACACGACAGCTGCCGCACCCTGTGTCTAAGCTGGGTTGAATTTCACATACTGTGGCCAAGCGAATGCGGGCTTTTGGCCCATCATAGATGCCAGAGAGGGTGTGCCTCCTACACATTTTCATATGTTACCACCCATTACTTGCTCCCGAGTATTCAGTGTTACCTGGGGGCAGATGATTCCAGTACTTTCTCAGCCTCCTCAACTTGAACATCTTCATCCTCATCTTCGTCATTTTCTGTAAATACAAAATGTTCGTTCAGATATTTCCCACTTCACATTCTGCAAGCACAGTCAGCCCAATGTGCACAGAGACATGAACATCTATGTATGGTTCAGCACTGTACTGAAAACTGTCATATTTTATCTTTCACAAAATGCCCTGGCATGGTTTCCTGGTCCATCGGGCAATGCATTTCTGATCTGGAGGGCCACCATCAAGATGTGGTCAAATATTGAAAAGACCTTTTGCTTCCCATATCACTGGAGGCTTGTGCAGCCTCTCTCTGGACTTTGGCAGCTGTCGCCCCCATCCTGCCACATATCTGATTCCCAGGAACAGGCTTGGTGTCCTGTCACAGTTCGCATTTCAAACCTCATTCTTTCTCTTAGGAGAGGACAAACTTGTCCCACAGTCCTCTATGCATCATGAGACTGCACAGGCCCTCCATGTGGCTTCTGCTGTGTTATTCAGGGACATTCTATCCACGGGGAGTGCTCCAGTCTGAAGCACTTCCTACCACCAAATGCCCCTACACCAAGTGCCTTCTCCAACACCAAACGGAGAGGGGCTTCATCTCATTTTAAAAAGCATTCGTAAGTGTTCCCATATTTGGATGCTTCAGACCCTTGCAAGAGACAATTTG
TTTGCCTTTGCAGATGGAGAGAGAGAAACTCTGGAAAGATAAATCACTCACTCACCGACAGTTACTAAGAACATTGTCAAAAAGACAGCCTGGGAACCTTCATTCTTAGCCCAGAGCTCTTTTCACTCCAACAAGCGCCCTCCCATCACAGCCTCCTTCCTGTCCTTTAAAACTAGACAGATGCTGCCTCTTGCTCCAAAGACCACCTTCCATCAAGGAAGGAGGGACACTTGCAATACTGTGACCTCCAACCCCATGGGTTTCCCATCTCTGTTCTTACCCAGGAAGTCCTGGTCATGTCATGGCCACATATGTATAGCAGAAAAAAACCCCACTGATACAACTGTCATTGTGAAAGTATGGAGGTCTGGAGCCTCTCATAAGCCTGGGGTTTTGGGTCATCAGGGCCTATGGCCACCTTACCTGGGCTGAGCTTCTGGAAAAGTTGCTGTGCCAGTCTACACCCCTCAGCCAGCTGTTCTTGGAGGTCCTGCCCCTGGGACTTGTCTGGCTTATCCGGAGTGAGGAGGGCCTGGAGATGCTGATTCAATGAGCGGGAGGCATCTCTCCCTTCCCGTAACTTCTCCCTTAACTGGGTCAGCTCTCGTTCCTGAGAGTGAACCAGGACTTTATATTGCCTAAGGTGAGACGGTAGAGAAAATTTAAGAGTGGAAAGGGTTGAGTGATCCGTTCAAATATTGCAACAGAGATTTCTGAGACAATGTCCTCAAGGAGACCTCCAAGCAGAAGGTCAGCACATGTTGAAAGGAATGACTGTGGCCAAGAGAAAGAATAGAAAATGGTTTACAGGCTTCCTCTGTATCAGAGAGGGCTCCTGCAAGATCCTCGATGATGTTCCATTCATCTTTCCCTTCTGTAAACAAAAGTAGGTGTCTTCCTAATTCCGTTTCAAAAAGACATCCTTTCAGTTCCTCACTCTGGCCATGGACATTTCCATGTGAAAATACACATAGTGCATCTTGCGGCCACTAGATACAAAGCCATGTACAGAAATGAGGCCAGGTGCAGATGGGGCGAATTGAAAAGACGAAAGAAGAAAAGAATGACAGGGTCGAGAAGGCAACATTGATTGAGTGAAAGAATGAGAAGACGCAGTCAGTCAGAAGGTGATTCTCACTAAGGGTAAGTGGGGTGGCGATGGCACACCATTTTGAGTATACTGAATGCTGCTGTGTGGTTCACACTCCTTTGGTTAATTTTGTGTTATGTAAATTTCACATCAACAATTACTTGTTTGAAAAAGAGAAAACAAGGCTCTAAGAAACAACTGCAACCCATAAATTTTTATTATCCTTCTTCTCTGCTTGATAAATACTTGTGTGTTGCGAGCCTGCCATGGCAATTCCTGCCCTTCCCCTGGCCCAGCTTAGCTCTTACGTCTCCCCACCGAGCTGCTGTACTTCAGAGATTTACACAGCTGCTCCCCTGCCTGCCCCCATGGGGTCCCCTCACCTGAGCTCCTCAGCTTGCTTGAGCTGCTCTGCAAGCTTCTCCTCCTTGAACTGTCGCTCATTCCTCAGCATAAATTTTATGAGGTCTTTACACTCTTCATACTCTGAGAAAAGACAGACACGCCTGCCTCAGTGGAAGGCTGGACATGCTGCTGTGGTCACTGCCTACAGGGCAGAAGCCAGGTCCATCCCAAGGACAAAACTGTCCCCCGTACCAGGCTCTAGGCAGGGATTTCCACATCTTTACTCTTCAGTCTCCTGACTTTCTGGCATCTTATCCTCCAAAATTTAAAGACGAAGAAAGAGAAATTCAAGGCACATCAAGGAAGTTGACAAGATGATTCAACCACAACGAAGTGGAGTCAGAACTCACAGCCCCTGAGGTCTGACTCTGAATGCGGGGCCACTTTCCCAAGACTTGCAGCCTCTCCTCTGAAACACTGCACTGGGGCATGAAGTAGTGATTTCTTGTACAGTTGGGAAGGCCCCTAGGACTATGGGACTGACGGTTTCCCTTTTACTGGGAAT
TTCAAAGACAAGTATGCGAAAGATTTTAAAAATCTTTGATTTTTAAATCATATCTTCAGTTATGATTTTAAGAATCATATCTGAAGCATAAAGTGTGACACATAACACCATAAGGCCATGAAGGAAATATGCCCAAATATTTTATTAGTATGACAGGCAGCATCAAGATTTAGATTAGTTGTGTTAATTTAGAAACAGCATAAGATTAGTTTGTGTTAATTTAGAAACATCAGAATGAAGAACTAATAGATAGTGTTTACACTGTGCCAATTAATGTTCAAGGAGATTGACAGGAAATACCTCATGTAATTCATTGCAGCAATTTACAGAGGTAGGTATTATTGTAGTACCCTCTGAACAGATGAGGAAACTGAGGGACAGACAAGACAAGCAACTTGGATGGAGCCCAGGAGACAGGCTGAGGGTCCCTGCTTTGCACACTGCACTGCTGCTTCCACACATTCTCGGGTGTGATCTTTCTTCCTCTTTAGGAACAAGAGCCTGTGCACCAGGAAGCAGGACTTCACTCTCACCAAGGTACTCTCTGCTTTTTATTTTTATTTTTGTTTTATTTATCTTTTTGTTTGTTTGTTTTTTGATGAGTCTTGCCCTGTCACCCATGCTGGAGTGCAATAGTGCAATCTTGGCTCACTGCAACATCTGCCTGCTGGGTTCAAAGGATTCTTCTGCCTCAGCCTCCCGATTAGTGGTGATTACAGTTGCCCGCCACGACGCCCATCTACTTTTTGTATTTTTAGTGGAGATGGGGTTTCTCCATGTTGCCCAGGCTAGTCTCAAACTCCTCACCTCGTGCTCTGCCCGCCTCAGCCTCCCAAAGTGCTGAGATTACAGGAGTGAGCCACGTTGCACGGCCCCTACTCCCTGCTCTTGATGCTGTCACTTATAGATAGCACAGGTTCTATTAGGAGCAGACTCCTCTTGAAGCCCCTCAGAGCAGGTACTGGCTACTATCACCAAGTTTCCCTCAGAGTCACTAGAACAGAGCCTTGCCTGTTGGGCCTCAACAGAAACTTGAACTGAATAAAAGTTCACTAGTCTCAGACATTTAGAACAACAGACTAGATGTTATTTGTCTGCAGGATCTTATATGGTACAGAGAGGATTCTTGAAAACATGATTGAGCCTCTTGGAGAAAACAGGTCGTTCTGTGTCTGTGTTAGAAATCAATAACTGTGAGTTTAACTCTAGTCCCACCCCCACCTGATTGCAAACATGGAAAGTTGCTAAATACTTTGGTACCTCTGTCTTCCAACTTTAACAAAATGTTAAAATACCCATTTCTGTTTTCCTAGAAGTACAGGAAGGATGAAATTATTTTTGATGGAGAGAGCATTTAGTGTCTCAGAGAGAAGACAGGACATCATTCATCACTTTCATGATGGTGAGCCTATAGATCTTACTGTATTTCTTCTGTCGGTTGGCCAGGAAGCCGGCCAGTTGAGTTACAAAACATTTCTCTTTGAGGTTTCTGAACTGCTGTTTGTTCTCTGCCAGCTGGGGGCGCAATTTCTCGTTGATTTCTAAAATGTTCGTCTCTGCCTTCTCGCTGGACCAAGGGCCAGCTGATACCACCATGCTGACGTTTGTGGCAGAAGAGGTGGAGCCAGGGACTGGGGAGAAGAAACCCAAACATATGATGGGTTAAAAACTGGTGAAATCAAATAGGTTTAATCAGGACTGAGGGATGTCAGTAACTGAAATTCTTAACTTACTGTTGTGAAAAATGTGATCACTCCCCACAGCACTTTAGGATCCTTCACCACAAAAACAAGGTTCGAGGTGCCTGAACTCAGAGCTGAAAGCACTGCCAGTAGCTCAGACTCTGATAAGAGTGAGGTAGACTGTGGCCAGCGTGCCAGGTAACCGTCTGCAGTTGCAATAACAGAATTAGAAGGTGGGGGTGTCATGGAATCTTAGGAGCCCTGCATTCCAATTGCCCAGGCTTTCCTGAAACACAGGCACCCTAGTCTCACCTGAGG
GTCACCACCAATGGGGATCATTCCTTCAGCATTCACTCTCAGTATTCGTGTACCCTTGTGATGATGCCACAGACCCGTGTCTTTCCCAATACATCTAAGCATATTCCTCACTGTTTATCTCTTGTCTGTACAACATCATCAAGGCAGAAACAGTTTCCCAACAGGTTGTATTTTCTTAATGGTAGTCATGAAGTCACCCCACCTGCTCTCAGTTAAAACAGAGCTTAAGGCCTTTCCACAGGTGTAAGATATCAAACTTTTAGCCTGCCCTGATTTCCTCTGGGTCTTCTGCAGTTTTGTCTGTATCCACTAGAAAGTGAATGAATAATTCATTTGTAAAAAATGTTGTCTTTCCTGTCTCAGTATTCTTCTTGCTGTTTCCCATTGTTATGTTGATTTCTTTTTTCTCACTGGGGCACCATCTTTGCTTTTCATTACACTCTAGACCAGTTTGACATCCCTATGTCCAGAGCTCTTCCTCTATGTGGGTTGATTTGGTTTTTGATGTCACTGAGCGCTACATTTTATACTTGTCACTTATGGATGTCATTCTAGTGTCACAAGAGCTCTTTTCAAGGTATCAAGTGATCAAAATCATTTATATAGAGATCTCCTGAAAACATGTGTGACCATCTATCTTGGGAAGTTTCATAAACCTGATGCTATTTTGTTGTTTCCATTTTGTTTTCCCATATACTGAAAAGAACAGGGCCATGAGCGGTTCTTATGCAATATGGTTTGATATATATTTTGTTGAGATGACCTAACACCATTGATTTTGGGTTGCATTCCACTAACAGAACATGGCAAGATCAAGGTTATGGTCACGGTTGGTTGGTGATCCTCAGTGTTGCAGTAGAAGGTGAGTTTGAGATGAGAGGAATGAGTAGGAAAGAGTGATCCCCTGAACCACCTCCTCGCTTTCTCAGCTTTCACCCCACCTAGGTTTTGTGAGCCTGGAACTTGGGAGACTGTTCTGTAGCCCAGGTCTCCTAAGATTGGCTGCTGGACTTGCCTGAGTTGAGGGTGCGGTGGGTTGACCCTGGGCTGCCCAGCATTCATGTGGTAGTGAAGGAAGGAGGACTGGATCAATCCCATTTCAAAGCATGTCTCTCTGCACTCCACACTGTCCTCCAATGACACTGTAAGGAAACCGCTTTAAGACGTATCAACGGCTTTAAGTAAATGTATTTTCTGGCATCTGGGAGACCTGACATTCTGTGTCATAATGAAAATCTGTCATGTTTCTTTATTTTAAAAATGATAAAACTGCAGGTTCACAGAGTTACATGGCTTACTTGAGGTCACACGGGGATGAGTTTTCAGCACTGCCAATAAAAGCAATCACATGAATTATTCAGTAATTATTCATAGGATCCATATAATTCAGTAAATATTCACATAATTATTTACTAGTTGTTCATTGACCAATTCGTACAAGGCATTTTGCTCAAAACTGTGTTTATATTTGGACATTGTATCTTCATCATAATCCTGTGGTAATGCTGTTATCCGTAAGTAACAGGTAAGAAACCTGAAGAGGAGGGATAGCAAATCATGTATTTGGACATATTTCCTTTTTTTTTTTTTGGTTTTTGTGATGCTGGAAGAATGACCAGAATGAGTCATAGGAAGAGTATACATTCCTGTAGTATTTTCCAGGACAGAGGTGTGACCTCCTAGAGTACTGGGACCAAAATTCCCAAGTGTCTGCAACCTTGCTTTAACAGTATGGGAGATCACCTCTATCACCTGGAATTCCCCTGGAACTCTGGAATATACAAGAGAAGTATGAGACTTGGGTCTTCCCTTGGCTGTGTTTAATTCACTCTTCTATGGAATACCAATGATTCTCACTAAGACTGGCCTTTTCATAAGCACAATGTGCATTTTATGGAGAAGATTTTACACTTTGCTCTATTTAGAAAGAATAAATATGAGCAGTGGTTTAGGTTTTATGCCCTGGACTTAATATGTTTCTGATTCCTGTTTTGAGATTA
AATTCTCATGTAAATAGAAAAATACTTATTATTTCTCATAAGGCCAAGTTTGTTATTAGTTTGAGTTTTTGAAGATGAAGCACAAACTTTTGATTTTATCTTTGTCTGTCTCTGTCAGCGCCACTCGTTGTCTCTCAGTATGACCTGGACTTGCCCCTGCACTTACCCTTGTCCTGCTGAACCATCTCCATGCACTGTCCAATTCCATCAGTGATTCGGGCTCCTTCCAAGGCTCCCTGAAAAGGGCACAGAGATCAGGACATTAGGCACATTCCGGACACAAAGGCAACCCATACTGTAGAGTGGGCAGCTGTGTTTCCACTTCCCTAATATTCCAGTGATGTCCTCAAACTGAAAGGAACACTTTCCCTTTTTAGGGGTCTGTTCTTCATGTCTCAGTGCCTCTGATCTAGTCAACACAACTGTCCTGAATGTGAAAGAACTTGCTAAATTTCTAGTTTCTTGTTAGGTGGCTAAAATAGATTTATAAGACTTCCTTACTTACCCATGACTGCTGAAGTTTGAATTCTTAGCAGTACGATTCGTTTTCTTGTAAGGTGAGCAGCTTAGGAAAGATTGGCCATCTTCCTGTGCAAAAAGAGGCAAACTTAATTTCTACTCAAAGCATGCTTGAATTTGGAATCAGGGCTTCCACTCTTCCGAAGTTGGAGTGTCACTGCGACAGGCATGTGTCCCGAAGGGCTCGTGTCTCTGCTATACTCAAAGTTTAAATGGAGCCCAGCAAGCCAGATGTCCTTTACTTCTAGGTTCCCTCAACAGTTTCTCCTCCGCTTTAGAGACCGCATTGAAAATATTCTTGTTCTGCTGTTGTGTTTTGGCTTTGGAATGATGTGATGCAGCTCAATGGGTCCCACCCCCAACTTGATCAAAGTAAGAAACAGCTGGGAAAGTCAGTGCAAATACAAGTTCATTGTCCTCCTTGCAGGGATTCTGATTCAGAGGGCTCAGGTGGGGCCTGGAATGTTTGTTAACATGACTCAGATGTGCAGTCAATTTGGGGACTCACTGACAGCATTGACCTTACAGTTTATGGGATGATTCTTTCTGTTTGGTGATGAAGAAACTGAGGCACACAGAGTCTGTAACTTGCCCAAGTTCCCCTTGTTGTAAGTCCTGGAGCCAGATCTCAGGTGGACCAGTGCTTCTCTCCCCTATACCTCATTTCTGAGAAAAGGAAATCTTCTGCAATTTGACTTCTTTCATCTAACACATTTCCTCACAACATGCAGCCAGCATCATATTTTGGCCACTTACTATTAAAGTGAGATGCTTTTTTTTTTTTTTTTTTGAGACAGGGTCTTATTCTGTCACCCAGGGTGGAGTGCACTGGTGATTATAGATCACGGCAATCTTGAACTTCTGGGCTCAAGCGATCCTCCTGCCTCAGCTTTCCAAGTAGTTGGAACTATAGGCACACATCACCATTTCTGGCTAATTTTATATTTTTCATAGAGACAAGGTCTTGCTATGTTGCTCAGGCTGGTTTTGAACTTCTGGCCTCAAGCGATCCTCCCACCTAGGCCTCCAAAAGTGCTGGGATTACAGAAGTTAGCCACTGAACCTGGCCCTGAAATGCTTTTATTTCTTTCTTTTTTTTAATGAAAATACTGGACATGGAGATGTGGAAAGACACCTTGCTTTATTACTTTTGTTGTTATTATTATTTCTACAGTAGAATTTATACATCACAAAATTCACCATTTTTAAGCATACATTTCAGTGTCTTTTACCATATTCCAAAACTTTCGCAACCATCGCCACTACCTAATTCCAGAATATTTTCATAATGCCAAAAAGCATGCCTGTACCTATGGGCAGACACTCTCCAATTCCCCCCTTCTTGCGCTCTCTGACAACCACTAATCTACCTTCTCTATATATTGATGTACTTGTTCTGGGCACTTCCTCTATATGGAATAACAAAGTGTGGTATTTTCTATCTGCTTCTTAGAATATTGTTCTCAAGTTTCATCCTTTCT
AGCCTGCGTCAGTACTTCAACTTTTTATGGCCAGATAATATTCCACTATATGGTTATACCACATTTTGTTTATTCATCAACTCATGGTGGTTTAAGATGTTTCCACTTTTTAACTATTAGGAATAATGCTGCTGTGAACAGCTTTGTACAGGTTTTTGAGTGAACATCTGTTTTTCATTTTCTTGGTTATAAACCTAGGAGTGCAATTGCTGCATCATATGTCACTTTATGTTTCACTTTTTGAGGAACTCACACACTGTTTACTAACTTCAGTAGCTATATCATTTTAGATTCCCAATAGTAATATATGAGAATTCCATATTCTCCATCACTTTTGAAACATGTGTTGTCTTTATTTTTTTCTTAAGTCATACTGCTGGGTGTGAAGTGGTATCTCATTTTGGTTTAAATTTACATTTTCCTAATGACGAAAAACATTGAACATCTTTGCATGTGCTTCTTGGCCATTTGTGTGTTTCCTTTAGAGAAACCTCTACTCACAGCTTTTTTTCCCCATTGTTAAATGTGGTTGTCGTTTATTGCTCAGTTATATGAATTCCTTATACACTCTAGGTACTAGACCTGTGTCAAACATACAATTTGGAAATAGTTCTCCCATTATGTGGATTATCTTTTCACTTCCTTGACAGTGTCCTTTGAAGCATACAAGTTTTTTATTTTAATGAAGTCCATTTATCTATTTTTCGGTTGTTTGTGCCTACTTAAAAAATGTCTAATCCAAAATCACAAAGATTTGTACCTAGGTTTCCTTCAAGACATCGTCTTTTGAATGAGAACTTTCCTGGGTTTTAGAGGAGGGTGGACATTGTTTATTGATGCCTCCTGTCCATTACCGATGTTTCTCTTGATTGTTATTCATATGCTCACCACCCCTCCATGGAGCATCCATGGCCTGTGACAGAGCTCTGGGGACTGATATCCTTCCACTGACTTTGGCGCTGGTGAGAGCCCTGGTCATGTGATTCAGCTTGGCCTTAACCCGACCCAGTTGCACATATTCCTCAGGCCCTTTAGAGTTGAAGTCGAGACCTCTCTGAGAACGCTTGCCAGCCCATGCTCTTCTAAGGCTGGAGCAAACTTCCTCCATCTATTCCAGACAGAGGGGACTGCAGGGGTTGGACTCACTCAAGATATCTCTGGTGTTAGAAAGAAGACCTGTTTCAGGCTTTGGGGAAGATTGTTCAATATGAACTAGGTCCTCTCTAATTATTTTTACCGTATGTGTGACTTCTTTCTAGAAACAAGGGAAGAATATTTATGTTAGAACATTTTGTCTATTCTTTGTCAATTGTTGTTTATCTACAATTTTAACATGGATAAAGGAGAGTTCAGTGTCAATATATTCTTAACAACTAATTACGGCTCATGTCCACCGCCATGCGATCATATTTAAATCTGTCAACTATCCTGTTACTTAGGTATTATCCTGTTCCTGATGAGAAAACAAACTCAGAAAGATTGCAAAATTTCCCTAGGTCACAAAACTAGTGAGGAGAGGAGTAAGAATTAGATATCCGTTCCTTTTGGCCTTCAAAGCTAACCTTGTACCATTAGATCAAACTGATTTACATACTTTTGCTGGAATTAGTCTCAGACTTGTGGTTCTCACTTGATTTTCCCAAGGAAACAGTGTGCCACTTTAATATCATTTCAAACTTTGAAATTTAAAACTCTTTTTATTATACTTTTTTGTCTTTGTTCTATTCCGTTGCTTTTGGTTTCTTCTCAACGGATCCCTCTTATTTATATGCTAAATATTTGTTACCTATTTTCTGTCAATTTTCACCTTTTTGAGTGTTTGTTATCTGTCTGTTGTATGCTAACAGTTTTTCACTGAGGTAAAATTTGCGTAGAGTATACTGCAAAAAAACCTAAAGGCACAGCTTAATAAATTTTAATATAATTATAATTGTAAAGTAACACCCAGTTAAAGACAGAGAACATTTTCCCCCATGCCACAAAGTTCTGATGTGGTCCTT
GCCAGTCAATACTCATCCCCCAAATGAAGAATATATTCTGAATGTTGTCACTGCCTTAGCCCCTTTGTGTTGCTGGAAAGGAATACCAGAGGCTGGGTAAGTTATCAAGACAAGAGGTGCCTTTTGCTCATAGTTCTGCAGGCTGTACAAGAAGCATGGCCCCCGCATCTGCTCCTAATGAGGGCCTGAGGCTGCTTCCACTTGCAGCAGAAGGTGAAAAGGAACCAGGGTGTGCAGAGATCATATGGCGAGAGAGGAAGCAAAAGAGAGCAAGGAAAGGTGAGAGGCACTTTTTAATAACCAGCTCCTACAGGAACTAAGAGAGTGAGAATTCACTCACTACCTTCTCCCAGGGTGGGGATTCATCTATTCATGAGGGATCCACTCCCATGACCCAAACACCTCCCATTTACCCCCACCTCCAACACTGGGGACCACATTTGAACATGTGATTTGGAGGGGACCAATATTTAAACTTAGCAGCCACCATAGATTCATTTTGCTTGATCATGTGCTTCATAAAAATGGAATCATTTTGGCTGGGCCTGGTGGCTCATGCCTGTAATCCCAAGACTTTGCAAGGCTGAGGCGGGCAGATCACCTGAGGTCAGGCGTTCAAGACCAGCCTGGCCAACATGGTAAAACCCTGCCTCTACTGAAAATACAAAAAATTAGCCAGGCATGGTGGCCGGTGCCTGTAATCCCAGGCACCAGATATGTACTGGTATCTCATATGTACAGGACATGTACTGGTATCTCATTGTTGTATTGATTGATGTTCCTGATGGCTAAACTGTAGAGCATCTTTTCCTATGCTAATTGACCATTCATGTATCTTCTTTTCTTAAGTACCTATTCAAGTCTTTTGAGAAATTGTTTCATTGTGCTGTTTATCTTATTAAACTTATATATATATACATACATATATATACAAATACACTCTAAAAAACCCCTTTGTTGGAAATAAATATATCTCCTATATTGTGGTTTCTTTTAATGTTCTCTTAATGTTCCCTGTTTGGAGATAACGATAGATAATCTTCAAAAAGGTGAATATACACACCCACACCCACCCACACACATACACACACACACACACACACACACACACACACACGTGAGCCACCGGATCCAGCCTGTTGAATTTATTTCTAAGCACAACATGTATTTAGATGTTACTTGAAATGAAATTGTATTTTTATTTCATTTTCCAAATGCTCATTGCTAATACACAGAAATACAAAAGACTACTTCTATTGAGCTTATATTCTGCAACATTACCAAACTCACTAATTACTTTTGGCAGATTTTTATAGATTTCTAGGATTATTAACATACACAGTCATTATCTGTGAATAAAGACAGCTTCAATTCTTTCTTTTCAATCTTTTCAATACTTTTATTTTTCTTACTTTATTGCATTGATTTAGATCTCTAGTATAATGCTGAATTGAAAGAATAACAACAGATATTCTACTTTTTTCTCTGATTTAATAGAAAAGCATTCAATCCTATGCCATTTAATATAATGTTACCTCTGAGTTTTTTTCAAATCTACCCTTAATAGGGTTGAAAGTGTTGCCTTCTCTTCTTATCATGCTGAGAGTTTTCTGGGGTTTGTTTTTATAAATCATGAAAAAAGTTTTCAATTGTGCCAAATGCTTTTACTGTGTATGACAAGGTAATCATATGGTTTTTCTCTTTTGCCCTGATAATACATAACATTACATTTTCTTAAATATAAAAAAGATTTCTTGAATCAAGCTAGGACAGTTTTTTTAATTATAAACTTTTAACAAATATATTGAAATATAACTTACATGCAATTGAGATGCATGAAAGTGTATAATCATTAAAGTGTATAATTTTAAGAGTTTGAGCACACTATACACGAGTCAAAGAGAAAGGACAGAAAATACTAACGATGGCTCAGCACATGTGGTCTATCTTGCTGAATGCTCTATGTGAGTTTGAGAAGAGTTATTTGTTAGCTGTTCTTA
GATGTATTTTGCTTAAATATCGACCTGGCTAACATGTGTCATTGATTGTGTGAATTAATTTTGTTCTAGTGGGCAGTAAAATTACTGTCTGATCACTTTGGACTTATGTGGACTGGTTCATGTTTTATTACAACGGATTCATGGAAAGCCCACAGCATTTCCCAAGACCCTCTAATTTGGCAGGACTCAATCACCAATCCACCCCTTTGTGAATTTGTCAGGGTTTGCTTTTAGGCTTTAGCAGGTTGGTCTACAATAGGCCTTATTGAAAAGTGTGACACTTATTCCTAAAGCACATCCATTCTAGTGTCTCAGTTGGATACCTGGGTGCTAATGAGGTGTGCATGAGTTCTTCCCACCATGGATGGCAGAAACTCCATCATACATTCCCCAACCCTCCTCCACCTCAAGTACCTCTGGTCCAAACTCAATTTCATAGCAGCCACCCCTCTGTTAAATCTGTTAGTCTTTTCCTTGTGCAGGTAGAGTCCACTCCTTGATAAGTATGCACATGGAACCCCACATAGACTTTGAGAGCTGCACCTTTGATCAGCTGTCTCCTCACTGGTGCCCTGCCCTGCAGATTGCAGTTGCTTCAGCCGTCTTGAACTCTGATCTCTGCCTTCTCAGCTCAGTGAGCTGCCCTGCCCTGAGTGGACTCTAGCTCACTATGCAGCTGCTGAGAAATTCTCCCCAAACAACTAGGAAATCATGGGGCTTCCCCCTTAAGTTTTCTCTTGGACTGCCTGTTGTACACTGCTGAAAACAATTTTACGTTTGTTTATGGAGGCAGGGTTAGTCTGATATGATTTATTCTAACAGACAGAAGCAGAAATCTGTTATACTCTTTTAATTACTGTGTCTTTATAATATTATGGTAGACAGAATCCTAAGATGACCCCCAGTGATCTTTGCTCTTATATAATCACTTCCTCCTGAGTGTAGACAAAGCTACTGAGGAGATGTCACTCCTGTGATTGTGTTACAATTTATGGCAAAAACAAGTTAACAGATGTAATCGAGATCCCAAATCGGTCCAATTTAAGATAGACAGATTATCTGATGAGCTTGACCTAGTGAACGTGAGTTCCTTGGAGGGACTGAGGACTTCCTGGAGAGATGTGAAGTGCAGGAGGGTTTCCATGCAGGGCGATCCTCCTCTGCTGGCTGGAGGAAGCATGCAGTGGGAACATGGGAGGCCTCTAGGAGCAGCGAGAGGCCCCTGGCTGACAGCCAGCAAGAAAACAGAGATCTCAGTCCTACAGTCACAAGGAACTGAACTCAGCTGACAACCTGAGGAAACTTGAGAGGAAGTTCTTCCCCAGAACCTCCAGAAAGAAACCCAGCCTAATTTCAGCCTGTGAGGCCCTGAGAAGAAGACCCAGAGAATCCAGGCCTGAACTTCTGATCTGTGGACACTGCAAGAAAATAAATCATTCTTATTTTACGCCGCTAATGCTTGCAGTAATTTAGTATGCAGCAATAGAAAATTAATACAAATAAAATGGAGAAGGCTTTGGAGTGGGGACAAGAAGGAAACGGTGGGAGAGGGATGCCTGTATGCTGATATGGTTGATGCCTGTATGGTTGAATTGGGTCTACCGTTCCTCATCTAATTAGCTATGGTCTATTAAGGTGCATAGCTACACACAAATATTGGTACTACGTTCAATTCAGAGGAATAAGATATTGCATTCTTGACAGTAGACAAGAACACCCTGAATTTGGGGTCACTGTATCATAAGTCATGTTATCAGGTCCCTCTAGGAAGGCTTAGAGGAAGATTTCCAGGATACACTTGTGACAACATTGAAGGCTTCTTTTTTCCCCAAAGGGACCCGATCTCCCCTCAGTCGAGAAGCTCCAAGTCTCTGAACTGGATGCCAGGTTATAAATTCCCCCTATACTGACTCCATCAGGCTTCTGTCCTCAGAACTAGAGTTTATCAGTAAAAGATAGACTCATGGGAGTCTAGGCATTTATTCTCTTATTTTATATAA
ATCAGTTAATGTGCAGGAACAAAACAGACTTTGAAGAAAGACACTCACAGTTGCCACAGGAAAACACCTTCAACATCCTCATGAGTCATCATGGGTGTTCTGTTGGGAGGACTTGATAGGAGGCTTTCCTCCTCACGGGCTAGTGCAGATCCAGGGGAAATGTCATCAAGTCCTCCATTCGGAGGGTAGCAGCTGAGGCTGCTGATTCGTTAGGCCTCCTGCAGCTGGAGATGCAAGTAGTGCATTTTCATGGCCACCGCAGGGCCCTCAGTTTAGCATTCTTCAGAGCCAGCATCCAACAAGCCACAGAAGCTCTGAGTATTTCCCTTTCTTCAGTCACCCACATAAATGGCTTCAGGGCCTTCTGGGGAAGGCCTGAAGGAAGATTTACAGCATACACTTGTGGCAGCATTGAAGGCTTCACTCTTCCTCAAGGGATCCAATCTCCCCTCAGTCAAGAAGCTCCAGGTATCTGAACTGGATGCCAGGTCATAAATTCCCACTATGGTGACTCCATCAGGTCTCTGTCCTCAGAACTAGAGCTTTTCTAAGTGTAACGTAAGTTGATTTCTTAGTAGATGTCCCATCCATTACATTCCCAGACACCTCACAATGATTCGAATGATTAGTAACCACCACATATCCCTGCCTCTCAGGGAAATCCCTCCCGCCTTGTCTCTAGATGGCCAAGTCCCACGGCCTGTCCTCTACTCTTCCAGAACCCTGTTGTTCTCACTGACAGCAGGGAGGGCAAATCCATGCAGCAGCTCCCGCCATGACCTCCAGCCTGCAGAGGATGGGCGCCACAGGACTTTTAAACGCATGCCGCTGTTCCCCTCACCTGTGCATTTCTTAACGCCTTGGTGAGGAGAATGTCTCTGGATCTTCCTTGATGGGAGCTAAAGGAACAAAGGTAAATAATGCTATGGGACCCACTGAGAACTGGGGCTGTGGAAGAGTGGCCACTGAAGTAATAGACAGATGCAGCTATTGCCAGATACTCAGTGCCAGAGCAGGGAGGGACAGGGAAGAAATACGGACCTCACCTTCCTCTCACTTCCAGGATCCATCGGGGGCCCTCCATTGCTAAACCTAACTAGAAGTGTGCACGCACGGGAGCCAGGGATGCATTCTAGGAGGGACGAGCCCCGAGTGGCATGAGACAGGATGGAAATGAGTGGACAGTGGATCTGTGGGAAGAAGGAGGGGATGTTATGGGAAAACAAAAGGAGAATACTAGCTAAGAACGCTAGGTGACATTAATATTCCGAAGTCTGTGCTCATATTCAGCAAAGAAAGTTCAGCATAAAGCACTAAATAAGGAGTCAAGATATTGTACTTCCAACTGTTGTTCCAACAGCTGTATTATGAAGGGCCACTTTATTTCATGCCTTTCTAATTTGACCTAAAGTGCCAGGTGGCACTGGGGCTGGCACAGCCTTGCTCAATTATGTGTTGCAGAGTACACAGAGACTGCCAGGCTGAGGGAAGATGCAAGAGAATAGAAGAGATGCTCTCAGGGAACAAGAGACCACATGGCCCCAGAGTCAGGGGCAGCATCAGCCACTGTCAGCTGCTCATTTTCCCAGACAGAGCCCACAAGCCTCAGCCATGCTTTGCTTCTGCAAGACGCTTCTTCACCTTTTCAATAAACCTGCCTGAATTTAAGCTGACAGGGTTTATTTCTCCTTCATCATAAATGAAATTCTTCACCACAACAATCTCCAATGAATTTTGGGCACAGCAGGCAGGCCCATTTCTGCTTCTGTTCCACTATCTCTCCTGTAGGTTGAAAAGGAGGAGGTACTGAATTACCTCCAAATGTTCCTCTGGCTCTGATATTCTGTTATTCTGGTTCCTTTTTGGCTACTTTGTTTTTGGTAGCGTGTATCCTAAGGCGTCCAGTTGAACAACTTTTGTCTACTGTGTCCAGGCATTCCTGGTGGTATTTCAGATAAGACTCTCTTGGGTTGCTGAACTCACAACCACTGAACCAATTC
TATGACCATCTGTTTCATGGCCACATGTTTGCTCATTTTATATGTACATAAAGGGAGGGGACAGACAGCAAACTTGCGTGTTACAAATTGTATCATCTTAAAAAGGAAACAAGGCAACACTTTGCAATAAAACCTTAAGATGCATGAAATTTGAGCCTAATGCAATAAAGGATGCCCATAAAATTCTTATCTAAAGAATGTTTCGAAAATTGTTGTACAAGGACATCATCATTTAAAGTGATATGAAGAAACCTTCTCAGCTAAGCATATGGGCTAGATTAGAGAGAAAAATAAAGGACCCATCTCTGCCCTGGAAAAACTGCTGGTAGCATCTTTCAAAAAGCTCTCTGTGTTTGAGTACGCACCTTGATCCATAGGCTCACATTTGATCCCAACTGGCAGCTGCTTCTTGGCATTAACATTGGATTCCCAACTAGTAAATCTTACCAAGATCTGACTTTCTGCAGATATAATATTATTTTGTTTGACCATCCTTATCTTCAAGGGCTACCAAGAAGGAACCAAGAATTTATTTACCTCCCCAAGGGAAAAGGTTTTACCAATGAGACCCTTTCTCACCATGACCCCAGGACCCCATATGCCCTGTTCACTTGAGTGCCCTGTGTGGCCTGATAGAAGCTCATGCTGGTCACAGGATTCCTTATATGACTAGCCTCCTTCCTGAATCCCAATTTCATGGTGGTGGTCATGACAGGTGTCCTGTATCCCATGCTCATGTCCCTGAAGTCACCAGCCTATCTCCAGTTAGAAAAAATTACATGTATATAGAGAGGCCTCTTTGGAAGGAGCAAAAGCTTTCTCACCTTCGTACACTAATGGTTGGAAGGTACAACAGCATATGCACTTTGGGAAAAAATATCTGGCATATTCTTACAGAAACAAACAACTACCTATTCTATGACTCAGTAATTCCTAAGCATTTATCCAAGAGAAACTAAAACCTATGTCCAGAAAATGACTTATACAAGAATGTTCATAGCAGTTTTATTCATAATACAAAAAACTGGAAACATTCAAGTATCTGTCAATACAAGAATGGATCAATAAACTGTGATACACTCATTCCATGGAATGGCTAAAGGAACAAACTGGTGACACACAGAACAACATGGATGAATCTCAAAAACATTTGGAGTGCGATAGAAGCCATACCCAAAAAAGTGTGAGAAAAAAAGATAAATAATAATGGTTCCAAGAAATGCACAGCAGACAGCCCAGAGGCAAAGACCCACAGGACGGCGGGCCGGTCCCAGGCTGTCGATCCTAATTAAGAAACTTCTGCTGGATTTTGCCCAGCTCCATTTCCAAACTATTTTGGGTCAGTGACTTCTTTATCCCTTCCATGTTGCCTCATTTTGAACTAGAATCACTGTAAGTGTTATTCTATGTCTGTCACATCATTCCACAGTAGGGGCAGATAAGCTGTTTAGAATGGCTAAAATTCAAAAAGGTGAACACACCAAATGCTGTCAAAGATGAGGAGCAACCAGAACTTTCCATCGCTAGTGGAAATCAAAAGGGTACAGTCACTTTGGAAAACTTAAGTTCACTCAAAATCCTGCACAGAAGTACTTACAGCAATTTTATTCATCATCGCCAAAACTTGGAAGTGCCCAAGATGTCTTTCACCAAGCGAAAGAATAAACAAACTGTTGTAGCCATACAAGGAAATCTGATTCACTGATTTTAAAAAACAAGTTATCAAGCCATGAAAAGACATGAAGGAACTTAAAGTACATAATGCTAGAAAGAAGCCAGTCTGGAAACCCACATACTGTACCACTCCAACTCTAGGACATTCTTGGAAAGTCAAAAAGATAGAAGTAGTAAAATGGTGAGTGGTTGTCAGGGGTGGAGGAGAGGAGGACGCGTGAAATGGTGAAGCACAGGGAATTTTCAGCAGTGAAACTCTTTCGCATGATGCTGTATTGGGGATTTAGGACATTATGTAATTGCCAAAACCCATAATCTGTGAAACT
CAAAGAATGACCTCTAATGTAAACTATGGACTTTAGTTGATAATGACGTATCAACAGTGGTTCATCAATTGTAATGAATGGACCACACTAATACAACATACTAGTAGGGAAAATTGTGTGCTGGAGGACAGGGGAGCCTAGGAGAACTCTCTGTATTATCCACTCAAGTTTTCTGTAAACCTAGAACTGTTCTAAAAAATAATGTCTATTAACTGTTTTTTTAATTAGGATGCAGCAGCCCCATATCAAGGTTTTGGTGGCATCCTGTAATTGTGTGGTTAGTACTTGGCATTGAAGTGCACCAACCTGGAGTCAGAGCAGTTGGAGATTTCAAGGCCTGTGCCATTTACCTCTAACCCTGGGGTGCCCCTGGAATACAGATAGCAGATCGGTTAAGGAGAAGCAGCCTCAGCAATCTAGACAGTGCAGGTTTCTGGTGAGGACAGGTAAAAACCATCTGGGTGGGCAGAACTTGGTGAAGACCAGAAACCACTGAGACTCAGCAGCTGCCGCAGTGGCACCCACAAATCAAAGGAGGGGGCTGGGAAGAGCTAAGGGCTACTGGATGAGCTCTCTGCCTGCAAGACAGAAGCAGATCCAGAGATTTTGGAAAATAATGTAGGTTTCAGTACAGTGTGATCTCTTCAAAAAAGTAGAGAGAATGAAAAGGAAAGAAAAAGAGAGAGCATGAGAGAGAAAGAAGAAGAAAAGAAGAAAGGAAGAAAGGAAAGAAGGGAGGGAAGGAGGGAAGGAGGAAGGAAGGGAGGGATGGAGGGCGGGTGGGAAGGAGGGAAAGAATAAAAAGAGAGAGAAAGAGAGTTGGAGGGAAGTAGGGAAGGAAGGAAGGAAGGAAGGAAATGAACAAATTTACATGAAGATGAGAACAGTGGGGAAACTTACACCACCAATATTTTCCATTAACAGGAACACGCTAAGTAGTTATTAGAGAAAGACACGCTACTGTAAAACAATATACTGTTTCCATGGGGTACAACAACCCCTTCCTCCTCCTCTGAAACACATTCTATCTCTGGCTCACTGTTGCCAGAGACACTGAGTCTTGTCTTTGGATACGTTCTGGTGCCCACAAGAATGAGATGAGACAGTGGATCCCAGAACACCAGGCCACGAACTTCCCTGTTGCTCCTTGTCCACTCCAGAAGCTACCCAGCTGCAGTTGGGGACCTCAGCCCCTGGGTCTGATGTCATCCATTTGCCTTTCTCAATGGACTTCTCTCCTTGCACTGGCTCCTACTCCCCCAGGACCTGTGGGTGACCACATGAGAAGAACACAAACAGGCCATGCCCCTTTCTTTCTCCCCCTCTCAATGCCTGCAGTAGTGGGTTCCATGGGGTAGTGACCTGAGATTTACTCATTGTGGGGCCTCTAGCCCAGAGCAGGGCCTACTACCTCACAGTCACCCCATGAATGCTCAGTGAAAGAAGACGTCCACCACAAGGTCCTGGGGAACCAAGAATTCCACTGTGGCCCATAAATTCTAAGTCTACAGGATTCTGGAATGGGAGATGGGAAAGGCCTTCAAAAGTGGCCACTTTTAACCCATTATACTGGCAACTGAGCCATGTTTCCCCATCCTGGACACATCCAGAGGGCACTGCCTAAAACCAGACACATCTCCCCACCCAGGACAGTGTAGGAGCCTTAGCCTGGGGGATGCAGGTGGACAGGGAGGGGGTGAGCCACCAAAGCTGAAGAGCAGAAAGCAGGTGAAAGGGGACAGCAGGGTGGAAACAGAGAGAAATGGGGGCAGAGAATGGGGGGTGAGAGGGGAAGAGTGAGGAGAGGGATGCAGATCTAGCTAGTAAGGAAAAGTCCTGGAGAGAACACTGTCCTCTCCTGAAGTAAAATCACTTCTACCTGACCACGGCACTGCAGCTCATGGGCAGCACATGCTGTGGATATTTGTTCATTCATTTAACAAATATTTATTTAATATCTGTTGCATGCCAAGCAAGGCCCTGCAATGTTTAGGGACCTT
GACATCTTCCCTTCACATCTGAGTCATAATACAAAGAGGACTCTCTGACCCCACTGAGCTGGCAATGCCTCGGGATTTTTACCTGTTGGATCTGGCAGCTCTTGATGTCAGCCCACACCATGTGAGGCTGCTCTTGGTGCACCCAATGGGGAAGTTTCTACATCAGGGCCTCGGAGAATCCACTGGAAGCCCTGGACAGTGGGAGTCAGCGGCATCCCCAGTGTGGAGGCCAAGAGCACACAGTGCTTAAGCTCCAGGCACCCTCAGGAGGACGGCAAGGGACAATTGGCTGGTGAGAGCCCGGGTCACCGGGAACCTTCGCCTGGGTCTAAACAGGATTTGCCTTCAGATTGCCTGTGAGATAAAAGAGAGAAATCAAGGTTAACGTTGAGATTTAGGGCTTCGGTAACTTGAAGGATGGAGCTGCCATTTACGGAGACTGGGAAGACCCAGGGAAGAGCAGATTGAAAGGTGGTGGGAACTAGAGGTGGTTGGGTTTCTGTCATATGTAATCAACAGTCCTGACCAGCCTGGGCAACATAGTAAGACCCCGTCTGGGAAAAGAAAAAAGGAAAAATAAGCTGAGCATGGTGGTGCACACTTGTAGTCTCAGCTACTTGGGAGGCTGAGGCAGGAGGATTCCTTGAGCCTTCAGTTAGCGGTTAGTGAGCTATGATGGCACCACTGTACTCCAGCCTGGGGGGAAAAAAATAAAGAGTCCTGACTAAATACTAGAGTAGCCAGGGAAGTTTTCACAAAGTAAGTAATATTTGAGGCAGATCTTAGTGAACAAGAATTCCATTATTTCTGTTAGGGAATTAAGAGAGTGTGGGTGTCGTTAGTTAATGCTTATTAAAGTAGCTTTGGAATCTCATCTACTGGTCTAGCTGGTCTATCTGTACACGTATATTGTATATGCTGTCTCTCTGAGCTTTCGCTAGGTTATGCTACGGTAACAAAAGCCCCAAAATCTTAGCAGCTACACATACGAAGGTTTATTTTTCATTGACATGTCCTTTTATGGCAGGTTGACTGTGACTCTACTCTATACAAGCTACTTTATTTGTTAGATGGTGAAAACTGTGATACTCGGAGGTTGTTGAATATGGTATTAGTATGTTCATTCATTCATTCATTTAAGAAATATTTATTCAATATCTGTTTCATGCCAGGCAAGGTCAAGTACTGAGAATACACTGGTGAATCAAAGAGACAAAATCTCTAATTGCCAGGAGCTTATATTGAAAATCAGATTAAACACATACAAAATCATCATAATAACAACAATGAATACTATATTCATAAATAATAGCTGTAAGAGATTTTAGTACATCTTTTAAATTAGAAAAATATAAAAATTATTAAAACTAAAATGGCCAGGTGTGATGGCTCATGCCTGTGATCCCAACACTTTGGGATGCCAAGGTGGGAGGATCATTTCAGCCCAGGAGTTTGAAACCAGTCTGGGCACTACAGGAAAACCCTGTCTACAAAAAGGAGAAAATTAGCCGGGCACAGTGGTGCATGCCTGTAGACCCAGCTACTAAGGAGGCTGAGGTGGGAGGAGTGCTTGAGCCTGAGAGATCAAGGCTGCAGAGAGCCATGATCATACCACTGCACTCCAGCCTGGGCGACAGAGCGAGACACTGTCTCAAGAAAAAAAAAAAAATTATTTGATGTAGTCCTAAAACTATTATGTAGAATACTATTGTTTATATCACAGCACGTGAGCCCCTTAAATGGCTTAACACTTATTTAGGTATGATCCATAAAGCTTTTCTGGTAATTAAGTATACTTAAGAACAATTAAGTATAAAAGAGTTACTGCCTTGACAGGAAGATTGTAAAAATTTTAAAAAGACAAATAAATAAAAGAGTAAAAACTGTAGCTCTGTGAGGCTCAAATAACATCTAATTCAAGTCACAATGAACATCTAGCAATCATTCTGAACACCATATAATTCACTTAATACGTTTTGCCTGAACACCCAACACATCTG
AATTACCAACACCCATATGTAGCCAAGAAACTGGCAATCATTTATAAATTATCACCTATGACTCCATCTGCTCTACGCACTTATTTTTTAAATTTTATTCATTTATTTATTATTTTTATTTGTTGTAGAGATGGGATCTCACTATGTTACCCAAGTTGGTCCAGAAACAGAAACAGACCCACACTAATTTCATAAATCAGATGACCATACAGTCATTCGATTTATGAAAAAAAGTGCCACATGGTGCGGAAGGAAAAGGATGGTCTTTTCAATAAATGGTGCTGGATCAAGCAGACACATCCATGTAGTAAAAAGTGAATCATAGCCAGGTGGGGTGGCTCACACCTGTAATTCCAGCACTCTGGGAGGCTGAAGCGGGCAGATTACTTGAGCCCAGGAGTTCGAGACCAACCTGGGAAACATGTTGAATCCCCATCTCTACAAAAAATATGAAAATTAGCCAGGCATGGTGGCACATGCCTATAGTCGCAGCTACTCAGGAGGCTGAGGTGGGAGGATCACTTGAGCCAGGAGATGGAGGTTGAGTGAGCTGAGATCCTGCCACCACACTCTAGCCTGGGCAATAATAGACTGAGGCCCTGTCTGAAAAAAAAAAAAAGCAAAAACTAAAATAAAATCGTTATAAGGTTAACACAGAAAAATGTGTTCATACTCTTAGGTTAGGCATTGATTTCTTAAACAGGACACAAAAAACAGTAACCATAAAGGAAAAGATTAATAAAGTATAATTTCATTAAAATGAAGAATCTCAGGCTGGGTGCAGTGGCTCATGCCTGTAATCCCAACCCTTTGGGAGGCCGAGGCAGGTGTATCACTTGAGCCTAGGAATTCCAGACCAGCCTATGCAACGTGGCAAAACCCATCTCTACTAAAAATACAGAAAACAGCTGAGTGTGGTGGTACTCCCCTGTAGGTCCCAGCTACTTGGGGGCTGAGGCAGGGGGATCACCTGAGCCTTGTGAGGTCAAGGTTGCAGTGAGCTGTGATTGTGCCACTGCACTCCAGCCTGGGCGATGGAGTGAGATCCTGTCTCAAAAAGAAAAAAAAAAAAAGAGAATCTCCCTTCATGAAAAAACACCATAAAAGAGTGAAAACGCAAGCTACAGATTGAAAAAAGGGAAATGCAATACATATAAATCCTAGAAAGGAGGCATATCCAGAATAAAGTATTACAAATCAACAGGAAAACAAGCATATCAATGAAAACTGGATAAAAAGATTTAACAGGCACGTCACAAAAGAGGACATATAAATGGCAATAAAAGATACTCAATCTCAATGAAACCACACTGATATATTACTGCACCCCTACTAGAATGGCAAAATAATTTTTAACTGACAGGTATCAGCGAAGATGTGGGGTAACCAGCATATCCCTGCTAAATGGTACAACTACTTTGGGAAAATGTTCAACAATATGTAATACTAAAGTTTTATCATTCATATACCTCTAAAACCAACAATGCCACCCCTACAAATATACCCCAGACTAGTAATGTTCAATTTCTTGATCTGTGGTGGTTCACTTGGTAAAAATTCATTACTTTTTTTTTTTTTTTTTTGAGACAGGGTCTCACTCTGCCATCCAGGTCGGAGTGCACTGCCATGATCACGGCTCACTGCAATCTCAACCTCCCGGGCTCTGGTGATCCTCCCAACTCAGCCTACCGGGTAGCTGGGACTACAGGCACACGCCACCACACACAGCTAACTTCTGTATTTTTAGTAGAGAAAGGGTTTTGCCACATTGCCCAGGCTGGTCTGGAAATCCTGGGCTCAAGTGATCTACCCACCTTGGCGTCCCAAAGTGCTGGGATTACAGGTGTGATCACTGCGCCCGGGCCACCTGCACATGTAAAATTGTGAACTTCTGTATACTTCAGTAACTTTTCCAAGATTTCTTTGACGCAAAGTTCTCAGAAATCTTAAAGCTAGCATTTCAGAATAGAAAAAGTAGCTTCTGGTTCACTAGTGAAATT
TTACCAATAGAATTTAAAAACAAAAAGCTACTAACGCATATCAGCTCAGAACACTACCAGCAGATCTTTTCTTTAACTTCCTGAAGCACTGGGATTCATTCTTTTGGCAAAGAAAGGATGAACAACACTGTAACCCAAAGAAAAGATACCACTGCCAGAAAAGACTTCTTTTCGAAAGCAGCTCTAAGCAAAAGATAGGAGGAAAACAAGGAAGCCAGGCCAAACGTCTTGGTTAACTCTCCGCTGAAAGGACGCCACATGAGATGATCTAAGAAGCCAGCCAGCCAGCCAGACGCAGGGAAATCACAGCAACTCTTTGGAGTGCAAACAGCAACCCCACAATCCAATCTACCCGAAATCCTGCGGTTCATTTGAGGCTTGCCCCGCTAGTCAGGAGGTGATTCAGTGATGGCTACAAATGCTGCTCATGTGCATCCTGGAGCTGGCACACCTGGCTTGCCCATCACCAGCCTGGAGACACCGCCAGGAGCAGAAGCCCGGAGGCCAGTAAAGACCCCAACTTTGCAAGTCAGGGGCGCGAGCGCGCTCGCCTCTCAGGTCCGCAGAGGGAACGGATTTCTGGCCTGGAGGGTGGGGTGCGGGGTCAGTGTCCTCTACAGGATATAGGAGGACGTGCCCCCGAAGCTGCTCCGTCCCTCCACCCCCTGGGATGCCACAGAACACCCGCCAGCGAGTTTCTTCCCCAGCGCCCACGAGAGTTGGGCTGCGGGCGGCAGCGGCAGGCGAAGAATCCAGCGCGGGGAACTCAGGCCCCGGCGGTGCACGACCCCCCACAGCCCCCACCCGCCCCCGCGCTCGCGCAACAAAACTTGCCACGGCCGCGCCTCGACCCAGCTGTGCGCCCGCGGGTCCCGGATTCACCGCCCGCCCAGCCTGGCGCGGCGCCCTCACCTCAGAAACGCTGGGTGGACTTCGCGTAACTTCCCATTCACAGGGCAGCCGGCAGCCGCGCCGCCGCGCCTCGGCCCAGCTCCTGGCGCCGCAGATCGCCCGTCCCGCGTTCCCAAAAGCACCGCGCTCGCTCAGAAGCTCGGGCAGCCTCGCGACCCTCACCTACGCCTCCCAGTACCGCCGCTGTCTCAACCGCCACCCAGCCCCTCGCCTGCGCCTGCGCCTGCAGCCCACTGGCTCCTCAGGATCCCGATGGGCGTGTCAGGAT", + // 16888922 - 1); + // + // //coding region between 34..528 + // var codingRegion = new CodingRegion(16890438, 16918516, 1007, 4651, 3645); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 28, 16888922, 16890681, 4408, 6167), + // new TranscriptRegion(TranscriptRegionType.Intron, 27, 16890682, 16891301, 4407, 4408), + // new TranscriptRegion(TranscriptRegionType.Exon, 27, 16891302, 16891413, 4296, 4407), + // new TranscriptRegion(TranscriptRegionType.Intron, 26, 16891414, 16892127, 4295, 4296), + // new TranscriptRegion(TranscriptRegionType.Exon, 26, 16892128, 16892302, 4121, 4295), + // new TranscriptRegion(TranscriptRegionType.Intron, 25, 16892303, 16893674, 3844, 4121), + // new TranscriptRegion(TranscriptRegionType.Exon, 25, 16893675, 
16893846, 3673, 3844), + // new TranscriptRegion(TranscriptRegionType.Intron, 24, 16893847, 16894473, 3672, 3673), + // new TranscriptRegion(TranscriptRegionType.Exon, 24, 16894474, 16894525, 3621, 3672), + // new TranscriptRegion(TranscriptRegionType.Intron, 23, 16894526, 16895567, 3620, 3621), + // new TranscriptRegion(TranscriptRegionType.Exon, 23, 16895568, 16895731, 3457, 3620), + // new TranscriptRegion(TranscriptRegionType.Intron, 22, 16895732, 16899636, 3456, 3457), + // new TranscriptRegion(TranscriptRegionType.Exon, 22, 16899637, 16899688, 3405, 3456), + // new TranscriptRegion(TranscriptRegionType.Intron, 21, 16899689, 16900981, 3404, 3405), + // new TranscriptRegion(TranscriptRegionType.Exon, 21, 16900982, 16901187, 3199, 3404), + // new TranscriptRegion(TranscriptRegionType.Intron, 20, 16901188, 16901651, 3198, 3199), + // new TranscriptRegion(TranscriptRegionType.Exon, 20, 16901652, 16901724, 3126, 3198), + // new TranscriptRegion(TranscriptRegionType.Intron, 19, 16901725, 16902761, 3125, 3126), + // new TranscriptRegion(TranscriptRegionType.Exon, 19, 16902762, 16902976, 2911, 3125), + // new TranscriptRegion(TranscriptRegionType.Intron, 18, 16902977, 16903811, 2910, 2911), + // new TranscriptRegion(TranscriptRegionType.Exon, 18, 16903812, 16903914, 2808, 2910), + // new TranscriptRegion(TranscriptRegionType.Intron, 17, 16903915, 16905687, 2807, 2808), + // new TranscriptRegion(TranscriptRegionType.Exon, 17, 16905688, 16905897, 2598, 2807), + // new TranscriptRegion(TranscriptRegionType.Intron, 16, 16905898, 16907239, 2597, 2598), + // new TranscriptRegion(TranscriptRegionType.Exon, 16, 16907240, 16907451, 2386, 2597), + // new TranscriptRegion(TranscriptRegionType.Intron, 15, 16907452, 16907914, 2385, 2386), + // new TranscriptRegion(TranscriptRegionType.Exon, 15, 16907915, 16907987, 2313, 2385), + // new TranscriptRegion(TranscriptRegionType.Intron, 14, 16907988, 16909038, 2312, 2313), + // new TranscriptRegion(TranscriptRegionType.Exon, 14, 
16909039, 16909253, 2098, 2312), + // new TranscriptRegion(TranscriptRegionType.Intron, 13, 16909254, 16910088, 2097, 2098), + // new TranscriptRegion(TranscriptRegionType.Exon, 13, 16910089, 16910191, 1995, 2097), + // new TranscriptRegion(TranscriptRegionType.Intron, 12, 16910192, 16911983, 1994, 1995), + // new TranscriptRegion(TranscriptRegionType.Exon, 12, 16911984, 16912193, 1785, 1994), + // new TranscriptRegion(TranscriptRegionType.Intron, 11, 16912194, 16913544, 1784, 1785), + // new TranscriptRegion(TranscriptRegionType.Exon, 11, 16913545, 16913756, 1573, 1784), + // new TranscriptRegion(TranscriptRegionType.Intron, 10, 16913757, 16914219, 1572, 1573), + // new TranscriptRegion(TranscriptRegionType.Exon, 10, 16914220, 16914292, 1500, 1572), + // new TranscriptRegion(TranscriptRegionType.Intron, 9, 16914293, 16915343, 1499, 1500), + // new TranscriptRegion(TranscriptRegionType.Exon, 9, 16915344, 16915558, 1285, 1499), + // new TranscriptRegion(TranscriptRegionType.Intron, 8, 16915559, 16916393, 1284, 1285), + // new TranscriptRegion(TranscriptRegionType.Exon, 8, 16916394, 16916496, 1182, 1284), + // new TranscriptRegion(TranscriptRegionType.Intron, 7, 16916497, 16918341, 1181, 1182), + // new TranscriptRegion(TranscriptRegionType.Exon, 7, 16918342, 16918551, 972, 1181), + // new TranscriptRegion(TranscriptRegionType.Intron, 6, 16918552, 16918653, 971, 972), + // new TranscriptRegion(TranscriptRegionType.Exon, 6, 16918654, 16918808, 817, 971), + // new TranscriptRegion(TranscriptRegionType.Intron, 5, 16918809, 16919935, 816, 817), + // new TranscriptRegion(TranscriptRegionType.Exon, 5, 16919936, 16920062, 690, 816), + // new TranscriptRegion(TranscriptRegionType.Intron, 4, 16920063, 16921086, 689, 690), + // new TranscriptRegion(TranscriptRegionType.Exon, 4, 16921087, 16921156, 620, 689), + // new TranscriptRegion(TranscriptRegionType.Intron, 3, 16921157, 16921425, 619, 620), + // new TranscriptRegion(TranscriptRegionType.Exon, 3, 16921426, 16921504, 541, 
619), + // new TranscriptRegion(TranscriptRegionType.Intron, 2, 16921505, 16935002, 540, 541), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 16935003, 16935274, 269, 540), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 16935275, 16939832, 268, 269), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 16939833, 16940100, 1, 268) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(5892, 5891, "AAAAAAAAAAAAAAAA"), + // new RnaEdit(5799, 5799, "T"), + // new RnaEdit(5675, 5675, "G"), + // new RnaEdit(5655, 5655, "G"), + // new RnaEdit(5390, 5390, "G"), + // new RnaEdit(5174, 5174, "G"), + // new RnaEdit(5150, 5150, "C"), + // new RnaEdit(4993, 4993, "A"), + // new RnaEdit(4828, 4828, "G"), + // new RnaEdit(4683, 4683, "G"), + // new RnaEdit(4637, 4637, "G"), + // new RnaEdit(4530, 4530, "A"), + // new RnaEdit(3845, 3844, + // "GAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCC"), + // new RnaEdit(3769, 3769, "C"), + // new RnaEdit(3554, 3554, "C"), + // new RnaEdit(3207, 3207, "A"), + // new RnaEdit(3140, 3140, "C"), + // new RnaEdit(3136, 3136, "T"), + // new RnaEdit(3107, 3107, "T"), + // new RnaEdit(3103, 3103, "A"), + // new RnaEdit(2993, 2993, "C"), + // new RnaEdit(2944, 2944, "G"), + // new RnaEdit(2840, 2840, "G"), + // new RnaEdit(2810, 2810, "T"), + // new RnaEdit(2706, 2707, "GC"), + // new RnaEdit(2695, 2695, "T"), + // new RnaEdit(2692, 2692, "G"), + // new RnaEdit(2509, 2509, "A"), + // new RnaEdit(2299, 2299, "A"), + // new RnaEdit(2294, 2294, "G"), + // new RnaEdit(2290, 2290, "C"), + // new RnaEdit(2222, 2222, "G"), + // new RnaEdit(2009, 2009, "G"), + // new RnaEdit(1964, 1964, "G"), + // new RnaEdit(1893, 1894, "GC"), + // new RnaEdit(1882, 1882, "T"), + // new RnaEdit(1879, 1879, 
"G"), + // new RnaEdit(1696, 1696, "A"), + // new RnaEdit(1652, 1652, "G"), + // new RnaEdit(1486, 1486, "A"), + // new RnaEdit(1481, 1481, "G"), + // new RnaEdit(1477, 1477, "C"), + // new RnaEdit(1409, 1409, "G"), + // new RnaEdit(1405, 1405, "A"), + // new RnaEdit(1318, 1318, "G"), + // new RnaEdit(1021, 1021, "C"), + // new RnaEdit(932, 932, "A") + // }; + // + // const byte startExonPhase = 0; + // const bool onReverseStrand = true; + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, + // rnaEdits); + // + // var expectedCodingSeq = + // "ATGGTGGTATCAGCCGGCCCTTGGTCCAGCGAGAAGGCAGAGACGAACATTTTAGAAATCAACGAGAAATTGCGCCCCCAGCTGGCAGAGAACAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCGACAGAAGAAATACAAGTATGAAGAGTGTAAAGACCTCATAAAATTTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCAGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAGAAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAACTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCCCCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCGGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCGAAGACCTCATAAAATCTATGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGAGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCACCTTGTCCAAAAGCTCAGCCCAGAAAATGACGAAGATGAGGATGAAGATGTTCAAGTTGAGGAGGCTGAGAAAGTACTGGAATCATCTGCCCCCAGGGAGGTGCAG
AAGGCTGAAGAAAGCAAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCACGGCCCTTGTGACTCCAACCAGCCTCACAAGAACATCAACATCACATTTGAGGAAGACAAAGTCAACTCAGCTCTGGTTGTAGACAGAGAATCCTCTCATGATGAATGTCAGGATGCTGTAAACATTCTCCCAGTCCCTGGCCCCACCTCTTCTGCCACAAACGTCAGCATGGTGGTATCAGCCGGCCCTTTGTCCAGCGAGAAGGCAGAGATGAACATTCTAGAAATGAATGAGAAATTGCGCCCCCAGCTGGCAGAGAAGAAACAGCAGTTCAGAAACCTCAAAGAGAAATGTTTTGTAACTCAACTGGCCTGCTTCCTGGCCAACCAGCAGAACAAATACAAATATGAAGAGTGCAAAGACCTCATAAAATCTGTGCTGAGGAATGAGCGACAGTTCAAGGAGGAGAAGCTTGCAGAGCAGCTCAAGCAAGCTGAGGAGCTCAGGCAATATAAAGTCCTGGTTCACTCTCAGGAACGGGAGCTGACCCAGTTAAGGGAGAAGTTACGGGAAGGGAGAGATGCCTCCCGCTCATTGAATCAGCATCTCCAGGCCCTCCTCACTCCGGATGAGCCAGACAAGTCCCAGGGGCAGGACCTCCAAGAACAGCTGGCTGAGGGGTGTAGACTGGCACAGCAACTTTTCCAAAAGCTCAGCCCAGAAAATGACAATGATCACGATGAAGATGTTCAAGTTGAGGTGGCTGAGAAAGTGCAGAAATCGTCTGCCCCCAGGGAGATGCAGAAGGCTGAAGAAAAGGAAGTCCCTGAGGACTCACTGGAGGAATGTGCCATCACTTGTTCAAATAGCCATGGCCCTTATGACTCCAACCAGCCACATAGGAAAACCAAAATCACATTTGAGGAAGACAAAGTCGACTCAACTCTCATTGGCTCATCCTCTCATGTTGAATGGGAGGATGCTGTACACATTATCCCAGAAAATGAAAGTGATGATGAGGAAGAGGAAGAAAAAGGGCCAGTGTCTCCCAGGAATCTGCAGGAGTCTGAAGAGGAGGAAGTCCCCCAGGAGTCCTGGGATGAAGGTTATTCGACTCTCTCAATTCCTCCTGAAATGTTGGCCTCGTACCAGTCTTACAGCGGCACATTTCACTCATTAGAGGAACAGCAAGTCTGCATGGCTGTTGACATAGGCGGACATCGGTGGGATCAAGTGAAAAAGGAGGACCAAGAGGCAACAGGTCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGGGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCAGGTTATCTTGAACTGACCGACTCATGCCAGCCCTACAGAAGTGCCTTTTACATATTGGAGCAACAGCGTGTTGGCTGGGCTCTTGACATGGATGAAATTGAAAAGTACCAAGAAGTGGAAGAAGACCAAGACCCATCATGCCCCAGGCTCAGCAGGGAGCTGCTGGATGAGAAAGAGCCTGAAGTCTTGCAGGACTCCCTGGATAGATGTTATTCGACTCCTTCAGGTTATCTTGAACTGCCTGACTTAGGCCAGCCCTACAGAAGTGCTGTTTACTCATTGGAGGAACAGTACCTTGGCTTGGCTCTTGACGTGGACAGAATTAAAAAGGACCAGGAAGAGGAAGAAGACCAAGGCCCACCATGCCCCAGGCTCAGCAGGGAGCTGCTGGAGGCAGTAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCAACTCCTTCCAGTTGTCTTGAACAGCCTGACTCCTGCCTGCCCTATGGAAGTTCCTTTTATGCATTGGAGGAAAAACATGTTGGCTTTTCTCTTGACGTGGGAGAAATTGAAAAGAAGGGGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAACGAAGAAAAGAAGGAGAAGGGGAAGAAAAGAAGGGGAAGAAGATCAAAACCC
ACCATGCCCCAGGCTCAGCGGCATGCTGATGGAAGTGGAAGAGCCTGAAGTCTTGCAGGACTCACTGGATAGATGTTATTCGACTCCGTCAATGTACTTTGAACTACCTGACTCATTCCAGCACTACAGAAGTGTGTTTTACTCATTTGAGGAACAGCACATCAGCTTCGCCCTTGACGTGGACAATAGGTTTCTTACTTTGATGGGAACAAGTCTCCACCTGGTCTTCCAGATGGGAGTCATATTCCCACAGTAA"; + // + // Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void RnaEdits_big_test() + // { + // //NM_005960.1, chrom: chr7:100547052-100611619 + // var genomicSeq = new SimpleSequence( + // "GCGCTGACGTCTGTCTGTCCGGGTGCAGGGAGAAGGGAGGAAGAGGGGAGAGGTGGGGCGGTGCAAAGGTGAGGCTGTGCTCAGCCCTGACGCTCAGCAAAACCGATAACCAGCACTTTCATTACGTGCACGCCCCAGGGCCACGTCCCTGCCGCTGTCTTGGTCCTGAAGCCTGTTCTGCCCCAGCCCCCTGCCCGCTGGGCCCATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGTAAGGGGGAGAGGCGGAAGGGGGTTGGAGAAAAGCTCCTGATGTGATGTTCCAGGAAAGGGGAGGGAAAAGTGGCTGTAAGGCCTGGGGAGGGGGGATAAGAAGGCACCGCTTGGGGCTCTGGGTGCAGGGAGAACCGAGGCACGGCCTGACTGGGGGAGGGGGCGATGAGGAGAGGTTTCTTCCAGAGCTCCAGGTGCAGGGAAAACCCCGAGGTTGGGAAAGAGTGAGGGAGCTGGGTCTCTGCCACTCTCCACCAAGCACTGAGCAGGTTGCAGCGGCTGAGCCCCAATCTGTATCTGCAGCTGGAGGGTAGAGGGTGGGATTTACGTCTTCCCAGAGCAGTGCCCTTCCTGTCTTGACTCCTTCTGTCACCTGCCTCATGCCCCCAGCTTGAGTGTCCCCTTCACACTGGCCTCTCCCTCCCTGACAGCCCTCTAACTTCTACCCCTGGTCTCGGTCCTCTGGTTTCAGCCTCTCTGCCTTTTGTCCCCCGGCGGCTCCTCCCCAGCTCTGCCGTCACTCTCTTACCCCGGCCAGGGCCCATGTGTCTGGGTACAGCTGTTGGTACCAGGGCCGGGACAGGGAGCTCCTGATGTCCACTTTGCTTCCAAGGGTGCGTCTGAGACTACCCGTCGTGGGGTCCCAGGCTTCTCTCTTCTGCCCTGCAGGAACACCTCGCAATTCCTCTATACTTCTCTTTTCTCTGTACTTCAGTGTCTGCTTCTGATCCCCGATCCCAGGCCACCCAGCCTACAGGCCCATGAGTCCCCTTCTCAGTCACCTCCAGGGCCACATCCTGGAGCCAAGGGCTGTAGCCTGGGGATTCTCATAATCCCTGACCCCACTTCCCTGGCACCCACGAGCTAGGTTGAGACGTGACACCCCAGCTCTCAGCCACAAGATGGGCTGTGCCCGAGGTGAGGGGTAGCAGATCGGGTACTTCCCACTTCCCGTCTGCTGTGGCTGCCTGTCTTCCTTGTCCCTGACACCCCCGACAGCCGGATCTCTGATCCTAACTCTGACAAATTGTGAAATGGGTTGAAATCCACATGCTGGGGTTCATGCTTGTAAACTAATGAATCCCACGGCCAAAAGGGAATAGTATAGAAAAATATGTCTATTTGTGTGATGAACACTCACTGCTAAGCCTTAAGGTCTCCAGAACTCATCACGCCTGACTGCTGAGGTAGCTCCTTCTGGAAGTTTCCTTTTCTATGCTGTCTCT
CTGCCTCTTCACCTAGTCCTCACTCCATGCCCTTTGAAGTCATTAGTGTGTGAGCTTAGCCTGTTTCTCTTTGGTGTTCTCTGGGCAGTTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGTTTTTGATGGAGTCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACCATCTCAGCTCACTGCAGCCTCTGCTTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCTACAGGTACACGCCACTACGCCTGGCTAATTTTTGTATTTTGTTTTAGTAGAGAAAGGGTTTCACCATGTTGGTCAGACTGGTCTCGAACTCCTGACCTCAAGTGATCTCCTGCCTTGGCCTTTCAAAGTGCTGGGATTACAGGCATGAGCCACCGTGCCTGGCCCTCTTTGTTCAGCTTTCCCTGTCTCCTGGTTTTTGTGATGCGCCCCCTGCCAGGACATGGCTGGGTTCTCTCTTTTTCTCTTTGAAAGCGGAGTCAGCCCAGACAGCAGCAGGGTGCCGGGAGAAGCAGTGGTGGGAGCAGAAGGTTAATGGGGGGATGGGAGCACTCCAGGCAGTAGCAGGGGGAGGAGAGGAAGGGGCAGAGGGAGGAGGAGCCTGGGTGTGACCAGAGGAGGGAAGGGAGGGGAGGGAGGCTCTGCCCAGCTCGGCTATATCAGGACAGGAGGACCTGCCATGACAAGGCCAAGGCCCGGTGAGGAATGAGGGCTCCCATGCCCCACTGCTCCCCTAGGAGCAGACAGGCAGTCGTCTCCAGCACCACAAAGCACCCAGCTCCAAGCTGCCTCTGATGCAGGAGTCAGCTGTAATATGCCCTGCCCTCTGTGATGCTGCCTGGAAAATGGGTGAGTGAGTAGCTTACATGAGTGATGTAACAAAATGACCCACGGATTTACCAGTGGATTCCTCTGCTCTGCCGCCAATGCAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCATCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAA
CCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCACCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCATTCACCAATACCAAGACCACCTCACACAGATCTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTGCTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGGAGCTTCACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCAACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAAGTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTCTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCTCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATC
CCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTACCCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGTTGGACCTTCTGCCTCTCTGTTCCCCTCCTTCCTCCCCTGCAAAATTCCTGTGTCACTGAGGTCAGGCTTTATCCTGAGCTTCCCTTTCTTTCTGTGTTTTCCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGACAGTGTGCTTGCCTTCCGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGGTGAGTTGCCAGAGCTATGCCTTCTGCACTTCCTCCCACAGGGTGTCACTGACTCTCCCCAGACTTATCCCTCTGTGGGGCCTGGAGGCACCCATGCCTTTTTGCCCGGTCCTTCCCTCCCTGCCATCTCTCCCATGCCCTCCGCTGCCCTGTGTCATGCTCCTCTCCGTCCTCACCCTTAGGAGGTGGCTGGGACTACCCTCCCTCCTGGGCCCATCTCCTGACTTGGGCTGCTTGGAGCTGTATCAGTTTCCAACTGCTGCCGGGCCAACAAACACAAATCTGGCTGCTGGAACAACACGACATTATCATGTTAGAATTCTGTAGATTAGAAGTCTGATGTGGGTGTCACTGGGCTGAAATCAAGGCGTCACCAGGGCTGTGTTGTCTTTCAGCGGCTCCAGGGAAGAATCCATTTTTTTGCCCTTTGCAGCTTCTGGAGCCTCCCACAGCAAGGCTGCATCTCTCTGTGTCTTTCTCCCATAGCCTCATCTCCCTCTAATGAACTCTGGCCTCCTCAATTGCTTCTCCCACTGTTAAGGACCCTTGTGATAACTTTGCCTCCTCCCCAAATAGTCTATGTTAATTTTCTCAAGATCAGCTGATTACGCCGGGCGGGGTGGCTCACACCTGTAATCCCAGCAGTTTGGGAGGCTGAGGGGGTAGGATCACCTGAGGTCAGGAGTTGGAGACTAGCCTGGCCAACATGGTGAAACTGTCTCTACTATTAGTTGGGCATGGTGGCAGATGCCTATAATCCTCACTATTCAGGAGGCTGAGGCAGGAAAATCGCTTGAATCCAGGAGGTGGATATTGCAGTAAAGCGAGATTTCGCCACTGCACACTAGCGTAGATGACAGAGGGAGAGTGAGACTCTGTCTTAAAAGAAAAAAAAAAATCAGCTGATTGTCTTATAATCCCTGCACTTTGGAAGGCCGAGGAGGGAGTATCGCTTGAGGCGAGGAGTTCAGGACCAGCCTGGGCAACACAGCGAGACCCTCATCTCCACAAAAAATTTTAAAAACTTACCTGGGCATGGTGGCTCATGCCTGTGGTCCCAGTTATTTGGAAGGCTGAGGTGGGAGAATCACTTGAGCCTGGGAGTTCAAGGCTGCAGTGAGCTATGATCCCCCTATTGCACTTCAGCCTGGATGACAGAATGAGACCCTATCTCAACAATAAAAAAAAGTTAGGCTGATTAGCAATCTAATTCAATCTGCACCCTTGATCCTCCCTTGCCATGTAGTATAGCATAGTCACAGTTCTGGGGATTAGGACATGGACATCTTCCCACTATGGGGGCAGCCAGGAGGGACCACAGGCTGACCGCTATCTTTCTGCCTGCTTTCACTCATCTCCACACAATTCCTTCCTTCCTCTCGCTCTCTTCTTTCTTTTCTTCTTTTCTTTTGTCTCTTTCTCTTTTCTTTTTTTCTTTCTCTTTTTCTTTCTTTCTTTCTCTTTCTTTTCTTTCCTTTC
TTTTCTTCTTTCCCTCATCTTCACACAATGCTTTCTCCCTTTCTTTCTTTCCTTCTTTCTTTCTCTTTCTTTCTTTCTCTCTCTCTCTCTCTCTCTCTTTCTTTCTTTCTTTCTTTCTTTTTCTCTCTCTCTCTCTATCTTTCTTTTTCATTTTCTTGAGAGACAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGATGCAATCTCGGCTCACTACAACCTCCGCCTCCTGAGTTCAAGTGATTCTCCTGCCTCAGCCTCCTGAGGCATAATGCATCCTGGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTTGGGGTGGTGGTGTTGGTGGCTTCAGACAAAAGCAGACAGAGAAGTGACTGGGGACATGCATGCTCTGTGTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTGAGGTGTAGTCTCGCACTCTCACCCTGGCTGGAGTGCAATGGTGCGATCTTGGGTCACTGCAACCTCCGCCTCCCGGGTTCACATGATTCTCTTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGTGCACACCATGACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGTTTTCACTATGTTGGCCAGACTGGTCTCGAACTACTGACTTTGTGATCCACCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCGTCTGGGTCCCCTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGCGGAGGCGGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGGGGTGGAGGGGGTACATAAGGAATCACTCCCTGGGTATTTTTTCGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCTCTTCTTCAGCACACTGGAAGGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGATGACTTTGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAAAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCACGTCTAGGGGCGCCCGGTGGATGATGGCTTGATGCAACAAGAAGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGGTAGGCTAGGCGGGTGGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGAATTCCTACATCTGTTCCTGCCTCTGGGCATCAGTCCTCAGGGATCTTGGAGGGGAGCAGCAGGAGGAGCCTGTGGGTGGTGGTGGTGGTGTCGGTGGCTTCAGACAAAAGCAGACAGAGAAGTGACTGGGGACATACATGCTCTGTGCAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGGTAAAGGGCAAAGAGAGGGGATTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCGCACTGTCATCCTGGCTGGAGTGCAATGGCGTGATCTTGGCTCACTGCAACCTCCGCTTCCCAGGTTCACATGATTCTCTTGCCTCAGCCTCCCGAGCAGCTGAGATTACAGGTGCACACCACCACACCTGGCTAATGTTTTGTATTTTTAGTAGAGACGGGGTTTCACTATGTTGGCC
AGACTGGTCTCGAACTACTGACTTTGTGATCCGCCTGCCTCGTCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACGGCACCCGGCCGGGGAGGGGAATTGAAGGGTCTTCCCTGGAGCTGGGGTTGGGCATCTGGGTCCCCTCAGGTCTGCAGGTTCGGACGTGAGCCCAGGGATCCTTGGTGTTTCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGTAGGAGACCCATCTGGGGATGTGGAGGCAGTGTTGGGTGGGGGAAATGTGCGCACACAAAAAACCCATTCCTTTCTTTTGTAATCATCAGATTTTATAAAGAGAGGGGTGGAGGAGGTACATAAGGAATCGCTCCCTGGGTATTTTTTTGGATCGTTTTCTGGGGCCATTTATCTGGAGGAGGGGTGGCACCTCCCTTCTTCAGCACACTGGAAAGAGAGAAGTTGCAGGGACATGTGGGAAGGTGGTGCCTGGATTGGTGACTTCGTCCCCCTCTGGCTGGCCCCTGCTCTACTGAGTGGGTCAGCATTAGAGAGAGAGAGAGAGAAAGAGAGAGAGGGAGAGAACGCGCGTCCAGGGGCGCCCGGTGGATGATGGCTTGATGCAACGAGAACGTCAGGCCAGATGTGGTGGCTCACACTTGCAATCCTAGCACTTTGGCAAGGTTAGGTGGGTGGATCACCTGAAACCAGTTCAAGACCAGCCTGGGCAACATAGTGAGAACCCATCCCTACAACAATAAAAATAGTAATAATAATAATAATAATAATAAAATGATTATCCAGGCGTGGTAGTGCACACCTGTAGTCCCAGATACTTGGGAGGCTGAGGAGAAAGGATCACTTTAGCCCAGGAGTTGGAGGCTGCAGTGAGCTACAATGATACCACTGCACTCCAGCCTGGGTGACAGCAAGACTTTGTCTCTATAAAACACACAGAGAGAGGAAGTCAATCATGTCAGTCATTCCTTGTCCTGCCTTCCCAGGCAGACCAAGTCAGGAATGCTGGCAGCCCCTTCTGAAAAGGATGCACGTGGCATCCCAACTCATGACCTCTGCCCTCTTTCCCCCTTCTGGTGCACTTTGGGTTGCTTCTGGAGGTGCCCCTCCAAGGACCCATATGTTCCTGGCTGGGGCACTCTCTAAGGCTGTGGACCCCTCAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGATGCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTCCAGAACGCCAGCCAGGATGCGAACAGCTGCCAGGACTCCCAGAGTGAGCCCGGGCTGGAGGGAGGGGCCAGGGCCTGAGGTGTCACCCCAGCCCACTCCAGCTCAGCCAGGGGGCCACTGGACTCAGGTGCCAGCCCTGTGGTACCTCTGGCAGGTTGGGAGAACGGGAATAAGTCTACACACAATGCCATCAAGAGTGGGGCTAGGGAGGGTCTCCCCAGGACCTGGGTACTGGGGAAGAGACCCCCTGATCGTCAGGCTCAGCATTTCCCGATGGCTGAAGACCTCGGATTATTCAGGGGGGATAAGGGAGAGAACAGGAGTCTTCCCCTGTGGCCCCTCCACACTCCCCCAGACGGAGAGAGCCCTCACTGCCCTCCCTGTGCCTATCCTGCTTCCTGGCCCTAACCCCTTGACCTGCCCCGCCCATTCCATCTGTGCCTGTGTTTCCGCAGCCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGAAGGTGAGGGTGGGGTAAAGGGCTGAGTGGTCTCCCATGGCCATGACCCCTGCCACCAGGGACATTTGCCCATTGAAGCCTGTGGGCAGGGAGAGACCTTTGCGGGAGGCAAGTCATGTGGCCTAGGGAGGCTCTTCCTGGCGTTGGTTAGTGGCTTCCACCTGAGGACAGCAGGG
GCCACGAGGAGAGGGTGAGGGTGCTGGGGGTGGCCTCCCCTCATCGAATCCCAGGGTCTACCCCACAGCATCCCACCTCGGAAATGGAATCCTCCTCGCGCATATTCAGAGGCACCATTATCAGGCCCCTGAATAGAATGGATGAGGTCCTTGTCTCTGTGCATACCCCTCCCCAACCCCCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCTCTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGCGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTAGAGACGAGCGGTCCCGCGTGTCGGTAAGGCCCCGCTCACCATCAGCATCAGTCGAGCCCCGCCCACTCATTCTAGGATGAAGCCTAGCCTCACGCGCCGCCCCGGCTCTGCCCCCAGGCCCTACAGTGGAGCCTCGTCCCCAGAGTCCCGCTCCAAGCCCATCCCCGTTGCCCTACAGTGGAGCCCTGCCCTGGAGCTCTGCTCCGTCGCCCTAAAGTGTAGCCCCGCCTCCTTGATGGGGTTGAGTCCAATCCCCTGGTTCTGGGATAGACCCCGCCCACTCATTCTAGGGTGGGGCCCCGCCCCTTCGTTCTAGGGCTGAACCTTGCCCCCTTCTTCTGGGGTGGAGCCCCGCCCCCTTGTTCTAGGGTGGATCCCCGCCCCCTCCTTTTAGGGTGAAGCCCTGCCCACTTGATCTAAAGTGGAATCCCGCCCCCTCACCTAGGGTAGAGCCCCGCCCCCTCGTTCTAGGGTGGAGACCCGTCCGCTTGTTCTACGGTGGATTCCGGCCGCTTGTCTAGGGTGGAACCCCCCAGCTTGCCCTAGGGTGGAACCCCCCCGCTGCCCTAGGCTGGAGCCCCGCCCCCTCACCCGCCCCCGCGGGGCCCAGGTGCACGCGTGGACCCCGAGCCCGGAGGTGAAGAGGGTCTGACCCTGCGATCTCCCGCAGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGCGCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTGAGCGTGCGGGGGGCGGGGCCGGGGGGCGAGGGCAGCCAAGGGGTCCCAGGCGGGCCGGCTCTGTCTGACCGCGCGGCGGCCCCACCTAGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGGTGAGTCCTGCCTCCTGGGGAAGCAGGCAGAGGCTTTCCTGGGCACCACTGCGAGGACAGACGCCCTCCTTGCCTTCCTCGCATTTACTCTGTCCCCCTCTCCCTTCCGTCCCCTCCCTCTCCCCTTCTATTTCTCCGCTCCTCTCTCTCTCTAGACAAGGATACAAATTTCCATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGAGGGGCTAAAGAGGGGGACCCCAAGGAACTCTCCCAGCCTCCATTCCAGAATCCCTCCCCGACCCCCACCAGGGCAGGGAGGGGGCTGGGCTCGGATCAGCAGTGACCTCCCTGTCAGCCCAAACCAGTGGCTCCGCGTTCCCGTCCCTCACTGTGACTCTGACAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGAGCCCTGCGGGGCCCCTTCACCACCCCCTCCGCCCTGCCCCGGACACAAGGGTCTGCATTGCGTCCATTTCAAGAGGTGGCCCCAGGACGCGGGCAGCCCAGGCTCCTGCTGTTCTTGGGCAAGATGAGACTGTTCCCCCAAATCCCATCCTTCTCCTTCCAACTTGGCTGAAACCCACCTGGAGACGCAGTTCACGTCCAGGCTCTTCCACTGTGGAATCTTGGGCAAGT
CAGTAACGAGCCTCAGTTTCCTCACCTGCAAAACGGGTACAGCATTCCTGTATGATAGCTCACGCCGTCGTTGTGAAAACCACATAGACTTGGTCAATTCTCGGTCCTACTCTGCCCTCCCGTCTCAGCCCTCGTGTTGCCATTGCCTCTCTCGGATCCTCCAATCCTCACGTCCTTCACCTGGTCTCTGGCCCTGGTTCTTATTTTCTCTCAATTCCCTACTGCCTGTTTCTTACTTTGAACCTGGAGGCAGCCTGCAGCCCCATCCCATCTCCTGCCCTCTCCTGATCTAACTCCCTGCTGCATCTCTTGCTCTCATTCCTTAGACGTCCTCCCCTTTTGACCCCGTTCCTTCATCCATCCTGCACCCCAGTCCCCCAGCCCTAAATCCTCCCTCCTCTCCTCACATCCTGGTCCCTAGCAAGGTATAGATAGCCTCTGTGTCTTAGGATACCCCGGGTGCTGTTCCCTCGGTCACCCTGTTGCCCAGTTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCCTTTCCCCTTTTGAGCCCGTCCATTCATCGGTTCTGCCCCCGACTCCCCCTGACCTAAATACCCCAGCTCCTAATTCCCCCCTCACCCCGTTGCTCAATTCCCCGTTTCTCTTGCTCTCATTCCTTGTATCTTCTCCCCTTCTGAGCCTGTCCATTCATCGGTGGTTCTGCCCCTACTCCCCCAGCCCTAAATACCCCAGCTGCTGTTCCTCCCCATCACCCAGCCACCGGATTCTCCATTCACCCCTTTCTCTCACCCCTGGAGCCCCGTGGGTGGGGGCAGGGCATGAGTTCCCCAGTCCCCAAGGAAAGGCAGCCCCCTCAGTCTCCCTCCTCCTCATTCCCTTCCATCTCCCTCCCCTCTGCCTTTTAAACCCATCCCCTCCGATTCCCCTCCTCCCCCCTCTCTCCCTGGTGTCAACTCGATTCCTGCGGTAACTCTGAGCCCTGAAATCCTCAGTCTCCTTGGCGGGGAAGATTGGCTTTGGGAACAGGAAGTCGGCACATCTCCAGGTCTCCATGTGCACAATATAGAGTTTATTGTAAAAAGC", + // 100547052 - 1); + // + // var codingRegion = new CodingRegion(100547257, 100610315, 206, 10177, 9972); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 100547052, 100547317, 1, 266), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 100547318, 100549480, 266, 267), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 100549481, 100550652, 267, 1438), + // new TranscriptRegion(TranscriptRegionType.Intron, 2, 100550653, 100550655, 1438, 5957), + // new TranscriptRegion(TranscriptRegionType.Exon, 3, 100550656, 100550784, 5957, 6085), + // new TranscriptRegion(TranscriptRegionType.Intron, 3, 100550785, 100550787, 6085, 6698), + // new TranscriptRegion(TranscriptRegionType.Exon, 4, 100550788, 100550814, 6698, 6724), + // new TranscriptRegion(TranscriptRegionType.Intron, 4, 100550815, 100550817, 6724, 7115), + // new 
TranscriptRegion(TranscriptRegionType.Exon, 5, 100550818, 100552774, 7115, 9071), + // new TranscriptRegion(TranscriptRegionType.Intron, 5, 100552775, 100552880, 9071, 9072), + // new TranscriptRegion(TranscriptRegionType.Exon, 6, 100552881, 100553066, 9072, 9257), + // new TranscriptRegion(TranscriptRegionType.Intron, 6, 100553067, 100554979, 9257, 9258), + // new TranscriptRegion(TranscriptRegionType.Exon, 7, 100554980, 100555095, 9258, 9373), + // new TranscriptRegion(TranscriptRegionType.Intron, 7, 100555096, 100555514, 9373, 9374), + // new TranscriptRegion(TranscriptRegionType.Exon, 8, 100555515, 100555579, 9374, 9438), + // new TranscriptRegion(TranscriptRegionType.Intron, 8, 100555580, 100607745, 9438, 9439), + // new TranscriptRegion(TranscriptRegionType.Exon, 9, 100607746, 100607894, 9439, 9587), + // new TranscriptRegion(TranscriptRegionType.Intron, 9, 100607895, 100608306, 9587, 9588), + // new TranscriptRegion(TranscriptRegionType.Exon, 10, 100608307, 100608372, 9588, 9653), + // new TranscriptRegion(TranscriptRegionType.Intron, 10, 100608373, 100608728, 9653, 9654), + // new TranscriptRegion(TranscriptRegionType.Exon, 11, 100608729, 100608891, 9654, 9816), + // new TranscriptRegion(TranscriptRegionType.Intron, 11, 100608892, 100609538, 9816, 9817), + // new TranscriptRegion(TranscriptRegionType.Exon, 12, 100609539, 100609712, 9817, 9990), + // new TranscriptRegion(TranscriptRegionType.Intron, 12, 100609713, 100609804, 9990, 9991), + // new TranscriptRegion(TranscriptRegionType.Exon, 13, 100609805, 100609896, 9991, 10082), + // new TranscriptRegion(TranscriptRegionType.Intron, 13, 100609897, 100610051, 10082, 10083), + // new TranscriptRegion(TranscriptRegionType.Exon, 14, 100610052, 100610104, 10083, 10135), + // new TranscriptRegion(TranscriptRegionType.Intron, 14, 100610105, 100610273, 10135, 10136), + // new TranscriptRegion(TranscriptRegionType.Exon, 15, 100610274, 100611004, 10136, 10866), + // new TranscriptRegion(TranscriptRegionType.Gap, 15, 
100611005, 100611005, 10866, 10867), + // new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611006, 100611075, 10867, 10936), + // new TranscriptRegion(TranscriptRegionType.Gap, 15, 100611076, 100611076, 10936, 10937), + // new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611077, 100611169, 10937, 11029), + // new TranscriptRegion(TranscriptRegionType.Gap, 15, 100611170, 100611173, 11029, 11030), + // new TranscriptRegion(TranscriptRegionType.Exon, 15, 100611174, 100611176, 11030, 11032), + // new TranscriptRegion(TranscriptRegionType.Intron, 15, 100611177, 100611299, 11032, 11033), + // new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611300, 100611307, 11033, 11040), + // new TranscriptRegion(TranscriptRegionType.Gap, 16, 100611308, 100611311, 11040, 11041), + // new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611312, 100611428, 11041, 11157), + // new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611429, 100611517, 11170, 11258), + // new TranscriptRegion(TranscriptRegionType.Exon, 16, 100611518, 100611619, 11260, 11361) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(865, 865, "G"), + // new RnaEdit(1214, 1214, "A"), + // new RnaEdit(1439, 1438, + // 
"AGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCAGGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCATTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGCCTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCTGCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACCA
GATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCCCCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCCT
ACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTTCTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAATCGCAACCACCGAGACCCCC"), + // new RnaEdit(1520, 1520, "A"), + // new RnaEdit(1549, 1549, "C"), + // new RnaEdit(1568, 1567, + // "TCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCACCACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTG"), + // new RnaEdit(1595, 1594, + // "AGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGT"), + // new RnaEdit(1627, 1627, "G"), + // new RnaEdit(1650, 1650, "A"), + // new RnaEdit(1702, 1702, "C"), + // new RnaEdit(1957, 1957, "T"), + // new RnaEdit(2794, 2794, "T"), + // new RnaEdit(3028, 3028, "C"), + // new RnaEdit(3586, 3586, "C"), + // new RnaEdit(4024, 4024, "G"), + // new RnaEdit(4044, 4044, "T"), + // new 
RnaEdit(4131, 4131, "C"), + // new RnaEdit(4183, 4184, "CT"), + // new RnaEdit(4231, 4231, "G"), + // new RnaEdit(4273, 4273, "G"), + // new RnaEdit(4289, 4289, "A"), + // new RnaEdit(4580, 4580, "T"), + // new RnaEdit(4956, 4956, "T"), + // new RnaEdit(5344, 5344, "A"), + // new RnaEdit(5347, 5347, ""), + // new RnaEdit(5356, 5356, "T"), + // new RnaEdit(5358, 5358, "C"), + // new RnaEdit(5360, 5360, "C"), + // new RnaEdit(5368, 5368, "C"), + // new RnaEdit(5411, 5411, "T"), + // new RnaEdit(5413, 5413, "C"), + // new RnaEdit(5418, 5418, ""), + // new RnaEdit(5421, 5421, "C"), + // new RnaEdit(5480, 5480, "G"), + // new RnaEdit(5483, 5484, "GT"), + // new RnaEdit(5486, 5486, "C"), + // new RnaEdit(5492, 5492, "A"), + // new RnaEdit(5499, 5499, "T"), + // new RnaEdit(5501, 5501, "C"), + // new RnaEdit(5505, 5505, "C"), + // new RnaEdit(5512, 5515, ""), + // new RnaEdit(5527, 5530, ""), + // new RnaEdit(5556, 5556, "T"), + // new RnaEdit(5558, 5558, "C"), + // new RnaEdit(5630, 5630, "T"), + // new RnaEdit(5636, 5636, "G"), + // new RnaEdit(5640, 5640, "C"), + // new RnaEdit(5648, 5647, "TTCTTGCCTCCC"), + // new RnaEdit(5691, 5691, "T"), + // new RnaEdit(5697, 5697, "T"), + // new RnaEdit(5708, 5708, "C"), + // new RnaEdit(5711, 5711, "G"), + // new RnaEdit(5721, 5721, "A"), + // new RnaEdit(5737, 5736, "T"), + // new RnaEdit(5750, 5750, "G"), + // new RnaEdit(5777, 5777, "G"), + // new RnaEdit(5805, 5805, "T"), + // new RnaEdit(5819, 5819, "C") + // }; + // + // const byte startExonPhase = 0; + // const bool onReverseStrand = false; + // var codingSequence = new CodingSequence(genomicSeq, codingRegion, regions, onReverseStrand, startExonPhase, + // rnaEdits); + // + // var expectedCodingSeq = + // 
"ATGCAGCTGTTGGGGCTCCTCGGCCTCCTCTGGATGCTCAAGGCCTCCCCGTGGGCCACAGGAACTTTATCCACGGCCACATCCATCTCTCAAGTGCCTTTCCCCAGAGCAGAAGCAGCCAGCGCTGTGCTCAGCAATTCTCCACACTCCAGAGACCTGGCTGGGTGGCCACTTGGTGTCCCCCAGCTCGCCTCTCCTGCTCCTGGCCACAGGGAAAATGCACCTATGACACTCACTACCTCCCCCCATGACACACTCATCTCTGAAACATTGCTCAACTCTCCAGTCAGTTCCAACACCTCAACCACCCCGACGTCCAAGTTTGCCTTCAAGGTTGAAACCACTCCACCCACCGTGTTGGTCTATTCAGCCACCACTGAGTGCGTGTATCCAACGAGCTTTATAATCACCATCTCCCACCCCACCTCCATCTGTGTGACCACGACGCAGGTGGCCTTCACCAGCTCTTACACCTCGACTCCCGTGACACAGAAGCCAGTGACCACCGTCACCAGTACTTACTCTATGACCACTACTGAGAAAGGAACGTCAGCCATGACATCTTCTCCCTCTACCACCACTGCAAGGGAAACTCCCATAGTGACAGTGACACCCTCCTCTGTGTCAGCCACAGACACAACCTTCCACACTACAATCTCGTCTACAACTAGAACCACAGAAAGGACTCCCCTGCCCACTGGAAGCATCCATACAACCACGTCCCCAACCCCAGTATTTACTACTCTCAAAACAGCAGTGACTTCCACTTCCCCCATCACTTCTTCAATCACTTCCACAAATACAGTGACTTCTATGACAACGACCGCCTCCCAGCCCACAGCCACTAATACATTGTCATCACCCACTAGGACCATTTTATCTTCCACACCTGTCCTGAGCACAGAAACAATCACCAGTGGTATCACAAACACCACCCCCCTATCCACCTTGGTGACCACACTCCCCACTACCATCAGCAGGTCTACACCTACATCTGAGACCACCTACACTACTTCTCCCACCAGCACTGTCACAGACTCCACTACCAAAATCGCCTACTCCACAAGTATGACAGGTACATTGTCCACAGAGACTTCTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGACTCCTATGACAAACTTGGTAACCACCACCACTGAGATCTCCTCCCACAGTACTCCCAGCTTCTCTTCATCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCACTTCCCCCTACCTCAGGTACTATGGTGACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACATCCCTTTCACAACACCAACAACTATCACCCACCATTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACGGTTTCCAGTTCCTCAGCAATGTCCACGAGTGTCATTCCATCTTCCCCCAGCATTCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACTACTCCCAATGTGAGACCAACTTTTGTAAGTACACTCAGCACTCCTACAAGTTCCCTCCTGACGACCTTCCCAGCAACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCCAGCCAGCACCAGTACACTCCACACAACAGCTGAATCCACCCTGGCACCCACTACCACCACCTCATTCACAACTTCCACAACTATGGAACCACCTTCAACCACTGCAGCAACTACAGGAACAGGTCAGACCACCTTCACCAGCTCTACAGCCACATTTCCTGAGACCACCACACCGACTCCTACAACTGACATGTCCACAGAATCTCTCACAACAGCCATGACTTCTCCTCCCATCACTTCATCAGTCACTTCCACAAATACAGTGACTTCTATGACAACTACGACCTCTCCTCCCACAACCACCAATTCTTTTACATCACTGACCAGTATGC
CTCTGTCTTCTACACCTGTCCCAAGCACAGAAGTAGTCACCAGTGGCACCATAAACACAATCCCTCCATCTATCTTGGTGACCACACTCCCCACTCCAAATGCTTCATCTATGACTACATCTGAGACCACCTATCCTAATTCTCCGACTGGTCCTGGTACAAACTCCACGACGGAAATCACCTATCCCACCACTATGACAGAGACATCATCCACTGCCACCTCTCTTCCACCCACCTCTCCCTTGGTCTCAACTGCAAAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCGTCTACTCCACAGCCAGCACATACACAACTGCCATCACCTCAGTTCCCACTACGTTGGGTACCATGGTAACTTCTACATCCATGATCTCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACCATCACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCTAACATGTCTGCAAGGCCAACAACTGTCATTCCCTCATCTCCCACTGTCCAGAATACAGAAATCTCAATCTCTGTTAGCATGACGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAATGACACATTCATTCTCTTCTTCTATGTCTGAAAGTAGTGCTGGGACCACTCACACAGAGAGTATCTCCTCACCTCGAGGCACCACCAGTACACTCCACACAACAGTTGAATCCACCCCATCACCCACTACCACCACCTCATTTACCACATCCACAATGATGGAACCACCTTCATCCACTGTATCAACTACAGGCAGAGGTCAGACCACCTTTCCCAGCTCTACAGCCACATTCCCTGAGACCACTACACTGACTCCTACAACTGACATTTCTACAGTATCTCTCACAACAGCCATGACTTCTCCTCCCCCCGTCAGTTCTTCAATCACTCCCACCAATACAATGACTTCTATGAGAACTACAACCTATTGGCCCACAGCCACTAATACATTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAAATGATCACCAGTCATACCACAAACACCACCCCTCTATCCACCTTGGTGACTACACTCCTCACTACCATCACCAGATCTACACCTACATCTGAGACCACCTACCCTACTTCTCCCACCAGCATTGTCTCAGACTCCACGACTGAAATCACCTATTCCACAAGTATAACAGGTACATTGTCCACTGCCACTACTCTCCCACCCACCTCTTCCTCTCTCCCAACCACAGAAACAGCCACGATGACTCCTACCACAACCTTGATAACCACCACCCCTAATACCACCTCCCTTAGTACCCCCAGCTTCACTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCTCCTCAGCTTCCCCTACCTCAGGTACCATGGTAACTTCCACAACCATGACCCCATCTTCTCTGAGTACAGACACCCCTTCCACAACACCAACAACTATCACCTACCCTTCTGTGGGCTCTACCGGTTTCCTGACTACAGCAACAGACCTCACATCAACATTCACTGTTTCCAGTTCCTCAGCAATGTCCACAAGTGTCATTCCATCTTCCCCCAGCATCCAGAATACAGAAACCTCATCCCTTGTCAGCATGACCTCTGCCACCACTCCCAGTTTGAGACCAACTATCACAAGTACTGACAGCACTCTAACAAGTTCCCTCCTGACGACCTTCCCAAGTACATATTCATTTTCATCTTCCATGTCTGCCAGCAGTGCAGGGACCACTCACACAGAGACTATTTCCTCACTTCCAGCCAGCACCAATACAATCCACACAACAGCTGAATCCGCCCTGGCACCCACTACCACCACCTCATTCACCACATCC
CCAACGATGGAACCACCTTCAACCACTGTAGCGACTACAGGCACAGGTCAGACCACCTTCCCCAGCTCTACAGCCACATTCCTTGAGACCACCACACTGACTCCTACAACTGACTTTTCTACAGAATCTCTCACAACAGCCATGACTTCTACTCCCCCCATCACTTCTTCAATCACTCCCACCGATACAATGACTTCTATGAGAACTACGACCTCTTGGCCCACAGCCACTAATACGTTATCACCACTCACCAGTAGCATTTTATCTTCTACACCTGTCCCAAGCACAGAGGTGACCACCAGTCATACCACAAACACCAATCCTGTATCCACGTTGGTGACTACACTCCCCATTACCATCACCAGGTCTACACTTACATCTGAGACCGCCTACCCTAGTTCTCCCACAAGCACTGTCACAGAGTCCACAACTGAAATCACCTATCCCACCACTATGACAGAGACATCATCTACTGCCACCTCTCTTCCACCCACCTCTTCCTTGGTCTCAACCGCAGAAACAGCCAAAACTCCTACCACAAACTTGGTAACCACCACCACCAAGACCACCTCACATAGTACCACCAGCTTCACTTCTTCAACCATCTACTCCACAGCCAGCACACCCACCACTGCCATCACCTCAGTTCCCACTACCTTGGGTACCATGGTGACTTCTACATCCATGATCCCATCTACTGTGAGTACAGGTATCCCTACCTCACAACCAACAACTATTACTCCCTCATCCGTGGGCATCAGTGGTTCATTACCTATGATGACAGACCTCACCTCAGTGTACACAGTCTCCAGCATGTCTGCAAGGCCAACAAGTGTCATTCCTTCATCTCCCACTGTCCAGAATACAGAAACCTCAATCTTTGTTAGCATGATGTCTGCTACCACTCCCAGTGGAGGACCAACTTTCACAAGTACTGAGAACACTCCAACAAGGTCCCTCCTGACAAGCTTTCCAGTGACACATTCATTTTCCTCTTCCATGTCTGCCAGCAGTGTAGGGACCACTCACACCCAGAGTATCTCCTCACCCCCAGCCATCACCAGTACACTCCACACAACAGCTGAATCCACCCCATCACCTACAACCACCATGTCATTCACAACATTTACAAAGATGGAAACACCTTCATCCACTGTAGCAACTACAGGCACAGGTCAGACTACATTCACCAGTTCAACAGCCACATCCCCTAAGACCACCACACTGACTCCTACCTCTGACATTTCCACAGGATCTTTCAAAACAGCCGTGAGTTCTACTCCCCCCATCACTTCTTCAATCACCTCCACATATACGGTGACTTCGATGACAACTACCACCCCTCTAGGGCCCACAGCCACTAATACGTTACCATCATTTACCAGTAGCGTTTCATCTTCTACGCCTGTCCCAAGTACAGAAGCGATCACCAGTGGTACCACAAACACCACCCCTCTATCTACATTGGTGACCACATTCTCCAATTCCGACACCAGTTCTACACCTACATCTGAGACCACCTACCCTACTTCTCTTACTAGTGCTCTCACAGATTCCACGACCAGAACCACCTATTCCACCAATATGACAGGTACATTGTCCACTGTGACCTCTCTTCGACCCACCTCTTCCTCTCTCCTCACCACAGTAACAGCCACAGTTCCAACAACAAACTTGGTAACCACGACCACCAAGATCACCTCACACAGTACTCCTAGCTTCACTTCTTCAATCGCAACCACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCACTACCGAGACCCCCTCACACAGTACTCCCAGATTCACTTCTTCAATCACCAATACCAAGACCACCTCACACAGCTCTCCCAGCTTCACTTCTTCGATCACCACCACCGAGACCACATCCCACAATACTCCCAGCCTCACTTCTTCAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTACACTTCTTTGATCACCACAACCAC
CACCACCTCACACAGTACTCCCAGCTTCACTTCTTCCATCACCACCACTGAGACCACATCCCACAATACTCCCAGCTTGACTTCTTCGATCACAACCACCGAGACCACATCCCATAGTACTCCCAGCTTCACTTCTTCGATCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCATTGATCACCATCACCGAGATCACCTCACACAGTACTCTCAGCTACACTACCTCAATCACCACCACCGAGACCCCCTCACACAGTACTCTCAGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCACACAGTACTCCCAGCTTCACTTCCTCAATCACCACCTCTGAGATGCCCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGAACGCCACACACAGTACTCCCAACTTCACTTCTTCAATCACCACCACCGAGACCACATCCCACAGTACTCCCAGCTTCACTTCTTTGATCACCACCACGGAGACCACCTCACACAGGTGGGGGACCACCGAGACCACATCCTACAGTACTCCCAGCTTCACTTCTTCAAATACCATCACTGAGACCACCTCACACAGTACTCCCAGCTACATTACCTCAATCACCACCACCGAGACCCCCTCAAGCAGTACTCCCAGCTTCAGTTCTTCGATCACCACCACTGAGACCACATCCCACAGTACTCCCGGCTTCACTTCTTCAATCACCACCACTGAGACTACATCCCACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACTGAGACCACCTCACATGATACTCCCAGCTTCACTTCTTCAATCACCACCAGTGAGACCCCCTCACACAGTACTCCCAGCTCCACTTCTTTAATCACCACCACCAAGACCACCTCACACAGTACTCCCAGCTTCACTTCTTCGATCACCACCACGGAGACCACCTCACACAGTGCTCACAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAATACTCGCAGCTTCACTTCTTCGATCACCACCACCGAGACCAACTCTCACAGTACTACCAGCTTCACTTCTTCGATCACCACCACCGAGACCACCTCACACAGTACTCCCAGCTTCAGTTCTTCAATCACCACCACTGAGACCCCCTTACACAGTACTCCTGGCCTCACTTCGTGGGTCACCACCACCAAGACCACCTCACACATTACTCCTGGCCTCACTTCTTCAATCACCACCACTGAGACTACCTCACACAGTACTCCTGGCTTCACTTCTTCAATCACCACCACTGAGACCACCTCAGAGAGTACTCCCAGCCTCAGTTCTTCAACCATCTACTCCACAGTCAGCACATCCACAACTGCCATCACCTCACATTTTACTACCTCAGAGACTGCGGTGACTCCCACACCTGTAACCCCATCTTCTCTGAGTACAGACATCCCGACCACAAGCCTACGAACTCTCACCCCTTCGTCTGTGGGCACCAGCACTTCATTGACTACAACCACAGACTTTCCCTCTATACCCACTGATATCAGTACCTTACCAACTCGAACACACATCATTTCATCTTCTCCCTCCATCCAAAGTACAGAAACCTCATCCCTTGTGGGCACCACCTCTCCCACCATGTCCACTGTGAGAATGACCCTCAGAATTACTGAGAACACCCCAATCAGTTCCTTTAGCACAAGTATTGTTGTTATACCTGAAACCCCAACACAGACCCCTCCTGTACTGACGTCAGCCACTGGGACCCAAACATCTCCTGCACCTACTACTGTCACCTTTGGAAGTACGGATTCCTCCACGTCCACTCTTCATACTCTTACTCCATCAACAGCCTTGAGCACGATCGTGTCAACATCACAGGTTCCTATTCCTAGCACACATTCCTCCACCCTTCAAACAACTCCTTCTACTCCCTCATTGCAAACTTCACTCACATCTACAAGTGAGTTCACTACAGAATCTTTCACTAGGGGAA
GTACGTCTACAAATGCAATCTTGACTTCTTTTAGTACCATCATCTGGTCCTCAACACCCACTATTATCATGTCCTCTTCTCCATCTTCTGCCAGCATAACTCCAGTGTTTTCCACTACCATTCATTCTGTTCCTTCTTCACCATACATTTTCAGTACAGAAAATGTGGGCTCCGCTTCTATCACAGGCTTTCCTAGTCTCTCTTCCTCTGCAACTACCAGCACTTCTTCAACCAGCTCCTCTCTGACCACAGCTCTCACTGAAATAACCCCCTTTTCTTATATTTCCCTTCCCTCCACCACACCCTGTCCAGGAACTATAACAATTACCATAGTCCCTGCCTCCCCCACTGATCCATGTGTTGAAATGGATCCCAGCACTGAAGCTACTTCTCCTCCCACCACCCCATTAACAGTCTTTCCCTTTACTACCGAAATGGTCACCTGTCCTACCTCCATCAGTATCCAAACTACTCTTACTACATATATGGACACTTCTTCCATGATGCCAGAAAGTGAGTCCAGCATCTCACCCAATGCTTCCAGTTCCACTGGCACTGGGACTGTACCCACAAACACAGTTTTCACAAGTACTCGACTGCCCACCAGTGAGACCTGGCTGAGCAACAGTTCTGTGATCCCCCTACCTCTTCCTGGCGTCTCTACCATCCCGCTCACCATGAAACCAAGCAGTAGCCTCCCGACCATCCTGAGGACTTCAAGCAAGTCAACACACCCCTCCCCACCCACCACTAGGACTTCAGAGACACCAGTGGCCACTACCCAGACTCCTACCACCCTTACATCACGCAGGACAACTCGCATCACTTCTCAGATGACCACACAGTCCACGTTGACCACCACTGCAGGCACCTGTGACAATGGTGGCACCTGGGAACAGGGCCAGTGTGCTTGCCTTCCGGGGTTTTCTGGGGACCGCTGTCAGCTCCAGACCAGATGCCAGAATGGGGGTCAGTGGGATGGCCTCAAATGCCAGTGCCCCAGCACCTTCTATGGTTCCAGTTGTGAGTTTGCTGTGGAACAGGTGGATCTAGATGTAGTGGAGACCGAGGTGGGCATGGAAGTGTCTGTGGATCAGCAGTTCTCGCCGGACCTCAATGACAACACTTCCCAGGCCTACAGGGATTTCAACAAGACCTTCTGGAATCAGATGCAGAAGATTTTTGCAGACATGCAGGGCTTCACCTTCAAGGGTGTGGAGATCCTGTCCCTGAGGAATGGCAGCATCGTGGTGGACTACCTGGTCCTGCTGGAGATGCCCTTCAGCCCCCAGCTGGAGAGCGAGTATGAGCAGGTGAAGACCACGCTGAAGGAGGGGCTGCAGAACGCCAGCCAGGATGTGAACAGCTGCCAGGACTCCCAGACCCTGTGTTTTAAGCCTGACTCCATCAAGGTGAACAACAACAGCAAGACAGAGCTGACCCCGGCAGCCATCTGCCGCCGCGCCGCTCCCACGGGCTATGAAGAGTTCTACTTCCCCTTGGTGGAGGCCACCCGGCTCCGCTGTGTCACCAAATGCACGTCGGGGGTGGACAACGCCATCGACTGTCACCAGGGCCAGTGCGTTCTGGAGACGAGCGGTCCCACGTGTCGCTGCTACTCCACCGACACGCACTGGTTCTCTGGCCCGCGCTGCGAGGTGGCCGTCCACTGGAGGGCGCTGGTCGGGGGCCTGACGGCCGGCGCCGCGCTGCTGGTGCTGCTGCTGCTGGCGCTGGGCGTCCGGGCGGTGCGCTCCGGATGGTGGGGCGGCCAGCGCCGAGGCCGGTCCTGGGACCAGGACAGGAAATGGTTCGAGACCTGGGATGAGGAAGTCGTGGGCACTTTTTCAAACTGGGGTTTCGAGGACGACGGAACAGACAAGGATACAAATTTCTATGTGGCCTTGGAGAACGTGGACACCACTATGAAGGTGCACATCAAGAGACCCGAGATGACCTCGTCCTCAGTGTGA"; + // + // 
Assert.Equal(expectedCodingSeq, codingSequence.GetCodingSequence()); + // } + // + // [Fact] + // public void GetCodingSequence_RnaEditInsertion_StartsCds_EndsUtr() + // { + // // NM_001220765.1, chr7:50344378-50367353 + // var genomicSeq = new SimpleSequence( + // "CGCGGCGCATCCCAGCCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCACCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACGTGAGTGTTTTCAAATTGAATTTCAATAGGAAAACTTGGGGTAACTGGTGAATTTAAAAAAAAAAAAACACAGTAAAGAAAAGCGGTAAGGTTGGTAGACCCTGGTGTCGCTCAGGTCCGCCTCTCTTTTCTGAGGACAGTGAGAGAGTTCACTTCTGTCAAGCGTCTGTTGCTCTGCACTGTGCCAGCAGGTGCAGGACCAGGCCGACATGGGACACTTCTGAGCAGCCCCGCTGTCACCAGGAGAGGAGTTCTAGCTCCCAACCATATTTAAATTTATGTAGACCTACATATACCCACGGAAGTCAGCCTTTATAAAGTCGTGTGTAAAGAGTTTTCCTTATATTTGAGCCGGGAGCTTTCTTTTTATACTATAAATATGATGAGATCGAGTCTGAACTTAATTTCTGCAAGAGAGGAATTATCCCGGCTTTGAAAAGTTAGTCCTTTTGCTGACCGCAGGTTTGACGCTCAAGTCACCAAACCTTCTCAGGAAAACCCTTAGTAATATTAAGGCATCAGGTTACTTGCGGTTATATTTGAAATGTATTTTAAATATTTGTCAAGCATCGCTGCTGATGCCTAAGGAACCTCGTGAGGGCTTGTTTTTCCTTCTAATTTGGAGGCATCTAATGACCGAAAACCGTAGCGATTCCATAGGGTCTGACCAGGCACAGCTTTCAAATGCAGCTTCCCTCTCTCTAGGGACTGCAGCCCACCCAGACTGAATTTCAATGCGGTGCGCTTTGCTTAGGTTACCCACTCACAATTTCCCACTGCGCCGCAGGCAGTATATTTCAGCTTTGAGATACCTTGTTTTAAAATTCCAGACAAAATGGTGTTGAGGAAATGTCTCCTTACTAGTCCCATCAACTTCTGTTAAAAGAGGAAAATTTATGGAATTTGAAAATACTGCGTATGATATTTAAACTTTCATAGACATTCAAATGCTTTTAAGGCCAGGTTCAATTTGGTTATGAGTCGAGGGGTGGGGGGGACCCACATAGAAATGTCCTGGGTCCTCTTGAGTTTATTTCTTTGTTTGAAGATGTTTGTTCAATGAGTTTTATTGTACTCATCTTTTATATGGAATTTTAAAAAGTAACAATTTCAGTATTATTTATATTAGAATGTGTCAGAATTATTTCCGTGACAAATCAGATCATTTGGGCTATGGCTTAAAATGTACACGAGGCAAATATTCATGACAAGAAGATTCACCTTCTTACGCTGGCATCTTGTAAAATGCAGAACAAGTTAAAGAAATAATGTGTACACATACAAATAATGATGTCACATTAAAAATACTACACTATTCTTGCTTGATGGAATGTATCTGATTTCCAATTTCACCATGAACATATTTCATACATTTTTTACATGAAAAAAAACGTGACTCTTAAGTCTCACAGTCAATCAGAGCTGGTGACCAGAACATTTTATTGAACTAAATGGTCATGTTTTCTTCCCCTTTTGTTTCACGGTGAGAGTTGAAGGAAGGAGTTTAGAAACTCTCCAGTACTTGTTTAATTCATCAGTGTTCTAATTAGAGTGGTACCTCTTGGAAAACTACACACCCCCCTAATGCAGAA
ACATCATAGCAATAATCACCCACCCTCAGGGTCTCCAGGAGACCACAAGGGCTGCAGATAAAAGTCTGGATGTGTTAGGTTTGACCCTTTCGAAGAGTTTTACACAGGCTCCTAAAGAGAAGATCAGCTGTGGCCGTTTGTAGCCATTTCCTTTGTCGAAAAACTAAGATCGCAGTGAATGTATTAGCCAAGAGGTCTAAAGCCCTGTTGTACTGCAGGCCACTGTCTTCCTTGTTTGACTAGAGACTTGGAGTTTGAGAACAGTGGTTCTTTGGTTTGGATACATTTTTTGTTCTTGATTTGGATGTGTGTGTTTCATGCGTGGTTAATATAGCATATTTTCAATATAAATGTCAAAAATTTTGAAATAGGAAAGAACTCTCTATATATTAATGTACTTATACACACACTTCAAGATTATGCATTTATTAACAGATACATGAAATAAATTCCATGTGCATATGCACATATGCACACAGAGCGTGCACACACACAGCATGCACACAGCGTGGAGTGAGAGGCATGGGGCAGTGTGGAAGAGTTTTAACATCAAACAGACCTGAAATGAGTATTAAAGGCCCCCTTTATTTTTAAACTTTTACTAAAACAAGATGGATTTCCCTATGTTATATAATGGTGAATTTTAGGCATAAATAACGTTTTTTGAGTGTTGCATAATTGTACGTATTAATGTAATGTAACTGTGGTTAACGAAGAATTCATCAAGGATATCACTGTTTTGTGGCATTTTTTTTTTCCTCCTCTAATCTTTGGACTTGTGAAATAATTTCACTATGAAATAAATGTTGGTTCTTGTCATATTCTAAGGGAGATTGATGTAAGTGGCTCCACTCCAGCTTACAGAAGGTAAACCACGACCTTTTTGCGTTCTCTGAAAACGCTTGTCTTCCGATGCCTCTGTTTCTAAGACTGACAAGCACTCTGGGGGCACTGTGACGCCTGCTTCTAGCGGCAGAGTTGCTGCAGCTCCTGTCCTGGCTGTGAACATTGTTCTCTCTCTGGTGTCTCTATGTTCATAACTACAGAGACTTCAGCTCTATTCCATTTCATATTTGTGCTGAATAATCATTCCATTTTATGGGAGAAAACACAAGATGTAAAAGCAACAAGTGACCCATCCTTTGAAGCTTACAAGAAGAGAAACATTAATCTATTTCACGTCTTGAAAACAGATCAGTTTTATTTTGCTCAAAAAGGGCACATGTACATTTTTGATCTAGGTCTTAGAAACGTAGAGTTTCAGAGGATCAGCATTATACACACTGTCACACACACACACACTTAAAATTCAGATGAGGAACAAGATAGGAATGAGGTTTTGTTAGGGACGCAGAGCACCTAAAACCAAAGGATATCGACAGTAACAAAGCTGTTTTTACTGTAGTGCTGACTGAACACTCATGCTGGTGTCTTCATGTGGACCATGGCTTTCTTGTATTTCTTTGCAGTTTAATAAATGACTTCATATCTCAGGTTACCTTTCCACATCTCCTGGAATATATGTTTATGTCCTTAAAGTTTCAGTGTCGTCACTTTAGTAGCTTTAGTTTGAGTTTTTAAATGTTTGGTAATATTCCAACAAATATTTTTTAAGACATTATGAAACCTTATGAAGTGCCATATATTACAAGTGAGATAAAACAGCAAGCAAAAGAAGGTTTGCAGAAGGTTTTTAAGTGGCGAAGTGCGGGCCTGCCCATTTTGGTGTCTCCTTGGTGGTTACTCCTGAGAAGGGCCTGGAGGAAGAGCAACTGAGGCCTAATCTACAGGCAACTGCCAAATTGTTTCAGTTGACGTTTTTCCCTCTCATGTTTGACTATAATAAATAGGTAGTTGCCAGTGGAGCCTTCAGCCAACCACCTGGTAATAAACTGTTAAAAATGGTGCAAACCCTAGGTCACAGGTGTGGGGGCCATTTGTCTTGCCTGTTAACAGGCCTGGCCTTAATTCTTTTCTCCCATGGCCATTTCTGCCTTTGG
GGAACTCACAATTCCTGTTGACTAAAAGAGCACCCTTTTCCACCACAAGCCTGACAAATCAGACGTCCACATAATTTCTGAACTCGTTTTGGTTAGGACAGGAAGCACAGGCTCCCTTCCTGTCTGTGTTTTCCTAAGAGAAAACGGTCTTCCCTCCTTTTTTGCATATTTGGCAAGTGGTTCCACCTTTCTCTGCACCCTGGTGGAGTGTGAAGGCAGCAGAGGAACCTTTTGGAGGAGGAAGAGGACACAGAGGCCCTGTAGCCAGGCACCAAGATCCCTCCCAGGTGGCTGGGTCTGAGGGGAACTCCGAGCAGCCCTAGGTCCTCAAAGTCTGGATTTGTGTGGAAAAGGCAGCTCTCACTTGGCCTTGGCGAGGCCTCGGTTGGTTGGTGAGTGCCACACGGTTTCTTTGTGTGCTTGCATGGATTGGAATAGCCATTGTGTTCTTCCGTCTTCCCTGCTGGTGTTTCCACAGTGGGTGGCCTGAGCCCAGAGCAGCTCCCCATATCCCTGTGCAGGCCACCTGTCTCGGGTGATGGAGAGCATCATTATGCTCCGTCTGAACGCTCTGCTTTCGGATGGCCCCATGCTCCACCTCCTGATAGCTCGTGGCGCGGGGCCACGGCTTAACAAATGGCTGAAAATGGGTCCTAATTAGTGGAAAAGTGCTTTCTTCATATTTTCTCACTCGAGTGTGCAGTGATTCATTTTTCTTCTGCAATCAGCTCACTGCTAAAGTAAATCTGACTCTCTTCCCGCCATTGCACACCAAAAGTTAACTCTAATGGGTAGGAGGTTAGGTTTGTTGAGAGAGCAATGCAGTAAAAAGAGGGGATCCAATGTGGTCTTGTCTGTCTGGTCTTCCTTTCTTCGTTTTTTCCTCCCTTGTCTTCTCTGTCATTCCCTTCCCTCCATTTGCCTTGCCTTTCCTGTCCTTCCCTTCCCTTCCTTCCCCTCTTTCTTTCTATAATTGGTGGGGGGTTTGCACAGACTGCCAAAACACTAAGAACTGTGTAAAGTGTTTTTGAATGGCCTTACACATATTGAAGTAGATTTTTATGCTCCATTTTTGAGATCACACACTAAAATCTATACCTTTAAAGCATTTTCTGTTAGTTTGAAACTATTTGAAAATGAACAATGTGGTTTAGATTAGAGTCCTGTTCTGAAGCTAGGAGTTCCACTATGAATATTGATTTATCAGTTTTTGACAAATTTTTGTTGTTATACCAGATTTTCACTGGCAAACCTAGAGCAAATAAAATTCCACATAAGATACTTCCCTAGACCTAATGGGAAAAATGTTTAATTTAGAGTCTTTAGGAGAAATGAGAATGAGGAATTGACCTTTTGTAAGCTTACTTCTGAGGCACTCTGAAGTGTGTTCCAGTGCTTTTAATGGAAACTAGAGAGAGCCAGCAACCCCCTAGTGTGAGCCCCACTTTTAACCGGAAAAAGTGACCTTTTCCTCCTCCTTTGTGCTGAGTTTTGCGTAGGGCAGAAAATTAAGCTGATATTCAAAGAGATTCACTGCAAAAACATATTGATAAATCGTATATTCTATTTCATTAAATTAAAACCATACTGCTAATTATCTCAGGTTGTTAAACATAAGGCAATTAATTATCATTTTAAAAGTTGGTAGGAAGTTGTGAGTACTTTTGCAGTATGAGTGTTTTCCCGCTTTAGTATGAGGTTGTGTATGTTTGCTTGAATTTACAGAATTTTCACTTTAAGAGCAGACAATGTTTTGTTAAAGAAATGAAATTTGCTAAAAAGGAGCATGTAAAGTGAAACATTAAAAATAAATAATTTCAACTTACTTAAGAGCTGCAGAAAAATCTGATTGCTGTGTTTAAAATGAATTTTCCCACATTTCGCTCTCTTATGGACAGGAGCATTTTCTGTCAGGTTATAAATAAAGACATGCCCATTTTTTGTACCCCCACAAATGAGGAAGTTGTAAGCTCTCTGAGGTTTTACTGATGAGCCCCCTC
CCCCTGGGTTTGCATGAAGAGATCATAGGCCACAAATAAAGGACTACAAAATGGGGTCTAAACTATCCTGGTGGGGCCTGATACCCACGTTTCGCATGGACCTTACGATGTGATGAATGGTTTTGGCATGAGTGTCTTAAGAATGCTTCCAGATTCGGGTTACAGGACAGCCAGCGCTGAGCTCCCTATTGCAGAACAAAGTAGGAATCTAGAACTTTCTTGCTAACAGGATCCAGCTAAAACACCAAGTTAGATTCTTAAATGATGTTCTTTTCTGTCATTATTTGATTGTTGTCAGTAGCAGTAATTGTTACCAAGCCATTGATGCTTCTATTCTTCCCTTTGCCCTTCTGAGACACAGCTCATTTTGACTTCAGTGGAACCCCTCGAAGGTGGGGTGATGAGCAAGGTGAATTTTCAAAGTAAAGCTACTAAGAGACCAAACTACAATTTAAGGAACCTGATTTTTGAATCAAATTCCATATACTGTGGGTATAGTTCAACATAGATTAATTTCTTATAGTTATTATGAAAAAAATCTCATCTTGATGATAGCTGATAATTTTGTGGGTGTCGTAAACAAAACAGAGGTCAGAATTCAGTCCCTTGGGGAAAATTTCCAATTAGTAGGAAACCAAGTGGCCTACCTTAGTTTGAAGACACCCATCAGGATGTCTGCACCTTTTCATCCTCTCTGGAGGAAAGACTAAATACCCATTATTGTATATAGGTCAGGCCAAAGCAGCCTTTTATATTGCAAGGAATAAGAGGTAAATAGATATATGTGCAACAATGAATCCCCTAATGTGTTTACTCTAGAACACATGTTCTTTCTGTATTTATATGTAGATTTTGTAGATCTTGTCTTACCACCTGCTAATGGTAGATACTGTATCTAAATAAGTTGAGGAAAATTTATAGTACCTAGGAATGTGTCCTCAGTGGGCCAATCAATCAATCATGACTTCAGGTTATTTTTAATAAATATACACGTATGGGTTCATAAACAATGGGATGTTCTTGTGAAGATCTAAATAATTTTACTTCTTTGGGACTAAATAAAATATAGCTTTTGCCAAATAAACTCACACAAGCACTTATTTTAATAGAAGTCAAATGGCTTTGCAGAAACTTCAGTTTTACAGGTGCATTGTTTGAAATGTTACGGGTATACAAGTGGATTTCTCTATTATGTACAGTGTTAAGTTTGAGTTTCAAAATGTCCACCTGAAATGATTTACTTGTACGTTAAGATAATTTAACTGCTAAGAAGGCAAGATAAAGCATTCTTTGTGACACCATATGGCCTTGCTGAGGGAAAAACTTACTGTTATAAGTTTGTGTTTATCTCTCTTTTTAAAAAAAAATGAAGAAAAAAACGTTTAAAATAATGGGAACACAGCAGTTCCTGGGGTCCTCTGTCTCTTTATCTTATTATAGTAAATTACCAAAAAAATAATGACCTGGGGCATGTCTGTGTGGACCCTTCTTTTAGAGGCAGTTTCTGTGTTTTGTAAAGCTGTAGGTTCTATTTTCATTGCACTTCATATTGCTGCACAGCTCCTGACCATGCATGAAGGTCCTCTGAAATCGGTAAGAGGGCAGAAGAAAATGATTCTAAACTTAGATTTTTTTAACTTAAGTGATGAAGTGTGAAACGCCATTTATATTTGAGGAAGCTACCTAGGAAGTGGCTCATGTCGATGGCCCAAATCAGAAGAGGGCCTGTAAAAGCTTCTATCAATTTTGACTGTGTATGCTTCTACCATGGCGGCTCAATAAACAGCAGTATTAGTTTAAGAGTGGATGGTACAGTAGTATAGACGGGAAGCCTCTCCTCTCCGTGTGAACCGTGCACCCCTATGAGAGGGTAGAGACAATACAATATGCCTGTAACGTCAGGACAGACAGTCATGGCCAGCTTGAACTCCAGCCCTGGGCTTCTTGCAGCAACAAACGTGAACACAGAGGACTGTCTCCAACTCCACTTTCTCTATTTT
TAAAACAACTTTTTGAATACAGTATCTGCCATCTTTTCTTATACCTCACTTTGAAACAGGTGGCTCCACTGTGGCATTTAAAATGTTCTGTTTCTTTTCCCTCTGTATCAAATACCTCTTTACCAAGAAAACATTCAAACAGCATAGTTTTTAACTGTATTTTGAAAGGTTTCCTTAGTTCCCTTTGACCCTTCCTCTTTTGCATATCAGTTCCTGGCCATAAAAATAAAAAATGCTAGGACAGAATTGCACATCTGAGCTGATTTGCCCTCAAAAAGTTTCACAGTGGAACAAACCGCAGGAGGAGTTTTCTGTGGCTCAGTTAAATGTCGGGGGAGGGTGGTGTGAAAGCCAAATTGGATTCCTGCTTTCCTGTTTAAATCTTGTTTTTCATTGTTATTTGCACCAGCAATACTCTGTGGAATAATCATGAAAATGTGTAGATTGGCAGCTAATTTTTGAAAAATGAAAAGAATCAGAAATGAAATAAGAGTGCTCGGAAGTTTTTATGTTCTCTCAACCTGTTTTGTCAAATTGTTACGAAAACCTATAAGGTCTCTTTGACTAGATACAAAGACTTTGCACATTGCCTTAGCTTTCTCTTGAAGCATTTCCTTTTTTAAAATACAGTGTAATTCACAGTGATATGATAGATTTGCAAAAGTAAAATCTACCAGTCTGAAGATGAAAGGACTTGTCTCTTAGCAGGAATAATGGGTTTTATTAAAGAGGTCTGTGACCTAAGGCATTTTAAATAAATTACAGGCTTGGTCCCTGTCTCCCCCATGTATCTACTCCCTTCAATATAAGCATCATTGAGTATTTAAGGAAATAACCCCAAATGTAACTCTAGTGTAGCTTCACTTGTCAGGGAGGAAAAAGTAAATAGCATACATTTGGCCAAATAACCAGAACTTTACTGTAGAAGTTTTATGATGAAATTTGCCTTTAGTGCAGAGTATTACAAAGATCATGTTTAGTTTCTAGCAGTATATAAGTAGCATCCATCCTTATCTGTCATGCATTTGGAGTGTGCGACCCCTGCACTGGGCTGCAACATTCTGATGGGCAAGAGTGCTAGGGAGAAAGAGGCATCACCATCAGACTGCACGGGTTCAAGTGTCAGCTCTGTGGTTGATTAGCTGTGTGACCTGGGGAAAGCTATTTCTCTTAGCCTTGGTTCTCTCATCTATAAAATGGAGATAATGATGCAGATGCCTTGGGTTTAATTGGGAGAGTTAAAGACACATTTACATATTTAGCAAGTAGGTGTTGAATTCTAGCTCTACATTGGACACTATGCCAGGTGCTCAAATAAACAAGTGGACAAGACAGACAACACCCATGGTCTTATGAGGCTTAACCATTTGCCTCTTCAATGCCAGAAACTTAGTAGGTTGATTAGATAAAGCCAGTGAGTACCAGTATCCTTTTCTTTGCAGCCTTTTCCTGGCACACTAAAAATACTCAGTACATATGAAATATCACTGGACAAAGAATCCCCCTTAGAGTACCAGTGGAGAAGGAAGGCATTTGCTTAAAAGCAAACCAACAGAAAGACATTGTAAGGCAGTTGTTTAAGTCTCAGAGACCTATAATTTTTTTCTTTTTTCTTTTTTTTCATCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTGCAAGCTCCACCTTCCGGGTTCATGCCATTCTTCTGCCTCAGCCTCCCAAGTAGCAGAGACTACAGGCGCCCGCCACCACACCTGGCTAATTTTTTGTATTTTTAGTGGAGACGGGGTTTCGCCGTGTTAGCCAGGATGGTCTTGATCTCCTGACCTCATGATCCGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACTGGCATGAGCCACCACGCCCGGCAACTACAATTGTTCTTAAAGCTTGTAGAATTACTGTGTGCTACCAACAGACAGGCTAATTTTGAGTGACCCTCAGTACTTTGTACAGTTAATTTGGCACGCTGTGT
ACTTAGTGGCTTTTTAACAGCTATAAATTTGGGCTGCTAGAAAAGTAGTAAAGTTGTGATTCTTGACAGGCATCTATCTGCATTTTCATTTTTACTTCATTTGTCTAGACTCAGCTTGTCAGAATTATGGAAGAGACTCCTTGTGTCAGGGCAAGCACTGTGAAGAGAGGTATTCACTGTCAGAAAAGAGAGGGGAGCTGGAGGCAGCTCAGAGGCCTGAGACCCGCCTCCACAGGAGCCCCAGCAGGTTCGGTGGAGCTCTGGCCACACTCTCCTTTGGGATGCTGAAGTCAGAATGAGTTCACTTCCCAGCCAGTCTTGCCAAGGCTCCTCACCTGGAAGCAGCAACTGCCCAGGGCTGTTGGATGTTTCTCCCCAGGGGACAGCCAGGTCCCAGTCCCGCCTCGGTGTGGAAGGAGGAAAGGCAGGGTCCAGGAAGCTGTTTCAGGACAGGCCCAAGGTCCCCCAGGGATGCCTTTCAGGGTCAGCGGAGGCTGTAAATCAGCAGGGCCCACACGGCCTGGAAGAGGCCCCTGTGCTGTCGGCTTGCCCGGCTTGCCCGGCTCCTAGTCCGGCTTCTGCTCCTCCTTTGTAAAGTTATGGATATGCTAATAGTTTCCAACTGAGACTAGGAAAGTAAGTCCTACTTGACACTGTTTGGTCAGAAAGAGGGAGAGAAAGGAGAAGGACAGAGAGAGACTGAGAGAGAGACAGTCTCAGACAAAGGGAGACGGAGGGAGGGAGGGAGAGACAGAGAAAGAGATGGGAGGTAGGTGTGGGAGGAGGGAGAGATGCAGAAGGCAGAGGAAAGACAGACAGAGATTTAGACCTCCCAAGTCAGTGAGCAGTCCAGAGTTGGAGTGGAGGGTGCCTGGTGGCTTGTGACTGCAGACTCCACTCCCCGCTCCTAGAGGCACAGCCATGGACAGCTTCTGTCACGTTGGCCCTGCACTTATCTCTGCATCTATTTCCCCTTGTGCAAGATTCAGAACTGCATGCTCCAAAAAAACAATAAAAGCATTCATGTTCATAAGAATTGCACAGGTAAAAGGTAGTTTGCTGATATTGTTGTATTTTTTACTATCGCTTCTTTTAGGTCTTGCCTGAAATTGTTTGGGTTTCCCAGGCAAAGTAGAAAACTGCGGTACGTTTCTGTGAAATAATTATTCCTTCTGGCATCTCCCTTTACAGACCTACTGATCTTGATTTTTCATTTAGGTGAAAGTTTGTGAAAACATGCCATTAGCTTGCTTTGTGATTAACTCCTTTTACTGAATGTGAGCTCCTTTTAAATTGAGGCCATATCAAGCTTAAATTCCATATTTTACCCGGCACTCTGCATTTCTTCCATGTGGGAGAGGAGGGGCTCAGTAAGTGCTTTGTAAAATACACAGCCGAAGTGATGCACGTGCTAACAAAGGAGTGTGACAGGACTTAAGTGCCCTTCTAGACACTTCAGGCTCCCCTTTGTAAGCTGTCTTGGAAGAGGCCACATTTCCTTTCCCTCAAACAGTTTCTCATTGTTTGATTATTCTTTTAGCCTTTCTCTGGAAGCAAAGCCACTTTTACGAGAAAGTCACTGCTTTTTCATCTCAAGAGATGCAAGTTTGGAGTTTGGGGAAGTTTTCAGGTGCCCGTCAAGTCATCCTTTATGATGTCAGACGAGTCAGGCCACAGAATTCACAGGGCTCAGTGCAGACCGAAAACTTGAGGCCTCTTGTTCAGAAATTATTAAAAATTTTGGTGAACATCACCCCAAGCAAAGAGATCCCCTAAGCACCAGCCCCCAAGCAACTGCACTCATAAGCCCATGAAGCCCCCTGCTGTCAGAAACAATGTGGTTGAAATTGTGTATGCACTTGGAAGTGAGATGGATTGCAAAACACAGGTCTCCATGCTGGGGCAGGAGTGGTGATAGGGCATGGAGTGGAAATGTCCAGCAGGCCCACGTGCGAAAATGCAGAGCTCTCTGGCTCTTGCAGACTTGGCTGCTGACAATA
GACGCGCTCCAGGAAGGTGCTCGCTGTGGTGTGATCTGCTGCCCACCCCTAGCTCCCTCCAGGAGACTGGTGCGGGGACTGTTTGCAAATGACTGCAAAAGTAAGAAGGTTCCCACAGAGCAGAGCTTGATTTGGGGACCAGCCGAGGGCAGTTTGTCAGGATTCCGGCTTGAAACTGTTCTCACATCTCACCGCCTGAAAGGACGAGTGTGTCCAGAGGACTTAGCATTGATCACCTCTGTCTCCATGCAGCAAACTCAGAGGCTCAGCCCGCATTCCACTGGAAGGGCGTTTGCCAGTGGTGTTGGTTGGAAGAGCCTTGACTTTGCCTTAGGAAACATCTTTTTTTAAGAATTGAAAATAACTTGAGTATGCAACAGTAGGGCATTTGTTATATAAATTAGTTGACTAGTGTGTAGCCAGTAAAATGATGATGGTGGTGTGTATTTGTTAAATAAAAAGATATGTGTGGTATTAAATTAAAAAATATTTTAAAACAACATATTTGTAATCTGTTTAGTGTCCTCTTTTTGTAAAAAGTACAGAAATAAATATACAGAAAAAATAGTAGTCCTAAGTGGTAGAAATTATGAGCATTTTCTTGCCTTTAAAAAAAGTTGTAAAAGATTGTATCATTTATGTAGCAAAAAGTTTTAAGTCAGCATTCTAAAAATTTCGTGTTGTTATAGTTGCTGTGACAAGATTTAACTTCTGTATGCTTCACCAATCAATACAGAGGTATTTAAGACCCGGTGTGTGATAGGCCGCGCTAAAATACTATACACATCTTCAGAAAACTAGAGAACTAACTTCTAACTTCCTATATTAGTGTGGCACGGCTGTTACAAAGATTTTTCTCATTTGAGTCTATCTTGCTTCTTTATCATTGTTTTGACAGTTTCAGAAGAATCGTGGCTTTTCCCCTTTTTTACAGTAAAGGTACCTGAGACTCTTGACGTATTGCTTTTTGGAAATGCTTGTGCTGGTCACATGCTTGCATCTGGGCTAGTGTGTCTGGCTTCCGTGTGCTGGTGGATGCTTACTCTGTTTTCTGAAATACTTTTTCTGTACAGTGGCCACTAGCTGTACTCCTAAGCCACACACCTACCTTGAAAATTCATGTCACTTTTAGAAATAGATAAAAGCCCCTCCCATCCAGAAAAAGTGACTATCATGTATATCCTCATCATGACTAATACTGATATTCCTGAAATTGAAAATACATATTCCATATGTACCATAAAAGGTATTAAAGATATATGGAGTGATAGATATATTATATATAACACTTCTACCCTCACAGTTTTCAGCCTAATTGAGAGGGTAAGATCCCTGAATCATCCATCAGTTTTTCAGGTCTCTGCTGAAAGCAGGCCACAGCTCAGATCCACACATCTGAACCAGAGACAGAGGTGGCCAAAAATAAAAAGGGGGACAGGGGGACAACCTGGTTTAGAGTCAACAAATAGACTGCATTTTCTGGTTAGTGAAGGAGCTCTCCTGAAAGTCATATACCAGAGCATAAATGAGCAGATTTCCTTGAGGTCACCTTCTGCTGGCCATAGCTTTCTTATCTGTGGAGCTGCCAGCTGTCATCCACTTTGGGGCACCTGAGACTGCCGAGCGGCAGGCCAGGACCCAAGTGCGAAAACACAGAACACCTTTTTGTTTCTACTCCACTGATGCTGGGGTTCTCTCCCTGGTGTTTGTGGCTCGTAGTACACTCTGTGGAACATTCACTATGGTCATCGAAGGGCAGCATCTTCCCAGTTGTTTCTTTCTTTTCTTTTTTTTTTTTAATTTAAACCGATCTGAGAAGCCAGCCATCTGTCAGCAAAACAGGAAGGCTCGGGCTGTCTCCTGGGCTCGTTTTGCTGCCGTAGTGAGCGTCACTTCTCCCCGTGTAAGAGTGCTGGTGAAGGCTGAGGCAAGGGCCCAGAAAGATTGAGGGACAAAGACAGGAGCGCCCGCATTGCCCATCTGCCAGGCTGGAGGTGTAT
TCATTATTGATGGAGGTAGTGCAGTTGCTGCTCAGATATGCAGCCCTGCCTGGGTAAATGAGACATTCTTCAGCAAATTGCTTCGTTTTTTGATTGCTGATTGTACGCGTGTCACCAAGCTGACTCAAGGTTCATCGATGCATGCTCAGTAAATTAGAAAGAACATAACTATGGATCAGCCAAGAGAATGAATTCTGTGCCTACAATGACCCAGGGCCATTTAATTTTCTGCTTAATTTTGTTGCAGTCAGTTTGCATTTTGGGTTATTATGCAGTAGGAAATTAACAATAAATAACAAATTTGGTCCTCCTGTGCTTGTAATGATATTTTTATAAATCTTTGTAATGCTGTTTTTAAAAGGATCAAGGTCTGTGCCAGTCTGATACTCCAGCAAGTATGTGAGGAGGAAAATGCATTATTCTTGCTAGATAACCTTGTTGTTAAATAGCATAGGGGTTCTTTATCTCTCTCTCTTTCTCATATCTTATTAGTATTTTTGCTTTAAACTAAAATCCCTTCCTCTCTTTCTCAGATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGTGAGACCTTATGAGATAGCTGTGTGGGAAGTTCATGAGAAAAGCTTCCCTGGGGCCGGAAGTCACAGTGCTTGGTATGCTCATGGGGGAGGAATAGGGGCTATTCTGCAAAAGAAAAGACCATGATGGAATTTGCCTGAGTGTTTCCTTCACCTGTTACAAATTATCTCACTTTGAGCTGAACAGAAAGCCTCCAAGATGAAATTAGTTTTACTGTTAAACTTCAGGAAAAAAAAACGGGAAGAGTTAAATACATTTTTGTACTGTTGGAAGGAAAAATGGCTGATTGGTTTAAAACCCAAACACATGCCAATGATGGTACTTAAAGAGAGAGAGAGAGAGAAGCTTGAAAAACATAATTGTTGGGCACAGTCATGACTGTTTGTTCATTAAGCATGGACACAACATTGCTCCCCTTTGCCATATATCTTTTCAAGCCGTATTGGATATAGCTCTTCTCATCCAGGAGACCCAGGAAGTGGAGAAGTCTGTAGTAGGAAAAGCCTAAGGGTAGGTCACAGACTGTGACCATTTGGCAGCACTGAGGGTGGACGGCGAGCCAGTCCAACAAAACCGCACAGTTCCCCAGTGCATGGACATAGGAAGACAGCTTTCTATCTGGCCCTGTATCCAGAGGCGTCAGCCCCAGTAGCAGCTTTCATGGACTTTGGGGTTTTCGGTATTTCATATTTTTGAGCCTCACAGACTCACAGCCAGCCCCAGAGGCTGACTTATATTTGAGAAAGTTCTCAGTGGCACCTTGCCTTGGCTGAGCGCCCTCGTGTTTTGAAGTTTCTATGGGATTCTACAAGTTGGTGCTCCTGATGAAGACCAGGACCTATGTGTGGCTGCTCCCCTGCTTGGTGGTTTCCCTGGGGAAGGTGCAGGAGAGGATCTTCTGAGTTCCATGGAACTGGAGATAGATCTGCCAATCACAGGCTTCCTTCTCCACCACTCCTCAGCCGCTCTATTCATGTTTCAGATTTTGGACTTAAACTCTCCCAGGTGCAAAGAACAAACAAAAGGCTAGCTTATTTTTCTTTTAGAGTGAGGCTTCGTATTTATTACAATATAATTGCCACATTCTTTGTGTAATTCTCACATTTATATCTTAAATATAATTCTCATGAATGAGAATTATATAATTCTCTTTTTGTATATCATTGAATATTTTCACTTAATTTTTAATTTTTTTAATCGTCACAAAATAATTGTGTACATAGACACAAAATAATTGGGTACATAGTGATGTTGTGATATATACAATGTATAGTAATCGGATCAGGTAAATCAGCATATTCATCATCTCAAACATTTATCGTTTCTTTGTATTAGGAACATTCGACATCTTCCTTCTAGCTATTTGAAACTATATATTATTGTTGACTACAGTCATCCTGCAATGGTGTAGAA
CACTAGAACTTATTCTTCCTACCTAGCTGTAATTTTGTCTCCTTTAACAAATCTCTCCCTATCTTCCACTCCCCCGACCTTTCCAGCCTCTATTAGCCTCTGTCCTACTTTCTACTTATAATGATGACAGCAGCATTTGTTAGTTTCCACATGTGAGTGAGAACATGTGGCTTTTTAACTTTTAGAATGTGGTATTCAGGCACTTCATGGTACAGTTGGTAAAAGTGAAAATGTGTCCAAAAGTTTGTGATTATCTATATAAACAAAAATGGTATAAATACAAATATCAATTTTGCATTGAAGAACTTACCTTAGAGGTATATTCTCACAAGTGCACAGAGCATTTAAGCATTTGTTCACTGCAGCATTGTTATCAGTATTTTAAAACTATGGTACATCCATGTACTTCCACATACAGCTCTTAAAAATAAGGAGGATATGAATGAACTAGTATGAAAAGAAGTCCAAATACATGTGAAAGTGAGAATAGCATGGTTCTGGATGGTATGCAAAGTATGATCTCGTTCTTTTAAAAGAAAATAAATTACATACACATACATATTTTCTATATGCTTGCCCATAACGTTTAGGAAAATTCTTGGGTGATATTTATTAACCTGGACTTCCTCTTGGAAGACTGATGGTAGAAGGAAGGGGACGAGTTAGGGAAGAGGAGGAGAAGGAAAACTTTGCTTTTCATCTTCTACCTTTTAGCATTATTTGAATTTATTTTCCTTAAGCGTTTACTTTGTTTCGTAAACAAAAAAGCACAAAAACAAAAAACGAGTTAAATGGGAAAAAAAGCAGTTTAGCTCTTTATAGCCTCTCATTTGGCTTCGCCAGCCTCTCACTGCAGCCTCAGAGAGCTGGTCTGGGAAACACTGGTAGATGAGGACTGTAATCCTCACTCATGGAAGAGGATCTCATTCACTGGGTTTGCTGACTGTGACTAGAAGTGATTAGGGTGTCAAAAAACCCAAGCATGTTAAAAATTTCCAGAGGCCAAAAAGATGCTTTCATTGTTCTGCTCTTCTTTTCCTTGTCGCTTTCACTTTGGGTAGCTTCTAAATTGGTATTTTGCATGGTGCATTTAAAGAAAATGAGACCCCTTTGGCCAATGCAGGAGTCTACACTCTGATATTCTAGAGTCAAAGCTGAATGCTGACACCTAGGAATTCATCTCTAGAATGTTTATATAAGGAATAGCCCCTCAGTATTCCGATCTCGTATCTTAGTAACGAAACTAACAAAAGCCTGATTCTCCTCTGGTAGTTTTCTTGTCTTTACCATAATACAAAATAAGTAATTTGTTCTGCACCCTGACTGTTCAAAGGATAGGGTAGCTGGGGGCGGGGACAAGAATGGAGACCTTATTACATAAGACTTCCTGAAAAAGGAAACTCTGTTTTTGTTTGAAATGATTTGGTCTGAAATTTAGTTTGTGTACACTTACCAAAGGGATTCCTATTTCTAAAACACTCATACTGCTTTTGATTCCTGTTAACCTTTGAGCACTCTACGTAATGATGAGAGCACTTAAAGAGTCATGTCACTTTTAGTAAAGAATCAAAGGATACTTTTTCTACTTCTTCGAGTTTGATCTCTGCTTCTCCAGTTAAAACCAGTATTTGTTTTTTTCATTTCTAAAGTTGGAAGAAATGACAGTTAGTTATGGCATAAGGATGTACATTTAACCAAATAGGAGTTGACATTCTTGGTAAGAAATCTTACCAAGATTATGTTATAGATTATAAGAAATCTTAACAAGAATATGTTCCTAAATCATCCTCTTTTCCCATAAAATATTAAAGTATCAGCAATTTCATAGGATTCAACCTAATGTATGCGAAATGCTAGATAAACAGATAAATACTTAATATCTGGCTTTTTTTCAAAGCACTGGGTTATTTGTTCCTTGAGATTTATCCTAAATGTGGGCTATACCCTGGTTTACAGTGTCTCACAGATGTGTAGTAGTAGACACTCCATAAGTGTTTACT
GACTTGAATCCACAGGGTACTGAGAAAATGCTACTGATAGACTTGGAGGAGAGCATATCTAAAGCAAGCTACCCTTTCCTTTAGGGCACGTCTCACTAATTCTTTGGGTAAAGCGTATTTTTCTTCCTTTTGTGTTTTTGGCAGTCTTTCCAAAAATACGTGTTATACCTATGCATTATTTTTTGGTTTGGTTTCTAAAGAAAGAGTCAGCCGGTGGGAAAGTGAAGGATGTGGGAACTGAGAGATCTGCATCAGCATCCCACCTCTACCTCCCACGATGGGACCTGAGACAGTTATTTTTGCCTCCTGGACCACTATAGTATCATCTGTAACAGGAGGGACTTGAGCCAGTTGATCTCTAAGGTTCCTCTGGCACCTGTGACCCTAAATAGATATTGGATATTGGTTTAATGCTATTTGTAGTGTGTTTTTTTGGGGATATGGAAACCAGAAGTTTGTTTCCATAAACATAAACATAAACTGTATATATCTAAAGGATATGGAAACCTTTAGATATATATAATCTGCTTACGTAAAGAAGGTTTGTATATATTGCAGTGTCAATGGGAATATTTTATCAAGTTAAGCATAGTAAATCACATTGATTAAATGCTTTGTATTTACCAAACATTACCCAAAGTGTTTTCTCCTTTCAACCTCACAAGGACCCACAGAAGAAAATACAGTTATCATTTCCAACCTGCAGGGAGCTGAGACACAGAGAATTTAAGCAACTGACCGGAAGTCCAACAGGGAGTCAGAGATTGCTCTGGGGTGTGATCCCCACTTGGACCCTAGAGTGGAAGCTTCTCCACTACTTTATAGAGTTGAGATTCTATATTTTGAGCTTGTATTTACCCAGAGAATTATATCCTCTTGGGCAATTGTGTATAATAAAACCTCATGCATTTAGGAGAGGCGGGATGACAGAACTTTGTTGAGTGAATTATAATCTACTTGAGAAATTATTTGCTTACATTTTATAAGCTAATTATACCATATCTCATCCAGTTTTCCCAGAACACTTCTCATAGGTAATGCTTTATTTGAAACATAGGCCATAGGTAAGTTAAGTGTAAATGTGTATTTTTATAATTTAACCAGAAGTTTATTTCATTTTTCTAAATAAGTGAAATTGTATTGCATCTTCTAAATTATTCTATTTAAACACTTGATGTCTTGCTGTCTCCGTCTCTGTGTGTTTGCATGTCATTGTACATGTTCTTAGGAAAAGTGTGGGAGCTTGACGCAATATATACCTTATGTTTCTATGTGCATATAGTTTACCAAATAATACCATAAGTTTACTTAGCATATTAGAATCCATGCACATTATTTTTATTTTATCTTCACCGCAACCCTGTGGGATAGACCAAAATCATGCTTTTCAGCCTCCTTTTTCCACTTGAGGAAAGGAGTCTTAAAAAAGGGACCAGTCTCATGTTCCCATTCGTCTTACAACTAATTGGTCAAGCCAGAAAGCCAGAACTATGTCCTGGGTCACTAACTCCTAGTCACTGTGTGTTAGTATTTGAGATGCCTGTTGGCTTGATTTAGTCATTTATTTTTTAGTGTTTTATAATCCTTGCATACTTTTACATTTTAAATGGTTAACCAGGCAAATTGGTTTAAAATCAGTGCATAAAAATACTGTGCCTATCATGATGGGTTTCATGAAGTGATAACTTTTCATCATGGAGATCCTCAGCTGTCACAGAAGATGAGGGGCCCTGGGTACAGAGGCTCACGTGAGGGATGAAAGTCTCAGCAGCCCGGACTTACACTTTGGGGCTTTTAGGCAAATCAGACAACCTCTTAAGAACTATCACTGAGTTCAGGCAAGGCGAGCTTGAATTAACACAGGGCCCTTGGTGGGCATGTGAATATATCTCACTTCACTACCATCCAGTTCTGACTCTTTACTAGATGCCCCTGTACATACCAAGACTGATTTTTTATTCTCCCTTCTCCCCATGTGGTTTCTTCTGCATAGAGAGTTC
CTATTGATCAGTCTGACCCATGGTATTTTAGAATTGCGATCCCTACTGTTTCATTATTCCTTTTTCTCCCCCATGTTGAAAAAAATAAATGTCCTGAGATGCAAGATCAGGGACACTGGAGCACTGACATTTAGTTCAGTGCAGGAACTGAAGGCAGATGTAATTCTTAAGAAGCGTACCTGTTATTATGAACCATCCTCAACAAATTGTAGTGGATCTTGTTTTCTCATAGATACAGCAGTTAAATTTTTTAATAAAAGTAACTAAGAGTTATTTGGATGTATTTTAGCATGCACTGAGCGGAAAGTACGACATTTCTTCATTGGGTAAGTCCTGATTCTTTATGATCCTCACTTGGTTCCAGGGCCCCATGCATCTAAGGGTGTCTCAGAGCATCCTGCAGTGCTCCAGCATGATCGCAGGGAAAAGCTATAGGAGGAAAAGAGTCAATAAAGTTTAGTTTCTCAACCTCCCACCTCCACCCCATAATAATGACAGCTGGTTAATCATGAGACGCGTGCACACCCCACACGCCCTGTACATGTTTACTCATTGGGATAGCATGTCAGGCCAGAAGGCTCCATGGTCATTTCTATGAAGGTACTTTAGCAGGTCTTCAAGAAGGCAAGTGGCCTGGGTCCCTGCCTCCCCAAATTGCAAGCTCCCTGCTTTATGTAGGAGACCTATGTGTATATTACAGTTCTGTGTAAGATTATTTTGTTATTCTTACCCCCACACCCACCCCCCAACCCCCCGCTGCCACCAAAAAAAAAAAAAAAAAATTCCTCTGACAACCTTCATAAAGTCCTGGGAGTTTGAACACCATTGCTCTAGGAAGTCATCTTATACAAAAATAAGAGTTGTGAGGTGGTTCATATACCTCCTGCGTTCTCCTATTTGGAGTTTTTCCCCATTTATGAAAGAGGTGAAAACGCTAAGATATTTAGCAATTATTACTTTAAACATTTTCTATTTATAGGCCGGGCGCAGTGGCTCATGCCTGTAATCCCAGCGCTTGGGAGGCCAAGGCAGGCAGATCACGAGGTCAGGAGATCGAGACCATCTTGGCTAACACGGTGAAACCCTGTCTCTACTAAAAAAATACAAAAATTTAGTTGGGCATGGTGGGGGATACCTGTGGTCCCAGCTACTCGGGAGGCTGAGACAGGAGAATGGCTTGAACCTGGGAGGCCGAGCTTGCAGTGAGCCAAGATCGCGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGACTCTGTCTCGAAAAAAAAAAAAAATTCTATTTACAGCAGTGAAAATAGTAGTGACTTAATGCACATTGCCAAGGCTTTAGCATAACATGAACACTTTCACTCAATGTCTCTCTGGCCTTTTGTTTTTCCTTGGGAAATTCTTATAATCCTGCTCCGTCTTTAACTATTCATTTTGTATTGGCTATCCAAATATACCCAATAATGCTCTTTCTGAAAATATGCCAATTGTGGTAATTACAGCTAAGCTGGAATATTAAATTGTGATGTCTGTTTTCCAGAGAATGAAGTAGTATTCCCCAGAGCATAGGCTTGGTGCCTGTGCAGGTTCTATTTTAAATATTCCAGGAAGGGTTGTTTTATATACTGAGGATGATTTTACTGGTCTTGCCAGTCGTCTGAAATGCTGGTATTACTCTTGTGGAAGGTTTATTCAAACAAACAAGGACATTTCACACAATACCTAGTCATGTTTTTCAGACATTTTAATGTTTGGTTCATCATTTGCACACACTCTCAAAAATCTAGGTTTGTCTATGTGTTCATATCATTTTGCCTGTTGCCAGCTCAGTCAGCAGGCACACTCTCCCAGGCTGTTGCTGTTTTGTTAGACTTCTTCAGGACCTTCATCTAAAATGGTCTTCCACACGTAGCTATACTGCATAAGTTCACATCATCTGTTTCTTGCATGTGGGTTGTGTCTCAACTCAAGTTTAAGTTAGATTTGGAAGGGCGGAAACTATAGGAGTTGC
AGCTTCAGTGGAGAAAAGAGCATTTCCTACTAGTTATGGCTTCCCAAGGAAGGTTAGATTCCTCAGAGTAGGAGTGATTCCCCAATGCTAGAACCTTTGGTCAAATATAATTCTAATCCAGTCAAAATAAATACAGGTATTCTGTAAAACCCGATTTCATTTTGTAAATCCTACTTTGTATAGTATAAGCAATTTTTGTATTTGTGTGGATTATATTTTATTTTCCTATTTCAAAGAGAAGAATTTGTATTAGCAGACTCCCTTTGCATGCGGAGAGGGGATCATTTTCCCAGTAGGCATGGGGTTCCCTTCCATTCCTTGTCCAGTCTTCTTTTCCCCACTAAGTTAAGTCAAACTAAGCAGCTGGTAAGATATTCCCTGGTTCTTGCAAAGAAAGTGAGCAGATGGCAGAATGTATAGCTCTAAGCAGAATACCTGGTGTGGTATCCTCAAACACAAATTGACAGGAGGGTGTGGTGTGGCAAGCTCATTGTGGGGGTAAATTGGAATAAGCTTACAGGGGGAAGAGTTGACAAAAGATAGGAAGAACCTTAAAAATATAGATGCCTTTTATGCAGTGATAAAATGTCTAGATATTTATACTGTGGTGATTATTAGGAATATGTGCAAAGATTGGCTATTAGGATGTTCATTACAGTGTTGTTTAATAATTATAAAAGGACAGAAAGCAATGTGGACTCAAAAATAGGAAAAGAATTTAAATAAATCCTAGTGTACCCGTTATACATGAAATTATGGAAATATGACCCTGAGCATGGAAATATGTACATGAGAATGTCTAAAAGCTAGTTCATTTTGAAAAACAAAATAATGTCACCTCATATTATTTATAGTATATAAAGATGATTTTAAGAGTGGCAGTGTCTGGGATTATAGGTGATTGTATTTCTTCCCTTTTGCACATCTATGTTCTCTCATTTGTATTGTGTGGGGAGAAGTGACTTTTTTTATAAAAAGAAAAAGGTATATGCATCCCAGCAGAGAAGCACTGGCTCCACCCAGTACCTGCCTCCTCATGCCACCCTCTCAAGCCAAAAGCCGGGGGAAGCCCAGGCACCTTGACCATGACCGCCCGAGACTCACACTTCTTCTTTCTCATCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGG", + // 50344378 - 1); + // + // // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion + // var codingRegion = new CodingRegion(50358658, 50367353, 169, 1602, 1434); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344383, 13, 17), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344384, 50344518, 19, 154), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + // new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + // new 
TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(1, 0, "GAATTCCGGCGT"), + // new RnaEdit(6, 5, "A"), + // new RnaEdit(16, 16, "T"), + // new RnaEdit(97, 97, "C"), + // new RnaEdit(316, 315, + // "CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCA
GAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAA
CTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTT
CTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + // }; + // + // string actualCds = + // new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence(); + // + // const string expectedCds = + // "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"; + // + // Assert.Equal(expectedCds, actualCds); + // } + // + // [Fact] + // public void GetCodingSequence_RnaEditSnv_StartsUtr() + // { + // // NM_001135635.1, chr11:65684281-65686531 + // var genomicSeq = new SimpleSequence( + // 
"TTTTAAAAAACACTCAAGACACAGACCCAAGCCGGGTTTTATTGAAATGCCAGGAGCAGGCACATGTCAAAGTAGCCAAGGAAGGGGGGACAGTGGTACAGGCTGTGTAAGTTGGCAGGGATGGGCAAGCCTCATGTCCATGGTCCTGGCATCCCCTCTGCCAGGGGATAAGTAGGCACAACTACCCTCCCCTCAAAATGGCATGCTCAGGCCAGTGGGGCCCCTACCCCTGGACCATGAAGGCTCCAAGAAGGGCTGGAAGCACTAAGTTTTCTCTCTCCTGAGGGGGAAGGAAAGAAGGGGAGATGCAGGAGGAAGGGGAGGTATAGCGGGGGATGAGCGTTCCAAGAAGTCTCTCCTTCTAGGTGTCTGCACCCAACTCATGGTGCTGGGCAGTGGAGAGGAGCAGCATTACAAAGGGAGGCTGAAGGCTCATCCCTCAGGGAACCGGAGCCCCCCAGCCTGTGGGGCTTGTGTCAGCCCTGAACAGAGGGCAGAAGTTCAAGGGGACTGAAGATGCAGGTAGTTCCCAAGTGACCTAGGAGTCCCCAGAGCTGGGGGGTGTGGCCTTCATAGGACAAGGAGGAAGACAGGAGGATCCAACCCCAGCATGGAGGGGGGAGTGGGCAGTCTCCCCAATTTGGCCCCCCTAGGTCAGTTCCACGTTGTTGGCACGGTCAAGCACTCGGGAGCCACGGGCACTACCCCCAAGCTGGAAACGGCTCTCATAGAGAGTGGGGCAGAGGTGCCAGCGATTGGCCCGGTAGATGCCCAGGTAGGTGTAGACATCAGGCTTGTAGGTGAGCAGGCACTTAATGCCCGCTGCACGGATGGCTGAATCCGCCTCCAGTACACCCAAGCGGTCCGTGAAGTCGTCCGTGTAAACACAGATGACCTGGCGCCCACCCTCCTTGGCACGTGGGCTCACCTTGGCCACCTGAAGCTGGCCTTCAACCACGGCCCGGGCAATGCCAGCCCAGGCGTGGTCCAGCTTGAAGCCCGGTGCCAGATGCATAAGCCACTTGCCCGAGAGCACGTGGTGGGTGATGGCGAGCTGGCGCAGGGTACCCGGTGTGATGGGCCGCCCACTGGTCTGCAGAGCTTCCCAGGCTGCCTGCAGGCCCTGCACGTCCCCGGAGTTGGGGCTGTAGCCCTGCCCATACACTGCAATCCAGCCCACAGGCTCTGAGTTGGGTGAACCGGGGTCCCCATAGCGGGTAACTTGGGATGGTGGGTACTTGGCCAGCCAGGCATCCAGCTCAGTGGCAGGCGTTGTGCGGGCATCAAACACTAGCCAGGGGTCCATGTCAGCTGCCATGGCCTCTGCAGCCAGGTGCTCGGCGGTGAAGCCATCCTCACGGCCACCTGGAGAGCCCTCCTCTTCCAGCTCCTCACCTGGTTCCATCCTGCTGTGAGGGAACCGAGTCAGGGCAGGGTCTGAGACAATAACTACAGATGCCAGGCACTGGATTAAACTGTGGCCTTGAGTAAGAGTTACTGTCGATGCGCCTCAGTTGCCTCATCTTTACAATGGGATAACAACTGTTCCTGTCCCGTAGATCTGCTATGAAAATTAGATGCCTGAGGAGTCAGCGCTCCAGAAGGGTTGCTGCAGTTATTACTATTCTCCTTGACTTACAGAAAAGGAAACTGAGGCTGAGAAAAAGGACTTGCCCAAGGTCACACCTGCAGTGCGTGGCAGGGCCGAAGGGTGAATCCAGGCGTGGGAGCAACCAGCCCCAGCTACACCTCCCGGCCCTGCCAAGGCCCCCTTTTCCTGGCAGGTATCCGGTGCGCTGGCATTTAATAGAGGAACGCAAAGAAGCGCACGTTCGCGCAGCTCCCGAGGCCGGCTCTGTAAGGCCAGGCCTCCCAGGCAGGCGTTATCGGGCCCACTTACAGACGAGGACGCTGAAGTCCAGAGAGGTTACAGGCCGTTCCGAGGCCAATGGGGCGGTTCCCAGACTCGAACCAGGGCTTGTTAGAGCCTGCAGGAGAG
CCAGGCTCCGGCCGTGCCGCGCCCGCCGCCATTAACGCCCACGGGCCCGAGCTGTGCTCCCGCCCCGGCCCTGCCCTGCCCCTCCCGCCGCCCGCAGTCACCTCCGGCCTTCGCTGCGTTCGACGCCGGCCCAGCCCCGGGCCCGGCTCCGCTCCTGCCGTGGCTCCGCGCCACCGCCACCGCGCCCCACCCCCGCCACGGCCGCCGCCGCCGCCGCCGCCATCTTAGCGCCGCGCCACCTCAACAACAACT", + // 65684281 - 1); + // + // // the stuff from the genome represents a small part, most of it is brought in through the RNA-edit insertion + // var codingRegion = new CodingRegion(65684930, 65686502, 30, 911, 882); + // + // var regions = new ITranscriptRegion[] + // { + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 65684281, 65685689, 152, 1560), + // new TranscriptRegion(TranscriptRegionType.Intron, 1, 65685690, 65686380, 151, 152), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 65686381, 65686531, 1, 151) + // }; + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(912, 912, "A"), + // new RnaEdit(986, 986, "C"), + // new RnaEdit(1561, 1560, "AAAAAAAAAAA") + // }; + // + // string actualCds = + // new CodingSequence(genomicSeq, codingRegion, regions, true, 0, rnaEdits).GetCodingSequence(); + // + // const string expectedCds = + // 
"ATGGCGGCGGCGGCGGCGGCGGCCGTGGCGGGGGTGGGGCGCGGTGGCGGTGGCGCGGAGCCACGGCAGGAGCGGAGCCGGGCCCGGGGCTGGGCCGGCGTCGAACGCAGCGAAGGCCGGAGCAGGATGGAACCAGGTGAGGAGCTGGAAGAGGAGGGCTCTCCAGGTGGCCGTGAGGATGGCTTCACCGCCGAGCACCTGGCTGCAGAGGCCATGGCAGCTGACATGGACCCCTGGCTAGTGTTTGATGCCCGCACAACGCCTGCCACTGAGCTGGATGCCTGGCTGGCCAAGTACCCACCATCCCAAGTTACCCGCTATGGGGACCCCGGTTCACCCAACTCAGAGCCTGTGGGCTGGATTGCAGTGTATGGGCAGGGCTACAGCCCCAACTCCGGGGACGTGCAGGGCCTGCAGGCAGCCTGGGAAGCTCTGCAGACCAGTGGGCGGCCCATCACACCGGGTACCCTGCGCCAGCTCGCCATCACCCACCACGTGCTCTCGGGCAAGTGGCTTATGCATCTGGCACCGGGCTTCAAGCTGGACCACGCCTGGGCTGGCATTGCCCGGGCCGTGGTTGAAGGCCAGCTTCAGGTGGCCAAGGTGAGCCCACGTGCCAAGGAGGGTGGGCGCCAGGTCATCTGTGTTTACACGGACGACTTCACGGACCGCTTGGGTGTACTGGAGGCGGATTCAGCCATCCGTGCAGCGGGCATTAAGTGCCTGCTCACCTACAAGCCTGATGTCTACACCTACCTGGGCATCTACCGGGCCAATCGCTGGCACCTCTGCCCCACTCTCTATGAGAGCCGTTTCCAGCTTGGGGGTAGTGCCCGTGGCTCCCGAGTGCTTGACCGTGCCAACAACGTGGAACTGACCTAG"; + // + // Assert.Equal(expectedCds, actualCds); + // } + // + // [Fact] + // public void GetCodingSequence_NonZeroStartExonPhase_CdsBeforeFirstExon() + // { + // // NM_001220775.1, chr7: + // var genomicSeq = new SimpleSequence( + // 
"ACTTTAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGTAAGAGTTAAATGTTTGCTGTCTCTTAAAAAAAAACTATGTGGGTGTTTTAGATGCAAGTAGAAATGAGTTGAGGGTGGAAGAAAGGGAAAAAAATCTTATTTTTTCAAAAGGAAAAATTGGTAAGCTTAACATTCCTTAAATATCTTAGAATTTTTTCCAATAAGTATCTTAAAAATAACAAACCTCCCATCAGTTTTTCCTAGATTTGATTTTGCAGCATCTGGGGCCTGCCCTGTGATCTGCCTGTGGACATCGCTCTTAGGGGCGGCTGCACCAGCGTGCACAGGGTGGAGAGTTTGGGCCTGGCTCGTCCGGGGGACACCACACTGCAGGACACTCCAGGCCTGGCCGGCTTCTCAGAGCTTCAGATCCTCATTTTTCATATGAAGCTCCTAATGCTCCCCTTATGGGGGACTCTGAAGGGTTAATGGGAGGAATCATACAGTGACTGACCCCTGAGAAGTGTCCAGTGAAGACAGGGCTTAGCTAGGATTGCTGTTTTGCCTAATGCTCTGCGGGATTAAAAAAAAAGAAGAAGAAGAACAAGACCATTCGTCTCTCTAGGAGCATTGCCCAGAGTAGGTATTAGACACACCAACACCACCATCCAGCCAGACGCTGCAGGGACAGTGAGCCAGGGTCCGAGTGGAAAGGCGCTAGGCTTGGGAACCAGCTCAGAGTCAATACAGAGCCACCGCCACTCACCAACTCTGTCAGCTTAGTAAAATGGCTCTGCCCCTAGAGCCCTGGTTCCATCCTTTAGTATCTCACAGGGTGATTGTGAATATCCCATGACTCCAAGATTGAGAAAACGTTTAGAATCCCTCGGTGTGAAGGTTAACTCTGTCCGGAAAGAGGACCAGTAAAAGCTTCATGAGGCTGAGATGCACTTTGGAAGAGGAATAGAGTTTCAGCACATTCTAGGTGTTGGAGGAATGGGGGAATCTAGGCAGATGTTTAAAATCAATGAGAAACCAGAATGCTGACCATGAGGGTTGGAGTGGGGGCCTAAGGACATGACGGAGGAGCAGGGTGTGTTCCCAGCTTAACTCAGGTACCCATGGGGAAGCAGGAAAAGTGAAGGTGTCCTAGGCAGCTCTGCCACAGGATGAATGGCTTCAGATGCCAGGTGAGCGAGGGACCCTTCATTCAGTCAGCAGGAAAGAAGCACTGGCATATTTTTTATGAGAACAAAGGCTAGGATAGTAAAGACAGCAAGTACCAAAAAATGACTGGAAAAGGGAGACTGTGGAGGCAGTGGCAGCAGGCATGGAAAGAAGGGCTTGTGAAGGGGAAGGGGTGGTGTCAGAGGAACATAGGGCTGGGGGCAGGGATTAGGTGAGGGAAACCATGAGTCACACTGATTCTAGAGTAGTGTGCCCTTGATGAAAAGGATAACACCAGGTTCTAGGAAAAGATGGGGTTCTGTTTTTGACGTGTTGATTTTCAAGGACTTCTGGTGTTTGTGACACATGGGGAAATTGTGGTGGGAGAGAGGTGGGGCCAGAACAGGGGCTGGTGAGGCCAAGGGTCCCAGAGGGCACCTGTTGACCTGCAGGATGACATGAAGGGGGAAGGACAGAGGCAAGGCCAAGTCCTGGGCACCAGCCTCCCTCTTGCAGCTTCAAATAGGGCTCCATTTTGACCTTTTGATTAATTAGAGGTTTGTCATAGGTTGGGGGTTGAGAGGAGCAAGGGAGAGAAGGATTCAGTGTACAAAAAGAATGAAAGCCACTGGCTGAGCCAGTGGGGAGTTGTCCACACACACATGAGCCTTTGGACCATGAGAACGAGGGAGGCCTTGCCTTCCTGAACGGAGTAGGAGTGAGGTCCTGTGCTGAGCGTAAGCAGTGG
GATTCCCACAGCACTGGGCACAGAGCCCACGGGCTGCCTCCTGAGCAGCCAGCATCTGCCTGGGGTGGACACAGTGACAGAGAGATGGGTGGTGACTGGGGTATGGGCAGAGATAAGGCAGCAAGTGTGTGCAAGGGGAGTGAAGGGTTACTGACCTTAAGAAGCAGGGATGGCGTCCTCTGTCAGGTGAGGAGCCTGGAGAATGCTTTGGTGAATGAACGTTTGCAGCCCCTTTTAGCTTTTGGAGACTTGAAACCAAAGGAGAGATTCATCTGTGAAACTCTACTGGAGCCACTCCCCAACCCCCACCCTTGTGAGACCACAATGTGGGCGTTGGCTTGAGATGCTTCTGTGTTAGTAGAAGAAATAAACAACACAGTGCTCTGATGAGGCAAAGCGAAGATGAAAAAGGAGTTCCCAGGGGACATAGTAGGAACAGTGGACGAGGGTAGCAGAAGAGGAGTTTGGAGCAAAAGACTCACAAGCAGCTGCATAATCTGTTGGTGCTTGGCAGTTCATTTGTAAAAATGATGCCTCTTCCTGCCCTAAAATACCTACCTTACCCCCGCTTCAACTTGATGAGATTTCCATCAGTCACTCCCAATGTGTCACAGCTTCTGCAGCCCTAAAATTAAAAGGTGAGTGAGTCTCTGAGGCCCCTCTCCACTTCTCGGATGCTGAGTTTAGCCTTCATGTGAATGTGGAAAGACTAGGAATACAGCTGTTATCACACAAGCTGGCCCAATAGTGGTTCAGTTGAGAGAGCCCCATCCTTCAGAGTCAGCTCCAGCTAGGAGTGACTGGTGGCCTTGAGCATGGTGCTGGGCTTAGTGTTGCCATCTGTGGAATGGGTGTGGGTCTGTTGCCCTGCCTCCTCCCAGAGCTATTCTGAGGCTCAGAAGGGGTGATGGATGTGATGGTGCTCCCAACACTAGAAAGCATCTTAAGAATGTAAGATTTTCATGATGACTGTTGCTCAGAGTGGCTATTATAGTTTTGCTTTATTGTTCTATAACCTATGATTAAAATTTTTACCTTAAACTTTGACGTGAGTGTGAATAAGTATTTGTTTTGCCAGCAACATTCCTCACCACTGGGGCCATTAAAGATCTCCCCCTCTGAGACCATCAAATACAGGTCAACAGGACTGATTAATCTAATTAGAAAAGGGCTTGTATTAAATAGCAATGATAATTGTTGTTTTTAGTCTGTCTGGTGTTTGACTTGGGAACGTTTTTAAAATAGAGAAAAGCACAAAGAGGAAAACAACAATTACCAATATTCCTGCTACCCATTATAATTATCTAGGTATATTTTCTTCTTTTGTAAGAAAAAGAAACCCTGTTATATTGTTAAAATAACACAAAGTTAATATAAAGAATTTTAATGCAAAGATTAATGTTTTCAAATCACCACAAAACCCAACATCCAGAAATTACCAATATTAAAAGTAGAAAAGTATCATTCTAAATATTTTCTGTTGCATATGTATGTGAGTGGATAGGCTGATGAATTAGGTGGATTGATGGATAGGTAAATATGAAATAAATACTTTCATAAATATTCCAACTTATCATACATGCCTTAAATTCAAGAGGTGAAAAAAGACCCAAACAAAACTAGAGAAGCGGCTTATTTTAAATATCCTCTGACATAAAGGAATATTATATTTAAAGGATCCTCTAAGATTAAAAATATGTACTATGAAAAACATTAAGAAATTTGAATTTTTTTTAATCCATTTGTTTCAATTTAAGCAGCATCTACTGGCTCACTGCTTTGAAAAATAAGGACAGTATTCCAGTTCACATTCAGTGTTCCAGTGTTCACATTATCTTATTATTTTTACATTGTCCAGCTTTGTAATATTCACATTCTATTCTGTAATCATAATTCATAGTAGTTTAGTTATTTATTACTAACTCTATTTAAATAGATTCAAGGATCAGACCCTGCCCTTTTCTTCTTATTTATGTTTATTTTGATTAATCTCTTAATTG
ATTGGACTTTACATTCAAGCAACTTTTTTAAAAAAAAGTTTCTATAGATGTTCTATTTCTATCATTGTATTGTTTTTGAGGATGTTGGCCTGTTGCCTTTGTATTTGATGAGCATTTTGACAGAGTCTATGGTCTTGGGCCACTCTTTCTTTTTCTCCCTTGAGAACTTTTTAGATTTTGCTGATGGCATTGCTTGTTGAATGTTGCTGTGGAAACATCAAGTCTAGTGTAACTGTTTCTTCTTCAAGGTGATTTGCATTTTATTCCTGAATGCCTGAGGGTTCTTTATTTAACCTTGAAGTTAAATACCCTAATTAGGATGTATCTTGGTCTATTCATTCGGAATAAAAAATTCCTGCCATTTTGTCTAGAGAGTCCCTTTTTTTTCTCTTTATTTCTGGGAAATTCTCTTTTATATAAATATGTTTTGTTCCATCTATTGTGATCTCTGTTGAGGGATACCAGTTGTCCATATGTTAGATAATTTGTCTTCCATATCTGTTAACAGTTCTTAAAGTTTTTTGTTTATTTCTTTGTCTATTTTTACATTTACTCACTGTTCTCTTGTGGTTTTCCTCTGTCAGTAATTTAATTTTTAGTAGTTCCTGTTCTATTACTTGCTATTTTTAATCCATGCATTAATTTTATAATAATATTATTTTGCTCCTTATTTTGTCTCCTGAGACCCGAAATCTCTTTTTTCCTCTTACTCTGTTGCTTTTGCATTTTATTTTGAATACTTTTAAAATTGATTCCATGTTATGAAGCAATTATGAGGCATTTCCTCTCTTGTTGGAATTAACGATTTTTTCCCCTAGGAGGGACTCTATGGTCTGTGTTTTACTTCCTTTCTTCCCCTGTATTTCTAGAAAATATTTTCCTAGTAACCCTGACATTTCTTTTCATCTTGCTTATTCTAGTTGGTCTGATATAGCTTGATTGACATTTCAGCCTTCTTCCCACTATATTTTTTTTTCCTGTGAGAGCTATTGGGTTTTCTAAATCCTGAAAGAATGCCAAAGATGGGGTTGGAGGAGTTTGGTGAGGCAAAGTGCAGCCTTTGTTAAAATACTTTTCCTTTGCTCTCTCTCCCTCATCTGAAATTTAGTTAAATACCCTAAGCCATCAGCACTGTACCTAGTTGGGGAATGCTTTCATCCCCACAGGAGATTCTCTGGGGCTTTGGGCCATCTTCCCCTTCAGTGTAGACCACAGAGGACTTTGCTTCTGTCCCAGGGAGCCCGCAGGGGCTCACTTCTCCATGTTCATCTGATTCTTGTCAGCCAAGGTTTCAAATGCTTTTCTGATCAGAACAGGGAAAAGATACCTATCTGAATCATGTCTTTATAGATATGAGGCTATGAGGGAAAATTCTGAGGTTATTCTTGACTCACACCTAAAGATTTGGAAATGAGATTAGCAGCAAAGCTTTGCCCTACATCTCATGTCAGAATTTTCTGTTTCTTTCTAGTCTTTGAGTGTATGTGTGTTCTCACACACGCCATAATGAAATGCATATTATATATAATTATGTGTATATATAATATTCTATGACTATACATGACATGTTCCTTTAGCTGATTGCTGTTAAGAGAAATTTATAGGTTTTTATTTTTCTTGTTTTGTTGGGTATTAAGGAAGAGAAATTCTATGGTAATTTTCATGTGGCACAGTAATCTGGCATATATGTTGATTTTTTTCCTACACCCATTTGTTGTGATACCAAGTTTGAAAACAACAGATTTCAGTGGTTGCTTGGGAAACCACAGAACCATGACTTGGGGAGAGACAGGATGATTAGGTGGGAAAGCACCCTTTTGGTGGGGCTGTAAAGACTTTTATATTTAGCAAAATTGGCTACAAAGTCCATTCCCCTCCTTTTCTTGCCTTGATTTGGTAGAGGGATAGACTTGGATACAAACTAGAATGGATTCATTCTTCTCTGGAGTTAGTGTAACAAGACATTTAGCTGCTCAACACAAAAACAGAAACAAAAAAAT
TGTGTGGTTTCAGCAGTGCTATACAATTACTTTTTCTGACCTTTAATGGAGAGAAACACCACTTCTTTGGTCCCTACCATCAGCTTCATAGGGTTTTCATCCTGTTCTGTTTCTGGGAGGGCGTAACTGGCCATGCACAAGTTTTTTTTCTCTAATCAGAGTATGTGCCACTTCTGACCACCAGTAGATGAAAACGAATGGAAACCAGGCTATTATATGATACATATCCATTACAAAATAAGACATGAAACTCAAAGGTACTTTATGGTATAATGGGGCATATATTCCTGGACAATTCTTAATGGTCACAGATTTTATAAAAGGACTATTAGTAAATGTATGAATTACAGAGTAATTTATCCTTCTGTTAGTAAGAACCAGCTGATGACCTCAGTGTCAGGTGCATCGTGGAAGGTGTTGGGACCTTCCCTTGCCACCACCCTCACCAGCCATCATCAGCCATAACCTGCACATTGGGGAAGTTTTGACTTATCCCTCACTTTTGCCCCTCTTCAAGCTGTTCTTTCCACAGTGAATGAGAAGGCCACTTCTTCCTTCAAACCTTTCAGTGGTTTCCATTTTCCTTTAGACAAAGTCTCTGCCTAGCTGGCCTCTGCCTGCCCCTCCTGCCTACCTCTCGAGCACTGCCCCCACCTAGGGCTCTGGTTCCCCAACCTTCACTCGGTCCTGCCACACCTCCCAGCCCCTTCTCCCTTCAGAACTTTCCTTCTTGTTGTCCCCAACACTGGGACACAAAACCCTCCTTATCAACCCTCCTTATCTGGCTGACTCTTACAAGATCAGAAACCTGTGTAATGCTCTCATGGCACGCTCCCCTTGTCTTCGTGGATTTCTCAGATGGGAAGGAATTATCCATGCAATCACACATAAACTTCTACCTACCCTCCCCTAGTAGCTGTCTGCTGCTAAGGATGGGGACCATTCTCACTTACTCACTGTTCTGTCCCTCTGCCCAGTCCAGATGTGTTGAAGGATGGAAATATACAGAGTAGTGGTAAAATATAAACCGTTCAGACATTCCAAGGATGGGCTCATGTGCTTTGACTCATTAATGTACCACTGCTGAAAACAGAACACAGCCGCAGTCTTGCCAGTAAGAGTGCAGTTACTGTAATTAATGAATTTGCTAATTAAGCCATGATTTCATACTGAACTTATGACCAACATATTGAGAAGGTGTGTCTTCAAGAAAATTTATTTTTTGTATTAAGATATTTACTCCAAAGCTAATTGAAGAAGCCAAATCTAGGCTCTGGTTTCACCATTGCCAGGGAAATGAGCTCATGGACTCCTATGAACTGATGATGTTAGATCAGAAGTTTCTCAAGGCCAGGGCCCAATCACTGCTGAGGCGTCAACAGTAGTTCCTTGTACATCAATAATTCTCATTACTTTTAAAAAATAACAGATGAATAGCAACTATTTTCCCTGTAGCTCCCTTGCTGTGCCTCCTACCCTCCACCACATGTTTCTGGGGAGCCCTGCTTCGGGCCTGCCAACTACAGAGAATTACTTTTGAGTATCCCTTCCACTCTCATCTCAAGACAGAGTTCATCTACCTTTGGGTTATTTGTCAAAAATGTGTCATTTTATTACAAAAAATATACAATCATCATGTATTTTGATTAAATTTTACACTAGATTATTAAAATTATTAAATACAATTATTAAAATTAATAATTTAACATATCACATATTTTAAATATATTGTATATAATGAATAATAATATAATTATTGTCTATTTTAATTCAATAAATGTATAGTAAGTTAGCCAGTTGTAAATTACTGAGAACACTCTACTGAAAAAGCATCATTTCAAATACACTATTTAAAATATTAAATGAAATACAATAACATAATTAAACTAATCTTTGGTTCCCCTATTTATGTATTCATTTATCCAACAAAATCTCCTTAAGTGCTTATAATGGGTAGGTCCTGGCTCGGTGTCCCCTAGACAGACGCATGGGCCTTCCCC
CAGCCCGTCAGTATGGTGCAGGTGTGATGTGTCCGCAGGTGTGTGTGTATGTGTGCAGGTGTGGGGTCCGCAGGCGTGCTGGGCCCCCAGGCCGTGTTCCCCTTCCCCTCCCCGGTTGTAGATTTCAGCTGTTGCTGCCAGACCTGACCGGTTCCGGAGGTGGCCGCGCCCCACTCACTGTCGCCTGCTTTCCACAGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTAC
CCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAA
TTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAACC", + // 50459420 - 1); + // + // var codingRegion = new CodingRegion(50459422, 50468325, 169, 1053, 885); + // + // var rnaEdits = new IRnaEdit[] + // { + // new RnaEdit(1, 0, + // "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG"), + // new RnaEdit(4, 3, "C"), + // new RnaEdit(5325, 5324, "AAAAAAAAAAAAAAA") + // }; + // + // var regions = new ITranscriptRegion[] + // { + // // insertion + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 
50459424, 204, 206), + // new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459425, 50459561, 208, 343), + // new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 344, 5527) + // }; + // + // string actualCdna = + // new CdnaSequence(genomicSeq, codingRegion, regions, false, rnaEdits).GetCdnaSequence(); + // const string expectedCdna = + // "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGA
GAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGT
AAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"; + // + // Assert.Equal(expectedCdna, actualCdna); + // + // // var cdsTemp = 
actualCdna.Substring(codingRegion.CdnaStart - 1, codingRegion.Length); + // const string expectedCds = + // "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"; + // + // // Assert.Equal(expectedCds, cdsTemp); + // + // string actualCds = + // new CodingSequence(genomicSeq, codingRegion, regions, false, 0, rnaEdits).GetCodingSequence(); + // + // Assert.Equal(expectedCds, actualCds); + // } } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodonsTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodonsTests.cs index ff496085..4890eebf 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodonsTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/CodonsTests.cs @@ -1,6 +1,4 @@ -using Genome; -using Moq; -using UnitTests.TestDataStructures; +using System; using VariantAnnotation.AnnotatedPositions.Transcript; using Xunit; @@ -9,91 +7,89 @@ namespace UnitTests.VariantAnnotation.AnnotatedPositions.Transcript public sealed class CodonsTests { [Fact] - public void Assign_WhenIntervalsNull_ReturnNull() + public void 
GetCodons_UndefinedInterval_ReturnEmpty() { - var sequence = new SimpleSequence("AAA"); - var codons = Codons.GetCodons("G", -1, -1, -1, -1, sequence); + var sequence = "AAA".AsSpan(); + (string actualRefCodons, string actualAltCodons) = Codons.GetCodons("G", -1, -1, -1, -1, sequence); - Assert.Equal("", codons.Reference); - Assert.Equal("", codons.Alternate); + Assert.Equal("", actualRefCodons); + Assert.Equal("", actualAltCodons); } [Fact] - public void Assign_SNV_SuffixLenTooBig() + public void GetCodons_SNV_SuffixLenTooBig() { - var sequence = new Mock(); - sequence.SetupGet(x => x.Length).Returns(89); - sequence.Setup(x => x.Substring(87, 1)).Returns("t"); - sequence.Setup(x => x.Substring(88, 1)).Returns("C"); + Span sequence = stackalloc char[89]; + ReadOnlySpan contentSpan = "tC".AsSpan(); + contentSpan.CopyTo(sequence.Slice(87)); - var codons = Codons.GetCodons("T", 89, 89, 30, 30, sequence.Object); + (string actualRefCodons, string actualAltCodons) = Codons.GetCodons("T", 89, 89, 30, 30, sequence); - Assert.Equal("tC", codons.Reference); - Assert.Equal("tT", codons.Alternate); + Assert.Equal("tC", actualRefCodons); + Assert.Equal("tT", actualAltCodons); } [Fact] - public void Assign_SNV() + public void GetCodons_SNV_ExpectedResults() { - var sequence = new Mock(); - sequence.SetupGet(x => x.Length).Returns(100); - sequence.Setup(x => x.Substring(21, 2)).Returns("CA"); - sequence.Setup(x => x.Substring(23, 1)).Returns("A"); + Span sequence = stackalloc char[100]; + ReadOnlySpan contentSpan = "CAA".AsSpan(); + contentSpan.CopyTo(sequence.Slice(21)); - var codons = Codons.GetCodons("G", 24, 24, 8, 8, sequence.Object); + (string actualRefCodons, string actualAltCodons) = Codons.GetCodons("G", 24, 24, 8, 8, sequence); - Assert.Equal("caA", codons.Reference); - Assert.Equal("caG", codons.Alternate); + Assert.Equal("caA", actualRefCodons); + Assert.Equal("caG", actualAltCodons); } [Fact] - public void Assign_MNV() + public void GetCodons_MNV_ExpectedResults() { 
- var sequence = new Mock(); - sequence.SetupGet(x => x.Length).Returns(100); - sequence.Setup(x => x.Substring(21, 2)).Returns("CA"); - sequence.Setup(x => x.Substring(28, 2)).Returns("GG"); - sequence.Setup(x => x.Substring(23, 5)).Returns("GTGCT"); + Span sequence = stackalloc char[100]; + ReadOnlySpan contentSpan = "CAGTGCT".AsSpan(); + ReadOnlySpan contentSpan2 = "GG".AsSpan(); + contentSpan.CopyTo(sequence.Slice(21)); + contentSpan2.CopyTo(sequence.Slice(28)); - var codons = Codons.GetCodons("ACCGA", 24, 28, 8, 10, sequence.Object); + (string actualRefCodons, string actualAltCodons) = + Codons.GetCodons("ACCGA", 24, 28, 8, 10, sequence); - Assert.Equal("caGTGCTgg", codons.Reference); - Assert.Equal("caACCGAgg", codons.Alternate); + Assert.Equal("caGTGCTgg", actualRefCodons); + Assert.Equal("caACCGAgg", actualAltCodons); } [Fact] public void GetCodon_NullPrefixAndSuffix() { - const string allele = "GAA"; - var observedResult = Codons.GetCodon(allele, "", ""); - Assert.Equal(allele, observedResult); + const string expectedResult = "GAA"; + string observedResult = Codons.GetCodon(expectedResult, "", ""); + Assert.Equal(expectedResult, observedResult); } [Theory] [InlineData(3, true)] [InlineData(1, false)] - public void IsTriplet(int len, bool expectedResult) + public void IsTriplet_ExpectedResults(int len, bool expectedResult) { - var observedResult = Codons.IsTriplet(len); - Assert.Equal(expectedResult, observedResult); + bool actualResult = Codons.IsTriplet(len); + Assert.Equal(expectedResult, actualResult); } [Theory] [InlineData(-33, 4, -11, 2, "ACGTca")] [InlineData(95, 101, 32, 34, "gGCTGA")] - public void GetCodons_OutOfRangeIndexes_Adjusted(int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, string expectedRefCodons) + public void GetCodons_OutOfRangeIndexes_Adjusted(int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, + string expectedRefCodons) { - var sequence = new Mock(); - sequence.SetupGet(x => x.Length).Returns(99); - 
sequence.Setup(x => x.Substring(0, 0)).Returns(""); - sequence.Setup(x => x.Substring(0, 4)).Returns("ACGT"); - sequence.Setup(x => x.Substring(4, 2)).Returns("CA"); - sequence.Setup(x => x.Substring(94, 5)).Returns("GCTGA"); - sequence.Setup(x => x.Substring(93, 1)).Returns("G"); - - var codons = Codons.GetCodons("", cdsStart, cdsEnd, proteinBegin, proteinEnd, sequence.Object); - - Assert.Equal(expectedRefCodons, codons.Reference); + Span sequence = stackalloc char[99]; + ReadOnlySpan contentSpan = "ACGTCA".AsSpan(); + ReadOnlySpan contentSpan2 = "GGCTGA".AsSpan(); + contentSpan.CopyTo(sequence); + contentSpan2.CopyTo(sequence.Slice(93)); + + (string actualRefCodons, _) = + Codons.GetCodons("", cdsStart, cdsEnd, proteinBegin, proteinEnd, sequence); + Assert.Equal(expectedRefCodons, actualRefCodons); } } } \ No newline at end of file diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilitiesTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilitiesTests.cs index ad788939..0d0d2238 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilitiesTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilitiesTests.cs @@ -154,9 +154,9 @@ public void FindRegion_Reverse_Deletion() [Fact] public void GetCdnaPosition_Forward_Insertion() { - var variant = new Interval(ForwardVariantStart, ForwardVariantEnd); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetCdnaPositions(_forwardTranscriptRegions[4], - _forwardTranscriptRegions[4], variant, false, true); + (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetInsertionCdnaPositions( + _forwardTranscriptRegions[4], _forwardTranscriptRegions[4], ForwardVariantStart, ForwardVariantEnd, + false); Assert.Equal(486, cdnaStart); Assert.Equal(485, cdnaEnd); @@ -165,8 +165,7 @@ public void GetCdnaPosition_Forward_Insertion() [Fact] public void GetCdnaPosition_Reverse_Deletion() { - var 
variant = new Interval(ReverseVariantStart, ReverseVariantEnd); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetCdnaPositions(_reverseTranscriptRegions[6], _reverseTranscriptRegions[7], variant, true, false); + (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetCdnaPositions(_reverseTranscriptRegions[6], _reverseTranscriptRegions[7], ReverseVariantStart, ReverseVariantEnd, true); Assert.Equal(123, cdnaStart); Assert.Equal(-1, cdnaEnd); @@ -176,8 +175,8 @@ public void GetCdnaPosition_Reverse_Deletion() public void GetCdnaPosition_Snv_AfterOutFrameRnaEditDeletion() { // NM_001317107.1 - var variant = new Interval(22138550, 22138550); - var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm1317107[0], _regionsNm1317107[0], variant, true, false); + var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm1317107[0], _regionsNm1317107[0], + 22138550, 22138550, true); Assert.Equal(681, observed.CdnaStart); } @@ -186,8 +185,7 @@ public void GetCdnaPosition_Snv_AfterOutFrameRnaEditDeletion() public void GetCdnaPosition_Snv_AfterInframeRnaEditInsertion() { // NM_000682.6 - var variant = new Interval(96780984, 96780984); - var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm682[0], _regionsNm682[0], variant, true, false); + var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm682[0], _regionsNm682[0], 96780984, 96780984, true); Assert.Equal(1010, observed.CdnaStart); } @@ -196,8 +194,7 @@ public void GetCdnaPosition_Snv_AfterInframeRnaEditInsertion() public void GetCdnaPosition_Snv_AfterOutframeRnaEditInsertion() { // NM_033517.1 - var variant = new Interval(51135986, 51135986); - var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm33517[20], _regionsNm33517[20], variant, false, false); + var observed = MappedPositionUtilities.GetCdnaPositions(_regionsNm33517[20], _regionsNm33517[20], 51135986, 51135986, false); Assert.Equal(1343, observed.CdnaStart); } @@ -302,7 +299,7 @@ public void 
GetCoveredCdnaPositions_Reverse_StartExon_EndIntron() var regions = new ITranscriptRegion[] { new TranscriptRegion(TranscriptRegionType.Exon, 2, 25398208, 25398329, 167, 288), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 25398330, 25403697, 166, 167), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 25398330, 25403697, 166, 167) }; // ENST00000556131 @@ -473,95 +470,65 @@ public void GetProteinPosition_Reverse_Deletion() } [Fact] - public void FoundExonEndpointInsertion_NotInsertion_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(false, -1, 100, _exon, _intron)); - } - - [Fact] - public void FoundExonEndpointInsertion_BothExons_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, 100, _exon, _exon)); - } - - [Fact] - public void FoundExonEndpointInsertion_BothIntrons_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, 100, _intron, _intron)); - } - - [Fact] - public void FoundExonEndpointInsertion_BothDefinedCdnaPositions_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, 100, 110, _exon, _intron)); - } - - [Fact] - public void FoundExonEndpointInsertion_BothUndefinedCdnaPositions_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, -1, _exon, _intron)); - } - - [Fact] - public void FoundExonEndpointInsertion_UndefinedRegion_ReturnFalse() - { - Assert.False(MappedPositionUtilities.FoundExonEndpointInsertion(true, -1, -1, null, _intron)); - } - - [Fact] - public void FoundExonEndpointInsertion_OneIntron_OneExon_OneUndefinedPosition_ReturnTrue() - { - Assert.True(MappedPositionUtilities.FoundExonEndpointInsertion(true, 108, -1, _exon, _intron)); - } - - [Fact] - public void FixExonEndpointInsertion_VariantEnd_ExonEnd_Reverse() + public void GetInsertionCdnaPositions_ExonEndpointInsertion_UndefinedCdnaStart_Reverse() { var startRegion = new 
TranscriptRegion(TranscriptRegionType.Intron, 7, 243736351, 243776972, 762, 763); - var endRegion = new TranscriptRegion(TranscriptRegionType.Exon, 8, 243736228, 243736350, 763, 885); + var endRegion = new TranscriptRegion(TranscriptRegionType.Exon, 8, 243736228, 243736350, 763, 885); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.FixExonEndpointInsertion(-1, 763, true, startRegion, endRegion, - new Interval(243736351, 243736350)); + (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, + 243736351, 243736350, true); Assert.Equal(762, cdnaStart); Assert.Equal(763, cdnaEnd); } [Fact] - public void FixExonEndpointInsertion_VariantStart_ExonStart_Reverse() + public void GetInsertionCdnaPositions_ExonEndpointInsertion_NotObserved_UndefinedCdnaEnd_Reverse() { - // N.B. this configuration has never been spotted in the wild - var startRegion = new TranscriptRegion(TranscriptRegionType.Exon, 2, 2000, 2199, 1, 200); + var startRegion = new TranscriptRegion(TranscriptRegionType.Exon, 2, 2000, 2199, 1, 200); var endRegion = new TranscriptRegion(TranscriptRegionType.Intron, 2, 1999, 1000, 200, 201); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.FixExonEndpointInsertion(200, -1, true, startRegion, endRegion, - new Interval(2000, 1999)); + (int cdnaStart, int cdnaEnd) = + MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, 2000, 1999, true); Assert.Equal(200, cdnaStart); Assert.Equal(201, cdnaEnd); } + + [Fact] + public void GetInsertionCdnaPositions_ExonEndpointInsertion_UndefinedCdnaEnd_Reverse() + { + var startRegion = new TranscriptRegion(TranscriptRegionType.Exon, 3, 1333613, 1333722, 396, 505); + var endRegion = new TranscriptRegion(TranscriptRegionType.Intron, 3, 1330895, 1333612, 505, 506); + + (int cdnaStart, int cdnaEnd) = + MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, 1333613, 1333612, true); + + Assert.Equal(505, cdnaStart); + Assert.Equal(506, 
cdnaEnd); + } [Fact] - public void FixExonEndpointInsertion_VariantEnd_ExonEnd_Forward() + public void GetInsertionCdnaPositions_ExonEndpointInsertion_UndefinedCdnaStart_Forward() { var startRegion = new TranscriptRegion(TranscriptRegionType.Intron, 16, 89521770, 89528546, 3071, 3072); var endRegion = new TranscriptRegion(TranscriptRegionType.Exon, 16, 89521614, 89521769, 2916, 3071); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.FixExonEndpointInsertion(-1, 3071, false, startRegion, endRegion, - new Interval(89521770, 89521769)); + (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, + 89521770, 89521769, false); Assert.Equal(3072, cdnaStart); Assert.Equal(3071, cdnaEnd); } [Fact] - public void FixExonEndpointInsertion_VariantStart_ExonStart_Forward() + public void GetInsertionCdnaPositions_ExonEndpointInsertion_UndefinedCdnaEnd_Forward() { var startRegion = new TranscriptRegion(TranscriptRegionType.Exon, 2, 99459243, 99459360, 108, 225); var endRegion = new TranscriptRegion(TranscriptRegionType.Intron, 1, 99456512, 99459242, 107, 108); - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.FixExonEndpointInsertion(108, -1, false, startRegion, endRegion, - new Interval(99459243, 99459242)); + (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, + 99459243, 99459242, false); Assert.Equal(108, cdnaStart); Assert.Equal(107, cdnaEnd); diff --git a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffectTests.cs b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffectTests.cs index 101c3150..57b149f6 100644 --- a/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffectTests.cs +++ b/UnitTests/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffectTests.cs @@ -168,7 +168,7 @@ public void DetermineIntronicEffect_IsWithinSpliceSiteRegion() public void 
DetermineExonicEffect_HasExonOverlap() { IInterval variant = new Interval(876686, 876686); - var position = new MappedPosition(349, 349, 349, 349, 117, 117, 2, 2, -1, -1, 2, 2); + var position = new MappedPosition(349, 349, 349, 349, 349,117, 117, 2, 2,2, -1, -1, 2, 2); var positionalEffect = new TranscriptPositionalEffect(); positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 349, 349, 349, 349, "G", false); @@ -180,7 +180,7 @@ public void DetermineExonicEffect_HasExonOverlap() public void DetermineExonicEffect_AfterCoding() { IInterval variant = new Interval(879600, 879600); - var position = new MappedPosition(1692, 1692, -1, -1, -1, -1, 7, 7, -1, -1, 12, 12); + var position = new MappedPosition(1692, 1692, -1, -1, -1,-1, -1, -1, 7, 7, -1, -1, 12, 12); var positionalEffect = new TranscriptPositionalEffect(); positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 1692, 1692, -1, -1, "G", false); @@ -191,7 +191,7 @@ public void DetermineExonicEffect_AfterCoding() public void DetermineExonicEffect_WithinCdna() { IInterval variant = new Interval(879600, 879600); - var position = new MappedPosition(1692, 1692, -1, -1, -1, -1, 7, 7, -1, -1, 12, 12); + var position = new MappedPosition(1692, 1692, -1, -1, -1,-1, -1, -1, 7, 7, -1, -1, 12, 12); var positionalEffect = new TranscriptPositionalEffect(); positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 1692, 1692, -1, -1, "G", false); @@ -202,7 +202,7 @@ public void DetermineExonicEffect_WithinCdna() public void DetermineExonicEffect_WithinCds() { IInterval variant = new Interval(876543, 876543); - var position = new MappedPosition(206, 206, 206, 206, 69, 69, 2, 2, -1, -1, 2, 2); + var position = new MappedPosition(206, 206, 206, 206, 69, 69,69, 69, 2, 2, -1, -1, 2, 2); var positionalEffect = new TranscriptPositionalEffect(); positionalEffect.DetermineExonicEffect(_forwardTranscript.Object, variant, position, 206, 206, 206, 206, 
"G", false); @@ -213,7 +213,7 @@ public void DetermineExonicEffect_WithinCds() public void DetermineExonicEffect_OverlapWithMicroRna() { IInterval variant = new Interval(3477284, 3477284); - var position = new MappedPosition(71, 71, -1, -1, -1, -1, 1, 1, -1, -1, 0, 0); + var position = new MappedPosition(71, 71, -1, -1, -1, -1, -1,-1, 1, 1, -1, -1, 0, 0); var positionalEffect = new TranscriptPositionalEffect(); positionalEffect.DetermineExonicEffect(_reverseTranscript.Object, variant, position, 71, 71, -1, -1, "G", false); diff --git a/UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptTests.cs b/UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptTests.cs index ad3609e1..ad04ecf8 100644 --- a/UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptTests.cs +++ b/UnitTests/VariantAnnotation/Caches/DataStructures/TranscriptTests.cs @@ -4,10 +4,12 @@ using Genome; using Intervals; using IO; +using Moq; using UnitTests.TestUtilities; using VariantAnnotation.AnnotatedPositions.Transcript; using VariantAnnotation.Caches.DataStructures; using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; using Xunit; namespace UnitTests.VariantAnnotation.Caches.DataStructures @@ -41,7 +43,7 @@ public void Transcript_EndToEnd() IInterval[] expectedMicroRnas = GetMicroRnas(); - ITranslation expectedTranslation = new Translation(expectedCodingRegion, CompactId.Convert("ENSP00000446475", 17), "VEIDSD"); + ITranslation expectedTranslation = new Translation(expectedCodingRegion, CompactId.Convert("ENSP00000446475", 17), "VEIDSD*"); IGene expectedGene = new Gene(expectedChromosome, 100, 200, true, "TP53", 300, CompactId.Convert("7157"), CompactId.Convert("ENSG00000141510")); @@ -67,10 +69,12 @@ public void Transcript_EndToEnd() CompactId.Convert(expectedId, expectedVersion), expectedTranslation, expectedBioType, expectedGene, expectedTotalExonLength, expectedStartExonPhase, expectedCanonical, expectedTranscriptRegions, 
expectedNumExons, expectedMicroRnas, expectedSiftIndex, expectedPolyPhenIndex, - expectedSource, expectedCdsStartNotFound, expectedCdsEndNotFound, null, null); + expectedSource, expectedCdsStartNotFound, expectedCdsEndNotFound, null); // ReSharper restore ConditionIsAlwaysTrueOrFalse ITranscript observedTranscript; + var mockSequenceProvider = new Mock(); + mockSequenceProvider.Setup(x => x.Assembly).Returns(GenomeAssembly.GRCh38); using (var ms = new MemoryStream()) { @@ -84,7 +88,7 @@ public void Transcript_EndToEnd() using (var reader = new BufferedBinaryReader(ms)) { // SET-362 DEBUG: Remove the null arguments in the future - observedTranscript = Transcript.Read(reader, indexToChromosome, genes, expectedTranscriptRegions, expectedMicroRnas, peptideSeqs, null); + observedTranscript = Transcript.Read(reader, indexToChromosome, genes, expectedTranscriptRegions, expectedMicroRnas, peptideSeqs, mockSequenceProvider.Object); } } diff --git a/UnitTests/VariantAnnotation/Caches/DataStructures/TranslationTests.cs b/UnitTests/VariantAnnotation/Caches/DataStructures/TranslationTests.cs index 60efc9af..4f893557 100644 --- a/UnitTests/VariantAnnotation/Caches/DataStructures/TranslationTests.cs +++ b/UnitTests/VariantAnnotation/Caches/DataStructures/TranslationTests.cs @@ -15,7 +15,7 @@ public void Translation_EndToEnd() { ICodingRegion expectedCodingRegion = new CodingRegion(100, 200, 300, 400, 101); const string expectedProteinId = "ENSP00000446475.7"; - const string expectedPeptideSeq = "VEIDSD"; + const string expectedPeptideSeq = "VEIDSD*"; string[] peptideSeqs = { expectedPeptideSeq }; diff --git a/UnitTests/VariantAnnotation/Caches/TranscriptCacheTests.cs b/UnitTests/VariantAnnotation/Caches/TranscriptCacheTests.cs index 144b759f..90ec5e91 100644 --- a/UnitTests/VariantAnnotation/Caches/TranscriptCacheTests.cs +++ b/UnitTests/VariantAnnotation/Caches/TranscriptCacheTests.cs @@ -117,11 +117,11 @@ private ITranscript[] GetTranscripts() return new ITranscript[] { new 
Transcript(ChromosomeUtilities.Chr11, 11000, 12000, CompactId.Empty, null, BioType.other, null, 0, 0, - false, null, 0, null, 0, 0, Source.None, false, false, null, null), + false, null, 0, null, 0, 0, Source.None, false, false, null), new Transcript(ChromosomeUtilities.Chr1, 120, 180, CompactId.Empty, null, BioType.other, null, 0, 0, - false, null, 0, null, 0, 0, Source.None, false, false, null, null), + false, null, 0, null, 0, 0, Source.None, false, false, null), new Transcript(ChromosomeUtilities.Chr1, 300, 320, CompactId.Empty, null, BioType.other, null, 0, 0, - false, null, 0, null, 0, 0, Source.None, false, false, null, null) + false, null, 0, null, 0, 0, Source.None, false, false, null) }; } } diff --git a/UnitTests/VariantAnnotation/IO/Caches/TranscriptCacheReaderTests.cs b/UnitTests/VariantAnnotation/IO/Caches/TranscriptCacheReaderTests.cs index c8a92155..c1159acb 100644 --- a/UnitTests/VariantAnnotation/IO/Caches/TranscriptCacheReaderTests.cs +++ b/UnitTests/VariantAnnotation/IO/Caches/TranscriptCacheReaderTests.cs @@ -6,12 +6,14 @@ using Genome; using Intervals; using IO; +using Moq; using UnitTests.TestUtilities; using VariantAnnotation.AnnotatedPositions.Transcript; using VariantAnnotation.Caches; using VariantAnnotation.Caches.DataStructures; using VariantAnnotation.Interface.AnnotatedPositions; using VariantAnnotation.Interface.Caches; +using VariantAnnotation.Interface.Providers; using VariantAnnotation.IO.Caches; using Xunit; @@ -63,6 +65,8 @@ public TranscriptCacheReaderTests() public void TranscriptCacheReader_EndToEnd() { TranscriptCacheData observedCache; + var mockSequenceProvider = new Mock(); + mockSequenceProvider.Setup(x => x.Assembly).Returns(GenomeAssembly.GRCh38); using (var ms = new MemoryStream()) { @@ -76,7 +80,7 @@ public void TranscriptCacheReader_EndToEnd() using (var reader = new TranscriptCacheReader(ms)) { // SET-362 DEBUG: Remove the null arguments in the future - observedCache = reader.Read(null, 
ChromosomeUtilities.RefIndexToChromosome); + observedCache = reader.Read(mockSequenceProvider.Object, ChromosomeUtilities.RefIndexToChromosome); } } @@ -196,7 +200,7 @@ private static ITranscript[] GetTranscripts(IChromosome chromosome, IGene[] gene return new ITranscript[] { new Transcript(chromosome, 120, 180, CompactId.Convert("789"), null, BioType.IG_D_gene, genes[0], 0, 0, - false, regions, 0, mirnas, -1, -1, Source.None, false, false, null, null) + false, regions, 0, mirnas, -1, -1, Source.None, false, false, null) }; } } diff --git a/UnitTests/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotatorTests.cs b/UnitTests/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotatorTests.cs index c699e34d..330d83c8 100644 --- a/UnitTests/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotatorTests.cs +++ b/UnitTests/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotatorTests.cs @@ -6,30 +6,34 @@ namespace UnitTests.VariantAnnotation.TranscriptAnnotation public sealed class FullTranscriptAnnotatorTests { [Theory] - [InlineData("S", "S", 60, 60, "S", "S", 60, 60)] - [InlineData("S", "T", 60, 60, "S", "T", 60, 60)] - [InlineData("ELC", "DVR", 632, 634, "ELC", "DVR", 632, 634)] - [InlineData("LL", "LI", 213, 214, "L", "I", 214, 214)] - [InlineData("K", "KLX", 523, 523, "K", "KLX", 523, 523 )] - [InlineData("C", "CC", 46, 46, "C", "CC", 46, 46)] - [InlineData("R", "KR", 22955, 22955, "R", "KR", 22955, 22955)] - [InlineData("PPPPPQQQQ", "", 65, 73, "PPPPPQQQQ", "", 65, 73)] - [InlineData("DMEIHA", "D", 370, 375, "MEIHA", "", 371, 375)] - [InlineData("VV", "V", 690, 691, "V", "", 691, 691)] - [InlineData("NARCN", "N", 243, 247, "ARCN", "", 244, 247)] - [InlineData("QQQQP", "P", 52, 56, "QQQQ", "", 52, 55)] - [InlineData("RV", "X", 1172, 1173, "RV", "X", 1172, 1173)] - [InlineData("GA", "GX", 112, 113, "A", "X", 113, 113)] - [InlineData("SPDGHE", "R", 566, 571, "SPDGHE", "R", 566, 571)] - [InlineData("Q", "*VRX", 96, 96, "Q", "*VRX", 96, 96)] - 
public void TryTrimAminoAcidsAndUpdateProteinPositions_AsExpected(string reference, string alt, int start, int end, string newReference, string newAlt, int newStart, int newEnd) + [InlineData("S", "S", 60, 60, "S", "S", 60, 60)] + [InlineData("S", "T", 60, 60, "S", "T", 60, 60)] + [InlineData("ELC", "DVR", 632, 634, "ELC", "DVR", 632, 634)] + [InlineData("LL", "LI", 213, 214, "L", "I", 214, 214)] + [InlineData("K", "KLX", 523, 523, "K", "KLX", 523, 523)] + [InlineData("C", "CC", 46, 46, "C", "CC", 46, 46)] + [InlineData("R", "KR", 22955, 22955, "R", "KR", 22955, 22955)] + [InlineData("PPPPPQQQQ", "", 65, 73, "PPPPPQQQQ", "", 65, 73)] + [InlineData("DMEIHA", "D", 370, 375, "MEIHA", "", 371, 375)] + [InlineData("VV", "V", 690, 691, "V", "", 691, 691)] + [InlineData("NARCN", "N", 243, 247, "ARCN", "", 244, 247)] + [InlineData("QQQQP", "P", 52, 56, "QQQQ", "", 52, 55)] + [InlineData("RV", "X", 1172, 1173, "RV", "X", 1172, 1173)] + [InlineData("GA", "GX", 112, 113, "A", "X", 113, 113)] + [InlineData("SPDGHE", "R", 566, 571, "SPDGHE", "R", 566, 571)] + [InlineData("Q", "*VRX", 96, 96, "Q", "*VRX", 96, 96)] + public void TryTrimAminoAcidsAndUpdateProteinPositions_ExpectedResults(string refAminoAcids, + string altAminoAcids, int start, int end, string expectedRefAa, string expectedAltAa, int expectedStart, + int expectedEnd) { - var trimmedAa = FullTranscriptAnnotator.TryTrimAminoAcidsAndUpdateProteinPositions(new SequenceChange(reference, alt), start, end); + (string actualRefAa, string actualAltAa, int actualStart, int actualEnd) = + FullTranscriptAnnotator.TryTrimAminoAcidsAndUpdateProteinPositions(refAminoAcids, altAminoAcids, start, + end); - Assert.Equal(newReference, trimmedAa.AaChange.Reference); - Assert.Equal(newAlt, trimmedAa.AaChange.Alternate); - Assert.Equal(newStart, trimmedAa.ProteinStart); - Assert.Equal(newEnd, trimmedAa.ProteinEnd); + Assert.Equal(expectedRefAa, actualRefAa); + Assert.Equal(expectedAltAa, actualAltAa); + Assert.Equal(expectedStart, 
actualStart); + Assert.Equal(expectedEnd, actualEnd); } } } \ No newline at end of file diff --git a/VariantAnnotation.Interface/AnnotatedPositions/AminoAcidEdit.cs b/VariantAnnotation.Interface/AnnotatedPositions/AminoAcidEdit.cs new file mode 100644 index 00000000..5d9d4443 --- /dev/null +++ b/VariantAnnotation.Interface/AnnotatedPositions/AminoAcidEdit.cs @@ -0,0 +1,16 @@ +namespace VariantAnnotation.Interface.AnnotatedPositions +{ + // AminoAcidEdit describes an override where a triplet at a particular position always codes for an + // alternative amino acid (e.g. non-AUG start codon). + public readonly struct AminoAcidEdit + { + public readonly int Position; + public readonly char AminoAcid; + + public AminoAcidEdit(int position, char aminoAcid) + { + Position = position; + AminoAcid = aminoAcid; + } + } +} \ No newline at end of file diff --git a/VariantAnnotation.Interface/AnnotatedPositions/IMappedPosition.cs b/VariantAnnotation.Interface/AnnotatedPositions/IMappedPosition.cs index d5b8b157..fc1c7927 100644 --- a/VariantAnnotation.Interface/AnnotatedPositions/IMappedPosition.cs +++ b/VariantAnnotation.Interface/AnnotatedPositions/IMappedPosition.cs @@ -2,23 +2,25 @@ { public interface IMappedPosition { - int ProteinStart { get; set; } - int ProteinEnd { get; set; } - int CdsStart { get; } - int CdsEnd { get; } - int CdnaStart { get; } - int CdnaEnd { get; } - int ExonStart { get; } - int ExonEnd { get; } - int IntronStart { get; } - int IntronEnd { get; } - int RegionStartIndex { get; } - int RegionEndIndex { get; } + int ProteinStart { get; set; } + int ProteinEnd { get; set; } + int ExtendedProteinEnd { get; } + int CdsStart { get; } + int CdsEnd { get; } + int ExtendedCdsEnd { get; } + int CdnaStart { get; } + int CdnaEnd { get; } + int ExonStart { get; } + int ExonEnd { get; } + int IntronStart { get; } + int IntronEnd { get; } + int RegionStartIndex { get; } + int RegionEndIndex { get; } int CoveredProteinStart { get; set; } - int CoveredProteinEnd { 
get; set; } - int CoveredCdsStart { get; set; } - int CoveredCdsEnd { get; set; } - int CoveredCdnaStart { get; set; } - int CoveredCdnaEnd { get; set; } + int CoveredProteinEnd { get; set; } + int CoveredCdsStart { get; set; } + int CoveredCdsEnd { get; set; } + int CoveredCdnaStart { get; set; } + int CoveredCdnaEnd { get; set; } } } \ No newline at end of file diff --git a/VariantAnnotation.Interface/AnnotatedPositions/ITranscript.cs b/VariantAnnotation.Interface/AnnotatedPositions/ITranscript.cs index 868726ab..60eb31a0 100644 --- a/VariantAnnotation.Interface/AnnotatedPositions/ITranscript.cs +++ b/VariantAnnotation.Interface/AnnotatedPositions/ITranscript.cs @@ -20,10 +20,10 @@ public interface ITranscript : IChromosomeInterval int SiftIndex { get; } int PolyPhenIndex { get; } - ITranslation Translation { get; } - IInterval[] MicroRnas { get; } - int[] Selenocysteines { get; } - IRnaEdit[] RnaEdits { get; } + ITranslation Translation { get; } + IInterval[] MicroRnas { get; } + IRnaEdit[] RnaEdits { get; } + AminoAcidEdit[] AminoAcidEdits { get; } bool CdsStartNotFound { get; } bool CdsEndNotFound { get; } diff --git a/VariantAnnotation/Algorithms/Swap.cs b/VariantAnnotation/Algorithms/Swap.cs index a89d0031..5e7f61c5 100644 --- a/VariantAnnotation/Algorithms/Swap.cs +++ b/VariantAnnotation/Algorithms/Swap.cs @@ -1,15 +1,15 @@ -namespace VariantAnnotation.Algorithms -{ - public static class Swap - { - /// - /// swaps two integers - /// - public static void Int(ref int a, ref int b) - { - var temp = a; - a = b; - b = temp; - } - } +namespace VariantAnnotation.Algorithms +{ + public static class Swap + { + /// + /// swaps two integers + /// + public static void Int(ref int a, ref int b) + { + var temp = a; + a = b; + b = temp; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcid.cs b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcid.cs new file mode 100644 index 00000000..e3b27339 --- /dev/null +++ 
b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcid.cs @@ -0,0 +1,143 @@ +using System; +using System.Buffers; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.AnnotatedPositions.AminoAcids +{ + public sealed class AminoAcid + { + private readonly AminoAcidEntry[] _aminoAcidEntries; + private readonly int _maxIndex; + + internal AminoAcid(AminoAcidEntry[] aminoAcidEntries) + { + _aminoAcidEntries = aminoAcidEntries; + _maxIndex = aminoAcidEntries.Length - 1; + } + + private static readonly (string, string) EmptyTuple = (string.Empty, string.Empty); + + public (string ReferenceAminoAcids, string AlternateAminoAcids) Translate(string referenceCodons, + string alternateCodons, AminoAcidEdit[] aaEdits, int aaStart) + { + if (string.IsNullOrEmpty(referenceCodons) && string.IsNullOrEmpty(alternateCodons)) return EmptyTuple; + if (referenceCodons != null && (referenceCodons.Contains("N") || alternateCodons.Contains("N"))) + return EmptyTuple; + + string referenceAminoAcids = TranslateBases(referenceCodons, aaEdits, aaStart, false); + string alternateAminoAcids = TranslateBases(alternateCodons, null, aaStart, false); + return (referenceAminoAcids, alternateAminoAcids); + } + + public string TranslateBases(string bases, AminoAcidEdit[] aaEdits, int aaStart, bool ignoreIncompleteCodons) + { + if (bases == null) return null; + + int numBases = bases.Length; + int numAminoAcids = bases.Length / 3; + + ReadOnlySpan cdsSpan = bases.AsSpan(); + ArrayPool charPool = ArrayPool.Shared; + + // convert the bases to uppercase + char[] upperCds = charPool.Rent(numBases); + Span upperCdsSpan = upperCds.AsSpan().Slice(0, numBases); + cdsSpan.ToUpperInvariant(upperCdsSpan); + + // create output buffer + bool addX = !ignoreIncompleteCodons && numAminoAcids * 3 != numBases; + int bufferSize = addX ? 
numAminoAcids + 1 : numAminoAcids; + + char[] buffer = charPool.Rent(bufferSize); + Span aaSpan = buffer.AsSpan().Slice(0, bufferSize); + + // convert codons to amino acids + var offset = 0; + for (var i = 0; i < numAminoAcids; i++, offset += 3) + { + ReadOnlySpan span = upperCdsSpan.Slice(offset, 3); + int triplet = (span[0] << 16) | (span[1] << 8) | span[2]; + aaSpan[i] = BinarySearch(triplet); + } + + if (addX) aaSpan[numAminoAcids] = 'X'; + + if (aaEdits != null) ApplyAminoAcidEdits(aaSpan, aaEdits, aaStart); + var aaString = aaSpan.ToString(); + + charPool.Return(upperCds); + charPool.Return(buffer); + + return aaString; + } + + // this is only used for alt alleles and therefore no need to support AA edits + public string TranslateBases(ReadOnlySpan cdsSpan) + { + int numBases = cdsSpan.Length; + int numAminoAcids = cdsSpan.Length / 3; + + ArrayPool charPool = ArrayPool.Shared; + + // convert the bases to uppercase + char[] upperCds = charPool.Rent(numBases); + Span upperCdsSpan = upperCds.AsSpan().Slice(0, numBases); + cdsSpan.ToUpperInvariant(upperCdsSpan); + + // create output buffer + int bufferSize = numAminoAcids; + + char[] buffer = charPool.Rent(bufferSize); + Span aaSpan = buffer.AsSpan().Slice(0, bufferSize); + + // convert codons to amino acids + var offset = 0; + for (var i = 0; i < numAminoAcids; i++, offset += 3) + { + ReadOnlySpan span = upperCdsSpan.Slice(offset, 3); + int triplet = (span[0] << 16) | (span[1] << 8) | span[2]; + aaSpan[i] = BinarySearch(triplet); + } + + var aaString = new string(aaSpan); + + charPool.Return(upperCds); + charPool.Return(buffer); + + return aaString; + } + + private static void ApplyAminoAcidEdits(Span aaSpan, AminoAcidEdit[] aaEdits, int aaStart) + { + int aaEnd = aaStart + aaSpan.Length - 1; + + foreach (var aaEdit in aaEdits) + { + if (aaEdit.Position > aaEnd) break; + if (aaEdit.Position < aaStart) continue; + aaSpan[aaEdit.Position - aaStart] = aaEdit.AminoAcid; + } + } + + private char BinarySearch(int 
triplet) + { + var begin = 0; + int end = _maxIndex; + + while (begin <= end) + { + int index = begin + (end - begin >> 1); + var entry = _aminoAcidEntries[index]; + + if (entry.Triplet == triplet) return entry.AminoAcid; + if (entry.Triplet < triplet) begin = index + 1; + else end = index - 1; + } + + return 'X'; + } + + public static string AddUnknownAminoAcid(string aminoAcids) => + aminoAcids.StartsWith(AminoAcidCommon.StopCodon) ? aminoAcids : aminoAcids + 'X'; + } +} \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidAbbreviation.cs b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidAbbreviation.cs new file mode 100644 index 00000000..ddd0a10d --- /dev/null +++ b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidAbbreviation.cs @@ -0,0 +1,82 @@ +using System; +using OptimizedCore; + +namespace VariantAnnotation.AnnotatedPositions.AminoAcids +{ + public static class AminoAcidAbbreviation + { + private static readonly AbbreviationEntry[] Abbreviations = + { + new AbbreviationEntry('*', "Ter"), + new AbbreviationEntry('A', "Ala"), + new AbbreviationEntry('B', "Asx"), // not expected, included for completeness + new AbbreviationEntry('C', "Cys"), + new AbbreviationEntry('D', "Asp"), + new AbbreviationEntry('E', "Glu"), + new AbbreviationEntry('F', "Phe"), + new AbbreviationEntry('G', "Gly"), + new AbbreviationEntry('H', "His"), + new AbbreviationEntry('I', "Ile"), + new AbbreviationEntry('J', "Xle"), // not expected, included for completeness + new AbbreviationEntry('K', "Lys"), + new AbbreviationEntry('L', "Leu"), + new AbbreviationEntry('M', "Met"), + new AbbreviationEntry('N', "Asn"), + new AbbreviationEntry('O', "Pyl"), // rare - pyrrolysine + new AbbreviationEntry('P', "Pro"), + new AbbreviationEntry('Q', "Gln"), + new AbbreviationEntry('R', "Arg"), + new AbbreviationEntry('S', "Ser"), + new AbbreviationEntry('T', "Thr"), + new AbbreviationEntry('U', "Sec"), // rare - selenocysteine + new 
AbbreviationEntry('V', "Val"), + new AbbreviationEntry('W', "Trp"), + new AbbreviationEntry('X', "Xaa"), // not expected, included for completeness + new AbbreviationEntry('Y', "Tyr"), + new AbbreviationEntry('Z', "Glx") // not expected, included for completeness + }; + + private static readonly int EndIndex = Abbreviations.Length - 1; + + public static string GetThreeLetterAbbreviation(char oneLetterCode) => BinarySearch(oneLetterCode); + + public static string ConvertToThreeLetterAbbreviations(string aminoAcids) + { + if (string.IsNullOrEmpty(aminoAcids)) return ""; + + var sb = StringBuilderCache.Acquire(); + foreach (char oneLetterCode in aminoAcids) sb.Append(BinarySearch(oneLetterCode)); + return StringBuilderCache.GetStringAndRelease(sb); + } + + private static string BinarySearch(char oneLetterCode) + { + var begin = 0; + int end = EndIndex; + + while (begin <= end) + { + int index = begin + (end - begin >> 1); + var entry = Abbreviations[index]; + + if (entry.OneLetterCode == oneLetterCode) return entry.ThreeLetterCode; + if (entry.OneLetterCode < oneLetterCode) begin = index + 1; + else end = index - 1; + } + + throw new NotSupportedException($"Unable to convert the following 1-letter code to a 3-letter amino acid abbreviation: {oneLetterCode}"); + } + + private readonly struct AbbreviationEntry + { + public readonly char OneLetterCode; + public readonly string ThreeLetterCode; + + public AbbreviationEntry(char oneLetterCode, string threeLetterCode) + { + OneLetterCode = oneLetterCode; + ThreeLetterCode = threeLetterCode; + } + } + } +} \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidCommon.cs b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidCommon.cs new file mode 100644 index 00000000..b28d59f4 --- /dev/null +++ b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidCommon.cs @@ -0,0 +1,153 @@ +namespace VariantAnnotation.AnnotatedPositions.AminoAcids +{ + public static class 
AminoAcidCommon + { + public const char StopCodon = '*'; + + public static readonly AminoAcid StandardAminoAcids; + public static readonly AminoAcid MitochondrialAminoAcids; + + static AminoAcidCommon() + { + AminoAcidEntry[] standardEntries = + { + new AminoAcidEntry(4276545, 'K'), // AAA + new AminoAcidEntry(4276547, 'N'), // AAC + new AminoAcidEntry(4276551, 'K'), // AAG + new AminoAcidEntry(4276564, 'N'), // AAT + new AminoAcidEntry(4277057, 'T'), // ACA + new AminoAcidEntry(4277059, 'T'), // ACC + new AminoAcidEntry(4277063, 'T'), // ACG + new AminoAcidEntry(4277076, 'T'), // ACT + new AminoAcidEntry(4278081, 'R'), // AGA + new AminoAcidEntry(4278083, 'S'), // AGC + new AminoAcidEntry(4278087, 'R'), // AGG + new AminoAcidEntry(4278100, 'S'), // AGT + new AminoAcidEntry(4281409, 'I'), // ATA + new AminoAcidEntry(4281411, 'I'), // ATC + new AminoAcidEntry(4281415, 'M'), // ATG + new AminoAcidEntry(4281428, 'I'), // ATT + new AminoAcidEntry(4407617, 'Q'), // CAA + new AminoAcidEntry(4407619, 'H'), // CAC + new AminoAcidEntry(4407623, 'Q'), // CAG + new AminoAcidEntry(4407636, 'H'), // CAT + new AminoAcidEntry(4408129, 'P'), // CCA + new AminoAcidEntry(4408131, 'P'), // CCC + new AminoAcidEntry(4408135, 'P'), // CCG + new AminoAcidEntry(4408148, 'P'), // CCT + new AminoAcidEntry(4409153, 'R'), // CGA + new AminoAcidEntry(4409155, 'R'), // CGC + new AminoAcidEntry(4409159, 'R'), // CGG + new AminoAcidEntry(4409172, 'R'), // CGT + new AminoAcidEntry(4412481, 'L'), // CTA + new AminoAcidEntry(4412483, 'L'), // CTC + new AminoAcidEntry(4412487, 'L'), // CTG + new AminoAcidEntry(4412500, 'L'), // CTT + new AminoAcidEntry(4669761, 'E'), // GAA + new AminoAcidEntry(4669763, 'D'), // GAC + new AminoAcidEntry(4669767, 'E'), // GAG + new AminoAcidEntry(4669780, 'D'), // GAT + new AminoAcidEntry(4670273, 'A'), // GCA + new AminoAcidEntry(4670275, 'A'), // GCC + new AminoAcidEntry(4670279, 'A'), // GCG + new AminoAcidEntry(4670292, 'A'), // GCT + new AminoAcidEntry(4671297, 
'G'), // GGA + new AminoAcidEntry(4671299, 'G'), // GGC + new AminoAcidEntry(4671303, 'G'), // GGG + new AminoAcidEntry(4671316, 'G'), // GGT + new AminoAcidEntry(4674625, 'V'), // GTA + new AminoAcidEntry(4674627, 'V'), // GTC + new AminoAcidEntry(4674631, 'V'), // GTG + new AminoAcidEntry(4674644, 'V'), // GTT + new AminoAcidEntry(5521729, '*'), // TAA + new AminoAcidEntry(5521731, 'Y'), // TAC + new AminoAcidEntry(5521735, '*'), // TAG + new AminoAcidEntry(5521748, 'Y'), // TAT + new AminoAcidEntry(5522241, 'S'), // TCA + new AminoAcidEntry(5522243, 'S'), // TCC + new AminoAcidEntry(5522247, 'S'), // TCG + new AminoAcidEntry(5522260, 'S'), // TCT + new AminoAcidEntry(5523265, '*'), // TGA + new AminoAcidEntry(5523267, 'C'), // TGC + new AminoAcidEntry(5523271, 'W'), // TGG + new AminoAcidEntry(5523284, 'C'), // TGT + new AminoAcidEntry(5526593, 'L'), // TTA + new AminoAcidEntry(5526595, 'F'), // TTC + new AminoAcidEntry(5526599, 'L'), // TTG + new AminoAcidEntry(5526612, 'F') // TTT + }; + + StandardAminoAcids = new AminoAcid(standardEntries); + + AminoAcidEntry[] mitochondrialEntries = + { + new AminoAcidEntry(4276545, 'K'), // AAA + new AminoAcidEntry(4276547, 'N'), // AAC + new AminoAcidEntry(4276551, 'K'), // AAG + new AminoAcidEntry(4276564, 'N'), // AAT + new AminoAcidEntry(4277057, 'T'), // ACA + new AminoAcidEntry(4277059, 'T'), // ACC + new AminoAcidEntry(4277063, 'T'), // ACG + new AminoAcidEntry(4277076, 'T'), // ACT + new AminoAcidEntry(4278081, '*'), // AGA - R to * + new AminoAcidEntry(4278083, 'S'), // AGC + new AminoAcidEntry(4278087, '*'), // AGG - R to * + new AminoAcidEntry(4278100, 'S'), // AGT + new AminoAcidEntry(4281409, 'M'), // ATA - I to M + new AminoAcidEntry(4281411, 'I'), // ATC + new AminoAcidEntry(4281415, 'M'), // ATG + new AminoAcidEntry(4281428, 'I'), // ATT + new AminoAcidEntry(4407617, 'Q'), // CAA + new AminoAcidEntry(4407619, 'H'), // CAC + new AminoAcidEntry(4407623, 'Q'), // CAG + new AminoAcidEntry(4407636, 'H'), // CAT + 
new AminoAcidEntry(4408129, 'P'), // CCA + new AminoAcidEntry(4408131, 'P'), // CCC + new AminoAcidEntry(4408135, 'P'), // CCG + new AminoAcidEntry(4408148, 'P'), // CCT + new AminoAcidEntry(4409153, 'R'), // CGA + new AminoAcidEntry(4409155, 'R'), // CGC + new AminoAcidEntry(4409159, 'R'), // CGG + new AminoAcidEntry(4409172, 'R'), // CGT + new AminoAcidEntry(4412481, 'L'), // CTA + new AminoAcidEntry(4412483, 'L'), // CTC + new AminoAcidEntry(4412487, 'L'), // CTG + new AminoAcidEntry(4412500, 'L'), // CTT + new AminoAcidEntry(4669761, 'E'), // GAA + new AminoAcidEntry(4669763, 'D'), // GAC + new AminoAcidEntry(4669767, 'E'), // GAG + new AminoAcidEntry(4669780, 'D'), // GAT + new AminoAcidEntry(4670273, 'A'), // GCA + new AminoAcidEntry(4670275, 'A'), // GCC + new AminoAcidEntry(4670279, 'A'), // GCG + new AminoAcidEntry(4670292, 'A'), // GCT + new AminoAcidEntry(4671297, 'G'), // GGA + new AminoAcidEntry(4671299, 'G'), // GGC + new AminoAcidEntry(4671303, 'G'), // GGG + new AminoAcidEntry(4671316, 'G'), // GGT + new AminoAcidEntry(4674625, 'V'), // GTA + new AminoAcidEntry(4674627, 'V'), // GTC + new AminoAcidEntry(4674631, 'V'), // GTG + new AminoAcidEntry(4674644, 'V'), // GTT + new AminoAcidEntry(5521729, '*'), // TAA + new AminoAcidEntry(5521731, 'Y'), // TAC + new AminoAcidEntry(5521735, '*'), // TAG + new AminoAcidEntry(5521748, 'Y'), // TAT + new AminoAcidEntry(5522241, 'S'), // TCA + new AminoAcidEntry(5522243, 'S'), // TCC + new AminoAcidEntry(5522247, 'S'), // TCG + new AminoAcidEntry(5522260, 'S'), // TCT + new AminoAcidEntry(5523265, 'W'), // TGA - * to W + new AminoAcidEntry(5523267, 'C'), // TGC + new AminoAcidEntry(5523271, 'W'), // TGG + new AminoAcidEntry(5523284, 'C'), // TGT + new AminoAcidEntry(5526593, 'L'), // TTA + new AminoAcidEntry(5526595, 'F'), // TTC + new AminoAcidEntry(5526599, 'L'), // TTG + new AminoAcidEntry(5526612, 'F') // TTT + }; + + MitochondrialAminoAcids = new AminoAcid(mitochondrialEntries); + } + } +} \ No newline at 
end of file diff --git a/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidEntry.cs b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidEntry.cs new file mode 100644 index 00000000..0f29bfe3 --- /dev/null +++ b/VariantAnnotation/AnnotatedPositions/AminoAcids/AminoAcidEntry.cs @@ -0,0 +1,14 @@ +namespace VariantAnnotation.AnnotatedPositions.AminoAcids +{ + internal readonly struct AminoAcidEntry + { + public readonly int Triplet; + public readonly char AminoAcid; + + public AminoAcidEntry(int triplet, char aminoAcid) + { + Triplet = triplet; + AminoAcid = aminoAcid; + } + } +} \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AnnotatedPosition.cs b/VariantAnnotation/AnnotatedPositions/AnnotatedPosition.cs index f00a9432..11af6941 100644 --- a/VariantAnnotation/AnnotatedPositions/AnnotatedPosition.cs +++ b/VariantAnnotation/AnnotatedPositions/AnnotatedPosition.cs @@ -1,99 +1,99 @@ -using System.Collections.Generic; -using System.Linq; -using OptimizedCore; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Positions; -using VariantAnnotation.Interface.SA; -using VariantAnnotation.IO; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public sealed class AnnotatedPosition : IAnnotatedPosition - { - public IPosition Position { get; } - public string CytogeneticBand { get; set; } - public IAnnotatedVariant[] AnnotatedVariants { get; } - public IList SupplementaryIntervals { get; } = new List(); - - public AnnotatedPosition(IPosition position, IAnnotatedVariant[] annotatedVariants) - { - Position = position; - AnnotatedVariants = annotatedVariants; - } - - public string GetJsonString() - { - if (AnnotatedVariants == null || AnnotatedVariants.Length == 0) return null; - - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - - var originalChromName = Position.VcfFields[0]; - - 
jsonObject.AddStringValue("chromosome", originalChromName); - jsonObject.AddIntValue("position", Position.Start); - - if (IsShortTandemRepeat()) - { - jsonObject.AddStringValue("repeatUnit", Position.InfoData?.RepeatUnit); - jsonObject.AddIntValue("refRepeatCount", Position.InfoData?.RefRepeatCount); - } - - if (AnnotatedVariants.Any(IsStructVariant)) jsonObject.AddIntValue("svEnd", Position.InfoData?.End); - - jsonObject.AddStringValue("refAllele", Position.RefAllele); - jsonObject.AddStringValues("altAlleles", Position.AltAlleles); - - jsonObject.AddDoubleValue("quality", Position.Quality); - - jsonObject.AddStringValues("filters", Position.Filters); - - jsonObject.AddIntValues("ciPos", Position.InfoData?.CiPos); - jsonObject.AddIntValues("ciEnd", Position.InfoData?.CiEnd); - jsonObject.AddIntValue("svLength", Position.InfoData?.SvLength); - - jsonObject.AddDoubleValue("strandBias", Position.InfoData?.StrandBias,JsonCommon.FrequencyRoundingFormat); - jsonObject.AddIntValue("jointSomaticNormalQuality", Position.InfoData?.JointSomaticNormalQuality); - jsonObject.AddDoubleValue("recalibratedQuality", Position.InfoData?.RecalibratedQuality); - jsonObject.AddIntValue("copyNumber", Position.InfoData?.CopyNumber); - jsonObject.AddBoolValue("colocalizedWithCnv", Position.InfoData?.ColocalizedWithCnv ?? 
false); - - - jsonObject.AddStringValue("cytogeneticBand", CytogeneticBand); - - if (Position.Samples != null && Position.Samples.Length > 0) jsonObject.AddStringValues("samples", Position.Samples.Select(s => s.GetJsonString()), false); - - if (SupplementaryIntervals != null && SupplementaryIntervals.Any()) - { - AddSuppIntervalToJsonObject(jsonObject); - } - - jsonObject.AddStringValues("variants", AnnotatedVariants.Select(v => v.GetJsonString(originalChromName)), false); - - sb.Append(JsonObject.CloseBrace); - return StringBuilderCache.GetStringAndRelease(sb); - } - - private static bool IsStructVariant(IAnnotatedVariant annotatedVariant) => - annotatedVariant.Variant.Behavior.StructuralVariantConsequence; - - private bool IsShortTandemRepeat() - { - return Position.Variants.Any(x => - x.Type == VariantType.short_tandem_repeat_variation - || x.Type == VariantType.short_tandem_repeat_contraction - || x.Type == VariantType.short_tandem_repeat_expansion); - } - - private void AddSuppIntervalToJsonObject(JsonObject jsonObject) - { - foreach (var si in SupplementaryIntervals) - { - jsonObject.AddObjectValue(si.JsonKey, si); - } - } - } +using System.Collections.Generic; +using System.Linq; +using OptimizedCore; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Positions; +using VariantAnnotation.Interface.SA; +using VariantAnnotation.IO; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public sealed class AnnotatedPosition : IAnnotatedPosition + { + public IPosition Position { get; } + public string CytogeneticBand { get; set; } + public IAnnotatedVariant[] AnnotatedVariants { get; } + public IList SupplementaryIntervals { get; } = new List(); + + public AnnotatedPosition(IPosition position, IAnnotatedVariant[] annotatedVariants) + { + Position = position; + AnnotatedVariants = annotatedVariants; + } + + public string GetJsonString() + { + if (AnnotatedVariants == null || AnnotatedVariants.Length == 0) 
return null; + + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + + var originalChromName = Position.VcfFields[0]; + + jsonObject.AddStringValue("chromosome", originalChromName); + jsonObject.AddIntValue("position", Position.Start); + + if (IsShortTandemRepeat()) + { + jsonObject.AddStringValue("repeatUnit", Position.InfoData?.RepeatUnit); + jsonObject.AddIntValue("refRepeatCount", Position.InfoData?.RefRepeatCount); + } + + if (AnnotatedVariants.Any(IsStructVariant)) jsonObject.AddIntValue("svEnd", Position.InfoData?.End); + + jsonObject.AddStringValue("refAllele", Position.RefAllele); + jsonObject.AddStringValues("altAlleles", Position.AltAlleles); + + jsonObject.AddDoubleValue("quality", Position.Quality); + + jsonObject.AddStringValues("filters", Position.Filters); + + jsonObject.AddIntValues("ciPos", Position.InfoData?.CiPos); + jsonObject.AddIntValues("ciEnd", Position.InfoData?.CiEnd); + jsonObject.AddIntValue("svLength", Position.InfoData?.SvLength); + + jsonObject.AddDoubleValue("strandBias", Position.InfoData?.StrandBias,JsonCommon.FrequencyRoundingFormat); + jsonObject.AddIntValue("jointSomaticNormalQuality", Position.InfoData?.JointSomaticNormalQuality); + jsonObject.AddDoubleValue("recalibratedQuality", Position.InfoData?.RecalibratedQuality); + jsonObject.AddIntValue("copyNumber", Position.InfoData?.CopyNumber); + jsonObject.AddBoolValue("colocalizedWithCnv", Position.InfoData?.ColocalizedWithCnv ?? 
false); + + + jsonObject.AddStringValue("cytogeneticBand", CytogeneticBand); + + if (Position.Samples != null && Position.Samples.Length > 0) jsonObject.AddStringValues("samples", Position.Samples.Select(s => s.GetJsonString()), false); + + if (SupplementaryIntervals != null && SupplementaryIntervals.Any()) + { + AddSuppIntervalToJsonObject(jsonObject); + } + + jsonObject.AddStringValues("variants", AnnotatedVariants.Select(v => v.GetJsonString(originalChromName)), false); + + sb.Append(JsonObject.CloseBrace); + return StringBuilderCache.GetStringAndRelease(sb); + } + + private static bool IsStructVariant(IAnnotatedVariant annotatedVariant) => + annotatedVariant.Variant.Behavior.StructuralVariantConsequence; + + private bool IsShortTandemRepeat() + { + return Position.Variants.Any(x => + x.Type == VariantType.short_tandem_repeat_variation + || x.Type == VariantType.short_tandem_repeat_contraction + || x.Type == VariantType.short_tandem_repeat_expansion); + } + + private void AddSuppIntervalToJsonObject(JsonObject jsonObject) + { + foreach (var si in SupplementaryIntervals) + { + jsonObject.AddObjectValue(si.JsonKey, si); + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AnnotatedRegulatoryRegion.cs b/VariantAnnotation/AnnotatedPositions/AnnotatedRegulatoryRegion.cs index d280cd18..a3fd0856 100644 --- a/VariantAnnotation/AnnotatedPositions/AnnotatedRegulatoryRegion.cs +++ b/VariantAnnotation/AnnotatedPositions/AnnotatedRegulatoryRegion.cs @@ -1,31 +1,31 @@ -using System.Collections.Generic; -using System.Linq; -using System.Text; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.IO; - -namespace VariantAnnotation.AnnotatedPositions -{ - public sealed class AnnotatedRegulatoryRegion : IAnnotatedRegulatoryRegion - { - public IRegulatoryRegion RegulatoryRegion { get; } - public IEnumerable Consequences { get; } - - public AnnotatedRegulatoryRegion(IRegulatoryRegion regulatoryRegion, List 
consequences) - { - RegulatoryRegion = regulatoryRegion; - Consequences = consequences; - } - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("id", RegulatoryRegion.Id.WithoutVersion); - jsonObject.AddStringValue("type", RegulatoryRegion.Type.ToString()); - jsonObject.AddStringValues("consequence", Consequences?.Select(ConsequenceUtil.GetConsequence)); - sb.Append(JsonObject.CloseBrace); - } - } +using System.Collections.Generic; +using System.Linq; +using System.Text; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.IO; + +namespace VariantAnnotation.AnnotatedPositions +{ + public sealed class AnnotatedRegulatoryRegion : IAnnotatedRegulatoryRegion + { + public IRegulatoryRegion RegulatoryRegion { get; } + public IEnumerable Consequences { get; } + + public AnnotatedRegulatoryRegion(IRegulatoryRegion regulatoryRegion, List consequences) + { + RegulatoryRegion = regulatoryRegion; + Consequences = consequences; + } + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + jsonObject.AddStringValue("id", RegulatoryRegion.Id.WithoutVersion); + jsonObject.AddStringValue("type", RegulatoryRegion.Type.ToString()); + jsonObject.AddStringValues("consequence", Consequences?.Select(ConsequenceUtil.GetConsequence)); + sb.Append(JsonObject.CloseBrace); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/AnnotatedVariant.cs b/VariantAnnotation/AnnotatedPositions/AnnotatedVariant.cs index a40e9862..f8356207 100644 --- a/VariantAnnotation/AnnotatedPositions/AnnotatedVariant.cs +++ b/VariantAnnotation/AnnotatedPositions/AnnotatedVariant.cs @@ -1,83 +1,83 @@ -using System.Collections.Generic; -using OptimizedCore; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.SA; -using VariantAnnotation.IO; -using 
Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public sealed class AnnotatedVariant : IAnnotatedVariant - { - public IVariant Variant { get; } - public string HgvsgNotation { get; set; } - public IList RegulatoryRegions { get; } = new List(); - public IList Transcripts { get; } = new List(); - public IList SupplementaryAnnotations { get; } = new List(); - public IList SaList { get; } = new List(); - public double? PhylopScore { get; set; } - public IList PluginDataSet { get; } = new List(); - - public AnnotatedVariant(IVariant variant) => Variant = variant; - - public string GetJsonString(string originalChromName) - { - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - // data section - sb.Append(JsonObject.OpenBrace); - - jsonObject.AddStringValue("vid", Variant.VariantId); - jsonObject.AddStringValue("chromosome", originalChromName); - jsonObject.AddIntValue("begin", Variant.Start); - jsonObject.AddIntValue("end", Variant.End); - jsonObject.AddBoolValue("isReferenceMinorAllele", Variant.IsRefMinor); - jsonObject.AddBoolValue("isStructuralVariant", Variant.Behavior.StructuralVariantConsequence); - - jsonObject.AddStringValue("refAllele", - string.IsNullOrEmpty(Variant.RefAllele) ? "-" : Variant.RefAllele); - jsonObject.AddStringValue("altAllele", - string.IsNullOrEmpty(Variant.AltAllele) ? 
"-" : Variant.AltAllele); - - var variantType = GetVariantType(Variant.Type); - jsonObject.AddStringValue("variantType", variantType.ToString()); - jsonObject.AddBoolValue("isDecomposedVariant", Variant.IsDecomposed); - if (variantType.ToString() != "SNV") jsonObject.AddBoolValue("isRecomposedVariant", Variant.IsRecomposed); - jsonObject.AddStringValue("hgvsg", HgvsgNotation); - - jsonObject.AddDoubleValue("phylopScore", PhylopScore); - - if (RegulatoryRegions?.Count > 0) jsonObject.AddObjectValues("regulatoryRegions", RegulatoryRegions); - - foreach (ISupplementaryAnnotation saItem in SaList) - { - jsonObject.AddObjectValue(saItem.JsonKey, saItem); - } - - foreach (var pluginData in PluginDataSet) - { - jsonObject.AddStringValue(pluginData.Name, pluginData.GetJsonString(), false); - } - - if (Transcripts?.Count > 0) jsonObject.AddObjectValues("transcripts", Transcripts); - - sb.Append(JsonObject.CloseBrace); - return StringBuilderCache.GetStringAndRelease(sb); - } - - private static VariantType GetVariantType(VariantType variantType) - { - // ReSharper disable once SwitchStatementMissingSomeCases - switch (variantType) - { - case VariantType.short_tandem_repeat_variation: - case VariantType.short_tandem_repeat_contraction: - case VariantType.short_tandem_repeat_expansion: - return VariantType.short_tandem_repeat_variation; - default: - return variantType; - } - } - } +using System.Collections.Generic; +using OptimizedCore; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.SA; +using VariantAnnotation.IO; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public sealed class AnnotatedVariant : IAnnotatedVariant + { + public IVariant Variant { get; } + public string HgvsgNotation { get; set; } + public IList RegulatoryRegions { get; } = new List(); + public IList Transcripts { get; } = new List(); + public IList SupplementaryAnnotations { get; } = new List(); + public IList SaList { get; } = new List(); + 
public double? PhylopScore { get; set; } + public IList PluginDataSet { get; } = new List(); + + public AnnotatedVariant(IVariant variant) => Variant = variant; + + public string GetJsonString(string originalChromName) + { + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + // data section + sb.Append(JsonObject.OpenBrace); + + jsonObject.AddStringValue("vid", Variant.VariantId); + jsonObject.AddStringValue("chromosome", originalChromName); + jsonObject.AddIntValue("begin", Variant.Start); + jsonObject.AddIntValue("end", Variant.End); + jsonObject.AddBoolValue("isReferenceMinorAllele", Variant.IsRefMinor); + jsonObject.AddBoolValue("isStructuralVariant", Variant.Behavior.StructuralVariantConsequence); + + jsonObject.AddStringValue("refAllele", + string.IsNullOrEmpty(Variant.RefAllele) ? "-" : Variant.RefAllele); + jsonObject.AddStringValue("altAllele", + string.IsNullOrEmpty(Variant.AltAllele) ? "-" : Variant.AltAllele); + + var variantType = GetVariantType(Variant.Type); + jsonObject.AddStringValue("variantType", variantType.ToString()); + jsonObject.AddBoolValue("isDecomposedVariant", Variant.IsDecomposed); + if (variantType.ToString() != "SNV") jsonObject.AddBoolValue("isRecomposedVariant", Variant.IsRecomposed); + jsonObject.AddStringValue("hgvsg", HgvsgNotation); + + jsonObject.AddDoubleValue("phylopScore", PhylopScore); + + if (RegulatoryRegions?.Count > 0) jsonObject.AddObjectValues("regulatoryRegions", RegulatoryRegions); + + foreach (ISupplementaryAnnotation saItem in SaList) + { + jsonObject.AddObjectValue(saItem.JsonKey, saItem); + } + + foreach (var pluginData in PluginDataSet) + { + jsonObject.AddStringValue(pluginData.Name, pluginData.GetJsonString(), false); + } + + if (Transcripts?.Count > 0) jsonObject.AddObjectValues("transcripts", Transcripts); + + sb.Append(JsonObject.CloseBrace); + return StringBuilderCache.GetStringAndRelease(sb); + } + + private static VariantType GetVariantType(VariantType variantType) + { + 
// ReSharper disable once SwitchStatementMissingSomeCases + switch (variantType) + { + case VariantType.short_tandem_repeat_variation: + case VariantType.short_tandem_repeat_contraction: + case VariantType.short_tandem_repeat_expansion: + return VariantType.short_tandem_repeat_variation; + default: + return variantType; + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Consequence/Consequences.cs b/VariantAnnotation/AnnotatedPositions/Consequence/Consequences.cs index d291357f..d9d66762 100644 --- a/VariantAnnotation/AnnotatedPositions/Consequence/Consequences.cs +++ b/VariantAnnotation/AnnotatedPositions/Consequence/Consequences.cs @@ -1,158 +1,158 @@ -using System; -using System.Collections.Generic; -using System.Collections.Immutable; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions.Consequence -{ - public sealed class Consequences - { - private readonly List _consequences; - public List GetConsequences() => _consequences; - - private readonly IVariantEffect _variantEffect; - private readonly IFeatureVariantEffects _featureEffect; - - private readonly ImmutableArray<(Func, ConsequenceTag)> _tier3Consequences; - - public Consequences(IVariantEffect variantEffect = null, IFeatureVariantEffects featureEffect = null) - { - _consequences = new List(); - _variantEffect = variantEffect; - _featureEffect = featureEffect; - - _tier3Consequences = new List<(Func, ConsequenceTag)> - { - (() => _variantEffect.IsSpliceDonorVariant(), ConsequenceTag.splice_donor_variant), - (() => _variantEffect.IsSpliceAcceptorVariant(), ConsequenceTag.splice_acceptor_variant), - (() => _variantEffect.IsStopGained(), ConsequenceTag.stop_gained), - (() => _variantEffect.IsFrameshiftVariant(), ConsequenceTag.frameshift_variant), - (() => _variantEffect.IsStopLost(), ConsequenceTag.stop_lost), - (() => _variantEffect.IsStartLost(), ConsequenceTag.start_lost), - (() => 
_variantEffect.IsInframeInsertion(), ConsequenceTag.inframe_insertion), - (() => _variantEffect.IsInframeDeletion(), ConsequenceTag.inframe_deletion), - (() => _variantEffect.IsMissenseVariant(), ConsequenceTag.missense_variant), - (() => _variantEffect.IsProteinAlteringVariant(), ConsequenceTag.protein_altering_variant), - (() => _variantEffect.IsSpliceRegionVariant(), ConsequenceTag.splice_region_variant), - (() => _variantEffect.IsIncompleteTerminalCodonVariant(), ConsequenceTag.incomplete_terminal_codon_variant), - (() => _variantEffect.IsStartRetained(), ConsequenceTag.start_retained_variant), - (() => _variantEffect.IsStopRetained(), ConsequenceTag.stop_retained_variant), - (() => _variantEffect.IsSynonymousVariant(), ConsequenceTag.synonymous_variant), - (() => _variantEffect.IsCodingSequenceVariant(), ConsequenceTag.coding_sequence_variant), - (() => _variantEffect.IsFivePrimeUtrVariant(), ConsequenceTag.five_prime_UTR_variant), - (() => _variantEffect.IsThreePrimeUtrVariant(), ConsequenceTag.three_prime_UTR_variant), - (() => _variantEffect.IsNonCodingTranscriptExonVariant(), ConsequenceTag.non_coding_transcript_exon_variant), - (() => _variantEffect.IsWithinIntron(), ConsequenceTag.intron_variant), - (() => _variantEffect.IsNonsenseMediatedDecayTranscriptVariant(), ConsequenceTag.NMD_transcript_variant), - (() => _variantEffect.IsNonCodingTranscriptVariant(), ConsequenceTag.non_coding_transcript_variant), - (() => _featureEffect.Elongation(), ConsequenceTag.feature_elongation), - (() => _featureEffect.Truncation(), ConsequenceTag.transcript_truncation) - }.ToImmutableArray(); - } - - public void DetermineFlankingVariantEffects(bool isDownstreamVariant) - { - _consequences.Add(isDownstreamVariant - ? 
ConsequenceTag.downstream_gene_variant - : ConsequenceTag.upstream_gene_variant); - } - - public void DetermineSmallVariantEffects() - { - GetTier1Types(); - if (_consequences.Count == 0) GetTier2Types(); - if (_consequences.Count == 0) GetTier3Types(); - if (_consequences.Count == 0) _consequences.Add(ConsequenceTag.transcript_variant); - } - - public void DetermineStructuralVariantEffect(VariantType variantType, bool addGeneFusion) - { - GetTier1Types(); - if (_consequences.Count == 0) GetStructuralTier2Types(); - if (addGeneFusion) _consequences.Add(ConsequenceTag.unidirectional_gene_fusion); - - DetermineCopyNumberEffect(variantType); - DetermineRepeatExpansionEffect(variantType); - if (_consequences.Count == 0) _consequences.Add(ConsequenceTag.transcript_variant); - } - - private void DetermineRepeatExpansionEffect(VariantType variantType) - { - // ReSharper disable once SwitchStatementMissingSomeCases - switch (variantType) - { - case VariantType.short_tandem_repeat_variation: - _consequences.Add(ConsequenceTag.short_tandem_repeat_change); - break; - case VariantType.short_tandem_repeat_contraction: - _consequences.Add(ConsequenceTag.short_tandem_repeat_contraction); - break; - case VariantType.short_tandem_repeat_expansion: - _consequences.Add(ConsequenceTag.short_tandem_repeat_expansion); - break; - } - } - - private void DetermineCopyNumberEffect(VariantType variantType) - { - // ReSharper disable once SwitchStatementMissingSomeCases - switch (variantType) - { - case VariantType.copy_number_gain: - _consequences.Add(ConsequenceTag.copy_number_increase); - break; - case VariantType.copy_number_loss: - _consequences.Add(ConsequenceTag.copy_number_decrease); - break; - case VariantType.copy_number_variation: - _consequences.Add(ConsequenceTag.copy_number_change); - break; - } - } - - private void GetStructuralTier2Types() - { - // FeatureElongation - if (_featureEffect.Elongation()) _consequences.Add(ConsequenceTag.feature_elongation); - - // 
TranscriptTruncation - if (_featureEffect.Truncation()) _consequences.Add(ConsequenceTag.transcript_truncation); - } - - private void GetTier1Types() - { - // TranscriptAblation - if (_featureEffect.Ablation()) _consequences.Add(ConsequenceTag.transcript_ablation); - - // TranscriptAmplification - if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.transcript_amplification); - } - - private void GetTier2Types() - { - // MatureMirnaVariant - if (_variantEffect.IsMatureMirnaVariant()) _consequences.Add(ConsequenceTag.mature_miRNA_variant); - } - - private void GetTier3Types() - { - foreach ((var consequenceTest, ConsequenceTag consequenceTag) in _tier3Consequences) - { - if (consequenceTest()) _consequences.Add(consequenceTag); - } - } - - public void DetermineRegulatoryVariantEffects() - { - // RegulatoryRegionAmplification - if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.regulatory_region_amplification); - - // RegulatoryRegionAblation - if (_featureEffect.Ablation()) _consequences.Add(ConsequenceTag.regulatory_region_ablation); - - // RegulatoryRegionVariant - _consequences.Add(ConsequenceTag.regulatory_region_variant); - } - } +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions.Consequence +{ + public sealed class Consequences + { + private readonly List _consequences; + public List GetConsequences() => _consequences; + + private readonly IVariantEffect _variantEffect; + private readonly IFeatureVariantEffects _featureEffect; + + private readonly ImmutableArray<(Func, ConsequenceTag)> _tier3Consequences; + + public Consequences(IVariantEffect variantEffect = null, IFeatureVariantEffects featureEffect = null) + { + _consequences = new List(); + _variantEffect = variantEffect; + _featureEffect = featureEffect; + + _tier3Consequences = new List<(Func, ConsequenceTag)> + { + 
(() => _variantEffect.IsSpliceDonorVariant(), ConsequenceTag.splice_donor_variant), + (() => _variantEffect.IsSpliceAcceptorVariant(), ConsequenceTag.splice_acceptor_variant), + (() => _variantEffect.IsStopGained(), ConsequenceTag.stop_gained), + (() => _variantEffect.IsFrameshiftVariant(), ConsequenceTag.frameshift_variant), + (() => _variantEffect.IsStopLost(), ConsequenceTag.stop_lost), + (() => _variantEffect.IsStartLost(), ConsequenceTag.start_lost), + (() => _variantEffect.IsInframeInsertion(), ConsequenceTag.inframe_insertion), + (() => _variantEffect.IsInframeDeletion(), ConsequenceTag.inframe_deletion), + (() => _variantEffect.IsMissenseVariant(), ConsequenceTag.missense_variant), + (() => _variantEffect.IsProteinAlteringVariant(), ConsequenceTag.protein_altering_variant), + (() => _variantEffect.IsSpliceRegionVariant(), ConsequenceTag.splice_region_variant), + (() => _variantEffect.IsIncompleteTerminalCodonVariant(), ConsequenceTag.incomplete_terminal_codon_variant), + (() => _variantEffect.IsStartRetained(), ConsequenceTag.start_retained_variant), + (() => _variantEffect.IsStopRetained(), ConsequenceTag.stop_retained_variant), + (() => _variantEffect.IsSynonymousVariant(), ConsequenceTag.synonymous_variant), + (() => _variantEffect.IsCodingSequenceVariant(), ConsequenceTag.coding_sequence_variant), + (() => _variantEffect.IsFivePrimeUtrVariant(), ConsequenceTag.five_prime_UTR_variant), + (() => _variantEffect.IsThreePrimeUtrVariant(), ConsequenceTag.three_prime_UTR_variant), + (() => _variantEffect.IsNonCodingTranscriptExonVariant(), ConsequenceTag.non_coding_transcript_exon_variant), + (() => _variantEffect.IsWithinIntron(), ConsequenceTag.intron_variant), + (() => _variantEffect.IsNonsenseMediatedDecayTranscriptVariant(), ConsequenceTag.NMD_transcript_variant), + (() => _variantEffect.IsNonCodingTranscriptVariant(), ConsequenceTag.non_coding_transcript_variant), + (() => _featureEffect.Elongation(), ConsequenceTag.feature_elongation), + (() => 
_featureEffect.Truncation(), ConsequenceTag.transcript_truncation) + }.ToImmutableArray(); + } + + public void DetermineFlankingVariantEffects(bool isDownstreamVariant) + { + _consequences.Add(isDownstreamVariant + ? ConsequenceTag.downstream_gene_variant + : ConsequenceTag.upstream_gene_variant); + } + + public void DetermineSmallVariantEffects() + { + GetTier1Types(); + if (_consequences.Count == 0) GetTier2Types(); + if (_consequences.Count == 0) GetTier3Types(); + if (_consequences.Count == 0) _consequences.Add(ConsequenceTag.transcript_variant); + } + + public void DetermineStructuralVariantEffect(VariantType variantType, bool addGeneFusion) + { + GetTier1Types(); + if (_consequences.Count == 0) GetStructuralTier2Types(); + if (addGeneFusion) _consequences.Add(ConsequenceTag.unidirectional_gene_fusion); + + DetermineCopyNumberEffect(variantType); + DetermineRepeatExpansionEffect(variantType); + if (_consequences.Count == 0) _consequences.Add(ConsequenceTag.transcript_variant); + } + + private void DetermineRepeatExpansionEffect(VariantType variantType) + { + // ReSharper disable once SwitchStatementMissingSomeCases + switch (variantType) + { + case VariantType.short_tandem_repeat_variation: + _consequences.Add(ConsequenceTag.short_tandem_repeat_change); + break; + case VariantType.short_tandem_repeat_contraction: + _consequences.Add(ConsequenceTag.short_tandem_repeat_contraction); + break; + case VariantType.short_tandem_repeat_expansion: + _consequences.Add(ConsequenceTag.short_tandem_repeat_expansion); + break; + } + } + + private void DetermineCopyNumberEffect(VariantType variantType) + { + // ReSharper disable once SwitchStatementMissingSomeCases + switch (variantType) + { + case VariantType.copy_number_gain: + _consequences.Add(ConsequenceTag.copy_number_increase); + break; + case VariantType.copy_number_loss: + _consequences.Add(ConsequenceTag.copy_number_decrease); + break; + case VariantType.copy_number_variation: + 
_consequences.Add(ConsequenceTag.copy_number_change); + break; + } + } + + private void GetStructuralTier2Types() + { + // FeatureElongation + if (_featureEffect.Elongation()) _consequences.Add(ConsequenceTag.feature_elongation); + + // TranscriptTruncation + if (_featureEffect.Truncation()) _consequences.Add(ConsequenceTag.transcript_truncation); + } + + private void GetTier1Types() + { + // TranscriptAblation + if (_featureEffect.Ablation()) _consequences.Add(ConsequenceTag.transcript_ablation); + + // TranscriptAmplification + if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.transcript_amplification); + } + + private void GetTier2Types() + { + // MatureMirnaVariant + if (_variantEffect.IsMatureMirnaVariant()) _consequences.Add(ConsequenceTag.mature_miRNA_variant); + } + + private void GetTier3Types() + { + foreach ((var consequenceTest, ConsequenceTag consequenceTag) in _tier3Consequences) + { + if (consequenceTest()) _consequences.Add(consequenceTag); + } + } + + public void DetermineRegulatoryVariantEffects() + { + // RegulatoryRegionAmplification + if (_featureEffect.Amplification()) _consequences.Add(ConsequenceTag.regulatory_region_amplification); + + // RegulatoryRegionAblation + if (_featureEffect.Ablation()) _consequences.Add(ConsequenceTag.regulatory_region_ablation); + + // RegulatoryRegionVariant + _consequences.Add(ConsequenceTag.regulatory_region_variant); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclature.cs b/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclature.cs index a095191a..dab1eda4 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclature.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvsCodingNomenclature.cs @@ -1,136 +1,136 @@ -using Genome; -using Intervals; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - 
public static class HgvsCodingNomenclature - { - public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, - int regionStart, int regionEnd, string transcriptRef = null, string transcriptAlt = null) - { - // sanity check: don't try to handle odd characters, make sure this is not a reference allele, - // and make sure that we have protein coordinates - if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele)) return null; - - // do not report HGVSc notation when variant lands inside gap region - if (regionStart > -1 && regionEnd > -1) - { - var startRegion = transcript.TranscriptRegions[regionStart]; - var endRegion = transcript.TranscriptRegions[regionEnd]; - if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap && - endRegion.Type == TranscriptRegionType.Gap) return null; - } - - bool onReverseStrand = transcript.Gene.OnReverseStrand; - - string refAllele = string.IsNullOrEmpty(transcriptRef) - ? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele - : transcriptRef; - string altAllele = string.IsNullOrEmpty(transcriptAlt) - ? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele - : transcriptAlt; - - // decide event type from HGVS nomenclature - var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant); - - int variantStart = variant.Start; - int variantEnd = variant.End; - - if (genomicChange == GenomicChange.Duplication) - { - (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand); - } - - var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true); - var endPositionOffset = variantStart == variantEnd - ? 
startPositionOffset - : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false); - - if (onReverseStrand) - { - var tmp = startPositionOffset; - startPositionOffset = endPositionOffset; - endPositionOffset = tmp; - } - - if (startPositionOffset == null && variant.Type == VariantType.insertion) - { - startPositionOffset= new PositionOffset( endPositionOffset.Position+1, endPositionOffset.Offset, $"{endPositionOffset.Position + 1}", endPositionOffset.HasStopCodonNotation); - } - - // sanity check: make sure we have coordinates - if (startPositionOffset == null || endPositionOffset == null) return null; - - var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange, - startPositionOffset, endPositionOffset, transcript.Translation != null); - - // generic formatting - return hgvsNotation.ToString(); - } - - /// - /// Adjust positions by alt allele length - /// - internal static (int Start, int End, string RefAllele, int RegionStart, int RegionEnd) ShiftDuplication( - this ITranscriptRegion[] regions, int start, string altAllele, bool onReverseStrand) - { - int incrementLength = altAllele.Length; - int dupStart = onReverseStrand ? start + incrementLength - 1 : start - incrementLength; - int dupEnd = onReverseStrand ? dupStart - incrementLength + 1 : dupStart + incrementLength - 1; - - (int regionStart, _) = MappedPositionUtilities.FindRegion(regions, dupStart); - (int regionEnd, _) = MappedPositionUtilities.FindRegion(regions, dupEnd); - - return (dupStart, dupEnd, altAllele, regionStart, regionEnd); - } - - public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant) - { - // length of the reference allele. 
Negative lengths make no sense - int refLength = variant.End - variant.Start + 1; - if (refLength < 0) refLength = 0; - - // length of alternative allele - int altLength = variant.AltAllele.Length; - - // sanity check: make sure that the alleles are different - if (variant.RefAllele == variant.AltAllele) return GenomicChange.Unknown; - - // deletion - if (altLength == 0) return GenomicChange.Deletion; - - if (refLength == altLength) - { - // substitution - if (refLength == 1) return GenomicChange.Substitution; - - // inversion - string rcRefAllele = SequenceUtilities.GetReverseComplement(variant.RefAllele); - return variant.AltAllele == rcRefAllele ? GenomicChange.Inversion : GenomicChange.DelIns; - } - - // deletion/insertion - if (refLength != 0) return GenomicChange.DelIns; - - // If this is an insertion, we should check if the preceding reference nucleotides - // match the insertion. In that case it should be annotated as a multiplication. - bool isGenomicDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand); - - return isGenomicDuplicate ? 
GenomicChange.Duplication : GenomicChange.Insertion; - } - } - - public enum GenomicChange - { - Unknown, - Deletion, - Duplication, - DelIns, - Insertion, - Inversion, - Substitution - } -} +using Genome; +using Intervals; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvsCodingNomenclature + { + public static string GetHgvscAnnotation(ITranscript transcript, ISimpleVariant variant, ISequence refSequence, + int regionStart, int regionEnd, string transcriptRef, string transcriptAlt) + { + // sanity check: don't try to handle odd characters, make sure this is not a reference allele, + // and make sure that we have protein coordinates + if (variant.Type == VariantType.reference || SequenceUtilities.HasNonCanonicalBase(variant.AltAllele)) return null; + + // do not report HGVSc notation when variant lands inside gap region + if (regionStart > -1 && regionEnd > -1) + { + var startRegion = transcript.TranscriptRegions[regionStart]; + var endRegion = transcript.TranscriptRegions[regionEnd]; + if (startRegion.Id == endRegion.Id && startRegion.Type == TranscriptRegionType.Gap && + endRegion.Type == TranscriptRegionType.Gap) return null; + } + + bool onReverseStrand = transcript.Gene.OnReverseStrand; + + string refAllele = string.IsNullOrEmpty(transcriptRef) + ? onReverseStrand ? SequenceUtilities.GetReverseComplement(variant.RefAllele) : variant.RefAllele + : transcriptRef; + string altAllele = string.IsNullOrEmpty(transcriptAlt) + ? onReverseStrand ? 
SequenceUtilities.GetReverseComplement(variant.AltAllele) : variant.AltAllele + : transcriptAlt; + + // decide event type from HGVS nomenclature + var genomicChange = GetGenomicChange(transcript, onReverseStrand, refSequence, variant); + + int variantStart = variant.Start; + int variantEnd = variant.End; + + if (genomicChange == GenomicChange.Duplication) + { + (variantStart, variantEnd, refAllele, regionStart, regionEnd) = transcript.TranscriptRegions.ShiftDuplication(variantStart, altAllele, onReverseStrand); + } + + var startPositionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, variantStart, regionStart, true); + var endPositionOffset = variantStart == variantEnd + ? startPositionOffset + : HgvsUtilities.GetCdnaPositionOffset(transcript, variantEnd, regionEnd, false); + + if (onReverseStrand) + { + var tmp = startPositionOffset; + startPositionOffset = endPositionOffset; + endPositionOffset = tmp; + } + + if (startPositionOffset == null && variant.Type == VariantType.insertion) + { + startPositionOffset= new PositionOffset( endPositionOffset.Position+1, endPositionOffset.Offset, $"{endPositionOffset.Position + 1}", endPositionOffset.HasStopCodonNotation); + } + + // sanity check: make sure we have coordinates + if (startPositionOffset == null || endPositionOffset == null) return null; + + var hgvsNotation = new HgvscNotation(refAllele, altAllele, transcript.Id.WithVersion, genomicChange, + startPositionOffset, endPositionOffset, transcript.Translation != null); + + // generic formatting + return hgvsNotation.ToString(); + } + + /// + /// Adjust positions by alt allele length + /// + internal static (int Start, int End, string RefAllele, int RegionStart, int RegionEnd) ShiftDuplication( + this ITranscriptRegion[] regions, int start, string altAllele, bool onReverseStrand) + { + int incrementLength = altAllele.Length; + int dupStart = onReverseStrand ? start + incrementLength - 1 : start - incrementLength; + int dupEnd = onReverseStrand ? 
dupStart - incrementLength + 1 : dupStart + incrementLength - 1; + + (int regionStart, _) = MappedPositionUtilities.FindRegion(regions, dupStart); + (int regionEnd, _) = MappedPositionUtilities.FindRegion(regions, dupEnd); + + return (dupStart, dupEnd, altAllele, regionStart, regionEnd); + } + + public static GenomicChange GetGenomicChange(IInterval interval, bool onReverseStrand, ISequence refSequence, ISimpleVariant variant) + { + // length of the reference allele. Negative lengths make no sense + int refLength = variant.End - variant.Start + 1; + if (refLength < 0) refLength = 0; + + // length of alternative allele + int altLength = variant.AltAllele.Length; + + // sanity check: make sure that the alleles are different + if (variant.RefAllele == variant.AltAllele) return GenomicChange.Unknown; + + // deletion + if (altLength == 0) return GenomicChange.Deletion; + + if (refLength == altLength) + { + // substitution + if (refLength == 1) return GenomicChange.Substitution; + + // inversion + string rcRefAllele = SequenceUtilities.GetReverseComplement(variant.RefAllele); + return variant.AltAllele == rcRefAllele ? GenomicChange.Inversion : GenomicChange.DelIns; + } + + // deletion/insertion + if (refLength != 0) return GenomicChange.DelIns; + + // If this is an insertion, we should check if the preceding reference nucleotides + // match the insertion. In that case it should be annotated as a multiplication. + bool isGenomicDuplicate = HgvsUtilities.IsDuplicateWithinInterval(refSequence, variant, interval, onReverseStrand); + + return isGenomicDuplicate ? 
GenomicChange.Duplication : GenomicChange.Insertion; + } + } + + public enum GenomicChange + { + Unknown, + Deletion, + Duplication, + DelIns, + Insertion, + Inversion, + Substitution + } +} diff --git a/VariantAnnotation/AnnotatedPositions/HgvsProtein.cs b/VariantAnnotation/AnnotatedPositions/HgvsProtein.cs new file mode 100644 index 00000000..225b256a --- /dev/null +++ b/VariantAnnotation/AnnotatedPositions/HgvsProtein.cs @@ -0,0 +1,436 @@ +using System; +using System.Buffers; +using Genome; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.AnnotatedPositions.Transcript; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvsProtein + { + private const char AnyAminoAcid = 'X'; + + // the extended CDS sequence starts where the normal CDS sequence starts, but continues until the end of the + // cDNA sequence + public static string GetHgvsProteinAnnotation(string proteinId, string hgvsCoding, + ReadOnlySpan extendedCdsSequence, string aaSequence, int cdsBegin, int cdsEnd, int aaBegin, + string refAminoAcids, string altAminoAcids, string altAllele, bool isReference, AminoAcid aminoAcid) + { + if (SkipHgvsProtein(isReference, cdsBegin, cdsEnd, hgvsCoding, altAllele)) return null; + + int aaEnd, refAlleleLen, altAlleleLen; + char refAminoAcid, altAminoAcid; + + (aaBegin, aaEnd, refAminoAcids, altAminoAcids, refAlleleLen, altAlleleLen, refAminoAcid, altAminoAcid) = + NormalizeAminoAcids(aaBegin, refAminoAcids, altAminoAcids, aaSequence); + + bool hasFrameshift = HasFrameshift(cdsBegin, cdsEnd, altAllele.Length); + var proteinCategory = GetProteinCategory(aaBegin, refAminoAcids, altAminoAcids, aaSequence, hasFrameshift); + + // convert these to substitutions + if ((proteinCategory == ProteinCategory.Insertion || proteinCategory == ProteinCategory.Duplication || + proteinCategory == ProteinCategory.DeletionInsertion) && altAminoAcid == AminoAcidCommon.StopCodon) + { + if (proteinCategory != 
ProteinCategory.DeletionInsertion) refAminoAcid = aaSequence[aaEnd]; + proteinCategory = ProteinCategory.Substitution; + } + + bool insertionBeforeTranscript = proteinCategory == ProteinCategory.Insertion && aaBegin == 1; + + if (proteinCategory == ProteinCategory.StartLost) + { + return UseStartLostNotation(proteinId, refAminoAcid, aaBegin); + } + + if (refAminoAcids == altAminoAcids || insertionBeforeTranscript) + { + return UseSilentNotation(hgvsCoding, refAminoAcid, aaBegin); + } + + if (proteinCategory == ProteinCategory.Substitution) + { + return UseSubstitutionNotation(proteinId, refAminoAcid, aaBegin, altAminoAcid); + } + + if (proteinCategory == ProteinCategory.Deletion) + { + return UseDeletionNotation(proteinId, refAminoAcid, aaBegin, refAminoAcids[refAlleleLen - 1], aaEnd); + } + + if (proteinCategory == ProteinCategory.Insertion) + { + int beforePosition = aaEnd; + int afterPosition = aaBegin; + return UseInsertionNotation(proteinId, aaSequence[beforePosition - 1], beforePosition, + aaSequence[afterPosition - 1], afterPosition, altAminoAcids); + } + + if (proteinCategory == ProteinCategory.Duplication) + { + int firstPosition = aaBegin - altAlleleLen; + return UseDuplicationNotation(proteinId, altAminoAcid, firstPosition, altAminoAcids[altAlleleLen - 1], + aaEnd); + } + + if (proteinCategory == ProteinCategory.DeletionInsertion) + { + return UseDeletionInsertionNotation(proteinId, refAminoAcids[0], aaBegin, + refAminoAcids[refAlleleLen - 1], aaEnd, altAminoAcids); + } + + // when dealing with frameshifts and extensions, we need to create an alternate AA sequence and find the + // first difference between the ref and alt AA sequences + string altAaSequence = GetAltPeptideSequence(extendedCdsSequence, cdsBegin, cdsEnd, altAllele, aminoAcid); + + (aaBegin, refAminoAcid, altAminoAcid) = FindFirstChangeAfterFrameshift(aaBegin, aaSequence, altAaSequence); + + if (altAminoAcid == AminoAcidCommon.StopCodon) + { + return refAminoAcid == 
AminoAcidCommon.StopCodon + ? UseSilentNotation(hgvsCoding, refAminoAcid, aaBegin) + : UseSubstitutionNotation(proteinId, refAminoAcid, aaBegin, altAminoAcid); + } + + if (refAminoAcid == AminoAcidCommon.StopCodon && altAminoAcid != AminoAcidCommon.StopCodon) + proteinCategory = ProteinCategory.Extension; + + int? newTerPosition; + + if (proteinCategory == ProteinCategory.Extension) + { + newTerPosition = CountAminoAcidsUntilNextStopCodon(altAaSequence, aaBegin); + return UseExtensionNotation(proteinId, aaBegin, altAminoAcid, newTerPosition); + } + + newTerPosition = CountAminoAcidsUntilNextStopCodon(altAaSequence, aaBegin - 1); + return UseFrameshiftNotation(proteinId, refAminoAcid, aaBegin, altAminoAcid, newTerPosition); + } + + internal static (int aaBegin, char refAminoAcid, char altAminoAcid) FindFirstChangeAfterFrameshift(int aaBegin, + string aaSequence, string altAaSequence) + { + char refAminoAcid = aaBegin < aaSequence.Length ? aaSequence[aaBegin - 1] : 'X'; + char altAminoAcid = aaBegin < altAaSequence.Length ? altAaSequence[aaBegin - 1] : 'X'; + + int maxPosition = Math.Min(aaSequence.Length, altAaSequence.Length); + + while (aaBegin <= maxPosition) + { + refAminoAcid = aaSequence[aaBegin - 1]; + altAminoAcid = altAaSequence[aaBegin - 1]; + if (refAminoAcid == AminoAcidCommon.StopCodon && altAminoAcid == AminoAcidCommon.StopCodon || + refAminoAcid != altAminoAcid) break; + aaBegin++; + } + + return (aaBegin, refAminoAcid, altAminoAcid); + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/extension/ + // both N-terminal & C-terminal are defined, but only C-terminal extensions are implemented + private static string UseExtensionNotation(string proteinId, int position, char altAminoAcid, + int? 
newTerPosition) + { + string altAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(altAminoAcid); + string terminalPositionSuffix = GetTerminalPositionSuffix(newTerPosition); + return $"{proteinId}:p.(Ter{position}{altAbbreviation}extTer{terminalPositionSuffix})"; + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/frameshift/ + private static string UseFrameshiftNotation(string proteinId, char refAminoAcid, int position, + char altAminoAcid, int? newTerPosition) + { + string refAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(refAminoAcid); + string altAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(altAminoAcid); + string terminalPositionSuffix = GetTerminalPositionSuffix(newTerPosition); + return $"{proteinId}:p.({refAbbreviation}{position}{altAbbreviation}fsTer{terminalPositionSuffix})"; + } + + private static string GetTerminalPositionSuffix(int? newTerPosition) => newTerPosition switch + { + 0 => "", + null => "?", + _ => newTerPosition.Value.ToString() + }; + + // https://varnomen.hgvs.org/recommendations/protein/variant/delins/ + private static string UseDeletionInsertionNotation(string proteinId, char firstAminoAcid, int firstPosition, + char lastAminoAcid, int lastPosition, string insertedAminoAcids) + { + string firstAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(firstAminoAcid); + string lastAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(lastAminoAcid); + string insertedAbbreviations = AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations(insertedAminoAcids); + + return firstPosition == lastPosition + ? 
$"{proteinId}:p.({firstAbbreviation}{firstPosition}delins{insertedAbbreviations})" + : $"{proteinId}:p.({firstAbbreviation}{firstPosition}_{lastAbbreviation}{lastPosition}delins{insertedAbbreviations})"; + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/insertion/ + private static string UseInsertionNotation(string proteinId, char beforeAminoAcid, int beforePosition, + char afterAminoAcid, int afterPosition, string insertedAminoAcids) + { + string beforeAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(beforeAminoAcid); + string afterAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(afterAminoAcid); + string insertedAbbreviations = AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations(insertedAminoAcids); + + return + $"{proteinId}:p.({beforeAbbreviation}{beforePosition}_{afterAbbreviation}{afterPosition}ins{insertedAbbreviations})"; + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/duplication/ + private static string UseDuplicationNotation(string proteinId, char firstAminoAcid, int firstPosition, + char lastAminoAcid, int lastPosition) + { + string firstAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(firstAminoAcid); + string lastAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(lastAminoAcid); + return firstPosition == lastPosition + ? $"{proteinId}:p.({firstAbbreviation}{firstPosition}dup)" + : $"{proteinId}:p.({firstAbbreviation}{firstPosition}_{lastAbbreviation}{lastPosition}dup)"; + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/deletion/ + private static string UseDeletionNotation(string proteinId, char firstAminoAcid, int firstPosition, + char lastAminoAcid, int lastPosition) + { + string firstAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(firstAminoAcid); + string lastAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(lastAminoAcid); + + return firstPosition == lastPosition + ? 
$"{proteinId}:p.({firstAbbreviation}{firstPosition}del)" + : $"{proteinId}:p.({firstAbbreviation}{firstPosition}_{lastAbbreviation}{lastPosition}del)"; + } + + // https://varnomen.hgvs.org/recommendations/protein/variant/substitution/ + private static string UseSubstitutionNotation(string proteinId, char refAminoAcid, int position, + char altAminoAcid) + { + string refAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(refAminoAcid); + string altAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(altAminoAcid); + return $"{proteinId}:p.({refAbbreviation}{position}{altAbbreviation})"; + } + + // specialized version of substitution + private static string UseSilentNotation(string hgvsCoding, char refAminoAcid, int position) + { + string refAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(refAminoAcid); + return $"{hgvsCoding}(p.({refAbbreviation}{position}=))"; + } + + private static string UseStartLostNotation(string proteinId, char refAminoAcid, int position) + { + string refAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(refAminoAcid); + return $"{proteinId}:p.{refAbbreviation}{position}?"; + } + + internal static ProteinCategory GetProteinCategory(int aaBegin, string refAminoAcids, string altAminoAcids, + string aaSequence, bool hasFrameshift) + { + int refLength = refAminoAcids.Length; + int altLength = altAminoAcids.Length; + + bool isInsertion = refLength == 0 && altLength != 0; + bool isDeletion = refLength != 0 && altLength == 0; + + bool truncatedByStop = IsTruncatedByStop(refAminoAcids, altAminoAcids); + bool startLost = IsStartLost(aaBegin, refAminoAcids, altAminoAcids); + + if (startLost) return ProteinCategory.StartLost; + + if (refAminoAcids.Contains(AminoAcidCommon.StopCodon) && !altAminoAcids.Contains(AminoAcidCommon.StopCodon)) + return ProteinCategory.Extension; + + if (hasFrameshift && !truncatedByStop) return ProteinCategory.Frameshift; + if (refLength == 1 && altLength == 1) return 
ProteinCategory.Substitution; + + if (isInsertion) + { + return IsDuplicate(aaBegin, altAminoAcids, aaSequence) + ? ProteinCategory.Duplication + : ProteinCategory.Insertion; + } + + return isDeletion ? ProteinCategory.Deletion : ProteinCategory.DeletionInsertion; + } + + internal static bool IsStartLost(int aaBegin, string refAminoAcids, string altAminoAcids) + { + if (aaBegin != 1) return false; + + // handle most SNVs/MNVs + if (refAminoAcids.Length > 0 && altAminoAcids.Length > 0) return refAminoAcids[0] != altAminoAcids[0]; + + // TODO: we might need to reconstruct the alt AA sequence to see what happens here + + return false; + } + + internal static bool IsTruncatedByStop(string refAminoAcids, string altAminoAcids) + { + if (altAminoAcids == "") return false; + + int stopPosition = altAminoAcids.IndexOf(AminoAcidCommon.StopCodon); + if (stopPosition == -1) return false; + + if (altAminoAcids[0] == AminoAcidCommon.StopCodon) return true; + + ReadOnlySpan refSpan = refAminoAcids.AsSpan(); + ReadOnlySpan altSpan = altAminoAcids.AsSpan().Slice(0, stopPosition); + return refSpan.StartsWith(altSpan); + } + + internal static bool IsDuplicate(int start, string altAminoAcids, string aaSequence) + { + ReadOnlySpan aaSpan = aaSequence.AsSpan(); + + int altLen = altAminoAcids.Length; + int testPosition = start - altLen - 1; + if (testPosition < 0) return false; + + ReadOnlySpan precedingSpan = aaSpan.Slice(testPosition, altLen); + return precedingSpan.Equals(altAminoAcids, StringComparison.Ordinal); + } + + private static bool SkipHgvsProtein(bool isReference, int cdsBegin, int cdsEnd, string hgvsCoding, + string altAllele) + { + return isReference || string.IsNullOrEmpty(hgvsCoding) || cdsBegin == -1 || cdsEnd == -1 || + SequenceUtilities.HasNonCanonicalBase(altAllele); + } + + private static (int aaBegin, int aaEnd, string refAminoAcids, string altAminoAcids, int refLength, int altLength + , char refAminoAcid, char altAminoAcid) NormalizeAminoAcids(int aaBegin, string 
refAminoAcids, + string altAminoAcids, string aaSequence) + { + refAminoAcids = RemoveAminoAcidsAfterStopCodon(refAminoAcids); + altAminoAcids = RemoveAminoAcidsAfterStopCodon(altAminoAcids); + + (aaBegin, refAminoAcids, altAminoAcids) = BiDirectionalTrimmer.Trim(aaBegin, refAminoAcids, altAminoAcids); + + int refLength = refAminoAcids.Length; + int altLength = altAminoAcids.Length; + bool isInsertion = refLength == 0 && altLength != 0; + bool isDeletion = refLength != 0 && altLength == 0; + + if (isInsertion || isDeletion) + { + (aaBegin, refAminoAcids, altAminoAcids) = + Rotate3Prime(refAminoAcids, altAminoAcids, aaBegin, aaSequence, isInsertion); + } + + int aaEnd = aaBegin + refAminoAcids.Length - 1; + if (aaEnd >= aaSequence.Length) aaEnd = aaSequence.Length - 1; + + char refAminoAcid = refLength > 0 ? refAminoAcids[0] : AnyAminoAcid; + char altAminoAcid = altLength > 0 ? altAminoAcids[0] : AnyAminoAcid; + + return (aaBegin, aaEnd, refAminoAcids, altAminoAcids, refLength, altLength, refAminoAcid, altAminoAcid); + } + + private static string RemoveAminoAcidsAfterStopCodon(string aminoAcids) + { + int stopPosition = aminoAcids.IndexOf(AminoAcidCommon.StopCodon); + return stopPosition == -1 ? 
aminoAcids : aminoAcids.Substring(0, stopPosition + 1); + } + + internal static string GetAltPeptideSequence(ReadOnlySpan cdsSpan, int cdsBegin, int cdsEnd, + string altAllele, AminoAcid aminoAcid) + { + ArrayPool charPool = ArrayPool.Shared; + ReadOnlySpan beforeSpan = cdsSpan.Slice(0, cdsBegin - 1); + ReadOnlySpan afterSpan = cdsSpan.Slice(cdsEnd); + + ReadOnlySpan altAlleleSpan = altAllele.AsSpan(); + int altAlleleLen = altAllele.Length; + + int bufferLen = beforeSpan.Length + altAlleleLen + afterSpan.Length; + char[] buffer = charPool.Rent(bufferLen); + Span bufferSpan = buffer.AsSpan(); + + // build our CDS sequence + beforeSpan.CopyTo(bufferSpan); + bufferSpan = bufferSpan.Slice(beforeSpan.Length); + altAlleleSpan.CopyTo(bufferSpan); + bufferSpan = bufferSpan.Slice(altAlleleLen); + afterSpan.CopyTo(bufferSpan); + + bufferSpan = buffer.AsSpan().Slice(0,bufferLen); + string aaSequence = aminoAcid.TranslateBases(bufferSpan); + charPool.Return(buffer); + + return aaSequence; + } + + // returns null if there are no stop codons + public static int? CountAminoAcidsUntilNextStopCodon(string aaSequence, int aaBegin) + { + ReadOnlySpan aaSpan = aaSequence.AsSpan().Slice(aaBegin - 1); + int termCodonPos = aaSpan.IndexOf(AminoAcidCommon.StopCodon); + return termCodonPos == -1 ? (int?) null : termCodonPos; + } + + // according to https://varnomen.hgvs.org/recommendations/checklist/#:~:text=The%203'%20rule, this should be + // applied to deletions, duplications, and insertions + internal static (int Start, string RefAminoAcids, string AltAminoAcids) Rotate3Prime(string refAminoAcids, + string altAminoAcids, int start, string peptides, bool isInsertion) + { + string aminoAcids = isInsertion ? altAminoAcids : refAminoAcids; + ReadOnlySpan aminoAcidsSpan = aminoAcids.AsSpan(); + int alleleLen = aminoAcids.Length; + int end = start + refAminoAcids.Length - 1; + + ArrayPool charPool = ArrayPool.Shared; + ReadOnlySpan peptideSpan = end >= peptides.Length ? 
null : peptides.AsSpan().Slice(end); + + int bufferLen = alleleLen + peptideSpan.Length; + char[] buffer = charPool.Rent(bufferLen); + Span bufferSpan = buffer.AsSpan(); + + aminoAcidsSpan.CopyTo(bufferSpan); + peptideSpan.CopyTo(bufferSpan.Slice(alleleLen)); + + var shiftStart = 0; + int shiftEnd = alleleLen; + + for (; shiftEnd < bufferLen; shiftStart++, shiftEnd++) + { + if (bufferSpan[shiftStart] != bufferSpan[shiftEnd]) break; + } + + if (shiftStart == 0) + { + charPool.Return(buffer); + return (start, refAminoAcids, altAminoAcids); + } + + aminoAcids = new string(bufferSpan.Slice(shiftStart, alleleLen)); + start += shiftStart; + charPool.Return(buffer); + + if (isInsertion) altAminoAcids = aminoAcids; + else refAminoAcids = aminoAcids; + + return (start, refAminoAcids, altAminoAcids); + } + + private static bool HasFrameshift(int cdsBegin, int cdsEnd, int altAlleleLength) + { + int refAlleleLen = cdsEnd - cdsBegin + 1; + return !Codons.IsTriplet(altAlleleLength - refAlleleLen); + } + + // https://varnomen.hgvs.org/recommendations/protein/ + internal enum ProteinCategory + { + Substitution, + Deletion, + Duplication, + Insertion, + DeletionInsertion, + Frameshift, + Extension, + StartLost + } + } +} \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclature.cs b/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclature.cs index 66d9d144..64e527c9 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclature.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvsProteinNomenclature.cs @@ -1,164 +1,164 @@ -using Genome; -using OptimizedCore; -using VariantAnnotation.Algorithms; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public static class HgvsProteinNomenclature - { - public static string GetHgvsProteinAnnotation( - ITranscript transcript, - string refAminoAcids, - string 
altAminoAcids, - string transcriptAltAllele, - IMappedPosition position, - VariantEffect variantEffect, - ISimpleVariant variant, - ISequence refSequence, - string hgvscNotation, - bool isMitochondrial) - { - if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation)) return null; - - var peptideSeq = transcript.Translation.PeptideSeq; - - // Amino acid seq should never go past the stop codon - refAminoAcids = !refAminoAcids.EndsWith(AminoAcids.StopCodon) && refAminoAcids.Contains(AminoAcids.StopCodon) - ? refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon - : refAminoAcids; - - int proteinStart = position.ProteinStart; - HgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, peptideSeq); - - var end = proteinStart + refAminoAcids.Length - 1; - var refAbbreviation = AminoAcids.GetAbbreviations(refAminoAcids); - var altAbbreviation = AminoAcids.GetAbbreviations(altAminoAcids); - - var proteinId = transcript.Translation.ProteinId.WithVersion; - var proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, peptideSeq, variantEffect); - - // ReSharper disable once SwitchStatementMissingSomeCases - switch (proteinChange) - { - case ProteinChange.Substitution: - return HgvspNotation.GetSubstitutionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation); - - case ProteinChange.Unknown: - return HgvspNotation.GetUnknownNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation); - - case ProteinChange.Deletion: - return HgvspNotation.GetDeletionNotation(proteinId, proteinStart, end, refAbbreviation, variantEffect.IsStopGained()); - - case ProteinChange.Duplication: - proteinStart -= altAminoAcids.Length; - return HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, end, altAbbreviation); - - case ProteinChange.Frameshift: - return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, 
transcriptAltAllele, - transcript, isMitochondrial, proteinId, proteinStart, end); - - case ProteinChange.None: - return HgvspNotation.GetSilentNotation(hgvscNotation, proteinStart, refAbbreviation, variantEffect.IsStopRetained()); - - case ProteinChange.DelIns: - return HgvspNotation.GetDelInsNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation); - - case ProteinChange.Insertion: - Swap.Int(ref proteinStart, ref end); - return HgvspNotation.GetInsertionNotation(proteinId, proteinStart, end, altAbbreviation, peptideSeq); - - case ProteinChange.Extension: - var altPeptideSequence = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, position.CdsEnd, - transcriptAltAllele, transcript, isMitochondrial); - altAbbreviation = proteinStart <= altPeptideSequence.Length ? AminoAcids.ConvertAminoAcidToAbbreviation(altPeptideSequence[proteinStart - 1]): "Ter"; - var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSequence, peptideSeq, proteinStart - 1, false); - - return HgvspNotation.GetExtensionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation,countToStop); - - case ProteinChange.StartLost: - return HgvspNotation.GetStartLostNotation(proteinId, proteinStart, end, refAbbreviation); - } - - return null; - } - - private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd, - string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start, - int end) - { - var peptideSeq = transcript.Translation.PeptideSeq; - var altPeptideSeq = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, isMitochondrial); - - if (start > end) Swap.Int(ref start, ref end); - - var frameshiftedParameters = HgvsUtilities.GetChangesAfterFrameshift(start, peptideSeq, altPeptideSeq); - - start = frameshiftedParameters.Item1; - var refAminoAcid = frameshiftedParameters.Item2; - var altAminoAcid = 
frameshiftedParameters.Item3; - - var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(refAminoAcid); - - if (altAminoAcid == AminoAcids.StopCodonChar) - return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter"); - - var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(altAminoAcid); - var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSeq, peptideSeq, start - 1, true); - - return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, countToStop); - } - - private static bool IsHgvspNull(string transcriptAltAllele, int cdsStart, int cdsEnd, ISimpleVariant variant, - string hgvscNotation) - { - return string.IsNullOrEmpty(hgvscNotation) || - variant.Type == VariantType.reference || - SequenceUtilities.HasNonCanonicalBase(transcriptAltAllele) || - cdsStart == -1 || - cdsEnd == -1; - } - - internal static ProteinChange GetProteinChange(int start, string refAminoAcids, string altAminoAcids, - string peptideSeq, IVariantEffect variantEffect) - { - var insertionBeforeTranscript = refAminoAcids.Length == 0 && start == 1; - if (refAminoAcids == altAminoAcids || variantEffect.IsStopRetained() || insertionBeforeTranscript) return ProteinChange.None; - - if (variantEffect.IsStartLost()) return ProteinChange.StartLost; - - // according to var nom, only if the Stop codon is effected, we call it an extension - if (variantEffect.IsStopLost() && refAminoAcids.OptimizedStartsWith(AminoAcids.StopCodonChar)) return ProteinChange.Extension; - - if (variantEffect.IsFrameshiftVariant()) return ProteinChange.Frameshift; - - if (altAminoAcids.Length > refAminoAcids.Length && - HgvsUtilities.IsAminoAcidDuplicate(start, altAminoAcids, peptideSeq)) return ProteinChange.Duplication; - - if (refAminoAcids.Length == 0 && altAminoAcids.Length != 0) return ProteinChange.Insertion; - - if (refAminoAcids.Length != 0 && altAminoAcids.Length == 0) return ProteinChange.Deletion; - - if 
(refAminoAcids.Length == 1 && altAminoAcids.Length == 1) return ProteinChange.Substitution; - - // the only remaining possibility is deletions/insertions - return ProteinChange.DelIns; - } - } - - public enum ProteinChange - { - Unknown, - Deletion, - Duplication, - Frameshift, - DelIns, - Insertion, - None, - Extension, - StartLost, - Substitution - } -} +using Genome; +using OptimizedCore; +using VariantAnnotation.Algorithms; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvsProteinNomenclature + { + public static string GetHgvsProteinAnnotation(ITranscript transcript, string refAminoAcids, + string altAminoAcids, string transcriptAltAllele, IMappedPosition position, VariantEffect variantEffect, + ISimpleVariant variant, ISequence refSequence, string hgvscNotation, AminoAcid aminoAcids) + { + if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation)) + return null; + + string peptideSeq = transcript.Translation.PeptideSeq; + + // Amino acid seq should never go past the stop codon + refAminoAcids = !refAminoAcids.EndsWith(AminoAcidCommon.StopCodon) && + refAminoAcids.Contains(AminoAcidCommon.StopCodon) + ? 
refAminoAcids.OptimizedSplit(AminoAcidCommon.StopCodon)[0] + AminoAcidCommon.StopCodon + : refAminoAcids; + + int proteinStart = position.ProteinStart; + HgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, peptideSeq); + + int end = proteinStart + refAminoAcids.Length - 1; + string refAbbreviation = AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations(refAminoAcids); + string altAbbreviation = AminoAcidAbbreviation.ConvertToThreeLetterAbbreviations(altAminoAcids); + + string proteinId = transcript.Translation.ProteinId.WithVersion; + var proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, peptideSeq, variantEffect); + + // ReSharper disable once SwitchStatementMissingSomeCases + switch (proteinChange) + { + case ProteinChange.Substitution: + return HgvspNotation.GetSubstitutionNotation(proteinId, proteinStart, refAbbreviation, + altAbbreviation); + + case ProteinChange.Unknown: + return HgvspNotation.GetUnknownNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation); + + case ProteinChange.Deletion: + return HgvspNotation.GetDeletionNotation(proteinId, proteinStart, end, refAbbreviation, variantEffect.IsStopGained()); + + case ProteinChange.Duplication: + proteinStart -= altAminoAcids.Length; + return HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, end, altAbbreviation); + + case ProteinChange.Frameshift: + return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele, + transcript, aminoAcids, position.ProteinStart, proteinId, proteinStart, end); + + case ProteinChange.None: + return HgvspNotation.GetSilentNotation(hgvscNotation, proteinStart, refAbbreviation, variantEffect.IsStopRetained()); + + case ProteinChange.DelIns: + return HgvspNotation.GetDelInsNotation(proteinId, proteinStart, end, refAbbreviation, altAbbreviation); + + case ProteinChange.Insertion: + Swap.Int(ref proteinStart, ref end); + return 
HgvspNotation.GetInsertionNotation(proteinId, proteinStart, end, altAbbreviation, peptideSeq); + + case ProteinChange.Extension: + string altPeptideSequence = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, + position.CdsEnd, transcriptAltAllele, transcript, aminoAcids, position.ProteinStart); + + altAbbreviation = proteinStart <= altPeptideSequence.Length + ? AminoAcidAbbreviation.GetThreeLetterAbbreviation(altPeptideSequence[proteinStart - 1]) + : "Ter"; + + int countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSequence, peptideSeq, + proteinStart - 1, false); + + return HgvspNotation.GetExtensionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation,countToStop); + + case ProteinChange.StartLost: + return HgvspNotation.GetStartLostNotation(proteinId, refAbbreviation); + } + + return null; + } + + private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd, + string transcriptAltAllele, ITranscript transcript, AminoAcid aminoAcids, int aaBegin, string proteinId, + int start, int end) + { + string peptideSeq = transcript.Translation.PeptideSeq; + string altPeptideSeq = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, + transcriptAltAllele, transcript, aminoAcids, aaBegin); + + if (start > end) Swap.Int(ref start, ref end); + + char refAminoAcid, altAminoAcid; + (start, refAminoAcid, altAminoAcid) = + HgvsUtilities.GetChangesAfterFrameshift(start, peptideSeq, altPeptideSeq); + + string refAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(refAminoAcid); + + if (altAminoAcid == AminoAcidCommon.StopCodon) + return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter"); + + string altAbbreviation = AminoAcidAbbreviation.GetThreeLetterAbbreviation(altAminoAcid); + int countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSeq, peptideSeq, start - 1, true); + + return HgvspNotation.GetFrameshiftNotation(proteinId, 
start, refAbbreviation, altAbbreviation, countToStop); + } + + private static bool IsHgvspNull(string transcriptAltAllele, int cdsStart, int cdsEnd, ISimpleVariant variant, + string hgvscNotation) + { + return string.IsNullOrEmpty(hgvscNotation) || + variant.Type == VariantType.reference || + SequenceUtilities.HasNonCanonicalBase(transcriptAltAllele) || + cdsStart == -1 || + cdsEnd == -1; + } + + internal static ProteinChange GetProteinChange(int start, string refAminoAcids, string altAminoAcids, + string peptideSeq, IVariantEffect variantEffect) + { + bool insertionBeforeTranscript = refAminoAcids.Length == 0 && start == 1; + if (refAminoAcids == altAminoAcids || variantEffect.IsStopRetained() || insertionBeforeTranscript) return ProteinChange.None; + + if (variantEffect.IsStartLost()) return ProteinChange.StartLost; + + // according to var nom, only if the Stop codon is affected, we call it an extension + if (variantEffect.IsStopLost() && refAminoAcids.OptimizedStartsWith(AminoAcidCommon.StopCodon)) return ProteinChange.Extension; + + if (variantEffect.IsFrameshiftVariant()) return ProteinChange.Frameshift; + + if (altAminoAcids.Length > refAminoAcids.Length && + HgvsUtilities.IsAminoAcidDuplicate(start, altAminoAcids, peptideSeq)) return ProteinChange.Duplication; + + if (refAminoAcids.Length == 0 && altAminoAcids.Length != 0) return ProteinChange.Insertion; + + if (refAminoAcids.Length != 0 && altAminoAcids.Length == 0) return ProteinChange.Deletion; + + if (refAminoAcids.Length == 1 && altAminoAcids.Length == 1) return ProteinChange.Substitution; + + // the only remaining possibility is deletions/insertions + return ProteinChange.DelIns; + } + } + + public enum ProteinChange + { + Unknown, + Deletion, + Duplication, + Frameshift, + DelIns, + Insertion, + None, + Extension, + StartLost, + Substitution + } +} diff --git a/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs b/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs index 0851b596..9cde9e68 100644 
--- a/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvsUtilities.cs @@ -1,345 +1,315 @@ -using System; -using Genome; -using Intervals; -using OptimizedCore; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Caches.Utilities; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public static class HgvsUtilities - { - public static void ShiftAndRotateAlleles(ref int start, ref string refAminoAcids, ref string altAminoAcids, string peptideSeq) - { - var trimmedAlleles = BiDirectionalTrimmer.Trim(start, refAminoAcids, altAminoAcids); - - start = trimmedAlleles.Start; - refAminoAcids = trimmedAlleles.RefAllele; - altAminoAcids = trimmedAlleles.AltAllele; - - var rotatedAlleles = Rotate3Prime(refAminoAcids, altAminoAcids, start, peptideSeq); - - start = rotatedAlleles.Start; - refAminoAcids = rotatedAlleles.RefAminoAcids; - altAminoAcids = rotatedAlleles.AltAminoAcids; - } - - internal static (int Start, string RefAminoAcids, string AltAminoAcids) Rotate3Prime(string refAminoAcids, string altAminoAcids, int start, string peptides) - { - if (!(string.IsNullOrEmpty(refAminoAcids) || string.IsNullOrEmpty(altAminoAcids))) - return (start, refAminoAcids, altAminoAcids); - - var isInsertion = !string.IsNullOrEmpty(altAminoAcids); - - // ReSharper disable once PossibleNullReferenceException - var end = start + refAminoAcids.Length - 1; - - // for insertion, the reference bases will be empty string. The shift should happen on the alternate allele - var rotatingPeptides = isInsertion ? altAminoAcids : refAminoAcids; - var numBases = rotatingPeptides.Length; - - var downstreamPeptides = peptides.Length >= end ? 
peptides.Substring(end) : null; - var combinedSequence = rotatingPeptides + downstreamPeptides; - - int shiftStart, shiftEnd; - var hasShifted = false; - - for (shiftStart = 0, shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++) - { - if (combinedSequence[shiftStart] != combinedSequence[shiftEnd]) break; - start++; - hasShifted = true; - } - - if (hasShifted) rotatingPeptides = combinedSequence.Substring(shiftStart, numBases); - - if (isInsertion) altAminoAcids = rotatingPeptides; - else refAminoAcids = rotatingPeptides; - - return (start, refAminoAcids, altAminoAcids); - } - - /// - /// returns true if this insertion has the same amino acids preceding it [TranscriptVariationAllele.pm:1494 _check_for_peptide_duplication] - /// - public static bool IsAminoAcidDuplicate(int start, string altAminoAcids, string transcriptPeptides) - { - if (altAminoAcids == null || transcriptPeptides == null) return false; - - var testAminoAcidPos = start - altAminoAcids.Length - 1; - if (testAminoAcidPos < 0) return false; - - var precedingAminoAcids = testAminoAcidPos + altAminoAcids.Length <= transcriptPeptides.Length - ? transcriptPeptides.Substring(testAminoAcidPos, altAminoAcids.Length) - : ""; - - return testAminoAcidPos >= 0 && precedingAminoAcids == altAminoAcids; - } - - /// - /// returns the number of amino acids until the next stop codon is encountered [TranscriptVariationAllele.pm:1531 _stop_loss_extra_AA] - /// - public static int GetNumAminoAcidsUntilStopCodon(string altCds, string peptideSeq, int refVarPos, bool isFrameshift) - { - var numExtraAminoAcids = -1; - var refLen = peptideSeq.Length; - - // find the number of residues that are translated until a termination codon is encountered - var terPos = altCds.IndexOf('*'); - if (terPos != -1) - { - numExtraAminoAcids = terPos + 1 - (isFrameshift ? 
refVarPos : refLen + 1); - } - - // A special case is if the first aa is a stop codon => don't display the number of residues until the stop codon - return numExtraAminoAcids > 0 ? numExtraAminoAcids : -1; - } - - public static (int Start, char RefAminoAcid, char AltAminoAcid) GetChangesAfterFrameshift(int start, string peptideSeq, string altPeptideSeq) - { - start = Math.Min(start, peptideSeq.Length); - - // for deletions at the end of peptide sequence - if (start > altPeptideSeq.Length) return (start, peptideSeq[start - 1], '?'); - - var refPeptideSeq = peptideSeq + "*"; - char refAminoAcid = refPeptideSeq[start - 1]; - char altAminoAcid = altPeptideSeq[start - 1]; - - while (start <= altPeptideSeq.Length && start <= refPeptideSeq.Length) - { - refAminoAcid = refPeptideSeq[start - 1]; - altAminoAcid = altPeptideSeq[start - 1]; - - // variation at stop codon, but maintains stop codon - set to synonymous - if (refAminoAcid == '*' && altAminoAcid == '*' || refAminoAcid != altAminoAcid) break; - start++; - } - - return (start, refAminoAcid, altAminoAcid); - } - - /// - /// returns the translated coding sequence including the variant and the 3' UTR - /// - public static string GetAltPeptideSequence(ISequence refSequence, int cdsBegin, int cdsEnd, - string trancriptAltAllele, ITranscript transcript, bool isMitochondrial) - { - string altCds = TranscriptUtilities.GetAlternateCds(refSequence, cdsBegin, - cdsEnd, trancriptAltAllele, transcript.TranscriptRegions, - transcript.Gene.OnReverseStrand, transcript.StartExonPhase, - transcript.Translation.CodingRegion.CdnaStart); - - var aminoAcids = new AminoAcids(isMitochondrial); - return aminoAcids.TranslateBases(altCds, true); - } - - public static PositionOffset GetCdnaPositionOffset(ITranscript transcript, int position, int regionIndex, bool isRegionStart) - { - - if (!transcript.Overlaps(position, position)) return null; - - var region = transcript.TranscriptRegions[regionIndex]; - int codingRegionStart = 
transcript.Translation?.CodingRegion.CdnaStart ?? -1; - int codingRegionEnd = transcript.Translation?.CodingRegion.CdnaEnd ?? -1; - var po = GetPositionAndOffset(position, region, transcript.Gene.OnReverseStrand, isRegionStart); - if (po.Position == -1) return null; - - var cdnaCoord = GetCdnaCoord(po.Position, po.Offset, codingRegionStart, codingRegionEnd); - string offset = po.Offset == 0 ? "" : po.Offset.ToString("+0;-0;+0"); - string value = cdnaCoord.HasNoPosition ? "*" + po.Offset : cdnaCoord.CdnaCoord + offset; - - return new PositionOffset(po.Position, po.Offset, value, cdnaCoord.HasStopCodonNotation); - } - - private static (int Position, int Offset) GetPositionAndOffset(int position, ITranscriptRegion region, bool onReverseStrand, bool isRegionStart) - { - int cdsPos = -1; - int offset = -1; - switch (region.Type) - { - case TranscriptRegionType.Exon: - cdsPos = region.CdnaStart + (onReverseStrand ? region.End - position : position - region.Start); - offset = 0; - break; - case TranscriptRegionType.Gap: - (cdsPos, offset) = GetGapPositionAndOffset(region, isRegionStart); - break; - case TranscriptRegionType.Intron: - (cdsPos, offset) = GetIntronPositionAndOffset(position, region, onReverseStrand); - break; - } - - //var rnaEditOffset = GetRnaEditOffset(cdsPos, rnaEdits); - //cdsPos -= rnaEditOffset; - return (cdsPos, offset); - - } - - public static int GetRnaEditOffset(int position, IRnaEdit[] rnaEdits) - { - var rnaEditOffset = 0; - if (rnaEdits == null) return rnaEditOffset; - - RnaEditUtilities.SetTypesAndSort(rnaEdits); - - foreach (var rnaEdit in rnaEdits) - { - if (rnaEdit.Start > position) break; - - switch (rnaEdit.Type) - { - case VariantType.insertion: - rnaEditOffset += rnaEdit.Bases.Length; - break; - case VariantType.deletion: - rnaEditOffset -= rnaEdit.End - rnaEdit.Start + 1; - break; - } - } - - return rnaEditOffset; - } - - private static (int Position, int Offset) GetIntronPositionAndOffset(int position, ITranscriptRegion region, - bool 
onReverseStrand) - { - int leftDist = position - region.Start + 1; - int rightDist = region.End - position + 1; - - int offset = Math.Min(leftDist, rightDist); - if (!onReverseStrand && rightDist < leftDist || onReverseStrand && rightDist > leftDist) offset = -offset; - - // cDNA position truth table - // - // forward reverse - // ------------------------- - // L < R | CdnaStart | CdnaEnd | - // L = R | CdnaStart | CdnaStart | - // L > R | CdnaEnd | CdnaStart | - // ------------------------- - - int cdnaPosition = leftDist < rightDist && onReverseStrand || leftDist > rightDist && !onReverseStrand - ? region.CdnaEnd - : region.CdnaStart; - - return (cdnaPosition, offset); - } - - private static (int Position, int Offset) GetGapPositionAndOffset(ITranscriptRegion region, bool isRegionStart) - { - return isRegionStart ? (region.CdnaEnd, 0) : (region.CdnaStart, 0); - } - - private static (string CdnaCoord, bool HasStopCodonNotation, bool HasNoPosition) GetCdnaCoord(int position, - int offset, int codingRegionStart, int codingRegionEnd) - { - string cdnaCoord = null; - bool hasStopCodonNotation = false; - bool hasNoPosition = false; - - if (codingRegionEnd != -1) - { - - if (position > codingRegionEnd) - { - cdnaCoord = "*" + (position - codingRegionEnd); - hasStopCodonNotation = true; - } - // else if (offset != 0 && position == codingRegionEnd) - // { - // cdnaCoord = "*"; - // hasStopCodonNotation = true; - // hasNoPosition = true; - // } - } - - if (!hasStopCodonNotation && codingRegionStart != -1) - { - cdnaCoord = (position + (position >= codingRegionStart ? 1 : 0) - codingRegionStart).ToString(); - } - - if (cdnaCoord == null) cdnaCoord = position.ToString(); - return (cdnaCoord, hasStopCodonNotation, hasNoPosition); - } - - public static string GetTranscriptAllele(string variantAllele, bool onReverseStrand) => - onReverseStrand ? 
SequenceUtilities.GetReverseComplement(variantAllele) : variantAllele; - - public static string FormatDnaNotation(string start, string end, string referenceId, string referenceBases, - string alternateBases, GenomicChange type, char notationType) - { - var sb = StringBuilderCache.Acquire(); - - // all start with transcript name & numbering type - sb.Append(referenceId + ':' + notationType + '.'); - - // handle single and multiple positions - string coordinates = start == end - ? start - : start + '_' + end; - - // format rest of string according to type - // note: inversion and multiple are never assigned as genomic changes - // ReSharper disable once SwitchStatementMissingSomeCases - switch (type) - { - case GenomicChange.Deletion: - sb.Append(coordinates + "del"); - break; - case GenomicChange.Inversion: - sb.Append(coordinates + "inv" + referenceBases); - break; - case GenomicChange.Duplication: - sb.Append(coordinates + "dup"); - break; - case GenomicChange.Substitution: - if (referenceBases == alternateBases) - { - sb.Append(start + '='); - } - else - { - sb.Append(start + referenceBases + '>' + alternateBases); - } - break; - case GenomicChange.DelIns: - // NOTE: change to delins, now use del--ins-- to reduce anavarin differences - sb.Append(coordinates + "delins" + alternateBases); - break; - case GenomicChange.Insertion: - sb.Append(coordinates + "ins" + alternateBases); - break; - - default: - throw new InvalidOperationException("Unhandled genomic change found: " + type); - } - - return StringBuilderCache.GetStringAndRelease(sb); - } - - public static bool IsDuplicateWithinInterval(ISequence refSequence, ISimpleVariant variant, IInterval interval, bool onReverseStrand) - { - if (variant.Type != VariantType.insertion) return false; - - int altAlleleLen = variant.AltAllele.Length; - string compareRegion; - - if (onReverseStrand) - { - if (variant.End + altAlleleLen > interval.End) return false; - compareRegion = refSequence.Substring(variant.Start - 1, 
altAlleleLen); - } - else - { - if (variant.Start - altAlleleLen < interval.Start) return false; - compareRegion = refSequence.Substring(variant.End - altAlleleLen, altAlleleLen); - - } - - return compareRegion == variant.AltAllele; - } - } +using System; +using Genome; +using Intervals; +using OptimizedCore; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvsUtilities + { + public static void ShiftAndRotateAlleles(ref int start, ref string refAminoAcids, ref string altAminoAcids, + string peptideSeq) + { + (start, refAminoAcids, altAminoAcids) = BiDirectionalTrimmer.Trim(start, refAminoAcids, altAminoAcids); + (start, refAminoAcids, altAminoAcids) = Rotate3Prime(refAminoAcids, altAminoAcids, start, peptideSeq); + } + + internal static (int Start, string RefAminoAcids, string AltAminoAcids) Rotate3Prime(string refAminoAcids, + string altAminoAcids, int start, string peptides) + { + if (!(string.IsNullOrEmpty(refAminoAcids) || string.IsNullOrEmpty(altAminoAcids))) + return (start, refAminoAcids, altAminoAcids); + + bool isInsertion = !string.IsNullOrEmpty(altAminoAcids); + + // ReSharper disable once PossibleNullReferenceException + int end = start + refAminoAcids.Length - 1; + + // for insertion, the reference bases will be empty string. The shift should happen on the alternate allele + string rotatingPeptides = isInsertion ? altAminoAcids : refAminoAcids; + int numBases = rotatingPeptides.Length; + + string downstreamPeptides = peptides.Length >= end ? 
peptides.Substring(end) : null; + string combinedSequence = rotatingPeptides + downstreamPeptides; + + int shiftStart, shiftEnd; + var hasShifted = false; + + for (shiftStart = 0, shiftEnd = numBases; shiftEnd < combinedSequence.Length; shiftStart++, shiftEnd++) + { + if (combinedSequence[shiftStart] != combinedSequence[shiftEnd]) break; + start++; + hasShifted = true; + } + + if (hasShifted) rotatingPeptides = combinedSequence.Substring(shiftStart, numBases); + + if (isInsertion) altAminoAcids = rotatingPeptides; + else refAminoAcids = rotatingPeptides; + + return (start, refAminoAcids, altAminoAcids); + } + + /// + /// returns true if this insertion has the same amino acids preceding it [TranscriptVariationAllele.pm:1494 _check_for_peptide_duplication] + /// + public static bool IsAminoAcidDuplicate(int start, string altAminoAcids, string transcriptPeptides) + { + if (altAminoAcids == null || transcriptPeptides == null) return false; + + int testAminoAcidPos = start - altAminoAcids.Length - 1; + if (testAminoAcidPos < 0) return false; + + string precedingAminoAcids = testAminoAcidPos + altAminoAcids.Length <= transcriptPeptides.Length + ? transcriptPeptides.Substring(testAminoAcidPos, altAminoAcids.Length) + : ""; + + return precedingAminoAcids == altAminoAcids; + } + + /// + /// returns the number of amino acids until the next stop codon is encountered [TranscriptVariationAllele.pm:1531 _stop_loss_extra_AA] + /// + public static int GetNumAminoAcidsUntilStopCodon(string altCds, string peptideSeq, int refVarPos, + bool isFrameshift) + { + int numExtraAminoAcids = -1; + int refLen = peptideSeq.Length; + + // find the number of residues that are translated until a termination codon is encountered + int terPos = altCds.IndexOf('*'); + if (terPos != -1) + { + numExtraAminoAcids = terPos + 1 - (isFrameshift ? 
refVarPos : refLen + 1); + } + + // A special case is if the first aa is a stop codon => don't display the number of residues until the stop codon + return numExtraAminoAcids > 0 ? numExtraAminoAcids : -1; + } + + public static (int Start, char RefAminoAcid, char AltAminoAcid) GetChangesAfterFrameshift(int start, + string peptideSeq, string altPeptideSeq) + { + start = Math.Min(start, peptideSeq.Length); + + // for deletions at the end of peptide sequence + if (start > altPeptideSeq.Length) return (start, peptideSeq[start - 1], 'X'); + + string refPeptideSeq = peptideSeq + "*"; + char refAminoAcid = refPeptideSeq[start - 1]; + char altAminoAcid = altPeptideSeq[start - 1]; + + while (start <= altPeptideSeq.Length && start <= refPeptideSeq.Length) + { + refAminoAcid = refPeptideSeq[start - 1]; + altAminoAcid = altPeptideSeq[start - 1]; + + // variation at stop codon, but maintains stop codon - set to synonymous + if (refAminoAcid == '*' && altAminoAcid == '*' || refAminoAcid != altAminoAcid) break; + start++; + } + + return (start, refAminoAcid, altAminoAcid); + } + + /// + /// returns the translated coding sequence including the variant and the 3' UTR + /// + public static string GetAltPeptideSequence(ISequence refSequence, int cdsBegin, int cdsEnd, + string transcriptAltAllele, ITranscript transcript, AminoAcid aminoAcids, int aaStart) + { + string altCds = TranscriptUtilities.GetAlternateCds(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, + transcript.TranscriptRegions, transcript.Gene.OnReverseStrand, transcript.StartExonPhase, + transcript.Translation.CodingRegion.CdnaStart); + + return aminoAcids.TranslateBases(altCds, transcript.AminoAcidEdits, aaStart, true); + } + + public static PositionOffset GetCdnaPositionOffset(ITranscript transcript, int position, int regionIndex, + bool isRegionStart) + { + if (!transcript.Overlaps(position, position)) return null; + + var region = transcript.TranscriptRegions[regionIndex]; + int codingRegionStart = 
transcript.Translation?.CodingRegion.CdnaStart ?? -1; + int codingRegionEnd = transcript.Translation?.CodingRegion.CdnaEnd ?? -1; + var po = GetPositionAndOffset(position, region, transcript.Gene.OnReverseStrand, isRegionStart); + if (po.Position == -1) return null; + + var cdnaCoord = GetCdnaCoord(po.Position, codingRegionStart, codingRegionEnd); + string offset = po.Offset == 0 ? "" : po.Offset.ToString("+0;-0;+0"); + string value = cdnaCoord.CdnaCoord + offset; + + return new PositionOffset(po.Position, po.Offset, value, cdnaCoord.HasStopCodonNotation); + } + + private static (int Position, int Offset) GetPositionAndOffset(int position, ITranscriptRegion region, + bool onReverseStrand, bool isRegionStart) + { + int cdsPos = -1; + int offset = -1; + + // ReSharper disable once SwitchStatementHandlesSomeKnownEnumValuesWithDefault + switch (region.Type) + { + case TranscriptRegionType.Exon: + cdsPos = region.CdnaStart + (onReverseStrand ? region.End - position : position - region.Start); + offset = 0; + break; + case TranscriptRegionType.Gap: + (cdsPos, offset) = GetGapPositionAndOffset(region, isRegionStart); + break; + case TranscriptRegionType.Intron: + (cdsPos, offset) = GetIntronPositionAndOffset(position, region, onReverseStrand); + break; + } + + return (cdsPos, offset); + } + + private static (int Position, int Offset) GetIntronPositionAndOffset(int position, ITranscriptRegion region, + bool onReverseStrand) + { + int leftDist = position - region.Start + 1; + int rightDist = region.End - position + 1; + + int offset = Math.Min(leftDist, rightDist); + if (!onReverseStrand && rightDist < leftDist || onReverseStrand && rightDist > leftDist) offset = -offset; + + // cDNA position truth table + // + // forward reverse + // ------------------------- + // L < R | CdnaStart | CdnaEnd | + // L = R | CdnaStart | CdnaStart | + // L > R | CdnaEnd | CdnaStart | + // ------------------------- + + int cdnaPosition = leftDist < rightDist && onReverseStrand || leftDist > 
rightDist && !onReverseStrand + ? region.CdnaEnd + : region.CdnaStart; + + return (cdnaPosition, offset); + } + + private static (int Position, int Offset) + GetGapPositionAndOffset(ITranscriptRegion region, bool isRegionStart) => + isRegionStart ? (region.CdnaEnd, 0) : (region.CdnaStart, 0); + + private static (string CdnaCoord, bool HasStopCodonNotation) GetCdnaCoord(int position, int codingRegionStart, + int codingRegionEnd) + { + string cdnaCoord = null; + var hasStopCodonNotation = false; + + if (codingRegionEnd != -1) + { + if (position > codingRegionEnd) + { + cdnaCoord = "*" + (position - codingRegionEnd); + hasStopCodonNotation = true; + } + } + + if (!hasStopCodonNotation && codingRegionStart != -1) + { + cdnaCoord = (position + (position >= codingRegionStart ? 1 : 0) - codingRegionStart).ToString(); + } + + if (cdnaCoord == null) cdnaCoord = position.ToString(); + return (cdnaCoord, hasStopCodonNotation); + } + + public static string AdjustTranscriptRefAllele(string transcriptRefAllele, int coveredCdnaStart, + int coveredCdnaEnd, ISequence cdnaSequence) + { + if (coveredCdnaStart == -1 || coveredCdnaEnd == -1 || cdnaSequence == null) return transcriptRefAllele; + + return coveredCdnaEnd < coveredCdnaStart + ? string.Empty + : cdnaSequence.Substring(coveredCdnaStart - 1, coveredCdnaEnd - coveredCdnaStart + 1); + } + + public static string GetTranscriptAllele(string variantAllele, bool onReverseStrand) => + onReverseStrand ? SequenceUtilities.GetReverseComplement(variantAllele) : variantAllele; + + public static string FormatDnaNotation(string start, string end, string referenceId, string referenceBases, + string alternateBases, GenomicChange type, char notationType) + { + var sb = StringBuilderCache.Acquire(); + + // all start with transcript name & numbering type + sb.Append(referenceId + ':' + notationType + '.'); + + // handle single and multiple positions + string coordinates = start == end + ? 
start + : start + '_' + end; + + // format rest of string according to type + // note: inversion and multiple are never assigned as genomic changes + // ReSharper disable once SwitchStatementMissingSomeCases + switch (type) + { + case GenomicChange.Deletion: + sb.Append(coordinates + "del"); + break; + case GenomicChange.Inversion: + sb.Append(coordinates + "inv"); + break; + case GenomicChange.Duplication: + sb.Append(coordinates + "dup"); + break; + case GenomicChange.Substitution: + if (referenceBases == alternateBases) + { + sb.Append(start + '='); + } + else + { + sb.Append(start + referenceBases + '>' + alternateBases); + } + + break; + case GenomicChange.DelIns: + // NOTE: change to delins, now use del--ins-- to reduce Anavrin differences + sb.Append(coordinates + "delins" + alternateBases); + break; + case GenomicChange.Insertion: + sb.Append(coordinates + "ins" + alternateBases); + break; + + default: + throw new InvalidOperationException("Unhandled genomic change found: " + type); + } + + return StringBuilderCache.GetStringAndRelease(sb); + } + + public static bool IsDuplicateWithinInterval(ISequence refSequence, ISimpleVariant variant, IInterval interval, + bool onReverseStrand) + { + if (variant.Type != VariantType.insertion) return false; + + int altAlleleLen = variant.AltAllele.Length; + string compareRegion; + + if (onReverseStrand) + { + if (variant.End + altAlleleLen > interval.End) return false; + compareRegion = refSequence.Substring(variant.Start - 1, altAlleleLen); + } + else + { + if (variant.Start - altAlleleLen < interval.Start) return false; + compareRegion = refSequence.Substring(variant.End - altAlleleLen, altAlleleLen); + } + + return compareRegion == variant.AltAllele; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/HgvscNotation.cs b/VariantAnnotation/AnnotatedPositions/HgvscNotation.cs index 59a9b853..8743b8ee 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvscNotation.cs +++ 
b/VariantAnnotation/AnnotatedPositions/HgvscNotation.cs @@ -1,73 +1,73 @@ -namespace VariantAnnotation.AnnotatedPositions -{ - public sealed class PositionOffset - { - public readonly int Position; - public readonly int Offset; - public readonly string Value; - public readonly bool HasStopCodonNotation; - - public PositionOffset(int position, int offset, string value, bool hasStopCodonNotation) - { - Position = position; - Offset = offset; - Value = value; - HasStopCodonNotation = hasStopCodonNotation; - } - } - - public sealed class HgvscNotation - { - private readonly string _referenceBases; - private readonly string _alternateBases; - - private PositionOffset _start; - private PositionOffset _end; - - private readonly string _transcriptId; - - private readonly char _transcriptType; - - private readonly GenomicChange _type; - - private const char CodingType = 'c'; - private const char NonCodingType = 'n'; - - public HgvscNotation(string referenceBases, string alternateBases, string transcriptId, GenomicChange changeType, PositionOffset start, PositionOffset end, bool isCoding) - { - _transcriptId = transcriptId; - _start = start; - _end = end; - _type = changeType; - - SwapEndpoints(); - - _referenceBases = referenceBases ?? ""; - _alternateBases = alternateBases ?? ""; - - _transcriptType = isCoding ? CodingType : NonCodingType; - } - - /// - /// HGVS aligns changes 3' - /// e.g. 
given a ATG/- deletion in C[ATG]ATGT, we want to move to: CATG[ATG]T - /// given a A/- deletion in TA[A]AAAA, we want to move to: TAAAAA[A] - /// given a AA/- deletion in TA[AA]AAA, we want to move to: TAAAA[AA] - /// - private void SwapEndpoints() - { - if (_start.Position <= _end.Position && - (_start.Position != _end.Position || _start.Offset <= _end.Offset)) return; - - var temp = _start; - _start = _end; - _end = temp; - } - - public override string ToString() - { - return HgvsUtilities.FormatDnaNotation(_start.Value, _end.Value, _transcriptId, _referenceBases, - _alternateBases, _type, _transcriptType); - } - } +namespace VariantAnnotation.AnnotatedPositions +{ + public sealed class PositionOffset + { + public readonly int Position; + public readonly int Offset; + public readonly string Value; + public readonly bool HasStopCodonNotation; + + public PositionOffset(int position, int offset, string value, bool hasStopCodonNotation) + { + Position = position; + Offset = offset; + Value = value; + HasStopCodonNotation = hasStopCodonNotation; + } + } + + public sealed class HgvscNotation + { + private readonly string _referenceBases; + private readonly string _alternateBases; + + private PositionOffset _start; + private PositionOffset _end; + + private readonly string _transcriptId; + + private readonly char _transcriptType; + + private readonly GenomicChange _type; + + private const char CodingType = 'c'; + private const char NonCodingType = 'n'; + + public HgvscNotation(string referenceBases, string alternateBases, string transcriptId, GenomicChange changeType, PositionOffset start, PositionOffset end, bool isCoding) + { + _transcriptId = transcriptId; + _start = start; + _end = end; + _type = changeType; + + SwapEndpoints(); + + _referenceBases = referenceBases ?? ""; + _alternateBases = alternateBases ?? ""; + + _transcriptType = isCoding ? CodingType : NonCodingType; + } + + /// + /// HGVS aligns changes 3' + /// e.g. 
given a ATG/- deletion in C[ATG]ATGT, we want to move to: CATG[ATG]T + /// given a A/- deletion in TA[A]AAAA, we want to move to: TAAAAA[A] + /// given a AA/- deletion in TA[AA]AAA, we want to move to: TAAAA[AA] + /// + private void SwapEndpoints() + { + if (_start.Position <= _end.Position && + (_start.Position != _end.Position || _start.Offset <= _end.Offset)) return; + + var temp = _start; + _start = _end; + _end = temp; + } + + public override string ToString() + { + return HgvsUtilities.FormatDnaNotation(_start.Value, _end.Value, _transcriptId, _referenceBases, + _alternateBases, _type, _transcriptType); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/HgvsgNotation.cs b/VariantAnnotation/AnnotatedPositions/HgvsgNotation.cs index 1049419b..a3867cc9 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvsgNotation.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvsgNotation.cs @@ -1,32 +1,32 @@ -using System; -using Genome; -using Intervals; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public static class HgvsgNotation - { - private const char NotationType = 'g'; - - public static string GetNotation(string refseqAccession, ISimpleVariant variant, ISequence refSequence, - IInterval referenceInterval) - { - var rotatedVariant = VariantRotator.Right(variant, referenceInterval, refSequence, false); - var start = Math.Min(rotatedVariant.Start, rotatedVariant.End); - var end = Math.Max(rotatedVariant.Start, rotatedVariant.End); - var referenceBases = rotatedVariant.RefAllele; - var alternateBases = rotatedVariant.AltAllele; - var type = HgvsCodingNomenclature.GetGenomicChange(referenceInterval, false, refSequence, rotatedVariant); - - if (type == GenomicChange.Duplication && variant.Type == VariantType.insertion) - { - referenceBases = alternateBases; - end = start; - start = end - referenceBases.Length + 1; - } - - return HgvsUtilities.FormatDnaNotation(start.ToString(), end.ToString(), refseqAccession, 
referenceBases, alternateBases, type, NotationType); - } - } +using System; +using Genome; +using Intervals; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvsgNotation + { + private const char NotationType = 'g'; + + public static string GetNotation(string refseqAccession, ISimpleVariant variant, ISequence refSequence, + IInterval referenceInterval) + { + var rotatedVariant = VariantRotator.Right(variant, referenceInterval, refSequence, false); + var start = Math.Min(rotatedVariant.Start, rotatedVariant.End); + var end = Math.Max(rotatedVariant.Start, rotatedVariant.End); + var referenceBases = rotatedVariant.RefAllele; + var alternateBases = rotatedVariant.AltAllele; + var type = HgvsCodingNomenclature.GetGenomicChange(referenceInterval, false, refSequence, rotatedVariant); + + if (type == GenomicChange.Duplication && variant.Type == VariantType.insertion) + { + referenceBases = alternateBases; + end = start; + start = end - referenceBases.Length + 1; + } + + return HgvsUtilities.FormatDnaNotation(start.ToString(), end.ToString(), refseqAccession, referenceBases, alternateBases, type, NotationType); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/HgvspNotation.cs b/VariantAnnotation/AnnotatedPositions/HgvspNotation.cs index a6f7062f..1cb8c23c 100644 --- a/VariantAnnotation/AnnotatedPositions/HgvspNotation.cs +++ b/VariantAnnotation/AnnotatedPositions/HgvspNotation.cs @@ -1,96 +1,93 @@ -using VariantAnnotation.AnnotatedPositions.Transcript; - -namespace VariantAnnotation.AnnotatedPositions -{ - public static class HgvspNotation - { - public static string GetDelInsNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation) - { - if (altAbbreviation.StartsWith("Ter")) - return GetSubstitutionNotation(proteinId, start, refAbbreviation.Substring(0, 3), "Ter"); - - return start == end - ? 
$"{proteinId}:p.({refAbbreviation}{start}delins{altAbbreviation})" - : $"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}delins{altAbbreviation})"; - } - - public static string GetInsertionNotation(string proteinId, int start, int end, string altAbbreviation, string peptideSeq) - { - // insertion past the last AA - if (end > peptideSeq.Length) return null; - - var leftFlankingAa = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start - 1]); - if (altAbbreviation.StartsWith("Ter")) - { - var refAminoAcid = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start]); - return $"{proteinId}:p.({refAminoAcid}{end}Ter)"; - } - - var rightFlankingAa = end > peptideSeq.Length ? "Ter" : AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[end - 1]); - - return $"{proteinId}:p.({leftFlankingAa}{start}_{rightFlankingAa}{end}ins{altAbbreviation})"; - } - - public static string GetFrameshiftNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop) - { - if (altAbbreviation.StartsWith("Ter")) - return $"{proteinId}:p.({refAbbreviation}{start}Ter)"; - - return countToStop > 0 ? - $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer{countToStop})" : - $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer?)"; - } - - public static string GetExtensionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop) - { - return countToStop > 0 ? - $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer{countToStop})" : - $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer?)"; - } - - public static string GetDuplicationNotation(string proteinId, int start, int end, string altAbbreviation) - { - return start == end ? 
- $"{proteinId}:p.({altAbbreviation}{start}dup)" : - $"{proteinId}:p.({altAbbreviation.Substring(0, 3)}{start}_{altAbbreviation.Substring(altAbbreviation.Length - 3)}{end}dup)"; - } - - - public static string GetStartLostNotation(string proteinId, int start, int end, string refAbbreviation) - { - return $"{proteinId}:p.?"; - } - - public static string GetSilentNotation(string hgvscNotation, int start, string refAbbreviation, bool isStopRetained) - { - return isStopRetained ? $"{hgvscNotation}(p.(Ter{start}=))" : $"{hgvscNotation}(p.({refAbbreviation}{start}=))"; - } - - internal static string GetSubstitutionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation) - { - // start lost - if (start == 1 && refAbbreviation != altAbbreviation) - return $"{proteinId}:p.({refAbbreviation}{start}?)"; - - return $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})"; - } - - internal static string GetUnknownNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation) - { - - return start == end - ? $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})" - : $"{proteinId}:p.({refAbbreviation}{start}_{altAbbreviation}{end})"; - } - - internal static string GetDeletionNotation(string proteinId, int start, int end, string refAbbreviation, bool isStopGained) - { - if (isStopGained) - return $"{proteinId}:p.({refAbbreviation}{start}Ter)"; - - return start == end ? 
- $"{proteinId}:p.({refAbbreviation}{start}del)" : - $"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}del)"; - } - } +using VariantAnnotation.AnnotatedPositions.AminoAcids; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class HgvspNotation + { + public static string GetDelInsNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation) + { + if (altAbbreviation.StartsWith("Ter")) + return GetSubstitutionNotation(proteinId, start, refAbbreviation.Substring(0, 3), "Ter"); + + return start == end + ? $"{proteinId}:p.({refAbbreviation}{start}delins{altAbbreviation})" + : $"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}delins{altAbbreviation})"; + } + + public static string GetInsertionNotation(string proteinId, int start, int end, string altAbbreviation, string peptideSeq) + { + // insertion past the last AA + if (end > peptideSeq.Length) return null; + + var leftFlankingAa = AminoAcidAbbreviation.GetThreeLetterAbbreviation(peptideSeq[start - 1]); + if (altAbbreviation.StartsWith("Ter")) + { + var refAminoAcid = AminoAcidAbbreviation.GetThreeLetterAbbreviation(peptideSeq[start]); + return $"{proteinId}:p.({refAminoAcid}{end}Ter)"; + } + + var rightFlankingAa = end > peptideSeq.Length ? "Ter" : AminoAcidAbbreviation.GetThreeLetterAbbreviation(peptideSeq[end - 1]); + + return $"{proteinId}:p.({leftFlankingAa}{start}_{rightFlankingAa}{end}ins{altAbbreviation})"; + } + + public static string GetFrameshiftNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop) + { + if (altAbbreviation.StartsWith("Ter")) + return $"{proteinId}:p.({refAbbreviation}{start}Ter)"; + + return countToStop > 0 ? 
+ $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer{countToStop})" : + $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation}fsTer?)"; + } + + public static string GetExtensionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation, int countToStop) + { + return countToStop > 0 ? + $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer{countToStop})" : + $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation.Substring(0, 3)}extTer?)"; + } + + public static string GetDuplicationNotation(string proteinId, int start, int end, string altAbbreviation) + { + return start == end ? + $"{proteinId}:p.({altAbbreviation}{start}dup)" : + $"{proteinId}:p.({altAbbreviation.Substring(0, 3)}{start}_{altAbbreviation.Substring(altAbbreviation.Length - 3)}{end}dup)"; + } + + + public static string GetStartLostNotation(string proteinId, string refAbbreviation) => $"{proteinId}:p.{refAbbreviation.Substring(0, 3)}1?"; + + public static string GetSilentNotation(string hgvscNotation, int start, string refAbbreviation, bool isStopRetained) + { + return isStopRetained ? $"{hgvscNotation}(p.(Ter{start}=))" : $"{hgvscNotation}(p.({refAbbreviation}{start}=))"; + } + + internal static string GetSubstitutionNotation(string proteinId, int start, string refAbbreviation, string altAbbreviation) + { + // start lost + if (start == 1 && refAbbreviation != altAbbreviation) + return $"{proteinId}:p.({refAbbreviation}{start}?)"; + + return $"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})"; + } + + internal static string GetUnknownNotation(string proteinId, int start, int end, string refAbbreviation, string altAbbreviation) + { + + return start == end + ? 
$"{proteinId}:p.({refAbbreviation}{start}{altAbbreviation})" + : $"{proteinId}:p.({refAbbreviation}{start}_{altAbbreviation}{end})"; + } + + internal static string GetDeletionNotation(string proteinId, int start, int end, string refAbbreviation, bool isStopGained) + { + if (isStopGained) + return $"{proteinId}:p.({refAbbreviation}{start}Ter)"; + + return start == end ? + $"{proteinId}:p.({refAbbreviation}{start}del)" : + $"{proteinId}:p.({refAbbreviation.Substring(0, 3)}{start}_{refAbbreviation.Substring(refAbbreviation.Length - 3)}{end}del)"; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotator.cs b/VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotator.cs index 8c5c248d..2f50aba2 100644 --- a/VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotator.cs +++ b/VariantAnnotation/AnnotatedPositions/RegulatoryRegionAnnotator.cs @@ -1,20 +1,20 @@ -using VariantAnnotation.AnnotatedPositions.Consequence; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions -{ - public static class RegulatoryRegionAnnotator - { - public static IAnnotatedRegulatoryRegion Annotate(IVariant variant, IRegulatoryRegion regulatoryRegion) - { - var featureEffect = new FeatureVariantEffects(regulatoryRegion, variant.Type, variant, - variant.Behavior.StructuralVariantConsequence); - - var consequence = new Consequences(null, featureEffect); - consequence.DetermineRegulatoryVariantEffects(); - return new AnnotatedRegulatoryRegion(regulatoryRegion, consequence.GetConsequences()); - } - } +using VariantAnnotation.AnnotatedPositions.Consequence; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions +{ + public static class RegulatoryRegionAnnotator + { + public static 
IAnnotatedRegulatoryRegion Annotate(IVariant variant, IRegulatoryRegion regulatoryRegion) + { + var featureEffect = new FeatureVariantEffects(regulatoryRegion, variant.Type, variant, + variant.Behavior.StructuralVariantConsequence); + + var consequence = new Consequences(null, featureEffect); + consequence.DetermineRegulatoryVariantEffects(); + return new AnnotatedRegulatoryRegion(regulatoryRegion, consequence.GetConsequences()); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcids.cs b/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcids.cs deleted file mode 100644 index 1e0f2f7f..00000000 --- a/VariantAnnotation/AnnotatedPositions/Transcript/AminoAcids.cs +++ /dev/null @@ -1,244 +0,0 @@ -using System; -using System.Collections.Generic; -using OptimizedCore; -using VariantAnnotation.TranscriptAnnotation; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class AminoAcids - { - public const string StopCodon = "*"; - public const char StopCodonChar = '*'; - - private readonly CodonConversion _codonConversionScheme = CodonConversion.HumanChromosome; - - private readonly Dictionary _aminoAcidLookupTable; - private readonly Dictionary _mitoDifferences; - - // converts single letter amino acid ambiguity codes to three - // letter abbreviations - private static readonly Dictionary SingleToThreeAminoAcids = new Dictionary - { - {'A', "Ala"}, - {'B', "Asx"}, - {'C', "Cys"}, - {'D', "Asp"}, - {'E', "Glu"}, - {'F', "Phe"}, - {'G', "Gly"}, - {'H', "His"}, - {'I', "Ile"}, - {'K', "Lys"}, - {'L', "Leu"}, - {'M', "Met"}, - {'N', "Asn"}, - {'P', "Pro"}, - {'Q', "Gln"}, - {'R', "Arg"}, - {'S', "Ser"}, - {'T', "Thr"}, - {'V', "Val"}, - {'W', "Trp"}, - {'Y', "Tyr"}, - {'Z', "Glx"}, - {'X', "Ter"}, // Ter now recommended in HGVS - {'*', "Ter"}, - {'U', "Sec"}, - {'O', "Pyl"}, - {'J', "Xle"}, - {'?', "_?_"} //deletion at the end of incomplete transcript results in unknown change - }; - - 
private enum CodonConversion : byte - { - HumanChromosome, - HumanMitochondrion - } - - public AminoAcids(bool isMitochondrial) - { - if (isMitochondrial) _codonConversionScheme = CodonConversion.HumanMitochondrion; - - _aminoAcidLookupTable = new Dictionary - { - // 2nd base: T - {"TTT", 'F'}, - {"TTC", 'F'}, - {"TTA", 'L'}, - {"TTG", 'L'}, - {"CTT", 'L'}, - {"CTC", 'L'}, - {"CTA", 'L'}, - {"CTG", 'L'}, - {"ATT", 'I'}, - {"ATC", 'I'}, - {"ATA", 'I'}, - {"ATG", 'M'}, - {"GTT", 'V'}, - {"GTC", 'V'}, - {"GTA", 'V'}, - {"GTG", 'V'}, - - // 2nd base: C - {"TCT", 'S'}, - {"TCC", 'S'}, - {"TCA", 'S'}, - {"TCG", 'S'}, - {"CCT", 'P'}, - {"CCC", 'P'}, - {"CCA", 'P'}, - {"CCG", 'P'}, - {"ACT", 'T'}, - {"ACC", 'T'}, - {"ACA", 'T'}, - {"ACG", 'T'}, - {"GCT", 'A'}, - {"GCC", 'A'}, - {"GCA", 'A'}, - {"GCG", 'A'}, - - // 2nd base: A - {"TAT", 'Y'}, - {"TAC", 'Y'}, - {"TAA", '*'}, - {"TAG", '*'}, - {"CAT", 'H'}, - {"CAC", 'H'}, - {"CAA", 'Q'}, - {"CAG", 'Q'}, - {"AAT", 'N'}, - {"AAC", 'N'}, - {"AAA", 'K'}, - {"AAG", 'K'}, - {"GAT", 'D'}, - {"GAC", 'D'}, - {"GAA", 'E'}, - {"GAG", 'E'}, - - // 2nd base: G - {"TGT", 'C'}, - {"TGC", 'C'}, - {"TGA", '*'}, - {"TGG", 'W'}, - {"CGT", 'R'}, - {"CGC", 'R'}, - {"CGA", 'R'}, - {"CGG", 'R'}, - {"AGT", 'S'}, - {"AGC", 'S'}, - {"AGA", 'R'}, - {"AGG", 'R'}, - {"GGT", 'G'}, - {"GGC", 'G'}, - {"GGA", 'G'}, - {"GGG", 'G'} - }; - - _mitoDifferences = new Dictionary - { - {"ATA", 'M'}, - {"TGA", 'W'}, - {"AGA", '*'}, - {"AGG", '*'} - }; - } - - internal static string AddUnknownAminoAcid(string aminoAcids) => aminoAcids == StopCodon ? 
aminoAcids : aminoAcids + 'X'; - - public SequenceChange Translate(string referenceCodons, string alternateCodons) - { - if (string.IsNullOrEmpty(referenceCodons) && string.IsNullOrEmpty(alternateCodons)) return new SequenceChange("", ""); - if (referenceCodons != null && (referenceCodons.Contains("N") || alternateCodons.Contains("N"))) return new SequenceChange("", ""); - - var referenceAminoAcids = TranslateBases(referenceCodons, false); - var alternateAminoAcids = TranslateBases(alternateCodons, false); - return new SequenceChange(referenceAminoAcids, alternateAminoAcids); - } - - /// - /// converts a DNA triplet to the appropriate amino acid abbreviation - /// - public static string ConvertAminoAcidToAbbreviation(char aminoAcid) - { - if (!SingleToThreeAminoAcids.TryGetValue(aminoAcid, out var abbreviation)) - { - throw new NotSupportedException($"Unable to convert the following string to an amino acid abbreviation: {aminoAcid}"); - } - - return abbreviation; - } - - /// - /// converts a DNA triplet to the appropriate amino acid abbreviation - /// The default conversion is human chromosomes. The second parameter also allows the user to specify other codon conversions like mitochondria, etc. - /// - internal char ConvertTripletToAminoAcid(string triplet) - { - var upperTriplet = triplet.ToUpper(); - - // check our exceptions first - if (_codonConversionScheme == CodonConversion.HumanMitochondrion && - _mitoDifferences.TryGetValue(upperTriplet, out var mitoAminoAcid)) return mitoAminoAcid; - - // the default case - return _aminoAcidLookupTable.TryGetValue(upperTriplet, out var aminoAcid) ? aminoAcid : 'X'; - } - - - /// - /// given a string of 1-letter amino acid ambiguity codes, this function - /// returns a string of 3-letter amino acid abbreviations up until the first - /// stop codon. 
- /// - public static string GetAbbreviations(string aminoAcids) - { - if (string.IsNullOrEmpty(aminoAcids)) return ""; - if (aminoAcids.Length == 1) return ConvertAminoAcidToAbbreviation(aminoAcids[0]); - - var sb = StringBuilderCache.Acquire(); - - foreach (var aminoAcid in aminoAcids) - { - sb.Append(ConvertAminoAcidToAbbreviation(aminoAcid)); - } - - return StringBuilderCache.GetStringAndRelease(sb); - } - - /// - /// returns a string of single-letter amino acids translated from a string of bases. - /// The bases must already be grouped by triplets (i.e. len must be a multiple of 3) - /// - public string TranslateBases(string bases, bool forceNonTriplet) - { - // sanity check: handle the empty case - if (bases == null) return null; - - var numAminoAcids = bases.Length / 3; - - // check if we have a non triplet case - var nonTriplet = !forceNonTriplet && numAminoAcids * 3 != bases.Length; - - // special case: single amino acid - string aminoAcidString; - if (numAminoAcids == 1) - { - aminoAcidString = - ConvertTripletToAminoAcid(bases.Substring(0, 3 * numAminoAcids)) - .ToString(); - return nonTriplet ? AddUnknownAminoAcid(aminoAcidString) : aminoAcidString; - } - - // multiple amino acid case - var aminoAcids = new char[numAminoAcids]; - for (var i = 0; i < numAminoAcids; i++) - { - aminoAcids[i] = ConvertTripletToAminoAcid(bases.Substring(i * 3, 3)); - } - - aminoAcidString = new string(aminoAcids); - return nonTriplet ? 
AddUnknownAminoAcid(aminoAcidString) : aminoAcidString; - } - } -} - diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscript.cs b/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscript.cs index 6de5e10c..93274fd9 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscript.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/AnnotatedTranscript.cs @@ -1,137 +1,137 @@ -using System.Collections.Generic; -using System.Globalization; -using System.Linq; -using System.Text; -using VariantAnnotation.Algorithms; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.IO; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class AnnotatedTranscript : IAnnotatedTranscript - { - public ITranscript Transcript { get; } - public string ReferenceAminoAcids { get; } - public string AlternateAminoAcids { get; } - public string ReferenceCodons { get; } - public string AlternateCodons { get; } - public IMappedPosition MappedPosition { get; } - public string HgvsCoding { get; } - public string HgvsProtein { get; } - public PredictionScore Sift { get; } - public PredictionScore PolyPhen { get; } - public IEnumerable Consequences { get; } - public IGeneFusionAnnotation GeneFusionAnnotation { get; } - public IList PluginData { get; } - public bool CompleteOverlap { get; } - - public AnnotatedTranscript(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids, - string referenceCodons, string alternateCodons, IMappedPosition mappedPosition, string hgvsCoding, - string hgvsProtein, PredictionScore sift, PredictionScore polyphen, - IEnumerable consequences, IGeneFusionAnnotation geneFusionAnnotation, bool completeOverlap) - { - Transcript = transcript; - ReferenceAminoAcids = referenceAminoAcids; - AlternateAminoAcids = alternateAminoAcids; - ReferenceCodons = referenceCodons; - AlternateCodons = alternateCodons; - MappedPosition = mappedPosition; - 
HgvsCoding = hgvsCoding; - HgvsProtein = hgvsProtein; - Sift = sift; - PolyPhen = polyphen; - Consequences = consequences; - GeneFusionAnnotation = geneFusionAnnotation; - PluginData = new List(); - CompleteOverlap = completeOverlap; - } - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("transcript", Transcript.Id.WithVersion); - jsonObject.AddStringValue("source", Transcript.Source.ToString()); - if (!CompleteOverlap) jsonObject.AddStringValue("bioType", GetBioType(Transcript.BioType)); - jsonObject.AddStringValue("codons", GetCodonString(ReferenceCodons, AlternateCodons)); - jsonObject.AddStringValue("aminoAcids", GetAminoAcidString(ReferenceAminoAcids, AlternateAminoAcids)); - - if (MappedPosition != null) - { - jsonObject.AddStringValue("cdnaPos", GetRangeString(MappedPosition.CoveredCdnaStart, MappedPosition.CoveredCdnaEnd)); - jsonObject.AddStringValue("cdsPos", GetRangeString(MappedPosition.CoveredCdsStart, MappedPosition.CoveredCdsEnd)); - jsonObject.AddStringValue("exons", GetFractionString(MappedPosition.ExonStart, MappedPosition.ExonEnd, Transcript.NumExons)); - jsonObject.AddStringValue("introns", GetFractionString(MappedPosition.IntronStart, MappedPosition.IntronEnd, Transcript.NumExons - 1)); - jsonObject.AddStringValue("proteinPos", GetRangeString(MappedPosition.CoveredProteinStart, MappedPosition.CoveredProteinEnd)); - } - - var geneId = Transcript.Source == Source.Ensembl - ? 
Transcript.Gene.EnsemblId.ToString() - : Transcript.Gene.EntrezGeneId.ToString(); - - if (!CompleteOverlap) jsonObject.AddStringValue("geneId", geneId); - jsonObject.AddStringValue("hgnc", Transcript.Gene.Symbol); - jsonObject.AddStringValues("consequence", Consequences?.Select(ConsequenceUtil.GetConsequence)); - jsonObject.AddStringValue("hgvsc", HgvsCoding); - jsonObject.AddStringValue("hgvsp", HgvsProtein); - jsonObject.AddStringValue("geneFusion", GeneFusionAnnotation?.ToString(), false); - - jsonObject.AddBoolValue("isCanonical", Transcript.IsCanonical); - - jsonObject.AddDoubleValue("polyPhenScore", PolyPhen?.Score); - - jsonObject.AddStringValue("polyPhenPrediction", PolyPhen?.Prediction); - if (!CompleteOverlap && Transcript.Translation != null) jsonObject.AddStringValue("proteinId", Transcript.Translation.ProteinId.WithVersion); - - jsonObject.AddDoubleValue("siftScore", Sift?.Score); - - jsonObject.AddStringValue("siftPrediction", Sift?.Prediction); - - if (PluginData != null) - foreach (var pluginData in PluginData) - { - jsonObject.AddStringValue(pluginData.Name, pluginData.GetJsonString(), false); - } - - jsonObject.AddBoolValue("completeOverlap", CompleteOverlap); - - sb.Append(JsonObject.CloseBrace); - } - - public static string GetBioType(BioType bioType) => bioType == BioType.three_prime_overlapping_ncRNA - ? "3prime_overlapping_ncRNA" - : bioType.ToString(); - - private static string GetAminoAcidString(string a, string b) - { - if (a == b) return a; - a = string.IsNullOrEmpty(a) ? "-" : a; - b = string.IsNullOrEmpty(b) ? "-" : b; - return $"{a}/{b}"; - } - - private static string GetCodonString(string a, string b) - { - if (a == b && string.IsNullOrEmpty(a)) return a; - a = string.IsNullOrEmpty(a) ? "-" : a; - b = string.IsNullOrEmpty(b) ? 
"-" : b; - return $"{a}/{b}"; - } - - private static string GetRangeString(int start, int end) - { - if (start == -1 && end == -1) return null; - if (start == -1) return "?-" + end; - if (end == -1) return start + "-?"; - if (start > end) Swap.Int(ref start, ref end); - return start == end ? start.ToString(CultureInfo.InvariantCulture) : start + "-" + end; - } - - private static string GetFractionString(int start, int end, int total) - { - if (start == -1 && end == -1) return null; - return GetRangeString(start, end) + "/" + total; - } - } +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using System.Text; +using VariantAnnotation.Algorithms; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.IO; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class AnnotatedTranscript : IAnnotatedTranscript + { + public ITranscript Transcript { get; } + public string ReferenceAminoAcids { get; } + public string AlternateAminoAcids { get; } + public string ReferenceCodons { get; } + public string AlternateCodons { get; } + public IMappedPosition MappedPosition { get; } + public string HgvsCoding { get; } + public string HgvsProtein { get; } + public PredictionScore Sift { get; } + public PredictionScore PolyPhen { get; } + public IEnumerable Consequences { get; } + public IGeneFusionAnnotation GeneFusionAnnotation { get; } + public IList PluginData { get; } + public bool CompleteOverlap { get; } + + public AnnotatedTranscript(ITranscript transcript, string referenceAminoAcids, string alternateAminoAcids, + string referenceCodons, string alternateCodons, IMappedPosition mappedPosition, string hgvsCoding, + string hgvsProtein, PredictionScore sift, PredictionScore polyphen, + IEnumerable consequences, IGeneFusionAnnotation geneFusionAnnotation, bool completeOverlap) + { + Transcript = transcript; + ReferenceAminoAcids = referenceAminoAcids; + AlternateAminoAcids = alternateAminoAcids; + 
ReferenceCodons = referenceCodons; + AlternateCodons = alternateCodons; + MappedPosition = mappedPosition; + HgvsCoding = hgvsCoding; + HgvsProtein = hgvsProtein; + Sift = sift; + PolyPhen = polyphen; + Consequences = consequences; + GeneFusionAnnotation = geneFusionAnnotation; + PluginData = new List(); + CompleteOverlap = completeOverlap; + } + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + jsonObject.AddStringValue("transcript", Transcript.Id.WithVersion); + jsonObject.AddStringValue("source", Transcript.Source.ToString()); + if (!CompleteOverlap) jsonObject.AddStringValue("bioType", GetBioType(Transcript.BioType)); + jsonObject.AddStringValue("codons", GetCodonString(ReferenceCodons, AlternateCodons)); + jsonObject.AddStringValue("aminoAcids", GetAminoAcidString(ReferenceAminoAcids, AlternateAminoAcids)); + + if (MappedPosition != null) + { + jsonObject.AddStringValue("cdnaPos", GetRangeString(MappedPosition.CoveredCdnaStart, MappedPosition.CoveredCdnaEnd)); + jsonObject.AddStringValue("cdsPos", GetRangeString(MappedPosition.CoveredCdsStart, MappedPosition.CoveredCdsEnd)); + jsonObject.AddStringValue("exons", GetFractionString(MappedPosition.ExonStart, MappedPosition.ExonEnd, Transcript.NumExons)); + jsonObject.AddStringValue("introns", GetFractionString(MappedPosition.IntronStart, MappedPosition.IntronEnd, Transcript.NumExons - 1)); + jsonObject.AddStringValue("proteinPos", GetRangeString(MappedPosition.CoveredProteinStart, MappedPosition.CoveredProteinEnd)); + } + + var geneId = Transcript.Source == Source.Ensembl + ? 
Transcript.Gene.EnsemblId.ToString() + : Transcript.Gene.EntrezGeneId.ToString(); + + if (!CompleteOverlap) jsonObject.AddStringValue("geneId", geneId); + jsonObject.AddStringValue("hgnc", Transcript.Gene.Symbol); + jsonObject.AddStringValues("consequence", Consequences?.Select(ConsequenceUtil.GetConsequence)); + jsonObject.AddStringValue("hgvsc", HgvsCoding); + jsonObject.AddStringValue("hgvsp", HgvsProtein); + jsonObject.AddStringValue("geneFusion", GeneFusionAnnotation?.ToString(), false); + + jsonObject.AddBoolValue("isCanonical", Transcript.IsCanonical); + + jsonObject.AddDoubleValue("polyPhenScore", PolyPhen?.Score); + + jsonObject.AddStringValue("polyPhenPrediction", PolyPhen?.Prediction); + if (!CompleteOverlap && Transcript.Translation != null) jsonObject.AddStringValue("proteinId", Transcript.Translation.ProteinId.WithVersion); + + jsonObject.AddDoubleValue("siftScore", Sift?.Score); + + jsonObject.AddStringValue("siftPrediction", Sift?.Prediction); + + if (PluginData != null) + foreach (var pluginData in PluginData) + { + jsonObject.AddStringValue(pluginData.Name, pluginData.GetJsonString(), false); + } + + jsonObject.AddBoolValue("completeOverlap", CompleteOverlap); + + sb.Append(JsonObject.CloseBrace); + } + + public static string GetBioType(BioType bioType) => bioType == BioType.three_prime_overlapping_ncRNA + ? "3prime_overlapping_ncRNA" + : bioType.ToString(); + + private static string GetAminoAcidString(string a, string b) + { + if (a == b) return a; + a = string.IsNullOrEmpty(a) ? "-" : a; + b = string.IsNullOrEmpty(b) ? "-" : b; + return $"{a}/{b}"; + } + + private static string GetCodonString(string a, string b) + { + if (a == b && string.IsNullOrEmpty(a)) return a; + a = string.IsNullOrEmpty(a) ? "-" : a; + b = string.IsNullOrEmpty(b) ? 
"-" : b; + return $"{a}/{b}"; + } + + private static string GetRangeString(int start, int end) + { + if (start == -1 && end == -1) return null; + if (start == -1) return "?-" + end; + if (end == -1) return start + "-?"; + if (start > end) Swap.Int(ref start, ref end); + return start == end ? start.ToString(CultureInfo.InvariantCulture) : start + "-" + end; + } + + private static string GetFractionString(int start, int end, int total) + { + if (start == -1 && end == -1) return null; + return GetRangeString(start, end) + "/" + total; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs b/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs index b83aaf40..32f2c391 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/CdnaSequence.cs @@ -29,6 +29,8 @@ public CdnaSequence(ISequence compressedSequence, ICodingRegion codingRegion, IT _sequence = GetCdnaSequence(); } + public string Sequence => _sequence; + public string GetCdnaSequence() { if (_sequence != null) return _sequence; diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequence.cs b/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequence.cs deleted file mode 100644 index 9aacfe5f..00000000 --- a/VariantAnnotation/AnnotatedPositions/Transcript/CodingSequence.cs +++ /dev/null @@ -1,26 +0,0 @@ -using Genome; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class CodingSequence : ISequence - { - private readonly string _sequence; - - public CodingSequence(ISequence compressedSequence, ICodingRegion codingRegion, ITranscriptRegion[] regions, - bool onReverseStrand, byte startExonPhase, IRnaEdit[] rnaEdits) - { - string cdnaSequence = - new CdnaSequence(compressedSequence, codingRegion, regions, onReverseStrand, rnaEdits) - .GetCdnaSequence(); - int cdsLen = 
codingRegion.CdnaEnd - codingRegion.CdnaStart + 1; - - _sequence = new string('N', startExonPhase) + cdnaSequence.Substring(codingRegion.CdnaStart - 1, cdsLen); - } - - public string GetCodingSequence() => _sequence; - public int Length => _sequence.Length; - public Band[] CytogeneticBands => null; - public string Substring(int offset, int length) => _sequence.Substring(offset, length); - } -} \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/Codons.cs b/VariantAnnotation/AnnotatedPositions/Transcript/Codons.cs index f4287304..cac18f20 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/Codons.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/Codons.cs @@ -1,56 +1,58 @@ -using System; -using Genome; -using VariantAnnotation.TranscriptAnnotation; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public static class Codons - { - public static SequenceChange GetCodons(string transcriptAlternateAllele, - int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, ISequence codingSequence) - { - if (cdsStart == -1 || cdsEnd == -1 || proteinBegin == -1 || proteinEnd == -1) return new SequenceChange("", ""); - - // current implementation of GetCoveredCdsAndProteinPositions may return negative cdsStart and cdsEnd beyond the CDS region - if (cdsStart < 1) cdsStart = 1; - if (cdsEnd > codingSequence.Length) cdsEnd = codingSequence.Length; - - int aminoAcidStart = Math.Max(proteinBegin * 3 - 2, 1); - int aminoAcidEnd = Math.Min(proteinEnd * 3, codingSequence.Length); - - var transcriptReferenceAllele = cdsEnd >= cdsStart ? codingSequence.Substring(cdsStart - 1, cdsEnd - cdsStart + 1) : ""; - - int prefixStartIndex = aminoAcidStart - 1; - int prefixLen = cdsStart - aminoAcidStart; - - int suffixStartIndex = cdsEnd; - int suffixLen = aminoAcidEnd - cdsEnd; - - string prefix = prefixStartIndex + prefixLen < codingSequence.Length - ? 
codingSequence.Substring(prefixStartIndex, prefixLen).ToLower() - : "AAA"; - - string suffix = suffixLen > 0 - ? codingSequence.Substring(suffixStartIndex, suffixLen).ToLower() - : ""; - - var refCodons = GetCodon(transcriptReferenceAllele, prefix, suffix); - var altCodons = GetCodon(transcriptAlternateAllele, prefix, suffix); - return new SequenceChange(refCodons, altCodons); - } - - /// - /// returns the codon string consisting of the prefix and suffix bases flanking the allele bases - /// - public static string GetCodon(string allele, string prefix, string suffix) - { - if (prefix.Length == 0 && suffix.Length == 0) return allele; - return $"{prefix}{allele}{suffix}"; - } - - /// - /// returns true if the length is a multiple of three, false otherwise - /// - public static bool IsTriplet(int len) => Math.Abs(len) % 3 == 0; - } +using System; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public static class Codons + { + private static readonly (string, string) EmptyTuple = (string.Empty, string.Empty); + + public static (string ReferenceCodons, string AlternateCodons) GetCodons(string transcriptAlternateAllele, + int cdsStart, int cdsEnd, int proteinBegin, int proteinEnd, ReadOnlySpan codingSequence) + { + if (cdsStart == -1 || cdsEnd == -1 || proteinBegin == -1 || proteinEnd == -1) return EmptyTuple; + + // current implementation of GetCoveredCdsAndProteinPositions may return negative cdsStart and cdsEnd beyond the CDS region + if (cdsStart < 1) cdsStart = 1; + if (cdsEnd > codingSequence.Length) cdsEnd = codingSequence.Length; + + int aminoAcidStart = Math.Max(proteinBegin * 3 - 2, 1); + int aminoAcidEnd = Math.Min(proteinEnd * 3, codingSequence.Length); + + string transcriptReferenceAllele = cdsEnd >= cdsStart + ? 
codingSequence.Slice(cdsStart - 1, cdsEnd - cdsStart + 1).ToString() + : ""; + + int prefixStartIndex = aminoAcidStart - 1; + int prefixLen = cdsStart - aminoAcidStart; + + int suffixStartIndex = cdsEnd; + int suffixLen = aminoAcidEnd - cdsEnd; + + string prefix = prefixStartIndex + prefixLen < codingSequence.Length + ? codingSequence.Slice(prefixStartIndex, prefixLen).ToString().ToLower() + : "AAA"; + + string suffix = suffixLen > 0 + ? codingSequence.Slice(suffixStartIndex, suffixLen).ToString().ToLower() + : ""; + + string refCodons = GetCodon(transcriptReferenceAllele, prefix, suffix); + string altCodons = GetCodon(transcriptAlternateAllele, prefix, suffix); + return (refCodons, altCodons); + } + + /// + /// returns the codon string consisting of the prefix and suffix bases flanking the allele bases + /// + public static string GetCodon(string allele, string prefix, string suffix) + { + if (prefix.Length == 0 && suffix.Length == 0) return allele; + return $"{prefix}{allele}{suffix}"; + } + + /// + /// returns true if the length is a multiple of three, false otherwise + /// + public static bool IsTriplet(int len) => Math.Abs(len) % 3 == 0; + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/CompactId.cs b/VariantAnnotation/AnnotatedPositions/Transcript/CompactId.cs index 9d9c6f5e..cb6d50f6 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/CompactId.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/CompactId.cs @@ -1,167 +1,167 @@ -using System; -using IO; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Utilities; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public struct CompactId : ICompactId - { - private readonly IdType _id; - private readonly byte _version; - private readonly uint _info; - - private const int NoInfo = int.MaxValue; - private const byte NoVersion = byte.MaxValue; - private const int NumShift = 4; - private const int LengthMask = 
0xf; - private const int MaxNumber = 0xfffffff; - - internal static CompactId Empty => new CompactId(IdType.Unknown, NoVersion, NoInfo); - public bool IsEmpty() => _id == IdType.Unknown; - - private CompactId(IdType id, byte version, uint info) - { - _id = id; - _version = version; - _info = info; - } - - public override string ToString() => ConvertToString(true); - public string WithVersion => ConvertToString(true); - public string WithoutVersion => ConvertToString(false); - - public static CompactId Convert(string s, byte version = NoVersion) - { - if (string.IsNullOrEmpty(s)) return Empty; - - if (s.StartsWith("ENSG")) return GetCompactId(s, 4, IdType.EnsemblGene, version); - if (s.StartsWith("ENST")) return GetCompactId(s, 4, IdType.EnsemblTranscript, version); - if (s.StartsWith("ENSP")) return GetCompactId(s, 4, IdType.EnsemblProtein, version); - if (s.StartsWith("ENSESTG")) return GetCompactId(s, 7, IdType.EnsemblEstGene, version); - if (s.StartsWith("ENSESTP")) return GetCompactId(s, 7, IdType.EnsemblEstProtein, version); - if (s.StartsWith("ENSR")) return GetCompactId(s, 4, IdType.EnsemblRegulatory, version); - if (s.StartsWith("CCDS")) return GetCompactId(s, 4, IdType.Ccds, version); - if (s.StartsWith("NR_")) return GetCompactId(s, 3, IdType.RefSeqNonCodingRNA, version); - if (s.StartsWith("NM_")) return GetCompactId(s, 3, IdType.RefSeqMessengerRNA, version); - if (s.StartsWith("NP_")) return GetCompactId(s, 3, IdType.RefSeqProtein, version); - if (s.StartsWith("XR_")) return GetCompactId(s, 3, IdType.RefSeqPredictedNonCodingRNA, version); - if (s.StartsWith("XM_")) return GetCompactId(s, 3, IdType.RefSeqPredictedMessengerRNA, version); - if (s.StartsWith("XP_")) return GetCompactId(s, 3, IdType.RefSeqPredictedProtein, version); - - if (int.TryParse(s, out int i)) return GetNumericalCompactId(i, s.Length); - - Console.WriteLine("Unknown ID: [{0}] ({1})", s, s.Length); - return Empty; - } - - private static uint ToInfo(int num, int len) => (uint)(num << 4 
| (len & LengthMask)); - - private static CompactId GetCompactId(string s, int prefixLen, IdType idType, byte version) - { - var (id, _) = FormatUtilities.SplitVersion(s); - int num = int.Parse(id.Substring(prefixLen)); - return new CompactId(idType, version, ToInfo(num, id.Length - prefixLen)); - } - - private static CompactId GetNumericalCompactId(int num, int paddedLength) - { - if (num > MaxNumber) throw new ArgumentOutOfRangeException($"Could not convert the number ({num}) to a CompactID. Max supported number is {MaxNumber}."); - return new CompactId(IdType.OnlyNumbers, NoVersion, ToInfo(num, paddedLength)); - } - - private string ConvertToString(bool showVersion) - { - if (_id == IdType.Unknown) return null; - var prefix = GetPrefix(); - var number = GetNumber(); - var version = GetVersion(showVersion); - return prefix + number + version; - } - - private string GetVersion(bool showVersion) - { - if (!showVersion || _version == NoVersion) return null; - return "." + _version; - } - - private string GetNumber() - { - var num = _info >> NumShift; - var length = _info & LengthMask; - return num.ToString("D" + length); - } - - private string GetPrefix() - { - // ReSharper disable once SwitchStatementMissingSomeCases - switch (_id) - { - case IdType.EnsemblGene: - return "ENSG"; - case IdType.EnsemblTranscript: - return "ENST"; - case IdType.EnsemblProtein: - return "ENSP"; - case IdType.EnsemblEstGene: - return "ENSESTG"; - case IdType.EnsemblEstProtein: - return "ENSESTP"; - case IdType.EnsemblRegulatory: - return "ENSR"; - case IdType.Ccds: - return "CCDS"; - case IdType.RefSeqNonCodingRNA: - return "NR_"; - case IdType.RefSeqMessengerRNA: - return "NM_"; - case IdType.RefSeqProtein: - return "NP_"; - case IdType.RefSeqPredictedNonCodingRNA: - return "XR_"; - case IdType.RefSeqPredictedMessengerRNA: - return "XM_"; - case IdType.RefSeqPredictedProtein: - return "XP_"; - } - - return null; - } - - public void Write(IExtendedBinaryWriter writer) - { - 
writer.Write((byte)_id); - writer.Write(_version); - writer.Write(_info); - } - - public static CompactId Read(IBufferedBinaryReader reader) - { - var id = (IdType)reader.ReadByte(); - var version = reader.ReadByte(); - var info = reader.ReadUInt32(); - return new CompactId(id, version, info); - } - } - - public enum IdType : byte - { - // ReSharper disable InconsistentNaming - Unknown, - Ccds, - EnsemblEstGene, - EnsemblEstProtein, - EnsemblGene, - EnsemblProtein, - EnsemblRegulatory, - EnsemblTranscript, - OnlyNumbers, - RefSeqMessengerRNA, - RefSeqNonCodingRNA, - RefSeqPredictedMessengerRNA, - RefSeqPredictedNonCodingRNA, - RefSeqPredictedProtein, - RefSeqProtein - // ReSharper restore InconsistentNaming - } +using System; +using IO; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Utilities; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public struct CompactId : ICompactId + { + private readonly IdType _id; + private readonly byte _version; + private readonly uint _info; + + private const int NoInfo = int.MaxValue; + private const byte NoVersion = byte.MaxValue; + private const int NumShift = 4; + private const int LengthMask = 0xf; + private const int MaxNumber = 0xfffffff; + + internal static CompactId Empty => new CompactId(IdType.Unknown, NoVersion, NoInfo); + public bool IsEmpty() => _id == IdType.Unknown; + + private CompactId(IdType id, byte version, uint info) + { + _id = id; + _version = version; + _info = info; + } + + public override string ToString() => ConvertToString(true); + public string WithVersion => ConvertToString(true); + public string WithoutVersion => ConvertToString(false); + + public static CompactId Convert(string s, byte version = NoVersion) + { + if (string.IsNullOrEmpty(s)) return Empty; + + if (s.StartsWith("ENSG")) return GetCompactId(s, 4, IdType.EnsemblGene, version); + if (s.StartsWith("ENST")) return GetCompactId(s, 4, IdType.EnsemblTranscript, version); + if 
(s.StartsWith("ENSP")) return GetCompactId(s, 4, IdType.EnsemblProtein, version); + if (s.StartsWith("ENSESTG")) return GetCompactId(s, 7, IdType.EnsemblEstGene, version); + if (s.StartsWith("ENSESTP")) return GetCompactId(s, 7, IdType.EnsemblEstProtein, version); + if (s.StartsWith("ENSR")) return GetCompactId(s, 4, IdType.EnsemblRegulatory, version); + if (s.StartsWith("CCDS")) return GetCompactId(s, 4, IdType.Ccds, version); + if (s.StartsWith("NR_")) return GetCompactId(s, 3, IdType.RefSeqNonCodingRNA, version); + if (s.StartsWith("NM_")) return GetCompactId(s, 3, IdType.RefSeqMessengerRNA, version); + if (s.StartsWith("NP_")) return GetCompactId(s, 3, IdType.RefSeqProtein, version); + if (s.StartsWith("XR_")) return GetCompactId(s, 3, IdType.RefSeqPredictedNonCodingRNA, version); + if (s.StartsWith("XM_")) return GetCompactId(s, 3, IdType.RefSeqPredictedMessengerRNA, version); + if (s.StartsWith("XP_")) return GetCompactId(s, 3, IdType.RefSeqPredictedProtein, version); + + if (int.TryParse(s, out int i)) return GetNumericalCompactId(i, s.Length); + + Console.WriteLine("Unknown ID: [{0}] ({1})", s, s.Length); + return Empty; + } + + private static uint ToInfo(int num, int len) => (uint)(num << 4 | (len & LengthMask)); + + private static CompactId GetCompactId(string s, int prefixLen, IdType idType, byte version) + { + var (id, _) = FormatUtilities.SplitVersion(s); + int num = int.Parse(id.Substring(prefixLen)); + return new CompactId(idType, version, ToInfo(num, id.Length - prefixLen)); + } + + private static CompactId GetNumericalCompactId(int num, int paddedLength) + { + if (num > MaxNumber) throw new ArgumentOutOfRangeException($"Could not convert the number ({num}) to a CompactID. 
Max supported number is {MaxNumber}."); + return new CompactId(IdType.OnlyNumbers, NoVersion, ToInfo(num, paddedLength)); + } + + private string ConvertToString(bool showVersion) + { + if (_id == IdType.Unknown) return null; + var prefix = GetPrefix(); + var number = GetNumber(); + var version = GetVersion(showVersion); + return prefix + number + version; + } + + private string GetVersion(bool showVersion) + { + if (!showVersion || _version == NoVersion) return null; + return "." + _version; + } + + private string GetNumber() + { + var num = _info >> NumShift; + var length = _info & LengthMask; + return num.ToString("D" + length); + } + + private string GetPrefix() + { + // ReSharper disable once SwitchStatementMissingSomeCases + switch (_id) + { + case IdType.EnsemblGene: + return "ENSG"; + case IdType.EnsemblTranscript: + return "ENST"; + case IdType.EnsemblProtein: + return "ENSP"; + case IdType.EnsemblEstGene: + return "ENSESTG"; + case IdType.EnsemblEstProtein: + return "ENSESTP"; + case IdType.EnsemblRegulatory: + return "ENSR"; + case IdType.Ccds: + return "CCDS"; + case IdType.RefSeqNonCodingRNA: + return "NR_"; + case IdType.RefSeqMessengerRNA: + return "NM_"; + case IdType.RefSeqProtein: + return "NP_"; + case IdType.RefSeqPredictedNonCodingRNA: + return "XR_"; + case IdType.RefSeqPredictedMessengerRNA: + return "XM_"; + case IdType.RefSeqPredictedProtein: + return "XP_"; + } + + return null; + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.Write((byte)_id); + writer.Write(_version); + writer.Write(_info); + } + + public static CompactId Read(IBufferedBinaryReader reader) + { + var id = (IdType)reader.ReadByte(); + var version = reader.ReadByte(); + var info = reader.ReadUInt32(); + return new CompactId(id, version, info); + } + } + + public enum IdType : byte + { + // ReSharper disable InconsistentNaming + Unknown, + Ccds, + EnsemblEstGene, + EnsemblEstProtein, + EnsemblGene, + EnsemblProtein, + EnsemblRegulatory, + EnsemblTranscript, 
+ OnlyNumbers, + RefSeqMessengerRNA, + RefSeqNonCodingRNA, + RefSeqPredictedMessengerRNA, + RefSeqPredictedNonCodingRNA, + RefSeqPredictedProtein, + RefSeqProtein + // ReSharper restore InconsistentNaming + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffects.cs b/VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffects.cs index 95b53efa..a0034176 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffects.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/FeatureVariantEffects.cs @@ -1,39 +1,39 @@ -using Intervals; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class FeatureVariantEffects : IFeatureVariantEffects - { - private readonly bool _isSv; - - private readonly bool _completelyOverlaps; - private readonly bool _overlaps; - private readonly bool _completelyWithin; - - private readonly bool _lossOrDeletion; - private readonly bool _gainOrDuplication; - private readonly bool _isInsertionDeletion; - private readonly bool _isInsertion; - - public FeatureVariantEffects(IInterval feature, VariantType vt, IInterval variant, bool isSv) - { - _isSv = isSv; - - _completelyOverlaps = Intervals.Utilities.Contains(variant.Start, variant.End, feature.Start, feature.End); - _overlaps = feature.Overlaps(variant); - _completelyWithin = variant.Start >= feature.Start && variant.End <= feature.End; - - _lossOrDeletion = vt == VariantType.copy_number_loss || vt == VariantType.deletion; - _gainOrDuplication = vt == VariantType.copy_number_gain || vt == VariantType.duplication || vt == VariantType.tandem_duplication; - _isInsertionDeletion = vt == VariantType.indel; - _isInsertion = vt == VariantType.insertion; - } - - public bool Ablation() => (_lossOrDeletion || _isInsertionDeletion) && _completelyOverlaps; - public bool Amplification() => _gainOrDuplication && 
_completelyOverlaps; - public bool Truncation() => _isSv && _lossOrDeletion && _overlaps && !_completelyOverlaps; - public bool Elongation() => _isSv && _completelyWithin && (_gainOrDuplication || _isInsertion); - } +using Intervals; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class FeatureVariantEffects : IFeatureVariantEffects + { + private readonly bool _isSv; + + private readonly bool _completelyOverlaps; + private readonly bool _overlaps; + private readonly bool _completelyWithin; + + private readonly bool _lossOrDeletion; + private readonly bool _gainOrDuplication; + private readonly bool _isInsertionDeletion; + private readonly bool _isInsertion; + + public FeatureVariantEffects(IInterval feature, VariantType vt, IInterval variant, bool isSv) + { + _isSv = isSv; + + _completelyOverlaps = Intervals.Utilities.Contains(variant.Start, variant.End, feature.Start, feature.End); + _overlaps = feature.Overlaps(variant); + _completelyWithin = variant.Start >= feature.Start && variant.End <= feature.End; + + _lossOrDeletion = vt == VariantType.copy_number_loss || vt == VariantType.deletion; + _gainOrDuplication = vt == VariantType.copy_number_gain || vt == VariantType.duplication || vt == VariantType.tandem_duplication; + _isInsertionDeletion = vt == VariantType.indel; + _isInsertion = vt == VariantType.insertion; + } + + public bool Ablation() => (_lossOrDeletion || _isInsertionDeletion) && _completelyOverlaps; + public bool Amplification() => _gainOrDuplication && _completelyOverlaps; + public bool Truncation() => _isSv && _lossOrDeletion && _overlaps && !_completelyOverlaps; + public bool Elongation() => _isSv && _completelyWithin && (_gainOrDuplication || _isInsertion); + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusion.cs b/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusion.cs index 
8a54fd91..0deff2b7 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusion.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusion.cs @@ -1,30 +1,30 @@ -using System.Text; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.IO; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class GeneFusion : IGeneFusion - { - public int? Exon { get; } - public int? Intron { get; } - public string HgvsCoding { get; } - - public GeneFusion(int? exon, int? intron, string hgvsCoding) - { - Exon = exon; - Intron = intron; - HgvsCoding = hgvsCoding; - } - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("hgvsc", HgvsCoding); - jsonObject.AddIntValue("exon", Exon); - jsonObject.AddIntValue("intron", Intron); - sb.Append(JsonObject.CloseBrace); - } - } +using System.Text; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.IO; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class GeneFusion : IGeneFusion + { + public int? Exon { get; } + public int? Intron { get; } + public string HgvsCoding { get; } + + public GeneFusion(int? exon, int? 
intron, string hgvsCoding) + { + Exon = exon; + Intron = intron; + HgvsCoding = hgvsCoding; + } + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + sb.Append(JsonObject.OpenBrace); + jsonObject.AddStringValue("hgvsc", HgvsCoding); + jsonObject.AddIntValue("exon", Exon); + jsonObject.AddIntValue("intron", Intron); + sb.Append(JsonObject.CloseBrace); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusionAnnotation.cs b/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusionAnnotation.cs index 15b01052..bb36ae70 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusionAnnotation.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/GeneFusionAnnotation.cs @@ -1,33 +1,33 @@ -using OptimizedCore; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.IO; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class GeneFusionAnnotation : IGeneFusionAnnotation - { - public int? Exon { get; } - public int? Intron { get; } - public IGeneFusion[] GeneFusions { get; } - - public GeneFusionAnnotation(int? exon, int? intron, IGeneFusion[] geneFusions) - { - Exon = exon; - Intron = intron; - GeneFusions = geneFusions; - } - - public override string ToString() - { - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - sb.Append(JsonObject.OpenBrace); - jsonObject.AddIntValue("exon", Exon); - jsonObject.AddIntValue("intron", Intron); - jsonObject.AddObjectValues("fusions", GeneFusions); - sb.Append(JsonObject.CloseBrace); - - return StringBuilderCache.GetStringAndRelease(sb); - } - } +using OptimizedCore; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.IO; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class GeneFusionAnnotation : IGeneFusionAnnotation + { + public int? Exon { get; } + public int? 
Intron { get; } + public IGeneFusion[] GeneFusions { get; } + + public GeneFusionAnnotation(int? exon, int? intron, IGeneFusion[] geneFusions) + { + Exon = exon; + Intron = intron; + GeneFusions = geneFusions; + } + + public override string ToString() + { + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + sb.Append(JsonObject.OpenBrace); + jsonObject.AddIntValue("exon", Exon); + jsonObject.AddIntValue("intron", Intron); + jsonObject.AddObjectValues("fusions", GeneFusions); + sb.Append(JsonObject.CloseBrace); + + return StringBuilderCache.GetStringAndRelease(sb); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/MappedPosition.cs b/VariantAnnotation/AnnotatedPositions/Transcript/MappedPosition.cs index 2c3daa01..84424c95 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/MappedPosition.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/MappedPosition.cs @@ -1,43 +1,46 @@ -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class MappedPosition : IMappedPosition - { - public int CdnaStart { get; } - public int CdnaEnd { get; } - public int CdsStart { get; } - public int CdsEnd { get; } - public int ProteinStart { get; set; } - public int ProteinEnd { get; set; } - public int ExonStart { get; } - public int ExonEnd { get; } - public int IntronStart { get; } - public int IntronEnd { get; } - public int RegionStartIndex { get; } - public int RegionEndIndex { get; } - public int CoveredProteinStart { get; set; } - public int CoveredProteinEnd { get; set; } - public int CoveredCdsStart { get; set; } - public int CoveredCdsEnd { get; set; } - public int CoveredCdnaStart { get; set; } - public int CoveredCdnaEnd { get; set; } - - public MappedPosition(int cdnaStart, int cdnaEnd, int cdsStart, int cdsEnd, int proteinStart, int proteinEnd, - int exonStart, int exonEnd, int intronStart, int intronEnd, 
int regionStartIndex, int regionEndIndex) - { - CdnaStart = cdnaStart; - CdnaEnd = cdnaEnd; - CdsStart = cdsStart; - CdsEnd = cdsEnd; - ProteinStart = proteinStart; - ProteinEnd = proteinEnd; - ExonStart = exonStart; - ExonEnd = exonEnd; - IntronStart = intronStart; - IntronEnd = intronEnd; - RegionStartIndex = regionStartIndex; - RegionEndIndex = regionEndIndex; - } - } +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class MappedPosition : IMappedPosition + { + public int CdnaStart { get; } + public int CdnaEnd { get; } + public int CdsStart { get; } + public int CdsEnd { get; } + public int ExtendedCdsEnd { get; } + public int ProteinStart { get; set; } + public int ProteinEnd { get; set; } + public int ExtendedProteinEnd { get; } + public int ExonStart { get; } + public int ExonEnd { get; } + public int IntronStart { get; } + public int IntronEnd { get; } + public int RegionStartIndex { get; } + public int RegionEndIndex { get; } + public int CoveredProteinStart { get; set; } + public int CoveredProteinEnd { get; set; } + public int CoveredCdsStart { get; set; } + public int CoveredCdsEnd { get; set; } + public int CoveredCdnaStart { get; set; } + public int CoveredCdnaEnd { get; set; } + + public MappedPosition(int cdnaStart, int cdnaEnd, int cdsStart, int cdsEnd, int extendedCdsEnd, int proteinStart, int proteinEnd,int extendedProteinEnd, int exonStart, int exonEnd, int intronStart, int intronEnd, int regionStartIndex, int regionEndIndex) + { + CdnaStart = cdnaStart; + CdnaEnd = cdnaEnd; + CdsStart = cdsStart; + CdsEnd = cdsEnd; + ExtendedCdsEnd = extendedCdsEnd; + ProteinStart = proteinStart; + ProteinEnd = proteinEnd; + ExtendedProteinEnd = extendedProteinEnd; + ExonStart = exonStart; + ExonEnd = exonEnd; + IntronStart = intronStart; + IntronEnd = intronEnd; + RegionStartIndex = regionStartIndex; + RegionEndIndex = regionEndIndex; + } + } } \ No newline at end of file diff 
--git a/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilities.cs b/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilities.cs index ff058a00..88ceeca6 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilities.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/MappedPositionUtilities.cs @@ -1,172 +1,155 @@ -using System; -using Intervals; -using VariantAnnotation.Algorithms; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public static class MappedPositionUtilities - { - public static (int Index, ITranscriptRegion Region) FindRegion(ITranscriptRegion[] regions, - int variantPosition) - { - int index = regions.BinarySearch(variantPosition); - var region = index < 0 ? null : regions[index]; - return (index, region); - } - - public static (int CdnaStart, int CdnaEnd) GetCdnaPositions(ITranscriptRegion startRegion, - ITranscriptRegion endRegion, IInterval variant, bool onReverseStrand, bool isInsertion) - { - int cdnaStart = GetCdnaPosition(startRegion, variant.Start, onReverseStrand); - int cdnaEnd = GetCdnaPosition(endRegion, variant.End, onReverseStrand); - - if (FoundExonEndpointInsertion(isInsertion, cdnaStart, cdnaEnd, startRegion, endRegion)) - { - (cdnaStart, cdnaEnd) = FixExonEndpointInsertion(cdnaStart, cdnaEnd, onReverseStrand, startRegion, - endRegion, variant); - } - - return (cdnaStart, cdnaEnd); - } - - private static int GetCdnaPosition(ITranscriptRegion region, int variantPosition, bool onReverseStrand) - { - if (region == null || region.Type != TranscriptRegionType.Exon) return -1; - - return onReverseStrand - ? 
region.End - variantPosition + region.CdnaStart - : variantPosition - region.Start + region.CdnaStart; - } - - /// - /// Assuming at least one cDNA coordinate overlaps with an exon, the covered cDNA coordinates represent - /// the coordinates actually covered by the variant. - /// - public static (int Start, int End) GetCoveredCdnaPositions(this ITranscriptRegion[] regions, int cdnaStart, int startRegionIndex, - int cdnaEnd, int endRegionIndex, bool onReverseStrand) - { - // exon case - if (cdnaStart != -1 && cdnaEnd != -1) return (cdnaStart, cdnaEnd); - - if (onReverseStrand) Swap.Int(ref startRegionIndex, ref endRegionIndex); - - var startRegion = regions.GetCoveredRegion(startRegionIndex); - var endRegion = regions.GetCoveredRegion(endRegionIndex); - - if (startRegion.Type != TranscriptRegionType.Exon && endRegion.Type != TranscriptRegionType.Exon) - return (-1, -1); - - int codingEnd = onReverseStrand ? regions[0].CdnaEnd : regions[regions.Length - 1].CdnaEnd; - - cdnaStart = GetCoveredCdnaPosition(true, cdnaStart, startRegion, startRegionIndex, onReverseStrand, codingEnd); - cdnaEnd = GetCoveredCdnaPosition(false, cdnaEnd, endRegion, endRegionIndex, onReverseStrand, codingEnd); - - return cdnaStart < cdnaEnd ? (cdnaStart, cdnaEnd) : (cdnaEnd, cdnaStart); - } - - private static ITranscriptRegion GetCoveredRegion(this ITranscriptRegion[] regions, int regionIndex) - { - if (regionIndex == -1) return regions[0]; - return regionIndex == ~regions.Length ? regions[regions.Length - 1] : regions[regionIndex]; - } - - private static int GetCoveredCdnaPosition(bool isStart, int cdnaPosition, ITranscriptRegion region, int regionIndex, bool onReverseStrand, int codingEnd) - { - if (cdnaPosition >= 0) return cdnaPosition; - - // start before transcript - if (regionIndex == -1) return onReverseStrand ? codingEnd : 1; - - // end after transcript - if (regionIndex < -1) return onReverseStrand ? 1 : codingEnd; - - // intron - return isStart ? 
region.CdnaEnd : region.CdnaStart; - } - - public static (int CdsStart, int CdsEnd, int ProteinStart, int ProteinEnd) GetCoveredCdsAndProteinPositions(int coveredCdnaStart, int coveredCdnaEnd, - byte startExonPhase, ICodingRegion codingRegion) - { - if (codingRegion == null || - coveredCdnaEnd < codingRegion.CdnaStart || - coveredCdnaStart > codingRegion.CdnaEnd || - coveredCdnaStart == -1 && coveredCdnaEnd == -1) return (-1, -1, -1, -1); - - int beginOffset = startExonPhase - codingRegion.CdnaStart + 1; - int start = Math.Max(coveredCdnaStart + beginOffset, 1 + startExonPhase); - int end = Math.Min(coveredCdnaEnd + beginOffset, codingRegion.Length + startExonPhase); - - return (start, end, GetProteinPosition(start), GetProteinPosition(end)); - } - - public static int GetProteinPosition(int cdsPosition) - { - if (cdsPosition == -1) return -1; - return (cdsPosition + 2) / 3; - } - - public static (int CdsStart, int CdsEnd) GetCdsPositions(ICodingRegion codingRegion, int cdnaStart, - int cdnaEnd, byte startExonPhase, bool isInsertion) - { - int cdsStart = GetCdsPosition(codingRegion, cdnaStart, startExonPhase); - int cdsEnd = GetCdsPosition(codingRegion, cdnaEnd, startExonPhase); - - // silence CDS for insertions that occur just after the coding region - if (isInsertion && codingRegion != null && (cdnaEnd == codingRegion.CdnaEnd || cdnaStart == codingRegion.CdnaStart)) - { - cdsStart = -1; - cdsEnd = -1; - } - - return (cdsStart, cdsEnd); - } - - private static int GetCdsPosition(ICodingRegion codingRegion, int cdnaPosition, byte startExonPhase) - { - if (codingRegion == null || cdnaPosition < codingRegion.CdnaStart || - cdnaPosition > codingRegion.CdnaEnd) return -1; - return cdnaPosition - codingRegion.CdnaStart + startExonPhase + 1; - } - - /// - /// Fixes the missing cDNA coordinate for situations where an insertion occurs on either the first or last - /// base of an exon - /// - internal static (int CdnaStart, int CdnaEnd) FixExonEndpointInsertion(int cdnaStart, 
int cdnaEnd, - bool onReverseStrand, ITranscriptRegion startRegion, ITranscriptRegion endRegion, IInterval variant) - { - (ITranscriptRegion intron, ITranscriptRegion exon) = startRegion.Type == TranscriptRegionType.Exon - ? (endRegion, startRegion) - : (startRegion, endRegion); - - bool matchExonStart = variant.Start == exon.Start; - - int cdnaPos = !onReverseStrand && matchExonStart || onReverseStrand && !matchExonStart - ? intron.CdnaStart - : intron.CdnaEnd; - - if (cdnaStart == -1) cdnaStart = cdnaPos; - else cdnaEnd = cdnaPos; - - return (cdnaStart, cdnaEnd); - } - - /// - /// Identifies when an insertion on an exon boundary needs special attention. Here we're looking for one - /// intron & one exon where one cDNA coordinate is defined, but the other isn't. - /// - internal static bool FoundExonEndpointInsertion(bool isInsertion, int cdnaStart, int cdnaEnd, - ITranscriptRegion startRegion, ITranscriptRegion endRegion) - { - bool isCdnaStartUndef = cdnaStart == -1; - bool isCdnaEndUndef = cdnaEnd == -1; - bool isStartExon = startRegion?.Type == TranscriptRegionType.Exon; - bool isEndExon = endRegion?.Type == TranscriptRegionType.Exon; - - return isInsertion && startRegion != null && endRegion != null && isStartExon ^ isEndExon && - isCdnaStartUndef ^ isCdnaEndUndef; - } - } +using VariantAnnotation.Algorithms; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public static class MappedPositionUtilities + { + public static (int Index, ITranscriptRegion Region) FindRegion(ITranscriptRegion[] regions, + int variantPosition) + { + int index = regions.BinarySearch(variantPosition); + var region = index < 0 ? 
null : regions[index]; + return (index, region); + } + + public static (int CdnaStart, int CdnaEnd) GetCdnaPositions(ITranscriptRegion startRegion, + ITranscriptRegion endRegion, int start, int end, bool onReverseStrand) + { + int cdnaStart = GetCdnaPosition(startRegion, start, onReverseStrand); + int cdnaEnd = GetCdnaPosition(endRegion, end, onReverseStrand); + return (cdnaStart, cdnaEnd); + } + + public static (int CdnaStart, int CdnaEnd) GetInsertionCdnaPositions(ITranscriptRegion startRegion, + ITranscriptRegion endRegion, int start, int end, bool onReverseStrand) + { + int cdnaStart, cdnaEnd; + + if (onReverseStrand) + { + cdnaStart = GetCdnaPosition(startRegion, start, true); + if (cdnaStart != -1) return (cdnaStart, cdnaStart + 1); + + cdnaEnd = GetCdnaPosition(endRegion, end, true); + return cdnaEnd != -1 ? (cdnaEnd - 1, cdnaEnd) : (-1, -1); + } + + cdnaEnd = GetCdnaPosition(endRegion, end, false); + if (cdnaEnd != -1) return (cdnaEnd + 1, cdnaEnd); + + cdnaStart = GetCdnaPosition(startRegion, start, false); + return cdnaStart != -1 ? (cdnaStart, cdnaStart - 1) : (-1, -1); + } + + private static int GetCdnaPosition(ITranscriptRegion region, int variantPosition, bool onReverseStrand) + { + if (region == null || region.Type != TranscriptRegionType.Exon) return -1; + + return onReverseStrand + ? region.End - variantPosition + region.CdnaStart + : variantPosition - region.Start + region.CdnaStart; + } + + /// + /// Assuming at least one cDNA coordinate overlaps with an exon, the covered cDNA coordinates represent + /// the coordinates actually covered by the variant. 
+ /// + public static (int Start, int End) GetCoveredCdnaPositions(this ITranscriptRegion[] regions, int cdnaStart, int startRegionIndex, + int cdnaEnd, int endRegionIndex, bool onReverseStrand) + { + // exon case + if (cdnaStart != -1 && cdnaEnd != -1) return (cdnaStart, cdnaEnd); + + if (onReverseStrand) Swap.Int(ref startRegionIndex, ref endRegionIndex); + + var startRegion = regions.GetCoveredRegion(startRegionIndex); + var endRegion = regions.GetCoveredRegion(endRegionIndex); + + if (startRegion.Type != TranscriptRegionType.Exon && endRegion.Type != TranscriptRegionType.Exon) + return (-1, -1); + + int codingEnd = onReverseStrand ? regions[0].CdnaEnd : regions[regions.Length - 1].CdnaEnd; + + cdnaStart = GetCoveredCdnaPosition(true, cdnaStart, startRegion, startRegionIndex, onReverseStrand, codingEnd); + cdnaEnd = GetCoveredCdnaPosition(false, cdnaEnd, endRegion, endRegionIndex, onReverseStrand, codingEnd); + + return cdnaStart < cdnaEnd ? (cdnaStart, cdnaEnd) : (cdnaEnd, cdnaStart); + } + + private static ITranscriptRegion GetCoveredRegion(this ITranscriptRegion[] regions, int regionIndex) + { + if (regionIndex == -1) return regions[0]; + return regionIndex == ~regions.Length ? regions[regions.Length - 1] : regions[regionIndex]; + } + + private static int GetCoveredCdnaPosition(bool isStart, int cdnaPosition, ITranscriptRegion region, int regionIndex, bool onReverseStrand, int codingEnd) + { + if (cdnaPosition >= 0) return cdnaPosition; + + // start before transcript + if (regionIndex == -1) return onReverseStrand ? codingEnd : 1; + + // end after transcript + if (regionIndex < -1) return onReverseStrand ? 1 : codingEnd; + + // intron + return isStart ? 
region.CdnaEnd : region.CdnaStart; + } + + public static (int CdsStart, int CdsEnd, int ProteinStart, int ProteinEnd) GetCoveredCdsAndProteinPositions( + int coveredCdnaStart, int coveredCdnaEnd, byte startExonPhase, ICodingRegion codingRegion) + { + if (codingRegion == null || + coveredCdnaEnd < codingRegion.CdnaStart || + coveredCdnaStart > codingRegion.CdnaEnd || + coveredCdnaStart == -1 && coveredCdnaEnd == -1) return (-1, -1, -1, -1); + + if (coveredCdnaStart < codingRegion.CdnaStart) coveredCdnaStart = codingRegion.CdnaStart; + if (coveredCdnaEnd > codingRegion.CdnaEnd) coveredCdnaEnd = codingRegion.CdnaEnd; + + int offset = startExonPhase - codingRegion.CdnaStart + 1; + int start = coveredCdnaStart + offset; + int end = coveredCdnaEnd + offset; + + return (start, end, GetProteinPosition(start), GetProteinPosition(end)); + } + + public static int GetProteinPosition(int cdsPosition) + { + if (cdsPosition == -1) return -1; + return (cdsPosition + 2) / 3; + } + + public static (int CdsStart, int CdsEnd) GetCdsPositions(ICodingRegion codingRegion, int cdnaStart, + int cdnaEnd, byte startExonPhase, bool isInsertion) + { + int cdsStart = GetCdsPosition(codingRegion, cdnaStart, startExonPhase); + int cdsEnd = GetCdsPosition(codingRegion, cdnaEnd, startExonPhase); + + // silence CDS for insertions that occur just after the coding region + if (isInsertion && codingRegion != null && (cdnaEnd == codingRegion.CdnaEnd || cdnaStart == codingRegion.CdnaStart)) + { + cdsStart = -1; + cdsEnd = -1; + } + + return (cdsStart, cdsEnd); + } + + private static int GetCdsPosition(ICodingRegion codingRegion, int cdnaPosition, byte startExonPhase) + { + if (codingRegion == null || cdnaPosition < codingRegion.CdnaStart || + cdnaPosition > codingRegion.CdnaEnd) return -1; + return cdnaPosition - codingRegion.CdnaStart + startExonPhase + 1; + } + + // this is used to get CDS coordinates past the last CDS position + public static int GetExtendedCdsPosition(int cdnaStart, int cdnaPosition, 
byte startExonPhase) + { + if (cdnaPosition < cdnaStart) return -1; + return cdnaPosition - cdnaStart + startExonPhase + 1; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/StringExtensions.cs b/VariantAnnotation/AnnotatedPositions/Transcript/StringExtensions.cs index b8984a29..a93f69ca 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/StringExtensions.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/StringExtensions.cs @@ -1,33 +1,33 @@ -using System; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public static class StringExtensions - { - - public static int CommonPrefixLength(this string a, string b) - { - if (a == null || b == null) return 0; - - var maxPrefixLength = Math.Min(a.Length, b.Length); - - var prefixLength = 0; - while (prefixLength < maxPrefixLength && a[prefixLength] == b[prefixLength]) prefixLength++; - - return prefixLength; - } - - public static int CommonSuffixLength(this string a, string b) - { - if (a == null || b == null) return 0; - - var maxSuffixLength = Math.Min(a.Length, b.Length); - - var suffixLength = 0; - while (suffixLength < maxSuffixLength && - a[a.Length - suffixLength - 1] == b[b.Length - suffixLength - 1]) suffixLength++; - - return suffixLength; - } - } +using System; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public static class StringExtensions + { + + public static int CommonPrefixLength(this string a, string b) + { + if (a == null || b == null) return 0; + + var maxPrefixLength = Math.Min(a.Length, b.Length); + + var prefixLength = 0; + while (prefixLength < maxPrefixLength && a[prefixLength] == b[prefixLength]) prefixLength++; + + return prefixLength; + } + + public static int CommonSuffixLength(this string a, string b) + { + if (a == null || b == null) return 0; + + var maxSuffixLength = Math.Min(a.Length, b.Length); + + var suffixLength = 0; + while (suffixLength < maxSuffixLength && + a[a.Length - 
suffixLength - 1] == b[b.Length - suffixLength - 1]) suffixLength++; + + return suffixLength; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffect.cs b/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffect.cs index 7d1c78ae..a0339431 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffect.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptPositionalEffect.cs @@ -1,165 +1,165 @@ -using System.Linq; -using Intervals; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class TranscriptPositionalEffect - { - public bool IsEndSpliceSite; - public bool IsStartSpliceSite; - public bool IsWithinFrameshiftIntron; - public bool IsWithinIntron; - public bool IsWithinSpliceSiteRegion; - - public bool HasExonOverlap; - public bool AfterCoding; - public bool BeforeCoding; - public bool WithinCdna; - public bool WithinCds; - public bool HasFrameShift; - public bool IsCoding; - - public bool OverlapWithMicroRna; - - public void DetermineIntronicEffect(ITranscriptRegion[] regions, IInterval variant, VariantType variantType) - { - if (regions == null) return; - - var isInsertion = variantType == VariantType.insertion; - - foreach (var region in regions) - { - if (region.Type != TranscriptRegionType.Intron) continue; - - // skip this one if variant is out of range : the range is set to 3 instead of the original old: - // all of the checking occured in the region between start-3 to end+3, if we set to 8, we can made mistakes when - // checking IsWithinIntron when we have a small exon - if (!variant.Overlaps(region.Start - 3, region.End + 3)) continue; - - // under various circumstances the genebuild process can introduce artificial - // short (<= 12 nucleotide) introns into transcripts (e.g. 
to deal with errors - // in the reference sequence etc.), we don't want to categorize variations that - // fall in these introns as intronic, or as any kind of splice variant - - var isFrameshiftIntron = region.End - region.Start <= 12; - - if (isFrameshiftIntron && variant.Overlaps(region.Start, region.End)) - { - IsWithinFrameshiftIntron = true; - continue; - } - - CheckSpliceSiteOverlap(variant, region); - CheckIntronOverlap(variant, isInsertion, region); - - // the definition of splice_region (SO:0001630) is "within 1-3 bases of the - // exon or 3-8 bases of the intron." We also need to special case insertions - // between the edge of an exon and a donor or acceptor site and between a donor - // or acceptor site and the intron - IsWithinSpliceSiteRegion = variant.Overlaps(region.Start + 2, region.Start + 7) || - variant.Overlaps(region.End - 7, region.End - 2) || - variant.Overlaps(region.Start - 3, region.Start - 1) || - variant.Overlaps(region.End + 1, region.End + 3) || - isInsertion && - (variant.Start == region.Start || - variant.End == region.End || - variant.Start == region.Start + 2 || - variant.End == region.End - 2); - } - } - - private void CheckSpliceSiteOverlap(IInterval variant, ITranscriptRegion region) - { - if (variant.Overlaps(region.Start, region.Start + 1)) - { - IsStartSpliceSite = true; - } - - if (variant.Overlaps(region.End - 1, region.End)) - { - IsEndSpliceSite = true; - } - } - - private void CheckIntronOverlap(IInterval variant, bool isInsertion, ITranscriptRegion region) - { - // we need to special case insertions between the donor and acceptor sites - // make sure the size of intron is larger than 4 - if (region.Start <= region.End - 4 && (variant.Overlaps(region.Start + 2, region.End - 2) || - isInsertion && - (variant.Start == region.Start + 2 || - variant.End == region.End - 2))) - { - IsWithinIntron = true; - } - } - - public void DetermineExonicEffect(ITranscript transcript, IInterval variant, IMappedPosition position, - int 
coveredCdnaStart, int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd, string altAllele, - bool startCodonInsertionWithNoImpact) - { - HasExonOverlap = position.ExonStart != -1 || position.ExonEnd != -1; - - if (transcript.Translation != null) - { - var codingRegion = transcript.Translation.CodingRegion; - AfterCoding = IsAfterCoding(variant.Start, variant.End, transcript.End, codingRegion.End); - BeforeCoding = IsBeforeCoding(variant.Start, variant.End, transcript.Start, codingRegion.Start); - WithinCds = IsWithinCds(coveredCdsStart, coveredCdsEnd, codingRegion, variant); - IsCoding = !startCodonInsertionWithNoImpact && (position.CdsStart != -1 || position.CdsEnd != -1); - } - - WithinCdna = IsWithinCdna(coveredCdnaStart, coveredCdnaEnd, transcript.TotalExonLength); - - if (coveredCdsStart != -1 && coveredCdsEnd != -1) - { - var varLen = coveredCdsEnd - coveredCdsStart + 1; - var alleleLen = altAllele?.Length ?? 0; - HasFrameShift = position.CdsStart != -1 && position.CdsEnd != -1 && !Codons.IsTriplet(alleleLen - varLen); - } - - OverlapWithMicroRna = IsMatureMirnaVariant(position.CdnaStart, position.CdnaEnd, transcript.MicroRnas, - transcript.BioType == BioType.miRNA); - } - - internal static bool IsMatureMirnaVariant(int cdnaStart, int cdnaEnd, IInterval[] microRnas, bool isMiRna) - { - if (microRnas == null) return false; - if (!isMiRna || cdnaStart == -1 || cdnaEnd == -1) return false; - return microRnas.Any(microRna => microRna.Overlaps(cdnaStart, cdnaEnd)); - } - - internal static bool IsAfterCoding(int variantRefBegin, int variantRefEnd, int transcriptEnd, int codingRegionEnd) - { - // special case to handle insertions after the CDS end - if (variantRefBegin == variantRefEnd + 1 && variantRefEnd == codingRegionEnd) - { - return true; - } - - var result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, codingRegionEnd + 1, transcriptEnd); - - return result; - } - - internal static bool IsBeforeCoding(int variantRefBegin, int 
variantRefEnd, int transcriptStart, int codingRegionStart) - { - // special case to handle insertions before the CDS start - if (variantRefBegin == variantRefEnd + 1 && variantRefBegin == codingRegionStart) return true; - - bool result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, transcriptStart, codingRegionStart - 1); - return result; - } - - internal static bool IsWithinCdna(int coveredCdnaStart, int coveredCdnaEnd, int totalExonLen) => - coveredCdnaStart > 0 && coveredCdnaEnd <= totalExonLen; - - internal bool IsWithinCds(int coveredCdsBegin, int coveredCdsEnd, IInterval codingRegion, IInterval variant) - { - if (IsWithinFrameshiftIntron) return variant.Overlaps(codingRegion); - return coveredCdsBegin != -1 && coveredCdsEnd != -1; - } - } +using System.Linq; +using Intervals; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class TranscriptPositionalEffect + { + public bool IsEndSpliceSite; + public bool IsStartSpliceSite; + public bool IsWithinFrameshiftIntron; + public bool IsWithinIntron; + public bool IsWithinSpliceSiteRegion; + + public bool HasExonOverlap; + public bool AfterCoding; + public bool BeforeCoding; + public bool WithinCdna; + public bool WithinCds; + public bool HasFrameShift; + public bool IsCoding; + + public bool OverlapWithMicroRna; + + public void DetermineIntronicEffect(ITranscriptRegion[] regions, IInterval variant, VariantType variantType) + { + if (regions == null) return; + + var isInsertion = variantType == VariantType.insertion; + + foreach (var region in regions) + { + if (region.Type != TranscriptRegionType.Intron) continue; + + // skip this one if variant is out of range : the range is set to 3 instead of the original old: + // all of the checking occured in the region between start-3 to end+3, if we set to 8, we can made mistakes when + // checking IsWithinIntron when we have a small exon + if 
(!variant.Overlaps(region.Start - 3, region.End + 3)) continue; + + // under various circumstances the genebuild process can introduce artificial + // short (<= 12 nucleotide) introns into transcripts (e.g. to deal with errors + // in the reference sequence etc.), we don't want to categorize variations that + // fall in these introns as intronic, or as any kind of splice variant + + var isFrameshiftIntron = region.End - region.Start <= 12; + + if (isFrameshiftIntron && variant.Overlaps(region.Start, region.End)) + { + IsWithinFrameshiftIntron = true; + continue; + } + + CheckSpliceSiteOverlap(variant, region); + CheckIntronOverlap(variant, isInsertion, region); + + // the definition of splice_region (SO:0001630) is "within 1-3 bases of the + // exon or 3-8 bases of the intron." We also need to special case insertions + // between the edge of an exon and a donor or acceptor site and between a donor + // or acceptor site and the intron + IsWithinSpliceSiteRegion = variant.Overlaps(region.Start + 2, region.Start + 7) || + variant.Overlaps(region.End - 7, region.End - 2) || + variant.Overlaps(region.Start - 3, region.Start - 1) || + variant.Overlaps(region.End + 1, region.End + 3) || + isInsertion && + (variant.Start == region.Start || + variant.End == region.End || + variant.Start == region.Start + 2 || + variant.End == region.End - 2); + } + } + + private void CheckSpliceSiteOverlap(IInterval variant, ITranscriptRegion region) + { + if (variant.Overlaps(region.Start, region.Start + 1)) + { + IsStartSpliceSite = true; + } + + if (variant.Overlaps(region.End - 1, region.End)) + { + IsEndSpliceSite = true; + } + } + + private void CheckIntronOverlap(IInterval variant, bool isInsertion, ITranscriptRegion region) + { + // we need to special case insertions between the donor and acceptor sites + // make sure the size of intron is larger than 4 + if (region.Start <= region.End - 4 && (variant.Overlaps(region.Start + 2, region.End - 2) || + isInsertion && + (variant.Start == 
region.Start + 2 || + variant.End == region.End - 2))) + { + IsWithinIntron = true; + } + } + + public void DetermineExonicEffect(ITranscript transcript, IInterval variant, IMappedPosition position, + int coveredCdnaStart, int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd, string altAllele, + bool startCodonInsertionWithNoImpact) + { + HasExonOverlap = position.ExonStart != -1 || position.ExonEnd != -1; + + if (transcript.Translation != null) + { + var codingRegion = transcript.Translation.CodingRegion; + AfterCoding = IsAfterCoding(variant.Start, variant.End, transcript.End, codingRegion.End); + BeforeCoding = IsBeforeCoding(variant.Start, variant.End, transcript.Start, codingRegion.Start); + WithinCds = IsWithinCds(coveredCdsStart, coveredCdsEnd, codingRegion, variant); + IsCoding = !startCodonInsertionWithNoImpact && (position.CdsStart != -1 || position.CdsEnd != -1); + } + + WithinCdna = IsWithinCdna(coveredCdnaStart, coveredCdnaEnd, transcript.TotalExonLength); + + if (coveredCdsStart != -1 && coveredCdsEnd != -1) + { + var varLen = coveredCdsEnd - coveredCdsStart + 1; + var alleleLen = altAllele?.Length ?? 
0; + HasFrameShift = position.CdsStart != -1 && position.CdsEnd != -1 && !Codons.IsTriplet(alleleLen - varLen); + } + + OverlapWithMicroRna = IsMatureMirnaVariant(position.CdnaStart, position.CdnaEnd, transcript.MicroRnas, + transcript.BioType == BioType.miRNA); + } + + internal static bool IsMatureMirnaVariant(int cdnaStart, int cdnaEnd, IInterval[] microRnas, bool isMiRna) + { + if (microRnas == null) return false; + if (!isMiRna || cdnaStart == -1 || cdnaEnd == -1) return false; + return microRnas.Any(microRna => microRna.Overlaps(cdnaStart, cdnaEnd)); + } + + internal static bool IsAfterCoding(int variantRefBegin, int variantRefEnd, int transcriptEnd, int codingRegionEnd) + { + // special case to handle insertions after the CDS end + if (variantRefBegin == variantRefEnd + 1 && variantRefEnd == codingRegionEnd) + { + return true; + } + + var result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, codingRegionEnd + 1, transcriptEnd); + + return result; + } + + internal static bool IsBeforeCoding(int variantRefBegin, int variantRefEnd, int transcriptStart, int codingRegionStart) + { + // special case to handle insertions before the CDS start + if (variantRefBegin == variantRefEnd + 1 && variantRefBegin == codingRegionStart) return true; + + bool result = Intervals.Utilities.Overlaps(variantRefBegin, variantRefEnd, transcriptStart, codingRegionStart - 1); + return result; + } + + internal static bool IsWithinCdna(int coveredCdnaStart, int coveredCdnaEnd, int totalExonLen) => + coveredCdnaStart > 0 && coveredCdnaEnd <= totalExonLen; + + internal bool IsWithinCds(int coveredCdsBegin, int coveredCdsEnd, IInterval codingRegion, IInterval variant) + { + if (IsWithinFrameshiftIntron) return variant.Overlaps(codingRegion); + return coveredCdsBegin != -1 && coveredCdsEnd != -1; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilities.cs 
b/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilities.cs index 1e9b6be0..9f337d48 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilities.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/TranscriptUtilities.cs @@ -1,87 +1,85 @@ -using System; -using System.Net.Mime; -using Genome; -using OptimizedCore; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public static class TranscriptUtilities - { - /// - /// returns the alternate CDS given the reference sequence, the cds coordinates, and the alternate allele. - /// - public static string GetAlternateCds(ISequence refSequence, int cdsBegin, int cdsEnd, string alternateAllele, - ITranscriptRegion[] regions, bool onReverseStrand, byte startExonPhase, int cdnaCodingStart) - { - var splicedSeq = GetSplicedSequence(refSequence, regions, onReverseStrand); - int numPaddedBases = startExonPhase; - - int shift = cdnaCodingStart - 1; - int upstreamLength = GetUpstreamLength(shift, cdsBegin - numPaddedBases - 1, splicedSeq.Length); - int downstreamStart = cdsEnd - numPaddedBases + shift; - - string upstreamSeq = splicedSeq.Substring(shift, upstreamLength); - string downstreamSeq = downstreamStart < splicedSeq.Length ? splicedSeq.Substring(downstreamStart) : ""; - - if (alternateAllele == null) alternateAllele = string.Empty; - var paddedBases = numPaddedBases > 0 ? new string('N', numPaddedBases) : ""; - - return paddedBases + upstreamSeq + alternateAllele + downstreamSeq; - } - - private static int GetUpstreamLength(int start, int length, int seqLength) - { - int desiredLength = start + length; - int maxLength = seqLength - start; - return desiredLength <= seqLength ? length : maxLength; - } - - /// - /// Retrieves all Exon sequences and concats them together. 
- /// This includes 5' UTR + cDNA + 3' UTR [Transcript.pm:862 spliced_seq] - /// - private static string GetSplicedSequence(ISequence refSequence, ITranscriptRegion[] regions, bool onReverseStrand) - { - var sb = StringBuilderCache.Acquire(); - - foreach (var region in regions) - { - if (region.Type != TranscriptRegionType.Exon) continue; - var exonLength = region.End - region.Start + 1; - - // sanity check: handle the situation where no reference has been provided - if (refSequence == null) - { - sb.Append(new string('N', exonLength)); - continue; - } - - sb.Append(refSequence.Substring(region.Start - 1, exonLength)); - } - - var results = StringBuilderCache.GetStringAndRelease(sb); - return onReverseStrand ? SequenceUtilities.GetReverseComplement(results) : results; - } - - public static void PrintTranscriptDetails(int start, int end, ICodingRegion codingRegion, - ITranscriptRegion[] transcriptRegions, IRnaEdit[] rnaEdits, byte startExonPhase) - { - Console.WriteLine($"genomic region: {start}-{end}"); - Console.WriteLine($"Start exon phase:{startExonPhase}"); - Console.WriteLine($"new CodingRegion({codingRegion.Start}, {codingRegion.End}, {codingRegion.CdnaStart}, {codingRegion.CdnaEnd}, {codingRegion.Length})"); - foreach (var transcriptRegion in transcriptRegions) - { - Console.WriteLine($"new TranscriptRegion(TranscriptRegionType.{transcriptRegion.Type}, {transcriptRegion.Id}, {transcriptRegion.Start}, {transcriptRegion.End}, " + - $"{transcriptRegion.CdnaStart}, {transcriptRegion.CdnaEnd}),"); - } - - if(rnaEdits!=null) - foreach (var rnaEdit in rnaEdits) - { - Console.WriteLine($"new RnaEdit({rnaEdit.Start}, {rnaEdit.End}, \"{rnaEdit.Bases}\"),"); - } - } - } +using System; +using Genome; +using OptimizedCore; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public static class TranscriptUtilities + { + /// + /// returns the alternate CDS given the reference sequence, the cds 
coordinates, and the alternate allele. + /// + public static string GetAlternateCds(ISequence refSequence, int cdsBegin, int cdsEnd, string alternateAllele, + ITranscriptRegion[] regions, bool onReverseStrand, byte startExonPhase, int cdnaCodingStart) + { + var splicedSeq = GetSplicedSequence(refSequence, regions, onReverseStrand); + int numPaddedBases = startExonPhase; + + int shift = cdnaCodingStart - 1; + int upstreamLength = GetUpstreamLength(shift, cdsBegin - numPaddedBases - 1, splicedSeq.Length); + int downstreamStart = cdsEnd - numPaddedBases + shift; + + string upstreamSeq = splicedSeq.Substring(shift, upstreamLength); + string downstreamSeq = downstreamStart < splicedSeq.Length ? splicedSeq.Substring(downstreamStart) : ""; + + if (alternateAllele == null) alternateAllele = string.Empty; + var paddedBases = numPaddedBases > 0 ? new string('N', numPaddedBases) : ""; + + return paddedBases + upstreamSeq + alternateAllele + downstreamSeq; + } + + private static int GetUpstreamLength(int start, int length, int seqLength) + { + int desiredLength = start + length; + int maxLength = seqLength - start; + return desiredLength <= seqLength ? length : maxLength; + } + + /// + /// Retrieves all Exon sequences and concats them together. + /// This includes 5' UTR + cDNA + 3' UTR [Transcript.pm:862 spliced_seq] + /// + private static string GetSplicedSequence(ISequence refSequence, ITranscriptRegion[] regions, bool onReverseStrand) + { + var sb = StringBuilderCache.Acquire(); + + foreach (var region in regions) + { + if (region.Type != TranscriptRegionType.Exon) continue; + var exonLength = region.End - region.Start + 1; + + // sanity check: handle the situation where no reference has been provided + if (refSequence == null) + { + sb.Append(new string('N', exonLength)); + continue; + } + + sb.Append(refSequence.Substring(region.Start - 1, exonLength)); + } + + var results = StringBuilderCache.GetStringAndRelease(sb); + return onReverseStrand ? 
SequenceUtilities.GetReverseComplement(results) : results; + } + + public static void PrintTranscriptDetails(int start, int end, ICodingRegion codingRegion, + ITranscriptRegion[] transcriptRegions, IRnaEdit[] rnaEdits, byte startExonPhase) + { + Console.WriteLine($"genomic region: {start}-{end}"); + Console.WriteLine($"Start exon phase:{startExonPhase}"); + Console.WriteLine($"new CodingRegion({codingRegion.Start}, {codingRegion.End}, {codingRegion.CdnaStart}, {codingRegion.CdnaEnd}, {codingRegion.Length})"); + foreach (var transcriptRegion in transcriptRegions) + { + Console.WriteLine($"new TranscriptRegion(TranscriptRegionType.{transcriptRegion.Type}, {transcriptRegion.Id}, {transcriptRegion.Start}, {transcriptRegion.End}, " + + $"{transcriptRegion.CdnaStart}, {transcriptRegion.CdnaEnd}),"); + } + + if(rnaEdits!=null) + foreach (var rnaEdit in rnaEdits) + { + Console.WriteLine($"new RnaEdit({rnaEdit.Start}, {rnaEdit.End}, \"{rnaEdit.Bases}\"),"); + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffect.cs b/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffect.cs index 607812f8..bcad4118 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffect.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffect.cs @@ -1,585 +1,586 @@ -using System; -using OptimizedCore; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - /// - /// This class performs all of the functional consequence testing. An additional caching layer - /// has been added to prevent unneeded calculations. The caching layer is reset when each new - /// variant has been read. 
- /// - public sealed class VariantEffect : IVariantEffect - { - private readonly TranscriptPositionalEffect _preCache; - - private readonly ITranscript _transcript; - private readonly ISimpleVariant _variant; - - private readonly VariantEffectCache _cache; - - private readonly string _referenceAminoAcids; - private readonly string _alternateAminoAcids; - - private readonly int _referenceAminoAcidsLen; - private readonly int _alternateAminoAcidsLen; - - private readonly string _coveredReferenceAminoAcids; - private readonly string _coveredAlternateAminoAcids; - - private readonly string _referenceCodons; - private readonly string _alternateCodons; - - private readonly int _referenceCodonsLen; - private readonly int _alternateCodonsLen; - - private readonly bool _isInsertion; - private readonly bool _isDeletion; - - private readonly int _proteinBegin; - - public VariantEffect(TranscriptPositionalEffect transcriptEffect, ISimpleVariant variant, ITranscript transcript, - string referenAminoAcids, string alternateAminoAcids, string referenceCodons, string alternateCodons, - int? proteinBegin, string coveredReferenceAminoAcids, string coveredAlternateAminoAcids, VariantEffectCache cache = null) - { - _transcript = transcript; - _variant = variant; - - _preCache = transcriptEffect; - - _cache = cache ?? new VariantEffectCache(); - - _referenceAminoAcids = referenAminoAcids; - _alternateAminoAcids = alternateAminoAcids; - _referenceAminoAcidsLen = _referenceAminoAcids?.Length ?? 0; - _alternateAminoAcidsLen = _alternateAminoAcids?.Length ?? 0; - - _coveredReferenceAminoAcids = coveredReferenceAminoAcids; - _coveredAlternateAminoAcids = coveredAlternateAminoAcids; - - _referenceCodons = referenceCodons; - _alternateCodons = alternateCodons; - _referenceCodonsLen = _referenceCodons?.Length ?? 0; - _alternateCodonsLen = _alternateCodons?.Length ?? 
0; - - _isInsertion = variant.AltAllele.Length > variant.RefAllele.Length; - _isDeletion = variant.AltAllele.Length < variant.RefAllele.Length; - - _proteinBegin = proteinBegin ?? -1; - } - - /// - /// returns true if the variant is a splice acceptor variant [VariationEffect.pm:404 acceptor_splice_site] - /// - public bool IsSpliceAcceptorVariant() - { - const ConsequenceTag ct = ConsequenceTag.splice_acceptor_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = _transcript.Gene.OnReverseStrand ? _preCache.IsStartSpliceSite : _preCache.IsEndSpliceSite; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a splice donor variant [VariationEffect.pm:459 donor_splice_site] - /// - public bool IsSpliceDonorVariant() - { - const ConsequenceTag ct = ConsequenceTag.splice_donor_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = _transcript.Gene.OnReverseStrand ? _preCache.IsEndSpliceSite : _preCache.IsStartSpliceSite; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a 5' UTR variant (VariationEffect.pm:595 within_5_prime_utr) - /// - public bool IsFivePrimeUtrVariant() - { - const ConsequenceTag ct = ConsequenceTag.five_prime_UTR_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = false; - - if (_transcript.Translation != null) - { - - var isFivePrimeOfCoding = _transcript.Gene.OnReverseStrand - ? 
_preCache.AfterCoding - : _preCache.BeforeCoding; - - result = isFivePrimeOfCoding && _preCache.WithinCdna; - } - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a frameshift variant [VariantEffect.pm:940 frameshift] - /// - public bool IsFrameshiftVariant() - { - const ConsequenceTag ct = ConsequenceTag.frameshift_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // check the predicates - if (!_preCache.IsCoding) - { - _cache.Add(ct, false); - return false; - } - - if (IsIncompleteTerminalCodonVariant()) - { - _cache.Add(ct, false); - return false; - } - - bool result = _preCache.HasFrameShift && !IsStopRetained() && !IsTruncatedByStop(); - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if we have an incomplete terminal codon variant. [VariantEffect.pm:983 partial_codon] - /// - public bool IsIncompleteTerminalCodonVariant() - { - const ConsequenceTag ct = ConsequenceTag.incomplete_terminal_codon_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - if (_transcript.Translation == null) - { - _cache.Add(ct, false); - return false; - } - - int cdsLength = _transcript.Translation.CodingRegion.Length; - int codonCdsStart = _proteinBegin * 3 - 2; - int lastCodonLength = cdsLength - (codonCdsStart - 1); - - bool result = lastCodonLength < 3 && lastCodonLength > 0; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is an inframe deletion [VariantEffect.pm:825 inframe_deletion] - /// - public bool IsInframeDeletion() - { - const ConsequenceTag ct = ConsequenceTag.inframe_deletion; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // check the predicates - if (!_preCache.IsCoding || !_isDeletion) - { - _cache.Add(ct, false); - return false; - } - - if (_referenceCodonsLen == 0 //|| (PreCache.ReferenceCodonLen < PreCache.AlternateCodonLen) - || IsFrameshiftVariant() - || IsIncompleteTerminalCodonVariant() - || IsStopGained()) - { - 
_cache.Add(ct, false); - return false; - } - - // simple string match - var referenceCodon = _referenceCodons.ToLower(); - var alternateCodon = _alternateCodons.ToLower(); - - if (referenceCodon.StartsWith(alternateCodon) || referenceCodon.EndsWith(alternateCodon)) - { - _cache.Add(ct, true); - return true; - } - - // try a more complex string match - var commonPrefixLength = _referenceCodons.CommonPrefixLength(_alternateCodons); - var commonSuffixLength = _referenceCodons.CommonSuffixLength(_alternateCodons); - - bool result = _alternateCodonsLen - commonPrefixLength - commonSuffixLength == 0; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is an inframe insertion [VariantEffect.pm:780 inframe_insertion] - /// - public bool IsInframeInsertion() - { - const ConsequenceTag ct = ConsequenceTag.inframe_insertion; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // check the predicates - if (!_preCache.IsCoding || !_isInsertion) - { - _cache.Add(ct, false); - return false; - } - - if (IsStopRetained() || - IsFrameshiftVariant() || - IsStartLost() || - _alternateCodonsLen <= _referenceCodonsLen || - IsIncompleteTerminalCodonVariant()) - { - _cache.Add(ct, false); - return false; - } - - bool result = !IsTruncatedByStop(); - - _cache.Add(ct, result); - return result; - } - - private bool IsTruncatedByStop() - { - if (_alternateAminoAcids != null && _alternateAminoAcids.Contains(AminoAcids.StopCodon)) - { - var stopPos = _alternateAminoAcids.IndexOf(AminoAcids.StopCodon, StringComparison.Ordinal); - var altAminoAcidesBeforeStop = _alternateAminoAcids.Substring(0, stopPos); - if (_alternateAminoAcids.OptimizedStartsWith(AminoAcids.StopCodonChar) || - _referenceAminoAcids.StartsWith(altAminoAcidesBeforeStop)) - return true; - } - return false; - } - - /// - /// returns true if at least one base of the first codon was changed in the transcript [VariantEffect.pm:722 affects_start_codon] - /// - public bool IsStartLost() - { - 
const ConsequenceTag ct = ConsequenceTag.start_lost; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // check the predicates - if (!_preCache.IsCoding) - { - _cache.Add(ct, false); - return false; - } - - if (_proteinBegin != 1 || _referenceAminoAcidsLen == 0) - { - _cache.Add(ct, false); - return false; - } - - // insertion in start codon and do not change start codon - if (_isInsertion && _proteinBegin == 1 && _alternateAminoAcids.EndsWith(_referenceAminoAcids)) - { - _cache.Add(ct, false); - return false; - } - - bool result = _alternateAminoAcidsLen == 0 || _alternateAminoAcids[0] != _referenceAminoAcids[0]; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a missense variant [VariantEffect.pm:682 missense_variant] - /// - public bool IsMissenseVariant() - { - const ConsequenceTag ct = ConsequenceTag.missense_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // check the predicates - if (!_preCache.IsCoding) - { - _cache.Add(ct, false); - return false; - } - - if (IsStartLost() || - IsStopLost() || - IsStopGained() || - IsIncompleteTerminalCodonVariant() || - IsFrameshiftVariant() || - IsInframeDeletion() || - IsInframeInsertion()) - { - _cache.Add(ct, false); - return false; - } - - bool result = _referenceAminoAcids != _alternateAminoAcids && - _referenceAminoAcidsLen == _alternateAminoAcidsLen; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a non-coding transcript exon variant [VariationEffect.pm:405 non_coding_exon_variant] - /// - public bool IsNonCodingTranscriptExonVariant() - { - const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_exon_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = _preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a nonsense-mediated decay transcript 
variant [VariationEffect.pm:391 within_nmd_transcript] - /// - public bool IsNonsenseMediatedDecayTranscriptVariant() - { - const ConsequenceTag ct = ConsequenceTag.NMD_transcript_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - var result = _transcript.BioType == BioType.nonsense_mediated_decay; - _cache.Add(ct, result); - return result; - } - - - /// - /// returns true if the variant is a protein altering variant [VariationEffect.pm:300 protein_altering_variant] - /// - public bool IsProteinAlteringVariant() - { - const ConsequenceTag ct = ConsequenceTag.protein_altering_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - var result = true; - - var sameLen = _referenceAminoAcidsLen == _alternateAminoAcidsLen; - var startsWithTer = _referenceAminoAcids.OptimizedStartsWith('X') || _alternateAminoAcids.OptimizedStartsWith('X'); - - var isInframeDeletion = IsInframeDeletion(); - // Note: sequence ontology says that stop retained should not be here (http://www.sequenceontology.org/browser/current_svn/term/SO:0001567) - var isStopCodonVarinat = IsStopLost() || IsStopGained(); - - if (sameLen || startsWithTer || isInframeDeletion || isStopCodonVarinat || - IsStartLost() || IsFrameshiftVariant() || IsInframeInsertion() || IsStopRetained() || !_preCache.IsCoding) - { - result = false; - } - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a splice region variant [VariationEffect.pm:483 splice_region] - /// - public bool IsSpliceRegionVariant() - { - const ConsequenceTag ct = ConsequenceTag.splice_region_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = false; - - if (IsSpliceDonorVariant() || IsSpliceAcceptorVariant()) - { - // false - } - else - { - result = _preCache.IsWithinSpliceSiteRegion; - } - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant's amino acid changes to a stop codon [VariationEffect.pm:884 stop_gained] - /// - public bool 
IsStopGained() - { - const ConsequenceTag ct = ConsequenceTag.stop_gained; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = !IsStopRetained() && - (string.IsNullOrEmpty(_referenceAminoAcids) || !_referenceAminoAcids.Contains(AminoAcids.StopCodon)) && - !string.IsNullOrEmpty(_alternateAminoAcids) && _alternateAminoAcids.Contains(AminoAcids.StopCodon); - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a stop lost variant [VariationEffect.pm:898 stop_lost] - /// - public bool IsStopLost() - { - const ConsequenceTag ct = ConsequenceTag.stop_lost; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = false; - if (!string.IsNullOrEmpty(_coveredReferenceAminoAcids) && _coveredAlternateAminoAcids != null) - result = _coveredReferenceAminoAcids.Contains(AminoAcids.StopCodon) && - !_coveredAlternateAminoAcids.Contains(AminoAcids.StopCodon); - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a stop retained variant [VariationEffect.pm:701 stop_lost] - /// - public bool IsStopRetained() - { - const ConsequenceTag ct = ConsequenceTag.stop_retained_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - var alternateAminoAcids = TrimPeptides(_alternateAminoAcids); - - bool result = !string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null && - _referenceAminoAcids == alternateAminoAcids && - _referenceAminoAcids.Contains(AminoAcids.StopCodon) || - string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null && - _proteinBegin == _transcript.Translation?.PeptideSeq.Length + 1 && - alternateAminoAcids == AminoAcids.StopCodon; - - _cache.Add(ct, result); - return result; - } - - public bool IsStartRetained() - { - const ConsequenceTag ct = ConsequenceTag.start_retained_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - if (_proteinBegin != 1 || string.IsNullOrEmpty(_referenceAminoAcids)) - { - _cache.Add(ct, false); 
- return false; - } - - var startProtein = _referenceAminoAcids[0].ToString(); - var alternateAminoAcids = TrimPeptides(_alternateAminoAcids); - - var result = alternateAminoAcids != null - && alternateAminoAcids.Contains(startProtein); - - _cache.Add(ct, result); - return result; - } - - private static string TrimPeptides(string alternateAminoAcids) - { - if (string.IsNullOrEmpty(alternateAminoAcids)) return null; - if (!alternateAminoAcids.Contains(AminoAcids.StopCodon)) return alternateAminoAcids; - var pos = alternateAminoAcids.IndexOf(AminoAcids.StopCodon, StringComparison.Ordinal); - return pos < 0 ? alternateAminoAcids : alternateAminoAcids.Substring(0, pos + 1); - } - - /// - /// returns true if the variant is a synonymous variant [VariationEffect.pm:755 synonymous_variant] - /// - public bool IsSynonymousVariant() - { - const ConsequenceTag ct = ConsequenceTag.synonymous_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = !string.IsNullOrEmpty(_referenceAminoAcids) && - (_variant.Type == VariantType.SNV || - _variant.Type == VariantType.MNV) && - _referenceAminoAcids == _alternateAminoAcids && !_referenceAminoAcids.Contains("X") && - !_alternateAminoAcids.Contains("X") && !IsStopRetained(); - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is a 3' UTR variant [VariationEffect.pm:609 within_3_prime_utr] - /// - public bool IsThreePrimeUtrVariant() - { - const ConsequenceTag ct = ConsequenceTag.three_prime_UTR_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = false; - - if (_transcript.Translation != null) - { - var isThreePrimeOfCoding = _transcript.Gene.OnReverseStrand - ? 
_preCache.BeforeCoding - : _preCache.AfterCoding; - - result = isThreePrimeOfCoding && _preCache.WithinCdna; - } - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant is within a non-coding gene [VariationEffect.pm:398 within_non_coding_gene] - /// - public bool IsNonCodingTranscriptVariant() - { - const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - // NOTE: Isn't IsWithinTranscript always true? and not within mature miRNA is always true - // For Ensembl transcript, miRNA may be a valid attribute. We have their location and we would like to check if the variant overlaps with the miRNA - var result = !_preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna; - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if it's a coding sequnce variant [VariationEffect.pm:998 coding_unknown] - /// - public bool IsCodingSequenceVariant() - { - const ConsequenceTag ct = ConsequenceTag.coding_sequence_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = _preCache.WithinCds && - (string.IsNullOrEmpty(_transcript.Translation.PeptideSeq) || - string.IsNullOrEmpty(_alternateAminoAcids) || _alternateAminoAcids.Contains("X")) - && !(IsFrameshiftVariant() || IsInframeDeletion() || IsIncompleteTerminalCodonVariant() || - IsProteinAlteringVariant() || IsStopGained() || IsStopRetained() || IsStopLost()); - - _cache.Add(ct, result); - return result; - } - - /// - /// returns true if the variant occurs within an intron [VariationEffect.pm:494 within_intron] - /// - public bool IsWithinIntron() => _preCache.IsWithinIntron; - - /// - /// returns true if the variant overlaps a mature MiRNA. 
[VariationEffect.pm:432 within_mature_miRNA] - /// - public bool IsMatureMirnaVariant() - { - const ConsequenceTag ct = ConsequenceTag.mature_miRNA_variant; - if (_cache.Contains(ct)) return _cache.Get(ct); - - bool result = _preCache.OverlapWithMicroRna; - - _cache.Add(ct, result); - return result; - } - } +using System; +using OptimizedCore; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + /// + /// This class performs all of the functional consequence testing. An additional caching layer + /// has been added to prevent unneeded calculations. The caching layer is reset when each new + /// variant has been read. + /// + public sealed class VariantEffect : IVariantEffect + { + private readonly TranscriptPositionalEffect _preCache; + + private readonly ITranscript _transcript; + private readonly ISimpleVariant _variant; + + private readonly VariantEffectCache _cache; + + private readonly string _referenceAminoAcids; + private readonly string _alternateAminoAcids; + + private readonly int _referenceAminoAcidsLen; + private readonly int _alternateAminoAcidsLen; + + private readonly string _coveredReferenceAminoAcids; + private readonly string _coveredAlternateAminoAcids; + + private readonly string _referenceCodons; + private readonly string _alternateCodons; + + private readonly int _referenceCodonsLen; + private readonly int _alternateCodonsLen; + + private readonly bool _isInsertion; + private readonly bool _isDeletion; + + private readonly int _proteinBegin; + + public VariantEffect(TranscriptPositionalEffect transcriptEffect, ISimpleVariant variant, ITranscript transcript, + string referenAminoAcids, string alternateAminoAcids, string referenceCodons, string alternateCodons, + int? 
proteinBegin, string coveredReferenceAminoAcids, string coveredAlternateAminoAcids, VariantEffectCache cache = null) + { + _transcript = transcript; + _variant = variant; + + _preCache = transcriptEffect; + + _cache = cache ?? new VariantEffectCache(); + + _referenceAminoAcids = referenAminoAcids; + _alternateAminoAcids = alternateAminoAcids; + _referenceAminoAcidsLen = _referenceAminoAcids?.Length ?? 0; + _alternateAminoAcidsLen = _alternateAminoAcids?.Length ?? 0; + + _coveredReferenceAminoAcids = coveredReferenceAminoAcids; + _coveredAlternateAminoAcids = coveredAlternateAminoAcids; + + _referenceCodons = referenceCodons; + _alternateCodons = alternateCodons; + _referenceCodonsLen = _referenceCodons?.Length ?? 0; + _alternateCodonsLen = _alternateCodons?.Length ?? 0; + + _isInsertion = variant.AltAllele.Length > variant.RefAllele.Length; + _isDeletion = variant.AltAllele.Length < variant.RefAllele.Length; + + _proteinBegin = proteinBegin ?? -1; + } + + /// + /// returns true if the variant is a splice acceptor variant [VariationEffect.pm:404 acceptor_splice_site] + /// + public bool IsSpliceAcceptorVariant() + { + const ConsequenceTag ct = ConsequenceTag.splice_acceptor_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = _transcript.Gene.OnReverseStrand ? _preCache.IsStartSpliceSite : _preCache.IsEndSpliceSite; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a splice donor variant [VariationEffect.pm:459 donor_splice_site] + /// + public bool IsSpliceDonorVariant() + { + const ConsequenceTag ct = ConsequenceTag.splice_donor_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = _transcript.Gene.OnReverseStrand ? 
_preCache.IsEndSpliceSite : _preCache.IsStartSpliceSite; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a 5' UTR variant (VariationEffect.pm:595 within_5_prime_utr) + /// + public bool IsFivePrimeUtrVariant() + { + const ConsequenceTag ct = ConsequenceTag.five_prime_UTR_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = false; + + if (_transcript.Translation != null) + { + + var isFivePrimeOfCoding = _transcript.Gene.OnReverseStrand + ? _preCache.AfterCoding + : _preCache.BeforeCoding; + + result = isFivePrimeOfCoding && _preCache.WithinCdna; + } + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a frameshift variant [VariantEffect.pm:940 frameshift] + /// + public bool IsFrameshiftVariant() + { + const ConsequenceTag ct = ConsequenceTag.frameshift_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // check the predicates + if (!_preCache.IsCoding) + { + _cache.Add(ct, false); + return false; + } + + if (IsIncompleteTerminalCodonVariant()) + { + _cache.Add(ct, false); + return false; + } + + bool result = _preCache.HasFrameShift && !IsStopRetained() && !IsTruncatedByStop(); + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if we have an incomplete terminal codon variant. 
[VariantEffect.pm:983 partial_codon] + /// + public bool IsIncompleteTerminalCodonVariant() + { + const ConsequenceTag ct = ConsequenceTag.incomplete_terminal_codon_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + if (_transcript.Translation == null) + { + _cache.Add(ct, false); + return false; + } + + int cdsLength = _transcript.Translation.CodingRegion.Length; + int codonCdsStart = _proteinBegin * 3 - 2; + int lastCodonLength = cdsLength - (codonCdsStart - 1); + + bool result = lastCodonLength < 3 && lastCodonLength > 0; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is an inframe deletion [VariantEffect.pm:825 inframe_deletion] + /// + public bool IsInframeDeletion() + { + const ConsequenceTag ct = ConsequenceTag.inframe_deletion; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // check the predicates + if (!_preCache.IsCoding || !_isDeletion) + { + _cache.Add(ct, false); + return false; + } + + if (_referenceCodonsLen == 0 //|| (PreCache.ReferenceCodonLen < PreCache.AlternateCodonLen) + || IsFrameshiftVariant() + || IsIncompleteTerminalCodonVariant() + || IsStopGained()) + { + _cache.Add(ct, false); + return false; + } + + // simple string match + var referenceCodon = _referenceCodons.ToLower(); + var alternateCodon = _alternateCodons.ToLower(); + + if (referenceCodon.StartsWith(alternateCodon) || referenceCodon.EndsWith(alternateCodon)) + { + _cache.Add(ct, true); + return true; + } + + // try a more complex string match + var commonPrefixLength = _referenceCodons.CommonPrefixLength(_alternateCodons); + var commonSuffixLength = _referenceCodons.CommonSuffixLength(_alternateCodons); + + bool result = _alternateCodonsLen - commonPrefixLength - commonSuffixLength == 0; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is an inframe insertion [VariantEffect.pm:780 inframe_insertion] + /// + public bool IsInframeInsertion() + { + const ConsequenceTag ct = 
ConsequenceTag.inframe_insertion; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // check the predicates + if (!_preCache.IsCoding || !_isInsertion) + { + _cache.Add(ct, false); + return false; + } + + if (IsStopRetained() || + IsFrameshiftVariant() || + IsStartLost() || + _alternateCodonsLen <= _referenceCodonsLen || + IsIncompleteTerminalCodonVariant()) + { + _cache.Add(ct, false); + return false; + } + + bool result = !IsTruncatedByStop(); + + _cache.Add(ct, result); + return result; + } + + private bool IsTruncatedByStop() + { + if (_alternateAminoAcids != null && _alternateAminoAcids.Contains(AminoAcidCommon.StopCodon)) + { + int stopPos = _alternateAminoAcids.IndexOf(AminoAcidCommon.StopCodon, StringComparison.Ordinal); + string altAminoAcidsBeforeStop = _alternateAminoAcids.Substring(0, stopPos); + if (_alternateAminoAcids.OptimizedStartsWith(AminoAcidCommon.StopCodon) || + _referenceAminoAcids.StartsWith(altAminoAcidsBeforeStop)) + return true; + } + return false; + } + + /// + /// returns true if at least one base of the first codon was changed in the transcript [VariantEffect.pm:722 affects_start_codon] + /// + public bool IsStartLost() + { + const ConsequenceTag ct = ConsequenceTag.start_lost; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // check the predicates + if (!_preCache.IsCoding) + { + _cache.Add(ct, false); + return false; + } + + if (_proteinBegin != 1 || _referenceAminoAcidsLen == 0) + { + _cache.Add(ct, false); + return false; + } + + // insertion in start codon and do not change start codon + if (_isInsertion && _proteinBegin == 1 && _alternateAminoAcids.EndsWith(_referenceAminoAcids)) + { + _cache.Add(ct, false); + return false; + } + + bool result = _alternateAminoAcidsLen == 0 || _alternateAminoAcids[0] != _referenceAminoAcids[0]; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a missense variant [VariantEffect.pm:682 missense_variant] + /// + public bool IsMissenseVariant() + 
{ + const ConsequenceTag ct = ConsequenceTag.missense_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // check the predicates + if (!_preCache.IsCoding) + { + _cache.Add(ct, false); + return false; + } + + if (IsStartLost() || + IsStopLost() || + IsStopGained() || + IsIncompleteTerminalCodonVariant() || + IsFrameshiftVariant() || + IsInframeDeletion() || + IsInframeInsertion()) + { + _cache.Add(ct, false); + return false; + } + + bool result = _referenceAminoAcids != _alternateAminoAcids && + _referenceAminoAcidsLen == _alternateAminoAcidsLen; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a non-coding transcript exon variant [VariationEffect.pm:405 non_coding_exon_variant] + /// + public bool IsNonCodingTranscriptExonVariant() + { + const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_exon_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = _preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a nonsense-mediated decay transcript variant [VariationEffect.pm:391 within_nmd_transcript] + /// + public bool IsNonsenseMediatedDecayTranscriptVariant() + { + const ConsequenceTag ct = ConsequenceTag.NMD_transcript_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + var result = _transcript.BioType == BioType.nonsense_mediated_decay; + _cache.Add(ct, result); + return result; + } + + + /// + /// returns true if the variant is a protein altering variant [VariationEffect.pm:300 protein_altering_variant] + /// + public bool IsProteinAlteringVariant() + { + const ConsequenceTag ct = ConsequenceTag.protein_altering_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + var result = true; + + var sameLen = _referenceAminoAcidsLen == _alternateAminoAcidsLen; + var startsWithTer = _referenceAminoAcids.OptimizedStartsWith('X') || 
_alternateAminoAcids.OptimizedStartsWith('X'); + + var isInframeDeletion = IsInframeDeletion(); + // Note: sequence ontology says that stop retained should not be here (http://www.sequenceontology.org/browser/current_svn/term/SO:0001567) + var isStopCodonVarinat = IsStopLost() || IsStopGained(); + + if (sameLen || startsWithTer || isInframeDeletion || isStopCodonVarinat || + IsStartLost() || IsFrameshiftVariant() || IsInframeInsertion() || IsStopRetained() || !_preCache.IsCoding) + { + result = false; + } + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a splice region variant [VariationEffect.pm:483 splice_region] + /// + public bool IsSpliceRegionVariant() + { + const ConsequenceTag ct = ConsequenceTag.splice_region_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = false; + + if (IsSpliceDonorVariant() || IsSpliceAcceptorVariant()) + { + // false + } + else + { + result = _preCache.IsWithinSpliceSiteRegion; + } + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant's amino acid changes to a stop codon [VariationEffect.pm:884 stop_gained] + /// + public bool IsStopGained() + { + const ConsequenceTag ct = ConsequenceTag.stop_gained; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = !IsStopRetained() && + (string.IsNullOrEmpty(_referenceAminoAcids) || !_referenceAminoAcids.Contains(AminoAcidCommon.StopCodon)) && + !string.IsNullOrEmpty(_alternateAminoAcids) && _alternateAminoAcids.Contains(AminoAcidCommon.StopCodon); + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a stop lost variant [VariationEffect.pm:898 stop_lost] + /// + public bool IsStopLost() + { + const ConsequenceTag ct = ConsequenceTag.stop_lost; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = false; + if (!string.IsNullOrEmpty(_coveredReferenceAminoAcids) && _coveredAlternateAminoAcids != null) + result = 
_coveredReferenceAminoAcids.Contains(AminoAcidCommon.StopCodon) && + !_coveredAlternateAminoAcids.Contains(AminoAcidCommon.StopCodon); + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a stop retained variant [VariationEffect.pm:701 stop_retained] + /// + public bool IsStopRetained() + { + const ConsequenceTag ct = ConsequenceTag.stop_retained_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + string alternateAminoAcids = TrimPeptides(_alternateAminoAcids); + + bool result = !string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null && + _referenceAminoAcids == alternateAminoAcids && + _referenceAminoAcids.Contains(AminoAcidCommon.StopCodon) || + string.IsNullOrEmpty(_referenceAminoAcids) && alternateAminoAcids != null && + _proteinBegin == _transcript.Translation?.PeptideSeq.Length + 1 && + alternateAminoAcids.StartsWith(AminoAcidCommon.StopCodon); + + _cache.Add(ct, result); + return result; + } + + public bool IsStartRetained() + { + const ConsequenceTag ct = ConsequenceTag.start_retained_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + if (_proteinBegin != 1 || string.IsNullOrEmpty(_referenceAminoAcids)) + { + _cache.Add(ct, false); + return false; + } + + var startProtein = _referenceAminoAcids[0].ToString(); + var alternateAminoAcids = TrimPeptides(_alternateAminoAcids); + + var result = alternateAminoAcids != null + && alternateAminoAcids.Contains(startProtein); + + _cache.Add(ct, result); + return result; + } + + private static string TrimPeptides(string alternateAminoAcids) + { + if (string.IsNullOrEmpty(alternateAminoAcids)) return null; + if (!alternateAminoAcids.Contains(AminoAcidCommon.StopCodon)) return alternateAminoAcids; + var pos = alternateAminoAcids.IndexOf(AminoAcidCommon.StopCodon, StringComparison.Ordinal); + return pos < 0 ? 
alternateAminoAcids : alternateAminoAcids.Substring(0, pos + 1); + } + + /// + /// returns true if the variant is a synonymous variant [VariationEffect.pm:755 synonymous_variant] + /// + public bool IsSynonymousVariant() + { + const ConsequenceTag ct = ConsequenceTag.synonymous_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = !string.IsNullOrEmpty(_referenceAminoAcids) && + (_variant.Type == VariantType.SNV || + _variant.Type == VariantType.MNV) && + _referenceAminoAcids == _alternateAminoAcids && !_referenceAminoAcids.Contains("X") && + !_alternateAminoAcids.Contains("X") && !IsStopRetained(); + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is a 3' UTR variant [VariationEffect.pm:609 within_3_prime_utr] + /// + public bool IsThreePrimeUtrVariant() + { + const ConsequenceTag ct = ConsequenceTag.three_prime_UTR_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = false; + + if (_transcript.Translation != null) + { + var isThreePrimeOfCoding = _transcript.Gene.OnReverseStrand + ? _preCache.BeforeCoding + : _preCache.AfterCoding; + + result = isThreePrimeOfCoding && _preCache.WithinCdna; + } + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant is within a non-coding gene [VariationEffect.pm:398 within_non_coding_gene] + /// + public bool IsNonCodingTranscriptVariant() + { + const ConsequenceTag ct = ConsequenceTag.non_coding_transcript_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + // NOTE: Isn't IsWithinTranscript always true? and not within mature miRNA is always true + // For Ensembl transcript, miRNA may be a valid attribute. 
We have their location and we would like to check if the variant overlaps with the miRNA + var result = !_preCache.HasExonOverlap && _transcript.Translation == null && !_preCache.OverlapWithMicroRna; + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if it's a coding sequence variant [VariationEffect.pm:998 coding_unknown] + /// + public bool IsCodingSequenceVariant() + { + const ConsequenceTag ct = ConsequenceTag.coding_sequence_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = _preCache.WithinCds && + (string.IsNullOrEmpty(_transcript.Translation.PeptideSeq) || + string.IsNullOrEmpty(_alternateAminoAcids) || _alternateAminoAcids.Contains("X")) + && !(IsFrameshiftVariant() || IsInframeDeletion() || IsIncompleteTerminalCodonVariant() || + IsProteinAlteringVariant() || IsStopGained() || IsStopRetained() || IsStopLost()); + + _cache.Add(ct, result); + return result; + } + + /// + /// returns true if the variant occurs within an intron [VariationEffect.pm:494 within_intron] + /// + public bool IsWithinIntron() => _preCache.IsWithinIntron; + + /// + /// returns true if the variant overlaps a mature MiRNA. 
[VariationEffect.pm:432 within_mature_miRNA] + /// + public bool IsMatureMirnaVariant() + { + const ConsequenceTag ct = ConsequenceTag.mature_miRNA_variant; + if (_cache.Contains(ct)) return _cache.Get(ct); + + bool result = _preCache.OverlapWithMicroRna; + + _cache.Add(ct, result); + return result; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectCache.cs b/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectCache.cs index d7b965e5..54d8a2a0 100644 --- a/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectCache.cs +++ b/VariantAnnotation/AnnotatedPositions/Transcript/VariantEffectCache.cs @@ -1,50 +1,50 @@ -using System; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.AnnotatedPositions.Transcript -{ - public sealed class VariantEffectCache - { - #region members - - private readonly bool[] _isCached; - private readonly bool[] _cachedResults; - - #endregion - - // constructor - public VariantEffectCache() - { - var numConsequences = Enum.GetNames(typeof(ConsequenceTag)).Length; - _isCached = new bool[numConsequences]; - _cachedResults = new bool[numConsequences]; - } - - /// - /// returns true if the corresponding value has been cached - /// - public void Add(ConsequenceTag consequence, bool result) - { - var index = (int)consequence; - - _isCached[index] = true; - _cachedResults[index] = result; - } - - /// - /// returns the cached value for the corresponding result - /// - public bool Get(ConsequenceTag consequence) - { - return _cachedResults[(int)consequence]; - } - - /// - /// returns true if the corresponding value has been cached - /// - public bool Contains(ConsequenceTag consequence) - { - return _isCached[(int)consequence]; - } - } +using System; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.AnnotatedPositions.Transcript +{ + public sealed class VariantEffectCache + { + #region members + + private 
readonly bool[] _isCached; + private readonly bool[] _cachedResults; + + #endregion + + // constructor + public VariantEffectCache() + { + var numConsequences = Enum.GetNames(typeof(ConsequenceTag)).Length; + _isCached = new bool[numConsequences]; + _cachedResults = new bool[numConsequences]; + } + + /// + /// stores the result for the corresponding consequence and marks it as cached + /// + public void Add(ConsequenceTag consequence, bool result) + { + var index = (int)consequence; + + _isCached[index] = true; + _cachedResults[index] = result; + } + + /// + /// returns the cached value for the corresponding consequence + /// + public bool Get(ConsequenceTag consequence) + { + return _cachedResults[(int)consequence]; + } + + /// + /// returns true if the corresponding value has been cached + /// + public bool Contains(ConsequenceTag consequence) + { + return _isCached[(int)consequence]; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Annotator.cs b/VariantAnnotation/Annotator.cs index 03cbb545..d780121f 100644 --- a/VariantAnnotation/Annotator.cs +++ b/VariantAnnotation/Annotator.cs @@ -1,174 +1,174 @@ -using System.Collections.Generic; -using System.Linq; -using ErrorHandling.Exceptions; -using Genome; -using OptimizedCore; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.GeneAnnotation; -using VariantAnnotation.Interface.Plugins; -using VariantAnnotation.Interface.Positions; -using VariantAnnotation.Interface.Providers; -using Variants; - -namespace VariantAnnotation -{ - public sealed class Annotator : IAnnotator - { - private readonly IAnnotationProvider _saProviders; - private readonly IAnnotationProvider _taProvider; - private readonly ISequenceProvider _sequenceProvider; - private readonly IAnnotationProvider _conservationProvider; - private readonly IGeneAnnotationProvider _geneAnnotationProvider; - private readonly IEnumerable _plugins; - 
private readonly HashSet _affectedGenes; - - private bool _annotateMito; - public GenomeAssembly Assembly { get; } - - public Annotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider, - IAnnotationProvider saProviders, IAnnotationProvider conservationProvider, - IGeneAnnotationProvider geneAnnotationProvider, IEnumerable plugins = null) - { - _saProviders = saProviders; - _taProvider = taProvider; - _sequenceProvider = sequenceProvider; - _conservationProvider = conservationProvider; - _geneAnnotationProvider = geneAnnotationProvider; - _affectedGenes = new HashSet(); - _plugins = plugins; - Assembly = GetAssembly(); - } - - private GenomeAssembly GetAssembly() - { - var assemblies = new Dictionary>(); - AddAssembly(assemblies, _taProvider); - AddAssembly(assemblies, _saProviders); - AddAssembly(assemblies, _sequenceProvider); - AddAssembly(assemblies, _conservationProvider); - - if (assemblies.Count == 0) return GenomeAssembly.Unknown; - if (assemblies.Count != 1) throw new UserErrorException(GetAssemblyErrorMessage(assemblies)); - - CheckPluginAssemblyConsistency(assemblies.First().Key); - return assemblies.First().Key; - } - - private static void AddAssembly(IDictionary> assemblies, IProvider provider) - { - if (provider == null) return; - if (assemblies.TryGetValue(provider.Assembly, out var assemblyList)) assemblyList.Add(provider.Name); - else assemblies[provider.Assembly] = new List { provider.Name }; - } - - private static string GetAssemblyErrorMessage(Dictionary> assemblies) - { - var sb = StringBuilderCache.Acquire(); - sb.AppendLine("Not all of the data sources have the same genome assembly:"); - foreach (var assembly in assemblies) sb.AppendLine($"- Using {assembly.Key}: {string.Join(", ", assembly.Value)}"); - return StringBuilderCache.GetStringAndRelease(sb); - } - - private void CheckPluginAssemblyConsistency(GenomeAssembly systemAssembly) - { - if (_plugins == null || !_plugins.Any()) return; - - foreach (var plugin in _plugins) 
- { - if (plugin.Assembly == systemAssembly || plugin.Assembly == GenomeAssembly.Unknown) continue; - throw new UserErrorException($"At least one plugin does not have the same genome assembly ({plugin.Assembly}) as the system genome assembly ({systemAssembly})"); - } - } - - public IAnnotatedPosition Annotate(IPosition position) - { - if (position == null) return null; - var annotatedVariants = GetAnnotatedVariants(position.Variants); - var annotatedPosition = new AnnotatedPosition(position, annotatedVariants); - - if (annotatedPosition.AnnotatedVariants == null - || annotatedPosition.AnnotatedVariants.Length == 0 - || position.Chromosome.UcscName == "chrM" && !_annotateMito - ) return annotatedPosition; - - _sequenceProvider?.Annotate(annotatedPosition); - _saProviders?.Annotate(annotatedPosition); - _conservationProvider?.Annotate(annotatedPosition); - _taProvider.Annotate(annotatedPosition); - _plugins?.Annotate(annotatedPosition, _sequenceProvider?.Sequence); - - TrackAffectedGenes(annotatedPosition); - return annotatedPosition; - } - - private void TrackAffectedGenes(IAnnotatedPosition annotatedPosition) - { - if (_geneAnnotationProvider == null) return; - - foreach (var variant in annotatedPosition.AnnotatedVariants) - { - AddGenesFromTranscripts(variant.Transcripts); - } - } - - private void AddGenesFromTranscripts(IList transcripts) - { - foreach (var transcript in transcripts) - { - if (IsFlankingTranscript(transcript)) continue; - _affectedGenes.Add(transcript.Transcript.Gene.Symbol); - } - } - - private static bool IsFlankingTranscript(IAnnotatedTranscript transcript) - { - if (transcript.Consequences == null) return false; - - // ReSharper disable once LoopCanBeConvertedToQuery - foreach (var consequence in transcript.Consequences) - { - if (consequence == ConsequenceTag.downstream_gene_variant || - consequence == ConsequenceTag.upstream_gene_variant) return true; - } - - return false; - } - - internal static IAnnotatedVariant[] 
GetAnnotatedVariants(IVariant[] variants) - { - if (variants?[0].Behavior == null) return null; - int numVariants = variants.Length; - var annotatedVariants = new IAnnotatedVariant[numVariants]; - for (var i = 0; i < numVariants; i++) annotatedVariants[i] = new AnnotatedVariant(variants[i]); - return annotatedVariants; - } - - public IEnumerable GetGeneAnnotations() - { - var geneAnnotations = new List(); - - foreach (var gene in _affectedGenes.OrderBy(x => x)) - { - var annotation = _geneAnnotationProvider.Annotate(gene); - if (string.IsNullOrEmpty(annotation)) continue; - geneAnnotations.Add(annotation); - } - - return geneAnnotations.Count > 0 ? geneAnnotations : null; - } - - public void EnableMitochondrialAnnotation() => _annotateMito = true; - } - - internal static class PluginExtensions - { - public static void Annotate(this IEnumerable plugins, IAnnotatedPosition annotatedPosition, - ISequence sequence) - { - if (sequence == null) return; - foreach (var plugin in plugins) plugin.Annotate(annotatedPosition, sequence); - } - } -} +using System.Collections.Generic; +using System.Linq; +using ErrorHandling.Exceptions; +using Genome; +using OptimizedCore; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.GeneAnnotation; +using VariantAnnotation.Interface.Plugins; +using VariantAnnotation.Interface.Positions; +using VariantAnnotation.Interface.Providers; +using Variants; + +namespace VariantAnnotation +{ + public sealed class Annotator : IAnnotator + { + private readonly IAnnotationProvider _saProviders; + private readonly IAnnotationProvider _taProvider; + private readonly ISequenceProvider _sequenceProvider; + private readonly IAnnotationProvider _conservationProvider; + private readonly IGeneAnnotationProvider _geneAnnotationProvider; + private readonly IEnumerable _plugins; + private readonly HashSet _affectedGenes; + + private bool 
_annotateMito; + public GenomeAssembly Assembly { get; } + + public Annotator(IAnnotationProvider taProvider, ISequenceProvider sequenceProvider, + IAnnotationProvider saProviders, IAnnotationProvider conservationProvider, + IGeneAnnotationProvider geneAnnotationProvider, IEnumerable plugins = null) + { + _saProviders = saProviders; + _taProvider = taProvider; + _sequenceProvider = sequenceProvider; + _conservationProvider = conservationProvider; + _geneAnnotationProvider = geneAnnotationProvider; + _affectedGenes = new HashSet(); + _plugins = plugins; + Assembly = GetAssembly(); + } + + private GenomeAssembly GetAssembly() + { + var assemblies = new Dictionary>(); + AddAssembly(assemblies, _taProvider); + AddAssembly(assemblies, _saProviders); + AddAssembly(assemblies, _sequenceProvider); + AddAssembly(assemblies, _conservationProvider); + + if (assemblies.Count == 0) return GenomeAssembly.Unknown; + if (assemblies.Count != 1) throw new UserErrorException(GetAssemblyErrorMessage(assemblies)); + + CheckPluginAssemblyConsistency(assemblies.First().Key); + return assemblies.First().Key; + } + + private static void AddAssembly(IDictionary> assemblies, IProvider provider) + { + if (provider == null) return; + if (assemblies.TryGetValue(provider.Assembly, out var assemblyList)) assemblyList.Add(provider.Name); + else assemblies[provider.Assembly] = new List { provider.Name }; + } + + private static string GetAssemblyErrorMessage(Dictionary> assemblies) + { + var sb = StringBuilderCache.Acquire(); + sb.AppendLine("Not all of the data sources have the same genome assembly:"); + foreach (var assembly in assemblies) sb.AppendLine($"- Using {assembly.Key}: {string.Join(", ", assembly.Value)}"); + return StringBuilderCache.GetStringAndRelease(sb); + } + + private void CheckPluginAssemblyConsistency(GenomeAssembly systemAssembly) + { + if (_plugins == null || !_plugins.Any()) return; + + foreach (var plugin in _plugins) + { + if (plugin.Assembly == systemAssembly || 
plugin.Assembly == GenomeAssembly.Unknown) continue; + throw new UserErrorException($"At least one plugin does not have the same genome assembly ({plugin.Assembly}) as the system genome assembly ({systemAssembly})"); + } + } + + public IAnnotatedPosition Annotate(IPosition position) + { + if (position == null) return null; + var annotatedVariants = GetAnnotatedVariants(position.Variants); + var annotatedPosition = new AnnotatedPosition(position, annotatedVariants); + + if (annotatedPosition.AnnotatedVariants == null + || annotatedPosition.AnnotatedVariants.Length == 0 + || position.Chromosome.UcscName == "chrM" && !_annotateMito + ) return annotatedPosition; + + _sequenceProvider?.Annotate(annotatedPosition); + _saProviders?.Annotate(annotatedPosition); + _conservationProvider?.Annotate(annotatedPosition); + _taProvider.Annotate(annotatedPosition); + _plugins?.Annotate(annotatedPosition, _sequenceProvider?.Sequence); + + TrackAffectedGenes(annotatedPosition); + return annotatedPosition; + } + + private void TrackAffectedGenes(IAnnotatedPosition annotatedPosition) + { + if (_geneAnnotationProvider == null) return; + + foreach (var variant in annotatedPosition.AnnotatedVariants) + { + AddGenesFromTranscripts(variant.Transcripts); + } + } + + private void AddGenesFromTranscripts(IList transcripts) + { + foreach (var transcript in transcripts) + { + if (IsFlankingTranscript(transcript)) continue; + _affectedGenes.Add(transcript.Transcript.Gene.Symbol); + } + } + + private static bool IsFlankingTranscript(IAnnotatedTranscript transcript) + { + if (transcript.Consequences == null) return false; + + // ReSharper disable once LoopCanBeConvertedToQuery + foreach (var consequence in transcript.Consequences) + { + if (consequence == ConsequenceTag.downstream_gene_variant || + consequence == ConsequenceTag.upstream_gene_variant) return true; + } + + return false; + } + + internal static IAnnotatedVariant[] GetAnnotatedVariants(IVariant[] variants) + { + if 
(variants?[0].Behavior == null) return null; + int numVariants = variants.Length; + var annotatedVariants = new IAnnotatedVariant[numVariants]; + for (var i = 0; i < numVariants; i++) annotatedVariants[i] = new AnnotatedVariant(variants[i]); + return annotatedVariants; + } + + public IEnumerable GetGeneAnnotations() + { + var geneAnnotations = new List(); + + foreach (var gene in _affectedGenes.OrderBy(x => x)) + { + var annotation = _geneAnnotationProvider.Annotate(gene); + if (string.IsNullOrEmpty(annotation)) continue; + geneAnnotations.Add(annotation); + } + + return geneAnnotations.Count > 0 ? geneAnnotations : null; + } + + public void EnableMitochondrialAnnotation() => _annotateMito = true; + } + + internal static class PluginExtensions + { + public static void Annotate(this IEnumerable plugins, IAnnotatedPosition annotatedPosition, + ISequence sequence) + { + if (sequence == null) return; + foreach (var plugin in plugins) plugin.Annotate(annotatedPosition, sequence); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/CodingRegion.cs b/VariantAnnotation/Caches/DataStructures/CodingRegion.cs index e8cb324f..38e395a9 100644 --- a/VariantAnnotation/Caches/DataStructures/CodingRegion.cs +++ b/VariantAnnotation/Caches/DataStructures/CodingRegion.cs @@ -1,43 +1,43 @@ -using IO; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class CodingRegion : ICodingRegion - { - public int Start { get; } - public int End { get; } - public int CdnaStart { get; } - public int CdnaEnd { get; } - public int Length { get; } - - public CodingRegion(int start, int end, int cdnaStart, int cdnaEnd, int length) - { - Start = start; - End = end; - CdnaStart = cdnaStart; - CdnaEnd = cdnaEnd; - Length = length; - } - - public static ICodingRegion Read(BufferedBinaryReader reader) - { - int genomicStart = reader.ReadOptInt32(); - int genomicEnd = reader.ReadOptInt32(); - int cdnaStart = 
reader.ReadOptInt32(); - int cdnaEnd = reader.ReadOptInt32(); - int length = reader.ReadOptInt32(); - - return new CodingRegion(genomicStart, genomicEnd, cdnaStart, cdnaEnd, length); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.WriteOpt(Start); - writer.WriteOpt(End); - writer.WriteOpt(CdnaStart); - writer.WriteOpt(CdnaEnd); - writer.WriteOpt(Length); - } - } -} +using IO; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class CodingRegion : ICodingRegion + { + public int Start { get; } + public int End { get; } + public int CdnaStart { get; } + public int CdnaEnd { get; } + public int Length { get; } + + public CodingRegion(int start, int end, int cdnaStart, int cdnaEnd, int length) + { + Start = start; + End = end; + CdnaStart = cdnaStart; + CdnaEnd = cdnaEnd; + Length = length; + } + + public static ICodingRegion Read(BufferedBinaryReader reader) + { + int genomicStart = reader.ReadOptInt32(); + int genomicEnd = reader.ReadOptInt32(); + int cdnaStart = reader.ReadOptInt32(); + int cdnaEnd = reader.ReadOptInt32(); + int length = reader.ReadOptInt32(); + + return new CodingRegion(genomicStart, genomicEnd, cdnaStart, cdnaEnd, length); + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.WriteOpt(Start); + writer.WriteOpt(End); + writer.WriteOpt(CdnaStart); + writer.WriteOpt(CdnaEnd); + writer.WriteOpt(Length); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/EncodedTranscriptData.cs b/VariantAnnotation/Caches/DataStructures/EncodedTranscriptData.cs index db553c5e..ff1cbf69 100644 --- a/VariantAnnotation/Caches/DataStructures/EncodedTranscriptData.cs +++ b/VariantAnnotation/Caches/DataStructures/EncodedTranscriptData.cs @@ -1,92 +1,92 @@ -using IO; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class EncodedTranscriptData - { - private readonly ushort 
_info; - private readonly byte _contents; - - // contents - // +====+====+====+====+====+====+====+====+ - // |Tran|TReg|////|Mirn|Poly|Sift|StrExonPh| - // +====+====+====+====+====+====+====+====+ - private const int StartExonMask = 3; - private const int SiftMask = 4; - private const int PolyPhenMask = 8; - private const int MirnasMask = 16; - private const int TranscriptRegionsMask = 64; - private const int TranslationMask = 128; - - public byte StartExonPhase => (byte)(_contents & StartExonMask); - public bool HasSift => (_contents & SiftMask) != 0; - public bool HasPolyPhen => (_contents & PolyPhenMask) != 0; - public bool HasMirnas => (_contents & MirnasMask) != 0; - public bool HasRnaEdits => (_info & RnaEditsMask) != 0; - public bool HasSelenocysteines => (_info & SelenocysteinesMask) != 0; - public bool HasTranscriptRegions => (_contents & TranscriptRegionsMask) != 0; - public bool HasTranslation => (_contents & TranslationMask) != 0; - - // info - // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+ - // |Cano| Source |\\\\|Sele|RnaE|CSNF|CENF| BioType | - // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+ - private const int BioTypeMask = 0xff; - private const int CdsStartNotFoundMask = 0x100; - private const int CdsEndNotFoundMask = 0x200; - private const int TranscriptSourceMask = 0x3; - private const int CanonicalMask = 0x8000; - private const int TranscriptSourceShift = 13; - private const int RnaEditsMask = 1024; - private const int SelenocysteinesMask = 2048; - - public BioType BioType => (BioType)(_info & BioTypeMask); - public bool CdsStartNotFound => (_info & CdsStartNotFoundMask) != 0; - public bool CdsEndNotFound => (_info & CdsEndNotFoundMask) != 0; - public Source TranscriptSource => (Source)((_info >> TranscriptSourceShift) & TranscriptSourceMask); - public bool IsCanonical => (_info & CanonicalMask) != 0; - - private EncodedTranscriptData(ushort info, byte contents) - { - 
_info = info; - _contents = contents; - } - - public static EncodedTranscriptData GetEncodedTranscriptData(BioType bioType, bool cdsStartNotFound, - bool cdsEndNotFound, Source source, bool isCanonical, bool hasSift, bool hasPolyPhen, bool hasMicroRnas, - bool hasRnaEdits, bool hasSelenocysteines, bool hasTranscriptRegions, bool hasTranslation, - byte startExonPhase) - { - ushort info = (ushort)bioType; - if (cdsStartNotFound) info |= CdsStartNotFoundMask; - if (cdsEndNotFound) info |= CdsEndNotFoundMask; - if (isCanonical) info |= CanonicalMask; - if (hasRnaEdits) info |= RnaEditsMask; - if (hasSelenocysteines) info |= SelenocysteinesMask; - info |= (ushort)((ushort)source << TranscriptSourceShift); - - byte contents = startExonPhase; - if (hasSift) contents |= SiftMask; - if (hasPolyPhen) contents |= PolyPhenMask; - if (hasMicroRnas) contents |= MirnasMask; - if (hasTranscriptRegions) contents |= TranscriptRegionsMask; - if (hasTranslation) contents |= TranslationMask; - - return new EncodedTranscriptData(info, contents); - } - - public static EncodedTranscriptData Read(BufferedBinaryReader reader) - { - var info = reader.ReadUInt16(); - var contents = reader.ReadByte(); - return new EncodedTranscriptData(info, contents); - } - - internal void Write(IExtendedBinaryWriter writer) - { - writer.Write(_info); - writer.Write(_contents); - } - } +using IO; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class EncodedTranscriptData + { + private readonly ushort _info; + private readonly byte _contents; + + // contents + // +====+====+====+====+====+====+====+====+ + // |Tran|TReg|////|Mirn|Poly|Sift|StrExonPh| + // +====+====+====+====+====+====+====+====+ + private const int StartExonMask = 3; + private const int SiftMask = 4; + private const int PolyPhenMask = 8; + private const int MirnasMask = 16; + private const int TranscriptRegionsMask = 64; + private const int TranslationMask = 128; + 
+ public byte StartExonPhase => (byte)(_contents & StartExonMask); + public bool HasSift => (_contents & SiftMask) != 0; + public bool HasPolyPhen => (_contents & PolyPhenMask) != 0; + public bool HasMirnas => (_contents & MirnasMask) != 0; + public bool HasRnaEdits => (_info & RnaEditsMask) != 0; + public bool HasSelenocysteines => (_info & SelenocysteinesMask) != 0; + public bool HasTranscriptRegions => (_contents & TranscriptRegionsMask) != 0; + public bool HasTranslation => (_contents & TranslationMask) != 0; + + // info + // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+ + // |Cano| Source |\\\\|Sele|RnaE|CSNF|CENF| BioType | + // +====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+====+ + private const int BioTypeMask = 0xff; + private const int CdsStartNotFoundMask = 0x100; + private const int CdsEndNotFoundMask = 0x200; + private const int TranscriptSourceMask = 0x3; + private const int CanonicalMask = 0x8000; + private const int TranscriptSourceShift = 13; + private const int RnaEditsMask = 1024; + private const int SelenocysteinesMask = 2048; + + public BioType BioType => (BioType)(_info & BioTypeMask); + public bool CdsStartNotFound => (_info & CdsStartNotFoundMask) != 0; + public bool CdsEndNotFound => (_info & CdsEndNotFoundMask) != 0; + public Source TranscriptSource => (Source)((_info >> TranscriptSourceShift) & TranscriptSourceMask); + public bool IsCanonical => (_info & CanonicalMask) != 0; + + private EncodedTranscriptData(ushort info, byte contents) + { + _info = info; + _contents = contents; + } + + public static EncodedTranscriptData GetEncodedTranscriptData(BioType bioType, bool cdsStartNotFound, + bool cdsEndNotFound, Source source, bool isCanonical, bool hasSift, bool hasPolyPhen, bool hasMicroRnas, + bool hasRnaEdits, bool hasSelenocysteines, bool hasTranscriptRegions, bool hasTranslation, + byte startExonPhase) + { + ushort info = (ushort)bioType; + if (cdsStartNotFound) info |= 
CdsStartNotFoundMask; + if (cdsEndNotFound) info |= CdsEndNotFoundMask; + if (isCanonical) info |= CanonicalMask; + if (hasRnaEdits) info |= RnaEditsMask; + if (hasSelenocysteines) info |= SelenocysteinesMask; + info |= (ushort)((ushort)source << TranscriptSourceShift); + + byte contents = startExonPhase; + if (hasSift) contents |= SiftMask; + if (hasPolyPhen) contents |= PolyPhenMask; + if (hasMicroRnas) contents |= MirnasMask; + if (hasTranscriptRegions) contents |= TranscriptRegionsMask; + if (hasTranslation) contents |= TranslationMask; + + return new EncodedTranscriptData(info, contents); + } + + public static EncodedTranscriptData Read(BufferedBinaryReader reader) + { + var info = reader.ReadUInt16(); + var contents = reader.ReadByte(); + return new EncodedTranscriptData(info, contents); + } + + internal void Write(IExtendedBinaryWriter writer) + { + writer.Write(_info); + writer.Write(_contents); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/DataStructures/Gene.cs b/VariantAnnotation/Caches/DataStructures/Gene.cs index 65c6aa46..5c5bf021 100644 --- a/VariantAnnotation/Caches/DataStructures/Gene.cs +++ b/VariantAnnotation/Caches/DataStructures/Gene.cs @@ -1,60 +1,60 @@ -using System.Collections.Generic; -using Genome; -using IO; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class Gene : IGene - { - public int Start { get; } - public int End { get; } - public IChromosome Chromosome { get; } - public bool OnReverseStrand { get; } - public string Symbol { get; } - public ICompactId EntrezGeneId { get; } - public ICompactId EnsemblId { get; } - public int HgncId { get; } - - public Gene(IChromosome chromosome, int start, int end, bool onReverseStrand, string symbol, int hgncId, - CompactId entrezGeneId, CompactId ensemblId) - { - OnReverseStrand = onReverseStrand; - Symbol = symbol; - HgncId = 
hgncId; - EntrezGeneId = entrezGeneId; - EnsemblId = ensemblId; - Start = start; - End = end; - Chromosome = chromosome; - } - - public static IGene Read(IBufferedBinaryReader reader, IDictionary indexToChromosome) - { - ushort referenceIndex = reader.ReadOptUInt16(); - int start = reader.ReadOptInt32(); - int end = reader.ReadOptInt32(); - bool onReverseStrand = reader.ReadBoolean(); - string symbol = reader.ReadAsciiString(); - int hgncId = reader.ReadOptInt32(); - var entrezId = CompactId.Read(reader); - var ensemblId = CompactId.Read(reader); - - return new Gene(indexToChromosome[referenceIndex], start, end, onReverseStrand, symbol, hgncId, entrezId, ensemblId); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.WriteOpt(Chromosome.Index); - writer.WriteOpt(Start); - writer.WriteOpt(End); - writer.Write(OnReverseStrand); - writer.WriteOptAscii(Symbol); - writer.WriteOpt(HgncId); - // ReSharper disable ImpureMethodCallOnReadonlyValueField - EntrezGeneId.Write(writer); - EnsemblId.Write(writer); - } - } -} +using System.Collections.Generic; +using Genome; +using IO; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class Gene : IGene + { + public int Start { get; } + public int End { get; } + public IChromosome Chromosome { get; } + public bool OnReverseStrand { get; } + public string Symbol { get; } + public ICompactId EntrezGeneId { get; } + public ICompactId EnsemblId { get; } + public int HgncId { get; } + + public Gene(IChromosome chromosome, int start, int end, bool onReverseStrand, string symbol, int hgncId, + CompactId entrezGeneId, CompactId ensemblId) + { + OnReverseStrand = onReverseStrand; + Symbol = symbol; + HgncId = hgncId; + EntrezGeneId = entrezGeneId; + EnsemblId = ensemblId; + Start = start; + End = end; + Chromosome = chromosome; + } + + public static IGene Read(IBufferedBinaryReader reader, 
IDictionary indexToChromosome) + { + ushort referenceIndex = reader.ReadOptUInt16(); + int start = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + bool onReverseStrand = reader.ReadBoolean(); + string symbol = reader.ReadAsciiString(); + int hgncId = reader.ReadOptInt32(); + var entrezId = CompactId.Read(reader); + var ensemblId = CompactId.Read(reader); + + return new Gene(indexToChromosome[referenceIndex], start, end, onReverseStrand, symbol, hgncId, entrezId, ensemblId); + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.WriteOpt(Chromosome.Index); + writer.WriteOpt(Start); + writer.WriteOpt(End); + writer.Write(OnReverseStrand); + writer.WriteOptAscii(Symbol); + writer.WriteOpt(HgncId); + // ReSharper disable ImpureMethodCallOnReadonlyValueField + EntrezGeneId.Write(writer); + EnsemblId.Write(writer); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/IndexEntry.cs b/VariantAnnotation/Caches/DataStructures/IndexEntry.cs index 6d5f7962..8b41a8b5 100644 --- a/VariantAnnotation/Caches/DataStructures/IndexEntry.cs +++ b/VariantAnnotation/Caches/DataStructures/IndexEntry.cs @@ -1,22 +1,22 @@ -using System.IO; - -namespace VariantAnnotation.Caches.DataStructures -{ - public struct IndexEntry - { - public long FileOffset; - public int Count; - - public void Read(BinaryReader reader) - { - FileOffset = reader.ReadInt64(); - Count = reader.ReadInt32(); - } - - public void Write(BinaryWriter writer) - { - writer.Write(FileOffset); - writer.Write(Count); - } - } +using System.IO; + +namespace VariantAnnotation.Caches.DataStructures +{ + public struct IndexEntry + { + public long FileOffset; + public int Count; + + public void Read(BinaryReader reader) + { + FileOffset = reader.ReadInt64(); + Count = reader.ReadInt32(); + } + + public void Write(BinaryWriter writer) + { + writer.Write(FileOffset); + writer.Write(Count); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/DataStructures/Prediction.cs 
b/VariantAnnotation/Caches/DataStructures/Prediction.cs index 89560d35..0efcff5c 100644 --- a/VariantAnnotation/Caches/DataStructures/Prediction.cs +++ b/VariantAnnotation/Caches/DataStructures/Prediction.cs @@ -1,97 +1,97 @@ -using System.IO; -using IO; -using VariantAnnotation.AnnotatedPositions.Transcript; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class Prediction - { - private readonly byte[] _data; - private readonly Entry[] _lut; - - // A X C D E F G H I X K L M N X P Q R S T X V W X Y X - private static readonly int[] AminoAcidIndices = { 0, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, -1, 12, 13, 14, 15, 16, -1, 17, 18, -1, 19, -1 }; - - private const int NumAminoAcids = 20; - private const byte NullEntry = 0xff; - - public Prediction(byte[] data, Entry[] lut) - { - _data = data; - _lut = lut; - } - - public Entry GetPrediction(char newAminoAcid, int aaPosition) - { - // sanity check: skip stop codons - if (newAminoAcid == AminoAcids.StopCodonChar || newAminoAcid == 'X') return null; - - int index = GetIndex(newAminoAcid, aaPosition); - - // sanity check: skip instances where the data isn't long enough - if (index >= _data.Length) return null; - - byte entry = _data[index]; - return entry == NullEntry ? 
null : _lut[entry]; - } - - private static int GetIndex(char newAminoAcid, int aaPosition) - { - int asciiIndex = char.ToUpper(newAminoAcid) - 'A'; - - // sanity check: make sure the array index is within range - if (asciiIndex < 0 || asciiIndex >= 26) - { - throw new InvalidDataException($"Expected an array index on the interval [0, 25], but observed the following: {asciiIndex} ({newAminoAcid})"); - } - - int aaIndex = AminoAcidIndices[asciiIndex]; - - // sanity check: make sure the array index is within range - if (aaIndex == -1) - { - throw new InvalidDataException($"An invalid amino acid was given: {newAminoAcid}"); - } - - return NumAminoAcids * (aaPosition - 1) + aaIndex; - } - - public void Write(BinaryWriter writer) - { - writer.Write(_data.Length); - writer.Write(_data); - } - - public static Prediction Read(ExtendedBinaryReader reader, Entry[] lut) - { - int numBytes = reader.ReadInt32(); - var data = reader.ReadBytes(numBytes); - return new Prediction(data, lut); - } - - public sealed class Entry - { - public readonly double Score; - public readonly byte EnumIndex; - - public Entry(double score, byte enumIndex) - { - Score = score; - EnumIndex = enumIndex; - } - - public static Entry ReadEntry(ExtendedBinaryReader reader) - { - double score = reader.ReadDouble(); - byte enumIndex = reader.ReadByte(); - return new Entry(score, enumIndex); - } - - public void Write(BinaryWriter writer) - { - writer.Write(Score); - writer.Write(EnumIndex); - } - } - } +using System.IO; +using IO; +using VariantAnnotation.AnnotatedPositions.AminoAcids; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class Prediction + { + private readonly byte[] _data; + private readonly Entry[] _lut; + + // A X C D E F G H I X K L M N X P Q R S T X V W X Y X + private static readonly int[] AminoAcidIndices = { 0, -1, 1, 2, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, -1, 12, 13, 14, 15, 16, -1, 17, 18, -1, 19, -1 }; + + private const int NumAminoAcids = 20; + private const byte 
NullEntry = 0xff; + + public Prediction(byte[] data, Entry[] lut) + { + _data = data; + _lut = lut; + } + + public Entry GetPrediction(char newAminoAcid, int aaPosition) + { + // sanity check: skip stop codons + if (newAminoAcid == AminoAcidCommon.StopCodon || newAminoAcid == 'X') return null; + + int index = GetIndex(newAminoAcid, aaPosition); + + // sanity check: skip instances where the data isn't long enough + if (index >= _data.Length) return null; + + byte entry = _data[index]; + return entry == NullEntry ? null : _lut[entry]; + } + + private static int GetIndex(char newAminoAcid, int aaPosition) + { + int asciiIndex = char.ToUpper(newAminoAcid) - 'A'; + + // sanity check: make sure the array index is within range + if (asciiIndex < 0 || asciiIndex >= 26) + { + throw new InvalidDataException($"Expected an array index on the interval [0, 25], but observed the following: {asciiIndex} ({newAminoAcid})"); + } + + int aaIndex = AminoAcidIndices[asciiIndex]; + + // sanity check: make sure the array index is within range + if (aaIndex == -1) + { + throw new InvalidDataException($"An invalid amino acid was given: {newAminoAcid}"); + } + + return NumAminoAcids * (aaPosition - 1) + aaIndex; + } + + public void Write(BinaryWriter writer) + { + writer.Write(_data.Length); + writer.Write(_data); + } + + public static Prediction Read(ExtendedBinaryReader reader, Entry[] lut) + { + int numBytes = reader.ReadInt32(); + var data = reader.ReadBytes(numBytes); + return new Prediction(data, lut); + } + + public sealed class Entry + { + public readonly double Score; + public readonly byte EnumIndex; + + public Entry(double score, byte enumIndex) + { + Score = score; + EnumIndex = enumIndex; + } + + public static Entry ReadEntry(ExtendedBinaryReader reader) + { + double score = reader.ReadDouble(); + byte enumIndex = reader.ReadByte(); + return new Entry(score, enumIndex); + } + + public void Write(BinaryWriter writer) + { + writer.Write(Score); + writer.Write(EnumIndex); + } + } 
+ } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/DataStructures/RegulatoryRegion.cs b/VariantAnnotation/Caches/DataStructures/RegulatoryRegion.cs index 5b777f4d..e7789241 100644 --- a/VariantAnnotation/Caches/DataStructures/RegulatoryRegion.cs +++ b/VariantAnnotation/Caches/DataStructures/RegulatoryRegion.cs @@ -1,48 +1,48 @@ -using System.Collections.Generic; -using Genome; -using IO; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class RegulatoryRegion : IRegulatoryRegion - { - public int Start { get; } - public int End { get; } - public IChromosome Chromosome { get; } - public ICompactId Id { get; } - public RegulatoryRegionType Type { get; } - - public RegulatoryRegion(IChromosome chromosome, int start, int end, CompactId id, RegulatoryRegionType type) - { - Id = id; - Type = type; - Start = start; - End = end; - Chromosome = chromosome; - } - - public static IRegulatoryRegion Read(IBufferedBinaryReader reader, IDictionary chromosomeIndexDictionary) - { - var refIndex = reader.ReadOptUInt16(); - int start = reader.ReadOptInt32(); - int end = reader.ReadOptInt32(); - var type = (RegulatoryRegionType)reader.ReadByte(); - var id = CompactId.Read(reader); - - return new RegulatoryRegion(chromosomeIndexDictionary[refIndex], start, end, id, type); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.WriteOpt(Chromosome.Index); - writer.WriteOpt(Start); - writer.WriteOpt(End); - writer.Write((byte)Type); - // ReSharper disable once ImpureMethodCallOnReadonlyValueField - Id.Write(writer); - } - } -} +using System.Collections.Generic; +using Genome; +using IO; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; + +namespace 
VariantAnnotation.Caches.DataStructures +{ + public sealed class RegulatoryRegion : IRegulatoryRegion + { + public int Start { get; } + public int End { get; } + public IChromosome Chromosome { get; } + public ICompactId Id { get; } + public RegulatoryRegionType Type { get; } + + public RegulatoryRegion(IChromosome chromosome, int start, int end, CompactId id, RegulatoryRegionType type) + { + Id = id; + Type = type; + Start = start; + End = end; + Chromosome = chromosome; + } + + public static IRegulatoryRegion Read(IBufferedBinaryReader reader, IDictionary chromosomeIndexDictionary) + { + var refIndex = reader.ReadOptUInt16(); + int start = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + var type = (RegulatoryRegionType)reader.ReadByte(); + var id = CompactId.Read(reader); + + return new RegulatoryRegion(chromosomeIndexDictionary[refIndex], start, end, id, type); + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.WriteOpt(Chromosome.Index); + writer.WriteOpt(Start); + writer.WriteOpt(End); + writer.Write((byte)Type); + // ReSharper disable once ImpureMethodCallOnReadonlyValueField + Id.Write(writer); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/RnaEdit.cs b/VariantAnnotation/Caches/DataStructures/RnaEdit.cs index 1a5827c2..fa431c3a 100644 --- a/VariantAnnotation/Caches/DataStructures/RnaEdit.cs +++ b/VariantAnnotation/Caches/DataStructures/RnaEdit.cs @@ -1,42 +1,42 @@ -using IO; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class RnaEdit : IRnaEdit - { - public int Start { get; } - public int End { get; } - public string Bases { get; } - public VariantType Type { get; set; } - - public RnaEdit(int start, int end, string bases) - { - Start = start; - End = end; - Bases = bases; - Type = VariantType.unknown; - } - - public static IRnaEdit Read(BufferedBinaryReader reader) - { - int start = reader.ReadOptInt32(); - int end 
= reader.ReadOptInt32(); - string bases = reader.ReadAsciiString(); - return new RnaEdit(start, end, bases); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.WriteOpt(Start); - writer.WriteOpt(End); - writer.WriteOptAscii(Bases); - } - - public int CompareTo(IRnaEdit other) - { - return Start.CompareTo(other.Start); - } - } -} +using IO; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class RnaEdit : IRnaEdit + { + public int Start { get; } + public int End { get; } + public string Bases { get; } + public VariantType Type { get; set; } + + public RnaEdit(int start, int end, string bases) + { + Start = start; + End = end; + Bases = bases; + Type = VariantType.unknown; + } + + public static IRnaEdit Read(BufferedBinaryReader reader) + { + int start = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + string bases = reader.ReadAsciiString(); + return new RnaEdit(start, end, bases); + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.WriteOpt(Start); + writer.WriteOpt(End); + writer.WriteOptAscii(Bases); + } + + public int CompareTo(IRnaEdit other) + { + return Start.CompareTo(other.Start); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/Transcript.cs b/VariantAnnotation/Caches/DataStructures/Transcript.cs index a92f3e2e..88a9831b 100644 --- a/VariantAnnotation/Caches/DataStructures/Transcript.cs +++ b/VariantAnnotation/Caches/DataStructures/Transcript.cs @@ -1,1108 +1,2093 @@ -using System;using System.Collections.Generic; -using System.IO; -using System.Linq; -using Genome; -using Intervals; -using IO; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Caches.Utilities; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.Caches.DataStructures -{ - public 
sealed class Transcript : ITranscript - { - public IChromosome Chromosome { get; } - public int Start { get; } - public int End { get; } - public ICompactId Id { get; } - public BioType BioType { get; } - public bool IsCanonical { get; } - public Source Source { get; } - public IGene Gene { get; } - public ITranscriptRegion[] TranscriptRegions { get; } - public ushort NumExons { get; } - public int TotalExonLength { get; } - public byte StartExonPhase { get; } - public int SiftIndex { get; } - public int PolyPhenIndex { get; } - public ITranslation Translation { get; } - public IInterval[] MicroRnas { get; } - public int[] Selenocysteines { get; } - public IRnaEdit[] RnaEdits { get; } - public bool CdsStartNotFound { get; } - public bool CdsEndNotFound { get; } - public ISequence CodingSequence { get; set; } - public ISequence CdnaSequence { get; set; } - - public Transcript(IChromosome chromosome, int start, int end, ICompactId id, ITranslation translation, - BioType bioType, IGene gene, int totalExonLength, byte startExonPhase, bool isCanonical, - ITranscriptRegion[] transcriptRegions, ushort numExons, IInterval[] microRnas, int siftIndex, - int polyPhenIndex, Source source, bool cdsStartNotFound, bool cdsEndNotFound, int[] selenocysteines, - IRnaEdit[] rnaEdits) - { - Chromosome = chromosome; - Start = start; - End = end; - Id = id; - Translation = translation; - BioType = bioType; - Gene = gene; - TotalExonLength = totalExonLength; - StartExonPhase = startExonPhase; - IsCanonical = isCanonical; - TranscriptRegions = transcriptRegions; - NumExons = numExons; - MicroRnas = microRnas; - SiftIndex = siftIndex; - PolyPhenIndex = polyPhenIndex; - Source = source; - CdsStartNotFound = cdsStartNotFound; - CdsEndNotFound = cdsEndNotFound; - Selenocysteines = selenocysteines; - RnaEdits = rnaEdits; - } - - // SET-362 DEBUG: Remove the sequenceProvider argument in the future - public static ITranscript Read(BufferedBinaryReader reader, - IDictionary 
chromosomeIndexDictionary, IGene[] cacheGenes, - ITranscriptRegion[] cacheTranscriptRegions, IInterval[] cacheMirnas, string[] cachePeptideSeqs, ISequenceProvider sequenceProvider) - { - // transcript - var referenceIndex = reader.ReadOptUInt16(); - var start = reader.ReadOptInt32(); - var end = reader.ReadOptInt32(); - var id = CompactId.Read(reader); - - // gene - var geneIndex = reader.ReadOptInt32(); - var gene = cacheGenes[geneIndex]; - - // encoded data - var encoded = EncodedTranscriptData.Read(reader); - - // transcript regions - var transcriptRegions = encoded.HasTranscriptRegions ? ReadIndices(reader, cacheTranscriptRegions) : null; - ushort numExons = reader.ReadOptUInt16(); - - // protein function predictions - int siftIndex = encoded.HasSift ? reader.ReadOptInt32() : -1; - int polyphenIndex = encoded.HasPolyPhen ? reader.ReadOptInt32() : -1; - - // translation - var translation = encoded.HasTranslation ? DataStructures.Translation.Read(reader, cachePeptideSeqs) : null; - - // attributes - var mirnas = encoded.HasMirnas ? ReadIndices(reader, cacheMirnas) : null; - var rnaEdits = encoded.HasRnaEdits ? ReadItems(reader, RnaEdit.Read) : null; - var selenocysteines = encoded.HasSelenocysteines ? 
ReadItems(reader, x => x.ReadOptInt32()) : null; - - var chromosome = chromosomeIndexDictionary[referenceIndex]; - - var startExonPhase = encoded.StartExonPhase; - - //NM_022148.2 - if (id.WithVersion == "NM_022148.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - var transcriptId = "NM_022148.2"; - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, "AATTCGGCACGAGG"), - new RnaEdit(770, 780, "AGTGAAGAAGT"), - new RnaEdit(782, 782, "T"), - new RnaEdit(785, 788, "CATT"), - new RnaEdit(792, 794, "AGC"), - new RnaEdit(795, 794, "GTGCCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTCACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGATGGACACACCACTGTCAAAGTCAACGTCAGGATCCACGTTGACATTTAAAGACAGAGGGGACTGTCCCGGGGACTCCACACCACCATGGATGGGAAGTCTCCACGCCAATGATGGTAGGACTAGGAGACTCTGAAGACCCAGCCTCACCGCCTAATGCGGCCACTGCCCTGCTAACTTTCCCCCACATGAGTCTCTGTGTTCAAAGGCTTGATGGCAGATGGGAGCCAATTGCTCCAGGAGATTTACTCCCAGTTCCTTTTCGTGCCTGAACGTTGTCACATAAACCCCAAGGCAGCACGTCCAAAATGCTGTAAAACCATCTTCCCACTCTGTGAGTCCCCAGTTCCGTCCATGTACCTGTTCCATAGCATTGGATTCTCGGAGGATTTTTTGTCTGTTTTGAGACTCCAAACCACCTCTACCCCTACAAAAAAAAAAAAAAAAAA"), - }; - - var codingRegion = new CodingRegion(translation.CodingRegion.Start, translation.CodingRegion.End, 17, 1132, 1116); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, transcriptId, 
gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - } - - //NM_012234.6 - if (id.WithVersion == "NM_012234.6" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - var transcriptId = "NM_012234.6"; - var codingRegion = new CodingRegion(translation.CodingRegion.Start, translation.CodingRegion.End, translation.CodingRegion.CdnaStart+183, translation.CodingRegion.CdnaEnd, 687); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MTMGDKKSPTRPKRQAKPAADEGFWDCSVCTFRNSAEAFKCSICDVRKGTSTRKPRINSQLVAQQVAQQYATPPPPKKEKKEKVEKQDKEKPEKDKEISPSVTKKNTNKKTKPKSDILKDPPSEANSIQSANATTKTSETNHTSRPRLKNVDRSTAQQLAVTVGNVTVIITDFKEKTRSSSTSSSTVTSSAGSEQQNQSSSGSESTDKGSSRSSTPKGDMSAVNDESF*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, transcriptId, gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - // NM_001220773.1 - if (id.WithVersion == "NM_001220773.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - var transcriptId = "NM_001220773.1"; - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAA"), - new RnaEdit(5, 8, null), - new RnaEdit(5457, 5456, "AAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455032, 50455035, 202, 205), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 50455036, 50455039, 205, 206), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455040, 50455168, 206, 334), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50455169, 50459426, 334, 335), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459427, 50459561, 335, 469), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459562, 50467615, 469, 470), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 
50467616, 50472799, 470, 5653), - }; - - var codingRegion = new CodingRegion(50455032, 50468325, 169, 1179, 1011); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, transcriptId, gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_152756.3" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - var newRnaEdits = new IRnaEdit[] - { - new RnaEdit(1,0,"GG"), - new RnaEdit(3196,3196,"T") - }; - - rnaEdits = newRnaEdits; - - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, 25, 5151, 5127); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, translation.PeptideSeq); - - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_152756.3", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001242758.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(40,40,"T"), - new RnaEdit(287,287,"A"), - new RnaEdit(355,355,"A"), - new RnaEdit(366,366,"C"), - new RnaEdit(383,383,"C"), - new RnaEdit(385,385,"A"), - new RnaEdit(425,425,"A"), - new RnaEdit(469,469,"C"), - new RnaEdit(573,573,"A"), - new RnaEdit(605,605,"T"), - new RnaEdit(611,611,"C"), - new RnaEdit(622,623,"CG"), - new RnaEdit(629,629,"T"), - new RnaEdit(639,639,"G"), - new RnaEdit(643,643,"C"), - new RnaEdit(643,644,"CG"), - new RnaEdit(654,655,"CG"), - new RnaEdit(1161,1161,"T"), - new 
RnaEdit(1324,1324,"G"), - new RnaEdit(1380,1380,"T"), - new RnaEdit(1492,1492,"G"), - new RnaEdit(1580,1580,"T"), - new RnaEdit(1588,1589,"CG"), - }; - - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001242758.1", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); - - } - - if (id.WithVersion == "NM_002447.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(3847,3847,"G"), - new RnaEdit(4773,4772,"AAAAAAAAAAAAA"), - }; - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_002447.2", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - } - - if (id.WithVersion == "NM_005228.3" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(2955,2955,"C"), - new RnaEdit(5601,5600,"AAAAAAAAAAAAAAAA"), - }; - - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_005228.3", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); - - } - - if (id.WithVersion == "NM_005922.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(612,612,"A"), - new RnaEdit(5485,5484,"AAAAAAAAAAAAAAAAA"), - }; - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_005922.2", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_006724.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(612,612,"A"), - new RnaEdit(5335,5334,"AAAAAAAAAAAAAAAAA"), - }; - - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_006724.2", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); - } - - if (id.WithVersion == "NM_019063.3" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1109,1109,"G"), - new RnaEdit(1406,1406,"G"), - new 
RnaEdit(5550,5549,"AAAAAAAAAAAAAAAA"), - }; - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_019063.3", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_175741.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(220,220,"T"), - new RnaEdit(380,380,"C") - }; - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_175741.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NR_003085.2" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1703,1703,"G"), - new RnaEdit(2832,2831,"AAAAAAAAAAAAAAA"), - }; - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NR_003085.2", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001244937.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(3700,3700,"G"), - new RnaEdit(4626,4625,"AAAAAAAAAAAAA"), - }; - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001244937.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001278433.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(4308, 4307, "AAAAAAAAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 66409764, 66409936, 1, 173), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 66409937, 66511534, 173, 174), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 66511535, 66511717, 174, 356), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 66511718, 66518896, 356, 357), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 66518897, 66519067, 357, 527), - new 
TranscriptRegion(TranscriptRegionType.Intron, 3, 66519068, 66519865, 527, 528), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 66519866, 66519957, 528, 619), - new TranscriptRegion(TranscriptRegionType.Intron, 4, 66519958, 66520156, 619, 620), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 66520157, 66520218, 620, 681), - new TranscriptRegion(TranscriptRegionType.Intron, 5, 66520219, 66521052, 681, 682), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 66521053, 66521099, 682, 728), - new TranscriptRegion(TranscriptRegionType.Intron, 6, 66521100, 66521894, 728, 729), - new TranscriptRegion(TranscriptRegionType.Exon, 7, 66521895, 66522053, 729, 887), - new TranscriptRegion(TranscriptRegionType.Intron, 7, 66522054, 66523980, 887, 888), - new TranscriptRegion(TranscriptRegionType.Exon, 8, 66523981, 66524041, 888, 948), - new TranscriptRegion(TranscriptRegionType.Intron, 8, 66524042, 66525010, 948, 949), - new TranscriptRegion(TranscriptRegionType.Exon, 9, 66525011, 66525132, 949, 1070), - new TranscriptRegion(TranscriptRegionType.Intron, 9, 66525133, 66526060, 1070, 1071), - new TranscriptRegion(TranscriptRegionType.Exon, 10, 66526061, 66526142, 1071, 1152), - new TranscriptRegion(TranscriptRegionType.Intron, 10, 66526143, 66526417, 1152, 1153), - new TranscriptRegion(TranscriptRegionType.Exon, 11, 66526418, 66529572, 1153, 4307) - }; - - startExonPhase = 0; - - var codingRegion = new CodingRegion(66511541, 66526590, 180, 1325, 1146); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MESGSTAASEEARSLRECELYVQKHNIQALLKDSIVQLCTARPERPMAFLREYFERLEKEEAKQIQNLQKAGTRTDSREDEISPPPPNPVVKGRRRRGAISAEVYTEEDAASYVRKVIPKDYKTMAALAKAIEKNVLFSHLDDNERSDIFDAMFSVSFIAGETVIQQGDEGDNFYVIDQGETDVYVNNEWATSVGEGGSFGELALIYGTPRAATVKAKTNVKLWGIDRDSYRRILMGSTLRKRKMYEEFLSKVSILESLDKWERLTVADALEPVQFEDGQKIVVQGEPGDEFFIILEGSAAVLQRRSENEEFVEVGRLGPSDYFGEIALLMNRPRAATVVARGPLKCVKLDRPRFERVLGPCSDILKRNIQQYNSFVSLSV*"); - - // TranscriptValidator.Validate(sequenceProvider, 
chromosome, "NM_001278433.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation, startExonPhase); - } - - if (id.WithVersion == "NM_001260.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, "GGG") - }; - - transcriptRegions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 26828756, 26828906, 4, 154), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 26828907, 26911703, 154, 155), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 26911704, 26911779, 155, 230), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 26911780, 26923208, 230, 231), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 26923209, 26923319, 231, 341), - new TranscriptRegion(TranscriptRegionType.Intron, 3, 26923320, 26927876, 341, 342), - new TranscriptRegion(TranscriptRegionType.Exon, 4, 26927877, 26928017, 342, 482), - new TranscriptRegion(TranscriptRegionType.Intron, 4, 26928018, 26956950, 482, 483), - new TranscriptRegion(TranscriptRegionType.Exon, 5, 26956951, 26957008, 483, 540), - new TranscriptRegion(TranscriptRegionType.Intron, 5, 26957009, 26959347, 540, 541), - new TranscriptRegion(TranscriptRegionType.Exon, 6, 26959348, 26959479, 541, 672), - new TranscriptRegion(TranscriptRegionType.Intron, 6, 26959480, 26967503, 672, 673), - new TranscriptRegion(TranscriptRegionType.Exon, 7, 26967504, 26967647, 673, 816), - new TranscriptRegion(TranscriptRegionType.Intron, 7, 26967648, 26970421, 816, 817), - new TranscriptRegion(TranscriptRegionType.Exon, 8, 26970422, 26970491, 817, 886), - new TranscriptRegion(TranscriptRegionType.Intron, 8, 26970492, 26971289, 886, 887), - new TranscriptRegion(TranscriptRegionType.Exon, 9, 26971290, 26971362, 887, 959), - new TranscriptRegion(TranscriptRegionType.Intron, 9, 26971363, 26974589, 959, 960), - new TranscriptRegion(TranscriptRegionType.Exon, 10, 26974590, 26974687, 960, 1057), - new 
TranscriptRegion(TranscriptRegionType.Intron, 10, 26974688, 26975405, 1057, 1058), - new TranscriptRegion(TranscriptRegionType.Exon, 11, 26975406, 26975484, 1058, 1136), - new TranscriptRegion(TranscriptRegionType.Intron, 11, 26975485, 26975602, 1136, 1137), - new TranscriptRegion(TranscriptRegionType.Exon, 12, 26975603, 26975761, 1137, 1295), - new TranscriptRegion(TranscriptRegionType.Intron, 12, 26975762, 26978092, 1295, 1296), - new TranscriptRegion(TranscriptRegionType.Exon, 13, 26978093, 26978569, 1296, 1772) - }; - - var codingRegion = new CodingRegion(26828779, 26978218, 27, 1421, 1395); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDYDFKVKLSSERERVEDLFEYEGCKVGRGTYGHVYKAKRKDGKDDKDYALKQIEGTGISMSACREIALLRELKHPNVISLQKVFLSHADRKVWLLFDYAEHDLWHIIKFHRASKANKKPVQLPRGMVKSLLYQILDGIHYLHANWVLHRDLKPANILVMGEGPERGRVKIADMGFARLFNSPLKPLADLDPVVVTFWYRAPELLLGARHYTKAIDIWAIGCIFAELLTSEPIFHCRQEDIKTSNPYHHDQLDRIFNVMGFPADKDWEDIKKMPEHSTLMKDFRRNTYTNCSLIKYMEKHKVKPDSKAFHLLQKLLTMDPIKRITSEQAMQDPYFLEDPLPTSDVFAGCQIPYPKREFLTEEEPDDKGDKKNQQQQQGNNHTNGTGHPGNQDSSHTQGPPLKKVRVVPPTTTSGGLIMTSDYQRSNPHAAYPNPGPSTSQPQSSMGYSATSQQPPQYSHQTHRY*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001260.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation, startExonPhase); - - } - - if (id.WithVersion == "NM_000314.4" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - var newRegions = new List(); - - var oldExon = transcriptRegions[0]; - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, 89623860, - oldExon.CdnaStart, 666); - var gap1 = new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, oldExon.End, 667, - oldExon.CdnaEnd - 1); - - newRegions.Add(exon1a); - newRegions.Add(gap1); - newRegions.Add(exon1b); - - for (int i = 1; i < transcriptRegions.Length; i++) - { - var region = 
transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart - 1, region.CdnaEnd - 1)); - } - - transcriptRegions = newRegions.ToArray(); - - rnaEdits = new IRnaEdit[3]; - rnaEdits[0] = new RnaEdit(667, 667, null); - rnaEdits[1] = new RnaEdit(707, 707, "C"); - rnaEdits[2] = new RnaEdit(5548, 5547, "AAAAAAAAAAAAAAAAAAAAAAAAAA"); - - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, 1032, 2243, 1212); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, translation.PeptideSeq); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_000314.4", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation, startExonPhase); - - } - - if (id.WithVersion == "NM_000535.5" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[2]; - rnaEdits[0] = new RnaEdit(1708, 1708, "G"); - rnaEdits[1] = new RnaEdit(2837, 2836, "AAAAAAAAAAAAAAA"); - - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, 88, 2676, 2589); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, translation.PeptideSeq); - - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_000535.5", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); - - } - - if (id.WithVersion == "NM_000545.5" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[2]; - rnaEdits[0] = new RnaEdit(1743, 1743, "G"); - rnaEdits[1] = new RnaEdit(3240, 3239, "AA"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_000545.5", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001145076.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[3]; - 
rnaEdits[0] = new RnaEdit(935, 935, "G"); - rnaEdits[1] = new RnaEdit(1232, 1232, "G"); - rnaEdits[2] = new RnaEdit(5376, 5375, "AAAAAAAAAAAAAAAA"); - - TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001145076.1", gene.OnReverseStrand, - transcriptRegions, - rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220765.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[5]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(316, 315, "CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCG
CCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAAC
ATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAA
CAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - - for (int i = 1; i < transcriptRegions.Length; i++) - { - var region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367353, but because of RNA-edits, our cDNA end is now at 1602 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367353, 169, 1602, 1434); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220765.1", 
gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - - } - - if (id.WithVersion == "NM_001220766.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(317, 318, null); - rnaEdits[5] = new RnaEdit(321, 320, - "AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCAC
GGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGA
GTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCA
AAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, - 329, 330)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, - 330, 331)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367358, but because of RNA-edits, our cDNA end is now at 1467 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367358, 169, 1467, 1299); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220766.1", 
gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - - } - - if (id.WithVersion == "NM_001220767.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(317, 318, null); - rnaEdits[5] = new RnaEdit(321, 320, "AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCT
ACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGT
AACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion 
oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, - 329, 330)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, - 330, 331)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367358, but because of RNA-edits, our cDNA end is now at 1467 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367358, 169, 1311, 1143); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220767.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220769.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = 
new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(317, 318, null); - rnaEdits[5] = new RnaEdit(321, 320, "AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCG
AGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGT
GTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new 
TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, - 329, 330)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, - 330, 331)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367358, but because of RNA-edits, our cDNA end is now at 1341 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367358, 169, 1341, 1173); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220769.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220770.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, 
"C"); - rnaEdits[4] = new RnaEdit(317, 318, null); - rnaEdits[5] = new RnaEdit(321, 320, - "AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAG
AAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATC
TCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < 
transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, - 329, 330)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, - 330, 331)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367358, but because of RNA-edits, our cDNA end is now at 1341 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367358, 169, 1311, 1143); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220770.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220768.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(316, 315, "CCA"); - rnaEdits[5] = new RnaEdit(320, 319, 
"TGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAAC
ACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGC
TGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - 
newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367354, 50367357, - 332, 335)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367357, but because of RNA-edits, our cDNA end is now at 1467 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367357, 169, 1467, 1299); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220768.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - - } - - if (id.WithVersion == "NM_006060.4" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[6]; - rnaEdits[0] = new RnaEdit(1, 0, "GAATTCCGGCGT"); - rnaEdits[1] = new RnaEdit(6, 5, "A"); - rnaEdits[2] = new RnaEdit(16, 16, "T"); - rnaEdits[3] = new RnaEdit(97, 97, "C"); - rnaEdits[4] = new RnaEdit(316, 315, "CCA"); - rnaEdits[5] = new RnaEdit(320, 319, 
"TGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATT
CTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAG
TAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"); - - var newRegions = new List(); - ITranscriptRegion oldExon = transcriptRegions[0]; - - var exon1a = new 
TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start, oldExon.Start + 5, 13, - 17); - var exon1b = new TranscriptRegion(TranscriptRegionType.Exon, 1, oldExon.Start + 6, oldExon.End, 19, - oldExon.CdnaEnd + 13); - - newRegions.Add(exon1a); - newRegions.Add(exon1b); - ITranscriptRegion region; - - for (int i = 1; i < transcriptRegions.Length - 1; i++) - { - region = transcriptRegions[i]; - newRegions.Add(new TranscriptRegion(region.Type, region.Id, region.Start, region.End, - region.CdnaStart + 13, region.CdnaEnd + 13)); - } - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, - 209, 329)); - - newRegions.Add(new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367354, 50367357, - 332, 335)); - - transcriptRegions = newRegions.ToArray(); - - // in genomic coordinates we only have enough information until 50367357, but because of RNA-edits, our cDNA end is now at 1728 - var oldCodingRegion = translation.CodingRegion; - var codingRegion = new CodingRegion(oldCodingRegion.Start, 50367357, 169, 1728, 1560); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_006060.4", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220775.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG"), - new RnaEdit(4, 3, "C"), - new RnaEdit(5325, 5324, "AAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - // insertion - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459424, 204, 206), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459425, 50459561, 208, 343), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459562, 50467615, 343, 344), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 344, 5527) - }; - - startExonPhase = 0; - - var codingRegion = new CodingRegion(50459422, 50468325, 169, 1053, 885); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220775.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220774.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAA"), - new RnaEdit(5, 8, null), - new RnaEdit(5427, 5426, "AAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455032, 50455035, 202, 205), - new TranscriptRegion(TranscriptRegionType.Gap, 1, 50455036, 50455039, 205, 206), 
- new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455040, 50455168, 206, 334), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50455169, 50459426, 334, 335), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 50459427, 50459531, 335, 439), - new TranscriptRegion(TranscriptRegionType.Intron, 2, 50459532, 50467615, 439, 440), - new TranscriptRegion(TranscriptRegionType.Exon, 3, 50467616, 50472799, 440, 5623) - }; - - var codingRegion = new CodingRegion(50455032, 50468325, 169, 1149, 981); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220774.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220776.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG"), - new RnaEdit(4, 3, "C"), - new RnaEdit(5295, 5294, "AAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - // insertion - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459424, 204, 206), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459425, 50459531, 208, 313), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459532, 50467615, 313, 314), - new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 314, 5497), - }; - - startExonPhase = 0; - - var codingRegion = new 
CodingRegion(50459422, 50468325, 169, 1023, 855); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220776.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - if (id.WithVersion == "NM_001220772.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, - "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTT"), - new RnaEdit(5188, 5187, "AAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50467613, 50472799, 206, 5392), - }; - - startExonPhase = 0; - - var codingRegion = new CodingRegion(50467613, 50468325, 169, 918, 750); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220772.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - // - } - - if (id.WithVersion == "NM_001220771.1" && sequenceProvider.Assembly != GenomeAssembly.GRCh38) - { - rnaEdits = new IRnaEdit[] - { - new RnaEdit(1, 0, "GAATTCCGGCGT"), - new RnaEdit(6, 5, "A"), - new RnaEdit(16, 16, "T"), - new RnaEdit(97, 97, 
"C"), - new RnaEdit(316, 315, "CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCC
TGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCA
CTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") - }; - - transcriptRegions = new ITranscriptRegion[] - { - // insertion - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), - new TranscriptRegion(TranscriptRegionType.Exon, 1, 50358644, 50358697, 155, 208), - new TranscriptRegion(TranscriptRegionType.Intron, 1, 50358698, 50367233, 208, 209), - new 
TranscriptRegion(TranscriptRegionType.Exon, 1, 50367234, 50367353, 209, 328), - }; - - startExonPhase = 0; - - var codingRegion = new CodingRegion(50358658, 50367353, 169, 1299, 1131); - translation = new Translation(codingRegion, (CompactId) translation.ProteinId, - "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); - - // TranscriptValidator.Validate(sequenceProvider, chromosome, "NM_001220771.1", gene.OnReverseStrand, - // transcriptRegions, - // rnaEdits, translation); - - } - - return new Transcript(chromosomeIndexDictionary[referenceIndex], start, end, id, translation, - encoded.BioType, gene, ExonUtilities.GetTotalExonLength(transcriptRegions), startExonPhase, - encoded.IsCanonical, transcriptRegions, numExons, mirnas, siftIndex, polyphenIndex, - encoded.TranscriptSource, encoded.CdsStartNotFound, encoded.CdsEndNotFound, selenocysteines, rnaEdits); - } - - /// - /// writes the transcript to the binary writer - /// - public void Write(IExtendedBinaryWriter writer, Dictionary geneIndices, - Dictionary transcriptRegionIndices, Dictionary microRnaIndices, - Dictionary peptideIndices) - { - // transcript - writer.WriteOpt(Chromosome.Index); - writer.WriteOpt(Start); - writer.WriteOpt(End); - // ReSharper disable once ImpureMethodCallOnReadonlyValueField - Id.Write(writer); - - // gene - writer.WriteOpt(GetIndex(Gene, geneIndices)); - - // encoded data - var encoded = EncodedTranscriptData.GetEncodedTranscriptData(BioType, CdsStartNotFound, CdsEndNotFound, - Source, IsCanonical, SiftIndex != -1, PolyPhenIndex != -1, MicroRnas != null, RnaEdits != null, - Selenocysteines != null, TranscriptRegions != null, Translation != null, 
StartExonPhase); - encoded.Write(writer); - - // transcript regions - if (encoded.HasTranscriptRegions) WriteIndices(writer, TranscriptRegions, transcriptRegionIndices); - writer.WriteOpt(NumExons); - - // protein function predictions - if (encoded.HasSift) writer.WriteOpt(SiftIndex); - if (encoded.HasPolyPhen) writer.WriteOpt(PolyPhenIndex); - - // translation - if (encoded.HasTranslation) - { - // ReSharper disable once PossibleNullReferenceException - var peptideIndex = GetIndex(Translation.PeptideSeq, peptideIndices); - Translation.Write(writer, peptideIndex); - } - - // attributes - if (encoded.HasMirnas) WriteIndices(writer, MicroRnas, microRnaIndices); - if (encoded.HasRnaEdits) WriteItems(writer, RnaEdits, (x, y) => x.Write(y)); - if (encoded.HasSelenocysteines) WriteItems(writer, Selenocysteines, (x, y) => y.WriteOpt(x)); - } - - private static T[] ReadItems(BufferedBinaryReader reader, Func readFunc) - { - int numItems = reader.ReadOptInt32(); - var items = new T[numItems]; - for (int i = 0; i < numItems; i++) items[i] = readFunc(reader); - return items; - } - - private static void WriteItems(IExtendedBinaryWriter writer, T[] items, Action writeAction) - { - writer.WriteOpt(items.Length); - foreach (var item in items) writeAction(item, writer); - } - - private static T[] ReadIndices(IBufferedBinaryReader reader, T[] cachedItems) - { - int numItems = reader.ReadOptInt32(); - var items = new T[numItems]; - - for (int i = 0; i < numItems; i++) - { - var index = reader.ReadOptInt32(); - items[i] = cachedItems[index]; - } - - return items; - } - - private static void WriteIndices(IExtendedBinaryWriter writer, T[] items, IReadOnlyDictionary indices) - { - writer.WriteOpt(items.Length); - foreach (var item in items) writer.WriteOpt(GetIndex(item, indices)); - } - - private static int GetIndex(T item, IReadOnlyDictionary indices) - { - if (item == null) return -1; - - if (!indices.TryGetValue(item, out var index)) - { - throw new InvalidDataException($"Unable to 
locate the {typeof(T)} in the indices: {item}"); - } - - return index; - } - } +using System; +using System.Collections.Generic; +using System.IO; +using Genome; +using Intervals; +using IO; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.Utilities; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class Transcript : ITranscript + { + public IChromosome Chromosome { get; } + public int Start { get; } + public int End { get; } + public ICompactId Id { get; } + public BioType BioType { get; } + public bool IsCanonical { get; } + public Source Source { get; } + public IGene Gene { get; } + public ITranscriptRegion[] TranscriptRegions { get; } + public ushort NumExons { get; } + public int TotalExonLength { get; } + public byte StartExonPhase { get; } + public int SiftIndex { get; } + public int PolyPhenIndex { get; } + public ITranslation Translation { get; } + public IInterval[] MicroRnas { get; } + public IRnaEdit[] RnaEdits { get; } + public AminoAcidEdit[] AminoAcidEdits { get; set; } + public bool CdsStartNotFound { get; } + public bool CdsEndNotFound { get; } + public ISequence CodingSequence { get; set; } + public ISequence CdnaSequence { get; set; } + + public Transcript(IChromosome chromosome, int start, int end, ICompactId id, ITranslation translation, + BioType bioType, IGene gene, int totalExonLength, byte startExonPhase, bool isCanonical, + ITranscriptRegion[] transcriptRegions, ushort numExons, IInterval[] microRnas, int siftIndex, + int polyPhenIndex, Source source, bool cdsStartNotFound, bool cdsEndNotFound, + IRnaEdit[] rnaEdits) + { + Chromosome = chromosome; + Start = start; + End = end; + Id = id; + Translation = translation; + BioType = bioType; + Gene = gene; + TotalExonLength = totalExonLength; + StartExonPhase = startExonPhase; + IsCanonical = isCanonical; + TranscriptRegions = 
transcriptRegions; + NumExons = numExons; + MicroRnas = microRnas; + SiftIndex = siftIndex; + PolyPhenIndex = polyPhenIndex; + Source = source; + CdsStartNotFound = cdsStartNotFound; + CdsEndNotFound = cdsEndNotFound; + RnaEdits = rnaEdits; + } + + // SET-362 DEBUG: Remove the sequenceProvider argument in the future + public static ITranscript Read(BufferedBinaryReader reader, + IDictionary chromosomeIndexDictionary, IGene[] cacheGenes, + ITranscriptRegion[] cacheTranscriptRegions, IInterval[] cacheMirnas, string[] cachePeptideSeqs, + ISequenceProvider sequenceProvider) + { + // transcript + ushort referenceIndex = reader.ReadOptUInt16(); + int start = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + var id = CompactId.Read(reader); + + // gene + int geneIndex = reader.ReadOptInt32(); + var gene = cacheGenes[geneIndex]; + + // encoded data + var encoded = EncodedTranscriptData.Read(reader); + + // transcript regions + ITranscriptRegion[] transcriptRegions = + encoded.HasTranscriptRegions ? ReadIndices(reader, cacheTranscriptRegions) : null; + ushort numExons = reader.ReadOptUInt16(); + + // protein function predictions + int siftIndex = encoded.HasSift ? reader.ReadOptInt32() : -1; + int polyphenIndex = encoded.HasPolyPhen ? reader.ReadOptInt32() : -1; + + // translation + var translation = encoded.HasTranslation ? DataStructures.Translation.Read(reader, cachePeptideSeqs) : null; + + // attributes + IInterval[] mirnas = encoded.HasMirnas ? ReadIndices(reader, cacheMirnas) : null; + IRnaEdit[] rnaEdits = encoded.HasRnaEdits ? ReadItems(reader, RnaEdit.Read) : null; + int[] selenocysteines = encoded.HasSelenocysteines ? 
ReadItems(reader, x => x.ReadOptInt32()) : null; + + var chromosome = chromosomeIndexDictionary[referenceIndex]; + + byte startExonPhase = encoded.StartExonPhase; + string transcriptId = id.WithVersion; + + if (sequenceProvider.Assembly == GenomeAssembly.GRCh37) + { + bool updatedGeneModel = false; + + if (transcriptId == "NM_022148.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "AATTCGGCACGAGG"), + new RnaEdit(770, 769, "A"), + new RnaEdit(772, 772, "G"), + new RnaEdit(774, 774, "A"), + new RnaEdit(777, 777, "A"), + new RnaEdit(779, 779, "T"), + new RnaEdit(780, 779, "TT"), + new RnaEdit(783, 783, "A"), + new RnaEdit(785, 785, "T"), + new RnaEdit(788, 790, "CAG"), + new RnaEdit(795, 794, + "CCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTCACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGATGGACACACCACTGTCAAAGTCAACGTCAGGATCCACGTTGACATTTAAAGACAGAGGGGACTGTCCCGGGGACTCCACACCACCATGGATGGGAAGTCTCCACGCCAATGATGGTAGGACTAGGAGACTCTGAAGACCCAGCCTCACCGCCTAATGCGGCCACTGCCCTGCTAACTTTCCCCCACATGAGTCTCTGTGTTCAAAGGCTTGATGGCAGATGGGAGCCAATTGCTCCAGGAGATTTACTCCCAGTTCCTTTTCGTGCCTGAACGTTGTCACATAAACCCCAAGGCAGCACGTCCAAAATGCTGTAAAACCATCTTCCCACTCTGTGAGTCCCCAGTTCCGTCCATGTACCTGTTCCATAGCATTGGATTCTCGGAGGATTTTTTGTCTGTTTTGAGACTCCAAACCACCTCTACCCCTACAAAAAAAAAAAAAAAAAA") + }; + + if (chromosome.UcscName == "chrX") + { + // we have two RNA-edit insertions in exon 6 - so it's split into three intervals + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314869, 1314883, 797, 811), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314884, 1314893, 785, 794), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314894, 1315014, 663, 783), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 1315015, 1317418, 662, 663), + 
new TranscriptRegion(TranscriptRegionType.Exon, 5, 1317419, 1317581, 500, 662), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1317582, 1321271, 499, 500), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1321272, 1321405, 366, 499), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1321406, 1325325, 365, 366), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1325326, 1325492, 199, 365), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1325493, 1327698, 198, 199), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1327699, 1327801, 96, 198), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1327802, 1331448, 95, 96), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1331449, 1331529, 15, 95) + }; + + // covers 17-811, 812-1132 are covered by RNA-edit + var codingRegion = new CodingRegion(1314869, 1331527, 17, 1132, 1116); + + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + + updatedGeneModel = true; + } + + if (chromosome.UcscName == "chrY") + { + // we have two RNA-edit insertions in exon 6 - so it's split into three intervals + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264869, 1264883, 797, 811), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264884, 1264893, 785, 794), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264894, 1265014, 663, 783), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 1265015, 1267418, 662, 663), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1267419, 1267581, 500, 662), + new 
TranscriptRegion(TranscriptRegionType.Intron, 4, 1267582, 1271271, 499, 500), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1271272, 1271405, 366, 499), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1271406, 1275325, 365, 366), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1275326, 1275492, 199, 365), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1275493, 1277698, 198, 199), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1277699, 1277801, 96, 198), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1277802, 1281448, 95, 96), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1281449, 1281529, 15, 95) + }; + + // covers 17-811, 812-1132 are covered by RNA-edit + var codingRegion = new CodingRegion(1264869, 1281527, 17, 1132, 1116); + + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + + updatedGeneModel = true; + } + } + + if (transcriptId == "NM_001012288.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(665, 664, "A"), + new RnaEdit(667, 667, "G"), + new RnaEdit(672, 672, "A"), + new RnaEdit(674, 674, "T"), + new RnaEdit(675, 674, "TT"), + new RnaEdit(678, 678, "A"), + new RnaEdit(680, 680, "T"), + new RnaEdit(683, 685, "CAG"), + new RnaEdit(690, 689, + "CCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGGAAGTGGCCCTGAGGAGCCCCTGGTGGTCCAGTTGGCCAAGACTGAAGCCGAGTCCCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGTGGTGATGTGGTCACAATCGGGGACTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGA"), + }; + + if (chromosome.UcscName == 
"chrX") + { + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314869, 1314883, 678, 692), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314884, 1314893, 666, 675), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314894, 1315014, 544, 664), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1315015, 1317418, 543, 544), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1317419, 1317581, 381, 543), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1317582, 1321271, 380, 381), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1321272, 1321405, 247, 380), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1321406, 1325325, 246, 247), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1325326, 1325492, 80, 246), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1325493, 1331448, 79, 80), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1331449, 1331527, 1, 79), + }; + + var codingRegion = new CodingRegion(1314869, 1325338, 234, 1013, 780); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVRKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQGSGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGDFTFVMNDRSYVAL*"); + } + + if (chromosome.UcscName == "chrY") + { + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264869, 1264883, 678, 692), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264884, 1264893, 666, 675), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264894, 1265014, 544, 664), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1265015, 1267418, 543, 544), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1267419, 1267581, 381, 543), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1267582, 1271271, 380, 381), + new 
TranscriptRegion(TranscriptRegionType.Exon, 3, 1271272, 1271405, 247, 380), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1271406, 1275325, 246, 247), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1275326, 1275492, 80, 246), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1275493, 1281448, 79, 80), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1281449, 1281527, 1, 79), + }; + + var codingRegion = new CodingRegion(1264869, 1275338, 234, 1013, 780); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVRKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQGSGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGDFTFVMNDRSYVAL*"); + } + } + + if (transcriptId == "NM_001012288.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(754, 753, "A"), + new RnaEdit(756, 756, "G"), + new RnaEdit(758, 758, "A"), + new RnaEdit(761, 761, "A"), + new RnaEdit(763, 763, "T"), + new RnaEdit(764, 763, "TT"), + new RnaEdit(767, 767, "A"), + new RnaEdit(769, 769, "T"), + new RnaEdit(772, 774, "CAG"), + new RnaEdit(779, 778, + "CCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTCACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGATGGACACACCACTGTCAAAGTCAACGTCAGGATCCACGTTGACATTTAAAGACAGAGGGGACTGTCCCGGGGACTCCACACCACCATGGATGGGAAGTCTCCACGCCAATGATGGTAGGACTAGGAGACTCTGAAGACCCAGCCTCACCGCCTAATGCGGCCACTGCCCTGCTAACTTTCCCCCACATGAGTCTCTGTGTTCAAAGGCTTGATGGCAGATGGGAGCCAATTGCTCCAGGAGATTTACTCCCAGTTCCTTTTCGTGCCTGAACGTTGTCACATAAACCCCAAGGCAGCACGTCCAAAATGCTGTAAAACCATCTTCCCACTCTGTGAGTCCCCAGTTCCGTCCATGTACCATTCCCATAGCATTGGATTCTCGGAGGATTTTTTGTCTGTTT"), + }; + + if (chromosome.UcscName == "chrX") + { + 
transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314869, 1314883, 767, 781), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314884, 1314893, 755, 764), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1314894, 1315014, 633, 753), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1315015, 1317418, 632, 633), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1317419, 1317581, 470, 632), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1317582, 1321271, 469, 470), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1321272, 1321405, 336, 469), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1321406, 1325325, 335, 336), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1325326, 1325492, 169, 335), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1325493, 1331448, 168, 169), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1331449, 1331616, 1, 168), + }; + + var codingRegion = new CodingRegion(1314869, 1325338, 323, 1102, 780); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + } + + if (chromosome.UcscName == "chrY") + { + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264869, 1264883, 767, 781), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264884, 1264893, 755, 764), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1264894, 1265014, 633, 753), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1265015, 1267418, 632, 633), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1267419, 1267581, 470, 632), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1267582, 1271271, 469, 470), + new 
TranscriptRegion(TranscriptRegionType.Exon, 3, 1271272, 1271405, 336, 469), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1271406, 1275325, 335, 336), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1275326, 1275492, 169, 335), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1275493, 1281448, 168, 169), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1281449, 1281616, 1, 168), + }; + + var codingRegion = new CodingRegion(1264869, 1275338, 323, 1102, 780); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + } + } + + if (transcriptId == "NM_022148.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(857, 856, "A"), + new RnaEdit(859, 859, "G"), + new RnaEdit(861, 861, "A"), + new RnaEdit(864, 864, "A"), + new RnaEdit(866, 866, "T"), + new RnaEdit(867, 866, "TT"), + new RnaEdit(870, 870, "A"), + new RnaEdit(872, 872, "T"), + new RnaEdit(875, 877, "CAG"), + new RnaEdit(882, 881, + "CCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTCACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGATGGACACACCACTGTCAAAGTCAACGTCAGGATCCACGTTGACATTTAAAGACAGAGGGGACTGTCCCGGGGACTCCACACCACCATGGATGGGAAGTCTCCACGCCAATGATGGTAGGACTAGGAGACTCTGAAGACCCAGCCTCACCGCCTAATGCGGCCACTGCCCTGCTAACTTTCCCCCACATGAGTCTCTGTGTTCAAAGGCTTGATGGCAGATGGGAGCCAATTGCTCCAGGAGATTTACTCCCAGTTCCTTTTCGTGCCTGAACGTTGTCACATAAACCCCAAGGCAGCACGTCCAAAATGCTGTAAAACCATCTTCCCACTCTGTGAGTCCCCAGTTCCGTCCATGTACCATTCCCATAGCATTGGATTCTCGGAGGATTTTTTGTCTGTTT"), + }; + + if (chromosome.UcscName == "chrX") + { + 
transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314869, 1314883, 870, 884), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314884, 1314893, 858, 867), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1314894, 1315014, 736, 856), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 1315015, 1317418, 735, 736), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1317419, 1317581, 573, 735), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1317582, 1321271, 572, 573), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1321272, 1321405, 439, 572), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1321406, 1325325, 438, 439), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1325326, 1325492, 272, 438), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1325493, 1327698, 271, 272), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1327699, 1327801, 169, 271), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1327802, 1331448, 168, 169), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1331449, 1331616, 1, 168), + }; + + var codingRegion = new CodingRegion(1314869, 1331527, 90, 1205, 1116); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + } + + if (chromosome.UcscName == "chrY") + { + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264869, 1264883, 870, 884), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264884, 1264893, 858, 867), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 1264894, 1265014, 736, 
856), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 1265015, 1267418, 735, 736), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 1267419, 1267581, 573, 735), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 1267582, 1271271, 572, 573), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 1271272, 1271405, 439, 572), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 1271406, 1275325, 438, 439), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 1275326, 1275492, 272, 438), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 1275493, 1277698, 271, 272), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 1277699, 1277801, 169, 271), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 1277802, 1281448, 168, 169), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 1281449, 1281616, 1, 168), + }; + + var codingRegion = new CodingRegion(1264869, 1281527, 90, 1205, 1116); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"); + } + } + + if (transcriptId == "NM_012234.6") + { + // first exon starts at 72495647, so the genomic portion of the coding region is clipped + var codingRegion = new CodingRegion(72427536, 72495647, 184, 870, 688); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MTMGDKKSPTRPKRQAKPAADEGFWDCSVCTFRNSAEAFKCSICDVRKGTSTRKPRINSQLVAQQVAQQYATPPPPKKEKKEKVEKQDKEKPEKDKEISPSVTKKNTNKKTKPKSDILKDPPSEANSIQSANATTKTSETNHTSRPRLKNVDRSTAQQLAVTVGNVTVIITDFKEKTRSSSTSSSTVTSSAGSEQQNQSSSGSESTDKGSSRSSTPKGDMSAVNDESF*"); + + updatedGeneModel = true; + } + + // NM_001220773.1 + if (transcriptId == 
"NM_001220773.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAA"), + new RnaEdit(5, 8, null), + new RnaEdit(5457, 5456, "AAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455032, 50455035, 202, 205), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 50455036, 50455039, 205, 206), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455040, 50455168, 206, 334), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50455169, 50459426, 334, 335), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459427, 50459561, 335, 469), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459562, 50467615, 469, 470), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50467616, 50472799, 470, 5653) + }; + + var codingRegion = new CodingRegion(50455032, 50468325, 169, 1179, 1011); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + updatedGeneModel = true; + } + + if (transcriptId == "NM_152756.3") + { + var newRnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GG"), + new RnaEdit(3196, 3196, "T") + }; + + rnaEdits = newRnaEdits; + + var oldCodingRegion = translation.CodingRegion; + var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, 25, 5151, 5127); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + translation.PeptideSeq); + updatedGeneModel = true; 
+ } + + if (transcriptId == "NM_001242758.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(40, 40, "T"), + new RnaEdit(287, 287, "A"), + new RnaEdit(355, 355, "A"), + new RnaEdit(366, 366, "C"), + new RnaEdit(383, 383, "C"), + new RnaEdit(385, 385, "A"), + new RnaEdit(425, 425, "A"), + new RnaEdit(469, 469, "C"), + new RnaEdit(573, 573, "A"), + new RnaEdit(605, 605, "T"), + new RnaEdit(611, 611, "C"), + new RnaEdit(622, 623, "CG"), + new RnaEdit(629, 629, "T"), + new RnaEdit(639, 639, "G"), + new RnaEdit(643, 643, "C"), + new RnaEdit(643, 644, "CG"), + new RnaEdit(654, 655, "CG"), + new RnaEdit(1161, 1161, "T"), + new RnaEdit(1324, 1324, "G"), + new RnaEdit(1380, 1380, "T"), + new RnaEdit(1492, 1492, "G"), + new RnaEdit(1580, 1580, "T"), + new RnaEdit(1588, 1589, "CG") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + "MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQKMEPRAPWIEQEGPEYWDQETRNMKAHSQTDRANLGTLRGYYNQSEDGSHTIQIMYGCDVGPDGRFLRGYRQDAYDGKDYIALNEDLRSWTAADMAAQITKRKWEAVHAAEQRRVYLEGRCVDGLRRYLENGKETLQRTDPPKTHMTHHPISDHEATLRCWALGFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPKPLTLRWELSSQPTIPIVGIIAGLVLLGAVITGAVVAAVMWRRKSSDRKGGSYTQAASSDSAQGSDVSLTACKV*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_002447.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(3847, 3847, "G"), + new RnaEdit(4773, 4772, "AAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MELLPPLPQSFLLLLLLPAKPAAGEDWQCPRTPYAASRDFDVKYVVPSFSAGGLVQAMVTYEGDRNESAVFVAIRNRLHVLGPDLKSVQSLATGPAGDPGCQTCAACGPGPHGPPGDTDTKVLVLDPALPALVSCGSSLQGRCFLHDLEPQGTAVHLAAPACLFSAHHNRPDDCPDCVASPLGTRVTVVEQGQASYFYVASSLDAAVAASFSPRSVSIRRLKADASGFAPGFVALSVLPKHLVSYSIEYVHSFHTGAFVYFLTVQPASVTDDPSALHTRLARLSATEPELGDYRELVLDCRFAPKRRRRGAPEGGQPYPVLRVAHSAPVGAQLATELSIAEGQEVLFGVFVTGKDGGPGVGPNSVVCAFPIDLLDTLIDEGVERCCESPVHPGLRRGLDFFQSPSFCPNPPGLEALSPNTSCRHFPLLVSSSFSRVDLFNGLLGPVQVTALYVTRLDNVTVAHMGTMDGRILQVELVRSLNYLLYVSNFSLGDSGQPVQRDVSRLGDHLLFASGDQVFQVPIQGPGCRHFLTCGRCLRAWHFMGCGWCGNMCGQQKECPGSWQQDHCPPKLTEFHPHSGPLRGSTRLTLCGSNFYLHPSGLVPEGTHQVTVGQSPCRPLPKDSSKLRPVPRKDFVEEFECELEPLGTQAVGPTNVSLTVTNMPPGKHFRVDGTSVLRGFSFMEPVLIAVQPLFGPRAGGTCLTLEGQSLSVGTSRAVLVNGTECLLARVSEGQLLCATPPGATVASVPLSLQVGGAQVPGSWTFQYREDPVVLSISPNCGYINSHITICGQHLTSAWHLVLSFHDGLRAVESRCERQLPEQQLCRLPEYVVRDPQGWVAGNLSARGDGAAGFTLPGFRFLPPPHPPSANLVPLKPEEHAIKFEYIGLGAVADCVGINVTVGGESCQHEFRGDMVVCPLPPSLQLGQDGAPLQVCVDGECHILGRVVRPGPDGVPQSTLLGILLPLLLLVAALATALVFSYWWRRKQLVLPPNLNDLASLDQTAGATPLPILYSGSDYRSGLALPAIDGLDSTTCVHGASFSDSEDESCVPLLRKESIQLRDLDSALLAEVKDVLIPHERVVTHSDRVIGKGHFGVVYHGEYIDQAQNRIQCAIKSLSRITEMQQVEAFLREGLLMRGLNHPNVLALIGIMLPPEGLPHVLLPYMCHGDLLQFIRSPQRNPTVKDLISFGLQVARGMEYLAEQKFVHRDLAARNCMLDESFTVKVADFGLARDILDREYYSVQQHRHARLPVKWMALESLQTYRFTTKSDVWSFGVLLWELLTRGAPPYRHIDPFDLTHFLAQGRRLPQPEYCPDSLYQVMQQCWEADPAVRPTFRVLVGEVEQIVSALLGDHYVQLPATYMNLGPSTSHEMNVRPEQPQFSPMPGNVRRPRPLSEPPRPT*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_005228.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(2955, 2955, "C"), + new RnaEdit(5601, 5600, "AAAAAAAAAAAAAAAA") + }; + + updatedGeneModel = true; + } + + if (transcriptId == "NM_005922.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(612, 612, "A"), + new RnaEdit(5485, 5484, "AAAAAAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPECCLAARQEGTLGDSACKSPESDLEDFSDETNTENLYGTSPPSTPRQMKRMSTKHQRNNVGRPASRSNLKEKMNAPNQPPHKDTGKTVENVEEYSYKQEKKIRAALRTTERDHKKNVQCSFMLDSVGGSLPKKSIPDVDLNKPYLSLGCSNAKLPVSVPMPIARPARQTSRTDCPADRLKFFETLRLLLKLTSVSKKKDREQRGQENTSGFWLNRSNELIWLELQAWHAGRTINDQDFFLYTARQAIPDIINEILTFKVDYGSFAFVRDRAGFNGTSVEGQCKATPGTKIVGYSTHHEHLQRQRVSFEQVKRIMELLEYIEALYPSLQALQKDYEKYAAKDFQDRVQALCLWLNITKDLNQKLRIMGTVLGIKNLSDIGWPVFEIPSPRPSKGNEPEYEGDDTEGELKELESSTDESEEEQISDPRVPEIRQPIDNSFDIQSRDCISKKLERLESEDDSLGWGAPDWSTEAGFSRHCLTSIYRPFVDKALKQMGLRKLILRLHKLMDGSLQRARIALVKNDRPVEFSEFPDPMWGSDYVQLSRTPPSSEEKCSAVSWEELKAMDLPSFEPAFLVLCRVLLNVIHECLKLRLEQRPAGEPSLLSIKQLVRECKEVLKGGLLMKQYYQFMLQEVLEDLEKPDCNIDAFEEDLHKMLMVYFDYMRSWIQMLQQLPQASHSLKNLLEEEWNFTKEITHYIRGGEAQAGKLFCDIAGMLLKSTGSFLEFGLQESCAEFWTSADDSSASDEIRRSVIEISRALKELFHEARERASKALGFAKMLRKDLEIAAEFRLSAPVRDLLDVLKSKQYVKVQIPGLENLQMFVPDTLAEEKSIILQLLNAAAGKDCSKDSDDVLIDAYLLLTKHGDRARDSEDSWGTWEAQPVKVVPQVETVDTLRSMQVDNLLLVVMQSAHLTIQRKAFQQSIEGLMTLCQEQTSSQPVIAKALQQLKNDALELCNRISNAIDRVDHMFTSEFDAEVDESESVTLQQYYREAMIQGYNFGFEYHKEVVRLMSGEFRQKIGDKYISFARKWMNYVLTKCESGRGTRPRWATQGFDFLQAIEPAFISALPEDDFLSLQALMNECIGHVIGKPHSPVTGLYLAIHRNSPRPMKVPRCHSDPPNPHLIIPTPEGFSTRSMPSDARSHGSPAAAAAAAAAAVAASRPSPSGGDSVLPKSISSAHDTRGSSVPENDRLASIAAELQFRSLSRHSSPTEERDEPAYPRGDSSGSTRRSWELRTLISQSKDTASKLGPIEAIQKSVRLFEEKRYREMRRKNIIGQVCDTPKSYDNVMHVGLRKVTFKWQRGNKIGEGQYGKVYTCISVDTGELMAMKEIRFQPNDHKTIKETADELKIFEGIKHPNLVRYFGVELHREEMYIFMEYCDEGTLEEVSRLGLQEHVIRLYSKQITIAINVLHEHGIVHRDIKGANIFLTSSGLIKLGDFGCSVKLKNNAQTMPGEVNSTLGTAAYMAPEVITRAKGEGHGRAADIWSLGCVVIEMVTGKRPWHEYEHNFQIMYKVGMGHKPPIPERLSPEGKDFLSHCLESDPKMRWTASQLLDHSFVKVCTDEE*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_006724.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(612, 612, "A"), + new RnaEdit(5335, 5334, "AAAAAAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPECCLAARQEGTLGDSACKSPESDLEDFSDETNTENLYGTSPPSTPRQMKRMSTKHQRNNVGRPASRSNLKEKMNAPNQPPHKDTGKTVENVEEYSYKQEKKIRAALRTTERDHKKNVQCSFMLDSVGGSLPKKSIPDVDLNKPYLSLGCSNAKLPVSVPMPIARPARQTSRTDCPADRLKFFETLRLLLKLTSVSKKKDREQRGQENTSGFWLNRSNELIWLELQAWHAGRTINDQDFFLYTARQAIPDIINEILTFKVDYGSFAFVRDRAGFNGTSVEGQCKATPGTKIVGYSTHHEHLQRQRVSFEQVKRIMELLEYIEALYPSLQALQKDYEKYAAKDFQDRVQALCLWLNITKDLNQKLRIMGTVLGIKNLSDIGWPVFEIPSPRPSKGNEPEYEGDDTEGELKELESSTDESEEEQISDPRVPEIRQPIDNSFDIQSRDCISKKLERLESEDDSLGWGAPDWSTEAGFSRHCLTSIYRPFVDKALKQMGLRKLILRLHKLMDGSLQRARIALVKNDRPVEFSEFPDPMWGSDYVQLSRTPPSSEEKCSAVSWEELKAMDLPSFEPAFLVLCRVLLNVIHECLKLRLEQRPAGEPSLLSIKQLVRECKEVLKGGLLMKQYYQFMLQEVLEDLEKPDCNIDAFEEDLHKMLMVYFDYMRSWIQMLQQLPQASHSLKNLLEEEWNFTKEITHYIRGGEAQAGKLFCDIAGMLLKSTGSFLEFGLQESCAEFWTSADDSSASDEIRRSVIEISRALKELFHEARERASKALGFAKMLRKDLEIAAEFRLSAPVRDLLDVLKSKQYVKVQIPGLENLQMFVPDTLAEEKSIILQLLNAAAGKDCSKDSDDVLIDAYLLLTKHGDRARDSEDSWGTWEAQPVKVVPQVETVDTLRSMQVDNLLLVVMQSAHLTIQRKAFQQSIEGLMTLCQEQTSSQPVIAKALQQLKNDALELCNRISNAIDRVDHMFTSEFDAEVDESESVTLQQYYREAMIQGYNFGFEYHKEVVRLMSGEFRQKIGDKYISFARKWMNYVLTKCESGRGTRPRWATQGFDFLQAIEPAFISALPEDDFLSLQALMNECIGHVIGKPHSPVTGLYLAIHRNSPRPMKVPRCHSDPPNPHLIIPTPEGFRGSSVPENDRLASIAAELQFRSLSRHSSPTEERDEPAYPRGDSSGSTRRSWELRTLISQSKDTASKLGPIEAIQKSVRLFEEKRYREMRRKNIIGQVCDTPKSYDNVMHVGLRKVTFKWQRGNKIGEGQYGKVYTCISVDTGELMAMKEIRFQPNDHKTIKETADELKIFEGIKHPNLVRYFGVELHREEMYIFMEYCDEGTLEEVSRLGLQEHVIRLYSKQITIAINVLHEHGIVHRDIKGANIFLTSSGLIKLGDFGCSVKLKNNAQTMPGEVNSTLGTAAYMAPEVITRAKGEGHGRAADIWSLGCVVIEMVTGKRPWHEYEHNFQIMYKVGMGHKPPIPERLSPEGKDFLSHCLESDPKMRWTASQLLDHSFVKVCTDEE*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_019063.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1109, 1109, "G"), + new RnaEdit(1406, 1406, "G"), + new RnaEdit(5550, 5549, "AAAAAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MDGFAGSLDDSISAASTSDVQDRLSALESRVQQQEDEITVLKAALADVLRRLAISEDHVASVKKSVSSKGQPSPRAVIPMSCITNGSGANRKPSHTSAVSIAGKETLSSAAKSGTEKKKEKPQGQREKKEESHSNDQSPQIRASPSPQPSSQPLQIHRQTPESKNATPTKSIKRPSPAEKSHNSWENSDDSRNKLSKIPSTPKLIPKVTKTADKHKDVIINQEGEYIKMFMRGRPITMFIPSDVDNYDDIRTELPPEKLKLEWAYGYRGKDCRANVYLLPTGEIVYFIASVVVLFNYEERTQRHYLGHTDCVKCLAIHPDKIRIATGQIAGVDKDGRPLQPHVRVWDSVTLSTLQIIGLGTFERGVGCLDFSKADSGVHLCVIDDSNEHMLTVWDWQKKAKGAEIKTTNEVVLAVEFHPTDANTIITCGKSHIFFWTWSGNSLTRKQGIFGKYEKPKFVQCLAFLGNGDVLTGDSGGVMLIWSKTTVEPTPGKGPKGVYQISKQIKAHDGSVFTLCQMRNGMLLTGGGKDRKIILWDHDLNPEREIEVPDQYGTIRAVAEGKADQFLVGTSRNFILRGTFNDGFQIEVQGHTDELWGLATHPFKDLLLTCAQDRQVCLWNSMEHRLEWTRLVDEPGHCADFHPSGTVVAIGTHSGRWFVLDAETRDLVSIHTDGNEQLSVMRYSIDGTFLAVGSHDNFIYLYVVSENGRKYSRYGRCTGHSSYITHLDWSPDNKYIMSNSGDYEILYWDIPNGCKLIRNRSDCKDIDWTTYTCVLGFQVFGVWPEGSDGTDINALVRSHNRKVIAVADDFCKVHLFQYPCSKAKAPSHKYSAHSSHVTNVSFTHNDSHLISTGGKDMSIIQWKLVEKLSLPQNETVADTTLTKAPVSSTESVIQSNTPTPPPSQPLNETAEEESRISSSPTLLENSLEQTVEPSEDHSEEESEEGSGDLGEPLYEEPCNEISKEQAKATLLEDQQDPSPSS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_175741.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(220, 220, "T"), + new RnaEdit(380, 380, "C") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MASDGASALPGPDMSMKPSAALSPSPALPFLPPTSDPPDHPPREPPPQPIMPSVFSPDNPLMLSAFPSSLLVTGDGGPCLSGAGAGKVIVKVKTEGGSAEPSQTQNFILTQTALNSTAPGTPCGGLEGPAPPFVTASNVKTILPSKAVGVSQEGPPGLPPQPPPPVAQLVPIVPLEKAWPGPHGTTGEGGPVATLSKPSLGDRSKISKDVYENFRQWQRYKALARRHLSQSPDTEALSCFLIPVLRSLARLKPTMTLEEGLPLAVQEWEHTSNFDRMIFYEMAERFMEFEAEEMQIQNTQLMNGSQGLSPATPLKLDPLGPLASEVCQQPVYIPKKAASKTRAPRRRQRKAQRPPAPEAPKEIPPEAVKEYVDIMEWLVGTHLATGESDGKQEEEGQQQEEEGMYPDPGLLSYINELCSQKVFVSKVEAVIHPQFLADLLSPEKQRDPLALIEELEQEEGLTLAQLVQKRLMALEEEEDAEAPPSFSGAQLDSSPSGSVEDEDGDGRLRPSPGLQGAGGAACLGKVSSSGKRAREVHGGQEQALDSPRGMHRDGNTLPSPSSWDLQPELAAPQGTPGPLGVERRGSGKVINQVSLHQDGHLGGAGPPGHCLVADRTSEALPLCWQGGFQPESTPSLDAGLAELAPLQGQGLEKQVLGLQKGQQTGGRGVLPQGKEPLAVPWEGSSGAMWGDDRGTPMAQSYDQNPSPRAAGERDDVCLSPGVWLSSEMDAVGLELPVQIEEVIESFQVEKCVTEYQEGCQGLGSRGNISLGPGETLVPGDTESSVIPCGGTVAAAALEKRNYCSLPGPLRANSPPLRSKENQEQSCETVGHPSDLWAEGCFPLLESGDSTLGSSKETLPPTCQGNLLIMGTEDASSLPEASQEAGSRGNSFSPLLETIEPVNILDVKDDCGLQLRVSEDTCPLNVHSYDPQGEGRVDPDLSKPKNLAPLQESQESYTTGTPKATSSHQGLGSTLPRRGTRNAIVPRETSVSKTHRSADRAKGKEKKKKEAEEEDEELSNFAYLLASKLSLSPREHPLSPHHASGGQGSQRASHLLPAGAKGPSKLPYPVAKSGKRALAGGPAPTEKTPHSGAQLGVPREKPLALGVVRPSQPRKRRCDSFVTGRRKKRRRSQ*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NR_003085.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1703, 1703, "G"), + new RnaEdit(2832, 2831, "AAAAAAAAAAAAAAA") + }; + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001244937.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(3700, 3700, "G"), + new RnaEdit(4626, 4625, "AAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"MELLPPLPQSFLLLLLLPAKPAAGEDWQCPRTPYAASRDFDVKYVVPSFSAGGLVQAMVTYEGDRNESAVFVAIRNRLHVLGPDLKSVQSLATGPAGDPGCQTCAACGPGPHGPPGDTDTKVLVLDPALPALVSCGSSLQGRCFLHDLEPQGTAVHLAAPACLFSAHHNRPDDCPDCVASPLGTRVTVVEQGQASYFYVASSLDAAVAASFSPRSVSIRRLKADASGFAPGFVALSVLPKHLVSYSIEYVHSFHTGAFVYFLTVQPASVTDDPSALHTRLARLSATEPELGDYRELVLDCRFAPKRRRRGAPEGGQPYPVLRVAHSAPVGAQLATELSIAEGQEVLFGVFVTGKDGGPGVGPNSVVCAFPIDLLDTLIDEGVERCCESPVHPGLRRGLDFFQSPSFCPNPPGLEALSPNTSCRHFPLLVSSSFSRVDLFNGLLGPVQVTALYVTRLDNVTVAHMGTMDGRILQVELVRSLNYLLYVSNFSLGDSGQPVQRDVSRLGDHLLFASGDQVFQVPIQGPGCRHFLTCGRCLRAWHFMGCGWCGNMCGQQKECPGSWQQDHCPPKLTEFHPHSGPLRGSTRLTLCGSNFYLHPSGLVPEGTHQVTVGQSPCRPLPKDSSKLRPVPRKDFVEEFECELEPLGTQAVGPTNVSLTVTNMPPGKHFRVDGTSVLRGFSFMEPVLIAVQPLFGPRAGGTCLTLEGQSLSVGTSRAVLVNGTECLLARVSEGQLLCATPPGATVASVPLSLQVGGAQVPGSWTFQYREDPVVLSISPNCGYINSHITICGQHLTSAWHLVLSFHDGLRAVESRCERQLPEQQLCRLPEYVVRDPQGWVAGNLSARGDGAAGFTLPGFRFLPPPHPPSANLVPLKPEEHAIKFEVCVDGECHILGRVVRPGPDGVPQSTLLGILLPLLLLVAALATALVFSYWWRRKQLVLPPNLNDLASLDQTAGATPLPILYSGSDYRSGLALPAIDGLDSTTCVHGASFSDSEDESCVPLLRKESIQLRDLDSALLAEVKDVLIPHERVVTHSDRVIGKGHFGVVYHGEYIDQAQNRIQCAIKSLSRITEMQQVEAFLREGLLMRGLNHPNVLALIGIMLPPEGLPHVLLPYMCHGDLLQFIRSPQRNPTVKDLISFGLQVARGMEYLAEQKFVHRDLAARNCMLDESFTVKVADFGLARDILDREYYSVQQHRHARLPVKWMALESLQTYRFTTKSDVWSFGVLLWELLTRGAPPYRHIDPFDLTHFLAQGRRLPQPEYCPDSLYQVMQQCWEADPAVRPTFRVLVGEVEQIVSALLGDHYVQLPATYMNLGPSTSHEMNVRPEQPQFSPMPGNVRRPRPLSEPPRPT*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001278433.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAGCTGTGGTGGGCTCCACCCAGTTCGAGCTTCCCGGCTGCTTTGGTTACCTAATCAAGCCTGGGCAATGGCAGGCGCCCCTCCCCCAGCCTCGCTGCCGCCTTGCAGTTTGATCTCAGACTGCTGTGCTAGCAATCAGCGAGACTCCGTGGGCGTAGGACCCTCCGAGC"), + new RnaEdit(4138, 4137, "AAAAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 66511532, 66511717, 171, 356), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 66511718, 66518896, 356, 357), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 
66518897, 66519067, 357, 527), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 66519068, 66519865, 527, 528), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 66519866, 66519957, 528, 619), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 66519958, 66520156, 619, 620), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 66520157, 66520218, 620, 681), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 66520219, 66521052, 681, 682), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 66521053, 66521099, 682, 728), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 66521100, 66521894, 728, 729), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 66521895, 66522053, 729, 887), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 66522054, 66523980, 887, 888), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 66523981, 66524041, 888, 948), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 66524042, 66525010, 948, 949), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 66525011, 66525132, 949, 1070), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 66525133, 66526060, 1070, 1071), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 66526061, 66526142, 1071, 1152), + new TranscriptRegion(TranscriptRegionType.Intron, 9, 66526143, 66526417, 1152, 1153), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 66526418, 66529572, 1153, 4307) + }; + + startExonPhase = 0; + + var codingRegion = new CodingRegion(66511541, 66526590, 180, 1325, 1146); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MESGSTAASEEARSLRECELYVQKHNIQALLKDSIVQLCTARPERPMAFLREYFERLEKEEAKQIQNLQKAGTRTDSREDEISPPPPNPVVKGRRRRGAISAEVYTEEDAASYVRKVIPKDYKTMAALAKAIEKNVLFSHLDDNERSDIFDAMFSVSFIAGETVIQQGDEGDNFYVIDQGETDVYVNNEWATSVGEGGSFGELALIYGTPRAATVKAKTNVKLWGIDRDSYRRILMGSTLRKRKMYEEFLSKVSILESLDKWERLTVADALEPVQFEDGQKIVVQGEPGDEFFIILEGSAAVLQRRSENEEFVEVGRLGPSDYFGEIALLMNRPRAATVVARGPLKCVKLDRPRFERVLGPCSDILKRNIQQYNSFVSLSV*"); + + updatedGeneModel = 
true; + } + + if (transcriptId == "NM_001260.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GGG") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 26828756, 26828906, 4, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 26828907, 26911703, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 26911704, 26911779, 155, 230), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 26911780, 26923208, 230, 231), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 26923209, 26923319, 231, 341), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 26923320, 26927876, 341, 342), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 26927877, 26928017, 342, 482), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 26928018, 26956950, 482, 483), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 26956951, 26957008, 483, 540), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 26957009, 26959347, 540, 541), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 26959348, 26959479, 541, 672), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 26959480, 26967503, 672, 673), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 26967504, 26967647, 673, 816), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 26967648, 26970421, 816, 817), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 26970422, 26970491, 817, 886), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 26970492, 26971289, 886, 887), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 26971290, 26971362, 887, 959), + new TranscriptRegion(TranscriptRegionType.Intron, 9, 26971363, 26974589, 959, 960), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 26974590, 26974687, 960, 1057), + new TranscriptRegion(TranscriptRegionType.Intron, 10, 26974688, 26975405, 1057, 1058), + new TranscriptRegion(TranscriptRegionType.Exon, 11, 26975406, 26975484, 1058, 1136), + new 
TranscriptRegion(TranscriptRegionType.Intron, 11, 26975485, 26975602, 1136, 1137), + new TranscriptRegion(TranscriptRegionType.Exon, 12, 26975603, 26975761, 1137, 1295), + new TranscriptRegion(TranscriptRegionType.Intron, 12, 26975762, 26978092, 1295, 1296), + new TranscriptRegion(TranscriptRegionType.Exon, 13, 26978093, 26978569, 1296, 1772) + }; + + var codingRegion = new CodingRegion(26828779, 26978218, 27, 1421, 1395); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDYDFKVKLSSERERVEDLFEYEGCKVGRGTYGHVYKAKRKDGKDDKDYALKQIEGTGISMSACREIALLRELKHPNVISLQKVFLSHADRKVWLLFDYAEHDLWHIIKFHRASKANKKPVQLPRGMVKSLLYQILDGIHYLHANWVLHRDLKPANILVMGEGPERGRVKIADMGFARLFNSPLKPLADLDPVVVTFWYRAPELLLGARHYTKAIDIWAIGCIFAELLTSEPIFHCRQEDIKTSNPYHHDQLDRIFNVMGFPADKDWEDIKKMPEHSTLMKDFRRNTYTNCSLIKYMEKHKVKPDSKAFHLLQKLLTMDPIKRITSEQAMQDPYFLEDPLPTSDVFAGCQIPYPKREFLTEEEPDDKGDKKNQQQQQGNNHTNGTGHPGNQDSSHTQGPPLKKVRVVPPTTTSGGLIMTSDYQRSNPHAAYPNPGPSTSQPQSSMGYSATSQQPPQYSHQTHRY*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_000314.4") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(667, 667, null), + new RnaEdit(707, 707, "C"), + new RnaEdit(5547, 5546, "AAAAAAAAAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623195, 89623860, 1, 666), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 89623861, 89623861, 666, 667), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 89623862, 89624305, 667, 1110), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 89624306, 89653781, 1110, 1111), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 89653782, 89653866, 1111, 1195), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 89653867, 89685269, 1195, 1196), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 89685270, 89685314, 1196, 1240), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 89685315, 89690802, 1240, 1241), + new TranscriptRegion(TranscriptRegionType.Exon, 
4, 89690803, 89690846, 1241, 1284), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 89690847, 89692769, 1284, 1285), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 89692770, 89693008, 1285, 1523), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 89693009, 89711874, 1523, 1524), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 89711875, 89712016, 1524, 1665), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 89712017, 89717609, 1665, 1666), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 89717610, 89717776, 1666, 1832), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 89717777, 89720650, 1832, 1833), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 89720651, 89720875, 1833, 2057), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 89720876, 89725043, 2057, 2058), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 89725044, 89728532, 2058, 5546) + }; + + var codingRegion = new CodingRegion(89624227, 89725229, 1032, 2243, 1212); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_000535.5") + { + rnaEdits = new IRnaEdit[2]; + rnaEdits[0] = new RnaEdit(1708, 1708, "G"); + rnaEdits[1] = new RnaEdit(2837, 2836, "AAAAAAAAAAAAAAA"); + + var oldCodingRegion = translation.CodingRegion; + var codingRegion = new CodingRegion(oldCodingRegion.Start, oldCodingRegion.End, 88, 2676, 2589); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + 
"MERAESSSTEPAKAIKPIDRKSVHQICSGQVVLSLSTAVKELVENSLDAGATNIDLKLKDYGVDLIEVSDNGCGVEEENFEGLTLKHHTSKIQEFADLTQVETFGFRGEALSSLCALSDVTISTCHASAKVGTRLMFDHNGKIIQKTPYPRPRGTTVSVQQLFSTLPVRHKEFQRNIKKEYAKMVQVLHAYCIISAGIRVSCTNQLGQGKRQPVVCTGGSPSIKENIGSVFGQKQLQSLIPFVQLPPSDSVCEEYGLSCSDALHNLFYISGFISQCTHGVGRSSTDRQFFFINRRPCDPAKVCRLVNEVYHMYNRHQYPFVVLNISVDSECVDINVTPDKRQILLQEEKLLLAVLKTSLIGMFDSDVNKLNVSQQPLLDVEGNLIKMHAADLEKPMVEKQDQSPSLRTGEEKKDVSISRLREAFSLRHTTENKPHSPKTPEPRRSPLGQKRGMLSSSTSGAISDKGVLRPQKEAVSSSHGPSDPTDRAEVEKDSGHGSTSVDSEGFSIPDTGSHCSSEYAASSPGDRGSQEHVDSQEKAPETDDSFSDVDCHSNQEDTGCKFRVLPQPTNLATPNTKRFKKEEILSSSDICQKLVNTQDMSASQVDVAVKINKKVVPLDFSMSSLAKRIKQLHHEAQQSEGEQNYRKFRAKICPGENQAAEDELRKEISKTMFAEMEIIGQFNLGFIITKLNEDIFIVDQHATDEKYNFEMLQQHTVLQGQRLIAPQTLNLTAVNEAVLIENLEIFRKNGFDFVIDENAPVTERAKLISLPTSKNWTFGPQDVDELIFMLSDSPGVMCRPSRVKQMFASRACRKSVMIGTALNTSEMKKLITHMGEMDHPWNCPHGRPTMRHIANLGVISQN*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_000545.5") + { + rnaEdits = new IRnaEdit[2]; + rnaEdits[0] = new RnaEdit(1743, 1743, "G"); + rnaEdits[1] = new RnaEdit(3240, 3239, "AA"); + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + "MVSKLSQLQTELLAALLESGLSKEALIQALGEPGPYLLAGEGPLDKGESCGGGRGELAELPNGLGETRGSEDETDDDGEDFTPPILKELENLSPEEAAHQKAVVETLLQEDPWRVAKMVKSYLQQHNIPQREVVDTTGLNQSHLSQHLNKGTPMKTQKRAALYTWYVRKQREVAQQFTHAGQGGLIEEPTGDELPTKKGRRNRFKWGPASQQILFQAYERQKNPSKEERETLVEECNRAECIQRGVSPSQAQGLGSNLVTEVRVYNWFANRRKEEAFRHKLAMDTYSGPPPGPGPGPALPAHSSPGLPPPALSPSKVHGVRYGQPATSETAEVPSSSGGPLVTVSTPLHQVSPTGLEPSHSLLSTEAKLVSAAGGPLPPVSTLTALHSLEQTSPGLNQQPQNLIMASLPGVMTIGPGEPASLGPTFTNTGASTLVIGLASTQAQSVPVINSMGSSLTTLQPVQFSQPLHPSYQQPLMPPVQSHVTQSPFMATMAQLQSPHALYSHKPEVAQYTHTGLLPQTMLITDTTNLSALASLTPTKQVFTSDTEASSESGLHTPASQATTLHVPSQDPAGIQHLQPAHRLSASPTVSSSSLVLYQSSDSSNGQSHLLPSNHSVIETFISTQMASSSQ*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001145076.1") + { + rnaEdits = new IRnaEdit[3]; + rnaEdits[0] = new RnaEdit(935, 935, "G"); + rnaEdits[1] = new RnaEdit(1232, 1232, "G"); 
+ rnaEdits[2] = new RnaEdit(5376, 5375, "AAAAAAAAAAAAAAAA"); + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + "MDGFAGSLDDSISAASTSDVQDRLSALESRVQQQEDEITVLKAALADVLRRLAISEDHVASVKKSVSSKGQPSPRAVIPMSCITNGSGANRKPSHTSAVSIAGKETLSSAAKSIKRPSPAEKSHNSWENSDDSRNKLSKIPSTPKLIPKVTKTADKHKDVIINQEGEYIKMFMRGRPITMFIPSDVDNYDDIRTELPPEKLKLEWAYGYRGKDCRANVYLLPTGEIVYFIASVVVLFNYEERTQRHYLGHTDCVKCLAIHPDKIRIATGQIAGVDKDGRPLQPHVRVWDSVTLSTLQIIGLGTFERGVGCLDFSKADSGVHLCVIDDSNEHMLTVWDWQKKAKGAEIKTTNEVVLAVEFHPTDANTIITCGKSHIFFWTWSGNSLTRKQGIFGKYEKPKFVQCLAFLGNGDVLTGDSGGVMLIWSKTTVEPTPGKGPKGVYQISKQIKAHDGSVFTLCQMRNGMLLTGGGKDRKIILWDHDLNPEREIEVPDQYGTIRAVAEGKADQFLVGTSRNFILRGTFNDGFQIEVQGHTDELWGLATHPFKDLLLTCAQDRQVCLWNSMEHRLEWTRLVDEPGHCADFHPSGTVVAIGTHSGRWFVLDAETRDLVSIHTDGNEQLSVMRYSIDGTFLAVGSHDNFIYLYVVSENGRKYSRYGRCTGHSSYITHLDWSPDNKYIMSNSGDYEILYWDIPNGCKLIRNRSDCKDIDWTTYTCVLGFQVFGVWPEGSDGTDINALVRSHNRKVIAVADDFCKVHLFQYPCSKAKAPSHKYSAHSSHVTNVSFTHNDSHLISTGGKDMSIIQWKLVEKLSLPQNETVADTTLTKAPVSSTESVIQSNTPTPPPSQPLNETAEEESRISSSPTLLENSLEQTVEPSEDHSEEESEEGSGDLGEPLYEEPCNEISKEQAKATLLEDQQDPSPSS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220765.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(316, 315, + 
"CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTG
CTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTG
TTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new 
TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328) + }; + + var codingRegion = new CodingRegion(50358658, 50367353, 169, 1602, 1434); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220766.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(317, 318, null), + new RnaEdit(321, 320, + 
"AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTT
AAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAA
TCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 
50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, 209, 329), + new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, 329, 330), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, 330, 331) + }; + + var codingRegion = new CodingRegion(50358658, 50367358, 169, 1467, 1299); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220767.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(319, 318, + 
"AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTT
TCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGA
CACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new 
TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, 209, 329), + new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, 329, 330), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, 330, 331) + }; + + // last exon ends before coding region finished + var codingRegion = new CodingRegion(50358658, 50367358, 169, 1437, 1269); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220769.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(317, 318, null), + new RnaEdit(321, 320, + 
"AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGC
CTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATT
CAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new 
TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, 209, 329), + new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, 329, 330), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, 330, 331) + }; + + var codingRegion = new CodingRegion(50358658, 50367358, 169, 1341, 1173); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220770.1" && start == 50344378) + { + // final RNA-edit offset by 2 to compensate for deletion + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(317, 318, null), + new RnaEdit(321, 320, + 
"AACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCC
CTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCA
TTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new 
TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367354, 209, 329), + new TranscriptRegion(TranscriptRegionType.Gap, 3, 50367355, 50367356, 329, 330), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367357, 50367358, 330, 331) + }; + + var codingRegion = new CodingRegion(50358658, 50367358, 169, 1311, 1143); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220768.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(316, 315, "CCA"), + new RnaEdit(320, 319, + 
"TGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAAC
ACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGC
TGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 
50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367354, 50367357, 332, 335) + }; + + var codingRegion = new CodingRegion(50358658, 50367357, 169, 1467, 1299); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_006060.4" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(316, 315, "CCA"), + new RnaEdit(320, 319, + 
"TGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATT
CTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAG
TAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 
50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367354, 50367357, 332, 335) + }; + + var codingRegion = new CodingRegion(50358658, 50367357, 169, 1728, 1560); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220775.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAG"), + new RnaEdit(4, 3, "C"), + new RnaEdit(5325, 5324, "AAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459561, 204, 343), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459562, 50467615, 343, 344), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 344, 5527) + }; + + startExonPhase = 0; + + // first exon 
starts at 50459422, so the genomic portion of the coding region is clipped + var codingRegion = new CodingRegion(50459422, 50468325, 169, 1053, 885); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220774.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAA"), + new RnaEdit(5, 8, null), + new RnaEdit(5427, 5426, "AAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455032, 50455035, 202, 205), + new TranscriptRegion(TranscriptRegionType.Gap, 1, 50455036, 50455039, 205, 206), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50455040, 50455168, 206, 334), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50455169, 50459426, 334, 335), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50459427, 50459531, 335, 439), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50459532, 50467615, 439, 440), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50467616, 50472799, 440, 5623) + }; + + // first exon starts at 50455032, so the genomic portion of the coding region is clipped + var codingRegion = new CodingRegion(50455032, 50468325, 169, 1149, 981); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + 
"MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220776.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGT"), + new RnaEdit(3, 3, "C"), + new RnaEdit(5295, 5294, "AAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50459422, 50459531, 204, 313), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50459532, 50467615, 313, 314), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50467616, 50472799, 314, 5497) + }; + + startExonPhase = 0; + + // first exon starts at 50459422, so the genomic portion of the coding region is clipped + var codingRegion = new CodingRegion(50459422, 50468325, 169, 1023, 855); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220772.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, + "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTT"), + new RnaEdit(5188, 5187, 
"AAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50467613, 50472799, 206, 5392) + }; + + startExonPhase = 0; + + // first exon starts at 50467613, so the genomic portion of the coding region is clipped + var codingRegion = new CodingRegion(50467613, 50468325, 169, 918, 750); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_001220771.1" && start == 50344378) + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1, 0, "GAATTCCGGCGT"), + new RnaEdit(6, 5, "A"), + new RnaEdit(16, 16, "T"), + new RnaEdit(97, 97, "C"), + new RnaEdit(316, 315, + "CCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACT
GCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAA
ATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTG
GGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA") + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344378, 50344382, 13, 17), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 50344383, 50344518, 19, 154), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 50344519, 50358643, 154, 155), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 50358644, 50358697, 155, 208), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 50358698, 50367233, 208, 209), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 50367234, 50367353, 209, 328) + }; + + startExonPhase = 0; + + var codingRegion = new CodingRegion(50358658, 50468325, 169, 1299, 1131); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"); + + updatedGeneModel = true; + } + + if (transcriptId == "NM_003820.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1708, 1707, "AAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_032017.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3847, 3846, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001001740.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2703, 2702, "AAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if 
(transcriptId == "NM_022457.5") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2775, 2774, "AAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_005378.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2606, 2605, "AAAAAAAA")}; + } + + if (transcriptId == "NM_001008540.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1896, 1895, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001145413.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2722, 2721, "AAAA")}; + } + + if (transcriptId == "NM_001145412.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2743, 2742, "AAAA")}; + } + + if (transcriptId == "NM_012433.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4293, 4292, "AAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001005526.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(620, 619, "AAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000465.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2608, 2607, "AAA")}; + } + + if (transcriptId == "NM_001018115.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5103, 5102, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001664.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1923, 1922, "AAAA")}; + } + + if (transcriptId == "NM_006218.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3713, 3712, "AAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_020640.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3153, 3152, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_003866.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4125, 4124, "AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001101669.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4026, 4025, "AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_004168.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2391, 2390, "AAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001903.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3762, 3761, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == 
"NM_213647.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3071, 3070, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002011.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3027, 3026, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_022963.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2793, 2792, "AAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_005514.6") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1573, 1572, "AAAAAA")}; + } + + if (transcriptId == "NM_001760.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2052, 2051, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001136125.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1836, 1835, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001136017.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2061, 2060, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001136126.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1845, 1844, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_005375.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3315, 3314, "AAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001010932.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2791, 2790, "AAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000601.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2806, 2805, "AAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001010933.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1275, 1274, "AAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001010931.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1290, 1289, "AAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001010934.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(2011, 2010, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_001127500.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6677, 6676, 
"AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000245.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6623, 6622, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002052.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3409, 3408, "AAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002072.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2199, 2198, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_031263.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2944, 2943, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002140.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2979, 2978, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_031262.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2919, 2918, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001135052.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5005, 5004, "AAAAAA")}; + } + + if (transcriptId == "NM_003177.5") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5074, 5073, "AAAAAA")}; + } + + if (transcriptId == "NM_001174168.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4922, 4921, "AAAAAA")}; + } + + if (transcriptId == "NM_001174167.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5067, 5066, "AAAAAA")}; + } + + if (transcriptId == "NM_004235.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2934, 2933, "AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_017617.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(9296, 9295, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NR_028036.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2581, 2580, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NR_028033.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2518, 2517, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NR_028035.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2443, 2442, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if 
(transcriptId == "NM_152871.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2627, 2626, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NR_028034.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2380, 2379, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_152872.2") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2665, 2664, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000043.4") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(2690, 2689, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_005343.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1045, 1044, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_176795.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1235, 1234, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001130442.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1153, 1152, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000612.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5166, 5165, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001127598.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4840, 4839, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001007139.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5140, 5139, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_020193.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5512, 5511, "AAAAAAA")}; + } + + if (transcriptId == "NM_152991.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2418, 2417, "AAAAAAAAAA")}; + } + + if (transcriptId == "NM_003797.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2011, 2010, "AAAAAAAAAA")}; + } + + if (transcriptId == "NM_001273.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6498, 6497, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_080601.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2054, 2053, 
"AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002834.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6284, 6283, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_006231.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7841, 7840, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001128226.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7555, 7554, "AAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_014953.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7645, 7644, "AAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_030621.3") + { + rnaEdits = new IRnaEdit[] + {new RnaEdit(10222, 10221, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001271282.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6175, 6174, "AAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002168.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1734, 1733, "AAAAAAA")}; + } + + if (transcriptId == "NM_001077183.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5432, 5431, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000548.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5633, 5632, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001114382.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5564, 5563, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_032444.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7294, 7293, "AAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001134407.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(14685, 14684, "AAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000833.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(14447, 14446, "AAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001134408.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4736, 4735, "AAAAAAAAAA")}; + } + + if (transcriptId == "NM_016507.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(8291, 8290, "AAAAAAAAAAAAAAAAAAAA")}; + } + + if 
(transcriptId == "NM_015083.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(8264, 8263, "AAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_017763.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(4559, 4558, "AAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001039933.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1265, 1264, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000626.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1262, 1261, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_021602.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(950, 949, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002647.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3070, 3069, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_002067.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1614, 1613, "AA")}; + } + + if (transcriptId == "NM_001379.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5353, 5352, "AAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001130823.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5401, 5400, "AAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001238.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1948, 1947, + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_138578.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2560, 2559, "AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001191.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2371, 2370, "AAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_003600.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2340, 2339, "AAAAAAA")}; + } + + if (transcriptId == "NM_198433.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2548, 2547, "AAAAAAA")}; + } + + if (transcriptId == "NM_198434.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2239, 2238, "AAAAAAA")}; + } + + if (transcriptId == "NM_198435.1") + { + rnaEdits = 
new IRnaEdit[] {new RnaEdit(2129, 2128, "AAAAAAA")}; + } + + if (transcriptId == "NM_198436.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2225, 2224, "AAAAAAA")}; + } + + if (transcriptId == "NM_198437.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2115, 2114, "AAAAAAA")}; + } + + if (transcriptId == "NM_016592.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(2563, 2562, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001077490.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3766, 3765, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_080425.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3766, 3765, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001077489.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1863, 1862, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000516.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1908, 1907, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001077488.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1911, 1910, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_080426.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1866, 1865, "AAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001243432.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1515, 1514, "AAAAAA")}; + } + + if (transcriptId == "NM_003073.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1704, 1703, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001007468.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(1677, 1676, "AAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_021140.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(5773, 5772, "AAAAAA")}; + } + + if (transcriptId == "NM_006521.4") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(3394, 3393, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_138923.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7630, 7629, "AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_004606.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(7693, 7692, 
"AAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_138270.2") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(11071, 11070, "AAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_000489.3") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(11185, 11184, "AAAAAAAAAAAAAAAAAA")}; + } + + if (transcriptId == "NM_001042749.1") + { + rnaEdits = new IRnaEdit[] {new RnaEdit(6271, 6270, "AAAAAAA")}; + } + + if (transcriptId == "NM_005896.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1971, 1971, "T"), + new RnaEdit(2330, 2329, "AAAAAAAAAA") + }; + } + + if (transcriptId == "NM_004787.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(6, 6, "C"), + new RnaEdit(72, 72, "T"), + new RnaEdit(4473, 4473, "C"), + new RnaEdit(4845, 4845, "A") + }; + } + + if (transcriptId == "NM_016222.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(2001, 2001, "A"), + new RnaEdit(2105, 2104, "AAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_020861.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(80, 80, "C"), + new RnaEdit(3087, 3086, "AAAAAAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_002649.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1282, 1282, "G"), + new RnaEdit(4444, 4444, "A") + }; + } + + if (transcriptId == "NM_025069.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(2092, 2092, "A") + }; + } + + if (transcriptId == "NM_001304717.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(706, 706, "C"), + new RnaEdit(8702, 8701, "AAAAAAAAAAAAAAAAA") + }; + + translation = new Translation(translation.CodingRegion, (CompactId) translation.ProteinId, + 
"LERGGEAAAAAAAAAAAPGRGSESPVTISRAGNAGELVSPLLLPPTRRRRRRHIQGPGPVLNLPSAAAAPPVARAPEAAGGGSRSEDYSSSPHSAAAAARPLAAEEKQAQSLQPSSSRRSSHYPAAVQSQAAAERGASATAKSRAISILQKKPRHQQLLPSLSSFFFSHRLPDMTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV*"); + } + + if (transcriptId == "NM_033360.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1136, 1136, "T"), + new RnaEdit(3463, 3463, "G"), + new RnaEdit(5422, 5421, "AAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_004985.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1012, 1012, "T"), + new RnaEdit(3339, 3339, "G"), + new RnaEdit(5298, 5297, "AAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_002661.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(352, 352, "C"), + new RnaEdit(4273, 4272, "AAAAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_005324.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(2110, 2110, "G"), + new RnaEdit(2114, 2114, "T"), + new RnaEdit(2706, 2705, "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_015898.2") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(4139, 4139, "G"), + new RnaEdit(4442, 4441, "AAAAAAAAAAAAAAA") + }; + } + + if (transcriptId == "NM_006145.1") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(1407, 1408, "GG") + }; + } + + if (transcriptId == "NM_003954.3") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(765, 764, "C"), + new RnaEdit(4468, 4467, "AAAAAAAA"), + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 16, 43340488, 43342167, 2789, 4468), + new TranscriptRegion(TranscriptRegionType.Intron, 15, 43342168, 43342529, 
2788, 2789), + new TranscriptRegion(TranscriptRegionType.Exon, 15, 43342530, 43342630, 2688, 2788), + new TranscriptRegion(TranscriptRegionType.Intron, 14, 43342631, 43343903, 2687, 2688), + new TranscriptRegion(TranscriptRegionType.Exon, 14, 43343904, 43344048, 2543, 2687), + new TranscriptRegion(TranscriptRegionType.Intron, 13, 43344049, 43344458, 2542, 2543), + new TranscriptRegion(TranscriptRegionType.Exon, 13, 43344459, 43344565, 2436, 2542), + new TranscriptRegion(TranscriptRegionType.Intron, 12, 43344566, 43344772, 2435, 2436), + new TranscriptRegion(TranscriptRegionType.Exon, 12, 43344773, 43345126, 2082, 2435), + new TranscriptRegion(TranscriptRegionType.Intron, 11, 43345127, 43347779, 2081, 2082), + new TranscriptRegion(TranscriptRegionType.Exon, 11, 43347780, 43347930, 1931, 2081), + new TranscriptRegion(TranscriptRegionType.Intron, 10, 43347931, 43348424, 1930, 1931), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 43348425, 43348588, 1767, 1930), + new TranscriptRegion(TranscriptRegionType.Intron, 9, 43348589, 43350869, 1766, 1767), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 43350870, 43350974, 1662, 1766), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 43350975, 43351489, 1661, 1662), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 43351490, 43351621, 1530, 1661), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 43351622, 43351830, 1529, 1530), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 43351831, 43351960, 1400, 1529), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 43351961, 43362178, 1399, 1400), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 43362179, 43362316, 1262, 1399), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 43362317, 43363797, 1261, 1262), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 43363798, 43364293, 766, 1261), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 43364294, 43364411, 647, 764), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 43364412, 
43364519, 646, 647), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 43364520, 43364730, 436, 646), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 43364731, 43366601, 435, 436), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 43366602, 43366671, 366, 435), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 43366672, 43367855, 365, 366), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 43367856, 43368131, 90, 365), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 43368132, 43394325, 89, 90), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 43394326, 43394414, 1, 89), + }; + + var codingRegion = new CodingRegion(43342003, 43368111, 110, 2953, 2844); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MAVMEMACPGAPGSAVGQQKELPKAKEKTPPLGKKQSSVYKLEAVEKSPVFCGKWEILNDVITKGTAKEGSEAGPAAISIIAQAECENSQEFSPTFSERIFIAGSKQYSQSESLDQIPNNVAHATEGKMARVCWKGKRRSKARKKRKKKSSKSLAHAGVALAKPLPRTPEQESCTIPVQEDESPLGAPYVRNTPQFTKPLKEPGLGQLCFKQLGEGLRPALPRSELHKLISPLQCLNHVWKLHHPQDGGPLPLPTHPFPYSRLPHPFPFHPLQPWKPHPLESFLGKLACVDSQKPLPDPHLSKLACVDSPKPLPGPHLEPSCLSRGAHEKFSVEEYLVHALQGSVSSGQAHSLTSLAKTWAARGSRSREPSPKTEDNEGVLLTEKLKPVDYEYREEVHWATHQLRLGRGSFGEVHRMEDKQTGFQCAVKKVRLEVFRAEELMACAGLTSPRIVPLYGAVREGPWVNIFMELLEGGSLGQLVKEQGCLPEDRALYYLGQALEGLEYLHSRRILHGDVKADNVLLSSDGSHAALCDFGHAVCLQPDGLGKSLLTGDYIPGTETHMAPEVVLGRSCDAKVDVWSSCCMMLHMLNGCHPWTQFFRGPLCLKIASEPPPVREIPPSCAPLTAQAIQEGLRKEPIHRVSAAELGGKVNRALQQVGGLKSPWRGEYKEPRHPPPNQANYHQTLHAQPRELSPRAPGPRPAEETTGRAPKLQPPLPPEPPEPNKSPPLTLSKEESGMWEPLPLSSLEPAPARNPSSPERKATVPEQELQQLEIELFLNSLSQPFSLEEQEQILSCLSIDSLSLSDDSEKNPSKASQSSRDTLSSGVHSWSSQAEARSSSWNMVLARGRPTDTPSYFNGVKVQIQSLNGEHLHIREFHRVKVGDIATGISSQIPAAAFSLVTKDGQPVRYDMEVPDSGIDLQCTLAPDGSFAWSWRVKHGQLENRP*"); + } + + if (transcriptId == "NM_003954.4") + { + rnaEdits = new IRnaEdit[] + { + new RnaEdit(781, 780, "C"), + new RnaEdit(4486, 4485, "AAAAAAAAAAAAA"), + }; + + transcriptRegions = new ITranscriptRegion[] + { + new TranscriptRegion(TranscriptRegionType.Exon, 16, 
43340486, 43342167, 2805, 4486), + new TranscriptRegion(TranscriptRegionType.Intron, 15, 43342168, 43342529, 2804, 2805), + new TranscriptRegion(TranscriptRegionType.Exon, 15, 43342530, 43342630, 2704, 2804), + new TranscriptRegion(TranscriptRegionType.Intron, 14, 43342631, 43343903, 2703, 2704), + new TranscriptRegion(TranscriptRegionType.Exon, 14, 43343904, 43344048, 2559, 2703), + new TranscriptRegion(TranscriptRegionType.Intron, 13, 43344049, 43344458, 2558, 2559), + new TranscriptRegion(TranscriptRegionType.Exon, 13, 43344459, 43344565, 2452, 2558), + new TranscriptRegion(TranscriptRegionType.Intron, 12, 43344566, 43344772, 2451, 2452), + new TranscriptRegion(TranscriptRegionType.Exon, 12, 43344773, 43345126, 2098, 2451), + new TranscriptRegion(TranscriptRegionType.Intron, 11, 43345127, 43347779, 2097, 2098), + new TranscriptRegion(TranscriptRegionType.Exon, 11, 43347780, 43347930, 1947, 2097), + new TranscriptRegion(TranscriptRegionType.Intron, 10, 43347931, 43348424, 1946, 1947), + new TranscriptRegion(TranscriptRegionType.Exon, 10, 43348425, 43348588, 1783, 1946), + new TranscriptRegion(TranscriptRegionType.Intron, 9, 43348589, 43350869, 1782, 1783), + new TranscriptRegion(TranscriptRegionType.Exon, 9, 43350870, 43350974, 1678, 1782), + new TranscriptRegion(TranscriptRegionType.Intron, 8, 43350975, 43351489, 1677, 1678), + new TranscriptRegion(TranscriptRegionType.Exon, 8, 43351490, 43351621, 1546, 1677), + new TranscriptRegion(TranscriptRegionType.Intron, 7, 43351622, 43351830, 1545, 1546), + new TranscriptRegion(TranscriptRegionType.Exon, 7, 43351831, 43351960, 1416, 1545), + new TranscriptRegion(TranscriptRegionType.Intron, 6, 43351961, 43362178, 1415, 1416), + new TranscriptRegion(TranscriptRegionType.Exon, 6, 43362179, 43362316, 1278, 1415), + new TranscriptRegion(TranscriptRegionType.Intron, 5, 43362317, 43363797, 1277, 1278), + new TranscriptRegion(TranscriptRegionType.Exon, 5, 43363798, 43364293, 782, 1277), + new 
TranscriptRegion(TranscriptRegionType.Exon, 5, 43364294, 43364411, 663, 780), + new TranscriptRegion(TranscriptRegionType.Intron, 4, 43364412, 43364519, 662, 663), + new TranscriptRegion(TranscriptRegionType.Exon, 4, 43364520, 43364730, 452, 662), + new TranscriptRegion(TranscriptRegionType.Intron, 3, 43364731, 43366601, 451, 452), + new TranscriptRegion(TranscriptRegionType.Exon, 3, 43366602, 43366671, 382, 451), + new TranscriptRegion(TranscriptRegionType.Intron, 2, 43366672, 43367855, 381, 382), + new TranscriptRegion(TranscriptRegionType.Exon, 2, 43367856, 43368131, 106, 381), + new TranscriptRegion(TranscriptRegionType.Intron, 1, 43368132, 43394325, 105, 106), + new TranscriptRegion(TranscriptRegionType.Exon, 1, 43394326, 43394430, 1, 105), + }; + + var codingRegion = new CodingRegion(43342003, 43368111, 126, 2969, 2844); + translation = new Translation(codingRegion, (CompactId) translation.ProteinId, + "MAVMEMACPGAPGSAVGQQKELPKAKEKTPPLGKKQSSVYKLEAVEKSPVFCGKWEILNDVITKGTAKEGSEAGPAAISIIAQAECENSQEFSPTFSERIFIAGSKQYSQSESLDQIPNNVAHATEGKMARVCWKGKRRSKARKKRKKKSSKSLAHAGVALAKPLPRTPEQESCTIPVQEDESPLGAPYVRNTPQFTKPLKEPGLGQLCFKQLGEGLRPALPRSELHKLISPLQCLNHVWKLHHPQDGGPLPLPTHPFPYSRLPHPFPFHPLQPWKPHPLESFLGKLACVDSQKPLPDPHLSKLACVDSPKPLPGPHLEPSCLSRGAHEKFSVEEYLVHALQGSVSSGQAHSLTSLAKTWAARGSRSREPSPKTEDNEGVLLTEKLKPVDYEYREEVHWATHQLRLGRGSFGEVHRMEDKQTGFQCAVKKVRLEVFRAEELMACAGLTSPRIVPLYGAVREGPWVNIFMELLEGGSLGQLVKEQGCLPEDRALYYLGQALEGLEYLHSRRILHGDVKADNVLLSSDGSHAALCDFGHAVCLQPDGLGKSLLTGDYIPGTETHMAPEVVLGRSCDAKVDVWSSCCMMLHMLNGCHPWTQFFRGPLCLKIASEPPPVREIPPSCAPLTAQAIQEGLRKEPIHRVSAAELGGKVNRALQQVGGLKSPWRGEYKEPRHPPPNQANYHQTLHAQPRELSPRAPGPRPAEETTGRAPKLQPPLPPEPPEPNKSPPLTLSKEESGMWEPLPLSSLEPAPARNPSSPERKATVPEQELQQLEIELFLNSLSQPFSLEEQEQILSCLSIDSLSLSDDSEKNPSKASQSSRDTLSSGVHSWSSQAEARSSSWNMVLARGRPTDTPSYFNGVKVQIQSLNGEHLHIREFHRVKVGDIATGISSQIPAAAFSLVTKDGQPVRYDMEVPDSGIDLQCTLAPDGSFAWSWRVKHGQLENRP*"); + } + + // update the protein sequences + switch (transcriptId) + { + case "NM_002006.4": + case "NM_001243186.1": + case 
"NM_003376.5": + case "NM_001171622.1": + case "NM_001033756.2": + case "NM_001025366.2": + case "NM_001025368.2": + case "NM_001204385.1": + case "NM_001025370.2": + case "NM_001025369.2": + case "NM_001025367.2": + case "NM_002467.4": + case "NM_024424.3": + case "NM_000378.4": + case "NM_024426.4": + case "NM_001287424.1": + char[] aaSequence = translation.PeptideSeq.ToCharArray(); + aaSequence[0] = 'M'; + translation = new Translation(translation.CodingRegion, (CompactId)translation.ProteinId, + new string(aaSequence)); + break; + + case "NM_001317010.1": + char[] aaSequence2 = translation.PeptideSeq.ToCharArray(); + aaSequence2[191] = 'S'; + translation = new Translation(translation.CodingRegion, (CompactId)translation.ProteinId, + new string(aaSequence2)); + break; + } + + if (updatedGeneModel) + { + int newStart = transcriptRegions[0].Start; + int newEnd = transcriptRegions[transcriptRegions.Length - 1].End; + + if (newStart != start) + { + Console.WriteLine($"Found new start for {transcriptId}: old: {start:N0}, new: {newStart:N0}"); + // start = newStart; + } + + if (newEnd != end) + { + Console.WriteLine($"Found new end for {transcriptId}: old: {end:N0}, new: {newEnd:N0}"); + // end = newEnd; + } + + if (newStart < gene.Start) + { + Console.WriteLine( + $"Found new GENE start for {gene.Symbol}: old: {gene.Start:N0}, new: {newStart:N0}"); + // gene.Start = newStart; + } + + if (newEnd > gene.End) + { + Console.WriteLine( + $"Found new GENE end for {gene.Symbol}: old: {gene.End:N0}, new: {newEnd:N0}"); + // gene.End = newEnd; + } + } + } + + var transcript = new Transcript(chromosomeIndexDictionary[referenceIndex], start, end, id, translation, + encoded.BioType, gene, ExonUtilities.GetTotalExonLength(transcriptRegions), startExonPhase, + encoded.IsCanonical, transcriptRegions, numExons, mirnas, siftIndex, polyphenIndex, + encoded.TranscriptSource, encoded.CdsStartNotFound, encoded.CdsEndNotFound, rnaEdits); + + // add the AA edits + switch (transcriptId) + { 
+ case "NM_002006.4": + case "NM_001243186.1": + case "NM_003376.5": + case "NM_001171622.1": + case "NM_001033756.2": + case "NM_001025366.2": + case "NM_001025368.2": + case "NM_001204385.1": + case "NM_001025370.2": + case "NM_001025369.2": + case "NM_001025367.2": + case "NM_002467.4": + case "NM_024424.3": + case "NM_000378.4": + case "NM_024426.4": + case "NM_001287424.1": + transcript.AminoAcidEdits = new[] {new AminoAcidEdit(1, 'M')}; + break; + + case "NM_001317010.1": + transcript.AminoAcidEdits = new[] {new AminoAcidEdit(192, 'S')}; + break; + } + + return transcript; + } + + /// + /// writes the transcript to the binary writer + /// + public void Write(IExtendedBinaryWriter writer, Dictionary geneIndices, + Dictionary transcriptRegionIndices, Dictionary microRnaIndices, + Dictionary peptideIndices) + { + // transcript + writer.WriteOpt(Chromosome.Index); + writer.WriteOpt(Start); + writer.WriteOpt(End); + // ReSharper disable once ImpureMethodCallOnReadonlyValueField + Id.Write(writer); + + // gene + writer.WriteOpt(GetIndex(Gene, geneIndices)); + + // encoded data + var encoded = EncodedTranscriptData.GetEncodedTranscriptData(BioType, CdsStartNotFound, CdsEndNotFound, + Source, IsCanonical, SiftIndex != -1, PolyPhenIndex != -1, MicroRnas != null, RnaEdits != null, + false, TranscriptRegions != null, Translation != null, StartExonPhase); + encoded.Write(writer); + + // transcript regions + if (encoded.HasTranscriptRegions) WriteIndices(writer, TranscriptRegions, transcriptRegionIndices); + writer.WriteOpt(NumExons); + + // protein function predictions + if (encoded.HasSift) writer.WriteOpt(SiftIndex); + if (encoded.HasPolyPhen) writer.WriteOpt(PolyPhenIndex); + + // translation + if (encoded.HasTranslation) + { + // ReSharper disable once PossibleNullReferenceException + var peptideIndex = GetIndex(Translation.PeptideSeq, peptideIndices); + Translation.Write(writer, peptideIndex); + } + + // attributes + if (encoded.HasMirnas) WriteIndices(writer, 
MicroRnas, microRnaIndices); + if (encoded.HasRnaEdits) WriteItems(writer, RnaEdits, (x, y) => x.Write(y)); + } + + private static T[] ReadItems(BufferedBinaryReader reader, Func readFunc) + { + int numItems = reader.ReadOptInt32(); + var items = new T[numItems]; + for (int i = 0; i < numItems; i++) items[i] = readFunc(reader); + return items; + } + + private static void WriteItems(IExtendedBinaryWriter writer, T[] items, + Action writeAction) + { + writer.WriteOpt(items.Length); + foreach (var item in items) writeAction(item, writer); + } + + private static T[] ReadIndices(IBufferedBinaryReader reader, T[] cachedItems) + { + int numItems = reader.ReadOptInt32(); + var items = new T[numItems]; + + for (int i = 0; i < numItems; i++) + { + var index = reader.ReadOptInt32(); + items[i] = cachedItems[index]; + } + + return items; + } + + private static void WriteIndices(IExtendedBinaryWriter writer, T[] items, + IReadOnlyDictionary indices) + { + writer.WriteOpt(items.Length); + foreach (var item in items) writer.WriteOpt(GetIndex(item, indices)); + } + + private static int GetIndex(T item, IReadOnlyDictionary indices) + { + if (item == null) return -1; + + if (!indices.TryGetValue(item, out var index)) + { + throw new InvalidDataException($"Unable to locate the {typeof(T)} in the indices: {item}"); + } + + return index; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/DataStructures/TranscriptRegion.cs b/VariantAnnotation/Caches/DataStructures/TranscriptRegion.cs index bf32ed61..cb7effe7 100644 --- a/VariantAnnotation/Caches/DataStructures/TranscriptRegion.cs +++ b/VariantAnnotation/Caches/DataStructures/TranscriptRegion.cs @@ -1,48 +1,48 @@ -using IO; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class TranscriptRegion : ITranscriptRegion - { - public TranscriptRegionType Type { get; } - public ushort Id { get; } - public int Start { get; } - public int End { 
get; } - public int CdnaStart { get; } - public int CdnaEnd { get; } - - public TranscriptRegion(TranscriptRegionType type, ushort id, int start, int end, int cdnaStart, int cdnaEnd) - { - Type = type; - Id = id; - Start = start; - End = end; - CdnaStart = cdnaStart; - CdnaEnd = cdnaEnd; - } - - public static ITranscriptRegion Read(BufferedBinaryReader reader) - { - TranscriptRegionType type = (TranscriptRegionType)reader.ReadByte(); - ushort id = reader.ReadOptUInt16(); - int genomicStart = reader.ReadOptInt32(); - int genomicEnd = reader.ReadOptInt32(); - - int cdnaStart = reader.ReadOptInt32(); - int cdnaEnd = reader.ReadOptInt32(); - - return new TranscriptRegion(type, id, genomicStart, genomicEnd, cdnaStart, cdnaEnd); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.Write((byte)Type); - writer.WriteOpt(Id); - writer.WriteOpt(Start); - writer.WriteOpt(End); - writer.WriteOpt(CdnaStart); - writer.WriteOpt(CdnaEnd); - } - } -} +using IO; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class TranscriptRegion : ITranscriptRegion + { + public TranscriptRegionType Type { get; } + public ushort Id { get; } + public int Start { get; } + public int End { get; } + public int CdnaStart { get; } + public int CdnaEnd { get; } + + public TranscriptRegion(TranscriptRegionType type, ushort id, int start, int end, int cdnaStart, int cdnaEnd) + { + Type = type; + Id = id; + Start = start; + End = end; + CdnaStart = cdnaStart; + CdnaEnd = cdnaEnd; + } + + public static ITranscriptRegion Read(BufferedBinaryReader reader) + { + TranscriptRegionType type = (TranscriptRegionType)reader.ReadByte(); + ushort id = reader.ReadOptUInt16(); + int genomicStart = reader.ReadOptInt32(); + int genomicEnd = reader.ReadOptInt32(); + + int cdnaStart = reader.ReadOptInt32(); + int cdnaEnd = reader.ReadOptInt32(); + + return new TranscriptRegion(type, id, genomicStart, genomicEnd, cdnaStart, cdnaEnd); 
+ } + + public void Write(IExtendedBinaryWriter writer) + { + writer.Write((byte)Type); + writer.WriteOpt(Id); + writer.WriteOpt(Start); + writer.WriteOpt(End); + writer.WriteOpt(CdnaStart); + writer.WriteOpt(CdnaEnd); + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensions.cs b/VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensions.cs index afe495d4..9f8ed49e 100644 --- a/VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensions.cs +++ b/VariantAnnotation/Caches/DataStructures/TranscriptRegionExtensions.cs @@ -1,73 +1,73 @@ -using VariantAnnotation.Algorithms; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public static class TranscriptRegionExtensions - { - public static int BinarySearch(this ITranscriptRegion[] regions, int position) - { - var begin = 0; - var end = regions.Length - 1; - - while (begin <= end) - { - var index = begin + (end - begin >> 1); - var region = regions[index]; - - if (position >= region.Start && position <= region.End) return index; - if (region.End < position) begin = index + 1; - else if (position < region.Start) end = index - 1; - } - - return ~begin; - } - - public static (int ExonStart, int ExonEnd, int IntronStart, int IntronEnd) GetExonsAndIntrons( - this ITranscriptRegion[] regions, int startIndex, int endIndex) - { - int affectedStartIndex = GetAffectedRegionIndex(startIndex); - int affectedEndIndex = GetAffectedRegionIndex(endIndex); - - var exons = regions.FindDesiredRegionIds(TranscriptRegionType.Exon, affectedStartIndex, affectedEndIndex); - var introns = regions.FindDesiredRegionIds(TranscriptRegionType.Intron, affectedStartIndex, affectedEndIndex); - - return (exons.Start, exons.End, introns.Start, introns.End); - } - - private static (int Start, int End) FindDesiredRegionIds(this ITranscriptRegion[] regions, - TranscriptRegionType desiredType, int startIndex, int endIndex) - { - var regionStart = 
FindFirst(regions, desiredType, startIndex, endIndex); - var newStartIndex = regionStart != -1 ? regionStart : startIndex; - var regionEnd = FindLast(regions, desiredType, newStartIndex, endIndex); - - var startId = regionStart == -1 ? -1 : regions[regionStart].Id; - var endId = regionEnd == -1 ? -1 : regions[regionEnd].Id; - - if (endId < startId) Swap.Int(ref startId, ref endId); - return (startId, endId); - } - - private static int FindFirst(ITranscriptRegion[] regions, TranscriptRegionType desiredType, int startIndex, - int endIndex) - { - for (int i = startIndex; i <= endIndex; i++) if (regions[i].Type == desiredType) return i; - return -1; - } - - private static int FindLast(ITranscriptRegion[] regions, TranscriptRegionType desiredType, int startIndex, - int endIndex) - { - for (int i = endIndex; i >= startIndex; i--) if (regions[i].Type == desiredType) return i; - return -1; - } - - private static int GetAffectedRegionIndex(int index) - { - if (index >= 0) return index; - index = ~index; - return index == 0 ? 
0 : index - 1; - } - } -} +using VariantAnnotation.Algorithms; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public static class TranscriptRegionExtensions + { + public static int BinarySearch(this ITranscriptRegion[] regions, int position) + { + var begin = 0; + var end = regions.Length - 1; + + while (begin <= end) + { + var index = begin + (end - begin >> 1); + var region = regions[index]; + + if (position >= region.Start && position <= region.End) return index; + if (region.End < position) begin = index + 1; + else if (position < region.Start) end = index - 1; + } + + return ~begin; + } + + public static (int ExonStart, int ExonEnd, int IntronStart, int IntronEnd) GetExonsAndIntrons( + this ITranscriptRegion[] regions, int startIndex, int endIndex) + { + int affectedStartIndex = GetAffectedRegionIndex(startIndex); + int affectedEndIndex = GetAffectedRegionIndex(endIndex); + + var exons = regions.FindDesiredRegionIds(TranscriptRegionType.Exon, affectedStartIndex, affectedEndIndex); + var introns = regions.FindDesiredRegionIds(TranscriptRegionType.Intron, affectedStartIndex, affectedEndIndex); + + return (exons.Start, exons.End, introns.Start, introns.End); + } + + private static (int Start, int End) FindDesiredRegionIds(this ITranscriptRegion[] regions, + TranscriptRegionType desiredType, int startIndex, int endIndex) + { + var regionStart = FindFirst(regions, desiredType, startIndex, endIndex); + var newStartIndex = regionStart != -1 ? regionStart : startIndex; + var regionEnd = FindLast(regions, desiredType, newStartIndex, endIndex); + + var startId = regionStart == -1 ? -1 : regions[regionStart].Id; + var endId = regionEnd == -1 ? 
-1 : regions[regionEnd].Id; + + if (endId < startId) Swap.Int(ref startId, ref endId); + return (startId, endId); + } + + private static int FindFirst(ITranscriptRegion[] regions, TranscriptRegionType desiredType, int startIndex, + int endIndex) + { + for (int i = startIndex; i <= endIndex; i++) if (regions[i].Type == desiredType) return i; + return -1; + } + + private static int FindLast(ITranscriptRegion[] regions, TranscriptRegionType desiredType, int startIndex, + int endIndex) + { + for (int i = endIndex; i >= startIndex; i--) if (regions[i].Type == desiredType) return i; + return -1; + } + + private static int GetAffectedRegionIndex(int index) + { + if (index >= 0) return index; + index = ~index; + return index == 0 ? 0 : index - 1; + } + } +} diff --git a/VariantAnnotation/Caches/DataStructures/Translation.cs b/VariantAnnotation/Caches/DataStructures/Translation.cs index 5776eac9..fe3f57e5 100644 --- a/VariantAnnotation/Caches/DataStructures/Translation.cs +++ b/VariantAnnotation/Caches/DataStructures/Translation.cs @@ -1,37 +1,39 @@ -using IO; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.DataStructures -{ - public sealed class Translation : ITranslation - { - public ICodingRegion CodingRegion { get; } - public ICompactId ProteinId { get; } - public string PeptideSeq { get; } - - public Translation(ICodingRegion codingRegion, CompactId proteinId, string peptideSeq) - { - CodingRegion = codingRegion; - ProteinId = proteinId; - PeptideSeq = peptideSeq; - } - - public static ITranslation Read(BufferedBinaryReader reader, string[] peptideSeqs) - { - var codingRegion = DataStructures.CodingRegion.Read(reader); - var proteinId = CompactId.Read(reader); - var peptideIndex = reader.ReadOptInt32(); - var peptideSeq = peptideIndex == -1 ? 
null : peptideSeqs[peptideIndex]; - - return new Translation(codingRegion, proteinId, peptideSeq); - } - - public void Write(IExtendedBinaryWriter writer, int peptideIndex) - { - CodingRegion.Write(writer); - ProteinId.Write(writer); - writer.WriteOpt(peptideIndex); - } - } +using IO; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.DataStructures +{ + public sealed class Translation : ITranslation + { + public ICodingRegion CodingRegion { get; } + public ICompactId ProteinId { get; } + public string PeptideSeq { get; } + + public Translation(ICodingRegion codingRegion, CompactId proteinId, string peptideSeq) + { + CodingRegion = codingRegion; + ProteinId = proteinId; + PeptideSeq = peptideSeq; + } + + public static ITranslation Read(BufferedBinaryReader reader, string[] peptideSeqs) + { + var codingRegion = DataStructures.CodingRegion.Read(reader); + var proteinId = CompactId.Read(reader); + int peptideIndex = reader.ReadOptInt32(); + string peptideSeq = peptideIndex == -1 ? 
null : peptideSeqs[peptideIndex]; + if (!peptideSeq.EndsWith(AminoAcidCommon.StopCodon)) peptideSeq += AminoAcidCommon.StopCodon; + + return new Translation(codingRegion, proteinId, peptideSeq); + } + + public void Write(IExtendedBinaryWriter writer, int peptideIndex) + { + CodingRegion.Write(writer); + ProteinId.Write(writer); + writer.WriteOpt(peptideIndex); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/PredictionCache.cs b/VariantAnnotation/Caches/PredictionCache.cs index 1f203a9f..732b9bff 100644 --- a/VariantAnnotation/Caches/PredictionCache.cs +++ b/VariantAnnotation/Caches/PredictionCache.cs @@ -1,36 +1,36 @@ -using System.Collections.Generic; -using System.Collections.Immutable; -using Genome; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.Caches -{ - public sealed class PredictionCache : IPredictionCache - { - private readonly Prediction[] _predictions; - public string Name { get; } = string.Empty; - public GenomeAssembly Assembly { get; } - public IEnumerable DataSourceVersions { get; } = new List(); - private readonly ImmutableArray _descriptions; - - public PredictionCache(GenomeAssembly genomeAssembly, Prediction[] predictions, ImmutableArray descriptions) - { - Assembly = genomeAssembly; - _predictions = predictions; - _descriptions = descriptions; - } - - public PredictionScore GetProteinFunctionPrediction(int predictionIndex, char newAminoAcid, - int aaPosition) - { - var entry = _predictions[predictionIndex].GetPrediction(newAminoAcid, aaPosition); - - return entry == null - ? 
null - : new PredictionScore(_descriptions[entry.EnumIndex], entry.Score); - } - } +using System.Collections.Generic; +using System.Collections.Immutable; +using Genome; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.Caches +{ + public sealed class PredictionCache : IPredictionCache + { + private readonly Prediction[] _predictions; + public string Name { get; } = string.Empty; + public GenomeAssembly Assembly { get; } + public IEnumerable DataSourceVersions { get; } = new List(); + private readonly ImmutableArray _descriptions; + + public PredictionCache(GenomeAssembly genomeAssembly, Prediction[] predictions, ImmutableArray descriptions) + { + Assembly = genomeAssembly; + _predictions = predictions; + _descriptions = descriptions; + } + + public PredictionScore GetProteinFunctionPrediction(int predictionIndex, char newAminoAcid, + int aaPosition) + { + var entry = _predictions[predictionIndex].GetPrediction(newAminoAcid, aaPosition); + + return entry == null + ? 
null + : new PredictionScore(_descriptions[entry.EnumIndex], entry.Score); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/TranscriptCache.cs b/VariantAnnotation/Caches/TranscriptCache.cs index e2ea0871..bba3004a 100644 --- a/VariantAnnotation/Caches/TranscriptCache.cs +++ b/VariantAnnotation/Caches/TranscriptCache.cs @@ -1,29 +1,29 @@ -using System.Collections.Generic; -using Genome; -using Intervals; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.Caches -{ - public sealed class TranscriptCache : ITranscriptCache - { - public IIntervalForest TranscriptIntervalForest { get; } - public IIntervalForest RegulatoryIntervalForest { get; } - public string Name { get; } - public GenomeAssembly Assembly { get; } - public IEnumerable DataSourceVersions { get; } - - public TranscriptCache(IEnumerable dataSourceVersions, GenomeAssembly genomeAssembly, - IntervalArray[] transcriptIntervalArrays, - IntervalArray[] regulatoryRegionIntervalArrays) - { - Name = "Transcript annotation provider"; - DataSourceVersions = dataSourceVersions; - Assembly = genomeAssembly; - TranscriptIntervalForest = new IntervalForest(transcriptIntervalArrays); - RegulatoryIntervalForest = new IntervalForest(regulatoryRegionIntervalArrays); - } - } +using System.Collections.Generic; +using Genome; +using Intervals; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.Caches +{ + public sealed class TranscriptCache : ITranscriptCache + { + public IIntervalForest TranscriptIntervalForest { get; } + public IIntervalForest RegulatoryIntervalForest { get; } + public string Name { get; } + public GenomeAssembly Assembly { get; } + public IEnumerable DataSourceVersions { get; } + + public TranscriptCache(IEnumerable dataSourceVersions, 
GenomeAssembly genomeAssembly, + IntervalArray[] transcriptIntervalArrays, + IntervalArray[] regulatoryRegionIntervalArrays) + { + Name = "Transcript annotation provider"; + DataSourceVersions = dataSourceVersions; + Assembly = genomeAssembly; + TranscriptIntervalForest = new IntervalForest(transcriptIntervalArrays); + RegulatoryIntervalForest = new IntervalForest(regulatoryRegionIntervalArrays); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/TranscriptCacheData.cs b/VariantAnnotation/Caches/TranscriptCacheData.cs index b1040de5..0baf9cbe 100644 --- a/VariantAnnotation/Caches/TranscriptCacheData.cs +++ b/VariantAnnotation/Caches/TranscriptCacheData.cs @@ -1,52 +1,52 @@ -using System.Collections.Generic; -using Intervals; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO.Caches; -using VariantAnnotation.Providers; - -namespace VariantAnnotation.Caches -{ - public sealed class TranscriptCacheData - { - public readonly CacheHeader Header; - - public readonly IGene[] Genes; - public readonly ITranscriptRegion[] TranscriptRegions; - public readonly IInterval[] Mirnas; - public readonly string[] PeptideSeqs; - public readonly IntervalArray[] TranscriptIntervalArrays; - public readonly IntervalArray[] RegulatoryRegionIntervalArrays; - - public TranscriptCacheData(CacheHeader header, IGene[] genes, ITranscriptRegion[] transcriptRegions, - IInterval[] mirnas, string[] peptideSeqs, IntervalArray[] transcriptIntervalArrays, - IntervalArray[] regulatoryRegionIntervalArrays) - { - Header = header; - Genes = genes; - TranscriptRegions = transcriptRegions; - Mirnas = mirnas; - PeptideSeqs = peptideSeqs; - TranscriptIntervalArrays = transcriptIntervalArrays; - RegulatoryRegionIntervalArrays = regulatoryRegionIntervalArrays; - } - - public TranscriptCache GetCache() - { - var dataSourceVersions = GetDataSourceVersions(Header); - return new TranscriptCache(dataSourceVersions, 
Header.Assembly, TranscriptIntervalArrays, RegulatoryRegionIntervalArrays); - } - - private static IEnumerable GetDataSourceVersions(CacheHeader header) - { - var dataSourceVersions = new List(); - if (header == null) return dataSourceVersions; - - ushort vepVersion = header.Custom.VepVersion; - - var dataSourceVersion = new DataSourceVersion("VEP", vepVersion.ToString(), header.CreationTimeTicks, header.Source.ToString()); - dataSourceVersions.Add(dataSourceVersion); - return dataSourceVersions; - } - } -} +using System.Collections.Generic; +using Intervals; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO.Caches; +using VariantAnnotation.Providers; + +namespace VariantAnnotation.Caches +{ + public sealed class TranscriptCacheData + { + public readonly CacheHeader Header; + + public readonly IGene[] Genes; + public readonly ITranscriptRegion[] TranscriptRegions; + public readonly IInterval[] Mirnas; + public readonly string[] PeptideSeqs; + public readonly IntervalArray[] TranscriptIntervalArrays; + public readonly IntervalArray[] RegulatoryRegionIntervalArrays; + + public TranscriptCacheData(CacheHeader header, IGene[] genes, ITranscriptRegion[] transcriptRegions, + IInterval[] mirnas, string[] peptideSeqs, IntervalArray[] transcriptIntervalArrays, + IntervalArray[] regulatoryRegionIntervalArrays) + { + Header = header; + Genes = genes; + TranscriptRegions = transcriptRegions; + Mirnas = mirnas; + PeptideSeqs = peptideSeqs; + TranscriptIntervalArrays = transcriptIntervalArrays; + RegulatoryRegionIntervalArrays = regulatoryRegionIntervalArrays; + } + + public TranscriptCache GetCache() + { + var dataSourceVersions = GetDataSourceVersions(Header); + return new TranscriptCache(dataSourceVersions, Header.Assembly, TranscriptIntervalArrays, RegulatoryRegionIntervalArrays); + } + + private static IEnumerable GetDataSourceVersions(CacheHeader header) + { + var dataSourceVersions = new 
List(); + if (header == null) return dataSourceVersions; + + ushort vepVersion = header.Custom.VepVersion; + + var dataSourceVersion = new DataSourceVersion("VEP", vepVersion.ToString(), header.CreationTimeTicks, header.Source.ToString()); + dataSourceVersions.Add(dataSourceVersion); + return dataSourceVersions; + } + } +} diff --git a/VariantAnnotation/Caches/TranscriptIntervalForestExtensions.cs b/VariantAnnotation/Caches/TranscriptIntervalForestExtensions.cs index 3949bac3..fec2453c 100644 --- a/VariantAnnotation/Caches/TranscriptIntervalForestExtensions.cs +++ b/VariantAnnotation/Caches/TranscriptIntervalForestExtensions.cs @@ -1,14 +1,14 @@ -using Genome; -using Intervals; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Intervals; - -namespace VariantAnnotation.Caches -{ - public static class TranscriptIntervalForestExtensions - { - public static ITranscript[] GetAllFlankingValues(this IIntervalForest transcriptIntervalForest, - IChromosomeInterval interval) => transcriptIntervalForest.GetAllOverlappingValues(interval.Chromosome.Index, - interval.Start - OverlapBehavior.FlankingLength, interval.End + OverlapBehavior.FlankingLength); - } -} +using Genome; +using Intervals; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Intervals; + +namespace VariantAnnotation.Caches +{ + public static class TranscriptIntervalForestExtensions + { + public static ITranscript[] GetAllFlankingValues(this IIntervalForest transcriptIntervalForest, + IChromosomeInterval interval) => transcriptIntervalForest.GetAllOverlappingValues(interval.Chromosome.Index, + interval.Start - OverlapBehavior.FlankingLength, interval.End + OverlapBehavior.FlankingLength); + } +} diff --git a/VariantAnnotation/Caches/Utilities/ExonUtilities.cs b/VariantAnnotation/Caches/Utilities/ExonUtilities.cs index e79e4db6..d8e655a5 100644 --- a/VariantAnnotation/Caches/Utilities/ExonUtilities.cs +++ 
b/VariantAnnotation/Caches/Utilities/ExonUtilities.cs @@ -1,20 +1,20 @@ -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Caches.Utilities -{ - public static class ExonUtilities - { - public static int GetTotalExonLength(ITranscriptRegion[] regions) - { - int totalExonLength = 0; - - foreach (var region in regions) - { - if (region.Type != TranscriptRegionType.Exon) continue; - totalExonLength += region.End - region.Start + 1; - } - - return totalExonLength; - } - } +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Caches.Utilities +{ + public static class ExonUtilities + { + public static int GetTotalExonLength(ITranscriptRegion[] regions) + { + int totalExonLength = 0; + + foreach (var region in regions) + { + if (region.Type != TranscriptRegionType.Exon) continue; + totalExonLength += region.End - region.Start + 1; + } + + return totalExonLength; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/Utilities/RnaEditUtilities.cs b/VariantAnnotation/Caches/Utilities/RnaEditUtilities.cs index 6dca1562..5091bcb9 100644 --- a/VariantAnnotation/Caches/Utilities/RnaEditUtilities.cs +++ b/VariantAnnotation/Caches/Utilities/RnaEditUtilities.cs @@ -1,5 +1,4 @@ using System; -using System.Linq; using VariantAnnotation.Interface.AnnotatedPositions; using Variants; @@ -19,7 +18,7 @@ public static VariantType GetRnaEditType(IRnaEdit rnaEdit) return VariantType.unknown; } - + public static void SetTypesAndSort(IRnaEdit[] rnaEdits) { if (rnaEdits == null) return; @@ -31,7 +30,5 @@ public static void SetTypesAndSort(IRnaEdit[] rnaEdits) Array.Sort(rnaEdits); } - - } } \ No newline at end of file diff --git a/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs b/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs deleted file mode 100644 index 53da35d0..00000000 --- a/VariantAnnotation/Caches/Utilities/TranscriptValidator.cs +++ /dev/null @@ -1,159 +0,0 @@ -using 
System.Collections.Generic; -using System.IO; -using Genome; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.Caches.Utilities -{ - public static class TranscriptValidator - { - public static Dictionary TranscriptCdnas = new Dictionary() - { - {"NM_022148.2", "AATTCGGCACGAGGGCATGGGGCGGCTGGTTCTGCTGTGGGGAGCTGCCGTCTTTCTGCTGGGAGGCTGGATGGCTTTGGGGCAAGGAGGAGCAGCAGAAGGAGTACAGATTCAGATCATCTACTTCAATTTAGAAACCGTGCAGGTGACATGGAATGCCAGCAAATACTCCAGGACCAACCTGACTTTCCACTACAGATTCAACGGTGATGAGGCCTATGACCAGTGCACCAACTACCTTCTCCAGGAAGGTCACACTTCGGGGTGCCTCCTAGACGCAGAGCAGCGAGACGACATTCTCTATTTCTCCATCAGGAATGGGACGCACCCCGTTTTCACCGCAAGTCGCTGGATGGTTTATTACCTGAAACCCAGTTCCCCGAAGCACGTGAGATTTTCGTGGCATCAGGATGCAGTGACGGTGACGTGTTCTGACCTGTCCTACGGGGATCTCCTCTATGAGGTTCAGTACCGGAGCCCCTTCGACACCGAGTGGCAGTCCAAACAGGAAAATACCTGCAACGTCACCATAGAAGGCTTGGATGCCGAGAAGTGTTACTCTTTCTGGGTCAGGGTGAAGGCTATGGAGGATGTATATGGGCCAGACACATACCCAAGCGACTGGTCAGAGGTGACATGCTGGCAGAGAGGCGAGATTCGGGATGCCTGTGCAGAGACACCAACGCCTCCCAAACCAAAGCTGTCCAAATTTATTTTAATTTCCAGCCTGGCCATCCTTCTGATGGTGTCTCTCCTCCTTCTGTCTTTATGGAAATTATGGAGAGTGAAGAAGTTTCTCATTCCCAGCGTGCCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTCACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGATGGACACACCACTGTCAAAGTCAACGTCAGGATCCACGTTGACATTTAAAGACAGAGGGGACTGTCCCGGGGACTCCACACCACCATGGATGGGAAGTCTCCACGCCAATGATGGTAGGACTAGGAGACTCTGAAGACCCAGCCTCACCGCCTAATGCGGCCACTGCCCTGCTAACTTTCCCCCACATGAGTCTCTGTGTTCAAAGGCTTGATGGCAGATGGGAGCCAATTGCTCCAGGAGATTTACTCCCAGTTCCTTTTCGTGCCTGAACGTTGTCACATAAACCCCAAGGCAGCACGTCCAAAATGCTGTAAAACCATCTTCCCACTCTGTGAGTCCCCAGTTCCGTCCATGTACCTGTTCCATAGCATTGGATTCTCGGAGGATTTTTTGTCTGTTTTGAGACTCCAAACCACCTC
TACCCCTACAAAAAAAAAAAAAAAAAA"}, - {"NM_012234.6", "AGTCTCGTCCGGAGACTGGCAGCGGCGGCGGCGGCGGCGGCCGGAGCTCGAGCCCCAGCGGCTGAGGGCGGGCGGGCGGGCGCGGGGGAGGGAGGGGGGCCGGTCCGCGACGACTCCCCGGACGGCGTTTCTCCTCCGAGCGGCGCCGGTTTCGGCTTGGGGGGGGCGGGGTACAGCCCATCCATGACCATGGGCGACAAGAAGAGCCCGACCAGGCCAAAAAGACAAGCGAAACCTGCCGCAGACGAAGGGTTTTGGGATTGTAGCGTCTGCACCTTCAGAAACAGTGCTGAAGCCTTTAAATGCAGCATCTGCGATGTGAGGAAAGGCACCTCCACCAGAAAACCTCGGATCAATTCTCAGCTGGTGGCACAACAAGTGGCACAACAGTATGCCACCCCACCACCCCCTAAAAAGGAGAAGAAGGAGAAAGTTGAAAAGCAGGACAAAGAGAAACCTGAGAAAGACAAGGAAATTAGTCCTAGTGTTACCAAGAAAAATACCAACAAGAAAACCAAACCAAAGTCTGACATTCTGAAAGATCCTCCTAGTGAAGCAAACAGCATACAGTCTGCAAATGCTACAACAAAGACCAGCGAAACAAATCACACCTCAAGGCCCCGGCTGAAAAACGTGGACAGGAGCACTGCACAGCAGTTGGCAGTAACTGTGGGCAACGTCACCGTCATTATCACAGACTTTAAGGAAAAGACTCGCTCCTCATCGACATCCTCATCCACAGTGACCTCCAGTGCAGGGTCAGAACAGCAGAACCAGAGCAGCTCGGGGTCAGAGAGCACAGACAAGGGCTCCTCCCGTTCCTCCACGCCAAAGGGCGACATGTCAGCAGTCAATGATGAATCTTTCTGAAATTGCACATGGAATTGTGAAAACTATGAATCAGGGTATGAAATTCAAAACCTCCACCTGCCCATGCTGCTTGCATCCCTGGAGAATCTTCTGTGGACATCGACCTCTTAGTGATGCTGCCAGGATAATTTCTGCTTGCCATGGGCATCTGGCCACCAAGGAATTTCGCACCCTGACGATTACTCTTGACACTTTTATGTATTCCATTGTTTTATATGATTTTCCTAACAATCATTTATAATTGGATGTGCTCCTGAATCTACTTTTTATAAAAAAAAAAAAATCTGCTGTGCACAATTTTCCATGTACATTACAACTGGTTTTTTGTTTTTGTTTTGTTGCCGGTGGGGAGGGCTGGGAGGGGGAGGGAACTTTTATTTATTGTGTTCACAAACTCCATCCTTTCAGCATATCCTTTTAAGTTTAGTTCTTTCTTCCAGTTATACTATGTACTATCAGTTTTGATATAACTATATATATATAAATATAAAATTATATATAAAGGGTTATTTGAAACCAATCCATGGCAACGCTGGTGCTTGATACACTGTGAAGTGAATACAACATTGAACAGTTACAGATCTGGGACAGTCCCTTCTATGAAAGTGCTGAAATTTAATTAAAATCAGTCTTATATGAAGTATGTTCCAATCCATGTGGGAACTTGACTCTCTCATCTGTCTAAAGAGTACTGGACGATATAAAAATATATATTTTTTAAACAATGTGATCTCAAATTTAAAGACTGCTCCAGATAGCCTGCATTTGCAATGGAATAACTGACAAATCACAAGTGGTTTAGTTGGGCAGGGCTTTGATCATTCAAAAGTAACTAAAGTAGCTCCAGAATGCCAAGTATTCGTGTAAATTACGGTTACATGTTATCATTTGCTGTTCTTACATAAGCACTCATGAAAATATGGTATTCTGTAACTTGAATTCCATCCATTTTCCAGACCTCTACTCATGTCTGAGGTAAATCTAGAAATTGTCTTAGTTTTAGGATTGAAACAGTCTATAAACTGTATTTTTGGTCCATCCAGGAAGCTAGTCCCTTGTTTCTCCTTTCTACATGACATT
GCAGTGGTGGTTTCTGTAATTAAAATTTGTTTGCCTCATGTCCCTTTGTCTGATAAACCTTCACTCTACCGATTCAGTTGTGAGCATTCTTTTTTTCCTTCTCAAAACCTACTATGATTTGTTTTACTGAACAAAGGTTATCAACCACACATCCAGTCCTGACATGGAGCTTTTCAGTGTTTGGAGACATTTCTCAATCCCCTGCTGTGGTAGGAACTCCAGTGGTGAACGGCTTGCGCGCCTGCAGCCAGAGTTGCAGGGAAAGCTCGTACTTACTGCGAGCAGCATGTAATCTTTTTTCTTCCTGGACATAAAGATAGCTTGAGTAAACTGTTCTATTTCATTCTCTTCACTCTTTTTACTGTCTTGCAAAAAAAAAAAAAAAAAAAATAATCAAAGACCACTAATAAGATTCCACCTCTCCTTATTAAAATAATTTTTTAAAATTTTGTTTTGCTTTTGTTTGGATGTGGGGTCTCTCTTCTATTTGACTTTTACATTTAGATACAGAGTTTGTAGTACTTCAGAGACATTTCAAGCATGAGAATTTGAGGTTACCTCTCTTTATTTGACCTTTAGGGACTCACGGGAGGGCAGCCTGATTTGTAATGAAGCACCACATTTTGGTGTTAAAAACCTGGTTTGCTTAATAATAGCAGTAATTTCTGTCTGTGGAGGCAACAAATAAAAAAATTAACAGCTTGAATTGAGTAGCCAACAGGAAAGGTTCCTTTCACATTTACATTAAAACTATTCTGTAGTCACTAATGTACCATAATTTAAATTCTTTTCTCAAAGGTATAGATTATAAAGCAGTGCCATTTGTTGCTGTGGTCCTATTCTCAAATGCATGGACAATGTTCCCCCCTTTTTAAAATAATGCTTGTGTCTGGGATGCAAGCTTTGCTTATCTTTTTAAATACATTTTTAAAGTATTTATTAATGAACCAAAGGAAATCAGATGCTTTCTATAAGCATCAGAATATATAATACATAGTGATTTGACTATGAATTTTAAATCCACATTTTAATATTGGTGGGATATTGCAAAGACATTCCTTCTAAAGTTTTAATATTCCTTTTATTAAGGGTCTCAGGGAGGGTAAATTAGTCAGCCATATTTATTTTCCAGAGGTTTAAGAAATTGCTGTTTTTAACTTTTTGAAAAAACTTAAATGCCACCAAACTCATGTAGGTTGCACTGCTTATTGAACCAATAACTGTTGGTATGCACTTTGTTCAGACACACTGTGTACTTTTTCAAAAACTAGTTTCATGTAAAGTGATTGGACCCCATAGATTAGTGGAAAAAGCTGATTAACCAGCTACTCATAGGCTGCTAATTCATTCATGCCAATGTTTTGGTTTTTCAGTTTTGCCTCCGTGATAAATTAAAGAATGGGGAGGGGTGAAGGAAGGGGAAGAAGATTGCTTTAGAACAAGTGGCATGAAATTACCATCTTTGTAGAAACCGCAGCTAACAGTGGGAGTTATCTAAGCAATCAGATGTTACAGGGCCAGCCCTTTAGCTGCTGTGGTGTATTCTGTTGGGTAGTGAGGTAGTAGGTACTTTATAGACTTTTAATTTTGGAAATTGATGACATCCCTCAGGCATGTATTCTGGAAATGGAATTCCTGTAACTTCCTGTGTCTGCAGTATGCCCTACAATTAGTAGGCAGCGTGTAAAAACACTAGTGTAGATTATAAAGATATACATTAAAAGAGGACCAGAAATACTTGGTATTCAGTGGCACAGAAAGCAGGTTAAACAAACAAAAAGCACAGTGTTACGCTTGCAAGTTTCCATTTGTTTTAATACCACGCAATCTTTCACACTCGTGCGTGTGCGCGCACACAGAGCTTACCTGACTTGCTCTGCTTGAGTCATGCAGTTACAAAAAAAAAGACATCTTGACACCCACACAATATTCTAATCAAAACCTTTCAGTTTCAATCTGGATATTTAAAAACATTGGCAGAAGCTTCTGTGAGTTTAGTT
CCACTAAGATGTTTCACCTGCCTTATCAAGACCATTCTCAGTCTACTTTTTTAAGCTACCGTATCTTAAATTATTGAAAATTTATTAATTGCTGAATATATAATAACCTTTGCTTGTATGTAACCGAAAATGGTTTAAGAGCCAACATTTAGAGTATGACAATGGAGCTGAACAGTTTTTAATGCGCAAGCAGTTCTGTTCTTGTGTATGACTTGTAACCTTAATTTACTGTGTAAAGATGGTTACATTATTTCCTTAGCTTTGTTTGTTGGAGACAAATAGAGAATGCTTGTTAAGTATGTCAAAACAATCTTATCTTGTGAATTTTTGTTAATGTATTATACGAGCTATATTTTTCATTTGCCCAGAAAGACAGCTTGTATAACGCTTTTGGAAGTTTCTGCTCTGTAATGTCTTTAGAGCTGACAGTCTGTTAGGTTTGTTTTTTTCTTCATGCTAAAGTGTCAGTTGGTGGTTTTGTGAACTGGTCAAAAATTCACAGGTCTTAAATGTTTTGGGGGAAATTTATATTGGACACTGCTCTTTGTCTAGCAAATAAAAGATGTTAATATATTCCTGTTACTGGCATGTGCACGACTATGTTATTAGAAGCCACTTTATCATTTTCCTGCTTTAAATAGAAATGTCTATTTATGAATTCTGCTTGTAGTTTTTTCACAAATAAAATAGTAAAATTTCCATTGGAAATCTTAAAAAAAAAAAAAAAA"}, - {"NM_001145076.1", "GGCGCGGCGCTCGCGGCTGCTGCCTGGGAGGGAGGCCGGGCAGGCGGCTGAGCGGCGCGGCTCTCAACGTGACGGGGAAGTGGTTCGGGCGGCCGCGGCTTACTACCCCAGGGCGAACGGACGGACGACGGAGGCGGGAGCCGGTAGCCGAGCCGGGCGACCTAGAGAACGAGCGGGTCAGGCTCAGCGTCGGCCACTCTGTCGGTCCGCTGAATGAAGTGCCCGCCCCTCTAAGCCCGGAGCCCGGCGCTTTCCCCGCAAGATGGACGGTTTCGCCGGCAGTCTCGATGATAGTATTTCTGCTGCAAGTACTTCTGATGTTCAAGATCGCCTGTCAGCTCTTGAGTCACGAGTTCAGCAACAAGAAGATGAAATCACTGTGCTAAAGGCGGCTTTGGCTGATGTTTTGAGGCGTCTTGCAATCTCTGAAGATCATGTGGCCTCAGTGAAAAAATCAGTCTCAAGTAAAGGCCAACCAAGCCCTCGAGCAGTTATTCCCATGTCCTGTATAACCAATGGAAGTGGTGCAAACAGAAAACCAAGTCATACCAGTGCTGTCTCAATTGCAGGAAAAGAAACTCTTTCATCTGCTGCTAAAAGCATAAAACGACCATCACCAGCTGAAAAGTCACATAATTCTTGGGAAAATTCAGATGATAGCCGTAATAAATTGTCGAAAATACCTTCAACACCCAAATTAATACCAAAAGTTACCAAAACTGCAGACAAGCATAAAGATGTCATCATCAACCAAGAAGGAGAATATATTAAAATGTTTATGCGCGGTCGGCCAATTACCATGTTCATTCCTTCCGATGTTGACAACTATGATGACATCAGAACGGAACTGCCTCCTGAGAAGCTCAAACTGGAGTGGGCATATGGTTATCGAGGAAAGGACTGTAGAGCTAATGTTTACCTTCTTCCGACCGGGGAAATAGTTTATTTCATTGCATCAGTAGTAGTACTATTTAATTATGAGGAGAGAACTCAGCGACACTACCTGGGCCATACAGACTGTGTGAAATGCCTTGCTATACATCCTGACAAAATTAGGATTGCAACTGGACAGATAGCTGGCGTGGATAAAGATGGAAGGCCTCTACAACCCCACGTCAGAGTGTGGGATTCTGTTACTCTATCCACACTGCAGATTATTGGACTTGGCACTTTTGAGCGTGGAGTAGGATGCCTGGATTTTTCAAAAGCAGATTCAGGTGTTCATTTATGTGTTATTGATGACTCC
AATGAGCATATGCTTACTGTATGGGACTGGCAGAAGAAAGCAAAAGGAGCAGAAATAAAGACAACAAATGAAGTTGTTTTGGCTGTGGAGTTTCACCCAACAGATGCAAATACCATAATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATTCACTAACAAGAAAACAGGGAATTTTTGGGAAATATGAAAAGCCAAAATTTGTGCAGTGTTTAGCATTCTTGGGGAATGGAGATGTTCTTACTGGAGACTCAGGTGGAGTCATGCTTATATGGAGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAGGTGTATATCAAATCAGCAAACAAATCAAAGCTCATGATGGCAGTGTGTTCACACTTTGTCAGATGAGAAATGGGATGTTATTAACTGGAGGAGGGAAAGACAGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGAGGTTCCTGATCAGTATGGCACAATCAGAGCTGTAGCAGAAGGAAAGGCAGATCAATTTTTAGTAGGCACATCACGAAACTTTATTTTACGAGGAACATTTAATGATGGCTTCCAAATAGAAGTACAGGGTCATACAGATGAGCTTTGGGGTCTTGCCACACATCCCTTCAAAGATTTGCTCTTGACATGTGCTCAGGACAGGCAGGTGTGCCTGTGGAACTCAATGGAACACAGGCTGGAATGGACCAGGCTGGTAGATGAACCAGGACACTGTGCAGATTTTCATCCAAGTGGCACAGTGGTGGCCATAGGAACGCACTCAGGCAGGTGGTTTGTTCTGGATGCAGAAACCAGAGATCTAGTTTCTATCCACACAGACGGGAATGAACAGCTCTCTGTGATGCGCTACTCAATAGATGGTACCTTCCTGGCTGTAGGATCTCATGACAACTTTATTTACCTCTATGTAGTCTCTGAAAATGGAAGAAAATATAGCAGATATGGAAGGTGCACTGGACATTCCAGCTACATCACACACCTTGACTGGTCCCCAGACAACAAGTATATAATGTCTAACTCGGGAGACTATGAAATATTGTACTGGGACATTCCAAATGGCTGCAAACTAATCAGGAATCGATCGGATTGTAAGGACATTGATTGGACGACATATACCTGTGTGCTAGGATTTCAAGTATTTGGTGTCTGGCCAGAAGGATCTGATGGGACAGATATCAATGCACTGGTGCGATCCCACAATAGAAAGGTGATAGCTGTTGCCGATGACTTTTGTAAAGTCCATCTGTTTCAGTATCCCTGCTCCAAAGCAAAGGCTCCCAGTCACAAGTACAGTGCCCACAGCAGCCATGTCACCAATGTCAGTTTTACTCACAATGACAGTCACCTGATATCAACTGGTGGAAAAGACATGAGCATCATTCAGTGGAAACTTGTGGAAAAGTTATCTTTGCCTCAGAATGAGACTGTAGCGGATACTACTCTAACCAAAGCCCCCGTCTCTTCCACTGAAAGTGTCATCCAATCTAATACTCCCACACCGCCTCCTTCTCAGCCCTTAAATGAGACAGCTGAAGAGGAAAGTAGAATAAGCAGTTCTCCCACACTTCTGGAGAACAGCCTGGAACAAACTGTGGAGCCAAGTGAAGACCACAGCGAGGAGGAGAGTGAAGAGGGCAGCGGAGACCTTGGTGAGCCTCTTTATGAAGAGCCATGCAACGAGATAAGCAAGGAGCAGGCCAAAGCCACCCTTCTGGAGGACCAGCAAGACCCTTCGCCCTCGTCCTAACACCCTGGCTTCAGTGCAACTCTTTTCCTTCAGCTGCATGTGATTTTGTGATAAAGTTCAGGTAACAGGATGGGCAGTGATGGAGAATCACTGTTGATTGAGATTTTGGTTTCCATGTGATTTGTTTTCTTCAATAGTCTTATTTTCAGTCTCTCAAATACAGCCAACTTAAAGTTTTAGTTTGGTGTTTATTGAAAATTAACCAAACTTAA
TACTAGGAGAAGACTGAATCATTAATGATGTCTCACAAATTACTGTGTACCTAAGTGGTGTGATGTAAATACTGGAAACAAAAACAGCAGTTGCATTGATTTTGAAAACAAACCCCCTTGTTATCTGAACATGTTTTCTTCAGGAACAACCAGAGGTATCACAAACACTGTTACTCATCTACTGGCTCAGACTGTACTACTTTTTTTTTTTTTTTTCCTGAAAAAGAAACCAGAAAAAAATGTACTCTTACTGAGATACCCTCTCACCCCAAATGTGTAATGGAAAATTTTTAATTAAGAAAAACTTCAGTTTTGCCAAGTGCAATGGTGTTGCCTTCTTTAAAAAATGCCGTTTTCTTACACTACCAGTGGATGTCCAGACATGCTCTTAGTCTACTAGAGAGGTGCTGCCTTTTCTAAGTCATAATGAGGAACAGTCCCTTAATTTCTTGTGTGCAACTCTGTTTTATCCTAGAACTAAGAGAGCATTGGTTTGTTAAAGAGCTTTCAATGTATATTAAAACCTTCAATACTCAGAAATGATGGATTCCTCCAAGGAGTCCTTTACTAGCCTAAACATTCTCAAATGTTTGAGATTCAAGTGAATGGAAGGAAAACCACATGCCTTTAAAACTAAACTGTAATAATTACCTGGCTAATTTCAGCTAAGCCTTCATCATAATTTGTTCCCTCAGTAATAGGAGAAATATAAATACAGTAAGTTTAGATTATTGAATTGGTGCTTGAAATTTATTGGTTTTGTTGTAATTTTATACAGATTATATGAGGGATAAGATACTCATCAAATTGCAAATTCTTTTTTTTACAGAAGTGTGGGTAACAGTCACAGCAGTTTTTTTTACCAACAGCATACTTAACAGACTTGCTGTGTAGCAGTTTTTTTCTGGTGGAGTTGCTGTAAGTCTTGTAAGTCTAATGTGGCTATCCTACTCTTTTGGGCAATGCATGTATTATGCATTGGAAAGGTATTTTTTTTAAGTTCTGTTGGCTAGCTATGGTTTTCAGTACATTTCCTACTTTAAGAGTAATTACTGACAAATATGTATTTCCTATATGTTTATACTTTGATTATAAAAAAGTATTTTGTTTTGATTTTTTAACTTGCTGCATTGTTTTGATACTTTCTATTTTTTTGGTCAAATCATGTTTAGAAACTTTGGATGAGTTAAGAAGTCTTAAGTATGCAGGCGTTTACGTGATTGTGCCATTCCAAAGTGCATCAGAACTGTCATTCCCTTCTAATATCTTCTCAGGAGTAATACAAATCAGGTATTTCATCATCATTTGGTAATATGAAAACTCCAGTGAACTCCCAAGGACATTTACAACATTTATATTCACACGCTGTATGGAAGGGTGTGGGTGTGTGTGAAGGGGCGAGTGGAGACACTGTGTGTATCTCTAGATAAGAAGATATGCACCACGTTGAAAATACTCAGTGTAGATCTCTATGTGTATAGGTATCTGTATATCTTTCCTTTTGTTTACAACTGTTAAAAAACCTCAAAATAGTTCTCTTCAAAAGAAGAGAGATTCCAAGCAACCCATCTTTCTTCAGTATGTATGTTCTGTACATACTTATCGGAGCGCGCCAGTAAGTATCAGGCATATATATCTGTCTGTTAGCAATGATTATTACATCATCAGATCAGCATGTGCTATACTCCCTGCAAGAAATATACTGACATGAACAGGCAGTTCTTGGAGAAGAAAGAGCATTTCTTTAAGTACCTGGGGAATACAGCTCTCAGTGATCAGCAGGGAGTTTATTTGAGGACATCAGTCACCTTTGGGGTTGCCATGTACAATGAGATTTATAATCATGATACTCTTCGGTGGTAGTTTCAAAAGACACTACTAATACGCAGGAAGCGTTCCAGCTATTTAATGCTGGCAACTACTGTTTAATGGTCAGTTAAATCTGTGATAATGGTTGGAAGTGGGTGGGGTTATGAAATTGTAGATGTTTTTAGAAAAAC
TTGTGAATGAAAATGAATCCAAGTGTTTCATGTGAAGATGTTGAGCCATTGCTATCATGCATTCCTGTCTCATGGCAGAAAATTTTGAAGATTAAAAAATAAAATAATCAAAATGTTTCCTCTTTCTAAAAAAAAAAAAAAAAAA"}, - {"NM_001220773.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAG
ACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAG
TTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001220770.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTT
TGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTT
TGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001220767.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTT
TGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTT
TGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_152756.3", 
"GGGTTGTGACTGAAACCCGTCAATATGGCGGCGATCGGCCGCGGCCGCTCTCTGAAGAACCTCCGAGTACGAGGGCGGAATGACAGCGGCGAGGAGAACGTCCCGCTGGATCTGACCCGAGAACCTTCTGATAACTTAAGAGAGATTCTCCAAAATGTGGCCAGATTGCAGGGAGTATCAAATATGAGAAAGCTAGGCCATCTGAATAACTTTACTAAGCTTCTTTGTGATATTGGCCACAGTGAAGAAAAACTGGGCTTTCACTATGAGGATATCATAATTTGTTTGCGGTTAGCTTTATTAAATGAAGCAAAAGAAGTGCGAGCAGCAGGGCTACGAGCGCTTCGATATCTCATCCAAGACTCCAGTATTCTCCAGAAGGTGCTAAAATTGAAAGTGGACTATTTAATAGCTAGGTGCATTGACATACAACAGAGCAACGAGGTAGAGAGGACACAAGCACTTCGATTAGTCAGAAAGATGATTACTGTGAATGCTTCCTTGTTTCCTAGTTCTGTGACCAACTCATTAATTGCAGTTGGAAATGATGGACTTCAAGAAAGAGACAGAATGGTCCGAGCATGCATTGCCATTATCTGTGAACTAGCACTTCAGAATCCAGAGGTGGTGGCCCTTCGAGGTGGACTAAACACCATCTTGAAAAATGTGATCGATTGCCAATTAAGTCGAATAAATGAGGCCCTAATTACTACAATTTTGCACCTTCTTAATCATCCAAAGACTCGACAGTATGTGCGAGCTGATGTAGAATTAGAGAGAATTTTAGCACCCTATACTGATTTTCACTACAGACATAGTCCAGATACAGCTGAAGGACAGCTCAAAGAAGACAGAGAAGCACGATTTCTAGCCAGTAAAATGGGAATCATAGCAACATTCCGATCATGGGCAGGTATTATTAATTTATGTAAACCTGGAAATTCTGGGATCCAGTCTCTAATAGGAGTACTTTGCATACCAAATATGGAAATAAGGCGAGGTCTACTTGAAGTGCTTTATGATATATTTCGTCTTCCTCTACCTGTTGTGACTGAGGAGTTCATAGAAGCACTACTCAGTGTAGATCCAGGGAGGTTCCAAGACAGTTGGAGGCTTTCAGATGGCTTTGTGGCAGCTGAGGCAAAAACTATTCTTCCTCATCGTGCCAGATCCAGGCCAGACCTCATGGATAATTATTTGGCACTGATACTCTCTGCATTTATTCGTAATGGACTTTTAGAGGGTCTAGTTGAAGTGATAACAAACAGTGATGATCATATCTCAGTTAGAGCTACCATCCTTTTAGGAGAGCTTTTACATATGGCAAACACAATTCTTCCTCATTCACATAGCCATCATTTACACTGCTTGCCAACCCTAATGAATATGGCTGCATCCTTTGATATCCCCAAGGAAAAGAGACTGCGAGCCAGTGCAGCCTTGAACTGTTTAAAACGCTTCCATGAAATGAAGAAACGAGGACCTAAGCCTTATAGTCTTCATTTAGACCACATTATTCAGAAAGCAATTGCAACACACCAGAAACGGGATCAGTATCTCCGAGTTCAGAAAGATATATTTATCCTTAAGGATACAGAGGAAGCTCTTTTAATTAACCTTAGAGATAGCCAAGTCCTTCAACATAAAGAGAATCTTGAATGGAATTGGAATCTTATAGGGACCATTCTTAAGTGGCCAAATGTAAATCTAAGAAACTATAAAGATGAACAGTTACACAGGTTTGTACGAAGACTACTTTATTTTTACAAGCCCAGCAGTAAATTATATGCCAACCTGGATCTGGATTTTGCCAAGGCCAAACAGCTCACGGTTGTAGGTTGCCAGTTTACAGAATTTCTTCTTGAATCTGAAGAGGATGGGCAAGGCTACTTAGAAGATCTAGTAAAGGATATTGTTCAGTGGCTCAATGCTTCATCTGGAATGAAACCCGAAAGAAGTCTTCAAAATAATGGTTTATTGACCACCCTTAGTCAACACT
ACTTTTTATTTATTGGAACACTTTCTTGCCACCCTCATGGAGTTAAAATGCTGGAAAAATGCAGTGTATTTCAGTGTCTCCTTAATCTTTGCTCCTTGAAAAACCAAGATCACTTGCTAAAACTTACTGTTTCTAGCTTGGACTATAGCAGAGATGGATTGGCTAGAGTCATCCTTTCCAAAATTTTAACTGCAGCTACTGATGCCTGCAGACTCTATGCAACAAAACATTTAAGGGTATTATTGAGAGCTAATGTTGAATTCTTTAATAATTGGGGAATTGAGTTGTTAGTGACCCAGCTACATGATAAAAACAAAACGATTTCCTCTGAAGCTCTTGATATCCTCGATGAAGCATGTGAAGACAAGGCCAATCTTCATGCTCTCATTCAGATGAAACCAGCGTTATCCCACCTTGGAGACAAGGGTTTGCTTCTCCTGCTGAGATTTCTCTCCATTCCAAAAGGATTTTCCTATCTGAATGAAAGAGGTTATGTAGCAAAACAATTGGAAAAGTGGCACAGGGAATACAACTCCAAATATGTTGACTTGATTGAGGAACAACTCAATGAAGCACTTACTACTTACCGGAAGCCTGTTGATGGTGATAACTATGTTCGTCGGAGTAACCAAAGATTACAGCGTCCTCACGTCTACCTGCCTATACACCTTTATGGACAACTAGTACACCATAAAACAGGCTGCCATTTGTTGGAAGTACAGAATATTATTACAGAACTCTGTCGTAATGTTCGTACACCAGATTTGGATAAGTGGGAAGAAATTAAAAAACTGAAAGCATCTCTTTGGGCCTTGGGAAATATCGGCTCATCAAATTGGGGTCTCAATTTGCTACAGGAAGAAAACGTGATTCCAGATATACTAAAACTTGCAAAACAGTGTGAAGTTCTTTCCATCAGAGGGACCTGTGTATATGTACTTGGGCTCATAGCTAAAACCAAACAAGGCTGTGATATTCTAAAATGTCACAACTGGGATGCTGTGAGGCATAGTCGCAAACATCTGTGGCCAGTGGTTCCAGATGATGTGGAACAACTCTGTAATGAACTTTCATCTATCCCAAGCACTCTAAGTTTGAACTCGGAGTCAACCAGCTCTAGACATAATAGTGAAAGTGAATCTGTGCCATCGAGTATGTTCATATTGGAGGATGACCGGTTTGGCAGCAGCTCTACTAGTACATTTTTCCTTGATATCAATGAAGATACAGAGCCAACATTTTATGACCGATCTGGACCCATAAAGGATAAAAATTCATTCCCTTTCTTTGCTTCTAGTAAACTTGTGAAGAATCGTATCTTAAATTCGCTTACTTTGCCTAACAAAAAACATCGTAGTAGCAGTGATCCAAAAGGAGGGAAATTATCATCTGAAAGTAAGACAAGCAACAGGCGAATCAGAACACTTACGGAGCCCAGTGTTGATTTTAATCATAGTGATGATTTTACACCCATATCCACTGTACAGAAAACATTACAATTAGAGACTTCATTTATGGGGAATAAGCACATTGAAGACACTGGTAGTACACCAAGCATTGGAGAAAATGACTTAAAATTCACCAAGAATTTTGGTACAGAGAATCACAGAGAAAATACAAGCCGAGAGAGGTTAGTAGTAGAAAGTTCAACGAGCTCACATATGAAGATACGTAGCCAAAGTTTCAATACAGACACTACAACAAGTGGCATAAGTTCAATGAGCTCAAGTCCTTCACGAGAGACAGTAGGTGTAGATGCTACAACTATGGACACAGACTGTGGAAGCATGAGTACTGTGGTAAGTACTAAAACTATTAAGACAAGCCACTATTTGACGCCACAGTCTAACCATCTGTCTCTCTCCAAATCAAATTCGGTGTCCCTGGTGCCTCCAGGTTCTTCTCATACGCTTCCTAGAAGAGCACAGTCCCTTAAAGCACCCTCTATTGCTACAATTAAAAGTCTAGCAGATTGTAACTTTAGTTACACAAGTTCTAGA
GATGCTTTTGGCTATGCTACACTGAAAAGACTACAGCAACAAAGAATGCATCCATCCTTATCTCACTCTGAAGCTTTGGCATCTCCAGCAAAAGATGTGCTATTTACTGATACCATCACCATGAAGGCCAACAGTTTTGAGTCCAGATTAACACCAAGCAGGTTCATGAAAGCCTTAAGTTATGCATCATTAGATAAAGAAGATTTATTGAGTCCTATTAATCAAAATACCCTGCAACGATCTTCCTCAGTGCGGTCCATGGTGTCCAGTGCCACATATGGGGGTTCAGATGATTACATTGGTCTTGCTCTCCCGGTGGATATAAATGATATATTCCAGGTAAAGGATATTCCCTATTTTCAGACAAAAAACATACCACCACATGATGATCGAGGTGCAAGAGCATTTGCCCATGATGCAGGAGGTCTTCCATCTGGAACTGGAGGTCTTGTAAAAAATTCTTTTCACTTGCTACGACAGCAGATGAGTCTTACGGAAATAATGAATTCAATCCATTCAGATGCCTCTCTGTTTTTAGAAAGTACAGAAGACACTGGACTACAGGAACATACAGATGATAACTGCCTTTATTGTGTCTGTATTGAAATTCTGGGTTTCCAGCCCAGCAACCAACTGAGTGCAATATGTAGTCATTCAGACTTTCAAGATATTCCATATTCTGATTGGTGTGAGCAGACTATCCATAATCCTTTAGAAGTGGTTCCCTCTAAGTTTTCGGGGATTTCTGGATGCAGTGATGGGGTGTCTCAAGAAGGCTCAGCTAGCAGCACCAAAAGCACAGAATTGTTACTAGGTGTTAAAACAATTCCAGATGATACACCAATGTGCCGTATACTCCTTCGCAAAGAAGTTCTAAGATTAGTCATTAATTTGAGTAGTTCAGTTTCAACTAAATGTCATGAGACTGGGCTTTTAACAATTAAGGAGAAGTATCCTCAAACATTTGATGACATATGCCTTTACTCTGAGGTTTCCCATTTGCTGTCACACTGCACATTCAGACTTCCGTGTCGGAGGTTCATACAAGAATTATTTCAAGATGTACAGTTTCTACAAATGCATGAAGAAGCAGAGGCTGTGTTGGCAACACCACCAAAGCAACCTATAGTTGATACATCTGCTGAATCCTGACCTCATATTTATGATGGATATAGATACATACTATATATATTCATATTTGTGGATTTCCTAAAAGCCTCAGAAAATACGACTGACTAGGCAGCAAAGACAGGAGTATCTTCTGTACACTGTTCCGCAGTTACTGGTACATGAACAGTTGGAACTGCTGACTTTCCTAACCAAAACAACTTCCTTCTCTCCTTTGTTGAGCCTTTTGAGGGGTTCATGATTCATTACCACAGTTTTAAGAGTTTCAGTTACCATTGTATGCAAGAGCCAAGCACTGAATACCTACATAGGTTTTCTATTTTCTTTCATTTTAAAAGCATAATGACAGTGGAACAATAATGGGATATGCAGAAGCACCCTTCACAAGTTATTTCTGAATGATTTTTAGGGTAAATAATACAGATGCCTTGTTTGTTAACTAACTTGTGGAAAGCAGGAATCAGTGTCTCTAAGGCTGCATCCTATTACCACAATGGGGTGTGCTATAACTGCTGGTATTAGAGAGGGAACTTTGGCCCTTTCACGTTTTTCTTAATGTTTGTAACACTACTTCAGAGGTTTATAACCTCAAAGCAGAAGAAGAGCCTCAACAACCCGGGACTTATAAGTTATTTTTATGTTACTAGACTTGCATAAAGATTCTTGTTTTCCAACTCTTCATTTTGTTGCAATGTGTTATTACAGGATATATGAACCAATTAAGGTTTTTCACTACAGTTCTTGAATAAAATTTAAAAATCATTTTTTATTTTAATTAAAAATATTTCCCATTTATAGAATGCATATATTTGCAATGGACTTCCACTTTCATCAACTTTCCATCTCATCGCTTTAAACAGGAACTTGAACAA
GCACTGTTAGTTTAGACCTAAAGGATAGGAAAGCATTAAATAATACTTTGGATCTCCTGAGGAAAAGATAAGTTTGCTTGCAATTTACACATTCCATGGGGAAAGAAGAGCCATATTTCCTTAAAAAAAACATTAATAAAGCTTGTTATTGAGAAAAATTGTAGTGAAAAGCCTTAAGTACCAAATTTTAAAGCAGCAGTAACTTAATTTTTATATCAGTGTTTTTGTTTTGCACAAACTAAATGCAGTGGTAGGTGGGTTTATGAGTATATTAATTGCCTTTATCCATTTGTGAAGTTAAGTTGATGAGGGCAAGGTTTTTGTTTGTTTAATTTGTATATGTCTAAAGGTATTTGGAACTTTTTACAGGAATTAAACATATATGCAAATTTGTATATAAAAATAGCATGGCCATCATTTGAATGCTTGTAAATGAAAGGATTATCTTTTTTGAGATCTATATATAAATAGAAATAGAAAATCCAGCTGGACTGATTAGGATTCTTTTTTAATTCATTTGTGTATAACATTTTTATTACAATTACACATCAGTTTTGACACAGTCATAGCAACATTAATATTTTCCCATGATGCAGATCCTTTTTGTAATGGGCTTGTTCTTTGAGATCTCTGTAAAGAACCCTGTGAACTAGAAAACATAACTCACAGAGATACTTTTTTAAAAAATTTATTTACTGGAACTGAAAGTTCCAGTTGGGATGAAGCATTTCATCTCACTTCATAACACCTCTTTGACTGCACTTCAGTGAATTGTTCTTATGTGCACTGTGTAGCAACTTACATTATAACAAAGCAGATAAGGGCTGTAAGCTGCTGCTTATGTTGAAAAGTGGTTCTTCAGATTTTCTCTCATAAAATCCAGTTGAAGATAAATAATTTTTTTATACTTTATCACTGAACCCAAGTGTTTATTTAAATGTCAACAGTACTTCTAAGAACGTTGCCTGTCATCGTGGTCTTTGGTCTTGGATAACTAAACTGCCTTTCCAGAGAACCAAATGTCAGAGTTACTAGACCAAATAGTGGTTAAAACCTCCAAAGGAAGTAATGTAATCTTATTCATAATGGGATTAACATATTTTAGACATTCATTTTAAACACTACCTCAGTTAATATAGAGTATAAAAATCTGTGGTTTAATCCCTCAAAAGTTAACAGTAATTTTTTTTTTGTCTTACACACACACACACCCCCTCCCCCACCATCACTATCCCTGTACCCTCACCTTGGTCATCTATCCTGAAATAAGGCTTAGTTAGTATTGGCCTGAATGTTTTGTGTTTTTTTTTTTGTTTTTTTTTTTTACTGTTACTTTGAAAAATATGTATGTATACCTTATCATATCTGCCTATATCACTTACTTTGGGGAGATACTCAGAGCTTTGTGGTTATCAGTATACTAAAAAAAAAAAAAAGTCTACGCTTAAATTTATAGTGCTATTTGGTTTCTCCATGATTTCACTGACAGGTCTAATACATTTTCTTTGAGTACTTGTTTGTAAAAAGTAGACTTTATGGTGAAAAATACATGCAGTGCCAAGTGATTAACTTAAGTGTTTAAAAATATTAAATTATAGCAGAAGAGGTTAGGAATGATATCAGCAGTAATAGAAATAATTGAGAAAATCATCTATAAATAATAGATATTACAGACTATAGAATACCAAAATAATGTCAATACTGTAGTTTTTAAAGATTTTAGGATTAATCTTAGTCCATATAAATTTGTACTATTGGTAATTATTGAATAATTGGGAGGAATCTGGGCAGTTGTGCTGGTTGTAAACTATGAATTTCTAATCGTAAAGTGAATTGTTATTTCTAATTGAACTTTTTTTCAAGAACAGATTTCAGCCTCACATACTAAGTAAATACTGATAAATAAGGAAATTAGAAATTTAGTATTCATAATTAAATATGCTCTAAAATTTCCTATACTTTTATTTCCTGTTTATTCTTAGGTAGATTGGAAGGGGGAA
ACAGTCTGTTCTCCCTAATTAAATTTTTTCTAATAACGATTAGTAGAATATGGACATTCTATATGACAGTGACATTAAAAGAGGCTCTTTGGAAGTATATACATTATTAACATAATGTGTACAAGTCCTTTTGAAATGACAACTTTAATGGGTTTCAGCTCTTTTATCTAGAGCTTGAGATAATTCAAGCTGAGTTTTTCAGGGCATATCACAACGGCCAAGTGTTCAGCAGTGGGATATCAATGCTTATTTACATTTTCCTACTGCTATTTATATAAAATGTTATTCCATTCAGAGGATGCCTTTTATCCCCACATTAAAGCACAGATCATTAAGCAATAAAAACCAAATTGTCTGTCATTCAAATTATAACTGCAGTTATTTTTGCATGGTAAGAGTGAGGTGCTAATTTTGTGTGAGATGAACTTTGTAAACTACTTTGGGAAATGTTCTTTGGAAGTAAGGTTTTTTCTCCTTTAGTCTTATGCTTCCACTTTTGTCTCAGATTCACAATCCATTAAAACATGGGGAAAAAAGAAAAGGTAAAATTGAGAGACTTTTGTTAGAGGAGCTATTTGGAATGAACCAACATTTCAGATTTTCCAAAATGTAAGTTAGGAAGTCTCCATTGTCTCTGCATTAACAAAATACACTGTTACTATCTTAATCTCAAGAGTGTCATTACAGTGAGAATCTCATTTAAAAGCATACCAGTGAAATTAATAGCAGTGCTTATCAAAGAACACTGAAATCTGTGAGAATCTTTCTAGGAGCATTCTTTTCTTCTTTTAGTTCCAAGTTCCAGGGTATTTTTCATTCCTAGTAGGTTTATATGACTCACAGAATGTGGACTTTTTTCCTGTTTGGAGTATTTTTGTAATGTAAGTATCGGATAGCTGCACCACAGCATGCATAAATTGCACATTTTGTTTTACTTTCTTTATAGAATATTTAATTTCAAAAATATAATTTATGCCAAAAAAAGCATACCTTTCAATTTTGCTACTTGGTTGATTTAGCACAAAATGCAAAGTCTTGGGGCAGAGAGGGGGAGTGAAAAAAATTTTATAGGTAATTGTTACAAAAATACCTGTCAGAAACCCTAAAGCTGCATTGTAAAACAAATGGTGTAAACTAGTTTTGAAAAGTGGTAAGGAATTGTGAAAAAAATCTCAGACTTAATGCTCTCTAACCACATGAGTTTCTTCTTTTTTATTTAGTAATACGCTGCTACATATTTGGAGGTTCTGGTGTTTGTAGGTCACTGAACAGACATTGAAATCTGATTTATATTGTATAACTGTAACATAGAAAGAAAAAGTATTTATATTTTTTCTGTAAGAATATTTCATTGAGTTGTGTATAATTTAAATAAGATTTGTCCCCAAATGGTTTTGCTCACCTTGATTTTTTTTGTTGTGATTTTCTTGTTTTTGTATAATGTGTATAGTTTATGTCAAGGGCATTAAAAGCCTCCTGAAGCATAATCTTATCAAAGGGATACATTGTTAATAAAATGTACTTAAAATTCTTAAA"}, - {"NM_001242758.1", 
"GAGAAGCCAATCAGTGTCGTCGCGGTCGCTGTTCTAAAGTCCGCACGCACCCACCGGGACTCAGATTCTCCCCAGACGCCGAGGATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCTTCTACCCTGCGGAGATCACACTGACCTGGCAGCGGGATGGGGAGGACCAGACCCAGGACACGGAGCTCGTGGAGACCAGGCCTGCAGGGGATGGAACCTTCCAGAAGTGGGCGGCTGTGGTGGTGCCTTCTGGAGAGGAGCAGAGATACACCTGCCATGTGCAGCATGAGGGTCTGCCCAAGCCCCTCACCCTGAGATGGGAGCTGTCTTCCCAGCCCACCATCCCCATCGTGGGCATCATTGCTGGCCTGGTTCTCCTTGGAGCTGTGATCACTGGAGCTGTGGTCGCTGCCGTGATGTGGAGGAGGAAGAGCTCAGATAGAAAAGGAGGGAGTTACACTCAGGCTGCAAGCAGTGACAGTGCCCAGGGCTCTGATGTGTCTCTCACAGCTTGTAAAGTGTGAGACAGCTGCCTTGTGTGGGACTGAGAGGCAAGAGTTGTTCCTGCCCTTCCCTTTGTGACTTGAAGAACCCTGACTTTGTTTCTGCAAAGGCACCTGCATGTGTCTGTGTTCGTGTAGGCATAATGTGAGGAGGTGGGGAGAGCACCCCACCCCCATGTCCACCATGACCCTCTTCCCACGCTGACCTGTGCTCCCTCTCCAATCATCTTTCCTGTTCCAGAGAGGTGGGGCTGAGGTGTCTCCATCTCTGTCTCAACTTCATGGTGCACTGAGCTGTAACTTCTTCCTTCCCTATTAAAATTAGAACCTGAGTATAAATTTACTTTCTCAAATTCTTGCCATGAGAGGTTGATGAGTTAATTAAAGGAGAAGATTCCTAAAATTTGAGAGACAAAATTAATGGAACGCATGAGAACCTTCCAGAGTCCA"}, - {"NM_001220768.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAG
GCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTG
TGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_002447.2", 
"AGTGTACAGCGGCGGCTGGGGCGGCAGGTGAGGCGGCTGGGGCGTTGCTGTCGTGCGTCCGCAGGCGTCAGGTGCTCAGACCCGAGGGCCGGGAAGGGATTTGGGTTTCACAGGAACCTGGGGCGGGGGTCCGCTATCTTGGGGCTGTCGGGACCGCTGCTTAAATTTGGCCCAGTCCAGACCTCGAGTCGGGCCCCCAGCCAGGCCCACGCCCAGGTCCAGGCCCAGGCCGGTAGGGATCCTCTAGGGTCCCAGCTCGCCTCGATGGAGCTCCTCCCGCCGCTGCCTCAGTCCTTCCTGTTGCTGCTGCTGTTGCCTGCCAAGCCCGCGGCGGGCGAGGACTGGCAGTGCCCGCGCACCCCCTACGCGGCCTCTCGCGACTTTGACGTGAAGTACGTGGTGCCCAGCTTCTCCGCCGGAGGCCTGGTACAGGCCATGGTGACCTACGAGGGCGACAGAAATGAGAGTGCTGTGTTTGTAGCCATACGCAATCGCCTGCATGTGCTTGGGCCTGACCTGAAGTCTGTCCAGAGCCTGGCCACGGGCCCTGCTGGAGACCCTGGCTGCCAGACGTGTGCAGCCTGTGGCCCAGGACCCCACGGCCCTCCCGGTGACACAGACACAAAGGTGCTGGTGCTGGATCCCGCGCTGCCTGCGCTGGTCAGTTGTGGCTCCAGCCTGCAGGGCCGCTGCTTCCTGCATGACCTAGAGCCCCAAGGGACAGCCGTGCATCTGGCAGCGCCAGCCTGCCTCTTCTCAGCCCACCATAACCGGCCCGATGACTGCCCCGACTGTGTGGCCAGCCCATTGGGCACCCGTGTAACTGTGGTTGAGCAAGGCCAGGCCTCCTATTTCTACGTGGCATCCTCACTGGACGCAGCCGTGGCTGCCAGCTTCAGCCCACGCTCAGTGTCTATCAGGCGTCTCAAGGCTGACGCCTCGGGATTCGCACCGGGCTTTGTGGCGTTGTCAGTGCTGCCCAAGCATCTTGTCTCCTACAGTATTGAATACGTGCACAGCTTCCACACGGGAGCCTTCGTATACTTCCTGACTGTACAGCCGGCCAGCGTGACAGATGATCCTAGTGCCCTGCACACACGCCTGGCACGGCTTAGCGCCACTGAGCCAGAGTTGGGTGACTATCGGGAGCTGGTCCTCGACTGCAGATTTGCTCCAAAACGCAGGCGCCGGGGGGCCCCAGAAGGCGGACAGCCCTACCCTGTGCTGCGGGTGGCCCACTCCGCTCCAGTGGGTGCCCAACTTGCCACTGAGCTGAGCATCGCCGAGGGCCAGGAAGTACTATTTGGGGTCTTTGTGACTGGCAAGGATGGTGGTCCTGGCGTGGGCCCCAACTCTGTCGTCTGTGCCTTCCCCATTGACCTGCTGGACACACTAATTGATGAGGGTGTGGAGCGCTGTTGTGAATCCCCAGTCCATCCAGGCCTCCGGCGAGGCCTCGACTTCTTCCAGTCGCCCAGTTTTTGCCCCAACCCGCCTGGCCTGGAAGCCCTCAGCCCCAACACCAGCTGCCGCCACTTCCCTCTGCTGGTCAGTAGCAGCTTCTCACGTGTGGACCTATTCAATGGGCTGTTGGGACCAGTACAGGTCACTGCATTGTATGTGACACGCCTTGACAACGTCACAGTGGCACACATGGGCACAATGGATGGGCGTATCCTGCAGGTGGAGCTGGTCAGGTCACTAAACTACTTGCTGTATGTGTCCAACTTCTCACTGGGTGACAGTGGGCAGCCCGTGCAGCGGGATGTCAGTCGTCTTGGGGACCACCTACTCTTTGCCTCTGGGGACCAGGTTTTCCAGGTACCTATCCAAGGCCCTGGCTGCCGCCACTTCCTGACCTGTGGGCGTTGCCTAAGGGCATGGCATTTCATGGGCTGTGGCTGGTGTGGGAACATGTGCGGCCAGCAGAAGGAGTGTCCTGGCTCCTGGCAACAGGACCACTGCCCACCTAAGCTTACTGAGTTCCACCCCCACAGTG
GACCTCTAAGGGGCAGTACAAGGCTGACCCTGTGTGGCTCCAACTTCTACCTTCACCCTTCTGGTCTGGTGCCTGAGGGAACCCATCAGGTCACTGTGGGCCAAAGTCCCTGCCGGCCACTGCCCAAGGACAGCTCAAAACTCAGACCAGTGCCCCGGAAAGACTTTGTAGAGGAGTTTGAGTGTGAACTGGAGCCCTTGGGCACCCAGGCAGTGGGGCCTACCAACGTCAGCCTCACCGTGACTAACATGCCACCGGGCAAGCACTTCCGGGTAGACGGCACCTCCGTGCTGAGAGGCTTCTCTTTCATGGAGCCAGTGCTGATAGCAGTGCAACCCCTCTTTGGCCCACGGGCAGGAGGCACCTGTCTCACTCTTGAAGGCCAGAGTCTGTCTGTAGGCACCAGCCGGGCTGTGCTGGTCAATGGGACTGAGTGTCTGCTAGCACGGGTCAGTGAGGGGCAGCTTTTATGTGCCACACCCCCTGGGGCCACGGTGGCCAGTGTCCCCCTTAGCCTGCAGGTGGGGGGTGCCCAGGTACCTGGTTCCTGGACCTTCCAGTACAGAGAAGACCCTGTCGTGCTAAGCATCAGCCCCAACTGTGGCTACATCAACTCCCACATCACCATCTGTGGCCAGCATCTAACTTCAGCATGGCACTTAGTGCTGTCATTCCATGACGGGCTTAGGGCAGTGGAAAGCAGGTGTGAGAGGCAGCTTCCAGAGCAGCAGCTGTGCCGCCTTCCTGAATATGTGGTCCGAGACCCCCAGGGATGGGTGGCAGGGAATCTGAGTGCCCGAGGGGATGGAGCTGCTGGCTTTACACTGCCTGGCTTTCGCTTCCTACCCCCACCCCATCCACCCAGTGCCAACCTAGTTCCACTGAAGCCTGAGGAGCATGCCATTAAGTTTGAGTATATTGGGCTGGGCGCTGTGGCTGACTGTGTGGGTATCAACGTGACCGTGGGTGGTGAGAGCTGCCAGCACGAGTTCCGGGGGGACATGGTTGTCTGCCCCCTGCCCCCATCCCTGCAGCTTGGCCAGGATGGTGCCCCATTGCAGGTCTGCGTAGATGGTGAATGTCATATCCTGGGTAGAGTGGTGCGGCCAGGGCCAGATGGGGTCCCACAGAGCACGCTCCTTGGTATCCTGCTGCCTTTGCTGCTGCTTGTGGCTGCACTGGCGACTGCACTGGTCTTCAGCTACTGGTGGCGGAGGAAGCAGCTAGTTCTTCCTCCCAACCTGAATGACCTGGCATCCCTGGACCAGACTGCTGGAGCCACACCCCTGCCTATTCTGTACTCGGGCTCTGACTACAGAAGTGGCCTTGCACTCCCTGCCATTGATGGTCTGGATTCCACCACTTGTGTCCATGGAGCATCCTTCTCCGATAGTGAAGATGAATCCTGTGTGCCACTGCTGCGGAAAGAGTCCATCCAGCTAAGGGACCTGGACTCTGCGCTCTTGGCTGAGGTCAAGGATGTGCTGATTCCCCATGAGCGGGTGGTCACCCACAGTGACCGAGTCATTGGCAAAGGCCACTTTGGAGTTGTCTACCACGGAGAATACATAGACCAGGCCCAGAATCGAATCCAATGTGCCATCAAGTCACTAAGTCGCATCACAGAGATGCAGCAGGTGGAGGCCTTCCTGCGAGAGGGGCTGCTCATGCGTGGCCTGAACCACCCGAATGTGCTGGCTCTCATTGGTATCATGTTGCCACCTGAGGGCCTGCCCCATGTGCTGCTGCCCTATATGTGCCACGGTGACCTGCTCCAGTTCATCCGCTCACCTCAGCGGAACCCCACCGTGAAGGACCTCATCAGCTTTGGCCTGCAGGTAGCCCGCGGCATGGAGTACCTGGCAGAGCAGAAGTTTGTGCACAGGGACCTGGCTGCGCGGAACTGCATGCTGGACGAGTCATTCACAGTCAAGGTGGCTGACTTTGGTTTGGCCCGCGACATCCTGGACAGGGAGTACTATAGTGTTCAACAGCATCGC
CACGCTCGCCTACCTGTGAAGTGGATGGCGCTGGAGAGCCTGCAGACCTATAGATTTACCACCAAGTCTGATGTGTGGTCATTTGGTGTGCTGCTGTGGGAACTGCTGACACGGGGTGCCCCACCATACCGCCACATTGACCCTTTTGACCTTACCCACTTCCTGGCCCAGGGTCGGCGCCTGCCCCAGCCTGAGTATTGCCCTGATTCTCTGTACCAAGTGATGCAGCAATGCTGGGAGGCAGACCCAGCAGTGCGACCCACCTTCAGAGTACTAGTGGGGGAGGTGGAGCAGATAGTGTCTGCACTGCTTGGGGACCATTATGTGCAGCTGCCAGCAACCTACATGAACTTGGGCCCCAGCACCTCGCATGAGATGAATGTGCGTCCAGAACAGCCGCAGTTCTCACCCATGCCAGGGAATGTACGCCGGCCCCGGCCACTCTCAGAGCCTCCTCGGCCCACTTGACTTAGTTCTTGGGCTGGACCTGCTTAGCTGCCTTGAGCTAACCCCAAGCTGCCTCTGGGCCATGCCAGGCCAGAGGGCAGTGGCCCTCCACCTTGTTCCTGCCCTTTAACTTTCAGAGGCAATAGGTAAATGGGGCCCATTAGGTCCCTCACTCCACAGAGTGAGCCAGTGAGGGCAGTCCTGCAACATGTATTTATGGAGTGCCTGCTGTGGACCCTGTCTTCTGGGCACAGTGGACTCAGCAGTGACCACACCAACACTGACCCTTGAACCAATAAAGGAACAAATGACTATTAAAGCACAAAAAAAAAAAAAAAA"}, - {"NM_001220775.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTT
GTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTC
TCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGG
GCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_006060.4", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGC
TTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAA
TTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGA
TCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_005228.3", "CCCCGGCGCAGCGCGGCCGCAGCAGCCTCCGCCCCCCGCACGGTGTGAGCGCCCGACGCGGCCGAGGCGGCCGGAGTCCCGAGCTAGCCCCGGCGGCCGCCGCCGCCCAGACCGGACGACAGGCCACCTCGTCGGCGTCCGCCCGAGTCCCCGCCTCGCCGCCAACGCCACAACCACCGCGCACGGCCCCCTGACTCCGTCCAGTATTGATCGGGAGAGCCGGAGCGAGCTCTTCGGGGAGCAGCGATGCGACCCTCCGGGACGGCCGGGGCAGCGCTCCTGGCGCTGCTGGCTGCGCTCTGCCCGGCGAGTCGGGCTCTGGAGGAAAAGAAAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTTTGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCCTTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTAAAGACCATCCAGGAGGTGGCTGGTTATGTCCTCATTGCCCTCAACACAGTGGAGCGAATTCCTTTGGAAAACCTGCAGATCATCAGAGGAAATATGTACTACGAAAATTCCTATGCCTTAGCAGTCTTATCTAACTATGATGCAAATAAAACCGGACTGAAGGAGCTGCCCATGAGAAATTTACAGGAAATCCTGCATGGCGCCGTGCGGTTCAGCAACAACCCTGCCCTGTGCAACGTGGAGAGCATCCAGTGGCGGGACATAGTCAGCAGTGACTTTCTCAGCAACATGTCGATGGACTTCCAGAACCACCTGGGCAGCTGCCAAAAGTGTGATCCAAGCTGTCCCAATGGGAGCTGCTGGGGTGCAGGAGAGGAGAACTGCCAGAAACTGACCAAAATCATCTGTGCCCAGCAGTGCTCCGGGCGCTGCCGTGGCAAGTCCCCCAGTGACTGCTGCCACAACCAGTGTGCTGCAGGCTGCACAGGCCCCCGGGAGAGCGACTGCCTGGTCTGCCGCAAATTCCGAGACGAAGCCACGTGCAAGGACACCTGCCCCCCACTCATGCTCTACAACCCCACCACGTACCAGATGGATGTGAACCCCGAGGGCAAATACAGCTTTGGTGCCACCTGCGTGAAGAAGTGTCCCCGTAATTATGTGGTGACAGATCACGGCTCGTGCGTCCGAGCCTGTGGGGCCGACAGCTATGAGATGGAGGAAGACGGCGTCCGCAAGTGTAAGAAGTGCGAAGGGCCTTGCCGCAAAGTGTGTAACGGAATAGGTATTGGTGAATTTAAAGACTCACTCTCCATAAATGCTACGAATATTAAACACTTCAAAAACTGCACCTCCATCAGTGGCGATCTCCACATCCTGCCGGTGGCATTTAGGGGTGAC
TCCTTCACACATACTCCTCCTCTGGATCCACAGGAACTGGATATTCTGAAAACCGTAAAGGAAATCACAGGGTTTTTGCTGATTCAGGCTTGGCCTGAAAACAGGACGGACCTCCATGCCTTTGAGAACCTAGAAATCATACGCGGCAGGACCAAGCAACATGGTCAGTTTTCTCTTGCAGTCGTCAGCCTGAACATAACATCCTTGGGATTACGCTCCCTCAAGGAGATAAGTGATGGAGATGTGATAATTTCAGGAAACAAAAATTTGTGCTATGCAAATACAATAAACTGGAAAAAACTGTTTGGGACCTCCGGTCAGAAAACCAAAATTATAAGCAACAGAGGTGAAAACAGCTGCAAGGCCACAGGCCAGGTCTGCCATGCCTTGTGCTCCCCCGAGGGCTGCTGGGGCCCGGAGCCCAGGGACTGCGTCTCTTGCCGGAATGTCAGCCGAGGCAGGGAATGCGTGGACAAGTGCAACCTTCTGGAGGGTGAGCCAAGGGAGTTTGTGGAGAACTCTGAGTGCATACAGTGCCACCCAGAGTGCCTGCCTCAGGCCATGAACATCACCTGCACAGGACGGGGACCAGACAACTGTATCCAGTGTGCCCACTACATTGACGGCCCCCACTGCGTCAAGACCTGCCCGGCAGGAGTCATGGGAGAAAACAACACCCTGGTCTGGAAGTACGCAGACGCCGGCCATGTGTGCCACCTGTGCCATCCAAACTGCACCTACGGATGCACTGGGCCAGGTCTTGAAGGCTGTCCAACGAATGGGCCTAAGATCCCGTCCATCGCCACTGGGATGGTGGGGGCCCTCCTCTTGCTGCTGGTGGTGGCCCTGGGGATCGGCCTCTTCATGCGAAGGCGCCACATCGTTCGGAAGCGCACGCTGCGGAGGCTGCTGCAGGAGAGGGAGCTTGTGGAGCCTCTTACACCCAGTGGAGAAGCTCCCAACCAAGCTCTCTTGAGGATCTTGAAGGAAACTGAATTCAAAAAGATCAAAGTGCTGGGCTCCGGTGCGTTCGGCACGGTGTATAAGGGACTCTGGATCCCAGAAGGTGAGAAAGTTAAAATTCCCGTCGCTATCAAGGAATTAAGAGAAGCAACATCTCCGAAAGCCAACAAGGAAATCCTCGATGAAGCCTACGTGATGGCCAGCGTGGACAACCCCCACGTGTGCCGCCTGCTGGGCATCTGCCTCACCTCCACCGTGCAGCTCATCACGCAGCTCATGCCCTTCGGCTGCCTCCTGGACTATGTCCGGGAACACAAAGACAATATTGGCTCCCAGTACCTGCTCAACTGGTGTGTGCAGATCGCAAAGGGCATGAACTACTTGGAGGACCGTCGCTTGGTGCACCGCGACCTGGCAGCCAGGAACGTACTGGTGAAAACACCGCAGCATGTCAAGATCACAGATTTTGGGCTGGCCAAACTGCTGGGTGCGGAAGAGAAAGAATACCATGCAGAAGGAGGCAAAGTGCCTATCAAGTGGATGGCATTGGAATCAATTTTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGGGTGACCGTTTGGGAGTTGATGACCTTTGGATCCAAGCCATATGACGGAATCCCTGCCAGCGAGATCTCCTCCATCCTGGAGAAAGGAGAACGCCTCCCTCAGCCACCCATATGTACCATCGATGTCTACATGATCATGGTCAAGTGCTGGATGATAGACGCAGATAGTCGCCCAAAGTTCCGTGAGTTGATCATCGAATTCTCCAAAATGGCCCGAGACCCCCAGCGCTACCTTGTCATTCAGGGGGATGAAAGAATGCATTTGCCAAGTCCTACAGACTCCAACTTCTACCGTGCCCTGATGGATGAAGAAGACATGGACGACGTGGTGGATGCCGACGAGTACCTCATCCCACAGCAGGGCTTCTTCAGCAGCCCCTCCACGTCACGGACTCCCCTCCTGAGCTCTCTGAGTGCAACCAGCAACAATTCCAC
CGTGGCTTGCATTGATAGAAATGGGCTGCAAAGCTGTCCCATCAAGGAAGACAGCTTCTTGCAGCGATACAGCTCAGACCCCACAGGCGCCTTGACTGAGGACAGCATAGACGACACCTTCCTCCCAGTGCCTGAATACATAAACCAGTCCGTTCCCAAAAGGCCCGCTGGCTCTGTGCAGAATCCTGTCTATCACAATCAGCCTCTGAACCCCGCGCCCAGCAGAGACCCACACTACCAGGACCCCCACAGCACTGCAGTGGGCAACCCCGAGTATCTCAACACTGTCCAGCCCACCTGTGTCAACAGCACATTCGACAGCCCTGCCCACTGGGCCCAGAAAGGCAGCCACCAAATTAGCCTGGACAACCCTGACTACCAGCAGGACTTCTTTCCCAAGGAAGCCAAGCCAAATGGCATCTTTAAGGGCTCCACAGCTGAAAATGCAGAATACCTAAGGGTCGCGCCACAAAGCAGTGAATTTATTGGAGCATGACCACGGAGGATAGTATGAGCCCTAAAAATCCAGACTCTTTCGATACCCAGGACCAAGCCACAGCAGGTCCTCCATCCCAACAGCCATGCCCGCATTAGCTCTTAGACCCACAGACTGGTTTTGCAACGTTTACACCGACTAGCCAGGAAGTACTTCCACCTCGGGCACATTTTGGGAAGTTGCATTCCTTTGTCTTCAAACTGTGAAGCATTTACAGAAACGCATCCAGCAAGAATATTGTCCCTTTGAGCAGAAATTTATCTTTCAAAGAGGTATATTTGAAAAAAAAAAAAAGTATATGTGAGGATTTTTATTGATTGGGGATCTTGGAGTTTTTCATTGTCGCTATTGATTTTTACTTCAATGGGCTCTTCCAACAAGGAAGAAGCTTGCTGGTAGCACTTGCTACCCTGAGTTCATCCAGGCCCAACTGTGAGCAAGGAGCACAAGCCACAAGTCTTCCAGAGGATGCTTGATTCCAGTGGTTCTGCTTCAAGGCTTCCACTGCAAAACACTAAAGATCCAAGAAGGCCTTCATGGCCCCAGCAGGCCGGATCGGTACTGTATCAAGTCATGGCAGGTACAGTAGGATAAGCCACTCTGTCCCTTCCTGGGCAAAGAAGAAACGGAGGGGATGGAATTCTTCCTTAGACTTACTTTTGTAAAAATGTCCCCACGGTACTTACTCCCCACTGATGGACCAGTGGTTTCCAGTCATGAGCGTTAGACTGACTTGTTTGTCTTCCATTCCATTGTTTTGAAACTCAGTATGCTGCCCCTGTCTTGCTGTCATGAAATCAGCAAGAGAGGATGACACATCAAATAATAACTCGGATTCCAGCCCACATTGGATTCATCAGCATTTGGACCAATAGCCCACAGCTGAGAATGTGGAATACCTAAGGATAGCACCGCTTTTGTTCTCGCAAAAACGTATCTCCTAATTTGAGGCTCAGATGAAATGCATCAGGTCCTTTGGGGCATAGATCAGAAGACTACAAAAATGAAGCTGCTCTGAAATCTCCTTTAGCCATCACCCCAACCCCCCAAAATTAGTTTGTGTTACTTATGGAAGATAGTTTTCTCCTTTTACTTCACTTCAAAAGCTTTTTACTCAAAGAGTATATGTTCCCTCCAGGTCAGCTGCCCCCAAACCCCCTCCTTACGCTTTGTCACACAAAAAGTGTCTCTGCCTTGAGTCATCTATTCAAGCACTTACAGCTCTGGCCACAACAGGGCATTTTACAGGTGCGAATGACAGTAGCATTATGAGTAGTGTGGAATTCAGGTAGTAAATATGAAACTAGGGTTTGAAATTGATAATGCTTTCACAACATTTGCAGATGTTTTAGAAGGAAAAAAGTTCCTTCCTAAAATAATTTCTCTACAATTGGAAGATTGGAAGATTCAGCTAGTTAGGAGCCCACCTTTTTTCCTAATCTGTGTGTGCCCTGTAACCTGACTGGTTAACAGCAGTCCTTTGTAAACAGTGTTTTAAACTCTC
CTAGTCAATATCCACCCCATCCAATTTATCAAGGAAGAAATGGTTCAGAAAATATTTTCAGCCTACAGTTATGTTCAGTCACACACACATACAAAATGTTCCTTTTGCTTTTAAAGTAATTTTTGACTCCCAGATCAGTCAGAGCCCCTACAGCATTGTTAAGAAAGTATTTGATTTTTGTCTCAATGAAAATAAAACTATATTCATTTCCACTCTAAAAAAAAAAAAAAAAA"}, - {"NM_005922.2", "AAGATGGCCGCGGCGCGCACGGCTCCTGCGGCGGGGTAGAGGCGGAGGCGGAGTCGAGTCACTCCCGCACTTCGGGGCTCCGGTGCCCCGCGCCAGGCTGCAGCTTACTGCCCGCCGCGGCCATGCGGGGCTCCGTGCACGGATGAGAGAAGCCGCTGCCGCGCTGGTCCCTCCTCCCGCCTTTGCCGTCACGCCTGCCGCCGCCATGGAGGAGCCGCCGCCACCGCCGCCGCCGCCACCACCGCCACCGGAACCCGAGACCGAGTCAGAACCCGAGTGCTGCTTGGCGGCGAGGCAAGAGGGCACATTGGGAGATTCAGCTTGCAAGAGTCCTGAATCTGATCTAGAAGACTTCTCCGATGAAACAAATACAGAGAATCTTTATGGTACCTCTCCCCCCAGCACACCTCGACAGATGAAACGCATGTCAACCAAACATCAGAGGAATAATGTGGGGAGGCCAGCCAGTCGGTCTAATTTGAAAGAAAAAATGAATGCACCAAATCAGCCTCCACATAAAGACACTGGAAAAACAGTGGAGAATGTGGAAGAATACAGCTATAAGCAGGAGAAAAAGATCCGAGCAGCTCTTAGAACAACAGAGCGTGATCATAAAAAAAATGTACAGTGCTCATTCATGTTAGACTCAGTGGGTGGATCTTTGCCAAAAAAATCAATTCCAGATGTGGATCTCAATAAGCCTTACCTCAGCCTTGGCTGTAGCAATGCTAAGCTTCCAGTATCTGTGCCCATGCCTATAGCCAGACCTGCACGCCAGACTTCTAGGACTGACTGTCCAGCAGATCGTTTAAAGTTTTTTGAAACTTTACGACTTTTGCTAAAGCTTACCTCAGTCTCAAAGAAAAAAGACAGGGAGCAAAGAGGACAAGAAAATACGTCTGGTTTCTGGCTTAACCGATCTAACGAACTGATCTGGTTAGAGCTACAAGCCTGGCATGCAGGACGGACAATTAACGACCAGGACTTCTTTTTATATACAGCCCGTCAAGCCATCCCAGATATTATTAATGAAATCCTTACTTTCAAAGTCGACTATGGGAGCTTCGCCTTTGTTAGAGATAGAGCTGGTTTTAATGGTACTTCAGTAGAAGGGCAGTGCAAAGCCACTCCTGGAACAAAGATTGTAGGTTACTCAACACATCATGAGCATCTCCAACGCCAGAGGGTCTCATTTGAGCAGGTAAAACGGATAATGGAGCTGCTAGAGTACATAGAAGCACTTTATCCATCATTGCAGGCTCTTCAGAAGGACTATGAAAAATATGCTGCAAAAGACTTCCAGGACAGGGTGCAGGCACTCTGTTTGTGGTTAAACATCACAAAAGACTTAAATCAGAAATTAAGGATTATGGGCACTGTTTTGGGCATCAAGAATTTATCAGACATTGGCTGGCCAGTGTTTGAAATCCCTTCCCCTCGACCATCCAAAGGTAATGAGCCGGAGTATGAGGGTGATGACACAGAAGGAGAATTAAAGGAGTTGGAAAGTAGTACGGATGAGAGTGAAGAAGAACAAATCTCTGATCCTAGGGTACCGGAAATCAGACAGCCCATAGATAACAGCTTCGACATCCAGTCGCGGGACTGCATATCCAAGAAGCTTGAGAGGCTCGAATCTGAGGATGATTCTCTTGGCTGGGGAGCACCAGACTGGAGCACAGAAGCAGGCTTTAGTAGACATTGTCTGACTTCTATTTATAGACCATTTGTAGACAAAGCACTG
AAGCAGATGGGGTTAAGAAAGTTAATTTTAAGACTTCACAAGCTAATGGATGGTTCCTTGCAAAGGGCACGTATAGCATTGGTAAAGAACGATCGTCCAGTGGAGTTTTCTGAATTTCCAGATCCCATGTGGGGTTCAGATTATGTGCAGTTGTCAAGGACACCACCTTCATCTGAGGAGAAATGCAGTGCTGTGTCGTGGGAGGAGCTGAAGGCCATGGATTTACCTTCATTCGAACCTGCCTTCCTAGTTCTCTGCCGAGTCCTTCTGAATGTCATACATGAGTGTCTGAAGTTAAGATTGGAGCAGAGACCTGCTGGAGAACCATCTCTCTTGAGTATTAAGCAGCTGGTGAGAGAGTGTAAGGAGGTCCTGAAGGGCGGCCTGCTGATGAAGCAGTACTACCAGTTCATGCTGCAGGAGGTTCTGGAGGACTTGGAGAAGCCCGACTGCAACATTGACGCTTTTGAAGAGGATCTACATAAAATGCTTATGGTGTATTTTGATTACATGAGAAGCTGGATCCAAATGCTACAGCAATTACCTCAAGCATCGCATAGTTTAAAAAATCTGTTAGAAGAAGAATGGAATTTCACCAAAGAAATAACTCATTACATACGGGGAGGAGAAGCACAGGCCGGGAAGCTTTTCTGTGACATTGCAGGAATGCTGCTGAAATCTACAGGAAGTTTTTTAGAATTTGGCTTACAGGAGAGCTGTGCTGAATTTTGGACTAGTGCGGATGACAGCAGTGCTTCCGACGAAATCAGGAGGTCTGTTATAGAGATCAGTCGAGCCCTGAAGGAGCTCTTCCATGAAGCCAGAGAAAGGGCTTCCAAAGCACTTGGATTTGCTAAAATGTTGAGAAAGGACCTGGAAATAGCAGCAGAATTCAGGCTTTCAGCCCCAGTTAGAGACCTCCTGGATGTTCTGAAATCAAAACAGTATGTCAAGGTGCAAATTCCTGGGTTAGAAAACTTGCAAATGTTTGTTCCAGACACTCTTGCTGAGGAGAAGAGTATTATTTTGCAGTTACTCAATGCAGCTGCAGGAAAGGACTGTTCAAAAGATTCAGATGACGTACTCATCGATGCCTATCTGCTTCTGACCAAGCACGGTGATCGAGCCCGTGATTCAGAGGACAGCTGGGGCACCTGGGAGGCACAGCCTGTCAAAGTCGTGCCTCAGGTGGAGACTGTTGACACCCTGAGAAGCATGCAGGTGGATAATCTTTTACTAGTTGTCATGCAGTCTGCGCATCTCACAATTCAGAGAAAAGCTTTCCAGCAGTCCATTGAGGGACTTATGACTCTGTGCCAGGAGCAGACATCCAGTCAGCCGGTCATCGCCAAAGCTTTGCAGCAGCTGAAGAATGATGCATTGGAGCTATGCAACAGGATAAGCAATGCCATTGACCGCGTGGACCACATGTTCACATCAGAATTTGATGCTGAGGTTGATGAATCTGAATCTGTCACCTTGCAACAGTACTACCGAGAAGCAATGATTCAGGGGTACAATTTTGGATTTGAGTATCATAAAGAAGTTGTTCGTTTGATGTCTGGGGAGTTTAGACAGAAGATAGGAGACAAATATATAAGCTTTGCCCGGAAGTGGATGAATTATGTCCTGACTAAATGTGAGAGTGGTAGAGGTACAAGACCCAGGTGGGCGACTCAAGGATTTGATTTTCTACAAGCAATTGAACCTGCCTTTATTTCAGCTTTACCAGAAGATGACTTCTTGAGTTTACAAGCCTTGATGAATGAATGCATTGGCCATGTCATAGGAAAACCACACAGTCCTGTTACAGGTTTGTACCTTGCCATTCATCGGAACAGCCCCCGTCCTATGAAGGTACCTCGATGCCATAGTGACCCTCCTAACCCACACCTCATTATCCCCACTCCAGAGGGATTCAGCACTCGGAGCATGCCTTCCGACGCGCGGAGCCATGGCAGCCCTGCTGCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGC
TGCCAGTCGGCCCAGCCCCTCTGGTGGTGACTCTGTGCTGCCCAAATCCATCAGCAGTGCCCATGATACCAGGGGTTCCAGCGTTCCTGAAAATGATCGATTGGCTTCCATAGCTGCTGAATTGCAGTTTAGGTCCCTGAGTCGTCACTCAAGCCCCACGGAGGAGCGAGATGAACCAGCATATCCAAGAGGAGATTCAAGTGGGTCCACAAGAAGAAGTTGGGAACTTCGGACACTAATCAGCCAGAGTAAAGATACTGCTTCTAAACTAGGACCCATAGAAGCTATCCAGAAGTCAGTCCGATTGTTTGAAGAAAAGAGGTACCGAGAAATGAGGAGAAAGAATATCATTGGTCAAGTTTGTGATACGCCTAAGTCCTATGATAATGTTATGCACGTTGGCTTGAGGAAGGTGACCTTCAAATGGCAAAGAGGAAACAAAATTGGAGAAGGCCAGTATGGGAAGGTGTACACCTGCATCAGCGTCGACACCGGGGAGCTGATGGCCATGAAAGAGATTCGATTTCAACCTAATGACCATAAGACTATCAAGGAAACTGCAGACGAATTGAAAATATTCGAAGGCATCAAACACCCCAATCTGGTTCGGTATTTTGGTGTGGAGCTCCATAGAGAAGAAATGTACATCTTCATGGAGTACTGCGATGAGGGGACTTTAGAAGAGGTGTCAAGGCTGGGACTTCAGGAACATGTGATTAGGCTGTATTCAAAGCAGATCACCATTGCGATCAACGTCCTCCATGAGCATGGCATAGTCCACCGTGACATTAAAGGTGCCAATATCTTCCTTACCTCATCTGGATTAATCAAACTGGGAGATTTTGGATGTTCAGTAAAGCTCAAAAACAATGCCCAGACCATGCCTGGTGAAGTGAACAGCACCCTGGGGACAGCAGCATACATGGCACCTGAAGTCATCACTCGTGCCAAAGGAGAGGGCCATGGGCGTGCGGCCGACATCTGGAGTCTGGGGTGTGTTGTCATAGAGATGGTGACTGGCAAGAGGCCTTGGCATGAGTATGAGCACAACTTTCAAATTATGTATAAAGTGGGGATGGGACATAAGCCACCAATCCCTGAAAGATTAAGCCCTGAAGGAAAGGACTTCCTTTCTCACTGCCTTGAGAGTGACCCAAAGATGAGATGGACCGCCAGCCAGCTCCTCGACCATTCGTTTGTCAAGGTTTGCACAGATGAAGAATGAAGCCTAGTAGAATATGGACTTGGAAAATTCTCTTAATCACTACTGTATGTAATATTTACATAAAGACTGTGCTGAGAAGCAGTATAAGCCTTTTTAACCTTCCAAGACTGAAGACTGCACAGGTGACAAGCGTCACTTCTCCTGCTGCTCCTGTTTGTCTGATGTGGCAAAAGGCCCTCTGGAGGGCTGGTGGCCACGAGGTTAAAGAAGCTGCATGTTAAGTGCCATTACTACTGTACACGGACCATCGCCTCTGTCTCCTCCGTGTCTCGCGCGACTGAGAACCGTGACATCAGCGTAGTGTTTTGACCTTTCTAGGTTCAAAAGAAGTTGTAGTGTTATCAGGCGTCCCATACCTTGTTTTTAATCTCCTGTTTGTTGAGTGCACTGACTGTGAAACCTTTACCTTTTTTGTTGTTGTTGGCAAGCTGCAGGTTTGTAATGCAAAAGGCTGATTACTGAAATTTAAGAAAAAGGTTCTTTTTTCAATAAATGGTTTATTTTAGGAAAGCTCAAAAAAAAAAAAAAAAAA"}, - {"NM_006724.2", 
"AAGATGGCCGCGGCGCGCACGGCTCCTGCGGCGGGGTAGAGGCGGAGGCGGAGTCGAGTCACTCCCGCACTTCGGGGCTCCGGTGCCCCGCGCCAGGCTGCAGCTTACTGCCCGCCGCGGCCATGCGGGGCTCCGTGCACGGATGAGAGAAGCCGCTGCCGCGCTGGTCCCTCCTCCCGCCTTTGCCGTCACGCCTGCCGCCGCCATGGAGGAGCCGCCGCCACCGCCGCCGCCGCCACCACCGCCACCGGAACCCGAGACCGAGTCAGAACCCGAGTGCTGCTTGGCGGCGAGGCAAGAGGGCACATTGGGAGATTCAGCTTGCAAGAGTCCTGAATCTGATCTAGAAGACTTCTCCGATGAAACAAATACAGAGAATCTTTATGGTACCTCTCCCCCCAGCACACCTCGACAGATGAAACGCATGTCAACCAAACATCAGAGGAATAATGTGGGGAGGCCAGCCAGTCGGTCTAATTTGAAAGAAAAAATGAATGCACCAAATCAGCCTCCACATAAAGACACTGGAAAAACAGTGGAGAATGTGGAAGAATACAGCTATAAGCAGGAGAAAAAGATCCGAGCAGCTCTTAGAACAACAGAGCGTGATCATAAAAAAAATGTACAGTGCTCATTCATGTTAGACTCAGTGGGTGGATCTTTGCCAAAAAAATCAATTCCAGATGTGGATCTCAATAAGCCTTACCTCAGCCTTGGCTGTAGCAATGCTAAGCTTCCAGTATCTGTGCCCATGCCTATAGCCAGACCTGCACGCCAGACTTCTAGGACTGACTGTCCAGCAGATCGTTTAAAGTTTTTTGAAACTTTACGACTTTTGCTAAAGCTTACCTCAGTCTCAAAGAAAAAAGACAGGGAGCAAAGAGGACAAGAAAATACGTCTGGTTTCTGGCTTAACCGATCTAACGAACTGATCTGGTTAGAGCTACAAGCCTGGCATGCAGGACGGACAATTAACGACCAGGACTTCTTTTTATATACAGCCCGTCAAGCCATCCCAGATATTATTAATGAAATCCTTACTTTCAAAGTCGACTATGGGAGCTTCGCCTTTGTTAGAGATAGAGCTGGTTTTAATGGTACTTCAGTAGAAGGGCAGTGCAAAGCCACTCCTGGAACAAAGATTGTAGGTTACTCAACACATCATGAGCATCTCCAACGCCAGAGGGTCTCATTTGAGCAGGTAAAACGGATAATGGAGCTGCTAGAGTACATAGAAGCACTTTATCCATCATTGCAGGCTCTTCAGAAGGACTATGAAAAATATGCTGCAAAAGACTTCCAGGACAGGGTGCAGGCACTCTGTTTGTGGTTAAACATCACAAAAGACTTAAATCAGAAATTAAGGATTATGGGCACTGTTTTGGGCATCAAGAATTTATCAGACATTGGCTGGCCAGTGTTTGAAATCCCTTCCCCTCGACCATCCAAAGGTAATGAGCCGGAGTATGAGGGTGATGACACAGAAGGAGAATTAAAGGAGTTGGAAAGTAGTACGGATGAGAGTGAAGAAGAACAAATCTCTGATCCTAGGGTACCGGAAATCAGACAGCCCATAGATAACAGCTTCGACATCCAGTCGCGGGACTGCATATCCAAGAAGCTTGAGAGGCTCGAATCTGAGGATGATTCTCTTGGCTGGGGAGCACCAGACTGGAGCACAGAAGCAGGCTTTAGTAGACATTGTCTGACTTCTATTTATAGACCATTTGTAGACAAAGCACTGAAGCAGATGGGGTTAAGAAAGTTAATTTTAAGACTTCACAAGCTAATGGATGGTTCCTTGCAAAGGGCACGTATAGCATTGGTAAAGAACGATCGTCCAGTGGAGTTTTCTGAATTTCCAGATCCCATGTGGGGTTCAGATTATGTGCAGTTGTCAAGGACACCACCTTCATCTGAGGAGAAATGCAGTGCTGTGTCGTGGGAGGAGCTGAAGGCCATGGATTTACCTTCATTCGAACCTGCCTTCCTAGTTCTC
TGCCGAGTCCTTCTGAATGTCATACATGAGTGTCTGAAGTTAAGATTGGAGCAGAGACCTGCTGGAGAACCATCTCTCTTGAGTATTAAGCAGCTGGTGAGAGAGTGTAAGGAGGTCCTGAAGGGCGGCCTGCTGATGAAGCAGTACTACCAGTTCATGCTGCAGGAGGTTCTGGAGGACTTGGAGAAGCCCGACTGCAACATTGACGCTTTTGAAGAGGATCTACATAAAATGCTTATGGTGTATTTTGATTACATGAGAAGCTGGATCCAAATGCTACAGCAATTACCTCAAGCATCGCATAGTTTAAAAAATCTGTTAGAAGAAGAATGGAATTTCACCAAAGAAATAACTCATTACATACGGGGAGGAGAAGCACAGGCCGGGAAGCTTTTCTGTGACATTGCAGGAATGCTGCTGAAATCTACAGGAAGTTTTTTAGAATTTGGCTTACAGGAGAGCTGTGCTGAATTTTGGACTAGTGCGGATGACAGCAGTGCTTCCGACGAAATCAGGAGGTCTGTTATAGAGATCAGTCGAGCCCTGAAGGAGCTCTTCCATGAAGCCAGAGAAAGGGCTTCCAAAGCACTTGGATTTGCTAAAATGTTGAGAAAGGACCTGGAAATAGCAGCAGAATTCAGGCTTTCAGCCCCAGTTAGAGACCTCCTGGATGTTCTGAAATCAAAACAGTATGTCAAGGTGCAAATTCCTGGGTTAGAAAACTTGCAAATGTTTGTTCCAGACACTCTTGCTGAGGAGAAGAGTATTATTTTGCAGTTACTCAATGCAGCTGCAGGAAAGGACTGTTCAAAAGATTCAGATGACGTACTCATCGATGCCTATCTGCTTCTGACCAAGCACGGTGATCGAGCCCGTGATTCAGAGGACAGCTGGGGCACCTGGGAGGCACAGCCTGTCAAAGTCGTGCCTCAGGTGGAGACTGTTGACACCCTGAGAAGCATGCAGGTGGATAATCTTTTACTAGTTGTCATGCAGTCTGCGCATCTCACAATTCAGAGAAAAGCTTTCCAGCAGTCCATTGAGGGACTTATGACTCTGTGCCAGGAGCAGACATCCAGTCAGCCGGTCATCGCCAAAGCTTTGCAGCAGCTGAAGAATGATGCATTGGAGCTATGCAACAGGATAAGCAATGCCATTGACCGCGTGGACCACATGTTCACATCAGAATTTGATGCTGAGGTTGATGAATCTGAATCTGTCACCTTGCAACAGTACTACCGAGAAGCAATGATTCAGGGGTACAATTTTGGATTTGAGTATCATAAAGAAGTTGTTCGTTTGATGTCTGGGGAGTTTAGACAGAAGATAGGAGACAAATATATAAGCTTTGCCCGGAAGTGGATGAATTATGTCCTGACTAAATGTGAGAGTGGTAGAGGTACAAGACCCAGGTGGGCGACTCAAGGATTTGATTTTCTACAAGCAATTGAACCTGCCTTTATTTCAGCTTTACCAGAAGATGACTTCTTGAGTTTACAAGCCTTGATGAATGAATGCATTGGCCATGTCATAGGAAAACCACACAGTCCTGTTACAGGTTTGTACCTTGCCATTCATCGGAACAGCCCCCGTCCTATGAAGGTACCTCGATGCCATAGTGACCCTCCTAACCCACACCTCATTATCCCCACTCCAGAGGGATTCAGGGGTTCCAGCGTTCCTGAAAATGATCGATTGGCTTCCATAGCTGCTGAATTGCAGTTTAGGTCCCTGAGTCGTCACTCAAGCCCCACGGAGGAGCGAGATGAACCAGCATATCCAAGAGGAGATTCAAGTGGGTCCACAAGAAGAAGTTGGGAACTTCGGACACTAATCAGCCAGAGTAAAGATACTGCTTCTAAACTAGGACCCATAGAAGCTATCCAGAAGTCAGTCCGATTGTTTGAAGAAAAGAGGTACCGAGAAATGAGGAGAAAGAATATCATTGGTCAAGTTTGTGATACGCCTAAGTCCTATGATAATGTTATGCACGTTGGCTT
GAGGAAGGTGACCTTCAAATGGCAAAGAGGAAACAAAATTGGAGAAGGCCAGTATGGGAAGGTGTACACCTGCATCAGCGTCGACACCGGGGAGCTGATGGCCATGAAAGAGATTCGATTTCAACCTAATGACCATAAGACTATCAAGGAAACTGCAGACGAATTGAAAATATTCGAAGGCATCAAACACCCCAATCTGGTTCGGTATTTTGGTGTGGAGCTCCATAGAGAAGAAATGTACATCTTCATGGAGTACTGCGATGAGGGGACTTTAGAAGAGGTGTCAAGGCTGGGACTTCAGGAACATGTGATTAGGCTGTATTCAAAGCAGATCACCATTGCGATCAACGTCCTCCATGAGCATGGCATAGTCCACCGTGACATTAAAGGTGCCAATATCTTCCTTACCTCATCTGGATTAATCAAACTGGGAGATTTTGGATGTTCAGTAAAGCTCAAAAACAATGCCCAGACCATGCCTGGTGAAGTGAACAGCACCCTGGGGACAGCAGCATACATGGCACCTGAAGTCATCACTCGTGCCAAAGGAGAGGGCCATGGGCGTGCGGCCGACATCTGGAGTCTGGGGTGTGTTGTCATAGAGATGGTGACTGGCAAGAGGCCTTGGCATGAGTATGAGCACAACTTTCAAATTATGTATAAAGTGGGGATGGGACATAAGCCACCAATCCCTGAAAGATTAAGCCCTGAAGGAAAGGACTTCCTTTCTCACTGCCTTGAGAGTGACCCAAAGATGAGATGGACCGCCAGCCAGCTCCTCGACCATTCGTTTGTCAAGGTTTGCACAGATGAAGAATGAAGCCTAGTAGAATATGGACTTGGAAAATTCTCTTAATCACTACTGTATGTAATATTTACATAAAGACTGTGCTGAGAAGCAGTATAAGCCTTTTTAACCTTCCAAGACTGAAGACTGCACAGGTGACAAGCGTCACTTCTCCTGCTGCTCCTGTTTGTCTGATGTGGCAAAAGGCCCTCTGGAGGGCTGGTGGCCACGAGGTTAAAGAAGCTGCATGTTAAGTGCCATTACTACTGTACACGGACCATCGCCTCTGTCTCCTCCGTGTCTCGCGCGACTGAGAACCGTGACATCAGCGTAGTGTTTTGACCTTTCTAGGTTCAAAAGAAGTTGTAGTGTTATCAGGCGTCCCATACCTTGTTTTTAATCTCCTGTTTGTTGAGTGCACTGACTGTGAAACCTTTACCTTTTTTGTTGTTGTTGGCAAGCTGCAGGTTTGTAATGCAAAAGGCTGATTACTGAAATTTAAGAAAAAGGTTCTTTTTTCAATAAATGGTTTATTTTAGGAAAGCTCAAAAAAAAAAAAAAAAAA"}, - {"NM_175741.1", 
"GAGTTCCGTATTCTAGTTCTGTGTGATCTGATCTTTACCTTCCCTTCCTTGGATCCCTGTGCACCTACTGGAGCCAGGTTACTCTGGGTCCTGGACCTGACTGCCTCATTCTGGAGGCTTCCAGACAGCCACAGTTAGTGCCCAAACCTGAGAGGATGGCTTCAGATGGAGCATCTGCATTGCCGGGACCGGATATGAGCATGAAACCTAGTGCCGCCCTGTCTCCATCCCCTGCACTTCCCTTTCTCCCACCAACTTCTGACCCACCAGACCACCCACCCAGGGAGCCACCTCCACAGCCCATCATGCCTTCAGTATTCTCTCCAGACAACCCTCTGATGCTCTCTGCTTTCCCCAGCTCACTGTTGGTGACAGGGGACGGGGGCCCTTGCCTCAGTGGGGCTGGGGCTGGCAAGGTCATTGTCAAAGTCAAGACAGAAGGGGGGTCAGCTGAGCCCTCTCAAACTCAGAACTTTATCCTTACTCAGACTGCCCTCAATTCGACTGCCCCGGGCACTCCCTGTGGAGGCCTTGAGGGTCCTGCACCTCCATTTGTGACAGCATCTAATGTGAAGACCATTCTGCCCTCTAAGGCTGTTGGTGTCAGCCAGGAGGGTCCTCCAGGCCTTCCGCCTCAGCCTCCACCACCAGTTGCTCAACTGGTCCCCATTGTGCCCCTGGAAAAAGCTTGGCCAGGGCCACATGGGACAACCGGGGAAGGAGGTCCTGTGGCCACTCTATCCAAGCCTTCCCTAGGTGACCGCTCCAAAATTTCCAAGGACGTTTATGAGAACTTCCGTCAGTGGCAGCGTTACAAAGCCTTGGCCCGGAGGCACCTATCCCAGAGTCCTGACACAGAAGCTCTTTCCTGTTTTCTTATCCCAGTGCTTCGTTCCCTGGCCCGGCTGAAGCCCACTATGACCCTGGAGGAGGGACTGCCATTGGCTGTGCAGGAGTGGGAGCACACCAGCAACTTTGACCGGATGATCTTTTATGAGATGGCAGAAAGGTTCATGGAGTTTGAGGCTGAGGAGATGCAGATTCAGAACACACAGCTGATGAATGGGTCTCAGGGCCTGTCTCCTGCAACCCCTTTGAAACTTGATCCTCTAGGGCCCCTGGCCTCTGAGGTTTGCCAGCAGCCAGTGTACATTCCGAAGAAGGCAGCCTCCAAGACACGGGCCCCCCGCCGGCGTCAGCGTAAAGCCCAGAGACCTCCTGCTCCTGAGGCACCCAAGGAGATCCCACCAGAAGCTGTGAAGGAGTATGTTGACATCATGGAATGGCTGGTGGGGACTCACTTGGCCACTGGGGAGTCAGATGGAAAACAAGAGGAAGAAGGGCAGCAGCAGGAGGAGGAAGGGATGTATCCAGATCCAGGTCTCCTGAGCTACATCAATGAGCTGTGTTCTCAGAAGGTCTTTGTCTCCAAGGTGGAGGCTGTCATTCACCCTCAATTTCTGGCAGATCTGCTGTCCCCAGAAAAACAGAGAGATCCCTTGGCCTTAATTGAGGAGCTAGAGCAAGAAGAAGGACTCACTCTTGCCCAGCTGGTCCAGAAGCGACTCATGGCCTTGGAAGAGGAGGAAGATGCAGAGGCGCCTCCAAGTTTCAGTGGCGCTCAGTTGGACTCAAGTCCTTCTGGTTCTGTTGAGGATGAAGATGGGGATGGGCGGCTTCGGCCCTCACCTGGGCTTCAGGGGGCTGGGGGCGCCGCTTGCCTTGGAAAGGTTTCTTCTTCAGGAAAACGGGCAAGAGAAGTGCATGGTGGGCAGGAGCAAGCCCTAGATAGCCCCAGAGGGATGCACAGGGATGGGAACACTCTGCCATCCCCCAGCAGCTGGGACCTGCAGCCAGAACTTGCAGCTCCACAGGGAACTCCGGGACCCTTGGGTGTGGAGAGGAGAGGGTCTGGGAAGGTTATAAACCAGGTATCTCTACATCAGGATGGCCATCTAGGAGGCGCTGGGCCTCCTGGGCACTGCCTGGTGGCTGATAG
GACTTCAGAGGCTCTGCCCCTTTGTTGGCAGGGAGGCTTCCAGCCTGAGAGCACTCCCAGTTTGGATGCTGGACTTGCAGAGCTGGCTCCTCTGCAAGGACAAGGGTTAGAAAAGCAAGTCCTGGGATTGCAGAAAGGACAACAAACAGGGGGTCGTGGAGTGCTTCCTCAAGGGAAGGAGCCTTTAGCAGTGCCCTGGGAAGGCTCTTCAGGAGCCATGTGGGGAGATGACAGAGGTACCCCCATGGCTCAGAGTTATGATCAGAATCCTTCCCCTAGAGCAGCTGGGGAGAGGGACGATGTCTGTCTCAGCCCAGGAGTTTGGCTGAGCAGTGAGATGGATGCTGTAGGCTTGGAGCTGCCTGTACAAATAGAGGAGGTCATAGAGAGCTTCCAAGTTGAGAAGTGTGTAACTGAGTATCAGGAAGGCTGCCAGGGACTGGGCTCCAGGGGCAACATTTCCCTGGGTCCTGGAGAAACCCTAGTACCTGGGGATACGGAGAGCAGTGTGATTCCCTGTGGAGGCACAGTTGCGGCAGCTGCCCTAGAAAAGAGAAACTATTGCAGCTTGCCAGGACCTTTGAGGGCCAACAGCCCACCCTTGAGGTCCAAAGAAAATCAAGAACAGAGCTGTGAAACCGTAGGGCATCCCAGTGATCTGTGGGCAGAAGGTTGCTTCCCATTGCTAGAAAGTGGTGATTCCACACTGGGGTCTTCCAAAGAAACCCTTCCACCCACATGCCAAGGCAATCTCCTTATCATGGGGACTGAGGATGCCTCCTCCTTGCCTGAAGCCAGTCAAGAGGCAGGGAGCAGAGGCAATTCCTTTTCTCCTCTGTTGGAAACCATAGAACCTGTCAACATACTAGATGTTAAAGATGACTGTGGCCTCCAACTAAGGGTCAGCGAGGACACCTGCCCACTGAATGTTCATTCTTATGACCCCCAAGGAGAAGGCAGGGTGGATCCTGATCTGTCCAAGCCTAAAAACCTTGCTCCTTTACAAGAGAGTCAGGAGTCTTACACAACTGGGACTCCCAAAGCAACATCTTCTCACCAGGGCCTTGGAAGCACTTTGCCTAGAAGGGGAACCAGGAATGCCATAGTTCCGAGAGAAACTTCTGTTAGTAAAACACACAGGTCAGCAGACAGGGCCAAAGGAAAGGAGAAAAAGAAAAAGGAAGCAGAGGAAGAGGATGAGGAACTCTCCAACTTTGCTTACCTCTTGGCCTCTAAACTTAGCCTCTCACCAAGGGAGCATCCCCTCAGTCCTCACCATGCCTCAGGAGGTCAGGGCAGCCAGAGAGCATCCCACCTGCTCCCTGCTGGAGCAAAAGGCCCCAGCAAACTTCCATATCCTGTTGCCAAGTCTGGGAAGCGAGCTCTAGCTGGAGGTCCAGCCCCTACTGAAAAGACACCCCACTCAGGAGCTCAACTTGGGGTCCCCAGGGAGAAACCCCTAGCTCTGGGAGTAGTTCGACCCTCACAGCCTCGTAAAAGGCGGTGTGACAGTTTTGTCACGGGCAGAAGGAAGAAACGACGTCGTAGCCAGTAGGGAGCAGCGGGACCATCTGACCCCACTTGCCAGTCCCTAAAGGTGGGTGCCCCAGAGTAGATTCCACCCCTGCTGCCCACCAATGGAGAATCCCAATGTTGAATCTCATCCCAATGTTGTTTTGTTGTTCTGCAAAAGTGGCAAGCATGGAGAGAGAGGTCAGACTGGCTAGGCTGCAGGGGGAATTACCTTTGGAAGGAGCTATATAGAAAAAAAATGAATAAAGTGTTTTGTTGGAAAA"}, - {"NM_001244937.1", 
"AGTGTACAGCGGCGGCTGGGGCGGCAGGTGAGGCGGCTGGGGCGTTGCTGTCGTGCGTCCGCAGGCGTCAGGTGCTCAGACCCGAGGGCCGGGAAGGGATTTGGGTTTCACAGGAACCTGGGGCGGGGGTCCGCTATCTTGGGGCTGTCGGGACCGCTGCTTAAATTTGGCCCAGTCCAGACCTCGAGTCGGGCCCCCAGCCAGGCCCACGCCCAGGTCCAGGCCCAGGCCGGTAGGGATCCTCTAGGGTCCCAGCTCGCCTCGATGGAGCTCCTCCCGCCGCTGCCTCAGTCCTTCCTGTTGCTGCTGCTGTTGCCTGCCAAGCCCGCGGCGGGCGAGGACTGGCAGTGCCCGCGCACCCCCTACGCGGCCTCTCGCGACTTTGACGTGAAGTACGTGGTGCCCAGCTTCTCCGCCGGAGGCCTGGTACAGGCCATGGTGACCTACGAGGGCGACAGAAATGAGAGTGCTGTGTTTGTAGCCATACGCAATCGCCTGCATGTGCTTGGGCCTGACCTGAAGTCTGTCCAGAGCCTGGCCACGGGCCCTGCTGGAGACCCTGGCTGCCAGACGTGTGCAGCCTGTGGCCCAGGACCCCACGGCCCTCCCGGTGACACAGACACAAAGGTGCTGGTGCTGGATCCCGCGCTGCCTGCGCTGGTCAGTTGTGGCTCCAGCCTGCAGGGCCGCTGCTTCCTGCATGACCTAGAGCCCCAAGGGACAGCCGTGCATCTGGCAGCGCCAGCCTGCCTCTTCTCAGCCCACCATAACCGGCCCGATGACTGCCCCGACTGTGTGGCCAGCCCATTGGGCACCCGTGTAACTGTGGTTGAGCAAGGCCAGGCCTCCTATTTCTACGTGGCATCCTCACTGGACGCAGCCGTGGCTGCCAGCTTCAGCCCACGCTCAGTGTCTATCAGGCGTCTCAAGGCTGACGCCTCGGGATTCGCACCGGGCTTTGTGGCGTTGTCAGTGCTGCCCAAGCATCTTGTCTCCTACAGTATTGAATACGTGCACAGCTTCCACACGGGAGCCTTCGTATACTTCCTGACTGTACAGCCGGCCAGCGTGACAGATGATCCTAGTGCCCTGCACACACGCCTGGCACGGCTTAGCGCCACTGAGCCAGAGTTGGGTGACTATCGGGAGCTGGTCCTCGACTGCAGATTTGCTCCAAAACGCAGGCGCCGGGGGGCCCCAGAAGGCGGACAGCCCTACCCTGTGCTGCGGGTGGCCCACTCCGCTCCAGTGGGTGCCCAACTTGCCACTGAGCTGAGCATCGCCGAGGGCCAGGAAGTACTATTTGGGGTCTTTGTGACTGGCAAGGATGGTGGTCCTGGCGTGGGCCCCAACTCTGTCGTCTGTGCCTTCCCCATTGACCTGCTGGACACACTAATTGATGAGGGTGTGGAGCGCTGTTGTGAATCCCCAGTCCATCCAGGCCTCCGGCGAGGCCTCGACTTCTTCCAGTCGCCCAGTTTTTGCCCCAACCCGCCTGGCCTGGAAGCCCTCAGCCCCAACACCAGCTGCCGCCACTTCCCTCTGCTGGTCAGTAGCAGCTTCTCACGTGTGGACCTATTCAATGGGCTGTTGGGACCAGTACAGGTCACTGCATTGTATGTGACACGCCTTGACAACGTCACAGTGGCACACATGGGCACAATGGATGGGCGTATCCTGCAGGTGGAGCTGGTCAGGTCACTAAACTACTTGCTGTATGTGTCCAACTTCTCACTGGGTGACAGTGGGCAGCCCGTGCAGCGGGATGTCAGTCGTCTTGGGGACCACCTACTCTTTGCCTCTGGGGACCAGGTTTTCCAGGTACCTATCCAAGGCCCTGGCTGCCGCCACTTCCTGACCTGTGGGCGTTGCCTAAGGGCATGGCATTTCATGGGCTGTGGCTGGTGTGGGAACATGTGCGGCCAGCAGAAGGAGTGTCCTGGCTCCTGGCAACAGGACCACTGCCCACCTAAGCTTACTGAGTTCCACCCCCACAGTG
GACCTCTAAGGGGCAGTACAAGGCTGACCCTGTGTGGCTCCAACTTCTACCTTCACCCTTCTGGTCTGGTGCCTGAGGGAACCCATCAGGTCACTGTGGGCCAAAGTCCCTGCCGGCCACTGCCCAAGGACAGCTCAAAACTCAGACCAGTGCCCCGGAAAGACTTTGTAGAGGAGTTTGAGTGTGAACTGGAGCCCTTGGGCACCCAGGCAGTGGGGCCTACCAACGTCAGCCTCACCGTGACTAACATGCCACCGGGCAAGCACTTCCGGGTAGACGGCACCTCCGTGCTGAGAGGCTTCTCTTTCATGGAGCCAGTGCTGATAGCAGTGCAACCCCTCTTTGGCCCACGGGCAGGAGGCACCTGTCTCACTCTTGAAGGCCAGAGTCTGTCTGTAGGCACCAGCCGGGCTGTGCTGGTCAATGGGACTGAGTGTCTGCTAGCACGGGTCAGTGAGGGGCAGCTTTTATGTGCCACACCCCCTGGGGCCACGGTGGCCAGTGTCCCCCTTAGCCTGCAGGTGGGGGGTGCCCAGGTACCTGGTTCCTGGACCTTCCAGTACAGAGAAGACCCTGTCGTGCTAAGCATCAGCCCCAACTGTGGCTACATCAACTCCCACATCACCATCTGTGGCCAGCATCTAACTTCAGCATGGCACTTAGTGCTGTCATTCCATGACGGGCTTAGGGCAGTGGAAAGCAGGTGTGAGAGGCAGCTTCCAGAGCAGCAGCTGTGCCGCCTTCCTGAATATGTGGTCCGAGACCCCCAGGGATGGGTGGCAGGGAATCTGAGTGCCCGAGGGGATGGAGCTGCTGGCTTTACACTGCCTGGCTTTCGCTTCCTACCCCCACCCCATCCACCCAGTGCCAACCTAGTTCCACTGAAGCCTGAGGAGCATGCCATTAAGTTTGAGGTCTGCGTAGATGGTGAATGTCATATCCTGGGTAGAGTGGTGCGGCCAGGGCCAGATGGGGTCCCACAGAGCACGCTCCTTGGTATCCTGCTGCCTTTGCTGCTGCTTGTGGCTGCACTGGCGACTGCACTGGTCTTCAGCTACTGGTGGCGGAGGAAGCAGCTAGTTCTTCCTCCCAACCTGAATGACCTGGCATCCCTGGACCAGACTGCTGGAGCCACACCCCTGCCTATTCTGTACTCGGGCTCTGACTACAGAAGTGGCCTTGCACTCCCTGCCATTGATGGTCTGGATTCCACCACTTGTGTCCATGGAGCATCCTTCTCCGATAGTGAAGATGAATCCTGTGTGCCACTGCTGCGGAAAGAGTCCATCCAGCTAAGGGACCTGGACTCTGCGCTCTTGGCTGAGGTCAAGGATGTGCTGATTCCCCATGAGCGGGTGGTCACCCACAGTGACCGAGTCATTGGCAAAGGCCACTTTGGAGTTGTCTACCACGGAGAATACATAGACCAGGCCCAGAATCGAATCCAATGTGCCATCAAGTCACTAAGTCGCATCACAGAGATGCAGCAGGTGGAGGCCTTCCTGCGAGAGGGGCTGCTCATGCGTGGCCTGAACCACCCGAATGTGCTGGCTCTCATTGGTATCATGTTGCCACCTGAGGGCCTGCCCCATGTGCTGCTGCCCTATATGTGCCACGGTGACCTGCTCCAGTTCATCCGCTCACCTCAGCGGAACCCCACCGTGAAGGACCTCATCAGCTTTGGCCTGCAGGTAGCCCGCGGCATGGAGTACCTGGCAGAGCAGAAGTTTGTGCACAGGGACCTGGCTGCGCGGAACTGCATGCTGGACGAGTCATTCACAGTCAAGGTGGCTGACTTTGGTTTGGCCCGCGACATCCTGGACAGGGAGTACTATAGTGTTCAACAGCATCGCCACGCTCGCCTACCTGTGAAGTGGATGGCGCTGGAGAGCCTGCAGACCTATAGATTTACCACCAAGTCTGATGTGTGGTCATTTGGTGTGCTGCTGTGGGAACTGCTGACACGGGGTGCCCCACCATACCGCCACATTGACCCTTTT
GACCTTACCCACTTCCTGGCCCAGGGTCGGCGCCTGCCCCAGCCTGAGTATTGCCCTGATTCTCTGTACCAAGTGATGCAGCAATGCTGGGAGGCAGACCCAGCAGTGCGACCCACCTTCAGAGTACTAGTGGGGGAGGTGGAGCAGATAGTGTCTGCACTGCTTGGGGACCATTATGTGCAGCTGCCAGCAACCTACATGAACTTGGGCCCCAGCACCTCGCATGAGATGAATGTGCGTCCAGAACAGCCGCAGTTCTCACCCATGCCAGGGAATGTACGCCGGCCCCGGCCACTCTCAGAGCCTCCTCGGCCCACTTGACTTAGTTCTTGGGCTGGACCTGCTTAGCTGCCTTGAGCTAACCCCAAGCTGCCTCTGGGCCATGCCAGGCCAGAGGGCAGTGGCCCTCCACCTTGTTCCTGCCCTTTAACTTTCAGAGGCAATAGGTAAATGGGGCCCATTAGGTCCCTCACTCCACAGAGTGAGCCAGTGAGGGCAGTCCTGCAACATGTATTTATGGAGTGCCTGCTGTGGACCCTGTCTTCTGGGCACAGTGGACTCAGCAGTGACCACACCAACACTGACCCTTGAACCAATAAAGGAACAAATGACTATTAAAGCACAAAAAAAAAAAAAAAA"}, - {"NM_001220774.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATT
TTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCT
GCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAA
TTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001220766.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGG
GCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTG
AAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCT
TTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001278433.1", "GAGCTGTGGTGGGCTCCACCCAGTTCGAGCTTCCCGGCTGCTTTGGTTACCTAATCAAGCCTGGGCAATGGCAGGCGCCCCTCCCCCAGCCTCGCTGCCGCCTTGCAGTTTGATCTCAGACTGCTGTGCTAGCAATCAGCGAGACTCCGTGGGCGTAGGACCCTCCGAGCCAGAGAACCATGGAGTCTGGCAGTACCGCCGCCAGTGAGGAGGCACGCAGCCTTCGAGAATGTGAGCTCTACGTCCAGAAGCATAACATTCAAGCGCTGCTCAAAGATTCTATTGTGCAGTTGTGCACTGCTCGACCTGAGAGACCCATGGCATTCCTCAGGGAATACTTTGAGAGGTTGGAGAAGGAGGAGGCAAAACAGATTCAGAATCTGCAGAAAGCAGGCACTCGTACAGACTCAAGGGAGGATGAGATTTCTCCTCCTCCACCCAACCCAGTGGTTAAAGGTAGGAGGCGACGAGGTGCTATCAGCGCTGAGGTCTACACGGAGGAAGATGCGGCATCCTATGTTAGAAAGGTTATACCAAAAGATTACAAGACAATGGCCGCTTTAGCCAAAGCCATTGAAAAGAATGTGCTGTTTTCACATCTTGATGATAATGAGAGAAGTGATATTTTTGATGCCATGTTTTCGGTCTCCTTTATCGCAGGAGAGACTGTGATTCAGCAAGGTGATGAAGGGGATAACTTCTATGTGATTGATCAAGGAGAGACGGATGTCTATGTTAACAATGAATGGGCAACCAGTGTTGGGGAAGGAGGGAGCTTTGGAGAACTTGCTTTGATTTATGGAACACCGAGAGCAGCCACTGTCAAAGCAAAGACAAATGTGAAATTGTGGGGCATCGACCGAGACAGCTATAGAAGAATCCTCATGGGAAGCACACTGAGAAAGCGGAAGATGTATGAGGAATTCCTTAGTAAAGTCTCTATTTTAGAGTCTCTGGACAAGTGGGAACGTCTTACGGTAGCTGATGCATTGGAACCAGTGCAGTTTGAAGATGGGCAGAAGATTGTGGTGCAGGGAGAACCAGGGGATGAGTTCTTCATTATTTTAGAGGGGTCAGCTGCTGTGCTACAACGTCGGTCAGAAAATGAAGAGTTTGTTGAAGTGGGAAGATTGGGGCCTTCTGATTATTTTGGTGAAATTGCACTACTGATGAATCGTCCTCGTGCTGCCACAGTTGTTGCTCGTGGCCCCTTGAAGTGCGTTAAGCTGGACCGACCTAGATTTGAACGTGTTCTTGGCCCATGCTCAGACATCCTCAAACGAAACATCCAGCAGTACAACAGTTTTGTGTCACTGTCTGTCTGAAATCTGCCTCCTGTGCCTCCCTTTTCTCCTCTCCCCAATCCATGCTTCACTCATGCAAACTGCTTTATTTTCCCTACTTGCAGCGCCAAGTGGCCACTGGCATCGCAGCTTCCTGTCTGTTTATATATTGAAAGTTGCTTTTATTGCACCATTTTCAATTTGGAGCATTAACTAAATGCTCATACACAGTTAAATAAATAGAAAGAGTTCTATGGAGACTTTGCTGTTACTGCTTCTCTTTGTGCAGTGTTAGTATTCACCCTGGGCAGTGAGTGCCATGCTTTTTGGTGAGGGCAGATCCCAGCACCTATTGAATTACCATAGAGTAATGATGTAACAGTGCAAGATTTTTTTTTTAAGTGAC
ATAATTGTCCAGTTATAAGCGTATTTAGACTGTGGCCATATATGCTGTATTTCTTTGTAGAATAAATGGTTTCTCATTAAACTCTAAAGATTAGGGAAAATGGATATAGAAAATCTTAGTATAGTAGAAAGACATCTGCCTGTAATTAAACTAGTTTAAGGGTGGAAAAATGCCCATTTTTGCTAATTATCAATGGGATATGATTGGTTCAGTTTTTTTTTTTCCAGAGTTGTTGTTTGCCAAGCTAATCTGCCTGGTTTTATTTATATCTTGTTATTAATGTTTCTTCTCCAATTCTGAAATACTTTTGAGTATGGCTATCTATACCTGCCTTTTAAGTTTGAAACTAACTCATAGATTGCAAATATTGGTTAGTATTTAACTACATCTGCCTCGGCTCACAAATTCCGATTAGACCTTTATCCAGCTAGTGCCAAATAATTGATCAGATGCTGAATTGAGAATAAGAATTTGAGGTCTACATTCTTGGTTGTTAATTTAGAGCGTTTGGTTAAAGTATGTCCTTCAGCTGACTCCAGTATAATCTCCTCTGCTCATTAAACTGATTCCAGGAGATTGGATTTGCTGTGACTAGATACAGATGGAGCAAATGTCCTAACAGAGAAATAGAGGTGATGCTGCTAAAGGGAGAAATGCCAGGCGGACAAAGTTCAGTGTCGGGAATTTTCCCCGTGACATTCACTGGGGCATGAGATTTTGGAAGAAGTTTTTTACTTTGGTTTAGTCTTTTTTTCCTTCCTTTTTATTCAGCTAGAATTTCTGGTGGGTTGATGGTAGGGTATAATGTGTCTGTGTTGCTTCAAATTGGTCTGAAAGGCTATCCTGCTGAAAGTCCTGCTTTCCTATCTAGCATTTATTTCTCTGGCAAACTTTTCTTTCTTTTCTTTTTTAAAGTAAACTTGTGTATTGAGTCTTAACTGTATTTCAGTATTTTCCAGCCTTATGTGTTACATTATTCCAATGATACCCAACAGTTTATTTTTATTATTTTTTTAAACAAAATTTCACAGTTCTGTAATGTAGGCACTTTTATTTTCATTGTGATTTATATATAAGGTAATGTAGGGTTATATTTGGGAGTGACTGCAAGCATTTTTCCATCTGTGTGCAACTAACTGACTCTGTTATTGATCCCTTCTCCTGCCCTTTCCCAGGTAATTTAAATTGGTCATGGTAGATTTTTTTCATAGATTTGAAAAACTTTTAGGTTGTTACCAAGTATGAAGTATAAATCTGGGGAAGAGGTTTTATTTACATTTTAGGGTGGGTAAGAAAGCCACCTTGTTACAAATTTTTTAATTTCCAAAATAATCTATATTAAATGAGGGTTTCTGATCTGTACTTTGTGTTTAGCTACCTTTTTATATTTAAAAAATTAAAAATGAAAATTACGTTCTTACAAGCTTAAAGCTTGATTTGATCTTTGTTTAAATGCCAAAATGTACTTAAATGAGTTACTTAGAATGCCATAAAATTGCAGTTTCATGTATGTATATAATCATGCTCATGTATATTTAGTTACGTATAATGCTTTCTGAGTGAGTTTTACTCTTAAATCATTTGGTTAAATCATTTGGCTTGCTGTTTACTCCCTTCTGTAGTTTTTAATTAAAAACTTTAAAGATAAGTCTACATTAAACAATGATCACATCTAAAGCTTTATCTTTGTGTAATCTAAGTATATGTGAGAAATCAGAATTGGCATAATTTGTCTTAGTTGATATTCAAGGCTTTAAAAGTCATTATTCCTGGGCTTGGTAAGTGAATTTATGAGATTTACTGCTCTAGAAAGTATAGATGGCCAAAGGACCGTTTTGTATTGCTTCCTGATTACCAGTCTGATTATACCATGTGTGCTAATATACTTTTTTTGTTATAGATTGTCTTAATGGTAGGTCAAGTAATAAAAAGAGATGAAATAATTTAAATTCTTAAATGAATCAGTTTTTCTTCCCTTTCTCCTTTCCGTCTTTCCTCTC
TCTGTCCTTCCCCGAAAGTCTACTCGGGTGGGCAAAAATGAAAAGGGGGAAAGTGAATTATGGGATCGGTGTTTTGAAAGAGCAATGTTTATTTTCAGTGCTTTTCAGTTTGTCAAAGAGTGGATCTCAAAATCTTGCTTAAAGGGTAATTGAGATGTAGCAGATTTATTTACTTAGTCATGGAAAGAAAAAAATTCAGTCAAAAGCTAAAGATTTCCTTTTGATTGAAGACAGATTGGTTCTGTGGCCTTGGAACTTTCCCAGACTTAATGGGGAAACATCATTTCTAGATTAGCATACTCTTTGGTTTAAATTTAATATATACATTTAATGTTACTTAGGGATACTTTTATATTTTGCATATATAAAGCCTCATATATAAAGCCTTATTTCTGATGCTCTTAGATTTCTGAGGAGTGAGATGATTAAGTTGTATTCATTAGTGTATTGGTATTTCTTCACATCCAGTGAAATTGGAGATATGTTGTATGTTAGAAGAGCATTCTTTAAATTGTGTTGCTTTGAACATGTGTACCTTTTCTAGATTCAGTAATCCCTTCCCCCCGTCCTCTGGAGTATGAAACCTTTAGAGTCACAATAAAATGTAACTAAAGAAAAAAAAAAAAAAAAAAAAAAAAA"}, - {"NM_001220769.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATG
AGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTG
ACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCA
CAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001220776.1", "GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGT
CTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCT
TTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGT
TATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_001260.1", "GGGCTCCGGCCTCAGAGGCTGTGACAATGGACTATGACTTTAAAGTGAAGCTGAGCAGCGAGCGGGAGCGGGTCGAGGACCTGTTTGAATACGAGGGCTGCAAAGTTGGCCGAGGCACTTATGGTCACGTCTACAAAGCCAAGAGGAAAGATGGGAAGGATGATAAAGACTATGCTTTAAAACAAATAGAAGGAACTGGGATCTCTATGTCGGCATGTAGAGAAATAGCATTACTTCGAGAGCTTAAGCATCCAAACGTCATTTCTCTTCAAAAGGTGTTTCTGTCTCATGCTGATAGGAAGGTGTGGCTTCTGTTTGACTATGCTGAACATGACCTCTGGCATATAATCAAGTTTCACAGAGCTTCTAAAGCAAACAAGAAGCCAGTTCAGTTACCTCGGGGAATGGTGAAGTCACTATTATATCAGATCCTAGATGGTATTCACTACCTGCATGCTAACTGGGTGTTGCACAGAGATTTGAAACCTGCTAATATTTTAGTTATGGGTGAAGGTCCTGAGCGAGGAAGAGTAAAAATTGCTGACATGGGCTTTGCCCGATTATTTAATTCACCTTTGAAGCCTTTAGCAGATTTGGATCCAGTGGTTGTTACATTCTGGTACCGAGCCCCTGAACTACTTCTTGGAGCAAGGCATTATACCAAAGCTATTGATATTTGGGCTATAGGGTGTATATTTGCAGAACTACTAACGTCAGAACCAATATTTCACTGTCGACAAGAGGACATCAAAACTAGTAATCCTTATCACCATGACCAGCTGGACAGAATATTCAATGTAATGGGATTTCCTGCAGATAAAGATTGGGAAGATATAAAAAAGATGCCTGAACATTCAACATTAATGAAAGATTTCAGAAGAAATACGTATACCAACTGCAGCCTTATCAAGTATATGGAAAAACATAAAGTTAAACCAGATAGTAAAGCATTCCACTTGCTTCAGAAGCTGCTTACCATGGACCCAATAAAGCGAATTACCTCAGAACAGGCTATGCAGGACCCCTATTTCTTAGAAGACCCACTTCCTACATCAGACGTTTTTGCCGGTTGTCAAATCCCTTACCCAAAACGAGAATTTTTAACGGAAGAAGAACCTGATGACAAAGGAGACAAAAAGAACCAGCAGCAGCAGCAGGGCAATAACCACACTAATGGAACTGGCCACCCAGGGAATCAAGACAGCAGTCACACACAGGGACCCCCGTTGAAGAAAGTGAGAGTTGTTCCTCCTACCACTACCTCAGGTGGACTTATCATGACCTCAGACTATCAGCGTTCCAATCCACATGCTGCCTATCCCAACCCTGGACCAAGCACATCACAGCCGCAGAGCAGCATGGGATACTCAGCTACCTCCCAGCAGCCTCCACAGTACTCACATCAGACACATCGGTACTGAGCTGCATCGGAATCTTGTCCATGCACTGTTGCGAATGCTGCAGGGCTGACTGTGCAGCTCTCTGCGGGAACCTGGTATGGGCCATGAGAATGTACTGTACAACCACATCTTCAAAATGTCCAGTAGCCAAGTTCCACCACTTTTCACAGATTGGGGTAGTGGCTTCCAAGTTGTACCTATTTTGGAGTTAGACTTGAAAAGAAAGTGCTAGCACAGTTTGTGTTGTGGATTTGCTACTTCCATAGTTTACTTGACATGGTTCAGACTGACCAATGCATTTTTTTCAGTGACAGTCTGTAGCAGTTGAAGCTGTGAATGTGCTAGGGGCAAGCATTTGTCTTTGTATGTGGT"}, - {"NM_001220772.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGT
TTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATT
TCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_019063.3", 
"GGCGCGGCGCTCGCGGCTGCTGCCTGGGAGGGAGGCCGGGCAGGCGGCTGAGCGGCGCGGCTCTCAACGTGACGGGGAAGTGGTTCGGGCGGCCGCGGCTTACTACCCCAGGGCGAACGGACGGACGACGGAGGCGGGAGCCGGTAGCCGAGCCGGGCGACCTAGAGAACGAGCGGGTCAGGCTCAGCGTCGGCCACTCTGTCGGTCCGCTGAATGAAGTGCCCGCCCCTCTAAGCCCGGAGCCCGGCGCTTTCCCCGCAAGATGGACGGTTTCGCCGGCAGTCTCGATGATAGTATTTCTGCTGCAAGTACTTCTGATGTTCAAGATCGCCTGTCAGCTCTTGAGTCACGAGTTCAGCAACAAGAAGATGAAATCACTGTGCTAAAGGCGGCTTTGGCTGATGTTTTGAGGCGTCTTGCAATCTCTGAAGATCATGTGGCCTCAGTGAAAAAATCAGTCTCAAGTAAAGGCCAACCAAGCCCTCGAGCAGTTATTCCCATGTCCTGTATAACCAATGGAAGTGGTGCAAACAGAAAACCAAGTCATACCAGTGCTGTCTCAATTGCAGGAAAAGAAACTCTTTCATCTGCTGCTAAAAGTGGTACAGAAAAAAAGAAAGAAAAACCACAAGGACAGAGAGAAAAAAAAGAGGAATCTCATTCTAATGATCAAAGTCCACAAATTCGAGCATCACCTTCTCCCCAGCCCTCTTCACAACCTCTCCAAATACACAGACAAACTCCAGAAAGCAAGAATGCTACTCCCACCAAAAGCATAAAACGACCATCACCAGCTGAAAAGTCACATAATTCTTGGGAAAATTCAGATGATAGCCGTAATAAATTGTCGAAAATACCTTCAACACCCAAATTAATACCAAAAGTTACCAAAACTGCAGACAAGCATAAAGATGTCATCATCAACCAAGAAGGAGAATATATTAAAATGTTTATGCGCGGTCGGCCAATTACCATGTTCATTCCTTCCGATGTTGACAACTATGATGACATCAGAACGGAACTGCCTCCTGAGAAGCTCAAACTGGAGTGGGCATATGGTTATCGAGGAAAGGACTGTAGAGCTAATGTTTACCTTCTTCCGACCGGGGAAATAGTTTATTTCATTGCATCAGTAGTAGTACTATTTAATTATGAGGAGAGAACTCAGCGACACTACCTGGGCCATACAGACTGTGTGAAATGCCTTGCTATACATCCTGACAAAATTAGGATTGCAACTGGACAGATAGCTGGCGTGGATAAAGATGGAAGGCCTCTACAACCCCACGTCAGAGTGTGGGATTCTGTTACTCTATCCACACTGCAGATTATTGGACTTGGCACTTTTGAGCGTGGAGTAGGATGCCTGGATTTTTCAAAAGCAGATTCAGGTGTTCATTTATGTGTTATTGATGACTCCAATGAGCATATGCTTACTGTATGGGACTGGCAGAAGAAAGCAAAAGGAGCAGAAATAAAGACAACAAATGAAGTTGTTTTGGCTGTGGAGTTTCACCCAACAGATGCAAATACCATAATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATTCACTAACAAGAAAACAGGGAATTTTTGGGAAATATGAAAAGCCAAAATTTGTGCAGTGTTTAGCATTCTTGGGGAATGGAGATGTTCTTACTGGAGACTCAGGTGGAGTCATGCTTATATGGAGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAGGTGTATATCAAATCAGCAAACAAATCAAAGCTCATGATGGCAGTGTGTTCACACTTTGTCAGATGAGAAATGGGATGTTATTAACTGGAGGAGGGAAAGACAGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGAGGTTCCTGATCAGTATGGCACAATCAGAGCTGTAGCAGAAGGAAAGGCAGATCAATTTTTAGTAGGCACATCACGAAACTTTATTTTACGAGGAACA
TTTAATGATGGCTTCCAAATAGAAGTACAGGGTCATACAGATGAGCTTTGGGGTCTTGCCACACATCCCTTCAAAGATTTGCTCTTGACATGTGCTCAGGACAGGCAGGTGTGCCTGTGGAACTCAATGGAACACAGGCTGGAATGGACCAGGCTGGTAGATGAACCAGGACACTGTGCAGATTTTCATCCAAGTGGCACAGTGGTGGCCATAGGAACGCACTCAGGCAGGTGGTTTGTTCTGGATGCAGAAACCAGAGATCTAGTTTCTATCCACACAGACGGGAATGAACAGCTCTCTGTGATGCGCTACTCAATAGATGGTACCTTCCTGGCTGTAGGATCTCATGACAACTTTATTTACCTCTATGTAGTCTCTGAAAATGGAAGAAAATATAGCAGATATGGAAGGTGCACTGGACATTCCAGCTACATCACACACCTTGACTGGTCCCCAGACAACAAGTATATAATGTCTAACTCGGGAGACTATGAAATATTGTACTGGGACATTCCAAATGGCTGCAAACTAATCAGGAATCGATCGGATTGTAAGGACATTGATTGGACGACATATACCTGTGTGCTAGGATTTCAAGTATTTGGTGTCTGGCCAGAAGGATCTGATGGGACAGATATCAATGCACTGGTGCGATCCCACAATAGAAAGGTGATAGCTGTTGCCGATGACTTTTGTAAAGTCCATCTGTTTCAGTATCCCTGCTCCAAAGCAAAGGCTCCCAGTCACAAGTACAGTGCCCACAGCAGCCATGTCACCAATGTCAGTTTTACTCACAATGACAGTCACCTGATATCAACTGGTGGAAAAGACATGAGCATCATTCAGTGGAAACTTGTGGAAAAGTTATCTTTGCCTCAGAATGAGACTGTAGCGGATACTACTCTAACCAAAGCCCCCGTCTCTTCCACTGAAAGTGTCATCCAATCTAATACTCCCACACCGCCTCCTTCTCAGCCCTTAAATGAGACAGCTGAAGAGGAAAGTAGAATAAGCAGTTCTCCCACACTTCTGGAGAACAGCCTGGAACAAACTGTGGAGCCAAGTGAAGACCACAGCGAGGAGGAGAGTGAAGAGGGCAGCGGAGACCTTGGTGAGCCTCTTTATGAAGAGCCATGCAACGAGATAAGCAAGGAGCAGGCCAAAGCCACCCTTCTGGAGGACCAGCAAGACCCTTCGCCCTCGTCCTAACACCCTGGCTTCAGTGCAACTCTTTTCCTTCAGCTGCATGTGATTTTGTGATAAAGTTCAGGTAACAGGATGGGCAGTGATGGAGAATCACTGTTGATTGAGATTTTGGTTTCCATGTGATTTGTTTTCTTCAATAGTCTTATTTTCAGTCTCTCAAATACAGCCAACTTAAAGTTTTAGTTTGGTGTTTATTGAAAATTAACCAAACTTAATACTAGGAGAAGACTGAATCATTAATGATGTCTCACAAATTACTGTGTACCTAAGTGGTGTGATGTAAATACTGGAAACAAAAACAGCAGTTGCATTGATTTTGAAAACAAACCCCCTTGTTATCTGAACATGTTTTCTTCAGGAACAACCAGAGGTATCACAAACACTGTTACTCATCTACTGGCTCAGACTGTACTACTTTTTTTTTTTTTTTTCCTGAAAAAGAAACCAGAAAAAAATGTACTCTTACTGAGATACCCTCTCACCCCAAATGTGTAATGGAAAATTTTTAATTAAGAAAAACTTCAGTTTTGCCAAGTGCAATGGTGTTGCCTTCTTTAAAAAATGCCGTTTTCTTACACTACCAGTGGATGTCCAGACATGCTCTTAGTCTACTAGAGAGGTGCTGCCTTTTCTAAGTCATAATGAGGAACAGTCCCTTAATTTCTTGTGTGCAACTCTGTTTTATCCTAGAACTAAGAGAGCATTGGTTTGTTAAAGAGCTTTCAATGTATATTAAAACCTTCAATACTCAGAAATGATGGATTCCTCCAAGGAGTCCTTTACTAGCCTAAACA
TTCTCAAATGTTTGAGATTCAAGTGAATGGAAGGAAAACCACATGCCTTTAAAACTAAACTGTAATAATTACCTGGCTAATTTCAGCTAAGCCTTCATCATAATTTGTTCCCTCAGTAATAGGAGAAATATAAATACAGTAAGTTTAGATTATTGAATTGGTGCTTGAAATTTATTGGTTTTGTTGTAATTTTATACAGATTATATGAGGGATAAGATACTCATCAAATTGCAAATTCTTTTTTTTACAGAAGTGTGGGTAACAGTCACAGCAGTTTTTTTTACCAACAGCATACTTAACAGACTTGCTGTGTAGCAGTTTTTTTCTGGTGGAGTTGCTGTAAGTCTTGTAAGTCTAATGTGGCTATCCTACTCTTTTGGGCAATGCATGTATTATGCATTGGAAAGGTATTTTTTTTAAGTTCTGTTGGCTAGCTATGGTTTTCAGTACATTTCCTACTTTAAGAGTAATTACTGACAAATATGTATTTCCTATATGTTTATACTTTGATTATAAAAAAGTATTTTGTTTTGATTTTTTAACTTGCTGCATTGTTTTGATACTTTCTATTTTTTTGGTCAAATCATGTTTAGAAACTTTGGATGAGTTAAGAAGTCTTAAGTATGCAGGCGTTTACGTGATTGTGCCATTCCAAAGTGCATCAGAACTGTCATTCCCTTCTAATATCTTCTCAGGAGTAATACAAATCAGGTATTTCATCATCATTTGGTAATATGAAAACTCCAGTGAACTCCCAAGGACATTTACAACATTTATATTCACACGCTGTATGGAAGGGTGTGGGTGTGTGTGAAGGGGCGAGTGGAGACACTGTGTGTATCTCTAGATAAGAAGATATGCACCACGTTGAAAATACTCAGTGTAGATCTCTATGTGTATAGGTATCTGTATATCTTTCCTTTTGTTTACAACTGTTAAAAAACCTCAAAATAGTTCTCTTCAAAAGAAGAGAGATTCCAAGCAACCCATCTTTCTTCAGTATGTATGTTCTGTACATACTTATCGGAGCGCGCCAGTAAGTATCAGGCATATATATCTGTCTGTTAGCAATGATTATTACATCATCAGATCAGCATGTGCTATACTCCCTGCAAGAAATATACTGACATGAACAGGCAGTTCTTGGAGAAGAAAGAGCATTTCTTTAAGTACCTGGGGAATACAGCTCTCAGTGATCAGCAGGGAGTTTATTTGAGGACATCAGTCACCTTTGGGGTTGCCATGTACAATGAGATTTATAATCATGATACTCTTCGGTGGTAGTTTCAAAAGACACTACTAATACGCAGGAAGCGTTCCAGCTATTTAATGCTGGCAACTACTGTTTAATGGTCAGTTAAATCTGTGATAATGGTTGGAAGTGGGTGGGGTTATGAAATTGTAGATGTTTTTAGAAAAACTTGTGAATGAAAATGAATCCAAGTGTTTCATGTGAAGATGTTGAGCCATTGCTATCATGCATTCCTGTCTCATGGCAGAAAATTTTGAAGATTAAAAAATAAAATAATCAAAATGTTTCCTCTTTCTAAAAAAAAAAAAAAAAAA"}, - {"NM_001220771.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGCGCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCT
GGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTTTCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCA
CACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCTGATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_000314.4", 
"CCTCCCCTCGCCCGGCGCGGTCCCGTCCGCCTCTCGCTCGCCTCCCGCCTCCCCTCGGTCTTCCGAGGCGCCCGGGCTCCCGGCGCGGCGGCGGAGGGGGCGGGCAGGCCGGCGGGCGGTGATGTGGCGGGACTCTTTATGCGCTGCGGCAGGATACGCGCTCGGCGCTGGGACGCGACTGCGCTCAGTTCTCTCCTCTCGGAAGCTGCAGCCATGATGGAAGTTTGAGAGTTGAGCCGCTGTGAGGCGAGGCCGGGCTCAGGCGAGGGAGATGAGAGACGGCGGCGGCCGCGGCCCGGAGCCCCTCTCAGCGCCTGTGAGCAGCCGCGGGGGCAGCGCCCTCGGGGAGCCGGCCGGCCTGCGGCGGCGGCAGCGGCGGCGTTTCTCGCCTCCTCTTCGTCTTTTCTAACCGTGCAGCCTCTTCCTCGGCTTCTCCTGAAAGGGAAGGTGGAAGCCGTGGGCTCGGGCGGGAGCCGGCTGAGGCGCGGCGGCGGCGGCGGCACCTCCCGCTCCTGGAGCGGGGGGGAGAAGCGGCGGCGGCGGCGGCCGCGGCGGCTGCAGCTCCAGGGAGGGGGTCTGAGTCGCCTGTCACCATTTCCAGGGCTGGGAACGCCGGAGAGTTGGTCTCTCCCCTTCTACTGCCTCCAACACGGCGGCGGCGGCGGCGGCACATCCAGGGACCCGGGCCGGTTTTAAACCTCCCGTCCGCCGCCGCCGCACCCCCCGTGGCCCGGGCTCCGGAGGCCGCCGGCGGAGGCAGCCGTTCGGAGGATTATTCGTCTTCTCCCCATTCCGCTGCCGCCGCTGCCAGGCCTCTGGCTGCTGAGGAGAAGCAGGCCCAGTCGCTGCAACCATCCAGCAGCCGCCGCAGCAGCCATTACCCGGCTGCGGTCCAGAGCCAAGCGGCGGCAGAGCGAGGGGCATCAGCTACCGCCAAGTCCAGAGCCATTTCCATCCTGCAGAAGAAGCCCCGCCACCAGCAGCTTCTGCCATCTCTCTCCTCCTTTTTCTTCAGCCACAGGCTCCCAGACATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAA
TGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGAATTTTTTTTTATCAAGAGGGATAAAACACCATGAAAATAAACTTGAATAAACTGAAAATGGACCTTTTTTTTTTTAATGGCAATAGGACATTGTGTCAGATTACCAGTTATAGGAACAATTCTCTTTTCCTGACCAATCTTGTTTTACCCTATACATCCACAGGGTTTTGACACTTGTTGTCCAGTTGAAAAAAGGTTGTGTAGCTGTGTCATGTATATACCTTTTTGTGTCAAAAGGACATTTAAAATTCAATTAGGATTAATAAAGATGGCACTTTCCCGTTTTATTCCAGTTTTATAAAAAGTGGAGACAGACTGATGTGTATACGTAGGAATTTTTTCCTTTTGTGTTCTGTCACCAACTGAAGTGGCTAAAGAGCTTTGTGATATACTGGTTCACATCCTACCCCTTTGCACTTGTGGCAACAGATAAGTTTGCAGTTGGCTAAGAGAGGTTTCCGAAGGGTTTTGCTACATTCTAATGCATGTATTCGGGTTAGGGGAATGGAGGGAATGCTCAGAAAGGAAATAATTTTATGCTGGACTCTGGACCATATACCATCTCCAGCTATTTACACACACCTTTCTTTAGCATGCTACAGTTATTAATCTGGACATTCGAGGAATTGGCCGCTGTCACTGCTTGTTGTTTGCGCATTTTTTTTTAAAGCATATTGGTGCTAGAAAAGGCAGCTAAAGGAAGTGAATCTGTATTGGGGTACAGGAATGAACCTTCTGCAACATCTTAAGATCCACAAATGAAGGGATATAAAAATAATGTCATAGGTAAGAAACACAGCAACAATGACTTAACCATATAAATGTGGAGGCTATCAACAAAGAATGGGCTTGAAACATTATAAAAATTGACAATGATTTATTAAATATGTTTTCTCAATTGTAACGACTTCTCCATCTCCTGTGTAATCAAGGCCAGTGCTAAAATTCAGATGCTGTTAGTACCTACATCAGTCAACAACTTACACTTATTTTACTAGTTTTCAATCATAATACCTGCTGTGGATGCTTCATGTGCTGCCTGCAAGCTTCTTTTTTCTCATTAAATATAAAATATTTTGTAATGCTGCACAGAAATTTTCAATTTGAGATTCTACAGTAAGCGTTTTTTTTCTTTGAAGATTTATGATGCACTTATTCAATAGCTGTCAGCCGTTCCACCCTTTTGACCTTACACATTCTATTACAATGAATTTTGCAGTTTTGCACATTTTTTAAATGTCATTAACTGTTAGGGAATTTTACTTGAATACTGAATACATATAATGTTTATATTAAAAAGGACATTTGTGTTAAAAAGGAAATTAGAGTTGCAGTAAACTTTCAATGCTGCACACAAAAAAAAGACATTTGATTTTTCAGTAGAAATTGTCCTACATGTGCTTTATTGATTTGCTATTGAAAGAATAGGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAAATGTGCAGTGTTGAATCATTTCTTCATAGTGCTCCCCCGAGTTGGGACTAGGGCTTCAATTTCACTTCTTAAAAAAAATCATCATATATTTGATATGCCCAGACTGCATACGATTTTAAGCGGAGTACAACTACTATTGTAAAGCTAATGTGAAGATATTATTAAAAAGGTTTTTTTTTCCAGAAATTTGGTGTCTTCAAATTATACCTTCACCTTGACATTTGAATATCCAGCCATTTTGTTTCTTAATGGTATAAAATTCCATTTTCAATAACTTATTGGTGCTGAAATTGTT
CACTAGCTGTGGTCTGACCTAGTTAATTTACAAATACAGATTGAATAGGACCTACTAGAGCAGCATTTATAGAGTTTGATGGCAAATAGATTAGGCAGAACTTCATCTAAAATATTCTTAGTAAATAATGTTGACACGTTTTCCATACCTTGTCAGTTTCATTCAACAATTTTTAAATTTTTAACAAAGCTCTTAGGATTTACACATTTATATTTAAACATTGATATATAGAGTATTGATTGATTGCTCATAAGTTAAATTGGTAAAGTTAGAGACAACTATTCTAACACCTCACCATTGAAATTTATATGCCACCTTGTCTTTCATAAAAGCTGAAAATTGTTACCTAAAATGAAAATCAACTTCATGTTTTGAAGATAGTTATAAATATTGTTCTTTGTTACAATTTCGGGCACCGCATATTAAAACGTAACTTTATTGTTCCAATATGTAACATGGAGGGCCAGGTCATAAATAATGACATTATAATGGGCTTTTGCACTGTTATTATTTTTCCTTTGGAATGTGAAGGTCTGAATGAGGGTTTTGATTTTGAATGTTTCAATGTTTTTGAGAAGCCTTGCTTACATTTTATGGTGTAGTCATTGGAAATGGAAAAATGGCATTATATATATTATATATATAAATATATATTATACATACTCTCCTTACTTTATTTCAGTTACCATCCCCATAGAATTTGACAAGAATTGCTATGACTGAAAGGTTTTCGAGTCCTAATTAAAACTTTATTTATGGCAGTATTCATAATTAGCCTGAAATGCATTCTGTAGGTAATCTCTGAGTTTCTGGAATATTTTCTTAGACTTTTTGGATGTGCAGCAGCTTACATGTCTGAAGTTACTTGAAGGCATCACTTTTAAGAAAGCTTACAGTTGGGCCCTGTACCATCCCAAGTCCTTTGTAGCTCCTCTTGAACATGTTTGCCATACTTTTAAAAGGGTAGTTGAATAAATAGCATCACCATTCTTTGCTGTGGCACAGGTTATAAACTTAAGTGGAGTTTACCGGCAGCATCAAATGTTTCAGCTTTAAAAAATAAAAGTAGGGTACAAGTTTAATGTTTAGTTCTAGAAATTTTGTGCAATATGTTCATAACGATGGCTGTGGTTGCCACAAAGTGCCTCGTTTACCTTTAAATACTGTTAATGTGTCATGCATGCAGATGGAAGGGGTGGAACTGTGCACTAAAGTGGGGGCTTTAACTGTAGTATTTGGCAGAGTTGCCTTCTACCTGCCAGTTCAAAAGTTCAACCTGTTTTCATATAGAATATATATACTAAAAAATTTCAGTCTGTTAAACAGCCTTACTCTGATTCAGCCTCTTCAGATACTCTTGTGCTGTGCAGCAGTGGCTCTGTGTGTAAATGCTATGCACTGAGGATACACAAAAATACCAATATGATGTGTACAGGATAATGCCTCATCCCAATCAGATGTCCATTTGTTATTGTGTTTGTTAACAACCCTTTATCTCTTAGTGTTATAAACTCCACTTAAAACTGATTAAAGTCTCATTCTTGTCAAAAAAAAAAAAAAAAAAAAAAAAAAA"}, - {"NM_000545.5", 
"CGTGGCCCTGTGGCAGCCGAGCCATGGTTTCTAAACTGAGCCAGCTGCAGACGGAGCTCCTGGCGGCCCTGCTCGAGTCAGGGCTGAGCAAAGAGGCACTGATCCAGGCACTGGGTGAGCCGGGGCCCTACCTCCTGGCTGGAGAAGGCCCCCTGGACAAGGGGGAGTCCTGCGGCGGCGGTCGAGGGGAGCTGGCTGAGCTGCCCAATGGGCTGGGGGAGACTCGGGGCTCCGAGGACGAGACGGACGACGATGGGGAAGACTTCACGCCACCCATCCTCAAAGAGCTGGAGAACCTCAGCCCTGAGGAGGCGGCCCACCAGAAAGCCGTGGTGGAGACCCTTCTGCAGGAGGACCCGTGGCGTGTGGCGAAGATGGTCAAGTCCTACCTGCAGCAGCACAACATCCCACAGCGGGAGGTGGTCGATACCACTGGCCTCAACCAGTCCCACCTGTCCCAACACCTCAACAAGGGCACTCCCATGAAGACGCAGAAGCGGGCCGCCCTGTACACCTGGTACGTCCGCAAGCAGCGAGAGGTGGCGCAGCAGTTCACCCATGCAGGGCAGGGAGGGCTGATTGAAGAGCCCACAGGTGATGAGCTACCAACCAAGAAGGGGCGGAGGAACCGTTTCAAGTGGGGCCCAGCATCCCAGCAGATCCTGTTCCAGGCCTATGAGAGGCAGAAGAACCCTAGCAAGGAGGAGCGAGAGACGCTAGTGGAGGAGTGCAATAGGGCGGAATGCATCCAGAGAGGGGTGTCCCCATCACAGGCACAGGGGCTGGGCTCCAACCTCGTCACGGAGGTGCGTGTCTACAACTGGTTTGCCAACCGGCGCAAAGAAGAAGCCTTCCGGCACAAGCTGGCCATGGACACGTACAGCGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTGCCCTCTCCCCCAGTAAGGTCCACGGTGTGCGCTATGGACAGCCTGCGACCAGTGAGACTGCAGAAGTACCCTCAAGCAGCGGCGGTCCCTTAGTGACAGTGTCTACACCCCTCCACCAAGTGTCCCCCACGGGCCTGGAGCCCAGCCACAGCCTGCTGAGTACAGAAGCCAAGCTGGTCTCAGCAGCTGGGGGCCCCCTCCCCCCTGTCAGCACCCTGACAGCACTGCACAGCTTGGAGCAGACATCCCCAGGCCTCAACCAGCAGCCCCAGAACCTCATCATGGCCTCACTTCCTGGGGTCATGACCATCGGGCCTGGTGAGCCTGCCTCCCTGGGTCCTACGTTCACCAACACAGGTGCCTCCACCCTGGTCATCGGCCTGGCCTCCACGCAGGCACAGAGTGTGCCGGTCATCAACAGCATGGGCAGCAGCCTGACCACCCTGCAGCCCGTCCAGTTCTCCCAGCCGCTGCACCCCTCCTACCAGCAGCCGCTCATGCCACCTGTGCAGAGCCATGTGACCCAGAGCCCCTTCATGGCCACCATGGCTCAGCTGCAGAGCCCCCACGCCCTCTACAGCCACAAGCCCGAGGTGGCCCAGTACACCCACACGGGCCTGCTCCCGCAGACTATGCTCATCACCGACACCACCAACCTGAGCGCCCTGGCCAGCCTCACGCCCACCAAGCAGGTCTTCACCTCAGACACTGAGGCCTCCAGTGAGTCCGGGCTTCACACGCCGGCATCTCAGGCCACCACCCTCCACGTCCCCAGCCAGGACCCTGCCGGCATCCAGCACCTGCAGCCGGCCCACCGGCTCAGCGCCAGCCCCACAGTGTCCTCCAGCAGCCTGGTGCTGTACCAGAGCTCAGACTCCAGCAATGGCCAGAGCCACCTGCTGCCATCCAACCACAGCGTCATCGAGACCTTCATCTCCACCCAGATGGCCTCTTCCTCCCAGTAACCACGGCACCTGGGCCCTGGGGCCTGTACTGCCTGCTTGGGGGGTGATGAGGGCAGCAGCCAGCCCTGCCTGGAGGACCT
GAGCCTGCCGAGCAACCGTGGCCCTTCCTGGACAGCTGTGCCTCGCTCCCCACTCTGCTCTGATGCATCAGAAAGGGAGGGCTCTGAGGCGCCCCAACCCGTGGAGGCTGCTCGGGGTGCACAGGAGGGGGTCGTGGAGAGCTAGGAGCAAAGCCTGTTCATGGCAGATGTAGGAGGGACTGTCGCTGCTTCGTGGGATACAGTCTTCTTACTTGGAACTGAAGGGGGCGGCCTATGACTTGGGCACCCCCAGCCTGGGCCTATGGAGAGCCCTGGGACCGCTACACCACTCTGGCAGCCACACTTCTCAGGACACAGGCCTGTGTAGCTGTGACCTGCTGAGCTCTGAGAGGCCCTGGATCAGCGTGGCCTTGTTCTGTCACCAATGTACCCACCGGGCCACTCCTTCCTGCCCCAACTCCTTCCAGCTAGTGACCCACATGCCATTTGTACTGACCCCATCACCTACTCACACAGGCATTTCCTGGGTGGCTACTCTGTGCCAGAGCCTGGGGCTCTAACGCCTGAGCCCAGGGAGGCCGAAGCTAACAGGGAAGGCAGGCAGGGCTCTCCTGGCTTCCCATCCCCAGCGATTCCCTCTCCCAGGCCCCATGACCTCCAGCTTTCCTGTATTTGTTCCCAAGAGCATCATGCCTCTGAGGCCAGCCTGGCCTCCTGCCTCTACTGGGAAGGCTACTTCGGGGCTGGGAAGTCGTCCTTACTCCTGTGGGAGCCTCGCAACCCGTGCCAAGTCCAGGTCCTGGTGGGGCAGCTCCTCTGTCTCGAGCGCCCTGCAGACCCTGCCCTTGTTTGGGGCAGGAGTAGCTGAGCTCACAAGGCAGCAAGGCCCGAGCAGCTGAGCAGGGCCGGGGAACTGGCCAAGCTGAGGTGCCCAGGAGAAGAAAGAGGTGACCCCAGGGCACAGGAGCTACCTGTGTGGACAGGACTAACACTCAGAAGCCTGGGGGCCTGGCTGGCTGAGGGCAGTTCGCAGCCACCCTGAGGAGTCTGAGGTCCTGAGCACTGCCAGGAGGGACAAAGGAGCCTGTGAACCCAGGACAAGCATGGTCCCACATCCCTGGGCCTGCTGCTGAGAACCTGGCCTTCAGTGTACCGCGTCTACCCTGGGATTCAGGAAAAGGCCTGGGGTGACCCGGCACCCCCTGCAGCTTGTAGCCAGCCGGGGCGAGTGGCACGTTTATTTAACTTTTAGTAAAGTCAAGGAGAAATGCGGTGGAAAAA"}, - {"NM_001220765.1", 
"GAATTCCGGCGTCGCGGACGCATCCCAGTCTGGGCGGGACGCTCGGCCGCGGCGAGGCGGGCAAGCCTGGCAGGGCAGAGGGAGCCCCGGCTCCGAGGTTGCTCTTCGCCCCCGAGGATCAGTCTTGGCCCCAAAGCGCGACGCACAAATCCACATAACCTGAGGACCATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAAAGCCCTCCCGCGCCCCCACCCCAGACCCCGAGCCACCCCAGGAAAAGCACAAGGACTGCCGCCTTCTCGCTCCCGCCAGCAGCATAGACTGGACTGGACCAGACAATGTTGTGTTTGGATTTGTAACTGTTTTTTGTTTTTTGTTTGAGTTGGTTGATTGGGGTTTGATTTGCTTTTGAAAAGATTTTTATTTTTAGAGGCAGGGCTGCATTGGGAGCATCCAGAACTGCTACCTTCCTAGATGTTTCCCCAGACCGCTGGCTGAGATTCCCTCACCTGTCGCTTCCTAGAATCCCCTTCTCCAAACGATTAGTCTAAATTTTCAGAGAGAAATAGATAAAACACGCCACAGCCTGGGAAGGAGCGTGCTCTACCCTGTGCTAAGCACGGGGTTCGC
GCACCAGGTGTCTTTTTCCAGTCCCCAGAAGCAGAGAGCACAGCCCCTGCTGTGTGGGTCTGCAGGTGAGCAGACAGGACAGGTGTGCCGCCACCCAAGTGCCAAGACACAGCAGGGCCAACAACCTGTGCCCAGGCCAGCTTCGAGCTACATGCATCTAGGGCGGAGAGGCTGCACTTGTGAGAGAAAATACTATTTCAAGTCATATTCTGCGTAGGAAAATGAATTGGTTGGGGAAAGTCGTGTCTGTCAGACTGCCCTGGGTGGAGGGAGACGCCGGGCTAGAGCCTTTGGGATCGTCCTGGATTCACTGGCTTTGCGGAGGCTGCTCAGATGGCCTGAGCCTCCCGAGGCTTGCTGCCCCGTAGGAGGAGACTGTCTTCCCGTGGGCATATCTGGGGAGCCCTGTTCCCCGCTTTTTCACTCCCATACCTTTAATGGCCCCCAAAATCTGTCACTACAATTTAAACACCAGTCCCGAAATTTGGATCTTCTTTCTTTTTGAATCTCTCAAACGGCAACATTCCTCAGAAACCAAAGCTTTATTTCAAATCTCTTCCTTCCCTGGCTGGTTCCATCTAGTACCAGAGGCCTCTTTTCCTGAAGAAATCCAATCCTAGCCCTCATTTTAATTATGTACATCTGTTTGTAGCCACAAGCCTGAATTTCTCAGTGTTGGTAAGTTTCTTTACCTACCCTCACTATATATTATTCTCGTTTTAAAACCCATAAAGGAGTGATTTAGAACAGTCATTAATTTTCAACTCAATGAAATATGTGAAGCCCAGCATCTCTGTTGCTAACACACAGAGCTCACCTGTTTGAAACCAAGCTTTCAAACATGTTGAAGCTCTTTACTGTAAAGGCAAGCCAGCATGTGTGTCCACACATACATAGGATGGCTGGCTCTGCACCTGTAGGATATTGGAATGCACAGGGCAATTGAGGGACTGAGCCAGACCTTCGGAGAGTAATGCCACCAGATCCCCTAGGAAAGAGGAGGCAAATGGCACTGCAGGTGAGAACCCCGCCCATCCGTGCTATGACATGGAGGCACTGAAGCCCGAGGAAGGTGTGTGGAGATTCTAATCCCAACAAGCAAGGGTCTCCTTCAAGATTAATGCTATCAATCATTAAGGTCATTACTCTCAACCACCTAGGCAATGAAGAATATACCATTTCAAATATTTACAGTACTTGTCTTCACCAACACTGTCCCAAGGTGAAATGAAGCAACAGAGAGGAAATTGTACATAAGTACCTCAGCATTTAATCCAAACAGGGGTTCTTAGTCTCAGCACTATGACATTTTGGGCTGACTACTTATTTGTTAGGCGGGAGCTCTCCTGTGCATTGTAGGATAATTAGCAGTATCCCTGGTGGCTACCCAATAGACGCCAGTAGCACCCCGAATTGACAACCCAAACTCTCCAGACATCACCAACTGTCCCCTGCGAGGAGAAATCACTCCTGGGGGAGAACCACTGACCCAAATGAATTCTAAACCAATCAAATGTCTGGGAAGCCCTCCAAGAAAAAAAATAGAAAAGCACTTGAAGAATATTCCCAATATTCCCGGTCAGCAGTATCAAGGCTGACTTGTGTTCATGTGGAGTCATTATAAATTCTATAAATCAATTATTCCCCTTCGGTCTTAAAAATATATTTCCTCATAAACATTTGAGTTTTGTTGAAAAGATGGAGTTTACAAAGATACCATTCTTGAGTCATGGATTTCTCTGCTCACAGAAGGGTGTGGCATTTGGAAACGGGAATAAACAAAATTGCTGCACCAATGCACTGAGTGAAGGAAGAGAGACAGAGGATCAAGGGCTTTAGACAGCACTCCTTCAATATGCAATCACAGAGAAAGATGCGCCTTATCCAAGTTAATATCTCTAAGGTGAGAGCCTTCTTAGAGTCAGTTTGTTGCAAATTTCACCTACTCTGTTCTTTTCCATCCATCCCCCTGAGTCAGTTGGTTGAAGGGAGTTATTTTT
TCAAGTGGAATTCAAACAAAGCTCAAACCAGAACTGTAAATAGTGATTGCAGGAATTCTTTTCTAAACTGCTTTGCCCTTTCCTCTCACTGCCTTTTATAGCCAATATAAATGTCTCTTTGCACACCTTTTGTTGTGGTTTTATATTGTAACACCATTTTTCTTTGAAACTATTGTATTTAAAGTAAGGTTTCATATTATGTCAGCAAGTAATTAACTTATGTTTAAAAGGTGGCCATATCATGTACCAAAAGTTGCTGAAGTTTCTCTTCTAGCTGGTAAAGTAGGAGTTTGCATGACTTCACACTTTTTTTGCGTAGTTTCTTCTGTTGTATGATGGCGTGAGTGTGTGTCTTGGGTACCGCTGTGTACTACTGTGTGCCTAGATTCCATGCACTCTCGTTGTGTTTGAAGTAAATATTGGAGACCGGAGGGTAACAGGTTGGCCTGTTGATTACAGCTAGTAATCGCTGTGTCTTGTTCCGCCCCCTCCCTGACACCCCAGCTTCCCAGGATGTGGAAAGCCTGGATCTCAGCTCCTTGCCCCATATCCCTTCTGTAATTTGTACCTAAAGAGTGTGATTATCCTAATTCAAGAGTCACTAAAACTCATCACATTATCATTGCATATCAGCAAAGGGTAAAGTCCTAGCACCAATTGCTTCACATACCAGCATGTTCCATTTCCAATTTAGAATTAGCCACATAATAAAATCTTAGAATCTTCCTTGAGAAAGAGCTGCCTGAGATGTAGTTTTGTTATATGGTTCCCCACCGACCATTTTTGTGCTTTTTTCTTGTTTTGTTTTGTTTTGACTGCACTGTGAGTTTTGTAGTGTCCTCTTCTTGCCAAAACAAACGCGAGATGAACTGGACTTATGTAGACAAATCGTGATGCCAGTGTATCCTTCCTTTCTTCAGTTCCAGCAATAATGAATGGTCAACTTTTTTAAAATCTAGATCTCTCTCATTCATTTCAATGTATTTTTACTTTAAGATGAACCAAAATTATTAGACTTATTTAAGATGTACAGGCATCAGAAAAAAGAAGCACATAATGCTTTTGGTGCGATGGCACTCACTGTGAACATGTGTAACCACATATTAATATGCAATATTGTTTCCAATACTTTCTAATACAGTTTTTTATAATGTTGTGTGTGGTGATTGTTCAGGTCGAATCTGTTGTATCCAGTACAGCTTTAGGTCTTCAGCTGCCCTTCTGGCGAGTACATGCACAGGATTGTAAATGAGAAATGCAGTCATATTTCCAGTCTGCCTCTATGATGATGTTAAATTATTGCTGTTTAGCTGTGAACAAGGGATGTACCACTGGAGGAATAGAGTATCCTTTTGTACACATTTTGAAATGCTTCTTCTGTAGTGATAGAACAAATAAATGCAACGAATACTCTGTCTGCCCTATCCCGTGAAGTCCACACTGGCGTAAGAGAAGGCCCAGCAGAGCAGGAATCTGCCTAGACTTTCTCCCAATGAGATCCCAATATGAGAGGGAGAAGAGATGGGCCTCAGGACAGCTGCAATACCACTTGGGAACACATGTGGTGTCTTGATGTGGCCAGCGCAGCAGTTCAGCACAACGTACCTCCCATCTACAACAGTGCTGGACGTGGGAATTCTAAGTCCCAGTCTTGAGGGTGGGTGGAGATGGAGGGCAACAAGAGATACATTTCCAGTTCTCCACTGCAGCATGCTTCAGTCATTCTGTGAGTGGCCGGGCCCAGGGCCCTCACAATTTCACTACCTTGTCTTTTACATAGTCATAAGAATTATCCTCAACATAGCCTTTTGACGCTGTAAATCTTGAGTATTCATTTACCCTTTTCTGATCTCCTGGAAACAGCTGCCTGCCTGCATTGCACTTCTCTTCCCGAGGAGTGGGGTAAATTTAAAAGTCAAGTTATAGTTTGGATGTTAGTATAGAATTTTGAAATTGGGAATTAAAAATCAGGACTGGGGACTGGGAGACCAAAAATTTCT
GATCCCATTTCTGATGGATGTGTCACACCTTTTCTGTCAAAATAAAATGTCTTGGAGGTTATGACTCCTTGGTGAAAAAAAAAAAAAAAAAA"}, - {"NM_000535.5", "AGCCAATGGGAGTTCAGGAGGCGGAGCGCCTGTGGGAGCCCTGGAGGGAACTTTCCCAGTCCCCGAGGCGGATCGGGTGTTGCATCCATGGAGCGAGCTGAGAGCTCGAGTACAGAACCTGCTAAGGCCATCAAACCTATTGATCGGAAGTCAGTCCATCAGATTTGCTCTGGGCAGGTGGTACTGAGTCTAAGCACTGCGGTAAAGGAGTTAGTAGAAAACAGTCTGGATGCTGGTGCCACTAATATTGATCTAAAGCTTAAGGACTATGGAGTGGATCTTATTGAAGTTTCAGACAATGGATGTGGGGTAGAAGAAGAAAACTTCGAAGGCTTAACTCTGAAACATCACACATCTAAGATTCAAGAGTTTGCCGACCTAACTCAGGTTGAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAGCGATGTCACCATTTCTACCTGCCACGCATCGGCGAAGGTTGGAACTCGACTGATGTTTGATCACAATGGGAAAATTATCCAGAAAACCCCCTACCCCCGCCCCAGAGGGACCACAGTCAGCGTGCAGCAGTTATTTTCCACACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAGGAGTATGCCAAAATGGTCCAGGTCTTACATGCATACTGTATCATTTCAGCAGGCATCCGTGTAAGTTGCACCAATCAGCTTGGACAAGGAAAACGACAGCCTGTGGTATGCACAGGTGGAAGCCCCAGCATAAAGGAAAATATCGGCTCTGTGTTTGGGCAGAAGCAGTTGCAAAGCCTCATTCCTTTTGTTCAGCTGCCCCCTAGTGACTCCGTGTGTGAAGAGTACGGTTTGAGCTGTTCCGATGCTCTGCATAATCTTTTTTACATCTCAGGTTTCATTTCACAATGCACGCATGGAGTTGGAAGGAGTTCAACAGACAGACAGTTTTTCTTTATCAACCGGCGGCCTTGTGACCCAGCAAAGGTCTGCAGACTCGTGAATGAGGTCTACCACATGTATAATCGACACCAGTATCCATTTGTTGTTCTTAACATTTCTGTTGATTCAGAATGCGTTGATATCAATGTTACTCCAGATAAAAGGCAAATTTTGCTACAAGAGGAAAAGCTTTTGTTGGCAGTTTTAAAGACCTCTTTGATAGGAATGTTTGATAGTGATGTCAACAAGCTAAATGTCAGTCAGCAGCCACTGCTGGATGTTGAAGGTAACTTAATAAAAATGCATGCAGCGGATTTGGAAAAGCCCATGGTAGAAAAGCAGGATCAATCCCCTTCATTAAGGACTGGAGAAGAAAAAAAAGACGTGTCCATTTCCAGACTGCGAGAGGCCTTTTCTCTTCGTCACACAACAGAGAACAAGCCTCACAGCCCAAAGACTCCAGAACCAAGAAGGAGCCCTCTAGGACAGAAAAGGGGTATGCTGTCTTCTAGCACTTCAGGTGCCATCTCTGACAAAGGCGTCCTGAGACCTCAGAAAGAGGCAGTGAGTTCCAGTCACGGACCCAGTGACCCTACGGACAGAGCGGAGGTGGAGAAGGACTCGGGGCACGGCAGCACTTCCGTGGATTCTGAGGGGTTCAGCATCCCAGACACGGGCAGTCACTGCAGCAGCGAGTATGCGGCCAGCTCCCCAGGGGACAGGGGCTCGCAGGAACATGTGGACTCTCAGGAGAAAGCGCCTGAAACTGACGACTCTTTTTCAGATGTGGACTGCCATTCAAACCAGGAAGATACCGGATGTAAATTTCGAGTTTTGCCTCAGCCAACTAATCTCGCAACCCCAAACACAAAGCGTTTTAAAAAAGAAGAAATTCTTTCCAGTTCTGACATTTGTCAAAAGTTAGTAAATACTCAGGACA
TGTCAGCCTCTCAGGTTGATGTAGCTGTGAAAATTAATAAGAAAGTTGTGCCCCTGGACTTTTCTATGAGTTCTTTAGCTAAACGAATAAAGCAGTTACATCATGAAGCACAGCAAAGTGAAGGGGAACAGAATTACAGGAAGTTTAGGGCAAAGATTTGTCCTGGAGAAAATCAAGCAGCCGAAGATGAACTAAGAAAAGAGATAAGTAAAACGATGTTTGCAGAAATGGAAATCATTGGTCAGTTTAACCTGGGATTTATAATAACCAAACTGAATGAGGATATCTTCATAGTGGACCAGCATGCCACGGACGAGAAGTATAACTTCGAGATGCTGCAGCAGCACACCGTGCTCCAGGGGCAGAGGCTCATAGCACCTCAGACTCTCAACTTAACTGCTGTTAATGAAGCTGTTCTGATAGAAAATCTGGAAATATTTAGAAAGAATGGCTTTGATTTTGTTATCGATGAAAATGCTCCAGTCACTGAAAGGGCTAAACTGATTTCCTTGCCAACTAGTAAAAACTGGACCTTCGGACCCCAGGACGTCGATGAACTGATCTTCATGCTGAGCGACAGCCCTGGGGTCATGTGCCGGCCTTCCCGAGTCAAGCAGATGTTTGCCTCCAGAGCCTGCCGGAAGTCGGTGATGATTGGGACTGCTCTTAACACAAGCGAGATGAAGAAACTGATCACCCACATGGGGGAGATGGACCACCCCTGGAACTGTCCCCATGGAAGGCCAACCATGAGACACATCGCCAACCTGGGTGTCATTTCTCAGAACTGACCGTAGTCACTGTATGGAATAATTGGTTTTATCGCAGATTTTTATGTTTTGAAAGACAGAGTCTTCACTAACCTTTTTTGTTTTAAAATGAACCTGCTACTTAAAAAAAATACACATCACACCCATTTAAAAGTGATCTTGAGAACCTTTTCAAACCAGAAAAAAAAAAAAAAAA"}, - {"NR_073517.1", "GTGGCGGCGGCGGAGGCGGGGATCCCGCGGCTGCGGCGACGGTGGCCGCGGTGGAGCCACGGGGCGGGCTTGGCTTGGTGTGACGGCGGCTGCGGCGGCGGTGGCGGCCGCGACCAGGTCGGCGTCCTCAGCTGGCCGAGCATGGTGGCAGCCTGCACCCTTGGCTCCCTTGTCTGGTGCAGCCAGCAGAGCCGCCAGCCTTGGGCGCCCATGGCCCTCCGTGTGAGGGCGTGAGCGGCCTGCCCCAGCCTCACCTGCTGATGGAGGACTCAATGGCCCAGTGACCTGACACCACACCACCAACTCCCTCCCACCAGCTGACGAATGGTGGACCCAGTGACGAGTGGCCCTTGTAAGGGTCATGGAATAATTTGAAGCGAGGCATGAGCGGCCCCTGTGGTCGCCTGTGACTGCTGGAGATAGAGGTCCCAGCACCCCAAGCCAACCCAGCGGACCCTCCCAGCCCTGCTTCAACCAATGGGGCCAGTGGGGCTCCAAGCAGCCACCTAACCATCCAGACCCCACCCCACTCACGCGGCCATGGCGGGCCCTGAGGGCTTCCAGTACCGCGCTCTGTACCCGTTCCGCCGGGAGCGGCCGGAGGACCTGGAGCTGCTGCCCGGCGACGTGCTGGTAGTGAGCCGGGCGGCCTTGCAGGCGCTGGGCGTGGCCGAGGGTGGCGAGCGCTGCCCACAGAGCGTGGGCTGGATGCCCGGCCTCAACGAGCGCACACGGCAGCGAGGTGACTTCCCTGGCACCTATGTGGAGTTCCTGGGGCCCGTGGCCCTGGCCCGGCCCGGCCCTCGCCCACGGGGCCCCCGCCCACTGCCCGCCAGGCCCCGTGATGGGGCCCCTGAGCCAGGCCTCACACTCCCCGACTTGCCCGAGCAGTTCTCCCCACCTGATGTGGCTCCCCCTCTTCTGGTGAAGCTTGTGGAGGCCATTGAAAGGACAGGGCTGGACAGCGAATCTCACTACCGCCCGGAGCTGCCCGCACCGCGTACAGACTGG
TCCCTGAGCGACGTGGATCAGTGGGACACGGCAGCCCTGGCTGACGGCATTAAGAGCTTCCTGCTGGCACTGCCCGCGCCGCTCGTGACCCCCGAGGCCTCGGCCGAGGCGCGCCGGGCCCTGCGGGAGGCCGCGGGGCCCGTGGGGCCGGCGCTGGAGCCACCGACGCTGCCGCTGCACCGCGCGCTCACGCTGCGCTTCCTGCTCCAGCACCTGGGCCGCGTGGCCCGCCGCGCCCCGGCCCTGGGTCCCGCGGTCCGGGCCCTGGGCGCCACCTTTGGGCCGCTGCTGCTGCGCGCGCCGCCGCCGCCGTCCTCGCCGCCGCCAGGGGGCGCTCCCGACGGGAGTGAGCCCAGCCCTGACTTCCCGGCGCTGCTGGTGGAGAAGCTGCTTCAGGAACACTTGGAAGAGCAGGAGGTTGCGCCCCCAGCGCTGCCGCCTAAACCCCCCAAGGCAAAGCCGGCCCCCACAGTCCTGGCCAATGGAGGGAGCCCACCCTCCCTGCAGGATGCTGAGTGGTACTGGGGGGACATTTCAAGGGAGGAGGTGAACGAGAAACTCCGGGACACTCCCGATGGCACCTTCCTAGTCCGAGATGCTTCTAGCAAGATCCAGGGCGAGTACACGCTGACCCTCAGGAAAGGCGGGAACAATAAGCTGATCAAGGTCTTCCACCGAGATGGGCACTATGGCTTCTCAGAGCCACTCACCTTCTGCTCCGTTGTGGACCTCATCAATCACTACCGCCACGAGTCTCTGGCCCAGTACAATGCCAAGCTGGACACACGGCTCCTCTACCCTGTGTCCAAATACCAGCAGGACCAGATTGTCAAGGAGGACAGCGTGGAGGCAGTGGGCGCCCAGCTTAAGGTCTATCACCAGCAGTACCAGGACAAGAGCCGCGAGTATGACCAGCTTTATGAAGAGTACACACGGACCTCCCAGGGCCTTTTGGGGAGTCCCAGGAGGTGCTGAGCTGCGCCCCCTCCTCCAGGAGCTGCAGATGAAGCGTACTGCAATTGAGGCCTTCAATGAGACTATCAAGATCTTTGAAGAGCAGGGCCAGACTCAAGAGAAATGCAGCAAGGAATACCTGGAGCGCTTCCGGCGTGAGGGCAACGAGAAAGAGATGCAAAGGATCCTGCTGAACTCCGAGCGGCTCAAGTCCCGCATTGCCGAGATCCATGAGAGCCGCACGAAGCTGGAGCAGCAGCTGCGGGCCCAGGCCTCGGACAACAGAGAGATCGACAAGCGCATGAACAGCCTCAAGCCGGACCTCATGCAGCTGCGCAAGATCCGAGACCAGTACCTCGTGTGGCTCACCCAGAAAGGCGCCCGGCAGAAGAAAATCAACGAGTGGCTGGGGATTAAAAATGAGACTGAGGACCAGTACGCACTCATGGAGGACGAGGACGATCTCCCGCACCACGAGGAACGCACTTGGTACGTGGGCAAGATCAACCGCACGCAGGCAGAGGAGATGCTGAGTGGCAAGCGGGATGGCACCTTCCTCATCCGCGAGAGCAGCCAGCGGGGCTGCTACGCCTGCTCCGTGGTAGTGGACGGCGACACCAAGCACTGCGTCATCTACCGCACGGCCACCGGCTTCGGCTTCGCGGAGCCCTACAACCTGTACGGGTCGCTGAAGGAGCTGGTGCTGCACTACCAGCACGCCTCGCTGGTGCAGCACAACGACGCGCTCACCGTCACCCTGGCGCACCCAGTGCGCGCCCCGGGCCCCGGCCCGCCGCCTGCCGCCCGCTGAGCACCGAGGACCCGCCCCAAGCAGAGCCGCCCCTGGGCCCGTCTGCGCCGGAGGCTGCGGCGGCGGGAGCCACGGACCAGACCAGCCACATCCAGGGGTCCTCATTTCTCCGGCTCTGGCTCTTGTTTGGGGTTCTCTCACCCTCTTTCTCTTTCCTTCCCTCCCCCATTCTCCAGATCTCCCTCTGTCTCCTTTTCTCTGTCTTTCTTGGCCCCTGTCTCTCTCCATGTTGGGG
GTCCTAACTCCCCCACCCCATATCTACGTGTCCTCCGGGCATTGCCCTCTCCATGGCTCTGGTCACCCTGACCCTCTGCCCTGCCCACCGCAGGTCCCCCGGGGTCCCGGAAGCCCCTTCTGGCTGCACCTGCCATGTTTACAGAGGGCCCCTGGGCTGCGCGGCCCCAGCCTGGGCACCCTGATTTTTAAGCCATAGACCTGGGGTCAGGGCAGGAAGGAACTTCACTCTGCTGCTTCCGAGAACCTCGGCCGTGACATTCGGGGCCGGGCGGGACCCGCCCCACAGACTCCAACTTCCCCTCCAAACCCCGAAGTGAAACCCGCCACCGGGTTACCCCCACAAGGGGGCCGCTGCGAGAAGTTCACCCACCCCCGAAAAAATAATTAAACTCGCAGGCCAGGCACGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGACGGGCGGATCTTTTGAGGTCGGGAGTTGGAGGCCAGCCTGGCCAAAATGGCAAAACCCCGCATCTACTAAAATACAAAAATTAGCCGGGCGTGGTGGCGGCCGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCGTGAGAATCTCTTGAACCCAGGAGATGGAGGTTGCAGTGAGCAGAGATCGTGCCACTGCACTCCAGCCTGGGTAACAGAGGGAGACTCCTCCGTCTCAAAAAAATAAATAAATAAACTTGTGAGCTGGCCCCAACCCCTCCTAGGAATCACAGCTCCCCGTACTGGTGCCGCCGCAGTGGCCAAGTTGCGACACTGCCCACGGCCCCTCCCTCTGATGCAGATTCAGGGCTTCTCTTCGATCATGTTGGGTTTTGATTCTGTTTTTCCTTGACTGCAAAACCCTCTTTCCTCTCCTCTTTTGGGACAAGAGCCCTGGTTTTCTACGCTGCCCTTGGCCACCACACTGCCTGCCCCACGAGCTGGGAGGCAGGTTTTGTACGGTACGTTGTTATTGATATGATATAAAACATCAAACGTCGAAAAAAAAAAAAAAA"}, - {"NR_003085.2", 
"AGCCAATGGGAGTTCAGGAGGCGGAGCGCCTGTGGGAGCCCTGGAGGGAACTTTCCCAGTCCCCGAGGCGGATCGGGTGTTGCATCCATGGAGCGAGCTGAGAGCTCGAGAACCTGCTAAGGCCATCAAACCTATTGATCGGAAGTCAGTCCATCAGATTTGCTCTGGGCAGGTGGTACTGAGTCTAAGCACTGCGGTAAAGGAGTTAGTAGAAAACAGTCTGGATGCTGGTGCCACTAATATTGATCTAAAGCTTAAGGACTATGGAGTGGATCTTATTGAAGTTTCAGACAATGGATGTGGGGTAGAAGAAGAAAACTTCGAAGGCTTAACTCTGAAACATCACACATCTAAGATTCAAGAGTTTGCCGACCTAACTCAGGTTGAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAGCGATGTCACCATTTCTACCTGCCACGCATCGGCGAAGGTTGGAACTCGACTGATGTTTGATCACAATGGGAAAATTATCCAGAAAACCCCCTACCCCCGCCCCAGAGGGACCACAGTCAGCGTGCAGCAGTTATTTTCCACACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAGGAGTATGCCAAAATGGTCCAGGTCTTACATGCATACTGTATCATTTCAGCAGGCATCCGTGTAAGTTGCACCAATCAGCTTGGACAAGGAAAACGACAGCCTGTGGTATGCACAGGTGGAAGCCCCAGCATAAAGGAAAATATCGGCTCTGTGTTTGGGCAGAAGCAGTTGCAAAGCCTCATTCCTTTTGTTCAGCTGCCCCCTAGTGACTCCGTGTGTGAAGAGTACGGTTTGAGCTGTTCCGATGCTCTGCATAATCTTTTTTACATCTCAGGTTTCATTTCACAATGCACGCATGGAGTTGGAAGGAGTTCAACAGACAGACAGTTTTTCTTTATCAACCGGCGGCCTTGTGACCCAGCAAAGGTCTGCAGACTCGTGAATGAGGTCTACCACATGTATAATCGACACCAGTATCCATTTGTTGTTCTTAACATTTCTGTTGATTCAGAATGCGTTGATATCAATGTTACTCCAGATAAAAGGCAAATTTTGCTACAAGAGGAAAAGCTTTTGTTGGCAGTTTTAAAGACCTCTTTGATAGGAATGTTTGATAGTGATGTCAACAAGCTAAATGTCAGTCAGCAGCCACTGCTGGATGTTGAAGGTAACTTAATAAAAATGCATGCAGCGGATTTGGAAAAGCCCATGGTAGAAAAGCAGGATCAATCCCCTTCATTAAGGACTGGAGAAGAAAAAAAAGACGTGTCCATTTCCAGACTGCGAGAGGCCTTTTCTCTTCGTCACACAACAGAGAACAAGCCTCACAGCCCAAAGACTCCAGAACCAAGAAGGAGCCCTCTAGGACAGAAAAGGGGTATGCTGTCTTCTAGCACTTCAGGTGCCATCTCTGACAAAGGCGTCCTGAGACCTCAGAAAGAGGCAGTGAGTTCCAGTCACGGACCCAGTGACCCTACGGACAGAGCGGAGGTGGAGAAGGACTCGGGGCACGGCAGCACTTCCGTGGATTCTGAGGGGTTCAGCATCCCAGACACGGGCAGTCACTGCAGCAGCGAGTATGCGGCCAGCTCCCCAGGGGACAGGGGCTCGCAGGAACATGTGGACTCTCAGGAGAAAGCGCCTGAAACTGACGACTCTTTTTCAGATGTGGACTGCCATTCAAACCAGGAAGATACCGGATGTAAATTTCGAGTTTTGCCTCAGCCAACTAATCTCGCAACCCCAAACACAAAGCGTTTTAAAAAAGAAGAAATTCTTTCCAGTTCTGACATTTGTCAAAAGTTAGTAAATACTCAGGACATGTCAGCCTCTCAGGTTGATGTAGCTGTGAAAATTAATAAGAAAGTTGTGCCCCTGGACTTTTCTATGAGTTCTTTAGCTAAACGAATAAAGCAGTTACATCATGAAGCACAGCAAAGT
GAAGGGGAACAGAATTACAGGAAGTTTAGGGCAAAGATTTGTCCTGGAGAAAATCAAGCAGCCGAAGATGAACTAAGAAAAGAGATAAGTAAAACGATGTTTGCAGAAATGGAAATCATTGGTCAGTTTAACCTGGGATTTATAATAACCAAACTGAATGAGGATATCTTCATAGTGGACCAGCATGCCACGGACGAGAAGTATAACTTCGAGATGCTGCAGCAGCACACCGTGCTCCAGGGGCAGAGGCTCATAGCACCTCAGACTCTCAACTTAACTGCTGTTAATGAAGCTGTTCTGATAGAAAATCTGGAAATATTTAGAAAGAATGGCTTTGATTTTGTTATCGATGAAAATGCTCCAGTCACTGAAAGGGCTAAACTGATTTCCTTGCCAACTAGTAAAAACTGGACCTTCGGACCCCAGGACGTCGATGAACTGATCTTCATGCTGAGCGACAGCCCTGGGGTCATGTGCCGGCCTTCCCGAGTCAAGCAGATGTTTGCCTCCAGAGCCTGCCGGAAGTCGGTGATGATTGGGACTGCTCTTAACACAAGCGAGATGAAGAAACTGATCACCCACATGGGGGAGATGGACCACCCCTGGAACTGTCCCCATGGAAGGCCAACCATGAGACACATCGCCAACCTGGGTGTCATTTCTCAGAACTGACCGTAGTCACTGTATGGAATAATTGGTTTTATCGCAGATTTTTATGTTTTGAAAGACAGAGTCTTCACTAACCTTTTTTGTTTTAAAATGAACCTGCTACTTAAAAAAAATACACATCACACCCATTTAAAAGTGATCTTGAGAACCTTTTCAAACCAGAAAAAAAAAAAAAAAA"} - }; - public static Dictionary TranscriptCds = new Dictionary() - { - {"NM_022148.2", "ATGGGGCGGCTGGTTCTGCTGTGGGGAGCTGCCGTCTTTCTGCTGGGAGGCTGGATGGCTTTGGGGCAAGGAGGAGCAGCAGAAGGAGTACAGATTCAGATCATCTACTTCAATTTAGAAACCGTGCAGGTGACATGGAATGCCAGCAAATACTCCAGGACCAACCTGACTTTCCACTACAGATTCAACGGTGATGAGGCCTATGACCAGTGCACCAACTACCTTCTCCAGGAAGGTCACACTTCGGGGTGCCTCCTAGACGCAGAGCAGCGAGACGACATTCTCTATTTCTCCATCAGGAATGGGACGCACCCCGTTTTCACCGCAAGTCGCTGGATGGTTTATTACCTGAAACCCAGTTCCCCGAAGCACGTGAGATTTTCGTGGCATCAGGATGCAGTGACGGTGACGTGTTCTGACCTGTCCTACGGGGATCTCCTCTATGAGGTTCAGTACCGGAGCCCCTTCGACACCGAGTGGCAGTCCAAACAGGAAAATACCTGCAACGTCACCATAGAAGGCTTGGATGCCGAGAAGTGTTACTCTTTCTGGGTCAGGGTGAAGGCTATGGAGGATGTATATGGGCCAGACACATACCCAAGCGACTGGTCAGAGGTGACATGCTGGCAGAGAGGCGAGATTCGGGATGCCTGTGCAGAGACACCAACGCCTCCCAAACCAAAGCTGTCCAAATTTATTTTAATTTCCAGCCTGGCCATCCTTCTGATGGTGTCTCTCCTCCTTCTGTCTTTATGGAAATTATGGAGAGTGAAGAAGTTTCTCATTCCCAGCGTGCCAGACCCGAAATCCATCTTCCCCGGGCTCTTTGAGATACACCAAGGGAACTTCCAGGAGTGGATCACAGACACCCAGAACGTGGCCCACCTCCACAAGATGGCAGGTGCAGAGCAAGAAAGTGGCCCCGAGGAGCCCCTGGTAGTCCAGTTGGCCAAGACTGAAGCCGAGTCTCCCAGGATGCTGGACCCACAGACCGAGGAGAAAGAGGCCTCTGGGGGATCCCTCCAGCTTCCCCACCAGCCCCTCCAAGGCGGTGATGTGGTC
ACAATCGGGGGCTTCACCTTTGTGATGAATGACCGCTCCTACGTGGCGTTGTGA"}, - {"NM_012234.6", "ATGACCATGGGCGACAAGAAGAGCCCGACCAGGCCAAAAAGACAAGCGAAACCTGCCGCAGACGAAGGGTTTTGGGATTGTAGCGTCTGCACCTTCAGAAACAGTGCTGAAGCCTTTAAATGCAGCATCTGCGATGTGAGGAAAGGCACCTCCACCAGAAAACCTCGGATCAATTCTCAGCTGGTGGCACAACAAGTGGCACAACAGTATGCCACCCCACCACCCCCTAAAAAGGAGAAGAAGGAGAAAGTTGAAAAGCAGGACAAAGAGAAACCTGAGAAAGACAAGGAAATTAGTCCTAGTGTTACCAAGAAAAATACCAACAAGAAAACCAAACCAAAGTCTGACATTCTGAAAGATCCTCCTAGTGAAGCAAACAGCATACAGTCTGCAAATGCTACAACAAAGACCAGCGAAACAAATCACACCTCAAGGCCCCGGCTGAAAAACGTGGACAGGAGCACTGCACAGCAGTTGGCAGTAACTGTGGGCAACGTCACCGTCATTATCACAGACTTTAAGGAAAAGACTCGCTCCTCATCGACATCCTCATCCACAGTGACCTCCAGTGCAGGGTCAGAACAGCAGAACCAGAGCAGCTCGGGGTCAGAGAGCACAGACAAGGGCTCCTCCCGTTCCTCCACGCCAAAGGGCGACATGTCAGCAGTCAATGATGAATCTTTCTGA"}, - {"NM_001220765.1", "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGC
AGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220770.1", "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220773.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_002447.2", 
"ATGGAGCTCCTCCCGCCGCTGCCTCAGTCCTTCCTGTTGCTGCTGCTGTTGCCTGCCAAGCCCGCGGCGGGCGAGGACTGGCAGTGCCCGCGCACCCCCTACGCGGCCTCTCGCGACTTTGACGTGAAGTACGTGGTGCCCAGCTTCTCCGCCGGAGGCCTGGTACAGGCCATGGTGACCTACGAGGGCGACAGAAATGAGAGTGCTGTGTTTGTAGCCATACGCAATCGCCTGCATGTGCTTGGGCCTGACCTGAAGTCTGTCCAGAGCCTGGCCACGGGCCCTGCTGGAGACCCTGGCTGCCAGACGTGTGCAGCCTGTGGCCCAGGACCCCACGGCCCTCCCGGTGACACAGACACAAAGGTGCTGGTGCTGGATCCCGCGCTGCCTGCGCTGGTCAGTTGTGGCTCCAGCCTGCAGGGCCGCTGCTTCCTGCATGACCTAGAGCCCCAAGGGACAGCCGTGCATCTGGCAGCGCCAGCCTGCCTCTTCTCAGCCCACCATAACCGGCCCGATGACTGCCCCGACTGTGTGGCCAGCCCATTGGGCACCCGTGTAACTGTGGTTGAGCAAGGCCAGGCCTCCTATTTCTACGTGGCATCCTCACTGGACGCAGCCGTGGCTGCCAGCTTCAGCCCACGCTCAGTGTCTATCAGGCGTCTCAAGGCTGACGCCTCGGGATTCGCACCGGGCTTTGTGGCGTTGTCAGTGCTGCCCAAGCATCTTGTCTCCTACAGTATTGAATACGTGCACAGCTTCCACACGGGAGCCTTCGTATACTTCCTGACTGTACAGCCGGCCAGCGTGACAGATGATCCTAGTGCCCTGCACACACGCCTGGCACGGCTTAGCGCCACTGAGCCAGAGTTGGGTGACTATCGGGAGCTGGTCCTCGACTGCAGATTTGCTCCAAAACGCAGGCGCCGGGGGGCCCCAGAAGGCGGACAGCCCTACCCTGTGCTGCGGGTGGCCCACTCCGCTCCAGTGGGTGCCCAACTTGCCACTGAGCTGAGCATCGCCGAGGGCCAGGAAGTACTATTTGGGGTCTTTGTGACTGGCAAGGATGGTGGTCCTGGCGTGGGCCCCAACTCTGTCGTCTGTGCCTTCCCCATTGACCTGCTGGACACACTAATTGATGAGGGTGTGGAGCGCTGTTGTGAATCCCCAGTCCATCCAGGCCTCCGGCGAGGCCTCGACTTCTTCCAGTCGCCCAGTTTTTGCCCCAACCCGCCTGGCCTGGAAGCCCTCAGCCCCAACACCAGCTGCCGCCACTTCCCTCTGCTGGTCAGTAGCAGCTTCTCACGTGTGGACCTATTCAATGGGCTGTTGGGACCAGTACAGGTCACTGCATTGTATGTGACACGCCTTGACAACGTCACAGTGGCACACATGGGCACAATGGATGGGCGTATCCTGCAGGTGGAGCTGGTCAGGTCACTAAACTACTTGCTGTATGTGTCCAACTTCTCACTGGGTGACAGTGGGCAGCCCGTGCAGCGGGATGTCAGTCGTCTTGGGGACCACCTACTCTTTGCCTCTGGGGACCAGGTTTTCCAGGTACCTATCCAAGGCCCTGGCTGCCGCCACTTCCTGACCTGTGGGCGTTGCCTAAGGGCATGGCATTTCATGGGCTGTGGCTGGTGTGGGAACATGTGCGGCCAGCAGAAGGAGTGTCCTGGCTCCTGGCAACAGGACCACTGCCCACCTAAGCTTACTGAGTTCCACCCCCACAGTGGACCTCTAAGGGGCAGTACAAGGCTGACCCTGTGTGGCTCCAACTTCTACCTTCACCCTTCTGGTCTGGTGCCTGAGGGAACCCATCAGGTCACTGTGGGCCAAAGTCCCTGCCGGCCACTGCCCAAGGACAGCTCAAAACTCAGACCAGTGCCCCGGAAAGACTTTGTAGAGGAGTTTGAGTGTGAACTGGAGCCCTTGGGCACCCAGGCAGTGGGGCCTACCAACGTCAGCCTCACCGTGACTAACATGCCACCGGGCAAGC
ACTTCCGGGTAGACGGCACCTCCGTGCTGAGAGGCTTCTCTTTCATGGAGCCAGTGCTGATAGCAGTGCAACCCCTCTTTGGCCCACGGGCAGGAGGCACCTGTCTCACTCTTGAAGGCCAGAGTCTGTCTGTAGGCACCAGCCGGGCTGTGCTGGTCAATGGGACTGAGTGTCTGCTAGCACGGGTCAGTGAGGGGCAGCTTTTATGTGCCACACCCCCTGGGGCCACGGTGGCCAGTGTCCCCCTTAGCCTGCAGGTGGGGGGTGCCCAGGTACCTGGTTCCTGGACCTTCCAGTACAGAGAAGACCCTGTCGTGCTAAGCATCAGCCCCAACTGTGGCTACATCAACTCCCACATCACCATCTGTGGCCAGCATCTAACTTCAGCATGGCACTTAGTGCTGTCATTCCATGACGGGCTTAGGGCAGTGGAAAGCAGGTGTGAGAGGCAGCTTCCAGAGCAGCAGCTGTGCCGCCTTCCTGAATATGTGGTCCGAGACCCCCAGGGATGGGTGGCAGGGAATCTGAGTGCCCGAGGGGATGGAGCTGCTGGCTTTACACTGCCTGGCTTTCGCTTCCTACCCCCACCCCATCCACCCAGTGCCAACCTAGTTCCACTGAAGCCTGAGGAGCATGCCATTAAGTTTGAGTATATTGGGCTGGGCGCTGTGGCTGACTGTGTGGGTATCAACGTGACCGTGGGTGGTGAGAGCTGCCAGCACGAGTTCCGGGGGGACATGGTTGTCTGCCCCCTGCCCCCATCCCTGCAGCTTGGCCAGGATGGTGCCCCATTGCAGGTCTGCGTAGATGGTGAATGTCATATCCTGGGTAGAGTGGTGCGGCCAGGGCCAGATGGGGTCCCACAGAGCACGCTCCTTGGTATCCTGCTGCCTTTGCTGCTGCTTGTGGCTGCACTGGCGACTGCACTGGTCTTCAGCTACTGGTGGCGGAGGAAGCAGCTAGTTCTTCCTCCCAACCTGAATGACCTGGCATCCCTGGACCAGACTGCTGGAGCCACACCCCTGCCTATTCTGTACTCGGGCTCTGACTACAGAAGTGGCCTTGCACTCCCTGCCATTGATGGTCTGGATTCCACCACTTGTGTCCATGGAGCATCCTTCTCCGATAGTGAAGATGAATCCTGTGTGCCACTGCTGCGGAAAGAGTCCATCCAGCTAAGGGACCTGGACTCTGCGCTCTTGGCTGAGGTCAAGGATGTGCTGATTCCCCATGAGCGGGTGGTCACCCACAGTGACCGAGTCATTGGCAAAGGCCACTTTGGAGTTGTCTACCACGGAGAATACATAGACCAGGCCCAGAATCGAATCCAATGTGCCATCAAGTCACTAAGTCGCATCACAGAGATGCAGCAGGTGGAGGCCTTCCTGCGAGAGGGGCTGCTCATGCGTGGCCTGAACCACCCGAATGTGCTGGCTCTCATTGGTATCATGTTGCCACCTGAGGGCCTGCCCCATGTGCTGCTGCCCTATATGTGCCACGGTGACCTGCTCCAGTTCATCCGCTCACCTCAGCGGAACCCCACCGTGAAGGACCTCATCAGCTTTGGCCTGCAGGTAGCCCGCGGCATGGAGTACCTGGCAGAGCAGAAGTTTGTGCACAGGGACCTGGCTGCGCGGAACTGCATGCTGGACGAGTCATTCACAGTCAAGGTGGCTGACTTTGGTTTGGCCCGCGACATCCTGGACAGGGAGTACTATAGTGTTCAACAGCATCGCCACGCTCGCCTACCTGTGAAGTGGATGGCGCTGGAGAGCCTGCAGACCTATAGATTTACCACCAAGTCTGATGTGTGGTCATTTGGTGTGCTGCTGTGGGAACTGCTGACACGGGGTGCCCCACCATACCGCCACATTGACCCTTTTGACCTTACCCACTTCCTGGCCCAGGGTCGGCGCCTGCCCCAGCCTGAGTATTGCCCTGATTCTCTGTACCAAGTGATGCAGCAATGCTGGGAGGCAGACCCAGCAGTGCGACCCACC
TTCAGAGTACTAGTGGGGGAGGTGGAGCAGATAGTGTCTGCACTGCTTGGGGACCATTATGTGCAGCTGCCAGCAACCTACATGAACTTGGGCCCCAGCACCTCGCATGAGATGAATGTGCGTCCAGAACAGCCGCAGTTCTCACCCATGCCAGGGAATGTACGCCGGCCCCGGCCACTCTCAGAGCCTCCTCGGCCCACTTGA"}, - {"NM_005228.3", "ATGCGACCCTCCGGGACGGCCGGGGCAGCGCTCCTGGCGCTGCTGGCTGCGCTCTGCCCGGCGAGTCGGGCTCTGGAGGAAAAGAAAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTTTGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCCTTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTAAAGACCATCCAGGAGGTGGCTGGTTATGTCCTCATTGCCCTCAACACAGTGGAGCGAATTCCTTTGGAAAACCTGCAGATCATCAGAGGAAATATGTACTACGAAAATTCCTATGCCTTAGCAGTCTTATCTAACTATGATGCAAATAAAACCGGACTGAAGGAGCTGCCCATGAGAAATTTACAGGAAATCCTGCATGGCGCCGTGCGGTTCAGCAACAACCCTGCCCTGTGCAACGTGGAGAGCATCCAGTGGCGGGACATAGTCAGCAGTGACTTTCTCAGCAACATGTCGATGGACTTCCAGAACCACCTGGGCAGCTGCCAAAAGTGTGATCCAAGCTGTCCCAATGGGAGCTGCTGGGGTGCAGGAGAGGAGAACTGCCAGAAACTGACCAAAATCATCTGTGCCCAGCAGTGCTCCGGGCGCTGCCGTGGCAAGTCCCCCAGTGACTGCTGCCACAACCAGTGTGCTGCAGGCTGCACAGGCCCCCGGGAGAGCGACTGCCTGGTCTGCCGCAAATTCCGAGACGAAGCCACGTGCAAGGACACCTGCCCCCCACTCATGCTCTACAACCCCACCACGTACCAGATGGATGTGAACCCCGAGGGCAAATACAGCTTTGGTGCCACCTGCGTGAAGAAGTGTCCCCGTAATTATGTGGTGACAGATCACGGCTCGTGCGTCCGAGCCTGTGGGGCCGACAGCTATGAGATGGAGGAAGACGGCGTCCGCAAGTGTAAGAAGTGCGAAGGGCCTTGCCGCAAAGTGTGTAACGGAATAGGTATTGGTGAATTTAAAGACTCACTCTCCATAAATGCTACGAATATTAAACACTTCAAAAACTGCACCTCCATCAGTGGCGATCTCCACATCCTGCCGGTGGCATTTAGGGGTGACTCCTTCACACATACTCCTCCTCTGGATCCACAGGAACTGGATATTCTGAAAACCGTAAAGGAAATCACAGGGTTTTTGCTGATTCAGGCTTGGCCTGAAAACAGGACGGACCTCCATGCCTTTGAGAACCTAGAAATCATACGCGGCAGGACCAAGCAACATGGTCAGTTTTCTCTTGCAGTCGTCAGCCTGAACATAACATCCTTGGGATTACGCTCCCTCAAGGAGATAAGTGATGGAGATGTGATAATTTCAGGAAACAAAAATTTGTGCTATGCAAATACAATAAACTGGAAAAAACTGTTTGGGACCTCCGGTCAGAAAACCAAAATTATAAGCAACAGAGGTGAAAACAGCTGCAAGGCCACAGGCCAGGTCTGCCATGCCTTGTGCTCCCCCGAGGGCTGCTGGGGCCCGGAGCCCAGGGACTGCGTCTCTTGCCGGAATGTCAGCCGAGGCAGGGAATGCGTGGACAAGTGCAACCTTCTGGAGGGTGAGCCAAGGGAGTTTGTGGAGAACTCTGAGTGCATACAGTGCCACCCAGAGTGCCTGCCTCAGGCCATGAACATCACCTGCACAGGACGGGGACCAGACAACTGTATCCAGTGTGCCCACTACATTGACGGCCCCCACTGC
GTCAAGACCTGCCCGGCAGGAGTCATGGGAGAAAACAACACCCTGGTCTGGAAGTACGCAGACGCCGGCCATGTGTGCCACCTGTGCCATCCAAACTGCACCTACGGATGCACTGGGCCAGGTCTTGAAGGCTGTCCAACGAATGGGCCTAAGATCCCGTCCATCGCCACTGGGATGGTGGGGGCCCTCCTCTTGCTGCTGGTGGTGGCCCTGGGGATCGGCCTCTTCATGCGAAGGCGCCACATCGTTCGGAAGCGCACGCTGCGGAGGCTGCTGCAGGAGAGGGAGCTTGTGGAGCCTCTTACACCCAGTGGAGAAGCTCCCAACCAAGCTCTCTTGAGGATCTTGAAGGAAACTGAATTCAAAAAGATCAAAGTGCTGGGCTCCGGTGCGTTCGGCACGGTGTATAAGGGACTCTGGATCCCAGAAGGTGAGAAAGTTAAAATTCCCGTCGCTATCAAGGAATTAAGAGAAGCAACATCTCCGAAAGCCAACAAGGAAATCCTCGATGAAGCCTACGTGATGGCCAGCGTGGACAACCCCCACGTGTGCCGCCTGCTGGGCATCTGCCTCACCTCCACCGTGCAGCTCATCACGCAGCTCATGCCCTTCGGCTGCCTCCTGGACTATGTCCGGGAACACAAAGACAATATTGGCTCCCAGTACCTGCTCAACTGGTGTGTGCAGATCGCAAAGGGCATGAACTACTTGGAGGACCGTCGCTTGGTGCACCGCGACCTGGCAGCCAGGAACGTACTGGTGAAAACACCGCAGCATGTCAAGATCACAGATTTTGGGCTGGCCAAACTGCTGGGTGCGGAAGAGAAAGAATACCATGCAGAAGGAGGCAAAGTGCCTATCAAGTGGATGGCATTGGAATCAATTTTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGGGTGACCGTTTGGGAGTTGATGACCTTTGGATCCAAGCCATATGACGGAATCCCTGCCAGCGAGATCTCCTCCATCCTGGAGAAAGGAGAACGCCTCCCTCAGCCACCCATATGTACCATCGATGTCTACATGATCATGGTCAAGTGCTGGATGATAGACGCAGATAGTCGCCCAAAGTTCCGTGAGTTGATCATCGAATTCTCCAAAATGGCCCGAGACCCCCAGCGCTACCTTGTCATTCAGGGGGATGAAAGAATGCATTTGCCAAGTCCTACAGACTCCAACTTCTACCGTGCCCTGATGGATGAAGAAGACATGGACGACGTGGTGGATGCCGACGAGTACCTCATCCCACAGCAGGGCTTCTTCAGCAGCCCCTCCACGTCACGGACTCCCCTCCTGAGCTCTCTGAGTGCAACCAGCAACAATTCCACCGTGGCTTGCATTGATAGAAATGGGCTGCAAAGCTGTCCCATCAAGGAAGACAGCTTCTTGCAGCGATACAGCTCAGACCCCACAGGCGCCTTGACTGAGGACAGCATAGACGACACCTTCCTCCCAGTGCCTGAATACATAAACCAGTCCGTTCCCAAAAGGCCCGCTGGCTCTGTGCAGAATCCTGTCTATCACAATCAGCCTCTGAACCCCGCGCCCAGCAGAGACCCACACTACCAGGACCCCCACAGCACTGCAGTGGGCAACCCCGAGTATCTCAACACTGTCCAGCCCACCTGTGTCAACAGCACATTCGACAGCCCTGCCCACTGGGCCCAGAAAGGCAGCCACCAAATTAGCCTGGACAACCCTGACTACCAGCAGGACTTCTTTCCCAAGGAAGCCAAGCCAAATGGCATCTTTAAGGGCTCCACAGCTGAAAATGCAGAATACCTAAGGGTCGCGCCACAAAGCAGTGAATTTATTGGAGCATGA"}, - {"NM_005922.2", 
"ATGAGAGAAGCCGCTGCCGCGCTGGTCCCTCCTCCCGCCTTTGCCGTCACGCCTGCCGCCGCCATGGAGGAGCCGCCGCCACCGCCGCCGCCGCCACCACCGCCACCGGAACCCGAGACCGAGTCAGAACCCGAGTGCTGCTTGGCGGCGAGGCAAGAGGGCACATTGGGAGATTCAGCTTGCAAGAGTCCTGAATCTGATCTAGAAGACTTCTCCGATGAAACAAATACAGAGAATCTTTATGGTACCTCTCCCCCCAGCACACCTCGACAGATGAAACGCATGTCAACCAAACATCAGAGGAATAATGTGGGGAGGCCAGCCAGTCGGTCTAATTTGAAAGAAAAAATGAATGCACCAAATCAGCCTCCACATAAAGACACTGGAAAAACAGTGGAGAATGTGGAAGAATACAGCTATAAGCAGGAGAAAAAGATCCGAGCAGCTCTTAGAACAACAGAGCGTGATCATAAAAAAAATGTACAGTGCTCATTCATGTTAGACTCAGTGGGTGGATCTTTGCCAAAAAAATCAATTCCAGATGTGGATCTCAATAAGCCTTACCTCAGCCTTGGCTGTAGCAATGCTAAGCTTCCAGTATCTGTGCCCATGCCTATAGCCAGACCTGCACGCCAGACTTCTAGGACTGACTGTCCAGCAGATCGTTTAAAGTTTTTTGAAACTTTACGACTTTTGCTAAAGCTTACCTCAGTCTCAAAGAAAAAAGACAGGGAGCAAAGAGGACAAGAAAATACGTCTGGTTTCTGGCTTAACCGATCTAACGAACTGATCTGGTTAGAGCTACAAGCCTGGCATGCAGGACGGACAATTAACGACCAGGACTTCTTTTTATATACAGCCCGTCAAGCCATCCCAGATATTATTAATGAAATCCTTACTTTCAAAGTCGACTATGGGAGCTTCGCCTTTGTTAGAGATAGAGCTGGTTTTAATGGTACTTCAGTAGAAGGGCAGTGCAAAGCCACTCCTGGAACAAAGATTGTAGGTTACTCAACACATCATGAGCATCTCCAACGCCAGAGGGTCTCATTTGAGCAGGTAAAACGGATAATGGAGCTGCTAGAGTACATAGAAGCACTTTATCCATCATTGCAGGCTCTTCAGAAGGACTATGAAAAATATGCTGCAAAAGACTTCCAGGACAGGGTGCAGGCACTCTGTTTGTGGTTAAACATCACAAAAGACTTAAATCAGAAATTAAGGATTATGGGCACTGTTTTGGGCATCAAGAATTTATCAGACATTGGCTGGCCAGTGTTTGAAATCCCTTCCCCTCGACCATCCAAAGGTAATGAGCCGGAGTATGAGGGTGATGACACAGAAGGAGAATTAAAGGAGTTGGAAAGTAGTACGGATGAGAGTGAAGAAGAACAAATCTCTGATCCTAGGGTACCGGAAATCAGACAGCCCATAGATAACAGCTTCGACATCCAGTCGCGGGACTGCATATCCAAGAAGCTTGAGAGGCTCGAATCTGAGGATGATTCTCTTGGCTGGGGAGCACCAGACTGGAGCACAGAAGCAGGCTTTAGTAGACATTGTCTGACTTCTATTTATAGACCATTTGTAGACAAAGCACTGAAGCAGATGGGGTTAAGAAAGTTAATTTTAAGACTTCACAAGCTAATGGATGGTTCCTTGCAAAGGGCACGTATAGCATTGGTAAAGAACGATCGTCCAGTGGAGTTTTCTGAATTTCCAGATCCCATGTGGGGTTCAGATTATGTGCAGTTGTCAAGGACACCACCTTCATCTGAGGAGAAATGCAGTGCTGTGTCGTGGGAGGAGCTGAAGGCCATGGATTTACCTTCATTCGAACCTGCCTTCCTAGTTCTCTGCCGAGTCCTTCTGAATGTCATACATGAGTGTCTGAAGTTAAGATTGGAGCAGAGACCTGCTGGAGAACCATCTCTCTTGAGTATTAAGCAGCTGGTGAGAGAGTGTAAGGAGGTCCTGAAGGGCGGCCTGCTGATGAAGC
AGTACTACCAGTTCATGCTGCAGGAGGTTCTGGAGGACTTGGAGAAGCCCGACTGCAACATTGACGCTTTTGAAGAGGATCTACATAAAATGCTTATGGTGTATTTTGATTACATGAGAAGCTGGATCCAAATGCTACAGCAATTACCTCAAGCATCGCATAGTTTAAAAAATCTGTTAGAAGAAGAATGGAATTTCACCAAAGAAATAACTCATTACATACGGGGAGGAGAAGCACAGGCCGGGAAGCTTTTCTGTGACATTGCAGGAATGCTGCTGAAATCTACAGGAAGTTTTTTAGAATTTGGCTTACAGGAGAGCTGTGCTGAATTTTGGACTAGTGCGGATGACAGCAGTGCTTCCGACGAAATCAGGAGGTCTGTTATAGAGATCAGTCGAGCCCTGAAGGAGCTCTTCCATGAAGCCAGAGAAAGGGCTTCCAAAGCACTTGGATTTGCTAAAATGTTGAGAAAGGACCTGGAAATAGCAGCAGAATTCAGGCTTTCAGCCCCAGTTAGAGACCTCCTGGATGTTCTGAAATCAAAACAGTATGTCAAGGTGCAAATTCCTGGGTTAGAAAACTTGCAAATGTTTGTTCCAGACACTCTTGCTGAGGAGAAGAGTATTATTTTGCAGTTACTCAATGCAGCTGCAGGAAAGGACTGTTCAAAAGATTCAGATGACGTACTCATCGATGCCTATCTGCTTCTGACCAAGCACGGTGATCGAGCCCGTGATTCAGAGGACAGCTGGGGCACCTGGGAGGCACAGCCTGTCAAAGTCGTGCCTCAGGTGGAGACTGTTGACACCCTGAGAAGCATGCAGGTGGATAATCTTTTACTAGTTGTCATGCAGTCTGCGCATCTCACAATTCAGAGAAAAGCTTTCCAGCAGTCCATTGAGGGACTTATGACTCTGTGCCAGGAGCAGACATCCAGTCAGCCGGTCATCGCCAAAGCTTTGCAGCAGCTGAAGAATGATGCATTGGAGCTATGCAACAGGATAAGCAATGCCATTGACCGCGTGGACCACATGTTCACATCAGAATTTGATGCTGAGGTTGATGAATCTGAATCTGTCACCTTGCAACAGTACTACCGAGAAGCAATGATTCAGGGGTACAATTTTGGATTTGAGTATCATAAAGAAGTTGTTCGTTTGATGTCTGGGGAGTTTAGACAGAAGATAGGAGACAAATATATAAGCTTTGCCCGGAAGTGGATGAATTATGTCCTGACTAAATGTGAGAGTGGTAGAGGTACAAGACCCAGGTGGGCGACTCAAGGATTTGATTTTCTACAAGCAATTGAACCTGCCTTTATTTCAGCTTTACCAGAAGATGACTTCTTGAGTTTACAAGCCTTGATGAATGAATGCATTGGCCATGTCATAGGAAAACCACACAGTCCTGTTACAGGTTTGTACCTTGCCATTCATCGGAACAGCCCCCGTCCTATGAAGGTACCTCGATGCCATAGTGACCCTCCTAACCCACACCTCATTATCCCCACTCCAGAGGGATTCAGCACTCGGAGCATGCCTTCCGACGCGCGGAGCCATGGCAGCCCTGCTGCTGCTGCTGCTGCTGCTGCTGCTGCTGTTGCTGCCAGTCGGCCCAGCCCCTCTGGTGGTGACTCTGTGCTGCCCAAATCCATCAGCAGTGCCCATGATACCAGGGGTTCCAGCGTTCCTGAAAATGATCGATTGGCTTCCATAGCTGCTGAATTGCAGTTTAGGTCCCTGAGTCGTCACTCAAGCCCCACGGAGGAGCGAGATGAACCAGCATATCCAAGAGGAGATTCAAGTGGGTCCACAAGAAGAAGTTGGGAACTTCGGACACTAATCAGCCAGAGTAAAGATACTGCTTCTAAACTAGGACCCATAGAAGCTATCCAGAAGTCAGTCCGATTGTTTGAAGAAAAGAGGTACCGAGAAATGAGGAGAAAGAATATCATTGGTCAAGTTTGTGATACGCCTAAGTCCTATGATAATGTTATGCAC
GTTGGCTTGAGGAAGGTGACCTTCAAATGGCAAAGAGGAAACAAAATTGGAGAAGGCCAGTATGGGAAGGTGTACACCTGCATCAGCGTCGACACCGGGGAGCTGATGGCCATGAAAGAGATTCGATTTCAACCTAATGACCATAAGACTATCAAGGAAACTGCAGACGAATTGAAAATATTCGAAGGCATCAAACACCCCAATCTGGTTCGGTATTTTGGTGTGGAGCTCCATAGAGAAGAAATGTACATCTTCATGGAGTACTGCGATGAGGGGACTTTAGAAGAGGTGTCAAGGCTGGGACTTCAGGAACATGTGATTAGGCTGTATTCAAAGCAGATCACCATTGCGATCAACGTCCTCCATGAGCATGGCATAGTCCACCGTGACATTAAAGGTGCCAATATCTTCCTTACCTCATCTGGATTAATCAAACTGGGAGATTTTGGATGTTCAGTAAAGCTCAAAAACAATGCCCAGACCATGCCTGGTGAAGTGAACAGCACCCTGGGGACAGCAGCATACATGGCACCTGAAGTCATCACTCGTGCCAAAGGAGAGGGCCATGGGCGTGCGGCCGACATCTGGAGTCTGGGGTGTGTTGTCATAGAGATGGTGACTGGCAAGAGGCCTTGGCATGAGTATGAGCACAACTTTCAAATTATGTATAAAGTGGGGATGGGACATAAGCCACCAATCCCTGAAAGATTAAGCCCTGAAGGAAAGGACTTCCTTTCTCACTGCCTTGAGAGTGACCCAAAGATGAGATGGACCGCCAGCCAGCTCCTCGACCATTCGTTTGTCAAGGTTTGCACAGATGAAGAATGA"}, - {"NM_006724.2", "ATGAGAGAAGCCGCTGCCGCGCTGGTCCCTCCTCCCGCCTTTGCCGTCACGCCTGCCGCCGCCATGGAGGAGCCGCCGCCACCGCCGCCGCCGCCACCACCGCCACCGGAACCCGAGACCGAGTCAGAACCCGAGTGCTGCTTGGCGGCGAGGCAAGAGGGCACATTGGGAGATTCAGCTTGCAAGAGTCCTGAATCTGATCTAGAAGACTTCTCCGATGAAACAAATACAGAGAATCTTTATGGTACCTCTCCCCCCAGCACACCTCGACAGATGAAACGCATGTCAACCAAACATCAGAGGAATAATGTGGGGAGGCCAGCCAGTCGGTCTAATTTGAAAGAAAAAATGAATGCACCAAATCAGCCTCCACATAAAGACACTGGAAAAACAGTGGAGAATGTGGAAGAATACAGCTATAAGCAGGAGAAAAAGATCCGAGCAGCTCTTAGAACAACAGAGCGTGATCATAAAAAAAATGTACAGTGCTCATTCATGTTAGACTCAGTGGGTGGATCTTTGCCAAAAAAATCAATTCCAGATGTGGATCTCAATAAGCCTTACCTCAGCCTTGGCTGTAGCAATGCTAAGCTTCCAGTATCTGTGCCCATGCCTATAGCCAGACCTGCACGCCAGACTTCTAGGACTGACTGTCCAGCAGATCGTTTAAAGTTTTTTGAAACTTTACGACTTTTGCTAAAGCTTACCTCAGTCTCAAAGAAAAAAGACAGGGAGCAAAGAGGACAAGAAAATACGTCTGGTTTCTGGCTTAACCGATCTAACGAACTGATCTGGTTAGAGCTACAAGCCTGGCATGCAGGACGGACAATTAACGACCAGGACTTCTTTTTATATACAGCCCGTCAAGCCATCCCAGATATTATTAATGAAATCCTTACTTTCAAAGTCGACTATGGGAGCTTCGCCTTTGTTAGAGATAGAGCTGGTTTTAATGGTACTTCAGTAGAAGGGCAGTGCAAAGCCACTCCTGGAACAAAGATTGTAGGTTACTCAACACATCATGAGCATCTCCAACGCCAGAGGGTCTCATTTGAGCAGGTAAAACGGATAATGGAGCTGCTAGAGTACATAGAAGCACTTTATCCATCATTGCAGGCTCTTCAGAAGGACTATGAAAAATATGCTGCA
AAAGACTTCCAGGACAGGGTGCAGGCACTCTGTTTGTGGTTAAACATCACAAAAGACTTAAATCAGAAATTAAGGATTATGGGCACTGTTTTGGGCATCAAGAATTTATCAGACATTGGCTGGCCAGTGTTTGAAATCCCTTCCCCTCGACCATCCAAAGGTAATGAGCCGGAGTATGAGGGTGATGACACAGAAGGAGAATTAAAGGAGTTGGAAAGTAGTACGGATGAGAGTGAAGAAGAACAAATCTCTGATCCTAGGGTACCGGAAATCAGACAGCCCATAGATAACAGCTTCGACATCCAGTCGCGGGACTGCATATCCAAGAAGCTTGAGAGGCTCGAATCTGAGGATGATTCTCTTGGCTGGGGAGCACCAGACTGGAGCACAGAAGCAGGCTTTAGTAGACATTGTCTGACTTCTATTTATAGACCATTTGTAGACAAAGCACTGAAGCAGATGGGGTTAAGAAAGTTAATTTTAAGACTTCACAAGCTAATGGATGGTTCCTTGCAAAGGGCACGTATAGCATTGGTAAAGAACGATCGTCCAGTGGAGTTTTCTGAATTTCCAGATCCCATGTGGGGTTCAGATTATGTGCAGTTGTCAAGGACACCACCTTCATCTGAGGAGAAATGCAGTGCTGTGTCGTGGGAGGAGCTGAAGGCCATGGATTTACCTTCATTCGAACCTGCCTTCCTAGTTCTCTGCCGAGTCCTTCTGAATGTCATACATGAGTGTCTGAAGTTAAGATTGGAGCAGAGACCTGCTGGAGAACCATCTCTCTTGAGTATTAAGCAGCTGGTGAGAGAGTGTAAGGAGGTCCTGAAGGGCGGCCTGCTGATGAAGCAGTACTACCAGTTCATGCTGCAGGAGGTTCTGGAGGACTTGGAGAAGCCCGACTGCAACATTGACGCTTTTGAAGAGGATCTACATAAAATGCTTATGGTGTATTTTGATTACATGAGAAGCTGGATCCAAATGCTACAGCAATTACCTCAAGCATCGCATAGTTTAAAAAATCTGTTAGAAGAAGAATGGAATTTCACCAAAGAAATAACTCATTACATACGGGGAGGAGAAGCACAGGCCGGGAAGCTTTTCTGTGACATTGCAGGAATGCTGCTGAAATCTACAGGAAGTTTTTTAGAATTTGGCTTACAGGAGAGCTGTGCTGAATTTTGGACTAGTGCGGATGACAGCAGTGCTTCCGACGAAATCAGGAGGTCTGTTATAGAGATCAGTCGAGCCCTGAAGGAGCTCTTCCATGAAGCCAGAGAAAGGGCTTCCAAAGCACTTGGATTTGCTAAAATGTTGAGAAAGGACCTGGAAATAGCAGCAGAATTCAGGCTTTCAGCCCCAGTTAGAGACCTCCTGGATGTTCTGAAATCAAAACAGTATGTCAAGGTGCAAATTCCTGGGTTAGAAAACTTGCAAATGTTTGTTCCAGACACTCTTGCTGAGGAGAAGAGTATTATTTTGCAGTTACTCAATGCAGCTGCAGGAAAGGACTGTTCAAAAGATTCAGATGACGTACTCATCGATGCCTATCTGCTTCTGACCAAGCACGGTGATCGAGCCCGTGATTCAGAGGACAGCTGGGGCACCTGGGAGGCACAGCCTGTCAAAGTCGTGCCTCAGGTGGAGACTGTTGACACCCTGAGAAGCATGCAGGTGGATAATCTTTTACTAGTTGTCATGCAGTCTGCGCATCTCACAATTCAGAGAAAAGCTTTCCAGCAGTCCATTGAGGGACTTATGACTCTGTGCCAGGAGCAGACATCCAGTCAGCCGGTCATCGCCAAAGCTTTGCAGCAGCTGAAGAATGATGCATTGGAGCTATGCAACAGGATAAGCAATGCCATTGACCGCGTGGACCACATGTTCACATCAGAATTTGATGCTGAGGTTGATGAATCTGAATCTGTCACCTTGCAACAGTACTACCGAGAAGCAATGATTCAGGGGTACAATTTTGGATTTGAGTATCATAAAGAAGT
TGTTCGTTTGATGTCTGGGGAGTTTAGACAGAAGATAGGAGACAAATATATAAGCTTTGCCCGGAAGTGGATGAATTATGTCCTGACTAAATGTGAGAGTGGTAGAGGTACAAGACCCAGGTGGGCGACTCAAGGATTTGATTTTCTACAAGCAATTGAACCTGCCTTTATTTCAGCTTTACCAGAAGATGACTTCTTGAGTTTACAAGCCTTGATGAATGAATGCATTGGCCATGTCATAGGAAAACCACACAGTCCTGTTACAGGTTTGTACCTTGCCATTCATCGGAACAGCCCCCGTCCTATGAAGGTACCTCGATGCCATAGTGACCCTCCTAACCCACACCTCATTATCCCCACTCCAGAGGGATTCAGGGGTTCCAGCGTTCCTGAAAATGATCGATTGGCTTCCATAGCTGCTGAATTGCAGTTTAGGTCCCTGAGTCGTCACTCAAGCCCCACGGAGGAGCGAGATGAACCAGCATATCCAAGAGGAGATTCAAGTGGGTCCACAAGAAGAAGTTGGGAACTTCGGACACTAATCAGCCAGAGTAAAGATACTGCTTCTAAACTAGGACCCATAGAAGCTATCCAGAAGTCAGTCCGATTGTTTGAAGAAAAGAGGTACCGAGAAATGAGGAGAAAGAATATCATTGGTCAAGTTTGTGATACGCCTAAGTCCTATGATAATGTTATGCACGTTGGCTTGAGGAAGGTGACCTTCAAATGGCAAAGAGGAAACAAAATTGGAGAAGGCCAGTATGGGAAGGTGTACACCTGCATCAGCGTCGACACCGGGGAGCTGATGGCCATGAAAGAGATTCGATTTCAACCTAATGACCATAAGACTATCAAGGAAACTGCAGACGAATTGAAAATATTCGAAGGCATCAAACACCCCAATCTGGTTCGGTATTTTGGTGTGGAGCTCCATAGAGAAGAAATGTACATCTTCATGGAGTACTGCGATGAGGGGACTTTAGAAGAGGTGTCAAGGCTGGGACTTCAGGAACATGTGATTAGGCTGTATTCAAAGCAGATCACCATTGCGATCAACGTCCTCCATGAGCATGGCATAGTCCACCGTGACATTAAAGGTGCCAATATCTTCCTTACCTCATCTGGATTAATCAAACTGGGAGATTTTGGATGTTCAGTAAAGCTCAAAAACAATGCCCAGACCATGCCTGGTGAAGTGAACAGCACCCTGGGGACAGCAGCATACATGGCACCTGAAGTCATCACTCGTGCCAAAGGAGAGGGCCATGGGCGTGCGGCCGACATCTGGAGTCTGGGGTGTGTTGTCATAGAGATGGTGACTGGCAAGAGGCCTTGGCATGAGTATGAGCACAACTTTCAAATTATGTATAAAGTGGGGATGGGACATAAGCCACCAATCCCTGAAAGATTAAGCCCTGAAGGAAAGGACTTCCTTTCTCACTGCCTTGAGAGTGACCCAAAGATGAGATGGACCGCCAGCCAGCTCCTCGACCATTCGTTTGTCAAGGTTTGCACAGATGAAGAATGA"}, - {"NM_019063.3", 
"ATGGACGGTTTCGCCGGCAGTCTCGATGATAGTATTTCTGCTGCAAGTACTTCTGATGTTCAAGATCGCCTGTCAGCTCTTGAGTCACGAGTTCAGCAACAAGAAGATGAAATCACTGTGCTAAAGGCGGCTTTGGCTGATGTTTTGAGGCGTCTTGCAATCTCTGAAGATCATGTGGCCTCAGTGAAAAAATCAGTCTCAAGTAAAGGCCAACCAAGCCCTCGAGCAGTTATTCCCATGTCCTGTATAACCAATGGAAGTGGTGCAAACAGAAAACCAAGTCATACCAGTGCTGTCTCAATTGCAGGAAAAGAAACTCTTTCATCTGCTGCTAAAAGTGGTACAGAAAAAAAGAAAGAAAAACCACAAGGACAGAGAGAAAAAAAAGAGGAATCTCATTCTAATGATCAAAGTCCACAAATTCGAGCATCACCTTCTCCCCAGCCCTCTTCACAACCTCTCCAAATACACAGACAAACTCCAGAAAGCAAGAATGCTACTCCCACCAAAAGCATAAAACGACCATCACCAGCTGAAAAGTCACATAATTCTTGGGAAAATTCAGATGATAGCCGTAATAAATTGTCGAAAATACCTTCAACACCCAAATTAATACCAAAAGTTACCAAAACTGCAGACAAGCATAAAGATGTCATCATCAACCAAGAAGGAGAATATATTAAAATGTTTATGCGCGGTCGGCCAATTACCATGTTCATTCCTTCCGATGTTGACAACTATGATGACATCAGAACGGAACTGCCTCCTGAGAAGCTCAAACTGGAGTGGGCATATGGTTATCGAGGAAAGGACTGTAGAGCTAATGTTTACCTTCTTCCGACCGGGGAAATAGTTTATTTCATTGCATCAGTAGTAGTACTATTTAATTATGAGGAGAGAACTCAGCGACACTACCTGGGCCATACAGACTGTGTGAAATGCCTTGCTATACATCCTGACAAAATTAGGATTGCAACTGGACAGATAGCTGGCGTGGATAAAGATGGAAGGCCTCTACAACCCCACGTCAGAGTGTGGGATTCTGTTACTCTATCCACACTGCAGATTATTGGACTTGGCACTTTTGAGCGTGGAGTAGGATGCCTGGATTTTTCAAAAGCAGATTCAGGTGTTCATTTATGTGTTATTGATGACTCCAATGAGCATATGCTTACTGTATGGGACTGGCAGAAGAAAGCAAAAGGAGCAGAAATAAAGACAACAAATGAAGTTGTTTTGGCTGTGGAGTTTCACCCAACAGATGCAAATACCATAATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATTCACTAACAAGAAAACAGGGAATTTTTGGGAAATATGAAAAGCCAAAATTTGTGCAGTGTTTAGCATTCTTGGGGAATGGAGATGTTCTTACTGGAGACTCAGGTGGAGTCATGCTTATATGGAGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAGGTGTATATCAAATCAGCAAACAAATCAAAGCTCATGATGGCAGTGTGTTCACACTTTGTCAGATGAGAAATGGGATGTTATTAACTGGAGGAGGGAAAGACAGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGAGGTTCCTGATCAGTATGGCACAATCAGAGCTGTAGCAGAAGGAAAGGCAGATCAATTTTTAGTAGGCACATCACGAAACTTTATTTTACGAGGAACATTTAATGATGGCTTCCAAATAGAAGTACAGGGTCATACAGATGAGCTTTGGGGTCTTGCCACACATCCCTTCAAAGATTTGCTCTTGACATGTGCTCAGGACAGGCAGGTGTGCCTGTGGAACTCAATGGAACACAGGCTGGAATGGACCAGGCTGGTAGATGAACCAGGACACTGTGCAGATTTTCATCCAAGTGGCACAGTGGTGGCCATAGGAACGCACTCAGGCAGGTGGTTTGTTCTGGATGCAGAAACCAGAGATC
TAGTTTCTATCCACACAGACGGGAATGAACAGCTCTCTGTGATGCGCTACTCAATAGATGGTACCTTCCTGGCTGTAGGATCTCATGACAACTTTATTTACCTCTATGTAGTCTCTGAAAATGGAAGAAAATATAGCAGATATGGAAGGTGCACTGGACATTCCAGCTACATCACACACCTTGACTGGTCCCCAGACAACAAGTATATAATGTCTAACTCGGGAGACTATGAAATATTGTACTGGGACATTCCAAATGGCTGCAAACTAATCAGGAATCGATCGGATTGTAAGGACATTGATTGGACGACATATACCTGTGTGCTAGGATTTCAAGTATTTGGTGTCTGGCCAGAAGGATCTGATGGGACAGATATCAATGCACTGGTGCGATCCCACAATAGAAAGGTGATAGCTGTTGCCGATGACTTTTGTAAAGTCCATCTGTTTCAGTATCCCTGCTCCAAAGCAAAGGCTCCCAGTCACAAGTACAGTGCCCACAGCAGCCATGTCACCAATGTCAGTTTTACTCACAATGACAGTCACCTGATATCAACTGGTGGAAAAGACATGAGCATCATTCAGTGGAAACTTGTGGAAAAGTTATCTTTGCCTCAGAATGAGACTGTAGCGGATACTACTCTAACCAAAGCCCCCGTCTCTTCCACTGAAAGTGTCATCCAATCTAATACTCCCACACCGCCTCCTTCTCAGCCCTTAAATGAGACAGCTGAAGAGGAAAGTAGAATAAGCAGTTCTCCCACACTTCTGGAGAACAGCCTGGAACAAACTGTGGAGCCAAGTGAAGACCACAGCGAGGAGGAGAGTGAAGAGGGCAGCGGAGACCTTGGTGAGCCTCTTTATGAAGAGCCATGCAACGAGATAAGCAAGGAGCAGGCCAAAGCCACCCTTCTGGAGGACCAGCAAGACCCTTCGCCCTCGTCCTAA"}, - {"NM_175741.1", "ATGGCTTCAGATGGAGCATCTGCATTGCCGGGACCGGATATGAGCATGAAACCTAGTGCCGCCCTGTCTCCATCCCCTGCACTTCCCTTTCTCCCACCAACTTCTGACCCACCAGACCACCCACCCAGGGAGCCACCTCCACAGCCCATCATGCCTTCAGTATTCTCTCCAGACAACCCTCTGATGCTCTCTGCTTTCCCCAGCTCACTGTTGGTGACAGGGGACGGGGGCCCTTGCCTCAGTGGGGCTGGGGCTGGCAAGGTCATTGTCAAAGTCAAGACAGAAGGGGGGTCAGCTGAGCCCTCTCAAACTCAGAACTTTATCCTTACTCAGACTGCCCTCAATTCGACTGCCCCGGGCACTCCCTGTGGAGGCCTTGAGGGTCCTGCACCTCCATTTGTGACAGCATCTAATGTGAAGACCATTCTGCCCTCTAAGGCTGTTGGTGTCAGCCAGGAGGGTCCTCCAGGCCTTCCGCCTCAGCCTCCACCACCAGTTGCTCAACTGGTCCCCATTGTGCCCCTGGAAAAAGCTTGGCCAGGGCCACATGGGACAACCGGGGAAGGAGGTCCTGTGGCCACTCTATCCAAGCCTTCCCTAGGTGACCGCTCCAAAATTTCCAAGGACGTTTATGAGAACTTCCGTCAGTGGCAGCGTTACAAAGCCTTGGCCCGGAGGCACCTATCCCAGAGTCCTGACACAGAAGCTCTTTCCTGTTTTCTTATCCCAGTGCTTCGTTCCCTGGCCCGGCTGAAGCCCACTATGACCCTGGAGGAGGGACTGCCATTGGCTGTGCAGGAGTGGGAGCACACCAGCAACTTTGACCGGATGATCTTTTATGAGATGGCAGAAAGGTTCATGGAGTTTGAGGCTGAGGAGATGCAGATTCAGAACACACAGCTGATGAATGGGTCTCAGGGCCTGTCTCCTGCAACCCCTTTGAAACTTGATCCTCTAGGGCCCCTGGCCTCTGAGGTTTGCCAGCAGCCAGTGTACATTCCGAAGAAGGCAGCCTCCAAGACACGGGCCC
CCCGCCGGCGTCAGCGTAAAGCCCAGAGACCTCCTGCTCCTGAGGCACCCAAGGAGATCCCACCAGAAGCTGTGAAGGAGTATGTTGACATCATGGAATGGCTGGTGGGGACTCACTTGGCCACTGGGGAGTCAGATGGAAAACAAGAGGAAGAAGGGCAGCAGCAGGAGGAGGAAGGGATGTATCCAGATCCAGGTCTCCTGAGCTACATCAATGAGCTGTGTTCTCAGAAGGTCTTTGTCTCCAAGGTGGAGGCTGTCATTCACCCTCAATTTCTGGCAGATCTGCTGTCCCCAGAAAAACAGAGAGATCCCTTGGCCTTAATTGAGGAGCTAGAGCAAGAAGAAGGACTCACTCTTGCCCAGCTGGTCCAGAAGCGACTCATGGCCTTGGAAGAGGAGGAAGATGCAGAGGCGCCTCCAAGTTTCAGTGGCGCTCAGTTGGACTCAAGTCCTTCTGGTTCTGTTGAGGATGAAGATGGGGATGGGCGGCTTCGGCCCTCACCTGGGCTTCAGGGGGCTGGGGGCGCCGCTTGCCTTGGAAAGGTTTCTTCTTCAGGAAAACGGGCAAGAGAAGTGCATGGTGGGCAGGAGCAAGCCCTAGATAGCCCCAGAGGGATGCACAGGGATGGGAACACTCTGCCATCCCCCAGCAGCTGGGACCTGCAGCCAGAACTTGCAGCTCCACAGGGAACTCCGGGACCCTTGGGTGTGGAGAGGAGAGGGTCTGGGAAGGTTATAAACCAGGTATCTCTACATCAGGATGGCCATCTAGGAGGCGCTGGGCCTCCTGGGCACTGCCTGGTGGCTGATAGGACTTCAGAGGCTCTGCCCCTTTGTTGGCAGGGAGGCTTCCAGCCTGAGAGCACTCCCAGTTTGGATGCTGGACTTGCAGAGCTGGCTCCTCTGCAAGGACAAGGGTTAGAAAAGCAAGTCCTGGGATTGCAGAAAGGACAACAAACAGGGGGTCGTGGAGTGCTTCCTCAAGGGAAGGAGCCTTTAGCAGTGCCCTGGGAAGGCTCTTCAGGAGCCATGTGGGGAGATGACAGAGGTACCCCCATGGCTCAGAGTTATGATCAGAATCCTTCCCCTAGAGCAGCTGGGGAGAGGGACGATGTCTGTCTCAGCCCAGGAGTTTGGCTGAGCAGTGAGATGGATGCTGTAGGCTTGGAGCTGCCTGTACAAATAGAGGAGGTCATAGAGAGCTTCCAAGTTGAGAAGTGTGTAACTGAGTATCAGGAAGGCTGCCAGGGACTGGGCTCCAGGGGCAACATTTCCCTGGGTCCTGGAGAAACCCTAGTACCTGGGGATACGGAGAGCAGTGTGATTCCCTGTGGAGGCACAGTTGCGGCAGCTGCCCTAGAAAAGAGAAACTATTGCAGCTTGCCAGGACCTTTGAGGGCCAACAGCCCACCCTTGAGGTCCAAAGAAAATCAAGAACAGAGCTGTGAAACCGTAGGGCATCCCAGTGATCTGTGGGCAGAAGGTTGCTTCCCATTGCTAGAAAGTGGTGATTCCACACTGGGGTCTTCCAAAGAAACCCTTCCACCCACATGCCAAGGCAATCTCCTTATCATGGGGACTGAGGATGCCTCCTCCTTGCCTGAAGCCAGTCAAGAGGCAGGGAGCAGAGGCAATTCCTTTTCTCCTCTGTTGGAAACCATAGAACCTGTCAACATACTAGATGTTAAAGATGACTGTGGCCTCCAACTAAGGGTCAGCGAGGACACCTGCCCACTGAATGTTCATTCTTATGACCCCCAAGGAGAAGGCAGGGTGGATCCTGATCTGTCCAAGCCTAAAAACCTTGCTCCTTTACAAGAGAGTCAGGAGTCTTACACAACTGGGACTCCCAAAGCAACATCTTCTCACCAGGGCCTTGGAAGCACTTTGCCTAGAAGGGGAACCAGGAATGCCATAGTTCCGAGAGAAACTTCTGTTAGTAAAACACACAGGTCAGCAGACAGGGCCAAAGGAAAGGAGAAAAAGAAAAAGGAAGCA
GAGGAAGAGGATGAGGAACTCTCCAACTTTGCTTACCTCTTGGCCTCTAAACTTAGCCTCTCACCAAGGGAGCATCCCCTCAGTCCTCACCATGCCTCAGGAGGTCAGGGCAGCCAGAGAGCATCCCACCTGCTCCCTGCTGGAGCAAAAGGCCCCAGCAAACTTCCATATCCTGTTGCCAAGTCTGGGAAGCGAGCTCTAGCTGGAGGTCCAGCCCCTACTGAAAAGACACCCCACTCAGGAGCTCAACTTGGGGTCCCCAGGGAGAAACCCCTAGCTCTGGGAGTAGTTCGACCCTCACAGCCTCGTAAAAGGCGGTGTGACAGTTTTGTCACGGGCAGAAGGAAGAAACGACGTCGTAGCCAGTAG"}, - {"NM_001244937.1", "ATGGAGCTCCTCCCGCCGCTGCCTCAGTCCTTCCTGTTGCTGCTGCTGTTGCCTGCCAAGCCCGCGGCGGGCGAGGACTGGCAGTGCCCGCGCACCCCCTACGCGGCCTCTCGCGACTTTGACGTGAAGTACGTGGTGCCCAGCTTCTCCGCCGGAGGCCTGGTACAGGCCATGGTGACCTACGAGGGCGACAGAAATGAGAGTGCTGTGTTTGTAGCCATACGCAATCGCCTGCATGTGCTTGGGCCTGACCTGAAGTCTGTCCAGAGCCTGGCCACGGGCCCTGCTGGAGACCCTGGCTGCCAGACGTGTGCAGCCTGTGGCCCAGGACCCCACGGCCCTCCCGGTGACACAGACACAAAGGTGCTGGTGCTGGATCCCGCGCTGCCTGCGCTGGTCAGTTGTGGCTCCAGCCTGCAGGGCCGCTGCTTCCTGCATGACCTAGAGCCCCAAGGGACAGCCGTGCATCTGGCAGCGCCAGCCTGCCTCTTCTCAGCCCACCATAACCGGCCCGATGACTGCCCCGACTGTGTGGCCAGCCCATTGGGCACCCGTGTAACTGTGGTTGAGCAAGGCCAGGCCTCCTATTTCTACGTGGCATCCTCACTGGACGCAGCCGTGGCTGCCAGCTTCAGCCCACGCTCAGTGTCTATCAGGCGTCTCAAGGCTGACGCCTCGGGATTCGCACCGGGCTTTGTGGCGTTGTCAGTGCTGCCCAAGCATCTTGTCTCCTACAGTATTGAATACGTGCACAGCTTCCACACGGGAGCCTTCGTATACTTCCTGACTGTACAGCCGGCCAGCGTGACAGATGATCCTAGTGCCCTGCACACACGCCTGGCACGGCTTAGCGCCACTGAGCCAGAGTTGGGTGACTATCGGGAGCTGGTCCTCGACTGCAGATTTGCTCCAAAACGCAGGCGCCGGGGGGCCCCAGAAGGCGGACAGCCCTACCCTGTGCTGCGGGTGGCCCACTCCGCTCCAGTGGGTGCCCAACTTGCCACTGAGCTGAGCATCGCCGAGGGCCAGGAAGTACTATTTGGGGTCTTTGTGACTGGCAAGGATGGTGGTCCTGGCGTGGGCCCCAACTCTGTCGTCTGTGCCTTCCCCATTGACCTGCTGGACACACTAATTGATGAGGGTGTGGAGCGCTGTTGTGAATCCCCAGTCCATCCAGGCCTCCGGCGAGGCCTCGACTTCTTCCAGTCGCCCAGTTTTTGCCCCAACCCGCCTGGCCTGGAAGCCCTCAGCCCCAACACCAGCTGCCGCCACTTCCCTCTGCTGGTCAGTAGCAGCTTCTCACGTGTGGACCTATTCAATGGGCTGTTGGGACCAGTACAGGTCACTGCATTGTATGTGACACGCCTTGACAACGTCACAGTGGCACACATGGGCACAATGGATGGGCGTATCCTGCAGGTGGAGCTGGTCAGGTCACTAAACTACTTGCTGTATGTGTCCAACTTCTCACTGGGTGACAGTGGGCAGCCCGTGCAGCGGGATGTCAGTCGTCTTGGGGACCACCTACTCTTTGCCTCTGGGGACCAGGTTTTCCAGGTACCTATCCAAGGCCCTGGCTGCCGCCACTTCCTGACCTGTGGGCGT
TGCCTAAGGGCATGGCATTTCATGGGCTGTGGCTGGTGTGGGAACATGTGCGGCCAGCAGAAGGAGTGTCCTGGCTCCTGGCAACAGGACCACTGCCCACCTAAGCTTACTGAGTTCCACCCCCACAGTGGACCTCTAAGGGGCAGTACAAGGCTGACCCTGTGTGGCTCCAACTTCTACCTTCACCCTTCTGGTCTGGTGCCTGAGGGAACCCATCAGGTCACTGTGGGCCAAAGTCCCTGCCGGCCACTGCCCAAGGACAGCTCAAAACTCAGACCAGTGCCCCGGAAAGACTTTGTAGAGGAGTTTGAGTGTGAACTGGAGCCCTTGGGCACCCAGGCAGTGGGGCCTACCAACGTCAGCCTCACCGTGACTAACATGCCACCGGGCAAGCACTTCCGGGTAGACGGCACCTCCGTGCTGAGAGGCTTCTCTTTCATGGAGCCAGTGCTGATAGCAGTGCAACCCCTCTTTGGCCCACGGGCAGGAGGCACCTGTCTCACTCTTGAAGGCCAGAGTCTGTCTGTAGGCACCAGCCGGGCTGTGCTGGTCAATGGGACTGAGTGTCTGCTAGCACGGGTCAGTGAGGGGCAGCTTTTATGTGCCACACCCCCTGGGGCCACGGTGGCCAGTGTCCCCCTTAGCCTGCAGGTGGGGGGTGCCCAGGTACCTGGTTCCTGGACCTTCCAGTACAGAGAAGACCCTGTCGTGCTAAGCATCAGCCCCAACTGTGGCTACATCAACTCCCACATCACCATCTGTGGCCAGCATCTAACTTCAGCATGGCACTTAGTGCTGTCATTCCATGACGGGCTTAGGGCAGTGGAAAGCAGGTGTGAGAGGCAGCTTCCAGAGCAGCAGCTGTGCCGCCTTCCTGAATATGTGGTCCGAGACCCCCAGGGATGGGTGGCAGGGAATCTGAGTGCCCGAGGGGATGGAGCTGCTGGCTTTACACTGCCTGGCTTTCGCTTCCTACCCCCACCCCATCCACCCAGTGCCAACCTAGTTCCACTGAAGCCTGAGGAGCATGCCATTAAGTTTGAGGTCTGCGTAGATGGTGAATGTCATATCCTGGGTAGAGTGGTGCGGCCAGGGCCAGATGGGGTCCCACAGAGCACGCTCCTTGGTATCCTGCTGCCTTTGCTGCTGCTTGTGGCTGCACTGGCGACTGCACTGGTCTTCAGCTACTGGTGGCGGAGGAAGCAGCTAGTTCTTCCTCCCAACCTGAATGACCTGGCATCCCTGGACCAGACTGCTGGAGCCACACCCCTGCCTATTCTGTACTCGGGCTCTGACTACAGAAGTGGCCTTGCACTCCCTGCCATTGATGGTCTGGATTCCACCACTTGTGTCCATGGAGCATCCTTCTCCGATAGTGAAGATGAATCCTGTGTGCCACTGCTGCGGAAAGAGTCCATCCAGCTAAGGGACCTGGACTCTGCGCTCTTGGCTGAGGTCAAGGATGTGCTGATTCCCCATGAGCGGGTGGTCACCCACAGTGACCGAGTCATTGGCAAAGGCCACTTTGGAGTTGTCTACCACGGAGAATACATAGACCAGGCCCAGAATCGAATCCAATGTGCCATCAAGTCACTAAGTCGCATCACAGAGATGCAGCAGGTGGAGGCCTTCCTGCGAGAGGGGCTGCTCATGCGTGGCCTGAACCACCCGAATGTGCTGGCTCTCATTGGTATCATGTTGCCACCTGAGGGCCTGCCCCATGTGCTGCTGCCCTATATGTGCCACGGTGACCTGCTCCAGTTCATCCGCTCACCTCAGCGGAACCCCACCGTGAAGGACCTCATCAGCTTTGGCCTGCAGGTAGCCCGCGGCATGGAGTACCTGGCAGAGCAGAAGTTTGTGCACAGGGACCTGGCTGCGCGGAACTGCATGCTGGACGAGTCATTCACAGTCAAGGTGGCTGACTTTGGTTTGGCCCGCGACATCCTGGACAGGGAGTACTATAGTGTTCAACAGCATCGCCACGCTCGCCTACCTGT
GAAGTGGATGGCGCTGGAGAGCCTGCAGACCTATAGATTTACCACCAAGTCTGATGTGTGGTCATTTGGTGTGCTGCTGTGGGAACTGCTGACACGGGGTGCCCCACCATACCGCCACATTGACCCTTTTGACCTTACCCACTTCCTGGCCCAGGGTCGGCGCCTGCCCCAGCCTGAGTATTGCCCTGATTCTCTGTACCAAGTGATGCAGCAATGCTGGGAGGCAGACCCAGCAGTGCGACCCACCTTCAGAGTACTAGTGGGGGAGGTGGAGCAGATAGTGTCTGCACTGCTTGGGGACCATTATGTGCAGCTGCCAGCAACCTACATGAACTTGGGCCCCAGCACCTCGCATGAGATGAATGTGCGTCCAGAACAGCCGCAGTTCTCACCCATGCCAGGGAATGTACGCCGGCCCCGGCCACTCTCAGAGCCTCCTCGGCCCACTTGA"}, - {"NM_001242758.1", "ATGGCCGTCATGGCGCCCCGAACCCTCCTCCTGCTACTCTCGGGGGCCCTGGCCCTGACCCAGACCTGGGCGGGCTCCCACTCCATGAGGTATTTCTTCACATCCGTGTCCCGGCCCGGCCGCGGGGAGCCCCGCTTCATCGCCGTGGGCTACGTGGACGACACGCAGTTCGTGCGGTTCGACAGCGACGCCGCGAGCCAGAAGATGGAGCCGCGGGCGCCGTGGATAGAGCAGGAGGGGCCGGAGTATTGGGACCAGGAGACACGGAATATGAAGGCCCACTCACAGACTGACCGAGCGAACCTGGGGACCCTGCGCGGCTACTACAACCAGAGCGAGGACGGTTCTCACACCATCCAGATAATGTATGGCTGCGACGTGGGGCCGGACGGGCGCTTCCTCCGCGGGTACCGGCAGGACGCCTACGACGGCAAGGATTACATCGCCCTGAACGAGGACCTGCGCTCTTGGACCGCGGCGGACATGGCAGCTCAGATCACCAAGCGCAAGTGGGAGGCGGTCCATGCGGCGGAGCAGCGGAGAGTCTACCTGGAGGGCCGGTGCGTGGACGGGCTCCGCAGATACCTGGAGAACGGGAAGGAGACGCTGCAGCGCACGGACCCCCCCAAGACACATATGACCCACCACCCCATCTCTGACCATGAGGCCACCCTGAGGTGCTGGGCCCTGGGCTTCTACCCTGCGGAGATCACACTGACCTGGCAGCGGGATGGGGAGGACCAGACCCAGGACACGGAGCTCGTGGAGACCAGGCCTGCAGGGGATGGAACCTTCCAGAAGTGGGCGGCTGTGGTGGTGCCTTCTGGAGAGGAGCAGAGATACACCTGCCATGTGCAGCATGAGGGTCTGCCCAAGCCCCTCACCCTGAGATGGGAGCTGTCTTCCCAGCCCACCATCCCCATCGTGGGCATCATTGCTGGCCTGGTTCTCCTTGGAGCTGTGATCACTGGAGCTGTGGTCGCTGCCGTGATGTGGAGGAGGAAGAGCTCAGATAGAAAAGGAGGGAGTTACACTCAGGCTGCAAGCAGTGACAGTGCCCAGGGCTCTGATGTGTCTCTCACAGCTTGTAAAGTGTGA"}, - {"NM_001220769.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220775.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220768.1", "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCG
CGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001145076.1", "ATGGACGGTTTCGCCGGCAGTCTCGATGATAGTATTTCTGCTGCAAGTACTTCTGATGTTCAAGATCGCCTGTCAGCTCTTGAGTCACGAGTTCAGCAACAAGAAGATGAAATCACTGTGCTAAAGGCGGCTTTGGCTGATGTTTTGAGGCGTCTTGCAATCTCTGAAGATCATGTGGCCTCAGTGAAAAAATCAGTCTCAAGTAAAGGCCAACCAAGCCCTCGAGCAGTTATTCCCATGTCCTGTATAACCAATGGAAGTGGTGCAAACAGAAAACCAAGTCATACCAGTGCTGTCTCAATTGCAGGAAAAGAAACTCTTTCATCTGCTGCTAAAAGCATAAAACGACCATCACCAGCTGAAAAGTCACATAATTCTTGGGAAAATTCAGATGATAGCCGTAATAAATTGTCGAAAATACCTTCAACACCCAAATTAATACCAAAAGTTACCAAAACTGCAGACAAGCATAAAGATGTCATCATCAACCAAGAAGGAGAATATATTAAAATGTTTATGCGCGGTCGGCCAATTACCATGTTCATTCCTTCCGATGTTGACAACTATGATGACATCAGAACGGAACTGCCTCCTGAGAAGCTCAAACTGGAGTGGGCATATGGTTATCGAGGAAAGGACTGTAGAGCTAATGTTTACCTTCTTCCGACCGGGGAAATAGTTTATTTCATTGCATCAGTAGTAGTACTATTTAATTATGAGGAGAGAACTCAGCGACACTACCTGGGCCATACAGACTGTGTGAAATGCCTTGCTATACATCCTGACAAAATTAGGATTGCAACTGGACAGATAGCTGGCGTGGATAAAGATGGAAGGCCTCTACAACCCCACGTCAGAGTGTGGGATTCTGTTACTCTATCCACACTGCAGATTATTGGACTTGGCACTTTTGAGCGTGGAGTAGGATGCCTGGATTTTTCAAAAGCAGATTCAGGTGTTCATTTATGTGTTATTGATGACTCCAATGAGCATATGCTTACTGTATGGGACTGGCAGAAGAAAGCAAAAGGAGCAGAAATAAAGACAACAAATGAAGTTGTTTTGGCTGTGGAGTTTCACCCAACAGATGCAAATACCATAATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATTCACTAACAAGAAAACAGGGAATTTTTGGGAAATATGAAAAGCCAAAATTTGTGCAGTGTTTAGCATTCTTGGGGAATGGAGATGTTCTTACTGGAGACTCAGGTGGAGTCATGCTTATATGGAGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAGGTGTATATCAAATCAGCAAACAAATCAAAGCTCATGATGGCAGTGTGTTCACACTTTGTCAGATGAGAAATGGGATGTTATTAACTGGAGGAGGGAAAGACAGAAAAATAATTCTGTGGGATCATGATCTGAATCCTGAAAGAGAAATAGAGGTTCCTGATCAGTATGGCACAATCAGAGCTGTAGCAGAAGGAAAGGCAGATCAATTTTTAGTAGGCACATCACGAAACTTTATTTTACGAGGAACATTTAATGATGGCTTCCAAATAGAAGTACAGGGTCATACAGATGAGCTTTGGGGTCTTGCCACACATCCCTTCAAAGATTTGCTCTTGACATGTGCTCAGGACAGGCAGGTGTGCCTGTGGAACTCAATGGAACACAGGCTGGAATGGACCAGGCTGGTAGATGAACCAGGACACTGTGCAGATTTTCATCCAAGTGGCAC
AGTGGTGGCCATAGGAACGCACTCAGGCAGGTGGTTTGTTCTGGATGCAGAAACCAGAGATCTAGTTTCTATCCACACAGACGGGAATGAACAGCTCTCTGTGATGCGCTACTCAATAGATGGTACCTTCCTGGCTGTAGGATCTCATGACAACTTTATTTACCTCTATGTAGTCTCTGAAAATGGAAGAAAATATAGCAGATATGGAAGGTGCACTGGACATTCCAGCTACATCACACACCTTGACTGGTCCCCAGACAACAAGTATATAATGTCTAACTCGGGAGACTATGAAATATTGTACTGGGACATTCCAAATGGCTGCAAACTAATCAGGAATCGATCGGATTGTAAGGACATTGATTGGACGACATATACCTGTGTGCTAGGATTTCAAGTATTTGGTGTCTGGCCAGAAGGATCTGATGGGACAGATATCAATGCACTGGTGCGATCCCACAATAGAAAGGTGATAGCTGTTGCCGATGACTTTTGTAAAGTCCATCTGTTTCAGTATCCCTGCTCCAAAGCAAAGGCTCCCAGTCACAAGTACAGTGCCCACAGCAGCCATGTCACCAATGTCAGTTTTACTCACAATGACAGTCACCTGATATCAACTGGTGGAAAAGACATGAGCATCATTCAGTGGAAACTTGTGGAAAAGTTATCTTTGCCTCAGAATGAGACTGTAGCGGATACTACTCTAACCAAAGCCCCCGTCTCTTCCACTGAAAGTGTCATCCAATCTAATACTCCCACACCGCCTCCTTCTCAGCCCTTAAATGAGACAGCTGAAGAGGAAAGTAGAATAAGCAGTTCTCCCACACTTCTGGAGAACAGCCTGGAACAAACTGTGGAGCCAAGTGAAGACCACAGCGAGGAGGAGAGTGAAGAGGGCAGCGGAGACCTTGGTGAGCCTCTTTATGAAGAGCCATGCAACGAGATAAGCAAGGAGCAGGCCAAAGCCACCCTTCTGGAGGACCAGCAAGACCCTTCGCCCTCGTCCTAA"}, - {"NM_001220766.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_006060.4", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGTAAGAGCTCTATGCCTCAGAAATTTCTTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_152756.3", 
"ATGGCGGCGATCGGCCGCGGCCGCTCTCTGAAGAACCTCCGAGTACGAGGGCGGAATGACAGCGGCGAGGAGAACGTCCCGCTGGATCTGACCCGAGAACCTTCTGATAACTTAAGAGAGATTCTCCAAAATGTGGCCAGATTGCAGGGAGTATCAAATATGAGAAAGCTAGGCCATCTGAATAACTTTACTAAGCTTCTTTGTGATATTGGCCACAGTGAAGAAAAACTGGGCTTTCACTATGAGGATATCATAATTTGTTTGCGGTTAGCTTTATTAAATGAAGCAAAAGAAGTGCGAGCAGCAGGGCTACGAGCGCTTCGATATCTCATCCAAGACTCCAGTATTCTCCAGAAGGTGCTAAAATTGAAAGTGGACTATTTAATAGCTAGGTGCATTGACATACAACAGAGCAACGAGGTAGAGAGGACACAAGCACTTCGATTAGTCAGAAAGATGATTACTGTGAATGCTTCCTTGTTTCCTAGTTCTGTGACCAACTCATTAATTGCAGTTGGAAATGATGGACTTCAAGAAAGAGACAGAATGGTCCGAGCATGCATTGCCATTATCTGTGAACTAGCACTTCAGAATCCAGAGGTGGTGGCCCTTCGAGGTGGACTAAACACCATCTTGAAAAATGTGATCGATTGCCAATTAAGTCGAATAAATGAGGCCCTAATTACTACAATTTTGCACCTTCTTAATCATCCAAAGACTCGACAGTATGTGCGAGCTGATGTAGAATTAGAGAGAATTTTAGCACCCTATACTGATTTTCACTACAGACATAGTCCAGATACAGCTGAAGGACAGCTCAAAGAAGACAGAGAAGCACGATTTCTAGCCAGTAAAATGGGAATCATAGCAACATTCCGATCATGGGCAGGTATTATTAATTTATGTAAACCTGGAAATTCTGGGATCCAGTCTCTAATAGGAGTACTTTGCATACCAAATATGGAAATAAGGCGAGGTCTACTTGAAGTGCTTTATGATATATTTCGTCTTCCTCTACCTGTTGTGACTGAGGAGTTCATAGAAGCACTACTCAGTGTAGATCCAGGGAGGTTCCAAGACAGTTGGAGGCTTTCAGATGGCTTTGTGGCAGCTGAGGCAAAAACTATTCTTCCTCATCGTGCCAGATCCAGGCCAGACCTCATGGATAATTATTTGGCACTGATACTCTCTGCATTTATTCGTAATGGACTTTTAGAGGGTCTAGTTGAAGTGATAACAAACAGTGATGATCATATCTCAGTTAGAGCTACCATCCTTTTAGGAGAGCTTTTACATATGGCAAACACAATTCTTCCTCATTCACATAGCCATCATTTACACTGCTTGCCAACCCTAATGAATATGGCTGCATCCTTTGATATCCCCAAGGAAAAGAGACTGCGAGCCAGTGCAGCCTTGAACTGTTTAAAACGCTTCCATGAAATGAAGAAACGAGGACCTAAGCCTTATAGTCTTCATTTAGACCACATTATTCAGAAAGCAATTGCAACACACCAGAAACGGGATCAGTATCTCCGAGTTCAGAAAGATATATTTATCCTTAAGGATACAGAGGAAGCTCTTTTAATTAACCTTAGAGATAGCCAAGTCCTTCAACATAAAGAGAATCTTGAATGGAATTGGAATCTTATAGGGACCATTCTTAAGTGGCCAAATGTAAATCTAAGAAACTATAAAGATGAACAGTTACACAGGTTTGTACGAAGACTACTTTATTTTTACAAGCCCAGCAGTAAATTATATGCCAACCTGGATCTGGATTTTGCCAAGGCCAAACAGCTCACGGTTGTAGGTTGCCAGTTTACAGAATTTCTTCTTGAATCTGAAGAGGATGGGCAAGGCTACTTAGAAGATCTAGTAAAGGATATTGTTCAGTGGCTCAATGCTTCATCTGGAATGAAACCCGAAAGAAGTCTTCAAAATAATGGTTTATTGACCACCCTTAGTCAACACTACTTTTTATTTATTGGAACACTTT
CTTGCCACCCTCATGGAGTTAAAATGCTGGAAAAATGCAGTGTATTTCAGTGTCTCCTTAATCTTTGCTCCTTGAAAAACCAAGATCACTTGCTAAAACTTACTGTTTCTAGCTTGGACTATAGCAGAGATGGATTGGCTAGAGTCATCCTTTCCAAAATTTTAACTGCAGCTACTGATGCCTGCAGACTCTATGCAACAAAACATTTAAGGGTATTATTGAGAGCTAATGTTGAATTCTTTAATAATTGGGGAATTGAGTTGTTAGTGACCCAGCTACATGATAAAAACAAAACGATTTCCTCTGAAGCTCTTGATATCCTCGATGAAGCATGTGAAGACAAGGCCAATCTTCATGCTCTCATTCAGATGAAACCAGCGTTATCCCACCTTGGAGACAAGGGTTTGCTTCTCCTGCTGAGATTTCTCTCCATTCCAAAAGGATTTTCCTATCTGAATGAAAGAGGTTATGTAGCAAAACAATTGGAAAAGTGGCACAGGGAATACAACTCCAAATATGTTGACTTGATTGAGGAACAACTCAATGAAGCACTTACTACTTACCGGAAGCCTGTTGATGGTGATAACTATGTTCGTCGGAGTAACCAAAGATTACAGCGTCCTCACGTCTACCTGCCTATACACCTTTATGGACAACTAGTACACCATAAAACAGGCTGCCATTTGTTGGAAGTACAGAATATTATTACAGAACTCTGTCGTAATGTTCGTACACCAGATTTGGATAAGTGGGAAGAAATTAAAAAACTGAAAGCATCTCTTTGGGCCTTGGGAAATATCGGCTCATCAAATTGGGGTCTCAATTTGCTACAGGAAGAAAACGTGATTCCAGATATACTAAAACTTGCAAAACAGTGTGAAGTTCTTTCCATCAGAGGGACCTGTGTATATGTACTTGGGCTCATAGCTAAAACCAAACAAGGCTGTGATATTCTAAAATGTCACAACTGGGATGCTGTGAGGCATAGTCGCAAACATCTGTGGCCAGTGGTTCCAGATGATGTGGAACAACTCTGTAATGAACTTTCATCTATCCCAAGCACTCTAAGTTTGAACTCGGAGTCAACCAGCTCTAGACATAATAGTGAAAGTGAATCTGTGCCATCGAGTATGTTCATATTGGAGGATGACCGGTTTGGCAGCAGCTCTACTAGTACATTTTTCCTTGATATCAATGAAGATACAGAGCCAACATTTTATGACCGATCTGGACCCATAAAGGATAAAAATTCATTCCCTTTCTTTGCTTCTAGTAAACTTGTGAAGAATCGTATCTTAAATTCGCTTACTTTGCCTAACAAAAAACATCGTAGTAGCAGTGATCCAAAAGGAGGGAAATTATCATCTGAAAGTAAGACAAGCAACAGGCGAATCAGAACACTTACGGAGCCCAGTGTTGATTTTAATCATAGTGATGATTTTACACCCATATCCACTGTACAGAAAACATTACAATTAGAGACTTCATTTATGGGGAATAAGCACATTGAAGACACTGGTAGTACACCAAGCATTGGAGAAAATGACTTAAAATTCACCAAGAATTTTGGTACAGAGAATCACAGAGAAAATACAAGCCGAGAGAGGTTAGTAGTAGAAAGTTCAACGAGCTCACATATGAAGATACGTAGCCAAAGTTTCAATACAGACACTACAACAAGTGGCATAAGTTCAATGAGCTCAAGTCCTTCACGAGAGACAGTAGGTGTAGATGCTACAACTATGGACACAGACTGTGGAAGCATGAGTACTGTGGTAAGTACTAAAACTATTAAGACAAGCCACTATTTGACGCCACAGTCTAACCATCTGTCTCTCTCCAAATCAAATTCGGTGTCCCTGGTGCCTCCAGGTTCTTCTCATACGCTTCCTAGAAGAGCACAGTCCCTTAAAGCACCCTCTATTGCTACAATTAAAAGTCTAGCAGATTGTAACTTTAGTTACACAAGTTCTAGAGATGCTTTTGGCTATGCTACACTG
AAAAGACTACAGCAACAAAGAATGCATCCATCCTTATCTCACTCTGAAGCTTTGGCATCTCCAGCAAAAGATGTGCTATTTACTGATACCATCACCATGAAGGCCAACAGTTTTGAGTCCAGATTAACACCAAGCAGGTTCATGAAAGCCTTAAGTTATGCATCATTAGATAAAGAAGATTTATTGAGTCCTATTAATCAAAATACCCTGCAACGATCTTCCTCAGTGCGGTCCATGGTGTCCAGTGCCACATATGGGGGTTCAGATGATTACATTGGTCTTGCTCTCCCGGTGGATATAAATGATATATTCCAGGTAAAGGATATTCCCTATTTTCAGACAAAAAACATACCACCACATGATGATCGAGGTGCAAGAGCATTTGCCCATGATGCAGGAGGTCTTCCATCTGGAACTGGAGGTCTTGTAAAAAATTCTTTTCACTTGCTACGACAGCAGATGAGTCTTACGGAAATAATGAATTCAATCCATTCAGATGCCTCTCTGTTTTTAGAAAGTACAGAAGACACTGGACTACAGGAACATACAGATGATAACTGCCTTTATTGTGTCTGTATTGAAATTCTGGGTTTCCAGCCCAGCAACCAACTGAGTGCAATATGTAGTCATTCAGACTTTCAAGATATTCCATATTCTGATTGGTGTGAGCAGACTATCCATAATCCTTTAGAAGTGGTTCCCTCTAAGTTTTCGGGGATTTCTGGATGCAGTGATGGGGTGTCTCAAGAAGGCTCAGCTAGCAGCACCAAAAGCACAGAATTGTTACTAGGTGTTAAAACAATTCCAGATGATACACCAATGTGCCGTATACTCCTTCGCAAAGAAGTTCTAAGATTAGTCATTAATTTGAGTAGTTCAGTTTCAACTAAATGTCATGAGACTGGGCTTTTAACAATTAAGGAGAAGTATCCTCAAACATTTGATGACATATGCCTTTACTCTGAGGTTTCCCATTTGCTGTCACACTGCACATTCAGACTTCCGTGTCGGAGGTTCATACAAGAATTATTTCAAGATGTACAGTTTCTACAAATGCATGAAGAAGCAGAGGCTGTGTTGGCAACACCACCAAAGCAACCTATAGTTGATACATCTGCTGAATCCTGA"}, - {"NM_001220774.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTTGGTAAACCTCACAAATGTGGATATTGTGGCCGAAGCTATAAACAGCGAAGCTCTTTAGAGGAACATAAAGAGCGCTGCCACAACTACTTGGAAAGCATGGGCCTTCCGGGCACACTGTACCCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220767.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGGAGAACGGCCCTTCCAGTGCAATCAGTGCGGGGCCTCATTCACCCAGAAGGGCAACCTGCTCCGGCACATCAAGCTGCATTCCGGGGAGAAGCCCTTCAAATGCCACCTCTGCAACTACGCCTGCCGCCGGAGGGACGCCCTCACTGGCCACCTGAGGACGCACTCCGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001278433.1", 
"ATGGAGTCTGGCAGTACCGCCGCCAGTGAGGAGGCACGCAGCCTTCGAGAATGTGAGCTCTACGTCCAGAAGCATAACATTCAAGCGCTGCTCAAAGATTCTATTGTGCAGTTGTGCACTGCTCGACCTGAGAGACCCATGGCATTCCTCAGGGAATACTTTGAGAGGTTGGAGAAGGAGGAGGCAAAACAGATTCAGAATCTGCAGAAAGCAGGCACTCGTACAGACTCAAGGGAGGATGAGATTTCTCCTCCTCCACCCAACCCAGTGGTTAAAGGTAGGAGGCGACGAGGTGCTATCAGCGCTGAGGTCTACACGGAGGAAGATGCGGCATCCTATGTTAGAAAGGTTATACCAAAAGATTACAAGACAATGGCCGCTTTAGCCAAAGCCATTGAAAAGAATGTGCTGTTTTCACATCTTGATGATAATGAGAGAAGTGATATTTTTGATGCCATGTTTTCGGTCTCCTTTATCGCAGGAGAGACTGTGATTCAGCAAGGTGATGAAGGGGATAACTTCTATGTGATTGATCAAGGAGAGACGGATGTCTATGTTAACAATGAATGGGCAACCAGTGTTGGGGAAGGAGGGAGCTTTGGAGAACTTGCTTTGATTTATGGAACACCGAGAGCAGCCACTGTCAAAGCAAAGACAAATGTGAAATTGTGGGGCATCGACCGAGACAGCTATAGAAGAATCCTCATGGGAAGCACACTGAGAAAGCGGAAGATGTATGAGGAATTCCTTAGTAAAGTCTCTATTTTAGAGTCTCTGGACAAGTGGGAACGTCTTACGGTAGCTGATGCATTGGAACCAGTGCAGTTTGAAGATGGGCAGAAGATTGTGGTGCAGGGAGAACCAGGGGATGAGTTCTTCATTATTTTAGAGGGGTCAGCTGCTGTGCTACAACGTCGGTCAGAAAATGAAGAGTTTGTTGAAGTGGGAAGATTGGGGCCTTCTGATTATTTTGGTGAAATTGCACTACTGATGAATCGTCCTCGTGCTGCCACAGTTGTTGCTCGTGGCCCCTTGAAGTGCGTTAAGCTGGACCGACCTAGATTTGAACGTGTTCTTGGCCCATGCTCAGACATCCTCAAACGAAACATCCAGCAGTACAACAGTTTTGTGTCACTGTCTGTCTGA"}, - {"NM_001220776.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGTCATTAAAGAAGAAACTAATCACAGTGAAATGGCAGAAGACCTGTGCAAGATAGGATCAGAGAGATCTCTCGTGCTGGACAGACTAGCAAGTAACGTCGCCAAACGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220772.1", "ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001220771.1", 
"ATGGATGCTGATGAGGGTCAAGACATGTCCCAAGTTTCAGGGAAGGAAAGCCCCCCTGTAAGCGATACTCCAGATGAGGGCGATGAGCCCATGCCGATCCCCGAGGACCTCTCCACCACCTCGGGAGGACAGCAAAGCTCCAAGAGTGACAGAGTCGTGGCCAGTAATGTTAAAGTAGAGACTCAGAGTGATGAAGAGAATGGGCGTGCCTGTGAAATGAATGGGGAAGAATGTGCGGAGGATTTACGAATGCTTGATGCCTCGGGAGAGAAAATGAATGGCTCCCACAGGGACCAAGGCAGCTCGGCTTTGTCGGGAGTTGGAGGCATTCGACTTCCTAACGGAAAACTAAAGTGTGATATCTGTGGGATCATTTGCATCGGGCCCAATGTGCTCATGGTTCACAAAAGAAGCCACACTGGGGACAAGGGCCTGTCCGACACGCCCTACGACAGCAGCGCCAGCTACGAGAAGGAGAACGAAATGATGAAGTCCCACGTGATGGACCAAGCCATCAACAACGCCATCAACTACCTGGGGGCCGAGTCCCTGCGCCCGCTGGTGCAGACGCCCCCGGGCGGTTCCGAGGTGGTCCCGGTCATCAGCCCGATGTACCAGCTGCACAAGCCGCTCGCGGAGGGCACCCCGCGCTCCAACCACTCGGCCCAGGACAGCGCCGTGGAGAACCTGCTGCTGCTCTCCAAGGCCAAGTTGGTGCCCTCGGAGCGCGAGGCGTCCCCGAGCAACAGCTGCCAAGACTCCACGGACACCGAGAGCAACAACGAGGAGCAGCGCAGCGGTCTCATCTACCTGACCAACCACATCGCCCCGCACGCGCGCAACGGGCTGTCGCTCAAGGAGGAGCACCGCGCCTACGACCTGCTGCGCGCCGCCTCCGAGAACTCGCAGGACGCGCTCCGCGTGGTCAGCACCAGCGGGGAGCAGATGAAGGTGTACAAGTGCGAACACTGCCGGGTGCTCTTCCTGGATCACGTCATGTACACCATCCACATGGGCTGCCACGGCTTCCGTGATCCTTTTGAGTGCAACATGTGCGGCTACCACAGCCAGGACCGGTACGAGTTCTCGTCGCACATAACGCGAGGGGAGCACCGCTTCCACATGAGCTAA"}, - {"NM_001260.1", 
"ATGGACTATGACTTTAAAGTGAAGCTGAGCAGCGAGCGGGAGCGGGTCGAGGACCTGTTTGAATACGAGGGCTGCAAAGTTGGCCGAGGCACTTATGGTCACGTCTACAAAGCCAAGAGGAAAGATGGGAAGGATGATAAAGACTATGCTTTAAAACAAATAGAAGGAACTGGGATCTCTATGTCGGCATGTAGAGAAATAGCATTACTTCGAGAGCTTAAGCATCCAAACGTCATTTCTCTTCAAAAGGTGTTTCTGTCTCATGCTGATAGGAAGGTGTGGCTTCTGTTTGACTATGCTGAACATGACCTCTGGCATATAATCAAGTTTCACAGAGCTTCTAAAGCAAACAAGAAGCCAGTTCAGTTACCTCGGGGAATGGTGAAGTCACTATTATATCAGATCCTAGATGGTATTCACTACCTGCATGCTAACTGGGTGTTGCACAGAGATTTGAAACCTGCTAATATTTTAGTTATGGGTGAAGGTCCTGAGCGAGGAAGAGTAAAAATTGCTGACATGGGCTTTGCCCGATTATTTAATTCACCTTTGAAGCCTTTAGCAGATTTGGATCCAGTGGTTGTTACATTCTGGTACCGAGCCCCTGAACTACTTCTTGGAGCAAGGCATTATACCAAAGCTATTGATATTTGGGCTATAGGGTGTATATTTGCAGAACTACTAACGTCAGAACCAATATTTCACTGTCGACAAGAGGACATCAAAACTAGTAATCCTTATCACCATGACCAGCTGGACAGAATATTCAATGTAATGGGATTTCCTGCAGATAAAGATTGGGAAGATATAAAAAAGATGCCTGAACATTCAACATTAATGAAAGATTTCAGAAGAAATACGTATACCAACTGCAGCCTTATCAAGTATATGGAAAAACATAAAGTTAAACCAGATAGTAAAGCATTCCACTTGCTTCAGAAGCTGCTTACCATGGACCCAATAAAGCGAATTACCTCAGAACAGGCTATGCAGGACCCCTATTTCTTAGAAGACCCACTTCCTACATCAGACGTTTTTGCCGGTTGTCAAATCCCTTACCCAAAACGAGAATTTTTAACGGAAGAAGAACCTGATGACAAAGGAGACAAAAAGAACCAGCAGCAGCAGCAGGGCAATAACCACACTAATGGAACTGGCCACCCAGGGAATCAAGACAGCAGTCACACACAGGGACCCCCGTTGAAGAAAGTGAGAGTTGTTCCTCCTACCACTACCTCAGGTGGACTTATCATGACCTCAGACTATCAGCGTTCCAATCCACATGCTGCCTATCCCAACCCTGGACCAAGCACATCACAGCCGCAGAGCAGCATGGGATACTCAGCTACCTCCCAGCAGCCTCCACAGTACTCACATCAGACACATCGGTACTGA"}, - {"NM_000545.5", 
"ATGGTTTCTAAACTGAGCCAGCTGCAGACGGAGCTCCTGGCGGCCCTGCTCGAGTCAGGGCTGAGCAAAGAGGCACTGATCCAGGCACTGGGTGAGCCGGGGCCCTACCTCCTGGCTGGAGAAGGCCCCCTGGACAAGGGGGAGTCCTGCGGCGGCGGTCGAGGGGAGCTGGCTGAGCTGCCCAATGGGCTGGGGGAGACTCGGGGCTCCGAGGACGAGACGGACGACGATGGGGAAGACTTCACGCCACCCATCCTCAAAGAGCTGGAGAACCTCAGCCCTGAGGAGGCGGCCCACCAGAAAGCCGTGGTGGAGACCCTTCTGCAGGAGGACCCGTGGCGTGTGGCGAAGATGGTCAAGTCCTACCTGCAGCAGCACAACATCCCACAGCGGGAGGTGGTCGATACCACTGGCCTCAACCAGTCCCACCTGTCCCAACACCTCAACAAGGGCACTCCCATGAAGACGCAGAAGCGGGCCGCCCTGTACACCTGGTACGTCCGCAAGCAGCGAGAGGTGGCGCAGCAGTTCACCCATGCAGGGCAGGGAGGGCTGATTGAAGAGCCCACAGGTGATGAGCTACCAACCAAGAAGGGGCGGAGGAACCGTTTCAAGTGGGGCCCAGCATCCCAGCAGATCCTGTTCCAGGCCTATGAGAGGCAGAAGAACCCTAGCAAGGAGGAGCGAGAGACGCTAGTGGAGGAGTGCAATAGGGCGGAATGCATCCAGAGAGGGGTGTCCCCATCACAGGCACAGGGGCTGGGCTCCAACCTCGTCACGGAGGTGCGTGTCTACAACTGGTTTGCCAACCGGCGCAAAGAAGAAGCCTTCCGGCACAAGCTGGCCATGGACACGTACAGCGGGCCCCCCCCAGGGCCAGGCCCGGGACCTGCGCTGCCCGCTCACAGCTCCCCTGGCCTGCCTCCACCTGCCCTCTCCCCCAGTAAGGTCCACGGTGTGCGCTATGGACAGCCTGCGACCAGTGAGACTGCAGAAGTACCCTCAAGCAGCGGCGGTCCCTTAGTGACAGTGTCTACACCCCTCCACCAAGTGTCCCCCACGGGCCTGGAGCCCAGCCACAGCCTGCTGAGTACAGAAGCCAAGCTGGTCTCAGCAGCTGGGGGCCCCCTCCCCCCTGTCAGCACCCTGACAGCACTGCACAGCTTGGAGCAGACATCCCCAGGCCTCAACCAGCAGCCCCAGAACCTCATCATGGCCTCACTTCCTGGGGTCATGACCATCGGGCCTGGTGAGCCTGCCTCCCTGGGTCCTACGTTCACCAACACAGGTGCCTCCACCCTGGTCATCGGCCTGGCCTCCACGCAGGCACAGAGTGTGCCGGTCATCAACAGCATGGGCAGCAGCCTGACCACCCTGCAGCCCGTCCAGTTCTCCCAGCCGCTGCACCCCTCCTACCAGCAGCCGCTCATGCCACCTGTGCAGAGCCATGTGACCCAGAGCCCCTTCATGGCCACCATGGCTCAGCTGCAGAGCCCCCACGCCCTCTACAGCCACAAGCCCGAGGTGGCCCAGTACACCCACACGGGCCTGCTCCCGCAGACTATGCTCATCACCGACACCACCAACCTGAGCGCCCTGGCCAGCCTCACGCCCACCAAGCAGGTCTTCACCTCAGACACTGAGGCCTCCAGTGAGTCCGGGCTTCACACGCCGGCATCTCAGGCCACCACCCTCCACGTCCCCAGCCAGGACCCTGCCGGCATCCAGCACCTGCAGCCGGCCCACCGGCTCAGCGCCAGCCCCACAGTGTCCTCCAGCAGCCTGGTGCTGTACCAGAGCTCAGACTCCAGCAATGGCCAGAGCCACCTGCTGCCATCCAACCACAGCGTCATCGAGACCTTCATCTCCACCCAGATGGCCTCTTCCTCCCAGTAA"}, - {"NM_000535.5", 
"ATGGAGCGAGCTGAGAGCTCGAGTACAGAACCTGCTAAGGCCATCAAACCTATTGATCGGAAGTCAGTCCATCAGATTTGCTCTGGGCAGGTGGTACTGAGTCTAAGCACTGCGGTAAAGGAGTTAGTAGAAAACAGTCTGGATGCTGGTGCCACTAATATTGATCTAAAGCTTAAGGACTATGGAGTGGATCTTATTGAAGTTTCAGACAATGGATGTGGGGTAGAAGAAGAAAACTTCGAAGGCTTAACTCTGAAACATCACACATCTAAGATTCAAGAGTTTGCCGACCTAACTCAGGTTGAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAGCGATGTCACCATTTCTACCTGCCACGCATCGGCGAAGGTTGGAACTCGACTGATGTTTGATCACAATGGGAAAATTATCCAGAAAACCCCCTACCCCCGCCCCAGAGGGACCACAGTCAGCGTGCAGCAGTTATTTTCCACACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAGGAGTATGCCAAAATGGTCCAGGTCTTACATGCATACTGTATCATTTCAGCAGGCATCCGTGTAAGTTGCACCAATCAGCTTGGACAAGGAAAACGACAGCCTGTGGTATGCACAGGTGGAAGCCCCAGCATAAAGGAAAATATCGGCTCTGTGTTTGGGCAGAAGCAGTTGCAAAGCCTCATTCCTTTTGTTCAGCTGCCCCCTAGTGACTCCGTGTGTGAAGAGTACGGTTTGAGCTGTTCCGATGCTCTGCATAATCTTTTTTACATCTCAGGTTTCATTTCACAATGCACGCATGGAGTTGGAAGGAGTTCAACAGACAGACAGTTTTTCTTTATCAACCGGCGGCCTTGTGACCCAGCAAAGGTCTGCAGACTCGTGAATGAGGTCTACCACATGTATAATCGACACCAGTATCCATTTGTTGTTCTTAACATTTCTGTTGATTCAGAATGCGTTGATATCAATGTTACTCCAGATAAAAGGCAAATTTTGCTACAAGAGGAAAAGCTTTTGTTGGCAGTTTTAAAGACCTCTTTGATAGGAATGTTTGATAGTGATGTCAACAAGCTAAATGTCAGTCAGCAGCCACTGCTGGATGTTGAAGGTAACTTAATAAAAATGCATGCAGCGGATTTGGAAAAGCCCATGGTAGAAAAGCAGGATCAATCCCCTTCATTAAGGACTGGAGAAGAAAAAAAAGACGTGTCCATTTCCAGACTGCGAGAGGCCTTTTCTCTTCGTCACACAACAGAGAACAAGCCTCACAGCCCAAAGACTCCAGAACCAAGAAGGAGCCCTCTAGGACAGAAAAGGGGTATGCTGTCTTCTAGCACTTCAGGTGCCATCTCTGACAAAGGCGTCCTGAGACCTCAGAAAGAGGCAGTGAGTTCCAGTCACGGACCCAGTGACCCTACGGACAGAGCGGAGGTGGAGAAGGACTCGGGGCACGGCAGCACTTCCGTGGATTCTGAGGGGTTCAGCATCCCAGACACGGGCAGTCACTGCAGCAGCGAGTATGCGGCCAGCTCCCCAGGGGACAGGGGCTCGCAGGAACATGTGGACTCTCAGGAGAAAGCGCCTGAAACTGACGACTCTTTTTCAGATGTGGACTGCCATTCAAACCAGGAAGATACCGGATGTAAATTTCGAGTTTTGCCTCAGCCAACTAATCTCGCAACCCCAAACACAAAGCGTTTTAAAAAAGAAGAAATTCTTTCCAGTTCTGACATTTGTCAAAAGTTAGTAAATACTCAGGACATGTCAGCCTCTCAGGTTGATGTAGCTGTGAAAATTAATAAGAAAGTTGTGCCCCTGGACTTTTCTATGAGTTCTTTAGCTAAACGAATAAAGCAGTTACATCATGAAGCACAGCAAAGTGAAGGGGAACAGAATTACAGGAAGTTTAGGGCAAAGATTTGTCCTGGAGAAAATCAAGCAGCCGAAGATGAACTAAGAAAAG
AGATAAGTAAAACGATGTTTGCAGAAATGGAAATCATTGGTCAGTTTAACCTGGGATTTATAATAACCAAACTGAATGAGGATATCTTCATAGTGGACCAGCATGCCACGGACGAGAAGTATAACTTCGAGATGCTGCAGCAGCACACCGTGCTCCAGGGGCAGAGGCTCATAGCACCTCAGACTCTCAACTTAACTGCTGTTAATGAAGCTGTTCTGATAGAAAATCTGGAAATATTTAGAAAGAATGGCTTTGATTTTGTTATCGATGAAAATGCTCCAGTCACTGAAAGGGCTAAACTGATTTCCTTGCCAACTAGTAAAAACTGGACCTTCGGACCCCAGGACGTCGATGAACTGATCTTCATGCTGAGCGACAGCCCTGGGGTCATGTGCCGGCCTTCCCGAGTCAAGCAGATGTTTGCCTCCAGAGCCTGCCGGAAGTCGGTGATGATTGGGACTGCTCTTAACACAAGCGAGATGAAGAAACTGATCACCCACATGGGGGAGATGGACCACCCCTGGAACTGTCCCCATGGAAGGCCAACCATGAGACACATCGCCAACCTGGGTGTCATTTCTCAGAACTGA"}, - {"NM_000314.4", "ATGACAGCCATCATCAAAGAGATCGTTAGCAGAAACAAAAGGAGATATCAAGAGGATGGATTCGACTTAGACTTGACCTATATTTATCCAAACATTATTGCTATGGGATTTCCTGCAGAAAGACTTGAAGGCGTATACAGGAACAATATTGATGATGTAGTAAGGTTTTTGGATTCAAAGCATAAAAACCATTACAAGATATACAATCTTTGTGCTGAAAGACATTATGACACCGCCAAATTTAATTGCAGAGTTGCACAATATCCTTTTGAAGACCATAACCCACCACAGCTAGAACTTATCAAACCCTTTTGTGAAGATCTTGACCAATGGCTAAGTGAAGATGACAATCATGTTGCAGCAATTCACTGTAAAGCTGGAAAGGGACGAACTGGTGTAATGATATGTGCATATTTATTACATCGGGGCAAATTTTTAAAGGCACAAGAGGCCCTAGATTTCTATGGGGAAGTAAGGACCAGAGACAAAAAGGGAGTAACTATTCCCAGTCAGAGGCGCTATGTGTATTATTATAGCTACCTGTTAAAGAATCATCTGGATTATAGACCAGTGGCACTGTTGTTTCACAAGATGATGTTTGAAACTATTCCAATGTTCAGTGGCGGAACTTGCAATCCTCAGTTTGTGGTCTGCCAGCTAAAGGTGAAGATATATTCCTCCAATTCAGGACCCACACGACGGGAAGACAAGTTCATGTACTTTGAGTTCCCTCAGCCGTTACCTGTGTGTGGTGATATCAAAGTAGAGTTCTTCCACAAACAGAACAAGATGCTAAAAAAGGACAAAATGTTTCACTTTTGGGTAAATACATTCTTCATACCAGGACCAGAGGAAACCTCAGAAAAAGTAGAAAATGGAAGTCTATGTGATCAAGAAATCGATAGCATTTGCAGTATAGAGCGTGCAGATAATGACAAGGAATATCTAGTACTTACTTTAACAAAAAATGATCTTGACAAAGCAAATAAAGACAAAGCCAACCGATACTTTTCTCCAAATTTTAAGGTGAAGCTGTACTTCACAAAAACAGTAGAGGAGCCGTCAAATCCAGAGGCTAGCAGTTCAACTTCTGTAACACCAGATGTTAGTGACAATGAACCTGATCATTATAGATATTCTGACACCACTGACTCTGATCCAGAGAATGAACCTTTTGATGAAGATCAGCATACACAAATTACAAAAGTCTGA"}, - }; - - public static Dictionary TranscriptPeptides = new Dictionary() - { - {"NM_022148.2", 
"MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL*"}, - {"NM_012234.6", "MTMGDKKSPTRPKRQAKPAADEGFWDCSVCTFRNSAEAFKCSICDVRKGTSTRKPRINSQLVAQQVAQQYATPPPPKKEKKEKVEKQDKEKPEKDKEISPSVTKKNTNKKTKPKSDILKDPPSEANSIQSANATTKTSETNHTSRPRLKNVDRSTAQQLAVTVGNVTVIITDFKEKTRSSSTSSSTVTSSAGSEQQNQSSSGSESTDKGSSRSSTPKGDMSAVNDESF*"}, - {"NM_001220773.1", "MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_002447.2", 
"MELLPPLPQSFLLLLLLPAKPAAGEDWQCPRTPYAASRDFDVKYVVPSFSAGGLVQAMVTYEGDRNESAVFVAIRNRLHVLGPDLKSVQSLATGPAGDPGCQTCAACGPGPHGPPGDTDTKVLVLDPALPALVSCGSSLQGRCFLHDLEPQGTAVHLAAPACLFSAHHNRPDDCPDCVASPLGTRVTVVEQGQASYFYVASSLDAAVAASFSPRSVSIRRLKADASGFAPGFVALSVLPKHLVSYSIEYVHSFHTGAFVYFLTVQPASVTDDPSALHTRLARLSATEPELGDYRELVLDCRFAPKRRRRGAPEGGQPYPVLRVAHSAPVGAQLATELSIAEGQEVLFGVFVTGKDGGPGVGPNSVVCAFPIDLLDTLIDEGVERCCESPVHPGLRRGLDFFQSPSFCPNPPGLEALSPNTSCRHFPLLVSSSFSRVDLFNGLLGPVQVTALYVTRLDNVTVAHMGTMDGRILQVELVRSLNYLLYVSNFSLGDSGQPVQRDVSRLGDHLLFASGDQVFQVPIQGPGCRHFLTCGRCLRAWHFMGCGWCGNMCGQQKECPGSWQQDHCPPKLTEFHPHSGPLRGSTRLTLCGSNFYLHPSGLVPEGTHQVTVGQSPCRPLPKDSSKLRPVPRKDFVEEFECELEPLGTQAVGPTNVSLTVTNMPPGKHFRVDGTSVLRGFSFMEPVLIAVQPLFGPRAGGTCLTLEGQSLSVGTSRAVLVNGTECLLARVSEGQLLCATPPGATVASVPLSLQVGGAQVPGSWTFQYREDPVVLSISPNCGYINSHITICGQHLTSAWHLVLSFHDGLRAVESRCERQLPEQQLCRLPEYVVRDPQGWVAGNLSARGDGAAGFTLPGFRFLPPPHPPSANLVPLKPEEHAIKFEYIGLGAVADCVGINVTVGGESCQHEFRGDMVVCPLPPSLQLGQDGAPLQVCVDGECHILGRVVRPGPDGVPQSTLLGILLPLLLLVAALATALVFSYWWRRKQLVLPPNLNDLASLDQTAGATPLPILYSGSDYRSGLALPAIDGLDSTTCVHGASFSDSEDESCVPLLRKESIQLRDLDSALLAEVKDVLIPHERVVTHSDRVIGKGHFGVVYHGEYIDQAQNRIQCAIKSLSRITEMQQVEAFLREGLLMRGLNHPNVLALIGIMLPPEGLPHVLLPYMCHGDLLQFIRSPQRNPTVKDLISFGLQVARGMEYLAEQKFVHRDLAARNCMLDESFTVKVADFGLARDILDREYYSVQQHRHARLPVKWMALESLQTYRFTTKSDVWSFGVLLWELLTRGAPPYRHIDPFDLTHFLAQGRRLPQPEYCPDSLYQVMQQCWEADPAVRPTFRVLVGEVEQIVSALLGDHYVQLPATYMNLGPSTSHEMNVRPEQPQFSPMPGNVRRPRPLSEPPRPT*"}, - {"NM_001244937.1", 
"MELLPPLPQSFLLLLLLPAKPAAGEDWQCPRTPYAASRDFDVKYVVPSFSAGGLVQAMVTYEGDRNESAVFVAIRNRLHVLGPDLKSVQSLATGPAGDPGCQTCAACGPGPHGPPGDTDTKVLVLDPALPALVSCGSSLQGRCFLHDLEPQGTAVHLAAPACLFSAHHNRPDDCPDCVASPLGTRVTVVEQGQASYFYVASSLDAAVAASFSPRSVSIRRLKADASGFAPGFVALSVLPKHLVSYSIEYVHSFHTGAFVYFLTVQPASVTDDPSALHTRLARLSATEPELGDYRELVLDCRFAPKRRRRGAPEGGQPYPVLRVAHSAPVGAQLATELSIAEGQEVLFGVFVTGKDGGPGVGPNSVVCAFPIDLLDTLIDEGVERCCESPVHPGLRRGLDFFQSPSFCPNPPGLEALSPNTSCRHFPLLVSSSFSRVDLFNGLLGPVQVTALYVTRLDNVTVAHMGTMDGRILQVELVRSLNYLLYVSNFSLGDSGQPVQRDVSRLGDHLLFASGDQVFQVPIQGPGCRHFLTCGRCLRAWHFMGCGWCGNMCGQQKECPGSWQQDHCPPKLTEFHPHSGPLRGSTRLTLCGSNFYLHPSGLVPEGTHQVTVGQSPCRPLPKDSSKLRPVPRKDFVEEFECELEPLGTQAVGPTNVSLTVTNMPPGKHFRVDGTSVLRGFSFMEPVLIAVQPLFGPRAGGTCLTLEGQSLSVGTSRAVLVNGTECLLARVSEGQLLCATPPGATVASVPLSLQVGGAQVPGSWTFQYREDPVVLSISPNCGYINSHITICGQHLTSAWHLVLSFHDGLRAVESRCERQLPEQQLCRLPEYVVRDPQGWVAGNLSARGDGAAGFTLPGFRFLPPPHPPSANLVPLKPEEHAIKFEVCVDGECHILGRVVRPGPDGVPQSTLLGILLPLLLLVAALATALVFSYWWRRKQLVLPPNLNDLASLDQTAGATPLPILYSGSDYRSGLALPAIDGLDSTTCVHGASFSDSEDESCVPLLRKESIQLRDLDSALLAEVKDVLIPHERVVTHSDRVIGKGHFGVVYHGEYIDQAQNRIQCAIKSLSRITEMQQVEAFLREGLLMRGLNHPNVLALIGIMLPPEGLPHVLLPYMCHGDLLQFIRSPQRNPTVKDLISFGLQVARGMEYLAEQKFVHRDLAARNCMLDESFTVKVADFGLARDILDREYYSVQQHRHARLPVKWMALESLQTYRFTTKSDVWSFGVLLWELLTRGAPPYRHIDPFDLTHFLAQGRRLPQPEYCPDSLYQVMQQCWEADPAVRPTFRVLVGEVEQIVSALLGDHYVQLPATYMNLGPSTSHEMNVRPEQPQFSPMPGNVRRPRPLSEPPRPT*"}, - {"NM_019063.3", 
"MDGFAGSLDDSISAASTSDVQDRLSALESRVQQQEDEITVLKAALADVLRRLAISEDHVASVKKSVSSKGQPSPRAVIPMSCITNGSGANRKPSHTSAVSIAGKETLSSAAKSGTEKKKEKPQGQREKKEESHSNDQSPQIRASPSPQPSSQPLQIHRQTPESKNATPTKSIKRPSPAEKSHNSWENSDDSRNKLSKIPSTPKLIPKVTKTADKHKDVIINQEGEYIKMFMRGRPITMFIPSDVDNYDDIRTELPPEKLKLEWAYGYRGKDCRANVYLLPTGEIVYFIASVVVLFNYEERTQRHYLGHTDCVKCLAIHPDKIRIATGQIAGVDKDGRPLQPHVRVWDSVTLSTLQIIGLGTFERGVGCLDFSKADSGVHLCVIDDSNEHMLTVWDWQKKAKGAEIKTTNEVVLAVEFHPTDANTIITCGKSHIFFWTWSGNSLTRKQGIFGKYEKPKFVQCLAFLGNGDVLTGDSGGVMLIWSKTTVEPTPGKGPKGVYQISKQIKAHDGSVFTLCQMRNGMLLTGGGKDRKIILWDHDLNPEREIEVPDQYGTIRAVAEGKADQFLVGTSRNFILRGTFNDGFQIEVQGHTDELWGLATHPFKDLLLTCAQDRQVCLWNSMEHRLEWTRLVDEPGHCADFHPSGTVVAIGTHSGRWFVLDAETRDLVSIHTDGNEQLSVMRYSIDGTFLAVGSHDNFIYLYVVSENGRKYSRYGRCTGHSSYITHLDWSPDNKYIMSNSGDYEILYWDIPNGCKLIRNRSDCKDIDWTTYTCVLGFQVFGVWPEGSDGTDINALVRSHNRKVIAVADDFCKVHLFQYPCSKAKAPSHKYSAHSSHVTNVSFTHNDSHLISTGGKDMSIIQWKLVEKLSLPQNETVADTTLTKAPVSSTESVIQSNTPTPPPSQPLNETAEEESRISSSPTLLENSLEQTVEPSEDHSEEESEEGSGDLGEPLYEEPCNEISKEQAKATLLEDQQDPSPSS*"}, - {"NM_175741.1","MASDGASALPGPDMSMKPSAALSPSPALPFLPPTSDPPDHPPREPPPQPIMPSVFSPDNPLMLSAFPSSLLVTGDGGPCLSGAGAGKVIVKVKTEGGSAEPSQTQNFILTQTALNSTAPGTPCGGLEGPAPPFVTASNVKTILPSKAVGVSQEGPPGLPPQPPPPVAQLVPIVPLEKAWPGPHGTTGEGGPVATLSKPSLGDRSKISKDVYENFRQWQRYKALARRHLSQSPDTEALSCFLIPVLRSLARLKPTMTLEEGLPLAVQEWEHTSNFDRMIFYEMAERFMEFEAEEMQIQNTQLMNGSQGLSPATPLKLDPLGPLASEVCQQPVYIPKKAASKTRAPRRRQRKAQRPPAPEAPKEIPPEAVKEYVDIMEWLVGTHLATGESDGKQEEEGQQQEEEGMYPDPGLLSYINELCSQKVFVSKVEAVIHPQFLADLLSPEKQRDPLALIEELEQEEGLTLAQLVQKRLMALEEEEDAEAPPSFSGAQLDSSPSGSVEDEDGDGRLRPSPGLQGAGGAACLGKVSSSGKRAREVHGGQEQALDSPRGMHRDGNTLPSPSSWDLQPELAAPQGTPGPLGVERRGSGKVINQVSLHQDGHLGGAGPPGHCLVADRTSEALPLCWQGGFQPESTPSLDAGLAELAPLQGQGLEKQVLGLQKGQQTGGRGVLPQGKEPLAVPWEGSSGAMWGDDRGTPMAQSYDQNPSPRAAGERDDVCLSPGVWLSSEMDAVGLELPVQIEEVIESFQVEKCVTEYQEGCQGLGSRGNISLGPGETLVPGDTESSVIPCGGTVAAAALEKRNYCSLPGPLRANSPPLRSKENQEQSCETVGHPSDLWAEGCFPLLESGDSTLGSSKETLPPTCQGNLLIMGTEDASSLPEASQEAGSRGNSFSPLLETIEPVNILDVKDDCGLQLRVSEDTCPLNVHSYDPQGEGRVDPDLSKPKNLAPLQESQESYTTGTPKATSSHQGLGSTLPRRGTRNAIVPRETSVSKTHR
SADRAKGKEKKKKEAEEEDEELSNFAYLLASKLSLSPREHPLSPHHASGGQGSQRASHLLPAGAKGPSKLPYPVAKSGKRALAGGPAPTEKTPHSGAQLGVPREKPLALGVVRPSQPRKRRCDSFVTGRRKKRRRSQ*"}, - {"NM_005228.3", "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA*"}, - {"NM_005922.2", 
"MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPECCLAARQEGTLGDSACKSPESDLEDFSDETNTENLYGTSPPSTPRQMKRMSTKHQRNNVGRPASRSNLKEKMNAPNQPPHKDTGKTVENVEEYSYKQEKKIRAALRTTERDHKKNVQCSFMLDSVGGSLPKKSIPDVDLNKPYLSLGCSNAKLPVSVPMPIARPARQTSRTDCPADRLKFFETLRLLLKLTSVSKKKDREQRGQENTSGFWLNRSNELIWLELQAWHAGRTINDQDFFLYTARQAIPDIINEILTFKVDYGSFAFVRDRAGFNGTSVEGQCKATPGTKIVGYSTHHEHLQRQRVSFEQVKRIMELLEYIEALYPSLQALQKDYEKYAAKDFQDRVQALCLWLNITKDLNQKLRIMGTVLGIKNLSDIGWPVFEIPSPRPSKGNEPEYEGDDTEGELKELESSTDESEEEQISDPRVPEIRQPIDNSFDIQSRDCISKKLERLESEDDSLGWGAPDWSTEAGFSRHCLTSIYRPFVDKALKQMGLRKLILRLHKLMDGSLQRARIALVKNDRPVEFSEFPDPMWGSDYVQLSRTPPSSEEKCSAVSWEELKAMDLPSFEPAFLVLCRVLLNVIHECLKLRLEQRPAGEPSLLSIKQLVRECKEVLKGGLLMKQYYQFMLQEVLEDLEKPDCNIDAFEEDLHKMLMVYFDYMRSWIQMLQQLPQASHSLKNLLEEEWNFTKEITHYIRGGEAQAGKLFCDIAGMLLKSTGSFLEFGLQESCAEFWTSADDSSASDEIRRSVIEISRALKELFHEARERASKALGFAKMLRKDLEIAAEFRLSAPVRDLLDVLKSKQYVKVQIPGLENLQMFVPDTLAEEKSIILQLLNAAAGKDCSKDSDDVLIDAYLLLTKHGDRARDSEDSWGTWEAQPVKVVPQVETVDTLRSMQVDNLLLVVMQSAHLTIQRKAFQQSIEGLMTLCQEQTSSQPVIAKALQQLKNDALELCNRISNAIDRVDHMFTSEFDAEVDESESVTLQQYYREAMIQGYNFGFEYHKEVVRLMSGEFRQKIGDKYISFARKWMNYVLTKCESGRGTRPRWATQGFDFLQAIEPAFISALPEDDFLSLQALMNECIGHVIGKPHSPVTGLYLAIHRNSPRPMKVPRCHSDPPNPHLIIPTPEGFSTRSMPSDARSHGSPAAAAAAAAAAVAASRPSPSGGDSVLPKSISSAHDTRGSSVPENDRLASIAAELQFRSLSRHSSPTEERDEPAYPRGDSSGSTRRSWELRTLISQSKDTASKLGPIEAIQKSVRLFEEKRYREMRRKNIIGQVCDTPKSYDNVMHVGLRKVTFKWQRGNKIGEGQYGKVYTCISVDTGELMAMKEIRFQPNDHKTIKETADELKIFEGIKHPNLVRYFGVELHREEMYIFMEYCDEGTLEEVSRLGLQEHVIRLYSKQITIAINVLHEHGIVHRDIKGANIFLTSSGLIKLGDFGCSVKLKNNAQTMPGEVNSTLGTAAYMAPEVITRAKGEGHGRAADIWSLGCVVIEMVTGKRPWHEYEHNFQIMYKVGMGHKPPIPERLSPEGKDFLSHCLESDPKMRWTASQLLDHSFVKVCTDEE*"}, - {"NM_001220766.1", 
"MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220765.1", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001242758.1", "MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQKMEPRAPWIEQEGPEYWDQETRNMKAHSQTDRANLGTLRGYYNQSEDGSHTIQIMYGCDVGPDGRFLRGYRQDAYDGKDYIALNEDLRSWTAADMAAQITKRKWEAVHAAEQRRVYLEGRCVDGLRRYLENGKETLQRTDPPKTHMTHHPISDHEATLRCWALGFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPKPLTLRWELSSQPTIPIVGIIAGLVLLGAVITGAVVAAVMWRRKSSDRKGGSYTQAASSDSAQGSDVSLTACKV*"}, - {"NM_001220767.1", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001145076.1", 
"MDGFAGSLDDSISAASTSDVQDRLSALESRVQQQEDEITVLKAALADVLRRLAISEDHVASVKKSVSSKGQPSPRAVIPMSCITNGSGANRKPSHTSAVSIAGKETLSSAAKSIKRPSPAEKSHNSWENSDDSRNKLSKIPSTPKLIPKVTKTADKHKDVIINQEGEYIKMFMRGRPITMFIPSDVDNYDDIRTELPPEKLKLEWAYGYRGKDCRANVYLLPTGEIVYFIASVVVLFNYEERTQRHYLGHTDCVKCLAIHPDKIRIATGQIAGVDKDGRPLQPHVRVWDSVTLSTLQIIGLGTFERGVGCLDFSKADSGVHLCVIDDSNEHMLTVWDWQKKAKGAEIKTTNEVVLAVEFHPTDANTIITCGKSHIFFWTWSGNSLTRKQGIFGKYEKPKFVQCLAFLGNGDVLTGDSGGVMLIWSKTTVEPTPGKGPKGVYQISKQIKAHDGSVFTLCQMRNGMLLTGGGKDRKIILWDHDLNPEREIEVPDQYGTIRAVAEGKADQFLVGTSRNFILRGTFNDGFQIEVQGHTDELWGLATHPFKDLLLTCAQDRQVCLWNSMEHRLEWTRLVDEPGHCADFHPSGTVVAIGTHSGRWFVLDAETRDLVSIHTDGNEQLSVMRYSIDGTFLAVGSHDNFIYLYVVSENGRKYSRYGRCTGHSSYITHLDWSPDNKYIMSNSGDYEILYWDIPNGCKLIRNRSDCKDIDWTTYTCVLGFQVFGVWPEGSDGTDINALVRSHNRKVIAVADDFCKVHLFQYPCSKAKAPSHKYSAHSSHVTNVSFTHNDSHLISTGGKDMSIIQWKLVEKLSLPQNETVADTTLTKAPVSSTESVIQSNTPTPPPSQPLNETAEEESRISSSPTLLENSLEQTVEPSEDHSEEESEEGSGDLGEPLYEEPCNEISKEQAKATLLEDQQDPSPSS*"}, - {"NM_001278433.1", "MESGSTAASEEARSLRECELYVQKHNIQALLKDSIVQLCTARPERPMAFLREYFERLEKEEAKQIQNLQKAGTRTDSREDEISPPPPNPVVKGRRRRGAISAEVYTEEDAASYVRKVIPKDYKTMAALAKAIEKNVLFSHLDDNERSDIFDAMFSVSFIAGETVIQQGDEGDNFYVIDQGETDVYVNNEWATSVGEGGSFGELALIYGTPRAATVKAKTNVKLWGIDRDSYRRILMGSTLRKRKMYEEFLSKVSILESLDKWERLTVADALEPVQFEDGQKIVVQGEPGDEFFIILEGSAAVLQRRSENEEFVEVGRLGPSDYFGEIALLMNRPRAATVVARGPLKCVKLDRPRFERVLGPCSDILKRNIQQYNSFVSLSV*"}, - {"NM_001220768.1", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220775.1", 
"MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_006060.4", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220770.1", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_152756.3", 
"MAAIGRGRSLKNLRVRGRNDSGEENVPLDLTREPSDNLREILQNVARLQGVSNMRKLGHLNNFTKLLCDIGHSEEKLGFHYEDIIICLRLALLNEAKEVRAAGLRALRYLIQDSSILQKVLKLKVDYLIARCIDIQQSNEVERTQALRLVRKMITVNASLFPSSVTNSLIAVGNDGLQERDRMVRACIAIICELALQNPEVVALRGGLNTILKNVIDCQLSRINEALITTILHLLNHPKTRQYVRADVELERILAPYTDFHYRHSPDTAEGQLKEDREARFLASKMGIIATFRSWAGIINLCKPGNSGIQSLIGVLCIPNMEIRRGLLEVLYDIFRLPLPVVTEEFIEALLSVDPGRFQDSWRLSDGFVAAEAKTILPHRARSRPDLMDNYLALILSAFIRNGLLEGLVEVITNSDDHISVRATILLGELLHMANTILPHSHSHHLHCLPTLMNMAASFDIPKEKRLRASAALNCLKRFHEMKKRGPKPYSLHLDHIIQKAIATHQKRDQYLRVQKDIFILKDTEEALLINLRDSQVLQHKENLEWNWNLIGTILKWPNVNLRNYKDEQLHRFVRRLLYFYKPSSKLYANLDLDFAKAKQLTVVGCQFTEFLLESEEDGQGYLEDLVKDIVQWLNASSGMKPERSLQNNGLLTTLSQHYFLFIGTLSCHPHGVKMLEKCSVFQCLLNLCSLKNQDHLLKLTVSSLDYSRDGLARVILSKILTAATDACRLYATKHLRVLLRANVEFFNNWGIELLVTQLHDKNKTISSEALDILDEACEDKANLHALIQMKPALSHLGDKGLLLLLRFLSIPKGFSYLNERGYVAKQLEKWHREYNSKYVDLIEEQLNEALTTYRKPVDGDNYVRRSNQRLQRPHVYLPIHLYGQLVHHKTGCHLLEVQNIITELCRNVRTPDLDKWEEIKKLKASLWALGNIGSSNWGLNLLQEENVIPDILKLAKQCEVLSIRGTCVYVLGLIAKTKQGCDILKCHNWDAVRHSRKHLWPVVPDDVEQLCNELSSIPSTLSLNSESTSSRHNSESESVPSSMFILEDDRFGSSSTSTFFLDINEDTEPTFYDRSGPIKDKNSFPFFASSKLVKNRILNSLTLPNKKHRSSSDPKGGKLSSESKTSNRRIRTLTEPSVDFNHSDDFTPISTVQKTLQLETSFMGNKHIEDTGSTPSIGENDLKFTKNFGTENHRENTSRERLVVESSTSSHMKIRSQSFNTDTTTSGISSMSSSPSRETVGVDATTMDTDCGSMSTVVSTKTIKTSHYLTPQSNHLSLSKSNSVSLVPPGSSHTLPRRAQSLKAPSIATIKSLADCNFSYTSSRDAFGYATLKRLQQQRMHPSLSHSEALASPAKDVLFTDTITMKANSFESRLTPSRFMKALSYASLDKEDLLSPINQNTLQRSSSVRSMVSSATYGGSDDYIGLALPVDINDIFQVKDIPYFQTKNIPPHDDRGARAFAHDAGGLPSGTGGLVKNSFHLLRQQMSLTEIMNSIHSDASLFLESTEDTGLQEHTDDNCLYCVCIEILGFQPSNQLSAICSHSDFQDIPYSDWCEQTIHNPLEVVPSKFSGISGCSDGVSQEGSASSTKSTELLLGVKTIPDDTPMCRILLRKEVLRLVINLSSSVSTKCHETGLLTIKEKYPQTFDDICLYSEVSHLLSHCTFRLPCRRFIQELFQDVQFLQMHEEAEAVLATPPKQPIVDTSAES*"}, - {"NM_001220769.1", 
"MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVGERPFQCNQCGASFTQKGNLLRHIKLHSGEKPFKCHLCNYACRRRDALTGHLRTHSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRKSSMPQKFLGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001260.1", "MDYDFKVKLSSERERVEDLFEYEGCKVGRGTYGHVYKAKRKDGKDDKDYALKQIEGTGISMSACREIALLRELKHPNVISLQKVFLSHADRKVWLLFDYAEHDLWHIIKFHRASKANKKPVQLPRGMVKSLLYQILDGIHYLHANWVLHRDLKPANILVMGEGPERGRVKIADMGFARLFNSPLKPLADLDPVVVTFWYRAPELLLGARHYTKAIDIWAIGCIFAELLTSEPIFHCRQEDIKTSNPYHHDQLDRIFNVMGFPADKDWEDIKKMPEHSTLMKDFRRNTYTNCSLIKYMEKHKVKPDSKAFHLLQKLLTMDPIKRITSEQAMQDPYFLEDPLPTSDVFAGCQIPYPKREFLTEEEPDDKGDKKNQQQQQGNNHTNGTGHPGNQDSSHTQGPPLKKVRVVPPTTTSGGLIMTSDYQRSNPHAAYPNPGPSTSQPQSSMGYSATSQQPPQYSHQTHRY*"}, - {"NM_006724.2", "MREAAAALVPPPAFAVTPAAAMEEPPPPPPPPPPPPEPETESEPECCLAARQEGTLGDSACKSPESDLEDFSDETNTENLYGTSPPSTPRQMKRMSTKHQRNNVGRPASRSNLKEKMNAPNQPPHKDTGKTVENVEEYSYKQEKKIRAALRTTERDHKKNVQCSFMLDSVGGSLPKKSIPDVDLNKPYLSLGCSNAKLPVSVPMPIARPARQTSRTDCPADRLKFFETLRLLLKLTSVSKKKDREQRGQENTSGFWLNRSNELIWLELQAWHAGRTINDQDFFLYTARQAIPDIINEILTFKVDYGSFAFVRDRAGFNGTSVEGQCKATPGTKIVGYSTHHEHLQRQRVSFEQVKRIMELLEYIEALYPSLQALQKDYEKYAAKDFQDRVQALCLWLNITKDLNQKLRIMGTVLGIKNLSDIGWPVFEIPSPRPSKGNEPEYEGDDTEGELKELESSTDESEEEQISDPRVPEIRQPIDNSFDIQSRDCISKKLERLESEDDSLGWGAPDWSTEAGFSRHCLTSIYRPFVDKALKQMGLRKLILRLHKLMDGSLQRARIALVKNDRPVEFSEFPDPMWGSDYVQLSRTPPSSEEKCSAVSWEELKAMDLPSFEPAFLVLCRVLLNVIHECLKLRLEQRPAGEPSLLSIKQLVRECKEVLKGGLLMKQYYQFMLQEVLEDLEKPDCNIDAFEEDLHKMLMVYFDYMRSWIQMLQQLPQASHSLKNLLEEEWNFTKEITHYIRGGEAQAGKLFCDIAGMLLKSTGSFLEFGLQESCAEFWTSADDSSASDEIRRSVIEISRALKELFHEARERASKALGFAKMLRKDLEIAAEFRLSAPVRDLLDVLKSKQYVKVQIPGLENLQMFVPDTLAEEKSIILQLLNAAAGKDCSKDSDDVLIDAYLLLTKHGDRARDSEDSWGTWEAQPVKVVPQVETVDTLRSMQVDNLLLVVMQSAHLTIQRKAFQQSIEGLMTLCQEQTSSQPVIAKALQQLKNDALELCNRISNAIDRVDHMFTSEFDAEVDESESVTLQQYYREAMIQGYNFGFEYHKEVVRLMSGEFRQKIGDKYISFARKWMNYVLTKCESGRGTRPRWATQGFD
FLQAIEPAFISALPEDDFLSLQALMNECIGHVIGKPHSPVTGLYLAIHRNSPRPMKVPRCHSDPPNPHLIIPTPEGFRGSSVPENDRLASIAAELQFRSLSRHSSPTEERDEPAYPRGDSSGSTRRSWELRTLISQSKDTASKLGPIEAIQKSVRLFEEKRYREMRRKNIIGQVCDTPKSYDNVMHVGLRKVTFKWQRGNKIGEGQYGKVYTCISVDTGELMAMKEIRFQPNDHKTIKETADELKIFEGIKHPNLVRYFGVELHREEMYIFMEYCDEGTLEEVSRLGLQEHVIRLYSKQITIAINVLHEHGIVHRDIKGANIFLTSSGLIKLGDFGCSVKLKNNAQTMPGEVNSTLGTAAYMAPEVITRAKGEGHGRAADIWSLGCVVIEMVTGKRPWHEYEHNFQIMYKVGMGHKPPIPERLSPEGKDFLSHCLESDPKMRWTASQLLDHSFVKVCTDEE*"}, - - {"NM_001220774.1", "MDADEGQDMSQVSVGKPHKCGYCGRSYKQRSSLEEHKERCHNYLESMGLPGTLYPVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220771.1", "MDADEGQDMSQVSGKESPPVSDTPDEGDEPMPIPEDLSTTSGGQQSSKSDRVVASNVKVETQSDEENGRACEMNGEECAEDLRMLDASGEKMNGSHRDQGSSALSGVGGIRLPNGKLKCDICGIICIGPNVLMVHKRSHTGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220772.1", "MDADEGQDMSQVSGDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_001220776.1", "MDADEGQDMSQVSVIKEETNHSEMAEDLCKIGSERSLVLDRLASNVAKRDKGLSDTPYDSSASYEKENEMMKSHVMDQAINNAINYLGAESLRPLVQTPPGGSEVVPVISPMYQLHKPLAEGTPRSNHSAQDSAVENLLLLSKAKLVPSEREASPSNSCQDSTDTESNNEEQRSGLIYLTNHIAPHARNGLSLKEEHRAYDLLRAASENSQDALRVVSTSGEQMKVYKCEHCRVLFLDHVMYTIHMGCHGFRDPFECNMCGYHSQDRYEFSSHITRGEHRFHMS*"}, - {"NM_000545.5", 
"MVSKLSQLQTELLAALLESGLSKEALIQALGEPGPYLLAGEGPLDKGESCGGGRGELAELPNGLGETRGSEDETDDDGEDFTPPILKELENLSPEEAAHQKAVVETLLQEDPWRVAKMVKSYLQQHNIPQREVVDTTGLNQSHLSQHLNKGTPMKTQKRAALYTWYVRKQREVAQQFTHAGQGGLIEEPTGDELPTKKGRRNRFKWGPASQQILFQAYERQKNPSKEERETLVEECNRAECIQRGVSPSQAQGLGSNLVTEVRVYNWFANRRKEEAFRHKLAMDTYSGPPPGPGPGPALPAHSSPGLPPPALSPSKVHGVRYGQPATSETAEVPSSSGGPLVTVSTPLHQVSPTGLEPSHSLLSTEAKLVSAAGGPLPPVSTLTALHSLEQTSPGLNQQPQNLIMASLPGVMTIGPGEPASLGPTFTNTGASTLVIGLASTQAQSVPVINSMGSSLTTLQPVQFSQPLHPSYQQPLMPPVQSHVTQSPFMATMAQLQSPHALYSHKPEVAQYTHTGLLPQTMLITDTTNLSALASLTPTKQVFTSDTEASSESGLHTPASQATTLHVPSQDPAGIQHLQPAHRLSASPTVSSSSLVLYQSSDSSNGQSHLLPSNHSVIETFISTQMASSSQ*"}, - {"NM_000535.5", "MERAESSSTEPAKAIKPIDRKSVHQICSGQVVLSLSTAVKELVENSLDAGATNIDLKLKDYGVDLIEVSDNGCGVEEENFEGLTLKHHTSKIQEFADLTQVETFGFRGEALSSLCALSDVTISTCHASAKVGTRLMFDHNGKIIQKTPYPRPRGTTVSVQQLFSTLPVRHKEFQRNIKKEYAKMVQVLHAYCIISAGIRVSCTNQLGQGKRQPVVCTGGSPSIKENIGSVFGQKQLQSLIPFVQLPPSDSVCEEYGLSCSDALHNLFYISGFISQCTHGVGRSSTDRQFFFINRRPCDPAKVCRLVNEVYHMYNRHQYPFVVLNISVDSECVDINVTPDKRQILLQEEKLLLAVLKTSLIGMFDSDVNKLNVSQQPLLDVEGNLIKMHAADLEKPMVEKQDQSPSLRTGEEKKDVSISRLREAFSLRHTTENKPHSPKTPEPRRSPLGQKRGMLSSSTSGAISDKGVLRPQKEAVSSSHGPSDPTDRAEVEKDSGHGSTSVDSEGFSIPDTGSHCSSEYAASSPGDRGSQEHVDSQEKAPETDDSFSDVDCHSNQEDTGCKFRVLPQPTNLATPNTKRFKKEEILSSSDICQKLVNTQDMSASQVDVAVKINKKVVPLDFSMSSLAKRIKQLHHEAQQSEGEQNYRKFRAKICPGENQAAEDELRKEISKTMFAEMEIIGQFNLGFIITKLNEDIFIVDQHATDEKYNFEMLQQHTVLQGQRLIAPQTLNLTAVNEAVLIENLEIFRKNGFDFVIDENAPVTERAKLISLPTSKNWTFGPQDVDELIFMLSDSPGVMCRPSRVKQMFASRACRKSVMIGTALNTSEMKKLITHMGEMDHPWNCPHGRPTMRHIANLGVISQN*"}, - {"NM_000314.4", "MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCAERHYDTAKFNCRVAQYPFEDHNPPQLELIKPFCEDLDQWLSEDDNHVAAIHCKAGKGRTGVMICAYLLHRGKFLKAQEALDFYGEVRTRDKKGVTIPSQRRYVYYYSYLLKNHLDYRPVALLFHKMMFETIPMFSGGTCNPQFVVCQLKVKIYSSNSGPTRREDKFMYFEFPQPLPVCGDIKVEFFHKQNKMLKKDKMFHFWVNTFFIPGPEETSEKVENGSLCDQEIDSICSIERADNDKEYLVLTLTKNDLDKANKDKANRYFSPNFKVKLYFTKTVEEPSNPEASSSTSVTPDVSDNEPDHYRYSDTTDSDPENEPFDEDQHTQITKV*"}, - }; - - public static bool 
Validate(ISequenceProvider sequenceProvider, IChromosome chromosome, - string transcriptId, bool onReverseStrand, ITranscriptRegion[] transcriptRegions, IRnaEdit[] rnaEdits, - ITranslation translation, byte startExonPhase = 0) - { - sequenceProvider.LoadChromosome(chromosome); - - if (TranscriptCdnas.TryGetValue(transcriptId, out var expectedCdna)) - { - var cdnaSequence = new CdnaSequence(sequenceProvider.Sequence, translation?.CodingRegion, - transcriptRegions, onReverseStrand, rnaEdits); - string cdna = cdnaSequence.GetCdnaSequence(); - if (cdna != expectedCdna) throw new InvalidDataException($"{transcriptId} cdna is still not right."); - - } - else - { - throw new InvalidDataException($"{transcriptId} cdna not listed"); - } - - - string cds=null; - if (TranscriptCds.TryGetValue(transcriptId, out var expectedCds)) - { - var cdsSequence = new CodingSequence(sequenceProvider.Sequence, translation.CodingRegion, - transcriptRegions, onReverseStrand, startExonPhase, rnaEdits); - cds = cdsSequence.GetCodingSequence(); - - if (cds != expectedCds) throw new InvalidDataException($"{transcriptId} CDS is still not right."); - - } - - if (cds != null && TranscriptPeptides.TryGetValue(transcriptId, out var expectedAA)) - { - var aminoAcids = new AminoAcids(false); - string aa = aminoAcids.TranslateBases(cds, false); - - if (aa != expectedAA) throw new InvalidDataException($"{transcriptId} AA is still not right."); - } - - return true; - } - } -} \ No newline at end of file diff --git a/VariantAnnotation/CommonAssemblyInfo.props b/VariantAnnotation/CommonAssemblyInfo.props index 86a7e40a..c4135c58 100644 --- a/VariantAnnotation/CommonAssemblyInfo.props +++ b/VariantAnnotation/CommonAssemblyInfo.props @@ -1,10 +1,10 @@  Illumina - © 2020 Illumina, Inc. - 3.2.3 - 3.2.3 - 3.2.3 + © 2021 Illumina, Inc. 
+ 3.2.5 + 3.2.5 + 3.2.5 Stromberg, Roy, Lajugie, Jiang, Li, and Kang diff --git a/VariantAnnotation/GeneAnnotation/AnnotatedGene.cs b/VariantAnnotation/GeneAnnotation/AnnotatedGene.cs index c713446e..a7aa86aa 100644 --- a/VariantAnnotation/GeneAnnotation/AnnotatedGene.cs +++ b/VariantAnnotation/GeneAnnotation/AnnotatedGene.cs @@ -1,42 +1,42 @@ -using System; -using System.Text; -using VariantAnnotation.Interface.GeneAnnotation; -using VariantAnnotation.IO; - -namespace VariantAnnotation.GeneAnnotation -{ - public sealed class AnnotatedGene:IAnnotatedGene - { - public string GeneName { get; } - public IGeneAnnotation[] Annotations { get; } - - public AnnotatedGene(string geneName, IGeneAnnotation[] annotations) - { - GeneName = geneName; - Annotations = annotations; - } - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("name",GeneName); - foreach (var geneAnnotation in Annotations) - { - if (geneAnnotation.IsArray) - { - jsonObject.AddStringValues(geneAnnotation.DataSource, geneAnnotation.JsonStrings,false); - } - else - { - jsonObject.AddStringValue(geneAnnotation.DataSource, geneAnnotation.JsonStrings[0],false); - } - - } - - sb.Append(JsonObject.CloseBrace); - } - - public int CompareTo(IAnnotatedGene other) => string.Compare(GeneName, other.GeneName, StringComparison.Ordinal); - } +using System; +using System.Text; +using VariantAnnotation.Interface.GeneAnnotation; +using VariantAnnotation.IO; + +namespace VariantAnnotation.GeneAnnotation +{ + public sealed class AnnotatedGene:IAnnotatedGene + { + public string GeneName { get; } + public IGeneAnnotation[] Annotations { get; } + + public AnnotatedGene(string geneName, IGeneAnnotation[] annotations) + { + GeneName = geneName; + Annotations = annotations; + } + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + sb.Append(JsonObject.OpenBrace); + 
jsonObject.AddStringValue("name",GeneName); + foreach (var geneAnnotation in Annotations) + { + if (geneAnnotation.IsArray) + { + jsonObject.AddStringValues(geneAnnotation.DataSource, geneAnnotation.JsonStrings,false); + } + else + { + jsonObject.AddStringValue(geneAnnotation.DataSource, geneAnnotation.JsonStrings[0],false); + } + + } + + sb.Append(JsonObject.CloseBrace); + } + + public int CompareTo(IAnnotatedGene other) => string.Compare(GeneName, other.GeneName, StringComparison.Ordinal); + } } \ No newline at end of file diff --git a/VariantAnnotation/GeneAnnotation/GeneAnnotationProvider.cs b/VariantAnnotation/GeneAnnotation/GeneAnnotationProvider.cs index 39d26503..eb925181 100644 --- a/VariantAnnotation/GeneAnnotation/GeneAnnotationProvider.cs +++ b/VariantAnnotation/GeneAnnotation/GeneAnnotationProvider.cs @@ -1,51 +1,51 @@ -using System.Collections.Generic; -using System.IO; -using System.Linq; -using Genome; -using OptimizedCore; -using VariantAnnotation.Interface.GeneAnnotation; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO; -using VariantAnnotation.NSA; - -namespace VariantAnnotation.GeneAnnotation -{ - public sealed class GeneAnnotationProvider : IGeneAnnotationProvider - { - public string Name { get; } - public GenomeAssembly Assembly => GenomeAssembly.Unknown; - public IEnumerable DataSourceVersions => _ngaReaders.Select(x => x.Version); - - private readonly List _ngaReaders; - - public string Annotate(string geneName) - { - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("name", geneName); - - bool hasAnnotation = false; - foreach (NgaReader ngaReader in _ngaReaders) - { - var jsonString = ngaReader.GetAnnotation(geneName); - jsonObject.AddStringValue(ngaReader.JsonKey, jsonString, false); - if (!string.IsNullOrEmpty(jsonString)) hasAnnotation = true; - } - - if (!hasAnnotation) return null; - - sb.Append(JsonObject.CloseBrace); 
- - return StringBuilderCache.GetStringAndRelease(sb); - } - - public GeneAnnotationProvider(IEnumerable dbStreams) - { - Name = "Gene annotation provider"; - _ngaReaders = new List(); - foreach (var dbStream in dbStreams) _ngaReaders.Add(new NgaReader(dbStream)); - } - } +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Genome; +using OptimizedCore; +using VariantAnnotation.Interface.GeneAnnotation; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO; +using VariantAnnotation.NSA; + +namespace VariantAnnotation.GeneAnnotation +{ + public sealed class GeneAnnotationProvider : IGeneAnnotationProvider + { + public string Name { get; } + public GenomeAssembly Assembly => GenomeAssembly.Unknown; + public IEnumerable DataSourceVersions => _ngaReaders.Select(x => x.Version); + + private readonly List _ngaReaders; + + public string Annotate(string geneName) + { + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + jsonObject.AddStringValue("name", geneName); + + bool hasAnnotation = false; + foreach (NgaReader ngaReader in _ngaReaders) + { + var jsonString = ngaReader.GetAnnotation(geneName); + jsonObject.AddStringValue(ngaReader.JsonKey, jsonString, false); + if (!string.IsNullOrEmpty(jsonString)) hasAnnotation = true; + } + + if (!hasAnnotation) return null; + + sb.Append(JsonObject.CloseBrace); + + return StringBuilderCache.GetStringAndRelease(sb); + } + + public GeneAnnotationProvider(IEnumerable dbStreams) + { + Name = "Gene annotation provider"; + _ngaReaders = new List(); + foreach (var dbStream in dbStreams) _ngaReaders.Add(new NgaReader(dbStream)); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/GeneAnnotation/OmimEntry.cs b/VariantAnnotation/GeneAnnotation/OmimEntry.cs index 901fa0d6..3dfdbf08 100644 --- a/VariantAnnotation/GeneAnnotation/OmimEntry.cs +++ b/VariantAnnotation/GeneAnnotation/OmimEntry.cs @@ -1,141 +1,141 @@ 
-using System.Collections.Generic; -using System.Linq; -using System.Text; -using IO; -using OptimizedCore; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.IO; - -namespace VariantAnnotation.GeneAnnotation -{ - public sealed class OmimEntry - { - public readonly string GeneSymbol; - private readonly string _description; - private readonly int _mimNumber; - private readonly List _phenotypes; - - public OmimEntry(string geneSymbol, string description, int mimNumber, List phenotypes) - { - GeneSymbol = geneSymbol; - _description = description; - _mimNumber = mimNumber; - _phenotypes = phenotypes; - } - - public override string ToString() - { - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - - jsonObject.AddIntValue("mimNumber", _mimNumber); - jsonObject.AddStringValue("description", _description?.Replace(@"\'", @"'")); - if (_phenotypes.Count > 0) jsonObject.AddObjectValues("phenotypes", _phenotypes); - sb.Append(JsonObject.CloseBrace.ToString()); - - return StringBuilderCache.GetStringAndRelease(sb); - } - - public void Write(ExtendedBinaryWriter writer) - { - writer.WriteOptAscii(GeneSymbol); - writer.WriteOptAscii(_description); - writer.WriteOpt(_mimNumber); - writer.WriteOpt(_phenotypes.Count); - foreach (var phenotype in _phenotypes) phenotype.Write(writer); - } - - public static OmimEntry Read(ExtendedBinaryReader reader) - { - var geneSymbol = reader.ReadAsciiString(); - var description = reader.ReadAsciiString(); - var mimNumber = reader.ReadOptInt32(); - var phenotypeCount = reader.ReadOptInt32(); - var phenotypes = new List(); - - for (var i = 0; i < phenotypeCount; i++) - { - phenotypes.Add(Phenotype.ReadPhenotype(reader)); - } - - return new OmimEntry(geneSymbol, description, mimNumber, phenotypes); - } - - public sealed class Phenotype : IJsonSerializer - { - private readonly int _mimNumber; - private readonly string _phenotype; - private readonly Mapping _mapping; - 
private readonly Comments _comments; - private readonly HashSet _inheritance; - - public Phenotype(int mimNumber, string phenotype, Mapping mapping, Comments comments, HashSet inheritance) - { - _mimNumber = mimNumber; - _phenotype = phenotype; - _mapping = mapping; - _comments = comments; - _inheritance = inheritance; - } - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - - if (_mimNumber >= 100000) jsonObject.AddIntValue("mimNumber", _mimNumber); - jsonObject.AddStringValue("phenotype", _phenotype); - if (_mapping != Mapping.unknown) jsonObject.AddStringValue("mapping", _mapping.ToString().Replace("_", " ")); - if (_inheritance != null && _inheritance.Count > 0) jsonObject.AddStringValues("inheritances", _inheritance); - if (_comments != Comments.unknown) jsonObject.AddStringValue("comments", _comments.ToString().Replace("_", " ")); - - sb.Append(JsonObject.CloseBrace); - } - - public static Phenotype ReadPhenotype(ExtendedBinaryReader reader) - { - var mimNumber = reader.ReadOptInt32(); - var phenotype = reader.ReadAsciiString(); - var mapping = (Mapping)reader.ReadByte(); - var comments = (Comments)reader.ReadByte(); - var inheritance = reader.ReadOptArray(reader.ReadAsciiString); - var inheritances = inheritance == null ? 
null : new HashSet(inheritance); - - return new Phenotype(mimNumber, phenotype, mapping, comments, inheritances); - } - - public void Write(ExtendedBinaryWriter writer) - { - writer.WriteOpt(_mimNumber); - writer.WriteOptAscii(_phenotype); - writer.Write((byte)_mapping); - writer.Write((byte)_comments); - writer.WriteOptArray(_inheritance.ToArray(), writer.WriteOptAscii); - } - } - - public enum Mapping : byte - { - // ReSharper disable InconsistentNaming - unknown, - mapping_of_the_wildtype_gene, - disease_phenotype_itself_was_mapped, - molecular_basis_of_the_disorder_is_known, - chromosome_deletion_or_duplication_syndrome - // ReSharper restore InconsistentNaming - } - - public enum Comments : byte - { - // ReSharper disable InconsistentNaming - unknown, - nondiseases, - contribute_to_susceptibility_to_multifactorial_disorders_or_to_susceptibility_to_infection, - unconfirmed_or_possibly_spurious_mapping - // ReSharper restore InconsistentNaming - } - } +using System.Collections.Generic; +using System.Linq; +using System.Text; +using IO; +using OptimizedCore; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.IO; + +namespace VariantAnnotation.GeneAnnotation +{ + public sealed class OmimEntry + { + public readonly string GeneSymbol; + private readonly string _description; + private readonly int _mimNumber; + private readonly List _phenotypes; + + public OmimEntry(string geneSymbol, string description, int mimNumber, List phenotypes) + { + GeneSymbol = geneSymbol; + _description = description; + _mimNumber = mimNumber; + _phenotypes = phenotypes; + } + + public override string ToString() + { + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + + jsonObject.AddIntValue("mimNumber", _mimNumber); + jsonObject.AddStringValue("description", _description?.Replace(@"\'", @"'")); + if (_phenotypes.Count > 0) jsonObject.AddObjectValues("phenotypes", _phenotypes); + 
sb.Append(JsonObject.CloseBrace.ToString()); + + return StringBuilderCache.GetStringAndRelease(sb); + } + + public void Write(ExtendedBinaryWriter writer) + { + writer.WriteOptAscii(GeneSymbol); + writer.WriteOptAscii(_description); + writer.WriteOpt(_mimNumber); + writer.WriteOpt(_phenotypes.Count); + foreach (var phenotype in _phenotypes) phenotype.Write(writer); + } + + public static OmimEntry Read(ExtendedBinaryReader reader) + { + var geneSymbol = reader.ReadAsciiString(); + var description = reader.ReadAsciiString(); + var mimNumber = reader.ReadOptInt32(); + var phenotypeCount = reader.ReadOptInt32(); + var phenotypes = new List(); + + for (var i = 0; i < phenotypeCount; i++) + { + phenotypes.Add(Phenotype.ReadPhenotype(reader)); + } + + return new OmimEntry(geneSymbol, description, mimNumber, phenotypes); + } + + public sealed class Phenotype : IJsonSerializer + { + private readonly int _mimNumber; + private readonly string _phenotype; + private readonly Mapping _mapping; + private readonly Comments _comments; + private readonly HashSet _inheritance; + + public Phenotype(int mimNumber, string phenotype, Mapping mapping, Comments comments, HashSet inheritance) + { + _mimNumber = mimNumber; + _phenotype = phenotype; + _mapping = mapping; + _comments = comments; + _inheritance = inheritance; + } + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + + if (_mimNumber >= 100000) jsonObject.AddIntValue("mimNumber", _mimNumber); + jsonObject.AddStringValue("phenotype", _phenotype); + if (_mapping != Mapping.unknown) jsonObject.AddStringValue("mapping", _mapping.ToString().Replace("_", " ")); + if (_inheritance != null && _inheritance.Count > 0) jsonObject.AddStringValues("inheritances", _inheritance); + if (_comments != Comments.unknown) jsonObject.AddStringValue("comments", _comments.ToString().Replace("_", " ")); + + sb.Append(JsonObject.CloseBrace); + } + + public static Phenotype 
ReadPhenotype(ExtendedBinaryReader reader) + { + var mimNumber = reader.ReadOptInt32(); + var phenotype = reader.ReadAsciiString(); + var mapping = (Mapping)reader.ReadByte(); + var comments = (Comments)reader.ReadByte(); + var inheritance = reader.ReadOptArray(reader.ReadAsciiString); + var inheritances = inheritance == null ? null : new HashSet(inheritance); + + return new Phenotype(mimNumber, phenotype, mapping, comments, inheritances); + } + + public void Write(ExtendedBinaryWriter writer) + { + writer.WriteOpt(_mimNumber); + writer.WriteOptAscii(_phenotype); + writer.Write((byte)_mapping); + writer.Write((byte)_comments); + writer.WriteOptArray(_inheritance.ToArray(), writer.WriteOptAscii); + } + } + + public enum Mapping : byte + { + // ReSharper disable InconsistentNaming + unknown, + mapping_of_the_wildtype_gene, + disease_phenotype_itself_was_mapped, + molecular_basis_of_the_disorder_is_known, + chromosome_deletion_or_duplication_syndrome + // ReSharper restore InconsistentNaming + } + + public enum Comments : byte + { + // ReSharper disable InconsistentNaming + unknown, + nondiseases, + contribute_to_susceptibility_to_multifactorial_disorders_or_to_susceptibility_to_infection, + unconfirmed_or_possibly_spurious_mapping + // ReSharper restore InconsistentNaming + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/Caches/CacheConstants.cs b/VariantAnnotation/IO/Caches/CacheConstants.cs index 6c25e7fb..e9e6c5ca 100644 --- a/VariantAnnotation/IO/Caches/CacheConstants.cs +++ b/VariantAnnotation/IO/Caches/CacheConstants.cs @@ -1,24 +1,24 @@ -namespace VariantAnnotation.IO.Caches -{ - public static class CacheConstants - { - public const uint GuardInt = 4041327495; // 87c3e1f0 - public const string Identifier = "NirvanaDB"; - - // increment the schema version when the file structures are updated - // N.B. we only need to regenerate unit tests when the schema version is incremented - // e.g. 
adding a new feature like regulatory elements - public const ushort SchemaVersion = 21; - - // increment the data version when the contents are updated - // e.g. a bug is fixed in SIFT parsing or if transcripts are filtered differently - public const ushort DataVersion = 26; - - public static string TranscriptPath(string prefix) => Combine(prefix, ".transcripts.ndb"); - public static string SiftPath(string prefix) => Combine(prefix, ".sift.ndb"); - public static string PolyPhenPath(string prefix) => Combine(prefix, ".polyphen.ndb"); - public static string BasesPath(string prefix) => Combine(prefix, ".bases"); - - private static string Combine(string prefix, string suffix) => prefix == null ? null : prefix + suffix; - } +namespace VariantAnnotation.IO.Caches +{ + public static class CacheConstants + { + public const uint GuardInt = 4041327495; // 87c3e1f0 + public const string Identifier = "NirvanaDB"; + + // increment the schema version when the file structures are updated + // N.B. we only need to regenerate unit tests when the schema version is incremented + // e.g. adding a new feature like regulatory elements + public const ushort SchemaVersion = 21; + + // increment the data version when the contents are updated + // e.g. a bug is fixed in SIFT parsing or if transcripts are filtered differently + public const ushort DataVersion = 26; + + public static string TranscriptPath(string prefix) => Combine(prefix, ".transcripts.ndb"); + public static string SiftPath(string prefix) => Combine(prefix, ".sift.ndb"); + public static string PolyPhenPath(string prefix) => Combine(prefix, ".polyphen.ndb"); + public static string BasesPath(string prefix) => Combine(prefix, ".bases"); + + private static string Combine(string prefix, string suffix) => prefix == null ? 
null : prefix + suffix; + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/Caches/CacheHeader.cs b/VariantAnnotation/IO/Caches/CacheHeader.cs index bb444eec..372d7858 100644 --- a/VariantAnnotation/IO/Caches/CacheHeader.cs +++ b/VariantAnnotation/IO/Caches/CacheHeader.cs @@ -1,37 +1,37 @@ -using System.IO; -using System.Text; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class CacheHeader : Header - { - public readonly TranscriptCacheCustomHeader Custom; - - public CacheHeader(Header header, TranscriptCacheCustomHeader customHeader) : base(header.Identifier, - header.SchemaVersion, header.DataVersion, header.Source, header.CreationTimeTicks, - header.Assembly) - { - Custom = customHeader; - } - - public new void Write(BinaryWriter writer) - { - base.Write(writer); - Custom.Write(writer); - } - - public static CacheHeader Read(Stream stream) - { - CacheHeader header; - - using (var reader = new BinaryReader(stream, Encoding.Default, true)) - { - var baseHeader = Read(reader); - var customHeader = TranscriptCacheCustomHeader.Read(reader); - header = new CacheHeader(baseHeader, customHeader); - } - - return header; - } - } -} +using System.IO; +using System.Text; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class CacheHeader : Header + { + public readonly TranscriptCacheCustomHeader Custom; + + public CacheHeader(Header header, TranscriptCacheCustomHeader customHeader) : base(header.Identifier, + header.SchemaVersion, header.DataVersion, header.Source, header.CreationTimeTicks, + header.Assembly) + { + Custom = customHeader; + } + + public new void Write(BinaryWriter writer) + { + base.Write(writer); + Custom.Write(writer); + } + + public static CacheHeader Read(Stream stream) + { + CacheHeader header; + + using (var reader = new BinaryReader(stream, Encoding.Default, true)) + { + var baseHeader = Read(reader); + var customHeader = TranscriptCacheCustomHeader.Read(reader); + header = new CacheHeader(baseHeader, 
customHeader); + } + + return header; + } + } +} diff --git a/VariantAnnotation/IO/Caches/Header.cs b/VariantAnnotation/IO/Caches/Header.cs index 07feadc4..cc77d68e 100644 --- a/VariantAnnotation/IO/Caches/Header.cs +++ b/VariantAnnotation/IO/Caches/Header.cs @@ -1,49 +1,49 @@ -using System.IO; -using Genome; -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.IO.Caches -{ - public class Header - { - public readonly string Identifier; - public readonly ushort SchemaVersion; - public readonly ushort DataVersion; - public readonly Source Source; - public readonly long CreationTimeTicks; - public readonly GenomeAssembly Assembly; - - public Header(string identifier, ushort schemaVersion, ushort dataVersion, Source source, - long creationTimeTicks, GenomeAssembly genomeAssembly) - { - Identifier = identifier; - SchemaVersion = schemaVersion; - DataVersion = dataVersion; - Source = source; - CreationTimeTicks = creationTimeTicks; - Assembly = genomeAssembly; - } - - protected void Write(BinaryWriter writer) - { - writer.Write(Identifier); - writer.Write(SchemaVersion); - writer.Write(DataVersion); - writer.Write((byte)Source); - writer.Write(CreationTimeTicks); - writer.Write((byte)Assembly); - } - - protected static Header Read(BinaryReader reader) - { - string identifier = reader.ReadString(); - ushort schemaVersion = reader.ReadUInt16(); - ushort dataVersion = reader.ReadUInt16(); - var source = (Source)reader.ReadByte(); - long creationTimeTicks = reader.ReadInt64(); - var genomeAssembly = (GenomeAssembly)reader.ReadByte(); - - return new Header(identifier, schemaVersion, dataVersion, source, creationTimeTicks, genomeAssembly); - } - } -} +using System.IO; +using Genome; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.IO.Caches +{ + public class Header + { + public readonly string Identifier; + public readonly ushort SchemaVersion; + public readonly ushort DataVersion; + public readonly Source 
Source; + public readonly long CreationTimeTicks; + public readonly GenomeAssembly Assembly; + + public Header(string identifier, ushort schemaVersion, ushort dataVersion, Source source, + long creationTimeTicks, GenomeAssembly genomeAssembly) + { + Identifier = identifier; + SchemaVersion = schemaVersion; + DataVersion = dataVersion; + Source = source; + CreationTimeTicks = creationTimeTicks; + Assembly = genomeAssembly; + } + + protected void Write(BinaryWriter writer) + { + writer.Write(Identifier); + writer.Write(SchemaVersion); + writer.Write(DataVersion); + writer.Write((byte)Source); + writer.Write(CreationTimeTicks); + writer.Write((byte)Assembly); + } + + protected static Header Read(BinaryReader reader) + { + string identifier = reader.ReadString(); + ushort schemaVersion = reader.ReadUInt16(); + ushort dataVersion = reader.ReadUInt16(); + var source = (Source)reader.ReadByte(); + long creationTimeTicks = reader.ReadInt64(); + var genomeAssembly = (GenomeAssembly)reader.ReadByte(); + + return new Header(identifier, schemaVersion, dataVersion, source, creationTimeTicks, genomeAssembly); + } + } +} diff --git a/VariantAnnotation/IO/Caches/PredictionCacheCustomHeader.cs b/VariantAnnotation/IO/Caches/PredictionCacheCustomHeader.cs index 3f91cb7c..c3f857b4 100644 --- a/VariantAnnotation/IO/Caches/PredictionCacheCustomHeader.cs +++ b/VariantAnnotation/IO/Caches/PredictionCacheCustomHeader.cs @@ -1,26 +1,26 @@ -using System.IO; -using VariantAnnotation.Caches.DataStructures; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class PredictionCacheCustomHeader - { - public readonly IndexEntry[] Entries; - - public PredictionCacheCustomHeader(IndexEntry[] entries) => Entries = entries; - - public void Write(BinaryWriter writer) - { - writer.Write((ushort)Entries.Length); - foreach (var entry in Entries) entry.Write(writer); - } - - public static PredictionCacheCustomHeader Read(BinaryReader reader) - { - ushort numReferenceSeqs = reader.ReadUInt16(); - var 
entries = new IndexEntry[numReferenceSeqs]; - for (var i = 0; i < numReferenceSeqs; i++) entries[i].Read(reader); - return new PredictionCacheCustomHeader(entries); - } - } +using System.IO; +using VariantAnnotation.Caches.DataStructures; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class PredictionCacheCustomHeader + { + public readonly IndexEntry[] Entries; + + public PredictionCacheCustomHeader(IndexEntry[] entries) => Entries = entries; + + public void Write(BinaryWriter writer) + { + writer.Write((ushort)Entries.Length); + foreach (var entry in Entries) entry.Write(writer); + } + + public static PredictionCacheCustomHeader Read(BinaryReader reader) + { + ushort numReferenceSeqs = reader.ReadUInt16(); + var entries = new IndexEntry[numReferenceSeqs]; + for (var i = 0; i < numReferenceSeqs; i++) entries[i].Read(reader); + return new PredictionCacheCustomHeader(entries); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/Caches/PredictionCacheReader.cs b/VariantAnnotation/IO/Caches/PredictionCacheReader.cs index 8b2c244d..132d0e3c 100644 --- a/VariantAnnotation/IO/Caches/PredictionCacheReader.cs +++ b/VariantAnnotation/IO/Caches/PredictionCacheReader.cs @@ -1,70 +1,70 @@ -using System; -using System.Collections.Immutable; -using System.IO; -using System.IO.Compression; -using System.Text; -using Compression.Algorithms; -using Compression.FileHandling; -using IO; -using VariantAnnotation.Caches; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.Caches; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class PredictionCacheReader : IDisposable - { - private readonly ExtendedBinaryReader _reader; - private readonly BlockStream _blockStream; - private readonly ImmutableArray _predictionDescriptions; - private readonly IndexEntry[] _indexEntries; - private readonly int _numRefSeqs; - public readonly PredictionHeader Header; - - public PredictionCacheReader(Stream stream, ImmutableArray 
predictionDescriptions) - { - _blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress); - Header = PredictionHeader.Read(stream, _blockStream); - - _reader = new ExtendedBinaryReader(_blockStream, Encoding.Default, true); - _predictionDescriptions = predictionDescriptions; - - _indexEntries = Header.Custom.Entries; - _numRefSeqs = _indexEntries.Length; - } - - public void Dispose() - { - _reader.Dispose(); - _blockStream.Dispose(); - } - - /// - /// parses the database cache file and populates the specified lists and interval trees - /// - public IPredictionCache Read(ushort refIndex) - { - if (refIndex >= _numRefSeqs) return null; - var predictions = GetPredictions(refIndex); - return new PredictionCache(Header.Assembly, predictions, _predictionDescriptions); - } - - public Prediction[] GetPredictions(ushort refIndex) - { - var indexEntry = _indexEntries[refIndex]; - - _blockStream.SetBlockPosition(indexEntry.FileOffset); - - var predictions = new Prediction[indexEntry.Count]; - for (var i = 0; i < indexEntry.Count; i++) predictions[i] = Prediction.Read(_reader, Header.LookupTable); - - return predictions; - } - - public static readonly ImmutableArray SiftDescriptions = ImmutableArray.Create("tolerated", - "deleterious", "tolerated - low confidence", "deleterious - low confidence"); - - public static readonly ImmutableArray PolyphenDescriptions = - ImmutableArray.Create("probably damaging", "possibly damaging", "benign", "unknown"); - } +using System; +using System.Collections.Immutable; +using System.IO; +using System.IO.Compression; +using System.Text; +using Compression.Algorithms; +using Compression.FileHandling; +using IO; +using VariantAnnotation.Caches; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.Caches; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class PredictionCacheReader : IDisposable + { + private readonly ExtendedBinaryReader _reader; + private readonly BlockStream 
_blockStream; + private readonly ImmutableArray _predictionDescriptions; + private readonly IndexEntry[] _indexEntries; + private readonly int _numRefSeqs; + public readonly PredictionHeader Header; + + public PredictionCacheReader(Stream stream, ImmutableArray predictionDescriptions) + { + _blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress); + Header = PredictionHeader.Read(stream, _blockStream); + + _reader = new ExtendedBinaryReader(_blockStream, Encoding.Default, true); + _predictionDescriptions = predictionDescriptions; + + _indexEntries = Header.Custom.Entries; + _numRefSeqs = _indexEntries.Length; + } + + public void Dispose() + { + _reader.Dispose(); + _blockStream.Dispose(); + } + + /// + /// parses the database cache file and populates the specified lists and interval trees + /// + public IPredictionCache Read(ushort refIndex) + { + if (refIndex >= _numRefSeqs) return null; + var predictions = GetPredictions(refIndex); + return new PredictionCache(Header.Assembly, predictions, _predictionDescriptions); + } + + public Prediction[] GetPredictions(ushort refIndex) + { + var indexEntry = _indexEntries[refIndex]; + + _blockStream.SetBlockPosition(indexEntry.FileOffset); + + var predictions = new Prediction[indexEntry.Count]; + for (var i = 0; i < indexEntry.Count; i++) predictions[i] = Prediction.Read(_reader, Header.LookupTable); + + return predictions; + } + + public static readonly ImmutableArray SiftDescriptions = ImmutableArray.Create("tolerated", + "deleterious", "tolerated - low confidence", "deleterious - low confidence"); + + public static readonly ImmutableArray PolyphenDescriptions = + ImmutableArray.Create("probably damaging", "possibly damaging", "benign", "unknown"); + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/Caches/PredictionHeader.cs b/VariantAnnotation/IO/Caches/PredictionHeader.cs index 392cee11..96edffa4 100644 --- a/VariantAnnotation/IO/Caches/PredictionHeader.cs +++ 
b/VariantAnnotation/IO/Caches/PredictionHeader.cs @@ -1,56 +1,56 @@ -using System.IO; -using System.Text; -using Compression.FileHandling; -using IO; -using VariantAnnotation.Caches.DataStructures; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class PredictionHeader : Header - { - public readonly PredictionCacheCustomHeader Custom; - public readonly Prediction.Entry[] LookupTable; - - public PredictionHeader(Header header, PredictionCacheCustomHeader customHeader, Prediction.Entry[] lookupTable) - : base(header.Identifier, header.SchemaVersion, header.DataVersion, header.Source, - header.CreationTimeTicks, header.Assembly) - { - Custom = customHeader; - LookupTable = lookupTable; - } - - public new void Write(BinaryWriter writer) - { - base.Write(writer); - Custom.Write(writer); - } - - public static PredictionHeader Read(Stream stream, BlockStream blockStream) - { - Header baseHeader; - PredictionCacheCustomHeader customHeader; - Prediction.Entry[] lookupTable; - - using (var reader = new BinaryReader(stream, Encoding.Default, true)) - { - baseHeader = Read(reader); - customHeader = PredictionCacheCustomHeader.Read(reader); - } - - using (var reader = new ExtendedBinaryReader(blockStream, Encoding.Default, true)) - { - lookupTable = ReadLookupTable(reader); - } - - return new PredictionHeader(baseHeader, customHeader, lookupTable); - } - - private static Prediction.Entry[] ReadLookupTable(ExtendedBinaryReader reader) - { - int numEntries = reader.ReadInt32(); - var lut = new Prediction.Entry[numEntries]; - for (var i = 0; i < numEntries; i++) lut[i] = Prediction.Entry.ReadEntry(reader); - return lut; - } - } -} +using System.IO; +using System.Text; +using Compression.FileHandling; +using IO; +using VariantAnnotation.Caches.DataStructures; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class PredictionHeader : Header + { + public readonly PredictionCacheCustomHeader Custom; + public readonly Prediction.Entry[] LookupTable; + + public 
PredictionHeader(Header header, PredictionCacheCustomHeader customHeader, Prediction.Entry[] lookupTable) + : base(header.Identifier, header.SchemaVersion, header.DataVersion, header.Source, + header.CreationTimeTicks, header.Assembly) + { + Custom = customHeader; + LookupTable = lookupTable; + } + + public new void Write(BinaryWriter writer) + { + base.Write(writer); + Custom.Write(writer); + } + + public static PredictionHeader Read(Stream stream, BlockStream blockStream) + { + Header baseHeader; + PredictionCacheCustomHeader customHeader; + Prediction.Entry[] lookupTable; + + using (var reader = new BinaryReader(stream, Encoding.Default, true)) + { + baseHeader = Read(reader); + customHeader = PredictionCacheCustomHeader.Read(reader); + } + + using (var reader = new ExtendedBinaryReader(blockStream, Encoding.Default, true)) + { + lookupTable = ReadLookupTable(reader); + } + + return new PredictionHeader(baseHeader, customHeader, lookupTable); + } + + private static Prediction.Entry[] ReadLookupTable(ExtendedBinaryReader reader) + { + int numEntries = reader.ReadInt32(); + var lut = new Prediction.Entry[numEntries]; + for (var i = 0; i < numEntries; i++) lut[i] = Prediction.Entry.ReadEntry(reader); + return lut; + } + } +} diff --git a/VariantAnnotation/IO/Caches/TranscriptCacheCustomHeader.cs b/VariantAnnotation/IO/Caches/TranscriptCacheCustomHeader.cs index 6020cb7e..bfe3c966 100644 --- a/VariantAnnotation/IO/Caches/TranscriptCacheCustomHeader.cs +++ b/VariantAnnotation/IO/Caches/TranscriptCacheCustomHeader.cs @@ -1,29 +1,29 @@ -using System.IO; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class TranscriptCacheCustomHeader - { - public readonly ushort VepVersion; - private readonly long _vepReleaseTicks; - - public TranscriptCacheCustomHeader(ushort vepVersion, long vepReleaseTicks) - { - VepVersion = vepVersion; - _vepReleaseTicks = vepReleaseTicks; - } - - public void Write(BinaryWriter writer) - { - writer.Write(_vepReleaseTicks); - 
writer.Write(VepVersion); - } - - public static TranscriptCacheCustomHeader Read(BinaryReader reader) - { - long vepReleaseTicks = reader.ReadInt64(); - ushort vepVersion = reader.ReadUInt16(); - return new TranscriptCacheCustomHeader(vepVersion, vepReleaseTicks); - } - } +using System.IO; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class TranscriptCacheCustomHeader + { + public readonly ushort VepVersion; + private readonly long _vepReleaseTicks; + + public TranscriptCacheCustomHeader(ushort vepVersion, long vepReleaseTicks) + { + VepVersion = vepVersion; + _vepReleaseTicks = vepReleaseTicks; + } + + public void Write(BinaryWriter writer) + { + writer.Write(_vepReleaseTicks); + writer.Write(VepVersion); + } + + public static TranscriptCacheCustomHeader Read(BinaryReader reader) + { + long vepReleaseTicks = reader.ReadInt64(); + ushort vepVersion = reader.ReadUInt16(); + return new TranscriptCacheCustomHeader(vepVersion, vepReleaseTicks); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/Caches/TranscriptCacheReader.cs b/VariantAnnotation/IO/Caches/TranscriptCacheReader.cs index ba2cc041..5a82f096 100644 --- a/VariantAnnotation/IO/Caches/TranscriptCacheReader.cs +++ b/VariantAnnotation/IO/Caches/TranscriptCacheReader.cs @@ -1,92 +1,92 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.IO.Compression; -using Compression.Algorithms; -using Compression.FileHandling; -using Genome; -using Intervals; -using IO; -using VariantAnnotation.Caches; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.IO.Caches -{ - public sealed class TranscriptCacheReader : IDisposable - { - private readonly BufferedBinaryReader _reader; - public readonly CacheHeader Header; - - public TranscriptCacheReader(Stream stream) - { - Header = CacheHeader.Read(stream); - var blockStream = new BlockStream(new Zstandard(), stream, 
CompressionMode.Decompress); - _reader = new BufferedBinaryReader(blockStream); - } - - public void Dispose() => _reader.Dispose(); - - /// - /// parses the database cache file and populates the specified lists and interval trees - /// - // SET-362 DEBUG: Remove the sequenceProvider argument in the future - public TranscriptCacheData Read(ISequenceProvider sequenceProvider, IDictionary refIndexToChromosome) - { - var genes = ReadItems(_reader, () => Gene.Read(_reader, refIndexToChromosome)); - var transcriptRegions = ReadItems(_reader, () => TranscriptRegion.Read(_reader)); - var mirnas = ReadItems(_reader, () => IntervalExtensions.Read(_reader)); - var peptideSeqs = ReadItems(_reader, () => _reader.ReadAsciiString()); - var regulatoryRegions = ReadIntervals(_reader, () => RegulatoryRegion.Read(_reader, refIndexToChromosome)); - var transcripts = ReadIntervals(_reader, () => Transcript.Read(_reader, refIndexToChromosome, genes, transcriptRegions, mirnas, peptideSeqs, sequenceProvider)); - - return new TranscriptCacheData(Header, genes, transcriptRegions, mirnas, peptideSeqs, transcripts, regulatoryRegions); - } - - private static IntervalArray[] ReadIntervals(IBufferedBinaryReader reader, Func readMethod) where T : IInterval - { - var numRefSeqs = reader.ReadOptInt32(); - var intervalArrays = new IntervalArray[numRefSeqs]; - - for (int refSeqIndex = 0; refSeqIndex < numRefSeqs; refSeqIndex++) - { - var numItems = reader.ReadOptInt32(); - if (numItems == 0) continue; - - var intervals = new Interval[numItems]; - - for (int i = 0; i < numItems; i++) - { - var item = readMethod(); - intervals[i] = new Interval(item.Start, item.End, item); - } - - intervalArrays[refSeqIndex] = new IntervalArray(intervals); - } - - CheckGuard(reader); - return intervalArrays; - } - - internal static T[] ReadItems(IBufferedBinaryReader reader, Func readMethod) - { - var numItems = reader.ReadOptInt32(); - var items = new T[numItems]; - for (int i = 0; i < numItems; i++) items[i] = 
readMethod(); - CheckGuard(reader); - return items; - } - - /// - /// check if the section guard is in place - /// - internal static void CheckGuard(IBufferedBinaryReader reader) - { - uint observedGuard = reader.ReadUInt32(); - if (observedGuard != CacheConstants.GuardInt) - { - throw new InvalidDataException($"Expected a guard integer ({CacheConstants.GuardInt}), but found another value: ({observedGuard})"); - } - } - } +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Compression; +using Compression.Algorithms; +using Compression.FileHandling; +using Genome; +using Intervals; +using IO; +using VariantAnnotation.Caches; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.IO.Caches +{ + public sealed class TranscriptCacheReader : IDisposable + { + private readonly BufferedBinaryReader _reader; + public readonly CacheHeader Header; + + public TranscriptCacheReader(Stream stream) + { + Header = CacheHeader.Read(stream); + var blockStream = new BlockStream(new Zstandard(), stream, CompressionMode.Decompress); + _reader = new BufferedBinaryReader(blockStream); + } + + public void Dispose() => _reader.Dispose(); + + /// + /// parses the database cache file and populates the specified lists and interval trees + /// + // SET-362 DEBUG: Remove the sequenceProvider argument in the future + public TranscriptCacheData Read(ISequenceProvider sequenceProvider, IDictionary refIndexToChromosome) + { + var genes = ReadItems(_reader, () => Gene.Read(_reader, refIndexToChromosome)); + var transcriptRegions = ReadItems(_reader, () => TranscriptRegion.Read(_reader)); + var mirnas = ReadItems(_reader, () => IntervalExtensions.Read(_reader)); + var peptideSeqs = ReadItems(_reader, () => _reader.ReadAsciiString()); + var regulatoryRegions = ReadIntervals(_reader, () => RegulatoryRegion.Read(_reader, refIndexToChromosome)); + var transcripts = ReadIntervals(_reader, () => 
Transcript.Read(_reader, refIndexToChromosome, genes, transcriptRegions, mirnas, peptideSeqs, sequenceProvider)); + + return new TranscriptCacheData(Header, genes, transcriptRegions, mirnas, peptideSeqs, transcripts, regulatoryRegions); + } + + private static IntervalArray[] ReadIntervals(IBufferedBinaryReader reader, Func readMethod) where T : IInterval + { + var numRefSeqs = reader.ReadOptInt32(); + var intervalArrays = new IntervalArray[numRefSeqs]; + + for (int refSeqIndex = 0; refSeqIndex < numRefSeqs; refSeqIndex++) + { + var numItems = reader.ReadOptInt32(); + if (numItems == 0) continue; + + var intervals = new Interval[numItems]; + + for (int i = 0; i < numItems; i++) + { + var item = readMethod(); + intervals[i] = new Interval(item.Start, item.End, item); + } + + intervalArrays[refSeqIndex] = new IntervalArray(intervals); + } + + CheckGuard(reader); + return intervalArrays; + } + + internal static T[] ReadItems(IBufferedBinaryReader reader, Func readMethod) + { + var numItems = reader.ReadOptInt32(); + var items = new T[numItems]; + for (int i = 0; i < numItems; i++) items[i] = readMethod(); + CheckGuard(reader); + return items; + } + + /// + /// check if the section guard is in place + /// + internal static void CheckGuard(IBufferedBinaryReader reader) + { + uint observedGuard = reader.ReadUInt32(); + if (observedGuard != CacheConstants.GuardInt) + { + throw new InvalidDataException($"Expected a guard integer ({CacheConstants.GuardInt}), but found another value: ({observedGuard})"); + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/JsonCommon.cs b/VariantAnnotation/IO/JsonCommon.cs index c8e73de5..fa74ceb8 100644 --- a/VariantAnnotation/IO/JsonCommon.cs +++ b/VariantAnnotation/IO/JsonCommon.cs @@ -1,8 +1,8 @@ -namespace VariantAnnotation.IO -{ - public static class JsonCommon - { - public const int SchemaVersion = 6; - public const string FrequencyRoundingFormat = "0.######"; - } +namespace VariantAnnotation.IO +{ + public 
static class JsonCommon + { + public const int SchemaVersion = 6; + public const string FrequencyRoundingFormat = "0.######"; + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/JsonObject.cs b/VariantAnnotation/IO/JsonObject.cs index 90c61a76..ff919a97 100644 --- a/VariantAnnotation/IO/JsonObject.cs +++ b/VariantAnnotation/IO/JsonObject.cs @@ -1,210 +1,210 @@ -using System.Collections.Generic; -using System.Linq; -using System.Text; -using VariantAnnotation.Interface.IO; - -namespace VariantAnnotation.IO -{ - public sealed class JsonObject - { - private readonly StringBuilder _sb; - private bool _needsComma; - private int _nestedLevel; - - public const char Comma = ','; - private const char DoubleQuote = '\"'; - public const char OpenBracket = '['; - public const char CloseBracket = ']'; - public const char OpenBrace = '{'; - public const char CloseBrace = '}'; - private const string ColonString = "\":"; - - public JsonObject(StringBuilder sb) => _sb = sb; - - private void AddKey(string description) - { - _sb.Append(DoubleQuote); - _sb.Append(description); - _sb.Append(ColonString); - } - - public void StartObjectWithKey(string objectKey) - { - if (_needsComma) _sb.Append(Comma); - - _sb.Append(DoubleQuote); - _sb.Append(objectKey); - _sb.Append(ColonString); - _sb.Append(OpenBrace); - - _needsComma = false; - _nestedLevel++; - } - - public bool AddBoolValue(string description, bool b, bool outputFalse = false) - { - // we do not want to print out false flags by default. - if (!b && !outputFalse) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - - _sb.Append(b ? "true" : "false"); - _needsComma = true; - - return true; - } - - public bool AddIntValue(string description, int? 
i) - { - if (i == null) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - - _sb.Append(i); - _needsComma = true; - - return true; - } - - public bool AddIntValues(string description, int[] values) - { - if (values == null || values.Length == 0) return false; - - // removing '.'s from the list of values - var valueList = values.Select(value => value.ToString()).ToList(); - - AddStringValues(description, valueList, false); - _needsComma = true; - - return true; - } - - public bool AddDoubleValue(string description, double? d, string format = "0.####") - { - if (d == null) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - _sb.Append(d.Value.ToString(format)); - _needsComma = true; - - return true; - } - - public bool AddDoubleValues(string description, double[] values, string format = "0.####") - { - if (values == null || values.Length == 0) return false; - - var valueList = values.Select(value => value.ToString(format)).ToList(); - - AddStringValues(description, valueList, false); - _needsComma = true; - - return true; - } - - public bool AddStringValue(string description, string s, bool useQuote = true) - { - if (string.IsNullOrEmpty(s) || s == ".") return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - - if (useQuote) _sb.Append(DoubleQuote); - _sb.Append(s); - if (useQuote) _sb.Append(DoubleQuote); - _needsComma = true; - - return true; - } - - public bool AddStringValues(string description, IEnumerable values, bool useQuote = true) - { - if (values == null) return false; - - var validEntries = new List(); - foreach (string value in values) if (value != ".") validEntries.Add(value); - - if (validEntries.Count == 0) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - _sb.Append(OpenBracket); - - var needsComma = false; - - foreach (string value in validEntries) - { - if (needsComma) _sb.Append(Comma); - if (useQuote) _sb.Append(DoubleQuote); - 
_sb.Append(value); - if (useQuote) _sb.Append(DoubleQuote); - needsComma = true; - } - - _sb.Append(CloseBracket); - _needsComma = true; - - return true; - } - - public bool AddObjectValue(string description, T value) where T : IJsonSerializer - { - if (value == null) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - - value.SerializeJson(_sb); - - _needsComma = true; - - return true; - } - - public bool AddObjectValues(string description, IEnumerable values) where T : IJsonSerializer - { - if (values == null) return false; - - if (_needsComma) _sb.Append(Comma); - AddKey(description); - _sb.Append(OpenBracket); - - var needsComma = false; - - foreach (var value in values) - { - // comma handling - if (needsComma) _sb.Append(Comma); - else needsComma = true; - value.SerializeJson(_sb); - } - - _sb.Append(CloseBracket); - _needsComma = true; - - return true; - } - - public void StartObject() - { - _sb.Append(OpenBrace); - _needsComma = false; - _nestedLevel++; - } - - public void EndObject() - { - _sb.Append(CloseBrace); - _needsComma = true; - _nestedLevel--; - } - - public void EndAllObjects() - { - _sb.Append(CloseBrace, _nestedLevel); - } - } +using System.Collections.Generic; +using System.Linq; +using System.Text; +using VariantAnnotation.Interface.IO; + +namespace VariantAnnotation.IO +{ + public sealed class JsonObject + { + private readonly StringBuilder _sb; + private bool _needsComma; + private int _nestedLevel; + + public const char Comma = ','; + private const char DoubleQuote = '\"'; + public const char OpenBracket = '['; + public const char CloseBracket = ']'; + public const char OpenBrace = '{'; + public const char CloseBrace = '}'; + private const string ColonString = "\":"; + + public JsonObject(StringBuilder sb) => _sb = sb; + + private void AddKey(string description) + { + _sb.Append(DoubleQuote); + _sb.Append(description); + _sb.Append(ColonString); + } + + public void StartObjectWithKey(string objectKey) + { + if 
(_needsComma) _sb.Append(Comma); + + _sb.Append(DoubleQuote); + _sb.Append(objectKey); + _sb.Append(ColonString); + _sb.Append(OpenBrace); + + _needsComma = false; + _nestedLevel++; + } + + public bool AddBoolValue(string description, bool b, bool outputFalse = false) + { + // we do not want to print out false flags by default. + if (!b && !outputFalse) return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + + _sb.Append(b ? "true" : "false"); + _needsComma = true; + + return true; + } + + public bool AddIntValue(string description, int? i) + { + if (i == null) return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + + _sb.Append(i); + _needsComma = true; + + return true; + } + + public bool AddIntValues(string description, int[] values) + { + if (values == null || values.Length == 0) return false; + + // removing '.'s from the list of values + var valueList = values.Select(value => value.ToString()).ToList(); + + AddStringValues(description, valueList, false); + _needsComma = true; + + return true; + } + + public bool AddDoubleValue(string description, double? 
d, string format = "0.####") + { + if (d == null) return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + _sb.Append(d.Value.ToString(format)); + _needsComma = true; + + return true; + } + + public bool AddDoubleValues(string description, double[] values, string format = "0.####") + { + if (values == null || values.Length == 0) return false; + + var valueList = values.Select(value => value.ToString(format)).ToList(); + + AddStringValues(description, valueList, false); + _needsComma = true; + + return true; + } + + public bool AddStringValue(string description, string s, bool useQuote = true) + { + if (string.IsNullOrEmpty(s) || s == ".") return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + + if (useQuote) _sb.Append(DoubleQuote); + _sb.Append(s); + if (useQuote) _sb.Append(DoubleQuote); + _needsComma = true; + + return true; + } + + public bool AddStringValues(string description, IEnumerable values, bool useQuote = true) + { + if (values == null) return false; + + var validEntries = new List(); + foreach (string value in values) if (value != ".") validEntries.Add(value); + + if (validEntries.Count == 0) return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + _sb.Append(OpenBracket); + + var needsComma = false; + + foreach (string value in validEntries) + { + if (needsComma) _sb.Append(Comma); + if (useQuote) _sb.Append(DoubleQuote); + _sb.Append(value); + if (useQuote) _sb.Append(DoubleQuote); + needsComma = true; + } + + _sb.Append(CloseBracket); + _needsComma = true; + + return true; + } + + public bool AddObjectValue(string description, T value) where T : IJsonSerializer + { + if (value == null) return false; + + if (_needsComma) _sb.Append(Comma); + AddKey(description); + + value.SerializeJson(_sb); + + _needsComma = true; + + return true; + } + + public bool AddObjectValues(string description, IEnumerable values) where T : IJsonSerializer + { + if (values == null) return false; + + if 
(_needsComma) _sb.Append(Comma); + AddKey(description); + _sb.Append(OpenBracket); + + var needsComma = false; + + foreach (var value in values) + { + // comma handling + if (needsComma) _sb.Append(Comma); + else needsComma = true; + value.SerializeJson(_sb); + } + + _sb.Append(CloseBracket); + _needsComma = true; + + return true; + } + + public void StartObject() + { + _sb.Append(OpenBrace); + _needsComma = false; + _nestedLevel++; + } + + public void EndObject() + { + _sb.Append(CloseBrace); + _needsComma = true; + _nestedLevel--; + } + + public void EndAllObjects() + { + _sb.Append(CloseBrace, _nestedLevel); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/JsonWriter.cs b/VariantAnnotation/IO/JsonWriter.cs index 28f81928..c111e7ce 100644 --- a/VariantAnnotation/IO/JsonWriter.cs +++ b/VariantAnnotation/IO/JsonWriter.cs @@ -1,145 +1,145 @@ -using System.Collections.Generic; -using System.IO; -using Compression.FileHandling; -using Jasix; -using Jasix.DataStructures; -using OptimizedCore; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.Interface.Positions; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.IO -{ - public sealed class JsonWriter : IJsonWriter - { - private readonly StreamWriter _writer; - private bool _firstEntry; - private bool _positionFieldClosed; - private bool _leaveOpen; - - private readonly BgzipTextWriter _bgzipTextWriter; - private readonly OnTheFlyIndexCreator _jasixIndexCreator; - - private JsonWriter(Stream jsonStream, Stream indexStream, string annotator, string creationTime, string vepDataVersion, - List dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen) : this(GetProperWriter(jsonStream), indexStream, annotator, creationTime, vepDataVersion, dataSourceVersions, genomeAssembly, sampleNames, leaveOpen) - { - } - - public JsonWriter(Stream jsonStream, Stream indexStream, IAnnotationResources 
annotationResources, string creationTime, string[] sampleNames, bool leaveOpen) : this(jsonStream, indexStream, annotationResources.AnnotatorVersionTag, creationTime, annotationResources.VepDataVersion, annotationResources.DataSourceVersions, annotationResources.SequenceProvider.Assembly.ToString(), sampleNames, leaveOpen) - { - } - - private static StreamWriter GetProperWriter(Stream jsonStream) => jsonStream is BlockGZipStream - ? new BgzipTextWriter((BlockGZipStream)jsonStream) - : new StreamWriter(jsonStream); - - public JsonWriter(StreamWriter writer, Stream indexStream, string annotator, string creationTime, string vepDataVersion, - List dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen) - { - _writer = writer; - _writer.NewLine = "\n"; - _firstEntry = true; - _positionFieldClosed = false; - _leaveOpen = leaveOpen; - - _bgzipTextWriter = writer as BgzipTextWriter; - - _jasixIndexCreator = _bgzipTextWriter != null - ? new OnTheFlyIndexCreator(indexStream) - : null; - - WriteHeader(annotator, creationTime, genomeAssembly, JsonCommon.SchemaVersion, vepDataVersion, - dataSourceVersions, sampleNames); - } - - - private void WriteHeader(string annotator, string creationTime, string genomeAssembly, int schemaVersion, - string vepDataVersion, IEnumerable dataSourceVersions, string[] sampleNames) - { - _jasixIndexCreator?.BeginSection(JasixCommons.HeaderSectionTag, _bgzipTextWriter.Position); - - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - sb.Append($"{{\"{JasixCommons.HeaderSectionTag}\":{{"); - jsonObject.AddStringValue("annotator", annotator); - jsonObject.AddStringValue("creationTime", creationTime); - jsonObject.AddStringValue("genomeAssembly", genomeAssembly); - jsonObject.AddIntValue("schemaVersion", schemaVersion); - jsonObject.AddStringValue("dataVersion", vepDataVersion); - - jsonObject.AddObjectValues("dataSources", dataSourceVersions); - - if (sampleNames != null) 
jsonObject.AddStringValues("samples", sampleNames); - sb.Append($"}},\"{JasixCommons.PositionsSectionTag}\":[\n"); - - _writer.Write(StringBuilderCache.GetStringAndRelease(sb)); - _writer.Flush(); - _jasixIndexCreator?.EndSection(JasixCommons.HeaderSectionTag, _bgzipTextWriter.Position - 1); - } - - public void Dispose() - { - WriteFooter(); - _writer.Flush(); - if (_leaveOpen) return; - _writer.Dispose(); - _jasixIndexCreator?.Dispose(); - } - - public void WriteJsonEntry(IPosition position, string entry) - { - if (string.IsNullOrEmpty(entry)) return; - _jasixIndexCreator?.Add(position, _bgzipTextWriter.Position); - if (!_firstEntry) _writer.WriteLine(","); - else _jasixIndexCreator?.BeginSection(JasixCommons.PositionsSectionTag, _bgzipTextWriter.Position); - - _firstEntry = false; - _writer.Write(entry); - } - - public void WriteAnnotatedGenes(IEnumerable annotatedGenes) - { - _positionFieldClosed = true; - _writer.Flush(); - _jasixIndexCreator?.EndSection(JasixCommons.PositionsSectionTag, _bgzipTextWriter.Position - 1); - - _writer.Write("\n]"); - - if (annotatedGenes == null) return; - _writer.Write($",\"{JasixCommons.GenesSectionTag}\":[\n"); - _writer.Flush(); - - _jasixIndexCreator?.BeginSection(JasixCommons.GenesSectionTag, _bgzipTextWriter.Position); - - var sb = StringBuilderCache.Acquire(); - var firstGeneEntry = true; - - foreach (string jsonString in annotatedGenes) - { - if (!firstGeneEntry) sb.Append(",\n"); - sb.Append(jsonString); - firstGeneEntry = false; - } - - _writer.Write(sb.ToString()); - _writer.Flush(); - _jasixIndexCreator?.EndSection(JasixCommons.GenesSectionTag, _bgzipTextWriter.Position - 1); - - StringBuilderCache.GetStringAndRelease(sb); - _writer.WriteLine(); - _writer.Write("]"); - } - - private void WriteFooter() - { - if (!_positionFieldClosed) - { - _writer.WriteLine(); - _writer.Write("]"); - } - _writer.WriteLine("}"); - } - } +using System.Collections.Generic; +using System.IO; +using Compression.FileHandling; +using Jasix; 
+using Jasix.DataStructures; +using OptimizedCore; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.Interface.Positions; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.IO +{ + public sealed class JsonWriter : IJsonWriter + { + private readonly StreamWriter _writer; + private bool _firstEntry; + private bool _positionFieldClosed; + private bool _leaveOpen; + + private readonly BgzipTextWriter _bgzipTextWriter; + private readonly OnTheFlyIndexCreator _jasixIndexCreator; + + private JsonWriter(Stream jsonStream, Stream indexStream, string annotator, string creationTime, string vepDataVersion, + List dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen) : this(GetProperWriter(jsonStream), indexStream, annotator, creationTime, vepDataVersion, dataSourceVersions, genomeAssembly, sampleNames, leaveOpen) + { + } + + public JsonWriter(Stream jsonStream, Stream indexStream, IAnnotationResources annotationResources, string creationTime, string[] sampleNames, bool leaveOpen) : this(jsonStream, indexStream, annotationResources.AnnotatorVersionTag, creationTime, annotationResources.VepDataVersion, annotationResources.DataSourceVersions, annotationResources.SequenceProvider.Assembly.ToString(), sampleNames, leaveOpen) + { + } + + private static StreamWriter GetProperWriter(Stream jsonStream) => jsonStream is BlockGZipStream + ? new BgzipTextWriter((BlockGZipStream)jsonStream) + : new StreamWriter(jsonStream); + + public JsonWriter(StreamWriter writer, Stream indexStream, string annotator, string creationTime, string vepDataVersion, + List dataSourceVersions, string genomeAssembly, string[] sampleNames, bool leaveOpen) + { + _writer = writer; + _writer.NewLine = "\n"; + _firstEntry = true; + _positionFieldClosed = false; + _leaveOpen = leaveOpen; + + _bgzipTextWriter = writer as BgzipTextWriter; + + _jasixIndexCreator = _bgzipTextWriter != null + ? 
new OnTheFlyIndexCreator(indexStream) + : null; + + WriteHeader(annotator, creationTime, genomeAssembly, JsonCommon.SchemaVersion, vepDataVersion, + dataSourceVersions, sampleNames); + } + + + private void WriteHeader(string annotator, string creationTime, string genomeAssembly, int schemaVersion, + string vepDataVersion, IEnumerable dataSourceVersions, string[] sampleNames) + { + _jasixIndexCreator?.BeginSection(JasixCommons.HeaderSectionTag, _bgzipTextWriter.Position); + + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + sb.Append($"{{\"{JasixCommons.HeaderSectionTag}\":{{"); + jsonObject.AddStringValue("annotator", annotator); + jsonObject.AddStringValue("creationTime", creationTime); + jsonObject.AddStringValue("genomeAssembly", genomeAssembly); + jsonObject.AddIntValue("schemaVersion", schemaVersion); + jsonObject.AddStringValue("dataVersion", vepDataVersion); + + jsonObject.AddObjectValues("dataSources", dataSourceVersions); + + if (sampleNames != null) jsonObject.AddStringValues("samples", sampleNames); + sb.Append($"}},\"{JasixCommons.PositionsSectionTag}\":[\n"); + + _writer.Write(StringBuilderCache.GetStringAndRelease(sb)); + _writer.Flush(); + _jasixIndexCreator?.EndSection(JasixCommons.HeaderSectionTag, _bgzipTextWriter.Position - 1); + } + + public void Dispose() + { + WriteFooter(); + _writer.Flush(); + if (_leaveOpen) return; + _writer.Dispose(); + _jasixIndexCreator?.Dispose(); + } + + public void WriteJsonEntry(IPosition position, string entry) + { + if (string.IsNullOrEmpty(entry)) return; + _jasixIndexCreator?.Add(position, _bgzipTextWriter.Position); + if (!_firstEntry) _writer.WriteLine(","); + else _jasixIndexCreator?.BeginSection(JasixCommons.PositionsSectionTag, _bgzipTextWriter.Position); + + _firstEntry = false; + _writer.Write(entry); + } + + public void WriteAnnotatedGenes(IEnumerable annotatedGenes) + { + _positionFieldClosed = true; + _writer.Flush(); + 
_jasixIndexCreator?.EndSection(JasixCommons.PositionsSectionTag, _bgzipTextWriter.Position - 1); + + _writer.Write("\n]"); + + if (annotatedGenes == null) return; + _writer.Write($",\"{JasixCommons.GenesSectionTag}\":[\n"); + _writer.Flush(); + + _jasixIndexCreator?.BeginSection(JasixCommons.GenesSectionTag, _bgzipTextWriter.Position); + + var sb = StringBuilderCache.Acquire(); + var firstGeneEntry = true; + + foreach (string jsonString in annotatedGenes) + { + if (!firstGeneEntry) sb.Append(",\n"); + sb.Append(jsonString); + firstGeneEntry = false; + } + + _writer.Write(sb.ToString()); + _writer.Flush(); + _jasixIndexCreator?.EndSection(JasixCommons.GenesSectionTag, _bgzipTextWriter.Position - 1); + + StringBuilderCache.GetStringAndRelease(sb); + _writer.WriteLine(); + _writer.Write("]"); + } + + private void WriteFooter() + { + if (!_positionFieldClosed) + { + _writer.WriteLine(); + _writer.Write("]"); + } + _writer.WriteLine("}"); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/SampleExtensions.cs b/VariantAnnotation/IO/SampleExtensions.cs index a1361867..72abf494 100644 --- a/VariantAnnotation/IO/SampleExtensions.cs +++ b/VariantAnnotation/IO/SampleExtensions.cs @@ -1,48 +1,48 @@ -using OptimizedCore; -using VariantAnnotation.Interface.Positions; - -namespace VariantAnnotation.IO -{ - public static class SampleExtensions - { - public static string GetJsonString(this ISample sample) - { - var sb = StringBuilderCache.Acquire(); - var jsonObject = new JsonObject(sb); - - // data section - sb.Append(JsonObject.OpenBrace); - - jsonObject.AddBoolValue("isEmpty", sample.IsEmpty); - jsonObject.AddStringValue("genotype", sample.Genotype); - jsonObject.AddStringValue("repeatNumbers", sample.RepeatNumbers); - jsonObject.AddStringValue("repeatNumberSpans", sample.RepeatNumberSpans); - jsonObject.AddDoubleValues("variantFrequencies", sample.VariantFrequencies); - jsonObject.AddIntValue("totalDepth", sample.TotalDepth); - 
jsonObject.AddIntValue("genotypeQuality", sample.GenotypeQuality); - jsonObject.AddIntValue("copyNumber", sample.CopyNumber); - jsonObject.AddIntValues("alleleDepths", sample.AlleleDepths); - jsonObject.AddBoolValue("failedFilter", sample.FailedFilter); - jsonObject.AddIntValues("splitReadCounts", sample.SplitReadCounts); - jsonObject.AddIntValues("pairedEndReadCounts", sample.PairEndReadCounts); - jsonObject.AddBoolValue("lossOfHeterozygosity", sample.IsLossOfHeterozygosity); - jsonObject.AddDoubleValue("deNovoQuality", sample.DeNovoQuality, "0.##"); - - jsonObject.AddIntValues("mpileupAlleleDepths", sample.MpileupAlleleDepths); - jsonObject.AddStringValue("silentCarrierHaplotype", sample.SilentCarrierHaplotype); - jsonObject.AddIntValues("paralogousEntrezGeneIds", sample.ParalogousEntrezGeneIds); - jsonObject.AddIntValues("paralogousGeneCopyNumbers", sample.ParalogousGeneCopyNumbers); - jsonObject.AddStringValues("diseaseClassificationSources", sample.DiseaseClassificationSources); - jsonObject.AddStringValues("diseaseIds", sample.DiseaseIds); - jsonObject.AddStringValues("diseaseAffectedStatuses", sample.DiseaseAffectedStatus); - jsonObject.AddIntValues("proteinAlteringVariantPositions", sample.ProteinAlteringVariantPositions); - jsonObject.AddBoolValue("isCompoundHetCompatible", sample.IsCompoundHetCompatible); - - jsonObject.AddDoubleValue("artifactAdjustedQualityScore", sample.ArtifactAdjustedQualityScore, "0.#"); - jsonObject.AddDoubleValue("likelihoodRatioQualityScore", sample.LikelihoodRatioQualityScore, "0.#"); - - sb.Append(JsonObject.CloseBrace); - return StringBuilderCache.GetStringAndRelease(sb); - } - } +using OptimizedCore; +using VariantAnnotation.Interface.Positions; + +namespace VariantAnnotation.IO +{ + public static class SampleExtensions + { + public static string GetJsonString(this ISample sample) + { + var sb = StringBuilderCache.Acquire(); + var jsonObject = new JsonObject(sb); + + // data section + sb.Append(JsonObject.OpenBrace); + + 
jsonObject.AddBoolValue("isEmpty", sample.IsEmpty); + jsonObject.AddStringValue("genotype", sample.Genotype); + jsonObject.AddStringValue("repeatNumbers", sample.RepeatNumbers); + jsonObject.AddStringValue("repeatNumberSpans", sample.RepeatNumberSpans); + jsonObject.AddDoubleValues("variantFrequencies", sample.VariantFrequencies); + jsonObject.AddIntValue("totalDepth", sample.TotalDepth); + jsonObject.AddIntValue("genotypeQuality", sample.GenotypeQuality); + jsonObject.AddIntValue("copyNumber", sample.CopyNumber); + jsonObject.AddIntValues("alleleDepths", sample.AlleleDepths); + jsonObject.AddBoolValue("failedFilter", sample.FailedFilter); + jsonObject.AddIntValues("splitReadCounts", sample.SplitReadCounts); + jsonObject.AddIntValues("pairedEndReadCounts", sample.PairEndReadCounts); + jsonObject.AddBoolValue("lossOfHeterozygosity", sample.IsLossOfHeterozygosity); + jsonObject.AddDoubleValue("deNovoQuality", sample.DeNovoQuality, "0.##"); + + jsonObject.AddIntValues("mpileupAlleleDepths", sample.MpileupAlleleDepths); + jsonObject.AddStringValue("silentCarrierHaplotype", sample.SilentCarrierHaplotype); + jsonObject.AddIntValues("paralogousEntrezGeneIds", sample.ParalogousEntrezGeneIds); + jsonObject.AddIntValues("paralogousGeneCopyNumbers", sample.ParalogousGeneCopyNumbers); + jsonObject.AddStringValues("diseaseClassificationSources", sample.DiseaseClassificationSources); + jsonObject.AddStringValues("diseaseIds", sample.DiseaseIds); + jsonObject.AddStringValues("diseaseAffectedStatuses", sample.DiseaseAffectedStatus); + jsonObject.AddIntValues("proteinAlteringVariantPositions", sample.ProteinAlteringVariantPositions); + jsonObject.AddBoolValue("isCompoundHetCompatible", sample.IsCompoundHetCompatible); + + jsonObject.AddDoubleValue("artifactAdjustedQualityScore", sample.ArtifactAdjustedQualityScore, "0.#"); + jsonObject.AddDoubleValue("likelihoodRatioQualityScore", sample.LikelihoodRatioQualityScore, "0.#"); + + sb.Append(JsonObject.CloseBrace); + return 
StringBuilderCache.GetStringAndRelease(sb); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/VcfWriter/Csq.cs b/VariantAnnotation/IO/VcfWriter/Csq.cs index 2f5eda9a..ae872b4d 100644 --- a/VariantAnnotation/IO/VcfWriter/Csq.cs +++ b/VariantAnnotation/IO/VcfWriter/Csq.cs @@ -1,22 +1,22 @@ -namespace VariantAnnotation.IO.VcfWriter -{ - public static class CsqCommon - { - public const string TranscriptFeatureType = "Transcript"; - public const string RegulatoryFeatureType = "RegulatoryFeature"; - } - - /// - /// The annoying bit about CSQ fields is that the order changes depending on which - /// parameters have been passed to VEP. As a result, we need to keep all of the key - /// value pairs in a dictionary. - /// - public sealed class CsqEntry - { - public string Allele; - public string Consequence; - public string Feature; - public string FeatureType; - public string Symbol; - } +namespace VariantAnnotation.IO.VcfWriter +{ + public static class CsqCommon + { + public const string TranscriptFeatureType = "Transcript"; + public const string RegulatoryFeatureType = "RegulatoryFeature"; + } + + /// + /// The annoying bit about CSQ fields is that the order changes depending on which + /// parameters have been passed to VEP. As a result, we need to keep all of the key + /// value pairs in a dictionary. 
+ /// + public sealed class CsqEntry + { + public string Allele; + public string Consequence; + public string Feature; + public string FeatureType; + public string Symbol; + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/VcfWriter/LiteVcfWriter.cs b/VariantAnnotation/IO/VcfWriter/LiteVcfWriter.cs index 77339a08..7555c116 100644 --- a/VariantAnnotation/IO/VcfWriter/LiteVcfWriter.cs +++ b/VariantAnnotation/IO/VcfWriter/LiteVcfWriter.cs @@ -1,139 +1,139 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using OptimizedCore; -using VariantAnnotation.Interface; -using VariantAnnotation.Interface.IO; -using VariantAnnotation.Interface.Providers; - -namespace VariantAnnotation.IO.VcfWriter -{ - public sealed class LiteVcfWriter : IDisposable - { - private readonly StreamWriter _writer; - - private const string AnnotatorTag = "##annotator="; - private const string AnnotationServiceUriTag = "##annotationserviceuri="; - private const string AnnotationCollectionVersionTag = "##annotationcollectionversion="; - private const string CsqInfoTag = "##INFO="; - private const string CsqrHeaderLine = "##INFO="; - - private const string InfoHeaderLines = - "##INFO=\n" + - "##INFO=\n" + - "##INFO=\n" + - "##INFO=\n" + - "##INFO=\n" + - "##INFO=\n" + - "##INFO=\n" + - "##INFO="; - - #region IDisposable - - public void Dispose() - { - _writer?.Dispose(); - } - - #endregion - - public LiteVcfWriter(StreamWriter vcfWriter, IEnumerable headerLines,string nirvanaVersion, string nirvanaDataVersion, IEnumerable dataSourceVersions) - { - // open the vcf file - _writer = vcfWriter; - _writer.NewLine = "\n"; - - // write out the header lines - WriteHeader(headerLines, BuildVcfHeaderLines(nirvanaVersion, nirvanaDataVersion, dataSourceVersions)); - } - - public LiteVcfWriter(StreamWriter vcfWriter, IEnumerable headerLines, - IAnnotationResources annotationResources) : this(vcfWriter, headerLines, - annotationResources.AnnotatorVersionTag, 
annotationResources.VepDataVersion, - annotationResources.DataSourceVersions) - { } - - private static string BuildVcfHeaderLines(string nirvanaVersion,string nirvanaDataVersion, IEnumerable dataSourceVersions) - { - var sb = StringBuilderCache.Acquire(); - - var nirvanaAnnotatorTag = "##annotator="+nirvanaVersion +'\n'; - - sb.Append(nirvanaAnnotatorTag); - // add the data version - sb.Append("##annotatorDataVersion=" + nirvanaDataVersion + '\n'); - - // only certain data sources are output to vcf. We will maintain a white list of those - var dataSourceWhiteList = new HashSet { "dbSNP", "COSMIC", "1000 Genomes Project", "EVS", "ExAC", "ClinVar", "phyloP" }; - - // add the data source versions - if (dataSourceVersions != null) - { - foreach (var sourceVersion in dataSourceVersions) - { - // add the transcript source - if (sourceVersion.Name == "VEP") - { - sb.Append("##annotatorTranscriptSource=" + sourceVersion.Description + '\n'); - continue; - } - - if (dataSourceWhiteList.Contains(sourceVersion.Name)) sb.AppendFormat("##{0}\n", sourceVersion); - } - } - - // add the supplementary INFO tag descriptions - sb.Append(InfoHeaderLines + '\n'); - - // add the CSQT and CSQR header lines - sb.Append(CsqtHeaderLine + '\n'); - sb.Append(CsqrHeaderLine); - - return StringBuilderCache.GetStringAndRelease(sb); - } - - /// - /// writes the vcf header to the current output stream - /// - private void WriteHeader(IEnumerable headerLines, string csqInfoTag) - { - // skip over some header lines that may already be present - var currentHeaderLines = - headerLines.Where( - line => - !line.StartsWith(AnnotatorTag) && - !line.StartsWith(AnnotationCollectionVersionTag) && - !line.StartsWith(AnnotationServiceUriTag) && - !line.StartsWith(CsqInfoTag) && - !line.StartsWith(CsqRInfoTag) && - !line.StartsWith(CsqTInfoTag)).ToList(); - - // find where we should place our info field and annotator tags - var lastIndex = currentHeaderLines.FindLastIndex(x => x.StartsWith(InfoTag)); - if 
(lastIndex == -1) - { - var lastChromIndex = currentHeaderLines.FindLastIndex(x => x.StartsWith(VcfCommon.ChromosomeHeader)); - lastIndex = lastChromIndex == -1 ? currentHeaderLines.Count - 1 : lastChromIndex - 1; - } - - // write the modified header lines - for (var currentIndex = 0; currentIndex < currentHeaderLines.Count; currentIndex++) - { - var line = currentHeaderLines[currentIndex]; - _writer.WriteLine(line); - - if (currentIndex != lastIndex) continue; - - _writer.WriteLine(csqInfoTag); - } - } - - public void Write(string s) => _writer.WriteLine(s); - } +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using OptimizedCore; +using VariantAnnotation.Interface; +using VariantAnnotation.Interface.IO; +using VariantAnnotation.Interface.Providers; + +namespace VariantAnnotation.IO.VcfWriter +{ + public sealed class LiteVcfWriter : IDisposable + { + private readonly StreamWriter _writer; + + private const string AnnotatorTag = "##annotator="; + private const string AnnotationServiceUriTag = "##annotationserviceuri="; + private const string AnnotationCollectionVersionTag = "##annotationcollectionversion="; + private const string CsqInfoTag = "##INFO="; + private const string CsqrHeaderLine = "##INFO="; + + private const string InfoHeaderLines = + "##INFO=\n" + + "##INFO=\n" + + "##INFO=\n" + + "##INFO=\n" + + "##INFO=\n" + + "##INFO=\n" + + "##INFO=\n" + + "##INFO="; + + #region IDisposable + + public void Dispose() + { + _writer?.Dispose(); + } + + #endregion + + public LiteVcfWriter(StreamWriter vcfWriter, IEnumerable headerLines,string nirvanaVersion, string nirvanaDataVersion, IEnumerable dataSourceVersions) + { + // open the vcf file + _writer = vcfWriter; + _writer.NewLine = "\n"; + + // write out the header lines + WriteHeader(headerLines, BuildVcfHeaderLines(nirvanaVersion, nirvanaDataVersion, dataSourceVersions)); + } + + public LiteVcfWriter(StreamWriter vcfWriter, IEnumerable headerLines, + IAnnotationResources 
annotationResources) : this(vcfWriter, headerLines, + annotationResources.AnnotatorVersionTag, annotationResources.VepDataVersion, + annotationResources.DataSourceVersions) + { } + + private static string BuildVcfHeaderLines(string nirvanaVersion,string nirvanaDataVersion, IEnumerable dataSourceVersions) + { + var sb = StringBuilderCache.Acquire(); + + var nirvanaAnnotatorTag = "##annotator="+nirvanaVersion +'\n'; + + sb.Append(nirvanaAnnotatorTag); + // add the data version + sb.Append("##annotatorDataVersion=" + nirvanaDataVersion + '\n'); + + // only certain data sources are output to vcf. We will maintain a white list of those + var dataSourceWhiteList = new HashSet { "dbSNP", "COSMIC", "1000 Genomes Project", "EVS", "ExAC", "ClinVar", "phyloP" }; + + // add the data source versions + if (dataSourceVersions != null) + { + foreach (var sourceVersion in dataSourceVersions) + { + // add the transcript source + if (sourceVersion.Name == "VEP") + { + sb.Append("##annotatorTranscriptSource=" + sourceVersion.Description + '\n'); + continue; + } + + if (dataSourceWhiteList.Contains(sourceVersion.Name)) sb.AppendFormat("##{0}\n", sourceVersion); + } + } + + // add the supplementary INFO tag descriptions + sb.Append(InfoHeaderLines + '\n'); + + // add the CSQT and CSQR header lines + sb.Append(CsqtHeaderLine + '\n'); + sb.Append(CsqrHeaderLine); + + return StringBuilderCache.GetStringAndRelease(sb); + } + + /// + /// writes the vcf header to the current output stream + /// + private void WriteHeader(IEnumerable headerLines, string csqInfoTag) + { + // skip over some header lines that may already be present + var currentHeaderLines = + headerLines.Where( + line => + !line.StartsWith(AnnotatorTag) && + !line.StartsWith(AnnotationCollectionVersionTag) && + !line.StartsWith(AnnotationServiceUriTag) && + !line.StartsWith(CsqInfoTag) && + !line.StartsWith(CsqRInfoTag) && + !line.StartsWith(CsqTInfoTag)).ToList(); + + // find where we should place our info field and annotator 
tags + var lastIndex = currentHeaderLines.FindLastIndex(x => x.StartsWith(InfoTag)); + if (lastIndex == -1) + { + var lastChromIndex = currentHeaderLines.FindLastIndex(x => x.StartsWith(VcfCommon.ChromosomeHeader)); + lastIndex = lastChromIndex == -1 ? currentHeaderLines.Count - 1 : lastChromIndex - 1; + } + + // write the modified header lines + for (var currentIndex = 0; currentIndex < currentHeaderLines.Count; currentIndex++) + { + var line = currentHeaderLines[currentIndex]; + _writer.WriteLine(line); + + if (currentIndex != lastIndex) continue; + + _writer.WriteLine(csqInfoTag); + } + } + + public void Write(string s) => _writer.WriteLine(s); + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/VcfWriter/VcfConversion.cs b/VariantAnnotation/IO/VcfWriter/VcfConversion.cs index d4588697..784063f2 100644 --- a/VariantAnnotation/IO/VcfWriter/VcfConversion.cs +++ b/VariantAnnotation/IO/VcfWriter/VcfConversion.cs @@ -1,343 +1,343 @@ -using System.Collections.Generic; -using System.Globalization; -using System.IO; -using System.Linq; -using System.Text; -using OptimizedCore; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.IO; - -namespace VariantAnnotation.IO.VcfWriter -{ - public sealed class VcfConversion - { - private const string DbSnpKeyName = "dbsnp"; - private const string OneKgKeyName = "oneKg"; - private const string RefMinorKeyName = "RefMinor"; - private const string GlobalAlleleKeyName = "globalAllele"; - - private readonly StringBuilder _sb = new StringBuilder(); - private readonly StringBuilder _csqInfoBuilder = new StringBuilder(); - private readonly List _csqtStrings = new List(); - private readonly List _csqrStrings = new List(); - - public string Convert(IAnnotatedPosition annotatedPosition) - { - _sb.Clear(); - - var fields = annotatedPosition.Position.VcfFields; - - // add all of the fields before the info field - for (var vcfIndex = 0; vcfIndex < VcfCommon.IdIndex; vcfIndex++) - { - 
_sb.Append(fields[vcfIndex]); - _sb.Append('\t'); - } - - // add dbSNP id - string dbSnpId = ExtractDbId(annotatedPosition); - _sb.Append(dbSnpId); - _sb.Append('\t'); - - for (int vcfIndex = VcfCommon.IdIndex + 1; vcfIndex < VcfCommon.InfoIndex; vcfIndex++) - { - _sb.Append(fields[vcfIndex]); - _sb.Append('\t'); - } - - AddInfoField(annotatedPosition, _sb); - - // add all of the fields after the info field - int numColumns = fields.Length; - for (int vcfIndex = VcfCommon.InfoIndex + 1; vcfIndex < numColumns; vcfIndex++) - { - _sb.Append('\t'); - _sb.Append(fields[vcfIndex]); - } - - return _sb.ToString(); - } - - private static string ExtractDbId(IAnnotatedPosition annotatedPosition) - { - var dbSnp = new VcfField(); - - var nonDbsnpIds = GetNonDbsnpIds(annotatedPosition.Position.VcfFields[VcfCommon.IdIndex]); - - if (nonDbsnpIds != null) foreach (string nonDbsnpId in nonDbsnpIds) dbSnp.Add(nonDbsnpId); - - foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) - { - foreach (var suppAnnotation in annotatedVariant.SupplementaryAnnotations) - { - if (suppAnnotation.SaDataSource.KeyName != DbSnpKeyName) continue; - foreach (string s in suppAnnotation.GetVcfStrings()) - { - dbSnp.Add(s); - } - } - } - - return dbSnp.GetString(""); - } - - private static IEnumerable GetNonDbsnpIds(string idField) - { - if (idField == null || idField == ".") return null; - var idList = idField.OptimizedSplit(';').Where(id => !id.StartsWith("rs")).ToList(); - - return idList.Count == 0 ? null : idList; - } - - - private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb) - { - var infoEntries = new VcfField(); - string infoField = annotatedPosition.Position.InfoData.UpdatedInfoField; - - if (!string.IsNullOrEmpty(infoField)) - { - infoEntries.Add(infoField); - } - - ExtractInfo(annotatedPosition, infoEntries); - - infoField = infoEntries.GetString(""); - - // remove . 
- if (infoField == ".") infoField = ""; - - sb.Append(infoField); - - var csqs = new List(); - - ExtractCsqs(annotatedPosition, csqs); - - if (csqs.Count != 0 && infoField.Length > 0) sb.Append(";"); - - // append CSQ tags using delegate from annotator - sb.Append(GetCsqtAndCsqrVcfInfo(csqs)); - - if (csqs.Count == 0 && infoField.Length == 0) - { - sb.Append("."); - } - } - - private static void ExtractInfo(IAnnotatedPosition annotatedPosition, VcfField infoEntries) - { - var alleleFreq1000G = new VcfInfoKeyValue("AF1000G"); - var ancestralAllele = new VcfPositionalInfo("AA"); - var phyloP = new VcfPositionalInfo("phyloP"); - - var suppAnnotationSources = new Dictionary(); - var isSaArrayInfo = new Dictionary(); - int numInputAltAlleles = annotatedPosition.Position.AltAlleles.Length; - - UpdateSaAnnoInfo(annotatedPosition, suppAnnotationSources, isSaArrayInfo); - - foreach (var kvp in suppAnnotationSources) - { - if (isSaArrayInfo[kvp.Key]) continue; - for (var i = 0; i < numInputAltAlleles; i++) kvp.Value.Add(null); - } - - for (var i = 0; i < numInputAltAlleles; i++) - { - alleleFreq1000G.Add(null); - } - - var inputGenotypeIndex = GetInputGenotypeIndex(annotatedPosition.Position.AltAlleles, annotatedPosition.AnnotatedVariants); - - // understand the number of annotation contains in the whole vcf line - for (var i = 0; i < annotatedPosition.AnnotatedVariants.Length; i++) - { - var annotatedVariant = annotatedPosition.AnnotatedVariants[i]; - int genotypeIndex = inputGenotypeIndex[i] + 1; - if (annotatedVariant.Variant.IsRefMinor) infoEntries.Add("RefMinor"); - - phyloP.AddValue(annotatedVariant.PhylopScore?.ToString(CultureInfo.InvariantCulture)); - - ProcessSaAnnotations(alleleFreq1000G, ancestralAllele, suppAnnotationSources, annotatedVariant, genotypeIndex); - } - - foreach (var value in suppAnnotationSources.Values) infoEntries.Add(value.GetString()); - - infoEntries.Add(ancestralAllele.GetString()); - infoEntries.Add(alleleFreq1000G.GetString()); - 
infoEntries.Add(phyloP.GetString()); - } - - private static void ProcessSaAnnotations(VcfInfoKeyValue alleleFreq1000G, VcfPositionalInfo ancestralAllele, Dictionary suppAnnotationSources, IAnnotatedVariant annotatedVariant, int genotypeIndex) - { - foreach (var sa in annotatedVariant.SupplementaryAnnotations) - { - if (!sa.SaDataSource.MatchByAllele && !sa.IsAlleleSpecific && sa.SaDataSource.KeyName != GlobalAlleleKeyName) continue; - if (sa.SaDataSource.KeyName == DbSnpKeyName) continue; - if (sa.SaDataSource.KeyName == RefMinorKeyName) continue; - - ProcessVcfAnnotations(alleleFreq1000G, ancestralAllele, suppAnnotationSources, genotypeIndex, sa); - } - } - - private static void ProcessVcfAnnotations(VcfInfoKeyValue alleleFreq1000G, VcfPositionalInfo ancestralAllele, Dictionary suppAnnotationSources, int genotypeIndex, IAnnotatedSaDataSource sa) - { - foreach (string vcfAnnotation in sa.GetVcfStrings()) - { - if (string.IsNullOrEmpty(vcfAnnotation)) continue; - - if (sa.SaDataSource.KeyName == OneKgKeyName) - { - var contents = vcfAnnotation.OptimizedSplit(';'); - string freq = contents[0]; - string ancestryAllele = string.IsNullOrEmpty(contents[1]) ? 
null : contents[1]; - - alleleFreq1000G.Add(freq, genotypeIndex); - ancestralAllele.AddValue(ancestryAllele); - continue; - } - - if (sa.SaDataSource.IsArray && sa.IsAlleleSpecific) - { - suppAnnotationSources[sa.SaDataSource.KeyName].Add( - genotypeIndex.ToString(CultureInfo.InvariantCulture) + '|' + vcfAnnotation); - } - else if (!sa.SaDataSource.IsArray) - { - suppAnnotationSources[sa.SaDataSource.KeyName].Add(vcfAnnotation, genotypeIndex); - } - } - } - - private static void UpdateSaAnnoInfo(IAnnotatedPosition annotatedPosition, Dictionary suppAnnotationSources, Dictionary isSaArrayInfo) - { - foreach (var alternateAllele in annotatedPosition.AnnotatedVariants) - { - foreach (var sa in alternateAllele.SupplementaryAnnotations) - { - if (!suppAnnotationSources.ContainsKey(sa.SaDataSource.KeyName)) - { - suppAnnotationSources[sa.SaDataSource.KeyName] = new VcfInfoKeyValue(sa.SaDataSource.VcfkeyName); - isSaArrayInfo[sa.SaDataSource.KeyName] = sa.SaDataSource.IsArray; - } - } - } - } - - private static int[] GetInputGenotypeIndex(string[] positionAltAlleles, IAnnotatedVariant[] annotatedPositionAnnotatedVariants) - { - - int numAnnotatedVar = annotatedPositionAnnotatedVariants.Length; - // alt allele is or . , and this is a refMinor site - if (positionAltAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(positionAltAlleles[0]) && numAnnotatedVar == 1) - return new []{0}; - - var inputGenotypeIndex = new int[numAnnotatedVar]; - var annotatedVarIndex = 0; - - for (var inputIndex = 0; inputIndex < positionAltAlleles.Length && annotatedVarIndex < numAnnotatedVar; inputIndex++) - { - if (VcfCommon.NonInformativeAltAllele.Contains(positionAltAlleles[inputIndex])) continue; - inputGenotypeIndex[annotatedVarIndex] = inputIndex; - annotatedVarIndex++; - } - - if (annotatedVarIndex < numAnnotatedVar) - throw new InvalidDataException($"There are unannotated variants! 
Input alternative alleles: {string.Join(",", positionAltAlleles)}; annotated alleles: {string.Join(",", annotatedPositionAnnotatedVariants.Select(x => x.Variant.AltAllele))}"); - return inputGenotypeIndex; - } - - /// - /// returns the CSQT string as specified by this annotator - /// - private static string GetCsqtString(CsqEntry csq) - { - return csq.Allele + '|' + - csq.Symbol + '|' + - csq.Feature + '|' + - csq.Consequence; - } - - /// - /// returns the CSQR string as specified by this annotator - /// - private static string GetCsqrString(CsqEntry csq) - { - return csq.Allele + '|' + - csq.Feature + '|' + - csq.Consequence; - } - - /// - /// returns a concatenated vcf INFO field string containing the CSQT and CSQR tags - /// - private string GetCsqtAndCsqrVcfInfo(List csqList) - { - // make sure we have some tags - int numCsqTags = csqList.Count; - if (numCsqTags == 0) return null; - - // build our vcf INFO fields - _csqInfoBuilder.Clear(); - _csqtStrings.Clear(); - _csqrStrings.Clear(); - - foreach (var csqEntry in csqList) - { - // may be null in unit tests - if (csqEntry.FeatureType == null) - { - // assuming such cases to be transcript type - _csqtStrings.Add(GetCsqtString(csqEntry)); - continue; - } - - if (csqEntry.FeatureType == CsqCommon.TranscriptFeatureType) - { - _csqtStrings.Add(GetCsqtString(csqEntry)); - } - else if (csqEntry.FeatureType == CsqCommon.RegulatoryFeatureType) - { - _csqrStrings.Add(GetCsqrString(csqEntry)); - } - } - - bool hasCsqT = _csqtStrings.Count > 0; - bool hasCsqR = _csqrStrings.Count > 0; - - if (hasCsqT) _csqInfoBuilder.Append("CSQT=" + string.Join(",", _csqtStrings)); - if (hasCsqT && hasCsqR) _csqInfoBuilder.Append(';'); - if (hasCsqR) _csqInfoBuilder.Append("CSQR=" + string.Join(",", _csqrStrings)); - - return _csqInfoBuilder.ToString(); - } - - private static void ExtractCsqs(IAnnotatedPosition unifiedJson, List csqs) - { - for (int i = 0; i < unifiedJson.AnnotatedVariants.Length; i++) - { - int genotypeIndex = i + 1; - 
var jsonVariant = unifiedJson.AnnotatedVariants[i]; - - csqs.AddRange( - jsonVariant.Transcripts.Where(x => x.Transcript.IsCanonical) - .Select(transcript => new CsqEntry - { - Allele = genotypeIndex.ToString(), - Feature = transcript.Transcript.Id.WithVersion, - FeatureType = CsqCommon.TranscriptFeatureType, - Symbol = transcript.Transcript.Gene.Symbol, - Consequence = transcript.Consequences == null ? null : string.Join("&", transcript.Consequences.Select(ConsequenceUtil.GetConsequence)) - })); - - csqs.AddRange(jsonVariant.RegulatoryRegions.Select(regulatoryRegion => new CsqEntry - { - Allele = genotypeIndex.ToString(), - Consequence = string.Join("&", regulatoryRegion.Consequences.Select(ConsequenceUtil.GetConsequence)), - Feature = regulatoryRegion.RegulatoryRegion.Id.WithoutVersion, - FeatureType = CsqCommon.RegulatoryFeatureType - })); - } - } - } +using System.Collections.Generic; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Text; +using OptimizedCore; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.IO; + +namespace VariantAnnotation.IO.VcfWriter +{ + public sealed class VcfConversion + { + private const string DbSnpKeyName = "dbsnp"; + private const string OneKgKeyName = "oneKg"; + private const string RefMinorKeyName = "RefMinor"; + private const string GlobalAlleleKeyName = "globalAllele"; + + private readonly StringBuilder _sb = new StringBuilder(); + private readonly StringBuilder _csqInfoBuilder = new StringBuilder(); + private readonly List _csqtStrings = new List(); + private readonly List _csqrStrings = new List(); + + public string Convert(IAnnotatedPosition annotatedPosition) + { + _sb.Clear(); + + var fields = annotatedPosition.Position.VcfFields; + + // add all of the fields before the info field + for (var vcfIndex = 0; vcfIndex < VcfCommon.IdIndex; vcfIndex++) + { + _sb.Append(fields[vcfIndex]); + _sb.Append('\t'); + } + + // add dbSNP id + string dbSnpId = 
ExtractDbId(annotatedPosition); + _sb.Append(dbSnpId); + _sb.Append('\t'); + + for (int vcfIndex = VcfCommon.IdIndex + 1; vcfIndex < VcfCommon.InfoIndex; vcfIndex++) + { + _sb.Append(fields[vcfIndex]); + _sb.Append('\t'); + } + + AddInfoField(annotatedPosition, _sb); + + // add all of the fields after the info field + int numColumns = fields.Length; + for (int vcfIndex = VcfCommon.InfoIndex + 1; vcfIndex < numColumns; vcfIndex++) + { + _sb.Append('\t'); + _sb.Append(fields[vcfIndex]); + } + + return _sb.ToString(); + } + + private static string ExtractDbId(IAnnotatedPosition annotatedPosition) + { + var dbSnp = new VcfField(); + + var nonDbsnpIds = GetNonDbsnpIds(annotatedPosition.Position.VcfFields[VcfCommon.IdIndex]); + + if (nonDbsnpIds != null) foreach (string nonDbsnpId in nonDbsnpIds) dbSnp.Add(nonDbsnpId); + + foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) + { + foreach (var suppAnnotation in annotatedVariant.SupplementaryAnnotations) + { + if (suppAnnotation.SaDataSource.KeyName != DbSnpKeyName) continue; + foreach (string s in suppAnnotation.GetVcfStrings()) + { + dbSnp.Add(s); + } + } + } + + return dbSnp.GetString(""); + } + + private static IEnumerable GetNonDbsnpIds(string idField) + { + if (idField == null || idField == ".") return null; + var idList = idField.OptimizedSplit(';').Where(id => !id.StartsWith("rs")).ToList(); + + return idList.Count == 0 ? null : idList; + } + + + private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb) + { + var infoEntries = new VcfField(); + string infoField = annotatedPosition.Position.InfoData.UpdatedInfoField; + + if (!string.IsNullOrEmpty(infoField)) + { + infoEntries.Add(infoField); + } + + ExtractInfo(annotatedPosition, infoEntries); + + infoField = infoEntries.GetString(""); + + // remove . 
+ if (infoField == ".") infoField = ""; + + sb.Append(infoField); + + var csqs = new List(); + + ExtractCsqs(annotatedPosition, csqs); + + if (csqs.Count != 0 && infoField.Length > 0) sb.Append(";"); + + // append CSQ tags using delegate from annotator + sb.Append(GetCsqtAndCsqrVcfInfo(csqs)); + + if (csqs.Count == 0 && infoField.Length == 0) + { + sb.Append("."); + } + } + + private static void ExtractInfo(IAnnotatedPosition annotatedPosition, VcfField infoEntries) + { + var alleleFreq1000G = new VcfInfoKeyValue("AF1000G"); + var ancestralAllele = new VcfPositionalInfo("AA"); + var phyloP = new VcfPositionalInfo("phyloP"); + + var suppAnnotationSources = new Dictionary(); + var isSaArrayInfo = new Dictionary(); + int numInputAltAlleles = annotatedPosition.Position.AltAlleles.Length; + + UpdateSaAnnoInfo(annotatedPosition, suppAnnotationSources, isSaArrayInfo); + + foreach (var kvp in suppAnnotationSources) + { + if (isSaArrayInfo[kvp.Key]) continue; + for (var i = 0; i < numInputAltAlleles; i++) kvp.Value.Add(null); + } + + for (var i = 0; i < numInputAltAlleles; i++) + { + alleleFreq1000G.Add(null); + } + + var inputGenotypeIndex = GetInputGenotypeIndex(annotatedPosition.Position.AltAlleles, annotatedPosition.AnnotatedVariants); + + // understand the number of annotation contains in the whole vcf line + for (var i = 0; i < annotatedPosition.AnnotatedVariants.Length; i++) + { + var annotatedVariant = annotatedPosition.AnnotatedVariants[i]; + int genotypeIndex = inputGenotypeIndex[i] + 1; + if (annotatedVariant.Variant.IsRefMinor) infoEntries.Add("RefMinor"); + + phyloP.AddValue(annotatedVariant.PhylopScore?.ToString(CultureInfo.InvariantCulture)); + + ProcessSaAnnotations(alleleFreq1000G, ancestralAllele, suppAnnotationSources, annotatedVariant, genotypeIndex); + } + + foreach (var value in suppAnnotationSources.Values) infoEntries.Add(value.GetString()); + + infoEntries.Add(ancestralAllele.GetString()); + infoEntries.Add(alleleFreq1000G.GetString()); + 
infoEntries.Add(phyloP.GetString()); + } + + private static void ProcessSaAnnotations(VcfInfoKeyValue alleleFreq1000G, VcfPositionalInfo ancestralAllele, Dictionary suppAnnotationSources, IAnnotatedVariant annotatedVariant, int genotypeIndex) + { + foreach (var sa in annotatedVariant.SupplementaryAnnotations) + { + if (!sa.SaDataSource.MatchByAllele && !sa.IsAlleleSpecific && sa.SaDataSource.KeyName != GlobalAlleleKeyName) continue; + if (sa.SaDataSource.KeyName == DbSnpKeyName) continue; + if (sa.SaDataSource.KeyName == RefMinorKeyName) continue; + + ProcessVcfAnnotations(alleleFreq1000G, ancestralAllele, suppAnnotationSources, genotypeIndex, sa); + } + } + + private static void ProcessVcfAnnotations(VcfInfoKeyValue alleleFreq1000G, VcfPositionalInfo ancestralAllele, Dictionary suppAnnotationSources, int genotypeIndex, IAnnotatedSaDataSource sa) + { + foreach (string vcfAnnotation in sa.GetVcfStrings()) + { + if (string.IsNullOrEmpty(vcfAnnotation)) continue; + + if (sa.SaDataSource.KeyName == OneKgKeyName) + { + var contents = vcfAnnotation.OptimizedSplit(';'); + string freq = contents[0]; + string ancestryAllele = string.IsNullOrEmpty(contents[1]) ? 
null : contents[1]; + + alleleFreq1000G.Add(freq, genotypeIndex); + ancestralAllele.AddValue(ancestryAllele); + continue; + } + + if (sa.SaDataSource.IsArray && sa.IsAlleleSpecific) + { + suppAnnotationSources[sa.SaDataSource.KeyName].Add( + genotypeIndex.ToString(CultureInfo.InvariantCulture) + '|' + vcfAnnotation); + } + else if (!sa.SaDataSource.IsArray) + { + suppAnnotationSources[sa.SaDataSource.KeyName].Add(vcfAnnotation, genotypeIndex); + } + } + } + + private static void UpdateSaAnnoInfo(IAnnotatedPosition annotatedPosition, Dictionary suppAnnotationSources, Dictionary isSaArrayInfo) + { + foreach (var alternateAllele in annotatedPosition.AnnotatedVariants) + { + foreach (var sa in alternateAllele.SupplementaryAnnotations) + { + if (!suppAnnotationSources.ContainsKey(sa.SaDataSource.KeyName)) + { + suppAnnotationSources[sa.SaDataSource.KeyName] = new VcfInfoKeyValue(sa.SaDataSource.VcfkeyName); + isSaArrayInfo[sa.SaDataSource.KeyName] = sa.SaDataSource.IsArray; + } + } + } + } + + private static int[] GetInputGenotypeIndex(string[] positionAltAlleles, IAnnotatedVariant[] annotatedPositionAnnotatedVariants) + { + + int numAnnotatedVar = annotatedPositionAnnotatedVariants.Length; + // alt allele is or . , and this is a refMinor site + if (positionAltAlleles.Length == 1 && VcfCommon.ReferenceAltAllele.Contains(positionAltAlleles[0]) && numAnnotatedVar == 1) + return new []{0}; + + var inputGenotypeIndex = new int[numAnnotatedVar]; + var annotatedVarIndex = 0; + + for (var inputIndex = 0; inputIndex < positionAltAlleles.Length && annotatedVarIndex < numAnnotatedVar; inputIndex++) + { + if (VcfCommon.NonInformativeAltAllele.Contains(positionAltAlleles[inputIndex])) continue; + inputGenotypeIndex[annotatedVarIndex] = inputIndex; + annotatedVarIndex++; + } + + if (annotatedVarIndex < numAnnotatedVar) + throw new InvalidDataException($"There are unannotated variants! 
Input alternative alleles: {string.Join(",", positionAltAlleles)}; annotated alleles: {string.Join(",", annotatedPositionAnnotatedVariants.Select(x => x.Variant.AltAllele))}"); + return inputGenotypeIndex; + } + + /// + /// returns the CSQT string as specified by this annotator + /// + private static string GetCsqtString(CsqEntry csq) + { + return csq.Allele + '|' + + csq.Symbol + '|' + + csq.Feature + '|' + + csq.Consequence; + } + + /// + /// returns the CSQR string as specified by this annotator + /// + private static string GetCsqrString(CsqEntry csq) + { + return csq.Allele + '|' + + csq.Feature + '|' + + csq.Consequence; + } + + /// + /// returns a concatenated vcf INFO field string containing the CSQT and CSQR tags + /// + private string GetCsqtAndCsqrVcfInfo(List csqList) + { + // make sure we have some tags + int numCsqTags = csqList.Count; + if (numCsqTags == 0) return null; + + // build our vcf INFO fields + _csqInfoBuilder.Clear(); + _csqtStrings.Clear(); + _csqrStrings.Clear(); + + foreach (var csqEntry in csqList) + { + // may be null in unit tests + if (csqEntry.FeatureType == null) + { + // assuming such cases to be transcript type + _csqtStrings.Add(GetCsqtString(csqEntry)); + continue; + } + + if (csqEntry.FeatureType == CsqCommon.TranscriptFeatureType) + { + _csqtStrings.Add(GetCsqtString(csqEntry)); + } + else if (csqEntry.FeatureType == CsqCommon.RegulatoryFeatureType) + { + _csqrStrings.Add(GetCsqrString(csqEntry)); + } + } + + bool hasCsqT = _csqtStrings.Count > 0; + bool hasCsqR = _csqrStrings.Count > 0; + + if (hasCsqT) _csqInfoBuilder.Append("CSQT=" + string.Join(",", _csqtStrings)); + if (hasCsqT && hasCsqR) _csqInfoBuilder.Append(';'); + if (hasCsqR) _csqInfoBuilder.Append("CSQR=" + string.Join(",", _csqrStrings)); + + return _csqInfoBuilder.ToString(); + } + + private static void ExtractCsqs(IAnnotatedPosition unifiedJson, List csqs) + { + for (int i = 0; i < unifiedJson.AnnotatedVariants.Length; i++) + { + int genotypeIndex = i + 1; + 
var jsonVariant = unifiedJson.AnnotatedVariants[i]; + + csqs.AddRange( + jsonVariant.Transcripts.Where(x => x.Transcript.IsCanonical) + .Select(transcript => new CsqEntry + { + Allele = genotypeIndex.ToString(), + Feature = transcript.Transcript.Id.WithVersion, + FeatureType = CsqCommon.TranscriptFeatureType, + Symbol = transcript.Transcript.Gene.Symbol, + Consequence = transcript.Consequences == null ? null : string.Join("&", transcript.Consequences.Select(ConsequenceUtil.GetConsequence)) + })); + + csqs.AddRange(jsonVariant.RegulatoryRegions.Select(regulatoryRegion => new CsqEntry + { + Allele = genotypeIndex.ToString(), + Consequence = string.Join("&", regulatoryRegion.Consequences.Select(ConsequenceUtil.GetConsequence)), + Feature = regulatoryRegion.RegulatoryRegion.Id.WithoutVersion, + FeatureType = CsqCommon.RegulatoryFeatureType + })); + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/VcfWriter/VcfField.cs b/VariantAnnotation/IO/VcfWriter/VcfField.cs index 47b13178..908bc3d9 100644 --- a/VariantAnnotation/IO/VcfWriter/VcfField.cs +++ b/VariantAnnotation/IO/VcfWriter/VcfField.cs @@ -1,42 +1,42 @@ -using System.Collections.Generic; - -namespace VariantAnnotation.IO.VcfWriter -{ - public sealed class VcfField - { - #region members - - private readonly HashSet _entries; - - #endregion - - // constructor - public VcfField() - { - _entries = new HashSet(); - } - - public void Add(string s) - { - if (s == null) return; - _entries.Add(s); - } - - - /// - /// returns a string representation of the VCF ID field - /// - public string GetString(string previousEntries) - { - if (_entries.Count == 0) - { - return string.IsNullOrEmpty(previousEntries) ? 
VcfInfoKeyValue.VcfEmptyEntry : previousEntries; - } - - var s = string.Join(";", _entries); - - if (string.IsNullOrEmpty(previousEntries)) return s; - return previousEntries + ";" + s; - } - } +using System.Collections.Generic; + +namespace VariantAnnotation.IO.VcfWriter +{ + public sealed class VcfField + { + #region members + + private readonly HashSet _entries; + + #endregion + + // constructor + public VcfField() + { + _entries = new HashSet(); + } + + public void Add(string s) + { + if (s == null) return; + _entries.Add(s); + } + + + /// + /// returns a string representation of the VCF ID field + /// + public string GetString(string previousEntries) + { + if (_entries.Count == 0) + { + return string.IsNullOrEmpty(previousEntries) ? VcfInfoKeyValue.VcfEmptyEntry : previousEntries; + } + + var s = string.Join(";", _entries); + + if (string.IsNullOrEmpty(previousEntries)) return s; + return previousEntries + ";" + s; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/IO/VcfWriter/VcfInfoKeyValue.cs b/VariantAnnotation/IO/VcfWriter/VcfInfoKeyValue.cs index 942e74d7..3aa03d29 100644 --- a/VariantAnnotation/IO/VcfWriter/VcfInfoKeyValue.cs +++ b/VariantAnnotation/IO/VcfWriter/VcfInfoKeyValue.cs @@ -1,74 +1,74 @@ -using System; -using System.Collections.Generic; - -namespace VariantAnnotation.IO.VcfWriter -{ - public sealed class VcfInfoKeyValue - { - #region members - - private readonly List _entries; - private bool _containsNonEmptyEntries; - private readonly string _infoFieldName; - - public const string VcfEmptyEntry = "."; - - #endregion - - // constructor - public VcfInfoKeyValue(string infoFieldName) - { - _infoFieldName = infoFieldName; - _entries = new List(); - } - - public void Add(string s) - { - if (s != null) - { - _entries.Add(s); - _containsNonEmptyEntries = true; - } - else - { - _entries.Add(VcfEmptyEntry); - } - } - - /// - /// used to update the non array info field. 
we initiate the info with empty emtry for every allele and update them if new value provided from supplelemtary annotation. - /// - /// - /// - public void Add(string s, int genotypeIndex) - { - if (_entries.Count < genotypeIndex) - { - throw new ArgumentOutOfRangeException(nameof(genotypeIndex)); - } - if (genotypeIndex <= 0) - { - throw new NotSupportedException("No info field should be written for reference allele"); - } - if (s != null) - { - _entries[genotypeIndex - 1] = s; - _containsNonEmptyEntries = true; - } - } - - /// - /// returns a string representation of the VCF info field - /// - public string GetString() - { - if (!_containsNonEmptyEntries) return null; - if (_infoFieldName == "AA") - { - return _infoFieldName + "=" + _entries[0]; - } - - return _infoFieldName + "=" + string.Join(",", _entries); - } - } +using System; +using System.Collections.Generic; + +namespace VariantAnnotation.IO.VcfWriter +{ + public sealed class VcfInfoKeyValue + { + #region members + + private readonly List _entries; + private bool _containsNonEmptyEntries; + private readonly string _infoFieldName; + + public const string VcfEmptyEntry = "."; + + #endregion + + // constructor + public VcfInfoKeyValue(string infoFieldName) + { + _infoFieldName = infoFieldName; + _entries = new List(); + } + + public void Add(string s) + { + if (s != null) + { + _entries.Add(s); + _containsNonEmptyEntries = true; + } + else + { + _entries.Add(VcfEmptyEntry); + } + } + + /// + /// used to update the non array info field. we initiate the info with empty emtry for every allele and update them if new value provided from supplelemtary annotation. 
+ /// + /// + /// + public void Add(string s, int genotypeIndex) + { + if (_entries.Count < genotypeIndex) + { + throw new ArgumentOutOfRangeException(nameof(genotypeIndex)); + } + if (genotypeIndex <= 0) + { + throw new NotSupportedException("No info field should be written for reference allele"); + } + if (s != null) + { + _entries[genotypeIndex - 1] = s; + _containsNonEmptyEntries = true; + } + } + + /// + /// returns a string representation of the VCF info field + /// + public string GetString() + { + if (!_containsNonEmptyEntries) return null; + if (_infoFieldName == "AA") + { + return _infoFieldName + "=" + _entries[0]; + } + + return _infoFieldName + "=" + string.Join(",", _entries); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Logger/ConsoleLogger.cs b/VariantAnnotation/Logger/ConsoleLogger.cs index b4b9769a..e680cccb 100644 --- a/VariantAnnotation/Logger/ConsoleLogger.cs +++ b/VariantAnnotation/Logger/ConsoleLogger.cs @@ -1,14 +1,14 @@ -using System; -using VariantAnnotation.Interface; - -namespace VariantAnnotation.Logger -{ - public sealed class ConsoleLogger : ILogger - { - public void WriteLine() => Console.WriteLine(); - public void WriteLine(string s) => Console.WriteLine(s); - public void Write(string s) => Console.Write(s); - public void SetBold() => Console.ForegroundColor = ConsoleColor.Yellow; - public void ResetColor() => Console.ResetColor(); - } -} +using System; +using VariantAnnotation.Interface; + +namespace VariantAnnotation.Logger +{ + public sealed class ConsoleLogger : ILogger + { + public void WriteLine() => Console.WriteLine(); + public void WriteLine(string s) => Console.WriteLine(s); + public void Write(string s) => Console.Write(s); + public void SetBold() => Console.ForegroundColor = ConsoleColor.Yellow; + public void ResetColor() => Console.ResetColor(); + } +} diff --git a/VariantAnnotation/Logger/NullLogger.cs b/VariantAnnotation/Logger/NullLogger.cs index 3774c471..7168b854 100644 --- 
a/VariantAnnotation/Logger/NullLogger.cs +++ b/VariantAnnotation/Logger/NullLogger.cs @@ -1,32 +1,32 @@ -using VariantAnnotation.Interface; - -namespace VariantAnnotation.Logger -{ - public sealed class NullLogger : ILogger - { - public void WriteLine() - { - // no output desired - } - - public void WriteLine(string s) - { - // no output desired - } - - public void Write(string s) - { - // no output desired - } - - public void SetBold() - { - // no output desired - } - - public void ResetColor() - { - // no output desired - } - } -} +using VariantAnnotation.Interface; + +namespace VariantAnnotation.Logger +{ + public sealed class NullLogger : ILogger + { + public void WriteLine() + { + // no output desired + } + + public void WriteLine(string s) + { + // no output desired + } + + public void Write(string s) + { + // no output desired + } + + public void SetBold() + { + // no output desired + } + + public void ResetColor() + { + // no output desired + } + } +} diff --git a/VariantAnnotation/NSA/NsaReader.cs b/VariantAnnotation/NSA/NsaReader.cs index 18e69d6f..9d96665c 100644 --- a/VariantAnnotation/NSA/NsaReader.cs +++ b/VariantAnnotation/NSA/NsaReader.cs @@ -1,146 +1,146 @@ -using System.Collections.Generic; -using System.IO; -using Compression.Algorithms; -using ErrorHandling.Exceptions; -using Genome; -using IO; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.Interface.SA; -using VariantAnnotation.SA; - -namespace VariantAnnotation.NSA -{ - public sealed class AnnotationItem - { - public readonly int Position; - public readonly byte[] Data; - - public AnnotationItem(int position, byte[] data) - { - Position = position; - Data = data; - } - } - - public sealed class NsaReader :INsaReader - { - private readonly ExtendedBinaryReader _reader; - public GenomeAssembly Assembly { get; } - private readonly ChunkedIndex _index; - public IDataSourceVersion Version { get; } - - private readonly NsaBlock _block; - - public string JsonKey { get; } - 
public bool MatchByAllele { get; } - public bool IsArray { get; } - public bool IsPositional { get; } - - private AnnotationItem[] _annotations; - private int _annotationsCount; - - public NsaReader(ExtendedBinaryReader reader, Stream indexStream, int blockSize = SaCommon.DefaultBlockSize) - { - _reader = reader; - _block = new NsaBlock(new Zstandard(), blockSize); - - _index = new ChunkedIndex(indexStream); - Assembly = _index.Assembly; - Version = _index.Version; - JsonKey = _index.JsonKey; - MatchByAllele = _index.MatchByAllele; - IsArray = _index.IsArray; - IsPositional = _index.IsPositional; - - if (_index.SchemaVersion != SaCommon.SchemaVersion) throw new UserErrorException($"SA schema version mismatch. Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion} for {JsonKey}"); - } - - public void PreLoad(IChromosome chrom, List positions) - { - if (positions == null || positions.Count == 0) return; - - _annotations = new AnnotationItem[positions.Count]; - _annotationsCount = 0; - (long start, _, int blockCount) = _index.GetFileRange(chrom.Index, positions[0], positions[positions.Count-1]); - if (start == -1) return; - _reader.BaseStream.Position = start; - - var posIndex = 0; - while (blockCount > 0 && posIndex < positions.Count) - { - _block.Read(_reader); - posIndex = GetAnnotationsFrom(positions, posIndex); - blockCount--; - } - } - - private int GetAnnotationsFrom(List positions, int i) - { - foreach ((int position, byte[] data) annotation in _block.GetAnnotations()) - { - if (annotation.position < positions[i]) continue; - - while (i < positions.Count && positions[i] < annotation.position) i++; - if (i >= positions.Count) break; - - var position = positions[i]; - - if (position != annotation.position) continue; - - _annotations[_annotationsCount++] = new AnnotationItem(position, annotation.data); - } - - return i; - } - - private IEnumerable<(string refAllele, string altAllele, string jsonString)> ExtractAnnotations(byte[] data) - { - using (var 
reader = new ExtendedBinaryReader(new MemoryStream(data))) - { - if (IsPositional) - { - var positionalAnno = reader.ReadString(); - return new List<(string, string, string)> { (null, null, positionalAnno) }; - } - - var count = reader.ReadOptInt32(); - var annotations = new (string, string, string)[count]; - for (var i = 0; i < count; i++) - { - string refAllele = reader.ReadAsciiString(); - string altAllele = reader.ReadAsciiString(); - string annotation = reader.ReadString(); - annotations[i] = (refAllele ?? "", altAllele ?? "", annotation); - } - - return annotations; - } - } - - public IEnumerable<(string refAllele, string altAllele, string annotation)> GetAnnotation(IChromosome chrom, int position) - { - if (_annotations == null) return null; - var index = BinarySearch(position); - return index < 0 ? null : ExtractAnnotations(_annotations[index].Data); - } - - private int BinarySearch(int position) - { - var begin = 0; - int end = _annotationsCount - 1; - - while (begin <= end) - { - int index = begin + (end - begin >> 1); - - int ret = _annotations[index].Position.CompareTo(position); - if (ret == 0) return index; - if (ret < 0) begin = index + 1; - else end = index - 1; - } - - return ~begin; - } - - } +using System.Collections.Generic; +using System.IO; +using Compression.Algorithms; +using ErrorHandling.Exceptions; +using Genome; +using IO; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.Interface.SA; +using VariantAnnotation.SA; + +namespace VariantAnnotation.NSA +{ + public sealed class AnnotationItem + { + public readonly int Position; + public readonly byte[] Data; + + public AnnotationItem(int position, byte[] data) + { + Position = position; + Data = data; + } + } + + public sealed class NsaReader :INsaReader + { + private readonly ExtendedBinaryReader _reader; + public GenomeAssembly Assembly { get; } + private readonly ChunkedIndex _index; + public IDataSourceVersion Version { get; } + + private readonly NsaBlock _block; + + 
public string JsonKey { get; } + public bool MatchByAllele { get; } + public bool IsArray { get; } + public bool IsPositional { get; } + + private AnnotationItem[] _annotations; + private int _annotationsCount; + + public NsaReader(ExtendedBinaryReader reader, Stream indexStream, int blockSize = SaCommon.DefaultBlockSize) + { + _reader = reader; + _block = new NsaBlock(new Zstandard(), blockSize); + + _index = new ChunkedIndex(indexStream); + Assembly = _index.Assembly; + Version = _index.Version; + JsonKey = _index.JsonKey; + MatchByAllele = _index.MatchByAllele; + IsArray = _index.IsArray; + IsPositional = _index.IsPositional; + + if (_index.SchemaVersion != SaCommon.SchemaVersion) throw new UserErrorException($"SA schema version mismatch. Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion} for {JsonKey}"); + } + + public void PreLoad(IChromosome chrom, List positions) + { + if (positions == null || positions.Count == 0) return; + + _annotations = new AnnotationItem[positions.Count]; + _annotationsCount = 0; + (long start, _, int blockCount) = _index.GetFileRange(chrom.Index, positions[0], positions[positions.Count-1]); + if (start == -1) return; + _reader.BaseStream.Position = start; + + var posIndex = 0; + while (blockCount > 0 && posIndex < positions.Count) + { + _block.Read(_reader); + posIndex = GetAnnotationsFrom(positions, posIndex); + blockCount--; + } + } + + private int GetAnnotationsFrom(List positions, int i) + { + foreach ((int position, byte[] data) annotation in _block.GetAnnotations()) + { + if (annotation.position < positions[i]) continue; + + while (i < positions.Count && positions[i] < annotation.position) i++; + if (i >= positions.Count) break; + + var position = positions[i]; + + if (position != annotation.position) continue; + + _annotations[_annotationsCount++] = new AnnotationItem(position, annotation.data); + } + + return i; + } + + private IEnumerable<(string refAllele, string altAllele, string jsonString)> 
ExtractAnnotations(byte[] data) + { + using (var reader = new ExtendedBinaryReader(new MemoryStream(data))) + { + if (IsPositional) + { + var positionalAnno = reader.ReadString(); + return new List<(string, string, string)> { (null, null, positionalAnno) }; + } + + var count = reader.ReadOptInt32(); + var annotations = new (string, string, string)[count]; + for (var i = 0; i < count; i++) + { + string refAllele = reader.ReadAsciiString(); + string altAllele = reader.ReadAsciiString(); + string annotation = reader.ReadString(); + annotations[i] = (refAllele ?? "", altAllele ?? "", annotation); + } + + return annotations; + } + } + + public IEnumerable<(string refAllele, string altAllele, string annotation)> GetAnnotation(IChromosome chrom, int position) + { + if (_annotations == null) return null; + var index = BinarySearch(position); + return index < 0 ? null : ExtractAnnotations(_annotations[index].Data); + } + + private int BinarySearch(int position) + { + var begin = 0; + int end = _annotationsCount - 1; + + while (begin <= end) + { + int index = begin + (end - begin >> 1); + + int ret = _annotations[index].Position.CompareTo(position); + if (ret == 0) return index; + if (ret < 0) begin = index + 1; + else end = index - 1; + } + + return ~begin; + } + + } } \ No newline at end of file diff --git a/VariantAnnotation/NSA/RefMinorDbReader.cs b/VariantAnnotation/NSA/RefMinorDbReader.cs index 800f8c8b..35bd4021 100644 --- a/VariantAnnotation/NSA/RefMinorDbReader.cs +++ b/VariantAnnotation/NSA/RefMinorDbReader.cs @@ -19,8 +19,8 @@ public RefMinorDbReader(ExtendedBinaryReader reader, ExtendedBinaryReader indexS { _reader = reader; _index = new RefMinorIndex(indexStream); - _annotations = new Dictionary(); - + _annotations = new Dictionary(); + if (_index.SchemaVersion != SaCommon.SchemaVersion) throw new UserErrorException($"SA schema version mismatch. 
Expected {SaCommon.SchemaVersion}, observed {_index.SchemaVersion}"); } diff --git a/VariantAnnotation/NSA/RefMinorIndex.cs b/VariantAnnotation/NSA/RefMinorIndex.cs index dad1a6a8..3ce3a720 100644 --- a/VariantAnnotation/NSA/RefMinorIndex.cs +++ b/VariantAnnotation/NSA/RefMinorIndex.cs @@ -21,7 +21,7 @@ public RefMinorIndex(ExtendedBinaryWriter writer, GenomeAssembly assembly, DataS _assembly = assembly; _version = version; - SchemaVersion = schemaVersion; + SchemaVersion = schemaVersion; } private ushort _chromIndex = ushort.MaxValue; diff --git a/VariantAnnotation/PerformanceMetrics.cs b/VariantAnnotation/PerformanceMetrics.cs index c74f2b2f..050dcc5a 100644 --- a/VariantAnnotation/PerformanceMetrics.cs +++ b/VariantAnnotation/PerformanceMetrics.cs @@ -1,69 +1,69 @@ -using System; -using CommandLine.Utilities; -using Genome; -using VariantAnnotation.Interface; - -namespace VariantAnnotation -{ - public sealed class PerformanceMetrics - { - private readonly Benchmark _benchmark = new Benchmark(); - private readonly ILogger _logger; - - private const int LineLength = 75; - private const int ReferenceNameLength = 51; - - private int _numVariantsInReference; - private bool _hasStartedAnnotation; - - public PerformanceMetrics(ILogger logger) - { - _logger = logger; - ShowTableHeader(); - } - - private void ShowTableHeader() - { - var divider = new string('-', LineLength); - _logger.SetBold(); - _logger.WriteLine("Reference Time Variants/s"); - _logger.ResetColor(); - _logger.WriteLine(divider); - } - - public void StartAnnotatingReference(IChromosome chromosome) - { - if (_hasStartedAnnotation) ShowAnnotationTime(); - - ShowReferenceName(chromosome.UcscName); - - _benchmark.Reset(); - _hasStartedAnnotation = true; - } - - private void ShowReferenceName(string referenceName) - { - int fillerLength = ReferenceNameLength - referenceName.Length + 1; - - if (fillerLength < 1) - { - throw new InvalidOperationException("Unable to display the performance metrics, the 
reference sequence name is too long."); - } - - var filler = new string(' ', fillerLength); - _logger.Write($"{referenceName}{filler}"); - } - - public void ShowAnnotationTime() - { - var annotationTime = Benchmark.ToHumanReadable(_benchmark.GetElapsedTime()); - - _benchmark.GetElapsedIterationTime(_numVariantsInReference, "variants", out double variantsPerSecond); - _numVariantsInReference = 0; - - _logger.WriteLine($"{annotationTime} {variantsPerSecond,12:N0}"); - } - - public void Increment() => _numVariantsInReference++; - } +using System; +using CommandLine.Utilities; +using Genome; +using VariantAnnotation.Interface; + +namespace VariantAnnotation +{ + public sealed class PerformanceMetrics + { + private readonly Benchmark _benchmark = new Benchmark(); + private readonly ILogger _logger; + + private const int LineLength = 75; + private const int ReferenceNameLength = 51; + + private int _numVariantsInReference; + private bool _hasStartedAnnotation; + + public PerformanceMetrics(ILogger logger) + { + _logger = logger; + ShowTableHeader(); + } + + private void ShowTableHeader() + { + var divider = new string('-', LineLength); + _logger.SetBold(); + _logger.WriteLine("Reference Time Variants/s"); + _logger.ResetColor(); + _logger.WriteLine(divider); + } + + public void StartAnnotatingReference(IChromosome chromosome) + { + if (_hasStartedAnnotation) ShowAnnotationTime(); + + ShowReferenceName(chromosome.UcscName); + + _benchmark.Reset(); + _hasStartedAnnotation = true; + } + + private void ShowReferenceName(string referenceName) + { + int fillerLength = ReferenceNameLength - referenceName.Length + 1; + + if (fillerLength < 1) + { + throw new InvalidOperationException("Unable to display the performance metrics, the reference sequence name is too long."); + } + + var filler = new string(' ', fillerLength); + _logger.Write($"{referenceName}{filler}"); + } + + public void ShowAnnotationTime() + { + var annotationTime = 
Benchmark.ToHumanReadable(_benchmark.GetElapsedTime()); + + _benchmark.GetElapsedIterationTime(_numVariantsInReference, "variants", out double variantsPerSecond); + _numVariantsInReference = 0; + + _logger.WriteLine($"{annotationTime} {variantsPerSecond,12:N0}"); + } + + public void Increment() => _numVariantsInReference++; + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/ConservationScoreProvider.cs b/VariantAnnotation/Providers/ConservationScoreProvider.cs index 8aafdebe..b109a7f7 100644 --- a/VariantAnnotation/Providers/ConservationScoreProvider.cs +++ b/VariantAnnotation/Providers/ConservationScoreProvider.cs @@ -1,42 +1,42 @@ -using System; -using System.Collections.Generic; -using System.IO; -using Genome; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.PhyloP; -using Variants; - -namespace VariantAnnotation.Providers -{ - public sealed class ConservationScoreProvider : IAnnotationProvider - { - private readonly NpdReader _phylopReader; - - public string Name { get; } - public GenomeAssembly Assembly => _phylopReader.Assembly; - public IEnumerable DataSourceVersions { get; } - - public ConservationScoreProvider(Stream dbStream, Stream indexStream) - { - _phylopReader = new NpdReader(dbStream, indexStream); - Name = "Conservation score provider"; - DataSourceVersions = new[] { _phylopReader.Version }; - } - - public void Annotate(IAnnotatedPosition annotatedPosition) - { - foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) - { - if (annotatedVariant.Variant.Type != VariantType.SNV) continue; - annotatedVariant.PhylopScore = _phylopReader.GetAnnotation(annotatedPosition.Position.Chromosome, annotatedVariant.Variant.Start); - } - } - - public void PreLoad(IChromosome chromosome, List positions) - { - throw new NotImplementedException(); - } - - } +using System; +using System.Collections.Generic; +using System.IO; +using Genome; +using 
VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.PhyloP; +using Variants; + +namespace VariantAnnotation.Providers +{ + public sealed class ConservationScoreProvider : IAnnotationProvider + { + private readonly NpdReader _phylopReader; + + public string Name { get; } + public GenomeAssembly Assembly => _phylopReader.Assembly; + public IEnumerable DataSourceVersions { get; } + + public ConservationScoreProvider(Stream dbStream, Stream indexStream) + { + _phylopReader = new NpdReader(dbStream, indexStream); + Name = "Conservation score provider"; + DataSourceVersions = new[] { _phylopReader.Version }; + } + + public void Annotate(IAnnotatedPosition annotatedPosition) + { + foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) + { + if (annotatedVariant.Variant.Type != VariantType.SNV) continue; + annotatedVariant.PhylopScore = _phylopReader.GetAnnotation(annotatedPosition.Position.Chromosome, annotatedVariant.Variant.Start); + } + } + + public void PreLoad(IChromosome chromosome, List positions) + { + throw new NotImplementedException(); + } + + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/DataSourceVersion.cs b/VariantAnnotation/Providers/DataSourceVersion.cs index 7e76bb21..8b884c9c 100644 --- a/VariantAnnotation/Providers/DataSourceVersion.cs +++ b/VariantAnnotation/Providers/DataSourceVersion.cs @@ -1,83 +1,83 @@ -using System.Collections.Generic; -using System.Text; -using IO; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO; -using VariantAnnotation.Utilities; - -namespace VariantAnnotation.Providers -{ - public sealed class DataSourceVersion : IDataSourceVersion, ISerializable - { - public string Name { get; } - public string Description { get; } - public string Version { get; } - public long ReleaseDateTicks { get; } - - public DataSourceVersion(string name, string version, long releaseDateTicks, string description = null) 
- { - Name = name; - Description = description; - Version = version; - Description = description; - ReleaseDateTicks = releaseDateTicks; - } - - public static IDataSourceVersion Read(ExtendedBinaryReader reader) - { - var name = reader.ReadAsciiString(); - var version = reader.ReadAsciiString(); - var releaseDateTicks = reader.ReadOptInt64(); - var description = reader.ReadAsciiString(); - return new DataSourceVersion(name, version, releaseDateTicks, description); - } - - public void Write(IExtendedBinaryWriter writer) - { - writer.WriteOptAscii(Name); - writer.WriteOptAscii(Version); - writer.WriteOpt(ReleaseDateTicks); - writer.WriteOptAscii(Description); - } - - private string GetReleaseDate() => Date.GetDate(ReleaseDateTicks); - - public override string ToString() => "dataSource=" + Name + ",version:" + Version + ",release date:" + GetReleaseDate(); - - public void SerializeJson(StringBuilder sb) - { - var jsonObject = new JsonObject(sb); - - sb.Append(JsonObject.OpenBrace); - jsonObject.AddStringValue("name", Name); - jsonObject.AddStringValue("version", Version); - if (Description != null) jsonObject.AddStringValue("description", Description.Trim()); - if (ReleaseDateTicks != 0) jsonObject.AddStringValue("releaseDate", GetReleaseDate()); - sb.Append(JsonObject.CloseBrace); - } - - - } - public sealed class DataSourceVersionComparer : EqualityComparer - { - public override bool Equals(IDataSourceVersion x, IDataSourceVersion y) - { - return string.Equals(x.Name, y.Name) && - string.Equals(x.Description, y.Description) && - string.Equals(x.Version, y.Version) && - x.ReleaseDateTicks == y.ReleaseDateTicks; - } - - public override int GetHashCode(IDataSourceVersion obj) - { - unchecked - { - var hashCode = obj.Name.GetHashCode(); - if (obj.Description != null) hashCode = (hashCode * 397) ^ obj.Description.GetHashCode(); - if (obj.Version != null) hashCode = (hashCode * 397) ^ obj.Version.GetHashCode(); - hashCode = (hashCode * 397) ^ 
obj.ReleaseDateTicks.GetHashCode(); - return hashCode; - } - } - } +using System.Collections.Generic; +using System.Text; +using IO; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO; +using VariantAnnotation.Utilities; + +namespace VariantAnnotation.Providers +{ + public sealed class DataSourceVersion : IDataSourceVersion, ISerializable + { + public string Name { get; } + public string Description { get; } + public string Version { get; } + public long ReleaseDateTicks { get; } + + public DataSourceVersion(string name, string version, long releaseDateTicks, string description = null) + { + Name = name; + Description = description; + Version = version; + Description = description; + ReleaseDateTicks = releaseDateTicks; + } + + public static IDataSourceVersion Read(ExtendedBinaryReader reader) + { + var name = reader.ReadAsciiString(); + var version = reader.ReadAsciiString(); + var releaseDateTicks = reader.ReadOptInt64(); + var description = reader.ReadAsciiString(); + return new DataSourceVersion(name, version, releaseDateTicks, description); + } + + public void Write(IExtendedBinaryWriter writer) + { + writer.WriteOptAscii(Name); + writer.WriteOptAscii(Version); + writer.WriteOpt(ReleaseDateTicks); + writer.WriteOptAscii(Description); + } + + private string GetReleaseDate() => Date.GetDate(ReleaseDateTicks); + + public override string ToString() => "dataSource=" + Name + ",version:" + Version + ",release date:" + GetReleaseDate(); + + public void SerializeJson(StringBuilder sb) + { + var jsonObject = new JsonObject(sb); + + sb.Append(JsonObject.OpenBrace); + jsonObject.AddStringValue("name", Name); + jsonObject.AddStringValue("version", Version); + if (Description != null) jsonObject.AddStringValue("description", Description.Trim()); + if (ReleaseDateTicks != 0) jsonObject.AddStringValue("releaseDate", GetReleaseDate()); + sb.Append(JsonObject.CloseBrace); + } + + + } + public sealed class DataSourceVersionComparer : EqualityComparer + { 
+ public override bool Equals(IDataSourceVersion x, IDataSourceVersion y) + { + return string.Equals(x.Name, y.Name) && + string.Equals(x.Description, y.Description) && + string.Equals(x.Version, y.Version) && + x.ReleaseDateTicks == y.ReleaseDateTicks; + } + + public override int GetHashCode(IDataSourceVersion obj) + { + unchecked + { + var hashCode = obj.Name.GetHashCode(); + if (obj.Description != null) hashCode = (hashCode * 397) ^ obj.Description.GetHashCode(); + if (obj.Version != null) hashCode = (hashCode * 397) ^ obj.Version.GetHashCode(); + hashCode = (hashCode * 397) ^ obj.ReleaseDateTicks.GetHashCode(); + return hashCode; + } + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/NsaProvider.cs b/VariantAnnotation/Providers/NsaProvider.cs index adcdce4a..f33627ea 100644 --- a/VariantAnnotation/Providers/NsaProvider.cs +++ b/VariantAnnotation/Providers/NsaProvider.cs @@ -1,204 +1,204 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using CommandLine.Utilities; -using ErrorHandling.Exceptions; -using Genome; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.Interface.SA; -using VariantAnnotation.NSA; -using Variants; - -namespace VariantAnnotation.Providers -{ - public sealed class NsaProvider : IAnnotationProvider - { - public string Name => "Supplementary annotation provider"; - public GenomeAssembly Assembly { get; } - public IEnumerable DataSourceVersions { get; } - private readonly INsaReader[] _nsaReaders; - private readonly INsiReader[] _nsiReaders; - - public NsaProvider(INsaReader[] nsaReaders, INsiReader[] nsiReaders) - { - _nsaReaders = nsaReaders; - _nsiReaders = nsiReaders; - - IEnumerable assemblies = null; - if (_nsaReaders != null) - { - DataSourceVersions = _nsaReaders.Select(x => x.Version); - assemblies = _nsaReaders.Select(x => x.Assembly); - } - - if (_nsiReaders != null) - { - assemblies = 
assemblies?.Concat(_nsiReaders.Select(x => x.Assembly)) ?? _nsiReaders.Select(x => x.Assembly); - DataSourceVersions = DataSourceVersions?.Concat(_nsiReaders.Select(x => x.Version)) ?? _nsiReaders.Select(x => x.Version); - } - - var distinctAssemblies = assemblies?.Where(x => GenomeAssemblyHelper.AutosomeAndAllosomeAssemblies.Contains(x)).Distinct().ToArray(); - if (distinctAssemblies == null || distinctAssemblies.Length > 1) - { - if (_nsaReaders != null) - foreach (INsaReader nsaReader in _nsaReaders) - { - Console.WriteLine(nsaReader.Version + "\tAssembly:" + nsaReader.Assembly); - } - if (_nsiReaders != null) - foreach (INsiReader nsiReader in _nsiReaders) - { - Console.WriteLine(nsiReader.Version + "\tAssembly:" + nsiReader.Assembly); - } - throw new UserErrorException("Multilpe genome assemblies detected in Supplementary annotation directory"); - } - - Assembly = distinctAssemblies[0]; - } - - public void Annotate(IAnnotatedPosition annotatedPosition) - { - if (_nsaReaders != null) AddSmallVariantAnnotations(annotatedPosition); - - if (_nsiReaders != null) GetStructuralVariantAnnotations(annotatedPosition); - } - - private void GetStructuralVariantAnnotations(IAnnotatedPosition annotatedPosition) - { - var needSaIntervals = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior.NeedSaInterval); - var needSmallAnnotation = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior.NeedSaPosition); - - foreach (INsiReader nsiReader in _nsiReaders) - { - var position = annotatedPosition.Position; - if (nsiReader.ReportFor == ReportFor.SmallVariants && !needSmallAnnotation) continue; - if (nsiReader.ReportFor == ReportFor.StructuralVariants && !needSaIntervals) continue; - - var annotations = nsiReader.GetAnnotation(position.Variants[0]); - if (annotations == null) continue; - - annotatedPosition.SupplementaryIntervals.Add(new SupplementaryAnnotation(nsiReader.JsonKey, true, false, null, annotations)); - } - - } - - private void 
AddSmallVariantAnnotations(IAnnotatedPosition annotatedPosition) - { - foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) - { - if (!annotatedVariant.Variant.Behavior.NeedSaPosition) continue; - AddSmallAnnotations(annotatedVariant); - - //check for interval annotations that applies to all variants - if(_nsiReaders ==null) continue; - AddLargeAnnotationsToSmallVariants(annotatedVariant); - } - } - - private void AddLargeAnnotationsToSmallVariants(IAnnotatedVariant annotatedVariant) - { - foreach (INsiReader nsiReader in _nsiReaders) - { - if (nsiReader.ReportFor == ReportFor.StructuralVariants || - nsiReader.ReportFor == ReportFor.None) continue; - - var variant = annotatedVariant.Variant; - var annotations = nsiReader.GetAnnotation(variant); - if (annotations != null) AddPositionalAnnotation(annotations, annotatedVariant, nsiReader); - } - } - - private void AddSmallAnnotations(IAnnotatedVariant annotatedVariant) - { - foreach (INsaReader nsaReader in _nsaReaders) - { - var variant = annotatedVariant.Variant; - var annotations = nsaReader.GetAnnotation(variant.Chromosome, variant.Start); - if (annotations == null) continue; - - if (nsaReader.IsPositional) - { - AddPositionalAnnotation(annotations, annotatedVariant, nsaReader); - continue; - } - - if (nsaReader.MatchByAllele) AddAlleleSpecificAnnotation(nsaReader, annotations, annotatedVariant, variant); - - else AddNonAlleleSpecificAnnotations(annotations, variant, annotatedVariant, nsaReader); - } - } - - private void AddPositionalAnnotation(IEnumerable annotations, IAnnotatedVariant annotatedVariant, INsiReader nsiReader) - { - annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsiReader.JsonKey, true, true, null, annotations)); - } - - private static void AddPositionalAnnotation(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, IAnnotatedVariant annotatedVariant, - INsaReader nsaReader) - { - //e.g. 
ancestral allele, global minor allele - var jsonString = annotations.First().annotation; - annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, - nsaReader.IsPositional, jsonString, null)); - } - - private static void AddNonAlleleSpecificAnnotations(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, IVariant variant, - IAnnotatedVariant annotatedVariant, INsaReader nsaReader) - { - var jsonStrings = new List(); - foreach ((string refAllele, string altAllele, string jsonString) in annotations) - { - if (refAllele == variant.RefAllele && altAllele == variant.AltAllele) - jsonStrings.Add(jsonString + ",\"isAlleleSpecific\":true"); - else jsonStrings.Add(jsonString); - } - - if (jsonStrings.Count > 0) - annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, - nsaReader.IsPositional, null, jsonStrings)); - } - - private static void AddAlleleSpecificAnnotation(INsaReader nsaReader, IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, - IAnnotatedVariant annotatedVariant, IVariant variant) - { - if (nsaReader.IsArray) - { - var jsonStrings = new List(); - foreach ((string refAllele, string altAllele, string jsonString) in annotations) - { - if (refAllele == variant.RefAllele && altAllele == variant.AltAllele) - jsonStrings.Add(jsonString); - } - - if (jsonStrings.Count > 0) - annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, - nsaReader.IsPositional, null, jsonStrings)); - } - else - foreach ((string refAllele, string altAllele, string jsonString) in annotations) - { - if (refAllele != variant.RefAllele || altAllele != variant.AltAllele) continue; - annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, - nsaReader.IsPositional, jsonString, null)); - break; - } - } - - public void PreLoad(IChromosome chromosome, List positions) - { - var benchmark = new 
Benchmark(); - Console.Write("Pre-loading SA...."); - - foreach (INsaReader nsaReader in _nsaReaders) - { - nsaReader.PreLoad(chromosome, positions); - } - - var totalTime = benchmark.GetElapsedTime(); - //var rate = totalBytes * 1.0 / (totalTime.TotalSeconds * 1_000_000);// MB/sec - Console.WriteLine($"{Benchmark.ToHumanReadable(totalTime)}");//. Data rate {rate:#.##} MB/sec"); - //Console.WriteLine($"No of http stream sources created {HttpStreamSource.Count}"); - } - } +using System; +using System.Collections.Generic; +using System.Linq; +using CommandLine.Utilities; +using ErrorHandling.Exceptions; +using Genome; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.Interface.SA; +using VariantAnnotation.NSA; +using Variants; + +namespace VariantAnnotation.Providers +{ + public sealed class NsaProvider : IAnnotationProvider + { + public string Name => "Supplementary annotation provider"; + public GenomeAssembly Assembly { get; } + public IEnumerable DataSourceVersions { get; } + private readonly INsaReader[] _nsaReaders; + private readonly INsiReader[] _nsiReaders; + + public NsaProvider(INsaReader[] nsaReaders, INsiReader[] nsiReaders) + { + _nsaReaders = nsaReaders; + _nsiReaders = nsiReaders; + + IEnumerable assemblies = null; + if (_nsaReaders != null) + { + DataSourceVersions = _nsaReaders.Select(x => x.Version); + assemblies = _nsaReaders.Select(x => x.Assembly); + } + + if (_nsiReaders != null) + { + assemblies = assemblies?.Concat(_nsiReaders.Select(x => x.Assembly)) ?? _nsiReaders.Select(x => x.Assembly); + DataSourceVersions = DataSourceVersions?.Concat(_nsiReaders.Select(x => x.Version)) ?? 
_nsiReaders.Select(x => x.Version); + } + + var distinctAssemblies = assemblies?.Where(x => GenomeAssemblyHelper.AutosomeAndAllosomeAssemblies.Contains(x)).Distinct().ToArray(); + if (distinctAssemblies == null || distinctAssemblies.Length > 1) + { + if (_nsaReaders != null) + foreach (INsaReader nsaReader in _nsaReaders) + { + Console.WriteLine(nsaReader.Version + "\tAssembly:" + nsaReader.Assembly); + } + if (_nsiReaders != null) + foreach (INsiReader nsiReader in _nsiReaders) + { + Console.WriteLine(nsiReader.Version + "\tAssembly:" + nsiReader.Assembly); + } + throw new UserErrorException("Multilpe genome assemblies detected in Supplementary annotation directory"); + } + + Assembly = distinctAssemblies[0]; + } + + public void Annotate(IAnnotatedPosition annotatedPosition) + { + if (_nsaReaders != null) AddSmallVariantAnnotations(annotatedPosition); + + if (_nsiReaders != null) GetStructuralVariantAnnotations(annotatedPosition); + } + + private void GetStructuralVariantAnnotations(IAnnotatedPosition annotatedPosition) + { + var needSaIntervals = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior.NeedSaInterval); + var needSmallAnnotation = annotatedPosition.AnnotatedVariants.Any(x => x.Variant.Behavior.NeedSaPosition); + + foreach (INsiReader nsiReader in _nsiReaders) + { + var position = annotatedPosition.Position; + if (nsiReader.ReportFor == ReportFor.SmallVariants && !needSmallAnnotation) continue; + if (nsiReader.ReportFor == ReportFor.StructuralVariants && !needSaIntervals) continue; + + var annotations = nsiReader.GetAnnotation(position.Variants[0]); + if (annotations == null) continue; + + annotatedPosition.SupplementaryIntervals.Add(new SupplementaryAnnotation(nsiReader.JsonKey, true, false, null, annotations)); + } + + } + + private void AddSmallVariantAnnotations(IAnnotatedPosition annotatedPosition) + { + foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) + { + if (!annotatedVariant.Variant.Behavior.NeedSaPosition) 
continue; + AddSmallAnnotations(annotatedVariant); + + //check for interval annotations that applies to all variants + if(_nsiReaders ==null) continue; + AddLargeAnnotationsToSmallVariants(annotatedVariant); + } + } + + private void AddLargeAnnotationsToSmallVariants(IAnnotatedVariant annotatedVariant) + { + foreach (INsiReader nsiReader in _nsiReaders) + { + if (nsiReader.ReportFor == ReportFor.StructuralVariants || + nsiReader.ReportFor == ReportFor.None) continue; + + var variant = annotatedVariant.Variant; + var annotations = nsiReader.GetAnnotation(variant); + if (annotations != null) AddPositionalAnnotation(annotations, annotatedVariant, nsiReader); + } + } + + private void AddSmallAnnotations(IAnnotatedVariant annotatedVariant) + { + foreach (INsaReader nsaReader in _nsaReaders) + { + var variant = annotatedVariant.Variant; + var annotations = nsaReader.GetAnnotation(variant.Chromosome, variant.Start); + if (annotations == null) continue; + + if (nsaReader.IsPositional) + { + AddPositionalAnnotation(annotations, annotatedVariant, nsaReader); + continue; + } + + if (nsaReader.MatchByAllele) AddAlleleSpecificAnnotation(nsaReader, annotations, annotatedVariant, variant); + + else AddNonAlleleSpecificAnnotations(annotations, variant, annotatedVariant, nsaReader); + } + } + + private void AddPositionalAnnotation(IEnumerable annotations, IAnnotatedVariant annotatedVariant, INsiReader nsiReader) + { + annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsiReader.JsonKey, true, true, null, annotations)); + } + + private static void AddPositionalAnnotation(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, IAnnotatedVariant annotatedVariant, + INsaReader nsaReader) + { + //e.g. 
ancestral allele, global minor allele + var jsonString = annotations.First().annotation; + annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, + nsaReader.IsPositional, jsonString, null)); + } + + private static void AddNonAlleleSpecificAnnotations(IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, IVariant variant, + IAnnotatedVariant annotatedVariant, INsaReader nsaReader) + { + var jsonStrings = new List(); + foreach ((string refAllele, string altAllele, string jsonString) in annotations) + { + if (refAllele == variant.RefAllele && altAllele == variant.AltAllele) + jsonStrings.Add(jsonString + ",\"isAlleleSpecific\":true"); + else jsonStrings.Add(jsonString); + } + + if (jsonStrings.Count > 0) + annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, + nsaReader.IsPositional, null, jsonStrings)); + } + + private static void AddAlleleSpecificAnnotation(INsaReader nsaReader, IEnumerable<(string refAllele, string altAllele, string annotation)> annotations, + IAnnotatedVariant annotatedVariant, IVariant variant) + { + if (nsaReader.IsArray) + { + var jsonStrings = new List(); + foreach ((string refAllele, string altAllele, string jsonString) in annotations) + { + if (refAllele == variant.RefAllele && altAllele == variant.AltAllele) + jsonStrings.Add(jsonString); + } + + if (jsonStrings.Count > 0) + annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, + nsaReader.IsPositional, null, jsonStrings)); + } + else + foreach ((string refAllele, string altAllele, string jsonString) in annotations) + { + if (refAllele != variant.RefAllele || altAllele != variant.AltAllele) continue; + annotatedVariant.SaList.Add(new SupplementaryAnnotation(nsaReader.JsonKey, nsaReader.IsArray, + nsaReader.IsPositional, jsonString, null)); + break; + } + } + + public void PreLoad(IChromosome chromosome, List positions) + { + var benchmark = new 
Benchmark(); + Console.Write("Pre-loading SA...."); + + foreach (INsaReader nsaReader in _nsaReaders) + { + nsaReader.PreLoad(chromosome, positions); + } + + var totalTime = benchmark.GetElapsedTime(); + //var rate = totalBytes * 1.0 / (totalTime.TotalSeconds * 1_000_000);// MB/sec + Console.WriteLine($"{Benchmark.ToHumanReadable(totalTime)}");//. Data rate {rate:#.##} MB/sec"); + //Console.WriteLine($"No of http stream sources created {HttpStreamSource.Count}"); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/RefMinorProvider.cs b/VariantAnnotation/Providers/RefMinorProvider.cs index 3ab4c3e6..5610030d 100644 --- a/VariantAnnotation/Providers/RefMinorProvider.cs +++ b/VariantAnnotation/Providers/RefMinorProvider.cs @@ -1,20 +1,20 @@ -using System.IO; -using Genome; -using IO; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.NSA; - -namespace VariantAnnotation.Providers -{ - public sealed class RefMinorProvider : IRefMinorProvider - { - private readonly RefMinorDbReader _reader; - - public RefMinorProvider(Stream dbStream, Stream indexStream) - { - _reader = new RefMinorDbReader(new ExtendedBinaryReader(dbStream), new ExtendedBinaryReader(indexStream)); - } - - public string GetGlobalMajorAllele(IChromosome chromosome, int pos) => _reader.GetGlobalMajorAllele(chromosome, pos); - } +using System.IO; +using Genome; +using IO; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.NSA; + +namespace VariantAnnotation.Providers +{ + public sealed class RefMinorProvider : IRefMinorProvider + { + private readonly RefMinorDbReader _reader; + + public RefMinorProvider(Stream dbStream, Stream indexStream) + { + _reader = new RefMinorDbReader(new ExtendedBinaryReader(dbStream), new ExtendedBinaryReader(indexStream)); + } + + public string GetGlobalMajorAllele(IChromosome chromosome, int pos) => _reader.GetGlobalMajorAllele(chromosome, pos); + } } \ No newline at end of file diff --git 
a/VariantAnnotation/Providers/ReferenceSequenceProvider.cs b/VariantAnnotation/Providers/ReferenceSequenceProvider.cs index 19d99682..9d186445 100644 --- a/VariantAnnotation/Providers/ReferenceSequenceProvider.cs +++ b/VariantAnnotation/Providers/ReferenceSequenceProvider.cs @@ -1,55 +1,55 @@ -using System.Collections.Generic; -using System.IO; -using Genome; -using Intervals; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.Sequence; - -namespace VariantAnnotation.Providers -{ - public sealed class ReferenceSequenceProvider : ISequenceProvider - { - public IDictionary RefNameToChromosome => _sequenceReader.RefNameToChromosome; - public IDictionary RefIndexToChromosome => _sequenceReader.RefIndexToChromosome; - public GenomeAssembly Assembly => _sequenceReader.Assembly; - public ISequence Sequence { get; } - - public string Name { get; } = "Reference sequence provider"; - public IEnumerable DataSourceVersions { get; } = null; - - private IChromosome _currentChromosome; - private readonly CompressedSequenceReader _sequenceReader; - - public ReferenceSequenceProvider(Stream stream) - { - _currentChromosome = new EmptyChromosome(string.Empty); - _sequenceReader = new CompressedSequenceReader(stream); - Sequence = _sequenceReader.Sequence; - } - - public void Annotate(IAnnotatedPosition annotatedPosition) - { - if (annotatedPosition.AnnotatedVariants == null) return; - - annotatedPosition.CytogeneticBand = Sequence.CytogeneticBands.Find(annotatedPosition.Position.Chromosome, - annotatedPosition.Position.Start, annotatedPosition.Position.End); - - string refSeqAccession = annotatedPosition.Position.Chromosome.RefSeqAccession; - foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) - { - annotatedVariant.HgvsgNotation = HgvsgNotation.GetNotation(refSeqAccession, annotatedVariant.Variant, Sequence, new Interval(0, Sequence.Length)); - } 
- } - - public void PreLoad(IChromosome chromosome, List positions) => throw new System.NotImplementedException(); - - public void LoadChromosome(IChromosome chromosome) - { - if (chromosome.Index == _currentChromosome.Index) return; - _sequenceReader.GetCompressedSequence(chromosome); - _currentChromosome = chromosome; - } - } +using System.Collections.Generic; +using System.IO; +using Genome; +using Intervals; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.Sequence; + +namespace VariantAnnotation.Providers +{ + public sealed class ReferenceSequenceProvider : ISequenceProvider + { + public IDictionary RefNameToChromosome => _sequenceReader.RefNameToChromosome; + public IDictionary RefIndexToChromosome => _sequenceReader.RefIndexToChromosome; + public GenomeAssembly Assembly => _sequenceReader.Assembly; + public ISequence Sequence { get; } + + public string Name { get; } = "Reference sequence provider"; + public IEnumerable DataSourceVersions { get; } = null; + + private IChromosome _currentChromosome; + private readonly CompressedSequenceReader _sequenceReader; + + public ReferenceSequenceProvider(Stream stream) + { + _currentChromosome = new EmptyChromosome(string.Empty); + _sequenceReader = new CompressedSequenceReader(stream); + Sequence = _sequenceReader.Sequence; + } + + public void Annotate(IAnnotatedPosition annotatedPosition) + { + if (annotatedPosition.AnnotatedVariants == null) return; + + annotatedPosition.CytogeneticBand = Sequence.CytogeneticBands.Find(annotatedPosition.Position.Chromosome, + annotatedPosition.Position.Start, annotatedPosition.Position.End); + + string refSeqAccession = annotatedPosition.Position.Chromosome.RefSeqAccession; + foreach (var annotatedVariant in annotatedPosition.AnnotatedVariants) + { + annotatedVariant.HgvsgNotation = HgvsgNotation.GetNotation(refSeqAccession, annotatedVariant.Variant, Sequence, 
new Interval(0, Sequence.Length)); + } + } + + public void PreLoad(IChromosome chromosome, List positions) => throw new System.NotImplementedException(); + + public void LoadChromosome(IChromosome chromosome) + { + if (chromosome.Index == _currentChromosome.Index) return; + _sequenceReader.GetCompressedSequence(chromosome); + _currentChromosome = chromosome; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/TranscriptAnnotationProvider.cs b/VariantAnnotation/Providers/TranscriptAnnotationProvider.cs index 30e0cf4f..4fff331c 100644 --- a/VariantAnnotation/Providers/TranscriptAnnotationProvider.cs +++ b/VariantAnnotation/Providers/TranscriptAnnotationProvider.cs @@ -1,199 +1,199 @@ -using System.Collections.Generic; -using System.IO; -using System.Linq; -using ErrorHandling.Exceptions; -using Genome; -using Intervals; -using IO; -using OptimizedCore; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.Caches; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO.Caches; -using VariantAnnotation.TranscriptAnnotation; -using Variants; - -namespace VariantAnnotation.Providers -{ - public sealed class TranscriptAnnotationProvider : ITranscriptAnnotationProvider - { - private readonly ITranscriptCache _transcriptCache; - private readonly ISequence _sequence; - - public string Name { get; } - public GenomeAssembly Assembly { get; } - public IEnumerable DataSourceVersions { get; } - public IntervalArray[] TranscriptIntervalArrays { get; } - public ushort VepVersion { get; } - - private readonly PredictionCacheReader _siftReader; - private readonly PredictionCacheReader _polyphenReader; - private IPredictionCache _siftCache; - private IPredictionCache _polyphenCache; - private ushort _currentRefIndex = ushort.MaxValue; - - public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider) 
- { - Name = "Transcript annotation provider"; - _sequence = sequenceProvider.Sequence; - - var transcriptStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)); - // SET-362 DEBUG: Remove the sequenceProvider argument in the future - (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly, sequenceProvider); - - Assembly = _transcriptCache.Assembly; - DataSourceVersions = _transcriptCache.DataSourceVersions; - - - var siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix)); - _siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions); - - var polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix)); - _polyphenReader = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions); - } - - private static (TranscriptCache Cache, IntervalArray[] TranscriptIntervalArrays, ushort VepVersion) InitiateCache(Stream stream, - IDictionary refIndexToChromosome, GenomeAssembly refAssembly, ISequenceProvider sequenceProvider) - { - TranscriptCache cache; - ushort vepVersion; - TranscriptCacheData cacheData; - - using (var reader = new TranscriptCacheReader(stream)) - { - vepVersion = reader.Header.Custom.VepVersion; - CheckHeaderVersion(reader.Header, refAssembly); - - cacheData = reader.Read(sequenceProvider, refIndexToChromosome); - cache = cacheData.GetCache(); - } - - return (cache, cacheData.TranscriptIntervalArrays, vepVersion); - } - - private static void CheckHeaderVersion(Header header, GenomeAssembly refAssembly) - { - if (header.Assembly != refAssembly) - throw new UserErrorException(GetAssemblyErrorMessage(header.Assembly, refAssembly)); - - if (header.SchemaVersion != CacheConstants.SchemaVersion) - throw new UserErrorException( - $"Expected the cache schema version ({CacheConstants.SchemaVersion}) to be 
identical to the schema version in the cache header ({header.SchemaVersion})"); - } - - private static string GetAssemblyErrorMessage(GenomeAssembly cacheAssembly, GenomeAssembly refAssembly) - { - var sb = StringBuilderCache.Acquire(); - sb.AppendLine("Not all of the data sources have the same genome assembly:"); - sb.AppendLine($"- Using {refAssembly}: Reference sequence provider"); - sb.AppendLine($"- Using {cacheAssembly}: Transcript annotation provider"); - return StringBuilderCache.GetStringAndRelease(sb); - } - - public void Annotate(IAnnotatedPosition annotatedPosition) - { - if (annotatedPosition.AnnotatedVariants == null || annotatedPosition.AnnotatedVariants.Length == 0) return; - - ushort refIndex = annotatedPosition.Position.Chromosome.Index; - LoadPredictionCaches(refIndex); - - AddRegulatoryRegions(annotatedPosition.AnnotatedVariants, _transcriptCache.RegulatoryIntervalForest); - AddTranscripts(annotatedPosition.AnnotatedVariants, _transcriptCache.TranscriptIntervalForest); - } - - private void AddTranscripts(IAnnotatedVariant[] annotatedVariants, IIntervalForest transcriptIntervalForest) - { - foreach (var annotatedVariant in annotatedVariants) - { - var variant = annotatedVariant.Variant; - - ITranscript[] geneFusionCandidates = GetGeneFusionCandidates(variant.BreakEnds, transcriptIntervalForest); - ITranscript[] transcripts = transcriptIntervalForest.GetAllFlankingValues(variant); - if (transcripts == null) continue; - - IList annotatedTranscripts = - TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant, transcripts, _sequence, _siftCache, - _polyphenCache, geneFusionCandidates); - - if (annotatedTranscripts.Count == 0) continue; - - foreach (var annotatedTranscript in annotatedTranscripts) - annotatedVariant.Transcripts.Add(annotatedTranscript); - } - } - - public void PreLoad(IChromosome chromosome, List positions) => throw new System.NotImplementedException(); - - private void LoadPredictionCaches(ushort refIndex) - { - if (refIndex == 
_currentRefIndex) return; - - if (refIndex == ushort.MaxValue) - { - ClearCache(); - return; - } - - _siftCache = _siftReader.Read(refIndex); - _polyphenCache = _polyphenReader.Read(refIndex); - _currentRefIndex = refIndex; - } - - private void ClearCache() - { - _siftCache = null; - _polyphenCache = null; - _currentRefIndex = ushort.MaxValue; - } - - private static ITranscript[] GetGeneFusionCandidates(IBreakEnd[] breakEnds, IIntervalForest transcriptIntervalForest) - { - if (breakEnds == null || breakEnds.Length == 0) return null; - - var geneFusionCandidates = new HashSet(); - - foreach (var breakEnd in breakEnds) - { - ITranscript[] transcripts = transcriptIntervalForest.GetAllOverlappingValues( - breakEnd.Piece2.Chromosome.Index, breakEnd.Piece2.Position, breakEnd.Piece2.Position); - if (transcripts == null) continue; - - foreach (var transcript in transcripts) geneFusionCandidates.Add(transcript); - } - - return geneFusionCandidates.ToArray(); - } - - private static void AddRegulatoryRegions(IAnnotatedVariant[] annotatedVariants, IIntervalForest regulatoryIntervalForest) - { - foreach (var annotatedVariant in annotatedVariants) - { - // In case of insertions, the base(s) are assumed to be inserted at the end position - // if this is an insertion just before the beginning of the regulatory element, this takes care of it - var variant = annotatedVariant.Variant; - int variantBegin = variant.Type == VariantType.insertion ? 
variant.End : variant.Start; - - if (SkipLargeVariants(variantBegin, variant.End)) continue; - - IRegulatoryRegion[] regulatoryRegions = - regulatoryIntervalForest.GetAllOverlappingValues(variant.Chromosome.Index, variantBegin, - variant.End); - if (regulatoryRegions == null) continue; - - foreach (var regulatoryRegion in regulatoryRegions) - { - // if the insertion is at the end, its past the feature and therefore not overlapping - if (variant.Type == VariantType.insertion && variant.End == regulatoryRegion.End) continue; - - annotatedVariant.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, regulatoryRegion)); - } - } - } - - private const int MaxSvLengthForRegulatoryRegionAnnotation = 50000; - - private static bool SkipLargeVariants(int begin, int end) => end - begin + 1 > MaxSvLengthForRegulatoryRegionAnnotation; - } +using System.Collections.Generic; +using System.IO; +using System.Linq; +using ErrorHandling.Exceptions; +using Genome; +using Intervals; +using IO; +using OptimizedCore; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.Caches; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO.Caches; +using VariantAnnotation.TranscriptAnnotation; +using Variants; + +namespace VariantAnnotation.Providers +{ + public sealed class TranscriptAnnotationProvider : ITranscriptAnnotationProvider + { + private readonly ITranscriptCache _transcriptCache; + private readonly ISequence _sequence; + + public string Name { get; } + public GenomeAssembly Assembly { get; } + public IEnumerable DataSourceVersions { get; } + public IntervalArray[] TranscriptIntervalArrays { get; } + public ushort VepVersion { get; } + + private readonly PredictionCacheReader _siftReader; + private readonly PredictionCacheReader _polyphenReader; + private IPredictionCache _siftCache; + private IPredictionCache _polyphenCache; + private ushort 
_currentRefIndex = ushort.MaxValue; + + public TranscriptAnnotationProvider(string pathPrefix, ISequenceProvider sequenceProvider) + { + Name = "Transcript annotation provider"; + _sequence = sequenceProvider.Sequence; + + var transcriptStream = PersistentStreamUtils.GetReadStream(CacheConstants.TranscriptPath(pathPrefix)); + // SET-362 DEBUG: Remove the sequenceProvider argument in the future + (_transcriptCache, TranscriptIntervalArrays, VepVersion) = InitiateCache(transcriptStream, sequenceProvider.RefIndexToChromosome, sequenceProvider.Assembly, sequenceProvider); + + Assembly = _transcriptCache.Assembly; + DataSourceVersions = _transcriptCache.DataSourceVersions; + + + var siftStream = PersistentStreamUtils.GetReadStream(CacheConstants.SiftPath(pathPrefix)); + _siftReader = new PredictionCacheReader(siftStream, PredictionCacheReader.SiftDescriptions); + + var polyphenStream = PersistentStreamUtils.GetReadStream(CacheConstants.PolyPhenPath(pathPrefix)); + _polyphenReader = new PredictionCacheReader(polyphenStream, PredictionCacheReader.PolyphenDescriptions); + } + + private static (TranscriptCache Cache, IntervalArray[] TranscriptIntervalArrays, ushort VepVersion) InitiateCache(Stream stream, + IDictionary refIndexToChromosome, GenomeAssembly refAssembly, ISequenceProvider sequenceProvider) + { + TranscriptCache cache; + ushort vepVersion; + TranscriptCacheData cacheData; + + using (var reader = new TranscriptCacheReader(stream)) + { + vepVersion = reader.Header.Custom.VepVersion; + CheckHeaderVersion(reader.Header, refAssembly); + + cacheData = reader.Read(sequenceProvider, refIndexToChromosome); + cache = cacheData.GetCache(); + } + + return (cache, cacheData.TranscriptIntervalArrays, vepVersion); + } + + private static void CheckHeaderVersion(Header header, GenomeAssembly refAssembly) + { + if (header.Assembly != refAssembly) + throw new UserErrorException(GetAssemblyErrorMessage(header.Assembly, refAssembly)); + + if (header.SchemaVersion != 
CacheConstants.SchemaVersion) + throw new UserErrorException( + $"Expected the cache schema version ({CacheConstants.SchemaVersion}) to be identical to the schema version in the cache header ({header.SchemaVersion})"); + } + + private static string GetAssemblyErrorMessage(GenomeAssembly cacheAssembly, GenomeAssembly refAssembly) + { + var sb = StringBuilderCache.Acquire(); + sb.AppendLine("Not all of the data sources have the same genome assembly:"); + sb.AppendLine($"- Using {refAssembly}: Reference sequence provider"); + sb.AppendLine($"- Using {cacheAssembly}: Transcript annotation provider"); + return StringBuilderCache.GetStringAndRelease(sb); + } + + public void Annotate(IAnnotatedPosition annotatedPosition) + { + if (annotatedPosition.AnnotatedVariants == null || annotatedPosition.AnnotatedVariants.Length == 0) return; + + ushort refIndex = annotatedPosition.Position.Chromosome.Index; + LoadPredictionCaches(refIndex); + + AddRegulatoryRegions(annotatedPosition.AnnotatedVariants, _transcriptCache.RegulatoryIntervalForest); + AddTranscripts(annotatedPosition.AnnotatedVariants, _transcriptCache.TranscriptIntervalForest); + } + + private void AddTranscripts(IAnnotatedVariant[] annotatedVariants, IIntervalForest transcriptIntervalForest) + { + foreach (var annotatedVariant in annotatedVariants) + { + var variant = annotatedVariant.Variant; + + ITranscript[] geneFusionCandidates = GetGeneFusionCandidates(variant.BreakEnds, transcriptIntervalForest); + ITranscript[] transcripts = transcriptIntervalForest.GetAllFlankingValues(variant); + if (transcripts == null) continue; + + IList annotatedTranscripts = + TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant, transcripts, _sequence, _siftCache, + _polyphenCache, geneFusionCandidates); + + if (annotatedTranscripts.Count == 0) continue; + + foreach (var annotatedTranscript in annotatedTranscripts) + annotatedVariant.Transcripts.Add(annotatedTranscript); + } + } + + public void PreLoad(IChromosome chromosome, 
List positions) => throw new System.NotImplementedException(); + + private void LoadPredictionCaches(ushort refIndex) + { + if (refIndex == _currentRefIndex) return; + + if (refIndex == ushort.MaxValue) + { + ClearCache(); + return; + } + + _siftCache = _siftReader.Read(refIndex); + _polyphenCache = _polyphenReader.Read(refIndex); + _currentRefIndex = refIndex; + } + + private void ClearCache() + { + _siftCache = null; + _polyphenCache = null; + _currentRefIndex = ushort.MaxValue; + } + + private static ITranscript[] GetGeneFusionCandidates(IBreakEnd[] breakEnds, IIntervalForest transcriptIntervalForest) + { + if (breakEnds == null || breakEnds.Length == 0) return null; + + var geneFusionCandidates = new HashSet(); + + foreach (var breakEnd in breakEnds) + { + ITranscript[] transcripts = transcriptIntervalForest.GetAllOverlappingValues( + breakEnd.Piece2.Chromosome.Index, breakEnd.Piece2.Position, breakEnd.Piece2.Position); + if (transcripts == null) continue; + + foreach (var transcript in transcripts) geneFusionCandidates.Add(transcript); + } + + return geneFusionCandidates.ToArray(); + } + + private static void AddRegulatoryRegions(IAnnotatedVariant[] annotatedVariants, IIntervalForest regulatoryIntervalForest) + { + foreach (var annotatedVariant in annotatedVariants) + { + // In case of insertions, the base(s) are assumed to be inserted at the end position + // if this is an insertion just before the beginning of the regulatory element, this takes care of it + var variant = annotatedVariant.Variant; + int variantBegin = variant.Type == VariantType.insertion ? 
variant.End : variant.Start; + + if (SkipLargeVariants(variantBegin, variant.End)) continue; + + IRegulatoryRegion[] regulatoryRegions = + regulatoryIntervalForest.GetAllOverlappingValues(variant.Chromosome.Index, variantBegin, + variant.End); + if (regulatoryRegions == null) continue; + + foreach (var regulatoryRegion in regulatoryRegions) + { + // if the insertion is at the end, its past the feature and therefore not overlapping + if (variant.Type == VariantType.insertion && variant.End == regulatoryRegion.End) continue; + + annotatedVariant.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, regulatoryRegion)); + } + } + } + + private const int MaxSvLengthForRegulatoryRegionAnnotation = 50000; + + private static bool SkipLargeVariants(int begin, int end) => end - begin + 1 > MaxSvLengthForRegulatoryRegionAnnotation; + } } \ No newline at end of file diff --git a/VariantAnnotation/Providers/VersionProvider.cs b/VariantAnnotation/Providers/VersionProvider.cs index fea3b321..fe1ad7cd 100644 --- a/VariantAnnotation/Providers/VersionProvider.cs +++ b/VariantAnnotation/Providers/VersionProvider.cs @@ -1,12 +1,12 @@ -using VariantAnnotation.Interface.Providers; -using VariantAnnotation.IO.Caches; -using VariantAnnotation.SA; -using VariantAnnotation.Sequence; - -namespace VariantAnnotation.Providers -{ - public sealed class VersionProvider : IVersionProvider - { - public string DataVersion { get; } = $"Cache version: {CacheConstants.DataVersion}, Supplementary annotation version: {SaCommon.DataVersion}, Reference version: {ReferenceSequenceCommon.HeaderVersion}"; - } +using VariantAnnotation.Interface.Providers; +using VariantAnnotation.IO.Caches; +using VariantAnnotation.SA; +using VariantAnnotation.Sequence; + +namespace VariantAnnotation.Providers +{ + public sealed class VersionProvider : IVersionProvider + { + public string DataVersion { get; } = $"Cache version: {CacheConstants.DataVersion}, Supplementary annotation version: {SaCommon.DataVersion}, 
Reference version: {ReferenceSequenceCommon.HeaderVersion}"; + } } \ No newline at end of file diff --git a/VariantAnnotation/SA/SaCommon.cs b/VariantAnnotation/SA/SaCommon.cs index 2df5aa6d..8a283367 100644 --- a/VariantAnnotation/SA/SaCommon.cs +++ b/VariantAnnotation/SA/SaCommon.cs @@ -1,38 +1,38 @@ -namespace VariantAnnotation.SA -{ - public static class SaCommon - { - public const int DefaultBlockSize = 8 * 1024 * 1024; - public const ushort DataVersion = 50; - public const ushort SchemaVersion = 22; - - public const double RefMinorThreshold = 0.95; - - public const string IndexSufix = ".idx"; - public const string SaFileSuffix = ".nsa"; - public const string PhylopFileSuffix = ".npd"; - public const string RefMinorFileSuffix = ".rma"; - public const string SiFileSuffix = ".nsi"; - public const string NgaFileSuffix = ".nga"; - public const string JsonSchemaSuffix = ".schema"; - - public const string DbsnpTag = "dbsnp"; - public const string GlobalAlleleTag = "globalAllele"; - public const string OneKgenTag = "oneKg"; - public const string AncestralAlleleTag = "ancestralAllele"; - public const string RefMinorTag = "refMinor"; - public const string GnomadTag = "gnomad"; - public const string GnomadExomeTag = "gnomadExome"; - public const string ClinvarTag = "clinvar"; - public const string CosmicTag = "cosmic"; - public const string CosmicCnvTag = "cosmicCnv"; - public const string OnekSvTag = "oneKg"; - public const string DgvTag = "dgv"; - public const string ClinGenTag = "clingen"; - public const string MitoMapTag = "mitomap"; - public const string TopMedTag = "topmed"; - public const string PhylopTag = "phylopScore"; - public const string OmimTag = "omim"; - public const string ExacScoreTag = "exac"; - } +namespace VariantAnnotation.SA +{ + public static class SaCommon + { + public const int DefaultBlockSize = 8 * 1024 * 1024; + public const ushort DataVersion = 50; + public const ushort SchemaVersion = 22; + + public const double RefMinorThreshold = 0.95; + 
+ public const string IndexSufix = ".idx"; + public const string SaFileSuffix = ".nsa"; + public const string PhylopFileSuffix = ".npd"; + public const string RefMinorFileSuffix = ".rma"; + public const string SiFileSuffix = ".nsi"; + public const string NgaFileSuffix = ".nga"; + public const string JsonSchemaSuffix = ".schema"; + + public const string DbsnpTag = "dbsnp"; + public const string GlobalAlleleTag = "globalAllele"; + public const string OneKgenTag = "oneKg"; + public const string AncestralAlleleTag = "ancestralAllele"; + public const string RefMinorTag = "refMinor"; + public const string GnomadTag = "gnomad"; + public const string GnomadExomeTag = "gnomadExome"; + public const string ClinvarTag = "clinvar"; + public const string CosmicTag = "cosmic"; + public const string CosmicCnvTag = "cosmicCnv"; + public const string OnekSvTag = "oneKg"; + public const string DgvTag = "dgv"; + public const string ClinGenTag = "clingen"; + public const string MitoMapTag = "mitomap"; + public const string TopMedTag = "topmed"; + public const string PhylopTag = "phylopScore"; + public const string OmimTag = "omim"; + public const string ExacScoreTag = "exac"; + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/ChromosomeExtensions.cs b/VariantAnnotation/Sequence/ChromosomeExtensions.cs index e043d68d..44d99fd0 100644 --- a/VariantAnnotation/Sequence/ChromosomeExtensions.cs +++ b/VariantAnnotation/Sequence/ChromosomeExtensions.cs @@ -1,30 +1,30 @@ -using Genome; -using IO; - -namespace VariantAnnotation.Sequence -{ - public static class ChromosomeExtensions - { - public static void Write(this IChromosome chromosome, ExtendedBinaryWriter writer) - { - writer.WriteOptAscii(chromosome.UcscName); - writer.WriteOptAscii(chromosome.EnsemblName); - writer.WriteOptAscii(chromosome.RefSeqAccession); - writer.WriteOptAscii(chromosome.GenBankAccession); - writer.WriteOpt(chromosome.Length); - writer.WriteOpt(chromosome.Index); - } - - public static 
IChromosome Read(ExtendedBinaryReader reader) - { - string ucscName = reader.ReadAsciiString(); - string ensemblName = reader.ReadAsciiString(); - string refseqAccession = reader.ReadAsciiString(); - string genBankAccession = reader.ReadAsciiString(); - int length = reader.ReadOptInt32(); - ushort refIndex = reader.ReadOptUInt16(); - - return new Chromosome(ucscName, ensemblName, refseqAccession, genBankAccession, length, refIndex); - } - } +using Genome; +using IO; + +namespace VariantAnnotation.Sequence +{ + public static class ChromosomeExtensions + { + public static void Write(this IChromosome chromosome, ExtendedBinaryWriter writer) + { + writer.WriteOptAscii(chromosome.UcscName); + writer.WriteOptAscii(chromosome.EnsemblName); + writer.WriteOptAscii(chromosome.RefSeqAccession); + writer.WriteOptAscii(chromosome.GenBankAccession); + writer.WriteOpt(chromosome.Length); + writer.WriteOpt(chromosome.Index); + } + + public static IChromosome Read(ExtendedBinaryReader reader) + { + string ucscName = reader.ReadAsciiString(); + string ensemblName = reader.ReadAsciiString(); + string refseqAccession = reader.ReadAsciiString(); + string genBankAccession = reader.ReadAsciiString(); + int length = reader.ReadOptInt32(); + ushort refIndex = reader.ReadOptUInt16(); + + return new Chromosome(ucscName, ensemblName, refseqAccession, genBankAccession, length, refIndex); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/CompressedSequence.cs b/VariantAnnotation/Sequence/CompressedSequence.cs index 396f5147..45e99d45 100644 --- a/VariantAnnotation/Sequence/CompressedSequence.cs +++ b/VariantAnnotation/Sequence/CompressedSequence.cs @@ -1,117 +1,120 @@ -using Genome; -using Intervals; - -namespace VariantAnnotation.Sequence -{ - public sealed class CompressedSequence : ISequence - { - public int Length { get; private set; } - public GenomeAssembly Assembly { get; set; } - - private int _sequenceOffset; - private byte[] _buffer; - private char[] 
_decompressBuffer; - - private IIntervalSearch _maskedIntervalSearch; - public Band[] CytogeneticBands { get; private set; } - - private readonly char[] _convertNumberToBase; - - public CompressedSequence() - { - const string bases = "GCTA"; - _convertNumberToBase = bases.ToCharArray(); - _decompressBuffer = new char[1024]; - } - - private static (int BaseIndex, int Shift) GetBaseIndexAndShift(int referencePosition) - { - int refPos = referencePosition + 1; - var baseIndex = (int)(refPos / 4.0); - int shift = (3 - refPos % 4) * 2; - return (baseIndex, shift); - } - - internal static int GetNumBufferBytes(int numBases) => - (int)((double)numBases / ReferenceSequenceCommon.NumBasesPerByte + 1); - - public void Set(int length, int sequenceOffset, byte[] twoBitBuffer, - IntervalArray maskedEntryIntervalArray, Band[] cytogeneticBands) - { - Length = length; - _buffer = twoBitBuffer; - _maskedIntervalSearch = maskedEntryIntervalArray; - _sequenceOffset = sequenceOffset; - CytogeneticBands = cytogeneticBands; - } - - public string Substring(int offset, int length) - { - offset -= _sequenceOffset; - - // handle negative offsets and lengths - if (offset < 0 || length < 1 || offset >= Length) return null; - - // sanity check: avoid going past the end of the sequence - if (offset + length > Length) length = Length - offset; - - // allocate more memory if needed - if (length > _decompressBuffer.Length) _decompressBuffer = new char[length]; - - // set the initial state of the buffer - (int bufferIndex, int bufferShift) = GetBaseIndexAndShift(offset - 1); - byte currentBufferSeed = _buffer[bufferIndex]; - - // get the overlapping masked interval - var maskedIntervals = _maskedIntervalSearch.GetAllOverlappingValues(offset, offset + length - 1); - - // get the first masked interval - var currentOffset = 0; - bool hasMaskedIntervals = maskedIntervals != null; - int numIntervals = maskedIntervals?.Length ?? 0; - var currentInterval = hasMaskedIntervals ? 
maskedIntervals[0] : null; - - for (var baseIndex = 0; baseIndex < length; baseIndex++) - { - int currentPosition = offset + baseIndex; - - if (hasMaskedIntervals && currentPosition >= currentInterval.Begin && currentPosition <= currentInterval.End) - { - int numMaskedBases = MaskBases(offset, length, baseIndex, currentInterval); - baseIndex += numMaskedBases - 1; - - (bufferIndex, bufferShift) = GetBaseIndexAndShift(offset + baseIndex); - currentBufferSeed = _buffer[bufferIndex]; - - currentOffset++; - hasMaskedIntervals = currentOffset < numIntervals; - currentInterval = hasMaskedIntervals ? maskedIntervals[currentOffset] : null; - - continue; - } - - // evaluate normal bases - _decompressBuffer[baseIndex] = _convertNumberToBase[(currentBufferSeed >> bufferShift) & 3]; - - bufferShift -= 2; - - if (bufferShift < 0) - { - bufferShift = CompressedSequenceReader.MaxShift; - bufferIndex++; - currentBufferSeed = _buffer[bufferIndex]; - } - } - - return new string(_decompressBuffer, 0, length); - } - - private int MaskBases(int offset, int length, int baseIndex, MaskedEntry currentInterval) - { - var numBasesMasked = 0; - for (; baseIndex <= currentInterval.End - offset && baseIndex < length; baseIndex++, numBasesMasked++) - _decompressBuffer[baseIndex] = 'N'; - return numBasesMasked; - } - } +using System; +using Genome; +using Intervals; + +namespace VariantAnnotation.Sequence +{ + public sealed class CompressedSequence : ISequence + { + public int Length { get; private set; } + public GenomeAssembly Assembly { get; set; } + + private int _sequenceOffset; + private byte[] _buffer; + private char[] _decompressBuffer; + + private IIntervalSearch _maskedIntervalSearch; + public Band[] CytogeneticBands { get; private set; } + + private readonly char[] _convertNumberToBase; + + public CompressedSequence() + { + const string bases = "GCTA"; + _convertNumberToBase = bases.ToCharArray(); + _decompressBuffer = new char[1024]; + } + + public string Sequence => throw new 
NotImplementedException(); + + private static (int BaseIndex, int Shift) GetBaseIndexAndShift(int referencePosition) + { + int refPos = referencePosition + 1; + var baseIndex = (int)(refPos / 4.0); + int shift = (3 - refPos % 4) * 2; + return (baseIndex, shift); + } + + internal static int GetNumBufferBytes(int numBases) => + (int)((double)numBases / ReferenceSequenceCommon.NumBasesPerByte + 1); + + public void Set(int length, int sequenceOffset, byte[] twoBitBuffer, + IntervalArray maskedEntryIntervalArray, Band[] cytogeneticBands) + { + Length = length; + _buffer = twoBitBuffer; + _maskedIntervalSearch = maskedEntryIntervalArray; + _sequenceOffset = sequenceOffset; + CytogeneticBands = cytogeneticBands; + } + + public string Substring(int offset, int length) + { + offset -= _sequenceOffset; + + // handle negative offsets and lengths + if (offset < 0 || length < 1 || offset >= Length) return null; + + // sanity check: avoid going past the end of the sequence + if (offset + length > Length) length = Length - offset; + + // allocate more memory if needed + if (length > _decompressBuffer.Length) _decompressBuffer = new char[length]; + + // set the initial state of the buffer + (int bufferIndex, int bufferShift) = GetBaseIndexAndShift(offset - 1); + byte currentBufferSeed = _buffer[bufferIndex]; + + // get the overlapping masked interval + var maskedIntervals = _maskedIntervalSearch.GetAllOverlappingValues(offset, offset + length - 1); + + // get the first masked interval + var currentOffset = 0; + bool hasMaskedIntervals = maskedIntervals != null; + int numIntervals = maskedIntervals?.Length ?? 0; + var currentInterval = hasMaskedIntervals ? 
maskedIntervals[0] : null; + + for (var baseIndex = 0; baseIndex < length; baseIndex++) + { + int currentPosition = offset + baseIndex; + + if (hasMaskedIntervals && currentPosition >= currentInterval.Begin && currentPosition <= currentInterval.End) + { + int numMaskedBases = MaskBases(offset, length, baseIndex, currentInterval); + baseIndex += numMaskedBases - 1; + + (bufferIndex, bufferShift) = GetBaseIndexAndShift(offset + baseIndex); + currentBufferSeed = _buffer[bufferIndex]; + + currentOffset++; + hasMaskedIntervals = currentOffset < numIntervals; + currentInterval = hasMaskedIntervals ? maskedIntervals[currentOffset] : null; + + continue; + } + + // evaluate normal bases + _decompressBuffer[baseIndex] = _convertNumberToBase[(currentBufferSeed >> bufferShift) & 3]; + + bufferShift -= 2; + + if (bufferShift < 0) + { + bufferShift = CompressedSequenceReader.MaxShift; + bufferIndex++; + currentBufferSeed = _buffer[bufferIndex]; + } + } + + return new string(_decompressBuffer, 0, length); + } + + private int MaskBases(int offset, int length, int baseIndex, MaskedEntry currentInterval) + { + var numBasesMasked = 0; + for (; baseIndex <= currentInterval.End - offset && baseIndex < length; baseIndex++, numBasesMasked++) + _decompressBuffer[baseIndex] = 'N'; + return numBasesMasked; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/CompressionBlock.cs b/VariantAnnotation/Sequence/CompressionBlock.cs index 4904e93c..0b308845 100644 --- a/VariantAnnotation/Sequence/CompressionBlock.cs +++ b/VariantAnnotation/Sequence/CompressionBlock.cs @@ -1,97 +1,97 @@ -using System.IO; -using Compression.Algorithms; -using ErrorHandling.Exceptions; - -namespace VariantAnnotation.Sequence -{ - public sealed class CompressionBlock - { - private byte[] _uncompressedBuffer; - private byte[] _compressedBuffer; - - private readonly int _compressedBufferSize; - - public byte[] Buffer { get; private set; } - public int BufferSize { get; private set; } - - public 
readonly ushort RefIndex; - public readonly int SequenceOffset; - - public int UncompressedBufferSize { get; } - public int CompressedBufferSize { get; private set; } - - private static readonly Zstandard Zstd = new Zstandard(21); - - public CompressionBlock(ushort refIndex, byte[] uncompressedBuffer, int numBytes, int sequenceOffset) - { - RefIndex = refIndex; - _uncompressedBuffer = uncompressedBuffer; - UncompressedBufferSize = numBytes; - SequenceOffset = sequenceOffset; - _compressedBufferSize = Zstd.GetCompressedBufferBounds(numBytes); - _compressedBuffer = new byte[_compressedBufferSize]; - } - - public void Compress() - { - CompressedBufferSize = Zstd.Compress(_uncompressedBuffer, UncompressedBufferSize, _compressedBuffer, - _compressedBufferSize); - - if (CompressedBufferSize > UncompressedBufferSize) - { - _compressedBuffer = null; - CompressedBufferSize = -1; - - Buffer = _uncompressedBuffer; - BufferSize = UncompressedBufferSize; - } - else - { - _uncompressedBuffer = null; - - Buffer = _compressedBuffer; - BufferSize = CompressedBufferSize; - } - } - - public static byte[] Read(Stream stream, int uncompressedBufferSize, int compressedBufferSize) - { - return compressedBufferSize == -1 - ? 
ReadUncompressedBlock(stream, uncompressedBufferSize) - : ReadCompressedBlock(stream, uncompressedBufferSize, compressedBufferSize); - } - - private static byte[] ReadCompressedBlock(Stream stream, int uncompressedBufferSize, int compressedBufferSize) - { - var compressedBuffer = new byte[compressedBufferSize]; - var buffer = new byte[uncompressedBufferSize]; - - int numBytesRead = stream.Read(compressedBuffer, 0, compressedBufferSize); - if (numBytesRead != compressedBufferSize) - { - throw new IOException($"Expected {compressedBufferSize} bytes from the block, but received only {numBytesRead} bytes."); - } - - int numUncompressedBytes = Zstd.Decompress(compressedBuffer, compressedBufferSize, buffer, uncompressedBufferSize); - if (numUncompressedBytes != uncompressedBufferSize) - { - throw new CompressionException($"Expected {uncompressedBufferSize} bytes after decompression, but found only {numUncompressedBytes} bytes."); - } - - return buffer; - } - - private static byte[] ReadUncompressedBlock(Stream stream, int uncompressedBufferSize) - { - var buffer = new byte[uncompressedBufferSize]; - int numBytesRead = stream.Read(buffer, 0, uncompressedBufferSize); - - if (numBytesRead != uncompressedBufferSize) - { - throw new IOException($"Expected {uncompressedBufferSize} bytes from the uncompressed block, but received only {numBytesRead} bytes."); - } - - return buffer; - } - } -} +using System.IO; +using Compression.Algorithms; +using ErrorHandling.Exceptions; + +namespace VariantAnnotation.Sequence +{ + public sealed class CompressionBlock + { + private byte[] _uncompressedBuffer; + private byte[] _compressedBuffer; + + private readonly int _compressedBufferSize; + + public byte[] Buffer { get; private set; } + public int BufferSize { get; private set; } + + public readonly ushort RefIndex; + public readonly int SequenceOffset; + + public int UncompressedBufferSize { get; } + public int CompressedBufferSize { get; private set; } + + private static readonly 
Zstandard Zstd = new Zstandard(21); + + public CompressionBlock(ushort refIndex, byte[] uncompressedBuffer, int numBytes, int sequenceOffset) + { + RefIndex = refIndex; + _uncompressedBuffer = uncompressedBuffer; + UncompressedBufferSize = numBytes; + SequenceOffset = sequenceOffset; + _compressedBufferSize = Zstd.GetCompressedBufferBounds(numBytes); + _compressedBuffer = new byte[_compressedBufferSize]; + } + + public void Compress() + { + CompressedBufferSize = Zstd.Compress(_uncompressedBuffer, UncompressedBufferSize, _compressedBuffer, + _compressedBufferSize); + + if (CompressedBufferSize > UncompressedBufferSize) + { + _compressedBuffer = null; + CompressedBufferSize = -1; + + Buffer = _uncompressedBuffer; + BufferSize = UncompressedBufferSize; + } + else + { + _uncompressedBuffer = null; + + Buffer = _compressedBuffer; + BufferSize = CompressedBufferSize; + } + } + + public static byte[] Read(Stream stream, int uncompressedBufferSize, int compressedBufferSize) + { + return compressedBufferSize == -1 + ? 
ReadUncompressedBlock(stream, uncompressedBufferSize) + : ReadCompressedBlock(stream, uncompressedBufferSize, compressedBufferSize); + } + + private static byte[] ReadCompressedBlock(Stream stream, int uncompressedBufferSize, int compressedBufferSize) + { + var compressedBuffer = new byte[compressedBufferSize]; + var buffer = new byte[uncompressedBufferSize]; + + int numBytesRead = stream.Read(compressedBuffer, 0, compressedBufferSize); + if (numBytesRead != compressedBufferSize) + { + throw new IOException($"Expected {compressedBufferSize} bytes from the block, but received only {numBytesRead} bytes."); + } + + int numUncompressedBytes = Zstd.Decompress(compressedBuffer, compressedBufferSize, buffer, uncompressedBufferSize); + if (numUncompressedBytes != uncompressedBufferSize) + { + throw new CompressionException($"Expected {uncompressedBufferSize} bytes after decompression, but found only {numUncompressedBytes} bytes."); + } + + return buffer; + } + + private static byte[] ReadUncompressedBlock(Stream stream, int uncompressedBufferSize) + { + var buffer = new byte[uncompressedBufferSize]; + int numBytesRead = stream.Read(buffer, 0, uncompressedBufferSize); + + if (numBytesRead != uncompressedBufferSize) + { + throw new IOException($"Expected {uncompressedBufferSize} bytes from the uncompressed block, but received only {numBytesRead} bytes."); + } + + return buffer; + } + } +} diff --git a/VariantAnnotation/Sequence/IndexEntry.cs b/VariantAnnotation/Sequence/IndexEntry.cs index 1536ddc8..b4c1e863 100644 --- a/VariantAnnotation/Sequence/IndexEntry.cs +++ b/VariantAnnotation/Sequence/IndexEntry.cs @@ -1,16 +1,16 @@ -namespace VariantAnnotation.Sequence -{ - public sealed class IndexEntry - { - public readonly ushort RefIndex; - public readonly long FileOffset; - - public const int Size = 10; - - public IndexEntry(ushort refIndex, long fileOffset) - { - RefIndex = refIndex; - FileOffset = fileOffset; - } - } +namespace VariantAnnotation.Sequence +{ + public sealed 
class IndexEntry + { + public readonly ushort RefIndex; + public readonly long FileOffset; + + public const int Size = 10; + + public IndexEntry(ushort refIndex, long fileOffset) + { + RefIndex = refIndex; + FileOffset = fileOffset; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/MaskedEntry.cs b/VariantAnnotation/Sequence/MaskedEntry.cs index 3dd441fe..7d7d4732 100644 --- a/VariantAnnotation/Sequence/MaskedEntry.cs +++ b/VariantAnnotation/Sequence/MaskedEntry.cs @@ -1,14 +1,14 @@ -namespace VariantAnnotation.Sequence -{ - public sealed class MaskedEntry - { - public readonly int Begin; - public readonly int End; - - public MaskedEntry(int begin, int end) - { - Begin = begin; - End = end; - } - } +namespace VariantAnnotation.Sequence +{ + public sealed class MaskedEntry + { + public readonly int Begin; + public readonly int End; + + public MaskedEntry(int begin, int end) + { + Begin = begin; + End = end; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/ReferenceSequenceCommon.cs b/VariantAnnotation/Sequence/ReferenceSequenceCommon.cs index a62868e7..61146539 100644 --- a/VariantAnnotation/Sequence/ReferenceSequenceCommon.cs +++ b/VariantAnnotation/Sequence/ReferenceSequenceCommon.cs @@ -1,13 +1,13 @@ -namespace VariantAnnotation.Sequence -{ - public static class ReferenceSequenceCommon - { - public const string HeaderTag = "NirvanaReference"; - public const int HeaderVersion = 6; - - public const uint IndexStartTag = 0x4CF76E2F; - public const uint ReferenceStartTag = 0xA7D8212A; - - public const int NumBasesPerByte = 4; - } +namespace VariantAnnotation.Sequence +{ + public static class ReferenceSequenceCommon + { + public const string HeaderTag = "NirvanaReference"; + public const int HeaderVersion = 6; + + public const uint IndexStartTag = 0x4CF76E2F; + public const uint ReferenceStartTag = 0xA7D8212A; + + public const int NumBasesPerByte = 4; + } } \ No newline at end of file diff --git 
a/VariantAnnotation/Sequence/ReferenceSequenceReader.cs b/VariantAnnotation/Sequence/ReferenceSequenceReader.cs index 03f6936b..34d8decd 100644 --- a/VariantAnnotation/Sequence/ReferenceSequenceReader.cs +++ b/VariantAnnotation/Sequence/ReferenceSequenceReader.cs @@ -1,186 +1,186 @@ -using System; -using System.Collections.Generic; -using System.IO; -using ErrorHandling.Exceptions; -using Genome; -using Intervals; -using IO; - -namespace VariantAnnotation.Sequence -{ - public sealed class CompressedSequenceReader : IDisposable - { - public readonly Dictionary RefNameToChromosome = new Dictionary(); - public readonly Dictionary RefIndexToChromosome = new Dictionary(); - private readonly Dictionary _refIndexToIndex = new Dictionary(); - - private readonly IndexEntry[] _indexEntries; - public readonly CompressedSequence Sequence = new CompressedSequence(); - - public ushort NumRefSeqs { get; private set; } - - public const int MaxShift = 6; - - private readonly ExtendedBinaryReader _reader; - private readonly Stream _stream; - - public GenomeAssembly Assembly => Sequence.Assembly; - public byte PatchLevel; // we'll use this in future version providers - - public CompressedSequenceReader(Stream stream) - { - _stream = stream; - _reader = new ExtendedBinaryReader(stream); - - CheckHeaderVersion(); - LoadHeader(); - GetChromosomes(); - _indexEntries = LoadIndex(); - } - - public void Dispose() - { - _reader?.Dispose(); - _stream?.Dispose(); - } - - private void CheckHeaderVersion() - { - string headerTag = _reader.ReadString(); - int headerVersion = _reader.ReadInt32(); - - if (headerTag != ReferenceSequenceCommon.HeaderTag || headerVersion != ReferenceSequenceCommon.HeaderVersion) - { - throw new InvalidFileFormatException($"The header identifiers do not match the expected values: Obs: {headerTag} {headerVersion} vs Exp: {ReferenceSequenceCommon.HeaderTag} {ReferenceSequenceCommon.HeaderVersion}"); - } - } - - public void GetCompressedSequence(IChromosome chromosome) - 
{ - if (chromosome.IsEmpty() || !_refIndexToIndex.TryGetValue(chromosome.Index, out int index)) return; - - var indexEntry = _indexEntries[index]; - _stream.Position = indexEntry.FileOffset; - - uint tag = _reader.ReadUInt32(); - - if (tag != ReferenceSequenceCommon.ReferenceStartTag) - { - throw new InvalidDataException($"The reference start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.ReferenceStartTag}"); - } - - int uncompressedBufferSize = _reader.ReadInt32(); - int compressedBufferSize = _reader.ReadInt32(); - - var buffer = CompressionBlock.Read(_stream, uncompressedBufferSize, compressedBufferSize); - var reader = new MemoryBufferBinaryReader(buffer); - - var metadata = GetMetadata(reader); - var twoBitBuffer = GetTwoBitBuffer(reader); - var maskedEntryIntervalArray = GetMaskedEntries(reader); - var cytogeneticBands = GetCytogeneticBands(reader); - - Sequence.Set(metadata.NumBases, metadata.SequenceOffset, twoBitBuffer, maskedEntryIntervalArray, - cytogeneticBands); - } - - private (int SequenceOffset, int NumBases) GetMetadata(MemoryBufferBinaryReader reader) - { - int sequenceOffset = reader.ReadOptInt32(); - int numBases = reader.ReadOptInt32(); - return (sequenceOffset, numBases); - } - - private static Band[] GetCytogeneticBands(MemoryBufferBinaryReader reader) - { - int numBands = reader.ReadOptInt32(); - var bands = new Band[numBands]; - - for (var i = 0; i < numBands; i++) - { - int begin = reader.ReadOptInt32(); - int end = reader.ReadOptInt32(); - string name = reader.ReadAsciiString(); - - bands[i] = new Band(begin, end, name); - } - - return bands; - } - - private static IntervalArray GetMaskedEntries(MemoryBufferBinaryReader reader) - { - int numEntries = reader.ReadOptInt32(); - var maskedEntries = new Interval[numEntries]; - - for (var i = 0; i < numEntries; i++) - { - int begin = reader.ReadOptInt32(); - int end = reader.ReadOptInt32(); - - maskedEntries[i] = new Interval(begin, end, new 
MaskedEntry(begin, end)); - } - - return new IntervalArray(maskedEntries); - } - - private static byte[] GetTwoBitBuffer(MemoryBufferBinaryReader reader) - { - int numBytes = reader.ReadOptInt32(); - return reader.ReadBytes(numBytes); - } - - private void LoadHeader() - { - Sequence.Assembly = (GenomeAssembly)_reader.ReadByte(); - PatchLevel = _reader.ReadByte(); - NumRefSeqs = (ushort)_reader.ReadOptInt32(); - } - - private IChromosome[] GetChromosomes() - { - var chromosomes = new IChromosome[NumRefSeqs]; - for (var i = 0; i < NumRefSeqs; i++) - { - var chromosome = ChromosomeExtensions.Read(_reader); - AddReferenceName(chromosome); - chromosomes[i] = chromosome; - } - return chromosomes; - } - - private IndexEntry[] LoadIndex() - { - uint tag = _reader.ReadUInt32(); - - if (tag != ReferenceSequenceCommon.IndexStartTag) - { - throw new InvalidDataException($"The index start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.IndexStartTag}"); - } - - int numEntries = _reader.ReadInt32(); - - var indexEntries = new IndexEntry[numEntries]; - - for (var i = 0; i < numEntries; i++) - { - ushort refIndex = _reader.ReadUInt16(); - long fileOffset = _reader.ReadInt64(); - indexEntries[i] = new IndexEntry(refIndex, fileOffset); - - _refIndexToIndex[refIndex] = i; - } - - return indexEntries; - } - - private void AddReferenceName(IChromosome chromosome) - { - if (!string.IsNullOrEmpty(chromosome.UcscName)) RefNameToChromosome[chromosome.UcscName] = chromosome; - if (!string.IsNullOrEmpty(chromosome.EnsemblName)) RefNameToChromosome[chromosome.EnsemblName] = chromosome; - if (!string.IsNullOrEmpty(chromosome.RefSeqAccession)) RefNameToChromosome[chromosome.RefSeqAccession] = chromosome; - if (!string.IsNullOrEmpty(chromosome.GenBankAccession)) RefNameToChromosome[chromosome.GenBankAccession] = chromosome; - RefIndexToChromosome[chromosome.Index] = chromosome; - } - } +using System; +using System.Collections.Generic; +using System.IO; +using 
ErrorHandling.Exceptions; +using Genome; +using Intervals; +using IO; + +namespace VariantAnnotation.Sequence +{ + public sealed class CompressedSequenceReader : IDisposable + { + public readonly Dictionary RefNameToChromosome = new Dictionary(); + public readonly Dictionary RefIndexToChromosome = new Dictionary(); + private readonly Dictionary _refIndexToIndex = new Dictionary(); + + private readonly IndexEntry[] _indexEntries; + public readonly CompressedSequence Sequence = new CompressedSequence(); + + public ushort NumRefSeqs { get; private set; } + + public const int MaxShift = 6; + + private readonly ExtendedBinaryReader _reader; + private readonly Stream _stream; + + public GenomeAssembly Assembly => Sequence.Assembly; + public byte PatchLevel; // we'll use this in future version providers + + public CompressedSequenceReader(Stream stream) + { + _stream = stream; + _reader = new ExtendedBinaryReader(stream); + + CheckHeaderVersion(); + LoadHeader(); + GetChromosomes(); + _indexEntries = LoadIndex(); + } + + public void Dispose() + { + _reader?.Dispose(); + _stream?.Dispose(); + } + + private void CheckHeaderVersion() + { + string headerTag = _reader.ReadString(); + int headerVersion = _reader.ReadInt32(); + + if (headerTag != ReferenceSequenceCommon.HeaderTag || headerVersion != ReferenceSequenceCommon.HeaderVersion) + { + throw new InvalidFileFormatException($"The header identifiers do not match the expected values: Obs: {headerTag} {headerVersion} vs Exp: {ReferenceSequenceCommon.HeaderTag} {ReferenceSequenceCommon.HeaderVersion}"); + } + } + + public void GetCompressedSequence(IChromosome chromosome) + { + if (chromosome.IsEmpty() || !_refIndexToIndex.TryGetValue(chromosome.Index, out int index)) return; + + var indexEntry = _indexEntries[index]; + _stream.Position = indexEntry.FileOffset; + + uint tag = _reader.ReadUInt32(); + + if (tag != ReferenceSequenceCommon.ReferenceStartTag) + { + throw new InvalidDataException($"The reference start tag does not 
match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.ReferenceStartTag}"); + } + + int uncompressedBufferSize = _reader.ReadInt32(); + int compressedBufferSize = _reader.ReadInt32(); + + var buffer = CompressionBlock.Read(_stream, uncompressedBufferSize, compressedBufferSize); + var reader = new MemoryBufferBinaryReader(buffer); + + var metadata = GetMetadata(reader); + var twoBitBuffer = GetTwoBitBuffer(reader); + var maskedEntryIntervalArray = GetMaskedEntries(reader); + var cytogeneticBands = GetCytogeneticBands(reader); + + Sequence.Set(metadata.NumBases, metadata.SequenceOffset, twoBitBuffer, maskedEntryIntervalArray, + cytogeneticBands); + } + + private (int SequenceOffset, int NumBases) GetMetadata(MemoryBufferBinaryReader reader) + { + int sequenceOffset = reader.ReadOptInt32(); + int numBases = reader.ReadOptInt32(); + return (sequenceOffset, numBases); + } + + private static Band[] GetCytogeneticBands(MemoryBufferBinaryReader reader) + { + int numBands = reader.ReadOptInt32(); + var bands = new Band[numBands]; + + for (var i = 0; i < numBands; i++) + { + int begin = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + string name = reader.ReadAsciiString(); + + bands[i] = new Band(begin, end, name); + } + + return bands; + } + + private static IntervalArray GetMaskedEntries(MemoryBufferBinaryReader reader) + { + int numEntries = reader.ReadOptInt32(); + var maskedEntries = new Interval[numEntries]; + + for (var i = 0; i < numEntries; i++) + { + int begin = reader.ReadOptInt32(); + int end = reader.ReadOptInt32(); + + maskedEntries[i] = new Interval(begin, end, new MaskedEntry(begin, end)); + } + + return new IntervalArray(maskedEntries); + } + + private static byte[] GetTwoBitBuffer(MemoryBufferBinaryReader reader) + { + int numBytes = reader.ReadOptInt32(); + return reader.ReadBytes(numBytes); + } + + private void LoadHeader() + { + Sequence.Assembly = (GenomeAssembly)_reader.ReadByte(); + PatchLevel = _reader.ReadByte(); + 
NumRefSeqs = (ushort)_reader.ReadOptInt32(); + } + + private IChromosome[] GetChromosomes() + { + var chromosomes = new IChromosome[NumRefSeqs]; + for (var i = 0; i < NumRefSeqs; i++) + { + var chromosome = ChromosomeExtensions.Read(_reader); + AddReferenceName(chromosome); + chromosomes[i] = chromosome; + } + return chromosomes; + } + + private IndexEntry[] LoadIndex() + { + uint tag = _reader.ReadUInt32(); + + if (tag != ReferenceSequenceCommon.IndexStartTag) + { + throw new InvalidDataException($"The index start tag does not match the expected values: Obs: {tag} vs Exp: {ReferenceSequenceCommon.IndexStartTag}"); + } + + int numEntries = _reader.ReadInt32(); + + var indexEntries = new IndexEntry[numEntries]; + + for (var i = 0; i < numEntries; i++) + { + ushort refIndex = _reader.ReadUInt16(); + long fileOffset = _reader.ReadInt64(); + indexEntries[i] = new IndexEntry(refIndex, fileOffset); + + _refIndexToIndex[refIndex] = i; + } + + return indexEntries; + } + + private void AddReferenceName(IChromosome chromosome) + { + if (!string.IsNullOrEmpty(chromosome.UcscName)) RefNameToChromosome[chromosome.UcscName] = chromosome; + if (!string.IsNullOrEmpty(chromosome.EnsemblName)) RefNameToChromosome[chromosome.EnsemblName] = chromosome; + if (!string.IsNullOrEmpty(chromosome.RefSeqAccession)) RefNameToChromosome[chromosome.RefSeqAccession] = chromosome; + if (!string.IsNullOrEmpty(chromosome.GenBankAccession)) RefNameToChromosome[chromosome.GenBankAccession] = chromosome; + RefIndexToChromosome[chromosome.Index] = chromosome; + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Sequence/SequenceHelper.cs b/VariantAnnotation/Sequence/SequenceHelper.cs index 17397c56..c6345754 100644 --- a/VariantAnnotation/Sequence/SequenceHelper.cs +++ b/VariantAnnotation/Sequence/SequenceHelper.cs @@ -1,29 +1,29 @@ -using System.Collections.Generic; -using Genome; -using IO; - -namespace VariantAnnotation.Sequence -{ - public static class SequenceHelper - { - private 
static CompressedSequenceReader GetSequenceReader(string referencePath) => - new CompressedSequenceReader(PersistentStreamUtils.GetReadStream(referencePath)); - - public static (IDictionary refIndexToChromosome, IDictionary - refNameToChromosome, int numRefSeqs) GetDictionaries(string referencePath) - { - IDictionary refIndexToChromosome; - IDictionary refNameToChromosome; - int numRefSeqs; - - using (var reader = GetSequenceReader(referencePath)) - { - refIndexToChromosome = reader.RefIndexToChromosome; - refNameToChromosome = reader.RefNameToChromosome; - numRefSeqs = reader.NumRefSeqs; - } - - return (refIndexToChromosome, refNameToChromosome, numRefSeqs); - } - } -} +using System.Collections.Generic; +using Genome; +using IO; + +namespace VariantAnnotation.Sequence +{ + public static class SequenceHelper + { + private static CompressedSequenceReader GetSequenceReader(string referencePath) => + new CompressedSequenceReader(PersistentStreamUtils.GetReadStream(referencePath)); + + public static (IDictionary refIndexToChromosome, IDictionary + refNameToChromosome, int numRefSeqs) GetDictionaries(string referencePath) + { + IDictionary refIndexToChromosome; + IDictionary refNameToChromosome; + int numRefSeqs; + + using (var reader = GetSequenceReader(referencePath)) + { + refIndexToChromosome = reader.RefIndexToChromosome; + refNameToChromosome = reader.RefNameToChromosome; + numRefSeqs = reader.NumRefSeqs; + } + + return (refIndexToChromosome, refNameToChromosome, numRefSeqs); + } + } +} diff --git a/VariantAnnotation/TranscriptAnnotation/FlankingTranscriptAnnotator.cs b/VariantAnnotation/TranscriptAnnotation/FlankingTranscriptAnnotator.cs index 5d2870ac..e5cf98d9 100644 --- a/VariantAnnotation/TranscriptAnnotation/FlankingTranscriptAnnotator.cs +++ b/VariantAnnotation/TranscriptAnnotation/FlankingTranscriptAnnotator.cs @@ -1,19 +1,19 @@ -using VariantAnnotation.AnnotatedPositions.Consequence; -using VariantAnnotation.AnnotatedPositions.Transcript; -using 
VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.TranscriptAnnotation -{ - public static class FlankingTranscriptAnnotator - { - public static IAnnotatedTranscript GetAnnotatedTranscript(int variantEnd, ITranscript transcript) - { - var isDownStream = variantEnd < transcript.Start == transcript.Gene.OnReverseStrand; - var consequence = new Consequences(); - - consequence.DetermineFlankingVariantEffects(isDownStream); - return new AnnotatedTranscript(transcript, null, null, null, null, null, null, null, null, null, - consequence.GetConsequences(), null, false); - } - } +using VariantAnnotation.AnnotatedPositions.Consequence; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.TranscriptAnnotation +{ + public static class FlankingTranscriptAnnotator + { + public static IAnnotatedTranscript GetAnnotatedTranscript(int variantEnd, ITranscript transcript) + { + var isDownStream = variantEnd < transcript.Start == transcript.Gene.OnReverseStrand; + var consequence = new Consequences(); + + consequence.DetermineFlankingVariantEffects(isDownStream); + return new AnnotatedTranscript(transcript, null, null, null, null, null, null, null, null, null, + consequence.GetConsequences(), null, false); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs b/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs index 84d195be..f3588b24 100644 --- a/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs +++ b/VariantAnnotation/TranscriptAnnotation/FullTranscriptAnnotator.cs @@ -1,238 +1,258 @@ -using System; -using System.Collections.Generic; -using Genome; -using Intervals; -using VariantAnnotation.Algorithms; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.AnnotatedPositions.Consequence; -using VariantAnnotation.AnnotatedPositions.Transcript; -using 
VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; -using Variants; - -namespace VariantAnnotation.TranscriptAnnotation -{ - public static class FullTranscriptAnnotator - { - public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant leftShiftedVariant, - ISequence refSequence, IPredictionCache siftCache, IPredictionCache polyphenCache, AminoAcids aminoAcids) - { - var rightShiftedVariant = VariantRotator.Right(leftShiftedVariant, transcript, refSequence, - transcript.Gene.OnReverseStrand); - - var leftAnnotation = AnnotateTranscript(transcript, leftShiftedVariant, aminoAcids, refSequence); - - var rightAnnotation = ReferenceEquals(leftShiftedVariant, rightShiftedVariant) - ? leftAnnotation - : AnnotateTranscript(transcript, rightShiftedVariant, aminoAcids, refSequence); - - var consequences = GetConsequences(transcript, leftShiftedVariant, leftAnnotation.VariantEffect); - - var refAllele = rightAnnotation.TranscriptRefAllele; - var altAllele = rightAnnotation.TranscriptAltAllele; - string hgvsCoding = HgvsCodingNomenclature.GetHgvscAnnotation(transcript, rightShiftedVariant, refSequence, - rightAnnotation.Position.RegionStartIndex, rightAnnotation.Position.RegionEndIndex, - refAllele, altAllele); - - string hgvsProtein = HgvsProteinNomenclature.GetHgvsProteinAnnotation(transcript, - rightAnnotation.RefAminoAcids, rightAnnotation.AltAminoAcids, rightAnnotation.TranscriptAltAllele, - rightAnnotation.Position, rightAnnotation.VariantEffect, rightShiftedVariant, refSequence, hgvsCoding, - leftShiftedVariant.Chromosome.UcscName == "chrM"); - - (PredictionScore sift, PredictionScore polyPhen) = GetPredictionScores(leftAnnotation.Position, leftAnnotation.RefAminoAcids, - leftAnnotation.AltAminoAcids, siftCache, polyphenCache, transcript.SiftIndex, transcript.PolyPhenIndex); - - return new AnnotatedTranscript(transcript, leftAnnotation.RefAminoAcids, 
leftAnnotation.AltAminoAcids, - leftAnnotation.RefCodons, leftAnnotation.AltCodons, leftAnnotation.Position, hgvsCoding, hgvsProtein, - sift, polyPhen, consequences, null, false); - } - - private static (VariantEffect VariantEffect, IMappedPosition Position, string RefAminoAcids, string - AltAminoAcids, string RefCodons, string AltCodons, string TranscriptAltAllele, string TranscriptRefAllele) AnnotateTranscript(ITranscript transcript, ISimpleVariant variant, AminoAcids aminoAcids, ISequence refSequence) - { - bool onReverseStrand = transcript.Gene.OnReverseStrand; - (int startIndex, ITranscriptRegion startRegion) = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.Start); - (int endIndex, ITranscriptRegion endRegion) = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.End); - - - var position = GetMappedPosition(transcript.TranscriptRegions, startRegion, startIndex, endRegion, - endIndex, variant, onReverseStrand, transcript.Translation?.CodingRegion, transcript.StartExonPhase, - variant.Type == VariantType.insertion); - - var codingSequence = GetCodingSequence(transcript, refSequence); - var cdnaSequence = GetCdnaSequence(transcript, refSequence); - - string transcriptAltAllele = HgvsUtilities.GetTranscriptAllele(variant.AltAllele, onReverseStrand); - - var codons = Codons.GetCodons(transcriptAltAllele, position.CdsStart, position.CdsEnd, position.ProteinStart, position.ProteinEnd, codingSequence); - - var aa = aminoAcids.Translate(codons.Reference, codons.Alternate); - (aa, position.ProteinStart, position.ProteinEnd) = TryTrimAminoAcidsAndUpdateProteinPositions(aa, position.ProteinStart, position.ProteinEnd); - - (position.CoveredCdnaStart, position.CoveredCdnaEnd) = transcript.TranscriptRegions.GetCoveredCdnaPositions(position.CdnaStart, startIndex, position.CdnaEnd, endIndex, onReverseStrand); - (position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, position.CoveredProteinEnd) = 
MappedPositionUtilities.GetCoveredCdsAndProteinPositions(position.CoveredCdnaStart, position.CoveredCdnaEnd, transcript.StartExonPhase, transcript.Translation?.CodingRegion); - - var transcriptRefAllele = GetTranscriptRefAllele(position, cdnaSequence, variant, onReverseStrand); - - SequenceChange coveredAa; - - // only generate the covered version of ref & alt alleles when CDS start/end is -1 - if (position.CdsStart == -1 || position.CdsEnd == -1) - { - coveredAa = GetCoveredAa(aminoAcids, transcriptAltAllele, position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, position.CoveredProteinEnd, codingSequence); - (coveredAa, position.CoveredProteinStart, position.CoveredProteinEnd) = TryTrimAminoAcidsAndUpdateProteinPositions(coveredAa, position.CoveredProteinStart, position.CoveredProteinEnd); - } - else - { - coveredAa = aa; - position.CoveredProteinStart = position.ProteinStart; - position.CoveredProteinEnd = position.ProteinEnd; - } - - var positionalEffect = GetPositionalEffect(transcript, variant, position, aa.Reference, aa.Alternate, - position.CoveredCdnaStart, position.CoveredCdnaEnd, position.CoveredCdsStart, position.CoveredCdsEnd); - - var variantEffect = new VariantEffect(positionalEffect, variant, transcript, aa.Reference, aa.Alternate, - codons.Reference, codons.Alternate, position.ProteinStart, coveredAa.Reference, coveredAa.Alternate); - - return (variantEffect, position, aa.Reference, aa.Alternate, codons.Reference, codons.Alternate, transcriptAltAllele, transcriptRefAllele); - } - - private static string GetTranscriptRefAllele(IMappedPosition position, ISequence cdnaSequence, ISimpleVariant variant, - bool onReverseStrand) - { - try - { - var variantRef = HgvsUtilities.GetTranscriptAllele(variant.RefAllele, onReverseStrand); - if (position == null || cdnaSequence == null) return variantRef; - var start = position.CoveredCdnaStart; - var end = position.CoveredCdnaEnd; - if (start == -1 && end == -1) return variantRef; - if 
(start != -1 && end != -1 && end < start) Swap.Int(ref start, ref end); - - return cdnaSequence.Substring(start - 1, end - start + 1); - } - catch (Exception e) - { - Console.WriteLine($"cdna start:{position.CdnaStart}, cdna end: {position.CdnaEnd}"); - Console.WriteLine(e); - throw; - } - - } - - private static string GetCodingFromCdna(ICodingRegion codingRegion, ISequence cdnaSequence) - { - if (codingRegion == null) return null; - return cdnaSequence.Substring(codingRegion.CdnaStart - 1, codingRegion.CdnaEnd- codingRegion.CdnaStart + 1); - } - - internal static (SequenceChange AaChange, int ProteinStart, int ProteinEnd) TryTrimAminoAcidsAndUpdateProteinPositions(SequenceChange aaChange, int proteinStart, int proteinEnd) - { - (int newStart, string newReference, string newAlternate) = BiDirectionalTrimmer.Trim(proteinStart, aaChange.Reference, aaChange.Alternate); - - return string.IsNullOrEmpty(newReference) ? (aaChange, proteinStart, proteinEnd) : - (new SequenceChange(newReference, newAlternate), newStart, newStart + newReference.Length - 1); - } - - private static SequenceChange GetCoveredAa(AminoAcids aminoAcids, string transcriptAltAllele, int coveredCdsStart, int coveredCdsEnd, int coveredProteinStart, int coveredProteinEnd, ISequence codingSequence) - { - var codonsChange = Codons.GetCodons(transcriptAltAllele, coveredCdsStart, coveredCdsEnd, coveredProteinStart, coveredProteinEnd, codingSequence); - return aminoAcids.Translate(codonsChange.Reference, codonsChange.Alternate); - } - - private static ISequence GetCodingSequence(ITranscript transcript, ISequence refSequence) - { - if (transcript.Translation == null) return null; - - return transcript.CodingSequence ?? 
(transcript.CodingSequence = new CodingSequence(refSequence, - transcript.Translation.CodingRegion, transcript.TranscriptRegions, - transcript.Gene.OnReverseStrand, transcript.StartExonPhase, transcript.RnaEdits)); - } - - private static ISequence GetCdnaSequence(ITranscript transcript, ISequence refSequence) - { - return transcript.CdnaSequence ?? (transcript.CdnaSequence = new CdnaSequence(refSequence, - transcript.Translation?.CodingRegion, transcript.TranscriptRegions, - transcript.Gene.OnReverseStrand, transcript.RnaEdits)); - } - - private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, ITranscriptRegion startRegion, - int startIndex, ITranscriptRegion endRegion, int endIndex, IInterval variant, bool onReverseStrand, - ICodingRegion codingRegion, byte startExonPhase, bool isInsertion) - { - (int cdnaStart, int cdnaEnd) = MappedPositionUtilities.GetCdnaPositions(startRegion, endRegion, variant, onReverseStrand, isInsertion); - if (onReverseStrand) Swap.Int(ref cdnaStart, ref cdnaEnd); - - (int cdsStart, int cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaStart, cdnaEnd, - startExonPhase, isInsertion); - - int proteinStart = MappedPositionUtilities.GetProteinPosition(cdsStart); - int proteinEnd = MappedPositionUtilities.GetProteinPosition(cdsEnd); - - (int exonStart, int exonEnd, int intronStart, int intronEnd) = regions.GetExonsAndIntrons(startIndex, endIndex); - - return new MappedPosition(cdnaStart, cdnaEnd, cdsStart, cdsEnd, proteinStart, proteinEnd, exonStart, - exonEnd, intronStart, intronEnd, startIndex, endIndex); - } - - private static TranscriptPositionalEffect GetPositionalEffect(ITranscript transcript, ISimpleVariant variant, - IMappedPosition position, string refAminoAcid, string altAminoAcid, int coveredCdnaStart, - int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd) - { - bool startCodonInsertionWithNoImpact = variant.Type == VariantType.insertion && - position.ProteinStart <= 1 && - 
altAminoAcid.EndsWith(refAminoAcid); - - var positionalEffect = new TranscriptPositionalEffect(); - positionalEffect.DetermineIntronicEffect(transcript.TranscriptRegions, variant, variant.Type); - positionalEffect.DetermineExonicEffect(transcript, variant, position, coveredCdnaStart, coveredCdnaEnd, - coveredCdsStart, coveredCdsEnd, variant.AltAllele, startCodonInsertionWithNoImpact); - return positionalEffect; - } - - private static List GetConsequences(IInterval transcript, IVariant variant, - IVariantEffect variantEffect) - { - var featureEffect = new FeatureVariantEffects(transcript, variant.Type, variant, - variant.Behavior.StructuralVariantConsequence); - - var consequence = new Consequences(variantEffect, featureEffect); - consequence.DetermineSmallVariantEffects(); - return consequence.GetConsequences(); - } - - private static (PredictionScore Sift, PredictionScore PolyPhen) GetPredictionScores(IMappedPosition position, - string refAminoAcid, string altAminoAcid, IPredictionCache siftCache, IPredictionCache polyphenCache, - int siftIndex, int polyphenIndex) - { - if (!NeedPredictionScore(position.ProteinStart, position.ProteinEnd, refAminoAcid, altAminoAcid) || - position.ProteinStart == -1) return (null, null); - - char newAminoAcid = altAminoAcid[0]; - var siftScore = GetPredictionScore(position.ProteinStart, newAminoAcid, siftCache, siftIndex); - var polyphenScore = GetPredictionScore(position.ProteinStart, newAminoAcid, polyphenCache, polyphenIndex); - return (siftScore, polyphenScore); - } - - private static bool NeedPredictionScore(int proteinStart, int proteinEnd, string referenceAminoAcids, - string alternateAminoAcids) - { - return proteinStart != -1 && - proteinEnd != -1 && - proteinStart == proteinEnd && - referenceAminoAcids.Length == 1 && - alternateAminoAcids.Length == 1 && - referenceAminoAcids != alternateAminoAcids; - } - - private static PredictionScore GetPredictionScore(int proteinPosition, char newAminoAcid, - IPredictionCache 
predictionCache, int predictionIndex) - { - return predictionIndex == -1 - ? null - : predictionCache?.GetProteinFunctionPrediction(predictionIndex, newAminoAcid, proteinPosition); - } - } +using System; +using System.Collections.Generic; +using Genome; +using Intervals; +using VariantAnnotation.Algorithms; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.AnnotatedPositions.Consequence; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; +using Variants; + +namespace VariantAnnotation.TranscriptAnnotation +{ + public static class FullTranscriptAnnotator + { + public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant leftShiftedVariant, + ISequence refSequence, IPredictionCache siftCache, IPredictionCache polyphenCache, AminoAcid aminoAcids) + { + var rightShiftedVariant = VariantRotator.Right(leftShiftedVariant, transcript, refSequence, + transcript.Gene.OnReverseStrand); + + var cdnaSequence = GetCdnaSequence(transcript, refSequence); + ReadOnlySpan extendedCds = + GetExtendedCodingSequence(cdnaSequence, transcript.Translation?.CodingRegion); + + var leftAnnotation = AnnotateTranscript(transcript, leftShiftedVariant, aminoAcids, cdnaSequence, extendedCds); + + var rightAnnotation = ReferenceEquals(leftShiftedVariant, rightShiftedVariant) + ? 
leftAnnotation + : AnnotateTranscript(transcript, rightShiftedVariant, aminoAcids, cdnaSequence, extendedCds); + + List consequences = GetConsequences(transcript, leftShiftedVariant, leftAnnotation.VariantEffect); + + string refAllele = rightAnnotation.TranscriptRefAllele; + string altAllele = rightAnnotation.TranscriptAltAllele; + string hgvsCoding = HgvsCodingNomenclature.GetHgvscAnnotation(transcript, rightShiftedVariant, refSequence, + rightAnnotation.Position.RegionStartIndex, rightAnnotation.Position.RegionEndIndex, + refAllele, altAllele); + + string hgvsProtein = HgvsProtein.GetHgvsProteinAnnotation(transcript.Translation?.ProteinId.WithVersion, + hgvsCoding, extendedCds, transcript.Translation?.PeptideSeq, rightAnnotation.Position.CdsStart, + rightAnnotation.Position.ExtendedCdsEnd, rightAnnotation.Position.ProteinStart, rightAnnotation.RefAminoAcids, + rightAnnotation.AltAminoAcids, rightAnnotation.TranscriptAltAllele, + leftShiftedVariant.Type == VariantType.reference, aminoAcids); + + // string hgvsProtein = HgvsProteinNomenclature.GetHgvsProteinAnnotation(transcript, + // rightAnnotation.RefAminoAcids, rightAnnotation.AltAminoAcids, rightAnnotation.TranscriptAltAllele, + // rightAnnotation.Position, rightAnnotation.VariantEffect, rightShiftedVariant, refSequence, hgvsCoding, + // aminoAcids); + + (PredictionScore sift, PredictionScore polyPhen) = GetPredictionScores(leftAnnotation.Position, leftAnnotation.RefAminoAcids, + leftAnnotation.AltAminoAcids, siftCache, polyphenCache, transcript.SiftIndex, transcript.PolyPhenIndex); + + return new AnnotatedTranscript(transcript, leftAnnotation.RefAminoAcids, leftAnnotation.AltAminoAcids, + leftAnnotation.RefCodons, leftAnnotation.AltCodons, leftAnnotation.Position, hgvsCoding, hgvsProtein, + sift, polyPhen, consequences, null, false); + } + + private static ReadOnlySpan GetExtendedCodingSequence(ISequence cdnaSequence, ICodingRegion codingRegion) + { + ReadOnlySpan cdnaSpan = cdnaSequence.Sequence.AsSpan(); + 
return codingRegion == null ? cdnaSpan : cdnaSpan.Slice(codingRegion.CdnaStart - 1); + } + + private static (VariantEffect VariantEffect, IMappedPosition Position, string RefAminoAcids, string + AltAminoAcids, string RefCodons, string AltCodons, string TranscriptAltAllele, string TranscriptRefAllele, + bool WithinGap) + AnnotateTranscript(ITranscript transcript, ISimpleVariant variant, AminoAcid aminoAcids, + ISequence cdnaSequence, ReadOnlySpan codingSequence) + { + bool onReverseStrand = transcript.Gene.OnReverseStrand; + (int startIndex, ITranscriptRegion startRegion) = + MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.Start); + (int endIndex, ITranscriptRegion endRegion) = + MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, variant.End); + + bool withinGap = startRegion != null && + endRegion != null && + startRegion.Type == TranscriptRegionType.Gap && + endRegion.Type == TranscriptRegionType.Gap && + startRegion.Id == endRegion.Id; + + var position = GetMappedPosition(transcript.TranscriptRegions, startRegion, startIndex, endRegion, endIndex, + variant, onReverseStrand, transcript.Translation?.CodingRegion, transcript.StartExonPhase, + variant.Type == VariantType.insertion); + + string transcriptAltAllele = HgvsUtilities.GetTranscriptAllele(variant.AltAllele, onReverseStrand); + + (string referenceCodons, string alternateCodons) = Codons.GetCodons(transcriptAltAllele, position.CdsStart, + position.ExtendedCdsEnd, position.ProteinStart, position.ExtendedProteinEnd, codingSequence); + + (string referenceAminoAcids, string alternateAminoAcids) = aminoAcids.Translate(referenceCodons, + alternateCodons, transcript.AminoAcidEdits, position.ProteinStart); + + (referenceAminoAcids, alternateAminoAcids, position.ProteinStart, position.ProteinEnd) = + TryTrimAminoAcidsAndUpdateProteinPositions(referenceAminoAcids, alternateAminoAcids, + position.ProteinStart, position.ProteinEnd); + + (position.CoveredCdnaStart, 
position.CoveredCdnaEnd) = + transcript.TranscriptRegions.GetCoveredCdnaPositions(position.CdnaStart, startIndex, position.CdnaEnd, + endIndex, onReverseStrand); + + (position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, + position.CoveredProteinEnd) = MappedPositionUtilities.GetCoveredCdsAndProteinPositions( + position.CoveredCdnaStart, position.CoveredCdnaEnd, transcript.StartExonPhase, + transcript.Translation?.CodingRegion); + + string transcriptRefAllele = HgvsUtilities.AdjustTranscriptRefAllele( + HgvsUtilities.GetTranscriptAllele(variant.RefAllele, onReverseStrand), position.CoveredCdnaStart, + position.CoveredCdnaEnd, cdnaSequence); + + // only generate the covered version of ref & alt alleles when CDS start/end is -1 + string coveredRefAa; + string coveredAltAa; + + if (position.CdsStart == -1 || position.CdsEnd == -1) + { + (string coveredRefCodon, string coveredAltCodon) = Codons.GetCodons(transcriptAltAllele, + position.CoveredCdsStart, position.CoveredCdsEnd, position.CoveredProteinStart, + position.CoveredProteinEnd, codingSequence); + + (coveredRefAa, coveredAltAa) = aminoAcids.Translate(coveredRefCodon, coveredAltCodon, + transcript.AminoAcidEdits, position.ProteinStart); + + (coveredRefAa, coveredAltAa, position.CoveredProteinStart, position.CoveredProteinEnd) = + TryTrimAminoAcidsAndUpdateProteinPositions(coveredRefAa, coveredAltAa, position.CoveredProteinStart, + position.CoveredProteinEnd); + } + else + { + coveredRefAa = referenceAminoAcids; + coveredAltAa = alternateAminoAcids; + position.CoveredProteinStart = position.ProteinStart; + position.CoveredProteinEnd = position.ProteinEnd; + } + + var positionalEffect = GetPositionalEffect(transcript, variant, position, referenceAminoAcids, + alternateAminoAcids, position.CoveredCdnaStart, position.CoveredCdnaEnd, position.CoveredCdsStart, + position.CoveredCdsEnd); + + var variantEffect = new VariantEffect(positionalEffect, variant, transcript, referenceAminoAcids, + 
alternateAminoAcids, referenceCodons, alternateCodons, position.ProteinStart, coveredRefAa, + coveredAltAa); + + return (variantEffect, position, referenceAminoAcids, alternateAminoAcids, referenceCodons, alternateCodons, + transcriptAltAllele, transcriptRefAllele, withinGap); + } + + internal static (string ReferenceAminoAcids, string AlternateAminoAcids, int ProteinStart, int ProteinEnd) + TryTrimAminoAcidsAndUpdateProteinPositions(string referenceAminoAcids, string alternateAminoAcids, int proteinStart, int proteinEnd) + { + (int newStart, string newReference, string newAlternate) = + BiDirectionalTrimmer.Trim(proteinStart, referenceAminoAcids, alternateAminoAcids); + + return string.IsNullOrEmpty(newReference) + ? (referenceAminoAcids, alternateAminoAcids, proteinStart, proteinEnd) + : (newReference, newAlternate, newStart, newStart + newReference.Length - 1); + } + + private static ISequence GetCdnaSequence(ITranscript transcript, ISequence refSequence) + { + return transcript.CdnaSequence ?? (transcript.CdnaSequence = new CdnaSequence(refSequence, + transcript.Translation?.CodingRegion, transcript.TranscriptRegions, + transcript.Gene.OnReverseStrand, transcript.RnaEdits)); + } + + private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, ITranscriptRegion startRegion, + int startIndex, ITranscriptRegion endRegion, int endIndex, IInterval variant, bool onReverseStrand, + ICodingRegion codingRegion, byte startExonPhase, bool isInsertion) + { + (int cdnaStart, int cdnaEnd) = isInsertion + ? 
MappedPositionUtilities.GetInsertionCdnaPositions(startRegion, endRegion, variant.Start, variant.End, + onReverseStrand) + : MappedPositionUtilities.GetCdnaPositions(startRegion, endRegion, variant.Start, variant.End, + onReverseStrand); + if (onReverseStrand) Swap.Int(ref cdnaStart, ref cdnaEnd); + + (int cdsStart, int cdsEnd) = MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaStart, cdnaEnd, + startExonPhase, isInsertion); + + int extendedCdsEnd = cdsStart != -1 && cdsEnd == -1 + ? MappedPositionUtilities.GetExtendedCdsPosition(codingRegion.CdnaStart, cdnaEnd, startExonPhase) + : cdsEnd; + + int proteinStart = MappedPositionUtilities.GetProteinPosition(cdsStart); + int proteinEnd = MappedPositionUtilities.GetProteinPosition(cdsEnd); + int extendedProteinEnd = MappedPositionUtilities.GetProteinPosition(extendedCdsEnd); + + (int exonStart, int exonEnd, int intronStart, int intronEnd) = + regions.GetExonsAndIntrons(startIndex, endIndex); + + return new MappedPosition(cdnaStart, cdnaEnd, cdsStart, cdsEnd, extendedCdsEnd, proteinStart, proteinEnd, extendedProteinEnd, exonStart, + exonEnd, intronStart, intronEnd, startIndex, endIndex); + } + + private static TranscriptPositionalEffect GetPositionalEffect(ITranscript transcript, ISimpleVariant variant, + IMappedPosition position, string refAminoAcid, string altAminoAcid, int coveredCdnaStart, + int coveredCdnaEnd, int coveredCdsStart, int coveredCdsEnd) + { + bool startCodonInsertionWithNoImpact = variant.Type == VariantType.insertion && + position.ProteinStart <= 1 && + altAminoAcid.EndsWith(refAminoAcid); + + var positionalEffect = new TranscriptPositionalEffect(); + positionalEffect.DetermineIntronicEffect(transcript.TranscriptRegions, variant, variant.Type); + positionalEffect.DetermineExonicEffect(transcript, variant, position, coveredCdnaStart, coveredCdnaEnd, + coveredCdsStart, coveredCdsEnd, variant.AltAllele, startCodonInsertionWithNoImpact); + return positionalEffect; + } + + // ReSharper disable once 
ReturnTypeCanBeEnumerable.Local + private static List GetConsequences(IInterval transcript, IVariant variant, + IVariantEffect variantEffect) + { + var featureEffect = new FeatureVariantEffects(transcript, variant.Type, variant, + variant.Behavior.StructuralVariantConsequence); + + var consequence = new Consequences(variantEffect, featureEffect); + consequence.DetermineSmallVariantEffects(); + return consequence.GetConsequences(); + } + + private static (PredictionScore Sift, PredictionScore PolyPhen) GetPredictionScores(IMappedPosition position, + string refAminoAcid, string altAminoAcid, IPredictionCache siftCache, IPredictionCache polyphenCache, + int siftIndex, int polyphenIndex) + { + if (!NeedPredictionScore(position.ProteinStart, position.ProteinEnd, refAminoAcid, altAminoAcid) || + position.ProteinStart == -1) return (null, null); + + char newAminoAcid = altAminoAcid[0]; + var siftScore = GetPredictionScore(position.ProteinStart, newAminoAcid, siftCache, siftIndex); + var polyphenScore = GetPredictionScore(position.ProteinStart, newAminoAcid, polyphenCache, polyphenIndex); + return (siftScore, polyphenScore); + } + + private static bool NeedPredictionScore(int proteinStart, int proteinEnd, string referenceAminoAcids, + string alternateAminoAcids) + { + return proteinStart != -1 && + proteinEnd != -1 && + proteinStart == proteinEnd && + referenceAminoAcids.Length == 1 && + alternateAminoAcids.Length == 1 && + referenceAminoAcids != alternateAminoAcids; + } + + private static PredictionScore GetPredictionScore(int proteinPosition, char newAminoAcid, + IPredictionCache predictionCache, int predictionIndex) + { + return predictionIndex == -1 + ? 
null + : predictionCache?.GetProteinFunctionPrediction(predictionIndex, newAminoAcid, proteinPosition); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/TranscriptAnnotation/GeneFusionUtilities.cs b/VariantAnnotation/TranscriptAnnotation/GeneFusionUtilities.cs index a79685a1..86d6d8c5 100644 --- a/VariantAnnotation/TranscriptAnnotation/GeneFusionUtilities.cs +++ b/VariantAnnotation/TranscriptAnnotation/GeneFusionUtilities.cs @@ -1,104 +1,104 @@ -using System.Collections.Generic; -using Genome; -using Intervals; -using VariantAnnotation.AnnotatedPositions; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.TranscriptAnnotation -{ - public static class GeneFusionUtilities - { - internal static IGeneFusionAnnotation GetGeneFusionAnnotation(IBreakEnd[] breakEnds, ITranscript transcript, - ITranscript[] fusedTranscriptCandidates) - { - if (transcript.Translation == null || breakEnds == null || breakEnds.Length == 0) return null; - - var desiredBreakEnd = GetBreakEndWithinCodingRegion(breakEnds, transcript.Chromosome, transcript.Translation.CodingRegion); - if (desiredBreakEnd == null) return null; - - var piece1 = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, desiredBreakEnd.Piece1.Position); - int? exon = piece1.Region.Type == TranscriptRegionType.Exon ? (int?)piece1.Region.Id : null; - int? intron = piece1.Region.Type == TranscriptRegionType.Intron ? 
(int?)piece1.Region.Id : null; - - var piece1Hgvs = GetBreakEndHgvs(transcript, piece1.Index, desiredBreakEnd.Piece1.Position, desiredBreakEnd.Piece1.IsSuffix, true); - - var geneFusions = new List(); - foreach (var candidate in fusedTranscriptCandidates) - { - var piece2 = MappedPositionUtilities.FindRegion(candidate.TranscriptRegions, desiredBreakEnd.Piece2.Position); - var geneFusion = GetGeneFusion(transcript, candidate, desiredBreakEnd.Piece2, piece2.Index, - piece1Hgvs.Hgvs, piece1Hgvs.IsTranscriptSuffix); - - if (geneFusion != null) geneFusions.Add(geneFusion); - } - - return geneFusions.Count == 0 - ? null - : new GeneFusionAnnotation(exon, intron, geneFusions.ToArray()); - } - - private static IBreakEnd GetBreakEndWithinCodingRegion(IBreakEnd[] breakEnds, IChromosome chromosome, - IInterval codingRegion) - { - // ReSharper disable once LoopCanBeConvertedToQuery - foreach (var breakend in breakEnds) - { - var position = breakend.Piece1.Position; - if (breakend.Piece1.Chromosome != chromosome || position < codingRegion.Start || position > codingRegion.End) continue; - return breakend; - } - - return null; - } - - private static (string Hgvs, bool IsTranscriptSuffix) GetBreakEndHgvs(ITranscript transcript, int regionIndex, - int position, bool isGenomicSuffix, bool isPiece1) - { - var positionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, position, regionIndex, isPiece1); - var isTranscriptSuffix = isGenomicSuffix != transcript.Gene.OnReverseStrand; - var codingRegionLength = transcript.Translation.CodingRegion.CdnaEnd - transcript.Translation.CodingRegion.CdnaStart + 1; - var hgvsPosString = isTranscriptSuffix ? positionOffset.Value + "_" + codingRegionLength : 1 + "_" + positionOffset.Value; - - var hgvs = transcript.Gene.Symbol + "{" + transcript.Id.WithVersion + "}" + ":c." 
+ hgvsPosString; - return (hgvs, isTranscriptSuffix); - } - - /// - /// evaluate if a candidate transcript can lead to a gene fusion if satisfy - /// -- transcript coding region do not overlap - /// -- have the same transcript source - /// -- have different gene name - /// -- breakendPosition 2 falls to coding region - /// -- unidirectional fusion with the other gene - /// - private static IGeneFusion GetGeneFusion(ITranscript transcript, ITranscript transcript2, - IBreakEndPiece bePiece2, int piece2RegionIndex, string piece1Hgvs, bool isPos1TranscriptSuffix) - { - if (SkipGeneFusion(transcript, transcript2, bePiece2)) return null; - - var region = transcript2.TranscriptRegions[piece2RegionIndex]; - var piece2Hgvs = GetBreakEndHgvs(transcript2, piece2RegionIndex, bePiece2.Position, bePiece2.IsSuffix, false); - if (piece2Hgvs.IsTranscriptSuffix == isPos1TranscriptSuffix) return null; - - int? exon = region.Type == TranscriptRegionType.Exon ? (int?)region.Id : null; - int? intron = region.Type == TranscriptRegionType.Intron ? (int?)region.Id : null; - - var hgvs = isPos1TranscriptSuffix ? 
piece2Hgvs.Hgvs + "_" + piece1Hgvs : piece1Hgvs + "_" + piece2Hgvs.Hgvs; - - return new GeneFusion(exon, intron, hgvs); - } - - private static bool SkipGeneFusion(ITranscript transcript, ITranscript transcript2, IBreakEndPiece piece2) - { - return transcript.Source != transcript2.Source || - transcript2.Translation == null || - transcript2.Gene.Symbol == transcript.Gene.Symbol || - transcript2.Chromosome.Index == transcript.Chromosome.Index && - transcript2.Chromosome.Index != piece2.Chromosome.Index || - transcript2.Translation.CodingRegion.Overlaps(transcript.Translation.CodingRegion) || - !transcript2.Translation.CodingRegion.Overlaps(piece2.Position, piece2.Position); - } - } -} +using System.Collections.Generic; +using Genome; +using Intervals; +using VariantAnnotation.AnnotatedPositions; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.TranscriptAnnotation +{ + public static class GeneFusionUtilities + { + internal static IGeneFusionAnnotation GetGeneFusionAnnotation(IBreakEnd[] breakEnds, ITranscript transcript, + ITranscript[] fusedTranscriptCandidates) + { + if (transcript.Translation == null || breakEnds == null || breakEnds.Length == 0) return null; + + var desiredBreakEnd = GetBreakEndWithinCodingRegion(breakEnds, transcript.Chromosome, transcript.Translation.CodingRegion); + if (desiredBreakEnd == null) return null; + + var piece1 = MappedPositionUtilities.FindRegion(transcript.TranscriptRegions, desiredBreakEnd.Piece1.Position); + int? exon = piece1.Region.Type == TranscriptRegionType.Exon ? (int?)piece1.Region.Id : null; + int? intron = piece1.Region.Type == TranscriptRegionType.Intron ? 
(int?)piece1.Region.Id : null; + + var piece1Hgvs = GetBreakEndHgvs(transcript, piece1.Index, desiredBreakEnd.Piece1.Position, desiredBreakEnd.Piece1.IsSuffix, true); + + var geneFusions = new List(); + foreach (var candidate in fusedTranscriptCandidates) + { + var piece2 = MappedPositionUtilities.FindRegion(candidate.TranscriptRegions, desiredBreakEnd.Piece2.Position); + var geneFusion = GetGeneFusion(transcript, candidate, desiredBreakEnd.Piece2, piece2.Index, + piece1Hgvs.Hgvs, piece1Hgvs.IsTranscriptSuffix); + + if (geneFusion != null) geneFusions.Add(geneFusion); + } + + return geneFusions.Count == 0 + ? null + : new GeneFusionAnnotation(exon, intron, geneFusions.ToArray()); + } + + private static IBreakEnd GetBreakEndWithinCodingRegion(IBreakEnd[] breakEnds, IChromosome chromosome, + IInterval codingRegion) + { + // ReSharper disable once LoopCanBeConvertedToQuery + foreach (var breakend in breakEnds) + { + var position = breakend.Piece1.Position; + if (breakend.Piece1.Chromosome != chromosome || position < codingRegion.Start || position > codingRegion.End) continue; + return breakend; + } + + return null; + } + + private static (string Hgvs, bool IsTranscriptSuffix) GetBreakEndHgvs(ITranscript transcript, int regionIndex, + int position, bool isGenomicSuffix, bool isPiece1) + { + var positionOffset = HgvsUtilities.GetCdnaPositionOffset(transcript, position, regionIndex, isPiece1); + var isTranscriptSuffix = isGenomicSuffix != transcript.Gene.OnReverseStrand; + var codingRegionLength = transcript.Translation.CodingRegion.CdnaEnd - transcript.Translation.CodingRegion.CdnaStart + 1; + var hgvsPosString = isTranscriptSuffix ? positionOffset.Value + "_" + codingRegionLength : 1 + "_" + positionOffset.Value; + + var hgvs = transcript.Gene.Symbol + "{" + transcript.Id.WithVersion + "}" + ":c." 
+ hgvsPosString; + return (hgvs, isTranscriptSuffix); + } + + /// + /// evaluate if a candidate transcript can lead to a gene fusion if satisfy + /// -- transcript coding region do not overlap + /// -- have the same transcript source + /// -- have different gene name + /// -- breakendPosition 2 falls to coding region + /// -- unidirectional fusion with the other gene + /// + private static IGeneFusion GetGeneFusion(ITranscript transcript, ITranscript transcript2, + IBreakEndPiece bePiece2, int piece2RegionIndex, string piece1Hgvs, bool isPos1TranscriptSuffix) + { + if (SkipGeneFusion(transcript, transcript2, bePiece2)) return null; + + var region = transcript2.TranscriptRegions[piece2RegionIndex]; + var piece2Hgvs = GetBreakEndHgvs(transcript2, piece2RegionIndex, bePiece2.Position, bePiece2.IsSuffix, false); + if (piece2Hgvs.IsTranscriptSuffix == isPos1TranscriptSuffix) return null; + + int? exon = region.Type == TranscriptRegionType.Exon ? (int?)region.Id : null; + int? intron = region.Type == TranscriptRegionType.Intron ? (int?)region.Id : null; + + var hgvs = isPos1TranscriptSuffix ? 
piece2Hgvs.Hgvs + "_" + piece1Hgvs : piece1Hgvs + "_" + piece2Hgvs.Hgvs; + + return new GeneFusion(exon, intron, hgvs); + } + + private static bool SkipGeneFusion(ITranscript transcript, ITranscript transcript2, IBreakEndPiece piece2) + { + return transcript.Source != transcript2.Source || + transcript2.Translation == null || + transcript2.Gene.Symbol == transcript.Gene.Symbol || + transcript2.Chromosome.Index == transcript.Chromosome.Index && + transcript2.Chromosome.Index != piece2.Chromosome.Index || + transcript2.Translation.CodingRegion.Overlaps(transcript.Translation.CodingRegion) || + !transcript2.Translation.CodingRegion.Overlaps(piece2.Position, piece2.Position); + } + } +} diff --git a/VariantAnnotation/TranscriptAnnotation/ReducedTranscriptAnnotator.cs b/VariantAnnotation/TranscriptAnnotation/ReducedTranscriptAnnotator.cs index c5b7ebb7..72b08df5 100644 --- a/VariantAnnotation/TranscriptAnnotation/ReducedTranscriptAnnotator.cs +++ b/VariantAnnotation/TranscriptAnnotation/ReducedTranscriptAnnotator.cs @@ -1,53 +1,53 @@ -using System.Collections.Generic; -using Intervals; -using VariantAnnotation.AnnotatedPositions.Consequence; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Caches.DataStructures; -using VariantAnnotation.Interface.AnnotatedPositions; -using Variants; - -namespace VariantAnnotation.TranscriptAnnotation -{ - public static class ReducedTranscriptAnnotator - { - public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant variant, ITranscript[] geneFusionCandidates) - { - var annotation = AnnotateTranscript(transcript, variant, geneFusionCandidates); - var consequences = GetConsequences(transcript, variant, annotation.GeneFusion != null); - - return new AnnotatedTranscript(transcript, null, null, null, null, annotation.Position, null, null, null, - null, consequences, annotation.GeneFusion, false); - } - - public static IAnnotatedTranscript GetCompleteOverlapTranscript(ITranscript 
transcript) => - new AnnotatedTranscript(transcript, null, null, null, null, null, null, null, null, null, null, null, true); - - private static (IMappedPosition Position, IGeneFusionAnnotation GeneFusion) - AnnotateTranscript(ITranscript transcript, IVariant variant, ITranscript[] geneFusionCandidates) - { - var position = GetMappedPosition(transcript.TranscriptRegions, variant); - var geneFusionAnnotation = GeneFusionUtilities.GetGeneFusionAnnotation(variant.BreakEnds, transcript, geneFusionCandidates); - return (position, geneFusionAnnotation); - } - - private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, IInterval variant) - { - var (startIndex, _) = MappedPositionUtilities.FindRegion(regions, variant.Start); - var (endIndex, _) = MappedPositionUtilities.FindRegion(regions, variant.End); - - var (exonStart, exonEnd, intronStart, intronEnd) = regions.GetExonsAndIntrons(startIndex, endIndex); - - return new MappedPosition(-1, -1, -1, -1, -1, -1, exonStart, exonEnd, intronStart, intronEnd, startIndex, - endIndex); - } - - private static IEnumerable GetConsequences(IInterval transcript, ISimpleVariant variant, - bool hasGeneFusionAnnotation) - { - var featureEffect = new FeatureVariantEffects(transcript, variant.Type, variant, true); - var consequence = new Consequences(null, featureEffect); - consequence.DetermineStructuralVariantEffect(variant.Type, hasGeneFusionAnnotation); - return consequence.GetConsequences(); - } - } +using System.Collections.Generic; +using Intervals; +using VariantAnnotation.AnnotatedPositions.Consequence; +using VariantAnnotation.AnnotatedPositions.Transcript; +using VariantAnnotation.Caches.DataStructures; +using VariantAnnotation.Interface.AnnotatedPositions; +using Variants; + +namespace VariantAnnotation.TranscriptAnnotation +{ + public static class ReducedTranscriptAnnotator + { + public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant variant, ITranscript[] 
geneFusionCandidates) + { + var annotation = AnnotateTranscript(transcript, variant, geneFusionCandidates); + var consequences = GetConsequences(transcript, variant, annotation.GeneFusion != null); + + return new AnnotatedTranscript(transcript, null, null, null, null, annotation.Position, null, null, null, + null, consequences, annotation.GeneFusion, false); + } + + public static IAnnotatedTranscript GetCompleteOverlapTranscript(ITranscript transcript) => + new AnnotatedTranscript(transcript, null, null, null, null, null, null, null, null, null, null, null, true); + + private static (IMappedPosition Position, IGeneFusionAnnotation GeneFusion) + AnnotateTranscript(ITranscript transcript, IVariant variant, ITranscript[] geneFusionCandidates) + { + var position = GetMappedPosition(transcript.TranscriptRegions, variant); + var geneFusionAnnotation = GeneFusionUtilities.GetGeneFusionAnnotation(variant.BreakEnds, transcript, geneFusionCandidates); + return (position, geneFusionAnnotation); + } + + private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, IInterval variant) + { + var (startIndex, _) = MappedPositionUtilities.FindRegion(regions, variant.Start); + var (endIndex, _) = MappedPositionUtilities.FindRegion(regions, variant.End); + + var (exonStart, exonEnd, intronStart, intronEnd) = regions.GetExonsAndIntrons(startIndex, endIndex); + + return new MappedPosition(-1, -1, -1, -1, -1, -1, -1, -1, exonStart, exonEnd, intronStart, intronEnd, + startIndex, endIndex); + } + + private static IEnumerable GetConsequences(IInterval transcript, ISimpleVariant variant, + bool hasGeneFusionAnnotation) + { + var featureEffect = new FeatureVariantEffects(transcript, variant.Type, variant, true); + var consequence = new Consequences(null, featureEffect); + consequence.DetermineStructuralVariantEffect(variant.Type, hasGeneFusionAnnotation); + return consequence.GetConsequences(); + } + } } \ No newline at end of file diff --git 
a/VariantAnnotation/TranscriptAnnotation/SequenceChange.cs b/VariantAnnotation/TranscriptAnnotation/SequenceChange.cs deleted file mode 100644 index 2f54bb62..00000000 --- a/VariantAnnotation/TranscriptAnnotation/SequenceChange.cs +++ /dev/null @@ -1,14 +0,0 @@ -namespace VariantAnnotation.TranscriptAnnotation -{ - public struct SequenceChange - { - public readonly string Reference; - public readonly string Alternate; - - public SequenceChange(string reference, string alternate) - { - Reference = reference; - Alternate = alternate; - } - } -} \ No newline at end of file diff --git a/VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactory.cs b/VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactory.cs index 356c881f..7b1208a8 100644 --- a/VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactory.cs +++ b/VariantAnnotation/TranscriptAnnotation/TranscriptAnnotationFactory.cs @@ -1,96 +1,93 @@ -using System.Collections.Generic; -using Genome; -using Intervals; -using VariantAnnotation.AnnotatedPositions.Transcript; -using VariantAnnotation.Interface.AnnotatedPositions; -using VariantAnnotation.Interface.Caches; -using VariantAnnotation.Interface.Intervals; -using Variants; - -namespace VariantAnnotation.TranscriptAnnotation -{ - public static class TranscriptAnnotationFactory - { - private static readonly AminoAcids AminoAcidsProvider = new AminoAcids(false); - private static readonly AminoAcids MitoAminoAcidsProvider = new AminoAcids(true); - - public static IList GetAnnotatedTranscripts(IVariant variant, - ITranscript[] transcriptCandidates, ISequence compressedSequence, IPredictionCache siftCache, - IPredictionCache polyphenCache, ITranscript[] geneFusionCandidates = null) - { - var annotatedTranscripts = new List(); - - foreach (var transcript in transcriptCandidates) - { - var annotationStatus = DecideAnnotationStatus(variant, transcript, variant.Behavior); - - var annotatedTranscript = GetAnnotatedTranscript(variant, 
compressedSequence, transcript, - annotationStatus, siftCache, polyphenCache, geneFusionCandidates); - - if (annotatedTranscript != null) annotatedTranscripts.Add(annotatedTranscript); - } - - return annotatedTranscripts; - } - - private static IAnnotatedTranscript GetAnnotatedTranscript(IVariant variant, ISequence compressedSequence, - ITranscript transcript, Status annotationStatus, IPredictionCache siftCache, IPredictionCache polyphenCache, - ITranscript[] geneFusionCandidates) - { - IAnnotatedTranscript annotatedTranscript = null; - - // ReSharper disable once SwitchStatementMissingSomeCases - switch (annotationStatus) - { - case Status.FlankingAnnotation: - annotatedTranscript = - FlankingTranscriptAnnotator.GetAnnotatedTranscript(variant.End, transcript); - break; - case Status.ReducedAnnotation: - annotatedTranscript = ReducedTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, geneFusionCandidates); - break; - case Status.CompleteOverlapAnnotation: - annotatedTranscript = ReducedTranscriptAnnotator.GetCompleteOverlapTranscript(transcript); - break; - case Status.FullAnnotation: - var acidsProvider = variant.Chromosome.UcscName == "chrM" - ? 
MitoAminoAcidsProvider - : AminoAcidsProvider; - annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, - compressedSequence, siftCache, polyphenCache, acidsProvider); - break; - } - - return annotatedTranscript; - } - - internal static Status DecideAnnotationStatus(IInterval variant, IInterval transcript, AnnotationBehavior behavior) - { - var overlapsTranscript = variant.Overlaps(transcript); - - if (!behavior.ReducedTranscriptAnnotation) - { - // handle small variants - if (overlapsTranscript) return Status.FullAnnotation; - if (behavior.NeedFlankingTranscript && variant.Overlaps(transcript, OverlapBehavior.FlankingLength)) return Status.FlankingAnnotation; - } - else - { - // handle large variants - if (variant.Contains(transcript)) return Status.CompleteOverlapAnnotation; - if (overlapsTranscript) return Status.ReducedAnnotation; - } - - return Status.NoAnnotation; - } - - public enum Status - { - NoAnnotation, - CompleteOverlapAnnotation, - FlankingAnnotation, - ReducedAnnotation, - FullAnnotation - } - } +using System.Collections.Generic; +using Genome; +using Intervals; +using VariantAnnotation.AnnotatedPositions.AminoAcids; +using VariantAnnotation.Interface.AnnotatedPositions; +using VariantAnnotation.Interface.Caches; +using VariantAnnotation.Interface.Intervals; +using Variants; + +namespace VariantAnnotation.TranscriptAnnotation +{ + public static class TranscriptAnnotationFactory + { + public static IList GetAnnotatedTranscripts(IVariant variant, + ITranscript[] transcriptCandidates, ISequence compressedSequence, IPredictionCache siftCache, + IPredictionCache polyphenCache, ITranscript[] geneFusionCandidates = null) + { + var annotatedTranscripts = new List(); + + foreach (var transcript in transcriptCandidates) + { + var annotationStatus = DecideAnnotationStatus(variant, transcript, variant.Behavior); + + var annotatedTranscript = GetAnnotatedTranscript(variant, compressedSequence, transcript, + annotationStatus, 
siftCache, polyphenCache, geneFusionCandidates); + + if (annotatedTranscript != null) annotatedTranscripts.Add(annotatedTranscript); + } + + return annotatedTranscripts; + } + + private static IAnnotatedTranscript GetAnnotatedTranscript(IVariant variant, ISequence compressedSequence, + ITranscript transcript, Status annotationStatus, IPredictionCache siftCache, IPredictionCache polyphenCache, + ITranscript[] geneFusionCandidates) + { + IAnnotatedTranscript annotatedTranscript = null; + + // ReSharper disable once SwitchStatementMissingSomeCases + switch (annotationStatus) + { + case Status.FlankingAnnotation: + annotatedTranscript = + FlankingTranscriptAnnotator.GetAnnotatedTranscript(variant.End, transcript); + break; + case Status.ReducedAnnotation: + annotatedTranscript = ReducedTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, geneFusionCandidates); + break; + case Status.CompleteOverlapAnnotation: + annotatedTranscript = ReducedTranscriptAnnotator.GetCompleteOverlapTranscript(transcript); + break; + case Status.FullAnnotation: + var aminoAcids = variant.Chromosome.UcscName == "chrM" + ? 
AminoAcidCommon.MitochondrialAminoAcids + : AminoAcidCommon.StandardAminoAcids; + annotatedTranscript = FullTranscriptAnnotator.GetAnnotatedTranscript(transcript, variant, + compressedSequence, siftCache, polyphenCache, aminoAcids); + break; + } + + return annotatedTranscript; + } + + internal static Status DecideAnnotationStatus(IInterval variant, IInterval transcript, AnnotationBehavior behavior) + { + var overlapsTranscript = variant.Overlaps(transcript); + + if (!behavior.ReducedTranscriptAnnotation) + { + // handle small variants + if (overlapsTranscript) return Status.FullAnnotation; + if (behavior.NeedFlankingTranscript && variant.Overlaps(transcript, OverlapBehavior.FlankingLength)) return Status.FlankingAnnotation; + } + else + { + // handle large variants + if (variant.Contains(transcript)) return Status.CompleteOverlapAnnotation; + if (overlapsTranscript) return Status.ReducedAnnotation; + } + + return Status.NoAnnotation; + } + + public enum Status + { + NoAnnotation, + CompleteOverlapAnnotation, + FlankingAnnotation, + ReducedAnnotation, + FullAnnotation + } + } } \ No newline at end of file diff --git a/VariantAnnotation/Utilities/BaseFormatting.cs b/VariantAnnotation/Utilities/BaseFormatting.cs index 03909494..40f0f568 100644 --- a/VariantAnnotation/Utilities/BaseFormatting.cs +++ b/VariantAnnotation/Utilities/BaseFormatting.cs @@ -1,7 +1,7 @@ -namespace VariantAnnotation.Utilities -{ - public static class BaseFormatting - { - public static string EmptyToDash(string bases) => bases == "" ? "-" : bases; - } +namespace VariantAnnotation.Utilities +{ + public static class BaseFormatting + { + public static string EmptyToDash(string bases) => bases == "" ? 
"-" : bases; + } } \ No newline at end of file diff --git a/VariantAnnotation/Utilities/Date.cs b/VariantAnnotation/Utilities/Date.cs index d121f743..822b65d8 100644 --- a/VariantAnnotation/Utilities/Date.cs +++ b/VariantAnnotation/Utilities/Date.cs @@ -1,10 +1,10 @@ -using System; - -namespace VariantAnnotation.Utilities -{ - public static class Date - { - public static string CurrentTimeStamp => DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"); - public static string GetDate(long ticks) => new DateTime(ticks).ToString("yyyy-MM-dd"); - } +using System; + +namespace VariantAnnotation.Utilities +{ + public static class Date + { + public static string CurrentTimeStamp => DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"); + public static string GetDate(long ticks) => new DateTime(ticks).ToString("yyyy-MM-dd"); + } } \ No newline at end of file diff --git a/VariantAnnotation/Utilities/FormatUtilities.cs b/VariantAnnotation/Utilities/FormatUtilities.cs index 152dc26d..8068c26d 100644 --- a/VariantAnnotation/Utilities/FormatUtilities.cs +++ b/VariantAnnotation/Utilities/FormatUtilities.cs @@ -1,22 +1,22 @@ -using VariantAnnotation.Interface.AnnotatedPositions; - -namespace VariantAnnotation.Utilities -{ - public static class FormatUtilities - { - public static string CombineIdAndVersion(ICompactId id, byte version) => id + "." + version; - - public static (string Id, byte Version) SplitVersion(string s) - { - if (s == null) return (null, 0); - - int lastPeriodPos = s.LastIndexOf('.'); - if (lastPeriodPos == -1) return (s, 0); - - string id = s.Substring(0, lastPeriodPos); - string remaining = s.Substring(lastPeriodPos + 1); - - return !byte.TryParse(remaining, out byte version) ? (s, (byte)1) : (id, version); - } - } +using VariantAnnotation.Interface.AnnotatedPositions; + +namespace VariantAnnotation.Utilities +{ + public static class FormatUtilities + { + public static string CombineIdAndVersion(ICompactId id, byte version) => id + "." 
+ version; + + public static (string Id, byte Version) SplitVersion(string s) + { + if (s == null) return (null, 0); + + int lastPeriodPos = s.LastIndexOf('.'); + if (lastPeriodPos == -1) return (s, 0); + + string id = s.Substring(0, lastPeriodPos); + string remaining = s.Substring(lastPeriodPos + 1); + + return !byte.TryParse(remaining, out byte version) ? (s, (byte)1) : (id, version); + } + } } \ No newline at end of file diff --git a/VariantAnnotation/VariantAnnotation.csproj b/VariantAnnotation/VariantAnnotation.csproj index 2c174126..9e1e3c62 100644 --- a/VariantAnnotation/VariantAnnotation.csproj +++ b/VariantAnnotation/VariantAnnotation.csproj @@ -1,22 +1,23 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - - - - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + Full + latest + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Variants/BiDirectionalTrimmer.cs b/Variants/BiDirectionalTrimmer.cs index 0024a496..4c5d1f27 100644 --- a/Variants/BiDirectionalTrimmer.cs +++ b/Variants/BiDirectionalTrimmer.cs @@ -1,35 +1,51 @@ -namespace Variants +using System; + +namespace Variants { public static class BiDirectionalTrimmer { - public static (int Start, string RefAllele, string AltAllele) Trim(int start, string refAllele, string altAllele) + public static (int Start, string RefAllele, string AltAllele) Trim(int start, string refAllele, + string altAllele) { // do not trim if ref and alt are same if (refAllele == altAllele) return (start, refAllele, altAllele); - if (refAllele == null) refAllele = ""; - if (altAllele == null) altAllele = ""; + refAllele ??= ""; + altAllele ??= ""; + + int refLen = refAllele.Length; + int altLen = altAllele.Length; + int origRefLen = refLen; + + ReadOnlySpan refSpan = refAllele.AsSpan(); + ReadOnlySpan altSpan = altAllele.AsSpan(); // trimming at the start - var i = 0; - while (i < refAllele.Length && i < altAllele.Length && refAllele[i] == altAllele[i]) i++; + var offset = 0; + 
while (offset < refLen && offset < altLen && refSpan[offset] == altSpan[offset]) offset++; - if (i > 0) + if (offset > 0) { - start += i; - altAllele = altAllele.Substring(i); - refAllele = refAllele.Substring(i); + start += offset; + refSpan = refSpan.Slice(offset); + altSpan = altSpan.Slice(offset); + refLen = refSpan.Length; + altLen = altSpan.Length; } // trimming at the end - var j = 0; - while (j < refAllele.Length && j < altAllele.Length && - refAllele[refAllele.Length - j - 1] == altAllele[altAllele.Length - j - 1]) j++; + while (refLen > 0 && altLen > 0 && refSpan[refLen - 1] == altSpan[altLen - 1]) + { + refLen--; + altLen--; + } + + // nothing to trim + if (refLen == origRefLen) return (start, refAllele, altAllele); - if (j <= 0) return (start, refAllele, altAllele); + refAllele = new string(refSpan.Slice(0, refLen)); + altAllele = new string(altSpan.Slice(0, altLen)); - altAllele = altAllele.Substring(0, altAllele.Length - j); - refAllele = refAllele.Substring(0, refAllele.Length - j); return (start, refAllele, altAllele); } } diff --git a/Variants/Variants.csproj b/Variants/Variants.csproj index 26a9c479..56009f2c 100644 --- a/Variants/Variants.csproj +++ b/Variants/Variants.csproj @@ -1,12 +1,13 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - Full - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + Full + latest + + + + + + + diff --git a/Vcf/Vcf.csproj b/Vcf/Vcf.csproj index 09898fc6..84e06d50 100644 --- a/Vcf/Vcf.csproj +++ b/Vcf/Vcf.csproj @@ -1,14 +1,14 @@ - - - netcoreapp2.1 - ..\bin\$(Configuration) - - - - - - - - - + + + netcoreapp2.1 + ..\bin\$(Configuration) + + + + + + + + + \ No newline at end of file