diff --git a/aaanalysis/__init__.py b/aaanalysis/__init__.py index e2f5688e..c9fe295a 100644 --- a/aaanalysis/__init__.py +++ b/aaanalysis/__init__.py @@ -1,10 +1,11 @@ from aaanalysis.data_loader import load_dataset, load_scales from aaanalysis.aaclust import AAclust -from aaanalysis.cpp import CPP, SequenceFeature, SplitRange +from aaanalysis.cpp import CPP, CPPPlot, SequenceFeature, SplitRange from aaanalysis.dpulearn import dPULearn -from aaanalysis.utils_plot import plot_settings, plot_set_legend, plot_gcfs +from aaanalysis.plotting import plot_settings, plot_set_legend, plot_gcfs, plot_get_cmap, plot_get_cdict __all__ = ["load_dataset", "load_scales", "AAclust", - "CPP", "SequenceFeature", "SplitRange", "dPULearn", - "plot_settings", "plot_set_legend", "plot_gcfs"] + "CPP", "CPPPlot", "SequenceFeature", "SplitRange", + "dPULearn", + "plot_settings", "plot_set_legend", "plot_gcfs", "plot_get_cmap", "plot_get_cdict"] diff --git a/aaanalysis/__pycache__/__init__.cpython-39.pyc b/aaanalysis/__pycache__/__init__.cpython-39.pyc index f1c9d64b..41d7b740 100644 Binary files a/aaanalysis/__pycache__/__init__.cpython-39.pyc and b/aaanalysis/__pycache__/__init__.cpython-39.pyc differ diff --git a/aaanalysis/__pycache__/_utils.cpython-38.pyc b/aaanalysis/__pycache__/_utils.cpython-38.pyc deleted file mode 100644 index e548d78e..00000000 Binary files a/aaanalysis/__pycache__/_utils.cpython-38.pyc and /dev/null differ diff --git a/aaanalysis/__pycache__/_utils.cpython-39.pyc b/aaanalysis/__pycache__/_utils.cpython-39.pyc deleted file mode 100644 index f6c651ec..00000000 Binary files a/aaanalysis/__pycache__/_utils.cpython-39.pyc and /dev/null differ diff --git a/aaanalysis/__pycache__/utils.cpython-39.pyc b/aaanalysis/__pycache__/utils.cpython-39.pyc new file mode 100644 index 00000000..1fed6b8a Binary files /dev/null and b/aaanalysis/__pycache__/utils.cpython-39.pyc differ diff --git a/aaanalysis/__pycache__/utils_plot.cpython-39.pyc 
b/aaanalysis/__pycache__/utils_plot.cpython-39.pyc deleted file mode 100644 index dcfb9e96..00000000 Binary files a/aaanalysis/__pycache__/utils_plot.cpython-39.pyc and /dev/null differ diff --git a/aaanalysis/data/__init__.py b/aaanalysis/_data/__init__.py similarity index 100% rename from aaanalysis/data/__init__.py rename to aaanalysis/_data/__init__.py diff --git a/aaanalysis/data/__pycache__/__init__.cpython-39.pyc b/aaanalysis/_data/__pycache__/__init__.cpython-39.pyc similarity index 100% rename from aaanalysis/data/__pycache__/__init__.cpython-39.pyc rename to aaanalysis/_data/__pycache__/__init__.cpython-39.pyc diff --git a/aaanalysis/data/benchmarks/AA_CASPASE3.tsv b/aaanalysis/_data/benchmarks/AA_CASPASE3.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_CASPASE3.tsv rename to aaanalysis/_data/benchmarks/AA_CASPASE3.tsv diff --git a/aaanalysis/data/benchmarks/AA_FURIN.tsv b/aaanalysis/_data/benchmarks/AA_FURIN.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_FURIN.tsv rename to aaanalysis/_data/benchmarks/AA_FURIN.tsv diff --git a/aaanalysis/data/benchmarks/AA_LDR.tsv b/aaanalysis/_data/benchmarks/AA_LDR.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_LDR.tsv rename to aaanalysis/_data/benchmarks/AA_LDR.tsv diff --git a/aaanalysis/data/benchmarks/AA_MMP2.tsv b/aaanalysis/_data/benchmarks/AA_MMP2.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_MMP2.tsv rename to aaanalysis/_data/benchmarks/AA_MMP2.tsv diff --git a/aaanalysis/data/benchmarks/AA_RNABIND.tsv b/aaanalysis/_data/benchmarks/AA_RNABIND.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_RNABIND.tsv rename to aaanalysis/_data/benchmarks/AA_RNABIND.tsv diff --git a/aaanalysis/data/benchmarks/AA_SA.tsv b/aaanalysis/_data/benchmarks/AA_SA.tsv similarity index 100% rename from aaanalysis/data/benchmarks/AA_SA.tsv rename to aaanalysis/_data/benchmarks/AA_SA.tsv diff --git 
a/aaanalysis/_data/benchmarks/DOM_GSEC.tsv b/aaanalysis/_data/benchmarks/DOM_GSEC.tsv new file mode 100644 index 00000000..3ee6333b --- /dev/null +++ b/aaanalysis/_data/benchmarks/DOM_GSEC.tsv @@ -0,0 +1,127 @@ +entry sequence label tmd_start tmd_stop jmd_n tmd jmd_c +P05067 MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMNVQNGKWDSDPSGTKTCIDTKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPHFVIPYRCLVGEFVSDALLVPDKCKFLHQERMDVCETHLHWHTVAKETCSEKSTNLHDYGMLLPCGIDKFRGVEFVCCPLAEESDNVDSADAEEDDSDVWWGGADTDYADGSEDKVVEVAEEEEVAEVEEEEADDDEDDEDGDEVEEEAEEPYEEATERTTSIATTTTTTTESVEEVVREVCSEQAETGPCRAMISRWYFDVTEGKCAPFFYGGCGGNRNNFDTEEYCMAVCGSAMSQSLLKTTQEPLARDPVKLPTTAASTPDAVDKYLETPGDENEHAHFQKAKERLEAKHRERMSQVMREWEEAERQAKNLPKADKKAVIQHFQEKVESLEQEAANERQQLVETHMARVEAMLNDRRRLALENYITALQAVPPRPRHVFNMLKKYVRAEQKDRQHTLKHFEHVRMVDPKKAAQIRSQVMTHLRVIYERMNQSLSLLYNVPAVAEEIQDEVDELLQKEQNYSDDVLANMISEPRISYGNDALMPSLTETKTTVELLPVNGEFSLDDLQPWHSFGADSVPANTENEVEPVDARPAADRGLTTRPGSGLTNIKTEEISEVKMDAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIATVIVITLVMLKKKQYTSIHHGVVEVDAAVTPEERHLSKMQQNGYENPTYKFFEQMQN 1 701 723 FAEDVGSNKG AIIGLMVGGVVIATVIVITLVML KKKQYTSIHH +P14925 
MAGRARSGLLLLLLGLLALQSSCLAFRSPLSVFKRFKETTRSFSNECLGTIGPVTPLDASDFALDIRMPGVTPKESDTYFCMSMRLPVDEEAFVIDFKPRASMDTVHHMLLFGCNMPSSTGSYWFCDEGTCTDKANILYAWARNAPPTRLPKGVGFRVGGETGSKYFVLQVHYGDISAFRDNHKDCSGVSVHLTRVPQPLIAGMYLMMSVDTVIPPGEKVVNADISCQYKMYPMHVFAYRVHTHHLGKVVSGYRVRNGQWTLIGRQNPQLPQAFYPVEHPVDVTFGDILAARCVFTGEGRTEATHIGGTSSDEMCNLYIMYYMEAKYALSFMTCTKNVAPDMFRTIPAEANIPIPVKPDMVMMHGHHKEAENKEKSALMQQPKQGEEEVLEQGDFYSLLSKLLGEREDVHVHKYNPTEKTESGSDLVAEIANVVQKKDLGRSDAREGAEHEEWGNAILVRDRIHRFHQLESTLRPAESRAFSFQQPGEGPWEPEPSGDFHVEEELDWPGVYLLPGQVSGVALDSKNNLVIFHRGDHVWDGNSFDSKFVYQQRGLGPIEEDTILVIDPNNAEILQSSGKNLFYLPHGLSIDTDGNYWVTDVALHQVFKLDPHSKEGPLLILGRSMQPGSDQNHFCQPTDVAVEPSTGAVFVSDGYCNSRIVQFSPSGKFVTQWGEESSGSSPRPGQFSVPHSLALVPHLDQLCVADRENGRIQCFKTDTKEFVREIKHASFGRNVFAISYIPGFLFAVNGKPYFGDQEPVQGFVMNFSSGEIIDVFKPVRKHFDMPHDIVASEDGTVYIGDAHTNTVWKFTLTEKMEHRSVKKAGIEVQEIKEAEAVVEPKVENKPTSSELQKMQEKQKLSTEPGSGVSVVLITTLLVIPVLVLLAIVMFIRWKKSRAFGDHDRKLESSSGRVLGRFRGKGSGGLNLGNFFASRKGYSRKGFDRVSTEGSDQEKDEDDGTESEEEYSAPLPKPAPSS 1 868 890 KLSTEPGSGV SVVLITTLLVIPVLVLLAIVMFI RWKKSRAFGD +P70180 MRSLLLFTFSACVLLARVLLAGGASSGAGDTRPGSRRRAREALAAQKIEVLVLLPRDDSYLFSLARVRPAIEYALRSVEGNGTGRKLLPPGTRFQVAYEDSDCGNRALFSLVDRVAAARGAKPDLILGPVCEYAAAPVARLASHWDLPMLSAGALAAGFQHKDTEYSHLTRVAPAYAKMGEMMLALFRHHHWSRAALVYSDDKLERNCYFTLEGVHEVFQEEGLHTSAYNFDETKDLDLDDIVRYIQGSERVVIMCASGDTIRRIMLAVHRHGMTSGDYAFFNIELFNSSSYGDGSWRRGDKHDSEAKQAYSSLQTVTLLRTVKPEFEKFSMEVKSSVEKQGLNEEDYVNMFVEGFHDAILLYVLALHEVLRAGYSKKDGGKIIQQTWNRTFEGIAGQVSIDANGDRYGDFSVVAMTDTEAGTQEVIGDYFGKEGRFQMRSNVKYPWGPLKLRLDETRIVEHTNSSPCKSSGGLEESAVTGIVVGALLGAGLLMAFYFFRKKYRITIERRNQQEESNIGKHRELREDSIRSHFSVA 1 477 499 PCKSSGGLEE SAVTGIVVGALLGAGLLMAFYFF RKKYRITIER +Q03157 
MGPTSPAARGQGRRWRPPPLPLLLPLSLLLLRAQLAVGNLAVGSPSAAEAPGSAQVAGLCGRLTLHRDLRTGRWEPDPQRSRRCLLDPQRVLEYCRQMYPELHIARVEQAAQAIPMERWCGGTRSGRCAHPHHEVVPFHCLPGEFVSEALLVPEGCRFLHQERMDQCESSTRRHQEAQEACSSQGLILHGSGMLLPCGSDRFRGVEYVCCPPPATPNPSGMAAGDPSTRSWPLGGRAEGGEDEEEVESFPQPVDDYFVEPPQAEEEEEEEEERAPPPSSHTPVMVSRVTPTPRPTDGVDVYFGMPGEIGEHEGFLRAKMDLEERRMRQINEVMREWAMADSQSKNLPKADRQALNEHFQSILQTLEEQVSGERQRLVETHATRVIALINDQRRAALEGFLAALQGDPPQAERVLMALRRYLRAEQKEQRHTLRHYQHVAAVDPEKAQQMRFQVQTHLQVIEERMNQSLGLLDQNPHLAQELRPQIQELLLAEHLGPSELDASVPGSSSEDKGSLQPPESKDDPPVTLPKGSTDQESSSSGREKLTPLEQYEQKVNASAPRGFPFHSSDIQRDELAPSGTGVSREALSGLLIMGAGGGSLIVLSLLLLRKKKPYGTISHGVVEVDPMLTLEEQQLRELQRHGYENPTYRFLEERP 1 585 607 APSGTGVSRE ALSGLLIMGAGGGSLIVLSLLLL RKKKPYGTIS +Q06481 MAATGTAAAAATGRLLLLLLVGLTAPALALAGYIEALAANAGTGFAVAEPQIAMFCGKLNMHVNIQTGKWEPDPTGTKSCFETKEEVLQYCQEMYPELQITNVMEANQRVSIDNWCRRDKKQCKSRFVTPFKCLVGEFVSDVLLVPEKCQFFHKERMEVCENHQHWHTVVKEACLTQGMTLYSYGMLLPCGVDQFHGTEYVCCPQTKIIGSVSKEEEEEDEEEEEEEDEEEDYDVYKSEFPTEADLEDFTEAAVDEDDEDEEEGEEVVEDRDYYYDTFKGDDYNEENPTEPGSDGTMSDKEITHDVKAVCSQEAMTGPCRAVMPRWYFDLSKGKCVRFIYGGCGGNRNNFESEDYCMAVCKAMIPPTPLPTNDVDVYFETSADDNEHARFQKAKEQLEIRHRNRMDRVKKEWEEAELQAKNLPKAERQTLIQHFQAMVKALEKEAASEKQQLVETHLARVEAMLNDRRRMALENYLAALQSDPPRPHRILQALRRYVRAENKDRLHTIRHYQHVLAVDPEKAAQMKSQVMTHLHVIEERRNQSLSLLYKVPYVAQEIQEEIDELLQEQRADMDQFTASISETPVDVRVSSEESEEIPPFHPFHPFPALPENEDTQPELYHPMKKGSGVGEQDGGLIGAEEKVINSKNKVDENMVIDETLDVKEMIFNAERVGGLEEERESVGPLREDFSLSSSALIGLLVIAVAIATVIVISLVMLRKRQYGTISHGIVEVDPMLTPEERHLNKMQNHGYENPTYKYLEQMQI 1 694 716 LREDFSLSSS ALIGLLVIAVAIATVIVISLVML RKRQYGTISH +P35613 MAAALFVLLGFALLGTHGASGAAGFVQAPLSQQRWVGGSVELHCEAVGSPVPEIQWWFEGQGPNDTCSQLWDGARLDRVHIHATYHQHAASTISIDTLVEEDTGTYECRASNDPDRNHLTRAPRVKWVRAQAVVLVLEPGTVFTTVEDLGSKILLTCSLNDSATEVTGHRWLKGGVVLKEDALPGQKTEFKVDSDDQWGEYSCVFLPEPMGTANIQLHGPPRVKAVKSSEHINEGETAMLVCKSESVPPVTDWAWYKITDSEDKALMNGSESRFFVSSSQGRSELHIENLNMEADPGQYRCNGTSSKGSDQAIITLRVRSHLAALWPFLGIVAEVLVLVTIIFIYEKRRKPEDVLDDDDAGSAPLKSSGQHQNDKGKNVRQRNSS 1 323 345 IITLRVRSHL AALWPFLGIVAEVLVLVTIIFIY EKRRKPEDVL +P35070 
MDRAARCSGASSLPLLLALALGLVILHCVVADGNSTRSPETNGLLCGDPEENCAATTTQSKRKGHFSRCPKQYKHYCIKGRCRFVVAEQTPSCVCDEGYIGARCERVDLFYLRGDRGQILVICLIAVMVVFIILVIGVCTCCHPLRKRRKRKKKEEEMETLGKDITPINEDIEETNIA 1 119 141 LFYLRGDRGQ ILVICLIAVMVVFIILVIGVCTC CHPLRKRRKR +P09803 MGARCRSFSALLLLLQVSSWLCQELEPESCSPGFSSEVYTFPVPERHLERGHVLGRVRFEGCTGRPRTAFFSEDSRFKVATDGTITVKRHLKLHKLETSFLVRARDSSHRELSTKVTLKSMGHHHHRHHHRDPASESNPELLMFPSVYPGLRRQKRDWVIPPISCPENEKGEFPKNLVQIKSNRDKETKVFYSITGQGADKPPVGVFIIERETGWLKVTQPLDREAIAKYILYSHAVSSNGEAVEDPMEIVITVTDQNDNRPEFTQEVFEGSVAEGAVPGTSVMKVSATDADDDVNTYNAAIAYTIVSQDPELPHKNMFTVNRDTGVISVLTSGLDRESYPTYTLVVQAADLQGEGLSTTAKAVITVKDINDNAPVFNPSTYQGQVPENEVNARIATLKVTDDDAPNTPAWKAVYTVVNDPDQQFVVVTDPTTNDGILKTAKGLDFEAKQQYILHVRVENEEPFEGSLVPSTATVTVDVVDVNEAPIFMPAERRVEVPEDFGVGQEITSYTAREPDTFMDQKITYRIWRDTANWLEINPETGAIFTRAEMDREDAEHVKNSTYVALIIATDDGSPIATGTGTLLLVLLDVNDNAPIPEPRNMQFCQRNPQPHIITILDPDLPPNTSPFTAELTHGASVNWTIEYNDAAQESLILQPRKDLEIGEYKIHLKLADNQNKDQVTTLDVHVCDCEGTVNNCMKAGIVAAGLQVPAILGILGGILALLILILLLLLFLRRRTVVKEPLLPPDDDTRDNVYYYDEEGGGEEDQDFDLSQLHRGLDARPEVTRNDVAPTLMSVPQYRPRPANPDEIGNFIDENLKAADSDPTAPPYDSLLVFDYEGSGSEAASLSSLNSSESDQDQDYDYLNEWGNRFKKLADMYGGGEDD 1 711 733 GIVAAGLQVP AILGILGGILALLILILLLLLFL RRRTVVKEPL +P19022 
MCRIAGALRTLLPLLAALLQASVEASGEIALCKTGFPEDVYSAVLSKDVHEGQPLLNVKFSNCNGKRKVQYESSEPADFKVDEDGMVYAVRSFPLSSEHAKFLIYAQDKETQEKWQVAVKLSLKPTLTEESVKESAEVEEIVFPRQFSKHSGHLQRQKRDWVIPPINLPENSRGPFPQELVRIRSDRDKNLSLRYSVTGPGADQPPTGIFIINPISGQLSVTKPLDREQIARFHLRAHAVDINGNQVENPIDIVINVIDMNDNRPEFLHQVWNGTVPEGSKPGTYVMTVTAIDADDPNALNGMLRYRIVSQAPSTPSPNMFTINNETGDIITVAAGLDREKVQQYTLIIQATDMEGNPTYGLSNTATAVITVTDVNDNPPEFTAMTFYGEVPENRVDIIVANLTVTDKDQPHTPAWNAVYRISGGDPTGRFAIQTDPNSNDGLVTVVKPIDFETNRMFVLTVAAENQVPLAKGIQHPPQSTATVSVTVIDVNENPYFAPNPKIIRQEEGLHAGTMLTTFTAQDPDRYMQQNIRYTKLSDPANWLKIDPVNGQITTIAVLDRESPNVKNNIYNATFLASDNGIPPMSGTGTLQIYLLDINDNAPQVLPQEAETCETPDPNSINITALDYDIDPNAGPFAFDLPLSPVTIKRNWTITRLNGDFAQLNLKIKFLEAGIYEVPIIITDSGNPPKSNISILRVKVCQCDSNGDCTDVDRIVGAGLGTGAIIAILLCIIILLILVLMFVVWMKRRDKERQAKQLLIDPEDDVRDNILKYDEEGGGEEDQDYDLSQLQQPDTVEPDAIKPVGIRRMDERPIHAEPQYPVRSAAPHPGDIGDFINEGLKAADNDPTAPPYDSLLVFDYEGSGSTAGSLSSLNSSSSGGEQDYDYLNDWGPRFKKLADMYGGGDD 1 724 746 RIVGAGLGTG AIIAILLCIIILLILVLMFVVWM KRRDKERQAK +P16070 MDKFWWHAAWGLCLVPLSLAQIDLNITCRFAGVFHVEKNGRYSISRTEAADLCKAFNSTLPTMAQMEKALSIGFETCRYGFIEGHVVIPRIHPNSICAANNTGVYILTSNTSQYDTYCFNASAPPEEDCTSVTDLPNAFDGPITITIVNRDGTRYVQKGEYRTNPEDIYPSNPTDDDVSSGSSSERSSTSGGYIFYTFSTVHPIPDEDSPWITDSTDRIPATTLMSTSATATETATKRQETWDWFSWLFLPSESKNHLHTTTQMAGTSSNTISAGWEPNEENEDERDRHLSFSGSGIDDDEDFISSTISTTPRAFDHTKQNQDWTQWNPSHSNPEVLLQTTTRMTDVDRNGTTAYEGNWNPEAHPPLIHHEHHEEEETPHSTSTIQATPSSTTEETATQKEQWFGNRWHEGYRQTPKEDSHSTTGTAAASAHTSHPMQGRTTPSPEDSSWTDFFNPISHPMGRGHQAGRRMDMDSSHSITLQPTANPNTGLVEDLDRTGPLSMTTQQSNSQSFSTSHEGLEEDKDHPTTSTLTSSNRNDVTGGRRDPNHSEGSTTLLEGYTSHYPHTKESRTFIPVTSAKTGSFGVTAVTVGDSNSNVNRSLSGDQDTFHPSGGSHTTHGSESDGHSHGSQEGGANTTSGPIRTPQIPEWLIILASLLALALILAVCIAVNSRRRCGQKKKLVINSGNGAVEDRKPSGLNGEASKSQEMVHLVNKESSETPDQFMTADETRNLQNVDMKIGV 1 650 672 GPIRTPQIPE WLIILASLLALALILAVCIAVNS RRRCGQKKKL +P09603 
MTAPGAAGRCPPTTWLGSLLLLVCLLASRSITEEVSEYCSHMIGSGHLQSLQRLIDSQMETSCQITFEFVDQEQLKDPVCYLKKAFLLVQDIMEDTMRFRDNTPNAIAIVQLQELSLRLKSCFTKDYEEHDKACVRTFYETPLQLLEKVKNVFNETKNLLDKDWNIFSKNCNNSFAECSSQDVVTKPDCNCLYPKAIPSSDPASVSPHQPLAPSMAPVAGLTWEDSEGTEGSSLLPGEQPLHTVDPGSAKQRPPRSTCQSFEPPETPVVKDSTIGGSPQPRPSVGAFNPGMEDILDSAMGTNWVPEEASGEASEIPVPQGTELSPSRPGGGSMQTEPARPSNFLSASSPLPASAKGQQPADVTGTALPRVGPVRPTGQDWNHTPQKTDHPSALLRDPPEPGSPRISSLRPQGLSNPSTLSAQPQLSRSHSSGSVLPLGELEGRRSTRDRRSPAEPEGGPASEGAARPLPRFNSVPLTDTGHERQSEGSFSPQLQESVFHLLVPSVILVLLAVGGLLFYRWRRRSHQEPQRADSPLEQPEGSPLTQDDRQVELPV 1 496 518 EGSFSPQLQE SVFHLLVPSVILVLLAVGGLLFY RWRRRSHQEP +O94985 MLRRPAPALAPAARLLLAGLLCGGGVWAARVNKHKPWLEPTYHGIVTENDNTVLLDPPLIALDKDAPLRFAESFEVTVTKEGEICGFKIHGQNVPFDAVVVDKSTGEGVIRSKEKLDCELQKDYSFTIQAYDCGKGPDGTNVKKSHKATVHIQVNDVNEYAPVFKEKSYKATVIEGKQYDSILRVEAVDADCSPQFSQICSYEIITPDVPFTVDKDGYIKNTEKLNYGKEHQYKLTVTAYDCGKKRATEDVLVKISIKPTCTPGWQGWNNRIEYEPGTGALAVFPNIHLETCDEPVASVQATVELETSHIGKGCDRDTYSEKSLHRLCGAAAGTAELLPSPSGSLNWTMGLPTDNGHDSDQVFEFNGTQAVRIPDGVVSVSPKEPFTISVWMRHGPFGRKKETILCSSDKTDMNRHHYSLYVHGCRLIFLFRQDPSEEKKYRPAEFHWKLNQVCDEEWHHYVLNVEFPSVTLYVDGTSHEPFSVTEDYPLHPSKIETQLVVGACWQEFSGVENDNETEPVTVASAGGDLHMTQFFRGNLAGLTLRSGKLADKKVIDCLYTCKEGLDLQVLEDSGRGVQIQAHPSQLVLTLEGEDLGELDKAMQHISYLNSRQFPTPGIRRLKITSTIKCFNEATCISVPPVDGYVMVLQPEEPKISLSGVHHFARAASEFESSEGVFLFPELRIISTITREVEPEGDGAEDPTVQESLVSEEIVHDLDTCEVTVEGEELNHEQESLEVDMARLQQKGIEVSSSELGMTFTGVDTMASYEEVLHLLRYRNWHARSLLDRKFKLICSELNGRYISNEFKVEVNVIHTANPMEHANHMAAQPQFVHPEHRSFVDLSGHNLANPHPFAVVPSTATVVIVVCVSFLVFMIILGVFRIRAAHRRTMRDQDTGKENEMDWDDSALTITVNPMETYEDQHSSEEEEEEEEEEESEDGEEEDDITSAESESSEEEEGEQGDPQNATRQQQLEWDDSTLSY 1 860 882 PHPFAVVPST ATVVIVVCVSFLVFMIILGVFRI RAAHRRTMRD +Q9H4D0 
MLPGRLCWVPLLLALGVGSGSGGGGDSRQRRLLAAKVNKHKPWIETSYHGVITENNDTVILDPPLVALDKDAPVPFAGEICAFKIHGQELPFEAVVLNKTSGEGRLRAKSPIDCELQKEYTFIIQAYDCGAGPHETAWKKSHKAVVHIQVKDVNEFAPTFKEPAYKAVVTEGKIYDSILQVEAIDEDCSPQYSQICNYEIVTTDVPFAIDRNGNIRNTEKLSYDKQHQYEILVTAYDCGQKPAAQDTLVQVDVKPVCKPGWQDWTKRIEYQPGSGSMPLFPSIHLETCDGAVSSLQIVTELQTNYIGKGCDRETYSEKSLQKLCGASSGIIDLLPSPSAATNWTAGLLVDSSEMIFKFDGRQGAKVPDGIVPKNLTDQFTITMWMKHGPSPGVRAEKETILCNSDKTEMNRHHYALYVHNCRLVFLLRKDFDQADTFRPAEFHWKLDQICDKEWHYYVINVEFPVVTLYMDGATYEPYLVTNDWPIHPSHIAMQLTVGACWQGGEVTKPQFAQFFHGSLASLTIRPGKMESQKVISCLQACKEGLDINSLESLGQGIKYHFNPSQSILVMEGDDIGNINRALQKVSYINSRQFPTAGVRRLKVSSKVQCFGEDVCISIPEVDAYVMVLQAIEPRITLRGTDHFWRPAAQFESARGVTLFPDIKIVSTFAKTEAPGDVKTTDPKSEVLEEMLHNLDFCDILVIGGDLDPRQECLELNHSELHQRHLDATNSTAGYSIYGVGSMSRYEQVLHHIRYRNWRPASLEARRFRIKCSELNGRYTSNEFNLEVSILHEDQVSDKEHVNHLIVQPPFLQSVHHPESRSSIQHSSVVPSIATVVIIISVCMLVFVVAMGVYRVRIAHQHFIQETEAAKESEMDWDDSALTITVNPMEKHEGPGHGEDETEGEEEEEAEEEMSSSSGSDDSEEEEEEEGMGRGRHGQNGARQAQLEWDDSTLPY 1 831 853 SSIQHSSVVP SIATVVIIISVCMLVFVVAMGVY RVRIAHQHFI +P78310 MALLLCFVLLCGVVDFARSLSITTPEEMIEKAKGETAYLPCKFTLSPEDQGPLDIEWLISPADNQKVDQVIILYSGDKIYDDYYPDLKGRVHFTSNDLKSGDASINVTNLQLSDIGTYQCKVKKAPGVANKKIHLVVLVKPSGARCYVDGSEEIGSDFKIKCEPKEGSLPLQYEWQKLSDSQKMPTSWLAEMTSSVISVKNASSEYSGTYSCTVRNRVGSDQCLLRLNVVPPSNKAGLIAGAIIGTLLALALIGLIIFCCRKKRREEKYEKEVHHDIREDVPPPKSRTSTARSYIGSNHSSLGSMSPSNMEGYSKTQYNQVPSEDFERTPQSPTLPPAKVAAPNLSRMGAIPVMIPAQSKDGSIV 1 236 258 RLNVVPPSNK AGLIAGAIIGTLLALALIGLIIF CCRKKRREEK +D3ZZK3 
MAGIFYFILFSFLFGICDAVTGSRVYPANEVTLLDSRSVQGELGWIASPLEGGWEEVSIMDEKNTPIRTYQVCNVMEASQNNWLRTDWITREGAQRVYIEIKFTLRDCNSLPGVMGTCKETFNLYYYESDNDKERFIRESQFGKIDTIAADESFTQVDIGDRIMKLNTEIRDVGPLSKKGFYLAFQDVGACIALVSVRVFYKKCPLTVRNLAQFPDTITGADTSSLVEVRGSCVNNSEEKDVPKMYCGADGEWLVPIGNCLCNAGHEEQNGECQACKIGYYKALSTDATCAKCPPHSYSVWEGATSCTCDRGFFRADNDAASMPCTRPPSAPLNLISNVNETSVNLEWSSPQNTGGRQDISYNVVCKKCGAGDPSKCRPCGSGVHYTPQQNGLKTTRVSITDLLAHTNYTFEIWAVNGVSKYNPSPDQSVSVTVTTNQAAPSSIALVQAKEVTRYSVALAWLEPDRPNGVILEYEVKYYEKDQNERSYRIVRTAARNTDIKGLNPLTSYVFHVRARTAAGYGDFSEPLEVTTNTVPSRIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSQAKQEADEEKHLNQGVRTYVDPFTYEDPNQAVREFAKEIDASCIKIEKVIGVGEFGEVCSGRLKVPGKREICVAIKTLKAGYTDKQRRDFLSEASIMGQFDHPNIIHLEGVVTKCKPVMIITEYMENGSLDAFLRKNDGRFTVIQLVGMLRGIGSGMKYLSDMSYVHRDLAARNILVNSNLVCKVSDFGMSRVLEDDPEAAYTTRGGKIPIRWTAPEAIAYRKFTSASDVWSYGIVMWEVMSYGERPYWDMSNQDVIKAIEEGYRLPPPMDCPIALHQLMLDCWQKERSDRPKFGQIVNMLDKLIRNPNSLKRTGPESSRPNTALLDPSSPEFSAVVSVGDWLQAIKMDRYKDNFTAAGYTTLEAVVHMSQDDLARIGITAITHQNKILSSVQAMRTQMQQMHGRMVPV 1 548 570 RIIGDGANST VLLVSVSGSVVLVVILIAAFVIS RRRSKYSQAK +Q14118 MRMSVGLSLLLPLSGRTFLLLLSVVMAQSHWPSEPSEAVRDWENQLEASMHSVLSDLHEAVPTVVGIPDGTAVVGRSFRVTIPTDLIASSGDIIKVSAAGKEALPSWLHWDSQSHTLEGLPLDTDKGVHYISVSATRLGANGSHIPQTSSVFSIEVYPEDHSELQSVRTASPDPGEVVSSACAADEPVTVLTVILDADLTKMTPKQRIDLLHRMRSFSEVELHNMKLVPVVNNRLFDMSAFMAGPGNAKKVVENGALLSWKLGCSLNQNSVPDIHGVEAPAREGAMSAQLGYPVVGWHIANKKPPLPKRVRRQIHATPTPVTAIGPPTTAIQEPPSRIVPTPTSPAIAPPTETMAPPVRDPVPGKPTVTIRTRGAIIQTPTLGPIQPTRVSEAGTTVPGQIRPTMTIPGYVEPTAVATPPTTTTKKPRVSTPKPATPSTDSTTTTTRRPTKKPRTPRPVPRVTTKVSITRLETASPPTRIRTTTSGVPRGGEPNQRPELKNHIDRVDAWVGTYFEVKIPSDTFYDHEDTTTDKLKLTLKLREQQLVGEKSWVQFNSNSQLMYGLPDSSHVGKHEYFMHATDKGGLSAVDAFEIHVHRRPQGDRAPARFKAKFVGDPALVLNDIHKKIALVKKLAFAFGDRNCSTITLQNITRGSIVVEWTNNTLPLEPCPKEQIAGLSRRIAEDDGKPRPAFSNALEPDFKATSITVTGSGSCRHLQFIPVVPPRRVPSEAPPTEVPDRDPEKSSEDDVYLHTVIPAVVVAAILLIAGIIAMICYRKKRKGKLTLEDQATFIKKGVPIIFADELDDSKPPPSSSMPLILQEEKAPLPPPEYPNQSVPETTPLNQDTMGEYTPLRDEDPNAPPYQPPPPFTAPMEGKGSRPKNMTPYRSPPPYVPP 1 753 775 KSSEDDVYLH TVIPAVVVAAILLIAGIIAMICY 
RKKRKGKLTL +Q63155 MENSLGCVWVPKLAFVLFGASLLSAHLQVTGFQIKPFTSLHFVSEPSDAVTMRGGNVLLNCSAESDRGVPVIKWKKDGLILALGMDDRKQQLPNGSLLIQNILHSRHHKPDEGLYQCEASLGDSGSIISRTAKVMVAGPLRFLSQTESITAFMGDTVLLKCEVIGDPMPTIHWQKNQQDLNPIPGDSRVVVLPSGALQISRLQPGDSGVYRCSARNPASTRTGNEAEVRILSDPGLHRQLYFLQRPSNVIAIEGKDAVLECCVSGYPPPSFTWLRGEEVIQLRSKKYSLLGGSNLLISNVTDDDSGTYTCVVTYKNENISASAELTVLVPPWFLNHPSNLYAYESMDIEFECAVSGKPVPTVNWMKNGDVVIPSDYFQIVGGSNLRILGVVKSDEGFYQCVAENEAGNAQSSAQLIVPKPAIPSSSILPSAPRDVVPVLVSSRFVRLSWRPPAEAKGNIQTFTVFFSREGDNRERALNTTQPGSLQLTVGNLKPEAMYTFRVVAYNEWGPGESSQPIKVATQPELQVPGPVENLHAVSASPTSILITWEPPAYANGPVQGYRLFCTEVSTGKEQNIEVDGLSYKLEGLKKFTEYTLRFLAYNRYGPGVSTDDITVVTLSDVPSAPPQNVSLEVVNSRSIKVSWLPPPSGTQNGFITGYKIRHRKTTRRGEMETLEPNNLWYLFTGLEKGSQYSFQVSAMTVNGTGPPSNWYTAETPENDLDESQVPDQPSSLHVRPQTNCIIMSWTPPLNPNIVVRGYIIGYGVGSPYAETVRVDSKQRYYSIERLESSSHYVISLKAFNNAGEGVPLYESATTRSITDPTDPVDYYPLLDDFPTSGPDVSTPMLPPVGVQAVALTHEAVRVSWADNSVPKNQKTSDVRLYTVRWRTSFSASAKYKSEDTTSLSYTATGLKPNTMYEFSVMVTKNRRSSTWSMTAHATTYEAAPTSAPKDLTVITREGKPRAVIVSWQPPLEANGKITAYILFYTLDKNIPIDDWIMETISGDRLTHQIMDLSLDTMYYFRIQARNAKGVGPLSDPILFRTLKVEHPDKMANDQGRHGDGGYWPVDTNLIDRSTLNEPPIGQMHPPHGSVTPQKNSNLLVITVVTVGVLTVLVVVIVAVICTRRSSAQQRKKRATHSASKRKGSQKDLRPPDLWIHHEEMEMKNIEKPAGTDPAGRGSPIQSCQDLTPVSHSQSESQMGSKSASHSGQDTEEAGSSMSTLERSLAARRATRTKLMIPMEAQSNNPAVVSAIPVPTLESAQYPGILPSPTCGYPHPQFTLRPVPFPTLSVDRGFGAGRTVSEGPTAQQQPMLPPAQPEHPSSEEAPSRTIPTACVRPTHPLRSFANPLLPPPMSAIEPKVPYTPLLSQPGPTLPKTHVKTASLGLAGKARSPLLPVSVPTAPEVSEESHKPTEDPASVYEQDDLSEQMASLEGLMKQLNAITGSAF 1 1099 1121 SVTPQKNSNL LVITVVTVGVLTVLVVVIVAVIC TRRSSAQQRK +Q61483 
MGRRSALALAVVSALLCQVWSSGVFELKLQEFVNKKGLLGNRNCCRGGSGPPCACRTFFRVCLKHYQASVSPEPPCTYGSAVTPVLGVDSFSLPDGAGIDPAFSNPIRFPFGFTWPGTFSLIIEALHTDSPDDLATENPERLISRLTTQRHLTVGEEWSQDLHSSGRTDLRYSYRFVCDEHYYGEGCSVFCRPRDDAFGHFTCGDRGEKMCDPGWKGQYCTDPICLPGCDDQHGYCDKPGECKCRVGWQGRYCDECIRYPGCLHGTCQQPWQCNCQEGWGGLFCNQDLNYCTHHKPCRNGATCTNTGQGSYTCSCRPGYTGANCELEVDECAPSPCKNGASCTDLEDSFSCTCPPGFYGKVCELSAMTCADGPCFNGGRCSDNPDGGYTCHCPLGFSGFNCEKKMDLCGSSPCSNGAKCVDLGNSYLCRCQAGFSGRYCEDNVDDCASSPCANGGTCRDSVNDFSCTCPPGYTGKNCSAPVSRCEHAPCHNGATCHQRGQRYMCECAQGYGGPNCQFLLPEPPPGPMVVDLSERHMESQGGPFPWVAVCAGVVLVLLLLLGCAAVVVCVRLKLQKHQPPPEPCGGETETMNNLANCQREKDVSVSIIGATQIKNTNKKADFHGDHGAEKSSFKVRYPTVDYNLVRDLKGDEATVRDTHSKRDTKCQSQSSAGEEKIAPTLRGGEIPDRKRPESVYSTSKDTKYQSVYVLSAEKDECVIATEV 1 545 567 HMESQGGPFP WVAVCAGVVLVLLLLLGCAAVVV CVRLKLQKHQ +Q9ERC8 MWILALSLFQSFANVFSEEPHSSLYFVNASLQEVVFASTSGTLVPCPAAGIPPVTLRWYLATGEEIYDVPGIRHVHPNGTLQIFPFPPSSFSTLIHDNTYYCTAENPSGKIRSQDVHIKAVLREPYTVRVEDQKTMRGNVAVFKCIIPSSVEAYVTVVSWEKDTVSLVSGSRFLITSTGALYIKDVQNEDGLYNYRCITRHRYTGETRQSNSARLFVSDPANSAPSILDGFDHRKAMAGQRVELPCKALGHPEPDYRWLKDNMPLELSGRFQKTVTGLLIENSRPSDSGSYVCEVSNRYGTAKVIGRLYVKQPLKATISPRKVKSSVGSQVSLSCSVTGNEDQELSWYRNGEILNPGKNVRITGLNHANLIMDHMVKSDGGAYQCFVRKDKLSAQDYVQVVLEDGTPKIISAFSEKVVSPAEPVSLVCNVKGTPLPTVTWTLDDDPILKGSGHRISQMITSEGNVVSYLNISSSQVRDGGVYRCTANNSAGVVLYQARINVRGPASIRPMKNITAIAGRDTYIHCRVIGYPYYSIKWYKNANLLPFNHRQVAFENNGTLKLSDVQKEVDEGEYTCNVLVQPQLSTSQSVHVTVKVPPFIQPFEFPRFSIGQRVFIPCVVVSGDLPITITWQKDGRPIPASLGVTIDNIDFTSSLRISNLSLMHNGNYTCIARNEAAAVEHQSQLIVRVPPKFVVQPRDQDGIYGKAVILNCSAEGYPVPTIVWKFSKGAGVPQFQPIALNGRIQVLSNGSLLIKHVVEEDSGYYLCKVSNDVGADVSKSMYLTVKIPAMITSYPNTTLATQGQRKEMSCTAHGEKPIIVRWEKEDRIINPEMARYLVSTKEVGEEVISTLQILPTVREDSGFFSCHAINSYGEDRGIIQLTVQEPPDPPEIEIKDVKARTITLRWTMGFDGNSPITGYDIECKNKSDSWDSAQRTKDVSPQLNSATIIDIHPSSTYSIRMYAKNRIGKSEPSNEITITADEAAPDGPPQEVHLEPTSSQSIRVTWKAPKKHLQNGIIRGYQIGYREYSTGGNFQFNIISIDTTGDSEVYTLDNLNKFTQYGLVVQACNRAGTGPSSQEIITTTLEDVPSYPPENVQAIATSPESISISWSTLSKEALNGILQGFRVIYWANLIDGELGEIKNVTTTQPSLELDGLEKYTNYSIQVLAFTRAGDGVRSEQIFTRTKEDVPGPPAGVKAAAASASMVFVSWLPPL
KLNGIIRKYTVFCSHPYPTVISEFEASPDSFSYRIPNLSRNRQYSVWVVAVTSAGRGNSSEIITVEPLAKAPARILTFSGTVTTPWMKDIVLPCKAVGDPSPAVKWMKDSNGTPSLVTIDGRRSIFSNGSFIIRTVKAEDSGYYSCVANNNWGSDEIILNLQVQVPPDQPRLTVSKTTSSSITLSWLPGDNGGSSIRGYILQYSEDNSEQWGSFPISPSERSYRLENLKCGTWYKFTLTAQNGVGPGRISEIIEAKTLGKEPQFSKEQELFASINTTRVRLNLIGWNDGGCPITSFTLEYRPFGTTVWTTAQRTSLSKSYILYDLQEATWYELQMRVCNSAGCAEKQANFATLNYDGSTIPPLIKSVVQSEEGLTTNEGLKILVTISCILVGVLLLFVLLLVVRRRRREQRLKRLRDAKSLAEMLMSKNTRTSDTLSKQQQTLRMHIDIPRAQLLIEERDTMETIDDRSTVLLTDADFGEAAKQKSLTVTHTVHYQSVSQATGPLVDVSDARPGTNPTTRRNAKAGPTARNRYASQWTLNRPHPTISAHTLTTDWRLPTPRATGSVDKESDSYSVSPSQDTDRARSSMVSTESASSTYEELARAYEHAKMEEQLRHAKFTITECFISDTSSEQLTAGTNEYTDSLTSSTPSESGICRFTASPPKPQDGGRVVNMAVPKAHRPGDLIHLPPYLRMDFLLNRGAPGTSRDLSLGQACLEPQKSRTLKRPTVLEPTPMEASSSTSSTREGQQSWQQGAVATLPQREGAELGQAAKMSSSQESLLDSRGHLKGNNPYAKSYTLV 1 1595 1617 EGLTTNEGLK ILVTISCILVGVLLLFVLLLVVR RRRREQRLKR +P54763 MAVRRLGAALLLLPLLAAVEETLMDSTTATAELGWMVHPPSGWEEVSGYDENMNTIRTYQVCNVFESSQNNWLRTKFIRRRGAHRIHVEMKFSVRDCSSIPSVPGSCKETFNLYYYEADFDLATKTFPNWMENPWVKVDTIAADESFSQVDLGGRVMKINTEVRSFGPVSRNGFYLAFQDYGGCMSLIAVRVFYRKCPRIIQNGAIFQETLSGAESTSLVAARGSCIANAEEVDVPIKLYCNGDGEWLVPIGRCMCKAGFEAVENGTVCRGCPSGTFKANQGDEACTHCPINSRTTSEGATNCVCRNGYYRADLDPLDMPCTTIPSAPQAVISSVNETSLMLEWTPPRDSGGREDLVYNIICKSCGSGRGACTRCGDNVQYAPRQLGLTEPRIYISDLLAHTQYTFEIQAVNGVTDQSPFSPQFASVNITTNQAAPSAVSIMHQVSRTVDSITLSWSQPDQPNGVILDYELQYYEKELSEYNATAIKSPTNTVTVQGLKAGAIYVFQVRARTVAGYGRYSGKMYFQTMTEAEYQTSIKEKLPLIVGSSAAGLVFLIAVVVIAIVCNRRGFERADSEYTDKLQHYTSGHMTPGMKIYIDPFTYEDPNEAVREFAKEIDISCVKIEQVIGAGEFGEVCSGHLKLPGKREIFVAIKTLKSGYTEKQRRDFLSEASIMGQFDHPNVIHLEGVVTKSTPVMIITEFMENGSLDSFLRQNDGQFTVIQLVGMLRGIAAGMKYLADMNYVHRDLAARNILVNSNLVCKVSDFGLSRFLEDDTSDPTYTSALGGKIPIRWTAPEAIQYRKFTSASDVWSYGIVMWEVMSYGERPYWDMTNQDVINAIEQDYRLPPPMDCPSALHQLMLDCWQKDRNHRPKFGQIVNTLDKMIRNPNSLKAMAPLSSGINLPLLDRTIPDYTSFNTVDEWLEAIKMGQYKESFANAGFTSFDVVSQMMMEDILRVGVTLAGHQKKILNSIQVMRAQMNQIQSVEV 1 543 565 YQTSIKEKLP LIVGSSAAGLVFLIAVVVIAIVC NRRGFERADS +Q15303 
MKPATGLWVWVSLLVAAGTVQPSDSQSVCAGTENKLSSLSDLEQQYRALRKYYENCEVVMGNLEITSIEHNRDLSFLRSVREVTGYVLVALNQFRYLPLENLRIIRGTKLYEDRYALAIFLNYRKDGNFGLQELGLKNLTEILNGGVYVDQNKFLCYADTIHWQDIVRNPWPSNLTLVSTNGSSGCGRCHKSCTGRCWGPTENHCQTLTRTVCAEQCDGRCYGPYVSDCCHRECAGGCSGPKDTDCFACMNFNDSGACVTQCPQTFVYNPTTFQLEHNFNAKYTYGAFCVKKCPHNFVVDSSSCVRACPSSKMEVEENGIKMCKPCTDICPKACDGIGTGSLMSAQTVDSSNIDKFINCTKINGNLIFLVTGIHGDPYNAIEAIDPEKLNVFRTVREITGFLNIQSWPPNMTDFSVFSNLVTIGGRVLYSGLSLLILKQQGITSLQFQSLKEISAGNIYITDNSNLCYYHTINWTTLFSTINQRIVIRDNRKAENCTAEGMVCNHLCSSDGCWGPGPDQCLSCRRFSRGRICIESCNLYDGEFREFENGSICVECDPQCEKMEDGLLTCHGPGPDNCTKCSHFKDGPNCVEKCPDGLQGANSFIFKYADPDRECHPCHPNCTQGCNGPTSHDCIYYPWTGHSTLPQHARTPLIAAGVIGGLFILVIVGLTFAVYVRRKSIKKKRALRRFLETELVEPLTPSGTAPNQAQLRILKETELKRVKVLGSGAFGTVYKGIWVPEGETVKIPVAIKILNETTGPKANVEFMDEALIMASMDHPHLVRLLGVCLSPTIQLVTQLMPHGCLLEYVHEHKDNIGSQLLLNWCVQIAKGMMYLEERRLVHRDLAARNVLVKSPNHVKITDFGLARLLEGDEKEYNADGGKMPIKWMALECIHYRKFTHQSDVWSYGVTIWELMTFGGKPYDGIPTREIPDLLEKGERLPQPPICTIDVYMVMVKCWMIDADSRPKFKELAAEFSRMARDPQRYLVIQGDDRMKLPSPNDSKFFQNLLDEEDLEDMMDAEEYLVPQAFNIPPPIYTSRARIDSNRSEIGHSPPPAYTPMSGNQFVYRDGGFAAEQGVSVPYRAPTSTIPEAPVAQGATAEIFDDSCCNGTLRKPVAPHVQEDSSTQRYSADPTVFAPERSPRGELDEEGYMTPMRDKPKQEYLNPVEENPFVSRRKNGDLQALDNPEYHNASNGPPKAEDEYVNEPLYLNTFANTLGKAEYLKNNILSMPEKAKKAFDNPDYWNHSLPPRSTLQHPDYLQEYSTKYFYKQNGRIRPIVAENPEYLSEFSLKPGTVLPPPPYRHRNTVV 1 653 675 TLPQHARTPL IAAGVIGGLFILVIVGLTFAVYV RRKSIKKKRA +P16882 
MDLCQVFLTLALAVTSSTFSGSEATPATLGKASPVLQRINPSLGTSSSGKPRFTKCRSPELETFSCYWTEGDNPDLKTPGSIQLYYAKRESQRQAARIAHEWTQEWKECPDYVSAGKNSCYFNSSYTSIWIPYCIKLTTNGDLLDQKCFTVDEIVQPDPPIGLNWTLLNISLTGIRGDIQVSWQPPPNADVLKGWIILEYEIQYKEVNESKWKVMGPIWLTYCPVYSLRMDKEHEVRVRSRQRSFEKYSEFSEVLRVIFPQTNILEACEEDIQFPWFLIIIFGIFGVAVMLFVVIFSKQQRIKMLILPPVPVPKIKGIDPDLLKEGKLEEVNTILGIHDNYKPDFYNDDSWVEFIELDIDEADVDEKTEGSDTDRLLSNDHEKSAGILGAKDDDSGRTSCYDPDILDTDFHTSDMCDGTLKFRQSQKLNMEADLLCLDQKNLKNLPYDASLGSLHPSITQTVEENKPQPLLSSETEATHQLASTPMSNPTSLANIDFYAQVSDITPAGGDVLSPGQKIKAGIAQGNTQREVATPCQENYSMNSAYFCESDAKKCIAVARRMEATSCIKPSFNQEDIYITTESLTTTAQMSETADIAPDAEMSVPDYTTVHTVQSPRGLILNATALPLPDKKNFPSSCGYVSTDQLNKIMQ 1 274 296 ILEACEEDIQ FPWFLIIIFGIFGVAVMLFVVIF SKQQRIKMLI +P04439 MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDQETRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQIMYGCDVGSDGRFLRGYRQDAYDGKDYIALNEDLRSWTAADMAAQITKRKWEAAHEAEQLRAYLDGTCVEWLRRYLENGKETLQRTDPPKTHMTHHPISDHEATLRCWALGFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPKPLTLRWELSSQPTIPIVGIIAGLVLLGAVITGAVVAAVMWRRKSSDRKGGSYTQAASSDSAQGSDVSLTACKV 1 308 330 WELSSQPTIP IVGIIAGLVLLGAVITGAVVAAV MWRRKSSDRK +P08069 
MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLHILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIFEMTNLKDIGLYNLRNITRGAIRIEKNADLCYLSTVDWSLILDAVSNNYIVGNKPPKECGDLCPGTMEEKPMCEKTTINNEYNYRCWTTNRCQKMCPSTCGKRACTENNECCHPECLGSCSAPDNDTACVACRHYYYAGVCVPACPPNTYRFEGWRCVDRDFCANILSAESSDSEGFVIHDGECMQECPSGFIRNGSQSMYCIPCEGPCPKVCEEEKKTKTIDSVTSAQMLQGCTIFKGNLLINIRRGNNIASELENFMGLIEVVTGYVKIRHSHALVSLSFLKNLRLILGEEQLEGNYSFYVLDNQNLQQLWDWDHRNLTIKAGKMYFAFNPKLCVSEIYRMEEVTGTKGRQSKGDINTRNNGERASCESDVLHFTSTTTSKNRIIITWHRYRPPDYRDLISFTVYYKEAPFKNVTEYDGQDACGSNSWNMVDVDLPPNKDVEPGILLHGLKPWTQYAVYVKAVTLTMVENDHIRGAKSEILYIRTNASVPSIPLDVLSASNSSSQLIVKWNPPSLPNGNLSYYIVRWQRQPQDGYLYRHNYCSKDKIPIRKYADGTIDIEEVTENPKTEVCGGEKGPCCACPKTEAEKQAEKEEAEYRKVFENFLHNSIFVPRPERKRRDVMQVANTTMSSRSRNTTAADTYNITDPEELETEYPFFESRVDNKERTVISNLRPFTLYRIDIHSCNHEAEKLGCSASNFVFARTMPAEGADDIPGPVTWEPRPENSIFLKWPEPENPNGLILMYEIKYGSQVEDQRECVSRQEYRKYGGAKLNRLNPGNYTARIQATSLSGNGSWTDPVFFYVQAKTGYENFIHLIIALPVAVLLIVGGLVIMLYVFHRKRNNSRLGNGVLYASVNPEYFSAADVYVPDEWEVAREKITMSRELGQGSFGMVYEGVAKGVVKDEPETRVAIKTVNEAASMRERIEFLNEASVMKEFNCHHVVRLLGVVSQGQPTLVIMELMTRGDLKSYLRSLRPEMENNPVLAPPSLSKMIQMAGEIADGMAYLNANKFVHRDLAARNCMVAEDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMSPESLKDGVFTTYSDVWSFGVVLWEIATLAEQPYQGLSNEQVLRFVMEGGLLDKPDNCPDMLFELMRMCWQYNPKMRPSFLEIISSIKEEMEPGFREVSFYYSEENKLPEPEELDLEPENMESVPLDPSASSSSLPLPDRHSGHKAENGPGPGVLVLRASFDERQPYAHMNGGRKNERALPLPQSSTC 1 936 958 AKTGYENFIH LIIALPVAVLLIVGGLVIMLYVF HRKRNNSRLG +P27930 MLRLYVLVMGVSAFTLQPAAHTGAARSCRFRGRHYKREFRLEGEPVALRCPQVPYWLWASVSPRINLTWHKNDSARTVPGEEETRMWAQDGALWLLPALQEDSGTYVCTTRNASYCDKMSIELRVFENTDAFLPFISYPQILTLSTSGVLVCPDLSEFTRDKTDVKIQWYKDSLLLDKDNEKFLSVRGTTHLLVHDVALEDAGYYRCVLTFAHEGQQYNITRSIELRIKKKKEETIPVIISPLKTISASLGSRLTIPCKVFLGTGTPLTTMLWWTANDTHIESAYPGGRVTEGPRQEYSENNENYIEVPLIFDPVTREDLHMDFKCVVHNTLSFQTLRTTVKEASSTFSWGIVLAPLSLAFLVLGGIWMHRRCKHRTGKADGLTVLWPHHQDFQSYPK 1 347 369 LRTTVKEASS TFSWGIVLAPLSLAFLVLGGIWM HRRCKHRTGK +Q9Y219 
MRAQGRGRLPRRLLLLLALWVQAARPMGYFELQLSALRNVNGELLSGACCDGDGRTTRAGGCGHDECDTYVRVCLKEYQAKVTPTGPCSYGHGATPVLGGNSFYLPPAGAAGDRARARARAGGDQDPGLVVIPFQFAWPRSFTLIVEAWDWDNDTTPNEELLIERVSHAGMINPEDRWKSLHFSGHVAHLELQIRVRCDENYYSATCNKFCRPRNDFFGHYTCDQYGNKACMDGWMGKECKEAVCKQGCNLLHGGCTVPGECRCSYGWQGRFCDECVPYPGCVHGSCVEPWQCNCETNWGGLLCDKDLNYCGSHHPCTNGGTCINAEPDQYRCTCPDGYSGRNCEKAEHACTSNPCANGGSCHEVPSGFECHCPSGWSGPTCALDIDECASNPCAAGGTCVDQVDGFECICPEQWVGATCQLDANECEGKPCLNAFSCKNLIGGYYCDCIPGWKGINCHINVNDCRGQCQHGGTCKDLVNGYQCVCPRGFGGRHCELERDECASSPCHSGGLCEDLADGFHCHCPQGFSGPLCEVDVDLCEPSPCRNGARCYNLEGDYYCACPDDFGGKNCSVPREPCPGGACRVIDGCGSDAGPGMPGTAASGVCGPHGRCVSQPGGNFSCICDSGFTGTYCHENIDDCLGQPCRNGGTCIDEVDAFRCFCPSGWEGELCDTNPNDCLPDPCHSRGRCYDLVNDFYCACDDGWKGKTCHSREFQCDAYTCSNGGTCYDSGDTFRCACPPGWKGSTCAVAKNSSCLPNPCVNGGTCVGSGASFSCICRDGWEGRTCTHNTNDCNPLPCYNGGICVDGVNWFRCECAPGFAGPDCRINIDECQSSPCAYGATCVDEINGYRCSCPPGRAGPRCQEVIGFGRSCWSRGTPFPHGSSWVEDCNSCRCLDGRRDCSKVWCGWKPCLLAGQPEALSAQCPLGQRCLEKAPGQCLRPPCEAWGECGAEEPPSTPCLPRSGHLDNNCARLTLHFNRDHVPQGTTVGAICSGIRSLPATRAVARDRLLVLLCDRASSGASAVEVAVSFSPARDLPDSSLIQGAAHAIVAAITQRGNSSLLLAVTEVKVETVVTGGSSTGLLVPVLCGAFSVLWLACVVLCVWWTRKRRKERERSRLPREESANNQWAPLNPIRNPIERPGGHKDVLYQCKNFTPPPRRADEALPGPAGHAAVREDEEDEDLGRGEEDSLEAEKFLSHKFTKDPGRSPGRPAHWASGPKVDNRAVRSINEARYAGKE 1 1083 1105 VVTGGSSTGL LVPVLCGAFSVLWLACVVLCVWW TRKRRKERER +P15382 MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSSDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP 1 44 66 SPRSSDGKLE ALYVLMVLGFFGFFTLGIMLSYI RSKKLEHSND +Q9Y6J6 MSTLSNFTQTLEDVFRRIFITYMDNWRQNTTAEQEALQAKVDAENFYYVILYLMVMIGMFSFIIVAILVSTVKSKRREHSNDPYHQYIVEDWQEKYKSQILNLEESKATIHENIGAAGFKMSP 1 47 69 LQAKVDAENF YYVILYLMVMIGMFSFIIVAILV STVKSKRREH +P11627 
MVVMLRYVWPLLLCSPCLLIQIPDEYKGHHVLEPPVITEQSPRRLVVFPTDDISLKCEARGRPQVEFRWTKDGIHFKPKEELGVVVHEAPYSGSFTIEGNNSFAQRFQGIYRCYASNKLGTAMSHEIQLVAEGAPKWPKETVKPVEVEEGESVVLPCNPPPSAAPPRIYWMNSKIFDIKQDERVSMGQNGDLYFANVLTSDNHSDYICNAHFPGTRTIIQKEPIDLRVKPTNSMIDRKPRLLFPTNSSSRLVALQGQSLILECIAEGFPTPTIKWLHPSDPMPTDRVIYQNHNKTLQLLNVGEEDDGEYTCLAENSLGSARHAYYVTVEAAPYWLQKPQSHLYGPGETARLDCQVQGRPQPEITWRINGMSMETVNKDQKYRIEQGSLILSNVQPTDTMVTQCEARNQHGLLLANAYIYVVQLPARILTKDNQTYMAVEGSTAYLLCKAFGAPVPSVQWLDEEGTTVLQDERFFPYANGTLSIRDLQANDTGRYFCQAANDQNNVTILANLQVKEATQITQGPRSAIEKKGARVTFTCQASFDPSLQASITWRGDGRDLQERGDSDKYFIEDGKLVIQSLDYSDQGNYSCVASTELDEVESRAQLLVVGSPGPVPHLELSDRHLLKQSQVHLSWSPAEDHNSPIEKYDIEFEDKEMAPEKWFSLGKVPGNQTSTTLKLSPYVHYTFRVTAINKYGPGEPSPVSESVVTPEAAPEKNPVDVRGEGNETNNMVITWKPLRWMDWNAPQIQYRVQWRPQGKQETWRKQTVSDPFLVVSNTSTFVPYEIKVQAVNNQGKGPEPQVTIGYSGEDYPQVSPELEDITIFNSSTVLVRWRPVDLAQVKGHLKGYNVTYWWKGSQRKHSKRHIHKSHIVVPANTTSAILSGLRPYSSYHVEVQAFNGRGLGPASEWTFSTPEGVPGHPEALHLECQSDTSLLLHWQPPLSHNGVLTGYLLSYHPVEGESKEQLFFNLSDPELRTHNLTNLNPDLQYRFQLQATTQQGGPGEAIVREGGTMALFGKPDFGNISATAGENYSVVSWVPRKGQCNFRFHILFKALPEGKVSPDHQPQPQYVSYNQSSYTQWNLQPDTKYEIHLIKEKVLLHHLDVKTNGTGPVRVSTTGSFASEGWFIAFVSAIILLLLILLILCFIKRSKGGKYSVKDKEDTQVDSEARPMKDETFGEYRSLESDNEEKAFGSSQPSLNGDIKPLGSDDSLADYGGSVDVQFNEDGSFIGQYSGKKEKEAAGGNDSSGATSPINPAVALE 1 1124 1146 VSTTGSFASE GWFIAFVSAIILLLLILLILCFI KRSKGGKYSV +P01130 
MGPWGWKLRWTVALLLAAAGTAVGDRCERNEFQCQDGKCISYKWVCDGSAECQDGSDESQETCLSVTCKSGDFSCGGRVNRCIPQFWRCDGQVDCDNGSDEQGCPPKTCSQDEFRCHDGKCISRQFVCDSDRDCLDGSDEASCPVLTCGPASFQCNSSTCIPQLWACDNDPDCEDGSDEWPQRCRGLYVFQGDSSPCSAFEFHCLSGECIHSSWRCDGGPDCKDKSDEENCAVATCRPDEFQCSDGNCIHGSRQCDREYDCKDMSDEVGCVNVTLCEGPNKFKCHSGECITLDKVCNMARDCRDWSDEPIKECGTNECLDNNGGCSHVCNDLKIGYECLCPDGFQLVAQRRCEDIDECQDPDTCSQLCVNLEGGYKCQCEEGFQLDPHTKACKAVGSIAYLFFTNRHEVRKMTLDRSEYTSLIPNLRNVVALDTEVASNRIYWSDLSQRMICSTQLDRAHGVSSYDTVISRDIQAPDGLAVDWIHSNIYWTDSVLGTVSVADTKGVKRKTLFRENGSKPRAIVVDPVHGFMYWTDWGTPAKIKKGGLNGVDIYSLVTENIQWPNGITLDLLSGRLYWVDSKLHSISSIDVNGGNRKTILEDEKRLAHPFSLAVFEDKVFWTDIINEAIFSANRLTGSDVNLLAENLLSPEDMVLFHNLTQPRGVNWCERTTLSNGGCQYLCLPAPQINPHSPKFTCACPDGMLLARDMRSCLTEAEAAVATQETSTVRLKVSSTAVRTQHTTTRPVPDTSRLPGATPGLTTVEIVTMSHQALGDVAGRGNEKKPSSVRALSIVLPIVLLVFLCLGVFLLWKNWRLKNINSINFDNPVYQKTTEDEVHICHNQDGYSYPSRQMVSLEDDVA 1 787 809 GRGNEKKPSS VRALSIVLPIVLLVFLCLGVFLL WKNWRLKNIN +P16150 MATLLLLLGVLVVSPDALGSTTAVQTPTSGEPLVSTSEPLSSKMYTTSITSDPKADSTGDQTSALPPSTSINEGSPLWTSIGASTGSPLPEPTTYQEVSIKMSSVPQETPHATSHPAVPITANSLGSHTVTGGTITTNSPETSSRTSGAPVTTAASSLETSRGTSGPPLTMATVSLETSKGTSGPPVTMATDSLETSTGTTGPPVTMTTGSLEPSSGASGPQVSSVKLSTMMSPTTSTNASTVPFRNPDENSRGMLPVAVLVALLAVIVLVALLLLWRRRQKRRTGALVLSRGGKRNGVVDAWAGPAQVPEEGAVTVTVGGSGGDKGSGFPDGEGSSRRPTLTTFFGRRKSRQGSLAMEELKSGSGPSLKGEEEPLVASEDGAVDAPAPDEPEGGDGAAP 1 255 277 FRNPDENSRG MLPVAVLVALLAVIVLVALLLLW RRRQKRRTGA +P0CC10 
MAQAHIQGSPCPLLPPGRMSWPQGALLLLWLFSPPLRAGGGGVAVTSAAGGGSPPATSCPAACSCSNQASRVICTRRELAEVPASIPVNTRYLNLQENSIQVIRTDTFKHLRHLEILQLSKNLVRKIEVGAFNGLPSLNTLELFDNRLTTVPTQAFEYLSKLRELWLRNNPIESIPSYAFNRVPSLRRLDLGELKRLEYISEAAFEGLVNLRYLNLGMCNLKDIPNLTALVRLEELELSGNRLDLIRPGSFQGLTSLRKLWLMHAQVATIERNAFDDLKSLEELNLSHNNLMSLPHDLFTPLHRLERVHLNHNPWHCNCDVLWLSWWLKETVPSNTTCCARCHAPAGLKGRYIGELDQSHFTCYAPVIVEPPTDLNVTEGMAAELKCRTGTSMTSVNWLTPNGTLMTHGSYRVRISVLHDGTLNFTNVTVQDTGQYTCMVTNSAGNTTASATLNVSAVDPVAAGGPGGGGPGGGGGAGGAGGYTYFTTVTVETLETQPGEEAQQPRGTEKEPPGPTTDGAWGGGRPDAAAPASASTTAPAPRSSRPTEKAFTVPITDVTENALKDLDDVMKTTKIIIGCFVAITFMAAVMLVAFYKLRKQHQLHKHHGPTRTVEIINVEDELPAASAVSVAAAAAVAGGAGVGGDSHLALPALERDHLNHHHYVAAAFKAHYGGNPGGGCGAKGPGLNSIHEPLLFKSGSKENVQETQI 1 573 595 LKDLDDVMKT TKIIIGCFVAITFMAAVMLVAFY KLRKQHQLHK +Q07954 MLTPPLLLLLPLLSALVAAAIDAPKTCSPKQFACRDQITCISKGWRCDGERDCPDGSDEAPEICPQSKAQRCQPNEHNCLGTELCVPMSRLCNGVQDCMDGSDEGPHCRELQGNCSRLGCQHHCVPTLDGPTCYCNSSFQLQADGKTCKDFDECSVYGTCSQLCTNTDGSFICGCVEGYLLQPDNRSCKAKNEPVDRPPVLLIANSQNILATYLSGAQVSTITPTSTRQTTAMDFSYANETVCWVHVGDSAAQTQLKCARMPGLKGFVDEHTINISLSLHHVEQMAIDWLTGNFYFVDDIDDRIFVCNRNGDTCVTLLDLELYNPKGIALDPAMGKVFFTDYGQIPKVERCDMDGQNRTKLVDSKIVFPHGITLDLVSRLVYWADAYLDYIEVVDYEGKGRQTIIQGILIEHLYGLTVFENYLYATNSDNANAQQKTSVIRVNRFNSTEYQVVTRVDKGGALHIYHQRRQPRVRSHACENDQYGKPGGCSDICLLANSHKARTCRCRSGFSLGSDGKSCKKPEHELFLVYGKGRPGIIRGMDMGAKVPDEHMIPIENLMNPRALDFHAETGFIYFADTTSYLIGRQKIDGTERETILKDGIHNVEGVAVDWMGDNLYWTDDGPKKTISVARLEKAAQTRKTLIEGKMTHPRAIVVDPLNGWMYWTDWEEDPKDSRRGRLERAWMDGSHRDIFVTSKTVLWPNGLSLDIPAGRLYWVDAFYDRIETILLNGTDRKIVYEGPELNHAFGLCHHGNYLFWTEYRSGSVYRLERGVGGAPPTVTLLRSERPPIFEIRMYDAQQQQVGTNKCRVNNGGCSSLCLATPGSRQCACAEDQVLDADGVTCLANPSYVPPPQCQPGEFACANSRCIQERWKCDGDNDCLDNSDEAPALCHQHTCPSDRFKCENNRCIPNRWLCDGDNDCGNSEDESNATCSARTCPPNQFSCASGRCIPISWTCDLDDDCGDRSDESASCAYPTCFPLTQFTCNNGRCININWRCDNDNDCGDNSDEAGCSHSCSSTQFKCNSGRCIPEHWTCDGDNDCGDYSDETHANCTNQATRPPGGCHTDEFQCRLDGLCIPLRWRCDGDTDCMDSSDEKSCEGVTHVCDPSVKFGCKDSARCISKAWVCDGDNDCEDNSDEENCESLACRPPSHPCANNTSVCLPPDKLCDGNDDCGDGSDEGELCDQCSLNNGGCSHNCSVAPGEGIVCSCPLGMELGPDNHTCQIQSY
CAKHLKCSQKCDQNKFSVKCSCYEGWVLEPDGESCRSLDPFKPFIIFSNRHEIRRIDLHKGDYSVLVPGLRNTIALDFHLSQSALYWTDVVEDKIYRGKLLDNGALTSFEVVIQYGLATPEGLAVDWIAGNIYWVESNLDQIEVAKLDGTLRTTLLAGDIEHPRAIALDPRDGILFWTDWDASLPRIEAASMSGAGRRTVHRETGSGGWPNGLTVDYLEKRILWIDARSDAIYSARYDGSGHMEVLRGHEFLSHPFAVTLYGGEVYWTDWRTNTLAKANKWTGHNVTVVQRTNTQPFDLQVYHPSRQPMAPNPCEANGGQGPCSHLCLINYNRTVSCACPHLMKLHKDNTTCYEFKKFLLYARQMEIRGVDLDAPYYNYIISFTVPDIDNVTVLDYDAREQRVYWSDVRTQAIKRAFINGTGVETVVSADLPNAHGLAVDWVSRNLFWTSYDTNKKQINVARLDGSFKNAVVQGLEQPHGLVVHPLRGKLYWTDGDNISMANMDGSNRTLLFSGQKGPVGLAIDFPESKLYWISSGNHTINRCNLDGSGLEVIDAMRSQLGKATALAIMGDKLWWADQVSEKMGTCSKADGSGSVVLRNSTTLVMHMKVYDESIQLDHKGTNPCSVNNGDCSQLCLPTSETTRSCMCTAGYSLRSGQQACEGVGSFLLYSVHEGIRGIPLDPNDKSDALVPVSGTSLAVGIDFHAENDTIYWVDMGLSTISRAKRDQTWREDVVTNGIGRVEGIAVDWIAGNIYWTDQGFDVIEVARLNGSFRYVVISQGLDKPRAITVHPEKGYLFWTEWGQYPRIERSRLDGTERVVLVNVSISWPNGISVDYQDGKLYWCDARTDKIERIDLETGENREVVLSSNNMDMFSVSVFEDFIYWSDRTHANGSIKRGSKDNATDSVPLRTGIGVQLKDIKVFNRDRQKGTNVCAVANGGCQQLCLYRGRGQRACACAHGMLAEDGASCREYAGYLLYSERTILKSIHLSDERNLNAPVQPFEDPEHMKNVIALAFDYRAGTSPGTPNRIFFSDIHFGNIQQINDDGSRRITIVENVGSVEGLAYHRGWDTLYWTSYTTSTITRHTVDQTRPGAFERETVITMSGDDHPRAFVLDECQNLMFWTNWNEQHPSIMRAALSGANVLTLIEKDIRTPNGLAIDHRAEKLYFSDATLDKIERCEYDGSHRYVILKSEPVHPFGLAVYGEHIFWTDWVRRAVQRANKHVGSNMKLLRVDIPQQPMGIIAVANDTNSCELSPCRINNGGCQDLCLLTHQGHVNCSCRGGRILQDDLTCRAVNSSCRAQDEFECANGECINFSLTCDGVPHCKDKSDEKPSYCNSRRCKKTFRQCSNGRCVSNMLWCNGADDCGDGSDEIPCNKTACGVGEFRCRDGTCIGNSSRCNQFVDCEDASDEMNCSATDCSSYFRLGVKGVLFQPCERTSLCYAPSWVCDGANDCGDYSDERDCPGVKRPRCPLNYFACPSGRCIPMSWTCDKEDDCEHGEDETHCNKFCSEAQFECQNHRCISKQWLCDGSDDCGDGSDEAAHCEGKTCGPSSFSCPGTHVCVPERWLCDGDKDCADGADESIAAGCLYNSTCDDREFMCQNRQCIPKHFVCDHDRDCADGSDESPECEYPTCGPSEFRCANGRCLSSRQWECDGENDCHDQSDEAPKNPHCTSQEHKCNASSQFLCSSGRCVAEALLCNGQDDCGDSSDERGCHINECLSRKLSGCSQDCEDLKIGFKCRCRPGFRLKDDGRTCADVDECSTTFPCSQRCINTHGSYKCLCVEGYAPRGGDPHSCKAVTDEEPFLIFANRYYLRKLNLDGSNYTLLKQGLNNAVALDFDYREQMIYWTDVTTQGSMIRRMHLNGSNVQVLHRTGLSNPDGLAVDWVGGNLYWCDKGRDTIEVSKLNGAYRTVLVSSGLREPRALVVDVQNGYLYWTDWGDHSLIGRIGMDGSSRSVIVDTKITWPNGLTLDYVTERIYWADAREDYIEFASLDGSNRHVV
LSQDIPHIFALTLFEDYVYWTDWETKSINRAHKTTGTNKTLLISTLHRPMDLHVFHALRQPDVPNHPCKVNNGGCSNLCLLSPGGGHKCACPTNFYLGSDGRTCVSNCTASQFVCKNDKCIPFWWKCDTEDDCGDHSDEPPDCPEFKCRPGQFQCSTGICTNPAFICDGDNDCQDNSDEANCDIHVCLPSQFKCTNTNRCIPGIFRCNGQDNCGDGEDERDCPEVTCAPNQFQCSITKRCIPRVWVCDRDNDCVDGSDEPANCTQMTCGVDEFRCKDSGRCIPARWKCDGEDDCGDGSDEPKEECDERTCEPYQFRCKNNRCVPGRWQCDYDNDCGDNSDEESCTPRPCSESEFSCANGRCIAGRWKCDGDHDCADGSDEKDCTPRCDMDQFQCKSGHCIPLRWRCDADADCMDGSDEEACGTGVRTCPLDEFQCNNTLCKPLAWKCDGEDDCGDNSDENPEECARFVCPPNRPFRCKNDRVCLWIGRQCDGTDNCGDGTDEEDCEPPTAHTTHCKDKKEFLCRNQRCLSSSLRCNMFDDCGDGSDEEDCSIDPKLTSCATNASICGDEARCVRTEKAAYCACRSGFHTVPGQPGCQDINECLRFGTCSQLCNNTKGGHLCSCARNFMKTHNTCKAEGSEYQVLYIADDNEIRSLFPGHPHSAYEQAFQGDESVRIDAMDVHVKAGRVYWTNWHTGTISYRSLPPAAPPTTSNRHRRQIDRGVTHLNISGLKMPRGIAIDWVAGNVYWTDSGRDVIEVAQMKGENRKTLISGMIDEPHAIVVDPLRGTMYWSDWGNHPKIETAAMDGTLRETLVQDNIQWPTGLAVDYHNERLYWADAKLSVIGSIRLNGTDPIVAADSKRGLSHPFSIDVFEDYIYGVTYINNRVFKIHKFGHSPLVNLTGGLSHASDVVLYHQHKQPEVTNPCDRKKCEWLCLLSPSGPVCTCPNGKRLDNGTCVPVPSPTPPPDAPRPGTCNLQCFNGGSCFLNARRQPKCRCQPRYTGDKCELDQCWEHCRNGGTCAASPSGMPTCRCPTGFTGPKCTQQVCAGYCANNSTCTVNQGNQPQCRCLPGFLGDRCQYRQCSGYCENFGTCQMAADGSRQCRCTAYFEGSRCEVNKCSRCLEGACVVNKQSGDVTCNCTDGRVAPSCLTCVGHCSNGGSCTMNSKMMPECQCPPHMTGPRCEEHVFSQQQPGHIASILIPLLLLLLLVLVAGVVFWYKRRVQGAKGFQHQRMTNGAMNVEIGNPTYKMYEGGEPDDVGGLLDADFALDPDKPTNFTNPVYATLYMGGHGSRHSLASTDEKRELLGRGPEDEIGDPLA 1 4421 4443 HVFSQQQPGH IASILIPLLLLLLLVLVAGVVFW YKRRVQGAKG +O75581 
MGAVLRSLLACSFCVLLRAAPLLLYANRRDLRLVDATNGKENATIVVGGLEDAAAVDFVFSHGLIYWSDVSEEAIKRTEFNKTESVQNVVVSGLLSPDGLACDWLGEKLYWTDSETNRIEVSNLDGSLRKVLFWQELDQPRAIALDPSSGFMYWTDWGEVPKIERAGMDGSSRFIIINSEIYWPNGLTLDYEEQKLYWADAKLNFIHKSNLDGTNRQAVVKGSLPHPFALTLFEDILYWTDWSTHSILACNKYTGEGLREIHSDIFSPMDIHAFSQQRQPNATNPCGIDNGGCSHLCLMSPVKPFYQCACPTGVKLLENGKTCKDGATELLLLARRTDLRRISLDTPDFTDIVLQLEDIRHAIAIDYDPVEGYIYWTDDEVRAIRRSFIDGSGSQFVVTAQIAHPDGIAVDWVARNLYWTDTGTDRIEVTRLNGTMRKILISEDLEEPRAIVLDPMVGYMYWTDWGEIPKIERAALDGSDRVVLVNTSLGWPNGLALDYDEGKIYWGDAKTDKIEVMNTDGTGRRVLVEDKIPHIFGFTLLGDYVYWTDWQRRSIERVHKRSAEREVIIDQLPDLMGLKATNVHRVIGSNPCAEENGGCSHLCLYRPQGLRCACPIGFELISDMKTCIVPEAFLLFSRRADIRRISLETNNNNVAIPLTGVKEASALDFDVTDNRIYWTDISLKTISRAFMNGSALEHVVEFGLDYPEGMAVDWLGKNLYWADTGTNRIEVSKLDGQHRQVLVWKDLDSPRALALDPAEGFMYWTEWGGKPKIDRAAMDGSERTTLVPNVGRANGLTIDYAKRRLYWTDLDTNLIESSNMLGLNREVIADDLPHPFGLTQYQDYIYWTDWSRRSIERANKTSGQNRTIIQGHLDYVMDILVFHSSRQSGWNECASSNGHCSHLCLAVPVGGFVCGCPAHYSLNADNRTCSAPTTFLLFSQKSAINRMVIDEQQSPDIILPIHSLRNVRAIDYDPLDKQLYWIDSRQNMIRKAQEDGSQGFTVVVSSVPSQNLEIQPYDLSIDIYSRYIYWTCEATNVINVTRLDGRSVGVVLKGEQDRPRAVVVNPEKGYMYFTNLQERSPKIERAALDGTEREVLFFSGLSKPIALALDSRLGKLFWADSDLRRIESSDLSGANRIVLEDSNILQPVGLTVFENWLYWIDKQQQMIEKIDMTGREGRTKVQARIAQLSDIHAVKELNLQEYRQHPCAQDNGGCSHICLVKGDGTTRCSCPMHLVLLQDELSCGEPPTCSPQQFTCFTGEIDCIPVAWRCDGFTECEDHSDELNCPVCSESQFQCASGQCIDGALRCNGDANCQDKSDEKNCEVLCLIDQFRCANGQCIGKHKKCDHNVDCSDKSDELDCYPTEEPAPQATNTVGSVIGVIVTIFVSGTVYFICQRMLCPRMKGDGETMTNDYVVHGPASVPLGYVPHPSSLSGSLPGMSRGKSMISSLSIMGGSSGPPYDRAHVTGASSSSSSSTKGTYFPAILNPPPSPATERSHYTMEFGYSSNSPSTHRSYSYRPYSYRHFAPPTTPCSTDVCDSDYAPSRRMTSVATAKGYTSDLNYDSEPVPPPPTPRSQYLSAEENYESCPPSPYTERSYSHHLYPPPPSPCTDSS 1 1371 1393 YPTEEPAPQA TNTVGSVIGVIVTIFVSGTVYFI CQRMLCPRMK +Q924X6 
MGRPELGALRPLALLLLLLLQLQHLSAADPLPGGQGPVKECEEDQFRCRNERCIPLVWRCDEDNDCSDNSDEDDCPKRTCADSDFTCDNGHCIPERWKCDGEEECPDGSDESKATCSSEECPAEKLSCGPTSHKCVPASWRCDGEKDCEGGADEAGCPTLCAPHEFQCSNRSCLASVFVCDGDDDCGDGSDERGCSDPACPPREFRCGGGGTCIPERWVCDRQFDCEDRSDEAAELCGRAGQGTTATPAACAPTAQFTCRSGECIHLGWRCDGDRDCKDKSDEADCSPGPCRENEFQCGDGTCVLAIKRCNQERDCPDGSDEAGCLQESTCEGPRRFQCKSGECVDGGKVCDDQRDCRDWSDEPQKVCGLNECLHNNGGCSHICTDLKIGFECTCPAGFQLLDQKTCGDIDECQDPDACSQICVNYKGYFKCECHPGYEMDTLTKNCKAVAGKSPSLIFTNRHEVRRIDLVKRDYSRLIPMLKNVVALDVEVATNRIYWCDLSYRKIYSAHMDKASIPDEQVVLIDEQLHSPEGLAVDWVHKHIYWTDSGNKTISVATTDGRRRCTLFSRELSEPRAIAVDPLRGFMYWSDWGFQAKIEKAGLNGADRQTLVSDNIEWPNGITLDLLSQRLYWVDSKLHQLSSIDFNGGNRKMLIFSTDFLSHPFGVAVFEDKVFWTDLENEAIFSANRLNGLEIAILAENLNNPHDIVIFHELKQPKAADACDLSAQPNGGCEYLCLPAPQISSHSPKYTCACPDTMWLGPDMKRCYRAPQSTSTTTLASAMTRTVPATTRAPGTTIHDPTYQNHSTETPSQTAAAPHSVNVPRAPSTSPSTPSPATSNHSQHYGNEGSQMGSTVTAAVIGVIVPIVVIALLCMSGYLIWRNWKRKNTKSMNFDNPVYRKTTEEEEEDELHIGRTAQIGHVYPAAISNYDRPLWAEPCLGETRDLEDPAPALKELFVLPGEPRSQLHQLPKNPLSELPVVKCKRVALSLEDDGLP 1 859 881 GSQMGSTVTA AVIGVIVPIVVIALLCMSGYLIW RNWKRKNTKS +Q12866 
MGPAPLPLLLGLFLPALWRRAITEAREEAKPYPLFPGPFPGSLQTDHTPLLSLPHASGYQPALMFSPTQPGRPHTGNVAIPQVTSVESKPLPPLAFKHTVGHIILSEHKGVKFNCSISVPNIYQDTTISWWKDGKELLGAHHAITQFYPDDEVTAIIASFSITSVQRSDNGSYICKMKINNEEIVSDPIYIEVQGLPHFTKQPESMNVTRNTAFNLTCQAVGPPEPVNIFWVQNSSRVNEQPEKSPSVLTVPGLTEMAVFSCEAHNDKGLTVSKGVQINIKAIPSPPTEVSIRNSTAHSILISWVPGFDGYSPFRNCSIQVKEADPLSNGSVMIFNTSALPHLYQIKQLQALANYSIGVSCMNEIGWSAVSPWILASTTEGAPSVAPLNVTVFLNESSDNVDIRWMKPPTKQQDGELVGYRISHVWQSAGISKELLEEVGQNGSRARISVQVHNATCTVRIAAVTRGGVGPFSDPVKIFIPAHGWVDYAPSSTPAPGNADPVLIIFGCFCGFILIGLILYISLAIRKRVQETKFGNAFTEEDSELVVNYIAKKSFCRRAIELTLHSLGVSEELQNKLEDVVIDRNLLILGKILGEGEFGSVMEGNLKQEDGTSLKVAVKTMKLDNSSQREIEEFLSEAACMKDFSHPNVIRLLGVCIEMSSQGIPKPMVILPFMKYGDLHTYLLYSRLETGPKHIPLQTLLKFMVDIALGMEYLSNRNFLHRDLAARNCMLRDDMTVCVADFGLSKKIYSGDYYRQGRIAKMPVKWIAIESLADRVYTSKSDVWAFGVTMWEIATRGMTPYPGVQNHEMYDYLLHGHRLKQPEDCLDELYEIMYSCWRTDPLDRPTFSVLRLQLEKLLESLPDVRNQADVIYVNTQLLESSEGLAQGSTLAPLDLNIDPDSIIASCTPRAAISVVTAEVHDSKPHEGRYILNGGSEEWEDLTSAPSAAVTAEKNSVLPGERLVRNGVSWSHSSMLPLGSSLPDELLFADDSSEGSEVLM 1 502 524 STPAPGNADP VLIIFGCFCGFILIGLILYISLA IRKRVQETKF +P08581 
MKAPAVLAPGILVLLFTLVQRSNGECKEALAKSEMNVNMKYQLPNFTAETPIQNVILHEHHIFLGATNYIYVLNEEDLQKVAEYKTGPVLEHPDCFPCQDCSSKANLSGGVWKDNINMALVVDTYYDDQLISCGSVNRGTCQRHVFPHNHTADIQSEVHCIFSPQIEEPSQCPDCVVSALGAKVLSSVKDRFINFFVGNTINSSYFPDHPLHSISVRRLKETKDGFMFLTDQSYIDVLPEFRDSYPIKYVHAFESNNFIYFLTVQRETLDAQTFHTRIIRFCSINSGLHSYMEMPLECILTEKRKKRSTKKEVFNILQAAYVSKPGAQLARQIGASLNDDILFGVFAQSKPDSAEPMDRSAMCAFPIKYVNDFFNKIVNKNNVRCLQHFYGPNHEHCFNRTLLRNSSGCEARRDEYRTEFTTALQRVDLFMGQFSEVLLTSISTFIKGDLTIANLGTSEGRFMQVVVSRSGPSTPHVNFLLDSHPVSPEVIVEHTLNQNGYTLVITGKKITKIPLNGLGCRHFQSCSQCLSAPPFVQCGWCHDKCVRSEECLSGTWTQQICLPAIYKVFPNSAPLEGGTRLTICGWDFGFRRNNKFDLKKTRVLLGNESCTLTLSESTMNTLKCTVGPAMNKHFNMSIIISNGHGTTQYSTFSYVDPVITSISPKYGPMAGGTLLTLTGNYLNSGNSRHISIGGKTCTLKSVSNSILECYTPAQTISTEFAVKLKIDLANRETSIFSYREDPIVYEIHPTKSFISGGSTITGVGKNLNSVSVPRMVINVHEAGRNFTVACQHRSNSEIICCTTPSLQQLNLQLPLKTKAFFMLDGILSKYFDLIYVHNPVFKPFEKPVMISMGNENVLEIKGNDIDPEAVKGEVLKVGNKSCENIHLHSEAVLCTVPNDLLKLNSELNIEWKQAISSTVLGKVIVQPDQNFTGLIAGVVSISTALLLLLGFFLWLKKRKQIKDLGSELVRYDARVHTPHLDRLVSARSVSPTTEMVSNESVDYRATFPEDQFPNSSQNGSCRQVQYPLTDMSPILTSGDSDISSPLLQNTVHIDLSALNPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFIGEHYVHVNATYVNVKCVAPYPSLLSSEDNADDEVDTRPASFWETS 1 933 955 VIVQPDQNFT GLIAGVVSISTALLLLLGFFLWL KKRKQIKDLG +P15941 
MTPGTQSPFFLLLLLTVLTVVTGSGHASSTPGGEKETSATQRSSVPSSTEKNAVSMTSSVLSSHSPGSGSSTTQGQDVTLAPATEPASGSAATWGQDVTSVPVTRPALGSTTPPAHDVTSAPDNKPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDNRPALGSTAPPVHNVTSASGSASGSASTLVHNGTSARATTTPASKSTPFSIPSHHSDTPTTLASHSTKTDASSTHHSSVPPLTSSNHSTSPQLSTGVSFFFLSFHISNLQFNSSLEDPSTDYYQELQRDISEMFLQIYKQGGFLGLSNIKFRPGSVVVQLTLAFREGTINVHDVETQFNQYKTEAASRYNLTISDVSVSDVPFPFSAQSGAGVPGWGIALLVLVCVLVALAIVYLIALAVCQCRRKNYGQLDIFPARDTYHPMSEYPTYHTHGRYVPPSSTDRSPYEKVSAGNGGSSLSYTNPAVAATSANL 1 1159 1181 SAQSGAGVPG WGIALLVLVCVLVALAIVYLIAL AVCQCRRKNY +Q9JKF6 MARMGLAGAAGRWWGLALGLTAFFLPGTHTQVVQVNDSMYGFIGTDVVLHCSFANPLPSVKITQVTWQKASNGSKQNMAIYNPTMGVSVLPPYEKRVEFLRPSFIDGTIRLSGLELEDEGMYICEFATFPTGNRESQLNLTVMAKPTNWIEGTRAVLRARKGQDDKVLVATCTSANGKPPSAVSWETRLKGEAEYQEIRNPNGTVTVISRYRLVPSREAHRQSLACIVNYHLDRFRESLTLNVQYEPEVTIEGFDGNWYLQRTDVKLTCKADANPPATEYHWTTLNGSLPKGVEAQNRTLFFRGPITYSLAGTYICEATNPIGTRSGQVEVNITEFPYTPTPEHGRRAGQMPTAIIGGVAGSVLLVLIVVGGIIVALRRRRHTFKGDYSTKKHVYGNGYSKAGIPQHHPPMAQNLQYPDDSDDEKKAGPLGGSSYEEEEEEEGGGGGERKVGGPHPKYDEDAKRPYFTVDEAEARQDGYGDRTLGYQYDPEQLDLAENMVSQNDGSFISKKEWYV 1 355 377 GRRAGQMPTA IIGGVAGSVLLVLIVVGGIIVAL RRRRHTFKGD +Q62765 
MALPRCMWPNYVWRAMMACVVHRGSGAPLTLCLLGCLLQTFHVLSQKLDDVDPLVTTNFGKIRGIKKELNNEILGPVIQFLGVPYAAPPTGEHRFQPPEPPSPWSDIRNATQFAPVCPQNIIDGRLPEVMLPVWFTNNLDVVSSYVQDQSEDCLYLNIYVPTEDVKRISKECARKPGKKICRKGDIRDSGGPKPVMVYIHGGSYMEGTGNLYDGSVLASYGNVIVITVNYRLGVLGFLSTGDQAAKGNYGLLDLIQALRWTSENIGFFGGDPLRITVFGSGAGGSCVNLLTLSHYSEGNRWSNSTKGLFQRAIAQSGTALSSWAVSFQPAKYARILATKVGCNVSDTVELVECLQKKPYKELVDQDVQPARYHIAFGPVIDGDVIPDDPQILMEQGEFLNYDIMLGVNQGEGLKFVENIVDSDDGVSASDFDFAVSNFVDNLYGYPEGKDVLRETIKFMYTDWADRHNPETRRKTLLALFTDHQWVAPAVATADLHSNFGSPTYFYAFYHHCQTDQVPAWADAAHGDEVPYVLGIPMIGPTELFPCNFSKNDVMLSAVVMTYWTNFAKTGDPNQPVPQDTKFIHTKPNRFEEVAWTRYSQKDQLYLHIGLKPRVKEHYRANKVNLWLELVPHLHNLNDISQYTSTTTKVPSTDITLRPTRKNSTPVTSAFPTAKQDDPKQQPSPFSVDQRDYSTELSVTIAVGASLLFLNILAFAALYYKKDKRRHDVHRRCSPQRTTTNDLTHAPEEEIMSLQMKHTDLDHECESIHPHEVVLRTACPPDYTLAMRRSPDDVPLMTPNTITMIPNTIPGIQPLHTFNTFTGGQNNTLPHPHPHPHSHSTTRV 1 697 719 VDQRDYSTEL SVTIAVGASLLFLNILAFAALYY KKDKRRHDVH +O35516 MPALRPAALRALLWLWLCGAGPAHALQCRGGQEPCVNEGTCVTYHNGTGFCRCPEGFLGEYCQHRDPCEKNRCQNGGTCVPQGMLGKATCRCAPGFTGEDCQYSTSHPCFVSRPCQNGGTCHMLSRDTYECTCQVGFTGKQCQWTDACLSHPCENGSTCTSVASQFSCKCPAGLTGQKCEADINECDIPGRCQHGGTCLNLPGSYRCQCPQGFTGQHCDSPYVPCAPSPCVNGGTCRQTGDFTFECNCLPGFEGSTCERNIDDCPNHKCQNGGVCVDGVNTYNCRCPPQWTGQFCTEDVDECLLQPNACQNGGTCTNRNGGYGCVCVNGWSGDDCSENIDDCAYASCTPGSTCIDRVASFSCLCPEGKAGLLCHLDDACISNPCHKGALCDTNPLNGQYICTCPQGYKGADCTEDVDECAMANSNPCEHAGKCVNTDGAFHCECLKGYAGPRCEMDINECHSDPCQNDATCLDKIGGFTCLCMPGFKGVHCELEVNECQSNPCVNNGQCVDKVNRFQCLCPPGFTGPVCQIDIDDCSSTPCLNGAKCIDHPNGYECQCATGFTGILCDENIDNCDPDPCHHGQCQDGIDSYTCICNPGYMGAICSDQIDECYSSPCLNDGRCIDLVNGYQCNCQPGTSGLNCEINFDDCASNPCMHGVCVDGINRYSCVCSPGFTGQRCNIDIDECASNPCRKGATCINDVNGFRCICPEGPHHPSCYSQVNECLSNPCIHGNCTGGLSGYKCLCDAGWVGVNCEVDKNECLSNPCQNGGTCNNLVNGYRCTCKKGFKGYNCQVNIDECASNPCLNQGTCFDDVSGYTCHCMLPYTGKNCQTVLAPCSPNPCENAAVCKEAPNFESFSCLCAPGWQGKRCTVDVDECISKPCMNNGVCHNTQGSYVCECPPGFSGMDCEEDINDCLANPCQNGGSCVDHVNTFSCQCHPGFIGDKCQTDMNECLSEPCKNGGTCSDYVNSYTCTCPAGFHGVHCENNIDECTESSCFNGGTCVDGINSFSCLCPVGFTGPFCLHDINECSSNPCLNAGTCVDGLGTYRCICPLGYTGKNCQTLVNLCSRSPCKNKGTCVQEKARPHCLCPPG
WDGAYCDVLNVSCKAAALQKGVPVEHLCQHSGICINAGNTHHCQCPLGYTGSYCEEQLDECASNPCQHGATCNDFIGGYRCECVPGYQGVNCEYEVDECQNQPCQNGGTCIDLVNHFKCSCPPGTRGLLCEENIDECAGGPHCLNGGQCVDRIGGYTCRCLPGFAGERCEGDINECLSNPCSSEGSLDCVQLKNNYNCICRSAFTGRHCETFLDVCPQKPCLNGGTCAVASNMPDGFICRCPPGFSGARCQSSCGQVKCRRGEQCIHTDSGPRCFCLNPKDCESGCASNPCQHGGTCYPQRQPPHYSCRCPPSFGGSHCELYTAPTSTPPATCQSQYCADKARDGICDEACNSHACQWDGGDCSLTMEDPWANCTSTLRCWEYINNQCDEQCNTAECLFDNFECQRNSKTCKYDKYCADHFKDNHCDQGCNSEECGWDGLDCASDQPENLAEGTLIIVVLLPPEQLLQDSRSFLRALGTLLHTNLRIKQDSQGALMVYPYFGEKSAAMKKQKMTRRSLPEEQEQEQEVIGSKIFLEIDNRQCVQDSDQCFKNTDAAAALLASHAIQGTLSYPLVSVFSELESPRNAQLLYLLAVAVVIILFFILLGVIMAKRKRKHGFLWLPEGFTLRRDSSNHKRREPVGQDAVGLKNLSVQVSEANLIGSGTSEHWVDDEGPQPKKAKAEDEALLSEDDPIDRRPWTQQHLEAADIRHTPSLALTPPQAEQEVDVLDVNVRGPDGCTPLMLASLRGGSSDLSDEDEDAEDSSANIITDLVYQGASLQAQTDRTGEMALHLAARYSRADAAKRLLDAGADANAQDNMGRCPLHAAVAADAQGVFQILIRNRVTDLDARMNDGTTPLILAARLAVEGMVAELINCQADVNAVDDHGKSALHWAAAVNNVEATLLLLKNGANRDMQDNKEETPLFLAAREGSYEAAKILLDHFANRDITDHMDRLPRDVARDRMHHDIVRLLDEYNVTPSPPGTVLTSALSPVLCGPNRSFLSLKHTPMGKKARRPNTKSTMPTSLPNLAKEAKDAKGSRRKKCLNEKVQLSESSVTLSPVDSLESPHTYVSDATSSPMITSPGILQASPTPLLAAAAPAAPVHTQHALSFSNLHDMQPLAPGASTVLPSVSQLLSHHHIAPPGSSSAGSLGRLHPVPVPADWMNRVEMNETQYSEMFGMVLAPAEGAHPGIAAPQSRPPEGKHMSTQREPLPPIVTFQLIPKGSIAQAAGAPQTQSSCPPAVAGPLPSMYQIPEMPRLPSVAFPPTMMPQQEGQVAQTIVPTYHPFPASVGKYPTPPSQHSYASSNAAERTPSHGGHLQGEHPYLTPSPESPDQWSSSSPHSASDWSDVTTSPTPGGGGGGQRGPGTHMSEPPHSNMQVYA 1 1680 1702 SELESPRNAQ LLYLLAVAVVIILFFILLGVIMA KRKRKHGFLW +Q61982 
MGLGARGRRRRRRLMALPPPPPPMRALPLLLLLAGLGAAAPPCLDGSPCANGGRCTHQQPSLEAACLCLPGWVGERCQLEDPCHSGPCAGRGVCQSSVVAGTARFSCRCLRGFQGPDCSQPDPCVSRPCVHGAPCSVGPDGRFACACPPGYQGQSCQSDIDECRSGTTCRHGGTCLNTPGSFRCQCPLGYTGLLCENPVVPCAPSPCRNGGTCRQSSDVTYDCACLPGFEGQNCEVNVDDCPGHRCLNGGTCVDGVNTYNCQCPPEWTGQFCTEDVDECQLQPNACHNGGTCFNLLGGHSCVCVNGWTGESCSQNIDDCATAVCFHGATCHDRVASFYCACPMGKTGLLCHLDDACVSNPCHEDAICDTNPVSGRAICTCPPGFTGGACDQDVDECSIGANPCEHLGRCVNTQGSFLCQCGRGYTGPRCETDVNECLSGPCRNQATCLDRIGQFTCICMAGFTGTYCEVDIDECQSSPCVNGGVCKDRVNGFSCTCPSGFSGSMCQLDVDECASTPCRNGAKCVDQPDGYECRCAEGFEGTLCERNVDDCSPDPCHHGRCVDGIASFSCACAPGYTGIRCESQVDECRSQPCRYGGKCLDLVDKYLCRCPPGTTGVNCEVNIDDCASNPCTFGVCRDGINRYDCVCQPGFTGPLCNVEINECASSPCGEGGSCVDGENGFHCLCPPGSLPPLCLPANHPCAHKPCSHGVCHDAPGGFRCVCEPGWSGPRCSQSLAPDACESQPCQAGGTCTSDGIGFRCTCAPGFQGHQCEVLSPCTPSLCEHGGHCESDPDRLTVCSCPPGWQGPRCQQDVDECAGASPCGPHGTCTNLPGNFRCICHRGYTGPFCDQDIDDCDPNPCLHGGSCQDGVGSFSCSCLDGFAGPRCARDVDECLSSPCGPGTCTDHVASFTCACPPGYGGFHCEIDLPDCSPSSCFNGGTCVDGVSSFSCLCRPGYTGTHCQYEADPCFSRPCLHGGICNPTHPGFECTCREGFTGSQCQNPVDWCSQAPCQNGGRCVQTGAYCICPPGWSGRLCDIQSLPCTEAAAQMGVRLEQLCQEGGKCIDKGRSHYCVCPEGRTGSHCEHEVDPCTAQPCQHGGTCRGYMGGYVCECPAGYAGDSCEDNIDECASQPCQNGGSCIDLVARYLCSCPPGTLGVLCEINEDDCDLGPSLDSGVQCLHNGTCVDLVGGFRCNCPPGYTGLHCEADINECRPGACHAAHTRDCLQDPGGHFRCVCHPGFTGPRCQIALSPCESQPCQHGGQCRHSLGRGGGLTFTCHCVPPFWGLRCERVARSCRELQCPVGIPCQQTARGPRCACPPGLSGPSCRVSRASPSGATNASCASAPCLHGGSCLPVQSVPFFRCVCAPGWGGPRCETPSAAPEVPEEPRCPRAACQAKRGDQNCDRECNTPGCGWDGGDCSLNVDDPWRQCEALQCWRLFNNSRCDPACSSPACLYDNFDCYSGGRDRTCNPVYEKYCADHFADGRCDQGCNTEECGWDGLDCASEVPALLARGVLVLTVLLPPEELLRSSADFLQRLSAILRTSLRFRLDARGQAMVFPYHRPSPGSESRVRRELGPEVIGSVVMLEIDNRLCLQSAENDHCFPDAQSAADYLGALSAVERLDFPYPLRDVRGEPLEAPEQSVPLLPLLVAGAVFLLIIFILGVMVARRKREHSTLWFPEGFALHKDIAAGHKGRREPVGQDALGMKNMAKGESLMGEVVTDLNDSECPEAKRLKVEEPGMGAEEPEDCRQWTQHHLVAADIRVAPATALTPPQGDADADGVDVNVRGPDGFTPLMLASFCGGALEPMPAEEDEADDTSASIISDLICQGAQLGARTDRTGETALHLAARYARADAAKRLLDAGADTNAQDHSGRTPLHTAVTADAQGVFQILIRNRSTDLDARMADGSTALILAARLAVEGMVEELIASHADVNAVDELGKSALHWAAAVNNVEATLALLKNGANKDMQDSKEETPLFLAAREGSYEAAKLLLDHLANRE
ITDHLDRLPRDVAQERLHQDIVRLLDQPSGPRSPSGPHGLGPLLCPPGAFLPGLKAVQSGTKKSRRPPGKTGLGPQGTRGRGKKLTLACPGPLADSSVTLSPVDSLDSPRPFSGPPASPGGFPLEGPYATTATAVSLAQLGASRAGPLGRQPPGGCVLSFGLLNPVAVPLDWARLPPPAPPGPSFLLPLAPGPQLLNPGAPVSPQERPPPYLAAPGHGEEYPAAGTRSSPTKARFLRVPSEHPYLTPSPESPEHWASPSPPSLSDWSDSTPSPATATNATASGALPAQPHPISVPSLPQSQTQLGPQPEVTPKRQVMA 1 1644 1666 PLEAPEQSVP LLPLLVAGAVFLLIIFILGVMVA RRKREHSTLW +P31695 MQPQLLLLLLLPLNFPVILTRELLCGGSPEPCANGGTCLRLSRGQGICQCAPGFLGETCQFPDPCRDTQLCKNGGSCQALLPTPPSSRSPTSPLTPHFSCTCPSGFTGDRCQTHLEELCPPSFCSNGGHCYVQASGRPQCSCEPGWTGEQCQLRDFCSANPCANGGVCLATYPQIQCRCPPGFEGHTCERDINECFLEPGPCPQGTSCHNTLGSYQCLCPVGQEGPQCKLRKGACPPGSCLNGGTCQLVPEGHSTFHLCLCPPGFTGLDCEMNPDDCVRHQCQNGATCLDGLDTYTCLCPKTWKGWDCSEDIDECEARGPPRCRNGGTCQNTAGSFHCVCVSGWGGAGCEENLDDCAAATCAPGSTCIDRVGSFSCLCPPGRTGLLCHLEDMCLSQPCHVNAQCSTNPLTGSTLCICQPGYSGSTCHQDLDECQMAQQGPSPCEHGGSCINTPGSFNCLCLPGYTGSRCEADHNECLSQPCHPGSTCLDLLATFHCLCPPGLEGRLCEVEVNECTSNPCLNQAACHDLLNGFQCLCLPGFTGARCEKDMDECSSTPCANGGRCRDQPGAFYCECLPGFEGPHCEKEVDECLSDPCPVGASCLDLPGAFFCLCRPGFTGQLCEVPLCTPNMCQPGQQCQGQEHRAPCLCPDGSPGCVPAEDNCPCHHGHCQRSLCVCDEGWTGPECETELGGCISTPCAHGGTCHPQPSGYNCTCPAGYMGLTCSEEVTACHSGPCLNGGSCSIRPEGYSCTCLPSHTGRHCQTAVDHCVSASCLNGGTCVNKPGTFFCLCATGFQGLHCEEKTNPSCADSPCRNKATCQDTPRGARCLCSPGYTGSSCQTLIDLCARKPCPHTARCLQSGPSFQCLCLQGWTGALCDFPLSCQKAAMSQGIEISGLCQNGGLCIDTGSSYFCRCPPGFQGKLCQDNVNPCEPNPCHHGSTCVPQPSGYVCQCAPGYEGQNCSKVLDACQSQPCHNHGTCTSRPGGFHCACPPGFVGLRCEGDVDECLDRPCHPSGTAACHSLANAFYCQCLPGHTGQRCEVEMDLCQSQPCSNGGSCEITTGPPPGFTCHCPKGFEGPTCSHKALSCGIHHCHNGGLCLPSPKPGSPPLCACLSGFGGPDCLTPPAPPGCGPPSPCLHNGTCTETPGLGNPGFQCTCPPDSPGPRCQRPGASGCEGRGGDGTCDAGCSGPGGDWDGGDCSLGVPDPWKGCPPHSQCWLLFRDGRCHPQCDSEECLFDGYDCEIPLTCIPAYDQYCRDHFHNGHCEKGCNNAECGWDGGDCRPEGEDSEGRPSLALLVVLRPPALDQQLLALARVLSLTLRVGLWVRKDSEGRNMVFPYPGTRAKEELSGARDSSSWERQAPPTQPLGKETESLGAGFVVVMGVDLSRCGPEHPASRCPWDSGLLLRFLAAMAAVGALEPLLPGPLLAAHPQAGTRPSANQLPWPILCSPVVGVLLLALGALLVLQLIRRRRREHGALWLPPGFIRRPQTQQAPHRRRPPLGEDNIGLKALKPEAEVDEDGVAMCSGPEEGEAEETASASRCQLWPLNSGCGELPQAAMLTPPQECESEVLDVDTCGPDGVTPLMSAVFCGGVQSTTGASPQRLGLGNLEPWEPLL
DRGACPQAHTVGTGETPLHLAARFSRPTAARRLLEAGANPNQPDRAGRTPLHTAVAADAREVCQLLLASRQTTVDARTEDGTTPLMLAARLAVEDLVEELIAARADVGARDKRGKTALHWAAAVNNARAARSLLQAGADKDAQDSREQTPLFLAAREGAVEVAQLLLELGAARGLRDQAGLAPGDVARQRSHWDLLTLLEGAGPTTQEARAHARTTPGGGAAPRCRTLSAGARPRGGGACLQARTWSVDLGARGGKVYARCRSRSGSCGGPTTRGRRFSAGSRGRRGARASQDDWPRDWVALEACGSACSAPIPPPSLTPSPERGSPQVAWGLPVHQEIPLNSVVRNLN 1 1441 1463 QAGTRPSANQ LPWPILCSPVVGVLLLALGALLV LQLIRRRRRE +Q8CJ26 MLYNVSKGVVYSDTALQGQDGDREGMWVGAGGALAPNTSSLFPPEPPGASSNIIPVYCALLATVILGLLAYVAFKCWRSHKQRQQLAKARTVELGDPDRDQRRGDSNVFVDSPPSLEPCIPSQGPHPDLGCQLYLHIPQQQQEEVQRLLMMGEPAKGWQELAGHLGYQAEAVETMACDQMPAYTLLRNWAAQEGNRATLRVLEDALAAIGREDVVQVLSSPAESSSVV 1 52 74 FPPEPPGASS NIIPVYCALLATVILGLLAYVAF KCWRSHKQRQ +Q63373 MYQRMLRCGAELGSPGGGSSGGAGGRLALLWIVPLTLSGLLGVAWGASSLGAHHIHHFHGSSKHHSVPIAIYRSPASLRGGHAGTTYIFSKGGGQITYKWPPNDRPSTRADRLAIGFSTVQKEAVLVRVDSSSGLGDYLELHIHQGKIGVKFNVGTDDIAIEESNAIINDGKYHVVRFTRSGGNATLQVDSWPVIERYPAGNNDNERLAIARQRIPYRLGRVVDEWLLDKGRQLTIFNSQATIIIGGKEQGQPFQGQLSGLYYNGLKVLNMAAENDANIAIVGNVRLVGEVPSSMTTESTATAMQSEMSTSIMETTTTLATSTARRGKPPTKEPISQTTDDILVASAECPSDDEDIDPCEPSSGGLANPTRVGGREPYPGSAEVIRESSSTTGMVVGIVAAAALCILILLYAMYKYRNRDEGSYHVDESRNYISNSAQSNGAVVKEKQPSSAKSANKNKKNKDKEYYV 1 392 414 AEVIRESSST TGMVVGIVAAAALCILILLYAMY KYRNRDEGSY +P15209 
MSPWLKWHGPAMARLWGLCLLVLGFWRASLACPTSCKCSSARIWCTEPSPGIVAFPRLEPNSVDPENITEILIANQKRLEIINEDDVEAYVGLRNLTIVDSGLKFVAYKAFLKNSNLRHINFTRNKLTSLSRRHFRHLDLSDLILTGNPFTCSCDIMWLKTLQETKSSPDTQDLYCLNESSKNMPLANLQIPNCGLPSARLAAPNLTVEEGKSVTLSCSVGGDPLPTLYWDVGNLVSKHMNETSHTQGSLRITNISSDDSGKQISCVAENLVGEDQDSVNLTVHFAPTITFLESPTSDHHWCIPFTVRGNPKPALQWFYNGAILNESKYICTKIHVTNHTEYHGCLQLDNPTHMNNGDYTLMAKNEYGKDERQISAHFMGRPGVDYETNPNYPEVLYEDWTTPTDIGDTTNKSNEIPSTDVADQSNREHLSVYAVVVIASVVGFCLLVMLLLLKLARHSKFGMKGPASVISNDDDSASPLHHISNGSNTPSSSEGGPDAVIIGMTKIPVIENPQYFGITNSQLKPDTFVQHIKRHNIVLKRELGEGAFGKVFLAECYNLCPEQDKILVAVKTLKDASDNARKDFHREAELLTNLQHEHIVKFYGVCVEGDPLIMVFEYMKHGDLNKFLRAHGPDAVLMAEGNPPTELTQSQMLHIAQQIAAGMVYLASQHFVHRDLATRNCLVGENLLVKIGDFGMSRDVYSTDYYRVGGHTMLPIRWMPPESIMYRKFTTESDVWSLGVVLWEIFTYGKQPWYQLSNNEVIECITQGRVLQRPRTCPQEVYELMLGCWQREPHTRKNIKSIHTLLQNLAKASPVYLDILG 1 431 453 VADQSNREHL SVYAVVVIASVVGFCLLVMLLLL KLARHSKFGM +Q86YL7 MWKVSALLFVLGSASLWVLAEGASTGQPEDDTETTGLEGGVAMPGAEDDVVTPGTSEDRYKSGLTTLVATSVNSVTGIRIEDLPTSESTVHAQEQSPSATASNVATSHSTEKVDGDTQTTVEKDGLSTVTLVGIIVGVLLAIGFIGAIIVVVMRKMSGRYSP 1 130 152 TVEKDGLSTV TLVGIIVGVLLAIGFIGAIIVVV MRKMSGRYSP +Q13308 
MGAARGSPARPRRLPLLSVLLLPLLGGTQTAIVFIKQPSSQDALQGRRALLRCEVEAPGPVHVYWLLDGAPVQDTERRFAQGSSLSFAAVDRLQDSGTFQCVARDDVTGEEARSANASFNIKWIEAGPVVLKHPASEAEIQPQTQVTLRCHIDGHPRPTYQWFRDGTPLSDGQSNHTVSSKERNLTLRPAGPEHSGLYSCCAHSAFGQACSSQNFTLSIADESFARVVLAPQDVVVARYEEAMFHCQFSAQPPPSLQWLFEDETPITNRSRPPHLRRATVFANGSLLLTQVRPRNAGIYRCIGQGQRGPPIILEATLHLAEIEDMPLFEPRVFTAGSEERVTCLPPKGLPEPSVWWEHAGVRLPTHGRVYQKGHELVLANIAESDAGVYTCHAANLAGQRRQDVNITVATVPSWLKKPQDSQLEEGKPGYLDCLTQATPKPTVVWYRNQMLISEDSRFEVFKNGTLRINSVEVYDGTWYRCMSSTPAGSIEAQARVQVLEKLKFTPPPQPQQCMEFDKEATVPCSATGREKPTIKWERADGSSLPEWVTDNAGTLHFARVTRDDAGNYTCIASNGPQGQIRAHVQLTVAVFITFKVEPERTTVYQGHTALLQCEAQGDPKPLIQWKGKDRILDPTKLGPRMHIFQNGSLVIHDVAPEDSGRYTCIAGNSCNIKHTEAPLYVVDKPVPEESEGPGSPPPYKMIQTIGLSVGAAVAYIIAVLGLMFYCKKRCKAKRLQKQPEGEEPEMECLNGGPLQNGQPSAEIQEEVALTSLGSGPAATNKRHSTSDKMHFPRSSLQPITTLGKSEFGEVFLAKAQGLEEGVAETLVLVKSLQSKDEQQQLDFRRELEMFGKLNHANVVRLLGLCREAEPHYMVLEYVDLGDLKQFLRISKSKDEKLKSQPLSTKQKVALCTQVALGMEHLSNNRFVHKDLAARNCLVSAQRQVKVSALGLSKDVYNSEYYHFRQAWVPLRWMSPEAILEGDFSTKSDVWAFGVLMWEVFTHGEMPHGGQADDEVLADLQAGKARLPQPEGCPSKLYRLMQRCWALSPKDRPSFSEIASALGDSTVDSKP 1 704 726 GSPPPYKMIQ TIGLSVGAAVAYIIAVLGLMFYC KKRCKAKRLQ +P10586 
MAPEPAPGRTMVPLVPALVMLGLVAGAHGDSKPVFIKVPEDQTGLSGGVASFVCQATGEPKPRITWMKKGKKVSSQRFEVIEFDDGAGSVLRIQPLRVQRDEAIYECTATNSLGEINTSAKLSVLEEEQLPPGFPSIDMGPQLKVVEKARTATMLCAAGGNPDPEISWFKDFLPVDPATSNGRIKQLRSGALQIESSEESDQGKYECVATNSAGTRYSAPANLYVRVRRVAPRFSIPPSSQEVMPGGSVNLTCVAVGAPMPYVKWMMGAEELTKEDEMPVGRNVLELSNVVRSANYTCVAISSLGMIEATAQVTVKALPKPPIDLVVTETTATSVTLTWDSGNSEPVTYYGIQYRAAGTEGPFQEVDGVATTRYSIGGLSPFSEYAFRVLAVNSIGRGPPSEAVRARTGEQAPSSPPRRVQARMLSASTMLVQWEPPEEPNGLVRGYRVYYTPDSRRPPNAWHKHNTDAGLLTTVGSLLPGITYSLRVLAFTAVGDGPPSPTIQVKTQQGVPAQPADFQAEVESDTRIQLSWLLPPQERIIMYELVYWAAEDEDQQHKVTFDPTSSYTLEDLKPDTLYRFQLAARSDMGVGVFTPTIEARTAQSTPSAPPQKVMCVSMGSTTVRVSWVPPPADSRNGVITQYSVAYEAVDGEDRGRHVVDGISREHSSWDLVGLEKWTEYRVWVRAHTDVGPGPESSPVLVRTDEDVPSGPPRKVEVEPLNSTAVHVYWKLPVPSKQHGQIRGYQVTYVRLENGEPRGLPIIQDVMLAEAQWRPEESEDYETTISGLTPETTYSVTVAAYTTKGDGARSKPKIVTTTGAVPGRPTMMISTTAMNTALLQWHPPKELPGELLGYRLQYCRADEARPNTIDFGKDDQHFTVTGLHKGTTYIFRLAAKNRAGLGEEFEKEIRTPEDLPSGFPQNLHVTGLTTSTTELAWDPPVLAERNGRIISYTVVFRDINSQQELQNITTDTRFTLTGLKPDTTYDIKVRAWTSKGSGPLSPSIQSRTMPVEQVFAKNFRVAAAMKTSVLLSWEVPDSYKSAVPFKILYNGQSVEVDGHSMRKLIADLQPNTEYSFVLMNRGSSAGGLQHLVSIRTAPDLLPHKPLPASAYIEDGRFDLSMPHVQDPSLVRWFYIVVVPIDRVGGSMLTPRWSTPEELELDELLEAIEQGGEEQRRRRRQAERLKPYVAAQLDVLPETFTLGDKKNYRGFYNRPLSPDLSYQCFVLASLKEPMDQKRYASSPYSDEIVVQVTPAQQQEEPEMLWVTGPVLAVILIILIVIAILLFKRKRTHSPSSKDEQSIGLKDSLLAHSSDPVEMRRLNYQTPGMRDHPPIPITDLADNIERLKANDGLKFSQEYESIDPGQQFTWENSNLEVNKPKNRYANVIAYDHSRVILTSIDGVPGSDYINANYIDGYRKQNAYIATQGPLPETMGDFWRMVWEQRTATVVMMTRLEEKSRVKCDQYWPARGTETCGLIQVTLLDTVELATYTVRTFALHKSGSSEKRELRQFQFMAWPDHGVPEYPTPILAFLRRVKACNPLDAGPMVVHCSAGVGRTGCFIVIDAMLERMKHEKTVDIYGHVTCMRSQRNYMVQTEDQYVFIHEALLEAATCGHTEVPARNLYAHIQKLGQVPPGESVTAMELEFKLLASSKAHTSRFISANLPCNKFKNRLVNIMPYELTRVCLQPIRGVEGSDYINASFLDGYRQQKAYIATQGPLAESTEDFWRMLWEHNSTIIVMLTKLREMGREKCHQYWPAERSARYQYFVVDPMAEYNMPQYILREFKVTDARDGQSRTIRQFQFTDWPEQGVPKTGEGFIDFIGQVHKTKEQFGQDGPITVHCSAGVGRTGVFITLSIVLERMRYEGVVDMFQTVKTLRTQRPAMVQTEDQYQLCYRAALEYLGSFDHYAT 1 1262 1284 PAQQQEEPEM LWVTGPVLAVILIILIVIAILLF KRKRTHSPSS +P28828 
MRTLGTCLVTLAGLLLTAAGETFSGGCLFDEPYSTCGYSQADEDDFNWEQVNTLTKPTSDPWMPSGSFMLVNTSGKPEGQRAHLLLPQLKENDTHCIDFHYFVSSKSNAAPGLLNVYVKVNNGPLGNPIWNISGDPTRTWHRAELAISTFWPNFYQVIFEVVTSGHQGYLAIDEVKVLGHPCTRTPHFLRIQNVEVNAGQFATFQCSAIGRTVAGDRLWLQGIDVRDAPLKEIKVTSSRRFIASFNVVNTTKRDAGKYRCMICTEGGVGISNYAELVVKEPPVPIAPPQLASVGATYLWIQLNANSINGDGPIVAREVEYCTASGSWNDRQPVDSTSYKIGHLDPDTEYEISVLLTRPGEGGTGSPGPALRTRTKCADPMRGPRKLEVVEVKSRQITIRWEPFGYNVTRCHSYNLTVHYGYQVGGQEQVREEVSWDTDNSHPQHTITNLSPYTNVSVKLILMNPEGRKESQELTVQTDEDLPGAVPTESIQGSAFEEKIFLQWREPTQTYGVITLYEITYKAVSSFDPEIDLSNQSGRVSKLGNETHFLFFGLYPGTTYSFTIRASTAKGFGPPATNQFTTKISAPSMPAYEFETPLNQTDNTVTVMLKPAQSRGAPVSVYQIVVEEERPRRTKKTTEILKCYPVPIHFQNASILNSQYYFAAEFPADSLQAAQPFTIGDNKTYNGYWNTPLLPHKSYRIYYQAASRANGETKIDCVRVATKGAVTPKPVPEPEKQTDHTVKIAGVIAGILLFVIIFLGVVLVMKKRKLAKKRKETMSSTRQEMTVMVNSMDKSYAEQGTNCDEAFSFMGTHNLNGRSVSSPSSFTMKTNTLSTSVPNSYYPDETHTMASDTSSLAQPHTYKKREAADVPYQTGQLHPAIRVADLLQHITQMKCAEGYGFKEEYESFFEGQSAPWDSAKKDENRMKNRYGNIIAYDHSRVRLQMLEGDNNSDYINGNYIDGYHRPNHYIATQGPMQETIYDFWRMVWHENTASIIMVTNLVEVGRVKCCKYWPDDTEIYKDIKVTLIDTELLAEYVIRTFAVEKRGIHEIREIRQFHFTGWPDHGVPYHATGLLGFVRQVKSKSPPNAGPLVVHCSAGAGRTGCFIVIDIMLDMAEREGVVDIYNCVRELRSRRVNMVQTEEQYVFIHDAILEACLCGDTSIPASQVRSLYYDMNKLDPQTNSSQIKEEFRTLNMVTPTLRVEDCSIALLPRNHEKNRCMDILPPDRCLPFLITIDGESSNYINAALMDSYKQPSAFIVTQHPLPNTVKDFWRLVLDYHCTSVVMLNDVDPAQLCPQYWPENGVHRHGPIQVEFVSADLEEDIISRIFRIYNASRPQDGHRMVQQFQFLGWPMYRDTPVSKRSFLKLIRQVDKWQEEYNGGEGRTVVHCLNGGGRSGTFCAISIVCEMLRHQRTVDVFHAVKTLRNNKPNMVDLLDQYKFCYEVALEYLNSG 1 743 764 PEKQTDHTVK IAGVIAGILLFVIIFLGVVLVM KKRKLAKKRK +Q7M729 MSRAGNRGNTQARWLGTGLLGLFLLPMYLSLEVSVGKATTIYAINGSSILLPCTFSSCYGFENLYFKWSYNNSETSRILIDGIVKNDKSDPKVRVKDDDRITLEGSTKEKTNNISILLSDLEFSDTGRYTCFVRNPKEKDLNNSATIFLQVVDKLEKVDNTVTLIILAVVGGVIGLLVCILLLKKLITFILKKTREKKKECLVSSSGNDNTENGLPGSKAEEKPPTKV 1 161 183 VVDKLEKVDN TVTLIILAVVGGVIGLLVCILLL KKLITFILKK +O75056 
MKPGPPHRAGAAHGAGAGAGAAAGPGARGLLLPPLLLLLLAGRAAGAQRWRSENFERPVDLEGSGDDDSFPDDELDDLYSGSGSGYFEQESGIETAMRFSPDVALAVSTTPAVLPTTNIQPVGTPFEELPSERPTLEPATSPLVVTEVPEEPSQRATTVSTTMATTAATSTGDPTVATVPATVATATPSTPAAPPFTATTAVIRTTGVRRLLPLPLTTVATARATTPEAPSPPTTAAVLDTEAPTPRLVSTATSRPRALPRPATTQEPDIPERSTLPLGTTAPGPTEVAQTPTPETFLTTIRDEPEVPVSGGPSGDFELPEEETTQPDTANEVVAVGGAAAKASSPPGTLPKGARPGPGLLDNAIDSGSSAAQLPQKSILERKEVLVAVIVGGVVGALFAAFLVTLLIYRMKKKDEGSYTLEEPKQASVTYQKPDKQEEFYA 1 387 409 KSILERKEVL VAVIVGGVVGALFAAFLVTLLIY RMKKKDEGSY +P78324 MEPAGPAPGRLGPLLCLLLAASCAWSGVAGEEELQVIQPDKSVLVAAGETATLRCTATSLIPVGPIQWFRGAGPGRELIYNQKEGHFPRVTTVSDLTKRNNMDFSIRIGNITPADAGTYYCVKFRKGSPDDVEFKSGAGTELSVRAKPSAPVVSGPAARATPQHTVSFTCESHGFSPRDITLKWFKNGNELSDFQTNVDPVGESVSYSIHSTAKVVLTREDVHSQVICEVAHVTLQGDPLRGTANLSETIRVPPTLEVTQQPVRAENQVNVTCQVRKFYPQRLQLTWLENGNVSRTETASTVTENKDGTYNWMSWLLVNVSAHRDDVKLTCQVEHDGQPAVSKSHDLKVSAHPKEQGSNTAAENTGSNERNIYIVVGVVCTLLVALLMAALYLVRIRQKKAQGSTSSTRLHEPEKNAREITQDTNDITYADLNLPKGKKPAPQAAEPNNHTEYASIQTSPQPASEDTLTYADLDMVHLNRTPKQPAPKPEPSFSEYASVQVPRK 1 372 394 AENTGSNERN IYIVVGVVCTLLVALLMAALYLV RIRQKKAQGS +Q92673 
MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWARGDARGASRADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYDYGKSFKKISDKLNFGLGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLLLHSKASNLLLGFDRSHPNKQLWKSDDFGQTWIMIQEHVKSFSWGIDPYDKPNTIYIERHEPSGYSTVFRSTDFFQSRENQEVILEEVRDFQLRDKYMFATKVVHLLGSEQQSSVQLWVSFGRKPMRAAQFVTRHPINEYYIADASEDQVFVCVSHSNNRTNLYISEAEGLKFSLSLENVLYYSPGGAGSDTLVRYFANEPFADFHRVEGLQGVYIATLINGSMNEENMRSVITFDKGGTWEFLQAPAFTGYGEKINCELSQGCSLHLAQRLSQLLNLQLRRMPILSKESAPGLIIATGSVGKNLASKTNVYISSSAGARWREALPGPHYYTWGDHGGIITAIAQGMETNELKYSTNEGETWKTFIFSEKPVFVYGLLTEPGEKSTVFTIFGSNKENVHSWLILQVNATDALGVPCTENDYKLWSPSDERGNECLLGHKTVFKRRTPHATCFNGEDFDRPVVVSNCSCTREDYECDFGFKMSEDLSLEVCVPDPEFSGKSYSPPVPCPVGSTYRRTRGYRKISGDTCSGGDVEARLEGELVPCPLAEENEFILYAVRKSIYRYDLASGATEQLPLTGLRAAVALDFDYEHNCLYWSDLALDVIQRLCLNGSTGQEVIINSGLETVEALAFEPLSQLLYWVDAGFKKIEVANPDGDFRLTIVNSSVLDRPRALVLVPQEGVMFWTDWGDLKPGIYRSNMDGSAAYHLVSEDVKWPNGISVDDQWIYWTDAYLECIERITFSGQQRSVILDNLPHPYAIAVFKNEIYWDDWSQLSIFRASKYSGSQMEILANQLTGLMDMKIFYKGKNTGSNACVPRPCSLLCLPKANNSRSCRCPEDVSSSVLPSGDLMCDCPQGYQLKNNTCVKQENTCLRNQYRCSNGNCINSIWWCDFDNDCGDMSDERNCPTTICDLDTQFRCQESGTCIPLSYKCDLEDDCGDNSDESHCEMHQCRSDEYNCSSGMCIRSSWVCDGDNDCRDWSDEANCTAIYHTCEASNFQCRNGHCIPQRWACDGDTDCQDGSDEDPVNCEKKCNGFRCPNGTCIPSSKHCDGLRDCSDGSDEQHCEPLCTHFMDFVCKNRQQCLFHSMVCDGIIQCRDGSDEDAAFAGCSQDPEFHKVCDEFGFQCQNGVCISLIWKCDGMDDCGDYSDEANCENPTEAPNCSRYFQFRCENGHCIPNRWKCDRENDCGDWSDEKDCGDSHILPFSTPGPSTCLPNYYRCSSGTCVMDTWVCDGYRDCADGSDEEACPLLANVTAASTPTQLGRCDRFEFECHQPKTCIPNWKRCDGHQDCQDGRDEANCPTHSTLTCMSREFQCEDGEACIVLSERCDGFLDCSDESDEKACSDELTVYKVQNLQWTADFSGDVTLTWMRPKKMPSASCVYNVYYRVVGESIWKTLETHSNKTNTVLKVLKPDTTYQVKVQVQCLSKAHNTNDFVTLRTPEGLPDAPRNLQLSLPREAEGVIVGHWAPPIHTHGLIREYIVEYSRSGSKMWASQRAASNFTEIKNLLVNTLYTVRVAAVTSRGIGNWSDSKSITTIKGKVIPPPDIHIDSYGENYLSFTLTMESDIKVNGYVVNLFWAFDTHKQERRTLNFRGSILSHKVGNLTAHTSYEISAWAKTDLGDSPLAFEHVMTRGVRPPAPSLKAKAINQTAVECTWTGPRNVVYGIFYATSFLDLYRNPKSLTTSLHNKTVIVSKDEQYLFLVRVVVPYQGPSSDYVVVKMIPDSRLPPRHLHVVHTGKTSVVIKWESPYDSPDQDLLYAVAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGG
KYHIIVQLGNMSKDSSIKITTVSLSAPDALKIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQARCLFGNQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFTAFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA 1 2136 2158 SATQAARSTD VAAVVVPILFLILLSLGVGFAIL YTKHRRLQSS +Q99523 MERPWGAADGLSRWPHGLGLLLLLQLLPPSTLSQDRLDAPPPPAAPLPRWSGPIGVSWGLRAAAAGGAFPRGGRWRRSAPGEDEECGRVRDFVAKLANNTHQHVFDDLRGSVSLSWVGDSTGVILVLTTFHVPLVIMTFGQSKLYRSEDYGKNFKDITDLINNTFIRTEFGMAIGPENSGKVVLTAEVSGGSRGGRIFRSSDFAKNFVQTDLPFHPLTQMMYSPQNSDYLLALSTENGLWVSKNFGGKWEEIHKAVCLAKWGSDNTIFFTTYANGSCKADLGALELWRTSDLGKSFKTIGVKIYSFGLGGRFLFASVMADKDTTRRIHVSTDQGDTWSMAQLPSVGQEQFYSILAANDDMVFMHVDEPGDTGFGTIFTSDDRGIVYSKSLDRHLYTTTGGETDFTNVTSLRGVYITSVLSEDNSIQTMITFDQGGRWTHLRKPENSECDATAKNKNECSLHIHASYSISQKLNVPMAPLSEPNAVGIVIAHGSVGDAISVMVPDVYISDDGGYSWTKMLEGPHYYTILDSGGIIVAIEHSSRPINVIKFSTDEGQCWQTYTFTRDPIYFTGLASEPGARSMNISIWGFTESFLTSQWVSYTIDFKDILERNCEEKDYTIWLAHSTDPEDYEDGCILGYKEQFLRLRKSSVCQNGRDYVVTKQPSICLCSLEDFLCDFGYYRPENDSKCVEQPELKGHDLEFCLYGREEHLTTNGYRKIPGDKCQGGVNPVREVKDLKKKCTSNFLSPEKQNSKSNSVPIILAIVGLMLVTVVAGVLIVKKYVCGGRFLVHRYSVLQQHAEANGVDGVDALDTASHTNKSGYHDDSDEDLLE 1 756 778 SPEKQNSKSN SVPIILAIVGLMLVTVVAGVLIV KKYVCGGRFL +Q8BGV3 MARGLDLAPLLLLLLAMATRFCTAQSNCTCPTNKMTVCDTNGPGGVCQCRAMGSQVLVDCSTLTSKCLLLKARMSARKSGRSLVMPSEHAILDNDGLYDPECDDKGRFKARQCNQTSVCWCVNSVGVRRTDKGDQSLRCDEVVRTHHILIELRHRPTDRAFNHSDLDSELRRLFQERYKLHPSFLSAVHYEEPTIQIELRQNASQKGLRDVDIADAAYYFERDIKGESLFMGRRGLDVQVRGEPLHVERTLIYYLDEKPPQFSMKRLTAGVIAVIAVVSVAVVAGVVVLVVTKRRKSGKYKKVELKELGEMRSEPSL 1 269 291 PPQFSMKRLT AGVIAVIAVVSVAVVAGVVVLVV TKRRKSGKYK +P35590 
MVWRVPPFLLPILFLASHVGAAVDLTLLANLRLTDPQRFFLTCVSGEAGAGRGSDAWGPPLLLEKDDRIVRTPPGPPLRLARNGSHQVTLRGFSKPSDLVGVFSCVGGAGARRTRVIYVHNSPGAHLLPDKVTHTVNKGDTAVLSARVHKEKQTDVIWKSNGSYFYTLDWHEAQDGRFLLQLPNVQPPSSGIYSATYLEASPLGSAFFRLIVRGCGAGRWGPGCTKECPGCLHGGVCHDHDGECVCPPGFTGTRCEQACREGRFGQSCQEQCPGISGCRGLTFCLPDPYGCSCGSGWRGSQCQEACAPGHFGADCRLQCQCQNGGTCDRFSGCVCPSGWHGVHCEKSDRIPQILNMASELEFNLETMPRINCAAAGNPFPVRGSIELRKPDGTVLLSTKAIVEPEKTTAEFEVPRLVLADSGFWECRVSTSGGQDSRRFKVNVKVPPVPLAAPRLLTKQSRQLVVSPLVSFSGDGPISTVRLHYRPQDSTMDWSTIVVDPSENVTLMNLRPKTGYSVRVQLSRPGEGGEGAWGPPTLMTTDCPEPLLQPWLEGWHVEGTDRLRVSWSLPLVPGPLVGDGFLLRLWDGTRGQERRENVSSPQARTALLTGLTPGTHYQLDVQLYHCTLLGPASPPAHVLLPPSGPPAPRHLHAQALSDSEIQLTWKHPEALPGPISKYVVEVQVAGGAGDPLWIDVDRPEETSTIIRGLNASTRYLFRMRASIQGLGDWSNTVEESTLGNGLQAEGPVQESRAAEEGLDQQLILAVVGSVSATCLTILAALLTLVCIRRSCLHRRRTFTYQSGSGEETILQFSSGTLTLTRRPKLQPEPLSYPVLEWEDITFEDLIGEGNFGQVIRAMIKKDGLKMNAAIKMLKEYASENDHRDFAGELEVLCKLGHHPNIINLLGACKNRGYLYIAIEYAPYGNLLDFLRKSRVLETDPAFAREHGTASTLSSRQLLRFASDAANGMQYLSEKQFIHRDLAARNVLVGENLASKIADFGLSRGEEVYVKKTMGRLPVRWMAIESLNYSVYTTKSDVWSFGVLLWEIVSLGGTPYCGMTCAELYEKLPQGYRMEQPRNCDDEVYELMRQCWRDRPYERPPFAQIALQLGRMLEARKAYVNMSLFENFTYAGIDATAEEA 1 764 786 EEGLDQQLIL AVVGSVSATCLTILAALLTLVCI RRSCLHRRRT +P08138 MGAGATGRAMDGPRLLLLLLLGVSLGGAKEACPTGLYTHSGECCKACNLGEGVAQPCGANQTVCEPCLDSVTFSDVVSATEPCKPCTECVGLQSMSAPCVEADDAVCRCAYGYYQDETTGRCEACRVCEAGSGLVFSCQDKQNTVCEECPDGTYSDEANHVDPCLPCTVCEDTERQLRECTRWADAECEEIPGRWITRSTPPEGSDSTAPSTQEPEAPPEQDLIASTVAGVVTTVMGSSQPVVTRGTTDNLIPVYCSILAAVVVGLVAYIAFKRWNSCKQNKQGANSRPVNQTPPPEGEKLHSDSGISVDSQSLHDQQPHTQTASGQALKGDGGLYSSLPPAKREEVEKLLNGSAGDTWRHLAGELGYQPEHIDSFTHEACPVRALLASWATQDSATLDALLAALRRIQRADLVESLCSESTATSPV 1 250 272 QPVVTRGTTD NLIPVYCSILAAVVVGLVAYIAF KRWNSCKQNK +Q02223 MLQMAGQCSQNEYFDSLLHACIPCQLRCSSNTPPLTCQRYCNASVTNSVKGTNAILWTCLGLSLIISLAVFVLMFLLRKINSEPLKDEFKNTGSGLLGMANIDLEKSRTGDEIILPRGLEYTVEECTCEDCIKSKPKVDSDHCFPLPAMEEGATILVTTKTNDYCKSLPAALSATEIEKSISAR 1 54 76 SVTNSVKGTN AILWTCLGLSLIISLAVFVLMFL LRKINSEPLK +P19438 
MGLSTVPDLLLPLVLLELLVGIYPSGVIGLVPHLGDREKRDSVCPQGKYIHPQNNSICCTKCHKGTYLYNDCPGPGQDTDCRECESGSFTASENHLRHCLSCSKCRKEMGQVEISSCTVDRDTVCGCRKNQYRHYWSENLFQCFNCSLCLNGTVHLSCQEKQNTVCTCHAGFFLRENECVSCSNCKKSLECTKLCLPQIENVKGTEDSGTTVLLPLVIFFGLCLLSLLFIGLMYRYQRWKSKLYSIVCGKSTPEKEGELEGTTTKPLAPNPSFSPTPGFTPTLGFSPVPSSTFTSSSTYTPGDCPNFAAPRREVAPPYQGADPILATALASDPIPNPLQKWEDSAHKPQSLDTDDPATLYAVVENVPPLRWKEFVRRLGLSDHEIDRLELQNGRCLREAQYSMLATWRRRTPRREATLELLGRVLRDMDLLGCLEDIEEALCGPAALPPAPSLLR 1 212 234 VKGTEDSGTT VLLPLVIFFGLCLLSLLFIGLMY RYQRWKSKLY +Q06418 MALRRSMGRPGLPPLPLPPPPRLGLLLAALASLLLPESAAAGLKLMGAPVKLTVSQGQPVKLNCSVEGMEEPDIQWVKDGAVVQNLDQLYIPVSEQHWIGFLSLKSVERSDAGRYWCQVEDGGETEISQPVWLTVEGVPFFTVEPKDLAVPPNAPFQLSCEAVGPPEPVTIVWWRGTTKIGGPAPSPSVLNVTGVTQSTMFSCEAHNLKGLASSRTATVHLQALPAAPFNITVTKLSSSNASVAWMPGADGRALLQSCTVQVTQAPGGWEVLAVVVPVPPFTCLLRDLVPATNYSLRVRCANALGPSPYADWVPFQTKGLAPASAPQNLHAIRTDSGLILEWEEVIPEAPLEGPLGPYKLSWVQDNGTQDELTVEGTRANLTGWDPQKDLIVRVCVSNAVGCGPWSQPLVVSSHDRAGQQGPPHSRTSWVPVVLGVLTALVTAAALALILLRKRRKETRFGQAFDSVMARGEPAVHFRAARSFNRERPERIEATLDSLGISDELKEKLEDVLIPEQQFTLGRMLGKGEFGSVREAQLKQEDGSFVKVAVKMLKADIIASSDIEEFLREAACMKEFDHPHVAKLVGVSLRSRAKGRLPIPMVILPFMKHGDLHAFLLASRIGENPFNLPLQTLIRFMVDIACGMEYLSSRNFIHRDLAARNCMLAEDMTVCVADFGLSRKIYSGDYYRQGCASKLPVKWLALESLADNLYTVQSDVWAFGVTMWEIMTRGQTPYAGIENAEIYNYLIGGNRLKQPPECMEDVYDLMYQCWSADPKQRPSFTCLRMELENILGQLSVLSASQDPLYINIERAEEPTAGGSLELPGRDQPYSGAGDGSGMGAVGGTPSDCRYILTPGGLAEQPGQAEHQPESPLNETQRLLLLQQGLLPHSSC 1 429 451 QQGPPHSRTS WVPVVLGVLTALVTAAALALILL RKRRKETRFG +P30530 
MAWRCPRMGRVPLAWCLALCGWACMAPRGTQAEESPFVGNPGNITGARGLTGTLRCQLQVQGEPPEVHWLRDGQILELADSTQTQVPLGEDEQDDWIVVSQLRITSLQLSDTGQYQCLVFLGHQTFVSQPGYVGLEGLPYFLEEPEDRTVAANTPFNLSCQAQGPPEPVDLLWLQDAVPLATAPGHGPQRSLHVPGLNKTSSFSCEAHNAKGVTTSRTATITVLPQQPRNLHLVSRQPTELEVAWTPGLSGIYPLTHCTLQAVLSDDGMGIQAGEPDPPEEPLTSQASVPPHQLRLGSLHPHTPYHIRVACTSSQGPSSWTHWLPVETPEGVPLGPPENISATRNGSQAFVHWQEPRAPLQGTLLGYRLAYQGQDTPEVLMDIGLRQEVTLELQGDGSVSNLTVCVAAYTAAGDGPWSLPVPLEAWRPGQAQPVHQLVKEPSTPAFSWPWWYVLLGAVVAAACVLILALFLVHRRKKETRYGEVFEPTVERGELVVRYRVRKSYSRRTTEATLNSLGISEELKEKLRDVMVDRHKVALGKTLGEGEFGAVMEGQLNQDDSILKVAVKTMKIAICTRSELEDFLSEAVCMKEFDHPNVMRLIGVCFQGSERESFPAPVVILPFMKHGDLHSFLLYSRLGDQPVYLPTQMLVKFMADIASGMEYLSTKRFIHRDLAARNCMLNENMSVCVADFGLSKKIYNGDYYRQGRIAKMPVKWIAIESLADRVYTSKSDVWSFGVTMWEIATRGQTPYPGVENSEIYDYLRQGNRLKQPADCLDGLYALMSRCWELNPQDRPSFTELREDLENTLKALPPAQEPDEILYVNMDEGGGYPEPPGAAGGADPPTQPDPKDSCSCLTAAEVHPAGRYVLCPSTTPSPAQPADRGSPAAPGQEDGA 1 450 472 EPSTPAFSWP WWYVLLGAVVAAACVLILALFLV HRRKKETRYG +Q6EMK4 MCSRVPLLLPLLLLLALGPGVQGCPSGCQCSQPQTVFCTARQGTTVPRDVPPDTVGLYVFENGITMLDAGSFAGLPGLQLLDLSQNQIASLPSGVFQPLANLSNLDLTANRLHEITNETFRGLRRLERLYLGKNRIRHIQPGAFDTLDRLLELKLQDNELRALPPLRLPRLLLLDLSHNSLLALEPGILDTANVEALRLAGLGLQQLDEGLFSRLRNLHDLDVSDNQLERVPPVIRGLRGLTRLRLAGNTRIAQLRPEDLAGLAALQELDVSNLSLQALPGDLSGLFPRLRLLAAARNPFNCVCPLSWFGPWVRESHVTLASPEETRCHFPPKNAGRLLLELDYADFGCPATTTTATVPTTRPVVREPTALSSSLAPTWLSPTEPATEAPSPPSTAPPTVGPVPQPQDCPPSTCLNGGTCHLGTRHHLACLCPEGFTGLYCESQMGQGTRPSPTPVTPRPPRSLTLGIEPVSPTSLRVGLQRYLQGSSVQLRSLRLTYRNLSGPDKRLVTLRLPASLAEYTVTQLRPNATYSVCVMPLGPGRVPEGEEACGEAHTPPAVHSNHAPVTQAREGNLPLLIAPALAAVLLAALAAVGAAYCVRRGRAMAAAAQDKGQVGPGAGPLELEGVKVPLEPGPKATEGGGEALPSGSECEVPLMGFPGPGLQSPLHAKPYI 1 577 599 TQAREGNLPL LIAPALAAVLLAALAAVGAAYCV RRGRAMAAAA +Q14802 MQKVTLGLLVFLAGFPVLDANDLEDKNSPFYYDWHSLQVGGLICAGVLCAMGIIIVMSAKCKCKFGQKSGHHPGETPPLITPGSAQS 0 37 59 NSPFYYDWHS LQVGGLICAGVLCAMGIIIVMSA KCKCKFGQKS +Q86UE4 
MAARSWQDELAQQAEEGSARLREMLSVGLGFLRTELGLDLGLEPKRYPGWVILVGTGALGLLLLFLLGYGWAAACAGARKKRRSPPRKREEAAAVPAAAPDDLALLKNLRSEEQKKKNRKKLSEKPKPNGRTVEVAEGEAVRTPQSVTAKQPPEIDKKNEKSKKNKKKSKSDAKAVQNSSRHDGKEVDEGAWETKISHREKRQQRKRDKVLTDSGSLDSTIPGIENTITVTTEQLTTASFPVGSKKNKGDSHLNVQVSNFKSGKGDSTLQVSSGLNENLTVNGGGWNEKSVKLSSQISAGEEKWNSVSPASAGKRKTEPSAWSQDTGDANTNGKDWGRSWSDRSIFSGIGSTAEPVSQSTTSDYQWDVSRNQPYIDDEWSGLNGLSSADPNSDWNAPAEEWGNWVDEERASLLKSQEPIPDDQKVSDDDKEKGEGALPTGKSKKKKKKKKKQGEDNSTAQDTEELEKEIREDLPVNTSKTRPKQEKAFSLKTISTSDPAEVLVKNSQPIKTLPPATSTEPSVILSKSDSDKSSSQVPPILQETDKSKSNTKQNSVPPSQTKSETSWESPKQIKKKKKARRET 0 50 72 LGLEPKRYPG WVILVGTGALGLLLLFLLGYGWA AACAGARKKR +Q969W9 MHRLMGVNSTAAAAAGQPNVSCTCNCKRSLFQSMEITELEFVQIIIIVVVMMVMVVVITCLLSHYKLSARSFISRHSQGRRREDALSSEGCLWPSESTVSGNGIPEPQVYAPPRPTDRLAVPPFAQRERFHRFQPTYPYLQHEIDLPPTISLSDGEEPPPYQGPCTLQLRDPEQQLELNRESVRAPPNRTIFDSDLMDSARLGGPCPPSSNSGISATCYGSGGRMEGPPPTYSEVIGHYPGSSFQHQQSSGPPSLLEGTRLHHTHIAPLESAAIWSKEKDKQKGHPL 0 41 63 FQSMEITELE FVQIIIIVVVMMVMVVVITCLLS HYKLSARSFI +P53801 MAPGVARGPTPYWRLRLGGAALLLLLIPVAAAQEPPGAACSQNTNKTCEECLKNVSCLWCNTNKACLDYPVTSVLPPASLCKLSSARWGVCWVNFEALIITMSVVGGTLLLGIAICCCCCCRRKRSRKPDRSEEKAMREREERRIRQEERRAEMKTRHDEIRKKYGLFKEENPYARFENN 0 97 119 RWGVCWVNFE ALIITMSVVGGTLLLGIAICCCC CCRRKRSRKP +Q8IUW5 MAPRALPGSAVLAAAVFVGGAVSSPLVAPDNGSSRTLHSRTETTPSPSNDTGNGHPEYIAYALVPVFFIMGLFGVLICHLLKKKGYRCTTEAEQDIEEEKVEKIELNDSVNENSDTVGQIVHYIMKNEANADVLKAMVADNSLYDPESPVTPSTPGSPPVSPGPLSPGGTPGKHVCGHHLHTVGGVVERDVCHRCRHKRWHFIKPTNKSRESRPRRQGEVTVLSVGRFRVTKVEHKSNQKERRSLMSVSGAETVNGEVPATPVKRERSGTE 0 59 81 NDTGNGHPEY IAYALVPVFFIMGLFGVLICHLL KKKGYRCTTE +P01135 MVPSAGQLALFALGIVLAACQALENSTSPLSADPPVAAAVVSHFNDCPDSHTQFCFHGTCRFLVQEDKPACVCHSGYVGARCEHADLLAVVAASQKKQAITALVVVSIVALAVLIITCVLIHCCQVRKHCEWCRALICRHEKPSALLKGRTACCHSETVV 0 99 121 AVVAASQKKQ AITALVVVSIVALAVLIITCVLI HCCQVRKHCE +O43914 MGGLEPCSRLLLLPLLLAVSGLRPVQAQAQSDCSCSTVSPGVLAGIVMGDLVLTVLIALAVYFLGRLVPRGRGAAEAATRKQRITETESPYQELQGQRSDVYSDLNTQRPYYK 0 42 64 DCSCSTVSPG VLAGIVMGDLVLTVLIALAVYFL GRLVPRGRGA +P05556 
MNLQPIFWIGLISSVCCVFAQTDENRCLKANAKSCGECIQAGPNCGWCTNSTFLQEGMPTSARCDDLEALKKKGCPPDDIENPRGSKDIKKNKNVTNRSKGTAEKLKPEDITQIQPQQLVLRLRSGEPQTFTLKFKRAEDYPIDLYYLMDLSYSMKDDLENVKSLGTDLMNEMRRITSDFRIGFGSFVEKTVMPYISTTPAKLRNPCTSEQNCTSPFSYKNVLSLTNKGEVFNELVGKQRISGNLDSPEGGFDAIMQVAVCGSLIGWRNVTRLLVFSTDAGFHFAGDGKLGGIVLPNDGQCHLENNMYTMSHYYDYPSIAHLVQKLSENNIQTIFAVTEEFQPVYKELKNLIPKSAVGTLSANSSNVIQLIIDAYNSLSSEVILENGKLSEGVTISYKSYCKNGVNGTGENGRKCSNISIGDEVQFEISITSNKCPKKDSDSFKIRPLGFTEEVEVILQYICECECQSEGIPESPKCHEGNGTFECGACRCNEGRVGRHCECSTDEVNSEDMDAYCRKENSSEICSNNGECVCGQCVCRKRDNTNEIYSGKFCECDNFNCDRSNGLICGGNGVCKCRVCECNPNYTGSACDCSLDTSTCEASNGQICNGRGICECGVCKCTDPKFQGQTCEMCQTCLGVCAEHKECVQCRAFNKGEKKDTCTQECSYFNITKVESRDKLPQPVQPDPVSHCKEKDVDDCWFYFTYSVNGNNEVMVHVVENPECPTGPDIIPIVAGVVAGIVLIGLALLLIWKLLMIIHDRREFAKFEKEKMNAKWDTGENPIYKSAVTTVVNPKYEGK 0 729 751 ENPECPTGPD IIPIVAGVVAGIVLIGLALLLIW KLLMIIHDRR +P16234 MGTSHPAFLVLGCLLTGLSLILCQLSLPSILPNENEKVVQLNSSFSLRCFGESEVSWQYPMSEEESSDVEIRNEENNSGLFVTVLEVSSASAAHTGLYTCYYNHTQTEENELEGRHIYIYVPDPDVAFVPLGMTDYLVIVEDDDSAIIPCRTTDPETPVTLHNSEGVVPASYDSRQGFNGTFTVGPYICEATVKGKKFQTIPFNVYALKATSELDLEMEALKTVYKSGETIVVTCAVFNNEVVDLQWTYPGEVKGKGITMLEEIKVPSIKLVYTLTVPEATVKDSGDYECAARQATREVKEMKKVTISVHEKGFIEIKPTFSQLEAVNLHEVKHFVVEVRAYPPPRISWLKNNLTLIENLTEITTDVEKIQEIRYRSKLKLIRAKEEDSGHYTIVAQNEDAVKSYTFELLTQVPSSILDLVDDHHGSTGGQTVRCTAEGTPLPDIEWMICKDIKKCNNETSWTILANNVSNIITEIHSRDRSTVEGRVTFAKVEETIAVRCLAKNLLGAENRELKLVAPTLRSELTVAAAVLVLLVIVIISLIVLVVIWKQKPRYEIRWRVIESISPDGHEYIYVDPMQLPYDSRWEFPRDGLVLGRVLGSGAFGKVVEGTAYGLSRSQPVMKVAVKMLKPTARSSEKQALMSELKIMTHLGPHLNIVNLLGACTKSGPIYIITEYCFYGDLVNYLHKNRDSFLSHHPEKPKKELDIFGLNPADESTRSYVILSFENNGDYMDMKQADTTQYVPMLERKEVSKYSDIQRSLYDRPASYKKKSMLDSEVKNLLSDDNSEGLTLLDLLSFTYQVARGMEFLASKNCVHRDLAARNVLLAQGKIVKICDFGLARDIMHDSNYVSKGSTFLPVKWMAPESIFDNLYTTLSDVWSYGILLWEIFSLGGTPYPGMMVDSTFYNKIKSGYRMAKPDHATSEVYEIMVKCWNSEPEKRPSFYHLSEIVENLLPGQYKKSYEKIHLDFLKSDHPAVARMRVDSDNAYIGVTYKNEEDKLKDWEGGLDEQRLSADSGYIIPLPDIDPVPEEEDLGKRNRHSSQTSEESAIETGSSSSTFIKREDETIEDIDMMDDIGIDSSDLVEDSFL 0 527 549 VAPTLRSELT VAAAVLVLLVIVIISLIVLVVIW 
KQKPRYEIRW +P50895 MEPPDAPAQARGAPRLLLLAVLLAAHPDAQAEVRLSVPPLVEVMRGKSVILDCTPTGTHDHYMLEWFLTDRSGARPRLASAEMQGSELQVTMHDTRGRSPPYQLDSQGRLVLAEAQVGDERDYVCVVRAGAAGTAEATARLNVFAKPEATEVSPNKGTLSVMEDSAQEIATCNSRNGNPAPKITWYRNGQRLEVPVEMNPEGYMTSRTVREASGLLSLTSTLYLRLRKDDRDASFHCAAHYSLPEGRHGRLDSPTFHLTLHYPTEHVQFWVGSPSTPAGWVREGDTVQLLCRGDGSPSPEYTLFRLQDEQEEVLNVNLEGNLTLEGVTRGQSGTYGCRVEDYDAADDVQLSKTLELRVAYLDPLELSEGKVLSLPLNSSAVVNCSVHGLPTPALRWTKDSTPLGDGPMLSLSSITFDSNGTYVCEASLPTVPVLSRTQNFTLLVQGSPELKTAEIEPKADGSWREGDEVTLICSARGHPDPKLSWSQLGGSPAEPIPGRQGWVSSSLTLKVTSALSRDGISCEASNPHGNKRHVFHFGTVSPQTSQAGVAVMAVAVSVGLLLLVVAVFYCVRRKGGPCCRQRREKGAPPPGEPGLSHSGSEQPEQTGLLMGGASGGARGGSGGFGDEC 0 549 571 TVSPQTSQAG VAVMAVAVSVGLLLLVVAVFYCV RRKGGPCCRQ +Q9Y624 MGTKAQVERKLLCLFILAILLCSLALGSVTVHSSEPEVRIPENNPVKLSCAYSGFSSPRVEWKFDQGDTTRLVCYNNKITASYEDRVTFLPTGITFKSVTREDTGTYTCMVSEEGGNSYGEVKVKLIVLVPPSKPTVNIPSSATIGNRAVLTCSEQDGSPPSEYTWFKDGIVMPTNPKSTRAFSNSSYVLNPTTGELVFDPLSASDTGEYSCEARNGYGTPMTSNAVRMEAVERNVGVIVAAVLVTLILLGILVFGIWFAYSRGHFDRTKKGTSSKKVIYSQPSARSEGEFKQTSSFLV 0 239 261 MEAVERNVGV IVAAVLVTLILLGILVFGIWFAY SRGHFDRTKK +P49257 MAGSRQRGLRARVRPLFCALLLSLGRFVRGDGVGGDPAVALPHRRFEYKYSFKGPHLVQSDGTVPFWAHAGNAIPSSDQIRVAPSLKSQRGSVWTKTKAAFENWEVEVTFRVTGRGRIGADGLAIWYAENQGLEGPVFGSADLWNGVGIFFDSFDNDGKKNNPAIVIIGNNGQIHYDHQNDGASQALASCQRDFRNKPYPVRAKITYYQNTLTVMINNGFTPDKNDYEFCAKVENMIIPAQGHFGISAATGGLADDHDVLSFLTFQLTEPGKEPPTPDKEISEKEKEKYQEEFEHFQQELDKKKEEFQKGHPDLQGQPAEEIFESVGDRELRQVFEGQNRIHLEIKQLNRQLDMILDEQRRYVSSLTEEISKRGAGMPGQHGQITQQELDTVVKTQHEILRQVNEMKNSMSETVRLVSGMQHPGSAGGVYETTQHFIDIKEHLHIVKRDIDNLVQRNMPSNEKPKCPELPPFPSCLSTVHFIIFVVVQTVLFIGYIMYRSQQEAAAKKFF 0 476 498 CPELPPFPSC LSTVHFIIFVVVQTVLFIGYIMY RSQQEAAAKK +Q9NPR2 
MLRTAMGLRSWLAAPWGALPPRPPLLLLLLLLLLLQPPPPTWALSPRISLPLGSEERPFLRFEAEHISNYTALLLSRDGRTLYVGAREALFALSSNLSFLPGGEYQELLWGADAEKKQQCSFKGKDPQRDCQNYIKILLPLSGSHLFTCGTAAFSPMCTYINMENFTLARDEKGNVLLEDGKGRCPFDPNFKSTALVVDGELYTGTVSSFQGNDPAISRSQSLRPTKTESSLNWLQDPAFVASAYIPESLGSLQGDDDKIYFFFSETGQEFEFFENTIVSRIARICKGDEGGERVLQQRWTSFLKAQLLCSRPDDGFPFNVLQDVFTLSPSPQDWRDTLFYGVFTSQWHRGTTEGSAVCVFTMKDVQRVFSGLYKEVNRETQQWYTVTHPVPTPRPGACITNSARERKINSSLQLPDRVLNFLKDHFLMDGQVRSRMLLLQPQARYQRVAVHRVPGLHHTYDVLFLGTGDGRLHKAVSVGPRVHIIEELQIFSSGQPVQNLLLDTHRGLLYAASHSGVVQVPMANCSLYRSCGDCLLARDPYCAWSGSSCKHVSLYQPQLATRPWIQDIEGASAKDLCSASSVVSPSFVPTGEKPCEQVQFQPNTVNTLACPLLSNLATRLWLRNGAPVNASASCHVLPTGDLLLVGTQQLGEFQCWSLEEGFQQLVASYCPEVVEDGVADQTDEGGSVPVIISTSRVSAPAGGKASWGADRSYWKEFLVMCTLFVLAVLLPVLFLLYRHRNSMKVFLKQGECASVHPKTCPVVLPPETRPLNGLGPPSTPLDHRGYQSLSDSPPGSRVFTESEKRPLSIQDSFVEVSPVCPRPRVRLGSEIRDSVV 0 717 739 SWGADRSYWK EFLVMCTLFVLAVLLPVLFLLYR HRNSMKVFLK +Q96JJ7 MAAWKSWTALRLCATVVVLDMVVCKGFVEDLDESFKENRNDDIWLVDFYAPWCGHCKKLEPIWNEVGLEMKSIGSPVKVGKMDATSYSSIASEFGVRGYPTIKLLKGDLAYNYRGPRTKDDIIEFAHRVSGALIRPLPSQQMFEHMQKRHRVFFVYVGGESPLKEKYIDAASELIVYTYFFSASEEVVPEYVTLKEMPAVLVFKDETYFVYDEYEDGDLSSWINRERFQNYLAMDGFLLYELGDTGKLVALAVIDEKNTSVEHTRLKSIIQEVARDYRDLFHRDFQFGHMDGNDYINTLLMDELTVPTVVVLNTSNQQYFLLDRQIKNVEDMVQFINNILDGTVEAQGGDSILQRLKRIVFDAKSTIVSIFKSSPLMGCFLFGLPLGVISIMCYGIYTADTDGGYIEERYEVSKSENENQEQIEESKEQQEPSSGGSVVPTVQEPKDVLEKKKD 0 374 396 KSTIVSIFKS SPLMGCFLFGLPLGVISIMCYGI YTADTDGGYI +Q9UKF5 
MKMLLLLHCLGVFLSCSGHIQDEHPQYHSPPDVVIPVRITGTTRGMTPPGWLSYILPFGGQKHIIHIKVKKLLFSKHLPVFTYTDQGAILEDQPFVQNNCYYHGYVEGDPESLVSLSTCFGGFQGILQINDFAYEIKPLAFSTTFEHLVYKMDSEEKQFSTMRSGFMQNEITCRMEFEEIDNSTQKQSSYVGWWIHFRIVEIVVVIDNYLYIRYERNDSKLLEDLYVIVNIVDSILDVIGVKVLLFGLEIWTNKNLIVVDDVRKSVHLYCKWKSENITPRMQHDTSHLFTTLGLRGLSGIGAFRGMCTPHRSCAIVTFMNKTLGTFSIAVAHHLGHNLGMNHDEDTCRCSQPRCIMHEGNPPITKFSNCSYGDFWEYTVERTKCLLETVHTKDIFNVKRCGNGVVEEGEECDCGPLKHCAKDPCCLSNCTLTDGSTCAFGLCCKDCKFLPSGKVCRKEVNECDLPEWCNGTSHKCPDDFYVEDGIPCKERGYCYEKSCHDRNEQCRRIFGAGANTASETCYKELNTLGDRVGHCGIKNATYIKCNISDVQCGRIQCENVTEIPNMSDHTTVHWARFNDIMCWSTDYHLGMKGPDIGEVKDGTECGIDHICIHRHCVHITILNSNCSPAFCNKRGICNNKHHCHCNYLWDPPNCLIKGYGGSVDSGPPPKRKKKKKFCYLCILLLIVLFILLCCLYRLCKKSKPIKKQQDVQTPSAKEEEKIQRRPHELPPQSQPWVMPSQSQPPVTPSQSHPQVMPSQSQPPVTPSQSQPRVMPSQSQPPVMPSQSHPQLTPSQSQPPVTPSQRQPQLMPSQSQPPVTPS 0 676 698 PPPKRKKKKK FCYLCILLLIVLFILLCCLYRLC KKSKPIKKQQ +Q9BZ11 MGWRPRRARGTPLLLLLLLLLLWPVPGAGVLQGHIPGQPVTPHWVLDGQPWRTVSLEEPVSKPDMGLVALEAEGQELLLELEKNHRLLAPGYIETHYGPDGQPVVLAPNHTDHCHYQGRVRGFPDSWVVLCTCSGMSGLITLSRNASYYLRPWPPRGSKDFSTHEIFRMEQLLTWKGTCGHRDPGNKAGMTSLPGGPQSRGRREARRTRKYLELYIVADHTLFLTRHRNLNHTKQRLLEVANYVDQLLRTLDIQVALTGLEVWTERDRSRVTQDANATLWAFLQWRRGLWAQRPHDSAQLLTGRAFQGATVGLAPVEGMCRAESSGGVSTDHSELPIGAAATMAHEIGHSLGLSHDPDGCCVEAAAESGGCVMAAATGHPFPRVFSACSRRQLRAFFRKGGGACLSNAPDPGLPVPPALCGNGFVEAGEECDCGPGQECRDLCCFAHNCSLRPGAQCAHGDCCVRCLLKPAGALCRQAMGDCDLPEFCTGTSSHCPPDVYLLDGSPCARGSGYCWDGACPTLEQQCQQLWGPGSHPAPEACFQVVNSAGDAHGNCGQDSEGHFLPCAGRDALCGKLQCQGGKPSLLAPHMVPVDSTVHLDGQEVTCRGALALPSAQLDLLGLGLVEPGTQCGPRMVCQSRRCRKNAFQELQRCLTACHSHGVCNSNHNCHCAPGWAPPFCDKPGFGGSMDSGPVQAENHDTFLLAMLLSVLLPLLPGAGLAWCCYRLPGAHLQRCSWGCRRDPACSGPKDGPHRDHPLGGVHPMELGPTATGQPWPLDPENSHEPSSHPEKPLPAVSPDPQADQVQMPRSCLW 0 13 35 WRPRRARGTP LLLLLLLLLLWPVPGAGVLQGHI PGQPVTPHWV +P58335 
MVAERSPARSPGSWLFPGLWLLVLSGPGGLLRAQEQPSCRRAFDLYFVLDKSGSVANNWIEIYNFVQQLAERFVSPEMRLSFIVFSSQATIILPLTGDRGKISKGLEDLKRVSPVGETYIHEGLKLANEQIQKAGGLKTSSIIIALTDGKLDGLVPSYAEKEAKISRSLGASVYCVGVLDFEQAQLERIADSKEQVFPVKGGFQALKGIINSILAQSCTEILELQPSSVCVGEEFQIVLSGRGFMLGSRNGSVLCTYTVNETYTTSVKPVSVQLNSMLCPAPILNKAGETLDVSVSFNGGKSVISGSLIVTATECSNGIAAIIVILVLLLLLGIGLMWWFWPLCCKVVIKDPPPPPAPAPKEEEEEPLPTKKWPTVDASYYGGRGVGGIKRMEVRWGDKGSTEEGARLEKAKNAVVKIPEETEEPIRPRPPRPKPTHQPPQTKWYTPIKGRLDALWALLRRQYDRVSLMRPQEGDEVCIWECIEKELTA 0 319 341 IVTATECSNG IAAIIVILVLLLLLGIGLMWWFW PLCCKVVIKD +Q7Z6A9 MKTLPAMLGTGKLFWVFFLIPYLDIWNIHGKESCDVQLYIKRQSEHSILAGDPFELECPVKYCANRPHVTWCKLNGTTCVKLEDRQTSWKEEKNISFFILHFEPVLPNDNGSYRCSANFQSNLIESHSTTLYVTDVKSASERPSKDEMASRPWLLYRLLPLGGLPLLITTCFCLFCCLRRHQGKQNELSDTAGREINLVDAHLKSEQTEASTRQNSQVLLSETGIYDNDPDLCFRMQEGSEVYSNPCLEENKPGIVYASLNHSVIGPNSRLARNVKEAPTEYASICVRS 0 153 175 PSKDEMASRP WLLYRLLPLGGLPLLITTCFCLF CCLRRHQGKQ +Q01151 MSRGLQLLLLSCAYSLAPATPEVKVACSEDVDLPCTAPWDPQVPYTVSWVKLLEGGEERMETPQEDHLRGQHYHQKGQNGSFDAPNERPYSLKIRNTTSCNSGTYRCTLQDPDGQRNLSGKVILRVTGCPAQRKEETFKKYRAEIVLLLALVIFYLTLIIFTCKFARLQSIFPDFSKAGMERAFLPVTSPNKHLGLVTPHKTELV 0 146 168 ETFKKYRAEI VLLLALVIFYLTLIIFTCKFARL QSIFPDFSKA +P01732 MALPVTALLLPLALLLHAARPSQFRVSPLDRTWNLGETVELKCQVLLSNPTSGCSWLFQPRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLGDTFVLTLSDFRRENEGYYFCSALSNSIMYFSHFVPVFLPAKPTTTPAPRPPTPAPTIASQPLSLRPEACRPAAGGAVHTRGLDFACDIYIWAPLAGTCGVLLLSLVITLYCNHRNRRRVCKCPRPVVKSGDKPSLSARYV 0 184 206 TRGLDFACDI YIWAPLAGTCGVLLLSLVITLYC NHRNRRRVCK +Q8VHS2 
MKLKRTAYLLFLYLSSSLLICIKNSFCNKNNTRCLSGPCQNNSTCKHFPQDNNCCLDTANNLDKDCEDLKDPCFSSPCQGIATCVKIPGEGNFLCQCPPGYSGLNCETATNSCGGNLCQHGGTCRKDPEHPVCICPPGYAGRFCETDHNECASSPCHNGAMCQDGINGYSCFCVPGYQGRHCDLEVDECVSDPCKNEAVCLNEIGRYTCVCPQEFSGVNCELEIDECRSQPCLHGATCQDAPGGYSCDCAPGFLGEHCELSVNECESQPCLHGGLCVDGRNSYHCDCTGSGFTGMHCESLIPLCWSKPCHNDATCEDTVDSYICHCRPGYTGALCETDINECSSNPCQFWGECVELSSEGLYGNTAGLPSSFSYVGASGYVCICQPGFTGIHCEEDVDECLLHPCLNGGTCENLPGNYACHCPFDDTSRTFYGGENCSEILLGCTHHQCLNNGKCIPHFQNGQHGFTCQCLSGYAGPLCETVTTLSFGSNGFLWVTSGSHTGIGPECNISLRFHTVQPNALLLIRGNKDVSMKLELLNGCVHLSIEVWNQLKVLLSISHNTSDGEWHFVEVTIAETLTLALVGGSCKEKCTTKSSVPVENHQSICALQDSFLGGLPMGTANNSVSVLNIYNVPSTPSFVGCLQDIRFDLNHITLENVSSGLSSNVKAGCLGKDWCESQPCQNRGRCINLWQGYQCECDRPYTGSNCLKEYVAGRFGQDDSTGYAAFSVNDNYGQNFSLSMFVRTRQPLGLLLALENSTYQYVSVWLEHGSLALQTPGSPKFMVNFFLSDGNVHLISLRIKPNEIELYQSSQNLGFISVPTWTIRRGDVIFIGGLPDREKTEVYGGFFKGCVQDVRLNSQTLEFFPNSTNNAYDDPILVNVTQGCPGDNTCKSNPCHNGGVCHSLWDDFSCSCPTNTAGRACEQVQWCQLSPCPPTAECQLLPQGFECIANAVFSGLSREILFRSNGNITRELTNITFAFRTHDTNVMILHAEKEPEFLNISIQDARLFFQLRSGNSFYTLHLMGSQLVNDGTWHQVTFSMIDPVAQTSRWQMEVNDQTPFVISEVATGSLNFLKDNTDIYVGDQSVDNPKGLQGCLSTIEIGGIYLSYFENLHGFPGKPQEEQFLKVSTNMVLTGCLPSNACHSSPCLHGGNCEDSYSSYRCACLSGWSGTHCEINIDECFSSPCIHGNCSDGVAAYHCRCEPGYTGVNCEVDVDNCKSHQCANGATCVPEAHGYSCLCFGNFTGRFCRHSRLPSTVCGNEKRNFTCYNGGSCSMFQEDWQCMCWPGFTGEWCEEDINECASDPCINGGLCRDLVNRFLCICDVAFAGERCELDLADDRLLGIFTAVGSGTLALFFILLLAGVASLIASNKRATQGTYSPSGQEKAGPRVEMWIRMPPPALERLI 0 1346 1368 ADDRLLGIFT AVGSGTLALFFILLLAGVASLIA SNKRATQGTY +P15509 MLLLVTSLLLCELPHPAFLLIPEKSDLRTVAPASSLNVRFDSRTMNLSWDCQENTTFSKCFLTDKKNRVVEPRLSNNECSCTFREICLHEGVTFEVHVNTSQRGFQQKLLYPNSGREGTAAQNFSCFIYNADLMNCTWARGPTAPRDVQYFLYIRNSKRRREIRCPYYIQDSGTHVGCHLDNLSGLTSRNYFLVNGTSREIGIQFFDSLLDTKKIERFNPPSNVTVRCNTTHCLVRWKQPRTYQKLSYLDFQYQLDVHRKNTQPGTENLLINVSGDLENRYNFPSSEPRAKHSVKIRAADVRILNWSSWSEAIEFGSDDGNLGSVYIYVLLIVGTLVCGIVLGFLFKRFLRIQRLFPPVPQIKDKLNDNHEVEDEIIWEEFTPEEGKGYREEVLTVKEIT 0 324 346 EFGSDDGNLG SVYIYVLLIVGTLVCGIVLGFLF KRFLRIQRLF +Q99062 
MARLGNCSLTWAALIILLLPGSLEECGHISVSAPIVHLGDPITASCIIKQNCSHLDPEPQILWRLGAELQPGGRQQRLSDGTQESIITLPHLNHTQAFLSCCLNWGNSLQILDQVELRAGYPPAIPHNLSCLMNLTTSSLICQWEPGPETHLPTSFTLKSFKSRGNCQTQGDSILDCVPKDGQSHCCIPRKHLLLYQNMGIWVQAENALGTSMSPQLCLDPMDVVKLEPPMLRTMDPSPEAAPPQAGCLQLCWEPWQPGLHINQKCELRHKPQRGEASWALVGPLPLEALQYELCGLLPATAYTLQIRCIRWPLPGHWSDWSPSLELRTTERAPTVRLDTWWRQRQLDPRTVQLFWKPVPLEEDSGRIQGYVVSWRPSGQAGAILPLCNTTELSCTFHLPSEAQEVALVAYNSAGTSRPTPVVFSESRGPALTRLHAMARDPHSLWVGWEPPNPWPQGYVIEWGLGPPSASNSNKTWRMEQNGRATGFLLKENIRPFQLYEIIVTPLYQDTMGPSQHVYAYSQEMAPSHAPELHLKHIGKTWAQLEWVPEPPELGKSPLTHYTIFWTNAQNQSFSAILNASSRGFVLHGLEPASLYHIHLMAASQAGATNSTVLTLMTLTPEGSELHIILGLFGLLLLLTCLCGTAWLCCSPNRKNPLWPSVPDPAHSSLGSWVPTIMEEDAFQLPGLGTPPITKLTVLEEDEKKPVPWESHNSSETCGLPTLVQTYVLQGDPRAVSTQPQSQSGTSDQVLYGQLLGSPTSPGPGHYLRCDSTQPLLAGLTPSPKSYENLWFQASPLGTLVTPAPSQEDDCVFGPLLNFPLLQGIRVHGMEALGSF 0 626 648 LMTLTPEGSE LHIILGLFGLLLLLTCLCGTAWL CCSPNRKNPL +Q6ZRH7 MCGPAMFPAGPPWPRVRVVQVLWALLAVLLASWRLWAIKDFQECTWQVVLNEFKRVGESGVSDSFFEQEPVDTVSSLFHMLVDSPIDPSEKYLGFPYYLKINYSCEEKPSEDLVRMGHLTGLKPLVLVTFQSPVNFYRWKIEQLQIQMEAAPFRSKEPCMAEEVCSMSWYTPMPIKKGSVVMRVDISSNGLGTFIPDKRFQMNINGFLKRDRDNNIQFTVGEELFNLMPQYFVGVSSRPLWHTVDQSPVLILGGIPNEKYVLMTDTSFKDFSLVELSIDSCWVGSFYCPHSGFTATIYDTIATESTLFIRQNQLVYYFTGTYTTLYERNRGSGSWIRVLASECIKKLCPVYFHSNGSEYIMALTTGKHEGYVHFGTIRDGQVSFEMLPRQWSVCEQIGVTTCSIIWSEYIAGEYTLLLLVESGYGNASKRFQVVSYNTASDDLELLYHIPEFIPEARGLEFLMILGTESYTSTAMAPKGIFCNPYNNLIFIWGNFLLQSSNKENFIYLADFPKELSIKYMARSFRGAVAIVTETEEIWYLLEGSYRVYQLFPSKGWQVHISLKLMQQSSLYASNETMLTLFYEDSKLYQLVYLMNNQKGQLVKRLVPVEQLLMYQQHTSHYDLERKGGYLMLSFIDFCPFSVMRLRSLPSPQRYTRQERYRARPPRVLERSGFHNENSLAIYQGLVYYLLWLHSVYDKPYADPVHDPTWRWWANNKQDQDYYFFLASNWRSAGGVSIEMDSYEKIYNLESAYELPERIFLDKGTEYSFAIFLSAQGHSFRTQSELGTAFQLHSQVDVGVVLADPGCIEASVKQEVLINRNSVLFSITLKDKKLCYDQGISGHHLMETSMTVNVVGSSGLCFQETHLGPHMQGNLMVPVFIGCPPGKRLAFDITYTLEYSRLKNKHYFDCVNVNPEMPCFLFRDIFYPFFLIQDLVTGDSGSFQGSYVLLVVGGGPTLDSLKDYSEDEIYRFNSPLDKTNSLIWTTRTTRTTKDSAFHIMSHESPGIEWLCLENAPCYDNVPQGIFAPEFFFKVLVSNRGVDTSTYCNYQLTFLLHIHGLPLSPKRALFIIMVSASVFVGLVIFYIAFCLLWPLVVKGCT
MIRWKINNLIASESYYTYASISGISSMPSLRHSRMGSMFSSRMTEDRAEPKEAVERQLMT 0 1073 1095 PKRALFIIMV SASVFVGLVIFYIAFCLLWPLVV KGCTMIRWKI +Q61476 MVSSTWGYDPRAGAGDLVITTTAAGAVTIAVLLFQTVCGDCGPPPDIPNARPILGRHSKFAEQSKVAYSCNNGFKQVPDKSNIVVCLENGQWSSHETFCEKSCDTPERLSFASLKKEYFNMNFFPVGTIVEYECRPGFRKQPSLSGKSTCLEDLVWSPVAQFCKKKSCPNPKDLDNGHINIPTGILFGSEINFSCNPGYRLVGITSILCTIIGNTVDWDDEFPVCTEIFCPDPPKINNGIMRGESDSYKYSQVVIYSCDKGFILFGNSTIYCTVSKSDVGQWSSPPPQCIEESKVPIKKPVVNVPSTGIPSTPQKPTTESVPNPGDQPTPQKPSTVKVPATQHEPDTTTRTSTDKGESNSGGDRYIYGFVAVIAMIDSLIIVKTLWTILSPNRRSDFQGKERKDVSK 0 367 389 ESNSGGDRYI YGFVAVIAMIDSLIIVKTLWTIL SPNRRSDFQG +Q5UCC4 MAAASAGATRLLLLLLMAVAAPSRARGSGCRAGTGARGAGAEGREGEACGTVGLLLEHSFEIDDSANFRKRGSLLWNQQDGTLSLSQRQLSEEERGRLRDVAALNGLYRVRIPRRPGALDGLEAGGYVSSFVPACSLVESHLSDQLTLHVDVAGNVVGVSVVTHPGGCRGHEVEDVDLELFNTSVQLQPPTTAPGPETAAFIERLEMEQAQKAKNPQEQKSFFAKYWMYIIPVVLFLMMSGAPDTGGQGGGGGGGGGGGSGR 0 221 243 QKAKNPQEQK SFFAKYWMYIIPVVLFLMMSGAP DTGGQGGGGG +P08637 MWQLLLPTALLLLVSAGMRTEDLPKAVVFLEPQWYRVLEKDSVTLKCQGAYSPEDNSTQWFHNESLISSQASSYFIDAATVDDSGEYRCQTNLSTLSDPVQLEVHIGWLLLQAPRWVFKEEDPIHLRCHSWKNTALHKVTYLQNGKGRKYFHHNSDFYIPKATLKDSGSYFCRGLFGSKNVSSETVNITITQGLAVSTISSFFPPGYQVSFCLVMVLLFAVDTGLYFSVKTNIRSSTRDWKDHKFKWRKDPQDK 0 207 229 STISSFFPPG YQVSFCLVMVLLFAVDTGLYFSV KTNIRSSTRD +P36888 
MPALARDGGQLPLLVVFSAMIFGTITNQDLPVIKCVLINHKNNDSSVGKSSSYPMVSESPEDLGCALRPQSSGTVYEAAAVEVDVSASITLQVLVDAPGNISCLWVFKHSSLNCQPHFDLQNRGVVSMVILKMTETQAGEYLLFIQSEATNYTILFTVSIRNTLLYTLRRPYFRKMENQDALVCISESVPEPIVEWVLCDSQGESCKEESPAVVKKEEKVLHELFGTDIRCCARNELGRECTRLFTIDLNQTPQTTLPQLFLKVGEPLWIRCKAVHVNHGFGLTWELENKALEEGNYFEMSTYSTNRTMIRILFAFVSSVARNDTGYYTCSSSKHPSQSALVTIVEKGFINATNSSEDYEIDQYEEFCFSVRFKAYPQIRCTWTFSRKSFPCEQKGLDNGYSISKFCNHKHQPGEYIFHAENDDAQFTKMFTLNIRRKPQVLAEASASQASCFSDGYPLPSWTWKKCSDKSPNCTEEITEGVWNRKANRKVFGQWVSSSTLNMSEAIKGFLVKCCAYNSLGTSCETILLNSPGPFPFIQDNISFYATIGVCLLFIVVLTLLICHKYKKQFRYESQLQMVQVTGSSDNEYFYVDFREYEYDLKWEFPRENLEFGKVLGSGAFGKVMNATAYGISKTGVSIQVAVKMLKEKADSSEREALMSELKMMTQLGSHENIVNLLGACTLSGPIYLIFEYCCYGDLLNYLRSKREKFHRTWTEIFKEHNFSFYPTFQSHPNSSMPGSREVQIHPDSDQISGLHGNSFHSEDEIEYENQKRLEEEEDLNVLTFEDLLCFAYQVAKGMEFLEFKSCVHRDLAARNVLVTHGKVVKICDFGLARDIMSDSNYVVRGNARLPVKWMAPESLFEGIYTIKSDVWSYGILLWEIFSLGVNPYPGIPVDANFYKLIQNGFKMDQPFYATEEIYIIMQSCWAFDSRKRPSFPNLTSFLGCQLADAEEAMYQNVDGRVSECPHTYQNRRPFSREMDLGLLSPQAQVEDS 0 542 564 PGPFPFIQDN ISFYATIGVCLLFIVVLTLLICH KYKKQFRYES +Q99665 MAHTFRGCSLAFMFIITWLLIKAKIDACKRGDVTVKPSHVILLGSTVNITCSLKPRQGCFHYSRRNKLILYKFDRRINFHHGHSLNSQVTGLPLGTTLFVCKLACINSDEIQICGAEIFVGVAPEQPQNLSCIQKGEQGTVACTWERGRDTHLYTEYTLQLSGPKNLTWQKQCKDIYCDYLDFGINLTPESPESNFTAKVTAVNSLGSSSSLPSTFTFLDIVRPLPPWDIRIKFQKASVSRCTLYWRDEGLVLLNRLRYRPSNSRLWNMVNVTKAKGRHDLLDLKPFTEYEFQISSKLHLYKGSWSDWSESLRAQTPEEEPTGMLDVWYMKRHIDYSRQQISLFWKNLSVSEARGKILHYQVTLQELTGGKAMTQNITGHTSWTTVIPRTGNWAVAVSAANSKGSSLPTRINIMNLCEAGLLAPRQVSANSEGMDNILVTWQPPRKDPSAVQEYVVEWRELHPGGDTQVPLNWLRSRPYNVSALISENIKSYICYEIRVYALSGDQGGCSSILGNSKHKAPLSGPHINAITEEKGSILISWNSIPVQEQMGCLLHYRIYWKERDSNSQPQLCEIPYRVSQNSHPINSLQPRVTYVLWMTALTAAGESSHGNEREFCLQGKANWMAFVAPSICIAIIMVGIFSTHYFQQKVFVLLAALRPQWCSREIPDPANSTCAKKYPIAEEKTQLPLDRLLIDWPTPEDPEPLVISEVLHQVTPVFRHPPCSNWPQREKGIQGHQASEKDMMHSASSPPPPRALQAESRQLVDLYKVLESRGSDPKPENPACPWTVLPAGDLPTHDGYLPSNIDDLPSHEAPLADSLEELEPQHISLSVFPSSSLHPLTFSCGDKLTLDQLKMRCDSLML 0 623 645 REFCLQGKAN WMAFVAPSICIAIIMVGIFSTHY FQQKVFVLLA +Q6PHB0 
MHTPGTPAPGHPDPPPLLLLTLLLLLAASGRAVPCVFCGLPKPTNITFLSINMKNVLHWNPPESLHGVEVTYTVQYFIYGQKKWLNASKCGSINRTYCDLSVETSDYEHQFYAKVKAIWEARCSEWAETERFYPFLETQVSPPEVALTTGEKSISIALTAPEKWKRNPQDHTVSMQQIYPNLKYNVSVYNTKSRRTWSQCVTNSTLVLSWLEPNTLYCVHVESLVPGPPRLPMPSQKQCISTLEVQTSAWKAKVIFWYVFLTSVIVFLFSAIGYLVYRYIHVGKEKHPANLVLIYRNEIGTRVFEPTETITLNFITFSMLDDTKISPKDMNLLDKSSDDISVNDPEHNEAWEPHWEEVEGQHLGCSSHLMDAVCGAEQRDGDTSLTQHGWLNSTIPTGETDTEPQYKVLSDFYGEGEIQLSCEPEEAARTEKISEPLVTSANLDPQLEDLHHLGQEHTVSEDGPEEETSITVVDWDPQTGRLCIPSLPIFGRDPENYGHYERDQLLEGGLLSRLYENQAPDKPEKENENCLTRFMEEWGLHVQMES 0 255 277 VQTSAWKAKV IFWYVFLTSVIVFLFSAIGYLVY RYIHVGKEKH +Q7TSN7 MEGSWRDVLAVLVILAQLTASGSSYQIIEGPQNVTVLKDSEAHFNCTVTHGWKLLMWTLNQMVVLSLTTQGPIITNNRFTYASYNSTDSFISELIIHDVQPSDSGSVQCSLQNSHGFGSAFLSVQVMGTLNIPSNNLIVTEGEPCNVTCYAVGWTSLPDISWELEVPVSHSSYNSFLESGNFMRVLSVLDLTPLGNGTLTCVAELKDLQASKSLTVNLTVVQPPPDSIGEEGPALPTWAIILLAVAFSLLLILIIVLIIIFCCCCASRREKEESTYQNEIRKSANMRTNKADPETKLKGGKENYGYSSDEAKAAQTASLPPKSAEVSLPEKRSSSLPYQELNKHQPGPATHPRVSFDIASPQKVRNVTLV 0 239 261 GEEGPALPTW AIILLAVAFSLLLILIIVLIIIF CCCCASRREK +P31785 MLKPSLPFTSLLFLQLPLLGVGLNTTILTPNGNEDTTADFFLTTMPTDSLSVSTLPLPEVQCFVFNVEYMNCTWNSSSEPQPTNLTLHYWYKNSDNDKVQKCSHYLFSEEITSGCQLQKKEIHLYQTFVVQLQDPREPRRQATQMLKLQNLVIPWAPENLTLHKLSESQLELNWNNRFLNHCLEHLVQYRTDWDHSWTEQSVDYRHKFSLPSVDGQKRYTFRVRSRFNPLCGSAQHWSEWSHPIHWGSNTSKENPFLFALEAVVISVGSMGLIISLLCVYFWLERTMPRIPTLKNLEDLVTEYHGNFSAWSGVSKGLAESLQPDYSERLCLVSEIPPKGGALGEGPGASPCNQHSPYWAPPCYTLKPET 0 262 284 KENPFLFALE AVVISVGSMGLIISLLCVYFWLE RTMPRIPTLK +Q86SU0 
MAWPKLPAPWLLLCTWLPAGCLSLLVTVQHTERYVTLFASIILKCDYTTSAQLQDVVVTWRFKSFCKDPIFDYYSASYQAALSLGQDPSNDCNDNQREVRIVAQRRGQNEPVLGVDYRQRKITIQNRADLVINEVMWWDHGVYYCTIEAPGDTSGDPDKEVKLIVLHWLTVIFIILGALLLLLLIGVCWCQCCPQYCCCYIRCPCCPAHCCCPEEALARHRYMKQAQALGPQMMGKPLYWGADRSSQVSSYPMHPLLQRDLSLPSSLPQMPMTQTTNQPPIANGVLEYLEKELRNLNLAQPLPPDLKGRFGHPCSMLSSLGSEVVERRIIHLPPLIRDLSSSRRTSDSLHQQWLTPIPSRPWDLREGRSHHHYPDFHQELQDRGPKSWALERRELDPSWSGRHRSSRLNGSPIHWSDRDSLSDVPSSSEARWRPSHPPFRSRCQERPRRPSPRESTQRHGRRRRHRSYSPPLPSGLSSWSSEEDKERQPQSWRAHRRGSHSPHWPEEKPPSYRSLDITPGKNSRKKGSVERRSEKDSSHSGRSVVI 0 163 185 TSGDPDKEVK LIVLHWLTVIFIILGALLLLLLI GVCWCQCCPQ +Q9Y561 MACRWSTKESPRWRSALLLLFLAGVYGNGALAEHSENVHISGVSTACGETPEQIRAPSGIITSPGWPSEYPAKINCSWFIRANPGEIITISFQDFDIQGSRRCNLDWLTIETYKNIESYRACGSTIPPPYISSQDHIWIRFHSDDNISRKGFRLAYFSGKSEEPNCACDQFRCGNGKCIPEAWKCNNMDECGDSSDEEICAKEANPPTAAAFQPCAYNQFQCLSRFTKVYTCLPESLKCDGNIDCLDLGDEIDCDVPTCGQWLKYFYGTFNSPNYPDFYPPGSNCTWLIDTGDHRKVILRFTDFKLDGTGYGDYVKIYDGLEENPHKLLRVLTAFDSHAPLTVVSSSGQIRVHFCADKVNAARGFNATYQVDGFCLPWEIPCGGNWGCYTEQQRCDGYWHCPNGRDETNCTMCQKEEFPCSRNGVCYPRSDRCNYQNHCPNGSDEKNCFFCQPGNFHCKNNRCVFESWVCDSQDDCGDGSDEENCPVIVPTRVITAAVIGSLICGLLLVIALGCTCKLYSLRMFERRSFETQLSRVEAELLRREAPPSYGQLIAQGLIPPVEDFPVCSPNQASVLENLRLAVRSQLGFTSVRLPMAGRSSNIWNRIFNFARSRHSGSLALVSADGDEVVPSQSTSREPERNHTHRSLFSVESDDTDTENERRDMAGASGGVAAPLPQKVPPTTAVEATVGACASSSTQSTRGGHADNGRDVTSVEPPSVSPARHQLTSALSRMTQGLRWVRFTLGRSSSLSQNQSPLRQLDNGVSGREDDDDVEMLIPISDGSSDFDVNDCSRPLLDLASDQGQGLRQPYNATNPGVRPSNRDGPCERCGIVHTAQIPDTCLEVTLKNETSDDEALLLC 0 13 32 CRWSTKESPR WRSALLLLFLAGVYGNGALA EHSENVHISG +O14786 
MERGLPLLCAVLALVLAPAGAFRNDKCGDTIKIESPGYLTSPGYPHSYHPSEKCEWLIQAPDPYQRIMINFNPHFDLEDRDCKYDYVEVFDGENENGHFRGKFCGKIAPPPVVSSGPFLFIKFVSDYETHGAGFSIRYEIFKRGPECSQNYTTPSGVIKSPGFPEKYPNSLECTYIVFVPKMSEIILEFESFDLEPDSNPPGGMFCRYDRLEIWDGFPDVGPHIGRYCGQKTPGRIRSSSGILSMVFYTDSAIAKEGFSANYSVLQSSVSEDFKCMEALGMESGEIHSDQITASSQYSTNWSAERSRLNYPENGWTPGEDSYREWIQVDLGLLRFVTAVGTQGAISKETKKKYYVKTYKIDVSSNGEDWITIKEGNKPVLFQGNTNPTDVVVAVFPKPLITRFVRIKPATWETGISMRFEVYGCKITDYPCSGMLGMVSGLISDSQITSSNQGDRNWMPENIRLVTSRSGWALPPAPHSYINEWLQIDLGEEKIVRGIIIQGGKHRENKVFMRKFKIGYSNNGSDWKMIMDDSKRKAKSFEGNNNYDTPELRTFPALSTRFIRIYPERATHGGLGLRMELLGCEVEAPTAGPTTPNGNLVDECDDDQANCHSGTGDDFQLTGGTTVLATEKPTVIDSTIQSEFPTYGFNCEFGWGSHKTFCHWEHDNHVQLKWSVLTSKTGPIQDHTGDGNFIYSQADENQKGKVARLVSPVVYSQNSAHCMTFWYHMSGSHVGTLRVKLRYQKPEEYDQLVWMAIGHQGDHWKEGRVLLHKSLKLYQVIFEGEIGKGNLGGIAVDDISINNHISQEDCAKPADLDKKNPEIKIDETGSTPGYEGEGEGDKNISRKPGNVLKTLDPILITIIAMSALGVLLGAVCGVVLYCACWHNGMSERNLSALENYNFELVDGVKLKKDKLNTQSTYSEA 0 857 879 PGNVLKTLDP ILITIIAMSALGVLLGAVCGVVL YCACWHNGMS +Q86WC4 MEPGPTAAQRRCSLPPWLPLGLLLWSGLALGALPFGSSPHRVFHDLLSEQQLLEVEDLSLSLLQGGGLGPLSLPPDLPDLDPECRELLLDFANSSAELTGCLVRSARPVRLCQTCYPLFQQVVSKMDNISRAAGNTSESQSCARSLLMADRMQIVVILSEFFNTTWQEANCANCLTNNSEELSNSTVYFLNLFNHTLTCFEHNLQGNAHSLLQTKNYSEVCKNCREAYKTLSSLYSEMQKMNELENKAEPGTHLCIDVEDAMNITRKLWSRTFNCSVPCSDTVPVIAVSVFILFLPVVFYLSSFLHSEQKKRKLILPKRLKSSTSFANIQENSN 0 283 305 FNCSVPCSDT VPVIAVSVFILFLPVVFYLSSFL HSEQKKRKLI +Q969N2 MAAAMPLALLVLLLLGPGGWCLAEPPRDSLREELVITPLPSGDVAATFQFRTRWDSELQREGVSHYRLFPKALGQLISKYSLRELHLSFTQGFWRTRYWGPPFLQAPSGAELWVWFQDTVTDVDKSWKELSNVLSGIFCASLNFIDSTNTVTPTASFKPLGLANDTDHYFLRYAVLPREVVCTENLTPWKKLLPCSSKAGLSVLLKADRLFHTSYHSQAVHIRPVCRNARCTSISWELRQTLSVVFDAFITGQGKKDWSLFRMFSRTLTEPCPLASESRVYVDITTYNQDNETLEVHPPPTTTYQDVILGTRKTYAIYDLLDTAMINNSRNLNIQLKWKRPPENEAPPVPFLHAQRYVSGYGLQKGELSTLLYNTHPYRAFPVLLLDTVPWYLRLYVHTLTITSKGKENKPSYIHYQPAQDRLQPHLLEMLIQLPANSVTKVSIQFERALLKWTEYTPDPNHGFYVSPSVLSALVPSMVAAKPVDWEESPLFNSLFPVSDGSNYFVRLYTEPLLVNLPTPDFSMPYNVICLTCTVVAVCYGSFYNLLTRTFHIEEPRTGGLAKRLANLIRRARGVPPL 0 522 544 PLLVNLPTPD 
FSMPYNVICLTCTVVAVCYGSFY NLLTRTFHIE +Q9Z239 MASPGHILALCVCLLSMASAEAPQEPDPFTYDYHTLRIGGLTIAGILFILGILIILSKRCRCKFNQQQRTGEPDEEEGTFRSSIRRLSSRRR 0 35 57 EPDPFTYDYH TLRIGGLTIAGILFILGILIILS KRCRCKFNQQ +Q8TEM1 MAARGRGLLLLTLSVLLAAGPSAAAAKLNIPKVLLPFTRATRVNFTLEASEGCYRWLSTRPEVASIEPLGLDEQQCSQKAVVQARLTQPARLTSIIFAEDITTGQVLRCDAIVDLIHDIQIVSTTRELYLEDSPLELKIQALDSEGNTFSTLAGLVFEWTIVKDSEADRFSDSHNALRILTFLESTYIPPSYISEMEKAAKQGDTILVSGMKTGSSKLKARIQEAVYKNVRPAEVRLLILENILLNPAYDVYLMVGTSIHYKVQKIRQGKITELSMPSDQYELQLQNSIPGPEGDPARPVAVLAQDTSMVTALQLGQSSLVLGHRSIRMQGASRLPNSTIYVVEPGYLGFTVHPGDRWVLETGRLYEITIEVFDKFSNKVYVSDNIRIETVLPAEFFEVLSSSQNGSYHRIRALKRGQTAIDAALTSVVDQDGGVHILQVPVWNQQEVEIHIPITLYPSILTFPWQPKTGAYQYTIRAHGGSGNFSWSSSSHLVATVTVKGVMTTGSDIGFSVIQAHDVQNPLHFGEMKVYVIEPHSMEFAPCQVEARVGQALELPLRISGLMPGGASEVVTLSDCSHFDLAVEVENQGVFQPLPGRLPPGSEHCSGIRVKAEAQGSTTLLVSYRHGHVHLSAKITIAAYLPLKAVDPSSVALVTLGSSKEMLFEGGPRPWILEPSKFFQNVTAEDTDSIGLALFAPHSSRNYQQHWILVTCQALGEQVIALSVGNKPSLTNPFPAVEPAVVKFVCAPPSRLTLAPVYTSPQLDMSCPLLQQNKQVVPVSSHRNPRLDLAAYDQEGRRFDNFSSLSIQWESTRPVLASIEPELPMQLVSQDDESGQKKLHGLQAILVHEASGTTAITATATGYQESHLSSARTKQPHDPLVPLSASIELILVEDVRVSPEEVTIYNHPGIQAELRIREGSGYFFLNTSTADVVKVAYQEARGVAMVHPLLPGSSTIMIHDLCLVFPAPAKAVVYVSDIQELYIRVVDKVEIGKTVKAYVRVLDLHKKPFLAKYFPFMDLKLRAASPIITLVALDEALDNYTITFLIRGVAIGQTSLTASVTNKAGQRINSAPQQIEVFPPFRLMPRKVTLLIGATMQVTSEGGPQPQSNILFSISNESVALVSAAGLVQGLAIGNGTVSGLVQAVDAETGKVVIISQDLVQVEVLLLRAVRIRAPIMRMRTGTQMPIYVTGITNHQNPFSFGNAVPGLTFHWSVTKRDVLDLRGRHHEASIRLPSQYNFAMNVLGRVKGRTGLRVVVKAVDPTSGQLYGLARELSDEIQVQVFEKLQLLNPEIEAEQILMSPNSYIKLQTNRDGAASLSYRVLDGPEKVPVVHVDEKGFLASGSMIGTSTIEVIAQEPFGANQTIIVAVKVSPVSYLRVSMSPVLHTQNKEALVAVPLGMTVTFTVHFHDNSGDVFHAHSSVLNFATNRDDFVQIGKGPTNNTCVVRTVSVGLTLLRVWDAEHPGLSDFMPLPVLQAISPELSGAMVVGDVLCLATVLTSLEGLSGTWSSSANSILHIDPKTGVAVARAVGSVTVYYEVAGHLRTYKEVVVSVPQRIMARHLHPIQTSFQEATASKVIVAVGDRSSNLRGECTPTQREVIQALHPETLISCQSQFKPAVFDFPSQDVFTVEPQFDTALGQYFCSITMHRLTDKQRKHLSMKKTALVVSASLSSSHFSTEQVGAEVPFSPGLFADQAEILLSNHYTSSEIRVFGAPEVLENLEVKSGSPAVLAFAKEKSFGWPSFITYTVGVLDPAAGSQGPLSTTLTFSSPVTNQAIAIPVTVAFVVDRRGPGPYGASLFQH
FLDSYQVMFFTLFALLAGTAVMIIAYHTVCTPRDLAVPAALTPRASPGHSPHYFAASSPTSPNALPPARKASPPSGLWSPAYASH 0 1809 1831 LFQHFLDSYQ VMFFTLFALLAGTAVMIIAYHTV CTPRDLAVPA +P0DTF9 MCWLRAWGQILLPVFLSLFLIQLLISFSENGFIHSPRNNQKPRDGNEEECAVKKSCQLCTEDKKCVWCSEEKACKKYCFPYFGCRFSSIYWLNCKVDMFGIMMLLLIAVLITGFVWYCCAYHFYLQDLNRNRVYFYGRRETVPIHDRSATVYDE 0 98 120 SIYWLNCKVD MFGIMMLLLIAVLITGFVWYCCA YHFYLQDLNR +Q9NXS2 MRSGGRGRPRLRLGERGLMEPLLPPKRRLLPRVRLLPLLLALAVGSAFYTIWSGWHRRTEELPLGRELRVPLIGSLPEARLRRVVGQLDPQRLWSTYLRPLLVVRTPGSPGNLQVRKFLEATLRSLTAGWHVELDPFTASTPLGPVDFGNVVATLDPRAARHLTLACHYDSKLFPPGSTPFVGATDSAVPCALLLELAQALDLELSRAKKQAAPVTLQLLFLDGEEALKEWGPKDSLYGSRHLAQLMESIPHSPGPTRIQAIELFMLLDLLGAPNPTFYSHFPRTVRWFHRLRSIEKRLHRLNLLQSHPQEVMYFQPGEPFGSVEDDHIPFLRRGVPVLHLISTPFPAVWHTPADTEVNLHPPTVHNLCRILAVFLAEYLGL 0 33 55 LPPKRRLLPR VRLLPLLLALAVGSAFYTIWSGW HRRTEELPLG +O75787 MAVFVVLLALVAGVLGNEFSILKSPGSVVFRNGNWPIPGERIPDVAALSMGFSVKEDLSWPGLAVGNLFHRPRATVMVMVKGVNKLALPPGSVISYPLENAVPFSLDSVANSIHSLFSEETPVVLQLAPSEERVYMVGKANSVFEDLSVTLRQLRNRLFQENSVLSSLPLNSLSRNNEVDLLFLSELQVLHDISSLLSRHKHLAKDHSPDLYSLELAGLDEIGKRYGEDSEQFRDASKILVDALQKFADDMYSLYGGNAVVELVTVKSFDTSLIRKTRTILEAKQAKNPASPYNLAYKYNFEYSVVFNMVLWIMIALALAVIITSYNIWNMDPGYDSIIYRMTNQKIRMD 0 309 331 YNFEYSVVFN MVLWIMIALALAVIITSYNIWNM DPGYDSIIYR +Q9HBV2 MSPRGTGCSAGLLMTVGWLLLAGLQSARGTNVTAAVQDAGLAHEGEGEEETENNDSETAENYAPPETEDVSNRNVVKEVEFGMCTVTCGIGVREVILTNGCPGGESKCVVRVEECRGPTDCGWGKPISESLESVRLACIHTSPLNRFKYMWKLLRQDQQSIILVNDSAILEVRKESHPLAFECDTLDNNEIVATIKFTVYTSSELQMRRSSLPATDAALIFVLTIGVIICVFIIFLLIFIIINWAAVKAFWGAKASTPEVQSEQSSVRYKDSTSLDQLPTEMPGEDDALSEWNE 0 217 239 MRRSSLPATD AALIFVLTIGVIICVFIIFLLIF IIINWAAVKA +Q96BY9 MAAACGPGAAGYCLLLGLHLFLLTAGPALGWNDPDRMLLRDVKALTLHYDRYTTSRRLDPIPQLKCVGGTAGCDSYTPKVIQCQNKGWDGYDVQWECKTDLDIAYKFGKTVVSCEGYESSEDQYVLRGSCGLEYNLDYTELGLQKLKESGKQHGFASFSDYYYKWSSADSCNMSGLITIVVLLGIAFVVYKLFLSDGQYSPPPYSEYPPFSHRYQRFTNSAGPPPPGFKSEFTGPQNTGHGATSGFGSAFTGQQGYENSGPGFWTGLGTGGILGYLFGSNRAATPFSDSWYYPSYPPSYPGTWNRAYSPLHGGSGSYSVCSNSDTKTRTASGYGGTRRR 0 172 194 YYKWSSADSC NMSGLITIVVLLGIAFVVYKLFL SDGQYSPPPY +Q9WTM3 
MPRAPHSMPLLLLLLLLSSLPQAQAAFPQDPTPLLTSDLQGASPSSWFRGLEDDAVAAELGLDFQRFLTLNRTLLVAARDHVFSFDLQAQEEGEGLVPNKFLTWRSQDMENCAVRGKLTDECYNYIRVLVPWNSQTLLACGTNSFSPMCRSYGITSLQQEGEELSGQARCPFDATQSTVAIFAEGSLYSATAADFQASDAVVYRSLGPQPPLRSAKYDSKWLREPHFVYALEHGEHVYFFFREVSVEDARLGRVQFSRVARVCKRDMGGSPRALDRHWTSFLKLRLNCSVPGDSTFYFDVLQSLTGPVNLHGRSALFGVFTTQTNSIPGSAVCAFYLDDIERGFEGKFKEQRSLDGAWTPVSEDKVPSPRPGSCAGVGAAASFSSSQDLPDDVLLFIKAHPLLDPAVPPATHQPLLTLTSRALLTQVAVDGMAGPHRNTTVLFLGSNDGTVLKVLPPGGQSLGSEPIVLEEIDAYSHARCSGKRSPRAARRIIGLELDTEGHRLFVAFPGCIVYLSLSRCARHGACQRSCLASLDPYCGWHRSRGCMSIRGPGGTDVDLTGNQESTEHGDCQDGATGSQSGPGDSAYGVRRDLSPASASRSIPIPLLLACVAAAFALGASVSGLLVSCACRRANRRRSKDIETPGLPRPLSLRSLARLHGGGPEPPPPPKDGDAAQTPQLYTTFLPPPDGGSPPELACLPTPETTPELPVKHLRASGGPWEWNQNGNNASEGPGRPPRGCSGAGGPAPRVLVRPPPPGCPGQAVEVTTLEELLRYLHGPQPPRKGSEPLASAPFTSRPPASEPGASLFVDSSPMPRDGVPPLRLDVPPEGKRAAPSGRPALSAPAPRLGVGGSRRLPFPTHRAPPGLLTRVPSGGPARYSGGPGRHLLYLGRPEGHRGRSLKRVDVKSPLSPKPPLASPPQPAPHGGHFNF 0 604 626 SPASASRSIP IPLLLACVAAAFALGASVSGLLV SCACRRANRR +Q6UWI4 MWGARRSSVSSSWNAASLLQLLLAALLAAGARASGEYCHGWLDAQGVWRIGFQCPERFDGGDATICCGSCALRYCCSSAEARLDQGGCDNDRQQGAGEPGRADKDGPDGSAVPIYVPFLIVGSVFVAFIILGSLVAACCCRCLRPKQDPQQSRAPGGNRLMETIPMIPSASTSRGSSSRQSSTAASSSSSANSGARAPPTRSQTNCCLPEGTMNNVYVNMPTNFSVLNCQQATQIVPHQGQYLHPPYVGYTVQHDSVPMTAVPPFMDGLQPGYRQIQSPFPHTNSEQKMYPAVTV 0 114 136 KDGPDGSAVP IYVPFLIVGSVFVAFIILGSLVA ACCCRCLRPK +Q96DD7 MPPAGLRRAAPLTAIALLVLGAPLVLAGEDCLWYLDRNGSWHPGFNCEFFTFCCGTCYHRYCCRDLTLLITERQQKHCLAFSPKTIAGIASAVILFVAVVATTICCFLCSCCYLYRRRQQLQSPFEGQEIPMTGIPVQPVYPYPQDPKAGPAPPQPGFIYPPSGPAPQYPLYPAGPPVYNPAAPPPYMPPQPSYPGA 0 86 108 KHCLAFSPKT IAGIASAVILFVAVVATTICCFL CSCCYLYRRR +Q9D3G2 MWSLWSLLLFEALLPVVVVSVQVLSKVGDSELLVAECPPGFQVREAIWRSLWPSEELLATFFRGSLETLYHSRFLGRVQLYDNLSLELGPLKPGDSGNFSVLMVDTGGQTWTQTLYLKVYDAVPKPEVQVFTAAAEETQPLNTCQVFLSCWAPNISDITYSWRWEGTVDFNGEVRSHFSNGQVLSVSLGLGDKDVAFTCIASNPVSWDMTTVTPWESCHHEAASGKASYKDVLLVVVPITLFLILAGLFGAWHHGLCSGKKKDACTDGVLPETENALV 0 234 256 SGKASYKDVL LVVVPITLFLILAGLFGAWHHGL CSGKKKDACT +Q9UQF0 
MALPYHIFLFTVLLPSFTLTAPPPCRCMTSSSPYQEFLWRMQRPGNIDAPSYRSLSKGTPTFTAHTHMPRNCYHSATLCMHANTHYWTGKMINPSCPGGLGVTVCWTYFTQTGMSDGGGVQDQAREKHVKEVISQLTRVHGTSSPYKGLDLSKLHETLRTHTRLVSLFNTTLTGLHEVSAQNPTNCWICLPLNFRPYVSIPVPEQWNNFSTEINTTSVLVGPLVSNLEITHTSNLTCVKFSNTTYTTNSQCIRWVTPPTQIVCLPSGIFFVCGTSAYRCLNGSSESMCFLSFLVPPMTIYTEQDLYSYVISKPRNKRVPILPFVIGAGVLGALGTGIGGITTSTQFYYKLSQELNGDMERVADSLVTLQDQLNSLAAVVLQNRRALDLLTAERGGTCLFLGEECCYYVNQSGIVTEKVKEIRDRIQRRAEELRNTGPWGLLSQWMPWILPFLGPLAAIILLLLFGPCIFNLLVNFVSSRIEAVKLQMEPKMQSKTKIYRRPLDRPASPRSDVNDIKGTPPEEISAAQPLLRPNSAGSS 0 455 477 MPWILPFLGP LAAIILLLLFGPCIFNLLVNFVS SRIEAVKLQM +P37173 MGRGLLRGLWPLHIVLWTRIASTIPPHVQKSVNNDMIVTDNNGAVKFPQLCKFCDVRFSTCDNQKSCMSNCSITSICEKPQEVCVAVWRKNDENITLETVCHDPKLPYHDFILEDAASPKCIMKEKKKPGETFFMCSCSSDECNDNIIFSEEYNTSNPDLLLVIFQVTGISLLPPLGVAISVIIIFYCYRVNRQQKLSSTWETGKTRKLMEFSEHCAIILEDDRSDISSTCANNINHNTELLPIELDTLVGKGRFAEVYKAKLKQNTSEQFETVAVKIFPYEEYASWKTEKDIFSDINLKHENILQFLTAEERKTELGKQYWLITAFHAKGNLQEYLTRHVISWEDLRKLGSSLARGIAHLHSDHTPCGRPKMPIVHRDLKSSNILVKNDLTCCLCDFGLSLRLDPTLSVDDLANSGQVGTARYMAPEVLESRMNLENVESFKQTDVYSMALVLWEMTSRCNAVGEVKDYEPPFGSKVREHPCVESMKDNVLRDRGRPEIPSFWLNHQGIQMVCETLTECWDHDPEARLTAQCVAERFSELEHLDRLSGRSCSEEKIPEDGSLNTTK 0 167 189 NPDLLLVIFQ VTGISLLPPLGVAISVIIIFYCY RVNRQQKLSS +Q15399 
MTSIFHFAIIFMLILQIRIQLSEESEFLVDRSKNGLIHVPKDLSQKTTILNISQNYISELWTSDILSLSKLRILIISHNRIQYLDISVFKFNQELEYLDLSHNKLVKISCHPTVNLKHLDLSFNAFDALPICKEFGNMSQLKFLGLSTTHLEKSSVLPIAHLNISKVLLVLGETYGEKEDPEGLQDFNTESLHIVFPTNKEFHFILDVSVKTVANLELSNIKCVLEDNKCSYFLSILAKLQTNPKLSNLTLNNIETTWNSFIRILQLVWHTTVWYFSISNVKLQGQLDFRDFDYSGTSLKALSIHQVVSDVFGFPQSYIYEIFSNMNIKNFTVSGTRMVHMLCPSKISPFLHLDFSNNLLTDTVFENCGHLTELETLILQMNQLKELSKIAEMTTQMKSLQQLDISQNSVSYDEKKGDCSWTKSLLSLNMSSNILTDTIFRCLPPRIKVLDLHSNKIKSIPKQVVKLEALQELNVAFNSLTDLPGCGSFSSLSVLIIDHNSVSHPSADFFQSCQKMRSIKAGDNPFQCTCELGEFVKNIDQVSSEVLEGWPDSYKCDYPESYRGTLLKDFHMSELSCNITLLIVTIVATMLVLAVTVTSLCSYLDLPWYLRMVCQWTQTRRRARNIPLEELQRNLQFHAFISYSGHDSFWVKNELLPNLEKEGMQICLHERNFVPGKSIVENIITCIEKSYKSIFVLSPNFVQSEWCHYELYFAHHNLFHEGSNSLILILLEPIPQYSIPSSYHKLKSLMARRTYLEWPKEKSKRGLFWANLRAAINIKLTEQAKK 0 582 604 MSELSCNITL LIVTIVATMLVLAVTVTSLCSYL DLPWYLRMVC +O60602 MGDHLDLLLGVVLMAGPVFGIPSCSFDGRIAFYRFCNLTQVPQVLNTTERLLLSFNYIRTVTASSFPFLEQLQLLELGSQYTPLTIDKEAFRNLPNLRILDLGSSKIYFLHPDAFQGLFHLFELRLYFCGLSDAVLKDGYFRNLKALTRLDLSKNQIRSLYLHPSFGKLNSLKSIDFSSNQIFLVCEHELEPLQGKTLSFFSLAANSLYSRVSVDWGKCMNPFRNMVLEILDVSGNGWTVDITGNFSNAISKSQAFSLILAHHIMGAGFGFHNIKDPDQNTFAGLARSSVRHLDLSHGFVFSLNSRVFETLKDLKVLNLAYNKINKIADEAFYGLDNLQVLNLSYNLLGELYSSNFYGLPKVAYIDLQKNHIAIIQDQTFKFLEKLQTLDLRDNALTTIHFIPSIPDIFLSGNKLVTLPKINLTANLIHLSENRLENLDILYFLLRVPHLQILILNQNRFSSCSGDQTPSENPSLEQLFLGENMLQLAWETELCWDVFEGLSHLQVLYLNHNYLNSLPPGVFSHLTALRGLSLNSNRLTVLSHNDLPANLEILDISRNQLLAPNPDVFVSLSVLDITHNKFICECELSTFINWLNHTNVTIAGPPADIYCVYPDSFSGVSLFSLSTEGCDEEEVLKSLKFSLFIVCTVTLTLFLMTILTVTKFRGFCFICYKTAQRLVFKDHPQGTEPDMYKYDAYLCFSSKDFTWVQNALLKHLDTQYSDQNRFNLCFEERDFVPGENRIANIQDAIWNSRKIVCLVSRHFLRDGWCLEAFSYAQGRCLSDLNSALIMVVVGSLSQYQLMKHQSIRGFVQKQQYLRWPEDFQDVGWFLHKLSQQILKKEKEKKKDNNIPLQTVATIS 0 644 666 VLKSLKFSLF IVCTVTLTLFLMTILTVTKFRGF CFICYKTAQR +Q9NYK1 
MVFPMWTLKRQILILFNIILISKLLGARWFPKTLPCDVTLDVPKNHVIVDCTDKHLTEIPGGIPTNTTNLTLTINHIPDISPASFHRLDHLVEIDFRCNCVPIPLGSKNNMCIKRLQIKPRSFSGLTYLKSLYLDGNQLLEIPQGLPPSLQLLSLEANNIFSIRKENLTELANIEILYLGQNCYYRNPCYVSYSIEKDAFLNLTKLKVLSLKDNNVTAVPTVLPSTLTELYLYNNMIAKIQEDDFNNLNQLQILDLSGNCPRCYNAPFPCAPCKNNSPLQIPVNAFDALTELKVLRLHSNSLQHVPPRWFKNINKLQELDLSQNFLAKEIGDAKFLHFLPSLIQLDLSFNFELQVYRASMNLSQAFSSLKSLKILRIRGYVFKELKSFNLSPLHNLQNLEVLDLGTNFIKIANLSMFKQFKRLKVIDLSVNKISPSGDSSEVGFCSNARTSVESYEPQVLEQLHYFRYDKYARSCRFKNKEASFMSVNESCYKYGQTLDLSKNSIFFVKSSDFQHLSFLKCLNLSGNLISQTLNGSEFQPLAELRYLDFSNNRLDLLHSTAFEELHKLEVLDISSNSHYFQSEGITHMLNFTKNLKVLQKLMMNDNDISSSTSRTMESESLRTLEFRGNHLDVLWREGDNRYLQLFKNLLKLEELDISKNSLSFLPSGVFDGMPPNLKNLSLAKNGLKSFSWKKLQCLKNLETLDLSHNQLTTVPERLSNCSRSLKNLILKNNQIRSLTKYFLQDAFQLRYLDLSSNKIQMIQKTSFPENVLNNLKMLLLHHNRFLCTCDAVWFVWWVNHTEVTIPYLATDVTCVGPGAHKGQSVISLDLYTCELDLTNLILFSLSISVSLFLMVMMTASHLYFWDVWYIYHFCKAKIKGYQRLISPDCCYDAFIVYDTKDPAVTEWVLAELVAKLEDPREKHFNLCLEERDWLPGQPVLENLSQSIQLSKKTVFVMTDKYAKTENFKIAFYLSHQRLMDEKVDVIILIFLEKPFQKSKFLQLRKRLCGSSVLEWPTNPQAHPYFWQCLKNALATDNHVAYSQVFKETV 0 843 865 CELDLTNLIL FSLSISVSLFLMVMMTASHLYFW DVWYIYHFCK +Q4KMG9 MGVRVHVVAASALLYFILLSGTRCEENCGNPEHCLTTDWVHLWYIWLLVVIGALLLLCGLTSLCFRCCCLSRQQNGEDGGPPPCEVTVIAFDHDSTLQSTITSLQSVFGPAARRILAVAHSHSSLGQLPSSLDTLPGYEEALHMSRFTVAMCGQKAPDLPPVPEEKQLPPTEKESTRIVDSWN 0 42 64 EHCLTTDWVH LWYIWLLVVIGALLLLCGLTSLC FRCCCLSRQQ +Q8NEW7 MAGWPGAGPLCVLGGAALGVCLAGVAGQLVEPSTAPPKPKPPPLTKETVVFWDMRLWHVVGIFSLFVLSIIITLCCVFNCRVPRTRKEIEARYLQRKAAKMYTDKLETVPPLNELTEVPGEDKKKKKKKKKDSVDTVAIKVEEDEKNEAKKKKGEK 0 56 78 KETVVFWDMR LWHVVGIFSLFVLSIIITLCCVF NCRVPRTRKE +A2RUT3 MLHVLASLPLLLLLVTSASTHAWSRPLWYQVGLDLQPWGCQPKSVEGCRGGLSCPGYWLGPGASRIYPVAAVMITTTMLMICRKILQGRRRSQATKGEHPQVTTEPCGPWKRRAPISDHTLLRGVLHMLDALLVHIEGHLRHLATQRQIQIKGTSTQSG 0 64 86 CPGYWLGPGA SRIYPVAAVMITTTMLMICRKIL QGRRRSQATK +B7ZWI3 MLDTWVWGTLTLTFGLLSSLQGVSFNETANTCDILNCPKGFTCCVKECCPERKVWDPANDRFRFLVILACIIFPILFICALVSLFCPNCTELQHDVRRVDHQTPIEPPSIAPLESIWVTSLDPPPPYSQVVQMTPPTEPPPPYSLRPEGPAGQMRGRAYATL 0 64 86 VWDPANDRFR 
FLVILACIIFPILFICALVSLFC PNCTELQHDV +O35305 MAPRARRRRQLPAPLLALCVLLVPLQVTLQVTPPCTQERHYEHLGRCCSRCEPGKYLSSKCTPTSDSVCLPCGPDEYLDTWNEEDKCLLHKVCDAGKALVAVDPGNHTAPRRCACTAGYHWNSDCECCRRNTECAPGFGAQHPLQLNKDTVCTPCLLGFFSDVFSSTDKCKPWTNCTLLGKLEAHQGTTESDVVCSSSMTLRRPPKEAQAYLPSLIVLLLFISVVVVAAIIFGVYYRKGGKALTANLWNWVNDACSSLSGNKESSGDRCAGSHSATSSQQEVCEGILLMTREEKMVPEDGAGVCGPVCAAGGPWAEVRDSRTFTLVSEVETQGDLSRKIPTEDEYTDRPSQPSTGSLLLIQQGSKSIPPFQEPLEVGENDSLSQCFTGTESTVDSEGCDFTEPPSRTDSMPVSPEKHLTKEIEGDSCLPWVVSSNSTDGYTGSGNTPGEDHEPFPGSLKCGPLPQCAYSMGFPSEAAASMAEAGVRPQDRADERGASGSGSSPSDQPPASGNVTGNSNSTFISSGQVMNFKGDIIVVYVSQTSQEGPGSAEPESEPVGRPVQEETLAHRDSFAGTAPRFPDVCATGAGLQEQGAPRQKDGTSRPVQEQGGAQTSLHTQGSGQCAE 0 212 234 RRPPKEAQAY LPSLIVLLLFISVVVVAAIIFGV YYRKGGKALT +P36941 MLLPWATSAPGLAWGPLVLGLFGLLAASQPQAVPPYASENQTCRDQEKEYYEPQHRICCSRCPPGTYVSAKCSRIRDTVCATCAENSYNEHWNYLTICQLCRPCDPVMGLEEIAPCTSKRKTQCRCQPGMFCAAWALECTHCELLSDCPPGTEAELKDEVGKGNNHCVPCKAGHFQNTSSPSARCQPHTRCENQGLVEAAPGTAQSDTTCKNPLEPLPPEMSGTMLMLAVLLPLAFFLLLATVFSCIWKSHPSLCRKLGSLLKRRPQGEGPNPVAGSWEPPKAHPYFPDLVQPLLPISGDVSPVSTGLPAAPVLEAGVPQQQSPLDLTREPQLEPGEQSQVAHGTNGIHVTGGSMTITGNIYIYNGPVLGGPPGPGDLPATPEPPYPIPEEGDPGPPGLSTPHQEDGKAWHLAETEHCGATPSNRGPRNQFITHD 0 226 248 PLPPEMSGTM LMLAVLLPLAFFLLLATVFSCIW KSHPSLCRKL +P25446 MLWIWAVLPLVLAGSQLRVHTQGTNSISESLKLRRRVRETDKNCSEGLYQGGPFCCQPCQPGKKKVEDCKMNGGTPTCAPCTEGKEYMDKNHYADKCRRCTLCDEEHGLEVETNCTLTQNTKCKCKPDFYCDSPGCEHCVRCASCEHGTLEPCTATSNTNCRKQSPRNRLWLLTILVLLIPLVFIYRKYRKRKCWKRRQDDPESRTSSRETIPMNASNLSLSKYIPRIAEDMTIQEAKKFARENNIKEGKIDEIMHDSIQDTAEQKVQLLLCWYQSHGKSDAYQDLIKGLKKAECRRTLDKFQDMVQKDLGKSTPDTGNENEGQCLE 0 170 187 NCRKQSPRNR LWLLTILVLLIPLVFIYR KYRKRKCWKR +Q9P2J2 
MVWCLGLAVLSLVISQGADGRGKPEVVSVVGRAGESVVLGCDLLPPAGRPPLHVIEWLRFGFLLPIFIQFGLYSPRIDPDYVGRVRLQKGASLQIEGLRVEDQGWYECRVFFLDQHIPEDDFANGSWVHLTVNSPPQFQETPPAVLEVQELEPVTLRCVARGSPLPHVTWKLRGKDLGQGQGQVQVQNGTLRIRRVERGSSGVYTCQASSTEGSATHATQLLVLGPPVIVVPPKNSTVNASQDVSLACHAEAYPANLTYSWFQDNINVFHISRLQPRVRILVDGSLRLLATQPDDAGCYTCVPSNGLLHPPSASAYLTVLYPAQVTAMPPETPLPIGMPGVIRCPVRANPPLLFVSWTKDGKALQLDKFPGWSQGTEGSLIIALGNEDALGEYSCTPYNSLGTAGPSPVTRVLLKAPPAFIERPKEEYFQEVGRELLIPCSAQGDPPPVVSWTKVGRGLQGQAQVDSNSSLILRPLTKEAHGHWECSASNAVARVATSTNVYVLGTSPHVVTNVSVVALPKGANVSWEPGFDGGYLQRFSVWYTPLAKRPDRMHHDWVSLAVPVGAAHLLVPGLQPHTQYQFSVLAQNKLGSGPFSEIVLSAPEGLPTTPAAPGLPPTEIPPPLSPPRGLVAVRTPRGVLLHWDPPELVPKRLDGYVLEGRQGSQGWEVLDPAVAGTETELLVPGLIKDVLYEFRLVAFAGSFVSDPSNTANVSTSGLEVYPSRTQLPGLLPQPVLAGVVGGVCFLGVAVLVSILAGCLLNRRRAARRRRKRLRQDPPLIFSPTGKSAAPSALGSGSPDSVAKLKLQGSPVPSLRQSLLWGDPAGTPSPHPDPPSSRGPLPLEPICRGPDGRFVMGPTVAAPQERSGREQAEPRTPAQRLARSFDCSSSSPSGAPQPLCIEDISPVAPPPAAPPSPLPGPGPLLQYLSLPFFREMNVDGDWPPLEEPSPAAPPDYMDTRRCPTSSFLRSPETPPVSPRESLPGAVVGAGATAEPPYTALADWTLRERLLPGLLPAAPRGSLTSQSSGRGSASFLRPPSTAPSAGGSYLSPAPGDTSSWASGPERWPRREHVVTVSKRRNTSVDENYEWDSEFPGDMELLETLHLGLASSRLRPEAEPELGVKTPEEGCLLNTAHVTGPEARCAALREEFLAFRRRRDATRARLPAYRQPVPHPEQATLL 0 738 760 PGLLPQPVLA GVVGGVCFLGVAVLVSILAGCLL NRRRAARRRR +Q96J42 MVPAAGRRPPRVMRLLGWWQVLLWVLGLPVRGVEVAEESGRLWSEEQPAHPLQVGAVYLGEEELLHDPMGQDRAAEEANAVLGLDTQGDHMVMLSVIPGEAEDKVSSEPSGVTCGAGGAEDSRCNVRESLFSLDGAGAHFPDREEEYYTEPEVAESDAAPTEDSNNTESLKSPKVNCEERNITGLENFTLKILNMSQDLMDFLNPNGSDCTLVLFYTPWCRFSASLAPHFNSLPRAFPALHFLALDASQHSSLSTRFGTVAVPNILLFQGAKPMARFNHTDRTLETLKIFIFNQTGIEAKKNVVVTQADQIGPLPSTLIKSVDWLLVFSLFFLISFIMYATIRTESIRWLIPGQEQEHVE 0 324 342 LPSTLIKSVD WLLVFSLFFLISFIMYATI RTESIRWLIP +P0DPA2 
MRVGGAFHLLLVCLSPALLSAVRINGDGQEVLYLAEGDNVRLGCPYVLDPEDYGPNGLDIEWMQVNSDPAHHRENVFLSYQDKRINHGSLPHLQQRVRFAASDPSQYDASINLMNLQVSDTATYECRVKKTTMATRKVIVTVQARPAVPMCWTEGHMTYGNDVVLKCYASGGSQPLSYKWAKISGHHYPYRAGSYTSQHSYHSELSYQESFHSSINQGLNNGDLVLKDISRADDGLYQCTVANNVGYSVCVVEVKVSDSRRIGVIIGIVLGSLLALGCLAVGIWGLVCCCCGGSGAGGARGAFGYGNGGGVGGGACGDLASEIREDAVAPGCKASGRGSRVTHLLGYPTQNVSRSLRRKYAPPPCGGPEDVALAPCTAAAACEAGPSPVYVKVKSAEPADCAEGPVQCKNGLLV 0 265 287 KVSDSRRIGV IIGIVLGSLLALGCLAVGIWGLV CCCCGGSGAG diff --git a/aaanalysis/_data/benchmarks/DOM_GSEC_PU.tsv b/aaanalysis/_data/benchmarks/DOM_GSEC_PU.tsv new file mode 100644 index 00000000..39df4b17 --- /dev/null +++ b/aaanalysis/_data/benchmarks/DOM_GSEC_PU.tsv @@ -0,0 +1,695 @@ +entry sequence label tmd_start tmd_stop jmd_n tmd jmd_c +P05067 MLPGLALLLLAAWTARALEVPTDGNAGLLAEPQIAMFCGRLNMHMNVQNGKWDSDPSGTKTCIDTKEGILQYCQEVYPELQITNVVEANQPVTIQNWCKRGRKQCKTHPHFVIPYRCLVGEFVSDALLVPDKCKFLHQERMDVCETHLHWHTVAKETCSEKSTNLHDYGMLLPCGIDKFRGVEFVCCPLAEESDNVDSADAEEDDSDVWWGGADTDYADGSEDKVVEVAEEEEVAEVEEEEADDDEDDEDGDEVEEEAEEPYEEATERTTSIATTTTTTTESVEEVVREVCSEQAETGPCRAMISRWYFDVTEGKCAPFFYGGCGGNRNNFDTEEYCMAVCGSAMSQSLLKTTQEPLARDPVKLPTTAASTPDAVDKYLETPGDENEHAHFQKAKERLEAKHRERMSQVMREWEEAERQAKNLPKADKKAVIQHFQEKVESLEQEAANERQQLVETHMARVEAMLNDRRRLALENYITALQAVPPRPRHVFNMLKKYVRAEQKDRQHTLKHFEHVRMVDPKKAAQIRSQVMTHLRVIYERMNQSLSLLYNVPAVAEEIQDEVDELLQKEQNYSDDVLANMISEPRISYGNDALMPSLTETKTTVELLPVNGEFSLDDLQPWHSFGADSVPANTENEVEPVDARPAADRGLTTRPGSGLTNIKTEEISEVKMDAEFRHDSGYEVHHQKLVFFAEDVGSNKGAIIGLMVGGVVIATVIVITLVMLKKKQYTSIHHGVVEVDAAVTPEERHLSKMQQNGYENPTYKFFEQMQN 1 701 723 FAEDVGSNKG AIIGLMVGGVVIATVIVITLVML KKKQYTSIHH +P14925 
MAGRARSGLLLLLLGLLALQSSCLAFRSPLSVFKRFKETTRSFSNECLGTIGPVTPLDASDFALDIRMPGVTPKESDTYFCMSMRLPVDEEAFVIDFKPRASMDTVHHMLLFGCNMPSSTGSYWFCDEGTCTDKANILYAWARNAPPTRLPKGVGFRVGGETGSKYFVLQVHYGDISAFRDNHKDCSGVSVHLTRVPQPLIAGMYLMMSVDTVIPPGEKVVNADISCQYKMYPMHVFAYRVHTHHLGKVVSGYRVRNGQWTLIGRQNPQLPQAFYPVEHPVDVTFGDILAARCVFTGEGRTEATHIGGTSSDEMCNLYIMYYMEAKYALSFMTCTKNVAPDMFRTIPAEANIPIPVKPDMVMMHGHHKEAENKEKSALMQQPKQGEEEVLEQGDFYSLLSKLLGEREDVHVHKYNPTEKTESGSDLVAEIANVVQKKDLGRSDAREGAEHEEWGNAILVRDRIHRFHQLESTLRPAESRAFSFQQPGEGPWEPEPSGDFHVEEELDWPGVYLLPGQVSGVALDSKNNLVIFHRGDHVWDGNSFDSKFVYQQRGLGPIEEDTILVIDPNNAEILQSSGKNLFYLPHGLSIDTDGNYWVTDVALHQVFKLDPHSKEGPLLILGRSMQPGSDQNHFCQPTDVAVEPSTGAVFVSDGYCNSRIVQFSPSGKFVTQWGEESSGSSPRPGQFSVPHSLALVPHLDQLCVADRENGRIQCFKTDTKEFVREIKHASFGRNVFAISYIPGFLFAVNGKPYFGDQEPVQGFVMNFSSGEIIDVFKPVRKHFDMPHDIVASEDGTVYIGDAHTNTVWKFTLTEKMEHRSVKKAGIEVQEIKEAEAVVEPKVENKPTSSELQKMQEKQKLSTEPGSGVSVVLITTLLVIPVLVLLAIVMFIRWKKSRAFGDHDRKLESSSGRVLGRFRGKGSGGLNLGNFFASRKGYSRKGFDRVSTEGSDQEKDEDDGTESEEEYSAPLPKPAPSS 1 868 890 KLSTEPGSGV SVVLITTLLVIPVLVLLAIVMFI RWKKSRAFGD +P70180 MRSLLLFTFSACVLLARVLLAGGASSGAGDTRPGSRRRAREALAAQKIEVLVLLPRDDSYLFSLARVRPAIEYALRSVEGNGTGRKLLPPGTRFQVAYEDSDCGNRALFSLVDRVAAARGAKPDLILGPVCEYAAAPVARLASHWDLPMLSAGALAAGFQHKDTEYSHLTRVAPAYAKMGEMMLALFRHHHWSRAALVYSDDKLERNCYFTLEGVHEVFQEEGLHTSAYNFDETKDLDLDDIVRYIQGSERVVIMCASGDTIRRIMLAVHRHGMTSGDYAFFNIELFNSSSYGDGSWRRGDKHDSEAKQAYSSLQTVTLLRTVKPEFEKFSMEVKSSVEKQGLNEEDYVNMFVEGFHDAILLYVLALHEVLRAGYSKKDGGKIIQQTWNRTFEGIAGQVSIDANGDRYGDFSVVAMTDTEAGTQEVIGDYFGKEGRFQMRSNVKYPWGPLKLRLDETRIVEHTNSSPCKSSGGLEESAVTGIVVGALLGAGLLMAFYFFRKKYRITIERRNQQEESNIGKHRELREDSIRSHFSVA 1 477 499 PCKSSGGLEE SAVTGIVVGALLGAGLLMAFYFF RKKYRITIER +Q03157 
MGPTSPAARGQGRRWRPPPLPLLLPLSLLLLRAQLAVGNLAVGSPSAAEAPGSAQVAGLCGRLTLHRDLRTGRWEPDPQRSRRCLLDPQRVLEYCRQMYPELHIARVEQAAQAIPMERWCGGTRSGRCAHPHHEVVPFHCLPGEFVSEALLVPEGCRFLHQERMDQCESSTRRHQEAQEACSSQGLILHGSGMLLPCGSDRFRGVEYVCCPPPATPNPSGMAAGDPSTRSWPLGGRAEGGEDEEEVESFPQPVDDYFVEPPQAEEEEEEEEERAPPPSSHTPVMVSRVTPTPRPTDGVDVYFGMPGEIGEHEGFLRAKMDLEERRMRQINEVMREWAMADSQSKNLPKADRQALNEHFQSILQTLEEQVSGERQRLVETHATRVIALINDQRRAALEGFLAALQGDPPQAERVLMALRRYLRAEQKEQRHTLRHYQHVAAVDPEKAQQMRFQVQTHLQVIEERMNQSLGLLDQNPHLAQELRPQIQELLLAEHLGPSELDASVPGSSSEDKGSLQPPESKDDPPVTLPKGSTDQESSSSGREKLTPLEQYEQKVNASAPRGFPFHSSDIQRDELAPSGTGVSREALSGLLIMGAGGGSLIVLSLLLLRKKKPYGTISHGVVEVDPMLTLEEQQLRELQRHGYENPTYRFLEERP 1 585 607 APSGTGVSRE ALSGLLIMGAGGGSLIVLSLLLL RKKKPYGTIS +Q06481 MAATGTAAAAATGRLLLLLLVGLTAPALALAGYIEALAANAGTGFAVAEPQIAMFCGKLNMHVNIQTGKWEPDPTGTKSCFETKEEVLQYCQEMYPELQITNVMEANQRVSIDNWCRRDKKQCKSRFVTPFKCLVGEFVSDVLLVPEKCQFFHKERMEVCENHQHWHTVVKEACLTQGMTLYSYGMLLPCGVDQFHGTEYVCCPQTKIIGSVSKEEEEEDEEEEEEEDEEEDYDVYKSEFPTEADLEDFTEAAVDEDDEDEEEGEEVVEDRDYYYDTFKGDDYNEENPTEPGSDGTMSDKEITHDVKAVCSQEAMTGPCRAVMPRWYFDLSKGKCVRFIYGGCGGNRNNFESEDYCMAVCKAMIPPTPLPTNDVDVYFETSADDNEHARFQKAKEQLEIRHRNRMDRVKKEWEEAELQAKNLPKAERQTLIQHFQAMVKALEKEAASEKQQLVETHLARVEAMLNDRRRMALENYLAALQSDPPRPHRILQALRRYVRAENKDRLHTIRHYQHVLAVDPEKAAQMKSQVMTHLHVIEERRNQSLSLLYKVPYVAQEIQEEIDELLQEQRADMDQFTASISETPVDVRVSSEESEEIPPFHPFHPFPALPENEDTQPELYHPMKKGSGVGEQDGGLIGAEEKVINSKNKVDENMVIDETLDVKEMIFNAERVGGLEEERESVGPLREDFSLSSSALIGLLVIAVAIATVIVISLVMLRKRQYGTISHGIVEVDPMLTPEERHLNKMQNHGYENPTYKYLEQMQI 1 694 716 LREDFSLSSS ALIGLLVIAVAIATVIVISLVML RKRQYGTISH +P35613 MAAALFVLLGFALLGTHGASGAAGFVQAPLSQQRWVGGSVELHCEAVGSPVPEIQWWFEGQGPNDTCSQLWDGARLDRVHIHATYHQHAASTISIDTLVEEDTGTYECRASNDPDRNHLTRAPRVKWVRAQAVVLVLEPGTVFTTVEDLGSKILLTCSLNDSATEVTGHRWLKGGVVLKEDALPGQKTEFKVDSDDQWGEYSCVFLPEPMGTANIQLHGPPRVKAVKSSEHINEGETAMLVCKSESVPPVTDWAWYKITDSEDKALMNGSESRFFVSSSQGRSELHIENLNMEADPGQYRCNGTSSKGSDQAIITLRVRSHLAALWPFLGIVAEVLVLVTIIFIYEKRRKPEDVLDDDDAGSAPLKSSGQHQNDKGKNVRQRNSS 1 323 345 IITLRVRSHL AALWPFLGIVAEVLVLVTIIFIY EKRRKPEDVL +P35070 
MDRAARCSGASSLPLLLALALGLVILHCVVADGNSTRSPETNGLLCGDPEENCAATTTQSKRKGHFSRCPKQYKHYCIKGRCRFVVAEQTPSCVCDEGYIGARCERVDLFYLRGDRGQILVICLIAVMVVFIILVIGVCTCCHPLRKRRKRKKKEEEMETLGKDITPINEDIEETNIA 1 119 141 LFYLRGDRGQ ILVICLIAVMVVFIILVIGVCTC CHPLRKRRKR +P09803 MGARCRSFSALLLLLQVSSWLCQELEPESCSPGFSSEVYTFPVPERHLERGHVLGRVRFEGCTGRPRTAFFSEDSRFKVATDGTITVKRHLKLHKLETSFLVRARDSSHRELSTKVTLKSMGHHHHRHHHRDPASESNPELLMFPSVYPGLRRQKRDWVIPPISCPENEKGEFPKNLVQIKSNRDKETKVFYSITGQGADKPPVGVFIIERETGWLKVTQPLDREAIAKYILYSHAVSSNGEAVEDPMEIVITVTDQNDNRPEFTQEVFEGSVAEGAVPGTSVMKVSATDADDDVNTYNAAIAYTIVSQDPELPHKNMFTVNRDTGVISVLTSGLDRESYPTYTLVVQAADLQGEGLSTTAKAVITVKDINDNAPVFNPSTYQGQVPENEVNARIATLKVTDDDAPNTPAWKAVYTVVNDPDQQFVVVTDPTTNDGILKTAKGLDFEAKQQYILHVRVENEEPFEGSLVPSTATVTVDVVDVNEAPIFMPAERRVEVPEDFGVGQEITSYTAREPDTFMDQKITYRIWRDTANWLEINPETGAIFTRAEMDREDAEHVKNSTYVALIIATDDGSPIATGTGTLLLVLLDVNDNAPIPEPRNMQFCQRNPQPHIITILDPDLPPNTSPFTAELTHGASVNWTIEYNDAAQESLILQPRKDLEIGEYKIHLKLADNQNKDQVTTLDVHVCDCEGTVNNCMKAGIVAAGLQVPAILGILGGILALLILILLLLLFLRRRTVVKEPLLPPDDDTRDNVYYYDEEGGGEEDQDFDLSQLHRGLDARPEVTRNDVAPTLMSVPQYRPRPANPDEIGNFIDENLKAADSDPTAPPYDSLLVFDYEGSGSEAASLSSLNSSESDQDQDYDYLNEWGNRFKKLADMYGGGEDD 1 711 733 GIVAAGLQVP AILGILGGILALLILILLLLLFL RRRTVVKEPL +P19022 
MCRIAGALRTLLPLLAALLQASVEASGEIALCKTGFPEDVYSAVLSKDVHEGQPLLNVKFSNCNGKRKVQYESSEPADFKVDEDGMVYAVRSFPLSSEHAKFLIYAQDKETQEKWQVAVKLSLKPTLTEESVKESAEVEEIVFPRQFSKHSGHLQRQKRDWVIPPINLPENSRGPFPQELVRIRSDRDKNLSLRYSVTGPGADQPPTGIFIINPISGQLSVTKPLDREQIARFHLRAHAVDINGNQVENPIDIVINVIDMNDNRPEFLHQVWNGTVPEGSKPGTYVMTVTAIDADDPNALNGMLRYRIVSQAPSTPSPNMFTINNETGDIITVAAGLDREKVQQYTLIIQATDMEGNPTYGLSNTATAVITVTDVNDNPPEFTAMTFYGEVPENRVDIIVANLTVTDKDQPHTPAWNAVYRISGGDPTGRFAIQTDPNSNDGLVTVVKPIDFETNRMFVLTVAAENQVPLAKGIQHPPQSTATVSVTVIDVNENPYFAPNPKIIRQEEGLHAGTMLTTFTAQDPDRYMQQNIRYTKLSDPANWLKIDPVNGQITTIAVLDRESPNVKNNIYNATFLASDNGIPPMSGTGTLQIYLLDINDNAPQVLPQEAETCETPDPNSINITALDYDIDPNAGPFAFDLPLSPVTIKRNWTITRLNGDFAQLNLKIKFLEAGIYEVPIIITDSGNPPKSNISILRVKVCQCDSNGDCTDVDRIVGAGLGTGAIIAILLCIIILLILVLMFVVWMKRRDKERQAKQLLIDPEDDVRDNILKYDEEGGGEEDQDYDLSQLQQPDTVEPDAIKPVGIRRMDERPIHAEPQYPVRSAAPHPGDIGDFINEGLKAADNDPTAPPYDSLLVFDYEGSGSTAGSLSSLNSSSSGGEQDYDYLNDWGPRFKKLADMYGGGDD 1 724 746 RIVGAGLGTG AIIAILLCIIILLILVLMFVVWM KRRDKERQAK +P16070 MDKFWWHAAWGLCLVPLSLAQIDLNITCRFAGVFHVEKNGRYSISRTEAADLCKAFNSTLPTMAQMEKALSIGFETCRYGFIEGHVVIPRIHPNSICAANNTGVYILTSNTSQYDTYCFNASAPPEEDCTSVTDLPNAFDGPITITIVNRDGTRYVQKGEYRTNPEDIYPSNPTDDDVSSGSSSERSSTSGGYIFYTFSTVHPIPDEDSPWITDSTDRIPATTLMSTSATATETATKRQETWDWFSWLFLPSESKNHLHTTTQMAGTSSNTISAGWEPNEENEDERDRHLSFSGSGIDDDEDFISSTISTTPRAFDHTKQNQDWTQWNPSHSNPEVLLQTTTRMTDVDRNGTTAYEGNWNPEAHPPLIHHEHHEEEETPHSTSTIQATPSSTTEETATQKEQWFGNRWHEGYRQTPKEDSHSTTGTAAASAHTSHPMQGRTTPSPEDSSWTDFFNPISHPMGRGHQAGRRMDMDSSHSITLQPTANPNTGLVEDLDRTGPLSMTTQQSNSQSFSTSHEGLEEDKDHPTTSTLTSSNRNDVTGGRRDPNHSEGSTTLLEGYTSHYPHTKESRTFIPVTSAKTGSFGVTAVTVGDSNSNVNRSLSGDQDTFHPSGGSHTTHGSESDGHSHGSQEGGANTTSGPIRTPQIPEWLIILASLLALALILAVCIAVNSRRRCGQKKKLVINSGNGAVEDRKPSGLNGEASKSQEMVHLVNKESSETPDQFMTADETRNLQNVDMKIGV 1 650 672 GPIRTPQIPE WLIILASLLALALILAVCIAVNS RRRCGQKKKL +P09603 
MTAPGAAGRCPPTTWLGSLLLLVCLLASRSITEEVSEYCSHMIGSGHLQSLQRLIDSQMETSCQITFEFVDQEQLKDPVCYLKKAFLLVQDIMEDTMRFRDNTPNAIAIVQLQELSLRLKSCFTKDYEEHDKACVRTFYETPLQLLEKVKNVFNETKNLLDKDWNIFSKNCNNSFAECSSQDVVTKPDCNCLYPKAIPSSDPASVSPHQPLAPSMAPVAGLTWEDSEGTEGSSLLPGEQPLHTVDPGSAKQRPPRSTCQSFEPPETPVVKDSTIGGSPQPRPSVGAFNPGMEDILDSAMGTNWVPEEASGEASEIPVPQGTELSPSRPGGGSMQTEPARPSNFLSASSPLPASAKGQQPADVTGTALPRVGPVRPTGQDWNHTPQKTDHPSALLRDPPEPGSPRISSLRPQGLSNPSTLSAQPQLSRSHSSGSVLPLGELEGRRSTRDRRSPAEPEGGPASEGAARPLPRFNSVPLTDTGHERQSEGSFSPQLQESVFHLLVPSVILVLLAVGGLLFYRWRRRSHQEPQRADSPLEQPEGSPLTQDDRQVELPV 1 496 518 EGSFSPQLQE SVFHLLVPSVILVLLAVGGLLFY RWRRRSHQEP +O94985 MLRRPAPALAPAARLLLAGLLCGGGVWAARVNKHKPWLEPTYHGIVTENDNTVLLDPPLIALDKDAPLRFAESFEVTVTKEGEICGFKIHGQNVPFDAVVVDKSTGEGVIRSKEKLDCELQKDYSFTIQAYDCGKGPDGTNVKKSHKATVHIQVNDVNEYAPVFKEKSYKATVIEGKQYDSILRVEAVDADCSPQFSQICSYEIITPDVPFTVDKDGYIKNTEKLNYGKEHQYKLTVTAYDCGKKRATEDVLVKISIKPTCTPGWQGWNNRIEYEPGTGALAVFPNIHLETCDEPVASVQATVELETSHIGKGCDRDTYSEKSLHRLCGAAAGTAELLPSPSGSLNWTMGLPTDNGHDSDQVFEFNGTQAVRIPDGVVSVSPKEPFTISVWMRHGPFGRKKETILCSSDKTDMNRHHYSLYVHGCRLIFLFRQDPSEEKKYRPAEFHWKLNQVCDEEWHHYVLNVEFPSVTLYVDGTSHEPFSVTEDYPLHPSKIETQLVVGACWQEFSGVENDNETEPVTVASAGGDLHMTQFFRGNLAGLTLRSGKLADKKVIDCLYTCKEGLDLQVLEDSGRGVQIQAHPSQLVLTLEGEDLGELDKAMQHISYLNSRQFPTPGIRRLKITSTIKCFNEATCISVPPVDGYVMVLQPEEPKISLSGVHHFARAASEFESSEGVFLFPELRIISTITREVEPEGDGAEDPTVQESLVSEEIVHDLDTCEVTVEGEELNHEQESLEVDMARLQQKGIEVSSSELGMTFTGVDTMASYEEVLHLLRYRNWHARSLLDRKFKLICSELNGRYISNEFKVEVNVIHTANPMEHANHMAAQPQFVHPEHRSFVDLSGHNLANPHPFAVVPSTATVVIVVCVSFLVFMIILGVFRIRAAHRRTMRDQDTGKENEMDWDDSALTITVNPMETYEDQHSSEEEEEEEEEEESEDGEEEDDITSAESESSEEEEGEQGDPQNATRQQQLEWDDSTLSY 1 860 882 PHPFAVVPST ATVVIVVCVSFLVFMIILGVFRI RAAHRRTMRD +Q9H4D0 
MLPGRLCWVPLLLALGVGSGSGGGGDSRQRRLLAAKVNKHKPWIETSYHGVITENNDTVILDPPLVALDKDAPVPFAGEICAFKIHGQELPFEAVVLNKTSGEGRLRAKSPIDCELQKEYTFIIQAYDCGAGPHETAWKKSHKAVVHIQVKDVNEFAPTFKEPAYKAVVTEGKIYDSILQVEAIDEDCSPQYSQICNYEIVTTDVPFAIDRNGNIRNTEKLSYDKQHQYEILVTAYDCGQKPAAQDTLVQVDVKPVCKPGWQDWTKRIEYQPGSGSMPLFPSIHLETCDGAVSSLQIVTELQTNYIGKGCDRETYSEKSLQKLCGASSGIIDLLPSPSAATNWTAGLLVDSSEMIFKFDGRQGAKVPDGIVPKNLTDQFTITMWMKHGPSPGVRAEKETILCNSDKTEMNRHHYALYVHNCRLVFLLRKDFDQADTFRPAEFHWKLDQICDKEWHYYVINVEFPVVTLYMDGATYEPYLVTNDWPIHPSHIAMQLTVGACWQGGEVTKPQFAQFFHGSLASLTIRPGKMESQKVISCLQACKEGLDINSLESLGQGIKYHFNPSQSILVMEGDDIGNINRALQKVSYINSRQFPTAGVRRLKVSSKVQCFGEDVCISIPEVDAYVMVLQAIEPRITLRGTDHFWRPAAQFESARGVTLFPDIKIVSTFAKTEAPGDVKTTDPKSEVLEEMLHNLDFCDILVIGGDLDPRQECLELNHSELHQRHLDATNSTAGYSIYGVGSMSRYEQVLHHIRYRNWRPASLEARRFRIKCSELNGRYTSNEFNLEVSILHEDQVSDKEHVNHLIVQPPFLQSVHHPESRSSIQHSSVVPSIATVVIIISVCMLVFVVAMGVYRVRIAHQHFIQETEAAKESEMDWDDSALTITVNPMEKHEGPGHGEDETEGEEEEEAEEEMSSSSGSDDSEEEEEEEGMGRGRHGQNGARQAQLEWDDSTLPY 1 831 853 SSIQHSSVVP SIATVVIIISVCMLVFVVAMGVY RVRIAHQHFI +P78310 MALLLCFVLLCGVVDFARSLSITTPEEMIEKAKGETAYLPCKFTLSPEDQGPLDIEWLISPADNQKVDQVIILYSGDKIYDDYYPDLKGRVHFTSNDLKSGDASINVTNLQLSDIGTYQCKVKKAPGVANKKIHLVVLVKPSGARCYVDGSEEIGSDFKIKCEPKEGSLPLQYEWQKLSDSQKMPTSWLAEMTSSVISVKNASSEYSGTYSCTVRNRVGSDQCLLRLNVVPPSNKAGLIAGAIIGTLLALALIGLIIFCCRKKRREEKYEKEVHHDIREDVPPPKSRTSTARSYIGSNHSSLGSMSPSNMEGYSKTQYNQVPSEDFERTPQSPTLPPAKVAAPNLSRMGAIPVMIPAQSKDGSIV 1 236 258 RLNVVPPSNK AGLIAGAIIGTLLALALIGLIIF CCRKKRREEK +D3ZZK3 
MAGIFYFILFSFLFGICDAVTGSRVYPANEVTLLDSRSVQGELGWIASPLEGGWEEVSIMDEKNTPIRTYQVCNVMEASQNNWLRTDWITREGAQRVYIEIKFTLRDCNSLPGVMGTCKETFNLYYYESDNDKERFIRESQFGKIDTIAADESFTQVDIGDRIMKLNTEIRDVGPLSKKGFYLAFQDVGACIALVSVRVFYKKCPLTVRNLAQFPDTITGADTSSLVEVRGSCVNNSEEKDVPKMYCGADGEWLVPIGNCLCNAGHEEQNGECQACKIGYYKALSTDATCAKCPPHSYSVWEGATSCTCDRGFFRADNDAASMPCTRPPSAPLNLISNVNETSVNLEWSSPQNTGGRQDISYNVVCKKCGAGDPSKCRPCGSGVHYTPQQNGLKTTRVSITDLLAHTNYTFEIWAVNGVSKYNPSPDQSVSVTVTTNQAAPSSIALVQAKEVTRYSVALAWLEPDRPNGVILEYEVKYYEKDQNERSYRIVRTAARNTDIKGLNPLTSYVFHVRARTAAGYGDFSEPLEVTTNTVPSRIIGDGANSTVLLVSVSGSVVLVVILIAAFVISRRRSKYSQAKQEADEEKHLNQGVRTYVDPFTYEDPNQAVREFAKEIDASCIKIEKVIGVGEFGEVCSGRLKVPGKREICVAIKTLKAGYTDKQRRDFLSEASIMGQFDHPNIIHLEGVVTKCKPVMIITEYMENGSLDAFLRKNDGRFTVIQLVGMLRGIGSGMKYLSDMSYVHRDLAARNILVNSNLVCKVSDFGMSRVLEDDPEAAYTTRGGKIPIRWTAPEAIAYRKFTSASDVWSYGIVMWEVMSYGERPYWDMSNQDVIKAIEEGYRLPPPMDCPIALHQLMLDCWQKERSDRPKFGQIVNMLDKLIRNPNSLKRTGPESSRPNTALLDPSSPEFSAVVSVGDWLQAIKMDRYKDNFTAAGYTTLEAVVHMSQDDLARIGITAITHQNKILSSVQAMRTQMQQMHGRMVPV 1 548 570 RIIGDGANST VLLVSVSGSVVLVVILIAAFVIS RRRSKYSQAK +Q14118 MRMSVGLSLLLPLSGRTFLLLLSVVMAQSHWPSEPSEAVRDWENQLEASMHSVLSDLHEAVPTVVGIPDGTAVVGRSFRVTIPTDLIASSGDIIKVSAAGKEALPSWLHWDSQSHTLEGLPLDTDKGVHYISVSATRLGANGSHIPQTSSVFSIEVYPEDHSELQSVRTASPDPGEVVSSACAADEPVTVLTVILDADLTKMTPKQRIDLLHRMRSFSEVELHNMKLVPVVNNRLFDMSAFMAGPGNAKKVVENGALLSWKLGCSLNQNSVPDIHGVEAPAREGAMSAQLGYPVVGWHIANKKPPLPKRVRRQIHATPTPVTAIGPPTTAIQEPPSRIVPTPTSPAIAPPTETMAPPVRDPVPGKPTVTIRTRGAIIQTPTLGPIQPTRVSEAGTTVPGQIRPTMTIPGYVEPTAVATPPTTTTKKPRVSTPKPATPSTDSTTTTTRRPTKKPRTPRPVPRVTTKVSITRLETASPPTRIRTTTSGVPRGGEPNQRPELKNHIDRVDAWVGTYFEVKIPSDTFYDHEDTTTDKLKLTLKLREQQLVGEKSWVQFNSNSQLMYGLPDSSHVGKHEYFMHATDKGGLSAVDAFEIHVHRRPQGDRAPARFKAKFVGDPALVLNDIHKKIALVKKLAFAFGDRNCSTITLQNITRGSIVVEWTNNTLPLEPCPKEQIAGLSRRIAEDDGKPRPAFSNALEPDFKATSITVTGSGSCRHLQFIPVVPPRRVPSEAPPTEVPDRDPEKSSEDDVYLHTVIPAVVVAAILLIAGIIAMICYRKKRKGKLTLEDQATFIKKGVPIIFADELDDSKPPPSSSMPLILQEEKAPLPPPEYPNQSVPETTPLNQDTMGEYTPLRDEDPNAPPYQPPPPFTAPMEGKGSRPKNMTPYRSPPPYVPP 1 753 775 KSSEDDVYLH TVIPAVVVAAILLIAGIIAMICY 
RKKRKGKLTL +Q63155 MENSLGCVWVPKLAFVLFGASLLSAHLQVTGFQIKPFTSLHFVSEPSDAVTMRGGNVLLNCSAESDRGVPVIKWKKDGLILALGMDDRKQQLPNGSLLIQNILHSRHHKPDEGLYQCEASLGDSGSIISRTAKVMVAGPLRFLSQTESITAFMGDTVLLKCEVIGDPMPTIHWQKNQQDLNPIPGDSRVVVLPSGALQISRLQPGDSGVYRCSARNPASTRTGNEAEVRILSDPGLHRQLYFLQRPSNVIAIEGKDAVLECCVSGYPPPSFTWLRGEEVIQLRSKKYSLLGGSNLLISNVTDDDSGTYTCVVTYKNENISASAELTVLVPPWFLNHPSNLYAYESMDIEFECAVSGKPVPTVNWMKNGDVVIPSDYFQIVGGSNLRILGVVKSDEGFYQCVAENEAGNAQSSAQLIVPKPAIPSSSILPSAPRDVVPVLVSSRFVRLSWRPPAEAKGNIQTFTVFFSREGDNRERALNTTQPGSLQLTVGNLKPEAMYTFRVVAYNEWGPGESSQPIKVATQPELQVPGPVENLHAVSASPTSILITWEPPAYANGPVQGYRLFCTEVSTGKEQNIEVDGLSYKLEGLKKFTEYTLRFLAYNRYGPGVSTDDITVVTLSDVPSAPPQNVSLEVVNSRSIKVSWLPPPSGTQNGFITGYKIRHRKTTRRGEMETLEPNNLWYLFTGLEKGSQYSFQVSAMTVNGTGPPSNWYTAETPENDLDESQVPDQPSSLHVRPQTNCIIMSWTPPLNPNIVVRGYIIGYGVGSPYAETVRVDSKQRYYSIERLESSSHYVISLKAFNNAGEGVPLYESATTRSITDPTDPVDYYPLLDDFPTSGPDVSTPMLPPVGVQAVALTHEAVRVSWADNSVPKNQKTSDVRLYTVRWRTSFSASAKYKSEDTTSLSYTATGLKPNTMYEFSVMVTKNRRSSTWSMTAHATTYEAAPTSAPKDLTVITREGKPRAVIVSWQPPLEANGKITAYILFYTLDKNIPIDDWIMETISGDRLTHQIMDLSLDTMYYFRIQARNAKGVGPLSDPILFRTLKVEHPDKMANDQGRHGDGGYWPVDTNLIDRSTLNEPPIGQMHPPHGSVTPQKNSNLLVITVVTVGVLTVLVVVIVAVICTRRSSAQQRKKRATHSASKRKGSQKDLRPPDLWIHHEEMEMKNIEKPAGTDPAGRGSPIQSCQDLTPVSHSQSESQMGSKSASHSGQDTEEAGSSMSTLERSLAARRATRTKLMIPMEAQSNNPAVVSAIPVPTLESAQYPGILPSPTCGYPHPQFTLRPVPFPTLSVDRGFGAGRTVSEGPTAQQQPMLPPAQPEHPSSEEAPSRTIPTACVRPTHPLRSFANPLLPPPMSAIEPKVPYTPLLSQPGPTLPKTHVKTASLGLAGKARSPLLPVSVPTAPEVSEESHKPTEDPASVYEQDDLSEQMASLEGLMKQLNAITGSAF 1 1099 1121 SVTPQKNSNL LVITVVTVGVLTVLVVVIVAVIC TRRSSAQQRK +Q61483 
MGRRSALALAVVSALLCQVWSSGVFELKLQEFVNKKGLLGNRNCCRGGSGPPCACRTFFRVCLKHYQASVSPEPPCTYGSAVTPVLGVDSFSLPDGAGIDPAFSNPIRFPFGFTWPGTFSLIIEALHTDSPDDLATENPERLISRLTTQRHLTVGEEWSQDLHSSGRTDLRYSYRFVCDEHYYGEGCSVFCRPRDDAFGHFTCGDRGEKMCDPGWKGQYCTDPICLPGCDDQHGYCDKPGECKCRVGWQGRYCDECIRYPGCLHGTCQQPWQCNCQEGWGGLFCNQDLNYCTHHKPCRNGATCTNTGQGSYTCSCRPGYTGANCELEVDECAPSPCKNGASCTDLEDSFSCTCPPGFYGKVCELSAMTCADGPCFNGGRCSDNPDGGYTCHCPLGFSGFNCEKKMDLCGSSPCSNGAKCVDLGNSYLCRCQAGFSGRYCEDNVDDCASSPCANGGTCRDSVNDFSCTCPPGYTGKNCSAPVSRCEHAPCHNGATCHQRGQRYMCECAQGYGGPNCQFLLPEPPPGPMVVDLSERHMESQGGPFPWVAVCAGVVLVLLLLLGCAAVVVCVRLKLQKHQPPPEPCGGETETMNNLANCQREKDVSVSIIGATQIKNTNKKADFHGDHGAEKSSFKVRYPTVDYNLVRDLKGDEATVRDTHSKRDTKCQSQSSAGEEKIAPTLRGGEIPDRKRPESVYSTSKDTKYQSVYVLSAEKDECVIATEV 1 545 567 HMESQGGPFP WVAVCAGVVLVLLLLLGCAAVVV CVRLKLQKHQ +Q9ERC8 MWILALSLFQSFANVFSEEPHSSLYFVNASLQEVVFASTSGTLVPCPAAGIPPVTLRWYLATGEEIYDVPGIRHVHPNGTLQIFPFPPSSFSTLIHDNTYYCTAENPSGKIRSQDVHIKAVLREPYTVRVEDQKTMRGNVAVFKCIIPSSVEAYVTVVSWEKDTVSLVSGSRFLITSTGALYIKDVQNEDGLYNYRCITRHRYTGETRQSNSARLFVSDPANSAPSILDGFDHRKAMAGQRVELPCKALGHPEPDYRWLKDNMPLELSGRFQKTVTGLLIENSRPSDSGSYVCEVSNRYGTAKVIGRLYVKQPLKATISPRKVKSSVGSQVSLSCSVTGNEDQELSWYRNGEILNPGKNVRITGLNHANLIMDHMVKSDGGAYQCFVRKDKLSAQDYVQVVLEDGTPKIISAFSEKVVSPAEPVSLVCNVKGTPLPTVTWTLDDDPILKGSGHRISQMITSEGNVVSYLNISSSQVRDGGVYRCTANNSAGVVLYQARINVRGPASIRPMKNITAIAGRDTYIHCRVIGYPYYSIKWYKNANLLPFNHRQVAFENNGTLKLSDVQKEVDEGEYTCNVLVQPQLSTSQSVHVTVKVPPFIQPFEFPRFSIGQRVFIPCVVVSGDLPITITWQKDGRPIPASLGVTIDNIDFTSSLRISNLSLMHNGNYTCIARNEAAAVEHQSQLIVRVPPKFVVQPRDQDGIYGKAVILNCSAEGYPVPTIVWKFSKGAGVPQFQPIALNGRIQVLSNGSLLIKHVVEEDSGYYLCKVSNDVGADVSKSMYLTVKIPAMITSYPNTTLATQGQRKEMSCTAHGEKPIIVRWEKEDRIINPEMARYLVSTKEVGEEVISTLQILPTVREDSGFFSCHAINSYGEDRGIIQLTVQEPPDPPEIEIKDVKARTITLRWTMGFDGNSPITGYDIECKNKSDSWDSAQRTKDVSPQLNSATIIDIHPSSTYSIRMYAKNRIGKSEPSNEITITADEAAPDGPPQEVHLEPTSSQSIRVTWKAPKKHLQNGIIRGYQIGYREYSTGGNFQFNIISIDTTGDSEVYTLDNLNKFTQYGLVVQACNRAGTGPSSQEIITTTLEDVPSYPPENVQAIATSPESISISWSTLSKEALNGILQGFRVIYWANLIDGELGEIKNVTTTQPSLELDGLEKYTNYSIQVLAFTRAGDGVRSEQIFTRTKEDVPGPPAGVKAAAASASMVFVSWLPPL
KLNGIIRKYTVFCSHPYPTVISEFEASPDSFSYRIPNLSRNRQYSVWVVAVTSAGRGNSSEIITVEPLAKAPARILTFSGTVTTPWMKDIVLPCKAVGDPSPAVKWMKDSNGTPSLVTIDGRRSIFSNGSFIIRTVKAEDSGYYSCVANNNWGSDEIILNLQVQVPPDQPRLTVSKTTSSSITLSWLPGDNGGSSIRGYILQYSEDNSEQWGSFPISPSERSYRLENLKCGTWYKFTLTAQNGVGPGRISEIIEAKTLGKEPQFSKEQELFASINTTRVRLNLIGWNDGGCPITSFTLEYRPFGTTVWTTAQRTSLSKSYILYDLQEATWYELQMRVCNSAGCAEKQANFATLNYDGSTIPPLIKSVVQSEEGLTTNEGLKILVTISCILVGVLLLFVLLLVVRRRRREQRLKRLRDAKSLAEMLMSKNTRTSDTLSKQQQTLRMHIDIPRAQLLIEERDTMETIDDRSTVLLTDADFGEAAKQKSLTVTHTVHYQSVSQATGPLVDVSDARPGTNPTTRRNAKAGPTARNRYASQWTLNRPHPTISAHTLTTDWRLPTPRATGSVDKESDSYSVSPSQDTDRARSSMVSTESASSTYEELARAYEHAKMEEQLRHAKFTITECFISDTSSEQLTAGTNEYTDSLTSSTPSESGICRFTASPPKPQDGGRVVNMAVPKAHRPGDLIHLPPYLRMDFLLNRGAPGTSRDLSLGQACLEPQKSRTLKRPTVLEPTPMEASSSTSSTREGQQSWQQGAVATLPQREGAELGQAAKMSSSQESLLDSRGHLKGNNPYAKSYTLV 1 1595 1617 EGLTTNEGLK ILVTISCILVGVLLLFVLLLVVR RRRREQRLKR +P54763 MAVRRLGAALLLLPLLAAVEETLMDSTTATAELGWMVHPPSGWEEVSGYDENMNTIRTYQVCNVFESSQNNWLRTKFIRRRGAHRIHVEMKFSVRDCSSIPSVPGSCKETFNLYYYEADFDLATKTFPNWMENPWVKVDTIAADESFSQVDLGGRVMKINTEVRSFGPVSRNGFYLAFQDYGGCMSLIAVRVFYRKCPRIIQNGAIFQETLSGAESTSLVAARGSCIANAEEVDVPIKLYCNGDGEWLVPIGRCMCKAGFEAVENGTVCRGCPSGTFKANQGDEACTHCPINSRTTSEGATNCVCRNGYYRADLDPLDMPCTTIPSAPQAVISSVNETSLMLEWTPPRDSGGREDLVYNIICKSCGSGRGACTRCGDNVQYAPRQLGLTEPRIYISDLLAHTQYTFEIQAVNGVTDQSPFSPQFASVNITTNQAAPSAVSIMHQVSRTVDSITLSWSQPDQPNGVILDYELQYYEKELSEYNATAIKSPTNTVTVQGLKAGAIYVFQVRARTVAGYGRYSGKMYFQTMTEAEYQTSIKEKLPLIVGSSAAGLVFLIAVVVIAIVCNRRGFERADSEYTDKLQHYTSGHMTPGMKIYIDPFTYEDPNEAVREFAKEIDISCVKIEQVIGAGEFGEVCSGHLKLPGKREIFVAIKTLKSGYTEKQRRDFLSEASIMGQFDHPNVIHLEGVVTKSTPVMIITEFMENGSLDSFLRQNDGQFTVIQLVGMLRGIAAGMKYLADMNYVHRDLAARNILVNSNLVCKVSDFGLSRFLEDDTSDPTYTSALGGKIPIRWTAPEAIQYRKFTSASDVWSYGIVMWEVMSYGERPYWDMTNQDVINAIEQDYRLPPPMDCPSALHQLMLDCWQKDRNHRPKFGQIVNTLDKMIRNPNSLKAMAPLSSGINLPLLDRTIPDYTSFNTVDEWLEAIKMGQYKESFANAGFTSFDVVSQMMMEDILRVGVTLAGHQKKILNSIQVMRAQMNQIQSVEV 1 543 565 YQTSIKEKLP LIVGSSAAGLVFLIAVVVIAIVC NRRGFERADS +Q15303 
MKPATGLWVWVSLLVAAGTVQPSDSQSVCAGTENKLSSLSDLEQQYRALRKYYENCEVVMGNLEITSIEHNRDLSFLRSVREVTGYVLVALNQFRYLPLENLRIIRGTKLYEDRYALAIFLNYRKDGNFGLQELGLKNLTEILNGGVYVDQNKFLCYADTIHWQDIVRNPWPSNLTLVSTNGSSGCGRCHKSCTGRCWGPTENHCQTLTRTVCAEQCDGRCYGPYVSDCCHRECAGGCSGPKDTDCFACMNFNDSGACVTQCPQTFVYNPTTFQLEHNFNAKYTYGAFCVKKCPHNFVVDSSSCVRACPSSKMEVEENGIKMCKPCTDICPKACDGIGTGSLMSAQTVDSSNIDKFINCTKINGNLIFLVTGIHGDPYNAIEAIDPEKLNVFRTVREITGFLNIQSWPPNMTDFSVFSNLVTIGGRVLYSGLSLLILKQQGITSLQFQSLKEISAGNIYITDNSNLCYYHTINWTTLFSTINQRIVIRDNRKAENCTAEGMVCNHLCSSDGCWGPGPDQCLSCRRFSRGRICIESCNLYDGEFREFENGSICVECDPQCEKMEDGLLTCHGPGPDNCTKCSHFKDGPNCVEKCPDGLQGANSFIFKYADPDRECHPCHPNCTQGCNGPTSHDCIYYPWTGHSTLPQHARTPLIAAGVIGGLFILVIVGLTFAVYVRRKSIKKKRALRRFLETELVEPLTPSGTAPNQAQLRILKETELKRVKVLGSGAFGTVYKGIWVPEGETVKIPVAIKILNETTGPKANVEFMDEALIMASMDHPHLVRLLGVCLSPTIQLVTQLMPHGCLLEYVHEHKDNIGSQLLLNWCVQIAKGMMYLEERRLVHRDLAARNVLVKSPNHVKITDFGLARLLEGDEKEYNADGGKMPIKWMALECIHYRKFTHQSDVWSYGVTIWELMTFGGKPYDGIPTREIPDLLEKGERLPQPPICTIDVYMVMVKCWMIDADSRPKFKELAAEFSRMARDPQRYLVIQGDDRMKLPSPNDSKFFQNLLDEEDLEDMMDAEEYLVPQAFNIPPPIYTSRARIDSNRSEIGHSPPPAYTPMSGNQFVYRDGGFAAEQGVSVPYRAPTSTIPEAPVAQGATAEIFDDSCCNGTLRKPVAPHVQEDSSTQRYSADPTVFAPERSPRGELDEEGYMTPMRDKPKQEYLNPVEENPFVSRRKNGDLQALDNPEYHNASNGPPKAEDEYVNEPLYLNTFANTLGKAEYLKNNILSMPEKAKKAFDNPDYWNHSLPPRSTLQHPDYLQEYSTKYFYKQNGRIRPIVAENPEYLSEFSLKPGTVLPPPPYRHRNTVV 1 653 675 TLPQHARTPL IAAGVIGGLFILVIVGLTFAVYV RRKSIKKKRA +P16882 
MDLCQVFLTLALAVTSSTFSGSEATPATLGKASPVLQRINPSLGTSSSGKPRFTKCRSPELETFSCYWTEGDNPDLKTPGSIQLYYAKRESQRQAARIAHEWTQEWKECPDYVSAGKNSCYFNSSYTSIWIPYCIKLTTNGDLLDQKCFTVDEIVQPDPPIGLNWTLLNISLTGIRGDIQVSWQPPPNADVLKGWIILEYEIQYKEVNESKWKVMGPIWLTYCPVYSLRMDKEHEVRVRSRQRSFEKYSEFSEVLRVIFPQTNILEACEEDIQFPWFLIIIFGIFGVAVMLFVVIFSKQQRIKMLILPPVPVPKIKGIDPDLLKEGKLEEVNTILGIHDNYKPDFYNDDSWVEFIELDIDEADVDEKTEGSDTDRLLSNDHEKSAGILGAKDDDSGRTSCYDPDILDTDFHTSDMCDGTLKFRQSQKLNMEADLLCLDQKNLKNLPYDASLGSLHPSITQTVEENKPQPLLSSETEATHQLASTPMSNPTSLANIDFYAQVSDITPAGGDVLSPGQKIKAGIAQGNTQREVATPCQENYSMNSAYFCESDAKKCIAVARRMEATSCIKPSFNQEDIYITTESLTTTAQMSETADIAPDAEMSVPDYTTVHTVQSPRGLILNATALPLPDKKNFPSSCGYVSTDQLNKIMQ 1 274 296 ILEACEEDIQ FPWFLIIIFGIFGVAVMLFVVIF SKQQRIKMLI +P04439 MAVMAPRTLLLLLSGALALTQTWAGSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDQETRNVKAQSQTDRVDLGTLRGYYNQSEAGSHTIQIMYGCDVGSDGRFLRGYRQDAYDGKDYIALNEDLRSWTAADMAAQITKRKWEAAHEAEQLRAYLDGTCVEWLRRYLENGKETLQRTDPPKTHMTHHPISDHEATLRCWALGFYPAEITLTWQRDGEDQTQDTELVETRPAGDGTFQKWAAVVVPSGEEQRYTCHVQHEGLPKPLTLRWELSSQPTIPIVGIIAGLVLLGAVITGAVVAAVMWRRKSSDRKGGSYTQAASSDSAQGSDVSLTACKV 1 308 330 WELSSQPTIP IVGIIAGLVLLGAVITGAVVAAV MWRRKSSDRK +P08069 
MKSGSGGGSPTSLWGLLFLSAALSLWPTSGEICGPGIDIRNDYQQLKRLENCTVIEGYLHILLISKAEDYRSYRFPKLTVITEYLLLFRVAGLESLGDLFPNLTVIRGWKLFYNYALVIFEMTNLKDIGLYNLRNITRGAIRIEKNADLCYLSTVDWSLILDAVSNNYIVGNKPPKECGDLCPGTMEEKPMCEKTTINNEYNYRCWTTNRCQKMCPSTCGKRACTENNECCHPECLGSCSAPDNDTACVACRHYYYAGVCVPACPPNTYRFEGWRCVDRDFCANILSAESSDSEGFVIHDGECMQECPSGFIRNGSQSMYCIPCEGPCPKVCEEEKKTKTIDSVTSAQMLQGCTIFKGNLLINIRRGNNIASELENFMGLIEVVTGYVKIRHSHALVSLSFLKNLRLILGEEQLEGNYSFYVLDNQNLQQLWDWDHRNLTIKAGKMYFAFNPKLCVSEIYRMEEVTGTKGRQSKGDINTRNNGERASCESDVLHFTSTTTSKNRIIITWHRYRPPDYRDLISFTVYYKEAPFKNVTEYDGQDACGSNSWNMVDVDLPPNKDVEPGILLHGLKPWTQYAVYVKAVTLTMVENDHIRGAKSEILYIRTNASVPSIPLDVLSASNSSSQLIVKWNPPSLPNGNLSYYIVRWQRQPQDGYLYRHNYCSKDKIPIRKYADGTIDIEEVTENPKTEVCGGEKGPCCACPKTEAEKQAEKEEAEYRKVFENFLHNSIFVPRPERKRRDVMQVANTTMSSRSRNTTAADTYNITDPEELETEYPFFESRVDNKERTVISNLRPFTLYRIDIHSCNHEAEKLGCSASNFVFARTMPAEGADDIPGPVTWEPRPENSIFLKWPEPENPNGLILMYEIKYGSQVEDQRECVSRQEYRKYGGAKLNRLNPGNYTARIQATSLSGNGSWTDPVFFYVQAKTGYENFIHLIIALPVAVLLIVGGLVIMLYVFHRKRNNSRLGNGVLYASVNPEYFSAADVYVPDEWEVAREKITMSRELGQGSFGMVYEGVAKGVVKDEPETRVAIKTVNEAASMRERIEFLNEASVMKEFNCHHVVRLLGVVSQGQPTLVIMELMTRGDLKSYLRSLRPEMENNPVLAPPSLSKMIQMAGEIADGMAYLNANKFVHRDLAARNCMVAEDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMSPESLKDGVFTTYSDVWSFGVVLWEIATLAEQPYQGLSNEQVLRFVMEGGLLDKPDNCPDMLFELMRMCWQYNPKMRPSFLEIISSIKEEMEPGFREVSFYYSEENKLPEPEELDLEPENMESVPLDPSASSSSLPLPDRHSGHKAENGPGPGVLVLRASFDERQPYAHMNGGRKNERALPLPQSSTC 1 936 958 AKTGYENFIH LIIALPVAVLLIVGGLVIMLYVF HRKRNNSRLG +P27930 MLRLYVLVMGVSAFTLQPAAHTGAARSCRFRGRHYKREFRLEGEPVALRCPQVPYWLWASVSPRINLTWHKNDSARTVPGEEETRMWAQDGALWLLPALQEDSGTYVCTTRNASYCDKMSIELRVFENTDAFLPFISYPQILTLSTSGVLVCPDLSEFTRDKTDVKIQWYKDSLLLDKDNEKFLSVRGTTHLLVHDVALEDAGYYRCVLTFAHEGQQYNITRSIELRIKKKKEETIPVIISPLKTISASLGSRLTIPCKVFLGTGTPLTTMLWWTANDTHIESAYPGGRVTEGPRQEYSENNENYIEVPLIFDPVTREDLHMDFKCVVHNTLSFQTLRTTVKEASSTFSWGIVLAPLSLAFLVLGGIWMHRRCKHRTGKADGLTVLWPHHQDFQSYPK 1 347 369 LRTTVKEASS TFSWGIVLAPLSLAFLVLGGIWM HRRCKHRTGK +Q9Y219 
MRAQGRGRLPRRLLLLLALWVQAARPMGYFELQLSALRNVNGELLSGACCDGDGRTTRAGGCGHDECDTYVRVCLKEYQAKVTPTGPCSYGHGATPVLGGNSFYLPPAGAAGDRARARARAGGDQDPGLVVIPFQFAWPRSFTLIVEAWDWDNDTTPNEELLIERVSHAGMINPEDRWKSLHFSGHVAHLELQIRVRCDENYYSATCNKFCRPRNDFFGHYTCDQYGNKACMDGWMGKECKEAVCKQGCNLLHGGCTVPGECRCSYGWQGRFCDECVPYPGCVHGSCVEPWQCNCETNWGGLLCDKDLNYCGSHHPCTNGGTCINAEPDQYRCTCPDGYSGRNCEKAEHACTSNPCANGGSCHEVPSGFECHCPSGWSGPTCALDIDECASNPCAAGGTCVDQVDGFECICPEQWVGATCQLDANECEGKPCLNAFSCKNLIGGYYCDCIPGWKGINCHINVNDCRGQCQHGGTCKDLVNGYQCVCPRGFGGRHCELERDECASSPCHSGGLCEDLADGFHCHCPQGFSGPLCEVDVDLCEPSPCRNGARCYNLEGDYYCACPDDFGGKNCSVPREPCPGGACRVIDGCGSDAGPGMPGTAASGVCGPHGRCVSQPGGNFSCICDSGFTGTYCHENIDDCLGQPCRNGGTCIDEVDAFRCFCPSGWEGELCDTNPNDCLPDPCHSRGRCYDLVNDFYCACDDGWKGKTCHSREFQCDAYTCSNGGTCYDSGDTFRCACPPGWKGSTCAVAKNSSCLPNPCVNGGTCVGSGASFSCICRDGWEGRTCTHNTNDCNPLPCYNGGICVDGVNWFRCECAPGFAGPDCRINIDECQSSPCAYGATCVDEINGYRCSCPPGRAGPRCQEVIGFGRSCWSRGTPFPHGSSWVEDCNSCRCLDGRRDCSKVWCGWKPCLLAGQPEALSAQCPLGQRCLEKAPGQCLRPPCEAWGECGAEEPPSTPCLPRSGHLDNNCARLTLHFNRDHVPQGTTVGAICSGIRSLPATRAVARDRLLVLLCDRASSGASAVEVAVSFSPARDLPDSSLIQGAAHAIVAAITQRGNSSLLLAVTEVKVETVVTGGSSTGLLVPVLCGAFSVLWLACVVLCVWWTRKRRKERERSRLPREESANNQWAPLNPIRNPIERPGGHKDVLYQCKNFTPPPRRADEALPGPAGHAAVREDEEDEDLGRGEEDSLEAEKFLSHKFTKDPGRSPGRPAHWASGPKVDNRAVRSINEARYAGKE 1 1083 1105 VVTGGSSTGL LVPVLCGAFSVLWLACVVLCVWW TRKRRKERER +P15382 MILSNTTAVTPFLTKLWQETVQQGGNMSGLARRSPRSSDGKLEALYVLMVLGFFGFFTLGIMLSYIRSKKLEHSNDPFNVYIESDAWQEKDKAYVQARVLESYRSCYVVENHLAIEQPNTHLPETKPSP 1 44 66 SPRSSDGKLE ALYVLMVLGFFGFFTLGIMLSYI RSKKLEHSND +Q9Y6J6 MSTLSNFTQTLEDVFRRIFITYMDNWRQNTTAEQEALQAKVDAENFYYVILYLMVMIGMFSFIIVAILVSTVKSKRREHSNDPYHQYIVEDWQEKYKSQILNLEESKATIHENIGAAGFKMSP 1 47 69 LQAKVDAENF YYVILYLMVMIGMFSFIIVAILV STVKSKRREH +P11627 
MVVMLRYVWPLLLCSPCLLIQIPDEYKGHHVLEPPVITEQSPRRLVVFPTDDISLKCEARGRPQVEFRWTKDGIHFKPKEELGVVVHEAPYSGSFTIEGNNSFAQRFQGIYRCYASNKLGTAMSHEIQLVAEGAPKWPKETVKPVEVEEGESVVLPCNPPPSAAPPRIYWMNSKIFDIKQDERVSMGQNGDLYFANVLTSDNHSDYICNAHFPGTRTIIQKEPIDLRVKPTNSMIDRKPRLLFPTNSSSRLVALQGQSLILECIAEGFPTPTIKWLHPSDPMPTDRVIYQNHNKTLQLLNVGEEDDGEYTCLAENSLGSARHAYYVTVEAAPYWLQKPQSHLYGPGETARLDCQVQGRPQPEITWRINGMSMETVNKDQKYRIEQGSLILSNVQPTDTMVTQCEARNQHGLLLANAYIYVVQLPARILTKDNQTYMAVEGSTAYLLCKAFGAPVPSVQWLDEEGTTVLQDERFFPYANGTLSIRDLQANDTGRYFCQAANDQNNVTILANLQVKEATQITQGPRSAIEKKGARVTFTCQASFDPSLQASITWRGDGRDLQERGDSDKYFIEDGKLVIQSLDYSDQGNYSCVASTELDEVESRAQLLVVGSPGPVPHLELSDRHLLKQSQVHLSWSPAEDHNSPIEKYDIEFEDKEMAPEKWFSLGKVPGNQTSTTLKLSPYVHYTFRVTAINKYGPGEPSPVSESVVTPEAAPEKNPVDVRGEGNETNNMVITWKPLRWMDWNAPQIQYRVQWRPQGKQETWRKQTVSDPFLVVSNTSTFVPYEIKVQAVNNQGKGPEPQVTIGYSGEDYPQVSPELEDITIFNSSTVLVRWRPVDLAQVKGHLKGYNVTYWWKGSQRKHSKRHIHKSHIVVPANTTSAILSGLRPYSSYHVEVQAFNGRGLGPASEWTFSTPEGVPGHPEALHLECQSDTSLLLHWQPPLSHNGVLTGYLLSYHPVEGESKEQLFFNLSDPELRTHNLTNLNPDLQYRFQLQATTQQGGPGEAIVREGGTMALFGKPDFGNISATAGENYSVVSWVPRKGQCNFRFHILFKALPEGKVSPDHQPQPQYVSYNQSSYTQWNLQPDTKYEIHLIKEKVLLHHLDVKTNGTGPVRVSTTGSFASEGWFIAFVSAIILLLLILLILCFIKRSKGGKYSVKDKEDTQVDSEARPMKDETFGEYRSLESDNEEKAFGSSQPSLNGDIKPLGSDDSLADYGGSVDVQFNEDGSFIGQYSGKKEKEAAGGNDSSGATSPINPAVALE 1 1124 1146 VSTTGSFASE GWFIAFVSAIILLLLILLILCFI KRSKGGKYSV +P01130 
MGPWGWKLRWTVALLLAAAGTAVGDRCERNEFQCQDGKCISYKWVCDGSAECQDGSDESQETCLSVTCKSGDFSCGGRVNRCIPQFWRCDGQVDCDNGSDEQGCPPKTCSQDEFRCHDGKCISRQFVCDSDRDCLDGSDEASCPVLTCGPASFQCNSSTCIPQLWACDNDPDCEDGSDEWPQRCRGLYVFQGDSSPCSAFEFHCLSGECIHSSWRCDGGPDCKDKSDEENCAVATCRPDEFQCSDGNCIHGSRQCDREYDCKDMSDEVGCVNVTLCEGPNKFKCHSGECITLDKVCNMARDCRDWSDEPIKECGTNECLDNNGGCSHVCNDLKIGYECLCPDGFQLVAQRRCEDIDECQDPDTCSQLCVNLEGGYKCQCEEGFQLDPHTKACKAVGSIAYLFFTNRHEVRKMTLDRSEYTSLIPNLRNVVALDTEVASNRIYWSDLSQRMICSTQLDRAHGVSSYDTVISRDIQAPDGLAVDWIHSNIYWTDSVLGTVSVADTKGVKRKTLFRENGSKPRAIVVDPVHGFMYWTDWGTPAKIKKGGLNGVDIYSLVTENIQWPNGITLDLLSGRLYWVDSKLHSISSIDVNGGNRKTILEDEKRLAHPFSLAVFEDKVFWTDIINEAIFSANRLTGSDVNLLAENLLSPEDMVLFHNLTQPRGVNWCERTTLSNGGCQYLCLPAPQINPHSPKFTCACPDGMLLARDMRSCLTEAEAAVATQETSTVRLKVSSTAVRTQHTTTRPVPDTSRLPGATPGLTTVEIVTMSHQALGDVAGRGNEKKPSSVRALSIVLPIVLLVFLCLGVFLLWKNWRLKNINSINFDNPVYQKTTEDEVHICHNQDGYSYPSRQMVSLEDDVA 1 787 809 GRGNEKKPSS VRALSIVLPIVLLVFLCLGVFLL WKNWRLKNIN +P16150 MATLLLLLGVLVVSPDALGSTTAVQTPTSGEPLVSTSEPLSSKMYTTSITSDPKADSTGDQTSALPPSTSINEGSPLWTSIGASTGSPLPEPTTYQEVSIKMSSVPQETPHATSHPAVPITANSLGSHTVTGGTITTNSPETSSRTSGAPVTTAASSLETSRGTSGPPLTMATVSLETSKGTSGPPVTMATDSLETSTGTTGPPVTMTTGSLEPSSGASGPQVSSVKLSTMMSPTTSTNASTVPFRNPDENSRGMLPVAVLVALLAVIVLVALLLLWRRRQKRRTGALVLSRGGKRNGVVDAWAGPAQVPEEGAVTVTVGGSGGDKGSGFPDGEGSSRRPTLTTFFGRRKSRQGSLAMEELKSGSGPSLKGEEEPLVASEDGAVDAPAPDEPEGGDGAAP 1 255 277 FRNPDENSRG MLPVAVLVALLAVIVLVALLLLW RRRQKRRTGA +P0CC10 
MAQAHIQGSPCPLLPPGRMSWPQGALLLLWLFSPPLRAGGGGVAVTSAAGGGSPPATSCPAACSCSNQASRVICTRRELAEVPASIPVNTRYLNLQENSIQVIRTDTFKHLRHLEILQLSKNLVRKIEVGAFNGLPSLNTLELFDNRLTTVPTQAFEYLSKLRELWLRNNPIESIPSYAFNRVPSLRRLDLGELKRLEYISEAAFEGLVNLRYLNLGMCNLKDIPNLTALVRLEELELSGNRLDLIRPGSFQGLTSLRKLWLMHAQVATIERNAFDDLKSLEELNLSHNNLMSLPHDLFTPLHRLERVHLNHNPWHCNCDVLWLSWWLKETVPSNTTCCARCHAPAGLKGRYIGELDQSHFTCYAPVIVEPPTDLNVTEGMAAELKCRTGTSMTSVNWLTPNGTLMTHGSYRVRISVLHDGTLNFTNVTVQDTGQYTCMVTNSAGNTTASATLNVSAVDPVAAGGPGGGGPGGGGGAGGAGGYTYFTTVTVETLETQPGEEAQQPRGTEKEPPGPTTDGAWGGGRPDAAAPASASTTAPAPRSSRPTEKAFTVPITDVTENALKDLDDVMKTTKIIIGCFVAITFMAAVMLVAFYKLRKQHQLHKHHGPTRTVEIINVEDELPAASAVSVAAAAAVAGGAGVGGDSHLALPALERDHLNHHHYVAAAFKAHYGGNPGGGCGAKGPGLNSIHEPLLFKSGSKENVQETQI 1 573 595 LKDLDDVMKT TKIIIGCFVAITFMAAVMLVAFY KLRKQHQLHK +Q07954 MLTPPLLLLLPLLSALVAAAIDAPKTCSPKQFACRDQITCISKGWRCDGERDCPDGSDEAPEICPQSKAQRCQPNEHNCLGTELCVPMSRLCNGVQDCMDGSDEGPHCRELQGNCSRLGCQHHCVPTLDGPTCYCNSSFQLQADGKTCKDFDECSVYGTCSQLCTNTDGSFICGCVEGYLLQPDNRSCKAKNEPVDRPPVLLIANSQNILATYLSGAQVSTITPTSTRQTTAMDFSYANETVCWVHVGDSAAQTQLKCARMPGLKGFVDEHTINISLSLHHVEQMAIDWLTGNFYFVDDIDDRIFVCNRNGDTCVTLLDLELYNPKGIALDPAMGKVFFTDYGQIPKVERCDMDGQNRTKLVDSKIVFPHGITLDLVSRLVYWADAYLDYIEVVDYEGKGRQTIIQGILIEHLYGLTVFENYLYATNSDNANAQQKTSVIRVNRFNSTEYQVVTRVDKGGALHIYHQRRQPRVRSHACENDQYGKPGGCSDICLLANSHKARTCRCRSGFSLGSDGKSCKKPEHELFLVYGKGRPGIIRGMDMGAKVPDEHMIPIENLMNPRALDFHAETGFIYFADTTSYLIGRQKIDGTERETILKDGIHNVEGVAVDWMGDNLYWTDDGPKKTISVARLEKAAQTRKTLIEGKMTHPRAIVVDPLNGWMYWTDWEEDPKDSRRGRLERAWMDGSHRDIFVTSKTVLWPNGLSLDIPAGRLYWVDAFYDRIETILLNGTDRKIVYEGPELNHAFGLCHHGNYLFWTEYRSGSVYRLERGVGGAPPTVTLLRSERPPIFEIRMYDAQQQQVGTNKCRVNNGGCSSLCLATPGSRQCACAEDQVLDADGVTCLANPSYVPPPQCQPGEFACANSRCIQERWKCDGDNDCLDNSDEAPALCHQHTCPSDRFKCENNRCIPNRWLCDGDNDCGNSEDESNATCSARTCPPNQFSCASGRCIPISWTCDLDDDCGDRSDESASCAYPTCFPLTQFTCNNGRCININWRCDNDNDCGDNSDEAGCSHSCSSTQFKCNSGRCIPEHWTCDGDNDCGDYSDETHANCTNQATRPPGGCHTDEFQCRLDGLCIPLRWRCDGDTDCMDSSDEKSCEGVTHVCDPSVKFGCKDSARCISKAWVCDGDNDCEDNSDEENCESLACRPPSHPCANNTSVCLPPDKLCDGNDDCGDGSDEGELCDQCSLNNGGCSHNCSVAPGEGIVCSCPLGMELGPDNHTCQIQSY
CAKHLKCSQKCDQNKFSVKCSCYEGWVLEPDGESCRSLDPFKPFIIFSNRHEIRRIDLHKGDYSVLVPGLRNTIALDFHLSQSALYWTDVVEDKIYRGKLLDNGALTSFEVVIQYGLATPEGLAVDWIAGNIYWVESNLDQIEVAKLDGTLRTTLLAGDIEHPRAIALDPRDGILFWTDWDASLPRIEAASMSGAGRRTVHRETGSGGWPNGLTVDYLEKRILWIDARSDAIYSARYDGSGHMEVLRGHEFLSHPFAVTLYGGEVYWTDWRTNTLAKANKWTGHNVTVVQRTNTQPFDLQVYHPSRQPMAPNPCEANGGQGPCSHLCLINYNRTVSCACPHLMKLHKDNTTCYEFKKFLLYARQMEIRGVDLDAPYYNYIISFTVPDIDNVTVLDYDAREQRVYWSDVRTQAIKRAFINGTGVETVVSADLPNAHGLAVDWVSRNLFWTSYDTNKKQINVARLDGSFKNAVVQGLEQPHGLVVHPLRGKLYWTDGDNISMANMDGSNRTLLFSGQKGPVGLAIDFPESKLYWISSGNHTINRCNLDGSGLEVIDAMRSQLGKATALAIMGDKLWWADQVSEKMGTCSKADGSGSVVLRNSTTLVMHMKVYDESIQLDHKGTNPCSVNNGDCSQLCLPTSETTRSCMCTAGYSLRSGQQACEGVGSFLLYSVHEGIRGIPLDPNDKSDALVPVSGTSLAVGIDFHAENDTIYWVDMGLSTISRAKRDQTWREDVVTNGIGRVEGIAVDWIAGNIYWTDQGFDVIEVARLNGSFRYVVISQGLDKPRAITVHPEKGYLFWTEWGQYPRIERSRLDGTERVVLVNVSISWPNGISVDYQDGKLYWCDARTDKIERIDLETGENREVVLSSNNMDMFSVSVFEDFIYWSDRTHANGSIKRGSKDNATDSVPLRTGIGVQLKDIKVFNRDRQKGTNVCAVANGGCQQLCLYRGRGQRACACAHGMLAEDGASCREYAGYLLYSERTILKSIHLSDERNLNAPVQPFEDPEHMKNVIALAFDYRAGTSPGTPNRIFFSDIHFGNIQQINDDGSRRITIVENVGSVEGLAYHRGWDTLYWTSYTTSTITRHTVDQTRPGAFERETVITMSGDDHPRAFVLDECQNLMFWTNWNEQHPSIMRAALSGANVLTLIEKDIRTPNGLAIDHRAEKLYFSDATLDKIERCEYDGSHRYVILKSEPVHPFGLAVYGEHIFWTDWVRRAVQRANKHVGSNMKLLRVDIPQQPMGIIAVANDTNSCELSPCRINNGGCQDLCLLTHQGHVNCSCRGGRILQDDLTCRAVNSSCRAQDEFECANGECINFSLTCDGVPHCKDKSDEKPSYCNSRRCKKTFRQCSNGRCVSNMLWCNGADDCGDGSDEIPCNKTACGVGEFRCRDGTCIGNSSRCNQFVDCEDASDEMNCSATDCSSYFRLGVKGVLFQPCERTSLCYAPSWVCDGANDCGDYSDERDCPGVKRPRCPLNYFACPSGRCIPMSWTCDKEDDCEHGEDETHCNKFCSEAQFECQNHRCISKQWLCDGSDDCGDGSDEAAHCEGKTCGPSSFSCPGTHVCVPERWLCDGDKDCADGADESIAAGCLYNSTCDDREFMCQNRQCIPKHFVCDHDRDCADGSDESPECEYPTCGPSEFRCANGRCLSSRQWECDGENDCHDQSDEAPKNPHCTSQEHKCNASSQFLCSSGRCVAEALLCNGQDDCGDSSDERGCHINECLSRKLSGCSQDCEDLKIGFKCRCRPGFRLKDDGRTCADVDECSTTFPCSQRCINTHGSYKCLCVEGYAPRGGDPHSCKAVTDEEPFLIFANRYYLRKLNLDGSNYTLLKQGLNNAVALDFDYREQMIYWTDVTTQGSMIRRMHLNGSNVQVLHRTGLSNPDGLAVDWVGGNLYWCDKGRDTIEVSKLNGAYRTVLVSSGLREPRALVVDVQNGYLYWTDWGDHSLIGRIGMDGSSRSVIVDTKITWPNGLTLDYVTERIYWADAREDYIEFASLDGSNRHVV
LSQDIPHIFALTLFEDYVYWTDWETKSINRAHKTTGTNKTLLISTLHRPMDLHVFHALRQPDVPNHPCKVNNGGCSNLCLLSPGGGHKCACPTNFYLGSDGRTCVSNCTASQFVCKNDKCIPFWWKCDTEDDCGDHSDEPPDCPEFKCRPGQFQCSTGICTNPAFICDGDNDCQDNSDEANCDIHVCLPSQFKCTNTNRCIPGIFRCNGQDNCGDGEDERDCPEVTCAPNQFQCSITKRCIPRVWVCDRDNDCVDGSDEPANCTQMTCGVDEFRCKDSGRCIPARWKCDGEDDCGDGSDEPKEECDERTCEPYQFRCKNNRCVPGRWQCDYDNDCGDNSDEESCTPRPCSESEFSCANGRCIAGRWKCDGDHDCADGSDEKDCTPRCDMDQFQCKSGHCIPLRWRCDADADCMDGSDEEACGTGVRTCPLDEFQCNNTLCKPLAWKCDGEDDCGDNSDENPEECARFVCPPNRPFRCKNDRVCLWIGRQCDGTDNCGDGTDEEDCEPPTAHTTHCKDKKEFLCRNQRCLSSSLRCNMFDDCGDGSDEEDCSIDPKLTSCATNASICGDEARCVRTEKAAYCACRSGFHTVPGQPGCQDINECLRFGTCSQLCNNTKGGHLCSCARNFMKTHNTCKAEGSEYQVLYIADDNEIRSLFPGHPHSAYEQAFQGDESVRIDAMDVHVKAGRVYWTNWHTGTISYRSLPPAAPPTTSNRHRRQIDRGVTHLNISGLKMPRGIAIDWVAGNVYWTDSGRDVIEVAQMKGENRKTLISGMIDEPHAIVVDPLRGTMYWSDWGNHPKIETAAMDGTLRETLVQDNIQWPTGLAVDYHNERLYWADAKLSVIGSIRLNGTDPIVAADSKRGLSHPFSIDVFEDYIYGVTYINNRVFKIHKFGHSPLVNLTGGLSHASDVVLYHQHKQPEVTNPCDRKKCEWLCLLSPSGPVCTCPNGKRLDNGTCVPVPSPTPPPDAPRPGTCNLQCFNGGSCFLNARRQPKCRCQPRYTGDKCELDQCWEHCRNGGTCAASPSGMPTCRCPTGFTGPKCTQQVCAGYCANNSTCTVNQGNQPQCRCLPGFLGDRCQYRQCSGYCENFGTCQMAADGSRQCRCTAYFEGSRCEVNKCSRCLEGACVVNKQSGDVTCNCTDGRVAPSCLTCVGHCSNGGSCTMNSKMMPECQCPPHMTGPRCEEHVFSQQQPGHIASILIPLLLLLLLVLVAGVVFWYKRRVQGAKGFQHQRMTNGAMNVEIGNPTYKMYEGGEPDDVGGLLDADFALDPDKPTNFTNPVYATLYMGGHGSRHSLASTDEKRELLGRGPEDEIGDPLA 1 4421 4443 HVFSQQQPGH IASILIPLLLLLLLVLVAGVVFW YKRRVQGAKG +O75581 
MGAVLRSLLACSFCVLLRAAPLLLYANRRDLRLVDATNGKENATIVVGGLEDAAAVDFVFSHGLIYWSDVSEEAIKRTEFNKTESVQNVVVSGLLSPDGLACDWLGEKLYWTDSETNRIEVSNLDGSLRKVLFWQELDQPRAIALDPSSGFMYWTDWGEVPKIERAGMDGSSRFIIINSEIYWPNGLTLDYEEQKLYWADAKLNFIHKSNLDGTNRQAVVKGSLPHPFALTLFEDILYWTDWSTHSILACNKYTGEGLREIHSDIFSPMDIHAFSQQRQPNATNPCGIDNGGCSHLCLMSPVKPFYQCACPTGVKLLENGKTCKDGATELLLLARRTDLRRISLDTPDFTDIVLQLEDIRHAIAIDYDPVEGYIYWTDDEVRAIRRSFIDGSGSQFVVTAQIAHPDGIAVDWVARNLYWTDTGTDRIEVTRLNGTMRKILISEDLEEPRAIVLDPMVGYMYWTDWGEIPKIERAALDGSDRVVLVNTSLGWPNGLALDYDEGKIYWGDAKTDKIEVMNTDGTGRRVLVEDKIPHIFGFTLLGDYVYWTDWQRRSIERVHKRSAEREVIIDQLPDLMGLKATNVHRVIGSNPCAEENGGCSHLCLYRPQGLRCACPIGFELISDMKTCIVPEAFLLFSRRADIRRISLETNNNNVAIPLTGVKEASALDFDVTDNRIYWTDISLKTISRAFMNGSALEHVVEFGLDYPEGMAVDWLGKNLYWADTGTNRIEVSKLDGQHRQVLVWKDLDSPRALALDPAEGFMYWTEWGGKPKIDRAAMDGSERTTLVPNVGRANGLTIDYAKRRLYWTDLDTNLIESSNMLGLNREVIADDLPHPFGLTQYQDYIYWTDWSRRSIERANKTSGQNRTIIQGHLDYVMDILVFHSSRQSGWNECASSNGHCSHLCLAVPVGGFVCGCPAHYSLNADNRTCSAPTTFLLFSQKSAINRMVIDEQQSPDIILPIHSLRNVRAIDYDPLDKQLYWIDSRQNMIRKAQEDGSQGFTVVVSSVPSQNLEIQPYDLSIDIYSRYIYWTCEATNVINVTRLDGRSVGVVLKGEQDRPRAVVVNPEKGYMYFTNLQERSPKIERAALDGTEREVLFFSGLSKPIALALDSRLGKLFWADSDLRRIESSDLSGANRIVLEDSNILQPVGLTVFENWLYWIDKQQQMIEKIDMTGREGRTKVQARIAQLSDIHAVKELNLQEYRQHPCAQDNGGCSHICLVKGDGTTRCSCPMHLVLLQDELSCGEPPTCSPQQFTCFTGEIDCIPVAWRCDGFTECEDHSDELNCPVCSESQFQCASGQCIDGALRCNGDANCQDKSDEKNCEVLCLIDQFRCANGQCIGKHKKCDHNVDCSDKSDELDCYPTEEPAPQATNTVGSVIGVIVTIFVSGTVYFICQRMLCPRMKGDGETMTNDYVVHGPASVPLGYVPHPSSLSGSLPGMSRGKSMISSLSIMGGSSGPPYDRAHVTGASSSSSSSTKGTYFPAILNPPPSPATERSHYTMEFGYSSNSPSTHRSYSYRPYSYRHFAPPTTPCSTDVCDSDYAPSRRMTSVATAKGYTSDLNYDSEPVPPPPTPRSQYLSAEENYESCPPSPYTERSYSHHLYPPPPSPCTDSS 1 1371 1393 YPTEEPAPQA TNTVGSVIGVIVTIFVSGTVYFI CQRMLCPRMK +Q924X6 
MGRPELGALRPLALLLLLLLQLQHLSAADPLPGGQGPVKECEEDQFRCRNERCIPLVWRCDEDNDCSDNSDEDDCPKRTCADSDFTCDNGHCIPERWKCDGEEECPDGSDESKATCSSEECPAEKLSCGPTSHKCVPASWRCDGEKDCEGGADEAGCPTLCAPHEFQCSNRSCLASVFVCDGDDDCGDGSDERGCSDPACPPREFRCGGGGTCIPERWVCDRQFDCEDRSDEAAELCGRAGQGTTATPAACAPTAQFTCRSGECIHLGWRCDGDRDCKDKSDEADCSPGPCRENEFQCGDGTCVLAIKRCNQERDCPDGSDEAGCLQESTCEGPRRFQCKSGECVDGGKVCDDQRDCRDWSDEPQKVCGLNECLHNNGGCSHICTDLKIGFECTCPAGFQLLDQKTCGDIDECQDPDACSQICVNYKGYFKCECHPGYEMDTLTKNCKAVAGKSPSLIFTNRHEVRRIDLVKRDYSRLIPMLKNVVALDVEVATNRIYWCDLSYRKIYSAHMDKASIPDEQVVLIDEQLHSPEGLAVDWVHKHIYWTDSGNKTISVATTDGRRRCTLFSRELSEPRAIAVDPLRGFMYWSDWGFQAKIEKAGLNGADRQTLVSDNIEWPNGITLDLLSQRLYWVDSKLHQLSSIDFNGGNRKMLIFSTDFLSHPFGVAVFEDKVFWTDLENEAIFSANRLNGLEIAILAENLNNPHDIVIFHELKQPKAADACDLSAQPNGGCEYLCLPAPQISSHSPKYTCACPDTMWLGPDMKRCYRAPQSTSTTTLASAMTRTVPATTRAPGTTIHDPTYQNHSTETPSQTAAAPHSVNVPRAPSTSPSTPSPATSNHSQHYGNEGSQMGSTVTAAVIGVIVPIVVIALLCMSGYLIWRNWKRKNTKSMNFDNPVYRKTTEEEEEDELHIGRTAQIGHVYPAAISNYDRPLWAEPCLGETRDLEDPAPALKELFVLPGEPRSQLHQLPKNPLSELPVVKCKRVALSLEDDGLP 1 859 881 GSQMGSTVTA AVIGVIVPIVVIALLCMSGYLIW RNWKRKNTKS +Q12866 
MGPAPLPLLLGLFLPALWRRAITEAREEAKPYPLFPGPFPGSLQTDHTPLLSLPHASGYQPALMFSPTQPGRPHTGNVAIPQVTSVESKPLPPLAFKHTVGHIILSEHKGVKFNCSISVPNIYQDTTISWWKDGKELLGAHHAITQFYPDDEVTAIIASFSITSVQRSDNGSYICKMKINNEEIVSDPIYIEVQGLPHFTKQPESMNVTRNTAFNLTCQAVGPPEPVNIFWVQNSSRVNEQPEKSPSVLTVPGLTEMAVFSCEAHNDKGLTVSKGVQINIKAIPSPPTEVSIRNSTAHSILISWVPGFDGYSPFRNCSIQVKEADPLSNGSVMIFNTSALPHLYQIKQLQALANYSIGVSCMNEIGWSAVSPWILASTTEGAPSVAPLNVTVFLNESSDNVDIRWMKPPTKQQDGELVGYRISHVWQSAGISKELLEEVGQNGSRARISVQVHNATCTVRIAAVTRGGVGPFSDPVKIFIPAHGWVDYAPSSTPAPGNADPVLIIFGCFCGFILIGLILYISLAIRKRVQETKFGNAFTEEDSELVVNYIAKKSFCRRAIELTLHSLGVSEELQNKLEDVVIDRNLLILGKILGEGEFGSVMEGNLKQEDGTSLKVAVKTMKLDNSSQREIEEFLSEAACMKDFSHPNVIRLLGVCIEMSSQGIPKPMVILPFMKYGDLHTYLLYSRLETGPKHIPLQTLLKFMVDIALGMEYLSNRNFLHRDLAARNCMLRDDMTVCVADFGLSKKIYSGDYYRQGRIAKMPVKWIAIESLADRVYTSKSDVWAFGVTMWEIATRGMTPYPGVQNHEMYDYLLHGHRLKQPEDCLDELYEIMYSCWRTDPLDRPTFSVLRLQLEKLLESLPDVRNQADVIYVNTQLLESSEGLAQGSTLAPLDLNIDPDSIIASCTPRAAISVVTAEVHDSKPHEGRYILNGGSEEWEDLTSAPSAAVTAEKNSVLPGERLVRNGVSWSHSSMLPLGSSLPDELLFADDSSEGSEVLM 1 502 524 STPAPGNADP VLIIFGCFCGFILIGLILYISLA IRKRVQETKF +P08581 
MKAPAVLAPGILVLLFTLVQRSNGECKEALAKSEMNVNMKYQLPNFTAETPIQNVILHEHHIFLGATNYIYVLNEEDLQKVAEYKTGPVLEHPDCFPCQDCSSKANLSGGVWKDNINMALVVDTYYDDQLISCGSVNRGTCQRHVFPHNHTADIQSEVHCIFSPQIEEPSQCPDCVVSALGAKVLSSVKDRFINFFVGNTINSSYFPDHPLHSISVRRLKETKDGFMFLTDQSYIDVLPEFRDSYPIKYVHAFESNNFIYFLTVQRETLDAQTFHTRIIRFCSINSGLHSYMEMPLECILTEKRKKRSTKKEVFNILQAAYVSKPGAQLARQIGASLNDDILFGVFAQSKPDSAEPMDRSAMCAFPIKYVNDFFNKIVNKNNVRCLQHFYGPNHEHCFNRTLLRNSSGCEARRDEYRTEFTTALQRVDLFMGQFSEVLLTSISTFIKGDLTIANLGTSEGRFMQVVVSRSGPSTPHVNFLLDSHPVSPEVIVEHTLNQNGYTLVITGKKITKIPLNGLGCRHFQSCSQCLSAPPFVQCGWCHDKCVRSEECLSGTWTQQICLPAIYKVFPNSAPLEGGTRLTICGWDFGFRRNNKFDLKKTRVLLGNESCTLTLSESTMNTLKCTVGPAMNKHFNMSIIISNGHGTTQYSTFSYVDPVITSISPKYGPMAGGTLLTLTGNYLNSGNSRHISIGGKTCTLKSVSNSILECYTPAQTISTEFAVKLKIDLANRETSIFSYREDPIVYEIHPTKSFISGGSTITGVGKNLNSVSVPRMVINVHEAGRNFTVACQHRSNSEIICCTTPSLQQLNLQLPLKTKAFFMLDGILSKYFDLIYVHNPVFKPFEKPVMISMGNENVLEIKGNDIDPEAVKGEVLKVGNKSCENIHLHSEAVLCTVPNDLLKLNSELNIEWKQAISSTVLGKVIVQPDQNFTGLIAGVVSISTALLLLLGFFLWLKKRKQIKDLGSELVRYDARVHTPHLDRLVSARSVSPTTEMVSNESVDYRATFPEDQFPNSSQNGSCRQVQYPLTDMSPILTSGDSDISSPLLQNTVHIDLSALNPELVQAVQHVVIGPSSLIVHFNEVIGRGHFGCVYHGTLLDNDGKKIHCAVKSLNRITDIGEVSQFLTEGIIMKDFSHPNVLSLLGICLRSEGSPLVVLPYMKHGDLRNFIRNETHNPTVKDLIGFGLQVAKGMKYLASKKFVHRDLAARNCMLDEKFTVKVADFGLARDMYDKEYYSVHNKTGAKLPVKWMALESLQTQKFTTKSDVWSFGVLLWELMTRGAPPYPDVNTFDITVYLLQGRRLLQPEYCPDPLYEVMLKCWHPKAEMRPSFSELVSRISAIFSTFIGEHYVHVNATYVNVKCVAPYPSLLSSEDNADDEVDTRPASFWETS 1 933 955 VIVQPDQNFT GLIAGVVSISTALLLLLGFFLWL KKRKQIKDLG +P15941 
MTPGTQSPFFLLLLLTVLTVVTGSGHASSTPGGEKETSATQRSSVPSSTEKNAVSMTSSVLSSHSPGSGSSTTQGQDVTLAPATEPASGSAATWGQDVTSVPVTRPALGSTTPPAHDVTSAPDNKPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDTRPAPGSTAPPAHGVTSAPDNRPALGSTAPPVHNVTSASGSASGSASTLVHNGTSARATTTPASKSTPFSIPSHHSDTPTTLASHSTKTDASSTHHSSVPPLTSSNHSTSPQLSTGVSFFFLSFHISNLQFNSSLEDPSTDYYQELQRDISEMFLQIYKQGGFLGLSNIKFRPGSVVVQLTLAFREGTINVHDVETQFNQYKTEAASRYNLTISDVSVSDVPFPFSAQSGAGVPGWGIALLVLVCVLVALAIVYLIALAVCQCRRKNYGQLDIFPARDTYHPMSEYPTYHTHGRYVPPSSTDRSPYEKVSAGNGGSSLSYTNPAVAATSANL 1 1159 1181 SAQSGAGVPG WGIALLVLVCVLVALAIVYLIAL AVCQCRRKNY +Q9JKF6 MARMGLAGAAGRWWGLALGLTAFFLPGTHTQVVQVNDSMYGFIGTDVVLHCSFANPLPSVKITQVTWQKASNGSKQNMAIYNPTMGVSVLPPYEKRVEFLRPSFIDGTIRLSGLELEDEGMYICEFATFPTGNRESQLNLTVMAKPTNWIEGTRAVLRARKGQDDKVLVATCTSANGKPPSAVSWETRLKGEAEYQEIRNPNGTVTVISRYRLVPSREAHRQSLACIVNYHLDRFRESLTLNVQYEPEVTIEGFDGNWYLQRTDVKLTCKADANPPATEYHWTTLNGSLPKGVEAQNRTLFFRGPITYSLAGTYICEATNPIGTRSGQVEVNITEFPYTPTPEHGRRAGQMPTAIIGGVAGSVLLVLIVVGGIIVALRRRRHTFKGDYSTKKHVYGNGYSKAGIPQHHPPMAQNLQYPDDSDDEKKAGPLGGSSYEEEEEEEGGGGGERKVGGPHPKYDEDAKRPYFTVDEAEARQDGYGDRTLGYQYDPEQLDLAENMVSQNDGSFISKKEWYV 1 355 377 GRRAGQMPTA IIGGVAGSVLLVLIVVGGIIVAL RRRRHTFKGD +Q62765 
MALPRCMWPNYVWRAMMACVVHRGSGAPLTLCLLGCLLQTFHVLSQKLDDVDPLVTTNFGKIRGIKKELNNEILGPVIQFLGVPYAAPPTGEHRFQPPEPPSPWSDIRNATQFAPVCPQNIIDGRLPEVMLPVWFTNNLDVVSSYVQDQSEDCLYLNIYVPTEDVKRISKECARKPGKKICRKGDIRDSGGPKPVMVYIHGGSYMEGTGNLYDGSVLASYGNVIVITVNYRLGVLGFLSTGDQAAKGNYGLLDLIQALRWTSENIGFFGGDPLRITVFGSGAGGSCVNLLTLSHYSEGNRWSNSTKGLFQRAIAQSGTALSSWAVSFQPAKYARILATKVGCNVSDTVELVECLQKKPYKELVDQDVQPARYHIAFGPVIDGDVIPDDPQILMEQGEFLNYDIMLGVNQGEGLKFVENIVDSDDGVSASDFDFAVSNFVDNLYGYPEGKDVLRETIKFMYTDWADRHNPETRRKTLLALFTDHQWVAPAVATADLHSNFGSPTYFYAFYHHCQTDQVPAWADAAHGDEVPYVLGIPMIGPTELFPCNFSKNDVMLSAVVMTYWTNFAKTGDPNQPVPQDTKFIHTKPNRFEEVAWTRYSQKDQLYLHIGLKPRVKEHYRANKVNLWLELVPHLHNLNDISQYTSTTTKVPSTDITLRPTRKNSTPVTSAFPTAKQDDPKQQPSPFSVDQRDYSTELSVTIAVGASLLFLNILAFAALYYKKDKRRHDVHRRCSPQRTTTNDLTHAPEEEIMSLQMKHTDLDHECESIHPHEVVLRTACPPDYTLAMRRSPDDVPLMTPNTITMIPNTIPGIQPLHTFNTFTGGQNNTLPHPHPHPHSHSTTRV 1 697 719 VDQRDYSTEL SVTIAVGASLLFLNILAFAALYY KKDKRRHDVH +O35516 MPALRPAALRALLWLWLCGAGPAHALQCRGGQEPCVNEGTCVTYHNGTGFCRCPEGFLGEYCQHRDPCEKNRCQNGGTCVPQGMLGKATCRCAPGFTGEDCQYSTSHPCFVSRPCQNGGTCHMLSRDTYECTCQVGFTGKQCQWTDACLSHPCENGSTCTSVASQFSCKCPAGLTGQKCEADINECDIPGRCQHGGTCLNLPGSYRCQCPQGFTGQHCDSPYVPCAPSPCVNGGTCRQTGDFTFECNCLPGFEGSTCERNIDDCPNHKCQNGGVCVDGVNTYNCRCPPQWTGQFCTEDVDECLLQPNACQNGGTCTNRNGGYGCVCVNGWSGDDCSENIDDCAYASCTPGSTCIDRVASFSCLCPEGKAGLLCHLDDACISNPCHKGALCDTNPLNGQYICTCPQGYKGADCTEDVDECAMANSNPCEHAGKCVNTDGAFHCECLKGYAGPRCEMDINECHSDPCQNDATCLDKIGGFTCLCMPGFKGVHCELEVNECQSNPCVNNGQCVDKVNRFQCLCPPGFTGPVCQIDIDDCSSTPCLNGAKCIDHPNGYECQCATGFTGILCDENIDNCDPDPCHHGQCQDGIDSYTCICNPGYMGAICSDQIDECYSSPCLNDGRCIDLVNGYQCNCQPGTSGLNCEINFDDCASNPCMHGVCVDGINRYSCVCSPGFTGQRCNIDIDECASNPCRKGATCINDVNGFRCICPEGPHHPSCYSQVNECLSNPCIHGNCTGGLSGYKCLCDAGWVGVNCEVDKNECLSNPCQNGGTCNNLVNGYRCTCKKGFKGYNCQVNIDECASNPCLNQGTCFDDVSGYTCHCMLPYTGKNCQTVLAPCSPNPCENAAVCKEAPNFESFSCLCAPGWQGKRCTVDVDECISKPCMNNGVCHNTQGSYVCECPPGFSGMDCEEDINDCLANPCQNGGSCVDHVNTFSCQCHPGFIGDKCQTDMNECLSEPCKNGGTCSDYVNSYTCTCPAGFHGVHCENNIDECTESSCFNGGTCVDGINSFSCLCPVGFTGPFCLHDINECSSNPCLNAGTCVDGLGTYRCICPLGYTGKNCQTLVNLCSRSPCKNKGTCVQEKARPHCLCPPG
WDGAYCDVLNVSCKAAALQKGVPVEHLCQHSGICINAGNTHHCQCPLGYTGSYCEEQLDECASNPCQHGATCNDFIGGYRCECVPGYQGVNCEYEVDECQNQPCQNGGTCIDLVNHFKCSCPPGTRGLLCEENIDECAGGPHCLNGGQCVDRIGGYTCRCLPGFAGERCEGDINECLSNPCSSEGSLDCVQLKNNYNCICRSAFTGRHCETFLDVCPQKPCLNGGTCAVASNMPDGFICRCPPGFSGARCQSSCGQVKCRRGEQCIHTDSGPRCFCLNPKDCESGCASNPCQHGGTCYPQRQPPHYSCRCPPSFGGSHCELYTAPTSTPPATCQSQYCADKARDGICDEACNSHACQWDGGDCSLTMEDPWANCTSTLRCWEYINNQCDEQCNTAECLFDNFECQRNSKTCKYDKYCADHFKDNHCDQGCNSEECGWDGLDCASDQPENLAEGTLIIVVLLPPEQLLQDSRSFLRALGTLLHTNLRIKQDSQGALMVYPYFGEKSAAMKKQKMTRRSLPEEQEQEQEVIGSKIFLEIDNRQCVQDSDQCFKNTDAAAALLASHAIQGTLSYPLVSVFSELESPRNAQLLYLLAVAVVIILFFILLGVIMAKRKRKHGFLWLPEGFTLRRDSSNHKRREPVGQDAVGLKNLSVQVSEANLIGSGTSEHWVDDEGPQPKKAKAEDEALLSEDDPIDRRPWTQQHLEAADIRHTPSLALTPPQAEQEVDVLDVNVRGPDGCTPLMLASLRGGSSDLSDEDEDAEDSSANIITDLVYQGASLQAQTDRTGEMALHLAARYSRADAAKRLLDAGADANAQDNMGRCPLHAAVAADAQGVFQILIRNRVTDLDARMNDGTTPLILAARLAVEGMVAELINCQADVNAVDDHGKSALHWAAAVNNVEATLLLLKNGANRDMQDNKEETPLFLAAREGSYEAAKILLDHFANRDITDHMDRLPRDVARDRMHHDIVRLLDEYNVTPSPPGTVLTSALSPVLCGPNRSFLSLKHTPMGKKARRPNTKSTMPTSLPNLAKEAKDAKGSRRKKCLNEKVQLSESSVTLSPVDSLESPHTYVSDATSSPMITSPGILQASPTPLLAAAAPAAPVHTQHALSFSNLHDMQPLAPGASTVLPSVSQLLSHHHIAPPGSSSAGSLGRLHPVPVPADWMNRVEMNETQYSEMFGMVLAPAEGAHPGIAAPQSRPPEGKHMSTQREPLPPIVTFQLIPKGSIAQAAGAPQTQSSCPPAVAGPLPSMYQIPEMPRLPSVAFPPTMMPQQEGQVAQTIVPTYHPFPASVGKYPTPPSQHSYASSNAAERTPSHGGHLQGEHPYLTPSPESPDQWSSSSPHSASDWSDVTTSPTPGGGGGGQRGPGTHMSEPPHSNMQVYA 1 1680 1702 SELESPRNAQ LLYLLAVAVVIILFFILLGVIMA KRKRKHGFLW +Q61982 
MGLGARGRRRRRRLMALPPPPPPMRALPLLLLLAGLGAAAPPCLDGSPCANGGRCTHQQPSLEAACLCLPGWVGERCQLEDPCHSGPCAGRGVCQSSVVAGTARFSCRCLRGFQGPDCSQPDPCVSRPCVHGAPCSVGPDGRFACACPPGYQGQSCQSDIDECRSGTTCRHGGTCLNTPGSFRCQCPLGYTGLLCENPVVPCAPSPCRNGGTCRQSSDVTYDCACLPGFEGQNCEVNVDDCPGHRCLNGGTCVDGVNTYNCQCPPEWTGQFCTEDVDECQLQPNACHNGGTCFNLLGGHSCVCVNGWTGESCSQNIDDCATAVCFHGATCHDRVASFYCACPMGKTGLLCHLDDACVSNPCHEDAICDTNPVSGRAICTCPPGFTGGACDQDVDECSIGANPCEHLGRCVNTQGSFLCQCGRGYTGPRCETDVNECLSGPCRNQATCLDRIGQFTCICMAGFTGTYCEVDIDECQSSPCVNGGVCKDRVNGFSCTCPSGFSGSMCQLDVDECASTPCRNGAKCVDQPDGYECRCAEGFEGTLCERNVDDCSPDPCHHGRCVDGIASFSCACAPGYTGIRCESQVDECRSQPCRYGGKCLDLVDKYLCRCPPGTTGVNCEVNIDDCASNPCTFGVCRDGINRYDCVCQPGFTGPLCNVEINECASSPCGEGGSCVDGENGFHCLCPPGSLPPLCLPANHPCAHKPCSHGVCHDAPGGFRCVCEPGWSGPRCSQSLAPDACESQPCQAGGTCTSDGIGFRCTCAPGFQGHQCEVLSPCTPSLCEHGGHCESDPDRLTVCSCPPGWQGPRCQQDVDECAGASPCGPHGTCTNLPGNFRCICHRGYTGPFCDQDIDDCDPNPCLHGGSCQDGVGSFSCSCLDGFAGPRCARDVDECLSSPCGPGTCTDHVASFTCACPPGYGGFHCEIDLPDCSPSSCFNGGTCVDGVSSFSCLCRPGYTGTHCQYEADPCFSRPCLHGGICNPTHPGFECTCREGFTGSQCQNPVDWCSQAPCQNGGRCVQTGAYCICPPGWSGRLCDIQSLPCTEAAAQMGVRLEQLCQEGGKCIDKGRSHYCVCPEGRTGSHCEHEVDPCTAQPCQHGGTCRGYMGGYVCECPAGYAGDSCEDNIDECASQPCQNGGSCIDLVARYLCSCPPGTLGVLCEINEDDCDLGPSLDSGVQCLHNGTCVDLVGGFRCNCPPGYTGLHCEADINECRPGACHAAHTRDCLQDPGGHFRCVCHPGFTGPRCQIALSPCESQPCQHGGQCRHSLGRGGGLTFTCHCVPPFWGLRCERVARSCRELQCPVGIPCQQTARGPRCACPPGLSGPSCRVSRASPSGATNASCASAPCLHGGSCLPVQSVPFFRCVCAPGWGGPRCETPSAAPEVPEEPRCPRAACQAKRGDQNCDRECNTPGCGWDGGDCSLNVDDPWRQCEALQCWRLFNNSRCDPACSSPACLYDNFDCYSGGRDRTCNPVYEKYCADHFADGRCDQGCNTEECGWDGLDCASEVPALLARGVLVLTVLLPPEELLRSSADFLQRLSAILRTSLRFRLDARGQAMVFPYHRPSPGSESRVRRELGPEVIGSVVMLEIDNRLCLQSAENDHCFPDAQSAADYLGALSAVERLDFPYPLRDVRGEPLEAPEQSVPLLPLLVAGAVFLLIIFILGVMVARRKREHSTLWFPEGFALHKDIAAGHKGRREPVGQDALGMKNMAKGESLMGEVVTDLNDSECPEAKRLKVEEPGMGAEEPEDCRQWTQHHLVAADIRVAPATALTPPQGDADADGVDVNVRGPDGFTPLMLASFCGGALEPMPAEEDEADDTSASIISDLICQGAQLGARTDRTGETALHLAARYARADAAKRLLDAGADTNAQDHSGRTPLHTAVTADAQGVFQILIRNRSTDLDARMADGSTALILAARLAVEGMVEELIASHADVNAVDELGKSALHWAAAVNNVEATLALLKNGANKDMQDSKEETPLFLAAREGSYEAAKLLLDHLANRE
ITDHLDRLPRDVAQERLHQDIVRLLDQPSGPRSPSGPHGLGPLLCPPGAFLPGLKAVQSGTKKSRRPPGKTGLGPQGTRGRGKKLTLACPGPLADSSVTLSPVDSLDSPRPFSGPPASPGGFPLEGPYATTATAVSLAQLGASRAGPLGRQPPGGCVLSFGLLNPVAVPLDWARLPPPAPPGPSFLLPLAPGPQLLNPGAPVSPQERPPPYLAAPGHGEEYPAAGTRSSPTKARFLRVPSEHPYLTPSPESPEHWASPSPPSLSDWSDSTPSPATATNATASGALPAQPHPISVPSLPQSQTQLGPQPEVTPKRQVMA 1 1644 1666 PLEAPEQSVP LLPLLVAGAVFLLIIFILGVMVA RRKREHSTLW +P31695 MQPQLLLLLLLPLNFPVILTRELLCGGSPEPCANGGTCLRLSRGQGICQCAPGFLGETCQFPDPCRDTQLCKNGGSCQALLPTPPSSRSPTSPLTPHFSCTCPSGFTGDRCQTHLEELCPPSFCSNGGHCYVQASGRPQCSCEPGWTGEQCQLRDFCSANPCANGGVCLATYPQIQCRCPPGFEGHTCERDINECFLEPGPCPQGTSCHNTLGSYQCLCPVGQEGPQCKLRKGACPPGSCLNGGTCQLVPEGHSTFHLCLCPPGFTGLDCEMNPDDCVRHQCQNGATCLDGLDTYTCLCPKTWKGWDCSEDIDECEARGPPRCRNGGTCQNTAGSFHCVCVSGWGGAGCEENLDDCAAATCAPGSTCIDRVGSFSCLCPPGRTGLLCHLEDMCLSQPCHVNAQCSTNPLTGSTLCICQPGYSGSTCHQDLDECQMAQQGPSPCEHGGSCINTPGSFNCLCLPGYTGSRCEADHNECLSQPCHPGSTCLDLLATFHCLCPPGLEGRLCEVEVNECTSNPCLNQAACHDLLNGFQCLCLPGFTGARCEKDMDECSSTPCANGGRCRDQPGAFYCECLPGFEGPHCEKEVDECLSDPCPVGASCLDLPGAFFCLCRPGFTGQLCEVPLCTPNMCQPGQQCQGQEHRAPCLCPDGSPGCVPAEDNCPCHHGHCQRSLCVCDEGWTGPECETELGGCISTPCAHGGTCHPQPSGYNCTCPAGYMGLTCSEEVTACHSGPCLNGGSCSIRPEGYSCTCLPSHTGRHCQTAVDHCVSASCLNGGTCVNKPGTFFCLCATGFQGLHCEEKTNPSCADSPCRNKATCQDTPRGARCLCSPGYTGSSCQTLIDLCARKPCPHTARCLQSGPSFQCLCLQGWTGALCDFPLSCQKAAMSQGIEISGLCQNGGLCIDTGSSYFCRCPPGFQGKLCQDNVNPCEPNPCHHGSTCVPQPSGYVCQCAPGYEGQNCSKVLDACQSQPCHNHGTCTSRPGGFHCACPPGFVGLRCEGDVDECLDRPCHPSGTAACHSLANAFYCQCLPGHTGQRCEVEMDLCQSQPCSNGGSCEITTGPPPGFTCHCPKGFEGPTCSHKALSCGIHHCHNGGLCLPSPKPGSPPLCACLSGFGGPDCLTPPAPPGCGPPSPCLHNGTCTETPGLGNPGFQCTCPPDSPGPRCQRPGASGCEGRGGDGTCDAGCSGPGGDWDGGDCSLGVPDPWKGCPPHSQCWLLFRDGRCHPQCDSEECLFDGYDCEIPLTCIPAYDQYCRDHFHNGHCEKGCNNAECGWDGGDCRPEGEDSEGRPSLALLVVLRPPALDQQLLALARVLSLTLRVGLWVRKDSEGRNMVFPYPGTRAKEELSGARDSSSWERQAPPTQPLGKETESLGAGFVVVMGVDLSRCGPEHPASRCPWDSGLLLRFLAAMAAVGALEPLLPGPLLAAHPQAGTRPSANQLPWPILCSPVVGVLLLALGALLVLQLIRRRRREHGALWLPPGFIRRPQTQQAPHRRRPPLGEDNIGLKALKPEAEVDEDGVAMCSGPEEGEAEETASASRCQLWPLNSGCGELPQAAMLTPPQECESEVLDVDTCGPDGVTPLMSAVFCGGVQSTTGASPQRLGLGNLEPWEPLL
DRGACPQAHTVGTGETPLHLAARFSRPTAARRLLEAGANPNQPDRAGRTPLHTAVAADAREVCQLLLASRQTTVDARTEDGTTPLMLAARLAVEDLVEELIAARADVGARDKRGKTALHWAAAVNNARAARSLLQAGADKDAQDSREQTPLFLAAREGAVEVAQLLLELGAARGLRDQAGLAPGDVARQRSHWDLLTLLEGAGPTTQEARAHARTTPGGGAAPRCRTLSAGARPRGGGACLQARTWSVDLGARGGKVYARCRSRSGSCGGPTTRGRRFSAGSRGRRGARASQDDWPRDWVALEACGSACSAPIPPPSLTPSPERGSPQVAWGLPVHQEIPLNSVVRNLN 1 1441 1463 QAGTRPSANQ LPWPILCSPVVGVLLLALGALLV LQLIRRRRRE +Q8CJ26 MLYNVSKGVVYSDTALQGQDGDREGMWVGAGGALAPNTSSLFPPEPPGASSNIIPVYCALLATVILGLLAYVAFKCWRSHKQRQQLAKARTVELGDPDRDQRRGDSNVFVDSPPSLEPCIPSQGPHPDLGCQLYLHIPQQQQEEVQRLLMMGEPAKGWQELAGHLGYQAEAVETMACDQMPAYTLLRNWAAQEGNRATLRVLEDALAAIGREDVVQVLSSPAESSSVV 1 52 74 FPPEPPGASS NIIPVYCALLATVILGLLAYVAF KCWRSHKQRQ +Q63373 MYQRMLRCGAELGSPGGGSSGGAGGRLALLWIVPLTLSGLLGVAWGASSLGAHHIHHFHGSSKHHSVPIAIYRSPASLRGGHAGTTYIFSKGGGQITYKWPPNDRPSTRADRLAIGFSTVQKEAVLVRVDSSSGLGDYLELHIHQGKIGVKFNVGTDDIAIEESNAIINDGKYHVVRFTRSGGNATLQVDSWPVIERYPAGNNDNERLAIARQRIPYRLGRVVDEWLLDKGRQLTIFNSQATIIIGGKEQGQPFQGQLSGLYYNGLKVLNMAAENDANIAIVGNVRLVGEVPSSMTTESTATAMQSEMSTSIMETTTTLATSTARRGKPPTKEPISQTTDDILVASAECPSDDEDIDPCEPSSGGLANPTRVGGREPYPGSAEVIRESSSTTGMVVGIVAAAALCILILLYAMYKYRNRDEGSYHVDESRNYISNSAQSNGAVVKEKQPSSAKSANKNKKNKDKEYYV 1 392 414 AEVIRESSST TGMVVGIVAAAALCILILLYAMY KYRNRDEGSY +P15209 
MSPWLKWHGPAMARLWGLCLLVLGFWRASLACPTSCKCSSARIWCTEPSPGIVAFPRLEPNSVDPENITEILIANQKRLEIINEDDVEAYVGLRNLTIVDSGLKFVAYKAFLKNSNLRHINFTRNKLTSLSRRHFRHLDLSDLILTGNPFTCSCDIMWLKTLQETKSSPDTQDLYCLNESSKNMPLANLQIPNCGLPSARLAAPNLTVEEGKSVTLSCSVGGDPLPTLYWDVGNLVSKHMNETSHTQGSLRITNISSDDSGKQISCVAENLVGEDQDSVNLTVHFAPTITFLESPTSDHHWCIPFTVRGNPKPALQWFYNGAILNESKYICTKIHVTNHTEYHGCLQLDNPTHMNNGDYTLMAKNEYGKDERQISAHFMGRPGVDYETNPNYPEVLYEDWTTPTDIGDTTNKSNEIPSTDVADQSNREHLSVYAVVVIASVVGFCLLVMLLLLKLARHSKFGMKGPASVISNDDDSASPLHHISNGSNTPSSSEGGPDAVIIGMTKIPVIENPQYFGITNSQLKPDTFVQHIKRHNIVLKRELGEGAFGKVFLAECYNLCPEQDKILVAVKTLKDASDNARKDFHREAELLTNLQHEHIVKFYGVCVEGDPLIMVFEYMKHGDLNKFLRAHGPDAVLMAEGNPPTELTQSQMLHIAQQIAAGMVYLASQHFVHRDLATRNCLVGENLLVKIGDFGMSRDVYSTDYYRVGGHTMLPIRWMPPESIMYRKFTTESDVWSLGVVLWEIFTYGKQPWYQLSNNEVIECITQGRVLQRPRTCPQEVYELMLGCWQREPHTRKNIKSIHTLLQNLAKASPVYLDILG 1 431 453 VADQSNREHL SVYAVVVIASVVGFCLLVMLLLL KLARHSKFGM +Q86YL7 MWKVSALLFVLGSASLWVLAEGASTGQPEDDTETTGLEGGVAMPGAEDDVVTPGTSEDRYKSGLTTLVATSVNSVTGIRIEDLPTSESTVHAQEQSPSATASNVATSHSTEKVDGDTQTTVEKDGLSTVTLVGIIVGVLLAIGFIGAIIVVVMRKMSGRYSP 1 130 152 TVEKDGLSTV TLVGIIVGVLLAIGFIGAIIVVV MRKMSGRYSP +Q13308 
MGAARGSPARPRRLPLLSVLLLPLLGGTQTAIVFIKQPSSQDALQGRRALLRCEVEAPGPVHVYWLLDGAPVQDTERRFAQGSSLSFAAVDRLQDSGTFQCVARDDVTGEEARSANASFNIKWIEAGPVVLKHPASEAEIQPQTQVTLRCHIDGHPRPTYQWFRDGTPLSDGQSNHTVSSKERNLTLRPAGPEHSGLYSCCAHSAFGQACSSQNFTLSIADESFARVVLAPQDVVVARYEEAMFHCQFSAQPPPSLQWLFEDETPITNRSRPPHLRRATVFANGSLLLTQVRPRNAGIYRCIGQGQRGPPIILEATLHLAEIEDMPLFEPRVFTAGSEERVTCLPPKGLPEPSVWWEHAGVRLPTHGRVYQKGHELVLANIAESDAGVYTCHAANLAGQRRQDVNITVATVPSWLKKPQDSQLEEGKPGYLDCLTQATPKPTVVWYRNQMLISEDSRFEVFKNGTLRINSVEVYDGTWYRCMSSTPAGSIEAQARVQVLEKLKFTPPPQPQQCMEFDKEATVPCSATGREKPTIKWERADGSSLPEWVTDNAGTLHFARVTRDDAGNYTCIASNGPQGQIRAHVQLTVAVFITFKVEPERTTVYQGHTALLQCEAQGDPKPLIQWKGKDRILDPTKLGPRMHIFQNGSLVIHDVAPEDSGRYTCIAGNSCNIKHTEAPLYVVDKPVPEESEGPGSPPPYKMIQTIGLSVGAAVAYIIAVLGLMFYCKKRCKAKRLQKQPEGEEPEMECLNGGPLQNGQPSAEIQEEVALTSLGSGPAATNKRHSTSDKMHFPRSSLQPITTLGKSEFGEVFLAKAQGLEEGVAETLVLVKSLQSKDEQQQLDFRRELEMFGKLNHANVVRLLGLCREAEPHYMVLEYVDLGDLKQFLRISKSKDEKLKSQPLSTKQKVALCTQVALGMEHLSNNRFVHKDLAARNCLVSAQRQVKVSALGLSKDVYNSEYYHFRQAWVPLRWMSPEAILEGDFSTKSDVWAFGVLMWEVFTHGEMPHGGQADDEVLADLQAGKARLPQPEGCPSKLYRLMQRCWALSPKDRPSFSEIASALGDSTVDSKP 1 704 726 GSPPPYKMIQ TIGLSVGAAVAYIIAVLGLMFYC KKRCKAKRLQ +P10586 
MAPEPAPGRTMVPLVPALVMLGLVAGAHGDSKPVFIKVPEDQTGLSGGVASFVCQATGEPKPRITWMKKGKKVSSQRFEVIEFDDGAGSVLRIQPLRVQRDEAIYECTATNSLGEINTSAKLSVLEEEQLPPGFPSIDMGPQLKVVEKARTATMLCAAGGNPDPEISWFKDFLPVDPATSNGRIKQLRSGALQIESSEESDQGKYECVATNSAGTRYSAPANLYVRVRRVAPRFSIPPSSQEVMPGGSVNLTCVAVGAPMPYVKWMMGAEELTKEDEMPVGRNVLELSNVVRSANYTCVAISSLGMIEATAQVTVKALPKPPIDLVVTETTATSVTLTWDSGNSEPVTYYGIQYRAAGTEGPFQEVDGVATTRYSIGGLSPFSEYAFRVLAVNSIGRGPPSEAVRARTGEQAPSSPPRRVQARMLSASTMLVQWEPPEEPNGLVRGYRVYYTPDSRRPPNAWHKHNTDAGLLTTVGSLLPGITYSLRVLAFTAVGDGPPSPTIQVKTQQGVPAQPADFQAEVESDTRIQLSWLLPPQERIIMYELVYWAAEDEDQQHKVTFDPTSSYTLEDLKPDTLYRFQLAARSDMGVGVFTPTIEARTAQSTPSAPPQKVMCVSMGSTTVRVSWVPPPADSRNGVITQYSVAYEAVDGEDRGRHVVDGISREHSSWDLVGLEKWTEYRVWVRAHTDVGPGPESSPVLVRTDEDVPSGPPRKVEVEPLNSTAVHVYWKLPVPSKQHGQIRGYQVTYVRLENGEPRGLPIIQDVMLAEAQWRPEESEDYETTISGLTPETTYSVTVAAYTTKGDGARSKPKIVTTTGAVPGRPTMMISTTAMNTALLQWHPPKELPGELLGYRLQYCRADEARPNTIDFGKDDQHFTVTGLHKGTTYIFRLAAKNRAGLGEEFEKEIRTPEDLPSGFPQNLHVTGLTTSTTELAWDPPVLAERNGRIISYTVVFRDINSQQELQNITTDTRFTLTGLKPDTTYDIKVRAWTSKGSGPLSPSIQSRTMPVEQVFAKNFRVAAAMKTSVLLSWEVPDSYKSAVPFKILYNGQSVEVDGHSMRKLIADLQPNTEYSFVLMNRGSSAGGLQHLVSIRTAPDLLPHKPLPASAYIEDGRFDLSMPHVQDPSLVRWFYIVVVPIDRVGGSMLTPRWSTPEELELDELLEAIEQGGEEQRRRRRQAERLKPYVAAQLDVLPETFTLGDKKNYRGFYNRPLSPDLSYQCFVLASLKEPMDQKRYASSPYSDEIVVQVTPAQQQEEPEMLWVTGPVLAVILIILIVIAILLFKRKRTHSPSSKDEQSIGLKDSLLAHSSDPVEMRRLNYQTPGMRDHPPIPITDLADNIERLKANDGLKFSQEYESIDPGQQFTWENSNLEVNKPKNRYANVIAYDHSRVILTSIDGVPGSDYINANYIDGYRKQNAYIATQGPLPETMGDFWRMVWEQRTATVVMMTRLEEKSRVKCDQYWPARGTETCGLIQVTLLDTVELATYTVRTFALHKSGSSEKRELRQFQFMAWPDHGVPEYPTPILAFLRRVKACNPLDAGPMVVHCSAGVGRTGCFIVIDAMLERMKHEKTVDIYGHVTCMRSQRNYMVQTEDQYVFIHEALLEAATCGHTEVPARNLYAHIQKLGQVPPGESVTAMELEFKLLASSKAHTSRFISANLPCNKFKNRLVNIMPYELTRVCLQPIRGVEGSDYINASFLDGYRQQKAYIATQGPLAESTEDFWRMLWEHNSTIIVMLTKLREMGREKCHQYWPAERSARYQYFVVDPMAEYNMPQYILREFKVTDARDGQSRTIRQFQFTDWPEQGVPKTGEGFIDFIGQVHKTKEQFGQDGPITVHCSAGVGRTGVFITLSIVLERMRYEGVVDMFQTVKTLRTQRPAMVQTEDQYQLCYRAALEYLGSFDHYAT 1 1262 1284 PAQQQEEPEM LWVTGPVLAVILIILIVIAILLF KRKRTHSPSS +P28828 
MRTLGTCLVTLAGLLLTAAGETFSGGCLFDEPYSTCGYSQADEDDFNWEQVNTLTKPTSDPWMPSGSFMLVNTSGKPEGQRAHLLLPQLKENDTHCIDFHYFVSSKSNAAPGLLNVYVKVNNGPLGNPIWNISGDPTRTWHRAELAISTFWPNFYQVIFEVVTSGHQGYLAIDEVKVLGHPCTRTPHFLRIQNVEVNAGQFATFQCSAIGRTVAGDRLWLQGIDVRDAPLKEIKVTSSRRFIASFNVVNTTKRDAGKYRCMICTEGGVGISNYAELVVKEPPVPIAPPQLASVGATYLWIQLNANSINGDGPIVAREVEYCTASGSWNDRQPVDSTSYKIGHLDPDTEYEISVLLTRPGEGGTGSPGPALRTRTKCADPMRGPRKLEVVEVKSRQITIRWEPFGYNVTRCHSYNLTVHYGYQVGGQEQVREEVSWDTDNSHPQHTITNLSPYTNVSVKLILMNPEGRKESQELTVQTDEDLPGAVPTESIQGSAFEEKIFLQWREPTQTYGVITLYEITYKAVSSFDPEIDLSNQSGRVSKLGNETHFLFFGLYPGTTYSFTIRASTAKGFGPPATNQFTTKISAPSMPAYEFETPLNQTDNTVTVMLKPAQSRGAPVSVYQIVVEEERPRRTKKTTEILKCYPVPIHFQNASILNSQYYFAAEFPADSLQAAQPFTIGDNKTYNGYWNTPLLPHKSYRIYYQAASRANGETKIDCVRVATKGAVTPKPVPEPEKQTDHTVKIAGVIAGILLFVIIFLGVVLVMKKRKLAKKRKETMSSTRQEMTVMVNSMDKSYAEQGTNCDEAFSFMGTHNLNGRSVSSPSSFTMKTNTLSTSVPNSYYPDETHTMASDTSSLAQPHTYKKREAADVPYQTGQLHPAIRVADLLQHITQMKCAEGYGFKEEYESFFEGQSAPWDSAKKDENRMKNRYGNIIAYDHSRVRLQMLEGDNNSDYINGNYIDGYHRPNHYIATQGPMQETIYDFWRMVWHENTASIIMVTNLVEVGRVKCCKYWPDDTEIYKDIKVTLIDTELLAEYVIRTFAVEKRGIHEIREIRQFHFTGWPDHGVPYHATGLLGFVRQVKSKSPPNAGPLVVHCSAGAGRTGCFIVIDIMLDMAEREGVVDIYNCVRELRSRRVNMVQTEEQYVFIHDAILEACLCGDTSIPASQVRSLYYDMNKLDPQTNSSQIKEEFRTLNMVTPTLRVEDCSIALLPRNHEKNRCMDILPPDRCLPFLITIDGESSNYINAALMDSYKQPSAFIVTQHPLPNTVKDFWRLVLDYHCTSVVMLNDVDPAQLCPQYWPENGVHRHGPIQVEFVSADLEEDIISRIFRIYNASRPQDGHRMVQQFQFLGWPMYRDTPVSKRSFLKLIRQVDKWQEEYNGGEGRTVVHCLNGGGRSGTFCAISIVCEMLRHQRTVDVFHAVKTLRNNKPNMVDLLDQYKFCYEVALEYLNSG 1 743 764 PEKQTDHTVK IAGVIAGILLFVIIFLGVVLVM KKRKLAKKRK +Q7M729 MSRAGNRGNTQARWLGTGLLGLFLLPMYLSLEVSVGKATTIYAINGSSILLPCTFSSCYGFENLYFKWSYNNSETSRILIDGIVKNDKSDPKVRVKDDDRITLEGSTKEKTNNISILLSDLEFSDTGRYTCFVRNPKEKDLNNSATIFLQVVDKLEKVDNTVTLIILAVVGGVIGLLVCILLLKKLITFILKKTREKKKECLVSSSGNDNTENGLPGSKAEEKPPTKV 1 161 183 VVDKLEKVDN TVTLIILAVVGGVIGLLVCILLL KKLITFILKK +O75056 
MKPGPPHRAGAAHGAGAGAGAAAGPGARGLLLPPLLLLLLAGRAAGAQRWRSENFERPVDLEGSGDDDSFPDDELDDLYSGSGSGYFEQESGIETAMRFSPDVALAVSTTPAVLPTTNIQPVGTPFEELPSERPTLEPATSPLVVTEVPEEPSQRATTVSTTMATTAATSTGDPTVATVPATVATATPSTPAAPPFTATTAVIRTTGVRRLLPLPLTTVATARATTPEAPSPPTTAAVLDTEAPTPRLVSTATSRPRALPRPATTQEPDIPERSTLPLGTTAPGPTEVAQTPTPETFLTTIRDEPEVPVSGGPSGDFELPEEETTQPDTANEVVAVGGAAAKASSPPGTLPKGARPGPGLLDNAIDSGSSAAQLPQKSILERKEVLVAVIVGGVVGALFAAFLVTLLIYRMKKKDEGSYTLEEPKQASVTYQKPDKQEEFYA 1 387 409 KSILERKEVL VAVIVGGVVGALFAAFLVTLLIY RMKKKDEGSY +P78324 MEPAGPAPGRLGPLLCLLLAASCAWSGVAGEEELQVIQPDKSVLVAAGETATLRCTATSLIPVGPIQWFRGAGPGRELIYNQKEGHFPRVTTVSDLTKRNNMDFSIRIGNITPADAGTYYCVKFRKGSPDDVEFKSGAGTELSVRAKPSAPVVSGPAARATPQHTVSFTCESHGFSPRDITLKWFKNGNELSDFQTNVDPVGESVSYSIHSTAKVVLTREDVHSQVICEVAHVTLQGDPLRGTANLSETIRVPPTLEVTQQPVRAENQVNVTCQVRKFYPQRLQLTWLENGNVSRTETASTVTENKDGTYNWMSWLLVNVSAHRDDVKLTCQVEHDGQPAVSKSHDLKVSAHPKEQGSNTAAENTGSNERNIYIVVGVVCTLLVALLMAALYLVRIRQKKAQGSTSSTRLHEPEKNAREITQDTNDITYADLNLPKGKKPAPQAAEPNNHTEYASIQTSPQPASEDTLTYADLDMVHLNRTPKQPAPKPEPSFSEYASVQVPRK 1 372 394 AENTGSNERN IYIVVGVVCTLLVALLMAALYLV RIRQKKAQGS +Q92673 
MATRSSRRESRLPFLFTLVALLPPGALCEVWTQRLHGGSAPLPQDRGFLVVQGDPRELRLWARGDARGASRADEKPLRRKRSAALQPEPIKVYGQVSLNDSHNQMVVHWAGEKSNVIVALARDSLALARPKSSDVYVSYDYGKSFKKISDKLNFGLGNRSEAVIAQFYHSPADNKRYIFADAYAQYLWITFDFCNTLQGFSIPFRAADLLLHSKASNLLLGFDRSHPNKQLWKSDDFGQTWIMIQEHVKSFSWGIDPYDKPNTIYIERHEPSGYSTVFRSTDFFQSRENQEVILEEVRDFQLRDKYMFATKVVHLLGSEQQSSVQLWVSFGRKPMRAAQFVTRHPINEYYIADASEDQVFVCVSHSNNRTNLYISEAEGLKFSLSLENVLYYSPGGAGSDTLVRYFANEPFADFHRVEGLQGVYIATLINGSMNEENMRSVITFDKGGTWEFLQAPAFTGYGEKINCELSQGCSLHLAQRLSQLLNLQLRRMPILSKESAPGLIIATGSVGKNLASKTNVYISSSAGARWREALPGPHYYTWGDHGGIITAIAQGMETNELKYSTNEGETWKTFIFSEKPVFVYGLLTEPGEKSTVFTIFGSNKENVHSWLILQVNATDALGVPCTENDYKLWSPSDERGNECLLGHKTVFKRRTPHATCFNGEDFDRPVVVSNCSCTREDYECDFGFKMSEDLSLEVCVPDPEFSGKSYSPPVPCPVGSTYRRTRGYRKISGDTCSGGDVEARLEGELVPCPLAEENEFILYAVRKSIYRYDLASGATEQLPLTGLRAAVALDFDYEHNCLYWSDLALDVIQRLCLNGSTGQEVIINSGLETVEALAFEPLSQLLYWVDAGFKKIEVANPDGDFRLTIVNSSVLDRPRALVLVPQEGVMFWTDWGDLKPGIYRSNMDGSAAYHLVSEDVKWPNGISVDDQWIYWTDAYLECIERITFSGQQRSVILDNLPHPYAIAVFKNEIYWDDWSQLSIFRASKYSGSQMEILANQLTGLMDMKIFYKGKNTGSNACVPRPCSLLCLPKANNSRSCRCPEDVSSSVLPSGDLMCDCPQGYQLKNNTCVKQENTCLRNQYRCSNGNCINSIWWCDFDNDCGDMSDERNCPTTICDLDTQFRCQESGTCIPLSYKCDLEDDCGDNSDESHCEMHQCRSDEYNCSSGMCIRSSWVCDGDNDCRDWSDEANCTAIYHTCEASNFQCRNGHCIPQRWACDGDTDCQDGSDEDPVNCEKKCNGFRCPNGTCIPSSKHCDGLRDCSDGSDEQHCEPLCTHFMDFVCKNRQQCLFHSMVCDGIIQCRDGSDEDAAFAGCSQDPEFHKVCDEFGFQCQNGVCISLIWKCDGMDDCGDYSDEANCENPTEAPNCSRYFQFRCENGHCIPNRWKCDRENDCGDWSDEKDCGDSHILPFSTPGPSTCLPNYYRCSSGTCVMDTWVCDGYRDCADGSDEEACPLLANVTAASTPTQLGRCDRFEFECHQPKTCIPNWKRCDGHQDCQDGRDEANCPTHSTLTCMSREFQCEDGEACIVLSERCDGFLDCSDESDEKACSDELTVYKVQNLQWTADFSGDVTLTWMRPKKMPSASCVYNVYYRVVGESIWKTLETHSNKTNTVLKVLKPDTTYQVKVQVQCLSKAHNTNDFVTLRTPEGLPDAPRNLQLSLPREAEGVIVGHWAPPIHTHGLIREYIVEYSRSGSKMWASQRAASNFTEIKNLLVNTLYTVRVAAVTSRGIGNWSDSKSITTIKGKVIPPPDIHIDSYGENYLSFTLTMESDIKVNGYVVNLFWAFDTHKQERRTLNFRGSILSHKVGNLTAHTSYEISAWAKTDLGDSPLAFEHVMTRGVRPPAPSLKAKAINQTAVECTWTGPRNVVYGIFYATSFLDLYRNPKSLTTSLHNKTVIVSKDEQYLFLVRVVVPYQGPSSDYVVVKMIPDSRLPPRHLHVVHTGKTSVVIKWESPYDSPDQDLLYAVAVKDLIRKTDRSYKVKSRNSTVEYTLNKLEPGG
KYHIIVQLGNMSKDSSIKITTVSLSAPDALKIITENDHVLLFWKSLALKEKHFNESRGYEIHMFDSAMNITAYLGNTTDNFFKISNLKMGHNYTFTVQARCLFGNQICGEPAILLYDELGSGADASATQAARSTDVAAVVVPILFLILLSLGVGFAILYTKHRRLQSSFTAFANSHYSSRLGSAIFSSGDDLGEDDEDAPMITGFSDDVPMVIA 1 2136 2158 SATQAARSTD VAAVVVPILFLILLSLGVGFAIL YTKHRRLQSS +Q99523 MERPWGAADGLSRWPHGLGLLLLLQLLPPSTLSQDRLDAPPPPAAPLPRWSGPIGVSWGLRAAAAGGAFPRGGRWRRSAPGEDEECGRVRDFVAKLANNTHQHVFDDLRGSVSLSWVGDSTGVILVLTTFHVPLVIMTFGQSKLYRSEDYGKNFKDITDLINNTFIRTEFGMAIGPENSGKVVLTAEVSGGSRGGRIFRSSDFAKNFVQTDLPFHPLTQMMYSPQNSDYLLALSTENGLWVSKNFGGKWEEIHKAVCLAKWGSDNTIFFTTYANGSCKADLGALELWRTSDLGKSFKTIGVKIYSFGLGGRFLFASVMADKDTTRRIHVSTDQGDTWSMAQLPSVGQEQFYSILAANDDMVFMHVDEPGDTGFGTIFTSDDRGIVYSKSLDRHLYTTTGGETDFTNVTSLRGVYITSVLSEDNSIQTMITFDQGGRWTHLRKPENSECDATAKNKNECSLHIHASYSISQKLNVPMAPLSEPNAVGIVIAHGSVGDAISVMVPDVYISDDGGYSWTKMLEGPHYYTILDSGGIIVAIEHSSRPINVIKFSTDEGQCWQTYTFTRDPIYFTGLASEPGARSMNISIWGFTESFLTSQWVSYTIDFKDILERNCEEKDYTIWLAHSTDPEDYEDGCILGYKEQFLRLRKSSVCQNGRDYVVTKQPSICLCSLEDFLCDFGYYRPENDSKCVEQPELKGHDLEFCLYGREEHLTTNGYRKIPGDKCQGGVNPVREVKDLKKKCTSNFLSPEKQNSKSNSVPIILAIVGLMLVTVVAGVLIVKKYVCGGRFLVHRYSVLQQHAEANGVDGVDALDTASHTNKSGYHDDSDEDLLE 1 756 778 SPEKQNSKSN SVPIILAIVGLMLVTVVAGVLIV KKYVCGGRFL +Q8BGV3 MARGLDLAPLLLLLLAMATRFCTAQSNCTCPTNKMTVCDTNGPGGVCQCRAMGSQVLVDCSTLTSKCLLLKARMSARKSGRSLVMPSEHAILDNDGLYDPECDDKGRFKARQCNQTSVCWCVNSVGVRRTDKGDQSLRCDEVVRTHHILIELRHRPTDRAFNHSDLDSELRRLFQERYKLHPSFLSAVHYEEPTIQIELRQNASQKGLRDVDIADAAYYFERDIKGESLFMGRRGLDVQVRGEPLHVERTLIYYLDEKPPQFSMKRLTAGVIAVIAVVSVAVVAGVVVLVVTKRRKSGKYKKVELKELGEMRSEPSL 1 269 291 PPQFSMKRLT AGVIAVIAVVSVAVVAGVVVLVV TKRRKSGKYK +P35590 
MVWRVPPFLLPILFLASHVGAAVDLTLLANLRLTDPQRFFLTCVSGEAGAGRGSDAWGPPLLLEKDDRIVRTPPGPPLRLARNGSHQVTLRGFSKPSDLVGVFSCVGGAGARRTRVIYVHNSPGAHLLPDKVTHTVNKGDTAVLSARVHKEKQTDVIWKSNGSYFYTLDWHEAQDGRFLLQLPNVQPPSSGIYSATYLEASPLGSAFFRLIVRGCGAGRWGPGCTKECPGCLHGGVCHDHDGECVCPPGFTGTRCEQACREGRFGQSCQEQCPGISGCRGLTFCLPDPYGCSCGSGWRGSQCQEACAPGHFGADCRLQCQCQNGGTCDRFSGCVCPSGWHGVHCEKSDRIPQILNMASELEFNLETMPRINCAAAGNPFPVRGSIELRKPDGTVLLSTKAIVEPEKTTAEFEVPRLVLADSGFWECRVSTSGGQDSRRFKVNVKVPPVPLAAPRLLTKQSRQLVVSPLVSFSGDGPISTVRLHYRPQDSTMDWSTIVVDPSENVTLMNLRPKTGYSVRVQLSRPGEGGEGAWGPPTLMTTDCPEPLLQPWLEGWHVEGTDRLRVSWSLPLVPGPLVGDGFLLRLWDGTRGQERRENVSSPQARTALLTGLTPGTHYQLDVQLYHCTLLGPASPPAHVLLPPSGPPAPRHLHAQALSDSEIQLTWKHPEALPGPISKYVVEVQVAGGAGDPLWIDVDRPEETSTIIRGLNASTRYLFRMRASIQGLGDWSNTVEESTLGNGLQAEGPVQESRAAEEGLDQQLILAVVGSVSATCLTILAALLTLVCIRRSCLHRRRTFTYQSGSGEETILQFSSGTLTLTRRPKLQPEPLSYPVLEWEDITFEDLIGEGNFGQVIRAMIKKDGLKMNAAIKMLKEYASENDHRDFAGELEVLCKLGHHPNIINLLGACKNRGYLYIAIEYAPYGNLLDFLRKSRVLETDPAFAREHGTASTLSSRQLLRFASDAANGMQYLSEKQFIHRDLAARNVLVGENLASKIADFGLSRGEEVYVKKTMGRLPVRWMAIESLNYSVYTTKSDVWSFGVLLWEIVSLGGTPYCGMTCAELYEKLPQGYRMEQPRNCDDEVYELMRQCWRDRPYERPPFAQIALQLGRMLEARKAYVNMSLFENFTYAGIDATAEEA 1 764 786 EEGLDQQLIL AVVGSVSATCLTILAALLTLVCI RRSCLHRRRT +P08138 MGAGATGRAMDGPRLLLLLLLGVSLGGAKEACPTGLYTHSGECCKACNLGEGVAQPCGANQTVCEPCLDSVTFSDVVSATEPCKPCTECVGLQSMSAPCVEADDAVCRCAYGYYQDETTGRCEACRVCEAGSGLVFSCQDKQNTVCEECPDGTYSDEANHVDPCLPCTVCEDTERQLRECTRWADAECEEIPGRWITRSTPPEGSDSTAPSTQEPEAPPEQDLIASTVAGVVTTVMGSSQPVVTRGTTDNLIPVYCSILAAVVVGLVAYIAFKRWNSCKQNKQGANSRPVNQTPPPEGEKLHSDSGISVDSQSLHDQQPHTQTASGQALKGDGGLYSSLPPAKREEVEKLLNGSAGDTWRHLAGELGYQPEHIDSFTHEACPVRALLASWATQDSATLDALLAALRRIQRADLVESLCSESTATSPV 1 250 272 QPVVTRGTTD NLIPVYCSILAAVVVGLVAYIAF KRWNSCKQNK +Q02223 MLQMAGQCSQNEYFDSLLHACIPCQLRCSSNTPPLTCQRYCNASVTNSVKGTNAILWTCLGLSLIISLAVFVLMFLLRKINSEPLKDEFKNTGSGLLGMANIDLEKSRTGDEIILPRGLEYTVEECTCEDCIKSKPKVDSDHCFPLPAMEEGATILVTTKTNDYCKSLPAALSATEIEKSISAR 1 54 76 SVTNSVKGTN AILWTCLGLSLIISLAVFVLMFL LRKINSEPLK +P19438 
MGLSTVPDLLLPLVLLELLVGIYPSGVIGLVPHLGDREKRDSVCPQGKYIHPQNNSICCTKCHKGTYLYNDCPGPGQDTDCRECESGSFTASENHLRHCLSCSKCRKEMGQVEISSCTVDRDTVCGCRKNQYRHYWSENLFQCFNCSLCLNGTVHLSCQEKQNTVCTCHAGFFLRENECVSCSNCKKSLECTKLCLPQIENVKGTEDSGTTVLLPLVIFFGLCLLSLLFIGLMYRYQRWKSKLYSIVCGKSTPEKEGELEGTTTKPLAPNPSFSPTPGFTPTLGFSPVPSSTFTSSSTYTPGDCPNFAAPRREVAPPYQGADPILATALASDPIPNPLQKWEDSAHKPQSLDTDDPATLYAVVENVPPLRWKEFVRRLGLSDHEIDRLELQNGRCLREAQYSMLATWRRRTPRREATLELLGRVLRDMDLLGCLEDIEEALCGPAALPPAPSLLR 1 212 234 VKGTEDSGTT VLLPLVIFFGLCLLSLLFIGLMY RYQRWKSKLY +Q06418 MALRRSMGRPGLPPLPLPPPPRLGLLLAALASLLLPESAAAGLKLMGAPVKLTVSQGQPVKLNCSVEGMEEPDIQWVKDGAVVQNLDQLYIPVSEQHWIGFLSLKSVERSDAGRYWCQVEDGGETEISQPVWLTVEGVPFFTVEPKDLAVPPNAPFQLSCEAVGPPEPVTIVWWRGTTKIGGPAPSPSVLNVTGVTQSTMFSCEAHNLKGLASSRTATVHLQALPAAPFNITVTKLSSSNASVAWMPGADGRALLQSCTVQVTQAPGGWEVLAVVVPVPPFTCLLRDLVPATNYSLRVRCANALGPSPYADWVPFQTKGLAPASAPQNLHAIRTDSGLILEWEEVIPEAPLEGPLGPYKLSWVQDNGTQDELTVEGTRANLTGWDPQKDLIVRVCVSNAVGCGPWSQPLVVSSHDRAGQQGPPHSRTSWVPVVLGVLTALVTAAALALILLRKRRKETRFGQAFDSVMARGEPAVHFRAARSFNRERPERIEATLDSLGISDELKEKLEDVLIPEQQFTLGRMLGKGEFGSVREAQLKQEDGSFVKVAVKMLKADIIASSDIEEFLREAACMKEFDHPHVAKLVGVSLRSRAKGRLPIPMVILPFMKHGDLHAFLLASRIGENPFNLPLQTLIRFMVDIACGMEYLSSRNFIHRDLAARNCMLAEDMTVCVADFGLSRKIYSGDYYRQGCASKLPVKWLALESLADNLYTVQSDVWAFGVTMWEIMTRGQTPYAGIENAEIYNYLIGGNRLKQPPECMEDVYDLMYQCWSADPKQRPSFTCLRMELENILGQLSVLSASQDPLYINIERAEEPTAGGSLELPGRDQPYSGAGDGSGMGAVGGTPSDCRYILTPGGLAEQPGQAEHQPESPLNETQRLLLLQQGLLPHSSC 1 429 451 QQGPPHSRTS WVPVVLGVLTALVTAAALALILL RKRRKETRFG +P30530 
MAWRCPRMGRVPLAWCLALCGWACMAPRGTQAEESPFVGNPGNITGARGLTGTLRCQLQVQGEPPEVHWLRDGQILELADSTQTQVPLGEDEQDDWIVVSQLRITSLQLSDTGQYQCLVFLGHQTFVSQPGYVGLEGLPYFLEEPEDRTVAANTPFNLSCQAQGPPEPVDLLWLQDAVPLATAPGHGPQRSLHVPGLNKTSSFSCEAHNAKGVTTSRTATITVLPQQPRNLHLVSRQPTELEVAWTPGLSGIYPLTHCTLQAVLSDDGMGIQAGEPDPPEEPLTSQASVPPHQLRLGSLHPHTPYHIRVACTSSQGPSSWTHWLPVETPEGVPLGPPENISATRNGSQAFVHWQEPRAPLQGTLLGYRLAYQGQDTPEVLMDIGLRQEVTLELQGDGSVSNLTVCVAAYTAAGDGPWSLPVPLEAWRPGQAQPVHQLVKEPSTPAFSWPWWYVLLGAVVAAACVLILALFLVHRRKKETRYGEVFEPTVERGELVVRYRVRKSYSRRTTEATLNSLGISEELKEKLRDVMVDRHKVALGKTLGEGEFGAVMEGQLNQDDSILKVAVKTMKIAICTRSELEDFLSEAVCMKEFDHPNVMRLIGVCFQGSERESFPAPVVILPFMKHGDLHSFLLYSRLGDQPVYLPTQMLVKFMADIASGMEYLSTKRFIHRDLAARNCMLNENMSVCVADFGLSKKIYNGDYYRQGRIAKMPVKWIAIESLADRVYTSKSDVWSFGVTMWEIATRGQTPYPGVENSEIYDYLRQGNRLKQPADCLDGLYALMSRCWELNPQDRPSFTELREDLENTLKALPPAQEPDEILYVNMDEGGGYPEPPGAAGGADPPTQPDPKDSCSCLTAAEVHPAGRYVLCPSTTPSPAQPADRGSPAAPGQEDGA 1 450 472 EPSTPAFSWP WWYVLLGAVVAAACVLILALFLV HRRKKETRYG +Q6EMK4 MCSRVPLLLPLLLLLALGPGVQGCPSGCQCSQPQTVFCTARQGTTVPRDVPPDTVGLYVFENGITMLDAGSFAGLPGLQLLDLSQNQIASLPSGVFQPLANLSNLDLTANRLHEITNETFRGLRRLERLYLGKNRIRHIQPGAFDTLDRLLELKLQDNELRALPPLRLPRLLLLDLSHNSLLALEPGILDTANVEALRLAGLGLQQLDEGLFSRLRNLHDLDVSDNQLERVPPVIRGLRGLTRLRLAGNTRIAQLRPEDLAGLAALQELDVSNLSLQALPGDLSGLFPRLRLLAAARNPFNCVCPLSWFGPWVRESHVTLASPEETRCHFPPKNAGRLLLELDYADFGCPATTTTATVPTTRPVVREPTALSSSLAPTWLSPTEPATEAPSPPSTAPPTVGPVPQPQDCPPSTCLNGGTCHLGTRHHLACLCPEGFTGLYCESQMGQGTRPSPTPVTPRPPRSLTLGIEPVSPTSLRVGLQRYLQGSSVQLRSLRLTYRNLSGPDKRLVTLRLPASLAEYTVTQLRPNATYSVCVMPLGPGRVPEGEEACGEAHTPPAVHSNHAPVTQAREGNLPLLIAPALAAVLLAALAAVGAAYCVRRGRAMAAAAQDKGQVGPGAGPLELEGVKVPLEPGPKATEGGGEALPSGSECEVPLMGFPGPGLQSPLHAKPYI 1 577 599 TQAREGNLPL LIAPALAAVLLAALAAVGAAYCV RRGRAMAAAA +P12821 
MGAASGRRGPGLLLPLPLLLLLPPQPALALDPGLQPGNFSADEAGAQLFAQSYNSSAEQVLFQSVAASWAHDTNITAENARRQEEAALLSQEFAEAWGQKAKELYEPIWQNFTDPQLRRIIGAVRTLGSANLPLAKRQQYNALLSNMSRIYSTAKVCLPNKTATCWSLDPDLTNILASSRSYAMLLFAWEGWHNAAGIPLKPLYEDFTALSNEAYKQDGFTDTGAYWRSWYNSPTFEDDLEHLYQQLEPLYLNLHAFVRRALHRRYGDRYINLRGPIPAHLLGDMWAQSWENIYDMVVPFPDKPNLDVTSTMLQQGWNATHMFRVAEEFFTSLELSPMPPEFWEGSMLEKPADGREVVCHASAWDFYNRKDFRIKQCTRVTMDQLSTVHHEMGHIQYYLQYKDLPVSLRRGANPGFHEAIGDVLALSVSTPEHLHKIGLLDRVTNDTESDINYLLKMALEKIAFLPFGYLVDQWRWGVFSGRTPPSRYNFDWWYLRTKYQGICPPVTRNETHFDAGAKFHVPNVTPYIRYFVSFVLQFQFHEALCKEAGYEGPLHQCDIYRSTKAGAKLRKVLQAGSSRPWQEVLKDMVGLDALDAQPLLKYFQPVTQWLQEQNQQNGEVLGWPEYQWHPPLPDNYPEGIDLVTDEAEASKFVEEYDRTSQVVWNEYAEANWNYNTNITTETSKILLQKNMQIANHTLKYGTQARKFDVNQLQNTTIKRIIKKVQDLERAALPAQELEEYNKILLDMETTYSVATVCHPNGSCLQLEPDLTNVMATSRKYEDLLWAWEGWRDKAGRAILQFYPKYVELINQAARLNGYVDAGDSWRSMYETPSLEQDLERLFQELQPLYLNLHAYVRRALHRHYGAQHINLEGPIPAHLLGNMWAQTWSNIYDLVVPFPSAPSMDTTEAMLKQGWTPRRMFKEADDFFTSLGLLPVPPEFWNKSMLEKPTDGREVVCHASAWDFYNGKDFRIKQCTTVNLEDLVVAHHEMGHIQYFMQYKDLPVALREGANPGFHEAIGDVLALSVSTPKHLHSLNLLSSEGGSDEHDINFLMKMALDKIAFIPFSYLVDQWRWRVFDGSITKENYNQEWWSLRLKYQGLCPPVPRTQGDFDPGAKFHIPSSVPYIRYFVSFIIQFQFHEALCQAAGHTGPLHKCDIYQSKEAGQRLATAMKLGFSRPWPEAMQLITGQPNMSASAMLSYFKPLLDWLRTENELHGEKLGWPQYNWTPNSARSEGPLPDSGRVSFLGLDLDAQQARVGQWLLLFLGIALLVATLGLSQRLFSIRHRSLHRHSHGPQFGSEVELRHS 2 1257 1276 GLDLDAQQAR VGQWLLLFLGIALLVATLGL SQRLFSIRHR +P36896 MAESAGASSFFPLVVLLLAGSGGSGPRGVQALLCACTSCLQANYTCETDGACMVSIFNLDGMEHHVRTCIPKVELVPAGKPFYCLSSEDLRNTHCCYTDYCNRIDLRVPSGHLKEPEHPSMWGPVELVGIIAGPVFLLFLIIIIVFLVINYHQRVYHNRQRLDMEDPSCEMCLSKDKTLQDLVYDLSTSGSGSGLPLFVQRTVARTIVLQEIIGKGRFGEVWRGRWRGGDVAVKIFSSREERSWFREAEIYQTVMLRHENILGFIAADNKDNGTWTQLWLVSDYHEHGSLFDYLNRYTVTIEGMIKLALSAASGLAHLHMEIVGTQGKPGIAHRDLKSKNILVKKNGMCAIADLGLAVRHDAVTDTIDIAPNQRVGTKRYMAPEVLDETINMKHFDSFKCADIYALGLVYWEIARRCNSGGVHEEYQLPYYDLVPSDPSIEEMRKVVCDQKLRPNIPNWWQSYEALRVMGKMMRECWYANGAARLTALRIKKTLSQLSVQEDVKI 2 127 149 EHPSMWGPVE LVGIIAGPVFLLFLIIIIVFLVI NYHQRVYHNR +Q8NER5 
MTRALCSALRQALLLLAAAAELSPGLKCVCLLCDSSNFTCQTEGACWASVMLTNGKEQVIKSCVSLPELNAQVFCHSSNNVTKTECCFTDFCNNITLHLPTASPNAPKLGPMELAIIITVPVCLLSIAAMLTVWACQGRQCSYRKKKRPNVEEPLSECNLVNAGKTLKDLIYDVTASGSGSGLPLLVQRTIARTIVLQEIVGKGRFGEVWHGRWCGEDVAVKIFSSRDERSWFREAEIYQTVMLRHENILGFIAADNKDNGTWTQLWLVSEYHEQGSLYDYLNRNIVTVAGMIKLALSIASGLAHLHMEIVGTQGKPAIAHRDIKSKNILVKKCETCAIADLGLAVKHDSILNTIDIPQNPKVGTKRYMAPEMLDDTMNVNIFESFKRADIYSVGLVYWEIARRCSVGGIVEEYQLPYYDMVPSDPSIEEMRKVVCDQKFRPSIPNQWQSCEALRVMGRIMRECWYANGAARLTALRIKKTISQLCVKEDCKA 2 114 136 PNAPKLGPME LAIIITVPVCLLSIAAMLTVWAC QGRQCSYRKK +P37023 MTLGSPRKGLLMLLMALVTQGDPVKPSRGPLVTCTCESPHCKGPTCRGAWCTVVLVREEGRHPQEHRGCGNLHRELCRGRPTEFVNHYCCDSHLCNHNVSLVLEATQPPSEQPGTDGQLALILGPVLALLALVALGVLGLWHVRRRQEKQRGLHSELGESSLILKASEQGDSMLGDLLDSDCTTGSGSGLPFLVQRTVARQVALVECVGKGRYGEVWRGLWHGESVAVKIFSSRDEQSWFRETEIYNTVLLRHDNILGFIASDMTSRNSSTQLWLITHYHEHGSLYDFLQRQTLEPHLALRLAVSAACGLAHLHVEIFGTQGKPAIAHRDFKSRNVLVKSNLQCCIADLGLAVMHSQGSDYLDIGNNPRVGTKRYMAPEVLDEQIRTDCFESYKWTDIWAFGLVLWEIARRTIVNGIVEDYRPPFYDVVPNDPSFEDMKKVVCVDQQTPTIPNRLAADPVLSGLAQMMRECWYPNPSARLTALRIKKTLQKISNSPEKPKVIQ 2 119 141 PSEQPGTDGQ LALILGPVLALLALVALGVLGLW HVRRRQEKQR +O43184 
MAARPLPVSPARALLLALAGALLAPCEARGVSLWNQGRADEVVSASVGSGDLWIPVKSFDSKNHPEVLNIRLQRESKELIINLERNEGLIASSFTETHYLQDGTDVSLARNYTVILGHCYYHGHVRGYSDSAVSLSTCSGLRGLIVFENESYVLEPMKSATNRYKLFPAKKLKSVRGSCGSHHNTPNLAAKNVFPPPSQTWARRHKRETLKATKYVELVIVADNREFQRQGKDLEKVKQRLIEIANHVDKFYRPLNIRIVLVGVEVWNDMDKCSVSQDPFTSLHEFLDWRKMKLLPRKSHDNAQLVSGVYFQGTTIGMAPIMSMCTADQSGGIVMDHSDNPLGAAVTLAHELGHNFGMNHDTLDRGCSCQMAVEKGGCIMNASTGYPFPMVFSSCSRKDLETSLEKGMGVCLFNLPEVRESFGGQKCGNRFVEEGEECDCGEPEECMNRCCNATTCTLKPDAVCAHGLCCEDCQLKPAGTACRDSSNSCDLPEFCTGASPHCPANVYLHDGHSCQDVDGYCYNGICQTHEQQCVTLWGPGAKPAPGICFERVNSAGDPYGNCGKVSKSSFAKCEMRDAKCGKIQCQGGASRPVIGTNAVSIETNIPLQQGGRILCRGTHVYLGDDMPDPGLVLAGTKCADGKICLNRQCQNISVFGVHECAMQCHGRGVCNNRKNCHCEAHWAPPFCDKFGFGGSTDSGPIRQADNQGLTIGILVTILCLLAAGFVVYLKRKTLIRLLFTNKKTTIEKLRCVRPSRPPRGFQPCQAHLGHLGKGLMRKPPDSYPPKDNPRRLLQCQNVDISRPLNGLNVPQPQSTQRVLPPLHRAPRAPSVPARPLPAKPALRQAQGTCKPNPPQKPLPADPLARTTRLTHALARTPGQWETGLRLAPLRPAPQYPHQVPRSTHTAYIK 2 707 729 DSGPIRQADN QGLTIGILVTILCLLAAGFVVYL KRKTLIRLLF +Q13444 MRLALLWALGLLGAGSPLPSWPLPNIGGTEEQQAESEKAPREPLEPQVLQDDLPISLKKVLQTSLPEPLRIKLELDGDSHILELLQNRELVPGRPTLVWYQPDGTRVVSEGHTLENCCYQGRVRGYAGSWVSICTCSGLRGLVVLTPERSYTLEQGPGDLQGPPIISRIQDLHLPGHTCALSWRESVHTQKPPEHPLGQRHIRRRRDVVTETKTVELVIVADHSEAQKYRDFQHLLNRTLEVALLLDTFFRPLNVRVALVGLEAWTQRDLVEISPNPAVTLENFLHWRRAHLLPRLPHDSAQLVTGTSFSGPTVGMAIQNSICSPDFSGGVNMDHSTSILGVASSIAHELGHSLGLDHDLPGNSCPCPGPAPAKTCIMEASTDFLPGLNFSNCSRRALEKALLDGMGSCLFERLPSLPPMAAFCGNMFVEPGEQCDCGFLDDCVDPCCDSLTCQLRPGAQCASDGPCCQNCQLRPSGWQCRPTRGDCDLPEFCPGDSSQCPPDVSLGDGEPCAGGQAVCMHGRCASYAQQCQSLWGPGAQPAAPLCLQTANTRGNAFGSCGRNPSGSYVSCTPRDAICGQLQCQTGRTQPLLGSIRDLLWETIDVNGTELNCSWVHLDLGSDVAQPLLTLPGTACGPGLVCIDHRCQRVDLLGAQECRSKCHGHGVCDSNRHCYCEEGWAPPDCTTQLKATSSLTTGLLLSLLVLLVLVMLGASYWYRARLHQRLCQLKGPTCQYRAAQSGPSERPGPPQRALLARGTKQASALSFPAPPSRPLPPDPVSKRLQAELADRPNPPTRPLPADPVVRSPKSQGPAKPPPPRKPLPADPQGRCPSGDLPGPGAGIPPLVVPSRPAPPPPTVSSLYL 2 695 717 TTQLKATSSL TTGLLLSLLVLLVLVMLGASYWY RARLHQRLCQ +Q9Z0F8 
MRRRLLILTTLVPFVLAPRPPEEAGSGSHPRLEKLDSLLSDYDILSLANIQQHSIRKRDLQSATHLETLLTFSALKRHFKLYLTSSTERFSQNLRVVVVDGKEESEYSVKWQNFFSGHVVGEPDSRVLAHIGDDDVTVRINTDGAEYNVEPLWRFVNDTKDKRMLVYKSEDIKDFSRLQSPKVCGYLNADSEELLPKGLIDREPSEEFVRRVKRRAEPNPLKNTCKLLVVADHRFYKYMGRGEESTTTNYLIELIDRVDDIYRNTSWDNAGFKGYGVQIEQIRILKSPQEVKPGERHFNMAKSFPNEEKDAWDVKMLLEQFSFDIAEEASKVCLAHLFTYQDFDMGTLGLAYVGSPRANSHGGVCPKAYYNPTVKKNIYLNSGLTSTKNYGKTILTKEADLVTTHELGHNFGAEHDPDGLAECAPNEDQGGKYVMYPIAVSGDHENNKMFSNCSKQSIYKTIESKAQECFQERSNKVCGNSRVDEGEECDPGIMYLNNDTCCNSDCTLKPGVQCSDRNSPCCKNCQFETAQKKCQEAINATCKGVSYCTGNSSECPPPGDAEDDTVCLDLGKCKAGKCIPFCKREQELESCACVDTDNSCKVCCRNLSGPCVPYVDAEQKNLFLRKGKPCTVGFCDMNGKCEKRVQDVIERFWDFIDQLSINTFGKFLADNIVGSVLVFSLIFWIPFSILVHCVDKKLDKQYESLSLFHHSNIEMLSSMDSASVRIIKPFPAPQTPGRLQALQPAAMMPPVPAAPKLDHQRMDTIQEDPSTDSHADDDGFEKDPFPNSSTAAKSFEDLTDHPVTRSEKAASFKLQRQSRVDSKETEC 2 672 694 NTFGKFLADN IVGSVLVFSLIFWIPFSILVHCV DKKLDKQYES +Q9Y3Q7 MFLLLALLTELGRLQAHEGSEGIFLHVTVPRKIKSNDSEVSERKMIYIITIDGQPYTLHLGKQSFLPQNFLVYTYNETGSLHSVSPYFMMHCHYQGYAAEFPNSFVTLSICSGLRGFLQFENISYGIEPVESSARFEHIIYQMKNNDPNVSILAVNYSHIWQKDQPYKVPLNSQIKNLSKLLPQYLEIYIIVEKALYDYMGSEMMAVTQKIVQVIGLVNTMFTQFKLTVILSSLELWSNENQISTSGDADDILQRFLAWKRDYLILRPHDIAYLLVYRKHPKYVGATFPGTVCNKSYDAGIAMYPDAIGLEGFSVIIAQLLGLNVGLTYDDITQCFCLRATCIMNHEAVSASGRKIFSNCSMHDYRYFVSKFETKCLQKLSNLQPLHQNQPVCGNGILESNEECDCGNKNECQFKKCCDYNTCKLKGSVKCGSGPCCTSKCELSIAGTPCRKSIDPECDFTEYCNGTSSNCVPDTYALNGRLCKLGTAYCYNGQCQTTDNQCAKIFGKGAQGAPFACFKEVNSLHERSENCGFKNSQPLPCERKDVLCGKLACVQPHKNANKSDAQSTVYSYIQDHVCVSIATGSSMRSDGTDNAYVADGTMCGPEMYCVNKTCRKVHLMGYNCNATTKCKGKGICNNFGNCQCFPGHRPPDCKFQFGSPGGSIDDGNFQKSGDFYTEKGYNTHWNNWFILSFCIFLPFFIVFTTVIFKRNEISKSCNRENAEYNRNSSVVSESDDVGH 2 685 707 FYTEKGYNTH WNNWFILSFCIFLPFFIVFTTVI FKRNEISKSC +Q9R157 
MPLLFILAELAMLFARLDSEGICLHITVPQKIEPRKGGDAEGKVTYVITIDGKPYSLHLRNHSFLSQNFLVYTYNETGSLYSDSSHFLAHCHYRGYVDEVPNSIVTLSICSGLRGFLQLENVSYGIEPLESSARFEHIVYQVKSDSSMLAGNDSHVWQIDQLDKGHFNEQDKNHSQLLPQSLKLHIIVGKFLFDYMGSDIMAITQKIFQIIGLVNAMLTQLKLSVVLASLELWSDKNHISTDGNATDILQRLLDWKRDYLTLQSNEITHLLIYRRRPKYIGAASPGEICSKSYVAGVGMYPEDIGLEGFSVVITQLIGLHIGLTYDDNIRNCSCPSAPCIMQQGALSSSGKKTFSNCSLHDYMHYVSNFDTQCLGDLSNVHVLQPNQAVCGNGIMEAGEECDCGNETECQFKECCDHETCRLKGSAQCGSGACCMPTCELSASGTPCRKAVDPECDFTEYCDGSSSHCVPDTFALNGHLCRLGSAYCYNGRCQALNDQCVSLFGKGSQGASYACFEKVNSPRENLANCDSKDSYSVPCGQQDVLCGKLACFRPPKNYKSPSQSVVYSYVHDSVCLSILPGLSMRSDGRDSAYVADGTVCGPQMYCINGTCKEVNFTGNDCNATKKCKGNGICNNFGNCQCFPDYRPPDCNLQIGSPGGSIDDGNTLRTESAFATKRLSKNEDSWVILGFFIFLPFIVTFLVGIMKRNERKIVPQGEHKI 2 684 703 TKRLSKNEDS WVILGFFIFLPFIVTFLVGI MKRNERKIVP +O35674 MPGRAGVARFCLLALALQLHWPLAACEPGWTTRGSQEGSPPLQHELIIPQWRTSESPGRGKHPLRAELRVMAEGRELILDLEKNEHLFAPAYTETCYTASGNPQTSTLKSEDHCFYHGTVRDVDESSVTLSTCRGIRGLIIVRSNLSYIIEPVPNSDSQHRIYRSEHLTLPPGNCGFEHSGPTSKDWALQFTHQTKKQPRRMKREDLHSMKYVELYLVADYAEFQKNRHDQDATKRKLMEIANYVDKFYRSLNIRIALVGLEVWTHGDKCEVSENPYSTLWSFLSWRRKLLAQKSHDNAQLITGRSFQGTTIGLAPLMAMCSVYQSGGVSMDHSENAIGVASTVAHEIGHNFGMSHDSAHCCSASAADGGCIMAAATGHPFPKVFSWCNRKELDRYLQTGGGMCLSNMPDTRTLYGGRRCGNGYLEDGEECDCGEEEECKNPCCNASNCTLKEGAECAHGSCCHQCKLVAPGTQCREQVRQCDLPEFCTGKSPHCPTNYYQMDGTPCEGGQAYCYNGMCLTYQEQCQQLWGPGARPALDLCFERVNAAGDTYGNCGKGLNGQYRKCSPRDAKCGKIQCQSTQARPLESNAVSIDTTITLNGRRIHCRGTHVYRGPEEEEGEGDMLDPGLVMTGTKCGHNHICFEGQCRNTSFFETEGCGKKCNGHGVCNNNKNCHCFPGWSPPFCNTPGDGGSVDSGPLPPKSVGPVIAGVFSALFVLAVLVLLCHCYRQSHKLGKPSALPFKLRHQFSCPFRVSQSGGTGHANPTFKLQTPQGKRKVTNTPESLRKPSHPPPRPPPDYLRVESPPAPLPAHLNRAAGSSPEAGARIERKESARRPPPSRPMPPAPNCLLSQDFSRPRPPQKALPANPVPGQRTGPRSGGTSLLQPPTSGPQPPRPPAVPVPKLPEYRSQRVGAIISSKI 2 704 726 VDSGPLPPKS VGPVIAGVFSALFVLAVLVLLCH CYRQSHKLGK +O43506 
MAVGEPLVHIRVTLLLLWFGMFLSISGHSQARPSQYFTSPEVVIPLKVISRGRGAKAPGWLSYSLRFGGQRYIVHMRVNKLLFAAHLPVFTYTEQHALLQDQPFIQDDCYYHGYVEGVPESLVALSTCSGGFLGMLQINDLVYEIKPISVSATFEHLVYKIDSDDTQFPPMRCGLTEEKIAHQMELQLSYNFTLKQSSFVGWWTHQRFVELVVVVDNIRYLFSQSNATTVQHEVFNVVNIVDSFYHPLEVDVILTGIDIWTASNPLPTSGDLDNVLEDFSIWKNYNLNNRLQHDVAHLFIKDTQGMKLGVAYVKGICQNPFNTGVDVFEDNRLVVFAITLGHELGHNLGMQHDTQWCVCELQWCIMHAYRKVTTKFSNCSYAQYWDSTISSGLCIQPPPYPGNIFRLKYCGNLVVEEGEECDCGTIRQCAKDPCCLLNCTLHPGAACAFGICCKDCKFLPSGTLCRQQVGECDLPEWCNGTSHQCPDDVYVQDGISCNVNAFCYEKTCNNHDIQCKEIFGQDARSASQSCYQEINTQGNRFGHCGIVGTTYVKCWTPDIMCGRVQCENVGVIPNLIEHSTVQQFHLNDTTCWGTDYHLGMAIPDIGEVKDGTVCGPEKICIRKKCASMVHLSQACQPKTCNMRGICNNKQHCHCNHEWAPPYCKDKGYGGSADSGPPPKNNMEGLNVMGKLRYLSLLCLLPLVAFLLFCLHVLFKKRTKSKEDEEG 2 692 714 MEGLNVMGKL RYLSLLCLLPLVAFLLFCLHVLF KKRTKSKEDE +Q9UKJ8 MAVDGTLVYIRVTLLLLWLGVFLSISGYCQAGPSQHFTSPEVVIPLKVISRGRSAKAPGWLSYSLRFGGQKHVVHMRVKKLLVSRHLPVFTYTDDRALLEDQLFIPDDCYYHGYVEAAPESLVVFSACFGGFRGVLKISGLTYEIEPIRHSATFEHLVYKINSNETQFPAMRCGLTEKEVARQQLEFEEAENSALEPKSAGDWWTHAWFLELVVVVNHDFFIYSQSNISKVQEDVFLVVNIVDSMYKQLGTYIILIGIEIWNQGNVFPMTSIEQVLNDFSQWKQISLSQLQHDAAHMFIKNSLISILGLAYVAGICRPPIDCGVDNFQGDTWSLFANTVAHELGHTLGMQHDEEFCFCGERGCIMNTFRVPAEKFTNCSYADFMKTTLNQGSCLHNPPRLGEIFMLKRCGNGVVEREEQCDCGSVQQCEQDACCLLNCTLRPGAACAFGLCCKDCKFMPSGELCRQEVNECDLPEWCNGTSHQCPEDRYVQDGIPCSDSAYCYQKRCNNHDQHCREIFGKDAKSASQNCYKEINSQGNRFGHCGINGTTYLKCHISDVFCGRVQCENVRDIPLLQDHFTLQHTHINGVTCWGIDYHLRMNISDIGEVKDGTVCGPGKICIHKKCVSLSVLSHVCLPETCNMKGICNNKHHCHCGYGWSPPYCQHRGYGGSIDSGPASAKRGVFLPLIVIPSLSVLTFLFTVGLLMYLRQCSGPKETKAHSSG 2 685 707 PASAKRGVFL PLIVIPSLSVLTFLFTVGLLMYL RQCSGPKETK +Q9JI76 
MECFIMLGADARTLMRVTLLLLWLKALPSLIDLSQTGSTQYLSSPEVVIPLKVTSRARGAKNSEWLSYSLVFGGRRHVVHMRVKKLLVSTHIPVLTYTEEHTPLSDYPFVPSDCYYHGYVEGALESLVAFSACNGGLQGVLQMNGFSYEIEPIKHSSTFEHLVYTLNNNKTQFPPMLCSLTEKRLLYQPFGVEEAKKSAMKQNYGKLWPHMWFLELAVVVDYGFFTNAQQNLSKVRGDVVLVVNMVDSMYKPLDTYVTLVGIEIWNRGNVLPMENIHQVLEDFSHWKQISLSQVHHDAAHIFIRSSLISVLGIAYIAGICRPPLDCGVENFQGDAWSLFANTVAHELGHTFGMKHDEESCSCGKSGCVMSTFRVPAERFTNCSYSDFMKTTLNQGTCLYNHPRPGAGFLVKRCGNGMVESEEECDCGSVQECEQDPCCFLNCTLRPAAACSFGLCCKDCKFMLLGELCRPKINECDLPEWCNGTSHQCPEDGYVQDGVPCGAGAYCYQKQCNNHDQQCREIFGKGARSASHNCYKEINLQGNRFGHCGTDGTVFLKCRMSDVFCGKVHCENVEDIHHPQAPYVLQNIYANGITCWSTGHCLGMGVPDVGEVKDGTTCGVGKICLHKKCVSLSVLSNACLPETCNRKGVCNNKHHCHCDYGWSPPFCLHRGYGGSIDSGPTSQKRRVIITVLSITVPVLSILICLLIAGLYRIYCKIPSGPKETKASSPG 2 687 709 SGPTSQKRRV IITVLSITVPVLSILICLLIAGL YRIYCKIPSG +O75077 MKPPGSSSRQPPLAGCSLAGASCGPQRGPAGSVPASAPARTPPCRLLLVLLLLPPLAASSRPRAWGAAAPSAPHWNETAEKNLGVLADEDNTLQQNSSSNISYSNAMQKEITLPSRLIYYINQDSESPYHVLDTKARHQQKHNKAVHLAQASFQIEAFGSKFILDLILNNGLLSSDYVEIHYENGKPQYSKGGEHCYYHGSIRGVKDSKVALSTCNGLHGMFEDDTFVYMIEPLELVHDEKSTGRPHIIQKTLAGQYSKQMKNLTMERGDQWPFLSELQWLKRRKRAVNPSRGIFEEMKYLELMIVNDHKTYKKHRSSHAHTNNFAKSVVNLVDSIYKEQLNTRVVLVAVETWTEKDQIDITTNPVQMLHEFSKYRQRIKQHADAVHLISRVTFHYKRSSLSYFGGVCSRTRGVGVNEYGLPMAVAQVLSQSLAQNLGIQWEPSSRKPKCDCTESWGGCIMEETGVSHSRKFSKCSILEYRDFLQRGGGACLFNRPTKLFEPTECGNGYVEAGEECDCGFHVECYGLCCKKCSLSNGAHCSDGPCCNNTSCLFQPRGYECRDAVNECDITEYCTGDSGQCPPNLHKQDGYACNQNQGRCYNGECKTRDNQCQYIWGTKAAGSDKFCYEKLNTEGTEKGNCGKDGDRWIQCSKHDVFCGFLLCTNLTRAPRIGQLQGEIIPTSFYHQGRVIDCSGAHVVLDDDTDVGYVEDGTPCGPSMMCLDRKCLQIQALNMSSCPLDSKGKVCSGHGVCSNEATCICDFTWAGTDCSIRDPVRNLHPPKDEGPKGPSATNLIIGSIAGAILVAAIVLGGTGWGFKNVKKRRFDPTQQGPI 2 794 816 GPKGPSATNL IIGSIAGAILVAAIVLGGTGWGF KNVKKRRFDP +Q9R160 
MVAMSEALVHARITLLQAWLRMLLFSSVWPPTWCAEYKGPPETVKPLRVIVSSKDMSLAGWMSYSLYFGGQRHIISMKSKNFLESRQLPVFTYNDQGVLFEDRPFVQNDCYYLGFVDGDLESMAALTTCFGGFQGILQINDTAYEIKPKSPSSTFEHLLYKIDSEKTQLRPMRCGLTDEEIAGQVRLQENGKSTRMQSIYGSWWSHGLYIKLALVIDHEQYLYRKKNTSLVIRDVLSIMQGINLFLLSVDINVVLLGLTIWTNGNPIPVQDIYALLPAFCTWKGTNLDSQIPYDIAHLFVNYTFSNYFGIAYVGTVCDKTFGCGIDSIAEDDFLTIGHIVAHEIGHNLGMSHDGILCTCGEESCLMSATMDSSQKLSNCSYEVLWAHMINKSCIHREPRPSDIFQLKVCGNGIVEEGEQCDCGSSENCRRNRCCMPSCTLRSKAKCDTGLCCNRKCQIQPSGTLCRARENECDLPEWCNGTSHECPEDLFVQDGTSCPGDGYCYEKRCNSHDVHCQRVFGQLAMKASDSCYKELNTRGDRFGNCGFINNEYVRCEISDILCGRIQCDKVGTLPILQNHYTIHWTHFNSVSCWSTDYHLGMKIADLGDIKDGTNCGPQHVCIARKCVNKPSWVNDCTPETCNMKGVCNNKQHCHCDVGWSPPNCQETGTGGSIDSGSPGNEVYEDEVVSKKDAPEKPNVIIWLLPIICVAVVLSVLFCLSGATKKSREAAASQPAEERVKPPYEGAEPSYETVKPPDEWANP 2 698 720 SKKDAPEKPN VIIWLLPIICVAVVLSVLFCLSG ATKKSREAAA +Q9R159 MQTTQRASSFAAAEDNIAMDKAVVYTRIPHLYLWLEILNILSSRPLTGYAQHTSLPEVVIPLRVTGNRPMWAMGWLTYSLHFGGQKHFIHIKAKKFLVSRLFSVFTYTKQGALHKDQPYVQNDCYYHGHMDGDPESMVAITTCYGGFQGILQINGTVYEIKPKNLSSTFEHLVHKMDSEETELLPMRCALTEEIARQMKLQQNENPTLMQSHYEGWWTHKSFLDLALVVERERIRYHNNNTSRVLVEVFTIINIINNIYETLDVELVLLGVEMWNERNHVQVRSIEELLDEFCMWKARSLNFRIPNDIAHIFVNHEFGIYLGLAYIGSVCVPSHNCGVDRLLGGNLFYFGRIIAHEMGHNLGMEHDSSSCTCGTKICLMAPADNGIPKFSNCSYSYYWATYATAKCMRKEKKSKGILRGKLCGDGVVDDGEQCDCGSAKSCADDPCCKPSCTLKDGAACAFGLCCLYCQIMPAGTVCRQEVNECDLPEWCNGHSHKCPNDVYLLDGSPCRDGGYCYEKRCNNRDEQCKQIFGKEARSADHSCYRELNTQGDRFGNCGVIRDAYLRCHDPDILCGRVQCENVAHIPFLRDHSTVHWTHLNGVTCWGTDYHFGMTIPDIGIVKDGTDCGPEHVCINKKCVSKSIWRSQCSPKTCNMKGVCNNLHHCHCNLGWDPPHCLKSGLGGSIDSGPPNYTENYTEKKHKKSIGLVILFWILFACFSVLFIVFLFFLRSYVELPMSEEPKVPTPENKEDTNEVMNTETE 2 706 728 TEKKHKKSIG LVILFWILFACFSVLFIVFLFFL RSYVELPMSE +Q9UKQ2 
MLQGLLPVSLLLSVAVSAIKELPGVKKYEVVYPIRLHPLHKREAKEPEQQEQFETELKYKMTINGKIAVLYLKKNKNLLAPGYTETYYNSTGKEITTSPQIMDDCYYQGHILNEKVSDASISTCRGLRGYFSQGDQRYFIEPLSPIHRDGQEHALFKYNPDEKNYDSTCGMDGVLWAHDLQQNIALPATKLVKLKDRKVQEHEKYIEYYLVLDNGEFKRYNENQDEIRKRVFEMANYVNMLYKKLNTHVALVGMEIWTDKDKIKITPNASFTLENFSKWRGSVLSRRKRHDIAQLITATELAGTTVGLAFMSTMCSPYSVGVVQDHSDNLLRVAGTMAHEMGHNFGMFHDDYSCKCPSTICVMDKALSFYIPTDFSSCSRLSYDKFFEDKLSNCLFNAPLPTDIISTPICGNQLVEMGEDCDCGTSEECTNICCDAKTCKIKATFQCALGECCEKCQFKKAGMVCRPAKDECDLPEMCNGKSGNCPDDRFQVNGFPCHHGKGHCLMGTCPTLQEQCTELWGPGTEVADKSCYNRNEGGSKYGYCRRVDDTLIPCKANDTMCGKLFCQGGSDNLPWKGRIVTFLTCKTFDPEDTSQEIGMVANGTKCGDNKVCINAECVDIEKAYKSTNCSSKCKGHAVCDHELQCQCEEGWIPPDCDDSSVVFHFSIVVGVLFPMAVIFVVVAMVIRHQSSREKQKKDQRPLSTTGTRPHKQKRKPQMVKAVQPQEMSQMKPHVYDLPVEGNEPPASFHKDTNALPPTVFKDNPVSTPKDSNPKA 2 664 686 PDCDDSSVVF HFSIVVGVLFPMAVIFVVVAMVI RHQSSREKQK +Q9UKF5 MKMLLLLHCLGVFLSCSGHIQDEHPQYHSPPDVVIPVRITGTTRGMTPPGWLSYILPFGGQKHIIHIKVKKLLFSKHLPVFTYTDQGAILEDQPFVQNNCYYHGYVEGDPESLVSLSTCFGGFQGILQINDFAYEIKPLAFSTTFEHLVYKMDSEEKQFSTMRSGFMQNEITCRMEFEEIDNSTQKQSSYVGWWIHFRIVEIVVVIDNYLYIRYERNDSKLLEDLYVIVNIVDSILDVIGVKVLLFGLEIWTNKNLIVVDDVRKSVHLYCKWKSENITPRMQHDTSHLFTTLGLRGLSGIGAFRGMCTPHRSCAIVTFMNKTLGTFSIAVAHHLGHNLGMNHDEDTCRCSQPRCIMHEGNPPITKFSNCSYGDFWEYTVERTKCLLETVHTKDIFNVKRCGNGVVEEGEECDCGPLKHCAKDPCCLSNCTLTDGSTCAFGLCCKDCKFLPSGKVCRKEVNECDLPEWCNGTSHKCPDDFYVEDGIPCKERGYCYEKSCHDRNEQCRRIFGAGANTASETCYKELNTLGDRVGHCGIKNATYIKCNISDVQCGRIQCENVTEIPNMSDHTTVHWARFNDIMCWSTDYHLGMKGPDIGEVKDGTECGIDHICIHRHCVHITILNSNCSPAFCNKRGICNNKHHCHCNYLWDPPNCLIKGYGGSVDSGPPPKRKKKKKFCYLCILLLIVLFILLCCLYRLCKKSKPIKKQQDVQTPSAKEEEKIQRRPHELPPQSQPWVMPSQSQPPVTPSQSHPQVMPSQSQPPVTPSQSQPRVMPSQSQPPVMPSQSHPQLTPSQSQPPVTPSQRQPQLMPSQSQPPVTPS 2 676 698 PPPKRKKKKK FCYLCILLLIVLFILLCCLYRLC KKSKPIKKQQ +Q9UKF2 
MRSVQIFLSQCRLLLLLVPTMLLKSLGEDVIFHPEGEFDSYEVTIPEKLSFRGEVQGVVSPVSYLLQLKGKKHVLHLWPKRLLLPRHLRVFSFTEHGELLEDHPYIPKDCNYMGSVKESLDSKATISTCMGGLRGVFNIDAKHYQIEPLKASPSFEHVVYLLKKEQFGNQVCGLSDDEIEWQMAPYENKARLRDFPGSYKHPKYLELILLFDQSRYRFVNNNLSQVIHDAILLTGIMDTYFQDVRMRIHLKALEVWTDFNKIRVGYPELAEVLGRFVIYKKSVLNARLSSDWAHLYLQRKYNDALAWSFGKVCSLEYAGSVSTLLDTNILAPATWSAHELGHAVGMSHDEQYCQCRGRLNCIMGSGRTGFSNCSYISFFKHISSGATCLNNIPGLGYVLKRCGNKIVEDNEECDCGSTEECQKDRCCQSNCKLQPGANCSIGLCCHDCRFRPSGYVCRQEGNECDLAEYCDGNSSSCPNDVYKQDGTPCKYEGRCFRKGCRSRYMQCQSIFGPDAMEAPSECYDAVNLIGDQFGNCEITGIRNFKKCESANSICGRLQCINVETIPDLPEHTTIISTHLQAENLMCWGTGYHLSMKPMGIPDLGMINDGTSCGEGRVCFKKNCVNSSVLQFDCLPEKCNTRGVCNNRKNCHCMYGWAPPFCEEVGYGGSIDSGPPGLLRGAIPSSIWVVSIIMFRLILLILSVVFVFFRQVIGNHLKPKQEKMPLSKAKTEQEESKTKTVQEESKTKTGQEESEAKTGQEESKAKTGQEESKANIESKRPKAKSVKKQKK 2 686 708 GLLRGAIPSS IWVVSIIMFRLILLILSVVFVFF RQVIGNHLKP +Q8TC27 MFRLWLLLAGLCGLLASRPGFQNSLLQIVIPEKIQTNTNDSSEIEYEQISYIIPIDEKLYTVHLKQRYFLADNFMIYLYNQGSMNTYSSDIQTQCYYQGNIEGYPDSMVTLSTCSGLRGILQFENVSYGIEPLESAVEFQHVLYKLKNEDNDIAIFIDRSLKEQPMDDNIFISEKSEPAVPDLFPLYLEMHIVVDKTLYDYWGSDSMIVTNKVIEIVGLANSMFTQFKVTIVLSSLELWSDENKISTVGEADELLQKFLEWKQSYLNLRPHDIAYLLIYMDYPRYLGAVFPGTMCITRYSAGVALYPKEITLEAFAVIVTQMLALSLGISYDDPKKCQCSESTCIMNPEVVQSNGVKTFSSCSLRSFQNFISNVGVKCLQNKPQMQKKSPKPVCGNGRLEGNEICDCGTEAQCGPASCCDFRTCVLKDGAKCYKGLCCKDCQILQSGVECRPKAHPECDIAENCNGTSPECGPDITLINGLSCKNNKFICYDGDCHDLDARCESVFGKGSRNAPFACYEEIQSQSDRFGNCGRDRNNKYVFCGWRNLICGRLVCTYPTRKPFHQENGDVIYAFVRDSVCITVDYKLPRTVPDPLAVKNGSQCDIGRVCVNRECVESRIIKASAHVCSQQCSGHGVCDSRNKCHCSPGYKPPNCQIRSKGFSIFPEEDMGSIMERASGKTENTWLLGFLIALPILIVTTAIVLARKQLKKWFAKEEEFPSSESKSEGSTQTYASQSSSEGSTQTYASQTRSESSSQADTSKSKSEDSAEAYTSRSKSQDSTQTQSSSN 2 681 703 IMERASGKTE NTWLLGFLIALPILIVTTAIVLA RKQLKKWFAK +Q9BZ11 
MGWRPRRARGTPLLLLLLLLLLWPVPGAGVLQGHIPGQPVTPHWVLDGQPWRTVSLEEPVSKPDMGLVALEAEGQELLLELEKNHRLLAPGYIETHYGPDGQPVVLAPNHTDHCHYQGRVRGFPDSWVVLCTCSGMSGLITLSRNASYYLRPWPPRGSKDFSTHEIFRMEQLLTWKGTCGHRDPGNKAGMTSLPGGPQSRGRREARRTRKYLELYIVADHTLFLTRHRNLNHTKQRLLEVANYVDQLLRTLDIQVALTGLEVWTERDRSRVTQDANATLWAFLQWRRGLWAQRPHDSAQLLTGRAFQGATVGLAPVEGMCRAESSGGVSTDHSELPIGAAATMAHEIGHSLGLSHDPDGCCVEAAAESGGCVMAAATGHPFPRVFSACSRRQLRAFFRKGGGACLSNAPDPGLPVPPALCGNGFVEAGEECDCGPGQECRDLCCFAHNCSLRPGAQCAHGDCCVRCLLKPAGALCRQAMGDCDLPEFCTGTSSHCPPDVYLLDGSPCARGSGYCWDGACPTLEQQCQQLWGPGSHPAPEACFQVVNSAGDAHGNCGQDSEGHFLPCAGRDALCGKLQCQGGKPSLLAPHMVPVDSTVHLDGQEVTCRGALALPSAQLDLLGLGLVEPGTQCGPRMVCQSRRCRKNAFQELQRCLTACHSHGVCNSNHNCHCAPGWAPPFCDKPGFGGSMDSGPVQAENHDTFLLAMLLSVLLPLLPGAGLAWCCYRLPGAHLQRCSWGCRRDPACSGPKDGPHRDHPLGGVHPMELGPTATGQPWPLDPENSHEPSSHPEKPLPAVSPDPQADQVQMPRSCLW 2 13 35 WRPRRARGTP LLLLLLLLLLWPVPGAGVLQGHI PGQPVTPHWV +Q99965 MWRVLFLLSGLGGLRMDSNFDSLPVQITVPEKIRSIIKEGIESQASYKIVIEGKPYTVNLMQKNFLPHNFRVYSYSGTGIMKPLDQDFQNFCHYQGYIEGYPKSVVMVSTCTGLRGVLQFENVSYGIEPLESSVGFEHVIYQVKHKKADVSLYNEKDIESRDLSFKLQSVEPQQDFAKYIEMHVIVEKQLYNHMGSDTTVVAQKVFQLIGLTNAIFVSFNITIILSSLELWIDENKIATTGEANELLHTFLRWKTSYLVLRPHDVAFLLVYREKSNYVGATFQGKMCDANYAGGVVLHPRTISLESLAVILAQLLSLSMGITYDDINKCQCSGAVCIMNPEAIHFSGVKIFSNCSFEDFAHFISKQKSQCLHNQPRLDPFFKQQAVCGNAKLEAGEECDCGTEQDCALIGETCCDIATCRFKAGSNCAEGPCCENCLFMSKERMCRPSFEECDLPEYCNGSSASCPENHYVQTGHPCGLNQWICIDGVCMSGDKQCTDTFGKEVEFGPSECYSHLNSKTDVSGNCGISDSGYTQCEADNLQCGKLICKYVGKFLLQIPRATIIYANISGHLCIAVEFASDHADSQKMWIKDGTSCGSNKVCRNQRCVSSSYLGYDCTTDKCNDRGVCNNKKHCHCSASYLPPDCSVQSDLWPGGSIDSGNFPPVAIPARLPERRYIENIYHSKPMRWPFFLFIPFFIIFCVLIAIMVKVNFQRKKWRTEDYSSDEQPESESEPKG 2 689 711 IYHSKPMRWP FFLFIPFFIIFCVLIAIMVKVNF QRKKWRTEDY +Q3TTE0 
MFLLLLLFLHLKGLQAGQNPQKTTLQTTVPEKISSPDVETDAEDHMAYLITINETPHFIHLKKQSFITPTAVVYTYDRNDVQHSQPLSALENCNYNGYVAGFPNSIVTLTVCTGLRGIIQFENVSYAIEPVETLSGFVHVIYENTNKHAVIPDLGKNQSYSWFDESDYQFRSNMKKSGFTVLRQRFIMMDIIVDKKLFDYMGSDTEVVLQKVIQIIGFVNTMLSKLKLTVLINSIEIWSKENRIRLSKAVDDLLVQFSIWKHEYRSQHVAYLLAFEEHPASTGALYPGNLCKLEYNAAVALYPKGLSLESFSVIVLQLLSIGMGLTYDTENCHCTGEVCLMTPKAIYSGGVKDFSTCTLDDFKYLSTRQDLRCLQDLPLERKPARRPRRICGNGILEMNEQCDCGTLKNCTHRKCCDPMSCRLKNKATCGSGECCSQDCTVKMNDVVCRKSVDECDFVEYCNGKDPYCVPNTYARNGQYCESGEAFCFEGRCQTADKQCMSMLGKYVRGASFACYEEFNSRGDRFGNCIHNFCAFRNSLCGKLICTWPFKKLVLKANLSVAYAQIRDDLCVAMYKGGRIPKTTKTTYSNPADRDETFVNDGTICGPDMFCLRASCTETRFHMDSSKCDSTRDCNDHGVCNNLQHCHCDIGYNPPFCEEHKGQFGSVDDGHKYHVEDGKSYKQQSHSNLKKNQLQLILYISLPLLVMISAVVIKQSKLSRVCDRERSESDSSTTEDSGSNTNVTSSGGSTSH 2 695 712 HSNLKKNQLQ LILYISLPLLVMISAVVI KQSKLSRVCD +Q9H2U9 MLPGCIFLMILLIPQVKEKFILGVEGQQLVRPKKLPLIQKRDTGHTHDDDILKTYEEELLYEIKLNRKTLVLHLLRSREFLGSNYSETFYSMKGEAFTRHPQIMDHCFYQGSIVHEYDSAASISTCNGLRGFFRINDQRYLIEPVKYSDEGEHLVFKYNLRVPYGANYSCTELNFTRKTVPGDNESEEDSKIKGIHDEKYVELFIVADDTVYRRNGHPHNKLRNRIWGMVNFVNMIYKTLNIHVTLVGIEIWTHEDKIELYSNIETTLLRFSFWQEKILKTRKDFDHVVLLSGKWLYSHVQGISYPGGMCLPYYSTSIIKDLLPDTNIIANRMAHQLGHNLGMQHDEFPCTCPSGKCVMDSDGSIPALKFSKCSQNQYHQYLKDYKPTCMLNIPFPYNFHDFQFCGNKKLDEGEECDCGPAQECTNPCCDAHTCVLKPGFTCAEGECCESCQIKKAGSICRPAKDECDFPEMCTGHSPACPKDQFRVNGFPCKNSEGYCFMGKCPTREDQCSELFDDEAIESHDICYKMNTKGNKFGYCKNKENRFLPCEEKDVRCGKIYCTGGELSSLLGEDKTYHLKDPQKNATVKCKTIFLYHDSTDIGLVASGTKCGEGMVCNNGECLNMEKVYISTNCPSQCNENPVDGHGLQCHCEEGQAPVACEETLHVTNITILVVVLVLVIVGIGVLILLVRYRKCIKLKQVQSPPTETLGVENKGYFGDEQQIRTEPILPEIHFLNKPASKDSRGIADPNQSAK 2 668 690 VACEETLHVT NITILVVVLVLVIVGIGVLILLV RYRKCIKLKQ +Q13443 
MGSGARFPSGTLRVRWLLLLGLVGPVLGAARPGFQQTSHLSSYEIITPWRLTRERREAPRPYSKQVSYVIQAEGKEHIIHLERNKDLLPEDFVVYTYNKEGTLITDHPNIQNHCHYRGYVEGVHNSSIALSDCFGLRGLLHLENASYGIEPLQNSSHFEHIIYRMDDVYKEPLKCGVSNKDIEKETAKDEEEEPPSMTQLLRRRRAVLPQTRYVELFIVVDKERYDMMGRNQTAVREEMILLANYLDSMYIMLNIRIVLVGLEIWTNGNLINIVGGAGDVLGNFVQWREKFLITRRRHDSAQLVLKKGFGGTAGMAFVGTVCSRSHAGGINVFGQITVETFASIVAHELGHNLGMNHDDGRDCSCGAKSCIMNSGASGSRNFSSCSAEDFEKLTLNKGGNCLLNIPKPDEAYSAPSCGNKLVDAGEECDCGTPKECELDPCCEGSTCKLKSFAECAYGDCCKDCRFLPGGTLCRGKTSECDVPEYCNGSSQFCQPDVFIQNGYPCQNNKAYCYNGMCQYYDAQCQVIFGSKAKAAPKDCFIEVNSKGDRFGNCGFSGNEYKKCATGNALCGKLQCENVQEIPVFGIVPAIIQTPSRGTKCWGVDFQLGSDVPDPGMVNEGTKCGAGKICRNFQCVDASVLNYDCDVQKKCHGHGVCNSNKNCHCENGWAPPNCETKGYGGSVDSGPTYNEMNTALRDGLLVFFFLIVPLIVCAIFIFIKRDQLWRSYFRKKRSQTYESDGKNQANPSRQPGSVPRHVSPVTPPREVPIYANRFAVPTYAAKQPQQFPSRPPPPQPKVSSQGNLIPARPAPAPPLYSSLT 2 699 718 NEMNTALRDG LLVFFFLIVPLIVCAIFIFI KRDQLWRSYF +Q60813 MSVAAAGRGFASSLSSPQIRRIALKEAKLTPHIWAALHWNLGLRLVPSVRVGILVLLIFLPSTFCDIGSVYNSSYETVIPERLPGKGGKDPGGKVSYMLLMQGQKQLLHLEVKGHYPENNFPVYSYHNGILRQEMPLLSQDCHYEGYMEGVPGSFVSVNICSGLRGVLIKEETSYGIEPMLSSKNFEHVLYTMEHQPVVSCSVTPKDSPGDTSHPPRSRKPDDLLVLTDWWSHTKYVEMFVVVNHQRFQMWGSNINETVQAVMDIIALANSFTRGINTEVVLVGLEIWTEGDPIEVPVDLQTTLRNFNFWRQEKLVGRVRHDVAHLIVGHRPGENEGQAFLRGACSGEFAAAVEAFHHEDVLLFAALMAHELGHNLGIQHDHPTCTCGPKHFCLMGEKIGKDSGFSNCSSDHFLRFLHDHRGACLLDEPGRQSRMRRAANCGNGVVEDLEECDCGSDCDSHPCCSPTCTLKEGAQCSEGLCCYNCTFKKKGSLCRPAEDVCDLPEYCDGSTQECPANSYMQDGTQCDRIYYCLGGWCKNPDKQCSRIYGYPARSAPEECYISVNTKANRFGNCGHPTSANFRYETCSDEDVFCGKLVCTDVRYLPKVKPLHSLLQVPYGEDWCWSMDAYNITDVPDDGDVQSGTFCAPNKVCMEYICTGRGVLQYNCEPQEMCHGNGVCNNFKHCHCDAGFAPPDCSSPGNGGSVDSGPVGKPADRHLSLSFLAEESPDDKMEDEEVNLKVMVLVVPIFLVVLLCCLMLIAYLWSEVQEVVSPPSSSESSSSSSWSDSDSQ 2 741 763 KMEDEEVNLK VMVLVVPIFLVVLLCCLMLIAYL WSEVQEVVSP +Q8R534 
MERLKLGKIPEHWCIRLVAMLLLAIIFLPSTFCDIGSVYNSSYETVIPERLPGKGGKDPGGKVSYMLLMQGQKQLLHLEVKGHYPENNFPVYSYHNGILRQEMPLLSQDCHYEGYMEGVPGSFVSVNICSGLRGVLIKEETSYGIEPMLSSKNFEHVLYTMEHQPVVSCSVTPKDSPGDTSHPPRSRKPDDLLVLTDWWSHTKYVEMFVVVNHQRFQMWGSNINETVQAVMDIIALANSFTRGINTEVVLVGLEIWTEGDPIEVPVDLQTTLRNFNFWRQEKLVGRVRHDVAHLIVGHRPGENEGQAFLRGACSGEFAAAVEAFHHEDVLLFAALMAHELGHNLGIQHDHPTCTCGPKHFCLMGEKIGKDSGFSNCSSDHFLRFLHDHRGVCLLDEPGRQSRMRRAANCGNGVVEDLEQCDCGSDCDKSQCCDENCKLKGNSVCSTELCCFKCNFKKEGDVCRPADGPCDLEEYCNGTSAACPSDRKAQDGSKCHESFLCFNGQCMDPTFQCSRIFGHGSRSASDYCYTSLNSRGDQFGNCGSSSQFPKKYTKCSDKNVMCGKLICTEVAFLPQIQPNNLLLQVPETEDWCWSVAVFDMRDSLHEEYVKDNTYCGKDKVCKNSICEDFTPFSFPCSPSKQCNKHGVCNDLGNCHCSFGFAPPDCKEEGTGGSVDSGPAVNLSNDSSPGPNSTQSSTEELILNLKLIVLAVILVLMILLIIICIISAYTKSETASEAGPSELEELPEGEKEEQEEVLPEEAKGEEEELEYGKEEAEEQGAVEEEGAEEANEEAAAEKKDEDEEEGEE 2 705 727 STEELILNLK LIVLAVILVLMILLIIICIISAY TKSETASEAG +A2AJA7 MCLPSHLLSTWVLFMAAQSLGKTWLPNHCRSPIKAVCNFVCDCGDCSDETQCGFHGASTIPSTSFTCNFEQDSCGWQDISTSGYRWLRDRAGAVLHGPGPHSDHTHGTDLGWYMAVGTHSGKEPSTATLRSPVMREAAPTCELRLWYHIASRDVAELRLDLTHGVETLTLWQTSGPWGPGWQELAVNTGRIQGDFKVTFSATRNATHRGAVALDDVEFRDCGLPIPQARCPLGHHHCQNKACVEPHQLCDGEDNCGDRSDEDPLICSHHMATDFETGLGPWNQLEGWTRNHSAGSMVSPAWPHRDHSRNSAYGFFLISVAKPGTTAVLYSPEFQGSVSNNCSFTFYYYLHGSEASHFQLFLQAQGLNTPQVPVLLRSRHGELGTAWVRDRVDIQSAHPFRILLAGETGPGGVVGLDDLIMSSHCMLVPAMSTLQSSLSGPVPLALYPQTSIKLPQQTCEPGHLSCGDLCVPPEQLCDFQKHCAEGEDEHKCGTTDFESASAGGWEDISVGKLQWQWVEAQEKSKPAGDANRDAPGHFLSLQKAWGQLRSEARALTPALGPSGPHCELHMAYYFQSHPQGFLALVVVENGFRELLWQAPGGGSGSWTEEKIILGARRRPFQLEFVSLVDLDGPGQQGAGVDNVTLRDCNPMVTTESDQELSCNFERDSCSWHTGHLTDAHWHRIKSHGSQLDHTTGQGFFMFLDPTDPPARGQGALLLTRPQVPVVPKECLSFWYRLYGPQIGTLCLAMRREREEDILLWSRSGTHGNRWHQAWVTLHHQPEASTKYQLLFEGLRNGYHGTMALDDIAVRPGPCWAPKSCSFEDSDCGFSPGGWGLWTHQSNASGLASWGPWIDHTTGTAQGHYMVVDTSPNVLPKGHVAALTSEEHQPLSQPACLTFWYHMSVPNPGTLRVHVEESTRRQELSISAHGRSAWRLGSVNVQAEQAWKVVFEAVAAGVEYSYMALDDISLQDGPCPQPGSCDFETGLCGWSHLPWPSLGGYSWDWSSGATPSRYPQPSVDHTLGTEAGHFAFFETSVLGPGGQAAWLRSEPLPATTVSCLRFWYYMGFPEHFYKGELRVLLSSARGQLAVWYQGGHLRDQWLQVQIELSNSEEFQIVFEATLGGQPALGPI
AIDDVQYLAGQQCKQPSPSQGEVAAPVSVPVAVGGALLFFMFLVLMGLGGWHWLQKQHCPGQRSTDAAASGFANILFNADHVTLPESITSNPQSPPDLA 2 1161 1183 EVAAPVSVPV AVGGALLFFMFLVLMGLGGWHWL QKQHCPGQRS +Q86WK6 MHPHRDPRGLWLLLPSLSLLLFEVARAGRAVVSCPAACLCASNILSCSKQQLPNVPHSLPSYTALLDLSHNNLSRLRAEWTPTRLTQLHSLLLSHNHLNFISSEAFSPVPNLRYLDLSSNQLRTLDEFLFSDLQVLEVLLLYNNHIMAVDRCAFDDMAQLQKLYLSQNQISRFPLELVKEGAKLPKLTLLDLSSNKLKNLPLPDLQKLPAWIKNGLYLHNNPLNCDCELYQLFSHWQYRQLSSVMDFQEDLYCMNSKKLHNVFNLSFLNCGEYKERAWEAHLGDTLIIKCDTKQQGMTKVWVTPSNERVLDEVTNGTVSVSKDGSLLFQQVQVEDGGVYTCYAMGETFNETLSVELKVHNFTLHGHHDTLNTAYTTLVGCILSVVLVLIYLYLTPCRCWCRGVEKPSSHQGDSLSSSMLSTTPNHDPMAGGDKDDGFDRRVAFLEPAGPGQGQNGKLKPGNTLPVPEATGKGQRRMSDPESVSSVFSDTPIVV 2 371 393 FTLHGHHDTL NTAYTTLVGCILSVVLVLIYLYL TPCRCWCRGV +Q8K592 MLGTLGLWTLLPAAAQVSPNRRTCVFFEAPGVRGSTKTLGEMVDAGPGPPKGIRCLYSHCCFGIWNLTHGRAQVEMQGCRDSDEPGCESLHCDPVPRAHPNPSSTLFTCSCGTDFCNANYSHLPPSGNQGAPGPQEPQATPGGPVWMALLLLGMFLVLLLSSIILALLQRKACRVQGGSDPEPGSGGDCSEELPELAELRFSQVIQEGGHAVVWAGRLQGEMVAIKAFPPRAVAQFRAERAVYQLLGLQHDHIVRFITAGQGGPGPLPSGPLLVLELYPKGSLCHYLTQYTSDWGSSLRMALSLAEGLAFLHEERWQDGQYKPGIAHRDLSSQNVLIREDRSCAIGDLGLALVLPGLAQPPALAPTQPRGPAAILEAGTQRYMAPELLDKTLDLQDWGTALQRADVYSLALLLWEILSRCSDLRPDHRPPPFQLAYEAELGSNPSACELWALAVEERKRPNIPSTWSCSATDPRGLRELLEDCWDADPEARLTAECVQQRLAALAYPHGASSFPESPQGCPENCLSAPASAVFPCRPQQSSCLLSVQQGPGSRSPDPVGDTVQVYVNE 2 146 168 EPQATPGGPV WMALLLLGMFLVLLLSSIILALL QRKACRVQGG +Q9BXJ7 MGVLGRVLLWLQLCALTQAVSKLWVPNTDFDVAANWSQNRTPCAGGAVEFPADKMVSVLVQEGHAVSDMLLPLDGELVLASGAGFGVSDVGSHLDCGAGEPAVFRDSDRFSWHDPHLWRSGDEAPGLFFVDAERVPCRHDDVFFPPSASFRVGLGPGASPVRVRSISALGRTFTRDEDLAVFLASRAGRLRFHGPGALSVGPEDCADPSGCVCGNAEAQPWICAALLQPLGGRCPQAACHSALRPQGQCCDLCGAVVLLTHGPAFDLERYRARILDTFLGLPQYHGLQVAVSKVPRSSRLREADTEIQVVLVENGPETGGAGRLARALLADVAENGEALGVLEATMRESGAHVWGSSAAGLAGGVAAAVLLALLVLLVAPPLLRRAGRLRWRRHEAAAPAGAPLGFRNPVFDVTASEELPLPRRLSLVPKAAADSTSHSYFVNPLFAGAEAEA 2 361 383 AHVWGSSAAG LAGGVAAAVLLALLVLLVAPPLL RRAGRLRWRR +P58335 
MVAERSPARSPGSWLFPGLWLLVLSGPGGLLRAQEQPSCRRAFDLYFVLDKSGSVANNWIEIYNFVQQLAERFVSPEMRLSFIVFSSQATIILPLTGDRGKISKGLEDLKRVSPVGETYIHEGLKLANEQIQKAGGLKTSSIIIALTDGKLDGLVPSYAEKEAKISRSLGASVYCVGVLDFEQAQLERIADSKEQVFPVKGGFQALKGIINSILAQSCTEILELQPSSVCVGEEFQIVLSGRGFMLGSRNGSVLCTYTVNETYTTSVKPVSVQLNSMLCPAPILNKAGETLDVSVSFNGGKSVISGSLIVTATECSNGIAAIIVILVLLLLLGIGLMWWFWPLCCKVVIKDPPPPPAPAPKEEEEEPLPTKKWPTVDASYYGGRGVGGIKRMEVRWGDKGSTEEGARLEKAKNAVVKIPEETEEPIRPRPPRPKPTHQPPQTKWYTPIKGRLDALWALLRRQYDRVSLMRPQEGDEVCIWECIEKELTA 2 319 341 IVTATECSNG IAAIIVILVLLLLLGIGLMWWFW PLCCKVVIKD +P51693 MGPASPAARGLSRRPGQPPLPLLLPLLLLLLRAQPAIGSLAGGSPGAAEAPGSAQVAGLCGRLTLHRDLRTGRWEPDPQRSRRCLRDPQRVLEYCRQMYPELQIARVEQATQAIPMERWCGGSRSGSCAHPHHQVVPFRCLPGEFVSEALLVPEGCRFLHQERMDQCESSTRRHQEAQEACSSQGLILHGSGMLLPCGSDRFRGVEYVCCPPPGTPDPSGTAVGDPSTRSWPPGSRVEGAEDEEEEESFPQPVDDYFVEPPQAEEEEETVPPPSSHTLAVVGKVTPTPRPTDGVDIYFGMPGEISEHEGFLRAKMDLEERRMRQINEVMREWAMADNQSKNLPKADRQALNEHFQSILQTLEEQVSGERQRLVETHATRVIALINDQRRAALEGFLAALQADPPQAERVLLALRRYLRAEQKEQRHTLRHYQHVAAVDPEKAQQMRFQVHTHLQVIEERVNQSLGLLDQNPHLAQELRPQIQELLHSEHLGPSELEAPAPGGSSEDKGGLQPPDSKDDTPMTLPKGSTEQDAASPEKEKMNPLEQYERKVNASVPRGFPFHSSEIQRDELAPAGTGVSREAVSGLLIMGAGGGSLIVLSMLLLRRKKPYGAISHGVVEVDPMLTLEEQQLRELQRHGYENPTYRFLEERP 2 581 603 APAGTGVSRE AVSGLLIMGAGGGSLIVLSMLLL RRKKPYGAIS +O75882 
MVAAAAATEARLRRRTAATAALAGRSGGPHWDWDVTRAGRPGLGAGLRLPRLLSPPLRPRLLLLLLLLSPPLLLLLLPCEAEAAAAAAAVSGSAAAEAKECDRPCVNGGRCNPGTGQCVCPAGWVGEQCQHCGGRFRLTGSSGFVTDGPGNYKYKTKCTWLIEGQPNRIMRLRFNHFATECSWDHLYVYDGDSIYAPLVAAFSGLIVPERDGNETVPEVVATSGYALLHFFSDAAYNLTGFNITYSFDMCPNNCSGRGECKISNSSDTVECECSENWKGEACDIPHCTDNCGFPHRGICNSSDVRGCSCFSDWQGPGCSVPVPANQSFWTREEYSNLKLPRASHKAVVNGNIMWVVGGYMFNHSDYNMVLAYDLASREWLPLNRSVNNVVVRYGHSLALYKDKIYMYGGKIDSTGNVTNELRVFHIHNESWVLLTPKAKEQYAVVGHSAHIVTLKNGRVVMLVIFGHCPLYGYISNVQEYDLDKNTWSILHTQGALVQGGYGHSSVYDHRTRALYVHGGYKAFSANKYRLADDLYRYDVDTQMWTILKDSRFFRYLHTAVIVSGTMLVFGGNTHNDTSMSHGAKCFSSDFMAYDIACDRWSVLPRPDLHHDVNRFGHSAVLHNSTMYVFGGFNSLLLSDILVFTSEQCDAHRSEAACLAAGPGIRCVWNTGSSQCISWALATDEQEEKLKSECFSKRTLDHDRCDQHTDCYSCTANTNDCHWCNDHCVPRNHSCSEGQISIFRYENCPKDNPMYYCNKKTSCRSCALDQNCQWEPRNQECIALPENICGIGWHLVGNSCLKITTAKENYDNAKLFCRNHNALLASLTTQKKVEFVLKQLRIMQSSQSMSKLTLTPWVGLRKINVSYWCWEDMSPFTNSLLQWMPSEPSDAGFCGILSEPSTRGLKAATCINPLNGSVCERPANHSAKQCRTPCALRTACGDCTSGSSECMWCSNMKQCVDSNAYVASFPFGQCMEWYTMSTCPPENCSGYCTCSHCLEQPGCGWCTDPSNTGKGKCIEGSYKGPVKMPSQAPTGNFYPQPLLNSSMCLEDSRYNWSFIHCPACQCNGHSKCINQSICEKCENLTTGKHCETCISGFYGDPTNGGKCQPCKCNGHASLCNTNTGKCFCTTKGVKGDECQLCEVENRYQGNPLRGTCYYTLLIDYQFTFSLSQEDDRYYTAINFVATPDEQNRDLDMFINASKNFNLNITWAASFSAGTQAGEEMPVVSKTNIKEYKDSFSNEKFDFRNHPNITFFVYVSNFTWPIKIQIAFSQHSNFMDLVQFFVTFFSCFLSLLLVAAVVWKIKQSCWASRRREQLLREMQQMASRPFASVNVALETDEEPPDLIGGSIKTVPKPIALEPCFGNKAAVLSVFVRLPRGLGGIPPPGQSGLAVASALVDISQQMPIVYKEKSGAVRNRKQQPPAQPGTCI 2 1279 1301 AFSQHSNFMD LVQFFVTFFSCFLSLLLVAAVVW KIKQSCWASR +P27037 
MGAAAKLAFAVFLISCSSGAILGRSETQECLFFNANWEKDRTNQTGVEPCYGDKDKRRHCFATWKNISGSIEIVKQGCWLDDINCYDRTDCVEKKDSPEVYFCCCEGNMCNEKFSYFPEMEVTQPTSNPVTPKPPYYNILLYSLVPLMLIAGIVICAFWVYRHHKMAYPPVLVPTQDPGPPPPSPLLGLKPLQLLEVKARGRFGCVWKAQLLNEYVAVKIFPIQDKQSWQNEYEVYSLPGMKHENILQFIGAEKRGTSVDVDLWLITAFHEKGSLSDFLKANVVSWNELCHIAETMARGLAYLHEDIPGLKDGHKPAISHRDIKSKNVLLKNNLTACIADFGLALKFEAGKSAGDTHGQVGTRRYMAPEVLEGAINFQRDAFLRIDMYAMGLVLWELASRCTAADGPVDEYMLPFEEEIGQHPSLEDMQEVVVHKKKRPVLRDYWQKHAGMAMLCETIEECWDHDAEARLSAGCVGERITQMQRLTNIITTEDIVTVVTMVTNVDFPPKESSL 2 139 161 PVTPKPPYYN ILLYSLVPLMLIAGIVICAFWVY RHHKMAYPPV +P56817 MAQALPWLLLWMGAGVLPAHGTQHGIRLPLRSGLGGAPLGLRLPRETDEEPEEPGRRGSFVEMVDNLRGKSGQGYYVEMTVGSPPQTLNILVDTGSSNFAVGAAPHPFLHRYYQRQLSSTYRDLRKGVYVPYTQGKWEGELGTDLVSIPHGPNVTVRANIAAITESDKFFINGSNWEGILGLAYAEIARPDDSLEPFFDSLVKQTHVPNLFSLQLCGAGFPLNQSEVLASVGGSMIIGGIDHSLYTGSLWYTPIRREWYYEVIIVRVEINGQDLKMDCKEYNYDKSIVDSGTTNLRLPKKVFEAAVKSIKAASSTEKFPDGFWLGEQLVCWQAGTTPWNIFPVISLYLMGEVTNQSFRITILPQQYLRPVEDVATSQDDCYKFAISQSSTGTVMGAVIMEGFYVVFDRARKRIGFAVSACHVHDEFRTAAVEGPFVTLDMEDCGYNIPQTDESTLMTIAYVMAAICALFMLPLCLMVCQWRCLRCLRQQHDDFADDISLLK 2 455 477 YNIPQTDEST LMTIAYVMAAICALFMLPLCLMV CQWRCLRCLR +Q13145 MDRHSSYIFIWLQLELCAMAVLLTKGEIRCYCDAAHCVATGYMCKSELSACFSRLLDPQNSNSPLTHGCLDSLASTTDICQAKQARNHSGTTIPTLECCHEDMCNYRGLHDVLSPPRGEASGQGNRYQHDGSRNLITKVQELTSSKELWFRAAVIAVPIAGGLILVLLIMLALRMLRSENKRLQDQRQQMLSRLHYSFHGHHSKKGQVAKLDLECMVPVSGHENCCLTCDKMRQADLSNDKILSLVHWGMYSGHGKLEFV 2 154 176 SSKELWFRAA VIAVPIAGGLILVLLIMLALRML RSENKRLQDQ +Q13873 
MTSSLQRPWRVPWLPWTILLVSTAAASQNQERLCAFKDPYQQDLGIGESRISHENGTILCSKGSTCYGLWEKSKGDINLVKQGCWSHIGDPQECHYEECVVTTTPPSIQNGTYRFCCCSTDLCNVNFTENFPPPDTTPLSPPHSFNRDETIIIALASVSVLAVLIVALCFGYRMLTGDRKQGLHSMNMMEAAASEPSLDLDNLKLLELIGRGRYGAVYKGSLDERPVAVKVFSFANRQNFINEKNIYRVPLMEHDNIARFIVGDERVTADGRMEYLLVMEYYPNGSLCKYLSLHTSDWVSSCRLAHSVTRGLAYLHTELPRGDHYKPAISHRDLNSRNVLVKNDGTCVISDFGLSMRLTGNRLVRPGEEDNAAISEVGTIRYMAPEVLEGAVNLRDCESALKQVDMYALGLIYWEIFMRCTDLFPGESVPEYQMAFQTEVGNHPTFEDMQVLVSREKQRPKFPEAWKENSLAVRSLKETIEDCWDQDAEARLTAQCAEERMAELMMIWERNKSVSPTVNPMSTAMQNERNLSHNRRVPKIGPYPDYSSSSYIEDSIHHTDSIVKNISSEHSMSSTPLTIGEKNRNSINYERQQAQARIPSPETSVTSLSTNTTTTNTTGLTPSTGMTTISEMPYPDETNLHTTNVAQSIGPTPVCLQLTEEDLETNKLDPKEVDKNLKESSDENLMEHSLKQFSGPDPLSSTSSSLLYPLIKLAVEATGQQDFTQTANGQACLIPDVLPTQIYPLPKQQNLPKRPTSLPLNTKNSTKEPRLKFGSKHKSNLKQVETGVAKMNTINAAEPHVVTVTMNGVAGRNHSVNSHAATTQYANGTVLSGQTTNIVTHRAQEMLQNQFIGEDTRLNINSSPDEHEPLLRREQQAGHDEGVLDRLVDRRERPLEGGRTNSNNNNSNPCSEQDVLAQGVPSTAADPGPSKPRRAQRPNSLDLSATNVLDGSSIQIGESTQDGKSGSGEKIKKRVKTPYSLKRWRPSTWVISTESLDCEVNNNGSNRAVHSKSSTAVYLAEGGTATTMVSKDIGMNCL 2 152 174 PHSFNRDETI IIALASVSVLAVLIVALCFGYRM LTGDRKQGLH +P36894 MPQLYIYIRLLGAYLFIISRVQGQNLDSMLHGTGMKSDSDQKKSENGVTLAPEDTLPFLKCYCSGHCPDDAINNTCITNGHCFAIIEEDDQGETTLASGCMKYEGSDFQCKDSPKAQLRRTIECCRTNLCNQYLQPTLPPVVIGPFFDGSIRWLVLLISMAVCIIAMIIFSSCFCYKHYCKSISSRRRYNRDLEQDEAFIPVGESLKDLIDQSQSSGSGSGLPLLVQRTIAKQIQMVRQVGKGRYGEVWMGKWRGEKVAVKVFFTTEEASWFRETEIYQTVLMRHENILGFIAADIKGTGSWTQLYLITDYHENGSLYDFLKCATLDTRALLKLAYSAACGLCHLHTEIYGTQGKPAIAHRDLKSKNILIKKNGSCCIADLGLAVKFNSDTNEVDVPLNTRVGTKRYMAPEVLDESLNKNHFQPYIMADIYSFGLIIWEMARRCITGGIVEEYQLPYYNMVPSDPSYEDMREVVCVKRLRPIVSNRWNSDECLRAVLKLMSECWAHNPASRLTALRIKKTLAKMVESQDVKI 2 153 175 IGPFFDGSIR WLVLLISMAVCIIAMIIFSSCFC YKHYCKSISS +O00238 
MLLRSAGKLNVGTKKEDGESTAPTPRPKVLRCKCHHHCPEDSVNNICSTDGYCFTMIEEDDSGLPVVTSGCLGLEGSDFQCRDTPIPHQRRSIECCTERNECNKDLHPTLPPLKNRDFVDGPIHHRALLISVTVCSLLLVLIILFCYFRYKRQETRPRYSIGLEQDETYIPPGESLRDLIEQSQSSGSGSGLPLLVQRTIAKQIQMVKQIGKGRYGEVWMGKWRGEKVAVKVFFTTEEASWFRETEIYQTVLMRHENILGFIAADIKGTGSWTQLYLITDYHENGSLYDYLKSTTLDAKSMLKLAYSSVSGLCHLHTEIFSTQGKPAIAHRDLKSKNILVKKNGTCCIADLGLAVKFISDTNEVDIPPNTRVGTKRYMPPEVLDESLNRNHFQSYIMADMYSFGLILWEVARRCVSGGIVEEYQLPYHDLVPSDPSYEDMREIVCIKKLRPSFPNRWSSDECLRQMGKLMTECWAHNPASRLTALRVKKTLAKMSESQDIKL 2 126 148 RDFVDGPIHH RALLISVTVCSLLLVLIILFCYF RYKRQETRPR +Q9BWV1 MLRGTMTAWRGMRPEVTLACLLLATAGCFADLNEVPQVTVQPASTVQKPGGTVILGCVVEPPRMNVTWRLNGKELNGSDDALGVLITHGTLVITALNNHTVGRYQCVARMPAGAVASVPATVTLANLQDFKLDVQHVIEVDEGNTAVIACHLPESHPKAQVRYSVKQEWLEASRGNYLIMPSGNLQIVNASQEDEGMYKCAAYNPVTQEVKTSGSSDRLRVRRSTAEAARIIYPPEAQTIIVTKGQSLILECVASGIPPPRVTWAKDGSSVTGYNKTRFLLSNLLIDTTSEEDSGTYRCMADNGVGQPGAAVILYNVQVFEPPEVTMELSQLVIPWGQSAKLTCEVRGNPPPSVLWLRNAVPLISSQRLRLSRRALRVLSMGPEDEGVYQCMAENEVGSAHAVVQLRTSRPSITPRLWQDAELATGTPPVSPSKLGNPEQMLRGQPALPRPPTSVGPASPQCPGEKGQGAPAEAPIILSSPRTSKTDSYELVWRPRHEGSGRAPILYYVVKHRKVTNSSDDWTISGIPANQHRLTLTRLDPGSLYEVEMAAYNCAGEGQTAMVTFRTGRRPKPEIMASKEQQIQRDDPGASPQSSSQPDHGRLSPPEAPDRPTISTASETSVYVTWIPRGNGGFPIQSFRVEYKKLKKVGDWILATSAIPPSRLSVEITGLEKGTSYKFRVRALNMLGESEPSAPSRPYVVSGYSGRVYERPVAGPYITFTDAVNETTIMLKWMYIPASNNNTPIHGFYIYYRPTDSDNDSDYKKDMVEGDKYWHSISHLQPETSYDIKMQCFNEGGESEFSNVMICETKARKSSGQPGRLPPPTLAPPQPPLPETIERPVGTGAMVARSSDLPYLIVGVVLGSIVLIIVTFIPFCLWRAWSKQKHTTDLGFPRSALPPSCPYTMVPLGGLPGHQASGQPYLSGISGRACANGIHMNRGCPSAAVGYPGMKPQQHCPGELQQQSDTSSLLRQTHLGNGYDPQSHQITRGPKSSPDEGSFLYTLPDDSTHQLLQPHHDCCQRQEQPAAVGQSGVRRAPDSPVLEAVWDPPFHSGPPCCLGLVPVEEVDSPDSCQVSGGDWCPQHPVGAYVGQEPGMQLSPGPLVRVSFETPPLTI 2 856 878 MVARSSDLPY LIVGVVLGSIVLIIVTFIPFCLW RAWSKQKHTT +Q13410 
MAVFPSSGLPRCLLTLILLQLPKLDSAPFDVIGPPEPILAVVGEDAELPCRLSPNASAEHLELRWFRKKVSPAVLVHRDGREQEAEQMPEYRGRATLVQDGIAKGRVALRIRGVRVSDDGEYTCFFREDGSYEEALVHLKVAALGSDPHISMQVQENGEICLECTSVGWYPEPQVQWRTSKGEKFPSTSESRNPDEEGLFTVAASVIIRDTSAKNVSCYIQNLLLGQEKKVEISIPASSLPRLTPWIVAVAVILMVLGLLTIGSIFFTWRLYNERPRERRNEFSSKERLLEELKWKKATLHAVDVTLDPDTAHPHLFLYEDSKSVRLEDSRQKLPEKTERFDSWPCVLGRETFTSGRHYWEVEVGDRTDWAIGVCRENVMKKGFDPMTPENGFWAVELYGNGYWALTPLRTPLPLAGPPRRVGIFLDYESGDISFYNMNDGSDIYTFSNVTFSGPLRPFFCLWSSGKKPLTICPIADGPERVTVIANAQDLSKEIPLSPMGEDSAPRDADTLHSKLIPTQPSQGAP 2 245 267 IPASSLPRLT PWIVAVAVILMVLGLLTIGSIFF TWRLYNERPR +Q8WVV5 MEPAAALHFSLPASLLLLLLLLLLSLCALVSAQFTVVGPANPILAMVGENTTLRCHLSPEKNAEDMEVRWFRSQFSPAVFVYKGGRERTEEQMEEYRGRITFVSKDINRGSVALVIHNVTAQENGIYRCYFQEGRSYDEAILRLVVAGLGSKPLIEIKAQEDGSIWLECISGGWYPEPLTVWRDPYGEVVPALKEVSIADADGLFMVTTAVIIRDKYVRNVSCSVNNTLLGQEKETVIFIPESFMPSASPWMVALAVILTASPWMVSMTVILAVFIIFMAVSICCIKKLQREKKILSGEKKVEQEEKEIAQQLQEELRWRRTFLHAADVVLDPDTAHPELFLSEDRRSVRRGPYRQRVPDNPERFDSQPCVLGWESFASGKHYWEVEVENVMVWTVGVCRHSVERKGEVLLIPQNGFWTLEMFGNQYRALSSPERILPLKESLCRVGVFLDYEAGDVSFYNMRDRSHIYTCPRSAFTVPVRPFFRLGSDDSPIFICPALTGASGVMVPEEGLKLHRVGTHQSL 2 264 286 ALAVILTASP WMVSMTVILAVFIIFMAVSICCI KKLQREKKIL +Q96KV6 MEPAAALHFSRPASLLLLLSLCALVSAQVTVVGPTDPILAMVGENTTLRCCLSPEENAEDMEVRWFQSQFSPAVFVYKGGRERTEEQKEEYRGRTTFVSKDSRGSVALIIHNVTAEDNGIYQCYFQEGRSCNEAILHLVVAGLDSEPVIEMRDHEDGGIQLECISGGWYPKPLTVWRDPYGEVVPALKEVSTPDADSLFMVTTAVIIRDKSVRNVSCSINDTLLGQKKESVIFIPESFMPSRSPCVVILPVIMIILMIPIAICIYWINNLQKEKKDSHLMTFNLCLSLAGWRRTFLHAANVVLDQDTGHPYLFVSEDKRSVTLDPSRESIPGNPERFDSQLCVLGQESFASGKHYLEVDVENVIEWTVGICRDNVERKWEVPLLPQNGFWTLEMHKRKYWALTSLKWILSLEEPLCQVGIFLDYEAGDVSFYNMRDRSHIYTFPHSAFSVPVRPFFSLGSYDSQILICSAFTGASGVTVPEEGWTLHRAGTHHSPQNQFPSLTAMETSPGHLSSHCTMPLVEDTPSSPLVTQENIFQLPLSHPLQTSAPVHLLIRCGFSSSFGCNYGMESRHRELVVPQLPARKKV 2 246 268 ESFMPSRSPC VVILPVIMIILMIPIAICIYWIN NLQKEKKDSH +P78410 
MKMASSLAFLLLNFHVSLLLVQLLTPCSAQFSVLGPSGPILAMVGEDADLPCHLFPTMSAETMELKWVSSSLRQVVNVYADGKEVEDRQSAPYRGRTSILRDGITAGKAALRIHNVTASDSGKYLCYFQDGDFYEKALVELKVAALGSNLHVEVKGYEDGGIHLECRSTGWYPQPQIQWSNAKGENIPAVEAPVVADGVGLYEVAASVIMRGGSGEGVSCIIRNSLLGLEKTASISIADPFFRSAQPWIAALAGTLPILLLLLAGASYFLWRQQKEITALSSEIESEQEMKEMGYAATEREISLRESLQEELKRKKIQYLTRGEESSSDTNKSA 2 248 270 ADPFFRSAQP WIAALAGTLPILLLLLAGASYFL WRQQKEITAL +Q7Z6A9 MKTLPAMLGTGKLFWVFFLIPYLDIWNIHGKESCDVQLYIKRQSEHSILAGDPFELECPVKYCANRPHVTWCKLNGTTCVKLEDRQTSWKEEKNISFFILHFEPVLPNDNGSYRCSANFQSNLIESHSTTLYVTDVKSASERPSKDEMASRPWLLYRLLPLGGLPLLITTCFCLFCCLRRHQGKQNELSDTAGREINLVDAHLKSEQTEASTRQNSQVLLSETGIYDNDPDLCFRMQEGSEVYSNPCLEENKPGIVYASLNHSVIGPNSRLARNVKEAPTEYASICVRS 2 153 175 PSKDEMASRP WLLYRLLPLGGLPLLITTCFCLF CCLRRHQGKQ +Q7TST0 MMKGSPSVPPAGCLLPLLLLLFTGVSGEVSWFSVKGPAEPITVLLGTEATLPCQLSPEQSAARMHIRWYRAQPTPAVLVFHNGQEQGEVQMPEYRGRTQMVRQAIDMGSVALQIQQVQASDDGLYHCQFTDGFTSQEVSMELRVIGLGSAPLVHMTGPENDGIRVLCSSSGWFPKPKVQWRDTSGNMLLSSSELQTQDREGLFQVEVSLLVTDRAIGNVICSIQNPMYDQEKSKAILLPEPFFPKTCPWKVALVCSVLILLVLLGGISLGIWKEHQVKRREIKKWSKEHEEMLLLKKGTKSVLKIRDDLQADLDRRKALYKEDWKKALLYPDWRKELFQEAPVRINYEMPDQDKTDSRTEENRGEETVSSSQVDHNLITLSQEGFMLGRYYWEVDVKDTEEWTLGVYELCTQDASLTDPLRKFRVLEKNGDGYRALDFCSQNINSEEPLQLKTRPLKIAIFLDQEDNDLSFYNMTDETHIFSFAQVPFLGSPYPYFTRNSMGLSATAQP 2 249 271 PEPFFPKTCP WKVALVCSVLILLVLLGGISLGI WKEHQVKRRE +Q6UXE8 MAFVLILVLSFYELVSGQWQVTGPGKFVQALVGEDAVFSCSLFPETSAEAMEVRFFRNQFHAVVHLYRDGEDWESKQMPQYRGRTEFVKDSIAGGRVSLRLKNITPSDIGLYGCWFSSQIYDEEATWELRVAALGSLPLISIVGYVDGGIQLLCLSSGWFPQPTAKWKGPQGQDLSSDSRANADGYSLYDVEISIIVQENAGSILCSIHLAEQSHEVESKVLIGETFFQPSPWRLASILLGLLCGALCGVVMGMIIVFFKSKGKIQAELDWRRKHGQAELRDARKHAVEVTLDPETAHPKLCVSDLKTVTHRKAPQEVPHSEKRFTRKSVVASQGFQAGKHYWEVDVGQNVGWYVGVCRDDVDRGKNNVTLSPNNGYWVLRLTTEHLYFTFNPHFISLPPSTPPTRVGVFLDYEGGTISFFNTNDQSLIYTLLTCQFEGLLRPYIQHAMYDEEKGTPIFICPVSWG 2 237 259 FFQPSPWRLA SILLGLLCGALCGVVMGMIIVFF KSKGKIQAEL +Q8BJE2 
MADFSVFLGFLKQIPRCLSIFFTYLLFLQLWEVNSDKVWVLGPEESILARVGEAVEFPCRLSSYQDAEHMEIRWFRAQVSNVVYLYQEPQGRSSLQMAQFRNRTLFEAYDIAEGSVNLHILKVLPSDEGRYGCRFLSDNFSGEATWELEVAGSGSDPHISLQGFSGEGIQLQCSSSGWYPKPKVQWRGHQGQCLSPESEAITQNAQGLFSLETSVIVRGGAHSNVSCIIQNPLLPQKKEFVIQIADVFLPRMSPWKKAFVGTLVVLPLSLIVLTMLALRYFYKLRSFQEKQVKQGEEVREKLQTELDWRRSEGQAEWRAAQQYAADVTLDPATAHPSLEVSNNGKTVSSRLGVPSIAAGDPQRFSEQTCVLSRERFSSGRHYWEVHVGRRSRWFLGACLESVERSGPARLSPAAGYWVMGLWNRCEYFVLDPHRVALALRVPPRRIGVLLDYEAGKLSFFNVSDGSHIFSFTDTFSGALRAYLRPRAHDGSEHPDPMTICSLPVRGPQVLEENDNDNWLQPYEPLDPAWAVNEAVS 2 259 281 LPRMSPWKKA FVGTLVVLPLSLIVLTMLALRYF YKLRSFQEKQ +A8MVZ5 MAVTCDPEAFLSICFVTLVFLQLPLASIWKADFDVTGPHAPILAMAGGHVELQCQLFPNISAEDMELRWYRCQPSLAVHMHERGMDMDGEQKWQYRGRTTFMSDHVARGKAMVRSHRVTTFDNRTYCCRFKDGVKFGEATVQVQVAGLGREPRIQVTDQQDGVRAECTSAGCFPKSWVERRDFRGQARPAVTNLSASATTRLWAVASSLTLWDRAVEGLSCSISSPLLPERRKVAESHLPATFSRSSQFTAWKAALPLILVAMGLVIAGGICIFWKRQREKNKASLEEERE 2 253 275 FSRSSQFTAW KAALPLILVAMGLVIAGGICIFW KRQREKNKAS +Q86VB7 MSKLRMVLLEDSGSADFRRHFVNLSPFTITVVLLLSACFVTSSLGGTDKELRLVDGENKCSGRVEVKVQEEWGTVCNNGWSMEAVSVICNQLGCPTAIKAPGWANSSAGSGRIWMDHVSCRGNESALWDCKHDGWGKHSNCTHQQDAGVTCSDGSNLEMRLTRGGNMCSGRIEIKFQGRWGTVCDDNFNIDHASVICRQLECGSAVSFSGSSNFGEGSGPIWFDDLICNGNESALWNCKHQGWGKHNCDHAEDAGVICSKGADLSLRLVDGVTECSGRLEVRFQGEWGTICDDGWDSYDAAVACKQLGCPTAVTAIGRVNASKGFGHIWLDSVSCQGHEPAIWQCKHHEWGKHYCNHNEDAGVTCSDGSDLELRLRGGGSRCAGTVEVEIQRLLGKVCDRGWGLKEADVVCRQLGCGSALKTSYQVYSKIQATNTWLFLSSCNGNETSLWDCKNWQWGGLTCDHYEEAKITCSAHREPRLVGGDIPCSGRVEVKHGDTWGSICDSDFSLEAASVLCRELQCGTVVSILGGAHFGEGNGQIWAEEFQCEGHESHLSLCPVAPRPEGTCSHSRDVGVVCSRYTEIRLVNGKTPCEGRVELKTLGAWGSLCNSHWDIEDAHVLCQQLKCGVALSTPGGARFGKGNGQIWRHMFHCTGTEQHMGDCPVTALGASLCPSEQVASVICSGNQSQTLSSCNSSSLGPTRPTIPEESAVACIESGQLRLVNGGGRCAGRVEIYHEGSWGTICDDSWDLSDAHVVCRQLGCGEAINATGSAHFGEGTGPIWLDEMKCNGKESRIWQCHSHGWGQQNCRHKEDAGVICSEFMSLRLTSEASREACAGRLEVFYNGAWGTVGKSSMSETTVGVVCRQLGCADKGKINPASLDKAMSIPMWVDNVQCPKGPDTLWQCPSSPWEKRLASPSEETWITCDNKIRLQEGPTSCSGRVEIWHGGSWGTVCDDSWDLDDAQVVCQQLGCGPALKAFKEAEFGQGTGPIWLNEVKCKGNESSLWDCPARRWGHSECGHKEDAAVNCTDISVQKTPQKATTG
RSSRQSSFIAVGILGVVLLAIFVALFFLTKKRRQRQRLAVSSRGENLVHQIQYREMNSCLNADDLDLMNSSENSHESADFSAAELISVSKFLPISGMEKEAILSHTEKENGNL 2 1049 1071 KATTGRSSRQ SSFIAVGILGVVLLAIFVALFFL TKKRRQRQRL +Q9NR16 MMLPQNSWHIDFGRCCCHQNLFSAVVTCILLLNSCFLISSFNGTDLELRLVNGDGPCSGTVEVKFQGQWGTVCDDGWNTTASTVVCKQLGCPFSFAMFRFGQAVTRHGKIWLDDVSCYGNESALWECQHREWGSHNCYHGEDVGVNCYGEANLGLRLVDGNNSCSGRVEVKFQERWGTICDDGWNLNTAAVVCRQLGCPSSFISSGVVNSPAVLRPIWLDDILCQGNELALWNCRHRGWGNHDCSHNEDVTLTCYDSSDLELRLVGGTNRCMGRVELKIQGRWGTVCHHKWNNAAADVVCKQLGCGTALHFAGLPHLQSGSDVVWLDGVSCSGNESFLWDCRHSGTVNFDCLHQNDVSVICSDGADLELRLADGSNNCSGRVEVRIHEQWWTICDQNWKNEQALVVCKQLGCPFSVFGSRRAKPSNEARDIWINSISCTGNESALWDCTYDGKAKRTCFRRSDAGVICSDKADLDLRLVGAHSPCYGRLEVKYQGEWGTVCHDRWSTRNAAVVCKQLGCGKPLHVFGMTYFKEASGPIWLDDVSCIGNESNIWDCEHSGWGKHNCVHREDVIVTCSGDATWGLRLVGGSNRCSGRLEVYFQGRWGTVCDDGWNSKAAAVVCSQLDCPSSIIGMGLGNASTGYGKIWLDDVSCDGDESDLWSCRNSGWGNNDCSHSEDVGVICSDASDMELRLVGGSSRCAGKVEVNVQGAVGILCANGWGMNIAEVVCRQLECGSAIRVSREPHFTERTLHILMSNSGCTGGEASLWDCIRWEWKQTACHLNMEASLICSAHRQPRLVGADMPCSGRVEVKHADTWRSVCDSDFSLHAANVLCRELNCGDAISLSVGDHFGKGNGLTWAEKFQCEGSETHLALCPIVQHPEDTCIHSREVGVVCSRYTDVRLVNGKSQCDGQVEINVLGHWGSLCDTHWDPEDARVLCRQLSCGTALSTTGGKYIGERSVRVWGHRFHCLGNESLLDNCQMTVLGAPPCIHGNTVSVICTGSLTQPLFPCLANVSDPYLSAVPEGSALICLEDKRLRLVDGDSRCAGRVEIYHDGFWGTICDDGWDLSDAHVVCQKLGCGVAFNATVSAHFGEGSGPIWLDDLNCTGMESHLWQCPSRGWGQHDCRHKEDAGVICSEFTALRLYSETETESCAGRLEVFYNGTWGSVGRRNITTAIAGIVCRQLGCGENGVVSLAPLSKTGSGFMWVDDIQCPKTHISIWQCLSAPWERRISSPAEETWITCEDRIRVRGGDTECSGRVEIWHAGSWGTVCDDSWDLAEAEVVCQQLGCGSALAALRDASFGQGTGTIWLDDMRCKGNESFLWDCHAKPWGQSDCGHKEDAGVRCSGQSLKSLNASSGHLALILSSIFGLLLLVLFILFLTWCRVQKQKHLPLRVSTRRRGSLEENLFHEMETCLKREDPHGTRTSDDTPNHGCEDASDTSLLGVLPASEATK 2 1360 1382 LKSLNASSGH LALILSSIFGLLLLVLFILFLTW CRVQKQKHLP +P55291 
MDAAFLLVLGLLAQSLCLSLGVPGWRRPTTLYPWRRAPALSRVRRAWVIPPISVSENHKRLPYPLVQIKSDKQQLGSVIYSIQGPGVDEEPRGVFSIDKFTGKVFLNAMLDREKTDRFRLRAFALDLGGSTLEDPTDLEIVVVDQNDNRPAFLQEAFTGRVLEGAVPGTYVTRAEATDADDPETDNAALRFSILQQGSPELFSIDELTGEIRTVQVGLDREVVAVYNLTLQVADMSGDGLTATASAIITLDDINDNAPEFTRDEFFMEAIEAVSGVDVGRLEVEDRDLPGSPNWVARFTILEGDPDGQFTIRTDPKTNEGVLSIVKALDYESCEHYELKVSVQNEAPLQAAALRAERGQAKVRVHVQDTNEPPVFQENPLRTSLAEGAPPGTLVATFSARDPDTEQLQRLSYSKDYDPEDWLQVDAATGRIQTQHVLSPASPFLKGGWYRAIVLAQDDASQPRTATGTLSIEILEVNDHAPVLAPPPPGSLCSEPHQGPGLLLGATDEDLPPHGAPFHFQLSPRLPELGRNWSLSQVNVSHARLRPRHQVPEGLHRLSLLLRDSGQPPQQREQPLNVTVCRCGKDGVCLPGAAALLAGGTGLSLGALVIVLASALLLLVLVLLVALRARFWKQSRGKGLLHGPQDDLRDNVLNYDEQGGGEEDQDAYDISQLRHPTALSLPLGPPPLRRDAPQGRLHPQPPRVLPTSPLDIADFINDGLEAADSDPSVPPYDTALIYDYEGDGSVAGTLSSILSSQGDEDQDYDYLRDWGPRFARLADMYGHPCGLEYGARWDHQAREGLSPGALLPRHRGRTA 2 604 626 ALLAGGTGLS LGALVIVLASALLLLVLVLLVAL RARFWKQSRG +P33146 MGSALLLALGLLAQSLGLSWAVPEPKPSTLYPWRRASAPGRVRRAWVIPPISVSENHKRLPYPLVQIKSDKQQLGSVIYSIQGPGVDEEPRNVFSIDKFTGRVYLNATLDREKTDRFRLRAFALDLGGSTLEDPTDLEIVVVDQNDNRPAFLQDVFRGRILEGAIPGTFVTRAEATDADDPETDNAALRFSILEQGSPEFFSIDEHTGEIRTVQVGLDREVVAVYNLTLQVADMSGDGLTATASAIISIDDINDNAPEFTKDEFFMEAAEAVSGVDVGRLEVEDKDLPGSPNWVARFTILEGDPDGQFKIYTDPKTNEGVLSVVKPLDYESREQYELRVSVQNEAPLQAAAPRARRGQTRVSVWVQDTNEAPVFPENPLRTSIAEGAPPGTSVATFSARDPDTEQLQRISYSKDYDPEDWLQVDGATGRIQTQRVLSPASPFLKDGWYRAIILALDNAIPPSTATGTLSIEILEVNDHAPALALPPSGSLCSEPDQGPGLLLGATDEDLPPHGAPFHFQLNPRVPDLGRNWSVSQINVSHARLRLRHQVSEGLHRLSLLLQDSGEPPQQREQTLNVTVCRCGSDGTCLPGAAALRGGGVGVSLGALVIVLASTVVLLVLILLAALRTRFRGHSRGKSLLHGLQEDLRDNILNYDEQGGGEEDQDAYDINQLRHPVEPRATSRSLGRPPLRRDAPFSYVPQPHRVLPTSPSDIANFISDGLEAADSDPSVPPYDTALIYDYEGDGSVAGTLSSILSSLGDEDQDYDYLRDWGPRFARLADMYGHQ 2 603 625 ALRGGGVGVS LGALVIVLASTVVLLVLILLAAL RTRFRGHSRG +O75309 
MVPAWLWLLCVSVPQALPKAQPAELSVEVPENYGGNFPLYLTKLPLPREGAEGQIVLSGDSGKATEGPFAMDPDSGFLLVTRALDREEQAEYQLQVTLEMQDGHVLWGPQPVLVHVKDENDQVPHFSQAIYRARLSRGTRPGIPFLFLEASDRDEPGTANSDLRFHILSQAPAQPSPDMFQLEPRLGALALSPKGSTSLDHALERTYQLLVQVKDMGDQASGHQATATVEVSIIESTWVSLEPIHLAENLKVLYPHHMAQVHWSGGDVHYHLESHPPGPFEVNAEGNLYVTRELDREAQAEYLLQVRAQNSHGEDYAAPLELHVLVMDENDNVPICPPRDPTVSIPELSPPGTEVTRLSAEDADAPGSPNSHVVYQLLSPEPEDGVEGRAFQVDPTSGSVTLGVLPLRAGQNILLLVLAMDLAGAEGGFSSTCEVEVAVTDINDHAPEFITSQIGPISLPEDVEPGTLVAMLTAIDADLEPAFRLMDFAIERGDTEGTFGLDWEPDSGHVRLRLCKNLSYEAAPSHEVVVVVQSVAKLVGPGPGPGATATVTVLVERVMPPPKLDQESYEASVPISAPAGSFLLTIQPSDPISRTLRFSLVNDSEGWLCIEKFSGEVHTAQSLQGAQPGDTYTVLVEAQDTDEPRLSASAPLVIHFLKAPPAPALTLAPVPSQYLCTPRQDHGLIVSGPSKDPDLASGHGPYSFTLGPNPTVQRDWRLQTLNGSHAYLTLALHWVEPREHIIPVVVSHNAQMWQLLVRVIVCRCNVEGQCMRKVGRMKGMPTKLSAVGILVGTLVAIGIFLILIFTHWTMSRKKDPDQPADSVPLKATV 2 786 808 RMKGMPTKLS AVGILVGTLVAIGIFLILIFTHW TMSRKKDPDQ +Q9R100 MVSAQLHFLCLLTLYLTCGYGEEGKFSGPLKPMTFSIFEGQEPSQVIFQFKTNPPAVTFELTGETDGIFKIEKDGLLYHTRALDRETRAVHHLQLAALDSHGAIVDGPVPITIEVKDINDNRPTFLQSKYEGSVRQNSRPGKPFMYVNATDLDDPATPNGQLFYQIVIQLPQINDVMYFQIDSKTGAISLTPEGSQELDPVKNPSYNLVVSVKDMGGQSENSFSDTTYVDISIRENIWKAPEPVEIRENSTDPHPIKITQVQWNDPGAQYSLVNKEKLSPFPFSIDQEGNIYVTQALDREEKNSHVFFATAKDENGKPLAYPLEIYVKVIDINDNPPTCLSPVTVFEVQENEPLGNSIGIFEAHDMDEANNINSILKYKLVDQTPKVPSDGLFLIGEYEGKVQLSKQSLKKQDSPQYNLSIEVSDVDFKTLCYIQVNVIDINDQIPIFETSNYGSKTLSEDTAIGSTILIIQATDADEPFTGSSKILYKIVQGDTEGRLEVVTDPTTNAGYVKIKKPLDFETQPVSSIVFQAENPEPLVKGIEYNASSFASFELIVTDVNEVPVFPQRIFQANVSEDAAVGSRVGNVTARDPEGLTVSYSLKGNMRGWLKIDSVTGEIFSAAPLDRETESVYRVQVVATEVGGSSLSSTADFHLVLTDVNDNPPRLAKDYTGLFFCHPLSAPGSLIFEVTDDDQQSLRRPKFTFALGREGLQSDWEVSKINGTHARLSTRHTRFEEQVYNIPIRINDGGQPPMEGTVFLPVTFCQCVEGSCFRPAGRQDGIPTVGMAVGILLTTFLVIGIILAVVFIRMRKDKVENPQSPENKPLRS 2 784 806 PAGRQDGIPT VGMAVGILLTTFLVIGIILAVVF IRMRKDKVEN +Q9H159 
MNCYLLLRFMLGIPLLWPCLGATENSQTKKVKQPVRSHLRVKRGWVWNQFFVPEEMNTTSHHIGQLRSDLDNGNNSFQYKLLGAGAGSTFIIDERTGDIYAIQKLDREERSLYILRAQVIDIATGRAVEPESEFVIKVSDINDNEPKFLDEPYEAIVPEMSPEGTLVIQVTASDADDPSSGNNARLLYSLLQGQPYFSVEPTTGVIRISSKMDRELQDEYWVIIQAKDMIGQPGALSGTTSVLIKLSDVNDNKPIFKESLYRLTVSESAPTGTSIGTIMAYDNDIGENAEMDYSIEEDDSQTFDIITNHETQEGIVILKKKVDFEHQNHYGIRAKVKNHHVPEQLMKYHTEASTTFIKIQVEDVDEPPLFLLPYYVFEVFEETPQGSFVGVVSATDPDNRKSPIRYSITRSKVFNINDNGTITTSNSLDREISAWYNLSITATEKYNIEQISSIPLYVQVLNINDHAPEFSQYYETYVCENAGSGQVIQTISAVDRDESIEEHHFYFNLSVEDTNNSSFTIIDNQDNTAVILTNRTGFNLQEEPVFYISILIADNGIPSLTSTNTLTIHVCDCGDSGSTQTCQYQELVLSMGFKTEVIIAILICIMIIFGFIFLTLGLKQRRKQILFPEKSEDFRENIFQYDDEGGGEEDTEAFDIAELRSSTIMRERKTRKTTSAEIRSLYRQSLQVGPDSAIFRKFILEKLEEANTDPCAPPFDSLQTYAFEGTGSLAGSLSSLESAVSDQDESYDYLNELGPRFKRLACMFGSAVQSNN 2 596 618 ELVLSMGFKT EVIIAILICIMIIFGFIFLTLGL KQRRKQILFP +Q9H251 MGRHVATSCHVAWLLVLISGCWGQVNRLPFFTNHFFDTYLLISEDTPVGSSVTQLLAQDMDNDPLVFGVSGEEASRFFAVEPDTGVVWLRQPLDRETKSEFTVEFSVSDHQGVITRKVNIQVGDVNDNAPTFHNQPYSVRIPENTPVGTPIFIVNATDPDLGAGGSVLYSFQPPSQFFAIDSARGIVTVIRELDYETTQAYQLTVNATDQDKTRPLSTLANLAIIITDVQDMDPIFINLPYSTNIYEHSPPGTTVRIITAIDQDKGRPRGIGYTIVSGNTNSIFALDYISGVLTLNGLLDRENPLYSHGFILTVKGTELNDDRTPSDATVTTTFNILVIDINDNAPEFNSSEYSVAITELAQVGFALPLFIQVVDKDENLGLNSMFEVYLVGNNSHHFIISPTSVQGKADIRIRVAIPLDYETVDRYDFDLFANESVPDHVGYAKVKITLINENDNRPIFSQPLYNISLYENVTVGTSVLTVLATDNDAGTFGEVSYFFSDDPDRFSLDKDTGLIMLIARLDYELIQRFTLTIIARDGGGEETTGRVRINVLDVNDNVPTFQKDAYVGALRENEPSVTQLVRLRATDEDSPPNNQITYSIVSASAFGSYFDISLYEGYGVISVSRPLDYEQISNGLIYLTVMAMDAGNPPLNSTVPVTIEVFDENDNPPTFSKPAYFVSVVENIMAGATVLFLNATDLDRSREYGQESIIYSLEGSTQFRINARSGEITTTSLLDRETKSEYILIVRAVDGGVGHNQKTGIATVNITLLDINDNHPTWKDAPYYINLVEMTPPDSDVTTVVAVDPDLGENGTLVYSIQPPNKFYSLNSTTGKIRTTHAMLDRENPDPHEAELMRKIVVSVTDCGRPPLKATSSATVFVNLLDLNDNDPTFQNLPFVAEVLEGIPAGVSIYQVVAIDLDEGLNGLVSYRMPVGMPRMDFLINSSSGVVVTTTELDRERIAEYQLRVVASDAGTPTKSSTSTLTIHVLDVNDETPTFFPAVYNVSVSEDVPREFRVVWLNCTDNDVGLNAELSYFITGGNVDGKFSVGYRDAVVRTVVGLDRETTAAYMLILEAIDNGPVGKRHTGTATVFVTVLDVNDNRPIFLQSSYEASVPEDIPEGHSILQLKATDADEGEFGRVWYRILHGNHGNNFRIHVSNGLLMRGP
RPLDRERNSSHVLIVEAYNHDLGPMRSSVRVIVYVEDINDEAPVFTQQQYSRLGLRETAGIGTSVIVVQATDRDSGDGGLVNYRILSGAEGKFEIDESTGLIITVNYLDYETKTSYMMNVSATDQAPPFNQGFCSVYITLLNELDEAVQFSNASYEAAILENLALGTEIVRVQAYSIDNLNQITYRFNAYTSTQAKALFKIDAITGVITVQGLVDREKGDFYTLTVVADDGGPKVDSTVKVYITVLDENDNSPRFDFTSDSAVSIPEDCPVGQRVATVKAWDPDAGSNGQVVFSLASGNIAGAFEIVTTNDSIGEVFVARPLDREELDHYILQVVASDRGTPPRKKDHILQVTILDINDNPPVIESPFGYNVSVNENVGGGTAVVQVRATDRDIGINSVLSYYITEGNKDMAFRMDRISGEIATRPAPPDRERQSFYHLVATVEDEGTPTLSATTHVYVTIVDENDNAPMFQQPHYEVLLDEGPDTLNTSLITIQALDLDEGPNGTVTYAIVAGNIVNTFRIDRHMGVITAAKELDYEISHGRYTLIVTATDQCPILSHRLTSTTTVLVNVNDINDNVPTFPRDYEGPFEVTEGQPGPRVWTFLAHDRDSGPNGQVEYSIMDGDPLGEFVISPVEGVLRVRKDVELDRETIAFYNLTICARDRGMPPLSSTMLVGIRVLDINDNDPVLLNLPMNITISENSPVSSFVAHVLASDADSGCNARLTFNITAGNRERAFFINATTGIVTVNRPLDRERIPEYKLTISVKDNPENPRIARRDYDLLLIFLSDENDNHPLFTKSTYQAEVMENSPAGTPLTVLNGPILALDADQDIYAVVTYQLLGAQSGLFDINSSTGVVTVRSGVIIDREAFSPPILELLLLAEDIGLLNSTAHLLITILDDNDNRPTFSPATLTVHLLENCPPGFSVLQVTATDEDSGLNGELVYRIEAGAQDRFLIHLVTGVIRVGNATIDREEQESYRLTVVATDRGTVPLSGTAIVTILIDDINDSRPEFLNPIQTVSVLESAEPGTVIANITAIDHDLNPKLEYHIVGIVAKDDTDRLVPNQEDAFAVNINTGSVMVKSPMNRELVATYEVTLSVIDNASDLPERSVSVPNAKLTVNVLDVNDNTPQFKPFGITYYMERILEGATPGTTLIAVAAVDPDKGLNGLVTYTLLDLVPPGYVQLEDSSAGKVIANRTVDYEEVHWLNFTVRASDNGSPPRAAEIPVYLEIVDINDNNPIFDQPSYQEAVFEDVPVGTIILTVTATDADSGNFALIEYSLGDGESKFAINPTTGDIYVLSSLDREKKDHYILTALAKDNPGDVASNRRENSVQVVIQVLDVNDCRPQFSKPQFSTSVYENEPAGTSVITMMATDQDEGPNGELTYSLEGPGVEAFHVDMDSGLVTTQRPLQSYEKFSLTVVATDGGEPPLWGTTMLLVEVIDVNDNRPVFVRPPNGTILHIREEIPLRSNVYEVYATDKDEGLNGAVRYSFLKTAGNRDWEFFIIDPISGLIQTAQRLDRESQAVYSLILVASDLGQPVPYETMQPLQVALEDIDDNEPLFVRPPKGSPQYQLLTVPEHSPRGTLVGNVTGAVDADEGPNAIVYYFIAAGNEEKNFHLQPDGCLLVLRDLDREREAIFSFIVKASSNRSWTPPRGPSPTLDLVADLTLQEVRVVLEDINDQPPRFTKAEYTAGVATDAKVGSELIQVLALDADIGNNSLVFYSILAIHYFRALANDSEDVGQVFTMGSMDGILRTFDLFMAYSPGYFVVDIVARDLAGHNDTAIIGIYILRDDQRVKIVINEIPDRVRGFEEEFIHLLSNITGAIVNTDNVQFHVDKKGRVNFAQTELLIHVVNRDTNRILDVDRVIQMIDENKEQLRNLFRNYNVLDVQPAISVRLPDDMSALQMAIIVLAILLFLAAMLFVLMNWYYRTVHKRKLKAIVAGSAGNRGFIDIMDMPNTNKYSFDGANPVWLDPFCRNLELAAQAEHEDDLPENLSEIADLW
NSPTRTHGTFGREPAAVKPDDDRYLRAAIQEYDNIAKLGQIIREGPIKGSLLKVVLEDYLRLKKLFAQRMVQKASSCHSSISELIQTELDEEPGDHSPGQGSLRFRHKPPVELKGPDGIHVVHGSTGTLLATDLNSLPEEDQKGLGRSLETLTAAEATAFERNARTESAKSTPLHKLRDVIMETPLEITEL 2 3068 3090 LPDDMSALQM AIIVLAILLFLAAMLFVLMNWYY RTVHKRKLKA +Q8IXH8 MAMRSGRHPSLLLLLVLLLWLLQVSIIDSVQQETDDLTKQTKEKIYQPLRRSKRRWVITTLELEEEDPGPFPKLIGELFNNMSYNMSLMYLISGPGVDEYPEIGLFSLEDHENGRIYVHRPVDREMTPSFTVYFDVVERSTGKIVDTSLIFNIRISDVNDHAPQFPEKEFNITVQENQSAGQPIFQMLAVDLDEENTPNSQVLYFLISQTPLLKESGFRVDRLSGEIRLSGCLDYETAPQFTLLIRARDCGEPSLSSTTTVHVDVQEGNNHRPAFTQENYKVQIPEGRASQGVLRLLVQDRDSPFTSAWRAKFNILHGNEEGHFDISTDPETNEGILNVIKPLDYETRPAQSLIIVVENEERLVFCERGKLQPPRKAAASATVSVQVTDANDPPAFHPQSFIVNKEEGARPGTLLGTFNAMDPDSQIRYELVHDPANWVSVDKNSGVVITVEPIDRESPHVNNSFYVIIIHAVDDGFPPQTATGTLMLFLSDINDNVPTLRPRSRYMEVCESAVHEPLHIEAEDPDLEPFSDPFTFELDNTWGNAEDTWKLGRNWGQSVELLTLRSLPRGNYLVPLFIGDKQGLSQKQTVHVRICPCASGLTCVELADAEVGLHVGALFPVCAAFVALAVALLFLLRCYFVLEPKRHGCSVSNDEGHQTLVMYNAESKGTSAQTWSDVEGQRPALLICTAAAGPTQGVKDLEEVPPSAASQSAQARCALGSWGYGKPFEPRSVKNIHSTPAYPDATMHRQLLAPVEGRMAETLNQKLHVANVLEDDPGYLPHVYSEEGECGGAPSLSSLASLEQELQPDLLDSLGSKATPFEEIYSESGVPS 2 614 636 VELADAEVGL HVGALFPVCAAFVALAVALLFLL RCYFVLEPKR +P59862 MDTRGCAWLLLLLSLPQGQSHQPLHRSKRRWVLTTLELQEEDPGPFPKLVGELFNNMSNNVSLIYLIRGPGVDEFPEIGLFSIEDHQSGKIYVHRPVDREVTPSFMVHFDAVDRSTGKVVDESLIFNIRIRDVNDHAPQFPEKEFNISVKESQAAGQPIFQLLTVDLDQENTPNSQVLYFLVSQTPLLRESGFRIDLISGEVRLSGCLHYETAPLFTLIVRASDCGEPSLTSTATIHVSVEDSNNHMPTFMEDRYEIRISEGQVEQGVLYLPVQDGDSPFTPAWRTQFNIWDGNEEGHFDIVTDPETNQGLLSIIKPLDYESQVAHSLVVVVENQEQLFVCTEGQLQPLRKAMASTMVSVQVLDTNDPPAFHPQSFIVSEEDGAWPAIQLGYFNATDPDRADSQIRYKLVHDPENWVTVDEKSGVVTTKKQIDRESPHVNDSFYTIIVHAVDNGLPPLTGTGTLMLFLSDVNDNAPTLRPHSRHLEVCESAGSQPLLIEAEDADLDPYADPFTFDLDNAQGDVEETWMLRTKQGEGHSAELTMLRSVPPGDYLVPLFIADRQGLAQKQTVHVRICSCRSGSECEEPSDTWLLWWALSPVGAALMVLSAALLCLLRCSCTFGPKRLRGFIPSDSGHQTLIVYNEESKVPSAQGCDTFFEPRGVKTLLSSTPVYLDRMVPRQQPLQLLEGRVVEAWSQKLQSIDVLEGDTGYLPHVYREEGECEGAETLSSLTFLEQDLSPKLLGCSGSKSTPSEAMCFTSRVPS 2 592 614 ECEEPSDTWL LWWALSPVGAALMVLSAALLCLL RCSCTFGPKR +Q8NFZ8 
MGRARRFQWPLLLLWAAAAGPGAGQEVQTENVTVAEGGVAEITCRLHQYDGSIVVIQNPARQTLFFNGTRALKDERFQLEEFSPRRVRIRLSDARLEDEGGYFCQLYTEDTHHQIATLTVLVAPENPVVEVREQAVEGGEVELSCLVPRSRPAATLRWYRDRKELKGVSSSQENGKVWSVASTVRFRVDRKDDGGIIICEAQNQALPSGHSKQTQYVLDVQYSPTARIHASQAVVREGDTLVLTCAVTGNPRPNQIRWNRGNESLPERAEAVGETLTLPGLVSADNGTYTCEASNKHGHARALYVLVVYDPGAVVEAQTSVPYAIVGGILALLVFLIICVLVGMVWCSVRQKGSYLTHEASGLDEQGEAREAFLNGSDGHKRKEEFFI 2 324 346 VVEAQTSVPY AIVGGILALLVFLIICVLVGMVW CSVRQKGSYL +O43570 MPRRSLHAAAVLLLVILKEQPSSPAPVNGSKWTYFGPDGENSWSKKYPSCGGLLQSPIDLHSDILQYDASLTPLEFQGYNLSANKQFLLTNNGHSVKLNLPSDMHIQGLQSRYSATQLHLHWGNPNDPHGSEHTVSGQHFAAELHIVHYNSDLYPDASTASNKSEGLAVLAVLIEMGSFNPSYDKIFSHLQHVKYKGQEAFVPGFNIEELLPERTAEYYRYRGSLTTPPCNPTVLWTVFRNPVQISQEQLLALETALYCTHMDDPSPREMINNFRQVQKFDERLVYTSFSQVQVCTAAGLSLGIILSLALAGILGICIVVVVSIWLFRRKSIKKGDNKGVIYKPATKMETEAHA 2 305 327 CTAAGLSLGI ILSLALAGILGICIVVVVSIWLF RRKSIKKGDN +Q9ULX7 MLFSALLLEVIWILAADGGQHWTYEGPHGQDHWPASYPECGNNAQSPIDIQTDSVTFDPDLPALQPHGYDQPGTEPLDLHNNGHTVQLSLPSTLYLGGLPRKYVAAQLHLHWGQKGSPGGSEHQINSEATFAELHIVHYDSDSYDSLSEAAERPQGLAVLGILIEVGETKNIAYEHILSHLHEVRHKDQKTSVPPFNLRELLPKQLGQYFRYNGSLTTPPCYQSVLWTVFYRRSQISMEQLEKLQGTLFSTEEEPSKLLVQNYRALQPLNQRMVFASFIQAGSSYTTGEMLSLGVGILVGCLCLLLAVYFIARKIRKKRLENRKSVVFTSAQATTEA 2 290 312 QAGSSYTTGE MLSLGVGILVGCLCLLLAVYFIA RKIRKKRLEN +Q16790 MAPLCPSPWLPLLIPAPAPGLTVQLLLSLLLLVPVHPQRLPRMQEDSPLGGGSSGEDDPLGEEDLPSEEDSPREEDPPGEEDLPGEEDLPGEEDLPEVKPKSEEEGSLKLEDLPTVEAPGDPQEPQNNAHRDKEGDDQSHWRYGGDPPWPRVSPACAGRFQSPVDIRPQLAAFCPALRPLELLGFQLPPLPELRLRNNGHSVQLTLPPGLEMALGPGREYRALQLHLHWGAAGRPGSEHTVEGHRFPAEIHVVHLSTAFARVDEALGRPGGLAVLAAFLEEGPEENSAYEQLLSRLEEIAEEGSETQVPGLDISALLPSDFSRYFQYEGSLTTPPCAQGVIWTVFNQTVMLSAKQLHTLSDTLWGPGDSRLQLNFRATQPLNGRVIEASFPAGVDSSPRAAEPVQLNSCLAAGDILALVFGLLFAVTSVAFLVQMRRQHRRGTKGGVSYRPAEVAETGA 2 411 433 AEPVQLNSCL AAGDILALVFGLLFAVTSVAFLV QMRRQHRRGT +P27824 
MEGKWLLCMLLVLGTAIVEAHDGHDDDVIDIEDDLDDVIEEVEDSKPDTTAPPSSPKVTYKAPVPTGEVYFADSFDRGTLSGWILSKAKKDDTDDEIAKYDGKWEVEEMKESKLPGDKGLVLMSRAKHHAISAKLNKPFLFDTKPLIVQYEVNFQNGIECGGAYVKLLSKTPELNLDQFHDKTPYTIMFGPDKCGEDYKLHFIFRHKNPKTGIYEEKHAKRPDADLKTYFTDKKTHLYTLILNPDNSFEILVDQSVVNSGNLLNDMTPPVNPSREIEDPEDRKPEDWDERPKIPDPEAVKPDDWDEDAPAKIPDEEATKPEGWLDDEPEYVPDPDAEKPEDWDEDMDGEWEAPQIANPRCESAPGCGVWQRPVIDNPNYKGKWKPPMIDNPSYQGIWKPRKIPNPDFFEDLEPFRMTPFSAIGLELWSMTSDIFFDNFIICADRRIVDDWANDGWGLKKAADGAAEPGVVGQMIEAAEERPWLWVVYILTVALPVFLVILFCCSGKKQTSGMEYKKTDAPQPDVKEEEEEKEEEKDKGDEEEEGEEKLEEKQKSDAEEDGGTVSQEEEDRKPKAEEDEILNRSPRNRKPRRE 2 483 505 MIEAAEERPW LWVVYILTVALPVFLVILFCCSG KKQTSGMEYK +O75976 MASGRDERPPWRLGRLLLLMCLLLLGSSARAAHIKKAEATTTTTSAGAEAAEGQFDRYYHEEELESALREAAAAGLPGLARLFSIGRSVEGRPLWVLRLTAGLGSLIPEGDAGPDAAGPDAAGPLLPGRPQVKLVGNMHGDETVSRQVLIYLARELAAGYRRGDPRLVRLLNTTDVYLLPSLNPDGFERAREGDCGFGDGGPSGASGRDNSRGRDLNRSFPDQFSTGEPPALDEVPEVRALIEWIRRNKFVLSGNLHGGSVVASYPFDDSPEHKATGIYSKTSDDEVFKYLAKAYASNHPIMKTGEPHCPGDEDETFKDGITNGAHWYDVEGGMQDYNYVWANCFEITLELSCCKYPPASQLRQEWENNRESLITLIEKVHIGVKGFVKDSITGSGLENATISVAGINHNITTGRFGDFYRLLVPGTYNLTVVLTGYMPLTVTNVVVKEGPATEVDFSLRPTVTSVIPDTTEAVSTASTVAIPNILSGTSSSYQPIQPKDFHHHHFPDMEIFLRRFANEYPNITRLYSLGKSVESRELYVMEISDNPGVHEPGEPEFKYIGNMHGNEVVGRELLLNLIEYLCKNFGTDPEVTDLVHNTRIHLMPSMNPDGYEKSQEGDSISVIGRNNSNNFDLNRNFPDQFVQITDPTQPETIAVMSWMKSYPFVLSANLHGGSLVVNYPFDDDEQGLATYSKSPDDAVFQQIALSYSKENSQMFQGRPCKNMYPNEYFPHGITNGASWYNVPGGMQDWNYLQTNCFEVTIELGCVKYPLEKELPNFWEQNRRSLIQFMKQVHQGVRGFVLDATDGRGILNATISVAEINHPVTTYKTGDYWRLLVPGTYKITASARGYNPVTKNVTVKSEGAIQVNFTLVRSSTDSNNESKKGKGASSSTNDASDPTTKEFETLIKDLSAENGLESLMLRSSSNLALALYRYHSYKDLSEFLRGLVMNYPHITNLTNLGQSTEYRHIWSLEISNKPNVSEPEEPKIRFVAGIHGNAPVGTELLLALAEFLCLNYKKNPAVTQLVDRTRIVIVPSLNPDGRERAQEKDCTSKIGQTNARGKDLDTDFTNNASQPETKAIIENLIQKQDFSLSVALDGGSMLVTYPYDKPVQTVENKETLKHLASLYANNHPSMHMGQPSCPNKSDENIPGGVMRGAEWHSHLGSMKDYSVTYGHCPEITVYTSCCYFPSAARLPSLWADNKRSLLSMLVEVHKGVHGFVKDKTGKPISKAVIVLNEGIKVQTKEGGYFHVLLAPGVHNIIAIADGYQQQHSQVFVHHDAASSVVIVFDTDNRIFGLPRELVVTVSGATMSALILTACIIWCICSIKSNRHKDGFHRLRQHHDE
YEDEIRMMSTGSKKSLLSHEFQDETDTEEETLYSSKH 2 1300 1322 DNRIFGLPRE LVVTVSGATMSALILTACIIWCI CSIKSNRHKD +Q13740 MESKGASSCRLLFCLLISATVFRPGLGWYTVNSAYGDTIIIPCRLDVPQNLMFGKWKYEKPDGSPVFIAFRSSTKKSVQYDDVPEYKDRLNLSENYTLSISNARISDEKRFVCMLVTEDNVFEAPTIVKVFKQPSKPEIVSKALFLETEQLKKLGDCISEDSYPDGNITWYRNGKVLHPLEGAVVIIFKKEMDPVTQLYTMTSTLEYKTTKADIQMPFTCSVTYYGPSGQKTIHSEQAVFDIYYPTEQVTIQVLPPKNAIKEGDNITLKCLGNGNPPPEEFLFYLPGQPEGIRSSNTYTLTDVRRNATGDYKCSLIDKKSMIASTAITVHYLDLSLNPSGEVTRQIGDALPVSCTISASRNATVVWMKDNIRLRSSPSFSSLHYQDAGNYVCETALQEVEGLKKRESLTLIVEGKPQIKMTKKTDPSGLSKTIICHVEGFPKPAIQWTITGSGSVINQTEESPYINGRYYSKIIISPEENVTLTCTAENQLERTVNSLNVSAISIPEHDEADEISDENREKVNDQAKLIVGIVVGLLLAALVAGVVYWLYMKKSKTASKHVNKDLGNMEENKKLEENNHKTEA 2 528 550 NREKVNDQAK LIVGIVVGLLLAALVAGVVYWLY MKKSKTASKH +P15391 MPPPRLLFFLLFLTPMEVRPEEPLVVKVEEGDNAVLQCLKGTSDGPTQQLTWSRESPLKPFLKLSLGLPGLGIHMRPLAIWLFIFNVSQQMGGFYLCQPGPPSEKAWQPGWTVNVEGSGELFRWNVSDLGGLGCGLKNRSSEGPSSPSGKLMSPKLYVWAKDRPEIWEGEPPCLPPRDSLNQSLSQDLTMAPGSTLWLSCGVPPDSVSRGPLSWTHVHPKGPKSLLSLELKDDRPARDMWVMETGLLLPRATAQDAGKYYCHRGNLTMSFHLEITARPVLWHWLLRTGGWKVSAVTLAYLIFCLCSLVGILHLQRALVLRRKRKRMTDPTRRFFKVTPPPGSGPQNQYGNVLSLPTPTSGLGRAQRWAAGLGGTAPSYGNPSSDVQADGALGSRSPPGVGPEEEEGEGYEEPDSEEDSEFYENDSNLGQDQLSQDGSGYENPEDEPLGPEDEDSFSNAESYENEDEELTQPVARTMDFLSPHGSAWDPSREATSLGSQSYEDMRGILYAAPQLRSIRGQPGPNHEEDADSYENMDNPDGPDPAWGGGGRMGTWSTR 2 295 317 LRTGGWKVSA VTLAYLIFCLCSLVGILHLQRAL VLRRKRKRMT +P29017 MLFLQFLLLALLLPGGDNADASQEHVSFHVIQIFSFVNQSWARGQGSGWLDELQTHGWDSESGTIIFLHNWSKGNFSNEELSDLELLFRFYLFGLTREIQDHASQDYSKYPFEVQVKAGCELHSGKSPEGFFQVAFNGLDLLSFQNTTWVPSPGCGSLAQSVCHLLNHQYEGVTETVYNLIRSTCPRFLLGLLDAGKMYVHRQVRPEAWLSSRPSLGSGQLLLVCHASGFYPKPVWVTWMRNEQEQLGTKHGDILPNADGTWYLQVILEVASEEPAGLSCRVRHSSLGGQDIILYWGHHFSMNWIALVVIVPLVILIVLVLWFKKHCSYQDIL 2 301 323 DIILYWGHHF SMNWIALVVIVPLVILIVLVLWF KKHCSYQDIL +P11609 
MRYLPWLLLWAFLQVWGQSEAQQKNYTFRCLQMSSFANRSWSRTDSVVWLGDLQTHRWSNDSATISFTKPWSQGKLSNQQWEKLQHMFQVYRVSFTRDIQELVKMMSPKEDYPIEIQLSAGCEMYPGNASESFLHVAFQGKYVVRFWGTSWQTVPGAPSWLDLPIKVLNADQGTSATVQMLLNDTCPLFVRGLLEAGKSDLEKQEKPVAWLSSVPSSADGHRQLVCHVSGFYPKPVWVMWMRGDQEQQGTHRGDFLPNADETWYLQATLDVEAGEEAGLACRVKHSSLGGQDIILYWDARQAPVGLIVFIVLIMLVVVGAVVYYIWRRRSAYQDIR 2 304 326 ILYWDARQAP VGLIVFIVLIMLVVVGAVVYYIW RRRSAYQDIR +P15813 MGCLLFLLLWALLQAWGSAEVPQRLFPLRCLQISSFANSSWTRTDGLAWLGELQTHSWSNDSDTVRSLKPWSQGTFSDQQWETLQHIFRVYRSSFTRDVKEFAKMLRLSYPLELQVSAGCEVHPGNASNNFFHVAFQGKDILSFQGTSWEPTQEAPLWVNLAIQVLNQDKWTRETVQWLLNGTCPQFVSGLLESGKSELKKQVKPKAWLSRGPSPGPGRLLLVCHVSGFYPKPVWVKWMRGEQEQQGTQPGDILPNADETWYLRATLDVVAGEAAGLSCRVKHSSLEGQDIVLYWGGSYTSMGLIALAVLACLLFLLIVGFTSRFKRQTSYQGVL 2 299 321 QDIVLYWGGS YTSMGLIALAVLACLLFLLIVGF TSRFKRQTSY +P15812 MLLLFLLFEGLCCPGENTAAPQALQSYHLAAEEQLSFRMLQTSSFANHSWAHSEGSGWLGDLQTHGWDTVLGTIRFLKPWSHGNFSKQELKNLQSLFQLYFHSFIQIVQASAGQFQLEYPFEIQILAGCRMNAPQIFLNMAYQGSDFLSFQGISWEPSPGAGIRAQNICKVLNRYLDIKEILQSLLGHTCPRFLAGLMEAGESELKRKVKPEAWLSCGPSPGPGRLQLVCHVSGFYPKPVWVMWMRGEQEQRGTQRGDVLPNADETWYLRATLDVAAGEAAGLSCRVKHSSLGGHDLIIHWGGYSIFLILICLTVIVTLVILVVVDSRLKKQSSNKNILSPHTPSPVFLMGANTQDTKNSRHQFCLAQVSWIKNRVLKKWKTRLNQLW 2 303 325 GGHDLIIHWG GYSIFLILICLTVIVTLVILVVV DSRLKKQSSN +Q15762 MDYPTLLLALLHVYRALCEEVLWHTSVPFAENMSLECVYPSMGILTQVEWFKIGTQQDSIAIFSPTHGMVIRKPYAERVYFLNSTMASNNMTLFFRNASEDDVGYYSCSLYTYPQGTWQKVIQVVQSDSFEAAVPSNSHIVSEPGKNVTLTCQPQMTWPVQAVRWEKIQPRQIDLLTYCNLVHGRNFTSKFPRQIVSNCSHGRWSVIVIPDVTVSDSGLYRCYLQASAGENETFVMRLTVAEGKTDNQYTLFVAGGTVLLLLFVISITTIIVIFLNRRRRRERRDLFTESWDTQKAPNNYRSPISTSQPTNQSMDDTREDIYVNYPTFSRRPKTRV 2 253 275 GKTDNQYTLF VAGGTVLLLLFVISITTIIVIFL NRRRRRERRD +P20273 
MHLLGPWLLLLVLEYLAFSDSSKWVFEHPETLYAWEGACVWIPCTYRALDGDLESFILFHNPEYNKNTSKFDGTRLYESTKDGKVPSEQKRVQFLGDKNKNCTLSIHPVHLNDSGQLGLRMESKTEKWMERIHLNVSERPFPPHIQLPPEIQESQEVTLTCLLNFSCYGYPIQLQWLLEGVPMRQAAVTSTSLTIKSVFTRSELKFSPQWSHHGKIVTCQLQDADGKFLSNDTVQLNVKHTPKLEIKVTPSDAIVREGDSVTMTCEVSSSNPEYTTVSWLKDGTSLKKQNTFTLNLREVTKDQSGKYCCQVSNDVGPGRSEEVFLQVQYAPEPSTVQILHSPAVEGSQVEFLCMSLANPLPTNYTWYHNGKEMQGRTEEKVHIPKILPWHAGTYSCVAENILGTGQRGPGAELDVQYPPKKVTTVIQNPMPIREGDTVTLSCNYNSSNPSVTRYEWKPHGAWEEPSLGVLKIQNVGWDNTTIACAACNSWCSWASPVALNVQYAPRDVRVRKIKPLSEIHSGNSVSLQCDFSSSHPKEVQFFWEKNGRLLGKESQLNFDSISPEDAGSYSCWVNNSIGQTASKAWTLEVLYAPRRLRVSMSPGDQVMEGKSATLTCESDANPPVSHYTWFDWNNQSLPYHSQKLRLEPVKVQHSGAYWCQGTNSVGKGRSPLSTLTVYYSPETIGRRVAVGLGSCLAILILAICGLKLQRRWKRTQSQQGLQENSSGQSFFVRNKKVRRAPLSEGPHSLGCYNPMMEDGISYTTLRFPEMNIPRTGDAESSEMQRPPPDCDDTVTYSALHKRQVGDYENVIPDFPEDEGIHYSELIQFGVGERPQAQENVDYVILKH 2 684 706 TLTVYYSPET IGRRVAVGLGSCLAILILAICGL KLQRRWKRTQ +Q07763 MLGQAVLFTTFLLLRAHQGQDCPDSSEEVVGVSGKPVQLRPSNIQTKDVSVQWKKTEQGSHRKIEILNWYNDGPSWSNVSFSDIYGFDYGDFALSIKSAKLQDSGHYLLEITNTGGKVCNKNFQLLILDHVETPNLKAQWKPWTNGTCQLFLSCLVTKDDNVSYALYRGSTLISNQRNSTHWENQIDASSLHTYTCNVSNRASWANHTLNFTHGCQSVPSNFRFLPFGVIIVILVTLFLGAIICFCVWTKKRKQLQFSPKEPLTIYEYVKDSRASRDQQGCSRASGSPSAVQEDGRGQRELDRRVSEVLEQLPQQTFPGDRGTMYSMIQCKPSDSTSQEKCTVYSVVQPSRKSGSKKRNQNSSLSCTVYEEVGNPWLKAHNPARLSRRELENFDVYS 2 226 248 QSVPSNFRFL PFGVIIVILVTLFLGAIICFCVW TKKRKQLQFS +Q9HCU0 
MLLRLLLAWAAAGPTLGQDPWAAEPRAACGPSSCYALFPRRRTFLEAWRACRELGGDLATPRTPEEAQRVDSLVGAGPASRLLWIGLQRQARQCQLQRPLRGFTWTTGDQDTAFTNWAQPASGGPCPAQRCVALEASGEHRWLEGSCTLAVDGYLCQFGFEGACPALQDEAGQAGPAVYTTPFHLVSTEFEWLPFGSVAAVQCQAGRGASLLCVKQPEGGVGWSRAGPLCLGTGCSPDNGGCEHECVEEVDGHVSCRCTEGFRLAADGRSCEDPCAQAPCEQQCEPGGPQGYSCHCRLGFRPAEDDPHRCVDTDECQIAGVCQQMCVNYVGGFECYCSEGHELEADGISCSPAGAMGAQASQDLGDELLDDGEDEEDEDEAWKAFNGGWTEMPGILWMEPTQPPDFALAYRPSFPEDREPQIPYPEPTWPPPLSAPRVPYHSSVLSVTRPVVVSATHPTLPSAHQPPVIPATHPALSRDHQIPVIAANYPDLPSAYQPGILSVSHSAQPPAHQPPMISTKYPELFPAHQSPMFPDTRVAGTQTTTHLPGIPPNHAPLVTTLGAQLPPQAPDALVLRTQATQLPIIPTAQPSLTTTSRSPVSPAHQISVPAATQPAALPTLLPSQSPTNQTSPISPTHPHSKAPQIPREDGPSPKLALWLPSPAPTAAPTALGEAGLAEHSQRDDRWLLVALLVPTCVFLVVLLALGIVYCTRCGPHAPNKRITDCYRWVIHAGSKSPTEPMPPRGSLTGVQTCRTSV 2 686 708 LAEHSQRDDR WLLVALLVPTCVFLVVLLALGIV YCTRCGPHAP +Q5ZPR3 MLRRRGSPGMGVHVGAALGALWFCLTGALEVQVPEDPVVALVGTDATLCCSFSPEPGFSLAQLNLIWQLTDTKQLVHSFAEGQDQGSAYANRTALFPDLLAQGNASLRLQRVRVADEGSFTCFVSIRDFGSAAVSLQVAAPYSKPSMTLEPNKDLRPGDTVTITCSSYQGYPEAEVFWQDGQGVPLTGNVTTSQMANEQGLFDVHSILRVVLGANGTYSCLVRNPVLQQDAHSSVTITPQRSPTGAVEVQVPEDPVVALVGTDATLRCSFSPEPGFSLAQLNLIWQLTDTKQLVHSFTEGRDQGSAYANRTALFPDLLAQGNASLRLQRVRVADEGSFTCFVSIRDFGSAAVSLQVAAPYSKPSMTLEPNKDLRPGDTVTITCSSYRGYPEAEVFWQDGQGVPLTGNVTTSQMANEQGLFDVHSVLRVVLGANGTYSCLVRNPVLQQDAHGSVTITGQPMTFPPEALWVTVGLSVCLIALLVALAFVCWRKIKQSCEEENAGAEDQDGEGEGSKTALQPLKHSDSKEDDGQEIA 2 466 488 TGQPMTFPPE ALWVTVGLSVCLIALLVALAFVC WRKIKQSCEE +P26842 MARPHPWWLCVLGTLVGLSATPAPKSCPERHYWAQGKLCCQMCEPGTFLVKDCDQHRKAAQCDPCIPGVSFSPDHHTRPHCESCRHCNSGLLVRNCTITANAECACRNGWQCRDKECTECDPLPNPSLTARSSQALSPHPQPTHLPYVSEMLEARTAGHMQTLADFRQLPARTLSTHWPPQRSLCSSDFIRILVIFSGMFLVFTLAGALFLHQRRKYRSNKGESPVEPAEPCHYSCPREEEGSTIPIQEDYRKPEPACSP 2 189 211 PPQRSLCSSD FIRILVIFSGMFLVFTLAGALFL HQRRKYRSNK +P10747 MLRLLLALNLFPSIQVTGNKILVKQSPMLVAYDNAVNLSCKYSYNLFSREFRASLHKGLDSAVEVCVVYGNYSQQLQVYSKTGFNCDGKLGNESVTFYLQNLYVNQTDIYFCKIEVMYPPPYLDNEKSNGTIIHVKGKHLCPSPLFPGPSKPFWVLVVVGGVLACYSLLVTVAFIIFWVRSKRSRLLHSDYMNMTPRRPGPTRKHYQPYAPPRDFAAYRS 2 154 176 PLFPGPSKPF 
WVLVVVGGVLACYSLLVTVAFII FWVRSKRSRL +P06729 MSFPCKFVASFLLIFNVSSKGAVSKEITNALETWGALGQDINLDIPSFQMSDDIDDIKWEKTSDKKKIAQFRKEKETFKEKDTYKLFKNGTLKIKHLKTDDQDIYKVSIYDTKGKNVLEKIFDLKIQERVSKPKISWTCINTTLTCEVMNGTDPELNLYQDGKHLKLSQRVITHKWTTSLSAKFKCTAGNKVSKESSVEPVSCPEKGLDIYLIIGICGGGSLLMVFVALLVFYITKRKKQRSRRNDEELETRAHRVATEERGRKPHQIPASTPQNPATSQHPPPPPGHRSQAPSHRPPPPGHRVQHQPQKRPPAPSGTQVHQQKGPPLPRPRVQPKPPHGAAENSLSPSSN 2 212 234 SCPEKGLDIY LIIGICGGGSLLMVFVALLVFYI TKRKKQRSRR +Q8IX05 MLRAALPALLLPLLGLAAAAVADCPSSTWIQFQDSCYIFLQEAIKVESIEDVRNQCTDHGADMISIHNEEENAFILDTLKKQWKGPDDILLGMFYDTDDASFKWFDNSNMTFDKWTDQDDDEDLVDTCAFLHIKTGEWKKGNCEVSSVEGTLCKTAIPYKRKYLSDNHILISALVIASTVILTVLGAIIWFLYKKHSDSRFTTVFSTAPQSPYNEDCVLVVGEENEYPVQFD 2 171 193 RKYLSDNHIL ISALVIASTVILTVLGAIIWFLY KKHSDSRFTT +Q9NPF0 MSGGWMAQVGAWRTGALGLALLLLLGLGLGLEAAASPLSTPTSAQAAGPSSGSCPPTKFQCRTSGLCVPLTWRCDRDLDCSDGSDEEECRIEPCTQKGQCPPPPGLPCPCTGVSDCSGGTDKKLRNCSRLACLAGELRCTLSDDCIPLTWRCDGHPDCPDSSDELGCGTNEILPEGDATTMGPPVTLESVTSLRNATTMGPPVTLESVPSVGNATSSSAGDQSGSPTAYGVIAAAAVLSASLVTATLLLLSWLRAQERLRPLGLLVAMKESLLLSEQKTSLP 2 231 253 DQSGSPTAYG VIAAAAVLSASLVTATLLLLSWL RAQERLRPLG +P28906 MLVRRGARAGPRMPRGWTALCLLSLLPSGFMSLDNNGTATPELPTQGTFSNVSTNVSYQETTTPSTLGSTSLHPVSQHGNEATTNITETTVKFTSTSVITSVYGNTNSSVQSQTSVISTVFTTPANVSTPETTLKPSLSPGNVSDLSTTSTSLATSPTKPYTSSSPILSDIKAEIKCSGIREVKLTQGICLEQNKTSSCAEFKKDRGEGLARVLCGEEQADADAGAQVCSLLLAQSEVRPQCLLLVLANRTEISSKLQLMKKHQSDLKKLGILDFTEQDVASHQSYSQKTLIALVTSGALLAVLGITGYFLMNRRSWSPTGERLGEDPYYTENGGGQGYSSGPGTSPEAQGKASVNRGAQENGTGQATSRNGHSARQHVVADTEL 2 291 313 ASHQSYSQKT LIALVTSGALLAVLGITGYFLMN RRSWSPTGER +P04235 MEHSGILASLILIAVLPQGSPFKIQVTEYEDKVFVTCNTSVMHLDGTVEGWFAKNKTLNLGKGVLDPRGIYLCNGTEQLAKVVSSVQVHYRMCQNCVELDSGTMAGVIFIDLIATLLLALGVYCFAGHETGRPSGAAEVQALLKNEQLYQPLRDREDTQYSRLGGNWPRNKKS 2 105 127 NCVELDSGTM AGVIFIDLIATLLLALGVYCFAG HETGRPSGAA +P07766 MQSGTHWRVLGLCLLSVGVWGQDGNEEMGGITQTPYKVSISGTTVILTCPQYPGSEILWQHNDKNIGGDEDDKNIGSDEDHLSLKEFSELEQSGYYVCYPRGSKPEDANFYLYLRARVCENCMEMDVMSVATIVIVDICITGGLLLLVYYWSKNRKAKAKPVTRGAGAGGRQRGQNKERPPPVPNPDYEPIRKGQRDLYSGLNQRRI 
2 130 152 ENCMEMDVMS VATIVIVDICITGGLLLLVYYWS KNRKAKAKPV +P09693 MEQGKGLAVLILAIILLQGTLAQSIKGNHLVKVYDYQEDGSVLLTCDAEAKNITWFKDGKMIGFLTEDKKKWNLGSNAKDPRGMYQCKGSQNKSKPLQVYYRMCQNCIELNAATISGFLFAEIVSIFVLAVGVYFIAGQDGVRQSRASDKQTLLPNDQLYQPLKDREDDQYSHLQGNQLRRN 2 115 137 QNCIELNAAT ISGFLFAEIVSIFVLAVGVYFIA GQDGVRQSRA +P20963 MKWKALFTAAILQAQLPITEAQSFGLLDPKLCYLLDGILFIYGVILTALFLRVKFSRSADAPAYQQGQNQLYNELNLGRREEYDVLDKRRGRDPEMGGKPQRRKNPQEGLYNELQKDKMAEAYSEIGMKGERRRGKGHDGLYQGLSTATKDTYDALHMQALPPR 2 31 53 AQSFGLLDPK LCYLLDGILFIYGVILTALFLRV KFSRSADAPA +P01730 MNRGVPFRHLLLVLQLALLPAATQGKKVVLGKKGDTVELTCTASQKKSIQFHWKNSNQIKILGNQGSFLTKGPSKLNDRADSRRSLWDQGNFPLIIKNLKIEDSDTYICEVEDQKEEVQLLVFGLTANSDTHLLQGQSLTLTLESPPGSSPSVQCRSPRGKNIQGGKTLSVSQLELQDSGTWTCTVLQNQKKVEFKIDIVVLAFQKASSIVYKKEGEQVEFSFPLAFTVEKLTGSGELWWQAERASSSKSWITFDLKNKEVSVKRVTQDPKLQMGKKLPLHLTLPQALPQYAGSGNLTLALEAKTGKLHQEVNLVVMRATQLQKNLTCEVWGPTSPKLMLSLKLENKEAKVSKREKAVWVLNPEAGMWQCLLSDSGQVLLESNIKVLPTWSTPVQPMALIVLGGVAGLLLFIGLGIFFCVRCRHRRRQAERMSQIKRLLSEKKTCQCPHRFQKTCSPI 2 398 420 PTWSTPVQPM ALIVLGGVAGLLLFIGLGIFFCV RCRHRRRQAE +P30203 MWLFFGITGLLTAALSGHPSPAPPDQLNTSSAESELWEPGERLPVRLTNGSSSCSGTVEVRLEASWEPACGALWDSRAAEAVCRALGCGGAEAASQLAPPTPELPPPPAAGNTSVAANATLAGAPALLCSGAEWRLCEVVEHACRSDGRRARVTCAENRALRLVDGGGACAGRVEMLEHGEWGSVCDDTWDLEDAHVVCRQLGCGWAVQALPGLHFTPGRGPIHRDQVNCSGAEAYLWDCPGLPGQHYCGHKEDAGAVCSEHQSWRLTGGADRCEGQVEVHFRGVWNTVCDSEWYPSEAKVLCQSLGCGTAVERPKGLPHSLSGRMYYSCNGEELTLSNCSWRFNNSNLCSQSLAARVLCSASRSLHNLSTPEVPASVQTVTIESSVTVKIENKESRELMLLIPSIVLGILLLGSLIFIAFILLRIKGKYALPVMVNHQHLPTTIPAGSNSYQPVPITIPKEVFMLPIQVQAPPPEDSDSGSDSDYEHYDFSAQPPVALTTFYNSQRHRVTDEEVQQSRFQMPPLEEGLEELHASHIPTANPGHCITDPPSLGPQYHPRSNSESSTSSGEDYCNSPKSKLPPWNPQVFSSERSSFLEQPPNLELAGTQPAFSAGPPADDSSSTSSGEWYQNFQPPPQPPSEEQFGCPGSPSPQPDSTDNDDYDDISAA 2 402 424 ENKESRELML LIPSIVLGILLLGSLIFIAFILL RIKGKYALPV +P11912 
MPGGPGVLQALPATIFLLFLLSAVYLGPGCQALWMHKVPASLMVSLGEDAHFQCPHNSSNNANVTWWRVLHGNYTWPPEFLGPGEDPNGTLIIQNVNKSHGGIYVCRVQEGNESYQQSCGTYLRVRQPPPRPFLDMGEGTKNRIITAEGIILLFCAVVPGTLLLFRKRWQNEKLGLDAGDEYEDENLYEGLNLDDCSMYEDISRGLQGTYQDVGSLNIGDVQLEKP 2 143 165 FLDMGEGTKN RIITAEGIILLFCAVVPGTLLLF RKRWQNEKLG +P40259 MARLALSPVPSHWMVALLLLLSAEPVPAARSEDRYRNPKGSACSRIWQSPRFIARKRGFTVKMHCYMNSASGNVSWLWKQEMDENPQQLKLEKGRMEESQNESLATLTIQGIRFEDNGIYFCQQKCNNTSEVYQGCGTELRVMGFSTLAQLKQRNTLKDGIIMIQTLLIILFIIVPIFLLLDKDDSKAGMEEDHTYEGLDIDQTATYEDIVTLRTGEVKWSVGEHPGQE 2 161 180 LKQRNTLKDG IIMIQTLLIILFIIVPIFLL LDKDDSKAGM +P50283 MTQQAVLALLLTLAGILPGPLDAQDVHQSPRLTIASEGDSVNITCSTRGHLEGILMKKIWPQAYNVIYFEDRQEPTVDRTFSGRINFSGSQKNLTITISSLQLADTGDYTCEAVRKVSARGLFTTVVVKEKSSQEAYRSQEPLQTSFSFPAAIAVGFFFTGLLLGVVCSMLRKIQIKKLCASGIKESPCVVYEDMSYSNRKTPCIPNQYQ 2 149 171 SQEPLQTSFS FPAAIAVGFFFTGLLLGVVCSML RKIQIKKLCA +P33681 MGHTRRQGTSPSKCPYLNFFQLLVLAGLSHFCSGVIHVTKEVKEVATLSCGHNVSVEELAQTRIYWQKEKKMVLTMMSGDMNIWPEYKNRTIFDITNNLSIVILALRPSDEGTYECVVLKYEKDAFKREHLAEVTLSVKADFPTPSISDFEIPTSNIRRIICSTSGGFPEPHLSWLENGEELNAINTTVSQDPETELYAVSSKLDFNMTTNHSFMCLIKYGHLRVNQTFNWNTTKQEHFPDNLLPSWAITLISVNGIFVICCLTYCFAPRCRERRRNERLRRESVRPV 2 247 269 EHFPDNLLPS WAITLISVNGIFVICCLTYCFAP RCRERRRNER +Q00609 MACNCQLMQDTPLLKFPCPRLILLFVLLIRLSQVSSDVDEQLSKSVKDKVLLPCRYNSPHEDESEDRIYWQKHDKVVLSVIAGKLKVWPEYKNRTLYDNTTYSLIILGLVLSDRGTYSCVVQKKERGTYEVKHLALVKLSIKADFSTPNITESGNPSADTKRITCFASGGFPKPRFSWLENGRELPGINTTISQDPESELYTISSQLDFNTTRNHTIKCLIKYGDAHVSEDFTWEKPPEDPPDSKNTLVLFGAGFGAVITVVVIVVIIKCFCKHRSCFRRNEASRETNNSLTFGPEEALAEQTVFL 2 249 271 EDPPDSKNTL VLFGAGFGAVITVVVIVVIIKCF CKHRSCFRRN +Q01151 MSRGLQLLLLSCAYSLAPATPEVKVACSEDVDLPCTAPWDPQVPYTVSWVKLLEGGEERMETPQEDHLRGQHYHQKGQNGSFDAPNERPYSLKIRNTTSCNSGTYRCTLQDPDGQRNLSGKVILRVTGCPAQRKEETFKKYRAEIVLLLALVIFYLTLIIFTCKFARLQSIFPDFSKAGMERAFLPVTSPNKHLGLVTPHKTELV 2 146 168 ETFKKYRAEI VLLLALVIFYLTLIIFTCKFARL QSIFPDFSKA +P42081 
MDPQCTMGLSNILFVMAFLLSGAAPLKIQAYFNETADLPCQFANSQNQSLSELVVFWQDQENLVLNEVYLGKEKFDSVHSKYMGRTSFDSDSWTLRLHNLQIKDKGLYQCIIHHKKPTGMIRIHQMNSELSVLANFSQPEIVPISNITENVYINLTCSSIHGYPEPKKMSVLLRTKNSTIEYDGVMQKSQDNVTELYDVSISLSVSFPDVTSNMTIFCILETDKTRLLSSPFSIELEDPQPPPDHIPWITAVLPTVIICVMVFCLILWKWKKKKRPRNSYKCGTNTMEREESEQTKKREKIHIPERSDEAQRVFKSSKTSSCDKSDTCF 2 246 268 LEDPQPPPDH IPWITAVLPTVIICVMVFCLILW KWKKKKRPRN +P42082 MDPRCTMGLAILIFVTVLLISDAVSVETQAYFNGTAYLPCPFTKAQNISLSELVVFWQDQQKLVLYEHYLGTEKLDSVNAKYLGRTSFDRNNWTLRLHNVQIKDMGSYDCFIQKKPPTGSIILQQTLTELSVIANFSEPEIKLAQNVTGNSGINLTCTSKQGHPKPKKMYFLITNSTNEYGDNMQISQDNVTELFSISNSLSLSFPDGVWHMTVVCVLETESMKISSKPLNFTQEFPSPQTYWKEITASVTVALLLVMLLIIVCHKKPNQPSRPSNTASKLERDSNADRETINLKELEPQIASAKPNAE 2 246 263 FPSPQTYWKE ITASVTVALLLVMLLIIV CHKKPNQPSR +P01732 MALPVTALLLPLALLLHAARPSQFRVSPLDRTWNLGETVELKCQVLLSNPTSGCSWLFQPRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLGDTFVLTLSDFRRENEGYYFCSALSNSIMYFSHFVPVFLPAKPTTTPAPRPPTPAPTIASQPLSLRPEACRPAAGGAVHTRGLDFACDIYIWAPLAGTCGVLLLSLVITLYCNHRNRRRVCKCPRPVVKSGDKPSLSARYV 2 184 206 TRGLDFACDI YIWAPLAGTCGVLLLSLVITLYC NHRNRRRVCK +Q9BYE9 
MAQLWLSCFLLPALVVSVAANVAPKFLANMTSVILPEDLPVGAQAFWLVAEDQDNDPLTYGMSGPNAYFFAVTPKTGEVKLASALDYETLYTFKVTISVSDPYIQVQREMLVIVEDRNDNAPVFQNTAFSTSINETLPVGSVVFSVLAVDKDMGSAGMVVYSIEKVIPSTGDSEHLFRILANGSIVLNGSLSYNNKSAFYQLELKACDLGGMYHNTFTIQCSLPVFLSISVVDQPDLDPQFVREFYSASVAEDAAKGTSVLTVEAVDGDKGINDPVIYSISYSTRPGWFDIGADGVIRVNGSLDREQLLEADEEVQLQVTATETHLNIYGQEAKVSIWVTVRVMDVNDHKPEFYNCSLPACTFTPEEAQVNFTGYVDEHASPRIPIDDLTMVVYDPDKGSNGTFLLSLGGPDAEAFSVSPERAVGSASVQVLVRVSALVDYERQTAMAVQVVATDSVSQNFSVAMVTIHLRDINDHRPTFPQSLYVLTVPEHSATGSVVTDSIHATDPDTGAWGQITYSLLPGNGADLFQVDPVSGTVTVRNGELLDRESQAVYYLTLQATDGGNLSSSTTLQIHLLDINDNAPVVSGSYNIFVQEEEGNVSVTIQAHDNDEPGTNNSRLLFNLLPGPYSHNFSLDPDTGLLRNLGPLDREAIDPALEGRIVLTVLVSDCGEPVLGTKVNVTITVEDINDNLPIFNQSSYNFTVKEEDPGVLVGVVKAWDADQTEANNRISFSLSGSGANYFMIRGLVLGAGWAEGYLRLPPDVSLDYETQPVFNLTVSAENPDPQGGETIVDVCVNVKDVNDNPPTLDVASLRGIRVAENGSQHGQVAVVVASDVDTSAQLEIQLVNILCTKAGVDVGSLCWGWFSVAANGSVYINQSKAIDYEACDLVTLVVRACDLATDPGFQAYSNNGSLLITIEDVNDNAPYFLPENKTFVIIPELVLPNREVASVRARDDDSGNNGVILFSILRVDFISKDGATIPFQGVFSIFTSSEADVFAGSIQPVTSLDSTLQGTYQVTVQARDRPSLGPFLEATTTLNLFTVDQSYRSRLQFSTPKEEVGANRQAINAALTQATRTTVYIVDIQDIDSAARARPHSYLDAYFVFPNGSALTLDELSVMIRNDQDSLTQLLQLGLVVLGSQESQESDLSKQLISVIIGLGVALLLVLVIMTMAFVCVRKSYNRKLQAMKAAKEARKTAAGVMPSAPAIPGTNMYNTERANPMLNLPNKDLGLEYLSPSNDLDSVSVNSLDDNSVDVDKNSQEIKEHRPPHTPPEPDPEPLSVVLLGRQAGASGQLEGPSYTNAGLDTTDL 2 1155 1177 ESDLSKQLIS VIIGLGVALLLVLVIMTMAFVCV RKSYNRKLQA +Q6ZTQ4 
MQEAIILLALLGAMSGGEALHLILLPATGNVAENSPPGTSVHKFSVKLSASLSPVIPGFPQIVNSNPLTEAFRVNWLSGTYFEVVTTGMEQLDFETGPNIFDLQIYVKDEVGVTDLQVLTVQVTDVNEPPQFQGNLAEGLHLYIVERANPGFIYQVEAFDPEDTSRNIPLSYFLISPPKSFRMSANGTLFSTTELDFEAGHRSFHLIVEVRDSGGLKASTELQVNIVNLNDEVPRFTSPTRVYTVLEELSPGTIVANITAEDPDDEGFPSHLLYSITTVSKYFMINQLTGTIQVAQRIDRDAGELRQNPTISLEVLVKDRPYGGQENRIQITFIVEDVNDNPATCQKFTFSIMVPERTAKGTLLLDLNKFCFDDDSEAPNNRFNFTMPSGVGSGSRFLQDPAGSGKIVLIGDLDYENPSNLAAGNKYTVIIQVQDVAPPYYKNNVYVYILTSPENEFPLIFDRPSYVFDVSERRPARTRVGQVRATDKDLPQSSLLYSISTGGASLQYPNVFWINPKTGELQLVTKVDCETTPIYILRIQATNNEDTSSVTVTVNILEENDEKPICTPNSYFLALPVDLKVGTNIQNFKLTCTDLDSSPRSFRYSIGPGNVNNHFTFSPNAGSNVTRLLLTSRFDYAGGFDKIWDYKLLVYVTDDNLMSDRKKAEALVETGTVTLSIKVIPHPTTIITTTPRPRVTYQVLRKNVYSPSAWYVPFVITLGSILLLGLLVYLVVLLAKAIHRHCPCKTGKNKEPLTKKGETKTAERDVVVETIQMNTIFDGEAIDPVTGETYEFNSKTGARKWKDPLTQMPKWKESSHQGAAPRRVTAGEGMGSLRSANWEEDELSGKAWAEDAGLGSRNEGGKLGNPKNRNPAFMNRAYPKPHPGK 2 710 732 LRKNVYSPSA WYVPFVITLGSILLLGLLVYLVV LLAKAIHRHC +A6H8M9 MVLLRLLVFLFAPVVSDLCSLPCFINVSESQGPGTVLQFLSFNCSSYTPTPTLELLNVQPPTTFFNPPSLARWQGTYVGKLTLSSSAQLDALMVNHYKVQLKFTCGNHVMEGSLSVDVQRDLSHIQCAGQFASPAGEMIQVPETVTPGARLYTLLLPGLELHGAQMSIISAQDLPHFPGPFSINEQGWLQAPSQGLLGQAQKVFQLQISVSFGQRQSCQGMVIVKVLPVPSSQVSFLEQAQNITIPENLAPGSEVVQVQARGVDLRYEILSPVPSPLFSIGRADGVVRTTTPLELARTSGTAVSRLQVKAFEQGQLWASAKLNLTMNVQLVNLWPPRCLPALLVSQIPETAPVGTVLNTLTCEDPDSVGATLDYKLWFRSSSNPASLCLYDRVLEVNATLDCDTPGACFQHAASILVLDGGQPQMTTEVPVLVMVTPINEFSPACAPRTFRVQEDAAPHTLLGSVVGTDMDYPHDNIEYYTSGGPTTFAVDRLSGEVHLLGPLDYEQQRLYRLTVLVIDHGQDQNPNHHLSGSCTITIEVEDVNDHAPECEPPFQELTIYAPLGRSVEVTKMSCQIPQEPQRLIYSYSIVGGNSQNRFILQGAILVHSDLVLGPFWPEQPRTYELLICVADAGPSTPHLSTTATIIVHLVPRRASTVATSTHRTTVPSTMTPMLVTDTEAFWQPQPWFVVVLTATGALLLLALGWLLGRLLQGLAQLLQAPSKPAQALLLNSIQGTEGSIEGFLEAPKMEMSQAPSSVMSLHFDGRAQDSRTGRDYLFNTHTGARRWL 2 686 708 TDTEAFWQPQ PWFVVVLTATGALLLLALGWLLG RLLQGLAQLL +Q9HBB8 
MGSWALLWPPLLFTGLLVRPPGTMAQAQYCSVNKDIFEVEENTNVTEPLVDIHVPEGQEVTLGALSTPFAFRIQGNQLFLNVTPDYEEKSLLEAQLLCQSGGTLVTQLRVFVSVLDVNDNAPEFPFKTKEIRVEEDTKVNSTVIPETQLQAEDRDKDDILFYTLQEMTAGASDYFSLVSVNRPALRLDRPLDFYERPNMTFWLLVRDTPGENVEPSHTATATLVLNVVPADLRPPWFLPCTFSDGYVCIQAQYHGAVPTGHILPSPLVLRPGPIYAEDGDRGINQPIIYSIFRGNVNGTFIIHPDSGNLTVARSVPSPMTFLLLVKGQQADLARYSVTQVTVEAVAAAGSPPRFPQRLYRGTVARGAGAGVVVKDAAAPSQPLRIQAQDPEFSDLNSAITYRITNHSHFRMEGEVVLTTTTLAQAGAFYAEVEAHNTVTSGTATTVIEIQVSEQEPPSTDVPPSPEAGGTTGPWTSTTSEVPRPPEPSQGPSTTSSGGGTGPHPPSGTTLRPPTSSTPGGPPGAENSTSHQPATPGGDTAQTPKPGTSQPMPPGVGTSTSHQPATPSGGTAQTPEPGTSQPMPPSMGTSTSHQPATPGGGTAQTPEAGTSQPMPPGMGTSTSHQPTTPGGGTAQTPEPGTSQPMPLSKSTPSSGGGPSEDKRFSVVDMAALGGVLGALLLLALLGLAVLVHKHYGPRLKCCCGKAPEPQPQGFDNQAFLPDHKANWAPVPSPTHDPKPAEAPMPAEPAPPGPASPGGAPEPPAAARAGGSPTAVRSILTKERRPEGGYKAVWFGEDIGTEADVVVLNAPTLDVDGASDSGSGDEGEGAGRGGGPYDAPGGDDSYI 2 668 690 SEDKRFSVVD MAALGGVLGALLLLALLGLAVLV HKHYGPRLKC +Q9D871 MDFSRPSFSPWRWLTLVASLLTCGICQASGQIFISPDSLLGVEKYRTILTLENVPEDVLEYSWYRGKDNSTENMIFSYKPPNTRHPGPSYSGRENVTRAGSLVVRMSAVNDTGYYTVEVDTSNETQRATGWLQIVKLRSNPGISANTSALVEGMDSVVAKCLTNSSNISWYVNFVPTSGSNRMTISPDGKTLIIHRVSRYDHTLQCAIEDVPEILQKSELIQLTVAYGPDYVSLWTQPYFFAGVLTADIGSSVQLECNCFSKPEPRYHWIHNGSFLSIPENNMTLPSLSWEQMGSYRCVVENPETQLTFYRDVTIQPPRPPLPTVNRELYIPGPLVIFLILLTSLGGAFVCRVLVYSLFQSCSRGKTCHKCPWQTN 2 334 356 TVNRELYIPG PLVIFLILLTSLGGAFVCRVLVY SLFQSCSRGK +P13688 MGHLSAPLHRVRVPWQGLLLTASLLTFWNPPTTAQLTTESMPFNVAEGKEVLLLVHNLPQQLFGYSWYKGERVDGNRQIVGYAIGTQQATPGPANSGRETIYPNASLLIQNVTQNDTGFYTLQVIKSDLVNEEATGQFHVYPELPKPSISSNNSNPVEDKDAVAFTCEPETQDTTYLWWINNQSLPVSPRLQLSNGNRTLTLLSVTRNDTGPYECEIQNPVSANRSDPVTLNVTYGPDTPTISPSDTYYRPGANLSLSCYAASNPPAQYSWLINGTFQQSTQELFIPNITVNNSGSYTCHANNSVTGCNRTTVKTIIVTELSPVVAKPQIKASKTTVTGDKDSVNLTCSTNDTGISIRWFFKNQSLPSSERMKLSQGNTTLSINPVKREDAGTYWCEVFNPISKNQSDPIMLNVNYNALPQENGLSPGAIAGIVIGVVALVALIAVALACFLHFGKTGRASDQRDLTEHKPSVSNHTQDHSNDPPNKMNEVTYSTLNFEAQQPTQPTSASPSLTATEIIYSEVKKQ 2 433 455 NGLSPGAIAG IVIGVVALVALIAVALACFLHFG KTGRASDQRD +Q925P2 
MELASAHLHKGQVPWFGLLLTASLLASWSPPTTAQVTVMAFPLHAAEGNNVILVVYNMMKGVSAFSWHKGSTTSTNAEIVRFVTGTNKTIKGPVHSGRETLYSNGSLLIQRVTMKDTGVYTIEMTDQNYRRRVLTGQFHVHTLLLKSNITSNNSNPVEGDDSVSLTCDSYTDPDNITYLWSRNGESLSEGDRLKLSEGNRTLTLLNVTRNDTGPYVCETRNPVSVNRSDPFSLNIIYGPDTPIISPSDIYLHPGSNLNLSCHAASNPPAQYFWLINEKPHASSQELFIPNITTNNSGTYTCFVNNSVTGLSRTTVKNITVLEPVTQPSLQVTNTTVKELDSVTLTCLSKDRQAHIHWIFNNDTLLITEKMTTSQAGLILKIDPIKREDAGEYQCEISNPVSVKRSNSIKLEVIFDSTYDISDVPIAVIITGAVAGVILIAGLAYRLCSRKSRWGSDQRDLTEHKPSASNHNLAPSDNSPNKVDDVAYTVLNFNSQQPNRPTSAPSSPRATETVYSEVKKK 2 422 444 VIFDSTYDIS DVPIAVIITGAVAGVILIAGLAY RLCSRKSRWG +Q6ZU64 MFTLTGCRLVEKTQKVENPSVSFASSFPLIPLLLRGKSVQKKQAESKSQIKLHTQSAPFGLCPKDMMLTQAPSSVVRSRNSRNHTVNSGGSCLSASTVAIPAINDSSAAMSACSTISAQPASSMDTQMHSPKKQERVNKRVIWGIEVAEELHWKGWELGKETTRNLVLKNRSLKLQKMKYRPPKTKFFFTVIPQPIFLSPGITLTLPIVFRPLEAKEYMDQLWFEKAEGMFCVGLRATLPCHRLICRPPSLQLPMCAVGDTTEAFFCLDNVGDLPTFFTWEFSSPFQMLPATGLLEPGQASQIKVTFQPLTAVIYEVQATCWYGAGSRQRSSIQLQAVAKCAQLLVSIKHKCPEDQDAEGFQKLLYFGSVAVGCTSERQIRLHNPSAVNAPFRIEISPDELAEDQAFSCPTAHGIVLPGEKKCVSVFFHPKTLDTRTVDYCSIMPSGCASKTLLKVVGFCRGPAVSLQHYCVNFSWVNLGERSEQPLWIENQSDCTAHFQFAIDCLESVFTIRPAFGTLVGKARMTLHCAFQPTHPIICFRRVACLIHHQDPLFLDLMGTCHSDSTKPAILKPQHLTWYRTHLARGLTLYPPDILDAMLKEKKLAQDQNGALMIPIQDLEDMPAPQYPYIPPMTEFFFDGTSDITIFPPPISVEPVEVDFGACPGPEAPNPVPLCLMNHTKGKIMVVWTRRSDCPFWVTPESCDVPPLKSMAMRLHFQPPHPNCLYTVELEAFAIYKVLQSYSNIEEDCTMCPSWCLTVRARGHSYFAGFEHHIPQYSLDVPKLFPAVSSGEPTYRSLLLVNKDCKLLTFSLAPQRGSDVILRPTSGLVAPGAHQIILICTYPEGSSWKQHTFYLQCNASPQYLKEVSMYSREEPLQLKLDTHKSLYFKPTWVGCSSTSPFTFRNPSRLPLQFEWRVSEQHRKLLAVQPSRGLIQPNERLTLTWTFSPLEETKYLFQVGMWVWEAGLSPNANPAATTHYMLRLVGVGLTSSLSAKEKELAFGNVLVNSKQSRFLVLLNDGNCTLYYRLYLEQGSPEAVDNHPLALQLDRTEGSMPPRSQDTICLTACPKQRSQYSWTITYSLLSHRDNKAGEKQELCCVSLVAVYPLLSILDVSSMGSAEGITRKHLWRLFSLDLLNSYLERDPTPCELTYKVPTRHSMSQIPPVLTPLRLDFNFGAAPFKAPPSVVFLALKNSGVVSLDWAFLLPSDQRIDVELWAEQAELNSTELHQMRVQDNCLFSISPKAGSLSPGQEQMVELKYSHLFIGTDHLPVLFKVSHGREILLNFIGVTVKPEQKYVHFTSTTHQFIPIPIGDTLPPRQIYELYNGGSVPVTYEVQTDVLSQVQEKNFDHPIFCCLNPKGEIQPGSTARVLWIFSPIEAKTYTVDVPIHILGWNSALIHFQGVGYNPHMMGDTAP
FHNISSWDNSSIHSRLVVPGQNVFLSQSHISLGNIPVQSKCSRLLFLNNISKNEEIAFSWQPSPLDFGEVSVSPMIGVVAPEETVPFVVTLRASVHASFYSADLVCKLYSQQLMRQYHKELQEWKDEKVRQEVEFTITDMKVKKRTCCTACEPARKYKTLPPIKNQQSVSRPASWKLQTPKEEVSWPCPQPPSPGMLCLGLTARAHATDYFLANFFSEFPCHFLHRELPKRKAPREESETSEEKSPNKWGPVSKQKKQLLVDILTTIIRGLLEDKNFHEAVDQSLVEQVPYFRQFWNEQSTKFMDQKNSLYLMPILPVPSSSWEDGKGKQPKEDRPEHYPGLGKKEEGEEEKGEEEEEELEEEEEEEEETEEEELGKEEIEEKEEERDEKEEKVSWAGIGPTPQPESQESMQWQWQQQLNVMVKEEQEQDEKEAIRRLPAFANLQEALLENMIQNILVEASRGEVVLTSRPRVIALPPFCVPRSLTPDTLLPTQQAEVLHPVVPLPTDLP 2 187 209 KMKYRPPKTK FFFTVIPQPIFLSPGITLTLPIV FRPLEAKEYM +Q9H9P2 MSRVVSLLLGAALLCGHGAFCRRVVSGQKVCFADFKHPCYKMAYFHELSSRVSFQEARLACESEGGVLLSLENEAEQKLIESMLQNLTKPGTGISDGDFWIGLWRNGDGQTSGACPDLYQWSDGSNSQYRNWYTDEPSCGSEKCVVMYHQPTANPGLGGPYLYQWNDDRCNMKHNYICKYEPEINPTAPVEKPYLTNQPGDTHQNVVVTEAGIIPNLIYVVIPTIPLLLLILVAFGTCCFQMLHKSKGRTKTSPNQSTLWISKSTRKESGMEV 2 218 240 VTEAGIIPNL IYVVIPTIPLLLLILVAFGTCCF QMLHKSKGRT +Q96F05 MWTALVLIWIFSLSLSESHAASNDPRNFVPNKMWKGLVKRNASVETVDNKTSEDVTMAAASPVTLTKGTSAAHLNSMEVTTEDTSRTDVSEPATSGGAADGVTSIAPTAVASSTTAASITTAASSMTVASSAPTTAASSTTVASIAPTTAASSMTAASSTPMTLALPAPTSTSTGRTPSTTATGHPSLSTALAQVPKSSALPRTATLATLATRAQTVATTANTSSPMSTRPSPSKHMPSDTAASPVPPMRPQAQGPISQVSVDQPVVNTTNKSTPMPSNTTPEPAPTPTVVTTTKAQAREPTASPVPVPHTSPIPEMEAMSPTTQPSPMPYTQRAAGPGTSQAPEQVETEATPGTDSTGPTPRSSGGTKMPATDSCQPSTQGQYMVVTTEPLTQAVVDKTLLLVVLLLGVTLFITVLVLFALQAYESYKKKDYTQVDYLINGMYADSEM 2 400 422 EPLTQAVVDK TLLLVVLLLGVTLFITVLVLFAL QAYESYKKKD +Q6NUJ2 MSARAPKELRLALPPCLLNRTFASPNASGSGNTGARGPGAVGSGTCITQVGQQLFQSFSSTLVLIVLVTLIFCLIVLSLSTFHIHKRRMKKRKMQRAQEEYERDHCSGSRGGGGLPRPGRQAPTHAKETRLERQPRDSPFCAPSNASSLSSSSPGLPCQGPCAPPPPPPASSPQGAHAASSCLDTAGEGLLQTVVLS 2 62 84 QQLFQSFSST LVLIVLVTLIFCLIVLSLSTFHI HKRRMKKRKM +Q86T13 
MRPAFALCLLWQALWPGPGGGEHPTADRAGCSASGACYSLHHATMKRQAAEEACILRGGALSTVRAGAELRAVLALLRAGPGPGGGSKDLLFWVALERRRSHCTLENEPLRGFSWLSSDPGGLESDTLQWVEEPQRSCTARRCAVLQATGGVEPAGWKEMRCHLRANGYLCKYQFEVLCPAPRPGAASNLSYRAPFQLHSAALDFSPPGTEVSALCRGQLPISVTCIADEIGARWDKLSGDVLCPCPGRYLRAGKCAELPNCLDDLGGFACECATGFELGKDGRSCVTSGEGQPTLGGTGVPTRRPPATATSPVPQRTWPIRVDEKLGETPLVPEQDNSVTSIPEIPRWGSQSTMSTLQMSLQAESKATITPSGSVISKFNSTTSSATPQAFDSSSAVVFIFVSTAVVVLVILTMTVLGLVKLCFHESPSSQPRKESMGPPGLESDPEPAALGSSSAHCTNNGVKVGDCDLRDRAEGALLAESPLGSSDA 2 399 421 PQAFDSSSAV VFIFVSTAVVVLVILTMTVLGLV KLCFHESPSS +Q8BG22 MTHRDSTGPVIGLKLVTLLFTLSPELLFLGAGLKLKENGYDGLLVAINPRVPEDLKLITNIKEMITEASFYLFNATKRRVFFRNVQILVPATWTDHNYSRVRQESYDKANVIVAEQSEEHGDDPYTLQHRGCGQEGRYIHFTPSFLLNDELAAGYGARGRVFVHEWAHLRWGVFDEYNNDKPFYVNGRNEIQVTRCSSDITGVFVCEKGLCPHEDCIISKIFREGCTFLYNSTQNATGSIMFMPSLPSVVEFCNESTHNQEAPNLQNQVCSLRSTWDVITASSDLNHSLPVHGVGLPAPPTFSLLQAGDRVVCLVIDVSRKMAEGDRLLRLQQAAELYLMQVVEAHTFVGIVTFDSKGEIRASLQQIYSDDDRKLLVSYLPTAVSTDAETNICAGVKKGFEVVEERNGRADGSVLILVTSGADEHIANCLLTSMNSGSTIHSMALGSSAARKVGELSRLTGGLKFFIPDKFTSNGMTEAFVRISSGTGDIFQQSLQVESVCETVQPQHQLADTMTVDSAVGNDTLFLVTWQTGGPPEIALLDPSGRKYNTGDFIINLAFRTASLKIPGTAKHGHWTYTLNNTHHSPQALKVTVASRASSLAMSPATLEAFVERDSTYFPQPVIIYANVRKGLHPILNATVVATVEPEAGDPVVLQLLDGGAGADVIRNDGIYSRYFSSFAVSGSYSLTVHVRHSPSTSTLALPVPGNHAMYVPGYITNDNIQMNAPKNLGHRPVKERWGFSRVSSGGSFSVLGVPDGPHPDMFPPCKITDLEAMKVEDDVVLSWTAPGEDFDQGQTTSYEIRMSRSLWNIRDDFDNAILVNSSELVPQHAGTRETFTFSPKLVTHELDHELAEDAQEPYIVYVALRAMDRSSLRSAVSNIALVSMSLPPNSSPVVSRDDLILKGVLTTVGLIAILCLIMVVAHCIFNRKKRPSRKENETKFL 2 904 926 VVSRDDLILK GVLTTVGLIAILCLIMVVAHCIF NRKKRPSRKE +O14967 
MHFQAFWLCLGLLFISINAEFMDDDVETEDFEENSEEIDVNESELSSEIKYKTPQPIGEVYFAETFDSGRLAGWVLSKAKKDDMDEEISIYDGRWEIEELKENQVPGDRGLVLKSRAKHHAISAVLAKPFIFADKPLIVQYEVNFQDGIDCGGAYIKLLADTDDLILENFYDKTSYIIMFGPDKCGEDYKLHFIFRHKHPKTGVFEEKHAKPPDVDLKKFFTDRKTHLYTLVMNPDDTFEVLVDQTVVNKGSLLEDVVPPIKPPKEIEDPNDKKPEEWDERAKIPDPSAVKPEDWDESEPAQIEDSSVVKPAGWLDDEPKFIPDPNAEKPDDWNEDTDGEWEAPQILNPACRIGCGEWKPPMIDNPKYKGVWRPPLVDNPNYQGIWSPRKIPNPDYFEDDHPFLLTSFSALGLELWSMTSDIYFDNFIICSEKEVADHWAADGWRWKIMIANANKPGVLKQLMAAAEGHPWLWLIYLVTAGVPIALITSFCWPRKVKKKHKDTEYKKTDICIPQTKGVLEQEEKEEKAALEKPMDLEEEKKQNDGEMLEKEEESEPEEKSEEEIEIIEGQEESNQSNKSGSEDEMKEADESTGSGDGPIKSVRKRRVRKD 2 471 493 QLMAAAEGHP WLWLIYLVTAGVPIALITSFCWP RKVKKKHKDT +Q8TDQ1 MPLLTLYLLLFWLSGYSIVTQITGPTTVNGLERGSLTVQCVYRSGWETYLKWWCRGAIWRDCKILVKTSGSEQEVKRDRVSIKDNQKNRTFTVTMEDLMKTDADTYWCGIEKTGNDLGVTVQVTIDPAPVTQEETSSSPTLTGHHLDNRHKLLKLSVLLPLIFTILLLLLVAASLLAWRMMKYQQKAAGMSPEQVLQPLEGDLCYADLTLQLAGTSPQKATTKLSSAQVDQVEVEYVTMASLPKEDISYASLTLGAEDQEPTYCNMGHLSSHLPGRGPEEPTEYSTISRP 2 156 178 LDNRHKLLKL SVLLPLIFTILLLLLVAASLLAW RMMKYQQKAA +Q08708 MTARAWASWRSSALLLLLVPGYFPLSHPMTVAGPVGGSLSVQCRYEKEHRTLNKFWCRPPQILRCDKIVETKGSAGKRNGRVSIRDSPANLSFTVTLENLTEEDAGTYWCGVDTPWLRDFHDPIVEVEVSVFPAGTTTASSPQSSMGTSGPPTKLPVHTWPSVTRKDSPEPSPHPGSLFSNVRFLLLVLLELPLLLSMLGAVLWVNRPQRSSRSRQNWPKGENQ 2 184 206 HPGSLFSNVR FLLLVLLELPLLLSMLGAVLWVN RPQRSSRSRQ +A8K4G0 MWLPPALLLLSLSGCFSIQGPESVRAPEQGSLTVQCHYKQGWETYIKWWCRGVRWDTCKILIETRGSEQGEKSDRVSIKDNQKDRTFTVTMEGLRRDDADVYWCGIERRGPDLGTQVKVIVDPEGAASTTASSPTNSNMAVFIGSHKRNHYMLLVFVKVPILLILVTAILWLKGSQRVPEEPGEQPIYMNFSEPLTKDMAT 2 150 172 AVFIGSHKRN HYMLLVFVKVPILLILVTAILWL KGSQRVPEEP +Q9H6B4 MSLLLLLLLVSYYVGTLGTHTEIKRVAEEKVTLPCHHQLGLPEKDTLDIEWLLTDNEGNQKVVITYSSRHVYNNLTEEQKGRVAFASNFLAGDASLQIEPLKPSDEGRYTCKVKNSGRYVWSHVILKVLVRPSKPKCELEGELTEGSDLTLQCESSSGTEPIVYYWQRIREKEGEDERLPPKSRIDYNHPGRVLLQNLTMSYSGLYQCTAGNEAGKESCVVRVTVQYVQSIGMVAGAVTGIVAGALLIFLLVWLLIRRKDKERYEEEERPNEIREDAEAPKARLVKPSSSSSGSRSSRSGSSSTRSTANSASRSQRTLSTDAAPQPGLATQAYSLVGPEVRGSEPKKVHHANLTKAETTPSMIPSQSRAFQTV 2 234 256 TVQYVQSIGM 
VAGAVTGIVAGALLIFLLVWLLI RRKDKERYEE +Q9BZ76 MASVAWAVLKVLLLLPTQTWSPVGAGNPPDCDAPLASALPRSSFSSSSELSSSHGPGFSRLNRRDGAGGWTPLVSNKYQWLQIDLGERMEVTAVATQGGYGSSDWVTSYLLMFSDGGRNWKQYRREESIWGFPGNTNADSVVHYRLQPPFEARFLRFLPLAWNPRGRIGMRIEVYGCAYKSEVVYFDGQSALLYRLDKKPLKPIRDVISLKFKAMQSNGILLHREGQHGNHITLELIKGKLVFFLNSGNAKLPSTIAPVTLTLGSLLDDQHWHSVLIELLDTQVNFTVDKHTHHFQAKGDSSYLDLNFEISFGGIPTPGRSRAFRRKSFHGCLENLYYNGVDVTELAKKHKPQILMMGNVSFSCPQPQTVPVTFLSSRSYLALPGNSGEDKVSVTFQFRTWNRAGHLLFGELRRGSGSFVLFLKDGKLKLSLFQPGQSPRNVTAGAGLNDGQWHSVSFSAKWSHMNVVVDDDTAVQPLVAVLIDSGDTYYFGGCLDNSSGSGCKSPLGGFQGCLRLITIGDKAVDPILVQQGALGSFRDLQIDSCGITDRCLPSYCEHGGECSQSWDTFSCDCLGTGYTGETCHSSLYEQSCEAHKHRGNPSGLYYIDADGSGPLGPFLVYCNMTADAAWTVVQHGGPDAVTLRGAPSGHPRSAVSFAYAAGAGQLRSAVNLAERCEQRLALRCGTARRPDSRDGTPLSWWVGRTNETHTSWGGSLPDAQKCTCGLEGNCIDSQYYCNCDAGRNEWTSDTIVLSQKEHLPVTQIVMTDAGRPHSEAAYTLGPLLCRGDQSFWNSASFNTETSYLHFPAFHGELTADVCFFFKTTVSSGVFMENLGITDFIRIELRAPTEVTFSFDVGNGPCEVTVQSPTPFNDNQWHHVRAERNVKGASLQVDQLPQKMQPAPADGHVRLQLNSQLFIGGTATRQRGFLGCIRSLQLNGVALDLEERATVTPGVEPGCAGHCSTYGHLCRNGGRCREKRRGVTCDCAFSAYDGPFCSNEISAYFATGSSMTYHFQEHYTLSENSSSLVSSLHRDVTLTREMITLSFRTTRTPSLLLYVSSFYEEYLSVILANNGSLQIRYKLDRHQNPDAFTFDFKNMADGQLHQVKINREEAVVMVEVNQSTKKQVILSSGTEFNAVKSLILGKVLEAAGADPDTRRAATSGFTGCLSAVRFGRAAPLKAALRPSGPSRVTVRGHVAPMARCAAGAASGSPARELAPRLAGGAGRSGPADEGEPLVNADRRDSAVIGGVIAVVIFILLCITAIAIRIYQQRKLRKENESKVSKKEEC 2 1244 1266 EPLVNADRRD SAVIGGVIAVVIFILLCITAIAI RIYQQRKLRK +Q6NT55 MLPITDRLLHLLGLEKTAFRIYAVSTLLLFLLFFLFRLLLRFLRLCRSFYITCRRLRCFPQPPRRNWLLGHLGMYLPNEAGLQDEKKVLDNMHHVLLVWMGPVLPLLVLVHPDYIKPLLGASAAIAPKDDLFYGFLKPWLGDGLLLSKGDKWSRHRRLLTPAFHFDILKPYMKIFNQSADIMHAKWRHLAEGSAVSLDMFEHISLMTLDSLQKCVFSYNSNCQEKMSDYISAIIELSALSVRRQYRLHHYLDFIYYRSADGRRFRQACDMVHHFTTEVIQERRRALRQQGAEAWLKAKQGKTLDFIDVLLLARDEDGKELSDEDIRAEADTFMFEGHDTTSSGISWMLFNLAKYPEYQEKCREEIQEVMKGRELEELEWDDLTQLPFTTMCIKESLRQYPPVTLVSRQCTEDIKLPDGRIIPKGIICLVSIYGTHHNPTVWPDSKVYNPYRFDPDNPQQRSPLAYVPFSAGPRNCIGQSFAMAELRVVVALTLLRFRLSVDRTRKVRRKPELILRTENGLWLKVEPLPPRA 2 21 40 LLGLEKTAFR IYAVSTLLLFLLFFLFRLLL RFLRLCRSFY +P17927 
MGASSPRSPEPVGPPAPGLPFCCGGSLLAVVVLLALPVAWGQCNAPEWLPFARPTNLTDEFEFPIGTYLNYECRPGYSGRPFSIICLKNSVWTGAKDRCRRKSCRNPPDPVNGMVHVIKGIQFGSQIKYSCTKGYRLIGSSSATCIISGDTVIWDNETPICDRIPCGLPPTITNGDFISTNRENFHYGSVVTYRCNPGSGGRKVFELVGEPSIYCTSNDDQVGIWSGPAPQCIIPNKCTPPNVENGILVSDNRSLFSLNEVVEFRCQPGFVMKGPRRVKCQALNKWEPELPSCSRVCQPPPDVLHAERTQRDKDNFSPGQEVFYSCEPGYDLRGAASMRCTPQGDWSPAAPTCEVKSCDDFMGQLLNGRVLFPVNLQLGAKVDFVCDEGFQLKGSSASYCVLAGMESLWNSSVPVCEQIFCPSPPVIPNGRHTGKPLEVFPFGKTVNYTCDPHPDRGTSFDLIGESTIRCTSDPQGNGVWSSPAPRCGILGHCQAPDHFLFAKLKTQTNASDFPIGTSLKYECRPEYYGRPFSITCLDNLVWSSPKDVCKRKSCKTPPDPVNGMVHVITDIQVGSRINYSCTTGHRLIGHSSAECILSGNAAHWSTKPPICQRIPCGLPPTIANGDFISTNRENFHYGSVVTYRCNPGSGGRKVFELVGEPSIYCTSNDDQVGIWSGPAPQCIIPNKCTPPNVENGILVSDNRSLFSLNEVVEFRCQPGFVMKGPRRVKCQALNKWEPELPSCSRVCQPPPDVLHAERTQRDKDNFSPGQEVFYSCEPGYDLRGAASMRCTPQGDWSPAAPTCEVKSCDDFMGQLLNGRVLFPVNLQLGAKVDFVCDEGFQLKGSSASYCVLAGMESLWNSSVPVCEQIFCPSPPVIPNGRHTGKPLEVFPFGKAVNYTCDPHPDRGTSFDLIGESTIRCTSDPQGNGVWSSPAPRCGILGHCQAPDHFLFAKLKTQTNASDFPIGTSLKYECRPEYYGRPFSITCLDNLVWSSPKDVCKRKSCKTPPDPVNGMVHVITDIQVGSRINYSCTTGHRLIGHSSAECILSGNTAHWSTKPPICQRIPCGLPPTIANGDFISTNRENFHYGSVVTYRCNLGSRGRKVFELVGEPSIYCTSNDDQVGIWSGPAPQCIIPNKCTPPNVENGILVSDNRSLFSLNEVVEFRCQPGFVMKGPRRVKCQALNKWEPELPSCSRVCQPPPEILHGEHTPSHQDNFSPGQEVFYSCEPGYDLRGAASLHCTPQGDWSPEAPRCAVKSCDDFLGQLPHGRVLFPLNLQLGAKVSFVCDEGFRLKGSSVSHCVLVGMRSLWNNSVPVCEHIFCPNPPAILNGRHTGTPSGDIPYGKEISYTCDPHPDRGMTFNLIGESTIRCTSDPHGNGVWSSPAPRCELSVRAGHCKTPEQFPFASPTIPINDFEFPVGTSLNYECRPGYFGKMFSISCLENLVWSSVEDNCRRKSCGPPPEPFNGMVHINTDTQFGSTVNYSCNEGFRLIGSPSTTCLVSGNNVTWDKKAPICEIISCEPPPTISNGDFYSNNRTSFHNGTVVTYQCHTGPDGEQLFELVGERSIYCTSKDDQVGVWSSPPPRCISTNKCTAPEVENAIRVPGNRSFFSLTEIIRFRCQPGFVMVGSHTVQCQTNGRWGPKLPHCSRVCQPPPEILHGEHTLSHQDNFSPGQEVFYSCEPSYDLRGAASLHCTPQGDWSPEAPRCTVKSCDDFLGQLPHGRVLLPLNLQLGAKVSFVCDEGFRLKGRSASHCVLAGMKALWNSSVPVCEQIFCPNPPAILNGRHTGTPFGDIPYGKEISYACDTHPDRGMTFNLIGESSIRCTSDPQGNGVWSSPAPRCELSVPAACPHPPKIQNGHYIGGHVSLYLPGMTISYICDPGYLLVGKGFIFCTDQGIWSQLDHYCKEVNCSFPLFMNGISKELEMKKVYHYGDYVTLKCEDGYTLEGSPWSQCQADDRWDPPLAKCTSRTHDALIVGTLSGTIFFILLIIFLSWIILKHRK
GNNAHENPKEVAIHLHSQGGSSVHPRTLQTNEENSRVLP 2 1974 1996 KCTSRTHDAL IVGTLSGTIFFILLIIFLSWIIL KHRKGNNAHE +P20023 MGAAGLLGVFLALVAPGVLGISCGSPPPILNGRISYYSTPIAVGTVIRYSCSGTFRLIGEKSLLCITKDKVDGTWDKPAPKCEYFNKYSSCPEPIVPGGYKIRGSTPYRHGDSVTFACKTNFSMNGNKSVWCQANNMWGPTRLPTCVSVFPLECPALPMIHNGHHTSENVGSIAPGLSVTYSCESGYLLVGEKIINCLSSGKWSAVPPTCEEARCKSLGRFPNGKVKEPPILRVGVTANFFCDEGYRLQGPPSSRCVIAGQGVAWTKMPVCEEIFCPSPPPILNGRHIGNSLANVSYGSIVTYTCDPDPEEGVNFILIGESTLRCTVDSQKTGTWSGPAPRCELSTSAVQCPHPQILRGRMVSGQKDRYTYNDTVIFACMFGFTLKGSKQIRCNAQGTWEPSAPVCEKECQAPPNILNGQKEDRHMVRFDPGTSIKYSCNPGYVLVGEESIQCTSEGVWTPPVPQCKVAACEATGRQLLTKPQHQFVRPDVNSSCGEGYKLSGSVYQECQGTIPWFMEIRLCKEITCPPPPVIYNGAHTGSSLEDFPYGTTVTYTCNPGPERGVEFSLIGESTIRCTSNDQERGTWSGPAPLCKLSLLAVQCSHVHIANGYKISGKEAPYFYNDTVTFKCYSGFTLKGSSQIRCKADNTWDPEIPVCEKETCQHVRQSLQELPAGSRVELVNTSCQDGYQLTGHAYQMCQDAENGIWFKKIPLCKVIHCHPPPVIVNGKHTGMMAENFLYGNEVSYECDQGFYLLGEKKLQCRSDSKGHGSWSGPSPQCLRSPPVTRCPNPEVKHGYKLNKTHSAYSHNDIVYVDCNPGFIMNGSRVIRCHTDNTWVPGVPTCIKKAFIGCPPPPKTPNGNHTGGNIARFSPGMSILYSCDQGYLLVGEALLLCTHEGTWSQPAPHCKEVNCSSPADMDGIQKGLEPRKMYQYGAVVTLECEDGYMLEGSPQSQCQSDHQWNPPLAVCRSRSLAPVLCGIAAGLILLTFLIVITLYVISKHRARNYYTDTSQKEAFHLEAREVYSVDPYNPAS 2 976 998 AVCRSRSLAP VLCGIAAGLILLTFLIVITLYVI SKHRARNYYT +Q9NZV1 
MYLVAGDRGLAGCGHLLVSLLGLLLLLARSGTRALVCLPCDESKCEEPRNCPGSIVQGVCGCCYTCASQRNESCGGTFGIYGTCDRGLRCVIRPPLNGDSLTEYEAGVCEDENWTDDQLLGFKPCNENLIAGCNIINGKCECNTIRTCSNPFEFPSQDMCLSALKRIEEEKPDCSKARCEVQFSPRCPEDSVLIEGYAPPGECCPLPSRCVCNPAGCLRKVCQPGNLNILVSKASGKPGECCDLYECKPVFGVDCRTVECPPVQQTACPPDSYETQVRLTADGCCTLPTRCECLSGLCGFPVCEVGSTPRIVSRGDGTPGKCCDVFECVNDTKPACVFNNVEYYDGDMFRMDNCRFCRCQGGVAICFTAQCGEINCERYYVPEGECCPVCEDPVYPFNNPAGCYANGLILAHGDRWREDDCTFCQCVNGERHCVATVCGQTCTNPVKVPGECCPVCEEPTIITVDPPACGELSNCTLTGKDCINGFKRDHNGCRTCQCINTEELCSERKQGCTLNCPFGFLTDAQNCEICECRPRPKKCRPIICDKYCPLGLLKNKHGCDICRCKKCPELSCSKICPLGFQQDSHGCLICKCREASASAGPPILSGTCLTVDGHHHKNEESWHDGCRECYCLNGREMCALITCPVPACGNPTIHPGQCCPSCADDFVVQKPELSTPSICHAPGGEYFVEGETWNIDSCTQCTCHSGRVLCETEVCPPLLCQNPSRTQDSCCPQCTDQPFRPSLSRNNSVPNYCKNDEGDIFLAAESWKPDVCTSCICIDSVISCFSESCPSVSCERPVLRKGQCCPYCIEDTIPKKVVCHFSGKAYADEERWDLDSCTHCYCLQGQTLCSTVSCPPLPCVEPINVEGSCCPMCPEMYVPEPTNIPIEKTNHRGEVDLEVPLWPTPSENDIVHLPRDMGHLQVDYRDNRLHPSEDSSLDSIASVVVPIIICLSIIIAFLFINQKKQWIPLLCWYRTPTKPSSLNNQLVSVDCKKGTRVQVDSSQRMLRIAEPDARFSGFYSMQKQNHLQADNFYQTV 2 939 961 LHPSEDSSLD SIASVVVPIIICLSIIIAFLFIN QKKQWIPLLC +Q9HC73 MGRLVLLWGAAVFLLGGWMALGQGGAAEGVQIQIIYFNLETVQVTWNASKYSRTNLTFHYRFNGDEAYDQCTNYLLQEGHTSGCLLDAEQRDDILYFSIRNGTHPVFTASRWMVYYLKPSSPKHVRFSWHQDAVTVTCSDLSYGDLLYEVQYRSPFDTEWQSKQENTCNVTIEGLDAEKCYSFWVRVKAMEDVYGPDTYPSDWSEVTCWQRGEIRDACAETPTPPKPKLSKFILISSLAILLMVSLLLLSLWKLWRVKKFLIPSVPDPKSIFPGLFEIHQGNFQEWITDTQNVAHLHKMAGAEQESGPEEPLVVQLAKTEAESPRMLDPQTEEKEASGGSLQLPHQPLQGGDVVTIGGFTFVMNDRSYVAL 2 233 252 TPPKPKLSKF ILISSLAILLMVSLLLLSLW KLWRVKKFLI +O95727 MWWRVLSLLAWFPLQEASLTNHTETITVEEGQTLTLKCVTSLRKNSSLQWLTPSGFTIFLNEYPALKNSKYQLLHHSANQLSITVPNVTLQDEGVYKCLHYSDSVSTKEVKVIVLATPFKPILEASVIRKQNGEEHVVLMCSTMRSKPPPQITWLLGNSMEVSGGTLHEFETDGKKCNTTSTLIIHTYGKNSTVDCIIRHRGLQGRKLVAPFRFEDLVTDEETASDALERNSLSSQDPQQPTSTVSVTEDSSTSEIDKEEKEQTTQDPDLTTEANPQYLGLARKKSGILLLTLVSFLIFILFIIVQLFIMKLRKAHVIWKKENEVSEHTLESYRSRSNNEETSSEEKNGQSSHPMRCMNYITKLYSEAKTKRKENVQHSKLEEKHIQVPESIV 2 288 310 YLGLARKKSG ILLLTLVSFLIFILFIIVQLFIM KLRKAHVIWK +Q8VHS2 
MKLKRTAYLLFLYLSSSLLICIKNSFCNKNNTRCLSGPCQNNSTCKHFPQDNNCCLDTANNLDKDCEDLKDPCFSSPCQGIATCVKIPGEGNFLCQCPPGYSGLNCETATNSCGGNLCQHGGTCRKDPEHPVCICPPGYAGRFCETDHNECASSPCHNGAMCQDGINGYSCFCVPGYQGRHCDLEVDECVSDPCKNEAVCLNEIGRYTCVCPQEFSGVNCELEIDECRSQPCLHGATCQDAPGGYSCDCAPGFLGEHCELSVNECESQPCLHGGLCVDGRNSYHCDCTGSGFTGMHCESLIPLCWSKPCHNDATCEDTVDSYICHCRPGYTGALCETDINECSSNPCQFWGECVELSSEGLYGNTAGLPSSFSYVGASGYVCICQPGFTGIHCEEDVDECLLHPCLNGGTCENLPGNYACHCPFDDTSRTFYGGENCSEILLGCTHHQCLNNGKCIPHFQNGQHGFTCQCLSGYAGPLCETVTTLSFGSNGFLWVTSGSHTGIGPECNISLRFHTVQPNALLLIRGNKDVSMKLELLNGCVHLSIEVWNQLKVLLSISHNTSDGEWHFVEVTIAETLTLALVGGSCKEKCTTKSSVPVENHQSICALQDSFLGGLPMGTANNSVSVLNIYNVPSTPSFVGCLQDIRFDLNHITLENVSSGLSSNVKAGCLGKDWCESQPCQNRGRCINLWQGYQCECDRPYTGSNCLKEYVAGRFGQDDSTGYAAFSVNDNYGQNFSLSMFVRTRQPLGLLLALENSTYQYVSVWLEHGSLALQTPGSPKFMVNFFLSDGNVHLISLRIKPNEIELYQSSQNLGFISVPTWTIRRGDVIFIGGLPDREKTEVYGGFFKGCVQDVRLNSQTLEFFPNSTNNAYDDPILVNVTQGCPGDNTCKSNPCHNGGVCHSLWDDFSCSCPTNTAGRACEQVQWCQLSPCPPTAECQLLPQGFECIANAVFSGLSREILFRSNGNITRELTNITFAFRTHDTNVMILHAEKEPEFLNISIQDARLFFQLRSGNSFYTLHLMGSQLVNDGTWHQVTFSMIDPVAQTSRWQMEVNDQTPFVISEVATGSLNFLKDNTDIYVGDQSVDNPKGLQGCLSTIEIGGIYLSYFENLHGFPGKPQEEQFLKVSTNMVLTGCLPSNACHSSPCLHGGNCEDSYSSYRCACLSGWSGTHCEINIDECFSSPCIHGNCSDGVAAYHCRCEPGYTGVNCEVDVDNCKSHQCANGATCVPEAHGYSCLCFGNFTGRFCRHSRLPSTVCGNEKRNFTCYNGGSCSMFQEDWQCMCWPGFTGEWCEEDINECASDPCINGGLCRDLVNRFLCICDVAFAGERCELDLADDRLLGIFTAVGSGTLALFFILLLAGVASLIASNKRATQGTYSPSGQEKAGPRVEMWIRMPPPALERLI 2 1346 1368 ADDRLLGIFT AVGSGTLALFFILLLAGVASLIA SNKRATQGTY +Q5IJ48 
MALARPGTPDPQALASVLLLLLWAPALSLLAGTVPSEPPSACASDPCAPGTECQATESGGYTCGPMEPRGCATQPCHHGALCVPQGPDPTGFRCYCVPGFQGPRCELDIDECASRPCHHGATCRNLADRYECHCPLGYAGVTCEMEVDECASAPCLHGGSCLDGVGSFRCVCAPGYGGTRCQLDLDECQSQPCAHGGTCHDLVNGFRCDCAGTGYEGTHCEREVLECASAPCEHNASCLEGLGSFRCLCWPGYSGELCEVDEDECASSPCQHGGRCLQRSDPALYGGVQAAFPGAFSFRHAAGFLCHCPPGFEGADCGVEVDECASRPCLNGGHCQDLPNGFQCHCPDGYAGPTCEEDVDECLSDPCLHGGTCSDTVAGYICRCPETWGGRDCSVQLTGCQGHTCPLAATCIPIFESGVHSYVCHCPPGTHGPFCGQNTTFSVMAGSPIQASVPAGGPLGLALRFRTTLPAGTLATRNDTKESLELALVAATLQATLWSYSTTVLVLRLPDLALNDGHWHQVEVVLHLATLELRLWHEGCPARLCVASGPVALASTASATPLPAGISSAQLGDATFAGCLQDVRVDGHLLLPEDLGENVLLGCERREQCRPLPCVHGGSCVDLWTHFRCDCARPHRGPTCADEIPAATFGLGGAPSSASFLLQELPGPNLTVSFLLRTRESAGLLLQFANDSAAGLTVFLSEGRIRAEVPGSPAVVLPGRWDDGLRHLVMLSFGPDQLQDLGQHVHVGGRLLAADSQPWGGPFRGCLQDLRLDGCHLPFFPLPLDNSSQPSELGGRQSWNLTAGCVSEDMCSPDPCFNGGTCLVTWNDFHCTCPANFTGPTCAQQLWCPGQPCLPPATCEEVPDGFVCVAEATFREGPPAAFSGHNASSGRLLGGLSLAFRTRDSEAWLLRAAAGALEGVWLAVRNGSLAGGVRGGHGLPGAVLPIPGPRVADGAWHRVRLAMERPAATTSRWLLWLDGAATPVALRGLASDLGFLQGPGAVRILLAENFTGCLGRVALGGLPLPLARPRPGAAPGAREHFASWPGTPAPILGCRGAPVCAPSPCLHDGACRDLFDAFACACGPGWEGPRCEAHVDPCHSAPCARGRCHTHPDGRFECRCPPGFGGPRCRLPVPSKECSLNVTCLDGSPCEGGSPAANCSCLEGLAGQRCQVPTLPCEANPCLNGGTCRAAGGVSECICNARFSGQFCEVAKGLPLPLPFPLLEVAVPAACACLLLLLLGLLSGILAARKRRQSEGTYSPSQQEVAGARLEMDSVLKVPPEERLI 2 1225 1247 PLPLPFPLLE VAVPAACACLLLLLLGLLSGILA ARKRRQSEGT +Q9BUF7 MANPGLGLLLALGLPFLLARWGRAWGQIQTTSANENSTVLPSSTSSSSDGNLRPEAITAIIVVFSLLAALLLAVGLALLVRKLREKRQTEGTYRPSSEEQVGARVPPTPNLKLPPEERLI 2 57 79 SSDGNLRPEA ITAIIVVFSLLAALLLAVGLALL VRKLREKRQT +Q8NEA5 MDKVQSGFLILFLFLMECQLHLCLPYADGLHPTGNITGLPGSKRSQPPRNITKEPKVFFHKTQLPGIQGAASRSTAASPTNPMKFLRNKAIIRHRPALVKVILISSVAFSIALICGMAISYMIYRLAQAEERQQLESLYKNLRIPLLGDEEEGSEDEGESTHLLPENENELEKFIHSVIISKRSKNIKKKLKEEQNSVTENKTKNASHNGKMEDL 2 102 124 IRHRPALVKV ILISSVAFSIALICGMAISYMIY RLAQAEERQQ +P07333 
MGPGVLLLLLVATAWHGQGIPVIEPSVPELVVKPGATVTLRCVGNGSVEWDGPPSPHWTLYSDGSSSILSTNNATFQNTGTYRCTEPGDPLGGSAAIHLYVKDPARPWNVLAQEVVVFEDQDALLPCLLTDPVLEAGVSLVRVRGRPLMRHTNYSFSPWHGFTIHRAKFIQSQDYQCSALMGGRKVMSISIRLKVQKVIPGPPALTLVPAELVRIRGEAAQIVCSASSVDVNFDVFLQHNNTKLAIPQQSDFHNNRYQKVLTLNLDQVDFQHAGNYSCVASNVQGKHSTSMFFRVVESAYLNLSSEQNLIQEVTVGEGLNLKVMVEAYPGLQGFNWTYLGPFSDHQPEPKLANATTKDTYRHTFTLSLPRLKPSEAGRYSFLARNPGGWRALTFELTLRYPPEVSVIWTFINGSGTLLCAASGYPQPNVTWLQCSGHTDRCDEAQVLQVWDDPYPEVLSQEPFHKVTVQSLLTVETLEHNQTYECRAHNSVGSGSWAFIPISAGAHTHPPDEFLFTPVVVACMSIMALLLLLLLLLLYKYKQKPKYQVRWKIIESYEGNSYTFIDPTQLPYNEKWEFPRNNLQFGKTLGAGAFGKVVEATAFGLGKEDAVLKVAVKMLKSTAHADEKEALMSELKIMSHLGQHENIVNLLGACTHGGPVLVITEYCCYGDLLNFLRRKAEAMLGPSLSPGQDPEGGVDYKNIHLEKKYVRRDSGFSSQGVDTYVEMRPVSTSSNDSFSEQDLDKEDGRPLELRDLLHFSSQVAQGMAFLASKNCIHRDVAARNVLLTNGHVAKIGDFGLARDIMNDSNYIVKGNARLPVKWMAPESIFDCVYTVQSDVWSYGILLWEIFSLGLNPYPGILVNSKFYKLVKDGYQMAQPAFAPKNIYSIMQACWALEPTHRPTFQQICSFLQEQAQEDRRERDYTNLPSSSRSGGSGSSSSELEEESSSEHLTCCEQGDIAQPLLQPNNYQFC 2 515 537 AHTHPPDEFL FTPVVVACMSIMALLLLLLLLLL YKYKQKPKYQ +P15509 MLLLVTSLLLCELPHPAFLLIPEKSDLRTVAPASSLNVRFDSRTMNLSWDCQENTTFSKCFLTDKKNRVVEPRLSNNECSCTFREICLHEGVTFEVHVNTSQRGFQQKLLYPNSGREGTAAQNFSCFIYNADLMNCTWARGPTAPRDVQYFLYIRNSKRRREIRCPYYIQDSGTHVGCHLDNLSGLTSRNYFLVNGTSREIGIQFFDSLLDTKKIERFNPPSNVTVRCNTTHCLVRWKQPRTYQKLSYLDFQYQLDVHRKNTQPGTENLLINVSGDLENRYNFPSSEPRAKHSVKIRAADVRILNWSSWSEAIEFGSDDGNLGSVYIYVLLIVGTLVCGIVLGFLFKRFLRIQRLFPPVPQIKDKLNDNHEVEDEIIWEEFTPEEGKGYREEVLTVKEIT 2 324 346 EFGSDDGNLG SVYIYVLLIVGTLVCGIVLGFLF KRFLRIQRLF +Q99062 
MARLGNCSLTWAALIILLLPGSLEECGHISVSAPIVHLGDPITASCIIKQNCSHLDPEPQILWRLGAELQPGGRQQRLSDGTQESIITLPHLNHTQAFLSCCLNWGNSLQILDQVELRAGYPPAIPHNLSCLMNLTTSSLICQWEPGPETHLPTSFTLKSFKSRGNCQTQGDSILDCVPKDGQSHCCIPRKHLLLYQNMGIWVQAENALGTSMSPQLCLDPMDVVKLEPPMLRTMDPSPEAAPPQAGCLQLCWEPWQPGLHINQKCELRHKPQRGEASWALVGPLPLEALQYELCGLLPATAYTLQIRCIRWPLPGHWSDWSPSLELRTTERAPTVRLDTWWRQRQLDPRTVQLFWKPVPLEEDSGRIQGYVVSWRPSGQAGAILPLCNTTELSCTFHLPSEAQEVALVAYNSAGTSRPTPVVFSESRGPALTRLHAMARDPHSLWVGWEPPNPWPQGYVIEWGLGPPSASNSNKTWRMEQNGRATGFLLKENIRPFQLYEIIVTPLYQDTMGPSQHVYAYSQEMAPSHAPELHLKHIGKTWAQLEWVPEPPELGKSPLTHYTIFWTNAQNQSFSAILNASSRGFVLHGLEPASLYHIHLMAASQAGATNSTVLTLMTLTPEGSELHIILGLFGLLLLLTCLCGTAWLCCSPNRKNPLWPSVPDPAHSSLGSWVPTIMEEDAFQLPGLGTPPITKLTVLEEDEKKPVPWESHNSSETCGLPTLVQTYVLQGDPRAVSTQPQSQSGTSDQVLYGQLLGSPTSPGPGHYLRCDSTQPLLAGLTPSPKSYENLWFQASPLGTLVTPAPSQEDDCVFGPLLNFPLLQGIRVHGMEALGSF 2 626 648 LMTLTPEGSE LHIILGLFGLLLLLTCLCGTAWL CCSPNRKNPL +Q96PZ7 MTAWRRFQSLLLLLGLLVLCARLLTAAKGQNCGGLVQGPNGTIESPGFPHGYPNYANCTWIIITGERNRIQLSFHTFALEEDFDILSVYDGQPQQGNLKVRLSGFQLPSSIVSTGSILTLWFTTDFAVSAQGFKALYEVLPSHTCGNPGEILKGVLHGTRFNIGDKIRYSCLPGYILEGHAILTCIVSPGNGASWDFPAPFCRAEGACGGTLRGTSSSISSPHFPSEYENNADCTWTILAEPGDTIALVFTDFQLEEGYDFLEISGTEAPSIWLTGMNLPSPVISSKNWLRLHFTSDSNHRRKGFNAQFQVKKAIELKSRGVKMLPSKDGSHKNSVLSQGGVALVSDMCPDPGIPENGRRAGSDFRVGANVQFSCEDNYVLQGSKSITCQRVTETLAAWSDHRPICRARTCGSNLRGPSGVITSPNYPVQYEDNAHCVWVITTTDPDKVIKLAFEEFELERGYDTLTVGDAGKVGDTRSVLYVLTGSSVPDLIVSMSNQMWLHLQSDDSIGSPGFKAVYQEIEKGGCGDPGIPAYGKRTGSSFLHGDTLTFECPAAFELVGERVITCQQNNQWSGNKPSCVFSCFFNFTASSGIILSPNYPEEYGNNMNCVWLIISEPGSRIHLIFNDFDVEPQFDFLAVKDDGISDITVLGTFSGNEVPSQLASSGHIVRLEFQSDHSTTGRGFNITYTTFGQNECHDPGIPINGRRFGDRFLLGSSVSFHCDDGFVKTQGSESITCILQDGNVVWSSTVPRCEAPCGGHLTASSGVILPPGWPGYYKDSLHCEWIIEAKPGHSIKITFDRFQTEVNYDTLEVRDGPASSSPLIGEYHGTQAPQFLISTGNFMYLLFTTDNSRSSIGFLIHYESVTLESDSCLDPGIPVNGHRHGGDFGIRSTVTFSCDPGYTLSDDEPLVCERNHQWNHALPSCDALCGGYIQGKSGTVLSPGFPDFYPNSLNCTWTIEVSHGKGVQMIFHTFHLESSHDYLLITEDGSFSEPVARLTGSVLPHTIKAGLFGNFTAQLRFISDFSISYEGFNITFSEYDLEPCDDPGVPAFSRRIGFHFGVGDSLTFSCFLGYRLEGATKLTCLGGGRRVWSAPLPR
CVAECGASVKGNEGTLLSPNFPSNYDNNHECIYKIETEAGKGIHLRTRSFQLFEGDTLKVYDGKDSSSRPLGTFTKNELLGLILNSTSNHLWLEFNTNGSDTDQGFQLTYTSFDLVKCEDPGIPNYGYRIRDEGHFTDTVVLYSCNPGYAMHGSNTLTCLSGDRRVWDKPLPSCIAECGGQIHAATSGRILSPGYPAPYDNNLHCTWIIEADPGKTISLHFIVFDTEMAHDILKVWDGPVDSDILLKEWSGSALPEDIHSTFNSLTLQFDSDFFISKSGFSIQFSTSIAATCNDPGMPQNGTRYGDSREAGDTVTFQCDPGYQLQGQAKITCVQLNNRFFWQPDPPTCIAACGGNLTGPAGVILSPNYPQPYPPGKECDWRVKVNPDFVIALIFKSFNMEPSYDFLHIYEGEDSNSPLIGSYQGSQAPERIESSGNSLFLAFRSDASVGLSGFAIEFKEKPREACFDPGNIMNGTRVGTDFKLGSTITYQCDSGYKILDPSSITCVIGADGKPSWDQVLPSCNAPCGGQYTGSEGVVLSPNYPHNYTAGQICLYSITVPKEFVVFGQFAYFQTALNDLAELFDGTHAQARLLSSLSGSHSGETLPLATSNQILLRFSAKSGASARGFHFVYQAVPRTSDTQCSSVPEPRYGRRIGSEFSAGSIVRFECNPGYLLQGSTALHCQSVPNALAQWNDTIPSCVVPCSGNFTQRRGTILSPGYPEPYGNNLNCIWKIIVTEGSGIQIQVISFATEQNWDSLEIHDGGDVTAPRLGSFSGTTVPALLNSTSNQLYLHFQSDISVAAAGFHLEYKTVGLAACQEPALPSNSIKIGDRYMVNDVLSFQCEPGYTLQGRSHISCMPGTVRRWNYPSPLCIATCGGTLSTLGGVILSPGFPGSYPNNLDCTWRISLPIGYGAHIQFLNFSTEANHDFLEIQNGPYHTSPMIGQFSGTDLPAALLSTTHETLIHFYSDHSQNRQGFKLAYQAYELQNCPDPPPFQNGYMINSDYSVGQSVSFECYPGYILIGHPVLTCQHGINRNWNYPFPRCDAPCGYNVTSQNGTIYSPGFPDEYPILKDCIWLITVPPGHGVYINFTLLQTEAVNDYIAVWDGPDQNSPQLGVFSGNTALETAYSSTNQVLLKFHSDFSNGGFFVLNFHAFQLKKCQPPPAVPQAEMLTEDDDFEIGDFVKYQCHPGYTLVGTDILTCKLSSQLQFEGSLPTCEAQCPANEVRTGSSGVILSPGYPGNYFNSQTCSWSIKVEPNYNITIFVDTFQSEKQFDALEVFDGSSGQSPLLVVLSGNHTEQSNFTSRSNQLYLRWSTDHATSKKGFKIRYAAPYCSLTHPLKNGGILNRTAGAVGSKVHYFCKPGYRMVGHSNATCRRNPLGMYQWDSLTPLCQAVSCGIPESPGNGSFTGNEFTLDSKVVYECHEGFKLESSQQATAVCQEDGLWSNKGKPPTCKPVACPSIEAQLSEHVIWRLVSGSLNEYGAQVLLSCSPGYYLEGWRLLRCQANGTWNIGDERPSCRVISCGSLSFPPNGNKIGTLTVYGATAIFTCNTGYTLVGSHVRECLANGLWSGSETRCLAGHCGSPDPIVNGHISGDGFSYRDTVVYQCNPGFRLVGTSVRICLQDHKWSGQTPVCVPITCGHPGNPAHGFTNGSEFNLNDVVNFTCNTGYLLQGVSRAQCRSNGQWSSPLPTCRVVNCSDPGFVENAIRHGQQNFPESFEYGMSILYHCKKGFYLLGSSALTCMANGLWDRSLPKCLAISCGHPGVPANAVLTGELFTYGAVVHYSCRGSESLIGNDTRVCQEDSHWSGALPHCTGNNPGFCGDPGTPAHGSRLGDDFKTKSLLRFSCEMGHQLRGSPERTCLLNGSWSGLQPVCEAVSCGNPGTPTNGMIVSSDGILFSSSVIYACWEGYKTSGLMTRHCTANGTWTGTAPDCTIISCGDPGTLANGIQFGTDFTFNKTVSYQCNPGYVMEAVTSATIRCTKDGRWNPSKPVCKAVLCPQ
PPPVQNGTVEGSDFRWGSSISYSCMDGYQLSHSAILSCEGRGVWKGEIPQCLPVFCGDPGIPAEGRLSGKSFTYKSEVFFQCKSPFILVGSSRRVCQADGTWSGIQPTCIDPAHNTCPDPGTPHFGIQNSSRGYEVGSTVFFRCRKGYHIQGSTTRTCLANLTWSGIQTECIPHACRQPETPAHADVRAIDLPTFGYTLVYTCHPGFFLAGGSEHRTCKADMKWTGKSPVCKSKGVREVNETVTKTPVPSDVFFVNSLWKGYYEYLGKRQPATLTVDWFNATSSKVNATFSEASPVELKLTGIYKKEEAHLLLKAFQIKGQADIFVSKFENDNWGLDGYVSSGLERGGFTFQGDIHGKDFGKFKLERQDPLNPDQDSSSHYHGTSSGSVAAAILVPFFALILSGFAFYLYKHRTRPKVQYNGYAGHENSNGQASFENPMYDTNLKPTEAKAVRFDTTLNTVCTVV 2 3487 3509 SSHYHGTSSG SVAAAILVPFFALILSGFAFYLY KHRTRPKVQY +O95196 MGRAGGGGPGRGPPPLLLFLGAALVLASGAVPAREAGSAVEAEELVKGSPAWEPPANDTREEAGPPAAGEDEASWTAPGGELAGPEEVLQESAAVTGTAWLEADSPGLGGVTAEAGSGDAQALPATLQAPHEVLGQSIMPPAIPEATEASGPPSPTPGDKLSPASELPKESPLEVWLNLGGSTPDPQGPELTYPFQGTLEPQPASDIIDIDYFEGLDGEGRGADLGSFPGSPGTSENHPDTEGETPSWSLLDLYDDFTPFDESDFYPTTSFYDDLDEEEEEEEDDKDAVGGGDLEDENELLVPTGKPGLGPGTGQPTSRWHAVPPQHTLGSVPGSSIALRPRPGEPGRDLASSENGTECRSGFVRHNGSCRSVCDLFPSYCHNGGQCYLVENIGAFCRCNTQDYIWHKGMRCESIITDFQVMCVAVGSAALVLLLLFMMTVFFAKKLYLLKTENTKLRRTNKFRTPSELHNDNFSLSTIAEGSHPNVRKLCNTPRTSSPHARALAHYDNVICQDDPSAPHKIQEVLKSCLKEEESFNIQNSMSPKLEGGKGDQADLDVNCLQNNLT 2 421 443 RCESIITDFQ VMCVAVGSAALVLLLLFMMTVFF AKKLYLLKTE +P16410 MACLGFQRHKAQLNLATRTWPCTLLFFLLFIPVFCKAMHVAQPAVVLASSRGIASFVCEYASPGKATEVRVTVLRQADSQVTEVCAATYMMGNELTFLDDSICTGTSSGNQVNLTIQGLRAMDTGLYICKVELMYPPPYYLGIGNGTQIYVIDPEPCPDSDFLLWILAAVSSGLFFYSFLLTAVSLSKMLKKRSPLTTGVYVKMPPTEPECEKQFQPYFIPIN 2 162 184 IDPEPCPDSD FLLWILAAVSSGLFFYSFLLTAV SLSKMLKKRS +Q86XM0 
MLMLMLVAAVTMWLRPLVTAQLCRSRTVRTGKVFNLIQDVQGDRLYFHPTTTRLIKHPCEKNIALYLGKQVFFTMDNFETSLLPFTIPTSMQVGVPEVTSAHFAGSLLLLVVDQKVYIYDYENNSWSMSLGIKHPVTHVSGDNCCYTGSLFCVHVSNLVFAYFRGDQISQTYIYYSNTGGFSFWKYHYDRQAEIIGSLGGIFHFFSLSQVAMLVVNQGKGMFKYSDHPLNRSFGLSFDYNGTLDILIAPGQRGILLLWFENSLLFSHNAGQLVDTVRVKKGDQTLFSSIFEAKITIHNIAVTENELAVITREDNLYYGNLGIVPSSIIKFADQYIWSEDVALMFRSPGTLEILTPLRDTAFPAFDFQKCLVNIQALLMDPELHVGKCKIEFLTGEFIYRMYTIDMHSQLELTASLIPQPGTSLIPLVMVSNPHSLGFQATFYENGYTSDGNTKYKLDIFLKQQQHWGRTDSNFTSSLKKATMSTLTVDIANKEISCVDIKPLSTLISVGCDLDKKIVIQNKVSACSMGILDPLTLQDNYSFIIEKEFYDPGFQGQQSSEDLHVFYSYQQLGCPLLVYYDTLWKPVVELWRKDSFQEVIDAEYVLLEVNGQFSYSYSLTAQSAMCTSQPQNWTTMIKEFGGPFFWNRENYVSCHDPNNNAPLRWPDVQYQILGGRTANQIIFGHNGFYVFYISIVDPYYSYCQLETIFSIYVYGAFPVQLVSAGVVILLIISSILGSVWLAYKTPKLLRTARGRRIKKCATQLCRRCKTVCQFRASATARAGTEPPGRHRTPHGGRSDH 2 719 741 IYVYGAFPVQ LVSAGVVILLIISSILGSVWLAY KTPKLLRTAR +E9Q9F6 MLVLMLAAAVATMVRAHTLCRVHTVRTGKVFKSNIQLQGDPLFYAFPNTFVLKNVCKADISVYLGQKVFLTIDNFESSLLPLTVPKSLAVGVPSITSAHFVSGSLVLFVISGKGYSYDYYENTWRKLEGISEPVSHISGDVCCFKGSFCLELSNNLFAYLRGGQIPGTNIYFSDNGGFSFQLMNTDKLSHLTGTLGGIFHLHSMSQVGVLMVENNLGTFHYMEYPLNHSMGIAFSYKNLLEVIMKPYQRGFMVLWNQKSILVSSNSGQIVEHVRLIDQKIFTDLDVEHANINIYSVASNAYELAFLVAEDHLYYGSQSYMGTYVIKLPHQPLWSTHTSIYFEDIGILQVLTPVADPHFAAYDFDKCTVNVQSSLMDEKLALQPCNVELLESTMINTMFTIDMNSKLKLSALMIPRKGENPTPLVMVSNPHALGFKANLNEFGNTFDGNSKYKLDIELKQQHHWGNSDFNFTASIKRHAISSVTVDIADKTLSCVDLKPLSTLISVGCDMTKKIVVQNKISACTMGILNPVQLQKNYTYTIEKEAYDPINHNGEAQDDLIVFYEYKDLGCPRLVYYDKPWKPVVELWKNGIVEEIMNAEYVISEINGLVTYSYSLTAATANCRSQPQNWSTFESDIENEEPFLWNRENYVSCHEDNKDNPLLWPNVEYQVLGGQTNNKIIFGQRNGIYTFHLSVVDPYYSYCNLNTIFSVYVHGALPVTKFQPLLTILLMVTTTLLTAWLAYAIPKQLRSEKGQRLLGFCYQILQLCLGVCFCTWLRGKLRQWLRPRRVKDQNRGKVRVAQKHPET 2 755 777 KQLRSEKGQR LLGFCYQILQLCLGVCFCTWLRG KLRQWLRPRR +Q5SY80 
MSAREVAVLLLWLSCYGSALWRYSTNSPNYRIFSTRSTIKLEYEGTLFTEWSVPETCFVLNKSSPTTELRCSSPGVHAIKPIVTGPDEEERYLFVESSHTCFLWYYRVRHFFNNFTQLITVWAYDPESADPDELLGNAEEPSINSIVLSTQMATLGQKPVIHTVLKRKVYSSNEKMRRGTWRIVVPMTKDDALKEIRGNQVTFQDCFIADFLILLTFPLLTIPEIPGYLPISSPRGSQLMASWDACVVASAVLVTDMETFHTTDSFKSWTRIRVPPDILSDDERRSVAHVILSRDGIVFLINGVLYIKSFRGFIRLGGIVNLPDGGITGISSRKWCWVNYLLKAKGRRSTFAVWTENEIYLGSILLKFARLVTTTELKNILSLSVTATLTIDRVEYTGHPLEIAVFLNYCTVCNVTKKIFLVIYNEDTKQWVSQDFTLDAPIDSVTMPHFTFSALPGLLLWNKHSIYYCYHNFTFTGILQTPAGHGNLSMLSNDSIIHEVFIDYYGDILVKMENNVIFYSKINTRDAVKLHLWTNYTTRAFIFLSTSGQTYFLYALDDGTIQIQDYPLHLEAQSIAFTTKDKCPYMAFHNNVAHVFYFLDKGEALTVWTQIVYPENTGLYVIVESYGPKILQESHEISFEAAFGYCTKTLTLTFYQNVDYERISDYFETQDKHTGLVLVQFRPSEYSKACPIAQKVFQIAVGCDDKKFIAIKGFSKKGCHHHDFSYVIEKSYLRHQPSKNLRVRYIWGEYGCPLRLDFTEKFQPVVQLFDDNGYVKDVEANFIVWEIHGRDDYSFNNTMAQSGCLHEAQTWKSMIELNKHLPLEEVWGPENYKHCFSYAIGKPGDLNQPYEIINSSNGNHIFWPMGHSGMYVFRVKILDPNYSFCNLTAMFAIETFGLIPSPSVYLVASFLFVLMLLFFTILVLSYFRYMRIYRRYIYEPLHKPQRKRKKN 2 905 927 TFGLIPSPSV YLVASFLFVLMLLFFTILVLSYF RYMRIYRRYI +Q6ZRH7 
MCGPAMFPAGPPWPRVRVVQVLWALLAVLLASWRLWAIKDFQECTWQVVLNEFKRVGESGVSDSFFEQEPVDTVSSLFHMLVDSPIDPSEKYLGFPYYLKINYSCEEKPSEDLVRMGHLTGLKPLVLVTFQSPVNFYRWKIEQLQIQMEAAPFRSKEPCMAEEVCSMSWYTPMPIKKGSVVMRVDISSNGLGTFIPDKRFQMNINGFLKRDRDNNIQFTVGEELFNLMPQYFVGVSSRPLWHTVDQSPVLILGGIPNEKYVLMTDTSFKDFSLVELSIDSCWVGSFYCPHSGFTATIYDTIATESTLFIRQNQLVYYFTGTYTTLYERNRGSGSWIRVLASECIKKLCPVYFHSNGSEYIMALTTGKHEGYVHFGTIRDGQVSFEMLPRQWSVCEQIGVTTCSIIWSEYIAGEYTLLLLVESGYGNASKRFQVVSYNTASDDLELLYHIPEFIPEARGLEFLMILGTESYTSTAMAPKGIFCNPYNNLIFIWGNFLLQSSNKENFIYLADFPKELSIKYMARSFRGAVAIVTETEEIWYLLEGSYRVYQLFPSKGWQVHISLKLMQQSSLYASNETMLTLFYEDSKLYQLVYLMNNQKGQLVKRLVPVEQLLMYQQHTSHYDLERKGGYLMLSFIDFCPFSVMRLRSLPSPQRYTRQERYRARPPRVLERSGFHNENSLAIYQGLVYYLLWLHSVYDKPYADPVHDPTWRWWANNKQDQDYYFFLASNWRSAGGVSIEMDSYEKIYNLESAYELPERIFLDKGTEYSFAIFLSAQGHSFRTQSELGTAFQLHSQVDVGVVLADPGCIEASVKQEVLINRNSVLFSITLKDKKLCYDQGISGHHLMETSMTVNVVGSSGLCFQETHLGPHMQGNLMVPVFIGCPPGKRLAFDITYTLEYSRLKNKHYFDCVNVNPEMPCFLFRDIFYPFFLIQDLVTGDSGSFQGSYVLLVVGGGPTLDSLKDYSEDEIYRFNSPLDKTNSLIWTTRTTRTTKDSAFHIMSHESPGIEWLCLENAPCYDNVPQGIFAPEFFFKVLVSNRGVDTSTYCNYQLTFLLHIHGLPLSPKRALFIIMVSASVFVGLVIFYIAFCLLWPLVVKGCTMIRWKINNLIASESYYTYASISGISSMPSLRHSRMGSMFSSRMTEDRAEPKEAVERQLMT 2 1073 1095 PKRALFIIMV SASVFVGLVIFYIAFCLLWPLVV KGCTMIRWKI +Q86UP6 MELVRRLMPLTLLILSCLAELTMAEAEGNASCTVSLGGANMAETHKAMILQLNPSENCTWTIERPENKSIRIIFSYVQLDPDGSCESENIKVFDGTSSNGPLLGQVCSKNDYVPVFESSSSTLTFQIVTDSARIQRTVFVFYYFFSPNISIPNCGGYLDTLEGSFTSPNYPKPHPELAYCVWHIQVEKDYKIKLNFKEIFLEIDKQCKFDFLAIYDGPSTNSGLIGQVCGRVTPTFESSSNSLTVVLSTDYANSYRGFSASYTSIYAENINTTSLTCSSDRMRVIISKSYLEAFNSNGNNLQLKDPTCRPKLSNVVEFSVPLNGCGTIRKVEDQSITYTNIITFSASSTSEVITRQKQLQIIVKCEMGHNSTVEIIYITEDDVIQSQNALGKYNTSMALFESNSFEKTILESPYYVDLNQTLFVQVSLHTSDPNLVVFLDTCRASPTSDFASPTYDLIKSGCSRDETCKVYPLFGHYGRFQFNAFKFLRSMSSVYLQCKVLICDSSDHQSRCNQGCVSRSKRDISSYKWKTDSIIGPIRLKRDRSASGNSGFQHETHAEETPNQPFNSVHLFSFMVLALNVVTVATITVRHFVNQRADYKYQKLQNY 2 571 593 TPNQPFNSVH LFSFMVLALNVVTVATITVRHFV NQRADYKYQK +Q5JRM2 
MNLVICVLLLSIWKNNCMTTNQTNGSSTTGDKPVESMQTKLNYLRRNLLILVGIIIMVFVFICFCYLHYNCLSDDASKAGMVKKKGIAAKSSKTSFSEAKTASQCSPETQPMLSTADKSSDSSSPERASAQSSTEKLIRPSSLQKPSIPNSAGKLTRPSYPKRSSKSSCSKKLSKSSHLEKAHKKGSLEKLCKLDYACKLASSDKPVRPPQLFKPLYSSHPQNEISPSKPFGPQELAKPPKHFNPKRSVSLGRAALLSNSELAETCQPYKKKHLVAKTYRPLVNDISEAKEKNTQNLHVSSKVKSSSRSFRKLDSRNNAYGDHVNDSDTMKYYSEVDSDKVIIITCDRGYNQVTSEVTLND 2 48 70 QTKLNYLRRN LLILVGIIIMVFVFICFCYLHYN CLSDDASKAG +Q96J86 MDAPRLPVRPGVLLPKLVLLFVYADDCLAQCGKDCKSYCCDGTTPYCCSYYAYIGNILSGTAIAGIVFGIVFIMGVIAGIAICICMCMKNHRATRVGILRTTHINTVSSYPGPPPYGHDHEMEYCADLPPPYSPTPQGPAQRSPPPPYPGNARK 2 63 85 YIGNILSGTA IAGIVFGIVFIMGVIAGIAICIC MCMKNHRATR +Q61476 MVSSTWGYDPRAGAGDLVITTTAAGAVTIAVLLFQTVCGDCGPPPDIPNARPILGRHSKFAEQSKVAYSCNNGFKQVPDKSNIVVCLENGQWSSHETFCEKSCDTPERLSFASLKKEYFNMNFFPVGTIVEYECRPGFRKQPSLSGKSTCLEDLVWSPVAQFCKKKSCPNPKDLDNGHINIPTGILFGSEINFSCNPGYRLVGITSILCTIIGNTVDWDDEFPVCTEIFCPDPPKINNGIMRGESDSYKYSQVVIYSCDKGFILFGNSTIYCTVSKSDVGQWSSPPPQCIEESKVPIKKPVVNVPSTGIPSTPQKPTTESVPNPGDQPTPQKPSTVKVPATQHEPDTTTRTSTDKGESNSGGDRYIYGFVAVIAMIDSLIIVKTLWTILSPNRRSDFQGKERKDVSK 2 367 389 ESNSGGDRYI YGFVAVIAMIDSLIIVKTLWTIL SPNRRSDFQG +Q8N8Z6 MVPGARGGGALARAAGRGLLALLLAVSAPLRLQAEELGDGCGHLVTYQDSGTMTSKNYPGTYPNHTVCEKTITVPKGKRLILRLGDLDIESQTCASDYLLFTSSSDQYGPYCGSMTVPKELLLNTSEVTVRFESGSHISGRGFLLTYASSDHPDLITCLERASHYLKTEYSKFCPAGCRDVAGDISGNMVDGYRDTSLLCKAAIHAGIIADELGGQISVLQRKGISRYEGILANGVLSRDGSLSDKRFLFTSNGCSRSLSFEPDGQIRASSSWQSVNESGDQVHWSPGQARLQDQGPSWASGDSSNNHKPREWLEIDLGEKKKITGIRTTGSTQSNFNFYVKSFVMNFKNNNSKWKTYKGIVNNEEKVFQGNSNFRDPVQNNFIPPIVARYVRVVPQTWHQRIALKVELIGCQITQGNDSLVWRKTSQSTSVSTKKEDETITRPIPSEETSTGINITTVAIPLVLLVVLVFAGMGIFAAFRKKKKKGSPYGSAEAQKTDCWKQIKYPFARHQSAEFTISYDNEKEMTQKLDLITSDMADYQQPLMIGTGTVTRKGSTFRPMDTDAEEAGVSTDAGGHYDCPQRAGRHEYALPLAPPEPEYATPIVERHVLRAHTFSAQSGYRVPGPQPGHKHSLSSGGFSPVAGVGAQDGDYQRPHSAQPADRGYDRPKAVSALATESGHPDSQKPPTHPGTSDSYSAPRDCLTPLNQTAMTALL 2 458 480 EETSTGINIT TVAIPLVLLVVLVFAGMGIFAAF RKKKKKGSPY +Q96PD2 
MASRAVVRARRCPQCPQVRAAAAAPAWAALPLSRSLPPCSNSSSFSMPLFLLLLLVLLLLLEDAGAQQGDGCGHTVLGPESGTLTSINYPQTYPNSTVCEWEIRVKMGERVRIKFGDFDIEDSDSCHFNYLRIYNGIGVSRTEIGKYCGLGLQMNHSIESKGNEITLLFMSGIHVSGRGFLASYSVIDKQDLITCLDTASNFLEPEFSKYCPAGCLLPFAEISGTIPHGYRDSSPLCMAGVHAGVVSNTLGGQISVVISKGIPYYESSLANNVTSVVGHLSTSLFTFKTSGCYGTLGMESGVIADPQITASSVLEWTDHTGQENSWKPKKARLKKPGPPWAAFATDEYQWLQIDLNKEKKITGIITTGSTMVEHNYYVSAYRILYSDDGQKWTVYREPGVEQDKIFQGNKDYHQDVRNNFLPPIIARFIRVNPTQWQQKIAMKMELLGCQFIPKGRPPKLTQPPPPRNSNDLKNTTAPPKIAKGRAPKFTQPLQPRSSNEFPAQTEQTTASPDIRNTTVTPNVTKDVALAAVLVPVLVMVLTTLILILVCAWHWRNRKKKTEGTYDLPYWDRAGWWKGMKQFLPAKAVDHEETPVRYSSSEVNHLSPREVTTVLQADSAEYAQPLVGGIVGTLHQRSTFKPEEGKEAGYADLDPYNSPGQEVYHAYAEPLPITGPEYATPIIMDMSGHPTTSVGQPSTSTFKATGNQPPPLVGTYNTLLSRTDSCSSAQAQYDTPKAGKPGLPAPDELVYQVPQSTQEVSGAGRDGECDVFKEIL 2 527 549 TTVTPNVTKD VALAAVLVPVLVMVLTTLILILV CAWHWRNRKK +Q16832 MILIPRMLLVLFLLLPILSSAKAQVNPAICRYPLGMSGGQIPDEDITASSQWSESTAAKYGRLDSEEGDGAWCPEIPVEPDDLKEFLQIDLHTLHFITLVGTQGRHAGGHGIEFAPMYKINYSRDGTRWISWRNRHGKQVLDGNSNPYDIFLKDLEPPIVARFVRFIPVTDHSMNVCMRVELYGCVWLDGLVSYNAPAGQQFVLPGGSIIYLNDSVYDGAVGYSMTEGLGQLTDGVSGLDDFTQTHEYHVWPGYDYVGWRNESATNGYIEIMFEFDRIRNFTTMKVHCNNMFAKGVKIFKEVQCYFRSEASEWEPNAISFPLVLDDVNPSARFVTVPLHHRMASAIKCQYHFADTWMMFSEITFQSDAAMYNNSEALPTSPMAPTTYDPMLKVDDSNTRILIGCLVAIIFILLAIIVIILWRQFWQKMLEKASRRMLDDEMTVSLSLPSDSSMFNNNRSSSPSEQGSNSTYDRIFPLRPDYQEPSRLIRKLPEFAPGEEESGCSGVVKPVQPSGPEGVPHYAEADIVNLQGVTGGNTYSVPAVTMDLLSGKDVAVEEFPRKLLTFKEKLGEGQFGEVHLCEVEGMEKFKDKDFALDVSANQPVLVAVKMLRADANKNARNDFLKEIKIMSRLKDPNIIHLLAVCITDDPLCMITEYMENGDLNQFLSRHEPPNSSSSDVRTVSYTNLKFMATQIASGMKYLSSLNFVHRDLATRNCLVGKNYTIKIADFGMSRNLYSGDYYRIQGRAVLPIRWMSWESILLGKFTTASDVWAFGVTLWETFTFCQEQPYSQLSDEQVIENTGEFFRDQGRQTYLPQPAICPDSVYKLMLSCWRRDTKNRPSFQEIHLLLLQQGDE 2 399 421 PMLKVDDSNT RILIGCLVAIIFILLAIIVIILW RQFWQKMLEK +P80370 
MTATEALLRVLLLLLAFGHSTYGAECFPACNPQNGFCEDDNVCRCQPGWQGPLCDQCVTSPGCLHGLCGEPGQCICTDGWDGELCDRDVRACSSAPCANNRTCVSLDDGLYECSCAPGYSGKDCQKKDGPCVINGSPCQHGGTCVDDEGRASHASCLCPPGFSGNFCEIVANSCTPNPCENDGVCTDIGGDFRCRCPAGFIDKTCSRPVTNCASSPCQNGGTCLQHTQVSYECLCKPEFTGLTCVKKRALSPQQVTRLPSGYGLAYRLTPGVHELPVQQPEHRILKVSMKELNKKTPLLTEGQAICFTILGVLTSLVVLGTVGIVFLNKCETWVSNLRYNHMLRKKKNLLLQYNSGEDLAVNIIFPEKIDMTTFSKEAGDEEI 2 305 327 KTPLLTEGQA ICFTILGVLTSLVVLGTVGIVFL NKCETWVSNL +Q6UY11 MPSGCRCLHLVCLLCILGAPGQPVRADDCSSHCDLAHGCCAPDGSCRCDPGWEGLHCERCVRMPGCQHGTCHQPWQCICHSGWAGKFCDKDEHICTTQSPCQNGGQCMYDGGGEYHCVCLPGFHGRDCERKAGPCEQAGSPCRNGGQCQDDQGFALNFTCRCLVGFVGARCEVNVDDCLMRPCANGATCLDGINRFSCLCPEGFAGRFCTINLDDCASRPCQRGARCRDRVHDFDCLCPSGYGGKTCELVLPVPDPPTTVDTPLGPTSAVVVPATGPAPHSAGAGLLRISVKEVVRRQEAGLGEPSLVALVVFGALTAALVLATVLLTLRAWRRGVCPPGPCCYPAPHYAPACQDQECQVSMLPAGLPLPRDLPPEPGKTTAL 2 307 329 RQEAGLGEPS LVALVVFGALTAALVLATVLLTL RAWRRGVCPP +P28068 MITFLPLLLGLSLGCTGAGGFVAHVESTCLLDDAGTPKDFTYCISFNKDLLTCWDPEENKMAPCEFGVLNSLANVLSQHLNQKDTLMQRLRNGLQNCATHTQPFWGSLTNRTRPPSVQVAKTTPFNTREPVMLACYVWGFYPAEVTITWRKNGKLVMPHSSAHKTAQPNGDWTYQTLSHLALTPSYGDTYTCVVEHIGAPEPILRDWTPGLSPMQTLKVSVSAVTLGLGLIIFSLGVISWRRAGHSSYTPLPGSNYSEGWHIS 2 219 238 PGLSPMQTLK VSVSAVTLGLGLIIFSLGVI SWRRAGHSSY +Q96KC8 MTAPCSQPAQLPGRRQLGLVPFPPPPPRTPLLWLLLLLLAAVAPARGWESGDLELFDLVEEVQLNFYQFLGVQQDASSADIRKAYRKLSLTLHPDKNKDENAETQFRQLVAIYEVLKDDERRQRYDDILINGLPDWRQPVFYYRRVRKMSNAELALLLFIILTVGHYAVVWSIYLEKQLDELLSRKKREKKKKTGSKSVDVSKLGASEKNERLLMKPQWHDLLPCKLGIWFCLTLKALPHLIQDAGQFYAKYKETRLKEKEDALTRTELETLQKQKKVKKPKPEFPVYTPLETTYIQSYDHGTSIEEIEEQMDDWLENRNRTQKKQAPEWTEEDLSQLTRSMVKFPGGTPGRWEKIAHELGRSVTDVTTKAKQLKDSVTCSPGMVRLSELKSTVQNSRPIKTATTLPDDMITQREDAEGVAAEEEQEGDSGEQETGATDARPRRRKPARLLEATAKPEPEEKSRAKRQKDFDIAEQNESSDEESLRKERARSAEEPWTQNQQKLLELALQQYPRGSSDRWDKIARCVPSKSKEDCIARYKLLVELVQKKKQAKS 2 153 175 YRRVRKMSNA ELALLLFIILTVGHYAVVWSIYL EKQLDELLSR +P20036 
MRPEDRMFHIRAVILRALSLAFLLSLRGAGAIKADHVSTYAAFVQTHRPTGEFMFEFDEDEMFYVDLDKKETVWHLEEFGQAFSFEAQGGLANIAILNNNLNTLIQRSNHTQATNDPPEVTVFPKEPVELGQPNTLICHIDKFFPPVLNVTWLCNGELVTEGVAESLFLPRTDYSFHKFHYLTFVPSAEDFYDCRVEHWGLDQPLLKHWEAQEPIQMPETTETVLCALGLVLGLVGIIVGTVLIIKSLRSGHDPRAQGTL 2 223 245 EPIQMPETTE TVLCALGLVLGLVGIIVGTVLII KSLRSGHDPR +P01903 MAISGVPVLGFFIIAVLMSAQESWAIKEEHVIIQAEFYLNPDQSGEFMFDFDGDEIFHVDMAKKETVWRLEEFGRFASFEAQGALANIAVDKANLEIMTKRSNYTPITNVPPEVTVLTNSPVELREPNVLICFIDKFTPPVVNVTWLRNGKPVTTGVSETVFLPREDHLFRKFHYLPFLPSTEDVYDCRVEHWGLDEPLLKHWEFDAPSPLPETTENVVCALGLTVGLVGIIIGTIFIIKGLRKSNAAERRGPL 2 217 239 APSPLPETTE NVVCALGLTVGLVGIIIGTIFII KGLRKSNAAE +P13762 MVCLKLPGGSCMAALTVTLTVLSSPLALAGDTQPRFLEQAKCECHFLNGTERVWNLIRYIYNQEEYARYNSDLGEYQAVTELGRPDAEYWNSQKDLLERRRAEVDTYCRYNYGVVESFTVQRRVQPKVTVYPSKTQPLQHHNLLVCSVNGFYPGSIEVRWFRNGQEEKAGVVSTGLIQNGDWTFQTLVMLETVPRSGEVYTCQVEHPSMMSPLTVQWSARSESAQSKMLSGVGGFVLGLLFLGTGLFIYFRNQKGHSGLQPTGLLS 2 228 250 SARSESAQSK MLSGVGGFVLGLLFLGTGLFIYF RNQKGHSGLQ +Q08554 MALASAAPGSIFCKQLLFSLLVLTLLCDACQKVYLRVPSHLQAETLVGKVNLEECLKSASLIRSSDPAFRILEDGSIYTTHDLILSSERKSFSIFLSDGQRREQQEIKVVLSARENKSPKKRHTKDTALKRSKRRWAPIPASLMENSLGPFPQHVQQIQSDAAQNYTIFYSISGPGVDKEPFNLFYIEKDTGDIFCTRSIDREKYEQFALYGYATTADGYAPEYPLPLIIKIEDDNDNAPYFEHRVTIFTVPENCRSGTSVGKVTATDLDEPDTLHTRLKYKILQQIPDHPKHFSIHPDTGVITTTTPFLDREKCDTYQLIMEVRDMGGQPFGLFNTGTITISLEDENDNPPSFTETSYVTEVEENRIDVEILRMKVQDQDLPNTPHSKAVYKILQGNENGNFIISTDPNTNEGVLCVVKPLNYEVNRQVILQVGVINEAQFSKAASSQTPTMCTTTVTVKIIDSDEGPECHPPVKVIQSQDGFPAGQELLGYKALDPEISSGEGLRYQKLGDEDNWFEINQHTGDLRTLKVLDRESKFVKNNQYNISVVAVDAVGRSCTGTLVVHLDDYNDHAPQIDKEVTICQNNEDFAVLKPVDPDGPENGPPFQFFLDNSASKNWNIEEKDGKTAILRQRQNLDYNYYSVPIQIKDRHGLVATHMLTVRVCDCSTPSECRMKDKSTRDVRPNVILGRWAILAMVLGSVLLLCILFTCFCVTAKRTVKKCFPEDIAQQNLIVSNTEGPGEEVTEANIRLPMQTSNICDTSMSVGTVGGQGIKTQQSFEMVKGGYTLDSNKGGGHQTLESVKGVGQGDTGRYAYTDWQSFTQPRLGEKVYLCGQDEEHKHCEDYVCSYNYEGKGSLAGSVGCCSDRQEEEGLEFLDHLEPKFRTLAKTCIKK 2 692 714 DVRPNVILGR WAILAMVLGSVLLLCILFTCFCV TAKRTVKKCF +Q19T08 
MGTAGAMQLCWVILGFLLFRGHNSQPTMTQTSSSQGGLGGLSLTTEPVSSNPGYIPSSEANRPSHLSSTGTPGAGVPSSGRDGGTSRDTFQTVPPNSTTMSLSMREDATILPSPTSETVLTVAAFGVISFIVILVVVVIILVGVVSLRFKCRKSKESEDPQKPGSSGLSESCSTANGEKDSITLISMKNINMNNGKQSLSAEKVL 2 123 145 SPTSETVLTV AAFGVISFIVILVVVVIILVGVV SLRFKCRKSK +Q9UNE0 MAHVGDCTQTPWLPVLVVSLMCSARAEYSNCGENEYYNQTTGLCQECPPCGPGEEPYLSCGYGTKDEDYGCVPCPAEKFSKGGYQICRRHKDCEGFFRATVLTPGDMENDAECGPCLPGYYMLENRPRNIYGMVCYSCLLAPPNTKECVGATSGASANFPGTSGSSTLSPFQHAHKELSGQGHLATALIIAMSTIFIMAIAIVLIIMFYILKTKPSAPACCTSHPGKSVEAQVSKDEEKKEAPDNVVMFSEKDEFEKLTATPAKPTKSENDASSENEQLLSRSVDSDEEPAPDKQGSPELCLLSLVHLAREKSATSNKSAGIQSRRKKILDVYANVCGVVEGLSPTELPFDCLEKTSRMLSSTYNSEKAVVKTWRHLAESFGLKRDEIGGMTDGMQLFDRISTAGYSIPELLTKLVQIERLDAVESLCADILEWAGVVPPASQPHAAS 2 188 210 LSGQGHLATA LIIAMSTIFIMAIAIVLIIMFYI LKTKPSAPAC +P00533 MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA 2 646 
668 CPTNGPKIPS IATGMVGALLLLLVVALGIGLFM RRRHIVRKRT +P01133 MLLTLIILLPVVSKFSFVSLSAPQHWSCPEGTLAGNGNSTCVGPAPFLIFSHGNSIFRIDTEGTNYEQLVVDAGVSVIMDFHYNEKRIYWVDLERQLLQRVFLNGSRQERVCNIEKNVSGMAINWINEEVIWSNQQEGIITVTDMKGNNSHILLSALKYPANVAVDPVERFIFWSSEVAGSLYRADLDGVGVKALLETSEKITAVSLDVLDKRLFWIQYNREGSNSLICSCDYDGGSVHISKHPTQHNLFAMSLFGDRIFYSTWKMKTIWIANKHTGKDMVRINLHSSFVPLGELKVVHPLAQPKAEDDTWEPEQKLCKLRKGNCSSTVCGQDLQSHLCMCAEGYALSRDRKYCEDVNECAFWNHGCTLGCKNTPGSYYCTCPVGFVLLPDGKRCHQLVSCPRNVSECSHDCVLTSEGPLCFCPEGSVLERDGKTCSGCSSPDNGGCSQLCVPLSPVSWECDCFPGYDLQLDEKSCAASGPQPFLLFANSQDIRHMHFDGTDYGTLLSQQMGMVYALDHDPVENKIYFAHTALKWIERANMDGSQRERLIEEGVDVPEGLAVDWIGRRFYWTDRGKSLIGRSDLNGKRSKIITKENISQPRGIAVHPMAKRLFWTDTGINPRIESSSLQGLGRLVIASSDLIWPSGITIDFLTDKLYWCDAKQSVIEMANLDGSKRRRLTQNDVGHPFAVAVFEDYVWFSDWAMPSVMRVNKRTGKDRVRLQGSMLKPSSLVVVHPLAKPGADPCLYQNGGCEHICKKRLGTAWCSCREGFMKASDGKTCLALDGHQLLAGGEVDLKNQVTPLDILSKTRVSEDNITESQHMLVAEIMVSDQDDCAPVGCSMYARCISEGEDATCQCLKGFAGDGKLCSDIDECEMGVPVCPPASSKCINTEGGYVCRCSEGYQGDGIHCLDIDECQLGEHSCGENASCTNTEGGYTCMCAGRLSEPGLICPDSTPPPHLREDDHHYSVRNSDSECPLSHDGYCLHDGVCMYIEALDKYACNCVVGYIGERCQYRDLKWWELRHAGHGQQQKVIVVAVCVVVLVMLLLLSLWGAHYYRTQKLLSKNPKNPYEESSRDVRSRRPADTEDGMSSCPQPWFVVIKEHQDLKNGGQPVAGEDGQAADGSMQPTSWRQEPQLCGMGTEQGCWIPVSSDKGSCPQVMERSFHMPSYGTQTLEGGVEKPHSLLSANPLWQQRALDPPHQMELTQ 2 1033 1055 RHAGHGQQQK VIVVAVCVVVLVMLLLLSLWGAH YYRTQKLLSK +Q6UXG2 
MAEPGHSHHLSARVRGRTERRIPRLWRLLLWAGTAFQVTQGTGPELHACKESEYHYEYTACDSTGSRWRVAVPHTPGLCTSLPDPIKGTECSFSCNAGEFLDMKDQSCKPCAEGRYSLGTGIRFDEWDELPHGFASLSANMELDDSAAESTGNCTSSKWVPRGDYIASNTDECTATLMYAVNLKQSGTVNFEYYYPDSSIIFEFFVQNDQCQPNADDSRWMKTTEKGWEFHSVELNRGNNVLYWRTTAFSVWTKVPKPVLVRNIAITGVAYTSECFPCKPGTYADKQGSSFCKLCPANSYSNKGETSCHQCDPDKYSEKGSSSCNVRPACTDKDYFYTHTACDANGETQLMYKWAKPKICSEDLEGAVKLPASGVKTHCPPCNPGFFKTNNSTCQPCPYGSYSNGSDCTRCPAGTEPAVGFEYKWWNTLPTNMETTVLSGINFEYKGMTGWEVAGDHIYTAAGASDNDFMILTLVVPGFRPPQSVMADTENKEVARITFVFETLCSVNCELYFMVGVNSRTNTPVETWKGSKGKQSYTYIIEENTTTSFTWAFQRTTFHEASRKYTNDVAKIYSINVTNVMNGVASYCRPCALEASDVGSSCTSCPAGYYIDRDSGTCHSCPTNTILKAHQPYGVQACVPCGPGTKNNKIHSLCYNDCTFSRNTPTRTFNYNFSALANTVTLAGGPSFTSKGLKYFHHFTLSLCGNQGRKMSVCTDNVTDLRIPEGESGFSKSITAYVCQAVIIPPEVTGYKAGVSSQPVSLADRLIGVTTDMTLDGITSPAELFHLESLGIPDVIFFYRSNDVTQSCSSGRSTTIRVRCSPQKTVPGSLLLPGTCSDGTCDGCNFHFLWESAAACPLCSVADYHAIVSSCVAGIQKTTYVWREPKLCSGGISLPEQRVTICKTIDFWLKVGISAGTCTAILLTVLTCYFWKKNQKLEYKYSKLVMNATLKDCDLPAADSCAIMEGEDVEDDLIFTSKKSLFGKIKSFTSKRTPDGFDSVPLKTSSGGLDMDL 2 908 930 RVTICKTIDF WLKVGISAGTCTAILLTVLTCYF WKKNQKLEYK +P0C7U0 MAGRGWGALWVCVAAATLLHAGGLARADCWLIEGDKGFVWLAICSQNQPPYEAIPQQINSTIVDLRLNENRIRSVQYASLSRFGNLTYLNLTKNEIGYIEDGAFSGQFNLQVLQLGYNRLRNLTEGMLRGLGKLEYLYLQANLIEVVMASSFWECPNIVNIDLSMNRIQQLNSGTFAGLAKLSVCELYSNPFYCSCELLGFLRWLAAFTNATQTYDRMQCESPPVYSGYYLLGQGRRGHRSILSKLQSVCTEDSYAAEVVGPPRPASGRSQPGRSPPPPPPPEPSDMPCADDECFSGDGTTPLVALPTLATQAEARPLIKVKQLTQNSATITVQLPSPFHRMYTLEHFNNSKASTVSRLTKAQEEIRLTNLFTLTNYTYCVVSTSAGLRHNHTCLTICLPRLPSPPGPVPSPSTATHYIMTILGCLFGMVLVLGAVYYCLRRRRRQEEKHKKAASAAAAGSLKKTIIELKYGPELEAPGLAPLSQGPLLGPEAVTRIPYLPAAGEVEQYKLVESADTPKASKGSYMEVRTGDPPERRDCELGRPGPDSQSSVAEISTIAKEVDKVNQIINNCIDALKSESTSFQGVKSGPVSVAEPPLVLLSEPLAAKHGFLAPGYKDAFGHSLQRHHSVEAAGPPRASTSSSGSVRSPRAFRAEAVGVHKAAAAEAKYIEKGSPAADAILTVTPAAAVLRAEAEKGRQYGEHRHSYPGSHPAEPPAPPGPPPPPPHEGLGRKASILEPLTRPRPRDLAYSQLSPQYHSLSYSSSPEYTCRASQSIWERFRLSRRRHKEEEEFMAAGHALRKKVQFAKDEDLHDILDYWKGVSAQHKS 2 418 440 PVPSPSTATH YIMTILGCLFGMVLVLGAVYYCL RRRRRQEEKH +Q6PCB8 
MRALPGLLEARARTPRLLLLQCLLAAARPSSADGSAPDSPFTSPPLREEIMANNFSLESHNISLTEHSSMPVEKNITLERPSNVNLTCQFTTSGDLNAVNVTWKKDGEQLENNYLVSATGSTLYTQYRFTIINSKQMGSYSCFFREEKEQRGTFNFKVPELHGKNKPLISYVGDSTVLTCKCQNCFPLNWTWYSSNGSVKVPVGVQMNKYVINGTYANETKLKITQLLEEDGESYWCRALFQLGESEEHIELVVLSYLVPLKPFLVIVAEVILLVATILLCEKYTQKKKKHSDEGKEFEQIEQLKSDDSNGIENNVPRHRKNESLGQ 2 264 281 VLSYLVPLKP FLVIVAEVILLVATILLC EKYTQKKKKH +Q5UCC4 MAAASAGATRLLLLLLMAVAAPSRARGSGCRAGTGARGAGAEGREGEACGTVGLLLEHSFEIDDSANFRKRGSLLWNQQDGTLSLSQRQLSEEERGRLRDVAALNGLYRVRIPRRPGALDGLEAGGYVSSFVPACSLVESHLSDQLTLHVDVAGNVVGVSVVTHPGGCRGHEVEDVDLELFNTSVQLQPPTTAPGPETAAFIERLEMEQAQKAKNPQEQKSFFAKYWMYIIPVVLFLMMSGAPDTGGQGGGGGGGGGGGSGR 2 221 243 QKAKNPQEQK SFFAKYWMYIIPVVLFLMMSGAP DTGGQGGGGG +Q9NPA0 MAAALWGFFPVLLLLLLSGDVQSSEVPGAAAEGSGGSGVGIGDRFKIEGRAVVPGVKPQDWISAARVLVDGEEHVGFLKTDGSFVVHDIPSGSYVVEVVSPAYRFDPVRVDITSKGKMRARYVNYIKTSEVVRLPYPLQMKSSGPPSYFIKRESWGWTDFLMNPMVMMMVLPLLIFVLLPKVVNTSDPDMRREMEQSMNMLNSNHELPDVSEFMTRLFSSKSSGKSSSGSSKTGKSGAGKRR 2 160 182 IKRESWGWTD FLMNPMVMMMVLPLLIFVLLPKV VNTSDPDMRR +Q902F9 MNPSEMQRKAPPRRRRHRNRAPLTHKMNKMVTSEEQMKLPSTKKAEPPTWAQLKKLTQLATKYLENTKVTQTPESMLLAALMIVSMVVSLPMPAGAAAANYTYWAYVPFPPLIRAVTWMDNPIEIYVNDSVWVPGPTDDCCPAKPEEEGMMINISIGYRYPPICLGRAPGCLMPAVQNWLVEVPTVSPISRFTYHMVSGMSLRPRVNYLQDFSYQRSLKFRPKGKPCPKEIPKESKNTEVLVWEECVANSAVILQNNEFGTLIDWAPRGQFYHNCSGQTQSCPSAQVSPAVDSDLTESLDKHKHKKLQSFYPWEWGEKGISTARPKIISPVSGPEHPELWRLTVASHHIRIWSGNQTLETRDRKPFYTIDLNSSLTVPLQSCVKPPYMLVVGNIVIKPDSQTITCENCRLLTCIDSTFNWQHRILLVRAREGVWIPVSMDRPWEASPSVHILTEVLKGVLNRSKRFIFTLIAVIMGLIAVTATAAVAGVALHSSVQSVNFVNDWQNNSTRLWNSQSSIDQKLANQINDLRQTVIWMGDRLMSLEHRFQLQCDWNTSDFCITPQIYNESEHHWDMVRCHLQGREDNLTLDISKLKEQIFEASKAHLNLVPGTEAIAGVADGLANLNTVTWVKTIGSTTIINLILILVCLFCLLLVYRCTQQLRRDSDHRERAMMTMVVLSKRKGGNVGKSKRDQIVTVSV 2 633 655 NLNTVTWVKT IGSTTIINLILILVCLFCLLLVY RCTQQLRRDS +Q9Y6X5 
MKLLVILLFSGLITGFRSDSSSSLPPKLLLVSFDGFRADYLKNYEFPHLQNFIKEGVLVEHVKNVFITKTFPNHYSIVTGLYEESHGIVANSMYDAVTKKHFSDSNDKDPFWWNEAVPIWVTNQLQENRSSAAAMWPGTDVPIHDTISSYFMNYNSSVSFEERLNNITMWLNNSNPPVTFATLYWEEPDASGHKYGPEDKENMSRVLKKIDDLIGDLVQRLKMLGLWENLNVIITSDHGMTQCSQDRLINLDSCIDHSYYTLIDLSPVAAILPKINRTEVYNKLKNCSPHMNVYLKEDIPNRFYYQHNDRIQPIILVADEGWTIVLNESSQKLGDHGYDNSLPSMHPFLAAHGPAFHKGYKHSTINIVDIYPMMCHILGLKPHPNNGTFGHTKCLLVDQWCINLPEAIAIVIGSLLVLTMLTCLIIIMQNRLSVPRPFSRLQLQEDDDDPLIG 2 406 428 LVDQWCINLP EAIAIVIGSLLVLTMLTCLIIIM QNRLSVPRPF +Q6UW88 MALGVPISVYLLFNAMTALTEEAAVTVTPPITAQQGNWTVNKTEADNIEGPIALKFSHLCLEDHNSYCINGACAFHHELEKAICRCFTGYTGERCEHLTLTSYAVDSYEKYIAIGIGVGLLLSGFLVIFYCYIRKRCLKLKSPYNVCSGERRPL 2 111 133 TSYAVDSYEK YIAIGIGVGLLLSGFLVIFYCYI RKRCLKLKSP +Q60750 MERRWPLGLALLLLLLCAPLPPGARAEEVTLMDTSTAQGELGWLLDPPETGWSEVQQMLNGTPLYMYQDCPIQEGGDTDHWLRSNWIYRGEEASRIYVELQFTVRDCKSFPGGAGPLGCKETFNLFYMESDQDVGIQLRRPLFQKVTTVAADQSFTIRDLASGSVKLNVERCSLGHLTRRGLYLAFHNPGSCVALVSVRVFYQRCAETVHGLAHFPDTLPGPGGLVEVAGTCLSHAQISLGSSGTPRMHCSPDGEWLVPVGQCQCEPGYEESSGNVGCTACPTGFYRVDMNTLRCLKCPQHSIAESEGSTICTCENGHYRAPGEGPQVACTRPPSAPQNLSFSTSGTQLSLRWEPPRDTGGRHDIRYSVECLQCRGIAQDGGPCQPCGKGVHFSPAASGLTTSTVQVQGLEPYANYTFTVKSQNRVSGLDSSSPSSASLSINMGHAESLSGLSLKLVKKEPRQLELTWAGSRPRNPGGNLSYELHVLNQDEEWHQMVLEPRVLLTKLQPDTTYIVRVRTLTPLGPGPFSPDHEFRTSPPVSRSLTGGEIVAVIFGLLLGIALLIGIYVFRSRRGQRQRQQRQRERTTNVDREDKLWLKPYVDLQAYEDPAQGALDFAQELDPAWLIVDTVIGEGEFGEVYRGALRLPSQDCKTVAIKTLKDTSPDGYWWNFLREATIMGQFNHPHILRLEGVITKRKPIMIITEFMENGALDAFLKEREDQLAPGQLVAMLLGIASGMNCLSGHNYVHRDLAARNILVNQNLCCKVSDFGLTRLLDDFDGTYETQGGKIPIRWTAPEAIAHRIFTTASDVWSFGIVMWEVLSFGDKPYGEMSNQEVMKSIEDGYRLPPPVDCPAPLYELMKNCWAYDRARRPHFLQLQAHLEQLLTDPHSLRTIANFDPRVTLRLPSLSGSDGIPYRSVSEWLESIRMKRYILHFRSAGLDTMECVLELTAEDLTQMGITLPGHQKRILCSIQGFKD 2 549 571 PVSRSLTGGE IVAVIFGLLLGIALLIGIYVFRS RRGQRQRQQR +Q9UF33 
MGGCEVREFLLQFGFFLPLLTAWPGDCSHVSNNQVVLLDTTTVLGELGWKTYPLNGWDAITEMDEHNRPIHTYQVCNVMEPNQNNWLRTNWISRDAAQKIYVEMKFTLRDCNSIPWVLGTCKETFNLFYMESDESHGIKFKPNQYTKIDTIAADESFTQMDLGDRILKLNTEIREVGPIERKGFYLAFQDIGACIALVSVRVFYKKCPFTVRNLAMFPDTIPRVDSSSLVEVRGSCVKSAEERDTPKLYCGADGDWLVPLGRCICSTGYEEIEGSCHACRPGFYKAFAGNTKCSKCPPHSLTYMEATSVCQCEKGYFRAEKDPPSMACTRPPSAPRNVVFNINETALILEWSPPSDTGGRKDLTYSVICKKCGLDTSQCEDCGGGLRFIPRHTGLINNSVIVLDFVSHVNYTFEIEAMNGVSELSFSPKPFTAITVTTDQDAPSLIGVVRKDWASQNSIALSWQAPAFSNGAILDYEIKYYEKEHEQLTYSSTRSKAPSVIITGLKPATKYVFHIRVRTATGYSGYSQKFEFETGDETSDMAAEQGQILVIATAAVGGFTLLVILTLFFLITGRCQWYIKAKMKSEEKRRNHLQNGHLRFPGIKTYIDPDTYEDPSLAVHEFAKEIDPSRIRIERVIGAGEFGEVCSGRLKTPGKREIPVAIKTLKGGHMDRQRRDFLREASIMGQFDHPNIIRLEGVVTKRSFPAIGVEAFCPSFLRAGFLNSIQAPHPVPGGGSLPPRIPAGRPVMIVVEYMENGSLDSFLRKHDGHFTVIQLVGMLRGIASGMKYLSDMGYVHRDLAARNILVNSNLVCKVSDFGLSRVLEDDPEAAYTTTGGKIPIRWTAPEAIAYRKFSSASDAWSYGIVMWEVMSYGERPYWEMSNQDVILSIEEGYRLPAPMGCPASLHQLMLHCWQKERNHRPKFTDIVSFLDKLIRNPSALHTLVEDILVMPESPGEVPEYPLFVTVGDWLDSIKMGQYKNNFVAAGFTTFDLISRMSIDDIRRIGVILIGHQRRIVSSIQTLRLHMMHIQEKGFHV 2 549 571 SDMAAEQGQI LVIATAAVGGFTLLVILTLFFLI TGRCQWYIKA +P29322 
MAPARGRLPPALWVVTAAAAAATCVSAARGEVNLLDTSTIHGDWGWLTYPAHGWDSINEVDESFQPIHTYQVCNVMSPNQNNWLRTSWVPRDGARRVYAEIKFTLRDCNSMPGVLGTCKETFNLYYLESDRDLGASTQESQFLKIDTIAADESFTGADLGVRRLKLNTEVRSVGPLSKRGFYLAFQDIGACLAILSLRIYYKKCPAMVRNLAAFSEAVTGADSSSLVEVRGQCVRHSEERDTPKMYCSAEGEWLVPIGKCVCSAGYEERRDACVACELGFYKSAPGDQLCARCPPHSHSAAPAAQACHCDLSYYRAALDPPSSACTRPPSAPVNLISSVNGTSVTLEWAPPLDPGGRSDITYNAVCRRCPWALSRCEACGSGTRFVPQQTSLVQASLLVANLLAHMNYSFWIEAVNGVSDLSPEPRRAAVVNITTNQAAPSQVVVIRQERAGQTSVSLLWQEPEQPNGIILEYEIKYYEKDKEMQSYSTLKAVTTRATVSGLKPGTRYVFQVRARTSAGCGRFSQAMEVETGKPRPRYDTRTIVWICLTLITGLVVLLLLLICKKRHCGYSKAFQDSDEEKMHYQNGQAPPPVFLPLHHPPGKLPEPQFYAEPHTYEEPGRAGRSFTREIEASRIHIEKIIGSGDSGEVCYGRLRVPGQRDVPVAIKALKAGYTERQRRDFLSEASIMGQFDHPNIIRLEGVVTRGRLAMIVTEYMENGSLDTFLRTHDGQFTIMQLVGMLRGVGAGMRYLSDLGYVHRDLAARNVLVDSNLVCKVSDFGLSRVLEDDPDAAYTTTGGKIPIRWTAPEAIAFRTFSSASDVWSFGVVMWEVLAYGERPYWNMTNRDVISSVEEGYRLPAPMGCPHALHQLMLDCWHKDRAQRPRFSQIVSVLDALIRSPESLRATATVSRCPPPAFVRSCFDLRGGSGGGGGLTVGDWLDSIRMGRYRDHFAAGGYSSLGMVLRMNAQDVRALGITLMGHQKKILGSIQTMRAQLTSTQGPRRHL 2 541 563 TGKPRPRYDT RTIVWICLTLITGLVVLLLLLIC KKRHCGYSKA +P19235 MDHLGASLWPQVGSLCLLLAGAAWAPPPNLPDPKFESKAALLAARGPEELLCFTERLEDLVCFWEEAASAGVGPGNYSFSYQLEDEPWKLCRLHQAPTARGAVRFWCSLPTADTSSFVPLELRVTAASGAPRYHRVIHINEVVLLDAPVGLVARLADESGHVVLRWLPPPETPMTSHIRYEVDVSAGNGAGSVQRVEILEGRTECVLSNLRGRTRYTFAVRARMAEPSFGGFWSAWSEPVSLLTPSDLDPLILTLSLILVVILVLLTVLALLSHRRALKQKIWPGIPSPESEFEGLFTTHKGNFQLWLYQNDGCLWWSPCTPFTEDPPASLEVLSERCWGTMQAVEPGTDDEGPLLEPVGSEHAQDTYLVLDKWLLPRNPPSEDLPGPGGSVDIVAMDEGSEASSCSSALASKPSPEGASAASFEYTILDPSSQLLRPWTLCPELPPTPPHLKYLYLVVSDSGISTDYSSGDSQGAQGGLSDGPYSNPYENSLIPAAEPLPPSYVACS 2 251 273 SLLTPSDLDP LILTLSLILVVILVLLTVLALLS HRRALKQKIW +Q9NQ60 MNFILFIFIPGVFSLKSSTLKPTIEALPNVLPLNEDVNKQEEKNEDHTPNYAPANEKNGNYYKDIKQYVFTTQNPNGTESEISVRATTDLNFALKNDKTVNATTYEKSTIEEETTTSEPSHKNIQRSTPNVPAFWTMLAKAINGTAVVMDDKDQLFHPIPESDVNATQGENQPDLEDLKIKIMLGISLMTLLLFVVLLAFCSATLYKLRHLSYKSCESQYSVNPELATMSYFHPSEGVSDTSFSKSAESSTFLGTTSSDMRRSGTRTSESKIMTDIISIGSDNEMHENDESVTR 2 184 206 DLEDLKIKIM LGISLMTLLLFVVLLAFCSATLY KLRHLSYKSC 
+P04626 MELAALCRWGLLLALLPPGAASTQVCTGTDMKLRLPASPETHLDMLRHLYQGCQVVQGNLELTYLPTNASLSFLQDIQEVQGYVLIAHNQVRQVPLQRLRIVRGTQLFEDNYALAVLDNGDPLNNTTPVTGASPGGLRELQLRSLTEILKGGVLIQRNPQLCYQDTILWKDIFHKNNQLALTLIDTNRSRACHPCSPMCKGSRCWGESSEDCQSLTRTVCAGGCARCKGPLPTDCCHEQCAAGCTGPKHSDCLACLHFNHSGICELHCPALVTYNTDTFESMPNPEGRYTFGASCVTACPYNYLSTDVGSCTLVCPLHNQEVTAEDGTQRCEKCSKPCARVCYGLGMEHLREVRAVTSANIQEFAGCKKIFGSLAFLPESFDGDPASNTAPLQPEQLQVFETLEEITGYLYISAWPDSLPDLSVFQNLQVIRGRILHNGAYSLTLQGLGISWLGLRSLRELGSGLALIHHNTHLCFVHTVPWDQLFRNPHQALLHTANRPEDECVGEGLACHQLCARGHCWGPGPTQCVNCSQFLRGQECVEECRVLQGLPREYVNARHCLPCHPECQPQNGSVTCFGPEADQCVACAHYKDPPFCVARCPSGVKPDLSYMPIWKFPDEEGACQPCPINCTHSCVDLDDKGCPAEQRASPLTSIISAVVGILLVVVLGVVFGILIKRRQQKIRKYTMRRLLQETELVEPLTPSGAMPNQAQMRILKETELRKVKVLGSGAFGTVYKGIWIPDGENVKIPVAIKVLRENTSPKANKEILDEAYVMAGVGSPYVSRLLGICLTSTVQLVTQLMPYGCLLDHVRENRGRLGSQDLLNWCMQIAKGMSYLEDVRLVHRDLAARNVLVKSPNHVKITDFGLARLLDIDETEYHADGGKVPIKWMALESILRRRFTHQSDVWSYGVTVWELMTFGAKPYDGIPAREIPDLLEKGERLPQPPICTIDVYMIMVKCWMIDSECRPRFRELVSEFSRMARDPQRFVVIQNEDLGPASPLDSTFYRSLLEDDDMGDLVDAEEYLVPQQGFFCPDPAPGAGGMVHHRHRSSSTRSGGGDLTLGLEPSEEEAPRSPLAPSEGAGSDVFDGDLGMGAAKGLQSLPTHDPSPLQRYSEDPTVPLPSETDGYVAPLTCSPQPEYVNQPDVRPQPPSPREGPLPAARPAGATLERPKTLSPGKNGVVKDVFAFGGAVENPEYLTPQGGAAPQPHPPPAFSPAFDNLYYWDQDPPERGAPPSTFKGTPTAENPEYLGLDVPV 2 653 675 PAEQRASPLT SIISAVVGILLVVVLGVVFGILI KRRQQKIRKY +P21860 
MRANDALQVLGLLFSLARGSEVGNSQAVCPGTLNGLSVTGDAENQYQTLYKLYERCEVVMGNLEIVLTGHNADLSFLQWIREVTGYVLVAMNEFSTLPLPNLRVVRGTQVYDGKFAIFVMLNYNTNSSHALRQLRLTQLTEILSGGVYIEKNDKLCHMDTIDWRDIVRDRDAEIVVKDNGRSCPPCHEVCKGRCWGPGSEDCQTLTKTICAPQCNGHCFGPNPNQCCHDECAGGCSGPQDTDCFACRHFNDSGACVPRCPQPLVYNKLTFQLEPNPHTKYQYGGVCVASCPHNFVVDQTSCVRACPPDKMEVDKNGLKMCEPCGGLCPKACEGTGSGSRFQTVDSSNIDGFVNCTKILGNLDFLITGLNGDPWHKIPALDPEKLNVFRTVREITGYLNIQSWPPHMHNFSVFSNLTTIGGRSLYNRGFSLLIMKNLNVTSLGFRSLKEISAGRIYISANRQLCYHHSLNWTKVLRGPTEERLDIKHNRPRRDCVAEGKVCDPLCSSGGCWGPGPGQCLSCRNYSRGGVCVTHCNFLNGEPREFAHEAECFSCHPECQPMEGTATCNGSGSDTCAQCAHFRDGPHCVSSCPHGVLGAKGPIYKYPDVQNECRPCHENCTQGCKGPELQDCLGQTLVLIGKTHLTMALTVIAGLVVIFMMLGGTFLYWRGRRIQNKRAMRRYLERGESIEPLDPSEKANKVLARIFKETELRKLKVLGSGVFGTVHKGVWIPEGESIKIPVCIKVIEDKSGRQSFQAVTDHMLAIGSLDHAHIVRLLGLCPGSSLQLVTQYLPLGSLLDHVRQHRGALGPQLLLNWGVQIAKGMYYLEEHGMVHRNLAARNVLLKSPSQVQVADFGVADLLPPDDKQLLYSEAKTPIKWMALESIHFGKYTHQSDVWSYGVTVWELMTFGAEPYAGLRLAEVPDLLEKGERLAQPQICTIDVYMVMVKCWMIDENIRPTFKELANEFTRMARDPPRYLVIKRESGPGIAPGPEPHGLTNKKLEEVELEPELDLDLDLEAEEDNLATTTLGSALSLPVGTLNRPRGSQSLLSPSSGYMPMNQGNLGESCQESAVSGSSERCPRPVSLHPMPRGCLASESSEGHVTGSEAELQEKVSMCRSRSRSRSPRPRGDSAYHSQRHSLLTPVTPLSPPGLEEEDVNGYVMPDTHLKGTPSSREGTLSSVGLSSVLGTEEEDEDEEYEYMNRRRRHSPPHPPRPSSLEELGYEYMDVGSDLSASLGSTQSCPLHPVPIMPTAGTTPDEDYEYMNRQRDGGGPGGDYAAMGACPASEQGYEEMRAFQGPGHQAPHVHYARLKTLRSLEATDSAFDNPDYWHSRLFPKANAQRT 2 644 666 LVLIGKTHLT MALTVIAGLVVIFMMLGGTFLYW RGRRIQNKRA +O14944 MTAGRRMEMLCAGRVPALLLCLGFHLLQAVLSTTVIPSCIPGESSDNCTALVQTEDNPRVAQVSITKCSSDMNGYCLHGQCIYLVDMSQNYCRCEVGYTGVRCEHFFLTVHQPLSKEYVALTVILIILFLITVVGSTYYFCRWYRNRKSKEPKKEYERVTSGDPELPQV 2 118 140 LTVHQPLSKE YVALTVILIILFLITVVGSTYYF CRWYRNRKSK +Q925F2 
MILQAGTPETSLLRVLFLGLSTLAAFSRAQMELHVPPGLNKLEAVEGEEVVLPAWYTMAREESWSHPREVPILIWFLEQEGKEPNQVLSYINGVMTNKPGTALVHSISSRNVSLRLGALQEGDSGTYRCSVNVQNDEGKSIGHSIKSIELKVLVPPAPPSCSLQGVPYVGTNVTLNCKSPRSKPTAQYQWERLAPSSQVFFGPALDAVRGSLKLTNLSIAMSGVYVCKAQNRVGFAKCNVTLDVMTGSKAAVVAGAVVGTFVGLVLIAGLVLLYQRRSKTLEELANDIKEDAIAPRTLPWTKGSDTISKNGTLSSVTSARALRPPKAAPPRPGTFTPTPSVSSQALSSPRLPRVDEPPPQAVSLTPGGVSSSALSRMGAVPVMVPAQSQAGSLV 2 252 274 LDVMTGSKAA VVAGAVVGTFVGLVLIAGLVLLY QRRSKTLEEL +P58658 MLLPGRARQPPTPQPVQHPGLRRQVEPPGQLLRLFYCTVLVCSKEISALTDFSGYLTKLLQNHTTYACDGDYLNLQCPRHSTISVQSAFYGQDYQMCSSQKPASQREDSLTCVAATTFQKVLDECQNQRACHLLVNSRVFGPDLCPGSSKYLLVSFKCQPNELKNKTVCEDQELKLHCHESKFLNIYSATYGRRTQERDICSSKAERLPPFDCLSYSALQVLSRRCYGKQRCKIIVNNHHFGSPCLPGVKKYLTVTYACVPKNILTAIDPAIANLKPSLKQKDGEYGINFDPSGSKVLRKDGILVSNSLAAFAYIRAHPERAALLFVSSVCIGLALTLCALVIRESCAKDFRDLQLGREQLVPGSDKVEEDSEDEEEEEDPSESDFPGELSGFCRTSYPIYSSIEAAELAERIERREQIIQEIWMNSGLDTSLPRNMGQFY 2 322 344 FAYIRAHPER AALLFVSSVCIGLALTLCALVIR ESCAKDFRDL +P22794 MPTDMEHTGHYLHLAFLMTTVFSLSPGTKANYTRLWANSTSSWDSVIQNKTGRNQNENINTNPITPEVDYKGNSTNMPETSHIVALTSKSEQELYIPSVVSNSPSTVQSIENTSKSHGEIFKKDVCAENNNNMAMLICLIIIAVLFLICTFLFLSTVVLANKVSSLRRSKQVGKRQPRSNGDFLASGLWPAESDTWKRTKQLTGPNLVMQSTGVLTATRERKDEEGTEKLTNKQIG 2 136 158 CAENNNNMAM LICLIIIAVLFLICTFLFLSTVV LANKVSSLRR +P34910 MDPKYFILILFCGHLNNTFFSKTETITTEKQSQPTLFTSSMSQVLANSQNTTGNPLGQPTQFSDTFSGQSISPAKVTAGQPTPAVYTSSEKPEAHTSAGQPLAYNTKQPTPIANTSSQQAVFTSARQLPSARTSTTQPPKSFVYTFTQQSSSVQIPSRKQITVHNPSTQPTSTVKNSPRSTPGFILDTTSNKQTPQKNNYNSIAAILIGVLLTSMLVAIIIIVLWKCLRKPVLNDQNWAGRSPFADGETPDICMDNIRENEISTKRTSIISLTPWKPSKSTLLADDLEIKLFESSENIEDSNNPKTEKIKDQVNGTSEDSADGSTVGTAVSSSDDADLPPPPPLLDLEGQESNQSDKPTMTIVSPLPNDSTSLPPSLDCLNQDCGDHKSEIIQSFPPLDSLNLPLPPVDFMKNQEDSNLEIQCQEFSIPPNSDQDLNESLPPPPAELL 2 203 225 QTPQKNNYNS IAAILIGVLLTSMLVAIIIIVLW KCLRKPVLND +Q6P995 
MARLCRRVPCTLLLGLAVVLLKARLVPAAARAELSRSDLSLIQQQQQQQQQQQQQQKQLEEAEEERTEVPGATSTLTVPVSVFMLKVQVNDIISRQYLSQAVVEVFVNYTKTNSTVTKSNGAVLIKVPYKLGLSLTIIAYKDGYVLTPLPWKTRRMPIYSSVTLSLFPQSQANIWLFEDTVLITGKLADAKSQPSVQFSKALIKLPDNHHISNVTGYLTVLQQFLKVDNFLHTTGITLNKPGFENIELTPLAAICVKIYSGGKELKVNGSIQVSLPLLRLNDISAGDRIPAWTFDMNTGAWVNHGRGMVKEHNNHLIWTYDAPHLGYWIAAPLPGTRGSGINEDSKDITAYHTVFLTAILGGTIVIVIGFFAVLLCYCRDKCGTPQKRERNITKLEVLKRDQTTSTTHINHISTVKVALKAEDKSQLFNAKNSSYSPQKKEPSKAETEERVSMVKTRDDFKIYNEDVSFLSVNQNNYSRNPTQSLEPNVGSKQPKHINNNLSSSLGDAQDEKRYLTGNEEAYGRSHIPEQLMHIYSQPIAILQTSDLFSTPEQLHTAKSATLPRKGQLVYGQLMEPVNRENFTQTLPKMPIHSHAQPPDAREEDIILEGQQSLPSQASDWSRYSSSLLESVSVPGTLNEAVVMTPFSSELQGISEQTLLELSKGKPSPHPRAWFVSLDGKPVAQVRHSFIDLKKGKRTQSNDTSLDSGVDMNELHSSRKLEREKTFIKSMHQPKILYLEDLDLSSSESGTTVCSPEDPALRHILDGGSGVIMEHPGEESPGRKSTVEDFEANTSPTKRRGRPPLAKRDSKTNIWKKREERPLIPIN 2 353 375 EDSKDITAYH TVFLTAILGGTIVIVIGFFAVLL CYCRDKCGTP +Q8TBP5 MKASQCCCCLSHLLASVLLLLLLPELSGPLAVLLQAAEAAPGLGPPDPRPRTLPPLPPGPTPAQQPGRGLAEAAGPRGSEGGNGSNPVAGLETDDHGGKAGEGSVGGGLAVSPNPGDKPMTQRALTVLMVVSGAVLVYFVVRTVRMRRRNRKTRRYGVLDTNIENMELTPLEQDDEDDDNTLFDANHPRR 2 124 146 NPGDKPMTQR ALTVLMVVSGAVLVYFVVRTVRM RRRNRKTRRY +Q3ZCQ3 MRAVPLPAPLLPLLLLALLAAPAARASRAESVSAPWPEPERESRPPPGPGPGNTTRFGSGAAGGSGSSSSNSSGDALVTRISILLRDLPTLKAAVIVAFAFTTLLIACLLLRVFRSGKRLKKTRKYDIITTPAERVEMAPLNEEDDEDEDSTVFDIKYR 2 93 115 ILLRDLPTLK AAVIVAFAFTTLLIACLLLRVFR SGKRLKKTRK +Q9BVV8 MGPRVLQPPLLLLLLALLLAALPCGAEEASPLRPAQVTLSPPPAVTNGSQPGAPHNSTHTRPPGASGSALTRSFYVILGFCGLTALYFLIRAFRLKKPQRRRYGLLANTEDPTEMASLDSDEETVFESRNLR 2 74 93 GASGSALTRS FYVILGFCGLTALYFLIRAF RLKKPQRRRY +Q9D3R5 MSLAHTTVLLWAWGSLQAFEIVEKESVFQRTPCPAFLVFDNAAYLADMSFELPCHCKPEDVSAVVWYYQKHLGSKRTTVLTDFDGRLLTEAAHVRVGSSMLVRFSIRMFSLLVFRAQPEDTGLYFCGTREGDYFYAYDVDIQSNKEIVASFKDMAQEPLPDEYYGALHVFTTFWEWTPCDRCGVRGEQWRFGLCYLQYPDLSPRYIKTRSAVVSCGSGAVPWKLHLQTKYHTPELQFQSCLVSCQKRNKTRKGVLAIYSYVSKLGSRPWVPQVPIQFHQQRLGHGLIISCPGARPEHAVAWDKDNQPLYRAQYLKGVNRSMRVFIDHGNHLHIRFTQLSDRGIYYCWLQGLKIAGFRLGVITRGRYPASLSDPETRTAIELTLMGYLLITIFFITIHLCRCCCQSRCCPNFSAQTLL 2 377 399 PASLSDPETR 
TAIELTLMGYLLITIFFITIHLC RCCCQSRCCP +Q17R55 MPPMLWLLLHFAAPALGFYFSISCPSGKQCQQALLSGNDILLYCNSSGAHWYYLFTQGKKGRLTSLTNISNMEIMPEGSLLIKDPLPSQTGLYHCWNKNGRQVVQYEIDFQDVTTLHITHKDLGQRPLQNETLHLGSKQLIFTWWEPWQDCNRCEEPGECKRLGYRYIEEPLEEAMPCWLYLGEVLVWSSRLRPELQVEACHVQCTNNTQLRVDYVIFDNFRLDEKTEFVWLDCPLGSMYRPVNWRANDTPLTWESQLSGQDFTTFLDPSTGGRQLQVFQPAVYKCFVQQELVAQFKPAASLETLEAQWRENDAQWREARKALRGRADSVLKGLKLVLLVVTVLALLGALLKCIHPSPGRRSTQVLVVK 2 329 351 ARKALRGRAD SVLKGLKLVLLVVTVLALLGALL KCIHPSPGRR +Q15884 MILLVNLFVLLSVVCVLLNLAGFILGCQGAQFVSSVPRCDLVDLGEGKICFCCEEFQPAKCTDKENALKLFPVQPCSAVHLLLKKVLFALCALNALTTTVCLVAAALRYLQIFATRRSCIDESQISAEEAEDHGRIPDPDDFVPPVPPPSYFATFYSCTPRMNRRMVGPDVIPLPHIYGARIKGVEVFCPLDPPPPYEAVVSQMDQEQGSSFQMSEGSEAAVIPLDLGCTQVTQDGDIPNIPAEENASTSTPSSTLVRPIRSRRALPPLRTRSKSDPVLHPSEERAAPVLSCEAATQTERRLDLAAVTLRRGLRSRASRCRPRSLIDYKSYMDTKLLVARFLEQSSCTMTPDIHELVENIKSVLKSDEEHMEEAITSASFLEQIMAPLQPSTSRAHKLPSRRQPGLLHLQSCGDLHTFTPAGRPRAERRPRRVEAERPHSLIGVIRETVL 2 86 108 CSAVHLLLKK VLFALCALNALTTTVCLVAAALR YLQIFATRRS +Q5JX71 MWTLKSSLVLLLCLTCSYAFMFSSLRQKTSEPQGKVQYGEHFRIRQNLPEHTQGWLGSKWLWLLFVVVPFVILQCQRDSEKNKEQSPPGLRGGQLHSPLKKKRNASPNKDCAFNTLMELEVELMKFVSKVRNLKRAMATGSGSNLRLRKSEMPADPYHVTICEIWGEESSS 2 51 73 HFRIRQNLPE HTQGWLGSKWLWLLFVVVPFVIL QCQRDSEKNK +Q14517 
MGRHLALLLLLLLLFQHFGDSDGSQRLEQTPLQFTHLEYNVTVQENSAAKTYVGHPVKMGVYITHPAWEVRYKIVSGDSENLFKAEEYILGDFCFLRIRTKGGNTAILNREVKDHYTLIVKALEKNTNVEARTKVRVQVLDTNDLRPLFSPTSYSVSLPENTAIRTSIARVSATDADIGTNGEFYYSFKDRTDMFAIHPTSGVIVLTGRLDYLETKLYEMEILAADRGMKLYGSSGISSMAKLTVHIEQANECAPVITAVTLSPSELDRDPAYAIVTVDDCDQGANGDIASLSIVAGDLLQQFRTVRSFPGSKEYKVKAIGGIDWDSHPFGYNLTLQAKDKGTPPQFSSVKVIHVTSPQFKAGPVKFEKDVYRAEISEFAPPNTPVVMVKAIPAYSHLRYVFKSTPGKAKFSLNYNTGLISILEPVKRQQAAHFELEVTTSDRKASTKVLVKVLGANSNPPEFTQTAYKAAFDENVPIGTTVMSLSAVDPDEGENGYVTYSIANLNHVPFAIDHFTGAVSTSENLDYELMPRVYTLRIRASDWGLPYRREVEVLATITLNNLNDNTPLFEKINCEGTIPRDLGVGEQITTVSAIDADELQLVQYQIEAGNELDFFSLNPNSGVLSLKRSLMDGLGAKVSFHSLRITATDGENFATPLYINITVAASHKLVNLQCEETGVAKMLAEKLLQANKLHNQGEVEDIFFDSHSVNAHIPQFRSTLPTGIQVKENQPVGSSVIFMNSTDLDTGFNGKLVYAVSGGNEDSCFMIDMETGMLKILSPLDRETTDKYTLNITVYDLGIPQKAAWRLLHVVVVDANDNPPEFLQESYFVEVSEDKEVHSEIIQVEATDKDLGPNGHVTYSIVTDTDTFSIDSVTGVVNIARPLDRELQHEHSLKIEARDQAREEPQLFSTVVVKVSLEDVNDNPPTFIPPNYRVKVREDLPEGTVIMWLEAHDPDLGQSGQVRYSLLDHGEGNFDVDKLSGAVRIVQQLDFEKKQVYNLTVRAKDKGKPVSLSSTCYVEVEVVDVNENLHPPVFSSFVEKGTVKEDAPVGSLVMTVSAHDEDARRDGEIRYSIRDGSGVGVFKIGEETGVIETSDRLDRESTSHYWLTVFATDQGVVPLSSFIEIYIEVEDVNDNAPQTSEPVYYPEIMENSPKDVSVVQIEAFDPDSSSNDKLMYKITSGNPQGFFSIHPKTGLITTTSRKLDREQQDEHILEVTVTDNGSPPKSTIARVIVKILDENDNKPQFLQKFYKIRLPEREKPDRERNARREPLYHVIATDKDEGPNAEISYSIEDGNEHGKFFIEPKTGVVSSKRFSAAGEYDILSIKAVDNGRPQKSSTTRLHIEWISKPKPSLEPISFEESFFTFTVMESDPVAHMIGVISVEPPGIPLWFDITGGNYDSHFDVDKGTGTIIVAKPLDAEQKSNYNLTVEATDGTTTILTQVFIKVIDTNDHRPQFSTSKYEVVIPEDTAPETEILQISAVDQDEKNKLIYTLQSSRDPLSLKKFRLDPATGSLYTSEKLDHEAVHQHTLTVMVRDQDVPVKRNFARIVVNVSDTNDHAPWFTASSYKGRVYESAAVGSVVLQVTALDKDKGKNAEVLYSIESGNIGNSFMIDPVLGSIKTAKELDRSNQAEYDLMVKATDKGSPPMSEITSVRIFVTIADNASPKFTSKEYSVELSETVSIGSFVGMVTAHSQSSVVYEIKDGNTGDAFDINPHSGTIITQKALDFETLPIYTLIIQGTNMAGLSTNTTVLVHLQDENDNAPVFMQAEYTGLISESASINSVVLTDRNVPLVIRAADADKDSNALLVYHIVEPSVHTYFAIDSSTGAIHTVLSLDYEETSIFHFTVQVHDMGTPRLFAEYAANVTVHVIDINDCPPVFAKPLYEASLLLPTYKGVKVITVNATDADSSAFSQLIYSITEGNIGEKFSMDYKTGALTVQNTTQLRSRYELTVRASDGRFAGLTSVKINVKESKESHLKFTQDVYSAVVKENSTEAETLAV
ITAIGNPINEPLFYHILNPDRRFKISRTSGVLSTTGTPFDREQQEAFDVVVEVTEEHKPSAVAHVVVKVIVEDQNDNAPVFVNLPYYAVVKVDTEVGHVIRYVTAVDRDSGRNGEVHYYLKEHHEHFQIGPLGEISLKKQFELDTLNKEYLVTVVAKDGGNPAFSAEVIVPITVMNKAMPVFEKPFYSAEIAESIQVHSPVVHVQANSPEGLKVFYSITDGDPFSQFTINFNTGVINVIAPLDFEAHPAYKLSIRATDSLTGAHAEVFVDIIVDDINDNPPVFAQQSYAVTLSEASVIGTSVVQVRATDSDSEPNRGISYQMFGNHSKSHDHFHVDSSTGLISLLRTLDYEQSRQHTIFVRAVDGGMPTLSSDVIVTVDVTDLNDNPPLFEQQIYEARISEHAPHGHFVTCVKAYDADSSDIDKLQYSILSGNDHKHFVIDSATGIITLSNLHRHALKPFYSLNLSVSDGVFRSSTQVHVTVIGGNLHSPAFLQNEYEVELAENAPLHTLVMEVKTTDGDSGIYGHVTYHIVNDFAKDRFYINERGQIFTLEKLDRETPAEKVISVRLMAKDAGGKVAFCTVNVILTDDNDNAPQFRATKYEVNIGSSAAKGTSVVKVLASDADEGSNADITYAIEADSESVKENLEINKLSGVITTKESLIGLENEFFTFFVRAVDNGSPSKESVVLVYVKILPPEMQLPKFSEPFYTFTVSEDVPIGTEIDLIRAEHSGTVLYSLVKGNTPESNRDESFVIDRQSGRLKLEKSLDHETTKWYQFSILARCTQDDHEMVASVDVSIQVKDANDNSPVFESSPYEAFIVENLPGGSRVIQIRASDADSGTNGQVMYSLDQSQSVEVIESFAINMETGWITTLKELDHEKRDNYQIKVVASDHGEKIQLSSTAIVDVTVTDVNDSPPRFTAEIYKGTVSEDDPQGGVIAILSTTDADSEEINRQVTYFITGGDPLGQFAVETIQNEWKVYVKKPLDREKRDNYLLTITATDGTFSSKAIVEVKVLDANDNSPVCEKTLYSDTIPEDVLPGKLIMQISATDADIRSNAEITYTLLGSGAEKFKLNPDTGELKTSTPLDREEQAVYHLLVRATDGGGRFCQASIVLTLEDVNDNAPEFSADPYAITVFENTEPGTLLTRVQATDADAGLNRKILYSLIDSADGQFSINELSGIIQLEKPLDRELQAVYTLSLKAVDQGLPRRLTATGTVIVSVLDINDNPPVFEYREYGATVSEDILVGTEVLQVYAASRDIEANAEITYSIISGNEHGKFSIDSKTGAVFIIENLDYESSHEYYLTVEATDGGTPSLSDVATVNVNVTDINDNTPVFSQDTYTTVISEDAVLEQSVITVMADDADGPSNSHIHYSIIDGNQGSSFTIDPVRGEVKVTKLLDRETISGYTLTVQASDNGSPPRVNTTTVNIDVSDVNDNAPVFSRGNYSVIIQENKPVGFSVLQLVVTDEDSSHNGPPFFFTIVTGNDEKAFEVNPQGVLLTSSAIKRKEKDHYLLQVKVADNGKPQLSSLTYIDIRVIEESIYPPAILPLEIFITSSGEEYSGGVIGKIHATDQDVYDTLTYSLDPQMDNLFSVSSTGGKLIAHKKLDIGQYLLNVSVTDGKFTTVADITVHIRQVTQEMLNHTIAIRFANLTPEEFVGDYWRNFQRALRNILGVRRNDIQIVSLQSSEPHPHLDVLLFVEKPGSAQISTKQLLHKINSSVTDIEEIIGVRILNVFQKLCAGLDCPWKFCDEKVSVDESVMSTHSTARLSFVTPRHHRAAVCLCKEGRCPPVHHGCEDDPCPEGSECVSDPWEEKHTCVCPSGRFGQCPGSSSMTLTGNSYVKYRLTENENKLEMKLTMRLRTYSTHAVVMYARGTDYSILEIHHGRLQYKFDCGSGPGIVSVQSIQVNDGQWHAVALEVNGNYARLVLDQVHTASGTAPGTLKTLNLDNYVFFGGHIRQQGTRHGRSPQVGNGFRGCMDSIYLNGQELPLNSKPRSYAHIE
ESVDVSPGCFLTATEDCASNPCQNGGVCNPSPAGGYYCKCSALYIGTHCEISVNPCSSKPCLYGGTCVVDNGGFVCQCRGLYTGQRCQLSPYCKDEPCKNGGTCFDSLDGAVCQCDSGFRGERCQSDIDECSGNPCLHGALCENTHGSYHCNCSHEYRGRHCEDAAPNQYVSTPWNIGLAEGIGIVVFVAGIFLLVVVFVLCRKMISRKKKHQAEPKDKHLGPATAFLQRPYFDSKLNKNIYSDIPPQVPVRPISYTPSIPSDSRNNLDRNSFEGSAIPEHPEFSTFNPESVHGHRKAVAVCSVAPNLPPPPPSNSPSDSDSIQKPSWDFDYDTKVVDLDPCLSKKPLEEKPSQPYSARESLSEVQSLSSFQSESCDDNGYHWDTSDWMPSVPLPDIQEFPNYEVIDEQTPLYSADPNAIDTDYYPGGYDIESDFPPPPEDFPAADELPPLPPEFSNQFESIHPPRDMPAAGSLGSSSRNRQRFNLNQYLPNFYPLDMSEPQTKGTGENSTCREPHAPYPPGYQRHFEAPAVESMPMSVYASTASCSDVSACCEVESEVMMSDYESGDDGHFEEVTIPPLDSQQHTEV 2 4179 4201 QYVSTPWNIG LAEGIGIVVFVAGIFLLVVVFVL CRKMISRKKK +Q9NYQ8 MTIALLGFAIFLLHCATCEKPLEGILSSSAWHFTHSHYNATIYENSSPKTYVESFEKMGIYLAEPQWAVRYRIISGDVANVFKTEEYVVGNFCFLRIRTKSSNTALLNREVRDSYTLIIQATEKTLELEALTRVVVHILDQNDLKPLFSPPSYRVTISEDMPLKSPICKVTATDADLGQNAEFYYAFNTRSEMFAIHPTSGVVTVAGKLNVTWRGKHELQVLAVDRMRKISEGNGFGSLAALVVHVEPALRKPPAIASVVVTPPDSNDGTTYATVLVDANSSGAEVESVEVVGGDPGKHFKAIKSYARSNEFSLVSVKDINWMEYLHGFNLSLQARSGSGPYFYSQIRGFHLPPSKLSSLKFEKAVYRVQLSEFSPPGSRVVMVRVTPAFPNLQYVLKPSSENVGFKLNARTGLITTTKLMDFHDRAHYQLHIRTSPGQASTVVVIDIVDCNNHAPLFNRSSYDGTLDENIPPGTSVLAVTATDRDHGENGYVTYSIAGPKALPFSIDPYLGIISTSKPMDYELMKRIYTFRVRASDWGSPFRREKEVSIFLQLRNLNDNQPMFEEVNCTGSIRQDWPVGKSIMTMSAIDVDELQNLKYEIVSGNELEYFDLNHFSGVISLKRPFINLTAGQPTSYSLKITASDGKNYASPTTLNITVVKDPHFEVPVTCDKTGVLTQFTKTILHFIGLQNQESSDEEFTSLSTYQINHYTPQFEDHFPQSIDVLESVPINTPLARLAATDPDAGFNGKLVYVIADGNEEGCFDIELETGLLTVAAPLDYEATNFYILNVTVYDLGTPQKSSWKLLTVNVKDWNDNAPRFPPGGYQLTISEDTEVGTTIAELTTKDADSEDNGRVRYTLLSPTEKFSLHPLTGELVVTGHLDRESEPRYILKVEARDQPSKGHQLFSVTDLIITLEDVNDNSPQCITEHNRLKVPEDLPPGTVLTFLDASDPDLGPAGEVRYVLMDGAHGTFRVDLMTGALILERELDFERRAGYNLSLWASDGGRPLARRTLCHVEVIVLDVNENLHPPHFASFVHQGQVQENSPSGTQVIVVAAQDDDSGLDGELQYFLRAGTGLAAFSINQDTGMIQTLAPLDREFASYYWLTVLAVDRGSVPLSSVTEVYIEVTDANDNPPQMSQAVFYPSIQEDAPVGTSVLQLDAWDPDSSSKGKLTFNITSGNYMGFFMIHPVTGLLSTAQQLDRENKDEHILEVTVLDNGEPSLKSTSRVVVGILDVNDNPPIFSHKLFNVRLPERLSPVSPGPVYRLVASDLDEGLNGRVTYSIEDSDEEAFSIDLVTGVVSSSSTFTAGEYNILTIKATDSGQPPLSASVRLHIEWIPWPRPSSI
PLAFDETYYSFTVMETDPVNHMVGVISVEGRPGLFWFNISGGDKDMDFDIEKTTGSIVIARPLDTRRRSNYNLTVEVTDGSRTIATQVHIFMIANINHHRPQFLETRYEVRVPQDTVPGVELLRVQAIDQDKGKSLIYTIHGSQDPGSASLFQLDPSSGVLVTVGKLDLGSGPSQHTLTVMVRDQEIPIKRNFVWVTIHVEDGNLHPPRFTQLHYEASVPDTIAPGTELLQVRAMDADRGVNAEVHYSLLKGNSEGFFNINALLGIITLAQKLDQANHAPHTLTVKAEDQGSPQWHDLATVIIHVYPSDRSAPIFSKSEYFVEIPESIPVGSPILLVSAMSPSEVTYELREGNKDGVFSMNSYSGLISTQKKLDHEKISSYQLKIRGSNMAGAFTDVMVVVDIIDENDNAPMFLKSTFVGQISEAAPLYSMIMDKNNNPFVIHASDSDKEANSLLVYKILEPEALKFFKIDPSMGTLTIVSEMDYESMPSFQFCVYVHDQGSPVLFAPRPAQVIIHVRDVNDSPPRFSEQIYEVAIVGPIHPGMELLMVRASDEDSEVNYSIKTGNADEAVTIHPVTGSISVLNPAFLGLSRKLTIRASDGLYQDTALVKISLTQVLDKSLQFDQDVYWAAVKENLQDRKALVILGAQGNHLNDTLSYFLLNGTDMFHMVQSAGVLQTRGVAFDREQQDTHELAVEVRDNRTPQRVAQGLVRVSIEDVNDNPPKFKHLPYYTIIQDGTEPGDVLFQVSATDEDLGTNGAVTYEFAEDYTYFRIDPYLGDISLKKPFDYQALNKYHLKVIARDGGTPSLQSEEEVLVTVRNKSNPLFQSPYYKVRVPENITLYTPILHTQARSPEGLRLIYNIVEEEPLMLFTTDFKTGVLTVTGPLDYESKTKHVFTVRATDTALGSFSEATVEVLVEDVNDNPPTFSQLVYTTSISEGLPAQTPVIQLLASDQDSGRNRDVSYQIVEDGSDVSKFFQINGSTGEMSTVQELDYEAQQHFHVKVRAMDKGDPPLTGETLVVVNVSDINDNPPEFRQPQYEANVSELATCGHLVLKVQAIDPDSRDTSRLEYLILSGNQDRHFFINSSSGIISMFNLCKKHLDSSYNLRVGASDGVFRATVPVYINTTNANKYSPEFQQHLYEAELAENAMVGTKVIDLLAIDKDSGPYGTIDYTIINKLASEKFSINPNGQIATLQKLDRENSTERVIAIKVMARDGGGRVAFCTVKIILTDENDNPPQFKASEYTVSIQSNVSKDSPVIQVLAYDADEGQNADVTYSVNPEDLVKDVIEINPVTGVVKVKDSLVGLENQTLDFFIKAQDGGPPHWNSLVPVRLQVVPKKVSLPKFSEPLYTFSAPEDLPEGSEIGIVKAVAAQDPVIYSLVRGTTPESNKDGVFSLDPDTGVIKVRKPMDHESTKLYQIDVMAHCLQNTDVVSLVSVNIQVGDVNDNRPVFEADPYKAVLTENMPVGTSVIQVTAIDKDTGRDGQVSYRLSADPGSNVHELFAIDSESGWITTLQELDCETCQTYHFHVVAYDHGQTIQLSSQALVQVSITDENDNAPRFASEEYRGSVVENSEPGELVATLKTLDADISEQNRQVTCYITEGDPLGQFGISQVGDEWRISSRKTLDREHTAKYLLRVTASDGKFQASVTVEIFVLDVNDNSPQCSQLLYTGKVHEDVFPGHFILKVSATDLDTDTNAQITYSLHGPGAHEFKLDPHTGELTTLTALDRERKDVFNLVAKATDGGGRSCQADITLHVEDVNDNAPRFFPSHCAVAVFDNTTVKTPVAVVFARDPDQGANAQVVYSLPDSAEGHFSIDATTGVIRLEKPLQVRPQAPLELTVRASDLGTPIPLSTLGTVTVSVVGLEDYLPVFLNTEHSVQVPEDAPPGTEVLQLATLTRPGAEKTGYRVVSGNEQGRFRLDARTGILYVNASLDFETSPKYFLSIECSRKSSSSLSDVTTVMVNITDVNEHRPQFPQDPYSTRVLENALVGDVILTVSA
TDEDGPLNSDITYSLIGGNQLGHFTIHPKKGELQVAKALDREQASSYSLKLRATDSGQPPLHEDTDIAIQVADVNDNPPRFFQLNYSTTVQENSPIGSKVLQLILSDPDSPENGPPYSFRITKGNNGSAFRVTPDGWLVTAEGLSRRAQEWYQLQIQASDSGIPPLSSLTSVRVHVTEQSHYAPSALPLEIFITVGEDEFQGGMVGKIHATDRDPQDTLTYSLAEEETLGRHFSVGAPDGKIIAAQGLPRGHYSFNVTVSDGTFTTTAGVHVYVWHVGQEALQQAMWMGFYQLTPEELVSDHWRNLQRFLSHKLDIKRANIHLASLQPAEAVAGVDVLLVFEGHSGTFYEFQELASIITHSAKEMEHSVGVQMRSAMPMVPCQGPTCQGQICHNTVHLDPKVGPTYSTARLSILTPRHHLQRSCSCNGTATRFSGQSYVRYRAPAARNWHIHFYLKTLQPQAILLFTNETASVSLKLASGVPQLEYHCLGGFYGNLSSQRHVNDHEWHSILVEEMDASIRLMVDSMGNTSLVVPENCRGLRPERHLLLGGLILLHSSSNVSQGFEGCLDAVVVNEEALDLLAPGKTVAGLLETQALTQCCLHSDYCSQNTCLNGGKCSWTHGAGYVCKCPPQFSGKHCEQGRENCTFAPCLEGGTCILSPKGASCNCPHPYTGDRCEMEARGCSEGHCLVTPEIQRGDWGQQELLIITVAVAFIIISTVGLLFYCRRCKSHKPVAMEDPDLLARSVGVDTQAMPAIELNPLSASSCNNLNQPEPSKASVPNELVTFGPNSKQRPVVCSVPPRLPPAAVPSHSDNEPVIKRTWSSEEMVYPGGAMVWPPTYSRNERWEYPHSEVTQGPLPPSAHRHSTPVVMPEPNGLYGGFPFPLEMENKRAPLPPRYSNQNLEDLMPSRPPSPRERLVAPCLNEYTAISYYHSQFRQGGGGPCLADGGYKGVGMRLSRAGPSYAVCEVEGAPLAGQGQPRVPPNYEGSDMVESDYGSCEEVMF 2 4049 4071 IQRGDWGQQE LLIITVAVAFIIISTVGLLFYCR RCKSHKPVAM +Q8TDW7 
MDIIMGHCVGTRPPACCLILLLFKLLATVSQGLPGTGPLGFHFTHSIYNATVYENSAARTYVNSQSRMGITLIDLSWDIKYRIVSGDEEGFFKAEEVIIADFCFLRIRTKGGNSAILNREIQDNYLLIVKGSVRGEDLEAWTKVNIQVLDMNDLRPLFSPTTYSVTIAESTPLRTSVAQVTATDADIGSNGEFYYYFKNKVDLFSVHPTSGVISLSGRLNYDEKNRYDLEILAVDRGMKLYGNNGVSSTAKLYVHIERINEHAPTIHVVTHVPFSLEKEPTYAVVTVDDLDDGANGEIESVSIVAGDPLDQFFLAKEGKWLNEYKIKERKQIDWESFPYGYNLTLQAKDKGSPQKCSALKAVYIGNPTRDTVPIRFEKEVYDVSISEFSPPGVVVAIVKLSPEPIDVEYKLSPGEDAVYFKINPRSGLIVTARPLNTVKKEVYKLEVTNKEGDLKAQVTISIEDANDHTPEFQQPLYDAYVNESVPVGTSVLTVSASDKDKGENGYITYSIASLNLLPFVINQFTGVISTTEELDFESSPEIYRFIVRASDWGSPYRHESEVNVTIRIGNVNDNSPLFEKVACQGVISYDFPVGGHITAVSAIDIDELELVKYKIISGNELGFFYLNPDSGVLQLKKSLTNSGIKNGNFALRITATDGENLADPMSINISVLHGKVSSKSFSCRETRVAQKLAEKLLIKAKANGKLNLEDGFLDFYSINRQGPYFDKSFPSDVAVKEDLPVGANILKIKAYDADSGFNGKVLFTISDGNTDSCFNIDMETGQLKVLMPMDREHTDLYLLNITIYDLGNPQKSSWRLLTINVEDANDNSPVFIQDSYSVNILESSGIGTEIIQVEARDKDLGSNGEVTYSVLTDTQQFAINSSTGIVYVADQLDRESKANYSLKIEARDKAESGQQLFSVVTLKVFLDDVNDCSPAFIPSSYSVKVLEDLPVGTVIAWLETHDPDLGLGGQVRYSLVNDYNGRFEIDKASGAIRLSKELDYEKQQFYNLTVRAKDKGRPVSLSSVSFVEVEVVDVNENLHTPYFPDFAVVGSVKENSRIGTSVLQVTARDEDSGRDGEIQYSIRDGSGLGRFSIDDESGVITAADILDRETMGSYWLTVYATDRGVVPLYSTIEVYIEVEDVNDNAPLTSEPIYYPVVMENSPKDVSVIQIQAEDPDSSSNEKLTYRITSGNPQNFFAINIKTGLITTTSRKLDREQQAEHFLEVTVTDGGPSPKQSTIWVVVQVLDENDNKPQFPEKVYQIKLPERDRKKRGEPIYRAFAFDRDEGPNAEISYSIVDGNDDGKFFIDPKTGMVSSRKQFTAGSYDILTIKAVDNGRPQKSSTARLHIEWIKKPPPSPIPLTFDEPFYNFTVMESDRVTEIVGVVSVQPANTPLWFDIVGGNFDSAFDAEKGVGTIVIAKPLDAEQRSIYNMSVEVTDGTNVAVTQVFIKVLDNNDNGPEFSQPNYDVTISEDVLPDTEILQIEATDRDEKHKLSYTVHSSIDSISMRKFRIDPSTGVLYTAERLDHEAQDKHILNIMVRDQEFPYRRNLARVIVNVEDANDHSPYFTNPLYEASVFESAALGSAVLQVTALDKDKGENAELIYTIEAGNTGNMFKIEPVLGIITICKEPDMTTMGQFVLSIKVTDQGSPPMSATAIVRISVTMSDNSHPKFIHKDYQAEVNENVDIGTSVILISAISQSTLIYEVKDGDINGIFTINPYSGVITTQKALDYERTSSYQLIIQATNMAGMASNATVNIQIVDENDNAPVFLFSQYSGSLSEAAPINSIVRSLDNSPLVIRATDADSNRNALLVYQIVESTAKKFFTVDSSTGAIRTIANLDHETIAHFHFHVHVRDSGSPQLTAESPVEVNIEVTDVNDNPPVFTQAVFETILLLPTYVGVEVLKVSATDPDSEVPPELTYSLMEGSLDHFLIDSNSGVLTIKNNNLSKDHYMLIVKVSDGKFYSTSMVTIMVKEAMDSGLHFTQSFYSTSISENNTNITK
VAIVNAVGNRLNEPLKYSILNPGNKFKIKSTSGVIQTTGVPFDREEQELYELVVEASRELDHLRVARVVVRVNIEDINDNSPVFVGLPYYAAVQVDAEPGTLIYQVTAIDKDKGPNGEVTYVLQDDYGHFEINPNSGNVILKEAFNSDLSNIEYGVTILAKDGGKPSLSTSVELPITIVNKAMPVFDKPFYTASVNEDIRMNTPILSINATSPEGQGIIYIIIDGDPFKQFNIDFDTGVLKVVSPLDYEVTSAYKLTIRASDALTGARAEVTVDLLVNDVNDNPPIFDQPTYNTTLSEASLIGTPVLQVVSIDADSENNKMVHYQIVQDTYNSTDYFHIDSSSGLILTARMLDHELVQHCTLKVRSIDSGFPSLSSEVLVHIYISDVNDNPPVFNQLIYESYVSELAPRGHFVTCVQASDADSSDFDRLEYSILSGNDRTSFLMDSKSGVITLSNHRKQRMEPLYSLNVSVSDGLFTSTAQVHIRVLGANLYSPAFSQSTYVAEVRENVAAGTKVIHVRATDGDPGTYGQISYAIINDFAKDRFLIDSNGQVITTERLDRENPLEGDVSIFVRALDGGGRTTFCTVRVIVVDENDNAPQFMTVEYRASVRADVGRGHLVTQVQAIDPDDGANSRITYSLYSEASVSVADLLEIDPDNGWMVTKGNFNQLKNTVLSFFVKAVDGGIPVKHSLIPVYIHVLPPETFLPSFTQSQYSFTIAEDTAIGSTVDTLRILPSQNVWFSTVNGERPENNKGGIFVIEQETGTIKLDKRLDRETSPAFHFKVAATIPLDKVDIVFTVDVDIKVLDLNDNKPVFETSSYDTIIMEGMPVGTKLTQVRAIDMDWGANGQVTYSLHSDSQPEKVMEAFNIDSNTGWISTLKDLDHETDPTFTFSVVASDLGEAFSLSSTALVSVRVTDINDNAPVFAQEVYRGNVKESDPPGEVVAVLSTWDRDTSDVNRQVSYHITGGNPRGRFALGLVQSEWKVYVKRPLDREEQDIYFLNITATDGLFVTQAMVEVSVSDVNDNSPVCDQVAYTALLPEDIPSNKIILKVSAKDADIGSNGYIRYSLYGSGNSEFFLDPESGELKTLALLDRERIPVYSLMAKATDGGGRFCQSNIHLILEDVNDNPPVFSSDHYNTCVYENTATKALLTRVQAVDPDIGINRKVVYSLADSAGGVFSIDSSSGIIILEQPLDREQQSSYNISVRATDQSPGQSLSSLTTVTITVLDINDNPPVFERRDYLVTVPEDTSPGTQVLAVFATSKDIGTNAEITYLIRSGNEQGKFKINPKTGGISVSEVLDYELCKRFYLVVEAKDGGTPALSAVATVNINLTDVNDNPPKFSQDVYSAVISEDALVGDSVILLIAEDVDSQPNGQIHFSIVNGDRDNEFTVDPVLGLVKVKKKLDRERVSGYSLLVQAVDSGIPAMSSTATVNIDISDVNDNSPVFTPANYTAVIQENKPVGTSILQLVVTDRDSFHNGPPFSFSILSGNEEEEFVLDPHGILRSAVVFQHTESLEYVLCVQAKDSGKPQQVSHTYIRVRVIEESTHKPTAIPLEIFIVTMEDDFPGGVIGKIHATDQDMYDVLTFALKSEQKSLFKVNSHDGKIIALGGLDSGKYVLNVSVSDGRFQVPIDVVVHVEQLVHEMLQNTVTIRFENVSPEDFVGLHMHGFRRTLRNAVLTQKQDSLRIISIQPVAGTNQLDMLFAVEMHSSEFYKPAYLIQKLSNARRHLENIMRISAILEKNCSGLDCQEQHCEQGLSLDSHALMTYSTARISFVCPRFYRNVRCTCNGGLCPGSNDPCVEKPCPGDMQCVSYEASRRPFLCQCPPGKLGECSGHTSLSFAGNSYIKYRLSENSKEEDFKLALRLRTLQSNGIIMYTRANPCIILKIVDGKLWFQLDCGSGPGILGISGRAVNDGSWHSVFLELNRNFTSLSLDDSYVERRRAPLYFQTLSTESSIYFGALVQADNIRSLTDTRVTQVLSGFQGCLDSVILNNNELPLQN
KRSSFAEVVGLTELKLGCVLYPDACKRSPCQHGGSCTGLPSGGYQCTCLSQFTGRNCESEITACFPNPCRNGGSCDPIGNTFICNCKAGLTGVTCEEDINECEREECENGGSCVNVFGSFLCNCTPGYVGQYCGLRPVVVPNIQAGHSYVGKEELIGIAVVLFVIFILVVLFIVFRKKVFRKNYSRNNITLVQDPATAALLNKSNGIPFRNLRGSGDGRNVYQEVGPPQVPVRPMAYTPCFQSDSRSNLDKIVDGLGGEHQEMTTFHPESPRILTARRGVVVCSVAPNLPAVSPCRSDCDSIRKNGWDAGTENKGVDDPGEVTCFAGSNKGSNSEVQSLSSFQSDSGDDNAYHWDTSDWMPGARLSDIEEVPNYENQDGGSAHQGSTRELESDYYLGGYDIDSEYPPPHEEEFLSQDQLPPPLPEDFPDQYEALPPSQPVSLASTLSPDCRRRPQFHPSQYLPPHPFPNETDLVGPPASCEFSTFAVSMNQGTEPTGPADSVSLSLHNSRGTSSSDVSANCGFDDSEVAMSDYESVGELSLASLHIPFVETQHQTQV 2 4156 4175 GHSYVGKEEL IGIAVVLFVIFILVVLFIVF RKKVFRKNYS +Q8WWV6 MPLFLILCLLQGSSFALPQKRPHPRWLWEGSLPSRTHLRAMGTLRPSSPLCWREESSFAAPNSLKGSRLVSGEPGGAVTIQCHYAPSSVNRHQRKYWCRLGPPRWICQTIVSTNQYTHHRYRDRVALTDFPQRGLFVVRLSQLSPDDIGCYLCGIGSENNMLFLSMNLTISAGPASTLPTATPAAGELTMRSYGTASPVANRWTPGTTQTLGQGTAWDTVASTPGTSKTTASAEGRRTPGATRPAAPGTGSWAEGSVKAPAPIPESPPSKSRSMSNTTEGVWEGTRSSVTNRARASKDRREMTTTKADRPREDIEGVRIALDAAKKVLGTIGPPALVSETLAWEILPQATPVSKQQSQGSIGETTPAAGMWTLGTPAADVWILGTPAADVWTSMEAASGEGSAAGDLDAATGDRGPQATLSQTPAVGPWGPPGKESSVKRTFPEDESSSRTLAPVSTMLALFMLMALVLLQRKLWRRRTSQEAERVTLIQMTHFLEVNPQADQLPHVERKMLQDDSLPAGASLTAPERNPGP 2 451 470 TFPEDESSSR TLAPVSTMLALFMLMALVLL QRKLWRRRTS +P12319 MAPAMESPTLLCVALLFFAPDGVLAVPQKPKVSLNPPWNRIFKGENVTLTCNGNNFFEVSSTKWFHNGSLSEETNSSLNIVNAKFEDSGEYKCQHQQVNESEPVYLEVFSDWLLLQASAEVVMEGQPLFLRCHGWRNWDVYKVIYYKDGEALKYWYENHNISITNATVEDSGTYYCTGKVWQLDYESEPLNITVIKAPREKYWLQFFIPLLVVILFAVDTGLFISTQQQVTFLLKIKRTRKGFRLLNPHPKPNPKNN 2 203 225 TVIKAPREKY WLQFFIPLLVVILFAVDTGLFIS TQQQVTFLLK +P12318 MTMETQMSQNVCPRNLWLLQPLTVLLLLASADSQAAAPPKAVLKLEPPWINVLQEDSVTLTCQGARSPESDSIQWFHNGNLIPTHTQPSYRFKANNNDSGEYTCQTGQTSLSDPVHLTVLSEWLVLQTPHLEFQEGETIMLRCHSWKDKPLVKVTFFQNGKSQKFSHLDPTFSIPQANHSHSGDYHCTGNIGYTLFSSKPVTITVQVPSMGSSSPMGIIVAVVIATAVAAIVAAVVALIYCRKKRISANSTDPVKAAQFEPPGRQMIAIRKRQLEETNNDYETADGGYMTLNPRAPTDDDKNIYLTLPPNDHVNSNN 2 218 240 PSMGSSSPMG IIVAVVIATAVAAIVAAVVALIY CRKKRISANS +P08637 
MWQLLLPTALLLLVSAGMRTEDLPKAVVFLEPQWYRVLEKDSVTLKCQGAYSPEDNSTQWFHNESLISSQASSYFIDAATVDDSGEYRCQTNLSTLSDPVQLEVHIGWLLLQAPRWVFKEEDPIHLRCHSWKNTALHKVTYLQNGKGRKYFHHNSDFYIPKATLKDSGSYFCRGLFGSKNVSSETVNITITQGLAVSTISSFFPPGYQVSFCLVMVLLFAVDTGLYFSVKTNIRSSTRDWKDHKFKWRKDPQDK 2 207 229 STISSFFPPG YQVSFCLVMVLLFAVDTGLYFSV KTNIRSSTRD +P12314 MWFLTTLLLWVPVDGQVDTTKAVITLQPPWVSVFQEETVTLHCEVLHLPGSSSTQWFLNGTATQTSTPSYRITSASVNDSGEYRCQRGLSGRSDPIQLEIHRGWLLLQVSSRVFTEGEPLALRCHAWKDKLVYNVLYYRNGKAFKFFHWNSNLTILKTNISHNGTYHCSGMGKHRYTSAGISVTVKELFPAPVLNASVTSPLLEGNLVTLSCETKLLLQRPGLQLYFSFYMGSKTLRGRNTSSEYQILTARREDSGLYWCEAATEDGNVLKRSPELELQVLGLQLPTPVWFHVLFYLAVGIMFLVNTVLWVTIRKELKRKKKWDLEISLDSGHEKKVISSLQEDRHLEEELKCQEQKEEQLQEGVHRKEPQGAT 2 289 311 QVLGLQLPTP VWFHVLFYLAVGIMFLVNTVLWV TIRKELKRKK +P55899 MGVPRPQPWALGLLLFLLPGSLGAESHLSLLYHLTAVSSPAPGTPAFWVSGWLGPQQYLSYNSLRGEAEPCGAWVWENQVSWYWEKETTDLRIKEKLFLEAFKALGGKGPYTLQGLLGCELGPDNTSVPTAKFALNGEEFMNFDLKQGTWGGDWPEALAISQRWQQQDKAANKELTFLLFSCPHRLREHLERGRGNLEWKEPPSMRLKARPSSPGFSVLTCSAFSFYPPELQLRFLRNGLAAGTGQGDFGPNSDGSFHASSSLTVKSGDEHHYCCIVQHAGLAQPLRVELESPAKSSVLVVGIVIGVLLLTAAAVGGALLWRRMRSGLPAPWISLRGDDTGVLLPTPGEAQDADLKDVNVIPATA 2 298 320 VELESPAKSS VLVVGIVIGVLLLTAAAVGGALL WRRMRSGLPA +Q96LA6 MLPRLLLLICAPLCEPAELFLIASPSHPTEGSPVTLTCKMPFLQSSDAQFQFCFFRDTRALGPGWSSSPKLQIAAMWKEDTGSYWCEAQTMASKVLRSRRSQINVHRVPVADVSLETQPPGGQVMEGDRLVLICSVAMGTGDITFLWYKGAVGLNLQSKTQRSLTAEYEIPSVRESDAEQYYCVAENGYGPSPSGLVSITVRIPVSRPILMLRAPRAQAAVEDVLELHCEALRGSPPILYWFYHEDITLGSRSAPSGGGASFNLSLTEEHSGNYSCEANNGLGAQRSEAVTLNFTVPTGARSNHLTSGVIEGLLSTLGPATVALLFCYGLKRKIGRRSARDPLRSLPSPLPQEFTYLNSPTPGQLQPIYENVNVVSGDEVYSLAYYNQPEQESVAAETLGTHMEDKVSLDIYSRLRKANITDVDYEDAM 2 308 330 TGARSNHLTS GVIEGLLSTLGPATVALLFCYGL KRKIGRRSAR +Q96LA5 
MLLWSLLVIFDAVTEQADSLTLVAPSSVFEGDSIVLKCQGEQNWKIQKMAYHKDNKELSVFKKFSDFLIQSAVLSDSGNYFCSTKGQLFLWDKTSNIVKIKVQELFQRPVLTASSFQPIEGGPVSLKCETRLSPQRLDVQLQFCFFRENQVLGSGWSSSPELQISAVWSEDTGSYWCKAETVTHRIRKQSLQSQIHVQRIPISNVSLEIRAPGGQVTEGQKLILLCSVAGGTGNVTFSWYREATGTSMGKKTQRSLSAELEIPAVKESDAGKYYCRADNGHVPIQSKVVNIPVRIPVSRPVLTLRSPGAQAAVGDLLELHCEALRGSPPILYQFYHEDVTLGNSSAPSGGGASFNLSLTAEHSGNYSCEANNGLGAQCSEAVPVSISGPDGYRRDLMTAGVLWGLFGVLGFTGVALLLYALFHKISGESSATNEPRGASRPNPQEFTYSSPTPDMEELQPVYVNVGSVDVDVVYSQVWSMQQPESSANIRTLLENKDSQVIYSSVKKS 2 400 422 DGYRRDLMTA GVLWGLFGVLGFTGVALLLYALF HKISGESSAT +Q96P31 MLLWLLLLILTPGREQSGVAPKAVLLLNPPWSTAFKGEKVALICSSISHSLAQGDTYWYHDEKLLKIKHDKIQITEPGNYQCKTRGSSLSDAVHVEFSPDWLILQALHPVFEGDNVILRCQGKDNKNTHQKVYYKDGKQLPNSYNLEKITVNSVSRDNSKYHCTAYRKFYILDIEVTSKPLNIQVQELFLHPVLRASSSTPIEGSPMTLTCETQLSPQRPDVQLQFSLFRDSQTLGLGWSRSPRLQIPAMWTEDSGSYWCEVETVTHSIKKRSLRSQIRVQRVPVSNVNLEIRPTGGQLIEGENMVLICSVAQGSGTVTFSWHKEGRVRSLGRKTQRSLLAELHVLTVKESDAGRYYCAADNVHSPILSTWIRVTVRIPVSHPVLTFRAPRAHTVVGDLLELHCESLRGSPPILYRFYHEDVTLGNSSAPSGGGASFNLSLTAEHSGNYSCDADNGLGAQHSHGVSLRVTVPVSRPVLTLRAPGAQAVVGDLLELHCESLRGSFPILYWFYHEDDTLGNISAHSGGGASFNLSLTTEHSGNYSCEADNGLGAQHSKVVTLNVTGTSRNRTGLTAAGITGLVLSILVLAAAAALLHYARARRKPGGLSATGTSSHSPSECQEPSSSRPSRIDPQEPTHSKPLAPMELEPMYSNVNPGDSNPIYSQIWSIQHTKENSANCPMMHQEHEELTVLYSELKKTHPDDSAGEASSRGRAHEEDDEENYENVPRVLLASDH 2 572 594 VTGTSRNRTG LTAAGITGLVLSILVLAAAAALL HYARARRKPG +Q68SN8 MSGSFSPCVVFTQMWLTLLVVTPVNGQHEAAQQSVVSLQPPWTTFFRGEVVTLTCYRFGFSVPQKTKWYQKRKTVKQTPGALVIKAHTLKVHESGEYWCQADSLLPSMHVNVEFSEDFLVLQAPPAVFEGDSVVLRCYAKKGIEAETLTFYKDGKALTLHPQSSEFYIHRANLKDNGQYKCTSKKKWSFGSLYTSNTVVVQVQELFPRPVLRARPSHPIDGSPVTLTCQTQLSAQKSDARLQFCFFRNLQLLGSGCSRSSEFHIPAIWTEESKRYQCKAETVNSQVSKQSTAFIIPVQRASARFQTHIIPASKLVFEGQLLLLNCSVKGVPGPLKFSWYKKDMLNKETKILKSSNAEFKISQVNISDAGEYYCEANNSRRSFVSRAFPITIKVPVSQPVLTLSTGKTQALEGDLMTLHCQSQRGSPCILYEFFYENVSLGNSSILSGGGAYFNFSMSTERSGNYYCTADNGLGAQCSEAIRISIFDMTKNRSVPMAAGITVGLLIMAVGVFLFYCWFSRKAGGKPTSDDSRNPSDSEPQEPTYYNVPACIELQPVYSNEPEENVIYTEVRRTQPRQKHADQESESPRSRCQMAEKK 2 495 517 FDMTKNRSVP 
MAAGITVGLLIMAVGVFLFYCWF SRKAGGKPTS +Q6DN72 MLLWTAVLLFVPCVGKTVWLYLQAWPNPVFEGDALTLRCQGWKNTPLSQVKFYRDGKFLHFSKENQTLSMGAATVQSRGQYSCSGQVMYIPQTFTQTSETAMVQVQELFPPPVLSAIPSPEPREGSLVTLRCQTKLHPLRSALRLLFSFHKDGHTLQDRGPHPELCIPGAKEGDSGLYWCEVAPEGGQVQKQSPQLEVRVQAPVSRPVLTLHHGPADPAVGDMVQLLCEAQRGSPPILYSFYLDEKIVGNHSAPCGGTTSLLFPVKSEQDAGNYSCEAENSVSRERSEPKKLSLKGSQVLFTPASNWLVPWLPASLLGLMVIAAALLVYVRSWRKAGPLPSQIPPTAPGGEQCPLYANVHHQKGKDEGVVYSVVHRTSKRSEARSAEFTVGRKDSSIICAEVRCLQPSEVSSTEVNMRSRTLQEPLSDCEEVLC 2 307 329 SQVLFTPASN WLVPWLPASLLGLMVIAAALLVY VRSWRKAGPL +P11362 MWSWKCLLFWAVLVTATLCTARPSPTLPEQAQPWGAPVEVESFLVHPGDLLQLRCRLRDDVQSINWLRDGVQLAESNRTRITGEEVEVQDSVPADSGLYACVTSSPSGSDTTYFSVNVSDALPSSEDDDDDDDSSSEEKETDNTKPNRMPVAPYWTSPEKMEKKLHAVPAAKTVKFKCPSSGTPNPTLRWLKNGKEFKPDHRIGGYKVRYATWSIIMDSVVPSDKGNYTCIVENEYGSINHTYQLDVVERSPHRPILQAGLPANKTVALGSNVEFMCKVYSDPQPHIQWLKHIEVNGSKIGPDNLPYVQILKTAGVNTTDKEMEVLHLRNVSFEDAGEYTCLAGNSIGLSHHSAWLTVLEALEERPAVMTSPLYLEIIIYCTGAFLISCMVGSVIVYKMKSGTKKSDFHSQMAVHKLAKSIPLRRQVTVSADSSASMNSGVLLVRPSRLSSSGTPMLAGVSEYELPEDPRWELPRDRLVLGKPLGEGCFGQVVLAEAIGLDKDKPNRVTKVAVKMLKSDATEKDLSDLISEMEMMKMIGKHKNIINLLGACTQDGPLYVIVEYASKGNLREYLQARRPPGLEYCYNPSHNPEEQLSSKDLVSCAYQVARGMEYLASKKCIHRDLAARNVLVTEDNVMKIADFGLARDIHHIDYYKKTTNGRLPVKWMAPEALFDRIYTHQSDVWSFGVLLWEIFTLGGSPYPGVPVEELFKLLKEGHRMDKPSNCTNELYMMMRDCWHAVPSQRPTFKQLVEDLDRIVALTSNQEYLDLSMPLDQYSPSFPDTRSSTCSSGEDSVFSHEPLPEEPCLPRHPAQLANGGLKRR 2 375 397 RPAVMTSPLY LEIIIYCTGAFLISCMVGSVIVY KMKSGTKKSD +Q8N441 MTPSPLLLLLLPPLLLGAFPPAAAARGPPKMADKVVPRQVARLGRTVRLQCPVEGDPPPLTMWTKDGRTIHSGWSRFRVLPQGLKVKQVEREDAGVYVCKATNGFGSLSVNYTLVVLDDISPGKESLGPDSSSGGQEDPASQQWARPRFTQPSKMRRRVIARPVGSSVRLKCVASGHPRPDITWMKDDQALTRPEAAEPRKKKWTLSLKNLRPEDSGKYTCRVSNRAGAINATYKVDVIQRTRSKPVLTGTHPVNTTVDFGGTTSFQCKVRSDVKPVIQWLKRVEYGAEGRHNSTIDVGGQKFVVLPTGDVWSRPDGSYLNKLLITRARQDDAGMYICLGANTMGYSFRSAFLTVLPDPKPPGPPVASSSSATSLPWPVVIGIPAGAVFILGTLLLWLCQAQKKPCTPAPAPPLPGHRPPGTARDRSGDKDLPSLAALSAGPGVGLCEEHGSPAAPQHLLGPGPVAGPKLYPKLYTDIHTHTHTHSHTHSHVEGKVHQHIHYQC 2 377 399 ASSSSATSLP WPVVIGIPAGAVFILGTLLLWLC QAQKKPCTPA +P36888 
MPALARDGGQLPLLVVFSAMIFGTITNQDLPVIKCVLINHKNNDSSVGKSSSYPMVSESPEDLGCALRPQSSGTVYEAAAVEVDVSASITLQVLVDAPGNISCLWVFKHSSLNCQPHFDLQNRGVVSMVILKMTETQAGEYLLFIQSEATNYTILFTVSIRNTLLYTLRRPYFRKMENQDALVCISESVPEPIVEWVLCDSQGESCKEESPAVVKKEEKVLHELFGTDIRCCARNELGRECTRLFTIDLNQTPQTTLPQLFLKVGEPLWIRCKAVHVNHGFGLTWELENKALEEGNYFEMSTYSTNRTMIRILFAFVSSVARNDTGYYTCSSSKHPSQSALVTIVEKGFINATNSSEDYEIDQYEEFCFSVRFKAYPQIRCTWTFSRKSFPCEQKGLDNGYSISKFCNHKHQPGEYIFHAENDDAQFTKMFTLNIRRKPQVLAEASASQASCFSDGYPLPSWTWKKCSDKSPNCTEEITEGVWNRKANRKVFGQWVSSSTLNMSEAIKGFLVKCCAYNSLGTSCETILLNSPGPFPFIQDNISFYATIGVCLLFIVVLTLLICHKYKKQFRYESQLQMVQVTGSSDNEYFYVDFREYEYDLKWEFPRENLEFGKVLGSGAFGKVMNATAYGISKTGVSIQVAVKMLKEKADSSEREALMSELKMMTQLGSHENIVNLLGACTLSGPIYLIFEYCCYGDLLNYLRSKREKFHRTWTEIFKEHNFSFYPTFQSHPNSSMPGSREVQIHPDSDQISGLHGNSFHSEDEIEYENQKRLEEEEDLNVLTFEDLLCFAYQVAKGMEFLEFKSCVHRDLAARNVLVTHGKVVKICDFGLARDIMSDSNYVVRGNARLPVKWMAPESLFEGIYTIKSDVWSYGILLWEIFSLGVNPYPGIPVDANFYKLIQNGFKMDQPFYATEEIYIIMQSCWAFDSRKRPSFPNLTSFLGCQLADAEEAMYQNVDGRVSECPHTYQNRRPFSREMDLGLLSPQAQVEDS 2 542 564 PGPFPFIQDN ISFYATIGVCLLFIVVLTLLICH KYKKQFRYES +F2Z333 MRAPPLLLLLAACAPPPCAAAAPTPPGWEPTPDAPWCPYKVLPEGPEAGGGRLCFRSPARGFRCQAPGCVLHAPAGRSLRASVLRNRSVLLQWRLAPAAARRVRAFALNCSWRGAYTRFPCERVLLGASCRDYLLPDVHDSVLYRLCLQPLPLRAGPAAAAPETPEPAECVEFTAEPAGMQDIVVAMTAVGGSICVMLVVICLLVAYITENLMRPALARPGLRRHP 2 183 205 FTAEPAGMQD IVVAMTAVGGSICVMLVVICLLV AYITENLMRP +Q8NAU1 MHPGSPSAWPPRARAALRLWLGCVCFALVQADSPSAPVNVTVRHLKANSAVVSWDVLEDEVVIGFAISQQKKDVRMLRFIQEVNTTTRSCALWDLEEDTEYIVHVQAISIQGQSPASEPVLFKTPREAEKMASKNKDEVTMKEMGRNQQLRTGEVLIIVVVLFMWAGVIALFCRQYDIIKDNEPNNNKEKTKSASETSTPEHQGGGLLRSKI 2 150 172 TMKEMGRNQQ LRTGEVLIIVVVLFMWAGVIALF CRQYDIIKDN +Q9P2B2 
MGRLASRPLLLALLSLALCRGRVVRVPTATLVRVVGTELVIPCNVSDYDGPSEQNFDWSFSSLGSSFVELASTWEVGFPAQLYQERLQRGEILLRRTANDAVELHIKNVQPSDQGHYKCSTPSTDATVQGNYEDTVQVKVLADSLHVGPSARPPPSLSLREGEPFELRCTAASASPLHTHLALLWEVHRGPARRSVLALTHEGRFHPGLGYEQRYHSGDVRLDTVGSDAYRLSVSRALSADQGSYRCIVSEWIAEQGNWQEIQEKAVEVATVVIQPSVLRAAVPKNVSVAEGKELDLTCNITTDRADDVRPEVTWSFSRMPDSTLPGSRVLARLDRDSLVHSSPHVALSHVDARSYHLLVRDVSKENSGYYYCHVSLWAPGHNRSWHKVAEAVSSPAGVGVTWLEPDYQVYLNASKVPGFADDPTELACRVVDTKSGEANVRFTVSWYYRMNRRSDNVVTSELLAVMDGDWTLKYGERSKQRAQDGDFIFSKEHTDTFNFRIQRTTEEDRGNYYCVVSAWTKQRNNSWVKSKDVFSKPVNIFWALEDSVLVVKARQPKPFFAAGNTFEMTCKVSSKNIKSPRYSVLIMAEKPVGDLSSPNETKYIISLDQDSVVKLENWTDASRVDGVVLEKVQEDEFRYRMYQTQVSDAGLYRCMVTAWSPVRGSLWREAATSLSNPIEIDFQTSGPIFNASVHSDTPSVIRGDLIKLFCIITVEGAALDPDDMAFDVSWFAVHSFGLDKAPVLLSSLDRKGIVTTSRRDWKSDLSLERVSVLEFLLQVHGSEDQDFGNYYCSVTPWVKSPTGSWQKEAEIHSKPVFITVKMDVLNAFKYPLLIGVGLSTVIGLLSCLIGYCSSHWCCKKEVQETRRERRRLMSMEMD 2 831 853 VKMDVLNAFK YPLLIGVGLSTVIGLLSCLIGYC SSHWCCKKEV +Q5SZK8 MHSAGTPGLSSRRTGNSTSFQPGPPPPPRLLLLLLLLLSLVSRVPAQPAAFGRALLSPGLAGAAGVPAEEAIVLANRGLRVPFGREVWLDPLHDLVLQVQPGDRCAVSVLDNDALAQRPGRLSPKRFPCDFGPGEVRYSHLGARSPSRDRVRLQLRYDAPGGAVVLPLVLEVEVVFTQLEVVTRNLPLVVEELLGTSNALDARSLEFAFQPETEECRVGILSGLGALPRYGELLHYPQVPGGAREGGAPETLLMDCKAFQELGVRYRHTAASRSPNRDWIPMVVELRSRGAPVGSPALKREHFQVLVRIRGGAENTAPKPSFVAMMMMEVDQFVLTALTPDMLAAEDAESPSDLLIFNLTSPFQPGQGYLVSTDDRSLPLSSFTQRDLRLLKIAYQPPSEDSDQERLFELELEVVDLEGAASDPFAFMVVVKPMNTMAPVVTRNTGLILYEGQSRPLTGPAGSGPQNLVISDEDDLEAVRLEVVAGLRHGHLVILGASSGSSAPKSFTVAELAAGQVVYQHDDRDGSLSDNLVLRMVDGGGRHQVQFLFPITLVPVDDQPPVLNANTGLTLAEGETVPILPLSLSATDMDSDDSLLLFVLESPFLTTGHLLLRQTHPPHEKQELLRGLWRKEGAFYERTVTEWQQQDITEGRLFYRHSGPHSPGPVTDQFTFRVQDNHDPPNQSGLQRFVIRIHPVDRLPPELGSGCPLRMVVQESQLTPLRKKWLRYTDLDTDDRELRYTVTQSPTDTDENHLPAPLGTLVLTDNPSVVVTHFTQAQINHHKIAYRPPGQELGVATRVAQFQFQVEDRAGNVAPGTFTLYLHPVDNQPPEILNTGFTIQEKGHHILSETELHVNDVDTDVAHISFTLTQAPKHGHMRVSGQILHVGGLFHLEDIKQGRVSYAHNGDKSLTDSCSLEVSDRHHVVPITLRVNVRPVDDEVPILSHPTGTLESYLDVLENGATEITANVIKGTNEETDDLMLTFLLEDPPLYGEILVNGIPAEQFTQRDILEGSVVYTHTSGEIGLLPKADSFNLSLSDMSQEWRIGGNTIQGVTIW
VTILPVDSQAPEIFVGEQLIVMEGDKSVITSVHISAEDVDSLNDDILCTIVIQPTSGYVENISPAPGSEKSRAGIAISAFNLKDLRQGHINYVQSVHKGVEPVEDRFVFRCSDGINFSERQFFPIVIIPTNDEQPEMFMREFMVMEGMSLVIDTPILNAADADVPLDDLTFTITQFPTHGHIMNQLINGTVLVESFTLDQIIESSSIIYEHDDSETQEDSFVIKLTDGKHSVEKTVLIIVIPVDDETPRMTINNGLEIEIGDTKIINNKILMATDLDSEDKSLVYIIRYGPGHGLLQRRKPTGAFENITLGMNFTQDEVDRNLIQYVHLGQEGIRDLIKFDVTDGINPLIDRYFYVSIGSIDIVFPDVISKGVSLKEGGKVTLTTDLLSTSDLNSPDENLVFTITRAPMRGHLECTDQPGVSITSFTQLQLAGNKIYYIHTADDEVKMDSFEFQVTDGRNPVFRTFRISISDVDNKKPVVTIHKLVVSESENKLITPFELTVEDRDTPDKLLKFTITQVPIHGHLLFNNTRPVMVFTKQDLNENLISYKHDGTESSEDSFSFTVTDGTHTDFYVFPDTVFETRRPQVMKIQVLAVDNSVPQIAVNKGASTLRTLATGHLGFMITSKILKVEDRDSLHISLRFIVTEAPQHGYLLNLDKGNHSITQFTQADIDDMKICYVLREGANATSDMFYFAVEDGGGNKLTYQNFRLNWAWISFEKEYYLVNEDSKFLDVVLKRRGYLGETSFISIGTRDRTAEKDKDFKGKAQKQVQFNPGQTRATWRVRILSDGEHEQSETFQVVLSEPVLAALEFPTVATVEIVDPGDEPTVFIPQSKYSVEEDVGELFIPIRRSGDVSQELMVVCYTQQGTATGTVPTSVLSYSDYISRPEDHTSVVRFDKDEREKLCRIVIIDDSLYEEEETFHVLLSMPMGGRIGSEFPGAQVTIVPDKDDEPIFYFGDVEYSVDESAGYVEVQVWRTGTDLSKSSSVTVRSRKTDPPSADAGTDYVGISRNLDFAPGVNMQPVRVVILDDLGQPALEGIEKFELVLRMPMNAALGEPSKATVSINDSVSDLPKMQFKERIYTGSESDGQIVTMIHRTGDVQYRSSVRCYTRQGSAQVMMDFEERPNTDTSIITFLPGETEKPCILELMDDVLYEEVEELRLVLGTPQSNSPFGAAVGEQNETLIRIRDDADKTVIKFGETKFSVTEPKEPGESVVIRIPVIRQGDTSKVSIVRVHTKDGSATSGEDYHPVSEEIEFKEGETQHVVEIEVTFDGVREMREAFTVHLKPDENMIAEMQLTKAIVYIEEMSSMADVTFPSVPQIVSLLMYDDTSKAKESAEPMSGYPVICITACNPKYSDYDKTGSICASENINDTLTRYRWLISAPAGPDGVTSPMREVDFDTFFTSSKMVTLDSIYFQPGSRVQCAARAVNTNGDEGLELMSPIVTISREEGLCQPRVPGVVGAEPFSAKLRYTGPEDADYTNLIKLTVTMPHIDGMLPVISTRELSNFELTLSPDGTRVGNHKCSNLLDYTEVKTHYGFLTDATKNPEIIGETYPYQYSLSIRGSTTLRFYRNLNLEACLWEFVSYYDMSELLADCGGTIGTDGQVLNLVQSYVTLRVPLYVSYVFHSPVGVGGWQHFDLKSELRLTFVYDTAILWNDGIGSPPEAELQGSLYPTSMRIGDEGRLAVHFKTEAQFHGLFVLSHPASFTSSVIMSADHPGLTFSLRLIRSEPTYNQPVQQWSFVSDFAVRDYSGTYTVKLVPCTAPSHQEYRLPVTCNPREPVTFDLDIRFQQVSDPVAAEFSLNTQMYLLSKKSLWLSDGSMGFGQESDVAFAEGDIIYGRVMVDPVQNLGDSFYCSIEKVFLCTGADGYVPKYSPMNAEYGCLADSPSLLYRFKIVDKAQPETQATSFGNVLFNAKLAVDDPEAILLVNQPGSDGFKVDSTPLFQVALGREWYIHTIYTVRSKDNANRGIGKRSVEYHSLVSQGKPQSTTKSRKKREIR
STPSLAWEIGAENSRGTNIQHIALDRTKRQIPHGRAPPDGILPWELNSPSSAVSLVTVVGGTTVGLLTICLTVIAVLMCRGKESFRGKDAPKGSSSSEPMVPPQSHHNDSSEV 2 3111 3133 ELNSPSSAVS LVTVVGGTTVGLLTICLTVIAVL MCRGKESFRG +P09958 MELRPWLLWVVAATGTLVLLAADAQGQKVFTNTWAVRIPGGPAVANSVARKHGFLNLGQIFGDYYHFWHRGVTKRSLSPHRPRHSRLQREPQVQWLEQQVAKRRTKRDVYQEPTDPKFPQQWYLSGVTQRDLNVKAAWAQGYTGHGIVVSILDDGIEKNHPDLAGNYDPGASFDVNDQDPDPQPRYTQMNDNRHGTRCAGEVAAVANNGVCGVGVAYNARIGGVRMLDGEVTDAVEARSLGLNPNHIHIYSASWGPEDDGKTVDGPARLAEEAFFRGVSQGRGGLGSIFVWASGNGGREHDSCNCDGYTNSIYTLSISSATQFGNVPWYSEACSSTLATTYSSGNQNEKQIVTTDLRQKCTESHTGTSASAPLAAGIIALTLEANKNLTWRDMQHLVVQTSKPAHLNANDWATNGVGRKVSHSYGYGLLDAGAMVALAQNWTTVAPQRKCIIDILTEPKDIGKRLEVRKTVTACLGEPNHITRLEHAQARLTLSYNRRGDLAIHLVSPMGTRSTLLAARPHDYSADGFNDWAFMTTHSWDEDPSGEWVLEIENTSEANNYGTLTKFTLVLYGTAPEGLPVPPESSGCKTLTSSQACVVCEEGFSLHQKSCVQHCPPGFAPQVLDTHYSTENDVETIRASVCAPCHASCATCQGPALTDCLSCPSHASLDPVEQTCSRQSQSSRESPPQQQPPRLPPEVEAGQRLRAGLLPSHLPEVVAGLSCAFIVLVFVTVFLVLQLRSGFSFRGVKVYTMDRGLISYKGLPPEAWQEECPSDSEEDEGRGERTAFIKDQSAL 2 716 738 AGLLPSHLPE VVAGLSCAFIVLVFVTVFLVLQL RSGFSFRGVK +P23188 MELRSWLLWVVAAAGAVVLLAADAQGQKIFTNTWAVHIPGGPAVADRVAQKHGFHNLGQIFGDYYHFWHRAVTKRSLSPHRPRHSRLQREPQVKWLEQQVAKRRAKRDVYQEPTDPKFPQQWYLSGVTQRDLNVKEAWAQGFTGHGIVVSILDDGIEKNHPDLAGNYDPGASFDVNDQDPDPQPRYTQMNDNRHGTRCAGEVAAVANNGVCGVGVAYNARIGGVRMLDGEVTDAVEARSLGLNPNHIHIYSASWGPEDDGKTVDGPARLAEEAFFRGVSQGRGGLGSIFVWASGNGGREHDSCNCDGYTNSIYTLSISSATQFGNVPWYSEACSSTLATTYSSGNQNEKQIVTTDLRQKCTESHTGTSASAPLAAGIIALTLEANKNLTWRDMQHLVVQTSKPAHLNADDWATNGVGRKVSHSYGYGLLDAGAMVALAQNWTTVAPQRKCIVEILVEPKDIGKRLEVRKAVTACLGEPNHITRLEHVQARLTLSYNRRGDLAIHLISPMGTRSTLLAARPHDYSADGFNDWAFMTTHSWDEDPAGEWVLEIENTSEANNYGTLTKFTLVLYGTAPEGLSTPPESSGCKTLTSSQACVVCEEGYSLHQKSCVQHCPPGFIPQVLDTHYSTENDVEIIRASVCTPCHASCATCQGPAPTDCLSCPSHASLDPVEQTCSRQSQSSRESRPQQQPPALRPEVEMEPRLQAGLASHLPEVLAGLSCLIIVLIFGIVFLFLHRCSGFSFRGVKVYTMDRGLISYKGLPPEAWQEECPSDSEEDEGRGERTAFIKDQSAL 2 713 735 RLQAGLASHL PEVLAGLSCLIIVLIFGIVFLFL HRCSGFSFRG +O95866 
MAVFLQLLPLLLSRAQGNPGASLDGRPGDRVNLSCGGVSHPIRWVWAPSFPACKGLSKGRRPILWASSSGTPTVPPLQPFVGRLRSLDSGIRRLELLLSAGDSGTFFCKGRHEDESRTVLHVLGDRTYCKAPGPTHGSVYPQLLIPLLGAGLVLGLGALGLVWWLHRRLPPQPIRPLPRFAPLVKTEPQRPVKEEEPKIPGDLDQEPSLLYADLDHLALSRPRRLSTADPADASTIYAVVV 2 143 165 GPTHGSVYPQ LLIPLLGAGLVLGLGALGLVWWL HRRLPPQPIR +D7PDD4 MALVLPLLPLLLSKVQGNPEVSLEGSPGDRVNLSCIGVSDPTRWAWAPSFPACKGLSKGRRPILWASTRGTPTVLQHFSGRLRSLDNGIKRLELLLSAGDSGTFFCKGRHENESRTVLQVLGDKAGCRPAGSTHGYEYPKVLIPLLGVGLVLGLGVAGVVWRRRRLSPPPPPPPPPGPLPTFAPVINAEPQRPLEQESKISGHLDQEPSLHYADLDHSVLGRHRRMSTVVSGDASTVYAVVV 2 141 160 GSTHGYEYPK VLIPLLGVGLVLGLGVAGVV WRRRRLSPPP +Q9NU53 MEGAPPGSLALRLLLFVALPASGWLTTGAPEPPPLSGAPQDGIRINVTTLKDDGDISKQQVVLNITYESGQVYVNDLPVNSGVTRISCQTLIVKNENLENLEEKEYFGIVSVRILVHEWPMTSGSSLQLIVIQEEVVEIDGKQVQQKDVTEIDILVKNRGVLRHSNYTLPLEESMLYSISRDSDILFTLPNLSKKESVSSLQTTSQYLIRNVETTVDEDVLPGKLPETPLRAEPPSSYKVMCQWMEKFRKDLCRFWSNVFPVFFQFLNIMVVGITGAAVVITILKVFFPVSEYKGILQLDKVDVIPVTAINLYPDGPEKRAENLEDKTCI 2 262 284 LCRFWSNVFP VFFQFLNIMVVGITGAAVVITIL KVFFPVSEYK +Q8WWB7 MRGSVECTWGWGHCAPSPLLLWTLLLFAAPFGLLGEKTRQVSLEVIPNWLGPLQNLLHIRAVGTNSTLHYVWSSLGPLAVVMVATNTPHSTLSVNWSLLLSPEPDGGLMVLPKDSIQFSSALVFTRLLEFDSTNVSDTAAKPLGRPYPPYSLADFSWNNITDSLDPATLSATFQGHPMNDPTRTFANGSLAFRVQAFSRSSRPAQPPRLLHTADTCQLEVALIGASPRGNRSLFGLEVATLGQGPDCPSMQEQHSIDDEYAPAVFQLDQLLWGSLPSGFAQWRPVAYSQKPGGRESALPCQASPLHPALAYSLPQSPIVRAFFGSQNNFCAFNLTFGASTGPGYWDQHYLSWSMLLGVGFPPVDGLSPLVLGIMAVALGAPGLMLLGGGLVLLLHHKKYSEYQSIN 2 372 394 PVDGLSPLVL GIMAVALGAPGLMLLGGGLVLLL HHKKYSEYQS +P02724 MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFGVMAGVIGTILLISYGIRRLIKKSPSDVKPLPSPDTDVPLSSVEIENPETSDQ 2 92 114 QLAHHFSEPE ITLIIFGVMAGVIGTILLISYGI RRLIKKSPSD +Q86XS8 
MSCAGRAGPARLAALALLTCSLWPARADNASQEYYTALINVTVQEPGRGAPLTFRIDRGRYGLDSPKAEVRGQVLAPLPLHGVADHLGCDPQTRFFVPPNIKQWIALLQRGNCTFKEKISRAAFHNAVAVVIYNNKSKEEPVTMTHPGTGDIIAVMITELRGKDILSYLEKNISVQMTIAVGTRMPPKNFSRGSLVFVSISFIVLMIISSAWLIFYFIQKIRYTNARDRNQRRLGDAAKKAISKLTTRTVKKGDKETDPDFDHCAVCIESYKQNDVVRILPCKHVFHKSCVDPWLSEHCTCPMCKLNILKALGIVPNLPCTDNVAFDMERLTRTQAVNRRSALGDLAGDNSLGLEPLRTSGISPLPQDGELTPRTGEINIAVTKEWFIIASFGLLSALTLCYMIIRATASLNANEVEWF 2 195 217 MPPKNFSRGS LVFVSISFIVLMIISSAWLIFYF IQKIRYTNAR +P07359 MPLLLLLLLLPSPLHPHPICEVSKVASHLEVNCDKRNLTALPPDLPKDTTILHLSENLLYTFSLATLMPYTRLTQLNLDRCELTKLQVDGTLPVLGTLDLSHNQLQSLPLLGQTLPALTVLDVSFNRLTSLPLGALRGLGELQELYLKGNELKTLPPGLLTPTPKLEKLSLANNNLTELPAGLLNGLENLDTLLLQENSLYTIPKGFFGSHLLPFAFLHGNPWLCNCEILYFRRWLQDNAENVYVWKQGVDVKAMTSNVASVQCDNSDKFPVYKYPGKGCPTLGDEGDTDLYDYYPEEDTEGDKVRATRTVVKFPTKAHTTPWGLFYSWSTASLDSQMPSSLHPTQESTKEQTTFPPRWTPNFTLHMESITFSKTPKSTTEPTPSPTTSEPVPEPAPNMTTLEPTPSPTTPEPTSEPAPSPTTPEPTSEPAPSPTTPEPTSEPAPSPTTPEPTPIPTIATSPTILVSATSLITPKSTFLTTTKPVSLLESTKKTIPELDQPPKLRGVLQGHLESSRNDPFLHPDFCCLLPLGFYVLGLFWLLFASVVLILLLSWVGHVKPQALDSGQGAALTTATQTTHLELQRGRQVTVPRAWLLFLRGSLPTFRSSLFLWVRPNGRVGPLVAGRRPSALSQGRGQDLLSTVSIRYSGHSL 2 533 555 PDFCCLLPLG FYVLGLFWLLFASVVLILLLSWV GHVKPQALDS +Q99795 MVGKMWPVLWTLCAVRVTVDAISVETPQDVLRASQGKSVTLPCTYHTSTSSREGLIQWDKLLLTHTERVVIWPFSNKNYIHGELYKNRVSISNNAEQSDASITIDQLTMADNGTYECSVSLMSDLEGNTKSRVRLLVLVPPSKPECGIEGETIIGNNIQLTCQSKEGSPTPQYSWKRYNILNQEQPLAQPASGQPVSLKNISTDTSGYYICTSSNEEGTQFCNITVAVRSPSMNVALYVGIAVGVVAALIIIGIIIYCCCCRGKDDNTEDKEDARPNREAYEEPPEQLRELSREREEEDDYRQEEQRSTGRESPDHLDQ 2 235 257 TVAVRSPSMN VALYVGIAVGVVAALIIIGIIIY CCCCRGKDDN +Q14956 
MECLYYFLGFLLLAARLPLDAAKRFHDVLGNERPSAYMREHNQLNGWSSDENDWNEKLYPVWKRGDMRWKNSWKGGRVQAVLTSDSPALVGSNITFAVNLIFPRCQKEDANGNIVYEKNCRNEAGLSADPYVYNWTAWSEDSDGENGTGQSHHNVFPDGKPFPHHPGWRRWNFIYVFHTLGQYFQKLGRCSVRVSVNTANVTLGPQLMEVTVYRRHGRAYVPIAQVKDVYVVTDQIPVFVTMFQKNDRNSSDETFLKDLPIMFDVLIHDPSHFLNYSTINYKWSFGDNTGLFVSTNHTVNHTYVLNGTFSLNLTVKAAAPGPCPPPPPPPRPSKPTPSLATTLKSYDSNTPGPAGDNPLELSRIPDENCQINRYGHFQATITIVEGILEVNIIQMTDVLMPVPWPESSLIDFVVTCQGSIPTEVCTIISDPTCEITQNTVCSPVDVDEMCLLTVRRTFNGSGTYCVNLTLGDDTSLALTSTLISVPDRDPASPLRMANSALISVGCLAIFVTVISLLVYKKHKEYNPIENSPGNVVRSKGLSVFLNRAKAVFFPGNQEKDPLLKNQEFKGVS 2 497 519 DRDPASPLRM ANSALISVGCLAIFVTVISLLVY KKHKEYNPIE +P40197 MLRGTLLCAVLGLLRAQPFPCPPACKCVFRDAAQCSGGDVARISALGLPTNLTHILLFGMGRGVLQSQSFSGMTVLQRLMISDSHISAVAPGTFSDLIKLKTLRLSRNKITHLPGALLDKMVLLEQLFLDHNALRGIDQNMFQKLVNLQELALNQNQLDFLPASLFTNLENLKLLDLSGNNLTHLPKGLLGAQAKLERLLLHSNRLVSLDSGLLNSLGALTELQFHRNHIRSIAPGAFDRLPNLSSLTLSRNHLAFLPSALFLHSHNLTLLTLFENPLAELPGVLFGEMGGLQELWLNRTQLRTLPAAAFRNLSRLRYLGVTLSPRLSALPQGAFQGLGELQVLALHSNGLTALPDGLLRGLGKLRQVSLRRNRLRALPRALFRNLSSLESVQLDHNQLETLPGDVFGALPRLTEVLLGHNSWRCDCGLGPFLGWLRQHLGLVGGEEPPRCAGPGAHAGLPLWALPGGDAECPGPRGPPPRPAADSSSEAPVHPALAPNSSEPWVWAQPVTTGKGQDHSPFWGFYFLLLAVQAMITVIIVFAMIKIGQLFRKLIRERALG 2 522 544 TGKGQDHSPF WGFYFLLLAVQAMITVIIVFAMI KIGQLFRKLI +P25092 
MKTLLLDLALWSLLFQPGWLSFSSQVSQNCHNGSYEISVLMMGNSAFAEPLKNLEDAVNEGLEIVRGRLQNAGLNVTVNATFMYSDGLIHNSGDCRSSTCEGLDLLRKISNAQRMGCVLIGPSCTYSTFQMYLDTELSYPMISAGSFGLSCDYKETLTRLMSPARKLMYFLVNFWKTNDLPFKTYSWSTSYVYKNGTETEDCFWYLNALEASVSYFSHELGFKVVLRQDKEFQDILMDHNRKSNVIIMCGGPEFLYKLKGDRAVAEDIVIILVDLFNDQYFEDNVTAPDYMKNVLVLTLSPGNSLLNSSFSRNLSPTKRDFALAYLNGILLFGHMLKIFLENGENITTPKFAHAFRNLTFEGYDGPVTLDDWGDVDSTMVLLYTSVDTKKYKVLLTYDTHVNKTYPVDMSPTFTWKNSKLPNDITGRGPQILMIAVFTLTGAVVLLLLVALLMLRKYRKDYELRQKKWSHIPPENIFPLETNETNHVSLKIDDDKRRDTIQRLRQCKYDKKRVILKDLKHNDGNFTEKQKIELNKLLQIDYYNLTKFYGTVKLDTMIFGVIEYCERGSLREVLNDTISYPDGTFMDWEFKISVLYDIAKGMSYLHSSKTEVHGRLKSTNCVVDSRMVVKITDFGCNSILPPKKDLWTAPEHLRQANISQKGDVYSYGIIAQEIILRKETFYTLSCRDRNEKIFRVENSNGMKPFRPDLFLETAEEKELEVYLLVKNCWEEDPEKRPDFKKIETTLAKIFGLFHDQKNESYMDTLIRRLQLYSRNLEHLVEERTQLYKAERDRADRLNFMLLPRLVVKSLKEKGFVEPELYEEVTIYFSDIVGFTTICKYSTPMEVVDMLNDIYKSFDHIVDHHDVYKVETIGDAYMVASGLPKRNGNRHAIDIAKMALEILSFMGTFELEHLPGLPIWIRIGVHSGPCAAGVVGIKMPRYCLFGDTVNTASRMESTGLPLRIHVSGSTIAILKRTECQFLYEVRGETYLKGRGNETTYWLTGMKDQKFNLPTPPTVENQQRLQAEFSDMIANSLQKRQAAGIRSQKPRRVASYKKGTLEYLQLNTTDKESTYF 2 432 454 NDITGRGPQI LMIAVFTLTGAVVLLLLVALLML RKYRKDYELR +Q02846 
MTACARRAGGLPDPGLCGPAWWAPSLPRLPRALPRLPLLLLLLLLQPPALSAVFTVGVLGPWACDPIFSRARPDLAARLAAARLNRDPGLAGGPRFEVALLPEPCRTPGSLGAVSSALARVSGLVGPVNPAACRPAELLAEEAGIALVPWGCPWTQAEGTTAPAVTPAADALYALLRAFGWARVALVTAPQDLWVEAGRSLSTALRARGLPVASVTSMEPLDLSGAREALRKVRDGPRVTAVIMVMHSVLLGGEEQRYLLEAAEELGLTDGSLVFLPFDTIHYALSPGPEALAALANSSQLRRAHDAVLTLTRHCPSEGSVLDSLRRAQERRELPSDLNLQQVSPLFGTIYDAVFLLARGVAEARAAAGGRWVSGAAVARHIRDAQVPGFCGDLGGDEEPPFVLLDTDAAGDRLFATYMLDPARGSFLSAGTRMHFPRGGSAPGPDPSCWFDPNNICGGGLEPGLVFLGFLLVVGMGLAGAFLAHYVRHRLLHMQMVSGPNKIILTVDDITFLHPHGGTSRKVAQGSRSSLGARSMSDIRSGPSQHLDSPNIGVYEGDRVWLKKFPGDQHIAIRPATKTAFSKLQELRHENVALYLGLFLARGAEGPAALWEGNLAVVSEHCTRGSLQDLLAQREIKLDWMFKSSLLLDLIKGIRYLHHRGVAHGRLKSRNCIVDGRFVLKITDHGHGRLLEAQKVLPEPPRAEDQLWTAPELLRDPALERRGTLAGDVFSLAIIMQEVVCRSAPYAMLELTPEEVVQRVRSPPPLCRPLVSMDQAPVECILLMKQCWAEQPELRPSMDHTFDLFKNINKGRKTNIIDSMLRMLEQYSSNLEDLIRERTEELELEKQKTDRLLTQMLPPSVAEALKTGTPVEPEYFEQVTLYFSDIVGFTTISAMSEPIEVVDLLNDLYTLFDAIIGSHDVYKVETIGDAYMVASGLPQRNGQRHAAEIANMSLDILSAVGTFRMRHMPEVPVRIRIGLHSGPCVAGVVGLTMPRYCLFGDTVNTASRMESTGLPYRIHVNLSTVGILRALDSGYQVELRGRTELKGKGAEDTFWLVGRRGFNKPIPKPPDLQPGSSNHGISLQEIPPERRRKLEKARPGQFS 2 465 487 NICGGGLEPG LVFLGFLLVVGMGLAGAFLAHYV RHRLLHMQMV +A0A0U1RPR8 
MAGLQQGCHFEGQNWTAPHWKTCLPCQGPWRLTVSHLKTVSSISVLSVVFWSVLLWADSLSLLAWARETFTLGVLGPWDCDPIFAQALPSIATQLAVDQVNQDASLLPGSQLDFKVLPTGCDTPHALATFVAHKNIVAAFVGPVNPGFCSAAALLAQGWGKSLFSWACEAPEGGGDLVPTLPSAADVLLSVMRHFGWARWAIVSSHQDIWVTTAQQLATAFRTHGLPIGLVTSLGPGEKGATEVCKQLHSVHGLKIVVLCMHSALLGGLEQTTLLHCAWEEGLTDGRLVFLPYDTLLFALPYGNRSYLVLDDHGPLQEAYDAVLTVSLESSPESHAFTATEMSGGATANLEPEQVSPLFGTIYDAVILLAHALNRSETHGAGLSGAHLGDHVRALDVAGFSQRIRTDGKGRRLAQYVILDTDGEGSQLVPTHILDTSTWQVQPLGKPIHFPGGSPPAHDASCWFDPNTLCIRGVQPLGSLLTLTIACVLALVGGFLAYFIRLGLQQLRLLRGPHRILLTSQELTFLQRTPSRRRPHVDSGSESRSVVDGGSPRSVTQGSARSLPAFLEHTNVALYQGEWVWLKKFEAGVAPDLRPSSLSFLRKLREMRHENVTAFLGLFVGPGVSAMVLEHCARGSLEDLLQNENLRLDWTFKASLLLDLIRGLRYLHHRRFPHGRLKSRNCVVDTRFVLKITDHGYAEFLESHCSSRPQPAPEELLWTAPELLRGPGKATFKGDVFSLAIILQEVLTRDPPYCSWGLSAEEIIRKVASPPPLCRPLVSPDQGPLECIQLMQLCWEEAPDDRPSLDQIYTQFKSINQGKKTSVVDSMLRMLEKYSESLEDLVQERTEELELERRKTERLLSQMLPPSVAHALKMGTTVEPEYFDQVTIYFSDIVGFTTISALSEPIEVVGFLNDLYTLFDAVLDSHDVYKVETIGDAYMVASGLPRRNGNRHAAEIANLALDILSYAGNFRMRHAPDVPIRVRAGLHSGPCVAGVVGLTMPRYCLFGDTVNTASRMESTGLPYRIHVSQSTVQALLSLDEGYKIDVRGQTELKGKGLEETYWLTGKVGFCRPLPTPLSIKPGDPWQDRINQEIRTGFAKARQGLAEPRKSGEAGPGP 2 480 502 CIRGVQPLGS LLTLTIACVLALVGGFLAYFIRL GLQQLRLLRG +P51841 
MFLGLGRFSRLVLWFAAFRKLLGHHGLASAKFLWCLCLLSVMSLPQQVWTLPYKIGVVGPWACDSLFSKALPEVAARLAIERINRDPSFDLSYSFEYVILNEDCQTSRALSSFISHHQMASGFIGPTNPGYCEAASLLGNSWDKGIFSWACVNYELDNKISYPTFSRTLPSPIRVLVTVMKYFQWAHAGVISSDEDIWVHTANRVASALRSHGLPVGVVLTTGQDSQSMRKALQRIHQADRIRIIIMCMHSALIGGETQMHLLECAHDLKMTDGTYVFVPYDALLYSLPYKHTPYRVLRNNPKLREAYDAVLTITVESQEKTFYQAFTEAAARGEIPEKLEFDQVSPLFGTIYNSIYFIAQAMNNAMKENGQAGAASLVQHSRNMQFHGFNQLMRTDSNGNGISEYVILDTNLKEWELHSTYTVDMEMELLRFGGTPIHFPGGRPPRADAKCWFAEGKICHGGIDPAFAMMVCLTLLIALLSINGFAYFIRRRINKIQLIKGPNRILLTLEDVTFINPHFGSKRGSRASVSFQITSEVQSGRSPRLSFSSGSLTPATYENSNIAIYEGDWVWLKKFSLGDFGDLKSIKSRASDVFEMMKDLRHENINPLLGFFYDSGMFAIVTEFCSRGSLEDILTNQDVKLDWMFKSSLLLDLIKGMKYLHHREFVHGRLKSRNCVVDGRFVLKVTDYGFNDILEMLRLSEEESSMEELLWTAPELLRAPRGSRLGSFAGDVYSFAIIMQEVMVRGTPFCMMDLPAQEIINRLKKPPPVYRPVVPPEHAPPECLQLMKQCWAEAAEQRPTFDEIFNQFKTFNKGKKTNIIDSMLRMLEQYSSNLEDLIRERTEELEIEKQKTEKLLTQMLPPSVAESLKKGCTVEPEGFDLVTLYFSDIVGFTTISAMSEPIEVVDLLNDLYTLFDAIIGSHDVYKVETIGDAYMVASGLPKRNGSRHAAEIANMSLDILSSVGTFKMRHMPEVPVRIRIGLHSGPVVAGVVGLTMPRYCLFGDTVNTASRMESTGLPYRIHVSLSTVTILQNLSEGYEVELRGRTELKGKGTEETFWLIGKKGFMKPLPVPPPVDKDGQVGHGLQPVEIAAFQRRKAERQLVRNKP 2 468 490 KICHGGIDPA FAMMVCLTLLIALLSINGFAYFI RRRINKIQLI +B1B212 MAKSSLSLNWSLLVLLNFLGATLSTGTDSLSCELTFNHRTLHGQCSVNGKTLLDFGDKKHEGNATEMCADLSQSLRELSEGMRNQQSGNDALNVTTQSQYNQGEFIGGFWAINTDEQHSIYFYPLNMTWRESHSDNSSAMEHWKNKNLEKDIRNVLIIYFSRCLNKLSPHFREMPKSKIKVLDTTQNTNTTQIHPTVNNSQHNSDTQGLSFTWIVIICIGGIVSFMAFMVFAWCMLKKKKGALCCSSSSTT 2 211 233 QHNSDTQGLS FTWIVIICIGGIVSFMAFMVFAW CMLKKKKGAL +Q8TDQ0 MFSHLPFDCVLLLLLLLLTRSSEVEYRAEVGQNAYLPCFYTPAAPGNLVPVCWGKGACPVFECGNVVLRTDERDVNYWTSRYWLNGDFRKGDVSLTIENVTLADSGIYCCRIQIPGIMNDEKFNLKLVIKPAKVTPAPTRQRDFTAAFPRMLTTRGHGPAETQTLGSLPDINLTQISTLANELRDSRLANDLRDSGATIRIGIYIGAGICAGLALALIFGALIFKWYSHSKEKIQNLSLISLANLPPSGLANAVAEGIRSEENIYTIEENVYEVEEPNEYYCYVSSRQQPSQPLGCRFAMP 2 202 224 LRDSGATIRI GIYIGAGICAGLALALIFGALIF KWYSHSKEKI +Q99075 
MKLLPSVVLKLFLAAVLSALVTGESLERLRRGLAAGTSNPDPPTVSTDQLLPLGGGRDRKVRDLQEADLDLLRVTLSSKPQALATPNKEEHGKRKKKGKGLGKKRDPCLRKYKDFCIHGECKYVKELRAPSCICHPGYHGERCHGLSLPVENRLYTYDHTTILAVVAVVLSSVCLLVIVGLLMFRYHRRGGYDVENEEKVKLGMTNSH 2 162 184 NRLYTYDHTT ILAVVAVVLSSVCLLVIVGLLMF RYHRRGGYDV +Q9QUJ0 MDPPGYLLFLLLLPVAASQTSAGSCSGCGTLSLPLLAGLVAADAVMSLLIVGVVFVCMRPHGRPAQEDGRVYINMPGRG 2 35 57 CSGCGTLSLP LLAGLVAADAVMSLLIVGVVFVC MRPHGRPAQE +A8MVW5 MGQDAFMEPFGDTLGVFQCKIYLLLFGACSGLKVTVPSHTVHGVRGQALYLPVHYGFHTPASDIQIIWLFERPHTMPKYLLGSVNKSVVPDLEYQHKFTMMPPNASLLINPLQFPDEGNYIVKVNIQGNGTLSASQKIQVTVDDPVTKPVVQIHPPSGAVEYVGNMTLTCHVEGGTRLAYQWLKNGRPVHTSSTYSFSPQNNTLHIAPVTKEDIGNYSCLVRNPVSEMESDIIMPIIYYGPYGLQVNSDKGLKVGEVFTVDLGEAILFDCSADSHPPNTYSWIRRTDNTTYIIKHGPRLEVASEKVAQKTMDYVCCAYNNITGRQDETHFTVIITSVGLEKLAQKGKSLSPLASITGISLFLIISMCLLFLWKKYQPYKVIKQKLEGRPETEYRKAQTFSGHEDALDDFGIYEFVAFPDVSGVSRIPSRSVPASDCVSGQDLHSTVYEVIQHIPAQQQDHPE 2 350 372 EKLAQKGKSL SPLASITGISLFLIISMCLLFLW KKYQPYKVIK +Q14CZ8 MKRERGALSRASRALRLAPFVYLLLIQTDPLEGVNITSPVRLIHGTVGKSALLSVQYSSTSSDRPVVKWQLKRDKPVTVVQSIGTEVIGTLRPDYRDRIRLFENGSLLLSDLQLADEGTYEVEISITDDTFTGEKTINLTVDVPISRPQVLVASTTVLELSEAFTLNCSHENGTKPSYTWLKDGKPLLNDSRMLLSPDQKVLTITRVLMEDDDLYSCMVENPISQGRSLPVKITVYRRSSLYIILSTGGIFLLVTLVTVCACWKPSKRKQKKLEKQNSLEYMDQNDDRLKPEADTLPRSGEQERKNPMALYILKDKDSPETEENPAPEPRSATEPGPPGYSVSPAVPGRSPGLPIRSARRYPRSPARSPATGRTHSSPPRAPSSPGRSRSASRTLRTAGVHIIREQDEAGPVEISA 2 241 263 VKITVYRRSS LYIILSTGGIFLLVTLVTVCACW KPSKRKQKKL +E9Q7X6 
MATPRAPRWPPPSLLLLLLLPLLLLPPAAPGARGSLPSPAHRTLLPVAGPLSPPGAGHTAPGPGVATRRGRSGRVPRGVSAAAARNRWLESNNPEPHIGCSPSYQSQEDHSGSRKGVTAQNARMSHSSSEGPENPPLLPETSAEWSNMASSHRADIAGLRRGPSPEITTAPTAHSSLLSLESLPESPSSSRSQRRITPSQTESGTSLGFLERTRELPEEGTVHTQVAGTWVSRQASHPALEPGEPTVLSQKRNSSGQEHSGPPFSWSQSHPPPSDHPSSSGSIKNGNNFTALQNPSVTQTKSMLITDTYTNGVPRTLRSLPVGVDPADETEGFPEHSRLGITSMSVRSSPSVKDSRTNSGLTEHLGDGEGTELSTENGYGLPSIHWQSDAPSFGGRQLASSSEAGDGRAMPLTEAVFRSDPSIGGGESTGRWILTKKKTSTDAAESSALHPEAGGAGGLTQSSHAAQQPRGGGEDSGMGGRSYAESSSSSSSTSSSESLDSSAPLREHSLTGLSYTREHGSDAGQRTSSDHTDHGYVPSTFTKGERTLLSITDNTSYSEASESSTSSVKISDSPSQAQPKQSSMSSDDDEPAQSSTESPVLHTSNLPTYTSTVNMPNTLVLDTGTKPVEDPSDSRVPSTQPSPSQPQPFSSALPSTRSPGSTSETTTSSPSPSPISLLVSTLAPYSVSQTTFPHPSSTLVPHRPREPRVTSVQMSTAISAIALIPSNQTANPKNQSTPQQEKPITEAKSPSLVSPPTDSTKAVTVSLPPGAPWSPALTGFSTGPALPATSTSLAQMSPALTSAMPQTTHSPVTSPSTLSHVEALTSGAVVVHTTPKKPHLPTNPEILVPHISTEGAITTEGNREHTDPTTQPIPLTTSTTSAGERTTELGRAEESSPSHFLTPSSPQTTDVSTAEMLTSRYITFAAQSTSQSPTALPPLTPVNSCTVNPCLHDGKCIVDLTGRGYRCVCPPAWQGENCSVDVNECLSSPCPPLATCNNTQGSFTCRCPVGYQLEKGICNLVRTFVTEFKLKKTFLNTTAENHSNTQELENEIAQTLNVCFSTLPGYIRTTAHVSREPSTVFISLKTTFALASNVTLFDLADRIQKYVNSCRSSAEVCQLLGSQRRVFRAGSLCKRKSPECDKETSICTDLDGVALCQCKSGYFQFNKMDHSCRACEDGYRLENETCMSCPFGLGGLNCGNPYQLITVVIAAAGGGLLLILGVALIVTCCRKSKNDISKLIFKSGDFQMSPYTDVPKNPRSQEWGREAIEMHENGSTKNLLQMTDVYYSPTNVRNPELERNGLYPAYTGLPGSRHSCIFPGQYNPSFISDESRRRDYF 2 1204 1226 GGLNCGNPYQ LITVVIAAAGGGLLLILGVALIV TCCRKSKNDI +Q9BQS7 
MESGHLLWALLFMQSLWPQLTDGATRVYYLGIRDVQWNYAPKGRNVITNQPLDSDIVASSFLKSDKNRIGGTYKKTIYKEYKDDSYTDEVAQPAWLGFLGPVLQAEVGDVILIHLKNFATRPYTIHPHGVFYEKDSEGSLYPDGSSGPLKADDSVPPGGSHIYNWTIPEGHAPTDADPACLTWIYHSHVDAPRDIATGLIGPLITCKRGALDGNSPPQRQDVDHDFFLLFSVVDENLSWHLNENIATYCSDPASVDKEDETFQESNRMHAINGFVFGNLPELNMCAQKRVAWHLFGMGNEIDVHTAFFHGQMLTTRGHHTDVANIFPATFVTAEMVPWEPGTWLISCQVNSHFRDGMQALYKVKSCSMAPPVDLLTGKVRQYFIEAHEIQWDYGPMGHDGSTGKNLREPGSISDKFFQKSSSRIGGTYWKVRYEAFQDETFQEKMHLEEDRHLGILGPVIRAEVGDTIQVVFYNRASQPFSMQPHGVFYEKDYEGTVYNDGSSYPGLVAKPFEKVTYRWTVPPHAGPTAQDPACLTWMYFSAADPIRDTNSGLVGPLLVCRAGALGADGKQKGVDKEFFLLFTVLDENKSWYSNANQAAAMLDFRLLSEDIEGFQDSNRMHAINGFLFSNLPRLDMCKGDTVAWHLLGLGTETDVHGVMFQGNTVQLQGMRKGAAMLFPHTFVMAIMQPDNLGTFEIYCQAGSHREAGMRAIYNVSQCPGHQATPRQRYQAARIYYIMAEEVEWDYCPDRSWEREWHNQSEKDSYGYIFLSNKDGLLGSRYKKAVFREYTDGTFRIPRPRTGPEEHLGILGPLIKGEVGDILTVVFKNNASRPYSVHAHGVLESTTVWPLAAEPGEVVTYQWNIPERSGPGPNDSACVSWIYYSAVDPIKDMYSGLVGPLAICQKGILEPHGGRSDMDREFALLFLIFDENKSWYLEENVATHGSQDPGSINLQDETFLESNKMHAINGKLYANLRGLTMYQGERVAWYMLAMGQDVDLHTIHFHAESFLYRNGENYRADVVDLFPGTFEVVEMVASNPGTWLMHCHVTDHVHAGMETLFTVFSRTEHLSPLTVITKETEKAVPPRDIEEGNVKMLGMQIPIKNVEMLASVLVAISVTLLLVVLALGGVVWYQHRQRKLRRNRRSILDDSFKLLSFKQ 2 1110 1132 IPIKNVEMLA SVLVAISVTLLLVVLALGGVVWY QHRQRKLRRN +Q9UM44 MKAQTALSFFLILITSLSGSQGIFPLAFFIYVPMNEQIVIGRLDEDIILPSSFERGSEVVIHWKYQDSYKVHSYYKGSDHLESQDPRYANRTSLFYNEIQNGNASLFFRRVSLLDEGIYTCYVGTAIQVITNKVVLKVGVFLTPVMKYEKRNTNSFLICSVLSVYPRPIITWKMDNTPISENNMEETGSLDSFSINSPLNITGSNSSYECTIENSLLKQTWTGRWTMKDGLHKMQSEHVSLSCQPVNDYFSPNQDFKVTWSRMKSGTFSVLAYYLSSSQNTIINESRFSWNKELINQSDFSMNLMDLNLSDSGEYLCNISSDEYTLLTIHTVHVEPSQETASHNKGLWILVPSAILAAFLLIWSVKCCRAQLEARRSRHPADGAQQERCCVPPGERCPSAPDNGEENVPLSGKV 2 346 365 PSQETASHNK GLWILVPSAILAAFLLIWSV KCCRAQLEAR +Q75VT8 MPWTILLFASGSLAIPAPSISLVPPYPSSHEDPIYISCTAPGDILGANFTLFRGGEVVQLLQAPSDRPDVTFNVTGGGSGGGGEAAGGNFCCQYGVMGEHSQPQLSDFSQQVQVSFPVPTWILALSLSLAGAVLFSGLVAITVLVRKAKAKNLQKQRERESCWAQINFTNTDMSFDNSLFAISTKMTQEDSVATLDSGPRKRPTSASSSPEPPEFSTFRACQ 2 123 145 QVSFPVPTWI LALSLSLAGAVLFSGLVAITVLV RKAKAKNLQK +Q8HWB0 
MMLLLPLLAVFLVKRSHTRTHSLRYFRLAVSDPGPVVPEFISVGYVDSHPITTYDSVTRQKEPKAPWMAENLAPDHWERYTQLLRGWQQTFKAELRHLQRHYNHSGLHTYQRMIGCELLEDGSTTGFLQYAYDGQDFIIFNKDTLSWLAMDYVAHITKQAWEANLHELQYQKNWLEEECIAWLKRFLEYGRDTLERTEHPVVRTTRKETFPGITTFFCRAHGFYPPEISMTWMKNGEEIAQEVDYGGVLPSGDGTYQTWLSVNLDPQSNDVYSCHVEHCGRQMVLEAPRESGDILRVSTISGTTILIIALAGVGVLIWRRSQELKEVMYQPTQVNEGSSPS 2 297 319 APRESGDILR VSTISGTTILIIALAGVGVLIWR RSQELKEVMY +Q6MZM0 MPRKQPAGCIFLLTFLGLSGLVGTVTRTYYIGIVEEYWNYVPQGKNVITGKSFTEDKLATLFLERGPNRIGSIYKKAVYRRFTDGTYSIEIPKPPWLGFLGPILRAEVGDVIVIHLKNFASRPYSLHPHGVFYNKDSEGALYPDGTSGRNKNDDMVPPGKNYTYVWPVREEYAPTPADANCLTWVYHSHIDAPKDICSGLIGPLLVCKEGILNRYSGTRNDVDREFVIMFTLVDENQSWYLNENIKHFCTNPDSVDKKDAVFQRSNKMHALNGYLFGNFPEPDMCVGESVSWHLFGMGNEIDIHSIYFYGNTFISRGHRTDVVNLFPATFLTTEMIAENPGKWMITCQVSDHLQAGMLGQYNVDNCKSDIFYPKMKGQQRRYFIAAEKILWDYAPQGYNKFSGLPLNASGSDSDLYFTQGDNRIGGKYWKVRYTEFVDATFTKRKRLSAEEAHLGILGPVIKAEVGDTLLVTFANKADKVYSILPHGVIYDKASDAAPNLDGFVKPGAHVKPGETFTYKWTVPESVSPTAGDPPCLTYLYFSAVDPIKDTSSGLVGPLLVCKKGVLNADGTQKGIDKEFYLLFTVFDENLSRYFDENIQKFIWHPFSIDKEDKEFVKSNRMHAVNGYMYGNQPGLNMCKRDRVSWHLIGLGTDTDMHGIVFQGNTIHLRGTHRDSLALFPHMATTAFMQPDHAGIFRVFCATMPHLSRGMGQIYEVSSCDNRDPSEQRYGMIRTFYIAAEEVEWDYAPNKNWEFEKQHVDARGERHGDIFMNRTENWIGSQYKKVVYREYTDGEFVEIKARPPREEHLELLGPMIHAEVGNTVLIIFKNKASRPYSISAQGVEEMDSGKQFQVPMTKPGEVKTYRWNIPKRSGPGPSDPNCIPWVYYSTVNFVKDTYSGLMGPLITCRKGVLNEKGRRSDVDYEFALLFLVFNENESWYLDDNIKKYLNKDPRDFKRTDDFEESNRMHAINGKIFGNLHGLIMNEDTMTNWYLLGIGSEVDIHTIHYHAESFLFKIDKSYREDVYDLFPGTFQTIELFADHPGTWLLHCHVSDHIHAGMETTYTVLRNIDNRIPYSTTSPGVASHPATVPSNERPGKEQLYFFGKNLGPTGAKAALVILFIIGLLLLITTVILSLRLCSAMKQTDYQQVQSCALPTDAL 2 1115 1137 KNLGPTGAKA ALVILFIIGLLLLITTVILSLRL CSAMKQTDYQ +Q08334 MAWSLGSWLGGCLLVSALGMVPPPENVRMNSVNFKNILQWESPAFAKGNLTFTAQYLSYRIFQDKCMNTTLTECDFSSLSKYGDHTLRVRAEFADEHSDWVNITFCPVDDTIIGPPGMQVEVLADSLHMRFLAPKIENEYETWTMKNVYNSWTYNVQYWKNGTDEKFQITPQYDFEVLRNLEPWTTYCVQVRGFLPDRNKAGEWSEPVCEQTTHDETVPSWMVAVILMASVFMVCLALLGCFALLWCVYKKTKYAFSPRNSLPQHLKEFLGHPHHNTLLFFSFPLSDENDVFDKLSVIAEDSESGKQNPGDSCSLGTPPGQGPQS 2 224 246 HDETVPSWMV 
AVILMASVFMVCLALLGCFALLW CVYKKTKYAF +Q64385 MSSSCSGLTRVLVAVATALVSSSSPCPQAWGPPGVQYGQPGRPVMLCCPGVSAGTPVSWFRDGDSRLLQGPDSGLGHRLVLAQVDSPDEGTYVCQTLDGVSGGMVTLKLGFPPARPEVSCQAVDYENFSCTWSPGQVSGLPTRYLTSYRKKTLPGAESQRESPSTGPWPCPQDPLEASRCVVHGAEFWSEYRINVTEVNPLGASTCLLDVRLQSILRPDPPQGLRVESVPGYPRRLHASWTYPASWRRQPHFLLKFRLQYRPAQHPAWSTVEPIGLEEVITDAVAGLPHAVRVSARDFLDAGTWSAWSPEAWGTPSTGPLQDEIPDWSQGHGQQLEAVVAQEDSPAPARPSLQPDPRPLDHRDPLEQVAVLASLGIFSCLGLAVGALALGLWLRLRRSGKDGPQKPGLLAPMIPVEKLPGIPNLQRTPENFS 2 369 391 LDHRDPLEQV AVLASLGIFSCLGLAVGALALGL WLRLRRSGKD +P42701 MEPLVTWVVPLLFLFLLSRQGAACRTSECCFQDPPYPDADSGSASGPRDLRCYRISSDRYECSWQYEGPTAGVSHFLRCCLSSGRCCYFAAGSATRLQFSDQAGVSVLYTVTLWVESWARNQTEKSPEVTLQLYNSVKYEPPLGDIKVSKLAGQLRMEWETPDNQVGAEVQFRHRTPSSPWKLGDCGPQDDDTESCLCPLEMNVAQEFQLRRRQLGSQGSSWSKWSSPVCVPPENPPQPQVRFSVEQLGQDGRRRLTLKEQPTQLELPEGCQGLAPGTEVTYRLQLHMLSCPCKAKATRTLHLGKMPYLSGAAYNVAVISSNQFGPGLNQTWHIPADTHTEPVALNISVGTNGTTMYWPARAQSMTYCIEWQPVGQDGGLATCSLTAPQDPDPAGMATYSWSRESGAMGQEKCYYITIFASAHPEKLTLWSTVLSTYHFGGNASAAGTPHHVSVKNHSLDSVSVDWAPSLLSTCPGVLKEYVVRCRDEDSKQVSEHPVQPTETQVTLSGLRAGVAYTVQVRADTAWLRGVWSQPQRFSIEVQVSDWLIFFASLGSFLSILLVGVLGYLGLNRAARHLCPPLPTPCASSAIEFPGGKETWQWINPVDFQEEASLQEALVVEMSWDKGERTEPLEKTELPEGAPELALDTELSLEDGDRCKAKM 2 546 568 RFSIEVQVSD WLIFFASLGSFLSILLVGVLGYL GLNRAARHLC +Q99665 
MAHTFRGCSLAFMFIITWLLIKAKIDACKRGDVTVKPSHVILLGSTVNITCSLKPRQGCFHYSRRNKLILYKFDRRINFHHGHSLNSQVTGLPLGTTLFVCKLACINSDEIQICGAEIFVGVAPEQPQNLSCIQKGEQGTVACTWERGRDTHLYTEYTLQLSGPKNLTWQKQCKDIYCDYLDFGINLTPESPESNFTAKVTAVNSLGSSSSLPSTFTFLDIVRPLPPWDIRIKFQKASVSRCTLYWRDEGLVLLNRLRYRPSNSRLWNMVNVTKAKGRHDLLDLKPFTEYEFQISSKLHLYKGSWSDWSESLRAQTPEEEPTGMLDVWYMKRHIDYSRQQISLFWKNLSVSEARGKILHYQVTLQELTGGKAMTQNITGHTSWTTVIPRTGNWAVAVSAANSKGSSLPTRINIMNLCEAGLLAPRQVSANSEGMDNILVTWQPPRKDPSAVQEYVVEWRELHPGGDTQVPLNWLRSRPYNVSALISENIKSYICYEIRVYALSGDQGGCSSILGNSKHKAPLSGPHINAITEEKGSILISWNSIPVQEQMGCLLHYRIYWKERDSNSQPQLCEIPYRVSQNSHPINSLQPRVTYVLWMTALTAAGESSHGNEREFCLQGKANWMAFVAPSICIAIIMVGIFSTHYFQQKVFVLLAALRPQWCSREIPDPANSTCAKKYPIAEEKTQLPLDRLLIDWPTPEDPEPLVISEVLHQVTPVFRHPPCSNWPQREKGIQGHQASEKDMMHSASSPPPPRALQAESRQLVDLYKVLESRGSDPKPENPACPWTVLPAGDLPTHDGYLPSNIDDLPSHEAPLADSLEELEPQHISLSVFPSSSLHPLTFSCGDKLTLDQLKMRCDSLML 2 623 645 REFCLQGKAN WMAFVAPSICIAIIMVGIFSTHY FQQKVFVLLA +Q14627 MAFVCLAIGCLYTFLISTTFGCTSSSDTEIKVNPPQDFEIVDPGYLGYLYLQWQPPLSLDHFKECTVEYELKYRNIGSETWKTIITKNLHYKDGFDLNKGIEAKIHTLLPWQCTNGSEVQSSWAETTYWISPQGIPETKVQDMDCVYYNWQYLLCSWKPGIGVLLDTNYNLFYWYEGLDHALQCVDYIKADGQNIGCRFPYLEASDYKDFYICVNGSSENKPIRSSYFTFQLQNIVKPLPPVYLTFTRESSCEIKLKWSIPLGPIPARCFDYEIEIREDDTTLVTATVENETYTLKTTNETRQLCFVVRSKVNIYCSDDGIWSEWSDKQCWEGEDLSKKTLLRFWLPFGFILILVIFVTGLLLRKPNTYPKMIPEFFCDT 2 344 363 EDLSKKTLLR FWLPFGFILILVIFVTGLLL RKPNTYPKMI +O88786 MAFVHIRCLCFILLCTITGYSLEIKVNPPQDFEILDPGLLGYLYLQWKPPVVIEKFKGCTLEYELKYRNVDSDSWKTIITRNLIYKDGFDLNKGIEGKIRTHLSEHCTNGSEVQSPWIEASYGISDEGSLETKIQDMKCIYYNWQYLVCSWKPGKTVYSDTNYTMFFWYEGLDHALQCADYLQHDEKNVGCKLSNLDSSDYKDFFICVNGSSKLEPIRSSYTVFQLQNIVKPLPPEFLHISVENSIDIRMKWSTPGGPIPPRCYTYEIVIREDDISWESATDKNDMKLKRRANESEDLCFFVRCKVNIYCADDGIWSEWSEEECWEGYTGPDSKIIFIVPVCLFFIFLLLLLCLIVEKEEPEPTLSLHVDLNKEVCAYEDTLC 2 335 357 WEGYTGPDSK IIFIVPVCLFFIFLLLLLCLIVE KEEPEPTLSL +Q60819 
MASPQLRGYGVQAIPVLLLLLLLLLLPLRVTPGTTCPPPVSIEHADIRVKNYSVNSRERYVCNSGFKRKAGTSTLIECVINKNTNVAHWTTPSLKCIRDPSLAHYSPVPTVVTPKVTSQPESPSPSAKEPEAFSPKSDTAMTTETAIMPGSRLTPSQTTSAGTTGTGSHKSSRAPSLAATMTLEPTASTSLRITEISPHSSKMTKVAISTSVLLVGAGVVMAFLAWYIKSRQPSQPCRVEVETMETVPMTVRASSKEDEDTGA 2 206 228 ISPHSSKMTK VAISTSVLLVGAGVVMAFLAWYI KSRQPSQPCR +Q96F46 MGAARSPPSAVPGPLLGLLLLLLGVLAPGGASLRLLDHRALVCSQPGLNCTVKNSTCLDDSWIHPRNLTPSSPKDLQIQLHFAHTQQGDLFPVAHIEWTLQTDASILYLEGAELSVLQLNTNERLCVRFEFLSKLRHHHRRWRFTFSHFVVDPDQEYEVTVHHLPKPIPDGDPNHQSKNFLVPDCEHARMKVTTPCMSSGSLWDPNITVETLEAHQLRVSFTLWNESTHYQILLTSFPHMENHSCFEHMHHIPAPRPEEFHQRSNVTLTLRNLKGCCRHQVQIQPFFSSCLNDCLRHSATVSCPEMPDTPEPIPDYMPLWVYWFITGISILLVGSVILLIVCMTWRLAGPGSEKYSDDTKYTDGLPAADLIPPPLKPRKVWIIYSADHPLYVDVVLKFAQFLLTACGTEVALDLLEEQAISEAGVMTWVGRQKQEMVESNSKIIVLCSRGTRAKWQALLGRGAPVRLRCDHGKPVGDLFTAAMNMILPDFKRPACFGTYVVCYFSEVSCDGDVPDLFGAAPRYPLMDRFEEVYFRIQDLEMFQPGRMHRVGELSGDNYLRSPGGRQLRAALDRFRDWQVRCPDWFECENLYSADDQDAPSLDEEVFEEPLLPPGTGIVKRAPLVREPGSQACLAIDPLVGEEGGAAVAKLEPHLQPRGQPAPQPLHTLVLAAEEGALVAAVEPGPLADGAAVRLALAGEGEACPLLGSPGAGRNSVLFLPVDPEDSPLGSSTPMASPDLLPEDVREHLEGLMLSLFEQSLSCQAQGGCSRPAMVLTDPHTPYEEEQRQSVQSDQGYISRSSPQPPEGLTEMEEEEEEEQDPGKPALPLSPEDLESLRSLQRQLLFRQLQKNSGWDTMGSESEGPSA 2 319 341 TPEPIPDYMP LWVYWFITGISILLVGSVILLIV CMTWRLAGPG +Q8NAC3 
MPVPWFLLSLALGRSPVVLSLERLVGPQDATHCSPVSLEPWGDEERLRVQFLAQQSLSLAPVTAATARTALSGLSGADGRREERGRGKSWVCLSLGGSGNTEPQKKGLSCRLWDSDILCLPGDIVPAPGPVLAPTHLQTELVLRCQKETDCDLCLRVAVHLAVHGHWEEPEDEEKFGGAADSGVEEPRNASLQAQVVLSFQAYPTARCVLLEVQVPAALVQFGQSVGSVVYDCFEAALGSEVRIWSYTQPRYEKELNHTQQLPDCRGLEVWNSIPSCWALPWLNVSADGDNVHLVLNVSEEQHFGLSLYWNQVQGPPKPRWHKNLTGPQIITLNHTDLVPCLCIQVWPLEPDSVRTNICPFREDPRAHQNLWQAARLQLLTLQSWLLDAPCSLPAEAALCWRAPGGDPCQPLVPPLSWENVTVDKVLEFPLLKGHPNLCVQVNSSEKLQLQECLWADSLGPLKDDVLLLETRGPQDNRSLCALEPSGCTSLPSKASTRAARLGEYLLQDLQSGQCLQLWDDDLGALWACPMDKYIHKRWALVWLACLLFAAALSLILLLKKDHAKGWLRLLKQDVRSGAAARGRAALLLYSADDSGFERLVGALASALCQLPLRVAVDLWSRRELSAQGPVAWFHAQRRQTLQEGGVVVLLFSPGAVALCSEWLQDGVSGPGAHGPHDAFRASLSCVLPDFLQGRAPGSYVGACFDRLLHPDAVPALFRTVPVFTLPSQLPDFLGALQQPRAPRSGRLQERAEQVSRALQPALDSYFHPPGTPAPGRGVGPGAGPGAGDGT 2 540 559 PMDKYIHKRW ALVWLACLLFAAALSLILLL KKDHAKGWLR +Q8BH06 MGSPRLAALLLSLPLLLIGLAVSARVACPCLRSWTSHCLLAYRVDKRFAGLQWGWFPLLVRKSKSPPKFEDYWRHRTPASFQRKLLGSPSLSEESHRISIPSSAISHRGQRTKRAQPSAAEGREHLPEAGSQKCGGPEFSFDLLPEVQAVRVTIPAGPKASVRLCYQWALECEDLSSPFDTQKIVSGGHTVDLPYEFLLPCMCIEASYLQEDTVRRKKCPFQSWPEAYGSDFWQSIRFTDYSQHNQMVMALTLRCPLKLEASLCWRQDPLTPCETLPNATAQESEGWYILENVDLHPQLCFKFSFENSSHVECPHQSGSLPSWTVSMDTQAQQLTLHFSSRTYATFSAAWSDPGLGPDTPMPPVYSISQTQGSVPVTLDLIIPFLRQENCILVWRSDVHFAWKHVLCPDVSHRHLGLLILALLALTALVGVVLVLLGRRLLPGSGRTRPVLLLHAADSEAQRRLVGALAELLRTALGGGRDVIVDLWEGTHVARIGPLPWLWAARERVAREQGTVLLLWNCAGPSTACSGDPQAASLRTLLCAAPRPLLLAYFSRLCAKGDIPRPLRALPRYRLLRDLPRLLRALDAQPATLASSWSHLGAKRCLKNRLEQCHLLELEAAKDDYQGSTNSPCGFSCL 2 415 437 VLCPDVSHRH LGLLILALLALTALVGVVLVLLG RRLLPGSGRT +O95256 
MLCLGWIFLWLVAGERIKGFNISGCSTKKLLWTYSTRSEEEFVLFCDLPEPQKSHFCHRNRLSPKQVPEHLPFMGSNDLSDVQWYQQPSNGDPLEDIRKSYPHIIQDKCTLHFLTPGVNNSGSYICRPKMIKSPYDVACCVKMILEVKPQTNASCEYSASHKQDLLLGSTGSISCPSLSCQSDAQSPAVTWYKNGKLLSVERSNRIVVDEVYDYHQGTYVCDYTQSDTVSSWTVRAVVQVRTIVGDTKLKPDILDPVEDTLEVELGKPLTISCKARFGFERVFNPVIKWYIKDSDLEWEVSVPEAKSIKSTLKDEIIERNIILEKVTQRDLRRKFVCFVQNSIGNTTQSVQLKEKRGVVLLYILLGTIGTLVAVLAASALLYRHWIEIVLLYRTYQSKDQTLGDKKDFDAFVSYAKWSSFPSEATSSLSEEHLALSLFPDVLENKYGYSLCLLERDVAPGGVYAEDIVSIIKRSRRGIFILSPNYVNGPSIFELQAAVNLALDDQTLKLILIKFCYFQEPESLPHLVKKALRVLPTVTWRGLKSVPPNSRFWAKMRYHMPVKNSQGFTWNQLRITSRIFQWKGLSRTETTGRSSQPKEW 2 360 382 VQLKEKRGVV LLYILLGTIGTLVAVLAASALLY RHWIEIVLLY +Q6PHB0 MHTPGTPAPGHPDPPPLLLLTLLLLLAASGRAVPCVFCGLPKPTNITFLSINMKNVLHWNPPESLHGVEVTYTVQYFIYGQKKWLNASKCGSINRTYCDLSVETSDYEHQFYAKVKAIWEARCSEWAETERFYPFLETQVSPPEVALTTGEKSISIALTAPEKWKRNPQDHTVSMQQIYPNLKYNVSVYNTKSRRTWSQCVTNSTLVLSWLEPNTLYCVHVESLVPGPPRLPMPSQKQCISTLEVQTSAWKAKVIFWYVFLTSVIVFLFSAIGYLVYRYIHVGKEKHPANLVLIYRNEIGTRVFEPTETITLNFITFSMLDDTKISPKDMNLLDKSSDDISVNDPEHNEAWEPHWEEVEGQHLGCSSHLMDAVCGAEQRDGDTSLTQHGWLNSTIPTGETDTEPQYKVLSDFYGEGEIQLSCEPEEAARTEKISEPLVTSANLDPQLEDLHHLGQEHTVSEDGPEEETSITVVDWDPQTGRLCIPSLPIFGRDPENYGHYERDQLLEGGLLSRLYENQAPDKPEKENENCLTRFMEEWGLHVQMES 2 255 277 VQTSAWKAKV IFWYVFLTSVIVFLFSAIGYLVY RYIHVGKEKH +Q6UXL0 MQTFTMVLEEIWTSLFMWFFYALIPCLLTDEVAILPAPQNLSVLSTNMKHLLMWSPVIAPGETVYYSVEYQGEYESLYTSHIWIPSSWCSLTEGPECDVTDDITATVPYNLRVRATLGSQTSAWSILKHPFNRNSTILTRPGMEITKDGFHLVIELEDLGPQFEFLVAYWRREPGAEEHVKMVRSGGIPVHLETMEPGAAYCVKAQTFVKAIGRYSAFSQTECVEVQGEAIPLVLALFAFVGFMLILVVVPLFVWKMGRLLQYSCCPVVVLPDTLKITNSPQKLISCRREEVDACATAVMSPEELLRAWIS 2 233 255 CVEVQGEAIP LVLALFAFVGFMLILVVVPLFVW KMGRLLQYSC +Q8N6P7 
MRTLLTILTVGSLAAHAPEDPSDLLQHVKFQSSNFENILTWDSGPEGTPDTVYSIEYKTYGERDWVAKKGCQRITRKSCNLTVETGNLTELYYARVTAVSAGGRSATKMTDRFSSLQHTTLKPPDVTCISKVRSIQMIVHPTPTPIRAGDGHRLTLEDIFHDLFYHLELQVNRTYQMHLGGKQREYEFFGLTPDTEFLGTIMICVPTWAKESAPYMCRVKTLPDRTWTYSFSGAFLFSMGFLVAVLCYLSYRYVTKPPAPPNSLNVQRVLTFQPLRFIQEHVLIPVFDLSGPSSLAQPVQYSQIRVSGPREPAGAPQRHSLSEITYLGQPDISILQPSNVPPPQILSPLSYAPNAAPEVGPPSYAPQVTPEAQFPFYAPQAISKVQPSSYAPQATPDSWPPSYGVCMEGSGKDSPTGTLSSPKHLRPKGQLQKEPPAGSCMLGGLSLQEVTSLAMEESQEAKSLHQPLGICTDRTSDPNVLHSGEEGTPQYLKGQLPLLSSVQIEGHPMSLPLQPPSRPCSPSDQGPSPWGLLESLVCPKDEAKSPAPETSDLEQPTELDSLFRGLALTVQWES 2 227 249 CRVKTLPDRT WTYSFSGAFLFSMGFLVAVLCYL SYRYVTKPPA +Q6UWB1 MRGGRGAPFWLWPLPKLALLPLLWVLFQRTRPQGSAGPLQCYGVGPLGDLNCSWEPLGDLGAPSELHLQSQKYRSNKTQTVAVAAGRSWVAIPREQLTMSDKLLVWGTKAGQPLWPPVFVNLETQMKPNAPRLGPDVDFSEDDPLEATVHWAPPTWPSHKVLICQFHYRRCQEAAWTLLEPELKTIPLTPVEIQDLELATGYKVYGRCRMEKEEDLWGEWSPILSFQTPPSAPKDVWVSGNLCGTPGGEEPLLLWKAPGPCVQVSYKVWFWVGGRELSPEGITCCCSLIPSGAEWARVSAVNATSWEPLTNLSLVCLDSASAPRSVAVSSIAGSTELLVTWQPGPGEPLEHVVDWARDGDPLEKLNWVRLPPGNLSALLPGNFTVGVPYRITVTAVSASGLASASSVWGFREELAPLVGPTLWRLQDAPPGTPAIAWGEVPRHQLRGHLTHYTLCAQSGTSPSVCMNVSGNTQSVTLPDLPWGPCELWVTASTIAGQGPPGPILRLHLPDNTLRWKVLPGILFLWGLFLLGCGLSLATSGRCYHLRHKVLPRWVWEKVPDPANSSSGQPHMEQVPEAQPLGDLPILEVEEMEPPPVMESSQPAQATAPLDSGYEKHFLPTPEELGLLGPPRPQVLA 2 517 539 HLPDNTLRWK VLPGILFLWGLFLLGCGLSLATS GRCYHLRHKV +P05362 MAPSSPRPALPALLVLLGALFPGPGNAQTSVSPSKVILPRGGSVLVTCSTSCDQPKLLGIETPLPKKELLLPGNNRKVYELSNVQEDSQPMCYSNCPDGQSTAKTFLTVYWTPERVELAPLPSWQPVGKNLTLRCQVEGGAPRANLTVVLLRGEKELKREPAVGEPAEVTTTVLVRRDHHGANFSCRTELDLRPQGLELFENTSAPYQLQTFVLPATPPQLVSPRVLEVDTQGTVVCSLDGLFPVSEAQVHLALGDQRLNPTVTYGNDSFSAKASVSVTAEDEGTQRLTCAVILGNQSQETLQTVTIYSFPAPNVILTKPEVSEGTEVTVKCEAHPRAKVTLNGVPAQPLGPRAQLLLKATPEDNGRSFSCSATLEVAGQLIHKNQTRELRVLYGPRLDERDCPGNWTWPENSQQTPMCQAWGNPLPELKCLKDGTFPLPIGESVTVTRDLEGTYLCRARSTQGEVTRKVTVNVLSPRYEIVIITVVAAAVIMGTAGLSTYLYNRQRKIKKYRLQQAQKGTPMKPNTQATPP 2 481 503 TVNVLSPRYE IVIITVVAAAVIMGTAGLSTYLY NRQRKIKKYR +P35330 
MSSFACWSLSLLILFYSPGSGEKAFEVYIWSEKQIVEATESWKINCSTNCAAPDMGGLETPTNKIMLEEHPQGKWKQFLVSNVSKDTVFFCHFTCSGKQHSESLNIRVYQPPAQVTLKLQPPRVFVGEDFTIECTVSPVQPLERLTLSLLRGRETLKNQTFGGAETVPQEATATFNSTALKKDGLNFSCQAELDLRPHGGYIIRSISEYQILEVYEPMQDNQMVIIIVVVSILLFLFVTSVLLCFIFGQHWHRRRTGTYGVLAAWRRLPRAFRARPV 2 224 246 VYEPMQDNQM VIIIVVVSILLFLFVTSVLLCFI FGQHWHRRRT +O75144 MRLGSPGLLFLLFSSLRADTQEKEVRAMVGSDVELSCACPEGSRFDLNDVYVYWQTSESKTVVTYHIPQNSSLENVDSRYRNRALMSPAGMLRGDFSLRLFNVTPQDEQKFHCLVLSQSLGFQEVLSVEVTLHVAANFSVPVVSAPHSPSQDELTFTCTSINGYPRPNVYWINKTDNSLLDQALQNDTVFLNMRGLYDVVSVLRIARTPSVNIGCCIENVLLQQNLTVGSQTGNDIGERDKITENPVSTGEKNAATWSILAVLCLLVVVAVAIGWVCRDRCLQHSYAGAWAVSPETELTGHV 2 254 276 ENPVSTGEKN AATWSILAVLCLLVVVAVAIGWV CRDRCLQHSY +Q9JHJ8 MQLKCPCFVSLGTRQPVWKKLHVSSGFFSGLGLFLLLLSSLCAASAETEVGAMVGSNVVLSCIDPHRRHFNLSGLYVYWQIENPEVSVTYYLPYKSPGINVDSSYKNRGHLSLDSMKQGNFSLYLKNVTPQDTQEFTCRVFMNTATELVKILEEVVRLRVAANFSTPVISTSDSSNPGQERTYTCMSKNGYPEPNLYWINTTDNSLIDTALQNNTVYLNKLGLYDVISTLRLPWTSRGDVLCCVENVALHQNITSISQAESFTGNNTKNPQETHNNELKVLVPVLAVLAAAAFVSFIIYRRTRPHRSYTGPKTVQLELTDHA 2 280 299 PQETHNNELK VLVPVLAVLAAAAFVSFIIY RRTRPHRSYT +Q9Y6W8 MKSGLWYFFLFCLRIKVLTGEINGSANYEMFIFHNGGVQILCKYPDIVQQFKMQLLKGGQILCDLTKTKGSGNTVSIKSLKFCHSQLSNNSVSFFLYNLDHSHANYYFCNLSIFDPPPFKVTLTGGYLHIYESQLCCQLKFWLPIGCAAFVVVCILGCILICWLTKKKYSSSVHDPNGEYMFMRAVNTAKKSRLTDVTL 2 142 164 ESQLCCQLKF WLPIGCAAFVVVCILGCILICWL TKKKYSSSVH +P98153 MVPKADSGAFLLLFLLVLTVTEPLRPELRCNPGQFACRSGTIQCIPLPWQCDGWATCEDESDEANCPEVTGEVRPHHGKEAVDPRQGRARGGDPSHFHAVNVAQPVRFSSFLGKCPTGWHHYEGTASCYRVYLSGENYWDAAQTCQRLNGSLATFSTDQELRFVLAQEWDQPERSFGWKDQRKLWVGYQYVITGRNRSLEGRWEVAFKGSSEVFLPPDPIFASAMSENDNVFCAQLQCFHFPTLRHHDLHSWHAESCYEKSSFLCKRSQTCVDIKDNVVDEGFYFTPKGDDPCLSCTCHGGEPEMCVAALCERPQGCQQYRKDPKECCKFMCLDPDGNSLFDSMASGMRLVVSCISSFLILSLLLFMVHRLRQRRRERIESLIGANLHHFNLGRRIPGFDYGPDGFGTGLTPLHLSDDGEGGTFHFHDPPPPYTAYKYPDIGQPDDPPPPYEASIHPDSVFYDPADDDAFEPVEVSLPAPGDGGSEGALLRRLEQPLPTAGASLADLEDSADSSSALLVPPDPAQSGSTPAAEALPGGGRHSRSSLNTVV 2 346 368 DGNSLFDSMA SGMRLVVSCISSFLILSLLLFMV HRLRQRRRER +Q8IVU1 
MAVQRAASPRRPPAPLWPRLLLPLLLLLLPAPSEGLGHSAELAFAVEPSDDVAVPGQPIVLDCRVEGTPPVRITWRKNGVELPESTHSTLLANGSLMIRHFRLEPGGSPSDEGDYECVAQNRFGLVVSRKARIQAATMSDFHVHPQATVGEEGGVARFQCQIHGLPKPLITWEKNRVPIDTDNERYTLLPKGVLQITGLRAEDGGIFHCVASNIASIRISHGARLTVSGSGSGAYKEPAILVGPENLTLTVHQTAVLECVATGNPRPIVSWSRLDGRPIGVEGIQVLGTGNLIISDVTVQHSGVYVCAANRPGTRVRRTAQGRLVVQAPAEFVQHPQSISRPAGTTAMFTCQAQGEPPPHVTWLKNGQVLGPGGHVRLKNNNSTLTISGIGPEDEAIYQCVAENSAGSSQASARLTVLWAEGLPGPPRNVRAVSVSSTEVRVSWSEPLANTKEIIGYVLHIRKAADPPELEYQEAVSKSTFQHLVSDLEPSTAYSFYIKAYTPRGASSASVPTLASTLGEAPAPPPLSVRVLGSSSLQLLWEPWPRLAQHEGGFKLFYRPASKTSFTGPILLPGTVSSYNLSQLDPTAVYEVKLLAYNQHGDGNATVRFVSLRGASERTALSPPCDCRKEEAANQTSTTGIVIGIHIGVTCIIFCVLFLLFGQRGRVLLCKDVENQLSPPQGPRSQRDPGILALNGARRGQRGQLGRDEKRVDMKELEQLFPPASAAGQPDPRPTQDPAAPAPCEETQLSVLPLQGCGLMEGKTTEAKTTEATAPCAGLAAAPPPPDGGPGLLSEGQASRPAAARVTQPAHSEQ 2 639 661 KEEAANQTST TGIVIGIHIGVTCIIFCVLFLLF GQRGRVLLCK +Q9H665 MGPGRCLLTALLLLALAPPPEASQYCGRLEYWNPDNKCCSSCLQRFGPPPCPDYEFRENCGLNDHGDFVTPPFRKCSSGQCNPDGAELCSPCGGGAVTPTPAAGGGRTPWRCRERPVPAKGHCPLTPGNPGAPSSQERSSPASSIAWRTPEPVPQQAWPNFLPLVVLVLLLTLAVIAILLFILLWHLCWPKEKADPYPYPGLVCGVPNTHTPSSSHLSSPGALETGDTWKEASLLPLLSRELSSLASQPLSRLLDELEVLEELIVLLDPEPGPGGGMAHGTTRHLAARYGLPAAWSTFAYSLRPSRSPLRALIEMVVAREPSASLGQLGTHLAQLGRADALRVLSKLGSSGVCWA 2 165 187 QQAWPNFLPL VVLVLLLTLAVIAILLFILLWHL CWPKEKADPY +A8E0Y8 
MACILCVASLFLSLTKFSIGQREVKIQEGPLYRAEGYPVSIRCTVSGHQGPSTQDFRWSIYLPSAPTKEVQIISTKDAGFSYAVYAQRVQSKEIYIERLQGDSVLLHISKLQMKDAGEYECHTPNTDGKYFGSYSAKTNLTVVPDTLSATMPSQTLSKKEGEPLELTCETTKATVQHTHLSLTWYLMQEGGGSQATEIVSLSKDFVLTPGSSYADRFVAGDVRLDKLGATSFRLSVGKLQPSDQGQVFCEATEWIQDPDETWTLITRKQTDQTALRIQPAARDFTVSITASSSPDEGKPLELVCLAVGRDGNPQLQGVWFLNGKEIAQTDAGGVLDLKRDYRDRASQGQLQVSKLSAQTFSLKIFSVGPEDVGTYSCEVAEVARTQMGSWQVLQRKQSPGYRVQLREPAARSVTVSAEQRTVWEGETLTLLCKAAGDVSALSVSWWLTPQDQSTPVFVAGMGQDGTVQLGVSSPGPAHRGNRRLEKVDWATFRLEIASAMVTDSGTYECRVSERLQNQAKGLQSTQKISVTVKSLKSSLRVNLMSRQPQVMLAHTFHLSCVVRANYSDLKLPFSVTWQFQPAGSGAFHRLIRIAHNGTVEWGDVLSQIHRKTKVSQSFFRSQLQIYDAAMEETGVYRCTVEVYDRDSICTSGPARVSATSNLLMITVTFPESKLSVNSSSQVQELSISSSTQIECAILSRSAGNLPLSIIWYFSSVSANASYLKILEMDQSSVVKYGDEFQTPRSKQKFYSEKVSQDLFLLNILSVEDSDQGHYHCAVEEWLLSTNDTWQKLERKTSGLTELKLRPTGSQVHVSKVNWTGNATEYGEAGFSCSLDGSGSTASLYSVTWYRGRGTATATAAAVANATATITAPAGSQMLVHLQYDGLLQYGREGSRRLQHCYRSSPTDFVLKLHRVEMEDAGIYWCRVTEWQQHGHPGKWINQASGESQRMVLRVLRSEPTVSSLICSSGPLLHFLIVCPFVMLLLLATSFLCLYRKARKLSQLSLSAKKEKALWVGMRKTSLQKEAGEESGHY 2 971 993 VSSLICSSGP LLHFLIVCPFVMLLLLATSFLCL YRKARKLSQL +O75054 
MKCFFPVLSCLAVLGVVSAQRQVTVQEGPLYRTEGSHITIWCNVSGYQGPSEQNFQWSIYLPSSPEREVQIVSTMDSSFPYAIYTQRVRGGKIFIERVQGNSTLLHITDLQARDAGEYECHTPSTDKQYFGSYSAKMNLVVIPDSLQTTAMPQTLHRVEQDPLELTCEVASETIQHSHLSVAWLRQKVGEKPVEVISLSRDFMLHSSSEYAQRQSLGEVRLDKLGRTTFRLTIFHLQPSDQGEFYCEAAEWIQDPDGSWYAMTRKRSEGAVVNVQPTDKEFTVRLETEKRLHTVGEPVEFRCILEAQNVPDRYFAVSWAFNSSLIATMGPNAVPVLNSEFAHREARGQLKVAKESDSVFVLKIYHLRQEDSGKYNCRVTEREKTVTGEFIDKESKRPKNIPIIVLPLKSSISVEVASNASVILEGEDLRFSCSVRTAGRPQGRFSVIWQLVDRQNRRSNIMWLDRDGTVQPGSSYWERSSFGGVQMEQVQPNSFSLGIFNSRKEDEGQYECHVTEWVRAVDGEWQIVGERRASTPISITALEMGFAVTAISRTPGVTYSDSFDLQCIIKPHYPAWVPVSVTWRFQPVGTVEFHDLVTFTRDGGVQWGDRSSSFRTRTAIEKAESSNNVRLSISRASDTEAGKYQCVAELWRKNYNNTWTRLAERTSNLLEIRVLQPVTKLQVSKSKRTLTLVENKPIQLNCSVKSQTSQNSHFAVLWYVHKPSDADGKLILKTTHNSAFEYGTYAEEEGLRARLQFERHVSGGLFSLTVQRAEVSDSGSYYCHVEEWLLSPNYAWYKLAEEVSGRTEVTVKQPDSRLRLSQAQGNLSVLETRQVQLECVVLNRTSITSQLMVEWFVWKPNHPERETVARLSRDATFHYGEQAAKNNLKGRLHLESPSPGVYRLFIQNVAVQDSGTYSCHVEEWLPSPSGMWYKRAEDTAGQTALTVMRPDASLQVDTVVPNATVSEKAAFQLDCSIVSRSSQDSRFAVAWYSLRTKAGGKRSSPGLEEQEEEREEEEEEDDDDDDDPTERTALLSVGPDAVFGPEGSPWEGRLRFQRLSPVLYRLTVLQASPQDTGNYSCHVEEWLPSPQKEWYRLTEEESAPIGIRVLDTSPTLQSIICSNDALFYFVFFYPFPIFGILIITILLVRFKSRNSSKNSDGKNGVPLLWIKEPHLNYSPTCLEPPVLSIHPGAID 2 1125 1147 LQSIICSNDA LFYFVFFYPFPIFGILIITILLV RFKSRNSSKN +Q7TSN7 MEGSWRDVLAVLVILAQLTASGSSYQIIEGPQNVTVLKDSEAHFNCTVTHGWKLLMWTLNQMVVLSLTTQGPIITNNRFTYASYNSTDSFISELIIHDVQPSDSGSVQCSLQNSHGFGSAFLSVQVMGTLNIPSNNLIVTEGEPCNVTCYAVGWTSLPDISWELEVPVSHSSYNSFLESGNFMRVLSVLDLTPLGNGTLTCVAELKDLQASKSLTVNLTVVQPPPDSIGEEGPALPTWAIILLAVAFSLLLILIIVLIIIFCCCCASRREKEESTYQNEIRKSANMRTNKADPETKLKGGKENYGYSSDEAKAAQTASLPPKSAEVSLPEKRSSSLPYQELNKHQPGPATHPRVSFDIASPQKVRNVTLV 2 239 261 GEEGPALPTW AIILLAVAFSLLLILIIVLIIIF CCCCASRREK +O95976 MGTASRSNIARHLQTNLILFCVGAVGACTLSVTQPWYLEVDYTHEAVTIKCTFSATGCPSEQPTCLWFRYGAHQPENLCLDGCKSEADKFTVREALKENQVSLTVNRVTSNDSAIYICGIAFPSVPEARAKQTGGGTTLVVREIKLLSKELRSFLTALVSLLSVYVTGVCVAFILLSKSKSNPLRNKEIKEDSQKKKSARRIFQEIAQELYHKRHVETNQQSEKDNNTYENRRVLSNYERP 2 154 176 IKLLSKELRS FLTALVSLLSVYVTGVCVAFILL SKSKSNPLRN 
+Q61098 MHHEELILTLCILIVKSASKSCIHRSQIHVVEGEPFYLKPCGISAPVHRNETATMRWFKGSASHEYRELNNRSSPRVTFHDHTLEFWPVEMEDEGTYISQVGNDRRNWTLNVTKRNKHSCFSDKLVTSRDVEVNKSLHITCKNPNYEELIQDTWLYKNCKEISKTPRILKDAEFGDEGYYSCVFSVHHNGTRYNITKTVNITVIEGRSKVTPAILGPKCEKVGVELGKDVELNCSASLNKDDLFYWSIRKEDSSDPNVQEDRKETTTWISEGKLHASKILRFQKITENYLNVLYNCTVANEEAIDTKSFVLVRKEIPDIPGHVFTGGVTVLVLASVAAVCIVILCVIYKVDLVLFYRRIAERDETLTDGKTYDAFVSYLKECHPENKEEYTFAVETLPRVLEKQFGYKLCIFERDVVPGGAVVEEIHSLIEKSRRLIIVLSQSYLTNGARRELESGLHEALVERKIKIILIEFTPASNITFLPPSLKLLKSYRVLKWRADSPSMNSRFWKNLVYLMPAKAVKPWREESEARSVLSAP 2 326 348 IPDIPGHVFT GGVTVLVLASVAAVCIVILCVIY KVDLVLFYRR +Q9NPH3 MTLLWCVVSLYFYGILQSDASERCDDWGLDTMRQIQVFEDEPARIKCPLFEHFLKFNYSTAHSAGLTLIWYWTRQDRDLEEPINFRLPENRISKEKDVLWFRPTLLNDTGNYTCMLRNTTYCSKVAFPLEVVQKDSCFNSPMKLPVHKLYIEYGIQRITCPNVDGYFPSSVKPTITWYMGCYKIQNFNNVIPEGMNLSFLIALISNNGNYTCVVTYPENGRTFHLTRTLTVKVVGSPKNAVPPVIHSPNDHVVYEKEPGEELLIPCTVYFSFLMDSRNEVWWTIDGKKPDDITIDVTINESISHSRTEDETRTQILSIKKVTSEDLKRSYVCHARSAKGEVAKAAKVKQKVPAPRYTVELACGFGATVLLVVILIVVYHVYWLEMVLFYRAHFGTDETILDGKEYDIYVSYARNAEEEEFVLLTLRGVLENEFGYKLCIFDRDSLPGGIVTDETLSFIQKSRRLLVVLSPNYVLQGTQALLELKAGLENMASRGNINVILVQYKAVKETKVKELKRAKTVLTVIKWKGEKSKYPQGRFWKQLQVAMPVKKSPRRSSSDEQGLSYSSLKNV 2 360 382 KVPAPRYTVE LACGFGATVLLVVILIVVYHVYW LEMVLFYRAH +Q9HBE5 MPRGWAAPLLLLLLQGGWGCPDLVCYTDYLQTVICILEMWNLHPSTLTLTWQDQYEELKDEATSCSLHRSAHNATHATYTCHMDVFHFMADDIFSVNITDQSGNYSQECGSFLLAESIKPAPPFNVTVTFSGQYNISWRSDYEDPAFYMLKGKLQYELQYRNRGDPWAVSPRRKLISVDSRSVSLLPLEFRKDSSYELQVRAGPMPGSSYQGTWSEWSDPVIFQTQSEELKEGWNPHLLLLLLLVIVFIPAFWSLKTHPLWRLWKKIWAVPSPERFFMPLYKGCSGDFKKWVGAPFTGSSLELGPWSPEVPSTLEVYSCHPPRSPAKRLQLTELQEPAELVESDGVPKPSFWPTAQNSGGSAYSEERDRPYGLVSIDTVTVLDAEGPCTWPCSCEDDGYPALDLDAGLEPSPGLEDPLLDAGTTVLSCGCVSAGSPGLGGPLGSLLDRLKPPLADGEDWAGGLPWGGRSPGGVSESEAGSPLAGLDMDTFDSGFVGSDCSSPVECDFTSPGDEGPPRSYLRQWVVIPPPLSSPGPQAS 2 233 255 FQTQSEELKE GWNPHLLLLLLLVIVFIPAFWSL KTHPLWRLWK +Q5VWK5 
MNQVTIQWDAVIALYILFSWCHGGITNINCSGHIWVEPATIFKMGMNISIYCQAAIKNCQPRKLHFYKNGIKERFQITRINKTTARLWYKNFLEPHASMYCTAECPKHFQETLICGKDISSGYPPDIPDEVTCVIYEYSGNMTCTWNAGKLTYIDTKYVVHVKSLETEEEQQYLTSSYINISTDSLQGGKKYLVWVQAANALGMEESKQLQIHLDDIVIPSAAVISRAETINATVPKTIIYWDSQTTIEKVSCEMRYKATTNQTWNVKEFDTNFTYVQQSEFYLEPNIKYVFQVRCQETGKRYWQPWSSLFFHKTPETVPQVTSKAFQHDTWNSGLTVASISTGHLTSDNRGDIGLLLGMIVFAVMLSILSLIGIFNRSFRTGIKRRILLLIPKWLYEDIPNMKNSNVVKMLQENSELMNNNSSEQVLYVDPMITEIKEIFIPEHKPTDYKKENTGPLETRDYPQNSLFDNTTVVYIPDLNTGYKPQISNFLPEGSHLSNNNEITSLTLKPPVDSLDSGNNPRLQKHPNFAFSVSSVNSLSNTIFLGELSLILNQGECSSPDIQNSVEEETTMLLENDSPSETIPEQTLLPDEFVSCLGIVNEELPSINTYFPQNILESHFNRISLLEK 2 354 376 GHLTSDNRGD IGLLLGMIVFAVMLSILSLIGIF NRSFRTGIKR +P14784 MAAPALSWRLPLLILLLPLATSWASAAVNGTSQFTCFYNSRANISCVWSQDGALQDTSCQVHAWPDRRRWNQTCELLPVSQASWACNLILGAPDSQKLTTVDIVTLRVLCREGVRWRVMAIQDFKPFENLRLMAPISLQVVHVETHRCNISWEISQASHYFERHLEFEARTLSPGHTWEEAPLLTLKQKQEWICLETLTPDTQYEFQVRVKPLQGEFTTWSPWSQPLAFRTKPAALGKDTIPWLGHLLVGLSGAFGFIILVYLLINCRNTGPWLKKVLKCNTPDPSKFFSQLSSEHGGDVQKWLSSPFPSSSFSPGGLAPEISPLEVLERDKVTQLLLQQDKVPEPASLSSNHSLTSCFTNQGYFFFHLPDALEIEACQVYFTYDPYSEEDPDEGVAGAPTGSSPQPLQPLSGEDDAYCTFPSRDDLLLFSPSLLGGPSPPSTAPGGSGAGEERMPPSLQERVPRDWDPQPLGPPTPGVPDLVDFQPPPELVLREAGEEVPDAGPREGVSFPWSRPPGQGEFRALNARLPLNTDAYLSLQELQGQDPTHLV 2 243 265 PAALGKDTIP WLGHLLVGLSGAFGFIILVYLLI NCRNTGPWLK +P31785 MLKPSLPFTSLLFLQLPLLGVGLNTTILTPNGNEDTTADFFLTTMPTDSLSVSTLPLPEVQCFVFNVEYMNCTWNSSSEPQPTNLTLHYWYKNSDNDKVQKCSHYLFSEEITSGCQLQKKEIHLYQTFVVQLQDPREPRRQATQMLKLQNLVIPWAPENLTLHKLSESQLELNWNNRFLNHCLEHLVQYRTDWDHSWTEQSVDYRHKFSLPSVDGQKRYTFRVRSRFNPLCGSAQHWSEWSHPIHWGSNTSKENPFLFALEAVVISVGSMGLIISLLCVYFWLERTMPRIPTLKNLEDLVTEYHGNFSAWSGVSKGLAESLQPDYSERLCLVSEIPPKGGALGEGPGASPCNQHSPYWAPPCYTLKPET 2 262 284 KENPFLFALE AVVISVGSMGLIISLLCVYFWLE RTMPRIPTLK +Q8NI17 
MMWTWALWMLPSLCKFSLAALPAKPENISCVYYYRKNLTCTWSPGKETSYTQYTVKRTYAFGEKHDNCTTNSSTSENRASCSFFLPRITIPDNYTIEVEAENGDGVIKSHMTYWRLENIAKTEPPKIFRVKPVLGIKRMIQIEWIKPELAPVSSDLKYTLRFRTVNSTSWMEVNFAKNRKDKNQTYNLTGLQPFTEYVIALRCAVKESKFWSDWSQEKMGMTEEEAPCGLELWRVLKPAEADGRRPVRLLWKKARGAPVLEKTLGYNIWYYPESNTNLTETMNTTNQQLELHLGGESFWVSMISYNSLGKSPVATLRIPAIQEKSFQCIEVMQACVAEDQLVVKWQSSALDVNTWMIEWFPDVDSEPTTLSWESVSQATNWTIQQDKLKPFWCYNISVYPMLHDKVGEPYSIQAYAKEGVPSEGPETKVENIGVKTVTITWKEIPKSERKGIICNYTIFYQAEGGKGFSKTVNSSILQYGLESLKRKTSYIVQVMASTSAGGTNGTSINFKTLSFSVFEIILITSLIGGGLLILIILTVAYGLKKPNKLTHLCWPTVPNPAESSIATWHGDDFKDKLNLKESDDSVNTEDRILKPCSTPSDKLVIDKLVVNFGNVLQEIFTDEARTGQENNLGGEKNGYVTCPFRPDCPLGKSFEELPVSPEIPPRKSQYLRSRMPEGTRPEAKEQLLFSGQSLVPDHLCEEGAPNPYLKNSVTAREFLVSEKLPEHTKGEV 2 521 543 KTLSFSVFEI ILITSLIGGGLLILIILTVAYGL KKPNKLTHLC +P26952 MAANLWLILGLLASHSSDLAAVREAPPTAVTTPIQNLHIDPAHYTLSWDPAPGADITTGAFCRKGRDIFVWADPGLARCSFQSLSLCHVTNFTVFLGKDRAVAGSIQFPPDDDGDHEAAAQDLRCWVHEGQLSCQWERGPKATGDVHYRMFWRDVRLGPAHNRECPHYHSLDVNTAGPAPHGGHEGCTLDLDTVLGSTPNSPDLVPQVTITVNGSGRAGPVPCMDNTVDLQRAEVLAPPTLTVECNGSEAHARWVARNRFHHGLLGYTLQVNQSSRSEPQEYNVSIPHFWVPNAGAISFRVKSRSEVYPRKLSSWSEAWGLVCPPEVMPVKTALVTSVATVLGAGLVAAGLLLWWRKSLLYRLCPPIPRLRLPLAGEMVVWEPALEDCEVTPVTDA 2 333 355 CPPEVMPVKT ALVTSVATVLGAGLVAAGLLLWW RKSLLYRLCP +P32927 
MVLAQGLLSMALLALCWERSLAGAEETIPLQTLRCYNDYTSHITCRWADTQDAQRLVNVTLIRRVNEDLLEPVSCDLSDDMPWSACPHPRCVPRRCVIPCQSFVVTDVDYFSFQPDRPLGTRLTVTLTQHVQPPEPRDLQISTDQDHFLLTWSVALGSPQSHWLSPGDLEFEVVYKRLQDSWEDAAILLSNTSQATLGPEHLMPSSTYVARVRTRLAPGSRLSGRPSKWSPEVCWDSQPGDEAQPQNLECFFDGAAVLSCSWEVRKEVASSVSFGLFYKPSPDAGEEECSPVLREGLGSLHTRHHCQIPVPDPATHGQYIVSVQPRRAEKHIKSSVNIQMAPPSLNVTKDGDSYSLRWETMKMRYEHIDHTFEIQYRKDTATWKDSKTETLQNAHSMALPALEPSTRYWARVRVRTSRTGYNGIWSEWSEARSWDTESVLPMWVLALIVIFLTIAVLLALRFCGIYGYRLRRKWEEKIPNPSKSHLFQNGSAELWPPGSMSAFTSGSPPHQGPWGSRFPELEGVFPVGFGDSEVSPLTIEDPKHVCDPPSGPDTTPAASDLPTEQPPSPQPGPPAASHTPEKQASSFDFNGPYLGPPHSRSLPDILGQPEPPQEGGSQKSPPPGSLEYLCLPAGGQVQLVPLAQAMGPGQAVEVERRPSQGAAGSPSLESGGGPAPPALGPRVGGQDQKDSPVAIPMSSGDTEDPGVASGYVSSADLVFTPNSGASSVSLVPSLGLPSDQTPSLCPGLASGPPGAPGPVKSGFEGYVELPPIEGRSPRSPRNNPVPPEAKSPVLNPGERPADVSPTSPQPEGLLVLQQVGDYCFLPGLGPGPLSLRSKPSSPGPGPEIKNLDQAFQVKKPPGQAVPQVPVIQLFKALKQQDYLSLPPWEVNKPGEVC 2 443 465 SWDTESVLPM WVLALIVIFLTIAVLLALRFCGI YGYRLRRKWE +Q01344 MIIVAHVLLILLGATEILQADLLPDEKISLLPPVNFTIKVTGLAQVLLQWKPNPDQEQRNVNLEYQVKINAPKEDDYETRITESKCVTILHKGFSASVRTILQNDHSLLASSWASAELHAPPGSPGTSIVNLTCTTNTTEDNYSRLRSYQVSLHCTWLVGTDAPEDTQYFLYYRYGSWTEECQEYSKDTLGRNIACWFPRTFILSKGRDWLAVLVNGSSKHSAIRPFDQLFALHAIDQINPPLNVTAEIEGTRLSIQWEKPVSAFPIHCFDYEVKIHNTRNGYLQIEKLMTNAFISIIDDLSKYDVQVRAAVSSMCREAGLWSEWSQPIYVGNDEHKPLREWFVIVIMATICFILLILSLICKICHLWIKLFPPIPAPKSNIKDLFVTTNYEKAGSSETEIEVICYIEKPGVETLEDSVF 2 342 361 GNDEHKPLRE WFVIVIMATICFILLILSLI CKICHLWIKL +P40189 
MLTLQTWLVQALFIFLTTESTGELLDPCGYISPESPVVQLHSNFTAVCVLKEKCMDYFHVNANYIVWKTNHFTIPKEQYTIINRTASSVTFTDIASLNIQLTCNILTFGQLEQNVYGITIISGLPPEKPKNLSCIVNEGKKMRCEWDGGRETHLETNFTLKSEWATHKFADCKAKRDTPTSCTVDYSTVYFVNIEVWVEAENALGKVTSDHINFDPVYKVKPNPPHNLSVINSEELSSILKLTWTNPSIKSVIILKYNIQYRTKDASTWSQIPPEDTASTRSSFTVQDLKPFTEYVFRIRCMKEDGKGYWSDWSEEASGITYEDRPSKAPSFWYKIDPSHTQGYRTVQLVWKTLPPFEANGKILDYEVTLTRWKSHLQNYTVNATKLTVNLTNDRYLATLTVRNLVGKSDAAVLTIPACDFQATHPVMDLKAFPKDNMLWVEWTTPRESVKKYILEWCVLSDKAPCITDWQQEDGTVHRTYLRGNLAESKCYLITVTPVYADGPGSPESIKAYLKQAPPSKGPTVRTKKVGKNEAVLEWDQLPVDVQNGFIRNYTIFYRTIIGNETAVNVDSSHTEYTLSSLTSDTLYMVRMAAYTDEGGKDGPEFTFTTPKFAQGEIEAIVVPVCLAFLLTTLLGVLFCFNKRDLIKKHIWPNVPDPSKSHIAQWSPHTPPRHNFNSKDQMYSDGNFTDVSVVEIEANDKKPFPEDLKSLDLFKKEKINTEGHSSGIGGSSCMSSSRPSISSSDENESSQNTSSTVQYSTVVHSGYRHQVPSVQVFSRSESTQPLLDSEERPEDLQLVDHVDGGDGILPRQQYFKQNCSQHESSPDISHFERSKQVSSVNEEDFVRLKQQISDHISQSCGSGQMKMFQEVSAADAFGPGTEGQVERFETVGMEAATDEGMPKSYLPQTVRQGGYMPQ 2 620 642 TPKFAQGEIE AIVVPVCLAFLLTTLLGVLFCFN KRDLIKKHIW +P16871 MTILGTTFGMVFSLLQVVSGESGYAQNGDLEDAELDDYSFSCYSQLEVNGSQHSLTCAFEDPDVNITNLEFEICGALVEVKCLNFRKLQEIYFIETKKFLLIGKSNICVKVGEKSLTCKKIDLTTIVKPEAPFDLSVVYREGANDFVVTFNTSHLQKKYVKVLMHDVAYRQEKDENKWTHVNLSSTKLTLLQRKLQPAAMYEIKVRSIPDHYFKGFWSEWSPSYYFRTPEINNSSGEMDPILLTISILSFFSVALLVILACVLWKKRIKPIVWPSLPDHKKTLEHLCKKPRKNLNVSFNPESFLDCQIHRVDDIQARDEVEGFLQDTFPQQLEESEKQRLGGDVQSPNCPSEDVVITPESFGRDSSLTCLAGNVSACDAPILSSSRSLDCRESGKNGPHVYQDLLLSLGTTNSTLPPPFSLQSGILTLNPVAQGQPILTSLGSNQEEAYVTMSSFYQNQ 2 241 263 INNSSGEMDP ILLTISILSFFSVALLVILACVL WKKRIKPIVW +Q01114 MALGRCIAEGWTLERVAVKQVSWFLIYSWVCSGVCRGVSVPEQGGGGQKAGAFTCLSNSIYRIDCHWSAPELGQESRAWLLFTSNQVTEIKHKCTFWDSMCTLVLPKEEVFLPFDNFTITLHRCIMGQEQVSLVDSQYLPRRHIKLDPPSDLQSNVSSGRCVLTWGINLALEPLITSLSYELAFKRQEEAWEARHKDRIVGVTWLILEAVELNPGSIYEARLRVQMTLESYEDKTEGEYYKSHWSEWSQPVSFPSPQRRQGLLVPRWQWSASILVVVPIFLLLTGFVHLLFKLSPRLKRIFYQNIPSPEAFFHPLYSVYHGDFQSWTGARRAGPQARQNGVSTSSAGSESSIWEAVATLTYSPACPVQFACLKWEATAPGFPGLPGSEHVLPAGCLELEGQPSAYLPQEDWAPLGSARPPPPDSDSGSSDYCMLDCCEECHLSAFPGHTESPELTLAQPVALPVSSRA 2 269 291 RQGLLVPRWQ 
WSASILVVVPIFLLLTGFVHLLF KLSPRLKRIF +Q86SU0 MAWPKLPAPWLLLCTWLPAGCLSLLVTVQHTERYVTLFASIILKCDYTTSAQLQDVVVTWRFKSFCKDPIFDYYSASYQAALSLGQDPSNDCNDNQREVRIVAQRRGQNEPVLGVDYRQRKITIQNRADLVINEVMWWDHGVYYCTIEAPGDTSGDPDKEVKLIVLHWLTVIFIILGALLLLLLIGVCWCQCCPQYCCCYIRCPCCPAHCCCPEEALARHRYMKQAQALGPQMMGKPLYWGADRSSQVSSYPMHPLLQRDLSLPSSLPQMPMTQTTNQPPIANGVLEYLEKELRNLNLAQPLPPDLKGRFGHPCSMLSSLGSEVVERRIIHLPPLIRDLSSSRRTSDSLHQQWLTPIPSRPWDLREGRSHHHYPDFHQELQDRGPKSWALERRELDPSWSGRHRSSRLNGSPIHWSDRDSLSDVPSSSEARWRPSHPPFRSRCQERPRRPSPRESTQRHGRRRRHRSYSPPLPSGLSSWSSEEDKERQPQSWRAHRRGSHSPHWPEEKPPSYRSLDITPGKNSRKKGSVERRSEKDSSHSGRSVVI 2 163 185 TSGDPDKEVK LIVLHWLTVIFIILGALLLLLLI GVCWCQCCPQ +Q01638 MGFWILAILTILMYSTAAKFSKQSWGLENEALIVRCPRQGKPSYTVDWYYSQTNKSIPTQERNRVFASGQLLKFLPAAVADSGIYTCIVRSPTFNRTGYANVTIYKKQSDCNVPDYLMYSTVSGSEKNSKIYCPTIDLYNWTAPLEWFKNCQALQGSRYRAHKSFLVIDNVMTEDAGDYTCKFIHNENGANYSVTATRSFTVKDEQGFSLFPVIGAPAQNEIKEVEIGKNANLTCSACFGKGTQFLAAVLWQLNGTKITDFGEPRIQQEEGQNQSFSNGLACLDMVLRIADVKEEDLLLQYDCLALNLHGLRRHTVRLSRKNPIDHHSIYCIIAVCSVFLMLINVLVIILKMFWIEATLLWRDIAKPYKTRNDGKLYDAYVVYPRNYKSSTDGASRVEHFVHQILPDVLENKCGYTLCIYGRDMLPGEDVVTAVETNIRKSRRHIFILTPQITHNKEFAYEQEVALHCALIQNDAKVILIEMEALSELDMLQAEALQDSLQHLMKVQGTIKWREDHIANKRSLNSKFWKHVRYQMPVPSKIPRKASSLTPLAAQKQ 2 328 350 LSRKNPIDHH SIYCIIAVCSVFLMLINVLVIIL KMFWIEATLL +Q9BZV3 
MIMFPLFGKISLGILIFVLIEGDFPSLTAQTYLSIEEIQEPKSAVSFLLPEESTDLSLATKKKQPLDRRETERQWLIRRRRSILFPNGVKICPDESVAEAVANHVKYFKVRVCQEAVWEAFRTFWDRLPGREEYHYWMNLCEDGVTSIFEMGTNFSESVEHRSLIMKKLTYAKETVSSSELSSPVPVGDTSTLGDTTLSVPHPEVDAYEGASESSLERPEESISNEIENVIEEATKPAGEQIAEFSIHLLGKQYREELQDSSSFHHQHLEEEFISEVENAFTGLPGYKEIRVLEFRSPKENDSGVDVYYAVTFNGEAISNTTWDLISLHSNKVENHGLVELDDKPTVVYTISNFRDYIAETLQQNFLLGNSSLNPDPDSLQLINVRGVLRHQTEDLVWNTQSSSLQATPSSILDNTFQAAWPSADESITSSIPPLDFSSGPPSATGRELWSESPLGDLVSTHKLAFPSKMGLSSSPEVLEVSSLTLHSVTPAVLQTGLPVASEERTSGSHLVEDGLANVEESEDFLSIDSLPSSSFTQPVPKETIPSMEDSDVSLTSSPYLTSSIPFGLDSLTSKVKDQLKVSPFLPDASMEKELIFDGGLGSGSGQKVDLITWPWSETSSEKSAEPLSKPWLEDDDSLLPAEIEDKKLVLVDKMDSTDQISKHSKYEHDDRSTHFPEEEPLSGPAVPIFADTAAESASLTLPKHISEVPGVDDYSVTKAPLILTSVAISASTDKSDQADAILREDMEQITESSNYEWFDSEVSMVKPDMQTLWTILPESERVWTRTSSLEKLSRDILASTPQSADRLWLSVTQSTKLPPTTISTLLEDEVIMGVQDISLELDRIGTDYYQPEQVQEQNGKVGSYVEMSTSVHSTEMVSVAWPTEGGDDLSYTQTSGALVVFFSLRVTNMMFSEDLFNKNSLEYKALEQRFLELLVPYLQSNLTGFQNLEILNFRNGSIVVNSRMKFANSVPPNVNNAVYMILEDFCTTAYNTMNLAIDKYSLDVESGDEANPCKFQACNEFSECLVNPWSGEAKCRCFPGYLSVEERPCQSLCDLQPDFCLNDGKCDIMPGHGAICRCRVGENWWYRGKHCEEFVSEPVIIGITIASVVGLLVIFSAIIYFFIRTLQAHHDRSERESPFSGSSRQPDSLSSIENAVKYNPVYESHRAGCEKYEGPYPQHPFYSSASGDVIGGLSREEIRQMYESSELSREEIQERMRVLELYANDPEFAAFVREQQVEEV 2 1102 1124 CEEFVSEPVI IGITIASVVGLLVIFSAIIYFFI RTLQAHHDRS +P17181 MMVVLLGATTLVLVAVAPWVLSAAAGGKNLKSPQKVEVDIIDDNFILRWNRSDESVGNVTFSFDYQKTGMDNWIKLSGCQNITSTKCNFSSLKLNVYEEIKLRIRAEKENTSSWYEVDSFTPFRKAQIGPPEVHLEAEDKAIVIHISPGTKDSVMWALDGLSFTYSLVIWKNSSGVEERIENIYSRHKIYKLSPETTYCLKVKAALLTSWKIGVYSPVHCIKTTVENELPPPENIEVSVQNQNYVLKWDYTYANMTFQVQWLHAFLKRNPGNHLYKWKQIPDCENVKTTQCVFPQNVFQKGIYLLRVQASDGNNTSFWSEEIKFDTEIQAFLLPPVFNIRSLSDSFHIYIGAPKQSGNTPVIQDYPLIYEIIFWENTSNAERKIIEKKTDVTVPNLKPLTVYCVKARAHTMDEKLNKSSVFSDAVCEKTKPGNTSKIWLIVGICIALFALPFVIYAAKVFLRCINYVFFPSLKPSSSIDEYFSEQPLKNLLLSTSEEQIEKCFIIENISTIATVEETNQTDEDHKKYSSQTSQDSGNYSNEDESESKTSEELQQDFV 2 437 459 EKTKPGNTSK IWLIVGICIALFALPFVIYAAKV FLRCINYVFF +P33896 
MLAVVGAAALVLVAGAPWVLPSAAGGENLKPPENIDVYIIDDNYTLKWSSHGESMGSVTFSAEYRTKDEAKWLKVPECQHTTTTKCEFSLLDTNVYIKTQFRVRAEEGNSTSSWNEVDPFIPFYTAHMSPPEVRLEAEDKAILVHISPPGQDGNMWALEKPSFSYTIRIWQKSSSDKKTINSTYYVEKIPELLPETTYCLEVKAIHPSLKKHSNYSTVQCISTTVANKMPVPGNLQVDAQGKSYVLKWDYIASADVLFRAQWLPGYSKSSSGSRSDKWKPIPTCANVQTTHCVFSQDTVYTGTFFLHVQASEGNHTSFWSEEKFIDSQKHILPPPPVITVTAMSDTLLVYVNCQDSTCDGLNYEIIFWENTSNTKISMEKDGPEFTLKNLQPLTVYCVQARVLFRALLNKTSNFSEKLCEKTRPGSFSTIWIITGLGVVFFSVMVLYALRSVWKYLCHVCFPPLKPPRSIDEFFSEPPSKNLVLLTAEEHTERCFIIENTDTVAVEVKHAPEEDLRKYSSQTSQDSGNYSNEEEESVGTESGQAVLSKAPCGGPCSVPSPPGTLEDGTCFLGNEKYLQSPALRTEPALLC 2 427 449 KLCEKTRPGS FSTIWIITGLGVVFFSVMVLYAL RSVWKYLCHV +P15260 MALLFLLPLVMQGVSRAEMGTADLGPSSVPTPTNVTIESYNMNPIVYWEYQIMPQVPVFTVEVKNYGVKNSEWIDACINISHHYCNISDHVGDPSNSLWVRVKARVGQKESAYAKSEEFAVCRDGKIGPPKLDIRKEEKQIMIDIFHPSVFVNGDEQEVDYDPETTCYIRVYNVYVRMNGSEIQYKILTQKEDDCDEIQCQLAIPVSSLNSQYCVSAEGVLHVWGVTTEKSKEVCITIFNSSIKGSLWIPVVAALLLFLVLSLVFICFYIKKINPLKEKSIILPKSLISVVRSATLETKPESKYVSLITSYQPFSLEKEVVCEEPLSPATVPGMHTEDNPGKVEHTEELSSITEVVTTEENIPDVVPGSHLTPIERESSSPLSSNQSEPGSIALNSYHSRNCSESDHSRNGFDTDSSCLESHSSLSDSEFPPNNKGEIKTEGQELITVIKAPTSFGYDKPHVLVDLLVDDSGKESLIGYRPTEDSKEFS 2 248 270 IFNSSIKGSL WIPVVAALLLFLVLSLVFICFYI KKINPLKEKS +P38484 MRPTLLWSLLLLLGVFAAAAAAPPDPLSQLPAPQHPKIRLYNAEQVLSWEPVALSNSTRPVVYQVQFKYTDSKWFTADIMSIGVNCTQITATECDFTAASPSAGFPMDFNVTLRLRAELGALHSAWVTMPWFQHYRNVTVGPPENIEVTPGEGSLIIRFSSPFDIADTSTAFFCYYVHYWEKGGIQQVKGPFRSNSISLDNLKPSRVYCLQVQAQLLWNKSNIFRVGHLSNISCYETMADASTELQQVILISVGTFSLLSVLAGACFFLVLKYRGLIKYWFHTPPSIPLQIEEYLKDPTQPILEALDKDSSPKDDVWDSVSIISFPEKEQEDVLQTL 2 248 270 MADASTELQQ VILISVGTFSLLSVLAGACFFLV LKYRGLIKYW +Q8IU57 
MAGPERWGPLLLCLLQAAPGRPRLAPPQNVTLLSQNFSVYLTWLPGLGNPQDVTYFVAYQSSPTRRRWREVEECAGTKELLCSMMCLKKQDLYNKFKGRVRTVSPSSKSPWVESEYLDYLFEVEPAPPVLVLTQTEEILSANATYQLPPCMPPLDLKYEVAFWKEGAGNKTLFPVTPHGQPVQITLQPAASEHHCLSARTIYTFSVPKYSKFSKPTCFLLEVPEANWAFLVLPSLLILLLVIAAGGVIWKTLMGNPWFQRAKMPRALDFSGHTHPVATFQPSRPESVNDLFLCPQKELTRGVRPTPRVRAPATQQTRWKKDLAEDEEEEDEEDTEDGVSFQPYIEPPSFLGQEHQAPGHSEAGGVDSGRPRAPLVPSEGSSAWDSSDRSWASTVDSSWDRAGSSGYLAEKGPGQGPGGDGHQESLPPPEFSKDSGFLEELPEDNLSSWATWGTLPPEPNLVPGGPPVSLQTLTFCWESSPEEEEEARESEIEDSDAGSWGAESTQRTEDRGRTLGHYMAR 2 227 249 CFLLEVPEAN WAFLVLPSLLILLLVIAAGGVIW KTLMGNPWFQ +Q9WTL4 MAVPALWPWGVHLLMSLLSLGSGLDTLEVCPSLDIRSEVTELRRLENCSVVEGHLQILLMFAATGEDFRGLSFPRLTQVTDYLLLFRVYGLESLRDLFPNLTVIRGTRLFLGYALIIFEMPHLRDVGLPSLGAVLRGAVRVEKNQELCHLSTIDWGLLQPAPGTNHIVGNKLGEECADVCPGVLGAAGEPCSRTTFSGRTDYRCWTSSHCQKVCPCPRGMACTAGGDCCHSECLGGCSQPEDPRACVACRHLYFQGVCLRACPPGTYQYESWRCVTAELCAHLREVPGLATTFGIYEGSCLAQCPPGFTRNGSSIFCHKCEGLCPKECKVGTKTIDSVQATQDLVGCTHVEGNLILNLRQGYNLEPELQRNLGLVETITGFLKIKHSFALVTLGFFKNLKLIRGDSMVDGNYTLYVLDNQNLQQLGSWVTAGLTIPVGKIYFAFNPRLCLEHIYQLEEVTGTRGRQSKAEINPRTNGDRAACQTRTLRFVFNLTEEDRILLRWERYEPLEARDLLSFIVYYKESPFQNATEHVGPDACGTQSWNLLDVELPLSRTQEPGVTLAPLKPWTQYAVFVRAITLTTAEDSPHQGAQSPIVYLRTLPAAPTVPQDVISTSNSSSHLLVRWKPPVQRNGNITYYLVLWQRLAEDGDLYINDYCHRGLRLPTSSHDTRFDREDPALEAEPEQGCCPCQHSPPGQALPALEAQEVTFQKKFENFLHHAITIPKAPWKVTSINKNPQRDSERHRREAGLLRLGKNNSDFEIQEDKVPRERAILSGLRHFTEYRIDIHACNHAAHTVGCSAATFVFARTMPHREADGIPGKVAWKAAGKSSVTLHWLEPPDPNGLILKYEIKYRRLGEEATVLCVSRLRYAKVGGVHLALLPPGNYSAKVRATSLAGNGSWTDGVTFYITDLEEEDTGGMRIFLTVTPVGFMLLVTLAALGFFYSRKRNSTLYTSVNPEYFSASHMYVPDEWEVPREQIAIIRELGQGSFGMVYEGLARGLEAGEESTPVALKTVNELASARERVEFLKEASVMKAFKCHHVVRLLGVVSQGQPTLVIMELMTRGDLKSHLRSLRPEAENNPGLPQPALSDMIQMAGEIADGMAYLAAKKFVHRDLAARNCMVSQDFTVKIGDFGMTRDVYETDYYRKGGKGLLPVRWMAPESLKDGIFTTHSDVWSFGVVLWEIVTLAEQPYQGLSNEQVLKFVMDGGVLEELENCPIQLQELMRLCWQHSPRLRPTFVHILDRIQDELRPSFRLCSFYYSPECQRGQASLLPTEAEPDSPPTLNGASDYSAPNGGPGH 2 922 944 LEEEDTGGMR IFLTVTPVGFMLLVTLAALGFFY SRKRNSTLYT +Q3MIP1 
MSVHYTLNLRVFWPLVTGLCTALVCLYHVLRGSGGARAEPADGVDGGFPLLKVAVLLLLSYVLLRCRHAVRQRFLPGSPRLEGHAAFSSRHFREPGLSILLESYYEHEVRLSPHVLGHSKAHVSRIVGELVRAGRARGSPGLIPGGALALAFRGDFIQVGSAYEQHKIRRPDSFDVLVPLRLPPLVALEPRSLGEEPALAPAFRGCFLCALKAPPSPSGASGGHWLRDCKPFADAFCVDVRGRRHLSATLVLRWFQSHLQRSLATVRYSLEGRCRVTLTPGGLEQPPTLHILPCRTDYGCCRLSMAVRLIPAVHLGDGVFLVAPPPPPLPSAPLLELPEGLRAEALWGVNTARQEQKLLSWLQERAAPGACYLKCLQLLKALRDLGARGLDSAAATQWGRILSSYVLKTVLLAVLLRKGAPGQGWDEEHLGRCLEELVQFLRDCLLRRHTLFHCVLGPGGAAAEVGPLPKALREAAPVDLLAAFDGHARELAAARLLSTWQRLPQLLRAYGGPRYLARCPPPRSQRTQGFLEGEP 2 46 64 ARAEPADGVD GGFPLLKVAVLLLLSYVLL RCRHAVRQRF +Q9NZN1 MKAPIPHLILLYATFTQSLKVVTKRGSADGCTDWSIDIKKYQVLVGEPVRIKCALFYGYIRTNYSLAQSAGLSLMWYKSSGPGDFEEPIAFDGSRMSKEEDSIWFRPTLLQDSGLYACVIRNSTYCMKVSISLTVGENDTGLCYNSKMKYFEKAELSKSKEISCRDIEDFLLPTREPEILWYKECRTKTWRPSIVFKRDTLLIREVREDDIGNYTCELKYGGFVVRRTTELTVTAPLTDKPPKLLYPMESKLTIQETQLGDSANLTCRAFFGYSGDVSPLIYWMKGEKFIEDLDENRVWESDIRILKEHLGEQEVSISLIVDSVEEGDLGNYSCYVENGNGRRHASVLLHKRELMYTVELAGGLGAILLLLVCLVTIYKCYKIEIMLFYRNHFGAEELDGDNKDYDAYLSYTKVDPDQWNQETGEEERFALEILPDMLEKHYGYKLFIPDRDLIPTGTYIEDVARCVDQSKRLIIVMTPNYVVRRGWSIFELETRLRNMLVTGEIKVILIECSELRGIMNYQEVEALKHTIKLLTVIKWHGPKCNKLNSKFWKRLQYEMPFKRIEPITHEQALDVSEQGPFGELQTVSAISMAAATSTALATAHPDLRSTFHNTYHSQMRQKHYYRSYEYDVPPTGTLPLTSIGNQHTYCNIPMTLINGQRPQTKSSREQNPDEAHTNSAILPLLPRETSISSVIW 2 356 378 SVLLHKRELM YTVELAGGLGAILLLLVCLVTIY KCYKIEIMLF +P26006 
MGPGPSRAPRAPRLMLCALALMVAAGGCVVSAFNLDTRFLVVKEAGNPGSLFGYSVALHRQTERQQRYLLLAGAPRELAVPDGYTNRTGAVYLCPLTAHKDDCERMNITVKNDPGHHIIEDMWLGVTVASQGPAGRVLVCAHRYTQVLWSGSEDQRRMVGKCYVRGNDLELDSSDDWQTYHNEMCNSNTDYLETGMCQLGTSGGFTQNTVYFGAPGAYNWKGNSYMIQRKEWDLSEYSYKDPEDQGNLYIGYTMQVGSFILHPKNITIVTGAPRHRHMGAVFLLSQEAGGDLRRRQVLEGSQVGAYFGSAIALADLNNDGWQDLLVGAPYYFERKEEVGGAIYVFMNQAGTSFPAHPSLLLHGPSGSAFGLSVASIGDINQDGFQDIAVGAPFEGLGKVYIYHSSSKGLLRQPQQVIHGEKLGLPGLATFGYSLSGQMDVDENFYPDLLVGSLSDHIVLLRARPVINIVHKTLVPRPAVLDPALCTATSCVQVELCFAYNQSAGNPNYRRNITLAYTLEADRDRRPPRLRFAGSESAVFHGFFSMPEMRCQKLELLLMDNLRDKLRPIIISMNYSLPLRMPDRPRLGLRSLDAYPILNQAQALENHTEVQFQKECGPDNKCESNLQMRAAFVSEQQQKLSRLQYSRDVRKLLLSINVTNTRTSERSGEDAHEALLTLVVPPALLLSSVRPPGACQANETIFCELGNPFKRNQRMELLIAFEVIGVTLHTRDLQVQLQLSTSSHQDNLWPMILTLLVDYTLQTSLSMVNHRLQSFFGGTVMGESGMKTVEDVGSPLKYEFQVGPMGEGLVGLGTLVLGLEWPYEVSNGKWLLYPTEITVHGNGSWPCRPPGDLINPLNLTLSDPGDRPSSPQRRRRQLDPGGGQGPPPVTLAAAKKAKSETVLTCATGRAHCVWLECPIPDAPVVTNVTVKARVWNSTFIEDYRDFDRVRVNGWATLFLRTSIPTINMENKTTWFSVDIDSELVEELPAEIELWLVLVAVGAGLLLLGLIILLLWKCGFFKRARTRALYEAKRQKAEMKSQPSETERLTDDY 2 992 1014 LVEELPAEIE LWLVLVAVGAGLLLLGLIILLLW KCGFFKRART +P13612 
MAWEARREPGPRRAAVRETVMLLLCLGVPTGRPYNVDTESALLYQGPHNTLFGYSVVLHSHGANRWLLVGAPTANWLANASVINPGAIYRCRIGKNPGQTCEQLQLGSPNGEPCGKTCLEERDNQWLGVTLSRQPGENGSIVTCGHRWKNIFYIKNENKLPTGGCYGVPPDLRTELSKRIAPCYQDYVKKFGENFASCQAGISSFYTKDLIVMGAPGSSYWTGSLFVYNITTNKYKAFLDKQNQVKFGSYLGYSVGAGHFRSQHTTEVVGGAPQHEQIGKAYIFSIDEKELNILHEMKGKKLGSYFGASVCAVDLNADGFSDLLVGAPMQSTIREEGRVFVYINSGSGAVMNAMETNLVGSDKYAARFGESIVNLGDIDNDGFEDVAIGAPQEDDLQGAIYIYNGRADGISSTFSQRIEGLQISKSLSMFGQSISGQIDADNNGYVDVAVGAFRSDSAVLLRTRPVVIVDASLSHPESVNRTKFDCVENGWPSVCIDLTLCFSYKGKEVPGYIVLFYNMSLDVNRKAESPPRFYFSSNGTSDVITGSIQVSSREANCRTHQAFMRKDVRDILTPIQIEAAYHLGPHVISKRSTEEFPPLQPILQQKKEKDIMKKTINFARFCAHENCSADLQVSAKIGFLKPHENKTYLAVGSMKTLMLNVSLFNAGDDAYETTLHVKLPVGLYFIKILELEEKQINCEVTDNSGVVQLDCSIGYIYVDHLSRIDISFLLDVSSLSRAEEDLSITVHATCENEEEMDNLKHSRVTVAIPLKYEVKLTVHGFVNPTSFVYGSNDENEPETCMVEKMNLTFHVINTGNSMAPNVSVEIMVPNSFSPQTDKLFNILDVQTTTGECHFENYQRVCALEQQKSAMQTLKGIVRFLSKTDKRLLYCIKADPHCLNFLCNFGKMESGKEASVHIQLEGRPSILEMDETSALKFEIRATGFPEPNPRVIELNKDENVAHVLLEGLHHQRPKRYFTIVIISSSLLLGLIVLLLISYVMWKAGFFKRQYKSILQEENRRDSWSYINSKSNDD 2 978 1000 HHQRPKRYFT IVIISSSLLLGLIVLLLISYVMW KAGFFKRQYK +P20701 
MKDSCITVMAMALLSGFFFFAPASSYNLDVRGARSFSPPRAGRHFGYRVLQVGNGVIVGAPGEGNSTGSLYQCQSGTGHCLPVTLRGSNYTSKYLGMTLATDPTDGSILACDPGLSRTCDQNTYLSGLCYLFRQNLQGPMLQGRPGFQECIKGNVDLVFLFDGSMSLQPDEFQKILDFMKDVMKKLSNTSYQFAAVQFSTSYKTEFDFSDYVKRKDPDALLKHVKHMLLLTNTFGAINYVATEVFREELGARPDATKVLIIITDGEATDSGNIDAAKDIIRYIIGIGKHFQTKESQETLHKFASKPASEFVKILDTFEKLKDLFTELQKKIYVIEGTSKQDLTSFNMELSSSGISADLSRGHAVVGAVGAKDWAGGFLDLKADLQDDTFIGNEPLTPEVRAGYLGYTVTWLPSRQKTSLLASGAPRYQHMGRVLLFQEPQGGGHWSQVQTIHGTQIGSYFGGELCGVDVDQDGETELLLIGAPLFYGEQRGGRVFIYQRRQLGFEEVSELQGDPGYPLGRFGEAITALTDINGDGLVDVAVGAPLEEQGAVYIFNGRHGGLSPQPSQRIEGTQVLSGIQWFGRSIHGVKDLEGDGLADVAVGAESQMIVLSSRPVVDMVTLMSFSPAEIPVHEVECSYSTSNKMKEGVNITICFQIKSLIPQFQGRLVANLTYTLQLDGHRTRRRGLFPGGRHELRRNIAVTTSMSCTDFSFHFPVCVQDLISPINVSLNFSLWEEEGTPRDQRAQGKDIPPILRPSLHSETWEIPFEKNCGEDKKCEANLRVSFSPARSRALRLTAFASLSVELSLSNLEEDAYWVQLDLHFPPGLSFRKVEMLKPHSQIPVSCEELPEESRLLSRALSCNVSSPIFKAGHSVALQMMFNTLVNSSWGDSVELHANVTCNNEDSDLLEDNSATTIIPILYPINILIQDQEDSTLYVSFTPKGPKIHQVKHMYQVRIQPSIHDHNIPTLEAVVGVPQPPSEGPITHQWSVQMEPPVPCHYEDLERLPDAAEPCLPGALFRCPVVFRQEILVQVIGTLELVGEIEASSMFSLCSSLSISFNSSKHFHLYGSNASLAQVVMKVDVVYEKQMLYLYVLSGIGGLLLLLLIFIVLYKVGFFKRNLKEKMEAGRGVPNGIPAEDSEQLASGQEAGDPGCLKPLHEKDSESGGGKD 2 1090 1112 KVDVVYEKQM LYLYVLSGIGGLLLLLLIFIVLY KVGFFKRNLK +Q3UV74 MLGQCTLLPVLAGLLSLESALSQLCTKDNVSTCQDCIRSGPSCAWCQKLNFTGRGEPDSVRCDTPEQLLLKGCTSEYLVDPKSLAESQEDKERDQRQLSPRNVTVFLRPGQAATFKVDFQRTQDNSVDLYFLMGLSGSAQGHLSNVQTLGSDLLKALNEISRSGRIGFGSIVNMTFQHILKLTADSSQFQRELRKQLVSGKLATPKGQLDAVVQVAICLGEIGWRNGTRFLVLVTDNDFHLAKDKTLGTRQNTSDGRCHLDDGMYRSRGEPDYQSVVQLASKLAENNIQPIFVVPSRMVKTYEKLTTFIPKLTIGELSDDSSNVAQLIRNAYSKLSSIVVLNHSTIPSILKVTYDSYCSNGTSNPGKPSGDCSGVQINDQVTFQVNITASECFREQFFFIQALGFMDSVTVRVLPLCECQCQEQSQHHSLCGGKGAMECGICRCNSGYAGKNCECQTQGPSSQDLEGSCRKDNSSIMCSGLGDCICGQCECHTSDIPNKEIYGQYCECNNVNCERYDGQVCGGPERGHCSCGRCFCRYGFVGSACQCRMSTSGCLNNRMVECSGHGRCYCNRCLCDPGYQPPLCEKRPGYFHRCSEYYSCARCLKDNSAIKCRECWNLLFSNTPFSNKTCMTERDSEGCWTTYTLYQPDQSDINSIYIKESLVCAEISNTTILLGVIVGVLLAVIFLLVYCMVYLKGTQKAAKLPRKGGAQSTLAQQPHFQEPHHVEPVWNQERQGTQ 2 672 694 LVCAEISNTT 
ILLGVIVGVLLAVIFLLVYCMVY LKGTQKAAKL +A2A863 MAGPCCSPWVKLLLLAAMLSASLPGDLANRCKKAQVKSCTECIRVDKSCAYCTDELFKERRCNTQAELLAAGCRGESILVMESSLEITENTQIDTSLHRSQVSPQGLQVRLRPGEERSFVFQVFEPLESPVDLYILMDFSNSMSDDLDNLKQMGQNLAKILRQLTSDYTIGFGKFVDKVSVPQTDMRPEKLKEPWPNSDPPFSFKNVISLTENVEEFWNKLQGERISGNLDAPEGGFDAILQTAVCTRDIGWRADSTHLLVFSTESAFHYEADGANVLAGIMNRNDEKCHLDASGAYTQYKTQDYPSVPTLVRLLAKHNIIPIFAVTNYSYSYYEKLHKYFPVSSLGVLQEDSSNIVELLEEAFYRIRSNLDIRALDSPRGLRTEVTSDTLQKTETGSFHIKRGEVGTYNVHLRAVEDIDGTHVCQLAKEDQGGNIHLKPSFSDGLRMDASVICDVCPCELQKEVRSARCHFRGDFMCGHCVCNEGWSGKTCNCSTGSLSDTQPCLREGEDKPCSGHGECQCGRCVCYGEGRYEGHFCEYDNFQCPRTSGFLCNDRGRCSMGECVCEPGWTGRSCDCPLSNATCIDSNGGICNGRGYCECGRCHCNQQSLYTDTTCEINYSAIRLGLCEDLRSCVQCQAWGTGEKKGRACDDCPFKVKMVDELKKAEEVVEYCSFRDEDDDCTYSYNVEGDGSPGPNSTVLVHKKKDCPPGSFWWLIPLLIFLLLLLALLLLLCWKYCACCKACLGLLPCCNRGHMVGFKEDHYMLRENLMASDHLDTPMLRSGNLKGRDTVRWKITNNVQRPGFATHAASTSPTELVPYGLSLRLGRLCTENLMKPGTRECDQLRQEVEENLNEVYRQVSGAHKLQQTKFRQQPNTGKKQDHTIVDTVLLAPRSAKQMLLKLTEKQVEQGSFHELKVAPGYYTVTAEQDARGMVEFQEGVELVDVRVPLFIRPEDDDEKQLLVEAIDVPVGTATLGRRLVNITIIKEQASGVVSFEQPEYSVSRGDQVARIPVIRHILDNGKSQVSYSTQDNTAHGHRDYVPVEGELLFHPGETWKELQVKLLELQEVDSLLRGRQVRRFQVQLSNPKFGARLGQPSTTTVILGEHDETDRSLINQTLSSPPPPHGDLGAPQNPNAKAAGSRKIHFNWLPPPGKPMGYRVKYWIQGDSESEAHLLDSKVPSVELTNLYPYCDYEMKVCAYGAQGEGPYSSLVSCRTHQEVPSEPGRLAFNVVSSTVTQLSWAEPAETNGEITAYEVCYGLVNEDNRPIGPMKKVLVDNPKNRMLLIENLRESQPYRYTVKARNGAGWGPEREAIINLATQPKRPMSIPIIPDIPIVDAQGGEDYENFLMYSDDVLRSPASSQRPSVSDDTGCGWKFEPLLGEELDLRRVTWRLPPELIPRLSASSGRSDEDGSVAGGVEGEGSGWIRGATPRPPGEHLVNGRMDFAYPGSANSLHRMTAANVAYGTHLSPHLSHRVLSTSSTLTRDYHSLTRTEHSHSGTLPRDYSTLTSLSSQDSRGAVGVPDTPTRLVFSALGPTSLKVSWQEPQCDRMLLGYSVEYQLLNGGEMHRLNIPNPGQTSVVVEDLLPNHSYVFRVRAQSQEGWGREREGVITIESQVHPQSPLCPLPGSAFTLSTPSAPGPLVFTALSPDSLQLSWERPRRPNGDILGYLVTCEMAQGGAPARTFRVDGDNPESRLTVPGLSENVPYKFKVQARTTEGFGPEREGIITIESQVGGPFPQLGSHSGLFQNPVQSEFSSVTSTHSTTTEPFLMDGLTLGTQRLEAGGSLTRHVTQEFVTRTLTASGSLSTHMDQQFFQT 2 711 733 LVHKKKDCPP GSFWWLIPLLIFLLLLLALLLLL CWKYCACCKA +P26010 
MVALPMVLVLLLVLSRGESELDAKIPSTGDATEWRNPHLSMLGSCQPAPSCQKCILSHPSCAWCKQLNFTASGEAEARRCARREELLARGCPLEELEEPRGQQEVLQDQPLSQGARGEGATQLAPQRVRVTLRPGEPQQLQVRFLRAEGYPVDLYYLMDLSYSMKDDLERVRQLGHALLVRLQEVTHSVRIGFGSFVDKTVLPFVSTVPSKLRHPCPTRLERCQSPFSFHHVLSLTGDAQAFEREVGRQSVSGNLDSPEGGFDAILQAALCQEQIGWRNVSRLLVFTSDDTFHTAGDGKLGGIFMPSDGHCHLDSNGLYSRSTEFDYPSVGQVAQALSAANIQPIFAVTSAALPVYQELSKLIPKSAVGELSEDSSNVVQLIMDAYNSLSSTVTLEHSSLPPGVHISYESQCEGPEKREGKAEDRGQCNHVRINQTVTFWVSLQATHCLPEPHLLRLRALGFSEELIVELHTLCDCNCSDTQPQAPHCSDGQGHLQCGVCSCAPGRLGRLCECSVAELSSPDLESGCRAPNGTGPLCSGKGHCQCGRCSCSGQSSGHLCECDDASCERHEGILCGGFGRCQCGVCHCHANRTGRACECSGDMDSCISPEGGLCSGHGRCKCNRCQCLDGYYGALCDQCPGCKTPCERHRDCAECGAFRTGPLATNCSTACAHTNVTLALAPILDDGWCKERTLDNQLFFFLVEDDARGTVVLRVRPQEKGADHTQAIVLGCVGGIVAVGLGLVLAYRLSVEIYDRREYSRFEKEQQQLNWKQDSNPLYKSAITTTINPRFQEADSPTL 2 724 746 VRPQEKGADH TQAIVLGCVGGIVAVGLGLVLAY RLSVEIYDRR +P26012 MCGSALAFFTAAFVCLQNDRRGPASFLWAAWVFSLVLGLGQGEDNRCASSNAASCARCLALGPECGWCVQEDFISGGSRSERCDIVSNLISKGCSVDSIEYPSVHVIIPTENEINTQVTPGEVSIQLRPGAEANFMLKVHPLKKYPVDLYYLVDVSASMHNNIEKLNSVGNDLSRKMAFFSRDFRLGFGSYVDKTVSPYISIHPERIHNQCSDYNLDCMPPHGYIHVLSLTENITEFEKAVHRQKISGNIDTPEGGFDAMLQAAVCESHIGWRKEAKRLLLVMTDQTSHLALDSKLAGIVVPNDGNCHLKNNVYVKSTTMEHPSLGQLSEKLIDNNINVIFAVQGKQFHWYKDLLPLLPGTIAGEIESKAANLNNLVVEAYQKLISEVKVQVENQVQGIYFNITAICPDGSRKPGMEGCRNVTSNDEVLFNVTVTMKKCDVTGGKNYAIIKPIGFNETAKIHIHRNCSCQCEDNRGPKGKCVDETFLDSKCFQCDENKCHFDEDQFSSESCKSHKDQPVCSGRGVCVCGKCSCHKIKLGKVYGKYCEKDDFSCPYHHGNLCAGHGECEAGRCQCFSGWEGDRCQCPSAAAQHCVNSKGQVCSGRGTCVCGRCECTDPRSIGRFCEHCPTCYTACKENWNCMQCLHPHNLSQAILDQCKTSCALMEQQHYVDQTSECFSSPSYLRIFFIIFIVTFLIGLLKVLIIRQVILQWNSNKIKSSSDYRVSASKKDKLILQSVCTRAVTYRREKPEEIKMDISKLNAHETFRCNF 2 682 704 QTSECFSSPS YLRIFFIIFIVTFLIGLLKVLII RQVILQWNSN +Q8IYV9 
MGPHFTLLCAALAGCLLPAEGCVICDPSVVLALKSLEKDYLPGHLDAKHHKAMMERVENAVKDFQELSLNEDAYMGVVDEATLQKGSWSLLKDLKRITDSDVKGDLFVKELFWMLHLQKETFATYVARFQKEAYCPNKCGVMLQTLIWCKNCKKEVHACRKSYDCGERNVEVPQMEDMILDCELNWHQASEGLTDYSFYRVWGNNTETLVSKGKEATLTKPMVGPEDAGSYRCELGSVNSSPATIINFHVTVLPKMIKEEKPSPNIVTPGEATTESSISLQPLQPEKMLASRLLGLLICGSLALITGLTFAIFRRRKVIDFIKSSLFGLGSGAAEQTQVPKEKATDSRQQ 2 291 313 QPLQPEKMLA SRLLGLLICGSLALITGLTFAIF RRRKVIDFIK +Q9D9J7 MGPHFTLLLAALANCLCPGRPCIKCDQFVTDALKTFENTYLNDHLPHDIHKNVMRMVNHEVSSFGVVTSAEDSYLGAVDENTLEQATWSFLKDLKRITDSDLKGELFIKELLWMLRHQKDIFNNLARQFQKEVLCPNKCGVMSQTLIWCLKCEKQLHICRKSLDCGERHIEVHRSEDLVLDCLLSWHRASKGLTDYSFYRVWENSSETLIAKGKEPYLTKSMVGPEDAGNYRCVLDTINQGHATVIRYDVTVLPPKHSEENQPPNIITQEEHETPVHVTPQTPPGQEPESELYPELHPELYPELIPTVAQNPEKKMKTRLLILLTLGFVVLVASIIISVLHFRKVSAKLKNASDEVKPTASGSKSDQSLSQQMGLKKASQADFNSDYSGDKSEATEN 2 320 342 QNPEKKMKTR LLILLTLGFVVLVASIIISVLHF RKVSAKLKNA +Q6UXV1 MPLALTLLLLSGLGAPGGWGCLQCDPLVLEALGHLRSALIPSRFQLEQLQARAGAVLMGMEGPFFRDYALNVFVGKVETNQLDLVASFVKNQTQHLMGNSLKDEPLLEELVTLRANVIKEFKKVLISYELKACNPKLCRLLKEEVLDCLHCQRITPKCIHKKYCFVDRQPRVALQYQMDSKYPRNQALLGILISVSLAVFVFVVIVVSACTYRQNRKLLLQ 2 187 209 QMDSKYPRNQ ALLGILISVSLAVFVFVVIVVSA CTYRQNRKLL +Q5VZ72 MGDLWLFLLLPLSAFHGVKGCLECDPKFIEDVGSLLGNLIPSEVPGRTQLLERQIKEMIHLSFKVSHSDKRLRVLAVQQVVKLRTWLKNEFYKLGNETWKGVFIYQGKLLDVCQNLESKLKELLKNFSEIACSEDCIVVEGPILDCWTCLRMTNRCFKGEYCGDEDPRKAENREIALFLILLATAVILGSAVLLFHFCIFHRRKMKAIRRSLKEYVEKKLEELMGKIDEKEEKDFRLRK 2 178 200 RKAENREIAL FLILLATAVILGSAVLLFHFCIF HRRKMKAIRR +P57087 MARRSRHRLLLLLLRYLVVALGYHKAYGFSAPKDQQVVTAVEYQEAILACKTPKKTVSSRLEWKKLGRSVSFVYYQQTLQGDFKNRAEMIDFNIRIKNVTRSDAGKYRCEVSAPSEQGQNLEEDTVTLEVLVAPAVPSCEVPSSALSGTVVELRCQDKEGNPAPEYTWFKDGIRLLENPRLGSQSTNSSYTMNTKTGTLQFNTVSKLDTGEYSCEARNSVGYRRCPGKRMQVDDLNISGIIAAVVVVALVISVCGLGVCYAQRKGYFSKETSFQKSNSSSKATTMSENDFKHTKSFII 2 239 261 RMQVDDLNIS GIIAAVVVVALVISVCGLGVCYA QRKGYFSKET +Q80UL9 
MLCLLKLIVIPVILAPVGYPQGLPGLTVSSPQLRVHVGESVLMGCVVQRTEEKHVDRVDWLFSKDKDDASEYVLFYYSNLSVPTGRFQNRSHLVGDTFHNDGSLLLQDVQKADEGIYTCEIRLKNESMVMKKPVELWVLPEEPKDLRVRVGDTTQMRCSIQSTEEKRVTKVNWMFSSGSHTEEETVLSYDSNMRSGKFQSLGRFRNRVDLTGDISRNDGSIKLQTVKESDQGIYTCSIYVGKLESRKTIVLHVVQDEFQRTISPTPPTDKGQQGILNGNQLVIIVGIVCATFLLLPVLILIVKKAKWNKSSVSSMASVKSLENKEKINPEKHIYSSITTWETTERGISGESEGTYMTMNPVWPSSPKASSLVRSSVRSK 2 280 302 KGQQGILNGN QLVIIVGIVCATFLLLPVLILIV KKAKWNKSSV +O76095 MLAGAGRPGLPQGRHLCWLLCAFTLKLCQAEAPVQEEKLSASTSNLPCWLVEEFVVAEECSPCSNFRAKTTPECGPTGYVEKITCSSSKRNEFKSCRSALMEQRLFWKFEGAVVCVALIFACLVIIRQRQLDRKALEKVRKQIESI 2 109 126 ALMEQRLFWK FEGAVVCVALIFACLVII RQRQLDRKAL +Q5VV43 MAPPTGVLSSLLLLVTIAGCARKQCSEGRTYSNAVISPNLETTRIMRVSHTFPVVDCTAACCDLSSCDLAWWFEGRCYLVSCPHKENCEPKKMGPIRSYLTFVLRPVQRPAQLLDYGDMMLNRGSPSGIWGDSPEDIRKDLTFLGKDWGLEEMSEYSDDYRELEKDLLQPSGKQEPRGSAEYTDWGLLPGSEGAFNSSVGDSPAVPAETQQDPELHYLNESASTPAPKLPERSVLLPLPTTPSSGEVLEKEKASQLQEQSSNSSGKEVLMPSHSLPPASLELSSVTVEKSPVLTVTPGSTEHSIPTPPTSAAPSESTPSELPISPTTAPRTVKELTVSAGDNLIITLPDNEVELKAFVAPAPPVETTYNYEWNLISHPTDYQGEIKQGHKQTLNLSQLSVGLYVFKVTVSSENAFGEGFVNVTVKPARRVNLPPVAVVSPQLQELTLPLTSALIDGSQSTDDTEIVSYHWEEINGPFIEEKTSVDSPVLRLSNLDPGNYSFRLTVTDSDGATNSTTAALIVNNAVDYPPVANAGPNHTITLPQNSITLNGNQSSDDHQIVLYEWSLGPGSEGKHVVMQGVQTPYLHLSAMQEGDYTFQLKVTDSSRQQSTAVVTVIVQPENNRPPVAVAGPDKELIFPVESATLDGSSSSDDHGIVFYHWEHVRGPSAVEMENIDKAIATVTGLQVGTYHFRLTVKDQQGLSSTSTLTVAVKKENNSPPRARAGGRHVLVLPNNSITLDGSRSTDDQRIVSYLWIRDGQSPAAGDVIDGSDHSVALQLTNLVEGVYTFHLRVTDSQGASDTDTATVEVQPDPRKSGLVELTLQVGVGQLTEQRKDTLVRQLAVLLNVLDSDIKVQKIRAHSDLSTVIVFYVQSRPPFKVLKAAEVARNLHMRLSKEKADFLLFKVLRVDTAGCLLKCSGHGHCDPLTKRCICSHLWMENLIQRYIWDGESNCEWSIFYVTVLAFTLIVLTGGFTWLCICCCKRQKRTKIRKKTKYTILDNMDEQERMELRPKYGIKHRSTEHNSSLMVSESEFDSDQDTIFSREKMERGNPKVSMNGSIRNGASFSYCSKDR 2 956 978 WDGESNCEWS IFYVTVLAFTLIVLTGGFTWLCI CCCKRQKRTK +Q8IYS2 
MWLQQRLKGLPGLLSSSWARRLLCLLGLLLLLLWFGGSGARRAAGGLHLLPWSRGEPGAAEPSACLEAATRAWRGLRERGEVVPLGPGVPALVANGFLALDVAANRLWVTPGEREPAVAPDFVPFVQLRPLSALAEAGEAVLLLREGLLRRVRCLQLGSPGPGPVAAGPGPASVSGLAAGSGRDCVLLQEDFLAHRGRPHVYLQRIQLNNPTERVAALQTVGPTAGPAPKAFTSTLEKVGDHQFLLYSGRSPPTPTGLVHLVVVAAKKLVNRLQVAPKTQLDETVLWVVHVSGPINPQVLKSKAAKELKALQDLARKEMLELLDMPAAELLQDHQLLWAQLFSPGVEMKKITDTHTPSGLTVNLTLYYMLSCSPAPLLSPSLSHRERDQMESTLNYEDHCFSGHATMHAENLWPGRLSSVQQILQLSDLWRLTLQKRGCKGLVKVGAPGILQGMVLSFGGLQFTENHLQFQADPDVLHNSYALHGIRYKNDHINLAVLADAEGKPYLHVSVESRGQPVKIYACKAGCLDEPVELTSAPTGHTFSVMVTQPITPLLYISTDLTHLQDLRHTLHLKAILAHDEHMAQQDPGLPFLFWFSVASLITLFHLFLFKLIYNEYCGPGAKPLFRSKEDPSV 2 592 614 HMAQQDPGLP FLFWFSVASLITLFHLFLFKLIY NEYCGPGAKP +Q9Y6H6 METTNGTETWYESLHAVLKALNATLHSNLLCRPGPGLGPDNQTEERRASLPGRDDNSYMYILFVMFLFAVTVGSLILGYTRSRKVDKRSDPYHVYIKNRVSMI 2 57 79 RASLPGRDDN SYMYILFVMFLFAVTVGSLILGY TRSRKVDKRS +Q9QZ26 MNCSESQRLQTLLNRLLLELHHRGNASGLGIGTGPSMGMGVVPDPFVGREATSAKGNDAYLYILLIMIFYACLAGGLILAYTRSRKLVEAKDEPPLACVAEQEWVPAAIASADPENGQGLLAEGGHQLAAGALPALAQGAERV 2 59 81 REATSAKGND AYLYILLIMIFYACLAGGLILAY TRSRKLVEAK +Q99706 MSMSPTVIILACLGFFLDQSVWAHVGGQDKPFCSAWPSAVVPQGGHVTLRCHYRRGFNIFTLYKKDGVPVPELYNRIFWNSFLISPVTPAHAGTYRCRGFHPHSPTEWSAPSNPLVIMVTGLYEKPSLTARPGPTVRAGENVTLSCSSQSSFDIYHLSREGEAHELRLPAVPSINGTFQADFPLGPATHGETYRCFGSFHGSPYEWSDPSDPLPVSVTGNPSSSWPSPTEPSFKTGIARHLHAVIRYSVAIILFTILPFFLLHRWCSKKKDAAVMNQEPAGHRTVNREDSDEQDPQEVTYAQLDHCIFTQRKITGPSQRSKRPSTDTSVCIELPNAEPRALSPAHEHHSQALMGSSRETTALSQTQLASSNVPAAGI 2 243 265 FKTGIARHLH AVIRYSVAIILFTILPFFLLHRW CSKKKDAAVM +P83555 MLLWFLSLVCSGFFLVQRMSAHVGSHDKPFLSAWPSYVVPLGQNVTLTCDSHRGSNIFKLYKEEGSPNHQLHETTFQKSQVFGPVTTEHAGTYRCFHPQYANVLSAHSEPLKIIISGIYMKPFLLILQSPLVDTGGNVTLECHSENMFDTYILISHRMGIIKNSVQVSAEHHESGSHVTYSIGPMTPDLVGTYTCYGANSYYPYEWSDSSDPIDIKITGVYKKPSLSALMGPVLMMSGETMALSCISDHQFDTFHMSREGVPRGQGMPAVQSHSGKFEAKFLLSPMIQKGNYRCYGSFRNASHVWSSPSDPLYLPAKGNCPAYTEADTKTNNYKNLHILTGLLVTMVLVVIIIFYSCYFSKQNKSQKQAAASMEQEYEVKNTINTQNFEGQERQEVTYTELEQRIFNQNLMPPISRISEFSADTIVYMEIMK 2 337 359 DTKTNNYKNL HILTGLLVTMVLVVIIIFYSCYF 
SKQNKSQKQA +Q96J84 MLSLLVWILTLSDTFSQGTQTRFSQEPADQTVVAGQRAVLPCVLLNYSGIVQWTKDGLALGMGQGLKAWPRYRVVGSADAGQYNLEITDAELSDDASYECQATEAALRSRRAKLTVLIPPEDTRIDGGPVILLQAGTPHNLTCRAFNAKPAATIIWFRDGTQQEGAVASTELLKDGKRETTVSQLLINPTDLDIGRVFTCRSMNEAIPSGKETSIELDVHHPPTVTLSIEPQTVQEGERVVFTCQATANPEILGYRWAKGGFLIEDAHESRYETNVDYSFFTEPVSCEVHNKVGSTNVSTLVNVHFAPRIVVDPKPTTTDIGSDVTLTCVWVGNPPLTLTWTKKDSNMVLSNSNQLLLKSVTQADAGTYTCRAIVPRIGVAEREVPLYVNGPPIISSEAVQYAVRGDGGKVECFIGSTPPPDRIAWAWKENFLEVGTLERYTVERTNSGSGVLSTLTINNVMEADFQTHYNCTAWNSFGPGTAIIQLEEREVLPVGIIAGATIGASILLIFFFIALVFFLYRRRKGSRKDVTLRKLDIKVETVNREPLTMHSDREDDTASVSTATRVMKAIYSSFKDDVDLKQDLRCDTIDTREEYEMKDPTNGYYNVRAHEDRPSSRAVLYADYRAPGPARFDGRPSSRLSHSSGYAQLNTYSRGPASDYGPEPTPPGPAAPAGTDTTSQLSYENYEKFNSHPFPGAAGYPTYRLGYPQAPPSGLERTPYEAYDPIGKYATATRFSYTSQHSDYGQRFQQRMQTHV 2 497 519 LEEREVLPVG IIAGATIGASILLIFFFIALVFF LYRRRKGSRK +Q6UWL6 MLRMRVPALLVLLFCFRGRAGPSPHFLQQPEDLVVLLGEEARLPCALGAYWGLVQWTKSGLALGGQRDLPGWSRYWISGNAANGQHDLHIRPVELEDEASYECQATQAGLRSRPAQLHVLVPPEAPQVLGGPSVSLVAGVPANLTCRSRGDARPTPELLWFRDGVLLDGATFHQTLLKEGTPGSVESTLTLTPFSHDDGATFVCRARSQALPTGRDTAITLSLQYPPEVTLSASPHTVQEGEKVIFLCQATAQPPVTGYRWAKGGSPVLGARGPRLEVVADASFLTEPVSCEVSNAVGSANRSTALDVLFGPILQAKPEPVSVDVGEDASFSCAWRGNPLPRVTWTRRGGAQVLGSGATLRLPSVGPEDAGDYVCRAEAGLSGLRGGAAEARLTVNAPPVVTALHSAPAFLRGPARLQCLVFASPAPDAVVWSWDEGFLEAGSQGRFLVETFPAPESRGGLGPGLISVLHISGTQESDFSRSFNCSARNRLGEGGAQASLGRRDLLPTVRIVAGVAAATTTLLMVITGVALCCWRHSKASASFSEQKNLMRIPGSSDGSSSRGPEEEETGSREDRGPIVHTDHSDLVLEEKGTLETKDPTNGYYKVRGVSVSLSLGEAPGGGLFLPPPSPLGPPGTPTFYDFNPHLGMVPPCRLYRARAGYLTTPHPRAFTSYIKPTSFGPPDLAPGTPPFPYAAFPTPSHPRLQTHV 2 511 533 GRRDLLPTVR IVAGVAAATTTLLMVITGVALCC WRHSKASASF +Q8IZU9 
MKPFQLDLLFVCFFLFSQELGLQKRGCCLVLGYMAKDKFRRMNEGQVYSFSQQPQDQVVVSGQPVTLLCAIPEYDGFVLWIKDGLALGVGRDLSSYPQYLVVGNHLSGEHHLKILRAELQDDAVYECQAIQAAIRSRPARLTVLVPPDDPVILGGPVISLRAGDPLNLTCHADNAKPAASIIWLRKGEVINGATYSKTLLRDGKRESIVSTLFISPGDVENGQSIVCRATNKAIPGGKETSVTIDIQHPPLVNLSVEPQPVLEDNVVTFHCSAKANPAVTQYRWAKRGQIIKEASGEVYRTTVDYTYFSEPVSCEVTNALGSTNLSRTVDVYFGPRMTTEPQSLLVDLGSDAIFSCAWTGNPSLTIVWMKRGSGVVLSNEKTLTLKSVRQEDAGKYVCRAVVPRVGAGEREVTLTVNGPPIISSTQTQHALHGEKGQIKCFIRSTPPPDRIAWSWKENVLESGTSGRYTVETISTEEGVISTLTISNIVRADFQTIYNCTAWNSFGSDTEIIRLKEQGSEMKSGAGLEAESVPMAVIIGVAVGAGVAFLVLMATIVAFCCARSQRNLKGVVSAKNDIRVEIVHKEPASGREGEEHSTIKQLMMDRGEFQQDSVLKQLEVLKEEEKEFQNLKDPTNGYYSVNTFKEHHSTPTISLSSCQPDLRPAGKQRVPTGMSFTNIYSTLSGQGRLYDYGQRFVLGMGSSSIELCEREFQRGSLSDSSSFLDTQCDSSVSSSGKQDGYVQFDKASKASASSSHHSQSSSQNSDPSRPLQRRMQTHV 2 536 558 GLEAESVPMA VIIGVAVGAGVAFLVLMATIVAF CCARSQRNLK +P10721 MRGARGAWDFLCVLLLLLRVQTGSSQPSVSPGEPSPPSIHPGKSDLIVRVGDEIRLLCTDPGFVKWTFEILDETNENKQNEWITEKAEATNTGKYTCTNKHGLSNSIYVFVRDPAKLFLVDRSLYGKEDNDTLVRCPLTDPEVTNYSLKGCQGKPLPKDLRFIPDPKAGIMIKSVKRAYHRLCLHCSVDQEGKSVLSEKFILKVRPAFKAVPVVSVSKASYLLREGEEFTVTCTIKDVSSSVYSTWKRENSQTKLQEKYNSWHHGDFNYERQATLTISSARVNDSGVFMCYANNTFGSANVTTTLEVVDKGFINIFPMINTTVFVNDGENVDLIVEYEAFPKPEHQQWIYMNRTFTDKWEDYPKSENESNIRYVSELHLTRLKGTEGGTYTFLVSNSDVNAAIAFNVYVNTKPEILTYDRLVNGMLQCVAAGFPEPTIDWYFCPGTEQRCSASVLPVDVQTLNSSGPPFGKLVVQSSIDSSAFKHNGTVECKAYNDVGKTSAYFNFAFKGNNKEQIHPHTLFTPLLIGFVIVAGMMCIIVMILTYKYLQKPMYEVQWKVVEEINGNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTIGGPTLVITEYCCYGDLLNFLRRKRDSFICSKQEDHAEAALYKNLLHSKESSCSDSTNEYMDMKPGVSYVVPTKADKRRSVRIGSYIERDVTPAIMEDDELALDLEDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIKNDSNYVVKGNARLPVKWMAPESIFNCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMLSPEHAPAEMYDIMKTCWDADPLKRPTFKQIVQLIEKQISESTNHIYSNLANCSPNRQKPVVDHSVRINSVGSTASSSQPLLVHDDV 2 521 543 NNKEQIHPHT LFTPLLIGFVIVAGMMCIIVMIL TYKYLQKPMY +P05532 
MRGARGAWDLLCVLLVLLRGQTATSQPSASPGEPSPPSIHPAQSELIVEAGDTLSLTCIDPDFVRWTFKTYFNEMVENKKNEWIQEKAEATRTGTYTCSNSNGLTSSIYVFVRDPAKLFLVGLPLFGKEDSDALVRCPLTDPQVSNYSLIECDGKSLPTDLTFVPNPKAGITIKNVKRAYHRLCVRCAAQRDGTWLHSDKFTLKVRAAIKAIPVVSVPETSHLLKKGDTFTVVCTIKDVSTSVNSMWLKMNPQPQHIAQVKHNSWHRGDFNYERQETLTISSARVDDSGVFMCYANNTFGSANVTTTLKVVEKGFINISPVKNTTVFVTDGENVDLVVEYEAYPKPEHQQWIYMNRTSANKGKDYVKSDNKSNIRYVNQLRLTRLKGTEGGTYTFLVSNSDASASVTFNVYVNTKPEILTYDRLINGMLQCVAEGFPEPTIDWYFCTGAEQRCTTPVSPVDVQVQNVSVSPFGKLVVQSSIDSSVFRHNGTVECKASNDVGKSSAFFNFAFKGNNKEQIQAHTLFTPLLIGFVVAAGAMGIIVMVLTYKYLQKPMYEVQWKVVEEINGNNYVYIDPTQLPYDHKWEFPRNRLSFGKTLGAGAFGKVVEATAYGLIKSDAAMTVAVKMLKPSAHLTEREALMSELKVLSYLGNHMNIVNLLGACTVGGPTLVITEYCCYGDLLNFLRRKRDSFIFSKQEEQAEAALYKNLLHSTEPSCDSSNEYMDMKPGVSYVVPTKTDKRRSARIDSYIERDVTPAIMEDDELALDLDDLLSFSYQVAKGMAFLASKNCIHRDLAARNILLTHGRITKICDFGLARDIRNDSNYVVKGNARLPVKWMAPESIFSCVYTFESDVWSYGIFLWELFSLGSSPYPGMPVDSKFYKMIKEGFRMVSPEHAPAEMYDVMKTCWDADPLKRPTFKQVVQLIEKQISDSTKHIYSNLANCNPNPENPVVVDHSVRVNSVGSSASSTQPLLVHEDA 2 524 546 NNKEQIQAHT LFTPLLIGFVVAAGAMGIIVMVL TYKYLQKPMY +Q96MU8 MAPPAARLALLSAAALTLAARPAPSPGLGPECFTANGADYRGTQNWTALQGGKPCLFWNETFQHPYNTLKYPNGEGGLGEHNYCRNPDGDVSPWCYVAEHEDGVYWKYCEIPACQMPGNLGCYKDHGNPPPLTGTSKTSNKLTIQTCISFCRSQRFKFAGMESGYACFCGNNPDYWKYGEAASTECNSVCFGDHTQPCGGDGRIILFDTLVGACGGNYSAMSSVVYSPDFPDTYATGRVCYWTIRVPGASHIHFSFPLFDIRDSADMVELLDGYTHRVLARFHGRSRPPLSFNVSLDFVILYFFSDRINQAQGFAVLYQAVKEELPQERPAVNQTVAEVITEQANLSVSAARSSKVLYVITTSPSHPPQTVPGSNSWAPPMGAGSHRVEGWTVYGLATLLILTVTAIVAKILLHVTFKSHRVPASGDLRDCHQPGTSGEIWSIFYKPSTSISIFKKKLKGQSQQDDRNPLVSD 2 391 413 MGAGSHRVEG WTVYGLATLLILTVTAIVAKILL HVTFKSHRVP +A6NMS7 
MSSAQCPALVCVMSRLRFWGPWPLLMWQLLWLLVKEAQPLEWVKDPLQLTSNPLGPPESWSSHSSHFPRESPHAPTLPADPWDFDHLGPSASSEMPAPPQESTENLVPFLDTWDSAGEQPLEPEQFLASQQDLKDKLSPQERLPVSPKKLKKDPAQRWSLAEIIGITRQLSTPQSQKQTLQNEYSSTDTPYPGSLPPELRVKSDEPPGPSEQVGPSQFHLEPETQNPETLEDIQSSSLQQEAPAQLPQLLEEEPSSMQQEAPALPPESSMESLTLPNHEVSVQPPGEDQAYYHLPNITVKPADVEVTITSEPTNETESSQAQQETPIQFPEEVEPSATQQEAPIEPPVPPMEHELSISEQQQPVQPSESPREVESSPTQQETPGQPPEHHEVTVSPPGHHQTHHLASPSVSVKPPDVQLTIAAEPSAEVGTSLVHQEATTRLSGSGNDVEPPAIQHGGPPLLPESSEEAGPLAVQQETSFQSPEPINNENPSPTQQEAAAEHPQTAEEGESSLTHQEAPAQTPEFPNVVVAQPPEHSHLTQATVQPLDLGFTITPESKTEVELSPTMKETPTQPPKKVVPQLRVYQGVTNPTPGQDQAQHPVSPSVTVQLLDLGLTITPEPTTEVGHSTPPKRTIVSPKHPEVTLPHPDQVQTQHSHLTRATVQPLDLGFTITPKSMTEVEPSTALMTTAPPPGHPEVTLPPSDKGQAQHSHLTQATVQPLDLELTITTKPTTEVKPSPTTEETSTQPPDLGLAIIPEPTTETGHSTALEKTTAPRPDRVQTLHRSLTEVTGPPTELEPAQDSLVQSESYTQNKALTAPEEHKASTSTNICELCTCGDEMLSCIDLNPEQRLRQVPVPEPNTHNGTFTILNFQGNYISYIDGNVWKAYSWTEKLILRENNLTELHKDSFEGLLSLQYLDLSCNKIQSIERHTFEPLPFLKFINLSCNVITELSFGTFQAWHGMQFLHKLILNHNPLTTVEDPYLFKLPALKYLDMGTTLVPLTTLKNILMMTVELEKLILPSHMACCLCQFKNSIEAVCKTVKLHCNSACLTNTTHCPEEASVGNPEGAFMKVLQARKNYTSTELIVEPEEPSDSSGINLSGFGSEQLDTNDESDFISTLSYILPYFSAVNLDVKSLLLPFIKLPTTGNSLAKIQTVGQNRQRVKRVLMGPRSIQKRHFKEVGRQSIRREQGAQASVENAAEEKRLTSPAPREVEQPHTQQGPEKLAGNAVYTKPSFTQEHKAAVSVLKPFSKGTPSTSSPAKALPQVRDRSKDLTHAISILESAKARVTNTKTSKPIVHARKKYRFHKTRSHVTHRTTKVKKSPKVRKKSYLSRLMLANRLPFSAAKSLINSPSQGAFSSLGDLSPQENPFLEVSALSEHFIEKNNTKHTTARNAFEENDFMENTNMPEGTISENTNYNHPPEADSAGTAFNLGPTVKQTETKWEYNNVGTDLSPEPKSFNYPLLSSPGDQFEIQLTQQLQSLIPNNNVRRLIAHVIRTLKMDCSGAHVQVTCAKLISRTGHLMKLLSGQQEVKASKIEWDTDQWKIENYINESTEAQSEQKEKSLELKKEVPGYGYTDKLILALIVTGILTILIILFCLIVICCHRRSLQEDEEGFSRGIFRFLPWRGCSSRRESQDGLSSFGQPLWFKDLYKPLSATRINNHAWKLHKKSSNEDKILNRDPGDSEAPTEEEESEALP 2 1582 1604 EVPGYGYTDK LILALIVTGILTILIILFCLIVI CCHRRSLQED +Q8BG84 
MSLHPVILLVLVLCLGWKINTQEGSLPDITIFPNSSLMISQGTFVTVVCSYSDKHDLYNMVRLEKDGSTFMEKSTEPYKTEDEFEIGPVNETITGHYSCIYSKGITWSERSKTLELKVIKENVIQTPAPGPTSDTSWLKTYSIYIFTVVSVIFLLCLSALLFCFLRHRQKKQGLPNNKRQQQRPEERLNLATNGLEMTPDIVADDRLPEDRWTETWTPVAGDLQEVTYIQLDHHSLTQRAVGAVTSQSTDMAESSTYAAIIRH 2 143 165 SDTSWLKTYS IYIFTVVSVIFLLCLSALLFCFL RHRQKKQGLP +P13473 MVCFRLFPVPGSGLVLVCLVLGAVRSYALELNLTDSENATCLYAKWQMNFTVRYETTNKTYKTVTISDHGTVTYNGSICGDDQNGPKIAVQFGPGFSWIANFTKAASTYSIDSVSFSYNTGDNTTFPDAEDKGILTVDELLAIRIPLNDLFRCNSLSTLEKNDVVQHYWDVLVQAFVQNGTVSTNEFLCDKDKTSTVAPTIHTTVPSPTTTPTPKEKPEAGTYSVNNGNDTCLLATMGLQLNITQDKVASVININPNTTHSTGSCRSHTALLRLNSSTIKYLDFVFAVKNENRFYLKEVNISMYLVNGSVFSIANNNLSYWDAPLGSSYMCNKEQTVSVSGAFQINTFDLRVQPFNVTQGKYSTAQDCSADDDNFLVPIAVGAALAGVLILVLLAYFIGLKHHHAGYEQF 2 378 400 CSADDDNFLV PIAVGAALAGVLILVLLAYFIGL KHHHAGYEQF +Q9UQV4 MPRQLSAAAALFASLAVILHDGSQMRAKAFPETRDYSQPTAAATVQDIKKPVQQPAKQAPHQTLAARFMDGHITFQTAATVKIPTTTPATTKNTATTSPITYTLVTTQATPNNSHTAPPVTEVTVGPSLAPYSLPPTITPPAHTTGTSSSTVSHTTGNTTQPSNQTTLPATLSIALHKSTTGQKPVQPTHAPGTTAAAHNTTRTAAPASTVPGPTLAPQPSSVKTGIYQVLNGSRLCIKAEMGIQLIVQDKESVFSPRRYFNIDPNATQASGNCGTRKSNLLLNFQGGFVNLTFTKDEESYYISEVGAYLTVSDPETIYQGIKHAVVMFQTAVGHSFKCVSEQSLQLSAHLQVKTTDVQLQAFDFEDDHFGNVDECSSDYTIVLPVIGAIVVGLCLMGMGVYKIRLRCQSSGYQRI 2 380 402 FGNVDECSSD YTIVLPVIGAIVVGLCLMGMGVY KIRLRCQSSG +Q9UJQ1 MDLQGRGVPSIDRLRVLLMLFHTMAQIMAEQEVENLSGLSTNPEKDIFVVRENGTTCLMAEFAAKFIVPYDVWASNYVDLITEQADIALTRGAEVKGRCGHSQSELQVFWVDRAYALKMLFVKESHNMSKGPEATWRLSKVQFVYDSSEKTHFKDAVSAGKHTANSHHLSALVTPAGKSYECQAQQTISLASSDPQKTVTMILSAVHIQPFDIISDFVFSEEHKCPVDEREQLEETLPLILGLILGLVIMVTLAIYHVHHKMTANQVQIPRDRSQYKHMG 2 236 258 PVDEREQLEE TLPLILGLILGLVIMVTLAIYHV HHKMTANQVQ +Q6UX15 
MRPGTALQAVLLAVLLVGLRAATGRLLSASDLDLRGGQPVCRGGTQRPCYKVIYFHDTSRRLNFEEAKEACRRDGGQLVSIESEDEQKLIEKFIENLLPSDGDFWIGLRRREEKQSNSTACQDLYAWTDGSISQFRNWYVDEPSCGSEVCVVMYHQPSAPAGIGGPYMFQWNDDRCNMKNNFICKYSDEKPAVPSREAEGEETELTTPVLPEETQEEDAKKTFKESREAALNLAYILIPSIPLLLLLVVTTVVCWVWICRKRKREQPDPSTKKQHTIWPSPHQGNSPDLEVYNVIRKQSEADLAETRPDLKNISFRVCSGEATPDDMSCDYDNMAVNPSESGFVTLVSVESGFVTNDIYEFSPDQMGRSKESGWVENEIYGY 2 236 258 SREAALNLAY ILIPSIPLLLLLVVTTVVCWVWI CRKRKREQPD +Q86UK5 MDPSGSRGRPTWVLAGGLLAVALALGGRGCLGASSRPRWRPLGAQPPRDPQVAPRSGPGLRIPPGRSGAGPESSTQDLPCMIWPKVECCHFKTAVEAPLGMKLDKKMEVFIPLSTSAASSGPWAHSLFAFIPSWPKKNLFKRESPITHRLYGDISREVQGTSENGVIFQKCALVSGSSEAQTARIWLLVNNTKTTSSANLSELLLLDSIAGLTIWDSVGNRTSEGFQAFSKKFLQVGDAFAVSYAATLQAGDLGNGESLKLPAQLTFQSSSRNRTQLKVLFSITAEENVTVLPHHGLHAAGFFIAFLLSLVLTWAALFLMVRYQCLKGNMLTRHRVWQYESKLEPLPFTSADGVNEDLSLNDQMIDILSSEDPGSMLQALEELEIATLNRADADLEACRTQISKDIIALLLKNLTSSGHLSPQVERKMSAVFKKQFLLLENEIQEEYDRKMVALTAECDLETRKKMENQYQREMMAMEEAEELLKRAGERSAVECSNLLRTLHGLEQEHLRKSLALQQEEDFAKAHRQLAVFQRNELHSIFFTQIKSAIFKGELKPEAAKMLLQNYSKIQENVEELMDFFQASKRYHLSKRFGHREYLVQNLQSSETRVQGLLSTAAAQLTHLIQKHERAGYLDEDQMEMLLERAQTEVFSIKQKLDNDLKQEKKKLHQKLITKRRRELLQKHREQRREQASVGEAFRTVEDAGQYLHQKRSLMEEHGATLEELQERLDQAALDDLRTLTLSLFEKATDELRRLQNSAMTQELLKRGVPWLFLQQILEEHGKEMAARAEQLEGEERDRDQEGVQSVRQRLKDDAPEAVTEEQAELRRWEHLIFMKLCSSVFSLSEEELLRMRQEVHGCFAQMDRSLALPKIRARVLLQQFQTAWREAEFVKLDQAVAAPELQQQSKVRKSRSKSKSKGELLKKCIEDKIHLCEEQASEDLVEKVRGELLRERVQRMEAQEGGFAQSLVALQFQKASRVTETLSAYTALLSIQDLLLEELSASEMLTKSACTQILESHSRELQELERKLEDQLVQQEAAQQQQALASWQQWVADGPGILNEPGEVDSERQVSTVLHQALSKSQTLLEQHQQCLREEQQNSVVLEDLLENMEADTFATLCSQELRLASYLARMAMVPGATLRRLLSVVLPTASQPQLLALLDSATERHVDHAAESDGGAEQADVGRRRKHQSWWQALDGKLRGDLISRGLEKMLWARKRKQSILKKTCLPLRERMIFSGKGSWPHLSLEPIGELAPVPIVGAETIDLLNTGEKLFIFRNPKEPEISLHVPPRKKKNFLNAKKAMRALGMD 2 299 321 VTVLPHHGLH AAGFFIAFLLSLVLTWAALFLMV RYQCLKGNML +P48357 
MICQKFCVVLLHWEFIYVITAFNLSYPITPWRFKLSCMPPNSTYDYFLLPAGLSKNTSNSNGHYETAVEPKFNSSGTHFSNLSKTTFHCCFRSEQDRNCSLCADNIEGKTFVSTVNSLVFQQIDANWNIQCWLKGDLKLFICYVESLFKNLFRNYNYKVHLLYVLPEVLEDSPLVPQKGSFQMVHCNCSVHECCECLVPVPTAKLNDTLLMCLKITSGGVIFQSPLMSVQPINMVKPDPPLGLHMEITDDGNLKISWSSPPLVPFPLQYQVKYSENSTTVIREADKIVSATSLLVDSILPGSSYEVQVRGKRLDGPGIWSDWSTPRVFTTQDVIYFPPKILTSVGSNVSFHCIYKKENKIVPSKEIVWWMNLAEKIPQSQYDVVSDHVSKVTFFNLNETKPRGKFTYDAVYCCNEHECHHRYAELYVIDVNINISCETDGYLTKMTCRWSTSTIQSLAESTLQLRYHRSSLYCSDIPSIHPISEPKDCYLQSDGFYECIFQPIFLLSGYTMWIRINHSLGSLDSPPTCVLPDSVVKPLPPSSVKAEITINIGLLKISWEKPVFPENNLQFQIRYGLSGKEVQWKMYEVYDAKSKSVSLPVPDLCAVYAVQVRCKRLDGLGYWSNWSNPAYTVVMDIKVPMRGPEFWRIINGDTMKKEKNVTLLWKPLMKNDSLCSVQRYVINHHTSCNGTWSEDVGNHTKFTFLWTEQAHTVTVLAINSIGASVANFNLTFSWPMSKVNIVQSLSAYPLNSSCVIVSWILSPSDYKLMYFIIEWKNLNEDGEIKWLRISSSVKKYYIHDHFIPIEKYQFSLYPIFMEGVGKPKIINSFTQDDIEKHQSDAGLYVIVPVIISSSILLLGTLLISHQRMKKLFWEDVPNPKNCSWAQGLNFQKPETFEHLFIKHTASVTCGPLLLEPETISEDISVDTSWKNKDEMMPTTVVSLLSTTDLEKGSVCISDQFNSVNFSEAEGTEVTYEDESQRQPFVKYATLISNSKPSETGEEQGLINSSVTKCFSSKNSPLKDSFSNSSWEIEAQAFFILSDQHPNIISPHLTFSEGLDELLKLEGNFPEENNDKKSIYYLGVTSIKKRESGVLLTDKSRVSCPFPAPCLFTDIRVLQDSCSHFVENNINLGTSSKKTFASYMPQFQTCSTQTHKIMENKMCDLTV 2 840 862 QDDIEKHQSD AGLYVIVPVIISSSILLLGTLLI SHQRMKKLFW +P19256 MVAGSDAGRALGVLSVVCLLHCFGFISCFSQQIYGVVYGNVTFHVPSNVPLKEVLWKKQKDKVAELENSEFRAFSSFKNRVYLDTVSGSLTIYNLTSSDEDEYEMESPNITDTMKFFLYVLESLPSPTLTCALTNGSIEVQCMIPEHYNSHRGLIMYSWDCPMEQCKRNSTSIYFKMENDLPQKIQCTLSNPLFNTTSSIILTTCIPSSGHSRHRYALIPIPLAVITTCIVLYMNGILKCDRKPDRTNSN 2 216 238 IPSSGHSRHR YALIPIPLAVITTCIVLYMNGIL KCDRKPDRTN +P42702 
MMDIYVCLKRPSWMVDNKRMRTASNFQWLLSTFILLYLMNQVNSQKKGAPHDLKCVTNNLQVWNCSWKAPSGTGRGTDYEVCIENRSRSCYQLEKTSIKIPALSHGDYEITINSLHDFGSSTSKFTLNEQNVSLIPDTPEILNLSADFSTSTLYLKWNDRGSVFPHRSNVIWEIKVLRKESMELVKLVTHNTTLNGKDTLHHWSWASDMPLECAIHFVEIRCYIDNLHFSGLEEWSDWSPVKNISWIPDSQTKVFPQDKVILVGSDITFCCVSQEKVLSALIGHTNCPLIHLDGENVAIKIRNISVSASSGTNVVFTTEDNIFGTVIFAGYPPDTPQQLNCETHDLKEIICSWNPGRVTALVGPRATSYTLVESFSGKYVRLKRAEAPTNESYQLLFQMLPNQEIYNFTLNAHNPLGRSQSTILVNITEKVYPHTPTSFKVKDINSTAVKLSWHLPGNFAKINFLCEIEIKKSNSVQEQRNVTIKGVENSSYLVALDKLNPYTLYTFRIRCSTETFWKWSKWSNKKQHLTTEASPSKGPDTWREWSSDGKNLIIYWKPLPINEANGKILSYNVSCSSDEETQSLSEIPDPQHKAEIRLDKNDYIISVVAKNSVGSSPPSKIASMEIPNDDLKIEQVVGMGKGILLTWHYDPNMTCDYVIKWCNSSRSEPCLMDWRKVPSNSTETVIESDEFRPGIRYNFFLYGCRNQGYQLLRSMIGYIEELAPIVAPNFTVEDTSADSILVKWEDIPVEELRGFLRGYLFYFGKGERDTSKMRVLESGRSDIKVKNITDISQKTLRIADLQGKTSYHLVLRAYTDGGVGPEKSMYVVTKENSVGLIIAILIPVAVAVIVGVVTSILCYRKREWIKETFYPDIPNPENCKALQFQKSVCEGSSALKTLEMNPCTPNNVEVLETRSAFPKIEDTEIISPVAERPEDRSDAEPENHVVVSYCPPIIEEEIPNPAADEAGGTAQVIYIDVQSMYQPQAKPEEEQENDPVGGAGYKPQMHLPINSTVEDIAAEEDLDKTAGYRPQANVNTWNLVSPDSPRSIDSNSEIVSFGSPCSINSRQFLIPPKDEDSPKSNGGGWSFTNFFQNKPND 2 835 857 MYVVTKENSV GLIIAILIPVAVAVIVGVVTSIL CYRKREWIKE +Q96FE5 MQVSKRMLAGGVRSMPSPLLACWQPILLLVLGSVLSGSATGCPPRCECSAQDRAVLCHRKRFVAVPEGIPTETRLLDLGKNRIKTLNQDEFASFPHLEELELNENIVSAVEPGAFNNLFNLRTLGLRSNRLKLIPLGVFTGLSNLTKLDISENKIVILLDYMFQDLYNLKSLEVGDNDLVYISHRAFSGLNSLEQLTLEKCNLTSIPTEALSHLHGLIVLRLRHLNINAIRDYSFKRLYRLKVLEISHWPYLDTMTPNCLYGLNLTSLSITHCNLTAVPYLAVRHLVYLRFLNLSYNPISTIEGSMLHELLRLQEIQLVGGQLAVVEPYAFRGLNYLRVLNVSGNQLTTLEESVFHSVGNLETLILDSNPLACDCRLLWVFRRRWRLNFNRQQPTCATPEFVQGKEFKDFPDVLLPNYFTCRRARIRDRKAQQVFVDEGHTVQFVCRADGDPPPAILWLSPRKHLVSAKSNGRLTVFPDGTLEVRYAQVQDNGTYLCIAANAGGNDSMPAHLHVRSYSPDWPHQPNKTFAFISNQPGEGEANSTRATVPFPFDIKTLIIATTMGFISFLGVVLFCLVLLFLWSRGKGNTKHNIEIEYVPRKSDAGISSADAPRKFNMKMI 2 560 582 FPFDIKTLII ATTMGFISFLGVVLFCLVLLFLW SRGKGNTKHN +Q6UY18 
MDAATAPKQAWPPWPPLLFLLLLPGGSGGSCPAVCDCTSQPQAVLCGHRQLEAVPGGLPLDTELLDLSGNRLWGLQQGMLSRLSLLQELDLSYNQLSTLEPGAFHGLQSLLTLRLQGNRLRIMGPGVFSGLSALTLLDLRLNQIVLFLDGAFGELGSLQKLEVGDNHLVFVAPGAFAGLAKLSTLTLERCNLSTVPGLALARLPALVALRLRELDIGRLPAGALRGLGQLKELEIHLWPSLEALDPGSLVGLNLSSLAITRCNLSSVPFQALYHLSFLRVLDLSQNPISAIPARRLSPLVRLQELRLSGACLTSIAAHAFHGLTAFHLLDVADNALQTLEETAFPSPDKLVTLRLSGNPLTCDCRLLWLLRLRRHLDFGMSPPACAGPHHVQGKSLKEFSDILPPGHFTCKPALIRKSGPRWVIAEEGGHAVFSCSGDGDPAPTVSWMRPHGAWLGRAGRVRVLEDGTLEIRSVQLRDRGAYVCVVSNVAGNDSLRTWLEVIQVEPPNGTLSDPNITVPGIPGPFFLDSRGVAMVLAVGFLPFLTSVTLCFGLIALWSKGKGRVKHHMTFDFVAPRPSGDKNSGGNRVTAKLF 2 535 557 FFLDSRGVAM VLAVGFLPFLTSVTLCFGLIALW SKGKGRVKHH +O75022 MTPALTALLCLGLSLGPRTRVQAGPFPKPTLWAEPGSVISWGSPVTIWCQGSQEAQEYRLHKEGSPEPLDRNNPLEPKNKARFSIPSMTEHHAGRYRCHYYSSAGWSEPSDPLEMVMTGAYSKPTLSALPSPVVASGGNMTLRCGSQKGYHHFVLMKEGEHQLPRTLDSQQLHSRGFQALFPVGPVTPSHRWRFTCYYYYTNTPWVWSHPSDPLEILPSGVSRKPSLLTLQGPVLAPGQSLTLQCGSDVGYNRFVLYKEGERDFLQRPGQQPQAGLSQANFTLGPVSPSNGGQYRCYGAHNLSSEWSAPSDPLNILMAGQIYDTVSLSAQPGPTVASGENVTLLCQSWWQFDTFLLTKEGAAHPPLRLRSMYGAHKYQAEFPMSPVTSAHAGTYRCYGSYSSNPHLLSHPSEPLELVVSGHSGGSSLPPTGPPSTPGLGRYLEVLIGVSVAFVLLLFLLLFLLLRRQRHSKHRTSDQRKTDFQRPAGAAETEPKDRGLLRRSSPAADVQEENLYAAVKDTQSEDRVELDSQSPHDEDPQAVTYAPVKHSSPRREMASPPSSLSGEFLDTKDRQVEEDRQMDTEAAASEASQDVTYAQLHSLTLRRKATEPPPSQEGEPPAEPSIYATLAIH 2 442 464 PPSTPGLGRY LEVLIGVSVAFVLLLFLLLFLLL RRQRHSKHRT +Q8VCD3 MLEIRGLSPSLCLLSLLLVLHGAERSQPPPRRRFEYKLSFKGPRLAVPGAGIPFWSHHGDAILGLEEVRLVPSMKNRSGAVWSNISVSFPSWEVEMQMRVTGPGRRGAQGVAMWYTKDRAQVGSVVEELASWDGIGIYFDSSTSDVQDSPVIRVLASDGHDLQEQSGDGNVRELGSCHRDFRNRPFPFRARVTYWRQRLRVSLSGGLTPKDPEEVCVDVEPLFLAPGGFFGVSAATGTLAADDHDVLSFLTFSLREPGPEETPQPFMEKDQLLLARKLEELKARLALGTREASIPPLNPKAQEEGERFFNLEDTLGRQSQILQALQALSRQMAQAEKHWKQQLGSTVQVRPEGGWNTAKVSTLLYGQRTLIQALQEMREAAARMASGAHVFYLPVGTKHHFFELDHILSLLQKDLRGLVKKTAKAPRPSGWLLGSSTCLHTSIFLFFLLLQTVGFFCYVNFSRQELDKRLQEYLSTGSLSLEPALPITRTIGVLRRQPISPSMQA 2 439 461 SGWLLGSSTC LHTSIFLFFLLLQTVGFFCYVNF SRQELDKRLQ +Q9H0V9 
MAATLGPLGSWQQWRRCLSARDGSRMLLLLLLLGSGQGPQQVGAGQTFEYLKREHSLSKPYQGVGTGSSSLWNLMGNAMVMTQYIRLTPDMQSKQGALWNRVPCFLRDWELQVHFKIHGQGKKNLHGDGLAIWYTKDRMQPGPVFGNMDKFVGLGVFVDTYPNEEKQQERVFPYISAMVNNGSLSYDHERDGRPTELGGCTAIVRNLHYDTFLVIRYVKRHLTIMMDIDGKHEWRDCIEVPGVRLPRGYYFGTSSITGDLSDNHDVISLKLFELTVERTPEEEKLHRDVFLPSVDNMKLPEMTAPLPPLSGLALFLIVFFSLVFSVFAIVIGIILYNKWQEQSRKRFY 2 314 336 APLPPLSGLA LFLIVFFSLVFSVFAIVIGIILY NKWQEQSRKR +Q12907 MAAEGWIWRWGWGRRCLGRPGLLGPGPGPTTPLFLLLLLGSVTADITDGNSEHLKREHSLIKPYQGVGSSSMPLWDFQGSTMLTSQYVRLTPDERSKEGSIWNHQPCFLKDWEMHVHFKVHGTGKKNLHGDGIALWYTRDRLVPGPVFGSKDNFHGLAIFLDTYPNDETTERVFPYISVMVNNGSLSYDHSKDGRWTELAGCTADFRNRDHDTFLAVRYSRGRLTVMTDLEDKNEWKNCIDITGVRLPTGYYFGASAGTGDLSDNHDIISMKLFQLMVEHTPDEESIDWTKIEPSVNFLKSPKDNVDDPTGNFRSGPLTGWRVFLLLLCALLGIVVCAVVGAVVFQKRQERNKRFY 2 323 345 FRSGPLTGWR VFLLLLCALLGIVVCAVVGAVVF QKRQERNKRF +A0A1B0GTW7 MLLLLLLLLLLPPLVLRVAASRCLHDETQKSVSLLRPPFSQLPSKSRSSSLTLPSSRDPQPLRIQSCYLGDHISDGAWDPEGEGMRGGSRALAAVREATQRIQAVLAVQGPLLLSRDPAQYCHAVWGDPDSPNYHRCSLLNPGYKGESCLGAKIPDTHLRGYALWPEQGPPQLVQPDGPGVQNTDFLLYVRVAHTSKCHQETVSLCCPGWSTAAQSQLTAALTSWAQRRGFVMLPRLCLKLLGSSNLPTLASQSIRITGPSVIAYAACCQLDSEDRPLAGTIVYCAQHLTSPSLSHSDIVMATLHELLHALGFSGQLFKKWRDCPSGFSVRENCSTRQLVTRQDEWGQLLLTTPAVSLSLAKHLGVSGASLGVPLEEEEGLLSSHWEARLLQGSLMTATFDGAQRTRLDPITLAAFKDSGWYQVNHSAAEELLWGQGSGPEFGLVTTCGTGSSDFFCTGSGLGCHYLHLDKGSCSSDPMLEGCRMYKPLANGSECWKKENGFPAGVDNPHGEIYHPQSRCFFANLTSQLLPGDKPRHPSLTPHLKEAELMGRCYLHQCTGRGAYKVQVEGSPWVPCLPGKVIQIPGYYGLLFCPRGRLCQTNEDINAVTSPPVSLSTPDPLFQLSLELAGPPGHSLGKEQQEGLAEAVLEALASKGGTGRCYFHGPSITTSLVFTVHMWKSPGCQGPSVATLHKALTLTLQKKPLEVYHGGANFTTQPSKLLVTSDHNPSMTHLRLSMGLCLMLLILVGVMGTTAYQKRATLPVRPSASYHSPELHSTRVPVRGIREV 2 734 756 TSDHNPSMTH LRLSMGLCLMLLILVGVMGTTAY QKRATLPVRP +Q86YD5 
MWLLGPLCLLLSSAAESQLLPGNNFTNECNIPGNFMCSNGRCIPGAWQCDGLPDCFDKSDEKECPKAKSKCGPTFFPCASGIHCIIGRFRCNGFEDCPDGSDEENCTANPLLCSTARYHCKNGLCIDKSFICDGQNNCQDNSDEESCESSQEPGSGQVFVTSENQLVYYPSITYAIIGSSVIFVLVVALLALVLHHQRKRNNLMTLPVHRLQHPVLLSRLVVLDHPHHCNVTYNVNNGIQYVASQAEQNASEVGSPPSYSEALLDQRPAWYDLPPPPYSSDTESLNQADLPPYRSRSGSANSASSQAASSLLSVEDTSHSPGQPGPQEGTAEPRDSEPSQGTEEV 2 172 194 SENQLVYYPS ITYAIIGSSVIFVLVVALLALVL HHQRKRNNLM +Q8TF66 MPLKHYLLLLVGCQAWGAGLAYHGCPSECTCSRASQVECTGARIVAVPTPLPWNAMSLQILNTHITELNESPFLNISALIALRIEKNELSRITPGAFRNLGSLRYLSLANNKLQVLPIGLFQGLDSLESLLLSSNQLLQIQPAHFSQCSNLKELQLHGNHLEYIPDGAFDHLVGLTKLNLGKNSLTHISPRVFQHLGNLQVLRLYENRLTDIPMGTFDGLVNLQELALQQNQIGLLSPGLFHNNHNLQRLYLSNNHISQLPPSVFMQLPQLNRLTLFGNSLKELSPGIFGPMPNLRELWLYDNHISSLPDNVFSNLRQLQVLILSRNQISFISPGAFNGLTELRELSLHTNALQDLDGNVFRMLANLQNISLQNNRLRQLPGNIFANVNGLMAIQLQNNQLENLPLGIFDHLGKLCELRLYDNPWRCDSDILPLRNWLLLNQPRLGTDTVPVCFSPANVRGQSLIIINVNVAVPSVHVPEVPSYPETPWYPDTPSYPDTTSVSSTTELTSPVEDYTDLTTIQVTDDRSVWGMTQAQSGLAIAAIVIGIVALACSLAACVGCCCCKKRSQAVLMQMKAPNEC 2 539 561 VWGMTQAQSG LAIAAIVIGIVALACSLAACVGC CCCKKRSQAV +Q9H756 MKVTGITILFWPLSMILLSDKIQSSKREVQCNFTEKNYTLIPADIKKDVTILDLSYNQITLNGTDTRVLQTYFLLTELYLIENKVTILHNNGFGNLSSLEILNICRNSIYVIQQGAFLGLNKLKQLYLCQNKIEQLNADVFVPLRSLKLLNLQGNLISYLDVPPLFHLELITLYGNLWNCSCSLFNLQNWLNTSNVTLENENITMCSYPNSLQSYNIKTVPHKAECHSKFPSSVTEDLYIHFQPISNSIFNSSSNNLTRNSEHEPLGKSWAFLVGVVVTVLTTSLLIFIAIKCPIWYNILLSYNHHRLEEHEAETYEDGFTGNPSSLSQIPETNSEETTVIFEQLHSFVVDDDGFIEDKYIDIHELCEEN 2 269 291 RNSEHEPLGK SWAFLVGVVVTVLTTSLLIFIAI KCPIWYNILL +Q8N386 MGGTLAWTLLLPLLLRESDSLEPSCTVSSADVDWNAEFSATCLNFSGLSLSLPHNQSLRASNVILLDLSGNGLRELPVTFFAHLQKLEVLNVLRNPLSRVDGALAARCDLDLQADCNCALESWHDIRRDNCSGQKPLLCWDTTSSQHNLSAFLEVSCAPGLASATIGAVVVSGCLLLGLAIAGPVLAWRLWRCRVARSRELNKPWAAQDGPKPGLGLQPRYGSRSAPKPQVAVPSCPSTPDYENMFVGQPAAEHQWDEQGAHPSEDNDFYINYKDIDLASQPVYCNLQSLGQAPMDEEEYVIPGH 2 166 188 SCAPGLASAT IGAVVVSGCLLLGLAIAGPVLAW RLWRCRVARS +Q2I0M4 
MRGPSWSRPRPLLLLLLLLSPWPVWAQVSATASPSGSLGAPDCPEVCTCVPGGLASCSALSLPAVPPGLSLRLRALLLDHNRVRALPPGAFAGAGALQRLDLRENGLHSVHVRAFWGLGALQLLDLSANQLEALAPGTFAPLRALRNLSLAGNRLARLEPAALGALPLLRSLSLQDNELAALAPGLLGRLPALDALHLRGNPWGCGCALRPLCAWLRRHPLPASEAETVLCVWPGRLTLSPLTAFSDAAFSHCAQPLALRDLAVVYTLGPASFLVSLASCLALGSGLTACRARRRRLRTAALRPPRPPDPNPDPDPHGCASPADPGSPAAAAQA 2 265 287 QPLALRDLAV VYTLGPASFLVSLASCLALGSGL TACRARRRRL +Q14392 MRPQILLLLALLTLGLAAQHQDKVPCKMVDKKVSCQVLGLLQVPSVLPPDTETLDLSGNQLRSILASPLGFYTALRHLDLSTNEISFLQPGAFQALTHLEHLSLAHNRLAMATALSAGGLGPLPRVTSLDLSGNSLYSGLLERLLGEAPSLHTLSLAENSLTRLTRHTFRDMPALEQLDLHSNVLMDIEDGAFEGLPRLTHLNLSRNSLTCISDFSLQQLRVLDLSCNSIEAFQTASQPQAEFQLTWLDLRENKLLHFPDLAALPRLIYLNLSNNLIRLPTGPPQDSKGIHAPSEGWSALPLSAPSGNASGRPLSQLLNLDLSYNEIELIPDSFLEHLTSLCFLNLSRNCLRTFEARRLGSLPCLMLLDLSHNALETLELGARALGSLRTLLLQGNALRDLPPYTFANLASLQRLNLQGNRVSPCGGPDEPGPSGCVAFSGITSLRSLSLVDNEIELLRAGAFLHTPLTELDLSSNPGLEVATGALGGLEASLEVLALQGNGLMVLQVDLPCFICLKRLNLAENRLSHLPAWTQAVSLEVLDLRNNSFSLLPGSAMGGLETSLRRLYLQGNPLSCCGNGWLAAQLHQGRVDVDATQDLICRFSSQEEVSLSHVRPEDCEKGGLKNINLIIILTFILVSAILLTTLAACCCVRRQKFNQQYKA 2 629 651 EKGGLKNINL IIILTFILVSAILLTTLAACCCV RRQKFNQQYK +Q86YC3 MELLPLWLCLGFHFLTVGWRNRSGTATAASQGVCKLVGGAADCRGQSLASVPSSLPPHARMLTLDANPLKTLWNHSLQPYPLLESLSLHSCHLERISRGAFQEQGHLRSLVLGDNCLSENYEETAAALHALPGLRRLDLSGNALTEDMAALMLQNLSSLRSVSLAGNTIMRLDDSVFEGLERLRELDLQRNYIFEIEGGAFDGLAELRHLNLAFNNLPCIVDFGLTRLRVLNVSYNVLEWFLATGGEAAFELETLDLSHNQLLFFPLLPQYSKLRTLLLRDNNMGFYRDLYNTSSPREMVAQFLLVDGNVTNITTVSLWEEFSSSDLADLRFLDMSQNQFQYLPDGFLRKMPSLSHLNLHQNCLMTLHIREHEPPGALTELDLSHNQLSELHLAPGLASCLGSLRLFNLSSNQLLGVPPGLFANARNITTLDMSHNQISLCPLPAASDRVGPPSCVDFRNMASLRSLSLEGCGLGALPDCPFQGTSLTYLDLSSNWGVLNGSLAPLQDVAPMLQVLSLRNMGLHSSFMALDFSGFGNLRDLDLSGNCLTTFPRFGGSLALETLDLRRNSLTALPQKAVSEQLSRGLRTIYLSQNPYDCCGVDGWGALQHGQTVADWAMVTCNLSSKIIRVTELPGGVPRDCKWERLDLGLLYLVLILPSCLTLLVACTVIVLTFKKPLLQVIKSRCHWSSVY 2 652 674 KWERLDLGLL YLVLILPSCLTLLVACTVIVLTF KKPLLQVIKS +Q5VT99 
MRPRAPACAAAALGLCSLLLLLAPGHACPAGCACTDPHTVDCRDRGLPSVPDPFPLDVRKLLVAGNRIQRIPEDFFIFYGDLVYLDFRNNSLRSLEEGTFSGSAKLVFLDLSYNNLTQLGAGAFRSAGRLVKLSLANNNLVGVHEDAFETLESLQVLELNDNNLRSLSVAALAALPALRSLRLDGNPWLCDCDFAHLFSWIQENASKLPKGLDEIQCSLPMESRRISLRELSEASFSECRFSLSLTDLCIIIFSGVAVSIAAIISSFFLATVVQCLQRCAPNKDAEDEDEDKDD 2 251 273 FSLSLTDLCI IIFSGVAVSIAAIISSFFLATVV QCLQRCAPNK +Q9BTN0 MAILPLLLCLLPLAPASSPPQSATPSPCPRRCRCQTQSLPLSVLCPGAGLLFVPPSLDRRAAELRLADNFIASVRRRDLANMTGLLHLSLSRNTIRHVAAGAFADLRALRALHLDGNRLTSLGEGQLRGLVNLRHLILSNNQLAALAAGALDDCAETLEDLDLSYNNLEQLPWEALGRLGNVNTLGLDHNLLASVPAGAFSRLHKLARLDMTSNRLTTIPPDPLFSRLPLLARPRGSPASALVLAFGGNPLHCNCELVWLRRLAREDDLEACASPPALGGRYFWAVGEEEFVCEPPVVTHRSPPLAVPAGRPAALRCRAVGDPEPRVRWVSPQGRLLGNSSRARAFPNGTLELLVTEPGDGGIFTCIAANAAGEATAAVELTVGPPPPPQLANSTSCDPPRDGDPDALTPPSAASASAKVADTGPPTDRGVQVTEHGATAALVQWPDQRPIPGIRMYQIQYNSSADDILVYRMIPAESRSFLLTDLASGRTYDLCVLAVYEDSATGLTATRPVGCARFSTEPALRPCGAPHAPFLGGTMIIALGGVIVASVLVFIFVLLMRYKVHGGQPPGKAKIPAPVSSVCSQTNGALGPTPTPAPPAPEPAALRAHTVVQLDCEPWGPGHEPVGP 2 537 559 CGAPHAPFLG GTMIIALGGVIVASVLVFIFVLL MRYKVHGGQP +Q96JA1 
MARPVRGGLGAPRRSPCLLLLWLLLLRLEPVTAAAGPRAPCAAACTCAGDSLDCGGRGLAALPGDLPSWTRSLNLSYNKLSEIDPAGFEDLPNLQEVYLNNNELTAVPSLGAASSHVVSLFLQHNKIRSVEGSQLKAYLSLEVLDLSLNNITEVRNTCFPHGPPIKELNLAGNRIGTLELGAFDGLSRSLLTLRLSKNRITQLPVRAFKLPRLTQLDLNRNRIRLIEGLTFQGLNSLEVLKLQRNNISKLTDGAFWGLSKMHVLHLEYNSLVEVNSGSLYGLTALHQLHLSNNSIARIHRKGWSFCQKLHELVLSFNNLTRLDEESLAELSSLSVLRLSHNSISHIAEGAFKGLRSLRVLDLDHNEISGTIEDTSGAFSGLDSLSKLTLFGNKIKSVAKRAFSGLEGLEHLNLGGNAIRSVQFDAFVKMKNLKELHISSDSFLCDCQLKWLPPWLIGRMLQAFVTATCAHPESLKGQSIFSVPPESFVCDDFLKPQIITQPETTMAMVGKDIRFTCSAASSSSSPMTFAWKKDNEVLTNADMENFVHVHAQDGEVMEYTTILHLRQVTFGHEGRYQCVITNHFGSTYSHKARLTVNVLPSFTKTPHDITIRTTTMARLECAATGHPNPQIAWQKDGGTDFPAARERRMHVMPDDDVFFITDVKIDDAGVYSCTAQNSAGSISANATLTVLETPSLVVPLEDRVVSVGETVALQCKATGNPPPRITWFKGDRPLSLTERHHLTPDNQLLVVQNVVAEDAGRYTCEMSNTLGTERAHSQLSVLPAAGCRKDGTTVGIFTIAVVSSIVLTSLVWVCIIYQTRKKSEEYSVTNTDETVVPPDVPSYLSSQGTLSDRQETVVRTEGGPQANGHIESNGVCPRDASHFPEPDTHSVACRQPKLCAGSAYHKEPWKAMEKAEGTPGPHKMEHGGRVVCSDCNTEVDCYSRGQAFHPQPVSRDSAQPSAPNGPEPGGSDQEHSPHHQCSRTAAGSCPECQGSLYPSNHDRMLTAVKKKPMASLDGKGDSSWTLARLYHPDSTELQPASSLTSGSPERAEAQYLLVSNGHLPKACDASPESTPLTGQLPGKQRVPLLLAPKS 2 793 815 AAGCRKDGTT VGIFTIAVVSSIVLTSLVWVCII YQTRKKSEEY +Q9P2V4 MRVALGMLWLLALAWPPQARGFCPSQCSCSLHIMGDGSKARTVVCNDPDMTLPPASIPPDTSRLRLERTAIRRVPGEAFRPLGRLEQLWLPYNALSELNALMLRGLRRLRELRLPGNRLAAFPWAALRDAPKLRLLDLQANRLSAVPAEAARFLENLTFLDLSSNQLMRLPQELIVSWAHLETGIFPPGHHPRRVLGLQDNPWACDCRLYDLVHLLDGWAPNLAFIETELRCASPRSLAGVAFSQLELRKCQGPELHPGVASIRSLLGGTALLRCGATGVPGPEMSWRRANGRPLNGTVHQEVSSDGTSWTLLGLPAVSHLDSGDYICQAKNFLGASETVISLIVTEPPTSTEHSGSPGALWARTGGGGEAAAYNNKLVARHVPQIPKPAVLATGPSVPSTKEELTLEHFQMDALGELSDGRAGPSEARMVRSVKVVGDTYHSVSLVWKAPQAKNTTAFSVLYAVFGQHSMRRVIVQPGKTRVTITGLLPKTKYVACVCVQGLVPRKEQCVIFSTNEVVDAENTQQLINVVVISVAIVIALPLTLLVCCSALQKRCRKCFNKDSTEATVTYVNLERLGYSEDGLEELSRHSVSEADRLLSARSSVDFQAFGVKGGRRINEYFC 2 530 552 DAENTQQLIN VVVISVAIVIALPLTLLVCCSAL QKRCRKCFNK +A6NDA9 
MASVFHYFLLVLVFLDTHAAQPFCLPGCTCSEESFGRTLQCTSVSLGKIPGNLSEEFKQVRIENSPLFEMPQGSFINMSTLEYLWLNFNNISVIHLGALEHLPELRELRLEGNKLCSVPWTAFRATPLLRVLDLKRNKIDALPELALQFLVSLTYLDLSSNRLTVVSKSVFLNWPAYQKCRQPDCGAEILSSLVVALHDNPWVCDCRLRGLVQFVKSITLPVILVNSYLICQGPLSKAGQLFHETELSACMKPQISTPSANITIRAGQNVTLRCLAQASPSPSIAWTYPLSMWREFDVLTSSTGEDTALSELAIPAAHLVDSGNYTCMASNSIGKSNLVISLHVQPAQALHAPDSLSIPSEGNAYIDLRVVKQTVHGILLEWLAVADTSKEEWFTLYIASDEAFRKEVVHIGPGINTYAVDDLLPGTKYEACLSLEGQPPHQGQCVAFVTGRDAGGLEAREHLLHVTVVLCVVLLAVPVGAYAWAAQGPCSCSKWVLRGCLHRRKAPSCTPAAPQSKDGSFREHPAVCDDGEGHIDTEGDKEKGGTEDNS 2 463 485 DAGGLEAREH LLHVTVVLCVVLLAVPVGAYAWA AQGPCSCSKW +Q3SXY7 MHLFACLCIVLSFLEGVGCLCPSQCTCDYHGRNDGSGSRLVLCNDMDMNELPTNLPVDTVKLRIEKTVIRRISAEAFYYLVELQYLWVTYNSVASIDPSSFYNLKQLHELRLDGNSLAAFPWASLLDMPLLRTLDLHNNKITSVPNEALRYLKNLAYLDLSSNRLTTLPPDFLESWTHLVSTPSGVLDLSPSRIILGLQDNPWFCDCHISKMIELSKVVDPAIVLLDPLMTCSEPERLTGILFQRAELEHCLKPSVMTSATKIMSALGSNVLLRCDATGFPTPQITWTRSDSSPVNYTVIQESPEEGVRWSIMSLTGISSKDAGDYKCKAKNLAGMSEAVVTVTVLGITTTPIPPDTSERTGDHPEWDVQPGSGRSTSVSSASSYLWSSSFSPTSSFSASTLSPPSTASFSLSPFSSSTVSSTTTLSTSISASTTMANKRSFQLHQGGKRNLKVAKNGSKLPPASTSKKEELALLDQTMLTETNAAIENLRVVSETKESVTLTWNMINTTHNSAVTVLYSKYGGKDLLLLNADSSKNQVTIDGLEPGGQYMACVCPKGVPPQKDQCITFSTERVEGDDSQWSLLLVVTSTACVVILPLICFLLYKVCKLQCKSEPFWEDDLAKETYIQFETLFPRSQSVGELWTRSHRDDSEKLLLCSRSSVESQVTFKSEGSRPEYYC 2 581 603 TERVEGDDSQ WSLLLVVTSTACVVILPLICFLL YKVCKLQCKS +Q8ND94 MLGSPCLLWLLAVTFLVPRAQPLAPQDFEEEEADETETAWPPLPAVPCDYDHCRHLQVPCKELQRVGPAACLCPGLSSPAQPPDPPRMGEVRIAAEEGRAVVHWCAPFSPVLHYWLLLWDGSEAAQKGPPLNATVRRAELKGLKPGGIYVVCVVAANEAGASRVPQAGGEGLEGADIPAFGPCSRLAVPPNPRTLVHAAVGVGTALALLSCAALVWHFCLRDRWGCPRRAAARAAGAL 2 198 220 VPPNPRTLVH AAVGVGTALALLSCAALVWHFCL RDRWGCPRRA +Q86VZ4 
MASVAQESAGSQRRLPPRHGALRGLLLLCLWLPSGRAALPPAAPLSELHAQLSGVEQLLEEFRRQLQQERPQEELELELRAGGGPQEDCPGPGSGGYSAMPDAIIRTKDSLAAGASFLRAPAAVRGWRQCVAACCSEPRCSVAVVELPRRPAPPAAVLGCYLFNCTARGRNVCKFALHSGYSSYSLSRAPDGAALATARASPRQEKDAPPLSKAGQDVVLHLPTDGVVLDGRESTDDHAIVQYEWALLQGDPSVDMKVPQSGTLKLSHLQEGTYTFQLTVTDTAGQRSSDNVSVTVLRAAYSTGGCLHTCSRYHFFCDDGCCIDITLACDGVQQCPDGSDEDFCQNLGLDRKMVTHTAASPALPRTTGPSEDAGGDSLVEKSQKATAPNKPPALSNTEKRNHSAFWGPESQIIPVMPDSSSSGKNRKEESYIFESKGDGGGGEHPAPETGAVLPLALGLAITALLLLMVACRLRLVKQKLKKARPITSEESDYLINGMYL 2 451 473 GGEHPAPETG AVLPLALGLAITALLLLMVACRL RLVKQKLKKA +Q9Y561 MACRWSTKESPRWRSALLLLFLAGVYGNGALAEHSENVHISGVSTACGETPEQIRAPSGIITSPGWPSEYPAKINCSWFIRANPGEIITISFQDFDIQGSRRCNLDWLTIETYKNIESYRACGSTIPPPYISSQDHIWIRFHSDDNISRKGFRLAYFSGKSEEPNCACDQFRCGNGKCIPEAWKCNNMDECGDSSDEEICAKEANPPTAAAFQPCAYNQFQCLSRFTKVYTCLPESLKCDGNIDCLDLGDEIDCDVPTCGQWLKYFYGTFNSPNYPDFYPPGSNCTWLIDTGDHRKVILRFTDFKLDGTGYGDYVKIYDGLEENPHKLLRVLTAFDSHAPLTVVSSSGQIRVHFCADKVNAARGFNATYQVDGFCLPWEIPCGGNWGCYTEQQRCDGYWHCPNGRDETNCTMCQKEEFPCSRNGVCYPRSDRCNYQNHCPNGSDEKNCFFCQPGNFHCKNNRCVFESWVCDSQDDCGDGSDEENCPVIVPTRVITAAVIGSLICGLLLVIALGCTCKLYSLRMFERRSFETQLSRVEAELLRREAPPSYGQLIAQGLIPPVEDFPVCSPNQASVLENLRLAVRSQLGFTSVRLPMAGRSSNIWNRIFNFARSRHSGSLALVSADGDEVVPSQSTSREPERNHTHRSLFSVESDDTDTENERRDMAGASGGVAAPLPQKVPPTTAVEATVGACASSSTQSTRGGHADNGRDVTSVEPPSVSPARHQLTSALSRMTQGLRWVRFTLGRSSSLSQNQSPLRQLDNGVSGREDDDDVEMLIPISDGSSDFDVNDCSRPLLDLASDQGQGLRQPYNATNPGVRPSNRDGPCERCGIVHTAQIPDTCLEVTLKNETSDDEALLLC 2 13 32 CRWSTKESPR WRSALLLLFLAGVYGNGALA EHSENVHISG +O75096 
MRRQWGALLLGALLCAHGLASSPECACGRSHFTCAVSALGECTCIPAQWQCDGDNDCGDHSDEDGCILPTCSPLDFHCDNGKCIRRSWVCDGDNDCEDDSDEQDCPPRECEEDEFPCQNGYCIRSLWHCDGDNDCGDNSDEQCDMRKCSDKEFRCSDGSCIAEHWYCDGDTDCKDGSDEENCPSAVPAPPCNLEEFQCAYGRCILDIYHCDGDDDCGDWSDESDCSSHQPCRSGEFMCDSGLCINAGWRCDGDADCDDQSDERNCTTSMCTAEQFRCHSGRCVRLSWRCDGEDDCADNSDEENCENTGSPQCALDQFLCWNGRCIGQRKLCNGVNDCGDNSDESPQQNCRPRTGEENCNVNNGGCAQKCQMVRGAVQCTCHTGYRLTEDGHTCQDVNECAEEGYCSQGCTNSEGAFQCWCETGYELRPDRRSCKALGPEPVLLFANRIDIRQVLPHRSEYTLLLNNLENAIALDFHHRRELVFWSDVTLDRILRANLNGSNVEEVVSTGLESPGGLAVDWVHDKLYWTDSGTSRIEVANLDGAHRKVLLWQNLEKPRAIALHPMEGTIYWTDWGNTPRIEASSMDGSGRRIIADTHLFWPNGLTIDYAGRRMYWVDAKHHVIERANLDGSHRKAVISQGLPHPFAITVFEDSLYWTDWHTKSINSANKFTGKNQEIIRNKLHFPMDIHTLHPQRQPAGKNRCGDNNGGCTHLCLPSGQNYTCACPTGFRKISSHACAQSLDKFLLFARRMDIRRISFDTEDLSDDVIPLADVRSAVALDWDSRDDHVYWTDVSTDTISRAKWDGTGQEVVVDTSLESPAGLAIDWVTNKLYWTDAGTDRIEVANTDGSMRTVLIWENLDRPRDIVVEPMGGYMYWTDWGASPKIERAGMDASGRQVIISSNLTWPNGLAIDYGSQRLYWADAGMKTIEFAGLDGSKRKVLIGSQLPHPFGLTLYGERIYWTDWQTKSIQSADRLTGLDRETLQENLENLMDIHVFHRRRPPVSTPCAMENGGCSHLCLRSPNPSGFSCTCPTGINLLSDGKTCSPGMNSFLIFARRIDIRMVSLDIPYFADVVVPINITMKNTIAIGVDPQEGKVYWSDSTLHRISRANLDGSQHEDIITTGLQTTDGLAVDAIGRKVYWTDTGTNRIEVGNLDGSMRKVLVWQNLDSPRAIVLYHEMGFMYWTDWGENAKLERSGMDGSDRAVLINNNLGWPNGLTVDKASSQLLWADAHTERIEAADLNGANRHTLVSPVQHPYGLTLLDSYIYWTDWQTRSIHRADKGTGSNVILVRSNLPGLMDMQAVDRAQPLGFNKCGSRNGGCSHLCLPRPSGFSCACPTGIQLKGDGKTCDPSPETYLLFSSRGSIRRISLDTSDHTDVHVPVPELNNVISLDYDSVDGKVYYTDVFLDVIRRADLNGSNMETVIGRGLKTTDGLAVDWVARNLYWTDTGRNTIEASRLDGSCRKVLINNSLDEPRAIAVFPRKGYLFWTDWGHIAKIERANLDGSERKVLINTDLGWPNGLTLDYDTRRIYWVDAHLDRIESADLNGKLRQVLVSHVSHPFALTQQDRWIYWTDWQTKSIQRVDKYSGRNKETVLANVEGLMDIIVVSPQRQTGTNACGVNNGGCTHLCFARASDFVCACPDEPDSRPCSLVPGLVPPAPRATGMSEKSPVLPNTPPTTLYSSTTRTRTSLEEVEGRCSERDARLGLCARSNDAVPAAPGEGLHISYAIGGLLSILLILVVIAALMLYRHKKSKFTDPGMGNLTYSNPSYRTSTQEVKIEAIPKPAMYNQLCYKKEGGPDHNYTKEKIKIVEGICLLSGDDAEWDDLKQLRSSRGGLLRDHVCMKTDTVSIQASSGSLDDTETEQLLQEEQSECSSVHTAATPERRGSLPDTGWKHERKLSSESQV 2 1724 1746 VPAAPGEGLH ISYAIGGLLSILLILVVIAALML YRHKKSKFTD +Q8WUT4 
MRQTLPLLLLTVLRPSWADPPQEKVPLFRVTQQGPWGSSGSNATDSPCEGLPAADATALTLANRNLERLPGCLPRTLRSLDASHNLLRALSTSELGHLEQLQVLTLRHNRIAALRWGPGGPAGLHTLDLSYNQLAALPPCTGPALSSLRALALAGNPLRALQPRAFACFPALQLLNLSCTALGRGAQGGIAEAAFAGEDGAPLVTLEVLDLSGTFLERVESGWIRDLPKLTSLYLRKMPRLTTLEGDIFKMTPNLQQLDCQDSPALASVATHIFQDTPHLQVLLFQNCNLSSFPPWTLDSSQVLSINLFGNPLTCSCDLSWLLTDAKRTVLSRAADTMCAPAAGSSGPFSASLSLSQLPGVCQSDQSTTLGASHPPCFNRSTYAQGTTVAPSAAPATRPAGDQQSVSKAPNVGSRTIAAWPHSDAREGTAPSTTNSVAGHSNSSVFPRAASTTRTQHRGEHAPELVLEPDISAASTPLASKLLGPFPTSWDRSISSPQPGQRTHATPQAPNPSLSEGEIPVLLLDDYSEEEEGRKEEVGTPHQDVPCDYHPCKHLQTPCAELQRRWRCRCPGLSGEDTIPDPPRLQGVTETTDTSALVHWCAPNSVVHGYQIRYSAEGWAGNQSVVGVIYATARQHPLYGLSPGTTYRVCVLAANRAGLSQPRSSGWRSPCAAFTTKPSFALLLSGLCAASGLLLASTVVLSACLCRRGQTLGLQRCDTHLVAYKNPAFDDYPLGLQTVS 2 683 705 AFTTKPSFAL LLSGLCAASGLLLASTVVLSACL CRRGQTLGLQ +Q86UE6 MDFLLLGLCLYWLLRRPSGVVLCLLGACFQMLPAAPSGCPQLCRCEGRLLYCEALNLTEAPHNLSGLLGLSLRYNSLSELRAGQFTGLMQLTWLYLDHNHICSVQGDAFQKLRRVKELTLSSNQITQLPNTTFRPMPNLRSVDLSYNKLQALAPDLFHGLRKLTTLHMRANAIQFVPVRIFQDCRSLKFLDIGYNQLKSLARNSFAGLFKLTELHLEHNDLVKVNFAHFPRLISLHSLCLRRNKVAIVVSSLDWVWNLEKMDLSGNEIEYMEPHVFETVPHLQSLQLDSNRLTYIEPRILNSWKSLTSITLAGNLWDCGRNVCALASWLNNFQGRYDGNLQCASPEYAQGEDVLDAVYAFHLCEDGAEPTSGHLLSAVTNRSDLGPPASSATTLADGGEGQHDGTFEPATVALPGGEHAENAVQIHKVVTGTMALIFSFLIVVLVLYVSWKCFPASLRQLRQCFVTQRRKQKQKQTMHQMAAMSAQEYYVDYKPNHIEGALVIINEYGSCTCHQQPARECEV 2 428 450 HAENAVQIHK VVTGTMALIFSFLIVVLVLYVSW KCFPASLRQL +Q9HBL6 MKGELLLFSSVIVLLQVVCSCPDKCYCQSSTNFVDCSQQGLAEIPSHLPPQTRTLHLQDNQIHHLPAFAFRSVPWLMTLNLSNNSLSNLAPGAFHGLQHLQVLNLTQNSLLSLESRLFHSLPQLRELDLSSNNISHLPTSLGETWENLTILAVQQNQLQQLDRALLESMPSVRLLLLKDNLWKCNCHLLGLKLWLEKFVYKGGLTDGIICESPDTWKGKDLLRIPHELYQPCPLPAPDPVSSQAQWPGSAHGVVLRPPENHNAGERELLECELKPKPRPANLRHAIATVIITGVVCGIVCLMMLAAAIYGCTYAAITAQYHGGPLAQTNDPGKVEEKERFDSSPA 2 286 308 KPRPANLRHA IATVIITGVVCGIVCLMMLAAAI YGCTYAAITA +Q5SQ64 
MAVLFLLLFLCGTPQAADNMQAIYVALGEAVELPCPSPPTLHGDEHLSWFCSPAAGSFTTLVAQVQVGRPAPDPGKPGRESRLRLLGNYSLWLEGSKEEDAGRYWCAVLGQHHNYQNWRVYDVLVLKGSQLSARAADGSPCNVLLCSVVPSRRMDSVTWQEGKGPVRGRVQSFWGSEAALLLVCPGEGLSEPRSRRPRIIRCLMTHNKGVSFSLAASIDASPALCAPSTGWDMPWILMLLLTMGQGVVILALSIVLWRQRVRGAPGRDASIPQFKPEIQVYENIHLARLGPPAHKPR 2 235 257 CAPSTGWDMP WILMLLLTMGQGVVILALSIVLW RQRVRGAPGR +O60449 MRTGWATPRRPAGLLMLLFWFFDLAEPSGRAANDPFTIVHGNTGKCIKPVYGWIVADDCDETEDKLWKWVSQHRLFHLHSQKCLGLDITKSVNELRMFSCDSSAMLWWKCEHHSLYGAARYRLALKDGHGTAISNASDVWKKGGSEESLCDQPYHEIYTRDGNSYGRPCEFPFLIDGTWHHDCILDEDHSGPWCATTLNYEYDRKWGICLKPENGCEDNWEKNEQFGSCYQFNTQTALSWKEAYVSCQNQGADLLSINSAAELTYLKEKEGIAKIFWIGLNQLYSARGWEWSDHKPLNFLNWDPDRPSAPTIGGSSCARMDAESGLWQSFSCEAQLPYVCRKPLNNTVELTDVWTYSDTRCDAGWLPNNGFCYLLVNESNSWDKAHAKCKAFSSDLISIHSLADVEVVVTKLHNEDIKEEVWIGLKNINIPTLFQWSDGTEVTLTYWDENEPNVPYNKTPNCVSYLGELGQWKVQSCEEKLKYVCKRKGEKLNDASSDKMCPPDEGWKRHGETCYKIYEDEVPFGTNCNLTITSRFEQEYLNDLMKKYDKSLRKYFWTGLRDVDSCGEYNWATVGGRRRAVTFSNWNFLEPASPGGCVAMSTGKSVGKWEVKDCRSFKALSICKKMSGPLGPEEASPKPDDPCPEGWQSFPASLSCYKVFHAERIVRKRNWEEAERFCQALGAHLSSFSHVDEIKEFLHFLTDQFSGQHWLWIGLNKRSPDLQGSWQWSDRTPVSTIIMPNEFQQDYDIRDCAAVKVFHRPWRRGWHFYDDREFIYLRPFACDTKLEWVCQIPKGRTPKTPDWYNPDRAGIHGPPLIIEGSEYWFVADLHLNYEEAVLYCASNHSFLATITSFVGLKAIKNKIANISGDGQKWWIRISEWPIDDHFTYSRYPWHRFPVTFGEECLYMSAKTWLIDLGKPTDCSTKLPFICEKYNVSSLEKYSPDSAAKVQCSEQWIPFQNKCFLKIKPVSLTFSQASDTCHSYGGTLPSVLSQIEQDFITSLLPDMEATLWIGLRWTAYEKINKWTDNRELTYSNFHPLLVSGRLRIPENFFEEESRYHCALILNLQKSPFTGTWNFTSCSERHFVSLCQKYSEVKSRQTLQNASETVKYLNNLYKIIPKTLTWHSAKRECLKSNMQLVSITDPYQQAFLSVQALLHNSSLWIGLFSQDDELNFGWSDGKRLHFSRWAETNGQLEDCVVLDTDGFWKTVDCNDNQPGAICYYSGNETEKEVKPVDSVKCPSPVLNTPWIPFQNCCYNFIITKNRHMATTQDEVHTKCQKLNPKSHILSIRDEKENNFVLEQLLYFNYMASWVMLGITYRNKSLMWFDKTPLSYTHWRAGRPTIKNEKFLAGLSTDGFWDIQTFKVIEEAVYFHQHSILACKIEMVDYKEEYNTTLPQFMPYEDGIYSVIQKKVTWYEALNMCSQSGGHLASVHNQNGQLFLEDIVKRDGFPLWVGLSSHDGSESSFEWSDGSTFDYIPWKGQTSPGNCVLLDPKGTWKHEKCNSVKDGAICYKPTKSKKLSRLTYSSRCPAAKENGSRWIQYKGHCYKSDQALHSFSEAKKLCSKHDHSATIVSIKDEDENKFVSRLMRENNNITMRVWLGLSQHSVDQSWSWLDGSEVTFVKWENKSKSGVGRCSML
IASNETWKKVECEHGFGRVVCKVPLGPDYTAIAIIVATLSILVLMGGLIWFLFQRHRLHLAGFSSVRYAQGVNEDEIMLPSFHD 2 1669 1691 CKVPLGPDYT AIAIIVATLSILVLMGGLIWFLF QRHRLHLAGF +Q9HBG7 MVAPKSHTDDWAPGPFSSKPQRSQLQIFSSVLQTSLLFLLMGLRASGKDSAPTVVSGILGGSVTLPLNISVDTEIENVIWIGPKNALAFARPKENVTIMVKSYLGRLDITKWSYSLCISNLTLNDAGSYKAQINQRNFEVTTEEEFTLFVYEQLQEPQVTMKSVKVSENFSCNITLMCSVKGAEKSVLYSWTPREPHASESNGGSILTVSRTPCDPDLPYICTAQNPVSQRSSLPVHVGQFCTDPGASRGGTTGETVVGVLGEPVTLPLALPACRDTEKVVWLFNTSIISKEREEAATADPLIKSRDPYKNRVWVSSQDCSLKISQLKIEDAGPYHAYVCSEASSVTSMTHVTLLIYRRLRKPKITWSLRHSEDGICRISLTCSVEDGGNTVMYTWTPLQKEAVVSQGESHLNVSWRSSENHPNLTCTASNPVSRSSHQFLSENICSGPERNTKLWIGLFLMVCLLCVGIFSWCIWKRKGRCSVPAFCSSQAEAPADTPEPTAGHTLYSVLSQGYEKLDTPLRPARQQPTPTSDSSSDSNLTTEEDEDRPEVHKPISGRYEVFDQVTQEGAGHDPAPEGQADYDPVTPYVTEVESVVGENTMYAQVFNLQGKTPVSQKEESSATIYCSIRKPQVVPPPQQNDLEIPESPTYENFT 2 455 476 ICSGPERNTK LWIGLFLMVCLLCVGIFSWCIW KRKGRCSVPA +P14151 MIFPWKCQSTQRDLWNIFKLWGWTMLCCDFLAHHGTDCWTYHYSEKPMNWQRARRFCRDNYTDLVAIQNKAEIEYLEKTLPFSRSYYWIGIRKIGGIWTWVGTNKSLTEEAENWGDGEPNNKKNKEDCVEIYIKRNKDAGKWNDDACHKLKAALCYTASCQPWSCSGHGECVEIINNYTCNCDVGYYGPQCQFVIQCEPLEAPELGTMDCTHPLGNFSFSSQCAFSCSEGTNLTGIEETTCGPFGNWSSPEPTCQVIQCEPLSAPDLGIMNCSHPLASFSFTSACTFICSEGTELIGKKKTICESSGIWSNPSPICQKLDKSFSMIKEGDYNPLFIPVAVMVTAFSGLAFIIWLARRLKKGKKSKRSMNDPY 2 333 355 FSMIKEGDYN PLFIPVAVMVTAFSGLAFIIWLA RRLKKGKKSK +P16581 MIASQFLSALTLVLLIKESGAWSYNTSTEAMTYDEASAYCQQRYTHLVAIQNKEEIEYLNSILSYSPSYYWIGIRKVNNVWVWVGTQKPLTEEAKNWAPGEPNNRQKDEDCVEIYIKREKDVGMWNDERCSKKKLALCYTAACTNTSCSGHGECVETINNYTCKCDPGFSGLKCEQIVNCTALESPEHGSLVCSHPLGNFSYNSSCSISCDRGYLPSSMETMQCMSSGEWSAPIPACNVVECDAVTNPANGFVECFQNPGSFPWNTTCTFDCEEGFELMGAQSLQCTSSGNWDNEKPTCKAVTCRAVRQPQNGSVRCSHSPAGEFTFKSSCNFTCEEGFMLQGPAQVECTTQGQWTQQIPVCEAFQCTALSNPERGYMNCLPSASGSFRYGSSCEFSCEQGFVLKGSKRLQCGPTGEWDNEKPTCEAVRCDAVHQPPKGLVRCAHSPIGEFTYKSSCAFSCEEGFELHGSTQLECTSQGQWTEEVPSCQVVKCSSLAVPGKINMSCSGEPVFGTVCKFACPEGWTLNGSAARTCGATGHWSGLLPTCEAPTESNIPLVAGLSAAGLSLLTLAPFLLWLRKCLRKAKKFVPASSCQSLESDGSYQKPSYIL 2 556 578 TCEAPTESNI PLVAGLSAAGLSLLTLAPFLLWL RKCLRKAKKF +P16109 
MANCQIAILYQRFQRVVFGISQLLCFSALISELTNQKEVAAWTYHYSTKAYSWNISRKYCQNRYTDLVAIQNKNEIDYLNKVLPYYSSYYWIGIRKNNKTWTWVGTKKALTNEAENWADNEPNNKRNNEDCVEIYIKSPSAPGKWNDEHCLKKKHALCYTASCQDMSCSKQGECLETIGNYTCSCYPGFYGPECEYVRECGELELPQHVLMNCSHPLGNFSFNSQCSFHCTDGYQVNGPSKLECLASGIWTNKPPQCLAAQCPPLKIPERGNMTCLHSAKAFQHQSSCSFSCEEGFALVGPEVVQCTASGVWTAPAPVCKAVQCQHLEAPSEGTMDCVHPLTAFAYGSSCKFECQPGYRVRGLDMLRCIDSGHWSAPLPTCEAISCEPLESPVHGSMDCSPSLRAFQYDTNCSFRCAEGFMLRGADIVRCDNLGQWTAPAPVCQALQCQDLPVPNEARVNCSHPFGAFRYQSVCSFTCNEGLLLVGASVLQCLATGNWNSVPPECQAIPCTPLLSPQNGTMTCVQPLGSSSYKSTCQFICDEGYSLSGPERLDCTRSGRWTDSPPMCEAIKCPELFAPEQGSLDCSDTRGEFNVGSTCHFSCDNGFKLEGPNNVECTTSGRWSATPPTCKGIASLPTPGLQCPALTTPGQGTMYCRHHPGTFGFNTTCYFGCNAGFTLIGDSTLSCRPSGQWTAVTPACRAVKCSELHVNKPIAMNCSNLWGNFSYGSICSFHCLEGQLLNGSAQTACQENGHWSTTVPTCQAGPLTIQEALTYFGGAVASTIGLIMGGTLLALLRKRFRQKDDGKCPLNPHSHLGTYGVFTNAAFDPSP 2 773 795 AGPLTIQEAL TYFGGAVASTIGLIMGGTLLALL RKRFRQKDDG +Q9Y5Y7 MARCFSLVLLLTSIWTTRLLVQGSLRAEELSIQVSCRIMGITLVSKKANQQLNFTEAKEACRLLGLSLAGKDQVETALKASFETCSYGWVGDGFVVISRISPNPKCGKNGVGVLIWKVPVSRQFAAYCYNSSDTWTNSCIPEIITTKDPIFNTQTATQTTEFIVSDSTYSVASPYSTIPAPTTTPPAPASTSIPRRKKLICVTEVFMETSTMSTETEPFVENKAAFKNEAAGFGGVPTALLVLALLFFGAAAGLGFCYVKRYVKAFPFTNKNQQKEMIETKVVKEEKANDSNPNEESKKTDKNPEESKSPSKTTVRCLEAEV 2 236 258 FKNEAAGFGG VPTALLVLALLFFGAAAGLGFCY VKRYVKAFPF +P20916 MIFLTALPLFWIMISASRGGHWGAWMPSSISAFEGTCVSIPCRFDFPDELRPAVVHGVWYFNSPYPKNYPPVVFKSRTQVVHESFQGRSRLLGDLGLRNCTLLLSNVSPELGGKYYFRGDLGGYNQYTFSEHSVLDIVNTPNIVVPPEVVAGTEVEVSCMVPDNCPELRPELSWLGHEGLGEPAVLGRLREDEGTWVQVSLLHFVPTREANGHRLGCQASFPNTTLQFEGYASMDVKYPPVIVEMNSSVEAIEGSHVSLLCGADSNPPPLLTWMRDGTVLREAVAESLLLELEEVTPAEDGVYACLAENAYGQDNRTVGLSVMYAPWKPTVNGTMVAVEGETVSILCSTQSNPDPILTIFKEKQILSTVIYESELQLELPAVSPEDDGEYWCVAENQYGQRATAFNLSVEFAPVLLLESHCAAARDTVQCLCVVKSNPEPSVAFELPSRNVTVNESEREFVYSERSGLVLTSILTLRGQAQAPPRVICTARNLYGAKSLELPFQGAHRLMWAKIGPVGAVVAFAILIAIVCYITQTRRKKNVTESPSFSAGDNPPVLFSSDFRISGAPEKYESERRLGSERRLLGLRGEPPELDLSYSHSDLGKRPTKDSYTLTEELAEYAEIRVK 2 511 533 LPFQGAHRLM WAKIGPVGAVVAFAILIAIVCYI TQTRRKKNVT +Q5VYJ5 
MLFFLDRMLAFPMNETFCCLWIACVFNSTLAQQGTESFQCDNGVSLPPDSICDFTDQCGDSSDERHCLNYERCDFEDGLCHMTQDQSLQPSWTKRSGMIGLSPPFYDHNGDVSAHFLSLVSRVDSISSSLRSRVFLPTNDQHDCQITFYYFSCQVSGKLMVGLQTACGGPIQHLWQNTAALPNQWERNVIKIQSSQRFQVVFEGQMASTYEQDEVIAIDDISFSSGCLPANDGILLCQEALNAERELCHPDTDLCRFDATDEELRLCQACGFEFDMCEWTSEASAGQISWMRTKAREIPAFESTPQQDQGGDDEGYYVWVGAKHGFTLNHLDSRAYLNSSVCHCLGKSCHLQFYYAMESSVLRVRLYNNKEEEIFWTYNISTHSQWVKADVLIPEDLKTFKIIFEGTLLSQRSFIALDHLWVYACGQTQSRKLCSADEFPCTSGQCIAKESVCDSRQDCSDESDEDPATCSKHLTCDFESGFCGWEPFLTEDSHWKLMKGLNNGEHHFPAADHTANINHGSFIYLEAQRSPGVAKLGSPVLTKLLTASTPCQVQFWYHLSQHSNLSVFTRTSLDGNLQKQGKIIRFSESQWSHAKIDLIAEAGESTLPFQLILEATVLSSNATVALDDISVSQECEISYKSLPRTSTQSKFSKCDFEANSCDWFEAISGDHFDWIRSSQSELSADFEHQAPPRDHSLNASQGHFMFILKKSSSLWQVAKLQSPTFSQTGPGCILSFWFYNYGLSVGAAELQLHMENSHDSTVIWRVLYNQGKQWLEATIQLGRLSQPFHLSLDKVSLGIYDGVSAIDDIRFENCTLPLPAESCEGLDHFWCRHTRACIEKLRLCDLVDDCGDRTDEVNCAPELQCNFETGICNWEQDAKDDFDWTRSQGPTPTLNTGPMKDNTLGTAKGHYLYIESSEPQAFQDSAALLSPILNATDTKGCTFRFYYHMFGKRIYRLAIYQRIWSDSRGQLLWQIFGNQGNRWIRKHLNISSRQPFQILVEASVGDGFTGDIAIDDLSFMDCTLYPGNLPADLPTPPETSVPVTLPPHNCTDNEFICRSDGHCIEKMQKCDFKYDCPDKSDEASCVMEVCSFEKRSLCKWYQPIPVHLLQDSNTFRWGLGNGISIHHGEENHRPSVDHTQNTTDGWYLYADSSNGKFGDTADILTPIISLTGPKCTLVFWTHMNGATVGSLQVLIKKDNVTSKLWAQTGQQGAQWKRAEVFLGIRSHTQIVFRAKRGISYIGDVAVDDISFQDCSPLLSPERKCTDHEFMCANKHCIAKDKLCDFVNDCADNSDETTFICRTSSGRCDFEFDLCSWKQEKDEDFDWNLKASSIPAAGTEPAADHTLGNSSGHYIFIKSLFPQQPMRAARISSPVISKRSKNCKIIFHYHMYGNGIGALTLMQVSVTNQTKVLLNLTVEQGNFWRREELSLFGDEDFQLKFEGRVGKGQRGDIALDDIVLTENCLSLHDSVQEELAVPLPTGFCPLGYRECHNGKCYRLEQSCNFVDNCGDNTDENECGSSCTFEKGWCGWQNSQADNFDWVLGVGSHQSLRPPKDHTLGNENGHFMYLEATAVGLRGDKAHFRSTMWRESSAACTMSFWYFVSAKATGSIQILIKTEKGLSKVWQESKQNPGNHWQKADILLGKLRNFEVIFQGIRTRDLGGGAAIDDIEFKNCTTVGEISELCPEITDFLCRDKKCIASHLLCDYKPDCSDRSDEAHCAHYTSTTGSCNFETSSGNWTTACSLTQDSEDDLDWAIGSRIPAKALIPDSDHTPGSGQHFLYVNSSGSKEGSVARITTSKSFPASLGMCTVRFWFYMIDPRSMGILKVYTIEESGLNILVWSVIGNKRTGWTYGSVPLSSNSPFKVAFEADLDGNEDIFIALDDISFTPECVTGGPVPVQPSPCEADQFSCIYTLQCVPLSGKCDGHEDCIDGSDEMDCPLSPTPPLCSNMEFPCSTDECIPSLLLCDGVPDCHFNEDELICSNKSCSNGALVCASSNSCI
PAHQRCDGFADCMDFQLDESSCSECPLNYCRNGGTCVVEKNGPMCRCRQGWKGNRCHIKFNPPATDFTYAQNNTWTLLGIGLAFLMTHITVAVLCFLANRKVPIRKTEGSGNCAFVNPVYGNWSNPEKTESSVYSFSNPLYGTTSGSLETLSHHLK 2 2075 2097 TDFTYAQNNT WTLLGIGLAFLMTHITVAVLCFL ANRKVPIRKT +Q9H8J5 MFFGGEGSLTYTLVIICFLTLRLSASQNCLKKSLEDVVIDIQSSLSKGIRGNEPVYTSTQEDCINSCCSTKNISGDKACNLMIFDTRKTARQPNCYLFFCPNEEACPLKPAKGLMSYRIITDFPSLTRNLPSQELPQEDSLLHGQFSQAVTPLAHHHTDYSKPTDISWRDTLSQKFGSSDHLEKLFKMDEASAQLLAYKEKGHSQSSQFSSDQEIAHLLPENVSALPATVAVASPHTTSATPKPATLLPTNASVTPSGTSQPQLATTAPPVTTVTSQPPTTLISTVFTRAAATLQAMATTAVLTTTFQAPTDSKGSLETIPFTEISNLTLNTGNVYNPTALSMSNVESSTMNKTASWEGREASPGSSSQGSVPENQYGLPFEKWLLIGSLLFGVLFLVIGLVLLGRILSESLRRKRYSRLDYLINGIYVDI 2 386 408 QYGLPFEKWL LIGSLLFGVLFLVIGLVLLGRIL SESLRRKRYS +A6NHS7 MHVAEVAVNVILLLSMGWTSDSLCSPTIFYRDCWIRRFPGLLINLEESQKLGAQFLKYYSESTGQKCSRSCCLRKDVSCNLAVFYHSPIHDNINCLHVHCPTLESCILEPGTSAILYNITDGIDPDLLVFEQSPTYLNTRSSSNRWDRLRILKAMNLDKQTTTINGMLPSTEAPSSTTHQDLVVNTNSTSYSKELTTDFWARFTSLNESITTKINKVSPSTDFISNPDNKTISPFFEPIDTKLSHMPVPPGLNSSKQLLNKTKGYNSRNHTSANEDEVSVTSKTWLVSVALCTSVIFLGCCIVILASGCCGKQQGQYKPGQRKSGSLQIKNRNHMKENSS 2 286 308 DEVSVTSKTW LVSVALCTSVIFLGCCIVILASG CCGKQQGQYK +Q3UU94 MRAVELLLLLGLASMVHGLCSPTVFYRDCWIRRFPGMLLDLEESQRLGAQFLKYYSENTGQKCGRSCCLRKDVSCNVAVFFHDPVHDNVNCLHVHCPTLESCILEPGASAILYNITAGIDPDLLVFEHTSPIYPNSRSSSEWWDRLRILKAMSVGSEGVYPDVMNRMVPSTEAASTTQQDLGANTGISYSRKSTTDVGLRFTSANVSTATKVNMVSPSTDFTHSPGNKTISPFFGPTDTRVSQVPSRSRLNISKPSVNKTKGSHSRNHSSENEEPWDGAPASAGVWLACVTLGAAVISLCCRVVLGTSRCCGKRQGWSHMGQRSASGCRRNTLKENS 2 284 306 EPWDGAPASA GVWLACVTLGAAVISLCCRVVLG TSRCCGKRQG +Q14703 
MKLVNIWLLLLVVLLCGKKHLGDRLEKKSFEKAPCPGCSHLTLKVEFSSTVVEYEYIVAFNGYFTAKARNSFISSALKSSEVDNWRIIPRNNPSSDYPSDFEVIQIKEKQKAGLLTLEDHPNIKRVTPQRKVFRSLKYAESDPTVPCNETRWSQKWQSSRPLRRASLSLGSGFWHATGRHSSRRLLRAIPRQVAQTLQADVLWQMGYTGANVRVAVFDTGLSEKHPHFKNVKERTNWTNERTLDDGLGHGTFVAGVIASMRECQGFAPDAELHIFRVFTNNQVSYTSWFLDAFNYAILKKIDVLNLSIGGPDFMDHPFVDKVWELTANNVIMVSAIGNDGPLYGTLNNPADQMDVIGVGGIDFEDNIARFSSRGMTTWELPGGYGRMKPDIVTYGAGVRGSGVKGGCRALSGTSVASPVVAGAVTLLVSTVQKRELVNPASMKQALIASARRLPGVNMFEQGHGKLDLLRAYQILNSYKPQASLSPSYIDLTECPYMWPYCSQPIYYGGMPTVVNVTILNGMGVTGRIVDKPDWQPYLPQNGDNIEVAFSYSSVLWPWSGYLAISISVTKKAASWEGIAQGHVMITVASPAETESKNGAEQTSTVKLPIKVKIIPTPPRSKRVLWDQYHNLRYPPGYFPRDNLRMKNDPLDWNGDHIHTNFRDMYQHLRSMGYFVEVLGAPFTCFDASQYGTLLMVDSEEEYFPEEIAKLRRDVDNGLSLVIFSDWYNTSVMRKVKFYDENTRQWWMPDTGGANIPALNELLSVWNMGFSDGLYEGEFTLANHDMYYASGCSIAKFPEDGVVITQTFKDQGLEVLKQETAVVENVPILGLYQIPAEGGGRIVLYGDSNCLDDSHRQKDCFWLLDALLQYTSYGVTPPSLSHSGNRQRPPSGAGSVTPERMEGNHLHRYSKVLEAHLGDPKPRPLPACPRLSWAKPQPLNETAPSNLWKHQKLLSIDLDKVVLPNFRSNRPQVRPLSPGESGAWDIPGGIMPGRYNQEVGQTIPVFAFLGAMVVLAFFVVQINKAKSRPKRRKPRVKRPQLMQQVHPPKTPSV 2 999 1021 IMPGRYNQEV GQTIPVFAFLGAMVVLAFFVVQI NKAKSRPKRR +P15529 MEPPGRRECPFPSWRFPGLLLAAMVLLLYSFSDACEEPPTFEAMELIGKPKPYYEIGERVDYKCKKGYFYIPPLATHTICDRNHTWLPVSDDACYRETCPYIRDPLNGQAVPANGTYEFGYQMHFICNEGYYLIGEEILYCELKGSVAIWSGKPPICEKVLCTPPPKIKNGKHTFSEVEVFEYLDAVTYSCDPAPGPDPFSLIGESTIYCGDNSVWSRAAPECKVVKCRFPVVENGKQISGFGKKFYYKATVMFECDKGFYLDGSDTIVCDSNSTWDPPVPKCLKVLPPSSTKPPALSHSVSTSSTTKSPASSASGPRPTYKPPVSNYPGYPKPEEGILDSLDVWVIAVIVIAIVVGVAVICVVPYRYLQRRKKKGTYLTDETHREVKFTSL 2 344 366 PEEGILDSLD VWVIAVIVIAIVVGVAVICVVPY RYLQRRKKKG +Q96KG7 
MVISLNSCLSFICLLLCHWIGTASPLNLEDPNVCSHWESYSVTVQESYPHPFDQIYYTSCTDILNWFKCTRHRVSYRTAYRHGEKTMYRRKSQCCPGFYESGEMCVPHCADKCVHGRCIAPNTCQCEPGWGGTNCSSACDGDHWGPHCTSRCQCKNGALCNPITGACHCAAGFRGWRCEDRCEQGTYGNDCHQRCQCQNGATCDHVTGECRCPPGYTGAFCEDLCPPGKHGPQCEQRCPCQNGGVCHHVTGECSCPSGWMGTVCGQPCPEGRFGKNCSQECQCHNGGTCDAATGQCHCSPGYTGERCQDECPVGTYGVLCAETCQCVNGGKCYHVSGACLCEAGFAGERCEARLCPEGLYGIKCDKRCPCHLENTHSCHPMSGECACKPGWSGLYCNETCSPGFYGEACQQICSCQNGADCDSVTGKCTCAPGFKGIDCSTPCPLGTYGINCSSRCGCKNDAVCSPVDGSCTCKAGWHGVDCSIRCPSGTWGFGCNLTCQCLNGGACNTLDGTCTCAPGWRGEKCELPCQDGTYGLNCAERCDCSHADGCHPTTGHCRCLPGWSGVHCDSVCAEGRWGPNCSLPCYCKNGASCSPDDGICECAPGFRGTTCQRICSPGFYGHRCSQTCPQCVHSSGPCHHITGLCDCLPGFTGALCNEVCPSGRFGKNCAGICTCTNNGTCNPIDRSCQCYPGWIGSDCSQPCPPAHWGPNCIHTCNCHNGAFCSAYDGECKCTPGWTGLYCTQRCPLGFYGKDCALICQCQNGADCDHISGQCTCRTGFMGRHCEQKCPSGTYGYGCRQICDCLNNSTCDHITGTCYCSPGWKGARCDQAGVIIVGNLNSLSRTSTALPADSYQIGAIAGIIILVLVVLFLLALFIIYRHKQKGKESSMPAVTYTPAMRVVNADYTISGTLPHSNGGNANSHYFTNPSYHTLTQCATSPHVNNRDRMTVTKSKNNQLFVNLKNVNPGKRGPVGDCTGTLPADWKHGGYLNELGAFGLDRSYMGKSLKDLGKNSEYNSSNCSLSSSENPYATIKDPPVLIPKSSECGYVEMKSPARRDSPYAEINNSTSANRNVYEVEPTVSVVQGVFSNNGRLSQDPYDLPKNSHIPCHYDLLPVRDSSSSPKQEDSGGSSSNSSSSSE 2 856 878 STALPADSYQ IGAIAGIIILVLVVLFLLALFII YRHKQKGKES +A6BM72 
MVLSLTGLIAFSFLQATLALNPEDPNVCSHWESYAVTVQESYAHPFDQIYYTRCTDILNWFKCTRHRISYKTAYRRGLRTMYRRRSQCCPGYYESGDFCIPLCTEECVHGRCVSPDTCHCEPGWGGPDCSSGCDSDHWGPHCSNRCQCQNGALCNPITGACVCAAGFRGWRCEELCAPGTHGKGCQLPCQCRHGASCDPRAGECLCAPGYTGVYCEELCPPGSHGAHCELRCPCQNGGTCHHITGECACPPGWTGAVCAQPCPPGTFGQNCSQDCPCHHGGQCDHVTGQCHCTAGYMGDRCQEECPFGSFGFQCSQHCDCHNGGQCSPTTGACECEPGYKGPRCQERLCPEGLHGPGCTLPCPCDADNTISCHPVTGACTCQPGWSGHHCNESCPVGYYGDGCQLPCTCQNGADCHSITGGCTCAPGFMGEVCAVSCAAGTYGPNCSSICSCNNGGTCSPVDGSCTCKEGWQGLDCTLPCPSGTWGLNCNESCTCANGAACSPIDGSCSCTPGWLGDTCELPCPDGTFGLNCSEHCDCSHADGCDPVTGHCCCLAGWTGIRCDSTCPPGRWGPNCSVSCSCENGGSCSPEDGSCECAPGFRGPLCQRICPPGFYGHGCAQPCPLCVHSSRPCHHISGICECLPGFSGALCNQVCAGGYFGQDCAQLCSCANNGTCSPIDGSCQCFPGWIGKDCSQACPPGFWGPACFHACSCHNGASCSAEDGACHCTPGWTGLFCTQRCPAAFFGKDCGRVCQCQNGASCDHISGKCTCRTGFTGQHCEQRCAPGTFGYGCQQLCECMNNSTCDHVTGTCYCSPGFKGIRCDQAALMMEELNPYTKISPALGAERHSVGAVTGIMLLLFLIVVLLGLFAWHRRRQKEKGRDLAPRVSYTPAMRMTSTDYSLSGACGMDRRQNTYIMDKGFKDYMKESVCSSSTCSLNSSENPYATIKDPPILTCKLPESSYVEMKSPVHMGSPYTDVPSLSTSNKNIYEVEPTVSVVQEGCGHNSSYIQNAYDLPRNSHIPGHYDLLPVRQSPANGPSQDKQS 2 849 871 SPALGAERHS VGAVTGIMLLLFLIVVLLGLFAW HRRRQKEKGR +Q7Z7M0 
MALGKVLAMALVLALAVLGSLSPGARAGDCKGQRQVLREAPGFVTDGAGNYSVNGNCEWLIEAPSPQHRILLDFLFLDTECTYDYLFVYDGDSPRGPLLASLSGSTRPPPIEASSGKMLLHLFSDANYNLLGFNASFRFSLCPGGCQSHGQCQPPGVCACEPGWGGPDCGLQECSAYCGSHGTCASPLGPCRCEPGFLGRACDLHLWENQGAGWWHNVSARDPAFSARIGAAGAFLSPPGLLAVFGGQDLNNALGDLVLYNFSANTWESWDLSPAPAARHSHVAVAWAGSLVLMGGELADGSLTNDVWAFSPLGRGHWELLAPPASSSSGPPGLAGHAAALVDDVWLYVSGGRTPHDLFSSGLFRFRLDSTSGGYWEQVIPAGGRPPAATGHSMVFHAPSRALLVHGGHRPSTARFSVRVNSTELFHVDRHVWTTLKGRDGLQGPRERAFHTASVLGNYMVVYGGNVHTHYQEEKCYEDGIFFYHLGCHQWVSGAELAPPGTPEGRAAPPSGRYSHVAAVLGGSVLLVAGGYSGRPRGDLMAYKVPPFVFQAPAPDYHLDYCSMYTDHSVCSRDPECSWCQGACQAAPPPGTPLGACPAASCLGLGRLLGDCQACLAFSSPTAPPRGPGTLGWCVHNESCLPRPEQARCRGEQISGTVGWWGPAPVFVTSLEACVTQSFLPGLHLLTFQQPPNTSQPDKVSIVRSTTITLTPSAETDVSLVYRGFIYPMLPGGPGGPGAEDVAVWTRAQRLHVLARMARGPDTENMEEVGRWVAHQEKETRRLQRPGSARLFPLPGRDHKYAVEIQGQLNGSAGPGHSELTLLWDRTGVPGGSEISFFFLEPYRSSSCTSYSSCLGCLADQGCGWCLTSATCHLRQGGAHCGDDGAGGSLLVLVPTLCPLCEEHRDCHACTQDPFCEWHQSTSRKGDAACSRRGRGRGALKSPEECPPLCSQRLTCEDCLANSSQCAWCQSTHTCFLFAAYLARYPHGGCRGWDDSVHSEPRCRSCDGFLTCHECLQSHECGWCGNEDNPTLGRCLQGDFSGPLGGGNCSLWVGEGLGLPVALPARWAYARCPDVDECRLGLARCHPRATCLNTPLSYECHCQRGYQGDGISHCNRTCLEDCGHGVCSGPPDFTCVCDLGWTSDLPPPTPAPGPPAPRCSRDCGCSFHSHCRKRGPGFCDECQDWTWGEHCERCRPGSFGNATGSRGCRPCQCNGHGDPRRGHCDNLSGLCFCQDHTEGAHCQLCSPGYYGDPRAGGSCFRECGGRALLTNVSSVALGSRRVGGLLPPGGGAARAGPGLSYCVWVVSATEELQPCAPGTLCPPLTLTFSPDSSTPCTLSYVLAFDGFPRFLDTGVVQSDRSLIAAFCGQRRDRPLTVQALSGLLVLHWEANGSSSWGFNASVGSARCGSGGPGSCPVPQECVPQDGAAGAGLCRCPQGWAGPHCRMALCPENCNAHTGAGTCNQSLGVCICAEGFGGPDCATKLDGGQLVWETLMDSRLSADTASRFLHRLGHTMVDGPDATLWMFGGLGLPQGLLGNLYRYSVSERRWTQMLAGAEDGGPGPSPRSFHAAAYVPAGRGAMYLLGGLTAGGVTRDFWVLNLTTLQWRQEKAPQTVELPAVAGHTLTARRGLSLLLVGGYSPENGFNQQLLEYQLATGTWVSGAQSGTPPTGLYGHSAVYHEATDSLYVFGGFRFHVELAAPSPELYSLHCPDRTWSLLAPSQGAKRDRMRNVRGSSRGLGQVPGEQPGSWGFREVRKKMALWAALAGTGGFLEEISPHLKEPRPRLFHASALLGDTMVVLGGRSDPDEFSSDVLLYQVNCNAWLLPDLTRSASVGPPMEESVAHAVAAVGSRLYISGGFGGVALGRLLALTLPPDPCRLLSSPEACNQSGACTWCHGACLSGDQAHRLGCGGSPCSPMPRSPEECRRLRTCSECLARHPRTLQPGDGEASTPRCKWCTNCPEGACIGRNGSCTSENDCRINQREVFWAGNCSEAACGAAD
CEQCTREGKCMWTRQFKRTGETRRILSVQPTYDWTCFSHSLLNVSPMPVESSPPLPCPTPCHLLPNCTSCLDSKGADGGWQHCVWSSSLQQCLSPSYLPLRCMAGGCGRLLRGPESCSLGCAQATQCALCLRRPHCGWCAWGGQDGGGRCMEGGLSGPRDGLTCGRPGASWAFLSCPPEDECANGHHDCNETQNCHDQPHGYECSCKTGYTMDNMTGLCRPVCAQGCVNGSCVEPDHCRCHFGFVGRNCSTECRCNRHSECAGVGARDHCLLCRNHTKGSHCEQCLPLFVGSAVGGGTCRPCHAFCRGNSHICISRKELQMSKGEPKKYSLDPEEIENWVTEGPSEDEAVCVNCQNNSYGEKCESCLQGYFLLDGKCTKCQCNGHADTCNEQDGTGCPCQNNTETGTCQGSSPSDRRDCYKYQCAKCRESFHGSPLGGQQCYRLISVEQECCLDPTSQTNCFHEPKRRALGPGRTVLFGVQPKFTNVDIRLTLDVTFGAVDLYVSTSYDTFVVRVAPDTGVHTVHIQPPPAPPPPPPPADGGPRGAGDPGGAGASSGPGAPAEPRVREVWPRGLITYVTVTEPSAVLVVRGVRDRLVITYPHEHHALKSSRFYLLLLGVGDPSGPGANGSADSQGLLFFRQDQAHIDLFVFFSVFFSCFFLFLSLCVLLWKAKQALDQRQEQRRHLQEMTKMASRPFAKVTVCFPPDPTAPASAWKPAGLPPPAFRRSEPFLAPLLLTGAGGPWGPMGGGCCPPAIPATTAGLRAGPITLEPTEDGMAGVATLLLQLPGGPHAPNGACLGSALVTLRHRLHEYCGGGGGAGGSGHGTGAGRKGLLSQDNLTSMSL 2 2648 2670 FFRQDQAHID LFVFFSVFFSCFFLFLSLCVLLW KAKQALDQRQ +Q9H1U4 MNGGAERAMRSLPSLGGLALLCCAAAAAAAAVASAASAGNVTGGGGAAGQVDASPGPGLRGEPSHPFPRATAPTAQAPRTGPPRATVHRPLAATSPAQSPETTPLWATAGPSSTTFQAPLGPSPTTPPAAERTSTTSQAPTRPAPTTLSTTTGPAPTTPVATTVPAPTTPRTPTPDLPSSSNSSVLPTPPATEAPSSPPPEYVCNCSVVGSLNVNRCNQTTGQCECRPGYQGLHCETCKEGFYLNYTSGLCQPCDCSPHGALSIPCNSSGKCQCKVGVIGSICDRCQDGYYGFSKNGCLPCQCNNRSASCDALTGACLNCQENSKGNHCEECKEGFYQSPDATKECLRCPCSAVTSTGSCSIKSSELEPECDQCKDGYIGPNCNKCENGYYNFDSICRKCQCHGHVDPVKTPKICKPESGECINCLHNTTGFWCENCLEGYVHDLEGNCIKKEVILPTPEGSTILVSNASLTTSVPTPVINSTFTPTTLQTIFSVSTSENSTSALADVSWTQFNIIILTVIIIVVVLLMGFVGAVYMYREYQNRKLNAPFWTIELKEDNISFSSYHDSIPNADVSGLLEDDGNEVAPNGQLTLTTPIHNYKA 2 515 537 LADVSWTQFN IIILTVIIIVVVLLMGFVGAVYM YREYQNRKLN +Q16820 
MDLWNLSWFLFLDALLVISGLATPENFDVDGGMDQDIFDINEGLGLDLFEGDIRLDRAQIRNSIIGEKYRWPHTIPYVLEDSLEMNAKGVILNAFERYRLKTCIDFKPWAGETNYISVFKGSGCWSSVGNRRVGKQELSIGANCDRIATVQHEFLHALGFWHEQSRSDRDDYVRIMWDRILSGREHNFNTYSDDISDSLNVPYDYTSVMHYSKTAFQNGTEPTIVTRISDFEDVIGQRMDFSDSDLLKLNQLYNCSSSLSFMDSCSFELENVCGMIQSSGDNADWQRVSQVPRGPESDHSNMGQCQGSGFFMHFDSSSVNVGATAVLESRTLYPKRGFQCLQFYLYNSGSESDQLNIYIREYSADNVDGNLTLVEEIKEIPTGSWQLYHVTLKVTKKFRVVFEGRKGSGASLGGLSIDDINLSETRCPHHIWHIRNFTQFIGSPNGTLYSPPFYSSKGYAFQIYLNLAHVTNAGIYFHLISGANDDQLQWPCPWQQATMTLLDQNPDIRQRMSNQRSITTDPFMTTDNGNYFWDRPSKVGTVALFSNGTQFRRGGGYGTSAFITHERLKSRDFIKGDDVYILLTVEDISHLNSTQIQLTPAPSVQDLCSKTTCKNDGVCTVRDGKAECRCQSGEDWWYMGERCEKRGSTRDTIVIAVSSTVAVFALMLIITLVSVYCTRKKYRERMSSNRPNLTPQNQHAF 2 654 676 EKRGSTRDTI VIAVSSTVAVFALMLIITLVSVY CTRKKYRERM +Q9H9K5 MGSLSNYALLQLTLTAFLTILVQPQHLLAPVFRTLSILTNQSNCWLCEHLDNAEQPELVFVPASASTWWTYSGQWMYERVWYPQAEVQNHSTSSYRKVTWHWEASMEAQGLSFAQVRLLEGNFSLCVENKNGSGPFLGNIPKQYCNQILWFDSTDGTFMPSIDVTNESRNDDDDTSVCLGTRQCSWFAGCTNRTWNSSAVPLIGLPNTQDYKWVDRNSGLTWSGNDTCLYSCQNQTKGLLYQLFRNLFCSYGLTEAHGKWRCADASITNDKGHDGHRTPTWWLTGSNLTLSVNNSGLFFLCGNGVYKGFPPKWSGRCGLGYLVPSLTRYLTLNASQITNLRSFIHKVTPHRCTQGDTDNPPLYCNPKDNSTIRALFPSLGTYDLEKAILNISKAMEQEFSATKQTLEAHQSKVSSLASASRKDHVLDIPTTQRQTACGTVGKQCCLYINYSEEIKSNIQRLHEASENLKNVPLLDWQGIFAKVGDWFRSWGYVLLIVLFCLFIFVLIYVRVFRKSRRSLNSQPLNLALSPQQSAQLLVSETSCQVSNRAMKGLTTHQYDTSLL 2 490 512 FAKVGDWFRS WGYVLLIVLFCLFIFVLIYVRVF RKSRRSLNSQ +O75121 MDRLKSHLTVCFLPSVPFLILVSTLATAKSVTNSTLNGTNVVLGSVPVIIARTDHIIVKEGNSALINCSVYGIPDPQFKWYNSIGKLLKEEEDEKERGGGKWQMHDSGLLNITKVSFSDRGKYTCVASNIYGTVNNTVTLRVIFTSGDMGVYYMVVCLVAFTIVMVLNITRLCMMSSHLKKTEKAINEFFRTEGAEKLQKAFEIAKRIPIITSAKTLELAKVTQFKTMEFARYIEELARSVPLPPLIMNCRTIMEEIMEVVGLEEQGQNFVRHTPEGQEAADRDEVYTIPNSLKRSDSPAADSDASSLHEQPQQIAIKVSVHPQSKKEHADDQEGGQFEVKDVEETELSAEHSPETAEPSTDVTSTELTSEEPTPVEVPDKVLPPAYLEATEPAVTHDKNTCIIYESHV 2 150 172 LRVIFTSGDM GVYYMVVCLVAFTIVMVLNITRL CMMSSHLKKT +Q29983 
MGLGPVFLLLAGIFPFAPPGAAAEPHSLRYNLTVLSWDGSVQSGFLTEVHLDGQPFLRCDRQKCRAKPQGQWAEDVLGNKTWDRETRDLTGNGKDLRMTLAHIKDQKEGLHSLQEIRVCEIHEDNSTRSSQHFYYDGELFLSQNLETKEWTMPQSSRAQTLAMNVRNFLKEDAMKTKTHYHAMHADCLQELRRYLKSGVVLRRTVPPMVNVTRSEASEGNITVTCRASGFYPWNITLSWRQDGVSLSHDTQQWGDVLPDGNGTYQTWVATRICQGEEQRFTCYMEHSGNHSTHPVPSGKVLVLQSHWQTFHVSAVAAAAIFVIIIFYVRCCKKKTSAAEGPELVSLQVLDQHPVGTSDHRDATQLGFQPLMSDLGSTGSTEGA 2 306 328 PSGKVLVLQS HWQTFHVSAVAAAAIFVIIIFYV RCCKKKTSAA +P51512 MILLTFSTGRRLDFVHHSGVFFLQTLLWILCATVCGTEQYFNVEVWLQKYGYLPPTDPRMSVLRSAETMQSALAAMQQFYGINMTGKVDRNTIDWMKKPRCGVPDQTRGSSKFHIRRKRYALTGQKWQHKHITYSIKNVTPKVGDPETRKAIRRAFDVWQNVTPLTFEEVPYSELENGKRDVDITIIFASGFHGDSSPFDGEGGFLAHAYFPGPGIGGDTHFDSDEPWTLGNPNHDGNDLFLVAVHELGHALGLEHSNDPTAIMAPFYQYMETDNFKLPNDDLQGIQKIYGPPDKIPPPTRPLPTVPPHRSIPPADPRKNDRPKPPRPPTGRPSYPGAKPNICDGNFNTLAILRREMFVFKDQWFWRVRNNRVMDGYPMQITYFWRGLPPSIDAVYENSDGNFVFFKGNKYWVFKDTTLQPGYPHDLITLGSGIPPHGIDSAIWWEDVGKTYFFKGDRYWRYSEEMKTMDPGYPKPITVWKGIPESPQGAFVHKENGFTYFYKGKEYWKFNNQILKVEPGYPRSILKDFMGCDGPTDRVKEGHSPPDDVDIVIKLDNTASTVKAIAIVIPCILALCLLVLVYTVFQFKRKGTPRHILYCKRSMQEWV 2 565 587 LDNTASTVKA IAIVIPCILALCLLVLVYTVFQF KRKGTPRHIL +Q8TD46 MLCPWRTANLGLLLILTIFLVAASSSLCMDEKQITQNYSKVLAEVNTSWPVKMATNAVLCCPPIALRNLIIITWEIILRGQPSCTKAYKKETNETKETNCTDERITWVSRPDQNSDLQIRTVAITHDGYYRCIMVTPDGNFHRGYHLQVLVTPEVTLFQNRNRTAVCKAVAGKPAAHISWIPEGDCATKQEYWSNGTVTVKSTCHWEVHNVSTVTCHVSHLTGNKSLYIELLPVPGAKKSAKLYIPYIILTIIILTIVGFIWLLKVNGCRKYKLNKTESTPVVEEDEMQPYASYTEKNNPLYDTTNKVKASEALQSEVDTDLHTL 2 244 266 VPGAKKSAKL YIPYIILTIIILTIVGFIWLLKV NGCRKYKLNK +Q6Q8B3 MSAPRLLISIIIMVSASSSSCMGGKQMTQNYSTIFAEGNISQPVLMDINAVLCCPPIALRNLIIITWEIILRGQPSCTKAYKKETNETKETNCTVERITWVSRPDQNSDLQIRPVDTTHDGYYRGIVVTPDGNFHRGYHLQVLVTPEVNLFQSRNITAVCKAVTGKPAAQISWIPEGSILATKQEYWGNGTVTVKSTCPWEGHKSTVTCHVSHLTGNKSLSVKLNSGLRTSGSPALSLLIILYVKLSLFVVILVTTGFVFFQRINHVRKVL 2 239 261 RTSGSPALSL LIILYVKLSLFVVILVTTGFVFF QRINHVRKVL +Q2M385 
MNNFRATILFWAAAAWAKSGKPSGEMDEVGVQKCKNALKLPVLEVLPGGGWDNLRNVDMGRVMELTYSNCRTTEDGQYIIPDEIFTIPQKQSNLEMNSEILESWANYQSSTSYSINTELSLFSKVNGKFSTEFQRMKTLQVKDQAITTRVQVRNLVYTVKINPTLELSSGFRKELLDISDRLENNQTRMATYLAELLVLNYGTHVTTSVDAGAALIQEDHLRASFLQDSQSSRSAVTASAGLAFQNTVNFKFEENYTSQNVLTKSYLSNRTNSRVQSIGGVPFYPGITLQAWQQGITNHLVAIDRSGLPLHFFINPNMLPDLPGPLVKKVSKTVETAVKRYYTFNTYPGCTDLNSPNFNFQANTDDGSCEGKMTNFSFGGVYQECTQLSGNRDVLLCQKLEQKNPLTGDFSCPSGYSPVHLLSQIHEEGYNHLECHRKCTLLVFCKTVCEDVFQVAKAEFRAFWCVASSQVPENSGLLFGGLFSSKSINPMTNAQSCPAGYFPLRLFENLKVCVSQDYELGSRFAVPFGGFFSCTVGNPLVDPAISRDLGAPSLKKCPGGFSQHPALISDGCQVSYCVKSGLFTGGSLPPARLPPFTRPPLMSQAATNTVIVTNSENARSWIKDSQTHQWRLGEPIELRRAMNVIHGDGGGLSGGAAAGVTVGVTTILAVVITLAIYGTRKFKKKAYQAIEERQSLVPGTAATGDTTYQEQGQSPA 2 656 678 HGDGGGLSGG AAAGVTVGVTTILAVVITLAIYG TRKFKKKAYQ +P20645 MFPFYSCWRTGLLLLLLAVAVRESWQTEEKTCDLVGEKGKESEKELALVKRLKPLFNKSFESTVGQGSDTYIYIFRVCREAGNHTSGAGLVQINKSNGKETVVGRLNETHIFNGSNWIMLIYKGGDEYDNHCGKEQRRAVVMISCNRHTLADNFNPVSEERGKVQDCFYLFEMDSSLACSPEISHLSVGSILLVTFASLVAVYVVGGFLYQRLVVGAKGMEQFPHLAFWQDLGNLVADGCDFVCRSKPRNVPAAYRGVGDDQLGEESEERDDHLLPM 2 188 210 ACSPEISHLS VGSILLVTFASLVAVYVVGGFLY QRLVVGAKGM +P11717 
MGAAAGRSPHLGPAPARRPQRSLLLLQLLLLVAAPGSTQAQAAPFPELCSYTWEAVDTKNNVLYKINICGSVDIVQCGPSSAVCMHDLKTRTYHSVGDSVLRSATRSLLEFNTTVSCDQQGTNHRVQSSIAFLCGKTLGTPEFVTATECVHYFEWRTTAACKKDIFKANKEVPCYVFDEELRKHDLNPLIKLSGAYLVDDSDPDTSLFINVCRDIDTLRDPGSQLRACPPGTAACLVRGHQAFDVGQPRDGLKLVRKDRLVLSYVREEAGKLDFCDGHSPAVTITFVCPSERREGTIPKLTAKSNCRYEIEWITEYACHRDYLESKTCSLSGEQQDVSIDLTPLAQSGGSSYISDGKEYLFYLNVCGETEIQFCNKKQAAVCQVKKSDTSQVKAAGRYHNQTLRYSDGDLTLIYFGGDECSSGFQRMSVINFECNKTAGNDGKGTPVFTGEVDCTYFFTWDTEYACVKEKEDLLCGATDGKKRYDLSALVRHAEPEQNWEAVDGSQTETEKKHFFINICHRVLQEGKARGCPEDAAVCAVDKNGSKNLGKFISSPMKEKGNIQLSYSDGDDCGHGKKIKTNITLVCKPGDLESAPVLRTSGEGGCFYEFEWHTAAACVLSKTEGENCTVFDSQAGFSFDLSPLTKKNGAYKVETKKYDFYINVCGPVSVSPCQPDSGACQVAKSDEKTWNLGLSNAKLSYYDGMIQLNYRGGTPYNNERHTPRATLITFLCDRDAGVGFPEYQEEDNSTYNFRWYTSYACPEEPLECVVTDPSTLEQYDLSSLAKSEGGLGGNWYAMDNSGEHVTWRKYYINVCRPLNPVPGCNRYASACQMKYEKDQGSFTEVVSISNLGMAKTGPVVEDSGSLLLEYVNGSACTTSDGRQTTYTTRIHLVCSRGRLNSHPIFSLNWECVVSFLWNTEAACPIQTTTDTDQACSIRDPNSGFVFNLNPLNSSQGYNVSGIGKIFMFNVCGTMPVCGTILGKPASGCEAETQTEELKNWKPARPVGIEKSLQLSTEGFITLTYKGPLSAKGTADAFIVRFVCNDDVYSGPLKFLHQDIDSGQGIRNTYFEFETALACVPSPVDCQVTDLAGNEYDLTGLSTVRKPWTAVDTSVDGRKRTFYLSVCNPLPYIPGCQGSAVGSCLVSEGNSWNLGVVQMSPQAAANGSLSIMYVNGDKCGNQRFSTRITFECAQISGSPAFQLQDGCEYVFIWRTVEACPVVRVEGDNCEVKDPRHGNLYDLKPLGLNDTIVSAGEYTYYFRVCGKLSSDVCPTSDKSKVVSSCQEKREPQGFHKVAGLLTQKLTYENGLLKMNFTGGDTCHKVYQRSTAIFFYCDRGTQRPVFLKETSDCSYLFEWRTQYACPPFDLTECSFKDGAGNSFDLSSLSRYSDNWEAITGTGDPEHYLINVCKSLAPQAGTEPCPPEAAACLLGGSKPVNLGRVRDGPQWRDGIIVLKYVDGDLCPDGIRKKSTTIRFTCSESQVNSRPMFISAVEDCEYTFAWPTATACPMKSNEHDDCQVTNPSTGHLFDLSSLSGRAGFTAAYSEKGLVYMSICGENENCPPGVGACFGQTRISVGKANKRLRYVDQVLQLVYKDGSPCPSKSGLSYKSVISFVCRPEARPTNRPMLISLDKQTCTLFFSWHTPLACEQATECSVRNGSSIVDLSPLIHRTGGYEAYDESEDDASDTNPDFYINICQPLNPMHGVPCPAGAAVCKVPIDGPPIDIGRVAGPPILNPIANEIYLNFESSTPCLADKHFNYTSLIAFHCKRGVSMGTPKLLRTSECDFVFEWETPVVCPDEVRMDGCTLTDEQLLYSFNLSSLSTSTFKVTRDSRTYSVGVCTFAVGPEQGGCKDGGVCLLSGTKGASFGRLQSMKLDYRHQDEAVVLSYVNGDRCPPETDDGVPCVFPFIFNGKSYEECIIESRAKLWCSTTADYDRDHEWGFCRHSNSYRTSSIIFKCDEDEDIGRPQVFSEVRGCDVTFEWKTKVVCPPKKLECKFVQKH
KTYDLRLLSSLTGSWSLVHNGVSYYINLCQKIYKGPLGCSERASICRRTTTGDVQVLGLVHTQKLGVIGDKVVVTYSKGYPCGGNKTASSVIELTCTKTVGRPAFKRFDIDSCTYYFSWDSRAACAVKPQEVQMVNGTITNPINGKSFSLGDIYFKLFRASGDMRTNGDNYLYEIQLSSITSSRNPACSGANICQVKPNDQHFSRKVGTSDKTKYYLQDGDLDVVFASSSKCGKDKTKSVSSTIFFHCDPLVEDGIPEFSHETADCQYLFSWYTSAVCPLGVGFDSENPGDDGQMHKGLSERSQAVGAVLSLLLVALTCCLLALLLYKKERRETVISKLTTCCRRSSNVSYKYSKVNKEEETDENETEWLMEEIQLPPPRQGKEGQENGHITTKSVKALSSLHGDDQDSEDEVLTIPEVKVHSGRGAGAESSHPVRNAQSNALQEREDDRVGLVRGEKARKGKSSSAQQKTVSSTKLVSFHDDSDEDLLHI 2 2305 2327 MHKGLSERSQ AVGAVLSLLLVALTCCLLALLLY KKERRETVIS +Q3TEW6 MAEAVGAVALIAAPARRRWLWSVLAAMLGLLTARISALEVHTPKEIFVVNGTQGKLTCTFDSPNTTGWLTTVSWSFQPDGTDSAVSFFHYSQGQVYIGDYPPFKDRVTWAGDLDKKDASINIENIQAVHNGTYICDVKNPPDIVVRPGHIRLHVVEIDNLLVFLVWVVVGTVTAVVLGLTLLISLVLVVLYRRKHSKRDYTGCSTSERLSPVKQAPRKCPSDTEGLVKSPPSAGSHQGPVIYAQLDHSGGHHSGKINKSESVVYADIRKD 2 162 191 LHVVEIDNLL VFLVWVVVGTVTAVVLGLTLLISLVLVVLY RRKHSKRDYT +O60487 MYGKSSTRAVLLLLGIQLTALWPIAAVEIYTSRVLEAVNGTDARLKCTFSSFAPVGDALTVTWNFRPLDGGPEQFVFYYHIDPFQPMSGRFKDRVSWDGNPERYDASILLWKLQFDDNGTYTCQVKNPPDVDGVIGEIRLSVVHTVRFSEIHFLALAIGSACALMIIIVIVVVLFQHYRKKRWAERAHKVVEIKSKEEERLNQEKKVSVYLEDTD 2 153 175 VHTVRFSEIH FLALAIGSACALMIIIVIVVVLF QHYRKKRWAE +Q6UWV2 MQQRGAAGSRGCALFPLLGVLFFQGVYIVFSLEIRADAHVRGYVGEKIKLKCTFKSTSDVTDKLTIDWTYRPPSSSHTVSIFHYQSFQYPTTAGTFRDRISWVGNVYKGDASISISNPTIKDNGTFSCAVKNPPDVHHNIPMTELTVTERGFGTMLSSVALLSILVFVPSAVVVALLLVRMGRKAAGLKKRSRSGYKKSSIEVSDDTDQEEEEACMARLCVRCAECLDSDYEETY 2 159 181 ERGFGTMLSS VALLSILVFVPSAVVVALLLVRM GRKAAGLKKR +Q61830 
MRLLLLLAFISVIPVSVQLLDARQFLIYNEDHKRCVDALSAISVQTATCNPEAESQKFRWVSDSQIMSVAFKLCLGVPSKTDWASVTLYACDSKSEYQKWECKNDTLFGIKGTELYFNYGNRQEKNIKLYKGSGLWSRWKVYGTTDDLCSRGYEAMYSLLGNANGAVCAFPFKFENKWYADCTSAGRSDGWLWCGTTTDYDKDKLFGFCPLHFEGSERLWNKDPLTGILYQINSKSALTWHQARASCKQQNADLLSVTEIHEQMYLTGLTSSLSSGLWIGLNSLSVRSGWQWAGGSPFRYLNWLPGSPSSEPGKSCVSLNPGKNAKWENLECVQKLGYICKKGNNTLNPFIIPSASDVPTGCPNQWWPYAGHCYRIHREEKKIQKYALQACRKEGGDLASIHSIEEFDFIFSQLGYEPNDELWIGLNDIKIQMYFEWSDGTPVTFTKWLPGEPSHENNRQEDCVVMKGKDGYWADRACEQPLGYICKMVSQSHAVVPEGADKGCRKGWKRHGFYCYLIGSTLSTFTDANHTCTNEKAYLTTVEDRYEQAFLTSLVGLRPEKYFWTGLSDVQNKGTFRWTVDEQVQFTHWNADMPGRKAGCVAMKTGVAGGLWDVLSCEEKAKFVCKHWAEGVTRPPEPTTTPEPKCPENWGTTSKTSMCFKLYAKGKHEKKTWFESRDFCKAIGGELASIKSKDEQQVIWRLITSSGSYHELFWLGLTYGSPSEGFTWSDGSPVSYENWAYGEPNNYQNVEYCGELKGDPGMSWNDINCEHLNNWICQIQKGKTLLPEPTPAPQDNPPVTADGWVIYKDYQYYFSKEKETMDNARAFCKKNFGDLATIKSESEKKFLWKYINKNGGQSPYFIGMLISMDKKFIWMDGSKVDFVAWATGEPNFANDDENCVTMYTNSGFWNDINCGYPNNFICQRHNSSINATAMPTTPTTPGGCKEGWHLYKNKCFKIFGFANEEKKSWQDARQACKGLKGNLVSIENAQEQAFVTYHMRDSTFNAWTGLNDINAEHMFLWTAGQGVHYTNWGKGYPGGRRSSLSYEDADCVVVIGGNSREAGTWMDDTCDSKQGYICQTQTDPSLPVSPTTTPKDGFVTYGKSSYSLMKLKLPWHEAETYCKDHTSLLASILDPYSNAFAWMKMHPFNVPIWIALNSNLTNNEYTWTDRWRVRYTNWGADEPKLKSACVYMDVDGYWRTSYCNESFYFLCKKSDEIPATEPPQLPGKCPESEQTAWIPFYGHCYYFESSFTRSWGQASLECLRMGASLVSIETAAESSFLSYRVEPLKSKTNFWIGMFRNVEGKWLWLNDNPVSFVNWKTGDPSGERNDCVVLASSSGLWNNIHCSSYKGFICKMPKIIDPVTTHSSITTKADQRKMDPQPKGSSKAAGVVTVVLLIVIGAGVAAYFFYKKRHALHIPQEATFENTLYFNSNLSPGTSDTKDLMGNIEQNEHAII 2 1388 1410 MDPQPKGSSK AAGVVTVVLLIVIGAGVAAYFFY KKRHALHIPQ +Q13505 MLLGGPPRSPRSGTSPKGPWSSTGHVQFGKSPQTWPRRTRPRSPEPAAPSGVRGSTWTRRRDSPRRAGPTALSRYVGHLWMGRRPPSPEARGPVPRSSAASRARRSLASPGISPGPLTATIGGAVAGGGPRQGRAEAHKEVFPGQRVGKMAAPMELFCWSGGWGLPSVDLDSLAVLTYARFTGAPLKVHKISNPWQSPSGTLPALRTSHGEVISVPHKIITHLRKEKYNADYDLSARQGADTLAFMSLLEEKLLPVLVHTFWIDTKNYVEVTRKWYAEAMPFPLNFFLPGRMQRQYMERLQLLTGEHRPEDEEELEKELYREARECLTLLSQRLGSQKFFFGDAPASLDAFVFSYLALLLQAKLPSGKLQVHLRGLHNLCAYCTHILSLYFPWDGAEVPPQRQTPAGPETEEEPYRRRNQILSVLAGLAAMVGYALLSGIVSIQRATPARAPGTRTLGMAEEDEEE 2 421 443 
EEEPYRRRNQ ILSVLAGLAAMVGYALLSGIVSI QRATPARAPG +Q9UKN1 MLVIWILTLALRLCASVTTVTPEGSAVHKAISQQGTLWTGEVLEKQTVEQGKSTLRRQKNHFHRSAGELRCRNALKDEGASAGWSVMFAGESVVVLVHLWMTGARVKNLGLVEFASPGDDGDGRAEGFSLGLPLSEQARAAGAREKERQETVINHSTFSGFSQITGSTVNTSIGGNTTSASTPSSSDPFTTFSDYGVSVTFITGSTATKHFLDSSTNSGHSEESTVSHSGPGATGTTLFPSHSATSVFVGEPKTSPITSASMETTALPGSTTTAGLSEKSTTFYSSPRSPDRTLSPARTTSSGVSEKSTTSHSRPGPTHTIAFPDSTTMPGVSQESTASHSIPGSTDTTLSPGTTTPSSLGPESTTFHSSPGYTKTTRLPDNTTTSGLLEASTPVHSSTGSPHTTLSPSSSTTHEGEPTTFQSWPSSKDTSPAPSGTTSAFVKLSTTYHSSPSSTPTTHFSASSTTLGHSEESTPVHSSPVATATTPPPARSATSGHVEESTAYHRSPGSTQTMHFPESSTTSGHSEESATFHGSTTHTKSSTPSTTAALAHTSYHSSLGSTETTHFRDSSTISGRSEESKASHSSPDAMATTVLPAGSTPSVLVGDSTPSPISSGSMETTALPGSTTKPGLSEKSTTFYSSPRSPDTTHLPASMTSSGVSEESTTSHSRPGSTHTTAFPGSTTMPGLSQESTASHSSPGPTDTTLSPGSTTASSLGPEYTTFHSRPGSTETTLLPDNTTASGLLEASMPVHSSTRSPHTTLSPAGSTTRQGESTTFHSWPSSKDTRPAPPTTTSAFVEPSTTSHGSPSSIPTTHISARSTTSGLVEESTTYHSSPGSTQTMHFPESDTTSGRGEESTTSHSSTTHTISSAPSTTSALVEEPTSYHSSPGSTATTHFPDSSTTSGRSEESTASHSSQDATGTIVLPARSTTSVLLGESTTSPISSGSMETTALPGSTTTPGLSERSTTFHSSPRSPATTLSPASTTSSGVSEESTTSRSRPGSTHTTAFPDSTTTPGLSRHSTTSHSSPGSTDTTLLPASTTTSGPSQESTTSHSSSGSTDTALSPGSTTALSFGQESTTFHSNPGSTHTTLFPDSTTSSGIVEASTRVHSSTGSPRTTLSPASSTSPGLQGESTAFQTHPASTHTTPSPPSTATAPVEESTTYHRSPGSTPTTHFPASSTTSGHSEKSTIFHSSPDASGTTPSSAHSTTSGRGESTTSRISPGSTEITTLPGSTTTPGLSEASTTFYSSPRSPTTTLSPASMTSLGVGEESTTSRSQPGSTHSTVSPASTTTPGLSEESTTVYSSSRGSTETTVFPHSTTTSVHGEEPTTFHSRPASTHTTLFTEDSTTSGLTEESTAFPGSPASTQTGLPATLTTADLGEESTTFPSSSGSTGTKLSPARSTTSGLVGESTPSRLSPSSTETTTLPGSPTTPSLSEKSTTFYTSPRSPDATLSPATTTSSGVSEESSTSHSQPGSTHTTAFPDSTTTSDLSQEPTTSHSSQGSTEATLSPGSTTASSLGQQSTTFHSSPGDTETTLLPDDTITSGLVEASTPTHSSTGSLHTTLTPASSTSAGLQEESTTFQSWPSSSDTTPSPPGTTAAPVEVSTTYHSRPSSTPTTHFSASSTTLGRSEESTTVHSSPGATGTALFPTRSATSVLVGEPTTSPISSGSTETTALPGSTTTAGLSEKSTTFYSSPRSPDTTLSPASTTSSGVSEESTTSHSRPGSTHTTAFPGSTTMPGVSQESTASHSSPGSTDTTLSPGSTTASSLGPESTTFHSSPGSTETTLLPDNTTASGLLEASTPVHSSTGSPHTTLSPAGSTTRQGESTTFQSWPSSKDTMPAPPTTTSAFVELSTTSHGSPSSTPTTHFSASSTTLGRSEESTTVHSSPVATATTPSPARSTTSGLVEESTAYHSSPGSTQTMHFPESSTASGRSEESRTSHSS
TTHTISSPPSTTSALVEEPTSYHSSPGSTATTHFPDSSTTSGRSEESTASHSSQDATGTIVLPARSTTSVLLGESTTSPISSGSMETTALPGSTTTPGLSEKSTTFHSSPRSPATTLSPASTTSSGVSEESTTSHSRPGSTHTTAFPDSTTTPGLSRHSTTSHSSPGSTDTTLLPASTTTSGPSQESTTSHSSPGSTDTALSPGSTTALSFGQESTTFHSSPGSTHTTLFPDSTTSSGIVEASTRVHSSTGSPRTTLSPASSTSPGLQGESTAFQTHPASTHTTPSPPSTATAPVEESTTYHRSPGSTPTTHFPASSTTSGHSEKSTIFHSSPDASGTTPSSAHSTTSGRGESTTSRISPGSTEITTLPGSTTTPGLSEASTTFYSSPRSPTTTLSPASMTSLGVGEESTTSRSQPGSTHSTVSPASTTTPGLSEESTTVYSSSPGSTETTVFPRTPTTSVRGEEPTTFHSRPASTHTTLFTEDSTTSGLTEESTAFPGSPASTQTGLPATLTTADLGEESTTFPSSSGSTGTTLSPARSTTSGLVGESTPSRLSPSSTETTTLPGSPTTPSLSEKSTTFYTSPRSPDATLSPATTTSSGVSEESSTSHSQPGSTHTTAFPDSTTTPGLSRHSTTSHSSPGSTDTTLLPASTTTSGPSQESTTSHSSPGSTDTALSPGSTTALSFGQESTTFHSSPGSTHTTLFPDSTTSSGIVEASTRVHSSTGSPRTTLSPASSTSPGLQGESTTFQTHPASTHTTPSPPSTATAPVEESTTYHRSPGSTPTTHFPASSTTSGHSEKSTIFHSSPDASGTTPSSAHSTTSGRGESTTSRISPGSTEITTLPGSTTTPGLSEASTTFYSSPRSPTTTLSPASMTSLGVGEESTTSRSQPGSTHSTVSPASTTTPGLSEESTTVYSSSPGSTETTVFPRSTTTSVRGEEPTTFHSRPASTHTTLFTEDSTTSGLTEESTAFPGSPASTQTGLPATLTTADLGEESTTFPSSSGSTGTTLSPARSTTSGLVGESTPSRLSPSSTETTTLPGSPTTPSLSEKSTTFYTSPRSPDATLSPATTTSSGVSEESSTSHSQPGSTHTTAFPDSTTTSGLSQEPTASHSSQGSTEATLSPGSTTASSLGQQSTTFHSSPGDTETTLLPDDTITSGLVEASTPTHSSTGSLHTTLTPASSTSAGLQEESTTFQSWPSSSDTTPSPPGTTAAPVEVSTTYHSRPSSTPTTHFSASSTTLGRSEESTTVHSSPGATGTALFPTRSATSVLVGEPTTSPISSGSTETTALPGSTTTAGLSEKSTTFYSSPRSPDTTLSPASTTSSGVSEESTTSHSRPGSTHTTAFPGSTTMPGVSQESTASHSSPGSTDTTLSPGSTTASSLGPESTTFHSGPGSTETTLLPDNTTASGLLEASTPVHSSTGSPHTTLSPAGSTTRQGESTTFQSWPNSKDTTPAPPTTTSAFVELSTTSHGSPSSTPTTHFSASSTTLGRSEESTTVHSSPVATATTPSPARSTTSGLVEESTTYHSSPGSTQTMHFPESDTTSGRGEESTTSHSSTTHTISSAPSTTSALVEEPTSYHSSPGSTATTHFPDSSTTSGRSEESTASHSSQDATGTIVLPARSTTSVLLGESTTSPISSGSMETTALPGSTTTPGLSEKSTTFHSSPRSPATTLSPASTTSSGVSEESTTSHSRPGSTHTTAFPDSTTTPGLSRHSTTSHSSPGSTDTTLLPASTTTSGSSQESTTSHSSSGSTDTALSPGSTTALSFGQESTTFHSSPGSTHTTLFPDSTTSSGIVEASTRVHSSTGSPRTTLSPASSTSPGLQGESTAFQTHPASTHTTPSPPSTATAPVEESTTYHRSPGSTPTTHFPASSTTSGHSEKSTIFHSSPDASGTTPSSAHSTTSGRGESTTSRISPGSTEITTLPGSTTTPGLSEASTTFYSSPRSPTTTLSPASMTSLGVGEESTTSRSQPGSTHSTVSPASTTTPGLSEESTTVYSS
SPGSTETTVFPRSTTTSVRREEPTTFHSRPASTHTTLFTEDSTTSGLTEESTAFPGSPASTQTGLPATLTTADLGEESTTFPSSSGSTGTKLSPARSTTSGLVGESTPSRLSPSSTETTTLPGSPTTPSLSEKSTTFYTSPRSPDATLSPATTTSSGVSEESSTSHSQPGSTHTTAFPDSTTTSGLSQEPTTSHSSQGSTEATLSPGSTTASSLGQQSTTFHSSPGDTETTLLPDDTITSGLVEASTPTHSSTGSLHTTLTPASSTSTGLQEESTTFQSWPSSSDTTPSPPSTTAVPVEVSTTYHSRPSSTPTTHFSASSTTLGRSEESTTVHSSPGATGTALFPTRSATSVLVGEPTTSPISSGSTETTALPGSTTTAGLSEKSTTFYSSPRSPDTTLSPASTTSSGVSEESTTSHSRPGSMHTTAFPSSTTMPGVSQESTASHSSPGSTDTTLSPGSTTASSLGPESTTFHSSPGSTETTLLPDNTTASGLLEASTPVHSSTGSPHTTLSPAGSTTRQGESTTFQSWPNSKDTTPAPPTTTSAFVELSTTSHGSPSSTPTTHFSASSTTLGRSEESTTVHSSPVATATTPSPARSTTSGLVEESTTYHSSPGSTQTMHFPESNTTSGRGEESTTSHSSTTHTISSAPSTTSALVEEPTSYHSSPGSTATTHFPDSSTTSGRSEESTASHSSQDATGTIVLPARSTTSVLLGESTTSPISSGSMETTALPGSTTTPGLSEKSTTFHSSPSSTPTTHFSASSTTLGRSEESTTVHSSPVATATTPSPARSTTSGLVEESTAYHSSPGSTQTMHFPESSTASGRSEESRTSHSSTTHTISSPPSTTSALVEEPTSYHSSPGSIATTHFPESSTTSGRSEESTASHSSPDTNGITPLPAHFTTSGRIAESTTFYISPGSMETTLASTATTPGLSAKSTILYSSSRSPDQTLSPASMTSSSISGEPTSLYSQAESTHTTAFPASTTTSGLSQESTTFHSKPGSTETTLSPGSITTSSFAQEFTTPHSQPGSALSTVSPASTTVPGLSEESTTFYSSPGSTETTAFSHSNTMSIHSQQSTPFPDSPGFTHTVLPATLTTTDIGQESTAFHSSSDATGTTPLPARSTASDLVGEPTTFYISPSPTYTTLFPASSSTSGLTEESTTFHTSPSFTSTIVSTESLETLAPGLCQEGQIWNGKQCVCPQGYVGYQCLSPLESFPVETPEKLNATLGMTVKVTYRNFTEKMNDASSQEYQNFSTLFKNRMDVVLKGDNLPQYRGVNIRRLLNGSIVVKNDVILEADYTLEYEELFENLAEIVKAKIMNETRTTLLDPDSCRKAILCYSEEDTFVDSSVTPGFDFQEQCTQKAAEGYTQFYYVDVLDGKLACVNKCTKGTKSQMNCNLGTCQLQRSGPRCLCPNTNTHWYWGETCEFNIAKSLVYGIVGAVMAVLLLALIILIILFSLSQRKRHREQYDVPQEWRKEGTPGIFQKTAIWEDQNLRESRFGLENAYNNFRPTLETVDSGTELHIQRPEMVASTV 2 5381 5403 EFNIAKSLVY GIVGAVMAVLLLALIILIILFSL SQRKRHREQY +Q9H3R2 
MKAIIHLTLLALLSVNTATNQGNSADAVTTTETATSGPTVAAADTTETNFPETASTTANTPSFPTATSPAPPIISTHSSSTIPTPAPPIISTHSSSTIPIPTAADSESTTNVNSLATSDIITASSPNDGLITMVPSETQSNNEMSPTTEDNQSSGPPTGTALLETSTLNSTGPSNPCQDDPCADNSLCVKLHNTSFCLCLEGYYYNSSTCKKGKVFPGKISVTVSETFDPEEKHSMAYQDLHSEITSLFKDVFGTSVYGQTVILTVSTSLSPRSEMRADDKFVNVTIVTILAETTSDNEKTVTEKINKAIRSSSSNFLNYDLTLRCDYYGCNQTADDCLNGLACDCKSDLQRPNPQSPFCVASSLKCPDACNAQHKQCLIKKSGGAPECACVPGYQEDANGNCQKCAFGYSGLDCKDKFQLILTIVGTIAGIVILSMIIALIVTARSNNKTKHIEEENLIDEDFQNLKLRSTGFTNLGAEGSVFPKVRITASRDSQMQNPYSRHSSMPRPDY 2 421 443 SGLDCKDKFQ LILTIVGTIAGIVILSMIIALIV TARSNNKTKH +Q8C6Z1 MLTLAKIALISSLFISLPFARPQKQNPRRNVTQHTIEDVKIMRNNSIHLERSINVTSENGSDISNLMVTTPSPLNLSTTFRTTNSTRTWLMTSSSESSRPSSTYSVPPLVQGFVSKLPLNSSTADANPLQVSEHSNSTNSPSPENFTWSLDNDTMNSPEDISTTVRPFPPPPKTTPVTPFTAEPTEWLPTNNDNFAGFTPYQEKTTLQPTLKFTNNSKLFPNTSDTPKENKNTGIVFGAILGAILGASLLSLVGYLLCGQRKTDSFSHRRLYDDRNEPVLRLDNAPEPYDVNFGNSSYYNPAVSDSSMPEGGESLQDGIPMDAIPPLRPSI 2 235 257 DTPKENKNTG IVFGAILGAILGASLLSLVGYLL CGQRKTDSFS +Q8WXI7 MLKPSGLPGSSSPTRSLMTGSRSTKATPEMDSGLTGATLSPKTSTGAIVVTEHTLPFTSPDKTLASPTSSVVGRTTQSLGVMSSALPESTSRGMTHSEQRTSPSLSPQVNGTPSRNYPATSMVSGLSSPRTRTSSTEGNFTKEASTYTLTVETTSGPVTEKYTVPTETSTTEGDSTETPWDTRYIPVKITSPMKTFADSTASKENAPVSMTPAETTVTDSHTPGRTNPSFGTLYSSFLDLSPKGTPNSRGETSLELILSTTGYPFSSPEPGSAGHSRISTSAPLSSSASVLDNKISETSIFSGQSLTSPLSPGVPEARASTMPNSAIPFSMTLSNAETSAERVRSTISSLGTPSISTKQTAETILTFHAFAETMDIPSTHIAKTLASEWLGSPGTLGGTSTSALTTTSPSTTLVSEETNTHHSTSGKETEGTLNTSMTPLETSAPGEESEMTATLVPTLGFTTLDSKIRSPSQVSSSHPTRELRTTGSTSGRQSSSTAAHGSSDILRATTSSTSKASSWTSESTAQQFSEPQHTQWVETSPSMKTERPPASTSVAAPITTSVPSVVSGFTTLKTSSTKGIWLEETSADTLIGESTAGPTTHQFAVPTGISMTGGSSTRGSQGTTHLLTRATASSETSADLTLATNGVPVSVSPAVSKTAAGSSPPGGTKPSYTMVSSVIPETSSLQSSAFREGTSLGLTPLNTRHPFSSPEPDSAGHTKISTSIPLLSSASVLEDKVSATSTFSHHKATSSITTGTPEISTKTKPSSAVLSSMTLSNAATSPERVRNATSPLTHPSPSGEETAGSVLTLSTSAETTDSPNIHPTGTLTSESSESPSTLSLPSVSGVKTTFSSSTPSTHLFTSGEETEETSNPSVSQPETSVSRVRTTLASTSVPTPVFPTMDTWPTRSAQFSSSHLVSELRATSSTSVTNSTGSALPKISHLTGTATMSQTNRDTFNDSAAPQSTTWPETSPRFKTGLPSATTTVSTSATSLSATVMVSKFTSPATSSMEATSIREPSTTILTTETT
NGPGSMAVASTNIPIGKGYITEGRLDTSHLPIGTTASSETSMDFTMAKESVSMSVSPSQSMDAAGSSTPGRTSQFVDTFSDDVYHLTSREITIPRDGTSSALTPQMTATHPPSPDPGSARSTWLGILSSSPSSPTPKVTMSSTFSTQRVTTSMIMDTVETSRWNMPNLPSTTSLTPSNIPTSGAIGKSTLVPLDTPSPATSLEASEGGLPTLSTYPESTNTPSIHLGAHASSESPSTIKLTMASVVKPGSYTPLTFPSIETHIHVSTARMAYSSGSSPEMTAPGETNTGSTWDPTTYITTTDPKDTSSAQVSTPHSVRTLRTTENHPKTESATPAAYSGSPKISSSPNLTSPATKAWTITDTTEHSTQLHYTKLAEKSSGFETQSAPGPVSVVIPTSPTIGSSTLELTSDVPGEPLVLAPSEQTTITLPMATWLSTSLTEEMASTDLDISSPSSPMSTFAIFPPMSTPSHELSKSEADTSAIRNTDSTTLDQHLGIRSLGRTGDLTTVPITPLTTTWTSVIEHSTQAQDTLSATMSPTHVTQSLKDQTSIPASASPSHLTEVYPELGTQGRSSSEATTFWKPSTDTLSREIETGPTNIQSTPPMDNTTTGSSSSGVTLGIAHLPIGTSSPAETSTNMALERRSSTATVSMAGTMGLLVTSAPGRSISQSLGRVSSVLSESTTEGVTDSSKGSSPRLNTQGNTALSSSLEPSYAEGSQMSTSIPLTSSPTTPDVEFIGGSTFWTKEVTTVMTSDISKSSARTESSSATLMSTALGSTENTGKEKLRTASMDLPSPTPSMEVTPWISLTLSNAPNTTDSLDLSHGVHTSSAGTLATDRSLNTGVTRASRLENGSDTSSKSLSMGNSTHTSMTYTEKSEVSSSIHPRPETSAPGAETTLTSTPGNRAISLTLPFSSIPVEEVISTGITSGPDINSAPMTHSPITPPTIVWTSTGTIEQSTQPLHAVSSEKVSVQTQSTPYVNSVAVSASPTHENSVSSGSSTSSPYSSASLESLDSTISRRNAITSWLWDLTTSLPTTTWPSTSLSEALSSGHSGVSNPSSTTTEFPLFSAASTSAAKQRNPETETHGPQNTAASTLNTDASSVTGLSETPVGASISSEVPLPMAITSRSDVSGLTSESTANPSLGTASSAGTKLTRTISLPTSESLVSFRMNKDPWTVSIPLGSHPTTNTETSIPVNSAGPPGLSTVASDVIDTPSDGAESIPTVSFSPSPDTEVTTISHFPEKTTHSFRTISSLTHELTSRVTPIPGDWMSSAMSTKPTGASPSITLGERRTITSAAPTTSPIVLTASFTETSTVSLDNETTVKTSDILDARKTNELPSDSSSSSDLINTSIASSTMDVTKTASISPTSISGMTASSSPSLFSSDRPQVPTSTTETNTATSPSVSSNTYSLDGGSNVGGTPSTLPPFTITHPVETSSALLAWSRPVRTFSTMVSTDTASGENPTSSNSVVTSVPAPGTWTSVGSTTDLPAMGFLKTSPAGEAHSLLASTIEPATAFTPHLSAAVVTGSSATSEASLLTTSESKAIHSSPQTPTTPTSGANWETSATPESLLVVTETSDTTLTSKILVTDTILFSTVSTPPSKFPSTGTLSGASFPTLLPDTPAIPLTATEPTSSLATSFDSTPLVTIASDSLGTVPETTLTMSETSNGDALVLKTVSNPDRSIPGITIQGVTESPLHPSSTSPSKIVAPRNTTYEGSITVALSTLPAGTTGSLVFSQSSENSETTALVDSSAGLERASVMPLTTGSQGMASSGGIRSGSTHSTGTKTFSSLPLTMNPGEVTAMSEITTNRLTATQSTAPKGIPVKPTSAESGLLTPVSASSSPSKAFASLTTAPPTWGIPQSTLTFEFSEVPSLDTKSASLPTPGQSLNTIPDSDASTASSSLSKSPEKNPRARMMTSTKAISASSFQSTGFTETPEGSASPSMAGHEPRVPTSGTGDPRYASESMSYPDPSKASSAMTSTSLASKLTTLFSTGQAARSGS
SSSPISLSTEKETSFLSPTASTSRKTSLFLGPSMARQPNILVHLQTSALTLSPTSTLNMSQEEPPELTSSQTIAEEEGTTAETQTLTFTPSETPTSLLPVSSPTEPTARRKSSPETWASSISVPAKTSLVETTDGTLVTTIKMSSQAAQGNSTWPAPAEETGSSPAGTSPGSPEMSTTLKIMSSKEPSISPEIRSTVRNSPWKTPETTVPMETTVEPVTLQSTALGSGSTSISHLPTGTTSPTKSPTENMLATERVSLSPSPPEAWTNLYSGTPGGTRQSLATMSSVSLESPTARSITGTGQQSSPELVSKTTGMEFSMWHGSTGGTTGDTHVSLSTSSNILEDPVTSPNSVSSLTDKSKHKTETWVSTTAIPSTVLNNKIMAAEQQTSRSVDEAYSSTSSWSDQTSGSDITLGASPDVTNTLYITSTAQTTSLVSLPSGDQGITSLTNPSGGKTSSASSVTSPSIGLETLRANVSAVKSDIAPTAGHLSQTSSPAEVSILDVTTAPTPGISTTITTMGTNSISTTTPNPEVGMSTMDSTPATERRTTSTEHPSTWSSTAASDSWTVTDMTSNLKVARSPGTISTMHTTSFLASSTELDSMSTPHGRITVIGTSLVTPSSDASAVKTETSTSERTLSPSDTTASTPISTFSRVQRMSISVPDILSTSWTPSSTEAEDVPVSMVSTDHASTKTDPNTPLSTFLFDSLSTLDWDTGRSLSSATATTSAPQGATTPQELTLETMISPATSQLPFSIGHITSAVTPAAMARSSGVTFSRPDPTSKKAEQTSTQLPTTTSAHPGQVPRSAATTLDVIPHTAKTPDATFQRQGQTALTTEARATSDSWNEKEKSTPSAPWITEMMNSVSEDTIKEVTSSSSVLRTLNTLDINLESGTTSSPSWKSSPYERIAPSESTTDKEAIHPSTNTVETTGWVTSSEHASHSTIPAHSASSKLTSPVVTTSTREQAIVSMSTTTWPESTRARTEPNSFLTIELRDVSPYMDTSSTTQTSIISSPGSTAITKGPRTEITSSKRISSSFLAQSMRSSDSPSEAITRLSNFPAMTESGGMILAMQTSPPGATSLSAPTLDTSATASWTGTPLATTQRFTYSEKTTLFSKGPEDTSQPSPPSVEETSSSSSLVPIHATTSPSNILLTSQGHSPSSTPPVTSVFLSETSGLGKTTDMSRISLEPGTSLPPNLSSTAGEALSTYEASRDTKAIHHSADTAVTNMEATSSEYSPIPGHTKPSKATSPLVTSHIMGDITSSTSVFGSSETTEIETVSSVNQGLQERSTSQVASSATETSTVITHVSSGDATTHVTKTQATFSSGTSISSPHQFITSTNTFTDVSTNPSTSLIMTESSGVTITTQTGPTGAATQGPYLLDTSTMPYLTETPLAVTPDFMQSEKTTLISKGPKDVSWTSPPSVAETSYPSSLTPFLVTTIPPATSTLQGQHTSSPVSATSVLTSGLVKTTDMLNTSMEPVTNSPQNLNNPSNEILATLAATTDIETIHPSINKAVTNMGTASSAHVLHSTLPVSSEPSTATSPMVPASSMGDALASISIPGSETTDIEGEPTSSLTAGRKENSTLQEMNSTTESNIILSNVSVGAITEATKMEVPSFDATFIPTPAQSTKFPDIFSVASSRLSNSPPMTISTHMTTTQTGSSGATSKIPLALDTSTLETSAGTPSVVTEGFAHSKITTAMNNDVKDVSQTNPPFQDEASSPSSQAPVLVTTLPSSVAFTPQWHSTSSPVSMSSVLTSSLVKTAGKVDTSLETVTSSPQSMSNTLDDISVTSAATTDIETTHPSINTVVTNVGTTGSAFESHSTVSAYPEPSKVTSPNVTTSTMEDTTISRSIPKSSKTTRTETETTSSLTPKLRETSISQEITSSTETSTVPYKELTGATTEVSRTDVTSSSSTSFPGPDQSTVSLDISTETNTRLSTSPIMTESAEITITTQTGPHGATSQDTFTMDPSNTTPQAGIHSAMTHGFSQLDVTTLMSRIPQDVS
WTSPPSVDKTSSPSSFLSSPAMTTPSLISSTLPEDKLSSPMTSLLTSGLVKITDILRTRLEPVTSSLPNFSSTSDKILATSKDSKDTKEIFPSINTEETNVKANNSGHESHSPALADSETPKATTQMVITTTVGDPAPSTSMPVHGSSETTNIKREPTYFLTPRLRETSTSQESSFPTDTSFLLSKVPTGTITEVSSTGVNSSSKISTPDHDKSTVPPDTFTGEIPRVFTSSIKTKSAEMTITTQASPPESASHSTLPLDTSTTLSQGGTHSTVTQGFPYSEVTTLMGMGPGNVSWMTTPPVEETSSVSSLMSSPAMTSPSPVSSTSPQSIPSSPLPVTALPTSVLVTTTDVLGTTSPESVTSSPPNLSSITHERPATYKDTAHTEAAMHHSTNTAVTNVGTSGSGHKSQSSVLADSETSKATPLMSTTSTLGDTSVSTSTPNISQTNQIQTEPTASLSPRLRESSTSEKTSSTTETNTAFSYVPTGAITQASRTEISSSRTSISDLDRPTIAPDISTGMITRLFTSPIMTKSAEMTVTTQTTTPGATSQGILPWDTSTTLFQGGTHSTVSQGFPHSEITTLRSRTPGDVSWMTTPPVEETSSGFSLMSPSMTSPSPVSSTSPESIPSSPLPVTALLTSVLVTTTNVLGTTSPEPVTSSPPNLSSPTQERLTTYKDTAHTEAMHASMHTNTAVANVGTSISGHESQSSVPADSHTSKATSPMGITFAMGDTSVSTSTPAFFETRIQTESTSSLIPGLRDTRTSEEINTVTETSTVLSEVPTTTTTEVSRTEVITSSRTTISGPDHSKMSPYISTETITRLSTFPFVTGSTEMAITNQTGPIGTISQATLTLDTSSTASWEGTHSPVTQRFPHSEETTTMSRSTKGVSWQSPPSVEETSSPSSPVPLPAITSHSSLYSAVSGSSPTSALPVTSLLTSGRRKTIDMLDTHSELVTSSLPSASSFSGEILTSEASTNTETIHFSENTAETNMGTTNSMHKLHSSVSIHSQPSGHTPPKVTGSMMEDAIVSTSTPGSPETKNVDRDSTSPLTPELKEDSTALVMNSTTESNTVFSSVSLDAATEVSRAEVTYYDPTFMPASAQSTKSPDISPEASSSHSNSPPLTISTHKTIATQTGPSGVTSLGQLTLDTSTIATSAGTPSARTQDFVDSETTSVMNNDLNDVLKTSPFSAEEANSLSSQAPLLVTTSPSPVTSTLQEHSTSSLVSVTSVPTPTLAKITDMDTNLEPVTRSPQNLRNTLATSEATTDTHTMHPSINTAVANVGTTSSPNEFYFTVSPDSDPYKATSAVVITSTSGDSIVSTSMPRSSAMKKIESETTFSLIFRLRETSTSQKIGSSSDTSTVFDKAFTAATTEVSRTELTSSSRTSIQGTEKPTMSPDTSTRSVTMLSTFAGLTKSEERTIATQTGPHRATSQGTLTWDTSITTSQAGTHSAMTHGFSQLDLSTLTSRVPEYISGTSPPSVEKTSSSSSLLSLPAITSPSPVPTTLPESRPSSPVHLTSLPTSGLVKTTDMLASVASLPPNLGSTSHKIPTTSEDIKDTEKMYPSTNIAVTNVGTTTSEKESYSSVPAYSEPPKVTSPMVTSFNIRDTIVSTSMPGSSEITRIEMESTFSLAHGLKGTSTSQDPIVSTEKSAVLHKLTTGATETSRTEVASSRRTSIPGPDHSTESPDISTEVIPSLPISLGITESSNMTIITRTGPPLGSTSQGTFTLDTPTTSSRAGTHSMATQEFPHSEMTTVMNKDPEILSWTIPPSIEKTSFSSSLMPSPAMTSPPVSSTLPKTIHTTPSPMTSLLTPSLVMTTDTLGTSPEPTTSSPPNLSSTSHEILTTDEDTTAIEAMHPSTSTAATNVETTSSGHGSQSSVLADSEKTKATAPMDTTSTMGHTTVSTSMSVSSETTKIKRESTYSLTPGLRETSISQNASFSTDTSIVLSEVPTGTTAEVSRTEVTSSGRTSIPGPSQSTVLPEISTRTMTRLFASPTMTESAE
MTIPTQTGPSGSTSQDTLTLDTSTTKSQAKTHSTLTQRFPHSEMTTLMSRGPGDMSWQSSPSLENPSSLPSLLSLPATTSPPPISSTLPVTISSSPLPVTSLLTSSPVTTTDMLHTSPELVTSSPPKLSHTSDERLTTGKDTTNTEAVHPSTNTAASNVEIPSSGHESPSSALADSETSKATSPMFITSTQEDTTVAISTPHFLETSRIQKESISSLSPKLRETGSSVETSSAIETSAVLSEVSIGATTEISRTEVTSSSRTSISGSAESTMLPEISTTRKIIKFPTSPILAESSEMTIKTQTSPPGSTSESTFTLDTSTTPSLVITHSTMTQRLPHSEITTLVSRGAGDVPRPSSLPVEETSPPSSQLSLSAMISPSPVSSTLPASSHSSSASVTSLLTPGQVKTTEVLDASAEPETSSPPSLSSTSVEILATSEVTTDTEKIHPFSNTAVTKVGTSSSGHESPSSVLPDSETTKATSAMGTISIMGDTSVSTLTPALSNTRKIQSEPASSLTTRLRETSTSEETSLATEANTVLSKVSTGATTEVSRTEAISFSRTSMSGPEQSTMSQDISIGTIPRISASSVLTESAKMTITTQTGPSESTLESTLNLNTATTPSWVETHSIVIQGFPHPEMTTSMGRGPGGVSWPSPPFVKETSPPSSPLSLPAVTSPHPVSTTFLAHIPPSPLPVTSLLTSGPATTTDILGTSTEPGTSSSSSLSTTSHERLTTYKDTAHTEAVHPSTNTGGTNVATTSSGYKSQSSVLADSSPMCTTSTMGDTSVLTSTPAFLETRRIQTELASSLTPGLRESSGSEGTSSGTKMSTVLSKVPTGATTEISKEDVTSIPGPAQSTISPDISTRTVSWFSTSPVMTESAEITMNTHTSPLGATTQGTSTLDTSSTTSLTMTHSTISQGFSHSQMSTLMRRGPEDVSWMSPPLLEKTRPSFSLMSSPATTSPSPVSSTLPESISSSPLPVTSLLTSGLAKTTDMLHKSSEPVTNSPANLSSTSVEILATSEVTTDTEKTHPSSNRTVTDVGTSSSGHESTSFVLADSQTSKVTSPMVITSTMEDTSVSTSTPGFFETSRIQTEPTSSLTLGLRKTSSSEGTSLATEMSTVLSGVPTGATAEVSRTEVTSSSRTSISGFAQLTVSPETSTETITRLPTSSIMTESAEMMIKTQTDPPGSTPESTHTVDISTTPNWVETHSTVTQRFSHSEMTTLVSRSPGDMLWPSQSSVEETSSASSLLSLPATTSPSPVSSTLVEDFPSASLPVTSLLNPGLVITTDRMGISREPGTSSTSNLSSTSHERLTTLEDTVDTEDMQPSTHTAVTNVRTSISGHESQSSVLSDSETPKATSPMGTTYTMGETSVSISTSDFFETSRIQIEPTSSLTSGLRETSSSERISSATEGSTVLSEVPSGATTEVSRTEVISSRGTSMSGPDQFTISPDISTEAITRLSTSPIMTESAESAITIETGSPGATSEGTLTLDTSTTTFWSGTHSTASPGFSHSEMTTLMSRTPGDVPWPSLPSVEEASSVSSSLSSPAMTSTSFFSTLPESISSSPHPVTALLTLGPVKTTDMLRTSSEPETSSPPNLSSTSAEILATSEVTKDREKIHPSSNTPVVNVGTVIYKHLSPSSVLADLVTTKPTSPMATTSTLGNTSVSTSTPAFPETMMTQPTSSLTSGLREISTSQETSSATERSASLSGMPTGATTKVSRTEALSLGRTSTPGPAQSTISPEISTETITRISTPLTTTGSAEMTITPKTGHSGASSQGTFTLDTSSRASWPGTHSAATHRSPHSGMTTPMSRGPEDVSWPSRPSVEKTSPPSSLVSLSAVTSPSPLYSTPSESSHSSPLRVTSLFTPVMMKTTDMLDTSLEPVTTSPPSMNITSDESLATSKATMETEAIQLSENTAVTQMGTISARQEFYSSYPGLPEPSKVTSPVVTSSTIKDIVSTTIPASSEITRIEMESTSTLTPTPRETSTSQEIHSATKPSTVPYKAL
TSATIEDSMTQVMSSSRGPSPDQSTMSQDISTEVITRLSTSPIKTESTEMTITTQTGSPGATSRGTLTLDTSTTFMSGTHSTASQGFSHSQMTALMSRTPGDVPWLSHPSVEEASSASFSLSSPVMTSSSPVSSTLPDSIHSSSLPVTSLLTSGLVKTTELLGTSSEPETSSPPNLSSTSAEILAITEVTTDTEKLEMTNVVTSGYTHESPSSVLADSVTTKATSSMGITYPTGDTNVLTSTPAFSDTSRIQTKSKLSLTPGLMETSISEETSSATEKSTVLSSVPTGATTEVSRTEAISSSRTSIPGPAQSTMSSDTSMETITRISTPLTRKESTDMAITPKTGPSGATSQGTFTLDSSSTASWPGTHSATTQRFPQSVVTTPMSRGPEDVSWPSPLSVEKNSPPSSLVSSSSVTSPSPLYSTPSGSSHSSPVPVTSLFTSIMMKATDMLDASLEPETTSAPNMNITSDESLAASKATTETEAIHVFENTAASHVETTSATEELYSSSPGFSEPTKVISPVVTSSSIRDNMVSTTMPGSSGITRIEIESMSSLTPGLRETRTSQDITSSTETSTVLYKMPSGATPEVSRTEVMPSSRTSIPGPAQSTMSLDISDEVVTRLSTSPIMTESAEITITTQTGYSLATSQVTLPLGTSMTFLSGTHSTMSQGLSHSEMTNLMSRGPESLSWTSPRFVETTRSSSSLTSLPLTTSLSPVSSTLLDSSPSSPLPVTSLILPGLVKTTEVLDTSSEPKTSSSPNLSSTSVEIPATSEIMTDTEKIHPSSNTAVAKVRTSSSVHESHSSVLADSETTITIPSMGITSAVDDTTVFTSNPAFSETRRIPTEPTFSLTPGFRETSTSEETTSITETSAVLYGVPTSATTEVSMTEIMSSNRIHIPDSDQSTMSPDIITEVITRLSSSSMMSESTQMTITTQKSSPGATAQSTLTLATTTAPLARTHSTVPPRFLHSEMTTLMSRSPENPSWKSSLFVEKTSSSSSLLSLPVTTSPSVSSTLPQSIPSSSFSVTSLLTPGMVKTTDTSTEPGTSLSPNLSGTSVEILAASEVTTDTEKIHPSSSMAVTNVGTTSSGHELYSSVSIHSEPSKATYPVGTPSSMAETSISTSMPANFETTGFEAEPFSHLTSGFRKTNMSLDTSSVTPTNTPSSPGSTHLLQSSKTDFTSSAKTSSPDWPPASQYTEIPVDIITPFNASPSITESTGITSFPESRFTMSVTESTHHLSTDLLPSAETISTGTVMPSLSEAMTSFATTGVPRAISGSGSPFSRTESGPGDATLSTIAESLPSSTPVPFSSSTFTTTDSSTIPALHEITSSSATPYRVDTSLGTESSTTEGRLVMVSTLDTSSQPGRTSSSPILDTRMTESVELGTVTSAYQVPSLSTRLTRTDGIMEHITKIPNEAAHRGTIRPVKGPQTSTSPASPKGLHTGGTKRMETTTTALKTTTTALKTTSRATLTTSVYTPTLGTLTPLNASMQMASTIPTEMMITTPYVFPDVPETTSSLATSLGAETSTALPRTTPSVFNRESETTASLVSRSGAERSPVIQTLDVSSSEPDTTASWVIHPAETIPTVSKTTPNFFHSELDTVSSTATSHGADVSSAIPTNISPSELDALTPLVTISGTDTSTTFPTLTKSPHETETRTTWLTHPAETSSTIPRTIPNFSHHESDATPSIATSPGAETSSAIPIMTVSPGAEDLVTSQVTSSGTDRNMTIPTLTLSPGEPKTIASLVTHPEAQTSSAIPTSTISPAVSRLVTSMVTSLAAKTSTTNRALTNSPGEPATTVSLVTHPAQTSPTVPWTTSIFFHSKSDTTPSMTTSHGAESSSAVPTPTVSTEVPGVVTPLVTSSRAVISTTIPILTLSPGEPETTPSMATSHGEEASSAIPTPTVSPGVPGVVTSLVTSSRAVTSTTIPILTFSLGEPETTPSMATSHGTEAGSAVPTVLPEVPGMVTSLVASSRAVTSTTLPTLTLSPGEPETTPSMATSHGAEA
SSTVPTVSPEVPGVVTSLVTSSSGVNSTSIPTLILSPGELETTPSMATSHGAEASSAVPTPTVSPGVSGVVTPLVTSSRAVTSTTIPILTLSSSEPETTPSMATSHGVEASSAVLTVSPEVPGMVTSLVTSSRAVTSTTIPTLTISSDEPETTTSLVTHSEAKMISAIPTLAVSPTVQGLVTSLVTSSGSETSAFSNLTVASSQPETIDSWVAHPGTEASSVVPTLTVSTGEPFTNISLVTHPAESSSTLPRTTSRFSHSELDTMPSTVTSPEAESSSAISTTISPGIPGVLTSLVTSSGRDISATFPTVPESPHESEATASWVTHPAVTSTTVPRTTPNYSHSEPDTTPSIATSPGAEATSDFPTITVSPDVPDMVTSQVTSSGTDTSITIPTLTLSSGEPETTTSFITYSETHTSSAIPTLPVSPGASKMLTSLVISSGTDSTTTFPTLTETPYEPETTAIQLIHPAETNTMVPRTTPKFSHSKSDTTLPVAITSPGPEASSAVSTTTISPDMSDLVTSLVPSSGTDTSTTFPTLSETPYEPETTATWLTHPAETSTTVSGTIPNFSHRGSDTAPSMVTSPGVDTRSGVPTTTIPPSIPGVVTSQVTSSATDTSTAIPTLTPSPGEPETTASSATHPGTQTGFTVPIRTVPSSEPDTMASWVTHPPQTSTPVSRTTSSFSHSSPDATPVMATSPRTEASSAVLTTISPGAPEMVTSQITSSGAATSTTVPTLTHSPGMPETTALLSTHPRTETSKTFPASTVFPQVSETTASLTIRPGAETSTALPTQTTSSLFTLLVTGTSRVDLSPTASPGVSAKTAPLSTHPGTETSTMIPTSTLSLGLLETTGLLATSSSAETSTSTLTLTVSPAVSGLSSASITTDKPQTVTSWNTETSPSVTSVGPPEFSRTVTGTTMTLIPSEMPTPPKTSHGEGVSPTTILRTTMVEATNLATTGSSPTVAKTTTTFNTLAGSLFTPLTTPGMSTLASESVTSRTSYNHRSWISTTSSYNRRYWTPATSTPVTSTFSPGISTSSIPSSTAATVPFMVPFTLNFTITNLQYEEDMRHPGSRKFNATERELQGLLKPLFRNSSLEYLYSGCRLASLRPEKDSSATAVDAICTHRPDPEDLGLDRERLYWELSNLTNGIQELGPYTLDRNSLYVNGFTHRSSMPTTSTPGTSTVDVGTSGTPSSSPSPTTAGPLLMPFTLNFTITNLQYEEDMRRTGSRKFNTMESVLQGLLKPLFKNTSVGPLYSGCRLTLLRPEKDGAATGVDAICTHRLDPKSPGLNREQLYWELSKLTNDIEELGPYTLDRNSLYVNGFTHQSSVSTTSTPGTSTVDLRTSGTPSSLSSPTIMAAGPLLVPFTLNFTITNLQYGEDMGHPGSRKFNTTERVLQGLLGPIFKNTSVGPLYSGCRLTSLRSEKDGAATGVDAICIHHLDPKSPGLNRERLYWELSQLTNGIKELGPYTLDRNSLYVNGFTHRTSVPTSSTPGTSTVDLGTSGTPFSLPSPATAGPLLVLFTLNFTITNLKYEEDMHRPGSRKFNTTERVLQTLLGPMFKNTSVGLLYSGCRLTLLRSEKDGAATGVDAICTHRLDPKSPGVDREQLYWELSQLTNGIKELGPYTLDRNSLYVNGFTHWIPVPTSSTPGTSTVDLGSGTPSSLPSPTTAGPLLVPFTLNFTITNLKYEEDMHCPGSRKFNTTERVLQSLLGPMFKNTSVGPLYSGCRLTLLRSEKDGAATGVDAICTHRLDPKSPGVDREQLYWELSQLTNGIKELGPYTLDRNSLYVNGFTHQTSAPNTSTPGTSTVDLGTSGTPSSLPSPTSAGPLLVPFTLNFTITNLQYEEDMHHPGSRKFNTTERVLQGLLGPMFKNTSVGLLYSGCRLTLLRPEKNGAATGMDAICSHRLDPKSPGLNREQLYWELSQLTHGIKELGPYTLDRNSLYVNGFTHRSSVAPTSTPGTSTVDLGTSGTPSSLPSPTTAVPLLVPFTLNFTITNLQYGED
MRHPGSRKFNTTERVLQGLLGPLFKNSSVGPLYSGCRLISLRSEKDGAATGVDAICTHHLNPQSPGLDREQLYWQLSQMTNGIKELGPYTLDRNSLYVNGFTHRSSGLTTSTPWTSTVDLGTSGTPSPVPSPTTTGPLLVPFTLNFTITNLQYEENMGHPGSRKFNITESVLQGLLKPLFKSTSVGPLYSGCRLTLLRPEKDGVATRVDAICTHRPDPKIPGLDRQQLYWELSQLTHSITELGPYTLDRDSLYVNGFTQRSSVPTTSTPGTFTVQPETSETPSSLPGPTATGPVLLPFTLNFTITNLQYEEDMRRPGSRKFNTTERVLQGLLMPLFKNTSVSSLYSGCRLTLLRPEKDGAATRVDAVCTHRPDPKSPGLDRERLYWKLSQLTHGITELGPYTLDRHSLYVNGFTHQSSMTTTRTPDTSTMHLATSRTPASLSGPMTASPLLVLFTINFTITNLRYEENMHHPGSRKFNTTERVLQGLLRPVFKNTSVGPLYSGCRLTLLRPKKDGAATKVDAICTYRPDPKSPGLDREQLYWELSQLTHSITELGPYTLDRDSLYVNGFTQRSSVPTTSIPGTPTVDLGTSGTPVSKPGPSAASPLLVLFTLNFTITNLRYEENMQHPGSRKFNTTERVLQGLLRSLFKSTSVGPLYSGCRLTLLRPEKDGTATGVDAICTHHPDPKSPRLDREQLYWELSQLTHNITELGPYALDNDSLFVNGFTHRSSVSTTSTPGTPTVYLGASKTPASIFGPSAASHLLILFTLNFTITNLRYEENMWPGSRKFNTTERVLQGLLRPLFKNTSVGPLYSGCRLTLLRPEKDGEATGVDAICTHRPDPTGPGLDREQLYLELSQLTHSITELGPYTLDRDSLYVNGFTHRSSVPTTSTGVVSEEPFTLNFTINNLRYMADMGQPGSLKFNITDNVMQHLLSPLFQRSSLGARYTGCRVIALRSVKNGAETRVDLLCTYLQPLSGPGLPIKQVFHELSQQTHGITRLGPYSLDKDSLYLNGYNEPGPDEPPTTPKPATTFLPPLSEATTAMGYHLKTLTLNFTISNLQYSPDMGKGSATFNSTEGVLQHLLRPLFQKSSMGPFYLGCQLISLRPEKDGAATGVDTTCTYHPDPVGPGLDIQQLYWELSQLTHGVTQLGFYVLDRDSLFINGYAPQNLSIRGEYQINFHIVNWNLSNPDPTSSEYITLLRDIQDKVTTLYKGSQLHDTFRFCLVTNLTMDSVLVTVKALFSSNLDPSLVEQVFLDKTLNASFHWLGSTYQLVDIHVTEMESSVYQPTSSSSTQHFYLNFTITNLPYSQDKAQPGTTNYQRNKRNIEDALNQLFRNSSIKSYFSDCQVSTFRSVPNRHHTGVDSLCNFSPLARRVDRVAIYEEFLRMTRNGTQLQNFTLDRSSVLVDGYSPNRNEPLTGNSDLPFWAVILIGLAGLLGVITCLICGVLVTTRRRKKEGEYNVQQQCPGYYQSHLDLEDLQ 2 14453 14475 PLTGNSDLPF WAVILIGLAGLLGVITCLICGVL VTTRRRKKEG +Q5SSG8 
MKMQKGNVLLMFGLLLHLEAATNSNETSTSANTGSSVISSGASTATNSGSSVTSSGVSTATISGSSVTSNGVSIVTNSEFHTTSSGISTATNSEFSTVSSGISIATNSESSTTSSGASTATNSESSTPSSGASTATNSDSSTTSSGASTATNSDSSTTSSEASTATNSESSTTSSGASTATNSESSTVSSRASTATNSESSTTSSGASTATNSESRTTSNGAGTATNSESSTTSSGASTATNSESSTPSSGAGTATNSESSTTSSGAGTATNSESSTVSSGISTVTNSESSTPSSGANTATNSESSTTSSGANTATNSDSSTTSSGASTATNSESSTTSSGASTATNSESSTTSSGASTATNSGSSTTSSGTSTATNSESSTVSSGASTATTSESSTTSSGASTATNSESSTVSSGASTATNSESSTTSSGANTATNSGSSVTSAGSGTAALTGMHTTSHSASTAVSEAKPGGSLVPWEIFLITLVSVVAAVGLFAGLFFCVRNSLSLRNTFNTAVYHPHGLNHGLGPGPGGNHGAPHRPRWSPNWFWRRPVSSIAMEMSGRNSGP 2 480 502 KPGGSLVPWE IFLITLVSVVAAVGLFAGLFFCV RNSLSLRNTF +Q04900 MSRLSRSLLWAATCLGVLCVLSADKNTTQHPNVTTLAPISNVTSAPVTSLPLVTTPAPETCEGRNSCVSCFNVSVVNTTCFWIECKDESYCSHNSTVSDCQVGNTTDFCSVSTATPVPTANSTAKPTVQPSPSTTSKTVTTSGTTNNTVTPTSQPVRKSTFDAASFIGGIVLVLGVQAVIFFLYKFCKSKERNYHTL 2 164 186 QPVRKSTFDA ASFIGGIVLVLGVQAVIFFLYKF CKSKERNYHT +Q9ULC0 MELLQVTILFLLPSICSSNSTGVLEAANNSLVVTTTKPSITTPNTESLQKNVVTPTTGTTPKGTITNELLKMSLMSTATFLTSKDEGLKATTTDVRKNDSIISNVTVTSVTLPNAVSTLQSSKPKTETQSSIKTTEIPGSVLQPDASPSKTGTLTSIPVTIPENTSQSQVIGTEGGKNASTSATSRSYSSIILPVVIALIVITLSVFVLVGLYRMCWKADPGTPENGNDQPQSDKESVKLLTVKTISHESGEHSAQGKTKN 2 191 213 TSATSRSYSS IILPVVIALIVITLSVFVLVGLY RMCWKADPGT +Q3MIW9 MAQPVHSLCSAFGLQCCLLFLLASWGAGATTFQEYQKTGELSTSDHIFPLTPGLVYSIPFDHIVLHSGQRPPELPKSTEIHEQKRHCNTTRHSKPTDKPTGNSKTIDHKSSTDNHEAPPTSEENSSNQGKDPMIRNQRSVDPADSTTTHKESAGKKHITPAPKSKINCRKSTTGKSTVTRKSDKTGRPLEKSMSTLDKTSTSSHKTTTSFHNSGNSQTKQKSTSFPEKITAASKTTYKTTGTPEESEKTEDSRTTVASDKLLTKTTKNIQETISANELTQSLAEPTEHGGRTANENNTPSPAEPTENRERTANENKKTICTKGKNTPVPEKPTENLGNTTLTTETIKAPVKSTENPEKTAAVTKTIKPSVKVTGDKSLTTTSSHLNKTEVTHQVPTGSFTLITSRTKLSSITSEATGNESHPYLNKDGSQKGIHAGQMGENDSFPAWAIVIVVLVAVILLLVFLGLIFLVSYMMRTRRTLTQNTQYNDAEDEGGPNSYPVYLMEQQNLGMGQIPSPR 2 447 469 QMGENDSFPA WAIVIVVLVAVILLLVFLGLIFL VSYMMRTRRT +Q9BRK3 
MALPSRILLWKLVLLQSSAVLLHSGSSVPAAAGSSVVSESAVSWEAGARAVLRCQSPRMVWTQDRLHDRQRVLHWDLRGPGGGPARRLLDLYSAGEQRVYEARDRGRLELSASAFDDGNFSLLIRAVEETDAGLYTCNLHHHYCHLYESLAVRLEVTDGPPATPAYWDGEKEVLAVARGAPALLTCVNRGHVWTDRHVEEAQQVVHWDRQPPGVPHDRADRLLDLYASGERRAYGPLFLRDRVAVGADAFERGDFSLRIEPLEVADEGTYSCHLHHHYCGLHERRVFHLTVAEPHAEPPPRGSPGNGSSHSGAPGPDPTLARGHNVINVIVPESRAHFFQQLGYVLATLLLFILLLVTVLLAARRRRGGYEYSDQKSGKSKGKDVNLAEFAVAAGDQMLYRSEDIQLDYKNNILKERAELAHSPLPAKYIDLDKGFRKENCK 2 341 363 VPESRAHFFQ QLGYVLATLLLFILLLVTVLLAA RRRRGGYEYS +P25189 MAPGAPSSSPSPILAVLLFSSLVLSPAQAIVVYTDREVHGAVGSRVTLHCSFWSSEWVSDDISFTWRYQPEGGRDAISIFHYAKGQPYIDEVGTFKERIQWVGDPRWKDGSIVIHNLDYSDNGTFTCDVKNPPDIVGKTSQVTLYVFEKVPTRYGVVLGAVIGGVLGVVLLLLLLFYVVRYCWLRRQAALQRRLSAMEKGKLHKPGKDASKRGRQTPVLYAMLDHSRSTKAVSEKKAKGLGESRKDKK 2 157 179 FEKVPTRYGV VLGAVIGGVLGVVLLLLLLFYVV RYCWLRRQAA +Q9UK23 MATSTGRWLLLRLALFGFLWEASGGLDSGASRDDDLLLPYPRARARLPRDCTRVRAGNREHESWPPPPATPGAGGLAVRTFVSHFRDRAVAGHLTRAVEPLRTFSVLEPGGPGGCAARRRATVEETARAADCRVAQNGGFFRMNSGECLGNVVSDERRVSSSGGLQNAQFGIRRDGTLVTGYLSEEEVLDTENPFVQLLSGVVWLIRNGSIYINESQATECDETQETGSFSKFVNVISARTAIGHDRKGQLVLFHADGQTEQRGINLWEMAEFLLKQDVVNAINLDGGGSATFVLNGTLASYPSDHCQDNMWRCPRQVSTVVCVHEPRCQPPDCHGHGTCVDGHCQCTGHFWRGPGCDELDCGPSNCSQHGLCTETGCRCDAGWTGSNCSEECPLGWHGPGCQRPCKCEHHCPCDPKTGNCSVSRVKQCLQPPEATLRAGELSFFTRTAWLALTLALAFLLLISTAANLSLLLSRAERNRRLHGDYAYHPLQEMNGEPLAAEKEQPGGAHNPFKD 2 450 472 GELSFFTRTA WLALTLALAFLLLISTAANLSLL LSRAERNRRL +P13591 
MLQTKDLIWTLFFLGTAVSLQVDIVPSQGEISVGESKFFLCQVAGDAKDKDISWFSPNGEKLTPNQQRISVVWNDDSSSTLTIYNANIDDAGIYKCVVTGEDGSESEATVNVKIFQKLMFKNAPTPQEFREGEDAVIVCDVVSSLPPTIIWKHKGRDVILKKDVRFIVLSNNYLQIRGIKKTDEGTYRCEGRILARGEINFKDIQVIVNVPPTIQARQNIVNATANLGQSVTLVCDAEGFPEPTMSWTKDGEQIEQEEDDEKYIFSDDSSQLTIKKVDKNDEAEYICIAENKAGEQDATIHLKVFAKPKITYVENQTAMELEEQVTLTCEASGDPIPSITWRTSTRNISSEEKASWTRPEKQETLDGHMVVRSHARVSSLTLKSIQYTDAGEYICTASNTIGQDSQSMYLEVQYAPKLQGPVAVYTWEGNQVNITCEVFAYPSATISWFRDGQLLPSSNYSNIKIYNTPSASYLEVTPDSENDFGNYNCTAVNRIGQESLEFILVQADTPSSPSIDQVEPYSSTAQVQFDEPEATGGVPILKYKAEWRAVGEEVWHSKWYDAKEASMEGIVTIVGLKPETTYAVRLAALNGKGLGEISAASEFKTQPVQGEPSAPKLEGQMGEDGNSIKVNLIKQDDGGSPIRHYLVRYRALSSEWKPEIRLPSGSDHVMLKSLDWNAEYEVYVVAENQQGKSKAAHFVFRTSAQPTAIPANGSPTSGLSTGAIVGILIVIFVLLLVVVDITCYFLNKCGLFMCIAVNLCGKAGPGAKGKDMEEGKAAFSKDESKEPIVEVRTEEERTPNHDGGKHTEPNETTPLTEPEKGPVEAKPECQETETKPAPAEVKTVPNDATQTKENESKA 2 724 746 SPTSGLSTGA IVGILIVIFVLLLVVVDITCYFL NKCGLFMCIA +O35136 MSLLLSFYLLGLLVRSGQALLQVTISLSKVELSVGESKFFTCTAIGEPESIDWYNPQGEKIISTQRVMLQKEGVRSRLTIYNANIEDAGIYRCQATDAKGQTQEATVVLEIYQKLTFREVVSPQEFKQGEDAEVVCRVSSSPAPAVSWLYHNEEVTTIPDNRFAVLANNNLQILNINKSDEGIYRCEGRVEARGEIDFRDIIVIVNVPPAIMMPQKSFNATAERGEEMTLTCKASGSPDPTISWFRNGKLIEENEKYILKGSNTELTVRNIINKDGGSYVCKATNKAGEDQKQAFLQVFVQPHILQLKNETTSENGHVTLVCEAEGEPVPEITWKRAIDGVMFSEGDKSPDGRIEVKGQHGRSSLHIRDVKLSDSGRYDCEAASRIGGHQRSMHLDIEYAPKFVSNQTMYYSWEGNPINISCDVTANPPASIHWRREKLLLPAKNTTHLKTHSVGRKMILEIAPTSDNDFGRYNCTATNRIGTRFQEYILELADVPSSPHGVKIIELSQTTAKISFNKPESHGGVPIHHYQVDVKEVASETWKIVRSHGVQTMVVLSSLEPNTTYEIRVAAVNGKGQGDYSKIEIFQTLPVREPSPPSIHGQPSSGKSFKISITKQDDGGAPILEYIVKYRSKDKEDQWLEKKVQGNKDHIILEHLQWTMGYEVQITAANRLGYSEPTVYEFSMPPKPNIIKDTLFNGLGLGAIIGLGVAALLLILVVTDVSCFFIRQCGLLMCITRRMCGKKSGSSGKSKELEEGKAAYLKDGSKEPIVEMRTEDERITNHEDGSPVNEPNETTPLTEPEKLPLKEENGKEVLNAETIEIKVSNDIIQSKEDDIKA 2 696 718 PKPNIIKDTL FNGLGLGAIIGLGVAALLLILVV TDVSCFFIRQ +Q5T1S8 MTTATPLGDTTFFSLNMTTRGEDFLYKSSGAIVAAVVVVVIIIFTVVLILLKMYNRKMRTRRELEPKGPKPTAPSAVGPNSNGSQHPATVTFSPVDVQVETR 2 29 51 TRGEDFLYKS SGAIVAAVVVVVIIIFTVVLILL KMYNRKMRTR +O76036 
MSSTLPALLCVGLCLSQRISAQQQTLPKPFIWAEPHFMVPKEKQVTICCQGNYGAVEYQLHFEGSLFAVDRPKPPERINKVKFYIPDMNSRMAGQYSCIYRVGELWSEPSNLLDLVVTEMYDTPTLSVHPGPEVISGEKVTFYCRLDTATSMFLLLKEGRSSHVQRGYGKVQAEFPLGPVTTAHRGTYRCFGSYNNHAWSFPSEPVKLLVTGDIENTSLAPEDPTFPADTWGTYLLTTETGLQKDHALWDHTAQNLLRMGLAFLVLVALVWFLVEDWLSRKRTRERASRASTWEGRRRLNTQTL 2 256 274 HALWDHTAQN LLRMGLAFLVLVALVWFLV EDWLSRKRTR +O95944 MAWRALHPLLLLLLLFPGSQAQSKAQVLQSVAGQTLTVRCQYPPTGSLYEKKGWCKEASALVCIRLVTSSKPRTMAWTSRFTIWDDPDAGFFTVTMTDLREEDSGHYWCRIYRPSDNSVSKSVRFYLVVSPASASTQTSWTPRDLVSSQTQTQSCVPPTAGARQAPESPSTIPVPSQPQNSTLRPGPAAPIALVPVFCGLLVAKSLVLSALLVWWGDIWWKTMMELRSLDTQKATCHLQQVTDLPWTSVSSPVEREILYHTVARTKISDDDDEHTL 2 193 215 LRPGPAAPIA LVPVFCGLLVAKSLVLSALLVWW GDIWWKTMME +Q96NY8 MPLSLGAEMWGPEAWLLLLLLLASFTGRCPAGELETSDVVTVVLGQDAKLPCFYRGDSGEQVGQVAWARVDAGEGAQELALLHSKYGLHVSPAYEGRVEQPPPPRNPLDGSVLLRNAVQADEGEYECRVSTFPAGSFQARLRLRVLVPPLPSLNPGPALEEGQGLTLAASCTAEGSPAPSVTWDTEVKGTTSSRSFKHSRSAAVTSEFHLVPSRSMNGQPLTCVVSHPGLLQDQRITHILHVSFLAEASVRGLEDQNLWHIGREGAMLKCLSEGQPPPSYNWTRLDGPLPSGVRVDGDTLGFPPLTTEHSGIYVCHVSNEFSSRDSQVTVDVLDPQEDSGKQVDLVSASVVVVGVIAALLFCLLVVVVVLMSRYHRRKAQQMTQKYEEELTLTRENSIRRLHSHHTDPRSQPEESVGLRAEGHPDSLKDNSSCSVMSEEPEGRSYSTLTTVREIETQTELLSPGSGRAEEEEDQDEGIKQAMNHFVQENGTLRAKPTGNGIYINGRGHLV 2 350 372 GKQVDLVSAS VVVVGVIAALLFCLLVVVVVLMS RYHRRKAQQM +Q8TDF5 MIHGRSVLHIVASLIILHLSGATKKGTEKQTTSETQKSVQCGTWTKHAEGGIFTSPNYPSKYPPDRECIYIIEAAPRQCIELYFDEKYSIEPSWECKFDHIEVRDGPFGFSPIIGRFCGQQNPPVIKSSGRFLWIKFFADGELESMGFSARYNFTPDPDFKDLGALKPLPACEFEMGGSEGIVESIQIMKEGKATASEAVDCKWYIRAPPRSKIYLRFLDYEMQNSNECKRNFVAVYDGSSSVEDLKAKFCSTVANDVMLRTGLGVIRMWADEGSRNSRFQMLFTSFQEPPCEGNTFFCHSNMCINNTLVCNGLQNCVYPWDENHCKEKRKTSLLDQLTNTSGTVIGVTSCIVIILIIISVIVQIKQPRKKYVQRKSDFDQTVFQEVFEPPHYELCTLRGTGATADFADVADDFENYHKLRRSSSKCIHDHHCGSQLSSTKGSRSNLSTRDASILTEMPTQPGKPLIPPMNRRNILVMKHSYSQDAADACDIDEIEEVPTTSHRLSRHDKAVQRFCLIGSLSKHESEYNTTRV 2 343 365 SLLDQLTNTS GTVIGVTSCIVIILIIISVIVQI KQPRKKYVQR +Q8NET5 
MENQPVRWRALPGLPRPPGLPAAPWLLLGVLLLPGTLRLAGGQSVTHTGLPIMASLANTAISFSCRITYPYTPQFKVFTVSYFHEDLQGQRSPKKPTNCHPGLGTENQSHTLDCQVTLVLPGASATGTYYCSVHWPHSTVRGSGTFILVRDAGYREPPQSPQKLLLFGFTGLLSVLSVVGTALLLWNKKRMRGPGKDPTRKCPDPRSASSPKQHPSESVYTALQRRETEVYACIENEDGSSPTAKQSPLSQERPHRFEDDGELNLVYENL 2 164 186 YREPPQSPQK LLLFGFTGLLSVLSVVGTALLLW NKKRMRGPGK +Q92542 MATAGGGSGADPGSRGLLRLLSFCVLLAGLCRGNSVERKIYIPLNKTAPCVRLLNATHQIGCQSSISGDTGVIHVVEKEEDLQWVLTDGPNPPYMVLLESKHFTRDLMEKLKGRTSRIAGLAVSLTKPSPASGFSPSVQCPNDGFGVYSNSYGPEFAHCREIQWNSLGNGLAYEDFSFPIFLLEDENETKVIKQCYQDHNLSQNGSAPTFPLCAMQLFSHMHAVISTATCMRRSSIQSTFSINPEIVCDPLSDYNVWSMLKPINTTGTLKPDDRVVVAATRLDSRSFFWNVAPGAESAVASFVTQLAAAEALQKAPDVTTLPRNVMFVFFQGETFDYIGSSRMVYDMEKGKFPVQLENVDSFVELGQVALRTSLELWMHTDPVSQKNESVRNQVEDLLATLEKSGAGVPAVILRRPNQSQPLPPSSLQRFLRARNISGVVLADHSGAFHNKYYQSIYDTAENINVSYPEWLSPEEDLNFVTDTAKALADVATVLGRALYELAGGTNFSDTVQADPQTVTRLLYGFLIKANNSWFQSILRQDLRSYLGDGPLQHYIAVSSPTNTTYVVQYALANLTGTVVNLTREQCQDPSKVPSENKDLYEYSWVQGPLHSNETDRLPRCVRSTARLARALSPAFELSQWSSTEYSTWTESRWKDIRARIFLIASKELELITLTVGFGILIFSLIVTYCINAKADVLFIAPREPGAVSY 2 670 692 IFLIASKELE LITLTVGFGILIFSLIVTYCINA KADVLFIAPR +O60500 
MALGTTLRASLLLLGLLTEGLAQLAIPASVPRGFWALPENLTVVEGASVELRCGVSTPGSAVQWAKDGLLLGPDPRIPGFPRYRLEGDPARGEFHLHIEACDLSDDAEYECQVGRSEMGPELVSPRVILSILVPPKLLLLTPEAGTMVTWVAGQEYVVNCVSGDAKPAPDITILLSGQTISDISANVNEGSQQKLFTVEATARVTPRSSDNRQLLVCEASSPALEAPIKASFTVNVLFPPGPPVIEWPGLDEGHVRAGQSLELPCVARGGNPLATLQWLKNGQPVSTAWGTEHTQAVARSVLVMTVRPEDHGAQLSCEAHNSVSAGTQEHGITLQVTFPPSAIIILGSASQTENKNVTLSCVSKSSRPRVLLRWWLGWRQLLPMEETVMDGLHGGHISMSNLTFLARREDNGLTLTCEAFSEAFTKETFKKSLILNVKYPAQKLWIEGPPEGQKLRAGTRVRLVCLAIGGNPEPSLMWYKDSRTVTESRLPQESRRVHLGSVEKSGSTFSRELVLVTGPSDNQAKFTCKAGQLSASTQLAVQFPPTNVTILANASALRPGDALNLTCVSVSSNPPVNLSWDKEGERLEGVAAPPRRAPFKGSAAARSVLLQVSSRDHGQRVTCRAHSAELRETVSSFYRLNVLYRPEFLGEQVLVVTAVEQGEALLPVSVSANPAPEAFNWTFRGYRLSPAGGPRHRILSSGALHLWNVTRADDGLYQLHCQNSEGTAEARLRLDVHYAPTIRALQDPTEVNVGGSVDIVCTVDANPILPGMFNWERLGEDEEDQSLDDMEKISRGPTGRLRIHHAKLAQAGAYQCIVDNGVAPPARRLLRLVVRFAPQVEHPTPLTKVAAAGDSTSSATLHCRARGVPNIVFTWTKNGVPLDLQDPRYTEHTYHQGGVHSSLLTIANVSAAQDYALFTCTATNALGSDQTNIQLVSISRPDPPSGLKVVSLTPHSVGLEWKPGFDGGLPQRFCIRYEALGTPGFHYVDVVPPQATTFTLTGLQPSTRYRVWLLASNALGDSGLADKGTQLPITTPGLHQPSGEPEDQLPTEPPSGPSGLPLLPVLFALGGLLLLSNASCVGGVLWQRRLRRLAEGISEKTEAGSEEDRVRNEYEESQWTGERDTQSSTVSTTEAEPYYRSLRDFSPQLPPTQEEVSYSRGFTGEDEDMAFPGHLYDEVERTYPPSGAWGPLYDEVQMGPWDLHWPEDTYQDPRGIYDQVAGDLDTLEPDSLPFELRGHLV 2 1064 1086 PSGPSGLPLL PVLFALGGLLLLSNASCVGGVLW QRRLRRLAEG +Q68D85 MTWRAAASTCAALLILLWALTTEGDLKVEMMAGGTQITPLNDNVTIFCNIFYSQPLNITSMGITWFWKSLTFDKEVKVFEFFGDHQEAFRPGAIVSPWRLKSGDASLRLPGIQLEEAGEYRCEVVVTPLKAQGTVQLEVVASPASRLLLDQVGMKENEDKYMCESSGFYPEAINITWEKQTQKFPHPIEISEDVITGPTIKNMDGTFNVTSCLKLNSSQEDPGTVYQCVVRHASLHTPLRSNFTLTAARHSLSETEKTDNFSIHWWPISFIGVGLVLLIVLIPWKKICNKSSSAYTPLKCILKHWNSFDTQTLKKEHLIFFCTRAWPSYQLQDGEAWPPEGSVNINTIQQLDVFCRQEGKWSEVPYVQAFFALRDNPDLCQCCRIDPALLTVTSGKSIDDNSTKSEKQTPREHSDAVPDAPILPVSPIWEPPPATTSTTPVLSSQPPTLLLPLQ 2 262 284 LSETEKTDNF SIHWWPISFIGVGLVLLIVLIPW KKICNKSSSA +O35181 
MSEGAAGASPPGAASAAAASAEEGTAAAAAAAAAGGGPDGGGEGAAEPPRELRCSDCIVWNRQQTWLCVVPLFIGFIGLGLSLMLLKWIVVGSVKEYVPTDLVDSKGMGQDPFFLSKPSSFPKAMETTTTTTSTTSPATPSAGGAASSRTPNRISTRLTTITRAPTRFPGHRVPIRASPRSTTARNTAAPPTVLSTTAPFFSSSTPGSRPPMPGAPSTQAMPSWPTAAYATSSYLHDSTPSWTLSPFQDAAAASSSSPSSTSSTTTTPETSTSPKFHTTTYSTERSEHFKPCRDKDLAYCLNDGECFVIETLTGSHKHCRCKEGYQGVRCDQFLPKTDSILSDPTDHLGIEFMESEDVYQRQVLSISCIIFGIVIVGMFCAAFYFKSKKQAKQIQEHLKESQNGKNYSLKASSTKSESLMKSHVHLQNYSKADRHPVTALEKIMESSFSAPQSFPEVTSPDRGSQPIKHHSPGQRSGMLHRNTFRRAPPSPRSRLGGIVGPAYQQLEESRIPDQDTIPCQGIEVRKTISHLPIQLWCVERPLDLKYVSNGLRTQQNASINMQLPSRETNPYFNSLDQKDLVGYLSPRANSVPIIPSMGLEETCMQMPGISDVKSIKWCKNSYSADIVNASMPVSDCLLEEQQEVKILLETVQEQIRILTDARRSEDFELASMETEDSASENTAFLPLSPTAKSEREAQFVLRNEIQRDSVLTK 2 363 385 MESEDVYQRQ VLSISCIIFGIVIVGMFCAAFYF KSKKQAKQIQ +Q8WWG1 MPTDHEEPCGPSHKSFCLNGGLCYVIPTIPSPFCRCVENYTGARCEEVFLPGSSIQTKSNLFEAFVALAVLVTLIIGAFYFLCRKGHFQRASSVQYDINLVETSSTSAHHSHEQH 2 61 83 PGSSIQTKSN LFEAFVALAVLVTLIIGAFYFLC RKGHFQRASS +O14786 MERGLPLLCAVLALVLAPAGAFRNDKCGDTIKIESPGYLTSPGYPHSYHPSEKCEWLIQAPDPYQRIMINFNPHFDLEDRDCKYDYVEVFDGENENGHFRGKFCGKIAPPPVVSSGPFLFIKFVSDYETHGAGFSIRYEIFKRGPECSQNYTTPSGVIKSPGFPEKYPNSLECTYIVFVPKMSEIILEFESFDLEPDSNPPGGMFCRYDRLEIWDGFPDVGPHIGRYCGQKTPGRIRSSSGILSMVFYTDSAIAKEGFSANYSVLQSSVSEDFKCMEALGMESGEIHSDQITASSQYSTNWSAERSRLNYPENGWTPGEDSYREWIQVDLGLLRFVTAVGTQGAISKETKKKYYVKTYKIDVSSNGEDWITIKEGNKPVLFQGNTNPTDVVVAVFPKPLITRFVRIKPATWETGISMRFEVYGCKITDYPCSGMLGMVSGLISDSQITSSNQGDRNWMPENIRLVTSRSGWALPPAPHSYINEWLQIDLGEEKIVRGIIIQGGKHRENKVFMRKFKIGYSNNGSDWKMIMDDSKRKAKSFEGNNNYDTPELRTFPALSTRFIRIYPERATHGGLGLRMELLGCEVEAPTAGPTTPNGNLVDECDDDQANCHSGTGDDFQLTGGTTVLATEKPTVIDSTIQSEFPTYGFNCEFGWGSHKTFCHWEHDNHVQLKWSVLTSKTGPIQDHTGDGNFIYSQADENQKGKVARLVSPVVYSQNSAHCMTFWYHMSGSHVGTLRVKLRYQKPEEYDQLVWMAIGHQGDHWKEGRVLLHKSLKLYQVIFEGEIGKGNLGGIAVDDISINNHISQEDCAKPADLDKKNPEIKIDETGSTPGYEGEGEGDKNISRKPGNVLKTLDPILITIIAMSALGVLLGAVCGVVLYCACWHNGMSERNLSALENYNFELVDGVKLKKDKLNTQSTYSEA 2 857 879 PGNVLKTLDP ILITIIAMSALGVLLGAVCGVVL YCACWHNGMS +Q96PE5 
MSFSLNFTLPANTTSSPVTGGKETDCGPSLGLAAGIPLLVATALLVALLFTLIHRRRSSIEAMEESDRPCEISEIDDNPKISENPRRSPTHEKNTMGAQEAHIYVKTVAGSEEPVHDRYRPTIEMERRRGLWWLVPRLSLE 2 31 53 GKETDCGPSL GLAAGIPLLVATALLVALLFTLI HRRRSSIEAM +Q99650 MALFAVFQTTFFLTLLSLRTYQSEVLAERLPLTPVSLKVSTNSTRQSLHLQWTVHNLPYHQELKMVFQIQISRIETSNVIWVGNYSTTVKWNQVLHWSWESELPLECATHFVRIKSLVDDAKFPEPNFWSNWSSWEEVSVQDSTGQDILFVFPKDKLVEEGTNVTICYVSRNIQNNVSCYLEGKQIHGEQLDPHVTAFNLNSVPFIRNKGTNIYCEASQGNVSEGMKGIVLFVSKVLEEPKDFSCETEDFKTLHCTWDPGTDTALGWSKQPSQSYTLFESFSGEKKLCTHKNWCNWQITQDSQETYNFTLIAENYLRKRSVNILFNLTHRVYLMNPFSVNFENVNATNAIMTWKVHSIRNNFTYLCQIELHGEGKMMQYNVSIKVNGEYFLSELEPATEYMARVRCADASHFWKWSEWSGQNFTTLEAAPSEAPDVWRIVSLEPGNHTVTLFWKPLSKLHANGKILFYNVVVENLDKPSSSELHSIPAPANSTKLILDRCSYQICVIANNSVGASPASVIVISADPENKEVEEERIAGTEGGFSLSWKPQPGDVIGYVVDWCDHTQDVLGDFQWKNVGPNTTSTVISTDAFRPGVRYDFRIYGLSTKRIACLLEKKTGYSQELAPSDNPHVLVDTLTSHSFTLSWKDYSTESQPGFIQGYHVYLKSKARQCHPRFEKAVLSDGSECCKYKIDNPEEKALIVDNLKPESFYEFFITPFTSAGEGPSATFTKVTTPDEHSSMLIHILLPMVFCVLLIMVMCYLKSQWIKETCYPDIPDPYKSSILSLIKFKENPHLIIMNVSDCIPDAIEVVSKPEGTKIQFLGTRKSLTETELTKPNYLYLLPTEKNHSGPGPCICFENLTYNQAASDSGSCGHVPVSPKAPSMLGLMTSPENVLKALEKNYMNSLGEIPAGETSLNYVSQLASPMFGDKDSLPTNPVEAPHCSEYKMQMAVSLRLALPPPTENSSLSSITLLDPGEHYC 2 739 761 TKVTTPDEHS SMLIHILLPMVFCVLLIMVMCYL KSQWIKETCY +Q86WC4 MEPGPTAAQRRCSLPPWLPLGLLLWSGLALGALPFGSSPHRVFHDLLSEQQLLEVEDLSLSLLQGGGLGPLSLPPDLPDLDPECRELLLDFANSSAELTGCLVRSARPVRLCQTCYPLFQQVVSKMDNISRAAGNTSESQSCARSLLMADRMQIVVILSEFFNTTWQEANCANCLTNNSEELSNSTVYFLNLFNHTLTCFEHNLQGNAHSLLQTKNYSEVCKNCREAYKTLSSLYSEMQKMNELENKAEPGTHLCIDVEDAMNITRKLWSRTFNCSVPCSDTVPVIAVSVFILFLPVVFYLSSFLHSEQKKRKLILPKRLKSSTSFANIQENSN 2 283 305 FNCSVPCSDT VPVIAVSVFILFLPVVFYLSSFL HSEQKKRKLI +Q96FE7 MLLAWVQAFLVSNMLLAEAYGSGGCFWDNGHLYREDQTSPAPGLRCLNWLDAQSGLASAPVSGAGNHSYCRNPDEDPRGPWCYVSGEAGVPEKRPCEDLRCPETTSQALPAFTTEIQEASEGPGADEVQVFAPANALPARSEAAAVQPVIGISQRVRMNSKEKKDLGTLGYVLGITMMVIIIAIGAGIILGYSYKRGKDLKEQHDQKVCEREMQRITLPLSAFTNPTCEIVDEKTVVVHTSQTPVDPQEGTTPLMGQAGTPGA 2 169 191 NSKEKKDLGT LGYVLGITMMVIIIAIGAGIILG YSYKRGKDLK +Q8NBR0 
MAPPPPSPQLLLLAALARLLGPSEVMAGPAEEAGAHCPESLWPLPPQVSPRVTYTRVSPGQAEDVTFLYHPCAHPWLKLQLALLAYACMANPSLTPDFSLTQDRPLVLTAWGLALEMAWVEPAWAAHWLMRRRRRKQRKKKAWIYCESLSGPAPSEPTPGRGRLCRRGCVQALALAFALRSWRPPGTEVTSQGPRQPSSSGAKRRRLRAALGPQPTRSALRFPSASPGSLKAKQSMAGIPGRESNAPSVPTVSLLPGAPGGNASSRTEAQVPNGQGSPGGCVCSSQASPAPRAAAPPRAARGPTPRTEEAAWAAMALTFLLVLLTLATLCTRLHRNFRRGESIYWGPTADSQDTVAAVLKRRLLQPSRRVKRSRRRPLLPPTPDSGPEGESSE 2 310 329 ARGPTPRTEE AAWAAMALTFLLVLLTLATL CTRLHRNFRR +Q6UWI2 MVYKTLFALCILTAGWRVQSLPTSAPLSVSLPTNIVPPTTIWTSSPQNTDADTASPSNGTHNNSVLPVTASAPTSLLPKNISIESREEEITSPGSNWEGTNTDPSPSGFSSTSGGVHLTTTLEEHSSGTPEAGVAATLSQSAAEPPTLISPQAPASSPSSLSTSPPEVFSASVTTNHSSTVTSTQPTGAPTAPESPTEESSSDHTPTSHATAEPVPQEKTPPTTVSGKVMCELIDMETTTTFPRVIMQEVEHALSSGSIAAITVTVIAVVLLVFGVAAYLKIRHSSYGRLLDDHDYGSWGNYNNPLYDDS 2 258 280 QEVEHALSSG SIAAITVTVIAVVLLVFGVAAYL KIRHSSYGRL +Q923D3 MVCKVLIALCIFTAGLRVQGSPTVPLPVSLMTKSSAPVATWTTSAPHTARATTPVASATHNASVLRTTAASLTSQLPTDHREEAVTSPPLKRDVNSTDSSPAGFPSTSSDGHLAPTPEEHSLGSPEATVPATGSQSPMLLSSQAPTSATTSPATSLSESLSASVTSSHNSTVANIQPTEAPMAPASPTEEHSSSHTPTSHVTAEPVPKEKSPQDTEPGKVICESETTTPFLIMQEVENALSSGSIAAITVTVIAVVLLVFGGAAYLKIRHSSYGRLLDDHDYGSWGNYNNPLYDDS 2 244 266 QEVENALSSG SIAAITVTVIAVVLLVFGGAAYL KIRHSSYGRL +Q9P2E7 
MIVLLLFALLWMVEGVFSQLHYTVQEEQEHGTFVGNIAEDLGLDITKLSARGFQTVPNSRTPYLDLNLETGVLYVNEKIDREQICKQSPSCVLHLEVFLENPLELFQVEIEVLDINDNPPSFPEPDLTVEISESATPGTRFPLESAFDPDVGTNSLRDYEITPNSYFSLDVQTQGDGNRFAELVLEKPLDREQQAVHRYVLTAVDGGGGGGVGEGGGGGGGAGLPPQQQRTGTALLTIRVLDSNDNVPAFDQPVYTVSLPENSPPGTLVIQLNATDPDEGQNGEVVYSFSSHISPRARELFGLSPRTGRLEVSGELDYEESPVYQVYVQAKDLGPNAVPAHCKVLVRVLDANDNAPEISFSTVKEAVSEGAAPGTVVALFSVTDRDSEENGQVQCELLGDVPFRLKSSFKNYYTIVTEAPLDREAGDSYTLTVVARDRGEPALSTSKSIQVQVSDVNDNAPRFSQPVYDVYVTENNVPGAYIYAVSATDRDEGANAQLAYSILECQIQGMSVFTYVSINSENGYLYALRSFDYEQLKDFSFQVEARDAGSPQALAGNATVNILIVDQNDNAPAIVAPLPGRNGTPAREVLPRSAEPGYLLTRVAAVDADDGENARLTYSIVRGNEMNLFRMDWRTGELRTARRVPAKRDPQRPYELVIEVRDHGQPPLSSTATLVVQLVDGAVEPQGGGGSGGGGSGEHQRPSRSGGGETSLDLTLILIIALGSVSFIFLLAMIVLAVRCQKEKKLNIYTCLASDCCLCCCCCGGGGSTCCGRQARARKKKLSKSDIMLVQSSNVPSNPAQVPIEESGGFGSHHHNQNYCYQVCLTPESAKTDLMFLKPCSPSRSTDTEHNPCGAIVTGYTDQQPDIISNGSILSNETKHQRAELSYLVDRPRRVNSSAFQEADIVSSKDSGHGDSEQGDSDHDATNRAQSAGMDLFSNCTEECKALGHSDRCWMPSFVPSDGRQAADYRSNLHVPGMDSVPDTEVFETPEAQPGAERSFSTFGKEKALHSTLERKELDGLLTNTRAPYKPPYLTRKRIC 2 716 738 GGGETSLDLT LILIIALGSVSFIFLLAMIVLAV RCQKEKKLNI +Q96QU1 
MFRQFYLWTCLASGIILGSLFEICLGQYDDDCKLARGGPPATIVAIDEESRNGTILVDNMLIKGTAGGPDPTIELSLKDNVDYWVLMDPVKQMLFLNSTGRVLDRDPPMNIHSIVVQVQCINKKVGTIIYHEVRIVVRDRNDNSPTFKHESYYATVNELTPVGTTIFTGFSGDNGATDIDDGPNGQIEYVIQYNPDDPTSNDTFEIPLMLTGNIVLRKRLNYEDKTRYFVIIQANDRAQNLNERRTTTTTLTVDVLDGDDLGPMFLPCVLVPNTRDCRPLTYQAAIPELRTPEELNPIIVTPPIQAIDQDRNIQPPSDRPGILYSILVGTPEDYPRFFHMHPRTAELSLLEPVNRDFHQKFDLVIKAEQDNGHPLPAFAGLHIEILDENNQSPYFTMPSYQGYILESAPVGATISDSLNLTSPLRIVALDKDIEDTKDPELHLFLNDYTSVFTVTQTGITRYLTLLQPVDREEQQTYTFSITAFDGVQESEPVIVNIQVMDANDNTPTFPEISYDVYVYTDMRPGDSVIQLTAVDADEGSNGEITYEILVGAQGDFIINKTTGLITIAPGVEMIVGRTYALTVQAADNAPPAERRNSICTVYIEVLPPNNQSPPRFPQLMYSLEISEAMRVGAVLLNLQATDREGDSITYAIENGDPQRVFNLSETTGILTLGKALDRESTDRYILIITASDGRPDGTSTATVNIVVTDVNDNAPVFDPYLPRNLSVVEEEANAFVGQVKATDPDAGINGQVHYSLGNFNNLFRITSNGSIYTAVKLNREVRDYYELVVVATDGAVHPRHSTLTLAIKVLDIDDNSPVFTNSTYTVLVEENLPAGTTILQIEAKDVDLGANVSYRIRSPEVKHFFALHPFTGELSLLRSLDYEAFPDQEASITFLVEAFDIYGTMPPGIATVTVIVKDMNDYPPVFSKRIYKGMVAPDAVKGTPITTVYAEDADPPGLPASRVRYRVDDVQFPYPASIFEVEEDSGRVITRVNLNEEPTTIFKLVVVAFDDGEPVMSSSATVKILVLHPGEIPRFTQEEYRPPPVSELATKGTMVGVISAAAINQSIVYSIVSGNEEDTFGINNITGVIYVNGPLDYETRTSYVLRVQADSLEVVLANLRVPSKSNTAKVYIEIQDENNHPPVFQKKFYIGGVSEDARMFTSVLRVKATDKDTGNYSVMAYRLIIPPIKEGKEGFVVETYTGLIKTAMLFHNMRRSYFKFQVIATDDYGKGLSGKADVLVSVVNQLDMQVIVSNVPPTLVEKKIEDLTEILDRYVQEQIPGAKVVVESIGARRHGDAFSLEDYTKCDLTVYAIDPQTNRAIDRNELFKFLDGKLLDINKDFQPYYGEGGRILEIRTPEAVTSIKKRGESLGYTEGALLALAFIIILCCIPAILVVLVSYRQFKVRQAECTKTARIQAALPAAKPAVPAPAPVAAPPPPPPPPPGAHLYEELGDSSILFLLYHFQQSRGNNSVSEDRKHQQVVMPFSSNTIEAHKSAHVDGSLKSNKLKSARKFTFLSDEDDLSAHNPLYKENISQVSTNSDISQRTDFVDPFSPKIQAKSKSLRGPREKIQRLWSQSVSLPRRLMRKVPNRPEIIDLQQWQGTRQKAENENTGICTNKRGSSNPLLTTEEANLTEKEEIRQGETLMIEGTEQLKSLSSDSSFCFPRPHFSFSTLPTVSRTVELKSEPNVISSPAECSLELSPSRPCVLHSSLSRRETPICMLPIETERNIFENFAHPPNISPSACPLPPPPPISPPSPPPAPAPLAPPPDISPFSLFCPPPSPPSIPLPLPPPTFFPLSVSTSGPPTPPLLPPFPTPLPPPPPSIPCPPPPSASFLSTECVCITGVKCTTNLMPAEKIKSSMTQLSTTTVCKTDPQREPKGILRHVKNLAELEKSVANMYSQIEKNYLRTNVSELQTMCPSEVTNMEITSEQNKGSLNNIVEGTEKQSHSQSTSL 2 1375 1397 KRGESLGYTE 
GALLALAFIIILCCIPAILVVLV SYRQFKVRQA +Q9HCL0 MHQMNAKMHFRFVFALLIVSFNHDVLGKNLKYRIYEEQRVGSVIARLSEDVADVLLKLPNPSTVRFRAMQRGNSPLLVVNEDNGEISIGATIDREQLCQKNLNCSIEFDVITLPTEHLQLFHIEVEVLDINDNSPQFSRSLIPIEISESAAVGTRIPLDSAFDPDVGENSLHTYSLSANDFFNIEVRTRTDGAKYAELIVVRELDRELKSSYELQLTASDMGVPQRSGSSILKISISDSNDNSPAFEQQSYIIQLLENSPVGTLLLDLNATDPDEGANGKIVYSFSSHVSPKIMETFKIDSERGHLTLFKQVDYEITKSYEIDVQAQDLGPNSIPAHCKIIIKVVDVNDNKPEININLMSPGKEEISYIFEGDPIDTFVALVRVQDKDSGLNGEIVCKLHGHGHFKLQKTYENNYLILTNATLDREKRSEYSLTVIAEDRGTPSLSTVKHFTVQINDINDNPPHFQRSRYEFVISENNSPGAYITTVTATDPDLGENGQVTYTILESFILGSSITTYVTIDPSNGAIYALRIFDHEEVSQITFVVEARDGGSPKQLVSNTTVVLTIIDENDNVPVVIGPALRNNTAEITIPKGAESGFHVTRIRAIDRDSGVNAELSCAIVAGNEENIFIIDPRSCDIHTNVSMDSVPYTEWELSVIIQDKGNPQLHTKVLLKCMIFEYAESVTSTAMTSVSQASLDVSMIIIISLGAICAVLLVIMVLFATRCNREKKDTRSYNCRVAESTYQHHPKRPSRQIHKGDITLVPTINGTLPIRSHHRSSPSSSPTLERGQMGSRQSHNSHQSLNSLVTISSNHVPENFSLELTHATPAVEQVSQLLSMLHQGQYQPRPSFRGNKYSRSYRYALQDMDKFSLKDSGRGDSEAGDSDYDLGRDSPIDRLLGEGFSDLFLTDGRIPAAMRLCTEECRVLGHSDQCWMPPLPSPSSDYRSNMFIPGEEFPTQPQQQHPHQSLEDDAQPADSGEKKKSFSTFGKDSPNDEDTGDTSTSSLLSEMSSVFQRLLPPSLDTYSECSEVDRSNSLERRKGPLPAKTVGYPQGVAAWAASTHFQNPTTNCGPPLGTHSSVQPSSKWLPAMEEIPENYEEDDFDNVLNHLNDGKHELMDASELVAEINKLLQDVRQS 2 698 720 MTSVSQASLD VSMIIIISLGAICAVLLVIMVLF ATRCNREKKD +Q8TAB3 
MESLLLPVLLLLAILWTQAAALINLKYSVEEEQRAGTVIANVAKDAREAGFALDPRQASAFRVVSNSAPHLVDINPSSGLLVTKQKIDRDLLCRQSPKCIISLEVMSSSMEICVIKVEIKDLNDNAPSFPAAQIELEISEAASPGTRIPLDSAYDPDSGSFGVQTYELTPNELFGLEIKTRGDGSRFAELVVEKSLDRETQSHYSFRITALDGGDPPRLGTVGLSIKVTDSNDNNPVFSESTYAVSVPENSPPNTPVIRLNASDPDEGTNGQVVYSFYGYVNDRTRELFQIDPHSGLVTVTGALDYEEGHVYELDVQAKDLGPNSIPAHCKVTVSVLDTNDNPPVINLLSVNSELVEVSESAPPGYVIALVRVSDRDSGLNGRVQCRLLGNVPFRLQEYESFSTILVDGRLDREQHDQYNLTIQARDGGVPMLQSAKSFTVLITDENDNHPHFSKPYYQVIVQENNTPGAYLLSVSARDPDLGLNGSVSYQIVPSQVRDMPVFTYVSINPNSGDIYALRSFNHEQTKAFEFKVLAKDGGLPSLQSNATVRVIILDVNDNTPVITAPPLINGTAEVYIPRNSGIGYLVTVVKAEDYDEGENGRVTYDMTEGDRGFFEIDQVNGEVRTTRTFGESSKSSYELIVVAHDHGKTSLSASALVLIYLSPALDAQESMGSVNLSLIFIIALGSIAGILFVTMIFVAIKCKRDNKEIRTYNCSNCLTITCLLGCFIKGQNSKCLHCISVSPISEEQDKKTEEKVSLRGKRIAEYSYGHQKKSSKKKKISKNDIRLVPRDVEETDKMNVVSCSSLTSSLNYFDYHQQTLPLGCRRSESTFLNVENQNTRNTSANHIYHHSFNSQGPQQPDLIINGVPLPETENYSFDSNYVNSRAHLIKSSSTFKDLEGNSLKDSGHEESDQTDSEHDVQRSLYCDTAVNDVLNTSVTSMGSQMPDHDQNEGFHCREECRILGHSDRCWMPRNPMPIRSKSPEHVRNIIALSIEATAADVEAYDDCGPTKRTFATFGKDVSDHPAEERPTLKGKRTVDVTICSPKVNSVIREAGNGCEAISPVTSPLHLKSSLPTKPSVSYTIALAPPARDLEQYVNNVNNGPTRPSEAEPRGADSEKVMHEVSPILKEGRNKESPGVKRLKDIVL 2 679 701 QESMGSVNLS LIFIIALGSIAGILFVTMIFVAI KCKRDNKEIR +Q8N6Y1 
MRGRGNARSSQALGVSWCPATWHPRLDMGRLHRPRSSTSYRNLPHLFLFFLFVGPFSCLGSYSRATELLYSLNEGLPAGVLIGSLAEDLRLLPRSAGRPDPQSQLPERTGAEWNPPLSFSLASRGLSGQYVTLDNRSGELHTSAQEIDREALCVEGGGGTAWSGSVSISSSPSDSCLLLLDVLVLPQEYFRFVKVKIAIRDINDNAPQFPVSQISVWVPENAPVNTRLAIEHPAVDPDVGINGVQTYRLLDYHGMFTLDVEENENGERTPYLIVMGALDRETQDQYVSIIIAEDGGSPPLLGSATLTIGISDINDNCPLFTDSQINVTVYGNATVGTPIAAVQAVDKDLGTNAQITYSYSQKVPQASKDLFHLDENTGVIKLFSKIGGSVLESHKLTILANGPGCIPAVITALVSIIKVIFRPPEIVPRYIANEIDGVVYLKELEPVNTPIAFFTIRDPEGKYKVNCYLDGEGPFRLSPYKPYNNEYLLETTKPMDYELQQFYEVAVVAWNSEGFHVKRVIKVQLLDDNDNAPIFLQPLIELTIEENNSPNAFLTKLYATDADSEERGQVSYFLGPDAPSYFSLDSVTGILTVSTQLDREEKEKYRYTVRAVDCGKPPRESVATVALTVLDKNDNSPRFINKDFSFFVPENFPGYGEIGVISVTDADAGRNGWVALSVVNQSDIFVIDTGKGMLRAKVSLDREQQSSYTLWVEAVDGGEPALSSTAKITILLLDINDNPPLVLFPQSNMSYLLVLPSTLPGSPVTEVYAVDKDTGMNAVIAYSIIGRRGPRPESFRIDPKTGNITLEEALLQTDYGLHRLLVKVSDHGYPEPLHSTVMVNLFVNDTVSNESYIESLLRKEPEINIEEKEPQISIEPTHRKVESVSCMPTLVALSVISLGSITLVTGMGIYICLRKGEKHPREDENLEVQIPLKGKIDLHMRERKPMDISNI 2 889 911 RKVESVSCMP TLVALSVISLGSITLVTGMGIYI CLRKGEKHPR +Q9Y5F3 MAGTRRKSLQNRQVGSLLIFLCISVGDATTIRYSVAEEMESGSFVANVAKDLGLEVGKLAARGARLVSEGNKMHFRLHRKTGDLFVKEKLDRESLCGKADPCVLHFEVVLVEPLQSFRAEVRVFDINDNAPVFLNKEPLLKIPESTPLGSRFPLQSAQDLDVGLNGLQNYTLSANGYFHLHTRFCSHGPKYAELVLNKPLDREEQPEVNLTITAVDGGSPPKSGTAHIHVVVLDVNDHVPQFSRLVYRAQVSENSPNGSLVATVTAVDLDEGTNKAITYSLAQNPEAILKTFQIDPQNGEVRLRGPLDFEAIETYDIDIQATDGGGLSAHSKVLVEVVDVNDNPPEVMVSSVSSPLPEDSPPQTVVALFTIRDRDIRVGGKVTCFLREDLPFVIKPTFGNSYSLVTDRSLDREEVSGYNITIVAMDTGPPSLSAETMIEVLISDVNDNPPIFREDSYILTVRENNSPAVFIGKVHAEDLDLGENAQITYSLLPPKNGDLSVFAYISINSGNGKLYALRTMDYEAIQDFQFVVKATDGGFLSLSSQVTVRVVVLDDNDNRPMILYPLQNGTLPCNDLVPRSAEAGYLVTKVVAVDGDSGQNSWLSYHLLKATDLGLFSVQRQNGEIHTLRQISERDPMMQKLIILVQDHGQPALSTTVSLNILLVDGFSEPYLQFQDPTKHSRKVNPSTKYLVISLVILSFLFLLSVIVIFIIHVYQKIKYREKFTIQEHFYDDCNFSNNLVQGQGNGSLSRPCPYEMCSATGTGNSEFRFLKRFMPNFPFPHATGEIKMEAGSSLPPNSDRNKSQRLEGHDQVSDDYM 2 690 712 HSRKVNPSTK YLVISLVILSFLFLLSVIVIFII HVYQKIKYRE +Q9H158 
MVGCGVAVLCLWVSCGAAAGQLEYSVPEETERGVAVGNLSADLRLPAAAMSSRNFRFLSSHRELYFGVDLPSGNLVVREPADREQLCRAKAACVLTYDLVLEDPLELHKIRIHVLDTNDNSPLFPAGDVQLHIPEFLTPGARFTLPNAQDDDEGSNGILSYSLSPSQHFRLDMGSRVDGSEYPELVLEKALDREQRATHLLVLTARDGGLPARSGDAQVTIIVVDTNDNAPVFERSVYRTKVPETAPNGTVLFRVQALDPDEGSNGEVQYSLSNSTQAELRHRFHVHPKSGEVQVAASLGPPETLLEAYIEARDEGVFGLASTAKLLVEVTDVNDHAPELDFLTLSNPVPEDAAPGTVIALFSVKDEDLDSNGRVICGMSSAGPFQLTASFDNYYSLLIDGPLDREQISEYQVLITASDSGSPPLSTRRTITVSVADVNDNTPNFPQPQQELFVAENNGPGASLGRVFAQDPDLGKNGLVSYELLDVISEGPSASSLLAVESSSGAITAKTSFDFEQLRGFHFQVEGRDGGIPPRSATVTINLFVVDRNDNYPVILFPLPRNGSVPVEIVPRSARTGHLVTKVVAEDADSGSNAWLSYHISRASDSSLFRISANIGELRTARLVLPTDAVKQRVVVVVRDHGDPPLSSSVTLGVLLSNSVPQLLPDFEDVWEPGGQLSAQNLYLVIALACISFLFLGCLLFFVCTKLHQSPGCCAQSCCRSTEDLRYGSKMVSNPCMTSATIDVTTVERLSQTYLYRASLGLGSDNNSLLLRGEYNAADLRNLATGVGLNLPISCIQIRNRKGDHANVNAMPRQPNPDWRYSASLRAGMHSSVHLEEAGILRAGPGGPDQQWPTVSSATPEPEAGEVSPPVGAGVNSNSWTFKYGPGNPKQSGPGELPDKFIIPGSPAIISIRQEPTNSQIDKSDFITFGKKEETKKKKKKKKGNKTQEKKEKGNSTTDNSDQ 2 682 704 EPGGQLSAQN LYLVIALACISFLFLGCLLFFVC TKLHQSPGCC +Q9Y5F7 MLRKVRSWTEIWRWATLLFLFYHLGYVCGQIRYPVPEESQEGTFVGNVAQDFLLDTDSLSARRLQVAGEVNQRHFRVDLDSGALLIKNPIDREALCGLSASCIVPLEFVTEGPLEMYRAEVEIVDVNDHAPRFPRQQLDLEIGEAAPPGQRFPLEKAQDADVGSNSISSYRLSSNEHFALDVKKRSDGSLVPELLLEKPLDREKQSDYRLVLTAVDGGNPPRSGTAELRVSVLDVNDNAPAFQQSSYRISVLESAPAGMVLIQLNASDPDLGPSGNVTFYFSGHTPDRVRNLFSLHPTTGKLTLLGPLDFESENYYEFDVRARDGGSPAMEQHCSLRVDLLDVNDNAPYITVTSELGTLPESAEPGTVVALISVQDPDSGSNGDVSLRIPDHLPFALKSAFRNQFSLVTAGPLDREAKSSYDIMVTASDAGNPPLSTHRTIFLNISDVNDNPPSFFQRSHEVFVPENNRPGDLLCSLAASDPDSGLNALISYSLLEPRNRDVSASSFISLNPQTGAVHATRSFDYEQTQTLQFEVQARDRGNPPLSSTVTVRLFVLDLNDNAPAVLRPRARPGSLCPQALPPSVGAGHLITKVTAVDLDSGYNAWVSYQLLEAPDPSLFAVSRYAGEVRTAVPIPADLPPQKLVIVVKDSGSPPLSTSVTLLVSLEEDTHPVVPDLRESSAPREGESRLTLYLAVSLVAICFVSFGSFVALLSKCLRGAACGVTCFPAGTCACLTRSRRREGLPPSNGILRIQLGSDDPIKFVDVGGHSHGCTPLASAPTRSDSFMMVKSPSAPMAGEPVRPSCPPSDLLYGLEQAPPNTDWRFSQAQRPGTSGSQNGDDTGTWPNNQFDTEMLQAMILASASEAADGSSTLGGGAGTMGLSARYGPQFTLQHVPDYRQNVYIPGSNATLTNAAGKRDGKAPAGGNGNKKKSGKKEKK 2 690 712 SAPREGESRL 
TLYLAVSLVAICFVSFGSFVALL SKCLRGAACG +O60245 MLRMRTAGWARGWCLGCCLLLPLSLSLAAAKQLLRYRLAEEGPADVRIGNVASDLGIVTGSGEVTFSLESGSEYLKIDNLTGELSTSERRIDREKLPQCQMIFDENECFLDFEVSVIGPSQSWVDLFEGQVIVLDINDNTPTFPSPVLTLTVEENRPVGTLYLLPTATDRDFGRNGIERYELLQEPGGGGSGGESRRAGAADSAPYPGGGGNGASGGGSGGSKRRLDASEGGGGTNPGGRSSVFELQVADTPDGEKQPQLIVKGALDREQRDSYELTLRVRDGGDPPRSSQAILRVLITDVNDNSPRFEKSVYEADLAENSAPGTPILQLRAADLDVGVNGQIEYVFGAATESVRRLLRLDETSGWLSVLHRIDREEVNQLRFTVMARDRGQPPKTDKATVVLNIKDENDNVPSIEIRKIGRIPLKDGVANVAEDVLVDTPIALVQVSDRDQGENGVVTCTVVGDVPFQLKPASDTEGDQNKKKYFLHTSTPLDYEATREFNVVIVAVDSGSPSLSSNNSLIVKVGDTNDNPPMFGQSVVEVYFPENNIPGERVATVLATDADSGKNAEIAYSLDSSVMGIFAIDPDSGDILVNTVLDREQTDRYEFKVNAKDKGIPVLQGSTTVIVQVADKNDNDPKFMQDVFTFYVKENLQPNSPVGMVTVMDADKGRNAEMSLYIEENNNIFSIENDTGTIYSTMSFDREHQTTYTFRVKAVDGGDPPRSATATVSLFVMDENDNAPTVTLPKNISYTLLPPSSNVRTVVATVLATDSDDGINADLNYSIVGGNPFKLFEIDPTSGVVSLVGKLTQKHYGLHRLVVQVNDSGQPSQSTTTLVHVFVNESVSNATAIDSQIARSLHIPLTQDIAGDPSYEISKQRLSIVIGVVAGIMTVILIILIVVMARYCRSKNKNGYEAGKKDHEDFFTPQQHDKSKKPKKDKKNKKSKQPLYSSIVTVEASKPNGQRYDSVNEKLSDSPSMGRYRSVNGGPGSPDLARHYKSSSPLPTVQLHPQSPTAGKKHQAVQDLPPANTFVGAGDNISIGSDHCSEYSCQTNNKYSKQMRLHPYITVFG 2 878 900 DPSYEISKQR LSIVIGVVAGIMTVILIILIVVM ARYCRSKNKN +Q7TSK3 
MSPAKRWGSPCLFPLQLFSLCWVLSVAQSKTVRYSTFEEDAPGTVIGTLAEDLHMKVSGDTSFRLMKQFNSSLLRVREGDGQLTVGDAGLDRERLCGPSPQCVLAFDVVSFSQEQFRLVHVEVEVRDVNDHAPRFPRAQIPVEVSESAPVGTRIPLEVPVDEDVGANGLQSVRLAEPHSPFRVELQTRADGAQCADLVLLQELDRESQASYSLELVAQDGGRPPRSATAALSVRVLDANDHSPAFPQGAVAEVELAEDAPVGSLLLDLDAADPDEGPNGDVVFTFGARTPPEARHLFRLDPRSGRLTLAGQVDYERQDTYELDVRAQDRGPGPRTATCKVIVRIRDVNDNAPEISITPLAAPGAPATSPFAAAAAAAALGGADAASSTGSGTQEAGITSLVPEGAARESLVALVSTSDRDSGANGQVRCALYGHEHFRLQPAYAGSYLVVTAASLDRERIAEYNLTLVAEDRGTPPLRTVRPYTVRVGDENDNAPIFTKPVYEVSVRENNPPGAYLATVAARDPDVGRNGQVTYRLVEAEVGRSGEAVSTYVSVDPATGAIYALRSFDYETLRQLDVRVQASDGGSPQLSSNALVQVRVLDQNDHSPILVHPAPANGSLEVAVPGRSTKDTAVARIQARDADEGANGELAFDLLQQEPREAFSIGRHTGEIMLTGDLSQEPPGRVFKALLVISDGGRPPLTTTATVSFVVTAGGGSAVPASSGSPEHSRPPGSRLAPSGPSLQWDTPLIVIIVLAGSCTLLLAAIIAIATTCNRRKKEVRKGGALREERPGAAGGGASAPGSPDETARGTGPRPNMFDVLTFPGSGKAPFGSPAADAPPPAVAAAEVPGSEGGSATGESACHFEGQQRLRGAHAEPYGASPGFGKEPAAPPVAVWKGHSFNTISGREAEKFSGKDSGKGDSDFNDSDSDISGDALKKDLINHMQSGLWACTAECKILGHSDRCWSPSCAGPNVHPPPHPPAQMSTFCKSTSLPRDPLRRDNYYQAQLPKTVGLQSVYEKVLHRDYDRTVTLLSPPRPGRLPDLQEIGVPLYESPPGSRYVSPKKGINENV 2 748 770 SGPSLQWDTP LIVIIVLAGSCTLLLAAIIAIAT TCNRRKKEVR +Q9HC56 
MDLRDFYLLAALIACLRLDSAIAQELIYTIREELPENVPIGNIPKDLNISHINAATGTSASLVYRLVSKAGDAPLVKVSSSTGEIFTTSNRIDREKLCAGASYAEENECFFELEVVILPNDFFRLIKIKIIVKDTNDNAPMFPSPVINISIPENTLINSRFPIPSATDPDTGFNGVQHYELLNGQSVFGLDIVETPEGEKWPQLIVQQNLDREQKDTYVMKIKVEDGGTPQKSSTAILQVTVSDVNDNRPVFKEGQVEVHIPENAPVGTSVIQLHATDADIGSNAEIRYIFGAQVAPATKRLFALNNTTGLITVQRSLDREETAIHKVTVLASDGSSTPARATVTINVTDVNDNPPNIDLRYIISPINGTVYLSEKDPVNTKIALITVSDKDTDVNGKVICFIEREVPFHLKAVYDNQYLLETSSLLDYEGTKEFSFKIVASDSGKPSLNQTALVRVKLEDENDNPPIFNQPVIELSVSENNRRGLYLTTISATDEDSGKNADIVYQLGPNASFFDLDRKTGVLTASRVFDREEQERFIFTVTARDNGTPPLQSQAAVIVTVLDENDNSPKFTHNHFQFFVSENLPKYSTVGVITVTDADAGENKAVTLSILNDNDNFVLDPYSGVIKSNVSFDREQQSSYTFDVKATDGGQPPRSSTAKVTINVMDVNDNSPVVISPPSNTSFKLVPLSAIPGSVVAEVFAVDVDTGMNAELKYTIVSGNNKGLFRIDPVTGNITLEEKPAPTDVGLHRLVVNISDLGYPKSLHTLVLVFLYVNDTAGNASYIYDLIRRTMETPLDRNIGDSSQPYQNEDYLTIMIAIIAGAMVVIVVIFVTVLVRCRHASRFKAAQRSKQGAEWMSPNQENKQNKKKKRKKRKSPKSSLLNFVTIEESKPDDAVHEPINGTISLPAELEEQSIGRFDWGPAPPTTFKPNSPDLAKHYKSASPQPAFHLKPDTPVSVKKHHVIQELPLDNTFVGGCDTLSKRSSTSSDHFSASECSSQGGFKTKGPLHTRQCNSHSKSDNIPVTPQKCPSSTGFHIQENEESHYESQRRVTFHLPDGSQESCSDSGLGDHEPVGSGTLISHPLPLVQPQDEFYDQASPDKRTEADGNSDPNSDGPLGPRGLAEATEMCTQECLVLGHSDNCWMPPGLGPYQHPKSPLSTFAPQKEWVKKDKLVNGHTLTRAWKEDSNRNQFNDRKQYGSNEGHFNNGSHMTDIPLANLKSYKQAGGATESPKEHQL 2 814 836 SQPYQNEDYL TIMIAIIAGAMVVIVVIFVTVLV RCRHASRFKA +Q92824 
MGWGSRCCCPGRLDLLCVLALLGGCLLPVCRTRVYTNHWAVKIAGGFPEANRIASKYGFINIGQIGALKDYYHFYHSRTIKRSVISSRGTHSFISMEPKVEWIQQQVVKKRTKRDYDFSRAQSTYFNDPKWPSMWYMHCSDNTHPCQSDMNIEGAWKRGYTGKNIVVTILDDGIERTHPDLMQNYDALASCDVNGNDLDPMPRYDASNENKHGTRCAGEVAAAANNSHCTVGIAFNAKIGGVRMLDGDVTDMVEAKSVSFNPQHVHIYSASWGPDDDGKTVDGPAPLTRQAFENGVRMGRRGLGSVFVWASGNGGRSKDHCSCDGYTNSIYTISISSTAESGKKPWYLEECSSTLATTYSSGESYDKKIITTDLRQRCTDNHTGTSASAPMAAGIIALALEANPFLTWRDVQHVIVRTSRAGHLNANDWKTNAAGFKVSHLYGFGLMDAEAMVMEAEKWTTVPRQHVCVESTDRQIKTIRPNSAVRSIYKASGCSDNPNRHVNYLEHVVVRITITHPRRGDLAIYLTSPSGTRSQLLANRLFDHSMEGFKNWEFMTIHCWGERAAGDWVLEVYDTPSQLRNFKTPGKLKEWSLVLYGTSVQPYSPTNEFPKVERFRYSRVEDPTDDYGTEDYAGPCDPECSEVGCDGPGPDHCNDCLHYYYKLKNNTRICVSSCPPGHYHADKKRCRKCAPNCESCFGSHGDQCMSCKYGYFLNEETNSCVTHCPDGSYQDTKKNLCRKCSENCKTCTEFHNCTECRDGLSLQGSRCSVSCEDGRYFNGQDCQPCHRFCATCAGAGADGCINCTEGYFMEDGRCVQSCSISYYFDHSSENGYKSCKKCDISCLTCNGPGFKNCTSCPSGYLLDLGMCQMGAICKDGEYVDEHGHCQTCEASCAKCQGPTQEDCTTCPMTRIFDDGRCVSNCPSWKFEFENQCHPCHHTCQRCQGSGPTHCTSCGADNYGREHFLYQGECGDSCPEGHYATEGNTCLPCPDNCELCHSVHVCTRCMKGYFIAPTNHTCQKLECGQGEVQDPDYEECVPCEEGCLGCSLDDPGTCTSCAMGYYRFDHHCYKTCPEKTYSEEVECKACDSNCGSCDQNGCYWCEEGFFLLGGSCVRKCGPGFYGDQEMGECESCHRACETCTGPGHDECSSCQEGLQLLRGMCVHATKTQEEGKFWNDILRKLQPCHSSCKTCNGSATLCTSCPKGAYLLAQACVSSCPQGTWPSVRSGSCENCTEACAICSGADLCKKCQMQPGHPLFLHEGRCYSKCPEGSYAEDGICERCSSPCRTCEGNATNCHSCEGGHVLHHGVCQENCPERHVAVKGVCKHCPEMCQDCIHEKTCKECTPEFFLHDDMCHQSCPRGFYADSRHCVPCHKDCLECSGPKADDCELCLESSWVLYDGLCLEECPAGTYYEKETKECRDCHKSCLTCSSSGTCTTCQKGLIMNPRGSCMANEKCSPSEYWDEDAPGCKPCHVKCFHCMGPAEDQCQTCPMNSLLLNTTCVKDCPEGYYADEDSNRCAHCHSSCRTCEGRHSRQCHSCRPGWFQLGKECLLQCREGYYADNSTGRCERCNRSCKGCQGPRPTDCLSCDRFFFLLRSKGECHRSCPDHYYVEQSTQTCERCHPTCDQCKGKGALNCLSCVWSYHLMGGICTSDCLVGEYRVGEGEKFNCEKCHESCMECKGPGAKNCTLCPANLVLHMDDSHCLHCCNTSDPPSAQECCDCQDTTDECILRTSKVRPATEHFKTALFITSSMMLVLLLGAAVVVWKKSRGRVQPAAKAGYEKLADPNKSYSSYKSSYRESTSFEEDQVIEYRDRDYDEDDDDDIVYMGQDGTVYRKFKYGLLDDDDIDELEYDDESYSYYQ 2 1745 1764 RPATEHFKTA LFITSSMMLVLLLGAAVVVW KKSRGRVQPA +Q16549 
MPKGRQKVPHLDAPLGLPTCLWLELAGLFLLVPWVMGLAGTGGPDGQGTGGPSWAVHLESLEGDGEEETLEQQADALAQAAGLVNAGRIGELQGHYLFVQPAGHRPALEVEAIRQQVEAVLAGHEAVRWHSEQRLLRRAKRSVHFNDPKYPQQWHLNNRRSPGRDINVTGVWERNVTGRGVTVVVVDDGVEHTIQDIAPNYSPEGSYDLNSNDPDPMPHPDVENGNHHGTRCAGEIAAVPNNSFCAVGVAYGSRIAGIRVLDGPLTDSMEAVAFNKHYQINDIYSCSWGPDDDGKTVDGPHQLGKAALQHGVIAGRQGFGSIFVVASGNGGQHNDNCNYDGYANSIYTVTIGAVDEEGRMPFYAEECASMLAVTFSGGDKMLRSIVTTDWDLQKGTGCTEGHTGTSAAAPLAAGMIALMLQVRPCLTWRDVQHIIVFTATRYEDRRAEWVTNEAGFSHSHQHGFGLLNAWRLVNAAKIWTSVPYLASYVSPVLKENKAIPQSPRSLEVLWNVSRMDLEMSGLKTLEHVAVTVSITHPRRGSLELKLFCPSGMMSLIGAPRSMDSDPNGFNDWTFSTVRCWGERARGTYRLVIRDVGDESFQVGILRQWQLTLYGSVWSAVDIRDRQRLLESAMSGKYLHDDFALPCPPGLKIPEEDGYTITPNTLKTLVLVGCFTVFWTVYYMLEVYLSQRNVASNQVCRSGPCHWPHRSRKAKEEGTELESVPLCSSKDPDEVETESRGPPTTSDLLAPDLLEQGDWSLSQNKSALDCPHQHLDVPHGKEEQIC 2 13 35 KGRQKVPHLD APLGLPTCLWLELAGLFLLVPWV MGLAGTGGPD +Q9EP73 MRIFAGIIFTACCHLLRAFTITAPKDLYVVEYGSNVTMECRFPVERELDLLALVVYWEKEDEQVIQFVAGEEDLKPQHSNFRGRASLPKDQLLKGNAALQITDVKLQDAGVYCCIISYGGADYKRITLKVNAPYRKINQRISVDPATSEHELICQAEGYPEAEVIWTNSDHQPVSGKRSVTTSRTEGMLLNVTSSLRVNATANDVFYCTFWRSQPGQNHTAELIIPELPATHPPQNRTHWVLLGSILLFLIVVSTVLLFLRKQVRMLDVEKCGVEDTSSKNRNDTQFEET 2 238 260 LPATHPPQNR THWVLLGSILLFLIVVSTVLLFL RKQVRMLDVE +Q9BQ51 MIFLLLMLSLELQLHQIAALFTVTVPKELYIIEHGSNVTLECNFDTGSHVNLGAITASLQKVENDTSPHRERATLLEEQLPLGKASFHIPQVQVRDEGQYQCIIIYGVAWDYKYLTLKVKASYRKINTHILKVPETDEVELTCQATGYPLAEVSWPNVSVPANTSHSRTPEGLYQVTSVLRLKPPPGRNFSCVFWNTHVRELTLASIDLQSQMEPRTHPTWLLHIFIPFCIIAFIFIATVIALRKQLCQKLYSSKDTTKRPVTTTKREVNSAI 2 221 243 SQMEPRTHPT WLLHIFIPFCIIAFIFIATVIAL RKQLCQKLYS +Q15116 MQIPQAPWPVVWAVLQLGWRPGWFLDSPDRPWNPPTFSPALLVVTEGDNATFTCSFSNTSESFVLNWYRMSPSNQTDKLAAFPEDRSQPGQDCRFRVTQLPNGRDFHMSVVRARRNDSGTYLCGAISLAPKAQIKESLRAELRVTERRAEVPTAHPSPSPRPAGQFQTLVVGVVGGLLGSLVLLVWVLAVICSRAARGTIGARRTGQPLKEDPSAVPVFSVDYGELDFQWREKTPEPPVPCVPEQTEYATIVFPSGMGTSSPARRGSADGPRSAQPLRPEDGHCSWPL 2 168 190 PSPRPAGQFQ TLVVGVVGGLLGSLVLLVWVLAV ICSRAARGTI +Q9NZ53 
MGRLLRAARLPPLLSPLLLLLVGGAFLGACVAGSDEPGPEGLTSTSLLDLLLPTGLEPLDSEEPSETMGLGAGLGAPGSGFPSEENEESRILQPPQYFWEEEEELNDSSLDLGPTADYVFPDLTEKAGSIEDTSQAQELPNLPSPLPKMNLVEPPWHMPPREEEEEEEEEEEREKEEVEKQEEEEEEELLPVNGSQEEAKPQVRDFSLTSSSQTPGATKSRHEDSGDQASSGVEVESSMGPSLLLPSVTPTTVTPGDQDSTSQEAEATVLPAAGLGVEFEAPQEASEEATAGAAGLSGQHEEVPALPSFPQTTAPSGAEHPDEDPLGSRTSASSPLAPGDMELTPSSATLGQEDLNQQLLEGQAAEAQSRIPWDSTQVICKDWSNLAGKNYIILNMTENIDCEVFRQHRGPQLLALVEEVLPRHGSGHHGAWHISLSKPSEKEQHLLMTLVGEQGVVPTQDVLSMLGDIRRSLEEIGIQNYSTTSSCQARASQVRSDYGTLFVVLVVIGAICIIIIALGLLYNCWQRRLPKLKHVSHGEELRFVENGCHDNPTLDVASDSQSEMQEKHPSLNGGGALNGPGSWGALMGGKRDPEDSDVFEEDTHL 2 500 522 RASQVRSDYG TLFVVLVVIGAICIIIIALGLLY NCWQRRLPKL +P16284 MQPRWAQGATMWLGVLLTLLLCSSLEGQENSFTINSVDMKSLPDWTVQNGKNLTLQCFADVSTTSHVKPQHQMLFYKDDVLFYNISSMKSTESYFIPEVRIYDSGTYKCTVIVNNKEKTTAEYQVLVEGVPSPRVTLDKKEAIQGGIVRVNCSVPEEKAPIHFTIEKLELNEKMVKLKREKNSRDQNFVILEFPVEEQDRVLSFRCQARIISGIHMQTSESTKSELVTVTESFSTPKFHISPTGMIMEGAQLHIKCTIQVTHLAQEFPEIIIQKDKAIVAHNRHGNKAVYSVMAMVEHSGNYTCKVESSRISKVSSIVVNITELFSKPELESSFTHLDQGERLNLSCSIPGAPPANFTIQKEDTIVSQTQDFTKIASKSDSGTYICTAGIDKVVKKSNTVQIVVCEMLSQPRISYDAQFEVIKGQTIEVRCESISGTLPISYQLLKTSKVLENSTKNSNDPAVFKDNPTEDVEYQCVADNCHSHAKMLSEVLRVKVIAPVDEVQISILSSKVVESGEDIVLQCAVNEGSGPITYKFYREKEGKPFYQMTSNATQAFWTKQKASKEQEGEYYCTAFNRANHASSVPRSKILTVRVILAPWKKGLIAVVIIGVIIALLIIAAKCYFLRKAKAKQMPVEMSRPAVPLLNSNNEKMSDPNMEANSHYGHNDDVRNHAMKPINDNKEPLNSDVQYTEVQVSSAESHKDLGKKDTETVYSEVRKAVPDAVESRYSRTEGSLDGT 2 603 625 RVILAPWKKG LIAVVIIGVIIALLIIAAKCYFL RKAKAKQMPV +P07202 
MRALAVLSVTLVMACTEAFFPFISRGKELLWGKPEESRVSSVLEESKRLVDTAMYATMQRNLKKRGILSPAQLLSFSKLPEPTSGVIARAAEIMETSIQAMKRKVNLKTQQSQHPTDALSEDLLSIIANMSGCLPYMLPPKCPNTCLANKYRPITGACNNRDHPRWGASNTALARWLPPVYEDGFSQPRGWNPGFLYNGFPLPPVREVTRHVIQVSNEVVTDDDRYSDLLMAWGQYIDHDIAFTPQSTSKAAFGGGADCQMTCENQNPCFPIQLPEEARPAAGTACLPFYRSSAACGTGDQGALFGNLSTANPRQQMNGLTSFLDASTVYGSSPALERQLRNWTSAEGLLRVHARLRDSGRAYLPFVPPRAPAACAPEPGIPGETRGPCFLAGDGRASEVPSLTALHTLWLREHNRLAAALKALNAHWSADAVYQEARKVVGALHQIITLRDYIPRILGPEAFQQYVGPYEGYDSTANPTVSNVFSTAAFRFGHATIHPLVRRLDASFQEHPDLPGLWLHQAFFSPWTLLRGGGLDPLIRGLLARPAKLQVQDQLMNEELTERLFVLSNSSTLDLASINLQRGRDHGLPGYNEWREFCGLPRLETPADLSTAIASRSVADKILDLYKHPDNIDVWLGGLAENFLPRARTGPLFACLIGKQMKALRDGDWFWWENSHVFTDAQRRELEKHSLSRVICDNTGLTRVPMDAFQVGKFPEDFESCDSITGMNLEAWRETFPQDDKCGFPESVENGDFVHCEESGRRVLVYSCRHGYELQGREQLTCTQEGWDFQPPLCKDVNECADGAHPPCHASARCRNTKGGFQCLCADPYELGDDGRTCVDSGRLPRVTWISMSLAALLIGGFAGLTSTVICRWTRTGTKSTLPISETGGGTPELRCGKHQAVGTSPQRAAAQDSEQESAGMEGRDTHRLPRAL 2 849 871 VDSGRLPRVT WISMSLAALLIGGFAGLTSTVIC RWTRTGTKST +Q8IYJ0 MESRMWPALLLSHLLPLWPLLLLPLPPPAQGSSSSPRTPPAPARPPCARGGPSAPRHVCVWERAPPPSRSPRVPRSRRQVLPGTAPPATPSGFEEGPPSSQYPWAIVWGPTVSREDGGDPNSANPGFLDYGFAAPHGLATPHPNSDSMRGDGDGLILGEAPATLRPFLFGGRGEGVDPQLYVTITISIIIVLVATGIIFKFCWDRSQKRRRPSGQQGALRQEESQQPLTDLSPAGVTVLGAFGDSPTPTPDHEEPRGGPRPGMPHPKGAPAFQLNRIPLVNL 2 181 203 GRGEGVDPQL YVTITISIIIVLVATGIIFKFCW DRSQKRRRPS +P01833 
MLLFVLTCLLAVFPAISTKSPIFGPEEVNSVEGNSVSITCYYPPTSVNRHTRKYWCRQGARGGCITLISSEGYVSSKYAGRANLTNFPENGTFVVNIAQLSQDDSGRYKCGLGINSRGLSFDVSLEVSQGPGLLNDTKVYTVDLGRTVTINCPFKTENAQKRKSLYKQIGLYPVLVIDSSGYVNPNYTGRIRLDIQGTGQLLFSVVINQLRLSDAGQYLCQAGDDSNSNKKNADLQVLKPEPELVYEDLRGSVTFHCALGPEVANVAKFLCRQSSGENCDVVVNTLGKRAPAFEGRILLNPQDKDGSFSVVITGLRKEDAGRYLCGAHSDGQLQEGSPIQAWQLFVNEESTIPRSPTVVKGVAGGSVAVLCPYNRKESKSIKYWCLWEGAQNGRCPLLVDSEGWVKAQYEGRLSLLEEPGNGTFTVILNQLTSRDAGFYWCLTNGDTLWRTTVEIKIIEGEPNLKVPGNVTAVLGETLKVPCHFPCKFSSYEKYWCKWNNTGCQALPSQDEGPSKAFVNCDENSRLVSLTLNLVTRADEGWYWCGVKQGHFYGETAAVYVAVEERKAAGSRDVSLAKADAAPDEKVLDSGFREIENKAIQDPRLFAEEKAVADTRDQADGSRASVDSGSSEEQGGSSRALVSTLVPLGLVLAVGAVAVGVARARHRKNVDRVSIRSYRTDISMSDFENSREFGANDNMGASSITQETSLGGKEEFVATTESTTETKEPKKAKRSSKEEAEMAYKDFLLQSSTVAAEAQDGPQEA 2 639 661 SSEEQGGSSR ALVSTLVPLGLVLAVGAVAVGVA RARHRKNVDR +Q969N2 MAAAMPLALLVLLLLGPGGWCLAEPPRDSLREELVITPLPSGDVAATFQFRTRWDSELQREGVSHYRLFPKALGQLISKYSLRELHLSFTQGFWRTRYWGPPFLQAPSGAELWVWFQDTVTDVDKSWKELSNVLSGIFCASLNFIDSTNTVTPTASFKPLGLANDTDHYFLRYAVLPREVVCTENLTPWKKLLPCSSKAGLSVLLKADRLFHTSYHSQAVHIRPVCRNARCTSISWELRQTLSVVFDAFITGQGKKDWSLFRMFSRTLTEPCPLASESRVYVDITTYNQDNETLEVHPPPTTTYQDVILGTRKTYAIYDLLDTAMINNSRNLNIQLKWKRPPENEAPPVPFLHAQRYVSGYGLQKGELSTLLYNTHPYRAFPVLLLDTVPWYLRLYVHTLTITSKGKENKPSYIHYQPAQDRLQPHLLEMLIQLPANSVTKVSIQFERALLKWTEYTPDPNHGFYVSPSVLSALVPSMVAAKPVDWEESPLFNSLFPVSDGSNYFVRLYTEPLLVNLPTPDFSMPYNVICLTCTVVAVCYGSFYNLLTRTFHIEEPRTGGLAKRLANLIRRARGVPPL 2 522 544 PLLVNLPTPD FSMPYNVICLTCTVVAVCYGSFY NLLTRTFHIE +Q9UKJ1 MGRPLLLPLLPLLLPPAFLQPSGSTGSGPSYLYGVTQPKHLSASMGGSVEIPFSFYYPWELATAPDVRISWRRGHFHRQSFYSTRPPSIHKDYVNRLFLNWTEGQKSGFLRISNLQKQDQSVYFCRVELDTRSSGRQQWQSIEGTKLSITQAVTTTTQRPSSMTTTWRLSSTTTTTGLRVTQGKRRSDSWHISLETAVGVAVAVTVLGIMILGLICLLRWRRRKGQQRTKATTPAREPFQNTEEPYENIRNEGQNTDPKLNPKDDGIVYASLALSSSTSPRAPPSHRPLKSPQNETLYSVLKA 2 196 218 RSDSWHISLE TAVGVAVAVTVLGIMILGLICLL RWRRRKGQQR +Q13018 
MLLSPSLLLLLLLGAPRGCAEGVAAALTPERLLEWQDKGIFVIQSESLKKCIQAGKSVLTLENCKQANKHMLWKWVSNHGLFNIGGSGCLGLNFSAPEQPLSLYECDSTLVSLRWRCNRKMITGPLQYSVQVAHDNTVVASRKYIHKWISYGSGGGDICEYLHKDLHTIKGNTHGMPCMFPFQYNHQWHHECTREGREDDLLWCATTSRYERDEKWGFCPDPTSAEVGCDTIWEKDLNSHICYQFNLLSSLSWSEAHSSCQMQGGTLLSITDETEENFIREHMSSKTVEVWMGLNQLDEHAGWQWSDGTPLNYLNWSPEVNFEPFVEDHCGTFSSFMPSAWRSRDCESTLPYICKKYLNHIDHEIVEKDAWKYYATHCEPGWNPYNRNCYKLQKEEKTWHEALRSCQADNSALIDITSLAEVEFLVTLLGDENASETWIGLSSNKIPVSFEWSNDSSVIFTNWHTLEPHIFPNRSQLCVSAEQSEGHWKVKNCEERLFYICKKAGHVLSDAESGCQEGWERHGGFCYKIDTVLRSFDQASSGYYCPPALVTITNRFEQAFITSLISSVVKMKDSYFWIALQDQNDTGEYTWKPVGQKPEPVQYTHWNTHQPRYSGGCVAMRGRHPLGRWEVKHCRHFKAMSLCKQPVENQEKAEYEERWPFHPCYLDWESEPGLASCFKVFHSEKVLMKRTWREAEAFCEEFGAHLASFAHIEEENFVNELLHSKFNWTEERQFWIGFNKRNPLNAGSWEWSDRTPVVSSFLDNTYFGEDARNCAVYKANKTLLPLHCGSKREWICKIPRDVKPKIPFWYQYDVPWLFYQDAEYLFHTFASEWLNFEFVCSWLHSDLLTIHSAHEQEFIHSKIKALSKYGASWWIGLQEERANDEFRWRDGTPVIYQNWDTGRERTVNNQSQRCGFISSITGLWGSEECSVSMPSICKRKKVWLIEKKKDTPKQHGTCPKGWLYFNYKCLLLNIPKDPSSWKNWTHAQHFCAEEGGTLVAIESEVEQAFITMNLFGQTTSVWIGLQNDDYETWLNGKPVVYSNWSPFDIINIPSHNTTEVQKHIPLCALLSSNPNFHFTGKWYFEDCGKEGYGFVCEKMQDTSGHGVNTSDMYPMPNTLEYGNRTYKIINANMTWYAAIKTCLMHKAQLVSITDQYHQSFLTVVLNRLGYAHWIGLFTTDNGLNFDWSDGTKSSFTFWKDEESSLLGDCVFADSNGRWHSTACESFLQGAICHVPPETRQSEHPELCSETSIPWIKFKSNCYSFSTVLDSMSFEAAHEFCKKEGSNLLTIKDEAENAFLLEELFAFGSSVQMVWLNAQFDGNNETIKWFDGTPTDQSNWGIRKPDTDYFKPHHCVALRIPEGLWQLSPCQEKKGFICKMEADIHTAEALPEKGPSHSIIPLAVVLTLIVIVAICTLSFCIYKHNGGFFRRLAGFRNPYYPATNFSTVYLEENILISDLEKSDQ 2 1398 1420 ALPEKGPSHS IIPLAVVLTLIVIVAICTLSFCI YKHNGGFFRR +Q3TTY0 
MELYPGVSPVGLLLLLLLGQGPSQIHGSSGENTLAWQSQQVFWTLKNFPFPCKPKKLELSVLSESVHSLRPSDIKLVAAIGNPEIPLAPGSGTINMEKPQSIKNQPQDVCMGIMTVLSDIIRHFSPSVLMPTCSPGKGTAVHTTAEDLWIQAKELVRRLKDNPQLDFEKDWKLITVFFSNTSQCHLCPSAQQKSHLMRHMEMLWGVLDYLHHEVPRAFVNLVDLSEVLAMDLQHQETGFSPAPEVCKCTETTTLSKAVMQWSYQEAWEDLLASSKFNKHETFAVVFQPFFDEIEPPLKRSSPQDPTTLALRIWNSMMEPVGQKDGLLNTAERKTMKCPSEESPYLFTYKNSNYQARRLKPITKLQMKEGSEFTCPDKNPSNSIPTTVHSLRPADIKIIGALGDSLTAGNGAGASPWNILDVLTEYRGLSWSVGGDETIKTVTTLPNILREFNPSLKGFSVGTGKESTSRASFNQAVAGAKSDGLAGQARKLVDLMKADKTINFQEDWKIITVFIGGNDLCASCSNSTRFSPQNFIDNIKNALDILHAEVPRAFVNMAMVMEITPLRELFNEPTVSCPRNILSRLCPCVLGLGDNSEELSSLVQRNRDYQKKTEELINSGRYDTRDNFTVVVQPLFENVSMPRTPEGVPDKSFFAPDCFHFNAKTHARSAIALWKNMLEPVGHKTRHNNFEIKAPIVCPNQASPFLSTTKNSNLGNGTWMVCEERAPSASPPTSVHTLRPADIQVVAALGDSLTAGNGISSQEGNLTDVSTQYRGLSYSAGGDKTLENVTTLPNILRKFNGNLTGYSVGTGDSSSANAFLNQAVPGAKAENLTSQVRTLVQKMKSDNRVNFNRDWKVITVMIGASDLCDFCTDSNHYSAANFFDHLQNALDILHKEVPRALVNLVDFINPSIIREVFLKNPDKCPVNQSSVLCNCVLTPRKDSYELARLEAFTKSYQSSMLQLVESGRYDTREDFSVVLQPFLLNTKLPVLENGKPDTSFFAPDCIHLNQKFHTQLARALWANMLEPLGKKTDTLDPKGHISLACPTKDQPFLRTFRNSNYKYPTKPAIENWGSDFLCTEKSPSSQVPTSVHELRPADIKVVAAMGDFLTTATGARPSGYKRLATPWRGLSWSIGGDGKLETHTTLPNILKKFNPSITGFSTGTLDNKAGLNVAEEGARAQDMPAQAKTLVKKMKSTPTINLQEDWKLITLLIGNNDLCLYCENPEDNSTKEYVKYIQQALDILYEELPRVFINVVEVMELAGLHHVQGGKCAMPLAVQKNCSCLRHSQNLTAMQELKKLNWNLQSGISELSYWHRYMEREDFAVTVQPFFRNTFIPLNEREGLDLTFFSEDCFYFSDRGHAEMAIALWNNMLEPVGWKTSSNNFIYNRTKLKCPSPERPFLYTLRNSQLLPDKAEEPSNALYWAVPVAAIGGLAVGILGVMLWRTVKPVQQEEEEEDTLPNTSVTQDAVSEKRLKAGN 2 1420 1442 LPDKAEEPSN ALYWAVPVAAIGGLAVGILGVML WRTVKPVQQE +Q9Z239 MASPGHILALCVCLLSMASAEAPQEPDPFTYDYHTLRIGGLTIAGILFILGILIILSKRCRCKFNQQQRTGEPDEEEGTFRSSIRRLSSRRR 2 35 57 EPDPFTYDYH TLRIGGLTIAGILFILGILIILS KRCRCKFNQQ +O75051 
MEQRRPWPRALEVDSRSVVLLSVVWVLLAPPAAGMPQFSTFHSENRDWTFNHLTVHQGTGAVYVGAINRVYKLTGNLTIQVAHKTGPEEDNKSCYPPLIVQPCSEVLTLTNNVNKLLIIDYSENRLLACGSLYQGVCKLLRLDDLFILVEPSHKKEHYLSSVNKTGTMYGVIVRSEGEDGKLFIGTAVDGKQDYFPTLSSRKLPRDPESSAMLDYELHSDFVSSLIKIPSDTLALVSHFDIFYIYGFASGGFVYFLTVQPETPEGVAINSAGDLFYTSRIVRLCKDDPKFHSYVSLPFGCTRAGVEYRLLQAAYLAKPGDSLAQAFNITSQDDVLFAIFSKGQKQYHHPPDDSALCAFPIRAINLQIKERLQSCYQGEGNLELNWLLGKDVQCTKAPVPIDDNFCGLDINQPLGGSTPVEGLTLYTTSRDRMTSVASYVYNGYSVVFVGTKSGKLKKIRADGPPHGGVQYEMVSVLKDGSPILRDMAFSIDQRYLYVMSERQVTRVPVESCEQYTTCGECLSSGDPHCGWCALHNMCSRRDKCQQAWEPNRFAASISQCVSLAVHPSSISVSEHSRLLSLVVSDAPDLSAGIACAFGNLTEVEGQVSGSQVICISPGPKDVPVIPLDQDWFGLELQLRSKETGKIFVSTEFKFYNCSAHQLCLSCVNSAFRCHWCKYRNLCTHDPTTCSFQEGRINISEDCPQLVPTEEILIPVGEVKPITLKARNLPQPQSGQRGYECVLNIQGAIHRVPALRFNSSSVQCQNSSYQYDGMDISNLAVDFAVVWNGNFIIDNPQDLKVHLYKCAAQRESCGLCLKADRKFECGWCSGERRCTLHQHCTSPSSPWLDWSSHNVKCSNPQITEILTVSGPPEGGTRVTIHGVNLGLDFSEIAHHVQVAGVPCTPLPGEYIIAEQIVCEMGHALVGTTSGPVRLCIGECKPEFMTKSHQQYTFVNPSVLSLNPIRGPESGGTMVTITGHYLGAGSSVAVYLGNQTCEFYGRSMSEIVCVSPPSSNGLGPVPVSVSVDRAHVDSNLQFEYIDDPRVQRIEPEWSIASGHTPLTITGFNLDVIQEPRIRVKFNGKESVNVCKVVNTTTLTCLAPSLTTDYRPGLDTVERPDEFGFVFNNVQSLLIYNDTKFIYYPNPTFELLSPTGVLDQKPGSPIILKGKNLCPPASGGAKLNYTVLIGETPCAVTVSETQLLCEPPNLTGQHKVMVHVGGMVFSPGSVSVISDSLLTLPAIVSIAAGGSLLLIIVIIVLIAYKRKSRENDLTLKRLQMQMDNLESRVALECKEAFAELQTDINELTSDLDRSGIPYLDYRTYAMRVLFPGIEDHPVLRELEVQGNGQQHVEKALKLFAQLINNKVFLLTFIRTLELQRSFSMRDRGNVASLIMTGLQGRLEYATDVLKQLLSDLIDKNLENKNHPKLLLRRTESVAEKMLTNWFAFLLHKFLKECAGEPLFMLYCAIKQQMEKGPIDAITGEARYSLSEDKLIRQQIEYKTLILNCVNPDNENSPEIPVKVLNCDTITQVKEKILDAVYKNVPYSQRPRAVDMDLEWRQGRIARVVLQDEDITTKIEGDWKRLNTLMHYQVSDRSVVALVPKQTSSYNIPASASISRTSISRYDSSFRYTGSPDSLRSRAPMITPDLESGVKVWHLVKNHDHGDQKEGDRGSKMVSEIYLTRLLATKGTLQKFVDDLFETLFSTVHRGSALPLAIKYMFDFLDEQADRHSIHDTDVRHTWKSNCLPLRFWVNVIKNPQFVFDIHKGSITDACLSVVAQTFMDSCSTSEHRLGKDSPSNKLLYAKDIPSYKSWVERYYADIAKLPAISDQDMNAYLAEQSRLHAVEFNMLSALNEIYSYVSKYSEELIGALEQDEQARRQRLAYKVEQLINAMSIES 2 1238 1260 VISDSLLTLP AIVSIAAGGSLLLIIVIIVLIAY KRKSRENDLT +Q9QY40 
MLTDFLQAPVMAPWSPFSLHLLLLFLPLLPLTRVHRFSVPNTSFNHLVLAPDQGKLYVGAVNHLFQLSPELKMESVAVTGPVIDSPDCVPFRDLAECPQAQLTDNANQLLLVSSRTQELVACGQVKQGVCEKRRLGDVTQVLYQAEDPGDGQFVAANTLGVTTVGLVVPLPGRDLLLVARGLAGKLSAGVPPLTVRQLAGPQPFSSEGLGRLVVGDFSDYNNSYVGAFSDAHSAYFVFRRRGARAQTEYRSYVARVCLRDVNLYSYVEMPLTCHGQGLIQAAFLTPDTLLGAFSAGTSQAQAALCAFPLADLDRSMEQARRLCYTTGGQGPSGMEEATVEYGVTSRCVTLPPDSPESYPCGDEHTPSPIAGRQPLEAQPLLQLGQSISAVAALQTDGHTIAFLGDTQGQLHKVFLNSSHGQVYHSQQVGPPGSAISPDLLVDSNGDHLYVLTAQQVDRILVAACPQFPNCTTCLQARDPLCGWCILQGRCTRRGECGRAAQPNHWLWSYEDNHCPYIQSLLPAQHPRQEQGQIILSVPRLPTLAMDEYFHCAFGGYNSLAQVEEPHVVCTTPPQDQMPPNPPGSDHVTLPLALMFEDVVLTATTFSFYDCSAVQALEVAAPCRACVSSLWRCHWCPQSSHCIYGEHCPEGEKAVYSAQEVDILVRGPEACPQVEGLASPQLVPVGWESHVTLHIQNLHYFQGLPALYHCWLELPGKLQKLPASLEETSRDSGLIHCQAQQFYPSMSQWELPVPIYVTRGEIQRLDNAGDLHVTLYDCAMGHPDCSHCQAANGSLSCLWCGDGQPACRYGPLCPPGAVEQLCPIPSIDVIEPLTGPPEGGLAITILGSNLGQAFNDVRNAVTVAGQPCNPDPSLYRISARIVCVTSPAPNGTAGPVQVAIKSRPPGISTQNFTYQDPVLLSLNPQWGPQAGGTQLTIHGQYLQTGGNISVFVGDQPCPIQEPVCPEAIICHTMPQTEPGEAVVLIVFGHVERKLLTTPFRYTANPQLVEAEPSVSFRGGGRVIRVRGTGLDVVWQPLLSVWLEDEPKVKALGVQAQDANPRRSCGAPAADPQACIHLESGLLQCSTLCSVNSSSLLLCHSPAVPDGALPKRVFFALDNMQVDFASASGGQGFLYQPNPRLAPLSHEGITHPYHLKPGHVLDVEGEGLNLGISKEEVQVHIGDGECLVKTLTLTHLYCEPPPQAPQPTNGSGTLPQFVVQMGNLRLALGPVQYEAESMMSTFPVEAQLGLGMGAAVLIAAVLLLTLMYRHKSKKALRDYQKVLVQLENLETGVGDQCRKEFTDLMTEMTDLTSDLEASGIPFLDYRTYAERAFFPGHVGCPLQPGLEGLGEEGRSVTVRQGLTQLSNLLNSKLFLLTLIHTLEEQPSFSQRDRCHVASLLSLALHSKLEYLTDIMRTLLGDLAAHYVHKNPKLMLRRTETMVEKLLTNWLSICLYTFLKEVAGEPLYMLFRAIKYQVDKGPVDAVTGKAKRTLNDSHLLREDVEFQPLTLMALVGPEADRAAGNSGVHRVPARVLDTDTITQVKEKVLDQIYKGTPFSQRPSVHSLDLEWRSGLAGHLTLSDEDLTSVTQNHWKRLNTLQHYKVPDGATVVLIPQVHNGGTVSQSLGQTGCPSGENTPMLEDGEEGGVRLWHLVKATEEAEGAKVRRSSLRDRERERSRAKAIPEIYLTRLLSMKGTLQKFVDDTFQAILSMNRPVPIAVKYLFDFLDELAEKHGIEDPETLHIWKTNSLLLRFWVNVLKNPQLIFDVQVSDNEDAILAVIAQTFIDSCMVSEHKVGRDSPVNKLLYAREIPRYKQMVEKYYADIRQSSPASYQEMNSALAELSGNYSSAPHCLEALRELYNHIHRYYDQIISALEEDPVAQKMQLACRLQQVAALVEYKVTDL 2 1244 1266 ESMMSTFPVE AQLGLGMGAAVLIAAVLLLTLMY RHKSKKALRD +Q9QZC2 
MEVSRRKTPPRPPYPAAPLPLIAYLLALAAPARGADEPVWRSEQAIGAIAASRADGVFVASGSCLDQLDYSLKNRLSRLYRDQAGNCTEPVSLAPPARPRPGSSFSKLLLPYREGATGLEGLLLTGWTFDRGACEVRPLGNLNRSSLRNGTEVVSCHPQGSTAGVVYRASGTDLWYLAVAATYVLPEPETANRCNPAASDRDTAIALKNTEGRSLATQELGRLKLRGSAGSLHFVDAFLWNGSVYFPYYPYNYTSGAATGWPSMARIAQSTEVLFQGQAALDCDHGHPEGRRLLLSSSLVEAVDIWAGVFSAATGEGQERRSPATTALCLFRMSEIQAHARSCSWDFQATEHNCKEGDRPERVQPIASSTLIHSDLTSVYGTVVMNRTVLFLGTGDGQLLKVVLGENLTSNCPEVIYEIKEETPVFYKLVPHPMKNIYIYLTAGKEVRRIPVANCSKRKSCSECLAAADPHCGWCLPLQRCTFQGDCTHAGSFENWLDISSGPKKCPKIQILRSLRERTTVTIVGSISARHSECVVKNADTGKLLCQGRSQLNWTCACNIPSRPSYNVLVVNATFSFPSWNLSERFNFTNCASLKECPACIRSGCAWCKRDKKCIHPFTPCEPSDYERNQELCQVAVEKSPKDSGGGRVKESKRNRTDGAVQVFYIKAIEPQKISTLGKSNVIVTGANFTQASNITMILRGTSTCERDVIRVSHVLNDTHMKFSLPSSRKEMKDVCIQFDGGTCSSAGALSYIALPHCSLIVPATTWISGGQNITIMGRNFDVIDNLIISHELKGNANVNINVSEYCAATFCRFLAPNLKSSKVRTNVAVKLRVQDTYLDCGTLQYLEDPRFTGYRVESEIDTELEVKIQKENDNFNISKDDIDITLFHGENKQFNCSFENITRNQDLTTILCKIKSIKNANTIASSSKKVRVKLGNLELYVEQESVPSTWYFLIALPILLAIVIVVAVVVTRYKSKELSRKQSQQLELLESELRKEIRDGFAELQMDKLDVVDSFGTVPFLDYKHFALRTFFPESGGFTHIFTEDMHNRDANDKNESLTALDALICNKSFLVTVIHTLEKQKNFSVKDRCLFASFLTIALQTKLVYLTSILEVLTRDLMEQCSNMQPKLMLRRTESVVEKLLTNWMSVCLSGFLRETVGEPFYLLVTTLNQKINKGPVDVITCKALYTLNEDWLLWQVPEFNTVALNVVFEKIPENESADVCRNISVNVLDCDTIGQAKEKVFQAFLSKNGSPYGLQLNEIGLELQVGTRQKELLDIDSSSVILEDGITKLNTIGHYEISNGSTIKVFKKIANFTSDVEYSDDHCHLILPDSEAFQVVQGKRHRGKHKFKVKEMYLTKLLSTKVAIHSVLEKLFRSIWSLPNSRAPFAIKYFFDFLDAQAENKKITDPDVVHIWKTNSLPLRFWVNILKNPQFVFDIKKTPHIDSCLSVIAQAFMDAFSLTEQQLGKEAPTNKLLYAKDIPTYKEEVKSYYKAIRDLPPLSSLEMEEFLTQESKKHENEFNEEVALTEIYKYIVKYFDEILNKLERERGLEEAQKQLLHVKVLFDEKKKCKWM 2 950 972 LYVEQESVPS TWYFLIALPILLAIVIVVAVVVT RYKSKELSRK +Q8TEM1 
MAARGRGLLLLTLSVLLAAGPSAAAAKLNIPKVLLPFTRATRVNFTLEASEGCYRWLSTRPEVASIEPLGLDEQQCSQKAVVQARLTQPARLTSIIFAEDITTGQVLRCDAIVDLIHDIQIVSTTRELYLEDSPLELKIQALDSEGNTFSTLAGLVFEWTIVKDSEADRFSDSHNALRILTFLESTYIPPSYISEMEKAAKQGDTILVSGMKTGSSKLKARIQEAVYKNVRPAEVRLLILENILLNPAYDVYLMVGTSIHYKVQKIRQGKITELSMPSDQYELQLQNSIPGPEGDPARPVAVLAQDTSMVTALQLGQSSLVLGHRSIRMQGASRLPNSTIYVVEPGYLGFTVHPGDRWVLETGRLYEITIEVFDKFSNKVYVSDNIRIETVLPAEFFEVLSSSQNGSYHRIRALKRGQTAIDAALTSVVDQDGGVHILQVPVWNQQEVEIHIPITLYPSILTFPWQPKTGAYQYTIRAHGGSGNFSWSSSSHLVATVTVKGVMTTGSDIGFSVIQAHDVQNPLHFGEMKVYVIEPHSMEFAPCQVEARVGQALELPLRISGLMPGGASEVVTLSDCSHFDLAVEVENQGVFQPLPGRLPPGSEHCSGIRVKAEAQGSTTLLVSYRHGHVHLSAKITIAAYLPLKAVDPSSVALVTLGSSKEMLFEGGPRPWILEPSKFFQNVTAEDTDSIGLALFAPHSSRNYQQHWILVTCQALGEQVIALSVGNKPSLTNPFPAVEPAVVKFVCAPPSRLTLAPVYTSPQLDMSCPLLQQNKQVVPVSSHRNPRLDLAAYDQEGRRFDNFSSLSIQWESTRPVLASIEPELPMQLVSQDDESGQKKLHGLQAILVHEASGTTAITATATGYQESHLSSARTKQPHDPLVPLSASIELILVEDVRVSPEEVTIYNHPGIQAELRIREGSGYFFLNTSTADVVKVAYQEARGVAMVHPLLPGSSTIMIHDLCLVFPAPAKAVVYVSDIQELYIRVVDKVEIGKTVKAYVRVLDLHKKPFLAKYFPFMDLKLRAASPIITLVALDEALDNYTITFLIRGVAIGQTSLTASVTNKAGQRINSAPQQIEVFPPFRLMPRKVTLLIGATMQVTSEGGPQPQSNILFSISNESVALVSAAGLVQGLAIGNGTVSGLVQAVDAETGKVVIISQDLVQVEVLLLRAVRIRAPIMRMRTGTQMPIYVTGITNHQNPFSFGNAVPGLTFHWSVTKRDVLDLRGRHHEASIRLPSQYNFAMNVLGRVKGRTGLRVVVKAVDPTSGQLYGLARELSDEIQVQVFEKLQLLNPEIEAEQILMSPNSYIKLQTNRDGAASLSYRVLDGPEKVPVVHVDEKGFLASGSMIGTSTIEVIAQEPFGANQTIIVAVKVSPVSYLRVSMSPVLHTQNKEALVAVPLGMTVTFTVHFHDNSGDVFHAHSSVLNFATNRDDFVQIGKGPTNNTCVVRTVSVGLTLLRVWDAEHPGLSDFMPLPVLQAISPELSGAMVVGDVLCLATVLTSLEGLSGTWSSSANSILHIDPKTGVAVARAVGSVTVYYEVAGHLRTYKEVVVSVPQRIMARHLHPIQTSFQEATASKVIVAVGDRSSNLRGECTPTQREVIQALHPETLISCQSQFKPAVFDFPSQDVFTVEPQFDTALGQYFCSITMHRLTDKQRKHLSMKKTALVVSASLSSSHFSTEQVGAEVPFSPGLFADQAEILLSNHYTSSEIRVFGAPEVLENLEVKSGSPAVLAFAKEKSFGWPSFITYTVGVLDPAAGSQGPLSTTLTFSSPVTNQAIAIPVTVAFVVDRRGPGPYGASLFQHFLDSYQVMFFTLFALLAGTAVMIIAYHTVCTPRDLAVPAALTPRASPGHSPHYFAASSPTSPNALPPARKASPPSGLWSPAYASH 2 1809 1831 LFQHFLDSYQ VMFFTLFALLAGTAVMIIAYHTV CTPRDLAVPA +O00592 
MRCALALSALLLLLSTPPLLPSSPSPSPSPSQNATQTTTDSSNKTAPTPASSVTIMATDTAQQSTVPTSKANEILASVKATTLGVSSDSPGTTTLAQQVSGPVNTTVARGGGSGNPTTTIESPKSTKSADTTTVATSTATAKPNTTSSQNGAEDTTNSGGKSSHSVTTDLTSTKAEHLTTPHPTSPLSPRQPTSTHPVATPTSSGHDHLMKISSSSSTVAIPGYTFTSPGMTTTLLETVFHHVSQAGLELLTSGDLPTLASQSAGITASSVISQRTQQTSSQMPASSTAPSSQETVQPTSPATALRTPTLPETMSSSPTAASTTHRYPKTPSPTVAHESNWAKCEDLETQTQSEKQLVLNLTGNTLCAGGASDEKLISLICRAVKATFNPAQDKCGIRLASVPGSQTVVVKEITIHTKLPAKDVYERLKDKWDELKEAGVSDMKLGDQGPPEEAEDRFSMPLIITIVCMASFLLLVAALYGCCHQRLSQRKDQQRLTEELQTVENGYHDNPTLEVMETSSEMQEKKVVSLNGELGDSWIVPLDNLTKDDLDEEEDTHL 2 461 483 PEEAEDRFSM PLIITIVCMASFLLLVAALYGCC HQRLSQRKDQ +Q8N131 MGLGARGAWAALLLGTLQVLALLGAAHESAAMAASANIENSGLPHNSSANSTETLQHVPSDHTNETSNSTVKPPTSVASDSSNTTVTTMKPTAASNTTTPGMVSTNMTSTTLKSTPKTTSVSQNTSQISTSTMTVTHNSSVTSAASSVTITTTMHSEAKKGSKFDTGSFVGGIVLTLGVLSILYIGCKMYYSRRGIRYRTIDEHDAII 2 169 191 KKGSKFDTGS FVGGIVLTLGVLSILYIGCKMYY SRRGIRYRTI +P16471 MKENVASATVFTLLLFLNTCLLNGQLPPGKPEIFKCRSPNKETFTCWWRPGTDGGLPTNYSLTYHREGETLMHECPDYITGGPNSCHFGKQYTSMWRTYIMMVNATNQMGSSFSDELYVDVTYIVQPDPPLELAVEVKQPEDRKPYLWIKWSPPTLIDLKTGWFTLLYEIRLKPEKAAEWEIHFAGQQTEFKILSLHPGQKYLVQVRCKPDHGYWSAWSPATFIQIPSDFTMNDTTVWISVAVLSAVICLIIVWAVALKGYSMVTCIFPPVPGPKIKGFDAHLLEKGKSEELLSALGCQDFPPTSDYEDLLVEYLEVDDSEDQHLMSVHSKEHPSQGMKPTYLDPDTDSGRGSCDSPSLLSEKCEEPQANPSTFYDPEVIEKPENPETTHTWDPQCISMEGKIPYFHAGGSKCSTWPLPQPSQHNPRSSYHNITDVCELAVGPAGAPATLLNEAGKDALKSSQTIKSREEGKATQQREVESFHSETDQDTPWLLPQEKTPFGSAKPLDYVEIHKVNKDGALSLLPKQRENSGKPKKPGTPENNKEYAKVSGVMDNNILVLVPDPHAKNVACFEESAKEAPPSLEQNQAEKALANFTATSSKCRLQLGGLDYLDPACFTHSFH 2 236 258 IPSDFTMNDT TVWISVAVLSAVICLIIVWAVAL KGYSMVTCIF +P0DTF9 MCWLRAWGQILLPVFLSLFLIQLLISFSENGFIHSPRNNQKPRDGNEEECAVKKSCQLCTEDKKCVWCSEEKACKKYCFPYFGCRFSSIYWLNCKVDMFGIMMLLLIAVLITGFVWYCCAYHFYLQDLNRNRVYFYGRRETVPIHDRSATVYDE 2 98 120 SIYWLNCKVD MFGIMMLLLIAVLITGFVWYCCA YHFYLQDLNR +P18433 
MDSWFILVLLGSGLICVSANNATTVAPSVGITRLINSSTAEPVKEEAKTSNPTSSLTSLSVAPTFSPNITLGPTYLTTVNSSDSDNGTTRTASTNSIGITISPNGTWLPDNQFTDARTEPWEGNSSTAATTPETFPPSGNSDSKDRRDETPIIAVMVALSSLLVIVFIIIVLYMLRFKKYKQAGSHSNSFRLSNGRTEDVEPQSVPLLARSPSTNRKYPPLPVDKLEEEINRRMADDNKLFREEFNALPACPIQATCEAASKEENKEKNRYVNILPYDHSRVHLTPVEGVPDSDYINASFINGYQEKNKFIAAQGPKEETVNDFWRMIWEQNTATIVMVTNLKERKECKCAQYWPDQGCWTYGNIRVSVEDVTVLVDYTVRKFCIQQVGDMTNRKPQRLITQFHFTSWPDFGVPFTPIGMLKFLKKVKACNPQYAGAIVVHCSAGVGRTGTFVVIDAMLDMMHTERKVDVYGFVSRIRAQRCQMVQTDMQYVFIYQALLEHYLYGDTELEVTSLETHLQKIYNKIPGTSNNGLEEEFKKLTSIKIQNDKMRTGNLPANMKKNRVLQIIPYEFNRVIIPVKRGEENTDYVNASFIDGYRQKDSYIASQGPLLHTIEDFWRMIWEWKSCSIVMLTELEERGQEKCAQYWPSDGLVSYGDITVELKKEEECESYTVRDLLVTNTRENKSRQIRQFHFHGWPEVGIPSDGKGMISIIAAVQKQQQQSGNHPITVHCSAGAGRTGTFCALSTVLERVKAEGILDVFQTVKSLRLQRPHMVQTLEQYEFCYKVVQEYIDAFSDYANFK 2 152 174 DSKDRRDETP IIAVMVALSSLLVIVFIIIVLYM LRFKKYKQAG +B2RU80 MLRHGALTALWITLSVVQTGVAEQVKCNFTLLESRVSSLSASIQWRTFASPCNFSLIYSSDTSGPMWCHPIRIDNFTYGCNPKDLQAGTVYNFRIVSLDGEESTLVLQTDPLPPARFEVNREKTASTTLQVRWTPSSGKVSWYEVQLFDHNNQKIQEVQVQESTTWSQYTFLNLTEGNSYKVAITAVSGEKRSFPVYINGSTVPSPVKDLGISPNPNSLLISWSRGSGNVEQYRLVLMDKGAIVQDTNVDRRDTSYAFHELTPGHLYNLTIVTMASGLQNSRWKLVRTAPMEVSNLKVTNDGRLTSLNVKWQKPPGDVDSYSITLSHQGTIKESKTLAPPVTETQFKDLVPGRLYQVTISCISGELSAEKSAAGRTVPEKVRNLVSYNEIWMKSFTVNWTPPAGDWEHYRIVLFNESLVLLNTTVGKEETHYALDGLELIPGRQYEIEVIVESGNLRNSERCQGRTVPLAVLQLRVKHANETSLGITWRAPLGEWEKYIISLMDRELLVIHKSLSKDAKEFTFTDLMPGRNYKATVTSMSGDLKQSSSIKGRTVPAQVTDLHVNNQGMTSSLFTNWTKALGDVEFYQVLLIHENVVVKNESVSSDTSRYSFRALKPGSLYSVVVTTVSGGISSRQVVAEGRTVPSSVSGVTVNNSGRNDYLSVSWLPAPGEVDHYVVSLSHEGKVDQFLIIAKSVSECSFSSLTPGRLYNVTVTTKSGNYASHSFTEERTVPDKVQGISVSNSARSDYLKVSWVHATGDFDHYEVTIKNRESFIQTKTIPKSENECEFIELVPGRLYSVTVSTKSGQYEASEQGTGRTIPEPVKDLTLLNRSTEDLHVTWSRANGDVDQYEVQLLFNDMKVFPHIHLVNTATEYKFTALTPGRHYKILVLTISGDVQQSAFIEGLTVPSTVKNIHISANGATDRLMVTWSPGGGDVDSYVVSAFRQDEKVDSQTIPKHASEHTFHRLEAGAKYRIAIVSVSGSLRNQIDALGQTVPASVQGVVAANAYSSNSLTVSWQKALGVAERYDILLLNENGLLLSNVSEPATARQHKFEDLTPGKKYKMQILTVSGGLFSKESQAEGRTVPAAVTNLRITENSSRYLSFGWTASEGELSWYNIFLYNPDRTLQERAQV
DPLVQSFSFQNLLQGRMYKMVIVTHSGELSNESFIFGRTVPAAVNHLKGSHRNTTDSLWFSWSPASGDFDFYELILYNPNGTKKENWKEKDVTEWRFQGLVPGRKYTLYVVTHSGDLSNKVTGEGRTAPSPPSLLSFADVANTSLAITWKGPPDWTDYNDFELQWFPGDALTIFNPYSSRKSEGRIVYGLHPGRSYQFSVKTVSGDSWKTYSKPISGSVRTKPDKIQNLHCRPQNSTAIACSWIPPDSDFDGYSIECRKMDTQEIEFSRKLEKEKSLLNIMMLVPHKRYLVSIKVQSAGMTSEVVEDSTITMIDRPPQPPPHIRVNEKDVLISKSSINFTVNCSWFSDTNGAVKYFAVVVREADSMDELKPEQQHPLPSYLEYRHNASIRVYQTNYFASKCAESPDSSSKSFNIKLGAEMDSLGGKCDPSQQKFCDGPLKPHTAYRISIRAFTQLFDEDLKEFTKPLYSDTFFSMPITTESEPLFGVIEGVSAGLFLIGMLVALVAFFICRQKASHSRERPSARLSIRRDRPLSVHLNLGQKGNRKTSCPIKINQFEGHFMKLQADSNYLLSKEYEDLKDVGRSQSCDIALLPENRGKNRYNNILPYDASRVKLCNVDDDPCSDYINASYIPGNNFRREYIATQGPLPGTKDDFWKMAWEQNVHNIVMVTQCVEKGRVKCDHYWPADQDPLYYGDLILQMVSESVLPEWTIREFKICSEEQLDAHRLIRHFHYTVWPDHGVPETTQSLIQFVRTVRDYINRSPGAGPTVVHCSAGVGRTGTFVALDRILQQLDSKDSVDIYGAVHDLRLHRVHMVQTECQYVYLHQCVRDVLRAKKLRNEQENPLFPIYENVNPEYHRDAIYSRH 2 1620 1642 ITTESEPLFG VIEGVSAGLFLIGMLVALVAFFI CRQKASHSRE +P08575 MTMYLWLKLLAFGFAFLDTEVFVTGQSPTPSPTGLTTAKMPSVPLSSDPLPTHTTAFSPASTFERENDFSETTTSLSPDNTSTQVSPDSLDNASAFNTTGVSSVQTPHLPTHADSQTPSAGTDTQTFSGSAANAKLNPTPGSNAISDVPGERSTASTFPTDPVSPLTTTLSLAHHSSAALPARTSNTTITANTSDAYLNASETTTLSPSGSAVISTTTIATTPSKPTCDEKYANITVDYLYNKETKLFTAKLNVNENVECGNNTCTNNEVHNLTECKNASVSISHNSCTAPDKTLILDVPPGVEKFQLHDCTQVEKADTTICLKWKNIETFTCDTQNITYRFQCGNMIFDNKEIKLENLEPEHEYKCDSEILYNNHKFTNASKIIKTDFGSPGEPQIIFCRSEAAHQGVITWNPPQRSFHNFTLCYIKETEKDCLNLDKNLIKYDLQNLKPYTKYVLSLHAYIIAKVQRNGSAAMCHFTTKSAPPSQVWNMTVSMTSDNSMHVKCRPPRDRNGPHERYHLEVEAGNTLVRNESHKNCDFRVKDLQYSTDYTFKAYFHNGDYPGEPFILHHSTSYNSKALIAFLAFLIIVTSIALLVVLYKIYDLHKKRSCNLDEQQELVERDDEKQLMNVEPIHADILLETYKRKIADEGRLFLAEFQSIPRVFSKFPIKEARKPFNQNKNRYVDILPYDYNRVELSEINGDAGSNYINASYIDGFKEPRKYIAAQGPRDETVDDFWRMIWEQKATVIVMVTRCEEGNRNKCAEYWPSMEEGTRAFGDVVVKINQHKRCPDYIIQKLNIVNKKEKATGREVTHIQFTSWPDHGVPEDPHLLLKLRRRVNAFSNFFSGPIVVHCSAGVGRTGTYIGIDAMLEGLEAENKVDVYGYVVKLRRQRCLMVQVEAQYILIHQALVEYNQFGETEVNLSELHPYLHNMKKRDPPSEPSPLEAEFQRLPSYRSWRTQHIGNQEENKSKNRNSNVIPYDYNRVPLKHELEMSKESEHDSDESSDDDSDSEEPSKYINASFIMSYWKPEVMIAAQGPLKETIGDFWQMIFQRKVKVIVMLTELKHGD
QEICAQYWGEGKQTYGDIEVDLKDTDKSSTYTLRVFELRHSKRKDSRTVYQYQYTNWSVEQLPAEPKELISMIQVVKQKLPQKNSSEGNKHHKSTPLLIHCRDGSQQTGIFCALLNLLESAETEEVVDIFQVVKALRKARPGMVSTFEQYQFLYDVIASTYPAQNGQVKKNNHQEDKIEFDNEVDKVKQDANCVNPLGAPEKLPEAKEQAEGSEPTSGTEGPEHSVNGPASPALNQGS 2 580 602 HSTSYNSKAL IAFLAFLIIVTSIALLVVLYKIY DLHKKRSCNL +P23469 MEPLCPLLLVGFSLPLARALRGNETTADSNETTTTSGPPDPGASQPLLAWLLLPLLLLLLVLLLAAYFFRFRKQRKAVVSTSDKKMPNGILEEQEQQRVMLLSRSPSGPKKYFPIPVEHLEEEIRIRSADDCKQFREEFNSLPSGHIQGTFELANKEENREKNRYPNILPNDHSRVILSQLDGIPCSDYINASYIDGYKEKNKFIAAQGPKQETVNDFWRMVWEQKSATIVMLTNLKERKEEKCHQYWPDQGCWTYGNIRVCVEDCVVLVDYTIRKFCIQPQLPDGCKAPRLVSQLHFTSWPDFGVPFTPIGMLKFLKKVKTLNPVHAGPIVVHCSAGVGRTGTFIVIDAMMAMMHAEQKVDVFEFVSRIRNQRPQMVQTDMQYTFIYQALLEYYLYGDTELDVSSLEKHLQTMHGTTTHFDKIGLEEEFRKLTNVRIMKENMRTGNLPANMKKARVIQIIPYDFNRVILSMKRGQEYTDYINASFIDGYRQKDYFIATQGPLAHTVEDFWRMIWEWKSHTIVMLTEVQEREQDKCYQYWPTEGSVTHGEITIEIKNDTLSEAISIRDFLVTLNQPQARQEEQVRVVRQFHFHGWPEIGIPAEGKGMIDLIAAVQKQQQQTGNHPITVHCSAGAGRTGTFIALSNILERVKAEGLLDVFQAVKSLRLQRPHMVQTLEQYEFCYKVVQDFIDIFSDYANFK 2 47 69 GPPDPGASQP LLAWLLLPLLLLLLVLLLAAYFF RFRKQRKAVV +P23470 
MRRLLEPCWWILFLKITSSVLHYVVCFPALTEGYVGALHENRHGSAVQIRRRKASGDPYWAYSGAYGPEHWVTSSVSCGGRHQSPIDILDQYARVGEEYQELQLDGFDNESSNKTWMKNTGKTVAILLKDDYFVSGAGLPGRFKAEKVEFHWGHSNGSAGSEHSINGRRFPVEMQIFFYNPDDFDSFQTAISENRIIGAMAIFFQVSPRDNSALDPIIHGLKGVVHHEKETFLDPFVLRDLLPASLGSYYRYTGSLTTPPCSEIVEWIVFRRPVPISYHQLEAFYSIFTTEQQDHVKSVEYLRNNFRPQQRLHDRVVSKSAVRDSWNHDMTDFLENPLGTEASKVCSSPPIHMKVQPLNQTALQVSWSQPETIYHPPIMNYMISYSWTKNEDEKEKTFTKDSDKDLKATISHVSPDSLYLFRVQAVCRNDMRSDFSQTMLFQANTTRIFQGTRIVKTGVPTASPASSADMAPISSGSSTWTSSGIPFSFVSMATGMGPSSSGSQATVASVVTSTLLAGLGFGGGGISSFPSTVWPTRLPTAASASKQAARPVLATTEALASPGPDGDSSPTKDGEGTEEGEKDEKSESEDGEREHEEDGEKDSEKKEKSGVTHAAEERNQTEPSPTPSSPNRTAEGGHQTIPGHEQDHTAVPTDQTGGRRDAGPGLDPDMVTSTQVPPTATEEQYAGSDPKRPEMPSKKPMSRGDRFSEDSRFITVNPAEKNTSGMISRPAPGRMEWIIPLIVVSALTFVCLILLIAVLVYWRGCNKIKSKGFPRRFREVPSSGERGEKGSRKCFQTAHFYVEDSSSPRVVPNESIPIIPIPDDMEAIPVKQFVKHIGELYSNNQHGFSEDFEEVQRCTADMNITAEHSNHPENKHKNRYINILAYDHSRVKLRPLPGKDSKHSDYINANYVDGYNKAKAYIATQGPLKSTFEDFWRMIWEQNTGIIVMITNLVEKGRRKCDQYWPTENSEEYGNIIVTLKSTKIHACYTVRRFSIRNTKVKKGQKGNPKGRQNERVVIQYHYTQWPDMGVPEYALPVLTFVRRSSAARMPETGPVLVHCSAGVGRTGTYIVIDSMLQQIKDKSTVNVLGFLKHIRTQRNYLVQTEEQYIFIHDALLEAILGKETEVSSNQLHSYVNSILIPGVGGKTRLEKQFKLVTQCNAKYVECFSAQKECNKEKNRNSSVVPSERARVGLAPLPGMKGTDYINASYIMGYYRSNEFIITQHPLPHTTKDFWRMIWDHNAQIIVMLPDNQSLAEDEFVYWPSREESMNCEAFTVTLISKDRLCLSNEEQIIIHDFILEATQDDYVLEVRHFQCPKWPNPDAPISSTFELINVIKEEALTRDGPTIVHDEYGAVSAGMLCALTTLSQQLENENAVDVFQVAKMINLMRPGVFTDIEQYQFIYKAMLSLVSTKENGNGPMTVDKNGAVLIADESDPAESMESLV 2 737 759 ISRPAPGRME WIIPLIVVSALTFVCLILLIAVL VYWRGCNKIK +Q12913 
MKPAAREARLPPRSPGLRWALPLLLLLLRLGQILCAGGTPSPIPDPSVATVATGENGITQISSTAESFHKQNGTGTPQVETNTSEDGESSGANDSLRTPEQGSNGTDGASQKTPSSTGPSPVFDIKAVSISPTNVILTWKSNDTAASEYKYVVKHKMENEKTITVVHQPWCNITGLRPATSYVFSITPGIGNETWGDPRVIKVITEPIPVSDLRVALTGVRKAALSWSNGNGTASCRVLLESIGSHEELTQDSRLQVNISGLKPGVQYNINPYLLQSNKTKGDPLGTEGGLDASNTERSRAGSPTAPVHDESLVGPVDPSSGQQSRDTEVLLVGLEPGTRYNATVYSQAANGTEGQPQAIEFRTNAIQVFDVTAVNISATSLTLIWKVSDNESSSNYTYKIHVAGETDSSNLNVSEPRAVIPGLRSSTFYNITVCPVLGDIEGTPGFLQVHTPPVPVSDFRVTVVSTTEIGLAWSSHDAESFQMHITQEGAGNSRVEITTNQSIIIGGLFPGTKYCFEIVPKGPNGTEGASRTVCNRTVPSAVFDIHVVYVTTTEMWLDWKSPDGASEYVYHLVIESKHGSNHTSTYDKAITLQGLIPGTLYNITISPEVDHVWGDPNSTAQYTRPSNVSNIDVSTNTTAATLSWQNFDDASPTYSYCLLIEKAGNSSNATQVVTDIGITDATVTELIPGSSYTVEIFAQVGDGIKSLEPGRKSFCTDPASMASFDCEVVPKEPALVLKWTCPPGANAGFELEVSSGAWNNATHLESCSSENGTEYRTEVTYLNFSTSYNISITTVSCGKMAAPTRNTCTTGITDPPPPDGSPNITSVSHNSVKVKFSGFEASHGPIKAYAVILTTGEAGHPSADVLKYTYEDFKKGASDTYVTYLIRTEEKGRSQSLSEVLKYEIDVGNESTTLGYYNGKLEPLGSYRACVAGFTNITFHPQNKGLIDGAESYVSFSRYSDAVSLPQDPGVICGAVFGCIFGALVIVTVGGFIFWRKKRKDAKNNEVSFSQIKPKKSKLIRVENFEAYFKKQQADSNCGFAEEYEDLKLVGISQPKYAAELAENRGKNRYNNVLPYDISRVKLSVQTHSTDDYINANYMPGYHSKKDFIATQGPLPNTLKDFWRMVWEKNVYAIIMLTKCVEQGRTKCEEYWPSKQAQDYGDITVAMTSEIVLPEWTIRDFTVKNIQTSESHPLRQFHFTSWPDHGVPDTTDLLINFRYLVRDYMKQSPPESPILVHCSAGVGRTGTFIAIDRLIYQIENENTVDVYGIVYDLRMHRPLMVQTEDQYVFLNQCVLDIVRSQKDSKVDLIYQNTTAMTIYENLAPVTTFGKTNGYIA 2 974 996 VSLPQDPGVI CGAVFGCIFGALVIVTVGGFIFW RKKRKDAKNN +Q16849 
MRRPRRPGGLGGSGGLRLLLCLLLLSSRPGGCSAVSAHGCLFDRRLCSHLEVCIQDGLFGQCQVGVGQARPLLQVTSPVLQRLQGVLRQLMSQGLSWHDDLTQYVISQEMERIPRLRPPEPRPRDRSGLAPKRPGPAGELLLQDIPTGSAPAAQHRLPQPPVGKGGAGASSSLSPLQAELLPPLLEHLLLPPQPPHPSLSYEPALLQPYLFHQFGSRDGSRVSEGSPGMVSVGPLPKAEAPALFSRTASKGIFGDHPGHSYGDLPGPSPAQLFQDSGLLYLAQELPAPSRARVPRLPEQGSSSRAEDSPEGYEKEGLGDRGEKPASPAVQPDAALQRLAAVLAGYGVELRQLTPEQLSTLLTLLQLLPKGAGRNPGGVVNVGADIKKTMEGPVEGRDTAELPARTSPMPGHPTASPTSSEVQQVPSPVSSEPPKAARPPVTPVLLEKKSPLGQSQPTVAGQPSARPAAEEYGYIVTDQKPLSLAAGVKLLEILAEHVHMSSGSFINISVVGPALTFRIRHNEQNLSLADVTQQAGLVKSELEAQTGLQILQTGVGQREEAAAVLPQTAHSTSPMRSVLLTLVALAGVAGLLVALAVALCVRQHARQQDKERLAALGPEGAHGDTTFEYQDLCRQHMATKSLFNRAEGPPEPSRVSSVSSQFSDAAQASPSSHSSTPSWCEEPAQANMDISTGHMILAYMEDHLRNRDRLAKEWQALCAYQAEPNTCATAQGEGNIKKNRHPDFLPYDHARIKLKVESSPSRSDYINASPIIEHDPRMPAYIATQGPLSHTIADFWQMVWESGCTVIVMLTPLVEDGVKQCDRYWPDEGASLYHVYEVNLVSEHIWCEDFLVRSFYLKNVQTQETRTLTQFHFLSWPAEGTPASTRPLLDFRRKVNKCYRGRSCPIIVHCSDGAGRTGTYILIDMVLNRMAKGVKEIDIAATLEHVRDQRPGLVRSKDQFEFALTAVAEEVNAILKALPQ 2 577 599 TAHSTSPMRS VLLTLVALAGVAGLLVALAVALC VRQHARQQDK +E9Q612 
MGHLPRGTLGGRRLLPLLGLFVLLKIVTTFHVAVQDDNNIVVSLEASDIVSPASVYVVRVAGESKNYFFEFEEFNSTLPPPVVFKATYHGLYYIITLVVVNGNVVTKPSRSITVLTKPLPVTSVSIYDYKPSPETGVLFEIHYPEKYNVFSRVNISYWEGRDFRTMLYKDFFKGKTVFNHWLPGLCYSNITFQLVSEATFNKSTLVEYSGVSHEPKQHRTAPYPPRNISVRFVNLNKNNWEEPSGSFPEDSFIKPPQDSIGRDRRFHFPEETPETPPSNVSSGSPPSNVSSAWPDPNSTDYESTSQPFWWDSASAAPENEEDFVSALPADYDTETTLDRTEKPTADPFSAFPVQMTLSWLPPKPPTAFDGFNILIEREENFTDYLTVDEEAHEFVAELKEPGKYKLSVTTFSSSGACETRKSQSAKSLSFYISPTGEWIEELTEKPQHVSVHVLSSTTALMSWTSSQENYNSTIVSVVSLTCQKQKESQRLEKQYCTQVNSSKPVIENLVPGAQYQVVMYLRKGPLIGPPSDPVTFAIVPTGIKDLMLYPLGPTAVVLSWTRPILGVFRKYVVEMFYFNPTTMTSEWTTYYEIAATVSLTASVRIASLLPAWYYNFRVTMVTWGDPELSCCDSSTISFITAPVAPEITSVEYFNSLLYISWTYGDATTDLSHSRMLHWMVVAEGRKKIKKSVTRNVMTAILSLPPGDIYNLSVTACTERGSNTSLPRLVKLEPAPPKSLFAVNKTQTSVTLLWVEEGVADFFEVFCQQLGSGHNGKLQEPVAVSSHVVTISSLLPATAYNCSVTSFSHDTPSVPTFIAVSTMVTEVNPNVVVISVLAILSTLLIGLLLVTLVILRKKHLQMARECGAGTFVNFASLEREGKLPYSWRRSVFALLTLLPSCLWTDYLLAFYINPWSKNGLKKRKLTNPVQLDDFDSYIKDMAKDSDYKFSLQFEELKLIGLDIPHFAADLPLNRCKNRYTNILPYDFSRVRLVSMNEEEGADYINANYIPGYNSPQEYIATQGPLPETRNDFWKMVLQQKSHIIVMLTQCNEKRRVKCDHYWPFTEEPIAYGDITVEMVSEEEEEDWASRHFRINYADEAQDVMHFNYTAWPDHGVPPANAAESILQFVFTVRQQAAKSKGPMIIHCSAGVGRTGTFIALDRLLQHIRDHEFVDILGLVSEMRSYRMSMVQTEEQYIFIHQCVQLMWLRKKQQFCISDVIYENVSKS 2 831 853 TMVTEVNPNV VVISVLAILSTLLIGLLLVTLVI LRKKHLQMAR +Q9UMZ3 
MKKVPIKPEQPEKLRAFNISTHSFSLHWSLPSGHVERYQVDLVPDSGFVTIRDLGGGEYQVDVSNVVPGTRYDITISSISTTYTSPVTRIVTTNVTKPGPPVFLAGERVGSAGILLSWNTPPNPNGRIISYIVKYKEVCPWMQTVYTQVRSKPDSLEVLLTNLNPGTTYEIKVAAENSAGIGVFSDPFLFQTAESAPGKVVNLTVEAYNASAVKLIWYLPRQPNGKITSFKISVKHARSGIVVKDVSIRVEDILTGKLPECNENSESFLWSTASPSPTLGRVTPPSRTTHSSSTLTQNEISSVWKEPISFVVTHLRPYTTYLFEVSAVTTEAGYIDSTIVRTPESVPEGPPQNCVTGNITGKSFSILWDPPTIVTGKFSYRVELYGPSGRILDNSTKDLKFAFTNLTPFTMYDVYIAAETSAGTGPKSNISVFTPPDVPGAVFDLQLAEVESTQVRITWKKPRQPNGIINQYRVKVLVPETGIILENTLLTGNNEYINDPMAPEIVNIVEPMVGLYEGSAEMSSDLHSLATFIYNSHPDKNFPARNRAEDQTSPVVTTRNQYITDIAAEQLSYVIRRLVPFTEHMISVSAFTIMGEGPPTVLSVRTRQQVPSSIKIINYKNISSSSILLYWDPPEYPNGKITHYTIYAMELDTNRAFQITTIDNSFLITGLKKYTKYKMRVAASTHVGESSLSEENDIFVRTSEDEPESSPQDVEVIDVTADEIRLKWSPPEKPNGIIIAYEVLYKNIDTLYMKNTSTTDIILRNLRPHTLYNISVRSYTRFGHGNQVSSLLSVRTSETVPDSAPENITYKNISSGEIELSFLPPSSPNGIIKKYTIYLKRSNGNEERTINTTSLTQNIKVLKKYTQYIIEVSASTLKGEGVRSAPISILTEEDAPDSPPQDFSVKQLSGVTVKLSWQPPLEPNGIILYYTVYVWNRSSLKTINVTETSLELSDLDYNVEYSAYVTASTRFGDGKTRSNIISFQTPEGAPSDPPKDVYYANLSSSSIILFWTPPSKPNGIIQYYSVYYRNTSGTFMQNFTLHEVTNDFDNMTVSTIIDKLTIFSYYTFWLTASTSVGNGNKSSDIIEVYTDQDIPEGFVGNLTYESISSTAINVSWVPPAQPNGLVFYYVSLILQQTPRHVRPPLVTYERSIYFDNLEKYTDYILKITPSTEKGFSDTYTAQLYIKTEEDVPETSPIINTFKNLSSTSVLLSWDPPVKPNGAIISYDLTLQGPNENYSFITSDNYIILEELSPFTLYSFFAAARTRKGLGPSSILFFYTDESVPLAPPQNLTLINCTSDFVWLKWSPSPLPGGIVKVYSFKIHEHETDTIYYKNISGFKTEAKLVGLEPVSTYSIRVSAFTKVGNGNQFSNVVKFTTQESVPDVVQNMQCMATSWQSVLVKWDPPKKANGIITQYMVTVERNSTKVSPQDHMYTFIKLLANTSYVFKVRASTSAGEGDESTCHVSTLPETVPSVPTNIAFSDVQSTSATLTWIRPDTILGYFQNYKITTQLRAQKCKEWESEECVEYQKIQYLYEAHLTEETVYGLKKFRWYRFQVAASTNAGYGNASNWISTKTLPGPPDGPPENVHVVATSPFSISISWSEPAVITGPTCYLIDVKSVDNDEFNISFIKSNEENKTIEIKDLEIFTRYSVVITAFTGNISAAYVEGKSSAEMIVTTLESAPKDPPNNMTFQKIPDEVTKFQLTFLPPSQPNGNIQVYQALVYREDDPTAVQIHNLSIIQKTNTFVIAMLEGLKGGHTYNISVYAVNSAGAGPKVPMRITMDIKAPARPKTKPTPIYDATGKLLVTSTTITIRMPICYYSDDHGPIKNVQVLVTETGAQHDGNVTKWYDAYFNKARPYFTNEGFPNPPCTEGKTKFSGNEEIYIIGADNACMIPGNEDKICNGPLKPKKQYLFKFRATNIMGQFTDSDYSDPVKTLGEGLSERTVEIILSVTLCILSIILLGTAIFAFARIRQKQKEGGTYSPQDAEIIDTKLKLDQLI
TVADLELKDERLTRPISKKSFLQHVEELCTNNNLKFQEEFSELPKFLQDLSSTDADLPWNRAKNRFPNIKPYNNNRVKLIADASVPGSDYINASYISGYLCPNEFIATQGPLPGTVGDFWRMVWETRAKTLVMLTQCFEKGRIRCHQYWPEDNKPVTVFGDIVITKLMEDVQIDWTIRDLKIERHGDCMTVRQCNFTAWPEHGVPENSAPLIHFVKLVRASRAHDTTPMIVHCSAGVGRTGVFIALDHLTQHINDHDFVDIYGLVAELRSERMCMVQNLAQYIFLHQCILDLLSNKGSNQPICFVNYSALQKMDSLDAMEGDVELEWEETTM 2 1948 1970 GEGLSERTVE IILSVTLCILSIILLGTAIFAFA RIRQKQKEGG +Q15256 MRRAVCFPALCLLLNLHAAGCFSGNNDHFLAINQKKSGKPVFIYKHSQDIEKSLDIAPQKIYRHSYHSSSEAQVSKRHQIVNSAFPRPAYDPSLNLLAMDGQDLEVENLPIPAANVIVVTLQMDVNKLNITLLRIFRQGVAAALGLLPQQVHINRLIGKKNSIELFVSPINRKTGISDALPSEEVLRSLNINVLHQSLSQFGITEVSPEKNVLQGQHEADKIWSKEGFYAVVIFLSIFVIIVTCLMILYRLKERFQLSLRQDKEKNQEIHLSPITLQPALSEAKTVHSMVQPEQAPKVLNVVVDPQGRGAPEIKATTATSVCPSPFKMKPIGLQERRGSNVSLTLDMSSLGNIEPFVSIPTPREKVAMEYLQSASRILTRSQLRDVVASSHLLQSEFMEIPMNFVDPKEIDIPRHGTKNRYKTILPNPLSRVCLRPKNVTDSLSTYINANYIRGYSGKEKAFIATQGPMINTVDDFWQMVWQEDSPVIVMITKLKEKNEKCVLYWPEKRGIYGKVEVLVISVNECDNYTIRNLVLKQGSHTQHVKHYWYTSWPDHKTPDSAQPLLQLMLDVEEDRLASQGRGPVVVHCSAGIGRTGCFIATSIGCQQLKEEGVVDALSIVCQLRMDRGGMVQTSEQYEFVHHALCLYESRLSAETVQ 2 227 249 HEADKIWSKE GFYAVVIFLSIFVIIVTCLMILY RLKERFQLSL +Q99M80 
MGSLGGLALCLLRLLLLGLQRPPLPGAGAQSAAGGCSFDEHYSNCGYSVALGTNGFTWEQINTWEKPMLDPAVPTGSFMMVNSSGRASGQKAHLLLPTLKENDTHCIDFHYYFSSRDRSSPGALNVYVKVNGGPQGNPVWNVSGVVTEGWVKAELAISTFWPHFYQVIFESVSLKGHPGYIAVDEVRVLAHPCRKAPHFLRLQNVEVNVGQNATFQCIAGGKWSQHDKLWLQQWNGRDTALMVTRVVNHRRFSATVSVADTSQRSISKYRCVIRSDGGSGVSNYAELIVKEPPTPIAPPELLAVGATYLWIKPNANSIIGDGPIILKEVEYRTTTGTWAETHIVDSPNYKLWHLDPDVEYEIRVLLTRPGEGGTGPPGPPLTTRTKCADPVHGPQNVEIVDIRARQLTLQWEPFGYAVTRCHSYNLTVQYQYVFNQQQYEAEEVIQTSSHYTLRGLRPFMTIRLRLLLSNPEGRMESEELVVQTEEDVPGAVPLESIQGGPFEEKIYIQWKPPNETNGVITLYEINYKAVGSLDPSADLSSQRGKVFKLRNETHHLFVGLYPGTTYSFTIKASTAKGFGPPVTTRIATKISAPSMPEYDADTPLNETDTTITVMLKPAQSRGAPVSVYQLVVKEERLQKSRRAADIIECFSVPVSYRNASNLDSLHYFAAELKPSNLPVTQPFTVGDNKTYNGYWNPPLSPLKSYSIYFQALSKANGETKINCVRLATKGAPMGSAQVTPGTPLCLLTTASTQNSNTVEPEKQVDNTVKMAGVIAGLLMFIIILLGVMLTIKRRKLAKKQKETQSGAQREMGPVASTDKPTAKLGTNRNDEGFSSSSQDVNGFTDGSRGELSQPTLTIQTHPYRTCDPVEMSYPRDQFQPAIRVADLLQHITQMKRGQGYGFKEEYEALPEGQTASWDTAKEDENRNKNRYGNIISYDHSRVRLLVLDGDPHSDYINANYIDGYHRPRHYIATQGPMQETVKDFWRMIWQENSASIVMVTNLVEVGRVKCVRYWPDDTEVYGDIKVTLIETEPLAEYVIRTFTVQKKGYHEIRELRLFHFTSWPDHGVPCYATGLLGFVRQVKFLNPPEAGPIVVHCSAGAGRTGCFIAIDTMLDMAENEGVVDIFNCVRELRAQRVNLVQTEEQYVFVHDAILEACLCGNTAIPVCEFRSLYYNISRLDPQTNSSQIKDEFQTLNIVTPRVRPEDCSIGLLPRNHDKNRSMDVLPLDRCLPFLISVDGESSNYINAALMDSHKQPAAFVVTQHPLPNTVADFWRLVFDYNCSSVVMLNEMDTAQLCMQYWPEKTSGCYGPIQVEFVSADIDEDIIHRIFRICNMARPQDGYRIVQHLQYIGWPAYRDTPPSKRSLLKVVRRLEKWQEQYDGREGRTVVHCLNGGGRSGTFCAICSVCEMIQQQNIIDVFHIVKTLRNNKSNMVETLEQYKFVYEVALEYLSSF 2 772 791 KQVDNTVKMA GVIAGLLMFIIILLGVMLTI KRRKLAKKQK +Q92729 
MARAQALVLALTFQLCAPETETPAAGCTFEEASDPAVPCEYSQAQYDDFQWEQVRIHPGTRAPADLPHGSYLMVNTSQHAPGQRAHVIFQSLSENDTHCVQFSYFLYSRDGHSPGTLGVYVRVNGGPLGSAVWNMTGSHGRQWHQAELAVSTFWPNEYQVLFEALISPDRRGYMGLDDILLLSYPCAKAPHFSRLGDVEVNAGQNASFQCMAAGRAAEAERFLLQRQSGALVPAAGVRHISHRRFLATFPLAAVSRAEQDLYRCVSQAPRGAGVSNFAELIVKEPPTPIAPPQLLRAGPTYLIIQLNTNSIIGDGPIVRKEIEYRMARGPWAEVHAVSLQTYKLWHLDPDTEYEISVLLTRPGDGGTGRPGPPLISRTKCAEPMRAPKGLAFAEIQARQLTLQWEPLGYNVTRCHTYTVSLCYHYTLGSSHNQTIRECVKTEQGVSRYTIKNLLPYRNVHVRLVLTNPEGRKEGKEVTFQTDEDVPSGIAAESLTFTPLEDMIFLKWEEPQEPNGLITQYEISYQSIESSDPAVNVPGPRRTISKLRNETYHVFSNLHPGTTYLFSVRARTGKGFGQAALTEITTNISAPSFDYADMPSPLGESENTITVLLRPAQGRGAPISVYQVIVEEERARRLRREPGGQDCFPVPLTFEAALARGLVHYFGAELAASSLPEAMPFTVGDNQTYRGFWNPPLEPRKAYLIYFQAASHLKGETRLNCIRIARKAACKESKRPLEVSQRSEEMGLILGICAGGLAVLILLLGAIIVIIRKGRDHYAYSYYPKPVNMTKATVNYRQEKTHMMSAVDRSFTDQSTLQEDERLGLSFMDTHGYSTRGDQRSGGVTEASSLLGGSPRRPCGRKGSPYHTGQLHPAVRVADLLQHINQMKTAEGYGFKQEYESFFEGWDATKKKDKVKGSRQEPMPAYDRHRVKLHPMLGDPNADYINANYIDGYHRSNHFIATQGPKPEMVYDFWRMVWQEHCSSIVMITKLVEVGRVKCSRYWPEDSDTYGDIKIMLVKTETLAEYVVRTFALERRGYSARHEVRQFHFTAWPEHGVPYHATGLLAFIRRVKASTPPDAGPIVIHCSAGTGRTGCYIVLDVMLDMAECEGVVDIYNCVKTLCSRRVNMIQTEEQYIFIHDAILEACLCGETTIPVSEFKATYKEMIRIDPQSNSSQLREEFQTLNSVTPPLDVEECSIALLPRNRDKNRSMDVLPPDRCLPFLISTDGDSNNYINAALTDSYTRSAAFIVTLHPLQSTTPDFWRLVYDYGCTSIVMLNQLNQSNSAWPCLQYWPEPGRQQYGLMEVEFMSGTADEDLVARVFRVQNISRLQEGHLLVRHFQFLRWSAYRDTPDSKKAFLHLLAEVDKWQAESGDGRTIVHCLNGGGRSGTFCACATVLEMIRCHNLVDVFFAAKTLRNYKPNMVETMDQYHFCYDVALEYLEGLESR 2 747 769 EVSQRSEEMG LILGICAGGLAVLILLLGAIIVI IRKGRDHYAY +P70289 
MRPLILLAALLWLQDSLAQEDVCSSLDGSPDRQGGGPPLSVSVTSRGRPTSLFLSWVAAEPGGFDYALCLRAMNLSGFPEGQQLQAHTNESSFEFHGLVPGSRYQLELTVLRPCWQNVTITLTARTAPTVVRGLQLHSTGSPASLEASWSDASGDQDSYQLLLYHPESHTLACNVSVSPDTLSYNFGDLLPGSQYVLEVITWAGSLHAKTSILQWTEPVPPDHLRVRALGTSSLQAFWNSSEGATWFHLILTDLLEGTNLTKVVRQGISTHTFLRLSPGTPYQLKICAAAGPHQIWGPNATEWTYPSYPSDLVLTPLWNELWASWKAGQGARDGYVLKLSGPVENTTTLGPEECNAVFPGPLPPGHYTLGLRVLAGPYDAWVEGSIWLAESAARPMEVPGARLWLEGLEATKQPGRRALLYSVDAPGLLGNISVSSGATHVTFCGLVPGAHYRVDIASSMGDITQSLTGYTSPLPPQSLEIISRNSPSDLTIGWAPAPGQMEGYKVTWHQDGSQRSPGDLVDLGPDISSLTLKSLVPGSCYTVSAWAWSGNLSSDSQKIHSCTRPAPPTNLSLGFAHQPATLRASWCHPPGGRDAFQLRLYRLRPLTLESEKILSQEAQNFSWAQLPAGYEFQVQLSTLWGSEESGSANTTGWTPPSAPTLVNVTSEAPTQLHVSWVHAAGDRSSYQVTLYQESTRTATSIVGPKADSTSFWGLTPGTKYKVEAISWAGPLYTAAANVSAWTYPLTPNELLASMQAGSAVVNLAWPSGPLGRGTCHAQLSDAGHLSWEQPLSLGQDLLMLRNLIPGHTVSLSVKCRAGPLQASTHPLVLSVEPGPVEDVFCQPEATYLSLNWTMPTGDVAVCLVEVEQLVPGGSAHFVFQVNTSEDALLLPNLTPTTSYRLSLTVLGGNRQWSRAVTLVCTTSAEVWHPPELAEAPQVELGTGMGVTVTRGMFGKDDGQIQWYGIIATINMTLAQPSQEAINHTWYDHYYRGHDSYLALLFPNPFYPEPWAVPRSWTVPVGTEDCDNTQEICNGHLKPGFQYRFSIAAFSRLSSPETILAFSAFSEPQASISLVAMPLTVMMGTVVGCIIIVCAVLCLLCRRGLKGPRSEKNGFSQELMPYNLWRTHRPIPSHSFRQSYEAKSARAHQAFFQEFEELKEVGKDQPRLEAEHPANITKNRYPHVLPYDHSRVRLTQLSGEPHSDYINANFIPGYSHPQEIIATQGPLKKTVEDFWRLVWEQQVHVIIMLTVGMENGRVLCEHYWPVNSTPVTHGHITTHLLAEESEDEWTRREFQLQHGAEQKQRRVKQLQFTTWPDHSVPEAPSSLLAFVELVQEEVKATQGKGPILVHCSAGVGRTGTFVALLPAVRQLEEEQVVDVFNTVYILRLHRPLMIQTLSQYIFLHSCLLNKILEGPSDASDSGPIPVMNFAQACAKRAANANAGFLKEYRLLKQAIKDETGSLLPSPDYNQNSIASCHHSQEQLALVEESPADNMLAASLFPGGPSGRDHVVLTGSAGPKELWEMVWEHGAYVLVSLGLPDTKEKPQDIWPMEMQPIVTDMVTVHRVAESNTAGWPSTLIRVIHGDSGTERQVQCLQFPHCETGSELPANTLLTFLDAVGQCCSRGNSKKPGTLLSHSSKVTNQLSTFLAMEQLLQQAGTERTVDVFSVALKQTQACAVKTPTLEQYIYLYNCLNSALRNRLPRARK 2 1078 1100 QASISLVAMP LTVMMGTVVGCIIIVCAVLCLLC RRGLKGPRSE +P15151 
MARAMAAAWPLLLVALLVLSWPPPGTGDVVVQAPTQVPGFLGDSVTLPCYLQVPNMEVTHVSQLTWARHGESGSMAVFHQTQGPSYSESKRLEFVAARLGAELRNASLRMFGLRVEDEGNYTCLFVTFPQGSRSVDIWLRVLAKPQNTAEVQKVQLTGEPVPMARCVSTGGRPPAQITWHSDLGGMPNTSQVPGFLSGTVTVTSLWILVPSSQVDGKNVTCKVEHESFEKPQLLTVNLTVYYPPEVSISGYDNNWYLGQNEATLTCDARSNPEPTGYNWSTTMGPLPPFAVAQGAQLLIRPVDKPINTTLICNVTNALGARQAELTVQVKEGPPSEHSGISRNAIIFLVLGILVFLILLGIGIYFYWSKCSREVLWHCHLCPSSTEHASASANGHVSYSAVSRENSSSQDPQTEGTR 2 345 367 SEHSGISRNA IIFLVLGILVFLILLGIGIYFYW SKCSREVLWH +Q9NXS2 MRSGGRGRPRLRLGERGLMEPLLPPKRRLLPRVRLLPLLLALAVGSAFYTIWSGWHRRTEELPLGRELRVPLIGSLPEARLRRVVGQLDPQRLWSTYLRPLLVVRTPGSPGNLQVRKFLEATLRSLTAGWHVELDPFTASTPLGPVDFGNVVATLDPRAARHLTLACHYDSKLFPPGSTPFVGATDSAVPCALLLELAQALDLELSRAKKQAAPVTLQLLFLDGEEALKEWGPKDSLYGSRHLAQLMESIPHSPGPTRIQAIELFMLLDLLGAPNPTFYSHFPRTVRWFHRLRSIEKRLHRLNLLQSHPQEVMYFQPGEPFGSVEDDHIPFLRRGVPVLHLISTPFPAVWHTPADTEVNLHPPTVHNLCRILAVFLAEYLGL 2 33 55 LPPKRRLLPR VRLLPLLLALAVGSAFYTIWSGW HRRTEELPLG +Q8TD07 MRRISLTSSPVRLLLFLLLLLIALEIMVGGHSLCFNFTIKSLSRPGQPWCEAQVFLNKNLFLQYNSDNNMVKPLGLLGKKVYATSTWGELTQTLGEVGRDLRMLLCDIKPQIKTSDPSTLQVEMFCQREAERCTGASWQFATNGEKSLLFDAMNMTWTVINHEASKIKETWKKDRGLEKYFRKLSKGDCDHWLREFLGHWEAMPEPTVSPVNASDIHWSSSSLPDRWIILGAFILLVLMGIVLICVWWQNGEWQAGLWPLRTS 2 226 248 IHWSSSSLPD RWIILGAFILLVLMGIVLICVWW QNGEWQAGLW +O75787 MAVFVVLLALVAGVLGNEFSILKSPGSVVFRNGNWPIPGERIPDVAALSMGFSVKEDLSWPGLAVGNLFHRPRATVMVMVKGVNKLALPPGSVISYPLENAVPFSLDSVANSIHSLFSEETPVVLQLAPSEERVYMVGKANSVFEDLSVTLRQLRNRLFQENSVLSSLPLNSLSRNNEVDLLFLSELQVLHDISSLLSRHKHLAKDHSPDLYSLELAGLDEIGKRYGEDSEQFRDASKILVDALQKFADDMYSLYGGNAVVELVTVKSFDTSLIRKTRTILEAKQAKNPASPYNLAYKYNFEYSVVFNMVLWIMIALALAVIITSYNIWNMDPGYDSIIYRMTNQKIRMD 2 309 331 YNFEYSVVFN MVLWIMIALALAVIITSYNIWNM DPGYDSIIYR +P07949 
MAKATSGAAGLRLLLLLLLPLLGKVALGLYFSRDAYWEKLYVDQAAGTPLLYVHALRDAPEEVPSFRLGQHLYGTYRTRLHENNWICIQEDTGLLYLNRSLDHSSWEKLSVRNRGFPLLTVYLKVFLSPTSLREGECQWPGCARVYFSFFNTSFPACSSLKPRELCFPETRPSFRIRENRPPGTFHQFRLLPVQFLCPNISVAYRLLEGEGLPFRCAPDSLEVSTRWALDREQREKYELVAVCTVHAGAREEVVMVPFPVTVYDEDDSAPTFPAGVDTASAVVEFKRKEDTVVATLRVFDADVVPASGELVRRYTSTLLPGDTWAQQTFRVEHWPNETSVQANGSFVRATVHDYRLVLNRNLSISENRTMQLAVLVNDSDFQGPGAGVLLLHFNVSVLPVSLHLPSTYSLSVSRRARRFAQIGKVCVENCQAFSGINVQYKLHSSGANCSTLGVVTSAEDTSGILFVNDTKALRRPKCAELHYMVVATDQQTSRQAQAQLLVTVEGSYVAEEAGCPLSCAVSKRRLECEECGGLGSPTGRCEWRQGDGKGITRNFSTCSPSTKTCPDGHCDVVETQDINICPQDCLRGSIVGGHEPGEPRGIKAGYGTCNCFPEEEKCFCEPEDIQDPLCDELCRTVIAAAVLFSFIVSVLLSAFCIHCYHKFAHKPPISSAEMTFRRPAQAFPVSYSSSGARRPSLDSMENQVSVDAFKILEDPKWEFPRKNLVLGKTLGEGEFGKVVKATAFHLKGRAGYTTVAVKMLKENASPSELRDLLSEFNVLKQVNHPHVIKLYGACSQDGPLLLIVEYAKYGSLRGFLRESRKVGPGYLGSGGSRNSSSLDHPDERALTMGDLISFAWQISQGMQYLAEMKLVHRDLAARNILVAEGRKMKISDFGLSRDVYEEDSYVKRSQGRIPVKWMAIESLFDHIYTTQSDVWSFGVLLWEIVTLGGNPYPGIPPERLFNLLKTGHRMERPDNCSEEMYRLMLQCWKQEPDKRPVFADISKDLEKMMVKRRDYLDLAASTPSDSLIYDDGLSEEETPLVDCNNAPLPRALPSTWIENKLYGMSDPNWPGESPVPLTRADGTNTGFPRYPNDSVYANWMLSPSAAKLMDTFDS 2 13 32 KATSGAAGLR LLLLLLLPLLGKVALGLYFS RDAYWEKLYV +Q68DV7 MSGGHQLQLAALWPWLLMATLQAGFGRTGLVLAAAVESERSAEQKAIIRVIPLKMDPTGKLNLTLEGVFAGVAEITPAEGKLMQSHPLYLCNASDDDNLEPGFISIVKLESPRRAPRPCLSLASKARMAGERGASAVLFDITEDRAAAEQLQQPLGLTWPVVLIWGNDAEKLMEFVYKNQKAHVRIELKEPPAWPDYDVWILMTVVGTIFVIILASVLRIRCRPRHSRPDPLQQRTAWAISQLATRRYQASCRQARGEWPDSGSSCSSAPVCAICLEEFSEGQELRVISCLHEFHRNCVDPWLHQHRTCPLCMFNITEGDSFSQSLGPSRSYQEPGRRLHLIRQHPGHAHYHLPAAYLLGPSRSAVARPPRPGPFLPSQEPGMGPRHHRFPRAAHPRAPGEQQRLAGAQHPYAQGWGLSHLQSTSQHPAACPVPLRRARPPDSSGSGESYCTERSGYLADGPASDSSSGPCHGSSSDSVVNCTDISLQGVHGSSSTFCSSLSSDFDPLVYCSPKGDPQRVDMQPSVTSRPRSLDSVVPTGETQVSSHVHYHRHRHHHYKKRFQWHGRKPGPETGVPQSRPPIPRTQPQPEPPSPDQQVTRSNSAAPSGRLSNPQCPRALPEPAPGPVDASSICPSTSSLFNLQKSSLSARHPQRKRRGGPSEPTPGSRPQDATVHPACQIFPHYTPSVAYPWSPEAHPLICGPPGLDKRLLPETPGPCYSNSQPVWLCLTPRQPLEPHPPGEGPSEWSSDTAEGRPCPYPHCQVLSAQPGSEEELEELCEQAV 2 199 218 KEPPAWPDYD VWILMTVVGTIFVIILASVL 
RIRCRPRHSR +Q04912 MELLPPLPQSFLLLLLLPAKPAAGEDWQCPRTPYAASRDFDVKYVVPSFSAGGLVQAMVTYEGDRNESAVFVAIRNRLHVLGPDLKSVQSLATGPAGDPGCQTCAACGPGPHGPPGDTDTKVLVLDPALPALVSCGSSLQGRCFLHDLEPQGTAVHLAAPACLFSAHHNRPDDCPDCVASPLGTRVTVVEQGQASYFYVASSLDAAVAASFSPRSVSIRRLKADASGFAPGFVALSVLPKHLVSYSIEYVHSFHTGAFVYFLTVQPASVTDDPSALHTRLARLSATEPELGDYRELVLDCRFAPKRRRRGAPEGGQPYPVLRVAHSAPVGAQLATELSIAEGQEVLFGVFVTGKDGGPGVGPNSVVCAFPIDLLDTLIDEGVERCCESPVHPGLRRGLDFFQSPSFCPNPPGLEALSPNTSCRHFPLLVSSSFSRVDLFNGLLGPVQVTALYVTRLDNVTVAHMGTMDGRILQVELVRSLNYLLYVSNFSLGDSGQPVQRDVSRLGDHLLFASGDQVFQVPIQGPGCRHFLTCGRCLRAWHFMGCGWCGNMCGQQKECPGSWQQDHCPPKLTEFHPHSGPLRGSTRLTLCGSNFYLHPSGLVPEGTHQVTVGQSPCRPLPKDSSKLRPVPRKDFVEEFECELEPLGTQAVGPTNVSLTVTNMPPGKHFRVDGTSVLRGFSFMEPVLIAVQPLFGPRAGGTCLTLEGQSLSVGTSRAVLVNGTECLLARVSEGQLLCATPPGATVASVPLSLQVGGAQVPGSWTFQYREDPVVLSISPNCGYINSHITICGQHLTSAWHLVLSFHDGLRAVESRCERQLPEQQLCRLPEYVVRDPQGWVAGNLSARGDGAAGFTLPGFRFLPPPHPPSANLVPLKPEEHAIKFEYIGLGAVADCVGINVTVGGESCQHEFRGDMVVCPLPPSLQLGQDGAPLQVCVDGECHILGRVVRPGPDGVPQSTLLGILLPLLLLVAALATALVFSYWWRRKQLVLPPNLNDLASLDQTAGATPLPILYSGSDYRSGLALPAIDGLDSTTCVHGASFSDSEDESCVPLLRKESIQLRDLDSALLAEVKDVLIPHERVVTHSDRVIGKGHFGVVYHGEYIDQAQNRIQCAIKSLSRITEMQQVEAFLREGLLMRGLNHPNVLALIGIMLPPEGLPHVLLPYMCHGDLLQFIRSPQRNPTVKDLISFGLQVARGMEYLAEQKFVHRDLAARNCMLDESFTVKVADFGLARDILDREYYSVQQHRHARLPVKWMALESLQTYRFTTKSDVWSFGVLLWELLTRGAPPYRHIDPFDLTHFLAQGRRLPQPEYCPDSLYQVMQQCWEADPAVRPTFRVLVGEVEQIVSALLGDHYVQLPATYMNLGPSTSHEMNVRPEQPQFSPMPGNVRRPRPLSEPPRPT 2 960 982 PDGVPQSTLL GILLPLLLLVAALATALVFSYWW RRKQLVLPPN +Q01974 
MARGSALPRRPLLCIPAVWAAAALLLSVSRTSGEVEVLDPNDPLGPLDGQDGPIPTLKGYFLNFLEPVNNITIVQGQTAILHCKVAGNPPPNVRWLKNDAPVVQEPRRIIIRKTEYGSRLRIQDLDTTDTGYYQCVATNGMKTITATGVLFVRLGPTHSPNHNFQDDYHEDGFCQPYRGIACARFIGNRTIYVDSLQMQGEIENRITAAFTMIGTSTHLSDQCSQFAIPSFCHFVFPLCDARSRTPKPRELCRDECEVLESDLCRQEYTIARSNPLILMRLQLPKCEALPMPESPDAANCMRIGIPAERLGRYHQCYNGSGMDYRGTASTTKSGHQCQPWALQHPHSHHLSSTDFPELGGGHAYCRNPGGQMEGPWCFTQNKNVRMELCDVPSCSPRDSSKMGILYILVPSIAIPLVIACLFFLVCMCRNKQKASASTPQRRQLMASPSQDMEMPLINQHKQAKLKEISLSAVRFMEELGEDRFGKVYKGHLFGPAPGEQTQAVAIKTLKDKAEGPLREEFRHEAMLRARLQHPNVVCLLGVVTKDQPLSMIFSYCSHGDLHEFLVMRSPHSDVGSTDDDRTVKSALEPPDFVHLVAQIAAGMEYLSSHHVVHKDLATRNVLVYDKLNVKISDLGLFREVYAADYYKLLGNSLLPIRWMAPEAIMYGKFSIDSDIWSYGVVLWEVFSYGLQPYCGYSNQDVVEMIRNRQVLPCPDDCPAWVYALMIECWNEFPSRRPRFKDIHSRLRAWGNLSNYNSSAQTSGASNTTQTSSLSTSPVSNVSNARYVGPKQKAPPFPQPQFIPMKGQIRPMVPPPQLYVPVNGYQPVPAYGAYLPNFYPVQIPMQMAPQQVPPQMVPKPSSHHSGSGSTSTGYVTTAPSNTSMADRAALLSEGADDTQNAPEDGAQSTVQEAEEEEEGSVPETELLGDCDTLQVDEAQVQLEA 2 403 425 SCSPRDSSKM GILYILVPSIAIPLVIACLFFLV CMCRNKQKAS +P08922 
MKNIYCLIPKLVNFATLGCLWISVVQCTVLNSCLKSCVTNLGQQLDLGTPHNLSEPCIQGCHFWNSVDQKNCALKCRESCEVGCSSAEGAYEEEVLENADLPTAPFASSIGSHNMTLRWKSANFSGVKYIIQWKYAQLLGSWTYTKTVSRPSYVVKPLHPFTEYIFRVVWIFTAQLQLYSPPSPSYRTHPHGVPETAPLIRNIESSSPDTVEVSWDPPQFPGGPILGYNLRLISKNQKLDAGTQRTSFQFYSTLPNTIYRFSIAAVNEVGEGPEAESSITTSSSAVQQEEQWLFLSRKTSLRKRSLKHLVDEAHCLRLDAIYHNITGISVDVHQQIVYFSEGTLIWAKKAANMSDVSDLRIFYRGSGLISSISIDWLYQRMYFIMDELVCVCDLENCSNIEEITPPSISAPQKIVADSYNGYVFYLLRDGIYRADLPVPSGRCAEAVRIVESCTLKDFAIKPQAKRIIYFNDTAQVFMSTFLDGSASHLILPRIPFADVKSFACENNDFLVTDGKVIFQQDALSFNEFIVGCDLSHIEEFGFGNLVIFGSSSQLHPLPGRPQELSVLFGSHQALVQWKPPALAIGANVILISDIIELFELGPSAWQNWTYEVKVSTQDPPEVTHIFLNISGTMLNVPELQSAMKYKVSVRASSPKRPGPWSEPSVGTTLVPASEPPFIMAVKEDGLWSKPLNSFGPGEFLSSDIGNVSDMDWYNNSLYYSDTKGDVFVWLLNGTDISENYHLPSIAGAGALAFEWLGHFLYWAGKTYVIQRQSVLTGHTDIVTHVKLLVNDMVVDSVGGYLYWTTLYSVESTRLNGESSLVLQTQPWFSGKKVIALTLDLSDGLLYWLVQDSQCIHLYTAVLRGQSTGDTTITEFAAWSTSEISQNALMYYSGRLFWINGFRIITTQEIGQKTSVSVLEPARFNQFTIIQTSLKPLPGNFSFTPKVIPDSVQESSFRIEGNASSFQILWNGPPAVDWGVVFYSVEFSAHSKFLASEQHSLPVFTVEGLEPYALFNLSVTPYTYWGKGPKTSLSLRAPETVPSAPENPRIFILPSGKCCNKNEVVVEFRWNKPKHENGVLTKFEIFYNISNQSITNKTCEDWIAVNVTPSVMSFQLEGMSPRCFIAFQVRAFTSKGPGPYADVVKSTTSEINPFPHLITLLGNKIVFLDMDQNQVVWTFSAERVISAVCYTADNEMGYYAEGDSLFLLHLHNRSSSELFQDSLVFDITVITIDWISRHLYFALKESQNGMQVFDVDLEHKVKYPREVKIHNRNSTIISFSVYPLLSRLYWTEVSNFGYQMFYYSIISHTLHRILQPTATNQQNKRNQCSCNVTEFELSGAMAIDTSNLEKPLIYFAKAQEIWAMDLEGCQCWRVITVPAMLAGKTLVSLTVDGDLIYWIITAKDSTQIYQAKKGNGAIVSQVKALRSRHILAYSSVMQPFPDKAFLSLASDTVEPTILNATNTSLTIRLPLAKTNLTWYGITSPTPTYLVYYAEVNDRKNSSDLKYRILEFQDSIALIEDLQPFSTYMIQIAVKNYYSDPLEHLPPGKEIWGKTKNGVPEAVQLINTTVRSDTSLIISWRESHKPNGPKESVRYQLAISHLALIPETPLRQSEFPNGRLTLLVTRLSGGNIYVLKVLACHSEEMWCTESHPVTVEMFNTPEKPYSLVPENTSLQFNWKAPLNVNLIRFWVELQKWKYNEFYHVKTSCSQGPAYVCNITNLQPYTSYNVRVVVVYKTGENSTSLPESFKTKAGVPNKPGIPKLLEGSKNSIQWEKAEDNGCRITYYILEIRKSTSNNLQNQNLRWKMTFNGSCSSVCTWKSKNLKGIFQFRVVAANNLGFGEYSGISENIILVGDDFWIPETSFILTIIVGIFLVVTIPLTFVWHRRLKNQKSAKEGVTVLINEDKELAELRGLAAGVGLANACYAIHTLPTQEEIENLPAFPREKLTLRLLLGSGAFGEVYEGTAVDILGVGSGEIKVAVKTLKKGSTDQEKIEFLKEAHL
MSKFNHPNILKQLGVCLLNEPQYIILELMEGGDLLTYLRKARMATFYGPLLTLVDLVDLCVDISKGCVYLERMHFIHRDLAARNCLVSVKDYTSPRIVKIGDFGLARDIYKNDYYRKRGEGLLPVRWMAPESLMDGIFTTQSDVWSFGILIWEILTLGHQPYPAHSNLDVLNYVQTGGRLEPPRNCPDDLWNLMTQCWAQEPDQRPTFHRIQDQLQLFRNFFLNSIYKSRDEANNSGVINESFEGEDGDVICLNSDDIMPVALMETKNREGLNYMVLATECGQGEEKSEGPLGSQESESCGLRKEEKEPHADKDFCQEKQVAYCPSGKPEGLNYACLTHSGYGDGSD 2 1860 1882 LVGDDFWIPE TSFILTIIVGIFLVVTIPLTFVW HRRLKNQKSA +P04843 MEAPAAGLFLLLLLGTWAPAPGSASSEAPPLINEDVKRTVDLSSHLAKVTAEVVLAHLGGGSTSRATSFLLALEPELEARLAHLGVQVKGEDEEENNLEVRETKIKGKSGRFFTVKLPVALDPGAKISVIVETVYTHVLHPYPTQITQSEKQFVVFEGNHYFYSPYPTKTQTMRVKLASRNVESYTKLGNPTRSEDLLDYGPFRDVPAYSQDTFKVHYENNSPFLTITSMTRVIEVSHWGNIAVEENVDLKHTGAVLKGPFSRYDYQRQPDSGISSIRSFKTILPAAAQDVYYRDEIGNVSTSHLLILDDSVEMEIRPRFPLFGGWKTHYIVGYNLPSYEYLYNLGDQYALKMRFVDHVFDEQVIDSLTVKIILPEGAKNIEIDSPYEISRAPDELHYTYLDTFGRPVIVAYKKNLVEQHIQDIVVHYTFNKVLMLQEPLLVVAAFYILFFTVIIYVRLDFSITKDPAAEARMKVACITEQVLTLVNKRIGLYRHFDETVNRYKQSRDISTLNSGKKSLETEHKALTSEIALLQSRLKTEGSDLCDRVSEMQKLDAQVKELVLKSAVEAERLVAGKLKKDTYIENEKLISGKRQELVTKIDHILDAL 2 440 459 FNKVLMLQEP LLVVAAFYILFFTVIIYVRL DFSITKDPAA +Q9HBV2 MSPRGTGCSAGLLMTVGWLLLAGLQSARGTNVTAAVQDAGLAHEGEGEEETENNDSETAENYAPPETEDVSNRNVVKEVEFGMCTVTCGIGVREVILTNGCPGGESKCVVRVEECRGPTDCGWGKPISESLESVRLACIHTSPLNRFKYMWKLLRQDQQSIILVNDSAILEVRKESHPLAFECDTLDNNEIVATIKFTVYTSSELQMRRSSLPATDAALIFVLTIGVIICVFIIFLLIFIIINWAAVKAFWGAKASTPEVQSEQSSVRYKDSTSLDQLPTEMPGEDDALSEWNE 2 217 239 MRRSSLPATD AALIFVLTIGVIICVFIIFLLIF IIINWAAVKA +Q96BY9 MAAACGPGAAGYCLLLGLHLFLLTAGPALGWNDPDRMLLRDVKALTLHYDRYTTSRRLDPIPQLKCVGGTAGCDSYTPKVIQCQNKGWDGYDVQWECKTDLDIAYKFGKTVVSCEGYESSEDQYVLRGSCGLEYNLDYTELGLQKLKESGKQHGFASFSDYYYKWSSADSCNMSGLITIVVLLGIAFVVYKLFLSDGQYSPPPYSEYPPFSHRYQRFTNSAGPPPPGFKSEFTGPQNTGHGATSGFGSAFTGQQGYENSGPGFWTGLGTGGILGYLFGSNRAATPFSDSWYYPSYPPSYPGTWNRAYSPLHGGSGSYSVCSNSDTKTRTASGYGGTRRR 2 172 194 YYKWSSADSC NMSGLITIVVLLGIAFVVYKLFL SDGQYSPPPY +P21583 
MKKTQTWILTCIYLQLLLFNPLVKTEGICRNRVTNNVKDVTKLVANLPKDYMITLKYVPGMDVLPSHCWISEMVVQLSDSLTDLLDKFSNISEGLSNYSIIDKLVNIVDDLVECVKENSSKDLKKSFKSPEPRLFTPEEFFRIFNRSIDAFKDFVVASETSDCVVSSTLSPEKDSRVSVTKPFMLPPVAASSLRNDSSSSNRKAKNPPGDSSLHWAAMALPALFSLIIGFAFGALYWKKRQPSLTRAVENIQINEEDNEISMLQEKEREFQEV 2 215 237 KNPPGDSSLH WAAMALPALFSLIIGFAFGALYW KKRQPSLTRA +Q9JL59 MLAYSVTSSGLFPRMLWALLLLAASLNAHNDVWDEPCCTEHEVSVNRGSRVVMACNISNNLRDVTIELVTSEKTSIIFNHTPPGNYSKDSWQLHIQGVQAQLVITDAQDKHSGNYSWKLHGFQAEFKNFNLTVNAADRQKTEDLPVTKVPDKPPTAVRTEVIIIIAIATTIIITGIGVFVWYKQFPVAPQIQMSVPCLIHGSPGIPYLTLPP 2 160 182 PDKPPTAVRT EVIIIIAIATTIIITGIGVFVWY KQFPVAPQIQ +Q8WVN6 MQTCPLAFPGHVSQALGTLLFLAASLSAQNEGWDSPICTEGVVSVSWGENTVMSCNISNAFSHVNIKLRAHGQESAIFNEVAPGYFSRDGWQLQVQGGVAQLVIKGARDSHAGLYMWHLVGHQRNNRQVTLEVSGAEPQSAPDTGFWPVPAVVTAVFILLVALVMFAWYRCRCSQQRREKKFFLLEPQMKVAALRAGAQQGLSRASAELWTPDSEPTPRPLALVFKPSPLGALELLSPQPLFPYAADP 2 146 168 AEPQSAPDTG FWPVPAVVTAVFILLVALVMFAW YRCRCSQQRR +Q7Z5N4 MARGARPSAAGGGGGGAEPPERAGPGRPRGSPPGRARPSLAPRPGPEPSRPRAAPETSGGDTAGAGRCGGRRAAKLGPGRRGWWALLALQLHLLRALAQDDVAPYFKTEPGLPQIHLEGNRLVLTCLAEGSWPLEFKWMRDDSELTTYSSEYKYIIPSLQKLDAGFYRCVVRNRMGALLQRKSEVQVAYMGSFMDTDQRKTVSQGRAAILNLLPITSYPRPQVTWFREGHKIIPSNRIAITLENQLVILATTTSDAGAYYVQAVNEKNGENKTSPFIHLSIARDVGTPETMAPTIVVPPGNRSVVAGSSETTLECIASARPVEDLSVTWKRNGVRITSGLHSFGRRLTISNPTSADTGPYVCEAALPGSAFEPARATAFLFIIEPPYFTAEPESRISAEVEETVDIGCQAMGVPLPTLQWYKDAISISRLQNPRYKVLASGGLRIQKLRPEDSGIFQCFASNEGGEIQTHTYLDVTNIAPVFTQRPVDTTVTDGMTAILRCEVSGAPKPAITWKRENHILASGSVRIPRFMLLESGGLQIAPVFIQDAGNYTCYAANTEGSLNASATLTVWNRTSIVHPPEDHVVIKGTTATLHCGATHDPRVSLRYVWKKDNVALTPSSTSRIVVEKDGSLLISQTWSGDIGDYSCEIVSEGGNDSRMARLEVIELPHSPQNLLVSPNSSHSHAVVLSWVRPFDGNSPILYYIVELSENNSPWKVHLSNVGPEMTGVTVSGLTPARTYQFRVCAVNEVGRGQYSAETSRLMLPEEPPSAPPKNIVASGRTNQSIMVQWQPPPETEHNGVLRGYILRYRLAGLPGEYQQRNITSPEVNYCLVTDLIIWTQYEIQVAAYNGAGLGVFSRAVTEYTLQGVPTAPPQNVQTEAVNSTTIQFLWNPPPQQFINGINQGYKLLAWPADAPEAVTVVTIAPDFHGVHHGHITNLKKFTAYFTSVLCFTTPGDGPPSTPQLVWTQEDKPGAVGHLSFTEILDTSLKVSWQEPLEKNGIITGYQISWEVYGRNDSRLTHTLNSTTHEYKIQGLSSLTTYTIDVAAVTAVGTGLVTSSTISSGVPPDLPGA
PSNLVISNISPRSATLQFRPGYDGKTSISRWIVEGQVGAIGDEEEWVTLYEEENEPDAQMLEIPNLTPYTHYRFRMKQVNIVGPSPYSPSSRVIQTLQAPPDVAPTSVTVRTASETSLRLRWVPLPDSQYNGNPESVGYRIKYWRSDLQSSAVAQVVSDRLEREFTIEELEEWMEYELQMQAFNAVGAGPWSEVVRGRTRESVPSAAPENVSAEAVSSTQILLTWTSVPEQDQNGLILGYKILFRAKDLDPEPRSHIVRGNHTQSALLAGLRKFVLYELQVLAFTRIGNGVPSTPLILERTKDDAPGPPVRLVFPEVRLTSVRIVWQPPEEPNGIILGYQIAYRLASSSPHTFTTVEVGATVRQFTATDLAPESAYIFRLSAKTRQGWGEPLEATVITTEKRERPAPPRELLVPQAEVTARSLRLQWVPGSDGASPIRYFTMQVRELPRGEWQTYSSSISHEATACVVDRLRPFTSYKLRLKATNDIGDSDFSSETEAVTTLQDVPGEPPGSVSATPHTTSSVLIQWQPPRDESLNGLLQGYRIYYRELEYEAGSGTEAKTLKNPIALHAELTAQSSFKTVNSSSTSTMCELTHLKKYRRYEVIMTAYNIIGESPASAPVEVFVGEAAPAMAPQNVQVTPLTASQLEVTWDPPPPESQNGNIQGYKIYYWEADSQNETEKMKVLFLPEPVVRLKNLTSHTKYLVSISAFNAAGDGPKSDPQQGRTHQAAPGAPSFLAFSEITSTTLNVSWGEPAAANGILQGYRVVYEPLAPVQGVSKVVTVEVRGNWQRWLKVRDLTKGVTYFFRVQARTITYGPELQANITAGPAEGSPGSPRDVLVTKSASELTLQWTEGHSGDTPTTGYVIEARPSDEGLWDMFVKDIPRSATSYTLSLDKLRQGVTYEFRVVAVNEAGYGEPSNPSTAVSAQVEAPFYEEWWFLLVMALSSLIVILLVVFALVLHGQNKKYKNCSTGKGISTMEESVTLDNGGFAALELSSRHLNVKSTFSKKNGTRSPPRPSPGGLHYSDEDICNKYNGAVLTESVSLKEKSADASESEATDSDYEDALPKHSFVNHYMSDPTYYNSWKRRAQGRAPAPHRYEAVAGSEAGAQLHPVITTQSAGGVYTPAGPGARTPLTGFSSFV 2 2008 2030 AQVEAPFYEE WWFLLVMALSSLIVILLVVFALV LHGQNKKYKN +Q9UBV2 
MRVRIGLTLLLCAVLLSLASASSDEEGSQDESLDSKTTLTSDESVKDHTTAGRVVAGQIFLDSEESELESSIQEEEDSLKSQEGESVTEDISFLESPNPENKDYEEPKKVRKPALTAIEGTAHGEPCHFPFLFLDKEYDECTSDGREDGRLWCATTYDYKADEKWGFCETEEEAAKRRQMQEAEMMYQTGMKILNGSNKKSQKREAYRYLQKAASMNHTKALERVSYALLFGDYLPQNIQAAREMFEKLTEEGSPKGQTALGFLYASGLGVNSSQAKALVYYTFGALGGNLIAHMVLGYRYWAGIGVLQSCESALTHYRLVANHVASDISLTGGSVVQRIRLPDEVENPGMNSGMLEEDLIQYYQFLAEKGDVQAQVGLGQLHLHGGRGVEQNHQRAFDYFNLAANAGNSHAMAFLGKMYSEGSDIVPQSNETALHYFKKAADMGNPVGQSGLGMAYLYGRGVQVNYDLALKYFQKAAEQGWVDGQLQLGSMYYNGIGVKRDYKQALKYFNLASQGGHILAFYNLAQMHASGTGVMRSCHTAVELFKNVCERGRWSERLMTAYNSYKDGDYNAAVIQYLLLAEQGYEVAQSNAAFILDQREASIVGENETYPRALLHWNRAASQGYTVARIKLGDYHFYGFGTDVDYETAFIHYRLASEQQHSAQAMFNLGYMHEKGLGIKQDIHLAKRFYDMAAEASPDAQVPVFLALCKLGVVYFLQYIRETNIRDMFTQLDMDQLLGPEWDLYLMTIIALLLGTVIAYRQRQHQDMPAPRPPGPRPAPPQQEGPPEQQPPQ 2 739 761 MFTQLDMDQL LGPEWDLYLMTIIALLLGTVIAY RQRQHQDMPA +Q14242 MPLQLLLLLILLGPGNSLQLWDTWADEAEKALGPLLARDRRQATEYEYLDYDFLPETEPPEMLRNSTDTTPLTGPGTPESTTVEPAARRSTGLDAGGAVTELTTELANMGNLSTDSAAMEIQTTQPAATEAQTTQPVPTEAQTTPLAATEAQTTRLTATEAQTTPLAATEAQTTPPAATEAQTTQPTGLEAQTTAPAAMEAQTTAPAAMEAQTTPPAAMEAQTTQTTAMEAQTTAPEATEAQTTQPTATEAQTTPLAAMEALSTEPSATEALSMEPTTKRGLFIPFSVSSVTHKGIPMAASNLSVNYPVGAPDHISVKQCLLAILILALVATIFFVCTVVLAVRLSRKGHMYPVRNYSPTEMVCISSLLPDGGEGPSATANGGLSKAKSPGLTPEPREDREGDDLTLHSFLP 2 321 343 APDHISVKQC LLAILILALVATIFFVCTVVLAV RLSRKGHMYP +Q9H3S1 
MALPALGLDPWSLLGLFLFQLLQLLLPTTTAGGGGQGPMPRVRYYAGDERRALSFFHQKGLQDFDTLLLSGDGNTLYVGAREAILALDIQDPGVPRLKNMIPWPASDRKKSECAFKKKSNETQCFNFIRVLVSYNVTHLYTCGTFAFSPACTFIELQDSYLLPISEDKVMEGKGQSPFDPAHKHTAVLVDGMLYSGTMNNFLGSEPILMRTLGSQPVLKTDNFLRWLHHDASFVAAIPSTQVVYFFFEETASEFDFFERLHTSRVARVCKNDVGGEKLLQKKWTTFLKAQLLCTQPGQLPFNVIRHAVLLPADSPTAPHIYAVFTSQWQVGGTRSSAVCAFSLLDIERVFKGKYKELNKETSRWTTYRGPETNPRPGSCSVGPSSDKALTFMKDHFLMDEQVVGTPLLVKSGVEYTRLAVETAQGLDGHSHLVMYLGTTTGSLHKAVVSGDSSAHLVEEIQLFPDPEPVRNLQLAPTQGAVFVGFSGGVWRVPRANCSVYESCVDCVLARDPHCAWDPESRTCCLLSAPNLNSWKQDMERGNPEWACASGPMSRSLRPQSRPQIIKEVLAVPNSILELPCPHLSALASYYWSHGPAAVPEASSTVYNGSLLLIVQDGVGGLYQCWATENGFSYPVISYWVDSQDQTLALDPELAGIPREHVKVPLTRVSGGAALAAQQSYWPHFVTVTVLFALVLSGALIILVASPLRALRARGKVQGCETLRPGEKAPLSREQHLQSPKECRTSASDVDADNNCLGTEVA 2 681 703 GAALAAQQSY WPHFVTVTVLFALVLSGALIILV ASPLRALRAR +Q92854 MRMCTPIRGLLMALAVMFGTAMAFAPIPRITWEHREVHLVQFHEPDIYNYSALLLSEDKDTLYIGAREAVFAVNALNISEKQHEVYWKVSEDKKAKCAEKGKSKQTECLNYIRVLQPLSATSLYVCGTNAFQPACDHLNLTSFKFLGKNEDGKGRCPFDPAHSYTSVMVDGELYSGTSYNFLGSEPIISRNSSHSPLRTEYAIPWLNEPSFVFADVIRKSPDSPDGEDDRVYFFFTEVSVEYEFVFRVLIPRIARVCKGDQGGLRTLQKKWTSFLKARLICSRPDSGLVFNVLRDVFVLRSPGLKVPVFYALFTPQLNNVGLSAVCAYNLSTAEEVFSHGKYMQSTTVEQSHTKWVRYNGPVPKPRPGACIDSEARAANYTSSLNLPDKTLQFVKDHPLMDDSVTPIDNRPRLIKKDVNYTQIVVDRTQALDGTVYDVMFVSTDRGALHKAISLEHAVHIIEETQLFQDFEPVQTLLLSSKKGNRFVYAGSNSGVVQAPLAFCGKHGTCEDCVLARDPYCAWSPPTATCVALHQTESPSRGLIQEMSGDASVCPDKSKGSYRQHFFKHGGTAELKCSQKSNLARVFWKFQNGVLKAESPKYGLMGRKNLLIFNLSEGDSGVYQCLSEERVKNKTVFQVVAKHVLEVKVVPKPVVAPTLSVVQTEGSRIATKVLVASTQGSSPPTPAVQATSSGAITLPPKPAPTGTSCEPKIVINTVPQLHSEKTMYLKSSDNRLLMSLFLFFFVLFLCLFFYNCYKGYLPRQCLKFRSALLIGKKKPKSDFCDREQSLKETLVEPGSFSQQNGEHPKPALDTGYETEQDTITSKVPTDREDSQRIDDLSARDKPFDVKCELKFADSDADGD 2 734 756 KTMYLKSSDN RLLMSLFLFFFVLFLCLFFYNCY KGYLPRQCLK +Q9Z123 
MLARAERPRPGPRPPPVSLFPPPSSLLLLLLAMLSAPVCGRVPRSVPRTSLPISEADSYLTRFAAPHTYNYSALLVDPASHTLYVGARDSIFALTLPFSGEKPRRIDWMVPETHRQNCRKKGKKEDECHNFIQILAIANASHLLTCGTFAFDPKCGVIDVSSFQQVERLESGRGKCPFEPAQRSAAVMAGGVLYTATVKNFLGTEPIISRAVGRAEDWIRTETLSSWLNAPAFVAAMVLSPAEWGDEDGDDEIFFFFTETSRVLDSYERIKVPRVARVCAGDLGGRKTLQQRWTTFLKADLLCPGPEHGRASGVLQDMTELRPQPGAGTPLFYGIFSSQWEGAAISAVCAFRPQDIRAVLNGPFRELKHDCNRGLPVMDNEVPQPRPGECITNNMKFQQFGSSLSLPDRVLTFIRDHPLMDRPVFPADGRPLLVTTDTAYLRVVAHRVTSLSGKEYDVLYLGTEDGHLHRAVRIGAQLSVLEDLALFPETQPVESMKLYHDWLLVGSHTEVTQVNTSNCGRLQSCSECILAQDPVCAWSFRLDACVAHAGEHRGMVQDIESADVSSLCPKEPGEHPVVFEVPVATVGHVVLPCSPSSAWASCVWHQPSGVTSLTPRRDGLEVVVTPGAMGAYACECQEGGAARVVAAYSLVWGSQRGPANRAHTVVGAGLVGFFLGVLAASLTLLLIGRRQQRRRQRELLARDKVGLDLGAPPSGTTSYSQDPPSPSPEDERLPLALGKRGSGFGGFPPPFLLDSCPSPAHIRLTGAPLATCDETSI 2 665 687 QRGPANRAHT VVGAGLVGFFLGVLAASLTLLLI GRRQQRRRQR +Q9NTN9 MWGRLWPLLLSILTATAVPGPSLRRPSRELDATPRMTIPYEELSGTRHFKGQAQNYSTLLLEEASARLLVGARGALFSLSANDIGDGAHKEIHWEASPEMQSKCHQKGKNNQTECFNHVRFLQRLNSTHLYACGTHAFQPLCAAIDAEAFTLPTSFEEGKEKCPYDPARGFTGLIIDGGLYTATRYEFRSIPDIRRSRHPHSLRTEETPMHWLNDAEFVFSVLVRESKASAVGDDDKVYYFFTERATEEGSGSFTQSRSSHRVARVARVCKGDLGGKKILQKKWTSFLKARLICHIPLYETLRGVCSLDAETSSRTHFYAAFTLSTQWKTLEASAICRYDLAEIQAVFAGPYMEYQDGSRRWGRYEGGVPEPRPGSCITDSLRSQGYNSSQDLPSLVLDFVKLHPLMARPVVPTRGRPLLLKRNIRYTHLTGTPVTTPAGPTYDLLFLGTADGWIHKAVVLGSGMHIIEETQVFRESQSVENLVISLLQHSLYVGAPSGVIQLPLSSCSRYRSCYDCILARDPYCGWDPGTHACAAATTIANRTALIQDIERGNRGCESSRDTGPPPPLKTRSVLRGDDVLLPCDQPSNLARALWLLNGSMGLSDGQGGYRVGVDGLLVTDAQPEHSGNYGCYAEENGLRTLLASYSLTVRPATPAPAPKAPATPGAQLAPDVRLLYVLAIAALGGLCLILASSLLYVACLREGRRGRRRKYSLGRASRAGGSAVQLQTVSGQCPGEEDEGDDEGAGGLEGSCLQIIPGEGAPAPPPPPPPPPPAELTNGLVALPSRLRRMNGNSYVLLRQSNNGVPAGPCSFAEELSRILEKRKHTQLVEQLDESSV 2 679 701 LAPDVRLLYV LAIAALGGLCLILASSLLYVACL REGRRGRRRK +Q13591 
MKGTCVIAWLFSSLGLWRLAHPEAQGTTQCQRTEHPVISYKEIGPWLREFRAKNAVDFSQLTFDPGQKELVVGARNYLFRLQLEDLSLIQAVEWECDEATKKACYSKGKSKEECQNYIRVLLVGGDRLFTCGTNAFTPVCTNRSLSNLTEIHDQISGMARCPYSPQHNSTALLTAGGELYAATAMDFPGRDPAIYRSLGILPPLRTAQYNSKWLNEPNFVSSYDIGNFTYFFFRENAVEHDCGKTVFSRAARVCKNDIGGRFLLEDTWTTFMKARLNCSRPGEVPFYYNELQSTFFLPELDLIYGIFTTNVNSIAASAVCVFNLSAIAQAFSGPFKYQENSRSAWLPYPNPNPHFQCGTVDQGLYVNLTERNLQDAQKFILMHEVVQPVTTVPSFMEDNSRFSHVAVDVVQGREALVHIIYLATDYGTIKKVRVPLNQTSSSCLLEEIELFPERRREPIRSLQILHSQSVLFVGLREHVVKIPLKRCQFYRTRSTCIGAQDPYCGWDVVMKKCTSLEESLSMTQWEQSISACPTRNLTVDGHFGVWSPWTPCTHTDGSAVGSCLCRTRSCDSPAPQCGGWQCEGPGMEIANCSRNGGWTPWTSWSPCSTTCGIGFQVRQRSCSNPTPRHGGRVCVGQNREERYCNEHLLCPPHMFWTGWGPWERCTAQCGGGIQARRRICENGPDCAGCNVEYQSCNTNPCPELKKTTPWTPWTPVNISDNGGHYEQRFRYTCKARLADPNLLEVGRQRIEMRYCSSDGTSGCSTDGLSGDFLRAGRYSAHTVNGAWSAWTSWSQCSRDCSRGIRNRKRVCNNPEPKYGGMPCLGPSLEYQECNILPCPVDGVWSCWSPWTKCSATCGGGHYMRTRSCSNPAPAYGGDICLGLHTEEALCNTQPCPESWSEWSDWSECEASGVQVRARQCILLFPMGSQCSGNTTESRPCVFDSNFIPEVSVARSSSVEEKRCGEFNMFHMIAVGLSSSILGCLLTLLVYTYCQRYQQQSHDATVIHPVSPAPLNTSITNHINKLDKYDSVEAIKAFNKNNLILEERNKYFNPHLTGKTYSNAYFTDLNNYDEY 2 971 993 KRCGEFNMFH MIAVGLSSSILGCLLTLLVYTYC QRYQQQSHDA +Q9H2E6 
MRSEALLLYFTLLHFAGAGFPEDSEPISISHGNYTKQYPVFVGHKPGRNTTQRHRLDIQMIMIMNGTLYIAARDHIYTVDIDTSHTEEIYCSKKLTWKSRQADVDTCRMKGKHKDECHNFIKVLLKKNDDALFVCGTNAFNPSCRNYKMDTLEPFGDEFSGMARCPYDAKHANVALFADGKLYSATVTDFLAIDAVIYRSLGESPTLRTVKHDSKWLKEPYFVQAVDYGDYIYFFFREIAVEYNTMGKVVFPRVAQVCKNDMGGSQRVLEKQWTSFLKARLNCSVPGDSHFYFNILQAVTDVIRINGRDVVLATFSTPYNSIPGSAVCAYDMLDIASVFTGRFKEQKSPDSTWTPVPDERVPKPRPGCCAGSSSLERYATSNEFPDDTLNFIKTHPLMDEAVPSIFNRPWFLRTMVRYRLTKIAVDTAAGPYQNHTVVFLGSEKGIILKFLARIGNSGFLNDSLFLEEMSVYNSEKCSYDGVEDKRIMGMQLDRASSSLYVAFSTCVIKVPLGRCERYGKCKKTCIASRDPYCGWIKEGGACSHLSPNSRLTFEQDIERGNTDGLGDCHNSFVALNGHSSSLLPSTTTSDSTAQEGYESRGGMLDWKHLLDSPDSTDPLGAVSSHNHQDKKGVIRESYLKGHDQLVPVTLLAIAVILAFVMGAVFSGITVYCVCDHRRKDVAVVQRKEKELTHSRRGSMSSVTKLSGLFGDTQSKDPKPEAILTPLMHNGKLATPGNTAKMLIKADQHHLDLTALPTPESTPTLQQKRKPSRGSREWERNQNLINACTKDMPPMGSPVIPTDLPLRASPSHIPSVVVLPITQQGYQHEYVDQPKMSEVAQMALEDQAATLEYKTIKEHLSSKSPNHGVNLVENLDSLPPKVPQREASLGPPGASLSQTGLSKRLEMHHSSSYGVDYKRSYPTNSLTRSHQATTLKRNNTNSSNSSHLSRNQSFGRGDNPPPAPQRVDSIQVHSSQPSGQAVTVSRQPSLNAYNSLTRSGLKRTPSLKPDVPPKPSFAPLSTSMKPNDACT 2 648 670 YLKGHDQLVP VTLLAIAVILAFVMGAVFSGITV YCVCDHRRKD +Q9H3T3 MQTPRASPPRPALLLLLLLLGGAHGLFPEEPPPLSVAPRDYLNHYPVFVGSGPGRLTPAEGADDLNIQRVLRVNRTLFIGDRDNLYRVELEPPTSTELRYQRKLTWRSNPSDINVCRMKGKQEGECRNFVKVLLLRDESTLFVCGSNAFNPVCANYSIDTLQPVGDNISGMARCPYDPKHANVALFSDGMLFTATVTDFLAIDAVIYRSLGDRPTLRTVKHDSKWFKEPYFVHAVEWGSHVYFFFREIAMEFNYLEKVVVSRVARVCKNDVGGSPRVLEKQWTSFLKARLNCSVPGDSHFYFNVLQAVTGVVSLGGRPVVLAVFSTPSNSIPGSAVCAFDLTQVAAVFEGRFREQKSPESIWTPVPEDQVPRPRPGCCAAPGMQYNASSALPDDILNFVKTHPLMDEAVPSLGHAPWILRTLMRHQLTRVAVDVGAGPWGNQTVVFLGSEAGTVLKFLVRPNASTSGTSGLSVFLEEFETYRPDRCGRPGGGETGQRLLSLELDAASGGLLAAFPRCVVRVPVARCQQYSGCMKNCIGSQDPYCGWAPDGSCIFLSPGTRAAFEQDVSGASTSGLGDCTGLLRASLSEDRAGLVSVNLLVTSSVAAFVVGAVVSGFSVGWFVGLRERRELARRKDKEAILAHGAGEAVLSVSRLGERRAQGPGGRGGGGGGGAGVPPEALLAPLMQNGWAKATLLQGGPHDLDSGLLPTPEQTPLPQKRLPTPHPHPHALGPRAWDHGHPLLPASASSSLLLLAPARAPEQPPAPGEPTPDGRLYAARPGRASHGDFPLTPHASPDRRRVVSAPTGPLDPASAADGLPRPWSPPPTGSLRRPLGPHAPPAATLRRTHTFNSGEARPGDRHRGCHARPGTDLAHLLPYGGADRTAPPVP 2 602 624 
GLVSVNLLVT SSVAAFVVGAVVSGFSVGWFVGL RERRELARRK +Q9WTM3 MPRAPHSMPLLLLLLLLSSLPQAQAAFPQDPTPLLTSDLQGASPSSWFRGLEDDAVAAELGLDFQRFLTLNRTLLVAARDHVFSFDLQAQEEGEGLVPNKFLTWRSQDMENCAVRGKLTDECYNYIRVLVPWNSQTLLACGTNSFSPMCRSYGITSLQQEGEELSGQARCPFDATQSTVAIFAEGSLYSATAADFQASDAVVYRSLGPQPPLRSAKYDSKWLREPHFVYALEHGEHVYFFFREVSVEDARLGRVQFSRVARVCKRDMGGSPRALDRHWTSFLKLRLNCSVPGDSTFYFDVLQSLTGPVNLHGRSALFGVFTTQTNSIPGSAVCAFYLDDIERGFEGKFKEQRSLDGAWTPVSEDKVPSPRPGSCAGVGAAASFSSSQDLPDDVLLFIKAHPLLDPAVPPATHQPLLTLTSRALLTQVAVDGMAGPHRNTTVLFLGSNDGTVLKVLPPGGQSLGSEPIVLEEIDAYSHARCSGKRSPRAARRIIGLELDTEGHRLFVAFPGCIVYLSLSRCARHGACQRSCLASLDPYCGWHRSRGCMSIRGPGGTDVDLTGNQESTEHGDCQDGATGSQSGPGDSAYGVRRDLSPASASRSIPIPLLLACVAAAFALGASVSGLLVSCACRRANRRRSKDIETPGLPRPLSLRSLARLHGGGPEPPPPPKDGDAAQTPQLYTTFLPPPDGGSPPELACLPTPETTPELPVKHLRASGGPWEWNQNGNNASEGPGRPPRGCSGAGGPAPRVLVRPPPPGCPGQAVEVTTLEELLRYLHGPQPPRKGSEPLASAPFTSRPPASEPGASLFVDSSPMPRDGVPPLRLDVPPEGKRAAPSGRPALSAPAPRLGVGGSRRLPFPTHRAPPGLLTRVPSGGPARYSGGPGRHLLYLGRPEGHRGRSLKRVDVKSPLSPKPPLASPPQPAPHGGHFNF 2 604 626 SPASASRSIP IPLLLACVAAAFALGASVSGLLV SCACRRANRR +Q8NFY4 
MRVFLLCAYILLLMVSQLRAVSFPEDDEPLNTVDYHYSRQYPVFRGRPSGNESQHRLDFQLMLKIRDTLYIAGRDQVYTVNLNEMPKTEVIPNKKLTWRSRQQDRENCAMKGKHKDECHNFIKVFVPRNDEMVFVCGTNAFNPMCRYYRLSTLEYDGEEISGLARCPFDARQTNVALFADGKLYSATVADFLASDAVIYRSMGDGSALRTIKYDSKWIKEPHFLHAIEYGNYVYFFFREIAVEHNNLGKAVYSRVARICKNDMGGSQRVLEKHWTSFLKARLNCSVPGDSFFYFDVLQSITDIIQINGIPTVVGVFTTQLNSIPGSAVCAFSMDDIEKVFKGRFKEQKTPDSVWTAVPEDKVPKPRPGCCAKHGLAEAYKTSIDFPDETLSFIKSHPLMDSAVPPIADEPWFTKTRVRYRLTAISVDHSAGPYQNYTVIFVGSEAGMVLKVLAKTSPFSLNDSVLLEEIEAYNHAKCSAENEEDKKVISLQLDKDHHALYVAFSSCIIRIPLSRCERYGSCKKSCIASRDPYCGWLSQGSCGRVTPGMLAEGYEQDTEFGNTAHLGDCHEILPTSTTPDYKIFGGPTSDMEVSSSSVTTMASIPEITPKVIDTWRPKLTSSRKFVVQDDPNTSDFTDPLSGIPKGVRWEVQSGESNQMVHMNVLITCVFAAFVLGAFIAGVAVYCYRDMFVRKNRKIHKDAESAQSCTDSSGSFAKLNGLFDSPVKEYQQNIDSPKLYSNLLTSRKELPPNGDTKSMVMDHRGQPPELAALPTPESTPVLHQKTLQAMKSHSEKAHGHGASRKETPQFFPSSPPPHSPLSHGHIPSAIVLPNATHDYNTSFSNSNAHKAEKKLQNIDHPLTKSSSKRDHRRSVDSRNTLNDLLKHLNDPNSNPKAIMGDIQMAHQNLMLDPMGSMSEVPPKVPNREASLYSPPSTLPRNSPTKRVDVPTTPGVPMTSLERQRGYHKNSSQRHSISAMPKNLNSPNGVLLSRQPSMNRGGYMPTPTGAKVDYIQGTPVSVHLQPSLSRQSSYTSNGTLPRTGLKRTPSLKPDVPPKPSFVPQTPSVRPLNKYTY 2 664 686 ESNQMVHMNV LITCVFAAFVLGAFIAGVAVYCY RDMFVRKNRK +Q53EL9 
MRPVALLLLPSLLALLAHGLSLEAPTVGKGQAPGIEETDGELTAAPTPEQPERGVHFVTTAPTLKLLNHHPLLEEFLQEGLEKGDEELRPALPFQPDPPAPFTPSPLPRLANQDSRPVFTSPTPAMAAVPTQPQSKEGPWSPESESPMLRITAPLPPGPSMAVPTLGPGEIASTTPPSRAWTPTQEGPGDMGRPWVAEVVSQGAGIGIQGTITSSTASGDDEETTTTTTIITTTITTVQTPGPCSWNFSGPEGSLDSPTDLSSPTDVGLDCFFYISVYPGYGVEIKVQNISLREGETVTVEGLGGPDPLPLANQSFLLRGQVIRSPTHQAALRFQSLPPPAGPGTFHFHYQAYLLSCHFPRRPAYGDVTVTSLHPGGSARFHCATGYQLKGARHLTCLNATQPFWDSKEPVCIAACGGVIRNATTGRIVSPGFPGNYSNNLTCHWLLEAPEGQRLHLHFEKVSLAEDDDRLIIRNGDNVEAPPVYDSYEVEYLPIEGLLSSGKHFFVELSTDSSGAAAGMALRYEAFQQGHCYEPFVKYGNFSSSTPTYPVGTTVEFSCDPGYTLEQGSIIIECVDPHDPQWNETEPACRAVCSGEITDSAGVVLSPNWPEPYGRGQDCIWGVHVEEDKRIMLDIRVLRIGPGDVLTFYDGDDLTARVLGQYSGPRSHFKLFTSMADVTIQFQSDPGTSVLGYQQGFVIHFFEVPRNDTCPELPEIPNGWKSPSQPELVHGTVVTYQCYPGYQVVGSSVLMCQWDLTWSEDLPSCQRVTSCHDPGDVEHSRRLISSPKFPVGATVQYICDQGFVLMGSSILTCHDRQAGSPKWSDRAPKCLLEQLKPCHGLSAPENGARSPEKQLHPAGATIHFSCAPGYVLKGQASIKCVPGHPSHWSDPPPICRAASLDGFYNSRSLDVAKAPAASSTLDAAHIAAAIFLPLVAMVLLVGGVYFYFSRLQGKSSLQLPRPRPRPYNRITIESAFDNPTYETGSLSFAGDERI 2 926 948 AASSTLDAAH IAAAIFLPLVAMVLLVGGVYFYF SRLQGKSSLQ +Q16586 MAETLFWTPLLVVLLAGLGDTEAQQTTLHPLVGRVFVHTLDHETFLSLPEHVAVPPAVHITYHAHLQGHPDLPRWLRYTQRSPHHPGFLYGSATPEDRGLQVIEVTAYNRDSFDTTRQRLVLEIGDPEGPLLPYQAEFLVRSHDAEEVLPSTPASRFLSALGGLWEPGELQLLNVTSALDRGGRVPLPIEGRKEGVYIKVGSASPFSTCLKMVASPDSHARCAQGQPPLLSCYDTLAPHFRVDWCNVTLVDKSVPEPADEVPTPGDGILEHDPFFCPPTEAPDRDFLVDALVTLLVPLLVALLLTLLLAYVMCCRREGRLKRDLATSDIQMVHHCTIHGNTEELRQMAASREVPRPLSTLPMFNVHTGERLPPRVDSAQVPLILDQH 2 290 312 EAPDRDFLVD ALVTLLVPLLVALLLTLLLAYVM CCRREGRLKR +Q6UWI4 MWGARRSSVSSSWNAASLLQLLLAALLAAGARASGEYCHGWLDAQGVWRIGFQCPERFDGGDATICCGSCALRYCCSSAEARLDQGGCDNDRQQGAGEPGRADKDGPDGSAVPIYVPFLIVGSVFVAFIILGSLVAACCCRCLRPKQDPQQSRAPGGNRLMETIPMIPSASTSRGSSSRQSSTAASSSSSANSGARAPPTRSQTNCCLPEGTMNNVYVNMPTNFSVLNCQQATQIVPHQGQYLHPPYVGYTVQHDSVPMTAVPPFMDGLQPGYRQIQSPFPHTNSEQKMYPAVTV 2 114 136 KDGPDGSAVP IYVPFLIVGSVFVAFIILGSLVA ACCCRCLRPK +Q96DD7 
MPPAGLRRAAPLTAIALLVLGAPLVLAGEDCLWYLDRNGSWHPGFNCEFFTFCCGTCYHRYCCRDLTLLITERQQKHCLAFSPKTIAGIASAVILFVAVVATTICCFLCSCCYLYRRRQQLQSPFEGQEIPMTGIPVQPVYPYPQDPKAGPAPPQPGFIYPPSGPAPQYPLYPAGPPVYNPAAPPPYMPPQPSYPGA 2 86 108 KHCLAFSPKT IAGIASAVILFVAVVATTICCFL CSCCYLYRRR +Q6ZSJ9 MALRRLLLLLLLSLESLDLLPSVHGARGRAANRTLSAGGAAVGGRRAGGALARGGRELNGTARAPGIPEAGSRRGQPAAAVAAAASAAVTYETCWGYYDVSGQYDKEFECNNSESGYLYCCGTCYYRFCCKKRHEKLDQRQCTNYQSPVWVQTPSTKVVSPGPENKYDPEKDKTNFTVYITCGVIAFVIVAGVFAKVSYDKAHRPPREMNIHRALADILRQQGPIPIAHCERETISAIDTSPKENTPVRSSSKNHYTPVRTAKQTPEKPRMNNILTSATEPYDLSFSRSFQNLAHLPPSYESAVKTNPSKYSSLKRLTDKEADEYYMRRRHLPDLAARGTLPLNVIQMSQQKPLPRERPRRPIRAMSQDRVLSPDRGLPDEFSMPYDRILSDEQLLSTERLHSQDPLLSPERTAFPEQSLSRAISHTDVFVSTPVLDRYRMSKMHSHPSASNNSYATLGQSQTAAKRHAFASRRHNTVEQLHYIPGHHTCYTASKTEVTV 2 176 195 KYDPEKDKTN FTVYITCGVIAFVIVAGVFA KVSYDKAHRP +B8ZZ34 MARAGARGLLGGRRPPGLRLALALRLALLLARPPSGRAGAPEAQGPAAPGTTAPEGGDRCRGYYDVMGQWDPPFNCSSGAYSFCCGTCGYRFCCHDGPRRLDQSRCSNYDTPAWVQTGRPPARARDTAAPRDPGRERSHTAVYAVCGVAALLVLAGIGARLGLERAHSPRARRTVTRALTELLKQPGPQEPLPPTLGPPLGGCVQVQMGDGLPRGSPHNSADKKRLNNAPRGSAAPGPPRGPRLQGGGSLTLQPDYAKYATFKAAALKAAEAAPRDFCQRFPALEPSPRQPPARAPRPSPDLPAPLDACPWAPPVYAPPAAPGPYAAWTSSRPARPAPLSHPTARAFQVPRRPGHAARRQFSVKMPETFNPQLPGLYGSAGRGSRYLRTNSKTEVTV 2 141 163 RDPGRERSHT AVYAVCGVAALLVLAGIGARLGL ERAHSPRARR +Q3SXP7 MTSCGQQSLNVLAVLFSLLFSAVLSAHFRVCEPYTDHKGRYHFGFHCPRLSDNKTFILCCHHNNTVFKYCCNETEFQAVMQANLTASSEGYMHNNYTALLGVWIYGFFVLMLLVLDLLYYSAMNYDICKVYLARWGIQGRWMKQDPRRWGNPARAPRPGQRAPQPQPPPGPLPQAPQAVHTLRGDAHSPPLMTFQSSSA 2 99 121 EGYMHNNYTA LLGVWIYGFFVLMLLVLDLLYYS AMNYDICKVY +Q96LC7 
MLLPLLLSSLLGGSQAMDGRFWIRVQESVMVPEGLCISVPCSFSYPRQDWTGSTPAYGYWFKAVTETTKGAPVATNHQSREVEMSTRGRFQLTGDPAKGNCSLVIRDAQMQDESQYFFRVERGSYVRYNFMNDGFFLKVTALTQKPDVYIPETLEPGQPVTVICVFNWAFEECPPPSFSWTGAALSSQGTKPTTSHFSVLSFTPRPQDHNTDLTCHVDFSRKGVSAQRTVRLRVAYAPRDLVISISRDNTPALEPQPQGNVPYLEAQKGQFLRLLCAADSQPPATLSWVLQNRVLSSSHPWGPRPLGLELPGVKAGDSGRYTCRAENRLGSQQRALDLSVQYPPENLRVMVSQANRTVLENLGNGTSLPVLEGQSLCLVCVTHSSPPARLSWTQRGQVLSPSQPSDPGVLELPRVQVEHEGEFTCHARHPLGSQHVSLSLSVHYSPKLLGPSCSWEAEGLHCSCSSQASPAPSLRWWLGEELLEGNSSQDSFEVTPSSAGPWANSSLSLHGGLSSGLRLRCEAWNVHGAQSGSILQLPDKKGLISTAFSNGAFLGIGITALLFLCLALIIMKILPKRRTQTETPRPRFSRHSTILDYINVVPTAGPLAQKRNQKATPNSPRTPLPPGAPSPESKKNQKKQYQLPSFPEPKSSTQAPESQESQEELHYATLNFPGVRPRPEARMPKGTQADYAEVKFQ 2 548 570 PDKKGLISTA FSNGAFLGIGITALLFLCLALII MKILPKRRTQ +Q08ET2 MLPLLLLPLLWGGSLQEKPVYELQVQKSVTVQEGLCVLVPCSFSYPWRSWYSSPPLYVYWFRDGEIPYYAEVVATNNPDRRVKPETQGRFRLLGDVQKKNCSLSIGDARMEDTGSYFFRVERGRDVKYSYQQNKLNLEVTALIEKPDIHFLEPLESGRPTRLSCSLPGSCEAGPPLTFSWTGNALSPLDPETTRSSELTLTPRPEDHGTNLTCQVKRQGAQVTTERTVQLNVSYAPQNLAISIFFRNGTGTALRILSNGMSVPIQEGQSLFLACTVDSNPPASLSWFREGKALNPSQTSMSGTLELPNIGAREGGEFTCRVQHPLGSQHLSFILSVQRSSSSCICVTEKQQGSWPLVLTLIRGALMGAGFLLTYGLTWIYYTRCGGPQQSRAERPG 2 359 381 KQQGSWPLVL TLIRGALMGAGFLLTYGLTWIYY TRCGGPQQSR +O43699 MQGAQEASASEMLPLLLPLLWAGALAQERRFQLEGPESLTVQEGLCVLVPCRLPTTLPASYYGYGYWFLEGADVPVATNDPDEEVQEETRGRFHLLWDPRRKNCSLSIRDARRRDNAAYFFRLKSKWMKYGYTSSKLSVRVMALTHRPNISIPGTLESGHPSNLTCSVPWVCEQGTPPIFSWMSAAPTSLGPRTTQSSVLTITPRPQDHSTNLTCQVTFPGAGVTMERTIQLNVSYAPQKVAISIFQGNSAAFKILQNTSSLPVLEGQALRLLCDADGNPPAHLSWFQGFPALNATPISNTGVLELPQVGSAEEGDFTCRAQHPLGSLQISLSLFVHWKPEGRAGGVLGAVWGASITTLVFLCVCFIFRVKTRRKKAAQPVQNTDDVNPVMVSGSRGHQHQFQTGIVSDHPAEAGPISEDEQELHYAVLHFHKVQPQEPKVTDTEYSEIKIHK 2 346 368 VHWKPEGRAG GVLGAVWGASITTLVFLCVCFIF RVKTRRKKAA +Q9NYZ4 
MLLLLLLLPLLWGTKGMEGDRQYGDGYLLQVQELVTVQEGLCVHVPCSFSYPQDGWTDSDPVHGYWFRAGDRPYQDAPVATNNPDREVQAETQGRFQLLGDIWSNDCSLSIRDARKRDKGSYFFRLERGSMKWSYKSQLNYKTKQLSVFVTALTHRPDILILGTLESGHSRNLTCSVPWACKQGTPPMISWIGASVSSPGPTTARSSVLTLTPKPQDHGTSLTCQVTLPGTGVTTTSTVRLDVSYPPWNLTMTVFQGDATASTALGNGSSLSVLEGQSLRLVCAVNSNPPARLSWTRGSLTLCPSRSSNPGLLELPRVHVRDEGEFTCRAQNAQGSQHISLSLSLQNEGTGTSRPVSQVTLAAVGGAGATALAFLSFCIIFIIVRSCRKKSARPAAGVGDTGMEDAKAIRGSASQGPLTESWKDGNPLKKPPPAVAPSSGEEGELHYATLSFHKVKPQDPQGQEATDSEYSEIKIHKRETAETQACLRNHNPSSKEVRG 2 362 384 TSRPVSQVTL AAVGGAGATALAFLSFCIIFIIV RSCRKKSARP +Q5JXA9 MCSTMSAPTCLAHLPPCFLLLALVLVPSDASGQSSRNDWQVLQPEGPMLVAEGETLLLRCMVVGSCTDGMIKWVKVSTQDQQEIYNFKRGSFPGVMPMIQRTSEPLNCDYSIYIHNVTREHTGTYHCVRFDGLSEHSEMKSDEGTSVLVKGAGDPEPDLWIIQPQELVLGTTGDTVFLNCTVLGDGPPGPIRWFQGAGLSREAIYNFGGISHPKETAVQASNNDFSILLQNVSSEDAGTYYCVKFQRKPNRQYLSGQGTSLKVKAKSTSSKEAEFTSEPATEMSPTGLLVVFAPVVLGLKAITLAALLLALATSRRSPGQEDVKTTGPAGAMNTLAWSKGQE 2 289 311 PATEMSPTGL LVVFAPVVLGLKAITLAALLLAL ATSRRSPGQE +Q9Y3P8 MNQADPRLRAVCLWTLTSAAMSRGDNCTDLLALGIPSITQAWGLWVLLGAVTLLFLISLAAHLSQWTRGRSRSHPGQGRSGESVEEVPLYGNLHYLQTGRLSQDPEPDQQDPTLGGPARAAEEVMCYTSLQLRPPQGRIPGPGTPVKYSEVVLDSEPKSQASGPEPELYASVCAQTRRARASFPDQAYANSQPAAS 2 41 63 LALGIPSITQ AWGLWVLLGAVTLLFLISLAAHL SQWTRGRSRS +Q13291 MDPKGLLSLTFVLFLSLAFGASYGTGGRMMNCPKILRQLGSKVLLPLTYERINKSMNKSIHIVVTMAKSLENSVENKIVSLDPSEAGPPRYLGDRYKFYLENLTLGIRESRKEDEGWYLMTLEKNVSVQRFCLQLRLYEQVSTPEIKVLNKTQENGTCTLILGCTVEKGDHVAYSWSEKAGTHPLNPANSSHLLSLTLGPQHADNIYICTVSNPISNNSQTFSPWPGCRTDPSETKPWAVYAGLLGGVIMILIMVVILQLRRRGKTNHYQTTVEKKSLTIYAQVQKPGPLQKKLDSFPAQDPCTTIYVAATEPVPESVQETNSITVYASVTLPES 2 238 260 CRTDPSETKP WAVYAGLLGGVIMILIMVVILQL RRRGKTNHYQ +Q9QUM4 MDPKGSLSWRILLFLSLAFELSYGTGGGVMDCPVILQKLGQDTWLPLTNEHQINKSVNKSVRILVTMATSPGSKSNKKIVSFDLSKGSYPDHLEDGYHFQSKNLSLKILGNRRESEGWYLVSVEENVSVQQFCKQLKLYEQVSPPEIKVLNKTQENENGTCSLLLACTVKKGDHVTYSWSDEAGTHLLSRANRSHLLHITLSNQHQDSIYNCTASNPVSSISRTFNLSSQACKQESSSESSPWMQYTLVPLGVVIIFILVFTAIIMMKRQGKSNHCQPPVEEKSLTIYAQVQKSGPQEKKLHDALTDQDPCTTIYVAATEPAPESVQEPNPTTVYASVTLPES 2 243 265 KQESSSESSP 
WMQYTLVPLGVVIIFILVFTAII MMKRQGKSNH +Q9UIB8 MAQHHLWILLLCLQTWPEAAGKDSEIFTVNGILGESVTFPVNIQEPRQVKIIAWTSKTSVAYVTPGDSETAPVVTVTHRNYYERIHALGPNYNLVISDLRMEDAGDYKADINTQADPYTTTKRYNLQIYRRLGKPKITQSLMASVNSTCNVTLTCSVEKEEKNVTYNWSPLGEEGNVLQIFQTPEDQELTYTCTAQNPVSNNSDSISARQLCADIAMGFRTHHTGLLSVLAMFFLLVLILSSVFLFRLFKRRQGRIFPEGSCLNTFTKNPYAASKKTIYTYIMASRNTQPAESRIYDEILQSKVLPSKEEPVNTVYSEVQFADKMGKASTQDSKPPGTSSYEIVI 2 224 246 DIAMGFRTHH TGLLSVLAMFFLLVLILSSVFLF RLFKRRQGRI +Q96DU3 MLWLFQSLLFVFCFGPGNVVSQSSLTPLMVNGILGESVTLPLEFPAGEKVNFITWLFNETSLAFIVPHETKSPEIHVTNPKQGKRLNFTQSYSLQLSNLKMEDTGSYRAQISTKTSAKLSSYTLRILRQLRNIQVTNHSQLFQNMTCELHLTCSVEDADDNVSFRWEALGNTLSSQPNLTVSWDPRISSEQDYTCIAENAVSNLSFSVSAQKLCEDVKIQYTDTKMILFMVSGICIVFGFIILLLLVLRKRRDSLSLSTQRTQGPAESARNLEYVSVSPTNNTVYASVTHSNRETEIWTPRENDTITIYSTINHSKESKPTFSRATALDNVV 2 226 248 DVKIQYTDTK MILFMVSGICIVFGFIILLLLVL RKRRDSLSLS +Q9ET39 MAVSRAPAPDSACQRMVWLFPLVFCLGSGSEVSQSSSDPQLMNGVLGESAVLPLKLPAGKIANIIIWNYEWEASQVTALVINLSNPESPQIMNTDVKKRLNITQSYSLQISNLTMADTGSYTAQITTKDSEVITFKYILRVFERLGNLETTNYTLLLENGTCQIHLACVLKNQSQTVSVEWQATGNISLGGPNVTIFWDPRNSGDQTYVCRAKNAVSNLSVSVSTQSLCKGVLTNPPWNAVWFMTTISIISAVILIFVCWSIHVWKRRGSLPLTSQHPESSQSTDGPGSPGNTVYAQVTRPMQEMKIPKPIKNDSMTIYSIVNHSREETVALTGYNQPITLKVNTLINYNS 2 240 262 KGVLTNPPWN AVWFMTTISIISAVILIFVCWSI HVWKRRGSLP +Q8BHK6 MARFSTYIIFTSVLCQLTVTAASGTLKKVAGALDGSVTFTLNITEIKVDYVVWTFNTFFLAMVKKDGVTSQSSNKERIVFPDGLYSMKLSQLKKNDSGAYRAEIYSTSSQASLIQEYVLHVYKHLSRPKVTIDRQSNKNGTCVINLTCSTDQDGENVTYSWKAVGQGDNQFHDGATLSIAWRSGEKDQALTCMARNPVSNSFSTPVFPQKLCEDAATDLTSLRGILYILCFSAVLILFAVLLTIFHTTWIKKGKGCEEDKKRVDRHQEMPDLCPHLEENADYDTIPYTEKRRPEEDAPNTFYSTVQIPKVVKSPSSLPAKPLVPRSLSFENVI 2 224 246 DAATDLTSLR GILYILCFSAVLILFAVLLTIFH TTWIKKGKGC +Q9D3G2 MWSLWSLLLFEALLPVVVVSVQVLSKVGDSELLVAECPPGFQVREAIWRSLWPSEELLATFFRGSLETLYHSRFLGRVQLYDNLSLELGPLKPGDSGNFSVLMVDTGGQTWTQTLYLKVYDAVPKPEVQVFTAAAEETQPLNTCQVFLSCWAPNISDITYSWRWEGTVDFNGEVRSHFSNGQVLSVSLGLGDKDVAFTCIASNPVSWDMTTVTPWESCHHEAASGKASYKDVLLVVVPITLFLILAGLFGAWHHGLCSGKKKDACTDGVLPETENALV 2 234 256 SGKASYKDVL LVVVPITLFLILAGLFGAWHHGL CSGKKKDACT 
+Q96A28 MCAFPWLLLLLLLQEGSQRRLWRWCGSEEVVAVLQESISLPLEIPPDEEVENIIWSSHKSLATVVPGKEGHPATIMVTNPHYQGQVSFLDPSYSLHISNLSWEDSGLYQAQVNLRTSQISTMQQYNICVYRWLSEPQITVNFESSGEGACSMSLVCSVEKAGMDMTYSWLSRGDSTYTFHEGPVLSTSWRPGDSALSYTCRANNPISNVSSCPIPDGPFYADPNYASEKPSTAFCLLAKGLLIFLLLVILAMGLWVIRVQKRHKMPRMKKLMRNRMKLRKEAKPGSSPA 2 240 259 PSTAFCLLAK GLLIFLLLVILAMGLWVIRV QKRHKMPRMK +Q96PX8 MLLWILLLETSLCFAAGNVTGDVCKEKICSCNEIEGDLHVDCEKKGFTSLQRFTAPTSQFYHLFLHGNSLTRLFPNEFANFYNAVSLHMENNGLHEIVPGAFLGLQLVKRLHINNNKIKSFRKQTFLGLDDLEYLQADFNLLRDIDPGAFQDLNKLEVLILNDNLISTLPANVFQYVPITHLDLRGNRLKTLPYEEVLEQIPGIAEILLEDNPWDCTCDLLSLKEWLENIPKNALIGRVVCEAPTRLQGKDLNETTEQDLCPLKNRVDSSLPAPPAQEETFAPGPLPTPFKTNGQEDHATPGSAPNGGTKIPGNWQIKIRPTAAIATGSSRNKPLANSLPCPGGCSCDHIPGSGLKMNCNNRNVSSLADLKPKLSNVQELFLRDNKIHSIRKSHFVDYKNLILLDLGNNNIATVENNTFKNLLDLRWLYMDSNYLDTLSREKFAGLQNLEYLNVEYNAIQLILPGTFNAMPKLRILILNNNLLRSLPVDVFAGVSLSKLSLHNNYFMYLPVAGVLDQLTSIIQIDLHGNPWECSCTIVPFKQWAERLGSEVLMSDLKCETPVNFFRKDFMLLSNDEICPQLYARISPTLTSHSKNSTGLAETGTHSNSYLDTSRVSISVLVPGLLLVFVTSAFTVVGMLVFILRNRKRSKRRDANSSASEINSLQTVCDSSYWHNGPYNADGAHRVYDCGSHSLSD 2 621 643 DTSRVSISVL VPGLLLVFVTSAFTVVGMLVFIL RNRKRSKRRD +Q9H156 MLSGVWFLSVLTVAGILQTESRKTAKDICKIRCLCEEKENVLNINCENKGFTTVSLLQPPQYRIYQLFLNGNLLTRLYPNEFVNYSNAVTLHLGNNGLQEIRTGAFSGLKTLKRLHLNNNKLEILREDTFLGLESLEYLQADYNYISAIEAGAFSKLNKLKVLILNDNLLLSLPSNVFRFVLLTHLDLRGNRLKVMPFAGVLEHIGGIMEIQLEENPWNCTCDLLPLKAWLDTITVFVGEIVCETPFRLHGKDVTQLTRQDLCPRKSASDSSQRGSHADTHVQRLSPTMNPALNPTRAPKASRPPKMRNRPTPRVTVSKDRQSFGPIMVYQTKSPVPLTCPSSCVCTSQSSDNGLNVNCQERKFTNISDLQPKPTSPKKLYLTGNYLQTVYKNDLLEYSSLDLLHLGNNRIAVIQEGAFTNLTSLRRLYLNGNYLEVLYPSMFDGLQSLQYLYLEYNVIKEIKPLTFDALINLQLLFLNNNLLRSLPDNIFGGTALTRLNLRNNHFSHLPVKGVLDQLPAFIQIDLQENPWDCTCDIMGLKDWTEHANSPVIINEVTCESPAKHAGEILKFLGREAICPDSPNLSDGTVLSMNHNTDTPRSLSVSPSSYPELHTEVPLSVLILGLLVVFILSVCFGAGLFVFVLKRRKGVPSVPRNTNNLDVSSFQLQYGSYNTETHDKTDGHVYNYIPPPVGQMCQNPIYMQKEGDPVAYYRNLQEFSYSNLEEKKEEPATPAYTISATELLEKQATPREPELLYQNIAERVKELPSAGLVHYNFCTLPKRQFAPSYESRRQNQDRINKTVLYGTPRKCFVGQSKPNHPLLQAKPQSEPDYLEVLEKQTAISQL 2 622 644 LHTEVPLSVL 
ILGLLVVFILSVCFGAGLFVFVL KRRKGVPSVP +Q3V0X1 MKPLKLFCIGLLLCPLVCLLLETAPPPSALLTLEVKEKTGLKSDAMGVFAIRKNTSDINRQVSGLQRPWMTKFKNHLSDFFKSSIPPAAIFALFVTTAIMRAAIVNKRLEEPHRQWTIDQRSSLEMQNMNLIKLFGG 2 83 105 FKNHLSDFFK SSIPPAAIFALFVTTAIMRAAIV NKRLEEPHRQ +Q62230 MCVLFSLLLLASVFSLGQTTWGVSSPKNVQGLSGSCLLIPCIFSYPADVPVSNGITAIWYYDYSGKRQVVIHSGDPKLVDKRFRGRAELMGNMDHKVCNLLLKDLKPEDSGTYNFRFEISDSNRWLDVKGTTVTVTTDPSPPTITIPEELREGMERNFNCSTPYLCLQEKQVSLQWRGQDPTHSVTSSFQSLEPTGVYHQTTLHMALSWQDHGRTLLCQFSLGAHSSRKEVYLQVPHAPKGVEILLSSSGRNILPGDPVTLTCRVNSSYPAVSAVQWARDGVNLGVTGHVLRLFSAAWNDSGAYTCQATNDMGSLVSSPLSLHVFMAEVKMNPAGPVLENETVTLLCSTPKEAPQELRYSWYKNHILLEDAHASTLHLPAVTRADTGFYFCEVQNAQGSERSSPLSVVVRYPPLTPDLTTFLETQAGLVGILHCSVVSEPLATVVLSHGGLTLASNSGENDFNPRFRISSAPNSLRLEIRDLQPADSGEYTCLAVNSLGNSTSSLDFYANVARLLINPSAEVVEGQAVTLSCRSGLSPAPDTRFSWYLNGALLLEGSSSSLLLPAASSTDAGSYYCRTQAGPNTSGPSLPTVLTVFYPPRKPTFTARLDLDTSGVGDGRRGILLCHVDSDPPAQLRLLHKGHVVATSLPSRCGSCSQRTKVSRTSNSLHVEIQKPVLEDEGVYLCEASNTLGNSSAAASFNAKATVLVITPSNTLREGTEANLTCNVNQEVAVSPANFSWFRNGVLWTQGSLETVRLQPVARTDAAVYACRLLTEDGAQLSAPVVLSVLYAPDPPKLSALLDVGQGHMAVFICTVDSYPLAHLSLFRGDHLLATNLEPQRPSHGRIQAKATANSLQLEVRELGLVDSGNYHCEATNILGSANSSLFFQVRGAWVQVSPSPELREGQAVVLSCQVPTGVSEGTSYSWYQDGRPLQESTSSTLRIAAISLRQAGAYHCQAQAPDTAIASLAAPVSLHVSYTPRHVTLSALLSTDPERLGHLVCSVQSDPPAQLQLFHRNRLVASTLQGADELAGSNPRLHVTVLPNELRLQIHFPELEDDGTYTCEASNTLGQASAAADFDAQAVRVTVWPNATVQEGQQVNLTCLVWSTHQDSLSYTWYKGGQQLLGARSITLPSVKVLDATSYRCGVGLPGHAPHLSRPVTLDVLHAPRNLRLTYLLETQGRQLALVLCTVDSRPPAQLTLSHGDQLVASSTEASVPNTLRLELQDPRPSNEGLYSCSAHSPLGKANTSLELLLEGVRVKMNPSGSVPEGEPVTVTCEDPAALSSALYAWFHNGHWLQEGPASSLQFLVTTRAHAGAYFCQVHDTQGTRSSRPASLQILYAPRDAVLSSFRDSRTRLMVVIQCTVDSEPPAEMVLSHNGKVLAASHERHSSASGIGHIQVARNALRLQVQDVTLGDGNTYVCTAQNTLGSISTTQRLLTETDIRVTAEPGLDVPEGTALNLSCLLPGGSGPTGNSSFTWFWNRHRLHSAPVPTLSFTPVVRAQAGLYHCRADLPTGATTSAPVMLRVLYPPKTPTLIVFVEPQGGHQGILDCRVDSEPLAILTLHRGSQLVASNQLHDAPTKPHIRVTAPPNALRVDIEELGPSNQGEYVCTASNTLGSASASAYFGTRALHQLQLFQRLLWVLGFLAGFLCLLLGLVAYHTWRKKSSTKLNEDENSAEMATKKNTIQEEVVAAL 2 1641 1663 LHQLQLFQRL LWVLGFLAGFLCLLLGLVAYHTW RKKSSTKLNE 
+Q96PQ0 MAHRGPSRASKGPGPTARAPSPGAPPPPRSPRSRPLLLLLLLLGACGAAGRSPEPGRLGPHAQLTRVPRSPPAGRAEPGGGEDRQARGTEPGAPGPSPGPAPGPGEDGAPAAGYRRWERAAPLAGVASRAQVSLISTSFVLKGDATHNQAMVHWTGENSSVILILTKYYHADMGKVLESSLWRSSDFGTSYTKLTLQPGVTTVIDNFYICPTNKRKVILVSSSLSDRDQSLFLSADEGATFQKQPIPFFVETLIFHPKEEDKVLAYTKESKLYVSSDLGKKWTLLQERVTKDHVFWSVSGVDADPDLVHVEAQDLGGDFRYVTCAIHNCSEKMLTAPFAGPIDHGSLTVQDDYIFFKATSANQTKYYVSYRRNEFVLMKLPKYALPKDLQIISTDESQVFVAVQEWYQMDTYNLYQSDPRGVRYALVLQDVRSSRQAEESVLIDILEVRGVKGVFLANQKIDGKVMTLITYNKGRDWDYLRPPSMDMNGKPTNCKPPDCHLHLHLRWADNPYVSGTVHTKDTAPGLIMGAGNLGSQLVEYKEEMYITSDCGHTWRQVFEEEHHILYLDHGGVIVAIKDTSIPLKILKFSVDEGLTWSTHNFTSTSVFVDGLLSEPGDETLVMTVFGHISFRSDWELVKVDFRPSFSRQCGEEDYSSWELSNLQGDRCIMGQQRSFRKRKSTSWCIKGRSFTSALTSRVCECRDSDFLCDYGFERSSSSESSTNKCSANFWFNPLSPPDDCALGQTYTSSLGYRKVVSNVCEGGVDMQQSQVQLQCPLTPPRGLQVSIQGEAVAVRPGEDVLFVVRQEQGDVLTTKYQVDLGDGFKAMYVNLTLTGEPIRHRYESPGIYRVSVRAENTAGHDEAVLFVQVNSPLQALYLEVVPVIGLNQEVNLTAVLLPLNPNLTVFYWWIGHSLQPLLSLDNSVTTRFSDTGDVRVTVQAACGNSVLQDSRVLRVLDQFQVMPLQFSKELDAYNPNTPEWREDVGLVVTRLLSKETSVPQELLVTVVKPGLPTLADLYVLLPPPRPTRKRSLSSDKRLAAIQQVLNAQKISFLLRGGVRVLVALRDTGTGAEQLGGGGGYWAVVVLFVIGLFAAGAFILYKFKRKRPGRTVYAQMHNEKEQEMTSPVSHSEDVQGAVQGNHSGVVLSINSREMHSYLVS 2 1078 1100 GTGAEQLGGG GGYWAVVVLFVIGLFAAGAFILY KFKRKRPGRT +Q9WU03 MAQLCELRRGRALLALVASLLLSGAQVASRELDVHESCGVSKVVGKCRASIPRWWYNITDGSCQPFVYGGCEGNGNNYQSKEECLDKCAGVTENTTDDMARNRNGADSSVLSVPRKQSAEDLSAEIFNYEEYCVPKAVTGPCRAAFPRWYYDTEKNSCISFIYGGCRGNKNSYLSQEACMQHCSGKQMHPFLTPGLKAVILVGLFLMVLILLLGTSMVCLIRVVRRKQERALRTVWSTADDKEQLVKNTCVL 2 198 220 MHPFLTPGLK AVILVGLFLMVLILLLGTSMVCL IRVVRRKQER +P43307 MRLLPRLLLLLLLVFPATVLFRGGPRGLLAVAQDLTEDEETVEDSIIEDEDDEAEVEEDEPTDLVEDKEEEDVSGEPEASPSADTTILFVKGEDFPANNIVKFLVGFTNKGTEDFIVESLDASFRYPQDYQFYIQNFTALPLNTVVPPQRQATFEYSFIPAEPMGGRPFGLVINLNYKDLNGNVFQDAVFNQTVTVIEREDGLDGETIFMYMFLAGLGLLVIVGLHQLLESRKRKRPIQKVEMGTSSQNDVDMSWIPQETLNQINKASPRRLPRKRAQKRSVGSDE 2 207 229 IEREDGLDGE TIFMYMFLAGLGLLVIVGLHQLL ESRKRKRPIQ +P43308 
MRLLSFVVLALFAVTQAEEGARLLASKSLLNRYAVEGRDLTLQYNIYNVGSSAALDVELSDDSFPPEDFGIVSGMLNVKWDRIAPASNVSHTVVLRPLKAGYFNFTSATITYLAQEDGPVVIGSTSAPGQGGILAQREFDRRFSPHFLDWAAFGVMTLPSIGIPLLLWYSSKRKYDTPKTKKN 2 147 169 REFDRRFSPH FLDWAAFGVMTLPSIGIPLLLWY SSKRKYDTPK +Q9NY15 MAGPRGLLPLCLLAFCLAGFSFVRGQVLFKGCDVKTTFVTHVPCTSCAAIKKQTCPSGWLRELPDQITQDCRYEVQLGGSMVSMSGCRRKCRKQVVQKACCPGYWGSRCHECPGGAETPCNGHGTCLDGMDRNGTCVCQENFRGSACQECQDPNRFGPDCQSVCSCVHGVCNHGPRGDGSCLCFAGYTGPHCDQELPVCQELRCPQNTQCSAEAPSCRCLPGYTQQGSECRAPNPCWPSPCSLLAQCSVSPKGQAQCHCPENYHGDGMVCLPKDPCTDNLGGCPSNSTLCVYQKPGQAFCTCRPGLVSINSNASAGCFAFCSPFSCDRSATCQVTADGKTSCVCRESEVGDGRACYGHLLHEVQKATQTGRVFLQLRVAVAMMDQGCREILTTAGPFTVLVPSVSSFSSRTMNASLAQQLCRQHIIAGQHILEDTRTQQTRRWWTLAGQEITVTFNQFTKYSYKYKDQPQQTFNIYKANNIAANGVFHVVTGLRWQAPSGTPGDPKRTIGQILASTEAFSRFETILENCGLPSILDGPGPFTVFAPSNEAVDSLRDGRLIYLFTAGLSKLQELVRYHIYNHGQLTVEKLISKGRILTMANQVLAVNISEEGRILLGPEGVPLQRVDVMAANGVIHMLDGILLPPTILPILPKHCSEEQHKIVAGSCVDCQALNTSTCPPNSVKLDIFPKECVYIHDPTGLNVLKKGCASYCNQTIMEQGCCKGFFGPDCTQCPGGFSNPCYGKGNCSDGIQGNGACLCFPDYKGIACHICSNPNKHGEQCQEDCGCVHGLCDNRPGSGGVCQQGTCAPGFSGRFCNESMGDCGPTGLAQHCHLHARCVSQEGVARCRCLDGFEGDGFSCTPSNPCSHPDRGGCSENAECVPGSLGTHHCTCHKGWSGDGRVCVAIDECELDMRGGCHTDALCSYVGPGQSRCTCKLGFAGDGYQCSPIDPCRAGNGGCHGLATCRAVGGGQRVCTCPPGFGGDGFSCYGDIFRELEANAHFSIFYQWLKSAGITLPADRRVTALVPSEAAVRQLSPEDRAFWLQPRTLPNLVRAHFLQGALFEEELARLGGQEVATLNPTTRWEIRNISGRVWVQNASVDVADLLATNGVLHILSQVLLPPRGDVPGGQGLLQQLDLVPAFSLFRELLQHHGLVPQIEAATAYTIFVPTNRSLEAQGNSSHLDADTVRHHVVLGEALSMETLRKGGHRNSLLGPAHWIVFYNHSGQPEVNHVPLEGPMLEAPGRSLIGLSGVLTVGSSRCLHSHAEALREKCVNCTRRFRCTQGFQLQDTPRKSCVYRSGFSFSRGCSYTCAKKIQVPDCCPGFFGTLCEPCPGGLGGVCSGHGQCQDRFLGSGECHCHEGFHGTACEVCELGRYGPNCTGVCDCAHGLCQEGLQGDGSCVCNVGWQGLRCDQKITSPQCPRKCDPNANCVQDSAGASTCACAAGYSGNGIFCSEVDPCAHGHGGCSPHANCTKVAPGQRTCTCQDGYMGDGELCQEINSCLIHHGGCHIHAECIPTGPQQVSCSCREGYSGDGIRTCELLDPCSKNNGGCSPYATCKSTGDGQRTCTCDTAHTVGDGLTCRARVGLELLRDKHASFFSLRLLEYKELKGDGPFTIFVPHADLMSNLSQDELARIRAHRQLVFRYHVVGCRRLRSEDLLEQGYATALSGHPLRFSEREGSIYLNDFARVVSSDHEAVNGILHFIDRVLLPPEALHWEPDDAPIPRRNVTAAAQGFGYKIFSGLLKVAGLLPL
LREASHRPFTMLWPTDAAFRALPPDRQAWLYHEDHRDKLAAILRGHMIRNVEALASDLPNLGPLRTMHGTPISFSCSRTRAGELMVGEDDARIVQRHLPFEGGLAYGIDQLLEPPGLGARCDHFETRPLRLNTCSICGLEPPCPEGSQEQGSPEACWRFYPKFWTSPPLHSLGLRSVWVHPSLWGRPQGLGRGCHRNCVTTTWKPSCCPGHYGSECQACPGGPSSPCSDRGVCMDGMSGSGQCLCRSGFAGTACELCAPGAFGPHCQACRCTVHGRCDEGLGGSGSCFCDEGWTGPRCEVQLELQPVCTPPCAPEAVCRAGNSCECSLGYEGDGRVCTVADLCQDGHGGCSEHANCSQVGTMVTCTCLPDYEGDGWSCRARNPCTDGHRGGCSEHANCLSTGLNTRRCECHAGYVGDGLQCLEESEPPVDRCLGQPPPCHSDAMCTDLHFQEKRAGVFHLQATSGPYGLNFSEAEAACEAQGAVLASFPQLSAAQQLGFHLCLMGWLANGSTAHPVVFPVADCGNGRVGIVSLGARKNLSERWDAYCFRVQDVACRCRNGFVGDGISTCNGKLLDVLAATANFSTFYGMLLGYANATQRGLDFLDFLDDELTYKTLFVPVNEGFVDNMTLSGPDLELHASNATLLSANASQGKLLPAHSGLSLIISDAGPDNSSWAPVAPGTVVVSRIIVWDIMAFNGIIHALASPLLAPPQPQAVLAPEAPPVAAGVGAVLAAGALLGLVAGALYLRARGKPMGFGFSAFQAEDDADDDFSPWQEGTNPTLVSVPNPVFGSDTFCEPFDDSLLEEDFPDTQRILTVK 2 2477 2499 AVLAPEAPPV AAGVGAVLAAGALLGLVAGALYL RARGKPMGFG +Q8WWQ8 MMLQHLVIFCLGLVVQNFCSPAETTGQARRCDRKSLLTIRTECRSCALNLGVKCPDGYTMITSGSVGVRDCRYTFEVRTYSLSLPGCRHICRKDYLQPRCCPGRWGPDCIECPGGAGSPCNGRGSCAEGMEGNGTCSCQEGFGGTACETCADDNLFGPSCSSVCNCVHGVCNSGLDGDGTCECYSAYTGPKCDKPIPECAALLCPENSRCSPSTEDENKLECKCLPNYRGDGKYCDPINPCLRKICHPHAHCTYLGPNRHSCTCQEGYRGDGQVCLPVDPCQINFGNCPTKSTVCKYDGPGQSHCECKEHYQNFVPGVGCSMTDICKSDNPCHRNANCTTVAPGRTECICQKGYVGDGLTCYGNIMERLRELNTEPRGKWQGRLTSFISLLDKAYAWPLSKLGPFTVLLPTDKGLKGFNVNELLVDNKAAQYFVKLHIIAGQMNIEYMNNTDMFYTLTGKSGEIFNSDKDNQIKLKLHGGKKKVKIIQGDIIASNGLLHILDRAMDKLEPTFESNNEQTIMTMLQPRYSKFRSLLEETNLGHALDEDGVGGPYTIFVPNNEALNNMKDGTLDYLLSPEGSRKLLELVRYHIVPFTQLEVATLISTPHIRSMANQLIQFNTTDNGQILANDVAMEEIEITAKNGRIYTLTGVLIPPSIVPILPHRCDETKREMKLGTCVSCSLVYWSRCPANSEPTALFTHRCVYSGRFGSLKSGCARYCNATVKIPKCCKGFYGPDCNQCPGGFSNPCSGNGQCADSLGGNGTCICEEGFQGSQCQFCSDPNKYGPRCNKKCLCVHGTCNNRIDSDGACLTGTCRDGSAGRLCDKQTSACGPYVQFCHIHATCEYSNGTASCICKAGYEGDGTLCSEMDPCTGLTPGGCSRNAECIKTGTGTHTCVCQQGWTGNGRDCSEINNCLLPSAGGCHDNASCLYVGPGQNECECKKGFRGNGIDCEPITSCLEQTGKCHPLASCQSTSSGVWSCVCQEGYEGDGFLCYGNAAVELSFLSEAAIFNRWINNASLQPTLSATSNLTVLVPSQQATEDMDQDEKSFWLSQSNIPALIKYHMLLGTYRVADLQTLSSSDMLATSLQGNFLHLAKVDGNITIEGASIVDGDNAA
TNGVIHIINKVLVPQRRLTGSLPNLLMRLEQMPDYSIFRGYIIQYNLANAIEAADAYTVFAPNNNAIENYIREKKVLSLEEDVLRYHVVLEEKLLKNDLHNGMHRETMLGFSYFLSFFLHNDQLYVNEAPINYTNVATDKGVIHGLGKVLEIQKNRCDNNDTTIIRGRCRTCSSELTCPFGTKSLGNEKRRCIYTSYFMGRRTLFIGCQPKCVRTVITRECCAGFFGPQCQPCPGNAQNVCFGNGICLDGVNGTGVCECGEGFSGTACETCTEGKYGIHCDQACSCVHGRCNQGPLGDGSCDCDVGWRGVHCDNATTEDNCNGTCHTSANCLTNSDGTASCKCAAGFQGNGTICTAINACEISNGGCSAKADCKRTTPGRRVCTCKAGYTGDGIVCLEINPCLENHGGCDKNAECTQTGPNQAACNCLPAYTGDGKVCTLINVCLTKNGGCSEFAICNHTGQVERTCTCKPNYIGDGFTCRGSIYQELPKNPKTSQYFFQLQEHFVKDLVGPGPFTVFAPLSAAFDEEARVKDWDKYGLMPQVLRYHVVACHQLLLENLKLISNATSLQGEPIVISVSQSTVYINNKAKIISSDIISTNGIVHIIDKLLSPKNLLITPKDNSGRILQNLTTLATNNGYIKFSNLIQDSGLLSVITDPIHTPVTLFWPTDQALHALPAEQQDFLFNQDNKDKLKEYLKFHVIRDAKVLAVDLPTSTAWKTLQGSELSVKCGAGRDIGDLFLNGQTCRIVQRELLFDLGVAYGIDCLLIDPTLGGRCDTFTTFDASGECGSCVNTPSCPRWSKPKGVKQKCLYNLPFKRNLEGCRERCSLVIQIPRCCKGYFGRDCQACPGGPDAPCNNRGVCLDQYSATGECKCNTGFNGTACEMCWPGRFGPDCLPCGCSDHGQCDDGITGSGQCLCETGWTGPSCDTQAVLPAVCTPPCSAHATCKENNTCECNLDYEGDGITCTVVDFCKQDNGGCAKVARCSQKGTKVSCSCQKGYKGDGHSCTEIDPCADGLNGGCHEHATCKMTGPGKHKCECKSHYVGDGLNCEPEQLPIDRCLQDNGQCHADAKCVDLHFQDTTVGVFHLRSPLGQYKLTFDKAREACANEAATMATYNQLSYAQKAKYHLCSAGWLETGRVAYPTAFASQNCGSGVVGIVDYGPRPNKSEMWDVFCYRMKDVNCTCKVGYVGDGFSCSGNLLQVLMSFPSLTNFLTEVLAYSNSSARGRAFLEHLTDLSIRGTLFVPQNSGLGENETLSGRDIEHHLANVSMFFYNDLVNGTTLQTRLGSKLLITASQDPLQPTETRFVDGRAILQWDIFASNGIIHVISRPLKAPPAPVTLTHTGLGAGIFFAIILVTGAVALAAYSYFRINRRTIGFQHFESEEDINVAALGKQQPENISNPLYESTTSAPPEPSYDPFTDSEERQLEGNDPLRTL 2 2461 2483 APVTLTHTGL GAGIFFAIILVTGAVALAAYSYF RINRRTIGFQ +Q13586 
MDVCVRLALWLLWGLLLHQGQSLSHSHSEKATGTSSGANSEESTAAEFCRIDKPLCHSEDEKLSFEAVRNIHKLMDDDANGDVDVEESDEFLREDLNYHDPTVKHSTFHGEDKLISVEDLWKAWKSSEVYNWTVDEVVQWLITYVELPQYEETFRKLQLSGHAMPRLAVTNTTMTGTVLKMTDRSHRQKLQLKALDTVLFGPPLLTRHNHLKDFMLVVSIVIGVGGCWFAYIQNRYSKEHMKKMMKDLEGLHRAEQSLHDLQERLHKAQEEHRTVEVEKVHLEKKLRDEINLAKQEAQRLKELREGTENERSRQKYAEEELEQVREALRKAEKELESHSSWYAPEALQKWLQLTHEVEVQYYNIKKQNAEKQLLVAKEGAEKIKKKRNTLFGTFHVAHSSSLDDVDHKILTAKQALSEVTAALRERLHRWQQIEILCGFQIVNNPGIHSLVAALNIDPSWMGSTRPNPAHFIMTDDVDDMDEEIVSPLSMQSPSLQSSVRQRLTEPQHGLGSQRDLTHSDSESSLHMSDRQRVAPKPPQMSRAADEALNAMTSNGSHRLIEGVHPGSLVEKLPDSPALAKKALLALNHGLDKAHSLMELSPSAPPGGSPHLDSSRSHSPSSPDPDTPSPVGDSRALQASRNTRIPHLAGKKAVAEEDNGSIGEETDSSPGRKKFPLKIFKKPLKK 2 214 232 LLTRHNHLKD FMLVVSIVIGVGGCWFAYI QNRYSKEHMK +Q9UGT4 MKPALLPWALLLLATALGPGPGPTADAQESCSMRCGALDGPCSCHPTCSGLGTCCLDFRDFCLEILPYSGSMMGGKDFVVRHFKMSSPTDASVICRFKDSIQTLGHVDSSGQVHCVSPLLYESGRIPFTVSLDNGHSFPRAGTWLAVHPNKVSMMEKSELVNETRWQYYGTANTSGNLSLTWHVKSLPTQTITIELWGYEETGMPYSQEWTAKWSYLYPLATHIPNSGSFTFTPKPAPPSYQRWRVGALRIIDSKNYAGQKDVQALWTNDHALAWHLSDDFREDPVAWARTQCQAWEELEDQLPNFLEELPDCPCTLTQARADSGRFFTDYGCDMEQGSVCTYHPGAVHCVRSVQASLRYGSGQQCCYTADGTQLLTADSSGGSTPDRGHDWGAPPFRTPPRVPSMSHWLYDVLSFYYCCLWAPDCPRYMQRRPSNDCRNYRPPRLASAFGDPHFVTFDGTNFTFNGRGEYVLLEAALTDLRVQARAQPGTMSNGTETRGTGLTAVAVQEGNSDVVEVRLANRTGGLEVLLNQEVLSFTEQSWMDLKGMFLSVAAGDRVSIMLASGAGLEVSVQGPFLSVSVLLPEKFLTHTHGLLGTLNNDPTDDFTLHSGRVLPPGTSPQELFLFGANWTVHNASSLLTYDSWFLVHNFLYQPKHDPTFEPLFPSETTLNPSLAQEAAKLCGDDHFCNFDVAATGSLSTGTATRVAHQLHQRRMQSLQPVVSCGWLAPPPNGQKEGNRYLAGSTIYFHCDNGYSLAGAETSTCQADGTWSSPTPKCQPGRSYAVLLGIIFGGLAVVAAVALVYVLLRRRKGNTHVWGAQP 2 786 808 PKCQPGRSYA VLLGIIFGGLAVVAAVALVYVLL RRRKGNTHVW +Q5VX71 
MYHGMNPSNGDGFLEQQQQQQQPQSPQRLLAVILWFQLALCFGPAQLTGGFDDLQVCADPGIPENGFRTPSGGVFFEGSVARFHCQDGFKLKGATKRLCLKHFNGTLGWIPSDNSICVQEDCRIPQIEDAEIHNKTYRHGEKLIITCHEGFKIRYPDLHNMVSLCRDDGTWNNLPICQGCLRPLASSNGYVNISELQTSFPVGTVISYRCFPGFKLDGSAYLECLQNLIWSSSPPRCLALEVCPLPPMVSHGDFVCHPRPCERYNHGTVVEFYCDPGYSLTSDYKYITCQYGEWFPSYQVYCIKSEQTWPSTHETLLTTWKIVAFTATSVLLVLLLVILARMFQTKFKAHFPPRGPPRSSSSDPDFVVVDGVPVMLPSYDEAVSGGLSALGPGYMASVGQGCPLPVDDQSPPAYPGSGDTDTGPGESETCDSVSGSSELLQSLYSPPRCQESTHPASDNPDIIASTAEEVASTSPGIDIADEIPLMEEDP 2 317 339 QTWPSTHETL LTTWKIVAFTATSVLLVLLLVIL ARMFQTKFKA +O60279 MTAEGPSPPARWHRRLPGLWAAALLLLGLPRLSVRADGKFFVLESQNGSQGLQLEAARLSCKSRGAHLASADELRRVVQDCSFAVCTTGWLADGTLGTTVCSKGSGEQQIMRAVDVRIESNPVPGGTYSALCIKDEEKPCGDPPSFPHTILQGRTGLEMGDELLYVCAPGHIMGHRETAFTLLCNSCGEWYGLVQACGKDEAEAHIDYEDNFPDDRSVSFRELMEDSRTEADEDRGQGDSSEEAPKQDRLVSISVGRENIARDKVFVPTTGLPGAGSSVPADSPGSRLLQKHLFWFPAEAFHKPGLEKEVDDDTKKQFSAGDNHSGVKLVPGEPETKVIYGNTDGPSGPFVGKNDSKAGDPVVSSSDESWLDGYPVTEGAWRKTEAEEEEDGDRGDGSVGLDENVLVTPDQPILVEVKKPKSSTLTPSEGMTHSSVLPSQMLDVEALALRPVNASETEGIGDGDLTKYQSTLPWRFITEESPMATLSYELTSSTLEILTVNTVKQTPNHIPSTIMATTQPPVETTVPEIQDSFPYLLSEDFFGQEGPGPGASEELHPTLESCVGDGCPGLSRGPVIATIVTVLCLLLLLAGVGMVWGYRKCQHKSSVYKLNVGQRQARHYHQQIEMEKV 2 576 598 GCPGLSRGPV IATIVTVLCLLLLLAGVGMVWGY RKCQHKSSVY +Q9UQF0 MALPYHIFLFTVLLPSFTLTAPPPCRCMTSSSPYQEFLWRMQRPGNIDAPSYRSLSKGTPTFTAHTHMPRNCYHSATLCMHANTHYWTGKMINPSCPGGLGVTVCWTYFTQTGMSDGGGVQDQAREKHVKEVISQLTRVHGTSSPYKGLDLSKLHETLRTHTRLVSLFNTTLTGLHEVSAQNPTNCWICLPLNFRPYVSIPVPEQWNNFSTEINTTSVLVGPLVSNLEITHTSNLTCVKFSNTTYTTNSQCIRWVTPPTQIVCLPSGIFFVCGTSAYRCLNGSSESMCFLSFLVPPMTIYTEQDLYSYVISKPRNKRVPILPFVIGAGVLGALGTGIGGITTSTQFYYKLSQELNGDMERVADSLVTLQDQLNSLAAVVLQNRRALDLLTAERGGTCLFLGEECCYYVNQSGIVTEKVKEIRDRIQRRAEELRNTGPWGLLSQWMPWILPFLGPLAAIILLLLFGPCIFNLLVNFVSSRIEAVKLQMEPKMQSKTKIYRRPLDRPASPRSDVNDIKGTPPEEISAAQPLLRPNSAGSS 2 455 477 MPWILPFLGP LAAIILLLLFGPCIFNLLVNFVS SRIEAVKLQM +Q24JP5 
MCARMAGRTTAAPRGPYGPWLCLLVALALDVVRVDCGQAPLDPVYLPAALELLDAPEHFRVQQVGHYPPANSSLSSRSETFLLLQPWPRAQPLLRASYPPFATQQVVPPRVTEPHQRPVPWDVRAVSVEAAVTPAEPYARVLFHLKGQDWPPGSGSLPCARLHATHPAGTAHQACRFQPSLGACVVELELPSHWFSQASTTRAELAYTLEPAAEGPGGCGSGEENDPGEQALPVGGVELRPADPPQYQEVPLDEAVTLRVPDMPVRPGQLFSATLLLRHNFTASLLTLRIKVKKGLHVTAARPAQPTLWTAKLDRFKGSRHHTTLITCHRAGLTEPDSSPLELSEFLWVDFVVENSTGGGVAVTRPVTWQLEYPGQAPEAEKDKMVWEILVSERDIRALIPLAKAEELVNTAPLTGVPQHVPVRLVTVDGGGALVEVTEHVGCESANTQVLQVSEACDAVFVAGKESRGARGVRVDFWWRRLRASLRLTVWAPLLPLRIELTDTTLEQVRGWRVPGPAEGPAEPAAEASDEAERRARGCHLQYQRAGVRFLAPFAAHPLDGGRRLTHLLGPDWLLDVSHLVAPHARVLDSRVASLEGGRVVVGREPGVTSIEVRSPLSDSILGEQALAVTDDKVSVLELRVQPVMGISLTLSRGTAHPGEVTATCWAQSALPAPKQEVALSLWLSFSDHTVAPAELYDRRDLGLSVSAEEPGAILPAEEQGAQLGVVVSGAGAEGLPLHVALHPPEPCRRGRHRVPLASGTAWLGLPPASTPAPALPSSPAWSPPATEATMGGKRQVAGSVGGNTGVRGKFERAEEEARKEETEAREEEEEEEEEMVPAPQHVTELELGMYALLGVFCVAIFIFLVNGVVFVLRYQRKEPPDSATDPTSPQPHNWVWLGTDQEELSRQLDRQSPGPPKGEGSCPCESGGGGEAPTLAPGPPGGTTSSSSTLARKEAGGRRKRVEFVTFAPAPPAQSPEEPVGAPAVQSILVAGEEDIRWVCEDMGLKDPEELRNYMERIRGSS 2 851 873 QHVTELELGM YALLGVFCVAIFIFLVNGVVFVL RYQRKEPPDS +P40200 MEKKWKYCAVYYIIQIHFVKGVWEKTVNTEENVYATLGSDVNLTCQTQTVGFFVQMQWSKVTNKIDLIAVYHPQYGFYCAYGRPCESLVTFTETPENGSKWTLHLRNMSCSVSGRYECMLVLYPEGIQTKIYNLLIQTHVTADEWNSNHTIEIEINQTLEIPCFQNSSSKISSEFTYAWSVENSSTDSWVLLSKGIKEDNGTQETLISQNHLISNSTLLKDRVKLGTDYRLHLSPVQIFDDGRKFSCHIRVGPNKILRSSTTVKVFAKPEIPVIVENNSTDVLVERRFTCLLKNVFPKANITWFIDGSFLHDEKEGIYITNEERKGKDGFLELKSVLTRVHSNKPAQSDNLTIWCMALSPVPGNKVWNISSEKITFLLGSEISSTDPPLSVTESTLDTQPSPASSVSPARYPATSSVTLVDVSALRPNTTPQPSNSSMTTRGFNYPWTSSGTDTKKSVSRIPSETYSSSPSGAGSTLHDNVFTSTARAFSEVPTTANGSTKTNHVHITGIVVNKPKDGMSWPVIVAALLFCCMILFGLGVRKWCQYQKEIMERPPPFKPPPPPIKYTCIQEPNESDLPYHEMETL 2 518 540 TGIVVNKPKD GMSWPVIVAALLFCCMILFGLGV RKWCQYQKEI +B6A8C7 
MIPKLLSLLCFRLCVGQGDTRGDGSLPKPSLSAWPSSVVPANSNVTLRCWTPARGVSFVLRKGGIILESPKPLDSTEGAAEFHLNNLKVRNAGEYTCEYYRKASPHILSQRSDVLLLLVTGHLSKPFLRTYQRGTVTAGGRVTLQCQKRDQLFVPIMFALLKAGTPSPIQLQSPAGKEIDFSLVDVTAGDAGNYSCMYYQTKSPFWASEPSDQLEILVTVPPGTTSSNYSLGNFVRLGLAAVIVVIMGAFLVEAWYSRNVSPGESEAFKPE 2 234 256 TTSSNYSLGN FVRLGLAAVIVVIMGAFLVEAWY SRNVSPGESE +A0A1B0GTY4 MSNQRLPLIFSLLFICFFGESFCICDGTVWTKVGWEILPEEVHYWKVKGSPSHCLPYLLDKLCCDFANMDIFQGCLYLIYNLLQAVFFVLFVLSVHYLWKKWKKHQKKLKKQASLEKPGNDLESPLINNIDQTLHRVATTASVIYKIWEHRSHHPSSKKIKHCKLKKKSKEEGARRY 2 76 98 FANMDIFQGC LYLIYNLLQAVFFVLFVLSVHYL WKKWKKHQKK +P13726 METPAWPRVPRPETAVARTLLLGWVFAQVAGASGTTNTVAAYNLTWKSTNFKTILEWEPKPVNQVYTVQISTKSGDWKSKCFYTTDTECDLTDEIVKDVKQTYLARVFSYPAGNVESTGSAGEPLYENSPEFTPYLETNLGQPTIQSFEQVGTKVNVTVEDERTLVRRNNTFLSLRDVFGKDLIYTLYYWKSSSSGKKTAKTNTNEFLIDVDKGENYCFSVQAVIPSRTVNRKSTDSPVECMGQEKGEFREIFYIIGAVVFVVIILVIILAISLHKCRKAGVGQSWKENSPLNVS 2 252 274 MGQEKGEFRE IFYIIGAVVFVVIILVIILAISL HKCRKAGVGQ +P37173 MGRGLLRGLWPLHIVLWTRIASTIPPHVQKSVNNDMIVTDNNGAVKFPQLCKFCDVRFSTCDNQKSCMSNCSITSICEKPQEVCVAVWRKNDENITLETVCHDPKLPYHDFILEDAASPKCIMKEKKKPGETFFMCSCSSDECNDNIIFSEEYNTSNPDLLLVIFQVTGISLLPPLGVAISVIIIFYCYRVNRQQKLSSTWETGKTRKLMEFSEHCAIILEDDRSDISSTCANNINHNTELLPIELDTLVGKGRFAEVYKAKLKQNTSEQFETVAVKIFPYEEYASWKTEKDIFSDINLKHENILQFLTAEERKTELGKQYWLITAFHAKGNLQEYLTRHVISWEDLRKLGSSLARGIAHLHSDHTPCGRPKMPIVHRDLKSSNILVKNDLTCCLCDFGLSLRLDPTLSVDDLANSGQVGTARYMAPEVLESRMNLENVESFKQTDVYSMALVLWEMTSRCNAVGEVKDYEPPFGSKVREHPCVESMKDNVLRDRGRPEIPSFWLNHQGIQMVCETLTECWDHDPEARLTAQCVAERFSELEHLDRLSGRSCSEEKIPEDGSLNTTK 2 167 189 NPDLLLVIFQ VTGISLLPPLGVAISVIIIFYCY RVNRQQKLSS +O43493 
MRFVVALVLLNVAAAGAVPLLATESVKQEEAGVRPSAGNVSTHPSLSQRPGGSTKSHPEPQTPKDSPSKSSAEAQTPEDTPNKSGAEAKTQKDSSNKSGAEAKTQKGSTSKSGSEAQTTKDSTSKSHPELQTPKDSTGKSGAEAQTPEDSPNRSGAEAKTQKDSPSKSGSEAQTTKDVPNKSGADGQTPKDGSSKSGAEDQTPKDVPNKSGAEKQTPKDGSNKSGAEEQGPIDGPSKSGAEEQTSKDSPNKVVPEQPSRKDHSKPISNPSDNKELPKADTNQLADKGKLSPHAFKTESGEETDLISPPQEEVKSSEPTEDVEPKEAEDDDTGPEEGSPPKEEKEKMSGSASSENREGTLSDSTGSEKDDLYPNGSGNGSAESSHFFAYLVTAAILVAVLYIAHHNKRKIIAFVLEGKRSKVTRRPKASDYQRLDQKS 2 385 402 SGNGSAESSH FFAYLVTAAILVAVLYIA HHNKRKIIAF +Q9UPZ6 MGLQARRWASGSRGAAGPRRGVLQLLPLPLPLPLLLLLLLRPGAGRAAAQGEAEAPTLYLWKTGPWGRCMGDECGPGGIQTRAVWCAHVEGWTTLHTNCKQAERPNNQQNCFKVCDWHKELYDWRLGPWNQCQPVISKSLEKPLECIKGEEGIQVREIACIQKDKDIPAEDIICEYFEPKPLLEQACLIPCQQDCIVSEFSAWSECSKTCGSGLQHRTRHVVAPPQFGGSGCPNLTEFQVCQSSPCEAEELRYSLHVGPWSTCSMPHSRQVRQARRRGKNKEREKDRSKGVKDPEARELIKKKRNRNRQNRQENKYWDIQIGYQTREVMCINKTGKAADLSFCQQEKLPMTFQSCVITKECQVSEWSEWSPCSKTCHDMVSPAGTRVRTRTIRQFPIGSEKECPEFEEKEPCLSQGDGVVPCATYGWRTTEWTECRVDPLLSQQDKRRGNQTALCGGGIQTREVYCVQANENLLSQLSTHKNKEASKPMDLKLCTGPIPNTTQLCHIPCPTECEVSPWSAWGPCTYENCNDQQGKKGFKLRKRRITNEPTGGSGVTGNCPHLLEAIPCEEPACYDWKAVRLGNCEPDNGKECGPGTQVQEVVCINSDGEEVDRQLCRDAIFPIPVACDAPCPKDCVLSTWSTWSSCSHTCSGKTTEGKQIRARSILAYAGEEGGIRCPNSSALQEVRSCNEHPCTVYHWQTGPWGQCIEDTSVSSFNTTTTWNGEASCSVGMQTRKVICVRVNVGQVGPKKCPESLRPETVRPCLLPCKKDCIVTPYSDWTSCPSSCKEGDSSIRKQSRHRVIIQLPANGGRDCTDPLYEEKACEAPQACQSYRWKTHKWRRCQLVPWSVQQDSPGAQEGCGPGRQARAITCRKQDGGQAGIHECLQYAGPVPALTQACQIPCQDDCQLTSWSKFSSCNGDCGAVRTRKRTLVGKSKKKEKCKNSHLYPLIETQYCPCDKYNAQPVGNWSDCILPEGKVEVLLGMKVQGDIKECGQGYRYQAMACYDQNGRLVETSRCNSHGYIEEACIIPCPSDCKLSEWSNWSRCSKSCGSGVKVRSKWLREKPYNGGRPCPKLDHVNQAQVYEVVPCHSDCNQYLWVTEPWSICKVTFVNMRENCGEGVQTRKVRCMQNTADGPSEHVEDYLCDPEEMPLGSRVCKLPCPEDCVISEWGPWTQCVLPCNQSSFRQRSADPIRQPADEGRSCPNAVEKEPCNLNKNCYHYDYNVTDWSTCQLSEKAVCGNGIKTRMLDCVRSDGKSVDLKYCEALGLEKNWQMNTSCMVECPVNCQLSDWSPWSECSQTCGLTGKMIRRRTVTQPFQGDGRPCPSLMDQSKPCPVKPCYRWQYGQWSPCQVQEAQCGEGTRTRNISCVVSDGSADDFSKVVDEEFCADIELIIDGNKNMVLEESCSQPCPGDCYLKDWSSWSLCQLTCVNGEDLGFGGIQVRSRPVIIQELENQHLCPEQMLETKSCYDGQCYEYKWMASAWKGSSRTVWCQRSDGINVTG
GCLVMSQPDADRSCNPPCSQPHSYCSETKTCHCEEGYTEVMSSNSTLEQCTLIPVVVLPTMEDKRGDVKTSRAVHPTQPSSNPAGRGRTWFLQPFGPDGRLKTWVYGVAAGAFVLLIFIVSMIYLACKKPKKPQRRQNNRLKPLTLAYDGDADM 2 1607 1629 PFGPDGRLKT WVYGVAAGAFVLLIFIVSMIYLA CKKPKKPQRR +Q9NS62 MKPMLKDFSNLLLVVLCDYVLGEAEYLLLREPGHVALSNDTVYVDFQYFDGANGTLRNVSVLLLEANTNQTVTTKYLLTNQSQGTLKFECFYFKEAGDYWFTMTPEATDNSTPFPWWEKSAFLKVEWPVFHVDLNRSAKAAEGTFQVGLFTSQPLCPFPVDKPNIVVDVIFTNSLPEARRNSRQPLEIRTSKRTELAQGQWVEFGCAPLGPEAYVTVVLKLLGRDSVITSTGPIDLAQKFGYKLVMVPELTCESGVEVTVLPPPCTFVQGVVTVFKEAPRYPGKRTIHLAENSLPLGERRTIFNCTLFDMGKNKYCFDFGISSRSHFSAKEECMLIQRNTETWGLWQPWSQCSATCGDGVRERRRVCLTSFPSSPVCPGMSLEASLCSLEECAAFQPSSPSPLQPQGPVKSNNIVTVTGISLCLFIIIATVLITLWRRFGRPAKCSTPARHNSIHSPSFRKNSDEENICELSEQRGSFSDGGDGPTGSPGDTGIPLTYRRSGPVPPEDDASGSESFQSNAQKIIPPLFSYRLAQQQLKEMKKKGLTETTKVYHVSQSPLTDTAIDAAPSAPLDLESPEEAAANKFRIKSPFPEQPAVSAGERPPSRLDLNVTQASCAISPSQTLIRKSQARHVGSRGGPSERSHARNAHFRRTASFHEARQARPFRERSMSTLTPRQAPAYSSRTRTCEQAEDRFRPQSRGAHLFPEKLEHFQEASGTRGPLNPLPKSYTLGQPLRKPDLGDHQAGLVAGIERTEPHRARRGPSPSHKSVSRKQSSPISPKDNYQRVSSLSPSQCRKDKCQSFPTHPEFAFYDNTSFGLTEAEQRMLDLPGYFGSNEEDETTSTLSVEKLVI 2 414 436 QPQGPVKSNN IVTVTGISLCLFIIIATVLITLW RRFGRPAKCS +Q02763 
MDSLASLVLCGVSLLLSGTVEGAMDLILINSLPLVSDAETSLTCIASGWRPHEPITIGRDFEALMNQHQDPLEVTQDVTREWAKKVVWKREKASKINGAYFCEGRVRGEAIRIRTMKMRQQASFLPATLTMTVDKGDNVNISFKKVLIKEEDAVIYKNGSFIHSVPRHEVPDILEVHLPHAQPQDAGVYSARYIGGNLFTSAFTRLIVRRCEAQKWGPECNHLCTACMNNGVCHEDTGECICPPGFMGRTCEKACELHTFGRTCKERCSGQEGCKSYVFCLPDPYGCSCATGWKGLQCNEACHPGFYGPDCKLRCSCNNGEMCDRFQGCLCSPGWQGLQCEREGIQRMTPKIVDLPDHIEVNSGKFNPICKASGWPLPTNEEMTLVKPDGTVLHPKDFNHTDHFSVAIFTIHRILPPDSGVWVCSVNTVAGMVEKPFNISVKVLPKPLNAPNVIDTGHNFAVINISSEPYFGDGPIKSKKLLYKPVNHYEAWQHIQVTNEIVTLNYLEPRTEYELCVQLVRRGEGGEGHPGPVRRFTTASIGLPPPRGLNLLPKSQTTLNLTWQPIFPSSEDDFYVEVERRSVQKSDQQNIKVPGNLTSVLLNNLHPREQYVVRARVNTKAQGEWSEDLTAWTLSDILPPQPENIKISNITHSSAVISWTILDGYSISSITIRYKVQGKNEDQHVDVKIKNATITQYQLKGLEPETAYQVDIFAENNIGSSNPAFSHELVTLPESQAPADLGGGKMLLIAILGSAGMTCLTVLLAFLIILQLKRANVQRRMAQAFQNVREEPAVQFNSGTLALNRKVKNNPDPTIYPVLDWNDIKFQDVIGEGNFGQVLKARIKKDGLRMDAAIKRMKEYASKDDHRDFAGELEVLCKLGHHPNIINLLGACEHRGYLYLAIEYAPHGNLLDFLRKSRVLETDPAFAIANSTASTLSSQQLLHFAADVARGMDYLSQKQFIHRDLAARNILVGENYVAKIADFGLSRGQEVYVKKTMGRLPVRWMAIESLNYSVYTTNSDVWSYGVLLWEIVSLGGTPYCGMTCAELYEKLPQGYRLEKPLNCDDEVYDLMRQCWREKPYERPSFAQILVSLNRMLEERKTYVNTTLYEKFTYAGIDCSAEEAA 2 748 770 PADLGGGKML LIAILGSAGMTCLTVLLAFLIIL QLKRANVQRR +Q495A1 MRWCLLLIWAQGLRQAPLASGMMTGTIETTGNISAEKGGSIILQCHLSSTTAQVTQVNWEQQDQLLAICNADLGWHISPSFKDRVAPGPGLGLTLQSLTVNDTGEYFCIYHTYPDGTYTGRIFLEVLESSVAEHGARFQIPLLGAMAATLVVICTAVIVVVALTRKKKALRIHSVEGDLRRKSAGQEEWSPSAPSPPGSCVQAEAAPAGLCGEQRGEDCAELHDYFNVLSYRSLGNCSFFTETG 2 142 164 AEHGARFQIP LLGAMAATLVVICTAVIVVVALT RKKKALRIHS +Q96H15 MSKEPLILWLMIEFWWLYLTPVTSETVVTEVLGHRVTLPCLYSSWSHNSNSMCWGKDQCPYSGCKEALIRTDGMRVTSRKSAKYRLQGTIPRGDVSLTILNPSESDSGVYCCRIEVPGWFNDVKINVRLNLQRASTTTHRTATTTTRRTTTTSPTTTRQMTTTPAALPTTVVTTPDLTTGTPLQMTTIAVFTTANTCLSLTPSTLPEEATGLLTPEPSKEGPILTAESETVLPSDSWSSVESTSADTVLLTSKESKVWDLPSTSHVSMWKTSDSVSSPQPGASDTAVPEQNKTTKTGQMDGIPMSMKNEMPISQLLMIIAPSLGFVLFALFVAFLLRGKLMETYCSQKHTRLDYIGDSKNVLNDVQHGREDEDGLFTL 2 314 336 MSMKNEMPIS QLLMIIAPSLGFVLFALFVAFLL RGKLMETYCS +Q8TB96 
MAAAGRLPSSWALFSPLLAGLALLGVGPVPARALHNVTAELFGAEAWGTLAAFGDLNSDKQTDLFVLRERNDLIVFLADQNAPYFKPKVKVSFKNHSALITSVVPGDYDGDSQMDVLLTYLPKNYAKSELGAVIFWGQNQTLDPNNMTILNRTFQDEPLIMDFNGDLIPDIFGITNESNQPQILLGGNLSWHPALTTTSKMRIPHSHAFIDLTEDFTADLFLTTLNATTSTFQFEIWENLDGNFSVSTILEKPQNMMVVGQSAFADFDGDGHMDHLLPGCEDKNCQKSTIYLVRSGMKQWVPVLQDFSNKGTLWGFVPFVDEQQPTEIPIPITLHIGDYNMDGYPDALVILKNTSGSNQQAFLLENVPCNNASCEEARRMFKVYWELTDLNQIKDAMVATFFDIYEDGILDIVVLSKGYTKNDFAIHTLKNNFEADAYFVKVIVLSGLCSNDCPRKITPFGVNQPGPYIMYTTVDANGYLKNGSAGQLSQSAHLALQLPYNVLGLGRSANFLDHLYVGIPRPSGEKSIRKQEWTAIIPNSQLIVIPYPHNVPRSWSAKLYLTPSNIVLLTAIALIGVCVFILAIIGILHWQEKKADDREKRQEAHRFHFDAM 2 566 588 SAKLYLTPSN IVLLTAIALIGVCVFILAIIGIL HWQEKKADDR +Q6R5P0 MPRMERHQFCSVLLILILLTLVSLTLTGWAWTIPDCIIADSLLFPNLSYYIPFCTSAPGLHLLASCSNVKNLNQTLKRVPRNTEVLCLQGMVPTLPAKAFIRFHSLQLLRLQLRTTSVTSRTFQGLDQLQYLFFDHHAPCCLSLFLSPNCFESLRSLSSLSFQGYCLTYSQSIYLPTSLRHLTLRNSCLTKFQDLQRLFPDLLLSTSSTPNIKPGAPFLETLDLSYNLQLKQAGVRDLYGLTLHSLILDGTPLKALDLTDSGLLHLHFLSLVGTGIEKVPASLTGYSELRALDLGKNQIQNILENGEIPGYKALEFLSLHDNHLQTLPTRFLHTLPQLQKLNLSMNKLGPILELPEGLFSTNLKVLDLSYNQLCDVPHGALSLLSQLQELWLSGNNISSLSNESLQGLRQLRTLDLSWNQIKVLKPGWLSHLPALTTLNLLGTYLEYILGIQLQGPKMLRHLQLGSYPILDIYPPWPPTLLSLEIQAESCIQFMIHSGQPFLFLENLTLETSILLLKPDNITIHFPSLRRLTLRGYSFIFSTSQLQRFFPQQLPLLEHFFIWCENSYAVDLYLFGMPRLRVLELGYLNFFYESSTMKLEMLLKEVPQLQVLALSHLNLRNLSVSSFKSLQDLKLLLFNSERALEMNSNLQEFIPQMPQYVYFSDVTFTCQCEASWLESWATRAPNTFVYGLEKSICIANASDYSKTLLFSFLATNCPHGTEFWGFLTSFILLLLLIILPLISCPKWSWLHHLWTLFHTCWWKLCGHRLRGQFNYDVFISYCEEDQAWVLEELVPVLEKAPPEGEGLRLCLPARDFGIGNDRMESMIASMGKSRATLCVLTGQALASPWCNLELRLATYHLVARPGTTHLLLLFLEPLDRQRLHSYHRLSRWLQKEDYFDLSQGKVEWNSFCEQLKRRLSKAGQERD 2 719 741 FSFLATNCPH GTEFWGFLTSFILLLLLIILPLI SCPKWSWLHH +Q15399 
MTSIFHFAIIFMLILQIRIQLSEESEFLVDRSKNGLIHVPKDLSQKTTILNISQNYISELWTSDILSLSKLRILIISHNRIQYLDISVFKFNQELEYLDLSHNKLVKISCHPTVNLKHLDLSFNAFDALPICKEFGNMSQLKFLGLSTTHLEKSSVLPIAHLNISKVLLVLGETYGEKEDPEGLQDFNTESLHIVFPTNKEFHFILDVSVKTVANLELSNIKCVLEDNKCSYFLSILAKLQTNPKLSNLTLNNIETTWNSFIRILQLVWHTTVWYFSISNVKLQGQLDFRDFDYSGTSLKALSIHQVVSDVFGFPQSYIYEIFSNMNIKNFTVSGTRMVHMLCPSKISPFLHLDFSNNLLTDTVFENCGHLTELETLILQMNQLKELSKIAEMTTQMKSLQQLDISQNSVSYDEKKGDCSWTKSLLSLNMSSNILTDTIFRCLPPRIKVLDLHSNKIKSIPKQVVKLEALQELNVAFNSLTDLPGCGSFSSLSVLIIDHNSVSHPSADFFQSCQKMRSIKAGDNPFQCTCELGEFVKNIDQVSSEVLEGWPDSYKCDYPESYRGTLLKDFHMSELSCNITLLIVTIVATMLVLAVTVTSLCSYLDLPWYLRMVCQWTQTRRRARNIPLEELQRNLQFHAFISYSGHDSFWVKNELLPNLEKEGMQICLHERNFVPGKSIVENIITCIEKSYKSIFVLSPNFVQSEWCHYELYFAHHNLFHEGSNSLILILLEPIPQYSIPSSYHKLKSLMARRTYLEWPKEKSKRGLFWANLRAAINIKLTEQAKK 2 582 604 MSELSCNITL LIVTIVATMLVLAVTVTSLCSYL DLPWYLRMVC +Q9QUN7 MLRALWLFWILVAITVLFSKRCSAQESLSCDASGVCDGRSRSFTSIPSGLTAAMKSLDLSFNKITYIGHGDLRACANLQVLILKSSRINTIEGDAFYSLGSLEHLDLSDNHLSSLSSSWFGPLSSLKYLNLMGNPYQTLGVTSLFPNLTNLQTLRIGNVETFSEIRRIDFAGLTSLNELEIKALSLRNYQSQSLKSIRDIHHLTLHLSESAFLLEIFADILSSVRYLELRDTNLARFQFSPLPVDEVSSPMKKLAFRGSVLTDESFNELLKLLRYILELSEVEFDDCTLNGLGDFNPSESDVVSELGKVETVTIRRLHIPQFYLFYDLSTVYSLLEKVKRITVENSKVFLVPCSFSQHLKSLEFLDLSENLMVEEYLKNSACKGAWPSLQTLVLSQNHLRSMQKTGEILLTLKNLTSLDISRNTFHPMPDSCQWPEKMRFLNLSSTGIRVVKTCIPQTLEVLDVSNNNLDSFSLFLPRLQELYISRNKLKTLPDASLFPVLLVMKIRENAVSTFSKDQLGSFPKLETLEAGDNHFVCSCELLSFTMETPALAQILVDWPDSYLCDSPPRLHGHRLQDARPSVLECHQAALVSGVCCALLLLILLVGALCHHFHGLWYLRMMWAWLQAKRKPKKAPCRDVCYDAFVSYSEQDSHWVENLMVQQLENSDPPFKLCLHKRDFVPGKWIIDNIIDSIEKSHKTVFVLSENFVRSEWCKYELDFSHFRLFDENNDAAILVLLEPIERKAIPQRFCKLRKIMNTKTYLEWPLDEGQQEVFWVNLRTAIKS 2 588 610 ARPSVLECHQ AALVSGVCCALLLLILLVGALCH HFHGLWYLRM +O15455 
MRQTLPCIYFWGGLLPFGMLCASSTTKCTVSHEVADCSHLKLTQVPDDLPTNITVLNLTHNQLRRLPAANFTRYSQLTSLDVGFNTISKLEPELCQKLPMLKVLNLQHNELSQLSDKTFAFCTNLTELHLMSNSIQKIKNNPFVKQKNLITLDLSHNGLSSTKLGTQVQLENLQELLLSNNKIQALKSEELDIFANSSLKKLELSSNQIKEFSPGCFHAIGRLFGLFLNNVQLGPSLTEKLCLELANTSIRNLSLSNSQLSTTSNTTFLGLKWTNLTMLDLSYNNLNVVGNDSFAWLPQLEYFFLEYNNIQHLFSHSLHGLFNVRYLNLKRSFTKQSISLASLPKIDDFSFQWLKCLEHLNMEDNDIPGIKSNMFTGLINLKYLSLSNSFTSLRTLTNETFVSLAHSPLHILNLTKNKISKIESDAFSWLGHLEVLDLGLNEIGQELTGQEWRGLENIFEIYLSYNKYLQLTRNSFALVPSLQRLMLRRVALKNVDSSPSPFQPLRNLTILDLSNNNIANINDDMLEGLEKLEILDLQHNNLARLWKHANPGGPIYFLKGLSHLHILNLESNGFDEIPVEVFKDLFELKIIDLGLNNLNTLPASVFNNQVSLKSLNLQKNLITSVEKKVFGPAFRNLTELDMRFNPFDCTCESIAWFVNWINETHTNIPELSSHYLCNTPPHYHGFPVRLFDTSSCKDSAPFELFFMINTSILLIFIFIVLLIHFEGWRISFYWNVSVHRVLGFKEIDRQTEQFEYAAYIIHAYKDKDWVWEHFSSMEKEDQSLKFCLEERDFEAGVFELEAIVNSIKRSRKIIFVITHHLLKDPLCKRFKVHHAVQQAIEQNLDSIILVFLEEIPDYKLNHALCLRRGMFKSHCILNWPVQKERIGAFRHKLQVALGSKNSVH 2 703 725 TSSCKDSAPF ELFFMINTSILLIFIFIVLLIHF EGWRISFYWN +O00206 MMSASRLAGTLIPAMAFLSCVRPESWEPCVEVVPNITYQCMELNFYKIPDNLPFSTKNLDLSFNPLRHLGSYSFFSFPELQVLDLSRCEIQTIEDGAYQSLSHLSTLILTGNPIQSLALGAFSGLSSLQKLVAVETNLASLENFPIGHLKTLKELNVAHNLIQSFKLPEYFSNLTNLEHLDLSSNKIQSIYCTDLRVLHQMPLLNLSLDLSLNPMNFIQPGAFKEIRLHKLTLRNNFDSLNVMKTCIQGLAGLEVHRLVLGEFRNEGNLEKFDKSALEGLCNLTIEEFRLAYLDYYLDDIIDLFNCLTNVSSFSLVSVTIERVKDFSYNFGWQHLELVNCKFGQFPTLKLKSLKRLTFTSNKGGNAFSEVDLPSLEFLDLSRNGLSFKGCCSQSDFGTTSLKYLDLSFNGVITMSSNFLGLEQLEHLDFQHSNLKQMSEFSVFLSLRNLIYLDISHTHTRVAFNGIFNGLSSLEVLKMAGNSFQENFLPDIFTELRNLTFLDLSQCQLEQLSPTAFNSLSSLQVLNMSHNNFFSLDTFPYKCLNSLQVLDYSLNHIMTSKKQELQHFPSSLAFLNLTQNDFACTCEHQSFLQWIKDQRQLLVEVERMECATPSDKQGMPVLSLNITCQMNKTIIGVSVLSVLVVSVVAVLVYKFYFHLMLLAGCIKYGRGENIYDAFVIYSSQDEDWVRNELVKNLEEGVPPFQLCLHYRDFIPGVAIAANIIHEGFHKSRKVIVVVSQHFIQSRWCIFEYEIAQTWQFLSSRAGIIFIVLQKVEKTLLRQQVELYRLLSRNTYLEWEDSVLGRHIFWRRLRKALLDGKSWNPEGTVGTGCNWQEATSI 2 634 656 NITCQMNKTI IGVSVLSVLVVSVVAVLVYKFYF HLMLLAGCIK +O60602 
MGDHLDLLLGVVLMAGPVFGIPSCSFDGRIAFYRFCNLTQVPQVLNTTERLLLSFNYIRTVTASSFPFLEQLQLLELGSQYTPLTIDKEAFRNLPNLRILDLGSSKIYFLHPDAFQGLFHLFELRLYFCGLSDAVLKDGYFRNLKALTRLDLSKNQIRSLYLHPSFGKLNSLKSIDFSSNQIFLVCEHELEPLQGKTLSFFSLAANSLYSRVSVDWGKCMNPFRNMVLEILDVSGNGWTVDITGNFSNAISKSQAFSLILAHHIMGAGFGFHNIKDPDQNTFAGLARSSVRHLDLSHGFVFSLNSRVFETLKDLKVLNLAYNKINKIADEAFYGLDNLQVLNLSYNLLGELYSSNFYGLPKVAYIDLQKNHIAIIQDQTFKFLEKLQTLDLRDNALTTIHFIPSIPDIFLSGNKLVTLPKINLTANLIHLSENRLENLDILYFLLRVPHLQILILNQNRFSSCSGDQTPSENPSLEQLFLGENMLQLAWETELCWDVFEGLSHLQVLYLNHNYLNSLPPGVFSHLTALRGLSLNSNRLTVLSHNDLPANLEILDISRNQLLAPNPDVFVSLSVLDITHNKFICECELSTFINWLNHTNVTIAGPPADIYCVYPDSFSGVSLFSLSTEGCDEEEVLKSLKFSLFIVCTVTLTLFLMTILTVTKFRGFCFICYKTAQRLVFKDHPQGTEPDMYKYDAYLCFSSKDFTWVQNALLKHLDTQYSDQNRFNLCFEERDFVPGENRIANIQDAIWNSRKIVCLVSRHFLRDGWCLEAFSYAQGRCLSDLNSALIMVVVGSLSQYQLMKHQSIRGFVQKQQYLRWPEDFQDVGWFLHKLSQQILKKEKEKKKDNNIPLQTVATIS 2 644 666 VLKSLKFSLF IVCTVTLTLFLMTILTVTKFRGF CFICYKTAQR +Q9NYK1 MVFPMWTLKRQILILFNIILISKLLGARWFPKTLPCDVTLDVPKNHVIVDCTDKHLTEIPGGIPTNTTNLTLTINHIPDISPASFHRLDHLVEIDFRCNCVPIPLGSKNNMCIKRLQIKPRSFSGLTYLKSLYLDGNQLLEIPQGLPPSLQLLSLEANNIFSIRKENLTELANIEILYLGQNCYYRNPCYVSYSIEKDAFLNLTKLKVLSLKDNNVTAVPTVLPSTLTELYLYNNMIAKIQEDDFNNLNQLQILDLSGNCPRCYNAPFPCAPCKNNSPLQIPVNAFDALTELKVLRLHSNSLQHVPPRWFKNINKLQELDLSQNFLAKEIGDAKFLHFLPSLIQLDLSFNFELQVYRASMNLSQAFSSLKSLKILRIRGYVFKELKSFNLSPLHNLQNLEVLDLGTNFIKIANLSMFKQFKRLKVIDLSVNKISPSGDSSEVGFCSNARTSVESYEPQVLEQLHYFRYDKYARSCRFKNKEASFMSVNESCYKYGQTLDLSKNSIFFVKSSDFQHLSFLKCLNLSGNLISQTLNGSEFQPLAELRYLDFSNNRLDLLHSTAFEELHKLEVLDISSNSHYFQSEGITHMLNFTKNLKVLQKLMMNDNDISSSTSRTMESESLRTLEFRGNHLDVLWREGDNRYLQLFKNLLKLEELDISKNSLSFLPSGVFDGMPPNLKNLSLAKNGLKSFSWKKLQCLKNLETLDLSHNQLTTVPERLSNCSRSLKNLILKNNQIRSLTKYFLQDAFQLRYLDLSSNKIQMIQKTSFPENVLNNLKMLLLHHNRFLCTCDAVWFVWWVNHTEVTIPYLATDVTCVGPGAHKGQSVISLDLYTCELDLTNLILFSLSISVSLFLMVMMTASHLYFWDVWYIYHFCKAKIKGYQRLISPDCCYDAFIVYDTKDPAVTEWVLAELVAKLEDPREKHFNLCLEERDWLPGQPVLENLSQSIQLSKKTVFVMTDKYAKTENFKIAFYLSHQRLMDEKVDVIILIFLEKPFQKSKFLQLRKRLCGSSVLEWPTNPQAHPYFWQCLKNALATDNHVAYSQVFKETV 2 843 865 CELDLTNLIL 
FSLSISVSLFLMVMMTASHLYFW DVWYIYHFCK +Q9NR97 MENMFLQSSMLTCIFLLISGSCELCAEENFSRSYPCDEKKQNDSVIAECSNRRLQEVPQTVGKYVTELDLSDNFITHITNESFQGLQNLTKINLNHNPNVQHQNGNPGIQSNGLNITDGAFLNLKNLRELLLEDNQLPQIPSGLPESLTELSLIQNNIYNITKEGISRLINLKNLYLAWNCYFNKVCEKTNIEDGVFETLTNLELLSLSFNSLSHVPPKLPSSLRKLFLSNTQIKYISEEDFKGLINLTLLDLSGNCPRCFNAPFPCVPCDGGASINIDRFAFQNLTQLRYLNLSSTSLRKINAAWFKNMPHLKVLDLEFNYLVGEIASGAFLTMLPRLEILDLSFNYIKGSYPQHINISRNFSKLLSLRALHLRGYVFQELREDDFQPLMQLPNLSTINLGINFIKQIDFKLFQNFSNLEIIYLSENRISPLVKDTRQSYANSSSFQRHIRKRRSTDFEFDPHSNFYHFTRPLIKPQCAAYGKALDLSLNSIFFIGPNQFENLPDIACLNLSANSNAQVLSGTEFSAIPHVKYLDLTNNRLDFDNASALTELSDLEVLDLSYNSHYFRIAGVTHHLEFIQNFTNLKVLNLSHNNIYTLTDKYNLESKSLVELVFSGNRLDILWNDDDNRYISIFKGLKNLTRLDLSLNRLKHIPNEAFLNLPASLTELHINDNMLKFFNWTLLQQFPRLELLDLRGNKLLFLTDSLSDFTSSLRTLLLSHNRISHLPSGFLSEVSSLKHLDLSSNLLKTINKSALETKTTTKLSMLELHGNPFECTCDIGDFRRWMDEHLNVKIPRLVDVICASPGDQRGKSIVSLELTTCVSDVTAVILFFFTFFITTMVMLAALAHHLFYWDVWFIYNVCLAKVKGYRSLSTSQTFYDAYISYDTKDASVTDWVINELRYHLEESRDKNVLLCLEERDWDPGLAIIDNLMQSINQSKKTVFVLTKKYAKSWNFKTAFYLALQRLMDENMDVIIFILLEPVLQHSQYLRLRQRICKSSILQWPDNPKAEGLFWQTLRNVVLTENDSRYNNMYVDSIKQY 2 826 848 SLELTTCVSD VTAVILFFFTFFITTMVMLAALA HHLFYWDVWF +Q4V9L6 MVSAAAPSLLILLLLLLGSVPATDARSVPLKATFLEDVAGSGEAEGSSASSPSLPPPWTPALSPTSMGPQPITLGGPSPPTNFLDGIVDFFRQYVMLIAVVGSLAFLLMFIVCAAVITRQKQKASAYYPSSFPKKKYVDQSDRAGGPRAFSEVPDRAPDSRPEEALDSSRQLQADILAATQNLKSPTRAALGGGDGARMVEGRGAEEEEKGSQEGDQEVQGHGVPVETPEAQEEPCSGVLEGAVVAGEGQGELEGSLLLAQEAQGPVGPPESPCACSSVHPSV 2 95 117 DGIVDFFRQY VMLIAVVGSLAFLLMFIVCAAVI TRQKQKASAY +Q8N3G9 MAQAVWSRLGRILWLACLLPWAPAGVAAGLYELNLTTDSPATTGAVVTISASLVAKDNGSLALPADAHLYRFHWIHTPLVLTGKMEKGLSSTIRVVGHVPGEFPVSVWVTAADCWMCQPVARGFVVLPITEFLVGDLVVTQNTSLPWPSSYLTKTVLKVSFLLHDPSNFLKTALFLYSWDFGDGTQMVTEDSVVYYNYSIIGTFTVKLKVVAEWEEVEPDATRAVKQKTGDFSASLKLQETLRGIQVLGPTLIQTFQKMTVTLNFLGSPPLTVCWRLKPECLPLEEGECHPVSVASTAYNLTHTFRDPGDYCFSIRAENIISKTHQYHKIQVWPSRIQPAVFAFPCATLITVMLAFIMYMTLRNATQQKDMVEVADFDFSPMSDKNPEPPSGVRCCCQMCCGPFLLETPSEYLEIVRENHGLLPPLYKSVKTYTV 2 340 362 IQVWPSRIQP AVFAFPCATLITVMLAFIMYMTL RNATQQKDMV +Q6P9G4 
MQAPRAALVFALVIALVPVGRGNYEELENSGDTTVESERPNKVTIPSTFAAVTIKETLNANINSTNFAPDENQLEFILMVLIPLILLVLLLLSVVFLATYYKRKRTKQEPSSQGSQSALQTYELGSENVKVPIFEEDTPSVMEIEMEELDKWMNSMNRNADFECLPTLKEEKESNHNPSDSES 2 76 98 NFAPDENQLE FILMVLIPLILLVLLLLSVVFLA TYYKRKRTKQ +Q8WZ59 MLGCGIPALGLLLLLQGSADGNGIQGFFYPWSCEGDIWDRESCGGQAAIDSPNLCLRLRCCYRNGVCYHQRPDENVRRKHMWALVWTCSGLLLLSCSICLFWWAKRRDVLHMPGFLAGPCDMSKSVSLLSKHRGTKKTPSTGSVPVALSKESRDVEGGTEGEGTEEGEETEGEEEED 2 82 104 PDENVRRKHM WALVWTCSGLLLLSCSICLFWWA KRRDVLHMPG +Q6UWW9 MSRSRLFSVTSAISTIGILCLPLFQLVLSDLPCEEDEMCVNYNDQHPNGWYIWILLLLVLVAALLCGAVVLCLQCWLRRPRIDSHRRTMAVFAVGDLDSIYGTEAAVSPTVGIHLQTQTPDLYPVPAPCFGPLGSPPPYEEIVKTT 2 49 71 CVNYNDQHPN GWYIWILLLLVLVAALLCGAVVL CLQCWLRRPR +A6NLX4 MAPGPWPVSCLRGGPLGLTYLSLLLIPAAAGTYCECSLGLSREALIALLVVLAGISASCFCALVIVAIGVLRAKGETCPRQVDNRLVENFGVQEDLMDLHPVYVESQLMDADLEVSLVPPLEDQSLVAIPMEASSEEPPPPPPLPPE 2 44 66 CECSLGLSRE ALIALLVVLAGISASCFCALVIV AIGVLRAKGE +A2RRL7 MQRLPAATRATLILSLAFASLHSACSAEASSSNSSSLTAHHPDPGTLEQCLNVDFCPQAARCCRTGVDEYGWIAAAVGWSLWFLTLILLCVDKLMKLTPDEPKDLQA 2 70 89 ARCCRTGVDE YGWIAAAVGWSLWFLTLILL CVDKLMKLTP +Q5T292 MNLGVSMLRILFLLDVGGAQVLATGKTPGAEIDFKYALIGTAVGVAISAGFLALKICMIRRHLFDDDSSDLKSTPGGLSDTIPLKKRAPRRNHNFSKRDAQVIEL 2 37 59 TPGAEIDFKY ALIGTAVGVAISAGFLALKICMI RRHLFDDDSS +Q4KMG9 MGVRVHVVAASALLYFILLSGTRCEENCGNPEHCLTTDWVHLWYIWLLVVIGALLLLCGLTSLCFRCCCLSRQQNGEDGGPPPCEVTVIAFDHDSTLQSTITSLQSVFGPAARRILAVAHSHSSLGQLPSSLDTLPGYEEALHMSRFTVAMCGQKAPDLPPVPEEKQLPPTEKESTRIVDSWN 2 42 64 EHCLTTDWVH LWYIWLLVVIGALLLLCGLTSLC FRCCCLSRQQ +Q9D2R4 MQIQTILLCFSFSFSAAFYFHAGEREEKCIIEDIPSDTLITGTFKVQQWDIVRHDFLESAPGLGMFVTVTTYNDEVLLSKLYGAQGTFYFTSHSSGEHIICLESNSTQFVSFGGSKLRIHLDIRVGEHDLDAAIVQAKDKVNEVTFKLQHLIEQVEQILKEQDYQRDREENFRITSEDTNRNVLWWAFAQILIFISVGIFQMKHLKDFFIAKKLV 2 183 202 RITSEDTNRN VLWWAFAQILIFISVGIFQM KHLKDFFIAK +P0DPE3 
MAARTLASALVLTLWVWALAPAGAVDAMGPHAAVRLAELLTPEECGHFRSLLEAPEPDVEAELSRLSEDRLARPEPLNTTSGSPSRRRRREAAEDPAGRVAGPGEVSDGCREALAAWLAPQAASLSWDRLARALRRSGRPDVARELGKNLHQQATLQLRKFGQRFLPRPGAAARVPFAPAPRPRRAAVPAPDWDALQLIVERLPQPLYERSPMGWAGPLALGLLTGFVGALGTGALVVLLTLWITGGDGDRASPGSPGPLATVQGWWETKLLLPKERRAPPGAWAADGPDSPSPHSALALSCKMGAQSWGSGALDGL 2 220 242 RSPMGWAGPL ALGLLTGFVGALGTGALVVLLTL WITGGDGDRA +Q13445 MMAAGAALALALWLLMPPVEVGGAGPPPIQDGEFTFLLPAGRKQCFYQSAPANASLETEYQVIGGAGLDVDFTLESPQGVLLVSESRKADGVHTVEPTEAGDYKLCFDNSFSTISEKLVFFELIFDSLQDDEEVEGWAEAVEPEEMLDVKMEDIKESIETMRTRLERSIQMLTLLRAFEARDRNLQEGNLERVNFWSAVNVAVLLLVAVLQVCTLKRFFQDKRPVPT 2 193 215 RNLQEGNLER VNFWSAVNVAVLLLVAVLQVCTL KRFFQDKRPV +Q15363 MVTLAELLVLLAALLATVSGYFVSIDAHAEECFFERVTSGTKMGLIFEVAEGGFLDIDVEITGPDNKGIYKGDRESSGKYTFAAHMDGTYKFCFSNRMSTMTPKIVMFTIDIGEAPKGQDMETEAHQNKLEEMINELAVAMTAVKHEQEYMEVRERIHRAINDNTNSRVVLWSFFEALVLVAMTLGQIYYLKRFFEVRRVV 2 169 191 RAINDNTNSR VVLWSFFEALVLVAMTLGQIYYL KRFFEVRRVV +Q9Y3Q3 MGSTVPRSASVLLLLLLLRRAEQPCGAELTFELPDNAKQCFHEEVEQGVKFSLDYQVITGGHYDVDCYVEDPQGNTIYRETKKQYDSFTYRAEVKGVYQFCFSNEFSTFSHKTVYFDFQVGDEPPILPDMGNRVTALTQMESACVTIHEALKTVIDSQTHYRLREAQDRARAEDLNSRVSYWSVGETIALFVVSFSQVLLLKSFFTEKRPISRAVHS 2 179 201 RARAEDLNSR VSYWSVGETIALFVVSFSQVLLL KSFFTEKRPI +Q8WW62 MSPLLFGAGLVVLNLVTSARSQKTEPLSGSGDQPLFRGADRYDFAIMIPPGGTECFWQFAHQTGYFYFSYEVQRTVGMSHDRHVAATAHNPQGFLIDTSQGVRGQINFSTQETGFYQLCLSNQHNHFGSVQVYLNFGVFYEGPETDHKQKERKQLNDTLDAIEDGTQKVQNNIFHMWRYYNFARMRKMADFFLIQSNYNYVNWWSTAQSLVIILSGILQLYFLKRLFNVPTTTDTKKPRC 2 201 223 FFLIQSNYNY VNWWSTAQSLVIILSGILQLYFL KRLFNVPTTT +P49755 MSGLSGPPARRGPFPLALLLLFLLGPRLVLAISFHLPINSRKCLREEIHKDLLVTGAYEISDQSGGAGGLRSHLKITDSAGHILYSKEDATKGKFAFTTEDYDMFEVCFESKGTGRIPDQLVILDMKHGVEAKNYEEIAKVEKLKPLEVELRRLEDLSESIVNDFAYMKKREEEMRDTNESTNTRVLYFSIFSMFCLIGLATWQVFYLRRFFKAKKLIE 2 186 208 RDTNESTNTR VLYFSIFSMFCLIGLATWQVFYL RRFFKAKKLI +Q9P0T7 MKLLSLVAVVGCLLVPPAEANKSSEDIRCKCICPPYRNISGHIYNQNVSQKDCNCLHVVEPMPVPGHDVEAYCLLCECRYEERSTTTIKVIIVIYLSVVGALLLYMAFLMLVDPLIRKPDAYTEQLHNEEENEDARSMAAAAASLGGPRANTVLERVEGAQQRWKLQVQEQRKTVFDRHKMLS 2 90 
112 YEERSTTTIK VIIVIYLSVVGALLLYMAFLMLV DPLIRKPDAY +O14668 MGRVFLTGEKANSILKRYPRANGFFEEIRQGNIERECKEEFCTFEEAREAFENNEKTKEFWSTYTKAQQGESNRGSDWFQFYLTFPLIFGLFIILLVIFLIWRCFLRNKTRRQTVTEGHIPFPQHLNIITPPPPPDEVFDSSGLSPGFLGYVVGRSDSVSTRLSNCDPPPTYEEATGQVNLQRSETEPHLDPPPEYEDIVNSNSASAIPMVPVVTTIK 2 84 106 RGSDWFQFYL TFPLIFGLFIILLVIFLIWRCFL RNKTRRQTVT +O14669 MRGHPSLLLLYMALTTCLDTSPSEETDQEVFLGPPEAQSFLSSHTRIPRANHWDLELLTPGNLERECLEERCSWEEAREYFEDNTLTERFWESYIYNGKGGRGRVDVASLAVGLTGGILLIVLAGLGAFWYLRWRQHRGQQPCPQEAGLISPLSPLNPLGPPTPLPPPPPPPPGLPTYEQALAASGVHDAPPPPYTSLRRPH 2 110 132 GGRGRVDVAS LAVGLTGGILLIVLAGLGAFWYL RWRQHRGQQP +Q9BZD7 MAVFLEAKDAHSVLKRFPRANEFLEELRQGTIERECMEEICSYEEVKEVFENKEKTMEFWKGYPNAVYSVRDPSQSSDAMYVVVPLLGVALLIVIALFIIWRCQLQKATRHHPSYAQNRYLASRAGHTLPRVMVYRGTVHSQGEPSGHREAANSPQVVLGPSRGGRTTVRLESTLYLPELSLSRLSSTTPPPSYEEVTAPQESSSEEASVSYSDPPPKYEEIVAANPGADK 2 79 101 SVRDPSQSSD AMYVVVPLLGVALLIVIALFIIW RCQLQKATRH +Q9BZD6 MFTLLVLLSQLPTVTLGFPHCARGPKASKHAGEEVFTSKEEANFFIHRRLLYNRFDLELFTPGNLERECNEELCNYEEAREIFVDEDKTIAFWQEYSAKGPTTKSDGNREKIDVMGLLTGLIAAGVFLVIFGLLGYYLCITKCNRLQHPCSSAVYERGRHTPSIIFRRPEEAALSPLPPSVEDAGLPSYEQAVALTRKHSVSPPPPYPGHTKGFRVFKKSMSLPSH 2 118 140 NREKIDVMGL LTGLIAAGVFLVIFGLLGYYLCI TKCNRLQHPC +Q8NEW7 MAGWPGAGPLCVLGGAALGVCLAGVAGQLVEPSTAPPKPKPPPLTKETVVFWDMRLWHVVGIFSLFVLSIIITLCCVFNCRVPRTRKEIEARYLQRKAAKMYTDKLETVPPLNELTEVPGEDKKKKKKKKKDSVDTVAIKVEEDEKNEAKKKKGEK 2 56 78 KETVVFWDMR LWHVVGIFSLFVLSIIITLCCVF NCRVPRTRKE +Q9D7L8 MVWKITGPLQACQLLLVVLSLPQGRTSSVLTVNGRTENYILDTQHGVQASLECAVQNHTEDEELLWYREDGIVDLKNGNKINISSVCVSPINESDNGVRFTCKLQRDQTVSVTVVLNVTFPPLLSGNGFQTVEENSDVSLVCNVKSNPQAQMMWYKNNSALVLEKGRHQIHQTRESFQLSITKVKKSDNGTYSCIASSSLKMETMDFHLLVKDKVFVMPAEPIIAACVVVVLTMAFALFSRRKRIMKLCGKKNDPNSETAL 2 217 239 FHLLVKDKVF VMPAEPIIAACVVVVLTMAFALF SRRKRIMKLC +Q96BF3 
MGSPGMVLGLLVQIWALQEASSLSVQQGPNLLQVRQGSQATLVCQVDQATAWERLRVKWTKDGAILCQPYITNGSLSLGVCGPQGRLSWQAPSHLTLQLDPVSLNHSGAYVCWAAVEIPELEEAEGNITRLFVDPDDPTQNRNRIASFPGFLFVLLGVGSMGVAAIVWGAWFWGRRSCQQRDSGNSPGNAFYSNVLYRPRGAPKKSEDCSGEGKDQRGQSIYSTSFPQPAPRQPHLASRPCPSPRPCPSPRPGHPVSMVRVSPRPSPTQQPRPKGFPKVGEE 2 150 172 QNRNRIASFP GFLFVLLGVGSMGVAAIVWGAWF WGRRSCQQRD +G3X8R9 MEFLLLLSLALFSDAMVMDEKVKSGVELETASAVCVYDAYYKDHTKYWCRGYFRDSCNIIAFTPNSTNRVALKDTGNQLIITISCLVKEDTGWYWCGIQRDLARDDMDFTQLIVTDNREDRANGFSSDPSGNRTRSCRASKAVQKAEGSRMSILIICILITSLGIIFIISHLSRGRRSQRNREVTGKSISRNPQASQGPSMVSITLARI 2 151 173 KAVQKAEGSR MSILIICILITSLGIIFIISHLS RGRRSQRNRE +Q9DCF1 MELPLSQATLRHTLLLLPALLSSGQGELAPQIDGQTWAERALRENEHHAFTCRVAGGSATPRLAWYLDGQLQEATTSRLLSVGGDAFSGGTSTFTVTAQRSQHELNCSLQDPGSGRPANASVILNVQFKPEIAQVGAKYQEAQGPGLLVVLFALVRANPPANVTWIDQDGPVTVNASDFLVLDAQNYPWLTNHTVQLQLRSLAHNLSVVATNDVGVTSASLPAPGLLATRIEVPLLGIVVAGGLALGTLVGFSTLVACLVCRKEKKTKGPSRRPSLISSDSNNLKLNNVRLPRENMSLPSNLQLNDLTPDLRGKATERPMAQHSSRPELLEAEPGGLLTSRGFIRLPMLGYIYRVSSVSSDEIWL 2 238 260 ATRIEVPLLG IVVAGGLALGTLVGFSTLVACLV CRKEKKTKGP +Q9BXS4 MAAPKGSLWVRTQLGLPPLLLLTMALAGGSGTASAEAFDSVLGDTASCHRACQLTYPLHTYPKEEELYACQRGCRLFSICQFVDDGIDLNRTKLECESACTEAYSQSDEQYACHLGCQNQLPFAELRQEQLMSLMPKMHLLFPLTLVRSFWSDMMDSAQSFITSSWTFYLQADDGKIVIFQSKPEIQYAPHLEQEPTNLRESSLSKMSYLQMRNSQAHRNFLEDGESDGFLRCLSLNSGWILTTTLVLSVMVLLWICCATVATAVEQYVPSEKLSIYGDLEFMNEQKLNRYPASSLVVVRSKTEDHEEAGPLPTKVNLAHSEI 2 240 262 FLRCLSLNSG WILTTTLVLSVMVLLWICCATVA TAVEQYVPSE +Q9D5K1 MKTGAIVFILRSLLSITYLPLVLMTLDIPEELQKAVGRVIVNATGCSVTCGLGYKEETECEVGPDGVRRNCTFQRLECVTNWICGMLHFTIVHGKTFELNCLSSDILEKGQEAFRFTWRLARGIISTNDELFRPFRANSPFIGFKPAYEYNAGTYRCDVQLLKNLKFVKRLYFGLRVLPPKLVNLNFQQSLTEDQKLIDKGWEVNLDNGSKPHLPVWQRKVTSALGIGIVAGVVGGVLVSVAVFKALGGTDGSGGRTRL 2 221 243 KPHLPVWQRK VTSALGIGIVAGVVGGVLVSVAV FKALGGTDGS +A2RUT3 MLHVLASLPLLLLLVTSASTHAWSRPLWYQVGLDLQPWGCQPKSVEGCRGGLSCPGYWLGPGASRIYPVAAVMITTTMLMICRKILQGRRRSQATKGEHPQVTTEPCGPWKRRAPISDHTLLRGVLHMLDALLVHIEGHLRHLATQRQIQIKGTSTQSG 2 64 86 CPGYWLGPGA SRIYPVAAVMITTTMLMICRKIL QGRRRSQATK +Q6UXU6 
MSQAWVPGLAPTLLFSLLAGPQKIAAKCGLILACPKGFKCCGDSCCQENELFPGPVRIFVIIFLVILSVFCICGLAKCFCRNCREPEPDSPVDCRGPLELPSIIPPERVRVSLSAPPPPYSEVILKPSLGPTPTEPPPPYSFRPEEYTGDQRGIDNPAF 2 54 76 SCCQENELFP GPVRIFVIIFLVILSVFCICGLA KCFCRNCREP +B7ZWI3 MLDTWVWGTLTLTFGLLSSLQGVSFNETANTCDILNCPKGFTCCVKECCPERKVWDPANDRFRFLVILACIIFPILFICALVSLFCPNCTELQHDVRRVDHQTPIEPPSIAPLESIWVTSLDPPPPYSQVVQMTPPTEPPPPYSLRPEGPAGQMRGRAYATL 2 64 86 VWDPANDRFR FLVILACIIFPILFICALVSLFC PNCTELQHDV +Q3KNT9 MWRLALGGVFLAAAQACVFCRLPAHDLSGRLARLCSQMEARQKECGASPDFSAFALDEVSMNKVTEKTHRVLRVMEIKEAVSSLPSYWSWLRKTKLPEYTREALCPPACRGSTTLYNCSTCKGTEVSCWPRKRCFPGSQDLWEAKILLLSIFGAFLLLGVLSLLVESHHLQAKSGL 2 146 165 PGSQDLWEAK ILLLSIFGAFLLLGVLSLLV ESHHLQAKSG +Q9H3N1 MAPSGSLAVPLAVLVLLLWGAPWTHGRRSNVRVITDENWRELLEGDWMIEFYAPWCPACQNLQPEWESFAEWGEDLEVNIAKVDVTEQPGLSGRFIITALPTIYHCKDGEFRRYQGPRTKKDFINFISDKEWKSIEPVSSWFGPGSVLMSSMSALFQLSMWIRTCHNYFIEDLGLPVWGSYTVFALATLFSGLLLGLCMIFVADCLCPSKRRRPQPYPYPSKKLLSESAQPLKKVEEEQEADEEDVSEEEAESKEGTNKDFPQNAIRQRSLGPSLATDKS 2 181 203 EDLGLPVWGS YTVFALATLFSGLLLGLCMIFVA DCLCPSKRRR +Q9Y320 MAVLAPLIALVYSVPRLSRWLAQPYYLLSALLSAAFLLVRKLPPLCHGLPTQREDGNPCDFDWREVEILMFLSAIVMMKNRRSITVEQHIGNIFMFSKVANTILFFRLDIRMGLLYITLCIVFLMTCKPPLYMGPEYIKYFNDKTIDEELERDKRVTWIVEFFANWSNDCQSFAPIYADLSLKYNCTGLNFGKVDVGRYTDVSTRYKVSTSPLTKQLPTLILFQGGKEAMRRPQIDKKGRAVSWTFSEENVIREFNLNELYQRAKKLSKAGDNIPEEQPVASTPTTVSDGENKKDK 2 103 125 IFMFSKVANT ILFFRLDIRMGLLYITLCIVFLM TCKPPLYMGP +O35305 
MAPRARRRRQLPAPLLALCVLLVPLQVTLQVTPPCTQERHYEHLGRCCSRCEPGKYLSSKCTPTSDSVCLPCGPDEYLDTWNEEDKCLLHKVCDAGKALVAVDPGNHTAPRRCACTAGYHWNSDCECCRRNTECAPGFGAQHPLQLNKDTVCTPCLLGFFSDVFSSTDKCKPWTNCTLLGKLEAHQGTTESDVVCSSSMTLRRPPKEAQAYLPSLIVLLLFISVVVVAAIIFGVYYRKGGKALTANLWNWVNDACSSLSGNKESSGDRCAGSHSATSSQQEVCEGILLMTREEKMVPEDGAGVCGPVCAAGGPWAEVRDSRTFTLVSEVETQGDLSRKIPTEDEYTDRPSQPSTGSLLLIQQGSKSIPPFQEPLEVGENDSLSQCFTGTESTVDSEGCDFTEPPSRTDSMPVSPEKHLTKEIEGDSCLPWVVSSNSTDGYTGSGNTPGEDHEPFPGSLKCGPLPQCAYSMGFPSEAAASMAEAGVRPQDRADERGASGSGSSPSDQPPASGNVTGNSNSTFISSGQVMNFKGDIIVVYVSQTSQEGPGSAEPESEPVGRPVQEETLAHRDSFAGTAPRFPDVCATGAGLQEQGAPRQKDGTSRPVQEQGGAQTSLHTQGSGQCAE 2 212 234 RRPPKEAQAY LPSLIVLLLFISVVVVAAIIFGV YYRKGGKALT +Q9CR75 MASAWPRSLPQILVLGFGLVLMRAAAGEQAPGTSPCSSGSSWSADLDKCMDCASCPARPHSDFCLGCAAAPPAHFRLLWPILGGALSLVLVLALVSSFLVWRRCRRREKFTTPIEETGGEGCPGVALIQ 2 79 101 AAPPAHFRLL WPILGGALSLVLVLALVSSFLVW RRCRRREKFT +Q92956 MEPPGDWGPPPWRSTPKTDVLRLVLYLTFLGAPCYAPALPSCKEDEYPVGSECCPKCSPGYRVKEACGELTGTVCEPCPPGTYIAHLNGLSKCLQCQMCDPAMGLRASRNCSRTENAVCGCSPGHFCIVQDGDHCAACRAYATSSPGQRVQKGGTESQDTLCQNCPPGTFSPNGTLEECQHQTKCSWLVTKAGAGTSSSHWVWWFLSGSLVIVIVCSTVGLIICVKRRKPRGDVVKVIVSVQRKRQEAEGEATVIEALQAPPDVTTVAVEETIPSFTGRSPNH 2 201 223 KAGAGTSSSH WVWWFLSGSLVIVIVCSTVGLII CVKRRKPRGD +Q80WM9 MEPLPGWGSAPWSQAPTDNTFRLVPCVFLLNLLQRISAQPSCRQEEFLVGDECCPMCNPGYHVKQVCSEHTGTVCAPCPPQTYTAHANGLSKCLPCGVCDPDMGLLTWQECSSWKDTVCRCIPGYFCENQDGSHCSTCLQHTTCPPGQRVEKRGTHDQDTVCADCLTGTFSLGGTQEECLPWTNCSAFQQEVRRGTNSTDTTCSSQVVYYVVSILLPLVIVGAGIAGFLICTRRHLHTSSVAKELEPFQEQQENTIRFPVTEVGFAETEEETASN 2 208 230 STDTTCSSQV VYYVVSILLPLVIVGAGIAGFLI CTRRHLHTSS +Q9Y5U5 MAQHGAMGAFRALCGLALLCALSLGQRPTGGPGCGPGRLLLGTGTDARCCRVHTTRCCRDYPGEECCSEWDCMCVQPEFHCGDPCCTTCRHHPCPPGQGVQSQGKFSFGFQCIDCASGTFSGGHEGHCKPWTDCTQFGFLTVFPGNKTHNAVCVPGSPPAEPLGWLTVVLLAVAACVLLLTSAQLGLHIWQLRSQCMWPRETQLLLEVPPSTEDARSCQFPEEERGERSAEEKGRLGDLWV 2 165 187 PGSPPAEPLG WLTVVLLAVAACVLLLTSAQLGL HIWQLRSQCM +P20333 
MAPVAVWAALAVGLELWAAAHALPAQVAFTPYAPEPGSTCRLREYYDQTAQMCCSKCSPGQHAKVFCTKTSDTVCDSCEDSTYTQLWNWVPECLSCGSRCSSDQVETQACTREQNRICTCRPGWYCALSKQEGCRLCAPLRKCRPGFGVARPGTETSDVVCKPCAPGTFSNTTSSTDICRPHQICNVVAIPGNASMDAVCTSTSPTRSMAPGAVHLPQPVSTRSQHTQPTPEPSTAPSTSFLLPMGPSPPAEGSTGDFALPVGLIVGVTALGLLIIGVVNCVIMTQVKKKPLCLQREAKVPHLPADKARGTQGPEQQHLLITAPSSSSSSLESSASALDRRAPTRNQPQAPGVEASGAGEARASTGSSDSSPGGHGTQVNVTCIVNVCSSSDHSSQCSSQASSTMGDTDSSPSESPKDEQVPFSKEECAFRSQLETPETLLGSTEEKPLPLGVPDAGMKPS 2 258 280 SPPAEGSTGD FALPVGLIVGVTALGLLIIGVVN CVIMTQVKKK +Q93038 MEQRPRGCAAVAAALLLVLLGARAQGGTRSPRCDCAGDFHKKIGLFCCRGCPAGHYLKAPCTEPCGNSTCLVCPQDTFLAWENHHNSECARCQACDEQASQVALENCSAVADTRCGCKPGWFVECQVSQCVSSSPFYCQPCLDCGALHRHTRLLCSRRDTDCGTCLPGFYEHGDGCVSCPTSTLGSCPERCAAVCGWRQMFWVQVLLAGLVVPLLLGATLTYTYRHCWPHKPLVTADEAGMEALTPPPATHLSPLDSAHTLLAPPDSSEKICTVQLVGNSWTPGYPETQEALCPQVTWSWDQLPSRALGPAAAPTLSPESPAGSPAMMLQPGPQLYDVMDAVPARRWKEFVRTLGLREAEIEAVEVEIGRFRDQQYEMLKRWRQQQPAGLGAVYAALERMGLDGCVEDLRSRLQRGP 2 200 222 RCAAVCGWRQ MFWVQVLLAGLVVPLLLGATLTY TYRHCWPHKP +P83626 MTRLRLLLLLGLLLRVAVCSVNTITLCKIGEFKHENLCCLQCSAGTYLRNPCQENHNKSECAPCDSEHFIDHKNRESECFPCSVCRDDQEEVAKCSRTADRVCQCKQGTYCDSENCLERCHTCSSCPDGRVVRKCNATMDTVCDKFDSEPGQSGSQCFCFSKPLGIVVIIAAFIIIIGAVIILILKIICYCKRGENIQLSSTML 2 163 185 SGSQCFCFSK PLGIVVIIAAFIIIIGAVIILIL KIICYCKRGE +P36941 MLLPWATSAPGLAWGPLVLGLFGLLAASQPQAVPPYASENQTCRDQEKEYYEPQHRICCSRCPPGTYVSAKCSRIRDTVCATCAENSYNEHWNYLTICQLCRPCDPVMGLEEIAPCTSKRKTQCRCQPGMFCAAWALECTHCELLSDCPPGTEAELKDEVGKGNNHCVPCKAGHFQNTSSPSARCQPHTRCENQGLVEAAPGTAQSDTTCKNPLEPLPPEMSGTMLMLAVLLPLAFFLLLATVFSCIWKSHPSLCRKLGSLLKRRPQGEGPNPVAGSWEPPKAHPYFPDLVQPLLPISGDVSPVSTGLPAAPVLEAGVPQQQSPLDLTREPQLEPGEQSQVAHGTNGIHVTGGSMTITGNIYIYNGPVLGGPPGPGDLPATPEPPYPIPEEGDPGPPGLSTPHQEDGKAWHLAETEHCGATPSNRGPRNQFITHD 2 226 248 PLPPEMSGTM LMLAVLLPLAFFLLLATVFSCIW KSHPSLCRKL +P25942 
MVRLPLQCVLWGCLLTAVHPEPPTACREKQYLINSQCCSLCQPGQKLVSDCTEFTETECLPCGESEFLDTWNRETHCHQHKYCDPNLGLRVQQKGTSETDTICTCEEGWHCTSEACESCVLHRSCSPGFGVKQIATGVSDTICEPCPVGFFSNVSSAFEKCHPWTSCETKDLVVQQAGTNKTDVVCGPQDRLRALVVIPIIFGILFAILLVLVFIKKVAKKPTNKAPHPKQEPQEINFPDDLPGSNTAAPVQETLHGCQPVTQEDGKESRISVQERQ 2 193 215 DVVCGPQDRL RALVVIPIIFGILFAILLVLVFI KKVAKKPTNK +P25446 MLWIWAVLPLVLAGSQLRVHTQGTNSISESLKLRRRVRETDKNCSEGLYQGGPFCCQPCQPGKKKVEDCKMNGGTPTCAPCTEGKEYMDKNHYADKCRRCTLCDEEHGLEVETNCTLTQNTKCKCKPDFYCDSPGCEHCVRCASCEHGTLEPCTATSNTNCRKQSPRNRLWLLTILVLLIPLVFIYRKYRKRKCWKRRQDDPESRTSSRETIPMNASNLSLSKYIPRIAEDMTIQEAKKFARENNIKEGKIDEIMHDSIQDTAEQKVQLLLCWYQSHGKSDAYQDLIKGLKKAECRRTLDKFQDMVQKDLGKSTPDTGNENEGQCLE 2 170 187 NCRKQSPRNR LWLLTILVLLIPLVFIYR KYRKRKCWKR +P28908 MRVLLAALGLLFLGALRAFPQDRPFEDTCHGNPSHYYDKAVRRCCYRCPMGLFPTQQCPQRPTDCRKQCEPDYYLDEADRCTACVTCSRDDLVEKTPCAWNSSRVCECRPGMFCSTSAVNSCARCFFHSVCPAGMIVKFPGTAQKNTVCEPASPGVSPACASPENCKEPSSGTIPQAKPTPVSPATSSASTMPVRGGTRLAQEAASKLTRAPDSPSSVGRPSSDPGLSPTQPCPEGSGDCRKQCEPDYYLDEAGRCTACVSCSRDDLVEKTPCAWNSSRTCECRPGMICATSATNSCARCVPYPICAAETVTKPQDMAEKDTTFEAPPLGTQPDCNPTPENGEAPASTSPTQSLLVDSQASKTLPIPTSAPVALSSTGKPVLDAGPVLFWVILVLVVVVGSSAFLLCHRRACRKRIRQKLHLCYPVQTSQPKLELVDSRPRRSSTQLRSGASVTEPVAEERGLMSQPLMETCHSVGAAYLESLPLQDASPAGGPSSPRDLPEPRVSTEHTNNKIEKIYIMKADTVIVGTVKAELPEGRGLAGPAEPELEEELEADHTPHYPEQETEPPLGSCSDVMLSVEEEGKEDPLPTAASGK 2 386 408 STGKPVLDAG PVLFWVILVLVVVVGSSAFLLCH RRACRKRIRQ +Q07011 MGNSCYNIVATLLLVLNFERTRSLQDPCSNCPAGTFCDNNRNQICSPCPPNSFSSAGGQRTCDICRQCKGVFRTRKECSSTSNAECDCTPGFHCLGAGCSMCEQDCKQGQELTKKGCKDCCFGTFNDQKRGICRPWTNCSLDGKSVLVNGTKERDVVCGPSPADLSPGASSVTPPAPAREPGHSPQIISFFLALTSTALLFLLFFLTLRFSVVKRGRKKLLYIFKQPFMRPVQTTQEEDGCSCRFPEEEEGGCEL 2 191 213 PGHSPQIISF FLALTSTALLFLLFFLTLRFSVV KRGRKKLLYI +Q13641 
MPGGCSRGPAAGDGRLRLARLALVLLGWVSSSSPTSSASSFSSSAPFLASAVSAQPPLPDQCPALCECSEAARTVKCVNRNLTEVPTDLPAYVRNLFLTGNQLAVLPAGAFARRPPLAELAALNLSGSRLDEVRAGAFEHLPSLRQLDLSHNPLADLSPFAFSGSNASVSAPSPLVELILNHIVPPEDERQNRSFEGMVVAALLAGRALQGLRRLELASNHFLYLPRDVLAQLPSLRHLDLSNNSLVSLTYVSFRNLTHLESLHLEDNALKVLHNGTLAELQGLPHIRVFLDNNPWVCDCHMADMVTWLKETEVVQGKDRLTCAYPEKMRNRVLLELNSADLDCDPILPPSLQTSYVFLGIVLALIGAIFLLVLYLNRKGIKKWMHNIRDACRDHMEGYHYRYEINADPRLTNLSSNSDV 2 354 376 CDPILPPSLQ TSYVFLGIVLALIGAIFLLVLYL NRKGIKKWMH +P40238 MPSWALFMVTSCLLLAPQNLAQVSSQDVSLLASDSEPLKCFSRTFEDLTCFWDEEEAAPSGTYQLLYAYPREKPRACPLSSQSMPHFGTRYVCQFPDQEEVRLFFPLHLWVKNVFLNQTRTQRVLFVDSVGLPAPPSIIKAMGGSQPGELQISWEEPAPEISDFLRYELRYGPRDPKNSTGPTVIQLIATETCCPALQRPHSASALDQSPCAQPTMPWQDGPKQTSPSREASALTAEGGSCLISGLQPGNSYWLQLRSEPDGISLGGSWGSWSLPVTVDLPGDAVALGLQCFTLDLKNVTCQWQQQDHASSQGFFYHSRARCCPRDRYPIWENCEEEEKTNPGLQTPQFSRCHFKSRNDSIIHILVEVTTAPGTVHSYLGSPFWIHQAVRLPTPNLHWREISSGHLELEWQHPSSWAAQETCYQLRYTGEGHQDWKVLEPPLGARGGTLELRPRSRYRLQLRARLNGPTYQGPWSSWSDPTRVETATETAWISLVTALHLVLGLSAVLGLLLLRWQFPAHYRRLRHALWPSLPDLHRVLGQYLRDTAALSPPKATVSDTCEEVEPSLLEILPKSSERTPLPLCSSQAQMDYRRLQPSCLGTMPLSVCPPMAESGSCCTTHIANHSYLPLSYWQQP 2 491 513 TRVETATETA WISLVTALHLVLGLSAVLGLLLL RWQFPAHYRR +Q9BX59 MGTQEGWCLLLCLALSGAAETKPHPAEGQWRAVDVVLDCFLAKDGAHRGALASSEDRARASLVLKQVPVLDDGSLEDFTDFQGGTLAQDDPPIIFEASVDLVQIPQAEALLHADCSGKEVTCEISRYFLQMTETTVKTAAWFMANMQVSGGGPSISLVMKTPRVTKNEALWHPTLNLPLSPQGTVRTAVEFQVMTQTQSLSFLLGSSASLDCGFSMAPGLDLISVEWRLQHKGRGQLVYSWTAGQGQAVRKGATLEPAQLGMARDASLTLPGLTIQDEGTYICQITTSLYRAQQIIQLNIQASPKVRLSLANEALLPTLICDIAGYYPLDVVVTWTREELGGSPAQVSGASFSSLRQSVAGTYSISSSLTAEPGSAGATYTCQVTHISLEEPLGASTQVVPPERRTALGVIFASSLFLLALMFLGLQRRQAPTGLGLLQAERWETTSCADTQSSHLHEDRTARVSQPS 2 407 426 TQVVPPERRT ALGVIFASSLFLLALMFLGL QRRQAPTGLG +O15533 
MKSLSLLLAVALGLATAVSAGPAVIECWFVEDASGKGLAKRPGALLLRQGPGEPPPRPDLDPELYLSVHDPAGALQAAFRRYPRGAPAPHCEMSRFVPLPASAKWASGLTPAQNCPRALDGAWLMVSISSPVLSLSSLLRPQPEPQQEPVLITMATVVLTVLTHTPAPRVRLGQDALLDLSFAYMPPTSEAASSLAPGPPPFGLEWRRQHLGKGHLLLAATPGLNGQMPAAQEGAVAFAAWDDDEPWGPWTGNGTFWLPRVQPFQEGTYLATIHLPYLQGQVTLELAVYKPPKVSLMPATLARAAPGEAPPELLCLVSHFYPSGGLEVEWELRGGPGGRSQKAEGQRWLSALRHHSDGSVSLSGHLQPPPVTTEQHGARYACRIHHPSLPASGRSAEVTLEVAGLSGPSLEDSVGLFLSAFLLLGLFKALGWAAVYLSTCKDSKKKAE 2 414 436 GLSGPSLEDS VGLFLSAFLLLGLFKALGWAAVY LSTCKDSKKK +O00220 MAPPPARVHLGAFLAVTPNPGSAASGTEAAAATPSKVWGSSAGRIEPRGGGRGALPTSMGQHGPSARARAGRAPGPRPAREASPRLRVHKTFKFVVVGVLLQVVPSSAATIKLHDQSIGTQQWEHSPLGELCPPGSHRSEHPGACNRCTEGVGYTNASNNLFACLPCTACKSDEEERSPCTTTRNTACQCKPGTFRNDNSAEMCRKCSRGCPRGMVKVKDCTPWSDIECVHKESGNGHNIWVILVVTLVVPLLLVAVLIVCCCIGSGCGGDPKCMDRVCFWRLGLLRGPGAEDNAHNEILSNADSLSTFVSEQQMESQEPADLTGVTVQSPGEAQCLLGPAEAEGSQRRRLLVPANGADPTETLMLFFDKFANIVPFDSWDQLMRQLDLTKNEIDVVRAGTAGPGDALYAMLMKWVNKTGRNASIHTLLDALERMEERHAREKIQDLLVDSGKFIYLEDGTGSAVSLE 2 240 262 VHKESGNGHN IWVILVVTLVVPLLLVAVLIVCC CIGSGCGGDP +O14763 MEQRGQNAPAASGARKRHGPGPREARGARPGPRVPKTLVLVVAAVLLLVSAESALITQQDLAPQQRAAPQQKRSSPSEGLCPPGHHISEDGRDCISCKYGQDYSTHWNDLLFCLRCTRCDSGEVELSPCTTTRNTVCQCEEGTFREEDSPEMCRKCRTGCPRGMVKVGDCTPWSDIECVHKESGTKHSGEVPAVEETVTSSPGTPASPCSLSGIIIGVTVAAVVLIVAVFVCKSLLWKKVLPYLKGICSGGGGDPERVDRSSQRPGAEDNVLNEIVSILQPTQVPEQEMEVQEPAEPTGVNMLSPGESEHLLEPAEAERSQRRRLLVPANEGDPTETLRQCFDDFADLVPFDSWEPLMRKLGLMDNEIKVAKAEAAGHRDTLYTMLIKWVNKTGRDASVHTLLDALETLGERLAKQKIEDHLLSSGKFMYLEGNADSAMS 2 209 231 TSSPGTPASP CSLSGIIIGVTVAAVVLIVAVFV CKSLLWKKVL +Q9UBN6 MGLWGQSVPTASSARAGRYPGARTASGTRPWLLDPKILKFVVFIVAVLLPVRVDSATIPRQDEVPQQTVAPQQQRRSLKEEECPAGSHRSEYTGACNPCTEGVDYTIASNNLPSCLLCTVCKSGQTNKSSCTTTRDTVCQCEKGSFQDKNSPEMCRTCRTGCPRGMVKVSNCTPRSDIKCKNESAASSTGKTPAAEETVTTILGMLASPYHYLIIIVVLVIILAVVVVGFSCRKKFISYLKGICSGGGGGPERVHRVLFRRRSCPSRVPGAEDNARNETLSNRYLQPTQVSEQEIQGQELAELTGVTVESPEEPQRLLEQAEAEGCQRRRLLVPVNDADSADISTLLDASATLEEGHAKETIQDQLVGSEKLFYEEDEAGSATSCL 2 210 232 TTILGMLASP YHYLIIIVVLVIILAVVVVGFSC RKKFISYLKG 
+Q9JKE1 MSPLLLWLGLMLCVSGLQAGDEEEHKCFLEGENLTLTCPYNIMLYSLSLKAWQRVRSHGSPETLVLTNTRKADFNVARAGKYLLEDYPTESVVKVTVTGLQRQDVGLYQCVVYLSPDNVIILRQRIRLAWCQGKPVMVIVLTCGFILNKGLVFSVLFVFLCKAGPKVLQPSKTSKVQGVSEKQ 2 138 160 LAWCQGKPVM VIVLTCGFILNKGLVFSVLFVFL CKAGPKVLQP +Q8K558 MDCYLLLLLLLLGLAGQGSADSHPEVLQAPVGSSILVQCHYRLQDVRALKVWCQFLQEGCHPLVTSAVDRRAPGNGRIFLTDLGGGLLQVEMVTLQEEDTGEYGCVVEGAAGPQTLHRVSLLVLPPVPGPREGEEAEDEKETYRIGTGSLLEDPSLDPSASAGPHEFRRRENSIPLIWGAVLLLALVVVAVVIFAVMARKKGNRLVVCGPSQSTGVPGMDPPSAAHRSSDSGLPSDIPHVRLDSPPSFDSIYTGSSLDPPSSEPPAPPSQPPLPPKVLMSSKSVTYATVVFPGGDKGKIASCEPVQDPPNSQTPPSK 2 174 196 PHEFRRRENS IPLIWGAVLLLALVVVAVVIFAV MARKKGNRLV +Q5T2D2 MAPAFLLLLLLWPQGCVSGPSADSVYTKVRLLEGETLSVQCSYKGYKNRVEGKVWCKIRKKKCEPGFARVWVKGPRYLLQDDAQAKVVNITMVALKLQDSGRYWCMRNTSGILYPLMGFQLDVSPAPQTERNIPFTHLDNILKSGTVTTGQAPTSGPDAPFTTGVMVFTPGLITLPRLLASTRPASKTGYSFTATSTTSQGPRRTMGSQTVTASPSNARDSSAGPESISTKSGDLSTRSPTTGLCLTSRSLLNRLPSMPSIRHQDVYSTVLGVVLTLLVLMLIMVYGFWKKRHMASYSMCSDPSTRDPPGRPEPYVEVYLI 2 267 289 SMPSIRHQDV YSTVLGVVLTLLVLMLIMVYGFW KKRHMASYSM +Q3LRV9 MAWRYSQLLLVPVQLVFLASVCCPGVWGSTVSEELHRMVGQSLSVQCQYKPKEESYVLKTWCRQTAPSKCTRVVTTSEPRKAARELQHTIWDDPEAGFFNITMTQLTEDDSAFYWCGPYYPSLREVTVLRNISLVVSPAPSTLPSQTIAPLPESTATIFMPFPVLTTSPEETTDSSINGTGHRNQSSSSPGWTSPGLLVSVQYGLLLLKALMLSVFCVLLCWRSGQGREYMAETMELSKLPHISKSLDTVSHISGYEKKANWY 2 200 222 PGWTSPGLLV SVQYGLLLLKALMLSVFCVLLCW RSGQGREYMA +Q5BVD1 MDLAQPSQPVDELELSVLERQPEENTPLNGADKVFPSLDEEVPPAEANKESPWSSCNKNVVGRCKLWMIITSIFLGVITVIIIGLCLAAVTYVDEDENEILELSSNKTFFIMLKIPEECVAEEELPHLLTERLTDVYSTSPSLGRYFTSVEIVDFSGENATVTYDLQFGVPSDDENFMKYMMSEELVLGILLQDFRDQNIPGCESLGLDPTSLLLYE 2 66 88 CNKNVVGRCK LWMIITSIFLGVITVIIIGLCLA AVTYVDEDEN +Q9P2J2 
MVWCLGLAVLSLVISQGADGRGKPEVVSVVGRAGESVVLGCDLLPPAGRPPLHVIEWLRFGFLLPIFIQFGLYSPRIDPDYVGRVRLQKGASLQIEGLRVEDQGWYECRVFFLDQHIPEDDFANGSWVHLTVNSPPQFQETPPAVLEVQELEPVTLRCVARGSPLPHVTWKLRGKDLGQGQGQVQVQNGTLRIRRVERGSSGVYTCQASSTEGSATHATQLLVLGPPVIVVPPKNSTVNASQDVSLACHAEAYPANLTYSWFQDNINVFHISRLQPRVRILVDGSLRLLATQPDDAGCYTCVPSNGLLHPPSASAYLTVLYPAQVTAMPPETPLPIGMPGVIRCPVRANPPLLFVSWTKDGKALQLDKFPGWSQGTEGSLIIALGNEDALGEYSCTPYNSLGTAGPSPVTRVLLKAPPAFIERPKEEYFQEVGRELLIPCSAQGDPPPVVSWTKVGRGLQGQAQVDSNSSLILRPLTKEAHGHWECSASNAVARVATSTNVYVLGTSPHVVTNVSVVALPKGANVSWEPGFDGGYLQRFSVWYTPLAKRPDRMHHDWVSLAVPVGAAHLLVPGLQPHTQYQFSVLAQNKLGSGPFSEIVLSAPEGLPTTPAAPGLPPTEIPPPLSPPRGLVAVRTPRGVLLHWDPPELVPKRLDGYVLEGRQGSQGWEVLDPAVAGTETELLVPGLIKDVLYEFRLVAFAGSFVSDPSNTANVSTSGLEVYPSRTQLPGLLPQPVLAGVVGGVCFLGVAVLVSILAGCLLNRRRAARRRRKRLRQDPPLIFSPTGKSAAPSALGSGSPDSVAKLKLQGSPVPSLRQSLLWGDPAGTPSPHPDPPSSRGPLPLEPICRGPDGRFVMGPTVAAPQERSGREQAEPRTPAQRLARSFDCSSSSPSGAPQPLCIEDISPVAPPPAAPPSPLPGPGPLLQYLSLPFFREMNVDGDWPPLEEPSPAAPPDYMDTRRCPTSSFLRSPETPPVSPRESLPGAVVGAGATAEPPYTALADWTLRERLLPGLLPAAPRGSLTSQSSGRGSASFLRPPSTAPSAGGSYLSPAPGDTSSWASGPERWPRREHVVTVSKRRNTSVDENYEWDSEFPGDMELLETLHLGLASSRLRPEAEPELGVKTPEEGCLLNTAHVTGPEARCAALREEFLAFRRRRDATRARLPAYRQPVPHPEQATLL 2 738 760 PGLLPQPVLA GVVGGVCFLGVAVLVSILAGCLL NRRRAARRRR +Q9UPX0 
MIWYVATFIASVIGTRGLAAEGAHGLREEPEFVTARAGESVVLRCDVIHPVTGQPPPYVVEWFKFGVPIPIFIKFGYYPPHVDPEYAGRASLHDKASLRLEQVRSEDQGWYECKVLMLDQQYDTFHNGSWVHLTINAPPTFTETPPQYIEAKEGGSITMTCTAFGNPKPIVTWLKEGTLLGASGKYQVSDGSLTVTSVSREDRGAYTCRAYSIQGEAVHTTHLLVQGPPFIVSPPENITVNISQDALLTCRAEAYPGNLTYTWYWQDENVYFQNDLKLRVRILIDGTLIIFRVKPEDSGKYTCVPSNSLGRSPSASAYLTVQYPARVLNMPPVIYVPVGIHGYIRCPVDAEPPATVVKWNKDGRPLQVEKNLGWTLMEDGSIRIEEATEEALGTYTCVPYNTLGTMGQSAPARLVLKDPPYFTVLPGWEYRQEAGRELLIPCAAAGDPFPVITWRKVGKPSRSKHSALPSGSLQFRALSKEDHGEWECVATNVVTSITASTHLTVIGTSPHAPGSVRVQVSMTTANVSWEPGYDGGYEQTFSVWMKRAQFGPHDWLSLPVPPGPSWLLVDTLEPETAYQFSVLAQNKLGTSAFSEVVTVNTLAFPITTPEPLVLVTPPRCLIANRTQQGVLLSWLPPANHSFPIDRYIMEFRVAERWELLDDGIPGTEGEFFAKDLSQDTWYEFRVLAVMQDLISEPSNIAGVSSTDIFPQPDLTEDGLARPVLAGIVATICFLAAAILFSTLAACFVNKQRKRKLKRKKDPPLSITHCRKSLESPLSSGKVSPESIRTLRAPSESSDDQGQPAAKRMLSPTREKELSLYKKTKRAISSKKYSVAKAEAEAEATTPIELISRGPDGRFVMDPAEMEPSLKSRRIEGFPFAEETDMYPEFRQSDEENEDPLVPTSVAALKSQLTPLSSSQESYLPPPAYSPRFQPRGLEGPGGLEGRLQATGQARPPAPRPFHHGQYYGYLSSSSPGEVEPPPFYVPEVGSPLSSVMSSPPLPTEGPFGHPTIPEENGENASNSTLPLTQTPTGGRSPEPWGRPEFPFGGLETPAMMFPHQLPPCDVPESLQPKAGLPRGLPPTSLQVPAAYPGILSLEAPKGWAGKSPGRGPVPAPPAAKWQDRPMQPLVSQGQLRHTSQGMGIPVLPYPEPAEPGAHGGPSTFGLDTRWYEPQPRPRPSPRQARRAEPSLHQVVLQPSRLSPLTQSPLSSRTGSPELAARARPRPGLLQQAEMSEITLQPPAAVSFSRKSTPSTGSPSQSSRSGSPSYRPAMGFTTLATGYPSPPPGPAPAGPGDSLDVFGQTPSPRRTGEELLRPETPPPTLPTSGKLQRDRPAPATSPPERALSKL 2 727 749 DGLARPVLAG IVATICFLAAAILFSTLAACFVN KQRKRKLKRK +Q96J42 MVPAAGRRPPRVMRLLGWWQVLLWVLGLPVRGVEVAEESGRLWSEEQPAHPLQVGAVYLGEEELLHDPMGQDRAAEEANAVLGLDTQGDHMVMLSVIPGEAEDKVSSEPSGVTCGAGGAEDSRCNVRESLFSLDGAGAHFPDREEEYYTEPEVAESDAAPTEDSNNTESLKSPKVNCEERNITGLENFTLKILNMSQDLMDFLNPNGSDCTLVLFYTPWCRFSASLAPHFNSLPRAFPALHFLALDASQHSSLSTRFGTVAVPNILLFQGAKPMARFNHTDRTLETLKIFIFNQTGIEAKKNVVVTQADQIGPLPSTLIKSVDWLLVFSLFFLISFIMYATIRTESIRWLIPGQEQEHVE 2 324 342 LPSTLIKSVD WLLVFSLFFLISFIMYATI RTESIRWLIP +P0DTE4 
MLNNLLLFSLQISLIGTTLGGNVLIWPMEGSHWLNVKIIIDELIKKEHNVTVLVASGALFITPTSNPSLTFEIYRVPFGKERIEGVIKDFVLTWLENRPSPSTIWRFYQEMAKVIKDFHMVSQEICDGVLKNQQLMAKLKKSKFEVLVSDPVFPCGDIVALKLGIPFMYSLRFSPASTVEKHCGKVPYPPSYVPAVLSELTDQMSFTDRIRNFISYHLQDYMFETLWKSWDSYYSKALGRPTTLCETMGKAEIWLIRTYWDFEFPRPYLPNFEFVGGLHCKPAKPLPKEMEEFIQSSGKNGVVVFSLGSMVKNLTEEKANLIASALAQIPQKVLWRYKGKKPATLGNNTQLFDWIPQNDLLGHPKTKAFITHGGTNGIYEAIYHGVPMVGVPMFADQPDNIAHMKAKGAAVEVNLNTMTSVDLLSALRTVINEPSYKENAMRLSRIHHDQPVKPLDRAVFWIEFVMRHKGAKHLRVAAHDLTWFQYHSLDVIGFLLVCVTTAIFLVIQCCLFSCQKFGKIGKKKKRE 2 491 513 LTWFQYHSLD VIGFLLVCVTTAIFLVIQCCLFS CQKFGKIGKK +Q8IZJ1 MGARSGARGALLLALLLCWDPRLSQAGTDSGSEVLPDSFPSAPAEPLPYFLQEPQDAYIVKNKPVELRCRAFPATQIYFKCNGEWVSQNDHVTQEGLDEATGLRVREVQIEVSRQQVEELFGLEDYWCQCVAWSSAGTTKSRRAYVRIAYLRKNFDQEPLGKEVPLDHEVLLQCRPPEGVPVAEVEWLKNEDVIDPTQDTNFLLTIDHNLIIRQARLSDTANYTCVAKNIVAKRRSTTATVIVYVNGGWSSWAEWSPCSNRCGRGWQKRTRTCTNPAPLNGGAFCEGQAFQKTACTTICPVDGAWTEWSKWSACSTECAHWRSRECMAPPPQNGGRDCSGTLLDSKNCTDGLCMQNKKTLSDPNSHLLEASGDAALYAGLVVAIFVVVAILMAVGVVVYRRNCRDFDTDITDSSAALTGGFHPVNFKTARPSNPQLLHPSVPPDLTASAGIYRGPVYALQDSTDKIPMTNSPLLDPLPSLKVKVYSSSTTGSGPGLADGADLLGVLPPGTYPSDFARDTHFLHLRSASLGSQQLLGLPRDPGSSVSGTFGCLGGRLSIPGTGVSLLVPNGAIPQGKFYEMYLLINKAESTLPLSEGTQTVLSPSVTCGPTGLLLCRPVILTMPHCAEVSARDWIFQLKTQAHQGHWEEVVTLDEETLNTPCYCQLEPRACHILLDQLGTYVFTGESYSRSAVKRLQLAVFAPALCTSLEYSLRVYCLEDTPVALKEVLELERTLGGYLVEEPKPLMFKDSYHNLRLSLHDLPHAHWRSKLLAKYQEIPFYHIWSGSQKALHCTFTLERHSLASTELTCKICVRQVEGEGQIFQLHTTLAETPAGSLDTLCSAPGSTVTTQLGPYAFKIPLSIRQKICNSLDAPNSRGNDWRMLAQKLSMDRYLNYFATKASPTGVILDLWEALQQDDGDLNSLASALEEMGKSEMLVAVATDGDC 2 376 398 HLLEASGDAA LYAGLVVAIFVVVAILMAVGVVV YRRNCRDFDT +Q80YF6 MVRTRWQPPLRALLLLVLVWLPQSLSLDLIAYVPQITAWDLEGKITATTFSLEQPRCVFDEHVSTKDTIWLVVAFSNASRDFQNPQTAAKIPTFPQLLTDGHYMTLPLSLDQLPCEDLTGGSGGAPVLRVGNDFGCYQRPYCNAPLPSQGPYSVKFLVMDAAGPPKAETKWSNPIYLHQGKNPNSIDTWPGRRSGCMIVITSILSALAGLLLLAFLAASTTRFSSLWWPEEAPEQLRIGSFMGKRYMTHHIPPSEAATLPVGCEPGLDPLPSLSP 2 196 218 IDTWPGRRSG CMIVITSILSALAGLLLLAFLAA STTRFSSLWW +B0FP48 
MDNSWRLGPAIGLSAGQSQLLVSLLLLLTRVQPGTDVAAPEHISYVPQLSNDTLAGRLTLSTFTLEQPLGQFSSHNISDLDTIWLVVALSNATQSFTAPRTNQDIPAPANFSQRGYYLTLRANRVLYQTRGQLHVLRVGNDTHCQPTKIGCNHPLPGPGPYRVKFLVMNDEGPVAETKWSSDTRLQQAQALRAVPGPQSPGTVVIIAILSILLAVLLTVLLAVLIYTCFNSCRSTSLSGPEEAGSVRRYTTHLAFSTPAEGAS 2 203 225 AVPGPQSPGT VVIIAILSILLAVLLTVLLAVLI YTCFNSCRST +Q5DID0 MLRTSGLALLALVSAVGPSQASGFTEKGLSLLGYQLCSHRVTHTVQKVEAVQTSYTSYVSCGGWIPWRRCPKMVYRTQYLVVEVPESRNVTDCCEGYEQLGLYCVLPLNQSGQFTSRPGACPAEGPEPSTSPCSLDIDCPGLEKCCPWSGGRYCMAPAPQAPERDPVGSWYNVTILVKMDFKELQQVDPRLLNHMRLLHSLVTSALQPMASTVHHLHSAPGNASTTVSRLLLGLPRPLPVADVSTLLGDIAKRVYEVISVQVQDVNECFYEELNACSGRELCANLEGSYWCVCHQEAPATSPRKLNLEWEDCPPVSDYVVLNVTSDSFQVSWRLNSTQNHTFHVRVYRGMELLRSARTQSQALAVAGLEAGVLYRVKTSYQGCGADVSTTLTIKTNAQVFEVTIKIVNHNLTEKLLNRSSVEYQDFSRQLLHEVESSFPPVVSDLYRSGKLRMQIVSLQAGSVVVRLKLTVQDPGFPMGISTLAPILQPLLASTVFQIDRQGTRVQDWDECVDSAEHDCSPAAWCINLEGSYTCQCRTTRDATPSRAGRACEGDLVSPMGGGLSAATGVTVPGLGTGTAALGLENFTLSPSPGYPQGTPAAGQAWTPEPSPRRGGSNVVGYDRNNTGKGVEQELQGNSIMEPPSWPSPTEDPTGHFLWHATRSTRETLLNPTWLRNEDSGPSGSVDLPLTSTLTALKTPACVPVSIGRIMVSNVTSTGFHLAWEADLAMDSTFQLTLTSMWSPAVVLETWNTSVTLSGLEPGVLHLVEIMAKACGKEGARAHLKVRTAARKLIGKVRIKNVRYSESFRNASSQEYRDFLELFFRMVRGSLPATMCQHMDAGGVRMEVVSVTNGSIVVEFHLLIIADVDVQEVSAAFLTAFQTVPLLEVIRGDTFIQDYDECERKEDDCVPGTSCRNTLGSFTCSCEGGAPDFPVEYSERPCEGDSPGNETWATSPERPLTTAGTKAAFVQGTSPTPQGLPQRLNLTGAVRVLCEIEKVVVAIQKRFLQQESIPESSLYLSHPSCNVSHSNGTHVLLEAGWSECGTLMQSNMTNTVVRTTLRNDLSQEGIIHHLKILSPIYCAFQNDLLTSSGFTLEWGVYTIIEDLHGAGNFVTEMQLFIGDSPIPQNYSVSASDDVRIEVGLYRQKSNLKVVLTECWATPSSNARDPITFSFINNSCPVPNTYTNVIENGNSNKAQFKLRIFSFINDSIVYLHCKLRVCMESPGATCKINCNNFRLLQNSETSATHQMSWGPLIRSEGEPPHAEAGLGAGYVVLIVVAIFVLVAGTATLLIVRYQRMNGRYNFKIQSNNFSYQVFYE 2 1271 1293 PPHAEAGLGA GYVVLIVVAIFVLVAGTATLLIV RYQRMNGRYN +O75445 
MNCPVLSLGSGFLFQVIEMLIFAYFASISLTESRGLFPRLENVGAFKKVSIVPTQAVCGLPDRSTFCHSSAAAESIQFCTQRFCIQDCPYRSSHPTYTALFSAGLSSCITPDKNDLHPNAHSNSASFIFGNHKSCFSSPPSPKLMASFTLAVWLKPEQQGVMCVIEKTVDGQIVFKLTISEKETMFYYRTVNGLQPPIKVMTLGRILVKKWIHLSVQVHQTKISFFINGVEKDHTPFNARTLSGSITDFASGTVQIGQSLNGLEQFVGRMQDFRLYQVALTNREILEVFSGDLLRLHAQSHCRCPGSHPRVHPLAQRYCIPNDAGDTADNRVSRLNPEAHPLSFVNDNDVGTSWVSNVFTNITQLNQGVTISVDLENGQYQVFYIIIQFFSPQPTEIRIQRKKENSLDWEDWQYFARNCGAFGMKNNGDLEKPDSVNCLQLSNFTPYSRGNVTFSILTPGPNYRPGYNNFYNTPSLQEFVKATQIRFHFHGQYYTTETAVNLRHRYYAVDEITISGRCQCHGHADNCDTTSQPYRCLCSQESFTEGLHCDRCLPLYNDKPFRQGDQVYAFNCKPCQCNSHSKSCHYNISVDPFPFEHFRGGGGVCDDCEHNTTGRNCELCKDYFFRQVGADPSAIDVCKPCDCDTVGTRNGSILCDQIGGQCNCKRHVSGRQCNQCQNGFYNLQELDPDGCSPCNCNTSGTVDGDITCHQNSGQCKCKANVIGLRCDHCNFGFKFLRSFNDVGCEPCQCNLHGSVNKFCNPHSGQCECKKEAKGLQCDTCRENFYGLDVTNCKACDCDTAGSLPGTVCNAKTGQCICKPNVEGRQCNKCLEGNFYLRQNNSFLCLPCNCDKTGTINGSLLCNKSTGQCPCKLGVTGLRCNQCEPHRYNLTIDNFQHCQMCECDSLGTLPGTICDPISGQCLCVPNRQGRRCNQCQPGFYISPGNATGCLPCSCHTTGAVNHICNSLTGQCVCQDASIAGQRCDQCKDHYFGFDPQTGRCQPCNCHLSGALNETCHLVTGQCFCKQFVTGSKCDACVPSASHLDVNNLLGCSKTPFQQPPPRGQVQSSSAINLSWSPPDSPNAHWLTYSLLRDGFEIYTTEDQYPYSIQYFLDTDLLPYTKYSYYIETTNVHGSTRSVAVTYKTKPGVPEGNLTLSYIIPIGSDSVTLTWTTLSNQSGPIEKYILSCAPLAGGQPCVSYEGHETSATIWNLVPFAKYDFSVQACTSGGCLHSLPITVTTAQAPPQRLSPPKMQKISSTELHVEWSPPAELNGIIIRYELYMRRLRSTKETTSEESRVFQSSGWLSPHSFVESANENALKPPQTMTTITGLEPYTKYEFRVLAVNMAGSVSSAWVSERTGESAPVFMIPPSVFPLSSYSLNISWEKPADNVTRGKVVGYDINMLSEQSPQQSIPMAFSQLLHTAKSQELSYTVEGLKPYRIYEFTITLCNSVGCVTSASGAGQTLAAAPAQLRPPLVKGINSTTIHLRWFPPEELNGPSPIYQLERRESSLPALMTTMMKGIRFIGNGYCKFPSSTHPVNTDFTGIKASFRTKVPEGLIVFAASPGNQEEYFALQLKKGRLYFLFDPQGSPVEVTTTNDHGKQYSDGKWHEIIAIRHQAFGQITLDGIYTGSSAILNGSTVIGDNTGVFLGGLPRSYTILRKDPEIIQKGFVGCLKDVHFMKNYNPSAIWEPLDWQSSEEQINVYNSWEGCPASLNEGAQFLGAGFLELHPYMFHGGMNFEISFKFRTDQLNGLLLFVYNKDGPDFLAMELKSGILTFRLNTSLAFTQVDLLLGLSYCNGKWNKVIIKKEGSFISASVNGLMKHASESGDQPLVVNSPVYVGGIPQELLNSYQHLCLEQGFGGCMKDVKFTRGAVVNLASVSSGAVRVNLDGCLSTDSAVNCRGNDSILVYQGKEQSVYEGGLQPFTEYLYRVIASHEGGSVYSDWSRGRTTGAAPQSVPTPSRVRSLNGYSIEVTWDEPVVRGVIEKYILKAYSEDSTRPP
RMPSASAEFVNTSNLTGILTGLLPFKNYAVTLTACTLAGCTESSHALNISTPQEAPQEVQPPVAKSLPSSLLLSWNPPKKANGIITQYCLYMDGRLIYSGSEENYIVTDLAVFTPHQFLLSACTHVGCTNSSWVLLYTAQLPPEHVDSPVLTVLDSRTIHIQWKQPRKISGILERYVLYMSNHTHDFTIWSVIYNSTELFQDHMLQYVLPGNKYLIKLGACTGGGCTVSEASEALTDEDIPEGVPAPKAHSYSPDSFNVSWTEPEYPNGVITSYGLYLDGILIHNSSELSYRAYGFAPWSLHSFRVQACTAKGCALGPLVENRTLEAPPEGTVNVFVKTQGSRKAHVRWEAPFRPNGLLTHSVLFTGIFYVDPVGNNYTLLNVTKVMYSGEETNLWVLIDGLVPFTNYTVQVNISNSQGSLITDPITIAMPPGAPDGVLPPRLSSATPTSLQVVWSTPARNNAPGSPRYQLQMRSGDSTHGFLELFSNPSASLSYEVSDLQPYTEYMFRLVASNGFGSAHSSWIPFMTAEDKPGPVVPPILLDVKSRMMLVTWQHPRKSNGVITHYNIYLHGRLYLRTPGNVTNCTVMHLHPYTAYKFQVEACTSKGCSLSPESQTVWTLPGAPEGIPSPELFSDTPTSVIISWQPPTHPNGLVENFTIERRVKGKEEVTTLVTLPRSHSMRFIDKTSALSPWTKYEYRVLMSTLHGGTNSSAWVEVTTRPSRPAGVQPPVVTVLEPDAVQVTWKPPLIQNGDILSYEIHMPDPHITLTNVTSAVLSQKVTHLIPFTNYSVTIVACSGGNGYLGGCTESLPTYVTTHPTVPQNVGPLSVIPLSESYVVISWQPPSKPNGPNLRYELLRRKIQQPLASNPPEDLNRWHNIYSGTQWLYEDKGLSRFTTYEYMLFVHNSVGFTPSREVTVTTLAGLPERGANLTASVLNHTAIDVRWAKPTVQDLQGEVEYYTLFWSSATSNDSLKILPDVNSHVIGHLKPNTEYWIFISVFNGVHSINSAGLHATTCDGEPQGMLPPEVVIINSTAVRVIWTSPSNPNGVVTEYSIYVNNKLYKTGMNVPGSFILRDLSPFTIYDIQVEVCTIYACVKSNGTQITTVEDTPSDIPTPTIRGITSRSLQIDWVSPRKPNGIILGYDLLWKTWYPCAKTQKLVQDQSDELCKAVRCQKPESICGHICYSSEAKVCCNGVLYNPKPGHRCCEEKYIPFVLNSTGVCCGGRIQEAQPNHQCCSGYYARILPGEVCCPDEQHNRVSVGIGDSCCGRMPYSTSGNQICCAGRLHDGHGQKCCGRQIVSNDLECCGGEEGVVYNRLPGMFCCGQDYVNMSDTICCSASSGESKAHIKKNDPVPVKCCETELIPKSQKCCNGVGYNPLKYVCSDKISTGMMMKETKECRILCPASMEATEHCGRCDFNFTSHICTVIRGSHNSTGKASIEEMCSSAEETIHTGSVNTYSYTDVNLKPYMTYEYRISAWNSYGRGLSKAVRARTKEDVPQGVSPPTWTKIDNLEDTIVLNWRKPIQSNGPIIYYILLRNGIERFRGTSLSFSDKEGIQPFQEYSYQLKACTVAGCATSSKVVAATTQGVPESILPPSITALSAVALHLSWSVPEKSNGVIKEYQIRQVGKGLIHTDTTDRRQHTVTGLQPYTNYSFTLTACTSAGCTSSEPFLGQTLQAAPEGVWVTPRHIIINSTTVELYWSLPEKPNGLVSQYQLSRNGNLLFLGGSEEQNFTDKNLEPNSRYTYKLEVKTGGGSSASDDYIVQTPMSTPEEIYPPYNITVIGPYSIFVAWIPPGILIPEIPVEYNVLLNDGSVTPLAFSVGHHQSTLLENLTPFTQYEIRIQACQNGSCGVSSRMFVKTPEAAPMDLNSPVLKALGSACIEIKWMPPEKPNGIIINYFIYRRPAGIEEESVLFVWSEGALEFMDEGDTLRPFTLYEYRVRACNSKGSVESLWSLTQTLEAPPQDFPAPWAQATSAHSVLLNWTKPESPNGIISHYRV
VYQERPDDPTFNSPTVHAFTVKGTSHQAHLYGLEPFTTYRIGVVAANHAGEILSPWTLIQTLESSPSGLRNFIVEQKENGRALLLQWSEPMRTNGVIKTYNIFSDGFLEYSGLNRQFLFRRLDPFTLYTLTLEACTRAGCAHSAPQPLWTDEAPPDSQLAPTVHSVKSTSVELSWSEPVNPNGKIIRYEVIRRCFEGKAWGNQTIQADEKIVFTEYNTERNTFMYNDTGLQPWTQCEYKIYTWNSAGHTCSSWNVVRTLQAPPEGLSPPVISYVSMNPQKLLISWIPPEQSNGIIQSYRLQRNEMLYPFSFDPVTFNYTDEELLPFSTYSYALQACTSGGCSTSKPTSITTLEAAPSEVSPPDLWAVSATQMNVCWSPPTVQNGKITKYLVRYDNKESLAGQGLCLLVSHLQPYSQYNFSLVACTNGGCTASVSKSAWTMEALPENMDSPTLQVTGSESIEITWKPPRNPNGQIRSYELRRDGTIVYTGLETRYRDFTLTPGVEYSYTVTASNSQGGILSPLVKDRTSPSAPSGMEPPKLQARGPQEILVNWDPPVRTNGDIINYTLFIRELFERETKIIHINTTHNSFGMQSYIVNQLKPFHRYEIRIQACTTLGCASSDWTFIQTPEIAPLMQPPPHLEVQMAPGGFQPTVSLLWTGPLQPNGKVLYYELYRRQIATQPRKSNPVLIYNGSSTSFIDSELLPFTEYEYQVWAVNSAGKAPSSWTWCRTGPAPPEGLRAPTFHVISSTQAVVNISAPGKPNGIVSLYRLFSSSAHGAETVLSEGMATQQTLHGLQAFTNYSIGVEACTCFNCCSKGPTAELRTHPAPPSGLSSPQIGTLASRTASFRWSPPMFPNGVIHSYELQFHVACPPDSALPCTPSQIETKYTGLGQKASLGGLQPYTTYKLRVVAHNEVGSTASEWISFTTQKELPQYRAPFSVDSNLSVVCVNWSDTFLLNGQLKEYVLTDGGRRVYSGLDTTLYIPRTADKTFFFQVICTTDEGSVKTPLIQYDTSTGLGLVLTTPGKKKGSRSKSTEFYSELWFIVLMAMLGLILLAIFLSLILQRKIHKEPYIRERPPLVPLQKRMSPLNVYPPGENHMGLADTKIPRSGTPVSIRSNRSACVLRIPSQNQTSLTYSQGSLHRSVSQLMDIQDKKVLMDNSLWEAIMGHNSGLYVDEEDLMNAIKDFSSVTKERTTFTDTHL 2 5041 5063 RSKSTEFYSE LWFIVLMAMLGLILLAIFLSLIL QRKIHKEPYI +P29533 
MPVKMVAVLGASTVLWILFAVSQAFKIEISPEYKTIAQIGDSMALTCSTTGCESPLFSWRTQIDSPLNAKVRTEGSKSVLTMEPVSFENEHSYLCTATCGSGKLERSIHVDIYSFPKDPEIQFSGPLEVGKPVTVKCLAPDIYPVYRLEIDLFKGDQLMNRQEFSSEEMTKSLETKSLEVTFTPVIEDIGKALVCRAKLHIDQIDSTLKERETVKELQVYISPRNTTISVHPSTRLQEGGAVTMTCSSEGLPAPEIFWGRKLDNEVLQLLSGNATLTLIAMRMEDSGVYVCEGVNLIGRDKAEVELVVQEKPFIVDISPGSQVAAQVGDSVVLTCAAIGCDSPSFSWRTQTDSPLNGVVRNEGAKSTLVLSSVGFEDEHSYLCAVTCLQRTLEKRTQVEVYSFPEDPVIKMSGPLVHGRPVTVNCTVPNVYPFDHLEIELLKGETTLMKKYFLEEMGIKSLETKILETTFIPTIEDTGKSLVCLARLHSGEMESEPKQRQSVQPLYVNVAPKETTIWVSPSPILEEGSPVNLTCSSDGIPAPKILWSRQLNNGELQPLSENTTLTFMSTKRDDSGIYVCEGINEAGISRKSVELIIQVSPKDIQLTVFPSKSVKEGDTVIISCTCGNVPETWIILKKKAKTGDMVLKSVDGSYTIRQAQLQDAGIYECESKTEVGSQLRSLTLDVKGKEHNKDYFSPELLALYCASSLVIPAIGMIVYFARKANMKGSYSLVEAQKSKV 2 699 721 EHNKDYFSPE LLALYCASSLVIPAIGMIVYFAR KANMKGSYSL +Q9H7M9 MGVPTALEAGSWRWGSLLFALFLAASLGPVAAFKVATPYSLYVCPEGQNVTLTCRLLGPVDKGHDVTFYKTWYRSSRGEVQTCSERRPIRNLTFQDLHLHHGGHQAANTSHDLAQRHGLESASDHHGNFSITMRNLTLLDSGLYCCLVVEIRHHHSEHRVHGAMELQVQTGKDAPSNCVVYPSSSQDSENITAAALATGACIVGILCLPLILLLVYKQRQAASNRRAQELVRMDSNIQGIENPGFEASPPAQGIPEAKVRHPLSYVAQRQPSESGRHLLSEPSTPLSPPGPGDVFFPSLDPVPDSPNFEVI 2 194 216 SSQDSENITA AALATGACIVGILCLPLILLLVY KQRQAASNRR +Q96AW1 MRRQPAKVAALLLGLLLECTEAKKHCWYFEGLYPTYYICRSYEDCCGSRCCVRALSIQRLWYFWFLLMMGVLFCCGAGFFIRRRMYPPPLIEEPAFNVSYTRQPPNPGPGAQQPGPPYYTDPGGPGMNPVGNSMAMAFQVPPNSPQGSVACPPPPAYCNTPPPPYEQVVKAK 2 59 81 RCCVRALSIQ RLWYFWFLLMMGVLFCCGAGFFI RRRMYPPPLI +Q8N0Z9 MAAGGSAPEPRVLVCLGALLAGWVAVGLEAVVIGEVHENVTLHCGNISGLRGQVTWYRNNSEPVFLLSSNSSLRPAEPRFSLVDATSLHIESLSLGDEGIYTCQEILNVTQWFQVWLQVASGPYQIEVHIVATGTLPNGTLYAARGSQVDFSCNSSSRPPPVVEWWFQALNSSSESFGHNLTVNFFSLLLISPNLQGNYTCLALNQLSKRHRKVTTELLVYYPPPSAPQCWAQMASGSFMLQLTCRWDGGYPDPDFLWIEEPGGVIVGKSKLGVEMLSESQLSDGKKFKCVTSHIVGPESGASCMVQIRGPSLLSEPMKTCFTGGNVTLTCQVSGAYPPAKILWLRNLTQPEVIIQPSSRHLITQDGQNSTLTIHNCSQDLDEGYYICRADSPVGVREMEIWLSVKEPLNIGGIVGTIVSLLLLGLAIISGLLLHYSPVFCWKVGNTSRGQNMDDVMVLVDSEEEEEEEEEEEEDAAVGEQEGAREREELPKEIPKQDHIHRVTALVNGNIEQMGNGFQDLQDDSSEEQSDIVQEEDRPV 2 412 434 WLSVKEPLNI 
GGIVGTIVSLLLLGLAIISGLLL HYSPVFCWKV +Q86XK7 MVFAFWKVFLILSCLAGQVSVVQVTIPDGFVNVTVGSNVTLICIYTTTVASREQLSIQWSFFHKKEMEPISIYFSQGGQAVAIGQFKDRITGSNDPGNASITISHMQPADSGIYICDVNNPPDFLGQNQGILNVSVLVKPSKPLCSVQGRPETGHTISLSCLSALGTPSPVYYWHKLEGRDIVPVKENFNPTTGILVIGNLTNFEQGYYQCTAINRLGNSSCEIDLTSSHPEVGIIVGALIGSLVGAAIIISVVCFARNKAKAKAKERNSKTIAELEPMTKINPRGESEAMPREDATQLEVTLPSSIHETGPDTIQEPDYEPKPTQEPAPEPAPGSEPMAVPDLDIELELEPETQSELEPEPEPEPESEPGVVVEPLSEDEKGVVKA 2 234 256 IDLTSSHPEV GIIVGALIGSLVGAAIIISVVCF ARNKAKAKAK +Q96IQ7 MAELPGPFLCGALLGFLCLSGLAVEVKVPTEPLSTPLGKTAELTCTYSTSVGDSFALEWSFVQPGKPISESHPILYFTNGHLYPTGSKSKRVSLLQNPPTVGVATLKLTDVHPSDTGTYLCQVNNPPDFYTNGLGLINLTVLVPPSNPLCSQSGQTSVGGSTALRCSSSEGAPKPVYNWVRLGTFPTPSPGSMVQDEVSGQLILTNLSLTSSGTYRCVATNQMGSASCELTLSVTEPSQGRVAGALIGVLLGVLLLSVAAFCLVRFQKERGKKPKETYGGSDLREDAIAPGISEHTCMRADSSKGFLERPSSASTVTTTKSKLPMVV 2 242 264 LSVTEPSQGR VAGALIGVLLGVLLLSVAAFCLV RFQKERGKKP +Q9Z109 MAWPLVGAFLCGHLLGFVCLSGLAVEVTVPTEPLSVPKGKTAELSCSYKTSVGDNFALEWSFVQPGKPISASVPVLYFTNGHLYPTGSKADRAILLHDPPTGGLATLKLTDLRPSDTGTYLCNVNNPPDFYTNGLGLINLTVLVPPSHPLCSQSGQTSVGGSAALGCRSSEGAPKPVYNWERLGSSPTPPPGSMVQDEVSGQLILTNLSLTSSGTYRCVASNQMGSASCELNLSVTDSSEGRVAGTLIGVLLGVLLLSVAAFCLIRFQKERKKEPKETYGGSDLREDATAPGVFEQASMRADHSKELLEKSPCASTMTTTKSKLSMVV 2 243 265 LSVTDSSEGR VAGTLIGVLLGVLLLSVAAFCLI RFQKERKKEP +Q9Y279 MGILLGLLLLGHLTVDTYGRPILEVPESVTGPWKGDVNLPCTYDPLQGYTQVLVKWLVQRGSDPVTIFLRDSSGDHIQQAKYQGRLHVSHKVPGDVSLQLSTLEMDDRSHYTCEVTWQTPDGNQVVRDKITELRVQKLSVSKPTVTTGSGYGFTVPQGMRISLQCQARGSPPISYIWYKQQTNNQEPIKVATLSTLLFKPAVIADSGSYFCTAKGQVGSEQHSDIVKFVVKDSSKLLKTKTEAPTTMTYPLKATSTVKQSWDWTTDMDGYLGETSAGPGKSLPVFAIILIISLCCMVVFTMAYIMLCRKTSQQEHVYEAARAHAREANDSGETMRVAIFASGCSSDEPTSQNLGNNYSDEPCIGQEYQIIAQINGNYARLLDTVPLDYEFLATEGKSVC 2 284 306 TSAGPGKSLP VFAIILIISLCCMVVFTMAYIML CRKTSQQEHV +P0DPA2 
MRVGGAFHLLLVCLSPALLSAVRINGDGQEVLYLAEGDNVRLGCPYVLDPEDYGPNGLDIEWMQVNSDPAHHRENVFLSYQDKRINHGSLPHLQQRVRFAASDPSQYDASINLMNLQVSDTATYECRVKKTTMATRKVIVTVQARPAVPMCWTEGHMTYGNDVVLKCYASGGSQPLSYKWAKISGHHYPYRAGSYTSQHSYHSELSYQESFHSSINQGLNNGDLVLKDISRADDGLYQCTVANNVGYSVCVVEVKVSDSRRIGVIIGIVLGSLLALGCLAVGIWGLVCCCCGGSGAGGARGAFGYGNGGGVGGGACGDLASEIREDAVAPGCKASGRGSRVTHLLGYPTQNVSRSLRRKYAPPPCGGPEDVALAPCTAAAACEAGPSPVYVKVKSAEPADCAEGPVQCKNGLLV 2 265 287 KVSDSRRIGV IIGIVLGSLLALGCLAVGIWGLV CCCCGGSGAG +Q8IW00 MRLLALAAAALLARAPAPEVCAALNVTVSPGPVVDYLEGENATLLCHVSQKRRKDSLLAVRWFFAHSFDSQEALMVKMTKLRVVQYYGNFSRSAKRRRLRLLEEQRGALYRLSVLTLQPSDQGHYVCRVQEISRHRNKWTAWSNGSSATEMRVISLKASEESSFEKTKETWAFFEDLYVYAVLVCCVGILSILLFMLVIVWQSVFNKRKSRVRHYLVKCPQNSSGETVTSVTSLAPLQPKKGKRQKEKPDIPPAVPAKAPIAPTFHKPKLLKPQRKVTLPKIAEENLTYAELELIKPHRAAKGAPTSTVYAQILFEENKL 2 179 201 ETWAFFEDLY VYAVLVCCVGILSILLFMLVIVW QSVFNKRKSR +A8MXK1 MRPLPSGRRKTRGISLGLFALCLAAARCLQSQGVSLYIPQATINATVKEDILLSVEYSCHGVPTIEWTYSSNWGTQKIVEWKPGTQANISQSHKDRVCTFDNGSIQLFSVGVRDSGYYVITVTERLGSSQFGTIVLHVSEILYEDLHFVAVILAFLAAVAAVLISLMWVCNKCAYKFQRKRRHKLKESTTEEIELEDVEC 2 148 170 VSEILYEDLH FVAVILAFLAAVAAVLISLMWVC NKCAYKFQRK +P55808 MESWWGLPCLAFLCFLMHARGQRDFDLADALDDPEPTKKPNSDIYPKPKPPYYPQPENPDSGGNIYPRPKPRPQPQPGNSGNSGGYFNDVDRDDGRYPPRPRPRPPAGGGGGGYSSYGNSDNTHGGDHHSTYGNPEGNMVAKIVSPIVSVVVVTLLGAAASYFKLNNRRNCFRTHEPENV 2 143 165 GNPEGNMVAK IVSPIVSVVVVTLLGAAASYFKL NNRRNCFRTH +Q9Y493 
MVPPVWTLLLLVGAALFRKEKPPDQKLVVRSSRDNYVLTQCDFEDDAKPLCDWSQVSADDEDWVRASGPSPTGSTGAPGGYPNGEGSYLHMESNSFHRGGVARLLSPDLWEQGPLCVHFAHHMFGLSWGAQLRLLLLSGEEGRRPDVLWKHWNTQRPSWMLTTVTVPAGFTLPTRLMFEGTRGSTAYLDIALDALSIRRGSCNRVCMMQTCSFDIPNDLCDWTWIPTASGAKWTQKKGSSGKPGVGPDGDFSSPGSGCYMLLDPKNARPGQKAVLLSPVSLSSGCLSFSFHYILRGQSPGAALHIYASVLGSIRKHTLFSGQPGPNWQAVSVNYTAVGRIQFAVVGVFGKTPEPAVAVDATSIAPCGEGFPQCDFEDNAHPFCDWVQTSGDGGHWALGHKNGPVHGMGPAGGFPNAGGHYIYLEADEFSQAGQSVRLVSRPFCAPGDICVEFAYHMYGLGEGTMLELLLGSPAGSPPIPLWKRVGSQRPYWQNTSVTVPSGHQQPMQLIFKGIQGSNTASVVAMGFILINPGTCPVKVLPELPPVSPVSSTGPSETTGLTENPTISTKKPTVSIEKPSVTTEKPTVPKEKPTIPTEKPTISTEKPTIPSEKPNMPSEKPTIPSEKPTILTEKPTIPSEKPTIPSEKPTISTEKPTVPTEEPTTPTEETTTSMEEPVIPTEKPSIPTEKPSIPTEKPTISMEETIISTEKPTISPEKPTIPTEKPTIPTEKSTISPEKPTTPTEKPTIPTEKPTISPEKPTTPTEKPTISPEKLTIPTEKPTIPTEKPTIPTEKPTISTEEPTTPTEETTISTEKPSIPMEKPTLPTEETTTSVEETTISTEKLTIPMEKPTISTEKPTIPTEKPTISPEKLTIPTEKLTIPTEKPTIPIEETTISTEKLTIPTEKPTISPEKPTISTEKPTIPTEKPTIPTEETTISTEKLTIPTEKPTISPEKLTIPTEKPTISTEKPTIPTEKLTIPTEKPTIPTEKPTIPTEKLTALRPPHPSPTATGLAALVMSPHAPSTPMTSVILGTTTTSRSSTERCPPNARYESCACPASCKSPRPSCGPLCREGCVCNPGFLFSDNHCIQASSCNCFYNNDYYEPGAEWFSPNCTEHCRCWPGSRVECQISQCGTHTVCQLKNGQYGCHPYAGTATCLVYGDPHYVTFDGRHFGFMGKCTYILAQPCGNSTDPFFRVTAKNEEQGQEGVSCLSKVYVTLPESTVTLLKGRRTLVGGQQVTLPAIPSKGVFLGASGRFVELQTEFGLRVRWDGDQQLYVTVSSTYSGKLCGLCGNYDGNSDNDHLKLDGSPAGDKEELGNSWQTDQDEDQECQKYQVVNSPSCDSSLQSSMSGPGFCGRLVDTHGPFETCLLHVKAASFFDSCMLDMCGFQGLQHLLCTHMSTMTTTCQDAGHAVKPWREPHFCPMACPPNSKYSLCAKPCPDTCHSGFSGMFCSDRCVEACECNPGFVLSGLECIPRSQCGCLHPAGSYFKVGERWYKPGCKELCVCESNNRIRCQPWRCRAQEFCGQQDGIYGCHAQGAATCTASGDPHYLTFDGALHHFMGTCTYVLTRPCWSRSQDSYFVVSATNENRGGILEVSYIKAVHVTVFDLSISLLRGCKVMLNGHRVALPVWLAQGRVTIRLSSNLVLLYTNFGLQVRYDGSHLVEVTVPSSYGGQLCGLCGNYNNNSLDDNLRPDRKLAGDSMQLGAAWKLPESSEPGCFLVGGKPSSCQENSMADAWNKNCAILINPQGPFSQCHQVVPPQSSFASCVHGQCGTKGDTTALCRSLQAYASLCAQAGQAPAWRNRTFCPMRCPPGSSYSPCSSPCPDTCSSINNPRDCPKALPCAESCECQKGHILSGTSCVPLGQCGCTDPAGSYHPVGERWYTENTCTRLCTCSVHNNITCFQSTCKPNQICWALDGLLHCRASGVGVCQLPGESHYVSFDGSNHSIPDACTLVLVKVCHPAMALPFFKISAKHEKEEGGTEAFRLHEVYIDIYDAQV
TLQKGHRVLINSKQVTLPAISQIPGVSVKSSSIYSIVNIKIGVQVKFDGNHLLEIEIPTTYYGKVCGMCGNFNDEEEDELMMPSDEVANSDSEFVNSWKDKDIDPSCQSLLVDEQQIPAEQQENPSGNCRAADLRRAREKCEAALRAPVWAQCASRIDLTPFLVDCANTLCEFGGLYQALCQALQAFGATCQSQGLKPPLWRNSSFCPLECPAYSSYTNCLPSCSPSCWDLDGRCEGAKVPSACAEGCICQPGYVLSEDKCVPRSQCGCKDAHGGSIPLGKSWVSSGCTEKCVCTGGAIQCGDFRCPSGSHCQLTSDNSNSNCVSDKSEQCSVYGDPRYLTFDGFSYRLQGRMTYVLIKTVDVLPEGVEPLLVEGRNKMDPPRSSIFLQEVITTVYGYKVQLQAGLELVVNNQKMAVPYRPNEHLRVTLWGQRLYLVTDFELVVSFGGRKNAVISLPSMYEGLVSGLCGNYDKNRKNDMMLPSGALTQNLNTFGNSWEVKTEDALLRFPRAIPAEEEGQGAELGLRTGLQVSECSPEQLASNSTQACRVLADPQGPFAACHQTVAPEPFQEHCVLDLCSAQDPREQEELRCQVLSGHGVSSRYHISELYDTLPSILCQPGRPRGLRGPLRGRLRQHPRLCLQWHPEPPLADCGCTSNGIYYQLGSSFLTEDCSQRCTCASSRILLCEPFSCRAGEVCTLGNHTQGCFPESPCLQNPCQNDGQCREQGATFTCECEVGYGGGLCMEPRDAPPPRKPASNLVGVLLGLLVPVVVVLLAVTRECIYRTRRKREKTQEGDRLARLVDTDTVLDCAC 2 2756 2778 PRDAPPPRKP ASNLVGVLLGLLVPVVVVLLAVT RECIYRTRRK +Q9ULT6 MRPRSGGRPGATGRRRRRLRRRPRGLRCSRLPPPPPLPLLLGLLLAAAGPGAARAKETAFVEVVLFESSPSGDYTTYTTGLTGRFSRAGATLSAEGEIVQMHPLGLCNNNDEEDLYEYGWVGVVKLEQPELDPKPCLTVLGKAKRAVQRGATAVIFDVSENPEAIDQLNQGSEDPLKRPVVYVKGADAIKLMNIVNKQKVARARIQHRPPRQPTEYFDMGIFLAFFVVVSLVCLILLVKIKLKQRRSQNSMNRLAVQALEKMETRKFNSKSKGRREGSCGALDTLSSSSTSDCAICLEKYIDGEELRVIPCTHRFHRKCVDPWLLQHHTCPHCRHNIIEQKGNPSAVCVETSNLSRGRQQRVTLPVHYPGRVHRTNAIPAYPTRTSMDSHGNPVTLLTMDRHGEQSLYSPQTPAYIRSYPPLHLDHSLAAHRCGLEHRAYSPAHPFRRPKLSGRSFSKAACFSQYETMYQHYYFQGLSYPEQEGQSPPSLAPRGPARAFPPSGSGSLLFPTVVHVAPPSHLESGSTSSFSCYHGHRSVCSGYLADCPGSDSSSSSSSGQCHCSSSDSVVDCTEVSNQGVYGSCSTFRSSLSSDYDPFIYRSRSPCRASEAGGSGSSGRGPALCFEGSPPPEELPAVHSHGAGRGEPWPGPASPSGDQVSTCSLEMNYSSNSSLEHRGPNSSTSEVGLEASPGAAPDLRRTWKGGHELPSCACCCEPQPSPAGPSAGAAGSSTLFLGPHLYEGSGPAGGEPQSGSSQGLYGLHPDHLPRTDGVKYEGLPCCFYEEKQVARGGGGGSGCYTEDYSVSVQYTLTEEPPPGCYPGARDLSQRIPIIPEDVDCDLGLPSDCQGTHSLGSWGGTRGPDTPRPHRGLGATREEERALCCQARALLRPGCPPEEAGAVRANFPSALQDTQESSTTATEAAGPRSHSADSSSPGA 2 216 238 QHRPPRQPTE YFDMGIFLAFFVVVSLVCLILLV KIKLKQRRSQ +Q8WWF5 
MPLCRPEHLMPRASRVPVAASLPLSHAVIPTQLPSRPGHRPPGRPRRCPKASCLPPPVGPSSTQTAKRVTMGWPRPGRALVAVKALLVLSLLQVPAQAVVRAVLEDNSSSVDFADLPALFGVPLAPEGIRGYLMEVKPANACHPIEAPRLGNRSLGAIVLIRRYDCTFDLKVLNAQRAGFEAAIVHNVHSDDLVSMTHVYEDLRGQIAIPSVFVSEAASQDLRVILGCNKSAHALLLPDDPPCHDLGCHPVLTVSWVLGCTLALVVSAFFVLNHLWLWAQACCSHRRPVKTSTCQKAQVRTFTWHNDLCAICLDEYEEGDQLKILPCSHTYHCKCIDPWFSQAPRRSCPVCKQSVAATEDSFDSTTYSFRDEDPSLPGHRPPIWAIQVQLRSRRLELLGRASPHCHCSTTSLEAEYTTVSSAPPEAPGQ 2 255 277 DLGCHPVLTV SWVLGCTLALVVSAFFVLNHLWL WAQACCSHRR +P60852 MAGGSATTWGYPVALLLLVATLGLGRWLQPDPGLPGLRHSYDCGIKGMQLLVFPRPGQTLRFKVVDEFGNRFDVNNCSICYHWVTSRPQEPAVFSADYRGCHVLEKDGRFHLRVFMEAVLPNGRVDVAQDATLICPKPDPSRTLDSQLAPPAMFSVSTPQTLSFLPTSGHTSQGSGHAFPSPLDPGHSSVHPTPALPSPGPGPTLATLAQPHWGTLEHWDVNKRDYIGTHLSQEQCQVASGHLPCIVRRTSKEACQQAGCCYDNTREVPCYYGNTATVQCFRDGYFVLVVSQEMALTHRITLANIHLAYAPTSCSPTQHTEAFVVFYFPLTHCGTTMQVAGDQLIYENWLVSGIHIQKGPQGSITRDSTFQLHVRCVFNASDFLPIQASIFPPPSPAPMTQPGPLRLELRIAKDETFSSYYGEDDYPIVRLLREPVHVEVRLLQRTDPNLVLLLHQCWGAPSANPFQQPQWPILSDGCPFKGDSYRTQMVALDGATPFQSHYQRFTVATFALLDSGSQRALRGLVYLFCSTSACHTSGLETCSTACSTGTTRQRRSSGHRNDTARPQDIVSSPGPVGFEDSYGQEPTLGPTDSNGNSSLRPLLWAVLLLPAVALVLGFGVFVGLSQTWAQKLWESNRQ 2 602 624 DSNGNSSLRP LLWAVLLLPAVALVLGFGVFVGL SQTWAQKLWE +P20239 MARWQRKASVSSPCGRSIYRFLSLLFTLVTSVNSVSLPQSENPAFPGTLICDKDEVRIEFSSRFDMEKWNPSVVDTLGSEILNCTYALDLERFVLKFPYETCTIKVVGGYQVNIRVGDTTTDVRYKDDMYHFFCPAIQAETHEISEIVVCRRDLISFSFPQLFSRLADENQNVSEMGWIVKIGNGTRAHILPLKDAIVQGFNLLIDSQKVTLHVPANATGIVHYVQESSYLYTVQLELLFSTTGQKIVFSSHAICAPDLSVACNATHMTLTIPEFPGKLESVDFGQWSIPEDQWHANGIDKEATNGLRLNFRKSLLKTKPSEKCPFYQFYLSSLKLTFYFQGNMLSTVIDPECHCESPVSIDELCAQDGFMDFEVYSHQTKPALNLDTLLVGNSSCQPIFKVQSVGLARFHIPLNGCGTRQKFEGDKVIYENEIHALWENPPSNIVFRNSEFRMTVRCYYIRDSMLLNAHVKGHPSPEAFVKPGPLVLVLQTYPDQSYQRPYRKDEYPLVRYLRQPIYMEVKVLSRNDPNIKLVLDDCWATSSEDPASAPQWQIVMDGCEYELDNYRTTFHPAGSSAAHSGHYQRFDVKTFAFVSEARGLSSLIYFHCSALICNQVSLDSPLCSVTCPASLRSKREANKEDTMTVSLPGPILLLSDVSSSKGVDPSSSEITKDIIAKDIASKTLGAVAALVGSAVILGFICYLYKKRTIRFNH 2 684 703 IIAKDIASKT LGAVAALVGSAVILGFICYL YKKRTIRFNH +P21754 
MELSYRLFICLLLWGSTELCYPQPLWLLQGGASHPETSVQPVLVECQEATLMVMVSKDLFGTGKLIRAADLTLGPEACEPLVSMDTEDVVRFEVGLHECGNSMQVTDDALVYSTFLLHDPRPVGNLSIVRTNRAEIPIECRYPRQGNVSSQAILPTWLPFRTTVFSEEKLTFSLRLMEENWNAEKRSPTFHLGDAAHLQAEIHTGSHVPLRLFVDHCVATPTPDQNASPYHTIVDFHGCLVDGLTDASSAFKVPRPGPDTLQFTVDVFHFANDSRNMIYITCHLKVTLAEQDPDELNKACSFSKPSNSWFPVEGSADICQCCNKGDCGTPSHSRRQPHVMSQWSRSASRNRRHVTEEADVTVGPLIFLDRRGDHEVEQWALPSDTSVVLLGVGLAVVVSLTLTAVILVLTRRCRTASHPVSASE 2 387 409 EQWALPSDTS VVLLGVGLAVVVSLTLTAVILVL TRRCRTASHP +Q12836 MWLLRCVLLCVSLSLAVSGQHKPEAPDYSSVLHCGPWSFQFAVNLNQEATSPPVLIAWDNQGLLHELQNDSDCGTWIRKGPGSSVVLEATYSSCYVTEWDSHYIMPVGVEGAGAAEHKVVTERKLLKCPMDLLARDAPDTDWCDSIPARDRLPCAPSPISRGDCEGLGCCYSSEEVNSCYYGNTVTLHCTREGHFSIAVSRNVTSPPLLLDSVRLALRNDSACNPVMATQAFVLFQFPFTSCGTTRQITGDRAVYENELVATRDVKNGSRGSVTRDSIFRLHVSCSYSVSSNSLPINVQVFTLPPPFPETQPGPLTLELQIAKDKNYGSYYGVGDYPVVKLLRDPIYVEVSILHRTDPYLGLLLQQCWATPSTDPLSQPQWPILVKGCPYIGDNYQTQLIPVQKALDLPFPSHHQRFSIFTFSFVNPTVEKQALRGPVHLHCSVSVCQPAETPSCVVTCPDLSRRRNFDNSSQNTTASVSSKGPMILLQATKDPPEKLRVPVDSKVLWVAGLSGTLILGALLVSYLAVKKQKSCPDQMCQ 2 506 528 EKLRVPVDSK VLWVAGLSGTLILGALLVSYLAV KKQKSCPDQM +Q8TCW7 MEQIWLLLLLTIRVLPGSAQFNGYNCDANLHSRFPAERDISVYCGVQAITMKINFCTVLFSGYSETDLALNGRHGDSHCRGFINNNTFPAVVIFIINLSTLEGCGNNLVVSTIPGVSAYGNATSVQVGNISGYIDTPDPPTIISYLPGLLYKFSCSYPLEYLVNNTQLASSSAAISVRENNGTFVSTLNLLLYNDSTYNQQLIIPSIGLPLKTKVFAAVQATNLDGRWNVLMDYCYTTPSGNPNDDIRYDLFLSCDKDPQTTVIENGRSQRGRFSFEVFRFVKHKNQKMSTVFLHCVTKLCRADDCPFLMPICSHRERRDAGRRTTWSPQSSSGSAVLSAGPIITRSDETPTNNSQLGSPSMPPFQLNAITSALISGMVILGVTSFSLLLCSLALLHRKGPTSLVLNGIRNPVFD 2 374 396 PFQLNAITSA LISGMVILGVTSFSLLLCSLALL HRKGPTSLVL diff --git a/aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx b/aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx new file mode 100644 index 00000000..6a98baed Binary files /dev/null and b/aaanalysis/_data/benchmarks/INFO_benchmarks.xlsx differ diff --git a/aaanalysis/data/benchmarks/SEQ_AMYLO.tsv b/aaanalysis/_data/benchmarks/SEQ_AMYLO.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_AMYLO.tsv rename to 
aaanalysis/_data/benchmarks/SEQ_AMYLO.tsv diff --git a/aaanalysis/data/benchmarks/SEQ_CAPSID.tsv b/aaanalysis/_data/benchmarks/SEQ_CAPSID.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_CAPSID.tsv rename to aaanalysis/_data/benchmarks/SEQ_CAPSID.tsv diff --git a/aaanalysis/data/benchmarks/SEQ_DISULFIDE.tsv b/aaanalysis/_data/benchmarks/SEQ_DISULFIDE.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_DISULFIDE.tsv rename to aaanalysis/_data/benchmarks/SEQ_DISULFIDE.tsv diff --git a/aaanalysis/data/benchmarks/SEQ_LOCATION.tsv b/aaanalysis/_data/benchmarks/SEQ_LOCATION.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_LOCATION.tsv rename to aaanalysis/_data/benchmarks/SEQ_LOCATION.tsv diff --git a/aaanalysis/data/benchmarks/SEQ_SOLUBLE.tsv b/aaanalysis/_data/benchmarks/SEQ_SOLUBLE.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_SOLUBLE.tsv rename to aaanalysis/_data/benchmarks/SEQ_SOLUBLE.tsv diff --git a/aaanalysis/data/benchmarks/SEQ_TAIL.tsv b/aaanalysis/_data/benchmarks/SEQ_TAIL.tsv similarity index 100% rename from aaanalysis/data/benchmarks/SEQ_TAIL.tsv rename to aaanalysis/_data/benchmarks/SEQ_TAIL.tsv diff --git a/aaanalysis/data/scale_classification.xlsx b/aaanalysis/_data/scale_classification.xlsx similarity index 100% rename from aaanalysis/data/scale_classification.xlsx rename to aaanalysis/_data/scale_classification.xlsx diff --git a/aaanalysis/data/scales.xlsx b/aaanalysis/_data/scales.xlsx similarity index 100% rename from aaanalysis/data/scales.xlsx rename to aaanalysis/_data/scales.xlsx diff --git a/aaanalysis/data/scales_pc.xlsx b/aaanalysis/_data/scales_pc.xlsx similarity index 100% rename from aaanalysis/data/scales_pc.xlsx rename to aaanalysis/_data/scales_pc.xlsx diff --git a/aaanalysis/data/scales_raw.xlsx b/aaanalysis/_data/scales_raw.xlsx similarity index 100% rename from aaanalysis/data/scales_raw.xlsx rename to aaanalysis/_data/scales_raw.xlsx diff --git 
a/aaanalysis/data/top60.xlsx b/aaanalysis/_data/top60.xlsx similarity index 100% rename from aaanalysis/data/top60.xlsx rename to aaanalysis/_data/top60.xlsx diff --git a/aaanalysis/data/top60_eval.xlsx b/aaanalysis/_data/top60_eval.xlsx similarity index 100% rename from aaanalysis/data/top60_eval.xlsx rename to aaanalysis/_data/top60_eval.xlsx diff --git a/aaanalysis/_utils/__init__.py b/aaanalysis/_utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/aaanalysis/_utils/__pycache__/__init__.cpython-39.pyc b/aaanalysis/_utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 00000000..ec36d995 Binary files /dev/null and b/aaanalysis/_utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc b/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc new file mode 100644 index 00000000..07d15af7 Binary files /dev/null and b/aaanalysis/_utils/__pycache__/_utils_check.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/_utils_constants.cpython-39.pyc b/aaanalysis/_utils/__pycache__/_utils_constants.cpython-39.pyc new file mode 100644 index 00000000..ff9449b8 Binary files /dev/null and b/aaanalysis/_utils/__pycache__/_utils_constants.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/_utils_output.cpython-39.pyc b/aaanalysis/_utils/__pycache__/_utils_output.cpython-39.pyc new file mode 100644 index 00000000..98d80c94 Binary files /dev/null and b/aaanalysis/_utils/__pycache__/_utils_output.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc b/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc new file mode 100644 index 00000000..59668b2c Binary files /dev/null and b/aaanalysis/_utils/__pycache__/utils_aaclust.cpython-39.pyc differ diff --git a/aaanalysis/_utils/__pycache__/utils_cpp.cpython-39.pyc b/aaanalysis/_utils/__pycache__/utils_cpp.cpython-39.pyc new file mode 100644 index 00000000..99fcd96a 
Binary files /dev/null and b/aaanalysis/_utils/__pycache__/utils_cpp.cpython-39.pyc differ diff --git a/aaanalysis/_utils.py b/aaanalysis/_utils/_utils_check.py similarity index 68% rename from aaanalysis/_utils.py rename to aaanalysis/_utils/_utils_check.py index 019c0d25..6e136c27 100644 --- a/aaanalysis/_utils.py +++ b/aaanalysis/_utils/_utils_check.py @@ -1,41 +1,11 @@ -#! /usr/bin/python3 """ -Config with folder structure +Utility check functions """ -import os -import platform from sklearn.utils import check_array, check_consistent_length +import pandas as pd -# Helper Function -def _folder_path(super_folder, folder_name): - """Modification of separator (OS depending)""" - path = os.path.join(super_folder, folder_name + SEP) - return path - - -# Folder -SEP = "\\" if platform.system() == "Windows" else "/" -FOLDER_PROJECT = os.path.dirname(os.path.abspath(__file__)) -FOLDER_DATA = _folder_path(FOLDER_PROJECT, 'data') -URL_DATA = "https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data/" - -# Default data for protein analysis -STR_SCALES = "scales" # Min-max normalized scales (from AAontology) -STR_SCALES_RAW = "scales_raw" # Ras scales (from AAontology) -STR_SCALES_PC = "scales_pc" # AAclust pc-based scales (pc: principal component) -STR_SCALE_CAT = "scale_classification" # AAontology -STR_TOP60 = "top60" # AAclustTop60 -STR_TOP60_EVAL = "top60_eval" # AAclustTop60 evaluation - -# Column names -COL_SCALE_ID = "scale_id" -COL_SEQ = "sequence" -COL_CAT = "category" -COL_SUBCAT = "subcategory" - - -# General check functions +# Type checking functions def check_non_negative_number(name=None, val=None, min_val=0, max_val=None, accept_none=False, just_int=True): """Check if value of given name variable is non-negative integer""" @@ -108,8 +78,7 @@ def check_tuple(name=None, val=None, n=None): raise ValueError(error) -# Data check functions -# TODO update +# Array checking functions def check_feat_matrix(X=None, names=None, labels=None): """Check if 
X and y match (y can be labels or names). Otherwise, transpose X or give error.""" # TODO type check @@ -141,7 +110,40 @@ def check_feat_matrix(X=None, y=None): """ -# Plotting & print functions -def print_red(input_str, **args): - """Prints the given string in red text.""" - print(f"\033[91m{input_str}\033[0m", **args) +# df checking functions +def check_col_in_df(df=None, name_df=None, col=None, col_type=None, accept_nan=False, error_if_exists=False): + """ + Check if the column exists in the DataFrame, if the values have the correct type, and if NaNs are allowed. + """ + # Check if the column already exists and raise error if error_if_exists is True + if error_if_exists and (col in df.columns): + raise ValueError(f"Column '{col}' already exists in '{name_df}'") + + # Check if the column exists in the DataFrame + if col not in df.columns: + raise ValueError(f"'{col}' must be a column in '{name_df}': {list(df.columns)}") + + # Make col_type a list if it is not already + if col_type is not None and not isinstance(col_type, list): + col_type = [col_type] + + # Check if the types match + if col_type is not None: + wrong_types = [x for x in df[col] if not any([isinstance(x, t) for t in col_type])] + + # Remove NaNs from the list of wrong types if they are accepted + if accept_nan: + wrong_types = [x for x in wrong_types if not pd.isna(x)] + + if len(wrong_types) > 0: + raise ValueError(f"Values in '{col}' should be of type(s) {col_type}, " + f"but the following values do not match: {wrong_types}") + + # Check if NaNs are present when they are not accepted + if not accept_nan: + if df[col].isna().sum() > 0: + raise ValueError(f"NaN values are not allowed in '{col}'.") + + + + diff --git a/aaanalysis/_utils/_utils_constants.py b/aaanalysis/_utils/_utils_constants.py new file mode 100644 index 00000000..64fedc7a --- /dev/null +++ b/aaanalysis/_utils/_utils_constants.py @@ -0,0 +1,71 @@ +""" +This is a script for constants (e.g., names or column names) +""" + + +# Default 
datasets for protein analysis +STR_SCALES = "scales" # Min-max normalized scales (from AAontology) +STR_SCALES_RAW = "scales_raw" # Raw scales (from AAontology) +STR_SCALES_PC = "scales_pc" # AAclust pc-based scales (pc: principal component) +STR_SCALE_CAT = "scale_classification" # AAontology +STR_TOP60 = "top60" # AAclustTop60 +STR_TOP60_EVAL = "top60_eval" # AAclustTop60 evaluation + +# Column name datasets (DOM_GSEC) +COL_ENTRY = "entry" # ACC, protein entry, uniprot id +COL_NAME = "name" # Entry name, Protein name, Uniprot Name +COL_LABEL = "label" +COL_SEQ = "sequence" +COLS_PARTS = ["jmd_n", "tmd", "jmd_c"] +COL_TMD_START = "tmd_start" +COL_TMD_STOP = "tmd_stop" +COLS_SEQ_INFO = [COL_SEQ, COL_TMD_START, COL_TMD_STOP] # TODO + +# Column names scales (scale_classification) +COL_SCALE_ID = "scale_id" +COL_CAT = "category" +COL_SUBCAT = "subcategory" +COL_SCALE_NAME = "scale_name" +COL_SCALE_DES = "scale_description" + +# Column names cpp features +COL_FEATURE = "feature" +# COL_CAT, COL_SUBCAT, COL_SCALE_NAME, COL_SCALE_DES +COL_ABS_AUC = "abs_auc" +COL_ABS_MEAN_DIF = "abs_mean_dif" +COL_MEAN_DIF = "mean_dif" +COL_STD_TEST = "std_test" +COL_STD_REF = "std_ref" +COL_PVAL_MW = "p_val_mann_whitney" +COL_PVAL_FDR = "p_val_fdr_bh" +COL_POSITION = "positions" + +COL_FEAT_IMPORTANCE = "feat_importance" +COO_FEAT_IMP_STD = "feat_importance_std" +COL_FEAT_IMPACT = "feat_impact" + +# Standard colors +COLOR_SHAP_POS = '#FF0D57' # (255, 13, 87) +COLOR_SHAP_NEG = '#1E88E5' # (30, 136, 229) +COLOR_FEAT_POS = '#9D2B39' # (157, 43, 57) Mean difference +COLOR_FEAT_NEG = '#326599' # (50, 101, 133) Mean difference +COLOR_FEAT_IMP = '#7F7F7F' # (127, 127, 127) feature importance +COLOR_TMD = '#00FA9A' # (0, 250, 154) +COLOR_JMD = '#0000FF' # (0, 0, 255) + +DICT_COLOR = {"SHAP_POS": COLOR_SHAP_POS, + "SHAP_NEG": COLOR_SHAP_NEG, + "FEAT_POS": COLOR_FEAT_POS, + "FEAT_NEG": COLOR_FEAT_NEG, + "FEAT_IMP": COLOR_FEAT_IMP, + "TMD": COLOR_TMD, + "JMD": COLOR_JMD} + +DICT_COLOR_CAT = 
{"ASA/Volume": "tab:blue", + "Composition": "tab:orange", + "Conformation": "tab:green", + "Energy": "tab:red", + "Others": "tab:gray", + "Polarity": "gold", + "Shape": "tab:cyan", + "Structure-Activity": "tab:brown"} \ No newline at end of file diff --git a/aaanalysis/_utils/_utils_output.py b/aaanalysis/_utils/_utils_output.py new file mode 100644 index 00000000..e1ead0e9 --- /dev/null +++ b/aaanalysis/_utils/_utils_output.py @@ -0,0 +1,32 @@ +""" +This is a script for adjust output (mainly for python console) +""" +import numpy as np + + +# I Helper Functions +# Plotting & print functions +def print_red(input_str, **args): + """Prints the given string in red text.""" + print(f"\033[91m{input_str}\033[0m", **args) + + +# Progress bar +def print_start_progress(): + """Print start progress""" + progress_bar = " " * 25 + print_red(f"\r |{progress_bar}| 0.00%", end="") + + +def print_progress(i=0, n=0): + """Print progress""" + progress = min(np.round(i/n * 100, 2), 100) + progress_bar = "#" * int(progress/4) + " " * (25-int(progress/4)) + print_red(f"\r |{progress_bar}| {progress:.2f}%", end="") + + +def print_finished_progress(): + """Print finished progress bar""" + progress_bar = "#" * 25 + print_red(f"\r |{progress_bar}| 100.00%") + diff --git a/aaanalysis/aaclust/_utils.py b/aaanalysis/_utils/utils_aaclust.py similarity index 96% rename from aaanalysis/aaclust/_utils.py rename to aaanalysis/_utils/utils_aaclust.py index 110f6f47..01038ca6 100644 --- a/aaanalysis/aaclust/_utils.py +++ b/aaanalysis/_utils/utils_aaclust.py @@ -1,6 +1,5 @@ -#! 
/usr/bin/python3 """ -Config with folder structure +Utility functions for AAclust module """ import inspect diff --git a/aaanalysis/cpp/_utils.py b/aaanalysis/_utils/utils_cpp.py similarity index 81% rename from aaanalysis/cpp/_utils.py rename to aaanalysis/_utils/utils_cpp.py index 8eccbe39..28bd231b 100644 --- a/aaanalysis/cpp/_utils.py +++ b/aaanalysis/_utils/utils_cpp.py @@ -6,56 +6,20 @@ import matplotlib.colors as mcolors import matplotlib.pyplot as plt -import aaanalysis._utils as ut +import aaanalysis._utils._utils_constants as ut_c +import aaanalysis._utils._utils_check as ut_check +import aaanalysis._utils._utils_output as ut_o # Settings # Default Split names -STR_SCALE_CAT = "scale_classification" STR_SEGMENT = "Segment" STR_PATTERN = "Pattern" STR_PERIODIC_PATTERN = "PeriodicPattern" -# Default column names for scales and categories -COL_ENTRY = "entry" # ACC, protein entry, uniprot id -COL_NAME = "name" # Entry name, Protein name, Uniprot Name -COL_SCALE_ID = "scale_id" -COLS_PARTS = ["jmd_n", "tmd", "jmd_c"] -COL_SEQ = "sequence" -COL_TMD_START = "tmd_start" -COL_TMD_STOP = "tmd_stop" -COLS_SEQ_INFO = [COL_SEQ, COL_TMD_START, COL_TMD_STOP] # TODO -COL_CAT = "category" -COL_SUBCAT = "subcategory" -COL_SCALE_NAME = "scale_name" -COL_SCALE_DES = "scale_description" - # DEFAULT Signs STR_AA_GAP = "-" -# Default column names for feature statistics -COL_FEATURE = "feature" -COL_ABS_AUC = "abs_auc" -COL_MEAN_DIF = "mean_dif" -COL_ABS_MEAN_DIF = "abs_mean_dif" -COL_STD_TEST = "std_test" -COL_STD_REF = "std_ref" - -COL_FEAT_IMPACT = "feat_impact" -COL_FEAT_IMPORTANCE = "feat_importance" - -DICT_COLOR = {"ASA/Volume": "tab:blue", - "Composition": "tab:orange", - "Conformation": "tab:green", - "Energy": "tab:red", - "Others": "tab:gray", - "Polarity": "gold", - "Shape": "tab:cyan", - "Structure-Activity": "tab:brown"} - -COLOR_SHAP_HIGHER = '#FF0D57' # (255, 13, 87) -COLOR_SHAP_LOWER = '#1E88E5' # (30, 136, 229) - # Default column names for cpp analysis 
LIST_ALL_PARTS = ["tmd", "tmd_e", "tmd_n", "tmd_c", "jmd_n", "jmd_c", "ext_c", "ext_n",
                  "tmd_jmd", "jmd_n_tmd_n", "tmd_c_jmd_c", "ext_n_tmd_n", "tmd_c_ext_c"]


# Sequence check function
def _check_seq(seq, len_, name_seq, name_len, verbose):
    """Return the length to use for one sequence part.

    Parameters
    ----------
    seq : str or None
        Sequence of the part; if None, ``len_`` is returned unchanged.
    len_ : int or None
        Length parameter that belongs to ``seq``.
    name_seq : str
        Name of the sequence parameter (used in the error message).
    name_len : str
        Name of the length parameter (used in the warning message).
    verbose : bool
        If True, a warning is printed when ``len(seq)`` differs from ``len_``.

    Returns
    -------
    int or None
        ``len_`` if ``seq`` is None, otherwise ``len(seq)``.

    Raises
    ------
    ValueError
        If ``seq`` is given but is not a string.
    """
    if seq is None:
        return len_
    # isinstance (instead of an exact type comparison) also accepts str subclasses
    if not isinstance(seq, str):
        raise ValueError(f"'{name_seq}' should be string (type={type(seq)})")
    # Warn if the sequence length doesn't match the corresponding length parameter;
    # the sequence length is returned in either case
    if len_ is not None and len(seq) != len_ and verbose:
        warning_msg = f"The length of {seq} ({len(seq)}) does not match {name_len} ({len_})."
        ut_o.print_red(f"Warning: {warning_msg}")
    return len(seq)


def check_args_len(tmd_len=None, jmd_n_len=None, jmd_c_len=None, ext_len=None,
                   tmd_seq=None, jmd_n_seq=None, jmd_c_seq=None, verbose=False,
                   accept_tmd_none=False):
    """Check length parameters and if they are matching with sequences if provided.

    Parameters
    ----------
    tmd_len, jmd_n_len, jmd_c_len : int or None
        Length parameters, validated with ``ut_check.check_non_negative_number``
        (``min_val=1``). ``tmd_len`` may only be None if ``tmd_seq`` is given or
        ``accept_tmd_none`` is True.
    ext_len : int or None
        If given, both ``jmd_n_len`` and ``jmd_c_len`` must be given and
        ``ext_len`` must not exceed either of them.
    tmd_seq, jmd_n_seq, jmd_c_seq : str or None
        Optional sequences; if given, their lengths replace the length parameters.
    verbose : bool
        If True, a warning is printed when a sequence and its length parameter differ.
    accept_tmd_none : bool
        If True, ``tmd_len`` may be None even without ``tmd_seq``.

    Returns
    -------
    dict
        Dictionary with the keys ``tmd_len``, ``jmd_n_len``, and ``jmd_c_len``.

    Raises
    ------
    ValueError
        If ``ext_len`` is given without ``jmd_n_len``/``jmd_c_len``, if it exceeds
        one of them, or if a given sequence is not a string.
    """
    # Check lengths
    tmd_seq_given = tmd_seq is not None or accept_tmd_none  # If tmd_seq is given, tmd_len can be None
    ut_check.check_non_negative_number(name="tmd_len", val=tmd_len, accept_none=tmd_seq_given, min_val=1)
    ut_check.check_non_negative_number(name="jmd_n_len", val=jmd_n_len, accept_none=True, min_val=1)
    ut_check.check_non_negative_number(name="jmd_c_len", val=jmd_c_len, accept_none=True, min_val=1)
    ut_check.check_non_negative_number(name="ext_len", val=ext_len, accept_none=True)
    # Check if lengths are matching
    if ext_len is not None:
        if jmd_n_len is None:
            raise ValueError(f"'jmd_n_len' should not be None if 'ext_len' ({ext_len}) is given")
        if jmd_c_len is None:
            raise ValueError(f"'jmd_c_len' should not be None if 'ext_len' ({ext_len}) is given")
        # Both JMD lengths are guaranteed to be given from here on
        if ext_len > jmd_n_len:
            raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_n_len ({jmd_n_len})")
        if ext_len > jmd_c_len:
            raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_c_len ({jmd_c_len})")
    # Check if lengths and sequences match
    tmd_len = _check_seq(tmd_seq, tmd_len, "tmd_seq", "tmd_len", verbose)
    jmd_n_len = _check_seq(jmd_n_seq, jmd_n_len, "jmd_n_seq", "jmd_n_len", verbose)
    jmd_c_len = _check_seq(jmd_c_seq, jmd_c_len, "jmd_c_seq", "jmd_c_len", verbose)
    args_len = dict(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
    return args_len


# TODO
check_df_seq(df_seq=None, jmd_n_len=None, jmd_c_len=None): """Get features from df""" # TODO check if df_seq is None or not isinstance(df_seq, pd.DataFrame): raise ValueError("Type of 'df_seq' ({}) must be pd.DataFrame".format(type(df_seq))) - if COL_ENTRY not in list(df_seq): - raise ValueError("'{}' must be in 'df_seq'".format(COL_ENTRY)) - seq_info_in_df = set(COLS_SEQ_INFO).issubset(set(df_seq)) - parts_in_df = set(COLS_PARTS).issubset(set(df_seq)) - seq_in_df = COL_SEQ in set(df_seq) + if ut_c.COL_ENTRY not in list(df_seq): + raise ValueError("'{}' must be in 'df_seq'".format(ut_c.COL_ENTRY)) + seq_info_in_df = set(ut_c.COLS_SEQ_INFO).issubset(set(df_seq)) + parts_in_df = set(ut_c.COLS_PARTS).issubset(set(df_seq)) + seq_in_df = ut_c.COL_SEQ in set(df_seq) if "start" in list(df_seq): - raise ValueError(f"'df_seq' should not contain 'start' in columns. Change column to '{COL_TMD_START}'.") + raise ValueError(f"'df_seq' should not contain 'start' in columns. Change column to '{ut_c.COL_TMD_START}'.") if "stop" in list(df_seq): - raise ValueError(f"'df_seq' should not contain 'stop' in columns. Change column to '{COL_TMD_STOP}'.") + raise ValueError(f"'df_seq' should not contain 'stop' in columns. 
Change column to '{ut_c.COL_TMD_STOP}'.") if not (seq_info_in_df or parts_in_df or seq_in_df): - raise ValueError(f"'df_seq' should contain ['{COL_SEQ}'], {COLS_SEQ_INFO}, or {COLS_PARTS}") + raise ValueError(f"'df_seq' should contain ['{ut_c.COL_SEQ}'], {ut_c.COLS_SEQ_INFO}, or {ut_c.COLS_PARTS}") # Check data type in part or sequence columns else: if seq_info_in_df or seq_in_df: - error = f"Sequence column ('{COL_SEQ}') should only contain strings" - dict_wrong_seq = {COL_SEQ: [x for x in df_seq[COL_SEQ].values if type(x) != str]} + error = f"Sequence column ('{ut_c.COL_SEQ}') should only contain strings" + dict_wrong_seq = {ut_c.COL_SEQ: [x for x in df_seq[ut_c.COL_SEQ].values if type(x) != str]} else: - cols = COLS_PARTS + cols = ut_c.COLS_PARTS error = f"Part columns ('{cols}') should only contain strings" - dict_wrong_seq = {part: [x for x in df_seq[part].values if type(x) != str] for part in COLS_PARTS} + dict_wrong_seq = {part: [x for x in df_seq[part].values if type(x) != str] for part in ut_c.COLS_PARTS} # Filter empty lists dict_wrong_seq = {part: dict_wrong_seq[part] for part in dict_wrong_seq if len(dict_wrong_seq[part]) > 0} n_wrong_entries = sum([len(dict_wrong_seq[part]) for part in dict_wrong_seq]) @@ -177,33 +168,33 @@ def check_df_seq(df_seq=None, jmd_n_len=None, jmd_c_len=None): # Check if only sequence given -> Convert sequence to tmd if seq_in_df and not parts_in_df: if seq_info_in_df: - for entry, start, stop in zip(df_seq[COL_ENTRY], df_seq[COL_TMD_START], df_seq[COL_TMD_STOP]): - ut.check_non_negative_number(name=f"tmd_start [{entry}]", val=start) - ut.check_non_negative_number(name=f"tmd_start [{entry}]", val=stop,) - tmd_start = [int(x) for x in df_seq[COL_TMD_START]] - tmd_stop = [int(x) for x in df_seq[COL_TMD_STOP]] + for entry, start, stop in zip(df_seq[ut_c.COL_ENTRY], df_seq[ut_c.COL_TMD_START], df_seq[ut_c.COL_TMD_STOP]): + ut_check.check_non_negative_number(name=f"tmd_start [{entry}]", val=start) + 
ut_check.check_non_negative_number(name=f"tmd_start [{entry}]", val=stop,) + tmd_start = [int(x) for x in df_seq[ut_c.COL_TMD_START]] + tmd_stop = [int(x) for x in df_seq[ut_c.COL_TMD_STOP]] else: tmd_start = 1 if jmd_n_len is None else 1 + jmd_n_len - tmd_stop = [len(x)-1 for x in df_seq[COL_SEQ]] + tmd_stop = [len(x)-1 for x in df_seq[ut_c.COL_SEQ]] if jmd_c_len is not None: tmd_stop = [x - jmd_c_len for x in tmd_stop] - df_seq[COL_TMD_START] = tmd_start - df_seq[COL_TMD_STOP] = tmd_stop - seq_info_in_df = set(COLS_SEQ_INFO).issubset(set(df_seq)) + df_seq[ut_c.COL_TMD_START] = tmd_start + df_seq[ut_c.COL_TMD_STOP] = tmd_stop + seq_info_in_df = set(ut_c.COLS_SEQ_INFO).issubset(set(df_seq)) # Check parameter combinations if [jmd_n_len, jmd_c_len].count(None) == 1: raise ValueError("'jmd_n_len' and 'jmd_c_len' should both be given (not None) or None") if not parts_in_df and seq_info_in_df and jmd_n_len is None and jmd_c_len is None: error = f"'jmd_n_len' and 'jmd_c_len' should not be None if " \ - f"sequence information ({COLS_SEQ_INFO}) are given." + f"sequence information ({ut_c.COLS_SEQ_INFO}) are given." raise ValueError(error) if not seq_info_in_df and jmd_n_len is not None and jmd_c_len is not None: - error = f"If not all sequence information ({COLS_SEQ_INFO}) are given," \ + error = f"If not all sequence information ({ut_c.COLS_SEQ_INFO}) are given," \ f"'jmd_n_len' and 'jmd_c_len' should be None." 
raise ValueError(error) if not parts_in_df and seq_info_in_df and (jmd_c_len is None or jmd_n_len is None): error = "If part columns ({}) are not in 'df_seq' but sequence information ({}), " \ - "\n'jmd_n_len' and 'jmd_c_len' should be given (not None).".format(COLS_PARTS, COLS_SEQ_INFO) + "\n'jmd_n_len' and 'jmd_c_len' should be given (not None).".format(ut_c.COLS_PARTS, ut_c.COLS_SEQ_INFO) raise ValueError(error) return df_seq @@ -211,7 +202,7 @@ def check_df_seq(df_seq=None, jmd_n_len=None, jmd_c_len=None): # Scale check functions def check_df_scales(df_scales=None, df_parts=None, accept_none=False, accept_gaps=False): """Check if df_scales is a valid input and matching to df_parts""" - ut.check_bool(name="accept_gaps", val=accept_gaps) + ut_check.check_bool(name="accept_gaps", val=accept_gaps) if accept_none and df_scales is None: return # Skip check if not isinstance(df_scales, pd.DataFrame): @@ -262,17 +253,17 @@ def check_df_cat(df_cat=None, df_scales=None, accept_none=True, verbose=True): if not isinstance(df_cat, pd.DataFrame): raise ValueError("'df_cat' should be type pd.DataFrame (not {})".format(type(df_cat))) # Check columns - for col in [COL_SCALE_ID, COL_CAT, COL_SUBCAT]: + for col in [ut_c.COL_SCALE_ID, ut_c.COL_CAT, ut_c.COL_SUBCAT]: if col not in df_cat: raise ValueError(f"'{col}' not in 'df_cat'") # Check scales from df_cat and df_scales do match if df_scales is not None: - scales_cat = list(df_cat[COL_SCALE_ID]) + scales_cat = list(df_cat[ut_c.COL_SCALE_ID]) scales = list(df_scales) overlap_scales = [x for x in scales if x in scales_cat] difference_scales = list(set(scales).difference(set(scales_cat))) # Adjust df_cat and df_scales - df_cat = df_cat[df_cat[COL_SCALE_ID].isin(overlap_scales)] + df_cat = df_cat[df_cat[ut_c.COL_SCALE_ID].isin(overlap_scales)] df_scales = df_scales[overlap_scales] if verbose and len(difference_scales) > 0: str_warning = f"Scales from 'df_scales' and 'df_cat' do not overlap completely." 
@@ -403,7 +394,7 @@ def check_split(split=None): i_th, n_split = [int(x) for x in split.split("(")[1].replace(")", "").split(",")] # Check if values non-negative integers for name, val in zip(["i_th", "n_split"], [i_th, n_split]): - ut.check_non_negative_number(name=name, val=val) + ut_check.check_non_negative_number(name=name, val=val) # Check if i-th and n_split are valid if i_th > n_split: raise ValueError @@ -416,7 +407,7 @@ def check_split(split=None): start = int(start[0]) # Check if values non-negative integers for name, val in zip(["start", "step1", "step2"], [start, step1, step2]): - ut.check_non_negative_number(name=name, val=val) + ut_check.check_non_negative_number(name=name, val=val) # Check if terminus valid terminus = split.split("i+")[0].split("(")[1].replace(",", "") if terminus not in ["N", "C"]: @@ -431,7 +422,7 @@ def check_split(split=None): list_pos = [int(x) for x in list_pos] for val in list_pos: name = "pos" + str(val) - ut.check_non_negative_number(name=name, val=val) + ut_check.check_non_negative_number(name=name, val=val) # Check if terminus valid if terminus not in ["N", "C"]: raise ValueError @@ -510,16 +501,16 @@ def check_df_feat(df_feat=None, df_cat=None): if len(df_feat) == 0 or len(list(df_feat)) == 0: raise ValueError("'df_feat' should be not empty") # Check if feature column in df_feat - if COL_FEATURE not in df_feat: - raise ValueError(f"'{COL_FEATURE}' must be column in 'df_feat'") - list_feat = list(df_feat[COL_FEATURE]) + if ut_c.COL_FEATURE not in df_feat: + raise ValueError(f"'{ut_c.COL_FEATURE}' must be column in 'df_feat'") + list_feat = list(df_feat[ut_c.COL_FEATURE]) for feat in list_feat: if feat.count("-") != 2: raise ValueError(f"'{feat}' is no valid feature") # Check if df_feat matches df_cat if df_cat is not None: scales = set([x.split("-")[2] for x in list_feat]) - list_scales = list(df_cat[COL_SCALE_ID]) + list_scales = list(df_cat[ut_c.COL_SCALE_ID]) missing_scales = [x for x in scales if x not in list_scales] 
if len(missing_scales) > 0: raise ValueError(f"Following scales occur in 'df_feat' but not in 'df_cat': {missing_scales}") @@ -550,21 +541,3 @@ def get_mean_excepting_nan(x): return vf_scale -# Progress bar -def print_start_progress(): - """Print start progress""" - progress_bar = " " * 25 - ut.print_red(f"\r |{progress_bar}| 0.00%", end="") - - -def print_progress(i=0, n=0): - """Print progress""" - progress = min(np.round(i/n * 100, 2), 100) - progress_bar = "#" * int(progress/4) + " " * (25-int(progress/4)) - ut.print_red(f"\r |{progress_bar}| {progress:.2f}%", end="") - - -def print_finished_progress(): - """Print finished progress bar""" - progress_bar = "#" * 25 - ut.print_red(f"\r |{progress_bar}| 100.00%") diff --git a/aaanalysis/_utils/utils_dpulearn.py b/aaanalysis/_utils/utils_dpulearn.py new file mode 100644 index 00000000..2463d63c --- /dev/null +++ b/aaanalysis/_utils/utils_dpulearn.py @@ -0,0 +1,31 @@ +""" +This is a script for ... +""" +import time +import pandas as pd +import numpy as np + + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + + +# I Helper Functions + + +# II Main Functions + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main() diff --git a/aaanalysis/aaclust/__pycache__/_utils.cpython-38.pyc b/aaanalysis/aaclust/__pycache__/_utils.cpython-38.pyc deleted file mode 100644 index cba0906c..00000000 Binary files a/aaanalysis/aaclust/__pycache__/_utils.cpython-38.pyc and /dev/null differ diff --git a/aaanalysis/aaclust/__pycache__/_utils.cpython-39.pyc b/aaanalysis/aaclust/__pycache__/_utils.cpython-39.pyc deleted file mode 100644 index 7d858027..00000000 Binary files a/aaanalysis/aaclust/__pycache__/_utils.cpython-39.pyc and /dev/null differ diff --git a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc index 
7903989e..83f1ddbc 100644 Binary files a/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc and b/aaanalysis/aaclust/__pycache__/aaclust.cpython-39.pyc differ diff --git a/aaanalysis/aaclust/aaclust.py b/aaanalysis/aaclust/aaclust.py index 7469604d..a0fd6f6c 100644 --- a/aaanalysis/aaclust/aaclust.py +++ b/aaanalysis/aaclust/aaclust.py @@ -7,8 +7,7 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.cluster import KMeans -import aaanalysis.aaclust._utils as _ut -import aaanalysis._utils as ut +import aaanalysis.utils as ut # I Helper Functions @@ -200,7 +199,7 @@ def _get_min_cor_cluster(X, labels=None, label_cluster=None, on_center=True): def _get_quality_measure(X, metric=None, labels=None, label_cluster=None, on_center=True): """Get quality measure single cluster given by feature matrix X, labels, and label of cluster""" mask = [l == label_cluster for l in labels] - if metric == _ut.METRIC_CORRELATION: + if metric == ut.METRIC_CORRELATION: return get_min_cor(X[mask], on_center=on_center) else: return get_max_dist(X[mask], on_center=on_center, metric=metric) @@ -209,7 +208,7 @@ def _get_quality_measure(X, metric=None, labels=None, label_cluster=None, on_cen def _get_best_cluster(dict_clust_qm=None, metric=None): """Get cluster with best quality measure: either highest minimum Pearson correlation or lowest distance measure""" - if metric == _ut.METRIC_CORRELATION: + if metric == ut.METRIC_CORRELATION: return max(dict_clust_qm, key=dict_clust_qm.get) else: return min(dict_clust_qm, key=dict_clust_qm.get) @@ -348,7 +347,7 @@ def __init__(self, model=None, model_kwargs=None, verbose=False): self.model = model if model_kwargs is None: model_kwargs = dict() - model_kwargs = _ut.check_model(model=self.model, model_kwargs=model_kwargs) + model_kwargs = ut.check_model(model=self.model, model_kwargs=model_kwargs) self._model_kwargs = model_kwargs # AAclust clustering settings self._verbose = verbose @@ -364,9 +363,9 @@ def __init__(self, model=None, 
model_kwargs=None, verbose=False): # Clustering method def fit(self, X, names=None, on_center=True, min_th=0, merge_metric="euclidean", n_clusters=None): """ - Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation. + Fit the AAclust model on the _data, optimizing cluster formation using Pearson correlation. - AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data (X) into + AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data(X) into clusters by maximizing the within-cluster Pearson correlation beyond the 'min_th' threshold. The quality of clustering is either based on the minimum Pearson correlation of all members ('min_cor all') or between the cluster center and its members ('min_cor center'), governed by `on_center`. @@ -408,8 +407,8 @@ def fit(self, X, names=None, on_center=True, min_th=0, merge_metric="euclidean" For further information, refer to the AAclust paper : TODO: add link to AAclust paper """ # Check input - _ut.check_min_th(min_th=min_th) - merge_metric = _ut.check_merge_metric(merge_metric=merge_metric) + ut.check_min_th(min_th=min_th) + merge_metric = ut.check_merge_metric(merge_metric=merge_metric) X, names = ut.check_feat_matrix(X=X, names=names) args = dict(model=self.model, model_kwargs=self._model_kwargs, min_th=min_th, on_center=on_center) # Clustering using given clustering models @@ -541,7 +540,7 @@ def get_cluster_medoids(X, labels=None): medoid_labels : array-like The labels corresponding to each medoid. medoid_ind : array-like - Indexes of medoids within the original data. + Indexes of medoids within the original _data. """ medoids, medoid_labels, medoid_ind = get_cluster_medoids(X, labels=labels) return medoids, medoid_labels, medoid_ind @@ -559,9 +558,9 @@ def correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive= X_ref : array-like Reference feature matrix. 
labels_test : list or array-like, optional - Cluster labels for the test data. + Cluster labels for the test _data. labels_ref : list or array-like, optional - Cluster labels for the reference data. + Cluster labels for the reference _data. n : int, default = 3 Number of top centers to consider based on correlation strength. positive : bool, default = True @@ -585,7 +584,7 @@ def correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive= names_ref = [x for x in list(dict.fromkeys(labels_ref)) if "unclassified" not in x.lower()] masks_ref = [[True if i == label else False for i in labels_ref] for label in names_ref] if on_center: - # Get centers for all clusters in reference data + # Get centers for all clusters in reference _data centers = np.concatenate([cluster_center(X_ref[mask]) for mask in masks_ref], axis=0) # Compute correlation of test data with centers Xtest_centers = np.concatenate([X_test, centers], axis=0) @@ -594,7 +593,7 @@ def correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive= else: masks_test = [[True if i == j else False for j in range(0, len(labels_test))] for i, _ in enumerate(labels_test)] - # Compute minimum correlation of test data with each group of reference data + # Compute minimum correlation of test data with each group of reference _data X_corr = np.array([[_min_cor_all(np.concatenate([X_test[mask_test], X_ref[mask_ref]], axis=0)) for mask_ref in masks_ref ] for mask_test in masks_test]) # Get index for n centers with highest/lowest correlation for each scale @@ -610,3 +609,7 @@ def correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive= str_corr = ";".join([f"{name} ({round(corr, 3)})" for name, corr in zip(top_names, top_corr)]) list_top_center_name_corr.append(str_corr) return list_top_center_name_corr + + def eval(self): + """""" + # TODO add evaluation function diff --git a/aaanalysis/aaclust/aaclust_plot.py b/aaanalysis/aaclust/aaclust_plot.py new file mode 100644 
index 00000000..2463d63c --- /dev/null +++ b/aaanalysis/aaclust/aaclust_plot.py @@ -0,0 +1,31 @@ +""" +This is a script for ... +""" +import time +import pandas as pd +import numpy as np + + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + + +# I Helper Functions + + +# II Main Functions + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main() diff --git a/aaanalysis/cpp/__init__.py b/aaanalysis/cpp/__init__.py index 76041ecd..e822755b 100644 --- a/aaanalysis/cpp/__init__.py +++ b/aaanalysis/cpp/__init__.py @@ -1,4 +1,5 @@ from aaanalysis.cpp.cpp import CPP +from aaanalysis.cpp.cpp_plot import CPPPlot from aaanalysis.cpp.feature import SequenceFeature, SplitRange -__all__ = ["CPP", "SequenceFeature", "SplitRange"] +__all__ = ["CPP", "CPPPlot", "SequenceFeature", "SplitRange"] diff --git a/aaanalysis/cpp/__pycache__/__init__.cpython-39.pyc b/aaanalysis/cpp/__pycache__/__init__.cpython-39.pyc index 1fd42c0b..fcc12ea7 100644 Binary files a/aaanalysis/cpp/__pycache__/__init__.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/__init__.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_cpp.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_cpp.cpython-39.pyc index 7863de3b..da2bd937 100644 Binary files a/aaanalysis/cpp/__pycache__/_cpp.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/_cpp.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_feature_pos.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_feature_pos.cpython-39.pyc index 794f5942..58aee4c7 100644 Binary files a/aaanalysis/cpp/__pycache__/_feature_pos.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/_feature_pos.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_feature_stat.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_feature_stat.cpython-39.pyc index b92b132e..6306a853 100644 Binary files 
a/aaanalysis/cpp/__pycache__/_feature_stat.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/_feature_stat.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_part.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_part.cpython-39.pyc index a214b858..22ce9d9a 100644 Binary files a/aaanalysis/cpp/__pycache__/_part.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/_part.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_split.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_split.cpython-39.pyc index 4f292692..486ad10b 100644 Binary files a/aaanalysis/cpp/__pycache__/_split.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/_split.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/_utils.cpython-38.pyc b/aaanalysis/cpp/__pycache__/_utils.cpython-38.pyc deleted file mode 100644 index 1c6fac5a..00000000 Binary files a/aaanalysis/cpp/__pycache__/_utils.cpython-38.pyc and /dev/null differ diff --git a/aaanalysis/cpp/__pycache__/_utils.cpython-39.pyc b/aaanalysis/cpp/__pycache__/_utils.cpython-39.pyc deleted file mode 100644 index 3da46d18..00000000 Binary files a/aaanalysis/cpp/__pycache__/_utils.cpython-39.pyc and /dev/null differ diff --git a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc index 79da1727..74104f2a 100644 Binary files a/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/cpp.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc b/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc new file mode 100644 index 00000000..81093f90 Binary files /dev/null and b/aaanalysis/cpp/__pycache__/cpp_plot.cpython-39.pyc differ diff --git a/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc b/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc index 0acdd86f..d0835d7d 100644 Binary files a/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc and b/aaanalysis/cpp/__pycache__/feature.cpython-39.pyc differ diff --git a/aaanalysis/cpp/_cpp.py 
b/aaanalysis/cpp/_cpp.py index 2b9c3060..f3879073 100644 --- a/aaanalysis/cpp/_cpp.py +++ b/aaanalysis/cpp/_cpp.py @@ -8,7 +8,7 @@ import seaborn as sns import numpy as np -import aaanalysis.cpp._utils as ut +import aaanalysis.utils as ut # I Helper Functions @@ -358,22 +358,22 @@ def draw_shap_legend(x=None, y=10, offset_text=1, fontsize=13): arrow_dif = y * 0.02 plt.text(x - offset_text, y, 'higher', fontweight='bold', - fontsize=fontsize, color=ut.COLOR_SHAP_HIGHER, + fontsize=fontsize, color=ut.COLOR_SHAP_POS, horizontalalignment='right') - plt.text(x + offset_text*1.1, y, 'lower', + plt.text(x + offset_text * 1.1, y, 'lower', fontweight='bold', - fontsize=fontsize, color=ut.COLOR_SHAP_LOWER, + fontsize=fontsize, color=ut.COLOR_SHAP_NEG, horizontalalignment='left') - plt.text(x, y-arrow_dif, r'$\leftarrow$', + plt.text(x, y - arrow_dif, r'$\leftarrow$', fontweight='bold', - fontsize=fontsize+1, color=ut.COLOR_SHAP_LOWER, + fontsize=fontsize+1, color=ut.COLOR_SHAP_NEG, horizontalalignment='center') - plt.text(x, y+arrow_dif, r'$\rightarrow$', + plt.text(x, y + arrow_dif, r'$\rightarrow$', fontweight='bold', - fontsize=fontsize+1, color=ut.COLOR_SHAP_HIGHER, + fontsize=fontsize+1, color=ut.COLOR_SHAP_POS, horizontalalignment='center') @@ -395,8 +395,6 @@ def __init__(self, tmd_len=20, jmd_n_len=10, jmd_c_len=10, start=1): # Constants XLIM_ADD = 3 YLIM_ADD = 1 - HIGHER_COLOR = '#FF0D57' # (255, 13, 87) - LOWER_COLOR = '#1E88E5' # (30, 136, 229) # Helper methods def _get_starts(self, x_shift=0): @@ -632,8 +630,8 @@ def profile(self, df_pos=None, ax=None, dict_color=None, edge_color="none", bar_ df_pos = df_pos.sum(axis=1) df_neg = df_bar[df_bar < 0] df_neg = df_neg.sum(axis=1) - ax = df_pos.plot(ax=ax, color=ut.COLOR_SHAP_HIGHER, **plot_args) - ax = df_neg.plot(ax=ax, color=ut.COLOR_SHAP_LOWER, **plot_args) + ax = df_pos.plot(ax=ax, color=ut.COLOR_SHAP_POS, **plot_args) + ax = df_neg.plot(ax=ax, color=ut.COLOR_SHAP_NEG, **plot_args) ylim = ut.check_ylim(df=df, 
val_col="y", ylim=ylim, retrieve_plot=True) plt.ylim(ylim) if add_legend: @@ -687,8 +685,8 @@ def heatmap(self, df_pos=None, ax=None, cmap=None, cmap_n_colors=None, cbar_kws= """Show summary static values of feature categories/sub_categories per position as heat map""" facecolor = "black" if facecolor_dark else "white" # Default arguments for heatmap - cmap = get_cmap_heatmap(df_pos=df_pos, cmap=cmap, n_colors=cmap_n_colors, higher_color=ut.COLOR_SHAP_HIGHER, - lower_color=ut.COLOR_SHAP_LOWER, facecolor_dark=facecolor_dark) + cmap = get_cmap_heatmap(df_pos=df_pos, cmap=cmap, n_colors=cmap_n_colors, higher_color=ut.COLOR_SHAP_POS, + lower_color=ut.COLOR_SHAP_NEG, facecolor_dark=facecolor_dark) center = get_center_heatmap(df_pos=df_pos) dict_cbar, cbar_kws_ = get_cbar_args_heatmap(cbar_kws=cbar_kws, df_pos=df_pos) linewidths = 0.01 if grid_on else 0 diff --git a/aaanalysis/cpp/_feature_pos.py b/aaanalysis/cpp/_feature_pos.py index 68cd5546..ba4e70ae 100644 --- a/aaanalysis/cpp/_feature_pos.py +++ b/aaanalysis/cpp/_feature_pos.py @@ -5,7 +5,7 @@ from aaanalysis.cpp._part import Parts from aaanalysis.cpp._split import Split -import aaanalysis.cpp._utils as ut +import aaanalysis.utils as ut # I Helper Functions @@ -19,15 +19,6 @@ def check_dict_part_pos(dict_part_pos=None): raise ValueError(error) -def check_part_args_non_negative_int(tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=0, start=1): - """Check if args non-negative integers""" - ut.check_non_negative_number(name="start", val=start) - args = zip(["tmd_len", "jmd_n_len", "jmd_c_len", "ext_len"], - [tmd_len, jmd_n_len, jmd_c_len, ext_len]) - for name, val in args: - ut.check_non_negative_number(name=name, val=val, min_val=0) - - # II Main Functions class SequenceFeaturePositions: """Class for getting sequence positions for features""" @@ -48,8 +39,8 @@ def get_dict_part_pos(tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=0, start=1 ------- dict_part_pos: dictionary with parts to positions of parts """ - 
check_part_args_non_negative_int(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, - ext_len=ext_len, start=start) + ut.check_non_negative_number(name="start", val=start, min_val=1) + ut.check_args_len(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) pa = Parts() jmd_n = list(range(0, jmd_n_len)) tmd = list(range(jmd_n_len, tmd_len+jmd_n_len)) diff --git a/aaanalysis/cpp/_feature_stat.py b/aaanalysis/cpp/_feature_stat.py index a1dad9ee..7d739f9e 100644 --- a/aaanalysis/cpp/_feature_stat.py +++ b/aaanalysis/cpp/_feature_stat.py @@ -12,7 +12,7 @@ import warnings from aaanalysis.cpp._split import SplitRange -import aaanalysis.cpp._utils as ut +import aaanalysis.utils as ut # I Helper Functions diff --git a/aaanalysis/cpp/_part.py b/aaanalysis/cpp/_part.py index b2a4496d..6e091bd4 100644 --- a/aaanalysis/cpp/_part.py +++ b/aaanalysis/cpp/_part.py @@ -1,7 +1,7 @@ """ Script for Part objects used to retrieve sequence parts for given sequences. """ -import aaanalysis.cpp._utils as ut +import aaanalysis.utils as ut # I Helper Functions diff --git a/aaanalysis/cpp/_split.py b/aaanalysis/cpp/_split.py index 7c305fce..7820eb15 100644 --- a/aaanalysis/cpp/_split.py +++ b/aaanalysis/cpp/_split.py @@ -4,7 +4,7 @@ import numpy as np import itertools -import aaanalysis.cpp._utils as ut +import aaanalysis.utils as ut # I Helper Functions diff --git a/aaanalysis/cpp/cpp.py b/aaanalysis/cpp/cpp.py index 4db58c01..21c95196 100644 --- a/aaanalysis/cpp/cpp.py +++ b/aaanalysis/cpp/cpp.py @@ -2,231 +2,15 @@ This is a script for ... 
""" import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -import seaborn as sns -import inspect -import warnings -import aaanalysis.cpp._utils as _ut from aaanalysis.cpp.feature import SequenceFeature from aaanalysis.cpp._feature_stat import SequenceFeatureStatistics -from aaanalysis.cpp._feature_pos import SequenceFeaturePositions -from aaanalysis.cpp._cpp import CPPPlots, get_optimal_fontsize -import aaanalysis as aa -import aaanalysis._utils as ut - -# I Helper Functions -# TODO separate interface from backend -# TODO simplify interface (delete old profile) -# TODO delete SHAP -# TODO add importance plot for heatmap -# TODO add ranking - - -# Check CPP parameters -def check_len_ext_and_jmd(jmd_n_len=None, jmd_c_len=None, ext_len=None): - """Check if lengths are matching""" - ut.check_non_negative_number(name="jmd_n_len", val=jmd_n_len) - ut.check_non_negative_number(name="jmd_c_len", val=jmd_c_len) - ut.check_non_negative_number(name="ext_len", val=ext_len) - if ext_len > jmd_n_len: - raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_n_len ({jmd_n_len})") - if ext_len > jmd_c_len: - raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_c_len ({jmd_c_len})") - - -# Check for add methods -def check_shap_value_for_feat_impact(df_feat=None, col_shap=None): - """Check if SHAP value column in df""" - if col_shap not in df_feat: - raise ValueError(f"'{col_shap}' must be column in 'df_feat' to compute feature impact") - wrong_types = [x for x in list(df_feat[col_shap]) if type(x) not in [float, int]] - if len(wrong_types) > 0: - error = f"Values in '{col_shap}' should be type float or int\n" \ - f" but following values do not match: {wrong_types}" - raise ValueError(error) - - -def check_feat_impact_in_df_feat(df_feat=None, name_feat_impact=None): - """Check if name for feature impact column already""" - if name_feat_impact in df_feat: - error = f"'name_feat_impact' ('{name_feat_impact}') already in 'df_feat' columns: {list(df_feat)}" - raise 
ValueError(error) - - -def check_ref_group(ref_group=0, labels=None): - """Check if ref group class lable""" - if ref_group not in labels: - raise ValueError(f"'ref_group' ({ref_group}) not class label: {set(labels)}.") - - -def check_sample_in_df_seq(sample_name=None, df_seq=None): - """Check if sample name in df_seq""" - list_names = list(df_seq[_ut.COL_NAME]) - if sample_name not in list_names: - error = f"'sample_name' ('{sample_name}') not in '{_ut.COL_NAME}' of 'df_seq'." \ - f"\nValid names are: {list_names}" - raise ValueError(error) - - -# Check get df positions -def check_value_type(val_type=None, count_in=True): - """Check if value type is valid""" - list_value_type = ["mean", "sum", "std"] - if count_in: - list_value_type.append("count") - if val_type not in list_value_type: - raise ValueError(f"'val_type' ('{val_type}') should be on of following: {list_value_type}") - - -def check_normalize(normalize=True): - """Check normalize parameter""" - if not (type(normalize) == bool or normalize in ["positions", "positions_only"]): - raise ValueError(f"'normalize' ('{normalize}') should be bool or, if normalized for positions, 'positions'.") - normalize_for_positions = False if type(normalize) is bool else "positions" in normalize - normalize = normalize if type(normalize) is bool else "positions" == normalize - return normalize, normalize_for_positions - - -# Check for plotting methods -def check_args_len(tmd_seq=None, jmd_n_seq=None, jmd_c_seq=None, tmd_len=None, jmd_n_len=None, jmd_c_len=None): - """Check if parameters for sequence size and sequences match""" - count = 0 - for seq in [tmd_seq, jmd_c_seq, jmd_n_seq]: - if type(seq) == str: - count += 1 - if count == 3: - if len(jmd_n_seq) != jmd_n_len: - error = f"'jmd_n_seq' ('{jmd_n_seq}', len={len(jmd_n_seq)}) does not match CPP setting: ({jmd_n_len})." 
- raise ValueError(error) - if len(jmd_c_seq) != jmd_c_len: - error = f"'jmd_c_seq' ('{jmd_c_seq}', len={len(jmd_c_seq)}) does not match CPP setting: ({jmd_c_len})." - raise ValueError(error) - tmd_len, jmd_n_len, jmd_c_len = len(tmd_seq), len(jmd_n_seq), len(jmd_c_seq) - elif count != 0: - raise ValueError("'jmd_n_seq' 'tmd_seq', and 'jmd_c_seq' must all be None or sequence (type string)") - args_len = dict(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len) - return args_len - - -def check_args_size(seq_size=None, tmd_jmd_fontsize=None): - """Check if sequence size parameters match""" - ut.check_non_negative_number(name="seq_size", val=seq_size, min_val=0, accept_none=True, just_int=False) - ut.check_non_negative_number(name="tmd_jmd_fontsize", val=tmd_jmd_fontsize, min_val=0, accept_none=True, just_int=False) - args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - return args_size - - -def check_args_xtick(xtick_size=None, xtick_width=None, xtick_length=None): - """Check if x tick parameters non-negative float""" - args = dict(accept_none=True, just_int=False, min_val=0) - ut.check_non_negative_number(name="xtick_size", val=xtick_size, **args) - ut.check_non_negative_number(name="xtick_width", val=xtick_width, **args) - ut.check_non_negative_number(name="xtick_length", val=xtick_length, **args) - args_xtick = dict(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) - return args_xtick - - -def check_args_ytick(ytick_size=None, ytick_width=None, ytick_length=None): - """Check if y tick parameters non-negative float""" - args = dict(accept_none=True, just_int=False, min_val=1) - ut.check_non_negative_number(name="ytick_size", val=ytick_size, **args) - ut.check_non_negative_number(name="ytick_width", val=ytick_width, **args) - ut.check_non_negative_number(name="ytick_length", val=ytick_length, **args) - args_ytick = dict(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length) - return args_ytick - 
- -def check_part_color(tmd_color=None, jmd_color=None): - """Check if part colors valid""" - _ut.check_color(name="tmd_color", val=tmd_color) - _ut.check_color(name="jmd_color", val=jmd_color) - args_part_color = dict(tmd_color=tmd_color, jmd_color=jmd_color) - return args_part_color - - -def check_seq_color(tmd_seq_color=None, jmd_seq_color=None): - """Check sequence colors""" - _ut.check_color(name="tmd_seq_color", val=tmd_seq_color) - _ut.check_color(name="jmd_seq_color", val=jmd_seq_color) - args_seq_color = dict(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) - return args_seq_color - - -def check_figsize(figsize=None): - """""" - _ut.check_tuple(name="figsize", val=figsize, n=2) - ut.check_non_negative_number(name="figsize:width", val=figsize[0], min_val=1, just_int=False) - ut.check_non_negative_number(name="figsize:height", val=figsize[1], min_val=1, just_int=False) - - -def check_dict_color(dict_color=None, df_cat=None): - """Check if color dictionary is matching to DataFrame with categories""" - list_cats = list(sorted(set(df_cat[_ut.COL_CAT]))) - if dict_color is None: - dict_color = _ut.DICT_COLOR - if not isinstance(dict_color, dict): - raise ValueError(f"'dict_color' should be a dictionary with colors for: {list_cats}") - list_cat_not_in_dict_cat = [x for x in list_cats if x not in dict_color] - if len(list_cat_not_in_dict_cat) > 0: - error = f"'dict_color' not complete! 
Following categories are missing from 'df_cat': {list_cat_not_in_dict_cat}" - raise ValueError(error) - for key in dict_color: - color = dict_color[key] - _ut.check_color(name=key, val=color) - return dict_color - - -def check_parameters(func=None, name_called_func=None, e=None): - """Check parameters string from error message of third party packages""" - list_arg_str = ["property ", "attribute ", "argument ", "parameter "] - str_error = "" - for arg_str in list_arg_str: - if arg_str in str(e): - error_arg = str(e).split(arg_str)[1] - str_error += "Error due to {} parameter. ".format(error_arg) - break - args = [x for x in inspect.getfullargspec(func).args if x != "self"] - str_error += "Arguments are allowed from {} and as follows: {}".format(name_called_func, args) - return str_error - - -# Check heatmap plotting -def check_vmin_vmax(vmin=None, vmax=None): - """Check if number of cmap colors is valid with given value range""" - ut.check_float(name="vmin", val=vmin, accept_none=True, just_float=False) - ut.check_float(name="vmax", val=vmax, accept_none=True, just_float=False) - if vmin is not None and vmax is not None and vmin >= vmax: - raise ValueError(f"'vmin' ({vmin}) < 'vmax' ({vmax}) not fulfilled.") - -# Check barplot and profile -def check_grid_axis(grid_axis=None): - """""" - list_valid = ["x", 'y', 'both'] - if grid_axis not in list_valid: - raise ValueError(f"'grid_axis' ('{grid_axis}') not valid. 
Chose from following: {list_valid}") +import aaanalysis as aa +import aaanalysis.utils as ut -# Check stat plot -def check_ylabel_fontweight(ylabel_fontweight=None, accept_none=True): - """""" - if accept_none and ylabel_fontweight is None: - return - name = "ylabel_fontweight" - args = dict(name=name, val=ylabel_fontweight) - list_weights = ['light', 'medium', 'bold'] - if type(ylabel_fontweight) in [float, int]: - ut.check_non_negative_number(**args, min_val=0, max_val=1000, just_int=False) - elif isinstance(ylabel_fontweight, str): - if ylabel_fontweight not in list_weights: - error = f"'{name}' ({ylabel_fontweight}) should be one of following: {list_weights}" - raise ValueError(error) - else: - error = f"'{name}' ({ylabel_fontweight}) should be either numeric value in range 0-1000" \ - f"\n\tor one of following: {list_weights}" - raise ValueError(error) +# I Helper Functions # Filtering functions @@ -234,60 +18,15 @@ def _filtering_info(df=None, df_scales=None, check_cat=True): """Get datasets structures for filtering, two dictionaries with feature to scales category resp. 
feature positions and one datasets frame with paired pearson correlations of all scales""" if check_cat: - dict_c = dict(zip(df[_ut.COL_FEATURE], df["category"])) + dict_c = dict(zip(df[ut.COL_FEATURE], df["category"])) else: dict_c = dict() - dict_p = dict(zip(df[_ut.COL_FEATURE], [set(x) for x in df["positions"]])) + dict_p = dict(zip(df[ut.COL_FEATURE], [set(x) for x in df["positions"]])) df_cor = df_scales.corr() return dict_c, dict_p, df_cor -# Plotting functions -def _get_df_pos(df_feat=None, df_cat=None, y="subcategory", val_col="mean_dif", - value_type="mean", normalize=False, - tmd_len=20, jmd_n_len=10, jmd_c_len=10, start=1): - """Helper method for plotting""" - normalize, normalize_for_pos = check_normalize(normalize=normalize) - cpp_plot = CPPPlots(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, start=start) - df_pos = cpp_plot.get_df_pos(df=df_feat.copy(), y=y, value_type=value_type, val_col=val_col, - normalize=normalize, - normalize_for_pos=normalize_for_pos) - # Sort according to given categories - list_cat = list(df_cat[y].drop_duplicates()) - list_col = list(df_pos.T) - sorted_col = [x for x in list_cat if x in list_col] - df_pos = df_pos.T[sorted_col].T - return df_pos - - -def _add_importance_map(ax=None, df_feat=None, df_cat=None, start=None, args_len=None, y=None): - """""" - _df_pos = _get_df_pos(df_feat=df_feat, df_cat=df_cat, y=y, val_col=_ut.COL_FEAT_IMPORTANCE, - value_type="sum", normalize="positions_only", start=start, **args_len) - _df = pd.melt(_df_pos.reset_index(), id_vars="index") - _df.columns = [_ut.COL_SUBCAT, "position", _ut.COL_FEAT_IMPORTANCE] - _list_sub_cat = _df[_ut.COL_SUBCAT].unique() - for i, sub_cat in enumerate(_list_sub_cat): - _dff = _df[_df[_ut.COL_SUBCAT] == sub_cat] - for pos, val in enumerate(_dff[_ut.COL_FEAT_IMPORTANCE]): - _symbol = "■" # "•" - color = "black" - size = 12 if val >= 1 else (8 if val >= 0.5 else 4) - _args_symbol = dict(ha="center", va="center", color=color, size=size) - if val >= 
0.2: - ax.text(pos + 0.5, i + 0.5, _symbol, **_args_symbol) - - -def _set_size_to_optimized_value(seq_size=None, tmd_jmd_fontsize=None, opt_size=None): - """Set sizes to given value if None""" - if tmd_jmd_fontsize is None: - tmd_jmd_fontsize = opt_size - args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - return args_size - # TODO simplify checks & interface (end-to-end check with tests & docu) -# TODO plot_functions test & refactor (end-to-end) - # II Main Functions class CPP: @@ -306,13 +45,7 @@ class CPP: Nested dictionary with parameter dictionary for each chosen split_type. accept_gaps : bool, default = False Whether to accept missing values by enabling omitting for computations (if True). - jmd_n_len : int, >=0, default = 10 - Length of JMD-N. - jmd_c_len : int, >=0, default = 10 - Length of JMD-C. - ext_len : int, >=0, default = 4 - Length of TMD-extending part (starting from C and N terminal part of TMD). - Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len. + verbose : bool, default = True Whether to print progress information about the algorithm (if True). @@ -321,34 +54,27 @@ class CPP: The CPP.run() method performs all steps of the CPP algorithm. 
""" def __init__(self, df_scales=None, df_cat=None, df_parts=None, split_kws=None, - accept_gaps=False, jmd_n_len=10, jmd_c_len=10, ext_len=4, verbose=True): + accept_gaps=False, verbose=True): # Load default scales if not specified sf = SequenceFeature() if df_cat is None: - df_cat = aa.load_scales(name=_ut.STR_SCALE_CAT) + df_cat = aa.load_scales(name=ut.STR_SCALE_CAT) if df_scales is None: df_scales = aa.load_scales() if split_kws is None: split_kws = sf.get_split_kws() ut.check_bool(name="verbose", val=verbose) - _ut.check_df_parts(df_parts=df_parts, verbose=verbose) - df_parts = _ut.check_df_scales(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) - df_cat, df_scales = _ut.check_df_cat(df_cat=df_cat, df_scales=df_scales, verbose=verbose) - _ut.check_split_kws(split_kws=split_kws) - check_len_ext_and_jmd(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) + ut.check_df_parts(df_parts=df_parts, verbose=verbose) + df_parts = ut.check_df_scales(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) + df_cat, df_scales = ut.check_df_cat(df_cat=df_cat, df_scales=df_scales, verbose=verbose) + ut.check_split_kws(split_kws=split_kws) self._verbose = verbose self._accept_gaps = accept_gaps # Feature components: Scales + Part + Split - self.df_cat = df_cat - self.df_scales = df_scales - self.df_parts = df_parts + self.df_cat = df_cat.copy() + self.df_scales = df_scales.copy() + self.df_parts = df_parts.copy() self.split_kws = split_kws - # Set consistent length of JMD_N, JMD_C, TMD flanking amino acids (TMD-E) - self.jmd_n_len = jmd_n_len - self.jmd_c_len = jmd_c_len - self.ext_len = ext_len - # Axes dict for plotting - self.ax_seq = None # Adder methods for CPP analysis (used in run method) def _add_scale_info(self, df_feat=None): @@ -367,16 +93,16 @@ def _add_scale_info(self, df_feat=None): Feature DataFrame including scale categories. 
""" # Check input - df_feat = _ut.check_df_feat(df_feat=df_feat) + df_feat = ut.check_df_feat(df_feat=df_feat) # Add scale categories df_cat = self.df_cat.copy() - i = df_feat.columns.get_loc(_ut.COL_FEATURE) - for col in [_ut.COL_SCALE_DES, _ut.COL_SCALE_NAME, _ut.COL_SUBCAT, _ut.COL_CAT]: + i = df_feat.columns.get_loc(ut.COL_FEATURE) + for col in [ut.COL_SCALE_DES, ut.COL_SCALE_NAME, ut.COL_SUBCAT, ut.COL_CAT]: if col in list(df_feat): df_feat.drop(col, inplace=True, axis=1) - dict_cat = dict(zip(df_cat[_ut.COL_SCALE_ID], df_cat[col])) - vals = [dict_cat[s.split("-")[2]] for s in df_feat[_ut.COL_FEATURE]] + dict_cat = dict(zip(df_cat[ut.COL_SCALE_ID], df_cat[col])) + vals = [dict_cat[s.split("-")[2]] for s in df_feat[ut.COL_FEATURE]] df_feat.insert(i + 1, col, vals) return df_feat @@ -408,12 +134,12 @@ def _add_stat(self, df_feat=None, labels=None, parametric=False, accept_gaps=Fal as implemented in SciPy. """ # Check input - df_feat = _ut.check_df_feat(df_feat=df_feat) - _ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts") + df_feat = ut.check_df_feat(df_feat=df_feat) + ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts") ut.check_bool(name="parametric", val=parametric) # Add feature statistics - features = list(df_feat[_ut.COL_FEATURE]) + features = list(df_feat[ut.COL_FEATURE]) sf = SequenceFeature() sfs = SequenceFeatureStatistics() X = sf.feat_matrix(df_parts=self.df_parts, @@ -423,120 +149,17 @@ def _add_stat(self, df_feat=None, labels=None, parametric=False, accept_gaps=Fal df_feat = sfs.add_stat(df=df_feat, X=X, y=labels, parametric=parametric) return df_feat - def add_positions(self, df_feat=None, tmd_len=20, start=1): - """ - Add sequence positions to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to add feature positions. - tmd_len: int, >0 - Length of TMD. - start: int, >=0 - Position label of first amino acid position (starting at N-terminus). 
- - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature positions. - - Notes - ----- - The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len). - """ + @staticmethod + def _add_positions(df_feat=None, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, start=1): + """Add sequence positions to DataFrame.""" # Check input (length checked by SequenceFeaturePositions) - df_feat = _ut.check_df_feat(df_feat=df_feat) - + df_feat = ut.check_df_feat(df_feat=df_feat) # Add positions of features - sfp = SequenceFeaturePositions() - dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, - jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, - ext_len=self.ext_len, start=start) - df_feat["positions"] = sfp.get_positions(dict_part_pos=dict_part_pos, features=list(df_feat[_ut.COL_FEATURE])) - return df_feat - - @staticmethod - def add_shap(df_feat=None, col_shap="shap_value", name_feat_impact="feat_impact"): - """ - Convert SHAP values into feature impact/importance and add to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to which the feature impact will be added. - col_shap: str, default = 'shap_value' - Column name of `SHAP `_ values in the feature DataFrame. - name_feat_impact: str, default = 'feat_impact' - Column name of feature impact or feature importance that will be added to the feature DataFrame. - - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature impact. - - Notes - ----- - - SHAP (SHapley Additive exPlanations) is a game theoretic approach to explain the output of any machine learning model. - - SHAP values represent a feature's responsibility for a change in the model output. - - Missing values are accepted in SHAP values. 
- - """ - - # Check input - df_feat = df_feat.copy() - ut.check_str(name="name_feat_impact", val=name_feat_impact) - ut.check_str(name="col_shap", val=col_shap) - df_feat = _ut.check_df_feat(df_feat=df_feat) - check_shap_value_for_feat_impact(df_feat=df_feat, col_shap=col_shap) - check_feat_impact_in_df_feat(df_feat=df_feat, name_feat_impact=name_feat_impact) - - # Compute feature impact (accepting missing values) - shap_values = np.array(df_feat[col_shap]) - feat_impact = shap_values / np.nansum(np.abs(shap_values)) * 100 - shap_loc = df_feat.columns.get_loc(col_shap) - df_feat.insert(shap_loc + 1, name_feat_impact, feat_impact) - return df_feat - - def add_sample_dif(self, df_feat=None, df_seq=None, labels=None, sample_name=str, ref_group=0, accept_gaps=False): - """ - Add feature value difference between sample and reference group to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to add sample difference. - df_seq: :class:`pandas.DataFrame` - DataFrame with sequences and sample names, in which the given sample name is included. - labels: array-like, shape (n_samples) - Class labels for samples in sequence DataFrame. - sample_name: str - Name of sample for which the feature value difference to a given reference group should be computed. - ref_group: int, default = 0 - Class label of reference group. - accept_gaps: bool, default = False - Whether to accept missing values by enabling omitting for computations (if True). - - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature value difference. 
- """ - # Check input - df_feat = _ut.check_df_feat(df_feat=df_feat) - _ut.check_df_seq(df_seq=df_seq, jmd_c_len=self.jmd_c_len, jmd_n_len=self.jmd_c_len) - _ut.check_labels(labels=labels, df=df_seq, name_df="df_seq") - check_ref_group(ref_group=ref_group, labels=labels) - check_sample_in_df_seq(sample_name=sample_name, df_seq=df_seq) - # Add sample difference to reference group + features = df_feat[ut.COL_FEATURE].to_list() sf = SequenceFeature() - X = sf.feat_matrix(features=list(df_feat["feature"]), - df_parts=self.df_parts, - df_scales=self.df_scales, - accept_gaps=accept_gaps) - mask = [True if x == ref_group else False for x in labels] - i = list(df_seq[_ut.COL_NAME]).index(sample_name) - df_feat[f"dif_{sample_name}"] = X[i] - X[mask].mean() + feat_positions = sf.add_position(features=features, tmd_len=tmd_len, start=start, + jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) + df_feat[ut.COL_POSITION] = feat_positions return df_feat # Filtering methods @@ -544,16 +167,16 @@ def add_sample_dif(self, df_feat=None, df_seq=None, labels=None, sample_name=str def _pre_filtering(features=None, abs_mean_dif=None, std_test=None, max_std_test=0.2, n=10000): """CPP pre-filtering based on thresholds.""" df = pd.DataFrame(zip(features, abs_mean_dif, std_test), - columns=[_ut.COL_FEATURE, _ut.COL_ABS_MEAN_DIF, _ut.COL_STD_TEST]) - df = df[df[_ut.COL_STD_TEST] <= max_std_test] - df = df.sort_values(by=_ut.COL_ABS_MEAN_DIF, ascending=False).head(n) + columns=[ut.COL_FEATURE, ut.COL_ABS_MEAN_DIF, ut.COL_STD_TEST]) + df = df[df[ut.COL_STD_TEST] <= max_std_test] + df = df.sort_values(by=ut.COL_ABS_MEAN_DIF, ascending=False).head(n) return df def _filtering(self, df=None, max_overlap=0.5, max_cor=0.5, n_filter=100, check_cat=True): """CPP filtering algorithm based on redundancy reduction in descending order of absolute AUC.""" dict_c, dict_p, df_cor = _filtering_info(df=df, df_scales=self.df_scales, check_cat=check_cat) - df = df.sort_values(by=[_ut.COL_ABS_AUC, 
_ut.COL_ABS_MEAN_DIF], ascending=False).copy().reset_index(drop=True) - list_feat = list(df[_ut.COL_FEATURE]) + df = df.sort_values(by=[ut.COL_ABS_AUC, ut.COL_ABS_MEAN_DIF], ascending=False).copy().reset_index(drop=True) + list_feat = list(df[ut.COL_FEATURE]) list_top_feat = [list_feat.pop(0)] # List with best feature for feat in list_feat: add_flag = True @@ -574,12 +197,14 @@ def _filtering(self, df=None, max_overlap=0.5, max_cor=0.5, n_filter=100, check_ add_flag = False if add_flag: list_top_feat.append(feat) - df_top_feat = df[df[_ut.COL_FEATURE].isin(list_top_feat)] + df_top_feat = df[df[ut.COL_FEATURE].isin(list_top_feat)] return df_top_feat # Main method - def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, check_cat=True, - n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, n_processes=None): + def run(self, labels=None, parametric=False, n_filter=100, + tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, start=1, + check_cat=True, n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, + n_processes=None): """ Perform CPP pipeline by creation and two-step filtering of features. CPP aims to identify a collection of non-redundant features that are most discriminant between @@ -596,9 +221,16 @@ def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, n_pre_filter : int, optional Number of feature to be pre-filtered by CPP algorithm. If None, a percentage of all features is used. tmd_len : int, >0 - Length of Transmembrane Domain (TMD) used for positions. + Length of TMD used for positions. TODO add link to explanation start : int, >=0 Position label of first amino acid position (starting at N-terminus). + jmd_n_len : int, >=0, default = 10 + Length of JMD-N. + jmd_c_len : int, >=0, default = 10 + Length of JMD-C. + ext_len : int, >=0, default = 4 + Length of TMD-extending part (starting from C and N terminal part of TMD). 
+ Should be shorter than jmd_n_len and jmd_c_len check_cat : bool, default = True Whether to check for redundancy within scale categories. pct_pre_filter : int, default = 5 @@ -636,7 +268,8 @@ def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, 11. positions: Feature positions for default settings """ # Check input - _ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts") + ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts") + ut.check_args_len(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) ut.check_non_negative_number(name="n_filter", val=n_filter, min_val=1) ut.check_non_negative_number(name="n_pre_filter", val=n_pre_filter, min_val=1, accept_none=True) ut.check_non_negative_number(name="pct_pre_filter", val=pct_pre_filter, min_val=5, max_val=100) @@ -647,9 +280,9 @@ def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, args = dict(split_kws=self.split_kws, df_scales=self.df_scales) if self._verbose: sf = SequenceFeature() - n_feat = len(sf.features(**args, list_parts=list(self.df_parts))) + n_feat = len(sf.get_features(**args, list_parts=list(self.df_parts))) print(f"1. CPP creates {n_feat} features for {len(self.df_parts)} samples") - _ut.print_start_progress() + ut.print_start_progress() # Pre-filtering: Select best n % of feature (filter_pct) based std(test set) and mean_dif sfs = SequenceFeatureStatistics() abs_mean_dif, std_test, features = sfs.pre_filtering_info(**args, @@ -661,8 +294,8 @@ def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, if n_pre_filter is None: n_pre_filter = int(len(features) * (pct_pre_filter / 100)) if self._verbose: - _ut.print_finished_progress() - print(f"2. CPP pre-filters {n_pre_filter} features ({pct_pre_filter}%) with highest '{_ut.COL_ABS_MEAN_DIF}'" + ut.print_finished_progress() + print(f"2.
CPP pre-filters {n_pre_filter} features ({pct_pre_filter}%) with highest '{ut.COL_ABS_MEAN_DIF}'" f" and 'max_std_test' <= {max_std_test}") df = self._pre_filtering(features=features, abs_mean_dif=abs_mean_dif, @@ -673,422 +306,15 @@ def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, df = self._add_stat(df_feat=df, labels=labels, parametric=parametric, accept_gaps=self._accept_gaps) if self._verbose: print(f"3. CPP filtering algorithm") - df = self.add_positions(df_feat=df, tmd_len=tmd_len, start=start) + df = self._add_positions(df_feat=df, tmd_len=tmd_len, start=start) df = self._add_scale_info(df_feat=df) - df = self._filtering(df=df, n_filter=n_filter, check_cat=check_cat, max_overlap=max_overlap, max_cor=max_cor) - df.reset_index(drop=True, inplace=True) + df_feat = self._filtering(df=df, n_filter=n_filter, check_cat=check_cat, max_overlap=max_overlap, max_cor=max_cor) + df_feat.reset_index(drop=True, inplace=True) if self._verbose: - print(f"4. CPP returns df with {len(df)} unique features including general information and statistics") - return df - - # Plotting methods - def plot_profile(self, df_feat=None, y="category", val_col="mean_dif", val_type="count", normalize=False, - figsize=(7, 5), title=None, title_kws=None, - dict_color=None, edge_color="none", bar_width=0.75, - add_jmd_tmd=True, tmd_len=20, start=1, - jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, - tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", - seq_size=None, tmd_jmd_fontsize=None, - xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, xticks_pos=False, - ytick_size=None, ytick_width=2.0, ytick_length=5.0, ylim=None, - highlight_tmd_area=True, highlight_alpha=0.15, - grid=False, grid_axis="both", - add_legend_cat=True, legend_kws=None, - shap_plot=False, - **kwargs): - """ - Plot feature profile for given features from 'df_feat'. 
- - Parameters - ---------- - df_feat : class:`pandas.DataFrame`, optional, default=None - Dataframe containing the features to be plotted. If None, default features from the instance will be used. - y : str, default='category' - Column name in df_feat which contains the categories for grouping. - val_col : str, default='mean_dif' - Column name in df_feat which contains the values to be plotted. - val_type : str, default='count' - Type of value. Available options are specified by the `check_value_type` function. - normalize : bool, default=False - If True, the feature values will be normalized. - figsize : tuple, default=(7, 5) - Size of the plot. - title : str, optional - Title of the plot. - title_kws : dict, optional - Keyword arguments to customize the title appearance. - dict_color : dict, optional - Dictionary mapping categories to colors. - edge_color : str, default='none' - Color of the edges of the bars. - bar_width : float, default=0.75 - Width of the bars. - add_jmd_tmd : bool, default=True - If True, adds JMD and TMD lines/annotations to the plot. - tmd_len : int, default=20 - Length of the TMD. - start : int, default=1 - Start position. - jmd_n_seq : str, optional - JMD N-terminal sequence. - tmd_seq : str, optional - TMD sequence. - jmd_c_seq : str, optional - JMD C-terminal sequence. - tmd_color : str, default='mediumspringgreen' - Color for TMD. - jmd_color : str, default='blue' - Color for JMD. - tmd_seq_color : str, default='black' - Color for TMD sequence. - jmd_seq_color : str, default='white' - Color for JMD sequence. - seq_size : float, optional - Font size for sequence annotations. - tmd_jmd_fontsize : float, optional - Font size for TMD and JMD annotations. - xtick_size : float, default=11.0 - Size for x-tick labels. - xtick_width : float, default=2.0 - Width of the x-ticks. - xtick_length : float, default=5.0 - Length of the x-ticks. - xticks_pos : bool, default=False - If True, x-tick positions are adjusted based on given sequences. 
- ytick_size : float, optional - Size for y-tick labels. - ytick_width : float, default=2.0 - Width of the y-ticks. - ytick_length : float, default=5.0 - Length of the y-ticks. - ylim : tuple, optional - Y-axis limits. - highlight_tmd_area : bool, default=True - If True, highlights the TMD area on the plot. - highlight_alpha : float, default=0.15 - Alpha value for TMD area highlighting. - grid : bool, default=False - If True, a grid is added to the plot. - grid_axis : str, default='both' - Axis on which the grid is drawn. Options: 'both', 'x', 'y'. - add_legend_cat : bool, default=True - If True, a legend is added for categories. - legend_kws : dict, optional - Keyword arguments for the legend. - shap_plot : bool, default=False - If True, SHAP (SHapley Additive exPlanations) plot is generated. - **kwargs : dict - Other keyword arguments passed to internal functions or plotting libraries. - - Returns - ------- - ax : matplotlib.axes.Axes - The axes object containing the plot. - - """ - # Group arguments - args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq,) - args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - args_len = check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) - args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) - args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) - args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) - - # Checking input - # Args checked by Matplotlib: title, legend_kws - # Args checked by internal plotting functions: ylim - ut.check_non_negative_number(name="bar_width", val=bar_width, min_val=0, just_int=False) - ut.check_non_negative_number(name="start", val=start, min_val=0) - ut.check_non_negative_number(name="tmd_area_alpha", val=highlight_alpha, min_val=0, max_val=1, just_int=False) - ut.check_bool(name="add_jmd_tmd", 
val=add_jmd_tmd) - ut.check_bool(name="highlight_tmd_area", val=highlight_tmd_area) - ut.check_bool(name="grid", val=grid) - ut.check_bool(name="shap_plot", val=shap_plot) - ut.check_bool(name="add_legend_cat", val=add_legend_cat) - _ut.check_color(name="edge_color", val=edge_color, accept_none=True) - ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) - - _ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, type_check="numerical") - _ut.check_y_categorical(df=df_feat, y=y) - df_feat = _ut.check_df_feat(df_feat=df_feat) - check_value_type(val_type=val_type, count_in=True) - check_args_ytick(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length) - check_figsize(figsize=figsize) - dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) - check_grid_axis(grid_axis=grid_axis) - # Get df positions - df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) - df_pos = _get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, - value_type=val_type, normalize=normalize, start=start, **args_len) - # Plotting - cpp_plot = CPPPlots(**args_len, start=start) - try: - ax = cpp_plot.profile(df_pos=df_pos, figsize=figsize, ylim=ylim, - dict_color=dict_color, edge_color=edge_color, bar_width=bar_width, - add_legend=add_legend_cat, legend_kws=legend_kws, shap_plot=shap_plot, - **args_xtick, **kwargs) - except AttributeError as e: - error_message = check_parameters(func=self.plot_profile, name_called_func="pd.DataFrame.plot", e=e) - raise AttributeError(error_message) - cpp_plot.set_title(title=title, title_kws=title_kws) - - # Autosize tmd sequence & annotation - opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos, label_term=False) - # Set default ylabel - ylabel = "Feature impact" if shap_plot else f"Feature count (-/+ {val_col})" - ax.set_ylabel(ylabel, size=opt_size) - # Adjust y ticks - ytick_size = opt_size if ytick_size is None else ytick_size - 
plt.yticks(size=ytick_size) - plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) - sns.despine(top=True, right=True) - # Add grid - if grid: - ax.set_axisbelow(True) # Grid behind datasets - ax.grid(which="major", axis=grid_axis, linestyle="-") - # Add tmd area - if highlight_tmd_area: - cpp_plot.highlight_tmd_area(ax=ax, x_shift=-0.5, tmd_color=tmd_color, alpha=highlight_alpha) - # Add tmd_jmd sequence if sequence is given - if type(tmd_seq) == str: - ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, **args_part_color, **args_seq_color, - xticks_pos=xticks_pos, heatmap=False, x_shift=0, - xtick_size=xtick_size) # Add tmd_jmd bar - self.ax_seq = ax - elif add_jmd_tmd: - size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize - cpp_plot.add_tmd_jmd_bar(ax=ax, x_shift=-0.5, **args_part_color, add_white_bar=False) - cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0, **args_xtick) - cpp_plot.add_tmd_jmd_text(ax=ax, x_shift=-0.5, tmd_jmd_fontsize=size) - - # Set current axis to main axis object depending on tmd sequence given or not - plt.yticks(size=ytick_size) - plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) - plt.sca(plt.gcf().axes[0]) - ax = plt.gca() - return ax - - def plot_heatmap(self, df_feat=None, y="subcategory", val_col="mean_dif", val_type="mean", normalize=False, - figsize=(8, 5), title=None, title_kws=None, - vmin=None, vmax=None, grid_on=True, - cmap="RdBu_r", cmap_n_colors=None, dict_color=None, cbar_kws=None, facecolor_dark=False, - add_jmd_tmd=True, tmd_len=20, start=1, - jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, - tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", - seq_size=None, tmd_jmd_fontsize=None, - xticks_pos=False, xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, ytick_size=None, - add_legend_cat=True, legend_kws=None, - add_importance_map=False, cbar_pct=False, **kwargs): - """ - Plot 
a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis). - - This is a wrapper function for :func:`seaborn.heatmap`, designed to highlight differences between two sets - of sequences at the positional level (e.g., amino acid level for protein sequences). - - Parameters - ---------- - df_feat : :class:`~pandas.DataFrame`, shape (n_feature, n_feature_information) - DataFrame containing unique identifiers, scale information, statistics, and positions for each feature. - y : {'category', 'subcategory', 'scale_name'}, str, default = 'subcategory' - Name of the column in the feature DataFrame representing scale information (shown on the y-axis). - val_col : {'mean_dif', 'feat_impact', 'abs_auc', 'std_test', ...}, str, default = 'mean_dif' - Name of the column in the feature DataFrame containing numerical values to display. - val_type : {'mean', 'sum', 'std'}, str, default = 'mean' - Method to aggregate numerical values from 'val_col'. - normalize : {True, False, 'positions', 'positions_only'}, bool/str, default = False - Specifies normalization for numerical values in 'val_col': - - False: Set value at all positions of a feature without further normalization. - - - True: Set value at all positions of a feature and normalize across all features. - - - 'positions': Value/number of positions set at each position of a feature and normalized across features. - Recommended when aiming to emphasize features with fewer positions using 'val_col'='feat_impact' and 'value_type'='mean'. - - figsize : tuple(float, float), default = (10,7) - Width and height of the figure in inches passed to :func:`matplotlib.pyplot.figure`. - title : str, optional - Title of figure used by :func:`matplotlib.pyplot.title`. - title_kws : dict, optional - Keyword arguments passed to :func:`matplotlib.pyplot.title`. - vmin, vmax : float, optional - Values to anchor the colormap, otherwise, inferred from data and other keyword arguments. 
- cmap : matplotlib colormap name or object, or list of colors, default = 'seismic' - Name of color map assigning data values to color space. If 'SHAP', colors from - `SHAP `_ will be used (recommended for feature impact). - cmap_n_colors : int, optional - Number of discrete steps in diverging or sequential color map. - dict_color : dict, optional - Map of colors for scale categories classifying scales shown on y-axis. - cbar_kws : dict of key, value mappings, optional - Keyword arguments for :meth:`matplotlib.figure.Figure.colorbar`. - add_jmd_tmd : bool, default = True - Whether to add colored bar under heatmap indicating sequence parts (JMD-N, TMD, JMD-C). - tmd_len : int, >0 - Length of TMD to be depiceted. - start : int, >=0 - Position label of first amino acid position (starting at N-terminus). - tmd_seq : str, optional - Sequence of TMD. 'tmd_len' is set to length of TMD if sequence for TMD, JMD-N and JMD-C are given. - Recommended if feature impact or mean difference should be depicted for one sample. - jmd_n_seq : str, optional - Sequence of JMD_N. 'jmd_n_len' is set to length of JMD_N if sequence for TMD, JMD-N and JMD-C are given. - Recommended if feature impact or mean difference should be depicted for one sample. - jmd_c_seq : str, optional - Sequence of JMD_C. 'jmd_c_len' is set to length of JMD_C if sequence for TMD, JMD-N and JMD-C are given. - Recommended if feature impact or mean difference should be depicted for one sample. - tmd_color : str, default = 'mediumspringgreen' - Color of TMD bar. - jmd_color : str, default = 'blue' - Color of JMD-N and JMD-C bar. - tmd_seq_color : str, default = 'black' - Color of TMD sequence. - jmd_seq_color : str, default = 'white' - Color of JMD-N and JMD-C sequence. - seq_size : float, optional - Font size of all sequence parts in points. If None, optimized automatically. - tmd_jmd_fontsize : float, optional - Font size of 'TMD', 'JMD-N' and 'JMD-C' label in points. If None, optimized automatically. 
- xtick_size : float, default = 11.0 - Size of x ticks in points. Passed as 'size' argument to :meth:`matplotlib.axes.Axes.set_xticklabels`. - xtick_width : float, default = 2.0 - Widht of x ticks in points. Passed as 'width' argument to :meth:`matplotlib.axes.Axes.tick_params`. - xtick_length : float, default = 5.0, - Length of x ticks in points. Passed as 'length' argument to :meth:`matplotlib.axes.Axes.tick_params`. - ytick_size : float, optional - Size of scale information as y ticks in points. Passed to :meth:`matplotlib.axes.Axes.tick_params`. - If None, optimized automatically. - add_legend_cat : bool, default = True, - Whether to add legend for categories under plot and classification of scales at y-axis. - legend_kws : dict, optional - Keyword arguments passed to :meth:`matplotlib.axes.Axes.legend` - kwargs : other keyword arguments - All other keyword arguments passed to :meth:`matplotlib.axes.Axes.pcolormesh`. - - Returns - ------- - ax : matplotlib Axes - Axes object containing the heatmap. - - Warnings - -------- - - 'cmap_n_colors' is effective only if 'vmin' and 'vmax' align with the data. - - - 'tmd_seq_color' and 'jmd_seq_color' are applicable only when 'tmd_seq', 'jmd_n_seq', and 'jmd_c_seq' are provided. - - See Also - -------- - seaborn.heatmap - Plotting heatmap using seaborn. - See `Seaborn documentation `_ for more details. - - Examples - -------- - - Plot CPP feature heatmap: - - .. 
plot:: - :context: close-figs - - >>> import matplotlib.pyplot as plt - >>> import aaanalysis as aa - >>> sf = aa.SequenceFeature() - >>> df_seq = aa.load_dataset(name='SEQ_DISULFIDE', min_len=100) - >>> labels = list(df_seq["label"]) - >>> df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) - >>> #split_kws = sf.get_split_kws(n_split_min=1, n_split_max=3, split_types=["Segment", "PeriodicPattern"]) - >>> #df_scales = aa.load_scales(unclassified_in=False).sample(n=10, axis=1) - >>> #cpp = aa.CPP(df_parts=df_parts, split_kws=split_kws, df_scales=df_scales) - >>> #df_feat = cpp.run(labels=labels) - >>> #cpp.plot_heatmap(df_feat=df_feat) - >>> #plt.tight_layout() - - """ - # Group arguments - args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq) - args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - args_len = check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) - args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) - args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) - args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) - - # Checking input - # Args checked by Matplotlib: title, cmap, cbar_kws, legend_kws - ut.check_non_negative_number(name="start", val=start, min_val=0) - ut.check_non_negative_number(name="ytick_size", val=ytick_size, accept_none=True, just_int=False, min_val=1) - ut.check_non_negative_number(name="cmap_n_colors", val=cmap_n_colors, min_val=1, accept_none=True) - ut.check_bool(name="add_jmd_tmd", val=add_jmd_tmd) - ut.check_bool(name="add_legend_cat", val=add_legend_cat) - ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) - ut.check_dict(name="cbar_kws", val=cbar_kws, accept_none=True) - _ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, type_check="numerical") - 
_ut.check_y_categorical(df=df_feat, y=y) - df_feat = _ut.check_df_feat(df_feat=df_feat, df_cat=self.df_cat) - check_value_type(val_type=val_type, count_in=False) - check_vmin_vmax(vmin=vmin, vmax=vmax) - check_figsize(figsize=figsize) - dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) - - # Get df positions - df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) - df_pos = _get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, - value_type=val_type, normalize=normalize, start=start, **args_len) - # Plotting - cpp_plot = CPPPlots(**args_len, start=start) - cpp_plot.set_figsize(figsize=figsize) # figsize is not used as argument in seaborn (but in pandas) - try: - linecolor = "gray" if facecolor_dark else "black" - if "linecolor" in kwargs: - linecolor = kwargs["linecolor"] - else: - kwargs["linecolor"] = linecolor - ax = cpp_plot.heatmap(df_pos=df_pos, vmin=vmin, vmax=vmax, grid_on=grid_on, - cmap=cmap, cmap_n_colors=cmap_n_colors, cbar_kws=cbar_kws, - x_shift=0.5, ytick_size=ytick_size, facecolor_dark=facecolor_dark, - cbar_pct=cbar_pct, **args_xtick, **kwargs) - ax.axvline(self.jmd_n_len, color=linecolor, linestyle="-", linewidth=1.5) - ax.axvline(x=self.jmd_n_len + args_len["tmd_len"], color=linecolor, linestyle="-", linewidth=1.5) - - except AttributeError as e: - error_message = check_parameters(func=self.plot_heatmap, name_called_func="sns.heatmap", e=e) - raise AttributeError(error_message) - cpp_plot.set_title(title=title, title_kws=title_kws) - # Autosize tmd sequence & annotation - opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos) - # Add importance map - if add_importance_map: - _add_importance_map(ax=ax, df_feat=df_feat, df_cat=self.df_cat, - start=start, args_len=args_len, y=y) - # Add scale classification - if add_legend_cat: - ax = cpp_plot.add_legend_cat(ax=ax, df_pos=df_pos, df_cat=self.df_cat, y=y, dict_color=dict_color, - legend_kws=legend_kws) - # Add tmd_jmd 
sequence if sequence is given - if isinstance(tmd_seq, str): - ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, **args_part_color, **args_seq_color, - xticks_pos=xticks_pos, - x_shift=0.5, xtick_size=xtick_size) - self.ax_seq = ax - # Add tmd_jmd bar - elif add_jmd_tmd: - size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize - cpp_plot.add_tmd_jmd_bar(ax=ax, **args_part_color) - cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0.5, **args_xtick) - cpp_plot.add_tmd_jmd_text(ax=ax, x_shift=0, tmd_jmd_fontsize=size) - # Set current axis to main axis object depending on tmd sequence given or not - plt.sca(plt.gcf().axes[0]) - ax = plt.gca() - return ax + print(f"4. CPP returns df with {len(df_feat)} unique features including general information and statistics") + return df_feat - def update_seq_size(self): - """""" - # TODO legend changes slightly if sequnece length altered (e.g. PTPRM_MOUSE vs A4_HUMAN) - # TODO look for more extreme example and text - f = lambda l: l.get_window_extent(ax.figure.canvas.get_renderer()) - ax = self.ax_seq - labels = ax.xaxis.get_ticklabels(which="both") - tick_positions = [f(l).x0 for l in labels] - sorted_tick_positions, sorted_labels = zip(*sorted(zip(tick_positions, labels), key=lambda t: t[0])) - # Adjust font size to prevent overlap - seq_size = get_optimal_fontsize(ax, sorted_labels) - for l in sorted_labels: - l.set_fontsize(seq_size) + @staticmethod + def eval(df_feat=None, features=None): + """Get evaluation for provided dataset""" + # TODO get evaluation for any dataset for compelete diff --git a/aaanalysis/cpp/cpp_plot.py b/aaanalysis/cpp/cpp_plot.py new file mode 100644 index 00000000..8add8bc9 --- /dev/null +++ b/aaanalysis/cpp/cpp_plot.py @@ -0,0 +1,657 @@ +""" +This is a script for ... 
+""" +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +import inspect + +import aaanalysis +from aaanalysis.cpp._cpp import CPPPlots, get_optimal_fontsize + +import aaanalysis.utils as ut + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + +# TODO simplify checks & interface (end-to-end check with tests & docu) +# TODO plot_functions test & refactor (end-to-end) + + +# I Helper Functions +def check_value_type(val_type=None, count_in=True): + """Check if value type is valid""" + list_value_type = ["mean", "sum", "std"] + if count_in: + list_value_type.append("count") + if val_type not in list_value_type: + raise ValueError(f"'val_type' ('{val_type}') should be on of following: {list_value_type}") + + +def check_normalize(normalize=True): + """Check normalize parameter""" + if not (type(normalize) == bool or normalize in ["positions", "positions_only"]): + raise ValueError(f"'normalize' ('{normalize}') should be bool or, if normalized for positions, 'positions'.") + normalize_for_positions = False if type(normalize) is bool else "positions" in normalize + normalize = normalize if type(normalize) is bool else "positions" == normalize + return normalize, normalize_for_positions + + +# Check for plotting methods +def check_args_size(seq_size=None, tmd_jmd_fontsize=None): + """Check if sequence size parameters match""" + ut.check_non_negative_number(name="seq_size", val=seq_size, min_val=0, accept_none=True, just_int=False) + ut.check_non_negative_number(name="tmd_jmd_fontsize", val=tmd_jmd_fontsize, min_val=0, accept_none=True, just_int=False) + args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) + return args_size + + +def check_args_xtick(xtick_size=None, xtick_width=None, xtick_length=None): + """Check if x tick parameters non-negative float""" + args = dict(accept_none=True, just_int=False, min_val=0) + ut.check_non_negative_number(name="xtick_size", val=xtick_size, **args) + 
ut.check_non_negative_number(name="xtick_width", val=xtick_width, **args) + ut.check_non_negative_number(name="xtick_length", val=xtick_length, **args) + args_xtick = dict(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) + return args_xtick + + +def check_args_ytick(ytick_size=None, ytick_width=None, ytick_length=None): + """Check if y tick parameters non-negative float""" + args = dict(accept_none=True, just_int=False, min_val=1) + ut.check_non_negative_number(name="ytick_size", val=ytick_size, **args) + ut.check_non_negative_number(name="ytick_width", val=ytick_width, **args) + ut.check_non_negative_number(name="ytick_length", val=ytick_length, **args) + args_ytick = dict(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length) + return args_ytick + + +def check_part_color(tmd_color=None, jmd_color=None): + """Check if part colors valid""" + ut.check_color(name="tmd_color", val=tmd_color) + ut.check_color(name="jmd_color", val=jmd_color) + args_part_color = dict(tmd_color=tmd_color, jmd_color=jmd_color) + return args_part_color + + +def check_seq_color(tmd_seq_color=None, jmd_seq_color=None): + """Check sequence colors""" + ut.check_color(name="tmd_seq_color", val=tmd_seq_color) + ut.check_color(name="jmd_seq_color", val=jmd_seq_color) + args_seq_color = dict(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) + return args_seq_color + + +def check_figsize(figsize=None): + """""" + ut.check_tuple(name="figsize", val=figsize, n=2) + ut.check_non_negative_number(name="figsize:width", val=figsize[0], min_val=1, just_int=False) + ut.check_non_negative_number(name="figsize:height", val=figsize[1], min_val=1, just_int=False) + + +def check_dict_color(dict_color=None, df_cat=None): + """Check if color dictionary is matching to DataFrame with categories""" + list_cats = list(sorted(set(df_cat[ut.COL_CAT]))) + if dict_color is None: + dict_color = ut.DICT_COLOR + if not isinstance(dict_color, dict): + raise 
ValueError(f"'dict_color' should be a dictionary with colors for: {list_cats}") + list_cat_not_in_dict_cat = [x for x in list_cats if x not in dict_color] + if len(list_cat_not_in_dict_cat) > 0: + error = f"'dict_color' not complete! Following categories are missing from 'df_cat': {list_cat_not_in_dict_cat}" + raise ValueError(error) + for key in dict_color: + color = dict_color[key] + ut.check_color(name=key, val=color) + return dict_color + + +def check_parameters(func=None, name_called_func=None, e=None): + """Check parameters string from error message of third party packages""" + list_arg_str = ["property ", "attribute ", "argument ", "parameter "] + str_error = "" + for arg_str in list_arg_str: + if arg_str in str(e): + error_arg = str(e).split(arg_str)[1] + str_error += "Error due to {} parameter. ".format(error_arg) + break + args = [x for x in inspect.getfullargspec(func).args if x != "self"] + str_error += "Arguments are allowed from {} and as follows: {}".format(name_called_func, args) + return str_error + + +# Check heatmap plotting +def check_vmin_vmax(vmin=None, vmax=None): + """Check if number of cmap colors is valid with given value range""" + ut.check_float(name="vmin", val=vmin, accept_none=True, just_float=False) + ut.check_float(name="vmax", val=vmax, accept_none=True, just_float=False) + if vmin is not None and vmax is not None and vmin >= vmax: + raise ValueError(f"'vmin' ({vmin}) < 'vmax' ({vmax}) not fulfilled.") + + +# Check barplot and profile +def check_grid_axis(grid_axis=None): + """""" + list_valid = ["x", 'y', 'both'] + if grid_axis not in list_valid: + raise ValueError(f"'grid_axis' ('{grid_axis}') not valid. 
Chose from following: {list_valid}") + + +# Check stat plot +def check_ylabel_fontweight(ylabel_fontweight=None, accept_none=True): + """""" + if accept_none and ylabel_fontweight is None: + return + name = "ylabel_fontweight" + args = dict(name=name, val=ylabel_fontweight) + list_weights = ['light', 'medium', 'bold'] + if type(ylabel_fontweight) in [float, int]: + ut.check_non_negative_number(**args, min_val=0, max_val=1000, just_int=False) + elif isinstance(ylabel_fontweight, str): + if ylabel_fontweight not in list_weights: + error = f"'{name}' ({ylabel_fontweight}) should be one of following: {list_weights}" + raise ValueError(error) + else: + error = f"'{name}' ({ylabel_fontweight}) should be either numeric value in range 0-1000" \ + f"\n\tor one of following: {list_weights}" + raise ValueError(error) + + +# Plotting functions +def _get_df_pos(df_feat=None, df_cat=None, y="subcategory", val_col="mean_dif", + value_type="mean", normalize=False, + tmd_len=20, jmd_n_len=10, jmd_c_len=10, start=1): + """Helper method for plotting""" + normalize, normalize_for_pos = check_normalize(normalize=normalize) + cpp_plot = CPPPlots(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, start=start) + df_pos = cpp_plot.get_df_pos(df=df_feat.copy(), y=y, value_type=value_type, val_col=val_col, + normalize=normalize, + normalize_for_pos=normalize_for_pos) + # Sort according to given categories + list_cat = list(df_cat[y].drop_duplicates()) + list_col = list(df_pos.T) + sorted_col = [x for x in list_cat if x in list_col] + df_pos = df_pos.T[sorted_col].T + return df_pos + + +def _add_importance_map(ax=None, df_feat=None, df_cat=None, start=None, args_len=None, y=None): + """""" + _df_pos = _get_df_pos(df_feat=df_feat, df_cat=df_cat, y=y, val_col=ut.COL_FEAT_IMPORTANCE, + value_type="sum", normalize="positions_only", start=start, **args_len) + _df = pd.melt(_df_pos.reset_index(), id_vars="index") + _df.columns = [ut.COL_SUBCAT, "position", ut.COL_FEAT_IMPORTANCE] + 
_list_sub_cat = _df[ut.COL_SUBCAT].unique() + for i, sub_cat in enumerate(_list_sub_cat): + _dff = _df[_df[ut.COL_SUBCAT] == sub_cat] + for pos, val in enumerate(_dff[ut.COL_FEAT_IMPORTANCE]): + _symbol = "■" # "•" + color = "black" + size = 12 if val >= 1 else (8 if val >= 0.5 else 4) + _args_symbol = dict(ha="center", va="center", color=color, size=size) + if val >= 0.2: + ax.text(pos + 0.5, i + 0.5, _symbol, **_args_symbol) + + +def _set_size_to_optimized_value(seq_size=None, tmd_jmd_fontsize=None, opt_size=None): + """Set sizes to given value if None""" + if tmd_jmd_fontsize is None: + tmd_jmd_fontsize = opt_size + args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) + return args_size + + +# TODO simplify interface (delete old profile) +# TODO add importance plot for heatmap +# TODO add ranking + +# II Main Functions +class CPPPlot: + """ + Create and filter features that are most discriminant between two sets of sequences. + + Parameters + ---------- + accept_gaps : bool, default = False + Whether to accept missing values by enabling omitting for computations (if True). + jmd_n_len : int, >=0, default = 10 + Length of JMD-N. + jmd_c_len : int, >=0, default = 10 + Length of JMD-C. + ext_len : int, >=0, default = 4 + Length of TMD-extending part (starting from C and N terminal part of TMD). + Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len. + verbose : bool, default = True + Whether to print progress information about the algorithm (if True). + + Notes + ----- + The CPP.run() method performs all steps of the CPP algorithm. 
+ """ + def __init__(self, df_cat=None, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, ext_len=4, verbose=True): + # Load default scales if not specified + + ut.check_bool(name="verbose", val=verbose) + if df_cat is None: + df_cat = aaanalysis.load_scales(name=ut.COL_SCALE_ID) + self.df_cat = df_cat + self._verbose = verbose + self._accept_gaps = accept_gaps + # Set consistent length of JMD_N, JMD_C, TMD flanking amino acids (TMD-E) + self.jmd_n_len = jmd_n_len + self.jmd_c_len = jmd_c_len + self.ext_len = ext_len + # Axes dict for plotting + self.ax_seq = None + + # Plotting methods + def profile(self, df_feat=None, y="category", val_col="mean_dif", val_type="count", normalize=False, + figsize=(7, 5), title=None, title_kws=None, + dict_color=None, edge_color="none", bar_width=0.75, + add_jmd_tmd=True, tmd_len=20, start=1, + jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, + tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", + seq_size=None, tmd_jmd_fontsize=None, + xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, xticks_pos=False, + ytick_size=None, ytick_width=2.0, ytick_length=5.0, ylim=None, + highlight_tmd_area=True, highlight_alpha=0.15, + grid=False, grid_axis="both", + add_legend_cat=True, legend_kws=None, + shap_plot=False, + **kwargs): + """ + Plot feature profile for given features from 'df_feat'. + + Parameters + ---------- + df_feat : class:`pandas.DataFrame`, optional, default=None + Dataframe containing the features to be plotted. If None, default features from the instance will be used. + y : str, default='category' + Column name in df_feat which contains the categories for grouping. + val_col : str, default='mean_dif' + Column name in df_feat which contains the values to be plotted. + val_type : str, default='count' + Type of value. Available options are specified by the `check_value_type` function. + normalize : bool, default=False + If True, the feature values will be normalized. 
+ figsize : tuple, default=(7, 5) + Size of the plot. + title : str, optional + Title of the plot. + title_kws : dict, optional + Keyword arguments to customize the title appearance. + dict_color : dict, optional + Dictionary mapping categories to colors. + edge_color : str, default='none' + Color of the edges of the bars. + bar_width : float, default=0.75 + Width of the bars. + add_jmd_tmd : bool, default=True + If True, adds JMD and TMD lines/annotations to the plot. + tmd_len : int, default=20 + Length of the TMD. + start : int, default=1 + Start position. + jmd_n_seq : str, optional + JMD N-terminal sequence. + tmd_seq : str, optional + TMD sequence. + jmd_c_seq : str, optional + JMD C-terminal sequence. + tmd_color : str, default='mediumspringgreen' + Color for TMD. + jmd_color : str, default='blue' + Color for JMD. + tmd_seq_color : str, default='black' + Color for TMD sequence. + jmd_seq_color : str, default='white' + Color for JMD sequence. + seq_size : float, optional + Font size for sequence annotations. + tmd_jmd_fontsize : float, optional + Font size for TMD and JMD annotations. + xtick_size : float, default=11.0 + Size for x-tick labels. + xtick_width : float, default=2.0 + Width of the x-ticks. + xtick_length : float, default=5.0 + Length of the x-ticks. + xticks_pos : bool, default=False + If True, x-tick positions are adjusted based on given sequences. + ytick_size : float, optional + Size for y-tick labels. + ytick_width : float, default=2.0 + Width of the y-ticks. + ytick_length : float, default=5.0 + Length of the y-ticks. + ylim : tuple, optional + Y-axis limits. + highlight_tmd_area : bool, default=True + If True, highlights the TMD area on the plot. + highlight_alpha : float, default=0.15 + Alpha value for TMD area highlighting. + grid : bool, default=False + If True, a grid is added to the plot. + grid_axis : str, default='both' + Axis on which the grid is drawn. Options: 'both', 'x', 'y'. 
+ add_legend_cat : bool, default=True + If True, a legend is added for categories. + legend_kws : dict, optional + Keyword arguments for the legend. + shap_plot : bool, default=False + If True, SHAP (SHapley Additive exPlanations) plot is generated. + **kwargs : dict + Other keyword arguments passed to internal functions or plotting libraries. + + Returns + ------- + ax : matplotlib.axes.Axes + The axes object containing the plot. + + """ + # Group arguments + args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq,) + args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) + args_len = ut.check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) + args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) + args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) + args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) + + # Checking input + # Args checked by Matplotlib: title, legend_kws + # Args checked by internal plotting functions: ylim + ut.check_non_negative_number(name="bar_width", val=bar_width, min_val=0, just_int=False) + ut.check_non_negative_number(name="start", val=start, min_val=0) + ut.check_non_negative_number(name="tmd_area_alpha", val=highlight_alpha, min_val=0, max_val=1, just_int=False) + ut.check_bool(name="add_jmd_tmd", val=add_jmd_tmd) + ut.check_bool(name="highlight_tmd_area", val=highlight_tmd_area) + ut.check_bool(name="grid", val=grid) + ut.check_bool(name="shap_plot", val=shap_plot) + ut.check_bool(name="add_legend_cat", val=add_legend_cat) + ut.check_color(name="edge_color", val=edge_color, accept_none=True) + ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) + + ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, col_type=[float, int]) + ut.check_y_categorical(df=df_feat, y=y) + df_feat = ut.check_df_feat(df_feat=df_feat) + 
check_value_type(val_type=val_type, count_in=True) + check_args_ytick(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length) + check_figsize(figsize=figsize) + dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) + check_grid_axis(grid_axis=grid_axis) + # Get df positions + df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) + df_pos = _get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, + value_type=val_type, normalize=normalize, start=start, **args_len) + # Plotting + cpp_plot = CPPPlots(**args_len, start=start) + try: + ax = cpp_plot.profile(df_pos=df_pos, figsize=figsize, ylim=ylim, + dict_color=dict_color, edge_color=edge_color, bar_width=bar_width, + add_legend=add_legend_cat, legend_kws=legend_kws, shap_plot=shap_plot, + **args_xtick, **kwargs) + except AttributeError as e: + error_message = check_parameters(func=self.profile, name_called_func="pd.DataFrame.plot", e=e) + raise AttributeError(error_message) + cpp_plot.set_title(title=title, title_kws=title_kws) + + # Autosize tmd sequence & annotation + opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos, label_term=False) + # Set default ylabel + ylabel = "Feature impact" if shap_plot else f"Feature count (-/+ {val_col})" + ax.set_ylabel(ylabel, size=opt_size) + # Adjust y ticks + ytick_size = opt_size if ytick_size is None else ytick_size + plt.yticks(size=ytick_size) + plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) + sns.despine(top=True, right=True) + # Add grid + if grid: + ax.set_axisbelow(True) # Grid behind datasets + ax.grid(which="major", axis=grid_axis, linestyle="-") + # Add tmd area + if highlight_tmd_area: + cpp_plot.highlight_tmd_area(ax=ax, x_shift=-0.5, tmd_color=tmd_color, alpha=highlight_alpha) + # Add tmd_jmd sequence if sequence is given + if type(tmd_seq) == str: + ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, 
**args_part_color, **args_seq_color, + xticks_pos=xticks_pos, heatmap=False, x_shift=0, + xtick_size=xtick_size) # Add tmd_jmd bar + self.ax_seq = ax + elif add_jmd_tmd: + size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize + cpp_plot.add_tmd_jmd_bar(ax=ax, x_shift=-0.5, **args_part_color, add_white_bar=False) + cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0, **args_xtick) + cpp_plot.add_tmd_jmd_text(ax=ax, x_shift=-0.5, tmd_jmd_fontsize=size) + + # Set current axis to main axis object depending on tmd sequence given or not + plt.yticks(size=ytick_size) + plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) + plt.sca(plt.gcf().axes[0]) + ax = plt.gca() + return ax + + def heatmap(self, df_feat=None, y="subcategory", val_col="mean_dif", val_type="mean", normalize=False, + figsize=(8, 5), title=None, title_kws=None, + vmin=None, vmax=None, grid_on=True, + cmap="RdBu_r", cmap_n_colors=None, dict_color=None, cbar_kws=None, facecolor_dark=False, + add_jmd_tmd=True, tmd_len=20, start=1, + jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, + tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", + seq_size=None, tmd_jmd_fontsize=None, + xticks_pos=False, xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, ytick_size=None, + add_legend_cat=True, legend_kws=None, + add_importance_map=False, cbar_pct=False, **kwargs): + """ + Plot a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis). + + This is a wrapper function for :func:`seaborn.heatmap`, designed to highlight differences between two sets + of sequences at the positional level (e.g., amino acid level for protein sequences). + + Parameters + ---------- + df_feat : :class:`~pandas.DataFrame`, shape (n_feature, n_feature_information) + DataFrame containing unique identifiers, scale information, statistics, and positions for each feature. 
+ y : {'category', 'subcategory', 'scale_name'}, str, default = 'subcategory' + Name of the column in the feature DataFrame representing scale information (shown on the y-axis). + val_col : {'mean_dif', 'feat_impact', 'abs_auc', 'std_test', ...}, str, default = 'mean_dif' + Name of the column in the feature DataFrame containing numerical values to display. + val_type : {'mean', 'sum', 'std'}, str, default = 'mean' + Method to aggregate numerical values from 'val_col'. + normalize : {True, False, 'positions', 'positions_only'}, bool/str, default = False + Specifies normalization for numerical values in 'val_col': + - False: Set value at all positions of a feature without further normalization. + + - True: Set value at all positions of a feature and normalize across all features. + + - 'positions': Value/number of positions set at each position of a feature and normalized across features. + Recommended when aiming to emphasize features with fewer positions using 'val_col'='feat_impact' and 'value_type'='mean'. + + figsize : tuple(float, float), default = (10,7) + Width and height of the figure in inches passed to :func:`matplotlib.pyplot.figure`. + title : str, optional + Title of figure used by :func:`matplotlib.pyplot.title`. + title_kws : dict, optional + Keyword arguments passed to :func:`matplotlib.pyplot.title`. + vmin, vmax : float, optional + Values to anchor the colormap, otherwise, inferred from data and other keyword arguments. + cmap : matplotlib colormap name or object, or list of colors, default = 'seismic' + Name of color map assigning data values to color space. If 'SHAP', colors from + `SHAP `_ will be used (recommended for feature impact). + cmap_n_colors : int, optional + Number of discrete steps in diverging or sequential color map. + dict_color : dict, optional + Map of colors for scale categories classifying scales shown on y-axis. 
+ cbar_kws : dict of key, value mappings, optional + Keyword arguments for :meth:`matplotlib.figure.Figure.colorbar`. + add_jmd_tmd : bool, default = True + Whether to add colored bar under heatmap indicating sequence parts (JMD-N, TMD, JMD-C). + tmd_len : int, >0 + Length of TMD to be depiceted. + start : int, >=0 + Position label of first amino acid position (starting at N-terminus). + tmd_seq : str, optional + Sequence of TMD. 'tmd_len' is set to length of TMD if sequence for TMD, JMD-N and JMD-C are given. + Recommended if feature impact or mean difference should be depicted for one sample. + jmd_n_seq : str, optional + Sequence of JMD_N. 'jmd_n_len' is set to length of JMD_N if sequence for TMD, JMD-N and JMD-C are given. + Recommended if feature impact or mean difference should be depicted for one sample. + jmd_c_seq : str, optional + Sequence of JMD_C. 'jmd_c_len' is set to length of JMD_C if sequence for TMD, JMD-N and JMD-C are given. + Recommended if feature impact or mean difference should be depicted for one sample. + tmd_color : str, default = 'mediumspringgreen' + Color of TMD bar. + jmd_color : str, default = 'blue' + Color of JMD-N and JMD-C bar. + tmd_seq_color : str, default = 'black' + Color of TMD sequence. + jmd_seq_color : str, default = 'white' + Color of JMD-N and JMD-C sequence. + seq_size : float, optional + Font size of all sequence parts in points. If None, optimized automatically. + tmd_jmd_fontsize : float, optional + Font size of 'TMD', 'JMD-N' and 'JMD-C' label in points. If None, optimized automatically. + xtick_size : float, default = 11.0 + Size of x ticks in points. Passed as 'size' argument to :meth:`matplotlib.axes.Axes.set_xticklabels`. + xtick_width : float, default = 2.0 + Widht of x ticks in points. Passed as 'width' argument to :meth:`matplotlib.axes.Axes.tick_params`. + xtick_length : float, default = 5.0, + Length of x ticks in points. Passed as 'length' argument to :meth:`matplotlib.axes.Axes.tick_params`. 
+ ytick_size : float, optional + Size of scale information as y ticks in points. Passed to :meth:`matplotlib.axes.Axes.tick_params`. + If None, optimized automatically. + add_legend_cat : bool, default = True, + Whether to add legend for categories under plot and classification of scales at y-axis. + legend_kws : dict, optional + Keyword arguments passed to :meth:`matplotlib.axes.Axes.legend` + kwargs : other keyword arguments + All other keyword arguments passed to :meth:`matplotlib.axes.Axes.pcolormesh`. + + Returns + ------- + ax : matplotlib Axes + Axes object containing the heatmap. + + Warnings + -------- + - 'cmap_n_colors' is effective only if 'vmin' and 'vmax' align with the _data. + + - 'tmd_seq_color' and 'jmd_seq_color' are applicable only when 'tmd_seq', 'jmd_n_seq', and 'jmd_c_seq' are provided. + + See Also + -------- + seaborn.heatmap + Plotting heatmap using seaborn. + See `Seaborn documentation `_ for more details. + + Examples + -------- + + Plot CPP feature heatmap: + + .. 
plot:: + :context: close-figs + + >>> import matplotlib.pyplot as plt + >>> import aaanalysis as aa + >>> sf = aa.SequenceFeature() + >>> df_seq = aa.load_dataset(name='SEQ_DISULFIDE', min_len=100) + >>> labels = list(df_seq["label"]) + >>> df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) + >>> #split_kws = sf.get_split_kws(n_split_min=1, n_split_max=3, split_types=["Segment", "PeriodicPattern"]) + >>> #df_scales = aa.load_scales(unclassified_in=False).sample(n=10, axis=1) + >>> #cpp = aa.CPP(df_parts=df_parts, split_kws=split_kws, df_scales=df_scales) + >>> #df_feat = cpp.run(labels=labels) + >>> #cpp.plot_heatmap(df_feat=df_feat) + >>> #plt.tight_layout() + + """ + # Group arguments + args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq) + args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) + args_len = ut.check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) + args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) + args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) + args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) + + # Checking input + # Args checked by Matplotlib: title, cmap, cbar_kws, legend_kws + ut.check_non_negative_number(name="start", val=start, min_val=0) + ut.check_non_negative_number(name="ytick_size", val=ytick_size, accept_none=True, just_int=False, min_val=1) + ut.check_non_negative_number(name="cmap_n_colors", val=cmap_n_colors, min_val=1, accept_none=True) + ut.check_bool(name="add_jmd_tmd", val=add_jmd_tmd) + ut.check_bool(name="add_legend_cat", val=add_legend_cat) + ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) + ut.check_dict(name="cbar_kws", val=cbar_kws, accept_none=True) + ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, col_type=[float, int]) + 
ut.check_y_categorical(df=df_feat, y=y) + df_feat = ut.check_df_feat(df_feat=df_feat, df_cat=self.df_cat) + check_value_type(val_type=val_type, count_in=False) + check_vmin_vmax(vmin=vmin, vmax=vmax) + check_figsize(figsize=figsize) + dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) + + # Get df positions + df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) + df_pos = _get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, + value_type=val_type, normalize=normalize, start=start, **args_len) + # Plotting + cpp_plot = CPPPlots(**args_len, start=start) + cpp_plot.set_figsize(figsize=figsize) # figsize is not used as argument in seaborn (but in pandas) + try: + linecolor = "gray" if facecolor_dark else "black" + if "linecolor" in kwargs: + linecolor = kwargs["linecolor"] + else: + kwargs["linecolor"] = linecolor + ax = cpp_plot.heatmap(df_pos=df_pos, vmin=vmin, vmax=vmax, grid_on=grid_on, + cmap=cmap, cmap_n_colors=cmap_n_colors, cbar_kws=cbar_kws, + x_shift=0.5, ytick_size=ytick_size, facecolor_dark=facecolor_dark, + cbar_pct=cbar_pct, **args_xtick, **kwargs) + ax.axvline(self.jmd_n_len, color=linecolor, linestyle="-", linewidth=1.5) + ax.axvline(x=self.jmd_n_len + args_len["tmd_len"], color=linecolor, linestyle="-", linewidth=1.5) + + except AttributeError as e: + error_message = check_parameters(func=self.heatmap, name_called_func="sns.heatmap", e=e) + raise AttributeError(error_message) + cpp_plot.set_title(title=title, title_kws=title_kws) + # Autosize tmd sequence & annotation + opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos) + # Add importance map + if add_importance_map: + _add_importance_map(ax=ax, df_feat=df_feat, df_cat=self.df_cat, + start=start, args_len=args_len, y=y) + # Add scale classification + if add_legend_cat: + ax = cpp_plot.add_legend_cat(ax=ax, df_pos=df_pos, df_cat=self.df_cat, y=y, dict_color=dict_color, + legend_kws=legend_kws) + # Add tmd_jmd sequence 
if sequence is given + if isinstance(tmd_seq, str): + ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, **args_part_color, **args_seq_color, + xticks_pos=xticks_pos, + x_shift=0.5, xtick_size=xtick_size) + self.ax_seq = ax + # Add tmd_jmd bar + elif add_jmd_tmd: + size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize + cpp_plot.add_tmd_jmd_bar(ax=ax, **args_part_color) + cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0.5, **args_xtick) + cpp_plot.add_tmd_jmd_text(ax=ax, x_shift=0, tmd_jmd_fontsize=size) + # Set current axis to main axis object depending on tmd sequence given or not + plt.sca(plt.gcf().axes[0]) + ax = plt.gca() + return ax + + def update_seq_size(self): + """""" + # TODO legend changes slightly if sequnece length altered (e.g. PTPRM_MOUSE vs A4_HUMAN) + # TODO look for more extreme example and text + f = lambda l: l.get_window_extent(ax.figure.canvas.get_renderer()) + ax = self.ax_seq + labels = ax.xaxis.get_ticklabels(which="both") + tick_positions = [f(l).x0 for l in labels] + sorted_tick_positions, sorted_labels = zip(*sorted(zip(tick_positions, labels), key=lambda t: t[0])) + # Adjust font size to prevent overlap + seq_size = get_optimal_fontsize(ax, sorted_labels) + for l in sorted_labels: + l.set_fontsize(seq_size) diff --git a/aaanalysis/cpp/feature.py b/aaanalysis/cpp/feature.py index 470e91bf..d73ec207 100644 --- a/aaanalysis/cpp/feature.py +++ b/aaanalysis/cpp/feature.py @@ -9,17 +9,32 @@ from itertools import repeat import multiprocessing as mp import warnings -from collections import OrderedDict from aaanalysis.cpp._feature_pos import SequenceFeaturePositions from aaanalysis.cpp._split import Split, SplitRange from aaanalysis.cpp._part import Parts -import aaanalysis.cpp._utils as _ut -import aaanalysis._utils as ut + import aaanalysis as aa +import aaanalysis.utils as ut + +# TODO simplify and check # I Helper Functions +# Check for add methods +def check_ref_group(ref_group=0, labels=None): + """Check if ref group 
class lable""" + if ref_group not in labels: + raise ValueError(f"'ref_group' ({ref_group}) not class label: {set(labels)}.") + + +def check_sample_in_df_seq(sample_name=None, df_seq=None): + """Check if sample name in df_seq""" + list_names = list(df_seq[ut.COL_NAME]) + if sample_name not in list_names: + error = f"'sample_name' ('{sample_name}') not in '{ut.COL_NAME}' of 'df_seq'." \ + f"\nValid names are: {list_names}" + raise ValueError(error) # Check load functions @@ -29,21 +44,12 @@ def check_clustered(complete=False, clust_th=0.7): raise ValueError("'clust_th' should be 0.3, 0.5, 0.7, or 0.9") -# Check functions get_df_parts -def check_jmd_len(jmd_n_len=None, jmd_c_len=None, accept_none=True): - """Check jmd_n_len and jmd_c_len""" - if accept_none and jmd_n_len is None and jmd_c_len is None: - return None # skip check - for name, val in zip(["jmd_n_len", "jmd_c_len"], [jmd_n_len, jmd_c_len]): - ut.check_non_negative_number(name=name, val=val) - - # Check functions get_split_kws def check_split_types(split_types=None): """Check split_type""" if type(split_types) is str: split_types = [split_types] - list_split_types = [_ut.STR_SEGMENT, _ut.STR_PATTERN, _ut.STR_PERIODIC_PATTERN] + list_split_types = [ut.STR_SEGMENT, ut.STR_PATTERN, ut.STR_PERIODIC_PATTERN] if split_types is None: split_types = list_split_types if not set(list_split_types).issuperset(set(split_types)): @@ -76,7 +82,7 @@ def _get_missing_elements(df_parts=None, scale_elements=None, accept_gaps=False) """Get missing elements""" seq_elements = set("".join(df_parts.values.flatten())) if accept_gaps: - missing_elements = [x for x in seq_elements if x not in scale_elements and x != _ut.STR_AA_GAP] + missing_elements = [x for x in seq_elements if x not in scale_elements and x != ut.STR_AA_GAP] else: missing_elements = [x for x in seq_elements if x not in scale_elements] return missing_elements @@ -133,12 +139,12 @@ def _feature_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False 
"""Helper function to create feature values for feature matrix""" sp = Split() # Get vectorized split function - split_type, split_kwargs = _ut.check_split(split=split) + split_type, split_kwargs = ut.check_split(split=split) f_split = getattr(sp, split_type.lower()) # Vectorize split function using anonymous function vf_split = np.vectorize(lambda x: f_split(seq=x, **split_kwargs)) # Get vectorized scale function - vf_scale = _ut.get_vf_scale(dict_scale=dict_scale, accept_gaps=accept_gaps) + vf_scale = ut.get_vf_scale(dict_scale=dict_scale, accept_gaps=accept_gaps) # Combine part split and scale to get feature values part_split = vf_split(df_parts) feature_value = np.round(vf_scale(part_split), 5) # feature values @@ -188,7 +194,7 @@ class SequenceFeature: 'tmd_jmd', 'jmd_n_tmd_n', 'tmd_c_jmd_c', 'ext_n_tmd_n', 'tmd_c_ext_c'] """ - # Basic data structures for features + # Basic datastructures for features @staticmethod def get_df_parts(df_seq=None, list_parts=None, jmd_n_len=None, jmd_c_len=None, ext_len=4, all_parts=False): """Create DataFrane with sequence parts. 
@@ -234,22 +240,21 @@ def get_df_parts(df_seq=None, list_parts=None, jmd_n_len=None, jmd_c_len=None, e >>> df_seq = aa.load_dataset(name='GSEC_SUB_SEQ') >>> df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) """ - check_jmd_len(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len) - ut.check_non_negative_number(name="ext_len", val=ext_len) - df_seq = _ut.check_df_seq(df_seq=df_seq, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len) - list_parts = _ut.check_list_parts(list_parts=list_parts, all_parts=all_parts) - seq_info_in_df = set(_ut.COLS_SEQ_INFO).issubset(set(df_seq)) + ut.check_args_len(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len, accept_tmd_none=True) + df_seq = ut.check_df_seq(df_seq=df_seq, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len) + list_parts = ut.check_list_parts(list_parts=list_parts, all_parts=all_parts) + seq_info_in_df = set(ut.COLS_SEQ_INFO).issubset(set(df_seq)) pa = Parts() dict_parts = {} for i, row in df_seq.iterrows(): - entry = row[_ut.COL_ENTRY] + entry = row[ut.COL_ENTRY] if jmd_c_len is not None and jmd_n_len is not None and seq_info_in_df: - seq, start, stop = row[_ut.COLS_SEQ_INFO].values + seq, start, stop = row[ut.COLS_SEQ_INFO].values parts = pa.create_parts(seq=seq, tmd_start=start, tmd_stop=stop, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len) jmd_n, tmd, jmd_c = parts.jmd_n, parts.tmd, parts.jmd_c else: - jmd_n, tmd, jmd_c = row[_ut.COLS_PARTS].values + jmd_n, tmd, jmd_c = row[ut.COLS_PARTS].values dict_part_seq = pa.get_dict_part_seq(tmd=tmd, jmd_n=jmd_n, jmd_c=jmd_c, ext_len=ext_len) dict_part_seq = {part: dict_part_seq[part] for part in list_parts} dict_parts[entry] = dict_part_seq @@ -313,15 +318,15 @@ def get_split_kws(n_split_min=1, n_split_max=15, steps_pattern=None, n_min=2, n_ steps_pattern = [3, 4] if steps_periodicpattern is None: steps_periodicpattern = [3, 4] # Differences between interacting amino acids in helix (without gaps) - split_kws = {_ut.STR_SEGMENT: dict(n_split_min=n_split_min, 
n_split_max=n_split_max), - _ut.STR_PATTERN: dict(steps=steps_pattern, n_min=n_min, n_max=n_max, len_max=len_max), - _ut.STR_PERIODIC_PATTERN: dict(steps=steps_periodicpattern)} + split_kws = {ut.STR_SEGMENT: dict(n_split_min=n_split_min, n_split_max=n_split_max), + ut.STR_PATTERN: dict(steps=steps_pattern, n_min=n_min, n_max=n_max, len_max=len_max), + ut.STR_PERIODIC_PATTERN: dict(steps=steps_periodicpattern)} split_kws = {x: split_kws[x] for x in split_types} - _ut.check_split_kws(split_kws=split_kws) + ut.check_split_kws(split_kws=split_kws) return split_kws - def features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=False): - """Create list of feature ids for given Parts, Splits, and Scales + def get_features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=False): + """Create list of all feature ids for given Parts, Splits, and Scales Parameters ---------- @@ -340,9 +345,9 @@ def features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=Fa Ids of all possible features for combination of Parts, Splits, and Scales with form: PART-SPLIT-SCALE """ - list_parts = _ut.check_list_parts(list_parts=list_parts, all_parts=all_parts) - _ut.check_split_kws(split_kws=split_kws) - _ut.check_df_scales(df_scales=df_scales, accept_none=True) + list_parts = ut.check_list_parts(list_parts=list_parts, all_parts=all_parts) + ut.check_split_kws(split_kws=split_kws) + ut.check_df_scales(df_scales=df_scales, accept_none=True) if df_scales is None: df_scales = aa.load_scales() if split_kws is None: @@ -359,9 +364,72 @@ def features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=Fa for sc in scales]) return features + @staticmethod + def feat_matrix(features=None, df_parts=None, df_scales=None, accept_gaps=False, + n_jobs=None, verbose=False, return_labels=False): + """Create feature matrix for given feature ids and sequence parts. 
+ + Parameters + ---------- + features: str, list of strings, pd.Series + Ids of features for which matrix of feature values should be created. + df_parts: :class:`pandas.DataFrame` + DataFrame with sequence parts. + df_scales: :class:`pandas.DataFrame`, optional + DataFrame with default amino acid scales. + accept_gaps: bool, default = False + Whether to accept missing values by enabling omitting for computations (if True). + n_jobs: int, default = None, + The number of jobs to run in parallel. If None, it will be set to the maximum. + verbose: bool, default = True + Whether to print size of to be created feature matrix (if True) or not otherwise. + return_labels: bool, default = False + Whether to return sample labels in addition to feature matrix. + + Returns + ------- + feat_matrix: array-like or sparse matrix, shape (n_samples, n_features) + Feature values of samples. + """ + ut.check_non_negative_number(name="j_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True) + if df_scales is None: + df_scales = aa.load_scales() + ut.check_df_scales(df_scales=df_scales) + ut.check_df_parts(df_parts=df_parts) + features = ut.check_features(features=features, parts=df_parts, df_scales=df_scales) + check_df_scales_matches_df_parts(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) + if verbose: + n_feat = len(features) + n_samples = len(df_parts) + n_vals = n_feat * n_samples + print(f"Feature matrix for {n_feat} features and {n_samples} samples will be created") + if n_vals > 1000*1000: + warning = f"Feature matrix with n={n_vals}>=10^6 values will be created, which will take some time.\n" \ + "It is recommended to create a feature matrix for a pre-selected number features " \ + "so that 10^6 values are not exceeded." 
+ warnings.warn(warning) + # Create feature matrix using parallel processing + dict_all_scales = ut.get_dict_all_scales(df_scales=df_scales) + n_processes = min([os.cpu_count(), len(features)]) if n_jobs is None else n_jobs + feat_chunks = np.array_split(features, n_processes) + args = zip(feat_chunks, repeat(dict_all_scales), repeat(df_parts), repeat(accept_gaps)) + with mp.get_context("spawn").Pool(processes=n_processes) as pool: + result = pool.starmap(_feature_matrix, args) + feat_matrix = np.concatenate(result, axis=1) + if return_labels: + if verbose: + print("Tuple of (feat_matrix, labels) will be returned") + labels = df_parts.index.tolist() + return feat_matrix, labels # X, y + else: + if verbose: + print("Only feat_matrix (without labels) will be returned") + return feat_matrix # X + + # Additional feature related methods @staticmethod def feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=10, ext_len=0, start=1): - """Convert feature ids (PART-SPLIT-SCALE) into feature name (scale name [positions]). + """Convert feature ids (PART-SPLIT-SCALE) into feature names (scale name [positions]). 
Parameters ---------- @@ -394,31 +462,33 @@ def feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=1 - PeriodicPattern: [first..step1/step2..last] """ # Check input (length checked in SequenceFeaturePositions) - features = _ut.check_features(features=features) - _ut.check_df_cat(df_cat=df_cat) + features = ut.check_features(features=features) + ut.check_df_cat(df_cat=df_cat) if df_cat is None: - df_cat = aa.load_scales(name=_ut.STR_SCALE_CAT) + df_cat = aa.load_scales(name=ut.STR_SCALE_CAT) # Get feature names sfp = SequenceFeaturePositions() dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len, start=start) list_positions = sfp.get_positions(dict_part_pos=dict_part_pos, features=features) - dict_scales = dict(zip(df_cat[_ut.COL_SCALE_ID], df_cat[_ut.COL_SCALE_NAME])) + dict_scales = dict(zip(df_cat[ut.COL_SCALE_ID], df_cat[ut.COL_SCALE_NAME])) feat_names = [] for feat_id, pos in zip(features, list_positions): part, split, scale = feat_id.split("-") split_type = split.split("(")[0] - if split_type == _ut.STR_SEGMENT and len(pos.split(",")) > 2: + if split_type == ut.STR_SEGMENT and len(pos.split(",")) > 2: pos = pos.split(",")[0] + "..." + pos.split(",")[-1] - if split_type == _ut.STR_PERIODIC_PATTERN: + if split_type == ut.STR_PERIODIC_PATTERN: step = split.split("+")[1].split(",")[0] pos = pos.split(",")[0] + ".." + step + ".." + pos.split(",")[-1] feat_names.append(f"{dict_scales[scale]} [{pos}]") return feat_names # Feature: Part + Split + Scale + # For what used? Not redudant with feature matrix? + # TODO Add functions (modify df_feat) @staticmethod - def feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): + def add_feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): """Create feature values for all sequence parts by combining Part, Split, and Scale. 
Parameters @@ -457,8 +527,8 @@ def feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): All numbers should be non-negative integers. Examples for each split type are as follows: 'Segment(5,7)', 'Pattern(C,1,2)', 'PeriodicPattern(N,i+2/3,1)'. """ - _ut.check_df_parts(df_parts=df_parts) - _ut.check_split(split=split) + ut.check_df_parts(df_parts=df_parts) + ut.check_split(split=split) check_dict_scale(dict_scale=dict_scale, df_parts=df_parts, accept_gaps=accept_gaps) feature_value = _feature_value(df_parts=df_parts, split=split, @@ -467,63 +537,88 @@ def feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): return feature_value @staticmethod - def feat_matrix(df_parts=None, features=None, df_scales=None, accept_gaps=False, - n_jobs=None, verbose=False, return_labels=False): - """Create feature matrix for given feature names and sequence parts. + def add_dif(df_feat=None, df_seq=None, labels=None, sample_name=str, ref_group=0, + accept_gaps=False, jmd_n_len=10, jmd_c_len=10, df_parts=None, df_scales=None): + """ + Add feature value difference between sample and reference group to DataFrame. Parameters ---------- - df_parts: :class:`pandas.DataFrame` - DataFrame with sequence parts. - features: str, list of strings, pd.Series - Ids of features for which matrix of feature values should be created. - df_scales: :class:`pandas.DataFrame`, optional - DataFrame with default amino acid scales. + df_feat: :class:`pandas.DataFrame` + Feature DataFrame (CPP output) to add sample difference. + df_seq: :class:`pandas.DataFrame` + DataFrame with sequences and sample names, in which the given sample name is included. + labels: array-like, shape (n_samples) + Class labels for samples in sequence DataFrame. + sample_name: str + Name of sample for which the feature value difference to a given reference group should be computed. + ref_group: int, default = 0 + Class label of reference group. 
accept_gaps: bool, default = False Whether to accept missing values by enabling omitting for computations (if True). - n_jobs: int, default = None, - The number of jobs to run in parallel. If None, it will be set to the maximum. - verbose: bool, default = True - Whether to print size of to be created feature matrix (if True) or not otherwise. - return_labels: bool, default = False - Whether to return sample labels in addition to feature matrix. Returns ------- - feat_matrix: array-like or sparse matrix, shape (n_samples, n_features) - Feature values of samples. + df_feat: :class:`pandas.DataFrame` + Feature DataFrame including feature value difference. """ - ut.check_non_negative_number(name="j_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True) - if df_scales is None: - df_scales = aa.load_scales() - _ut.check_df_scales(df_scales=df_scales) - _ut.check_df_parts(df_parts=df_parts) - features = _ut.check_features(features=features, parts=df_parts, df_scales=df_scales) - check_df_scales_matches_df_parts(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) - if verbose: - n_feat = len(features) - n_samples = len(df_parts) - n_vals = n_feat * n_samples - print(f"Feature matrix for {n_feat} features and {n_samples} samples will be created") - if n_vals > 1000*1000: - warning = f"Feature matrix with n={n_vals}>=10^6 values will be created, which will take some time.\n" \ - "It is recommended to create a feature matrix for a pre-selected number features " \ - "so that 10^6 values are not exceeded." 
- warnings.warn(warning) - # Create feature matrix using parallel processing - dict_all_scales = _ut.get_dict_all_scales(df_scales=df_scales) - n_processes = min([os.cpu_count(), len(features)]) if n_jobs is None else n_jobs - feat_chunks = np.array_split(features, n_processes) - args = zip(feat_chunks, repeat(dict_all_scales), repeat(df_parts), repeat(accept_gaps)) - with mp.get_context("spawn").Pool(processes=n_processes) as pool: - result = pool.starmap(_feature_matrix, args) - feat_matrix = np.concatenate(result, axis=1) - if return_labels: - if verbose: - print("Tuple of (feat_matrix, labels) will be returned") - labels = df_parts.index.tolist() - return feat_matrix, labels # X, y - else: - if verbose: - print("Only feat_matrix (without labels) will be returned") - return feat_matrix # X + # Check input + df_feat = ut.check_df_feat(df_feat=df_feat) + ut.check_df_seq(df_seq=df_seq, jmd_c_len=jmd_c_len, jmd_n_len=jmd_n_len) + ut.check_labels(labels=labels, df=df_seq, name_df="df_seq") + check_ref_group(ref_group=ref_group, labels=labels) + check_sample_in_df_seq(sample_name=sample_name, df_seq=df_seq) + # Add sample difference to reference group + sf = SequenceFeature() + X = sf.feat_matrix(features=list(df_feat["feature"]), + df_parts=df_parts, + df_scales=df_scales, + accept_gaps=accept_gaps) + mask = [True if x == ref_group else False for x in labels] + i = list(df_seq[ut.COL_NAME]).index(sample_name) + df_feat[f"dif_{sample_name}"] = X[i] - X[mask].mean() + return df_feat + + @staticmethod + def add_position(df_feat=None, features=None, start=1, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, + part_split=False): + """Create list with positions for given feature names + + Parameters + ---------- + df_feat: :class:`pandas.DataFrame` + Feature DataFrame (CPP output) to add sample difference. + features: str, list of strings, pd.Series + Ids of features for which feature names should be created. 
+ start: int, >=0, default = 1 + Position label of first amino acid position (starting at N-terminus). + tmd_len: int, >0, default = 20 + Length of TMD. + jmd_n_len : int, >=0, default = 10 + Length of JMD-N. + jmd_c_len : int, >=0, default = 10 + Length of JMD-C. + ext_len : int, >=0, default = 4 + Length of TMD-extending part (starting from C and N terminal part of TMD). + Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len. + + Returns + ------- + feat_positions: list + list with positions for each feature in feat_names + + Notes + ----- + The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len). + """ + # TODO add sequence, generalize check functions for tmd_len ... + features = ut.check_features(features=features) + ut.check_non_negative_number(name="tmd_len", val=tmd_len, just_int=True, min_val=1) + args = dict(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len, start=start) + for name in args: + ut.check_non_negative_number(name=name, val=args[name], just_int=True, min_val=0) + sfp = SequenceFeaturePositions() + dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, **args) + feat_positions = sfp.get_positions(dict_part_pos=dict_part_pos, features=features) + return feat_positions + diff --git a/aaanalysis/data/benchmarks/INFO_benchmarks.xlsx b/aaanalysis/data/benchmarks/INFO_benchmarks.xlsx deleted file mode 100644 index a8c59f4f..00000000 Binary files a/aaanalysis/data/benchmarks/INFO_benchmarks.xlsx and /dev/null differ diff --git a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc index 00eb0211..de348bb7 100644 Binary files a/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc and b/aaanalysis/data_loader/__pycache__/data_loader.cpython-39.pyc differ diff --git a/aaanalysis/data_loader/data_loader.py b/aaanalysis/data_loader/data_loader.py index 0d6abe89..97e5710f 100644 --- a/aaanalysis/data_loader/data_loader.py +++ 
b/aaanalysis/data_loader/data_loader.py @@ -6,7 +6,7 @@ import numpy as np import re -import aaanalysis._utils as ut +import aaanalysis.utils as ut # I Helper Functions @@ -44,8 +44,8 @@ def load_dataset(name="INFO", n=None, non_canonical_aa="remove", min_len=None, m Three types of benchmark datasets are provided: - Residue prediction: 6 datasets used to predict residue (amino acid) specific properties ('AA_CASPASE3', 'AA_FURIN', 'AA_LDR', 'AA_MMP2', 'AA_RNABIND', 'AA_SA') - - Domain prediction: 1 dataset used to predict domain specific properties (containing unlabeled data) - (DOM_SUBGSEC) + - Domain prediction: 1 dataset used to predict domain specific properties (_PU contains unlabeled _data) + (DOM_GSEC, DOM_GSEC_PU) - Sequence prediction: 6 datasets used to predict sequence specific properties ('SEQ_AMYLO', 'SEQ_CAPSID', 'SEQ_DISULFIDE', 'SEQ_LOCATION', 'SEQ_SOLUBLE', 'SEQ_TAIL') @@ -85,20 +85,25 @@ def load_dataset(name="INFO", n=None, non_canonical_aa="remove", min_len=None, m if name not in list_datasets: list_aa = [x for x in list_datasets if 'AA' in x] list_seq = [x for x in list_datasets if 'SEQ' in x] - raise ValueError(f"'name' ({name}) is not valid.\n Amino acid datasets: {list_aa}\n Sequence datasets: {list_seq}") + list_dom = [x for x in list_datasets if 'DOM' in x] + raise ValueError(f"'name' ({name}) is not valid." 
+ f"\n Amino acid datasets: {list_aa}" + f"\n Sequence datasets: {list_seq}" + f"\n Domain datasets: {list_dom}") df = pd.read_csv(folder_in + name + ".tsv", sep="\t") - # Filter data + # Filter _data if min_len is not None: mask = [len(x) >= min_len for x in df[ut.COL_SEQ]] df = df[mask] if max_len is not None: mask = [len(x) <= max_len for x in df[ut.COL_SEQ]] df = df[mask] - if n is not None: - labels = set(df["label"]) - df = pd.concat([df[df["label"] == l].head(n) for l in labels]) # Adjust non-canonical amino acid (keep, remove, or replace by gap) df = _adjust_non_canonical_aa(df=df, non_canonical_aa=non_canonical_aa) + # Select balanced groups + if n is not None: + labels = set(df[ut.COL_LABEL]) + df = pd.concat([df[df[ut.COL_LABEL] == l].head(n) for l in labels]) return df @@ -141,7 +146,7 @@ def load_scales(name="scales", just_aaindex=False, unclassified_in=True): """ if name not in LIST_DATASETS: raise ValueError(f"'name' ({name}) is not valid. Choose one of following: {LIST_DATASETS}") - # Load data + # Load _data df_cat = pd.read_excel(ut.FOLDER_DATA + f"{ut.STR_SCALE_CAT}.xlsx") df_cat = _filter_scales(df_cat=df_cat, unclassified_in=unclassified_in, just_aaindex=just_aaindex) if name == ut.STR_SCALE_CAT: diff --git a/aaanalysis/dpulearn/__pycache__/dpulearn.cpython-39.pyc b/aaanalysis/dpulearn/__pycache__/dpulearn.cpython-39.pyc index beeaad49..6c884bfe 100644 Binary files a/aaanalysis/dpulearn/__pycache__/dpulearn.cpython-39.pyc and b/aaanalysis/dpulearn/__pycache__/dpulearn.cpython-39.pyc differ diff --git a/aaanalysis/dpulearn/dpulearn.py b/aaanalysis/dpulearn/dpulearn.py index 5c748034..496adb71 100644 --- a/aaanalysis/dpulearn/dpulearn.py +++ b/aaanalysis/dpulearn/dpulearn.py @@ -7,7 +7,7 @@ from sklearn.decomposition import PCA import math import warnings -import aaanalysis._utils as ut +import aaanalysis.utils as ut # Settings pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe @@ -106,7 +106,7 @@ def 
_get_neg_via_distance(X=None, labels=None, metric="euclidean", n_neg=None, """ mask_pos = labels == label_pos mask_unl = labels != label_pos - # Compute the average distances to the positive data points + # Compute the average distances to the positive datapoints avg_dist = pairwise_distances(X[mask_pos], X, metric=metric).mean(axis=0) # Select negatives based on largest average distance to positives top_indices = np.argsort(avg_dist[mask_unl])[::-1][:n_neg] @@ -154,7 +154,7 @@ def _get_neg_via_pca(X=None, labels=None, n_components=0.8, n_neg=None, columns_pca = _columns_pca[0:len(list_n_neg)] df_seq[columns_pca] = pca.components_.T[:, 0:len(columns_pca)] - # Get mean of positive data for each component + # Get mean of positive datafor each component mask_pos = labels == label_pos mask_unl = labels != label_pos pc_means = df_seq[mask_pos][columns_pca].mean(axis=0) @@ -208,7 +208,7 @@ class dPULearn: Attributes ---------- labels_ : array-like, shape (n_samples,) - Labels of each data point. + Labels of each datapoint. Notes ----- @@ -266,7 +266,7 @@ def fit(self, X, labels=None, n_neg=0, label_pos=1, name_neg="REL_NEG", df_seq=N Examples -------- - Create small example data for dPUlearn containg positive ('pos', 1) and unlabeled ('unl', 2) data + Create small example datafor dPUlearn containg positive ('pos', 1) and unlabeled ('unl', 2) _data >>> import aaanalysis as aa >>> import pandas as pd @@ -303,3 +303,5 @@ def fit(self, X, labels=None, n_neg=0, label_pos=1, name_neg="REL_NEG", df_seq=N self.labels_ = new_labels return df_seq + def eval(self): + """""" # TODO add evaluation function diff --git a/aaanalysis/dpulearn/dpulearn_plot.py b/aaanalysis/dpulearn/dpulearn_plot.py new file mode 100644 index 00000000..2463d63c --- /dev/null +++ b/aaanalysis/dpulearn/dpulearn_plot.py @@ -0,0 +1,31 @@ +""" +This is a script for ... 
+""" +import time +import pandas as pd +import numpy as np + + +# Settings +pd.set_option('expand_frame_repr', False) # Single line print for pd.Dataframe + + +# I Helper Functions + + +# II Main Functions + + +# III Test/Caller Functions + + +# IV Main +def main(): + t0 = time.time() + + t1 = time.time() + print("Time:", t1 - t0) + + +if __name__ == "__main__": + main() diff --git a/aaanalysis/explainer/__init__.py b/aaanalysis/explainer/__init__.py new file mode 100644 index 00000000..14853156 --- /dev/null +++ b/aaanalysis/explainer/__init__.py @@ -0,0 +1,5 @@ +from aaanalysis.data_loader import load_dataset, load_scales +from aaanalysis.aaclust import AAclust +from aaanalysis.cpp import CPP, CPPPlot, SequenceFeature, SplitRange +from aaanalysis.dpulearn import dPULearn +from aaanalysis.plotting import plot_settings, plot_set_legend, plot_gcfs, plot_get_cmap, plot_get_cdict diff --git a/aaanalysis/explainer/tree_based.py b/aaanalysis/explainer/tree_based.py new file mode 100644 index 00000000..244e22bf --- /dev/null +++ b/aaanalysis/explainer/tree_based.py @@ -0,0 +1,92 @@ +""" +This is a script for the processing SHAP values, primarily for the combination of SHAP with CPP +""" +import time +import pandas as pd +import numpy as np + +import aaanalysis.utils as ut + +# Settings + + +# I Helper Functions +# Get info for COL_FEAT_IMPORTANCE = "feat_importance" +# COO_FEAT_IMP_STD = "feat_importance_std" +# COL_FEAT_IMPACT = "feat_impact" + +# II Main Functions +class Tree: + """A wrapper for Tree based prediction models and Tree explainer from SHAP package to + explain prediction (typically binary classification) results at global and individual level""" + def __init__(self, model=None): + """""" + + def fit(self, n_epochs=10, rcf=True, return_models=False): + """Fit provided tree based model n_epochs time and compute average feature importance""" + + def eval(self): + """""" + + def add_feat_import(self, df_feat=None): + """""" + + +class TreeSHAP: + """A wrapper 
for Tree explainer from SHAP package""" + def __init__(self, model=None): + """""" + + def fit(self, n_epochs=10, return_models=False): + """Fit provided tree based model n_epochs time and compute average feature importance""" + + def eval(self): + """""" + + # TODO rename and add other functions (e.g., fuzzy labeling) + @staticmethod + def add_feat_impact(df_feat=None, col_shap="shap_value", name_feat_impact="feat_impact"): + """ + Convert SHAP values into feature impact/importance and add to DataFrame. + + Parameters + ---------- + df_feat: :class:`pandas.DataFrame` + Feature DataFrame to which the feature impact will be added. + col_shap: str, default = 'shap_value' + Column name of `SHAP `_ values in the feature DataFrame. + name_feat_impact: str, default = 'feat_impact' + Column name of feature impact or feature importance that will be added to the feature DataFrame. + + Returns + ------- + df_feat: :class:`pandas.DataFrame` + Feature DataFrame including feature impact. + + Notes + ----- + - SHAP (SHapley Additive exPlanations) is a game theoretic approach to explain the output of any machine learning model. + - SHAP values represent a feature's responsibility for a change in the model output. + - Missing values are accepted in SHAP values. 
+ + """ + + # Check input + df_feat = df_feat.copy() + ut.check_str(name="name_feat_impact", val=name_feat_impact) + ut.check_str(name="col_shap", val=col_shap) + df_feat = ut.check_df_feat(df_feat=df_feat) + ut.check_col_in_df(df=df_feat, name_df="df_feat", col=col_shap, col_type=[float, int]) + ut.check_col_in_df(df=df_feat, name_df="df_feat", col=name_feat_impact, error_if_exists=True) + + # Compute feature impact (accepting missing values) + shap_values = np.array(df_feat[col_shap]) + feat_impact = shap_values / np.nansum(np.abs(shap_values)) * 100 + shap_loc = df_feat.columns.get_loc(col_shap) + df_feat.insert(shap_loc + 1, name_feat_impact, feat_impact) + return df_feat + + @staticmethod + def fuzzly_labeling(): + """Perform fuzzy labeling for selected sample""" + diff --git a/aaanalysis/plotting/__init__.py b/aaanalysis/plotting/__init__.py new file mode 100644 index 00000000..dfa4cf8d --- /dev/null +++ b/aaanalysis/plotting/__init__.py @@ -0,0 +1,4 @@ +from aaanalysis.plotting.plotting_functions import plot_get_cmap, plot_get_cdict, plot_gcfs, \ + plot_settings, plot_set_legend + +__all__ = ["plot_get_cmap", "plot_get_cdict", "plot_settings", "plot_set_legend", "plot_gcfs"] diff --git a/aaanalysis/plotting/__pycache__/__init__.cpython-39.pyc b/aaanalysis/plotting/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 00000000..2f1cbe98 Binary files /dev/null and b/aaanalysis/plotting/__pycache__/__init__.cpython-39.pyc differ diff --git a/aaanalysis/plotting/__pycache__/plotting_functions.cpython-39.pyc b/aaanalysis/plotting/__pycache__/plotting_functions.cpython-39.pyc new file mode 100644 index 00000000..38197b76 Binary files /dev/null and b/aaanalysis/plotting/__pycache__/plotting_functions.cpython-39.pyc differ diff --git a/aaanalysis/utils_plot.py b/aaanalysis/plotting/plotting_functions.py similarity index 64% rename from aaanalysis/utils_plot.py rename to aaanalysis/plotting/plotting_functions.py index 649e7e30..e310225c 100644 --- 
a/aaanalysis/utils_plot.py +++ b/aaanalysis/plotting/plotting_functions.py @@ -5,10 +5,27 @@ import seaborn as sns import matplotlib as mpl import matplotlib.pyplot as plt -import aaanalysis._utils as ut +import aaanalysis.utils as ut + + + +LIST_AA_COLOR_PALETTES = ["FEAT", "SHAP", "GGPLOT"] +LIST_AA_COLOR_DICTS = ["DICT_SCALE_CAT", "DICT_COLOR"] +LIST_AA_COLORS = LIST_AA_COLOR_PALETTES + LIST_AA_COLOR_DICTS + +LIST_FONTS = ['Arial', 'Avant Garde', 'Bitstream Vera Sans', 'Computer Modern Sans Serif', 'DejaVu Sans', + 'Geneva', 'Helvetica', 'Lucid', 'Lucida Grande', 'Verdana'] # Helper functions +def check_font_style(font="Arial"): + """""" + if font not in LIST_FONTS: + error_message = f"'font' ({font}) not in recommended fonts: {LIST_FONTS}. Set font manually by:" \ + f"\n\tplt.rcParams['font.sans-serif'] = '{font}'" + raise ValueError(error_message) + + def check_fig_format(fig_format="pdf"): """""" list_fig_formats = ['eps', 'jpg', 'jpeg', 'pdf', 'pgf', 'png', 'ps', @@ -40,8 +57,137 @@ def check_cats(list_cat=None, dict_color=None, labels=None): return list_cat -# Default plotting functions -def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", font_scale=0.7, +# Get color maps +def _get_shap_cmap(n_colors=100, facecolor_dark=True): + """Generate a diverging color map for feature values.""" + n = 20 + cmap_low = sns.light_palette(ut.COLOR_SHAP_NEG, input="hex", reverse=True, n_colors=int(n_colors/2)+n) + cmap_high = sns.light_palette(ut.COLOR_SHAP_POS, input="hex", n_colors=int(n_colors/2)+n) + c_middle = [(0, 0, 0)] if facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_feat_cmap(n_colors=100, facecolor_dark=False): + """Generate a diverging color map for feature values.""" + n = 5 + cmap = sns.color_palette("RdBu_r", n_colors=n_colors + n * 2) + cmap_low, cmap_high = cmap[0:int((n_colors + n * 2) / 2)], cmap[int((n_colors + n * 2) / 2):] + c_middle = [(0, 0, 0)] if 
facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_ggplot_cmap(n_colors=100): + """Generate a circular GGplot color palette.""" + cmap = sns.color_palette("husl", n_colors) + return cmap + + +def _get_default_colors(name=None, n_colors=100, facecolor_dark=True): + """Retrieve default color maps based on palette name.""" + args = dict(n_colors=n_colors, facecolor_dark=facecolor_dark) + if name == "SHAP": + return _get_shap_cmap(**args) + elif name == "FEAT": + return _get_feat_cmap(**args) + elif name == "GGPLOT": + return _get_ggplot_cmap(n_colors=n_colors) + + +def _get_cmap_with_gap(n_colors=100, color_pos=None, color_neg=None, color_center=None, pct_gap=10, pct_center=None, + input="hex"): + """Generate a custom color map with a gap.""" + n_gap = int(n_colors*pct_gap/2) + cmap_pos = sns.light_palette(color_pos, input=input, n_colors=int(n_colors/2)+n_gap) + cmap_neg = sns.light_palette(color_neg, input=input, reverse=True, n_colors=int(n_colors/2)+n_gap) + color_center = [cmap_neg[-1]] if color_center is None else color_center + color_center = [color_center] if type(color_center) is str else color_center + if pct_center is None: + cmap = cmap_neg[0:-n_gap] + color_center + cmap_pos[n_gap:] + else: + n_center = int(n_colors * pct_center) + n_gap += int(n_center/2) + cmap = cmap_neg[0:-n_gap] + color_center * n_center + cmap_pos[n_gap:] + return cmap + + +# Default plotting function +def plot_get_cmap(name=None, n_colors=100, facecolor_dark=False, + color_pos=None, color_neg=None, color_center=None, + input="hex", pct_gap=10, pct_center=None): + """ + Retrieve color maps or color dictionaries specified for AAanalysis. + + Parameters + ---------- + name : str, optional + The name of the color palette to use in AAanalysis. Options include: + - 'SHAP', 'FEAT', 'GGPLOT': Return color maps for SHAP plots, CPP feature maps/heatmaps, + and datagrouping as in GGplot, respectively. 
+ - 'DICT_COLOR', 'DICT_SCALE_CAT': Return default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. + n_colors : int, default=100 + Number of colors in the color map. + facecolor_dark : bool, default=False + Whether to use a dark face color for 'SHAP' and 'FEAT'. + color_pos : str, optional + Hex code for the positive color. + color_neg : str, optional + Hex code for the negative color. + color_center : str or list, optional + Hex code or list for the center color. + input : str, {'rgb', 'hls', 'husl', 'xkcd'} + Color space to interpret the input color. The first three options + apply to tuple inputs and the latter applies to string inputs. + pct_gap : int, default=10 + Percentage size of the gap between color ranges. + pct_center : float, optional + Percentage size of the center color in the map. + + Returns + ------- + cmap : list or dict + If 'name' parameter is 'SHAP', 'FEAT', or 'GGPLOT', a list of colors specified for AAanalysis will be returned. + If 'name' parameter is None, a list of colors based on provided colors + + See Also + -------- + sns.color_palette : Function to generate a color palette in seaborn. + sns.light_palette : Function to generate a lighter color palette in seaborn. + """ + # TODO check color dict name + if name in LIST_AA_COLOR_PALETTES: + cmap = _get_default_colors(name=name, n_colors=n_colors, facecolor_dark=facecolor_dark) + return cmap + cmap = _get_cmap_with_gap(n_colors=n_colors, color_pos=color_pos, color_neg=color_neg, + color_center=color_center, pct_gap=pct_gap, pct_center=pct_center, + input=input) + return cmap + + +def plot_get_cdict(name=None): + """ + Retrieve color dictionaries specified for AAanalysis. + + Parameters + ---------- + name : str, {'DICT_COLOR', 'DICT_SCALE_CAT'} + The name of default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. 
+ + Returns + ------- + cmap : dict + Specific AAanalysis color dictionary. + """ + # TODO check color dict name + color_dict = ut.DICT_COLOR if name == "DICT_COLORS" else ut.DICT_COLOR_CAT + return color_dict + + +def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", + font_scale=0.7, font="Arial", change_size=True, weight_bold=True, adjust_elements=True, short_ticks=False, no_ticks=False, no_ticks_y=False, short_ticks_y=False, no_ticks_x=False, short_ticks_x=False): @@ -60,6 +206,8 @@ def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", fo Choose the axis ('y', 'x', 'both') to apply the grid to. font_scale : float, default=0.7 Sets the scale for font sizes in the plot. + font : str, default='Arial' + Name of sans-serif font (e.g., 'Arial', 'Verdana', 'Helvetica', 'DejaVu Sans') change_size : bool, default=True If True, adjusts the size of plot elements. weight_bold : bool, default=True @@ -90,6 +238,7 @@ def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", fo """ # Check input check_fig_format(fig_format=fig_format) + check_font_style(font=font) check_grid_axis(grid_axis=grid_axis) args_bool = {"verbose": verbose, "grid": grid, "change_size": change_size, "weight_bold": weight_bold, "adjust_elements": adjust_elements, @@ -107,13 +256,12 @@ def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", fo print(plt.rcParams.keys) # Print all plot settings that can be modified in general if not change_size: plt.rcParams["font.family"] = "sans-serif" - plt.rcParams["font.sans-serif"] = "Arial" - font = {'family': 'Arial'} - mpl.rc('font', **font) + plt.rcParams["font.sans-serif"] = font + mpl.rc('font', **{'family': font}) return sns.set_context("talk", font_scale=font_scale) # Font settings https://matplotlib.org/3.1.1/tutorials/text/text_props.html plt.rcParams["font.family"] = "sans-serif" - plt.rcParams["font.sans-serif"] = "Arial" + plt.rcParams["font.sans-serif"] = font if 
weight_bold: plt.rcParams["axes.labelweight"] = "bold" plt.rcParams["axes.titleweight"] = "bold" @@ -152,7 +300,7 @@ def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", fo mpl.rcParams['pdf.fonttype'] = 42 elif "svg" in fig_format: mpl.rcParams['svg.fonttype'] = 'none' - font = {'family': 'Arial', "weight": "bold"} if weight_bold else {"family": "Arial"} + font = {'family': font, "weight": "bold"} if weight_bold else {"family": font} mpl.rc('font', **font) if adjust_elements: # Error bars diff --git a/aaanalysis/utils.py b/aaanalysis/utils.py new file mode 100644 index 00000000..89de9e8a --- /dev/null +++ b/aaanalysis/utils.py @@ -0,0 +1,33 @@ +""" +Config with folder structure +""" +import os +import platform + +# Import utility functions for specific purposes +from aaanalysis._utils._utils_constants import * +from aaanalysis._utils._utils_check import * +from aaanalysis._utils._utils_output import * + +# Import utility function for specific modules +from aaanalysis._utils.utils_aaclust import * +from aaanalysis._utils.utils_cpp import * + + + +# I Folder +def _folder_path(super_folder, folder_name): + """Modification of separator (OS depending)""" + path = os.path.join(super_folder, folder_name + SEP) + return path + + +SEP = "\\" if platform.system() == "Windows" else "/" +FOLDER_PROJECT = os.path.dirname(os.path.abspath(__file__)) +FOLDER_DATA = _folder_path(FOLDER_PROJECT, '_data') +URL_DATA = "https://github.com/breimanntools/aaanalysis/tree/master/aaanalysis/data/" + + +# II Helper Function + + diff --git a/docs/build/doctrees/_resources/tables.doctree b/docs/build/doctrees/_resources/tables.doctree index 17fb4434..24c21509 100644 Binary files a/docs/build/doctrees/_resources/tables.doctree and b/docs/build/doctrees/_resources/tables.doctree differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle index 9cc76545..4e2b7c3c 100644 Binary files a/docs/build/doctrees/environment.pickle and 
b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree index b06c8559..497552ad 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.AAclust.doctree and b/docs/build/doctrees/generated/aaanalysis.AAclust.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.CPP.doctree b/docs/build/doctrees/generated/aaanalysis.CPP.doctree index bed45d73..25bfe099 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.CPP.doctree and b/docs/build/doctrees/generated/aaanalysis.CPP.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree b/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree index d3ec53e5..5a941ee0 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree and b/docs/build/doctrees/generated/aaanalysis.SequenceFeature.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.dPULearn.doctree b/docs/build/doctrees/generated/aaanalysis.dPULearn.doctree index 25254e00..40bcb6c1 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.dPULearn.doctree and b/docs/build/doctrees/generated/aaanalysis.dPULearn.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree index cd888d3d..29404ced 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree and b/docs/build/doctrees/generated/aaanalysis.load_dataset.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.load_scales.doctree b/docs/build/doctrees/generated/aaanalysis.load_scales.doctree index 2a142a36..cb329365 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.load_scales.doctree and b/docs/build/doctrees/generated/aaanalysis.load_scales.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.plot_gcfs.doctree 
b/docs/build/doctrees/generated/aaanalysis.plot_gcfs.doctree index 40ed2786..364e8a38 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.plot_gcfs.doctree and b/docs/build/doctrees/generated/aaanalysis.plot_gcfs.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.plot_set_legend.doctree b/docs/build/doctrees/generated/aaanalysis.plot_set_legend.doctree index 3043e328..3856d9ea 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.plot_set_legend.doctree and b/docs/build/doctrees/generated/aaanalysis.plot_set_legend.doctree differ diff --git a/docs/build/doctrees/generated/aaanalysis.plot_settings.doctree b/docs/build/doctrees/generated/aaanalysis.plot_settings.doctree index e69a7b5e..da8825e3 100644 Binary files a/docs/build/doctrees/generated/aaanalysis.plot_settings.doctree and b/docs/build/doctrees/generated/aaanalysis.plot_settings.doctree differ diff --git a/docs/build/html/_downloads/a1e0721bda366e81dd2b5d60fcd98bc8/aaanalysis-CPP-1.py b/docs/build/html/_downloads/a1e0721bda366e81dd2b5d60fcd98bc8/aaanalysis-CPP-1.py deleted file mode 100644 index b2eede39..00000000 --- a/docs/build/html/_downloads/a1e0721bda366e81dd2b5d60fcd98bc8/aaanalysis-CPP-1.py +++ /dev/null @@ -1,6 +0,0 @@ -import matplotlib.pyplot as plt -import aaanalysis as aa -sf = aa.SequenceFeature() -df_seq = aa.load_dataset(name='SEQ_DISULFIDE', min_len=100) -labels = list(df_seq["label"]) -df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) diff --git a/docs/build/html/_images/social_previews/summary__resources_tables_59319699.png b/docs/build/html/_images/social_previews/summary__resources_tables_59319699.png deleted file mode 100644 index 2dd5a7fb..00000000 Binary files a/docs/build/html/_images/social_previews/summary__resources_tables_59319699.png and /dev/null differ diff --git a/docs/build/html/_images/social_previews/summary__resources_tables_88a5b382.png 
b/docs/build/html/_images/social_previews/summary__resources_tables_88a5b382.png new file mode 100644 index 00000000..a6416375 Binary files /dev/null and b/docs/build/html/_images/social_previews/summary__resources_tables_88a5b382.png differ diff --git a/docs/build/html/_modules/aaanalysis/aaclust/aaclust.html b/docs/build/html/_modules/aaanalysis/aaclust/aaclust.html index 6bcfc184..faf7c1fe 100644 --- a/docs/build/html/_modules/aaanalysis/aaclust/aaclust.html +++ b/docs/build/html/_modules/aaanalysis/aaclust/aaclust.html @@ -116,7 +116,7 @@

REFERENCES

@@ -193,8 +193,7 @@

Source code for aaanalysis.aaclust.aaclust

 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.cluster import KMeans
 
-import aaanalysis.aaclust._utils as _ut
-import aaanalysis._utils as ut
+import aaanalysis.utils as ut
 
 
 # I Helper Functions
@@ -386,7 +385,7 @@ 

Source code for aaanalysis.aaclust.aaclust

 def _get_quality_measure(X, metric=None, labels=None, label_cluster=None, on_center=True):
     """Get quality measure single cluster given by feature matrix X, labels, and label of cluster"""
     mask = [l == label_cluster for l in labels]
-    if metric == _ut.METRIC_CORRELATION:
+    if metric == ut.METRIC_CORRELATION:
         return get_min_cor(X[mask], on_center=on_center)
     else:
         return get_max_dist(X[mask], on_center=on_center, metric=metric)
@@ -395,7 +394,7 @@ 

Source code for aaanalysis.aaclust.aaclust

 def _get_best_cluster(dict_clust_qm=None, metric=None):
     """Get cluster with best quality measure: either highest minimum Pearson correlation
     or lowest distance measure"""
-    if metric == _ut.METRIC_CORRELATION:
+    if metric == ut.METRIC_CORRELATION:
         return max(dict_clust_qm, key=dict_clust_qm.get)
     else:
         return min(dict_clust_qm, key=dict_clust_qm.get)
@@ -534,7 +533,7 @@ 

Source code for aaanalysis.aaclust.aaclust

         self.model = model
         if model_kwargs is None:
             model_kwargs = dict()
-        model_kwargs = _ut.check_model(model=self.model, model_kwargs=model_kwargs)
+        model_kwargs = ut.check_model(model=self.model, model_kwargs=model_kwargs)
         self._model_kwargs = model_kwargs
         # AAclust clustering settings
         self._verbose = verbose
@@ -550,9 +549,9 @@ 

Source code for aaanalysis.aaclust.aaclust

     # Clustering method
 
[docs] def fit(self, X, names=None, on_center=True, min_th=0, merge_metric="euclidean", n_clusters=None): """ - Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation. + Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation. - AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data (X) into + AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data (X) into clusters by maximizing the within-cluster Pearson correlation beyond the 'min_th' threshold. The quality of clustering is either based on the minimum Pearson correlation of all members ('min_cor all') or between the cluster center and its members ('min_cor center'), governed by `on_center`. @@ -594,8 +593,8 @@

Source code for aaanalysis.aaclust.aaclust

         For further information, refer to the AAclust paper : TODO: add link to AAclust paper
         """
         # Check input
-        _ut.check_min_th(min_th=min_th)
-        merge_metric = _ut.check_merge_metric(merge_metric=merge_metric)
+        ut.check_min_th(min_th=min_th)
+        merge_metric = ut.check_merge_metric(merge_metric=merge_metric)
         X, names = ut.check_feat_matrix(X=X, names=names)
         args = dict(model=self.model, model_kwargs=self._model_kwargs, min_th=min_th, on_center=on_center)
         # Clustering using given clustering models
@@ -727,7 +726,7 @@ 

Source code for aaanalysis.aaclust.aaclust

         medoid_labels : array-like
             The labels corresponding to each medoid.
         medoid_ind : array-like
-            Indexes of medoids within the original data.
+            Indexes of medoids within the original data.
         """
         medoids, medoid_labels, medoid_ind = get_cluster_medoids(X, labels=labels)
         return medoids, medoid_labels, medoid_ind
@@ -745,9 +744,9 @@

Source code for aaanalysis.aaclust.aaclust

         X_ref : array-like
             Reference feature matrix.
         labels_test : list or array-like, optional
-            Cluster labels for the test data.
+            Cluster labels for the test data.
         labels_ref : list or array-like, optional
-            Cluster labels for the reference data.
+            Cluster labels for the reference data.
         n : int, default = 3
             Number of top centers to consider based on correlation strength.
         positive : bool, default = True
@@ -771,7 +770,7 @@ 

Source code for aaanalysis.aaclust.aaclust

             names_ref = [x for x in list(dict.fromkeys(labels_ref)) if "unclassified" not in x.lower()]
         masks_ref = [[True if i == label else False for i in labels_ref] for label in names_ref]
         if on_center:
-            # Get centers for all clusters in reference data
+            # Get centers for all clusters in reference data
             centers = np.concatenate([cluster_center(X_ref[mask]) for mask in masks_ref], axis=0)
             # Compute correlation of test data with centers
             Xtest_centers = np.concatenate([X_test, centers], axis=0)
@@ -780,7 +779,7 @@ 

Source code for aaanalysis.aaclust.aaclust

         else:
             masks_test = [[True if i == j else False for j in range(0, len(labels_test))]
                           for i, _ in enumerate(labels_test)]
-            # Compute minimum correlation of test data with each group of reference data
+            # Compute minimum correlation of test data with each group of reference data
             X_corr = np.array([[_min_cor_all(np.concatenate([X_test[mask_test], X_ref[mask_ref]], axis=0))
                                 for mask_ref in masks_ref ] for mask_test in masks_test])
         # Get index for n centers with highest/lowest correlation for each scale
@@ -795,7 +794,11 @@ 

Source code for aaanalysis.aaclust.aaclust

             top_names = [names_ref[x] for x in ind]
             str_corr = ";".join([f"{name} ({round(corr, 3)})" for name, corr in zip(top_names, top_corr)])
             list_top_center_name_corr.append(str_corr)
-        return list_top_center_name_corr
+ return list_top_center_name_corr
+ + def eval(self): + """"""
+ # TODO add evaluation function
diff --git a/docs/build/html/_modules/aaanalysis/cpp/_split.html b/docs/build/html/_modules/aaanalysis/cpp/_split.html index 44961bcc..2828d1e1 100644 --- a/docs/build/html/_modules/aaanalysis/cpp/_split.html +++ b/docs/build/html/_modules/aaanalysis/cpp/_split.html @@ -116,7 +116,7 @@

REFERENCES

@@ -190,7 +190,7 @@

Source code for aaanalysis.cpp._split

 import numpy as np
 import itertools
 
-import aaanalysis.cpp._utils as ut
+import aaanalysis.utils as ut
 
 
 # I Helper Functions
diff --git a/docs/build/html/_modules/aaanalysis/cpp/cpp.html b/docs/build/html/_modules/aaanalysis/cpp/cpp.html
index a6d51ba2..afc0d341 100644
--- a/docs/build/html/_modules/aaanalysis/cpp/cpp.html
+++ b/docs/build/html/_modules/aaanalysis/cpp/cpp.html
@@ -116,7 +116,7 @@
 

REFERENCES

@@ -188,231 +188,15 @@

Source code for aaanalysis.cpp.cpp

 This is a script for ...
 """
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import seaborn as sns
-import inspect
-import warnings
 
-import aaanalysis.cpp._utils as _ut
 from aaanalysis.cpp.feature import SequenceFeature
 from aaanalysis.cpp._feature_stat import SequenceFeatureStatistics
-from aaanalysis.cpp._feature_pos import SequenceFeaturePositions
-from aaanalysis.cpp._cpp import CPPPlots, get_optimal_fontsize
+
 import aaanalysis as aa
-import aaanalysis._utils as ut
+import aaanalysis.utils as ut
+
 
 # I Helper Functions
-# TODO separate interface from backend
-# TODO simplify interface (delete old profile)
-# TODO delete SHAP
-# TODO add importance plot for heatmap
-# TODO add ranking
-
-
-# Check CPP parameters
-def check_len_ext_and_jmd(jmd_n_len=None, jmd_c_len=None, ext_len=None):
-    """Check if lengths are matching"""
-    ut.check_non_negative_number(name="jmd_n_len", val=jmd_n_len)
-    ut.check_non_negative_number(name="jmd_c_len", val=jmd_c_len)
-    ut.check_non_negative_number(name="ext_len", val=ext_len)
-    if ext_len > jmd_n_len:
-        raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_n_len ({jmd_n_len})")
-    if ext_len > jmd_c_len:
-        raise ValueError(f"'ext_len' ({ext_len}) must be <= jmd_c_len ({jmd_c_len})")
-
-
-# Check for add methods
-def check_shap_value_for_feat_impact(df_feat=None, col_shap=None):
-    """Check if SHAP value column in df"""
-    if col_shap not in df_feat:
-        raise ValueError(f"'{col_shap}' must be column in 'df_feat' to compute feature impact")
-    wrong_types = [x for x in list(df_feat[col_shap]) if type(x) not in [float, int]]
-    if len(wrong_types) > 0:
-        error = f"Values in '{col_shap}' should be type float or int\n" \
-                f" but following values do not match: {wrong_types}"
-        raise ValueError(error)
-
-
-def check_feat_impact_in_df_feat(df_feat=None, name_feat_impact=None):
-    """Check if name for feature impact column already"""
-    if name_feat_impact in df_feat:
-        error = f"'name_feat_impact' ('{name_feat_impact}') already in 'df_feat' columns: {list(df_feat)}"
-        raise ValueError(error)
-
-
-def check_ref_group(ref_group=0, labels=None):
-    """Check if ref group class lable"""
-    if ref_group not in labels:
-        raise ValueError(f"'ref_group' ({ref_group}) not class label: {set(labels)}.")
-
-
-def check_sample_in_df_seq(sample_name=None, df_seq=None):
-    """Check if sample name in df_seq"""
-    list_names = list(df_seq[_ut.COL_NAME])
-    if sample_name not in list_names:
-        error = f"'sample_name' ('{sample_name}') not in '{_ut.COL_NAME}' of 'df_seq'." \
-                f"\nValid names are: {list_names}"
-        raise ValueError(error)
-
-
-# Check get df positions
-def check_value_type(val_type=None, count_in=True):
-    """Check if value type is valid"""
-    list_value_type = ["mean", "sum", "std"]
-    if count_in:
-        list_value_type.append("count")
-    if val_type not in list_value_type:
-        raise ValueError(f"'val_type' ('{val_type}') should be on of following: {list_value_type}")
-
-
-def check_normalize(normalize=True):
-    """Check normalize parameter"""
-    if not (type(normalize) == bool or normalize in ["positions", "positions_only"]):
-        raise ValueError(f"'normalize' ('{normalize}') should be bool or, if normalized for positions, 'positions'.")
-    normalize_for_positions = False if type(normalize) is bool else "positions" in normalize
-    normalize = normalize if type(normalize) is bool else "positions" == normalize
-    return normalize, normalize_for_positions
-
-
-# Check for plotting methods
-def check_args_len(tmd_seq=None, jmd_n_seq=None, jmd_c_seq=None, tmd_len=None, jmd_n_len=None, jmd_c_len=None):
-    """Check if parameters for sequence size and sequences match"""
-    count = 0
-    for seq in [tmd_seq, jmd_c_seq, jmd_n_seq]:
-        if type(seq) == str:
-            count += 1
-    if count == 3:
-        if len(jmd_n_seq) != jmd_n_len:
-            error = f"'jmd_n_seq' ('{jmd_n_seq}', len={len(jmd_n_seq)}) does not match CPP setting: ({jmd_n_len})."
-            raise ValueError(error)
-        if len(jmd_c_seq) != jmd_c_len:
-            error = f"'jmd_c_seq' ('{jmd_c_seq}', len={len(jmd_c_seq)}) does not match CPP setting: ({jmd_c_len})."
-            raise ValueError(error)
-        tmd_len, jmd_n_len, jmd_c_len = len(tmd_seq), len(jmd_n_seq), len(jmd_c_seq)
-    elif count != 0:
-        raise ValueError("'jmd_n_seq' 'tmd_seq', and 'jmd_c_seq' must all be None or sequence (type string)")
-    args_len = dict(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
-    return args_len
-
-
-def check_args_size(seq_size=None, tmd_jmd_fontsize=None):
-    """Check if sequence size parameters match"""
-    ut.check_non_negative_number(name="seq_size", val=seq_size, min_val=0, accept_none=True, just_int=False)
-    ut.check_non_negative_number(name="tmd_jmd_fontsize", val=tmd_jmd_fontsize, min_val=0, accept_none=True, just_int=False)
-    args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize)
-    return args_size
-
-
-def check_args_xtick(xtick_size=None, xtick_width=None, xtick_length=None):
-    """Check if x tick parameters non-negative float"""
-    args = dict(accept_none=True, just_int=False, min_val=0)
-    ut.check_non_negative_number(name="xtick_size", val=xtick_size, **args)
-    ut.check_non_negative_number(name="xtick_width", val=xtick_width, **args)
-    ut.check_non_negative_number(name="xtick_length", val=xtick_length, **args)
-    args_xtick = dict(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length)
-    return args_xtick
-
-
-def check_args_ytick(ytick_size=None, ytick_width=None, ytick_length=None):
-    """Check if y tick parameters non-negative float"""
-    args = dict(accept_none=True, just_int=False, min_val=1)
-    ut.check_non_negative_number(name="ytick_size", val=ytick_size, **args)
-    ut.check_non_negative_number(name="ytick_width", val=ytick_width, **args)
-    ut.check_non_negative_number(name="ytick_length", val=ytick_length, **args)
-    args_ytick = dict(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length)
-    return args_ytick
-
-
-def check_part_color(tmd_color=None, jmd_color=None):
-    """Check if part colors valid"""
-    _ut.check_color(name="tmd_color", val=tmd_color)
-    _ut.check_color(name="jmd_color", val=jmd_color)
-    args_part_color = dict(tmd_color=tmd_color, jmd_color=jmd_color)
-    return args_part_color
-
-
-def check_seq_color(tmd_seq_color=None, jmd_seq_color=None):
-    """Check sequence colors"""
-    _ut.check_color(name="tmd_seq_color", val=tmd_seq_color)
-    _ut.check_color(name="jmd_seq_color", val=jmd_seq_color)
-    args_seq_color = dict(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color)
-    return args_seq_color
-
-
-def check_figsize(figsize=None):
-    """"""
-    _ut.check_tuple(name="figsize", val=figsize, n=2)
-    ut.check_non_negative_number(name="figsize:width", val=figsize[0], min_val=1, just_int=False)
-    ut.check_non_negative_number(name="figsize:height", val=figsize[1], min_val=1, just_int=False)
-
-
-def check_dict_color(dict_color=None, df_cat=None):
-    """Check if color dictionary is matching to DataFrame with categories"""
-    list_cats = list(sorted(set(df_cat[_ut.COL_CAT])))
-    if dict_color is None:
-        dict_color = _ut.DICT_COLOR
-    if not isinstance(dict_color, dict):
-        raise ValueError(f"'dict_color' should be a dictionary with colors for: {list_cats}")
-    list_cat_not_in_dict_cat = [x for x in list_cats if x not in dict_color]
-    if len(list_cat_not_in_dict_cat) > 0:
-        error = f"'dict_color' not complete! Following categories are missing from 'df_cat': {list_cat_not_in_dict_cat}"
-        raise ValueError(error)
-    for key in dict_color:
-        color = dict_color[key]
-        _ut.check_color(name=key, val=color)
-    return dict_color
-
-
-def check_parameters(func=None, name_called_func=None, e=None):
-    """Check parameters string from error message of third party packages"""
-    list_arg_str = ["property ", "attribute ", "argument ", "parameter "]
-    str_error = ""
-    for arg_str in list_arg_str:
-        if arg_str in str(e):
-            error_arg = str(e).split(arg_str)[1]
-            str_error += "Error due to {} parameter. ".format(error_arg)
-            break
-    args = [x for x in inspect.getfullargspec(func).args if x != "self"]
-    str_error += "Arguments are allowed from {} and as follows: {}".format(name_called_func, args)
-    return str_error
-
-
-# Check heatmap plotting
-def check_vmin_vmax(vmin=None, vmax=None):
-    """Check if number of cmap colors is valid with given value range"""
-    ut.check_float(name="vmin", val=vmin, accept_none=True, just_float=False)
-    ut.check_float(name="vmax", val=vmax, accept_none=True, just_float=False)
-    if vmin is not None and vmax is not None and vmin >= vmax:
-        raise ValueError(f"'vmin' ({vmin}) < 'vmax' ({vmax}) not fulfilled.")
-
-
-# Check barplot and profile
-def check_grid_axis(grid_axis=None):
-    """"""
-    list_valid = ["x", 'y', 'both']
-    if grid_axis not in list_valid:
-        raise ValueError(f"'grid_axis' ('{grid_axis}') not valid. Chose from following: {list_valid}")
-
-
-# Check stat plot
-def check_ylabel_fontweight(ylabel_fontweight=None, accept_none=True):
-    """"""
-    if accept_none and ylabel_fontweight is None:
-        return
-    name = "ylabel_fontweight"
-    args = dict(name=name, val=ylabel_fontweight)
-    list_weights = ['light', 'medium', 'bold']
-    if type(ylabel_fontweight) in [float, int]:
-        ut.check_non_negative_number(**args, min_val=0, max_val=1000, just_int=False)
-    elif isinstance(ylabel_fontweight, str):
-        if ylabel_fontweight not in list_weights:
-            error = f"'{name}' ({ylabel_fontweight}) should be one of following: {list_weights}"
-            raise ValueError(error)
-    else:
-        error = f"'{name}' ({ylabel_fontweight}) should be either numeric value in range 0-1000" \
-                f"\n\tor one of following: {list_weights}"
-        raise ValueError(error)
 
 
 # Filtering functions
@@ -420,60 +204,15 @@ 

Source code for aaanalysis.cpp.cpp

     """Get datasets structures for filtering, two dictionaries with feature to scales category resp.
     feature positions and one datasets frame with paired pearson correlations of all scales"""
     if check_cat:
-        dict_c = dict(zip(df[_ut.COL_FEATURE], df["category"]))
+        dict_c = dict(zip(df[ut.COL_FEATURE], df["category"]))
     else:
         dict_c = dict()
-    dict_p = dict(zip(df[_ut.COL_FEATURE], [set(x) for x in df["positions"]]))
+    dict_p = dict(zip(df[ut.COL_FEATURE], [set(x) for x in df["positions"]]))
     df_cor = df_scales.corr()
     return dict_c, dict_p, df_cor
 
 
-# Plotting functions
-def _get_df_pos(df_feat=None, df_cat=None, y="subcategory", val_col="mean_dif",
-                value_type="mean", normalize=False,
-                tmd_len=20, jmd_n_len=10, jmd_c_len=10, start=1):
-    """Helper method for plotting"""
-    normalize, normalize_for_pos = check_normalize(normalize=normalize)
-    cpp_plot = CPPPlots(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, start=start)
-    df_pos = cpp_plot.get_df_pos(df=df_feat.copy(), y=y, value_type=value_type, val_col=val_col,
-                                 normalize=normalize,
-                                 normalize_for_pos=normalize_for_pos)
-    # Sort according to given categories
-    list_cat = list(df_cat[y].drop_duplicates())
-    list_col = list(df_pos.T)
-    sorted_col = [x for x in list_cat if x in list_col]
-    df_pos = df_pos.T[sorted_col].T
-    return df_pos
-
-
-def _add_importance_map(ax=None, df_feat=None, df_cat=None, start=None, args_len=None, y=None):
-    """"""
-    _df_pos = _get_df_pos(df_feat=df_feat, df_cat=df_cat, y=y, val_col=_ut.COL_FEAT_IMPORTANCE,
-                          value_type="sum", normalize="positions_only", start=start, **args_len)
-    _df = pd.melt(_df_pos.reset_index(), id_vars="index")
-    _df.columns = [_ut.COL_SUBCAT, "position", _ut.COL_FEAT_IMPORTANCE]
-    _list_sub_cat = _df[_ut.COL_SUBCAT].unique()
-    for i, sub_cat in enumerate(_list_sub_cat):
-        _dff = _df[_df[_ut.COL_SUBCAT] == sub_cat]
-        for pos, val in enumerate(_dff[_ut.COL_FEAT_IMPORTANCE]):
-            _symbol = "■"  # "•"
-            color = "black"
-            size = 12 if val >= 1 else (8 if val >= 0.5 else 4)
-            _args_symbol = dict(ha="center", va="center", color=color, size=size)
-            if val >= 0.2:
-                ax.text(pos + 0.5, i + 0.5, _symbol, **_args_symbol)
-
-
-def _set_size_to_optimized_value(seq_size=None, tmd_jmd_fontsize=None, opt_size=None):
-    """Set sizes to given value if None"""
-    if tmd_jmd_fontsize is None:
-        tmd_jmd_fontsize = opt_size
-    args_size = dict(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize)
-    return args_size
-
 # TODO simplify checks & interface (end-to-end check with tests & docu)
-# TODO plot_functions test & refactor (end-to-end)
-
 
 # II Main Functions
 
[docs]class CPP: @@ -492,13 +231,7 @@

Source code for aaanalysis.cpp.cpp

         Nested dictionary with parameter dictionary for each chosen split_type.
     accept_gaps : bool, default = False
         Whether to accept missing values by enabling omitting for computations (if True).
-    jmd_n_len : int, >=0, default = 10
-        Length of JMD-N.
-    jmd_c_len : int, >=0, default = 10
-        Length of JMD-C.
-    ext_len : int, >=0, default = 4
-        Length of TMD-extending part (starting from C and N terminal part of TMD).
-        Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len.
+
     verbose : bool, default = True
         Whether to print progress information about the algorithm (if True).
 
@@ -507,34 +240,27 @@ 

Source code for aaanalysis.cpp.cpp

     The CPP.run() method performs all steps of the CPP algorithm.
     """
 
[docs] def __init__(self, df_scales=None, df_cat=None, df_parts=None, split_kws=None, - accept_gaps=False, jmd_n_len=10, jmd_c_len=10, ext_len=4, verbose=True): + accept_gaps=False, verbose=True): # Load default scales if not specified sf = SequenceFeature() if df_cat is None: - df_cat = aa.load_scales(name=_ut.STR_SCALE_CAT) + df_cat = aa.load_scales(name=ut.STR_SCALE_CAT) if df_scales is None: df_scales = aa.load_scales() if split_kws is None: split_kws = sf.get_split_kws() ut.check_bool(name="verbose", val=verbose) - _ut.check_df_parts(df_parts=df_parts, verbose=verbose) - df_parts = _ut.check_df_scales(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) - df_cat, df_scales = _ut.check_df_cat(df_cat=df_cat, df_scales=df_scales, verbose=verbose) - _ut.check_split_kws(split_kws=split_kws) - check_len_ext_and_jmd(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) + ut.check_df_parts(df_parts=df_parts, verbose=verbose) + df_parts = ut.check_df_scales(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) + df_cat, df_scales = ut.check_df_cat(df_cat=df_cat, df_scales=df_scales, verbose=verbose) + ut.check_split_kws(split_kws=split_kws) self._verbose = verbose self._accept_gaps = accept_gaps # Feature components: Scales + Part + Split - self.df_cat = df_cat - self.df_scales = df_scales - self.df_parts = df_parts - self.split_kws = split_kws - # Set consistent length of JMD_N, JMD_C, TMD flanking amino acids (TMD-E) - self.jmd_n_len = jmd_n_len - self.jmd_c_len = jmd_c_len - self.ext_len = ext_len - # Axes dict for plotting - self.ax_seq = None
+ self.df_cat = df_cat.copy() + self.df_scales = df_scales.copy() + self.df_parts = df_parts.copy() + self.split_kws = split_kws
# Adder methods for CPP analysis (used in run method) def _add_scale_info(self, df_feat=None): @@ -553,16 +279,16 @@

Source code for aaanalysis.cpp.cpp

             Feature DataFrame including scale categories.
         """
         # Check input
-        df_feat = _ut.check_df_feat(df_feat=df_feat)
+        df_feat = ut.check_df_feat(df_feat=df_feat)
 
         # Add scale categories
         df_cat = self.df_cat.copy()
-        i = df_feat.columns.get_loc(_ut.COL_FEATURE)
-        for col in [_ut.COL_SCALE_DES, _ut.COL_SCALE_NAME, _ut.COL_SUBCAT, _ut.COL_CAT]:
+        i = df_feat.columns.get_loc(ut.COL_FEATURE)
+        for col in [ut.COL_SCALE_DES, ut.COL_SCALE_NAME, ut.COL_SUBCAT, ut.COL_CAT]:
             if col in list(df_feat):
                 df_feat.drop(col, inplace=True, axis=1)
-            dict_cat = dict(zip(df_cat[_ut.COL_SCALE_ID], df_cat[col]))
-            vals = [dict_cat[s.split("-")[2]] for s in df_feat[_ut.COL_FEATURE]]
+            dict_cat = dict(zip(df_cat[ut.COL_SCALE_ID], df_cat[col]))
+            vals = [dict_cat[s.split("-")[2]] for s in df_feat[ut.COL_FEATURE]]
             df_feat.insert(i + 1, col, vals)
         return df_feat
 
@@ -594,12 +320,12 @@ 

Source code for aaanalysis.cpp.cpp

         as implemented in SciPy.
         """
         # Check input
-        df_feat = _ut.check_df_feat(df_feat=df_feat)
-        _ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts")
+        df_feat = ut.check_df_feat(df_feat=df_feat)
+        ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts")
         ut.check_bool(name="parametric", val=parametric)
 
         # Add feature statistics
-        features = list(df_feat[_ut.COL_FEATURE])
+        features = list(df_feat[ut.COL_FEATURE])
         sf = SequenceFeature()
         sfs = SequenceFeatureStatistics()
         X = sf.feat_matrix(df_parts=self.df_parts,
@@ -609,137 +335,34 @@ 

Source code for aaanalysis.cpp.cpp

         df_feat = sfs.add_stat(df=df_feat, X=X, y=labels, parametric=parametric)
         return df_feat
 
-
[docs] def add_positions(self, df_feat=None, tmd_len=20, start=1): - """ - Add sequence positions to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to add feature positions. - tmd_len: int, >0 - Length of TMD. - start: int, >=0 - Position label of first amino acid position (starting at N-terminus). - - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature positions. - - Notes - ----- - The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len). - """ + @staticmethod + def _add_positions(df_feat=None, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, start=1): + """Add sequence positions to DataFrame.""" # Check input (length checked by SequenceFeaturePositions) - df_feat = _ut.check_df_feat(df_feat=df_feat) - + df_feat = ut.check_df_feat(df_feat=df_feat) # Add positions of features - sfp = SequenceFeaturePositions() - dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, - jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, - ext_len=self.ext_len, start=start) - df_feat["positions"] = sfp.get_positions(dict_part_pos=dict_part_pos, features=list(df_feat[_ut.COL_FEATURE])) - return df_feat
- -
[docs] @staticmethod - def add_shap(df_feat=None, col_shap="shap_value", name_feat_impact="feat_impact"): - """ - Convert SHAP values into feature impact/importance and add to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to which the feature impact will be added. - col_shap: str, default = 'shap_value' - Column name of `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_ values in the feature DataFrame. - name_feat_impact: str, default = 'feat_impact' - Column name of feature impact or feature importance that will be added to the feature DataFrame. - - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature impact. - - Notes - ----- - - SHAP (SHapley Additive exPlanations) is a game theoretic approach to explain the output of any machine learning model. - - SHAP values represent a feature's responsibility for a change in the model output. - - Missing values are accepted in SHAP values. - - """ - - # Check input - df_feat = df_feat.copy() - ut.check_str(name="name_feat_impact", val=name_feat_impact) - ut.check_str(name="col_shap", val=col_shap) - df_feat = _ut.check_df_feat(df_feat=df_feat) - check_shap_value_for_feat_impact(df_feat=df_feat, col_shap=col_shap) - check_feat_impact_in_df_feat(df_feat=df_feat, name_feat_impact=name_feat_impact) - - # Compute feature impact (accepting missing values) - shap_values = np.array(df_feat[col_shap]) - feat_impact = shap_values / np.nansum(np.abs(shap_values)) * 100 - shap_loc = df_feat.columns.get_loc(col_shap) - df_feat.insert(shap_loc + 1, name_feat_impact, feat_impact) - return df_feat
- -
[docs] def add_sample_dif(self, df_feat=None, df_seq=None, labels=None, sample_name=str, ref_group=0, accept_gaps=False): - """ - Add feature value difference between sample and reference group to DataFrame. - - Parameters - ---------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame to add sample difference. - df_seq: :class:`pandas.DataFrame` - DataFrame with sequences and sample names, in which the given sample name is included. - labels: array-like, shape (n_samples) - Class labels for samples in sequence DataFrame. - sample_name: str - Name of sample for which the feature value difference to a given reference group should be computed. - ref_group: int, default = 0 - Class label of reference group. - accept_gaps: bool, default = False - Whether to accept missing values by enabling omitting for computations (if True). - - Returns - ------- - df_feat: :class:`pandas.DataFrame` - Feature DataFrame including feature value difference. - """ - # Check input - df_feat = _ut.check_df_feat(df_feat=df_feat) - _ut.check_df_seq(df_seq=df_seq, jmd_c_len=self.jmd_c_len, jmd_n_len=self.jmd_c_len) - _ut.check_labels(labels=labels, df=df_seq, name_df="df_seq") - check_ref_group(ref_group=ref_group, labels=labels) - check_sample_in_df_seq(sample_name=sample_name, df_seq=df_seq) - # Add sample difference to reference group + features = df_feat[ut.COL_FEATURE].to_list() sf = SequenceFeature() - X = sf.feat_matrix(features=list(df_feat["feature"]), - df_parts=self.df_parts, - df_scales=self.df_scales, - accept_gaps=accept_gaps) - mask = [True if x == ref_group else False for x in labels] - i = list(df_seq[_ut.COL_NAME]).index(sample_name) - df_feat[f"dif_{sample_name}"] = X[i] - X[mask].mean() - return df_feat
+ feat_positions = sf.add_position(features=features, tmd_len=tmd_len, start=start, + jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len) + df_feat[ut.COL_POSITION] = feat_positions + return df_feat # Filtering methods @staticmethod def _pre_filtering(features=None, abs_mean_dif=None, std_test=None, max_std_test=0.2, n=10000): """CPP pre-filtering based on thresholds.""" df = pd.DataFrame(zip(features, abs_mean_dif, std_test), - columns=[_ut.COL_FEATURE, _ut.COL_ABS_MEAN_DIF, _ut.COL_STD_TEST]) - df = df[df[_ut.COL_STD_TEST] <= max_std_test] - df = df.sort_values(by=_ut.COL_ABS_MEAN_DIF, ascending=False).head(n) + columns=[ut.COL_FEATURE, ut.COL_ABS_MEAN_DIF, ut.COL_STD_TEST]) + df = df[df[ut.COL_STD_TEST] <= max_std_test] + df = df.sort_values(by=ut.COL_ABS_MEAN_DIF, ascending=False).head(n) return df def _filtering(self, df=None, max_overlap=0.5, max_cor=0.5, n_filter=100, check_cat=True): """CPP filtering algorithm based on redundancy reduction in descending order of absolute AUC.""" dict_c, dict_p, df_cor = _filtering_info(df=df, df_scales=self.df_scales, check_cat=check_cat) - df = df.sort_values(by=[_ut.COL_ABS_AUC, _ut.COL_ABS_MEAN_DIF], ascending=False).copy().reset_index(drop=True) - list_feat = list(df[_ut.COL_FEATURE]) + df = df.sort_values(by=[ut.COL_ABS_AUC, ut.COL_ABS_MEAN_DIF], ascending=False).copy().reset_index(drop=True) + list_feat = list(df[ut.COL_FEATURE]) list_top_feat = [list_feat.pop(0)] # List with best feature for feat in list_feat: add_flag = True @@ -760,12 +383,14 @@

Source code for aaanalysis.cpp.cpp

                             add_flag = False
             if add_flag:
                 list_top_feat.append(feat)
-        df_top_feat = df[df[_ut.COL_FEATURE].isin(list_top_feat)]
+        df_top_feat = df[df[ut.COL_FEATURE].isin(list_top_feat)]
         return df_top_feat
 
     # Main method
-
[docs] def run(self, labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, check_cat=True, - n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, n_processes=None): +
[docs] def run(self, labels=None, parametric=False, n_filter=100, + tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, start=1, + check_cat=True, n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, + n_processes=None): """ Perform CPP pipeline by creation and two-step filtering of features. CPP aims to identify a collection of non-redundant features that are most discriminant between @@ -782,9 +407,16 @@

Source code for aaanalysis.cpp.cpp

         n_pre_filter : int, optional
             Number of feature to be pre-filtered by CPP algorithm. If None, a percentage of all features is used.
         tmd_len : int, >0
-            Length of Transmembrane Domain (TMD) used for positions.
+            Length of TMD used for positions. TODO add link to explanation
         start : int, >=0
             Position label of first amino acid position (starting at N-terminus).
+        jmd_n_len : int, >=0, default = 10
+            Length of JMD-N.
+        jmd_c_len : int, >=0, default = 10
+            Length of JMD-C.
+        ext_len : int, >=0, default = 4
+            Length of TMD-extending part (starting from C and N terminal part of TMD).
+            Should be shorter than jmd_n_len and jmd_c_len (ext_len < jmd_n_len and ext_len < jmd_c_len).
         check_cat : bool, default = True
             Whether to check for redundancy within scale categories.
         pct_pre_filter : int, default = 5
@@ -822,7 +454,8 @@ 

Source code for aaanalysis.cpp.cpp

         11. positions: Feature positions for default settings
         """
         # Check input
-        _ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts")
+        ut.check_labels(labels=labels, df=self.df_parts, name_df="df_parts")
+        ut.check_args_len(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len)
         ut.check_non_negative_number(name="n_filter", val=n_filter, min_val=1)
         ut.check_non_negative_number(name="n_pre_filter", val=n_pre_filter, min_val=1, accept_none=True)
         ut.check_non_negative_number(name="pct_pre_filter", val=pct_pre_filter, min_val=5, max_val=100)
@@ -833,9 +466,9 @@ 

Source code for aaanalysis.cpp.cpp

         args = dict(split_kws=self.split_kws, df_scales=self.df_scales)
         if self._verbose:
             sf = SequenceFeature()
-            n_feat = len(sf.features(**args, list_parts=list(self.df_parts)))
+            n_feat = len(sf.get_features(**args, list_parts=list(self.df_parts)))
             print(f"1. CPP creates {n_feat} features for {len(self.df_parts)} samples")
-            _ut.print_start_progress()
+            ut.print_start_progress()
         # Pre-filtering: Select best n % of feature (filter_pct) based std(test set) and mean_dif
         sfs = SequenceFeatureStatistics()
         abs_mean_dif, std_test, features = sfs.pre_filtering_info(**args,
@@ -847,8 +480,8 @@ 

Source code for aaanalysis.cpp.cpp

         if n_pre_filter is None:
             n_pre_filter = int(len(features) * (pct_pre_filter / 100))
         if self._verbose:
-            _ut.print_finished_progress()
-            print(f"2. CPP pre-filters {n_pre_filter} features ({pct_pre_filter}%) with highest '{_ut.COL_ABS_MEAN_DIF}'"
+            ut.print_finished_progress()
+            print(f"2. CPP pre-filters {n_pre_filter} features ({pct_pre_filter}%) with highest '{ut.COL_ABS_MEAN_DIF}'"
                   f" and 'max_std_test' <= {max_std_test}")
         df = self._pre_filtering(features=features,
                                  abs_mean_dif=abs_mean_dif,
@@ -859,425 +492,18 @@ 

Source code for aaanalysis.cpp.cpp

         df = self._add_stat(df_feat=df, labels=labels, parametric=parametric, accept_gaps=self._accept_gaps)
         if self._verbose:
             print(f"3. CPP filtering algorithm")
-        df = self.add_positions(df_feat=df, tmd_len=tmd_len, start=start)
+        df = self._add_positions(df_feat=df, tmd_len=tmd_len, start=start)
         df = self._add_scale_info(df_feat=df)
-        df = self._filtering(df=df, n_filter=n_filter, check_cat=check_cat, max_overlap=max_overlap, max_cor=max_cor)
-        df.reset_index(drop=True, inplace=True)
+        df_feat = self._filtering(df=df, n_filter=n_filter, check_cat=check_cat, max_overlap=max_overlap, max_cor=max_cor)
+        df_feat.reset_index(drop=True, inplace=True)
         if self._verbose:
-            print(f"4. CPP returns df with {len(df)} unique features including general information and statistics")
-        return df
- - # Plotting methods -
[docs] def plot_profile(self, df_feat=None, y="category", val_col="mean_dif", val_type="count", normalize=False, - figsize=(7, 5), title=None, title_kws=None, - dict_color=None, edge_color="none", bar_width=0.75, - add_jmd_tmd=True, tmd_len=20, start=1, - jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, - tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", - seq_size=None, tmd_jmd_fontsize=None, - xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, xticks_pos=False, - ytick_size=None, ytick_width=2.0, ytick_length=5.0, ylim=None, - highlight_tmd_area=True, highlight_alpha=0.15, - grid=False, grid_axis="both", - add_legend_cat=True, legend_kws=None, - shap_plot=False, - **kwargs): - """ - Plot feature profile for given features from 'df_feat'. - - Parameters - ---------- - df_feat : class:`pandas.DataFrame`, optional, default=None - Dataframe containing the features to be plotted. If None, default features from the instance will be used. - y : str, default='category' - Column name in df_feat which contains the categories for grouping. - val_col : str, default='mean_dif' - Column name in df_feat which contains the values to be plotted. - val_type : str, default='count' - Type of value. Available options are specified by the `check_value_type` function. - normalize : bool, default=False - If True, the feature values will be normalized. - figsize : tuple, default=(7, 5) - Size of the plot. - title : str, optional - Title of the plot. - title_kws : dict, optional - Keyword arguments to customize the title appearance. - dict_color : dict, optional - Dictionary mapping categories to colors. - edge_color : str, default='none' - Color of the edges of the bars. - bar_width : float, default=0.75 - Width of the bars. - add_jmd_tmd : bool, default=True - If True, adds JMD and TMD lines/annotations to the plot. - tmd_len : int, default=20 - Length of the TMD. - start : int, default=1 - Start position. 
- jmd_n_seq : str, optional - JMD N-terminal sequence. - tmd_seq : str, optional - TMD sequence. - jmd_c_seq : str, optional - JMD C-terminal sequence. - tmd_color : str, default='mediumspringgreen' - Color for TMD. - jmd_color : str, default='blue' - Color for JMD. - tmd_seq_color : str, default='black' - Color for TMD sequence. - jmd_seq_color : str, default='white' - Color for JMD sequence. - seq_size : float, optional - Font size for sequence annotations. - tmd_jmd_fontsize : float, optional - Font size for TMD and JMD annotations. - xtick_size : float, default=11.0 - Size for x-tick labels. - xtick_width : float, default=2.0 - Width of the x-ticks. - xtick_length : float, default=5.0 - Length of the x-ticks. - xticks_pos : bool, default=False - If True, x-tick positions are adjusted based on given sequences. - ytick_size : float, optional - Size for y-tick labels. - ytick_width : float, default=2.0 - Width of the y-ticks. - ytick_length : float, default=5.0 - Length of the y-ticks. - ylim : tuple, optional - Y-axis limits. - highlight_tmd_area : bool, default=True - If True, highlights the TMD area on the plot. - highlight_alpha : float, default=0.15 - Alpha value for TMD area highlighting. - grid : bool, default=False - If True, a grid is added to the plot. - grid_axis : str, default='both' - Axis on which the grid is drawn. Options: 'both', 'x', 'y'. - add_legend_cat : bool, default=True - If True, a legend is added for categories. - legend_kws : dict, optional - Keyword arguments for the legend. - shap_plot : bool, default=False - If True, SHAP (SHapley Additive exPlanations) plot is generated. - **kwargs : dict - Other keyword arguments passed to internal functions or plotting libraries. - - Returns - ------- - ax : matplotlib.axes.Axes - The axes object containing the plot. 
- - """ - # Group arguments - args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq,) - args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - args_len = check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) - args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) - args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) - args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) - - # Checking input - # Args checked by Matplotlib: title, legend_kws - # Args checked by internal plotting functions: ylim - ut.check_non_negative_number(name="bar_width", val=bar_width, min_val=0, just_int=False) - ut.check_non_negative_number(name="start", val=start, min_val=0) - ut.check_non_negative_number(name="tmd_area_alpha", val=highlight_alpha, min_val=0, max_val=1, just_int=False) - ut.check_bool(name="add_jmd_tmd", val=add_jmd_tmd) - ut.check_bool(name="highlight_tmd_area", val=highlight_tmd_area) - ut.check_bool(name="grid", val=grid) - ut.check_bool(name="shap_plot", val=shap_plot) - ut.check_bool(name="add_legend_cat", val=add_legend_cat) - _ut.check_color(name="edge_color", val=edge_color, accept_none=True) - ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) - - _ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, type_check="numerical") - _ut.check_y_categorical(df=df_feat, y=y) - df_feat = _ut.check_df_feat(df_feat=df_feat) - check_value_type(val_type=val_type, count_in=True) - check_args_ytick(ytick_size=ytick_size, ytick_width=ytick_width, ytick_length=ytick_length) - check_figsize(figsize=figsize) - dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) - check_grid_axis(grid_axis=grid_axis) - # Get df positions - df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) - df_pos = 
_get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, - value_type=val_type, normalize=normalize, start=start, **args_len) - # Plotting - cpp_plot = CPPPlots(**args_len, start=start) - try: - ax = cpp_plot.profile(df_pos=df_pos, figsize=figsize, ylim=ylim, - dict_color=dict_color, edge_color=edge_color, bar_width=bar_width, - add_legend=add_legend_cat, legend_kws=legend_kws, shap_plot=shap_plot, - **args_xtick, **kwargs) - except AttributeError as e: - error_message = check_parameters(func=self.plot_profile, name_called_func="pd.DataFrame.plot", e=e) - raise AttributeError(error_message) - cpp_plot.set_title(title=title, title_kws=title_kws) - - # Autosize tmd sequence & annotation - opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos, label_term=False) - # Set default ylabel - ylabel = "Feature impact" if shap_plot else f"Feature count (-/+ {val_col})" - ax.set_ylabel(ylabel, size=opt_size) - # Adjust y ticks - ytick_size = opt_size if ytick_size is None else ytick_size - plt.yticks(size=ytick_size) - plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) - sns.despine(top=True, right=True) - # Add grid - if grid: - ax.set_axisbelow(True) # Grid behind datasets - ax.grid(which="major", axis=grid_axis, linestyle="-") - # Add tmd area - if highlight_tmd_area: - cpp_plot.highlight_tmd_area(ax=ax, x_shift=-0.5, tmd_color=tmd_color, alpha=highlight_alpha) - # Add tmd_jmd sequence if sequence is given - if type(tmd_seq) == str: - ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, **args_part_color, **args_seq_color, - xticks_pos=xticks_pos, heatmap=False, x_shift=0, - xtick_size=xtick_size) # Add tmd_jmd bar - self.ax_seq = ax - elif add_jmd_tmd: - size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize - cpp_plot.add_tmd_jmd_bar(ax=ax, x_shift=-0.5, **args_part_color, add_white_bar=False) - cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0, **args_xtick) - cpp_plot.add_tmd_jmd_text(ax=ax, 
x_shift=-0.5, tmd_jmd_fontsize=size) - - # Set current axis to main axis object depending on tmd sequence given or not - plt.yticks(size=ytick_size) - plt.tick_params(axis="y", color="black", width=ytick_width, length=ytick_length, bottom=False) - plt.sca(plt.gcf().axes[0]) - ax = plt.gca() - return ax
- -
[docs] def plot_heatmap(self, df_feat=None, y="subcategory", val_col="mean_dif", val_type="mean", normalize=False, - figsize=(8, 5), title=None, title_kws=None, - vmin=None, vmax=None, grid_on=True, - cmap="RdBu_r", cmap_n_colors=None, dict_color=None, cbar_kws=None, facecolor_dark=False, - add_jmd_tmd=True, tmd_len=20, start=1, - jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, - tmd_color="mediumspringgreen", jmd_color="blue", tmd_seq_color="black", jmd_seq_color="white", - seq_size=None, tmd_jmd_fontsize=None, - xticks_pos=False, xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, ytick_size=None, - add_legend_cat=True, legend_kws=None, - add_importance_map=False, cbar_pct=False, **kwargs): - """ - Plot a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis). - - This is a wrapper function for :func:`seaborn.heatmap`, designed to highlight differences between two sets - of sequences at the positional level (e.g., amino acid level for protein sequences). - - Parameters - ---------- - df_feat : :class:`~pandas.DataFrame`, shape (n_feature, n_feature_information) - DataFrame containing unique identifiers, scale information, statistics, and positions for each feature. - y : {'category', 'subcategory', 'scale_name'}, str, default = 'subcategory' - Name of the column in the feature DataFrame representing scale information (shown on the y-axis). - val_col : {'mean_dif', 'feat_impact', 'abs_auc', 'std_test', ...}, str, default = 'mean_dif' - Name of the column in the feature DataFrame containing numerical values to display. - val_type : {'mean', 'sum', 'std'}, str, default = 'mean' - Method to aggregate numerical values from 'val_col'. - normalize : {True, False, 'positions', 'positions_only'}, bool/str, default = False - Specifies normalization for numerical values in 'val_col': - - False: Set value at all positions of a feature without further normalization. 
- - - True: Set value at all positions of a feature and normalize across all features. - - - 'positions': Value/number of positions set at each position of a feature and normalized across features. - Recommended when aiming to emphasize features with fewer positions using 'val_col'='feat_impact' and 'value_type'='mean'. - - figsize : tuple(float, float), default = (10,7) - Width and height of the figure in inches passed to :func:`matplotlib.pyplot.figure`. - title : str, optional - Title of figure used by :func:`matplotlib.pyplot.title`. - title_kws : dict, optional - Keyword arguments passed to :func:`matplotlib.pyplot.title`. - vmin, vmax : float, optional - Values to anchor the colormap, otherwise, inferred from data and other keyword arguments. - cmap : matplotlib colormap name or object, or list of colors, default = 'seismic' - Name of color map assigning data values to color space. If 'SHAP', colors from - `SHAP <https://shap.readthedocs.io/en/latest/index.html>`_ will be used (recommended for feature impact). - cmap_n_colors : int, optional - Number of discrete steps in diverging or sequential color map. - dict_color : dict, optional - Map of colors for scale categories classifying scales shown on y-axis. - cbar_kws : dict of key, value mappings, optional - Keyword arguments for :meth:`matplotlib.figure.Figure.colorbar`. - add_jmd_tmd : bool, default = True - Whether to add colored bar under heatmap indicating sequence parts (JMD-N, TMD, JMD-C). - tmd_len : int, >0 - Length of TMD to be depiceted. - start : int, >=0 - Position label of first amino acid position (starting at N-terminus). - tmd_seq : str, optional - Sequence of TMD. 'tmd_len' is set to length of TMD if sequence for TMD, JMD-N and JMD-C are given. - Recommended if feature impact or mean difference should be depicted for one sample. - jmd_n_seq : str, optional - Sequence of JMD_N. 'jmd_n_len' is set to length of JMD_N if sequence for TMD, JMD-N and JMD-C are given. 
- Recommended if feature impact or mean difference should be depicted for one sample. - jmd_c_seq : str, optional - Sequence of JMD_C. 'jmd_c_len' is set to length of JMD_C if sequence for TMD, JMD-N and JMD-C are given. - Recommended if feature impact or mean difference should be depicted for one sample. - tmd_color : str, default = 'mediumspringgreen' - Color of TMD bar. - jmd_color : str, default = 'blue' - Color of JMD-N and JMD-C bar. - tmd_seq_color : str, default = 'black' - Color of TMD sequence. - jmd_seq_color : str, default = 'white' - Color of JMD-N and JMD-C sequence. - seq_size : float, optional - Font size of all sequence parts in points. If None, optimized automatically. - tmd_jmd_fontsize : float, optional - Font size of 'TMD', 'JMD-N' and 'JMD-C' label in points. If None, optimized automatically. - xtick_size : float, default = 11.0 - Size of x ticks in points. Passed as 'size' argument to :meth:`matplotlib.axes.Axes.set_xticklabels`. - xtick_width : float, default = 2.0 - Widht of x ticks in points. Passed as 'width' argument to :meth:`matplotlib.axes.Axes.tick_params`. - xtick_length : float, default = 5.0, - Length of x ticks in points. Passed as 'length' argument to :meth:`matplotlib.axes.Axes.tick_params`. - ytick_size : float, optional - Size of scale information as y ticks in points. Passed to :meth:`matplotlib.axes.Axes.tick_params`. - If None, optimized automatically. - add_legend_cat : bool, default = True, - Whether to add legend for categories under plot and classification of scales at y-axis. - legend_kws : dict, optional - Keyword arguments passed to :meth:`matplotlib.axes.Axes.legend` - kwargs : other keyword arguments - All other keyword arguments passed to :meth:`matplotlib.axes.Axes.pcolormesh`. - - Returns - ------- - ax : matplotlib Axes - Axes object containing the heatmap. - - Warnings - -------- - - 'cmap_n_colors' is effective only if 'vmin' and 'vmax' align with the data. 
- - - 'tmd_seq_color' and 'jmd_seq_color' are applicable only when 'tmd_seq', 'jmd_n_seq', and 'jmd_c_seq' are provided. - - See Also - -------- - seaborn.heatmap - Plotting heatmap using seaborn. - See `Seaborn documentation <https://seaborn.pydata.org/generated/seaborn.heatmap.html>`_ for more details. - - Examples - -------- - - Plot CPP feature heatmap: - - .. plot:: - :context: close-figs - - >>> import matplotlib.pyplot as plt - >>> import aaanalysis as aa - >>> sf = aa.SequenceFeature() - >>> df_seq = aa.load_dataset(name='SEQ_DISULFIDE', min_len=100) - >>> labels = list(df_seq["label"]) - >>> df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) - >>> #split_kws = sf.get_split_kws(n_split_min=1, n_split_max=3, split_types=["Segment", "PeriodicPattern"]) - >>> #df_scales = aa.load_scales(unclassified_in=False).sample(n=10, axis=1) - >>> #cpp = aa.CPP(df_parts=df_parts, split_kws=split_kws, df_scales=df_scales) - >>> #df_feat = cpp.run(labels=labels) - >>> #cpp.plot_heatmap(df_feat=df_feat) - >>> #plt.tight_layout() + print(f"4. CPP returns df with {len(df_feat)} unique features including general information and statistics") + return df_feat
- """ - # Group arguments - args_seq = dict(jmd_n_seq=jmd_n_seq, tmd_seq=tmd_seq, jmd_c_seq=jmd_c_seq) - args_size = check_args_size(seq_size=seq_size, tmd_jmd_fontsize=tmd_jmd_fontsize) - args_len = check_args_len(tmd_len=tmd_len, jmd_n_len=self.jmd_n_len, jmd_c_len=self.jmd_c_len, **args_seq) - args_xtick = check_args_xtick(xtick_size=xtick_size, xtick_width=xtick_width, xtick_length=xtick_length) - args_part_color = check_part_color(tmd_color=tmd_color, jmd_color=jmd_color) - args_seq_color = check_seq_color(tmd_seq_color=tmd_seq_color, jmd_seq_color=jmd_seq_color) - - # Checking input - # Args checked by Matplotlib: title, cmap, cbar_kws, legend_kws - ut.check_non_negative_number(name="start", val=start, min_val=0) - ut.check_non_negative_number(name="ytick_size", val=ytick_size, accept_none=True, just_int=False, min_val=1) - ut.check_non_negative_number(name="cmap_n_colors", val=cmap_n_colors, min_val=1, accept_none=True) - ut.check_bool(name="add_jmd_tmd", val=add_jmd_tmd) - ut.check_bool(name="add_legend_cat", val=add_legend_cat) - ut.check_dict(name="legend_kws", val=legend_kws, accept_none=True) - ut.check_dict(name="cbar_kws", val=cbar_kws, accept_none=True) - _ut.check_col_in_df(df=df_feat, name_df="df_feat", col=val_col, type_check="numerical") - _ut.check_y_categorical(df=df_feat, y=y) - df_feat = _ut.check_df_feat(df_feat=df_feat, df_cat=self.df_cat) - check_value_type(val_type=val_type, count_in=False) - check_vmin_vmax(vmin=vmin, vmax=vmax) - check_figsize(figsize=figsize) - dict_color = check_dict_color(dict_color=dict_color, df_cat=self.df_cat) - - # Get df positions - df_feat = self.add_positions(df_feat=df_feat, tmd_len=args_len["tmd_len"], start=start) - df_pos = _get_df_pos(df_feat=df_feat, df_cat=self.df_cat, y=y, val_col=val_col, - value_type=val_type, normalize=normalize, start=start, **args_len) - # Plotting - cpp_plot = CPPPlots(**args_len, start=start) - cpp_plot.set_figsize(figsize=figsize) # figsize is not used as argument in seaborn 
(but in pandas) - try: - linecolor = "gray" if facecolor_dark else "black" - if "linecolor" in kwargs: - linecolor = kwargs["linecolor"] - else: - kwargs["linecolor"] = linecolor - ax = cpp_plot.heatmap(df_pos=df_pos, vmin=vmin, vmax=vmax, grid_on=grid_on, - cmap=cmap, cmap_n_colors=cmap_n_colors, cbar_kws=cbar_kws, - x_shift=0.5, ytick_size=ytick_size, facecolor_dark=facecolor_dark, - cbar_pct=cbar_pct, **args_xtick, **kwargs) - ax.axvline(self.jmd_n_len, color=linecolor, linestyle="-", linewidth=1.5) - ax.axvline(x=self.jmd_n_len + args_len["tmd_len"], color=linecolor, linestyle="-", linewidth=1.5) - - except AttributeError as e: - error_message = check_parameters(func=self.plot_heatmap, name_called_func="sns.heatmap", e=e) - raise AttributeError(error_message) - cpp_plot.set_title(title=title, title_kws=title_kws) - # Autosize tmd sequence & annotation - opt_size = cpp_plot.optimize_label_size(ax=ax, df_pos=df_pos) - # Add importance map - if add_importance_map: - _add_importance_map(ax=ax, df_feat=df_feat, df_cat=self.df_cat, - start=start, args_len=args_len, y=y) - # Add scale classification - if add_legend_cat: - ax = cpp_plot.add_legend_cat(ax=ax, df_pos=df_pos, df_cat=self.df_cat, y=y, dict_color=dict_color, - legend_kws=legend_kws) - # Add tmd_jmd sequence if sequence is given - if isinstance(tmd_seq, str): - ax = cpp_plot.add_tmd_jmd_seq(ax=ax, **args_seq, **args_size, **args_part_color, **args_seq_color, - xticks_pos=xticks_pos, - x_shift=0.5, xtick_size=xtick_size) - self.ax_seq = ax - # Add tmd_jmd bar - elif add_jmd_tmd: - size = opt_size if tmd_jmd_fontsize is None else tmd_jmd_fontsize - cpp_plot.add_tmd_jmd_bar(ax=ax, **args_part_color) - cpp_plot.add_tmd_jmd_xticks(ax=ax, x_shift=0.5, **args_xtick) - cpp_plot.add_tmd_jmd_text(ax=ax, x_shift=0, tmd_jmd_fontsize=size) - # Set current axis to main axis object depending on tmd sequence given or not - plt.sca(plt.gcf().axes[0]) - ax = plt.gca() - return ax
- - def update_seq_size(self): - """""" - # TODO legend changes slightly if sequnece length altered (e.g. PTPRM_MOUSE vs A4_HUMAN) - # TODO look for more extreme example and text - f = lambda l: l.get_window_extent(ax.figure.canvas.get_renderer()) - ax = self.ax_seq - labels = ax.xaxis.get_ticklabels(which="both") - tick_positions = [f(l).x0 for l in labels] - sorted_tick_positions, sorted_labels = zip(*sorted(zip(tick_positions, labels), key=lambda t: t[0])) - # Adjust font size to prevent overlap - seq_size = get_optimal_fontsize(ax, sorted_labels) - for l in sorted_labels: - l.set_fontsize(seq_size)
+
[docs] @staticmethod + def eval(df_feat=None, features=None): + """Get evaluation for provided dataset"""
+ # TODO get evaluation for any dataset for complete
diff --git a/docs/build/html/_modules/aaanalysis/cpp/feature.html b/docs/build/html/_modules/aaanalysis/cpp/feature.html index 14accb68..4d53fc65 100644 --- a/docs/build/html/_modules/aaanalysis/cpp/feature.html +++ b/docs/build/html/_modules/aaanalysis/cpp/feature.html @@ -116,7 +116,7 @@

REFERENCES

@@ -195,17 +195,32 @@

Source code for aaanalysis.cpp.feature

 from itertools import repeat
 import multiprocessing as mp
 import warnings
-from collections import OrderedDict
 
 from aaanalysis.cpp._feature_pos import SequenceFeaturePositions
 from aaanalysis.cpp._split import Split, SplitRange
 from aaanalysis.cpp._part import Parts
-import aaanalysis.cpp._utils as _ut
-import aaanalysis._utils as ut
+
 import aaanalysis as aa
+import aaanalysis.utils as ut
+
+# TODO simplify and check
 
 
 # I Helper Functions
+# Check for add methods
+def check_ref_group(ref_group=0, labels=None):
+    """Check that 'ref_group' is one of the given class labels (raises ValueError otherwise)"""
+    if ref_group not in labels:
+        raise ValueError(f"'ref_group' ({ref_group}) not class label: {set(labels)}.")
+
+
+def check_sample_in_df_seq(sample_name=None, df_seq=None):
+    """Check that 'sample_name' occurs in the name column (ut.COL_NAME) of 'df_seq' (raises ValueError otherwise)"""
+    list_names = list(df_seq[ut.COL_NAME])
+    if sample_name not in list_names:
+        error = f"'sample_name' ('{sample_name}') not in '{ut.COL_NAME}' of 'df_seq'." \
+                f"\nValid names are: {list_names}"
+        raise ValueError(error)
 
 
 # Check load functions
@@ -215,21 +230,12 @@ 

Source code for aaanalysis.cpp.feature

         raise ValueError("'clust_th' should be 0.3, 0.5, 0.7, or 0.9")
 
 
-# Check functions get_df_parts
-def check_jmd_len(jmd_n_len=None, jmd_c_len=None, accept_none=True):
-    """Check jmd_n_len and jmd_c_len"""
-    if accept_none and jmd_n_len is None and jmd_c_len is None:
-        return None     # skip check
-    for name, val in zip(["jmd_n_len", "jmd_c_len"], [jmd_n_len, jmd_c_len]):
-        ut.check_non_negative_number(name=name, val=val)
-
-
 # Check functions get_split_kws
 def check_split_types(split_types=None):
     """Check split_type"""
     if type(split_types) is str:
         split_types = [split_types]
-    list_split_types = [_ut.STR_SEGMENT, _ut.STR_PATTERN, _ut.STR_PERIODIC_PATTERN]
+    list_split_types = [ut.STR_SEGMENT, ut.STR_PATTERN, ut.STR_PERIODIC_PATTERN]
     if split_types is None:
         split_types = list_split_types
     if not set(list_split_types).issuperset(set(split_types)):
@@ -262,7 +268,7 @@ 

Source code for aaanalysis.cpp.feature

     """Get missing elements"""
     seq_elements = set("".join(df_parts.values.flatten()))
     if accept_gaps:
-        missing_elements = [x for x in seq_elements if x not in scale_elements and x != _ut.STR_AA_GAP]
+        missing_elements = [x for x in seq_elements if x not in scale_elements and x != ut.STR_AA_GAP]
     else:
         missing_elements = [x for x in seq_elements if x not in scale_elements]
     return missing_elements
@@ -319,12 +325,12 @@ 

Source code for aaanalysis.cpp.feature

     """Helper function to create feature values for feature matrix"""
     sp = Split()
     # Get vectorized split function
-    split_type, split_kwargs = _ut.check_split(split=split)
+    split_type, split_kwargs = ut.check_split(split=split)
     f_split = getattr(sp, split_type.lower())
     # Vectorize split function using anonymous function
     vf_split = np.vectorize(lambda x: f_split(seq=x, **split_kwargs))
     # Get vectorized scale function
-    vf_scale = _ut.get_vf_scale(dict_scale=dict_scale, accept_gaps=accept_gaps)
+    vf_scale = ut.get_vf_scale(dict_scale=dict_scale, accept_gaps=accept_gaps)
     # Combine part split and scale to get feature values
     part_split = vf_split(df_parts)
     feature_value = np.round(vf_scale(part_split), 5)  # feature values
@@ -374,7 +380,7 @@ 

Source code for aaanalysis.cpp.feature

         'tmd_jmd', 'jmd_n_tmd_n', 'tmd_c_jmd_c', 'ext_n_tmd_n', 'tmd_c_ext_c']
     """
 
-    # Basic data structures for features
+    # Basic data structures for features
 
[docs] @staticmethod def get_df_parts(df_seq=None, list_parts=None, jmd_n_len=None, jmd_c_len=None, ext_len=4, all_parts=False): """Create DataFrane with sequence parts. @@ -420,22 +426,21 @@

Source code for aaanalysis.cpp.feature

         >>> df_seq = aa.load_dataset(name='GSEC_SUB_SEQ')
         >>> df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10)
         """
-        check_jmd_len(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
-        ut.check_non_negative_number(name="ext_len", val=ext_len)
-        df_seq = _ut.check_df_seq(df_seq=df_seq, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
-        list_parts = _ut.check_list_parts(list_parts=list_parts, all_parts=all_parts)
-        seq_info_in_df = set(_ut.COLS_SEQ_INFO).issubset(set(df_seq))
+        ut.check_args_len(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len, accept_tmd_none=True)
+        df_seq = ut.check_df_seq(df_seq=df_seq, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
+        list_parts = ut.check_list_parts(list_parts=list_parts, all_parts=all_parts)
+        seq_info_in_df = set(ut.COLS_SEQ_INFO).issubset(set(df_seq))
         pa = Parts()
         dict_parts = {}
         for i, row in df_seq.iterrows():
-            entry = row[_ut.COL_ENTRY]
+            entry = row[ut.COL_ENTRY]
             if jmd_c_len is not None and jmd_n_len is not None and seq_info_in_df:
-                seq, start, stop = row[_ut.COLS_SEQ_INFO].values
+                seq, start, stop = row[ut.COLS_SEQ_INFO].values
                 parts = pa.create_parts(seq=seq, tmd_start=start, tmd_stop=stop,
                                         jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len)
                 jmd_n, tmd, jmd_c = parts.jmd_n, parts.tmd, parts.jmd_c
             else:
-                jmd_n, tmd, jmd_c = row[_ut.COLS_PARTS].values
+                jmd_n, tmd, jmd_c = row[ut.COLS_PARTS].values
             dict_part_seq = pa.get_dict_part_seq(tmd=tmd, jmd_n=jmd_n, jmd_c=jmd_c, ext_len=ext_len)
             dict_part_seq = {part: dict_part_seq[part] for part in list_parts}
             dict_parts[entry] = dict_part_seq
@@ -499,15 +504,15 @@ 

Source code for aaanalysis.cpp.feature

             steps_pattern = [3, 4]
         if steps_periodicpattern is None:
             steps_periodicpattern = [3, 4]      # Differences between interacting amino acids in helix (without gaps)
-        split_kws = {_ut.STR_SEGMENT: dict(n_split_min=n_split_min, n_split_max=n_split_max),
-                     _ut.STR_PATTERN: dict(steps=steps_pattern, n_min=n_min, n_max=n_max, len_max=len_max),
-                     _ut.STR_PERIODIC_PATTERN: dict(steps=steps_periodicpattern)}
+        split_kws = {ut.STR_SEGMENT: dict(n_split_min=n_split_min, n_split_max=n_split_max),
+                     ut.STR_PATTERN: dict(steps=steps_pattern, n_min=n_min, n_max=n_max, len_max=len_max),
+                     ut.STR_PERIODIC_PATTERN: dict(steps=steps_periodicpattern)}
         split_kws = {x: split_kws[x] for x in split_types}
-        _ut.check_split_kws(split_kws=split_kws)
+        ut.check_split_kws(split_kws=split_kws)
         return split_kws
-
[docs] def features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=False): - """Create list of feature ids for given Parts, Splits, and Scales +
[docs] def get_features(self, list_parts=None, split_kws=None, df_scales=None, all_parts=False): + """Create list of all feature ids for given Parts, Splits, and Scales Parameters ---------- @@ -526,9 +531,9 @@

Source code for aaanalysis.cpp.feature

             Ids of all possible features for combination of Parts, Splits, and Scales with form: PART-SPLIT-SCALE
 
         """
-        list_parts = _ut.check_list_parts(list_parts=list_parts, all_parts=all_parts)
-        _ut.check_split_kws(split_kws=split_kws)
-        _ut.check_df_scales(df_scales=df_scales, accept_none=True)
+        list_parts = ut.check_list_parts(list_parts=list_parts, all_parts=all_parts)
+        ut.check_split_kws(split_kws=split_kws)
+        ut.check_df_scales(df_scales=df_scales, accept_none=True)
         if df_scales is None:
             df_scales = aa.load_scales()
         if split_kws is None:
@@ -545,9 +550,72 @@ 

Source code for aaanalysis.cpp.feature

                              for sc in scales])
         return features
+
[docs] @staticmethod + def feat_matrix(features=None, df_parts=None, df_scales=None, accept_gaps=False, + n_jobs=None, verbose=False, return_labels=False): + """Create feature matrix for given feature ids and sequence parts. + + Parameters + ---------- + features: str, list of strings, pd.Series + Ids of features for which matrix of feature values should be created. + df_parts: :class:`pandas.DataFrame` + DataFrame with sequence parts. + df_scales: :class:`pandas.DataFrame`, optional + DataFrame with default amino acid scales. + accept_gaps: bool, default = False + Whether to accept missing values by enabling omitting for computations (if True). + n_jobs: int, default = None, + The number of jobs to run in parallel. If None, it will be set to the maximum. + verbose: bool, default = False + Whether to print size of to be created feature matrix (if True) or not otherwise. + return_labels: bool, default = False + Whether to return sample labels in addition to feature matrix. + + Returns + ------- + feat_matrix: array-like or sparse matrix, shape (n_samples, n_features) + Feature values of samples. + """ + ut.check_non_negative_number(name="n_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True) + if df_scales is None: + df_scales = aa.load_scales() + ut.check_df_scales(df_scales=df_scales) + ut.check_df_parts(df_parts=df_parts) + features = ut.check_features(features=features, parts=df_parts, df_scales=df_scales) + check_df_scales_matches_df_parts(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) + if verbose: + n_feat = len(features) + n_samples = len(df_parts) + n_vals = n_feat * n_samples + print(f"Feature matrix for {n_feat} features and {n_samples} samples will be created") + if n_vals > 1000*1000: + warning = f"Feature matrix with n={n_vals}>=10^6 values will be created, which will take some time.\n" \ + "It is recommended to create a feature matrix for a pre-selected number features " \ + "so that 10^6 values are not exceeded."
+ warnings.warn(warning) + # Create feature matrix using parallel processing + dict_all_scales = ut.get_dict_all_scales(df_scales=df_scales) + n_processes = min([os.cpu_count(), len(features)]) if n_jobs is None else n_jobs + feat_chunks = np.array_split(features, n_processes) + args = zip(feat_chunks, repeat(dict_all_scales), repeat(df_parts), repeat(accept_gaps)) + with mp.get_context("spawn").Pool(processes=n_processes) as pool: + result = pool.starmap(_feature_matrix, args) + feat_matrix = np.concatenate(result, axis=1) + if return_labels: + if verbose: + print("Tuple of (feat_matrix, labels) will be returned") + labels = df_parts.index.tolist() + return feat_matrix, labels # X, y + else: + if verbose: + print("Only feat_matrix (without labels) will be returned") + return feat_matrix # X
+ + # Additional feature related methods
[docs] @staticmethod def feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=10, ext_len=0, start=1): - """Convert feature ids (PART-SPLIT-SCALE) into feature name (scale name [positions]). + """Convert feature ids (PART-SPLIT-SCALE) into feature names (scale name [positions]). Parameters ---------- @@ -580,31 +648,33 @@

Source code for aaanalysis.cpp.feature

             - PeriodicPattern: [first..step1/step2..last]
         """
         # Check input (length checked in SequenceFeaturePositions)
-        features = _ut.check_features(features=features)
-        _ut.check_df_cat(df_cat=df_cat)
+        features = ut.check_features(features=features)
+        ut.check_df_cat(df_cat=df_cat)
         if df_cat is None:
-            df_cat = aa.load_scales(name=_ut.STR_SCALE_CAT)
+            df_cat = aa.load_scales(name=ut.STR_SCALE_CAT)
         # Get feature names
         sfp = SequenceFeaturePositions()
         dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len,
                                               ext_len=ext_len, start=start)
         list_positions = sfp.get_positions(dict_part_pos=dict_part_pos, features=features)
-        dict_scales = dict(zip(df_cat[_ut.COL_SCALE_ID], df_cat[_ut.COL_SCALE_NAME]))
+        dict_scales = dict(zip(df_cat[ut.COL_SCALE_ID], df_cat[ut.COL_SCALE_NAME]))
         feat_names = []
         for feat_id, pos in zip(features, list_positions):
             part, split, scale = feat_id.split("-")
             split_type = split.split("(")[0]
-            if split_type == _ut.STR_SEGMENT and len(pos.split(",")) > 2:
+            if split_type == ut.STR_SEGMENT and len(pos.split(",")) > 2:
                 pos = pos.split(",")[0] + "..." + pos.split(",")[-1]
-            if split_type == _ut.STR_PERIODIC_PATTERN:
+            if split_type == ut.STR_PERIODIC_PATTERN:
                 step = split.split("+")[1].split(",")[0]
                 pos = pos.split(",")[0] + ".." + step + ".." + pos.split(",")[-1]
             feat_names.append(f"{dict_scales[scale]} [{pos}]")
         return feat_names
# Feature: Part + Split + Scale -
[docs] @staticmethod - def feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): + # For what used? Not redudant with feature matrix? + # TODO Add functions (modify df_feat) +
[docs] @staticmethod + def add_feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False): """Create feature values for all sequence parts by combining Part, Split, and Scale. Parameters @@ -643,8 +713,8 @@

Source code for aaanalysis.cpp.feature

         All numbers should be non-negative integers. Examples for each split type
         are as follows: 'Segment(5,7)', 'Pattern(C,1,2)', 'PeriodicPattern(N,i+2/3,1)'.
         """
-        _ut.check_df_parts(df_parts=df_parts)
-        _ut.check_split(split=split)
+        ut.check_df_parts(df_parts=df_parts)
+        ut.check_split(split=split)
         check_dict_scale(dict_scale=dict_scale, df_parts=df_parts, accept_gaps=accept_gaps)
         feature_value = _feature_value(df_parts=df_parts,
                                        split=split,
@@ -652,67 +722,92 @@ 

Source code for aaanalysis.cpp.feature

                                        accept_gaps=accept_gaps)
         return feature_value
-
[docs] @staticmethod - def feat_matrix(df_parts=None, features=None, df_scales=None, accept_gaps=False, - n_jobs=None, verbose=False, return_labels=False): - """Create feature matrix for given feature names and sequence parts. +
[docs] @staticmethod + def add_dif(df_feat=None, df_seq=None, labels=None, sample_name=str, ref_group=0, + accept_gaps=False, jmd_n_len=10, jmd_c_len=10, df_parts=None, df_scales=None): + """ + Add feature value difference between sample and reference group to DataFrame. Parameters ---------- - df_parts: :class:`pandas.DataFrame` - DataFrame with sequence parts. - features: str, list of strings, pd.Series - Ids of features for which matrix of feature values should be created. - df_scales: :class:`pandas.DataFrame`, optional - DataFrame with default amino acid scales. + df_feat: :class:`pandas.DataFrame` + Feature DataFrame (CPP output) to add sample difference. + df_seq: :class:`pandas.DataFrame` + DataFrame with sequences and sample names, in which the given sample name is included. + labels: array-like, shape (n_samples) + Class labels for samples in sequence DataFrame. + sample_name: str + Name of sample for which the feature value difference to a given reference group should be computed. + ref_group: int, default = 0 + Class label of reference group. accept_gaps: bool, default = False Whether to accept missing values by enabling omitting for computations (if True). - n_jobs: int, default = None, - The number of jobs to run in parallel. If None, it will be set to the maximum. - verbose: bool, default = True - Whether to print size of to be created feature matrix (if True) or not otherwise. - return_labels: bool, default = False - Whether to return sample labels in addition to feature matrix. Returns ------- - feat_matrix: array-like or sparse matrix, shape (n_samples, n_features) - Feature values of samples. + df_feat: :class:`pandas.DataFrame` + Feature DataFrame including feature value difference. 
""" - ut.check_non_negative_number(name="j_jobs", val=n_jobs, accept_none=True, min_val=1, just_int=True) - if df_scales is None: - df_scales = aa.load_scales() - _ut.check_df_scales(df_scales=df_scales) - _ut.check_df_parts(df_parts=df_parts) - features = _ut.check_features(features=features, parts=df_parts, df_scales=df_scales) - check_df_scales_matches_df_parts(df_scales=df_scales, df_parts=df_parts, accept_gaps=accept_gaps) - if verbose: - n_feat = len(features) - n_samples = len(df_parts) - n_vals = n_feat * n_samples - print(f"Feature matrix for {n_feat} features and {n_samples} samples will be created") - if n_vals > 1000*1000: - warning = f"Feature matrix with n={n_vals}>=10^6 values will be created, which will take some time.\n" \ - "It is recommended to create a feature matrix for a pre-selected number features " \ - "so that 10^6 values are not exceeded." - warnings.warn(warning) - # Create feature matrix using parallel processing - dict_all_scales = _ut.get_dict_all_scales(df_scales=df_scales) - n_processes = min([os.cpu_count(), len(features)]) if n_jobs is None else n_jobs - feat_chunks = np.array_split(features, n_processes) - args = zip(feat_chunks, repeat(dict_all_scales), repeat(df_parts), repeat(accept_gaps)) - with mp.get_context("spawn").Pool(processes=n_processes) as pool: - result = pool.starmap(_feature_matrix, args) - feat_matrix = np.concatenate(result, axis=1) - if return_labels: - if verbose: - print("Tuple of (feat_matrix, labels) will be returned") - labels = df_parts.index.tolist() - return feat_matrix, labels # X, y - else: - if verbose: - print("Only feat_matrix (without labels) will be returned") - return feat_matrix # X
+ # Check input + df_feat = ut.check_df_feat(df_feat=df_feat) + ut.check_df_seq(df_seq=df_seq, jmd_c_len=jmd_c_len, jmd_n_len=jmd_n_len) + ut.check_labels(labels=labels, df=df_seq, name_df="df_seq") + check_ref_group(ref_group=ref_group, labels=labels) + check_sample_in_df_seq(sample_name=sample_name, df_seq=df_seq) + # Add sample difference to reference group + sf = SequenceFeature() + X = sf.feat_matrix(features=list(df_feat["feature"]), + df_parts=df_parts, + df_scales=df_scales, + accept_gaps=accept_gaps) + mask = [True if x == ref_group else False for x in labels] + i = list(df_seq[ut.COL_NAME]).index(sample_name) + df_feat[f"dif_{sample_name}"] = X[i] - X[mask].mean() + return df_feat
+ +
[docs] @staticmethod + def add_position(df_feat=None, features=None, start=1, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, + part_split=False): + """Create list with positions for given feature names + + Parameters + ---------- + df_feat: :class:`pandas.DataFrame` + Feature DataFrame (CPP output) to add sample difference. + features: str, list of strings, pd.Series + Ids of features for which feature names should be created. + start: int, >=0, default = 1 + Position label of first amino acid position (starting at N-terminus). + tmd_len: int, >0, default = 20 + Length of TMD. + jmd_n_len : int, >=0, default = 10 + Length of JMD-N. + jmd_c_len : int, >=0, default = 10 + Length of JMD-C. + ext_len : int, >=0, default = 4 + Length of TMD-extending part (starting from C and N terminal part of TMD). + Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len. + + Returns + ------- + feat_positions: list + list with positions for each feature in feat_names + + Notes + ----- + The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len). + """ + # TODO add sequence, generalize check functions for tmd_len ... + features = ut.check_features(features=features) + ut.check_non_negative_number(name="tmd_len", val=tmd_len, just_int=True, min_val=1) + args = dict(jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, ext_len=ext_len, start=start) + for name in args: + ut.check_non_negative_number(name=name, val=args[name], just_int=True, min_val=0) + sfp = SequenceFeaturePositions() + dict_part_pos = sfp.get_dict_part_pos(tmd_len=tmd_len, **args) + feat_positions = sfp.get_positions(dict_part_pos=dict_part_pos, features=features) + return feat_positions
+
diff --git a/docs/build/html/_modules/aaanalysis/data_loader/data_loader.html b/docs/build/html/_modules/aaanalysis/data_loader/data_loader.html index 928a8660..57ba2282 100644 --- a/docs/build/html/_modules/aaanalysis/data_loader/data_loader.html +++ b/docs/build/html/_modules/aaanalysis/data_loader/data_loader.html @@ -116,7 +116,7 @@

REFERENCES

@@ -192,7 +192,7 @@

Source code for aaanalysis.data_loader.data_loader

import numpy as np import re -import aaanalysis._utils as ut +import aaanalysis.utils as ut # I Helper Functions @@ -230,8 +230,8 @@

Source code for aaanalysis.data_loader.data_loader

Three types of benchmark datasets are provided: - Residue prediction: 6 datasets used to predict residue (amino acid) specific properties ('AA_CASPASE3', 'AA_FURIN', 'AA_LDR', 'AA_MMP2', 'AA_RNABIND', 'AA_SA') - - Domain prediction: 1 dataset used to predict domain specific properties (containing unlabeled data) - (DOM_SUBGSEC) + - Domain prediction: 1 dataset used to predict domain specific properties (_PU contains unlabeled _data) + (DOM_GSEC, DOM_GSEC_PU) - Sequence prediction: 6 datasets used to predict sequence specific properties ('SEQ_AMYLO', 'SEQ_CAPSID', 'SEQ_DISULFIDE', 'SEQ_LOCATION', 'SEQ_SOLUBLE', 'SEQ_TAIL') @@ -271,20 +271,25 @@

Source code for aaanalysis.data_loader.data_loader

if name not in list_datasets: list_aa = [x for x in list_datasets if 'AA' in x] list_seq = [x for x in list_datasets if 'SEQ' in x] - raise ValueError(f"'name' ({name}) is not valid.\n Amino acid datasets: {list_aa}\n Sequence datasets: {list_seq}") + list_dom = [x for x in list_datasets if 'DOM' in x] + raise ValueError(f"'name' ({name}) is not valid." + f"\n Amino acid datasets: {list_aa}" + f"\n Sequence datasets: {list_seq}" + f"\n Domain datasets: {list_dom}") df = pd.read_csv(folder_in + name + ".tsv", sep="\t") - # Filter data + # Filter _data if min_len is not None: mask = [len(x) >= min_len for x in df[ut.COL_SEQ]] df = df[mask] if max_len is not None: mask = [len(x) <= max_len for x in df[ut.COL_SEQ]] df = df[mask] - if n is not None: - labels = set(df["label"]) - df = pd.concat([df[df["label"] == l].head(n) for l in labels]) # Adjust non-canonical amino acid (keep, remove, or replace by gap) df = _adjust_non_canonical_aa(df=df, non_canonical_aa=non_canonical_aa) + # Select balanced groups + if n is not None: + labels = set(df[ut.COL_LABEL]) + df = pd.concat([df[df[ut.COL_LABEL] == l].head(n) for l in labels]) return df
@@ -327,7 +332,7 @@

Source code for aaanalysis.data_loader.data_loader

""" if name not in LIST_DATASETS: raise ValueError(f"'name' ({name}) is not valid. Choose one of following: {LIST_DATASETS}") - # Load data + # Load _data df_cat = pd.read_excel(ut.FOLDER_DATA + f"{ut.STR_SCALE_CAT}.xlsx") df_cat = _filter_scales(df_cat=df_cat, unclassified_in=unclassified_in, just_aaindex=just_aaindex) if name == ut.STR_SCALE_CAT: diff --git a/docs/build/html/_modules/aaanalysis/dpulearn/dpulearn.html b/docs/build/html/_modules/aaanalysis/dpulearn/dpulearn.html index 3e215c36..11355e4f 100644 --- a/docs/build/html/_modules/aaanalysis/dpulearn/dpulearn.html +++ b/docs/build/html/_modules/aaanalysis/dpulearn/dpulearn.html @@ -116,7 +116,7 @@

REFERENCES

@@ -193,7 +193,7 @@

Source code for aaanalysis.dpulearn.dpulearn

from sklearn.decomposition import PCA
 import math
 import warnings
-import aaanalysis._utils as ut
+import aaanalysis.utils as ut
 
 # Settings
 pd.set_option('expand_frame_repr', False)  # Single line print for pd.Dataframe
@@ -292,7 +292,7 @@ 

Source code for aaanalysis.dpulearn.dpulearn

    """
     mask_pos = labels == label_pos
     mask_unl = labels != label_pos
-    # Compute the average distances to the positive data points
+    # Compute the average distances to the positive datapoints
     avg_dist = pairwise_distances(X[mask_pos], X, metric=metric).mean(axis=0)
     # Select negatives based on largest average distance to positives
     top_indices = np.argsort(avg_dist[mask_unl])[::-1][:n_neg]
@@ -340,7 +340,7 @@ 

Source code for aaanalysis.dpulearn.dpulearn

columns_pca = _columns_pca[0:len(list_n_neg)]
     df_seq[columns_pca] = pca.components_.T[:, 0:len(columns_pca)]
 
-    # Get mean of positive data for each component
+    # Get mean of positive data for each component
     mask_pos = labels == label_pos
     mask_unl = labels != label_pos
     pc_means = df_seq[mask_pos][columns_pca].mean(axis=0)
@@ -394,7 +394,7 @@ 

Source code for aaanalysis.dpulearn.dpulearn

    Attributes
     ----------
     labels_ : array-like, shape (n_samples,)
-        Labels of each data point.
+        Labels of each datapoint.
 
     Notes
     -----
@@ -452,7 +452,7 @@ 

Source code for aaanalysis.dpulearn.dpulearn

        Examples
         --------
-        Create small example data for dPUlearn containg positive ('pos', 1) and unlabeled ('unl', 2) data
+        Create small example data for dPUlearn containing positive ('pos', 1) and unlabeled ('unl', 2) data
 
         >>> import aaanalysis as aa
         >>> import pandas as pd
@@ -487,8 +487,10 @@ 

Source code for aaanalysis.dpulearn.dpulearn

new_labels, df_seq = _get_neg_via_pca(**args, n_components=self.n_components, **self.pca_kwargs)
         # Set new labels
         self.labels_ = new_labels
-        return df_seq
+ return df_seq
+ def eval(self): + """""" # TODO add evaluation function
diff --git a/docs/build/html/_modules/aaanalysis/utils_plot.html b/docs/build/html/_modules/aaanalysis/plotting/plotting_functions.html similarity index 62% rename from docs/build/html/_modules/aaanalysis/utils_plot.html rename to docs/build/html/_modules/aaanalysis/plotting/plotting_functions.html index b14bb969..dafb677b 100644 --- a/docs/build/html/_modules/aaanalysis/utils_plot.html +++ b/docs/build/html/_modules/aaanalysis/plotting/plotting_functions.html @@ -7,23 +7,23 @@ - aaanalysis.utils_plot — AAanalysis + aaanalysis.plotting.plotting_functions — AAanalysis - - - - - - - - - + + + + + + + + + - + @@ -33,26 +33,26 @@ - - - - - - - - - - + + + + + + + + + + - + - - + + @@ -66,12 +66,12 @@ - + - + @@ -86,7 +86,7 @@
-
+ @@ -106,18 +106,18 @@

OVERVIEW

EXAMPLES

REFERENCES

@@ -133,7 +133,7 @@ @@ -164,11 +164,11 @@
    -
  • »
  • +
  • »
  • -
  • Module code »
  • +
  • Module code »
  • -
  • aaanalysis.utils_plot
  • +
  • aaanalysis.plotting.plotting_functions
  • @@ -183,7 +183,7 @@
    -

    Source code for aaanalysis.utils_plot

    +  

    Source code for aaanalysis.plotting.plotting_functions

     #! /usr/bin/python3
     """
     Default plotting functions
    @@ -191,10 +191,27 @@ 

    Source code for aaanalysis.utils_plot

     import seaborn as sns
     import matplotlib as mpl
     import matplotlib.pyplot as plt
    -import aaanalysis._utils as ut
    +import aaanalysis.utils as ut
    +
    +
    +
    +LIST_AA_COLOR_PALETTES = ["FEAT", "SHAP", "GGPLOT"]
    +LIST_AA_COLOR_DICTS = ["DICT_SCALE_CAT", "DICT_COLOR"]
    +LIST_AA_COLORS = LIST_AA_COLOR_PALETTES + LIST_AA_COLOR_DICTS
    +
    +LIST_FONTS = ['Arial', 'Avant Garde', 'Bitstream Vera Sans', 'Computer Modern Sans Serif', 'DejaVu Sans',
    +              'Geneva', 'Helvetica', 'Lucid', 'Lucida Grande', 'Verdana']
     
     
     # Helper functions
    +def check_font_style(font="Arial"):
    +    """Check that 'font' is one of the recommended fonts in LIST_FONTS (raises ValueError otherwise)"""
    +    if font not in LIST_FONTS:
    +        error_message = f"'font' ({font}) not in recommended fonts: {LIST_FONTS}. Set font manually by:" \
    +                        f"\n\tplt.rcParams['font.sans-serif'] = '{font}'"
    +        raise ValueError(error_message)
    +
    +
     def check_fig_format(fig_format="pdf"):
         """"""
         list_fig_formats = ['eps', 'jpg', 'jpeg', 'pdf', 'pgf', 'png', 'ps',
    @@ -226,8 +243,137 @@ 

    Source code for aaanalysis.utils_plot

         return list_cat
     
     
    -# Default plotting functions
    -
    [docs]def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", font_scale=0.7, +# Get color maps +def _get_shap_cmap(n_colors=100, facecolor_dark=True): + """Generate a diverging color map for feature values.""" + n = 20 + cmap_low = sns.light_palette(ut.COLOR_SHAP_NEG, input="hex", reverse=True, n_colors=int(n_colors/2)+n) + cmap_high = sns.light_palette(ut.COLOR_SHAP_POS, input="hex", n_colors=int(n_colors/2)+n) + c_middle = [(0, 0, 0)] if facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_feat_cmap(n_colors=100, facecolor_dark=False): + """Generate a diverging color map for feature values.""" + n = 5 + cmap = sns.color_palette("RdBu_r", n_colors=n_colors + n * 2) + cmap_low, cmap_high = cmap[0:int((n_colors + n * 2) / 2)], cmap[int((n_colors + n * 2) / 2):] + c_middle = [(0, 0, 0)] if facecolor_dark else [cmap_low[-1]] + cmap = cmap_low[0:-n] + c_middle + cmap_high[n:] + return cmap + + +def _get_ggplot_cmap(n_colors=100): + """Generate a circular GGplot color palette.""" + cmap = sns.color_palette("husl", n_colors) + return cmap + + +def _get_default_colors(name=None, n_colors=100, facecolor_dark=True): + """Retrieve default color maps based on palette name.""" + args = dict(n_colors=n_colors, facecolor_dark=facecolor_dark) + if name == "SHAP": + return _get_shap_cmap(**args) + elif name == "FEAT": + return _get_feat_cmap(**args) + elif name == "GGPLOT": + return _get_ggplot_cmap(n_colors=n_colors) + + +def _get_cmap_with_gap(n_colors=100, color_pos=None, color_neg=None, color_center=None, pct_gap=10, pct_center=None, + input="hex"): + """Generate a custom color map with a gap.""" + n_gap = int(n_colors*pct_gap/2) + cmap_pos = sns.light_palette(color_pos, input=input, n_colors=int(n_colors/2)+n_gap) + cmap_neg = sns.light_palette(color_neg, input=input, reverse=True, n_colors=int(n_colors/2)+n_gap) + color_center = [cmap_neg[-1]] if color_center is None else color_center + 
color_center = [color_center] if type(color_center) is str else color_center + if pct_center is None: + cmap = cmap_neg[0:-n_gap] + color_center + cmap_pos[n_gap:] + else: + n_center = int(n_colors * pct_center) + n_gap += int(n_center/2) + cmap = cmap_neg[0:-n_gap] + color_center * n_center + cmap_pos[n_gap:] + return cmap + + +# Default plotting function +def plot_get_cmap(name=None, n_colors=100, facecolor_dark=False, + color_pos=None, color_neg=None, color_center=None, + input="hex", pct_gap=10, pct_center=None): + """ + Retrieve color maps or color dictionaries specified for AAanalysis. + + Parameters + ---------- + name : str, optional + The name of the color palette to use in AAanalysis. Options include: + - 'SHAP', 'FEAT', 'GGPLOT': Return color maps for SHAP plots, CPP feature maps/heatmaps, + and datagrouping as in GGplot, respectively. + - 'DICT_COLOR', 'DICT_SCALE_CAT': Return default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. + n_colors : int, default=100 + Number of colors in the color map. + facecolor_dark : bool, default=False + Whether to use a dark face color for 'SHAP' and 'FEAT'. + color_pos : str, optional + Hex code for the positive color. + color_neg : str, optional + Hex code for the negative color. + color_center : str or list, optional + Hex code or list for the center color. + input : str, {'rgb', 'hls', 'husl', 'xkcd'} + Color space to interpret the input color. The first three options + apply to tuple inputs and the latter applies to string inputs. + pct_gap : int, default=10 + Percentage size of the gap between color ranges. + pct_center : float, optional + Percentage size of the center color in the map. + + Returns + ------- + cmap : list or dict + If 'name' parameter is 'SHAP', 'FEAT', or 'GGPLOT', a list of colors specified for AAanalysis will be returned. 
+ If 'name' parameter is None, a list of colors based on provided colors + + See Also + -------- + sns.color_palette : Function to generate a color palette in seaborn. + sns.light_palette : Function to generate a lighter color palette in seaborn. + """ + # TODO check color dict name + if name in LIST_AA_COLOR_PALETTES: + cmap = _get_default_colors(name=name, n_colors=n_colors, facecolor_dark=facecolor_dark) + return cmap + cmap = _get_cmap_with_gap(n_colors=n_colors, color_pos=color_pos, color_neg=color_neg, + color_center=color_center, pct_gap=pct_gap, pct_center=pct_center, + input=input) + return cmap + + +def plot_get_cdict(name=None): + """ + Retrieve color dictionaries specified for AAanalysis. + + Parameters + ---------- + name : str, {'DICT_COLOR', 'DICT_SCALE_CAT'} + The name of default color dictionaries for plots (e.g., bars in CPPPlot.profile) + and scale categories (e.g., CPPPlot.heatmap), respectively. + + Returns + ------- + cmap : dict + Specific AAanalysis color dictionary. + """ + # TODO check color dict name + color_dict = ut.DICT_COLOR if name == "DICT_COLORS" else ut.DICT_COLOR_CAT + return color_dict + + +
    [docs]def plot_settings(fig_format="pdf", verbose=False, grid=False, grid_axis="y", + font_scale=0.7, font="Arial", change_size=True, weight_bold=True, adjust_elements=True, short_ticks=False, no_ticks=False, no_ticks_y=False, short_ticks_y=False, no_ticks_x=False, short_ticks_x=False): @@ -246,6 +392,8 @@

    Source code for aaanalysis.utils_plot

             Choose the axis ('y', 'x', 'both') to apply the grid to.
         font_scale : float, default=0.7
             Sets the scale for font sizes in the plot.
    +    font : str, default='Arial'
    +        Name of sans-serif font (e.g., 'Arial', 'Verdana', 'Helvetica', 'DejaVu Sans')
         change_size : bool, default=True
             If True, adjusts the size of plot elements.
         weight_bold : bool, default=True
    @@ -276,6 +424,7 @@ 

    Source code for aaanalysis.utils_plot

         """
         # Check input
         check_fig_format(fig_format=fig_format)
    +    check_font_style(font=font)
         check_grid_axis(grid_axis=grid_axis)
         args_bool = {"verbose": verbose, "grid": grid, "change_size": change_size, "weight_bold": weight_bold,
                      "adjust_elements": adjust_elements,
    @@ -293,13 +442,12 @@ 

    Source code for aaanalysis.utils_plot

             print(plt.rcParams.keys)    # Print all plot settings that can be modified in general
         if not change_size:
             plt.rcParams["font.family"] = "sans-serif"
    -        plt.rcParams["font.sans-serif"] = "Arial"
    -        font = {'family': 'Arial'}
    -        mpl.rc('font', **font)
    +        plt.rcParams["font.sans-serif"] = font
    +        mpl.rc('font', **{'family': font})
             return
         sns.set_context("talk", font_scale=font_scale)  # Font settings https://matplotlib.org/3.1.1/tutorials/text/text_props.html
         plt.rcParams["font.family"] = "sans-serif"
    -    plt.rcParams["font.sans-serif"] = "Arial"
    +    plt.rcParams["font.sans-serif"] = font
         if weight_bold:
             plt.rcParams["axes.labelweight"] = "bold"
             plt.rcParams["axes.titleweight"] = "bold"
    @@ -338,7 +486,7 @@ 

    Source code for aaanalysis.utils_plot

             mpl.rcParams['pdf.fonttype'] = 42
         elif "svg" in fig_format:
             mpl.rcParams['svg.fonttype'] = 'none'
    -    font = {'family': 'Arial', "weight": "bold"} if weight_bold else {"family": "Arial"}
    +    font = {'family': font, "weight": "bold"} if weight_bold else {"family": font}
         mpl.rc('font', **font)
         if adjust_elements:
             # Error bars
    @@ -352,7 +500,7 @@ 

    Source code for aaanalysis.utils_plot

             plt.rcParams["legend.loc"] = 'upper right'  # https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html
    -
    [docs]def plot_gcfs(): +
    [docs]def plot_gcfs(): """Get current font size, which is set by ut.plot_settings function""" # Get the current plotting context current_context = sns.plotting_context() @@ -360,7 +508,7 @@

    Source code for aaanalysis.utils_plot

         return font_size
    -
    [docs]def plot_set_legend(ax=None, handles=None, dict_color=None, list_cat=None, labels=None, y=-0.2, x=0.5, ncol=3, +
    [docs]def plot_set_legend(ax=None, handles=None, dict_color=None, list_cat=None, labels=None, y=-0.2, x=0.5, ncol=3, fontsize=11, weight="normal", lw=0, edgecolor=None, return_handles=False, loc="upper left", labelspacing=0.2, columnspacing=1, title=None, fontsize_legend=None, title_align_left=True, fontsize_weight="normal", shape=None, **kwargs): diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html index a42bcfc9..d66ff871 100644 --- a/docs/build/html/_modules/index.html +++ b/docs/build/html/_modules/index.html @@ -116,7 +116,7 @@

    REFERENCES

    @@ -188,7 +188,7 @@

    All modules for which code is available

  • aaanalysis.cpp.feature
  • aaanalysis.data_loader.data_loader
  • aaanalysis.dpulearn.dpulearn
  • -
  • aaanalysis.utils_plot
  • +
  • aaanalysis.plotting.plotting_functions
diff --git a/docs/build/html/_resources/overview.html b/docs/build/html/_resources/overview.html index 0ffbd2d6..a6e3fca4 100644 --- a/docs/build/html/_resources/overview.html +++ b/docs/build/html/_resources/overview.html @@ -127,7 +127,7 @@

REFERENCES

diff --git a/docs/build/html/_resources/tables.html b/docs/build/html/_resources/tables.html index d8aabf52..e3159490 100644 --- a/docs/build/html/_resources/tables.html +++ b/docs/build/html/_resources/tables.html @@ -4,21 +4,21 @@ - + - + - - - + + + - Tables for the Project — AAanalysis + Tables — AAanalysis @@ -129,8 +129,8 @@

REFERENCES

diff --git a/docs/build/html/generated/aaanalysis.AAclust.html b/docs/build/html/generated/aaanalysis.AAclust.html index a85126e5..99e8caf9 100644 --- a/docs/build/html/generated/aaanalysis.AAclust.html +++ b/docs/build/html/generated/aaanalysis.AAclust.html @@ -137,7 +137,7 @@
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -212,7 +212,7 @@

    aaanalysis.AAclust

    -class aaanalysis.AAclust(model=None, model_kwargs=None, verbose=False)[source][source]
    +class aaanalysis.AAclust(model=None, model_kwargs=None, verbose=False)[source][source]

    AAclust: A k-optimized clustering framework for selecting redundancy-reduced set of numerical scales.

    AAclust is designed primarily for amino acid scales but is versatile enough for any set of numerical indices. It takes clustering models that require a pre-defined number of clusters (k) from @@ -251,7 +251,7 @@

    aaanalysis.AAclust
    -__init__(model=None, model_kwargs=None, verbose=False)[source][source]
    +__init__(model=None, model_kwargs=None, verbose=False)[source][source]

    Methods

    @@ -270,20 +270,23 @@

    aaanalysis.AAclust

    correlation(X_test, X_ref[, labels_test, ...])

    Computes the correlation of test data with reference cluster centers.

    -

    fit(X[, names, on_center, min_th, ...])

    -

    Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation.

    +

    eval()

    +

    + +

    fit(X[, names, on_center, min_th, ...])

    +

    Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation.

    -

    get_cluster_centers(X[, labels])

    +

    get_cluster_centers(X[, labels])

    Computes the center of each cluster based on the given labels.

    -

    get_cluster_medoids(X[, labels])

    +

    get_cluster_medoids(X[, labels])

    Computes the medoid of each cluster based on the given labels.

    -cluster_naming(names=None, labels=None, name_unclassified='Unclassified')[source][source]
    +cluster_naming(names=None, labels=None, name_unclassified='Unclassified')[source][source]

    Assigns names to clusters based on scale names and their frequency.

    This method renames clusters based on the names of the scales in each cluster, with priority given to the most frequent scales. If the name is already used or does not exist, it defaults to ‘name_unclassified’.

    @@ -307,7 +310,7 @@

    aaanalysis.AAclust
    -static correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive=True, on_center=False, except_unclassified=True)[source][source]
    +static correlation(X_test, X_ref, labels_test=None, labels_ref=None, n=3, positive=True, on_center=False, except_unclassified=True)[source][source]

    Computes the correlation of test data with reference cluster centers.

    Parameters
    @@ -316,9 +319,9 @@

    aaanalysis.AAclustarray-like

    Reference feature matrix.

    -
    labels_testlist or array-like, optional

    Cluster labels for the test data.

    +
    labels_testlist or array-like, optional

    Cluster labels for the test data.

    -
    labels_reflist or array-like, optional

    Cluster labels for the reference data.

    +
    labels_reflist or array-like, optional

    Cluster labels for the reference data.

    nint, default = 3

    Number of top centers to consider based on correlation strength.

    @@ -341,9 +344,9 @@

    aaanalysis.AAclust
    -fit(X, names=None, on_center=True, min_th=0, merge_metric='euclidean', n_clusters=None)[source][source]
    -

    Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation.

    -

    AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data (X) into +fit(X, names=None, on_center=True, min_th=0, merge_metric='euclidean', n_clusters=None)[source][source] +

    Fit the AAclust model on the data, optimizing cluster formation using Pearson correlation.

    +

    AAclust determines the optimal number of clusters, k, without pre-specification. It partitions data (X) into clusters by maximizing the within-cluster Pearson correlation beyond the ‘min_th’ threshold. The quality of clustering is either based on the minimum Pearson correlation of all members (‘min_cor all’) or between the cluster center and its members (‘min_cor center’), governed by on_center.

    @@ -386,7 +389,7 @@

    aaanalysis.AAclust
    -static get_cluster_centers(X, labels=None)[source][source]
    +static get_cluster_centers(X, labels=None)[source][source]

    Computes the center of each cluster based on the given labels.

    Parameters
    @@ -410,7 +413,7 @@

    aaanalysis.AAclust
    -static get_cluster_medoids(X, labels=None)[source][source]
    +static get_cluster_medoids(X, labels=None)[source][source]

    Computes the medoid of each cluster based on the given labels.

    Parameters
    @@ -427,7 +430,7 @@

    aaanalysis.AAclustarray-like

    The labels corresponding to each medoid.

    -
    medoid_indarray-like

    Indexes of medoids within the original data.

    +
    medoid_indarray-like

    Indexes of medoids within the original data.

    diff --git a/docs/build/html/generated/aaanalysis.CPP.html b/docs/build/html/generated/aaanalysis.CPP.html index c443e955..ed992242 100644 --- a/docs/build/html/generated/aaanalysis.CPP.html +++ b/docs/build/html/generated/aaanalysis.CPP.html @@ -139,7 +139,7 @@
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -214,7 +214,7 @@

    aaanalysis.CPP

    -class aaanalysis.CPP(df_scales=None, df_cat=None, df_parts=None, split_kws=None, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, ext_len=4, verbose=True)[source][source]
    +class aaanalysis.CPP(df_scales=None, df_cat=None, df_parts=None, split_kws=None, accept_gaps=False, verbose=True)[source][source]

    Create and filter features that are most discriminant between two sets of sequences.

    Parameters
    @@ -229,13 +229,6 @@

    aaanalysis.CPPbool, default = False

    Whether to accept missing values by enabling omitting for computations (if True).

    -
    jmd_n_lenint, >=0, default = 10

    Length of JMD-N.

    -
    -
    jmd_c_lenint, >=0, default = 10

    Length of JMD-C.

    -
    -
    ext_lenint, >=0, default = 4

    Length of TMD-extending part (starting from C and N terminal part of TMD). -Conditions: ext_len < jmd_m_len and ext_len < jmd_c_len.

    -
    verbosebool, default = True

    Whether to print progress information about the algorithm (if True).

    @@ -245,7 +238,7 @@

    aaanalysis.CPP
    -__init__(df_scales=None, df_cat=None, df_parts=None, split_kws=None, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, ext_len=4, verbose=True)[source][source]
    +__init__(df_scales=None, df_cat=None, df_parts=None, split_kws=None, accept_gaps=False, verbose=True)[source][source]

    Methods

    @@ -258,326 +251,23 @@

    aaanalysis.CPP

    __init__([df_scales, df_cat, df_parts, ...])

    -

    add_positions([df_feat, tmd_len, start])

    -

    Add sequence positions to DataFrame.

    - -

    add_sample_dif([df_feat, df_seq, labels, ...])

    -

    Add feature value difference between sample and reference group to DataFrame.

    - -

    add_shap([df_feat, col_shap, name_feat_impact])

    -

    Convert SHAP values into feature impact/importance and add to DataFrame.

    - -

    plot_heatmap([df_feat, y, val_col, ...])

    -

    Plot a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis).

    - -

    plot_profile([df_feat, y, val_col, ...])

    -

    Plot feature profile for given features from 'df_feat'.

    +

    eval([df_feat, features])

    +

    Get evaluation for provided dataset

    run([labels, parametric, n_filter, tmd_len, ...])

    Perform CPP pipeline by creation and two-step filtering of features.

    -

    update_seq_size()

    -

    -
    -
    -add_positions(df_feat=None, tmd_len=20, start=1)[source][source]
    -

    Add sequence positions to DataFrame.

    -
    -
    Parameters
    -
    -
    df_feat: :class:`pandas.DataFrame`

    Feature DataFrame to add feature positions.

    -
    -
    tmd_len: int, >0

    Length of TMD.

    -
    -
    start: int, >=0

    Position label of first amino acid position (starting at N-terminus).

    -
    -
    -
    -
    Returns
    -
    -
    df_feat: pandas.DataFrame

    Feature DataFrame including feature positions.

    -
    -
    -
    -
    -

    Notes

    -

    The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len).

    -
    - -
    -
    -add_sample_dif(df_feat=None, df_seq=None, labels=None, sample_name=<class 'str'>, ref_group=0, accept_gaps=False)[source][source]
    -

    Add feature value difference between sample and reference group to DataFrame.

    -
    -
    Parameters
    -
    -
    df_feat: :class:`pandas.DataFrame`

    Feature DataFrame to add sample difference.

    -
    -
    df_seq: :class:`pandas.DataFrame`

    DataFrame with sequences and sample names, in which the given sample name is included.

    -
    -
    labels: array-like, shape (n_samples)

    Class labels for samples in sequence DataFrame.

    -
    -
    sample_name: str

    Name of sample for which the feature value difference to a given reference group should be computed.

    -
    -
    ref_group: int, default = 0

    Class label of reference group.

    -
    -
    accept_gaps: bool, default = False

    Whether to accept missing values by enabling omitting for computations (if True).

    -
    -
    -
    -
    Returns
    -
    -
    df_feat: pandas.DataFrame

    Feature DataFrame including feature value difference.

    -
    -
    -
    -
    -
    - -
    -
    -static add_shap(df_feat=None, col_shap='shap_value', name_feat_impact='feat_impact')[source][source]
    -

    Convert SHAP values into feature impact/importance and add to DataFrame.

    -
    -
    Parameters
    -
    -
    df_feat: :class:`pandas.DataFrame`

    Feature DataFrame to which the feature impact will be added.

    -
    -
    col_shap: str, default = ‘shap_value’

    Column name of SHAP values in the feature DataFrame.

    -
    -
    name_feat_impact: str, default = ‘feat_impact’

    Column name of feature impact or feature importance that will be added to the feature DataFrame.

    -
    -
    -
    -
    Returns
    -
    -
    df_feat: pandas.DataFrame

    Feature DataFrame including feature impact.

    -
    -
    -
    -
    -

    Notes

    -
      -
    • SHAP (SHapley Additive exPlanations) is a game theoretic approach to explain the output of any machine learning model.

    • -
    • SHAP values represent a feature’s responsibility for a change in the model output.

    • -
    • Missing values are accepted in SHAP values.

    • -
    -
    - -
    -
    -plot_heatmap(df_feat=None, y='subcategory', val_col='mean_dif', val_type='mean', normalize=False, figsize=(8, 5), title=None, title_kws=None, vmin=None, vmax=None, grid_on=True, cmap='RdBu_r', cmap_n_colors=None, dict_color=None, cbar_kws=None, facecolor_dark=False, add_jmd_tmd=True, tmd_len=20, start=1, jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, tmd_color='mediumspringgreen', jmd_color='blue', tmd_seq_color='black', jmd_seq_color='white', seq_size=None, tmd_jmd_fontsize=None, xticks_pos=False, xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, ytick_size=None, add_legend_cat=True, legend_kws=None, add_importance_map=False, cbar_pct=False, **kwargs)[source][source]
    -

    Plot a featuremap of the selected value column with scale information (y-axis) versus sequence position (x-axis).

    -

    This is a wrapper function for seaborn.heatmap(), designed to highlight differences between two sets -of sequences at the positional level (e.g., amino acid level for protein sequences).

    -
    -
    Parameters
    -
    -
    df_featDataFrame, shape (n_feature, n_feature_information)

    DataFrame containing unique identifiers, scale information, statistics, and positions for each feature.

    -
    -
    y{‘category’, ‘subcategory’, ‘scale_name’}, str, default = ‘subcategory’

    Name of the column in the feature DataFrame representing scale information (shown on the y-axis).

    -
    -
    val_col{‘mean_dif’, ‘feat_impact’, ‘abs_auc’, ‘std_test’, …}, str, default = ‘mean_dif’

    Name of the column in the feature DataFrame containing numerical values to display.

    -
    -
    val_type{‘mean’, ‘sum’, ‘std’}, str, default = ‘mean’

    Method to aggregate numerical values from ‘val_col’.

    -
    -
    normalize{True, False, ‘positions’, ‘positions_only’}, bool/str, default = False

    Specifies normalization for numerical values in ‘val_col’: -- False: Set value at all positions of a feature without further normalization.

    -
      -
    • True: Set value at all positions of a feature and normalize across all features.

    • -
    • ‘positions’: Value/number of positions set at each position of a feature and normalized across features. -Recommended when aiming to emphasize features with fewer positions using ‘val_col’=’feat_impact’ and ‘value_type’=’mean’.

    • -
    -
    -
    figsizetuple(float, float), default = (10,7)

    Width and height of the figure in inches passed to matplotlib.pyplot.figure().

    -
    -
    titlestr, optional

    Title of figure used by matplotlib.pyplot.title().

    -
    -
    title_kwsdict, optional

    Keyword arguments passed to matplotlib.pyplot.title().

    -
    -
    vmin, vmaxfloat, optional

    Values to anchor the colormap, otherwise, inferred from data and other keyword arguments.

    -
    -
    cmapmatplotlib colormap name or object, or list of colors, default = ‘seismic’

    Name of color map assigning data values to color space. If ‘SHAP’, colors from -SHAP will be used (recommended for feature impact).

    -
    -
    cmap_n_colorsint, optional

    Number of discrete steps in diverging or sequential color map.

    -
    -
    dict_colordict, optional

    Map of colors for scale categories classifying scales shown on y-axis.

    -
    -
    cbar_kwsdict of key, value mappings, optional

    Keyword arguments for matplotlib.figure.Figure.colorbar().

    -
    -
    add_jmd_tmdbool, default = True

    Whether to add colored bar under heatmap indicating sequence parts (JMD-N, TMD, JMD-C).

    -
    -
    tmd_lenint, >0

    Length of TMD to be depiceted.

    -
    -
    startint, >=0

    Position label of first amino acid position (starting at N-terminus).

    -
    -
    tmd_seqstr, optional

    Sequence of TMD. ‘tmd_len’ is set to length of TMD if sequence for TMD, JMD-N and JMD-C are given. -Recommended if feature impact or mean difference should be depicted for one sample.

    -
    -
    jmd_n_seqstr, optional

    Sequence of JMD_N. ‘jmd_n_len’ is set to length of JMD_N if sequence for TMD, JMD-N and JMD-C are given. -Recommended if feature impact or mean difference should be depicted for one sample.

    -
    -
    jmd_c_seqstr, optional

    Sequence of JMD_C. ‘jmd_c_len’ is set to length of JMD_C if sequence for TMD, JMD-N and JMD-C are given. -Recommended if feature impact or mean difference should be depicted for one sample.

    -
    -
    tmd_colorstr, default = ‘mediumspringgreen’

    Color of TMD bar.

    -
    -
    jmd_colorstr, default = ‘blue’

    Color of JMD-N and JMD-C bar.

    -
    -
    tmd_seq_colorstr, default = ‘black’

    Color of TMD sequence.

    -
    -
    jmd_seq_colorstr, default = ‘white’

    Color of JMD-N and JMD-C sequence.

    -
    -
    seq_sizefloat, optional

    Font size of all sequence parts in points. If None, optimized automatically.

    -
    -
    tmd_jmd_fontsizefloat, optional

    Font size of ‘TMD’, ‘JMD-N’ and ‘JMD-C’ label in points. If None, optimized automatically.

    -
    -
    xtick_sizefloat, default = 11.0

    Size of x ticks in points. Passed as ‘size’ argument to matplotlib.axes.Axes.set_xticklabels().

    -
    -
    xtick_widthfloat, default = 2.0

    Widht of x ticks in points. Passed as ‘width’ argument to matplotlib.axes.Axes.tick_params().

    -
    -
    xtick_lengthfloat, default = 5.0,

    Length of x ticks in points. Passed as ‘length’ argument to matplotlib.axes.Axes.tick_params().

    -
    -
    ytick_sizefloat, optional

    Size of scale information as y ticks in points. Passed to matplotlib.axes.Axes.tick_params(). -If None, optimized automatically.

    -
    -
    add_legend_catbool, default = True,

    Whether to add legend for categories under plot and classification of scales at y-axis.

    -
    -
    legend_kwsdict, optional

    Keyword arguments passed to matplotlib.axes.Axes.legend()

    -
    -
    kwargsother keyword arguments

    All other keyword arguments passed to matplotlib.axes.Axes.pcolormesh().

    -
    -
    -
    -
    Returns
    -
    -
    axmatplotlib Axes
    -
    Axes object containing the heatmap.
    -
    -
    -
    -
    -

    Warning

    -
      -
    • ‘cmap_n_colors’ is effective only if ‘vmin’ and ‘vmax’ align with the data.

    • -
    • ‘tmd_seq_color’ and ‘jmd_seq_color’ are applicable only when ‘tmd_seq’, ‘jmd_n_seq’, and ‘jmd_c_seq’ are provided.

    • -
    -
    -
    -

    See also

    -
    -
    seaborn.heatmap

    Plotting heatmap using seaborn. See Seaborn documentation for more details.

    -
    -
    -
    -

    Examples

    -

    Plot CPP feature heatmap:

    -

    (Source code)

    -
    - -
    -
    -plot_profile(df_feat=None, y='category', val_col='mean_dif', val_type='count', normalize=False, figsize=(7, 5), title=None, title_kws=None, dict_color=None, edge_color='none', bar_width=0.75, add_jmd_tmd=True, tmd_len=20, start=1, jmd_n_seq=None, tmd_seq=None, jmd_c_seq=None, tmd_color='mediumspringgreen', jmd_color='blue', tmd_seq_color='black', jmd_seq_color='white', seq_size=None, tmd_jmd_fontsize=None, xtick_size=11.0, xtick_width=2.0, xtick_length=5.0, xticks_pos=False, ytick_size=None, ytick_width=2.0, ytick_length=5.0, ylim=None, highlight_tmd_area=True, highlight_alpha=0.15, grid=False, grid_axis='both', add_legend_cat=True, legend_kws=None, shap_plot=False, **kwargs)[source][source]
    -

    Plot feature profile for given features from ‘df_feat’.

    -
    -
    Parameters
    -
    -
    df_featclass:pandas.DataFrame, optional, default=None

    Dataframe containing the features to be plotted. If None, default features from the instance will be used.

    -
    -
    ystr, default=’category’

    Column name in df_feat which contains the categories for grouping.

    -
    -
    val_colstr, default=’mean_dif’

    Column name in df_feat which contains the values to be plotted.

    -
    -
    val_typestr, default=’count’

    Type of value. Available options are specified by the check_value_type function.

    -
    -
    normalizebool, default=False

    If True, the feature values will be normalized.

    -
    -
    figsizetuple, default=(7, 5)

    Size of the plot.

    -
    -
    titlestr, optional

    Title of the plot.

    -
    -
    title_kwsdict, optional

    Keyword arguments to customize the title appearance.

    -
    -
    dict_colordict, optional

    Dictionary mapping categories to colors.

    -
    -
    edge_colorstr, default=’none’

    Color of the edges of the bars.

    -
    -
    bar_widthfloat, default=0.75

    Width of the bars.

    -
    -
    add_jmd_tmdbool, default=True

    If True, adds JMD and TMD lines/annotations to the plot.

    -
    -
    tmd_lenint, default=20

    Length of the TMD.

    -
    -
    startint, default=1

    Start position.

    -
    -
    jmd_n_seqstr, optional

    JMD N-terminal sequence.

    -
    -
    tmd_seqstr, optional

    TMD sequence.

    -
    -
    jmd_c_seqstr, optional

    JMD C-terminal sequence.

    -
    -
    tmd_colorstr, default=’mediumspringgreen’

    Color for TMD.

    -
    -
    jmd_colorstr, default=’blue’

    Color for JMD.

    -
    -
    tmd_seq_colorstr, default=’black’

    Color for TMD sequence.

    -
    -
    jmd_seq_colorstr, default=’white’

    Color for JMD sequence.

    -
    -
    seq_sizefloat, optional

    Font size for sequence annotations.

    -
    -
    tmd_jmd_fontsizefloat, optional

    Font size for TMD and JMD annotations.

    -
    -
    xtick_sizefloat, default=11.0

    Size for x-tick labels.

    -
    -
    xtick_widthfloat, default=2.0

    Width of the x-ticks.

    -
    -
    xtick_lengthfloat, default=5.0

    Length of the x-ticks.

    -
    -
    xticks_posbool, default=False

    If True, x-tick positions are adjusted based on given sequences.

    -
    -
    ytick_sizefloat, optional

    Size for y-tick labels.

    -
    -
    ytick_widthfloat, default=2.0

    Width of the y-ticks.

    -
    -
    ytick_lengthfloat, default=5.0

    Length of the y-ticks.

    -
    -
    ylimtuple, optional

    Y-axis limits.

    -
    -
    highlight_tmd_areabool, default=True

    If True, highlights the TMD area on the plot.

    -
    -
    highlight_alphafloat, default=0.15

    Alpha value for TMD area highlighting.

    -
    -
    gridbool, default=False

    If True, a grid is added to the plot.

    -
    -
    grid_axisstr, default=’both’

    Axis on which the grid is drawn. Options: ‘both’, ‘x’, ‘y’.

    -
    -
    add_legend_catbool, default=True

    If True, a legend is added for categories.

    -
    -
    legend_kwsdict, optional

    Keyword arguments for the legend.

    -
    -
    shap_plotbool, default=False

    If True, SHAP (SHapley Additive exPlanations) plot is generated.

    -
    -
    **kwargsdict

    Other keyword arguments passed to internal functions or plotting libraries.

    -
    -
    -
    -
    Returns
    -
    -
    axmatplotlib.axes.Axes

    The axes object containing the plot.

    -
    -
    -
    -
    +
    +static eval(df_feat=None, features=None)[source][source]
    +

    Get evaluation for provided dataset

    -run(labels=None, parametric=False, n_filter=100, tmd_len=20, start=1, check_cat=True, n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, n_processes=None)[source][source]
    +run(labels=None, parametric=False, n_filter=100, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, start=1, check_cat=True, n_pre_filter=None, pct_pre_filter=5, max_std_test=0.2, max_overlap=0.5, max_cor=0.5, n_processes=None)[source][source]

    Perform CPP pipeline by creation and two-step filtering of features. CPP aims to identify a collection of non-redundant features that are most discriminant between a test and a reference group of sequences.

    @@ -592,10 +282,17 @@

    aaanalysis.CPPint, optional

    Number of feature to be pre-filtered by CPP algorithm. If None, a percentage of all features is used.

    -
    tmd_lenint, >0

    Length of Transmembrane Domain (TMD) used for positions.

    +
    tmd_lenint, >0

    Length of TMD used for positions. TODO add link to explanation

    startint, >=0

    Position label of first amino acid position (starting at N-terminus).

    +
    jmd_n_lenint, >=0, default = 10

    Length of JMD-N.

    +
    +
    jmd_c_lenint, >=0, default = 10

    Length of JMD-C.

    +
    +
    ext_lenint, >=0, default = 4

    Length of TMD-extending part (starting from C and N terminal part of TMD). +Should be longer than jmd_n_len and jmd_c_len

    +
    check_catbool, default = True

    Whether to check for redundancy within scale categories.

    pct_pre_filterint, default = 5

    Percentage of all features that should remain after the pre-filtering step.

    diff --git a/docs/build/html/generated/aaanalysis.SequenceFeature.html b/docs/build/html/generated/aaanalysis.SequenceFeature.html index 0c7d0075..c4037f48 100644 --- a/docs/build/html/generated/aaanalysis.SequenceFeature.html +++ b/docs/build/html/generated/aaanalysis.SequenceFeature.html @@ -139,7 +139,7 @@
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -214,7 +214,7 @@

    aaanalysis.SequenceFeature

    -class aaanalysis.SequenceFeature[source][source]
    +class aaanalysis.SequenceFeature[source][source]

    Retrieve and create sequence feature components (Part, Split, and Scale).

    Notes

    @@ -250,103 +250,65 @@

    aaanalysis.SequenceFeature

    __init__(*args, **kwargs)

    -

    feat_matrix([df_parts, features, df_scales, ...])

    -

    Create feature matrix for given feature names and sequence parts.

    +

    add_dif([df_feat, df_seq, labels, ...])

    +

    Add feature value difference between sample and reference group to DataFrame.

    -

    feat_names([features, df_cat, tmd_len, ...])

    -

    Convert feature ids (PART-SPLIT-SCALE) into feature name (scale name [positions]).

    - -

    feat_value([df_parts, split, dict_scale, ...])

    +

    add_feat_value([df_parts, split, ...])

    Create feature values for all sequence parts by combining Part, Split, and Scale.

    -

    features([list_parts, split_kws, df_scales, ...])

    -

    Create list of feature ids for given Parts, Splits, and Scales

    +

    add_position([df_feat, features, start, ...])

    +

    Create list with positions for given feature names

    + +

    feat_matrix([features, df_parts, df_scales, ...])

    +

    Create feature matrix for given feature ids and sequence parts.

    + +

    feat_names([features, df_cat, tmd_len, ...])

    +

    Convert feature ids (PART-SPLIT-SCALE) into feature names (scale name [positions]).

    -

    get_df_parts([df_seq, list_parts, ...])

    +

    get_df_parts([df_seq, list_parts, ...])

    Create DataFrane with sequence parts.

    +

    get_features([list_parts, split_kws, ...])

    +

    Create list of all feature ids for given Parts, Splits, and Scales

    +

    get_split_kws([n_split_min, n_split_max, ...])

    Create dictionary with kwargs for three split types: Segment, Pattern, PeriodicPattern

    -
    -static feat_matrix(df_parts=None, features=None, df_scales=None, accept_gaps=False, n_jobs=None, verbose=False, return_labels=False)[source][source]
    -

    Create feature matrix for given feature names and sequence parts.

    -
    -
    Parameters
    -
    -
    df_parts: :class:`pandas.DataFrame`

    DataFrame with sequence parts.

    -
    -
    features: str, list of strings, pd.Series

    Ids of features for which matrix of feature values should be created.

    -
    -
    df_scales: :class:`pandas.DataFrame`, optional

    DataFrame with default amino acid scales.

    -
    -
    accept_gaps: bool, default = False

    Whether to accept missing values by enabling omitting for computations (if True).

    -
    -
    n_jobs: int, default = None,

    The number of jobs to run in parallel. If None, it will be set to the maximum.

    -
    -
    verbose: bool, default = True

    Whether to print size of to be created feature matrix (if True) or not otherwise.

    -
    -
    return_labels: bool, default = False

    Whether to return sample labels in addition to feature matrix.

    -
    -
    -
    -
    Returns
    -
    -
    feat_matrix: array-like or sparse matrix, shape (n_samples, n_features)

    Feature values of samples.

    -
    -
    -
    -
    -
    - -
    -
    -static feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=10, ext_len=0, start=1)[source][source]
    -

    Convert feature ids (PART-SPLIT-SCALE) into feature name (scale name [positions]).

    +
    +static add_dif(df_feat=None, df_seq=None, labels=None, sample_name=<class 'str'>, ref_group=0, accept_gaps=False, jmd_n_len=10, jmd_c_len=10, df_parts=None, df_scales=None)[source][source]
    +

    Add feature value difference between sample and reference group to DataFrame.

    Parameters
    -
    features: str, list of strings, pd.Series

    Ids of features for which feature names should be created.

    +
    df_feat: :class:`pandas.DataFrame`

    Feature DataFrame (CPP output) to add sample difference.

    -
    df_cat: :class:`pandas.DataFrame`, default = SequenceFeature.load_categories

    DataFrame with default categories for physicochemical amino acid scales

    +
    df_seq: :class:`pandas.DataFrame`

    DataFrame with sequences and sample names, in which the given sample name is included.

    -
    tmd_len: int, >0

    Length of TMD.

    +
    labels: array-like, shape (n_samples)

    Class labels for samples in sequence DataFrame.

    -
    jmd_n_len: int, >0

    Length of JMD-N.

    +
    sample_name: str

    Name of sample for which the feature value difference to a given reference group should be computed.

    -
    jmd_c_len: int, >0

    Length of JMD-C.

    +
    ref_group: int, default = 0

    Class label of reference group.

    -
    ext_len:int, >0

    Length of TMD-extending part (starting from C and N terminal part of TMD). -Conditions: ext_len<jmd_m_len and ext_len<jmd_c_len

    -
    -
    start: int, >=0

    Position label of first amino acid position (starting at N-terminus).

    +
    accept_gaps: bool, default = False

    Whether to accept missing values by enabling omitting for computations (if True).

    Returns
    -
    feat_names: list of strings

    Names of features.

    -
    -
    +
    df_feat: pandas.DataFrame

    Feature DataFrame including feature value difference.

    -

    Notes

    -
    -
    Positions are given depending on the three split types:
      -
    • Segment: [first…last]

    • -
    • Pattern: [all positions]

    • -
    • PeriodicPattern: [first..step1/step2..last]

    • -
    -
    -static feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False)[source][source]
    +
    +static add_feat_value(df_parts=None, split=None, dict_scale=None, accept_gaps=False)[source][source]

    Create feature values for all sequence parts by combining Part, Split, and Scale.

    Parameters
    @@ -396,34 +358,117 @@

    aaanalysis.SequenceFeature -
    -features(list_parts=None, split_kws=None, df_scales=None, all_parts=False)[source][source]
    -

    Create list of feature ids for given Parts, Splits, and Scales

    +
    +static add_position(df_feat=None, features=None, start=1, tmd_len=20, jmd_n_len=10, jmd_c_len=10, ext_len=4, part_split=False)[source][source]
    +

    Create list with positions for given feature names

    Parameters
    -
    list_parts: list of strings (n>=1 parts), default = [“tmd_e”, “jmd_n_tmd_n”, “tmd_c_jmd_c”]

    Names of sequence parts which should be created (e.g., ‘tmd’).

    +
    df_feat: :class:`pandas.DataFrame`

    Feature DataFrame (CPP output) to add sample difference.

    -
    split_kws: dict, default = SequenceFeature.get_split_kws

    Nested dictionary with parameter dictionary for each chosen split_type.

    +
    features: str, list of strings, pd.Series

    Ids of features for which feature names should be created.

    -
    df_scales: :class:`pandas.DataFrame`, default = SequenceFeature.load_scales

    DataFrame with default amino acid scales.

    +
    start: int, >=0, default = 1

    Position label of first amino acid position (starting at N-terminus).

    -
    all_parts: bool, default = False

    Whether to create DataFrame with all possible sequence parts (if True) or parts given by list_parts.

    +
    tmd_len: int, >0, default = 20

    Length of TMD.

    +
    +
    jmd_n_lenint, >=0, default = 10

    Length of JMD-N.

    +
    +
    jmd_c_lenint, >=0, default = 10

    Length of JMD-C.

    +
    +
    ext_lenint, >=0, default = 4

    Length of TMD-extending part (starting from C and N terminal part of TMD). +Conditions: ext_len < jmd_n_len and ext_len < jmd_c_len.

    Returns
    -
    features: list of strings

    Ids of all possible features for combination of Parts, Splits, and Scales with form: PART-SPLIT-SCALE

    +
    feat_positions: list

    list with positions for each feature in feat_names

    +
    +
    +
    +
    +

    Notes

    +

    The length parameters define the total number of positions (jmd_n_len + tmd_len + jmd_c_len).

    +

    + +
    +
    +static feat_matrix(features=None, df_parts=None, df_scales=None, accept_gaps=False, n_jobs=None, verbose=False, return_labels=False)[source][source]
    +

    Create feature matrix for given feature ids and sequence parts.

    +
    +
    Parameters
    +
    +
    features: str, list of strings, pd.Series

    Ids of features for which matrix of feature values should be created.

    +
    +
    df_parts: :class:`pandas.DataFrame`

    DataFrame with sequence parts.

    +
    +
    df_scales: :class:`pandas.DataFrame`, optional

    DataFrame with default amino acid scales.

    +
    +
    accept_gaps: bool, default = False

    Whether to accept missing values by enabling omitting for computations (if True).

    +
    +
    n_jobs: int, default = None

    The number of jobs to run in parallel. If None, it will be set to the maximum.

    +
    +
    verbose: bool, default = False

    Whether to print the size of the feature matrix to be created (if True) or not.

    +
    +
    return_labels: bool, default = False

    Whether to return sample labels in addition to feature matrix.

    +
    +
    +
    +
    Returns
    +
    +
    feat_matrix: array-like or sparse matrix, shape (n_samples, n_features)

    Feature values of samples.

    +
    +
    +static feat_names(features=None, df_cat=None, tmd_len=20, jmd_c_len=10, jmd_n_len=10, ext_len=0, start=1)[source][source]
    +

    Convert feature ids (PART-SPLIT-SCALE) into feature names (scale name [positions]).

    +
    +
    Parameters
    +
    +
    features: str, list of strings, pd.Series

    Ids of features for which feature names should be created.

    +
    +
    df_cat: :class:`pandas.DataFrame`, default = SequenceFeature.load_categories

    DataFrame with default categories for physicochemical amino acid scales

    +
    +
    tmd_len: int, >0

    Length of TMD.

    +
    +
    jmd_n_len: int, >0

    Length of JMD-N.

    +
    +
    jmd_c_len: int, >0

    Length of JMD-C.

    +
    +
    ext_len: int, >=0

    Length of TMD-extending part (starting from C and N terminal part of TMD). +Conditions: ext_len < jmd_n_len and ext_len < jmd_c_len.

    +
    +
    start: int, >=0

    Position label of first amino acid position (starting at N-terminus).

    +
    +
    +
    +
    Returns
    +
    +
    feat_names: list of strings

    Names of features.

    +
    +
    +
    +
    +

    Notes

    +
    +
    Positions are given depending on the three split types:
      +
    • Segment: [first…last]

    • +
    • Pattern: [all positions]

    • +
    • PeriodicPattern: [first..step1/step2..last]

    • +
    +
    +
    +
    +
    -static get_df_parts(df_seq=None, list_parts=None, jmd_n_len=None, jmd_c_len=None, ext_len=4, all_parts=False)[source][source]
    +static get_df_parts(df_seq=None, list_parts=None, jmd_n_len=None, jmd_c_len=None, ext_len=4, all_parts=False)[source][source]

    Create DataFrame with sequence parts.

    Parameters
    @@ -469,9 +514,35 @@

    aaanalysis.SequenceFeature +
    +get_features(list_parts=None, split_kws=None, df_scales=None, all_parts=False)[source][source]
    +

    Create list of all feature ids for given Parts, Splits, and Scales

    +
    +
    Parameters
    +
    +
    list_parts: list of strings (n>=1 parts), default = [“tmd_e”, “jmd_n_tmd_n”, “tmd_c_jmd_c”]

    Names of sequence parts which should be created (e.g., ‘tmd’).

    +
    +
    split_kws: dict, default = SequenceFeature.get_split_kws

    Nested dictionary with parameter dictionary for each chosen split_type.

    +
    +
    df_scales: :class:`pandas.DataFrame`, default = SequenceFeature.load_scales

    DataFrame with default amino acid scales.

    +
    +
    all_parts: bool, default = False

    Whether to create DataFrame with all possible sequence parts (if True) or parts given by list_parts.

    +
    +
    +
    +
    Returns
    +
    +
    features: list of strings

    Ids of all possible features for combination of Parts, Splits, and Scales with form: PART-SPLIT-SCALE

    +
    +
    +
    +
    +

    +
    -static get_split_kws(n_split_min=1, n_split_max=15, steps_pattern=None, n_min=2, n_max=4, len_max=15, steps_periodicpattern=None, split_types=None)[source][source]
    +static get_split_kws(n_split_min=1, n_split_max=15, steps_pattern=None, n_min=2, n_max=4, len_max=15, steps_periodicpattern=None, split_types=None)[source][source]

    Create dictionary with kwargs for three split types: Segment, Pattern, PeriodicPattern

    Parameters
    diff --git a/docs/build/html/generated/aaanalysis.SplitRange.html b/docs/build/html/generated/aaanalysis.SplitRange.html index 9e9b4160..03aaeafc 100644 --- a/docs/build/html/generated/aaanalysis.SplitRange.html +++ b/docs/build/html/generated/aaanalysis.SplitRange.html @@ -139,7 +139,7 @@
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • diff --git a/docs/build/html/generated/aaanalysis.dPULearn.html b/docs/build/html/generated/aaanalysis.dPULearn.html index 7ec217c4..46735cce 100644 --- a/docs/build/html/generated/aaanalysis.dPULearn.html +++ b/docs/build/html/generated/aaanalysis.dPULearn.html @@ -137,7 +137,7 @@
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -212,7 +212,7 @@

    aaanalysis.dPULearn

    -class aaanalysis.dPULearn(verbose=False, n_components=0.8, pca_kwargs=None, metric=None)[source][source]
    +class aaanalysis.dPULearn(verbose=False, n_components=0.8, pca_kwargs=None, metric=None)[source][source]

    Deterministic Positive-Unlabeled (dPULearn) model.

    dPULearn offers a deterministic approach for Positive-Unlabeled (PU) learning. The model primarily employs Principal Component Analysis (PCA) to reduce the dimensionality of the feature space. Based on the most @@ -252,7 +252,7 @@

    aaanalysis.dPULearn
    Attributes
    -
    labels_array-like, shape (n_samples,)

    Labels of each data point.

    +
    labels_array-like, shape (n_samples,)

    Labels of each datapoint.

    @@ -272,7 +272,10 @@

    aaanalysis.dPULearn

    __init__([verbose, n_components, ...])

    -

    fit(X[, labels, n_neg, label_pos, name_neg, ...])

    +

    eval()

    +

    + +

    fit(X[, labels, n_neg, label_pos, name_neg, ...])

    Fit the dPULearn model to identify reliable negative samples from the provided feature matrix and labels.

    @@ -311,7 +314,7 @@

    aaanalysis.dPULearnNotes

    Distance-based identification is used if metric is specified during class initialization.

    Examples

    -

    Create small example data for dPUlearn containg positive (‘pos’, 1) and unlabeled (‘unl’, 2) data

    +

    Create small example data for dPULearn containing positive (‘pos’, 1) and unlabeled (‘unl’, 2) data

    >>> import aaanalysis as aa
     >>> import pandas as pd
     >>> import numpy as np
    diff --git a/docs/build/html/generated/aaanalysis.load_dataset.html b/docs/build/html/generated/aaanalysis.load_dataset.html
    index 7fec8389..4061f2af 100644
    --- a/docs/build/html/generated/aaanalysis.load_dataset.html
    +++ b/docs/build/html/generated/aaanalysis.load_dataset.html
    @@ -138,7 +138,7 @@
     
  • Plot Utilities
  • -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -213,7 +213,7 @@

    aaanalysis.load_dataset

    -aaanalysis.load_dataset(name='INFO', n=None, non_canonical_aa='remove', min_len=None, max_len=None)[source][source]
    +aaanalysis.load_dataset(name='INFO', n=None, non_canonical_aa='remove', min_len=None, max_len=None)[source][source]

    Load protein benchmarking datasets or their general overview by setting ‘name’ to ‘INFO’.

    Three types of benchmark datasets are provided:
    -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -213,7 +213,7 @@

    aaanalysis.load_scales

    -aaanalysis.load_scales(name='scales', just_aaindex=False, unclassified_in=True)[source][source]
    +aaanalysis.load_scales(name='scales', just_aaindex=False, unclassified_in=True)[source][source]

    Load amino acid scales or scale classification.

    Parameters
    diff --git a/docs/build/html/generated/aaanalysis.plot_gcfs.html b/docs/build/html/generated/aaanalysis.plot_gcfs.html index 81c01393..ae22886f 100644 --- a/docs/build/html/generated/aaanalysis.plot_gcfs.html +++ b/docs/build/html/generated/aaanalysis.plot_gcfs.html @@ -62,7 +62,7 @@ - + @@ -139,7 +139,7 @@ -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -214,7 +214,7 @@

    aaanalysis.plot_gcfs

    -aaanalysis.plot_gcfs()[source][source]
    +aaanalysis.plot_gcfs()[source][source]

    Get current font size, which is set by ut.plot_settings function

    @@ -226,7 +226,7 @@

    aaanalysis.plot_gcfs
    diff --git a/docs/build/html/generated/aaanalysis.plot_set_legend.html b/docs/build/html/generated/aaanalysis.plot_set_legend.html index d677931d..5d4474ff 100644 --- a/docs/build/html/generated/aaanalysis.plot_set_legend.html +++ b/docs/build/html/generated/aaanalysis.plot_set_legend.html @@ -139,7 +139,7 @@ -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -214,7 +214,7 @@

    aaanalysis.plot_set_legend

    -aaanalysis.plot_set_legend(ax=None, handles=None, dict_color=None, list_cat=None, labels=None, y=-0.2, x=0.5, ncol=3, fontsize=11, weight='normal', lw=0, edgecolor=None, return_handles=False, loc='upper left', labelspacing=0.2, columnspacing=1, title=None, fontsize_legend=None, title_align_left=True, fontsize_weight='normal', shape=None, **kwargs)[source][source]
    +aaanalysis.plot_set_legend(ax=None, handles=None, dict_color=None, list_cat=None, labels=None, y=-0.2, x=0.5, ncol=3, fontsize=11, weight='normal', lw=0, edgecolor=None, return_handles=False, loc='upper left', labelspacing=0.2, columnspacing=1, title=None, fontsize_legend=None, title_align_left=True, fontsize_weight='normal', shape=None, **kwargs)[source][source]

    Set a customizable legend for a plot.

    Parameters
    @@ -275,9 +275,9 @@

    aaanalysis.plot_set_legend

    See also

    -
    matplotlib.pyplot.legend

    For additional details on how the ‘loc’ parameter can be customized.

    +
    matplotlib.pyplot.legend

    For additional details on how the ‘loc’ parameter can be customized.

    -
    matplotlib.lines.Line2D

    For additional details on the different types of marker shapes (‘shape’ parameter).

    +
    matplotlib.lines.Line2D

    For additional details on the different types of marker shapes (‘shape’ parameter).

    diff --git a/docs/build/html/generated/aaanalysis.plot_settings.html b/docs/build/html/generated/aaanalysis.plot_settings.html index 5a90cad0..66e61abc 100644 --- a/docs/build/html/generated/aaanalysis.plot_settings.html +++ b/docs/build/html/generated/aaanalysis.plot_settings.html @@ -139,7 +139,7 @@ -
  • Tables for the Project
  • +
  • Tables
  • References
  • @@ -214,7 +214,7 @@

    aaanalysis.plot_settings

    -aaanalysis.plot_settings(fig_format='pdf', verbose=False, grid=False, grid_axis='y', font_scale=0.7, change_size=True, weight_bold=True, adjust_elements=True, short_ticks=False, no_ticks=False, no_ticks_y=False, short_ticks_y=False, no_ticks_x=False, short_ticks_x=False)[source][source]
    +aaanalysis.plot_settings(fig_format='pdf', verbose=False, grid=False, grid_axis='y', font_scale=0.7, font='Arial', change_size=True, weight_bold=True, adjust_elements=True, short_ticks=False, no_ticks=False, no_ticks_y=False, short_ticks_y=False, no_ticks_x=False, short_ticks_x=False)[source][source]

    Configure general settings for plot visualization with various customization options.

    Parameters
    @@ -229,6 +229,8 @@

    aaanalysis.plot_settingsfloat, default=0.7

    Sets the scale for font sizes in the plot.

    +
    fontstr, default=’Arial’

    Name of sans-serif font (e.g., ‘Arial’, ‘Verdana’, ‘Helvetica’, ‘DejaVu Sans’)

    +
    change_sizebool, default=True

    If True, adjusts the size of plot elements.

    weight_boldbool, default=True

    If True, text elements appear in bold.

    diff --git a/docs/build/html/genindex.html b/docs/build/html/genindex.html index 27c494fd..a17c1c88 100644 --- a/docs/build/html/genindex.html +++ b/docs/build/html/genindex.html @@ -116,7 +116,7 @@

    REFERENCES

    @@ -191,6 +191,7 @@

    Index

    | A | C | D + | E | F | G | L @@ -222,13 +223,13 @@

    A

    @@ -255,19 +256,23 @@

    D

    +

    E

    + + +
    +

    F

    -
  • Tables for the Project
      -
    • Overview of Tables
    • +
    • Tables diff --git a/docs/build/html/index/badges.html b/docs/build/html/index/badges.html index ad4bc9b0..c1875b78 100644 --- a/docs/build/html/index/badges.html +++ b/docs/build/html/index/badges.html @@ -127,7 +127,7 @@

      REFERENCES

      diff --git a/docs/build/html/index/citations.html b/docs/build/html/index/citations.html index 131b8ac6..b2eb6927 100644 --- a/docs/build/html/index/citations.html +++ b/docs/build/html/index/citations.html @@ -127,7 +127,7 @@

      REFERENCES

      diff --git a/docs/build/html/index/introduction.html b/docs/build/html/index/introduction.html index de0202dd..84ed4b7a 100644 --- a/docs/build/html/index/introduction.html +++ b/docs/build/html/index/introduction.html @@ -132,7 +132,7 @@

      REFERENCES

      diff --git a/docs/build/html/index/references.html b/docs/build/html/index/references.html index 0e53718d..a1e3c3a2 100644 --- a/docs/build/html/index/references.html +++ b/docs/build/html/index/references.html @@ -64,7 +64,7 @@ - + @@ -128,7 +128,7 @@

      REFERENCES

      • API
      • -
      • Tables for the Project
      • +
      • Tables
      • References
        • Algorithms
        • Datasets and Benchmarks
        • @@ -289,7 +289,7 @@

          Further Information - +
          diff --git a/docs/build/html/index/usage_principles.html b/docs/build/html/index/usage_principles.html index e931f708..bc91020b 100644 --- a/docs/build/html/index/usage_principles.html +++ b/docs/build/html/index/usage_principles.html @@ -129,7 +129,7 @@

          REFERENCES

          diff --git a/docs/build/html/objects.inv b/docs/build/html/objects.inv index c65d9362..faf19a8f 100644 Binary files a/docs/build/html/objects.inv and b/docs/build/html/objects.inv differ diff --git a/docs/build/html/search.html b/docs/build/html/search.html index 2efb30ae..a7a803b4 100644 --- a/docs/build/html/search.html +++ b/docs/build/html/search.html @@ -119,7 +119,7 @@

          REFERENCES

          diff --git a/docs/build/html/searchindex.js b/docs/build/html/searchindex.js index 89c71736..c4d7e091 100644 --- a/docs/build/html/searchindex.js +++ b/docs/build/html/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["_resources/overview", "_resources/tables", "api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.SplitRange", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", "generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "index", "index/badges", "index/citations", "index/introduction", "index/references", "index/usage_principles", "tutorials"], "filenames": ["_resources/overview.rst", "_resources/tables.rst", "api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.SplitRange.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "index.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/references.rst", "index/usage_principles.rst", "tutorials.rst"], "titles": ["<no title>", "Tables for the Project", "API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.SequenceFeature", "aaanalysis.SplitRange", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Welcome to the AAanalysis documentation", "<no title>", "<no title>", "Introduction", "References", "Usage Principles", "Tutorials"], "terms": {"aaanalysi": [0, 2, 15, 16, 18], "amino": [0, 3, 4, 5, 6, 8, 9, 13, 15, 16, 17], "acid": [0, 3, 4, 5, 6, 8, 9, 13, 15, 16, 17], "analysi": [0, 7, 13, 
16], "i": [0, 2, 3, 4, 5, 7, 9, 10, 13, 16], "python": [0, 13, 16], "framework": [0, 3, 13, 16], "interpret": [0, 13, 15, 16, 17], "sequenc": [0, 1, 4, 5, 6, 7, 8, 13, 16, 17], "base": [0, 3, 4, 5, 7, 13, 16, 17], "protein": [0, 4, 5, 6, 8, 13, 16, 17], "predict": [0, 1, 8, 13, 16, 17], "provid": [0, 3, 4, 7, 8, 9, 13], "follow": [0, 2, 3, 4, 5, 7, 13, 15, 16], "algorithm": [0, 4, 13, 16], "aaclust": [0, 8, 13, 15, 16, 17, 19], "k": [0, 3, 13, 16, 17], "optim": [0, 3, 4, 13, 16, 17], "cluster": [0, 3, 13, 16, 17], "wrapper": [0, 4, 13, 16], "select": [0, 3, 4, 8, 9, 13, 16, 17], "redund": [0, 3, 4, 13, 16, 17], "reduc": [0, 3, 7, 13, 16, 17], "set": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 16, 17], "numer": [0, 3, 4, 5, 13, 16], "scale": [0, 3, 4, 5, 9, 12, 13, 15, 16, 17], "e": [0, 4, 5, 6, 13, 16], "g": [0, 4, 5, 6, 13, 16], "cpp": [0, 6, 13, 15, 16, 19], "compar": [0, 13, 16], "physicochem": [0, 4, 5, 13, 16, 17], "profil": [0, 4, 13, 16], "featur": [0, 3, 4, 5, 6, 7, 13, 16], "engin": [0, 13, 16], "two": [0, 4, 13, 16, 17], "identifi": [0, 4, 7, 13, 16, 17], "most": [0, 3, 4, 7, 13, 16], "distinct": [0, 5, 13, 16], "dpulearn": [0, 13, 15, 16, 19], "determinist": [0, 7, 13, 16], "posit": [0, 1, 3, 4, 5, 7, 13, 16], "unlabel": [0, 7, 8, 13, 16], "pu": [0, 7, 13, 16], "learn": [0, 3, 4, 7, 13, 15, 16, 17], "enabl": [0, 3, 4, 5, 7, 12, 13, 16], "train": [0, 13, 16], "unbalanc": [0, 13, 16], "small": [0, 7, 13, 16], "dataset": [0, 3, 8, 9, 13, 16], "moreov": [0, 13], "function": [0, 2, 4, 10, 12, 13], "load": [0, 8, 9, 13], "benchmark": [0, 8, 13], "load_data": [0, 13], "load_scal": [0, 1, 4, 5, 13], "depth": [0, 13], "level": [0, 1, 4, 13], "classif": [0, 1, 4, 9, 13], "aaontologi": [0, 13, 15, 17], "descript": 1, "see": [1, 4], "also": [1, 7], "1_overview_benchmark": 1, "aa": [1, 2, 4, 5, 7, 11, 12, 18], "load_dataset": [1, 2, 5], "2_overview_scal": 1, "neg": [1, 3, 5, 7], "predictor": 1, "refer": [1, 2, 3, 4, 8], "label": [1, 3, 4, 5, 6, 7, 11], "aa_caspase3": 
[1, 8], "233": 1, "185605": 1, "705": 1, "184900": 1, "prosper": [1, 17], "caspas": 1, "3": [1, 3, 5, 7, 11], "cleavag": [1, 17], "site": [1, 17], "song18": [1, 17], "1": [1, 3, 4, 5, 6, 7, 8, 11, 12], "adjac": 1, "0": [1, 3, 4, 5, 7, 11, 12], "aa_furin": [1, 8], "71": 1, "59003": 1, "163": 1, "58840": 1, "furin": 1, "aa_ldr": [1, 8], "342": 1, "118248": 1, "35469": 1, "82779": 1, "idp": [1, 17], "seq2seq": [1, 17], "long": 1, "intrins": [1, 17], "disord": [1, 17], "region": [1, 17], "ldr": 1, "tang20": [1, 17], "order": [1, 3], "aa_mmp2": [1, 8], "573": 1, "312976": 1, "2416": 1, "310560": 1, "matrix": [1, 3, 5, 7], "metallopeptidas": 1, "2": [1, 3, 4, 5, 6, 7, 11], "mmp2": 1, "aa_rnabind": [1, 8], "221": 1, "55001": 1, "6492": 1, "48509": 1, "gmksvm": 1, "ru": 1, "rna": [1, 17], "bind": [1, 17], "residu": [1, 8, 17], "rbp60": 1, "yang21": [1, 17], "non": [1, 4, 5, 8], "aa_sa": [1, 8], "101082": 1, "84523": 1, "solvent": 1, "access": [1, 2], "sa": 1, "data": [1, 3, 4, 7, 8, 13], "expos": 1, "buri": 1, "seq_amylo": [1, 8], "1414": 1, "8484": 1, "511": 1, "903": 1, "rerf": [1, 17], "pred": [1, 17], "amyloidognen": 1, "teng21": [1, 17], "amyloidogen": [1, 17], "seq_capsid": [1, 8], "7935": 1, "3364680": 1, "3864": 1, "4071": 1, "viralpro": [1, 17], "capdsid": 1, "galiez16": [1, 17], "capsid": [1, 17], "seq_disulfid": [1, 8], "2547": 1, "614470": 1, "897": 1, "1650": 1, "dipro": 1, "disulfid": 1, "bridg": [1, 17], "cheng06": [1, 17], "ss": 1, "bond": 1, "without": [1, 3, 4], "seq_loc": [1, 8], "1835": 1, "732398": 1, "1045": 1, "790": 1, "nan": 1, "subcellular": [1, 17], "locat": [1, 11], "cytoplasm": 1, "v": 1, "plasma": 1, "membran": [1, 5], "shen19": [1, 17], "seq_solubl": [1, 8], "17408": 1, "4432269": 1, "8704": 1, "solpro": [1, 17], "solubl": [1, 17], "insolubl": 1, "magnan09": [1, 17], "seq_tail": [1, 8], "6668": 1, "2671690": 1, "2574": 1, "4094": 1, "tail": [1, 17], "min": 1, "max": 1, "normal": [1, 4, 11], "586": 1, "breimann23b": [1, 13, 15, 17], 
"scales_raw": [1, 9], "raw": 1, "valu": [1, 3, 4, 5, 16], "kawashima08": [1, 17], "scales_classif": 1, "scales_pc": [1, 9], "princip": [1, 7], "compon": [1, 5, 7], "pc": [1, 7], "compress": 1, "20": [1, 4, 5], "breimann23a": [1, 17], "top60": [1, 9], "top": [1, 3], "60": 1, "subset": [1, 5], "top60_ev": [1, 9], "evalu": 1, "thi": [2, 3, 4, 12], "page": [2, 13], "contain": [2, 4, 7, 8], "public": [2, 13, 15], "object": [2, 4], "For": [2, 3, 5, 8, 11], "more": [2, 4], "exampl": [2, 4, 5, 7, 11, 12], "practic": 2, "usag": [2, 13], "our": 2, "notebook": [2, 19], "conveni": 2, "common": 2, "import": [2, 4, 5, 7, 11, 12, 18], "Then": 2, "you": [2, 13, 15], "can": [2, 3, 5, 7, 11, 13, 16], "all": [2, 3, 4, 5, 6, 12], "method": [2, 3, 4, 5, 6, 7, 17], "via": [2, 17], "alia": [2, 5], "class": [3, 4, 5, 6, 7, 8], "model": [3, 4, 7], "none": [3, 4, 5, 6, 7, 8, 11], "model_kwarg": 3, "verbos": [3, 4, 5, 7, 12], "fals": [3, 4, 5, 7, 9, 11, 12], "sourc": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "A": [3, 5, 11, 16, 17], "design": [3, 4], "primarili": [3, 7], "versatil": 3, "enough": 3, "ani": [3, 4, 16], "indic": [3, 4, 5, 6, 7], "It": [3, 16], "take": 3, "requir": 3, "pre": [3, 4], "defin": [3, 4], "number": [3, 4, 5, 7, 8, 11], "from": [3, 4, 5, 7, 9, 13], "scikit": 3, "http": 3, "org": 3, "stabl": 3, "modul": [3, 13], "html": 3, "By": 3, "leverag": 3, "pearson": [3, 4], "correl": [3, 4], "similar": 3, "measur": 3, "one": [3, 4], "repres": [3, 4, 16], "sampl": [3, 4, 5, 7], "term": 3, "medoid": 3, "each": [3, 4, 5, 7], "which": [3, 4, 5, 10, 16], "closest": 3, "": [3, 4, 11, 17], "center": 3, "yield": 3, "paramet": [3, 4, 5, 6, 7, 8, 9, 11, 12], "callabl": 3, "option": [3, 4, 5, 7, 8, 9, 12], "default": [3, 4, 5, 7, 8, 9, 11, 12], "sklearn": 3, "kmean": 3, "The": [3, 4, 5, 7, 9, 11], "emploi": [3, 7], "given": [3, 4, 5, 6, 9], "n_cluster": 3, "dict": [3, 4, 5, 7, 11], "dictionari": [3, 4, 5, 11], "keyword": [3, 4, 7], "argument": [3, 4, 5, 7, 11], "pass": [3, 4, 7, 11], "bool": [3, 
4, 5, 7, 9, 11, 12], "flag": 3, "disabl": 3, "output": [3, 4, 6, 7, 12], "attribut": [3, 7], "int": [3, 4, 5, 7, 8, 11], "obtain": [3, 5], "labels_": [3, 7], "arrai": [3, 4, 5, 7], "like": [3, 4, 5, 7], "centers_": 3, "averag": [3, 5], "correspond": 3, "center_labels_": 3, "medoids_": 3, "medoid_labels_": 3, "medoid_ind_": 3, "chosen": [3, 4, 5], "within": [3, 4, 5], "origin": 3, "__init__": [3, 4, 5, 6, 7], "cluster_nam": 3, "name": [3, 4, 5, 7, 8, 9], "name_unclassifi": 3, "unclassifi": [3, 9], "assign": [3, 4, 5, 7], "frequenc": 3, "renam": 3, "prioriti": 3, "frequent": 3, "If": [3, 4, 5, 7, 8, 12, 13, 15], "alreadi": 3, "us": [3, 4, 7, 8, 12, 13, 15, 16], "doe": 3, "exist": 3, "list": [3, 4, 5, 6, 11], "fit": [3, 7], "str": [3, 4, 5, 7, 8, 9, 11, 12], "cannot": 3, "classifi": [3, 4], "return": [3, 4, 5, 6, 7, 8, 9, 11], "static": [3, 4, 5, 6], "x_test": 3, "x_ref": 3, "labels_test": 3, "labels_ref": 3, "n": [3, 4, 5, 8, 17], "true": [3, 4, 5, 9, 11, 12], "on_cent": 3, "except_unclassifi": 3, "comput": [3, 4, 5, 17], "test": [3, 4, 6], "consid": 3, "strength": 3, "els": 3, "otherwis": [3, 4, 5, 7], "member": 3, "exclud": 3, "list_top_center_name_corr": 3, "have": 3, "strongest": 3, "x": [3, 4, 7, 8, 11, 12], "min_th": 3, "merge_metr": 3, "euclidean": [3, 7], "format": [3, 12], "determin": 3, "specif": [3, 8], "partit": 3, "maxim": 3, "beyond": 3, "threshold": [3, 4], "qualiti": 3, "either": [3, 5, 13], "minimum": [3, 5, 6, 8], "min_cor": 3, "between": [3, 4, 5, 11], "its": 3, "govern": 3, "undergo": 3, "three": [3, 5, 8], "stage": 3, "estim": 3, "lower": 3, "bound": 3, "refin": 3, "metric": [3, 7], "merg": 3, "smaller": 3, "direct": 3, "final": 3, "reduct": 3, "shape": [3, 4, 5, 7, 11], "n_sampl": [3, 4, 5, 7], "n_featur": [3, 4, 5, 7], "where": [3, 5, 7], "appli": [3, 7, 11, 12], "float": [3, 4, 7, 11, 12], "instead": 3, "names_medoid": 3, "note": [3, 4, 5, 7, 8, 12], "attr": 3, "further": [3, 4, 8, 13], "inform": [3, 4, 5, 7, 8, 13], "paper": [3, 8], "todo": 
[3, 8], "add": [3, 4, 8], "link": [3, 8, 13, 15, 17], "get_cluster_cent": 3, "center_label": 3, "associ": 3, "get_cluster_medoid": 3, "medoid_label": 3, "medoid_ind": 3, "index": [3, 13, 17], "df_scale": [4, 5], "df_cat": [4, 5], "df_part": [4, 5], "split_kw": [4, 5], "accept_gap": [4, 5], "jmd_n_len": [4, 5], "10": [4, 5], "jmd_c_len": [4, 5], "ext_len": [4, 5], "4": [4, 5, 6], "creat": [4, 5, 6, 7], "filter": [4, 8], "ar": [4, 5, 7, 8, 9], "discrimin": 4, "panda": [4, 5, 7, 8, 9], "datafram": [4, 5, 7, 8, 9], "ut": [4, 10], "str_scale_cat": 4, "load_categori": [4, 5], "categori": [4, 5, 9, 11], "part": [4, 5], "sequencefeatur": [4, 6], "get_split_kw": [4, 5], "nest": [4, 5], "split_typ": [4, 5], "whether": [4, 5, 9, 11], "accept": [4, 5], "miss": [4, 5], "omit": [4, 5], "length": [4, 5, 6, 8], "jmd": [4, 5], "c": [4, 5, 13, 17], "tmd": [4, 5], "extend": [4, 5], "start": [4, 5], "termin": [4, 5], "condit": [4, 5], "jmd_m_len": [4, 5], "print": [4, 5], "progress": [4, 17], "about": 4, "run": [4, 5], "perform": [4, 7], "step": [4, 5, 6, 16], "add_posit": 4, "df_feat": 4, "tmd_len": [4, 5], "first": [4, 5], "terminu": [4, 5], "includ": [4, 9, 11], "total": [4, 7], "add_sample_dif": 4, "df_seq": [4, 5, 7], "sample_nam": 4, "ref_group": 4, "differ": [4, 11], "group": 4, "should": [4, 5, 7, 9], "add_shap": 4, "col_shap": 4, "shap_valu": 4, "name_feat_impact": 4, "feat_impact": 4, "convert": [4, 5], "shap": 4, "impact": 4, "ad": 4, "column": [4, 5, 7, 8, 11], "shaplei": 4, "addit": [4, 5, 7, 11, 12], "explan": 4, "game": 4, "theoret": [4, 7], "approach": [4, 7], "explain": [4, 17], "machin": [4, 13, 15, 17], "respons": 4, "chang": 4, "plot_heatmap": 4, "y": [4, 11, 12], "subcategori": 4, "val_col": 4, "mean_dif": 4, "val_typ": 4, "mean": 4, "figsiz": 4, "8": [4, 5, 7], "5": [4, 5, 7, 11], "titl": [4, 11], "title_kw": 4, "vmin": 4, "vmax": 4, "grid_on": 4, "cmap": 4, "rdbu_r": 4, "cmap_n_color": 4, "dict_color": [4, 11], "cbar_kw": 4, "facecolor_dark": 4, "add_jmd_tmd": 
4, "jmd_n_seq": 4, "tmd_seq": 4, "jmd_c_seq": 4, "tmd_color": 4, "mediumspringgreen": 4, "jmd_color": 4, "blue": [4, 11], "tmd_seq_color": 4, "black": 4, "jmd_seq_color": 4, "white": 4, "seq_siz": 4, "tmd_jmd_fontsiz": 4, "xticks_po": 4, "xtick_siz": 4, "11": [4, 11], "xtick_width": 4, "xtick_length": 4, "ytick_siz": 4, "add_legend_cat": 4, "legend_kw": 4, "add_importance_map": 4, "cbar_pct": 4, "kwarg": [4, 5, 6, 11], "plot": [4, 11, 12, 13], "featuremap": 4, "axi": [4, 12], "versu": 4, "seaborn": [4, 12], "heatmap": 4, "highlight": 4, "n_feature_inform": 4, "uniqu": 4, "statist": 4, "scale_nam": 4, "shown": 4, "abs_auc": 4, "std_test": 4, "displai": 4, "sum": 4, "std": 4, "aggreg": 4, "positions_onli": 4, "specifi": [4, 5, 7, 12], "across": 4, "recommend": [4, 7], "when": [4, 7], "aim": 4, "emphas": 4, "fewer": 4, "value_typ": 4, "tupl": 4, "7": [4, 5, 7, 12], "width": [4, 11], "height": 4, "figur": 4, "inch": 4, "matplotlib": [4, 11, 12], "pyplot": [4, 11], "anchor": [4, 11], "colormap": 4, "infer": 4, "other": [4, 9], "color": [4, 11], "seismic": 4, "map": [4, 5, 11], "space": [4, 7, 11], "discret": 4, "diverg": 4, "sequenti": 4, "kei": 4, "colorbar": 4, "bar": 4, "under": 4, "depicet": 4, "depict": 4, "jmd_n": [4, 5], "jmd_c": [4, 5], "font": [4, 10, 11, 12], "size": [4, 5, 6, 10, 11, 12], "point": [4, 7, 11], "automat": [4, 7], "tick": [4, 12], "ax": [4, 11], "set_xticklabel": 4, "widht": 4, "tick_param": 4, "legend": [4, 11], "pcolormesh": 4, "effect": 4, "onli": [4, 9], "align": [4, 11], "applic": 4, "document": 4, "detail": [4, 11, 13, 15], "code": 4, "plot_profil": 4, "count": [4, 9], "edge_color": 4, "bar_width": 4, "75": 4, "ytick_width": 4, "ytick_length": 4, "ylim": 4, "highlight_tmd_area": 4, "highlight_alpha": 4, "15": [4, 5, 6], "grid": [4, 12], "grid_axi": [4, 12], "both": [4, 12], "shap_plot": 4, "instanc": 4, "type": [4, 5, 8, 11], "avail": [4, 13, 15, 17], "check_value_typ": 4, "custom": [4, 11, 12], "appear": [4, 12], "edg": [4, 11], "line": 
[4, 11], "annot": 4, "adjust": [4, 12], "limit": 4, "area": 4, "alpha": 4, "drawn": 4, "gener": [4, 5, 8, 12, 16, 17], "intern": 4, "librari": [4, 12], "parametr": 4, "n_filter": 4, "100": 4, "check_cat": 4, "n_pre_filt": 4, "pct_pre_filt": 4, "max_std_test": 4, "max_overlap": 4, "max_cor": 4, "n_process": 4, "pipelin": 4, "creation": 4, "collect": 4, "t": 4, "u": [4, 13], "p": [4, 17], "percentag": [4, 7], "transmembran": [4, 5], "domain": [4, 5, 8], "check": 4, "remain": 4, "after": 4, "maximum": [4, 5, 6, 7, 8], "standard": 4, "deviat": 4, "overlap": 4, "cpu": 4, "multiprocess": 4, "eleven": 4, "id": [4, 5], "result": 4, "rank": 4, "split": [4, 5, 6], "sub": 4, "absolut": 4, "auc": 4, "abs_mean_dif": 4, "std_ref": 4, "p_val": 4, "mann_whitnei": 4, "ttest_indep": 4, "p_val_fdr_bh": 4, "benjamini": 4, "hochberg": 4, "fdr": 4, "correct": 4, "retriev": 5, "continu": 5, "principl": [5, 13], "segment": [5, 6], "pattern": [5, 6], "properti": [5, 8], "express": 5, "present": 5, "realiz": 5, "over": 5, "valid": 5, "tmd_e": 5, "tmd_n": 5, "tmd_c": 5, "ext_c": 5, "ext_n": 5, "tmd_jmd": 5, "jmd_n_tmd_n": 5, "tmd_c_jmd_c": 5, "ext_n_tmd_n": 5, "tmd_c_ext_c": 5, "arg": [5, 6], "feat_matrix": 5, "n_job": 5, "return_label": 5, "string": 5, "pd": [5, 7], "seri": 5, "job": 5, "parallel": 5, "spars": 5, "feat_nam": 5, "depend": 5, "last": 5, "periodicpattern": [5, 6], "step1": 5, "step2": 5, "feat_valu": 5, "dict_scal": 5, "combin": 5, "convent": 5, "letter": 5, "feature_valu": 5, "n_part": 5, "ha": 5, "form": 5, "structur": [5, 17], "th": 5, "n_split": 5, "resp": 5, "p1": 5, "p2": 5, "pn": 5, "end": 5, "odd": 5, "even": 5, "give": 5, "integ": [5, 6], "list_part": 5, "all_part": 5, "possibl": [5, 6], "get_df_part": 5, "datafran": 5, "compris": 5, "tmd_start": 5, "tmd_stop": 5, "len": 5, "must": 5, "lenght": 5, "extra": 5, "found": 5, "get": [5, 6, 10], "sf": 5, "gsec_sub_seq": 5, "n_split_min": [5, 6], "n_split_max": [5, 6], "steps_pattern": 5, "n_min": [5, 6], "n_max": [5, 6], 
"len_max": [5, 6], "steps_periodicpattern": 5, "greater": 5, "greatest": 5, "whole": [5, 8], "specfii": 5, "smallest": 5, "6": [5, 8], "vari": 5, "paramt": 5, "argumetn": 5, "rang": 6, "multipl": [6, 17], "labels_pattern": 6, "match": [6, 17], "element": [6, 12], "labels_periodicpattern": 6, "labels_seg": 6, "seq": 6, "seq_split": 6, "n_compon": 7, "pca_kwarg": 7, "offer": 7, "pca": 7, "dimension": [7, 17], "iter": 7, "reliabl": 7, "These": 7, "those": 7, "distant": 7, "altern": 7, "distanc": 7, "manhattan": 7, "cosin": 7, "80": 7, "cover": 7, "varianc": 7, "identif": [7, 17], "inspir": 7, "techniqu": 7, "an": [7, 13, 15, 17], "high": [7, 17], "n_neg": 7, "label_po": 7, "name_neg": 7, "rel_neg": 7, "col_class": 7, "newli": 7, "updat": 7, "new": 7, "store": 7, "Will": 7, "dure": 7, "initi": 7, "conta": 7, "po": 7, "unl": 7, "numpi": 7, "np": 7, "atgc": 7, "gcta": 7, "actg": 7, "tacg": 7, "mode": 7, "modifi": [7, 8, 12], "dpul": 7, "info": 8, "non_canonical_aa": 8, "remov": [8, 12], "min_len": 8, "max_len": 8, "overview": 8, "dom_subgsec": 8, "per": 8, "keep": 8, "gap": 8, "canon": 8, "kept": 8, "replac": 8, "symbol": 8, "df": [8, 9], "just_aaindex": 9, "unclassified_in": 9, "scale_classif": 9, "relev": 9, "aaindex": [9, 17], "current": 10, "plot_set": 10, "handl": 11, "list_cat": 11, "ncol": 11, "fontsiz": 11, "weight": [11, 17], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "attach": 11, "item": 11, "coordin": 11, "text": [11, 12], "vertic": 11, "horizont": 11, "marker": 11, "directli": 11, "finer": 11, "control": 11, "how": 11, "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "o": 11, "fig_format": 12, "pdf": 12, "font_scal": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, 
"visual": 12, "variou": 12, "file": 12, "save": 12, "make": 12, "visibl": 12, "choos": 12, "bold": 12, "layout": 12, "short": 12, "mark": 12, "global": 12, "pypi": 13, "conda": 13, "forg": 13, "pip": 13, "introduct": 13, "tutori": 13, "api": 13, "util": 13, "project": 13, "case": 13, "search": 13, "your": [13, 15], "work": [13, 15], "pleas": [13, 15], "cite": [13, 15], "respect": [13, 15], "_": [13, 15], "breimann": [13, 15, 17], "kamp": [13, 15], "steiner": [13, 15], "frishman": [13, 15], "2023": [13, 15], "ontologi": [13, 15, 17], "biorxiv": [13, 15, 17], "citat": 15, "wa": 16, "develop": 16, "biolog": 16, "typic": 16, "consist": 16, "et": 17, "al": 17, "2023a": 17, "2023b": 17, "breimann23c": 17, "2023c": 17, "chart": 17, "\u03b3": 17, "secretas": 17, "substrat": 17, "ai": 17, "cheng": 17, "2006": 17, "larg": 17, "disulphid": 17, "kernel": 17, "recurs": 17, "neural": 17, "network": 17, "graph": 17, "struct": 17, "funct": 17, "kawashima": 17, "2008": 17, "aid": 17, "databas": 17, "report": 17, "nucleic": 17, "re": 17, "magnan": 17, "randal": 17, "baldi": 17, "2009": 17, "accur": 17, "bioinformat": 17, "galiez": 17, "2016": 17, "tool": 17, "viral": 17, "song": 17, "2018": 17, "throughput": 17, "90": 17, "proteas": 17, "improv": 17, "accuraci": 17, "shen": 17, "2019": 17, "local": 17, "integr": 17, "evolutionari": 17, "chou": 17, "pseaac": 17, "j": 17, "theor": 17, "biol": 17, "tang": 17, "2020": 17, "teng": 17, "2021": 17, "pseudo": 17, "composit": 17, "tripeptid": 17, "bmc": 17, "yang": 17, "granular": 17, "appl": 17, "view": 19}, "objects": {"aaanalysis": [[3, 0, 1, "", "AAclust"], [4, 0, 1, "", "CPP"], [5, 0, 1, "", "SequenceFeature"], [6, 0, 1, "", "SplitRange"], [7, 0, 1, "", "dPULearn"], [8, 2, 1, "", "load_dataset"], [9, 2, 1, "", "load_scales"], [10, 2, 1, "", "plot_gcfs"], [11, 2, 1, "", "plot_set_legend"], [12, 2, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "cluster_naming"], [3, 1, 1, "", "correlation"], [3, 
1, 1, "", "fit"], [3, 1, 1, "", "get_cluster_centers"], [3, 1, 1, "", "get_cluster_medoids"]], "aaanalysis.CPP": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "add_positions"], [4, 1, 1, "", "add_sample_dif"], [4, 1, 1, "", "add_shap"], [4, 1, 1, "", "plot_heatmap"], [4, 1, 1, "", "plot_profile"], [4, 1, 1, "", "run"]], "aaanalysis.SequenceFeature": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "feat_matrix"], [5, 1, 1, "", "feat_names"], [5, 1, 1, "", "feat_value"], [5, 1, 1, "", "features"], [5, 1, 1, "", "get_df_parts"], [5, 1, 1, "", "get_split_kws"]], "aaanalysis.SplitRange": [[6, 1, 1, "", "__init__"], [6, 1, 1, "", "labels_pattern"], [6, 1, 1, "", "labels_periodicpattern"], [6, 1, 1, "", "labels_segment"], [6, 1, 1, "", "pattern"], [6, 1, 1, "", "periodicpattern"], [6, 1, 1, "", "segment"]], "aaanalysis.dPULearn": [[7, 1, 1, "", "__init__"], [7, 1, 1, "", "fit"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "function", "Python function"]}, "titleterms": {"tabl": [1, 13], "project": 1, "overview": [1, 13], "protein": [1, 19], "benchmark": [1, 17], "dataset": [1, 17], "amino": 1, "acid": 1, "scale": 1, "api": 2, "data": [2, 19], "load": [2, 19], "aaclust": [2, 3], "cpp": [2, 4], "modul": 2, "dpulearn": [2, 7], "plot": 2, "util": 2, "aaanalysi": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], "sequencefeatur": 5, "splitrang": 6, "load_dataset": 8, "load_scal": 9, "plot_gcf": 10, "plot_set_legend": 11, "plot_set": 12, "welcom": 13, "document": 13, "instal": 13, "exampl": 13, "refer": [13, 17], "indic": 13, "citat": 13, "introduct": 16, "workflow": 16, "algorithm": 17, "us": 17, "case": 17, "further": 17, "inform": 17, "usag": 18, "principl": 18, "tutori": 19, "redund": 19, "reduct": 19, "featur": 19, "engin": 19, "identif": 19, "neg": 19, "predict": 19}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, 
"sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"Tables for the Project": [[1, "tables-for-the-project"]], "Overview of Tables": [[1, "overview-of-tables"]], "Protein benchmark datasets": [[1, "protein-benchmark-datasets"]], "Amino acid scale datasets": [[1, "amino-acid-scale-datasets"]], "API": [[2, "api"]], "Data Loading": [[2, "data-loading"]], "AAclust": [[2, "aaclust"]], "CPP Module": [[2, "cpp-module"]], "dPUlearn": [[2, "dpulearn"]], "Plot Utilities": [[2, "plot-utilities"]], "aaanalysis.AAclust": [[3, "aaanalysis-aaclust"]], "aaanalysis.CPP": [[4, "aaanalysis-cpp"]], "aaanalysis.SequenceFeature": [[5, "aaanalysis-sequencefeature"]], "aaanalysis.SplitRange": [[6, "aaanalysis-splitrange"]], "aaanalysis.dPULearn": [[7, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[8, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[9, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[10, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Welcome to the AAanalysis documentation": [[13, "welcome-to-the-aaanalysis-documentation"]], "Install": [[13, "install"]], "OVERVIEW": [[13, null]], "EXAMPLES": [[13, null]], "REFERENCES": [[13, null]], "Indices and tables": [[13, "indices-and-tables"]], "Citation": [[13, "citation"]], "Introduction": [[16, "introduction"]], "Workflow": [[16, "workflow"]], "References": [[17, "references"]], "Algorithms": [[17, "algorithms"]], "Datasets and Benchmarks": [[17, "datasets-and-benchmarks"]], "Use Cases": [[17, "use-cases"]], "Further Information": [[17, "further-information"]], "Usage Principles": [[18, "usage-principles"]], "Tutorials": [[19, "tutorials"]], "Data loading": [[19, "data-loading"]], 
"Redundancy-reduction": [[19, "redundancy-reduction"]], "Feature engineering": [[19, "feature-engineering"]], "Identification of negatives": [[19, "identification-of-negatives"]], "Protein prediction": [[19, "protein-prediction"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[3, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.__init__"]], "cluster_naming() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.correlation"]], "fit() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.get_cluster_medoids"]], "cpp (class in aaanalysis)": [[4, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.__init__"]], "add_positions() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.add_positions"]], "add_sample_dif() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.add_sample_dif"]], "add_shap() (aaanalysis.cpp static method)": [[4, "aaanalysis.CPP.add_shap"]], "plot_heatmap() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.plot_heatmap"]], "plot_profile() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.plot_profile"]], "run() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.run"]], "sequencefeature (class in aaanalysis)": [[5, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.__init__"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_names"]], "feat_value() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_value"]], "features() (aaanalysis.sequencefeature 
method)": [[5, "aaanalysis.SequenceFeature.features"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_df_parts"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_split_kws"]], "splitrange (class in aaanalysis)": [[6, "aaanalysis.SplitRange"]], "__init__() (aaanalysis.splitrange method)": [[6, "aaanalysis.SplitRange.__init__"]], "labels_pattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_pattern"]], "labels_periodicpattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_periodicpattern"]], "labels_segment() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_segment"]], "pattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.pattern"]], "periodicpattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.periodicpattern"]], "segment() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.segment"]], "__init__() (aaanalysis.dpulearn method)": [[7, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[7, "aaanalysis.dPULearn"]], "fit() (aaanalysis.dpulearn method)": [[7, "aaanalysis.dPULearn.fit"]], "load_dataset() (in module aaanalysis)": [[8, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[9, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[10, "aaanalysis.plot_gcfs"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["_resources/overview", "_resources/tables", "api", "generated/aaanalysis.AAclust", "generated/aaanalysis.CPP", "generated/aaanalysis.SequenceFeature", "generated/aaanalysis.SplitRange", "generated/aaanalysis.dPULearn", "generated/aaanalysis.load_dataset", 
"generated/aaanalysis.load_scales", "generated/aaanalysis.plot_gcfs", "generated/aaanalysis.plot_set_legend", "generated/aaanalysis.plot_settings", "index", "index/badges", "index/citations", "index/introduction", "index/references", "index/usage_principles", "tutorials"], "filenames": ["_resources/overview.rst", "_resources/tables.rst", "api.rst", "generated/aaanalysis.AAclust.rst", "generated/aaanalysis.CPP.rst", "generated/aaanalysis.SequenceFeature.rst", "generated/aaanalysis.SplitRange.rst", "generated/aaanalysis.dPULearn.rst", "generated/aaanalysis.load_dataset.rst", "generated/aaanalysis.load_scales.rst", "generated/aaanalysis.plot_gcfs.rst", "generated/aaanalysis.plot_set_legend.rst", "generated/aaanalysis.plot_settings.rst", "index.rst", "index/badges.rst", "index/citations.rst", "index/introduction.rst", "index/references.rst", "index/usage_principles.rst", "tutorials.rst"], "titles": ["<no title>", "Tables", "API", "aaanalysis.AAclust", "aaanalysis.CPP", "aaanalysis.SequenceFeature", "aaanalysis.SplitRange", "aaanalysis.dPULearn", "aaanalysis.load_dataset", "aaanalysis.load_scales", "aaanalysis.plot_gcfs", "aaanalysis.plot_set_legend", "aaanalysis.plot_settings", "Welcome to the AAanalysis documentation", "<no title>", "<no title>", "Introduction", "References", "Usage Principles", "Tutorials"], "terms": {"aaanalysi": [0, 2, 15, 16, 18], "amino": [0, 3, 4, 5, 6, 8, 9, 13, 15, 16, 17], "acid": [0, 3, 4, 5, 6, 8, 9, 13, 15, 16, 17], "analysi": [0, 7, 13, 16], "i": [0, 2, 3, 4, 5, 7, 9, 10, 13, 16], "python": [0, 13, 16], "framework": [0, 3, 13, 16], "interpret": [0, 13, 15, 16, 17], "sequenc": [0, 1, 4, 5, 6, 7, 8, 13, 16, 17], "base": [0, 3, 5, 7, 13, 16, 17], "protein": [0, 5, 6, 8, 13, 16, 17], "predict": [0, 1, 8, 13, 16, 17], "provid": [0, 3, 4, 7, 8, 9, 13], "follow": [0, 2, 3, 4, 5, 7, 13, 15, 16], "algorithm": [0, 4, 13, 16], "aaclust": [0, 8, 13, 15, 16, 17, 19], "k": [0, 3, 13, 16, 17], "optim": [0, 3, 4, 13, 16, 17], "cluster": [0, 3, 13, 16, 
17], "wrapper": [0, 13, 16], "select": [0, 3, 4, 8, 9, 13, 16, 17], "redund": [0, 3, 4, 13, 16, 17], "reduc": [0, 3, 7, 13, 16, 17], "set": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 16, 17], "numer": [0, 3, 5, 13, 16], "scale": [0, 3, 4, 5, 9, 12, 13, 15, 16, 17], "e": [0, 5, 6, 12, 13, 16], "g": [0, 5, 6, 12, 13, 16], "cpp": [0, 5, 6, 13, 15, 16, 19], "compar": [0, 13, 16], "physicochem": [0, 4, 5, 13, 16, 17], "profil": [0, 13, 16], "featur": [0, 3, 4, 5, 6, 7, 13, 16], "engin": [0, 13, 16], "two": [0, 4, 13, 16, 17], "identifi": [0, 4, 7, 13, 16, 17], "most": [0, 3, 4, 7, 13, 16], "distinct": [0, 5, 13, 16], "dpulearn": [0, 13, 15, 16, 19], "determinist": [0, 7, 13, 16], "posit": [0, 1, 3, 4, 5, 7, 13, 16], "unlabel": [0, 7, 8, 13, 16], "pu": [0, 1, 7, 13, 16], "learn": [0, 3, 7, 13, 15, 16, 17], "enabl": [0, 3, 4, 5, 7, 12, 13, 16], "train": [0, 13, 16], "unbalanc": [0, 13, 16], "small": [0, 7, 13, 16], "dataset": [0, 3, 4, 8, 9, 13, 16], "moreov": [0, 13], "function": [0, 2, 10, 12, 13], "load": [0, 8, 9, 13], "benchmark": [0, 8, 13], "load_data": [0, 13], "load_scal": [0, 1, 4, 5, 13], "depth": [0, 13], "level": [0, 1, 13], "classif": [0, 1, 9, 13], "aaontologi": [0, 1, 13, 15, 17], "descript": 1, "see": 1, "also": [1, 7], "1_overview_benchmark": 1, "aa": [1, 2, 4, 5, 7, 11, 12, 18], "load_dataset": [1, 2, 5], "2_overview_scal": 1, "neg": [1, 3, 5, 7], "predictor": 1, "refer": [1, 2, 3, 4, 5, 8], "label": [1, 3, 4, 5, 6, 7, 11], "aa_caspase3": [1, 8], "233": 1, "185605": 1, "705": 1, "184900": 1, "prosper": [1, 17], "caspas": 1, "3": [1, 3, 5, 7, 11], "cleavag": [1, 17], "site": [1, 17], "song18": [1, 17], "1": [1, 3, 4, 5, 6, 7, 8, 11, 12], "adjac": 1, "0": [1, 3, 4, 5, 7, 11, 12], "aa_furin": [1, 8], "71": 1, "59003": 1, "163": 1, "58840": 1, "furin": 1, "aa_ldr": [1, 8], "342": 1, "118248": 1, "35469": 1, "82779": 1, "idp": [1, 17], "seq2seq": [1, 17], "long": 1, "intrins": [1, 17], "disord": [1, 17], "region": [1, 17], "ldr": 1, "tang20": [1, 17], "order": 
[1, 3], "aa_mmp2": [1, 8], "573": 1, "312976": 1, "2416": 1, "310560": 1, "matrix": [1, 3, 5, 7], "metallopeptidas": 1, "2": [1, 3, 4, 5, 6, 7, 11], "mmp2": 1, "aa_rnabind": [1, 8], "221": 1, "55001": 1, "6492": 1, "48509": 1, "gmksvm": 1, "ru": 1, "rna": [1, 17], "bind": [1, 17], "residu": [1, 8, 17], "rbp60": 1, "yang21": [1, 17], "non": [1, 4, 5, 8], "aa_sa": [1, 8], "101082": 1, "84523": 1, "solvent": 1, "access": [1, 2], "sa": 1, "data": [1, 3, 13], "expos": 1, "buri": 1, "seq_amylo": [1, 8], "1414": 1, "8484": 1, "511": 1, "903": 1, "rerf": [1, 17], "pred": [1, 17], "amyloidognen": 1, "teng21": [1, 17], "amyloidogen": [1, 17], "seq_capsid": [1, 8], "7935": 1, "3364680": 1, "3864": 1, "4071": 1, "viralpro": [1, 17], "capdsid": 1, "galiez16": [1, 17], "capsid": [1, 17], "seq_disulfid": [1, 8], "2547": 1, "614470": 1, "897": 1, "1650": 1, "dipro": 1, "disulfid": 1, "bridg": [1, 17], "cheng06": [1, 17], "ss": 1, "bond": 1, "without": [1, 3], "seq_loc": [1, 8], "1835": 1, "732398": 1, "1045": 1, "790": 1, "nan": 1, "subcellular": [1, 17], "locat": [1, 11], "cytoplasm": 1, "v": 1, "plasma": 1, "membran": [1, 5], "shen19": [1, 17], "seq_solubl": [1, 8], "17408": 1, "4432269": 1, "8704": 1, "solpro": [1, 17], "solubl": [1, 17], "insolubl": 1, "magnan09": [1, 17], "seq_tail": [1, 8], "6668": 1, "2671690": 1, "2574": 1, "4094": 1, "tail": [1, 17], "domain": [1, 5, 8], "dom_gsec": [1, 8], "126": 1, "92964": 1, "63": 1, "gamma": 1, "secretas": [1, 17], "substrat": [1, 17], "breimann23c": [1, 17], "dom_gsec_pu": [1, 8], "694": 1, "494524": 1, "unknown": 1, "statu": 1, "min": 1, "max": 1, "normal": [1, 11], "586": 1, "breimann23b": [1, 13, 15, 17], "scales_raw": [1, 9], "raw": 1, "valu": [1, 3, 4, 5, 16], "kawashima08": [1, 17], "scales_classif": 1, "scales_pc": [1, 9], "princip": [1, 7], "compon": [1, 5, 7], "pc": [1, 7], "compress": 1, "20": [1, 4, 5], "breimann23a": [1, 17], "top60": [1, 9], "top": [1, 3], "60": 1, "subset": [1, 5], "top60_ev": [1, 9], "evalu": [1, 4], 
"thi": [2, 3, 12], "page": [2, 13], "contain": [2, 4, 7, 8], "public": [2, 13, 15], "object": 2, "For": [2, 3, 5, 8, 11], "more": 2, "exampl": [2, 5, 7, 11, 12], "practic": 2, "usag": [2, 13], "our": 2, "notebook": [2, 19], "conveni": 2, "common": 2, "import": [2, 5, 7, 11, 12, 18], "Then": 2, "you": [2, 13, 15], "can": [2, 3, 5, 7, 11, 13, 16], "all": [2, 3, 4, 5, 6, 12], "method": [2, 3, 4, 5, 6, 7, 17], "via": [2, 17], "alia": [2, 5], "class": [3, 4, 5, 6, 7, 8], "model": [3, 7], "none": [3, 4, 5, 6, 7, 8, 11], "model_kwarg": 3, "verbos": [3, 4, 5, 7, 12], "fals": [3, 4, 5, 7, 9, 11, 12], "sourc": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "A": [3, 5, 11, 16, 17], "design": 3, "primarili": [3, 7], "versatil": 3, "enough": 3, "ani": [3, 16], "indic": [3, 5, 6, 7], "It": [3, 16], "take": 3, "requir": 3, "pre": [3, 4], "defin": [3, 5], "number": [3, 4, 5, 7, 8, 11], "from": [3, 4, 5, 7, 9, 13], "scikit": 3, "http": 3, "org": 3, "stabl": 3, "modul": [3, 13], "html": 3, "By": 3, "leverag": 3, "pearson": [3, 4], "correl": [3, 4], "similar": 3, "measur": 3, "one": 3, "repres": [3, 16], "sampl": [3, 4, 5, 7], "term": 3, "medoid": 3, "each": [3, 4, 5, 7], "which": [3, 5, 10, 16], "closest": 3, "": [3, 11, 17], "center": 3, "yield": 3, "paramet": [3, 4, 5, 6, 7, 8, 9, 11, 12], "callabl": 3, "option": [3, 4, 5, 7, 8, 9, 12], "default": [3, 4, 5, 7, 8, 9, 11, 12], "sklearn": 3, "kmean": 3, "The": [3, 4, 5, 7, 9, 11], "emploi": [3, 7], "given": [3, 5, 6, 9], "n_cluster": 3, "dict": [3, 4, 5, 7, 11], "dictionari": [3, 4, 5, 11], "keyword": [3, 7], "argument": [3, 5, 7, 11], "pass": [3, 7, 11], "bool": [3, 4, 5, 7, 9, 11, 12], "flag": 3, "disabl": 3, "output": [3, 5, 6, 7, 12], "attribut": [3, 7], "int": [3, 4, 5, 7, 8, 11], "obtain": [3, 5], "labels_": [3, 7], "arrai": [3, 4, 5, 7], "like": [3, 4, 5, 7], "centers_": 3, "averag": [3, 5], "correspond": 3, "center_labels_": 3, "medoids_": 3, "medoid_labels_": 3, "medoid_ind_": 3, "chosen": [3, 4, 5], "within": [3, 4, 5], "origin": 3, 
"__init__": [3, 4, 5, 6, 7], "cluster_nam": 3, "name": [3, 4, 5, 7, 8, 9, 12], "name_unclassifi": 3, "unclassifi": [3, 9], "assign": [3, 5, 7], "frequenc": 3, "renam": 3, "prioriti": 3, "frequent": 3, "If": [3, 4, 5, 7, 8, 12, 13, 15], "alreadi": 3, "us": [3, 4, 7, 8, 12, 13, 15, 16], "doe": 3, "exist": 3, "list": [3, 5, 6, 11], "fit": [3, 7], "str": [3, 5, 7, 8, 9, 11, 12], "cannot": 3, "classifi": 3, "return": [3, 4, 5, 6, 7, 8, 9, 11], "static": [3, 4, 5, 6], "x_test": 3, "x_ref": 3, "labels_test": 3, "labels_ref": 3, "n": [3, 4, 5, 8, 17], "true": [3, 4, 5, 9, 11, 12], "on_cent": 3, "except_unclassifi": 3, "comput": [3, 4, 5, 17], "test": [3, 4, 6], "_data": [3, 7, 8], "consid": 3, "strength": 3, "els": 3, "otherwis": [3, 5, 7], "member": 3, "exclud": 3, "list_top_center_name_corr": 3, "have": 3, "strongest": 3, "x": [3, 7, 8, 11, 12], "min_th": 3, "merge_metr": 3, "euclidean": [3, 7], "format": [3, 12], "determin": 3, "specif": [3, 8], "partit": 3, "maxim": 3, "beyond": 3, "threshold": [3, 4], "qualiti": 3, "either": [3, 5, 13], "minimum": [3, 5, 6, 8], "min_cor": 3, "between": [3, 4, 5, 11], "its": 3, "govern": 3, "undergo": 3, "three": [3, 5, 8], "stage": 3, "estim": 3, "lower": 3, "bound": 3, "refin": 3, "metric": [3, 7], "merg": 3, "smaller": 3, "direct": 3, "final": 3, "reduct": 3, "shape": [3, 4, 5, 7, 11], "n_sampl": [3, 4, 5, 7], "n_featur": [3, 4, 5, 7], "where": [3, 5, 7], "appli": [3, 7, 11, 12], "float": [3, 4, 7, 11, 12], "instead": 3, "names_medoid": 3, "note": [3, 4, 5, 7, 8, 12], "attr": 3, "further": [3, 8, 13], "inform": [3, 4, 5, 7, 8, 13], "paper": [3, 8], "todo": [3, 4, 8], "add": [3, 4, 5, 8], "link": [3, 4, 8, 13, 15, 17], "get_cluster_cent": 3, "center_label": 3, "associ": 3, "get_cluster_medoid": 3, "medoid_label": 3, "medoid_ind": 3, "index": [3, 13, 17], "df_scale": [4, 5], "df_cat": [4, 5], "df_part": [4, 5], "split_kw": [4, 5], "accept_gap": [4, 5], "creat": [4, 5, 6, 7], "filter": [4, 8], "ar": [4, 5, 7, 8, 9], "discrimin": 4, 
"panda": [4, 5, 7, 8, 9], "datafram": [4, 5, 7, 8, 9], "ut": [4, 10], "str_scale_cat": 4, "load_categori": [4, 5], "categori": [4, 5, 9, 11], "part": [4, 5], "sequencefeatur": [4, 6], "get_split_kw": [4, 5], "nest": [4, 5], "split_typ": [4, 5], "whether": [4, 5, 9, 11], "accept": [4, 5], "miss": [4, 5], "omit": [4, 5], "print": [4, 5], "progress": [4, 17], "about": 4, "run": [4, 5], "perform": [4, 7], "step": [4, 5, 6, 16], "eval": 4, "df_feat": [4, 5], "get": [4, 5, 6, 10], "parametr": 4, "n_filter": 4, "100": 4, "tmd_len": [4, 5], "jmd_n_len": [4, 5], "10": [4, 5], "jmd_c_len": [4, 5], "ext_len": [4, 5], "4": [4, 5, 6], "start": [4, 5], "check_cat": 4, "n_pre_filt": 4, "pct_pre_filt": 4, "5": [4, 5, 7, 11], "max_std_test": 4, "max_overlap": 4, "max_cor": 4, "n_process": 4, "pipelin": 4, "creation": 4, "aim": 4, "collect": 4, "group": [4, 5], "t": 4, "u": [4, 13], "p": [4, 17], "percentag": [4, 7], "length": [4, 5, 6, 8], "tmd": [4, 5], "explan": 4, "first": [4, 5], "terminu": [4, 5], "jmd": [4, 5], "c": [4, 5, 13, 17], "extend": [4, 5], "termin": [4, 5], "should": [4, 5, 7, 9], "longer": 4, "than": 4, "check": 4, "remain": 4, "after": 4, "maximum": [4, 5, 6, 7, 8], "standard": 4, "deviat": 4, "overlap": 4, "cpu": 4, "multiprocess": 4, "automat": [4, 7], "n_feature_inform": 4, "uniqu": 4, "statist": 4, "eleven": 4, "column": [4, 5, 7, 8, 11], "includ": [4, 5, 9, 11], "id": [4, 5], "result": 4, "rank": 4, "11": [4, 11], "split": [4, 5, 6], "subcategori": 4, "sub": 4, "scale_nam": 4, "abs_auc": 4, "absolut": 4, "adjust": [4, 12], "auc": 4, "abs_mean_dif": 4, "mean": 4, "differ": [4, 5, 11], "std_test": 4, "std_ref": 4, "p_val": 4, "mann_whitnei": 4, "ttest_indep": 4, "p_val_fdr_bh": 4, "benjamini": 4, "hochberg": 4, "fdr": 4, "correct": 4, "retriev": 5, "continu": 5, "transmembran": 5, "principl": [5, 13], "segment": [5, 6], "pattern": [5, 6], "properti": [5, 8], "express": 5, "present": 5, "realiz": 5, "over": 5, "valid": 5, "tmd_e": 5, "tmd_n": 5, "tmd_c": 5, 
"jmd_n": 5, "jmd_c": 5, "ext_c": 5, "ext_n": 5, "tmd_jmd": 5, "jmd_n_tmd_n": 5, "tmd_c_jmd_c": 5, "ext_n_tmd_n": 5, "tmd_c_ext_c": 5, "arg": [5, 6], "kwarg": [5, 6, 11], "add_dif": 5, "df_seq": [5, 7], "sample_nam": 5, "ref_group": 5, "add_feat_valu": 5, "dict_scal": 5, "combin": 5, "convent": 5, "map": [5, 11], "letter": 5, "feature_valu": 5, "n_part": 5, "ha": 5, "form": 5, "structur": [5, 17], "type": [5, 8, 11], "th": 5, "n_split": 5, "resp": 5, "p1": 5, "p2": 5, "pn": 5, "end": 5, "periodicpattern": [5, 6], "step1": 5, "step2": 5, "size": [5, 6, 10, 11, 12], "odd": 5, "even": 5, "give": 5, "integ": [5, 6], "7": [5, 7, 12], "add_posit": 5, "part_split": 5, "string": 5, "pd": [5, 7], "seri": 5, "condit": 5, "jmd_m_len": 5, "feat_posit": 5, "feat_nam": 5, "total": [5, 7], "feat_matrix": 5, "n_job": 5, "return_label": 5, "job": 5, "parallel": 5, "addit": [5, 7, 11, 12], "spars": 5, "convert": 5, "depend": 5, "last": 5, "get_df_part": 5, "list_part": 5, "all_part": 5, "datafran": 5, "compris": 5, "tmd_start": 5, "tmd_stop": 5, "len": 5, "must": 5, "lenght": 5, "extra": 5, "possibl": [5, 6], "found": 5, "sf": 5, "gsec_sub_seq": 5, "get_featur": 5, "n_split_min": [5, 6], "n_split_max": [5, 6], "15": [5, 6], "steps_pattern": 5, "n_min": [5, 6], "n_max": [5, 6], "len_max": [5, 6], "steps_periodicpattern": 5, "greater": 5, "specifi": [5, 7, 12], "greatest": 5, "whole": [5, 8], "specfii": 5, "smallest": 5, "6": [5, 8], "8": [5, 7], "vari": 5, "paramt": 5, "gener": [5, 8, 12, 16, 17], "argumetn": 5, "rang": 6, "multipl": [6, 17], "labels_pattern": 6, "match": [6, 17], "element": [6, 12], "labels_periodicpattern": 6, "labels_seg": 6, "seq": 6, "seq_split": 6, "n_compon": 7, "pca_kwarg": 7, "offer": 7, "approach": 7, "pca": 7, "dimension": [7, 17], "space": [7, 11], "iter": 7, "reliabl": 7, "These": 7, "those": 7, "distant": 7, "altern": 7, "distanc": 7, "manhattan": 7, "cosin": 7, "80": 7, "cover": 7, "varianc": 7, "when": 7, "identif": [7, 17], "inspir": 7, "techniqu": 7, 
"an": [7, 13, 15, 17], "theoret": 7, "recommend": 7, "high": [7, 17], "datapoint": 7, "n_neg": 7, "label_po": 7, "name_neg": 7, "rel_neg": 7, "col_class": 7, "newli": 7, "updat": 7, "new": 7, "store": 7, "Will": 7, "dure": 7, "initi": 7, "datafor": 7, "conta": 7, "po": 7, "unl": 7, "numpi": 7, "np": 7, "atgc": 7, "gcta": 7, "actg": 7, "tacg": 7, "mode": 7, "modifi": [7, 8, 12], "dpul": 7, "info": 8, "non_canonical_aa": 8, "remov": [8, 12], "min_len": 8, "max_len": 8, "overview": 8, "_pu": 8, "per": 8, "keep": 8, "gap": 8, "canon": 8, "kept": 8, "replac": 8, "symbol": 8, "df": [8, 9], "just_aaindex": 9, "unclassified_in": 9, "scale_classif": 9, "other": 9, "count": 9, "onli": 9, "relev": 9, "aaindex": [9, 17], "current": 10, "font": [10, 11, 12], "plot_set": 10, "ax": 11, "handl": 11, "dict_color": 11, "list_cat": 11, "y": [11, 12], "ncol": 11, "fontsiz": 11, "weight": [11, 17], "lw": 11, "edgecolor": 11, "return_handl": 11, "loc": 11, "upper": 11, "left": 11, "labelspac": 11, "columnspac": 11, "titl": 11, "fontsize_legend": 11, "title_align_left": 11, "fontsize_weight": 11, "customiz": 11, "legend": 11, "plot": [11, 12, 13], "matplotlib": [11, 12], "attach": 11, "item": 11, "color": 11, "coordin": 11, "anchor": 11, "point": 11, "text": [11, 12], "line": 11, "width": 11, "edg": 11, "vertic": 11, "horizont": 11, "align": 11, "marker": 11, "directli": 11, "finer": 11, "control": 11, "pyplot": 11, "detail": [11, 13, 15], "how": 11, "custom": [11, 12], "line2d": 11, "cat1": 11, "red": 11, "cat2": 11, "blue": 11, "o": 11, "fig_format": 12, "pdf": 12, "grid": 12, "grid_axi": 12, "font_scal": 12, "arial": 12, "change_s": 12, "weight_bold": 12, "adjust_el": 12, "short_tick": 12, "no_tick": 12, "no_ticks_i": 12, "short_ticks_i": 12, "no_ticks_x": 12, "short_ticks_x": 12, "configur": 12, "visual": 12, "variou": 12, "file": 12, "save": 12, "make": 12, "visibl": 12, "choos": 12, "axi": 12, "both": 12, "san": 12, "serif": 12, "verdana": 12, "helvetica": 12, "dejavu": 12, 
"appear": 12, "bold": 12, "layout": 12, "short": 12, "tick": 12, "mark": 12, "global": 12, "seaborn": 12, "librari": 12, "pypi": 13, "conda": 13, "forg": 13, "pip": 13, "introduct": 13, "tutori": 13, "api": 13, "util": 13, "case": 13, "search": 13, "your": [13, 15], "work": [13, 15], "pleas": [13, 15], "cite": [13, 15], "respect": [13, 15], "avail": [13, 15, 17], "_": [13, 15], "breimann": [13, 15, 17], "kamp": [13, 15], "steiner": [13, 15], "frishman": [13, 15], "2023": [13, 15], "ontologi": [13, 15, 17], "machin": [13, 15, 17], "biorxiv": [13, 15, 17], "citat": 15, "wa": 16, "develop": 16, "biolog": 16, "typic": 16, "consist": 16, "et": 17, "al": 17, "2023a": 17, "2023b": 17, "2023c": 17, "chart": 17, "\u03b3": 17, "explain": 17, "ai": 17, "cheng": 17, "2006": 17, "larg": 17, "disulphid": 17, "kernel": 17, "recurs": 17, "neural": 17, "network": 17, "graph": 17, "struct": 17, "funct": 17, "kawashima": 17, "2008": 17, "aid": 17, "databas": 17, "report": 17, "nucleic": 17, "re": 17, "magnan": 17, "randal": 17, "baldi": 17, "2009": 17, "accur": 17, "bioinformat": 17, "galiez": 17, "2016": 17, "tool": 17, "viral": 17, "song": 17, "2018": 17, "throughput": 17, "90": 17, "proteas": 17, "improv": 17, "accuraci": 17, "shen": 17, "2019": 17, "local": 17, "integr": 17, "evolutionari": 17, "chou": 17, "pseaac": 17, "j": 17, "theor": 17, "biol": 17, "tang": 17, "2020": 17, "teng": 17, "2021": 17, "pseudo": 17, "composit": 17, "tripeptid": 17, "bmc": 17, "yang": 17, "granular": 17, "appl": 17, "view": 19}, "objects": {"aaanalysis": [[3, 0, 1, "", "AAclust"], [4, 0, 1, "", "CPP"], [5, 0, 1, "", "SequenceFeature"], [6, 0, 1, "", "SplitRange"], [7, 0, 1, "", "dPULearn"], [8, 2, 1, "", "load_dataset"], [9, 2, 1, "", "load_scales"], [10, 2, 1, "", "plot_gcfs"], [11, 2, 1, "", "plot_set_legend"], [12, 2, 1, "", "plot_settings"]], "aaanalysis.AAclust": [[3, 1, 1, "", "__init__"], [3, 1, 1, "", "cluster_naming"], [3, 1, 1, "", "correlation"], [3, 1, 1, "", "fit"], [3, 1, 1, "", 
"get_cluster_centers"], [3, 1, 1, "", "get_cluster_medoids"]], "aaanalysis.CPP": [[4, 1, 1, "", "__init__"], [4, 1, 1, "", "eval"], [4, 1, 1, "", "run"]], "aaanalysis.SequenceFeature": [[5, 1, 1, "", "__init__"], [5, 1, 1, "", "add_dif"], [5, 1, 1, "", "add_feat_value"], [5, 1, 1, "", "add_position"], [5, 1, 1, "", "feat_matrix"], [5, 1, 1, "", "feat_names"], [5, 1, 1, "", "get_df_parts"], [5, 1, 1, "", "get_features"], [5, 1, 1, "", "get_split_kws"]], "aaanalysis.SplitRange": [[6, 1, 1, "", "__init__"], [6, 1, 1, "", "labels_pattern"], [6, 1, 1, "", "labels_periodicpattern"], [6, 1, 1, "", "labels_segment"], [6, 1, 1, "", "pattern"], [6, 1, 1, "", "periodicpattern"], [6, 1, 1, "", "segment"]], "aaanalysis.dPULearn": [[7, 1, 1, "", "__init__"], [7, 1, 1, "", "fit"]]}, "objtypes": {"0": "py:class", "1": "py:method", "2": "py:function"}, "objnames": {"0": ["py", "class", "Python class"], "1": ["py", "method", "Python method"], "2": ["py", "function", "Python function"]}, "titleterms": {"tabl": [1, 13], "overview": [1, 13], "protein": [1, 19], "benchmark": [1, 17], "dataset": [1, 17], "amino": 1, "acid": 1, "scale": 1, "api": 2, "data": [2, 19], "load": [2, 19], "aaclust": [2, 3], "cpp": [2, 4], "modul": 2, "dpulearn": [2, 7], "plot": 2, "util": 2, "aaanalysi": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], "sequencefeatur": 5, "splitrang": 6, "load_dataset": 8, "load_scal": 9, "plot_gcf": 10, "plot_set_legend": 11, "plot_set": 12, "welcom": 13, "document": 13, "instal": 13, "exampl": 13, "refer": [13, 17], "indic": 13, "citat": 13, "introduct": 16, "workflow": 16, "algorithm": 17, "us": 17, "case": 17, "further": 17, "inform": 17, "usag": 18, "principl": 18, "tutori": 19, "redund": 19, "reduct": 19, "featur": 19, "engin": 19, "identif": 19, "neg": 19, "predict": 19}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, 
"sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.viewcode": 1, "sphinx.ext.intersphinx": 1, "sphinx": 57}, "alltitles": {"Tables": [[1, "tables"]], "Overview Table": [[1, "overview-table"]], "Protein benchmark datasets": [[1, "protein-benchmark-datasets"]], "Amino acid scale datasets": [[1, "amino-acid-scale-datasets"]], "API": [[2, "api"]], "Data Loading": [[2, "data-loading"]], "AAclust": [[2, "aaclust"]], "CPP Module": [[2, "cpp-module"]], "dPUlearn": [[2, "dpulearn"]], "Plot Utilities": [[2, "plot-utilities"]], "aaanalysis.AAclust": [[3, "aaanalysis-aaclust"]], "aaanalysis.CPP": [[4, "aaanalysis-cpp"]], "aaanalysis.SequenceFeature": [[5, "aaanalysis-sequencefeature"]], "aaanalysis.SplitRange": [[6, "aaanalysis-splitrange"]], "aaanalysis.dPULearn": [[7, "aaanalysis-dpulearn"]], "aaanalysis.load_dataset": [[8, "aaanalysis-load-dataset"]], "aaanalysis.load_scales": [[9, "aaanalysis-load-scales"]], "aaanalysis.plot_gcfs": [[10, "aaanalysis-plot-gcfs"]], "aaanalysis.plot_set_legend": [[11, "aaanalysis-plot-set-legend"]], "aaanalysis.plot_settings": [[12, "aaanalysis-plot-settings"]], "Welcome to the AAanalysis documentation": [[13, "welcome-to-the-aaanalysis-documentation"]], "Install": [[13, "install"]], "OVERVIEW": [[13, null]], "EXAMPLES": [[13, null]], "REFERENCES": [[13, null]], "Indices and tables": [[13, "indices-and-tables"]], "Citation": [[13, "citation"]], "Introduction": [[16, "introduction"]], "Workflow": [[16, "workflow"]], "References": [[17, "references"]], "Algorithms": [[17, "algorithms"]], "Datasets and Benchmarks": [[17, "datasets-and-benchmarks"]], "Use Cases": [[17, "use-cases"]], "Further Information": [[17, "further-information"]], "Usage Principles": [[18, "usage-principles"]], "Tutorials": [[19, "tutorials"]], "Data loading": [[19, "data-loading"]], "Redundancy-reduction": [[19, "redundancy-reduction"]], "Feature engineering": [[19, "feature-engineering"]], "Identification of negatives": [[19, 
"identification-of-negatives"]], "Protein prediction": [[19, "protein-prediction"]]}, "indexentries": {"aaclust (class in aaanalysis)": [[3, "aaanalysis.AAclust"]], "__init__() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.__init__"]], "cluster_naming() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.cluster_naming"]], "correlation() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.correlation"]], "fit() (aaanalysis.aaclust method)": [[3, "aaanalysis.AAclust.fit"]], "get_cluster_centers() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.get_cluster_centers"]], "get_cluster_medoids() (aaanalysis.aaclust static method)": [[3, "aaanalysis.AAclust.get_cluster_medoids"]], "cpp (class in aaanalysis)": [[4, "aaanalysis.CPP"]], "__init__() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.__init__"]], "eval() (aaanalysis.cpp static method)": [[4, "aaanalysis.CPP.eval"]], "run() (aaanalysis.cpp method)": [[4, "aaanalysis.CPP.run"]], "sequencefeature (class in aaanalysis)": [[5, "aaanalysis.SequenceFeature"]], "__init__() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.__init__"]], "add_dif() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_dif"]], "add_feat_value() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_feat_value"]], "add_position() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.add_position"]], "feat_matrix() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_matrix"]], "feat_names() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.feat_names"]], "get_df_parts() (aaanalysis.sequencefeature static method)": [[5, "aaanalysis.SequenceFeature.get_df_parts"]], "get_features() (aaanalysis.sequencefeature method)": [[5, "aaanalysis.SequenceFeature.get_features"]], "get_split_kws() (aaanalysis.sequencefeature static method)": [[5, 
"aaanalysis.SequenceFeature.get_split_kws"]], "splitrange (class in aaanalysis)": [[6, "aaanalysis.SplitRange"]], "__init__() (aaanalysis.splitrange method)": [[6, "aaanalysis.SplitRange.__init__"]], "labels_pattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_pattern"]], "labels_periodicpattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_periodicpattern"]], "labels_segment() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.labels_segment"]], "pattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.pattern"]], "periodicpattern() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.periodicpattern"]], "segment() (aaanalysis.splitrange static method)": [[6, "aaanalysis.SplitRange.segment"]], "__init__() (aaanalysis.dpulearn method)": [[7, "aaanalysis.dPULearn.__init__"]], "dpulearn (class in aaanalysis)": [[7, "aaanalysis.dPULearn"]], "fit() (aaanalysis.dpulearn method)": [[7, "aaanalysis.dPULearn.fit"]], "load_dataset() (in module aaanalysis)": [[8, "aaanalysis.load_dataset"]], "load_scales() (in module aaanalysis)": [[9, "aaanalysis.load_scales"]], "plot_gcfs() (in module aaanalysis)": [[10, "aaanalysis.plot_gcfs"]], "plot_set_legend() (in module aaanalysis)": [[11, "aaanalysis.plot_set_legend"]], "plot_settings() (in module aaanalysis)": [[12, "aaanalysis.plot_settings"]]}}) \ No newline at end of file diff --git a/docs/build/html/tutorials.html b/docs/build/html/tutorials.html index ca6f8d06..f2e41999 100644 --- a/docs/build/html/tutorials.html +++ b/docs/build/html/tutorials.html @@ -136,7 +136,7 @@

          REFERENCES

          diff --git a/docs/build/plot_directive/generated/aaanalysis-CPP-1.py b/docs/build/plot_directive/generated/aaanalysis-CPP-1.py deleted file mode 100644 index b2eede39..00000000 --- a/docs/build/plot_directive/generated/aaanalysis-CPP-1.py +++ /dev/null @@ -1,6 +0,0 @@ -import matplotlib.pyplot as plt -import aaanalysis as aa -sf = aa.SequenceFeature() -df_seq = aa.load_dataset(name='SEQ_DISULFIDE', min_len=100) -labels = list(df_seq["label"]) -df_parts = sf.get_df_parts(df_seq=df_seq, jmd_n_len=10, jmd_c_len=10) diff --git a/docs/source/__pycache__/create_tables_doc.cpython-39.pyc b/docs/source/__pycache__/create_tables_doc.cpython-39.pyc index 2c9f83e5..8de38eb4 100644 Binary files a/docs/source/__pycache__/create_tables_doc.cpython-39.pyc and b/docs/source/__pycache__/create_tables_doc.cpython-39.pyc differ diff --git a/docs/source/_resources/tables.rst b/docs/source/_resources/tables.rst index 7a943185..a1488f89 100755 --- a/docs/source/_resources/tables.rst +++ b/docs/source/_resources/tables.rst @@ -1,11 +1,11 @@ -Tables for the Project +Tables ====================== .. contents:: :local: :depth: 1 -Overview of Tables +Overview Table ------------------ .. 
list-table:: :header-rows: 1 @@ -157,6 +157,26 @@ Protein benchmark datasets - Prediction of tail proteins - Galiez16 - 1 (tail protein), 0 (non-tail protein) + * - Domain + - DOM_GSEC + - 126 + - 92964 + - 63 + - 63 + - nan + - Prediction of gamma-secretase substrates + - Breimann23c + - 1 (substrate), 0 (non-substrate) + * - Domain + - DOM_GSEC_PU + - 694 + - 494524 + - 63 + - 0 + - nan + - Prediction of gamma-secretase substrates (PU dataset) + - Breimann23c + - 1 (substrate), 2 (unknown substrate status) Amino acid scale datasets ------------------------- @@ -177,7 +197,7 @@ Amino acid scale datasets - 586 - Kawashima08 * - scales_classification - - Classification of scales + - Classification of scales (Aaontology) - 586 - Breimann23b * - scales_pc diff --git a/docs/source/_resources/tables/1_overview_benchmarks.xlsx b/docs/source/_resources/tables/1_overview_benchmarks.xlsx index 1e2b435d..232d82b8 100644 Binary files a/docs/source/_resources/tables/1_overview_benchmarks.xlsx and b/docs/source/_resources/tables/1_overview_benchmarks.xlsx differ diff --git a/docs/source/_resources/tables/2_overview_scales.xlsx b/docs/source/_resources/tables/2_overview_scales.xlsx index bd63e07f..4565face 100644 Binary files a/docs/source/_resources/tables/2_overview_scales.xlsx and b/docs/source/_resources/tables/2_overview_scales.xlsx differ diff --git a/docs/source/conf.py b/docs/source/conf.py index c1e46df4..c69a6c1c 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,7 +58,7 @@ # 'sphinx.ext.napoleon', # Support for Numpy-style and Google-style docstrings # 'sphinx_autodoc_typehints', # Display Python type hints in documentation (needs to be after napoleon) # 'sphinx_book_theme', # Theme optimized for book-style content presentation - # 'pydata_sphinx_theme', # Theme with a focus on long-form content and optimized for data-focused libraries + # 'pydata_sphinx_theme', # Theme with a focus on long-form content and optimized for _data-focused libraries ] # -- 
Autodoc & Numpydoc settings ---------------------------------------------- diff --git a/docs/source/create_tables_doc.py b/docs/source/create_tables_doc.py index ac4be815..82d67445 100644 --- a/docs/source/create_tables_doc.py +++ b/docs/source/create_tables_doc.py @@ -73,7 +73,7 @@ def generate_table_rst(): list_tables = [_f_xlsx(on=True, file=x) for x in sorted(df_mapper[COL_MAP_TABLE])] _check_tables(list_tables) rst_content = _convert_excel_to_rst(df_mapper) - rst_content = f"Tables for the Project\n======================\n\n.. contents::\n :local:\n :depth: 1\n\nOverview of Tables\n------------------\n{rst_content}" + rst_content = f"Tables\n======================\n\n.. contents::\n :local:\n :depth: 1\n\nOverview Table\n------------------\n{rst_content}" for index, row in df_mapper.iterrows(): table_name = row[COL_MAP_TABLE] description = row[COL_DESCRIPTION] diff --git a/docs/source/generated/aaanalysis.AAclust.rst b/docs/source/generated/aaanalysis.AAclust.rst index 779af3a4..f6e6666c 100644 --- a/docs/source/generated/aaanalysis.AAclust.rst +++ b/docs/source/generated/aaanalysis.AAclust.rst @@ -16,6 +16,7 @@ ~AAclust.__init__ ~AAclust.cluster_naming ~AAclust.correlation + ~AAclust.eval ~AAclust.fit ~AAclust.get_cluster_centers ~AAclust.get_cluster_medoids diff --git a/docs/source/generated/aaanalysis.CPP.rst b/docs/source/generated/aaanalysis.CPP.rst index e1ddd88d..cdb62bd1 100644 --- a/docs/source/generated/aaanalysis.CPP.rst +++ b/docs/source/generated/aaanalysis.CPP.rst @@ -14,13 +14,8 @@ .. autosummary:: ~CPP.__init__ - ~CPP.add_positions - ~CPP.add_sample_dif - ~CPP.add_shap - ~CPP.plot_heatmap - ~CPP.plot_profile + ~CPP.eval ~CPP.run - ~CPP.update_seq_size diff --git a/docs/source/generated/aaanalysis.SequenceFeature.rst b/docs/source/generated/aaanalysis.SequenceFeature.rst index e1f9fa27..336e5607 100644 --- a/docs/source/generated/aaanalysis.SequenceFeature.rst +++ b/docs/source/generated/aaanalysis.SequenceFeature.rst @@ -14,11 +14,13 @@ .. 
autosummary:: ~SequenceFeature.__init__ + ~SequenceFeature.add_dif + ~SequenceFeature.add_feat_value + ~SequenceFeature.add_position ~SequenceFeature.feat_matrix ~SequenceFeature.feat_names - ~SequenceFeature.feat_value - ~SequenceFeature.features ~SequenceFeature.get_df_parts + ~SequenceFeature.get_features ~SequenceFeature.get_split_kws diff --git a/docs/source/generated/aaanalysis.dPULearn.rst b/docs/source/generated/aaanalysis.dPULearn.rst index b3413fa3..fb647fa9 100644 --- a/docs/source/generated/aaanalysis.dPULearn.rst +++ b/docs/source/generated/aaanalysis.dPULearn.rst @@ -14,6 +14,7 @@ .. autosummary:: ~dPULearn.__init__ + ~dPULearn.eval ~dPULearn.fit diff --git a/tests/_utils.py b/tests/_utils.py index 4575ea09..daab5f73 100644 --- a/tests/_utils.py +++ b/tests/_utils.py @@ -19,7 +19,7 @@ def _folder_path(super_folder, folder_name): FOLDER_PROJECT = str(Path(__file__).parent.parent).replace('/', SEP) + SEP FOLDER_PROJECT += "tests" + SEP FOLDER_RESULTS = _folder_path(FOLDER_PROJECT, 'results') -FOLDER_DATA = _folder_path(FOLDER_PROJECT, 'data') +FOLDER_DATA = _folder_path(FOLDER_PROJECT, '_data') # General Columns and strings COL_SCALE_ID = "scale_id" diff --git a/tests/test_cpp/conftest.py b/tests/test_cpp/conftest.py index ee33337f..4e2ed136 100644 --- a/tests/test_cpp/conftest.py +++ b/tests/test_cpp/conftest.py @@ -13,7 +13,7 @@ b) Negative testing: Check if code troughs error with invalid input II Additional test strategies - a) Property-Based Testing: Validate assumptions (hypothesis) of code using automatically generated data + a) Property-Based Testing: Validate assumptions (hypothesis) of code using automatically generated _data "Complementary to unit testing" (p. 224-230, The Pragmatic Programmer) b) Functional test: Check single bit of functionality in a system (similar to regression test?) Unit test vs. functional test (Code is doing things right vs. 
Code is doing right things) diff --git a/tests/test_cpp/test_cpp.py b/tests/test_cpp/test_cpp.py index 832907f6..080bc9dc 100644 --- a/tests/test_cpp/test_cpp.py +++ b/tests/test_cpp/test_cpp.py @@ -93,7 +93,7 @@ class TestAddPositions: # Positive unit tests def test_add_positions(self, df_feat, cpp): - df_feat = cpp.add_positions(df_feat=df_feat, tmd_len=30) + df_feat = cpp._add_positions(df_feat=df_feat, tmd_len=30) assert isinstance(df_feat, pd.DataFrame) assert "positions" in list(df_feat) @@ -106,35 +106,35 @@ def test_add_positions(self, df_feat, cpp): @settings(max_examples=10, deadline=None) def test_add_position_tmd_len(self, df_feat_module_scope, df_parts, tmd_len, jmd_n_len, jmd_c_len, ext_len, start): cpp = aa.CPP(df_parts=df_parts) - df_feat = cpp.add_positions(df_feat=df_feat_module_scope, tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, - ext_len=ext_len, start=start) + df_feat = cpp._add_positions(df_feat=df_feat_module_scope, tmd_len=tmd_len, jmd_n_len=jmd_n_len, jmd_c_len=jmd_c_len, + ext_len=ext_len, start=start) assert isinstance(df_feat, pd.DataFrame) # Negative unit tests def test_wrong_tmd_len(self, df_feat, cpp, wrong_input): with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, tmd_len=wrong_input) + cpp._add_positions(df_feat=df_feat, tmd_len=wrong_input) def test_wrong_jmd_len(self, df_feat, cpp, wrong_input): with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, jmd_n_len=wrong_input) + cpp._add_positions(df_feat=df_feat, jmd_n_len=wrong_input) with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, jmd_c_len=wrong_input) + cpp._add_positions(df_feat=df_feat, jmd_c_len=wrong_input) def test_wrong_ext_len(self, df_feat, cpp, wrong_input): with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, ext_len=wrong_input) + cpp._add_positions(df_feat=df_feat, ext_len=wrong_input) # ext_len >= jmd_n_len or jmd_c_len with pytest.raises(ValueError): - 
cpp.add_positions(df_feat=df_feat, ext_len=5, jmd_n_len=3) + cpp._add_positions(df_feat=df_feat, ext_len=5, jmd_n_len=3) with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, ext_len=5, jmd_c_len=3) + cpp._add_positions(df_feat=df_feat, ext_len=5, jmd_c_len=3) def test_wrong_start(self, df_feat, cpp, wrong_input): with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, start=wrong_input) + cpp._add_positions(df_feat=df_feat, start=wrong_input) with pytest.raises(ValueError): - cpp.add_positions(df_feat=df_feat, start=-4) + cpp._add_positions(df_feat=df_feat, start=-4) class TestAddScaleCategory: @@ -611,14 +611,14 @@ def test_add_pipeline(df_feat): df = cpp.add_scale_info(df_feat=df) assert df_feat[cols].equals(df[cols]) df = cpp.add_stat(df_feat=df, labels=labels) - df = cpp.add_positions(df_feat=df) + df = cpp._add_positions(df_feat=df) assert df_feat[cols].equals(df[cols]) df = cpp.add_scale_info(df_feat=df) assert df_feat[cols].equals(df[cols]) - df = cpp.add_positions(df_feat=df) + df = cpp._add_positions(df_feat=df) df = cpp.add_stat(df_feat=df, labels=labels) assert df_feat[cols].equals(df[cols]) - df = cpp.add_positions(df_feat=df) + df = cpp._add_positions(df_feat=df) assert df_feat[cols].equals(df[cols]) diff --git a/tests/test_cpp/test_feature.py b/tests/test_cpp/test_feature.py index 0ca1c008..9bb77b8e 100644 --- a/tests/test_cpp/test_feature.py +++ b/tests/test_cpp/test_feature.py @@ -159,38 +159,38 @@ class TestFeatures: def test_features(self, df_scales, list_parts): sf = aa.SequenceFeature() split_kws = sf.get_split_kws() - assert isinstance(sf.features(), list) + assert isinstance(sf.get_features(), list) for parts in list_parts: - assert isinstance(sf.features(list_parts=parts), list) + assert isinstance(sf.get_features(list_parts=parts), list) for split_type in split_kws: args = dict(list_parts=parts, df_scales=df_scales, split_kws={split_type: split_kws[split_type]}) - assert isinstance(sf.features(**args), list) + 
assert isinstance(sf.get_features(**args), list) # Negative unit tests def test_wrong_input(self, df_cat, df_seq): sf = aa.SequenceFeature() for wrong_input in [1, -1, "TMD", ["TMD"], [1, 2], ["aa", "a"], [["tmd", "tmd_e"]], df_cat, [df_cat, df_seq]]: with pytest.raises(ValueError): - sf.features(list_parts=wrong_input) + sf.get_features(list_parts=wrong_input) with pytest.raises(ValueError): - sf.features(list_parts=["tmd"], df_scales=wrong_input) + sf.get_features(list_parts=["tmd"], df_scales=wrong_input) with pytest.raises(ValueError): - sf.features(list_parts=["tmd"], split_kws=wrong_input) + sf.get_features(list_parts=["tmd"], split_kws=wrong_input) def test_corrupted_list_parts(self, corrupted_list_parts): sf = aa.SequenceFeature() with pytest.raises(ValueError): - sf.features(list_parts=corrupted_list_parts) # Via parametrized fixtures + sf.get_features(list_parts=corrupted_list_parts) # Via parametrized fixtures def test_corrupted_df_scales(self, corrupted_df_scales): sf = aa.SequenceFeature() with pytest.raises(ValueError): - sf.features(list_parts=["tmd"], df_scales=corrupted_df_scales) # Via parametrized fixtures + sf.get_features(list_parts=["tmd"], df_scales=corrupted_df_scales) # Via parametrized fixtures def test_corrupted_split_kws(self, corrupted_split_kws): sf = aa.SequenceFeature() with pytest.raises(ValueError): - sf.features(list_parts=["tmd"], split_kws=corrupted_split_kws) # Via parametrized fixtures + sf.get_features(list_parts=["tmd"], split_kws=corrupted_split_kws) # Via parametrized fixtures class TestFeatureName: @@ -258,7 +258,7 @@ def test_feature_value(self, df_seq, df_scales, list_parts, list_splits): for split in list_splits: for i in range(0, len(df_scales)): dict_scale = df_scales.iloc[:, i].to_dict() - x = sf.feat_value(split=split, dict_scale=dict_scale, df_parts=df_parts[parts]) + x = sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=df_parts[parts]) assert isinstance(x, np.ndarray) def test_accept_gaps(self, 
df_seq, list_parts, list_splits, df_scales): @@ -268,18 +268,18 @@ def test_accept_gaps(self, df_seq, list_parts, list_splits, df_scales): df = df_parts.copy() args = dict(split=split, dict_scale=dict_scale) df[parts] = "AAA-CCC" - assert isinstance(sf.feat_value(**args, df_parts=df[parts], accept_gaps=True), np.ndarray) + assert isinstance(sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=True), np.ndarray) with pytest.raises(ValueError): - sf.feat_value(**args, df_parts=df[parts], accept_gaps=False) + sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=False) df[parts] = "------" with pytest.raises(ValueError): - sf.feat_value(**args, df_parts=df[parts], accept_gaps=True) + sf.add_feat_value(**args, df_parts=df[parts], accept_gaps=True) args = dict(split=split, df_parts=df_parts[parts]) dict_scale_na = dict_scale.copy() dict_scale_na["A"] = np.NaN - assert isinstance(sf.feat_value(**args, dict_scale=dict_scale_na, accept_gaps=True), np.ndarray) + assert isinstance(sf.add_feat_value(**args, dict_scale=dict_scale_na, accept_gaps=True), np.ndarray) with pytest.raises(ValueError): - sf.feat_value(**args, dict_scale=dict_scale_na, accept_gaps=False) + sf.add_feat_value(**args, dict_scale=dict_scale_na, accept_gaps=False) # Negative test def test_wrong_input(self, df_cat, df_seq, list_parts, list_splits, df_scales): @@ -290,11 +290,11 @@ def test_wrong_input(self, df_cat, df_seq, list_parts, list_splits, df_scales): df_cat, [df_cat, df_seq], dict(a=1)] for wrong_input in list_wrong_input: with pytest.raises(ValueError): - sf.feat_value(split=wrong_input, dict_scale=dict_scale, df_parts=df_parts[parts]) + sf.add_feat_value(split=wrong_input, dict_scale=dict_scale, df_parts=df_parts[parts]) with pytest.raises(ValueError): - sf.feat_value(split=split, dict_scale=wrong_input, df_parts=df_parts[parts]) + sf.add_feat_value(split=split, dict_scale=wrong_input, df_parts=df_parts[parts]) with pytest.raises(ValueError): - sf.feat_value(split=split, 
dict_scale=dict_scale, df_parts=wrong_input) + sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=wrong_input) def test_corrupted_split(self, df_seq, list_parts, df_scales, corrupted_list_splits): sf = aa.SequenceFeature() @@ -302,7 +302,7 @@ def test_corrupted_split(self, df_seq, list_parts, df_scales, corrupted_list_spl parts, dict_scale = list_parts[0], df_scales.iloc[:, 0].to_dict() with pytest.raises(ValueError): # Via parametrized fixtures - sf.feat_value(split=corrupted_list_splits, dict_scale=dict_scale, df_parts=df_parts[parts]) + sf.add_feat_value(split=corrupted_list_splits, dict_scale=dict_scale, df_parts=df_parts[parts]) def test_corrupted_dict_scale(self, df_seq, list_parts, list_splits, df_scales): sf = aa.SequenceFeature() @@ -317,14 +317,14 @@ def test_corrupted_dict_scale(self, df_seq, list_parts, list_splits, df_scales): wrong_dict_scales = [dict(A=1, B=np.NaN), dict(a=0), dict_scale1, dict_scale2, dict_scale3, dict_scale3] for d in wrong_dict_scales: with pytest.raises(ValueError): - sf.feat_value(split=split, dict_scale=d, df_parts=df_parts[parts]) + sf.add_feat_value(split=split, dict_scale=d, df_parts=df_parts[parts]) def test_corrupted_df_parts(self, list_splits, df_scales, corrupted_df_parts): sf = aa.SequenceFeature() split, dict_scale = list_splits[0], df_scales.iloc[:, 0].to_dict() with pytest.raises(ValueError): # Via parametrized fixtures - sf.feat_value(split=split, dict_scale=dict_scale, df_parts=corrupted_df_parts) + sf.add_feat_value(split=split, dict_scale=dict_scale, df_parts=corrupted_df_parts) class TestFeatureMatrix: @@ -334,7 +334,7 @@ class TestFeatureMatrix: def test_feature_matrix(self, df_seq, df_scales): sf = aa.SequenceFeature() df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) - features = sf.features()[0:100] + features = sf.get_features()[0:100] feat_matrix = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=features) assert isinstance(feat_matrix, np.ndarray) assert 
feat_matrix.shape == (len(df_seq), len(features)) @@ -345,7 +345,7 @@ def test_feature_matrix(self, df_seq, df_scales): def test_missing_parameters(self, df_scales, df_seq): sf = aa.SequenceFeature() df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) - features = sf.features()[0:100] + features = sf.get_features()[0:100] with pytest.raises(ValueError): sf.feat_matrix(df_parts=df_parts) with pytest.raises(ValueError): @@ -360,7 +360,7 @@ def test_missing_parameters(self, df_scales, df_seq): def test_wrong_input(self, df_cat, df_seq, df_scales): sf = aa.SequenceFeature() df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) - features = sf.features()[0:100] + features = sf.get_features()[0:100] list_wrong_input = [1, -1, "TMD", ["TMD"], None, [1, 2], ["aa", "a"], [["tmd", "tmd_e"]], df_cat, [df_cat, df_seq], dict(a=1)] for wrong_input in list_wrong_input: @@ -374,7 +374,7 @@ def test_wrong_input(self, df_cat, df_seq, df_scales): def test_corrupted_df_parts(self, corrupted_df_parts, df_scales): sf = aa.SequenceFeature() - features = sf.features()[0:100] + features = sf.get_features()[0:100] with pytest.raises(ValueError): # Via parametrized fixtures sf.feat_matrix(df_parts=corrupted_df_parts, df_scales=df_scales, features=features) @@ -382,7 +382,7 @@ def test_corrupted_df_parts(self, corrupted_df_parts, df_scales): def test_corrupted_df_scales(self, corrupted_df_scales, df_seq): sf = aa.SequenceFeature() df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) - features = sf.features()[0:100] + features = sf.get_features()[0:100] with pytest.raises(ValueError): # Via parametrized fixtures sf.feat_matrix(df_parts=df_parts, df_scales=corrupted_df_scales, features=features) @@ -390,7 +390,7 @@ def test_corrupted_df_scales(self, corrupted_df_scales, df_seq): def test_corrupted_features(self, df_scales, df_seq): sf = aa.SequenceFeature() df_parts = sf.get_df_parts(df_seq=df_seq, all_parts=True) - features = sf.features()[0:100] + features = 
sf.get_features()[0:100] corrupted_features = [features[0:5] + [np.NaN], features[0:3] + ["Test"], "a", [[features[0:4]]], @@ -413,7 +413,7 @@ def test_sequence_feature(list_splits): df_scales = sf.load_scales() split_kws = sf.get_split_kws() # Get features (names, values, matrix) - features = sf.features()[0:100] + features = sf.get_features()[0:100] feat_matrix = sf.feat_matrix(df_parts=df_parts, df_scales=df_scales, features=features) assert isinstance(feat_matrix, np.ndarray) assert feat_matrix.shape == (len(df_seq), len(features))