-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbibliography.bib
155 lines (140 loc) · 8.58 KB
/
bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
% arXiv preprint (Duda, 2022) on context binning, model clustering and
% adaptivity for compressing genetic data. The arXiv identifier is recorded in
% eprint/archiveprefix in addition to the DataCite DOI and the abstract URL.
@misc{https://doi.org/10.48550/arxiv.2201.05028,
  author        = {Duda, Jarek},
  title         = {Context binning, model clustering and adaptivity for data compression of genetic data},
  publisher     = {arXiv},
  year          = {2022},
  eprint        = {2201.05028},
  archiveprefix = {arXiv},
  doi           = {10.48550/ARXIV.2201.05028},
  url           = {https://arxiv.org/abs/2201.05028},
  keywords      = {Information Theory (cs.IT), Genomics (q-bio.GN), FOS: Computer and information sciences, FOS: Biological sciences},
  copyright     = {arXiv.org perpetual, non-exclusive license}
}
% idencomp source repository on GitHub. The accented letter in the author's
% surname is written as a LaTeX accent escape inside its own brace group so
% that 8-bit classic BibTeX sorts and labels the name correctly.
@misc{idencomp,
  author       = {Ma{\'c}kowski, Mateusz},
  title        = {idencomp on {GitHub}},
  howpublished = {\url{https://github.com/m4tx/idencomp}},
  year         = {2022},
  note         = {Accessed: 2022-07-20}
}
% rans-rs source repository on GitHub. The accented letter in the author's
% surname is written as a LaTeX accent escape inside its own brace group so
% that 8-bit classic BibTeX sorts and labels the name correctly.
@misc{rans-rs,
  author       = {Ma{\'c}kowski, Mateusz},
  title        = {rans-rs on {GitHub}},
  howpublished = {\url{https://github.com/m4tx/rans-rs}},
  year         = {2022},
  note         = {Accessed: 2022-07-20}
}
% ryg_rans source repository by Fabian Giesen, hosted on GitHub.
@misc{ryg-rans,
  author       = {Giesen, Fabian},
  title        = {ryg\_rans on {GitHub}},
  year         = {2014},
  howpublished = {\url{https://github.com/rygorous/ryg_rans}},
  note         = {Accessed: 2022-07-20}
}
% Home page of the Rust programming language. The entry has no author, so the
% key field gives classic BibTeX styles a fallback to sort and label by.
@misc{rust,
  key          = {Rust},
  title        = {Rust Programming Language},
  howpublished = {\url{https://www.rust-lang.org/}},
  note         = {Accessed: 2022-07-20}
}
% MessagePack project web site. The entry has no author, so the key field
% gives classic BibTeX styles a fallback to sort and label by. Only the
% case-sensitive words of the title are brace-protected.
@misc{messagepack,
  key          = {MessagePack},
  title        = {{MessagePack}: It's like {JSON}. but fast and small.},
  howpublished = {\url{https://msgpack.org/}},
  note         = {Accessed: 2022-07-20}
}
% CRAM format specification, version 3.0 (PDF hosted under hts-specs). The
% entry has no author, so the key field gives classic BibTeX styles a fallback
% to sort and label by. Only the acronym in the title is brace-protected.
@misc{cram,
  key          = {CRAM},
  title        = {{CRAM} format specification (version 3.0)},
  howpublished = {\url{https://samtools.github.io/hts-specs/CRAMv3.pdf}},
  year         = {2014},
  note         = {Accessed: 2022-07-20}
}
% FASTQ format specification page from the MAQ project site. The entry has no
% author, so the key field gives classic BibTeX styles a fallback to sort and
% label by. Only the acronym is brace-protected, so the bibliography style
% decides the casing of the remaining title words.
@misc{fastq,
  key          = {FASTQ},
  title        = {{FASTQ} Format Specification},
  howpublished = {\url{http://maq.sourceforge.net/fastq.shtml}},
  year         = {2007},
  note         = {Accessed: 2022-07-20}
}
% Paper from the 2015 Picture Coding Symposium on asymmetric numeral systems
% as a replacement for Huffman coding. The proper noun in the title is
% brace-protected so sentence-casing styles keep its capital.
@inproceedings{7170048,
  author    = {Duda, Jarek and Tahboub, Khalid and Gadgil, Neeraj J. and Delp, Edward J.},
  title     = {The use of asymmetric numeral systems as an accurate replacement for {Huffman} coding},
  booktitle = {2015 Picture Coding Symposium (PCS)},
  year      = {2015},
  pages     = {65--69},
  doi       = {10.1109/PCS.2015.7170048}
}
% RFC 1951: the DEFLATE compressed data format specification. Only the
% acronym in the title is brace-protected, so the bibliography style decides
% the casing of the remaining words.
@misc{rfc1951,
  author       = {Deutsch, L. Peter},
  title        = {{DEFLATE} Compressed Data Format Specification version 1.3},
  howpublished = {RFC 1951},
  series       = {Request for Comments},
  number       = 1951,
  publisher    = {RFC Editor},
  year         = 1996,
  month        = may,
  pagetotal    = 17,
  doi          = {10.17487/RFC1951},
  url          = {https://www.rfc-editor.org/info/rfc1951},
  abstract     = {This specification defines a lossless compressed data format that compresses data using a combination of the LZ77 algorithm and Huffman coding, with efficiency comparable to the best currently available general-purpose compression methods. This memo provides information for the Internet community. This memo does not specify an Internet standard of any kind.},
}
% RFC 7932: the Brotli compressed data format. Only the format name in the
% title is brace-protected, so the bibliography style decides the casing of
% the remaining words.
@misc{rfc7932,
  author       = {Alakuijala, Jyrki and Szabadka, Zoltan},
  title        = {{Brotli} Compressed Data Format},
  howpublished = {RFC 7932},
  series       = {Request for Comments},
  number       = 7932,
  publisher    = {RFC Editor},
  year         = 2016,
  month        = jul,
  pagetotal    = 128,
  doi          = {10.17487/RFC7932},
  url          = {https://www.rfc-editor.org/info/rfc7932},
  abstract     = {This specification defines a lossless compressed data format that compresses data using a combination of the LZ77 algorithm and Huffman coding, with efficiency comparable to the best currently available general-purpose compression methods.},
}
% RFC 3385: CRC/checksum considerations for iSCSI. The protocol name and the
% acronyms in the title are brace-protected individually, so the bibliography
% style decides the casing of the remaining words.
@misc{rfc3385,
  author       = {Sheinwald, Dafna and Thaler, Patricia and Satran, Julian and Cavanna, Vincente},
  title        = {{Internet Protocol Small Computer System Interface} ({iSCSI}) Cyclic Redundancy Check ({CRC})/Checksum Considerations},
  howpublished = {RFC 3385},
  series       = {Request for Comments},
  number       = 3385,
  publisher    = {RFC Editor},
  year         = 2002,
  month        = sep,
  pagetotal    = 23,
  doi          = {10.17487/RFC3385},
  url          = {https://www.rfc-editor.org/info/rfc3385},
  abstract     = {Cyclic redundancy check (CRC) codes {[}Peterson{]} are shortened cyclic codes used for error detection. A number of CRC codes have been adopted in standards: ATM, IEC, IEEE, CCITT, IBM-SDLC, and more {[}Baicheva{]}. The most important expectation from this kind of code is a very low probability for undetected errors. The probability of undetected errors in such codes has been, and still is, subject to extensive studies that have included both analytical models and simulations. Those codes have been used extensively in communications and magnetic recording as they demonstrate good 'burst error' detection capabilities, but are also good at detecting several independent bit errors. Hardware implementations are very simple and well known; their simplicity has made them popular with hardware developers for many years. However, algorithms and software for effective implementations of CRC are now also widely available {[}Williams{]}. The probability of undetected errors depends on the polynomial selected to generate the code, the error distribution (error model), and the data length. In this memo, we attempt to give some estimates for the probability of undetected errors to facilitate the selection of an error detection code for iSCSI. We will also attempt to compare CRCs with other checksum forms (Fletcher, Adler, weighted checksums), as permitted by available data.},
}
% Review article "Towards precision medicine" in Nature Reviews Genetics
% (2016). The page range uses the double-hyphen form that styles typeset as an
% en dash, and sentences of the abstract are separated by a space.
@article{Ashley2016,
  author   = {Ashley, Euan A.},
  title    = {Towards precision medicine},
  journal  = {Nature Reviews Genetics},
  year     = {2016},
  month    = sep,
  day      = {01},
  volume   = {17},
  number   = {9},
  pages    = {507--522},
  abstract = {Precision medicine describes the definition of disease at a higher resolution by genomic and other technologies to enable more precise targeting of subgroups of disease with new therapies. Prominent examples include cystic fibrosis and cancer. Clinical genomics exists at the intersection of sequencing-led discovery genetics in population cohorts and historical low-throughput approaches to genetic diagnosis in patients. As a result of the different aims of these two endeavours, technologies and algorithms that have been developed for discovery genomics need to be optimized before application to clinical medicine. Areas of need include the improvement of sequencing technologies. Current short-read approaches are limited in areas of the genome of low complexity (such as repeats), regions of high GC content, regions that are highly polymorphic or that include small-scale (indel) or large-scale (structural variant) disruption of the open reading frame. Possible routes to such improvements include long-read sequencing, improved algorithms for indel and structural variant calling, graph reference approaches and standardization of nomenclature. One area that requires specific attention is the quality and coverage of sequence data for clinical genetic testing. In general, the emerging consensus standard is that the coding regions of interest (plus two base pairs on either side) should be covered by 20 high-quality (Q20) reads that are uniquely mapped. To improve assertions of the disease causality of genetic variants, data sharing of both phenotypic and genotypic information across communities will be required. Projects such as ClinGen and its associated database ClinVar represent an important step in this direction.
Large-scale population sequencing projects such as the UK Biobank and the US Precision Medicine Initiative Cohort Program will enhance our understanding of population-scale genetic variation in a way that optimizes our care of the individual with genetic disease.},
  issn     = {1471-0064},
  doi      = {10.1038/nrg.2016.86},
  url      = {https://doi.org/10.1038/nrg.2016.86}
}
% NIST FIPS 202, the SHA-3 standard. The doi field holds the bare DOI with no
% resolver prefix, because styles prepend the resolver themselves when they
% build the link.
@misc{1421,
  author    = {Dworkin, Morris},
  title     = {{SHA-3} Standard: Permutation-Based Hash and Extendable-Output Functions},
  publisher = {Federal Inf. Process. Stds. (NIST FIPS), National Institute of Standards and Technology, Gaithersburg, MD},
  year      = {2015},
  month     = aug,
  day       = {04},
  doi       = {10.6028/NIST.FIPS.202},
  language  = {en},
}
% Book by David Salomon on data compression, 4th edition (Springer, 2007).
@book{Salomon2007-bj,
  author    = {Salomon, David},
  title     = {Data compression},
  edition   = {4},
  publisher = {Springer},
  address   = {Guildford, England},
  year      = {2007},
  month     = jan,
  language  = {en}
}