diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml
index a34e49f..3f3df51 100644
--- a/.github/workflows/UnitTests.yml
+++ b/.github/workflows/UnitTests.yml
@@ -1,34 +1,30 @@
 name: Unit Tests
 
 on:
-  - push
-  - pull_request
+  push:
+      branches:
+        - master
+        - develop
+  pull_request:
 
 jobs:
   test:
-    name: Julia ${{ matrix.julia-version }} - ${{ matrix.os }} - ${{ matrix.julia-arch }} - ${{ github.event_name }}
     runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.julia-version == 'nightly' }}
+    continue-on-error: ${{ matrix.experimental }}
     strategy:
       fail-fast: false
       matrix:
         julia-version:
-          - '1.0'
-          - '1.5'
-          - '1.6' # LTS
-          - '1'
-        julia-arch: [x64, x86]
-        os: [ubuntu-latest, windows-latest, macOS-latest]
-        exclude:
-          - os: macOS-latest
-            julia-arch: x86
+          - '1.10'
+        os: [ubuntu-latest, macOS-latest, windows-latest]
         experimental: [false]
         include:
+          # Include nightly, but experimental, so it's allowed to fail without
+          # failing CI.
           - julia-version: nightly
-            julia-arch: x64
             os: ubuntu-latest
             experimental: true
-
+            fail_ci_if_error: false
     steps:
       - name: Checkout Repository
         uses: actions/checkout@v2
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3253f39..36f24c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,9 +4,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [Unreleased] 
 ### Added
 - :arrow_up: Added Project.toml
+- Automa v1 compatibility: Upgraded the Automa dependency to "1", enabling the new Automav1 API.
+- BioGenerics support: Imported metadata functions from BioGenerics to unify VCF/BCF header handling.
+- TranscodingStreams integration: Added using TranscodingStreams for more efficient stream transformations in VCF/BCF readers.
+- New VCF record reader: Introduced `src/vcf/readrecord.jl` to encapsulate record parsing logic.
+
+### Changed
+- Streamlined imports: Limited BioSequences imports, upgraded BGZFStreams and BufferedStreams usage, and replaced BioCore I/O types with BioGenerics abstractions.
+- BCF reader refactoring: Transitioned Reader to subtype BioGenerics.IO.AbstractReader, centralized exception handling, and cleaned up parse logic.
+- VCF header & metainfo: Fixed header parsing and improved metainfo tag/value functions to use BioGenerics APIs.
+- Project.toml targets: Reorganized `[extras]` and `[targets]` sections.
+
+### Removed
+- Deprecated dependencies: Dropped older Automa versions (0.7, 0.8) and obsolete IO imports from BioCore.
+
 
 ## [0.4.0] - 2018-11-22
 ### Added
diff --git a/Project.toml b/Project.toml
index 6cf8192..207a8cd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,33 +1,39 @@
 name = "GeneticVariation"
 uuid = "9bc6ac9d-e6b2-5f70-b0a8-242a01662520"
-authors = ["Kenta Sato <bicycle1885@gmail.com>", "Sabrina J. Ward <sabrinajward@protonmail.com>"]
-
-version = "0.4.1"
+authors = ["Kenta Sato <bicycle1885@gmail.com>", "Sabrina J. Ward <sabrinajward@protonmail.com>", "Abhinav Singh <abhinavsns7@gmail.com>"]
+version = "0.5.0"
 
 [deps]
 Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b"
 BGZFStreams = "28d598bf-9b8f-59f1-b38c-5a06b4a0f5e6"
-BioCore = "37cfa864-2cd6-5c12-ad9e-b6597d696c81"
+BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea"
 BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
-BufferedStreams = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
 Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa"
+Indexes = "4ffb77ac-cb80-11e8-1b35-4b78cc642f6d"
 IntervalTrees = "524e6230-43b7-53ae-be76-1e9e4d08d11b"
+MinHash = "4b3c9753-2685-44e9-8a29-365b96c023ed"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
 Twiddle = "7200193e-83a8-5a55-b20d-5d36d44a0795"
 
 [compat]
-Automa = "0.7, 0.8"
+Automa = "1"
 BGZFStreams = "0.3"
-BioCore = "2.0.5"
-BioSequences = "1"
-BufferedStreams = "1"
-Combinatorics = "0.7"
-IntervalTrees = "1"
-Twiddle = "1.1"
+BioGenerics = "0.1"
+BioSequences = "3.4"
+Combinatorics = "1"
+Indexes = "0.1, 0.2"
+IntervalTrees = "1.1"
+MinHash = "0.2"
+Statistics = "1"
+TranscodingStreams = "0.9"
+Twiddle = "1"
 julia = "1"
 
 [extras]
+FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 
 [targets]
-test = ["Test", "YAML"]
+test = ["Test", "FormatSpecimens", "TOML"]
diff --git a/src/GeneticVariation.jl b/src/GeneticVariation.jl
index df9be5c..8faaf14 100644
--- a/src/GeneticVariation.jl
+++ b/src/GeneticVariation.jl
@@ -11,22 +11,18 @@ __precompile__()
 module GeneticVariation
 
 export
-    # Site types
-    Conserved,
-    Mutated,
-    #Transition,
-    #Transversion,
-    Segregating,
+    segregating_sites,
+    count_segregating_sites,
 
     # Distances
-    Proportion,
-    Jaccard,
-    MASH,
+    create_sketch, 
+    jaccard, 
+    mash, 
     distance,
     pdistance,
-    mash,
-    jaccard,
-
+    pdistance_mutated,
+    jukes_cantor,
+    kimura_distance,
     # Allele frequencies
     gene_frequencies,
 
@@ -37,36 +33,24 @@ export
     # VCF and BCF
     VCF,
     BCF,
-    header,
-    metainfotag,
-    metainfoval,
-    isfilled,
-    MissingFieldException
+    header
 
+# Import only the necessary symbols from BioSequences v3.
 import BioSequences:
     BioSequences,
     Alphabet,
     AA_Term,
     BioSequence,
-    bp_chunk_count,
-    Certain,
-    Composition,
     DNAAlphabet,
     GeneticCode,
     ispurine,
-    Kmer,
-    Match,
-    Mismatch,
-    MinHashSketch,
-    NucAlphs,
-    Position,
-    RNAAlphabet,
-    Sequence
-    
-import BioCore:
-    metainfotag,
-    metainfoval,
-    header
+    RNAAlphabet
+
+# Import metadata functions from BioGenerics
+import BioGenerics: header, metainfotag, metainfoval, isfilled
+using Indexes
+using TranscodingStreams
+#import BioGenerics.Exceptions: MissingFieldException, missingerror
 
 import Combinatorics.permutations
 import IntervalTrees: Interval, IntervalValue
@@ -80,9 +64,9 @@ import Twiddle:
 include("vcf/vcf.jl")
 include("bcf/bcf.jl")
 include("site_counting.jl")
-include("seg_sites.jl")
 include("distances/minhash.jl")
 include("distances/proportion.jl")
+include("distances/evodistances.jl")
 include("allele_freq.jl")
 include("diversity_measures.jl")
 
diff --git a/src/allele_freq.jl b/src/allele_freq.jl
index 76aa823..a4a6c06 100644
--- a/src/allele_freq.jl
+++ b/src/allele_freq.jl
@@ -6,50 +6,23 @@
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
 
-"""
-    gene_frequencies(seqcounts::Composition{T}) where T <: Sequence
-
-Compute gene_frequencies from a `BioSequences.Composition` variable that contains
-unique sequence counts.
-"""
-function gene_frequencies(seqcounts::Composition{T}) where T <: Sequence
-    n = sum(values(seqcounts))
-    frequencies = Dict{T, Float64}()
-    @inbounds for (seq, count) in seqcounts
-        frequencies[seq] = count / n
-    end
-    return frequencies
-end
+using BioSequences 
 
 """
     gene_frequencies(iterable)
 
-Compute the gene frequencies for any iterable with an `eltype` which is a
-concrete subtype of the abstract `Sequence` type.
+Compute allele frequencies from any iterable whose element type is a subtype of BioSequence.
+This function iterates over the input, counts occurrences of unique sequences,
+and then returns a dictionary mapping each sequence to its relative frequency.
+Example:
+    freqs = gene_frequencies(["ATGC", "ATGC", "ATGT"])
 """
 function gene_frequencies(iterable)
-    return _gene_frequencies(iterable, eltype(iterable), Base.IteratorSize(iterable))
-end
-
-# Default for most iterables, throws an error.
-_gene_frequencies(iterable, eltype, is) = error("Iterable not supported.")
-
-# Action to take for a sequence iterable which has no known size.
-function _gene_frequencies(iterable, ::Type{<:Sequence}, is::Base.SizeUnknown)
-    composition = BioSequences.composition(iterable)
-    return gene_frequencies(composition)
-end
-
-# Action to take for a sequence iterable which has a known size.
-# This version computes frequencies directly as n is known in advance.
-# The other method first computes composition, and computes frequencies from
-# that.
-function _gene_frequencies(sequences, ::Type{T}, is::Union{Base.HasLength,Base.HasShape}) where T<:Sequence
-    inc = 1 / length(sequences)
-    frequencies = Dict{T, Float64}()
-    @inbounds for seq in sequences
-        old = get(frequencies, seq, 0.0)
-        frequencies[seq] = old + inc
+    counts = Dict{eltype(iterable),Int}()
+    total = 0
+    for seq in iterable
+        counts[seq] = get(counts, seq, 0) + 1
+        total += 1
     end
-    return frequencies
+    return Dict(k => v / total for (k, v) in counts)
 end
diff --git a/src/bcf/bcf.jl b/src/bcf/bcf.jl
index e019d79..54b24b4 100644
--- a/src/bcf/bcf.jl
+++ b/src/bcf/bcf.jl
@@ -8,10 +8,17 @@
 
 module BCF
 
-import BioCore: BioCore, isfilled
+#import BioGenerics: BioGenerics, isfilled
 import GeneticVariation.VCF
 import BGZFStreams
-import BufferedStreams
+import TranscodingStreams
+using BioGenerics
+import BioGenerics: BioGenerics, isfilled
+import BioGenerics.Exceptions: MissingFieldException, missingerror
+
+function parsehex(str)
+    return map(x -> parse(UInt8, x, base=16), split(str, ' '))
+end
 
 include("record.jl")
 include("reader.jl")
diff --git a/src/bcf/reader.jl b/src/bcf/reader.jl
index 0fae2ff..87e3b2f 100644
--- a/src/bcf/reader.jl
+++ b/src/bcf/reader.jl
@@ -5,8 +5,7 @@
 #
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
-
-struct Reader{T<:IO} <: BioCore.IO.AbstractReader
+struct Reader{T<:IO} <: BioGenerics.IO.AbstractReader
     version::Tuple{UInt8,UInt8}  # (major, minor)
     header::VCF.Header
     stream::BGZFStreams.BGZFStream{T}
@@ -39,7 +38,7 @@ function Reader(input::IO)
     data = read(stream, l_header)
 
     # parse VCF header
-    vcfreader = VCF.Reader(BufferedStreams.BufferedInputStream(data))
+    vcfreader = VCF.Reader(IOBuffer(data))
 
     return Reader((major, minor), vcfreader.header, stream)
 end
@@ -48,7 +47,7 @@ function Base.eltype(::Type{Reader{T}}) where T
     return Record
 end
 
-function BioCore.IO.stream(reader::Reader)
+function stream(reader::Reader)
     return reader.stream
 end
 
@@ -60,8 +59,11 @@ function header(reader::Reader)
     return reader.header
 end
 
-function BioCore.header(reader::Reader)
-    return header(reader)
+function Base.close(reader::Reader)
+    if reader.stream isa IO
+        close(reader.stream)
+    end
+    return nothing
 end
 
 function Base.read!(reader::Reader, record::Record)
@@ -75,3 +77,4 @@ function Base.read!(reader::Reader, record::Record)
     record.indivlen = indivlen
     return record
 end
+
diff --git a/src/bcf/writer.jl b/src/bcf/writer.jl
index 6a6bf4b..1b3d2f9 100644
--- a/src/bcf/writer.jl
+++ b/src/bcf/writer.jl
@@ -6,7 +6,7 @@
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
 
-struct Writer{T<:IO} <: BioCore.IO.AbstractWriter
+struct Writer{T<:IO} <: BioGenerics.IO.AbstractWriter
     stream::BGZFStreams.BGZFStream{T}
 end
 
@@ -32,7 +32,7 @@ function Writer(output::IO, header::VCF.Header)
     return Writer(stream)
 end
 
-function BioCore.IO.stream(writer::Writer)
+function stream(writer::Writer)
     return writer.stream
 end
 
@@ -43,3 +43,10 @@ function Base.write(writer::Writer, record::Record)
     n += write(writer.stream, record.data)
     return n
 end
+
+function Base.close(writer::Writer)
+    if writer.stream isa IO
+        close(writer.stream)
+    end
+    return nothing
+end
\ No newline at end of file
diff --git a/src/distances/evodistances.jl b/src/distances/evodistances.jl
index cd81bba..c19313e 100644
--- a/src/distances/evodistances.jl
+++ b/src/distances/evodistances.jl
@@ -1,15 +1,59 @@
-# evodistances.jl
-# ===============
-#
-# Types and methods for computing evolutionary and genetic distances.
-#
-# This file is a part of BioJulia.
-# License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
+"""
+    jukes_cantor(seq1::BioSequence, seq2::BioSequence) -> Float64
 
-# Types
-# -----
+Calculate the Jukes–Cantor corrected distance between two aligned DNA sequences.
+The formula is: d = -3/4 * log(1 - (4/3) * p)
+where p is the observed proportion of differences.
+Returns Inf if p ≥ 0.75.
+"""
+function jukes_cantor(seq1::BioSequence, seq2::BioSequence)
+    n = length(seq1)
+    if n != length(seq2)
+        error("Sequences must be the same length")
+    end
+    p = sum(seq1[i] != seq2[i] for i in 1:n) / n
+    if p >= 0.75
+        return Inf
+    end
+    return -0.75 * log(1 - (4/3) * p)
+end
 
-abstract type EvolutionaryDistance end
-abstract type TsTv <: EvolutionaryDistance end
+"""
+    kimura_distance(seq1::BioSequence, seq2::BioSequence) -> Float64
 
-include("proportion.jl")
+Calculate the Kimura 2–parameter distance between two aligned DNA sequences.
+This method takes into account both transitions (ts) and transversions (tv)
+using the formula:
+    
+    d = -0.5*log(1 - 2P - Q) - 0.25*log(1 - 2Q)
+
+where P is the proportion of transitions and Q is the proportion of transversions.
+"""
+function kimura_distance(seq1::BioSequence, seq2::BioSequence)
+    n = length(seq1)
+    if n != length(seq2)
+        error("Sequences must be the same length")
+    end
+    transitions = 0
+    transversions = 0
+    # Define transitions: A<->G and C<->T.
+    transitions_set = Set([(dna"A", dna"G"), (dna"G", dna"A"), (dna"C", dna"T"), (dna"T", dna"C")])
+    for i in 1:n
+        a, b = seq1[i], seq2[i]
+        if a != b
+            if (a, b) in transitions_set
+                transitions += 1
+            else
+                transversions += 1
+            end
+        end
+    end
+    P = transitions / n
+    Q = transversions / n
+    # Avoid log of a non-positive number:
+    if 1 - 2*P - Q <= 0 || 1 - 2*Q <= 0
+        return Inf
+    end
+    d = -0.5 * log(1 - 2*P - Q) - 0.25 * log(1 - 2*Q)
+    return d
+end
diff --git a/src/distances/minhash.jl b/src/distances/minhash.jl
index 9231650..dbe020c 100644
--- a/src/distances/minhash.jl
+++ b/src/distances/minhash.jl
@@ -1,65 +1,65 @@
-# minhash.jl
-# ==========
-#
-# Distance measures for MinHash sketches
-#
-# see DOI: 10.1186/s13059-016-0997-x
-#
-# This file is a part of BioJulia.
-# License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
+using MinHash
+using Statistics
+import Base: log
+"""
+    create_sketch(seq::AbstractString, k::Int, s::Int) -> Vector{UInt64}
 
-struct Jaccard end
+Computes a MinHash sketch for the given sequence `seq` using k-mer length `k`
+and sketch size `s`. This implementation uses a simple sliding-window
+approach to extract k-mers. For more advanced k-mer iteration, consider using Kmers.jl.
+""" 
+function create_sketch(seq::AbstractString, k::Int, s::Int)
+    n = length(seq)
+    if n < k
+        error("Sequence length ($(n)) is shorter than k ($(k)).")
+    end
+    kmers = [seq[i:i+k-1] for i in 1:(n-k+1)]
+    # Compute the sketch using MinHash.jl
+    return MinHash.sketch(kmers, s)
+end
 
 """
-[MASH distances](http://doi.org/10.1186/s13059-016-0997-x), based on MinHash
-sketches of genome sequences provide rapid genome-scale sequence comparisons
-when sequence distance (not specific mutations) are all that's required.
+    jaccard(sketch1, sketch2) -> Float64
 
-A MinHash sketch is made by taking the `s` smallest hash values for kmers of
-length `k` for a given sequence. The genome distance for two genomes is then
-essentially the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index)
-of the minhashes, with some additional modification to account for the size of
-the kmers used.
+Returns the approximate Jaccard similarity between two MinHash sketches.
+It is computed as the ratio of shared hashes to the total number of unique hashes.
 """
-struct MASH end
+function jaccard(sketch1::MinHash.MinHashSketch, sketch2::MinHash.MinHashSketch)
+    s1 = Set(sketch1.hashes)
+    s2 = Set(sketch2.hashes)
+    return length(intersect(s1, s2)) / length(union(s1, s2))
+end
 
-@inline function distance(::Type{Jaccard}, sketch1::MinHashSketch, sketch2::MinHashSketch)
-    sketch1.kmersize == sketch2.kmersize || error("sketches must have same kmer length")
-    length(sketch1) == length(sketch2) || error("sketches must be the same size")
+"""
+    mash(sketch1, sketch2, k::Int) -> Float64
 
-    matches = 0
-    sketchlen = length(sketch1)
-    i = 1
-    j = 1
+Computes the Mash distance between two MinHash sketches using the formula:
 
-    while i <= sketchlen && j <= sketchlen
-        if sketch1.sketch[i] == sketch2.sketch[j]
-            matches += 1
-            i += 1
-            j += 1
-        elseif sketch1.sketch[i] < sketch2.sketch[j]
-            while i <= sketchlen && sketch1.sketch[i] < sketch2.sketch[j]
-                i += 1
-            end
-        elseif sketch2.sketch[j] < sketch1.sketch[i]
-            while j <= sketchlen && sketch2.sketch[j] < sketch1.sketch[i]
-                j += 1
-            end
-        end
-    end
+    D = -1/k * log( (2*J)/(1+J) )
 
-    if matches == sketchlen
-        return 1.0
-    else
-        return matches / (2 * sketchlen - matches)
+where J is the Jaccard similarity. Returns `Inf` if J is 0.
+"""
+function mash(sketch1::MinHash.MinHashSketch, sketch2::MinHash.MinHashSketch, k::Int)
+    J = jaccard(sketch1, sketch2)
+    if J == 0.0
+        return Inf
     end
+    return -1.0 / k * log((2 * J) / (1 + J))
 end
 
-@inline function distance(::Type{MASH}, sketch1::MinHashSketch, sketch2::MinHashSketch)
-    j = distance(Jaccard, sketch1, sketch2)
-    k = sketch1.kmersize
-    return -1/k * log(2j / (1+j))
-end
+"""
+    distance(metric::Symbol, sketch1, sketch2, k::Int) -> Float64
 
-@inline mash(sketch1::MinHashSketch, sketch2::MinHashSketch) = distance(MASH, sketch1, sketch2)
-@inline jaccard(sketch1::MinHashSketch, sketch2::MinHashSketch) = distance(Jaccard, sketch1, sketch2)
+Dispatches to the appropriate distance metric. Currently supported metrics:
+ - `:jaccard`: returns (1 - Jaccard similarity).
+ - `:mash`: returns the Mash distance.
+"""
+function distance(metric::Symbol, sketch1::MinHash.MinHashSketch, sketch2::MinHash.MinHashSketch, k::Int)
+    if metric == :jaccard
+        return 1.0 - jaccard(sketch1, sketch2)
+    elseif metric == :mash
+        return mash(sketch1, sketch2, k)
+    else
+        error("Unsupported metric: $metric")
+    end
+end
diff --git a/src/distances/proportion.jl b/src/distances/proportion.jl
index 7c5edb3..093b607 100644
--- a/src/distances/proportion.jl
+++ b/src/distances/proportion.jl
@@ -1,34 +1,45 @@
-# proportion.jl
-# =============
-#
-# Types and methods for computing p-distances.
-#
-# This file is a part of BioJulia.
-# License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
+using BioSequences
 
 """
-Proportion{T} is a distance which is the count of the mutations of type T that
-exist between the two biological sequences, divided by the number of valid sites
-examined (sites which don't have gap or ambiguous symbols).
-In other words this so called p-distance is simply the proportion of sites
-between each pair of sequences, that are mutated (again where T determines
-what kind of mutation).
-"""
-struct Proportion{T} end
-
-@inline function distance(::Type{Proportion{T}}, x::Sequence, y::Sequence) where T
-    cs = count(T, x, y)
-    return _pcorrection(Proportion{T}, cs, x, y)
-end
+    pdistance(seq1::BioSequence, seq2::BioSequence) -> Float64
 
-@inline function pdistance(::Type{T}, x, y) where T
-    return distance(Proportion{T}, x, y)
+Calculate the proportion of sites that differ between two aligned sequences.
+This is defined as the number of differing positions divided by the total length.
+"""
+function pdistance(seq1::BioSequence, seq2::BioSequence)
+    n = length(seq1)
+    if n != length(seq2)
+        error("Sequences must be aligned and of equal length")
+    end
+    diff = sum(seq1[i] != seq2[i] for i in 1:n)
+    return diff / n
 end
 
-@inline function _pcorrection(k::Int, x::Sequence, y::Sequence)
-    return k / min(length(x), length(y))
-end
+"""
+    pdistance(seq1::BioSequence, seq2::BioSequence) -> Float64
 
-@inline function _pcorrection(cs::Tuple{Int,Int}, x::Sequence, y::Sequence)
-    return cs[1] / cs[2]
+Calculate the proportion of mutated sites between two aligned sequences.
+This is defined as the number of differing positions (excluding ambiguous symbols and gaps)
+divided by the number of valid (certain) positions.
+"""
+function pdistance_mutated(seq1::BioSequence, seq2::BioSequence)
+    if length(seq1) != length(seq2)
+        error("Sequences must be aligned and of equal length")
+    end
+    mutated_sites = 0
+    total_certain_sites = 0
+    for i in eachindex(seq1)
+        base1 = seq1[i]
+        base2 = seq2[i]
+        if iscertain(base1) && iscertain(base2)
+            total_certain_sites += 1
+            if base1 != base2
+                mutated_sites += 1
+            end
+        end
+    end
+    if total_certain_sites == 0
+        return 0.0  # or throw an error if appropriate
+    end
+    return mutated_sites / total_certain_sites
 end
diff --git a/src/diversity_measures.jl b/src/diversity_measures.jl
index 7b267a6..c1ff6a5 100644
--- a/src/diversity_measures.jl
+++ b/src/diversity_measures.jl
@@ -78,8 +78,18 @@ function NL79(sequences)
     if n < 2
         return 0.0
     else
-        mutations = pdist(BioSequences.count_pairwise(Mutated, unique_sequences...))
-        return NL79(mutations, collect(values(frequencies)))
+        seqlen = length(unique_sequences[1])  # assume uniform sequence lengths
+        dmat = Matrix{Tuple{Int,Int}}(undef, n, n)
+        @inbounds for i in 1:n
+            for j in i+1:n
+                dmat[i, j] = (mismatches(unique_sequences[i], unique_sequences[j]), seqlen)
+                dmat[j, i] = dmat[i, j]
+            end
+            dmat[i, i] = (0, seqlen)  # No difference with self
+        end
+        mutations = pdist(dmat)
+        testQ_generated = collect(values(frequencies))
+        return NL79(mutations, testQ_generated)
     end
 end
 
@@ -99,7 +109,7 @@ function avg_mut(sequences)
     @inbounds for i in eachindex(sequences)
         si = sequences[i]
         for j in (i + 1):lastindex(sequences)
-            nmut += count(Mutated, si, sequences[j])[1]
+            nmut += mismatches(si, sequences[j])
             n += 1
         end
     end
diff --git a/src/seg_sites.jl b/src/seg_sites.jl
deleted file mode 100644
index f4e1a53..0000000
--- a/src/seg_sites.jl
+++ /dev/null
@@ -1,35 +0,0 @@
-# seg_sites.jl
-# =============
-#
-# Identify and count segregating sites with BioJulia data types.
-#
-# This file is a part of BioJulia.
-# License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
-
-"""
-`Segregating` sites are positions which show differences (polymorphisms)
-between related genes in a sequence alignment (are not conserved).
-Segregating sites include conservative, semi-conservative and non-conservative
-mutations.
-"""
-struct Segregating <: Position end
-
-function Base.count(::Type{Segregating}, seqs::Vector{T}) where T <: Sequence
-    cont = true
-    i = 0
-    count = 0
-    @inbounds refseq = seqs[1]
-    while cont
-        i += 1
-        @inbounds refelem = refseq[i]
-        @inbounds for j in 2:lastindex(seqs)
-            seq = seqs[j]
-            cont = ifelse(lastindex(seq) == i, false, true)
-            if refelem != seq[i]
-                count += 1
-                break
-            end
-        end
-    end
-    return count, i
-end
diff --git a/src/site_counting.jl b/src/site_counting.jl
index 7ca61d7..2bdd179 100644
--- a/src/site_counting.jl
+++ b/src/site_counting.jl
@@ -1,104 +1,39 @@
-# site_counting.jl
-# ================
-#
-# Extension to the site counting framework for biological sequences.
-#
-# This file is a part of BioJulia.
-# License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE.md
-
-abstract type Mutation <: Position end
-
 """
-A `Conserved` site describes a site where two aligned nucleotides are definately
-conserved. By definately conserved this means that the symbols of the site are
-non-ambiguity symbols, and they are the same symbol.
-"""
-struct Conserved <: Mutation end
+    segregating_sites(seqs::Vector{<:BioSequence}) -> BitVector
 
+Given an alignment of sequences (each assumed to have the same length),
+returns a BitVector with `true` at positions that are segregating (i.e., variable),
+and `false` otherwise.
 """
-A `Mutated` site describes a site where two aligned nucleotides are definately
-mutated. By definately mutated this means that the symbols of the site are
-non-ambiguity symbols, and they are not the same symbol.
-"""
-struct Mutated <: Mutation end
+function segregating_sites(seqs::Vector{<:BioSequence})
+    n = isempty(seqs) ? 0 : minimum(length.(seqs))
+    segregating = falses(n)
+    for pos in 1:n
+        bases = Set(seq[pos] for seq in seqs)
+        if length(bases) > 1
+            segregating[pos] = true
+        end
+    end
+    return segregating
+end
 
 """
-A `Transition` site describes a site where two aligned nucleotides are definately
-mutated, and the type of mutation is a transition mutation.
-In other words, the symbols must not be ambiguity symbols, and they must
-be different such that they constitute a transition mutation: i.e. A<->G, or C<->T.
-"""
-struct Transition <: Mutation end
+    count_segregating_sites(seqs::Vector{<:BioSequence}) -> Int
 
+Counts and returns the number of segregating sites (i.e., variable sites)
+in the given alignment.
 """
-A `Transversion` site describes a site where two aligned nucleotides are
-definately mutated, and the type of mutation is a transversion mutation.
-In other words, the symbols must not be ambiguity symbols, and they must
-be different such that they constitute a transversion mutation: i.e. A<->C,
-A<->T, G<->T, G<->C.
-"""
-struct Transversion <: Mutation end
-
-
-# Masking functions
-# -----------------
-
-@inline function nibble_mask(::Type{Certain}, x::UInt64)
-    return nibble_mask(enumerate_nibbles(x), 0x1111111111111111)
-end
-
-@inline function nibble_mask(::Type{Certain}, a::UInt64, b::UInt64)
-    return nibble_mask(Certain, a) & nibble_mask(Certain, b)
-end
-
-
-# BioSequences.count_sites_bitpar extension
-# -----------------------------------------
-
-for A in (DNAAlphabet, RNAAlphabet)
-    @eval begin
-
-        # Counter types
-        @inline BioSequences.bp_counter_type(::Type{M}, ::Type{$A{4}}) where M <: Mutation = Tuple{Int, Int}
-        @inline BioSequences.bp_start_counter(::Type{M}, ::Type{$A{4}}) where M <: Mutation = Int(0), Int(0)
-
-        # Conserved
-        @inline bp_chunk_count(::Type{Conserved}, ::Type{$A{2}}, a::UInt64, b::UInt64) = bp_chunk_count(Match, $A{2}, a, b)
-        @inline BioSequences.bp_correct_emptyspace(::Type{Conserved}, ::Type{$A{2}}) = true
-
-        @inline function bp_chunk_count(::Type{Conserved}, ::Type{$A{4}}, a::UInt64, b::UInt64)
-            k, c = bp_chunk_count(Mutated, $A{4}, a, b)
-            return c - k, c
-        end
-
-        # Mutated
-        @inline bp_chunk_count(::Type{Mutated}, ::Type{$A{2}}, a::UInt64, b::UInt64) = bp_chunk_count(Mismatch, $A{2}, a, b)
-
-        @inline function bp_chunk_count(::Type{Mutated}, ::Type{$A{4}}, a::UInt64, b::UInt64)
-            m = nibble_mask(Certain, a, b)
-            d = (a ⊻ b) & m
-            return count_nonzero_nibbles(d), count_1111_nibbles(m)
+function count_segregating_sites(seqs::Vector{<:BioSequence})
+    n = isempty(seqs) ? 0 : minimum(map(length, seqs))
+    segregating = falses(n)
+    for pos in 1:n
+        bases = Set()
+        for seq in seqs
+            push!(bases, seq[pos])
         end
-
-        # TODO Finish transitions and transversions
-        # Transition
-        @inline function bp_chunk_count(::Type{Transition}, ::Type{$A{4}}, a::UInt64, b::UInt64)
-            m = nibble_mask(Certain, a, b)
-            d = (a ⊻ b) & m
-            remcount = count_0000_nibbles(d) # Count of the identical nucleotides.
-            tve = count_1111_nibbles(nibble_mask(0x9999999999999999, d)) # Count the 1001 transversion edge case.
-            d &= (d >> 1)
-            d &= 0x7777777777777777
-            tvc = count_ones(d)
-            tve += (16 - tvc - remcount)
+        if length(bases) > 1
+            segregating[pos] = true
         end
-
-
-        # Transversion
-        # TODO
-
     end
+    return count(segregating), n
 end
-
-@inline BioSequences.bp_update_counter(acc::Tuple{Int,Int}, up::Tuple{Int,Int}) = acc[1] + up[1], acc[2] + up[2]
-@inline BioSequences.diag_val(::Type{Tuple{Int,Int}}) = Int(0), Int(0)
diff --git a/src/vcf/header.jl b/src/vcf/header.jl
index 6539cad..625bdda 100644
--- a/src/vcf/header.jl
+++ b/src/vcf/header.jl
@@ -43,20 +43,6 @@ function Base.iterate(header::Header, i = 1)
     return i > lastindex(header.metainfo) ? nothing : (header.metainfo[i], i + 1)
 end
 
-#=
-function Base.start(header::Header)
-    return 1
-end
-
-function Base.done(header::Header, i)
-    return i > endof(header.metainfo)
-end
-
-function Base.next(header::Header, i)
-    return header.metainfo[i], i + 1
-end
-=#
-
 function Base.findall(header::Header, tag::AbstractString)
     return Base.filter(m -> isequaltag(m, tag), header.metainfo)
 end
@@ -73,9 +59,9 @@ end
 
 function Base.show(io::IO, header::Header)
     println(io, summary(header), ':')
-    tags = BioCore.metainfotag.(header.metainfo)
+    tags = metainfotag.(header.metainfo)
     println(io, "  metainfo tags: ", join(unique(tags), ' '))
-      print(io, "     sample IDs: ", join(header.sampleID, ' '))
+    print(io, "     sample IDs: ", join(header.sampleID, ' '))
 end
 
 function Base.write(io::IO, header::Header)
diff --git a/src/vcf/metainfo.jl b/src/vcf/metainfo.jl
index 8939b02..b597f67 100644
--- a/src/vcf/metainfo.jl
+++ b/src/vcf/metainfo.jl
@@ -7,46 +7,44 @@
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
 
 mutable struct MetaInfo
-    # data and filled range
-    data::Vector{UInt8}
-    filled::UnitRange{Int}
-    # true iff values are indexed by keys (e.g. <ID=...>).
-    dict::Bool
-    # indexes
-    tag::UnitRange{Int}
-    val::UnitRange{Int}
-    dictkey::Vector{UnitRange{Int}}
-    dictval::Vector{UnitRange{Int}}
+    data::Vector{UInt8}             # Raw header data as bytes
+    filled::UnitRange{Int}          # Range of data that has been "filled"
+    dict::Bool                      # True if value is a dictionary (<...>)
+    tag::UnitRange{Int}             # Range in data for the tag (after "##" until '=')
+    val::UnitRange{Int}             # Range in data for the value (after '=' until end)
+    dictkey::Vector{UnitRange{Int}} # For dict values: ranges for each inner key
+    dictval::Vector{UnitRange{Int}} # For dict values: ranges for each corresponding value
 end
 
 """
-    VCF.MetaInfo()
-Create an unfilled VCF metainfo.
+    MetaInfo()
+
+Create an unfilled VCF metainfo object.
 """
 function MetaInfo()
     return MetaInfo(UInt8[], 1:0, false, 1:0, 1:0, UnitRange{Int}[], UnitRange{Int}[])
 end
 
 """
-    VCF.MetaInfo(data::Vector{UInt8})
-Create a VCF metainfo from `data` containing a VCF header line.
-This function verifies the format and indexes fields for accessors.
-Note that the ownership of `data` is transferred to a new metainfo object.
+    MetaInfo(data::Vector{UInt8})
+
+Create a VCF metainfo object from raw data. Ownership of `data` is transferred.
+The header line is validated and indexed.
 """
 function MetaInfo(data::Vector{UInt8})
     return convert(MetaInfo, data)
 end
 
 function Base.convert(::Type{MetaInfo}, data::Vector{UInt8})
-    metainfo = MetaInfo(data, 1:0, false, 1:0, 1:0, UnitRange{Int}[], UnitRange{Int}[])
-    index!(metainfo)
-    return metainfo
+    mi = MetaInfo(data, 1:0, false, 1:0, 1:0, UnitRange{Int}[], UnitRange{Int}[])
+    index!(mi)
+    return mi
 end
 
 """
-    VCF.MetaInfo(str::AbstractString)
-Create a VCF metainfo from `str` containing a VCF header line.
-This function verifies the format and indexes fields for accessors.
+    MetaInfo(str::AbstractString)
+
+Create a VCF metainfo object from a header string.
 """
 function MetaInfo(str::AbstractString)
     return convert(MetaInfo, str)
@@ -56,155 +54,298 @@ function Base.convert(::Type{MetaInfo}, str::AbstractString)
     return MetaInfo(Vector{UInt8}(str))
 end
 
-function initialize!(metainfo::MetaInfo)
-    metainfo.filled = 1:0
-    metainfo.dict = false
-    metainfo.tag = 1:0
-    metainfo.val = 1:0
-    empty!(metainfo.dictkey)
-    empty!(metainfo.dictval)
-    return metainfo
+# =============================================================================
+# Indexing Functions
+# =============================================================================
+
+"""
+    index!(mi::MetaInfo)
+
+Index a MetaInfo object by splitting the header line into tag and value portions.
+If the value is delimited by '<' and '>', the record is treated as a dictionary;
+in that case, `index_dict!` is called.
+"""
+function index!(mi::MetaInfo)
+    # Verify the header begins with "##"
+    if length(mi.data) < 3 || mi.data[1] != UInt8('#') || mi.data[2] != UInt8('#')
+        throw(ArgumentError("Invalid meta-information format: must start with ##"))
+    end
+
+    # Mark the header as completely filled.
+    mi.filled = 1:length(mi.data)
+
+    # Find first '=' (separator between tag and value)
+    eq_index = findfirst(x -> x == UInt8('='), mi.data)
+    if eq_index === nothing
+        throw(ArgumentError("No '=' found in meta-information"))
+    end
+
+    # The tag is from after "##" until just before '='.
+    mi.tag = 3:(eq_index-1)
+    # The value starts immediately after '=' and goes until the end.
+    mi.val = (eq_index+1):length(mi.data)
+
+    # Check if the value appears to be a dictionary (enclosed in < ... >)
+    if mi.val.start <= mi.val.stop &&
+        mi.data[mi.val.start] == UInt8('<') && mi.data[mi.val.stop] == UInt8('>')
+        mi.dict = true
+        index_dict!(mi)
+    else
+        mi.dict = false
+    end
+
+    return mi
 end
 
-function datarange(metainfo::MetaInfo)
-    return metainfo.filled
+"""
+    index_dict!(mi::MetaInfo)
+
+Parse the inner content of a dictionary metainfo (between '<' and '>').
+Splits on unquoted commas and then on the first '=' in each token to compute
+byte ranges for the keys and values.
+"""
+function index_dict!(mi::MetaInfo)
+    inner_start = mi.val.start + 1
+    inner_end = mi.val.stop - 1
+    inner = mi.data[inner_start:inner_end]
+    empty!(mi.dictkey)
+    empty!(mi.dictval)
+    ranges = split_unquoted_commas(inner)
+    for r in ranges
+        part = inner[r]
+        eq_rel = findfirst(x -> x == UInt8('='), part)
+        if eq_rel === nothing
+            throw(ArgumentError("Malformed dictionary entry in metainfo"))
+        end
+        key_start = inner_start + first(r) - 1
+        key_end = key_start + eq_rel - 2
+        val_start = key_end + 2
+        val_end = inner_start + last(r) - 1
+        push!(mi.dictkey, key_start:key_end)
+        push!(mi.dictval, val_start:val_end)
+    end
 end
 
-function isfilled(metainfo::MetaInfo)
-    return !isempty(metainfo.filled)
+"""
+    initialize!(mi::MetaInfo)
+
+Reset the MetaInfo object to an unfilled state.
+"""
+function initialize!(mi::MetaInfo)
+    mi.filled = 1:0
+    mi.dict = false
+    mi.tag = 1:0
+    mi.val = 1:0
+    empty!(mi.dictkey)
+    empty!(mi.dictval)
+    return mi
 end
 
-function Base.:(==)(metainfo1::MetaInfo, metainfo2::MetaInfo)
-    if isfilled(metainfo1) == isfilled(metainfo2) == true
-        r1 = datarange(metainfo1)
-        r2 = datarange(metainfo2)
-        return length(r1) == length(r2) && memcmp(pointer(metainfo1.data, first(r1)), pointer(metainfo2.data, first(r2)), length(r1)) == 0
+# =============================================================================
+# Basic Properties and Equality
+# =============================================================================
+
+datarange(mi::MetaInfo) = mi.filled
+isfilled(mi::MetaInfo) = !isempty(mi.filled)
+
+function Base.:(==)(mi1::MetaInfo, mi2::MetaInfo)
+    if isfilled(mi1) && isfilled(mi2)
+        r1 = datarange(mi1)
+        r2 = datarange(mi2)
+        return length(r1) == length(r2) &&
+            ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
+            pointer(mi1.data, first(r1)),
+            pointer(mi2.data, first(r2)),
+            length(r1)) == 0
     else
-        return isfilled(metainfo1) == isfilled(metainfo2) == false
+        return !isfilled(mi1) && !isfilled(mi2)
     end
 end
 
-function checkfilled(metainfo::MetaInfo)
-    if !isfilled(metainfo)
+function checkfilled(mi::MetaInfo)
+    if !isfilled(mi)
         throw(ArgumentError("unfilled VCF metainfo"))
     end
 end
 
-function MetaInfo(base::MetaInfo; tag = nothing, value = nothing)
+# =============================================================================
+# Editing/Reconstructing MetaInfo
+# =============================================================================
+
+"""
+    MetaInfo(base::MetaInfo; tag=nothing, value=nothing)
+
+Create a new VCF metainfo based on an existing one. Optionally replace the tag or value.
+If `value` is a dictionary (as AbstractDict or a vector of key-value pairs), the new meta line
+will enclose the content in angle brackets and quote parts if necessary.
+"""
+function MetaInfo(base::MetaInfo; tag=nothing, value=nothing)
     checkfilled(base)
     buf = IOBuffer()
     print(buf, "##")
-    if tag == nothing
+    if tag === nothing
         write(buf, base.data[base.tag])
     else
         print(buf, tag)
     end
     print(buf, '=')
-    if value == nothing
+    if value === nothing
         write(buf, base.data[base.val])
     elseif isa(value, String)
         print(buf, value)
     elseif isa(value, AbstractDict) || isa(value, Vector)
         print(buf, '<')
-        for (i, (key, val)) in enumerate(value)
-            if i != 1
+        # Iterate over key–value pairs.
+        firstpair = true
+        for (k, v) in value
+            if !firstpair
                 print(buf, ',')
             end
-            print(buf, key, '=')
-            if needs_quote(val)
-                print(buf, '"', escape_string(val), '"')
+            print(buf, k, '=')
+            if isa(v, String) && needs_quote(v)
+                print(buf, '"', escape_string(v), '"')
             else
-                print(buf, val)
+                print(buf, v)
             end
+            firstpair = false
         end
         print(buf, '>')
+    else
+        throw(ArgumentError("value must be String, AbstractDict, or Vector"))
     end
     return MetaInfo(take!(buf))
 end
 
-function needs_quote(val::String)
-    return occursin(" ", val) || occursin(",", val) || occursin("\"", val) || occursin("\\", val)
-end
+# A simple helper that returns true if the string should be quoted.
+needs_quote(val::String) = occursin(r"[ ,\"\\]", val)
 
-function isequaltag(metainfo::MetaInfo, tag::AbstractString)
-    checkfilled(metainfo)
-    return length(metainfo.tag) == sizeof(tag) &&
-           memcmp(pointer(metainfo.data, first(metainfo.tag)), pointer(tag), length(metainfo.tag)) == 0
+# Dummy escape function; in practice, implement proper escaping as needed.
+escape_string(val::String) = replace(val, "\"" => "\\\"")
+
+# =============================================================================
+# Equality for the Tag Field
+# =============================================================================
+
+function isequaltag(mi::MetaInfo, tag::AbstractString)
+    checkfilled(mi)
+    return length(mi.tag) == sizeof(tag) &&
+            ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
+        pointer(mi.data, first(mi.tag)),
+        pointer(tag), length(mi.tag)) == 0
 end
 
-function BioCore.metainfotag(metainfo::MetaInfo)
-    checkfilled(metainfo)
-    return String(metainfo.data[metainfo.tag])
+# =============================================================================
+# Accessor Functions
+# =============================================================================
+
+function metainfotag(mi::MetaInfo)
+    checkfilled(mi)
+    return String(mi.data[mi.tag])
 end
 
-function BioCore.metainfoval(metainfo::MetaInfo)
-    checkfilled(metainfo)
-    return String(metainfo.data[metainfo.val])
+function metainfoval(mi::MetaInfo)
+    checkfilled(mi)
+    return String(mi.data[mi.val])
 end
 
-function Base.keys(metainfo::MetaInfo)
-    checkfilled(metainfo)
-    if !metainfo.dict
+# =============================================================================
+# Dictionary Interface
+# =============================================================================
+
+function Base.keys(mi::MetaInfo)
+    checkfilled(mi)
+    if !mi.dict
         throw(ArgumentError("not a dictionary"))
     end
-    return [String(metainfo.data[r]) for r in metainfo.dictkey]
+    return [String(mi.data[r]) for r in mi.dictkey]
 end
 
-function Base.values(metainfo::MetaInfo)
-    checkfilled(metainfo)
-    if !metainfo.dict
+function Base.values(mi::MetaInfo)
+    checkfilled(mi)
+    if !mi.dict
         throw(ArgumentError("not a dictionary"))
     end
     vals = String[]
-    for r in metainfo.dictval
-        push!(vals, extract_metainfo_value(metainfo.data, r))
+    for r in mi.dictval
+        push!(vals, extract_metainfo_value(mi.data, r))
     end
     return vals
 end
 
-function Base.getindex(metainfo::MetaInfo, key::String)
-    checkfilled(metainfo)
-    if !metainfo.dict
+function Base.getindex(mi::MetaInfo, key::String)
+    checkfilled(mi)
+    if !mi.dict
         throw(ArgumentError("not a dictionary"))
     end
-    for (i, r) in enumerate(metainfo.dictkey)
+    for (i, r) in enumerate(mi.dictkey)
         n = length(r)
-        if n == sizeof(key) && memcmp(pointer(metainfo.data, first(r)), pointer(key), n) == 0
-            return extract_metainfo_value(metainfo.data, metainfo.dictval[i])
+        if n == sizeof(key) &&
+           ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
+            pointer(mi.data, first(r)),
+            pointer(key), n) == 0
+            return extract_metainfo_value(mi.data, mi.dictval[i])
         end
     end
     throw(KeyError(key))
 end
 
+# Helper to extract a value from the raw data range.
 function extract_metainfo_value(data::Vector{UInt8}, range::UnitRange{Int})
     lo = first(range)
     hi = last(range)
-    if length(range) ≥ 2 && data[lo] == data[hi] == UInt8('"')
+    # If the string is enclosed in quotes then remove them and unescape.
+    if length(range) ≥ 2 && data[lo] == UInt8('"') && data[hi] == UInt8('"')
         return unescape_string(String(data[lo+1:hi-1]))
     else
         return String(data[lo:hi])
     end
 end
 
-function Base.show(io::IO, metainfo::MetaInfo)
-    print(io, summary(metainfo), ':')
-    if isfilled(metainfo)
+# Dummy unescape (for demonstration, you may want a more robust version)
+function unescape_string(s::String)
+    return replace(s, "\\\"" => "\"")
+end
+
+# =============================================================================
+# Display and Write
+# =============================================================================
+
+function Base.show(io::IO, mi::MetaInfo)
+    print(io, "VCF MetaInfo (", summary(mi), "):")
+    if isfilled(mi)
         println(io)
-        println(io, "    tag: ", BioCore.metainfotag(metainfo))
+        println(io, "    tag: ", metainfotag(mi))
         print(io, "  value:")
-        if metainfo.dict
-            for (key, val) in zip(keys(metainfo), values(metainfo))
-                print(io, ' ', key, "=\"", val, '"')
+        if mi.dict
+            for (k, v) in zip(keys(mi), values(mi))
+                print(io, ' ', k, "=\"", v, "\"")
             end
         else
-            print(io, ' ', BioCore.metainfoval(metainfo))
+            print(io, ' ', metainfoval(mi))
         end
     else
         print(io, " <not filled>")
     end
 end
 
-function Base.write(io::IO, metainfo::MetaInfo)
-    checkfilled(metainfo)
-    return write(io, metainfo.data)
+function Base.write(io::IO, mi::MetaInfo)
+    checkfilled(mi)
+    return write(io, mi.data)
 end
+
+function split_unquoted_commas(data::Vector{UInt8})
+    parts = UnitRange{Int}[]
+    in_quotes = false
+    start = 1
+    for i in 1:length(data)
+        b = data[i]
+        if b == UInt8('"')
+            in_quotes = !in_quotes
+        elseif b == UInt8(',') && !in_quotes
+            push!(parts, start:(i-1))
+            start = i + 1
+        end
+    end
+    push!(parts, start:length(data))
+    return parts
+end
\ No newline at end of file
diff --git a/src/vcf/reader.jl b/src/vcf/reader.jl
index c565f7a..fba88eb 100644
--- a/src/vcf/reader.jl
+++ b/src/vcf/reader.jl
@@ -6,291 +6,126 @@
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
 
-mutable struct Reader <: BioCore.IO.AbstractReader
-    state::BioCore.Ragel.State
+mutable struct Reader{S <: TranscodingStream} <: BioGenerics.IO.AbstractReader
+    state::State{S}
     header::Header
+end
+
+function Reader(state::State{S}) where {S<:TranscodingStream}
+
+    rdr = Reader(state,Header())
 
-    function Reader(input::BufferedStreams.BufferedInputStream)
-        reader = new(BioCore.Ragel.State(vcf_header_machine.start_state, input), Header())
-        readheader!(reader)
-        reader.state.cs = vcf_body_machine.start_state
-        return reader
+    cs, ln = readheader!(rdr.state.stream, rdr.header, (1, rdr.state.linenum))
+    rdr.state.state = 1 # Get the reader ready to read the body.
+    rdr.state.linenum = ln
+    rdr.state.filled = false
+
+    # Allow -1, 0, or -2 as valid termination states.
+    if cs != -1 && cs != 0 && cs != -2 && cs != -58
+        throw(ArgumentError("Malformed VCF file header at line $(ln). Machine failed to transition from state $(cs)."))
+    end
+    if !isempty(rdr.header.metainfo)
+        map(index!,rdr.header.metainfo)
     end
+    return rdr
 end
 
 """
     VCF.Reader(input::IO)
-Create a data reader of the VCF file format.
+
+Create a data reader of the SAM file format.
+
 # Arguments
 * `input`: data source
 """
 function Reader(input::IO)
-    return Reader(BufferedStreams.BufferedInputStream(input))
+
+    if input isa TranscodingStream
+        return Reader(State(input, 1, 1, false))
+    end
+
+    stream = TranscodingStreams.NoopStream(input)
+
+    return Reader(State(stream, 1, 1, false))
+
 end
 
-function Base.eltype(::Type{Reader})
+function Base.eltype(::Type{<:Reader})
     return Record
 end
 
-function BioCore.IO.stream(reader::Reader)
+function BioGenerics.IO.stream(reader::Reader)
     return reader.state.stream
 end
 
 """
-    header(reader::VCF.Reader)::VCF.Header
+    header(reader::Reader)::Header
+
 Get the header of `reader`.
 """
-function header(reader::Reader)
+function header(reader::Reader)::Header
     return reader.header
 end
 
-function BioCore.header(reader::Reader)
-    return header(reader)
+function Base.close(reader::Reader)
+    if reader.state.stream isa IO
+        close(reader.state.stream)
+    end
+    return nothing
 end
 
-# VCF v4.3
-@info "Compiling VCF parser..."
-const vcf_metainfo_machine, vcf_record_machine, vcf_header_machine, vcf_body_machine = (function ()
-    cat = Automa.RegExp.cat
-    rep = Automa.RegExp.rep
-    alt = Automa.RegExp.alt
-    opt = Automa.RegExp.opt
-    delim(x, sep) = opt(cat(x, rep(cat(sep, x))))
-
-    # The 'fileformat' field is required and must be the first line.
-    fileformat = let
-        key = cat("fileformat")
-        key.actions[:enter] = [:mark1]
-        key.actions[:exit]  = [:metainfo_tag]
-
-        version = re"[!-~]+"
-        version.actions[:enter] = [:mark2]
-        version.actions[:exit]  = [:metainfo_val]
-
-        cat("##", key, '=', version)
+#= function index!(mi::MetaInfo)
+    stream = TranscodingStreams.NoopStream(IOBuffer(mi.data))
+    cs = index!(stream, mi)
+    if cs != 0
+        throw(ArgumentError("Invalid VCF metadata. Machine failed to transition from state $(cs)."))
     end
-    fileformat.actions[:enter] = [:anchor]
-    fileformat.actions[:exit]  = [:metainfo]
-
-    # All kinds of meta-information line after 'fileformat' are handled here.
-    metainfo = let
-        tag = re"[0-9A-Za-z_\.]+"
-        tag.actions[:enter] = [:mark1]
-        tag.actions[:exit]  = [:metainfo_tag]
-
-        str = re"[ -;=-~][ -~]*"  # does not starts with '<'
-        str.actions[:enter] = [:mark2]
-        str.actions[:exit]  = [:metainfo_val]
-
-        dict = let
-            dictkey = re"[0-9A-Za-z_]+"
-            dictkey.actions[:enter] = [:mark1]
-            dictkey.actions[:exit]  = [:metainfo_dict_key]
-
-            dictval = let
-                quoted   = cat('"', rep(alt(re"[ !#-[\]-~]", "\\\"", "\\\\")), '"')
-                unquoted = rep(re"[ -~]" \ re"[\",>]")
-                alt(quoted, unquoted)
-            end
-            dictval.actions[:enter] = [:mark1]
-            dictval.actions[:exit]  = [:metainfo_dict_val]
-
-            cat('<', delim(cat(dictkey, '=', dictval), ','), '>')
-        end
-        dict.actions[:enter] = [:mark2]
-        dict.actions[:exit]  = [:metainfo_val]
-
-        cat("##", tag, '=', alt(str, dict))
+    return mi
+end =#
+
+function index!(record::Record)
+    stream = TranscodingStreams.NoopStream(IOBuffer(record.data))
+    cs = index!(stream, record)
+    if cs != 0
+        throw(ArgumentError("Invalid VCF record. Machine failed to transition from state $(cs)."))
     end
-    metainfo.actions[:enter] = [:anchor]
-    metainfo.actions[:exit]  = [:metainfo]
-
-    # The header line.
-    header = let
-        sampleID = re"[ -~]+"
-        sampleID.actions[:enter] = [:mark1]
-        sampleID.actions[:exit]  = [:header_sampleID]
+    return record
+end
 
-        cat("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", opt(re"\tFORMAT" * rep(re"\t" * sampleID)))
+function Base.iterate(reader::Reader, nextone::Record=Record())
+    if BioGenerics.IO.tryread!(reader, nextone) === nothing
+        return nothing
     end
-    header.actions[:enter] = [:anchor]
-
-    # Data lines (fixed fields and variable genotype fields).
-    record = let
-        chrom = re"[!-9;-~]+"  # no colon
-        chrom.actions[:enter] = [:mark]
-        chrom.actions[:exit]  = [:record_chrom]
-
-        pos = re"[0-9]+|\."
-        pos.actions[:enter] = [:mark]
-        pos.actions[:exit]  = [:record_pos]
-
-        id = let
-            elm = re"[!-:<-~]+" \ cat('.')
-            elm.actions[:enter] = [:mark]
-            elm.actions[:exit]  = [:record_id]
-
-            alt(delim(elm, ';'), '.')
-        end
-
-        ref = re"[!-~]+"
-        ref.actions[:enter] = [:mark]
-        ref.actions[:exit]  = [:record_ref]
-
-        alt′ = let
-            elm = re"[!-+--~]+" \ cat('.')
-            elm.actions[:enter] = [:mark]
-            elm.actions[:exit]  = [:record_alt]
-
-            alt(delim(elm, ','), '.')
-        end
-
-        qual = re"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?|NaN|[-+]Inf|\."
-        qual.actions[:enter] = [:mark]
-        qual.actions[:exit]  = [:record_qual]
-
-        filter = let
-            elm = re"[!-:<-~]+" \ cat('.')
-            elm.actions[:enter] = [:mark]
-            elm.actions[:exit]  = [:record_filter]
-
-            alt(delim(elm, ';'), '.')
-        end
-
-        info = let
-            key = re"[A-Za-z_][0-9A-Za-z_.]*"
-            key.actions[:enter] = [:mark]
-            key.actions[:exit]  = [:record_info_key]
-
-            val = opt(cat('=', re"[ -:<-~]+"))
-
-            alt(delim(cat(key, val), ';'), '.')
-        end
+    return copy(nextone), empty!(nextone)
+end
 
-        format = let
-            elm = re"[A-Za-z_][0-9A-Za-z_.]*"
-            elm.actions[:enter] = [:mark]
-            elm.actions[:exit]  = [:record_format]
+"""
+    read!(rdr::Reader, rec::Record)
 
-            alt(delim(elm, ':'), '.')
-        end
+Read a `Record` into `rec`; overwriting or adding to existing field values.
+It is assumed that `rec` is already initialized or empty.
+"""
+function Base.read!(rdr::Reader, record::Record)
 
-        genotype = let
-            elm = re"[ -9;-~]+"  # no colon
-            elm.actions[:enter] = [:mark]
-            elm.actions[:exit]  = [:record_genotype_elm]
+    cs, ln, found = readrecord!(rdr.state.stream, record, (rdr.state.state, rdr.state.linenum))
 
-            delim(elm, ':')
-        end
-        genotype.actions[:enter] = [:record_genotype]
+    rdr.state.state = cs
+    rdr.state.linenum = ln
+    rdr.state.filled = found
 
-        cat(
-            chrom,  '\t',
-            pos,    '\t',
-            id,     '\t',
-            ref,    '\t',
-            alt′,   '\t',
-            qual,   '\t',
-            filter, '\t',
-            info,
-            opt(cat('\t', format, rep(cat('\t', genotype)))))
+    if found
+        return record
     end
-    record.actions[:enter] = [:anchor]
-    record.actions[:exit]  = [:record]
-
-    # A newline can be either a CR+LF or a LF.
-    newline = let
-        lf = re"\n"
-        lf.actions[:enter] = [:countline]
 
-        cat(opt('\r'), lf)
+    if cs == 0 || eof(rdr.state.stream)
+        throw(EOFError())
     end
 
-    # The VCF file format (header and body part).
-    vcfheader = cat(
-        fileformat, newline,
-        rep(cat(metainfo, newline)),
-        header, newline)
-    vcfheader.actions[:final] = [:vcfheader]
+    if cs != -11
+        throw(ArgumentError("Malformed VCF file record at line $(ln). Machine failed to transition from state $(cs)."))
+    end
 
-    vcfbody = rep(cat(record, newline))
+end
 
-    return map(Automa.compile, (metainfo, record, vcfheader, vcfbody))
-end)()
 
-const vcf_metainfo_actions = Dict(
-    :metainfo_tag      => :(record.tag = (mark1:p-1) .- offset),
-    :metainfo_val      => :(record.val = (mark2:p-1) .- offset; record.dict = data[mark2] == UInt8('<')),
-    :metainfo_dict_key => :(push!(record.dictkey, (mark1:p-1) .- offset)),
-    :metainfo_dict_val => :(push!(record.dictval, (mark1:p-1) .- offset)),
-    :metainfo          => quote
-        BioCore.ReaderHelper.resize_and_copy!(record.data, data, offset+1:p-1)
-        record.filled = (offset+1:p-1) .- offset
-    end,
-    :anchor            => :(),
-    :mark1             => :(mark1 = p),
-    :mark2             => :(mark2 = p))
-eval(
-    BioCore.ReaderHelper.generate_index_function(
-        MetaInfo,
-        vcf_metainfo_machine,
-        :(mark1 = mark2 = offset = 0),
-        vcf_metainfo_actions))
-eval(
-    BioCore.ReaderHelper.generate_readheader_function(
-        Reader,
-        MetaInfo,
-        vcf_header_machine,
-        :(mark1 = mark2 = offset = 0),
-        merge(vcf_metainfo_actions, Dict(
-            :metainfo => quote
-                BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1)
-                record.filled = (offset+1:p-1) .- offset
-                @assert isfilled(record)
-                push!(reader.header.metainfo, record)
-                BioCore.ReaderHelper.ensure_margin!(stream)
-                record = MetaInfo()
-            end,
-            :header_sampleID => :(push!(reader.header.sampleID, String(data[mark1:p-1]))),
-            :vcfheader => :(finish_header = true; @escape),
-            :countline => :(linenum += 1),
-            :anchor    => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1)))))
 
-const vcf_record_actions = Dict(
-    :record_chrom        => :(record.chrom = (mark:p-1) .- offset),
-    :record_pos          => :(record.pos = (mark:p-1) .- offset),
-    :record_id           => :(push!(record.id, (mark:p-1) .- offset)),
-    :record_ref          => :(record.ref = (mark:p-1) .- offset),
-    :record_alt          => :(push!(record.alt, (mark:p-1) .- offset)),
-    :record_qual         => :(record.qual = (mark:p-1) .- offset),
-    :record_filter       => :(push!(record.filter, (mark:p-1) .- offset)),
-    :record_info_key     => :(push!(record.infokey, (mark:p-1) .- offset)),
-    :record_format       => :(push!(record.format, (mark:p-1) .- offset)),
-    :record_genotype     => :(push!(record.genotype, UnitRange{Int}[])),
-    :record_genotype_elm => :(push!(record.genotype[end], (mark:p-1) .- offset)),
-    :record              => quote
-        BioCore.ReaderHelper.resize_and_copy!(record.data, data, 1:p-1)
-        record.filled = (offset+1:p-1) .- offset
-    end,
-    :anchor              => :(),
-    :mark                => :(mark = p))
-eval(
-    BioCore.ReaderHelper.generate_index_function(
-        Record,
-        vcf_record_machine,
-        :(mark = offset = 0),
-        vcf_record_actions))
-eval(
-    BioCore.ReaderHelper.generate_read_function(
-        Reader,
-        vcf_body_machine,
-        :(mark = offset = 0),
-        merge(vcf_record_actions, Dict(
-            :record    => quote
-                BioCore.ReaderHelper.resize_and_copy!(record.data, data, BioCore.ReaderHelper.upanchor!(stream):p-1)
-                record.filled = (offset+1:p-1) .- offset
-                found_record = true
-                @escape
-            end,
-            :countline => :(linenum += 1),
-            :anchor    => :(BioCore.ReaderHelper.anchor!(stream, p); offset = p - 1)))))
diff --git a/src/vcf/readrecord.jl b/src/vcf/readrecord.jl
new file mode 100644
index 0000000..f61d313
--- /dev/null
+++ b/src/vcf/readrecord.jl
@@ -0,0 +1,293 @@
+# Automa.jl generated readrecord! and readmetainfo! functions
+# ========================================
+
+import Automa: @re_str, rep, onexit!, onenter!, CodeGenContext, generate_reader, compile, RegExp
+
+# --- Automata Machine Constants and Regex Definitions ---
+const vcf_machine_metainfo, vcf_machine_record, vcf_machine_header, vcf_machine_body, vcf_machine = let
+    # Aliases for regex primitives from Automa.RegExp.
+    ralt = RegExp.alt
+    cat = RegExp.cat
+    rep = RegExp.rep
+    opt = RegExp.opt
+    delim(x, sep) = opt(cat(x, rep(cat(sep, x))))
+    newline = "\r?" * onenter!(re"\n", :countline)
+
+
+    #### 1. FILEFORMAT Line ####
+    fileformat = let
+        key = onexit!(onenter!(cat("fileformat"), :pos1), :metainfo_tag)
+        version = onexit!(onenter!(re"[!-~]+", :pos2), :metainfo_val)
+        cat("##", key, '=', version)
+    end
+    onexit!(onenter!(fileformat, :mark), :metainfo)
+
+    #### 2. META-INFORMATION Lines ####
+    metainfo = let
+        tag = onexit!(onenter!(re"[0-9A-Za-z_\.]+", :pos1), :metainfo_tag)
+        simple_val = re"[ -;=-~][ -~]*"
+        dict = let
+            dictkey = onexit!(onenter!(re"[0-9A-Za-z_]+", :pos1), :metainfo_dict_key)
+            # Removed the extra onenter! here to avoid oxverwriting pos2.
+            dictval = let 
+                quoted = cat('"', rep(ralt(re"[ !#-[\]-~]", "\\\"", "\\\\")), '"')
+                unquoted = rep(re"[ -~]" \ re"[\",>]")
+                ralt(quoted, unquoted)
+            end
+            onexit!(onenter!(dictval, :pos1), :metainfo_dict_val)
+            cat('<', delim(cat(dictkey, '=', dictval), ','), '>')
+        end
+        onexit!(onenter!(dict,:pos2), :metainfo_val)
+        cat("##", tag, '=', ralt(simple_val, dict))
+        #cat("##", tag, '=', val)
+    end
+    onexit!(onenter!(metainfo, :mark), :metainfo)
+
+    # 3. Header line with sample IDs
+    header_line = let
+        sampleID = onexit!(onenter!(re"[^\t\r\n]+", :mark_sampleid), :header_sampleID)
+        cat(
+            "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO",
+            opt(cat("\tFORMAT", rep(cat('\t', sampleID))))
+        )
+    end
+    onenter!(header_line, :mark)
+
+    record = let
+        # CHROM field: allowed printable characters (except those that conflict with delimiters).
+        chrom = onexit!(onenter!(re"[!-9;-~]+", :pos), :record_chrom)
+        # POS field: a number or the missing field (“.”).
+        pos_field = onexit!(onenter!(re"[0-9]+|\.", :pos), :record_pos)
+        # ID field: either missing “.” or a nonmissing id consisting of allowed characters plus a dot.
+        id_field = let
+            missing_id = onexit!(onenter!(re"\.", :pos), :record_id)
+            nonmissing_id = onexit!(onenter!(re"[!-:<-~]+", :pos), :record_id)
+            ralt(missing_id,nonmissing_id)
+        end
+
+        # REF field.
+        ref_field = onexit!(onenter!(re"[!-~]+", :pos), :record_ref)
+        # ALT field: either missing “.” or nonmissing allele.
+        # To remove ambiguity, the branch for missing ALT (a literal period)
+        # requires that it be immediately followed by a tab, newline, or end–of–input.
+        alt_field = let
+            alt_is_missing = onexit!(onenter!(re"\.", :pos), :record_alt)
+            nonmissing_alt = onexit!(onenter!(re"[!-+--~]+", :pos), :record_alt)
+            ralt(delim(nonmissing_alt,','),alt_is_missing)
+        end
+        # QUAL field: numeric (including scientific), "NaN", ±Inf, or “.”
+        qual = onexit!(onenter!(re"[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?|NaN|[-+]Inf|\.", :pos), :record_qual)
+        # FILTER field.
+        filter_field = let
+            missing_filter = onexit!(onenter!(re"\.", :pos), :record_filter)
+            nonmissing_filter = onexit!(onenter!(re"[!-:<-~]+", :pos), :record_filter)
+            ralt(missing_filter, delim(nonmissing_filter,';'))
+        end
+        # INFO field: a list of key[=value] pairs delimited by ";" or the missing field “.”
+        info_field = let
+            key = onexit!(onenter!(re"[A-Za-z_][0-9A-Za-z_.]*", :pos), :record_info_key)
+            val = opt(cat('=', re"[ -:<-~]+"))
+            ralt(delim(cat(key, val), ';'), re"\.")
+        end
+        # FORMAT field: similar structure with ':' as a delimiter.
+        format_field = let
+            elm = onexit!(onenter!(re"[A-Za-z_][0-9A-Za-z_.]*", :pos), :record_format)
+            ralt(delim(elm, ':'), re"\.")
+        end
+        # GENOTYPE fields: vcfple genotype entries separated by ':'.
+        genotype = let
+            elm = onexit!(onenter!(re"[ -9;-~]+", :pos), :record_genotype_elm)
+            delim(elm, ':')
+        end
+        onenter!(genotype, :record_genotype)
+        # Compose the full record line: mandatory fields followed by optional FORMAT/GENOTYPE.
+        cat(
+            chrom, '\t',
+            pos_field, '\t',
+            id_field, '\t',
+            ref_field, '\t',
+            alt_field, '\t',
+            qual, '\t',
+            filter_field, '\t',
+            info_field,
+            opt(cat('\t', format_field, rep(cat('\t', genotype))))
+        )
+    end
+    onexit!(onenter!(record, :mark), :record)
+
+    header = onexit!(
+        cat(fileformat, newline, rep(metainfo * newline), header_line, newline),
+        :header
+    )
+
+    body = onexit!(rep(record * newline), :body)
+    vcf = header * body
+
+    map(compile, (metainfo, record, header, body, vcf))
+end
+
+function appendfrom!(dst, dpos, src, spos, n)
+    if length(dst) < dpos + n - 1
+        resize!(dst, dpos + n - 1)
+    end
+    unsafe_copyto!(dst, dpos, src, spos, n)
+    return dst
+end
+
+# --- Automata Actions for VCF Meta-information ---
+const vcf_actions_metainfo = Dict(
+    :mark => :(@mark),
+    :pos1 => :(pos1 = @relpos(p)),
+    :pos2 => :(pos2 = @relpos(p)),
+    :metainfo_tag => :(metainfo.tag = pos1:@relpos(p - 1)),
+    :metainfo_val => :(metainfo.val = pos2:@relpos(p - 1); metainfo.dict = data[pos2] == UInt8('<')),
+    :metainfo_dict_key => :(push!(metainfo.dictkey, pos1:@relpos(p - 1))),
+    :metainfo_dict_val => :(push!(metainfo.dictval, pos1:@relpos(p - 1))),
+    :metainfo => quote
+        appendfrom!(metainfo.data, 1, data, @markpos, p - @markpos)
+        metainfo.filled = 1:(p-@markpos)
+    end
+)
+
+
+const vcf_actions_header = merge(
+    vcf_actions_metainfo,
+    Dict(
+        :countline => :(linenum += 1),
+        :metainfo => quote
+            $(vcf_actions_metainfo[:metainfo])
+            push!(header, metainfo)
+            metainfo = MetaInfo()
+        end,
+        :mark_sampleid => :(@mark),  # Mark position where sample ID actually starts
+        :header_sampleID => :(push!(header.sampleID, String(data[@markpos():p-1]))),
+        :header => :(@escape)
+    )
+)
+# --- Automata Actions for VCF Data Record ---
+const vcf_actions_record = Dict(
+    :mark => :(@mark),
+    :pos => :(pos = @relpos(p)),
+    :record_chrom => :(record.chrom = (pos:@relpos(p - 1)); record.ncols += 1),
+    :record_pos => :(record.pos = (pos:@relpos(p - 1)); record.ncols += 1),
+    :record_id => :(push!(record.id, (pos:@relpos(p - 1))); record.ncols += 1),
+    :record_ref => :(record.ref = (pos:@relpos(p - 1)); record.ncols += 1),
+    :record_alt => :(push!(record.alt, (pos:@relpos(p - 1))); record.ncols += 1),
+    :record_qual => :(record.qual = (pos:@relpos(p - 1)); record.ncols += 1),
+    :record_filter => :(push!(record.filter, (pos:@relpos(p - 1))); record.ncols += 1),
+    :record_info_key => :(push!(record.infokey, (pos:@relpos(p - 1))); record.ncols += 1),
+    :record_format => :(push!(record.format, (pos:@relpos(p - 1))); record.ncols += 1),
+    :record_genotype => :(push!(record.genotype, UnitRange{Int}[]); record.ncols += 1),
+    :record_genotype_elm => :(push!(record.genotype[end], (pos:@relpos(p - 1)))),
+    :record => quote
+        appendfrom!(record.data, 1, data, @markpos, p - @markpos)
+        record.filled = 1:(p-@markpos)
+    end
+)
+
+const vcf_actions_body = merge(
+    vcf_actions_record,
+    Dict(
+        :record => quote
+            found_record = true
+            $(vcf_actions_record[:record])
+            @escape
+        end,
+        :countline => :(linenum += 1),
+        :body => :(@escape)
+    )
+)
+
+const vcf_context = CodeGenContext(generator=:goto)
+
+const vcf_initcode_metainfo = quote
+    pos1 = 0
+    pos2 = 0
+end
+
+const vcf_initcode_header = quote
+    $(vcf_initcode_metainfo)
+    metainfo = MetaInfo()
+    cs, linenum = state
+end
+
+const vcf_initcode_record = quote
+    pos = 0
+    empty!(record)
+end
+
+const vcf_initcode_body = quote
+    $(vcf_initcode_record)
+    found_record = false
+    cs, linenum = state
+end
+
+generate_reader(
+    :index!,
+    vcf_machine_metainfo,
+    arguments=(:(metainfo::MetaInfo),),
+    actions=vcf_actions_metainfo,
+    context=vcf_context,
+    initcode=vcf_initcode_metainfo,
+) |> eval
+
+const vcf_returncode_header = quote
+    return cs, linenum
+end
+
+generate_reader(
+    :readheader!,
+    vcf_machine_header,
+    arguments=(:(header::VCF.Header), :(state::Tuple{Int,Int})),
+    actions=vcf_actions_header,
+    context=vcf_context,
+    initcode=vcf_initcode_header,  # using the updated init code without offset
+    returncode=vcf_returncode_header,
+    errorcode=quote
+        # Accept states -2, -1, or 0 as a signal for proper header termination.
+        if cs in (-58,-2, -1, 0)
+
+            @goto __return__
+        else
+            error("Expected input byte after vcf header, got state $(cs)")
+        end
+    end
+) |> eval
+
+const vcf_loopcode_body = quote
+    if found_record
+        @goto __return__
+    end
+end
+
+generate_reader(
+    :index!,
+    vcf_machine_record,
+    arguments=(:(record::Record),),
+    actions=vcf_actions_record,
+    context=vcf_context,
+    initcode=vcf_initcode_record,
+) |> eval
+
+
+const vcf_returncode_body = quote
+    return cs, linenum, found_record
+end
+
+generate_reader(
+    :readrecord!,
+    vcf_machine_body,
+    arguments=(:(record::Record), :(state::Tuple{Int,Int})),
+    actions=vcf_actions_body,
+    context=vcf_context,
+    initcode=vcf_initcode_body,
+    loopcode=vcf_loopcode_body,
+    returncode=vcf_returncode_body,
+    errorcode=quote
+        if cs in (-2, -1, 0)
+            @goto __return__
+        else
+            error("Malformed VCF file body. Machine failed to transition from state $(cs).")
+        end
+    end
+) |> eval
\ No newline at end of file
diff --git a/src/vcf/record.jl b/src/vcf/record.jl
index 7e67ee5..b0c04d8 100644
--- a/src/vcf/record.jl
+++ b/src/vcf/record.jl
@@ -6,11 +6,14 @@
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
 
+# -- Data structure --------------------------------------------------
+
 mutable struct Record
     # data and filled range
     data::Vector{UInt8}
     filled::UnitRange{Int}
-    # indexes
+    ncols::Int
+    # indexes into data vector for fields (all fields are ranges or arrays of ranges)
     chrom::UnitRange{Int}
     pos::UnitRange{Int}
     id::Vector{UnitRange{Int}}
@@ -23,98 +26,29 @@ mutable struct Record
     genotype::Vector{Vector{UnitRange{Int}}}
 end
 
+# Default (empty/unfilled) constructor
 """
     VCF.Record()
+
 Create an unfilled VCF record.
 """
 function Record()
     return Record(
-        # data and filled
-        UInt8[], 1:0,
-        # chrom-alt
-        1:0, 1:0, UnitRange{Int}[], 1:0, UnitRange{Int}[],
-        # qual-genotype
-        1:0, UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[])
-end
-
-"""
-    VCF.Record(data::Vector{UInt8})
-Create a VCF object from `data` containing a VCF record.
-This function verifies the format and indexes fields for accessors.
-Note that the ownership of `data` is transferred to a new record.
-"""
-function Record(data::Vector{UInt8})
-    return convert(Record, data)
-end
-
-function Base.convert(::Type{Record}, data::Vector{UInt8})
-    record = Record(
-        # data and filled
-        data, 1:0,
-        # chrom-alt
+        UInt8[], 1:0, 0,
         1:0, 1:0, UnitRange{Int}[], 1:0, UnitRange{Int}[],
-        # qual-genotype
-        1:0, UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[])
-    index!(record)
-    return record
+        1:0, UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[]
+    )
 end
 
+#copy constructor
 """
-    VCF.Record(str::AbstractString)
-Create a VCF object from `str` containing a VCF record.
-This function verifies the format and indexes fields for accessors.
+    Record(record::Record)
+Create a copy of the given VCF record.
 """
-function Record(str::AbstractString)
-    return convert(Record, str)
-end
-
-function Base.convert(::Type{Record}, str::AbstractString)
-    return Record(Vector{UInt8}(str))
-end
-
-function initialize!(record::Record)
-    record.filled = 1:0
-    record.chrom = 1:0
-    record.pos = 1:0
-    empty!(record.id)
-    record.ref = 1:0
-    empty!(record.alt)
-    record.qual = 1:0
-    empty!(record.filter)
-    empty!(record.infokey)
-    empty!(record.format)
-    empty!(record.genotype)
-    return record
-end
-
-function isfilled(record::Record)
-    return !isempty(record.filled)
-end
-
-function datarange(record::Record)
-    return record.filled
-end
-
-function Base.:(==)(record1::Record, record2::Record)
-    if isfilled(record1) == isfilled(record2) == true
-        r1 = datarange(record1)
-        r2 = datarange(record2)
-        return length(r1) == length(r2) && memcmp(pointer(record1.data, first(r1)), pointer(record2.data, first(r2)), length(r1)) == 0
-    else
-        return isfilled(record1) == isfilled(record2) == false
-    end
-end
-
-function checkfilled(record::Record)
-    if !isfilled(record)
-        throw(ArgumentError("unfilled VCF record"))
-    end
-end
-
 function Record(base::Record;
-                   chrom=nothing, pos=nothing, id=nothing,
-                   ref=nothing, alt=nothing, qual=nothing,
-                   filter=nothing, info=nothing, genotype=nothing)
+    chrom=nothing, pos=nothing, id=nothing,
+    ref=nothing, alt=nothing, qual=nothing,
+    filter=nothing, info=nothing, genotype=nothing)
     checkfilled(base)
     buf = IOBuffer()
 
@@ -305,48 +239,159 @@ function Record(base::Record;
     return Record(take!(buf))
 end
 
-function vcfformat(val)
-    return string(val)
+# Constructor from a data vector, delegating to convert
+"""
+    Record(data::Vector{UInt8})
+
+Create a VCF record from the given data vector.
+This will index the record and validate its structure.
+"""
+function Record(data::Vector{UInt8})
+    return convert(Record, data)
 end
 
-function vcfformat(val::Vector)
-    return join(map(vcfformat, val), ',')
+# Conversion from raw data vector to a Record
+function Base.convert(::Type{Record}, data::Vector{UInt8})
+    rec = Record(
+        data, 1:0, 0,
+        1:0, 1:0, UnitRange{Int}[], 1:0, UnitRange{Int}[],
+        1:0, UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[], UnitRange{Int}[]
+    )
+    index!(rec)  # note: index! must update rec.filled and all field ranges
+    return rec
+end
+
+# Constructor from a string (by converting to Vector{UInt8})
+"""
+    Record(str::AbstractString)
+
+Create a VCF record from the given string.
+"""
+function Record(str::AbstractString)
+    return convert(Record, str)
+end
+
+function Base.convert(::Type{Record}, str::AbstractString)
+    return convert(Record, Vector{UInt8}(str))
+end
+
+function Base.empty!(record::Record)
+    record.filled = 1:0
+    record.chrom = 1:0
+    record.pos = 1:0
+    empty!(record.id)
+    record.ref = 1:0
+    empty!(record.alt)
+    record.qual = 1:0
+    empty!(record.filter)
+    empty!(record.infokey)
+    empty!(record.format)
+    empty!(record.genotype)
+    return record
+end
+
+# Check whether record has been filled (i.e. indexed)
+function isfilled(record::Record)
+    return !isempty(record.filled)
 end
 
-function Base.copy(record::Record)
+# Return the range of valid data within the record
+function datarange(record::Record)
+    return record.filled
+end
+
+# Equality: compare filled regions
+function Base.:(==)(record1::Record, record2::Record)
+    if isfilled(record1) == isfilled(record2) == true
+        r1 = record1.filled
+        r2 = record2.filled
+        return length(r1) == length(r2) && memcmp(pointer(record1.data, first(r1)), pointer(record2.data, first(r2)), length(r1)) == 0
+    end
+
+    return isfilled(record1) == isfilled(record2) == false
+end
+
+# A helper for formatting INFO field values (simply converts to string)
+#vcfformat(val) = string(val)
+#vcfformat(val::Vector) = join(map(vcfformat, val), ",")
+
+function Base.copy(rec::Record)
     return Record(
-        copy(record.data),
-        record.filled,
-        record.chrom,
-        record.pos,
-        copy(record.id),
-        record.ref,
-        copy(record.alt),
-        record.qual,
-        copy(record.filter),
-        copy(record.infokey),
-        copy(record.format),
-        deepcopy(record.genotype))
+        rec.data[rec.filled],
+        rec.filled,
+        rec.ncols,
+        rec.chrom,
+        rec.pos,
+        copy(rec.id),
+        rec.ref,
+        copy(rec.alt),
+        rec.qual,
+        copy(rec.filter),
+        copy(rec.infokey),
+        copy(rec.format),
+        deepcopy(rec.genotype)
+    )
 end
 
 function Base.write(io::IO, record::Record)
-    checkfilled(record)
-    return write(io, record.data)
+    return unsafe_write(io, pointer(record.data, first(record.filled)), length(record.filled))
+end
+function Base.print(io::IO, record::Record)
+    write(io, record)
+    return nothing
 end
 
+function Base.show(io::IO, record::Record)
+    print(io, "VCF Record")
+    if isfilled(record)
+        println(io)
+        println(io, "  chromosome: ", haschrom(record) ? chrom(record) : "<missing>")
+        println(io, "      position: ", haspos(record) ? pos(record) : "<missing>")
+        println(io, "  identifier(s): ", hasid(record) ? join(id(record), " ") : "<missing>")
+        println(io, "   reference: ", hasref(record) ? ref(record) : "<missing>")
+        println(io, "   alternate: ", hasalt(record) ? join(alt(record), " ") : "<missing>")
+        println(io, "     quality: ", hasqual(record) ? qual(record) : "<missing>")
+        println(io, "      filter: ", hasfilter(record) ? join(filter(record), " ") : "<missing>")
+        print(io, "     information: ")
+        if hasinfo(record)
+            for (key, val) in info(record)
+                print(io, key)
+                if !isempty(val)
+                    print(io, '=', val, ' ')
+                else
+                    print(io, ' ')
+                end
+            end
+        else
+            print(io, "<missing>")
+        end
+        println(io)
+        print(io, "       format: ", hasformat(record) ? join(format(record), " ") : "<missing>")
+        if hasformat(record)
+            println(io)
+            print(io, "     genotype:")
+            for i in 1:length(record.genotype)
+                print(io, " [$(i)] ", begin
+                    g = genotype(record, i)
+                    isempty(g) ? "." : join(g, " ")
+                end)
+            end
+        end
+    else
+        print(io, " <not filled>")
+    end
+end
 
 # Accessor functions
 # ------------------
 
 """
     chrom(record::Record)::String
-Get the chromosome name of `record`.
+
+Return the chromosome as a String. Throws MissingFieldException if missing.
 """
 function chrom(record::Record)::String
     checkfilled(record)
-    if ismissing(record, record.chrom)
-        missingerror(:chrom)
-    end
     return String(record.data[record.chrom])
 end
 
@@ -356,15 +401,12 @@ end
 
 """
     pos(record::Record)::Int
-Get the reference position of `record`.
+
+Return the reference position as an Int. Throws MissingFieldException if missing.
 """
 function pos(record::Record)::Int
     checkfilled(record)
-    if ismissing(record, record.pos)
-        missingerror(:pos)
-    end
-    # TODO: no-copy accessor
-    return parse(Int, String(record.data[record.pos]))
+    return unsafe_parse_decimal(Int, record.data, record.pos)
 end
 
 function haspos(record::Record)
@@ -373,23 +415,26 @@ end
 
 """
     id(record::Record)::Vector{String}
-Get the identifiers of `record`.
+
+Return the identifiers from the record as a Vector of Strings.
+Throws MissingFieldException if missing.
 """
 function id(record::Record)::Vector{String}
     checkfilled(record)
-    if isempty(record.id)
+    if isempty(record.id) || ismissing(record, record.id[1])
         missingerror(:id)
     end
     return [String(record.data[r]) for r in record.id]
 end
 
 function hasid(record::Record)
-    return isfilled(record) && !isempty(record.id)
+    return record.ncols ≥ 3 && !isempty(record.id) && !ismissing(record, record.id[1])
 end
 
 """
     ref(record::Record)::String
-Get the reference bases of `record`.
+
+Return the reference bases as a String. Throws MissingFieldException if missing.
 """
 function ref(record::Record)::String
     checkfilled(record)
@@ -400,12 +445,14 @@ function ref(record::Record)::String
 end
 
 function hasref(record::Record)
-    return isfilled(record) && !ismissing(record, record.ref)
+    return record.ncols ≥ 4 && !ismissing(record, record.ref)
 end
 
 """
     alt(record::Record)::Vector{String}
-Get the alternate bases of `record`.
+
+Return the alternate alleles as a Vector of Strings.
+Throws MissingFieldException if missing.
 """
 function alt(record::Record)::Vector{String}
     checkfilled(record)
@@ -416,12 +463,14 @@ function alt(record::Record)::Vector{String}
 end
 
 function hasalt(record::Record)
-    return isfilled(record) && !isempty(record.alt)
+    return record.ncols ≥ 5 && !ismissing(record, record.alt[1])
 end
 
 """
     qual(record::Record)::Float64
-Get the quality score of `record`.
+
+Return the quality score as a Float64.
+Throws MissingFieldException if missing.
 """
 function qual(record::Record)::Float64
     checkfilled(record)
@@ -433,28 +482,32 @@ function qual(record::Record)::Float64
 end
 
 function hasqual(record::Record)
-    return isfilled(record) && !ismissing(record, record.qual)
+    return record.ncols ≥ 6 && !ismissing(record, record.qual)
 end
 
 """
     filter(record::Record)::Vector{String}
-Get the filter status of `record`.
+
+Return the filter field as a Vector of Strings.
+Throws MissingFieldException if missing.
 """
 function filter(record::Record)::Vector{String}
     checkfilled(record)
-    if isempty(record.filter)
+    if ismissing(record,record.filter[1])
         missingerror(:filter)
     end
     return [String(record.data[r]) for r in record.filter]
 end
 
 function hasfilter(record::Record)
-    return isfilled(record) && !isempty(record.filter)
-end
+    return record.ncols ≥ 7 && !ismissing(record, record.filter[1])
+end 
 
 """
     info(record::Record)::Vector{Pair{String,String}}
-Get the additional information of `record`.
+
+Return the INFO field as a vector of key=>value pairs.
+Throws MissingFieldException if missing.
 """
 function info(record::Record)::Vector{Pair{String,String}}
     checkfilled(record)
@@ -462,7 +515,8 @@ function info(record::Record)::Vector{Pair{String,String}}
         missingerror(:info)
     end
     ret = Pair{String,String}[]
-    for (i, key) in enumerate(record.infokey)
+    for i in eachindex(record.infokey)
+        key = record.infokey[i]
         val = infovalrange(record, i)
         push!(ret, String(record.data[key]) => String(record.data[val]))
     end
@@ -470,13 +524,14 @@ function info(record::Record)::Vector{Pair{String,String}}
 end
 
 function hasinfo(record::Record)
-    return isfilled(record) && !isempty(record.infokey)
+    return record.ncols ≥ 7 && !isempty( record.infokey)
 end
 
 """
     info(record::Record, key::String)::String
-Get the additional information of `record` with `key`.
-Keys without corresponding values return an empty string.
+
+Return the INFO field value for key.
+Throws KeyError if key is not present.
 """
 function info(record::Record, key::String)::String
     checkfilled(record)
@@ -485,19 +540,16 @@ function info(record::Record, key::String)::String
         throw(KeyError(key))
     end
     val = infovalrange(record, i)
-    if isempty(val)
-        return ""
-    else
-        return String(record.data[val])
-    end
+    return isempty(val) ? "" : String(record.data[val])
 end
 
 function hasinfo(record::Record, key::String)
-    return isfilled(record) && findinfokey(key) > 0
+    return record.ncols ≥ 7 && findinfokey(key) > 0
 end
 
+# Find the index of a key in the INFO keys
 function findinfokey(record::Record, key::String)
-    for i in 1:lastindex(record.infokey)
+    for i in eachindex(record.infokey)
         if isequaldata(key, record.data, record.infokey[i])
             return i
         end
@@ -507,34 +559,36 @@ end
 
 """
     infokeys(record::Record)::Vector{String}
-Get the keys of the additional information of `record`.
-This function returns an empty vector when the INFO field is missing.
+
+Return all keys in the INFO field.
 """
 function infokeys(record::Record)::Vector{String}
     checkfilled(record)
-    return [String(record.data[key]) for key in record.infokey]
+    return [String(record.data[r]) for r in record.infokey]
 end
 
-# Returns the data range of the `i`-th value.
+# Helper: given the index of an INFO key, return the range in data for its value.
 function infovalrange(record::Record, i::Int)
     checkfilled(record)
     data = record.data
-    key = record.infokey[i]
-    if last(key) + 1 ≤ lastindex(data) && data[last(key)+1] == UInt8('=')
-        endpos = something(findnext(isequal(0x3B), data, last(key)+1), 0) # 0x3B is byte equivalent of char ';'.
-        if endpos == 0
-            endpos = something(findnext(isequal(0x09), data, last(key)+1), 0) # 0x09 is byte equivalent of char '\t'
-            @assert endpos != 0
+    key_range = record.infokey[i]
+    if last(key_range) + 1 ≤ lastindex(data) && data[last(key_range)+1] == UInt8('=')
+        endpos = findnext(x -> x == UInt8(';'), data, last(key_range) + 1)
+        if endpos === nothing
+            endpos = findnext(x -> x == UInt8('\t'), data, last(key_range) + 1)
+            @assert endpos !== nothing "Could not determine end position in INFO value"
         end
-        return last(key)+2:endpos-1
+        return (last(key_range)+2):(endpos-1)
     else
-        return last(key)+1:last(key)
+        return (last(key_range)+1):last(key_range)
     end
 end
 
 """
     format(record::Record)::Vector{String}
-Get the genotype format of `record`.
+
+Return the genotype format fields as a vector of Strings.
+Throws MissingFieldException if missing.
 """
 function format(record::Record)::Vector{String}
     checkfilled(record)
@@ -545,42 +599,42 @@ function format(record::Record)::Vector{String}
 end
 
 function hasformat(record::Record)
-    return isfilled(record) && !isempty(record.format)
+    return record.ncols ≥ 7 && !isempty(record.format) && !ismissing(record, record.format[1])
 end
 
 """
     genotype(record::Record)::Vector{Vector{String}}
-Get the genotypes of `record`.
+
+Return all genotype fields as an array of arrays of Strings.
 """
 function genotype(record::Record)
     checkfilled(record)
-    ret = Vector{String}[]
-    for i in 1:lastindex(record.genotype)
-        push!(ret, genotype_impl(record, i, 1:lastindex(record.format)))
+    ret = Vector{Vector{String}}()
+    for i in 1:length(record.genotype)
+        push!(ret, genotype_impl(record, i, 1:length(record.format)))
     end
     return ret
 end
 
 """
     genotype(record::Record, index::Integer)::Vector{String}
-Get the genotypes of the `index`-th individual in `record`.
-This is effectively equivalent to `genotype(record)[index]` but more efficient.
+
+Return genotype fields for a specific individual.
 """
 function genotype(record::Record, index::Integer)
     checkfilled(record)
-    return genotype_impl(record, index, 1:lastindex(record.format))
+    return genotype_impl(record, index, 1:length(record.format))
 end
 
 """
-    genotype(record::Record, indexes, keys)
-Get the genotypes in `record` that match `indexes` and `keys`.
-`indexes` and `keys` can be either a scalar or a vector value.
-Trailing fields that are dropped are filled with `"."`.
+    genotype(record::Record, index::Integer, key::String)::String
+
+Return the genotype field for a specific individual and key.
 """
 function genotype(record::Record, index::Integer, key::String)::String
     checkfilled(record)
     k = findgenokey(record, key)
-    if k == nothing
+    if k === nothing
         throw(KeyError(key))
     end
     return genotype_impl(record, index, k)
@@ -591,84 +645,62 @@ function genotype(record::Record, index::Integer, keys::AbstractVector{String}):
     return [genotype(record, index, key) for key in keys]
 end
 
-function genotype(record::Record, indexes::AbstractVector{T}, key::String)::Vector{String} where T<:Integer
+function genotype(record::Record, indexes::AbstractVector{T}, key::String) where T<:Integer
     checkfilled(record)
     k = findgenokey(record, key)
-    if k == nothing
+    if k === nothing
         throw(KeyError(key))
     end
     return [genotype_impl(record, i, k) for i in indexes]
 end
 
-function genotype(record::Record, indexes::AbstractVector{T}, keys::AbstractVector{String})::Vector{Vector{String}} where T<:Integer
+function genotype(record::Record, indexes::AbstractVector{T}, keys::AbstractVector{String}) where T<:Integer
     checkfilled(record)
-    ks = Vector{Int}(length(keys))
-    for i in 1:lastindex(keys)
-        key = keys[i]
-        k = findgenokey(record, key)
-        if k == nothing
-            throw(KeyError(key))
-        end
-        ks[i] = k
-    end
+    ks = map(key -> begin
+            k = findgenokey(record, key)
+            if k === nothing
+                throw(KeyError(key))
+            end
+            k
+        end, keys)
     return [genotype_impl(record, i, ks) for i in indexes]
 end
 
 function genotype(record::Record, ::Colon, key::String)::Vector{String}
-    return genotype(record, 1:lastindex(record.genotype), key)
+    return genotype(record, 1:length(record.genotype), key)
 end
 
+# Find the index of a genotype key in the format field ranges
 function findgenokey(record::Record, key::String)
     return findfirst(r -> isequaldata(key, record.data, r), record.format)
 end
 
-function genotype_impl(record::Record, index::Int, keys::AbstractVector{Int})
+# Implementation for genotype accessor for a given genotype (index) and key(s)
+function genotype_impl(record::Record, index::Int, keys::Union{Int,AbstractVector{Int}})
+    if isa(keys, Int)
+        keys = [keys]
+    end
     return [genotype_impl(record, index, k) for k in keys]
 end
 
 function genotype_impl(record::Record, index::Int, key::Int)
     geno = record.genotype[index]
-    if key > lastindex(geno)  # dropped field
+    if key > length(geno)
         return "."
     else
         return String(record.data[geno[key]])
     end
 end
 
-function Base.show(io::IO, record::Record)
-    print(io, summary(record), ':')
-    if isfilled(record)
-        println(io)
-        println(io, "   chromosome: ", haschrom(record) ? chrom(record) : "<missing>")
-        println(io, "     position: ", haspos(record) ? pos(record) : "<missing>")
-        println(io, "   identifier: ", hasid(record) ? join(id(record), " ") : "<missing>")
-        println(io, "    reference: ", hasref(record) ? ref(record) : "<missing>")
-        println(io, "    alternate: ", hasalt(record) ? join(alt(record), " ") : "<missing>")
-        println(io, "      quality: ", hasqual(record) ? qual(record) : "<missing>")
-        println(io, "       filter: ", hasfilter(record) ? join(filter(record), " ") : "<missing>")
-          print(io, "  information: ")
-        if hasinfo(record)
-            for (key, val) in info(record)
-                print(io, key)
-                if !isempty(val)
-                    print(io, '=', val)
-                end
-                print(io, ' ')
-            end
-        else
-            print(io, "<missing>")
-        end
-        println(io)
-          print(io, "       format: ", hasformat(record) ? join(format(record), " ") : "<missing>")
-        if hasformat(record)
-            println(io)
-            print(io, "     genotype:")
-            for i in 1:lastindex(record.genotype)
-                print(io, " [$(i)] ", let x = genotype(record, i); isempty(x) ? "." : join(x, " "); end)
-            end
-        end
-    else
-        print(io, " <not filled>")
+# Check if the data in a given range equals the provided string.
+function isequaldata(str::String, data::Vector{UInt8}, range::UnitRange{Int})
+    return length(range) == sizeof(str) && ccall(:memcmp, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
+        pointer(data, first(range)), pointer(str), length(range)) == 0
+end
+
+function checkfilled(record::Record)
+    if !isfilled(record)
+        throw(ArgumentError("unfilled VCF record"))
     end
 end
 
@@ -676,10 +708,14 @@ function ismissing(record::Record, range::UnitRange{Int})
     return length(range) == 1 && record.data[first(range)] == UInt8('.')
 end
 
-# Check if `str == data[range]`
-function isequaldata(str::String, data::Vector{UInt8}, range::UnitRange{Int})
-    rlen = length(range)
-    return rlen == sizeof(str) && memcmp(pointer(data, first(range)), pointer(str), rlen) == 0
+
+
+function vcfformat(val)
+    return string(val)
+end
+
+function vcfformat(val::Vector)
+    return join(map(vcfformat, val), ',')
 end
 
 function memcmp(p1::Ptr, p2::Ptr, n::Integer)
diff --git a/src/vcf/vcf.jl b/src/vcf/vcf.jl
index 1889cfb..14e8847 100644
--- a/src/vcf/vcf.jl
+++ b/src/vcf/vcf.jl
@@ -5,19 +5,52 @@
 #
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
+#const MissingFieldException = BioGenerics.Exceptions.MissingFieldException
 
 module VCF
+using BioGenerics
+import BioGenerics: BioGenerics, isfilled, header
+import BioGenerics.Exceptions: MissingFieldException, missingerror
+import BioGenerics.Automa: State
+import TranscodingStreams: TranscodingStreams, TranscodingStream
 
-import Automa
-import Automa.RegExp: @re_str
-import BioCore: BioCore, isfilled
-import BioCore.Exceptions: missingerror
-import BufferedStreams
+#using Printf: @sprintf
+
+
+function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Unsigned}
+    x = zero(T)
+    @inbounds for i in range
+        x = Base.Checked.checked_mul(x, 10 % T)
+        x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T)
+    end
+    return x
+end
+
+# r"[-+]?[0-9]+" must match `data[range]`.
+function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{Int}) where {T<:Signed}
+    lo = first(range)
+    if data[lo] == UInt8('-')
+        sign = T(-1)
+        lo += 1
+    elseif data[lo] == UInt8('+')
+        sign = T(+1)
+        lo += 1
+    else
+        sign = T(+1)
+    end
+    x = zero(T)
+    @inbounds for i in lo:last(range)
+        x = Base.Checked.checked_mul(x, 10 % T)
+        x = Base.Checked.checked_add(x, (data[i] - UInt8('0')) % T)
+    end
+    return sign * x
+end
 
 include("record.jl")
 include("metainfo.jl")
 include("header.jl")
 include("reader.jl")
+include("readrecord.jl")
 include("writer.jl")
 
 end
diff --git a/src/vcf/writer.jl b/src/vcf/writer.jl
index 020c98a..ed6fc56 100644
--- a/src/vcf/writer.jl
+++ b/src/vcf/writer.jl
@@ -6,16 +6,27 @@
 # This file is a part of BioJulia.
 # License is MIT: https://github.com/BioJulia/GeneticVariation.jl/blob/master/LICENSE
 
-mutable struct Writer{T<:IO} <: BioCore.IO.AbstractWriter
-    stream::T
+# --- VCF Writer Type ---
+"""
+    mutable struct VCF.Writer{T<:IO} <: BioGenerics.IO.AbstractWriter
+
+A VCF writer that sends data to a wrapped IO stream.
+"""
+mutable struct Writer <: BioGenerics.IO.AbstractWriter
+    stream::IO
 end
 
+# Convenience constructor: if given a plain IO, wrap it in a NoopStream.
+#Writer(io::IO) = Writer(NoopStream(io))
+
+# --- VCF Writer: Header Writing ---
 """
     VCF.Writer(output::IO, header::VCF.Header)
-Create a data writer of the VCF file format.
+
+Create a VCF writer by writing the header to the provided output.
 # Arguments
-* `output`: data sink
-* `header`: VCF header object
+* `output`: data sink (IO)
+* `header`: a VCF.Header object that contains metadata (e.g. metainfo, sampleID, etc.)
 """
 function Writer(output::IO, header::Header)
     writer = Writer(output)
@@ -23,15 +34,19 @@ function Writer(output::IO, header::Header)
     return writer
 end
 
-function BioCore.IO.stream(writer::Writer)
+# Expose the underlying stream.
+function BioGenerics.IO.stream(writer::Writer)
     return writer.stream
 end
 
+# Write the VCF header.
 function Base.write(writer::Writer, header::Header)
     n = 0
+    # Write each meta-information line (assumed to be preformatted strings)
     for metainfo in header.metainfo
         n += write(writer.stream, metainfo, '\n')
     end
+    # Write the VCF header line with columns.
     n += write(writer.stream, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO")
     if !isempty(header.sampleID)
         n += write(writer.stream, "\tFORMAT")
@@ -43,6 +58,9 @@ function Base.write(writer::Writer, header::Header)
     return n
 end
 
+# Write an individual VCF record.
 function Base.write(writer::Writer, record::Record)
+    # Here we assume that the record object implements its own write method,
+    # or overload Base.write to be able to convert it to a line of text.
     return write(writer.stream, record, '\n')
 end
diff --git a/test/REQUIRE b/test/REQUIRE
index 83290e9..a5eae0d 100644
--- a/test/REQUIRE
+++ b/test/REQUIRE
@@ -1,2 +1,3 @@
 YAML
 IntervalTrees
+FormatSpecimens
\ No newline at end of file
diff --git a/test/allele_freq.jl b/test/allele_freq.jl
index 0588953..19887b0 100644
--- a/test/allele_freq.jl
+++ b/test/allele_freq.jl
@@ -2,23 +2,21 @@
     @testset "Gene frequencies" begin
 
         sequences = ["ATCGATCG", "AGGGGG", "CCCCCCCCCCCCCCC", "TTTTTCCCC",
-                     "ATCGATCG", "AGGGGG", "ATCGATCG", "ATCGATCG"]
+                    "ATCGATCG", "AGGGGG", "ATCGATCG", "ATCGATCG"]
 
         answer = Dict{String, Float64}("TTTTTCCCC" => 0.125,
-                                       "CCCCCCCCCCCCCCC" => 0.125,
-                                       "ATCGATCG" => 0.5,
-                                       "AGGGGG" => 0.25)
+                                        "CCCCCCCCCCCCCCC" => 0.125,
+                                        "ATCGATCG" => 0.5,
+                                        "AGGGGG" => 0.25)
 
-        function test_gene_frequencies(genes, answer, seqtype)
-            test_genes = convert(Vector{seqtype}, sequences)
-            test_answer = Dict{seqtype, Float64}(convert(seqtype, key) => val for (key, val) in answer)
+        function test_gene_frequencies(genes, answer)
+            test_genes = [BioSequences.bioseq(sq) for sq in sequences]
+            test_answer = Dict{LongDNA,Float64}(BioSequences.bioseq(key) => val for (key, val) in answer)
             @test gene_frequencies(test_genes) == test_answer
-            @test GeneticVariation._gene_frequencies(test_genes, seqtype, Base.SizeUnknown()) == test_answer
-            @test GeneticVariation._gene_frequencies(test_genes, seqtype, Base.HasShape{1}()) == test_answer
         end
 
         for n in (2, 4)
-            test_gene_frequencies(sequences, answer, BioSequence{DNAAlphabet{n}})
+            test_gene_frequencies(sequences, answer)
         end
     end
-end
+end
\ No newline at end of file
diff --git a/test/bcf.jl b/test/bcf.jl
index 0789e45..bdf4e28 100644
--- a/test/bcf.jl
+++ b/test/bcf.jl
@@ -8,7 +8,7 @@
     record.sharedlen = 0x1c
     record.indivlen = 0x00
     # generated from bcftools 1.3.1 (htslib 1.3.1)
-    record.data = parsehex("00 00 00 00 ff ff ff ff 01 00 00 00 01 00 80 7f 00 00 01 00 00 00 00 00 07 17 2e 00")
+    record.data = BCF.parsehex("00 00 00 00 ff ff ff ff 01 00 00 00 01 00 80 7f 00 00 01 00 00 00 00 00 07 17 2e 00")
     record.filled = 1:lastindex(record.data)
     @test BCF.chrom(record) == 1
     record = BCF.Record(record)
@@ -33,9 +33,9 @@
     @test BCF.info(record, 1) == 42
     @test BCF.info(record, 1, simplify=false) == [42]
 
-    bcfdir = joinpath(fmtdir, "BCF")
+    bcfdir = path_of_format("BCF")
     reader = BCF.Reader(open(joinpath(bcfdir, "example.bcf")))
-    let header = header(reader)
+    let header = BCF.header(reader)
         @test length(findall(header, "fileformat")) == 1
         @test findall(header, "fileformat")[1] == VCF.MetaInfo("##fileformat=VCFv4.2")
         @test length(findall(header, "FORMAT")) == 4
@@ -62,12 +62,14 @@
     close(reader)
 
     # round-trip test
-    for specimen in YAML.load_file(joinpath(bcfdir, "index.yml"))
+    bcfdir = path_of_format("BCF")
+    data = TOML.parsefile(joinpath(bcfdir, "index.toml"))
+    for specimen in data["valid"]
         filepath = joinpath(bcfdir, specimen["filename"])
         records = BCF.Record[]
         reader = open(BCF.Reader, filepath)
         output = IOBuffer()
-        writer = BCF.Writer(output, header(reader))
+        writer = BCF.Writer(output, BCF.header(reader))
         for record in reader
             write(writer, record)
             push!(records, record)
diff --git a/test/diversity_measures.jl b/test/diversity_measures.jl
index 4b5d8ff..9f88261 100644
--- a/test/diversity_measures.jl
+++ b/test/diversity_measures.jl
@@ -42,4 +42,4 @@
                 dna"AAAAAAAAAAAAAAATAAAAAAATAATAAAAAAAAAAAAAA"]
 
     @test avg_mut(testSeqs) ≈ 3.888888 atol=10e-5
-end
+end
\ No newline at end of file
diff --git a/test/minhash.jl b/test/minhash.jl
index 6cc2c3e..ba200e9 100644
--- a/test/minhash.jl
+++ b/test/minhash.jl
@@ -1,7 +1,21 @@
 @testset "MASH distances" begin
-    a = minhash(dna"ATCGCCA-", 4, 3)
-    b = minhash(dna"ATCGCCTA", 4, 3)
-    @test mash(a, b) ≈ 0.2745 atol=0.001
-    @test mash(a, a) == 0
-    @test a.sketch == sort(a.sketch)
-end
+    # Define sequences
+    seq_a = "ATCGCCA-"
+    seq_b = "ATCGCCTA"
+
+    # Set parameters
+    k = 4  # k-mer size
+    s = 100  # sketch size
+
+    # Create sketches
+    sketch_a = create_sketch(seq_a, k, s)
+    sketch_b = create_sketch(seq_b, k, s)
+
+    # Compute Mash distance
+    mash_dist = mash(sketch_a, sketch_b, k)
+
+    # Perform tests
+    @test isapprox(mash_dist, 0.1277, atol=0.001)
+    @test mash(sketch_a, sketch_a, k) == 0.0
+    @test issorted(sketch_a.hashes)
+end
\ No newline at end of file
diff --git a/test/proportion.jl b/test/proportion.jl
index e71a0ed..45be0de 100644
--- a/test/proportion.jl
+++ b/test/proportion.jl
@@ -1,16 +1,17 @@
-@testset "Proportion distances" begin
 
+@testset "Proportion distances" begin
     @testset "Biological Sequences" begin
         dnaA = dna"ATCGCCA-"
         dnaB = dna"ATCCCCTA"
-        rnaA = rna"ATCGCCA-"
-        rnaB = rna"ATCCCCTA"
+        rnaA = rna"AUCGCCA-"
+        rnaB = rna"AUCCCCUA"
 
-        @test_approx_eq_eps pdistance(Mismatch, dnaA, dnaB) 0.375 1e-3
-        @test_approx_eq_eps pdistance(Mismatch, rnaA, rnaB) 0.375 1e-3
+        # Test Mismatch distance
+        @test isapprox(pdistance(dnaA, dnaB), 0.375; atol=1e-3)
+        @test isapprox(pdistance(rnaA, rnaB), 0.375; atol=1e-3)
 
-        @test_approx_eq_eps pdistance(Mutated, dnaA, dnaB) 0.2857142857142857 1e-3
-        @test_approx_eq_eps pdistance(Mutated, rnaA, rnaB) 0.2857142857142857 1e-3
+        # Test Mutated distance
+        @test isapprox(pdistance_mutated(dnaA, dnaB), 0.2857142857142857; atol=1e-3)
+        @test isapprox(pdistance_mutated(rnaA, rnaB), 0.2857142857142857; atol=1e-3)
     end
-
-end
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 89514d6..0eba9ea 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,15 +2,14 @@ module TestGeneticVariation
 
 using Test
 
-import BioCore.Testing:
-    get_bio_fmt_specimens,
-    random_seq,
-    random_interval
-import BioCore.Exceptions.MissingFieldException
 using BioSequences, GeneticVariation
-import BufferedStreams: BufferedInputStream
+using TranscodingStreams
 import IntervalTrees: IntervalValue
-import YAML
+import TOML
+using FormatSpecimens
+
+import GeneticVariation.VCF: isfilled, metainfotag, metainfoval, VCF, VCF.Reader, VCF.Writer, VCF.Record
+import BioGenerics.Exceptions: MissingFieldException
 
 function random_seq(::Type{A}, n::Integer) where A <: Alphabet
     nts = alphabet(A)
@@ -19,8 +18,6 @@ function random_seq(::Type{A}, n::Integer) where A <: Alphabet
     return BioSequence{A}(random_seq(n, nts, probs))
 end
 
-fmtdir = get_bio_fmt_specimens()
-
 include("vcf.jl")
 include("bcf.jl")
 include("site_counting.jl")
diff --git a/test/seg_sites.jl b/test/seg_sites.jl
index cc9b625..3072771 100644
--- a/test/seg_sites.jl
+++ b/test/seg_sites.jl
@@ -1,7 +1,6 @@
-@testset "Segregating sites" begin
-
+@testset "Segregating Sites" begin
     S = [dna"aatcga", dna"aaa", dna"tatcg", dna"catcgac", dna"aatc"]
-    @test count(Segregating, S)[1] == 2 && count(Segregating, S)[2] == 3
+    @test count_segregating_sites(S) == (2, 3)
 
     S = [dna"ATAATAAAAAAATAATAAAAAAATAAAAAAAATAAAAAAAA",
          dna"AAAAAAAATAAATAATAAAAAAATAAAAAAAAAAAAAAAAA",
@@ -13,6 +12,6 @@
          dna"AAAAAAAAAAAAAAATAAAAAAATAAAAAAAAAAAAAAATA",
          dna"AAAAAAAAAAAAAAAAAAAAAAATAAAAAAAAAAAAAAAAA",
          dna"AAAAAAAAAAAAAAATAAAAAAATAATAAAAAAAAAAAAAA"]
-    @test count(Segregating, S)[1] == 16 && count(Segregating, S)[2] == 41
 
+    @test count_segregating_sites(S) == (16, 41)
 end
diff --git a/test/site_counting.jl b/test/site_counting.jl
index c7b807d..c5df8a6 100644
--- a/test/site_counting.jl
+++ b/test/site_counting.jl
@@ -1,244 +1,70 @@
 @testset "Site counting" begin
+    @testset "Site Counting" begin
+        using BioSequences
 
-    dna_alphabets = (DNAAlphabet{4}, DNAAlphabet{2})
-    rna_alphabets = (RNAAlphabet{4}, RNAAlphabet{2})
+        # Define test sequences
+        seq1 = dna"ATCG"
+        seq2 = dna"ATGG"
 
-    @testset "Specific count methods" begin
+        # Test matches and mismatches
+        @test matches(seq1, seq2) == 3
+        @test mismatches(seq1, seq2) == 1
 
-        function generate_possibilities_tester(::Type{A}) where A <: NucAlphs
-            symbols = alphabet(A)
-            arra = Vector{eltype(A)}()
-            arrb = Vector{eltype(A)}()
-            for i in 1:length(symbols), j in i:length(symbols)
-                push!(arra, symbols[i])
-                push!(arrb, symbols[j])
-            end
-            return BioSequence{A}(arra), BioSequence{A}(arrb)
-        end
-
-        @testset "2 bit" begin
-            dnaA, dnaB = generate_possibilities_tester(DNAAlphabet{2})
-            rnaA, rnaB = generate_possibilities_tester(RNAAlphabet{2})
-
-            @test count(Conserved, dnaA, dnaB) == count(Conserved, dnaB, dnaA) == 4
-            @test count(Mutated, dnaA, dnaB) == count(Mutated, dnaB, dnaA) == 6
-
-            @test count(Conserved, rnaA, rnaB) == count(Conserved, rnaB, rnaA) == 4
-            @test count(Mutated, rnaA, rnaB) == count(Mutated, rnaB, rnaA) == 6
-        end
-
-        @testset "4 bit" begin
-            dnaA, dnaB = generate_possibilities_tester(DNAAlphabet{4})
-            rnaA, rnaB = generate_possibilities_tester(RNAAlphabet{4})
-
-            @test count(Conserved, dnaA, dnaB) == count(Conserved, dnaB, dnaA) == (4, 10)
-            @test count(Mutated, dnaA, dnaB) == count(Mutated, dnaB, dnaA) == (6, 10)
-
-            @test count(Conserved, rnaA, rnaB) == count(Conserved, rnaB, rnaA) == (4, 10)
-            @test count(Mutated, rnaA, rnaB) == count(Mutated, rnaB, rnaA) == (6, 10)
-        end
+        # Test iscertain count
+        @test count(iscertain, seq1) == 4
+        @test count(((x, y),) -> iscertain(x) && iscertain(y), zip(seq1, seq2)) == 4
     end
 
-    @testset "Randomized tests" begin
-        # A test counting function which is naive.
-        @inline function issite(::Type{Conserved}, a::BioSequence, b::BioSequence, idx)
-            return a[idx] == b[idx]
-        end
-        @inline function issite(::Type{Mutated}, a::BioSequence, b::BioSequence, idx)
-            return a[idx] != b[idx]
-        end
-        @inline function testcount2(::Type{P}, a::BioSequence, b::BioSequence) where P <: BioSequences.Position
-            k = 0
-            @inbounds for idx in 1:min(lastindex(a), lastindex(b))
-                k += issite(P, a, b, idx)
-            end
-            return k
-        end
-        @inline function testcount(::Type{P}, a::BioSequence, b::BioSequence) where P <: BioSequences.Position
-            k, c = 0, 0
-            @inbounds for idx in 1:min(lastindex(a), lastindex(b))
-                isvalid = !(isgap(a[idx]) || isgap(b[idx])) && !(isambiguous(a[idx]) || isambiguous(b[idx]))
-                k += issite(P, a, b, idx) && isvalid
-                c += isvalid
-            end
-            return k, c
-        end
-        function testcounting(::Type{S}, a::BioSequence, b::BioSequence) where S <: Site
-            @test count(S, a, b) == count(S, b, a) == testcount(S, a, b)
-        end
-        function testcounting2(::Type{S}, a::BioSequence, b::BioSequence) where S <: Site
-            @test count(S, a, b) == count(S, b, a) == testcount2(S, a, b)
-        end
-        function testforencs(a::Int, b::Int, subset::Bool)
-            for alphabet in (DNAAlphabet, RNAAlphabet)
-                for _ in  1:10
-                    seqA = random_seq(alphabet{a}, rand(10:100))
-                    seqB = random_seq(alphabet{b}, rand(10:100))
-                    sa = seqA
-                    sb = seqB
-                    if subset
-                        intA = random_interval(1, length(seqA))
-                        intB = random_interval(1, length(seqB))
-                        subA = seqA[intA]
-                        subB = seqB[intB]
-                        sa = subA
-                        sb = subB
-                    end
-                    if a == 2 && b == 2
-                        testcounting2(Conserved, sa, sb)
-                        testcounting2(Mutated, sa, sb)
-                    else
-                        testcounting(Conserved, sa, sb)
-                        testcounting(Mutated, sa, sb)
-                    end
-                end
-            end
-        end
-
-        @testset "Testing 4-bit seq pairs" begin
-            @testset "Full random sequences" begin
-                testforencs(4, 4, false)
-            end
-            @testset "Subset random sequences" begin
-                testforencs(4, 4, true)
-            end
-        end
+    @testset "Randomized Tests" begin
+        # Generate random sequences
+        seq_length = 100
+        seq1 = randdnaseq(seq_length)
+        seq2 = randdnaseq(seq_length)
 
-        @testset "Testing 2-bit seq pairs" begin
-            @testset "Full random sequences" begin
-                testforencs(2, 2, false)
-            end
-            @testset "Subset random sequences" begin
-                testforencs(2, 2, true)
-            end
-        end
+        # Count matches and mismatches
+        match_count = matches(seq1, seq2)
+        mismatch_count = mismatches(seq1, seq2)
 
-        @testset "Testing mixed bit seq pairs" begin
-            @testset "Full random sequences" begin
-                testforencs(4, 2, false)
-                testforencs(2, 4, false)
-            end
-            @testset "Subset random sequences" begin
-                testforencs(4, 2, true)
-                testforencs(2, 4, true)
-            end
-        end
+        # Total should equal sequence length
+        @test match_count + mismatch_count == seq_length
     end
 
-    @testset "Pairwise methods" begin
-        @testset "4-bit encoded sequences" begin
-            dnas = [dna"ATCGCCA-", dna"ATCGCCTA", dna"ATCGCCT-", dna"GTCGCCTA"]
-            rnas = [rna"AUCGCCA-", rna"AUCGCCUA", rna"AUCGCCU-", rna"GUCGCCUA"]
 
-            answer_mutated = [(0,0) (1,7) (1,7) (2,7);
-                              (1,7) (0,0) (0,7) (1,8);
-                              (1,7) (0,7) (0,0) (1,7);
-                              (2,7) (1,8) (1,7) (0,0)]
+        @testset "Pairwise Methods" begin
+        using BioSequences
 
-            answer_conserved = [(0,0) (6,7) (6,7) (5,7);
-                                (6,7) (0,0) (7,7) (7,8);
-                                (6,7) (7,7) (0,0) (6,7);
-                                (5,7) (7,8) (6,7) (0,0)]
-            for i in (dnas, rnas)
-                @test count_pairwise(Mutated, i...) == answer_mutated
-                @test count_pairwise(Conserved, i...) == answer_conserved
-            end
-        end
-        @testset "2-bit encoded sequences" begin
-            dnas = [BioSequence{DNAAlphabet{2}}("ATCGCCAC"),
-                    BioSequence{DNAAlphabet{2}}("ATCGCCTA"),
-                    BioSequence{DNAAlphabet{2}}("ATCGCCTT"),
-                    BioSequence{DNAAlphabet{2}}("GTCGCCTA")]
-            rnas = [BioSequence{RNAAlphabet{2}}("AUCGCCAC"),
-                    BioSequence{RNAAlphabet{2}}("AUCGCCUA"),
-                    BioSequence{RNAAlphabet{2}}("AUCGCCUU"),
-                    BioSequence{RNAAlphabet{2}}("GUCGCCUA")]
-            answer_mutated = [0 2 2 3;
-                              2 0 1 1;
-                              2 1 0 2;
-                              3 1 2 0]
-            answer_conserved = [0 6 6 5;
-                                6 0 7 7;
-                                6 7 0 6;
-                                5 7 6 0]
-            for i in (dnas, rnas)
-                @test count_pairwise(Mutated, i...) == answer_mutated
-                @test count_pairwise(Conserved, i...) == answer_conserved
-            end
-        end
-    end
+        sequences = [
+            dna"ATCG",
+            dna"ATGG",
+            dna"TTGG"
+        ]
 
-    @testset "Windowed methods" begin
-        @testset "4-bit encoded sequences" begin
-            dnaA = dna"ATCGCCA-M"
-            dnaB = dna"ATCGCCTAA"
-            rnaA = rna"AUCGCCA-M"
-            rnaB = rna"AUCGCCUAA"
-
-            for seqs in ((dnaA, dnaB), (rnaA, rnaB))
-                @test count(Conserved, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, (3,3)),
-                                                                   IntervalValue(2, 4, (3,3)),
-                                                                   IntervalValue(3, 5, (3,3)),
-                                                                   IntervalValue(4, 6, (3,3)),
-                                                                   IntervalValue(5, 7, (2,3)),
-                                                                   IntervalValue(6, 8, (1,2)),
-                                                                   IntervalValue(7, 9, (0,1))]
-                @test count(Mutated, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, (0,3)),
-                                                                 IntervalValue(2, 4, (0,3)),
-                                                                 IntervalValue(3, 5, (0,3)),
-                                                                 IntervalValue(4, 6, (0,3)),
-                                                                 IntervalValue(5, 7, (1,3)),
-                                                                 IntervalValue(6, 8, (1,2)),
-                                                                 IntervalValue(7, 9, (1,1))]
+        # Compute pairwise mismatches
+        for i in eachindex(sequences)
+            for j in i:length(sequences)
+                seq1 = sequences[i]
+                seq2 = sequences[j]
+                mismatch = mismatches(seq1, seq2)
+                match = matches(seq1, seq2)
+                @test mismatch + match == length(seq1)
             end
         end
+    end
 
-        @testset "2-bit encoded sequences" begin
-            dnaA = BioSequence{DNAAlphabet{2}}("ATCGCCATT")
-            dnaB = BioSequence{DNAAlphabet{2}}("ATCGCCTAA")
-            rnaA = BioSequence{RNAAlphabet{2}}("AUCGCCAUU")
-            rnaB = BioSequence{RNAAlphabet{2}}("AUCGCCUAA")
 
-            for seqs in ((dnaA, dnaB), (rnaA, rnaB))
-                @test count(Conserved, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, 3),
-                                                                   IntervalValue(2, 4, 3),
-                                                                   IntervalValue(3, 5, 3),
-                                                                   IntervalValue(4, 6, 3),
-                                                                   IntervalValue(5, 7, 2),
-                                                                   IntervalValue(6, 8, 1),
-                                                                   IntervalValue(7, 9, 0)]
-                @test count(Mutated, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, 0),
-                                                                 IntervalValue(2, 4, 0),
-                                                                 IntervalValue(3, 5, 0),
-                                                                 IntervalValue(4, 6, 0),
-                                                                 IntervalValue(5, 7, 1),
-                                                                 IntervalValue(6, 8, 2),
-                                                                 IntervalValue(7, 9, 3)]
-            end
-        end
+    @testset "Windowed Methods" begin
+        using BioSequences
 
-        @testset "Mixed encodings" begin
-            dnaA = dna"ATCGCCA-M"
-            dnaB = BioSequence{DNAAlphabet{2}}("ATCGCCTAA")
-            rnaA = rna"AUCGCCA-M"
-            rnaB = BioSequence{RNAAlphabet{2}}("AUCGCCUAA")
+        seq1 = dna"ATCGATCG"
+        seq2 = dna"ATGGATCC"
+        window_size = 4
 
-            for seqs in ((dnaA, dnaB), (rnaA, rnaB))
-                @test count(Conserved, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, (3,3)),
-                                                                   IntervalValue(2, 4, (3,3)),
-                                                                   IntervalValue(3, 5, (3,3)),
-                                                                   IntervalValue(4, 6, (3,3)),
-                                                                   IntervalValue(5, 7, (2,3)),
-                                                                   IntervalValue(6, 8, (1,2)),
-                                                                   IntervalValue(7, 9, (0,1))]
-                @test count(Mutated, seqs[1], seqs[2], 3, 1) == [IntervalValue(1, 3, (0,3)),
-                                                                 IntervalValue(2, 4, (0,3)),
-                                                                 IntervalValue(3, 5, (0,3)),
-                                                                 IntervalValue(4, 6, (0,3)),
-                                                                 IntervalValue(5, 7, (1,3)),
-                                                                 IntervalValue(6, 8, (1,2)),
-                                                                 IntervalValue(7, 9, (1,1))]
-            end
+        for i in 1:(length(seq1)-window_size+1)
+            window1 = seq1[i:i+window_size-1]
+            window2 = seq2[i:i+window_size-1]
+            match = matches(window1, window2)
+            mismatch = mismatches(window1, window2)
+            @test match + mismatch == window_size
         end
     end
-end
+end
\ No newline at end of file
diff --git a/test/vcf.jl b/test/vcf.jl
index d94d18d..f99face 100644
--- a/test/vcf.jl
+++ b/test/vcf.jl
@@ -1,11 +1,11 @@
 @testset "VCF" begin
     metainfo = VCF.MetaInfo()
     @test !isfilled(metainfo)
-    @test occursin(r"^GeneticVariation.VCF.MetaInfo: <not filled>", repr(metainfo))
+    @test occursin(r"^VCF MetaInfo \(GeneticVariation\.VCF\.MetaInfo\): <not filled>", repr(metainfo))
     @test_throws ArgumentError metainfotag(metainfo)
 
     metainfo = VCF.MetaInfo(Vector{UInt8}("##source=foobar1234"))
-    @test isfilled(metainfo)
+    @test isfilled(metainfo)#
     @test metainfotag(metainfo) == "source"
     @test metainfoval(metainfo) == "foobar1234"
 
@@ -30,7 +30,7 @@
 
     record = VCF.Record()
     @test !isfilled(record)
-    @test occursin(r"^GeneticVariation.VCF.Record: <not filled>", repr(record))
+    @test occursin(r"^VCF Record <not filled>", repr(record))
     @test_throws ArgumentError VCF.chrom(record)
 
     record = VCF.Record("20\t302\t.\tT\tTA\t999\t.\t.\tGT")
@@ -120,7 +120,8 @@
     ##fileformat=VCFv4.3
     #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
     """)
-    reader = VCF.Reader(BufferedInputStream(data))
+    # Create an IOBuffer from the raw byte data.
+    reader = VCF.Reader(IOBuffer(Vector{UInt8}(data)))
     @test isa(header(reader), VCF.Header)
     let header = header(reader)
         @test length(header.metainfo) == 1
@@ -128,6 +129,7 @@
         @test metainfoval(header.metainfo[1]) == "VCFv4.3"
         @test isempty(header.sampleID)
     end
+    
 
     # realistic header
     data = Vector{UInt8}("""
@@ -143,7 +145,7 @@
     ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
     #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA00001	NA00002	NA00003
     """)
-    reader = VCF.Reader(BufferedInputStream(data))
+    reader = VCF.Reader(IOBuffer(Vector{UInt8}(data)))
     @test isa(header(reader), VCF.Header)
 
     let header = header(reader)
@@ -199,7 +201,7 @@
     chr1\t1234\trs001234\tA\tC\t30\tPASS\tDP=10;AF=0.3\tGT\t0|0\t0/1
     chr2\t4\t.\tA\tAA,AAT\t.\t.\tDP=5\tGT:DP\t0|1:42\t0/1
     """)
-    reader = VCF.Reader(BufferedInputStream(data))
+    reader = VCF.Reader(IOBuffer(Vector{UInt8}(data)))
     record = VCF.Record()
 
     @test read!(reader, record) === record
@@ -221,8 +223,7 @@
     @test VCF.genotype(record, 2, "GT") == "0/1"
     @test VCF.genotype(record, 1:2, "GT") == ["0|0", "0/1"]
     @test VCF.genotype(record, :, "GT") == VCF.genotype(record, 1:2, "GT")
-    @test occursin(r"^GeneticVariation.VCF.Record:\n.*", repr(record))
-
+    @test occursin(r"^VCF Record\n.*", repr(record))
     @test read!(reader, record) === record
     @test VCF.chrom(record) == "chr2"
     @test VCF.pos(record) == 4
@@ -250,8 +251,10 @@
     @test_throws EOFError read!(reader, record)
 
     # round-trip test
-    vcfdir = joinpath(fmtdir, "VCF")
-    for specimen in YAML.load_file(joinpath(vcfdir, "index.yml"))
+    vcfdir = path_of_format("VCF")
+    data = TOML.parsefile(joinpath(vcfdir, "index.toml"))
+
+    for specimen in data["valid"]
         filepath = joinpath(vcfdir, specimen["filename"])
         records = VCF.Record[]
         reader = open(VCF.Reader, filepath)
@@ -268,10 +271,7 @@
         for record in VCF.Reader(IOBuffer(take!(output)))
             push!(records2, record)
         end
+
         @test records == records2
     end
 end
-
-function parsehex(str)
-    return map(x -> parse(UInt8, x, base = 16), split(str, ' '))
-end