Skip to content

Commit

Permalink
Merge pull request #4 from mashu/VisualizationExt
Browse files Browse the repository at this point in the history
Visualization ext
  • Loading branch information
mashu authored Oct 13, 2024
2 parents 6237ea6 + 464fac7 commit 2ebeb71
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 81 deletions.
9 changes: 8 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"

[weakdeps]
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"

[extensions]
VisualizationExt = "CairoMakie"

[compat]
BioSequences = "3.1.6"
CSV = "0.10.14"
Expand All @@ -21,7 +27,8 @@ ProgressMeter = "1.10.2"
julia = "1.10"

[extras]
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
test = ["Test", "CairoMakie"]
82 changes: 82 additions & 0 deletions ext/VisualizationExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
module VisualizationExt
using LineageCollapse
using CairoMakie
using DataFrames
import LineageCollapse: plot_diagnostics

"""
    plot_diagnostics(df::DataFrame)

Build a 2×2 figure of diagnostic plots for processed lineage data using CairoMakie.

Each panel is drawn only when the columns it needs are present in `df`; otherwise an
empty axis whose title ends in "Not Available" is placed in that cell:

- **A** (row 1, col 1): histogram of `cluster_size`.
- **B** (row 1, col 2): scatter of `cdr3_length` against `cluster_size`.
- **C** (row 2, col 1): histogram of `cdr3_frequency`.
- **D** (row 2, col 2): bar plot of the row counts of the (at most) ten most frequent
  `v_call_first` values. If building this panel throws, a warning is logged and the
  panel title is changed to report the error instead of propagating it.

# Arguments
- `df::DataFrame`: Processed DataFrame with lineage information.

# Returns
- `Figure`: A composite figure with the four panels, a figure-level title and the
  panel letters A–D.
"""
function plot_diagnostics(df::DataFrame)
    fig = Figure(size=(1200, 1000), fontsize=12)
    has_cluster_size = hasproperty(df, :cluster_size)

    # Panel A: cluster size distribution
    if has_cluster_size
        ax1 = Axis(fig[1, 1], title="Cluster Size Distribution",
                   xlabel="Cluster Size", ylabel="Frequency")
        hist!(ax1, df.cluster_size, bins=50, color=:skyblue,
              strokecolor=:white, strokewidth=1)
    else
        ax1 = Axis(fig[1, 1], title="Cluster Size Distribution Not Available")
    end

    # Panel B: CDR3 length vs cluster size
    if hasproperty(df, :cdr3_length) && has_cluster_size
        ax2 = Axis(fig[1, 2], title="CDR3 Length vs Cluster Size",
                   xlabel="CDR3 Length", ylabel="Cluster Size")
        scatter!(ax2, df.cdr3_length, df.cluster_size, color=:darkblue,
                 markersize=4, alpha=0.5)
    else
        ax2 = Axis(fig[1, 2], title="CDR3 Length vs Cluster Size Not Available")
    end

    # Panel C: CDR3 frequency distribution
    if hasproperty(df, :cdr3_frequency)
        ax3 = Axis(fig[2, 1], title="CDR3 Frequency Distribution",
                   xlabel="CDR3 Frequency", ylabel="Count")
        hist!(ax3, df.cdr3_frequency, bins=50, color=:lightgreen,
              strokecolor=:white, strokewidth=1)
    else
        ax3 = Axis(fig[2, 1], title="CDR3 Frequency Distribution Not Available")
    end

    # Panel D: top 10 V genes. The axis is created up front so that both the
    # "not available" and the error path only need to retitle it.
    ax4 = Axis(fig[2, 2], title="Top 10 V Genes", xlabel="V Gene", ylabel="Count")
    if hasproperty(df, :v_call_first)
        try
            counts = combine(groupby(df, :v_call_first), nrow => :count)
            # `first(df, n)` returns fewer than `n` rows when fewer exist.
            top = first(sort(counts, :count, rev=true), 10)
            barplot!(ax4, top.count, color=:orange)
            # Convert the labels explicitly so the tick labels are always a
            # Vector{String}, whatever the element type of the column is.
            ax4.xticks = (1:nrow(top), string.(top.v_call_first))
        catch e
            @warn "Error processing V gene counts: $e"
            ax4.title = "Top 10 V Genes (Error in Processing)"
        end
    else
        ax4.title = "Top 10 V Genes Not Available"
    end

    # One x tick label rotation for every panel. This is the only place the
    # rotation is set, so it is the value that takes effect for all four axes.
    for ax in (ax1, ax2, ax3, ax4)
        ax.xticklabelrotation = π/4
    end

    # Figure-level title, placed in a new row above the 2×2 grid.
    Label(fig[0, :], "Diagnostic Plots for Lineage Collapse", fontsize=20)

    # Panel letters, placed in the top-left protrusion of each grid cell.
    cells = [fig[1, 1], fig[1, 2], fig[2, 1], fig[2, 2]]
    for (letter, cell) in zip(["A", "B", "C", "D"], cells)
        Label(cell[1, 1, TopLeft()], letter,
              fontsize = 26,
              padding = (6, 6, 6, 6),
              halign = :right)
    end

    return fig
end
end
3 changes: 1 addition & 2 deletions src/LineageCollapse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ module LineageCollapse
using ProgressMeter
using Clustering
using BioSequences
using CairoMakie

export load_data, preprocess_data, process_lineages, plot_diagnostics
# Generic function stub with no methods of its own. The
# `plot_diagnostics(df::DataFrame)` method is defined in ext/VisualizationExt.jl,
# the package extension that Project.toml declares for CairoMakie.
function plot_diagnostics end

include("data_loading.jl")
include("preprocessing.jl")
include("lineage_processing.jl")
include("visualization.jl")
end
6 changes: 4 additions & 2 deletions src/lineage_processing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ function process_lineages(df::DataFrame;
grouped = groupby(df, [:v_call_first, :j_call_first, :cdr3_length])
processed_groups = Vector{DataFrame}()

@showprogress "Processing lineages" for group in grouped
prog = Progress(length(grouped), desc="Processing lineages")
for group in grouped
next!(prog)
if nrow(group) > 1
dist = pairwise_hamming(LongDNA{4}.(group.cdr3))
hclusters = hclust(dist, linkage=:average)
Expand All @@ -76,6 +78,6 @@ function process_lineages(df::DataFrame;
push!(processed_groups, cgroup)
end
end

finish!(prog)
return vcat(processed_groups...)
end
75 changes: 0 additions & 75 deletions src/visualization.jl

This file was deleted.

3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
using Test
using DataFrames
using BioSequences
using CairoMakie
using LineageCollapse
using CSV
using CairoMakie

@testset "LineageCollapse.jl" begin
@testset "Data Loading" begin
Expand Down Expand Up @@ -88,6 +88,7 @@ using CairoMakie
end
end

# Test visualization functionality (CairoMakie is imported above, so the VisualizationExt extension is loaded)
@testset "Visualization" begin
test_df = DataFrame(
cluster_size = [1, 2, 3, 4, 5],
Expand Down

0 comments on commit 2ebeb71

Please sign in to comment.