-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path07_make_avg_NI_ridgelines.qmd
71 lines (59 loc) · 2.57 KB
/
07_make_avg_NI_ridgelines.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
---
title: "07_make_ridgelines"
date: 02/13/2024
date-format: YYYY-MM-DD
author: Karl F Poncha
output: html
execute:
  echo: false
---
```{python}
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Every path is resolved against the current working directory, so the
# 'output', 'data' and 'figs' folders are expected to sit under the directory
# this code is run from.
basedir = os.getcwd()
inputdir, expt_annotation_dir, outputdir = (
    os.path.join(basedir, *subpath)
    for subpath in (
        ('output', 'averaged_interplay_scores'),   # score CSVs read below
        ('data', 'experimental_annotations'),      # annotation table
        ('figs', 'ridgelines'),                    # where figures are written
    )
)

# Create the figure folder up front; an already existing folder is accepted
os.makedirs(outputdir, exist_ok=True)

# Read the annotation table
annotation_path = os.path.join(
    expt_annotation_dir,
    '02_averaged_interplay_experimental_annotation.csv',
)
annotations = pd.read_csv(annotation_path)
# Load one score CSV and attach its annotation values
def load_and_merge_data(filename, annotations):
    """Read one CSV from ``inputdir`` and tag every row with its annotation.

    Parameters
    ----------
    filename
        Name of a CSV file inside ``inputdir``; the same value is looked up
        in the ``filename`` column of ``annotations``.
    annotations
        DataFrame with at least the columns ``filename``, ``organ``, ``age``
        and ``histone``.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with three extra columns (``organ``, ``age``,
        ``histone``), each holding the value taken from the first annotation
        row whose ``filename`` matches.

    Raises
    ------
    IndexError
        If no row of ``annotations`` matches ``filename`` (raised by
        ``.iloc[0]`` on the empty selection).
    """
    scores = pd.read_csv(os.path.join(inputdir, filename))

    # Only the first matching row is used, even if the file is listed twice
    matching_rows = annotations[annotations['filename'] == filename]
    first_match = matching_rows.iloc[0]

    # Assigning a scalar broadcasts the annotation value to every row.
    # NOTE(review): an earlier comment stated that 'age' is already an
    # integer; nothing here converts it, so confirm against the CSV.
    for column in ('organ', 'age', 'histone'):
        scores[column] = first_match[column]
    return scores
# Load every file listed in the annotation table and stack the results
all_data = pd.concat(
    [load_and_merge_data(name, annotations) for name in annotations['filename']]
)

# One row per (organ, histone, age): every remaining column is collected into
# a Python list of that group's values, then rows are ordered by the same keys
group_keys = ['organ', 'histone', 'age']
plot_data = (
    all_data
    .groupby(group_keys)
    .agg(list)
    .reset_index()
    .sort_values(by=group_keys)
)
# Overlaid per-age histograms for one (organ, histone) pair, saved as SVG
def plot_histograms(grouped_data, organ, histone, y_max=275):
    """Draw one histogram per row of ``grouped_data`` on shared axes and save it.

    Parameters
    ----------
    grouped_data
        DataFrame with an ``age`` column and an ``avg_normalized_interplay``
        column; each row's ``avg_normalized_interplay`` entry is handed to
        ``sns.histplot`` as the values of one histogram.
    organ, histone
        Used in the figure title and in the output file name
        ``histogram_{organ}_{histone}.svg`` inside ``outputdir``.
    y_max
        Upper limit of the y-axis. Defaults to 275 (the previously hard-coded
        value); pass ``None`` to let matplotlib autoscale the axis.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # One colour per distinct age; zip pairs them with rows in row order
        palette = sns.color_palette("hsv", len(grouped_data['age'].unique()))
        for (_, row), color in zip(grouped_data.iterrows(), palette):
            sns.histplot(
                row['avg_normalized_interplay'],
                ax=ax,
                kde=True,
                color=color,
                alpha=0.175,  # low opacity so overlapping histograms stay visible
                label=f"{row['age']} months",
            )

        ax.legend(title='Age')
        ax.set_title(f"Stacked Histogram of Avg Normalized Interplay \nfor {histone} PTMs in {organ} across ages")
        ax.set_xlabel('Avg Normalized Interplay')
        ax.set_ylabel('Frequency')
        if y_max is not None:
            # A fixed limit keeps the y-axis identical across all saved figures
            ax.set_ylim(0, y_max)

        # Transparent figure and axes backgrounds
        fig.patch.set_alpha(0)
        ax.set_facecolor('none')

        # Save through the figure object rather than pyplot's implicit
        # "current figure", so the right figure is always the one written
        output_path = os.path.join(outputdir, f"histogram_{organ}_{histone}.svg")
        fig.savefig(output_path, format='svg', transparent=True)
    finally:
        # Always release the figure, even if drawing or saving failed;
        # otherwise open figures pile up when this is called in a loop
        plt.close(fig)
# Produce one figure per (organ, histone) pair from the per-age rows
for group_key, age_rows in plot_data.groupby(['organ', 'histone']):
    organ, histone = group_key
    plot_histograms(age_rows, organ, histone)
```