This repository has been archived by the owner on Jun 23, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.R
2254 lines (1582 loc) · 61.2 KB
/
functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# SPACEGERM shiny app function definition script
# Copyright (C) 2017-2018 Marcel Schilling
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#######################
# general information #
#######################
# file: functions.R
# author(s): Marcel Schilling <marcel.schilling@mdc-berlin.de>
# created: 2017-02-23
# last update: 2018-08-16
# license: GNU Affero General Public License Version 3 (GNU AGPL v3)
# purpose: define functions for SPACEGERM shiny app
######################################
# change log (reverse chronological) #
######################################
# 2018-08-16: added support for migration time location measure
# added support for distance location measure
# 2018-05-31: added support for slice data passed in as database query / cosmetics
# 2018-05-17: dropped explicit loading of ggplot2 (pulled in by cowplot)
# replaced require by library
# 2018-05-16: renamed app for publication
# 2018-04-23: added fixed expression range support to 3D model coloring
# 2018-04-16: renamed y-axis limits inputs to expression range inputs
# 2018-04-13: added sample shifts & stretches to 3D model coloring
# removed left-over code from user specified sample shift/strech support
# removed support for user specified sample shifts
# added support for default ymin/max values
# parameterized span to use for smoothing for 3D model
# added 3D model CPM fitting and coloring support
# added 3D model plot function (outline colored by distal-to-proximal only)
# 2018-04-10: added support for dropout slices
# added support to hide smoothing standard errors
# 2018-04-04: added support for default shifts/stretches from input data
# corrected y-axis truncation to not discard hidden data
# 2018-04-03: parameterized span to use for smoothing
# parameterized number of points to impute for smoothing
# 2018-03-21: added support for CPM / cell abundance unit
# 2018-03-20: added support for slice width bars
# added support for gonad arm model plot
# 2018-02-27: added usage of smooth fit span and number of points parameters
# 2018-01-05: switched from identifying transcripts by name to isoform number (for color assignment)
# (identied as part after the last dot (".") in the transcript name; this enables
# re-using the same color for different genes)
# 2017-10-23: switched from encoding samples by color and isoforms by shape/linetype to the opposite
# added support for isoform-level profiles (using shape & linetype)
# 2017-10-20: added support for filtering by expression level type (gene/isoform profiles?) to
# account for inclusion of isoform-specific CPM data (not used by now)
# 2017-05-29: added sample stretches input panel generation & input extraction functions / added
# sample based stretch assignment / added stretch support to profile plot function
# 2017-05-24: added support for non-positive y-axis minumum for linearly scaled gene profiles
# 2017-05-23: added gene filtering by peak CPM minimum function
# 2017-05-22: added support for overwriting y-axis limits with user specified values
# 2017-05-17: made string-matching for normalization scheme determination fixed (i.e. non-regex)
# replaced log.transform heatmap option by abundance measure (CPM, log2(1 + CPM) or
# log10(1 + CPM))
# replaced row.scaling heatmap option by row normalization scheme (scaling, centering or
# none)
# 2017-04-19: added Euclidean distance as possible alternative to "1 - Pearson's r" metric for
# gene clustering
# 2017-04-18: added gene type input panel generation and gene type based filtering functions
# added removal of non-varying genes for heatmap
# 2017-04-13: added cpm.mean/min/max/lfc & percent.min/max to gene table annotation
# added gene table annotation (gene.name & cpm.sd) support
# 2017-04-12: switched (back) from gene rank to count based filtering
# moved gene rank based filtering out of heatmap function
# moved genotype based filtering out of heatmap function
# moved sample description based filtering out of heatmap function
# moved gene list based filtering out of heatmap function
# switched to tidy gene profile input data & rank based gene filtering
# 2017-04-11: added gene list file import functions
# 2017-04-10: added gene table XLSX export functions
# 2017-04-07: simplified gene cluster assignment extraction as suggested by Alicia Schep
# 2017-04-06: added gene table generation functions (incl. cluster assignment)
# 2017-04-05: added gene expression data log-transformation support
# made row-scaling for heatmap optional
# added gene cluster summary profiles
# added gene cluster count support
# fixed typo in default parameter definition
# switched heatmap generation from heatmaply to iheatmapr
# 2017-03-29: added genotype input panel generation and heatmap functions
# 2017-03-19: added plot columns count support
# 2017-03-02: made single y-scale for all sub-plots optional
# added fixed x-axis limits support
# 2017-02-24: added license comment
# added sample shifts input panel generation & input extraction functions / added sample
# based shift assignment / added shift support to profile plot function
# 2017-02-23: re-ordered plot options
# made raw data points optional
# made raw data lines optional
# added per-sample smooth fits
# made across-sample smooth fit optional
# added logarithmic y-axis scaling
# initial version (profile plot generation function)
#############
# libraries #
#############
# get pipe operators
library(magrittr)
# get log2_trans for logscale
library(scales)
# get llply
library(plyr)
# get tibbles
library(tibble)
# get filter
library(dplyr)
# get geom_circle
library(ggforce)
# get plot_grid
library(cowplot)
# get acast
library(reshape2)
# get interactive complex heatmap framework
library(iheatmapr)
# get write.xlsx & read.xlsx
library(xlsx)
library(plotly)
##############
# parameters #
##############
# load input data
source("data.R")
# load parameter definitions
source("params.R")
#############
# functions #
#############
####################
# helper functions #
####################
# generate expression range minimum input panel
generate.manual.exprmin.input <-
function(plot.options, id = "manual.exprmin"){
if("set.exprlim" %in% plot.options){
# initiate minimal expression range minimum
min.value = params$manual.exprmin.input.min
# adjust minimal expression range minimum based on logscale plot option
if("logscale" %in% plot.options)
# don't allow non-positive values for log-scale
min.value %<>% max(1)
# assign input panel
manual.exprmin.input <-
numericInput(inputId = id,
label = h3(params$manual.exprmin.input.label),
min = min.value,
max = params$manual.exprmin.input.max,
value = max(min.value,
params$manual.exprmin.input.default))}
# assign placeholder
else {manual.exprmin.input <- HTML("")}
# return input panel/placeholder for rendering
return(manual.exprmin.input)}
# generate expression range maximum input panel
generate.manual.exprmax.input <-
function(plot.options, exprmin, id = "manual.exprmax"){
if("set.exprlim" %in% plot.options){
# assign input panel
manual.exprmax.input <-
# define expression range maximum input panel
numericInput(inputId = id,
label = h3(params$manual.exprmax.input.label),
min = exprmin + 1,
max = params$manual.exprmax.input.max,
value = max(exprmin + 1,
params$manual.exprmax.input.default))}
# assign placeholder
else {manual.exprmax.input <- HTML("")}
# return input panel/placeholder for rendering
return(manual.exprmax.input)}
# parse gene names input from single string to vector
parse.gene.names<-
# define gene names parsing function
function(
# input gene names string
gene.names.input
# end gene names parsing function parameter definition
)
# begin gene names parsing function definition
{
# take single string gene names input
gene.names.input %>%
# split single string into vector
strsplit(params$gene.names.input.separator) %>%
# strsplit returns a list (single element in this case) --> convert to plain vector
unlist
# end gene names parsing function definition
}
# filter data by sample names
filter.data.by.sample.names <-
function(unfiltered.data, sample.names.to.keep)
unfiltered.data %>%
filter(sample.name %in% sample.names.to.keep)
# filter data by gene names
filter.data.by.gene.names <-
function(unfiltered.data, gene.names.to.keep)
unfiltered.data %>%
filter(gene.name %in% gene.names.to.keep)
# filter data by expression level type (gene/isoform profiles?)
filter.data.by.expression.level <-
function(unfiltered.data, isoform.level)
unfiltered.data %>%
filter(is.na(transcript.name) != isoform.level)
# add shift column to data
add.shift.column <-
function(input.data, shifts.by.sample)
input.data %>%
mutate(shift = shifts.by.sample[sample.name])
# add stretch column to data
add.stretch.column <-
function(input.data, stretches.by.sample)
input.data %>%
mutate(stretch = stretches.by.sample[sample.name])
# generate gonad arm model plot
plot.model <-
function(model.data,
location.measure = params$location.measure.input$default){
if(!(location.measure %in% c("Relative position [% distal-to-proximal]",
"Distance to the DTC [μm]"))) return(NULL)
scale.factor <- ifelse(location.measure ==
"Relative position [% distal-to-proximal]",
100 / model.data$len.gonad.arm.um, 1)
model.data$fit.radii %>%
ggplot(aes(dist.to.dtc.um * scale.factor,
radius.gonad.um * scale.factor)) %>%
+ geom_circle(data = model.data$germ.layers %>%
mutate(
center.dist.um =
(start.dist.um + end.dist.um) / 2,
n.circles =
as.integer(diam.gonad.um /
model.data$diam.germ.cell.um)) %>%
group_by(germ.layer) %>%
do(data_frame(center.dist.um = .$center.dist.um,
n.circles = .$n.circles,
circle = 1:.$n.circles)) %>%
ungroup %>%
mutate(y.center = (circle -.5 - n.circles / 2) *
model.data$diam.germ.cell.um,
radius.um = model.data$diam.germ.cell.um / 2),
aes(x0 = center.dist.um * scale.factor,
y0 = y.center * scale.factor,
r = radius.um * scale.factor),
inherit.aes = FALSE, color = "grey") %>%
+ geom_circle(data = model.data$oocytes %>%
mutate(center.dist.um =
(start.dist.um + end.dist.um) / 2,
radius.um = diam.um / 2),
aes(x0 = center.dist.um * scale.factor,
y0 = 0, r = radius.um * scale.factor),
inherit.aes = FALSE, color = "grey") %>%
+ geom_segment(data = model.data$zones %>%
filter(end.dist.um != model.data$len.gonad.arm.um),
aes(x = end.dist.um * scale.factor,
xend = end.dist.um * scale.factor,
y = -25 * scale.factor,
yend = 25 * scale.factor),
linetype = "dotted") %>%
+ geom_line() %>%
+ geom_line(data=model.data$fit.radii %>%
mutate(radius.gonad.um = -radius.gonad.um)) %>%
+ geom_segment(
data = model.data$fit.radii[c(1, nrow(model.data$fit.radii)),],
aes(xend = dist.to.dtc.um * scale.factor,
yend = -radius.gonad.um * scale.factor)) %>%
+ annotate("segment",
x = (model.data$len.gonad.arm.um - 60) * scale.factor,
xend = (model.data$len.gonad.arm.um - 10) * scale.factor,
y = -30 * scale.factor,
yend = -30 * scale.factor,
size = 1) %>%
+ annotate("text",
x = (model.data$len.gonad.arm.um - 35) * scale.factor,
y = -35 * scale.factor,
vjust = 1, label = "50 μm") %>%
+ expand_limits(y = -50 * scale.factor) %>%
+ coord_fixed() %>%
+ theme_void()}
dist2n.cells <- function(end.dist.um, start.dist.um = 0,
cell.data = input.data$gonad.model$cells){
# keep only relevant cell data
cell.data %<>%
filter(center.dist.um + radius.um >= start.dist.um &
center.dist.um - radius.um <= end.dist.um)
# catch zero-cells case
if(!nrow(cell.data)) return(0)
# weight cells by contained fraction of their total volume
cell.data %>%
mutate(width.um = pmin(center.dist.um + radius.um, end.dist.um) -
pmax(center.dist.um - radius.um, start.dist.um),
vol.um3 = (pi * width.um^2) / 3 * (3 * radius.um - width.um),
fraction = vol.um3 / (4 / 3 * pi * radius.um^3)) %$%
sum(n.cells * fraction)
}
unit2col <- function(unit.name)
ifelse(unit.name == "CPM / cell", "cpm.per.cell", "cpm")
# plot gene profiles
plot.profiles<-
# define profile plot function
function(
# plot data
plot.data
# show raw data points by default
,raw.points=T
# don't show raw data lines by default
,raw.lines=F
# show per-sample smooth fits by default
,smooth.each=T
# show across-sample smooth fit by default
,smooth.pooled=T
# log-transform y-axis by default
,logscale=T
# don't fix x-axis limits by default
,fix.xlim=F
# use a single y-scale for all sub-plots by default
,single.y.scale=T
# set y-axis limits automatically by default
,y.limits=NULL
# use default plot columns count by default
,ncols=params$ncols.plot.input.default
# plot gene level estimates by default
,isoform.level=F,
show.slice.width = TRUE,
abundance.unit = params$abundance.unit.default,
location.measure = params$location.measure.input.default,
n.points.smooth = params$smoothing.n.input.default,
span.smooth = params$smoothing.span.input.default,
show.model = TRUE,
show.smoothing.se = TRUE,
show.dropouts = TRUE,
model.plot = plot.model(input.data$gonad.model, location.measure))
# begin profile plot function definition
{
plot.data %<>%
mutate(percent.center = percent.center * stretch + shift,
percent.start = percent.center - (width.percent / 2 * stretch),
percent.end = percent.center + (width.percent / 2 * stretch),
dist.start.um = percent.start / 100 *
input.data$gonad.model$len.gonad.arm.um,
dist.end.um = percent.end / 100 *
input.data$gonad.model$len.gonad.arm.um,
dist.center.um = (dist.start.um + dist.end.um) / 2,
time.start.h = input.data$gonad.model$dist2time(dist.start.um),
time.end.h = input.data$gonad.model$dist2time(dist.end.um),
time.center.h =
input.data$gonad.model$dist2time(dist.center.um),
tx_color = isoform.level %>%
ifelse(list(transcript.name %>%
sub(".*[.]", "isoform ", .)
),
list("black")) %>%
unlist) %>%
group_by(dist.start.um, dist.end.um) %>%
mutate(n.cells = dist2n.cells(dist.end.um[1], dist.start.um[1])) %>%
ungroup %>%
mutate(cpm.per.cell = cpm / n.cells)
location.center <-
ifelse(location.measure == "Distance to the DTC [μm]",
"dist.center.um",
ifelse(location.measure == "Migration time from the DTC [h]",
"time.center.h", "percent.center"))
location.start <-
ifelse(location.measure == "Distance to the DTC [μm]", "dist.start.um",
ifelse(location.measure == "Migration time from the DTC [h]",
"time.start.h", "percent.start"))
location.end <-
ifelse(location.measure == "Distance to the DTC [μm]", "dist.end.um",
ifelse(location.measure == "Migration time from the DTC [h]",
"time.end.h", "percent.end"))
xlim.scale.factor <-
ifelse(location.measure == "Distance to the DTC [μm]",
input.data$gonad.model$len.gonad.arm.um / 100,
ifelse(location.measure == "Migration time from the DTC [h]",
input.data$gonad.model$dist2time(
input.data$gonad.model$len.gonad.arm.um) / 100, 1))
# create profile plot
profile.plot <-
plot.data %>%
filter(!dropout) %>%
ggplot(aes_string(x = location.center,
y = unit2col(abundance.unit), color = "tx_color",
linetype = "sample.name")) %>%
# split plot into panels
+ facet_wrap(
# generate one sub-plot per gene name
~gene.name
# adjust the (max.) number of sub-plots to put next to each other
,ncol=ncols
# set sub-plot scales
,scales=
# take single y-scale parameter
single.y.scale %>%
# set sub-plot scales based on single y-scale parameter
ifelse(
# if single y-scale requested: fix both scales for sub-plots
"fixed"
# if single y-scale not requested: fix only x-scale for sub-plots
,"free_y"
# end single y-scale parameter evaluation
)
# end sub-plot definition
) %>%
# color non-data plot elements in black, white & shades of grey only
+ theme_bw(
# adjust plot base font sizes
base_size=params$profile.plot.fontsize.base
# end general plot style parameter definition
) %>%
# add caption to plot
+ ggtitle(
# set plot title
label=params$profile.plot.title
# end plot caption parameter definition
) %>%
# adjust x-axis labelling
+ xlab(label = location.measure) %>%
+ ylab(label = params$profile.plot.ylab %>%
paste(paste0("[", abundance.unit, "]"),
sep = ifelse((nchar(abundance.unit) > 4) &
show.model, "\n", " "))) %>%
# adjust linetype (i.e. sample name) legend
+ scale_linetype_discrete(
# adjust linetype legend label
name=params$profile.plot.sample.legend.label
# end linetype legend parameter definition
)
# use color for isoform-level profiles
if(isoform.level)
# modify profile plot
profile.plot %<>%
# adjust color (i.e. isoform) palette/legend
+ scale_color_brewer(
# adjust sample name -> color mapping
palette=params$profile.plot.brewer.palette
# adjust color legend label
,name=params$profile.plot.isoform.legend.label
# end color palette/legend parameter definition
)
# keep gene-level profiles black & white
else
# modify profile plot
profile.plot %<>%
# adjust color (i.e. isoform) palette/legend
+ scale_color_grey(
# hide color legend
guide="none"
# end color palette/legend parameter definition
)
if(show.slice.width){
if(show.dropouts)
profile.plot %<>%
+ geom_errorbarh(aes_string(xmin = location.start,
xmax = location.end),
data = plot.data %>%
filter(dropout),
alpha = params$profile.plot.dropouts.alpha)
profile.plot %<>%
+ geom_errorbarh(aes_string(xmin = location.start,
xmax = location.end))}
# add raw data points if specified
if(raw.points){
if(show.dropouts)
profile.plot %<>%
+ geom_point(aes(shape = sample.name),
data = plot.data %>%
filter(dropout),
size = params$profile.plot.pointsize,
alpha = params$profile.plot.dropouts.alpha)
# modify profile plot
profile.plot %<>%
# plot raw data as points
+ geom_point(
# group data by sample name & shape points accordingly
aes(shape=sample.name)
# adjust data point size
,size=params$profile.plot.pointsize
# end data point parameter definition
) %>%
# adjust shape (i.e. sample name) legend
+ scale_shape_discrete(
# adjust shape legend label
name=params$profile.plot.sample.legend.label
# end shape legend parameter definition
)}
# add raw data lines if specified
if(raw.lines)
# modify profile plot
profile.plot %<>%
# plot per-slice data as lines
+ geom_line(
# adjust data line size
size=params$profile.plot.linesize.each
# end data line parameter definition
)
# add per-sample smooth fits if specified
if(smooth.each)
# modify profile plot
profile.plot %<>%
# fit smooth line across samples
+ geom_smooth(
# adjust smoothing method
,method = params$profile.plot.smoothing.method,
span = span.smooth,
n = n.points.smooth,
se = show.smoothing.se,
size = params$profile.plot.linesize.each
# end smooth line parameter definition
)
# add across-sample smooth fit if specified
if(smooth.pooled)
# modify profile plot
profile.plot %<>%
# fit smooth line across samples
+ geom_smooth(
# pool data across samples & adjust smooth line color
linetype=params$profile.plot.linetype.smooth
# adjust smoothing method
,method=params$profile.plot.smoothing.method,
span = span.smooth,
n = n.points.smooth,
se = show.smoothing.se,
size = params$profile.plot.linesize.pooled
# end smooth line parameter definition
)
# scale y-axis logarithmically if specified
if(logscale){
# only log2-transform y-axis if no y-axis limits specified
if(is.null(y.limits)) {
# modify profile plot
profile.plot %<>%
# log2-transform y-axis
+ scale_y_continuous(trans=log2_trans())
# log2-transform y-axis & overwrite y-axis limits if specified
} else {
# modify profile plot
profile.plot %<>%
# adjust y-axis
+ scale_y_continuous(
# log2-transform y-axis
trans=log2_trans()
# overwrite y-axis limits
,limits=y.limits
# end y-axis adjustment
)
# end logarithmic y-axis adjustment
}
# adjust linear y-axis if specified
} else {
# overwrite y-axis limits if specified
if(!is.null(y.limits)) {
# modify profile plot
profile.plot %<>%
+ coord_cartesian(ylim = y.limits)
# end overwriting of y-axis limits
}
# end linear y-axis adjustment
}
# fix x-axis limits if specified
if(fix.xlim)
# modify profile plot
profile.plot %<>%
# fix x-axis limits
+ xlim(params$profile.plot.xlim * xlim.scale.factor)
if(show.model &
location.measure %in% c("Relative position [% distal-to-proximal]",
"Distance to the DTC [μm]"))
profile.plot %<>%
plot_grid(model.plot, ncol = 1, align = "v", axis = "lr")
# return profile plot
profile.plot
# end profile plot function definition
}
# extract column from table file by column index
get.column.from.file<-
# define column from file extraction function
function(
# single row data.frame with name of table file to extract column from in datapath column
input.file
# index of table column to extract
,column.index
# index of table sheet to extract column from
,sheet.index=1
# end column from file extraction function parameter definition
)
# begin column from file extraction function definition
{
# take file to extract column from in datapath column
input.file %$%
# read column from XLSX file
read.xlsx(
# get input file path from input data.frame
file=datapath
# read only specified table sheet
,sheetIndex=sheet.index
# read only specified table column
,colIndex=column.index
# end XLSX file reading
) %>%
# convert single column data.frame to vector
unlist %>%
# strip off rownames before returning column data
unname
# end column from file extraction function definition
}
# extract gene list from table file
get.genes.from.file<-
# define gene list from file extraction function
function(
# single row data.frame with name of table file to extract gene list from in datapath column
table.file
# index of gene list column within table file
,gene.column=1
# end gene list from file extraction function parameter definition
)
# begin gene list from file extraction function definition
{
# take table file to extract gene list from
table.file %>%
# extract gene list column from table by specified column index
get.column.from.file(gene.column) %>%
# convert gene list to characters before returning them
as.character
# end gene list from file extraction function definition
}
# extract CPM matrix (genes as rows)
get.cpm.matrix<-
# define CPM matrix extraction function
function(
# expression data (gene, percent & cpm.fit columns)
expression.data
# name of CPM column
,cpm.colname="cpm.fit"
# end CPM matrix extraction function parameter definition
)
# begin CPM matrix extraction function definition
{
# take expression data
expression.data %>%
# convert tidy table to matrix
acast(
# use genes as rows, percent as columns
gene~percent
# fill matrix with corresponding CPM values
,value.var=cpm.colname
# end tidy table to matrix conversion
)
# end CPM matrix extraction function definition
}
# calculate distance matrix of matrix columns using "1 - Pearson's r" metric
get.column.distances.pearson<-
# define matrix columns "1 - Pearson's r" distance calculation function
function(
# matrix to get column distances of
column.matrix
# end matrix columns "1 - Pearson's r" distance calculation function parameter definition
)
# begin matrix columns "1 - Pearson's r" distance calculation function definition
{
# take matrix to get column distances of
column.matrix %>%
# calculate (pairwise) column (Pearson) correlations
cor(use="complete") %>%
# convert column similarities to distances
`-`(1,.) %>%
# return column distance matrix
as.dist
# end matrix columns "1 - Pearson's r" distance calculation function definition
}
# normalize matrix columns
normalize.columns<-
# define matrix column normalization function
function(
# matrix to normalize columns of
column.matrix
# should columns be z-transformed (FALSE: centering only)
,z.transform = TRUE
# end matrix column normalization function parameter definition
)
# begin matrix column normalization function definition
{
# take matrix to normalize columns of
column.matrix %>%
# scale columns (z-transform if specified, otherwise center only)
scale(scale = z.transform)
# end matrix column normalization function definition
}
# normalize matrix rows
normalize.rows<-
# define matrix row normalization function
function(
# matrix to normalize rows of
row.matrix
# should rows be z-transformed (FALSE: centering only)
,scaling = TRUE
# end matrix row normalization function parameter definition
)
# begin matrix row normalization function definition
{
# take matrix to normalize rows of
row.matrix %>%
# treat rows as columns
t %>%
# normalize columns (scale if specified)
normalize.columns(z.transform = scaling) %>%
# treat columns as rows
t
# end matrix row normalization function definition
}
# calculate distance matrix of matrix rows using "1 - Pearson's r" metric
get.row.distances.pearson<-
# define matrix rows "1 - Pearson's r" distance calculation function
function(
# matrix to get row distances of
row.matrix
# end matrix rows "1 - Pearson's r" distance calculation function parameter definition
)
# begin matrix rows "1 - Pearson's r" distance calculation function definition
{
# take matrix to get row distances of
row.matrix %>%
# treat rows as columns
t %>%
# calculate column "1 - Pearson's r" distances
get.column.distances.pearson
# end matrix rows "1 - Pearson's r" distance calculation function definition
}
# calculate distance matrix of matrix rows using "Euclidean distance" metric
get.row.distances.euclidean<-
# define matrix rows Euclidean distance calculation function
function(
# matrix to get row distances of