Skip to content

Commit 6d549b9

Browse files
committed
initial commit of new figure outlines
1 parent 47e9df8 commit 6d549b9

7 files changed

+446
-96
lines changed
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
##figure ploting functions incorporates and standardizes the calls made across figures
2+
3+
library(ggplot2)
4+
library(dplyr)
5+
library(ggridges)
6+
library(synapser)
7+
8+
##COLORS: standardize here
9+
modelcolors <- c()
10+
datasetcolors <- c()
11+
12+
exvivo = c('mpnst','beataml','sarcpdo','pancpdo','bladderpdo')
13+
14+
synapser::synLogin()
15+
16+
getModelPerformanceData <- function(){
17+
18+
allscoreslist <- list(deepttc = 'syn65880080',graphdrp = 'syn65928973',lgbm = 'syn65880116',pathdsp = 'syn65880133',uno = 'syn65676159')
19+
20+
##with pancpdo
21+
allscoreslist <- list(deepttc = 'syn66323471',graphdrp = 'syn66323492',lgbm = 'syn66323510',pathdsp = 'syn66326173',uno = 'syn66323527')
22+
23+
fullres <- do.call(rbind,lapply(names(allscoreslist),function(mod)
24+
readr::read_csv(synapser::synGet(allscoreslist[[mod]])$path) |> mutate(model = mod)))
25+
26+
fullres <- fullres |>
27+
mutate(withinDataset = ifelse(src == trg,TRUE,FALSE))
28+
29+
#lets remove same-dataset data
30+
cdres <- subset(fullres,!withinDataset)
31+
32+
##lets remove ex vivo training
33+
cdres <- subset(cdres,!src %in% c('mpnst','beataml'))
34+
35+
return(cdres)
36+
}
37+
38+
39+
40+
###these files are very big so i'm not sure how to deal with them.
41+
getModelPredictionData<-function(dset='lgbm'){
42+
43+
preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809')
44+
45+
46+
fullres <- do.call(rbind,lapply(dset,function(mod)
47+
readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod)))
48+
49+
return(preds)
50+
}
51+
52+
#this function plots a single metric by all the possible values
53+
#
54+
ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){
55+
56+
sr <- dataset |>
57+
subset(met == metric)
58+
59+
60+
##facet by source - compare performance across a single source
61+
62+
##re-rank src samples by mean metrics
63+
mvals <- sr |> group_by(src) |>
64+
summarize(mvals = mean(value)) |>
65+
arrange(mvals)
66+
67+
if (metric == 'r2') {
68+
sr$value <- sapply(sr$value,function(x) ifelse(x < (-1),-1,x))
69+
}
70+
71+
sr$src = factor(sr$src,levels = mvals$src)
72+
73+
#compare models by source dataset
74+
p1 <- sr |>
75+
ggplot(aes(x = value,y = trg,fill = model)) +
76+
ggridges::geom_density_ridges(alpha = 0.5) +
77+
facet_grid(src~.) +
78+
ggtitle(paste0(metric,' by source dataset'))
79+
80+
##now we rerank by target dataset and evaluate by target
81+
mvals <- sr |> group_by(trg) |>
82+
summarize(mvals = mean(value)) |>
83+
arrange(mvals)
84+
sr$trg = factor(sr$trg,levels = mvals$trg)
85+
86+
#plot source by target data
87+
p3 <- sr |>
88+
ggplot(aes(x = value,y = src,fill = model)) +
89+
ggridges::geom_density_ridges(alpha = 0.5) +
90+
facet_grid(trg~.) +
91+
ggtitle(paste0(metric,' by target dataset'))
92+
93+
return(list(src=p1,trg=p3))
94+
}
95+
96+
97+
##here we have to interrogate the results to visualize how specific drugs are behaving
98+
performanceByDrugOrSample<-function(){
99+
100+
}
101+
102+
103+
##do we still need this function?
104+
105+
doModelPlot <- function(metric, dataset=cdres){
106+
107+
sr <- dataset |>
108+
subset(met == metric)
109+
##re-rank src samples by mean metric
110+
mvals <- sr |>
111+
group_by(trg) |>
112+
summarize(mvals = mean(value)) |>
113+
arrange(mvals)
114+
115+
if(metric == 'r2') {
116+
sr$value <- sapply(sr$value,function (x) ifelse(x<(-1),-1,x))
117+
}
118+
119+
sr$trg = factor(sr$trg,levels = mvals$trg)
120+
121+
sr |>
122+
subset(trg %in% exvivo) |>
123+
ggplot(aes(x = value,alpha = 0.8)) +
124+
geom_histogram() + facet_grid(model~trg) +
125+
ggtitle(paste0(metric,' evaluated on ex vivo data'))
126+
127+
128+
ggsave(paste0(metric,'exVivoPerformance.png'))
129+
130+
131+
sr |> subset(!trg %in% exvivo) |>
132+
ggplot(aes(x = value,alpha = 0.8)) +
133+
geom_histogram() + facet_grid(model~trg) +
134+
ggtitle(paste0(metric,' evaluated on cell line data'))
135+
136+
137+
ggsave(paste0(metric,'CellLinePerformance.png'))
138+
139+
}

manuscript/figure3CellLinePlots.Rmd

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
---
2+
title: "IMPROVE cell line results"
3+
author: "Sara gosline"
4+
date: "2025-03-27"
5+
output: html_document
6+
---
7+
8+
This document focuses on results we can glean from the cell line specific analysis using the IMPROVE framework and coderdata
9+
```{r setup, include = FALSE}
10+
knitr::opts_chunk$set(echo = TRUE)
11+
library(tidyverse)
12+
library(ggplot2)
13+
source('coderdataResultsFunctions.R')
14+
15+
```
16+
17+
## Collect cross-model performance data and try out plotting
18+
Currently the cross-model data results have been uploaded to synapse. Please request access on the [synapse team site](https://www.synapse.org/Team:3545388).
19+
20+
```{r data, message=FALSE, echo=FALSE, warning=FALSE}
21+
cdres <- getModelPerformanceData()
22+
23+
metrics <- c('pcc','scc')
24+
res <- lapply(metrics,function(x) {
25+
res <- doFullPlot(x,cdres)
26+
cowplot::plot_grid(res$src,res$trg)
27+
ggsave(paste0('all_',metric,'_ridglines.pdf'),height = 12,width = 10)
28+
return(res$src)
29+
})
30+
31+
print(res)
32+
33+
```
34+
35+
## Figure 3A: performance on cell lines
36+
37+
First result: evaluation on cell lines.
38+
39+
```{r model performance, message = FALSE, warning = FALSE}
40+
#current list of ex vivo datasets. include liverpdo when complete
41+
42+
43+
ccdres <- subset(cdres,!trg %in% exvivo)
44+
45+
ccres = lapply(metrics,function(x) {
46+
res <- doFullPlot(x, ccdres)
47+
cowplot::plot_grid(res$src,res$trg)
48+
ggsave(paste0('celllines',metric,'_ridglines.pdf'),height = 8,width = 10)
49+
return(res$src)
50+
})
51+
52+
print(ccres)
53+
54+
```
55+
56+
## Figure 3B - training/test set size
57+
58+
## Compare dataset size to performance
59+
60+
We wonder if the dataset size affects the predictive power.
61+
62+
```{r dataset size}
63+
64+
#number of combos
65+
combos = list(beataml = 3033,ccle = 10911,ctrpv2 = 303520,fimm = 2457 ,gcsi = 12320,
66+
gdscv1 = 105808,gdscv2 = 45323,mpnst = 250, nci60 = 2317205,prism = 633169)
67+
68+
numsamples = list()
69+
numdrugs = list()
70+
#todo: we can also evaluate number of samples or drugs
71+
72+
#e can get performance summaries
73+
gres <- ccdres |>
74+
subset(model!='uno')|>
75+
subset(met=='scc') |>
76+
group_by(met,src,trg,model) |>
77+
summarize(meanVal=mean(value)) |>
78+
left_join(data.frame(src = names(combos),sampleNum = unlist(combos))) |>
79+
arrange(meanVal)
80+
81+
mom <- gres|>group_by(src,sampleNum)|>summarize(mv=mean(meanVal))|>arrange(mv)
82+
83+
#gres <- subset(gres,met=='scc')
84+
gres$src = factor(gres$src,levels=unique(mom$src))
85+
gres |>
86+
ggplot(aes(x=src,y=meanVal,fill=model))+geom_boxplot()#+geom_jitter()
87+
88+
89+
```
90+
91+
92+
# Dataset prediction parsing
93+
Now we have to go into the individual predidictions to pull out trends
94+
95+
## Figure 3
96+
97+
What does this figure look like
98+
99+
100+

manuscript/figure4ExVivoResults.Rmd

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
title: "Figure 4 ex vivo results"
3+
author: "Sara Gosline"
4+
date: "2025-06-02"
5+
output: html_document
6+
---
7+
8+
```{r setup, include=FALSE}
9+
knitr::opts_chunk$set(echo = TRUE)
10+
```
11+
12+
Here we are focused on getting the details from a model prediction algorithm - where did it fail, where did it succeed?
13+
14+
## Data and packages
15+
16+
First we get the packages loaded and logged into synapse.
17+
```{r setup, include = FALSE}
18+
knitr::opts_chunk$set(echo = TRUE)
19+
library(tidyverse)
20+
library(ggplot2)
21+
source('coderdataResultsFunctions.R')
22+
23+
```
24+
25+
26+
The data has been uploaded by natasha and can be downloaded as follows.
27+
28+
```{r download data}
29+
30+
cdres <- getModelPerformanceData()
31+
32+
ecdres <- subset(cdres,trg %in% exvivo)
33+
34+
35+
```
36+
37+
## Figure 4A
38+
39+
40+
```{r}
41+
exres = lapply(metrics,function(x) {
42+
doFullPlot(x, ecdres,'cellline')
43+
})
44+
45+
print(exres)
46+
```
47+
## Create funtion to dive in
48+
49+
50+
51+
```{r}
52+
53+
```
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
title: "Figure 5 protein comparison"
3+
output: html_notebook
4+
---
5+
6+
The last figure is what we want to do to compare multiple omics measurements
7+
8+
```{r}
9+
knitr::opts_chunk$set(echo = TRUE)
10+
library(tidyverse)
11+
library(ggplot2)
12+
source('coderdataResultsFunctions.R')
13+
14+
```
15+
16+
17+
18+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
---
2+
title: "Improve results interrogation"
3+
author: "Sara Gosline"
4+
date: "2025-06-02"
5+
output: html_document
6+
---
7+
8+
```{r setup, include=FALSE}
9+
knitr::opts_chunk$set(echo = TRUE)
10+
```
11+
12+
Here we are focused on getting the details from a model prediction algorithm - where did it fail, where did it succeed?
13+
14+
## Data and packages
15+
16+
First we get the packages loaded and logged into synapse.
17+
```{r setup, include = FALSE}
18+
knitr::opts_chunk$set(echo = TRUE)
19+
library(tidyverse)
20+
library(synapser)
21+
library(ggplot2)
22+
library(ggridges)
23+
synapser::synLogin()
24+
25+
```
26+
27+
28+
The data has been uploaded by natasha and can be downloaded as follows.
29+
30+
```{r download data}
31+
32+
33+
```
34+
35+
36+
37+
## Create funtion to dive in
38+
39+
40+
41+
```{r}
42+
43+
```

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy