diff --git a/build/build_all.py b/build/build_all.py index 10b7bc55..7004dd74 100644 --- a/build/build_all.py +++ b/build/build_all.py @@ -40,7 +40,7 @@ def main(): parser.add_argument('--figshare', action='store_true', help="Upload all local data to Figshare. FIGSHARE_TOKEN must be set in local environment.") parser.add_argument('--all',dest='all',default=False,action='store_true', help="Run all data build commands. This includes docker, samples, omics, drugs, exp arguments. This does not run the validate or figshare commands") parser.add_argument('--high_mem',dest='high_mem',default=False,action='store_true',help = "If you have 32 or more CPUs, this option is recommended. It will run many code portions in parallel. If you don't have enough memory, this will cause a run failure.") - parser.add_argument('--dataset',dest='datasets',default='broad_sanger,hcmi,beataml,cptac,mpnst,mpnstpdx,pancpdo,bladderpdo,sarcpdo,liverpdo',help='Datasets to process. Defaults to all available.') + parser.add_argument('--dataset',dest='datasets',default='broad_sanger,hcmi,beataml,pancpdo,bladderpdo,sarcpdo,liverpdo,mpnst',help='Datasets to process. Defaults to all available.') parser.add_argument('--version', type=str, required=False, help='Version number for the Figshare upload title (e.g., "0.1.29"). This is required for Figshare upload. This must be a higher version than previously published versions.') parser.add_argument('--github-username', type=str, required=False, help='GitHub username for the repository.') parser.add_argument('--github-email', type=str, required=False, help='GitHub email for the repository.') @@ -119,7 +119,6 @@ def process_docker(datasets): 'hcmi': ['hcmi'], 'beataml': ['beataml'], 'mpnst': ['mpnst'], - 'mpnstpdx': ['mpnstpdx'], 'pancpdo': ['pancpdo'], 'bladderpdo': ['bladderpdo'], 'sarcpdo': ['sarcpdo'], @@ -410,7 +409,7 @@ def get_latest_commit_hash(owner, repo, branch='main'): # if args.figshare or args.validate: # FigShare File Prefixes: - prefixes = ['beataml', 'hcmi', 'cptac', 'mpnst', 'mpnstpdx', 'pancpdo', 'bladderpdo','sarcpdo', 'genes', 'drugs', 'liverpdo'] + prefixes = ['beataml', 'hcmi', 'cptac', 'pancpdo', 'bladderpdo','sarcpdo', 'genes', 'drugs', 'liverpdo','mpnst'] broad_sanger_datasets = ["ccle","ctrpv2","fimm","gdscv1","gdscv2","gcsi","prism","nci60"] if "broad_sanger" in datasets: prefixes.extend(broad_sanger_datasets) diff --git a/build/build_dataset.py b/build/build_dataset.py index 7904a43e..780b583b 100644 --- a/build/build_dataset.py +++ b/build/build_dataset.py @@ -41,7 +41,6 @@ def process_docker(dataset,validate): 'hcmi': ['hcmi'], 'beataml': ['beataml'], 'mpnst': ['mpnst'], - 'mpnstpdx': ['mpnstpdx'], 'pancpdo': ['pancpdo'], 'cptac': ['cptac'], 'sarcpdo': ['sarcpdo'], @@ -128,7 +127,6 @@ def process_omics(executor, dataset, should_continue): 'broad_sanger': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'], 'cptac': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'], 'hcmi': ['mutations', 'transcriptomics'], - 'mpnstpdx':['copy_number', 'mutations', 'proteomics', 'transcriptomics'], 'sarcpdo': ['mutations', 'transcriptomics'], 'pancpdo': ['transcriptomics'], 'bladderpdo': ['copy_number', 'mutations', 'transcriptomics'], diff --git a/build/mpnst/00_sample_gen.R b/build/mpnst/00_sample_gen.R old mode 100755 new mode 100644 index 0ec5704b..db1f238e --- a/build/mpnst/00_sample_gen.R +++ b/build/mpnst/00_sample_gen.R @@ -1,5 +1,4 @@ -# This script generate a new sample table based on pervious beatAML improved sample ID -# It will take the 
maximum value of beatAML improved sample ID and continue from ID count from there +# This script generate a new sample table based on previous dataset's sample file (taking the max improve_sample_id) # Load required libraries library(data.table) library(synapser) @@ -11,14 +10,12 @@ if(length(args) > 1 ){ stop("Up to one argument is allowed. This is the filepath to the previously run samples file.") } - if (length(args) == 0 || is.na(args[1]) || args[1] == "" || !file.exists(args[1])) { orig_samples <- "" } else { orig_samples <- fread(args[1]) } - # Check if Synapse token is available from the environment synapse_token <- Sys.getenv("SYNAPSE_AUTH_TOKEN") if (synapse_token == "") { @@ -29,6 +26,10 @@ synapser::synLogin(authToken=synapse_token) manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> as.data.frame() +#Drop contaminated sample JH-2-009 +manifest <- manifest %>% + filter(Sample != "JH-2-009") + ###sample file has a strict schema ## - improve_sample_id @@ -62,9 +63,6 @@ main<-rbind(sampTable,pdxmt)|> dplyr::select(-MicroTissueDrugFolder)|> rbind(tumorTable) -#main <- fread("mpnst/NF_MPNST_samples.csv") -#previous_aml <- fread(args[1])#"beatAML/beataml_samples.csv") - # If there is no previous samples file - start at 1, else, continue where the previous one left off. if (identical(orig_samples, "")) { max_id <- 1 @@ -72,21 +70,6 @@ if (identical(orig_samples, "")) { max_id <- max(orig_samples$improve_sample_id, na.rm = TRUE) } - main$improve_sample_id <- seq(from = max_id + 1, length.out = nrow(main)) -#synapse_main <- fread("mpnst/synapse_NF-MPNST_samples.csv") -# Step 1: Create a dictionary from 'main' -#id_dict <- setNames(main$improve_sample_id, main$other_id) - -# Step 2: Update 'ID' in 'synapse_main' -#synapse_main$ID <- id_dict[synapse_main$Sample] - -# Handling NA values if any mismatch occurs (Optional based on your data integrity) -# If there are NAs generated, you might need to check for unmatched keys -# synapse_main$ID[is.na(synapse_main$ID)] <- -1 # Assign a placeholder like -1 for unmatched rows - -# Step 3: Save the updated 'synapse_main' -#fwrite(synapse_main, "mpnst/synapse_NF-MPNST_samples.csv") -#fwrite(main, "mpnst/NF_MPNST_samples.csv") # updated sample file fwrite(main,'/tmp/mpnst_samples.csv') diff --git a/build/mpnst/01_combined_omics.R b/build/mpnst/01_combined_omics.R new file mode 100644 index 00000000..dcbdfbae --- /dev/null +++ b/build/mpnst/01_combined_omics.R @@ -0,0 +1,246 @@ +#!/usr/bin/env Rscript + +# Combined MPNST & MPNST-PDX Data Extraction Script +# This script unifies data extraction for PDX, Tumor, and Xenograft-Derived Organoid samples. + +# Load required libraries +library(data.table) +library(synapser) +library(dplyr) +library(tidyr) + +# Retrieve command line arguments +args <- commandArgs(trailingOnly = TRUE) +if (length(args) < 3) { + stop("Usage: Rscript 01_combined_omics.R ", call. = FALSE) +} +PAT <- args[1] +samples <- args[2] +genes <- args[3] + +# Log in to Synapse +token <- PAT +synLogin(authToken = token) + +# Read sample mapping and gene mapping +samples_df <- fread(samples) %>% + select(improve_sample_id, common_name, model_type) %>% + distinct() +genes_df <- fread(genes) + +# Subset by model type +pdx_samps <- filter(samples_df, model_type == "patient derived xenograft") +tumor_samps<- filter(samples_df, model_type == "tumor") +mt_samps <- filter(samples_df, model_type == "xenograft derived organoid") # These end up being the same as pdx_samps in the manifest. 
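# --- Illustrative aside (editor's sketch, not part of the patch) ----------------
# The pdx/tumor/mt blocks below all follow the same pattern: left_join() the
# manifest to one model-type subset by common_name, then drop manifest rows with
# no matching sample (improve_sample_id comes back NA). Toy example using
# hypothetical sample names and IDs:
toy_manifest <- data.frame(common_name = c("sample_A", "sample_B"))
toy_subset   <- data.frame(common_name = "sample_A", improve_sample_id = 101)
dplyr::left_join(toy_manifest, toy_subset, by = "common_name") |>
  dplyr::filter(!is.na(improve_sample_id))   # keeps only sample_A
# ---------------------------------------------------------------------------------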
+ +# Retrieve manifest table from Synapse +manifest <- synTableQuery("select * from syn53503360")$asDataFrame() %>% + rename(common_name = Sample) + +# Build sample tables +pdx_data <- manifest %>% + select(common_name, starts_with("PDX")) %>% + left_join(pdx_samps, by = "common_name") %>% + select(improve_sample_id, common_name, model_type, + RNASeq = PDX_RNASeq, + Mutations = PDX_Somatic_Mutations, + CopyNumber = PDX_CNV, + Proteomics = PDX_Proteomics) %>% + filter(!is.na(improve_sample_id)) + +tumor_data <- manifest %>% + select(common_name, starts_with("Tumor")) %>% + left_join(tumor_samps, by = "common_name") %>% + select(improve_sample_id, common_name, model_type, + RNASeq = Tumor_RNASeq, + Mutations = Tumor_Somatic_Mutations, + CopyNumber = Tumor_CNV) %>% + mutate(Proteomics = "") %>% + filter(!is.na(improve_sample_id)) + +mt_data <- manifest %>% #Note, this is the same as pdx_data but I think we default to "xenograft derived organoid" if present (based on original files) + select(common_name, starts_with("PDX")) %>% + left_join(mt_samps, by = "common_name") %>% + select(improve_sample_id, common_name, model_type, + RNASeq = PDX_RNASeq, + Mutations = PDX_Somatic_Mutations, + CopyNumber = PDX_CNV, + Proteomics = PDX_Proteomics) %>% + filter(!is.na(improve_sample_id)) + +# Combine all sample tables +dcombined <- bind_rows(pdx_data, tumor_data, mt_data) %>% distinct() +print("dcombined:") +print(dcombined) + +# Helper to assign study label based on model_type +study_label <- function(type) { + case_when( + type == "patient derived xenograft" ~ "MPNST PDX", + type == "tumor" ~ "MPNST Tumor", + type == "xenograft derived organoid" ~ "MPNST PDX MT", + TRUE ~ "MPNST" + ) +} + +# Helper to pick metadata based on sample ID and column +pick_meta <- function(id, column) { + # columns are {"Proteomics","RNASeq","Mutations","CopyNumber"} + if (any(tumor_data[[column]] == id, na.rm = TRUE)) { + sdf <- tumor_data %>% filter(.data[[column]] == id) %>% slice(1) + } else if (any(mt_data[[column]] == id, na.rm = TRUE)) { + sdf <- mt_data %>% filter(.data[[column]] == id) %>% slice(1) + } else if (any(pdx_data[[column]] == id, na.rm = TRUE)) { + sdf <- pdx_data %>% filter(.data[[column]] == id) %>% slice(1) + } else { + return(NULL) + } + list( + sample_id = sdf$improve_sample_id, + model_type = sdf$model_type + ) +} + +# Safe extraction: only return non-empty data frames +i_safe_extract <- function(df, sample_id, source_val, study_val) { + if (is.null(df) || nrow(df) == 0) return(NULL) + df$improve_sample_id <- sample_id + df$source <- source_val + df$study <- study_val + df +} + +# 1) Proteomics +proteomics_list <- lapply( + setdiff(dcombined$Proteomics, c("", NA, "NA")), + function(id) { + meta <- pick_meta(id, "Proteomics") + if (is.null(meta)) return(NULL) + + df <- tryCatch( + fread(synGet(id)$path) %>% + rename(gene_symbol = Gene) %>% + left_join(genes_df, by = "gene_symbol") %>% + select(entrez_id, proteomics = logRatio) %>% + filter(!is.na(entrez_id), proteomics != 0) %>% + distinct(), + error = function(e) NULL + ) + i_safe_extract( + df, + meta$sample_id, + "NF Data Portal", + study_label(meta$model_type) + ) + } +) +proteomics <- bind_rows(proteomics_list) +fwrite(proteomics, file.path("/tmp", "mpnst_proteomics.csv")) +message("Wrote combined proteomics") + + +# 2) Transcriptomics (PDX, Tumor, and Organoid / MT which comes from PDX..) 
+transcriptomics_list <- lapply( + setdiff(dcombined$RNASeq, c("", NA, "NA")), + function(id) { + meta <- pick_meta(id, "RNASeq") + if (is.null(meta)) return(NULL) + + df <- tryCatch({ + fread(synGet(id)$path) %>% + separate(Name, into = c("other_id","vers"), sep = "\\.") %>% + select(-vers) %>% + left_join(genes_df) %>% + select(entrez_id, transcriptomics = TPM) %>% + filter(!is.na(entrez_id), transcriptomics != 0) %>% + distinct() + }, error = function(e) NULL) + + i_safe_extract( + df, + meta$sample_id, + "NF Data Portal", + study_label(meta$model_type) + ) + } +) +transcriptomics <- bind_rows(transcriptomics_list) +fwrite(transcriptomics, file.path("/tmp", "mpnst_transcriptomics.csv")) +message("Wrote combined transcriptomics") + + +# 3) Mutations (WES) +wes_list <- lapply( + setdiff(dcombined$Mutations, c("", NA, "NA")), + function(id) { + meta <- pick_meta(id, "Mutations") + if (is.null(meta)) return(NULL) + + clean_id <- gsub('[\"\\[\\]]', '', id) + df <- tryCatch( + fread(synGet(clean_id)$path) %>% + select(entrez_id = Entrez_Gene_Id, + mutation = HGVSc, + variant_classification = Variant_Classification) %>% + filter(entrez_id %in% genes_df$entrez_id) %>% + distinct(), + error = function(e) NULL + ) + + i_safe_extract( + df, + meta$sample_id, + "NF Data Portal", + study_label(meta$model_type) + ) + } +) +wes <- bind_rows(wes_list) +fwrite(wes, file.path("/tmp", "mpnst_mutations.csv")) +message("Wrote combined mutations") + + +# 4) Copy Number Variation (CNV) +cnv_list <- lapply( + setdiff(dcombined$CopyNumber, c("", NA, "NA")), + function(id) { + meta <- pick_meta(id, "CopyNumber") + if (is.null(meta)) return(NULL) + + clean_id <- gsub('[\"\\[\\]]', '', id) + raw <- tryCatch(fread(synGet(clean_id)$path), error = function(e) NULL) + if (is.null(raw)) return(NULL) + + df_long <- raw %>% + separate_rows(gene, sep = ",") %>% + rename(gene_symbol = gene) %>% + left_join(genes_df, by = "gene_symbol") %>% + filter(!is.na(entrez_id)) %>% + select(entrez_id, log2) %>% + distinct() %>% + mutate(copy_number = 2^log2) %>% + select(-log2) + + df <- df_long %>% + mutate(copy_call = case_when( + copy_number < 0.5210507 ~ "deep del", + copy_number < 0.7311832 ~ "het loss", + copy_number < 1.214125 ~ "diploid", + copy_number < 1.422233 ~ "gain", + TRUE ~ "amp" + )) + + i_safe_extract( + df, + meta$sample_id, + "NF Data Portal", + study_label(meta$model_type) + ) + } +) +cnv <- bind_rows(cnv_list) +fwrite(cnv, file.path("/tmp", "mpnst_copy_number.csv")) +message("Wrote combined copy number") + + +message("All combined data files created.") diff --git a/build/mpnst/01_mpnst_get_omics.R b/build/mpnst/01_mpnst_get_omics.R deleted file mode 100755 index 9097465a..00000000 --- a/build/mpnst/01_mpnst_get_omics.R +++ /dev/null @@ -1,205 +0,0 @@ -# Load required libraries -library(data.table) -# library(biomaRt)# biomart issues still exist -library(synapser) -library(dplyr) - -# Retrieve command line arguments -args <- commandArgs(trailingOnly = TRUE) - -# Check if a token was provided -if (length(args) == 0) { - stop("No token or sample file provided. Usage: Rscript my_script.R [samples] [genes]", call. = FALSE) -} - -# Set your personal access token -PAT <- args[1] -patients <- args[2] -genefile <- args[3] - -# Log in to Synapse -synLogin(authToken = PAT) - -# Define the Ensembl mart # biomart issues still exist -# ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") # biomart issues still exist; fix later... 
- -# Path to the directory to save .sf files -#path <- "./tmp" -#dir.create(path, showWarnings = FALSE) - -# Read the sample mapping CSV and genes.csv -samples_df <- fread(patients)|> - dplyr::select(improve_sample_id,common_name,model_type)|> - distinct()#"mpnst/synapse_NF-MPNST_samples.csv") - -pdx_samps<-subset(samples_df,model_type=='patient derived xenograft') -tumor_samps<-subset(samples_df,model_type=='tumor') -mt_samps<-subset(samples_df,model_type=='xenograft derived organoid') - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.frame()|> - dplyr::rename(common_name='Sample') - - -##for now we only have tumor and PDX data -##they each get their own sample identifier -pdx_data<-manifest|>dplyr::select(common_name,starts_with("PDX"))|> - left_join(pdx_samps)|> - dplyr::select(improve_sample_id,common_name,model_type,RNASeq='PDX_RNASeq',Mutations='PDX_Somatic_Mutations',CopyNumber='PDX_CNV',Proteomics='PDX_Proteomics')|> - subset(!is.na(improve_sample_id)) - -tumor_data<- manifest|>dplyr::select(common_name,starts_with("Tumor"))|> - left_join(tumor_samps)|> - dplyr::select(improve_sample_id,common_name,model_type,RNASeq='Tumor_RNASeq',Mutations='Tumor_Somatic_Mutations',CopyNumber='Tumor_CNV')|> - mutate(Proteomics='')|> - subset(!is.na(improve_sample_id)) - ##we dont have tumor proteomics from these samples -#print(tumor_data) - -mt_data<- manifest|>dplyr::select(common_name,starts_with("PDX"))|> - left_join(mt_samps)|> - dplyr::select(improve_sample_id,common_name,model_type, RNASeq='PDX_RNASeq',Mutations='PDX_Somatic_Mutations',CopyNumber='PDX_CNV',Proteomics='PDX_Proteomics')|>##we dont have mt data yet, so collecting PDX instead - subset(!is.na(improve_sample_id)) -#print(tumor_data) - - -combined<-rbind(pdx_data,tumor_data,mt_data)|>distinct() - -# gene mapping table -genes_df <- fread(genefile) - - -##added proteomics first -proteomics<-do.call('rbind',lapply(setdiff(mt_data$Proteomics,c('',NA,"NA")),function(x){ - # if(x!=""){ - #print(x) - sample<-subset(mt_data,Proteomics==x) - #print(sample) - res<-fread(synGet(x)$path)|> - #tidyr::separate(Name,into=c('other_id','vers'),sep='\\.')|> - #dplyr::select(-vers)|> - dplyr::rename(gene_symbol='Gene')|> - left_join(genes_df)|> - dplyr::select(entrez_id,proteomics='logRatio')|> - distinct()|> - subset(!is.na(entrez_id))|> - subset(proteomics!=0) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX MT',nrow(res)) - return(distinct(res)) - # } -})) - -fwrite(proteomics,'/tmp/mpnst_proteomics.csv.gz') - - -#### FIRST WE GET RNASeq Data - -rnaseq<-do.call('rbind',lapply(setdiff(mt_data$RNASeq,c(NA,"NA")),function(x){ - # if(x!=""){ - #print(x) - sample<-subset(mt_data,RNASeq==x) - #print(sample) - res<-fread(synGet(x)$path)|> - tidyr::separate(Name,into=c('other_id','vers'),sep='\\.')|> - dplyr::select(-vers)|> - left_join(genes_df)|> - dplyr::select(entrez_id,transcriptomics='TPM')|> - subset(!is.na(entrez_id))|> - subset(transcriptomics!=0) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX MT',nrow(res)) - return(distinct(res)) - # } -})) - -fwrite(rnaseq,'/tmp/mpnst_transcriptomics.csv.gz') - - - -#####NEXT WE DO WES DATA -print("Getting WES") -wes<-do.call(rbind,lapply(setdiff(mt_data$`Mutations`,c(NA,"NA")),function(x){ - - 
x2=x#gsub('"','',gsub("[",'',gsub("]",'',x,fixed=T),fixed=T),fixed=T) - print(x) - sample<-subset(mt_data,Mutations==x) - print(sample$improve_sample_id) - res<-NULL - try(res<-fread(synGet(x2)$path)|> - dplyr::select(entrez_id='Entrez_Gene_Id',mutation='HGVSc',variant_classification='Variant_Classification')|> - subset(entrez_id%in%genes_df$entrez_id)|> - distinct()) - if(is.null(res)) - return(NULL) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX MT',nrow(res)) - - return(distinct(res)) - # } -})) - -fwrite(wes,'/tmp/mpnst_mutations.csv.gz') - - -print(paste("getting CNV")) -##next let's do CNVs! -cnv<-do.call(rbind,lapply(setdiff(mt_data$CopyNumber,c(NA,"NA")),function(x){ - - x2=x#gsub('"','',gsub("[",'',gsub("]",'',x,fixed=T),fixed=T),fixed=T) - print(x) - sample<-subset(mt_data,CopyNumber==x) - print(sample$improve_sample_id) - res<-fread(synGet(x2)$path) - - long_df<- res|> - tidyr::separate_rows(gene,sep=',')|> - dplyr::rename(gene_symbol='gene')|> - dplyr::left_join(genes_df)|> - subset(!is.na(entrez_id))|> - dplyr::select(entrez_id,log2)|> - dplyr::distinct()|> - dplyr::mutate(copy_number=2^log2)|> - dplyr::select(-log2) - - res<-long_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp - dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del', - ifelse(copy_number<0.7311832,'het loss', - ifelse(copy_number<1.214125,'diploid', - ifelse(copy_number<1.422233,'gain','amp')))))|> - mutate(study='MPNST PDX MT',source='NF Data Portal',improve_sample_id=sample$improve_sample_id[1])|> - dplyr::distinct() - - # long_df <- res[, strsplit(as.character(gene), ","), by = .(chromosome, start, end, depth, log2)] - # filtered_df <- long_df |> - # subset(is.finite(log2))|> - # filter(V1 %in% genes_df$gene) # get only protein coding genes and remove empty gene symbols - # filtered_df <- filtered_df[, .(gene_symbol = V1, - # improve_sample_id = sample$improve_sample_id[1], - # copy_number = 2^log2, - # source = "NF Data Portal", - # study = "MPNST PDX MT")] - # res<-filtered_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp - # dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del', - # ifelse(copy_number<0.7311832,'het loss', - # ifelse(copy_number<1.214125,'diploid', - # ifelse(copy_number<1.422233,'gain','amp')))))|> - # left_join(genes_df)|> - # dplyr::select(entrez_id,improve_sample_id,copy_number,copy_call,study,source)|> - # subset(!is.na(entrez_id))|> - # distinct() - # res|>group_by(copy_call)|>summarize(n_distinct(entrez_id)) - return(res) - # } -})) - -fwrite(cnv,'/tmp/mpnst_copy_number.csv.gz') - -##TODO: get proteomics!!! diff --git a/build/mpnst/02_get_drug_data.R b/build/mpnst/02_get_drug_data.R index e90a31fb..f88f0f99 100644 --- a/build/mpnst/02_get_drug_data.R +++ b/build/mpnst/02_get_drug_data.R @@ -1,172 +1,128 @@ -# Load required libraries +#!/usr/bin/env Rscript + +# Combined Drug List Extraction for MPNST & MPNST‑PDX + library(data.table) -# library(biomaRt)# biomart issues still exist library(dplyr) library(stringr) library(synapser) +library(reticulate) - -# Retrieve command line arguments +# 0) Args & login args <- commandArgs(trailingOnly = TRUE) - - -# Check the number of arguments provided if (length(args) < 1) { - stop("At least one argument is required. Usage: Rscript 02_get_drug_data.R [olddrugfile]", call. 
= FALSE) + stop("Usage: Rscript combined_drug_list.R [old_drugs.tsv,...]", call.=FALSE) } - - -# Assign arguments -newdrugfile <- args[1] # Path to the new drug file -olddrugfiles <- ifelse(length(args) >= 2 && args[2] != "", args[2], NA) - -# Read SYNAPSE_AUTH_TOKEN from the environment -synapse_token <- Sys.getenv("SYNAPSE_AUTH_TOKEN") -if (synapse_token == "") { - stop("Error: SYNAPSE_AUTH_TOKEN environment variable is not set.") +newdrugfile <- args[1] +newdrugfile <- file.path(newdrugfile) +olddrugfiles <- if (length(args)>=2 && nzchar(args[2])) args[2] else NA + +token <- Sys.getenv("SYNAPSE_AUTH_TOKEN") +if (token == "") stop("Please set SYNAPSE_AUTH_TOKEN in your environment", call.=FALSE) +synLogin(authToken = token) + +# 1) Fetch manifest +manifest <- synTableQuery("select * from syn53503360")$asDataFrame() %>% + rename(common_name = Sample) + +# 2) PDX‑sourced drugs via annotations +pdx_df <- manifest %>% + select(common_name, PDX_Drug_Data) %>% + distinct() %>% + filter(!is.na(PDX_Drug_Data)) + +pdx_ids <- unique(unlist(strsplit(pdx_df$PDX_Drug_Data, ","))) +pdx_ids <- pdx_ids[ pdx_ids != "" & !is.na(pdx_ids) & pdx_ids != "NA" ] + +get_pdx_drugs <- function(synid) { + # Query the metadata table for this file's experimentalCondition + q <- sprintf( + "select experimentalCondition from syn21993642 where id='%s'", + synid + ) + df <- synTableQuery(q)$asDataFrame() + if (nrow(df)==0) return(character(0)) + # Split on semicolon, lowercase and drop empties + conds <- unlist(strsplit(df$experimentalCondition, ";")) + tolower(conds[conds!=""]) } -synLogin(authToken = synapse_token) - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.frame()|> - dplyr::rename(common_name='Sample') +pdx_drugs <- unique(unlist(lapply(pdx_ids, get_pdx_drugs))) +pdx_drugs <- setdiff(pdx_drugs, "control") -##PDX contain list of files -pdx<-manifest|> - dplyr::select(common_name,PDX_Drug_Data)|> - distinct()|> - subset(!is.na(PDX_Drug_Data)) +# 3) MicroTissue‑sourced drugs via table "children" +mts_df <- manifest %>% + select(common_name, MicroTissueDrugFolder) %>% + filter(!is.na(MicroTissueDrugFolder)) +mts_ids <- unique(unlist(strsplit(mts_df$MicroTissueDrugFolder, ","))) +mts_ids <- mts_ids[mts_ids != "" & !is.na(mts_ids) & mts_ids != "NA"] -##MTS contain lists of directories -mts<-manifest|> - dplyr::select(common_name,MicroTissueDrugFolder)|> - subset(!is.na(MicroTissueDrugFolder)) - - - -##define functions - -##first function to get children from parentId -getDrugsByParent<-function(parid){ - qtab<-synTableQuery(paste('select id,name,experimentalCondition,parentId from syn21993642 where parentId=\'',parid,'\''))$asDataFrame()|> - subset(!is.na(experimentalCondition))|>dplyr::select(id,name,experimentalCondition) - ##now we need to parse the metadatda table get the info - - return(unique(qtab$experimentalCondition)) - +get_mts_drugs <- function(parentId) { + q <- sprintf("select experimentalCondition from syn21993642 where parentId='%s'", parentId) + synTableQuery(q)$asDataFrame() %>% + pull(experimentalCondition) %>% + unique() %>% + tolower() } -##now loop through manifest to get all the files -mts_fold <- data.table(mts)[,strsplit(as.character(MicroTissueDrugFolder),","), by = .(common_name)] - -alldrugs<-unique(unlist(lapply(mts_fold$V1,function(x){ - samp<-subset(mts_fold,V1==x) - res<-getDrugsByParent(x) - return(res) -}))) - - -alldrugs[which(alldrugs=='PD901')]<-'PD-0325901' - -print(paste(alldrugs,collapse=',')) - 
+mts_drugs <- unique(unlist(lapply(mts_ids, get_mts_drugs))) +# 4) Combine and fix bad names +all_drugs <- unique(c(pdx_drugs, mts_drugs)) +all_drugs[all_drugs == "pd901"] <- "pd-0325901" +message("Combined drug list: ", paste(all_drugs, collapse=", ")) -## new code: - - -# Handle old drugs +# 5) Read old‑drug files or initialize empty if (!is.na(olddrugfiles)) { - # Read and combine old drug files - olddrug_list <- lapply(unique(unlist(strsplit(olddrugfiles, split = ','))), function(x) { - if (file.exists(x)) { - return(fread(x, header = TRUE, sep = '\t', quote = '')) - } else { - warning(paste("Old drug file does not exist:", x)) - return(NULL) + paths <- strsplit(olddrugfiles, ",")[[1]] %>% trimws() + old_list <- lapply(paths, function(f) { + if (file.exists(f)) fread(f, sep="\t", header=TRUE) else { + warning("Missing old‑drug file: ", f) + NULL } }) - - # Remove NULL entries and ensure uniqueness - olddrug_list <- Filter(Negate(is.null), olddrug_list) - - if (length(olddrug_list) > 0) { - olddrugs <- unique(rbindlist(olddrug_list, use.names = TRUE, fill = TRUE)) - print(paste('Read in', nrow(olddrugs), 'old drugs')) + old_list <- Filter(Negate(is.null), old_list) + if (length(old_list) > 0) { + olddrugs <- unique(rbindlist(old_list, use.names=TRUE, fill=TRUE)) + message("Read ", nrow(olddrugs), " old drug records") } else { - olddrugs <- data.frame( - improve_drug_id = integer(), - chem_name = character(), - pubchem_id = character(), - canSMILES = character(), - # isoSMILES = character(), - InChIKey = character(), - formula = character(), - weight = numeric(), - stringsAsFactors = FALSE + olddrugs <- data.table( + improve_drug_id=integer(), chem_name=character(), + pubchem_id=character(), canSMILES=character(), + InChIKey=character(), formula=character(), weight=numeric() ) - print("Old drug files not valid. Created empty olddrugs dataframe.") + message("No valid old data; using empty template") } } else { - # Create an empty dataframe with specified columns - olddrugs <- data.frame( - improve_drug_id = integer(), - chem_name = character(), - pubchem_id = character(), - canSMILES = character(), - # isoSMILES = character(), - InChIKey = character(), - formula = character(), - weight = numeric(), - stringsAsFactors = FALSE + olddrugs <- data.table( + improve_drug_id=integer(), chem_name=character(), + pubchem_id=character(), canSMILES=character(), + InChIKey=character(), formula=character(), weight=numeric() ) - print("No old drug file provided. 
Created empty olddrugs dataframe.") + message("No old‑drug files provided; starting fresh") } -# Write the initial drug file (old drugs) -write.table(olddrugs, file = newdrugfile, sep = '\t', row.names = FALSE, quote = FALSE,col.names=T) - - -# Define the ignore file path -ignore_file_path <- '/tmp/mpnst_ignore_chems.txt' - - -# ##copy old drug to new drug -# olddrugs<-do.call(rbind,lapply(unique(unlist(strsplit(olddrugfiles,split=','))),function(x) read.table(x,header=T,sep='\t',quote='',comment.char=''))) -# olddrugs<-unique(olddrugs) +# 6) Write placeholder +fwrite(olddrugs, newdrugfile, sep="\t", quote=FALSE) +message("Wrote placeholder to ", newdrugfile) -# print(paste('Read in ',nrow(olddrugs),'old drugs')) -# #file.copy(olddrugfile,newdrugfile) -# write.table(olddrugs,file=newdrugfile,sep='\t',row.names=F,quote=FALSE,col.names=T) +# 7) Augment via Python +ignore_file <- "/tmp/combined_drugs_ignore_chems.txt" +use_python("/opt/venv/bin/python3", required=TRUE) +# use_python("/Users/jaco059/miniconda3/bin/python3", required=TRUE) - -##now load reticulate down here - -library(reticulate) - -use_python("/opt/venv/bin/python3", required = TRUE) +# source_python("build/utils/pubchem_retrieval.py") source_python("pubchem_retrieval.py") - -update_dataframe_and_write_tsv(unique_names=alldrugs,output_filename=newdrugfile,ignore_chems=ignore_file_path) - - -tab<-read.table(newdrugfile,sep='\t',header=T,quote="",fill=TRUE) - -newdrugs<-tab|> - subset(chem_name%in%tolower(alldrugs)) - -tab<-tab|> - subset(improve_drug_id%in%newdrugs$improve_drug_id) - -write.table(tab,file=newdrugfile,sep='\t',row.names=FALSE,quote=FALSE) - -print(paste("Final drug table written to", newdrugfile)) - - -##now call the python drug script - - +update_dataframe_and_write_tsv( + unique_names = all_drugs, + output_filename = newdrugfile, + ignore_chems = ignore_file +) + +# 8) Final filter & save +tab <- fread(newdrugfile, sep="\t", header=TRUE) +final_tab <- unique(tab) +fwrite(final_tab, newdrugfile, sep="\t", quote=FALSE) +message("Wrote full synonyms list to ", newdrugfile) \ No newline at end of file diff --git a/build/mpnst/03_get_drug_response_data.R b/build/mpnst/03_get_drug_response_data.R deleted file mode 100644 index 9bbb6f00..00000000 --- a/build/mpnst/03_get_drug_response_data.R +++ /dev/null @@ -1,152 +0,0 @@ -# Load required libraries -library(data.table) -# library(biomaRt)# biomart issues still exist -library(synapser) -library(dplyr) -library(stringr) -# Retrieve command line arguments -args <- commandArgs(trailingOnly = TRUE) - -# Check if a token was provided -if (length(args) == 0) { - stop("No token or sample file provided. Usage: Rscript my_script.R [samples] [drugs]", call. 
= FALSE) -} - -# Set your personal access token -PAT <- args[1] -patients <- args[2] -drugfile <- args[3] - -# Log in to Synapse -synLogin(authToken = PAT) - - -# Read the sample mapping CSV and genes.csv -samples_df <- fread(patients)|> - dplyr::select(improve_sample_id,common_name,model_type)|> - distinct()#"mpnst/synapse_NF-MPNST_samples.csv") -print(head(samples_df)) - -pdx_samps<-subset(samples_df,model_type=='patient derived xenograft') -org_samps<-subset(samples_df,model_type=='organoid') - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.table()|> - dplyr::rename(common_name='Sample') - - -##PDX contain list of files -pdx<-manifest|> - dplyr::select(common_name,PDX_Drug_Data)|> - left_join(pdx_samps)|> - distinct()|> - subset(!is.na(PDX_Drug_Data)) - - -##MTS contain lists of directories -mts<-manifest|> - dplyr::select(common_name,MicroTissueDrugFolder)|> - left_join(org_samps)|> - distinct()|> - subset(!is.na(MicroTissueDrugFolder)) - - -# Modify the extract_date_hour function to return a named vector -extract_date_hour <- function(experiment_id) { - pattern <- "(\\d{6})_?(\\d{2,3})?" - matches <- str_match(experiment_id, pattern) - date <- matches[, 2] - hour <- matches[, 3] - date[is.na(date)] <- NA # Replace with NA instead of blank - hour[is.na(hour)] <- 48 # Replace with 48 instead of blank (default) - return(list(date = date, hour = hour)) -} - - - -##define functions - -##first function to get children from parentId -getDrugDataByParent<-function(parid,sampleId){ - qtab<-synTableQuery(paste('select id,name,experimentalCondition,parentId from syn21993642 where parentId=\'',parid,'\''))$asDataFrame()|> - as.data.frame()|> - subset(!is.na(experimentalCondition))|> - dplyr::select(id,name,experimentalCondition)|> - subset(name!='synapse_storage_manifest.csv') - ##now we need to parse the metadatda table get the info - - res<-do.call(rbind,lapply(qtab$id,function(x){ - sname <- subset(qtab,id==x) - #print(sname) - sname <-extract_date_hour(sname$name) - #print(x) - #print(sname) - data <- fread(synGet(x)$path)|> - filter(response_type=='percent viability')|> - mutate(improve_sample_id=sampleId, - DOSE=(10^dosage)*1000000, ##dosage is log(M), need to move to micromolar - GROWTH=response, #/100, - source = "NF Data Portal", - #CELL = improve_sample_id, - chem_name = compound_name, - study = paste0('MT ',sname$date,' exp'), - time = sname$hour) %>% - select(improve_sample_id,DOSE,GROWTH,source,chem_name,study,time) - - return(data) - })) - return(res) -} - -##now loop through manifest to get all the files -mts_fold <- data.table(mts)[,strsplit(as.character(MicroTissueDrugFolder),","), by = .(improve_sample_id,common_name)] - -mts_fold <- mts_fold[which(!mts_fold$V1%in%c("NA",NA)),] - -print(mts_fold) - -alldrugs<-do.call(rbind,lapply(mts_fold$V1,function(x){ - samp<-subset(mts_fold,V1==x) - print(samp$common_name) - res<-getDrugDataByParent(x,samp$improve_sample_id) - return(res) -})) - -##do the drug matching -drug_df<-fread(drugfile) - -##update drug name PD901 since it's mussing - -alldrugs$chem_name[which(alldrugs$chem_name=='PD901')]<-'PD-0325901' - - - #drug_df$chem_name=tolower(drug_df$chem_name) -alldrugs$chem_name<-tolower(alldrugs$chem_name) - -#print(drug_df) -drug_map<-subset(drug_df,chem_name%in%alldrugs$chem_name) - -findrugs<-alldrugs|> - left_join(drug_map)|> - mutate(time_unit='hours')|> - dplyr::select(DOSE,GROWTH,source,study,Drug=improve_drug_id,time,time_unit,improve_sample_id)|> - 
distinct()|> - subset(!is.na(Drug)) - -missing<-setdiff(alldrugs$chem_name,drug_map$chem_name) -print(paste('missing',length(missing),'drugs:')) -print(paste(missing,collapse=',')) - -#TODO: add in new drug lookup -print(head(findrugs)) -fwrite(findrugs,'/tmp/curve_data.tsv',sep='\t') - -pycmd = '/opt/venv/bin/python fit_curve.py --input /tmp/curve_data.tsv --output /tmp/experiments' -print('running curve fitting') -system(pycmd) - -##mmve file name -file.rename('/tmp/experiments.0','/tmp/mpnst_experiments.tsv') - - diff --git a/build/mpnst/03_get_experiments.R b/build/mpnst/03_get_experiments.R new file mode 100644 index 00000000..a430cae8 --- /dev/null +++ b/build/mpnst/03_get_experiments.R @@ -0,0 +1,282 @@ +library(data.table) +library(synapser) +library(dplyr) +library(stringr) +library(readr) +library(readxl) +library(tidyr) + +# Check that correct number of arguments are present +args <- commandArgs(trailingOnly = TRUE) +if (length(args) != 4) { + stop("Usage: Rscript 03_get_experiments.R ", call. = FALSE) +} +PAT <- args[1] +samples <- args[2] +drugfile <- args[3] +out_prefix <- args[4] + +synLogin(authToken = PAT) + +# Read in sampes file +samples_df <- fread(samples) %>% + select(improve_sample_id, common_name, model_type) %>% + distinct() + +pdx_samps <- filter(samples_df, model_type == "patient derived xenograft") +mt_samps <- filter(samples_df, model_type == "xenograft derived organoid") + +# Get manifest table from Synapse +manifest <- synTableQuery("select * from syn53503360")$asDataFrame() %>% + rename(common_name = Sample) %>% + as.data.table() + +# Helper Function to extract date and hour from experiment ID +extract_date_hour <- function(experiment_id) { + pattern <- "(\\d{6})_?(\\d{2,3})?" + m <- str_match(experiment_id, pattern) + date <- m[,2]; hour <- m[,3] + date[is.na(date)] <- NA + hour[is.na(hour)] <- 48 + list(date = date, hour = hour) +} + +# ──────────────────────────────────────────────── +# MicroTissue Experiments +# ──────────────────────────────────────────────── + +getDrugDataByParent <- function(parid, sampleId) { + q <- sprintf( + "select id,name,experimentalCondition,parentId from syn21993642 where parentId='%s'", + parid + ) + qtab <- synTableQuery(q)$asDataFrame() %>% + filter(!is.na(experimentalCondition), name != "synapse_storage_manifest.csv") %>% + select(id, name, experimentalCondition) + do.call(rbind, lapply(qtab$id, function(x) { + info <- filter(qtab, id == x) + d <- extract_date_hour(info$name) + fread(synGet(x)$path) %>% + filter(response_type == "percent viability") %>% + transmute( + improve_sample_id = sampleId, + DOSE = (10^dosage) * 1e6, + GROWTH = response, + source = "NF Data Portal", + chem_name = compound_name, + study = paste0("MT ", d$date, " exp"), + time = d$hour + ) + })) +} + +# Create map of MicroTissue Drug Folders +mts_map <- manifest %>% + select(common_name, MicroTissueDrugFolder) %>% + inner_join(mt_samps, by = "common_name") %>% + separate_rows(MicroTissueDrugFolder, sep = ",") %>% + # keep exactly what old script did: drop only "NA" and actual NA + filter( + !is.na(MicroTissueDrugFolder), + MicroTissueDrugFolder != "NA" + ) %>% + select( + improve_sample_id, + folder = MicroTissueDrugFolder + ) + +# Fetch all MicroTissue drug response data +mt_data <- do.call(rbind, lapply(seq_len(nrow(mts_map)), function(i) { + sample_id <- mts_map$improve_sample_id[i] + folder <- mts_map$folder[i] + getDrugDataByParent(folder, sample_id) +})) + +drug_map <- fread(drugfile) %>% + select(improve_drug_id, chem_name) %>% + distinct() + 
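# --- Illustrative aside (editor's sketch, not part of the patch) ----------------
# getDrugDataByParent() above converts MicroTissue dosing from log10(molar) to
# micromolar via DOSE = (10^dosage) * 1e6 (the deleted script documented dosage
# as log(M)). A quick check with hypothetical values:
log_molar  <- c(-9, -6, -4)              # 1 nM, 1 uM, 100 uM
micromolar <- (10^log_molar) * 1e6       # 0.001, 1, 100
stopifnot(isTRUE(all.equal(micromolar, c(1e-3, 1, 100))))
# ---------------------------------------------------------------------------------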
+# Clean up drug names and join with drug_map +mt_curve <- mt_data %>% + mutate( + chem_name = tolower(chem_name), + chem_name = ifelse(chem_name == "pd901", "pd-0325901", chem_name) + ) %>% + left_join(drug_map, by = "chem_name") %>% + filter(!is.na(improve_drug_id)) %>% + transmute( + source = source, + improve_sample_id = improve_sample_id, + Drug = improve_drug_id, + study = study, + time = time, + time_unit = "hours", + DOSE = DOSE, + GROWTH = GROWTH + ) + +# Run curve fitting, Write MicroTissue curve data +fwrite(mt_curve, file.path("/tmp", paste0(out_prefix, "_mt_curve_data.tsv")), sep = "\t") + +message("Wrote MT curve data") + +# Write MT experiments file +system(sprintf( + "/opt/venv/bin/python fit_curve.py --input %s --output %s", + paste0("/tmp/", out_prefix, "_mt_curve_data.tsv"), + paste0("/tmp/", out_prefix, "_mt_experiments") +)) +file.rename( + paste0("/tmp/", out_prefix, "_mt_experiments.0"), + paste0("/tmp/", out_prefix, "_mt_experiments.tsv") +) +message("Wrote MT experiments") + +# ──────────────────────────────────────────────── +# PDX Experiments +# ──────────────────────────────────────────────── + +# Create a map of PDX Drug Data +# This will be used to fetch the drug data for each PDX sample +pdx_map <- do.call(rbind, lapply(seq_len(nrow(manifest)), function(i) { + row <- manifest[i, ] + samp <- pdx_samps[pdx_samps$common_name == row$common_name, ] + if (nrow(samp)==0 || is.na(row$PDX_Drug_Data) || row$PDX_Drug_Data %in% c("", "NA")) + return(NULL) + ids <- strsplit(row$PDX_Drug_Data, ",")[[1]] + ids <- trimws(ids[ids!=""]) + data.frame( + improve_sample_id = samp$improve_sample_id, + child_id = ids, + stringsAsFactors = FALSE + ) +})) + +# Create a dataframe of PDX metadata +pdx_meta <- do.call(rbind, lapply(seq_len(nrow(pdx_map)), function(i) { + sid <- pdx_map$improve_sample_id[i] + cid <- pdx_map$child_id[i] + pid <- synGet(cid)$parentId + if (is.null(pid) || pid=="") stop("no parentId for ", cid) + data.frame( + improve_sample_id = sid, + child_id = cid, + parentId = pid, + stringsAsFactors = FALSE + ) +})) + +all_pdx <- do.call(rbind, lapply(seq_len(nrow(pdx_meta)), function(i) { + m <- pdx_meta[i, ] + pth <- synGet(m$child_id)$path + raw <- if (grepl("\\.xlsx?$", pth)) read_xlsx(pth) else read_csv(pth) + + # detect second‐drug column + sec_opts <- c("compound 2_name", "compound_2_name") + drug2_col <- intersect(sec_opts, names(raw))[1] + compound2 <- if (!is.na(drug2_col)) raw[[drug2_col]] else NA_character_ + + df <- data.frame( + child_id = m$child_id, + specimen_id = raw$specimen_id, + compound_name = raw$compound_name, + compound_2_name = compound2, + experimental_time_point = raw$experimental_time_point, + experimental_time_point_unit = raw$experimental_time_point_unit, + assay_value = raw$assay_value, + stringsAsFactors = FALSE + ) + + df <- within(df, { + drug1 <- tolower(trimws(compound_name)) + drug2 <- tolower(trimws(compound_2_name)) + treatment <- ifelse( + is.na(drug1) | drug1 %in% c("", "na", "n/a", "nan"), + "control", + ifelse(!is.na(drug2) & drug2 != "", + paste(drug1, drug2, sep = "+"), + drug1 + ) + ) + time <- experimental_time_point + time_unit <- experimental_time_point_unit + volume <- assay_value + }) + + df[ , c("child_id", "specimen_id", "treatment", "time", "time_unit", "volume")] +})) + +# join on parentId and sample +pdx_data <- merge(all_pdx, pdx_meta, by="child_id") + +pdx_data <- subset(pdx_data, duplicated(child_id) | TRUE) +pdx_data <- within(pdx_data, { + experiment <- parentId + model_id <- improve_sample_id +}) + +# 
Filter out experiments missing a control +has_ctl <- tapply(pdx_data$treatment == "control", pdx_data$experiment, any) +no_ctl_exps <- names(has_ctl)[!has_ctl] +pdx_data <- pdx_data[pdx_data$experiment %in% names(has_ctl)[has_ctl], ] + +# Reorder final columns +pdx_data <- pdx_data[ , c("experiment","specimen_id","treatment", + "time","time_unit","volume","model_id")] + +# Correct doxorubinsin typo across all data +pdx_data$treatment <- gsub("doxorubinsin", + "doxorubicin", + pdx_data$treatment, + ignore.case = TRUE) + +# Drop any remaining NA rows +pdx_data <- na.omit(pdx_data) + +# write & fit +fwrite(pdx_data, file.path("/tmp", paste0(out_prefix, "_pdx_curve_data.tsv")), sep = "\t") + + +message("Wrote PDX curve data") + +system(sprintf( + "/opt/venv/bin/python calc_pdx_metrics.py %s --drugfile %s --outprefix %s --source 'NF Data Portal' --study 'MPNST PDX'", + paste0("/tmp/", out_prefix, "_pdx_curve_data.tsv"), + drugfile, + paste0("/tmp/", out_prefix, "_pdx") +)) + + + +message("Wrote PDX experiments to ", "/tmp/", out_prefix, "_pdx_experiments.tsv and combinations") + + +# ──────────────────────────────────────────────── +# Combine all Experiments +# ──────────────────────────────────────────────── + +# Read MicroTissue experiments +mt_exp <- fread(paste0("/tmp/", out_prefix, "_mt_experiments.tsv")) %>% + mutate( + dose_response_value = as.character(dose_response_value) + ) + +# Read PDX experiments +pdx_exp <- fread(paste0("/tmp/", out_prefix, "_pdx_experiments.tsv")) %>% + mutate( + dose_response_value = as.character(dose_response_value) + ) + +# Join experiments into one. +all_exp <- bind_rows(mt_exp, pdx_exp) + +# Write out Experiments +fwrite(all_exp, paste0("/tmp/", out_prefix, "_experiments.tsv"), sep = "\t") +message("Wrote combined experiments: /tmp/", out_prefix, "_experiments.tsv") + + +# Rename the Drug Combination data file to fit schema naming +file.rename( + paste0("/tmp/", out_prefix, "_pdx_combinations.tsv"), + paste0("/tmp/", out_prefix, "_combinations.tsv") +) \ No newline at end of file diff --git a/build/mpnst/build_exp.sh b/build/mpnst/build_exp.sh index a9a2b763..14506cfe 100644 --- a/build/mpnst/build_exp.sh +++ b/build/mpnst/build_exp.sh @@ -3,5 +3,7 @@ set -euo pipefail trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR -echo "Running 03_get_drug_response_data.R with $SYNAPSE_AUTH_TOKEN, $1, and $2." -Rscript 03_get_drug_response_data.R $SYNAPSE_AUTH_TOKEN $1 $2 +echo "Running 03_get_experiments.R with $SYNAPSE_AUTH_TOKEN, $1, and $2." +Rscript 03_get_experiments.R $SYNAPSE_AUTH_TOKEN $1 $2 mpnst +rm /tmp/mpnst_pdx_experiments.tsv /tmp/mpnst_mt_experiments.tsv /tmp/mpnst_mt_curve_data.tsv /tmp/mpnst_pdx_curve_data.tsv + diff --git a/build/mpnst/build_omics.sh b/build/mpnst/build_omics.sh index b08ac63d..d6d2cec7 100644 --- a/build/mpnst/build_omics.sh +++ b/build/mpnst/build_omics.sh @@ -3,5 +3,5 @@ set -euo pipefail trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR -echo "Running 01_mpnst_get_omics.R with $SYNAPSE_AUTH_TOKEN, $2, and $1." -Rscript 01_mpnst_get_omics.R $SYNAPSE_AUTH_TOKEN $2 $1 +echo "Running 01_combined_omics.R with $SYNAPSE_AUTH_TOKEN, $2, and $1." 
+Rscript 01_combined_omics.R $SYNAPSE_AUTH_TOKEN $2 $1 diff --git a/build/mpnst/requirements.r b/build/mpnst/requirements.r index 7796236d..e8bfac35 100755 --- a/build/mpnst/requirements.r +++ b/build/mpnst/requirements.r @@ -9,3 +9,5 @@ install.packages("data.table") install.packages("R.utils") install.packages("stringr") install.packages("tidyr") +install.packages("readr") +install.packages("readxl") diff --git a/build/mpnst/requirements.txt b/build/mpnst/requirements.txt index 27c4dc2a..8f07cbd2 100755 --- a/build/mpnst/requirements.txt +++ b/build/mpnst/requirements.txt @@ -8,4 +8,5 @@ scikit-learn scipy requests mordredcommunity -rdkit \ No newline at end of file +rdkit +statsmodels \ No newline at end of file diff --git a/build/mpnst/sample_gen.R b/build/mpnst/sample_gen.R deleted file mode 100644 index 3d19fa85..00000000 --- a/build/mpnst/sample_gen.R +++ /dev/null @@ -1,25 +0,0 @@ -# This script generate a new sample table based on pervious beatAML improved sample ID -# It will take the maximum value of beatAML improved sample ID and continue from ID count from there -# Load required libraries -library(data.table) - -main <- fread("mpnst/NF_MPNST_samples.csv") -previous_aml <- fread("beatAML/beataml_samples.csv") -max_id <- max(previous_aml$improve_sample_id) -main$improve_sample_id <- seq(from = max_id + 1, length.out = nrow(main)) - -synapse_main <- fread("mpnst/synapse_NF-MPNST_samples.csv") -# Step 1: Create a dictionary from 'main' -id_dict <- setNames(main$improve_sample_id, main$other_id) - -# Step 2: Update 'ID' in 'synapse_main' -synapse_main$ID <- id_dict[synapse_main$Sample] - -# Handling NA values if any mismatch occurs (Optional based on your data integrity) -# If there are NAs generated, you might need to check for unmatched keys -# synapse_main$ID[is.na(synapse_main$ID)] <- -1 # Assign a placeholder like -1 for unmatched rows - -# Step 3: Save the updated 'synapse_main' -fwrite(synapse_main, "mpnst/synapse_NF-MPNST_samples.csv") -fwrite(main, "mpnst/NF_MPNST_samples.csv") # updated sample file - diff --git a/build/mpnstpdx/01_mpnstpdx_get_omics.R b/build/mpnstpdx/01_mpnstpdx_get_omics.R deleted file mode 100755 index 86e3cbb8..00000000 --- a/build/mpnstpdx/01_mpnstpdx_get_omics.R +++ /dev/null @@ -1,195 +0,0 @@ -# Load required libraries -library(data.table) -# library(biomaRt)# biomart issues still exist -library(synapser) -library(dplyr) - -# Retrieve command line arguments -args <- commandArgs(trailingOnly = TRUE) - -# Check if a token was provided -if (length(args) == 0) { - stop("No token or sample file provided. Usage: Rscript my_script.R [samples] [genes]", call. = FALSE) -} - -# Set your personal access token -PAT <- args[1] -patients <- args[2] -genefile <- args[3] - -# Log in to Synapse -synLogin(authToken = PAT) - -# Define the Ensembl mart # biomart issues still exist -# ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") # biomart issues still exist; fix later... 
- -# Path to the directory to save .sf files -#path <- "./tmp" -#dir.create(path, showWarnings = FALSE) - -# Read the sample mapping CSV and genes.csv -samples_df <- fread(patients)|> - dplyr::select(improve_sample_id,common_name,model_type)|> - distinct()#"mpnst/synapse_NF-MPNSTpdx_samples.csv") - -pdx_samps<-subset(samples_df,model_type=='patient derived xenograft') -tumor_samps<-subset(samples_df,model_type=='tumor') - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.frame()|> - dplyr::rename(common_name='Sample') - - -##for now we only have tumor and pdx data -##they each get their own sample identifier -pdx_data<-manifest|>dplyr::select(common_name,starts_with("PDX"))|> - left_join(pdx_samps)|> - dplyr::select(improve_sample_id,RNASeq='PDX_RNASeq',Mutations='PDX_Somatic_Mutations',CopyNumber='PDX_CNV',Proteomics='PDX_Proteomics') - -tumor_data<- manifest|>dplyr::select(common_name,starts_with("Tumor"))|> - left_join(tumor_samps)|> - dplyr::select(improve_sample_id,RNASeq='Tumor_RNASeq',Mutations='Tumor_Somatic_Mutations',CopyNumber='Tumor_CNV')|> - mutate(Proteomics='') ##we dont have tumor proteomics from these samples -#print(tumor_data) - - -pdx_data<-rbind(pdx_data,tumor_data)|>distinct() - -# gene mapping table -genes_df <- fread(genefile) - - -##added proteomics first -proteomics<-do.call('rbind',lapply(setdiff(pdx_data$Proteomics,c('',NA,"NA")),function(x){ - # if(x!=""){ - #print(x) - sample<-subset(pdx_data,Proteomics==x) - #print(sample) - res<-fread(synGet(x)$path)|> - #tidyr::separate(Name,into=c('other_id','vers'),sep='\\.')|> - #dplyr::select(-vers)|> - dplyr::rename(gene_symbol='Gene')|> - left_join(genes_df)|> - dplyr::select(entrez_id,proteomics='logRatio')|> - distinct()|> - subset(!is.na(entrez_id))|> - subset(proteomics!=0) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX',nrow(res)) - return(distinct(res)) - # } -})) - -fwrite(proteomics,'/tmp/mpnstpdx_proteomics.csv.gz') - - -#### FIRST WE GET RNASeq Data - -rnaseq<-do.call('rbind',lapply(setdiff(pdx_data$RNASeq,c(NA,"NA")),function(x){ - # if(x!=""){ - #print(x) - sample<-subset(pdx_data,RNASeq==x) - #print(sample) - res<-fread(synGet(x)$path)|> - tidyr::separate(Name,into=c('other_id','vers'),sep='\\.')|> - dplyr::select(-vers)|> - left_join(genes_df)|> - dplyr::select(entrez_id,transcriptomics='TPM')|> - subset(!is.na(entrez_id))|> - subset(transcriptomics!=0) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX',nrow(res)) - return(distinct(res)) - # } -})) - -fwrite(rnaseq,'/tmp/mpnstpdx_transcriptomics.csv.gz') - - - -#####NEXT WE DO WES DATA -print("Getting WES") -wes<-do.call(rbind,lapply(setdiff(pdx_data$`Mutations`,c(NA,"NA")),function(x){ - - x2=x#gsub('"','',gsub("[",'',gsub("]",'',x,fixed=T),fixed=T),fixed=T) - print(x) - sample<-subset(pdx_data,Mutations==x) - print(sample$improve_sample_id) - res<-NULL - try(res<-fread(synGet(x2)$path)|> - dplyr::select(entrez_id='Entrez_Gene_Id',mutation='HGVSc',variant_classification='Variant_Classification')|> - subset(entrez_id%in%genes_df$entrez_id)|> - distinct()) - if(is.null(res)) - return(NULL) - - res$improve_sample_id=rep(sample$improve_sample_id[1],nrow(res)) - res$source=rep('NF Data Portal',nrow(res)) - res$study=rep('MPNST PDX',nrow(res)) - - return(distinct(res)) - # } -})) - 
-fwrite(wes,'/tmp/mpnstpdx_mutations.csv.gz') - - -print(paste("getting CNV")) -##next let's do CNVs! -cnv<-do.call(rbind,lapply(setdiff(pdx_data$CopyNumber,c(NA,"NA")),function(x){ - - x2=x#gsub('"','',gsub("[",'',gsub("]",'',x,fixed=T),fixed=T),fixed=T) - print(x) - sample<-subset(pdx_data,CopyNumber==x) - print(sample$improve_sample_id) - res<-fread(synGet(x2)$path) - - long_df<- res|> - tidyr::separate_rows(gene,sep=',')|> - dplyr::rename(gene_symbol='gene')|> - dplyr::left_join(genes_df)|> - subset(!is.na(entrez_id))|> - dplyr::select(entrez_id,log2)|> - dplyr::distinct()|> - dplyr::mutate(copy_number=2^log2)|> - dplyr::select(-log2) - - res<-long_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp - dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del', - ifelse(copy_number<0.7311832,'het loss', - ifelse(copy_number<1.214125,'diploid', - ifelse(copy_number<1.422233,'gain','amp')))))|> - mutate(study='MPNST PDX',source='NF Data Portal',improve_sample_id=sample$improve_sample_id[1])|> - dplyr::distinct() - - # long_df <- res[, strsplit(as.character(gene), ","), by = .(chromosome, start, end, depth, log2)] - # filtered_df <- long_df |> - # subset(is.finite(log2))|> - # filter(V1 %in% genes_df$gene) # get only protein coding genes and remove empty gene symbols - # filtered_df <- filtered_df[, .(gene_symbol = V1, - # improve_sample_id = sample$improve_sample_id[1], - # copy_number = 2^log2, - # source = "NF Data Portal", - # study = "MPNST PDX")] - # res<-filtered_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp - # dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del', - # ifelse(copy_number<0.7311832,'het loss', - # ifelse(copy_number<1.214125,'diploid', - # ifelse(copy_number<1.422233,'gain','amp')))))|> - # left_join(genes_df)|> - # dplyr::select(entrez_id,improve_sample_id,copy_number,copy_call,study,source)|> - # subset(!is.na(entrez_id))|> - # distinct() - # res|>group_by(copy_call)|>summarize(n_distinct(entrez_id)) - return(res) - # } -})) - -fwrite(cnv,'/tmp/mpnstpdx_copy_number.csv.gz') - -##TODO: get proteomics!!! diff --git a/build/mpnstpdx/02_get_drug_data.R b/build/mpnstpdx/02_get_drug_data.R deleted file mode 100644 index 1f6ad47e..00000000 --- a/build/mpnstpdx/02_get_drug_data.R +++ /dev/null @@ -1,120 +0,0 @@ -# Load required libraries -library(data.table) -# library(biomaRt)# biomart issues still exist -library(dplyr) -library(stringr) -library(reticulate) -library(synapser) -library(tidyr) - - -# Retrieve command line arguments -args <- commandArgs(trailingOnly = TRUE) - -# Check if a token was provided -if (length(args) == 0) { - stop("No token or sample file provided. Usage: Rscript my_script.R [olddrugfile] [newdrugfile]", call. 
= FALSE) -} - -# Set your personal access token -PAT <- args[1] -olddrugfiles <- args[2] -newdrugfile <- args[3] -# Log in to Synapse -synLogin(authToken = PAT) - - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.frame()|> - dplyr::rename(common_name='Sample') - - -##PDX contain list of files -pdx<-manifest|> - dplyr::select(common_name,PDX_Drug_Data)|> - distinct()|> - subset(!is.na(PDX_Drug_Data)) - - - - - -##define functions - -#print(pdx) -##now loop through manifest to get all the files -pdx_fold <- data.table(pdx)[,strsplit(as.character(PDX_Drug_Data),","), by = .(common_name)]|> - subset(!is.na(V1))|> - subset(V1!='NA')|> - dplyr::rename(id='V1') - -#print(pdx_fold) -###this is not all of themju -pdx_meta<-do.call(rbind,lapply(pdx_fold$id, function(x) synapser::synGetAnnotations(x)|> - as.data.frame()|> - dplyr::select('experimentalCondition')|> - dplyr::mutate(id=x)))|> - left_join(pdx_fold)|> - tidyr::separate_rows(experimentalCondition,sep=';')|> - mutate(chem_name=tolower(experimentalCondition)) - -#pdx_drug <- data.table(pdx_meta)[,strsplit(as.character(experimentalCondition),';'),by= .(common_name,id)]|> -# mutate(drug=tolower(experimentalCondition)) -#drugs<-sapply(pdx_meta$experimentalCondition,function(x) tolower(unlist(strsplit(x,split=';'))))|> -# unlist()|> -# unique() - -drugs<-setdiff(pdx_meta$chem_name,'control') - - -print(paste(drugs,collapse=',')) - - -##copy old drug to new drug -olddrugs<-do.call(rbind,lapply(unique(unlist(strsplit(olddrugfiles,split=','))),function(x) read.table(x,header=T,sep='\t',quote='',comment.char=''))) -olddrugs<-unique(olddrugs) - -print(paste('Read in ',nrow(olddrugs),'old drug files')) - -fdrugs<-subset(olddrugs,chem_name%in%drugs) -if(nrow(fdrugs)>0){ - dids<-fdrugs$improve_drug_id -}else{ - dids<-c() -} -newdrugs<-subset(olddrugs,improve_drug_id%in%dids) - -print(paste('Found',length(dids),'improved drug ids that exist, saving those')) - - - #file.copy(olddrugfile,newdrugfile) -write.table(newdrugs,file=newdrugfile,sep='\t',row.names=F,quote=FALSE,col.names=T) -output_file_path <- newdrugfile -ignore_file_path <- '/tmp/mpnstpdx_ignore_chems.txt' - - -##now load reticulate down here - - - -use_python("/opt/venv/bin/python3", required = TRUE) -source_python("pubchem_retrieval.py") - -update_dataframe_and_write_tsv(unique_names=drugs,output_filename=output_file_path,ignore_chems=ignore_file_path) - - -tab<-read.table(newdrugfile,sep='\t',header=T,quote="",comment.char="") - -newdrugs<-tab|> - subset(chem_name%in%tolower(alldrugs)) - -tab<-tab|> - subset(improve_drug_id%in%newdrugs$improve_drug_id) - -write.table(tab,file=newdrugfile,sep='\t',row.names=FALSE,quote=FALSE) - - -##now call the python drug script - - diff --git a/build/mpnstpdx/03_get_drug_response_data.R b/build/mpnstpdx/03_get_drug_response_data.R deleted file mode 100644 index 095dba34..00000000 --- a/build/mpnstpdx/03_get_drug_response_data.R +++ /dev/null @@ -1,174 +0,0 @@ -# Load required libraries -library(data.table) -# library(biomaRt)# biomart issues still exist -library(synapser) -library(dplyr) -library(stringr) -# Retrieve command line arguments -args <- commandArgs(trailingOnly = TRUE) - -# Check if a token was provided -if (length(args) == 0) { - stop("No token or sample file provided. Usage: Rscript my_script.R [samples] [drugs]", call. 
= FALSE) -} - -# Set your personal access token -PAT <- args[1] -patients <- args[2] -drugfile <- args[3] - -# Log in to Synapse -synLogin(authToken = PAT) - - -# Read the sample mapping CSV and genes.csv -samples_df <- fread(patients)|> - dplyr::select(improve_sample_id,common_name,model_type)|> - distinct()#"mpnst/synapse_NF-MPNST_samples.csv") -print(head(samples_df)) - -pdx_samps<-subset(samples_df,model_type=='patient derived xenograft') -org_samps<-subset(samples_df,model_type=='organoid') - -##now get the manifest from synapse -manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()|> - as.data.frame()|> - dplyr::rename(common_name='Sample') - - -##PDX contain list of files -pdx<-manifest|> - dplyr::select(common_name,PDX_Drug_Data)|> - subset(!PDX_Drug_Data%in%c("NA",NA))|> - left_join(pdx_samps)|> - distinct() - -print(pdx) - - -# Modify the extract_date_hour function to return a named vector -extract_date_hour <- function(experiment_id) { - pattern <- "(\\d{6})_?(\\d{2,3})?" - matches <- str_match(experiment_id, pattern) - date <- matches[, 2] - hour <- matches[, 3] - date[is.na(date)] <- NA # Replace with NA instead of blank - hour[is.na(hour)] <- 48 # Replace with 48 instead of blank (default) - return(list(date = date, hour = hour)) -} - - - -##define functions - -##first function to get children from parentId - -##now loop through manifest to get all the files -#mts_fold <- data.table(mts)[,strsplit(as.character(MicroTissueDrugFolder),","), by = .(improve_sample_id,common_name)] - - - -##do the drug matching -drug_df<-fread(drugfile)|> - dplyr::select('improve_drug_id','chem_name')|> - distinct() - -##update drug name PD901 since it's mussing -##now loop through manifest to get all the files -pdx_fold <- data.table(pdx)[,strsplit(as.character(PDX_Drug_Data),","), by = .(common_name)]|> - dplyr::rename(id='V1')|> - subset(!is.na(id)) - -pdx_meta<-do.call(rbind,lapply(pdx_fold$id, function(x) synapser::synGetAnnotations(x)|> - as.data.frame()|> - dplyr::select('experimentalCondition')|> - dplyr::mutate(id=x)))|>left_join(pdx_fold)|> - # tidyr::separate_rows(experimentalCondition,sep=';')|> - # mutate(chem_name=tolower(experimentalCondition))|> - # left_join(drug_df)|> - left_join(pdx_samps)|> - dplyr::select(improve_sample_id,id)|> - distinct()|> - subset(!is.na(id)) -pdx_meta$parentId=unlist(lapply(pdx_meta$id,function(x) synGet(x)$parentId)) - -##the older pdx data is in separate files. the newer is not. 
-#we need to reformat the older to look like the newer
-oldfolders=c('syn22018363','syn22024460','syn22024428','syn22024429','syn22024437','syn22024438')
-
-old_meta<-subset(pdx_meta,parentId%in%oldfolders)
-
-old_data<-do.call(rbind,lapply(unique(old_meta$parentId),function(x){
-  ids<-subset(old_meta,parentId==x)|>
-    subset(!is.na(id))
-
-  do.call(rbind,lapply(ids$id,function(y){
-    tab<-readr::read_csv(synapser::synGet(y)$path)
-    print(head(tab))
-    tab<-dplyr::select(tab,c('specimen_id','compound_name','dose','dose_unit',
-                             'experimental_time_point','experimental_time_point_unit',
-                             'assay_type','assay_value','assay_units'))|>
-      mutate(id=x)|>
-      mutate(chem_name=tolower(compound_name))
-
-    # tab$single_or_combo=sapply(tab$chem_name,function(z) ifelse(length(grep('\\+',z))>0,'combo','single'))
-    tab$chem_name=gsub('n/a','control',tab$chem_name)|>
-      tidyr::replace_na('control')
-
-    tab$chem_name=sapply(tab$chem_name,function(z) ifelse(z=='doxorubinsin','doxorubicin',z))
-    # tab<-tab|>left_join(drug_df)
-    #print(head(tab))
-    return(tab)
-  }))
-}))|>
-  left_join(unique(select(old_meta,id=parentId,improve_sample_id)))|>
-  dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,time_unit=experimental_time_point_unit,volume=assay_value)|>distinct()
-
-
-
-new_meta<-subset(pdx_meta,!parentId%in%oldfolders)
-
-##now combine each of the old pdx files into single files
-#each file has all experiments in it
-new_data<-do.call(rbind,lapply(unique(new_meta$id), function(x){
-  fpath=synapser::synGet(x)$path
-  if(length(grep('xls',fpath))>0){
-    tab<-readxl::read_xlsx(fpath)
-  }else{
-    tab<-readr::read_csv(fpath)
-  }
-  print(head(tab))
-  tab<-dplyr::select(tab,c('specimen_id','compound_name','dose','dose_unit',
-                           'experimental_time_point','experimental_time_point_unit',
-                           'assay_type','assay_value','assay_units'))|>
-    mutate(id=x)
-
-  # tab$single_or_combo=sapply(tab$compound_name,function(x) ifelse(length(grep('\\+',x))>0,'combo','single'))
-  tab$compound_name=gsub('N/A','control',tab$compound_name)|>tidyr::replace_na('control')
-  tab<-tab|>
-    mutate(chem_name=tolower(compound_name))#|>
-    # left_join(drug_df)
-  #print(head(tab))
-  return(tab)}))|>
-  left_join(pdx_meta)|>
-  dplyr::select(experiment=id,model_id=improve_sample_id,specimen_id,treatment=chem_name,time=experimental_time_point,time_unit=experimental_time_point_unit,volume=assay_value)|>distinct()
-
-##maybe tweak the data frame a bit depending on curve fitting script
-pdx_data<-rbind(old_data,new_data)
-
-#single_pdx<-subset(pdx_data,single_or_combo=='single')
-#combo_pdx<-subset(pdx_data,single_or_combo=='combo')
-#print(head(pdx_data))
-fwrite(pdx_data,'/tmp/curve_data.tsv',sep='\t')
-
-##TODO: create new curve fitting script in python
-pycmd = '/opt/venv/bin/python calc_pdx_metrics.py --input /tmp/curve_data.tsv --outprefix /tmp/mpnstpdx'
-print('running curve fitting')
-#system(pycmd)
-
-##now read in data again, separate out by single/combo, then map to drug id
-
-##mmve file name
-#file.rename('/tmp/experiments.0','/tmp/mpnstpdx_experiments.tsv')
-
-
diff --git a/build/mpnstpdx/README.md b/build/mpnstpdx/README.md
deleted file mode 100755
index b0059283..00000000
--- a/build/mpnstpdx/README.md
+++ /dev/null
@@ -1,47 +0,0 @@
-## Build Instructions for MPNST PDX Dataset
-
-To build the MPNST PDX dataset, follow these steps from the coderdata root
-directory. Currently using the test files as input.
-
-1. Build the Docker image:
-   ```
-   docker build -f build/docker/Dockerfile.mpnstpdx -t mpnstpdx . --build-arg HTTPS_PROXY=$HTTPS_PROXY
-   ```
-
-2. Generate new identifiers for these samples to create a
-   `mpnstpdx_samples.csv` file. This pulls from the latest synapse
-   project metadata table.
-   ```
-   docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnstpdx sh build_samples.sh /tmp/build/build_test/test_samples.csv
-   ```
-
-3. Pull the data and map it to the samples. This uses the metadata
-   table pulled above.
-   ```
-   docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnstpdx sh build_omics.sh /tmp/build/build_test/test_genes.csv /tmp/mpnstpdx_samples.csv
-   ```
-
-4. Process drug data
-   ```
-   docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnstpdx sh build_drugs.sh /tmp/build/build_test/test_drugs.tsv
-   ```
-
-5. Process experiment data. This uses the metadata from above as well as the file directory on synapse:
-   ```
-   docker run -v $PWD:/tmp -e SYNAPSE_AUTH_TOKEN=$SYNAPSE_AUTH_TOKEN mpnstpdx sh build_exp.sh /tmp/mpnstpdx_samples.csv /tmp/mpnstpdx_drugs.tsv.gz
-   ```
-
-Please ensure that each step is followed in order for correct dataset compilation.
-
-## MPNST PDX Dataset Structure
-The MPNST dataset includes the following output files:
-```
-├── mpnstpdx_samples.csv
-├── mpnstpdx_transcriptomics.csv
-├── mpnstpdx_mutations.csv
-├── mpnstpdx_copy_number.csv
-├── mpnstpdx_drugs.tsv
-├── mpnstpdx_drug_descriptors.tsv.gz
-├── mpnstpdx_experiments.tsv.gz
-```
-
diff --git a/build/mpnstpdx/build_drugs.sh b/build/mpnstpdx/build_drugs.sh
deleted file mode 100644
index 78502bc7..00000000
--- a/build/mpnstpdx/build_drugs.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-##get drug data
-Rscript 02_get_drug_data.R $SYNAPSE_AUTH_TOKEN $1 /tmp/mpnstpdx_drugs.tsv
-##get drug descriptors
-/opt/venv/bin/python3 build_drug_desc.py --drugtable /tmp/mpnstpdx_drugs.tsv --desctable /tmp/mpnstpdx_drug_descriptors.tsv.gz
\ No newline at end of file
diff --git a/build/mpnstpdx/build_exp.sh b/build/mpnstpdx/build_exp.sh
deleted file mode 100644
index 4e34f6b3..00000000
--- a/build/mpnstpdx/build_exp.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-Rscript 03_get_drug_response_data.R $SYNAPSE_AUTH_TOKEN $1 $2
-/opt/venv/bin/python3 calc_pdx_metrics.py /tmp/curve_data.tsv --drugfile=/tmp/mpnstpdx_drugs.tsv --outprefix=/tmp/mpnstpdx
diff --git a/build/mpnstpdx/build_omics.sh b/build/mpnstpdx/build_omics.sh
deleted file mode 100644
index 969b4fba..00000000
--- a/build/mpnstpdx/build_omics.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
-
-echo "Running 01_mpnstpdx_get_omics.R with $SYNAPSE_AUTH_TOKEN, $2, and $1."
-Rscript 01_mpnstpdx_get_omics.R $SYNAPSE_AUTH_TOKEN $2 $1
diff --git a/build/mpnstpdx/build_samples.sh b/build/mpnstpdx/build_samples.sh
deleted file mode 100644
index aa88aa02..00000000
--- a/build/mpnstpdx/build_samples.sh
+++ /dev/null
@@ -1 +0,0 @@
-cp /tmp/mpnst_samples.csv /tmp/mpnstpdx_samples.csv
diff --git a/build/mpnstpdx/requirements.r b/build/mpnstpdx/requirements.r
deleted file mode 100755
index e6139cd4..00000000
--- a/build/mpnstpdx/requirements.r
+++ /dev/null
@@ -1,13 +0,0 @@
-install.packages('reticulate', repos='https://cloud.r-project.org')
-reticulate::use_virtualenv('/opt/venv', required = TRUE)
-install.packages('remotes')
-remotes::install_version('rjson', version = '0.2.21', repos = 'https://cloud.r-project.org')
-install.packages('synapser', repos = c('http://ran.synapse.org', 'https://cloud.r-project.org'))
-install.packages("dplyr")
-install.packages("data.table")
-install.packages("synapser", repos = c("http://ran.synapse.org", "https://cloud.r-project.org"))
-install.packages("R.utils")
-install.packages("stringr")
-install.packages("tidyr")
-install.packages('readr')
-install.packages("readxl")
diff --git a/build/mpnstpdx/requirements.txt b/build/mpnstpdx/requirements.txt
deleted file mode 100755
index b0944928..00000000
--- a/build/mpnstpdx/requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-pyarrow
-pandas
-matplotlib
-numpy==1.26.4
-argparse
-tqdm
-scikit-learn
-scipy
-requests
-mordredcommunity
-rdkit
-statsmodels
diff --git a/build/utils/calc_pdx_metrics.py b/build/utils/calc_pdx_metrics.py
index e0f4c05a..83876dd3 100755
--- a/build/utils/calc_pdx_metrics.py
+++ b/build/utils/calc_pdx_metrics.py
@@ -184,7 +184,12 @@ def AUC(time, volume, time_normalize=True):
         dict: Dictionary containing the AUC value.
     """
     auc = trapz_auc(time, volume)
-    #print(time)
+    #print('at line 187')
+    #print(time.shape)
+    #print(time.dtype)
+    #print(np.max(time.astype(int)))
+    #print('auc is : ')
+    #print(auc)
     if time_normalize:
         auc = auc/np.max(time)
     return {"metric": "auc", "value": auc, 'time':np.max(time)}
@@ -270,10 +275,15 @@ def lmm(time, volume, treatment, drug_name):
         raise ValueError("These columns must be present: 'model_id', 'volume', 'time', 'exp_type'")

     data['log_volume'] = np.log(data['volume'])
-
+    #print('drug name is ' + drug_name)
+    data['exp_type'] = data['exp_type'].astype('category')
+    data['exp_type']=pd.Categorical(data['exp_type'],categories = ['control',drug_name], ordered=True)
+    #print(data)
+    #print(data['exp_type'].cat.categories)
     # Define the formula for mixed linear model
     formula = 'log_volume ~ time*exp_type'

+    #print(data['exp_type'].cat.categories)
     # Fit the model
     model = mixedlm(formula, data, groups=data['model_id'])
     fit = model.fit()
@@ -284,6 +294,7 @@ def lmm(time, volume, treatment, drug_name):
     # time_coef_value = fit.params['time']
     #print(fit.params)
     i_coef_value = fit.params['time:exp_type[T.'+drug_name+']']
+    #i_coef_value = fit.params['time:exp_type['+drug_name+']']
     # else:
     #     coef_value = None  # Handle the case when the interaction term is not present

@@ -301,6 +312,8 @@ def main():
     parser.add_argument('curvefile')
     parser.add_argument('--drugfile')
     parser.add_argument('--outprefix',default='/tmp/')
+    parser.add_argument('--study')
+    parser.add_argument('--source')

     args = parser.parse_args()

@@ -314,20 +327,21 @@ def main():

     expsing = expsing.dropna()
     # source improve_sample_id improve_drug_id study time time_unit dose_response_metric dose_response_value
-
-    combos[['drug1','drug2']]=combos.drug.str.split('+',expand=True)
-    combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
-
-    expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-    expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-
-    expcomb[['source']]='Synapse'
-    expcomb[['study']]='MPNST PDX in vivo'
-
-    expsing[['source']]='Synapse'
-    expsing[['study']]='MPNST PDX in vivo'
+    if combos.shape[0]> 0:
+        combos[['drug1','drug2']]=combos['drug'].str.split('+',expand=True)
+
+        combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
+
+        expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb[['source']]=args.source
+        expcomb[['study']]=args.study
+        expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+
+    expsing[['source']]=args.source
+    expsing[['study']]=args.study
     expsing.to_csv(args.outprefix+'_experiments.tsv',index=False, sep="\t")
-    expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+    #expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")



@@ -341,21 +355,25 @@ def get_drug_stats(df, control='control'):
     for name, group in tqdm(groups):
         # Each group contains multiple treatments and a control
         drugs = set(group.treatment) - set([control])
-        print(name[0])
-        print(drugs)
+        #print('line 355')
+        #print(name[0])
+        #print(drugs)
         mod = list(set(group.model_id))[0]
         ctl_data = group[group.treatment == control]
         ctl_time = np.array(ctl_data.time)
         ctl_volume = np.array(ctl_data.volume)
-
+        if (ctl_volume.shape[0] < 2):
+            continue
         ctl_auc = AUC(ctl_time, ctl_volume)

         for d in drugs:
-            print(d)
-            d_data = group[group.treatment == d]
+            #print('is our drug a string or dict?')
+            #print(str(d))
+            d_data = group[group.treatment == str(d)]
             treat_time = np.array(d_data.time)
             treat_volume = np.array(d_data.volume)
-
+            if (treat_volume.shape[0] < 2):
+                continue
             # Get ABC for group
             treat_auc = AUC(treat_time, treat_volume)
             treat_abc = ABC(ctl_time, ctl_volume, treat_time, treat_volume)
@@ -368,6 +386,7 @@ def get_drug_stats(df, control='control'):

             #llm
             comb = pd.concat([ctl_data, d_data])
+            #print(comb)
             lmm_res = lmm(comb.time, comb.volume, comb.treatment, d)
             lmm_res.update({'sample': mod, 'drug': d, 'time': np.max(treat_time), 'time_unit': 'days'})
             if '+' in d:
@@ -396,4 +415,4 @@ def get_drug_stats(df, control='control'):
     return sing, comb

 if __name__=='__main__':
-    main()
+    main()
\ No newline at end of file
diff --git a/schema/expected_files.yaml b/schema/expected_files.yaml
index 4cce4283..8035ff99 100644
--- a/schema/expected_files.yaml
+++ b/schema/expected_files.yaml
@@ -43,24 +43,6 @@ datasets:
     - target_class: Drug Descriptor
       file: /tmp/mpnst_drug_descriptors.tsv

-  mpnstpdx:
-    - target_class: Sample
-      file: /tmp/mpnstpdx_samples.csv
-    - target_class: Transcriptomics
-      file: /tmp/mpnstpdx_transcriptomics.csv
-    - target_class: Proteomics
-      file: /tmp/mpnstpdx_proteomics.csv
-    - target_class: Mutations
-      file: /tmp/mpnstpdx_mutations.csv
-    - target_class: Copy Number
-      file: /tmp/mpnstpdx_copy_number.csv
-    - target_class: Experiments
-      file: /tmp/mpnstpdx_experiments.tsv
-    - target_class: Drug
-      file: /tmp/mpnstpdx_drugs.tsv
-    - target_class: Drug Descriptor
-      file: /tmp/mpnstpdx_drug_descriptors.tsv
-
   cptac:
     - target_class: Sample
       file: /tmp/cptac_samples.csv