-
Notifications
You must be signed in to change notification settings - Fork 28
Description
#options(download.file.method = "wininet")
#program to run Casper
library(data.table)
library(CaSpER)
library(edgeR)
sub <- read.csv('./cod_length.csv', row.names = 1)
dat <- fread("SL1to2_raw.csv")
dat$GeneID <- sub("\..*", "", dat$GeneID)
sub <- sub <- sub[sub$ensm %in% dat$GeneID, ]
cytoband <- cytoband_hg38
Generate the LOH annotation
rannotation <- generateAnnotation(id_type = "ensembl_gene_id", genes = dat$GeneID, ishg19 = FALSE, centromere)
Extract gene IDs from the LOH annotation (adjust if necessary based on the structure)
loh_genes <- unique(rannotation$Gene) # Change 'id' if needed based on the structure of rannotation
Find the common genes between the gene length data, gene expression data, and LOH annotation
common_genes <- intersect(intersect(dat$GeneID, sub$ensm), loh_genes)
Filter the gene expression data to retain only the common genes
dat_filtered <- dat[dat$GeneID %in% common_genes, ]
Filter the gene length data (sub) to retain only the common genes
sub_filtered <- sub[sub$ensm %in% common_genes, ]
Optionally, filter the LOH annotation to keep only the common genes
rannotation_filtered <- rannotation[rannotation$Gene %in% common_genes, ] # Adjust field name based on rannotation structure
Ensure that the filtered data is aligned
cat("Number of common genes: ", length(common_genes), "\n")
List of paths
paths <- c("./SL1/", "./SL2/")
folder_names <- basename(paths)
folder_names <- gsub("/$", "", folder_names)
Define a function to read the output, unlist it, and assign name
read_and_name <- function(path) {
result <- readBAFExtractOutput(path = path, sequencing.type = "bulk")
unlisted_result <- result[[1]]
return(unlisted_result)
}
Use lapply to apply the function to each path
loh <- lapply(paths, read_and_name)
names(loh) <- gsub(".baf", "", names(loh))
control.sample.ids = "PDA75"
loh.name.mapping <- data.frame(loh.name = folder_names, sample.name = folder_names)
dat_matrix <- as.matrix(data.frame(dat_filtered[!duplicated(dat_filtered$GeneID),], row.names = 1, check.names = F))
#convert to rpkm
dat_matrix <- rpkm(dat_matrix, gene.length = sub_filtered$Length, log=T)
#dat_matrix[is.na(dat_matrix)] <- 0
object <- CreateCasperObject(raw.data=dat_matrix, loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=1,
annotation=rannotation_filtered, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=centromere, genomeVersion="hg38", log.transformed = T)
#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
Performing recursive median filtering...
Performing HMM segmentation...
Processing cnv.scale:1 loh.scale:1...
Error in value[[jvseq[[jjj]]]] : subscript out of bounds