Skip to content

Error in running Casper #85

@mehlu22

Description

@mehlu22

#options(download.file.method = "wininet")

#program to run Casper
library(data.table)
library(CaSpER)
library(edgeR)

sub <- read.csv('./cod_length.csv', row.names = 1)
dat <- fread("SL1to2_raw.csv")
dat$GeneID <- sub("\..*", "", dat$GeneID)
sub <- sub <- sub[sub$ensm %in% dat$GeneID, ]

cytoband <- cytoband_hg38

Generate the LOH annotation

rannotation <- generateAnnotation(id_type = "ensembl_gene_id", genes = dat$GeneID, ishg19 = FALSE, centromere)

Extract gene IDs from the LOH annotation (adjust if necessary based on the structure)

loh_genes <- unique(rannotation$Gene) # Change 'id' if needed based on the structure of rannotation

Find the common genes between the gene length data, gene expression data, and LOH annotation

common_genes <- intersect(intersect(dat$GeneID, sub$ensm), loh_genes)

Filter the gene expression data to retain only the common genes

dat_filtered <- dat[dat$GeneID %in% common_genes, ]

Filter the gene length data (sub) to retain only the common genes

sub_filtered <- sub[sub$ensm %in% common_genes, ]

Optionally, filter the LOH annotation to keep only the common genes

rannotation_filtered <- rannotation[rannotation$Gene %in% common_genes, ] # Adjust field name based on rannotation structure

Ensure that the filtered data is aligned

cat("Number of common genes: ", length(common_genes), "\n")

List of paths

paths <- c("./SL1/", "./SL2/")
folder_names <- basename(paths)
folder_names <- gsub("/$", "", folder_names)

Define a function to read the output, unlist it, and assign name

read_and_name <- function(path) {
result <- readBAFExtractOutput(path = path, sequencing.type = "bulk")
unlisted_result <- result[[1]]
return(unlisted_result)
}

Use lapply to apply the function to each path

loh <- lapply(paths, read_and_name)
names(loh) <- gsub(".baf", "", names(loh))

control.sample.ids = "PDA75"

loh.name.mapping <- data.frame(loh.name = folder_names, sample.name = folder_names)

dat_matrix <- as.matrix(data.frame(dat_filtered[!duplicated(dat_filtered$GeneID),], row.names = 1, check.names = F))
#convert to rpkm
dat_matrix <- rpkm(dat_matrix, gene.length = sub_filtered$Length, log=T)
#dat_matrix[is.na(dat_matrix)] <- 0
object <- CreateCasperObject(raw.data=dat_matrix, loh.name.mapping=loh.name.mapping, sequencing.type="bulk",
cnv.scale=3, loh.scale=3, matrix.type="normalized", expr.cutoff=1,
annotation=rannotation_filtered, method="iterative", loh=loh, filter="median",
control.sample.ids=control.sample.ids, cytoband=centromere, genomeVersion="hg38", log.transformed = T)

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")

#Pairwise comparison of scales from BAF and expression signals
final.objects <- runCaSpER(object, removeCentromere=T, cytoband=cytoband, method="iterative")
Performing recursive median filtering...
Performing HMM segmentation...
Processing cnv.scale:1 loh.scale:1...
Error in value[[jvseq[[jjj]]]] : subscript out of bounds

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions