-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpreprocess.R
More file actions
85 lines (65 loc) · 1.92 KB
/
preprocess.R
File metadata and controls
85 lines (65 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#Preprocessing functions used in replicating arXiv rev.2 results for omics data in TCGA PanCan12 Freeze 4.7
#Tai-Hsien Ou Yang
#th8623@gmail.com
library("impute")
library("limma")
#1. RNAseq
# Filter, log2-transform, impute and normalize an RNA-seq expression matrix.
#
# Args:
#   ge: numeric expression matrix. Rows are filtered first, then columns
#       (presumably genes x samples -- confirm against callers).
#
# Returns:
#   The matrix produced by normalizeBetweenArrays() on the imputed,
#   log2-transformed data.
normalizeRNASeq <- function(ge) {
  # Keep rows in which fewer than half of the entries are exactly zero.
  zeros_per_row <- rowSums(ge == 0)
  ge <- ge[zeros_per_row < 0.5 * ncol(ge), ]

  # On the row-filtered matrix, keep columns in which fewer than 20% of the
  # remaining entries are exactly zero.
  zeros_per_col <- colSums(ge == 0)
  ge <- ge[, zeros_per_col < 0.2 * nrow(ge)]

  # Surviving zeros are turned into NA so that they are filled in by the
  # k-nearest-neighbour imputation instead of becoming -Inf under log2.
  ge[ge == 0] <- NA
  imputed <- impute.knn(log2(ge))

  # Normalize across arrays with limma (quantile normalization, per the
  # original author's note).
  normalizeBetweenArrays(imputed$data)
}
#2. Methylation
# Drop sparsely observed rows of a methylation matrix and impute the rest.
#
# Args:
#   meth: numeric matrix that may contain NA values.
#
# Returns:
#   The `data` element returned by impute.knn() for the retained rows.
normalizeMeth <- function(meth) {
  # A row is retained only if fewer than half of its entries are missing.
  missing_per_row <- rowSums(is.na(meth))
  keep <- missing_per_row < 0.5 * ncol(meth)
  impute.knn(meth[keep, ])$data
}
#3. miRNA
# Filter, log2-transform, impute, normalize and summarize a miRNA matrix.
#
# Args:
#   mirna: numeric expression matrix; rows with too many zeros are dropped.
#   map:   passed unchanged to probeSummarization() as its `map` argument.
#
# Returns:
#   Whatever probeSummarization() returns for the normalized matrix, called
#   with threshold = 0.7 and gene.colname = "miR_stem".
normalizemiRNA <- function(mirna, map) {
  # probeSummarization() is expected to come from the cafr package.
  require("cafr")

  # Keep rows in which fewer than half of the entries are exactly zero.
  zeros_per_row <- rowSums(mirna == 0)
  mirna <- mirna[zeros_per_row < 0.5 * ncol(mirna), ]

  # Remaining zeros are marked missing so they are imputed rather than
  # turned into -Inf by log2.
  mirna[mirna == 0] <- NA
  imputed <- impute.knn(log2(mirna))$data
  normalized <- normalizeBetweenArrays(imputed)

  probeSummarization(
    ge = normalized,
    map = map,
    threshold = 0.7,
    gene.colname = "miR_stem"
  )
}
#4. RPPA
# Drop zero-heavy rows of an RPPA matrix and impute the remaining gaps.
#
# Fixes relative to the previous version:
#   * the argument `ge` is now actually used; it was being overwritten by
#     loadExpr(file.iterator), neither of which is defined in this file;
#   * `retrun(ge)` was a typo that made every call fail.
#
# Args:
#   ge: numeric matrix; may contain zeros and NA values.
#
# Returns:
#   The `data` element returned by impute.knn() for the retained rows.
normalizeRPPA <- function(ge) {
  # Count exact zeros per row. Pre-existing NAs are ignored here so that they
  # cannot turn the row filter itself into NA.
  zeros_per_row <- rowSums(ge == 0, na.rm = TRUE)

  # Keep rows with fewer than 50% zeros; drop = FALSE keeps a matrix even
  # when only a single row survives.
  ge <- ge[zeros_per_row < 0.5 * ncol(ge), , drop = FALSE]

  # Remaining zeros are treated as missing and filled in by imputation.
  ge[ge == 0] <- NA
  impute.knn(ge)$data
}
#Obsolete
# Keep rows with fewer than 50% missing values, round to 4 decimals, impute.
#
# The previous version carried a `probe` variable computed as
# names(rownames(e)[...]). rownames() returns an unnamed character vector, so
# that value was always NULL and the final `names(rownames(e)) <- probe` was
# a no-op. The dead bookkeeping is removed; row names are left exactly as
# impute.knn() returns them, as before.
#
# Args:
#   e: numeric matrix that may contain NA values.
#
# Returns:
#   The `data` element returned by impute.knn() for the retained rows.
imputeknn50 <- function(e) {
  keep <- rowSums(is.na(e)) < 0.5 * ncol(e)

  # drop = FALSE keeps a matrix even when only one row qualifies.
  e <- round(e[keep, , drop = FALSE], 4)
  impute.knn(e)$data
}
library("impute")
# Keep rows with fewer than 50% missing values, round to 4 decimals, impute.
#
# The previous version tried to save and restore probe names through
# names(rownames(e)[...]). rownames() returns an unnamed character vector, so
# the saved value was always NULL and restoring it changed nothing. The dead
# code is removed; row names are left exactly as impute.knn() returns them,
# as before.
#
# Args:
#   e: numeric matrix that may contain NA values.
#
# Returns:
#   The `data` element returned by impute.knn() for the retained rows.
imputeknn50.probe <- function(e) {
  keep <- rowSums(is.na(e)) < 0.5 * ncol(e)

  # drop = FALSE keeps a matrix even when only one row qualifies.
  e <- round(e[keep, , drop = FALSE], 4)
  impute.knn(e)$data
}