-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathvulnerability_modeling_tools.R
More file actions
133 lines (99 loc) · 3.77 KB
/
vulnerability_modeling_tools.R
File metadata and controls
133 lines (99 loc) · 3.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Vulnerability modeling tools ----
# Imports ----
library(parallel)
tryCatch(library(rstan), error=function(cond){message("rstan is not installed!")})
library(reshape2)
# Functions ----
# Main function to run model in parallel:
run_model = function(gene, data, strain, folder="", label="", basedir="./data", results_path = "", prior_data=NULL){
message("Gene: ", gene)
clean_gene = gsub(":", "_", gene)
if(results_path == ""){
results_path = file.path(basedir, strain, folder)
}
dir.create(results_path, recursive=TRUE)
sample_file_path = get_gene_samples_path(gene, strain, results_path)
modeldata_file_path = get_gene_model_data_path(gene, strain, results_path)
message("Gettin data...")
model_data = get_model_data(gene, data, prior_data = prior_data)
message(sample_file_path)
message("Fitting model...")
fitvb <- vb(vb_model, data = model_data, grad_samples=100, tol_rel_obj=0.01,
init=0, sample_file=sample_file_path)
message("Saving model data...")
saveRDS(model_data, file = modeldata_file_path)
}
get_prior_row = function(gene, prior_data){
orf = strsplit(gene, ":")[[1]][1]
ii_orf = prior_data$gene == orf
if(sum(ii_orf) == 1){
prior_row = prior_data[ii_orf,]
} else {
prior_row = NULL
}
return(prior_row)
}
# Helper function to parse data and put in format Stan likes
get_model_data = function(gene, data, y_target="Y", prior_data=NULL){
ii_orf = data$ORF == gene
Xgene = data[ii_orf,]
Xgene$select_group = Xgene$guide_group
ii_detectable = Xgene$detectable == "True"
ii_good = Xgene$GOOD == "True"
ii_select = ii_good & ii_detectable
S1 = NULL
for (i in 1:max(Xgene$select_group[ii_select]))
{
ii_guide = Xgene$select_group == i
S1[i] = Xgene[ii_guide,]$STR[1]
}
prior_row = get_prior_row(gene, prior_data)
if(!is.null(prior_row)){
p_alpha_l_mean = prior_row$alpha_l # plateau start
p_beta_l_mean = prior_row$beta_l # plateau rate
p_gamma_mean = prior_row$gene_gamma # transition
p_beta_e_mean = prior_row$beta_max # decline rate
# Define priors for logistic regression curve
p_A1_mean = prior_row$beta_max
p_A2_mean = prior_row$K
p_A1_std = 0.4
p_A2_std = 0.4
p_B_mean = prior_row$B
p_B_std = 5
p_M_mean = prior_row$M
} else {
# Define priors for two-line model
p_alpha_l_mean = 0 # plateau start
p_beta_l_mean = 0 # plateau rate
p_gamma_mean = 7 # transition
p_beta_e_mean = 0 # decline rate
# Define priors for logistic regression curve
p_A1_mean = -0.5
p_A2_mean = 0.0
p_A1_std = 0.4
p_A2_std = 0.4
p_B_mean = 10
p_B_std = 5
p_M_mean = 0.5
}
# Data list for Stan
model_data = list(pam=Xgene$select[ii_select],
x=Xgene$G[ii_select],
y=Xgene[[y_target]][ii_select],
s=array(S1), N=length(Xgene$G[ii_select]),
J=max(Xgene$select_group[ii_select]),
p_alpha_l_mean = p_alpha_l_mean, p_alpha_l_std = 1, # plateau start
p_beta_l_mean = p_beta_l_mean, p_beta_l_std = 1, # plateau rate
p_gamma_mean = p_gamma_mean, p_gamma_std = 2, # transition
p_beta_e_mean = p_beta_e_mean, p_beta_e_std = 1, # decline rate
p_sigma_mean = 1, p_sigma_std = 2, # variance
p_A2_mean = p_A2_mean, p_A2_std = p_A2_std, # beta min
p_A1_mean=p_A1_mean, p_A1_std = p_A1_std, # beta max
p_B_mean = p_B_mean, p_B_std = p_B_std, # slope of logistic
L=c(-0.2, -0.5, 2),
U=c( 0.2, 4, 28),
kappa=5, rho=p_M_mean,
prior=0
)
return (model_data)
}