-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimport.R
More file actions
139 lines (111 loc) · 4.36 KB
/
import.R
File metadata and controls
139 lines (111 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Import and wrangling of the manuscript data
#
# The local STIGMA cohort dataset encompasses readouts of inflammation
# and metabolic markers of neurotransmitter precursors along with the
# explanatory variables (anxiety/depression signs and persistent symptoms)
# and confounders (age and sex)
#
# The INCOV validation dataset contains selected metabolites of the TRP decay
# pathway (so-called TRYCATS pathway) and key inflammatory cytokines measured
# in blood of healthy individuals and COVID-19 participants at consecutive time
# points (acute, sub-acute and recovery) after clinical onset.
# tools --------
library(plyr)
library(tidyverse)
library(soucer)
library(trafo)
library(stringi)
library(readxl)
library(foreign)
# Script preamble. `insert_head()` is called without arguments;
# NOTE(review): presumably a logging helper from one of the attached
# packages that marks the start of the script - confirm where it is defined.
insert_head()

# Source the project-wide globals and helper tools; `message = TRUE` and
# `crash = TRUE` are handed over to `source_all()` unchanged.
source_all(c('./tools/globals.R',
             './tools/tools.R'),
           message = TRUE, crash = TRUE)
# Import scripts for the single studies ------

insert_msg('Sourcing the import scripts')

## one script per study: the local cohort and INCOV; both paths are piped
## as a single character vector into `source_all()`
c('./import scripts/local.R',
  './import scripts/incov.R') %>%
  source_all(message = TRUE, crash = TRUE)
# Project globals: INCOV variable lexicon ------

insert_msg('Project globals')

## annotation rows of the proteins and metabolites listed in the globals,
## stacked row-wise with the proteome annotation first
globals$incov_lexicon <-
  rbind(filter(incov$annotation_proteome,
               variable %in% globals$incov_proteins),
        filter(incov$annotation_metabolome,
               variable %in% globals$incov_metabolites))
# SIMMUN, modeling and analysis dataset -----

insert_msg('Modeling SIMUN dataset')

## modeling responses stored as a named character vector; the values are
## the variables with the optimal transformations chosen during the
## explorative analysis (e.g. log or square root versions)
stigma$responses <- c(trp = 'trp',
                      kyn = 'log_kyn',
                      kyn_trp = 'log_kyn_trp',
                      phe = 'log_phe',
                      tyr = 'log_tyr',
                      phe_tyr = 'sqrt_phe_tyr')
## explanatory variables: pairs of variable name and display label, passed
## to `compress()` with `names_to = 'variable'` and `values_to = 'label'`
stigma$expl_lexicon <-
  compress(c(## infection-related features;
             ## there are too few non-ambulatory COV cases for modeling,
             ## hence severity stays switched off
             infection = 'SARS-CoV-2',
             anti_rbd_class = 'anti-RBD IgG',
             # severity = 'COVID-19 severity',

             ## inflammatory markers;
             ## IL6 and CRP are excluded from the analysis, because they are
             ## elevated only in few participants;
             ## total neutrophils correlate nearly absolutely with NLR
             log_neo = 'log NEO, nmol/L',
             # crp_class = 'CRP',
             # il6_class = 'IL6',
             log_nlr = 'log NLR',
             # sqrt_neutro = 'log Neutro',

             ## demographics
             age = 'Age, decades',
             sex = 'Sex',

             ## psychometrics
             pss_stress_score = 'Mental stress, PSS-4',
             hads_anx_score = 'Anxiety, HADS',
             hads_dpr_score = 'Depression, HADS',

             ## comorbidity and medical history
             psych_comorb = 'Mental disorder',
             somatic_comorb = 'Physical disorder',
             bmi_class = 'BMI',
             smoking = 'Smoking',
             alcohol = 'Alcohol'),
           names_to = 'variable',
           values_to = 'label')
## analysis data table restricted to the modeling variables and to
## observations without any missing value
stigma$analysis_tbl <- stigma$data %>%
  mutate(
    ## infection status: 'healthy' is recoded to 'no' and 'SARS-CoV-2' to
    ## 'yes', with 'no' as the first factor level. The recode specification
    ## is one string literal spanning two lines; its second line starts at
    ## column 0 on purpose, so that the literal stays unchanged
    infection = factor(car::recode(cov,
                                   "'healthy' = 'no';
'SARS-CoV-2' = 'yes'"),
                       c('no', 'yes')),
    ## age divided by 10 (labelled 'Age, decades' in the lexicon)
    age = age / 10
  ) %>%
  select(patient_id,
         all_of(unname(stigma$responses)),
         all_of(stigma$expl_lexicon$variable)) %>%
  filter(complete.cases(.))

## identifiers of the observations retained in the analysis table
stigma$complete_ids <- stigma$analysis_tbl[['patient_id']]
# INCOV: modeling and analysis dataset ------

insert_msg('INCOV: modeling and analysis dataset')

## Metabolome and proteome readouts of the lexicon variables are matched by
## participant and timepoint and combined with the clinical information.
## In the join, the clinical table is `x` and the readouts (the dot) are `y`,
## so the right join keeps every readout row. `time_po` is set to 0 for
## the 'healthy' timepoint. Only complete cases are retained.
incov$analysis_tbl <- incov[c("metabolome", "proteome")] %>%
  map(function(tbl) {
    select(tbl,
           patient_id,
           timepoint,
           any_of(globals$incov_lexicon$variable))
  }) %>%
  reduce(inner_join, by = c('patient_id', 'timepoint')) %>%
  right_join(mutate(incov$clinic[c('patient_id', 'timepoint', 'time_po',
                                   'age', 'sex', 'bmi_class', 'bmi',
                                   'cov', 'who_severity', 'severity')],
                    time_po = ifelse(timepoint == 'healthy', 0, time_po)),
             .,
             by = c('patient_id', 'timepoint')) %>%
  filter(complete.cases(.))

## participant identifiers of the rows kept in the analysis table
incov$complete_ids <- incov$analysis_tbl[['patient_id']]
# END -----
# NOTE(review): `insert_tail()` is called without arguments; presumably the
# closing counterpart of the `insert_head()` call at the top of the script -
# confirm in the package that provides it.
insert_tail()