-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL_statements.sql
More file actions
135 lines (122 loc) · 3.43 KB
/
SQL_statements.sql
File metadata and controls
135 lines (122 loc) · 3.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# 1) What is the average LN_IC50 for all drugs across all cell lines?
SELECT
AVG(LN_IC50) AS Average_IC50
FROM
drug_sensitivity;
# 2) What is the minimum AUC observed for drugs targeting the TOP1 pathway?
SELECT
MIN(ds.AUC) AS Min_AUC
FROM
drug_sensitivity ds
JOIN
drug_targets dt ON ds.DRUG_ID = dt.DRUG_ID
JOIN
targets t ON dt.TARGET_ID = t.TARGET_ID
JOIN
targets_pathways tp ON t.TARGET_ID = tp.TARGET_ID
JOIN
pathways p ON tp.PATHWAY_ID = p.PATHWAY_ID
WHERE
p.PATHWAY_NAME = 'DNA replication';
# 3) How many cell lines exhibit a Z_SCORE above a threshold for Camptothecin?
SELECT
COUNT(ds.COSMIC_ID) AS Cell_Line_Count
FROM
drug_sensitivity ds
JOIN
drugs d ON ds.DRUG_ID = d.DRUG_ID
WHERE
d.DRUG_NAME = 'Camptothecin'
AND ds.Z_SCORE > 0.5;
# 4)What is the total count of cell lines for each GDSC Tissue descriptor?
SELECT
d1.DESCRIPTOR_1_NAME AS Tissue_Descriptor,
COUNT(c.COSMIC_ID) AS Total_Cell_Lines
FROM
cell_lines c
JOIN
gdsc_tissue_descriptor_2 d2 ON c.DESCRIPTOR_2_ID = d2.DESCRIPTOR_2_ID
JOIN
gdsc_tissue_descriptor_1 d1 ON d2.DESCRIPTOR_1_ID = d1.DESCRIPTOR_1_ID
GROUP BY
d1.DESCRIPTOR_1_NAME
ORDER BY
Total_Cell_Lines DESC;
# 5) What is the max LN_IC50 for drugs with MIN_CONC below 0.001?
SELECT
MAX(LN_IC50) AS Max_IC50
FROM
drug_sensitivity
WHERE
MIN_CONC < 0.001;
# 6) Which Cancer Type has the highest average Z_SCORE across all drugs?
SELECT
c.CANCER_TYPE,
AVG(ds.Z_SCORE) AS Avg_Z_Score
FROM
drug_sensitivity ds
JOIN
cell_lines c ON ds.COSMIC_ID = c.COSMIC_ID
GROUP BY
c.CANCER_TYPE
ORDER BY
Avg_Z_Score DESC
LIMIT 1;
# 7) What is the max-min of LN_IC50 values for drugs targeting DNA replication?
SELECT
(MAX(ds.LN_IC50) - MIN(ds.LN_IC50)) AS IC50_Range
FROM
drug_sensitivity ds
JOIN
drug_targets dt ON ds.DRUG_ID = dt.DRUG_ID
JOIN
targets t ON dt.TARGET_ID = t.TARGET_ID
JOIN
targets_pathways tp ON t.TARGET_ID = tp.TARGET_ID
JOIN
pathways p ON tp.PATHWAY_ID = p.PATHWAY_ID
WHERE
p.PATHWAY_NAME LIKE '%DNA replication%';
# 8) How many drugs have an AUC greater than 0.8 in bone tissue cell lines?
SELECT
COUNT(DISTINCT ds.DRUG_ID) AS Drug_Count
FROM
drug_sensitivity ds
JOIN
cell_lines c ON ds.COSMIC_ID = c.COSMIC_ID
JOIN
gdsc_tissue_descriptor_2 d2 ON c.DESCRIPTOR_2_ID = d2.DESCRIPTOR_2_ID
JOIN
gdsc_tissue_descriptor_1 d1 ON d2.DESCRIPTOR_1_ID = d1.DESCRIPTOR_1_ID
WHERE
d1.DESCRIPTOR_1_NAME LIKE '%bone%'
AND ds.AUC > 0.8;
# 9) What is the average LN_IC50 across all drugs and cell lines?
SELECT AVG(LN_IC50) FROM drug_sensitivity;
# 10) What is the lowest Z_SCORE recorded for Camptothecin?
SELECT
MIN(ds.Z_SCORE) AS Lowest_Z_Score
FROM
drug_sensitivity ds
JOIN
drugs d ON ds.DRUG_ID = d.DRUG_ID
WHERE
d.DRUG_NAME = 'Camptothecin';
# 11) How many unique CELL_LINE_NAMEs are there in the dataset?
SELECT
COUNT(DISTINCT CELL_LINE_NAME) AS Unique_Cell_Lines
FROM
cell_lines;
# 12) What is the total AUC for drugs tested on bone tissue cell lines?
SELECT
SUM(ds.AUC) AS Total_AUC
FROM
drug_sensitivity ds
JOIN
cell_lines c ON ds.COSMIC_ID = c.COSMIC_ID
JOIN
gdsc_tissue_descriptor_2 d2 ON c.DESCRIPTOR_2_ID = d2.DESCRIPTOR_2_ID
JOIN
gdsc_tissue_descriptor_1 d1 ON d2.DESCRIPTOR_1_ID = d1.DESCRIPTOR_1_ID
WHERE
d1.DESCRIPTOR_1_NAME LIKE '%bone%';