|
1 | 1 | from collections.abc import Callable |
2 | 2 |
|
3 | 3 | from .benchmark_dataset import BenchmarkDataset |
| 4 | +from .go_bio_proc import GOBiologicalProcess |
| 5 | +from .go_cell_comp import GOCellularComponent |
4 | 6 | from .go_mol_func import GOMolecularFunction |
5 | | -from .pcg_essentiality import ( |
6 | | - PCGEssHAP1, |
7 | | - PCGEssHEK293FT, |
8 | | - PCGEssK562, |
9 | | - PCGEssMDA_MB_231, |
10 | | - PCGEssTHP1, |
11 | | - PCGEssShared |
12 | | -) |
13 | | -from .lncrna_essentiality import ( |
14 | | - LNCRNAEssHAP1, |
15 | | - LNCRNAEssHEK293FT, |
16 | | - LNCRNAEssK562, |
17 | | - LNCRNAEssMDA_MB_231, |
18 | | - LNCRNAEssTHP1, |
19 | | - LNCRNAEssShared |
20 | | -) |
21 | 7 | from .rna_hl_human import RNAHalfLifeHuman |
22 | 8 | from .rna_hl_mouse import RNAHalfLifeMouse |
| 9 | +from .rna_loc_fazal import RNALocalizationFazal |
23 | 10 | from .rna_loc_ietswaart import RNALocalizationIetswaart |
| 11 | +from .mrl_hl_lbkwk import MRLHLLBKWK |
24 | 12 | from .prot_loc import ProteinLocalization |
25 | 13 | from .mrl_sugimoto import MRLSugimoto |
26 | 14 | from .mrl_sample import ( |
|
41 | 29 | DATASET_CATALOG: dict[str, Callable[..., BenchmarkDataset]] = { |
42 | 30 | "eclip-binding-k562": eCLIPBindingK562, |
43 | 31 | "eclip-binding-hepg2": eCLIPBindingHepG2, |
| 32 | + "go-bp": GOBiologicalProcess, |
| 33 | + "go-cc": GOCellularComponent, |
44 | 34 | "go-mf": GOMolecularFunction, |
45 | | - "pcg-ess-hap1": PCGEssHAP1, |
46 | | - "pcg-ess-hek293ft": PCGEssHEK293FT, |
47 | | - "pcg-ess-k562": PCGEssK562, |
48 | | - "pcg-ess-mda-mb-231": PCGEssMDA_MB_231, |
49 | | - "pcg-ess-thp1": PCGEssTHP1, |
50 | | - "pcg-ess-shared": PCGEssShared, |
51 | | - "lncrna-ess-hap1": LNCRNAEssHAP1, |
52 | | - "lncrna-ess-hek293ft": LNCRNAEssHEK293FT, |
53 | | - "lncrna-ess-k562": LNCRNAEssK562, |
54 | | - "lncrna-ess-mda-mb-231": LNCRNAEssMDA_MB_231, |
55 | | - "lncrna-ess-thp1": LNCRNAEssTHP1, |
56 | | - "lncrna-ess-shared": LNCRNAEssShared, |
57 | 35 | "rnahl-human": RNAHalfLifeHuman, |
58 | 36 | "rnahl-mouse": RNAHalfLifeMouse, |
| 37 | + "rna-loc-fazal": RNALocalizationFazal, |
59 | 38 | "rna-loc-ietswaart": RNALocalizationIetswaart, |
60 | 39 | "prot-loc": ProteinLocalization, |
| 40 | + "mrl-hl-lbkwk": MRLHLLBKWK, |
61 | 41 | "mrl-sugimoto": MRLSugimoto, |
62 | 42 | "mrl-sample-egfp": MRLSampleEGFP, |
63 | 43 | "mrl-sample-mcherry": MRLSampleMCherry, |
|
80 | 60 | "target_col": eCLIP_HepG2_TOP_RBPS_LIST, |
81 | 61 | "split_type": "homology", |
82 | 62 | }, |
| 63 | + "go-bp": { |
| 64 | + "dataset": "go-bp", |
| 65 | + "task": "multilabel", |
| 66 | + "target_col": "target", |
| 67 | + "split_type": "homology", |
| 68 | + }, |
| 69 | + "go-cc": { |
| 70 | + "dataset": "go-cc", |
| 71 | + "task": "multilabel", |
| 72 | + "target_col": "target", |
| 73 | + "split_type": "homology", |
| 74 | + }, |
83 | 75 | "go-mf": { |
84 | 76 | "dataset": "go-mf", |
85 | 77 | "task": "multilabel", |
86 | 78 | "target_col": "target", |
87 | 79 | "split_type": "homology", |
88 | 80 | }, |
| 81 | + "mrl-hl-lbkwk-hl": { |
| 82 | + "dataset": "mrl-hl-lbkwk", |
| 83 | + "task": "reg_ridge", |
| 84 | + "target_col": "target_in_cell_half_life", |
| 85 | + "split_type": "default", |
| 86 | + }, |
| 87 | + "mrl-hl-lbkwk-mrl": { |
| 88 | + "dataset": "mrl-hl-lbkwk", |
| 89 | + "task": "reg_ridge", |
| 90 | + "target_col": "target_ribosome_load", |
| 91 | + "split_type": "default", |
| 92 | + }, |
89 | 93 | "mrl-sugimoto": { |
90 | 94 | "dataset": "mrl-sugimoto", |
91 | 95 | "task": "reg_ridge", |
|
146 | 150 | "target_col": "target", |
147 | 151 | "split_type": "homology", |
148 | 152 | }, |
| 153 | + "rna-loc-fazal": { |
| 154 | + "dataset": "rna-loc-fazal", |
| 155 | + "task": "multilabel", |
| 156 | + "target_col": "target", |
| 157 | + "split_type": "homology", |
| 158 | + }, |
149 | 159 | "rna-loc-ietswaart": { |
150 | 160 | "dataset": "rna-loc-ietswaart", |
151 | 161 | "task": "multilabel", |
|
165 | 175 | "split_type": "homology", |
166 | 176 | }, |
167 | 177 | } |
168 | | - |
169 | | -for ttype in ["pcg", "lncrna"]: |
170 | | - split_type = "homology" if ttype == "pcg" else "default" |
171 | | - for cell in ["hap1", "hek293ft", "k562", "mda-mb-231", "thp1", "shared"]: |
172 | | - |
173 | | - cell_upper = cell.upper() |
174 | | - |
175 | | - DATASET_INFO[f"{ttype}-ess-{cell}"] = { |
176 | | - "dataset": f"{ttype}-ess-{cell}", |
177 | | - "task": "classification", |
178 | | - "target_col": f"target_essential_{cell_upper}", |
179 | | - "split_type": split_type, |
180 | | - } |
181 | | - |
182 | | - if cell != "shared": |
183 | | - DATASET_INFO[f"{ttype}-ess-{cell}-day14-log2fc"] = { |
184 | | - "dataset": f"{ttype}-ess-{cell}", |
185 | | - "task": "reg_ridge", |
186 | | - "target_col": f"target_day14_log2fc_{cell_upper}", |
187 | | - "split_type": split_type, |
188 | | - } |
0 commit comments