From 773766002bee23d7e248246c4a40e4480583ee39 Mon Sep 17 00:00:00 2001
From: JoneSu1 <sujunhua96@163.com>
Date: Wed, 6 May 2026 16:15:49 +0200
Subject: [PATCH] Add AlphaGenome adapter and multi-track support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add AlphaGenomeAdapter (src/deepISA/modeling/alpha_genome_adapter.py):
  single API call for multi-track/multi-biosample configs, sequence cache,
  probe-based n_tracks, backward-compatible single-track YAML format
- Add configs/ag_config_template.yaml with single- and multi-track examples
- Add notebooks/ag_biosample_reference.csv (714 biosamples × 10 assay types)
- Update tutorial notebook: local paths, AlphaGenome cells, multi-track docs
- Add tests: test_adapter.py, test_attr_filter_pipeline.py
- Update pyproject.toml and modeling __init__.py
---
 configs/ag_config_template.yaml              |  42 ++
 notebooks/ag_biosample_reference.csv         | 715 +++++++++++++++++++
 notebooks/deepISA_tutorial.ipynb             | 174 ++++-
 pyproject.toml                               |   5 +-
 src/deepISA/modeling/__init__.py             |   5 +
 src/deepISA/modeling/alpha_genome_adapter.py | 217 ++++++
 tests/test_adapter.py                        | 284 ++++++++
 tests/test_attr_filter_pipeline.py           | 100 +++
 8 files changed, 1509 insertions(+), 33 deletions(-)
 create mode 100644 configs/ag_config_template.yaml
 create mode 100644 notebooks/ag_biosample_reference.csv
 create mode 100644 src/deepISA/modeling/alpha_genome_adapter.py
 create mode 100644 tests/test_adapter.py
 create mode 100644 tests/test_attr_filter_pipeline.py

diff --git a/configs/ag_config_template.yaml b/configs/ag_config_template.yaml
new file mode 100644
index 0000000..144f664
--- /dev/null
+++ b/configs/ag_config_template.yaml
@@ -0,0 +1,42 @@
+# AlphaGenome backend config for deepISA
+# ─────────────────────────────────────────────────────────────────────────────
+# Usage:
+#   from deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter
+#   adapter = AlphaGenomeAdapter("configs/ag_config_template.yaml")
+#
+# Browse available biosamples and assay types:
+#   notebooks/ag_biosample_reference.csv   (714 biosamples × 10 assay types)
+#
+# ─────────────────────────────────────────────────────────────────────────────
+# Option A — Single track  (most common, backward-compatible format)
+# ─────────────────────────────────────────────────────────────────────────────
+api_key: YOUR_API_KEY_HERE     # AlphaGenome API key
+output_type: DNASE             # DNASE | ATAC | CAGE | RNA_SEQ | CHIP_TF | CHIP_HISTONE | PROCAP
+biosample_name: GM12878        # exact string from ag_biosample_reference.csv
+
+# Optional — defaults shown:
+context_len: 16384             # AlphaGenome input length (do not change)
+seq_len: 600                   # deepISA region length (match your training)
+aggregation: sum               # how to aggregate positions within the seq_len window (600 bp by default)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Option B — Multi-track  (combine assays / cell lines; still ONE API call)
+# ─────────────────────────────────────────────────────────────────────────────
+# Uncomment below and remove / comment out the single-track keys above.
+#
+# api_key: YOUR_API_KEY_HERE
+# context_len: 16384
+# seq_len: 600
+# aggregation: sum
+# tracks:
+#   - output_type: DNASE
+#     biosample_name: GM12878        # col 0 in output → isa_t0
+#   - output_type: CAGE
+#     biosample_name: GM12878        # cols 1-2       → isa_t1, isa_t2
+#   - output_type: ATAC
+#     biosample_name: K562           # col 3          → isa_t3
+#
+# n_tracks = total number of output columns across all (output_type, biosample_name) pairs; one pair may yield several (e.g. CAGE above).
+# Use adapter.n_tracks to get the value at runtime.
+# Use calc_coop_score(..., track_idx=i) to analyse each track separately.
diff --git a/notebooks/ag_biosample_reference.csv b/notebooks/ag_biosample_reference.csv
new file mode 100644
index 0000000..ae8b89d
--- /dev/null
+++ b/notebooks/ag_biosample_reference.csv
@@ -0,0 +1,715 @@
+biosample_name,CHIP_TF,CHIP_HISTONE,SPLICE_SITE_USAGE,RNA_SEQ,CAGE,SPLICE_JUNCTIONS,DNASE,ATAC,CONTACT_MAPS,PROCAP
+22Rv1,1,1,0,0,0,0,0,0,0,0
+A172,0,0,2,2,2,1,1,0,0,0
+A375,0,0,2,2,0,1,0,0,0,0
+A549,33,4,2,2,2,1,1,1,0,0
+A673,1,3,2,2,0,1,1,0,0,2
+ACC112,0,7,0,0,0,0,0,0,0,0
+ACHN,0,0,0,0,0,0,1,0,0,0
+AG04449,1,1,0,0,0,0,1,0,0,0
+AG04450,1,4,2,2,0,1,1,0,0,0
+AG08395,0,0,0,0,0,0,1,0,0,0
+AG08396,0,0,0,0,0,0,1,0,0,0
+AG09309,1,1,0,0,0,0,1,0,0,0
+AG09319,1,1,0,0,0,0,1,0,0,0
+AG10803,1,1,0,0,0,0,1,0,0,0
+AG20443,0,0,0,0,0,0,1,0,0,0
+Ammon's horn,0,0,2,1,2,1,1,0,0,0
+B cell,1,10,4,4,0,2,1,1,0,0
+BE2C,1,4,2,1,0,1,1,0,0,0
+BJ,1,3,2,2,0,1,1,0,0,0
+BLaER1,1,9,2,2,0,1,0,0,0,0
+C1 segment of cervical spinal cord,0,0,2,1,0,1,0,0,0,0
+C4-2B,2,1,0,0,0,0,0,0,0,0
+CD14-positive monocyte,1,11,2,3,0,1,1,0,0,0
+"CD14-positive, CD16-negative classical monocyte",0,0,0,0,2,0,0,0,0,0
+"CD14-positive, CD16-positive monocyte",0,0,0,0,4,0,0,0,0,0
+"CD4-positive, CD25-positive, alpha-beta regulatory T cell",0,6,2,3,0,1,1,1,0,0
+"CD4-positive, alpha-beta T cell",1,6,4,3,0,2,1,1,0,0
+"CD4-positive, alpha-beta memory T cell",0,4,4,3,0,2,1,0,0,0
+"CD8-positive, alpha-beta T cell",1,7,4,3,0,2,1,1,0,0
+"CD8-positive, alpha-beta memory T cell",0,4,4,3,0,2,1,0,0,0
+CMK,0,0,0,0,0,0,1,0,0,0
+COLO829,0,0,0,0,0,0,1,0,0,0
+Caco-2,1,2,2,2,2,1,1,0,0,2
+Caki2,0,0,2,2,0,1,1,0,0,0
+Calu3,1,0,2,2,0,1,1,0,0,2
+CyT49,0,0,0,0,0,0,0,0,4,0
+D721Med,0,0,0,0,0,0,1,0,0,0
+DND-41,1,9,0,0,0,0,1,1,0,0
+DOHH2,1,9,0,0,0,0,0,0,0,0
+DU 145,0,0,0,0,2,0,0,0,0,0
+Daoy,0,0,2,2,0,1,1,0,0,0
+EH,0,0,0,0,0,0,1,0,0,0
+EL,0,0,0,0,0,0,1,0,0,0
+ELF-1,0,0,0,0,0,0,1,0,0,0
+ELR,0,0,0,0,0,0,1,0,0,0
+ES-I3,0,3,0,0,0,0,0,0,0,0
+G401,0,0,2,2,2,1,1,0,0,0
+GM03348,0,0,0,0,0,0,1,0,0,0
+GM04503,0,0,0,0,0,0,1,0,0,0
+GM04504,0,0,0,0,0,0,1,0,0,0
+GM06990,1,2,0,0,0,0,1,0,0,0
+GM08714,0,1,0,0,0,0,0,0,0,0
+GM10847,1,0,0,0,0,0,0,0,0,0
+GM12864,1,1,0,0,0,0,1,0,0,0
+GM12865,0,1,0,0,0,0,1,0,0,0
+GM12872,1,0,0,0,0,0,0,0,0,0
+GM12873,1,0,0,0,0,0,0,0,0,0
+GM12874,1,0,0,0,0,0,0,0,0,0
+GM12875,1,1,0,0,0,0,0,0,0,0
+GM12878,101,8,4,5,2,2,1,1,1,0
+GM12891,6,0,2,1,0,1,1,0,0,0
+GM12892,4,0,2,1,0,1,0,0,0,0
+GM13976,0,0,0,0,0,0,1,0,0,0
+GM15510,1,0,0,0,0,0,0,0,0,0
+GM18498,0,0,0,0,0,0,0,1,0,0
+GM18499,0,0,0,0,0,0,0,1,0,0
+GM18505,0,0,0,0,0,0,0,1,0,0
+GM18508,0,0,0,0,0,0,0,1,0,0
+GM18511,0,0,0,0,0,0,0,1,0,0
+GM18517,0,0,0,0,0,0,0,1,0,0
+GM18519,0,0,0,0,0,0,0,1,0,0
+GM18520,0,0,0,0,0,0,0,1,0,0
+GM18526,1,0,0,0,0,0,0,0,0,0
+GM18858,0,0,0,0,0,0,0,1,0,0
+GM18861,0,0,0,0,0,0,0,1,0,0
+GM18867,0,0,0,0,0,0,0,1,0,0
+GM18868,0,0,0,0,0,0,0,1,0,0
+GM18870,0,0,0,0,0,0,0,1,0,0
+GM18873,0,0,0,0,0,0,0,1,0,0
+GM18907,0,0,0,0,0,0,0,1,0,0
+GM18951,1,0,0,0,0,0,0,0,0,0
+GM19023,0,0,0,0,0,0,0,1,0,0
+GM19025,0,0,0,0,0,0,0,1,0,0
+GM19035,0,0,0,0,0,0,0,1,0,0
+GM19043,0,0,0,0,0,0,0,1,0,0
+GM19099,1,0,0,0,0,0,0,0,0,0
+GM19193,1,0,0,0,0,0,0,0,0,0
+GM19238,0,0,0,0,0,0,1,0,0,0
+GM19239,0,0,0,0,0,0,1,0,0,0
+GM19240,0,0,0,0,0,0,1,0,0,0
+GM19324,0,0,0,0,0,0,0,1,0,0
+GM19328,0,0,0,0,0,0,0,1,0,0
+GM19351,0,0,0,0,0,0,0,1,0,0
+GM19372,0,0,0,0,0,0,0,1,0,0
+GM19395,0,0,0,0,0,0,0,1,0,0
+GM19397,0,0,0,0,0,0,0,1,0,0
+GM19438,0,0,0,0,0,0,0,1,0,0
+GM19452,0,0,0,0,0,0,0,1,0,0
+GM19455,0,0,0,0,0,0,0,1,0,0
+GM19463,0,0,0,0,0,0,0,1,0,0
+GM19467,0,0,0,0,0,0,0,1,0,0
+GM20000,0,0,0,0,0,0,1,0,0,0
+GM21360,0,0,0,0,0,0,0,1,0,0
+GM21367,0,0,0,0,0,0,0,1,0,0
+GM21381,0,0,0,0,0,0,0,1,0,0
+GM21390,0,0,0,0,0,0,0,1,0,0
+GM21423,0,0,0,0,0,0,0,1,0,0
+GM21447,0,0,0,0,0,0,0,1,0,0
+GM21515,0,0,0,0,0,0,0,1,0,0
+GM21526,0,0,0,0,0,0,0,1,0,0
+GM21528,0,0,0,0,0,0,0,1,0,0
+GM21529,0,0,0,0,0,0,0,1,0,0
+GM21576,0,0,0,0,0,0,0,1,0,0
+GM21619,0,0,0,0,0,0,0,1,0,0
+GM21717,0,0,0,0,0,0,0,1,0,0
+GM21723,0,0,0,0,0,0,0,1,0,0
+GM21737,0,0,0,0,0,0,0,1,0,0
+GM21786,0,0,0,0,0,0,0,1,0,0
+GM21825,0,0,0,0,0,0,0,1,0,0
+GM23248,1,10,4,3,0,2,1,0,0,0
+GM23338,10,9,4,3,0,2,1,1,0,0
+GM25256,0,0,0,0,0,0,0,0,2,0
+H1,39,14,4,5,0,2,1,0,0,0
+H1-hESC,0,0,0,0,0,0,0,0,6,0
+H4,0,0,2,2,0,1,1,0,0,0
+H54,2,0,0,0,0,0,0,0,0,0
+H7,0,3,2,2,0,1,1,0,0,0
+H9,1,6,2,2,0,1,1,0,6,0
+HAP-1,0,4,0,0,0,0,1,0,0,0
+HCEC 1CT,0,0,0,0,0,0,1,0,0,0
+HCT116,25,8,4,3,0,2,1,1,2,0
+HEK293,106,3,0,0,2,0,0,0,0,0
+HEK293T,5,0,0,0,0,0,0,0,0,0
+HFF-Myc,0,1,0,0,0,0,1,0,0,0
+HFFc6,1,0,2,2,0,1,1,0,2,0
+HG02571,0,0,0,0,0,0,0,1,0,0
+HG02588,0,0,0,0,0,0,0,1,0,0
+HG02610,0,0,0,0,0,0,0,1,0,0
+HG02623,0,0,0,0,0,0,0,1,0,0
+HG02678,0,0,0,0,0,0,0,1,0,0
+HG02759,0,0,0,0,0,0,0,1,0,0
+HG02763,0,0,0,0,0,0,0,1,0,0
+HG02840,0,0,0,0,0,0,0,1,0,0
+HG02852,0,0,0,0,0,0,0,1,0,0
+HG02870,0,0,0,0,0,0,0,1,0,0
+HG02884,0,0,0,0,0,0,0,1,0,0
+HG02938,0,0,0,0,0,0,0,1,0,0
+HG02943,0,0,0,0,0,0,0,1,0,0
+HG02970,0,0,0,0,0,0,0,1,0,0
+HG02973,0,0,0,0,0,0,0,1,0,0
+HG02981,0,0,0,0,0,0,0,1,0,0
+HG03025,0,0,0,0,0,0,0,1,0,0
+HG03039,0,0,0,0,0,0,0,1,0,0
+HG03045,0,0,0,0,0,0,0,1,0,0
+HG03060,0,0,0,0,0,0,0,1,0,0
+HG03064,0,0,0,0,0,0,0,1,0,0
+HG03066,0,0,0,0,0,0,0,1,0,0
+HG03095,0,0,0,0,0,0,0,1,0,0
+HG03097,0,0,0,0,0,0,0,1,0,0
+HG03103,0,0,0,0,0,0,0,1,0,0
+HG03108,0,0,0,0,0,0,0,1,0,0
+HG03135,0,0,0,0,0,0,0,1,0,0
+HG03139,0,0,0,0,0,0,0,1,0,0
+HG03159,0,0,0,0,0,0,0,1,0,0
+HG03175,0,0,0,0,0,0,0,1,0,0
+HG03196,0,0,0,0,0,0,0,1,0,0
+HG03280,0,0,0,0,0,0,0,1,0,0
+HG03342,0,0,0,0,0,0,0,1,0,0
+HG03378,0,0,0,0,0,0,0,1,0,0
+HG03432,0,0,0,0,0,0,0,1,0,0
+HG03439,0,0,0,0,0,0,0,1,0,0
+HG03442,0,0,0,0,0,0,0,1,0,0
+HG03457,0,0,0,0,0,0,0,1,0,0
+HG03460,0,0,0,0,0,0,0,1,0,0
+HG03469,0,0,0,0,0,0,0,1,0,0
+HG03520,0,0,0,0,0,0,0,1,0,0
+HG03521,0,0,0,0,0,0,0,1,0,0
+HG03565,0,0,0,0,0,0,0,1,0,0
+HG03571,0,0,0,0,0,0,0,1,0,0
+HK-2,0,0,0,0,0,0,1,0,0,0
+HL-60,4,2,0,0,2,0,1,0,0,0
+HS-27A,0,0,0,0,0,0,1,0,0,0
+HS-5,0,0,0,0,0,0,1,0,0,0
+HT-29,0,0,2,2,0,1,1,0,0,0
+HT1080,0,0,4,4,2,2,1,0,0,0
+HTR-8/SVneo,0,0,0,0,0,0,1,0,0,0
+HUES48,0,7,0,0,0,0,0,0,0,0
+HUES6,0,7,0,0,0,0,0,0,0,0
+HUES64,0,7,2,2,0,1,0,0,0,0
+HeLa-S3,40,9,2,3,2,1,1,0,1,0
+HepG2,539,10,4,5,2,2,1,1,1,0
+HuH-7,0,0,0,0,0,0,1,0,0,0
+HuH-7.5,0,0,0,0,0,0,1,0,0,0
+IMR-90,12,26,4,4,0,2,1,1,1,0
+IgD-negative memory B cell,0,0,2,1,0,1,0,0,0,0
+Ishikawa,16,0,0,0,0,0,0,0,0,0
+Jurkat,0,0,0,0,2,0,0,0,0,0
+"Jurkat, Clone E6-1",0,1,2,1,0,1,1,0,0,0
+K562,306,10,4,5,2,2,1,1,0,2
+KBM-7,0,0,0,0,0,0,1,0,1,0
+KMS-11,1,6,0,0,0,0,0,0,0,0
+KOPT-K1,0,3,0,0,0,0,0,0,0,0
+Karpas-422,0,9,2,2,0,1,1,0,0,0
+L1-S8,0,0,0,0,0,0,1,0,0,0
+L1-S8R,0,0,0,0,0,0,1,0,0,0
+LHCN-M2,0,0,4,3,0,2,1,0,0,0
+LNCAP,1,0,0,0,0,0,0,0,0,0
+LNCaP clone FGC,1,1,0,0,0,0,1,0,0,0
+Langerhans cell,0,0,0,0,2,0,0,0,0,0
+LoVo,0,0,0,0,0,0,1,0,0,0
+Loucy,1,7,0,0,0,0,0,0,0,0
+M059J,0,0,2,2,0,1,1,0,0,0
+MCF 10A,1,0,2,2,0,1,1,0,0,2
+MCF-7,71,8,4,5,0,2,1,1,0,0
+MG63,0,2,2,2,0,1,1,0,0,0
+MM.1S,1,9,0,0,0,0,1,0,0,0
+NAMALWA,0,0,0,0,0,0,1,0,0,0
+NB4,4,1,0,0,0,0,1,0,0,0
+NCI-H226,0,0,0,0,2,0,1,0,0,0
+NCI-H460,0,0,4,4,2,2,1,0,0,0
+NCI-H929,0,8,0,0,0,0,1,1,0,0
+NT2/D1,1,6,0,0,0,0,1,0,0,0
+OCI-LY1,2,7,0,0,0,0,0,0,0,0
+OCI-LY3,1,10,0,0,0,0,0,0,0,0
+OCI-LY7,1,7,2,2,0,1,1,0,0,0
+PC-3,1,9,2,2,2,1,1,1,0,0
+PC-9,1,12,2,2,0,1,1,0,0,0
+PFSK-1,4,0,2,1,0,1,0,0,0,0
+Panc1,5,4,4,3,0,2,1,1,0,0
+Peyer's patch,2,5,4,3,0,2,1,0,0,0
+Purkinje cell,0,0,2,1,0,1,0,0,0,0
+RCC,0,0,0,0,0,0,1,0,0,0
+RCC 7860,0,0,0,0,0,0,1,0,0,0
+RKO,0,0,0,0,0,0,1,0,0,0
+RPMI7951,0,0,2,2,0,1,1,0,0,0
+RPMI8226,0,0,0,0,0,0,1,0,0,0
+RWPE1,1,1,0,0,0,0,1,0,0,0
+RWPE2,1,3,0,0,0,0,1,1,0,0
+Raji,1,0,0,0,2,0,0,0,0,0
+Right ventricle myocardium inferior,0,0,2,2,0,1,1,1,0,0
+Right ventricle myocardium superior,0,0,2,2,0,1,1,1,0,0
+SH-SY5Y,2,0,0,0,0,0,0,0,0,0
+SJCRH30,0,4,2,2,0,1,1,0,0,0
+SJSA1,0,4,2,2,0,1,1,0,0,0
+SK-MEL-5,0,0,4,4,0,2,1,0,0,0
+SK-N-DZ,0,0,2,2,0,1,1,0,0,0
+SK-N-MC,1,5,0,0,2,0,1,0,0,0
+SK-N-SH,31,10,2,3,0,1,1,0,0,0
+SU-DHL-6,0,6,0,0,0,0,0,0,0,0
+SW480,0,0,0,0,0,0,1,0,0,0
+Schwann cell,0,0,0,0,2,0,0,0,0,0
+Sertoli cell,0,0,0,0,2,0,0,0,0,0
+T follicular helper cell,0,0,0,0,0,0,1,0,0,0
+T-cell,0,6,4,4,0,2,1,1,0,0
+T-helper 1 cell,0,0,0,0,0,0,1,0,0,0
+T-helper 17 cell,0,0,0,2,0,0,1,1,0,0
+T-helper 2 cell,0,0,0,0,0,0,1,0,0,0
+T-helper 22 cell,0,0,0,0,0,0,1,0,0,0
+T-helper 9 cell,0,0,0,0,0,0,1,0,0,0
+T47D,1,0,0,0,0,0,1,0,0,0
+THP-1,0,0,0,0,2,0,0,0,0,0
+U-87 MG,0,0,2,1,0,1,0,0,0,0
+UCSF-4,0,5,0,0,0,0,0,0,0,0
+VCaP,1,1,0,0,0,0,0,0,0,0
+WERI-Rb-1,1,4,0,0,0,0,1,0,0,0
+WI38,1,1,0,0,0,0,0,0,0,0
+WTC11,34,0,2,3,0,1,1,1,0,0
+acinar cell of salivary gland,0,0,0,0,2,0,0,0,0,0
+acinar cell of sebaceous gland,0,0,0,0,2,0,0,0,0,0
+adipocyte,0,6,0,0,0,0,1,0,0,0
+adipocyte of breast,0,0,0,0,2,0,0,0,0,0
+adipocyte of omentum tissue,0,0,0,0,2,0,0,0,0,0
+adipose tissue,0,0,2,2,2,1,0,0,0,0
+adrenal gland,2,6,6,6,2,3,1,1,0,0
+adult organism,0,0,0,0,2,0,0,0,0,0
+airway epithelial cell,0,0,2,2,0,1,0,0,0,0
+amnion,0,3,2,2,2,1,0,0,0,0
+amnion mesenchymal stem cell,0,0,0,0,2,0,0,0,0,0
+amniotic epithelial cell,0,0,0,0,2,0,1,0,0,0
+amygdala,0,0,2,1,2,1,0,0,0,0
+angular gyrus,0,5,0,0,0,0,0,0,0,0
+annulus pulposus cell,0,0,0,0,2,0,0,0,0,0
+anterior cingulate cortex,0,0,2,1,0,1,0,0,0,0
+anterior lingual gland,0,0,2,1,0,1,0,0,0,0
+aorta,0,3,4,4,2,2,1,0,0,0
+aortic endothelial cell,0,0,0,0,2,0,0,0,0,0
+aortic smooth muscle cell,0,0,2,2,2,1,0,0,0,0
+artery,0,0,0,0,2,0,0,0,0,0
+articular chondrocyte of knee joint,0,0,2,2,0,1,0,0,0,0
+ascending aorta,1,5,4,3,0,2,1,0,0,0
+astrocyte,2,10,2,2,0,1,1,0,0,0
+astrocyte of the cerebellum,0,1,0,0,2,0,1,0,0,0
+astrocyte of the cerebral cortex,0,0,0,0,2,0,0,0,0,0
+astrocyte of the hippocampus,0,0,0,0,0,0,1,0,0,0
+astrocyte of the spinal cord,1,1,0,0,0,0,1,0,0,0
+basophil,0,0,0,0,4,0,0,0,0,0
+bile duct,0,0,0,0,0,0,0,1,0,0
+bladder microvascular endothelial cell,0,0,2,2,0,1,0,0,0,0
+blood,0,0,0,0,2,0,0,0,0,0
+body of pancreas,2,3,4,3,0,2,1,1,0,0
+bone marrow,0,0,0,0,2,0,0,0,0,0
+bone marrow cell,0,0,0,0,2,0,0,0,0,0
+brain,1,5,2,2,2,1,1,0,0,0
+brain microvascular endothelial cell,1,3,0,0,0,0,1,0,0,0
+brain pericyte,0,0,0,0,0,0,1,0,0,0
+breast,0,0,0,0,2,0,0,0,0,0
+breast epithelium,3,5,4,3,0,2,1,1,0,0
+bronchial epithelial cell,1,2,2,2,2,1,1,0,0,0
+bronchial smooth muscle cell,0,0,2,2,2,1,0,0,0,0
+bronchus fibroblast of lung,0,0,2,2,0,1,0,0,0,0
+calcaneal tendon,0,0,0,0,2,0,0,0,0,0
+camera-type eye,0,0,2,2,2,1,0,0,0,0
+capillary endothelial cell,0,0,0,0,2,0,0,0,0,0
+cardiac atrium fibroblast,0,0,2,2,0,1,0,0,0,0
+cardiac fibroblast,1,1,0,0,2,0,1,0,0,0
+cardiac mesenchymal cell,0,0,0,0,2,0,0,0,0,0
+cardiac muscle cell,1,5,4,3,2,2,1,0,0,0
+cardiac septum,0,0,2,2,0,1,1,1,0,0
+cardiac ventricle fibroblast,0,0,2,2,0,1,0,0,0,0
+caudate nucleus,0,5,2,1,2,1,0,0,0,0
+cell of skeletal muscle,0,0,0,0,2,0,0,0,0,0
+"central memory CD4-positive, alpha-beta T cell",0,0,0,0,0,0,1,0,0,0
+"central memory CD8-positive, alpha-beta T cell",0,0,0,0,0,0,1,1,0,0
+central nervous system pericyte,0,0,0,0,2,0,0,0,0,0
+cerebellar cortex,0,0,0,0,0,0,1,0,0,0
+cerebellar hemisphere,0,0,2,1,0,1,0,0,0,0
+cerebellum,0,1,6,5,2,3,1,0,0,0
+cerebrospinal fluid,0,0,0,0,2,0,0,0,0,0
+chondrocyte,1,0,2,2,2,1,1,0,0,0
+chorion,0,4,2,2,0,1,1,0,0,0
+chorionic villus,0,1,2,2,0,1,0,0,0,0
+choroid plexus epithelial cell,1,1,0,0,0,0,1,0,0,0
+cingulate gyrus,0,5,0,0,0,0,0,0,0,0
+colon,0,0,0,0,2,0,0,0,0,0
+colonic mucosa,1,6,2,2,0,1,1,1,0,0
+"common myeloid progenitor, CD34-positive",0,6,2,1,0,1,1,0,0,0
+corneal epithelial cell,0,0,0,0,2,0,0,0,0,0
+coronary artery,1,2,2,1,0,1,1,0,0,0
+corpus callosum,0,0,0,0,2,0,0,0,0,0
+cortex of kidney,0,0,2,1,0,1,0,0,0,0
+cranial nerve II,0,0,0,0,2,0,0,0,0,0
+cruciate ligament of knee,0,0,0,0,2,0,0,0,0,0
+dark melanocyte,0,0,0,0,2,0,0,0,0,0
+dermis blood vessel endothelial cell,0,0,2,2,0,1,1,0,0,0
+dermis lymphatic vessel endothelial cell,0,0,2,2,0,1,0,0,0,0
+dermis microvascular lymphatic vessel endothelial cell,0,0,2,2,0,1,1,0,0,0
+diaphragm,0,0,0,0,2,0,0,0,0,0
+diencephalon,0,0,2,2,2,1,0,0,0,0
+dorsal plus ventral thalamus,0,0,0,0,2,0,0,0,0,0
+dorsolateral prefrontal cortex,1,6,4,3,0,2,1,0,0,0
+duodenal mucosa,0,7,0,0,0,0,0,0,0,0
+duodenum,0,0,0,0,2,0,0,0,0,0
+dura mater,0,0,0,0,2,0,0,0,0,0
+ecto neural progenitor cell,0,0,0,0,0,0,1,0,0,0
+ectocervix,0,0,2,1,0,1,0,0,0,0
+ectodermal cell,0,4,2,2,0,1,0,0,0,0
+"effector CD4-positive, alpha-beta T cell",0,0,0,0,0,0,1,0,0,0
+"effector memory CD4-positive, alpha-beta T cell",0,3,0,0,0,0,1,0,0,0
+"effector memory CD8-positive, alpha-beta T cell",0,0,0,0,0,0,1,1,0,0
+egg chorion,0,0,0,0,2,0,0,0,0,0
+embryonic uterus,0,0,0,0,2,0,0,0,0,0
+endocervix,0,0,2,1,0,1,0,0,0,0
+endocrine pancreas,0,5,2,2,0,1,0,0,0,0
+endodermal cell,1,6,4,4,0,2,1,0,0,0
+endothelial cell,1,0,2,2,0,1,1,0,0,0
+endothelial cell of artery,0,0,0,0,2,0,0,0,0,0
+endothelial cell of coronary artery,0,0,2,2,0,1,0,0,0,0
+endothelial cell of lymphatic vessel,0,0,0,0,2,0,0,0,0,0
+endothelial cell of umbilical vein,8,9,4,5,2,2,1,0,0,2
+enteric smooth muscle cell,0,0,0,0,2,0,0,0,0,0
+eosinophil,0,0,0,0,2,0,0,0,0,0
+epidermal melanocyte,0,0,0,0,0,0,1,0,0,0
+epididymis,0,0,0,0,2,0,0,0,0,0
+epithelial cell of Malassez,0,0,0,0,4,0,0,0,0,0
+epithelial cell of alveolus of lung,0,0,2,2,0,1,0,0,0,0
+epithelial cell of esophagus,0,1,0,0,2,0,1,0,0,0
+epithelial cell of prostate,1,1,0,0,2,0,1,0,0,0
+epithelial cell of proximal tubule,1,1,2,2,2,1,1,0,0,0
+epithelial cell of umbilical artery,0,0,2,2,0,1,0,0,0,0
+erythroblast,1,0,0,0,0,0,0,0,0,0
+esophagus,0,5,2,2,2,1,0,0,0,0
+esophagus mucosa,0,0,2,2,0,1,0,1,0,0
+esophagus muscularis mucosa,3,6,4,3,0,2,0,0,0,0
+esophagus squamous epithelium,2,6,4,3,0,2,0,0,0,0
+eye,0,0,0,0,0,0,1,0,0,0
+fallopian tube,0,0,2,1,0,1,0,1,0,0
+fat cell,0,0,0,0,2,0,0,0,0,0
+female gonad,0,0,0,0,2,0,0,0,0,0
+femur,0,0,0,0,0,0,1,0,0,0
+fibroblast derived cell line,0,0,2,1,0,1,0,0,0,0
+fibroblast of breast,0,4,0,0,0,0,0,0,0,0
+fibroblast of choroid plexus,0,0,0,0,2,0,0,0,0,0
+fibroblast of dermis,1,7,2,2,2,1,1,0,0,0
+fibroblast of gingiva,0,0,0,0,2,0,1,0,0,0
+fibroblast of lung,1,8,4,5,0,2,1,0,0,0
+fibroblast of lymphatic vessel,0,0,0,0,2,0,0,0,0,0
+fibroblast of mammary gland,1,1,0,0,2,0,1,0,0,0
+fibroblast of peridontal ligament,0,0,0,0,0,0,1,0,0,0
+fibroblast of periodontium,0,0,0,0,2,0,0,0,0,0
+fibroblast of pulmonary artery,1,1,0,0,2,0,1,0,0,0
+fibroblast of skin of abdomen,0,0,2,1,0,1,1,0,0,0
+fibroblast of skin of back,0,0,2,1,0,1,1,0,0,0
+fibroblast of skin of left biceps,0,0,0,0,0,0,1,0,0,0
+fibroblast of skin of left quadriceps,0,0,0,0,0,0,1,0,0,0
+fibroblast of skin of right biceps,0,0,0,0,0,0,1,0,0,0
+fibroblast of skin of right quadriceps,0,0,0,0,0,0,1,0,0,0
+fibroblast of skin of scalp,0,0,2,1,0,1,1,0,0,0
+fibroblast of the aortic adventitia,1,1,2,2,2,1,1,0,0,0
+fibroblast of the conjuctiva,0,0,0,0,2,0,0,0,0,0
+fibroblast of the conjunctiva,0,0,0,0,0,0,1,0,0,0
+fibroblast of upper back skin,0,0,0,0,0,0,1,0,0,0
+fibroblast of villous mesenchyme,1,1,2,2,2,1,1,0,0,0
+forelimb muscle,0,0,2,1,0,1,1,0,0,0
+foreskin fibroblast,1,6,2,2,0,1,1,0,0,0
+foreskin keratinocyte,1,7,4,4,0,2,1,1,0,0
+foreskin melanocyte,0,7,2,2,0,1,1,0,0,0
+frontal cortex,0,0,4,3,2,2,1,0,0,0
+fungiform papilla,0,0,0,0,2,0,0,0,0,0
+gallbladder,0,0,0,0,2,0,0,0,0,0
+gamma-delta T cell,0,0,0,0,2,0,0,0,0,0
+gastrocnemius medialis,2,5,4,3,0,2,1,1,0,0
+gastroesophageal sphincter,4,6,4,3,0,2,0,1,0,0
+germinal matrix,0,5,0,0,0,0,0,0,0,0
+gingival epithelial cell,0,0,0,0,2,0,0,0,0,0
+globus pallidus,0,0,0,0,2,0,1,0,0,0
+glomerular endothelial cell,0,0,2,2,2,1,0,0,0,0
+glomerular visceral epithelial cell,0,0,0,0,0,0,1,0,0,0
+glutamatergic neuron,5,0,2,2,0,1,1,0,0,0
+hair follicle dermal papilla cell,0,0,2,2,0,1,0,0,0,0
+hair follicular keratinocyte,0,0,2,2,0,1,0,0,0,0
+head of caudate nucleus,0,0,0,0,0,0,1,0,0,0
+heart,0,4,4,5,2,2,1,0,0,0
+heart left ventricle,2,6,4,4,2,2,1,1,0,0
+heart right ventricle,1,6,4,4,0,2,1,1,0,0
+hematopoietic multipotent progenitor cell,0,0,2,2,0,1,1,0,0,0
+hepatic mesenchymal stem cell,0,0,0,0,2,0,0,0,0,0
+hepatic stellate cell,0,0,0,0,0,0,1,0,0,0
+hepatocyte,1,7,4,3,2,2,1,0,0,0
+hindlimb muscle,0,0,2,1,0,1,1,0,0,0
+hypothalamus,0,0,2,1,0,1,0,0,0,0
+iPS DF 19.11,0,4,0,0,0,0,0,0,0,0
+iPS DF 6.9,0,2,0,0,0,0,1,0,0,0
+iPS-11a,0,1,0,0,0,0,0,0,0,0
+iPS-15b,0,5,0,0,0,0,0,0,0,0
+iPS-18a,0,6,0,0,0,0,0,0,0,0
+iPS-18c,0,2,0,0,0,0,0,0,0,0
+iPS-20b,0,7,0,0,0,0,0,0,0,0
+iPS-NIHi11,0,0,0,0,0,0,1,0,0,0
+immature CD1a-positive Langerhans cell,0,0,0,0,2,0,0,0,0,0
+immature conventional dendritic cell,0,0,0,0,4,0,0,0,0,0
+immature natural killer cell,0,0,2,1,0,1,1,0,0,0
+inferior rectus extraocular muscle,0,0,0,0,2,0,0,0,0,0
+inflammatory macrophage,0,0,0,0,0,0,1,0,0,0
+insula,0,0,0,0,2,0,0,0,0,0
+intestinal epithelial cell,0,0,0,0,2,0,0,0,0,0
+iris pigment epithelial cell,0,0,0,0,2,0,1,0,0,0
+islet precursor cell,0,0,0,0,0,0,1,0,0,0
+keratinocyte,2,8,2,3,2,1,1,0,0,0
+keratocyte,0,0,0,0,2,0,0,0,0,0
+kidney,0,5,4,5,2,2,1,1,0,0
+kidney capillary endothelial cell,0,0,0,0,0,0,1,0,0,0
+kidney epithelial cell,1,2,2,2,0,1,1,0,0,0
+kidney tubule cell,0,0,0,0,0,0,1,0,0,0
+large intestine,0,6,0,1,0,0,1,0,0,0
+lateral rectus extra-ocular muscle,0,0,0,0,2,0,0,0,0,0
+layer of hippocampus,0,5,0,0,0,0,0,0,0,0
+left cardiac atrium,0,0,2,2,2,1,1,1,0,0
+left colon,0,0,2,2,0,1,1,1,0,0
+left forelimb,0,0,0,0,0,0,1,0,0,0
+left hindlimb,0,0,0,0,0,0,1,0,0,0
+left kidney,0,0,2,1,0,1,1,0,0,0
+left lobe of liver,0,0,2,2,0,1,1,1,0,0
+left lung,1,6,4,3,0,2,1,1,0,0
+left ovary,0,0,0,0,2,0,0,0,0,0
+left renal cortex interstitium,0,0,2,1,0,1,1,0,0,0
+left renal pelvis,0,0,2,1,0,1,1,0,0,0
+left ventricle myocardium,0,0,2,1,0,1,0,0,0,0
+left ventricle myocardium inferior,1,6,2,2,0,1,1,0,0,0
+left ventricle myocardium superior,0,0,2,2,0,1,1,1,0,0
+lens epithelial cell,0,0,0,0,2,0,0,0,0,0
+leptomeningeal cell,0,0,0,0,2,0,0,0,0,0
+light melanocyte,0,0,0,0,2,0,0,0,0,0
+liver,18,6,4,5,2,2,1,1,0,0
+locus ceruleus,0,0,0,0,2,0,0,0,0,0
+lower leg skin,2,0,4,3,0,2,1,0,0,0
+lower lobe of left lung,1,6,2,2,0,1,1,1,0,0
+lower lobe of right lung,1,5,2,2,2,1,1,1,0,0
+luminal epithelial cell of mammary gland,0,4,2,2,0,1,0,0,0,0
+lung,0,7,4,4,2,2,1,1,0,0
+lung fibroblast,0,0,0,0,2,0,0,0,0,0
+lung microvascular endothelial cell,0,0,2,2,0,1,1,0,0,0
+lymph node,0,0,0,0,2,0,0,0,0,0
+lymphoblast,0,0,2,1,0,1,0,0,0,0
+macrophage,0,0,0,0,2,0,0,0,0,0
+mammary epithelial cell,2,10,4,4,0,2,1,0,0,0
+mammary gland epithelial cell,0,0,0,0,2,0,0,0,0,0
+mammary microvascular endothelial cell,0,0,2,2,0,1,0,0,0,0
+mammary stem cell,0,0,2,2,0,1,0,0,0,0
+mast cell,0,0,0,0,2,0,0,0,0,0
+medial rectus extraocular muscle,0,0,0,0,2,0,0,0,0,0
+medulla oblongata,0,0,0,0,2,0,0,0,0,0
+melanocyte of skin,0,0,2,2,0,1,0,0,0,0
+memory B cell,0,0,0,0,0,0,1,1,0,0
+meninx,0,0,0,0,2,0,0,0,0,0
+mesangial cell,0,0,2,2,2,1,0,0,0,0
+mesenchymal cell,0,0,0,0,2,0,0,0,0,0
+mesenchymal stem cell,0,14,2,2,0,1,1,0,0,0
+mesenchymal stem cell of Wharton's jelly,0,0,2,2,2,1,0,0,0,0
+mesenchymal stem cell of adipose,0,0,2,2,2,1,0,0,0,0
+mesenchymal stem cell of the bone marrow,0,0,2,2,2,1,0,0,0,0
+mesenchymal stem cell of umbilical cord,0,0,0,0,2,0,0,0,0,0
+mesendoderm,0,7,2,2,0,1,1,0,0,0
+mesenteric fat pad,0,0,2,2,0,1,1,1,0,0
+mesodermal cell,0,5,2,2,0,1,0,0,0,0
+mesothelial cell,0,0,0,0,4,0,0,0,0,0
+mesothelial cell of epicardium,1,0,2,2,0,1,1,0,0,0
+metanephros,0,0,2,2,0,1,0,0,0,0
+middle frontal gyrus,0,0,0,0,2,0,0,0,0,0
+middle temporal gyrus,0,0,0,0,2,0,0,0,0,0
+mitral valve,0,0,0,0,2,0,0,0,0,0
+mole,0,0,0,2,0,0,0,0,0,0
+monocyte,0,0,0,0,2,0,0,0,0,0
+mononuclear cell,0,3,2,2,0,1,0,0,0,0
+motor neuron,0,3,2,1,0,1,0,1,0,0
+mouth mucosa,0,0,0,0,2,0,0,0,0,0
+mucosa of descending colon,1,3,2,2,0,1,1,1,0,0
+mucosa of gallbladder,0,0,2,2,0,1,1,1,0,0
+mucosa of rectum,0,5,0,0,0,0,0,0,0,0
+mucosa of stomach,0,4,0,0,0,0,0,0,0,0
+mucosa of urinary bladder,0,0,0,0,0,0,0,1,0,0
+muscle layer of colon,0,5,0,0,0,0,0,0,0,0
+muscle layer of duodenum,0,5,0,0,0,0,0,0,0,0
+muscle of arm,0,0,2,1,0,1,1,0,0,0
+muscle of back,0,0,2,1,0,1,1,0,0,0
+muscle of leg,0,6,2,3,0,1,1,0,0,0
+muscle of trunk,0,6,2,1,0,1,1,0,0,0
+myelocyte,0,0,0,0,2,0,0,0,0,0
+myocyte,0,0,4,3,0,2,1,0,0,0
+myoepithelial cell of mammary gland,0,6,2,2,0,1,0,0,0,0
+myometrial cell,0,0,2,2,0,1,0,0,0,0
+myotube,0,9,2,2,2,1,1,0,0,0
+nail plate,0,0,0,0,2,0,0,0,0,0
+naive B cell,0,0,2,1,0,1,1,1,0,0
+naive regulatory T cell,0,0,0,0,4,0,0,0,0,0
+"naive thymus-derived CD4-positive, alpha-beta T cell",0,4,4,3,2,2,1,1,0,0
+"naive thymus-derived CD8-positive, alpha-beta T cell",0,0,4,3,0,2,1,1,0,0
+nasal cavity respiratory epithelium epithelial cell of viscerocranial mucosa,0,0,2,2,0,1,0,0,0,0
+natural killer cell,1,5,4,4,2,2,1,1,0,0
+nephron,1,0,0,0,0,0,0,0,0,0
+nephron progenitor cell,1,0,0,0,0,0,1,0,0,0
+nephron tubule epithelial cell,0,0,0,0,2,0,0,0,0,0
+neural cell,5,4,2,2,0,1,0,0,0,0
+neural crest cell,1,0,2,2,0,1,1,0,0,0
+neural progenitor cell,3,9,2,2,0,1,1,0,0,0
+neuron,0,5,0,0,2,0,0,0,0,0
+neuronal stem cell,0,11,2,2,2,1,1,0,0,0
+neurosphere,0,6,2,2,0,1,0,0,0,0
+neutrophil,1,6,0,0,4,0,0,0,0,0
+non-pigmented ciliary epithelial cell,0,0,0,0,2,0,1,0,0,0
+nucleus accumbens,0,0,2,1,2,1,0,0,0,0
+nucleus pulposus cell of intervertebral disc,0,0,0,0,2,0,0,0,0,0
+occipital lobe,0,0,2,2,2,1,0,0,0,0
+occipital pole,0,0,0,0,2,0,0,0,0,0
+olfactory epithelial cell,0,0,0,0,4,0,0,0,0,0
+olfactory region,0,0,0,0,2,0,0,0,0,0
+oligodendrocyte precursor cell,0,0,0,0,2,0,0,0,0,0
+omental fat pad,1,0,4,3,0,2,1,1,0,0
+omentum preadipocyte,0,0,0,0,2,0,0,0,0,0
+osteoblast,1,9,2,2,2,1,0,0,0,0
+osteocyte,1,0,2,2,0,1,0,0,0,0
+outer medulla of kidney,0,0,2,1,0,1,0,0,0,0
+outer root sheath cell,0,0,0,0,2,0,0,0,0,0
+ovary,2,6,6,6,0,3,1,1,0,0
+pancreas,1,5,4,4,2,2,1,1,0,0
+paracentral gyrus,0,0,0,0,2,0,0,0,0,0
+parathyroid adenoma,1,6,0,0,0,0,0,0,0,0
+parietal lobe,0,0,2,2,2,1,0,0,0,0
+parotid gland,0,0,0,0,2,0,0,0,0,0
+penis,0,0,0,0,2,0,0,0,0,0
+pericardium fibroblast,0,0,2,2,0,1,0,0,0,0
+perineural cell,0,0,0,0,2,0,0,0,0,0
+peripheral blood mononuclear cell,0,7,2,2,0,1,0,0,0,0
+perirenal adipocyte cell,0,0,0,0,2,0,0,0,0,0
+perirenal preadipocyte,0,0,0,0,2,0,0,0,0,0
+pineal body,0,0,0,0,2,0,0,0,0,0
+pituitary gland,0,0,2,1,2,1,0,0,0,0
+placenta,1,6,4,4,2,2,1,0,0,0
+placental basal plate,0,1,2,2,0,1,0,0,0,0
+placental epithelial cell,0,0,2,2,2,1,0,0,0,0
+placental pericyte,0,0,2,2,0,1,0,0,0,0
+plasmacytoid dendritic cell,0,0,0,0,4,0,0,0,0,0
+pneumocyte,0,0,0,0,2,0,0,0,0,0
+pons,0,0,0,0,2,0,1,0,0,0
+postcentral gyrus,0,0,0,0,2,0,0,0,0,0
+posterior cingulate gyrus,0,0,0,0,0,0,1,0,0,0
+posterior vena cava,0,0,2,2,0,1,1,1,0,0
+preadipocyte of the breast,0,0,0,0,2,0,0,0,0,0
+progenitor cell of endocrine pancreas,1,0,2,2,0,1,1,0,0,0
+prostate gland,3,3,4,3,2,2,1,0,0,0
+prostate stromal cell,0,0,0,0,2,0,0,0,0,0
+psoas muscle,1,5,4,4,0,2,1,1,0,0
+pulmonary artery endothelial cell,0,0,2,2,0,1,1,0,0,0
+pulmonary valve,0,0,0,0,2,0,0,0,0,0
+putamen,0,0,2,1,2,1,1,0,0,0
+rectal smooth muscle tissue,0,4,0,0,0,0,0,0,0,0
+rectum,0,0,0,0,2,0,0,0,0,0
+regular cardiac myocyte,0,0,2,2,0,1,0,0,0,0
+renal cortex interstitium,0,0,2,1,0,1,1,0,0,0
+renal cortical epithelial cell,0,0,2,2,2,1,1,0,0,0
+renal pelvis,0,0,2,1,0,1,1,0,0,0
+respiratory epithelial cell,0,0,0,0,2,0,0,0,0,0
+respiratory system smooth muscle,0,0,0,0,2,0,0,0,0,0
+reticulocyte,0,0,0,0,2,0,0,0,0,0
+retina,0,0,0,0,2,0,1,0,0,0
+retinal pigment epithelial cell,1,1,0,0,2,0,1,0,0,0
+right atrium auricular region,1,5,4,3,0,2,1,0,0,0
+right cardiac atrium,0,3,4,4,0,2,1,1,0,0
+right forelimb,0,0,0,0,0,0,1,0,0,0
+right hindlimb,0,0,0,0,0,0,1,0,0,0
+right kidney,0,0,0,0,0,0,1,0,0,0
+right lobe of liver,2,4,4,3,0,2,1,1,0,0
+right lung,0,0,2,1,0,1,1,0,0,0
+right ovary,0,0,0,0,2,0,0,0,0,0
+right renal cortex interstitium,0,0,2,1,0,1,1,0,0,0
+right renal pelvis,0,0,2,1,0,1,1,0,0,0
+saliva-secreting gland,0,0,0,0,2,0,0,0,0,0
+sciatic nerve,0,0,2,2,0,1,1,1,0,0
+seminal vesicle,0,0,0,0,2,0,0,0,0,0
+sigmoid colon,4,5,6,5,0,3,1,1,0,0
+skeletal muscle cell,0,1,0,0,0,0,1,0,0,0
+skeletal muscle myoblast,1,7,4,5,2,2,1,0,0,0
+skeletal muscle satellite cell,0,6,2,2,2,1,0,0,0,0
+skeletal muscle tissue,0,6,2,2,2,1,0,0,0,0
+skin epidermis,0,2,0,0,0,0,0,0,0,0
+skin fibroblast,0,0,0,0,2,0,0,0,1,0
+skin of body,0,0,2,2,0,1,0,0,0,0
+skin of palm of manus,0,0,0,0,2,0,0,0,0,0
+small intestine,0,6,2,3,2,1,1,0,0,0
+smooth muscle cell,0,9,4,3,0,2,0,0,0,0
+smooth muscle cell of bladder,0,0,2,2,2,1,0,0,0,0
+smooth muscle cell of colon,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of prostate,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the brachiocephalic vasculature,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the brain vasculature,0,0,0,0,2,0,1,0,0,0
+smooth muscle cell of the carotid artery,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the coronary artery,0,0,2,2,2,1,0,0,0,0
+smooth muscle cell of the esophagus,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the internal thoracic artery,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the pulmonary artery,0,0,2,2,2,1,0,0,0,0
+smooth muscle cell of the subclavian artery,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of the umbilical artery,0,0,2,2,2,1,0,0,0,0
+smooth muscle cell of the umbilical vein,0,0,0,0,2,0,0,0,0,0
+smooth muscle cell of trachea,0,0,2,2,2,1,0,0,0,0
+smooth muscle tissue,0,0,0,0,2,0,0,0,0,0
+soleus muscle,0,0,0,0,2,0,0,0,0,0
+spinal cord,0,5,4,5,2,2,1,0,0,0
+spleen,3,6,6,6,2,3,1,1,0,0
+stem cell of epidermis,0,0,0,0,2,0,0,0,0,0
+stomach,3,6,6,6,2,3,1,1,0,0
+stomach smooth muscle,0,6,0,0,0,0,0,0,0,0
+stromal cell of bone marrow,0,0,0,0,0,0,1,0,0,0
+stromal cell of pancreas,0,0,0,0,2,0,0,0,0,0
+subcutaneous abdominal adipose tissue,0,7,0,0,0,0,0,0,0,0
+subcutaneous adipose tissue,0,0,4,3,0,2,0,1,0,0
+subcutaneous fat cell,0,0,0,0,2,0,0,0,0,0
+subcutaneous preadipocyte,0,0,2,2,2,1,0,0,0,0
+submandibular gland,0,0,0,0,2,0,0,0,0,0
+substantia nigra,0,5,2,1,2,1,0,0,0,0
+superior rectus extraocular muscle,0,0,0,0,2,0,0,0,0,0
+suppressor macrophage,0,0,0,0,0,0,1,0,0,0
+suprapubic skin,3,5,4,3,0,2,1,0,0,0
+synovial cell,0,0,0,0,2,0,0,0,0,0
+temporal lobe,0,5,2,2,2,1,0,0,0,0
+tendon cell,0,0,0,0,2,0,0,0,0,0
+testis,1,3,6,6,2,3,1,0,0,0
+thoracic aorta,1,4,2,2,0,1,1,0,0,0
+thoracic aorta endothelial cell,0,0,2,2,2,1,0,0,0,0
+throat,0,0,0,0,2,0,0,0,0,0
+thymus,0,6,2,3,2,1,1,0,0,0
+thyroid gland,2,4,4,3,2,2,1,1,0,0
+tibial artery,1,6,2,1,0,1,1,1,0,0
+tibial nerve,3,6,4,3,0,2,1,0,0,0
+tongue,0,0,2,2,2,1,1,0,0,0
+tonsil,0,0,0,0,2,0,0,0,0,0
+trabecular meshwork cell,0,0,0,0,2,0,0,0,0,0
+trachea,0,0,0,0,2,0,0,0,0,0
+tracheal epithelial cell,0,0,2,2,2,1,0,0,0,0
+transverse colon,4,5,4,3,0,2,1,1,0,0
+tricuspid valve,0,0,0,0,2,0,0,0,0,0
+trophoblast,0,3,2,2,0,1,0,0,0,0
+trophoblast cell,0,14,2,2,0,1,1,0,0,0
+type B pancreatic cell,1,0,2,2,0,1,0,0,0,0
+umbilical cord,0,0,2,2,2,1,1,0,0,0
+umbilical cord blood,0,0,0,0,2,0,0,0,0,0
+upper lobe of left lung,4,4,4,3,0,2,1,1,0,0
+upper lobe of right lung,1,4,2,2,0,1,1,1,0,0
+ureter,0,0,2,2,0,1,1,1,0,0
+urethra,0,0,0,0,2,0,0,0,0,0
+urinary bladder,0,2,6,5,2,3,0,0,0,0
+urothelial cell,0,0,0,0,2,0,0,0,0,0
+urothelium cell line,0,0,0,0,0,0,1,0,0,0
+uterine cervix,0,0,0,0,2,0,0,0,0,0
+uterine smooth muscle cell,0,0,2,2,2,1,0,0,0,0
+uterus,2,2,4,3,2,2,1,1,0,0
+vagina,3,4,4,3,2,2,0,0,0,0
+vas deferens,0,0,0,0,2,0,0,0,0,0
+vein,0,0,0,0,2,0,0,0,0,0
+vein endothelial cell,0,0,2,2,2,1,0,0,0,0
+venous blood,0,0,2,1,0,1,0,0,0,0
+vermiform appendix,0,0,0,0,2,0,0,0,0,0
+vertebral mesenchymal stem cell,0,0,0,0,2,0,0,0,0,0
+visceral preadipocyte,0,0,0,0,2,0,0,0,0,0
+vitreous humor,0,0,0,0,2,0,0,0,0,0
+zone of skin,0,0,0,0,2,0,0,0,0,0
diff --git a/notebooks/deepISA_tutorial.ipynb b/notebooks/deepISA_tutorial.ipynb
index 83304df..c08c943 100644
--- a/notebooks/deepISA_tutorial.ipynb
+++ b/notebooks/deepISA_tutorial.ipynb
@@ -482,35 +482,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "from deepISA.scoring.mapper import map_motifs\n",
-    "import torch\n",
-    "#TODO: write preprocess_remap(cell_type). \n",
-    "\n",
-    "# load the best model\n",
-    "model_path = os.path.join(RESULTS_DIR, \"Model\", \"model_best.pt\")\n",
-    "model.load_state_dict(torch.load(model_path, weights_only=True))\n",
-    "\n",
-    "map_motifs(\n",
-    "    regions_df=df_pos, \n",
-    "    fasta_path=FASTA_PATH,\n",
-    "    jaspar_path=JASPAR_BB,\n",
-    "    outpath=os.path.join(RESULTS_DIR,\"motif_locs.csv\"),\n",
-    "    model=model,\n",
-    "    device=DEVICE,\n",
-    "    tracks=[0],\n",
-    "    expressed_tfs=None, # set to None for tutorial, but providing expressed_tf_list is highly recommended for real analysis to reduce false positives.\n",
-    "    motif_score_thresh=500,\n",
-    "    remap_path=None,\n",
-    "    attr_percentile=70,\n",
-    "    attr_batch_size=1024\n",
-    ")\n",
-    "\n",
-    "\n",
-    "# read the mapped motif locations\n",
-    "df_motif_locs = pd.read_csv(os.path.join(RESULTS_DIR,\"motif_locs.csv\"))\n",
-    "df_motif_locs.head()"
-   ]
+   "source": "from deepISA.scoring.mapper import map_motifs\nimport torch\n\n# load the best model (or the pretrained model if you skipped training)\nmodel_path = os.path.join(RESULTS_DIR, \"Model\", \"model_best.pt\")\nmodel.load_state_dict(torch.load(model_path, weights_only=True))\n\n# Note: for local testing with mini_jaspar.bb, use motif_score_thresh=200\n# (mini_jaspar.bb max score is 494). For production with full JASPAR use 500.\nmap_motifs(\n    regions_df=df_pos, \n    fasta_path=FASTA_PATH,\n    jaspar_path=JASPAR_BB,\n    outpath=os.path.join(RESULTS_DIR,\"motif_locs.csv\"),\n    model=model,\n    device=DEVICE,\n    tracks=[0],\n    expressed_tfs=None, # set to None for tutorial; provide expressed_tf_list for real analysis\n    motif_score_thresh=200,  # use 500 for full JASPAR; 200 for mini_jaspar.bb test\n    remap_path=None,\n    attr_percentile=70,\n    attr_batch_size=1024\n)\n\n# read the mapped motif locations\ndf_motif_locs = pd.read_csv(os.path.join(RESULTS_DIR,\"motif_locs.csv\"))\ndf_motif_locs.head()"
   },
   {
    "cell_type": "markdown",
@@ -1180,6 +1152,148 @@
    "source": [
     "plot_cell_specificity(df_coop_tf)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Part 8: AlphaGenome Backend\n",
+    "\n",
+    "Use the AlphaGenome API as a drop-in model backend for ISA scoring —\n",
+    "the same pipeline (motif mapping → attr_filter → ISA) runs unchanged;\n",
+    "only the prediction model is swapped.\n",
+    "\n",
+    "**Prerequisites:**\n",
+    "- AlphaGenome API key (apply at https://github.com/google-deepmind/alphagenome)\n",
+    "- `pip install alphagenome pyyaml` (already in `deepisa_ag_env`)\n",
+    "- Parts 1–3 of this tutorial must have been run first: `FASTA_PATH`, `RESULTS_DIR`,\n",
+    "  and `motif_locs.csv` produced by `map_motifs()` are all required.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "### Step 0: Choose your biosample and assay type\n",
+    "\n",
+    "AlphaGenome supports **714 human cell lines / tissues** and **10 assay types**.\n",
+    "Before writing the config, open the reference table to find the right combination:\n",
+    "\n",
+    "```\n",
+    "notebooks/ag_biosample_reference.csv\n",
+    "```\n",
+    "\n",
+    "Each row is a biosample, each column is an assay type, and the value is the\n",
+    "number of tracks available (0 = not available for that combination).\n",
+    "\n",
+    "| Assay | What it measures | Typical use |\n",
+    "|---|---|---|\n",
+    "| **DNASE** | DNase-seq chromatin accessibility | default for regulatory elements |\n",
+    "| **ATAC** | ATAC-seq open chromatin | alternative to DNASE |\n",
+    "| **CAGE** | CAGE-seq promoter activity | promoter / TSS analysis |\n",
+    "| **RNA_SEQ** | RNA-seq gene expression | gene-level signal |\n",
+    "| **CHIP_TF** | TF ChIP-seq occupancy | TF-centric analysis (many tracks) |\n",
+    "| **CHIP_HISTONE** | Histone mark ChIP-seq | chromatin state |\n",
+    "| PROCAP | PRO-cap nascent transcription | high-res TRE activity |\n",
+    "| SPLICE_SITE_USAGE | Splicing | splicing QTL analysis |\n",
+    "\n",
+    "The code cell below loads the table and filters it — run it to find your cell line.\n",
+    "Then copy the exact `biosample_name` string into the config cell."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": "import os\n\n# ── Paste your AlphaGenome API key here ──────────────────────────────────────\nALPHA_GENOME_API_KEY = \"YOUR_API_KEY_HERE\"   # ← replace with your key\n\n# Alternative: load from a .env file (one line: just the key)\n# with open(\"/path/to/.env\") as f:\n#     ALPHA_GENOME_API_KEY = f.read().strip()\n\nos.environ[\"ALPHA_GENOME_API_KEY\"] = ALPHA_GENOME_API_KEY\n_key_is_set = ALPHA_GENOME_API_KEY not in (\"\", \"YOUR_API_KEY_HERE\")\n# Report status only: never echo key characters into saved notebook output.\nprint(\"API key loaded\" if _key_is_set else \"API key: (not set)\")",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": null,
+   "metadata": {},
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the reference table (714 biosamples × 10 assay types)\n",
+    "ref = pd.read_csv(\"ag_biosample_reference.csv\", index_col=0)\n",
+    "\n",
+    "# ── Search by cell-line name (case-insensitive substring match) ──────────────\n",
+    "SEARCH = \"GM12878\"   # ← change to your cell line\n",
+    "hits = ref[ref.index.str.contains(SEARCH, case=False)]\n",
+    "print(f\"Matches for '{SEARCH}':\")\n",
+    "print(hits.to_string())\n",
+    "\n",
+    "# ── Or: list all biosamples that have DNASE data ─────────────────────────────\n",
+    "# print(ref[ref['DNASE'] > 0].index.tolist())\n",
+    "\n",
+    "# ── Or: show the full table in a scrollable view ─────────────────────────────\n",
+    "# from IPython.display import display\n",
+    "# display(ref.style.highlight_max(axis=0))"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": "from alphagenome.models import dna_client\nfrom alphagenome.models.dna_output import OutputType\n\ndna_model_meta = dna_client.create(ALPHA_GENOME_API_KEY)\nmeta = dna_model_meta.output_metadata(dna_client.Organism.HOMO_SAPIENS).concatenate()\n\noutput_type      = \"DNASE\"           # change to ATAC, CAGE, RNA_SEQ, CHIP_TF, etc.\noutput_type_enum = OutputType[output_type]\nbiosamples = sorted(\n    meta[meta[\"output_type\"] == output_type_enum][\"biosample_name\"]\n    .dropna().unique()\n)\nprint(f\"Available biosamples for {output_type}: {len(biosamples)} total\")\nprint(\"First 20:\", biosamples[:20])",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "import yaml\n\n# Pick a biosample matching your cell type; browse the list above.\nBIOSAMPLE = biosamples[0]   # replace with e.g. \"GM12878\" for B-lymphocytes\nprint(f\"Using biosample: '{BIOSAMPLE}'\")\n\nAG_CONFIG_PATH = os.path.join(RESULTS_DIR, \"ag_config.yaml\")\n# ── Single-track config (default) ───────────────────────────────────────────\n# config = {\n#     'api_key': ALPHA_GENOME_API_KEY, 'output_type': output_type,\n#     'biosample_name': BIOSAMPLE, 'context_len': 16384, 'seq_len': 600, 'aggregation': 'sum',\n# }\n\n# ── Multi-track config: combine assays / cell lines in ONE API call ──────────\n# config = {\n#     'api_key': ALPHA_GENOME_API_KEY, 'context_len': 16384, 'seq_len': 600, 'aggregation': 'sum',\n#     'tracks': [\n#         {'output_type': 'DNASE', 'biosample_name': 'GM12878'},\n#         {'output_type': 'CAGE',  'biosample_name': 'GM12878'},\n#         {'output_type': 'ATAC',  'biosample_name': 'K562'},\n#     ]\n# }\n# n_tracks = DNASE(1) + CAGE(2) + ATAC(1) = 4   → isa_t0 .. isa_t3\n# Use calc_coop_score(..., track_idx=i) to analyse each track separately.\n\nconfig = {\n    \"api_key\":        ALPHA_GENOME_API_KEY,\n    \"output_type\":    output_type,\n    \"biosample_name\": BIOSAMPLE,\n    \"context_len\":    16384,\n    \"seq_len\":        600,\n    \"aggregation\":    \"sum\",\n}\nwith open(AG_CONFIG_PATH, \"w\") as f:\n    yaml.dump(config, f, default_flow_style=False)\nprint(f\"Config written → {AG_CONFIG_PATH}\")"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "id": null,
+   "metadata": {},
+   "source": [
+    "### Choosing tracks and handling multi-track assays\n",
+    "\n",
+    "The number of tracks (`n_tracks`) depends on your biosample + assay combination:\n",
+    "\n",
+    "| Assay | Typical n_tracks | Notes |\n",
+    "|---|---|---|\n",
+    "| DNASE | 1–3 | one experiment per biosample; easy to interpret |\n",
+    "| ATAC | 1–2 | same as DNASE |\n",
+    "| CAGE | 2–4 | plus/minus strand or replicate tracks |\n",
+    "| CHIP_TF | 1–100+ | **one track per TF** — see note below |\n",
+    "| CHIP_HISTONE | 1–10 | one per histone mark |\n",
+    "\n",
+    "**What happens with multiple tracks:**\n",
+    "- `adapter.n_tracks` is the true output width; `adapter._ontology_terms` lists the ontology terms sent to the API, and its length may differ from `n_tracks`.\n",
+    "- `run_single_isa` outputs columns `isa_t0, isa_t1, ..., isa_t{n-1}` — one per track.\n",
+    "- `calc_coop_score(..., track_idx=i)` lets you analyse one track at a time.\n",
+    "  Run it once per track of interest, or loop over `range(n_tracks)`.\n",
+    "\n",
+    "**CHIP_TF note:** each track is a different TF's ChIP-seq experiment.\n",
+    "With 30+ tracks the ISA output is wide but each column is interpretable independently.\n",
+    "Use `track_idx` to pick the column matching the TF you care about.\n",
+    "\n",
+    "**Recommended starting point:** use `DNASE` or `ATAC` (n_tracks ≤ 3).\n",
+    "The ISA scores will reflect overall chromatin accessibility — equivalent to\n",
+    "the Conv model's regression head."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "import torch\nfrom deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter\nfrom deepISA.scoring.single_isa import run_single_isa\n\n# motif_locs.csv was produced (and already filtered) by map_motifs() in Part 3\nMOTIF_LOCS    = os.path.join(RESULTS_DIR, \"motif_locs.csv\")\nAG_ISA_PATH   = os.path.join(RESULTS_DIR, \"motif_single_isa_ag.csv\")\n\nadapter  = AlphaGenomeAdapter(AG_CONFIG_PATH)\nn_tracks = adapter.n_tracks   # true output width from probe call (may differ from len(_ontology_terms))\ndevice   = torch.device(\"cpu\")\nprint(f\"Adapter ready — biosample '{BIOSAMPLE}', n_tracks={n_tracks}\")\n\nrun_single_isa(\n    model                 = adapter,\n    fasta_path            = FASTA_PATH,\n    motif_locs_path       = MOTIF_LOCS,\n    outpath               = AG_ISA_PATH,\n    device                = device,\n    tracks                = list(range(n_tracks)),\n    num_regions_per_batch = 10,\n    pred_batch_size       = 1,   # one API call per sequence\n)\nprint(f\"AlphaGenome single ISA complete → {AG_ISA_PATH}\")"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": "from deepISA.scoring.combi_isa import run_combi_isa\n\nAG_COMBI_PATH = os.path.join(RESULTS_DIR, \"motif_combi_isa_ag.csv\")\n\nrun_combi_isa(\n    model                 = adapter,\n    fasta_path            = FASTA_PATH,\n    motif_locs_path       = MOTIF_LOCS,   # same filtered motif locs as single ISA\n    outpath               = AG_COMBI_PATH,\n    device                = device,\n    inde_dist_max         = 255,\n    tracks                = list(range(n_tracks)),\n    num_regions_per_batch = 5,\n    pred_batch_size       = 1,\n)\nprint(f\"AlphaGenome combinatorial ISA complete → {AG_COMBI_PATH}\")",
+   "outputs": [],
+   "execution_count": null
   }
  ],
  "metadata": {
@@ -1204,4 +1318,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 9d3341f..43419e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,9 +37,8 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-dev = [
-    "pytest>=7.0",                # For the Phase 1/2 validation logic
-]
+dev          = ["pytest>=7.0"]
+alphagenome  = ["alphagenome", "pyyaml"]
 
 [tool.setuptools.packages.find]
 where = ["src"]
diff --git a/src/deepISA/modeling/__init__.py b/src/deepISA/modeling/__init__.py
index e69de29..0ede3a6 100644
--- a/src/deepISA/modeling/__init__.py
+++ b/src/deepISA/modeling/__init__.py
@@ -0,0 +1,5 @@
+# Optional AlphaGenome backend: exported only when `alphagenome` and `pyyaml` import cleanly.
+try:
+    from deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter
+except ImportError:
+    pass   # optional dependency missing → AlphaGenomeAdapter is simply not exported here
diff --git a/src/deepISA/modeling/alpha_genome_adapter.py b/src/deepISA/modeling/alpha_genome_adapter.py
new file mode 100644
index 0000000..7872d75
--- /dev/null
+++ b/src/deepISA/modeling/alpha_genome_adapter.py
@@ -0,0 +1,217 @@
+"""AlphaGenome adapter — drop-in nn.Module backend for deepISA.
+
+Config formats
+--------------
+Single track (backward-compatible):
+    api_key: YOUR_KEY
+    output_type: DNASE
+    biosample_name: GM12878
+    context_len: 16384   # optional, default 16384
+    seq_len: 600          # optional, default 600
+    aggregation: sum      # optional, default sum
+
+Multi-track (new):
+    api_key: YOUR_KEY
+    tracks:
+      - output_type: DNASE
+        biosample_name: GM12878
+      - output_type: CAGE
+        biosample_name: GM12878
+      - output_type: ATAC
+        biosample_name: K562
+    context_len: 16384
+    seq_len: 600
+    aggregation: sum
+
+Every sequence makes exactly ONE API call regardless of how many tracks are
+configured.  Columns in the output tensor are ordered by the `tracks` list.
+"""
+from __future__ import annotations
+from typing import Any
+import numpy as np
+import torch
+import yaml
+
+_DEFAULTS = {"context_len": 16384, "seq_len": 600, "aggregation": "sum"}
+
+_BASES = np.array(['A', 'C', 'G', 'T'], dtype='U1')
+
+
+def load_config(path: str) -> dict[str, Any]:
+    """Load the YAML config, check required keys (KeyError if absent), apply `_DEFAULTS`."""
+    with open(path, encoding="utf-8") as f:
+        cfg = yaml.safe_load(f) or {}   # an empty file parses to None; treat it as {}
+    if not isinstance(cfg, dict) or "api_key" not in cfg:
+        raise KeyError("alpha_genome config missing required key: 'api_key'")
+    if "tracks" not in cfg:   # normalise old single-track format → new tracks list
+        for key in ("output_type", "biosample_name"):
+            if key not in cfg:
+                raise KeyError(f"alpha_genome config missing required key: '{key}'")
+        cfg["tracks"] = [{"output_type": cfg["output_type"],
+                           "biosample_name": cfg["biosample_name"]}]
+    return {**_DEFAULTS, **cfg}
+
+
+def _tensor_to_seqs(x: torch.Tensor) -> list[str]:
+    """(N, 4, L) one-hot tensor → list[str]; all-zero columns decode to 'N'."""
+    onehot   = x.detach().cpu().numpy()   # detach: .numpy() raises on grad-tracking tensors
+    base_idx = onehot.argmax(axis=1)
+    is_base  = onehot.max(axis=1) > 0
+    letters  = np.where(is_base, _BASES[base_idx], 'N')
+    return [''.join(row) for row in letters]
+
+
+def _pad_seqs(seqs: list[str], context_len: int, seq_len: int) -> list[str]:
+    """Flank every sequence with N so a seq_len-long input sits centred in context_len."""
+    flank_total = context_len - seq_len
+    n_left      = flank_total // 2
+    left, right = 'N' * n_left, 'N' * (flank_total - n_left)
+    return [f"{left}{s}{right}" for s in seqs]
+
+
+import torch.nn as nn
+from alphagenome.models import dna_client
+from alphagenome.models.dna_output import OutputType
+
+
+class AlphaGenomeAdapter(nn.Module):
+    """
+    Drop-in nn.Module replacement for deepISA's Conv model.
+
+    Supports one or more (output_type, biosample_name) track combinations via
+    the config file.  Every sequence prediction is a single API call; columns
+    are concatenated in the order the tracks appear in the config.  The
+    `aggregation` config key ("sum", "mean" or "max") selects how the central
+    seq_len window of each track column is reduced to a single value.
+
+    Returns (N, n_tracks) float32 tensor compatible with run_single_isa /
+    run_combi_isa.  Use adapter.n_tracks to know the output width.
+    """
+
+    # Reducers selectable via the `aggregation` config key (applied over positions).
+    _AGGREGATORS = {"sum": np.sum, "mean": np.mean, "max": np.max}
+
+    def __init__(self, config_path: str) -> None:
+        """Load the config, resolve each track in the model metadata, probe the layout."""
+        super().__init__()
+        cfg = load_config(config_path)
+        self._cfg = cfg
+        if cfg["aggregation"] not in self._AGGREGATORS:
+            raise ValueError(f"aggregation must be one of {sorted(self._AGGREGATORS)}, "
+                             f"got '{cfg['aggregation']}'")
+        self._aggregate = self._AGGREGATORS[cfg["aggregation"]]
+
+        self._dna_model = dna_client.create(cfg["api_key"])
+        meta = self._dna_model.output_metadata(
+            dna_client.Organism.HOMO_SAPIENS
+        ).concatenate()
+
+        ctx, sl = cfg["context_len"], cfg["seq_len"]
+        self._context_len, self._seq_len = ctx, sl
+        self._start_idx   = (ctx - sl) // 2   # same offset as the left pad in _pad_seqs
+        self._end_idx     = self._start_idx + sl
+
+        # ── Resolve each (output_type, biosample) track ───────────────────────
+        tracks_cfg = cfg["tracks"]
+        all_terms: list[str] = []
+        all_output_type_enums: list[OutputType] = []
+
+        for track in tracks_cfg:
+            ot_str, bio = track["output_type"], track["biosample_name"]
+            ot_enum = OutputType[ot_str]
+            matched = meta[
+                (meta["output_type"] == ot_enum) &
+                (meta["biosample_name"] == bio)
+            ]
+            if matched.empty:
+                of_type   = meta[meta["output_type"] == ot_enum]
+                available = sorted(of_type["biosample_name"].dropna().unique())[:15]
+                raise ValueError(
+                    f"biosample_name='{bio}' not found for output_type='{ot_str}'.\n"
+                    f"Available (first 15): {available}\n"
+                    f"Browse notebooks/ag_biosample_reference.csv to find valid names."
+                )
+            terms = matched["ontology_curie"].dropna().unique().tolist()
+            all_terms.extend(terms)
+            if ot_enum not in all_output_type_enums:
+                all_output_type_enums.append(ot_enum)
+
+        self._all_output_type_enums: list[OutputType] = all_output_type_enums
+        self._all_terms: list[str] = list(dict.fromkeys(all_terms))  # dedup, keep order
+        self._ontology_terms = self._all_terms   # alias kept for backward compatibility
+
+        # ── Probe call: learn exact column indices for each desired track ─────
+        # One call with all output types + all terms reveals which columns
+        # belong to which biosample via TrackData.metadata.biosample_name.
+        probe_out = self._dna_model.predict_sequence(
+            sequence="N" * ctx,
+            requested_outputs=self._all_output_type_enums,
+            ontology_terms=self._all_terms,
+        )
+
+        # _extraction_plan: ordered list of (attr_name, col_indices_array)
+        # Each entry corresponds to one desired (output_type, biosample) pair.
+        self._extraction_plan: list[tuple[str, np.ndarray]] = []
+        for track in tracks_cfg:
+            ot_str, bio = track["output_type"], track["biosample_name"]
+            attr   = ot_str.lower()          # "DNASE" → "dnase", "RNA_SEQ" → "rna_seq"
+            tmeta  = getattr(probe_out, attr).metadata.reset_index(drop=True)
+            col_idx = np.where(tmeta["biosample_name"] == bio)[0]
+            if len(col_idx) == 0:
+                raise ValueError(
+                    f"Probe returned no columns for biosample='{bio}' in {ot_str}. "
+                    f"Available in probe: {tmeta['biosample_name'].tolist()}"
+                )
+            self._extraction_plan.append((attr, col_idx))
+
+        self._n_tracks: int = sum(len(idx) for _, idx in self._extraction_plan)
+
+        # Sequence-level cache: raw seq_len-bp string → list[float] (n_tracks,)
+        self._cache: dict[str, list[float]] = {}
+
+    # ── Public properties ─────────────────────────────────────────────────────
+
+    @property
+    def n_tracks(self) -> int:
+        """Total number of output tracks across all configured (output_type, biosample) pairs."""
+        return self._n_tracks
+
+    @property
+    def cache_size(self) -> int:
+        """Number of distinct sequences whose predictions are currently cached."""
+        return len(self._cache)
+
+    def clear_cache(self) -> None:
+        """Drop every cached prediction; later calls query the API again."""
+        self._cache.clear()
+
+    # ── nn.Module interface ───────────────────────────────────────────────────
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        x      : (N, 4, seq_len) one-hot tensor from compute_predictions
+        returns: (N, n_tracks) float32 tensor (shape also holds for N == 0)
+        """
+        seqs        = _tensor_to_seqs(x)
+        seqs_padded = _pad_seqs(seqs, self._context_len, self._seq_len)
+        scalars     = self._predict_sequential(seqs, seqs_padded)
+        return torch.tensor(scalars, dtype=torch.float32).reshape(len(seqs), self._n_tracks)
+
+    def _predict_sequential(self, seqs: list[str],
+                            seqs_padded: list[str]) -> list[list[float]]:
+        """One API call per unique sequence; cache hits skip the API entirely."""
+        result = []
+        for raw, padded in zip(seqs, seqs_padded):
+            if raw not in self._cache:
+                output = self._dna_model.predict_sequence(
+                    sequence=padded,
+                    requested_outputs=self._all_output_type_enums,
+                    ontology_terms=self._all_terms,
+                )
+                parts = []
+                for attr, col_idx in self._extraction_plan:
+                    window = getattr(output, attr).values[self._start_idx:self._end_idx, :]
+                    parts.append(self._aggregate(window[:, col_idx], axis=0))
+                self._cache[raw] = np.concatenate(parts).tolist()
+            result.append(self._cache[raw])
+        return result
diff --git a/tests/test_adapter.py b/tests/test_adapter.py
new file mode 100644
index 0000000..3238845
--- /dev/null
+++ b/tests/test_adapter.py
@@ -0,0 +1,284 @@
+import sys
+import pytest
+import yaml
+import numpy as np
+import torch
+
+sys.path.insert(0, "deepISA/src")   # make deepISA importable without modifying it
+
+
+def test_load_config_reads_fields(tmp_path):
+    """load_config returns the values written to a single-track YAML config."""
+    cfg = {
+        "api_key": "testkey",
+        "output_type": "DNASE",
+        "biosample_name": "GM12878",
+        "context_len": 16384,
+        "seq_len": 600,
+        "aggregation": "sum",
+    }
+    p = tmp_path / "config.yaml"
+    p.write_text(yaml.dump(cfg))
+    from deepISA.modeling.alpha_genome_adapter import load_config
+    loaded = load_config(str(p))
+    assert loaded["api_key"] == "testkey"
+    assert loaded["context_len"] == 16384
+    assert loaded["aggregation"] == "sum"
+
+
+def test_load_config_missing_required_key(tmp_path):
+    """A config without `api_key` must be rejected with KeyError."""
+    p = tmp_path / "bad.yaml"
+    p.write_text(yaml.dump({"output_type": "DNASE"}))
+    from deepISA.modeling.alpha_genome_adapter import load_config
+    with pytest.raises(KeyError):
+        load_config(str(p))
+
+
+# ── Task 2: vectorized sequence utilities ─────────────────────────────────────
+
+def test_tensor_to_seqs_roundtrip():
+    """one_hot_encode → tensor → _tensor_to_seqs should recover original strings."""
+    from deepISA.utils import one_hot_encode
+    from deepISA.modeling.alpha_genome_adapter import _tensor_to_seqs
+    seqs = ["ACGT" * 150]                  # 600 bp
+    x = torch.from_numpy(one_hot_encode(seqs))  # (1, 4, 600)
+    assert _tensor_to_seqs(x) == seqs
+
+
+def test_tensor_to_seqs_n_positions():
+    """All-zero one-hot columns decode to 'N'."""
+    from deepISA.modeling.alpha_genome_adapter import _tensor_to_seqs
+    assert _tensor_to_seqs(torch.zeros(1, 4, 4))[0] == "NNNN"
+
+
+def test_pad_seqs_total_length():
+    """Padding a 600 bp sequence yields exactly context_len characters."""
+    from deepISA.modeling.alpha_genome_adapter import _pad_seqs
+    assert len(_pad_seqs(["ACGT" * 150], context_len=16384, seq_len=600)[0]) == 16384
+
+
+def test_pad_seqs_centre_preserved():
+    """The original sequence sits unchanged right after the left padding."""
+    from deepISA.modeling.alpha_genome_adapter import _pad_seqs
+    padded = _pad_seqs(["ACGT" * 150], context_len=16384, seq_len=600)[0]
+    pad_left = (16384 - 600) // 2
+    assert padded[pad_left: pad_left + 600] == "ACGT" * 150
+
+
+def test_pad_seqs_flanks_are_n():
+    from deepISA.modeling.alpha_genome_adapter import _pad_seqs
+    padded = _pad_seqs(["A" * 600], context_len=16384, seq_len=600)[0]
+    pad_left = (16384 - 600) // 2   # length of the left flank
+    assert set(padded[:pad_left]) == {"N"}           # left flank is all N
+    assert set(padded[pad_left + 600:]) == {"N"}     # right flank is all N
+
+
+# ── Task 3: AlphaGenomeAdapter class ─────────────────────────────────────────
+
+import pandas as pd
+from unittest.mock import MagicMock, patch
+from alphagenome.models.dna_output import OutputType
+
+
+def _fake_metadata(biosample: str, output_type: str) -> pd.DataFrame:
+    """Build a one-row metadata frame whose output_type is a real OutputType member."""
+    ot_member = OutputType[output_type]
+    columns = {"biosample_name": [biosample],
+               "output_type":    [ot_member],
+               "ontology_curie": ["CL:0000000"]}
+    return pd.DataFrame(columns)
+
+
+def _fake_track_output(n_positions: int, n_tracks: int, value: float,
+                       biosample: str = "GM12878"):
+    """Mock track data: constant (n_positions, n_tracks) values + real DataFrame metadata."""
+    track = MagicMock()
+    track.values = np.full((n_positions, n_tracks), value, dtype=np.float32)
+    track.metadata = pd.DataFrame({"biosample_name": [biosample] * n_tracks})
+    return track
+
+
+def _fake_predict_output(value: float, output_attr: str = "dnase", biosample: str = "GM12878"):
+    """Mock predict_sequence result exposing one single-column track of 16384 positions."""
+    fake = MagicMock()
+    setattr(fake, output_attr, _fake_track_output(16384, 1, value, biosample))
+    return fake
+
+
+def _make_adapter(tmp_path, biosample="GM12878", output_type="DNASE", mock_dc=None):
+    cfg = {"api_key": "k", "output_type": output_type, "biosample_name": biosample,
+           "context_len": 16384, "seq_len": 600, "aggregation": "sum"}
+    (tmp_path / "cfg.yaml").write_text(yaml.dump(cfg))
+    mock_dc.create.return_value.output_metadata.return_value.concatenate.return_value = (
+        _fake_metadata(biosample, output_type))   # mock_dc: the patched dna_client module
+    from deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter
+    return AlphaGenomeAdapter(str(tmp_path / "cfg.yaml"))
+
+
+def test_adapter_forward_returns_n_by_n_tracks(tmp_path):
+    """forward() yields a float32 tensor of shape (N, n_tracks)."""
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.predict_sequence.return_value = (
+            _fake_predict_output(1.0))
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+
+        from deepISA.utils import one_hot_encode
+        x = torch.from_numpy(one_hot_encode(["ACGT" * 150]))  # (1, 4, 600)
+        out = adapter(x)
+        assert out.shape == (1, 1)   # 1 seq × 1 track (mock has 1 track)
+        assert out.dtype == torch.float32
+
+
+def test_adapter_col0_equals_signal_sum(tmp_path):
+    """col 0 = sum of central 600 bp × 1 track × signal_value."""
+    signal_value = 0.5
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.predict_sequence.return_value = (
+            _fake_predict_output(signal_value))
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+
+        from deepISA.utils import one_hot_encode
+        x = torch.from_numpy(one_hot_encode(["ACGT" * 150]))
+        out = adapter(x)
+
+        expected = signal_value * 600 * 1   # sum over 600 positions × 1 track
+        assert float(out[0, 0]) == pytest.approx(expected)
+
+
+def test_adapter_cache_deduplicates_api_calls(tmp_path):
+    """Identical sequences must produce only one API call, not two."""
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.predict_sequence.return_value = (
+            _fake_predict_output(1.0))
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+
+        calls_after_init = mock_dc.create.return_value.predict_sequence.call_count
+
+        from deepISA.utils import one_hot_encode
+        x = torch.from_numpy(one_hot_encode(["ACGT" * 150]))
+
+        adapter(x)   # first call  → API hit, stored in cache
+        adapter(x)   # second call → cache hit, no API call
+
+        assert mock_dc.create.return_value.predict_sequence.call_count == calls_after_init + 1
+        assert adapter.cache_size == 1
+
+
+def test_adapter_clear_cache(tmp_path):
+    """clear_cache() resets the cache so the next call hits the API again."""
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.predict_sequence.return_value = (
+            _fake_predict_output(1.0))
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+
+        calls_after_init = mock_dc.create.return_value.predict_sequence.call_count
+
+        from deepISA.utils import one_hot_encode
+        x = torch.from_numpy(one_hot_encode(["ACGT" * 150]))
+
+        adapter(x)
+        assert adapter.cache_size == 1
+        adapter.clear_cache()
+        assert adapter.cache_size == 0
+        adapter(x)   # cache was cleared → one more API call
+        assert mock_dc.create.return_value.predict_sequence.call_count == calls_after_init + 2
+
+
+def test_adapter_bad_biosample_raises(tmp_path):
+    """A biosample absent from the model metadata is rejected with ValueError."""
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.output_metadata.return_value.concatenate.return_value = (
+            _fake_metadata("GM12878", "DNASE"))
+        cfg = {"api_key": "k", "output_type": "DNASE", "biosample_name": "NonExistent",
+               "context_len": 16384, "seq_len": 600, "aggregation": "sum"}
+        (tmp_path / "cfg.yaml").write_text(yaml.dump(cfg))
+        from deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter
+        with pytest.raises(ValueError, match="not found"):
+            AlphaGenomeAdapter(str(tmp_path / "cfg.yaml"))
+
+
+# ── Task 4: Full-chain integration test ──────────────────────────────────────
+
+def test_full_chain_compute_predictions(tmp_path):
+    """adapter works as model arg in deepISA's compute_predictions — zero ISA code changes."""
+    from deepISA.modeling.predict import compute_predictions
+
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.output_metadata.return_value.concatenate.return_value = (
+            _fake_metadata("GM12878", "DNASE"))
+        mock_dc.create.return_value.predict_sequence.side_effect = [
+            _fake_predict_output(1.0),   # probe in __init__
+            _fake_predict_output(2.0),   # seq 1 original
+            _fake_predict_output(1.0),   # seq 1 ablated
+        ]
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+
+        device = torch.device("cpu")
+        seqs_orig  = ["ACGT" * 150]
+        seqs_ablat = ["NNNN" * 150]
+
+        preds_orig  = compute_predictions(adapter, seqs_orig,  device, batch_size=1)
+        preds_ablat = compute_predictions(adapter, seqs_ablat, device, batch_size=1)
+
+        isa = preds_orig[:, 0] - preds_ablat[:, 0]
+        assert preds_orig.shape  == (1, 1)
+        assert preds_ablat.shape == (1, 1)
+        assert float(isa[0]) == pytest.approx(2.0 * 600 - 1.0 * 600)  # 600.0
+
+
+# ── Task 5: multi-track config ────────────────────────────────────────────────
+
+def _fake_metadata_multi(pairs: list) -> pd.DataFrame:
+    """One metadata row per (biosample, output_type_str) pair, each with a distinct CURIE."""
+    biosamples   = [biosample for biosample, _ in pairs]
+    output_types = [OutputType[name] for _, name in pairs]
+    curies       = [f"CL:{row:07d}" for row, _ in enumerate(pairs)]
+    return pd.DataFrame({"biosample_name": biosamples, "output_type": output_types,
+                         "ontology_curie": curies})
+
+
+def test_multi_track_config_new_format(tmp_path):
+    """tracks: list config → correct n_tracks and output shape."""
+    biosample_a, biosample_b = "GM12878", "K562"
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.output_metadata.return_value.concatenate.return_value = (
+            _fake_metadata_multi([
+                (biosample_a, "DNASE"),
+                (biosample_b, "ATAC"),
+            ])
+        )
+        # probe + forward calls: each returns dnase(1 col for A) + atac(1 col for B)
+        def make_output():
+            out = MagicMock()
+            out.dnase = _fake_track_output(16384, 1, 1.0, biosample_a)
+            out.atac  = _fake_track_output(16384, 1, 2.0, biosample_b)
+            return out
+        mock_dc.create.return_value.predict_sequence.return_value = make_output()
+
+        cfg = {"api_key": "k",
+               "tracks": [{"output_type": "DNASE", "biosample_name": biosample_a},
+                           {"output_type": "ATAC",  "biosample_name": biosample_b}],
+               "context_len": 16384, "seq_len": 600}
+        (tmp_path / "cfg.yaml").write_text(yaml.dump(cfg))
+        from deepISA.modeling.alpha_genome_adapter import AlphaGenomeAdapter
+        adapter = AlphaGenomeAdapter(str(tmp_path / "cfg.yaml"))
+
+        assert adapter.n_tracks == 2
+
+        from deepISA.utils import one_hot_encode
+        x = torch.from_numpy(one_hot_encode(["ACGT" * 150]))
+        out = adapter(x)
+        assert out.shape == (1, 2)
+        # col 0 = DNASE signal (1.0 × 600), col 1 = ATAC signal (2.0 × 600)
+        assert float(out[0, 0]) == pytest.approx(600.0)
+        assert float(out[0, 1]) == pytest.approx(1200.0)
+
+
+def test_single_track_old_format_still_works(tmp_path):
+    """Old output_type / biosample_name keys still accepted (backward compat)."""
+    with patch("deepISA.modeling.alpha_genome_adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.predict_sequence.return_value = (
+            _fake_predict_output(1.0))
+        adapter = _make_adapter(tmp_path, mock_dc=mock_dc)
+        assert adapter.n_tracks == 1
diff --git a/tests/test_attr_filter_pipeline.py b/tests/test_attr_filter_pipeline.py
new file mode 100644
index 0000000..b6614ab
--- /dev/null
+++ b/tests/test_attr_filter_pipeline.py
@@ -0,0 +1,100 @@
+import sys, os, pytest, yaml, torch
+import pandas as pd
+import numpy as np
+from unittest.mock import patch, MagicMock
+
+sys.path.insert(0, "deepISA/src")
+
+
+def _make_filtered_motif_csv(tmp_path):
+    """Write a one-region, three-motif CSV shaped like attr_filter output; return its path."""
+    rel_starts, rel_ends = [10, 30, 50], [25, 45, 65]
+    csv_path = tmp_path / "motif_filtered.csv"
+    pd.DataFrame({
+        "chrom": ["chr1"] * 3,
+        "start": [1000 + s for s in rel_starts],
+        "end": [1000 + e for e in rel_ends],
+        "tf": ["NFKB1", "SP1", "IRF1"],
+        "score": [900, 850, 800],
+        "strand": ["+", "+", "-"],
+        "region": ["chr1:1000-1600"] * 3,
+        "start_rel": rel_starts,
+        "end_rel": rel_ends,
+        "second_max_t0": [0.9, 0.7, 0.85],
+        "pass_threshold_t0": [1, 1, 1],
+    }).to_csv(csv_path, index=False)
+    return str(csv_path)
+
+
+def _make_fasta(tmp_path):
+    """Write a 2000 bp all-A chr1 FASTA plus a matching .fai; return the FASTA path."""
+    header, seq, fa = ">chr1\n", "A" * 2000, tmp_path / "mini.fa"
+    fa.write_text(header + seq + "\n")
+    # .fai fields: name, length, offset of first base, bases/line, bytes/line
+    fai = ("chr1", len(seq), len(header), len(seq), len(seq) + 1)
+    (tmp_path / "mini.fa.fai").write_text("\t".join(map(str, fai)) + "\n")
+    return str(fa)
+
+
+def test_two_stage_pipeline_isa_cols(tmp_path):
+    """
+    Run run_single_isa on a pre-filtered motif CSV (as attr_filter would produce)
+    with deepisa_ag.adapter.dna_client patched by a mock; check that pass_threshold_t0
+    is passed through, all 3 motif rows are written, and the first isa_t0 is 300.
+    """
+    filtered_path = _make_filtered_motif_csv(tmp_path)
+    fasta_path    = _make_fasta(tmp_path)
+    out_path      = str(tmp_path / "isa_out.csv")
+
+    from alphagenome.models.dna_output import OutputType
+
+    cfg = {"api_key": "k", "output_type": "DNASE",
+           "biosample_name": "GM12878", "context_len": 16384,
+           "seq_len": 600, "aggregation": "sum"}
+    (tmp_path / "cfg.yaml").write_text(yaml.dump(cfg))
+
+    fake_meta = pd.DataFrame({
+        "biosample_name": ["GM12878"],
+        "output_type":    [OutputType["DNASE"]],
+        "ontology_curie": ["EFO:0002784"],
+    })
+
+    def _fake_output(val):
+        out = MagicMock()
+        track = MagicMock()
+        track.values = np.full((16384, 1), val, dtype=np.float32)
+        # metadata must be a real DataFrame (not a MagicMock) so probe col-index extraction works
+        track.metadata = pd.DataFrame({"biosample_name": ["GM12878"]})
+        out.dnase = track
+        return out
+
+    with patch("deepisa_ag.adapter.dna_client") as mock_dc:
+        mock_dc.create.return_value.output_metadata.return_value.concatenate.return_value = fake_meta
+        # Expected calls: 1 probe (__init__) + 1 orig + 3 ablated = 5; the list holds 7, so extras are unused
+        mock_dc.create.return_value.predict_sequence.side_effect = [
+            _fake_output(1.0),                                    # probe in __init__
+            _fake_output(1.0), _fake_output(0.5),
+            _fake_output(1.0), _fake_output(0.5),
+            _fake_output(1.0), _fake_output(0.5),
+        ]
+        from deepisa_ag import AlphaGenomeAdapter
+        adapter = AlphaGenomeAdapter(str(tmp_path / "cfg.yaml"))
+
+    from deepISA.scoring.single_isa import run_single_isa
+    run_single_isa(
+        model                 = adapter,
+        fasta_path            = fasta_path,
+        motif_locs_path       = filtered_path,
+        outpath               = out_path,
+        device                = torch.device("cpu"),
+        tracks                = [0],
+        num_regions_per_batch = 10,
+        pred_batch_size       = 1,
+    )
+
+    result = pd.read_csv(out_path)
+    assert "isa_t0" in result.columns
+    assert "pass_threshold_t0" in result.columns      # pass-through from filter
+    assert len(result) == 3
+    # orig sum = 1.0 * 600 = 600, mut sum = 0.5 * 600 = 300  →  isa = 300 (only row 0 is checked)
+    assert float(result["isa_t0"].iloc[0]) == pytest.approx(300.0, rel=1e-3)