diff --git a/.github/workflows/make-tutorials-json.yml b/.github/workflows/make-tutorials-json.yml index f872aa42..34c163d3 100644 --- a/.github/workflows/make-tutorials-json.yml +++ b/.github/workflows/make-tutorials-json.yml @@ -14,21 +14,24 @@ concurrency: group: "pages" cancel-in-progress: true +env: + FORCE_COLOR: "1" + jobs: mkjson: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: filter: blob:none fetch-depth: 0 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: - python-version: "3.12" - cache: "pip" # caching pip dependencies + python-version: "3.13" + cache: pip # caching pip dependencies - name: Install dependencies for validation script run: pip install .[registry] - name: Execute validation script and create output directory @@ -36,9 +39,9 @@ jobs: ./tutorial-registry/validate.py --outdir=build - name: Upload GitHub Pages artifact - uses: actions/upload-pages-artifact@v3 + uses: actions/upload-pages-artifact@v4 with: - path: "build" + path: build deploy: runs-on: ubuntu-latest @@ -57,7 +60,7 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v4 - name: Trigger website build run: | curl -XPOST \ diff --git a/pyproject.toml b/pyproject.toml index af7337b8..389154ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dev = ["pre-commit"] registry = [ "jsonschema", "pillow", - "httpx", + "httpx[http2]", "pyyaml", ] docs = [ diff --git a/tutorial-registry/tutorials/advanced-plotting/meta.yaml b/tutorial-registry/tutorials/advanced-plotting/meta.yaml index 2351c8c5..c688a36c 100644 --- a/tutorial-registry/tutorials/advanced-plotting/meta.yaml +++ b/tutorial-registry/tutorials/advanced-plotting/meta.yaml @@ -2,7 +2,7 @@ name: Advanced plotting description: | This tutorial explains how to customize matplotlib plots generated by scanpy or other scverse libraries. -link: https://scanpy-tutorials.readthedocs.io/en/latest/plotting/advanced.html +link: https://scanpy.readthedocs.io/en/stable/tutorials/plotting/advanced.html image: icon.png primary_category: Tips & Tricks order: 30 diff --git a/tutorial-registry/tutorials/decoupler-pseudobulk-de/meta.yaml b/tutorial-registry/tutorials/decoupler-pseudobulk-de/meta.yaml index 00fa11d2..b787a309 100644 --- a/tutorial-registry/tutorials/decoupler-pseudobulk-de/meta.yaml +++ b/tutorial-registry/tutorials/decoupler-pseudobulk-de/meta.yaml @@ -2,7 +2,7 @@ name: Pseudo-bulk differential expression and functional analysis description: | This notebook showcases decoupler for pathway and TF enrichment on ~5k Blood myeloid cells from healthy and COVID-19 infected patients. -link: https://decoupler-py.readthedocs.io/en/latest/notebooks/pseudobulk.html +link: https://decoupler.readthedocs.io/en/latest/notebooks/scell/rna_psbk.html image: icon.png primary_category: scRNA-seq order: 30 diff --git a/tutorial-registry/tutorials/plotting-in-scanpy/meta.yaml b/tutorial-registry/tutorials/plotting-in-scanpy/meta.yaml index 5854a482..60bc3e6f 100644 --- a/tutorial-registry/tutorials/plotting-in-scanpy/meta.yaml +++ b/tutorial-registry/tutorials/plotting-in-scanpy/meta.yaml @@ -2,7 +2,7 @@ name: Plotting in scanpy description: | This tutorial explores the visualization possibilities of scanpy, including embeddings and the visualization of marker genes and differentially expressed genes. -link: https://scanpy-tutorials.readthedocs.io/en/latest/plotting/core.html +link: https://scanpy.readthedocs.io/en/stable/tutorials/plotting/core.html image: icon.png primary_category: Tips & Tricks order: 10 diff --git a/tutorial-registry/validate.py b/tutorial-registry/validate.py index d83e6207..faedaa37 100755 --- a/tutorial-registry/validate.py +++ b/tutorial-registry/validate.py @@ -4,12 +4,13 @@ from __future__ import annotations import argparse +import asyncio import json import shutil import sys from pathlib import Path from textwrap import dedent -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, cast import httpx import jsonschema @@ -17,13 +18,15 @@ from PIL import Image if TYPE_CHECKING: - from collections.abc import Generator, Iterable, Mapping + from collections.abc import Iterable, Mapping, ValuesView + + Tutorial = Mapping[str, str | Iterable[str]] HERE = Path(__file__).absolute().parent -def _check_url_exists(url: str) -> None: - response = httpx.get(url) +async def _check_url_exists(client: httpx.AsyncClient, url: str) -> None: + response = await client.head(url) if response.status_code != 200: raise ValueError(f"URL {url} is not reachable (error {response.status_code}). ") @@ -47,23 +50,22 @@ def _check_image(img_path: Path) -> None: ) -def validate_tutorials(schema_file: Path, tutorials_dir: Path) -> Generator[dict, None, None]: +async def validate_tutorials(schema_file: Path, tutorials_dir: Path) -> ValuesView[Tutorial]: """Find all tutorial `meta.yaml` files in the tutorials dir and yield tutorial records.""" schema = json.loads(schema_file.read_bytes()) - known_links = set() + known_links: dict[str, Tutorial] = {} known_primary_to_orders: dict[str, set[int]] = {} for tmp_meta_file in tutorials_dir.rglob("meta.yaml"): tutorial_id = tmp_meta_file.parent.name with tmp_meta_file.open() as f: - tmp_tutorial = yaml.load(f, yaml.SafeLoader) + tmp_tutorial = cast("Tutorial", yaml.load(f, yaml.SafeLoader)) jsonschema.validate(tmp_tutorial, schema) - link = tmp_tutorial["link"] - if link in known_links: + if (link := tmp_tutorial["link"]) in known_links: raise ValueError(f"When validating {tmp_meta_file}: Duplicate link: {link}") - known_links.add(link) + known_links[link] = tmp_tutorial # Check for duplicate orders within the same primary category primary_category = tmp_tutorial.get("primary_category") @@ -81,14 +83,17 @@ def validate_tutorials(schema_file: Path, tutorials_dir: Path) -> Generator[dict known_primary_to_orders[primary_category].add(order) - _check_url_exists(link) - # replace image path by absolute local path to image img_path = tutorials_dir / tutorial_id / tmp_tutorial["image"] _check_image(img_path) tmp_tutorial["image"] = str(img_path) - yield tmp_tutorial + headers = {"User-Agent": "scverse tutorial registry (https://github.com/scverse/scverse-tutorials)"} + async with httpx.AsyncClient(headers=headers, http2=True) as client, asyncio.TaskGroup() as tg: + for link, tutorial in known_links.items(): + tg.create_task(_check_url_exists(client, link), name=tutorial["name"]) + + return known_links.values() def load_categories(categories_file: Path) -> dict[str, Any]: @@ -99,7 +104,7 @@ def load_categories(categories_file: Path) -> dict[str, Any]: def make_output( categories: Iterable[Mapping[str, Mapping[Literal["description"], str]]], - tutorials: Iterable[Mapping[str, str | Iterable[str]]], + tutorials: Iterable[Tutorial], *, outdir: Path | None = None, ) -> None: @@ -135,9 +140,9 @@ def make_output( json.dump(result, sys.stdout, indent=2) -def main(schema_file: Path, meta_dir: Path, categories_file: Path, *, outdir: Path | None = None): +def main(schema_file: Path, meta_dir: Path, categories_file: Path, *, outdir: Path | None = None) -> None: """Validate and create output directory.""" - tutorials = list(validate_tutorials(schema_file, meta_dir)) + tutorials = asyncio.run(validate_tutorials(schema_file, meta_dir)) categories = load_categories(categories_file) make_output(categories, tutorials, outdir=outdir)