diff --git a/.github/workflows/demo-deck.yml b/.github/workflows/demo-deck.yml new file mode 100644 index 0000000..6eefe80 --- /dev/null +++ b/.github/workflows/demo-deck.yml @@ -0,0 +1,71 @@ +name: Build & publish demo deck + +# Renders docs/demo/NDP-demo-presentation.md with Marp and publishes the slides +# as a GitHub Pages site (HTML, navigable in the browser) plus a PDF that is +# accessible from the published site at /NDP-demo-presentation.pdf. + +on: + push: + branches: [main, feature/179-demo-documentation] + paths: + - 'docs/demo/**' + - '.github/workflows/demo-deck.yml' + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: pages + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: '20' + + - name: Install Marp CLI + run: npm install -g @marp-team/marp-cli@latest + + - name: Prepare site/ (copy local assets so relative URLs work) + run: | + mkdir -p site + cp -r docs/demo/assets site/assets + # Screenshots may not all be present yet; copy what's there. 
+ if [ -d docs/demo/screenshots ]; then + cp -r docs/demo/screenshots site/screenshots + fi + + - name: Render HTML (slides) + run: | + marp --allow-local-files --html \ + --output site/index.html \ + docs/demo/NDP-demo-presentation.md + + - name: Render PDF (also published next to the HTML) + run: | + marp --allow-local-files --pdf \ + --output site/NDP-demo-presentation.pdf \ + docs/demo/NDP-demo-presentation.md + + - name: Upload Pages artifact + uses: actions/upload-pages-artifact@v3 + with: + path: site + + deploy: + needs: build + runs-on: ubuntu-latest + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + steps: + - id: deployment + uses: actions/deploy-pages@v4 diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..dc22bcf --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,314 @@ +# Configuration reference (`.env`) + +NDP-EP is configured entirely through environment variables, normally placed in +a `.env` file next to `docker-compose.yml`. The annotated template is +[`example.env`](../example.env) — copy it to `.env` and edit. + +This page explains **every** variable: what it is, what it controls, where to get +its value, and whether you need it. Booleans are `True` / `False`. + +- **Required** = the Endpoint will not work correctly without it for the feature it + controls. +- **Optional** = safe to leave at its default. +- Unknown variables are ignored (settings allow extras), so a shared `.env` across + components is fine. + +### Minimum to get started (connect to the National Data Platform) + +You really only need to set: + +- `AUTH_API_URL` → the platform's AAI (so logins/roles work). +- `LOCAL_CATALOG_BACKEND` + its backend vars (`MONGODB_*` **or** `CKAN_*`) → where this EP stores its catalog. +- `CKAN_LOCAL_ENABLED=True` → if this EP should allow publishing (not read-only). +- `ORGANIZATION`, `EP_NAME` → how your EP identifies itself. 
+ +Everything else turns optional features on/off. + +--- + +## General / API + +#### `ROOT_PATH` +*Optional · default: empty (served at `/`).* +URL path prefix when the EP runs behind a reverse proxy at a sub-path (e.g. +`/ep-api`). It makes the API and UI build correct links. **Where:** match the path +your reverse proxy mounts the EP at; leave empty if it is served at the domain root. + +#### `ORGANIZATION` +*Optional · default: `Unknown Organization`.* +Display name of the organization running this Endpoint (shown in the UI and +reported to Federation). **Where:** you choose it. + +#### `EP_NAME` +*Optional · default: `Unknown EP`.* +Display name of this Endpoint (UI + Federation registry). **Where:** you choose it. + +#### `IS_PUBLIC` +*Optional · default: `True`.* +Whether this Endpoint advertises itself as public. **Where:** set `False` for a +private/internal EP. + +#### `USE_JUPYTERLAB` / `JUPYTER_URL` +*Optional · defaults: `False` / `https://jupyter.org/try-jupyter/lab/`.* +Show a "JupyterLab" link in the UI and where it points. **Where:** set `USE_JUPYTERLAB=True` +and `JUPYTER_URL` to your JupyterLab instance if you offer one. + +#### `SWAGGER_TITLE` / `SWAGGER_DESCRIPTION` / `SWAGGER_VERSION` +*Advanced · usually leave default.* +Metadata shown on the `/docs` (Swagger) page. `SWAGGER_VERSION` tracks the release +and normally should not be overridden. + +--- + +## Authentication & access (AAI) + +#### `AUTH_API_URL` +*Required · default: `https://idp.nationaldataplatform.org/temp/information`.* +The AAI endpoint the EP calls to **validate bearer tokens** and read the user's +identity, groups and **roles**. This is the single source of authentication. +**Where:** your NDP/AAI administrator. For the central platform use the default; +for a self-hosted AAI use its `…/information` URL. + +#### `TEST_TOKEN` +*Optional · dev only · default: `testing_token`.* +A fixed token the EP accepts without contacting AAI, for local testing. 
+**Where:** you set it for dev. **Leave blank in production.** + +#### `ENABLE_GROUP_BASED_ACCESS` +*Optional · default: `False`.* +If `True`, write operations (POST/PUT/DELETE) additionally require the user to +belong to one of `GROUP_NAMES`. This is an extra gate on top of the role model. +**Where:** enable only if you want group-scoped writes. + +#### `GROUP_NAMES` +*Optional · default: empty.* +Comma-separated list of groups allowed to write when `ENABLE_GROUP_BASED_ACCESS=True` +(e.g. `admins,data-managers`). Matching is case-insensitive; empty + enabled = +all writes denied. **Where:** the group names defined in Keycloak/AAI — ask your +AAI administrator. + +> **Roles** (`viewer` / `writer` / `admin`) are **not** set here — they travel in +> the token issued by AAI. See [roles-and-permissions.md](roles-and-permissions.md). + +--- + +## Local catalog (where this EP stores its data) + +#### `LOCAL_CATALOG_BACKEND` +*Required · default: `ckan`.* +Backend for **this EP's own** catalog: `ckan` or `mongodb`. (The global and +Pre-CKAN catalogs always use CKAN regardless.) **Where:** you choose — `mongodb` +is the simplest to self-host; `ckan` if you already run a CKAN. + +#### `CKAN_LOCAL_ENABLED` +*Optional · default: `False`.* +Master switch for **write** operations on the local catalog (create/update/delete +organizations, datasets, resources), for **any** backend. `False` makes the EP +read-only. *(The name keeps "CKAN" for historical reasons; it applies to MongoDB +too.)* **Where:** set `True` if this EP should accept publishing. + +### CKAN backend — only if `LOCAL_CATALOG_BACKEND=ckan` + +#### `CKAN_URL` +*Required (CKAN backend) · default: `http://localhost:5000`.* +Base URL of your CKAN instance. **Where:** your CKAN deployment URL. + +#### `CKAN_API_KEY` +*Required (CKAN backend).* +API key used to write to CKAN. **Where:** in CKAN, log in → your user profile → +**API Tokens** → create a token. 
+ +#### `CKAN_VERIFY_SSL` +*Optional · default: `True`.* +Verify CKAN's TLS certificate. **Where:** set `False` only for self-signed/dev +CKAN instances (less secure). + +#### `CKAN_GLOBAL_URL` +*Optional · default: `https://nationaldataplatform.org/catalog`.* +Read-only global NDP catalog the EP can search alongside the local one. +**Where:** usually leave the default. + +### MongoDB backend — only if `LOCAL_CATALOG_BACKEND=mongodb` + +#### `MONGODB_CONNECTION_STRING` +*Required (MongoDB backend) · default: `mongodb://localhost:27017`.* +MongoDB connection URI for the local catalog. **Where:** your MongoDB. With the +bundled Compose (`--profile mongodb`) use `mongodb://admin:admin123@mongodb:27017`. + +#### `MONGODB_DATABASE` +*Optional · default: `ndp_local_catalog`.* +Database name used for the local catalog. **Where:** you choose. + +### Pre-CKAN (optional staging catalog) + +#### `PRE_CKAN_ENABLED` +*Optional · default: `False`.* +Enable a Pre-CKAN staging target (publish to a review instance before the global +catalog). + +#### `PRE_CKAN_URL` / `PRE_CKAN_API_KEY` / `PRE_CKAN_VERIFY_SSL` / `PRE_CKAN_ORGANIZATION` +*Required only if `PRE_CKAN_ENABLED=True`.* +URL, API key (same place as `CKAN_API_KEY`, but on the Pre-CKAN instance), TLS +verification, and the organization all staged datasets are published under +(overrides their `owner_org`). **Where:** from whoever operates your Pre-CKAN +(e.g. the SDSC staging instance). + +--- + +## S3 / object storage (MinIO) + +#### `S3_ENABLED` +*Optional · default: `False`.* +Enable S3 object storage and the **S3 Management** tool in the UI (writers only). + +#### `S3_ENDPOINT` +*Required if `S3_ENABLED=True` · default: `localhost:9000`.* +`host:port` of the S3-compatible service. **Where:** your MinIO/S3 endpoint. With +the bundled Compose (`--profile s3`) use `minio:9000`. 
+ +#### `S3_ACCESS_KEY` / `S3_SECRET_KEY` +*Required if `S3_ENABLED=True` · dev defaults: `minioadmin` / `minioadmin123`.* +Credentials for the S3 service. **Where:** your MinIO/S3 admin console (Access +Keys). The dev MinIO ships with `minioadmin` / `minioadmin123`. + +#### `S3_SECURE` +*Optional · default: `False`.* +`True` for HTTPS, `False` for HTTP. **Where:** `True` when your S3 endpoint serves +TLS. + +#### `S3_REGION` +*Optional · default: `us-east-1`.* +Region label sent to the S3 API. **Where:** match your provider; the default is +fine for MinIO. + +--- + +## Streaming (Kafka) + +#### `KAFKA_CONNECTION` +*Optional · default: `False`.* +Enable Kafka connectivity (managing/streaming topics). + +#### `KAFKA_HOST` / `KAFKA_PORT` +*Required if `KAFKA_CONNECTION=True` · defaults: `localhost` / `9092`.* +Kafka broker address. **Where:** your broker. With the bundled Compose +(`--profile kafka`) use `kafka` and `9093` (internal) / `9092` (external). + +#### `KAFKA_PREFIX` +*Optional · default: `data_stream_`.* +Prefix applied to topics the EP manages. **Where:** you choose. + +#### `MAX_STREAMS` +*Optional · default: `10`.* +Maximum number of concurrent managed streams. + +--- + +## Affinities integration + +#### `AFFINITIES_ENABLED` +*Optional · default: `False`.* +When `True`, datasets/services created here are auto-registered in Affinities +(non-blocking — the EP keeps working if Affinities is down). + +#### `AFFINITIES_URL` +*Required if `AFFINITIES_ENABLED=True`.* +Base URL of the Affinities API (e.g. `http://affinities-api:8000`). **Where:** the +platform's Affinities URL, or your local one. + +#### `AFFINITIES_EP_UUID` +*Required if `AFFINITIES_ENABLED=True`.* +This Endpoint's UUID inside Affinities. **Where:** the endpoint's `uid` in +Affinities — list/create it via `GET`/`POST /ep` on the Affinities API, or from +the **Endpoints** page of the Affinities web app. See +[affinities-integration.md](affinities-integration.md). 
+ +#### `AFFINITIES_TIMEOUT` +*Optional · default: `30`.* +HTTP timeout (seconds) for Affinities calls. + +--- + +## Federation (metrics reporting) + +#### `METRICS_ENDPOINT` +*Optional · default: `https://federation.ndp.utah.edu/metrics/`.* +The Federation endpoint this EP periodically reports health/usage metrics to, +which is how the EP becomes discoverable in the federation. **Where:** the +platform's Federation `/metrics/` URL, or your local Federation when self-hosting. + +#### `METRICS_INTERVAL_SECONDS` +*Optional · default: `3300` (55 min).* +How often metrics are reported. + +--- + +## Pelican federation (external data access) + +#### `PELICAN_ENABLED` +*Optional · default: `False`.* +Enable browsing/downloading from Pelican federations (OSDF, etc.) and support for +`pelican://` resource URLs. + +#### `PELICAN_FEDERATION_URL` +*Optional · default: empty (uses OSDF).* +Default Pelican federation, format `pelican://host` (e.g. `pelican://osg-htc.org`). +**Where:** the federation you target; leave empty for OSDF. + +#### `PELICAN_DIRECT_READS` +*Optional · default: `False`.* +Read straight from origin servers instead of caches. **Where:** keep `False` for +better performance unless you have a reason. + +--- + +## Remote execution (Rexec) + +#### `REXEC_CONNECTION` +*Optional · default: `False`.* +Enable the Remote Execution Deployment API integration. + +#### `REXEC_DEPLOYMENT_API_URL` +*Required if `REXEC_CONNECTION=True`.* +URL of the Remote Execution Deployment API. **Where:** from whoever operates that +service. + +--- + +## Access requests + +#### `ENABLE_ACCESS_REQUESTS` +*Optional · default: `False`.* +Enable the access-request workflow (a user requests access; an admin +approves/rejects). **Requires MongoDB** reachable via `MONGODB_CONNECTION_STRING`. +**Where:** turn on if you want self-service access requests. + +#### `ACCESS_REQUESTS_COLLECTION` +*Optional · default: `access_requests`.* +MongoDB collection used to store access requests. 
+ +--- + +## Telemetry — OpenTelemetry (advanced) + +#### `OTEL_ENABLED` +*Optional · default: `False`.* Enable OpenTelemetry tracing. + +#### `OTEL_SERVICE_NAME` +*Optional · default: `ep-api`.* Service name shown in traces. + +#### `OTEL_EXPORTER_TYPE` +*Optional · default: `console`.* Where traces go: `console`, `otlp`, or `none`. + +#### `OTEL_OTLP_ENDPOINT` +*Optional · default: `http://localhost:4317`.* OTLP collector endpoint (when +`OTEL_EXPORTER_TYPE=otlp`). **Where:** your collector's address. + +#### `OTEL_OTLP_INSECURE` +*Optional · default: `True`.* Use a non-TLS OTLP connection. + +--- + +> **Tip:** start from [`example.env`](../example.env) and override only what your +> deployment needs. When in doubt about a value for a shared platform service +> (AAI, Affinities, Federation), ask your NDP administrator. diff --git a/docs/demo/NDP-demo-presentation.md b/docs/demo/NDP-demo-presentation.md new file mode 100644 index 0000000..d428456 --- /dev/null +++ b/docs/demo/NDP-demo-presentation.md @@ -0,0 +1,913 @@ +--- +marp: true +title: NDP — From zero to a federated, secure dataset +author: Raul Bardaji +paginate: true +theme: default +class: lead +--- + +<!-- HTML comments in this deck are speaker notes for whoever presents. +--> + + + +# National Data Platform (NDP) +## From zero to a federated, secure dataset + +A guided demo of every component and how they work together + + + +--- + +## What is NDP? + +A platform to **publish, discover and share data** across institutions. + +- Each institution runs its own **Endpoint (EP)**: its data catalog. +- EPs are **federated**: discovered and shared through a central registry. +- All with shared **identity and permissions** and, optionally, over a + **secure private network**. 
+ +> Key idea: **distributed data, unified discovery.** + + + +--- + +## Platform components + +| Piece | What it is for | How it looks | +|---|---|---| +| **AAI** (Keycloak) | Who you are (login, users) | Login screen | +| **Affinities** | Relationships between datasets, services and endpoints | Relationships web app | +| **NDP-EP** | Your catalog: datasets, resources, storage | Endpoint web app | +| **Federation** | Central registry of all EPs | Federation web app | +| **Python library** | Do the same from code / automate | Notebook / script | +| **NetBird** (bonus) | Secure private network between machines | Network dashboard | + +--- + +## Component interactions + +![w:1080](assets/diagrams/component-interactions.svg) + + + +--- + +## Component interactions — step by step + +1. **Sign in** — the user authenticates through **AAI** (Keycloak), which also carries their **role** (viewer / writer / admin). +2. **Use the Endpoint** — with that token they publish and search in the **NDP-EP**, backed by **CKAN** (catalog) and **MinIO / S3** (storage). +3. **Register relationships** — the EP registers its datasets and services in **Affinities**, a non-blocking graph of how data, services and endpoints relate. +4. **Federate** — the EP reports to **Federation** (central registry + health & metrics). +5. **Secure transport** — all of it can run over a private, encrypted **NetBird** network. + + + +--- + +## Overview + +> A new user is granted access, publishes a dataset, performs the same tasks from +> code, and the dataset becomes discoverable across the federation — all securely. + +**Steps:** +1. Installation +2. Identity and permissions (sign in and get a role) +3. The Endpoint in action (publish and search from the web) +4. Automate with the Python library +5. Federation (the data is discovered elsewhere) +6. 
🔒 Bonus: secure network with NetBird + +--- + +# Step 1 — Installation + +--- + +## Two ways to install + +**🟢 Most users — the NDP-EP only** +Run your own **Endpoint** and connect it to the **National Data Platform**, which +already provides identity (**AAI**), **Affinities** and **Federation**. +→ install **one** component. + +**🧪 Full stack — development / testing** +Run all components locally, with no dependency on the central NDP. +→ install **all** components. + +> The common case is covered first; the full stack follows. + +--- + +## Before you install — prerequisites + +Have these ready; the installer writes them into `.env`: + +- **AAI endpoint** — the AAI `…/information` URL, so logins and roles work. +- **Catalog backend** — a MongoDB (Compose can start it) **or** a reachable **CKAN** instance with an admin **API token**. +- **EP_UUID** — required only when using Affinities or per-EP roles (`group:<EP_UUID>:…`). It is the Endpoint's `uid` in Affinities, used as `AFFINITIES_EP_UUID`. → see appendix *Obtaining the EP_UUID*. +- **Object storage** — optional, only for S3 features. + +> In the common case, the platform operators provide the AAI endpoint and, if +> applicable, the `EP_UUID`. + +--- + +## Install the NDP-EP (the common case) + +> ⚠️ **For system administrators.** Installation involves Docker, networking and +> environment configuration — it should be done by someone with sysadmin skills. + +```bash +git clone https://github.com/national-data-platform/ep-api.git +cd ep-api +cp example.env .env # configure your deployment +docker compose up -d # the Endpoint only +``` + +> The CKAN backend requires an existing, reachable CKAN instance and an administrator API token (`CKAN_API_KEY`). The MongoDB backend has no such prerequisite. + +> Run only the Endpoint, or add local backends with **Compose profiles** (next slide). + +> 📖 `.env` variables are explained in **`docs/configuration.md`** (template: `example.env`). 
+ +--- + +## Compose profiles — core backends + +`docker compose --profile <name> up -d` — combine as many as you need: + +| Profile | What it starts | +|---|---| +| *(none)* | **NDP-EP only** (API + web UI) | +| `mongodb` | MongoDB + Mongo Express (local catalog DB) | +| `s3` | MinIO (S3-compatible object storage) | +| `kafka` | Kafka + Zookeeper + Kafka UI (streaming) | + +--- + +## Compose profiles — extras + +| Profile | What it starts | +|---|---| +| `jupyter` | JupyterLab | +| `pelican` | Pelican federation (registry, director, origin, cache) | +| `full` | All backends above | + + + +--- + +## What you operate vs. what the platform provides + +| 🛠️ You operate (your Endpoint) | ☁️ Shared by the platform | +|---|---| +| **NDP-EP** — API + web UI | **AAI** — identity & roles | +| **Catalog database** — CKAN or MongoDB | **Affinities** — relationship registry | +| **Object storage** — MinIO / S3 *(optional)* | **Federation** — registry & discovery | + + + + +--- + +# Full stack (development / testing) +### Only if you want the whole system locally + + + +--- + +## Startup order (full stack) + +``` +1) AAI (Keycloak) → identity first, everything depends on it +2) Affinities → relationships (data · services · endpoints) +3) Federation → central registry +4) NDP-EP (+ backends) → catalog, connects to AAI and Federation + backends: CKAN · MongoDB · MinIO (S3 storage) +``` + +Each component starts the same way: enter its folder and `docker compose up -d`. + + + +--- + +## 1) Start AAI (identity) + +```bash +git clone https://github.com/sci-ndp/ndp-keycloak-aai-old.git +cd ndp-keycloak-aai-old +cp .env_template .env # set admin user/password + domain +# place fullchain.pem & privkey.pem in SSL/certificates/ (TLS) +docker compose up -d --build +``` + +**What you will see:** the Keycloak admin console and the NDP login screen. 
+ + + + +--- + +## 2) Start Affinities (relationship registry) + +```bash +git clone https://github.com/sci-ndp/ndp-affinities.git +cd ndp-affinities +cp .env.example .env # optional: customize DB user/password +docker compose up -d +``` + +**What you will see (default URLs):** +- API: `http://localhost:8000/docs` +- **Affinities web app**: `http://localhost:3000` +- Database admin (pgAdmin): `http://localhost:5050` + + + +--- + +## 3) Start Federation (central registry) + +```bash +git clone https://github.com/sci-ndp/ndp-federation.git +cd ndp-federation +cp .env.example .env # set ADMIN_PASSWORD +docker compose up -d +``` + +**What you will see:** +- Web: `http://localhost:8020/ui/` +- API & docs: `http://localhost:8020/docs` + + + +--- + +## 4) Start the NDP-EP (+ backends) + +Identical to the common-case install. Configure `.env` to reference the local +AAI, Affinities and Federation instances, and start the data backends with a +Compose profile. + +```bash +docker compose --profile full up -d # Endpoint + MongoDB + MinIO + Kafka +``` + +**What you will see:** the Endpoint web app at `…/ep-api/ui/`, now wired to your local stack. + +--- + +## ✅ Check: everything is up + +```bash +docker ps # all containers "Up / healthy" +``` + +The NDP-EP is now reachable two ways: + +- **Web UI** — `…/ep-api/ui/` +- **HTTP API** — `…/ep-api/` (interactive docs at `…/ep-api/docs`) + + + + +--- + +# Step 2 — Identity and permissions +### A user signs in and gets a role + +--- + +## Bootstrap the first admin + +The Endpoint has **no user store** — identity and roles come from **AAI (Keycloak)**. +How the first admin is created depends on the deployment: + +- **🟢 NDP infrastructure (common case)** — register your Endpoint through the NDP + platform's onboarding process. It provisions the stack and your admin access; + the platform operators manage identity. +- **🧪 Full stack (self-hosted)** — you assign the admin role yourself in your own + Keycloak (next slide). 
+ + + +--- + +## Bootstrap the first admin — full stack + +Self-hosted only. In your Keycloak (realm **NDP**) — granting roles from the EP UI +or the AAI API requires an existing admin, so the first one is set here: + +1. Create the user and set a password. +2. Assign the realm role **`ndp_admin`** (platform-wide), or **`group:<EP_UUID>:admin`** for this Endpoint only (`EP_UUID` → see appendix *Obtaining the EP_UUID*). + +That user can then sign in and manage everyone else via the AAI API / EP. + + + +--- + +## Where users come from (AAI) + +Depends on the deployment: + +- **🟢 NDP infrastructure (common case)** — users are existing **nationaldataplatform.org** accounts; they sign in with their NDP identity. You do not create them. +- **🧪 Full stack (self-hosted)** — create them in your own **Keycloak** (Users → Add user → set password). → see appendix *Creating a user — Keycloak*. + +> A user alone **cannot publish anything yet** — they still need a **role**. + +--- + +## Requesting access (user) + +A new user has **no role**, so the Endpoint denies access — but offers a +**Request access** form with an optional justification. + +> Requires `ENABLE_ACCESS_REQUESTS=True`. + +--- + + + +![h:500](screenshots/22-request-access.png) + +--- + +## Approving access (admin) + +On the **Access Requests** page, an admin reviews pending requests and **approves** +each with a tier — **Viewer**, **Writer** or **Admin** — or **rejects** it. + +> Approval assigns the role; the user re-logs in to pick it up. + +--- + + + +![h:500](screenshots/23-access-requests-approve.png) + +--- + +## The three roles + +Roles come from **AAI** and are hierarchical (each tier includes the ones below): + +| Role | Can do | +|---|---| +| 👁️ **Viewer** | View and search data. **Read-only.** | +| ✏️ **Writer** | The above **+ create/edit** datasets, resources, and **S3 management**. | +| 🛠️ **Admin** | All of the above **+ administration** (dashboard, access requests). 
| + +> With no role assigned, a user can only see public data. **Secure by default.** + + + +--- + +# Step 3 — The Endpoint in action +### Search, publish and manage from the web + +--- + +## Search — the landing page + +The home page is **Search**, available to **everyone** (including viewers). +Free-text search across **name, description and keywords**. + +--- + +## Search — options + +- **Category** — All · Datasets · Services · Organizations +- **Catalog** — **Local** (this Endpoint) or **Global** (the federated NDP catalog) +- **Organization** filter, and **Yours** (only items you own) +- On your own items: **Publish** and **Delete** actions (role/ownership-gated) + +--- + + + +![h:500](screenshots/30-search-ui.png) + +--- + +## The "+ New" menu + +Available to **writers and admins**. Six creation flows, in two groups: + +| Kind | What it is | +|---|---| +| **Organization** | Top-level group that owns datasets and services | +| **Dataset** | Logical container of related resources, owned by an organization | +| **Service** | Network-accessible service (REST API, app, etc.) owned by an organization | +| **URL resource** | Link to a file or service (CSV, JSON, NetCDF, …) | +| **S3 resource** | Object in S3-compatible storage | +| **Kafka topic** | Streaming data flow | + +--- + +## "+ New" — Organization + +A top-level **group** that owns datasets and services. + +- **`name`** — Unique slug used as the organization ID and in URLs (lowercase letters, digits, `_`, `-`). + *Example:* `atmospheric-research` +- **`title`** — Human-readable display title shown across the UI. + *Example:* `Atmospheric Research Lab` +- **`description`** *(opt.)* — Free-text description shown on the organization page. + *Example:* `Group publishing radar and atmospheric datasets for research.` + +--- + +## "+ New" — Dataset (required) + +A logical container of related **resources**, owned by an organization. 
+ +- **`name`** — Unique slug for the dataset; appears in the URL (lowercase, alphanumeric, `_`, `-`). + *Example:* `nexrad-reflectivity-2025` +- **`title`** — Human-readable title displayed on the dataset page. + *Example:* `NEXRAD reflectivity composites, 2025` +- **`owner_org`** — ID of the organization that owns this dataset (must already exist). + *Example:* `atmospheric-research` + +--- + +## "+ New" — Dataset (optional, 1/2) + +Describe the dataset and make it findable. + +- **`notes`** — Longer description / notes shown on the dataset page. + *Example:* `Hourly NEXRAD Level-II reflectivity composites over CONUS, 2025-01 to 2025-12.` +- **`tags`** — Short keywords used to categorize and search. + *Example:* `["radar", "nexrad", "reflectivity", "2025"]` +- **`groups`** — Catalog **groups** (collections of datasets) the dataset belongs to. + *Example:* `["weather", "remote-sensing"]` +- **`license_id`** — License identifier (CKAN license slug). + *Example:* `cc-by` +- **`version`** — Free-text version label. + *Example:* `v1.2.0` + +--- + +## "+ New" — Dataset (optional, 2/2) + +Extra metadata, embedded resources and visibility. + +- **`extras`** — Free-form metadata as key/value pairs. + *Example:* `{"region": "CONUS", "instrument": "NEXRAD"}` +- **`resources`** — Resources attached at creation (each `{url, name, format?, …}`). + *Example:* `[{"url": "https://data.example.org/radar/2025-01.nc", "name": "jan-2025", "format": "NetCDF"}]` +- **`private`** *(default `false`)* — Whether the dataset is private (only visible to its org). + *Example:* `false` + +--- + +## "+ New" — Service (required) + +A network-accessible **service** (REST API, web app, etc.) registered under the `services` org. + +- **`service_name`** — Unique service slug (1–100 chars). + *Example:* `radar-stats-api` +- **`service_title`** — Display title (1–200 chars). + *Example:* `Radar Statistics API` +- **`owner_org`** — Organization ID; **must be `services`** (all services live there). 
+ *Example:* `services` +- **`service_url`** — URL where the service is reachable (`http(s)://…`). + *Example:* `https://api.atmospheric-research.org/radar/stats` + +--- + +## "+ New" — Service — `service_type` *(optional)* + +What kind of service this is — used by the UI to label and filter. +The UI offers three canonical options plus a free-text fallback (≤ 50 chars). + +- **API** — programmatic interface (REST/HTTP, GraphQL, gRPC…) called by code. + *Example:* `API` +- **UI** — human-facing interface (web app, dashboard, viewer…) opened in a browser. + *Example:* `UI` +- **Trigger** — event source / scheduled job (webhook, cron, producer…) that runs on its own. + *Example:* `Trigger` +- *…or any custom free-text value when none of the above applies.* + +--- + +## "+ New" — Service (other optional) + +Round out the service entry. + +- **`notes`** — Description or additional notes. + *Example:* `RESTful API exposing aggregated reflectivity statistics from the NEXRAD datasets.` +- **`health_check_url`** — URL of a health endpoint (`http(s)://…`) used for liveness checks. + *Example:* `https://api.atmospheric-research.org/radar/stats/health` +- **`documentation_url`** — URL to the service documentation. + *Example:* `https://docs.atmospheric-research.org/radar-stats` +- **`extras`** — Free-form metadata as key/value pairs. + *Example:* `{"version": "2.1.0", "environment": "production"}` + +--- + +## "+ New" — URL resource (required) + +A **link to a file or service** registered as a resource of a dataset. + +- **`resource_name`** — Unique slug for the resource (lowercase, alphanumeric, `_`, `-`). + *Example:* `radar-jan-2025` +- **`resource_title`** — Display title. + *Example:* `Radar reflectivity — January 2025` +- **`owner_org`** — Organization that owns the resource. + *Example:* `atmospheric-research` +- **`resource_url`** — URL of the file or service (must start with `http(s)://`). 
+ *Example:* `https://data.example.org/nexrad/2025-01.nc` + +--- + +## "+ New" — URL resource (`file_type` & processing) + +Help the catalog interpret the content of the URL. + +- **`file_type`** — `stream`, `CSV`, `TXT`, `JSON`, `NetCDF`, or a custom value. + *Example:* `CSV` +- **`processing`** *(type-specific)* — how to read the file: + - **CSV:** `{"delimiter": ",", "header_line": 1, "start_line": 2}` + - **JSON:** `{"data_key": "results"}` + - **NetCDF:** `{"group": "/radar"}` + +--- + +## "+ New" — URL resource (other optional) + +- **`mapping`** — Field mapping: which fields to expose and how to rename them. + *Example:* `{"refl": "reflectivity_dBZ", "ts": "timestamp"}` +- **`notes`** — Additional notes about the resource. + *Example:* `Hourly NetCDF files; missing values flagged with -9999.` +- **`extras`** — Free-form metadata as key/value pairs. + *Example:* `{"region": "CONUS", "cadence": "1h"}` + +--- + +## "+ New" — S3 resource (identification) + +An **object in S3-compatible storage** registered as a resource. + +- **`resource_name`** — Unique slug (lowercase, alphanumeric, `_`, `-`). + *Example:* `radar-archive-2025` +- **`resource_title`** — Display title. + *Example:* `NEXRAD radar archive, 2025` +- **`owner_org`** — Organization ID that owns the resource. + *Example:* `atmospheric-research` + +--- + +## "+ New" — S3 resource (S3 details) + +- **`resource_s3`** — S3 URL of the object (`s3://bucket/path`, or `http(s)://…`). + *Example:* `s3://nexrad-archive/2025/` +- **`notes`** *(required; may be empty)* — Notes about the resource. + *Example:* `Annual archive of NEXRAD Level-II composites, partitioned by month.` +- **`extras`** *(opt.)* — Free-form metadata as key/value pairs. + *Example:* `{"format": "NetCDF", "size_GB": 480}` + +--- + +## "+ New" — Kafka topic (required) + +A **streaming data flow** registered as a system dataset. + +- **`dataset_name`** — Unique slug for the dataset entry. 
+ *Example:* `nexrad-live` +- **`dataset_title`** — Display title. + *Example:* `NEXRAD radar — live stream` +- **`owner_org`** — Organization that owns the dataset. + *Example:* `atmospheric-research` +- **`dataset_description`** — Description of the stream. + *Example:* `Live JSON feed of NEXRAD radar volume scans, ~5 min cadence.` + +--- + +## "+ New" — Kafka topic (broker) + +Point at the broker and the topic. + +- **`kafka_topic`** — Kafka topic name. + *Example:* `nexrad.live` +- **`kafka_host`** — Kafka broker host. + *Example:* `kafka.atmospheric-research.org` +- **`kafka_port`** — Broker port (1–65535). + *Example:* `9092` + +--- + +## "+ New" — Kafka topic (options) + +Shape the messages. + +- **`mapping`** *(opt.)* — Field mapping (select/rename fields to send). + *Example:* `{"refl": "reflectivity_dBZ", "ts": "timestamp"}` +- **`processing`** *(opt.)* — Processing config. + *Example:* `{"data_key": "data", "info_key": "metadata"}` +- **`extras`** *(opt.)* — Free-form metadata. + *Example:* `{"avg_msgs_per_min": 12, "schema": "https://schemas.example.org/radar.json"}` + + + +--- + +## Storage management (S3) — writers only + +Manage **buckets** and **objects** in S3-compatible storage from the UI. + +**Requires:** +- `S3_ENABLED=True` in `.env`, plus `S3_ENDPOINT`, `S3_ACCESS_KEY`, `S3_SECRET_KEY` (and optionally `S3_SECURE`, `S3_REGION`). +- **Writer or admin** role — the menu entry is hidden otherwise; the API returns `403` to read-only users. + + + +--- + +## S3 Management — buckets + +A list view (Bucket name · Created · Actions) with a filter to find buckets quickly. + +- **Create** a bucket (name + region). +- **List** all buckets with creation dates. +- **Search / filter** by name. +- **View** bucket information and metadata. +- **Delete** a bucket (must be empty — no objects). +- **Refresh** the list. + +--- + +## S3 Management — objects + +Select a bucket → manage its objects (Name · Size · Last modified · Type). 
+ +- **Upload** files via drag-and-drop or the file picker. +- **List** objects in the bucket. +- **Search** by prefix / path. +- **Download** an object to your computer. +- **View** detailed object **metadata**. +- **Generate a presigned URL** — a temporary, authenticated link for secure sharing. +- **Delete** individual objects. + +--- + +# Step 4 — Automate with Python +### The same operations, from code + + + +--- + +## The `ndp-ep` library + +Every web-app operation is also available **from code** — suitable for automation +and bulk loading. + +```bash +pip install ndp-ep +``` + +> Intended for researchers and teams that load data programmatically. + +--- + +## Example: in a few lines + +```python +from ndp_ep import APIClient + +# 1. Connect to the Endpoint with your token +client = APIClient(base_url="https://my-endpoint/ep-api", token="…") + +# 2. List organizations +print(client.list_organizations()) + +# 3. Create a dataset and search for it +client.create_dataset(name="measurements-2026", owner_org="my-org") +print(client.search_datasets("measurements")) +``` + + + + +--- + +## Web and code: a unified interface + +``` + Web (click) ─┐ + ├─► the SAME Endpoint ─► the SAME catalog + Python (code) ─┘ +``` + +> The web interface and the library target the same Endpoint: **identical data and permissions.** + +--- + +# Step 5 — Federation +### The data is discovered elsewhere + +--- + +## The Endpoint registers + +Each Endpoint registers with **Federation**. The central registry then tracks its +**status** and **metrics**. + + + +--- + +## Health and metrics + +The federation web app reports which Endpoints are **online**, since when, and +their activity. + + + +--- + +## What the Endpoint reports + +Every `METRICS_INTERVAL_SECONDS` (default **55 min**) — and only when `IS_PUBLIC=True` — the Endpoint posts a JSON payload to `METRICS_ENDPOINT`: + +**Identity & version** +- `organization`, `ep_name`, `version` (EP API version), `public_ip`, `timestamp`. 
+ +**Catalog activity** +- `num_datasets`, `num_services`, `services` (list of service titles). + +**Host load** +- `cpu` (%), `memory` (`used/total GB`), `disk` (`used/total GB`). + +--- + +## What the Endpoint reports — infrastructure flags + +Which features this Endpoint exposes — booleans, plus a few details when enabled. + +- `jupyterlab_enabled` *(if true: `jupyterlab_url`)* +- `kafka_enabled` *(if true: `kafka_host`, `kafka_port`)* +- `s3_enabled` +- `pre_ckan_enabled` + +> No tokens, user data or dataset content are sent — only counts, infrastructure flags and host load. Federation drops the report if it cannot reach the endpoint, so the EP keeps working when the federation is down. + +--- + +## 🔒 Bonus — NetBird + +**[NetBird](https://netbird.io)** — open-source mesh VPN built on **WireGuard**. + +- Each machine joins a private virtual network and gets a **stable private IP**. +- Traffic flows **directly and encrypted** between authorized peers; access is restricted by policy. +- **No public ports** are exposed for the services themselves. + +**Why it matters for NDP:** when the Endpoint and the platform components run on different machines, NetBird connects them over a single private overlay — no per-service firewall opening. + +--- + +## Resources + +- **Endpoint (web):** `…/ep-api/ui/` · **API:** `…/ep-api/docs` +- **Federation:** `…:8020/ui/` +- **Affinities:** `…:3000` +- **Python library:** `pip install ndp-ep` · PyPI: `ndp-ep` +- **Repos:** `ep-api`, `ndp-federation`, `ndp-affinities`, `ndp-keycloak-aai-old`, `ndp-ep-py`, `netbird-ndp` + +--- + +# Appendix + +--- + +## Obtaining the EP_UUID — Affinities web app + +In the Affinities web app (`http://localhost:3000`, or your Affinities URL): +**Endpoints → Add Endpoint**, fill the form (**Kind** = `ndp-ep`, **URL**, optional +**Metadata**), then **Save**. The new row's **UID** is your `AFFINITIES_EP_UUID`. 
+ +![h:300](screenshots/A1-affinities-add-endpoint.png) + +--- + +## Obtaining the EP_UUID — Affinities API + +**Via the Affinities API** (`http://localhost:8000`, Swagger at `/docs`): + +```bash +# list endpoints and copy your uid +curl http://localhost:8000/ep + +# or register this endpoint; the response includes "uid" +curl -X POST http://localhost:8000/ep -H 'Content-Type: application/json' \ + -d '{"kind":"ndp-ep","url":"https://<your-ep-host>","metadata":{"name":"My EP"}}' +``` + +The returned `uid` is your `AFFINITIES_EP_UUID`. + +--- + +## Creating a user — Keycloak + +In the Keycloak admin console, realm **NDP**: + +1. **Users → Add user** — set **Username** and the required profile fields (email, first/last name), then **Create**. +2. Open the user → **Credentials → Set password** — turn **Temporary** off, then **Save**. + +![h:280](screenshots/A2-keycloak-create-user.png) + +--- + +## Assigning groups & roles — AAI API + +After the user exists in Keycloak, assign groups/roles via the **AAI API** +(the caller must already be an admin): + +```bash +# authenticate as an admin +TOKEN=$(curl -s -X POST "$AAI/user/login" -H 'Content-Type: application/json' \ + -d '{"username":"<admin-username>","password":"<admin-password>"}' | jq -r .access_token) + +# join the EP group (assigned the viewer role automatically) +curl -s -X POST "$AAI/group/add-user" -H "Authorization: Bearer $TOKEN" \ + -H 'Content-Type: application/json' \ + -d '{"group_name":"<ep-group>","username":"<username>"}' +``` + +--- + +## Assigning groups & roles — AAI API (cont.) + +Upgrade the tier (bare name: `viewer` | `writer` | `admin`): + +```bash +curl -s -X POST "$AAI/role/assign" -H "Authorization: Bearer $TOKEN" \ + -H 'Content-Type: application/json' \ + -d '{"groupName":"<ep-group>","roleName":"writer","username":"<username>"}' +``` + +> **First admin exception:** assign `ndp_admin` **directly in Keycloak** — no admin +> exists yet to call this API. The user must re-login for new roles to take effect.
+ +--- + +## Tokens stored at NDP-managed onboarding + +When the Endpoint is installed via the **nationaldataplatform.com** registration +(NDP infrastructure case), the onboarding process also **stores the access tokens +for the various catalogs** (CKAN sysadmin token, Pre-CKAN API key, etc.) on the +host — typically in each component's `.env` and a summary `user_info.env` — so +the Endpoint can read from and write to those catalogs without manual token setup. diff --git a/docs/demo/README.md b/docs/demo/README.md new file mode 100644 index 0000000..97d7e4a --- /dev/null +++ b/docs/demo/README.md @@ -0,0 +1,70 @@ +# NDP demo — presentation & self-guided tutorial + +End-to-end material that walks through the whole NDP system (installation, web +usage, the Python library, federation and the secure network), aimed at **end +users and administrators**. + +## Files + +- `NDP-demo-presentation.md` — the presentation in **Marp** format. It doubles as +  a self-guided tutorial: each step states what to do and what you will see. +- `assets/` — brand header/footer images (NDP logo + partner logos) reused from +  the official `ndp ep - presentation.pptx`. Applied to every slide via CSS. +- `screenshots/` — drop the screenshots here (see the checklist below). + +## Turning it into slides + +**Option A — VS Code (easiest):** install the **"Marp for VS Code"** extension, +open `NDP-demo-presentation.md` and click the preview icon. From there you can +export to **PDF**, **PPTX** (PowerPoint) or **HTML**.
+ +**Option B — command line (Marp CLI):** + +```bash +# --allow-local-files is required because the brand header/footer use local images +npx @marp-team/marp-cli --allow-local-files NDP-demo-presentation.md -o NDP-demo-presentation.pdf +npx @marp-team/marp-cli --allow-local-files NDP-demo-presentation.md --pptx -o NDP-demo-presentation.pptx +npx @marp-team/marp-cli NDP-demo-presentation.md -o NDP-demo-presentation.html +``` + +> Run these from inside `docs/demo/` so the `assets/...` paths resolve. + +## Screenshots to capture + +Each `[📸 screenshots/NN-name.png …]` placeholder in the presentation maps to one +screenshot. Checklist: + +**Installation** +- [ ] `10-keycloak-login.png` — NDP login (Keycloak) +- [ ] `11-keycloak-admin.png` — Keycloak admin console (realm NDP) +- [ ] `12-affinities-frontend.png` — Affinities web app (relationships graph) +- [ ] `13-federation-ui.png` — federation web app (still empty) +- [ ] `14-ep-home.png` — Endpoint home page (search) +- [ ] `15-docker-ps.png` — `docker ps` with everything "Up" + +**Identity and permissions** +- [ ] `19-keycloak-assign-ndp-admin.png` — assigning the `ndp_admin` realm role in Keycloak (first admin, full stack) +- [ ] `22-request-access.png` — user's "Request access" form (no role yet) +- [ ] `23-access-requests-approve.png` — admin Access Requests page approving with a tier + +**Endpoint (web)** +- [ ] `30-search-ui.png` — Search page with options (category, catalog, filters) +- [ ] `33-create-resource.png` — example of a "+ New" creation form +- [ ] `34-search-results.png` — results with the dataset +- [ ] `36-s3-management.png` — S3 Management tool + +**Python** +- [ ] `40-notebook.png` — notebook running the library + +**Federation** +- [ ] `50-federation-ep-registered.png` — the EP in the federation +- [ ] `51-federation-health.png` — health/metrics + +**Appendix** +- [ ] `A1-affinities-add-endpoint.png` — Affinities "Add Endpoint" form (obtaining the EP_UUID) +- [ ] `A2-keycloak-create-user.png` 
— Keycloak: create user + set password (bootstrap) + +## Notes + +- Presentation text (and speaker notes ``) are in **English**. +- This material is written and refined incrementally (see issue #179). diff --git a/docs/demo/assets/diagrams/component-interactions.svg b/docs/demo/assets/diagrams/component-interactions.svg new file mode 100644 index 0000000..4c9e7c6 --- /dev/null +++ b/docs/demo/assets/diagrams/component-interactions.svg @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + log in + token (identity + role) + catalog + storage + registers data & services + register · report metrics + + + + + + + + + Data User + + + + + + + + + AAI · Identity + Keycloak · tokens + roles + + + + + + + + NDP-EP + Endpoint · REST API + datasets · search · storage + + + + + + + + + + + + CKAN · MinIO (S3) + catalog & object storage + + + + + + + + + Affinities + relationships graph + + + + + + + + + Federation + endpoint registry · health + + + + + + + + + All of this can run over a private, encrypted NetBird network + + diff --git a/docs/demo/assets/footer-left.png b/docs/demo/assets/footer-left.png new file mode 100644 index 0000000..7f90de9 Binary files /dev/null and b/docs/demo/assets/footer-left.png differ diff --git a/docs/demo/assets/footer-right.png b/docs/demo/assets/footer-right.png new file mode 100644 index 0000000..8998ccd Binary files /dev/null and b/docs/demo/assets/footer-right.png differ diff --git a/docs/demo/assets/header-logo.png b/docs/demo/assets/header-logo.png new file mode 100644 index 0000000..28be64b Binary files /dev/null and b/docs/demo/assets/header-logo.png differ diff --git a/docs/demo/assets/icons/aai.svg b/docs/demo/assets/icons/aai.svg new file mode 100644 index 0000000..b965e17 --- /dev/null +++ b/docs/demo/assets/icons/aai.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/demo/assets/icons/affinities.svg b/docs/demo/assets/icons/affinities.svg new file mode 100644 index 0000000..3bfbf2b --- /dev/null +++ 
b/docs/demo/assets/icons/affinities.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/docs/demo/assets/icons/federation.svg b/docs/demo/assets/icons/federation.svg new file mode 100644 index 0000000..810884f --- /dev/null +++ b/docs/demo/assets/icons/federation.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/docs/demo/assets/icons/ndp-ep.svg b/docs/demo/assets/icons/ndp-ep.svg new file mode 100644 index 0000000..3610299 --- /dev/null +++ b/docs/demo/assets/icons/ndp-ep.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/docs/demo/assets/icons/netbird.svg b/docs/demo/assets/icons/netbird.svg new file mode 100644 index 0000000..9a5fb49 --- /dev/null +++ b/docs/demo/assets/icons/netbird.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/docs/demo/assets/icons/python-lib.svg b/docs/demo/assets/icons/python-lib.svg new file mode 100644 index 0000000..ab84217 --- /dev/null +++ b/docs/demo/assets/icons/python-lib.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/docs/demo/screenshots/.gitkeep b/docs/demo/screenshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/demo/screenshots/22-request-access.png b/docs/demo/screenshots/22-request-access.png new file mode 100644 index 0000000..05e3155 Binary files /dev/null and b/docs/demo/screenshots/22-request-access.png differ diff --git a/docs/demo/screenshots/23-access-requests-approve.png b/docs/demo/screenshots/23-access-requests-approve.png new file mode 100644 index 0000000..6302a9e Binary files /dev/null and b/docs/demo/screenshots/23-access-requests-approve.png differ diff --git a/docs/demo/screenshots/30-search-ui.png b/docs/demo/screenshots/30-search-ui.png new file mode 100644 index 0000000..65fe6b0 Binary files /dev/null and b/docs/demo/screenshots/30-search-ui.png differ diff --git a/docs/demo/screenshots/A1-affinities-add-endpoint.png b/docs/demo/screenshots/A1-affinities-add-endpoint.png new file mode 100644 index 0000000..2b87d28 Binary files /dev/null and 
b/docs/demo/screenshots/A1-affinities-add-endpoint.png differ diff --git a/docs/ndp ep - presentation.pptx b/docs/ndp ep - presentation.pptx new file mode 100644 index 0000000..0643a2e Binary files /dev/null and b/docs/ndp ep - presentation.pptx differ diff --git a/example.env b/example.env index bce150f..fe78dce 100644 --- a/example.env +++ b/example.env @@ -43,6 +43,17 @@ ENABLE_GROUP_BASED_ACCESS=False # If empty and ENABLE_GROUP_BASED_ACCESS=True, all write operations will be denied GROUP_NAMES= +# ============================================== +# ACCESS REQUESTS (Optional) +# ============================================== +# Self-service workflow: a user requests access, an admin approves/rejects. +# Requires MongoDB reachable via MONGODB_CONNECTION_STRING (see below). +# Kept off by default so deployments without MongoDB keep booting. +ENABLE_ACCESS_REQUESTS=False + +# MongoDB collection used to store access requests +ACCESS_REQUESTS_COLLECTION=access_requests + # ============================================== # LOCAL CATALOG CONFIGURATION # ==============================================