diff --git a/.github/workflows/bulk-export-smoke.yml b/.github/workflows/bulk-export-smoke.yml index 4c1b07c2c..7dfc12130 100644 --- a/.github/workflows/bulk-export-smoke.yml +++ b/.github/workflows/bulk-export-smoke.yml @@ -28,15 +28,7 @@ jobs: {"backend":"sqlite","bulk_mode":"embedded-local","expectation":"full"}, {"backend":"sqlite","bulk_mode":"postgres-s3","expectation":"full"}, {"backend":"postgres","bulk_mode":"embedded-local","expectation":"full"}, - {"backend":"postgres","bulk_mode":"postgres-s3","expectation":"full"}, - {"backend":"sqlite-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, - {"backend":"sqlite-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"postgres-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, - {"backend":"postgres-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"mongodb","bulk_mode":"postgres-s3","expectation":"unsupported"}, - {"backend":"mongodb-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"s3","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"s3-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"} + {"backend":"postgres","bulk_mode":"postgres-s3","expectation":"full"} ]' MATRIX=$(jq -c --argjson versions "$FHIR_VERSIONS" \ diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index c6d8a34be..29e304b63 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -23,83 +23,799 @@ on: workflow_dispatch: inputs: kit_ref: - description: "bulk-data-test-kit ref (branch / tag) to clone" + description: "bulk-data-test-kit ref (branch / tag) to test" required: false default: "main" +env: + CARGO_TERM_COLOR: always + CARGO_BUILD_JOBS: 1 + CARGO_PROFILE_DEV_DEBUG: 0 + HFS_PORT: 18098 + DOCKER_HOST: ${{ 
secrets.DOCKER_HOST }} + DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} + SUITE_ID: bulk_data_v200 + SMART_GROUP_ID: bulk_data_v200-bulk_data_smart_backend_services_v200 + EXPORT_GROUP_ID: bulk_data_v200-bulk_data_export_tests_v200 + GROUP_ID: inferno-bulk-group + RESULTS_DIR: inferno-bulk-data-results + jobs: - inferno-bulk-data: + build: + name: Build HFS with Bulk Data support runs-on: [self-hosted, Linux] - timeout-minutes: 60 + steps: + - name: Checkout HFS + uses: actions/checkout@v5 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Configure Rust to use LLD + run: | + mkdir -p ~/.cargo + rm -f ~/.cargo/config.toml + echo '[target.x86_64-unknown-linux-gnu]' >> ~/.cargo/config.toml + echo 'linker = "clang"' >> ~/.cargo/config.toml + echo 'rustflags = ["-C", "link-arg=-fuse-ld=lld", "-C", "link-arg=-Wl,-zstack-size=8388608"]' >> ~/.cargo/config.toml + - name: Build HFS + run: cargo build -p helios-hfs --no-default-features --features R4,postgres,s3 + + - name: Upload HFS binary + uses: actions/upload-artifact@v7 + with: + name: hfs-bulk-data-inferno-binary + path: target/debug/hfs + retention-days: 1 + + inferno-stack-up: + name: Start Shared Inferno Bulk Data Stack + runs-on: [self-hosted, Linux] + outputs: + port: ${{ steps.up.outputs.port }} + project: ${{ steps.up.outputs.project }} + steps: + - name: Checkout bulk-data-test-kit + uses: actions/checkout@v5 + with: + repository: inferno-framework/bulk-data-test-kit + ref: ${{ github.event.inputs.kit_ref }} + path: inferno-kit + + - name: Prepare CI compose override + working-directory: inferno-kit + run: | + PROJECT="inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + INFERNO_HOST="${DOCKER_HOST_IP:-localhost}" + INFERNO_PORT=$((24000 + (${{ github.run_id }} % 20000) + ${{ github.run_attempt }})) + INFERNO_BASE_URL="http://$INFERNO_HOST:$INFERNO_PORT" + + mkdir -p ci-images/nginx + cat > ci-images/nginx/Dockerfile <<'EOF' + FROM 
nginx + COPY config/nginx.conf /etc/nginx/nginx.conf + EOF + cat > docker-compose.override.yml < /dev/null 2>&1; then + echo "Inferno is ready" + echo "port=$INFERNO_PORT" >> "$GITHUB_OUTPUT" + echo "project=$PROJECT" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "Attempt $i/60: Inferno not ready yet..." + sleep 2 + done + + echo "Inferno is not reachable at $INFERNO_BASE_URL" + docker compose -p "$PROJECT" ps + docker compose -p "$PROJECT" logs --tail 200 + exit 1 + + inferno-bulk-data-test: + name: Inferno Bulk Data v2.0.0 Tests + needs: [build, inferno-stack-up] + runs-on: [self-hosted, Linux] + timeout-minutes: 90 + permissions: + contents: read + env: + INFERNO_PORT: ${{ needs.inferno-stack-up.outputs.port }} + INFERNO_COMPOSE_PROJECT: ${{ needs.inferno-stack-up.outputs.project }} steps: - name: Checkout HFS - uses: actions/checkout@v4 + uses: actions/checkout@v5 + + - name: Download HFS binary + uses: actions/download-artifact@v8 + with: + name: hfs-bulk-data-inferno-binary + path: target/debug - - name: Bring up the bulk-export stack + - name: Make HFS binary executable + run: chmod +x target/debug/hfs + + - name: Determine runner and Docker host IP run: | - docker compose \ - -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} \ - up --build -d + RUNNER_IP=$(hostname -I | awk '{print $1}') + if [ -n "${DOCKER_HOST_IP:-}" ]; then + EFFECTIVE_DOCKER_HOST_IP="$DOCKER_HOST_IP" + else + EFFECTIVE_DOCKER_HOST_IP="$RUNNER_IP" + fi + + echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_ENV" + echo "DOCKER_HOST_IP=$EFFECTIVE_DOCKER_HOST_IP" >> "$GITHUB_ENV" + echo "HFS_BASE_URL=http://$RUNNER_IP:$HFS_PORT" >> "$GITHUB_ENV" + echo "INFERNO_BASE_URL=http://$EFFECTIVE_DOCKER_HOST_IP:$INFERNO_PORT" >> "$GITHUB_ENV" + echo "Runner IP: $RUNNER_IP" + echo "Docker host IP: $EFFECTIVE_DOCKER_HOST_IP" + + - name: Prepare results directory + run: mkdir -p "$RESULTS_DIR/container-logs" + + - name: Start PostgreSQL + run: | + PG_CONTAINER="pg-inferno-bulk-data-${{ 
github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + docker run -d --name "$PG_CONTAINER" -p 0:5432 \ + -e POSTGRES_USER=helios \ + -e POSTGRES_PASSWORD=helios \ + -e POSTGRES_DB=helios \ + postgres:16 + + echo "PG_CONTAINER=$PG_CONTAINER" >> "$GITHUB_ENV" + + echo "Waiting for PostgreSQL to be ready..." + for i in {1..30}; do + if docker exec "$PG_CONTAINER" pg_isready -U helios > /dev/null 2>&1; then + PG_PORT=$(docker port "$PG_CONTAINER" 5432 | head -1 | sed 's/.*://') + if timeout 2 bash -c "cat < /dev/null > /dev/tcp/$DOCKER_HOST_IP/$PG_PORT" 2>/dev/null; then + echo "PostgreSQL is ready on port $PG_PORT" + echo "PG_PORT=$PG_PORT" >> "$GITHUB_ENV" + exit 0 + fi + fi + echo "Attempt $i/30: PostgreSQL not ready yet..." + sleep 2 + done + + echo "PostgreSQL failed to start" + docker logs "$PG_CONTAINER" + exit 1 + + - name: Start MinIO + run: | + MINIO_CONTAINER="minio-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$MINIO_CONTAINER" 2>/dev/null || true + docker run -d --name "$MINIO_CONTAINER" -p 0:9000 -p 0:9001 \ + -e MINIO_ROOT_USER=hfs-minio \ + -e MINIO_ROOT_PASSWORD=hfs-minio-secret \ + minio/minio:latest server /data --console-address ":9001" + + echo "MINIO_CONTAINER=$MINIO_CONTAINER" >> "$GITHUB_ENV" + + echo "Waiting for MinIO to be ready..." + MINIO_READY=0 + for i in {1..30}; do + MINIO_PORT=$(docker port "$MINIO_CONTAINER" 9000 | head -1 | sed 's/.*://') + if [ -n "$MINIO_PORT" ]; then + if curl -sf "http://$DOCKER_HOST_IP:$MINIO_PORT/minio/health/live" > /dev/null 2>&1; then + echo "MinIO is ready on port $MINIO_PORT" + echo "MINIO_PORT=$MINIO_PORT" >> "$GITHUB_ENV" + MINIO_READY=1 + break + fi + fi + echo "Attempt $i/30: MinIO not ready yet..." 
+ sleep 2 + done + + if [ "$MINIO_READY" -ne 1 ]; then + echo "MinIO failed to become ready" + docker logs "$MINIO_CONTAINER" + exit 1 + fi + + docker run --rm --network "container:$MINIO_CONTAINER" \ + --entrypoint /bin/sh \ + minio/mc:latest \ + -c 'mc alias set local http://127.0.0.1:9000 hfs-minio hfs-minio-secret && mc mb -p local/hfs-export' + + - name: Generate Keycloak realm for Inferno client + run: | + INFERNO_JWKS_URL="$INFERNO_BASE_URL/custom/bulk_data_v200/.well-known/jwks.json" + jq --arg jwks_url "$INFERNO_JWKS_URL" ' + .clients += [{ + "clientId": "inferno-bulk-data-client", + "name": "Inferno Bulk Data Backend Services Client", + "description": "CI client registered for the Inferno Bulk Data Test Kit private_key_jwt flow", + "enabled": true, + "publicClient": false, + "bearerOnly": false, + "clientAuthenticatorType": "client-jwt", + "serviceAccountsEnabled": true, + "standardFlowEnabled": false, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "authorizationServicesEnabled": false, + "protocol": "openid-connect", + "defaultClientScopes": ["system/*.cruds"], + "optionalClientScopes": ["system/Patient.rs", "system/Observation.r"], + "attributes": { + "use.jwks.url": "true", + "jwks.url": $jwks_url, + "token.endpoint.auth.signing.alg": "ES384", + "token.endpoint.auth.signing.max.exp": "600" + } + }] + ' docker/keycloak/realm.json > "$RESULTS_DIR/keycloak-realm.json" + + - name: Start Keycloak + run: | + KC_CONTAINER="kc-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$KC_CONTAINER" 2>/dev/null || true + + docker create --name "$KC_CONTAINER" -p 0:8080 \ + -e KC_BOOTSTRAP_ADMIN_USERNAME=admin \ + -e KC_BOOTSTRAP_ADMIN_PASSWORD=admin \ + -e KC_HEALTH_ENABLED=true \ + --entrypoint /bin/sh \ + quay.io/keycloak/keycloak:26.1 \ + -ec 'mkdir -p /opt/keycloak/data/import && cp /tmp/realm.json /opt/keycloak/data/import/realm.json && exec /opt/keycloak/bin/kc.sh start-dev --import-realm' > /dev/null + + 
docker cp "$RESULTS_DIR/keycloak-realm.json" "$KC_CONTAINER":/tmp/realm.json + docker start "$KC_CONTAINER" > /dev/null + echo "KC_CONTAINER=$KC_CONTAINER" >> "$GITHUB_ENV" + + - name: Wait for Keycloak + run: | + echo "Waiting for Keycloak to be ready..." + for i in {1..60}; do + KEYCLOAK_PORT=$(docker port "$KC_CONTAINER" 8080 2>/dev/null | head -1 | sed 's/.*://') + if [ -n "$KEYCLOAK_PORT" ]; then + if curl -sf "http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir" > /dev/null 2>&1; then + echo "Keycloak is ready on port $KEYCLOAK_PORT" + echo "KEYCLOAK_PORT=$KEYCLOAK_PORT" >> "$GITHUB_ENV" + exit 0 + fi + fi + echo "Attempt $i/60: Keycloak not ready yet..." + sleep 2 + done + + echo "Keycloak failed to start" + docker logs "$KC_CONTAINER" + exit 1 + + - name: Start HFS server + run: | + HFS_LOG="$RESULTS_DIR/hfs.log" + PG_URL="postgresql://helios:helios@$DOCKER_HOST_IP:$PG_PORT/helios" + AUTH_JWKS_URL="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/certs" + AUTH_ISSUER="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir" + AUTH_TOKEN_ENDPOINT="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/token" + AUTH_AUTHORIZE_ENDPOINT="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/auth" + MINIO_ENDPOINT="http://$DOCKER_HOST_IP:$MINIO_PORT" + + echo "HFS_LOG=$HFS_LOG" >> "$GITHUB_ENV" + echo "PG_URL=$PG_URL" >> "$GITHUB_ENV" + + HFS_BASE_URL="$HFS_BASE_URL" \ + HFS_DEFAULT_FHIR_VERSION=R4 \ + HFS_STORAGE_BACKEND=postgres \ + HFS_DATABASE_URL="$PG_URL" \ + HFS_PG_HOST="$DOCKER_HOST_IP" \ + HFS_PG_PORT="$PG_PORT" \ + HFS_PG_DBNAME=helios \ + HFS_PG_USER=helios \ + HFS_PG_PASSWORD=helios \ + HFS_AUTH_ENABLED=true \ + HFS_AUTH_JWKS_URL="$AUTH_JWKS_URL" \ + HFS_AUTH_ISSUER="$AUTH_ISSUER" \ + HFS_AUTH_JTI_BACKEND=disabled \ + HFS_SMART_TOKEN_ENDPOINT="$AUTH_TOKEN_ENDPOINT" \ + HFS_SMART_AUTHORIZE_ENDPOINT="$AUTH_AUTHORIZE_ENDPOINT" \ + HFS_SMART_JWKS_URL="$AUTH_JWKS_URL" \ + HFS_BULK_EXPORT_ENABLED=true 
\ + HFS_BULK_EXPORT_BACKEND=postgres-s3 \ + HFS_BULK_EXPORT_DATABASE_URL="$PG_URL" \ + HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 \ + HFS_BULK_EXPORT_S3_BUCKET=hfs-export \ + HFS_BULK_EXPORT_S3_ENDPOINT="$MINIO_ENDPOINT" \ + HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE=true \ + HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=true \ + HFS_BULK_EXPORT_FILE_URL_TTL=3600 \ + HFS_BULK_EXPORT_OUTPUT_TTL=3600 \ + HFS_BULK_EXPORT_BATCH_SIZE=100 \ + AWS_ACCESS_KEY_ID=hfs-minio \ + AWS_SECRET_ACCESS_KEY=hfs-minio-secret \ + AWS_REGION=us-east-1 \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + + echo $! > /tmp/hfs-inferno-bulk-data.pid + echo "HFS_PID=$(cat /tmp/hfs-inferno-bulk-data.pid)" >> "$GITHUB_ENV" - name: Wait for HFS to be ready run: | - for i in $(seq 1 60); do - if curl -fsS http://localhost:8080/health > /dev/null; then - echo "HFS is up" + echo "Waiting for HFS to start..." + for i in {1..30}; do + if ! kill -0 "$HFS_PID" 2>/dev/null; then + echo "HFS process ($HFS_PID) exited" + cat "$HFS_LOG" + exit 1 + fi + if curl -sf "http://localhost:$HFS_PORT/health" > /dev/null 2>&1; then + echo "HFS is ready" exit 0 fi + echo "Attempt $i/30: HFS not ready yet..." sleep 2 done - echo "HFS did not become ready in time" - docker compose -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} logs --tail=200 + + echo "HFS failed to start" + tail -50 "$HFS_LOG" exit 1 - - name: Clone Inferno Bulk Data Test Kit + - name: Sanity-check SMART discovery run: | - git clone --depth 1 --branch "${{ github.event.inputs.kit_ref }}" \ - https://github.com/inferno-framework/bulk-data-test-kit.git inferno-kit + curl -sf "$HFS_BASE_URL/.well-known/smart-configuration" \ + | tee "$RESULTS_DIR/smart-configuration.json" - - name: Inspect available suites - working-directory: inferno-kit + - name: Seed Bulk Data resources run: | - # Read the actual suite ids from kit source — do NOT guess. 
- # The first implementation pass should use these to set the - # SUITE_ID env var below. - grep -RhoE 'id\s+:[a-z0-9_]+' lib/ | sort -u || true - ls lib/ || true + token() { + KEYCLOAK_URL="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT" ./docker/keycloak/get-token.sh + } - - name: Run the Bulk Data IG v2.0.0 suite - working-directory: inferno-kit - env: - # The actual suite id must be filled in once read from the kit - # source above (e.g. `bulk_data_v200`). Until then this step is a - # placeholder that surfaces the real id in CI logs. - SUITE_ID: bulk_data_v200 - INFERNO_HOST: http://host.docker.internal:8080 - run: | - if [ -f docker-compose.yml ]; then - docker compose up --build -d - sleep 10 - # The kit ships a CLI runner under `bin/run` for headless mode; - # the exact invocation depends on the suite's required inputs - # (Bearer token, Group id, etc.) and should be filled in when - # this workflow is first wired up against a real HFS deployment. - docker compose run --rm \ - -e SUITE_ID=$SUITE_ID \ - -e INFERNO_HOST=$INFERNO_HOST \ - inferno bin/inferno suite execute --suite "$SUITE_ID" || EXIT=$? 
- docker compose down - exit ${EXIT:-0} - else - echo "bulk-data-test-kit does not ship docker-compose.yml; consult kit README" + put_resource() { + local resource_type="$1" + local id="$2" + local file="$3" + local access_token + access_token="$(token)" + local status + status="$(curl -sS -o "$RESULTS_DIR/${resource_type}-${id}-response.json" -w "%{http_code}" \ + -X PUT "$HFS_BASE_URL/$resource_type/$id" \ + -H "Authorization: Bearer $access_token" \ + -H "Content-Type: application/fhir+json; fhirVersion=4.0" \ + -H "Accept: application/fhir+json; fhirVersion=4.0" \ + --data-binary @"$file")" + if [ "$status" != "200" ] && [ "$status" != "201" ]; then + echo "::error::PUT $resource_type/$id returned HTTP $status" + cat "$RESULTS_DIR/${resource_type}-${id}-response.json" + exit 1 + fi + } + + cat > "$RESULTS_DIR/patient-a.json" <<'EOF' + { + "resourceType": "Patient", + "id": "inferno-bulk-patient-a", + "name": [{ "family": "Bulk", "given": ["Alpha"] }], + "gender": "female", + "birthDate": "1980-01-01" + } + EOF + + cat > "$RESULTS_DIR/patient-b.json" <<'EOF' + { + "resourceType": "Patient", + "id": "inferno-bulk-patient-b", + "name": [{ "family": "Bulk", "given": ["Beta"] }], + "gender": "male", + "birthDate": "1981-02-02" + } + EOF + + cat > "$RESULTS_DIR/observation-a.json" <<'EOF' + { + "resourceType": "Observation", + "id": "inferno-bulk-observation-a", + "status": "final", + "category": [{ + "coding": [{ + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "code": "vital-signs", + "display": "Vital Signs" + }] + }], + "code": { + "coding": [{ + "system": "http://loinc.org", + "code": "8867-4", + "display": "Heart rate" + }], + "text": "Heart rate" + }, + "subject": { "reference": "Patient/inferno-bulk-patient-a" }, + "effectiveDateTime": "2024-01-01T00:00:00Z", + "valueQuantity": { + "value": 72, + "unit": "beats/minute", + "system": "http://unitsofmeasure.org", + "code": "/min" + } + } + EOF + + cat > "$RESULTS_DIR/group.json" < 
"$RESULTS_DIR/test-suites.json"; then + echo "Shared Inferno stack not reachable at $INFERNO_BASE_URL" + exit 1 + fi + + - name: Create Inferno test session + run: | + for attempt in $(seq 1 5); do + SESSION_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_sessions?test_suite_id=$SUITE_ID") + SESSION_ID=$(echo "$SESSION_RESPONSE" | jq -r '.id' 2>/dev/null) || true + + if [ -n "$SESSION_ID" ] && [ "$SESSION_ID" != "null" ]; then + break + fi + echo "Attempt $attempt/5: Failed to create test session: $SESSION_RESPONSE" + sleep $((attempt * 3)) + done + + if [ -z "${SESSION_ID:-}" ] || [ "$SESSION_ID" = "null" ]; then + echo "Failed to create test session after 5 attempts" + echo "$SESSION_RESPONSE" + exit 1 + fi + + echo "SESSION_ID=$SESSION_ID" >> "$GITHUB_ENV" + echo "$SESSION_RESPONSE" > "$RESULTS_DIR/session.json" + + - name: Run Inferno Bulk Data suite + run: | + SMART_AUTH_INFO="$(jq -nc '{ + auth_type: "backend_services", + use_discovery: "true", + requested_scopes: "system/*.cruds", + client_id: "inferno-bulk-data-client", + encryption_algorithm: "ES384" + }')" + + # The bulk_data_v200 suite contains two sequential groups that must each be + # run as a separate test_run: the SMART Backend Services group establishes the + # bearer token, and the Export Tests group consumes it from session state. 
+ for GROUP_PAIR in "smart:$SMART_GROUP_ID" "export:$EXPORT_GROUP_ID"; do + LABEL="${GROUP_PAIR%%:*}" + GROUP_ID_VAL="${GROUP_PAIR##*:}" + + echo "--- Starting group: $LABEL ($GROUP_ID_VAL) ---" + + RUN_PAYLOAD="$(jq -n \ + --arg session_id "$SESSION_ID" \ + --arg group_id "$GROUP_ID_VAL" \ + --arg bulk_server_url "$HFS_BASE_URL" \ + --arg smart_auth_info "$SMART_AUTH_INFO" \ + --arg fhir_group_id "$GROUP_ID" \ + '{ + test_session_id: $session_id, + test_group_id: $group_id, + inputs: [ + {name: "bulk_server_url", value: $bulk_server_url}, + {name: "smart_auth_info", value: $smart_auth_info}, + {name: "group_id", value: $fhir_group_id}, + {name: "bulk_timeout", value: "600"}, + {name: "lines_to_validate", value: "100"}, + {name: "since_timestamp", value: "2000-01-01T00:00:00.000Z"} + ] + }')" + + echo "$RUN_PAYLOAD" > "$RESULTS_DIR/run-payload-$LABEL.json" + + RUN_ID="" + RUN_RESPONSE="" + for attempt in $(seq 1 5); do + RUN_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_runs" \ + -H "Content-Type: application/json" \ + -d "$RUN_PAYLOAD") + RUN_ID=$(echo "$RUN_RESPONSE" | jq -r '.id' 2>/dev/null) || true + if [ -n "$RUN_ID" ] && [ "$RUN_ID" != "null" ]; then break; fi + echo "Attempt $attempt/5: Failed to start $LABEL run: $RUN_RESPONSE" + sleep $((attempt * 3)) + done + + if [ -z "${RUN_ID:-}" ] || [ "$RUN_ID" = "null" ]; then + echo "Failed to start $LABEL run after 5 attempts" + echo "$RUN_RESPONSE" + exit 1 + fi + + echo "$RUN_RESPONSE" > "$RESULTS_DIR/run-$LABEL.json" + echo "$LABEL run started: $RUN_ID" + + MAX_POLLS=150 + API_ERRORS=0 + RUN_STATUS="" + for i in $(seq 1 $MAX_POLLS); do + STATUS_RESPONSE=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID") + RUN_STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status' 2>/dev/null) || true + + if [ -z "$RUN_STATUS" ] || [ "$RUN_STATUS" = "null" ]; then + API_ERRORS=$((API_ERRORS + 1)) + echo "Poll $i/$MAX_POLLS [$LABEL]: WARNING - non-JSON response (error $API_ERRORS)" + if [ "$API_ERRORS" -ge 5 ]; then + 
echo "::error::Inferno API failed $API_ERRORS consecutive times for $LABEL" + tail -50 "$HFS_LOG" + exit 1 + fi + sleep 10 + continue + fi + API_ERRORS=0 + + RESULTS=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID/results") + TOTAL=$(echo "$RESULTS" | jq '[.[] | select(.test_id)] | length' 2>/dev/null) || TOTAL="?" + PASS=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "pass")] | length' 2>/dev/null) || PASS="?" + FAIL=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "fail")] | length' 2>/dev/null) || FAIL="?" + SKIP=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "skip")] | length' 2>/dev/null) || SKIP="?" + ERROR=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "error")] | length' 2>/dev/null) || ERROR="?" + OMIT=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "omit")] | length' 2>/dev/null) || OMIT="?" + + echo "Poll $i/$MAX_POLLS [$LABEL]: Status=$RUN_STATUS Total=$TOTAL Pass=$PASS Fail=$FAIL Skip=$SKIP Error=$ERROR Omit=$OMIT" + + if [ "$RUN_STATUS" = "done" ]; then + echo "$RESULTS" > "$RESULTS_DIR/results-$LABEL.json" + break + fi + + sleep 10 + done + + if [ "$RUN_STATUS" != "done" ]; then + echo "::error::Inferno $LABEL group timed out" + exit 1 + fi + + if [ "$LABEL" = "smart" ]; then + SMART_AUTH_INFO="$(jq -r ' + [.[] | .outputs[]? 
| select(.name == "smart_auth_info") | .value | select(contains("\"access_token\""))] + | last // empty + ' "$RESULTS_DIR/results-smart.json")" + + if [ -z "$SMART_AUTH_INFO" ]; then + echo "::error::SMART group did not produce access-token auth info for export tests" + jq -r '.[] | select(.result == "fail" or .result == "error") | " \(.test_id): \(.result) - \(.result_message // "No message")"' "$RESULTS_DIR/results-smart.json" + exit 1 + fi + fi + done + + # Merge results from both groups for downstream steps + jq -s 'add' \ + "$RESULTS_DIR/results-smart.json" \ + "$RESULTS_DIR/results-export.json" \ + > "$RESULTS_DIR/results.json" + + - name: Check test results + run: | + if [ ! -f "$RESULTS_DIR/results.json" ]; then + echo "::error::No results file found" exit 1 fi - - name: Tear down stack + RESULTS=$(cat "$RESULTS_DIR/results.json") + LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version|bulk_file_server_tls_version)$"))' + + FAILURES=$(echo "$RESULTS" | jq "$LATEST | [.[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not))] | length") + OMITTED_COUNT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") + + echo "Failures (excluding $OMITTED_COUNT known omitted tests): $FAILURES" + + if [ "$FAILURES" -gt 0 ]; then + echo "::error::$FAILURES Inferno Bulk Data test(s) failed" + echo "$RESULTS" | jq -r "$LATEST | .[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not)) | \" \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:300])\"" + exit 1 + fi + + - name: Generate test summary + if: always() + run: | + echo "## Inferno Bulk Data v2.0.0 Test Results" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + if [ -f "$RESULTS_DIR/results.json" ]; then + RESULTS=$(cat "$RESULTS_DIR/results.json") + LATEST='[.[] | 
select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version|bulk_file_server_tls_version)$"))' + + TOTAL=$(echo "$RESULTS" | jq "$LATEST | length") + PASS=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"pass\")] | length") + FAIL=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"fail\" and ($OMIT_EXPR | not))] | length") + ERROR=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"error\" and ($OMIT_EXPR | not))] | length") + SKIP=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"skip\")] | length") + OMIT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"omit\")] | length") + KNOWN=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") + + echo "| Status | Count |" >> "$GITHUB_STEP_SUMMARY" + echo "|--------|-------|" >> "$GITHUB_STEP_SUMMARY" + echo "| Pass | $PASS |" >> "$GITHUB_STEP_SUMMARY" + echo "| Fail | $FAIL |" >> "$GITHUB_STEP_SUMMARY" + echo "| Error | $ERROR |" >> "$GITHUB_STEP_SUMMARY" + echo "| Skip | $SKIP |" >> "$GITHUB_STEP_SUMMARY" + echo "| Omit | $OMIT |" >> "$GITHUB_STEP_SUMMARY" + echo "| Known omitted | $KNOWN |" >> "$GITHUB_STEP_SUMMARY" + echo "| Total | $TOTAL |" >> "$GITHUB_STEP_SUMMARY" + + if [ "$KNOWN" -gt 0 ]; then + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "### Known Omitted Tests" >> "$GITHUB_STEP_SUMMARY" + echo "$RESULTS" | jq -r "$LATEST | .[] | select($OMIT_EXPR) | \"- \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:200])\"" >> "$GITHUB_STEP_SUMMARY" + fi + + if [ "$FAIL" -gt 0 ] || [ "$ERROR" -gt 0 ]; then + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "### Failed/Error Tests" >> "$GITHUB_STEP_SUMMARY" + echo "$RESULTS" | jq -r "$LATEST | .[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not)) | \"- \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:200])\"" >> 
"$GITHUB_STEP_SUMMARY" + fi + else + echo "No results file found." >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Collect container logs + if: always() + run: | + mkdir -p "$RESULTS_DIR/container-logs" + + for name in "$PG_CONTAINER" "$MINIO_CONTAINER" "$KC_CONTAINER"; do + if [ -n "${name:-}" ]; then + docker logs "$name" > "$RESULTS_DIR/container-logs/$name.log" 2>&1 || true + docker inspect "$name" > "$RESULTS_DIR/container-logs/$name.inspect.json" 2>&1 || true + fi + done + + PROJECT="${INFERNO_COMPOSE_PROJECT:-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}}" + echo "$PROJECT" > "$RESULTS_DIR/container-logs/inferno-project.txt" + IDS=$(docker ps -aq --filter "label=com.docker.compose.project=$PROJECT") + for id in $IDS; do + NAME=$(docker inspect -f '{{.Name}}' "$id" | sed 's#^/##') + docker logs "$id" > "$RESULTS_DIR/container-logs/inferno-$NAME.log" 2>&1 || true + docker inspect "$id" > "$RESULTS_DIR/container-logs/inferno-$NAME.inspect.json" 2>&1 || true + done + + - name: Upload test results if: always() + uses: actions/upload-artifact@v7 + with: + name: inferno-bulk-data-results-${{ github.run_id }}-${{ github.run_attempt }} + path: inferno-bulk-data-results/ + retention-days: 30 + + - name: Cleanup + if: always() + run: | + echo "Stopping HFS..." + if [ -f /tmp/hfs-inferno-bulk-data.pid ]; then + kill "$(cat /tmp/hfs-inferno-bulk-data.pid)" 2>/dev/null || true + rm -f /tmp/hfs-inferno-bulk-data.pid + fi + + echo "Stopping PostgreSQL..." + docker rm -f "${PG_CONTAINER:-none}" 2>/dev/null || true + + echo "Stopping MinIO..." + docker rm -f "${MINIO_CONTAINER:-none}" 2>/dev/null || true + + echo "Stopping Keycloak..." 
+ docker rm -f "${KC_CONTAINER:-none}" 2>/dev/null || true + + echo "Cleanup complete" + + inferno-stack-down: + name: Stop Shared Inferno Bulk Data Stack + needs: [inferno-stack-up, inferno-bulk-data-test] + if: always() && needs.inferno-stack-up.result != 'skipped' + runs-on: [self-hosted, Linux] + steps: + - name: Tear down Inferno stack by compose project label run: | - docker compose -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} down -v + PROJECT="${{ needs.inferno-stack-up.outputs.project }}" + if [ -z "$PROJECT" ]; then + PROJECT="inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + fi + echo "Tearing down compose project: $PROJECT" + + IDS=$(docker ps -aq --filter "label=com.docker.compose.project=$PROJECT") + if [ -n "$IDS" ]; then + docker rm -f $IDS + fi + + NETS=$(docker network ls --filter "label=com.docker.compose.project=$PROJECT" -q) + if [ -n "$NETS" ]; then + docker network rm $NETS || true + fi + + VOLS=$(docker volume ls --filter "label=com.docker.compose.project=$PROJECT" -q) + if [ -n "$VOLS" ]; then + docker volume rm $VOLS || true + fi + + echo "Inferno Bulk Data stack teardown complete" diff --git a/CLAUDE.md b/CLAUDE.md index 64acc14ea..0b9fcc718 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -612,6 +612,100 @@ cargo run --bin hts -- import ./package.tgz \ --- +## Bulk Data Export ($export) + +HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) +`$export` family asynchronously: kick-off → poll → manifest → download → delete. 
+
+### Endpoints
+
+| Operation | Method | URL |
+|-----------|--------|-----|
+| system kick-off | GET / POST | `/$export` |
+| patient kick-off | GET / POST | `/Patient/$export` |
+| group kick-off | GET / POST | `/Group/{id}/$export` |
+| status / manifest | GET | `/export-status/{job_id}` |
+| cancel + delete | DELETE | `/export-status/{job_id}` |
+| HFS-served download | GET | `/export-file/{job_id}/{type}-{part}` |
+
+All kick-offs require `Prefer: respond-async`. The default response is
+`202 Accepted` with a `Content-Location` status URL.
+
+### Environment variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `HFS_BULK_EXPORT_ENABLED` | `true` | Master switch — when `false`, all `$export` endpoints return `501`. |
+| `HFS_BULK_EXPORT_BACKEND` | `embedded` | Job-state backend: `embedded` (SQLite) or `postgres-s3`. |
+| `HFS_BULK_EXPORT_OUTPUT_BACKEND` | `local-fs` | Output store: `local-fs` or `s3`. |
+| `HFS_BULK_EXPORT_OUTPUT_DIR` | `${HFS_DATA_DIR}/exports` | Local-FS output root. |
+| `HFS_BULK_EXPORT_S3_BUCKET` | (none) | S3 bucket — required when `OUTPUT_BACKEND=s3`. |
+| `HFS_BULK_EXPORT_S3_ENDPOINT` | (AWS) | S3-compatible endpoint URL (e.g. MinIO). |
+| `HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE` | `false` | Path-style addressing for S3-compatible providers. |
+| `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` | `auto` | Manifest posture: `auto` / `true` / `false`. **`false` is invalid with `local-fs`** (no pre-signed URLs). |
+| `HFS_BULK_EXPORT_FILE_URL_TTL` | `3600` | Pre-signed download URL lifetime, seconds. |
+| `HFS_BULK_EXPORT_OUTPUT_TTL` | `86400` | Output retention after job completion, seconds. |
+| `HFS_BULK_EXPORT_WORKER_CONCURRENCY` | `2` | In-process worker pool size. |
+| `HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER` | `false` | Disable in-pod workers (use a separate exporter). |
+| `HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT` | `4` | Per-tenant active-job cap (kick-off → 429 if exceeded). |
+| `HFS_BULK_EXPORT_BATCH_SIZE` | `1000` | Resources per `fetch_export_batch`. |
+| `HFS_BULK_EXPORT_LEASE_DURATION` | `60` | Initial lease length, seconds. Must be > heartbeat interval. |
+| `HFS_BULK_EXPORT_HEARTBEAT_INTERVAL` | `20` | Worker heartbeat cadence, seconds. |
+| `HFS_BULK_EXPORT_CLEANUP_INTERVAL` | `300` | Cleanup-task scan interval, seconds. |
+| `HFS_BULK_EXPORT_SINCE_NEWLY_ADDED` | `include` | Group-export `_since` toggle (`include` / `exclude`). |
+| `HFS_BULK_EXPORT_DATABASE_URL` | (from `HFS_DATABASE_URL`) | Postgres URL for the `postgres-s3` job store. |
+
+### Single-instance recipe (zero-config)
+
+```
+cargo run --bin hfs
+```
+
+This starts HFS with embedded bulk export: SQLite job state at
+`./data/bulk_export.db`, NDJSON output under `./data/exports/`, and an
+in-process worker pool. Kick off:
+
+```
+curl -H 'Prefer: respond-async' http://localhost:8080/Patient/\$export
+```
+
+### Multi-instance recipe (PostgreSQL + S3 / MinIO)
+
+```
+HFS_STORAGE_BACKEND=postgres \
+HFS_DATABASE_URL=postgresql://hfs:hfs@localhost/hfs \
+HFS_BULK_EXPORT_BACKEND=postgres-s3 \
+HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 \
+HFS_BULK_EXPORT_S3_BUCKET=hfs-export \
+HFS_BULK_EXPORT_S3_ENDPOINT=http://localhost:9000 \
+HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE=true \
+HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false \
+cargo run --bin hfs --features postgres,s3
+```
+
+The full stack (HFS + Postgres + MinIO + Keycloak) is available as a local
+example in `docker/bulk-export/docker-compose.yml`; GitHub Actions does not use
+that compose file for bulk export tests. See `.github/workflows/inferno-bulk-data.yml`
+for the manual conformance workflow.
+
+### Behavior notes
+
+- **`_typeFilter`** is parsed and applied; unsupported result-control params
+  (`_sort`, `_include`, `_revinclude`, `_count`, `_elements`) inside a
+  `_typeFilter` query are rejected `400` regardless of `Prefer: handling`.
+- **`_elements`** is implemented: subset to listed paths + `id` / + `resourceType` / `meta`, with a `SUBSETTED` `meta.tag` added. +- **Unsupported parameters** (`includeAssociatedData`, `organizeOutputBy`, + `allowPartialManifests`) — **`Prefer: handling=strict`** → `400`; absent or + `handling=lenient` (IG-default) → ignored with a warning logged. +- **`_since` + late-membership** for Group exports: `include` (default) + returns pre-`_since` resources for patients added after `_since`; `exclude` + is reserved for a follow-up that requires group-membership-history tracking. +- **Group export** flattens nested `Group/` members iteratively with a + visited-set cycle guard. + +--- + ## Docker Generic Dockerfile supporting all server binaries via `BINARY_NAME` build arg: diff --git a/Cargo.lock b/Cargo.lock index 55c577602..09f736715 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,7 +125,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -136,7 +136,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -398,9 +398,9 @@ dependencies = [ [[package]] name = "astral-tokio-tar" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ce73b17c62717c4b6a9af10b43e87c578b0cac27e00666d48304d3b7d2c0693" +checksum = "cb50a7aae84a03bf55b067832bc376f4961b790c97e64d3eacee97d389b90277" dependencies = [ "filetime", "futures-core", @@ -2448,7 +2448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3048,6 
+3048,7 @@ dependencies = [ "anyhow", "async-trait", "axum", + "chrono", "clap", "helios-audit", "helios-auth", @@ -3821,7 +3822,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3958,9 +3959,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lettre" -version = "0.11.21" +version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dabda5859ee7c06b995b9d1165aa52c39110e079ef609db97178d86aeb051fa7" +checksum = "0da65617f6cb926332d039cb578aad56178da86e128db6a1b09f4c94fa5b3349" dependencies = [ "async-trait", "base64 0.22.1", @@ -4439,7 +4440,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5878,7 +5879,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6379,7 +6380,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6408,7 +6409,7 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6571,7 +6572,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7573,7 +7574,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] diff --git a/ROADMAP.md b/ROADMAP.md index 
8d4685ea7..936e201eb 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -61,7 +61,8 @@ Work that is currently underway or planned for the near term. |------|------|--------| | **Standards** | [Terminology — PostgreSQL backend](https://github.com/HeliosSoftware/hfs/discussions/54) | 🟡 In progress | | **Standards** | FHIR Validation engine | 🔵 Design | -| **Standards** | Bulk Data API — Import and export (`$export` / `$import` operations) | 🔵 Design | +| **Standards** | Bulk Data API — `$export` (system / patient / group), pre-signed S3 downloads | ✅ Shipped | +| **Standards** | Bulk Data API — `$bulk-submit` (ingestion) | 🔵 Design | | **Analytics** | [SQL on FHIR](https://sql-on-fhir.org/ig/latest/) — HFS integration and operations update | 🟡 In progress | | **Documentation** | [Project documentation website](https://github.com/HeliosSoftware/hfs/tree/docs/book-updates) | 🟡 In progress | @@ -182,7 +183,7 @@ Devitt's book defines nine key questions organizations must answer before choosi | Gap | Book Reference | Current Status | |-----|---------------|----------------| | **No patient-level access control** | Ch. 3 "Authorization" — SMART scopes are parsed but `patient/*` and `user/*` contexts are not enforced. Search results are not filtered by patient compartment. | 🔭 Later | -| **Bulk Data API not exposed via REST** | Appendix I "Bulk data processing" — persistence-layer traits exist across all backends but no `$export`/`$import` REST endpoints. The book notes bulk ingestion is important for hybrid architectures (Ch. 8). | 🗺️ Next | +| **Bulk Data API — `$export`** | Appendix I "Bulk data processing" — `$export` (system / patient / group) is now exposed via the REST layer with an embedded SQLite-backed worker pool by default and an optional Postgres + S3 multi-instance topology. `$bulk-submit` (ingestion) remains pending. 
| ✅ Shipped (export) / 🗺️ Next (submit) | #### Moderate diff --git a/codecov.yml b/codecov.yml index 6c32a7fa8..2d220aabb 100644 --- a/codecov.yml +++ b/codecov.yml @@ -13,4 +13,5 @@ ignore: - "crates/fhir/src/r4.rs" - "crates/fhir/src/r4b.rs" - "crates/fhir/src/r5.rs" - - "crates/fhir/src/r6.rs" \ No newline at end of file + - "crates/fhir/src/r6.rs" + - "crates/hfs/src/main.rs" \ No newline at end of file diff --git a/crates/auth/README.md b/crates/auth/README.md index eb8553e40..a4346a36c 100644 --- a/crates/auth/README.md +++ b/crates/auth/README.md @@ -117,7 +117,7 @@ All configuration is via environment variables. Auth is a runtime toggle — no | Variable | Default | Description | |----------|---------|-------------| -| `HFS_AUTH_JTI_BACKEND` | `memory` | JTI cache backend (`memory` or `redis`) | +| `HFS_AUTH_JTI_BACKEND` | `memory` | JTI cache backend (`memory`, `redis`, or `disabled`) | | `HFS_AUTH_REDIS_URL` | *(none)* | Redis URL (required for `redis` backend) | | `HFS_AUTH_JWKS_MIN_REFRESH_INTERVAL` | `10` | Min seconds between JWKS refreshes | diff --git a/crates/auth/src/discovery.rs b/crates/auth/src/discovery.rs index adf4e6d6f..a0347658b 100644 --- a/crates/auth/src/discovery.rs +++ b/crates/auth/src/discovery.rs @@ -27,12 +27,22 @@ pub struct SmartConfiguration { pub response_types_supported: Vec, pub grant_types_supported: Vec, pub token_endpoint_auth_methods_supported: Vec, + pub code_challenge_methods_supported: Vec, + pub token_endpoint_auth_signing_alg_values_supported: Vec, pub capabilities: Vec, } impl SmartConfiguration { /// Build the SMART configuration document from `AuthConfig`. 
pub fn from_config(config: &AuthConfig) -> Self { + let mut response_types_supported = vec!["token".to_string()]; + let mut grant_types_supported = vec!["client_credentials".to_string()]; + + if config.smart_authorize_endpoint.is_some() { + response_types_supported.push("code".to_string()); + grant_types_supported.push("authorization_code".to_string()); + } + Self { issuer: config.expected_issuer.clone(), jwks_uri: config @@ -50,9 +60,14 @@ impl SmartConfiguration { "system/*.rs".to_string(), "system/*.r".to_string(), ], - response_types_supported: vec!["token".to_string()], - grant_types_supported: vec!["client_credentials".to_string()], + response_types_supported, + grant_types_supported, token_endpoint_auth_methods_supported: vec!["private_key_jwt".to_string()], + code_challenge_methods_supported: vec!["S256".to_string()], + token_endpoint_auth_signing_alg_values_supported: vec![ + "RS384".to_string(), + "ES384".to_string(), + ], capabilities: vec![ "permission-v2".to_string(), "client-confidential-asymmetric".to_string(), @@ -78,6 +93,7 @@ mod tests { assert!(smart.issuer.is_none()); assert!(smart.token_endpoint.is_none()); assert!(smart.capabilities.contains(&"permission-v2".to_string())); + assert_eq!(smart.code_challenge_methods_supported, vec!["S256"]); } #[test] @@ -85,6 +101,7 @@ mod tests { let config = AuthConfig { expected_issuer: Some("https://idp.example.com".to_string()), smart_token_endpoint: Some("https://idp.example.com/token".to_string()), + smart_authorize_endpoint: Some("https://idp.example.com/authorize".to_string()), smart_jwks_url: Some("https://idp.example.com/.well-known/jwks.json".to_string()), ..AuthConfig::default() }; @@ -95,6 +112,12 @@ mod tests { smart.token_endpoint.as_deref(), Some("https://idp.example.com/token") ); + assert!( + smart + .grant_types_supported + .contains(&"authorization_code".to_string()) + ); + assert!(smart.response_types_supported.contains(&"code".to_string())); assert_eq!( smart.jwks_uri.as_deref(), 
Some("https://idp.example.com/.well-known/jwks.json") @@ -112,6 +135,10 @@ mod tests { assert!(json["capabilities"].is_array()); assert!(json["scopes_supported"].is_array()); + assert_eq!( + json["code_challenge_methods_supported"], + serde_json::json!(["S256"]) + ); // Fields that are None should be omitted assert!(json.get("authorization_endpoint").is_none()); } diff --git a/crates/auth/src/jti/mod.rs b/crates/auth/src/jti/mod.rs index 40a1d169e..727b568ad 100644 --- a/crates/auth/src/jti/mod.rs +++ b/crates/auth/src/jti/mod.rs @@ -23,3 +23,35 @@ pub trait JtiCache: Send + Sync + 'static { expires_at: DateTime, ) -> Result; } + +/// JTI cache implementation which never treats tokens as replays. +/// +/// This is intended for deployments where JWT IDs identify reusable bearer +/// access tokens rather than one-time client assertions. +#[derive(Debug, Clone, Copy, Default)] +pub struct DisabledJtiCache; + +#[async_trait] +impl JtiCache for DisabledJtiCache { + async fn check_and_store( + &self, + _jti: &str, + _expires_at: DateTime, + ) -> Result { + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn disabled_cache_never_reports_replay() { + let cache = DisabledJtiCache; + let expires = Utc::now(); + + assert!(!cache.check_and_store("same-jti", expires).await.unwrap()); + assert!(!cache.check_and_store("same-jti", expires).await.unwrap()); + } +} diff --git a/crates/auth/src/lib.rs b/crates/auth/src/lib.rs index 1c16e1c7b..ac758ea8d 100644 --- a/crates/auth/src/lib.rs +++ b/crates/auth/src/lib.rs @@ -36,7 +36,7 @@ pub mod scope; pub use config::AuthConfig; pub use discovery::SmartConfiguration; pub use error::{AuthError, FhirOperation}; -pub use jti::{JtiCache, memory::InMemoryJtiCache}; +pub use jti::{DisabledJtiCache, JtiCache, memory::InMemoryJtiCache}; pub use jwks::JwksCache; pub use outbound::{ NoOpOutboundAuthProvider, OutboundAuthProvider, StaticBearerOutboundAuthProvider, diff --git a/crates/fhir/src/lib.rs 
b/crates/fhir/src/lib.rs index 834cc4ee7..0071a384b 100644 --- a/crates/fhir/src/lib.rs +++ b/crates/fhir/src/lib.rs @@ -1850,6 +1850,40 @@ impl FhirVersion { } } +/// Returns the compartment search parameters for a given FHIR version. +/// +/// This is a version-agnostic dispatch over the per-version +/// `helios_fhir::{r4,r4b,r5,r6}::get_compartment_params` functions, which are +/// generated from the official FHIR `CompartmentDefinition` resources. +/// +/// # Arguments +/// +/// * `version` - The FHIR version to use for lookup +/// * `compartment_type` - The compartment type (e.g., "Patient", "Encounter") +/// * `resource_type` - The target resource type (e.g., "Observation") +/// +/// # Returns +/// +/// A static slice of search parameter names that link the resource to the +/// compartment. Returns an empty slice if the resource is not a member of the +/// compartment. +pub fn get_compartment_params( + version: FhirVersion, + compartment_type: &str, + resource_type: &str, +) -> &'static [&'static str] { + match version { + #[cfg(feature = "R4")] + FhirVersion::R4 => r4::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R4B")] + FhirVersion::R4B => r4b::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R5")] + FhirVersion::R5 => r5::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R6")] + FhirVersion::R6 => r6::get_compartment_params(compartment_type, resource_type), + } +} + /// Implements `Display` trait for user-friendly output formatting. 
/// /// This enables `FhirVersion` to be used in string formatting operations diff --git a/crates/fhirpath/src/reference_key_functions.rs b/crates/fhirpath/src/reference_key_functions.rs index 09510c846..3f0a43565 100644 --- a/crates/fhirpath/src/reference_key_functions.rs +++ b/crates/fhirpath/src/reference_key_functions.rs @@ -93,7 +93,7 @@ pub fn get_reference_key_function( _ => { return Err(EvaluationError::TypeError(format!( "getReferenceKey type filter must be a string or type, got: {:?}", - &args[0] + args[0] ))); } } diff --git a/crates/hfs/Cargo.toml b/crates/hfs/Cargo.toml index 50bd94cee..14745c18e 100644 --- a/crates/hfs/Cargo.toml +++ b/crates/hfs/Cargo.toml @@ -71,6 +71,9 @@ parking_lot = "0.12" # Error handling anyhow = "1.0" async-trait = "0.1" + +# Time (used by the bulk-export cleanup task) +chrono = "0.4" serde_json = "1" # Vendor OpenSSL when cross-compiling for Linux ARM64 (the macOS runner diff --git a/crates/hfs/README.md b/crates/hfs/README.md index 7f2671c70..2df4c1a5d 100644 --- a/crates/hfs/README.md +++ b/crates/hfs/README.md @@ -355,4 +355,3 @@ Use the `X-Tenant-ID` header to isolate data between tenants: curl -H "X-Tenant-ID: clinic-a" http://localhost:8080/Patient curl -H "X-Tenant-ID: clinic-b" http://localhost:8080/Patient ``` - diff --git a/crates/hfs/src/main.rs b/crates/hfs/src/main.rs index 8860bc258..64c75d873 100644 --- a/crates/hfs/src/main.rs +++ b/crates/hfs/src/main.rs @@ -20,7 +20,6 @@ use std::sync::Arc; -use clap::Parser; use helios_audit::{ AuditBackend, AuditConfig, AuditMiddlewareState, AuditSink, ExclusionFilter, lifecycle, }; @@ -31,6 +30,15 @@ use helios_rest::{ }; use tracing::info; +use helios_persistence::backends::local_fs::LocalFsOutputStore; +use helios_persistence::core::{ + BulkExportJobStore, DefaultExportWorker, ExportOutputStore, WorkerId, +}; +#[cfg(any(feature = "sqlite", feature = "postgres"))] +use helios_rest::bulk_export_auth::BearerScopeAuth; +#[cfg(any(feature = "sqlite", feature = "postgres"))] 
+use helios_rest::create_app_with_auth_and_bulk_export; + #[cfg(feature = "sqlite")] use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; @@ -526,10 +534,20 @@ async fn init_auth_with_audit( Build with: cargo build -p helios-hfs --features redis" ); } - _ => { + "memory" => { info!("Using in-memory JTI cache"); Arc::new(InMemoryJtiCache::new()) } + "disabled" | "none" => { + info!("JTI replay cache is DISABLED"); + Arc::new(helios_auth::DisabledJtiCache) + } + other => { + anyhow::bail!( + "Invalid HFS_AUTH_JTI_BACKEND '{}'. Valid values: memory, redis, disabled", + other + ); + } }; // Create JWKS cache @@ -638,7 +656,10 @@ async fn init_audit( #[tokio::main] async fn main() -> anyhow::Result<()> { - let config = ServerConfig::parse(); + // Use `from_env()` (not `parse()`) so `multitenancy` and `bulk_export` + // sub-structs — both `#[arg(skip)]` for clap — are populated from + // their `HFS_*` environment variables. + let config = ServerConfig::from_env(); init_logging(&config.log_level); if let Err(errors) = config.validate() { @@ -739,10 +760,25 @@ async fn start_sqlite( auth_state: Option>, audit_state: Option>, ) -> anyhow::Result<()> { - let backend = create_sqlite_backend(&config)?; let serve_audit_state = audit_state.clone(); + let backend = Arc::new(create_sqlite_backend(&config)?); + + if let Some(bundle) = build_bulk_export(&config, backend.clone()).await? 
{ + let app = create_app_with_auth_and_bulk_export( + backend, + config.clone(), + auth_config, + auth_state, + audit_state, + bundle, + ); + return serve(app, &config, serve_audit_state).await; + } + let app = create_app_with_auth( - backend, + Arc::try_unwrap(backend).unwrap_or_else(|_| { + unreachable!("backend Arc is uniquely owned when bulk export is disabled") + }), config.clone(), auth_config, auth_state, @@ -751,6 +787,235 @@ async fn start_sqlite( serve(app, &config, serve_audit_state).await } +/// Builds the bulk-export subsystem (job store + output store + file auth) for +/// a given resource-store data provider, spawning the in-process worker pool +/// and cleanup task. Returns `None` when bulk export is disabled. +/// +/// Supports both the `embedded` backend (a dedicated SQLite job store + local +/// filesystem output) and the `postgres-s3` backend (a PostgreSQL job store + +/// S3 output). +#[cfg(any(feature = "sqlite", feature = "postgres"))] +async fn build_bulk_export( + config: &ServerConfig, + data: Arc, +) -> anyhow::Result> +where + Dp: helios_persistence::core::ExportResourceProvider + 'static, +{ + let cfg = config.bulk_export.clone(); + info!( + "Bulk export config: enabled={} backend={} output_backend={} requires_access_token={}", + cfg.enabled, cfg.backend, cfg.output_backend, cfg.requires_access_token + ); + if !cfg.enabled { + return Ok(None); + } + + // --- Output store --------------------------------------------------- + let output: Arc = match cfg.output_backend.as_str() { + "local-fs" => { + let output_dir = cfg + .output_dir + .clone() + .or_else(|| { + config + .data_dir + .as_ref() + .map(|d| format!("{}/exports", d.display())) + }) + .unwrap_or_else(|| "./data/exports".to_string()); + Arc::new(LocalFsOutputStore::new(output_dir, config.base_url.clone())) + } + "s3" => { + #[cfg(feature = "s3")] + { + use helios_persistence::backends::s3::{ + AccessTokenMode, AwsS3Client, AwsS3ClientOptions, S3OutputStore, + }; + let bucket = 
cfg.s3_bucket.clone().ok_or_else(|| { + anyhow::anyhow!("HFS_BULK_EXPORT_S3_BUCKET is required for OUTPUT_BACKEND=s3") + })?; + let region = std::env::var("HFS_BULK_EXPORT_S3_REGION") + .ok() + .or_else(|| std::env::var("HFS_S3_REGION").ok()); + let sdk_config = AwsS3Client::load_sdk_config(region.as_deref()).await; + let client = Arc::new(AwsS3Client::from_sdk_config_with_options( + &sdk_config, + AwsS3ClientOptions { + endpoint_url: std::env::var("HFS_BULK_EXPORT_S3_ENDPOINT").ok(), + force_path_style: parse_env_bool( + "HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE", + false, + ), + }, + )); + Arc::new(S3OutputStore::new( + client, + bucket, + config.base_url.clone(), + AccessTokenMode::parse(&cfg.requires_access_token), + std::time::Duration::from_secs(cfg.file_url_ttl_secs), + )) + } + #[cfg(not(feature = "s3"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 requires the 's3' feature. \ + Build with: cargo build -p helios-hfs --features s3" + ); + } + } + other => anyhow::bail!("invalid HFS_BULK_EXPORT_OUTPUT_BACKEND '{other}'"), + }; + + // --- Job store ------------------------------------------------------ + let jobs: Arc = match cfg.backend.as_str() { + "embedded" => { + #[cfg(feature = "sqlite")] + { + let job_db = config + .bulk_export + .output_dir + .as_ref() + .map(|d| format!("{d}/bulk_export.db")) + .unwrap_or_else(|| "./data/bulk_export.db".to_string()); + if let Some(parent) = std::path::Path::new(&job_db).parent() { + std::fs::create_dir_all(parent).map_err(|e| { + anyhow::anyhow!( + "create bulk export job DB directory {}: {e}", + parent.display() + ) + })?; + } + let job_backend = SqliteBackend::with_config( + &job_db, + SqliteBackendConfig { + fhir_version: config.default_fhir_version, + data_dir: config.data_dir.clone(), + ..Default::default() + }, + )?; + job_backend.init_schema()?; + Arc::new(job_backend) + } + #[cfg(not(feature = "sqlite"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_BACKEND=embedded requires the 'sqlite' feature. 
\ + Build with: cargo build -p helios-hfs --features sqlite" + ); + } + } + "postgres-s3" => { + #[cfg(feature = "postgres")] + { + use helios_persistence::backends::postgres::PostgresBackend; + let url = std::env::var("HFS_BULK_EXPORT_DATABASE_URL") + .ok() + .or_else(|| config.database_url.clone()) + .ok_or_else(|| { + anyhow::anyhow!( + "HFS_BULK_EXPORT_DATABASE_URL (or HFS_DATABASE_URL) is required \ + for HFS_BULK_EXPORT_BACKEND=postgres-s3" + ) + })?; + let pg = PostgresBackend::from_connection_string(&url).await?; + pg.init_schema().await?; + Arc::new(pg) + } + #[cfg(not(feature = "postgres"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_BACKEND=postgres-s3 requires the 'postgres' feature. \ + Build with: cargo build -p helios-hfs --features postgres,s3" + ); + } + } + other => anyhow::bail!("invalid HFS_BULK_EXPORT_BACKEND '{other}'"), + }; + + spawn_export_workers(jobs.clone(), data, output.clone(), &cfg); + + Ok(Some(helios_rest::BulkExportBundle { + jobs, + output, + file_auth: Arc::new(BearerScopeAuth), + })) +} + +/// Spawns the in-process export worker pool and the periodic cleanup task. 
+#[cfg(any(feature = "sqlite", feature = "postgres"))] +fn spawn_export_workers( + jobs: Arc, + data: Arc, + output: Arc, + cfg: &helios_rest::config::BulkExportConfig, +) where + Dp: helios_persistence::core::ExportResourceProvider + 'static, +{ + if cfg.disable_local_worker { + info!("Bulk export in-process worker pool is disabled"); + return; + } + let lease = std::time::Duration::from_secs(cfg.lease_duration_secs); + for i in 0..cfg.worker_concurrency { + let jobs = jobs.clone(); + let data = data.clone(); + let output = output.clone(); + let worker_id = WorkerId::new(format!("hfs-worker-{i}")); + let exclude_newly_added = cfg.since_newly_added.eq_ignore_ascii_case("exclude"); + tokio::spawn(async move { + let worker = DefaultExportWorker::new(jobs.clone(), data, output, worker_id.clone()) + .with_exclude_since_newly_added(exclude_newly_added); + loop { + match jobs.claim_next(&worker_id, lease).await { + Ok(Some(claimed)) => { + if let Err(e) = worker.run_job(claimed).await { + tracing::error!("export worker job failed: {e}"); + } + } + Ok(None) => { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + Err(e) => { + tracing::error!("export worker claim failed: {e}"); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + } + } + } + }); + } + + // Periodic cleanup of expired job output. 
+ let cleanup_jobs = jobs.clone(); + let cleanup_output = output.clone(); + let interval = std::time::Duration::from_secs(cfg.cleanup_interval_secs); + let output_ttl = std::time::Duration::from_secs(cfg.output_ttl_secs); + tokio::spawn(async move { + loop { + tokio::time::sleep(interval).await; + match cleanup_jobs + .list_expired_exports(chrono::Utc::now(), output_ttl, 100) + .await + { + Ok(expired) => { + for job in expired { + let _ = cleanup_output + .delete_job_outputs(&job.tenant, &job.job_id) + .await; + let _ = cleanup_jobs.delete_export(&job.tenant, &job.job_id).await; + } + } + Err(e) => tracing::error!("export cleanup scan failed: {e}"), + } + } + }); + info!( + "Bulk export worker pool started ({} workers)", + cfg.worker_concurrency + ); +} + /// Fallback when sqlite feature is not enabled. #[cfg(not(feature = "sqlite"))] async fn start_sqlite( @@ -918,10 +1183,25 @@ async fn start_postgres( }; backend.init_schema().await?; + let backend = Arc::new(backend); let serve_audit_state = audit_state.clone(); + if let Some(bundle) = build_bulk_export(&config, backend.clone()).await? 
{ + let app = create_app_with_auth_and_bulk_export( + backend, + config.clone(), + auth_config, + auth_state, + audit_state, + bundle, + ); + return serve(app, &config, serve_audit_state).await; + } + let app = create_app_with_auth( - backend, + Arc::try_unwrap(backend).unwrap_or_else(|_| { + unreachable!("backend Arc is uniquely owned when bulk export is disabled") + }), config.clone(), auth_config, auth_state, diff --git a/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh new file mode 100755 index 000000000..0a6c820b6 --- /dev/null +++ b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh @@ -0,0 +1,500 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${BASE_URL:-http://localhost:8080}" +FHIR_VERSION="${FHIR_VERSION:-R4}" +RESULTS_DIR="${RESULTS_DIR:-bulk-export-smoke-results}" +SMOKE_RUN_SUFFIX="${SMOKE_RUN_SUFFIX:-local-$(date +%s)-$$}" +BULK_EXPORT_EXPECTATION="${BULK_EXPORT_EXPECTATION:-full}" +EXPECT_REQUIRES_ACCESS_TOKEN="${EXPECT_REQUIRES_ACCESS_TOKEN:-true}" + +HTTP_DIR="$RESULTS_DIR/http" +MANIFEST_DIR="$RESULTS_DIR/manifests" +NDJSON_DIR="$RESULTS_DIR/ndjson" +SUMMARY_FILE="$RESULTS_DIR/summary.md" + +mkdir -p "$HTTP_DIR" "$MANIFEST_DIR" "$NDJSON_DIR" + +log() { + echo "[bulk-export-smoke] $*" +} + +fail() { + local msg="$1" + echo "[bulk-export-smoke] ERROR: $msg" >&2 + mkdir -p "$(dirname "$SUMMARY_FILE")" + echo "- FAIL: $msg" >> "$SUMMARY_FILE" + if [ -n "${HFS_LOG:-}" ] && [ -f "$HFS_LOG" ]; then + echo "---- hfs log (tail) ----" >&2 + tail -n 160 "$HFS_LOG" >&2 || true + echo "------------------------" >&2 + fi + exit 1 +} + +pass() { + local msg="$1" + echo "- PASS: $msg" >> "$SUMMARY_FILE" +} + +require_cmd() { + local cmd="$1" + if ! 
command -v "$cmd" >/dev/null 2>&1; then + fail "required command not found: $cmd" + fi +} + +expect_status() { + local actual="$1" + local expected="$2" + local operation="$3" + local response_file="$4" + if [ "$actual" != "$expected" ]; then + echo "---- $operation response ----" >&2 + cat "$response_file" >&2 || true + echo "----------------------------" >&2 + fail "$operation returned HTTP $actual, expected $expected" + fi +} + +expect_created() { + local status="$1" + local operation="$2" + local response_file="$3" + if [ "$status" != "200" ] && [ "$status" != "201" ]; then + echo "---- $operation response ----" >&2 + cat "$response_file" >&2 || true + echo "----------------------------" >&2 + fail "$operation returned unexpected HTTP status: $status" + fi +} + +case "$FHIR_VERSION" in + R4) FHIR_MIME_VERSION="4.0" ;; + R4B) FHIR_MIME_VERSION="4.3" ;; + R5) FHIR_MIME_VERSION="5.0" ;; + *) fail "unsupported FHIR_VERSION: $FHIR_VERSION (expected R4, R4B, or R5)" ;; +esac + +FHIR_CT="application/fhir+json; fhirVersion=$FHIR_MIME_VERSION" +FHIR_ACCEPT="$FHIR_CT" +ID_SUFFIX="$(printf '%s' "$SMOKE_RUN_SUFFIX-$FHIR_VERSION" | tr -cs '[:alnum:]-' '-' | sed -e 's/^-*//' -e 's/-*$//')" +if [ -z "$ID_SUFFIX" ]; then + ID_SUFFIX="bulk-smoke" +fi + +PATIENT_A="bulk-smoke-patient-a-$ID_SUFFIX" +PATIENT_B="bulk-smoke-patient-b-$ID_SUFFIX" +PATIENT_C="bulk-smoke-patient-c-$ID_SUFFIX" +OBS_A="bulk-smoke-observation-a-$ID_SUFFIX" +OBS_B="bulk-smoke-observation-b-$ID_SUFFIX" +OBS_C="bulk-smoke-observation-c-$ID_SUFFIX" +GROUP_ID="bulk-smoke-group-$ID_SUFFIX" + +write_summary_header() { + cat > "$SUMMARY_FILE" < "$HTTP_DIR/patient-a.json" < "$HTTP_DIR/patient-b.json" < "$HTTP_DIR/patient-c.json" < "$HTTP_DIR/observation-a.json" < "$HTTP_DIR/observation-b.json" < "$HTTP_DIR/observation-c.json" < "$HTTP_DIR/group.json" </dev/null || fail "CapabilityStatement does not advertise all bulk export operations" + pass "CapabilityStatement advertises bulk export operations" +} + 
+assert_no_bulk_export_endpoint() { + local response="$HTTP_DIR/export-unavailable.response" + local status + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL/\$export?_type=Patient")" + case "$status" in + 400|404|501|500) + pass "bulk export endpoint is unavailable as expected (HTTP $status)" + ;; + *) + cat "$response" >&2 || true + fail "expected bulk export endpoint to be unavailable, got HTTP $status" + ;; + esac +} + +assert_requires_respond_async() { + local response="$HTTP_DIR/export-missing-prefer.response" + local status + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL/\$export?_type=Patient")" + expect_status "$status" "400" "GET /\$export without Prefer" "$response" + pass "kickoff requires Prefer: respond-async" +} + +kickoff_get() { + local label="$1" + local path="$2" + local response="$HTTP_DIR/$label-kickoff.response" + local headers="$HTTP_DIR/$label-kickoff.headers" + local status + status="$(curl -sS -D "$headers" -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL$path")" + expect_status "$status" "202" "$label kickoff" "$response" + local content_location + content_location="$(grep -i '^content-location:' "$headers" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')" + if [ -z "$content_location" ]; then + fail "$label kickoff did not return Content-Location" + fi + printf '%s\n' "$content_location" +} + +kickoff_patient_post() { + local label="$1" + local patient_ref="$2" + local body="$HTTP_DIR/$label-parameters.json" + local response="$HTTP_DIR/$label-kickoff.response" + local headers="$HTTP_DIR/$label-kickoff.headers" + cat > "$body" <&2 || true + fail "$label status returned HTTP $status before completion" + fi + sleep 2 + done + + fail "$label export did not complete before timeout" +} + +expect_export_failure() { + local label="$1" + local 
path="$2" + local response="$HTTP_DIR/$label-expected-failure.response" + local headers="$HTTP_DIR/$label-expected-failure.headers" + local status + status="$(curl -sS -D "$headers" -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL$path")" + + case "$status" in + 400|404|500|501) + pass "$label failed immediately as expected (HTTP $status)" + return 0 + ;; + 202) + ;; + *) + cat "$response" >&2 || true + fail "$label returned unexpected HTTP $status for expected-negative export" + ;; + esac + + local status_url + status_url="$(grep -i '^content-location:' "$headers" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')" + if [ -z "$status_url" ]; then + fail "$label expected-negative kickoff returned 202 without Content-Location" + fi + + for _ in $(seq 1 45); do + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Accept: application/json" \ + "$status_url")" + case "$status" in + 202) + sleep 2 + ;; + 500|501|400|404) + pass "$label reached expected failure state (HTTP $status)" + return 0 + ;; + 200) + cat "$response" >&2 || true + fail "$label unexpectedly completed successfully" + ;; + *) + cat "$response" >&2 || true + fail "$label reached unexpected status HTTP $status" + ;; + esac + done + + fail "$label expected-negative export did not fail before timeout" +} + +download_outputs() { + local label="$1" + local manifest="$2" + local merged="$NDJSON_DIR/$label-all.ndjson" + : > "$merged" + + local output_count + output_count="$(jq '.output | length' "$manifest")" + if [ "$output_count" -lt 1 ]; then + fail "$label manifest has no output files" + fi + + local requires_token + requires_token="$(jq -r '.requiresAccessToken' "$manifest")" + if [ "$requires_token" != "$EXPECT_REQUIRES_ACCESS_TOKEN" ]; then + fail "$label manifest requiresAccessToken=$requires_token, expected $EXPECT_REQUIRES_ACCESS_TOKEN" + fi + + local idx=0 + while [ "$idx" -lt "$output_count" ]; do + local url + local 
resource_type + local file + url="$(jq -r ".output[$idx].url" "$manifest")" + resource_type="$(jq -r ".output[$idx].type" "$manifest")" + file="$NDJSON_DIR/$label-$idx-$resource_type.ndjson" + curl -sS -o "$file" "$url" + if [ -s "$file" ]; then + while IFS= read -r line; do + [ -z "$line" ] && continue + printf '%s\n' "$line" | jq -e --arg rt "$resource_type" '.resourceType == $rt' >/dev/null \ + || fail "$label output $idx contains invalid JSON or wrong resourceType" + done < "$file" + cat "$file" >> "$merged" + fi + idx=$((idx + 1)) + done + + if [ ! -s "$merged" ]; then + fail "$label downloaded outputs were empty" + fi + printf '%s\n' "$merged" +} + +assert_ids() { + local label="$1" + local ndjson="$2" + shift 2 + local expected + for expected in "$@"; do + jq -e --arg id "$expected" 'select(.id == $id)' "$ndjson" >/dev/null \ + || fail "$label output missing resource id $expected" + done +} + +assert_absent_ids() { + local label="$1" + local ndjson="$2" + shift 2 + local unexpected + for unexpected in "$@"; do + if jq -e --arg id "$unexpected" 'select(.id == $id)' "$ndjson" >/dev/null; then + fail "$label output unexpectedly included resource id $unexpected" + fi + done +} + +assert_type_counts() { + local label="$1" + local manifest="$2" + local patient_min="$3" + local observation_min="$4" + local patient_count + local observation_count + patient_count="$(jq '[.output[] | select(.type == "Patient") | .count // 0] | add // 0' "$manifest")" + observation_count="$(jq '[.output[] | select(.type == "Observation") | .count // 0] | add // 0' "$manifest")" + if [ "$patient_count" -lt "$patient_min" ]; then + fail "$label manifest Patient count $patient_count is below expected minimum $patient_min" + fi + if [ "$observation_count" -lt "$observation_min" ]; then + fail "$label manifest Observation count $observation_count is below expected minimum $observation_min" + fi +} + +run_full_lifecycle() { + assert_metadata_advertises_export + assert_requires_respond_async 
+ seed_data + + local status_url manifest ndjson + + log "Running system export" + status_url="$(kickoff_get system "/\$export?_type=Patient,Observation")" + manifest="$(poll_manifest system "$status_url")" + assert_type_counts system "$manifest" 3 3 + ndjson="$(download_outputs system "$manifest")" + assert_ids system "$ndjson" "$PATIENT_A" "$PATIENT_B" "$PATIENT_C" "$OBS_A" "$OBS_B" "$OBS_C" + pass "system export completed and downloaded expected resources" + + log "Running patient export" + status_url="$(kickoff_patient_post patient "Patient/$PATIENT_A")" + manifest="$(poll_manifest patient "$status_url")" + assert_type_counts patient "$manifest" 1 1 + ndjson="$(download_outputs patient "$manifest")" + assert_ids patient "$ndjson" "$PATIENT_A" "$OBS_A" + assert_absent_ids patient "$ndjson" "$PATIENT_B" "$PATIENT_C" "$OBS_B" "$OBS_C" + pass "patient export scoped to requested patient" + + log "Running group export" + status_url="$(kickoff_get group "/Group/$GROUP_ID/\$export?_type=Patient,Observation")" + manifest="$(poll_manifest group "$status_url")" + assert_type_counts group "$manifest" 2 2 + ndjson="$(download_outputs group "$manifest")" + assert_ids group "$ndjson" "$PATIENT_A" "$PATIENT_B" "$OBS_A" "$OBS_B" + assert_absent_ids group "$ndjson" "$PATIENT_C" "$OBS_C" + pass "group export scoped to group members" + + local delete_response="$HTTP_DIR/system-delete.response" + local delete_status + delete_status="$(curl -sS -o "$delete_response" -w "%{http_code}" -X DELETE "$status_url")" + expect_status "$delete_status" "202" "DELETE final export status URL" "$delete_response" + + local gone_response="$HTTP_DIR/final-status-after-delete.response" + local gone_status + gone_status="$(curl -sS -o "$gone_response" -w "%{http_code}" "$status_url")" + expect_status "$gone_status" "404" "GET final export status URL after delete" "$gone_response" + pass "export delete endpoint accepted cleanup request and removed status URL" +} + +run_expected_negative() { + seed_data 
+ expect_export_failure system "/\$export?_type=Patient,Observation" +} + +main() { + require_cmd curl + require_cmd jq + write_summary_header + + case "$BULK_EXPORT_EXPECTATION" in + full) + run_full_lifecycle + ;; + unsupported) + run_expected_negative + ;; + endpoint-unavailable) + assert_no_bulk_export_endpoint + ;; + *) + fail "unknown BULK_EXPORT_EXPECTATION: $BULK_EXPORT_EXPECTATION" + ;; + esac + + echo "" >> "$SUMMARY_FILE" + echo "All bulk export smoke checks completed for expectation \`$BULK_EXPORT_EXPECTATION\`." >> "$SUMMARY_FILE" + log "Bulk export smoke test completed successfully" +} + +main "$@" diff --git a/crates/persistence/README.md b/crates/persistence/README.md index 7450a2366..183c8a881 100644 --- a/crates/persistence/README.md +++ b/crates/persistence/README.md @@ -84,7 +84,9 @@ helios-persistence/ │ │ ├── search.rs # Search providers (basic, chained, include) │ │ ├── transaction.rs # ACID transactions with bundle support │ │ ├── capabilities.rs # Runtime capability discovery -│ │ ├── bulk_export.rs # FHIR Bulk Data Export traits +│ │ ├── bulk_export.rs # FHIR Bulk Data Export job/data traits +│ │ ├── bulk_export_output.rs # ExportOutputStore trait +│ │ ├── bulk_export_worker.rs # Bulk export worker runtime and leasing traits │ │ └── bulk_submit.rs # FHIR Bulk Submit traits │ ├── search/ # Search parameter infrastructure │ │ ├── registry.rs # SearchParameterRegistry (in-memory cache) @@ -152,10 +154,11 @@ helios-persistence/ │ │ ├── config.rs # S3BackendConfig, S3TenancyMode │ │ ├── client.rs # S3Api trait and AwsS3Client implementation │ │ ├── keyspace.rs # S3Keyspace key-path generation -│ │ ├── models.rs # HistoryIndexEvent, ExportJobState, SubmissionState +│ │ ├── models.rs # HistoryIndexEvent, SubmissionState │ │ ├── storage.rs # ResourceStorage implementation │ │ ├── bundle.rs # Batch/transaction bundle processing -│ │ ├── bulk_export.rs # BulkExportStorage implementation +│ │ ├── bulk_export.rs # ExportDataProvider implementation +│ 
│ ├── output_store.rs # S3OutputStore for bulk export files │ │ ├── bulk_submit.rs # BulkSubmitProvider implementation │ │ └── tests.rs # Integration tests │ ├── composite/ # Multi-backend coordination @@ -369,10 +372,10 @@ The matrix below shows which FHIR operations each backend supports. This reflect | Single field | ✓ | ✓ | ✓ | ✗ | ○ | ✓ | ✗ | | Multiple fields | ✓ | ✓ | ✓ | ✗ | ○ | ✓ | ✗ | | **[Bulk Operations](https://hl7.org/fhir/uv/bulkdata/)** | -| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | +| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ◐ | | [Bulk Submit](https://hackmd.io/@argonaut/rJoqHZrPle) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | -The S3 backend is intentionally storage-focused (CRUD/version/history/bulk) and does not act as a full FHIR search engine. For query-heavy deployments, use a DB/search backend as primary query engine and compose S3 as archive/bulk/history storage. +The S3 backend is intentionally storage-focused (CRUD/version/history/bulk submit) and does not act as a full FHIR search engine. For bulk export, S3 can feed system-level batches through `ExportDataProvider` and can store output files through `S3OutputStore`, but job state belongs to SQLite or PostgreSQL. Patient-level and Group-level export compartment enumeration is not supported when S3 is the resource store. For query-heavy deployments, use a DB/search backend as the primary query engine and compose S3 as archive/history/output storage. 
### Primary/Secondary Role Matrix @@ -389,7 +392,7 @@ Backends can serve as primary (CRUD, versioning, transactions) or secondary (opt | Cassandra + Elasticsearch | Cassandra | Elasticsearch (search) | Planned | Write-heavy + search | | MongoDB alone | MongoDB | — | ✓ Implemented | Document-centric | | MongoDB + Elasticsearch | MongoDB | Elasticsearch (search) | ✓ Implemented | Document-centric + offloaded search | -| S3 alone | S3 | — | ✓ Implemented (storage-focused) | Archival/bulk/history storage | +| S3 alone | S3 | — | ✓ Implemented (storage-focused) | Archival/history storage | | S3 + Elasticsearch | S3 | Elasticsearch (search) | ✓ Implemented | Large-scale + search | ### Backend Selection Guide @@ -600,13 +603,14 @@ HFS_ELASTICSEARCH_NODES=http://localhost:9200 \ ### S3 + Elasticsearch -S3 handles CRUD, versioning, history, and bulk operations. Elasticsearch handles all search operations. Combines S3's cost-effective, durable object storage with Elasticsearch's search capabilities for large-scale deployments. +S3 handles CRUD, versioning, history, and bulk-submit artifacts. Elasticsearch handles all search operations. For bulk export, this topology can use S3 as the resource data provider for system-level exports and `S3OutputStore` as the output-file store; export job state still lives in the configured SQLite or PostgreSQL bulk-export job store. 
- CRUD persistence via S3 objects (current pointer + immutable history versions) - Versioning (`vread`, optimistic locking via version checks) - Instance, type, and system history via immutable history objects - Batch bundles and best-effort transaction bundles -- Bulk export (NDJSON parts + manifest in S3) +- Bulk export data provider for system-level exports +- Optional S3 bulk-export output files via `S3OutputStore` - Bulk submit with rollback change log - Full-text search with relevance scoring (`_text`, `_content`) via Elasticsearch - All FHIR search parameter types (string, token, date, number, quantity, reference, URI, composite) @@ -758,7 +762,7 @@ let composite = CompositeStorage::new(config, backends)? ## S3 Backend -The S3 backend is a storage-focused persistence backend using AWS S3 object storage. It handles CRUD, versioning/history, and bulk workflows but is intentionally not a FHIR search engine. For query-heavy deployments, compose S3 with a DB/search backend as the primary query engine. +The S3 backend is a storage-focused persistence backend using AWS S3 object storage. It handles CRUD, versioning/history, and bulk-submit workflows but is intentionally not a FHIR search engine. For bulk export, S3 participates in two narrower roles: `S3Backend` can provide resource batches for system-level exports, and `S3OutputStore` can store finalized NDJSON output files. Bulk-export job state, progress, manifests, leases, and file metadata are not stored in S3; they live in SQLite or PostgreSQL. 
### Scope @@ -767,7 +771,8 @@ The S3 backend is a storage-focused persistence backend using AWS S3 object stor - Versioning (`vread`, `list_versions`, optimistic conflict checks) - Instance/type/system history via immutable history objects plus history index events - Batch bundles and best-effort transaction bundles (non-atomic with compensating rollback) -- Bulk export (NDJSON objects + manifest/progress state in S3) +- Bulk export resource data provider for system-level exports +- Bulk export output storage through `S3OutputStore` when configured separately from job state - Bulk submit (ingest + raw artifact persistence + rollback change log) - Tenant isolation (`PrefixPerTenant` or `BucketPerTenant`) @@ -776,7 +781,7 @@ The S3 backend is a storage-focused persistence backend using AWS S3 object stor ### Configuration ```rust -use helios_persistence::backends::s3::S3BackendConfig; +use helios_persistence::backends::s3::{S3BackendConfig, S3TenancyMode}; let config = S3BackendConfig { tenancy_mode: S3TenancyMode::PrefixPerTenant { @@ -785,8 +790,8 @@ let config = S3BackendConfig { prefix: None, region: None, validate_buckets_on_startup: true, - bulk_export_part_size: 10_000, bulk_submit_batch_size: 100, + ..Default::default() }; ``` @@ -796,7 +801,6 @@ let config = S3BackendConfig { | `prefix` | `None` | Optional global key prefix applied before backend keys | | `region` | `None` | AWS region override (falls back to provider chain) | | `validate_buckets_on_startup` | `true` | Validate configured buckets with `HeadBucket` on startup | -| `bulk_export_part_size` | `10000` | Max NDJSON lines per export output part | | `bulk_submit_batch_size` | `100` | Default ingestion batch size for bulk submit processing | ### Tenancy Modes @@ -817,14 +821,13 @@ Resource objects: | Type history event | `.../history/type/{type}/{ts}_{id}_{version}_{suffix}.json` | | System history event | `.../history/system/{ts}_{type}_{id}_{version}_{suffix}.json` | -Bulk export objects: +Bulk 
export output objects: | Object | Key Pattern | |--------|-------------| -| Job state | `.../bulk/export/jobs/{job_id}/state.json` | -| Progress | `.../bulk/export/jobs/{job_id}/progress/{type}.json` | -| Output | `.../bulk/export/jobs/{job_id}/output/{type}/part-{n}.ndjson` | -| Manifest | `.../bulk/export/jobs/{job_id}/manifest.json` | +| Finalized NDJSON part | `{tenant_id}/exports/{job_id}/{file_type}-{resource_type}-{part_index}-{fencing_token}.ndjson` | + +Bulk-export job state is deliberately not modeled as S3 objects. SQLite and PostgreSQL store the job row, progress, leases/fencing tokens, file metadata, and raw manifest rows. `S3OutputStore` stores only finalized output parts and deletes every object under `{tenant_id}/exports/{job_id}/` during cancellation or retention cleanup. The REST layer assembles the client-facing manifest from the job store plus `ExportOutputStore::download_url`. Bulk submit objects: @@ -1112,12 +1115,14 @@ The SQLite backend includes a complete FHIR search implementation using pre-comp - [x] ReindexableStorage implementation ### Phase 5c: S3 Backend ✓ + - [x] S3BackendConfig with PrefixPerTenant and BucketPerTenant tenancy modes - [x] ResourceStorage implementation (CRUD via S3 objects) - [x] VersionedStorage implementation (vread, optimistic locking) - [x] History providers (instance, type, system via immutable history objects) - [x] Batch and best-effort transaction bundles -- [x] BulkExportStorage implementation (NDJSON parts + manifest in S3) +- [x] ExportDataProvider implementation for system-level bulk export +- [x] S3OutputStore implementation for bulk-export NDJSON output files - [x] BulkSubmitProvider implementation (ingest, raw artifacts, rollback change log) ### Phase 5+: Additional Backends (Planned) @@ -1167,7 +1172,7 @@ The composite storage layer enables polyglot persistence by coordinating multipl | PostgreSQL + Neo4j | PostgreSQL | Neo4j | Planned | Graph-heavy queries | | MongoDB-only | MongoDB | None | ✓ 
Implemented | Document-centric primary | | MongoDB + ES | MongoDB | Elasticsearch | ✓ Implemented | Document-centric + search | -| S3 alone | S3 | — | ✓ Implemented | Archival/bulk storage | +| S3 alone | S3 | — | ✓ Implemented | Archival/history storage | | S3 + ES | S3 | Elasticsearch | ✓ Implemented | Large-scale + search | ### Quick Start diff --git a/crates/persistence/src/backends/local_fs/mod.rs b/crates/persistence/src/backends/local_fs/mod.rs new file mode 100644 index 000000000..39deb2ac3 --- /dev/null +++ b/crates/persistence/src/backends/local_fs/mod.rs @@ -0,0 +1,217 @@ +//! Local-filesystem [`ExportOutputStore`] for single-instance bulk export. +//! +//! Writes NDJSON output parts under `{root}/{tenant}/{job_id}/` and serves +//! download URLs through HFS itself (`requires_access_token = true`). + +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::core::bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +/// An [`ExportOutputStore`] backed by the local filesystem. +#[derive(Debug, Clone)] +pub struct LocalFsOutputStore { + /// Root directory under which all export output lives. + root: PathBuf, + /// Base URL used to construct HFS-served download URLs. + base_url: String, +} + +impl LocalFsOutputStore { + /// Creates a new local-filesystem output store. + /// + /// `root` is the directory under which `{tenant}/{job_id}/...` is created; + /// `base_url` is the HFS base URL used for download links. + pub fn new(root: impl Into, base_url: impl Into) -> Self { + Self { + root: root.into(), + base_url: base_url.into(), + } + } + + /// The directory holding all parts for a single job. 
+ fn job_dir(&self, tenant_id: &str, job_id: &ExportJobId) -> PathBuf { + self.root.join(tenant_id).join(job_id.as_str()) + } + + /// The final file path for a part. + fn part_path(&self, key: &ExportPartKey) -> PathBuf { + self.job_dir(&key.tenant_id, &key.job_id).join(format!( + "{}-{}-{}-{}.ndjson", + key.file_type, key.resource_type, key.part_index, key.fencing_token + )) + } + + /// The temp file path for an in-flight part. + fn tmp_path(&self, key: &ExportPartKey) -> PathBuf { + let mut p = self.part_path(key); + p.set_extension("ndjson.tmp"); + p + } +} + +fn io_err(message: String) -> StorageError { + StorageError::Backend(BackendError::Internal { + backend_name: "local-fs".to_string(), + message, + source: None, + }) +} + +#[async_trait] +impl ExportOutputStore for LocalFsOutputStore { + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult { + let dir = self.job_dir(&key.tenant_id, &key.job_id); + tokio::fs::create_dir_all(&dir) + .await + .map_err(|e| io_err(format!("create_dir_all {}: {e}", dir.display())))?; + let tmp = self.tmp_path(key); + let file = tokio::fs::File::create(&tmp) + .await + .map_err(|e| io_err(format!("create {}: {e}", tmp.display())))?; + let boxed: std::pin::Pin> = Box::pin(file); + Ok(ExportPartWriter::new(boxed)) + } + + async fn finalize_part( + &self, + key: &ExportPartKey, + mut writer: ExportPartWriter, + ) -> StorageResult { + use tokio::io::AsyncWriteExt; + writer + .writer + .flush() + .await + .map_err(|e| io_err(format!("flush: {e}")))?; + writer + .writer + .shutdown() + .await + .map_err(|e| io_err(format!("shutdown: {e}")))?; + let line_count = writer.line_count; + let byte_count = writer.byte_count; + drop(writer); + + let tmp = self.tmp_path(key); + let final_path = self.part_path(key); + tokio::fs::rename(&tmp, &final_path).await.map_err(|e| { + io_err(format!( + "rename {} -> {}: {e}", + tmp.display(), + final_path.display() + )) + })?; + + Ok(FinalizedPart { + key: key.clone(), + resource_type: 
key.resource_type.clone(), + line_count, + size_bytes: byte_count, + }) + } + + async fn download_url( + &self, + key: &ExportPartKey, + _ttl: Duration, + ) -> StorageResult { + // HFS-served URL — the download handler resolves {job_id}/{part}. + let base = self.base_url.trim_end_matches('/'); + Ok(DownloadUrl { + url: format!( + "{}/export-file/{}/{}-{}", + base, key.job_id, key.resource_type, key.part_index + ), + requires_access_token: true, + }) + } + + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>> { + let path = self.part_path(key); + let file = tokio::fs::File::open(&path) + .await + .map_err(|e| io_err(format!("open {}: {e}", path.display())))?; + Ok(Box::pin(file)) + } + + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let dir = self.job_dir(tenant.tenant_id().as_str(), job_id); + delete_dir_idempotent(&dir).await + } +} + +/// Removes a directory if it exists; a missing directory is `Ok`. 
+async fn delete_dir_idempotent(dir: &Path) -> StorageResult<()> { + match tokio::fs::remove_dir_all(dir).await { + Ok(()) => Ok(()), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(io_err(format!("remove_dir_all {}: {e}", dir.display()))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tenant::{TenantId, TenantPermissions}; + use tokio::io::AsyncReadExt; + + fn test_key(job: &ExportJobId) -> ExportPartKey { + ExportPartKey::output("t1", job.clone(), "Patient", 0, 1) + } + + #[tokio::test] + async fn test_write_finalize_read_delete() { + let tmp = tempfile::tempdir().unwrap(); + let store = LocalFsOutputStore::new(tmp.path(), "http://localhost:8080"); + let job = ExportJobId::new(); + let key = test_key(&job); + + let mut writer = store.open_writer(&key).await.unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"1"}"#) + .await + .unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"2"}"#) + .await + .unwrap(); + let finalized = store.finalize_part(&key, writer).await.unwrap(); + assert_eq!(finalized.line_count, 2); + assert!(finalized.size_bytes > 0); + + let url = store + .download_url(&key, Duration::from_secs(60)) + .await + .unwrap(); + assert!(url.requires_access_token); + assert!(url.url.contains("/export-file/")); + assert!(url.url.contains("Patient-0")); + + let mut reader = store.open_reader(&key).await.unwrap(); + let mut content = String::new(); + reader.read_to_string(&mut content).await.unwrap(); + assert_eq!(content.lines().count(), 2); + + let tenant = TenantContext::new(TenantId::new("t1"), TenantPermissions::full_access()); + store.delete_job_outputs(&tenant, &job).await.unwrap(); + // Idempotent: deleting again is fine. 
+ store.delete_job_outputs(&tenant, &job).await.unwrap(); + assert!(store.open_reader(&key).await.is_err()); + } +} diff --git a/crates/persistence/src/backends/mod.rs b/crates/persistence/src/backends/mod.rs index 0e2e72256..9aa90ee76 100644 --- a/crates/persistence/src/backends/mod.rs +++ b/crates/persistence/src/backends/mod.rs @@ -37,6 +37,9 @@ pub mod sqlite; #[cfg(feature = "postgres")] pub mod postgres; + +/// Local filesystem [`ExportOutputStore`](crate::core::bulk_export_output::ExportOutputStore). +pub mod local_fs; // // #[cfg(feature = "cassandra")] // pub mod cassandra; diff --git a/crates/persistence/src/backends/mongodb/bulk_export.rs b/crates/persistence/src/backends/mongodb/bulk_export.rs new file mode 100644 index 000000000..f5006266d --- /dev/null +++ b/crates/persistence/src/backends/mongodb/bulk_export.rs @@ -0,0 +1,99 @@ +//! Bulk export stub implementations for the MongoDB backend. +//! +//! MongoDB does not yet support bulk export as a primary resource store. +//! These stub impls satisfy the [`ExportDataProvider`] / +//! [`PatientExportProvider`] / [`GroupExportProvider`] trait hierarchy so a +//! MongoDB-primary deployment compiles; every method returns +//! `UnsupportedCapability`. 
+ +use async_trait::async_trait; + +use crate::core::bulk_export::{ + ExportDataProvider, ExportRequest, GroupExportProvider, NdjsonBatch, PatientExportProvider, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::MongoBackend; + +fn mongo_export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "mongodb".to_string(), + capability: "bulk-export".to_string(), + }) +} + +#[async_trait] +impl ExportDataProvider for MongoBackend { + async fn list_export_types( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + ) -> StorageResult> { + Err(mongo_export_unsupported()) + } + + async fn count_export_resources( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } + + async fn fetch_export_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } +} + +#[async_trait] +impl PatientExportProvider for MongoBackend { + async fn list_patient_ids( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + Err(mongo_export_unsupported()) + } + + async fn fetch_patient_compartment_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _patient_ids: &[String], + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } +} + +#[async_trait] +impl GroupExportProvider for MongoBackend { + async fn get_group_members( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(mongo_export_unsupported()) + } + + async fn resolve_group_patient_ids( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> 
StorageResult> { + Err(mongo_export_unsupported()) + } +} diff --git a/crates/persistence/src/backends/mongodb/mod.rs b/crates/persistence/src/backends/mongodb/mod.rs index 1267f4651..47f3b28bc 100644 --- a/crates/persistence/src/backends/mongodb/mod.rs +++ b/crates/persistence/src/backends/mongodb/mod.rs @@ -16,6 +16,7 @@ //! Advanced search/composite behavior remains part of later phases. mod backend; +mod bulk_export; pub(crate) mod schema; mod search_impl; mod storage; diff --git a/crates/persistence/src/backends/postgres/bulk_export.rs b/crates/persistence/src/backends/postgres/bulk_export.rs index 16fe427d1..25104a6f8 100644 --- a/crates/persistence/src/backends/postgres/bulk_export.rs +++ b/crates/persistence/src/backends/postgres/bulk_export.rs @@ -1,16 +1,22 @@ //! Bulk export implementation for PostgreSQL backend. use async_trait::async_trait; -use chrono::Utc; +use chrono::{DateTime, Utc}; use serde_json::Value; +use std::time::Duration as StdDuration; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportProgress, ExportRequest, ExportStatus, + GroupExportProvider, NdjsonBatch, PatientExportProvider, RawExportManifest, RawManifestEntry, + StartExportInput, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportPartKey, FinalizedPart}; +use crate::core::bulk_export_worker::{ + ExportClaimStrategy, ExportJobLease, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; -use crate::tenant::TenantContext; +use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use super::PostgresBackend; @@ -22,60 +28,72 @@ fn 
internal_error(message: String) -> StorageError { }) } +/// Splits a `{resource_type}-{part_index}` download segment. +fn parse_part_segment(part: &str) -> Option<(String, u32)> { + let idx = part.rfind('-')?; + let resource_type = &part[..idx]; + let part_index: u32 = part[idx + 1..].parse().ok()?; + if resource_type.is_empty() { + return None; + } + Some((resource_type.to_string(), part_index)) +} + +/// Encodes an [`ExportPartKey`] into the `file_path` column. +fn encode_part_path(key: &ExportPartKey) -> String { + format!( + "{}/{}/{}/{}-{}-{}", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) +} + #[async_trait] impl BulkExportStorage for PostgresBackend { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult { let client = self.get_client().await?; let tenant_id = tenant.tenant_id().as_str(); - // Check for too many concurrent exports (limit to 5 active exports per tenant) - let row = client - .query_one( - "SELECT COUNT(*) FROM bulk_export_jobs - WHERE tenant_id = $1 AND status IN ('accepted', 'in-progress')", - &[&tenant_id], - ) - .await - .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; - - let active_count: i64 = row.get(0); - if active_count >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - let job_id = ExportJobId::new(); let now = Utc::now(); - let level_str = match &request.level { + let level_str = match &input.request.level { ExportLevel::System => "system".to_string(), ExportLevel::Patient => "patient".to_string(), ExportLevel::Group { .. 
} => "group".to_string(), }; - let group_id = request.group_id().map(|s| s.to_string()); + let group_id = input.request.group_id().map(|s| s.to_string()); - let request_json = serde_json::to_string(&request) + let request_json = serde_json::to_string(&input.request) .map_err(|e| internal_error(format!("Failed to serialize request: {}", e)))?; + let fhir_version = input.fhir_version.as_mime_param(); client .execute( "INSERT INTO bulk_export_jobs - (id, tenant_id, status, level, group_id, request_json, transaction_time, created_at) - VALUES ($1, $2, 'accepted', $3, $4, $5, $6, $7)", + (id, tenant_id, status, level, group_id, request_json, transaction_time, + created_at, owner_subject, request_url, fhir_version, fencing_token) + VALUES ($1, $2, 'accepted', $3, $4, $5, $6, $7, $8, $9, $10, 0)", &[ &job_id.as_str(), &tenant_id, &level_str.as_str(), &group_id, &request_json.as_str(), + &input.transaction_time, &now, - &now, + &input.owner_subject, + &input.request_url.as_str(), + &fhir_version, ], ) .await @@ -151,9 +169,9 @@ impl BulkExportStorage for PostgresBackend { .iter() .map(|r| TypeExportProgress { resource_type: r.get(0), - total_count: r.get::<_, Option>(1).map(|v| v as u64), - exported_count: r.get::<_, i64>(2) as u64, - error_count: r.get::<_, i64>(3) as u64, + total_count: r.get::<_, Option>(1).map(|v| v as u64), + exported_count: r.get::<_, i32>(2) as u64, + error_count: r.get::<_, i32>(3) as u64, cursor_state: r.get(4), }) .collect(); @@ -247,63 +265,238 @@ impl BulkExportStorage for PostgresBackend { &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult { - let progress = self.get_export_status(tenant, job_id).await?; + ) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); - if progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { + let job_rows = client + .query( + "SELECT status, transaction_time, request_url, 
error_message, completed_at + FROM bulk_export_jobs WHERE id = $1 AND tenant_id = $2", + &[&job_id.as_str(), &tenant_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get export job: {}", e)))?; + let job_row = job_rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: progress.status.to_string(), - })); - } - - let client = self.get_client().await?; + }) + })?; + let status_str: String = job_row.get(0); + let transaction_time: DateTime = job_row.get(1); + let request_url: String = job_row.get(2); + let error_message: Option = job_row.get(3); + let completed_at: Option> = job_row.get(4); + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; let rows = client .query( - "SELECT resource_type, file_path, resource_count, file_type + "SELECT resource_type, resource_count, file_type, part_index, fencing_token FROM bulk_export_files WHERE job_id = $1 - ORDER BY resource_type", + ORDER BY file_type, resource_type, part_index", &[&job_id.as_str()], ) .await .map_err(|e| internal_error(format!("Failed to query files: {}", e)))?; - let mut output_files = Vec::new(); - let mut error_files = Vec::new(); - + let mut output = Vec::new(); + let mut errors = Vec::new(); for row in &rows { let resource_type: String = row.get(0); - let file_path: String = row.get(1); - let count: Option = row.get(2); - let file_type: String = row.get(3); - - let file = ExportOutputFile { + let count: Option = row.get(1); + let file_type: String = row.get(2); + let part_index: i32 = row.get(3); + let fencing_token: i64 = row.get(4); + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index: part_index as u32, + fencing_token: fencing_token as u64, + }; + let entry = RawManifestEntry 
{ resource_type, - url: file_path, - count: count.map(|c| c as u64), + key, + count: count.unwrap_or(0) as u64, }; - if file_type == "error" { - error_files.push(file); + errors.push(entry); } else { - output_files.push(file); + output.push(entry); } } - Ok(ExportManifest { - transaction_time: progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: error_files, - message: None, - extension: None, + Ok(RawExportManifest { + transaction_time, + request_url, + status, + error_message, + completed_at, + output, + errors, }) } + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT status, level, group_id, owner_subject, transaction_time, + completed_at, request_url + FROM bulk_export_jobs WHERE id = $1 AND tenant_id = $2", + &[&job_id.as_str(), &tenant_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get export job metadata: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + })?; + let status_str: String = row.get(0); + let level_str: String = row.get(1); + let group_id: Option = row.get(2); + let owner_subject: Option = row.get(3); + let transaction_time: DateTime = row.get(4); + let completed_at: Option> = row.get(5); + let request_url: String = row.get(6); + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status: {}", status_str)))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => return Err(internal_error(format!("Invalid level: {}", level_str))), + }; + Ok(ExportJobMetadata { + job_id: 
job_id.clone(), + status, + level, + owner_subject, + transaction_time, + completed_at, + request_url, + }) + } + + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult { + let (resource_type, part_index) = parse_part_segment(part).ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT f.file_type, f.resource_count, f.fencing_token, j.owner_subject + FROM bulk_export_files f + JOIN bulk_export_jobs j ON j.id = f.job_id + WHERE f.job_id = $1 AND j.tenant_id = $2 + AND f.resource_type = $3 AND f.part_index = $4", + &[ + &job_id.as_str(), + &tenant_id, + &resource_type.as_str(), + &(part_index as i32), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to get file metadata: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + let file_type: String = row.get(0); + let resource_count: Option = row.get(1); + let fencing_token: i64 = row.get(2); + let owner_subject: Option = row.get(3); + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index, + fencing_token: fencing_token as u64, + }; + Ok(ExportFileMetadata { + key, + resource_type, + file_type, + line_count: resource_count.unwrap_or(0) as u64, + job_owner_subject: owner_subject, + }) + } + + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let row = client + .query_one( + "SELECT COUNT(*) FROM bulk_export_jobs + WHERE tenant_id = $1 AND status IN ('accepted', 'in-progress')", + &[&tenant_id], + 
) + .await + .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; + let count: i64 = row.get(0); + Ok(count as u64) + } + + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: StdDuration, + limit: u32, + ) -> StorageResult> { + let client = self.get_client().await?; + let cutoff = now + - chrono::Duration::from_std(output_ttl) + .unwrap_or_else(|_| chrono::Duration::seconds(0)); + let rows = client + .query( + "SELECT tenant_id, id FROM bulk_export_jobs + WHERE status IN ('complete', 'error', 'cancelled') + AND completed_at IS NOT NULL AND completed_at < $1 + ORDER BY completed_at LIMIT $2", + &[&cutoff, &(limit as i64)], + ) + .await + .map_err(|e| internal_error(format!("Failed to query expired exports: {}", e)))?; + Ok(rows + .iter() + .map(|row| { + let tenant_id: String = row.get(0); + let id: String = row.get(1); + ExpiredExportRef { + tenant: TenantContext::new( + TenantId::new(tenant_id), + TenantPermissions::full_access(), + ), + job_id: ExportJobId::from_string(id), + } + }) + .collect()) + } + async fn list_exports( &self, tenant: &TenantContext, @@ -336,6 +529,406 @@ impl BulkExportStorage for PostgresBackend { } } +#[async_trait] +impl ExportClaimStrategy for PostgresBackend { + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: StdDuration, + ) -> StorageResult> { + let mut client = self.get_client().await?; + let now = Utc::now(); + let lease_expiry = now + + chrono::Duration::from_std(lease_duration) + .unwrap_or_else(|_| chrono::Duration::seconds(60)); + + let txn = client + .transaction() + .await + .map_err(|e| internal_error(format!("Failed to begin claim txn: {}", e)))?; + + let rows = txn + .query( + "SELECT id, tenant_id, fencing_token FROM bulk_export_jobs + WHERE status = 'accepted' + OR (status = 'in-progress' AND (lease_expiry IS NULL OR lease_expiry < $1)) + ORDER BY created_at + LIMIT 1 + FOR UPDATE SKIP LOCKED", + &[&now], + ) + .await + .map_err(|e| 
internal_error(format!("Failed to select claimable job: {}", e)))?; + + let Some(row) = rows.first() else { + txn.commit() + .await + .map_err(|e| internal_error(format!("Failed to commit claim txn: {}", e)))?; + return Ok(None); + }; + let job_id: String = row.get(0); + let tenant_id: String = row.get(1); + let fencing_token: i64 = row.get(2); + let new_token = fencing_token + 1; + + txn.execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', worker_id = $1, lease_expiry = $2, + heartbeat_at = $3, fencing_token = $4, + started_at = COALESCE(started_at, $3) + WHERE id = $5", + &[ + &worker_id.as_str(), + &lease_expiry, + &now, + &new_token, + &job_id.as_str(), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to claim export job: {}", e)))?; + + txn.commit() + .await + .map_err(|e| internal_error(format!("Failed to commit claim txn: {}", e)))?; + + Ok(Some(ExportJobLease { + job_id: ExportJobId::from_string(job_id), + tenant: TenantContext::new(TenantId::new(tenant_id), TenantPermissions::full_access()), + worker_id: worker_id.clone(), + lease_expiry, + fencing_token: new_token as u64, + })) + } + + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let new_expiry = now + chrono::Duration::seconds(60); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET lease_expiry = $1, heartbeat_at = $2 + WHERE id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &new_expiry, + &now, + &lease.job_id.as_str(), + &lease.worker_id.as_str(), + &(lease.fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("heartbeat failed: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: lease.job_id.clone(), + }) + } else { + Ok(new_expiry) + } + } + + async fn release(&self, lease: ExportJobLease) -> StorageResult<()> { + let client = self.get_client().await?; + 
client + .execute( + "UPDATE bulk_export_jobs + SET status = 'accepted', worker_id = NULL, lease_expiry = NULL + WHERE id = $1 AND worker_id = $2 AND fencing_token = $3 + AND status = 'in-progress'", + &[ + &lease.job_id.as_str(), + &lease.worker_id.as_str(), + &(lease.fencing_token as i64), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to release lease: {}", e)))?; + Ok(()) + } +} + +#[async_trait] +impl ExportWorkerStorage for PostgresBackend { + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT request_json, level, group_id, transaction_time, fhir_version + FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $2 AND worker_id = $3 AND fencing_token = $4", + &[ + &job_id.as_str(), + &tenant_id, + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("load worker job: {e}"))))?; + let row = rows.first().ok_or_else(|| LeaseError::LeaseLost { + job_id: job_id.clone(), + })?; + let request_json: String = row.get(0); + let level_str: String = row.get(1); + let group_id: Option = row.get(2); + let transaction_time: DateTime = row.get(3); + let fhir_version_str: String = row.get(4); + + let request: ExportRequest = serde_json::from_str(&request_json) + .map_err(|e| LeaseError::Storage(internal_error(format!("parse request_json: {e}"))))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => { + return Err(LeaseError::Storage(internal_error(format!( + "Invalid level: {level_str}" + )))); + } + }; + let fhir_version = + 
helios_fhir::FhirVersion::from_mime_param(&fhir_version_str).unwrap_or_default(); + + let progress_rows = client + .query( + "SELECT resource_type, total_count, exported_count, error_count, cursor_state + FROM bulk_export_progress WHERE job_id = $1", + &[&job_id.as_str()], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("query progress: {e}"))))?; + let type_progress: Vec = progress_rows + .iter() + .map(|r| TypeExportProgress { + resource_type: r.get(0), + total_count: r.get::<_, Option>(1).map(|v| v as u64), + exported_count: r.get::<_, i32>(2) as u64, + error_count: r.get::<_, i32>(3) as u64, + cursor_state: r.get(4), + }) + .collect(); + + Ok(WorkerJobView { + request, + level, + transaction_time, + fhir_version, + type_progress, + }) + } + + async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', started_at = COALESCE(started_at, $1) + WHERE id = $2 AND tenant_id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("mark_in_progress: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let affected = client + .execute( + "INSERT INTO bulk_export_progress + (job_id, resource_type, total_count, exported_count, 
error_count, cursor_state) + SELECT $1, $2, $3, $4, $5, $6 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $7 AND worker_id = $8 AND fencing_token = $9 + ) + ON CONFLICT (job_id, resource_type) DO UPDATE SET + total_count = EXCLUDED.total_count, + exported_count = EXCLUDED.exported_count, + error_count = EXCLUDED.error_count, + cursor_state = EXCLUDED.cursor_state", + &[ + &job_id.as_str(), + &progress.resource_type.as_str(), + &progress.total_count.map(|v| v as i32), + &(progress.exported_count as i32), + &(progress.error_count as i32), + &progress.cursor_state, + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| { + LeaseError::Storage(internal_error(format!("update_type_progress: {e}"))) + })?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let file_path = encode_part_path(&part.key); + let affected = client + .execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count, + part_index, fencing_token) + SELECT $1, $2, $3, $4, $5, $6, $7, $8 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $9 AND worker_id = $10 AND fencing_token = $11 + ) + ON CONFLICT (job_id, file_type, resource_type, part_index) DO UPDATE SET + file_path = EXCLUDED.file_path, + resource_count = EXCLUDED.resource_count, + byte_count = EXCLUDED.byte_count, + fencing_token = EXCLUDED.fencing_token", + &[ + &job_id.as_str(), + &part.resource_type.as_str(), + &file_type, + &file_path.as_str(), + &(part.line_count as i32), + &(part.size_bytes as i64), + &(part.key.part_index 
as i32), + &(part.key.fencing_token as i64), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("record_export_file: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'complete', completed_at = $1 + WHERE id = $2 AND tenant_id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("finish_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'error', error_message = $1, completed_at = $2 + WHERE id = $3 AND tenant_id = $4 AND worker_id = $5 AND fencing_token = $6", + &[ + &error_message, + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("fail_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } +} + #[async_trait] impl ExportDataProvider for 
PostgresBackend { async fn list_export_types( @@ -451,13 +1044,15 @@ impl ExportDataProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (last_updated, id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (last_updated, id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } @@ -588,13 +1183,15 @@ impl PatientExportProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (last_updated, id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (last_updated, id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } @@ -675,13 +1272,15 @@ impl PatientExportProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (r.last_updated, r.id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (r.last_updated, r.id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } @@ -775,16 +1374,79 @@ 
impl GroupExportProvider for PostgresBackend { tenant: &TenantContext, group_id: &str, ) -> StorageResult> { - let members = self.get_group_members(tenant, group_id).await?; - - let mut patient_ids = Vec::new(); - for member_ref in &members { - // Extract patient ID from "Patient/123" format - if let Some(id) = member_ref.strip_prefix("Patient/") { - patient_ids.push(id.to_string()); + // Flatten nested Groups iteratively, guarding against membership + // cycles with a visited set. + use std::collections::HashSet; + let mut visited_groups: HashSet = HashSet::new(); + let mut seen_patients: HashSet = HashSet::new(); + let mut patient_ids: Vec = Vec::new(); + let mut worklist: Vec = vec![group_id.to_string()]; + + while let Some(gid) = worklist.pop() { + if !visited_groups.insert(gid.clone()) { + continue; // cycle / already processed + } + let members = self.get_group_members(tenant, &gid).await?; + for member_ref in &members { + if let Some(id) = member_ref.strip_prefix("Patient/") { + if seen_patients.insert(id.to_string()) { + patient_ids.push(id.to_string()); + } + } else if let Some(nested) = member_ref.strip_prefix("Group/") { + worklist.push(nested.to_string()); + } } } Ok(patient_ids) } + + async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT data FROM resources + WHERE tenant_id = $1 AND resource_type = 'Group' + AND id = $2 AND is_deleted = false", + &[&tenant_id, &group_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get group: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::GroupNotFound { + group_id: group_id.to_string(), + }) + })?; + let data: Vec = row.get(0); + let group: Value = serde_json::from_slice(&data) + .map_err(|e| internal_error(format!("Failed to parse group: {}", e)))?; + let 
mut out = Vec::new(); + if let Some(arr) = group.get("member").and_then(|m| m.as_array()) { + for member in arr { + let Some(reference) = member + .get("entity") + .and_then(|e| e.get("reference")) + .and_then(|r| r.as_str()) + else { + continue; + }; + let period_start = member + .get("period") + .and_then(|p| p.get("start")) + .and_then(|s| s.as_str()) + .and_then(|s| { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }); + out.push((reference.to_string(), period_start)); + } + } + Ok(out) + } } diff --git a/crates/persistence/src/backends/postgres/schema.rs b/crates/persistence/src/backends/postgres/schema.rs index 92a91b732..888a852b4 100644 --- a/crates/persistence/src/backends/postgres/schema.rs +++ b/crates/persistence/src/backends/postgres/schema.rs @@ -3,7 +3,7 @@ use crate::error::{BackendError, StorageResult}; /// Current schema version. -pub const SCHEMA_VERSION: i32 = 7; +pub const SCHEMA_VERSION: i32 = 8; /// Initialize the database schema. pub async fn initialize_schema(client: &deadpool_postgres::Client) -> StorageResult<()> { @@ -269,6 +269,7 @@ async fn migrate_schema( 4 => migrate_v4_to_v5(client).await?, 5 => migrate_v5_to_v6(client).await?, 6 => migrate_v6_to_v7(client).await?, + 7 => migrate_v7_to_v8(client).await?, _ => { return Err(pg_error(format!("Unknown schema version: {}", version))); } @@ -581,6 +582,55 @@ async fn migrate_v6_to_v7(client: &deadpool_postgres::Client) -> StorageResult<( Ok(()) } +/// v7 -> v8: Add bulk-export worker/lease support. 
+async fn migrate_v7_to_v8(client: &deadpool_postgres::Client) -> StorageResult<()> { + let migrations = [ + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS worker_id TEXT", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS lease_expiry TIMESTAMPTZ", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS fencing_token BIGINT NOT NULL DEFAULT 0", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS heartbeat_at TIMESTAMPTZ", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS owner_subject TEXT", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS request_url TEXT NOT NULL DEFAULT ''", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS fhir_version TEXT NOT NULL DEFAULT '4.0'", + "ALTER TABLE bulk_export_files ADD COLUMN IF NOT EXISTS part_index INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE bulk_export_files ADD COLUMN IF NOT EXISTS fencing_token BIGINT NOT NULL DEFAULT 0", + ]; + for sql in &migrations { + client + .execute(*sql, &[]) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 failed: {}", e)))?; + } + + // Backfill part_index: 0-based sequential per (job_id, file_type, resource_type). 
+ client + .execute( + "UPDATE bulk_export_files SET part_index = sub.rn FROM ( + SELECT id, ROW_NUMBER() OVER ( + PARTITION BY job_id, file_type, resource_type ORDER BY id + ) - 1 AS rn FROM bulk_export_files + ) sub WHERE bulk_export_files.id = sub.id", + &[], + ) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 backfill failed: {}", e)))?; + + let indexes = [ + "CREATE INDEX IF NOT EXISTS idx_export_jobs_claim + ON bulk_export_jobs(tenant_id, status, lease_expiry)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_export_files_part + ON bulk_export_files(job_id, file_type, resource_type, part_index)", + ]; + for sql in &indexes { + client + .execute(*sql, &[]) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 index failed: {}", e)))?; + } + + Ok(()) +} + fn pg_error(message: String) -> crate::error::StorageError { crate::error::StorageError::Backend(BackendError::Internal { backend_name: "postgres".to_string(), diff --git a/crates/persistence/src/backends/s3/bulk_export.rs b/crates/persistence/src/backends/s3/bulk_export.rs index 554dd0ebc..161d00c80 100644 --- a/crates/persistence/src/backends/s3/bulk_export.rs +++ b/crates/persistence/src/backends/s3/bulk_export.rs @@ -1,180 +1,28 @@ -//! Bulk export implementation for the S3 backend. +//! Bulk export data provider for the S3 backend. //! -//! Implements `BulkExportStorage` and `ExportDataProvider`. Export jobs are -//! persisted as a small JSON state object in S3 and run synchronously within -//! the `start_export` call, writing NDJSON output parts directly to S3. +//! The S3 backend is **output-only** for bulk export: it provides +//! [`ExportDataProvider`] (feeding export batches when S3 is the resource +//! store) but does not implement `BulkExportStorage` — job state lives in the +//! SQLite or Postgres job store, never S3. 
use std::collections::BTreeSet; use async_trait::async_trait; -use chrono::Utc; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportManifest, ExportOutputFile, - ExportProgress, ExportRequest, ExportStatus, NdjsonBatch, TypeExportProgress, + ExportDataProvider, ExportRequest, GroupExportProvider, NdjsonBatch, PatientExportProvider, }; -use crate::error::{BulkExportError, StorageError, StorageResult}; +use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; use crate::tenant::TenantContext; -use super::backend::{S3Backend, TenantLocation}; -use super::models::ExportJobState; +use super::backend::S3Backend; -#[async_trait] -impl BulkExportStorage for S3Backend { - async fn start_export( - &self, - tenant: &TenantContext, - request: ExportRequest, - ) -> StorageResult { - if request.output_format != "application/fhir+ndjson" { - return Err(StorageError::BulkExport( - BulkExportError::UnsupportedFormat { - format: request.output_format, - }, - )); - } - - let active_exports = self.list_exports(tenant, false).await?; - if active_exports.len() >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - - let job_id = ExportJobId::new(); - let progress = ExportProgress::accepted(job_id.clone(), request.level.clone(), Utc::now()); - let state = ExportJobState { - request, - progress, - manifest: None, - }; - - self.save_export_state(tenant, &job_id, &state).await?; - - if let Err(err) = self.run_export_job(tenant, &job_id).await { - let _ = self - .mark_export_failed(tenant, &job_id, &err.to_string()) - .await; - } - - Ok(job_id) - } - - async fn get_export_status( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - Ok(self.load_export_state(tenant, job_id).await?.progress) - } - - async fn cancel_export( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let mut state = 
self.load_export_state(tenant, job_id).await?; - - if state.progress.status.is_terminal() { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "accepted or in-progress".to_string(), - actual: state.progress.status.to_string(), - })); - } - - state.progress.status = ExportStatus::Cancelled; - state.progress.completed_at = Some(Utc::now()); - state.progress.error_message = None; - state.progress.current_type = None; - - self.save_export_state(tenant, job_id, &state).await - } - - async fn delete_export( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - - if !self.export_job_exists(&location, job_id).await? { - return Err(StorageError::BulkExport(BulkExportError::JobNotFound { - job_id: job_id.to_string(), - })); - } - - let prefix = location.keyspace.export_job_prefix(job_id.as_str()); - for object in self.list_objects_all(&location.bucket, &prefix).await? { - self.delete_object(&location.bucket, &object.key).await?; - } - - Ok(()) - } - - async fn get_export_manifest( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - let state = self.load_export_state(tenant, job_id).await?; - - if state.progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: state.progress.status.to_string(), - })); - } - - if let Some(manifest) = state.manifest { - return Ok(manifest); - } - - let location = self.tenant_location(tenant)?; - let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); - let manifest = self - .get_json_object::(&location.bucket, &manifest_key) - .await? 
- .map(|(manifest, _)| manifest) - .ok_or_else(|| { - StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete with manifest".to_string(), - actual: "complete-without-manifest".to_string(), - }) - })?; - - Ok(manifest) - } - - async fn list_exports( - &self, - tenant: &TenantContext, - include_completed: bool, - ) -> StorageResult> { - let location = self.tenant_location(tenant)?; - let prefix = location.keyspace.export_jobs_prefix(); - - let mut exports = Vec::new(); - for object in self.list_objects_all(&location.bucket, &prefix).await? { - if !object.key.ends_with("/state.json") { - continue; - } - - if let Some((state, _)) = self - .get_json_object::(&location.bucket, &object.key) - .await? - { - if include_completed || state.progress.status.is_active() { - exports.push(state.progress); - } - } - } - - exports.sort_by_key(|e| std::cmp::Reverse(e.transaction_time)); - Ok(exports) - } +/// Error for export-level operations S3 does not support as a primary. +fn s3_export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "s3".to_string(), + capability: "patient/group bulk export".to_string(), + }) } #[async_trait] @@ -238,6 +86,11 @@ impl ExportDataProvider for S3Backend { continue; } } + if let Some(until) = request.until { + if resource.last_modified() > until { + continue; + } + } count += 1; } @@ -277,6 +130,11 @@ impl ExportDataProvider for S3Backend { continue; } } + if let Some(until) = request.until { + if resource.last_modified() > until { + continue; + } + } lines.push(serde_json::to_string(resource.content()).map_err(|e| { StorageError::BulkExport(BulkExportError::WriteError { @@ -301,245 +159,6 @@ impl ExportDataProvider for S3Backend { } } -impl S3Backend { - /// Drives a bulk export job to completion. - /// - /// Iterates over all matching resource types, fetches them in batches, and - /// writes NDJSON output parts to S3. 
Updates the job state object after - /// each type completes and writes the final manifest on success. - async fn run_export_job( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - let mut state = self.load_export_state(tenant, job_id).await?; - - state.progress.status = ExportStatus::InProgress; - state.progress.started_at = Some(Utc::now()); - state.progress.error_message = None; - state.progress.current_type = None; - state.progress.type_progress.clear(); - - self.save_export_state(tenant, job_id, &state).await?; - - let resource_types = self.list_export_types(tenant, &state.request).await?; - let mut output_files: Vec = Vec::new(); - - for resource_type in resource_types { - state.progress.current_type = Some(resource_type.clone()); - self.save_export_state(tenant, job_id, &state).await?; - - let mut type_progress = TypeExportProgress::new(resource_type.clone()); - type_progress.total_count = Some( - self.count_export_resources(tenant, &state.request, &resource_type) - .await?, - ); - - let mut cursor: Option = None; - let mut part_lines: Vec = Vec::new(); - let mut part_number: u32 = 1; - - loop { - let batch = self - .fetch_export_batch( - tenant, - &state.request, - &resource_type, - cursor.as_deref(), - state.request.batch_size.max(1), - ) - .await?; - - for line in batch.lines { - part_lines.push(line); - if part_lines.len() >= self.config.bulk_export_part_size as usize { - let written = self - .write_export_part( - &location, - job_id, - &resource_type, - part_number, - &part_lines, - ) - .await?; - output_files.push(written); - type_progress.exported_count += part_lines.len() as u64; - type_progress.cursor_state = batch.next_cursor.clone(); - self.save_export_type_progress(&location, job_id, &type_progress) - .await?; - part_lines.clear(); - part_number += 1; - } - } - - cursor = batch.next_cursor; - if batch.is_last { - break; - } - } - - if !part_lines.is_empty() { - 
let written = self - .write_export_part(&location, job_id, &resource_type, part_number, &part_lines) - .await?; - output_files.push(written); - type_progress.exported_count += part_lines.len() as u64; - part_lines.clear(); - } - - type_progress.cursor_state = None; - self.save_export_type_progress(&location, job_id, &type_progress) - .await?; - state.progress.type_progress.push(type_progress); - } - - state.progress.status = ExportStatus::Complete; - state.progress.completed_at = Some(Utc::now()); - state.progress.current_type = None; - state.progress.error_message = None; - - let manifest = ExportManifest { - transaction_time: state.progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: Vec::new(), - message: None, - extension: None, - }; - - state.manifest = Some(manifest.clone()); - - let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); - let manifest_payload = self.serialize_json(&manifest)?; - self.put_json_object( - &location.bucket, - &manifest_key, - &manifest_payload, - None, - None, - ) - .await?; - - self.save_export_state(tenant, job_id, &state).await - } - - /// Writes a single NDJSON output part to S3 and returns an - /// `ExportOutputFile` describing the S3 location and line count. - async fn write_export_part( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - resource_type: &str, - part_number: u32, - lines: &[String], - ) -> StorageResult { - let key = - location - .keyspace - .export_job_output_key(job_id.as_str(), resource_type, part_number); - let mut body = lines.join("\n"); - body.push('\n'); - - self.put_bytes_object( - &location.bucket, - &key, - body.as_bytes(), - Some("application/fhir+ndjson"), - ) - .await?; - - Ok( - ExportOutputFile::new(resource_type, format!("s3://{}/{}", location.bucket, key)) - .with_count(lines.len() as u64), - ) - } - - /// Returns `true` if the job state object exists in S3. 
- async fn export_job_exists( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - ) -> StorageResult { - let key = location.keyspace.export_job_state_key(job_id.as_str()); - Ok(self - .client - .head_object(&location.bucket, &key) - .await - .map_err(|e| self.map_client_error(e))? - .is_some()) - } - - /// Loads and deserialises the export job state from S3. - /// - /// Returns `JobNotFound` if the state object does not exist. - async fn load_export_state( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - let location = self.tenant_location(tenant)?; - let key = location.keyspace.export_job_state_key(job_id.as_str()); - self.get_json_object::(&location.bucket, &key) - .await? - .map(|(state, _)| state) - .ok_or_else(|| { - StorageError::BulkExport(BulkExportError::JobNotFound { - job_id: job_id.to_string(), - }) - }) - } - - /// Serialises and writes the export job state to S3. - async fn save_export_state( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - state: &ExportJobState, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - let key = location.keyspace.export_job_state_key(job_id.as_str()); - let payload = self.serialize_json(state)?; - self.put_json_object(&location.bucket, &key, &payload, None, None) - .await?; - Ok(()) - } - - /// Transitions the export job to the `Error` state, recording the failure - /// message in the state object. 
- async fn mark_export_failed( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - message: &str, - ) -> StorageResult<()> { - let mut state = self.load_export_state(tenant, job_id).await?; - state.progress.status = ExportStatus::Error; - state.progress.completed_at = Some(Utc::now()); - state.progress.current_type = None; - state.progress.error_message = Some(message.to_string()); - self.save_export_state(tenant, job_id, &state).await - } - - /// Writes per-type export progress to S3 so that partial completion can be - /// inspected before the job finishes. - async fn save_export_type_progress( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - progress: &TypeExportProgress, - ) -> StorageResult<()> { - let key = location - .keyspace - .export_job_progress_key(job_id.as_str(), &progress.resource_type); - let payload = self.serialize_json(progress)?; - self.put_json_object(&location.bucket, &key, &payload, None, None) - .await?; - Ok(()) - } -} - /// Parses the numeric offset encoded in an export batch cursor. /// /// A `None` cursor is treated as offset `0` (start of the result set). @@ -563,3 +182,51 @@ fn parse_resource_type_from_current_key(key: &str) -> Option { let resources_idx = parts.iter().position(|segment| *segment == "resources")?; parts.get(resources_idx + 1).map(|s| s.to_string()) } + +// S3 is output-only for bulk export; patient/group compartment enumeration is +// not supported when S3 is the resource store. These stub impls satisfy the +// trait hierarchy so S3 can be a primary backend. 
+ +#[async_trait] +impl PatientExportProvider for S3Backend { + async fn list_patient_ids( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + Err(s3_export_unsupported()) + } + + async fn fetch_patient_compartment_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _patient_ids: &[String], + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(s3_export_unsupported()) + } +} + +#[async_trait] +impl GroupExportProvider for S3Backend { + async fn get_group_members( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(s3_export_unsupported()) + } + + async fn resolve_group_patient_ids( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(s3_export_unsupported()) + } +} diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs index 3a9f5244e..5f343df76 100644 --- a/crates/persistence/src/backends/s3/client.rs +++ b/crates/persistence/src/backends/s3/client.rs @@ -129,6 +129,21 @@ pub trait S3Api: Send + Sync { continuation: Option<&str>, max_keys: Option, ) -> Result; + + /// Generates a pre-signed `GET` URL for `key`, valid for `ttl`. + /// + /// The default implementation reports the capability as unsupported; + /// [`AwsS3Client`] overrides it using the AWS SDK presigner. + async fn presign_get( + &self, + _bucket: &str, + _key: &str, + _ttl: std::time::Duration, + ) -> Result { + Err(S3ClientError::Internal( + "pre-signed URLs are not supported by this S3 client".to_string(), + )) + } } /// Production `S3Api` implementation backed by the AWS SDK. @@ -138,9 +153,12 @@ pub struct AwsS3Client { client: Client, } +/// S3-compatible endpoint overrides for [`AwsS3Client`]. 
#[derive(Debug, Clone, Default)] pub struct AwsS3ClientOptions { + /// Override the endpoint URL (for MinIO, R2, GCS interop, etc.). pub endpoint_url: Option, + /// Force path-style addressing (`bucket` in path, not subdomain). pub force_path_style: bool, } @@ -357,6 +375,25 @@ impl S3Api for AwsS3Client { next_continuation_token: out.next_continuation_token().map(|s| s.to_string()), }) } + + async fn presign_get( + &self, + bucket: &str, + key: &str, + ttl: std::time::Duration, + ) -> Result { + let presign_config = aws_sdk_s3::presigning::PresigningConfig::expires_in(ttl) + .map_err(|e| S3ClientError::Internal(format!("invalid presign TTL: {e}")))?; + let presigned = self + .client + .get_object() + .bucket(bucket) + .key(key) + .presigned(presign_config) + .await + .map_err(map_sdk_error)?; + Ok(presigned.uri().to_string()) + } } /// Maps an AWS SDK error to the normalised `S3ClientError` taxonomy. diff --git a/crates/persistence/src/backends/s3/keyspace.rs b/crates/persistence/src/backends/s3/keyspace.rs index 5b2089bcc..faa9ee78e 100644 --- a/crates/persistence/src/backends/s3/keyspace.rs +++ b/crates/persistence/src/backends/s3/keyspace.rs @@ -138,51 +138,6 @@ impl S3Keyspace { self.join(&["history", "system/"]) } - /// Key for the JSON state object of a bulk export job. - pub fn export_job_state_key(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "state.json"]) - } - - /// Key for per-type export progress within a job. - pub fn export_job_progress_key(&self, job_id: &str, resource_type: &str) -> String { - self.join(&[ - "bulk", - "export", - "jobs", - job_id, - "progress", - &format!("{}.json", resource_type), - ]) - } - - /// Key for the completed export manifest of a job. - pub fn export_job_manifest_key(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "manifest.json"]) - } - - /// Key for a single NDJSON output part within an export job. 
- pub fn export_job_output_key(&self, job_id: &str, resource_type: &str, part: u32) -> String { - self.join(&[ - "bulk", - "export", - "jobs", - job_id, - "output", - resource_type, - &format!("part-{}.ndjson", part), - ]) - } - - /// Prefix covering all export job objects. - pub fn export_jobs_prefix(&self) -> String { - self.join(&["bulk", "export", "jobs/"]) - } - - /// Prefix covering all objects belonging to a single export job. - pub fn export_job_prefix(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "/"]) - } - /// Key for the JSON state object of a bulk submission. pub fn submit_state_key(&self, submitter: &str, submission_id: &str) -> String { self.join(&["bulk", "submit", submitter, submission_id, "state.json"]) diff --git a/crates/persistence/src/backends/s3/mod.rs b/crates/persistence/src/backends/s3/mod.rs index 741e5d611..d43c61aba 100644 --- a/crates/persistence/src/backends/s3/mod.rs +++ b/crates/persistence/src/backends/s3/mod.rs @@ -12,10 +12,13 @@ mod client; mod config; mod keyspace; mod models; +mod output_store; mod storage; pub use backend::S3Backend; +pub use client::{AwsS3Client, AwsS3ClientOptions, S3Api}; pub use config::{S3BackendConfig, S3TenancyMode}; +pub use output_store::{AccessTokenMode, S3OutputStore}; #[cfg(test)] mod tests; diff --git a/crates/persistence/src/backends/s3/models.rs b/crates/persistence/src/backends/s3/models.rs index 02c632ee1..41789ab0f 100644 --- a/crates/persistence/src/backends/s3/models.rs +++ b/crates/persistence/src/backends/s3/models.rs @@ -1,5 +1,5 @@ -//! S3-specific persistence models for history indexing, bulk export job -//! state, and bulk submission state. +//! S3-specific persistence models for history indexing and bulk submission +//! state. //! //! These types are serialised as JSON objects in S3 and are never exposed //! outside the `s3` backend module. 
@@ -7,7 +7,6 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; -use crate::core::bulk_export::{ExportManifest, ExportProgress, ExportRequest}; use crate::core::bulk_submit::{SubmissionManifest, SubmissionSummary}; use crate::core::history::HistoryMethod; @@ -32,20 +31,6 @@ pub struct HistoryIndexEvent { pub deleted: bool, } -/// Durable state of a bulk export job stored in S3. -/// -/// Written to `bulk/export/jobs//state.json` and updated as the job -/// transitions through `accepted → in-progress → complete/error/cancelled`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExportJobState { - /// The original export request parameters. - pub request: ExportRequest, - /// Current progress, including status and per-type counts. - pub progress: ExportProgress, - /// The completed manifest, populated once the job reaches `Complete`. - pub manifest: Option, -} - /// Durable state of a bulk submission stored in S3. /// /// Written to `bulk/submit///state.json` when a submission is diff --git a/crates/persistence/src/backends/s3/output_store.rs b/crates/persistence/src/backends/s3/output_store.rs new file mode 100644 index 000000000..934979823 --- /dev/null +++ b/crates/persistence/src/backends/s3/output_store.rs @@ -0,0 +1,249 @@ +//! S3-backed [`ExportOutputStore`] for multi-instance bulk export. +//! +//! Output NDJSON parts are uploaded to S3-compatible object storage; download +//! URLs are pre-signed `GET` URLs (no token required) by default, or +//! HFS-served URLs when the operator forces token-based access. 
+ +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::core::bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::client::S3Api; + +/// Manifest access-token posture. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AccessTokenMode { + /// Pre-signed URLs when supported (default). + Auto, + /// Always HFS-served URLs requiring the kickoff Bearer token. + AlwaysToken, + /// Always pre-signed URLs. + AlwaysPresigned, +} + +impl AccessTokenMode { + /// Parses the `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` value. + pub fn parse(s: &str) -> Self { + match s { + "true" => Self::AlwaysToken, + "false" => Self::AlwaysPresigned, + _ => Self::Auto, + } + } +} + +/// An [`ExportOutputStore`] backed by S3-compatible object storage. +pub struct S3OutputStore { + client: Arc, + bucket: String, + base_url: String, + access_token_mode: AccessTokenMode, + file_url_ttl: Duration, + /// Local scratch directory for in-flight (pre-finalize) part files. + scratch_dir: PathBuf, +} + +impl S3OutputStore { + /// Creates a new S3 output store. + pub fn new( + client: Arc, + bucket: impl Into, + base_url: impl Into, + access_token_mode: AccessTokenMode, + file_url_ttl: Duration, + ) -> Self { + let scratch_dir = std::env::temp_dir().join("hfs-export-scratch"); + Self { + client, + bucket: bucket.into(), + base_url: base_url.into(), + access_token_mode, + file_url_ttl, + scratch_dir, + } + } + + /// The S3 object key for a finalized part. 
+ fn object_key(key: &ExportPartKey) -> String { + format!( + "{}/exports/{}/{}-{}-{}-{}.ndjson", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) + } + + /// The S3 key prefix covering all parts of a job. + fn job_prefix(tenant_id: &str, job_id: &ExportJobId) -> String { + format!("{}/exports/{}/", tenant_id, job_id) + } + + /// The local scratch path for an in-flight part. + fn scratch_path(&self, key: &ExportPartKey) -> PathBuf { + self.scratch_dir.join(format!( + "{}-{}-{}-{}-{}-{}.tmp", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + )) + } +} + +fn s3_err(message: String) -> StorageError { + StorageError::Backend(BackendError::Internal { + backend_name: "s3-output".to_string(), + message, + source: None, + }) +} + +#[async_trait] +impl ExportOutputStore for S3OutputStore { + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult { + tokio::fs::create_dir_all(&self.scratch_dir) + .await + .map_err(|e| s3_err(format!("create scratch dir: {e}")))?; + let path = self.scratch_path(key); + let file = tokio::fs::File::create(&path) + .await + .map_err(|e| s3_err(format!("create scratch file {}: {e}", path.display())))?; + let boxed: std::pin::Pin> = Box::pin(file); + Ok(ExportPartWriter::new(boxed)) + } + + async fn finalize_part( + &self, + key: &ExportPartKey, + mut writer: ExportPartWriter, + ) -> StorageResult { + use tokio::io::AsyncWriteExt; + writer + .writer + .flush() + .await + .map_err(|e| s3_err(format!("flush scratch file: {e}")))?; + writer + .writer + .shutdown() + .await + .map_err(|e| s3_err(format!("close scratch file: {e}")))?; + let line_count = writer.line_count; + let byte_count = writer.byte_count; + drop(writer); + + let path = self.scratch_path(key); + let bytes = tokio::fs::read(&path) + .await + .map_err(|e| s3_err(format!("read scratch file {}: {e}", path.display())))?; + let object_key = 
Self::object_key(key); + self.client + .put_object( + &self.bucket, + &object_key, + bytes, + Some("application/fhir+ndjson"), + None, + None, + ) + .await + .map_err(|e| s3_err(format!("upload {object_key}: {e:?}")))?; + // Best-effort cleanup of the scratch file. + let _ = tokio::fs::remove_file(&path).await; + + Ok(FinalizedPart { + key: key.clone(), + resource_type: key.resource_type.clone(), + line_count, + size_bytes: byte_count, + }) + } + + async fn download_url(&self, key: &ExportPartKey, ttl: Duration) -> StorageResult { + match self.access_token_mode { + AccessTokenMode::AlwaysToken => Ok(DownloadUrl { + url: format!( + "{}/export-file/{}/{}-{}", + self.base_url.trim_end_matches('/'), + key.job_id, + key.resource_type, + key.part_index + ), + requires_access_token: true, + }), + AccessTokenMode::Auto | AccessTokenMode::AlwaysPresigned => { + let object_key = Self::object_key(key); + let effective_ttl = if ttl.is_zero() { + self.file_url_ttl + } else { + ttl + }; + let url = self + .client + .presign_get(&self.bucket, &object_key, effective_ttl) + .await + .map_err(|e| s3_err(format!("presign {object_key}: {e:?}")))?; + Ok(DownloadUrl { + url, + requires_access_token: false, + }) + } + } + } + + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>> { + let object_key = Self::object_key(key); + let data = self + .client + .get_object(&self.bucket, &object_key) + .await + .map_err(|e| s3_err(format!("get {object_key}: {e:?}")))? 
+ .ok_or_else(|| s3_err(format!("export object not found: {object_key}")))?; + Ok(Box::pin(std::io::Cursor::new(data.bytes))) + } + + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let prefix = Self::job_prefix(tenant.tenant_id().as_str(), job_id); + let mut continuation: Option = None; + loop { + let page = self + .client + .list_objects(&self.bucket, &prefix, continuation.as_deref(), Some(1000)) + .await + .map_err(|e| s3_err(format!("list {prefix}: {e:?}")))?; + for item in &page.items { + self.client + .delete_object(&self.bucket, &item.key) + .await + .map_err(|e| s3_err(format!("delete {}: {e:?}", item.key)))?; + } + match page.next_continuation_token { + Some(token) => continuation = Some(token), + None => break, + } + } + Ok(()) + } +} diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs index 914b09604..fce08cc28 100644 --- a/crates/persistence/src/backends/s3/storage.rs +++ b/crates/persistence/src/backends/s3/storage.rs @@ -101,14 +101,6 @@ impl S3Backend { .map_err(|e| self.map_client_error(e)) } - /// Deletes the object at `key`. Succeeds silently if the key does not exist. - pub(crate) async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> { - self.client - .delete_object(bucket, key) - .await - .map_err(|e| self.map_client_error(e)) - } - /// Downloads and deserialises a JSON object, returning `None` if not found. 
pub(crate) async fn get_json_object( &self, diff --git a/crates/persistence/src/backends/s3/tests.rs b/crates/persistence/src/backends/s3/tests.rs index 08b0b3855..65f93f7e0 100644 --- a/crates/persistence/src/backends/s3/tests.rs +++ b/crates/persistence/src/backends/s3/tests.rs @@ -19,7 +19,7 @@ use crate::backends::s3::client::{ ListObjectItem, ListObjectsResult, ObjectData, ObjectMetadata, S3Api, S3ClientError, }; use crate::backends::s3::config::{S3BackendConfig, S3TenancyMode}; -use crate::core::bulk_export::{BulkExportStorage, ExportDataProvider, ExportRequest}; +use crate::core::bulk_export::{ExportDataProvider, ExportRequest}; use crate::core::bulk_submit::{ BulkProcessingOptions, BulkSubmitProvider, BulkSubmitRollbackProvider, NdjsonEntry, StreamingBulkSubmitProvider, SubmissionId, SubmissionStatus, @@ -30,8 +30,8 @@ use crate::core::history::{ use crate::core::transaction::{BundleEntry, BundleMethod, BundleProvider}; use crate::core::{ResourceStorage, VersionedStorage}; use crate::error::{ - BulkExportError, BulkSubmitError, ConcurrencyError, ResourceError, SearchError, StorageError, - TenantError, TransactionError, + BulkSubmitError, ConcurrencyError, ResourceError, SearchError, StorageError, TenantError, + TransactionError, }; use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use crate::types::{CursorValue, PageCursor, Pagination, PaginationMode}; @@ -734,47 +734,14 @@ async fn bundle_transaction_reports_rollback_failure() { } } -#[tokio::test] -async fn bulk_export_start_manifest_and_delete() { - let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); - let backend = make_prefix_backend(mock); - let tenant = tenant("tenant-a"); - - backend - .create( - &tenant, - "Patient", - json!({"resourceType":"Patient","id":"e1"}), - FhirVersion::default(), - ) - .await - .unwrap(); - - let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); - let job_id = backend.start_export(&tenant, request).await.unwrap(); - - 
let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); - assert_eq!( - progress.status, - crate::core::bulk_export::ExportStatus::Complete - ); - - let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); - assert!(!manifest.output.is_empty()); - assert!(manifest.output[0].url.starts_with("s3://")); - - backend.delete_export(&tenant, &job_id).await.unwrap(); - let deleted = backend.get_export_status(&tenant, &job_id).await; - assert!(matches!( - deleted, - Err(StorageError::BulkExport( - BulkExportError::JobNotFound { .. } - )) - )); -} +// `bulk_export_start_manifest_and_delete` was removed: S3 no longer +// implements `BulkExportStorage` (job state lives in SQLite or PostgreSQL). +// The remaining bulk-export surface on the S3 backend is the +// `ExportDataProvider` data-feed, exercised by +// `bulk_export_fetch_batch_cursor` below. #[tokio::test] -async fn bulk_export_invalid_format_and_fetch_batch_cursor() { +async fn bulk_export_fetch_batch_cursor() { let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); let backend = make_prefix_backend(mock); let tenant = tenant("tenant-a"); @@ -791,22 +758,6 @@ async fn bulk_export_invalid_format_and_fetch_batch_cursor() { .unwrap(); } - let invalid = backend - .start_export( - &tenant, - ExportRequest { - output_format: "application/json".to_string(), - ..ExportRequest::system() - }, - ) - .await; - assert!(matches!( - invalid, - Err(StorageError::BulkExport( - BulkExportError::UnsupportedFormat { .. } - )) - )); - let request = ExportRequest::system(); let batch1 = backend .fetch_export_batch(&tenant, &request, "Patient", None, 2) diff --git a/crates/persistence/src/backends/sqlite/bulk_export.rs b/crates/persistence/src/backends/sqlite/bulk_export.rs index 2c6a0757d..a9022bd31 100644 --- a/crates/persistence/src/backends/sqlite/bulk_export.rs +++ b/crates/persistence/src/backends/sqlite/bulk_export.rs @@ -1,20 +1,58 @@ //! Bulk export implementation for SQLite backend. 
use async_trait::async_trait; -use chrono::Utc; +use chrono::{DateTime, Utc}; use rusqlite::params; use serde_json::Value; +use std::time::Duration as StdDuration; +use tokio::sync::Mutex; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportProgress, ExportRequest, ExportStatus, + GroupExportProvider, NdjsonBatch, PatientExportProvider, RawExportManifest, RawManifestEntry, + StartExportInput, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportPartKey, FinalizedPart}; +use crate::core::bulk_export_worker::{ + ExportClaimStrategy, ExportJobLease, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; -use crate::tenant::TenantContext; +use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use super::SqliteBackend; +/// Process-local lock serializing `claim_next` for the single-instance +/// SQLite job store (SQLite has no `SELECT … FOR UPDATE SKIP LOCKED`). +static CLAIM_LOCK: Mutex<()> = Mutex::const_new(()); + +/// Parses an RFC3339 timestamp column into a UTC `DateTime`. +fn parse_dt(s: &str) -> StorageResult> { + DateTime::parse_from_rfc3339(s) + .map(|dt| dt.with_timezone(&Utc)) + .map_err(|e| internal_error(format!("invalid timestamp '{s}': {e}"))) +} + +/// Parses an optional RFC3339 timestamp column. +fn parse_dt_opt(s: Option) -> Option> { + s.and_then(|s| { + DateTime::parse_from_rfc3339(&s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }) +} + +/// Splits a `{resource_type}-{part_index}` download segment. 
+fn parse_part_segment(part: &str) -> Option<(String, u32)> { + let idx = part.rfind('-')?; + let resource_type = &part[..idx]; + let part_index: u32 = part[idx + 1..].parse().ok()?; + if resource_type.is_empty() { + return None; + } + Some((resource_type.to_string(), part_index)) +} + fn internal_error(message: String) -> StorageError { StorageError::Backend(BackendError::Internal { backend_name: "sqlite".to_string(), @@ -28,46 +66,31 @@ impl BulkExportStorage for SqliteBackend { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult { let conn = self.get_connection()?; let tenant_id = tenant.tenant_id().as_str(); - // Check for too many concurrent exports (limit to 5 active exports per tenant) - let active_count: i32 = conn - .query_row( - "SELECT COUNT(*) FROM bulk_export_jobs - WHERE tenant_id = ?1 AND status IN ('accepted', 'in-progress')", - params![tenant_id], - |row| row.get(0), - ) - .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; - - if active_count >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - let job_id = ExportJobId::new(); - let now = Utc::now(); - let transaction_time = now.to_rfc3339(); + let now = Utc::now().to_rfc3339(); + let transaction_time = input.transaction_time.to_rfc3339(); - let level_str = match &request.level { + let level_str = match &input.request.level { ExportLevel::System => "system".to_string(), ExportLevel::Patient => "patient".to_string(), ExportLevel::Group { .. 
} => "group".to_string(), }; - let group_id = request.group_id().map(|s| s.to_string()); + let group_id = input.request.group_id().map(|s| s.to_string()); - let request_json = serde_json::to_string(&request) + let request_json = serde_json::to_string(&input.request) .map_err(|e| internal_error(format!("Failed to serialize request: {}", e)))?; conn.execute( "INSERT INTO bulk_export_jobs - (id, tenant_id, status, level, group_id, request_json, transaction_time, created_at) - VALUES (?1, ?2, 'accepted', ?3, ?4, ?5, ?6, ?7)", + (id, tenant_id, status, level, group_id, request_json, transaction_time, + created_at, owner_subject, request_url, fhir_version, fencing_token) + VALUES (?1, ?2, 'accepted', ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, 0)", params![ job_id.as_str(), tenant_id, @@ -75,7 +98,10 @@ impl BulkExportStorage for SqliteBackend { group_id, request_json, transaction_time, - transaction_time + now, + input.owner_subject, + input.request_url, + input.fhir_version.as_mime_param(), ], ) .map_err(|e| internal_error(format!("Failed to create export job: {}", e)))?; @@ -265,68 +291,100 @@ impl BulkExportStorage for SqliteBackend { &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult { - let progress = self.get_export_status(tenant, job_id).await?; + ) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); - if progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: progress.status.to_string(), - })); - } + let (status_str, transaction_time, request_url, error_message, completed_at): ( + String, + String, + String, + Option, + Option, + ) = conn + .query_row( + "SELECT status, transaction_time, request_url, error_message, completed_at + FROM bulk_export_jobs WHERE id = ?1 AND tenant_id = ?2", + params![job_id.as_str(), tenant_id], + |row| { + Ok(( + row.get(0)?, + 
row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }, + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + } else { + internal_error(format!("Failed to get export job: {}", e)) + } + })?; - let conn = self.get_connection()?; + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; - // Get output files + // Get output/error files. let mut stmt = conn .prepare( - "SELECT resource_type, file_path, resource_count, file_type + "SELECT resource_type, resource_count, file_type, part_index, fencing_token FROM bulk_export_files WHERE job_id = ?1 - ORDER BY resource_type", + ORDER BY file_type, resource_type, part_index", ) .map_err(|e| internal_error(format!("Failed to prepare files query: {}", e)))?; - let mut output_files = Vec::new(); - let mut error_files = Vec::new(); - - let rows = stmt + let rows: Vec<(String, i64, String, i64, i64)> = stmt .query_map(params![job_id.as_str()], |row| { Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, Option>(2)?.map(|v| v as u64), - row.get::<_, String>(3)?, + row.get(0)?, + row.get::<_, Option>(1)?.unwrap_or(0), + row.get(2)?, + row.get(3)?, + row.get(4)?, )) }) - .map_err(|e| internal_error(format!("Failed to query files: {}", e)))?; - - for row in rows { - let (resource_type, file_path, count, file_type) = - row.map_err(|e| internal_error(format!("Failed to read file row: {}", e)))?; + .map_err(|e| internal_error(format!("Failed to query files: {}", e)))? 
+ .filter_map(|r| r.ok()) + .collect(); - let file = ExportOutputFile { + let mut output = Vec::new(); + let mut errors = Vec::new(); + for (resource_type, count, file_type, part_index, fencing_token) in rows { + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index: part_index as u32, + fencing_token: fencing_token as u64, + }; + let entry = RawManifestEntry { resource_type, - url: file_path, - count, + key, + count: count as u64, }; - if file_type == "error" { - error_files.push(file); + errors.push(entry); } else { - output_files.push(file); + output.push(entry); } } - Ok(ExportManifest { - transaction_time: progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: error_files, - message: None, - extension: None, + Ok(RawExportManifest { + transaction_time: parse_dt(&transaction_time)?, + request_url, + status, + error_message, + completed_at: parse_dt_opt(completed_at), + output, + errors, }) } @@ -366,6 +424,592 @@ impl BulkExportStorage for SqliteBackend { Ok(results) } + + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + + let (status_str, level_str, group_id, owner_subject, transaction_time, completed_at, request_url): ( + String, + String, + Option, + Option, + String, + Option, + String, + ) = conn + .query_row( + "SELECT status, level, group_id, owner_subject, transaction_time, completed_at, request_url + FROM bulk_export_jobs WHERE id = ?1 AND tenant_id = ?2", + params![job_id.as_str(), tenant_id], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + row.get(5)?, + row.get(6)?, + )) + }, + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + 
StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + } else { + internal_error(format!("Failed to get export job metadata: {}", e)) + } + })?; + + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => return Err(internal_error(format!("Invalid level: {}", level_str))), + }; + + Ok(ExportJobMetadata { + job_id: job_id.clone(), + status, + level, + owner_subject, + transaction_time: parse_dt(&transaction_time)?, + completed_at: parse_dt_opt(completed_at), + request_url, + }) + } + + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult { + let (resource_type, part_index) = parse_part_segment(part).ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + + let (file_type, resource_count, fencing_token, owner_subject): ( + String, + i64, + i64, + Option, + ) = conn + .query_row( + "SELECT f.file_type, f.resource_count, f.fencing_token, j.owner_subject + FROM bulk_export_files f + JOIN bulk_export_jobs j ON j.id = f.job_id + WHERE f.job_id = ?1 AND j.tenant_id = ?2 + AND f.resource_type = ?3 AND f.part_index = ?4", + params![job_id.as_str(), tenant_id, resource_type, part_index as i64], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)), + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + } else { + internal_error(format!("Failed to get export file metadata: {}", e)) + } + })?; + + let 
key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index, + fencing_token: fencing_token as u64, + }; + + Ok(ExportFileMetadata { + key, + resource_type, + file_type, + line_count: resource_count as u64, + job_owner_subject: owner_subject, + }) + } + + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM bulk_export_jobs + WHERE tenant_id = ?1 AND status IN ('accepted', 'in-progress')", + params![tenant_id], + |row| row.get(0), + ) + .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; + Ok(count as u64) + } + + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: StdDuration, + limit: u32, + ) -> StorageResult> { + let conn = self.get_connection()?; + let cutoff = (now + - chrono::Duration::from_std(output_ttl) + .unwrap_or_else(|_| chrono::Duration::seconds(0))) + .to_rfc3339(); + + let mut stmt = conn + .prepare( + "SELECT tenant_id, id FROM bulk_export_jobs + WHERE status IN ('complete', 'error', 'cancelled') + AND completed_at IS NOT NULL AND completed_at < ?1 + ORDER BY completed_at LIMIT ?2", + ) + .map_err(|e| internal_error(format!("Failed to prepare expired query: {}", e)))?; + + let rows: Vec<(String, String)> = stmt + .query_map(params![cutoff, limit], |row| Ok((row.get(0)?, row.get(1)?))) + .map_err(|e| internal_error(format!("Failed to query expired exports: {}", e)))? + .filter_map(|r| r.ok()) + .collect(); + + Ok(rows + .into_iter() + .map(|(tenant_id, id)| ExpiredExportRef { + tenant: TenantContext::new( + TenantId::new(tenant_id), + TenantPermissions::full_access(), + ), + job_id: ExportJobId::from_string(id), + }) + .collect()) + } +} + +/// Encodes an [`ExportPartKey`] into the `file_path` column. 
+fn encode_part_path(key: &ExportPartKey) -> String { + format!( + "{}/{}/{}/{}-{}-{}", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) +} + +#[async_trait] +impl ExportClaimStrategy for SqliteBackend { + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: StdDuration, + ) -> StorageResult> { + let _guard = CLAIM_LOCK.lock().await; + let conn = self.get_connection()?; + let now = Utc::now(); + let now_str = now.to_rfc3339(); + let lease_expiry = now + + chrono::Duration::from_std(lease_duration) + .unwrap_or_else(|_| chrono::Duration::seconds(60)); + let lease_expiry_str = lease_expiry.to_rfc3339(); + + // Find one eligible job: accepted, or in-progress with an expired lease. + let row: Option<(String, String, i64)> = conn + .query_row( + "SELECT id, tenant_id, fencing_token FROM bulk_export_jobs + WHERE status = 'accepted' + OR (status = 'in-progress' AND (lease_expiry IS NULL OR lease_expiry < ?1)) + ORDER BY created_at LIMIT 1", + params![now_str], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), + ) + .ok(); + + let Some((job_id, tenant_id, fencing_token)) = row else { + return Ok(None); + }; + let new_token = fencing_token + 1; + + conn.execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', worker_id = ?1, lease_expiry = ?2, + heartbeat_at = ?3, fencing_token = ?4, + started_at = COALESCE(started_at, ?3) + WHERE id = ?5", + params![ + worker_id.as_str(), + lease_expiry_str, + now_str, + new_token, + job_id + ], + ) + .map_err(|e| internal_error(format!("Failed to claim export job: {}", e)))?; + + Ok(Some(ExportJobLease { + job_id: ExportJobId::from_string(job_id), + tenant: TenantContext::new(TenantId::new(tenant_id), TenantPermissions::full_access()), + worker_id: worker_id.clone(), + lease_expiry, + fencing_token: new_token as u64, + })) + } + + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError> { + let conn = 
self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now(); + let new_expiry = now + chrono::Duration::seconds(60); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET lease_expiry = ?1, heartbeat_at = ?2 + WHERE id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + new_expiry.to_rfc3339(), + now.to_rfc3339(), + lease.job_id.as_str(), + lease.worker_id.as_str(), + lease.fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("heartbeat failed: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: lease.job_id.clone(), + }) + } else { + Ok(new_expiry) + } + } + + async fn release(&self, lease: ExportJobLease) -> StorageResult<()> { + let conn = self.get_connection()?; + conn.execute( + "UPDATE bulk_export_jobs + SET status = 'accepted', worker_id = NULL, lease_expiry = NULL + WHERE id = ?1 AND worker_id = ?2 AND fencing_token = ?3 + AND status = 'in-progress'", + params![ + lease.job_id.as_str(), + lease.worker_id.as_str(), + lease.fencing_token as i64 + ], + ) + .map_err(|e| internal_error(format!("Failed to release lease: {}", e)))?; + Ok(()) + } +} + +#[async_trait] +impl ExportWorkerStorage for SqliteBackend { + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let tenant_id = tenant.tenant_id().as_str(); + + let (request_json, level_str, group_id, transaction_time, fhir_version): ( + String, + String, + Option, + String, + String, + ) = conn + .query_row( + "SELECT request_json, level, group_id, transaction_time, fhir_version + FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?2 AND worker_id = ?3 AND fencing_token = ?4", + params![ + job_id.as_str(), + tenant_id, + worker_id.as_str(), + fencing_token as i64 + ], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + 
row.get(4)?, + )) + }, + ) + .map_err(|e| match e { + rusqlite::Error::QueryReturnedNoRows => LeaseError::LeaseLost { + job_id: job_id.clone(), + }, + other => LeaseError::Storage(internal_error(format!( + "Failed to load worker job: {other}" + ))), + })?; + + let request: ExportRequest = serde_json::from_str(&request_json).map_err(|e| { + LeaseError::Storage(internal_error(format!("Failed to parse request_json: {e}"))) + })?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => { + return Err(LeaseError::Storage(internal_error(format!( + "Invalid level: {level_str}" + )))); + } + }; + let fhir_version = + helios_fhir::FhirVersion::from_mime_param(&fhir_version).unwrap_or_default(); + let transaction_time = parse_dt(&transaction_time).map_err(LeaseError::Storage)?; + + // Load persisted per-type progress for resume. + let mut stmt = conn + .prepare( + "SELECT resource_type, total_count, exported_count, error_count, cursor_state + FROM bulk_export_progress WHERE job_id = ?1", + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("prepare progress: {e}"))))?; + let type_progress: Vec = stmt + .query_map(params![job_id.as_str()], |row| { + Ok(TypeExportProgress { + resource_type: row.get(0)?, + total_count: row.get::<_, Option>(1)?.map(|v| v as u64), + exported_count: row.get::<_, i64>(2)? as u64, + error_count: row.get::<_, i64>(3)? as u64, + cursor_state: row.get(4)?, + }) + }) + .map_err(|e| LeaseError::Storage(internal_error(format!("query progress: {e}"))))? 
+ .filter_map(|r| r.ok()) + .collect(); + + Ok(WorkerJobView { + request, + level, + transaction_time, + fhir_version, + type_progress, + }) + } + + async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', started_at = COALESCE(started_at, ?1) + WHERE id = ?2 AND tenant_id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("mark_in_progress: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let affected = conn + .execute( + "INSERT INTO bulk_export_progress + (job_id, resource_type, total_count, exported_count, error_count, cursor_state) + SELECT ?1, ?2, ?3, ?4, ?5, ?6 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?7 AND worker_id = ?8 AND fencing_token = ?9 + ) + ON CONFLICT(job_id, resource_type) DO UPDATE SET + total_count = excluded.total_count, + exported_count = excluded.exported_count, + error_count = excluded.error_count, + cursor_state = excluded.cursor_state", + params![ + job_id.as_str(), + progress.resource_type, + progress.total_count.map(|v| v as i64), + progress.exported_count as i64, + progress.error_count as i64, + progress.cursor_state, + tenant.tenant_id().as_str(), + worker_id.as_str(), + 
fencing_token as i64, + ], + ) + .map_err(|e| { + LeaseError::Storage(internal_error(format!("update_type_progress: {e}"))) + })?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let file_path = encode_part_path(&part.key); + let affected = conn + .execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count, + part_index, fencing_token) + SELECT ?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?9 AND worker_id = ?10 AND fencing_token = ?11 + ) + ON CONFLICT(job_id, file_type, resource_type, part_index) DO UPDATE SET + file_path = excluded.file_path, + resource_count = excluded.resource_count, + byte_count = excluded.byte_count, + fencing_token = excluded.fencing_token", + params![ + job_id.as_str(), + part.resource_type, + file_type, + file_path, + part.line_count as i64, + part.size_bytes as i64, + part.key.part_index as i64, + part.key.fencing_token as i64, + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64, + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("record_export_file: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'complete', completed_at = ?1 
+ WHERE id = ?2 AND tenant_id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("finish_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'error', error_message = ?1, completed_at = ?2 + WHERE id = ?3 AND tenant_id = ?4 AND worker_id = ?5 AND fencing_token = ?6", + params![ + error_message, + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("fail_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } } #[async_trait] @@ -680,8 +1324,10 @@ impl PatientExportProvider for SqliteBackend { .iter() .map(|id| format!("Patient/{}", id)) .collect(); + let since_value = request.since.map(|s| s.to_rfc3339()); + let patient_ref_param_start = if since_value.is_some() { 4 } else { 3 }; let placeholders: Vec = (0..patient_refs.len()) - .map(|i| format!("?{}", i + 4)) + .map(|i| format!("?{}", i + patient_ref_param_start)) .collect(); let mut query = format!( @@ -702,8 +1348,6 @@ impl PatientExportProvider for SqliteBackend { Box::new(tenant_id.to_string()), Box::new(resource_type.to_string()), ]; - // Placeholder for since filter slot - let since_value = request.since.map(|s| s.to_rfc3339()); if since_value.is_some() { params_vec.push(Box::new(since_value.clone().unwrap())); } @@ 
-826,22 +1470,83 @@ impl GroupExportProvider for SqliteBackend { tenant: &TenantContext, group_id: &str, ) -> StorageResult> { - let members = self.get_group_members(tenant, group_id).await?; - - // Filter to only Patient references and extract IDs - let patient_ids: Vec = members - .into_iter() - .filter_map(|reference| { - if reference.starts_with("Patient/") { - Some(reference.strip_prefix("Patient/").unwrap().to_string()) - } else { - None + // Flatten nested Groups iteratively, guarding against membership + // cycles with a visited set. + use std::collections::HashSet; + let mut visited_groups: HashSet = HashSet::new(); + let mut seen_patients: HashSet = HashSet::new(); + let mut patient_ids: Vec = Vec::new(); + let mut worklist: Vec = vec![group_id.to_string()]; + + while let Some(gid) = worklist.pop() { + if !visited_groups.insert(gid.clone()) { + continue; // cycle / already processed + } + let members = self.get_group_members(tenant, &gid).await?; + for reference in members { + if let Some(pid) = reference.strip_prefix("Patient/") { + if seen_patients.insert(pid.to_string()) { + patient_ids.push(pid.to_string()); + } + } else if let Some(nested) = reference.strip_prefix("Group/") { + worklist.push(nested.to_string()); } - }) - .collect(); + } + } Ok(patient_ids) } + + async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + let data: Vec = conn + .query_row( + "SELECT data FROM resources + WHERE tenant_id = ?1 AND resource_type = 'Group' + AND id = ?2 AND is_deleted = 0", + params![tenant_id, group_id], + |row| row.get(0), + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::GroupNotFound { + group_id: group_id.to_string(), + }) + } else { + internal_error(format!("Failed to get group: {}", e)) + } + })?; + let group: Value = 
serde_json::from_slice(&data) + .map_err(|e| internal_error(format!("Failed to parse group: {}", e)))?; + let mut out = Vec::new(); + if let Some(arr) = group.get("member").and_then(|m| m.as_array()) { + for member in arr { + let Some(reference) = member + .get("entity") + .and_then(|e| e.get("reference")) + .and_then(|r| r.as_str()) + else { + continue; + }; + let period_start = member + .get("period") + .and_then(|p| p.get("start")) + .and_then(|s| s.as_str()) + .and_then(|s| { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }); + out.push((reference.to_string(), period_start)); + } + } + Ok(out) + } } #[cfg(test)] @@ -865,13 +1570,27 @@ mod tests { ) } + /// Wraps an `ExportRequest` in a `StartExportInput` with default kickoff metadata. + fn test_input(request: ExportRequest) -> StartExportInput { + StartExportInput { + request, + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("test-subject".to_string()), + fhir_version: FhirVersion::default(), + } + } + #[tokio::test] async fn test_start_export() { let backend = create_test_backend(); let tenant = create_test_tenant(); let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(request)) + .await + .unwrap(); assert!(!job_id.as_str().is_empty()); @@ -884,8 +1603,10 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - let request = ExportRequest::system(); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); backend.cancel_export(&tenant, &job_id).await.unwrap(); @@ -898,37 +1619,273 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - // Create two exports - let request1 = 
ExportRequest::system(); - let _job_id1 = backend.start_export(&tenant, request1).await.unwrap(); - - let request2 = ExportRequest::patient(); - let _job_id2 = backend.start_export(&tenant, request2).await.unwrap(); + let _job_id1 = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + let _job_id2 = backend + .start_export(&tenant, test_input(ExportRequest::patient())) + .await + .unwrap(); let exports = backend.list_exports(&tenant, false).await.unwrap(); assert_eq!(exports.len(), 2); } #[tokio::test] - async fn test_too_many_concurrent_exports() { + async fn test_count_active_exports() { let backend = create_test_backend(); let tenant = create_test_tenant(); - // Create 5 exports (the limit) - for _ in 0..5 { - let request = ExportRequest::system(); - backend.start_export(&tenant, request).await.unwrap(); + for _ in 0..3 { + backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); } + assert_eq!(backend.count_active_exports(&tenant).await.unwrap(), 3); + } - // Sixth should fail - let request = ExportRequest::system(); - let result = backend.start_export(&tenant, request).await; + #[tokio::test] + async fn test_get_export_job_metadata() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::patient())) + .await + .unwrap(); + + let meta = backend + .get_export_job_metadata(&tenant, &job_id) + .await + .unwrap(); + assert_eq!(meta.status, ExportStatus::Accepted); + assert_eq!(meta.owner_subject.as_deref(), Some("test-subject")); + assert!(matches!(meta.level, ExportLevel::Patient)); + + let missing = backend + .get_export_job_metadata(&tenant, &ExportJobId::from_string("nope")) + .await; + assert!(missing.is_err()); + } + + #[tokio::test] + async fn test_claim_and_worker_lifecycle() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + 
.start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + + let worker = WorkerId::new("worker-1"); + let lease = backend + .claim_next(&worker, StdDuration::from_secs(60)) + .await + .unwrap() + .expect("a job should be claimable"); + assert_eq!(lease.job_id, job_id); + assert_eq!(lease.fencing_token, 1); + + // A second claim finds nothing (the only job is now in-progress). + assert!( + backend + .claim_next(&worker, StdDuration::from_secs(60)) + .await + .unwrap() + .is_none() + ); + + // Worker can load, progress, finish. + backend + .mark_export_in_progress(&tenant, &job_id, &worker, lease.fencing_token) + .await + .unwrap(); + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker, + lease.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await + .unwrap(); + backend + .finish_export_job(&tenant, &job_id, &worker, lease.fencing_token) + .await + .unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + } + + #[tokio::test] + async fn test_stale_worker_fenced_out() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + + let worker_a = WorkerId::new("worker-a"); + let lease_a = backend + .claim_next(&worker_a, StdDuration::from_millis(1)) + .await + .unwrap() + .unwrap(); + + // Lease expires; worker B reclaims, bumping the fencing token. + tokio::time::sleep(std::time::Duration::from_millis(5)).await; + let worker_b = WorkerId::new("worker-b"); + let lease_b = backend + .claim_next(&worker_b, StdDuration::from_secs(60)) + .await + .unwrap() + .unwrap(); + assert!(lease_b.fencing_token > lease_a.fencing_token); + + // Worker A's stale mutations are all rejected as LeaseLost. assert!(matches!( - result, - Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { .. 
} - )) + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) )); + assert!(matches!( + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker_a, + lease_a.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + assert!(matches!( + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + + // Worker B can still operate. + backend + .finish_export_job(&tenant, &job_id, &worker_b, lease_b.fencing_token) + .await + .unwrap(); + } + + #[tokio::test] + async fn test_since_newly_added_exclude_filters_late_joiners() { + use crate::core::bulk_export_output::{ExportPartKey, ExportPartWriter}; + let _ = ExportPartKey::output("t", ExportJobId::new(), "x", 0, 0); // import sanity + + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + // A Group with two members: one joined before _since (period.start = + // 2024-01-01), one joined after (period.start = 2026-06-01). + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g-cohort", + "member": [ + { + "entity": {"reference": "Patient/p-old"}, + "period": {"start": "2024-01-01T00:00:00Z"} + }, + { + "entity": {"reference": "Patient/p-new"}, + "period": {"start": "2026-06-01T00:00:00Z"} + } + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let members = backend + .get_group_members_with_periods(&tenant, "g-cohort") + .await + .unwrap(); + assert_eq!(members.len(), 2); + assert!(members.iter().all(|(_, p)| p.is_some())); + + // Worker-level filter logic: with exclude=true and _since=2025, + // p-new (joined 2026) should be filtered out; p-old kept. 
+ let since = chrono::DateTime::parse_from_rfc3339("2025-01-01T00:00:00Z") + .unwrap() + .with_timezone(&Utc); + let kept: Vec = members + .iter() + .filter_map(|(reference, period_start)| { + let pid = reference.strip_prefix("Patient/")?; + match period_start { + Some(start) if *start > since => None, + _ => Some(pid.to_string()), + } + }) + .collect(); + assert_eq!(kept, vec!["p-old".to_string()]); + + // Drop reference to silence the unused-import allowance. + let _ = ExportPartWriter::new(Box::pin(Vec::::new())); + } + + #[tokio::test] + async fn test_resolve_nested_groups_with_cycle_guard() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + // g1 -> [Patient/p1, Group/g2]; g2 -> [Patient/p2, Group/g1 (cycle)] + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g1", + "member": [ + {"entity": {"reference": "Patient/p1"}}, + {"entity": {"reference": "Group/g2"}} + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g2", + "member": [ + {"entity": {"reference": "Patient/p2"}}, + {"entity": {"reference": "Group/g1"}} + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let mut ids = backend + .resolve_group_patient_ids(&tenant, "g1") + .await + .unwrap(); + ids.sort(); + // Both patients resolved exactly once; the cycle did not loop forever. 
+ assert_eq!(ids, vec!["p1".to_string(), "p2".to_string()]); } #[tokio::test] @@ -1013,8 +1970,10 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - let request = ExportRequest::system(); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); backend.delete_export(&tenant, &job_id).await.unwrap(); diff --git a/crates/persistence/src/backends/sqlite/schema.rs b/crates/persistence/src/backends/sqlite/schema.rs index 04a47a03f..97ba00992 100644 --- a/crates/persistence/src/backends/sqlite/schema.rs +++ b/crates/persistence/src/backends/sqlite/schema.rs @@ -5,7 +5,7 @@ use rusqlite::Connection; use crate::error::StorageResult; /// Current schema version. -pub const SCHEMA_VERSION: i32 = 7; +pub const SCHEMA_VERSION: i32 = 8; /// Initialize the database schema. pub fn initialize_schema(conn: &Connection) -> StorageResult<()> { @@ -263,6 +263,7 @@ fn migrate_schema(conn: &Connection, from_version: i32) -> StorageResult<()> { 4 => migrate_v4_to_v5(conn)?, 5 => migrate_v5_to_v6(conn)?, 6 => migrate_v6_to_v7(conn)?, + 7 => migrate_v7_to_v8(conn)?, _ => { return Err(crate::error::StorageError::Backend( crate::error::BackendError::Internal { @@ -829,6 +830,128 @@ fn migrate_v6_to_v7(conn: &Connection) -> StorageResult<()> { Ok(()) } +/// Migrate from schema version 7 to version 8. +/// +/// Adds bulk-export worker/lease support: +/// - lease columns + `owner_subject`/`request_url`/`fhir_version` on `bulk_export_jobs` +/// - `part_index`/`fencing_token` on `bulk_export_files`, with a backfill of +/// `part_index` and a unique index for idempotent upserts +fn migrate_v7_to_v8(conn: &Connection) -> StorageResult<()> { + // Columns that may already exist if the table was created fresh — guard + // with PRAGMA table_info since SQLite has no `ADD COLUMN IF NOT EXISTS`. 
+ let job_columns: Vec = { + let mut stmt = conn + .prepare("PRAGMA table_info(bulk_export_jobs)") + .map_err(|e| migration_err(format!("pragma bulk_export_jobs: {e}")))?; + let cols: Vec = stmt + .query_map([], |row| row.get::<_, String>(1)) + .map_err(|e| migration_err(format!("pragma rows: {e}")))? + .filter_map(|r| r.ok()) + .collect(); + cols + }; + let job_adds = [ + ( + "worker_id", + "ALTER TABLE bulk_export_jobs ADD COLUMN worker_id TEXT", + ), + ( + "lease_expiry", + "ALTER TABLE bulk_export_jobs ADD COLUMN lease_expiry TEXT", + ), + ( + "fencing_token", + "ALTER TABLE bulk_export_jobs ADD COLUMN fencing_token INTEGER NOT NULL DEFAULT 0", + ), + ( + "heartbeat_at", + "ALTER TABLE bulk_export_jobs ADD COLUMN heartbeat_at TEXT", + ), + ( + "owner_subject", + "ALTER TABLE bulk_export_jobs ADD COLUMN owner_subject TEXT", + ), + ( + "request_url", + "ALTER TABLE bulk_export_jobs ADD COLUMN request_url TEXT NOT NULL DEFAULT ''", + ), + ( + "fhir_version", + "ALTER TABLE bulk_export_jobs ADD COLUMN fhir_version TEXT NOT NULL DEFAULT '4.0'", + ), + ]; + for (col, sql) in &job_adds { + if !job_columns.iter().any(|c| c == col) { + conn.execute(sql, []) + .map_err(|e| migration_err(format!("add {col}: {e}")))?; + } + } + + let file_columns: Vec = { + let mut stmt = conn + .prepare("PRAGMA table_info(bulk_export_files)") + .map_err(|e| migration_err(format!("pragma bulk_export_files: {e}")))?; + let cols: Vec = stmt + .query_map([], |row| row.get::<_, String>(1)) + .map_err(|e| migration_err(format!("pragma rows: {e}")))? 
+ .filter_map(|r| r.ok()) + .collect(); + cols + }; + let file_adds = [ + ( + "part_index", + "ALTER TABLE bulk_export_files ADD COLUMN part_index INTEGER NOT NULL DEFAULT 0", + ), + ( + "fencing_token", + "ALTER TABLE bulk_export_files ADD COLUMN fencing_token INTEGER NOT NULL DEFAULT 0", + ), + ]; + for (col, sql) in &file_adds { + if !file_columns.iter().any(|c| c == col) { + conn.execute(sql, []) + .map_err(|e| migration_err(format!("add {col}: {e}")))?; + } + } + + // Backfill part_index: 0-based sequential per (job_id, file_type, resource_type) + // ordered by id, so the unique index below builds without collisions on + // pre-existing rows. + conn.execute( + "UPDATE bulk_export_files SET part_index = ( + SELECT COUNT(*) FROM bulk_export_files f2 + WHERE f2.job_id = bulk_export_files.job_id + AND f2.file_type = bulk_export_files.file_type + AND f2.resource_type = bulk_export_files.resource_type + AND f2.id < bulk_export_files.id + )", + [], + ) + .map_err(|e| migration_err(format!("backfill part_index: {e}")))?; + + let indexes = [ + "CREATE INDEX IF NOT EXISTS idx_export_jobs_claim + ON bulk_export_jobs(tenant_id, status, lease_expiry)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_export_files_part + ON bulk_export_files(job_id, file_type, resource_type, part_index)", + ]; + for index_sql in &indexes { + conn.execute(index_sql, []) + .map_err(|e| migration_err(format!("create index: {e}")))?; + } + + Ok(()) +} + +fn migration_err(message: String) -> crate::error::StorageError { + crate::error::StorageError::Backend(crate::error::BackendError::Internal { + backend_name: "sqlite".to_string(), + message, + source: None, + }) +} + /// Drop all tables (for testing). #[cfg(test)] #[allow(dead_code)] @@ -989,4 +1112,82 @@ mod tests { .unwrap(); assert_eq!(table_count, 7); // 3 export + 4 submit tables } + + #[test] + fn test_migration_v7_to_v8_backfills_duplicate_file_rows() { + // Build a v6/v7-era schema (bulk tables without the v8 lease/part columns). 
+ let conn = Connection::open_in_memory().unwrap(); + create_schema_v1(&conn).unwrap(); + let _ = get_schema_version(&conn).unwrap(); + migrate_v1_to_v2(&conn).unwrap(); + migrate_v2_to_v3(&conn).unwrap(); + migrate_v3_to_v4(&conn).unwrap(); + migrate_v4_to_v5(&conn).unwrap(); + migrate_v5_to_v6(&conn).unwrap(); + migrate_v6_to_v7(&conn).unwrap(); + set_schema_version(&conn, 7).unwrap(); + + // Seed a job and THREE output files for the same (job, file_type, + // resource_type) — all default part_index would collide. + conn.execute( + "INSERT INTO bulk_export_jobs + (id, tenant_id, status, level, request_json, transaction_time, created_at) + VALUES ('j1', 't1', 'complete', 'system', '{}', '2026-01-01T00:00:00Z', + '2026-01-01T00:00:00Z')", + [], + ) + .unwrap(); + for i in 0..3 { + conn.execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count) + VALUES ('j1', 'Patient', 'output', ?1, 10, 100)", + rusqlite::params![format!("/exports/j1/Patient-{i}.ndjson")], + ) + .unwrap(); + } + + // Run the v7 -> v8 migration. + migrate_v7_to_v8(&conn).unwrap(); + + // The backfill must have produced distinct 0-based part_index values + // per group, so the unique index built without a collision. + let mut stmt = conn + .prepare( + "SELECT part_index FROM bulk_export_files + WHERE job_id = 'j1' ORDER BY part_index", + ) + .unwrap(); + let part_indexes: Vec = stmt + .query_map([], |row| row.get(0)) + .unwrap() + .filter_map(|r| r.ok()) + .collect(); + assert_eq!(part_indexes, vec![0, 1, 2]); + + // The unique index exists. + let idx_count: i32 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master + WHERE type='index' AND name='idx_export_files_part'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(idx_count, 1); + + // Re-running the migration is a no-op (idempotent). + migrate_v7_to_v8(&conn).unwrap(); + + // New lease columns are present on bulk_export_jobs. 
+ let has_worker_id: i32 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('bulk_export_jobs') + WHERE name='worker_id'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(has_worker_id, 1); + } } diff --git a/crates/persistence/src/composite/storage.rs b/crates/persistence/src/composite/storage.rs index 44a417608..afdf1273a 100644 --- a/crates/persistence/src/composite/storage.rs +++ b/crates/persistence/src/composite/storage.rs @@ -46,7 +46,8 @@ use crate::core::history::HistoryParams; use crate::core::{ BundleEntry, BundleProvider, BundleResult, CapabilityProvider, ChainedSearchProvider, ConditionalCreateResult, ConditionalDeleteResult, ConditionalPatchResult, ConditionalStorage, - ConditionalUpdateResult, IncludeProvider, InstanceHistoryProvider, PatchFormat, + ConditionalUpdateResult, ExportDataProvider, ExportRequest, GroupExportProvider, + IncludeProvider, InstanceHistoryProvider, NdjsonBatch, PatchFormat, PatientExportProvider, ResourceStorage, RevincludeProvider, SearchProvider, SearchResult, StorageCapabilities, TerminologySearchProvider, TextSearchProvider, VersionedStorage, }; @@ -80,6 +81,9 @@ pub type DynInstanceHistoryProvider = Arc; +/// A dynamically typed group export provider (also covers Patient + System). +pub type DynGroupExportProvider = Arc; + /// Composite storage that coordinates multiple backends. /// /// This is the main entry point for polyglot persistence. It implements @@ -128,6 +132,9 @@ pub struct CompositeStorage { /// Primary as BundleProvider (if supported). bundle_provider: Option, + + /// Primary as GroupExportProvider (if supported) — covers all export levels. + export_provider: Option, } /// Health status for a backend. 
@@ -285,6 +292,7 @@ impl CompositeStorage { versioned_storage: None, history_provider: None, bundle_provider: None, + export_provider: None, }) } @@ -336,6 +344,7 @@ impl CompositeStorage { + VersionedStorage + InstanceHistoryProvider + BundleProvider + + GroupExportProvider + Send + Sync + 'static, @@ -343,7 +352,8 @@ impl CompositeStorage { self.conditional_storage = Some(primary.clone() as DynConditionalStorage); self.versioned_storage = Some(primary.clone() as DynVersionedStorage); self.history_provider = Some(primary.clone() as DynInstanceHistoryProvider); - self.bundle_provider = Some(primary as DynBundleProvider); + self.bundle_provider = Some(primary.clone() as DynBundleProvider); + self.export_provider = Some(primary as DynGroupExportProvider); self } @@ -2005,6 +2015,130 @@ impl CapabilityProvider for CompositeStorage { // resource_capabilities uses the default implementation that returns Option } +/// Returns an `UnsupportedCapability` error for export operations when the +/// primary backend does not implement the export provider traits. 
+fn export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "composite".to_string(), + capability: "bulk-export".to_string(), + }) +} + +#[async_trait] +impl ExportDataProvider for CompositeStorage { + async fn list_export_types( + &self, + tenant: &TenantContext, + request: &ExportRequest, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.list_export_types(tenant, request).await, + None => Err(export_unsupported()), + } + } + + async fn count_export_resources( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.count_export_resources(tenant, request, resource_type) + .await + } + None => Err(export_unsupported()), + } + } + + async fn fetch_export_batch( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.fetch_export_batch(tenant, request, resource_type, cursor, batch_size) + .await + } + None => Err(export_unsupported()), + } + } +} + +#[async_trait] +impl PatientExportProvider for CompositeStorage { + async fn list_patient_ids( + &self, + tenant: &TenantContext, + request: &ExportRequest, + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + match &self.export_provider { + Some(p) => { + p.list_patient_ids(tenant, request, cursor, batch_size) + .await + } + None => Err(export_unsupported()), + } + } + + async fn fetch_patient_compartment_batch( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + patient_ids: &[String], + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.fetch_patient_compartment_batch( + tenant, + request, + resource_type, + patient_ids, + cursor, + batch_size, + ) + .await + } + None 
=> Err(export_unsupported()), + } + } +} + +#[async_trait] +impl GroupExportProvider for CompositeStorage { + async fn get_group_members( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.get_group_members(tenant, group_id).await, + None => Err(export_unsupported()), + } + } + + async fn resolve_group_patient_ids( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.resolve_group_patient_ids(tenant, group_id).await, + None => Err(export_unsupported()), + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/persistence/src/core/bulk_export.rs b/crates/persistence/src/core/bulk_export.rs index 432f0d396..91e874515 100644 --- a/crates/persistence/src/core/bulk_export.rs +++ b/crates/persistence/src/core/bulk_export.rs @@ -315,14 +315,34 @@ pub struct ExportRequest { #[serde(default)] pub resource_types: Vec, - /// Only include resources modified since this time. + /// Only include resources modified at or after this time (`_since`). #[serde(skip_serializing_if = "Option::is_none")] pub since: Option>, + /// Only include resources modified at or before this time (`_until`). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub until: Option>, + /// Type-specific filters to apply during export. #[serde(default)] pub type_filters: Vec, + /// Element paths to include (`_elements`). When non-empty, exported + /// resources are subset to these paths plus mandatory elements and tagged + /// `SUBSETTED`. + #[serde(default)] + pub elements: Vec, + + /// `includeAssociatedData` hint values. Parsed but currently a no-op + /// (rejected under `Prefer: handling=strict`, ignored otherwise). + #[serde(default)] + pub include_associated_data: Vec, + + /// Patient references restricting the export (POST `patient` parameter). + /// Only valid for patient- and group-level exports. 
+ #[serde(default)] + pub patient_refs: Vec, + /// Batch size for processing (implementation-specific). #[serde(default = "default_batch_size")] pub batch_size: u32, @@ -347,7 +367,11 @@ impl ExportRequest { level, resource_types: Vec::new(), since: None, + until: None, type_filters: Vec::new(), + elements: Vec::new(), + include_associated_data: Vec::new(), + patient_refs: Vec::new(), batch_size: default_batch_size(), output_format: default_output_format(), } @@ -382,6 +406,24 @@ impl ExportRequest { self } + /// Sets the until filter. + pub fn with_until(mut self, until: DateTime) -> Self { + self.until = Some(until); + self + } + + /// Sets the `_elements` element paths. + pub fn with_elements(mut self, elements: Vec) -> Self { + self.elements = elements; + self + } + + /// Sets the patient references (POST `patient` filter). + pub fn with_patient_refs(mut self, patient_refs: Vec) -> Self { + self.patient_refs = patient_refs; + self + } + /// Adds a type filter. pub fn with_type_filter(mut self, filter: TypeFilter) -> Self { self.type_filters.push(filter); @@ -571,6 +613,12 @@ pub struct ExportManifest { /// Output files containing OperationOutcome resources for errors. #[serde(default)] pub error: Vec, + /// Files containing deleted resource references (always empty for now). + #[serde(default)] + pub deleted: Vec, + /// Pagination links for partial manifests (always empty — `allowPartialManifests` unsupported). + #[serde(default)] + pub link: Vec, /// Informational messages. #[serde(default, skip_serializing_if = "Option::is_none")] pub message: Option, @@ -588,6 +636,8 @@ impl ExportManifest { requires_access_token: true, output: Vec::new(), error: Vec::new(), + deleted: Vec::new(), + link: Vec::new(), message: None, extension: None, } @@ -666,6 +716,106 @@ impl NdjsonBatch { } } +/// Kickoff metadata for starting an export job. 
+/// +/// Bundles the [`ExportRequest`] (what to export) with the server-frozen +/// metadata captured once at kickoff time: `transaction_time`, the original +/// request URL, the owning principal's subject, and the FHIR version. These +/// are the single source of truth — the worker only ever reads them back. +#[derive(Debug, Clone)] +pub struct StartExportInput { + /// What to export. + pub request: ExportRequest, + /// Server wall-clock frozen at kickoff (the manifest `transactionTime`). + pub transaction_time: DateTime, + /// The full kickoff request URL (echoed in the manifest `request` field). + pub request_url: String, + /// The subject of the authenticated principal that kicked off the export. + pub owner_subject: Option, + /// The FHIR version the export runs against. + pub fhir_version: helios_fhir::FhirVersion, +} + +/// A single entry in a [`RawExportManifest`] — carries a storage key, never a URL. +#[derive(Debug, Clone)] +pub struct RawManifestEntry { + /// The resource type contained in this part. + pub resource_type: String, + /// The output-store key for this part (URL minting happens in the REST layer). + pub key: crate::core::bulk_export_output::ExportPartKey, + /// Number of resources in the part. + pub count: u64, +} + +/// The storage-side view of a completed export's manifest. +/// +/// Carries keys rather than URLs — the REST layer mints download URLs via the +/// [`ExportOutputStore`](crate::core::bulk_export_output::ExportOutputStore) +/// and assembles the wire-format [`ExportManifest`]. +#[derive(Debug, Clone)] +pub struct RawExportManifest { + /// Server wall-clock frozen at kickoff. + pub transaction_time: DateTime, + /// The original kickoff request URL. + pub request_url: String, + /// Current job status. + pub status: ExportStatus, + /// Error message if the job failed. + pub error_message: Option, + /// Time the job completed. + pub completed_at: Option>, + /// Output parts (`file_type = "output"`). 
+ pub output: Vec, + /// Error parts (`file_type = "error"`). + pub errors: Vec, +} + +/// Lightweight job metadata for authorization checks. +/// +/// Returned by `get_export_job_metadata` — a single cheap row lookup the REST +/// status/cancel handlers call *before* any heavier status/manifest query. +#[derive(Debug, Clone)] +pub struct ExportJobMetadata { + /// The job ID. + pub job_id: ExportJobId, + /// Current status. + pub status: ExportStatus, + /// The export level. + pub level: ExportLevel, + /// Subject of the principal that owns the job. + pub owner_subject: Option, + /// Server wall-clock frozen at kickoff. + pub transaction_time: DateTime, + /// Time the job completed. + pub completed_at: Option>, + /// The original kickoff request URL. + pub request_url: String, +} + +/// Metadata for a single output/error file, for the download handler. +#[derive(Debug, Clone)] +pub struct ExportFileMetadata { + /// The output-store key for this part. + pub key: crate::core::bulk_export_output::ExportPartKey, + /// The resource type contained in the file. + pub resource_type: String, + /// `"output"` or `"error"`. + pub file_type: String, + /// Number of resources (lines) in the file. + pub line_count: u64, + /// Subject of the principal that owns the job. + pub job_owner_subject: Option, +} + +/// A reference to an expired export job, for the cleanup task. +#[derive(Debug, Clone)] +pub struct ExpiredExportRef { + /// The tenant the job belongs to. + pub tenant: TenantContext, + /// The expired job ID. 
+ pub job_id: ExportJobId, +} + // ============================================================================ // Traits // ============================================================================ @@ -681,7 +831,8 @@ pub trait BulkExportStorage: Send + Sync { /// # Arguments /// /// * `tenant` - The tenant context - /// * `request` - The export request parameters + /// * `input` - The kickoff metadata (request + frozen `transaction_time`, + /// `request_url`, `owner_subject`, `fhir_version`) /// /// # Returns /// @@ -694,7 +845,7 @@ pub trait BulkExportStorage: Send + Sync { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult; /// Gets the current status of an export job. @@ -750,26 +901,20 @@ pub trait BulkExportStorage: Send + Sync { job_id: &ExportJobId, ) -> StorageResult<()>; - /// Gets the manifest for a completed export. + /// Gets the storage-side manifest for a completed export. /// - /// # Arguments - /// - /// * `tenant` - The tenant context - /// * `job_id` - The export job ID - /// - /// # Returns - /// - /// The export manifest with output file information. + /// Returns a [`RawExportManifest`] carrying output-store *keys* — the REST + /// layer mints download URLs and assembles the wire-format + /// [`ExportManifest`]. /// /// # Errors /// /// * `BulkExportError::JobNotFound` - If the job doesn't exist - /// * `BulkExportError::InvalidJobState` - If the job is not complete async fn get_export_manifest( &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult; + ) -> StorageResult; /// Lists export jobs for a tenant. /// @@ -786,6 +931,47 @@ pub trait BulkExportStorage: Send + Sync { tenant: &TenantContext, include_completed: bool, ) -> StorageResult>; + + /// Returns lightweight job metadata for an authorization check. + /// + /// Called by the REST status/cancel handlers *before* any heavier query. 
+ /// + /// # Errors + /// + /// * `BulkExportError::JobNotFound` - If the job doesn't exist + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult; + + /// Returns file metadata for a single output/error part, for the download + /// handler. `part` is the `{resource_type}-{part_index}` route segment. + /// + /// # Errors + /// + /// * `BulkExportError::JobNotFound` - If the job or part doesn't exist + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult; + + /// Counts active (`accepted` or `in_progress`) jobs for a tenant — used to + /// enforce the per-tenant concurrency cap at kickoff. + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult; + + /// Lists expired completed jobs across *all* tenants, for the cleanup task. + /// + /// This is intentionally cross-tenant — the cleanup task is a server-wide + /// background job, so this is the one method that does not take a tenant. + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: std::time::Duration, + limit: u32, + ) -> StorageResult>; } /// Data provider for export operations. @@ -946,6 +1132,22 @@ pub trait GroupExportProvider: PatientExportProvider { tenant: &TenantContext, group_id: &str, ) -> StorageResult>; + + /// Returns each member's reference together with its `Group.member.period.start`. + /// + /// The default implementation falls back to [`get_group_members`] and + /// returns `None` for every period start (loses the membership-history + /// signal the `_since`-newly-added filter relies on). Backends that can + /// inspect the raw Group resource override this to return real period + /// starts. 
+ async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let members = self.get_group_members(tenant, group_id).await?; + Ok(members.into_iter().map(|m| (m, None)).collect()) + } } #[cfg(test)] diff --git a/crates/persistence/src/core/bulk_export_output.rs b/crates/persistence/src/core/bulk_export_output.rs new file mode 100644 index 000000000..da5f8437a --- /dev/null +++ b/crates/persistence/src/core/bulk_export_output.rs @@ -0,0 +1,188 @@ +//! Output storage for bulk export NDJSON files. +//! +//! The [`ExportOutputStore`] trait decouples *where the exported bytes go* +//! (local filesystem, S3, …) from the job-state backend. The job-state +//! backend stores keys; the output store turns keys into bytes and URLs. + +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::error::StorageResult; +use crate::tenant::TenantContext; + +/// A stable identifier for a single output part. +/// +/// The `fencing_token` is embedded so a zombie worker (one that lost its +/// lease) writes to a *different* key than the live worker holding the +/// reclaimed job, preventing output corruption. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExportPartKey { + /// The tenant the job belongs to. + pub tenant_id: String, + /// The export job this part belongs to. + pub job_id: ExportJobId, + /// The FHIR resource type contained in the part. + pub resource_type: String, + /// `"output"` or `"error"`. + pub file_type: String, + /// The zero-based part index within `(job, file_type, resource_type)`. + pub part_index: u32, + /// The fencing token of the worker that produced the part. + pub fencing_token: u64, +} + +impl ExportPartKey { + /// Creates a new output part key (`file_type = "output"`). 
+ pub fn output( + tenant_id: impl Into, + job_id: ExportJobId, + resource_type: impl Into, + part_index: u32, + fencing_token: u64, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + job_id, + resource_type: resource_type.into(), + file_type: "output".to_string(), + part_index, + fencing_token, + } + } + + /// Creates a new error part key (`file_type = "error"`). + pub fn error( + tenant_id: impl Into, + job_id: ExportJobId, + resource_type: impl Into, + part_index: u32, + fencing_token: u64, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + job_id, + resource_type: resource_type.into(), + file_type: "error".to_string(), + part_index, + fencing_token, + } + } + + /// The `{resource_type}-{part_index}` segment used in download routes. + pub fn part_segment(&self) -> String { + format!("{}-{}", self.resource_type, self.part_index) + } +} + +/// An open writer for a single export output part. +/// +/// Wraps a boxed async writer plus a line counter. Callers push NDJSON lines +/// with [`write_line`](ExportPartWriter::write_line) and then hand the writer +/// to [`ExportOutputStore::finalize_part`]. +pub struct ExportPartWriter { + /// The underlying async byte sink. + pub writer: std::pin::Pin>, + /// Number of lines written so far. + pub line_count: u64, + /// Number of bytes written so far. + pub byte_count: u64, +} + +impl ExportPartWriter { + /// Creates a new part writer over the given async sink. + pub fn new(writer: std::pin::Pin>) -> Self { + Self { + writer, + line_count: 0, + byte_count: 0, + } + } + + /// Writes one NDJSON line (a trailing newline is appended). + pub async fn write_line(&mut self, line: &str) -> std::io::Result<()> { + use tokio::io::AsyncWriteExt; + self.writer.write_all(line.as_bytes()).await?; + self.writer.write_all(b"\n").await?; + self.line_count += 1; + self.byte_count += line.len() as u64 + 1; + Ok(()) + } + + /// Flushes the underlying writer. 
+ pub async fn flush(&mut self) -> std::io::Result<()> { + use tokio::io::AsyncWriteExt; + self.writer.flush().await + } +} + +impl std::fmt::Debug for ExportPartWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExportPartWriter") + .field("line_count", &self.line_count) + .field("byte_count", &self.byte_count) + .finish() + } +} + +/// A finalized, immutable output part as it will appear in the manifest. +#[derive(Debug, Clone)] +pub struct FinalizedPart { + /// The part's stable key. + pub key: ExportPartKey, + /// The resource type contained in the part. + pub resource_type: String, + /// Number of resources (lines) in the part. + pub line_count: u64, + /// Total byte size of the part. + pub size_bytes: u64, +} + +/// A download URL plus the access posture the manifest should advertise. +#[derive(Debug, Clone)] +pub struct DownloadUrl { + /// The URL the client should fetch. + pub url: String, + /// `true` if the URL requires the kickoff Bearer token (HFS-served); + /// `false` if it is pre-signed and the client must NOT send a token. + pub requires_access_token: bool, +} + +/// Pluggable backend for bulk export output files. +/// +/// Implementations decide where NDJSON output physically lives (local FS, S3, +/// …) and how download URLs are minted. The job-state backend is unaware of +/// this — it stores keys; the output store turns keys into bytes and URLs. +#[async_trait] +pub trait ExportOutputStore: Send + Sync { + /// Opens an async writer for a new (or re-finalized) output part. + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult; + + /// Marks a part as finalized and immutable. + /// + /// For object stores this completes the multipart upload; for the local + /// filesystem this fsyncs and renames `.tmp` → final. + async fn finalize_part( + &self, + key: &ExportPartKey, + writer: ExportPartWriter, + ) -> StorageResult; + + /// Produces a download URL for a finalized part. 
+ async fn download_url(&self, key: &ExportPartKey, ttl: Duration) -> StorageResult; + + /// Opens an async reader over a finalized part (HFS-served download path). + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>>; + + /// Deletes all output parts for a job. Idempotent — a missing job is `Ok`. + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()>; +} diff --git a/crates/persistence/src/core/bulk_export_worker.rs b/crates/persistence/src/core/bulk_export_worker.rs new file mode 100644 index 000000000..eb69cf62c --- /dev/null +++ b/crates/persistence/src/core/bulk_export_worker.rs @@ -0,0 +1,662 @@ +//! Worker-facing traits for bulk export job execution. +//! +//! These traits are *not* part of the REST-facing [`BulkExportStorage`] surface +//! — they are what the export worker uses to claim jobs and persist progress +//! under a heartbeated, fencing-token-guarded lease. +//! +//! [`BulkExportStorage`]: crate::core::bulk_export::BulkExportStorage + +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; + +use crate::core::bulk_export::{ + BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportRequest, ExportStatus, + GroupExportProvider, PatientExportProvider, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportOutputStore, ExportPartKey, FinalizedPart}; +use crate::error::{StorageError, StorageResult}; +use crate::tenant::TenantContext; + +/// Identifier for an export worker instance. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct WorkerId(String); + +impl WorkerId { + /// Creates a worker ID from a string. + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + /// Generates a fresh random worker ID. + pub fn random() -> Self { + Self(uuid::Uuid::new_v4().to_string()) + } + + /// Returns the ID as a string slice. 
+ pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for WorkerId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +/// A lease over a single export job, held by exactly one worker at a time. +/// +/// Leases expire; if the holding worker does not heartbeat before +/// `lease_expiry`, the lease is reclaimable. The `fencing_token` is bumped on +/// every claim so a zombie worker cannot mutate a job another worker now owns. +#[derive(Debug, Clone)] +pub struct ExportJobLease { + /// The leased job. + pub job_id: ExportJobId, + /// The tenant the job belongs to. + pub tenant: TenantContext, + /// The worker holding the lease. + pub worker_id: WorkerId, + /// When the lease expires if not renewed. + pub lease_expiry: DateTime, + /// Monotonically increasing token, bumped on every claim. + pub fencing_token: u64, +} + +/// Error returned by fenced worker-storage operations. +#[derive(Debug)] +pub enum LeaseError { + /// The lease was lost — another worker reclaimed the job. The caller MUST + /// stop writing immediately. + LeaseLost { + /// The job whose lease was lost. + job_id: ExportJobId, + }, + /// An underlying storage error. + Storage(StorageError), +} + +impl std::fmt::Display for LeaseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::LeaseLost { job_id } => { + write!( + f, + "export job {job_id} lease lost (reclaimed by another worker)" + ) + } + Self::Storage(e) => write!(f, "storage error: {e}"), + } + } +} + +impl std::error::Error for LeaseError {} + +impl From for LeaseError { + fn from(e: StorageError) -> Self { + Self::Storage(e) + } +} + +/// The worker's view of a claimed job: everything needed to (re)run it. +#[derive(Debug, Clone)] +pub struct WorkerJobView { + /// The original export request. + pub request: ExportRequest, + /// The export level. + pub level: ExportLevel, + /// Server wall-clock frozen at kickoff. 
+ pub transaction_time: DateTime, + /// The FHIR version the export runs against. + pub fhir_version: helios_fhir::FhirVersion, + /// Already-persisted per-type progress, for resuming after a crash. + pub type_progress: Vec, +} + +/// Strategy for atomically claiming the next available export job. +/// +/// Each backend reaches for its native primitive — `SELECT … FOR UPDATE SKIP +/// LOCKED` on Postgres, a process-local mutex on SQLite. +#[async_trait] +pub trait ExportClaimStrategy: Send + Sync { + /// Atomically transitions one eligible job (`accepted`, or `in_progress` + /// with an expired lease) to held-by-this-worker, bumping the fencing + /// token. Returns `Ok(None)` when no job is available. + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: Duration, + ) -> StorageResult>; + + /// Renews a lease the worker still holds; returns the new expiry, or + /// `LeaseError::LeaseLost` if the job was reclaimed. + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError>; + + /// Releases a lease early (graceful shutdown). Best-effort. + async fn release(&self, lease: ExportJobLease) -> StorageResult<()>; +} + +/// Worker-owned mutations of job state. +/// +/// **Every method is fenced** by `worker_id` + `fencing_token`: a guarded +/// mutation affecting zero rows returns `LeaseError::LeaseLost`, so a zombie +/// worker cannot corrupt progress, file rows, or terminal status after its +/// job has been reclaimed. +#[async_trait] +pub trait ExportWorkerStorage: Send + Sync { + /// Loads the claimed job's request, level, frozen metadata and persisted + /// per-type progress (for resume). Fenced. + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result; + + /// Marks the job `in_progress` (sets `started_at` if unset). Fenced. 
+ async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError>; + + /// Idempotent upsert of per-type progress (cursor + counts). Fenced. + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError>; + + /// Idempotent upsert of a finalized output/error file row. Fenced. + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError>; + + /// Marks the job `complete` (sets `completed_at`). Fenced. + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError>; + + /// Marks the job `error` with a message (sets `completed_at`). Fenced. + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError>; +} + +/// Marker trait composing the three job-state surfaces a worker needs. +/// +/// Only the SQLite and Postgres backends implement this; it is held as an +/// `Arc` and selected at bootstrap by +/// `HFS_BULK_EXPORT_BACKEND`. +pub trait BulkExportJobStore: + BulkExportStorage + ExportWorkerStorage + ExportClaimStrategy +{ +} + +impl BulkExportJobStore for T where + T: BulkExportStorage + ExportWorkerStorage + ExportClaimStrategy +{ +} + +/// Marker trait for a resource-store that can feed every export level. 
+pub trait ExportResourceProvider: + ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} + +impl ExportResourceProvider for T where + T: ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} + +/// The default in-process export worker. +/// +/// Binds a [`BulkExportJobStore`] (job state + claim + worker storage), an +/// [`ExportResourceProvider`] (the resource store), and an +/// [`ExportOutputStore`] (where NDJSON bytes go), and drives a claimed job to +/// completion under its lease. +pub struct DefaultExportWorker { + /// Job-state store (claim, worker storage, lifecycle). + pub jobs: Arc, + /// Resource data provider. + pub data: Arc, + /// Output store for NDJSON parts. + pub output: Arc, + /// This worker's identifier. + pub worker_id: WorkerId, + /// Group-export `_since` toggle: when `true`, exclude resources from + /// before `_since` for patients added to the Group after `_since` + /// (using `Group.member.period.start`). + pub exclude_since_newly_added: bool, +} + +impl DefaultExportWorker +where + Js: BulkExportJobStore + ?Sized, + Dp: ExportResourceProvider + ?Sized, + Os: ExportOutputStore + ?Sized, +{ + /// Creates a new worker (defaults to `since_newly_added=include`). + pub fn new(jobs: Arc, data: Arc, output: Arc, worker_id: WorkerId) -> Self { + Self { + jobs, + data, + output, + worker_id, + exclude_since_newly_added: false, + } + } + + /// Sets the `since_newly_added=exclude` toggle for Group exports. + pub fn with_exclude_since_newly_added(mut self, exclude: bool) -> Self { + self.exclude_since_newly_added = exclude; + self + } + + /// Runs the export job described by `lease` to completion. + /// + /// Every job-state mutation is fenced by `lease.worker_id` + + /// `lease.fencing_token`; any `LeaseError::LeaseLost` aborts the run + /// silently (the worker that reclaimed the job now owns it). 
+ pub async fn run_job(&self, lease: ExportJobLease) -> StorageResult<()> { + match self.run_job_inner(&lease).await { + Ok(()) => Ok(()), + Err(LeaseError::LeaseLost { .. }) => { + // Another worker owns the job now — stop silently. + Ok(()) + } + Err(LeaseError::Storage(e)) => { + // Best-effort: mark the job failed (also fenced). + let _ = self + .jobs + .fail_export_job( + &lease.tenant, + &lease.job_id, + &lease.worker_id, + lease.fencing_token, + &e.to_string(), + ) + .await; + Err(e) + } + } + } + + async fn run_job_inner(&self, lease: &ExportJobLease) -> Result<(), LeaseError> { + let tenant = &lease.tenant; + let job_id = &lease.job_id; + let wid = &lease.worker_id; + let token = lease.fencing_token; + + let view = self + .jobs + .get_export_job_for_worker(tenant, job_id, wid, token) + .await?; + self.jobs + .mark_export_in_progress(tenant, job_id, wid, token) + .await?; + + let request = &view.request; + + // Resolve the resource types to export. + let types = self + .data + .list_export_types(tenant, request) + .await + .map_err(LeaseError::Storage)?; + + // For Group exports, resolve the member patient IDs once. + // When `exclude_since_newly_added` is set AND `_since` is provided, + // filter out patients whose `Group.member.period.start` is *after* + // `_since` (i.e., they joined the cohort after the client's last + // sync) — the IG-recommended behavior under the `exclude` toggle. + let group_patient_ids: Option> = match &view.level { + ExportLevel::Group { group_id } => { + let ids = match (self.exclude_since_newly_added, view.request.since.as_ref()) { + (true, Some(since)) => { + let members = self + .data + .get_group_members_with_periods(tenant, group_id) + .await + .map_err(LeaseError::Storage)?; + members + .into_iter() + .filter_map(|(reference, period_start)| { + let pid = reference.strip_prefix("Patient/")?; + // Keep members whose period.start is unknown OR + // <= since (i.e., were already members at since). 
+ match period_start { + Some(start) if start > *since => None, + _ => Some(pid.to_string()), + } + }) + .collect() + } + _ => self + .data + .resolve_group_patient_ids(tenant, group_id) + .await + .map_err(LeaseError::Storage)?, + }; + Some(ids) + } + _ => None, + }; + + let batch_size = request.batch_size.max(1); + + for resource_type in &types { + // Resume from any persisted cursor for this type. + let mut cursor: Option = view + .type_progress + .iter() + .find(|p| &p.resource_type == resource_type) + .and_then(|p| p.cursor_state.clone()); + let mut exported: u64 = view + .type_progress + .iter() + .find(|p| &p.resource_type == resource_type) + .map(|p| p.exported_count) + .unwrap_or(0); + let mut part_index: u32 = 0; + + loop { + // Cooperative cancellation check. + if let Ok(progress) = self.jobs.get_export_status(tenant, job_id).await { + if progress.status == ExportStatus::Cancelled { + return Ok(()); + } + } + + let batch = match &group_patient_ids { + Some(pids) => self + .data + .fetch_patient_compartment_batch( + tenant, + request, + resource_type, + pids, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)?, + None if matches!(view.level, ExportLevel::Patient) + && !request.patient_refs.is_empty() => + { + // Patient-level with specific patient filter: scope to + // exactly the requested patients' compartments. + let patient_ids: Vec = request + .patient_refs + .iter() + .map(|r| r.strip_prefix("Patient/").unwrap_or(r).to_string()) + .collect(); + self.data + .fetch_patient_compartment_batch( + tenant, + request, + resource_type, + &patient_ids, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)? + } + None if matches!(view.level, ExportLevel::Patient) => { + // Patient-level without a patient filter: export all + // resources of this type across the patient compartment. 
+ self.data + .fetch_export_batch( + tenant, + request, + resource_type, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)? + } + None => self + .data + .fetch_export_batch( + tenant, + request, + resource_type, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)?, + }; + + if !batch.lines.is_empty() { + let key = ExportPartKey::output( + tenant.tenant_id().as_str(), + job_id.clone(), + resource_type.clone(), + part_index, + token, + ); + let mut writer = self + .output + .open_writer(&key) + .await + .map_err(LeaseError::Storage)?; + for line in &batch.lines { + let out_line = apply_elements(line, &request.elements); + writer.write_line(&out_line).await.map_err(|e| { + LeaseError::Storage(StorageError::Backend( + crate::error::BackendError::Internal { + backend_name: "export-worker".to_string(), + message: format!("write_line: {e}"), + source: None, + }, + )) + })?; + } + let finalized = self + .output + .finalize_part(&key, writer) + .await + .map_err(LeaseError::Storage)?; + exported += finalized.line_count; + self.jobs + .record_export_file(tenant, job_id, wid, token, &finalized, "output") + .await?; + part_index += 1; + } + + cursor = batch.next_cursor.clone(); + + // Persist progress + heartbeat after each batch. + let mut progress = TypeExportProgress::new(resource_type.clone()); + progress.exported_count = exported; + progress.cursor_state = cursor.clone(); + self.jobs + .update_export_type_progress(tenant, job_id, wid, token, &progress) + .await?; + self.jobs.heartbeat(lease).await?; + + if batch.is_last { + break; + } + } + } + + self.jobs + .finish_export_job(tenant, job_id, wid, token) + .await?; + Ok(()) + } +} + +/// Applies `_elements` projection to an NDJSON line. +/// +/// When `elements` is non-empty, keeps `resourceType`, `id`, `meta` and the +/// listed top-level element names, and adds a `SUBSETTED` `meta.tag`. On any +/// parse failure the original line is returned unchanged. 
+fn apply_elements(line: &str, elements: &[String]) -> String { + if elements.is_empty() { + return line.to_string(); + } + let Ok(serde_json::Value::Object(obj)) = serde_json::from_str::(line) else { + return line.to_string(); + }; + let mut out = serde_json::Map::new(); + // Always-included mandatory elements. + for key in ["resourceType", "id"] { + if let Some(v) = obj.get(key) { + out.insert(key.to_string(), v.clone()); + } + } + // Requested top-level elements (strip a leading `ResourceType.` prefix). + for el in elements { + let name = el.rsplit('.').next().unwrap_or(el.as_str()); + if let Some(v) = obj.get(name) { + out.insert(name.to_string(), v.clone()); + } + } + // meta + SUBSETTED tag. + let mut meta = obj + .get("meta") + .and_then(|m| m.as_object().cloned()) + .unwrap_or_default(); + let tag = serde_json::json!({ + "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationValue", + "code": "SUBSETTED", + }); + let tags = meta + .entry("tag".to_string()) + .or_insert_with(|| serde_json::Value::Array(Vec::new())); + if let serde_json::Value::Array(arr) = tags { + arr.push(tag); + } + out.insert("meta".to_string(), serde_json::Value::Object(meta)); + serde_json::to_string(&serde_json::Value::Object(out)).unwrap_or_else(|_| line.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_apply_elements_noop_when_empty() { + let line = r#"{"resourceType":"Patient","id":"1","name":[]}"#; + assert_eq!(apply_elements(line, &[]), line); + } + + #[test] + fn test_apply_elements_subsets_and_tags() { + let line = r#"{"resourceType":"Patient","id":"1","name":[{"family":"X"}],"gender":"male"}"#; + let out = apply_elements(line, &["name".to_string()]); + let v: serde_json::Value = serde_json::from_str(&out).unwrap(); + assert_eq!(v["resourceType"], "Patient"); + assert_eq!(v["id"], "1"); + assert!(v.get("name").is_some()); + assert!(v.get("gender").is_none()); + assert_eq!(v["meta"]["tag"][0]["code"], "SUBSETTED"); + } + + #[cfg(feature = 
"sqlite")] + mod worker_integration { + use super::*; + use crate::backends::local_fs::LocalFsOutputStore; + use crate::backends::sqlite::SqliteBackend; + use crate::core::ResourceStorage; + use crate::core::bulk_export::{ExportRequest, StartExportInput}; + use crate::tenant::{TenantContext, TenantId, TenantPermissions}; + use chrono::Utc; + use std::sync::Arc; + + fn tenant() -> TenantContext { + TenantContext::new(TenantId::new("t1"), TenantPermissions::full_access()) + } + + #[tokio::test] + async fn test_run_job_system_export_end_to_end() { + let backend = Arc::new(SqliteBackend::in_memory().unwrap()); + backend.init_schema().unwrap(); + let tenant = tenant(); + + for i in 0..3 { + backend + .create( + &tenant, + "Patient", + serde_json::json!({"resourceType": "Patient", "id": format!("p{i}")}), + helios_fhir::FhirVersion::default(), + ) + .await + .unwrap(); + } + + let tmp = tempfile::tempdir().unwrap(); + let output = Arc::new(LocalFsOutputStore::new(tmp.path(), "http://localhost:8080")); + + let job_id = backend + .start_export( + &tenant, + StartExportInput { + request: ExportRequest::system() + .with_types(vec!["Patient".to_string()]) + .with_batch_size(2), + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("sub".to_string()), + fhir_version: helios_fhir::FhirVersion::default(), + }, + ) + .await + .unwrap(); + + let worker_id = WorkerId::new("w1"); + let worker = DefaultExportWorker::new( + Arc::clone(&backend), + Arc::clone(&backend), + Arc::clone(&output), + worker_id.clone(), + ); + + let lease = backend + .claim_next(&worker_id, Duration::from_secs(60)) + .await + .unwrap() + .expect("job claimable"); + assert_eq!(lease.job_id, job_id); + + worker.run_job(lease).await.unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + + let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); + 
let total: u64 = manifest.output.iter().map(|e| e.count).sum(); + assert_eq!(total, 3); + } + } +} diff --git a/crates/persistence/src/core/mod.rs b/crates/persistence/src/core/mod.rs index 264281a74..227146407 100644 --- a/crates/persistence/src/core/mod.rs +++ b/crates/persistence/src/core/mod.rs @@ -91,6 +91,8 @@ pub mod backend; pub mod bulk_export; +pub mod bulk_export_output; +pub mod bulk_export_worker; pub mod bulk_submit; pub mod capabilities; pub mod history; @@ -102,9 +104,17 @@ pub mod versioned; // Re-export main types pub use backend::{Backend, BackendCapability, BackendConfig, BackendKind, BackendPoolStats}; pub use bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, TypeFilter, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportManifest, ExportOutputFile, ExportProgress, + ExportRequest, ExportStatus, GroupExportProvider, NdjsonBatch, PatientExportProvider, + RawExportManifest, RawManifestEntry, StartExportInput, TypeExportProgress, TypeFilter, +}; +pub use bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +pub use bulk_export_worker::{ + BulkExportJobStore, DefaultExportWorker, ExportClaimStrategy, ExportJobLease, + ExportResourceProvider, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; pub use bulk_submit::{ BulkEntryOutcome, BulkEntryResult, BulkProcessingOptions, BulkSubmitProvider, diff --git a/crates/persistence/src/error.rs b/crates/persistence/src/error.rs index 3f96d16b3..fc55dd0be 100644 --- a/crates/persistence/src/error.rs +++ b/crates/persistence/src/error.rs @@ -392,6 +392,10 @@ pub enum BulkExportError { /// Too many concurrent exports. 
#[error("too many concurrent exports (maximum: {max_concurrent})")] TooManyConcurrentExports { max_concurrent: u32 }, + + /// The worker lease for this job was lost (reclaimed by another worker). + #[error("export job {job_id} lease lost (reclaimed by another worker)")] + LeaseLost { job_id: String }, } /// Errors related to bulk submit operations. diff --git a/crates/persistence/tests/minio_s3_tests.rs b/crates/persistence/tests/minio_s3_tests.rs index ab8c19d1d..03fe4d44e 100644 --- a/crates/persistence/tests/minio_s3_tests.rs +++ b/crates/persistence/tests/minio_s3_tests.rs @@ -14,7 +14,7 @@ use aws_sdk_s3::error::ProvideErrorMetadata; use aws_sdk_s3::primitives::ByteStream; use helios_fhir::FhirVersion; use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; -use helios_persistence::core::bulk_export::{BulkExportStorage, ExportDataProvider, ExportRequest}; +use helios_persistence::core::bulk_export::{ExportDataProvider, ExportRequest}; use helios_persistence::core::bulk_submit::{ BulkEntryOutcome, BulkProcessingOptions, BulkSubmitProvider, BulkSubmitRollbackProvider, NdjsonEntry, SubmissionId, @@ -670,49 +670,16 @@ async fn test_minio_bulk_export_lifecycle_manifest_and_outputs() { .unwrap(); } - let job_id = harness + // S3 no longer implements `BulkExportStorage` (job state lives in + // SQLite/Postgres; see Phase 2 §2b). Verify that the S3 backend's + // `ExportDataProvider` data feed still returns the seeded resources. 
+ let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); + let batch = harness .backend - .start_export( - &tenant, - ExportRequest::system().with_types(vec!["Patient".to_string()]), - ) - .await - .unwrap(); - - let manifest = harness - .backend - .get_export_manifest(&tenant, &job_id) - .await - .unwrap(); - assert!(!manifest.output.is_empty()); - - let bucket_prefix = format!("s3://{}/", harness.bucket); - for output in &manifest.output { - assert!(output.url.starts_with(&bucket_prefix)); - let key = output.url.strip_prefix(&bucket_prefix).unwrap(); - let object = harness - .sdk_client - .get_object() - .bucket(&harness.bucket) - .key(key) - .send() - .await - .unwrap(); - let bytes = object.body.collect().await.unwrap().into_bytes(); - assert!( - !bytes.is_empty(), - "bulk export output object should not be empty: {}", - output.url - ); - } - - harness - .backend - .delete_export(&tenant, &job_id) + .fetch_export_batch(&tenant, &request, "Patient", None, 100) .await .unwrap(); - let deleted = harness.backend.get_export_status(&tenant, &job_id).await; - assert!(matches!(deleted, Err(StorageError::BulkExport(_)))); + assert_eq!(batch.lines.len(), 3); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -852,3 +819,97 @@ async fn test_minio_pagination_over_1000_history_and_export() { assert_eq!(batch2.lines.len(), 5); assert!(batch2.is_last); } + +// ============================================================================ +// Phase 2 — S3OutputStore tests against MinIO. 
+// ============================================================================ + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_minio_s3_output_store_round_trip() { + use helios_persistence::backends::s3::{ + AccessTokenMode, AwsS3Client, AwsS3ClientOptions, S3OutputStore, + }; + use helios_persistence::core::bulk_export::ExportJobId; + use helios_persistence::core::bulk_export_output::{ExportOutputStore, ExportPartKey}; + use std::sync::Arc; + use std::time::Duration; + use tokio::io::AsyncReadExt; + + if skip_if_disabled("test_minio_s3_output_store_round_trip") { + return; + } + + let shared = shared_minio().await; + ensure_backend_env_credentials(shared); + let sdk_client = build_minio_sdk_client(shared).await; + let bucket = test_bucket_name(); + ensure_bucket_exists(&sdk_client, &bucket).await; + + let region = aws_config::Region::new("us-east-1"); + let credentials = aws_sdk_s3::config::Credentials::new( + &shared.root_user, + &shared.root_password, + None, + None, + "minio-test", + ); + let sdk_config = aws_config::SdkConfig::builder() + .region(region) + .credentials_provider(aws_sdk_s3::config::SharedCredentialsProvider::new( + credentials, + )) + .behavior_version(aws_config::BehaviorVersion::latest()) + .build(); + let s3_client = Arc::new(AwsS3Client::from_sdk_config_with_options( + &sdk_config, + AwsS3ClientOptions { + endpoint_url: Some(shared.endpoint_url.clone()), + force_path_style: true, + }, + )); + + let store = S3OutputStore::new( + s3_client, + bucket.clone(), + "http://localhost:8080", + AccessTokenMode::Auto, + Duration::from_secs(60), + ); + + let job_id = ExportJobId::new(); + let key = ExportPartKey::output("tenant-a", job_id.clone(), "Patient", 0, 1); + + // Write two NDJSON lines and finalize. 
+ let mut writer = store.open_writer(&key).await.unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"a"}"#) + .await + .unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"b"}"#) + .await + .unwrap(); + let finalized = store.finalize_part(&key, writer).await.unwrap(); + assert_eq!(finalized.line_count, 2); + assert!(finalized.size_bytes > 0); + + // Pre-signed GET URL. + let url = store + .download_url(&key, Duration::from_secs(60)) + .await + .unwrap(); + assert!(!url.requires_access_token); + assert!(url.url.contains("X-Amz-Signature") || url.url.contains("Signature=")); + + // open_reader streams the same bytes back. + let mut reader = store.open_reader(&key).await.unwrap(); + let mut content = String::new(); + reader.read_to_string(&mut content).await.unwrap(); + assert_eq!(content.lines().count(), 2); + + // delete_job_outputs removes the part; idempotent on second call. + let tenant = tenant("tenant-a"); + store.delete_job_outputs(&tenant, &job_id).await.unwrap(); + store.delete_job_outputs(&tenant, &job_id).await.unwrap(); + assert!(store.open_reader(&key).await.is_err()); +} diff --git a/crates/persistence/tests/postgres_tests.rs b/crates/persistence/tests/postgres_tests.rs index 35c76965c..750e9913f 100644 --- a/crates/persistence/tests/postgres_tests.rs +++ b/crates/persistence/tests/postgres_tests.rs @@ -504,7 +504,7 @@ mod postgres_integration { use testcontainers::ImageExt; use testcontainers::runners::AsyncRunner; use testcontainers_modules::postgres::Postgres; - use tokio::sync::OnceCell; + use tokio::sync::{Mutex, OnceCell}; /// Shared PostgreSQL container reused across all tests in this module. 
struct SharedPg { @@ -515,6 +515,7 @@ mod postgres_integration { } static SHARED_PG: OnceCell = OnceCell::const_new(); + static BULK_EXPORT_TEST_LOCK: Mutex<()> = Mutex::const_new(()); async fn shared_pg() -> &'static SharedPg { SHARED_PG @@ -2825,4 +2826,165 @@ mod postgres_integration { .unwrap(); assert_eq!(ids, vec!["p1".to_string()]); } + + // ======================================================================== + // Bulk Export — Phase 2 multi-instance job state on Postgres. + // ======================================================================== + + use chrono::Utc; + use helios_persistence::core::bulk_export::{ + BulkExportStorage, ExportRequest, ExportStatus, StartExportInput, TypeExportProgress, + }; + use helios_persistence::core::bulk_export_worker::{ + ExportClaimStrategy, ExportWorkerStorage, LeaseError, WorkerId, + }; + use std::time::Duration as StdDuration; + + fn export_input(request: ExportRequest) -> StartExportInput { + StartExportInput { + request, + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("pg-test".to_string()), + fhir_version: FhirVersion::default(), + } + } + + /// Claims jobs in a loop until the lease for `target` is returned; + /// releases any other jobs claimed along the way. Robust to concurrent + /// tests sharing the testcontainers PostgreSQL instance. + async fn claim_specific( + backend: &helios_persistence::backends::postgres::PostgresBackend, + worker_id: &WorkerId, + target: &helios_persistence::core::bulk_export::ExportJobId, + lease_duration: StdDuration, + ) -> helios_persistence::core::bulk_export_worker::ExportJobLease { + for _ in 0..100 { + match backend.claim_next(worker_id, lease_duration).await.unwrap() { + Some(lease) if &lease.job_id == target => return lease, + Some(other) => { + // Drain other tests' jobs out of the queue by completing + // them (so the claim ordering moves on instead of + // looping back to the same job after `release`). 
+ let _ = backend + .finish_export_job( + &other.tenant, + &other.job_id, + &other.worker_id, + other.fencing_token, + ) + .await; + } + None => { + tokio::time::sleep(std::time::Duration::from_millis(20)).await; + } + } + } + panic!("never claimed the expected job"); + } + + #[tokio::test] + async fn postgres_integration_export_claim_skip_locked() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; + let backend = create_backend().await; + let tenant = create_tenant("export-claim"); + + let job_id = backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + + let worker_a = WorkerId::new(format!("pg-worker-a-{}", uuid::Uuid::new_v4())); + let lease_a = + claim_specific(&backend, &worker_a, &job_id, StdDuration::from_secs(60)).await; + assert!(lease_a.fencing_token >= 1); + + // Worker A finishes via the fenced ExportWorkerStorage. + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await + .unwrap(); + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker_a, + lease_a.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await + .unwrap(); + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await + .unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + } + + #[tokio::test] + async fn postgres_integration_export_stale_worker_fenced_out() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; + let backend = create_backend().await; + let tenant = create_tenant("export-fence"); + + let job_id = backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + + // Worker A takes a very short lease, then Worker B reclaims. 
+ let worker_a = WorkerId::new(format!("pg-stale-a-{}", uuid::Uuid::new_v4())); + let lease_a = + claim_specific(&backend, &worker_a, &job_id, StdDuration::from_millis(1)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + let worker_b = WorkerId::new(format!("pg-stale-b-{}", uuid::Uuid::new_v4())); + let lease_b = + claim_specific(&backend, &worker_b, &job_id, StdDuration::from_secs(60)).await; + assert!(lease_b.fencing_token > lease_a.fencing_token); + + // Worker A is fenced out from every mutation. + assert!(matches!( + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + assert!(matches!( + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + + // Worker B can still finish. + backend + .finish_export_job(&tenant, &job_id, &worker_b, lease_b.fencing_token) + .await + .unwrap(); + } + + #[tokio::test] + async fn postgres_integration_export_count_active_and_expire() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; + let backend = create_backend().await; + let tenant = create_tenant("export-cleanup"); + + for _ in 0..2 { + backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + } + assert_eq!(backend.count_active_exports(&tenant).await.unwrap(), 2); + + // Nothing is expired yet. + let expired_now = backend + .list_expired_exports(Utc::now(), StdDuration::from_secs(3600), 100) + .await + .unwrap(); + // Only completed/error/cancelled jobs can expire — these are accepted. 
+ assert!(expired_now.is_empty()); + } } diff --git a/crates/persistence/tests/s3_tests.rs b/crates/persistence/tests/s3_tests.rs index cdff998a0..2d2595910 100644 --- a/crates/persistence/tests/s3_tests.rs +++ b/crates/persistence/tests/s3_tests.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use helios_fhir::FhirVersion; use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; -use helios_persistence::core::bulk_export::{BulkExportStorage, ExportRequest}; +use helios_persistence::core::bulk_export::{ExportDataProvider, ExportRequest}; use helios_persistence::core::bulk_submit::{ BulkProcessingOptions, BulkSubmitProvider, NdjsonEntry, SubmissionId, }; @@ -185,15 +185,15 @@ async fn test_aws_bundle_bulk_export_and_submit() { assert_eq!(bundle.entries.len(), 1); assert_eq!(bundle.entries[0].status, 201); - let job_id = backend - .start_export( - &tenant, - ExportRequest::system().with_types(vec!["Patient".to_string()]), - ) + // S3 no longer implements `BulkExportStorage` (job state lives in + // SQLite/Postgres); only `ExportDataProvider` remains. Verify the data + // feed instead of the removed kick-off/manifest path. 
+ let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); + let batch = backend + .fetch_export_batch(&tenant, &request, "Patient", None, 100) .await .unwrap(); - let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); - assert!(!manifest.output.is_empty()); + assert!(!batch.lines.is_empty()); let submission_id = SubmissionId::new("aws-client", format!("sub-{}", Uuid::new_v4())); backend diff --git a/crates/persistence/tests/sqlite_tests.rs b/crates/persistence/tests/sqlite_tests.rs index 61536b688..4ae580f29 100644 --- a/crates/persistence/tests/sqlite_tests.rs +++ b/crates/persistence/tests/sqlite_tests.rs @@ -8,11 +8,13 @@ use helios_fhir::FhirVersion; use serde_json::json; use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; -use helios_persistence::core::ResourceStorage; use helios_persistence::core::history::{ HistoryMethod, HistoryParams, InstanceHistoryProvider, SystemHistoryProvider, TypeHistoryProvider, }; +use helios_persistence::core::{ + ExportLevel, ExportRequest, PatientExportProvider, ResourceStorage, +}; use helios_persistence::error::{ResourceError, StorageError}; use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; @@ -1647,6 +1649,86 @@ async fn test_search_reference_subject() { assert!(ids.contains(&"obs-2")); } +#[tokio::test] +async fn test_patient_compartment_export_observation_without_since() { + let backend = create_backend(); + let tenant = create_tenant("test-tenant"); + + backend + .create( + &tenant, + "Patient", + json!({ + "resourceType": "Patient", + "id": "patient-1" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Patient", + json!({ + "resourceType": "Patient", + "id": "patient-2" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Observation", + json!({ + "resourceType": "Observation", + "id": "obs-1", + "subject": {"reference": 
"Patient/patient-1"}, + "status": "final" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Observation", + json!({ + "resourceType": "Observation", + "id": "obs-2", + "subject": {"reference": "Patient/patient-2"}, + "status": "final" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let request = ExportRequest::new(ExportLevel::Patient); + let batch = backend + .fetch_patient_compartment_batch( + &tenant, + &request, + "Observation", + &["patient-1".to_string()], + None, + 10, + ) + .await + .unwrap(); + + assert!(batch.is_last); + assert_eq!(batch.lines.len(), 1); + let observation: serde_json::Value = serde_json::from_str(&batch.lines[0]).unwrap(); + assert_eq!(observation["id"], "obs-1"); +} + #[tokio::test] async fn test_search_multiple_parameters() { let backend = create_backend(); diff --git a/crates/rest/src/bulk_export_auth.rs b/crates/rest/src/bulk_export_auth.rs new file mode 100644 index 000000000..cb5fafdb9 --- /dev/null +++ b/crates/rest/src/bulk_export_auth.rs @@ -0,0 +1,207 @@ +//! Authorization for bulk-export file downloads. +//! +//! The [`ExportFileAuth`] trait gates the HFS-served download path +//! (`requiresAccessToken = true`). Pre-signed-URL downloads bypass HFS +//! entirely and never reach this trait. + +use async_trait::async_trait; +use helios_auth::Principal; +use helios_auth::scope::{ResourceTypeSpec, SmartPermissions}; +use helios_persistence::core::ExportFileMetadata; +use helios_persistence::tenant::TenantContext; + +/// Error returned when a download is not authorized. +#[derive(Debug, Clone)] +pub enum ExportAuthError { + /// No authenticated principal was supplied. + Unauthenticated, + /// The principal is not permitted to download this file. 
/// Reason a bulk-export file download was refused.
#[derive(Debug, Clone)]
pub enum ExportAuthError {
    /// The request carried no authenticated principal.
    Unauthenticated,
    /// The principal may not download this file; the payload explains why.
    Forbidden(String),
}

impl std::fmt::Display for ExportAuthError {
    /// Renders `authentication required` or `forbidden: <reason>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unauthenticated => f.write_str("authentication required"),
            Self::Forbidden(reason) => write!(f, "forbidden: {reason}"),
        }
    }
}

impl std::error::Error for ExportAuthError {}
#[tokio::test] + async fn non_owner_without_wildcard_is_forbidden() { + let auth = BearerScopeAuth; + let p = make_principal("other", "system/Patient.rs"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(matches!(result, Err(ExportAuthError::Forbidden(_)))); + } + + #[tokio::test] + async fn wildcard_scope_overrides_ownership() { + let auth = BearerScopeAuth; + let p = make_principal("other", "system/*.rs"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn owner_missing_read_scope_is_forbidden() { + let auth = BearerScopeAuth; + // No SMART read scope — empty scope string → ScopeSet::empty() + let p = make_principal("owner", "openid profile"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(matches!(result, Err(ExportAuthError::Forbidden(_)))); + } +} + +/// Authorizes a bulk-export file download. +#[async_trait] +pub trait ExportFileAuth: Send + Sync { + /// Decides whether `principal` may download the file described by + /// `file_meta` for a job owned by `job_owner_subject`. + async fn authorize_download( + &self, + principal: Option<&Principal>, + tenant: &TenantContext, + job_owner_subject: Option<&str>, + file_meta: &ExportFileMetadata, + ) -> Result<(), ExportAuthError>; +} + +/// Returns true if the principal holds any `system/*` (wildcard) scope. 
+fn has_wildcard_scope(principal: &Principal) -> bool { + principal + .scopes + .scopes() + .iter() + .any(|s| s.resource_type == ResourceTypeSpec::Wildcard) +} + +/// The default [`ExportFileAuth`]: requires the kickoff Bearer token, the +/// job's owner-subject to match (or a `system/*` scope), and a +/// `system/{ResourceType}.rs` (read) scope covering the file's resource type. +#[derive(Debug, Clone, Default)] +pub struct BearerScopeAuth; + +#[async_trait] +impl ExportFileAuth for BearerScopeAuth { + async fn authorize_download( + &self, + principal: Option<&Principal>, + _tenant: &TenantContext, + job_owner_subject: Option<&str>, + file_meta: &ExportFileMetadata, + ) -> Result<(), ExportAuthError> { + // When auth is disabled there is no principal — no enforcement, as + // elsewhere in HFS. + let Some(principal) = principal else { + return Ok(()); + }; + + let owns_job = job_owner_subject == Some(principal.subject.as_str()); + let is_wildcard = has_wildcard_scope(principal); + if !owns_job && !is_wildcard { + return Err(ExportAuthError::Forbidden( + "principal does not own this export job".to_string(), + )); + } + + if !principal + .scopes + .is_permitted(&file_meta.resource_type, SmartPermissions::READ) + { + return Err(ExportAuthError::Forbidden(format!( + "missing read scope for {}", + file_meta.resource_type + ))); + } + + Ok(()) + } +} diff --git a/crates/rest/src/config.rs b/crates/rest/src/config.rs index 45a4973ae..6fd56bc80 100644 --- a/crates/rest/src/config.rs +++ b/crates/rest/src/config.rs @@ -237,6 +237,205 @@ impl MultitenancyConfig { } } +/// Configuration for the bulk data export subsystem. +#[derive(Debug, Clone)] +pub struct BulkExportConfig { + /// Master switch — when `false`, the `$export` endpoints return `501`. + pub enabled: bool, + /// Job-state backend: `embedded` (SQLite) or `postgres-s3` (PostgreSQL). + pub backend: String, + /// Output store: `local-fs` or `s3`. + pub output_backend: String, + /// Local-FS output root directory. 
/// Configuration for the bulk data export subsystem.
#[derive(Debug, Clone)]
pub struct BulkExportConfig {
    /// Master switch — when `false`, the `$export` endpoints return `501`.
    pub enabled: bool,
    /// Job-state backend: `embedded` (SQLite) or `postgres-s3` (PostgreSQL).
    pub backend: String,
    /// Output store: `local-fs` or `s3`.
    pub output_backend: String,
    /// Local-FS output root directory.
    pub output_dir: Option<String>,
    /// S3 bucket for output (required when `output_backend = s3`).
    pub s3_bucket: Option<String>,
    /// Manifest access-token posture: `auto`, `true`, or `false`.
    pub requires_access_token: String,
    /// Pre-signed download-URL lifetime, in seconds.
    pub file_url_ttl_secs: u64,
    /// How long output files are retained after job completion, in seconds.
    pub output_ttl_secs: u64,
    /// Maximum jobs this pod runs concurrently.
    pub worker_concurrency: u32,
    /// When `true`, this pod does not run in-process workers.
    pub disable_local_worker: bool,
    /// Cap on simultaneous in-flight jobs per tenant.
    pub max_concurrent_per_tenant: u32,
    /// Resources per `fetch_export_batch` call.
    pub batch_size: u32,
    /// Initial lease length issued at claim, in seconds.
    pub lease_duration_secs: u64,
    /// Worker heartbeat cadence, in seconds.
    pub heartbeat_interval_secs: u64,
    /// How often the cleanup task scans for expired outputs, in seconds.
    pub cleanup_interval_secs: u64,
    /// Group export `_since` toggle (`include` / `exclude`).
    ///
    /// When `exclude`, patients whose `Group.member.period.start` is *after*
    /// the request's `_since` are filtered out of the export — implementing
    /// the IG's optional "do not return resources from before the patient
    /// joined the cohort" behavior.
    pub since_newly_added: String,
}

impl Default for BulkExportConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            backend: "embedded".to_string(),
            output_backend: "local-fs".to_string(),
            output_dir: None,
            s3_bucket: None,
            requires_access_token: "auto".to_string(),
            file_url_ttl_secs: 3600,
            output_ttl_secs: 86400,
            worker_concurrency: 2,
            disable_local_worker: false,
            max_concurrent_per_tenant: 4,
            batch_size: 1000,
            lease_duration_secs: 60,
            heartbeat_interval_secs: 20,
            cleanup_interval_secs: 300,
            since_newly_added: "include".to_string(),
        }
    }
}

impl BulkExportConfig {
    /// Interprets a boolean flag value.
    ///
    /// Surrounding whitespace is ignored. `true` / `1` yield `Some(true)` and
    /// `false` / `0` yield `Some(false)` (letters are ASCII-case-insensitive).
    /// Anything else yields `None` so the caller can keep its default instead
    /// of silently treating a typo as `false`.
    fn parse_bool(raw: &str) -> Option<bool> {
        let value = raw.trim();
        if value.eq_ignore_ascii_case("true") || value == "1" {
            Some(true)
        } else if value.eq_ignore_ascii_case("false") || value == "0" {
            Some(false)
        } else {
            None
        }
    }

    /// Loads bulk-export configuration from `HFS_BULK_EXPORT_*` env vars.
    ///
    /// Every setting falls back to its [`Default`] value when the variable is
    /// unset or (for booleans and numbers) cannot be parsed. `OUTPUT_DIR` and
    /// `S3_BUCKET` are treated as unset when blank. The result is not
    /// validated here; call [`BulkExportConfig::validate`].
    pub fn from_env() -> Self {
        fn env_bool(key: &str, default: bool) -> bool {
            std::env::var(key)
                .ok()
                .and_then(|raw| BulkExportConfig::parse_bool(&raw))
                .unwrap_or(default)
        }
        fn env_num<T: std::str::FromStr>(key: &str, default: T) -> T {
            std::env::var(key)
                .ok()
                .and_then(|raw| raw.trim().parse().ok())
                .unwrap_or(default)
        }
        fn env_nonblank(key: &str) -> Option<String> {
            std::env::var(key).ok().filter(|v| !v.trim().is_empty())
        }

        let d = Self::default();
        Self {
            enabled: env_bool("HFS_BULK_EXPORT_ENABLED", d.enabled),
            backend: std::env::var("HFS_BULK_EXPORT_BACKEND").unwrap_or(d.backend),
            output_backend: std::env::var("HFS_BULK_EXPORT_OUTPUT_BACKEND")
                .unwrap_or(d.output_backend),
            output_dir: env_nonblank("HFS_BULK_EXPORT_OUTPUT_DIR"),
            s3_bucket: env_nonblank("HFS_BULK_EXPORT_S3_BUCKET"),
            requires_access_token: std::env::var("HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN")
                .unwrap_or(d.requires_access_token),
            file_url_ttl_secs: env_num("HFS_BULK_EXPORT_FILE_URL_TTL", d.file_url_ttl_secs),
            output_ttl_secs: env_num("HFS_BULK_EXPORT_OUTPUT_TTL", d.output_ttl_secs),
            worker_concurrency: env_num(
                "HFS_BULK_EXPORT_WORKER_CONCURRENCY",
                d.worker_concurrency,
            ),
            disable_local_worker: env_bool(
                "HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER",
                d.disable_local_worker,
            ),
            max_concurrent_per_tenant: env_num(
                "HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT",
                d.max_concurrent_per_tenant,
            ),
            batch_size: env_num("HFS_BULK_EXPORT_BATCH_SIZE", d.batch_size),
            lease_duration_secs: env_num(
                "HFS_BULK_EXPORT_LEASE_DURATION",
                d.lease_duration_secs,
            ),
            heartbeat_interval_secs: env_num(
                "HFS_BULK_EXPORT_HEARTBEAT_INTERVAL",
                d.heartbeat_interval_secs,
            ),
            cleanup_interval_secs: env_num(
                "HFS_BULK_EXPORT_CLEANUP_INTERVAL",
                d.cleanup_interval_secs,
            ),
            since_newly_added: std::env::var("HFS_BULK_EXPORT_SINCE_NEWLY_ADDED")
                .unwrap_or(d.since_newly_added),
        }
    }

    /// Validates the bulk-export configuration.
    ///
    /// Returns `Ok(())` when every setting is acceptable, otherwise `Err`
    /// with one message per violated rule (all rules are evaluated; the list
    /// is not cut short at the first failure).
    pub fn validate(&self) -> Result<(), Vec<String>> {
        let mut errors = Vec::new();
        if !matches!(self.backend.as_str(), "embedded" | "postgres-s3") {
            errors.push(format!(
                "HFS_BULK_EXPORT_BACKEND '{}' invalid (expected embedded|postgres-s3)",
                self.backend
            ));
        }
        if !matches!(self.output_backend.as_str(), "local-fs" | "s3") {
            errors.push(format!(
                "HFS_BULK_EXPORT_OUTPUT_BACKEND '{}' invalid (expected local-fs|s3)",
                self.output_backend
            ));
        }
        // A blank bucket name is as unusable as a missing one.
        let bucket_missing = self
            .s3_bucket
            .as_deref()
            .map_or(true, |bucket| bucket.trim().is_empty());
        if self.output_backend == "s3" && bucket_missing {
            errors.push("HFS_BULK_EXPORT_S3_BUCKET is required when OUTPUT_BACKEND=s3".to_string());
        }
        if !matches!(
            self.requires_access_token.as_str(),
            "auto" | "true" | "false"
        ) {
            errors.push(format!(
                "HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN '{}' invalid (expected auto|true|false)",
                self.requires_access_token
            ));
        }
        // local-fs has no pre-signed-URL capability.
        if self.output_backend == "local-fs" && self.requires_access_token == "false" {
            errors.push(
                "HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false is invalid with OUTPUT_BACKEND=local-fs"
                    .to_string(),
            );
        }
        if self.file_url_ttl_secs == 0 {
            errors.push("HFS_BULK_EXPORT_FILE_URL_TTL must be > 0".to_string());
        }
        if self.output_ttl_secs == 0 {
            errors.push("HFS_BULK_EXPORT_OUTPUT_TTL must be > 0".to_string());
        }
        if self.worker_concurrency == 0 {
            errors.push("HFS_BULK_EXPORT_WORKER_CONCURRENCY must be >= 1".to_string());
        }
        if self.max_concurrent_per_tenant == 0 {
            errors.push("HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT must be >= 1".to_string());
        }
        if self.batch_size == 0 {
            errors.push("HFS_BULK_EXPORT_BATCH_SIZE must be >= 1".to_string());
        }
        if self.heartbeat_interval_secs == 0 {
            errors.push("HFS_BULK_EXPORT_HEARTBEAT_INTERVAL must be > 0".to_string());
        }
        // The lease must outlive at least one heartbeat interval.
        if self.lease_duration_secs <= self.heartbeat_interval_secs {
            errors.push(
                "HFS_BULK_EXPORT_LEASE_DURATION must be greater than HEARTBEAT_INTERVAL"
                    .to_string(),
            );
        }
        if !matches!(self.since_newly_added.as_str(), "include" | "exclude") {
            errors.push(format!(
                "HFS_BULK_EXPORT_SINCE_NEWLY_ADDED '{}' invalid (expected include|exclude)",
                self.since_newly_added
            ));
        }
        if self.cleanup_interval_secs == 0 {
            errors.push("HFS_BULK_EXPORT_CLEANUP_INTERVAL must be > 0".to_string());
        }
        if errors.is_empty() {
            Ok(())
        } else {
            Err(errors)
        }
    }
}
+ #[arg(skip)] + pub bulk_export: BulkExportConfig, } impl ServerConfig { @@ -422,6 +625,7 @@ impl Default for ServerConfig { elasticsearch_password: None, terminology_server: None, multitenancy: MultitenancyConfig::default(), + bulk_export: BulkExportConfig::default(), } } } @@ -436,6 +640,8 @@ impl ServerConfig { let mut config = Self::try_parse().unwrap_or_default(); // Load multitenancy config from environment config.multitenancy = MultitenancyConfig::from_env(); + // Load bulk export config from environment + config.bulk_export = BulkExportConfig::from_env(); config } @@ -473,6 +679,10 @@ impl ServerConfig { errors.push("Default page size cannot exceed max page size".to_string()); } + if let Err(mut bulk_errors) = self.bulk_export.validate() { + errors.append(&mut bulk_errors); + } + if errors.is_empty() { Ok(()) } else { @@ -513,6 +723,7 @@ impl ServerConfig { elasticsearch_password: None, terminology_server: None, multitenancy: MultitenancyConfig::default(), + bulk_export: BulkExportConfig::default(), } } @@ -1000,6 +1211,66 @@ mod tests { assert!(config.storage_backend_mode().is_err()); } + // ── BulkExportConfig::validate ──────────────────────────────── + + #[test] + fn test_bulk_export_config_default_is_valid() { + assert!(BulkExportConfig::default().validate().is_ok()); + } + + #[test] + fn test_bulk_export_config_invalid_backend() { + let cfg = BulkExportConfig { + backend: "unknown".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("HFS_BULK_EXPORT_BACKEND"))); + } + + #[test] + fn test_bulk_export_config_s3_output_requires_bucket() { + let cfg = BulkExportConfig { + output_backend: "s3".to_string(), + s3_bucket: None, + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("S3_BUCKET"))); + } + + #[test] + fn test_bulk_export_config_local_fs_requires_access_token() { + let cfg = BulkExportConfig { + 
output_backend: "local-fs".to_string(), + requires_access_token: "false".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("local-fs"))); + } + + #[test] + fn test_bulk_export_config_lease_must_exceed_heartbeat() { + let cfg = BulkExportConfig { + lease_duration_secs: 10, + heartbeat_interval_secs: 20, + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("LEASE_DURATION"))); + } + + #[test] + fn test_bulk_export_config_invalid_since_newly_added() { + let cfg = BulkExportConfig { + since_newly_added: "maybe".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("SINCE_NEWLY_ADDED"))); + } + // ── display for StorageBackendMode ──────────────────────────── #[test] diff --git a/crates/rest/src/handlers/bulk_export.rs b/crates/rest/src/handlers/bulk_export.rs new file mode 100644 index 000000000..c26e72165 --- /dev/null +++ b/crates/rest/src/handlers/bulk_export.rs @@ -0,0 +1,816 @@ +//! FHIR Bulk Data Export (`$export`) handlers. +//! +//! Implements the asynchronous kick-off → poll → manifest → download → delete +//! flow from the [Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/). 
+
+use std::time::Duration;
+
+use axum::{
+    body::Body,
+    extract::{Path, Request, State},
+    http::{HeaderMap, Method, StatusCode},
+    response::Response,
+};
+use chrono::Utc;
+use helios_auth::Principal;
+use helios_fhir::FhirVersion;
+use helios_persistence::core::ExportDataProvider;
+use helios_persistence::core::{
+    ExportJobId, ExportLevel, ExportManifest, ExportOutputFile, ExportRequest, ExportStatus,
+    GroupExportProvider, PatientExportProvider, ResourceStorage, StartExportInput, TypeFilter,
+};
+use helios_persistence::error::{BulkExportError, StorageError};
+
+use crate::error::{RestError, RestResult};
+use crate::extractors::{FhirVersionExtractor, TenantExtractor};
+use crate::state::AppState;
+
+/// Trait bound shared by all bulk-export handlers (the resource-store side).
+///
+/// It is a pure alias for the four storage traits below and is implemented
+/// automatically for every type that satisfies them.
+pub trait ExportResourceStore:
+    ResourceStorage + ExportDataProvider + PatientExportProvider + GroupExportProvider
+{
+}
+impl<S> ExportResourceStore for S where
+    S: ResourceStorage + ExportDataProvider + PatientExportProvider + GroupExportProvider
+{
+}
+
+/// Search-result-control params that are NOT valid inside `_typeFilter`.
+const FORBIDDEN_FILTER_PARAMS: &[&str] =
+    &["_include", "_revinclude", "_sort", "_count", "_elements"];
+
+/// Error returned by every bulk-export route when the feature is switched
+/// off or its job/output services are not wired into the application state.
+fn not_implemented() -> RestError {
+    RestError::NotImplemented {
+        feature: "Bulk Data Export is disabled (HFS_BULK_EXPORT_ENABLED=false)".to_string(),
+    }
+}
+
+/// Shorthand for a `RestError::BadRequest` carrying `msg`.
+fn bad_request(msg: impl Into<String>) -> RestError {
+    RestError::BadRequest {
+        message: msg.into(),
+    }
+}
+
+/// Parses a raw query string into ordered key/value pairs (repeated keys kept).
+///
+/// Keys and values are percent-decoded; `None` yields an empty list.
+fn parse_query_pairs(raw: Option<&str>) -> Vec<(String, String)> {
+    match raw {
+        None => Vec::new(),
+        Some(q) => url::form_urlencoded::parse(q.as_bytes())
+            .map(|(k, v)| (k.into_owned(), v.into_owned()))
+            .collect(),
+    }
+}
+
+/// Collects all values for `key`, splitting each on `,`.
+fn collect_multi(pairs: &[(String, String)], key: &str) -> Vec<String> {
+    pairs
+        .iter()
+        .filter(|(k, _)| k == key)
+        .flat_map(|(_, v)| v.split(',').map(|s| s.trim().to_string()))
+        .filter(|s| !s.is_empty())
+        .collect()
+}
+
+/// Returns the first value for `key`, if any.
+fn first_value(pairs: &[(String, String)], key: &str) -> Option<String> {
+    pairs.iter().find(|(k, _)| k == key).map(|(_, v)| v.clone())
+}
+
+/// Parses a FHIR `instant` into a UTC datetime.
+///
+/// Query strings are form-decoded before they reach this function, which
+/// turns an unescaped `+` in a UTC offset (`...T00:00:00+01:00`) into a
+/// space. When the strict RFC 3339 parse fails, the last space is therefore
+/// retried as `+` before the value is rejected.
+fn parse_instant(s: &str) -> Result<chrono::DateTime<Utc>, RestError> {
+    let parse =
+        |raw: &str| chrono::DateTime::parse_from_rfc3339(raw).map(|dt| dt.with_timezone(&Utc));
+    parse(s)
+        .or_else(|err| match s.rsplit_once(' ') {
+            // Report the error for the value as the client sent it.
+            Some((head, offset)) => parse(&format!("{head}+{offset}")).map_err(|_| err),
+            None => Err(err),
+        })
+        .map_err(|e| bad_request(format!("invalid instant '{s}': {e}")))
+}
+
+/// Iterates over every comma-separated preference in every `Prefer` header
+/// line. HTTP allows a list-valued header to be sent as several lines, so
+/// looking only at the first line can miss a preference.
+fn prefer_directives(headers: &HeaderMap) -> impl Iterator<Item = &str> {
+    headers
+        .get_all("prefer")
+        .into_iter()
+        .filter_map(|v| v.to_str().ok())
+        .flat_map(|line| line.split(','))
+        .map(str::trim)
+        .filter(|d| !d.is_empty())
+}
+
+/// Reads the `Prefer: handling=` directive (`strict` / `lenient`).
+///
+/// The preference name is matched case-insensitively and the value is
+/// returned lower-cased with surrounding quotes removed.
+fn prefer_handling(headers: &HeaderMap) -> Option<String> {
+    prefer_directives(headers).find_map(|directive| {
+        let (name, value) = directive.split_once('=')?;
+        name.trim()
+            .eq_ignore_ascii_case("handling")
+            .then(|| value.trim().trim_matches('"').to_ascii_lowercase())
+    })
+}
+
+/// Returns true if `Prefer: respond-async` is present.
+fn has_respond_async(headers: &HeaderMap) -> bool {
+    prefer_directives(headers).any(|d| d.eq_ignore_ascii_case("respond-async"))
+}
+
+/// Builds the parameter pairs from a POST `Parameters` resource body.
+///
+/// Entries without a `name`, or without one of the recognised `value[x]`
+/// fields, are skipped silently.
+fn pairs_from_parameters(body: &serde_json::Value) -> Vec<(String, String)> {
+    let mut pairs = Vec::new();
+    if let Some(arr) = body.get("parameter").and_then(|p| p.as_array()) {
+        for p in arr {
+            let Some(name) = p.get("name").and_then(|n| n.as_str()) else {
+                continue;
+            };
+            // Accept valueString / valueUri / valueInstant / valueCode etc.
+            let value = p
+                .get("valueString")
+                .or_else(|| p.get("valueUri"))
+                .or_else(|| p.get("valueInstant"))
+                .or_else(|| p.get("valueCode"))
+                .or_else(|| p.get("valueDateTime"))
+                .and_then(|v| v.as_str())
+                .or_else(|| {
+                    // patient reference: { name: "patient", valueReference: { reference } }
+                    p.get("valueReference")
+                        .and_then(|r| r.get("reference"))
+                        .and_then(|r| r.as_str())
+                });
+            if let Some(v) = value {
+                pairs.push((name.to_string(), v.to_string()));
+            }
+        }
+    }
+    pairs
+}
+
+/// Shared kick-off logic for all three export levels.
+///
+/// Validates the request (headers, `_outputFormat`, `_since`/`_until`,
+/// `_typeFilter`, `patient`), checks scopes and the per-tenant concurrency
+/// cap, starts the job, emits an audit event, and answers `202 Accepted`
+/// with the status URL in `Content-Location`.
+#[allow(clippy::too_many_arguments)]
+async fn kickoff_export<S>(
+    state: &AppState<S>,
+    tenant: &TenantExtractor,
+    principal: Option<&Principal>,
+    level: ExportLevel,
+    fhir_version: FhirVersion,
+    method: &Method,
+    headers: &HeaderMap,
+    raw_query: Option<&str>,
+    full_url: &str,
+    body: Option<&serde_json::Value>,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync,
+{
+    let cfg = state.bulk_export_config();
+    if !cfg.enabled {
+        return Err(not_implemented());
+    }
+    let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?;
+
+    if !has_respond_async(headers) {
+        return Err(bad_request(
+            "the `Prefer: respond-async` header is required for $export",
+        ));
+    }
+
+    // Query-string parameters come first; POST body parameters are appended.
+    let is_post = method == Method::POST;
+    let mut pairs = parse_query_pairs(raw_query);
+    if is_post {
+        if let Some(b) = body {
+            pairs.extend(pairs_from_parameters(b));
+        }
+    }
+
+    // _outputFormat
+    let output_format = first_value(&pairs, "_outputFormat")
+        .unwrap_or_else(|| "application/fhir+ndjson".to_string());
+    if !matches!(
+        output_format.as_str(),
+        "application/fhir+ndjson" | "application/ndjson" | "ndjson"
+    ) {
+        return Err(bad_request(format!(
+            "unsupported _outputFormat '{output_format}'"
+        )));
+    }
+
+    // _type
+    let resource_types = collect_multi(&pairs, "_type");
+
+    // _since / _until
+    let since = match first_value(&pairs, "_since") {
+        Some(s) => Some(parse_instant(&s)?),
+        None => None,
+    };
+    let until = match first_value(&pairs, "_until") {
+        Some(s) => Some(parse_instant(&s)?),
+        None => None,
+    };
+
+    // _elements
+    let elements = collect_multi(&pairs, "_elements");
+
+    // _typeFilter — each value is `<ResourceType>?<search query>`; it is not
+    // split on `,` because commas are meaningful inside the search query.
+    let mut type_filters = Vec::new();
+    for tf in pairs.iter().filter(|(k, _)| k == "_typeFilter") {
+        let raw = &tf.1;
+        let (rt, query) = raw
+            .split_once('?')
+            .ok_or_else(|| bad_request(format!("malformed _typeFilter '{raw}'")))?;
+        if !resource_types.is_empty() && !resource_types.iter().any(|t| t == rt) {
+            return Err(bad_request(format!(
+                "_typeFilter resource type '{rt}' is not in _type"
+            )));
+        }
+        for (pk, _) in url::form_urlencoded::parse(query.as_bytes()) {
+            if FORBIDDEN_FILTER_PARAMS.contains(&pk.as_ref()) {
+                return Err(bad_request(format!(
+                    "_typeFilter may not contain result-control param '{pk}'"
+                )));
+            }
+        }
+        type_filters.push(TypeFilter::new(rt, query));
+    }
+
+    // patient (POST only)
+    // NOTE(review): the value is read from the merged pairs, so a `patient`
+    // query parameter on a GET request is honoured as well — confirm intent.
+    let patient_refs = collect_multi(&pairs, "patient");
+    if !patient_refs.is_empty() {
+        if matches!(level, ExportLevel::System) {
+            return Err(bad_request(
+                "the `patient` parameter is not valid for system-level export",
+            ));
+        }
+        // Validate each patient reference resolves.
+        for pref in &patient_refs {
+            let id = pref.strip_prefix("Patient/").unwrap_or(pref);
+            let exists = state
+                .storage()
+                .read(tenant.context(), "Patient", id)
+                .await
+                .map_err(map_storage_err)?
+                .is_some();
+            if !exists {
+                return Err(bad_request(format!("unknown patient reference '{pref}'")));
+            }
+        }
+        // For group-level, each must be a member of the group.
+        if let ExportLevel::Group { group_id } = &level {
+            let members = state
+                .storage()
+                .resolve_group_patient_ids(tenant.context(), group_id)
+                .await
+                .map_err(map_storage_err)?;
+            for pref in &patient_refs {
+                let id = pref.strip_prefix("Patient/").unwrap_or(pref);
+                if !members.iter().any(|m| m == id) {
+                    return Err(bad_request(format!(
+                        "patient '{pref}' is not a member of Group/{group_id}"
+                    )));
+                }
+            }
+        }
+    }
+
+    // Unsupported parameters — strict vs lenient.
+    let handling = prefer_handling(headers);
+    let unsupported: Vec<&str> = [
+        "includeAssociatedData",
+        "organizeOutputBy",
+        "allowPartialManifests",
+    ]
+    .into_iter()
+    .filter(|p| pairs.iter().any(|(k, _)| k == p))
+    .collect();
+    if !unsupported.is_empty() {
+        if handling.as_deref() == Some("strict") {
+            return Err(bad_request(format!(
+                "unsupported parameters: {}",
+                unsupported.join(", ")
+            )));
+        } else {
+            tracing::warn!(
+                "ignoring unsupported bulk-export parameters: {}",
+                unsupported.join(", ")
+            );
+        }
+    }
+
+    // Authorization — every requested type needs read scope; Group also needs Group read.
+    if let Some(p) = principal {
+        // NOTE(review): when `_type` is omitted no per-type scope check runs
+        // at all, so a narrowly scoped principal can start a whole-scope
+        // export. Confirm that the export worker filters by granted scopes.
+        let types_to_check = if resource_types.is_empty() {
+            // Whole-scope export — require a wildcard read or accept (best effort).
+            vec![]
+        } else {
+            resource_types.clone()
+        };
+        for t in &types_to_check {
+            helios_auth::SmartScopePolicy::check(p, t, helios_auth::FhirOperation::Read).map_err(
+                |e| RestError::Forbidden {
+                    message: e.to_string(),
+                },
+            )?;
+        }
+        if matches!(level, ExportLevel::Group { .. }) {
+            helios_auth::SmartScopePolicy::check(p, "Group", helios_auth::FhirOperation::Read)
+                .map_err(|e| RestError::Forbidden {
+                    message: e.to_string(),
+                })?;
+        }
+    }
+
+    // Per-tenant concurrency cap.
+    // NOTE(review): count-then-start is not atomic, so two simultaneous
+    // kick-offs can both pass this check. The Bulk Data IG also suggests
+    // `429 Too Many Requests` here rather than 400.
+    let active = jobs
+        .count_active_exports(tenant.context())
+        .await
+        .map_err(map_storage_err)?;
+    if active >= cfg.max_concurrent_per_tenant as u64 {
+        return Err(RestError::BadRequest {
+            message: format!(
+                "too many concurrent exports for this tenant (max {})",
+                cfg.max_concurrent_per_tenant
+            ),
+        });
+    }
+
+    let request = ExportRequest {
+        level: level.clone(),
+        resource_types,
+        since,
+        until,
+        type_filters,
+        elements,
+        include_associated_data: Vec::new(),
+        patient_refs,
+        batch_size: cfg.batch_size,
+        output_format,
+    };
+
+    let input = StartExportInput {
+        request,
+        transaction_time: Utc::now(),
+        request_url: full_url.to_string(),
+        owner_subject: principal.map(|p| p.subject.clone()),
+        fhir_version,
+    };
+
+    // `input` is moved into `start_export`; keep a copy for the audit event.
+    let request_clone = input.request.clone();
+    let job_id = jobs
+        .start_export(tenant.context(), input)
+        .await
+        .map_err(map_storage_err)?;
+
+    emit_export_audit(
+        state,
+        principal,
+        "kickoff",
+        job_id.as_str(),
+        &request_clone.level,
+        &request_clone.resource_types,
+        "0",
+    )
+    .await;
+
+    let status_url = format!(
+        "{}/export-status/{}",
+        state.base_url().trim_end_matches('/'),
+        job_id
+    );
+
+    Response::builder()
+        .status(StatusCode::ACCEPTED)
+        .header("Content-Location", status_url)
+        .body(Body::empty())
+        .map_err(|e| RestError::InternalError {
+            message: e.to_string(),
+        })
+}
+
+/// Maps a persistence error to a REST error.
+///
+/// Unknown job → 404, unsupported backend capability → 501, anything else →
+/// 500 carrying the error's display text.
+fn map_storage_err(e: StorageError) -> RestError {
+    match e {
+        StorageError::BulkExport(BulkExportError::JobNotFound { job_id }) => RestError::NotFound {
+            resource_type: "export-job".to_string(),
+            id: job_id,
+        },
+        StorageError::Backend(helios_persistence::error::BackendError::UnsupportedCapability {
+            ..
+        }) => RestError::NotImplemented {
+            feature: "bulk export not supported by this backend".to_string(),
+        },
+        other => RestError::InternalError {
+            message: other.to_string(),
+        },
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Route handlers
+// ---------------------------------------------------------------------------
+
+/// `GET|POST /$export` — system-level kick-off.
+pub async fn system_export_kickoff_handler<S>(
+    State(state): State<AppState<S>>,
+    tenant: TenantExtractor,
+    version: FhirVersionExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    run_kickoff(state, tenant, version, ExportLevel::System, request).await
+}
+
+/// `GET|POST /Patient/$export` — patient-level kick-off.
+pub async fn patient_export_kickoff_handler<S>(
+    State(state): State<AppState<S>>,
+    tenant: TenantExtractor,
+    version: FhirVersionExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    run_kickoff(state, tenant, version, ExportLevel::Patient, request).await
+}
+
+/// `GET|POST /Group/{id}/$export` — group-level kick-off.
+pub async fn group_export_kickoff_handler<S>(
+    State(state): State<AppState<S>>,
+    Path(group_id): Path<String>,
+    tenant: TenantExtractor,
+    version: FhirVersionExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    run_kickoff(
+        state,
+        tenant,
+        version,
+        ExportLevel::Group { group_id },
+        request,
+    )
+    .await
+}
+
+/// Shared body of the three kick-off wrappers.
+async fn run_kickoff<S>(
+    state: AppState<S>,
+    tenant: TenantExtractor,
+    version: FhirVersionExtractor,
+    level: ExportLevel,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    // Copy everything needed from the request head before the body is consumed.
+    let method = request.method().clone();
+    let headers = request.headers().clone();
+    let uri = request.uri().clone();
+    let raw_query = uri.query().map(|q| q.to_string());
+    let full_url = format!(
+        "{}{}",
+        state.base_url().trim_end_matches('/'),
+        uri.path_and_query()
+            .map(|pq| pq.as_str())
+            .unwrap_or(uri.path())
+    );
+    let principal = request.extensions().get::<Principal>().cloned();
+
+    // Only POST carries a `Parameters` body; it is capped at 1 MiB.
+    let body_json: Option<serde_json::Value> = if method == Method::POST {
+        let bytes = axum::body::to_bytes(request.into_body(), 1024 * 1024)
+            .await
+            .map_err(|e| bad_request(format!("failed to read request body: {e}")))?;
+        if bytes.is_empty() {
+            None
+        } else {
+            Some(
+                serde_json::from_slice(&bytes)
+                    .map_err(|e| bad_request(format!("invalid Parameters JSON: {e}")))?,
+            )
+        }
+    } else {
+        None
+    };
+
+    kickoff_export(
+        &state,
+        &tenant,
+        principal.as_ref(),
+        level,
+        version.storage_version(),
+        &method,
+        &headers,
+        raw_query.as_deref(),
+        &full_url,
+        body_json.as_ref(),
+    )
+    .await
+}
+
+/// `GET /export-status/{job_id}` — poll status / fetch manifest.
+///
+/// Answers `202` with `X-Progress` and `Retry-After` while the job is
+/// active, `200` with the manifest once complete, `500` when the job
+/// failed, and `404` when it is cancelled, unknown, or not owned by the
+/// caller.
+pub async fn export_status_handler<S>(
+    State(state): State<AppState<S>>,
+    Path(job_id): Path<String>,
+    tenant: TenantExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    let cfg = state.bulk_export_config();
+    if !cfg.enabled {
+        return Err(not_implemented());
+    }
+    let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?;
+    let output = state.bulk_export_output().ok_or_else(not_implemented)?;
+    let principal = request.extensions().get::<Principal>().cloned();
+    let job_id = ExportJobId::from_string(job_id);
+
+    // Ownership check first (do not leak existence).
+    // Every lookup failure is reported as 404, including backend errors.
+    let meta = match jobs
+        .get_export_job_metadata(tenant.context(), &job_id)
+        .await
+    {
+        Ok(m) => m,
+        Err(_) => {
+            return Err(RestError::NotFound {
+                resource_type: "export-job".to_string(),
+                id: job_id.to_string(),
+            });
+        }
+    };
+    if !owns_job(principal.as_ref(), meta.owner_subject.as_deref()) {
+        return Err(RestError::NotFound {
+            resource_type: "export-job".to_string(),
+            id: job_id.to_string(),
+        });
+    }
+
+    match meta.status {
+        ExportStatus::Accepted | ExportStatus::InProgress => {
+            let progress = jobs
+                .get_export_status(tenant.context(), &job_id)
+                .await
+                .map_err(map_storage_err)?;
+            // Prefer the resource type currently being written; otherwise
+            // report the overall percentage.
+            let x_progress = progress
+                .current_type
+                .clone()
+                .unwrap_or_else(|| format!("{:.0}%", progress.overall_progress() * 100.0));
+            Response::builder()
+                .status(StatusCode::ACCEPTED)
+                .header("X-Progress", x_progress)
+                .header("Retry-After", "120")
+                .body(Body::empty())
+                .map_err(|e| RestError::InternalError {
+                    message: e.to_string(),
+                })
+        }
+        ExportStatus::Complete => {
+            let raw = jobs
+                .get_export_manifest(tenant.context(), &job_id)
+                .await
+                .map_err(map_storage_err)?;
+            let ttl = Duration::from_secs(cfg.file_url_ttl_secs);
+            let mut output_files = Vec::new();
+            let mut error_files = Vec::new();
+            // The manifest carries a single `requiresAccessToken` flag, so
+            // it must hold for every listed URL (output and error files
+            // alike), not just for whichever file was processed last.
+            let mut files_seen = 0usize;
+            let mut any_requires_token = false;
+            for entry in &raw.output {
+                let url = output
+                    .download_url(&entry.key, ttl)
+                    .await
+                    .map_err(map_storage_err)?;
+                files_seen += 1;
+                any_requires_token |= url.requires_access_token;
+                output_files.push(ExportOutputFile {
+                    resource_type: entry.resource_type.clone(),
+                    url: url.url,
+                    count: Some(entry.count),
+                });
+            }
+            for entry in &raw.errors {
+                let url = output
+                    .download_url(&entry.key, ttl)
+                    .await
+                    .map_err(map_storage_err)?;
+                files_seen += 1;
+                any_requires_token |= url.requires_access_token;
+                error_files.push(ExportOutputFile {
+                    resource_type: entry.resource_type.clone(),
+                    url: url.url,
+                    count: Some(entry.count),
+                });
+            }
+            // With no files at all, keep the previous conservative default.
+            let requires_token = files_seen == 0 || any_requires_token;
+            let manifest = ExportManifest {
+                transaction_time: raw.transaction_time,
+                request: raw.request_url,
+                requires_access_token: requires_token,
+                output: output_files,
+                error: error_files,
+                deleted: Vec::new(),
+                link: Vec::new(),
+                message: None,
+                extension: None,
+            };
+            let body = serde_json::to_vec(&manifest).map_err(|e| RestError::InternalError {
+                message: e.to_string(),
+            })?;
+            Response::builder()
+                .status(StatusCode::OK)
+                .header("Content-Type", "application/json")
+                .body(Body::from(body))
+                .map_err(|e| RestError::InternalError {
+                    message: e.to_string(),
+                })
+        }
+        ExportStatus::Error => Err(RestError::InternalError {
+            message: "export job failed".to_string(),
+        }),
+        ExportStatus::Cancelled => Err(RestError::NotFound {
+            resource_type: "export-job".to_string(),
+            id: job_id.to_string(),
+        }),
+    }
+}
+
+/// `DELETE /export-status/{job_id}` — cancel + delete a job.
+///
+/// Answers `202` once the job's outputs and rows have been removed, and
+/// `404` when the job is unknown or not owned by the caller.
+pub async fn export_cancel_handler<S>(
+    State(state): State<AppState<S>>,
+    Path(job_id): Path<String>,
+    tenant: TenantExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    let cfg = state.bulk_export_config();
+    if !cfg.enabled {
+        return Err(not_implemented());
+    }
+    let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?;
+    let output = state.bulk_export_output().ok_or_else(not_implemented)?;
+    let principal = request.extensions().get::<Principal>().cloned();
+    let job_id = ExportJobId::from_string(job_id);
+
+    let meta = match jobs
+        .get_export_job_metadata(tenant.context(), &job_id)
+        .await
+    {
+        Ok(m) => m,
+        Err(_) => {
+            return Err(RestError::NotFound {
+                resource_type: "export-job".to_string(),
+                id: job_id.to_string(),
+            });
+        }
+    };
+    if !owns_job(principal.as_ref(), meta.owner_subject.as_deref()) {
+        return Err(RestError::NotFound {
+            resource_type: "export-job".to_string(),
+            id: job_id.to_string(),
+        });
+    }
+
+    // Cancel if still active (cooperative — worker observes it).
+    // A failed cancel is ignored on purpose: the teardown below still runs.
+    if meta.status.is_active() {
+        let _ = jobs.cancel_export(tenant.context(), &job_id).await;
+    }
+    // REST owns the two-step teardown: outputs first, then job rows.
+    output
+        .delete_job_outputs(tenant.context(), &job_id)
+        .await
+        .map_err(map_storage_err)?;
+    jobs.delete_export(tenant.context(), &job_id)
+        .await
+        .map_err(map_storage_err)?;
+
+    emit_export_audit(
+        &state,
+        principal.as_ref(),
+        "delete",
+        job_id.as_str(),
+        &meta.level,
+        &[],
+        "0",
+    )
+    .await;
+
+    Response::builder()
+        .status(StatusCode::ACCEPTED)
+        .body(Body::empty())
+        .map_err(|e| RestError::InternalError {
+            message: e.to_string(),
+        })
+}
+
+/// `GET /export-file/{job_id}/{part}` — HFS-served NDJSON download.
+///
+/// Answers `404` when the file metadata cannot be loaded and `403` when
+/// the file-auth service rejects the caller.
+pub async fn export_download_handler<S>(
+    State(state): State<AppState<S>>,
+    Path((job_id, part)): Path<(String, String)>,
+    tenant: TenantExtractor,
+    request: Request,
+) -> RestResult<Response>
+where
+    S: ExportResourceStore + Send + Sync + 'static,
+{
+    let cfg = state.bulk_export_config();
+    if !cfg.enabled {
+        return Err(not_implemented());
+    }
+    let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?;
+    let output = state.bulk_export_output().ok_or_else(not_implemented)?;
+    let file_auth = state.bulk_export_file_auth().ok_or_else(not_implemented)?;
+    let principal = request.extensions().get::<Principal>().cloned();
+    let job_id = ExportJobId::from_string(job_id);
+
+    let file_meta = jobs
+        .get_export_file_metadata(tenant.context(), &job_id, &part)
+        .await
+        .map_err(|_| RestError::NotFound {
+            resource_type: "export-file".to_string(),
+            id: format!("{job_id}/{part}"),
+        })?;
+
+    file_auth
+        .authorize_download(
+            principal.as_ref(),
+            tenant.context(),
+            file_meta.job_owner_subject.as_deref(),
+            &file_meta,
+        )
+        .await
+        .map_err(|e| RestError::Forbidden {
+            message: e.to_string(),
+        })?;
+
+    // NOTE(review): the audit event is written before the file is read and
+    // always reports `ExportLevel::System`, whatever level the job ran at.
+    emit_export_audit(
+        &state,
+        principal.as_ref(),
+        "download",
+        job_id.as_str(),
+        &ExportLevel::System,
+        std::slice::from_ref(&file_meta.resource_type),
+        "0",
+    )
+    .await;
+
+    let mut reader = output
+        .open_reader(&file_meta.key)
+        .await
+        .map_err(map_storage_err)?;
+    // NOTE(review): the whole file is buffered in memory before it is sent;
+    // large NDJSON parts would be better served as a streamed body.
+    let mut bytes = Vec::new();
+    tokio::io::AsyncReadExt::read_to_end(&mut reader, &mut bytes)
+        .await
+        .map_err(|e| RestError::InternalError {
+            message: format!("failed to read export file: {e}"),
+        })?;
+
+    Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/fhir+ndjson")
+        .body(Body::from(bytes))
+        .map_err(|e| RestError::InternalError {
+            message: e.to_string(),
+        })
+}
+
+/// Emits a bulk-export lifecycle `AuditEvent` when an audit sink is configured.
+///
+/// Does nothing when no sink is configured. `operation` is recorded as the
+/// `bulk-export-operation` detail; `resource_types` is recorded only when
+/// non-empty; the principal's subject, when present, is added as the agent.
+async fn emit_export_audit<S>(
+    state: &AppState<S>,
+    principal: Option<&Principal>,
+    operation: &str,
+    job_id: &str,
+    level: &ExportLevel,
+    resource_types: &[String],
+    outcome: &str,
+) where
+    S: ResourceStorage,
+{
+    let Some(sink) = state.audit_sink() else {
+        return;
+    };
+    let mut builder = helios_audit::AuditEventBuilder::new(state.audit_source_observer())
+        .event_type(
+            "http://terminology.hl7.org/CodeSystem/audit-event-type",
+            "object",
+        )
+        .action(helios_audit::AuditAction::Execute)
+        .outcome(outcome)
+        .detail("audit-operation", "bulk-export")
+        .detail("bulk-export-operation", operation)
+        .detail("job-id", job_id)
+        .detail("export-level", level.to_string());
+    if !resource_types.is_empty() {
+        builder = builder.detail("resource-types", resource_types.join(","));
+    }
+    if let Some(p) = principal {
+        builder = builder.agent(&p.subject, None, true);
+    }
+    sink.record(builder.build()).await;
+}
+
+/// Ownership check: the principal owns the job, holds a `system/*` scope, or
+/// auth is disabled (no principal).
+fn owns_job(principal: Option<&Principal>, owner_subject: Option<&str>) -> bool {
+    match principal {
+        None => true, // auth disabled — no ownership enforcement
+        Some(p) => {
+            // NOTE(review): the override below matches any scope whose
+            // resource type is the wildcard; it does not look at the scope's
+            // context, so it may also match `patient/*` or `user/*` scopes —
+            // confirm this is restricted to `system/*` as intended.
+            owner_subject == Some(p.subject.as_str())
+                || p.scopes
+                    .scopes()
+                    .iter()
+                    .any(|s| s.resource_type == helios_auth::scope::ResourceTypeSpec::Wildcard)
+        }
+    }
+}
diff --git a/crates/rest/src/handlers/capabilities.rs b/crates/rest/src/handlers/capabilities.rs
index 3a99f1b93..664d7c9c8 100644
--- a/crates/rest/src/handlers/capabilities.rs
+++ b/crates/rest/src/handlers/capabilities.rs
@@ -139,7 +139,18 @@ where
         formats.push("application/fhir+xml");
     }
 
-    serde_json::json!({
+    // Operations are collected separately so that the bulk-export entries
+    // can be appended before the list is attached to the statement.
+    let mut operations = vec![
+        serde_json::json!({
+            "name": "validate",
+            "definition": "http://hl7.org/fhir/OperationDefinition/Resource-validate"
+        }),
+        serde_json::json!({
+            "name": "versions",
+            "definition": "http://hl7.org/fhir/OperationDefinition/CapabilityStatement-versions"
+        }),
+    ];
+
+    let mut statement = serde_json::json!({
         "resourceType": "CapabilityStatement",
         "status": "active",
         "date": chrono::Utc::now().to_rfc3339(),
@@ -164,23 +175,34 @@ where
                 { "code": "history-system" },
                 { "code": "search-system" }
             ],
-            "operation": [
-                {
-                    "name": "validate",
-                    "definition": "http://hl7.org/fhir/OperationDefinition/Resource-validate"
-                },
-                {
-                    "name": "versions",
-                    "definition": "http://hl7.org/fhir/OperationDefinition/CapabilityStatement-versions"
-                }
-            ]
         }]
-    })
+    });
+
+    // Advertise Bulk Data Export operations when enabled.
+    if state.bulk_export_config().enabled {
+        operations.push(serde_json::json!({
+            "name": "export",
+            "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/export"
+        }));
+        operations.push(serde_json::json!({
+            "name": "patient-export",
+            "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/patient-export"
+        }));
+        operations.push(serde_json::json!({
+            "name": "group-export",
+            "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/group-export"
+        }));
+        statement["instantiates"] =
+            serde_json::json!(["http://hl7.org/fhir/uv/bulkdata/CapabilityStatement/bulk-data"]);
+    }
+
+    statement["rest"][0]["operation"] = serde_json::Value::Array(operations);
+    statement
 }
 
 /// Builds the capability entry for a resource type.
 fn build_resource_capability(resource_type: &str) -> serde_json::Value {
-    serde_json::json!({
+    let mut entry = serde_json::json!({
         "type": resource_type,
         "profile": format!("http://hl7.org/fhir/StructureDefinition/{}", resource_type),
         "interaction": [
@@ -204,7 +226,26 @@ fn build_resource_capability(resource_type: &str) -> serde_json::Value {
         "searchInclude": ["*"],
         "searchRevInclude": ["*"],
         "searchParam": build_common_search_params()
-    })
+    });
+    // Bulk Data Access IG: per-resource `$export` operation entries on Patient
+    // and Group, in addition to the system-level `$export` advertised at
+    // `rest[0].operation`.
+    // NOTE(review): unlike the system-level entries, these are added without
+    // checking whether bulk export is enabled — this function receives no
+    // configuration. Confirm that is intended.
+    match resource_type {
+        "Patient" => {
+            entry["operation"] = serde_json::json!([{
+                "name": "export",
+                "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/patient-export"
+            }]);
+        }
+        "Group" => {
+            entry["operation"] = serde_json::json!([{
+                "name": "export",
+                "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/group-export"
+            }]);
+        }
+        _ => {}
+    }
+    entry
 }
 
 /// Builds common search parameters supported by all resources.
diff --git a/crates/rest/src/handlers/compartment.rs b/crates/rest/src/handlers/compartment.rs index 44e2808ec..2501bd075 100644 --- a/crates/rest/src/handlers/compartment.rs +++ b/crates/rest/src/handlers/compartment.rs @@ -14,7 +14,6 @@ use axum::{ http::StatusCode, response::{IntoResponse, Response}, }; -use helios_fhir::FhirVersion; use helios_persistence::core::{ResourceStorage, SearchProvider}; use tracing::debug; @@ -22,56 +21,6 @@ use crate::error::{RestError, RestResult}; use crate::extractors::{FhirVersionExtractor, TenantExtractor, build_search_query_from_map}; use crate::state::AppState; -/// Returns compartment search parameters for a specific FHIR version. -/// -/// This function dispatches to the version-specific generated compartment lookup -/// functions in the helios_fhir crate. The compartment definitions are generated -/// from the official FHIR CompartmentDefinition resources. -/// -/// # Arguments -/// -/// * `version` - The FHIR version to use for lookup -/// * `compartment_type` - The compartment type (e.g., "Patient", "Encounter") -/// * `resource_type` - The target resource type (e.g., "Observation") -/// -/// # Returns -/// -/// A static slice of search parameter names that link the resource to the compartment. -/// Returns an empty slice if the resource is not a member of the compartment. 
-fn get_compartment_params_for_version( - version: FhirVersion, - compartment_type: &str, - resource_type: &str, -) -> Result<&'static [&'static str], String> { - match version { - #[cfg(feature = "R4")] - FhirVersion::R4 => Ok(helios_fhir::r4::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R4B")] - FhirVersion::R4B => Ok(helios_fhir::r4b::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R5")] - FhirVersion::R5 => Ok(helios_fhir::r5::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R6")] - FhirVersion::R6 => Ok(helios_fhir::r6::get_compartment_params( - compartment_type, - resource_type, - )), - #[allow(unreachable_patterns)] - _ => Err(format!( - "FHIR version {:?} is not enabled in this build", - version - )), - } -} - /// Handler for compartment search. /// /// Searches for resources within a specific compartment. @@ -111,8 +60,7 @@ where // Get the reference parameters for this compartment/target combination let fhir_version = version.storage_version(); let ref_params = - get_compartment_params_for_version(fhir_version, &compartment_type, &target_type) - .map_err(|message| RestError::InternalError { message })?; + helios_fhir::get_compartment_params(fhir_version, &compartment_type, &target_type); // Check if the resource type is a member of the compartment if ref_params.is_empty() { @@ -298,13 +246,13 @@ mod urlencoding { #[cfg(test)] mod tests { use super::*; + use helios_fhir::FhirVersion; #[test] fn test_get_compartment_params_patient_observation() { // Test that Patient compartment includes Observation with subject and performer params let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "Observation") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "Observation"); assert!(!params.is_empty()); assert!(params.contains(&"subject")); } @@ -313,8 +261,7 @@ mod tests { fn 
test_get_compartment_params_patient_immunization() { // Test that Patient compartment includes Immunization with patient param let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "Immunization") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "Immunization"); assert!(!params.is_empty()); assert!(params.contains(&"patient")); } @@ -323,8 +270,7 @@ mod tests { fn test_get_compartment_params_encounter_procedure() { // Test that Encounter compartment includes Procedure with encounter param let params = - get_compartment_params_for_version(FhirVersion::default(), "Encounter", "Procedure") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Encounter", "Procedure"); assert!(!params.is_empty()); assert!(params.contains(&"encounter")); } @@ -333,8 +279,7 @@ mod tests { fn test_get_compartment_params_unknown() { // Test that unknown resource types return an empty slice let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "UnknownType") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "UnknownType"); assert!(params.is_empty()); } @@ -342,12 +287,11 @@ mod tests { fn test_get_compartment_params_multiple() { // Test that some resources have multiple compartment params // AllergyIntolerance in Patient compartment has: patient, recorder, asserter - let params = get_compartment_params_for_version( + let params = helios_fhir::get_compartment_params( FhirVersion::default(), "Patient", "AllergyIntolerance", - ) - .unwrap(); + ); assert!( params.len() >= 2, "Expected multiple params for AllergyIntolerance" diff --git a/crates/rest/src/handlers/mod.rs b/crates/rest/src/handlers/mod.rs index 3fedc4e47..8549f79fe 100644 --- a/crates/rest/src/handlers/mod.rs +++ b/crates/rest/src/handlers/mod.rs @@ -16,6 +16,7 @@ //! 
- [`health`] - Health check endpoint pub mod batch; +pub mod bulk_export; pub mod capabilities; pub mod compartment; pub mod create; @@ -52,6 +53,10 @@ pub(crate) fn extract_patient_from_resource( // Re-export handlers for convenience pub use batch::batch_handler; +pub use bulk_export::{ + export_cancel_handler, export_download_handler, export_status_handler, + group_export_kickoff_handler, patient_export_kickoff_handler, system_export_kickoff_handler, +}; pub use capabilities::capabilities_handler; pub use compartment::compartment_search_handler; pub use create::create_handler; diff --git a/crates/rest/src/lib.rs b/crates/rest/src/lib.rs index 71f45ebbe..766c8e9a2 100644 --- a/crates/rest/src/lib.rs +++ b/crates/rest/src/lib.rs @@ -138,6 +138,7 @@ #![warn(missing_docs)] #![warn(rustdoc::missing_crate_level_docs)] +pub mod bulk_export_auth; pub mod config; pub mod error; pub mod extractors; @@ -196,6 +197,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -234,6 +238,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -247,6 +254,17 @@ where ) } +/// The bulk-export job store, output store, and download authorizer, wired +/// into [`AppState`] by [`create_app_with_auth_and_bulk_export`]. +pub struct BulkExportBundle { + /// Job-state store (claim + worker storage + lifecycle). + pub jobs: Arc, + /// Output store for NDJSON parts. + pub output: Arc, + /// Download authorizer. + pub file_auth: Arc, +} + /// Creates the Axum application with custom configuration and optional authentication. 
/// /// When `auth_state` is `Some`, authentication and authorization middleware @@ -267,6 +285,75 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + + Send + + Sync + + 'static, +{ + build_app( + Arc::new(storage), + config, + auth_config, + auth_state, + audit_state, + None, + ) +} + +/// Like [`create_app_with_auth`], but also wires the bulk-export subsystem +/// (job store, output store, download authorizer) into the application state. +pub fn create_app_with_auth_and_bulk_export( + storage: Arc, + config: ServerConfig, + auth_config: helios_auth::AuthConfig, + auth_state: Option>, + audit_state: Option>, + bulk_export: BulkExportBundle, +) -> Router +where + S: ResourceStorage + + ConditionalStorage + + SearchProvider + + InstanceHistoryProvider + + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + + Send + + Sync + + 'static, +{ + build_app( + storage, + config, + auth_config, + auth_state, + audit_state, + Some(bulk_export), + ) +} + +/// Internal app builder shared by [`create_app_with_auth`] and +/// [`create_app_with_auth_and_bulk_export`]. 
+fn build_app( + storage: Arc, + config: ServerConfig, + auth_config: helios_auth::AuthConfig, + auth_state: Option>, + audit_state: Option>, + bulk_export: Option, +) -> Router +where + S: ResourceStorage + + ConditionalStorage + + SearchProvider + + InstanceHistoryProvider + + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -296,7 +383,7 @@ where // Create application state let state = AppState::with_auth_and_audit( - Arc::new(storage), + storage, config.clone(), auth_config, auth_state.clone(), @@ -304,6 +391,12 @@ where app_audit_source_observer, ); + // Wire the bulk-export subsystem if provided. + let state = match bulk_export { + Some(b) => state.with_bulk_export(b.jobs, b.output, b.file_auth), + None => state, + }; + // Inject subscription engine if enabled #[cfg(feature = "subscriptions")] let state = { diff --git a/crates/rest/src/routing/fhir_routes.rs b/crates/rest/src/routing/fhir_routes.rs index 2b79524c0..c2fce31fb 100644 --- a/crates/rest/src/routing/fhir_routes.rs +++ b/crates/rest/src/routing/fhir_routes.rs @@ -59,6 +59,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -78,6 +81,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -96,6 +102,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -119,6 +128,9 @@ where + 
SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -181,6 +193,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -198,6 +213,30 @@ where ) .route("/_history", get(handlers::history_system_handler::)) .route("/", post(handlers::batch_handler::)) + // Bulk Data Export ($export) — operation routes precede the catch-all. + .route( + "/$export", + get(handlers::system_export_kickoff_handler::) + .post(handlers::system_export_kickoff_handler::), + ) + .route( + "/Patient/$export", + get(handlers::patient_export_kickoff_handler::) + .post(handlers::patient_export_kickoff_handler::), + ) + .route( + "/Group/{id}/$export", + get(handlers::group_export_kickoff_handler::) + .post(handlers::group_export_kickoff_handler::), + ) + .route( + "/export-status/{job_id}", + get(handlers::export_status_handler::).delete(handlers::export_cancel_handler::), + ) + .route( + "/export-file/{job_id}/{part}", + get(handlers::export_download_handler::), + ) // Type-level routes .route("/{resource_type}", get(handlers::search_get_handler::)) .route("/{resource_type}", post(handlers::create_handler::)) diff --git a/crates/rest/src/state.rs b/crates/rest/src/state.rs index 99025e24b..323d1034c 100644 --- a/crates/rest/src/state.rs +++ b/crates/rest/src/state.rs @@ -8,9 +8,10 @@ use std::sync::Arc; use helios_audit::AuditSink; use helios_auth::AuthConfig; -use helios_persistence::core::ResourceStorage; +use helios_persistence::core::{BulkExportJobStore, ExportOutputStore, ResourceStorage}; -use crate::config::ServerConfig; +use crate::bulk_export_auth::ExportFileAuth; +use crate::config::{BulkExportConfig, 
ServerConfig}; use crate::middleware::auth::AuthMiddlewareState; /// Shared application state for the REST API. @@ -55,6 +56,18 @@ pub struct AppState { /// Optional subscription engine for FHIR topic-based subscriptions. #[cfg(feature = "subscriptions")] subscription_engine: Option>, + + /// Bulk export job-state store (claim + worker storage + lifecycle). + bulk_export_jobs: Option>, + + /// Bulk export output store (NDJSON files). + bulk_export_output: Option>, + + /// Bulk export download authorizer. + bulk_export_file_auth: Option>, + + /// Bulk export configuration. + bulk_export_config: Arc, } // Manually implement Clone since S is wrapped in Arc and doesn't need to be Clone @@ -69,6 +82,10 @@ impl Clone for AppState { audit_source_observer: self.audit_source_observer.clone(), #[cfg(feature = "subscriptions")] subscription_engine: self.subscription_engine.clone(), + bulk_export_jobs: self.bulk_export_jobs.clone(), + bulk_export_output: self.bulk_export_output.clone(), + bulk_export_file_auth: self.bulk_export_file_auth.clone(), + bulk_export_config: Arc::clone(&self.bulk_export_config), } } } @@ -81,6 +98,7 @@ impl AppState { /// * `storage` - The storage backend (wrapped in Arc) /// * `config` - Server configuration pub fn new(storage: Arc, config: ServerConfig) -> Self { + let bulk_export_config = Arc::new(config.bulk_export.clone()); Self { storage, config: Arc::new(config), @@ -90,6 +108,10 @@ impl AppState { audit_source_observer: "Device/hfs".to_string(), #[cfg(feature = "subscriptions")] subscription_engine: None, + bulk_export_jobs: None, + bulk_export_output: None, + bulk_export_file_auth: None, + bulk_export_config, } } @@ -112,6 +134,7 @@ impl AppState { audit_sink: Option>, audit_source_observer: impl Into, ) -> Self { + let bulk_export_config = Arc::new(config.bulk_export.clone()); Self { storage, config: Arc::new(config), @@ -121,9 +144,46 @@ impl AppState { audit_source_observer: audit_source_observer.into(), #[cfg(feature = "subscriptions")] 
subscription_engine: None, + bulk_export_jobs: None, + bulk_export_output: None, + bulk_export_file_auth: None, + bulk_export_config, } } + /// Wires the bulk-export job store, output store, and file authorizer. + pub fn with_bulk_export( + mut self, + jobs: Arc, + output: Arc, + file_auth: Arc, + ) -> Self { + self.bulk_export_jobs = Some(jobs); + self.bulk_export_output = Some(output); + self.bulk_export_file_auth = Some(file_auth); + self + } + + /// Returns the bulk-export job store, if configured. + pub fn bulk_export_jobs(&self) -> Option<&Arc> { + self.bulk_export_jobs.as_ref() + } + + /// Returns the bulk-export output store, if configured. + pub fn bulk_export_output(&self) -> Option<&Arc> { + self.bulk_export_output.as_ref() + } + + /// Returns the bulk-export download authorizer, if configured. + pub fn bulk_export_file_auth(&self) -> Option<&Arc> { + self.bulk_export_file_auth.as_ref() + } + + /// Returns the bulk-export configuration. + pub fn bulk_export_config(&self) -> &BulkExportConfig { + &self.bulk_export_config + } + /// Sets the subscription engine on this AppState. #[cfg(feature = "subscriptions")] pub fn with_subscription_engine( diff --git a/crates/rest/tests/bulk_export.rs b/crates/rest/tests/bulk_export.rs new file mode 100644 index 000000000..a9939852a --- /dev/null +++ b/crates/rest/tests/bulk_export.rs @@ -0,0 +1,440 @@ +//! Integration tests for the FHIR Bulk Data Export (`$export`) endpoints. +//! +//! Exercises the kick-off → poll → manifest → download → delete lifecycle for +//! all three export levels, plus parameter validation, the `ExportStatus` → +//! HTTP mapping, and the unsupported-parameter behavior. 
+ +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use axum::http::StatusCode; +use axum_test::TestServer; +use helios_fhir::FhirVersion; +use helios_persistence::backends::local_fs::LocalFsOutputStore; +use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; +use helios_persistence::core::{ + BulkExportJobStore, DefaultExportWorker, ExportClaimStrategy, ExportOutputStore, + ResourceStorage, WorkerId, +}; +use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; +use helios_rest::ServerConfig; +use helios_rest::bulk_export_auth::BearerScopeAuth; +use helios_rest::config::{MultitenancyConfig, TenantRoutingMode}; +use serde_json::{Value, json}; + +/// Builds a test server with the bulk-export subsystem wired in, plus the +/// SQLite backend and the local-FS output store (for driving a worker). +async fn create_bulk_export_server() -> ( + TestServer, + Arc, + Arc, + tempfile::TempDir, +) { + let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .map(|p| p.join("data")) + .unwrap_or_else(|| PathBuf::from("data")); + + let backend_config = SqliteBackendConfig { + data_dir: Some(data_dir), + ..Default::default() + }; + let backend = Arc::new( + SqliteBackend::with_config(":memory:", backend_config).expect("create SQLite backend"), + ); + backend.init_schema().expect("init schema"); + + let tmp = tempfile::tempdir().expect("tempdir"); + let output = Arc::new(LocalFsOutputStore::new(tmp.path(), "http://localhost:8080")); + let file_auth = Arc::new(BearerScopeAuth); + + let config = ServerConfig { + multitenancy: MultitenancyConfig { + routing_mode: TenantRoutingMode::HeaderOnly, + ..Default::default() + }, + base_url: "http://localhost:8080".to_string(), + default_tenant: "test-tenant".to_string(), + ..ServerConfig::for_testing() + }; + + let state = helios_rest::AppState::new(Arc::clone(&backend), config).with_bulk_export( + backend.clone() as Arc, + 
output.clone() as Arc, + file_auth, + ); + let app = helios_rest::routing::fhir_routes::create_routes(state); + let server = TestServer::new(app).expect("create test server"); + + (server, backend, output, tmp) +} + +fn test_tenant() -> TenantContext { + TenantContext::new( + TenantId::new("test-tenant"), + TenantPermissions::full_access(), + ) +} + +/// Drains all currently-claimable export jobs by running a worker synchronously. +async fn drain_workers(backend: &Arc, output: &Arc) { + let worker_id = WorkerId::new("test-worker"); + let worker = DefaultExportWorker::new( + backend.clone(), + backend.clone(), + output.clone(), + worker_id.clone(), + ); + while let Some(lease) = backend + .claim_next(&worker_id, Duration::from_secs(60)) + .await + .expect("claim_next") + { + worker.run_job(lease).await.expect("run_job"); + } +} + +/// Seeds N Patient resources. +async fn seed_patients(backend: &Arc, n: usize) { + let tenant = test_tenant(); + for i in 0..n { + backend + .create( + &tenant, + "Patient", + json!({"resourceType": "Patient", "id": format!("p{i}")}), + FhirVersion::default(), + ) + .await + .expect("seed patient"); + } +} + +#[tokio::test] +async fn test_system_export_full_lifecycle() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 3).await; + + // Kick-off — requires Prefer: respond-async. + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + let status_url = resp + .headers() + .get("content-location") + .expect("Content-Location header") + .to_str() + .unwrap() + .to_string(); + let status_path = status_url.strip_prefix("http://localhost:8080").unwrap(); + + // Poll before the worker runs — still 202. 
+ let polling = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(polling.status_code(), StatusCode::ACCEPTED); + assert!(polling.headers().get("retry-after").is_some()); + + // Run the worker. + drain_workers(&backend, &output).await; + + // Poll again — now 200 + manifest. + let done = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(done.status_code(), StatusCode::OK); + let manifest: Value = done.json(); + assert!(manifest["transactionTime"].is_string()); + assert_eq!( + manifest["request"], + "http://localhost:8080/$export?_type=Patient" + ); + assert_eq!(manifest["requiresAccessToken"], true); + let output_files = manifest["output"].as_array().expect("output array"); + let total: u64 = output_files + .iter() + .map(|f| f["count"].as_u64().unwrap_or(0)) + .sum(); + assert_eq!(total, 3); + + // Download the first output file. + let file_url = output_files[0]["url"].as_str().unwrap(); + let file_path = file_url.strip_prefix("http://localhost:8080").unwrap(); + let download = server + .get(file_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(download.status_code(), StatusCode::OK); + assert_eq!( + download.headers().get("content-type").unwrap(), + "application/fhir+ndjson" + ); + assert_eq!(download.text().lines().count(), 3); + + // Delete the job. + let deleted = server + .delete(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(deleted.status_code(), StatusCode::ACCEPTED); + + // Status URL is now gone. + let gone = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(gone.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_patient_and_group_export_levels() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 2).await; + + // Patient-level kick-off. 
+ let resp = server + .get("/Patient/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + // Group-level kick-off. + let tenant = test_tenant(); + backend + .create( + &tenant, + "Group", + json!({"resourceType": "Group", "id": "g1", "member": []}), + FhirVersion::default(), + ) + .await + .unwrap(); + let resp = server + .get("/Group/g1/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_kickoff_requires_respond_async() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_unsupported_output_format_rejected() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_outputFormat", "text/csv") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_unsupported_param_strict_vs_lenient() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + // strict → 400 + let strict = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async, handling=strict") + .add_query_param("includeAssociatedData", "LatestProvenanceResources") + .await; + assert_eq!(strict.status_code(), StatusCode::BAD_REQUEST); + + // no handling directive (lenient default) → accepted + let lenient = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + 
.add_query_param("includeAssociatedData", "LatestProvenanceResources") + .await; + assert_eq!(lenient.status_code(), StatusCode::ACCEPTED); +} + +#[tokio::test] +async fn test_type_filter_validation() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + // _typeFilter whose resource type is not in _type → 400 + let mismatch = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_typeFilter", "Observation?status=final") + .await; + assert_eq!(mismatch.status_code(), StatusCode::BAD_REQUEST); + + // _typeFilter carrying a result-control param → 400 + let bad_param = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Observation") + .add_query_param("_typeFilter", "Observation?_sort=date") + .await; + assert_eq!(bad_param.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_status_and_download_unknown_job() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + let status = server + .get("/export-status/nonexistent") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(status.status_code(), StatusCode::NOT_FOUND); + + let download = server + .get("/export-file/nonexistent/Patient-0") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(download.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_post_kickoff_with_parameters_body() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 2).await; + + // POST kickoff using a FHIR Parameters resource body. 
+ let body = json!({ + "resourceType": "Parameters", + "parameter": [ + {"name": "_type", "valueString": "Patient"} + ] + }); + let resp = server + .post("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .json(&body) + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + assert!(resp.headers().get("content-location").is_some()); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_since_parameter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_since", "2020-01-01T00:00:00Z") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_invalid_since_rejected() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_since", "not-a-date") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_elements_parameter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_elements", "id,name") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + let status_url = resp + .headers() + .get("content-location") + .unwrap() + .to_str() + .unwrap() + .to_string(); + let status_path = status_url.strip_prefix("http://localhost:8080").unwrap(); + + drain_workers(&backend, &output).await; + + let done = server + 
.get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(done.status_code(), StatusCode::OK); +} + +#[tokio::test] +async fn test_valid_type_filter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + // _typeFilter with valid resource type (in _type) and allowed search param. + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_typeFilter", "Patient?active=true") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_capability_statement_advertises_export() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/metadata") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(resp.status_code(), StatusCode::OK); + let cs: Value = resp.json(); + let ops = cs["rest"][0]["operation"] + .as_array() + .expect("operation array"); + let names: Vec<&str> = ops.iter().filter_map(|o| o["name"].as_str()).collect(); + assert!(names.contains(&"export")); + assert!(names.contains(&"patient-export")); + assert!(names.contains(&"group-export")); + assert_eq!( + cs["instantiates"][0], + "http://hl7.org/fhir/uv/bulkdata/CapabilityStatement/bulk-data" + ); +} diff --git a/docker/bulk-export/README.md b/docker/bulk-export/README.md new file mode 100644 index 000000000..6a4ca7d46 --- /dev/null +++ b/docker/bulk-export/README.md @@ -0,0 +1,87 @@ +# Bulk Data Export + +HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) +`$export` operation asynchronously: kick-off, poll, manifest, download, and +delete. 
+ +This directory provides a docker-compose example for running HFS with +Bulk Data Export job state in PostgreSQL and export output in S3-compatible +storage via MinIO. It is intended for local manual testing, demos, and trying +Bulk Data clients such as Inferno against a multi-instance-style topology. + +This compose file is not used by the GitHub Actions bulk export or Inferno +workflow tests. Those workflows start their backing services directly so they +can control ports, artifacts, and per-job isolation. + +## Stack + +- HFS +- PostgreSQL for primary storage and bulk export job state +- MinIO for S3-compatible export output +- Keycloak using `docker/keycloak/realm.json` + +## Endpoints + +| Operation | Method | URL | +|-----------|--------|-----| +| system kick-off | GET / POST | `/$export` | +| patient kick-off | GET / POST | `/Patient/$export` | +| group kick-off | GET / POST | `/Group/{id}/$export` | +| status / manifest | GET | `/export-status/{job_id}` | +| cancel + delete | DELETE | `/export-status/{job_id}` | +| HFS-served download | GET | `/export-file/{job_id}/{type}-{part}` | + +All kick-offs require `Prefer: respond-async`. A successful kick-off returns +`202 Accepted` with a `Content-Location` header giving the status URL. + +## Single Instance + +The default HFS configuration wires embedded bulk export with SQLite job state, +local filesystem output, and an in-process worker pool. + +```bash +cargo run --bin hfs +curl -i -H 'Prefer: respond-async' \ + http://localhost:8080/Patient/\$export +``` + +## Run + +```bash +docker compose -f docker/bulk-export/docker-compose.yml up --build +``` + +HFS is available at `http://localhost:8080`. + +## Try an Export + +```bash +curl -i -H 'Prefer: respond-async' \ + http://localhost:8080/Patient/\$export +``` + +The response includes a `Content-Location` header for polling the export job.
+ +## Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `HFS_BULK_EXPORT_ENABLED` | `true` | Master switch. When `false`, all `$export` endpoints return `501`. | +| `HFS_BULK_EXPORT_BACKEND` | `embedded` | Job-state backend: `embedded` or `postgres-s3`. | +| `HFS_BULK_EXPORT_OUTPUT_BACKEND` | `local-fs` | Output store: `local-fs` or `s3`. | +| `HFS_BULK_EXPORT_OUTPUT_DIR` | `${HFS_DATA_DIR}/exports` | Local filesystem output root. | +| `HFS_BULK_EXPORT_S3_BUCKET` | none | S3 bucket. Required when `OUTPUT_BACKEND=s3`. | +| `HFS_BULK_EXPORT_S3_ENDPOINT` | AWS | S3-compatible endpoint URL, such as MinIO. | +| `HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE` | `false` | Path-style addressing for S3-compatible providers. | +| `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` | `auto` | Manifest posture: `auto`, `true`, or `false`. `false` is invalid with `local-fs`. | +| `HFS_BULK_EXPORT_FILE_URL_TTL` | `3600` | Pre-signed download URL lifetime in seconds. | +| `HFS_BULK_EXPORT_OUTPUT_TTL` | `86400` | Output retention after job completion in seconds. | +| `HFS_BULK_EXPORT_WORKER_CONCURRENCY` | `2` | In-process worker pool size. | +| `HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER` | `false` | Disable in-process workers for separate exporter deployment. | +| `HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT` | `4` | Per-tenant active-job cap. | +| `HFS_BULK_EXPORT_BATCH_SIZE` | `1000` | Resources per export batch. | +| `HFS_BULK_EXPORT_LEASE_DURATION` | `60` | Initial lease length in seconds. Must be greater than the heartbeat interval. | +| `HFS_BULK_EXPORT_HEARTBEAT_INTERVAL` | `20` | Worker heartbeat cadence in seconds. | +| `HFS_BULK_EXPORT_CLEANUP_INTERVAL` | `300` | Cleanup task scan interval in seconds. | +| `HFS_BULK_EXPORT_SINCE_NEWLY_ADDED` | `include` | Group-export `_since` toggle: `include` or `exclude`. | +| `HFS_BULK_EXPORT_DATABASE_URL` | from `HFS_DATABASE_URL` | PostgreSQL URL for the `postgres-s3` job store. 
# File: docker/bulk-export/docker-compose.yml
#
# Multi-instance Bulk Data Export stack: HFS + PostgreSQL + MinIO + Keycloak.
#
# This is a provided local example for manual multi-instance smoke testing and
# trying Bulk Data clients such as Inferno. It is not used by the GitHub Actions
# workflow tests. Bring it up with:
#
#   docker compose -f docker/bulk-export/docker-compose.yml up --build
#
# Then point a Bulk Data client (Inferno or curl) at http://localhost:8080.
#
# Every credential in this file is a throwaway development value; do not reuse
# any of them outside a local sandbox.

services:
  # PostgreSQL backs both the primary HFS store (HFS_DATABASE_URL) and the bulk
  # export job state (HFS_BULK_EXPORT_DATABASE_URL) configured on `hfs` below.
  postgres:
    image: postgres:16
    environment:
      POSTGRES_USER: hfs
      POSTGRES_PASSWORD: hfs
      POSTGRES_DB: hfs
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U hfs"]
      interval: 5s
      timeout: 5s
      retries: 10
    ports:
      - "5432:5432"

  # S3-compatible object store that receives export output. Port 9000 is the
  # S3 API, port 9001 the web console (see --console-address).
  minio:
    image: minio/minio:latest
    command: server /data --console-address ":9001"
    environment:
      MINIO_ROOT_USER: hfs-minio
      MINIO_ROOT_PASSWORD: hfs-minio-secret
    healthcheck:
      # Probe with the `mc` client bundled in the server image instead of
      # `curl`, which is not present in every minio/minio build. A healthcheck
      # that can never pass would block minio-bootstrap (and therefore hfs)
      # forever, because both wait on this service.
      test: ["CMD", "mc", "ready", "local"]
      interval: 5s
      timeout: 5s
      retries: 10
    ports:
      - "9000:9000"
      - "9001:9001"

  # One-shot job that creates the export bucket, then exits.
  minio-bootstrap:
    image: minio/mc:latest
    depends_on:
      minio:
        condition: service_healthy
    # `set -e` makes any failing step fail the container, so the
    # `service_completed_successfully` gate on hfs is meaningful.
    # `--ignore-existing` keeps re-runs against an existing bucket idempotent
    # without masking real errors.
    entrypoint:
      - /bin/sh
      - -c
      - |
        set -e
        mc alias set local http://minio:9000 hfs-minio hfs-minio-secret
        mc mb --ignore-existing local/hfs-export

  # Identity provider, started in dev mode with the repository's realm export
  # imported on boot. Published on host port 8180 to avoid clashing with HFS.
  keycloak:
    image: quay.io/keycloak/keycloak:26.1
    command: ["start-dev", "--import-realm"]
    environment:
      # Keycloak 26 replaced KEYCLOAK_ADMIN / KEYCLOAK_ADMIN_PASSWORD with the
      # KC_BOOTSTRAP_ADMIN_* variables; the old names are deprecated.
      KC_BOOTSTRAP_ADMIN_USERNAME: admin
      KC_BOOTSTRAP_ADMIN_PASSWORD: admin
    volumes:
      - ../keycloak/realm.json:/opt/keycloak/data/import/realm.json:ro
    ports:
      - "8180:8080"
    healthcheck:
      # Plain TCP probe of port 8080 via the shell's /dev/tcp support.
      test: ["CMD-SHELL", "exec 3<>/dev/tcp/localhost/8080; echo OK >&3"]
      interval: 5s
      timeout: 5s
      retries: 20

  # HFS built from the repository root, wired for Postgres job state and
  # S3 (MinIO) export output.
  hfs:
    build:
      context: ../..
      args:
        BINARY_NAME: hfs
    depends_on:
      postgres:
        condition: service_healthy
      minio-bootstrap:
        condition: service_completed_successfully
      # Only waits for the container to start; the Keycloak healthcheck above
      # is not awaited.
      keycloak:
        condition: service_started
    environment:
      HFS_SERVER_HOST: "0.0.0.0"
      HFS_SERVER_PORT: "8080"
      HFS_BASE_URL: "http://localhost:8080"
      HFS_STORAGE_BACKEND: postgres
      HFS_DATABASE_URL: "postgresql://hfs:hfs@postgres/hfs"
      # Bulk export wired to Postgres job state + S3 (MinIO) output.
      HFS_BULK_EXPORT_ENABLED: "true"
      HFS_BULK_EXPORT_BACKEND: postgres-s3
      HFS_BULK_EXPORT_DATABASE_URL: "postgresql://hfs:hfs@postgres/hfs"
      HFS_BULK_EXPORT_OUTPUT_BACKEND: s3
      HFS_BULK_EXPORT_S3_BUCKET: hfs-export
      # NOTE(review): download URLs in the manifest are presumably pre-signed
      # against this endpoint. The `minio` hostname only resolves inside the
      # compose network, so confirm that clients running on the host can
      # actually fetch export files.
      HFS_BULK_EXPORT_S3_ENDPOINT: "http://minio:9000"
      HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE: "true"
      HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN: "false"
      # Must match MINIO_ROOT_USER / MINIO_ROOT_PASSWORD on the minio service.
      AWS_ACCESS_KEY_ID: hfs-minio
      AWS_SECRET_ACCESS_KEY: hfs-minio-secret
      AWS_REGION: us-east-1
    ports:
      - "8080:8080"