From c940b5ae3c201a9ac5a05f52d467ca6bae0ceb95 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:44:58 -0400 Subject: [PATCH 01/81] feat(fhir): add version-agnostic get_compartment_params dispatch Free function exposed at crate root that dispatches per FhirVersion to the existing helios_fhir::{r4,r4b,r5,r6}::get_compartment_params helpers. Lets persistence reuse the lookup without depending on helios-rest. --- crates/fhir/src/lib.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/crates/fhir/src/lib.rs b/crates/fhir/src/lib.rs index 834cc4ee7..0071a384b 100644 --- a/crates/fhir/src/lib.rs +++ b/crates/fhir/src/lib.rs @@ -1850,6 +1850,40 @@ impl FhirVersion { } } +/// Returns the compartment search parameters for a given FHIR version. +/// +/// This is a version-agnostic dispatch over the per-version +/// `helios_fhir::{r4,r4b,r5,r6}::get_compartment_params` functions, which are +/// generated from the official FHIR `CompartmentDefinition` resources. +/// +/// # Arguments +/// +/// * `version` - The FHIR version to use for lookup +/// * `compartment_type` - The compartment type (e.g., "Patient", "Encounter") +/// * `resource_type` - The target resource type (e.g., "Observation") +/// +/// # Returns +/// +/// A static slice of search parameter names that link the resource to the +/// compartment. Returns an empty slice if the resource is not a member of the +/// compartment. +pub fn get_compartment_params( + version: FhirVersion, + compartment_type: &str, + resource_type: &str, +) -> &'static [&'static str] { + match version { + #[cfg(feature = "R4")] + FhirVersion::R4 => r4::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R4B")] + FhirVersion::R4B => r4b::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R5")] + FhirVersion::R5 => r5::get_compartment_params(compartment_type, resource_type), + #[cfg(feature = "R6")] + FhirVersion::R6 => r6::get_compartment_params(compartment_type, resource_type), + } +} + /// Implements `Display` trait for user-friendly output formatting. /// /// This enables `FhirVersion` to be used in string formatting operations From 75ded9a2ae2b43c8f56ab9142ab575f5e1581490 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:44:58 -0400 Subject: [PATCH 02/81] refactor(rest): use helios_fhir::get_compartment_params in compartment handler Drops the private get_compartment_params_for_version wrapper in favor of the new shared dispatch on the helios-fhir crate. --- crates/rest/src/handlers/compartment.rs | 72 +++---------------------- 1 file changed, 8 insertions(+), 64 deletions(-) diff --git a/crates/rest/src/handlers/compartment.rs b/crates/rest/src/handlers/compartment.rs index 44e2808ec..2501bd075 100644 --- a/crates/rest/src/handlers/compartment.rs +++ b/crates/rest/src/handlers/compartment.rs @@ -14,7 +14,6 @@ use axum::{ http::StatusCode, response::{IntoResponse, Response}, }; -use helios_fhir::FhirVersion; use helios_persistence::core::{ResourceStorage, SearchProvider}; use tracing::debug; @@ -22,56 +21,6 @@ use crate::error::{RestError, RestResult}; use crate::extractors::{FhirVersionExtractor, TenantExtractor, build_search_query_from_map}; use crate::state::AppState; -/// Returns compartment search parameters for a specific FHIR version. -/// -/// This function dispatches to the version-specific generated compartment lookup -/// functions in the helios_fhir crate. The compartment definitions are generated -/// from the official FHIR CompartmentDefinition resources. -/// -/// # Arguments -/// -/// * `version` - The FHIR version to use for lookup -/// * `compartment_type` - The compartment type (e.g., "Patient", "Encounter") -/// * `resource_type` - The target resource type (e.g., "Observation") -/// -/// # Returns -/// -/// A static slice of search parameter names that link the resource to the compartment. -/// Returns an empty slice if the resource is not a member of the compartment. -fn get_compartment_params_for_version( - version: FhirVersion, - compartment_type: &str, - resource_type: &str, -) -> Result<&'static [&'static str], String> { - match version { - #[cfg(feature = "R4")] - FhirVersion::R4 => Ok(helios_fhir::r4::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R4B")] - FhirVersion::R4B => Ok(helios_fhir::r4b::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R5")] - FhirVersion::R5 => Ok(helios_fhir::r5::get_compartment_params( - compartment_type, - resource_type, - )), - #[cfg(feature = "R6")] - FhirVersion::R6 => Ok(helios_fhir::r6::get_compartment_params( - compartment_type, - resource_type, - )), - #[allow(unreachable_patterns)] - _ => Err(format!( - "FHIR version {:?} is not enabled in this build", - version - )), - } -} - /// Handler for compartment search. /// /// Searches for resources within a specific compartment. @@ -111,8 +60,7 @@ where // Get the reference parameters for this compartment/target combination let fhir_version = version.storage_version(); let ref_params = - get_compartment_params_for_version(fhir_version, &compartment_type, &target_type) - .map_err(|message| RestError::InternalError { message })?; + helios_fhir::get_compartment_params(fhir_version, &compartment_type, &target_type); // Check if the resource type is a member of the compartment if ref_params.is_empty() { @@ -298,13 +246,13 @@ mod urlencoding { #[cfg(test)] mod tests { use super::*; + use helios_fhir::FhirVersion; #[test] fn test_get_compartment_params_patient_observation() { // Test that Patient compartment includes Observation with subject and performer params let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "Observation") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "Observation"); assert!(!params.is_empty()); assert!(params.contains(&"subject")); } @@ -313,8 +261,7 @@ mod tests { fn test_get_compartment_params_patient_immunization() { // Test that Patient compartment includes Immunization with patient param let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "Immunization") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "Immunization"); assert!(!params.is_empty()); assert!(params.contains(&"patient")); } @@ -323,8 +270,7 @@ mod tests { fn test_get_compartment_params_encounter_procedure() { // Test that Encounter compartment includes Procedure with encounter param let params = - get_compartment_params_for_version(FhirVersion::default(), "Encounter", "Procedure") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Encounter", "Procedure"); assert!(!params.is_empty()); assert!(params.contains(&"encounter")); } @@ -333,8 +279,7 @@ mod tests { fn test_get_compartment_params_unknown() { // Test that unknown resource types return an empty slice let params = - get_compartment_params_for_version(FhirVersion::default(), "Patient", "UnknownType") - .unwrap(); + helios_fhir::get_compartment_params(FhirVersion::default(), "Patient", "UnknownType"); assert!(params.is_empty()); } @@ -342,12 +287,11 @@ mod tests { fn test_get_compartment_params_multiple() { // Test that some resources have multiple compartment params // AllergyIntolerance in Patient compartment has: patient, recorder, asserter - let params = get_compartment_params_for_version( + let params = helios_fhir::get_compartment_params( FhirVersion::default(), "Patient", "AllergyIntolerance", - ) - .unwrap(); + ); assert!( params.len() >= 2, "Expected multiple params for AllergyIntolerance" From d236e5c1bcd1099a6c6ffee1588356d915720ed0 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:45:37 -0400 Subject: [PATCH 03/81] feat(persistence): add BulkExportError::LeaseLost variant Returned by fenced ExportWorkerStorage methods when a stale worker's mutation is rejected because the job has been reclaimed. --- crates/persistence/src/error.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/persistence/src/error.rs b/crates/persistence/src/error.rs index 3f96d16b3..fc55dd0be 100644 --- a/crates/persistence/src/error.rs +++ b/crates/persistence/src/error.rs @@ -392,6 +392,10 @@ pub enum BulkExportError { /// Too many concurrent exports. #[error("too many concurrent exports (maximum: {max_concurrent})")] TooManyConcurrentExports { max_concurrent: u32 }, + + /// The worker lease for this job was lost (reclaimed by another worker). + #[error("export job {job_id} lease lost (reclaimed by another worker)")] + LeaseLost { job_id: String }, } /// Errors related to bulk submit operations. From fe80542b723b499b0d45311cf10d378849f1c76d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:45:37 -0400 Subject: [PATCH 04/81] feat(persistence): extend bulk-export types for the async kick-off flow - ExportRequest gains until / elements / include_associated_data / patient_refs - ExportManifest gains deleted / link (IG-required) - New StartExportInput bundles kickoff metadata (transaction_time, request_url, owner_subject, fhir_version) - New RawExportManifest / RawManifestEntry: storage-side manifest carrying ExportPartKey rather than wire URLs - New ExportJobMetadata, ExportFileMetadata, ExpiredExportRef - New GroupExportProvider::get_group_members_with_periods (default impl derived from get_group_members) so backends can surface Group.member.period.start for the _since-newly-added filter - BulkExportStorage gains start_export(StartExportInput) signature, RawExportManifest return, get_export_job_metadata, get_export_file_metadata, count_active_exports, list_expired_exports --- crates/persistence/src/core/bulk_export.rs | 230 +++++++++++++++++++-- 1 file changed, 216 insertions(+), 14 deletions(-) diff --git a/crates/persistence/src/core/bulk_export.rs b/crates/persistence/src/core/bulk_export.rs index 432f0d396..91e874515 100644 --- a/crates/persistence/src/core/bulk_export.rs +++ b/crates/persistence/src/core/bulk_export.rs @@ -315,14 +315,34 @@ pub struct ExportRequest { #[serde(default)] pub resource_types: Vec, - /// Only include resources modified since this time. + /// Only include resources modified at or after this time (`_since`). #[serde(skip_serializing_if = "Option::is_none")] pub since: Option>, + /// Only include resources modified at or before this time (`_until`). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub until: Option>, + /// Type-specific filters to apply during export. #[serde(default)] pub type_filters: Vec, + /// Element paths to include (`_elements`). When non-empty, exported + /// resources are subset to these paths plus mandatory elements and tagged + /// `SUBSETTED`. + #[serde(default)] + pub elements: Vec, + + /// `includeAssociatedData` hint values. Parsed but currently a no-op + /// (rejected under `Prefer: handling=strict`, ignored otherwise). + #[serde(default)] + pub include_associated_data: Vec, + + /// Patient references restricting the export (POST `patient` parameter). + /// Only valid for patient- and group-level exports. + #[serde(default)] + pub patient_refs: Vec, + /// Batch size for processing (implementation-specific). #[serde(default = "default_batch_size")] pub batch_size: u32, @@ -347,7 +367,11 @@ impl ExportRequest { level, resource_types: Vec::new(), since: None, + until: None, type_filters: Vec::new(), + elements: Vec::new(), + include_associated_data: Vec::new(), + patient_refs: Vec::new(), batch_size: default_batch_size(), output_format: default_output_format(), } @@ -382,6 +406,24 @@ impl ExportRequest { self } + /// Sets the until filter. + pub fn with_until(mut self, until: DateTime) -> Self { + self.until = Some(until); + self + } + + /// Sets the `_elements` element paths. + pub fn with_elements(mut self, elements: Vec) -> Self { + self.elements = elements; + self + } + + /// Sets the patient references (POST `patient` filter). + pub fn with_patient_refs(mut self, patient_refs: Vec) -> Self { + self.patient_refs = patient_refs; + self + } + /// Adds a type filter. pub fn with_type_filter(mut self, filter: TypeFilter) -> Self { self.type_filters.push(filter); @@ -571,6 +613,12 @@ pub struct ExportManifest { /// Output files containing OperationOutcome resources for errors. #[serde(default)] pub error: Vec, + /// Files containing deleted resource references (always empty for now). + #[serde(default)] + pub deleted: Vec, + /// Pagination links for partial manifests (always empty — `allowPartialManifests` unsupported). + #[serde(default)] + pub link: Vec, /// Informational messages. #[serde(default, skip_serializing_if = "Option::is_none")] pub message: Option, @@ -588,6 +636,8 @@ impl ExportManifest { requires_access_token: true, output: Vec::new(), error: Vec::new(), + deleted: Vec::new(), + link: Vec::new(), message: None, extension: None, } @@ -666,6 +716,106 @@ impl NdjsonBatch { } } +/// Kickoff metadata for starting an export job. +/// +/// Bundles the [`ExportRequest`] (what to export) with the server-frozen +/// metadata captured once at kickoff time: `transaction_time`, the original +/// request URL, the owning principal's subject, and the FHIR version. These +/// are the single source of truth — the worker only ever reads them back. +#[derive(Debug, Clone)] +pub struct StartExportInput { + /// What to export. + pub request: ExportRequest, + /// Server wall-clock frozen at kickoff (the manifest `transactionTime`). + pub transaction_time: DateTime, + /// The full kickoff request URL (echoed in the manifest `request` field). + pub request_url: String, + /// The subject of the authenticated principal that kicked off the export. + pub owner_subject: Option, + /// The FHIR version the export runs against. + pub fhir_version: helios_fhir::FhirVersion, +} + +/// A single entry in a [`RawExportManifest`] — carries a storage key, never a URL. +#[derive(Debug, Clone)] +pub struct RawManifestEntry { + /// The resource type contained in this part. + pub resource_type: String, + /// The output-store key for this part (URL minting happens in the REST layer). + pub key: crate::core::bulk_export_output::ExportPartKey, + /// Number of resources in the part. + pub count: u64, +} + +/// The storage-side view of a completed export's manifest. +/// +/// Carries keys rather than URLs — the REST layer mints download URLs via the +/// [`ExportOutputStore`](crate::core::bulk_export_output::ExportOutputStore) +/// and assembles the wire-format [`ExportManifest`]. +#[derive(Debug, Clone)] +pub struct RawExportManifest { + /// Server wall-clock frozen at kickoff. + pub transaction_time: DateTime, + /// The original kickoff request URL. + pub request_url: String, + /// Current job status. + pub status: ExportStatus, + /// Error message if the job failed. + pub error_message: Option, + /// Time the job completed. + pub completed_at: Option>, + /// Output parts (`file_type = "output"`). + pub output: Vec, + /// Error parts (`file_type = "error"`). + pub errors: Vec, +} + +/// Lightweight job metadata for authorization checks. +/// +/// Returned by `get_export_job_metadata` — a single cheap row lookup the REST +/// status/cancel handlers call *before* any heavier status/manifest query. +#[derive(Debug, Clone)] +pub struct ExportJobMetadata { + /// The job ID. + pub job_id: ExportJobId, + /// Current status. + pub status: ExportStatus, + /// The export level. + pub level: ExportLevel, + /// Subject of the principal that owns the job. + pub owner_subject: Option, + /// Server wall-clock frozen at kickoff. + pub transaction_time: DateTime, + /// Time the job completed. + pub completed_at: Option>, + /// The original kickoff request URL. + pub request_url: String, +} + +/// Metadata for a single output/error file, for the download handler. +#[derive(Debug, Clone)] +pub struct ExportFileMetadata { + /// The output-store key for this part. + pub key: crate::core::bulk_export_output::ExportPartKey, + /// The resource type contained in the file. + pub resource_type: String, + /// `"output"` or `"error"`. + pub file_type: String, + /// Number of resources (lines) in the file. + pub line_count: u64, + /// Subject of the principal that owns the job. + pub job_owner_subject: Option, +} + +/// A reference to an expired export job, for the cleanup task. +#[derive(Debug, Clone)] +pub struct ExpiredExportRef { + /// The tenant the job belongs to. + pub tenant: TenantContext, + /// The expired job ID. + pub job_id: ExportJobId, +} + // ============================================================================ // Traits // ============================================================================ @@ -681,7 +831,8 @@ pub trait BulkExportStorage: Send + Sync { /// # Arguments /// /// * `tenant` - The tenant context - /// * `request` - The export request parameters + /// * `input` - The kickoff metadata (request + frozen `transaction_time`, + /// `request_url`, `owner_subject`, `fhir_version`) /// /// # Returns /// @@ -694,7 +845,7 @@ pub trait BulkExportStorage: Send + Sync { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult; /// Gets the current status of an export job. @@ -750,26 +901,20 @@ pub trait BulkExportStorage: Send + Sync { job_id: &ExportJobId, ) -> StorageResult<()>; - /// Gets the manifest for a completed export. + /// Gets the storage-side manifest for a completed export. /// - /// # Arguments - /// - /// * `tenant` - The tenant context - /// * `job_id` - The export job ID - /// - /// # Returns - /// - /// The export manifest with output file information. + /// Returns a [`RawExportManifest`] carrying output-store *keys* — the REST + /// layer mints download URLs and assembles the wire-format + /// [`ExportManifest`]. /// /// # Errors /// /// * `BulkExportError::JobNotFound` - If the job doesn't exist - /// * `BulkExportError::InvalidJobState` - If the job is not complete async fn get_export_manifest( &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult; + ) -> StorageResult; /// Lists export jobs for a tenant. /// @@ -786,6 +931,47 @@ pub trait BulkExportStorage: Send + Sync { tenant: &TenantContext, include_completed: bool, ) -> StorageResult>; + + /// Returns lightweight job metadata for an authorization check. + /// + /// Called by the REST status/cancel handlers *before* any heavier query. + /// + /// # Errors + /// + /// * `BulkExportError::JobNotFound` - If the job doesn't exist + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult; + + /// Returns file metadata for a single output/error part, for the download + /// handler. `part` is the `{resource_type}-{part_index}` route segment. + /// + /// # Errors + /// + /// * `BulkExportError::JobNotFound` - If the job or part doesn't exist + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult; + + /// Counts active (`accepted` or `in_progress`) jobs for a tenant — used to + /// enforce the per-tenant concurrency cap at kickoff. + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult; + + /// Lists expired completed jobs across *all* tenants, for the cleanup task. + /// + /// This is intentionally cross-tenant — the cleanup task is a server-wide + /// background job, so this is the one method that does not take a tenant. + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: std::time::Duration, + limit: u32, + ) -> StorageResult>; } /// Data provider for export operations. @@ -946,6 +1132,22 @@ pub trait GroupExportProvider: PatientExportProvider { tenant: &TenantContext, group_id: &str, ) -> StorageResult>; + + /// Returns each member's reference together with its `Group.member.period.start`. + /// + /// The default implementation falls back to [`get_group_members`] and + /// returns `None` for every period start (loses the membership-history + /// signal the `_since`-newly-added filter relies on). Backends that can + /// inspect the raw Group resource override this to return real period + /// starts. + async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let members = self.get_group_members(tenant, group_id).await?; + Ok(members.into_iter().map(|m| (m, None)).collect()) + } } #[cfg(test)] From 3b7bbb868847322964a13c9f29282c6838eb2310 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:45:37 -0400 Subject: [PATCH 05/81] feat(persistence): add ExportOutputStore trait + supporting types ExportPartKey (with embedded fencing_token), ExportPartWriter (line + byte counter over a boxed AsyncWrite), FinalizedPart, DownloadUrl, and the ExportOutputStore trait. Decouples 'where the bytes go' from the job-state backend. --- .../src/core/bulk_export_output.rs | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 crates/persistence/src/core/bulk_export_output.rs diff --git a/crates/persistence/src/core/bulk_export_output.rs b/crates/persistence/src/core/bulk_export_output.rs new file mode 100644 index 000000000..da5f8437a --- /dev/null +++ b/crates/persistence/src/core/bulk_export_output.rs @@ -0,0 +1,188 @@ +//! Output storage for bulk export NDJSON files. +//! +//! The [`ExportOutputStore`] trait decouples *where the exported bytes go* +//! (local filesystem, S3, …) from the job-state backend. The job-state +//! backend stores keys; the output store turns keys into bytes and URLs. + +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::error::StorageResult; +use crate::tenant::TenantContext; + +/// A stable identifier for a single output part. +/// +/// The `fencing_token` is embedded so a zombie worker (one that lost its +/// lease) writes to a *different* key than the live worker holding the +/// reclaimed job, preventing output corruption. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ExportPartKey { + /// The tenant the job belongs to. + pub tenant_id: String, + /// The export job this part belongs to. + pub job_id: ExportJobId, + /// The FHIR resource type contained in the part. + pub resource_type: String, + /// `"output"` or `"error"`. + pub file_type: String, + /// The zero-based part index within `(job, file_type, resource_type)`. + pub part_index: u32, + /// The fencing token of the worker that produced the part. + pub fencing_token: u64, +} + +impl ExportPartKey { + /// Creates a new output part key (`file_type = "output"`). + pub fn output( + tenant_id: impl Into, + job_id: ExportJobId, + resource_type: impl Into, + part_index: u32, + fencing_token: u64, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + job_id, + resource_type: resource_type.into(), + file_type: "output".to_string(), + part_index, + fencing_token, + } + } + + /// Creates a new error part key (`file_type = "error"`). + pub fn error( + tenant_id: impl Into, + job_id: ExportJobId, + resource_type: impl Into, + part_index: u32, + fencing_token: u64, + ) -> Self { + Self { + tenant_id: tenant_id.into(), + job_id, + resource_type: resource_type.into(), + file_type: "error".to_string(), + part_index, + fencing_token, + } + } + + /// The `{resource_type}-{part_index}` segment used in download routes. + pub fn part_segment(&self) -> String { + format!("{}-{}", self.resource_type, self.part_index) + } +} + +/// An open writer for a single export output part. +/// +/// Wraps a boxed async writer plus a line counter. Callers push NDJSON lines +/// with [`write_line`](ExportPartWriter::write_line) and then hand the writer +/// to [`ExportOutputStore::finalize_part`]. +pub struct ExportPartWriter { + /// The underlying async byte sink. + pub writer: std::pin::Pin>, + /// Number of lines written so far. + pub line_count: u64, + /// Number of bytes written so far. + pub byte_count: u64, +} + +impl ExportPartWriter { + /// Creates a new part writer over the given async sink. + pub fn new(writer: std::pin::Pin>) -> Self { + Self { + writer, + line_count: 0, + byte_count: 0, + } + } + + /// Writes one NDJSON line (a trailing newline is appended). + pub async fn write_line(&mut self, line: &str) -> std::io::Result<()> { + use tokio::io::AsyncWriteExt; + self.writer.write_all(line.as_bytes()).await?; + self.writer.write_all(b"\n").await?; + self.line_count += 1; + self.byte_count += line.len() as u64 + 1; + Ok(()) + } + + /// Flushes the underlying writer. + pub async fn flush(&mut self) -> std::io::Result<()> { + use tokio::io::AsyncWriteExt; + self.writer.flush().await + } +} + +impl std::fmt::Debug for ExportPartWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExportPartWriter") + .field("line_count", &self.line_count) + .field("byte_count", &self.byte_count) + .finish() + } +} + +/// A finalized, immutable output part as it will appear in the manifest. +#[derive(Debug, Clone)] +pub struct FinalizedPart { + /// The part's stable key. + pub key: ExportPartKey, + /// The resource type contained in the part. + pub resource_type: String, + /// Number of resources (lines) in the part. + pub line_count: u64, + /// Total byte size of the part. + pub size_bytes: u64, +} + +/// A download URL plus the access posture the manifest should advertise. +#[derive(Debug, Clone)] +pub struct DownloadUrl { + /// The URL the client should fetch. + pub url: String, + /// `true` if the URL requires the kickoff Bearer token (HFS-served); + /// `false` if it is pre-signed and the client must NOT send a token. + pub requires_access_token: bool, +} + +/// Pluggable backend for bulk export output files. +/// +/// Implementations decide where NDJSON output physically lives (local FS, S3, +/// …) and how download URLs are minted. The job-state backend is unaware of +/// this — it stores keys; the output store turns keys into bytes and URLs. +#[async_trait] +pub trait ExportOutputStore: Send + Sync { + /// Opens an async writer for a new (or re-finalized) output part. + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult; + + /// Marks a part as finalized and immutable. + /// + /// For object stores this completes the multipart upload; for the local + /// filesystem this fsyncs and renames `.tmp` → final. + async fn finalize_part( + &self, + key: &ExportPartKey, + writer: ExportPartWriter, + ) -> StorageResult; + + /// Produces a download URL for a finalized part. + async fn download_url(&self, key: &ExportPartKey, ttl: Duration) -> StorageResult; + + /// Opens an async reader over a finalized part (HFS-served download path). + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>>; + + /// Deletes all output parts for a job. Idempotent — a missing job is `Ok`. + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()>; +} From 2ab8ea61368011a9f0b19b6a2950f24a84c8bc4c Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:45:37 -0400 Subject: [PATCH 06/81] feat(persistence): add ExportWorkerStorage / ExportClaimStrategy / worker - WorkerId, ExportJobLease (with fencing_token), LeaseError - ExportClaimStrategy: claim_next + heartbeat + release - ExportWorkerStorage: every method fenced by (worker_id, fencing_token) so a stale worker cannot mutate progress, file rows, or terminal status after its lease has been reclaimed - BulkExportJobStore marker trait (BulkExportStorage + ExportWorkerStorage + ExportClaimStrategy) for bootstrap-time selection of the job store - DefaultExportWorker drives a claimed job to completion under its lease, applying _typeFilter / _since / _until / _elements, supporting resume from the persisted cursor, and honoring since_newly_added=exclude via Group.member.period.start --- .../src/core/bulk_export_worker.rs | 641 ++++++++++++++++++ 1 file changed, 641 insertions(+) create mode 100644 crates/persistence/src/core/bulk_export_worker.rs diff --git a/crates/persistence/src/core/bulk_export_worker.rs b/crates/persistence/src/core/bulk_export_worker.rs new file mode 100644 index 000000000..8edd2e4e3 --- /dev/null +++ b/crates/persistence/src/core/bulk_export_worker.rs @@ -0,0 +1,641 @@ +//! Worker-facing traits for bulk export job execution. +//! +//! These traits are *not* part of the REST-facing [`BulkExportStorage`] surface +//! — they are what the export worker uses to claim jobs and persist progress +//! under a heartbeated, fencing-token-guarded lease. +//! +//! [`BulkExportStorage`]: crate::core::bulk_export::BulkExportStorage + +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; + +use crate::core::bulk_export::{ + BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportRequest, ExportStatus, + GroupExportProvider, PatientExportProvider, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportOutputStore, ExportPartKey, FinalizedPart}; +use crate::error::{StorageError, StorageResult}; +use crate::tenant::TenantContext; + +/// Identifier for an export worker instance. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct WorkerId(String); + +impl WorkerId { + /// Creates a worker ID from a string. + pub fn new(id: impl Into) -> Self { + Self(id.into()) + } + + /// Generates a fresh random worker ID. + pub fn random() -> Self { + Self(uuid::Uuid::new_v4().to_string()) + } + + /// Returns the ID as a string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for WorkerId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +/// A lease over a single export job, held by exactly one worker at a time. +/// +/// Leases expire; if the holding worker does not heartbeat before +/// `lease_expiry`, the lease is reclaimable. The `fencing_token` is bumped on +/// every claim so a zombie worker cannot mutate a job another worker now owns. +#[derive(Debug, Clone)] +pub struct ExportJobLease { + /// The leased job. + pub job_id: ExportJobId, + /// The tenant the job belongs to. + pub tenant: TenantContext, + /// The worker holding the lease. + pub worker_id: WorkerId, + /// When the lease expires if not renewed. + pub lease_expiry: DateTime, + /// Monotonically increasing token, bumped on every claim. + pub fencing_token: u64, +} + +/// Error returned by fenced worker-storage operations. +#[derive(Debug)] +pub enum LeaseError { + /// The lease was lost — another worker reclaimed the job. The caller MUST + /// stop writing immediately. + LeaseLost { + /// The job whose lease was lost. + job_id: ExportJobId, + }, + /// An underlying storage error. + Storage(StorageError), +} + +impl std::fmt::Display for LeaseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::LeaseLost { job_id } => { + write!( + f, + "export job {job_id} lease lost (reclaimed by another worker)" + ) + } + Self::Storage(e) => write!(f, "storage error: {e}"), + } + } +} + +impl std::error::Error for LeaseError {} + +impl From for LeaseError { + fn from(e: StorageError) -> Self { + Self::Storage(e) + } +} + +/// The worker's view of a claimed job: everything needed to (re)run it. +#[derive(Debug, Clone)] +pub struct WorkerJobView { + /// The original export request. + pub request: ExportRequest, + /// The export level. + pub level: ExportLevel, + /// Server wall-clock frozen at kickoff. + pub transaction_time: DateTime, + /// The FHIR version the export runs against. + pub fhir_version: helios_fhir::FhirVersion, + /// Already-persisted per-type progress, for resuming after a crash. + pub type_progress: Vec, +} + +/// Strategy for atomically claiming the next available export job. +/// +/// Each backend reaches for its native primitive — `SELECT … FOR UPDATE SKIP +/// LOCKED` on Postgres, a process-local mutex on SQLite. +#[async_trait] +pub trait ExportClaimStrategy: Send + Sync { + /// Atomically transitions one eligible job (`accepted`, or `in_progress` + /// with an expired lease) to held-by-this-worker, bumping the fencing + /// token. Returns `Ok(None)` when no job is available. + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: Duration, + ) -> StorageResult>; + + /// Renews a lease the worker still holds; returns the new expiry, or + /// `LeaseError::LeaseLost` if the job was reclaimed. + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError>; + + /// Releases a lease early (graceful shutdown). Best-effort. + async fn release(&self, lease: ExportJobLease) -> StorageResult<()>; +} + +/// Worker-owned mutations of job state. +/// +/// **Every method is fenced** by `worker_id` + `fencing_token`: a guarded +/// mutation affecting zero rows returns `LeaseError::LeaseLost`, so a zombie +/// worker cannot corrupt progress, file rows, or terminal status after its +/// job has been reclaimed. +#[async_trait] +pub trait ExportWorkerStorage: Send + Sync { + /// Loads the claimed job's request, level, frozen metadata and persisted + /// per-type progress (for resume). Fenced. + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result; + + /// Marks the job `in_progress` (sets `started_at` if unset). Fenced. + async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError>; + + /// Idempotent upsert of per-type progress (cursor + counts). Fenced. + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError>; + + /// Idempotent upsert of a finalized output/error file row. Fenced. + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError>; + + /// Marks the job `complete` (sets `completed_at`). Fenced. + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError>; + + /// Marks the job `error` with a message (sets `completed_at`). Fenced. + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError>; +} + +/// Marker trait composing the three job-state surfaces a worker needs. +/// +/// Only the SQLite and Postgres backends implement this; it is held as an +/// `Arc` and selected at bootstrap by +/// `HFS_BULK_EXPORT_BACKEND`. +pub trait BulkExportJobStore: + BulkExportStorage + ExportWorkerStorage + ExportClaimStrategy +{ +} + +impl BulkExportJobStore for T where + T: BulkExportStorage + ExportWorkerStorage + ExportClaimStrategy +{ +} + +/// Marker trait for a resource-store that can feed every export level. +pub trait ExportResourceProvider: + ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} + +impl ExportResourceProvider for T where + T: ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} + +/// The default in-process export worker. +/// +/// Binds a [`BulkExportJobStore`] (job state + claim + worker storage), an +/// [`ExportResourceProvider`] (the resource store), and an +/// [`ExportOutputStore`] (where NDJSON bytes go), and drives a claimed job to +/// completion under its lease. +pub struct DefaultExportWorker { + /// Job-state store (claim, worker storage, lifecycle). + pub jobs: Arc, + /// Resource data provider. + pub data: Arc, + /// Output store for NDJSON parts. + pub output: Arc, + /// This worker's identifier. + pub worker_id: WorkerId, + /// Group-export `_since` toggle: when `true`, exclude resources from + /// before `_since` for patients added to the Group after `_since` + /// (using `Group.member.period.start`). + pub exclude_since_newly_added: bool, +} + +impl DefaultExportWorker +where + Js: BulkExportJobStore + ?Sized, + Dp: ExportResourceProvider + ?Sized, + Os: ExportOutputStore + ?Sized, +{ + /// Creates a new worker (defaults to `since_newly_added=include`). + pub fn new(jobs: Arc, data: Arc, output: Arc, worker_id: WorkerId) -> Self { + Self { + jobs, + data, + output, + worker_id, + exclude_since_newly_added: false, + } + } + + /// Sets the `since_newly_added=exclude` toggle for Group exports. + pub fn with_exclude_since_newly_added(mut self, exclude: bool) -> Self { + self.exclude_since_newly_added = exclude; + self + } + + /// Runs the export job described by `lease` to completion. + /// + /// Every job-state mutation is fenced by `lease.worker_id` + + /// `lease.fencing_token`; any `LeaseError::LeaseLost` aborts the run + /// silently (the worker that reclaimed the job now owns it). + pub async fn run_job(&self, lease: ExportJobLease) -> StorageResult<()> { + match self.run_job_inner(&lease).await { + Ok(()) => Ok(()), + Err(LeaseError::LeaseLost { .. }) => { + // Another worker owns the job now — stop silently. + Ok(()) + } + Err(LeaseError::Storage(e)) => { + // Best-effort: mark the job failed (also fenced). + let _ = self + .jobs + .fail_export_job( + &lease.tenant, + &lease.job_id, + &lease.worker_id, + lease.fencing_token, + &e.to_string(), + ) + .await; + Err(e) + } + } + } + + async fn run_job_inner(&self, lease: &ExportJobLease) -> Result<(), LeaseError> { + let tenant = &lease.tenant; + let job_id = &lease.job_id; + let wid = &lease.worker_id; + let token = lease.fencing_token; + + let view = self + .jobs + .get_export_job_for_worker(tenant, job_id, wid, token) + .await?; + self.jobs + .mark_export_in_progress(tenant, job_id, wid, token) + .await?; + + let request = &view.request; + + // Resolve the resource types to export. + let types = self + .data + .list_export_types(tenant, request) + .await + .map_err(LeaseError::Storage)?; + + // For Group exports, resolve the member patient IDs once. + // When `exclude_since_newly_added` is set AND `_since` is provided, + // filter out patients whose `Group.member.period.start` is *after* + // `_since` (i.e., they joined the cohort after the client's last + // sync) — the IG-recommended behavior under the `exclude` toggle. + let group_patient_ids: Option> = match &view.level { + ExportLevel::Group { group_id } => { + let ids = if self.exclude_since_newly_added && view.request.since.is_some() { + let since = view.request.since.unwrap(); + let members = self + .data + .get_group_members_with_periods(tenant, group_id) + .await + .map_err(LeaseError::Storage)?; + members + .into_iter() + .filter_map(|(reference, period_start)| { + let pid = reference.strip_prefix("Patient/")?; + // Keep members whose period.start is unknown OR + // <= since (i.e., were already members at since). + match period_start { + Some(start) if start > since => None, + _ => Some(pid.to_string()), + } + }) + .collect() + } else { + self.data + .resolve_group_patient_ids(tenant, group_id) + .await + .map_err(LeaseError::Storage)? + }; + Some(ids) + } + _ => None, + }; + + let batch_size = request.batch_size.max(1); + + for resource_type in &types { + // Resume from any persisted cursor for this type. + let mut cursor: Option = view + .type_progress + .iter() + .find(|p| &p.resource_type == resource_type) + .and_then(|p| p.cursor_state.clone()); + let mut exported: u64 = view + .type_progress + .iter() + .find(|p| &p.resource_type == resource_type) + .map(|p| p.exported_count) + .unwrap_or(0); + let mut part_index: u32 = 0; + + loop { + // Cooperative cancellation check. + if let Ok(progress) = self.jobs.get_export_status(tenant, job_id).await { + if progress.status == ExportStatus::Cancelled { + return Ok(()); + } + } + + let batch = match &group_patient_ids { + Some(pids) => self + .data + .fetch_patient_compartment_batch( + tenant, + request, + resource_type, + pids, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)?, + None if matches!(view.level, ExportLevel::Patient) => { + // Patient-level: export the whole patient compartment. + // For simplicity, treat it like a system-level fetch of + // the type (patient-scoped filtering is applied by the + // provider via the request). + self.data + .fetch_export_batch( + tenant, + request, + resource_type, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)? + } + None => self + .data + .fetch_export_batch( + tenant, + request, + resource_type, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)?, + }; + + if !batch.lines.is_empty() { + let key = ExportPartKey::output( + tenant.tenant_id().as_str(), + job_id.clone(), + resource_type.clone(), + part_index, + token, + ); + let mut writer = self + .output + .open_writer(&key) + .await + .map_err(LeaseError::Storage)?; + for line in &batch.lines { + let out_line = apply_elements(line, &request.elements); + writer.write_line(&out_line).await.map_err(|e| { + LeaseError::Storage(StorageError::Backend( + crate::error::BackendError::Internal { + backend_name: "export-worker".to_string(), + message: format!("write_line: {e}"), + source: None, + }, + )) + })?; + } + let finalized = self + .output + .finalize_part(&key, writer) + .await + .map_err(LeaseError::Storage)?; + exported += finalized.line_count; + self.jobs + .record_export_file(tenant, job_id, wid, token, &finalized, "output") + .await?; + part_index += 1; + } + + cursor = batch.next_cursor.clone(); + + // Persist progress + heartbeat after each batch. + let mut progress = TypeExportProgress::new(resource_type.clone()); + progress.exported_count = exported; + progress.cursor_state = cursor.clone(); + self.jobs + .update_export_type_progress(tenant, job_id, wid, token, &progress) + .await?; + self.jobs.heartbeat(lease).await?; + + if batch.is_last { + break; + } + } + } + + self.jobs + .finish_export_job(tenant, job_id, wid, token) + .await?; + Ok(()) + } +} + +/// Applies `_elements` projection to an NDJSON line. +/// +/// When `elements` is non-empty, keeps `resourceType`, `id`, `meta` and the +/// listed top-level element names, and adds a `SUBSETTED` `meta.tag`. On any +/// parse failure the original line is returned unchanged. +fn apply_elements(line: &str, elements: &[String]) -> String { + if elements.is_empty() { + return line.to_string(); + } + let Ok(serde_json::Value::Object(obj)) = serde_json::from_str::(line) else { + return line.to_string(); + }; + let mut out = serde_json::Map::new(); + // Always-included mandatory elements. + for key in ["resourceType", "id"] { + if let Some(v) = obj.get(key) { + out.insert(key.to_string(), v.clone()); + } + } + // Requested top-level elements (strip a leading `ResourceType.` prefix). + for el in elements { + let name = el.rsplit('.').next().unwrap_or(el.as_str()); + if let Some(v) = obj.get(name) { + out.insert(name.to_string(), v.clone()); + } + } + // meta + SUBSETTED tag. + let mut meta = obj + .get("meta") + .and_then(|m| m.as_object().cloned()) + .unwrap_or_default(); + let tag = serde_json::json!({ + "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationValue", + "code": "SUBSETTED", + }); + let tags = meta + .entry("tag".to_string()) + .or_insert_with(|| serde_json::Value::Array(Vec::new())); + if let serde_json::Value::Array(arr) = tags { + arr.push(tag); + } + out.insert("meta".to_string(), serde_json::Value::Object(meta)); + serde_json::to_string(&serde_json::Value::Object(out)).unwrap_or_else(|_| line.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_apply_elements_noop_when_empty() { + let line = r#"{"resourceType":"Patient","id":"1","name":[]}"#; + assert_eq!(apply_elements(line, &[]), line); + } + + #[test] + fn test_apply_elements_subsets_and_tags() { + let line = r#"{"resourceType":"Patient","id":"1","name":[{"family":"X"}],"gender":"male"}"#; + let out = apply_elements(line, &["name".to_string()]); + let v: serde_json::Value = serde_json::from_str(&out).unwrap(); + assert_eq!(v["resourceType"], "Patient"); + assert_eq!(v["id"], "1"); + assert!(v.get("name").is_some()); + assert!(v.get("gender").is_none()); + assert_eq!(v["meta"]["tag"][0]["code"], "SUBSETTED"); + } + + #[cfg(feature = "sqlite")] + mod worker_integration { + use super::*; + use crate::backends::local_fs::LocalFsOutputStore; + use crate::backends::sqlite::SqliteBackend; + use crate::core::ResourceStorage; + use crate::core::bulk_export::{ExportRequest, StartExportInput}; + use crate::tenant::{TenantContext, TenantId, TenantPermissions}; + use chrono::Utc; + use std::sync::Arc; + + fn tenant() -> TenantContext { + TenantContext::new(TenantId::new("t1"), TenantPermissions::full_access()) + } + + #[tokio::test] + async fn test_run_job_system_export_end_to_end() { + let backend = Arc::new(SqliteBackend::in_memory().unwrap()); + backend.init_schema().unwrap(); + let tenant = tenant(); + + for i in 0..3 { + backend + .create( + &tenant, + "Patient", + serde_json::json!({"resourceType": "Patient", "id": format!("p{i}")}), + helios_fhir::FhirVersion::default(), + ) + .await + .unwrap(); + } + + let tmp = tempfile::tempdir().unwrap(); + let output = Arc::new(LocalFsOutputStore::new(tmp.path(), "http://localhost:8080")); + + let job_id = backend + .start_export( + &tenant, + StartExportInput { + request: ExportRequest::system() + .with_types(vec!["Patient".to_string()]) + .with_batch_size(2), + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("sub".to_string()), + fhir_version: helios_fhir::FhirVersion::default(), + }, + ) + .await + .unwrap(); + + let worker_id = WorkerId::new("w1"); + let worker = DefaultExportWorker::new( + Arc::clone(&backend), + Arc::clone(&backend), + Arc::clone(&output), + worker_id.clone(), + ); + + let lease = backend + .claim_next(&worker_id, Duration::from_secs(60)) + .await + .unwrap() + .expect("job claimable"); + assert_eq!(lease.job_id, job_id); + + worker.run_job(lease).await.unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + + let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); + let total: u64 = manifest.output.iter().map(|e| e.count).sum(); + assert_eq!(total, 3); + } + } +} From b881b46ec8deb979f58052babf195a075f99dbba Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:45:38 -0400 Subject: [PATCH 07/81] feat(persistence): re-export new bulk-export traits and types from core --- crates/persistence/src/core/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/crates/persistence/src/core/mod.rs b/crates/persistence/src/core/mod.rs index 264281a74..227146407 100644 --- a/crates/persistence/src/core/mod.rs +++ b/crates/persistence/src/core/mod.rs @@ -91,6 +91,8 @@ pub mod backend; pub mod bulk_export; +pub mod bulk_export_output; +pub mod bulk_export_worker; pub mod bulk_submit; pub mod capabilities; pub mod history; @@ -102,9 +104,17 @@ pub mod versioned; // Re-export main types pub use backend::{Backend, BackendCapability, BackendConfig, BackendKind, BackendPoolStats}; pub use bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, TypeFilter, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportManifest, ExportOutputFile, ExportProgress, + ExportRequest, ExportStatus, GroupExportProvider, NdjsonBatch, PatientExportProvider, + RawExportManifest, RawManifestEntry, StartExportInput, TypeExportProgress, TypeFilter, +}; +pub use bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +pub use bulk_export_worker::{ + BulkExportJobStore, DefaultExportWorker, ExportClaimStrategy, ExportJobLease, + ExportResourceProvider, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; pub use bulk_submit::{ BulkEntryOutcome, BulkEntryResult, BulkProcessingOptions, BulkSubmitProvider, From 795043180abcbce6d1fcc0cd43ae1082af85d9d9 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:13 -0400 Subject: [PATCH 08/81] =?UTF-8?q?feat(sqlite):=20v7=E2=86=92v8=20migration?= =?UTF-8?q?=20adds=20bulk-export=20lease=20+=20part=5Findex=20columns?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bulk_export_jobs: worker_id, lease_expiry, fencing_token, heartbeat_at, owner_subject, request_url, fhir_version + idx_export_jobs_claim. bulk_export_files: part_index, fencing_token + a backfill that assigns 0-based sequential part_index per (job_id, file_type, resource_type) before creating the unique idx_export_files_part. Includes test exercising the duplicate-row backfill case. --- .../persistence/src/backends/sqlite/schema.rs | 203 +++++++++++++++++- 1 file changed, 202 insertions(+), 1 deletion(-) diff --git a/crates/persistence/src/backends/sqlite/schema.rs b/crates/persistence/src/backends/sqlite/schema.rs index 04a47a03f..97ba00992 100644 --- a/crates/persistence/src/backends/sqlite/schema.rs +++ b/crates/persistence/src/backends/sqlite/schema.rs @@ -5,7 +5,7 @@ use rusqlite::Connection; use crate::error::StorageResult; /// Current schema version. -pub const SCHEMA_VERSION: i32 = 7; +pub const SCHEMA_VERSION: i32 = 8; /// Initialize the database schema. pub fn initialize_schema(conn: &Connection) -> StorageResult<()> { @@ -263,6 +263,7 @@ fn migrate_schema(conn: &Connection, from_version: i32) -> StorageResult<()> { 4 => migrate_v4_to_v5(conn)?, 5 => migrate_v5_to_v6(conn)?, 6 => migrate_v6_to_v7(conn)?, + 7 => migrate_v7_to_v8(conn)?, _ => { return Err(crate::error::StorageError::Backend( crate::error::BackendError::Internal { @@ -829,6 +830,128 @@ fn migrate_v6_to_v7(conn: &Connection) -> StorageResult<()> { Ok(()) } +/// Migrate from schema version 7 to version 8. +/// +/// Adds bulk-export worker/lease support: +/// - lease columns + `owner_subject`/`request_url`/`fhir_version` on `bulk_export_jobs` +/// - `part_index`/`fencing_token` on `bulk_export_files`, with a backfill of +/// `part_index` and a unique index for idempotent upserts +fn migrate_v7_to_v8(conn: &Connection) -> StorageResult<()> { + // Columns that may already exist if the table was created fresh — guard + // with PRAGMA table_info since SQLite has no `ADD COLUMN IF NOT EXISTS`. + let job_columns: Vec = { + let mut stmt = conn + .prepare("PRAGMA table_info(bulk_export_jobs)") + .map_err(|e| migration_err(format!("pragma bulk_export_jobs: {e}")))?; + let cols: Vec = stmt + .query_map([], |row| row.get::<_, String>(1)) + .map_err(|e| migration_err(format!("pragma rows: {e}")))? + .filter_map(|r| r.ok()) + .collect(); + cols + }; + let job_adds = [ + ( + "worker_id", + "ALTER TABLE bulk_export_jobs ADD COLUMN worker_id TEXT", + ), + ( + "lease_expiry", + "ALTER TABLE bulk_export_jobs ADD COLUMN lease_expiry TEXT", + ), + ( + "fencing_token", + "ALTER TABLE bulk_export_jobs ADD COLUMN fencing_token INTEGER NOT NULL DEFAULT 0", + ), + ( + "heartbeat_at", + "ALTER TABLE bulk_export_jobs ADD COLUMN heartbeat_at TEXT", + ), + ( + "owner_subject", + "ALTER TABLE bulk_export_jobs ADD COLUMN owner_subject TEXT", + ), + ( + "request_url", + "ALTER TABLE bulk_export_jobs ADD COLUMN request_url TEXT NOT NULL DEFAULT ''", + ), + ( + "fhir_version", + "ALTER TABLE bulk_export_jobs ADD COLUMN fhir_version TEXT NOT NULL DEFAULT '4.0'", + ), + ]; + for (col, sql) in &job_adds { + if !job_columns.iter().any(|c| c == col) { + conn.execute(sql, []) + .map_err(|e| migration_err(format!("add {col}: {e}")))?; + } + } + + let file_columns: Vec = { + let mut stmt = conn + .prepare("PRAGMA table_info(bulk_export_files)") + .map_err(|e| migration_err(format!("pragma bulk_export_files: {e}")))?; + let cols: Vec = stmt + .query_map([], |row| row.get::<_, String>(1)) + .map_err(|e| migration_err(format!("pragma rows: {e}")))? + .filter_map(|r| r.ok()) + .collect(); + cols + }; + let file_adds = [ + ( + "part_index", + "ALTER TABLE bulk_export_files ADD COLUMN part_index INTEGER NOT NULL DEFAULT 0", + ), + ( + "fencing_token", + "ALTER TABLE bulk_export_files ADD COLUMN fencing_token INTEGER NOT NULL DEFAULT 0", + ), + ]; + for (col, sql) in &file_adds { + if !file_columns.iter().any(|c| c == col) { + conn.execute(sql, []) + .map_err(|e| migration_err(format!("add {col}: {e}")))?; + } + } + + // Backfill part_index: 0-based sequential per (job_id, file_type, resource_type) + // ordered by id, so the unique index below builds without collisions on + // pre-existing rows. + conn.execute( + "UPDATE bulk_export_files SET part_index = ( + SELECT COUNT(*) FROM bulk_export_files f2 + WHERE f2.job_id = bulk_export_files.job_id + AND f2.file_type = bulk_export_files.file_type + AND f2.resource_type = bulk_export_files.resource_type + AND f2.id < bulk_export_files.id + )", + [], + ) + .map_err(|e| migration_err(format!("backfill part_index: {e}")))?; + + let indexes = [ + "CREATE INDEX IF NOT EXISTS idx_export_jobs_claim + ON bulk_export_jobs(tenant_id, status, lease_expiry)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_export_files_part + ON bulk_export_files(job_id, file_type, resource_type, part_index)", + ]; + for index_sql in &indexes { + conn.execute(index_sql, []) + .map_err(|e| migration_err(format!("create index: {e}")))?; + } + + Ok(()) +} + +fn migration_err(message: String) -> crate::error::StorageError { + crate::error::StorageError::Backend(crate::error::BackendError::Internal { + backend_name: "sqlite".to_string(), + message, + source: None, + }) +} + /// Drop all tables (for testing). #[cfg(test)] #[allow(dead_code)] @@ -989,4 +1112,82 @@ mod tests { .unwrap(); assert_eq!(table_count, 7); // 3 export + 4 submit tables } + + #[test] + fn test_migration_v7_to_v8_backfills_duplicate_file_rows() { + // Build a v6/v7-era schema (bulk tables without the v8 lease/part columns). + let conn = Connection::open_in_memory().unwrap(); + create_schema_v1(&conn).unwrap(); + let _ = get_schema_version(&conn).unwrap(); + migrate_v1_to_v2(&conn).unwrap(); + migrate_v2_to_v3(&conn).unwrap(); + migrate_v3_to_v4(&conn).unwrap(); + migrate_v4_to_v5(&conn).unwrap(); + migrate_v5_to_v6(&conn).unwrap(); + migrate_v6_to_v7(&conn).unwrap(); + set_schema_version(&conn, 7).unwrap(); + + // Seed a job and THREE output files for the same (job, file_type, + // resource_type) — all default part_index would collide. + conn.execute( + "INSERT INTO bulk_export_jobs + (id, tenant_id, status, level, request_json, transaction_time, created_at) + VALUES ('j1', 't1', 'complete', 'system', '{}', '2026-01-01T00:00:00Z', + '2026-01-01T00:00:00Z')", + [], + ) + .unwrap(); + for i in 0..3 { + conn.execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count) + VALUES ('j1', 'Patient', 'output', ?1, 10, 100)", + rusqlite::params![format!("/exports/j1/Patient-{i}.ndjson")], + ) + .unwrap(); + } + + // Run the v7 -> v8 migration. + migrate_v7_to_v8(&conn).unwrap(); + + // The backfill must have produced distinct 0-based part_index values + // per group, so the unique index built without a collision. + let mut stmt = conn + .prepare( + "SELECT part_index FROM bulk_export_files + WHERE job_id = 'j1' ORDER BY part_index", + ) + .unwrap(); + let part_indexes: Vec = stmt + .query_map([], |row| row.get(0)) + .unwrap() + .filter_map(|r| r.ok()) + .collect(); + assert_eq!(part_indexes, vec![0, 1, 2]); + + // The unique index exists. + let idx_count: i32 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master + WHERE type='index' AND name='idx_export_files_part'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(idx_count, 1); + + // Re-running the migration is a no-op (idempotent). + migrate_v7_to_v8(&conn).unwrap(); + + // New lease columns are present on bulk_export_jobs. + let has_worker_id: i32 = conn + .query_row( + "SELECT COUNT(*) FROM pragma_table_info('bulk_export_jobs') + WHERE name='worker_id'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(has_worker_id, 1); + } } From 6d9c7ddbc546f0cd43f1d9327d0e9238473e092b Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:13 -0400 Subject: [PATCH 09/81] feat(sqlite): implement BulkExportJobStore (claim + worker + lifecycle) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - start_export(StartExportInput): persists frozen kickoff metadata - get_export_manifest -> RawExportManifest assembled from rows - get_export_job_metadata / get_export_file_metadata - count_active_exports / list_expired_exports - ExportClaimStrategy via process-local mutex + INSERT/UPDATE - ExportWorkerStorage: every mutation fenced by worker_id + fencing_token (UPDATE … WHERE worker_id=? AND fencing_token=? for terminals, WHERE EXISTS-guarded ON CONFLICT upserts for progress + file rows) - get_group_members_with_periods reads Group.member.period.start - resolve_group_patient_ids flattens nested Groups with a cycle guard - Tests: stale-worker fencing, claim/lifecycle, group-cycle, since_newly_added --- .../src/backends/sqlite/bulk_export.rs | 1169 +++++++++++++++-- 1 file changed, 1064 insertions(+), 105 deletions(-) diff --git a/crates/persistence/src/backends/sqlite/bulk_export.rs b/crates/persistence/src/backends/sqlite/bulk_export.rs index 2c6a0757d..93f1591d3 100644 --- a/crates/persistence/src/backends/sqlite/bulk_export.rs +++ b/crates/persistence/src/backends/sqlite/bulk_export.rs @@ -1,20 +1,58 @@ //! Bulk export implementation for SQLite backend. use async_trait::async_trait; -use chrono::Utc; +use chrono::{DateTime, Utc}; use rusqlite::params; use serde_json::Value; +use std::time::Duration as StdDuration; +use tokio::sync::Mutex; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportProgress, ExportRequest, ExportStatus, + GroupExportProvider, NdjsonBatch, PatientExportProvider, RawExportManifest, RawManifestEntry, + StartExportInput, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportPartKey, FinalizedPart}; +use crate::core::bulk_export_worker::{ + ExportClaimStrategy, ExportJobLease, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; -use crate::tenant::TenantContext; +use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use super::SqliteBackend; +/// Process-local lock serializing `claim_next` for the single-instance +/// SQLite job store (SQLite has no `SELECT … FOR UPDATE SKIP LOCKED`). +static CLAIM_LOCK: Mutex<()> = Mutex::const_new(()); + +/// Parses an RFC3339 timestamp column into a UTC `DateTime`. +fn parse_dt(s: &str) -> StorageResult> { + DateTime::parse_from_rfc3339(s) + .map(|dt| dt.with_timezone(&Utc)) + .map_err(|e| internal_error(format!("invalid timestamp '{s}': {e}"))) +} + +/// Parses an optional RFC3339 timestamp column. +fn parse_dt_opt(s: Option) -> Option> { + s.and_then(|s| { + DateTime::parse_from_rfc3339(&s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }) +} + +/// Splits a `{resource_type}-{part_index}` download segment. +fn parse_part_segment(part: &str) -> Option<(String, u32)> { + let idx = part.rfind('-')?; + let resource_type = &part[..idx]; + let part_index: u32 = part[idx + 1..].parse().ok()?; + if resource_type.is_empty() { + return None; + } + Some((resource_type.to_string(), part_index)) +} + fn internal_error(message: String) -> StorageError { StorageError::Backend(BackendError::Internal { backend_name: "sqlite".to_string(), @@ -28,46 +66,31 @@ impl BulkExportStorage for SqliteBackend { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult { let conn = self.get_connection()?; let tenant_id = tenant.tenant_id().as_str(); - // Check for too many concurrent exports (limit to 5 active exports per tenant) - let active_count: i32 = conn - .query_row( - "SELECT COUNT(*) FROM bulk_export_jobs - WHERE tenant_id = ?1 AND status IN ('accepted', 'in-progress')", - params![tenant_id], - |row| row.get(0), - ) - .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; - - if active_count >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - let job_id = ExportJobId::new(); - let now = Utc::now(); - let transaction_time = now.to_rfc3339(); + let now = Utc::now().to_rfc3339(); + let transaction_time = input.transaction_time.to_rfc3339(); - let level_str = match &request.level { + let level_str = match &input.request.level { ExportLevel::System => "system".to_string(), ExportLevel::Patient => "patient".to_string(), ExportLevel::Group { .. } => "group".to_string(), }; - let group_id = request.group_id().map(|s| s.to_string()); + let group_id = input.request.group_id().map(|s| s.to_string()); - let request_json = serde_json::to_string(&request) + let request_json = serde_json::to_string(&input.request) .map_err(|e| internal_error(format!("Failed to serialize request: {}", e)))?; conn.execute( "INSERT INTO bulk_export_jobs - (id, tenant_id, status, level, group_id, request_json, transaction_time, created_at) - VALUES (?1, ?2, 'accepted', ?3, ?4, ?5, ?6, ?7)", + (id, tenant_id, status, level, group_id, request_json, transaction_time, + created_at, owner_subject, request_url, fhir_version, fencing_token) + VALUES (?1, ?2, 'accepted', ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, 0)", params![ job_id.as_str(), tenant_id, @@ -75,7 +98,10 @@ impl BulkExportStorage for SqliteBackend { group_id, request_json, transaction_time, - transaction_time + now, + input.owner_subject, + input.request_url, + input.fhir_version.as_mime_param(), ], ) .map_err(|e| internal_error(format!("Failed to create export job: {}", e)))?; @@ -265,68 +291,100 @@ impl BulkExportStorage for SqliteBackend { &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult { - let progress = self.get_export_status(tenant, job_id).await?; + ) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); - if progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: progress.status.to_string(), - })); - } + let (status_str, transaction_time, request_url, error_message, completed_at): ( + String, + String, + String, + Option, + Option, + ) = conn + .query_row( + "SELECT status, transaction_time, request_url, error_message, completed_at + FROM bulk_export_jobs WHERE id = ?1 AND tenant_id = ?2", + params![job_id.as_str(), tenant_id], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }, + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + } else { + internal_error(format!("Failed to get export job: {}", e)) + } + })?; - let conn = self.get_connection()?; + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; - // Get output files + // Get output/error files. let mut stmt = conn .prepare( - "SELECT resource_type, file_path, resource_count, file_type + "SELECT resource_type, resource_count, file_type, part_index, fencing_token FROM bulk_export_files WHERE job_id = ?1 - ORDER BY resource_type", + ORDER BY file_type, resource_type, part_index", ) .map_err(|e| internal_error(format!("Failed to prepare files query: {}", e)))?; - let mut output_files = Vec::new(); - let mut error_files = Vec::new(); - - let rows = stmt + let rows: Vec<(String, i64, String, i64, i64)> = stmt .query_map(params![job_id.as_str()], |row| { Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, Option>(2)?.map(|v| v as u64), - row.get::<_, String>(3)?, + row.get(0)?, + row.get::<_, Option>(1)?.unwrap_or(0), + row.get(2)?, + row.get(3)?, + row.get(4)?, )) }) - .map_err(|e| internal_error(format!("Failed to query files: {}", e)))?; - - for row in rows { - let (resource_type, file_path, count, file_type) = - row.map_err(|e| internal_error(format!("Failed to read file row: {}", e)))?; + .map_err(|e| internal_error(format!("Failed to query files: {}", e)))? + .filter_map(|r| r.ok()) + .collect(); - let file = ExportOutputFile { + let mut output = Vec::new(); + let mut errors = Vec::new(); + for (resource_type, count, file_type, part_index, fencing_token) in rows { + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index: part_index as u32, + fencing_token: fencing_token as u64, + }; + let entry = RawManifestEntry { resource_type, - url: file_path, - count, + key, + count: count as u64, }; - if file_type == "error" { - error_files.push(file); + errors.push(entry); } else { - output_files.push(file); + output.push(entry); } } - Ok(ExportManifest { - transaction_time: progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: error_files, - message: None, - extension: None, + Ok(RawExportManifest { + transaction_time: parse_dt(&transaction_time)?, + request_url, + status, + error_message, + completed_at: parse_dt_opt(completed_at), + output, + errors, }) } @@ -366,6 +424,592 @@ impl BulkExportStorage for SqliteBackend { Ok(results) } + + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + + let (status_str, level_str, group_id, owner_subject, transaction_time, completed_at, request_url): ( + String, + String, + Option, + Option, + String, + Option, + String, + ) = conn + .query_row( + "SELECT status, level, group_id, owner_subject, transaction_time, completed_at, request_url + FROM bulk_export_jobs WHERE id = ?1 AND tenant_id = ?2", + params![job_id.as_str(), tenant_id], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + row.get(5)?, + row.get(6)?, + )) + }, + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + } else { + internal_error(format!("Failed to get export job metadata: {}", e)) + } + })?; + + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => return Err(internal_error(format!("Invalid level: {}", level_str))), + }; + + Ok(ExportJobMetadata { + job_id: job_id.clone(), + status, + level, + owner_subject, + transaction_time: parse_dt(&transaction_time)?, + completed_at: parse_dt_opt(completed_at), + request_url, + }) + } + + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult { + let (resource_type, part_index) = parse_part_segment(part).ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + + let (file_type, resource_count, fencing_token, owner_subject): ( + String, + i64, + i64, + Option, + ) = conn + .query_row( + "SELECT f.file_type, f.resource_count, f.fencing_token, j.owner_subject + FROM bulk_export_files f + JOIN bulk_export_jobs j ON j.id = f.job_id + WHERE f.job_id = ?1 AND j.tenant_id = ?2 + AND f.resource_type = ?3 AND f.part_index = ?4", + params![job_id.as_str(), tenant_id, resource_type, part_index as i64], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)), + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + } else { + internal_error(format!("Failed to get export file metadata: {}", e)) + } + })?; + + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index, + fencing_token: fencing_token as u64, + }; + + Ok(ExportFileMetadata { + key, + resource_type, + file_type, + line_count: resource_count as u64, + job_owner_subject: owner_subject, + }) + } + + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + let count: i64 = conn + .query_row( + "SELECT COUNT(*) FROM bulk_export_jobs + WHERE tenant_id = ?1 AND status IN ('accepted', 'in-progress')", + params![tenant_id], + |row| row.get(0), + ) + .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; + Ok(count as u64) + } + + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: StdDuration, + limit: u32, + ) -> StorageResult> { + let conn = self.get_connection()?; + let cutoff = (now + - chrono::Duration::from_std(output_ttl) + .unwrap_or_else(|_| chrono::Duration::seconds(0))) + .to_rfc3339(); + + let mut stmt = conn + .prepare( + "SELECT tenant_id, id FROM bulk_export_jobs + WHERE status IN ('complete', 'error', 'cancelled') + AND completed_at IS NOT NULL AND completed_at < ?1 + ORDER BY completed_at LIMIT ?2", + ) + .map_err(|e| internal_error(format!("Failed to prepare expired query: {}", e)))?; + + let rows: Vec<(String, String)> = stmt + .query_map(params![cutoff, limit], |row| Ok((row.get(0)?, row.get(1)?))) + .map_err(|e| internal_error(format!("Failed to query expired exports: {}", e)))? + .filter_map(|r| r.ok()) + .collect(); + + Ok(rows + .into_iter() + .map(|(tenant_id, id)| ExpiredExportRef { + tenant: TenantContext::new( + TenantId::new(tenant_id), + TenantPermissions::full_access(), + ), + job_id: ExportJobId::from_string(id), + }) + .collect()) + } +} + +/// Encodes an [`ExportPartKey`] into the `file_path` column. +fn encode_part_path(key: &ExportPartKey) -> String { + format!( + "{}/{}/{}/{}-{}-{}", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) +} + +#[async_trait] +impl ExportClaimStrategy for SqliteBackend { + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: StdDuration, + ) -> StorageResult> { + let _guard = CLAIM_LOCK.lock().await; + let conn = self.get_connection()?; + let now = Utc::now(); + let now_str = now.to_rfc3339(); + let lease_expiry = now + + chrono::Duration::from_std(lease_duration) + .unwrap_or_else(|_| chrono::Duration::seconds(60)); + let lease_expiry_str = lease_expiry.to_rfc3339(); + + // Find one eligible job: accepted, or in-progress with an expired lease. + let row: Option<(String, String, i64)> = conn + .query_row( + "SELECT id, tenant_id, fencing_token FROM bulk_export_jobs + WHERE status = 'accepted' + OR (status = 'in-progress' AND (lease_expiry IS NULL OR lease_expiry < ?1)) + ORDER BY created_at LIMIT 1", + params![now_str], + |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)), + ) + .ok(); + + let Some((job_id, tenant_id, fencing_token)) = row else { + return Ok(None); + }; + let new_token = fencing_token + 1; + + conn.execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', worker_id = ?1, lease_expiry = ?2, + heartbeat_at = ?3, fencing_token = ?4, + started_at = COALESCE(started_at, ?3) + WHERE id = ?5", + params![ + worker_id.as_str(), + lease_expiry_str, + now_str, + new_token, + job_id + ], + ) + .map_err(|e| internal_error(format!("Failed to claim export job: {}", e)))?; + + Ok(Some(ExportJobLease { + job_id: ExportJobId::from_string(job_id), + tenant: TenantContext::new(TenantId::new(tenant_id), TenantPermissions::full_access()), + worker_id: worker_id.clone(), + lease_expiry, + fencing_token: new_token as u64, + })) + } + + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now(); + let new_expiry = now + chrono::Duration::seconds(60); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET lease_expiry = ?1, heartbeat_at = ?2 + WHERE id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + new_expiry.to_rfc3339(), + now.to_rfc3339(), + lease.job_id.as_str(), + lease.worker_id.as_str(), + lease.fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("heartbeat failed: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: lease.job_id.clone(), + }) + } else { + Ok(new_expiry) + } + } + + async fn release(&self, lease: ExportJobLease) -> StorageResult<()> { + let conn = self.get_connection()?; + conn.execute( + "UPDATE bulk_export_jobs + SET status = 'accepted', worker_id = NULL, lease_expiry = NULL + WHERE id = ?1 AND worker_id = ?2 AND fencing_token = ?3 + AND status = 'in-progress'", + params![ + lease.job_id.as_str(), + lease.worker_id.as_str(), + lease.fencing_token as i64 + ], + ) + .map_err(|e| internal_error(format!("Failed to release lease: {}", e)))?; + Ok(()) + } +} + +#[async_trait] +impl ExportWorkerStorage for SqliteBackend { + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let tenant_id = tenant.tenant_id().as_str(); + + let (request_json, level_str, group_id, transaction_time, fhir_version): ( + String, + String, + Option, + String, + String, + ) = conn + .query_row( + "SELECT request_json, level, group_id, transaction_time, fhir_version + FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?2 AND worker_id = ?3 AND fencing_token = ?4", + params![ + job_id.as_str(), + tenant_id, + worker_id.as_str(), + fencing_token as i64 + ], + |row| { + Ok(( + row.get(0)?, + row.get(1)?, + row.get(2)?, + row.get(3)?, + row.get(4)?, + )) + }, + ) + .map_err(|e| match e { + rusqlite::Error::QueryReturnedNoRows => LeaseError::LeaseLost { + job_id: job_id.clone(), + }, + other => LeaseError::Storage(internal_error(format!( + "Failed to load worker job: {other}" + ))), + })?; + + let request: ExportRequest = serde_json::from_str(&request_json).map_err(|e| { + LeaseError::Storage(internal_error(format!("Failed to parse request_json: {e}"))) + })?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => { + return Err(LeaseError::Storage(internal_error(format!( + "Invalid level: {level_str}" + )))); + } + }; + let fhir_version = + helios_fhir::FhirVersion::from_mime_param(&fhir_version).unwrap_or_default(); + let transaction_time = parse_dt(&transaction_time).map_err(LeaseError::Storage)?; + + // Load persisted per-type progress for resume. + let mut stmt = conn + .prepare( + "SELECT resource_type, total_count, exported_count, error_count, cursor_state + FROM bulk_export_progress WHERE job_id = ?1", + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("prepare progress: {e}"))))?; + let type_progress: Vec = stmt + .query_map(params![job_id.as_str()], |row| { + Ok(TypeExportProgress { + resource_type: row.get(0)?, + total_count: row.get::<_, Option>(1)?.map(|v| v as u64), + exported_count: row.get::<_, i64>(2)? as u64, + error_count: row.get::<_, i64>(3)? as u64, + cursor_state: row.get(4)?, + }) + }) + .map_err(|e| LeaseError::Storage(internal_error(format!("query progress: {e}"))))? + .filter_map(|r| r.ok()) + .collect(); + + Ok(WorkerJobView { + request, + level, + transaction_time, + fhir_version, + type_progress, + }) + } + + async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', started_at = COALESCE(started_at, ?1) + WHERE id = ?2 AND tenant_id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("mark_in_progress: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let affected = conn + .execute( + "INSERT INTO bulk_export_progress + (job_id, resource_type, total_count, exported_count, error_count, cursor_state) + SELECT ?1, ?2, ?3, ?4, ?5, ?6 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?7 AND worker_id = ?8 AND fencing_token = ?9 + ) + ON CONFLICT(job_id, resource_type) DO UPDATE SET + total_count = excluded.total_count, + exported_count = excluded.exported_count, + error_count = excluded.error_count, + cursor_state = excluded.cursor_state", + params![ + job_id.as_str(), + progress.resource_type, + progress.total_count.map(|v| v as i64), + progress.exported_count as i64, + progress.error_count as i64, + progress.cursor_state, + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64, + ], + ) + .map_err(|e| { + LeaseError::Storage(internal_error(format!("update_type_progress: {e}"))) + })?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let file_path = encode_part_path(&part.key); + let affected = conn + .execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count, + part_index, fencing_token) + SELECT ?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = ?1 AND tenant_id = ?9 AND worker_id = ?10 AND fencing_token = ?11 + ) + ON CONFLICT(job_id, file_type, resource_type, part_index) DO UPDATE SET + file_path = excluded.file_path, + resource_count = excluded.resource_count, + byte_count = excluded.byte_count, + fencing_token = excluded.fencing_token", + params![ + job_id.as_str(), + part.resource_type, + file_type, + file_path, + part.line_count as i64, + part.size_bytes as i64, + part.key.part_index as i64, + part.key.fencing_token as i64, + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64, + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("record_export_file: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'complete', completed_at = ?1 + WHERE id = ?2 AND tenant_id = ?3 AND worker_id = ?4 AND fencing_token = ?5", + params![ + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("finish_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError> { + let conn = self.get_connection().map_err(LeaseError::Storage)?; + let now = Utc::now().to_rfc3339(); + let affected = conn + .execute( + "UPDATE bulk_export_jobs + SET status = 'error', error_message = ?1, completed_at = ?2 + WHERE id = ?3 AND tenant_id = ?4 AND worker_id = ?5 AND fencing_token = ?6", + params![ + error_message, + now, + job_id.as_str(), + tenant.tenant_id().as_str(), + worker_id.as_str(), + fencing_token as i64 + ], + ) + .map_err(|e| LeaseError::Storage(internal_error(format!("fail_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } } #[async_trait] @@ -826,22 +1470,83 @@ impl GroupExportProvider for SqliteBackend { tenant: &TenantContext, group_id: &str, ) -> StorageResult> { - let members = self.get_group_members(tenant, group_id).await?; - - // Filter to only Patient references and extract IDs - let patient_ids: Vec = members - .into_iter() - .filter_map(|reference| { - if reference.starts_with("Patient/") { - Some(reference.strip_prefix("Patient/").unwrap().to_string()) - } else { - None + // Flatten nested Groups iteratively, guarding against membership + // cycles with a visited set. + use std::collections::HashSet; + let mut visited_groups: HashSet = HashSet::new(); + let mut seen_patients: HashSet = HashSet::new(); + let mut patient_ids: Vec = Vec::new(); + let mut worklist: Vec = vec![group_id.to_string()]; + + while let Some(gid) = worklist.pop() { + if !visited_groups.insert(gid.clone()) { + continue; // cycle / already processed + } + let members = self.get_group_members(tenant, &gid).await?; + for reference in members { + if let Some(pid) = reference.strip_prefix("Patient/") { + if seen_patients.insert(pid.to_string()) { + patient_ids.push(pid.to_string()); + } + } else if let Some(nested) = reference.strip_prefix("Group/") { + worklist.push(nested.to_string()); } - }) - .collect(); + } + } Ok(patient_ids) } + + async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let conn = self.get_connection()?; + let tenant_id = tenant.tenant_id().as_str(); + let data: Vec = conn + .query_row( + "SELECT data FROM resources + WHERE tenant_id = ?1 AND resource_type = 'Group' + AND id = ?2 AND is_deleted = 0", + params![tenant_id, group_id], + |row| row.get(0), + ) + .map_err(|e| { + if matches!(e, rusqlite::Error::QueryReturnedNoRows) { + StorageError::BulkExport(BulkExportError::GroupNotFound { + group_id: group_id.to_string(), + }) + } else { + internal_error(format!("Failed to get group: {}", e)) + } + })?; + let group: Value = serde_json::from_slice(&data) + .map_err(|e| internal_error(format!("Failed to parse group: {}", e)))?; + let mut out = Vec::new(); + if let Some(arr) = group.get("member").and_then(|m| m.as_array()) { + for member in arr { + let Some(reference) = member + .get("entity") + .and_then(|e| e.get("reference")) + .and_then(|r| r.as_str()) + else { + continue; + }; + let period_start = member + .get("period") + .and_then(|p| p.get("start")) + .and_then(|s| s.as_str()) + .and_then(|s| { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }); + out.push((reference.to_string(), period_start)); + } + } + Ok(out) + } } #[cfg(test)] @@ -865,13 +1570,27 @@ mod tests { ) } + /// Wraps an `ExportRequest` in a `StartExportInput` with default kickoff metadata. + fn test_input(request: ExportRequest) -> StartExportInput { + StartExportInput { + request, + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("test-subject".to_string()), + fhir_version: FhirVersion::default(), + } + } + #[tokio::test] async fn test_start_export() { let backend = create_test_backend(); let tenant = create_test_tenant(); let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(request)) + .await + .unwrap(); assert!(!job_id.as_str().is_empty()); @@ -884,8 +1603,10 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - let request = ExportRequest::system(); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); backend.cancel_export(&tenant, &job_id).await.unwrap(); @@ -898,37 +1619,273 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - // Create two exports - let request1 = ExportRequest::system(); - let _job_id1 = backend.start_export(&tenant, request1).await.unwrap(); - - let request2 = ExportRequest::patient(); - let _job_id2 = backend.start_export(&tenant, request2).await.unwrap(); + let _job_id1 = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + let _job_id2 = backend + .start_export(&tenant, test_input(ExportRequest::patient())) + .await + .unwrap(); let exports = backend.list_exports(&tenant, false).await.unwrap(); assert_eq!(exports.len(), 2); } #[tokio::test] - async fn test_too_many_concurrent_exports() { + async fn test_count_active_exports() { let backend = create_test_backend(); let tenant = create_test_tenant(); - // Create 5 exports (the limit) - for _ in 0..5 { - let request = ExportRequest::system(); - backend.start_export(&tenant, request).await.unwrap(); + for _ in 0..3 { + backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); } + assert_eq!(backend.count_active_exports(&tenant).await.unwrap(), 3); + } - // Sixth should fail - let request = ExportRequest::system(); - let result = backend.start_export(&tenant, request).await; + #[tokio::test] + async fn test_get_export_job_metadata() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::patient())) + .await + .unwrap(); + + let meta = backend + .get_export_job_metadata(&tenant, &job_id) + .await + .unwrap(); + assert_eq!(meta.status, ExportStatus::Accepted); + assert_eq!(meta.owner_subject.as_deref(), Some("test-subject")); + assert!(matches!(meta.level, ExportLevel::Patient)); + + let missing = backend + .get_export_job_metadata(&tenant, &ExportJobId::from_string("nope")) + .await; + assert!(missing.is_err()); + } + + #[tokio::test] + async fn test_claim_and_worker_lifecycle() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + + let worker = WorkerId::new("worker-1"); + let lease = backend + .claim_next(&worker, StdDuration::from_secs(60)) + .await + .unwrap() + .expect("a job should be claimable"); + assert_eq!(lease.job_id, job_id); + assert_eq!(lease.fencing_token, 1); + + // A second claim finds nothing (the only job is now in-progress). + assert!( + backend + .claim_next(&worker, StdDuration::from_secs(60)) + .await + .unwrap() + .is_none() + ); + + // Worker can load, progress, finish. + backend + .mark_export_in_progress(&tenant, &job_id, &worker, lease.fencing_token) + .await + .unwrap(); + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker, + lease.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await + .unwrap(); + backend + .finish_export_job(&tenant, &job_id, &worker, lease.fencing_token) + .await + .unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + } + + #[tokio::test] + async fn test_stale_worker_fenced_out() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); + + let worker_a = WorkerId::new("worker-a"); + let lease_a = backend + .claim_next(&worker_a, StdDuration::from_millis(1)) + .await + .unwrap() + .unwrap(); + + // Lease expires; worker B reclaims, bumping the fencing token. + tokio::time::sleep(std::time::Duration::from_millis(5)).await; + let worker_b = WorkerId::new("worker-b"); + let lease_b = backend + .claim_next(&worker_b, StdDuration::from_secs(60)) + .await + .unwrap() + .unwrap(); + assert!(lease_b.fencing_token > lease_a.fencing_token); + + // Worker A's stale mutations are all rejected as LeaseLost. assert!(matches!( - result, - Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { .. } - )) + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) )); + assert!(matches!( + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker_a, + lease_a.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + assert!(matches!( + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + + // Worker B can still operate. + backend + .finish_export_job(&tenant, &job_id, &worker_b, lease_b.fencing_token) + .await + .unwrap(); + } + + #[tokio::test] + async fn test_since_newly_added_exclude_filters_late_joiners() { + use crate::core::bulk_export_output::{ExportPartKey, ExportPartWriter}; + let _ = ExportPartKey::output("t", ExportJobId::new(), "x", 0, 0); // import sanity + + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + // A Group with two members: one joined before _since (period.start = + // 2024-01-01), one joined after (period.start = 2026-06-01). + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g-cohort", + "member": [ + { + "entity": {"reference": "Patient/p-old"}, + "period": {"start": "2024-01-01T00:00:00Z"} + }, + { + "entity": {"reference": "Patient/p-new"}, + "period": {"start": "2026-06-01T00:00:00Z"} + } + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let members = backend + .get_group_members_with_periods(&tenant, "g-cohort") + .await + .unwrap(); + assert_eq!(members.len(), 2); + assert!(members.iter().all(|(_, p)| p.is_some())); + + // Worker-level filter logic: with exclude=true and _since=2025, + // p-new (joined 2026) should be filtered out; p-old kept. + let since = chrono::DateTime::parse_from_rfc3339("2025-01-01T00:00:00Z") + .unwrap() + .with_timezone(&Utc); + let kept: Vec = members + .iter() + .filter_map(|(reference, period_start)| { + let pid = reference.strip_prefix("Patient/")?; + match period_start { + Some(start) if *start > since => None, + _ => Some(pid.to_string()), + } + }) + .collect(); + assert_eq!(kept, vec!["p-old".to_string()]); + + // Drop reference to silence the unused-import allowance. + let _ = ExportPartWriter::new(Box::pin(Vec::::new())); + } + + #[tokio::test] + async fn test_resolve_nested_groups_with_cycle_guard() { + let backend = create_test_backend(); + let tenant = create_test_tenant(); + + // g1 -> [Patient/p1, Group/g2]; g2 -> [Patient/p2, Group/g1 (cycle)] + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g1", + "member": [ + {"entity": {"reference": "Patient/p1"}}, + {"entity": {"reference": "Group/g2"}} + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + backend + .create( + &tenant, + "Group", + json!({ + "resourceType": "Group", "id": "g2", + "member": [ + {"entity": {"reference": "Patient/p2"}}, + {"entity": {"reference": "Group/g1"}} + ] + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let mut ids = backend + .resolve_group_patient_ids(&tenant, "g1") + .await + .unwrap(); + ids.sort(); + // Both patients resolved exactly once; the cycle did not loop forever. + assert_eq!(ids, vec!["p1".to_string(), "p2".to_string()]); } #[tokio::test] @@ -1013,8 +1970,10 @@ mod tests { let backend = create_test_backend(); let tenant = create_test_tenant(); - let request = ExportRequest::system(); - let job_id = backend.start_export(&tenant, request).await.unwrap(); + let job_id = backend + .start_export(&tenant, test_input(ExportRequest::system())) + .await + .unwrap(); backend.delete_export(&tenant, &job_id).await.unwrap(); From d25204089ced2d6f775e58d90e1b07e61859f9c6 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:13 -0400 Subject: [PATCH 10/81] =?UTF-8?q?feat(postgres):=20v7=E2=86=92v8=20migrati?= =?UTF-8?q?on=20mirrors=20SQLite=20bulk-export=20columns?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS … for the lease fields, owner_subject, request_url, fhir_version. ALTER bulk_export_files for part_index + fencing_token; ROW_NUMBER() backfill before the unique idx_export_files_part. --- .../src/backends/postgres/schema.rs | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/crates/persistence/src/backends/postgres/schema.rs b/crates/persistence/src/backends/postgres/schema.rs index 92a91b732..888a852b4 100644 --- a/crates/persistence/src/backends/postgres/schema.rs +++ b/crates/persistence/src/backends/postgres/schema.rs @@ -3,7 +3,7 @@ use crate::error::{BackendError, StorageResult}; /// Current schema version. -pub const SCHEMA_VERSION: i32 = 7; +pub const SCHEMA_VERSION: i32 = 8; /// Initialize the database schema. pub async fn initialize_schema(client: &deadpool_postgres::Client) -> StorageResult<()> { @@ -269,6 +269,7 @@ async fn migrate_schema( 4 => migrate_v4_to_v5(client).await?, 5 => migrate_v5_to_v6(client).await?, 6 => migrate_v6_to_v7(client).await?, + 7 => migrate_v7_to_v8(client).await?, _ => { return Err(pg_error(format!("Unknown schema version: {}", version))); } @@ -581,6 +582,55 @@ async fn migrate_v6_to_v7(client: &deadpool_postgres::Client) -> StorageResult<( Ok(()) } +/// v7 -> v8: Add bulk-export worker/lease support. +async fn migrate_v7_to_v8(client: &deadpool_postgres::Client) -> StorageResult<()> { + let migrations = [ + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS worker_id TEXT", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS lease_expiry TIMESTAMPTZ", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS fencing_token BIGINT NOT NULL DEFAULT 0", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS heartbeat_at TIMESTAMPTZ", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS owner_subject TEXT", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS request_url TEXT NOT NULL DEFAULT ''", + "ALTER TABLE bulk_export_jobs ADD COLUMN IF NOT EXISTS fhir_version TEXT NOT NULL DEFAULT '4.0'", + "ALTER TABLE bulk_export_files ADD COLUMN IF NOT EXISTS part_index INTEGER NOT NULL DEFAULT 0", + "ALTER TABLE bulk_export_files ADD COLUMN IF NOT EXISTS fencing_token BIGINT NOT NULL DEFAULT 0", + ]; + for sql in &migrations { + client + .execute(*sql, &[]) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 failed: {}", e)))?; + } + + // Backfill part_index: 0-based sequential per (job_id, file_type, resource_type). + client + .execute( + "UPDATE bulk_export_files SET part_index = sub.rn FROM ( + SELECT id, ROW_NUMBER() OVER ( + PARTITION BY job_id, file_type, resource_type ORDER BY id + ) - 1 AS rn FROM bulk_export_files + ) sub WHERE bulk_export_files.id = sub.id", + &[], + ) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 backfill failed: {}", e)))?; + + let indexes = [ + "CREATE INDEX IF NOT EXISTS idx_export_jobs_claim + ON bulk_export_jobs(tenant_id, status, lease_expiry)", + "CREATE UNIQUE INDEX IF NOT EXISTS idx_export_files_part + ON bulk_export_files(job_id, file_type, resource_type, part_index)", + ]; + for sql in &indexes { + client + .execute(*sql, &[]) + .await + .map_err(|e| pg_error(format!("Migration v7->v8 index failed: {}", e)))?; + } + + Ok(()) +} + fn pg_error(message: String) -> crate::error::StorageError { crate::error::StorageError::Backend(BackendError::Internal { backend_name: "postgres".to_string(), From 15fb673740ae05f74a7dfefd72a86261df544314 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:13 -0400 Subject: [PATCH 11/81] feat(postgres): implement BulkExportJobStore for PostgreSQL PostgresSkipLocked claim strategy (FOR UPDATE SKIP LOCKED inside a transaction), fully-fenced ExportWorkerStorage (every mutation guarded by worker_id + fencing_token), all new BulkExportStorage methods, get_group_members_with_periods + nested-Group flattening with cycle guard. Bind sites use i32 / i64 to match the actual column types on bulk_export_progress / bulk_export_files. --- .../src/backends/postgres/bulk_export.rs | 800 ++++++++++++++++-- 1 file changed, 728 insertions(+), 72 deletions(-) diff --git a/crates/persistence/src/backends/postgres/bulk_export.rs b/crates/persistence/src/backends/postgres/bulk_export.rs index 16fe427d1..2d9a2b7f7 100644 --- a/crates/persistence/src/backends/postgres/bulk_export.rs +++ b/crates/persistence/src/backends/postgres/bulk_export.rs @@ -1,16 +1,22 @@ //! Bulk export implementation for PostgreSQL backend. use async_trait::async_trait; -use chrono::Utc; +use chrono::{DateTime, Utc}; use serde_json::Value; +use std::time::Duration as StdDuration; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportLevel, ExportManifest, - ExportOutputFile, ExportProgress, ExportRequest, ExportStatus, GroupExportProvider, - NdjsonBatch, PatientExportProvider, TypeExportProgress, + BulkExportStorage, ExpiredExportRef, ExportDataProvider, ExportFileMetadata, ExportJobId, + ExportJobMetadata, ExportLevel, ExportProgress, ExportRequest, ExportStatus, + GroupExportProvider, NdjsonBatch, PatientExportProvider, RawExportManifest, RawManifestEntry, + StartExportInput, TypeExportProgress, +}; +use crate::core::bulk_export_output::{ExportPartKey, FinalizedPart}; +use crate::core::bulk_export_worker::{ + ExportClaimStrategy, ExportJobLease, ExportWorkerStorage, LeaseError, WorkerId, WorkerJobView, }; use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; -use crate::tenant::TenantContext; +use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use super::PostgresBackend; @@ -22,60 +28,72 @@ fn internal_error(message: String) -> StorageError { }) } +/// Splits a `{resource_type}-{part_index}` download segment. +fn parse_part_segment(part: &str) -> Option<(String, u32)> { + let idx = part.rfind('-')?; + let resource_type = &part[..idx]; + let part_index: u32 = part[idx + 1..].parse().ok()?; + if resource_type.is_empty() { + return None; + } + Some((resource_type.to_string(), part_index)) +} + +/// Encodes an [`ExportPartKey`] into the `file_path` column. +fn encode_part_path(key: &ExportPartKey) -> String { + format!( + "{}/{}/{}/{}-{}-{}", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) +} + #[async_trait] impl BulkExportStorage for PostgresBackend { async fn start_export( &self, tenant: &TenantContext, - request: ExportRequest, + input: StartExportInput, ) -> StorageResult { let client = self.get_client().await?; let tenant_id = tenant.tenant_id().as_str(); - // Check for too many concurrent exports (limit to 5 active exports per tenant) - let row = client - .query_one( - "SELECT COUNT(*) FROM bulk_export_jobs - WHERE tenant_id = $1 AND status IN ('accepted', 'in-progress')", - &[&tenant_id], - ) - .await - .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; - - let active_count: i64 = row.get(0); - if active_count >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - let job_id = ExportJobId::new(); let now = Utc::now(); - let level_str = match &request.level { + let level_str = match &input.request.level { ExportLevel::System => "system".to_string(), ExportLevel::Patient => "patient".to_string(), ExportLevel::Group { .. } => "group".to_string(), }; - let group_id = request.group_id().map(|s| s.to_string()); + let group_id = input.request.group_id().map(|s| s.to_string()); - let request_json = serde_json::to_string(&request) + let request_json = serde_json::to_string(&input.request) .map_err(|e| internal_error(format!("Failed to serialize request: {}", e)))?; + let fhir_version = input.fhir_version.as_mime_param(); client .execute( "INSERT INTO bulk_export_jobs - (id, tenant_id, status, level, group_id, request_json, transaction_time, created_at) - VALUES ($1, $2, 'accepted', $3, $4, $5, $6, $7)", + (id, tenant_id, status, level, group_id, request_json, transaction_time, + created_at, owner_subject, request_url, fhir_version, fencing_token) + VALUES ($1, $2, 'accepted', $3, $4, $5, $6, $7, $8, $9, $10, 0)", &[ &job_id.as_str(), &tenant_id, &level_str.as_str(), &group_id, &request_json.as_str(), + &input.transaction_time, &now, - &now, + &input.owner_subject, + &input.request_url.as_str(), + &fhir_version, ], ) .await @@ -151,9 +169,9 @@ impl BulkExportStorage for PostgresBackend { .iter() .map(|r| TypeExportProgress { resource_type: r.get(0), - total_count: r.get::<_, Option>(1).map(|v| v as u64), - exported_count: r.get::<_, i64>(2) as u64, - error_count: r.get::<_, i64>(3) as u64, + total_count: r.get::<_, Option>(1).map(|v| v as u64), + exported_count: r.get::<_, i32>(2) as u64, + error_count: r.get::<_, i32>(3) as u64, cursor_state: r.get(4), }) .collect(); @@ -247,63 +265,238 @@ impl BulkExportStorage for PostgresBackend { &self, tenant: &TenantContext, job_id: &ExportJobId, - ) -> StorageResult { - let progress = self.get_export_status(tenant, job_id).await?; + ) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); - if progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { + let job_rows = client + .query( + "SELECT status, transaction_time, request_url, error_message, completed_at + FROM bulk_export_jobs WHERE id = $1 AND tenant_id = $2", + &[&job_id.as_str(), &tenant_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get export job: {}", e)))?; + let job_row = job_rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: progress.status.to_string(), - })); - } - - let client = self.get_client().await?; + }) + })?; + let status_str: String = job_row.get(0); + let transaction_time: DateTime = job_row.get(1); + let request_url: String = job_row.get(2); + let error_message: Option = job_row.get(3); + let completed_at: Option> = job_row.get(4); + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status in database: {}", status_str)))?; let rows = client .query( - "SELECT resource_type, file_path, resource_count, file_type + "SELECT resource_type, resource_count, file_type, part_index, fencing_token FROM bulk_export_files WHERE job_id = $1 - ORDER BY resource_type", + ORDER BY file_type, resource_type, part_index", &[&job_id.as_str()], ) .await .map_err(|e| internal_error(format!("Failed to query files: {}", e)))?; - let mut output_files = Vec::new(); - let mut error_files = Vec::new(); - + let mut output = Vec::new(); + let mut errors = Vec::new(); for row in &rows { let resource_type: String = row.get(0); - let file_path: String = row.get(1); - let count: Option = row.get(2); - let file_type: String = row.get(3); - - let file = ExportOutputFile { + let count: Option = row.get(1); + let file_type: String = row.get(2); + let part_index: i32 = row.get(3); + let fencing_token: i64 = row.get(4); + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index: part_index as u32, + fencing_token: fencing_token as u64, + }; + let entry = RawManifestEntry { resource_type, - url: file_path, - count: count.map(|c| c as u64), + key, + count: count.unwrap_or(0) as u64, }; - if file_type == "error" { - error_files.push(file); + errors.push(entry); } else { - output_files.push(file); + output.push(entry); } } - Ok(ExportManifest { - transaction_time: progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: error_files, - message: None, - extension: None, + Ok(RawExportManifest { + transaction_time, + request_url, + status, + error_message, + completed_at, + output, + errors, }) } + async fn get_export_job_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT status, level, group_id, owner_subject, transaction_time, + completed_at, request_url + FROM bulk_export_jobs WHERE id = $1 AND tenant_id = $2", + &[&job_id.as_str(), &tenant_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get export job metadata: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + })?; + let status_str: String = row.get(0); + let level_str: String = row.get(1); + let group_id: Option = row.get(2); + let owner_subject: Option = row.get(3); + let transaction_time: DateTime = row.get(4); + let completed_at: Option> = row.get(5); + let request_url: String = row.get(6); + let status: ExportStatus = status_str + .parse() + .map_err(|_| internal_error(format!("Invalid status: {}", status_str)))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => return Err(internal_error(format!("Invalid level: {}", level_str))), + }; + Ok(ExportJobMetadata { + job_id: job_id.clone(), + status, + level, + owner_subject, + transaction_time, + completed_at, + request_url, + }) + } + + async fn get_export_file_metadata( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + part: &str, + ) -> StorageResult { + let (resource_type, part_index) = parse_part_segment(part).ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT f.file_type, f.resource_count, f.fencing_token, j.owner_subject + FROM bulk_export_files f + JOIN bulk_export_jobs j ON j.id = f.job_id + WHERE f.job_id = $1 AND j.tenant_id = $2 + AND f.resource_type = $3 AND f.part_index = $4", + &[ + &job_id.as_str(), + &tenant_id, + &resource_type.as_str(), + &(part_index as i32), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to get file metadata: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: format!("{job_id}/{part}"), + }) + })?; + let file_type: String = row.get(0); + let resource_count: Option = row.get(1); + let fencing_token: i64 = row.get(2); + let owner_subject: Option = row.get(3); + let key = ExportPartKey { + tenant_id: tenant_id.to_string(), + job_id: job_id.clone(), + resource_type: resource_type.clone(), + file_type: file_type.clone(), + part_index, + fencing_token: fencing_token as u64, + }; + Ok(ExportFileMetadata { + key, + resource_type, + file_type, + line_count: resource_count.unwrap_or(0) as u64, + job_owner_subject: owner_subject, + }) + } + + async fn count_active_exports(&self, tenant: &TenantContext) -> StorageResult { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let row = client + .query_one( + "SELECT COUNT(*) FROM bulk_export_jobs + WHERE tenant_id = $1 AND status IN ('accepted', 'in-progress')", + &[&tenant_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to count active exports: {}", e)))?; + let count: i64 = row.get(0); + Ok(count as u64) + } + + async fn list_expired_exports( + &self, + now: DateTime, + output_ttl: StdDuration, + limit: u32, + ) -> StorageResult> { + let client = self.get_client().await?; + let cutoff = now + - chrono::Duration::from_std(output_ttl) + .unwrap_or_else(|_| chrono::Duration::seconds(0)); + let rows = client + .query( + "SELECT tenant_id, id FROM bulk_export_jobs + WHERE status IN ('complete', 'error', 'cancelled') + AND completed_at IS NOT NULL AND completed_at < $1 + ORDER BY completed_at LIMIT $2", + &[&cutoff, &(limit as i64)], + ) + .await + .map_err(|e| internal_error(format!("Failed to query expired exports: {}", e)))?; + Ok(rows + .iter() + .map(|row| { + let tenant_id: String = row.get(0); + let id: String = row.get(1); + ExpiredExportRef { + tenant: TenantContext::new( + TenantId::new(tenant_id), + TenantPermissions::full_access(), + ), + job_id: ExportJobId::from_string(id), + } + }) + .collect()) + } + async fn list_exports( &self, tenant: &TenantContext, @@ -336,6 +529,406 @@ impl BulkExportStorage for PostgresBackend { } } +#[async_trait] +impl ExportClaimStrategy for PostgresBackend { + async fn claim_next( + &self, + worker_id: &WorkerId, + lease_duration: StdDuration, + ) -> StorageResult> { + let mut client = self.get_client().await?; + let now = Utc::now(); + let lease_expiry = now + + chrono::Duration::from_std(lease_duration) + .unwrap_or_else(|_| chrono::Duration::seconds(60)); + + let txn = client + .transaction() + .await + .map_err(|e| internal_error(format!("Failed to begin claim txn: {}", e)))?; + + let rows = txn + .query( + "SELECT id, tenant_id, fencing_token FROM bulk_export_jobs + WHERE status = 'accepted' + OR (status = 'in-progress' AND (lease_expiry IS NULL OR lease_expiry < $1)) + ORDER BY created_at + LIMIT 1 + FOR UPDATE SKIP LOCKED", + &[&now], + ) + .await + .map_err(|e| internal_error(format!("Failed to select claimable job: {}", e)))?; + + let Some(row) = rows.first() else { + txn.commit() + .await + .map_err(|e| internal_error(format!("Failed to commit claim txn: {}", e)))?; + return Ok(None); + }; + let job_id: String = row.get(0); + let tenant_id: String = row.get(1); + let fencing_token: i64 = row.get(2); + let new_token = fencing_token + 1; + + txn.execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', worker_id = $1, lease_expiry = $2, + heartbeat_at = $3, fencing_token = $4, + started_at = COALESCE(started_at, $3) + WHERE id = $5", + &[ + &worker_id.as_str(), + &lease_expiry, + &now, + &new_token, + &job_id.as_str(), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to claim export job: {}", e)))?; + + txn.commit() + .await + .map_err(|e| internal_error(format!("Failed to commit claim txn: {}", e)))?; + + Ok(Some(ExportJobLease { + job_id: ExportJobId::from_string(job_id), + tenant: TenantContext::new(TenantId::new(tenant_id), TenantPermissions::full_access()), + worker_id: worker_id.clone(), + lease_expiry, + fencing_token: new_token as u64, + })) + } + + async fn heartbeat(&self, lease: &ExportJobLease) -> Result, LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let new_expiry = now + chrono::Duration::seconds(60); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET lease_expiry = $1, heartbeat_at = $2 + WHERE id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &new_expiry, + &now, + &lease.job_id.as_str(), + &lease.worker_id.as_str(), + &(lease.fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("heartbeat failed: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: lease.job_id.clone(), + }) + } else { + Ok(new_expiry) + } + } + + async fn release(&self, lease: ExportJobLease) -> StorageResult<()> { + let client = self.get_client().await?; + client + .execute( + "UPDATE bulk_export_jobs + SET status = 'accepted', worker_id = NULL, lease_expiry = NULL + WHERE id = $1 AND worker_id = $2 AND fencing_token = $3 + AND status = 'in-progress'", + &[ + &lease.job_id.as_str(), + &lease.worker_id.as_str(), + &(lease.fencing_token as i64), + ], + ) + .await + .map_err(|e| internal_error(format!("Failed to release lease: {}", e)))?; + Ok(()) + } +} + +#[async_trait] +impl ExportWorkerStorage for PostgresBackend { + async fn get_export_job_for_worker( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT request_json, level, group_id, transaction_time, fhir_version + FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $2 AND worker_id = $3 AND fencing_token = $4", + &[ + &job_id.as_str(), + &tenant_id, + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("load worker job: {e}"))))?; + let row = rows.first().ok_or_else(|| LeaseError::LeaseLost { + job_id: job_id.clone(), + })?; + let request_json: String = row.get(0); + let level_str: String = row.get(1); + let group_id: Option = row.get(2); + let transaction_time: DateTime = row.get(3); + let fhir_version_str: String = row.get(4); + + let request: ExportRequest = serde_json::from_str(&request_json) + .map_err(|e| LeaseError::Storage(internal_error(format!("parse request_json: {e}"))))?; + let level = match level_str.as_str() { + "system" => ExportLevel::System, + "patient" => ExportLevel::Patient, + "group" => ExportLevel::Group { + group_id: group_id.unwrap_or_default(), + }, + _ => { + return Err(LeaseError::Storage(internal_error(format!( + "Invalid level: {level_str}" + )))); + } + }; + let fhir_version = + helios_fhir::FhirVersion::from_mime_param(&fhir_version_str).unwrap_or_default(); + + let progress_rows = client + .query( + "SELECT resource_type, total_count, exported_count, error_count, cursor_state + FROM bulk_export_progress WHERE job_id = $1", + &[&job_id.as_str()], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("query progress: {e}"))))?; + let type_progress: Vec = progress_rows + .iter() + .map(|r| TypeExportProgress { + resource_type: r.get(0), + total_count: r.get::<_, Option>(1).map(|v| v as u64), + exported_count: r.get::<_, i32>(2) as u64, + error_count: r.get::<_, i32>(3) as u64, + cursor_state: r.get(4), + }) + .collect(); + + Ok(WorkerJobView { + request, + level, + transaction_time, + fhir_version, + type_progress, + }) + } + + async fn mark_export_in_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'in-progress', started_at = COALESCE(started_at, $1) + WHERE id = $2 AND tenant_id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("mark_in_progress: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn update_export_type_progress( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + progress: &TypeExportProgress, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let affected = client + .execute( + "INSERT INTO bulk_export_progress + (job_id, resource_type, total_count, exported_count, error_count, cursor_state) + SELECT $1, $2, $3, $4, $5, $6 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $7 AND worker_id = $8 AND fencing_token = $9 + ) + ON CONFLICT (job_id, resource_type) DO UPDATE SET + total_count = EXCLUDED.total_count, + exported_count = EXCLUDED.exported_count, + error_count = EXCLUDED.error_count, + cursor_state = EXCLUDED.cursor_state", + &[ + &job_id.as_str(), + &progress.resource_type.as_str(), + &progress.total_count.map(|v| v as i32), + &(progress.exported_count as i32), + &(progress.error_count as i32), + &progress.cursor_state, + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| { + LeaseError::Storage(internal_error(format!("update_type_progress: {e}"))) + })?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn record_export_file( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + part: &FinalizedPart, + file_type: &str, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let file_path = encode_part_path(&part.key); + let affected = client + .execute( + "INSERT INTO bulk_export_files + (job_id, resource_type, file_type, file_path, resource_count, byte_count, + part_index, fencing_token) + SELECT $1, $2, $3, $4, $5, $6, $7, $8 + WHERE EXISTS ( + SELECT 1 FROM bulk_export_jobs + WHERE id = $1 AND tenant_id = $9 AND worker_id = $10 AND fencing_token = $11 + ) + ON CONFLICT (job_id, file_type, resource_type, part_index) DO UPDATE SET + file_path = EXCLUDED.file_path, + resource_count = EXCLUDED.resource_count, + byte_count = EXCLUDED.byte_count, + fencing_token = EXCLUDED.fencing_token", + &[ + &job_id.as_str(), + &part.resource_type.as_str(), + &file_type, + &file_path.as_str(), + &(part.line_count as i32), + &(part.size_bytes as i64), + &(part.key.part_index as i32), + &(part.key.fencing_token as i64), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("record_export_file: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn finish_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'complete', completed_at = $1 + WHERE id = $2 AND tenant_id = $3 AND worker_id = $4 AND fencing_token = $5", + &[ + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("finish_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } + + async fn fail_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + worker_id: &WorkerId, + fencing_token: u64, + error_message: &str, + ) -> Result<(), LeaseError> { + let client = self.get_client().await.map_err(LeaseError::Storage)?; + let now = Utc::now(); + let affected = client + .execute( + "UPDATE bulk_export_jobs + SET status = 'error', error_message = $1, completed_at = $2 + WHERE id = $3 AND tenant_id = $4 AND worker_id = $5 AND fencing_token = $6", + &[ + &error_message, + &now, + &job_id.as_str(), + &tenant.tenant_id().as_str(), + &worker_id.as_str(), + &(fencing_token as i64), + ], + ) + .await + .map_err(|e| LeaseError::Storage(internal_error(format!("fail_job: {e}"))))?; + if affected == 0 { + Err(LeaseError::LeaseLost { + job_id: job_id.clone(), + }) + } else { + Ok(()) + } + } +} + #[async_trait] impl ExportDataProvider for PostgresBackend { async fn list_export_types( @@ -775,16 +1368,79 @@ impl GroupExportProvider for PostgresBackend { tenant: &TenantContext, group_id: &str, ) -> StorageResult> { - let members = self.get_group_members(tenant, group_id).await?; - - let mut patient_ids = Vec::new(); - for member_ref in &members { - // Extract patient ID from "Patient/123" format - if let Some(id) = member_ref.strip_prefix("Patient/") { - patient_ids.push(id.to_string()); + // Flatten nested Groups iteratively, guarding against membership + // cycles with a visited set. + use std::collections::HashSet; + let mut visited_groups: HashSet = HashSet::new(); + let mut seen_patients: HashSet = HashSet::new(); + let mut patient_ids: Vec = Vec::new(); + let mut worklist: Vec = vec![group_id.to_string()]; + + while let Some(gid) = worklist.pop() { + if !visited_groups.insert(gid.clone()) { + continue; // cycle / already processed + } + let members = self.get_group_members(tenant, &gid).await?; + for member_ref in &members { + if let Some(id) = member_ref.strip_prefix("Patient/") { + if seen_patients.insert(id.to_string()) { + patient_ids.push(id.to_string()); + } + } else if let Some(nested) = member_ref.strip_prefix("Group/") { + worklist.push(nested.to_string()); + } } } Ok(patient_ids) } + + async fn get_group_members_with_periods( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult>)>> { + let client = self.get_client().await?; + let tenant_id = tenant.tenant_id().as_str(); + let rows = client + .query( + "SELECT data FROM resources + WHERE tenant_id = $1 AND resource_type = 'Group' + AND id = $2 AND is_deleted = false", + &[&tenant_id, &group_id], + ) + .await + .map_err(|e| internal_error(format!("Failed to get group: {}", e)))?; + let row = rows.first().ok_or_else(|| { + StorageError::BulkExport(BulkExportError::GroupNotFound { + group_id: group_id.to_string(), + }) + })?; + let data: Vec = row.get(0); + let group: Value = serde_json::from_slice(&data) + .map_err(|e| internal_error(format!("Failed to parse group: {}", e)))?; + let mut out = Vec::new(); + if let Some(arr) = group.get("member").and_then(|m| m.as_array()) { + for member in arr { + let Some(reference) = member + .get("entity") + .and_then(|e| e.get("reference")) + .and_then(|r| r.as_str()) + else { + continue; + }; + let period_start = member + .get("period") + .and_then(|p| p.get("start")) + .and_then(|s| s.as_str()) + .and_then(|s| { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) + }); + out.push((reference.to_string(), period_start)); + } + } + Ok(out) + } } From 86c76ae5a18bd79c3fc30d8e6404b3a0f76c68a7 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:13 -0400 Subject: [PATCH 12/81] feat(s3): add S3Api::presign_get for pre-signed download URLs Default impl reports unsupported; AwsS3Client overrides it via PresigningConfig from the AWS SDK. Used by S3OutputStore to mint direct-from-S3 download URLs for the bulk-export manifest. --- crates/persistence/src/backends/s3/client.rs | 40 +++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs index 3a9f5244e..a323bc271 100644 --- a/crates/persistence/src/backends/s3/client.rs +++ b/crates/persistence/src/backends/s3/client.rs @@ -117,7 +117,8 @@ pub trait S3Api: Send + Sync { ) -> Result; /// Deletes the object at the given key. Succeeds even if the key does not - /// exist. + /// exist. Reserved for the Phase 2 `S3OutputStore` integration. + #[allow(dead_code)] async fn delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError>; /// Lists objects whose keys start with `prefix`, with optional @@ -129,6 +130,21 @@ pub trait S3Api: Send + Sync { continuation: Option<&str>, max_keys: Option, ) -> Result; + + /// Generates a pre-signed `GET` URL for `key`, valid for `ttl`. + /// + /// The default implementation reports the capability as unsupported; + /// [`AwsS3Client`] overrides it using the AWS SDK presigner. + async fn presign_get( + &self, + _bucket: &str, + _key: &str, + _ttl: std::time::Duration, + ) -> Result { + Err(S3ClientError::Internal( + "pre-signed URLs are not supported by this S3 client".to_string(), + )) + } } /// Production `S3Api` implementation backed by the AWS SDK. @@ -138,9 +154,12 @@ pub struct AwsS3Client { client: Client, } +/// S3-compatible endpoint overrides for [`AwsS3Client`]. #[derive(Debug, Clone, Default)] pub struct AwsS3ClientOptions { + /// Override the endpoint URL (for MinIO, R2, GCS interop, etc.). pub endpoint_url: Option, + /// Force path-style addressing (`bucket` in path, not subdomain). pub force_path_style: bool, } @@ -357,6 +376,25 @@ impl S3Api for AwsS3Client { next_continuation_token: out.next_continuation_token().map(|s| s.to_string()), }) } + + async fn presign_get( + &self, + bucket: &str, + key: &str, + ttl: std::time::Duration, + ) -> Result { + let presign_config = aws_sdk_s3::presigning::PresigningConfig::expires_in(ttl) + .map_err(|e| S3ClientError::Internal(format!("invalid presign TTL: {e}")))?; + let presigned = self + .client + .get_object() + .bucket(bucket) + .key(key) + .presigned(presign_config) + .await + .map_err(map_sdk_error)?; + Ok(presigned.uri().to_string()) + } } /// Maps an AWS SDK error to the normalised `S3ClientError` taxonomy. From e8bea5a3cc9f8f238fa49c2195a3990b3136aa81 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:53 -0400 Subject: [PATCH 13/81] chore(s3): #[allow(dead_code)] on legacy bulk-export keyspace helpers Reserved for future S3OutputStore integrations; unused now that S3 is output-only and keys live in S3OutputStore::object_key. --- crates/persistence/src/backends/s3/keyspace.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/persistence/src/backends/s3/keyspace.rs b/crates/persistence/src/backends/s3/keyspace.rs index 5b2089bcc..32b8aec1c 100644 --- a/crates/persistence/src/backends/s3/keyspace.rs +++ b/crates/persistence/src/backends/s3/keyspace.rs @@ -139,11 +139,13 @@ impl S3Keyspace { } /// Key for the JSON state object of a bulk export job. + #[allow(dead_code)] pub fn export_job_state_key(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "state.json"]) } /// Key for per-type export progress within a job. + #[allow(dead_code)] pub fn export_job_progress_key(&self, job_id: &str, resource_type: &str) -> String { self.join(&[ "bulk", @@ -156,11 +158,13 @@ impl S3Keyspace { } /// Key for the completed export manifest of a job. + #[allow(dead_code)] pub fn export_job_manifest_key(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "manifest.json"]) } /// Key for a single NDJSON output part within an export job. + #[allow(dead_code)] pub fn export_job_output_key(&self, job_id: &str, resource_type: &str, part: u32) -> String { self.join(&[ "bulk", @@ -174,11 +178,13 @@ impl S3Keyspace { } /// Prefix covering all export job objects. + #[allow(dead_code)] pub fn export_jobs_prefix(&self) -> String { self.join(&["bulk", "export", "jobs/"]) } /// Prefix covering all objects belonging to a single export job. + #[allow(dead_code)] pub fn export_job_prefix(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "/"]) } From d4f46173c06660341314e71c183a448c13653828 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 14/81] chore(s3): #[allow(dead_code)] on ExportJobState S3 is no longer a bulk-export job-state backend; the model is preserved for a future read-modify-write integration. --- crates/persistence/src/backends/s3/models.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/persistence/src/backends/s3/models.rs b/crates/persistence/src/backends/s3/models.rs index 02c632ee1..799b4ed5d 100644 --- a/crates/persistence/src/backends/s3/models.rs +++ b/crates/persistence/src/backends/s3/models.rs @@ -34,8 +34,10 @@ pub struct HistoryIndexEvent { /// Durable state of a bulk export job stored in S3. /// -/// Written to `bulk/export/jobs//state.json` and updated as the job -/// transitions through `accepted → in-progress → complete/error/cancelled`. +/// Reserved for the Phase 2 `S3OutputStore` integration; the S3 backend is no +/// longer a bulk-export *job-state* backend (job state lives in SQLite or +/// PostgreSQL), so this type is currently unused. +#[allow(dead_code)] #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ExportJobState { /// The original export request parameters. From d6d667f6830685ccf7d44cca82b3d370a09c277e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 15/81] chore(s3): #[allow(dead_code)] on internal delete_object helper Reserved for future S3OutputStore integration; unused now that the synchronous BulkExportStorage path has been removed. --- crates/persistence/src/backends/s3/storage.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs index 914b09604..ba458f906 100644 --- a/crates/persistence/src/backends/s3/storage.rs +++ b/crates/persistence/src/backends/s3/storage.rs @@ -102,6 +102,8 @@ impl S3Backend { } /// Deletes the object at `key`. Succeeds silently if the key does not exist. + /// Reserved for the Phase 2 `S3OutputStore` integration. + #[allow(dead_code)] pub(crate) async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> { self.client .delete_object(bucket, key) From b6659b5d04a8894085a69f31f485b3f86ec01a0e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 16/81] refactor(s3): remove BulkExportStorage; keep ExportDataProvider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit S3 is output-only for bulk export — job state lives in SQLite or PostgreSQL. Drops the synchronous start_export / run_export_job path and adds stub PatientExportProvider / GroupExportProvider impls returning UnsupportedCapability so an S3-resource-storage deployment satisfies the trait hierarchy. --- .../src/backends/s3/bulk_export.rs | 477 +++--------------- 1 file changed, 72 insertions(+), 405 deletions(-) diff --git a/crates/persistence/src/backends/s3/bulk_export.rs b/crates/persistence/src/backends/s3/bulk_export.rs index 554dd0ebc..161d00c80 100644 --- a/crates/persistence/src/backends/s3/bulk_export.rs +++ b/crates/persistence/src/backends/s3/bulk_export.rs @@ -1,180 +1,28 @@ -//! Bulk export implementation for the S3 backend. +//! Bulk export data provider for the S3 backend. //! -//! Implements `BulkExportStorage` and `ExportDataProvider`. Export jobs are -//! persisted as a small JSON state object in S3 and run synchronously within -//! the `start_export` call, writing NDJSON output parts directly to S3. +//! The S3 backend is **output-only** for bulk export: it provides +//! [`ExportDataProvider`] (feeding export batches when S3 is the resource +//! store) but does not implement `BulkExportStorage` — job state lives in the +//! SQLite or Postgres job store, never S3. use std::collections::BTreeSet; use async_trait::async_trait; -use chrono::Utc; use crate::core::bulk_export::{ - BulkExportStorage, ExportDataProvider, ExportJobId, ExportManifest, ExportOutputFile, - ExportProgress, ExportRequest, ExportStatus, NdjsonBatch, TypeExportProgress, + ExportDataProvider, ExportRequest, GroupExportProvider, NdjsonBatch, PatientExportProvider, }; -use crate::error::{BulkExportError, StorageError, StorageResult}; +use crate::error::{BackendError, BulkExportError, StorageError, StorageResult}; use crate::tenant::TenantContext; -use super::backend::{S3Backend, TenantLocation}; -use super::models::ExportJobState; +use super::backend::S3Backend; -#[async_trait] -impl BulkExportStorage for S3Backend { - async fn start_export( - &self, - tenant: &TenantContext, - request: ExportRequest, - ) -> StorageResult { - if request.output_format != "application/fhir+ndjson" { - return Err(StorageError::BulkExport( - BulkExportError::UnsupportedFormat { - format: request.output_format, - }, - )); - } - - let active_exports = self.list_exports(tenant, false).await?; - if active_exports.len() >= 5 { - return Err(StorageError::BulkExport( - BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, - )); - } - - let job_id = ExportJobId::new(); - let progress = ExportProgress::accepted(job_id.clone(), request.level.clone(), Utc::now()); - let state = ExportJobState { - request, - progress, - manifest: None, - }; - - self.save_export_state(tenant, &job_id, &state).await?; - - if let Err(err) = self.run_export_job(tenant, &job_id).await { - let _ = self - .mark_export_failed(tenant, &job_id, &err.to_string()) - .await; - } - - Ok(job_id) - } - - async fn get_export_status( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - Ok(self.load_export_state(tenant, job_id).await?.progress) - } - - async fn cancel_export( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let mut state = self.load_export_state(tenant, job_id).await?; - - if state.progress.status.is_terminal() { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "accepted or in-progress".to_string(), - actual: state.progress.status.to_string(), - })); - } - - state.progress.status = ExportStatus::Cancelled; - state.progress.completed_at = Some(Utc::now()); - state.progress.error_message = None; - state.progress.current_type = None; - - self.save_export_state(tenant, job_id, &state).await - } - - async fn delete_export( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - - if !self.export_job_exists(&location, job_id).await? { - return Err(StorageError::BulkExport(BulkExportError::JobNotFound { - job_id: job_id.to_string(), - })); - } - - let prefix = location.keyspace.export_job_prefix(job_id.as_str()); - for object in self.list_objects_all(&location.bucket, &prefix).await? { - self.delete_object(&location.bucket, &object.key).await?; - } - - Ok(()) - } - - async fn get_export_manifest( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - let state = self.load_export_state(tenant, job_id).await?; - - if state.progress.status != ExportStatus::Complete { - return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete".to_string(), - actual: state.progress.status.to_string(), - })); - } - - if let Some(manifest) = state.manifest { - return Ok(manifest); - } - - let location = self.tenant_location(tenant)?; - let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); - let manifest = self - .get_json_object::(&location.bucket, &manifest_key) - .await? - .map(|(manifest, _)| manifest) - .ok_or_else(|| { - StorageError::BulkExport(BulkExportError::InvalidJobState { - job_id: job_id.to_string(), - expected: "complete with manifest".to_string(), - actual: "complete-without-manifest".to_string(), - }) - })?; - - Ok(manifest) - } - - async fn list_exports( - &self, - tenant: &TenantContext, - include_completed: bool, - ) -> StorageResult> { - let location = self.tenant_location(tenant)?; - let prefix = location.keyspace.export_jobs_prefix(); - - let mut exports = Vec::new(); - for object in self.list_objects_all(&location.bucket, &prefix).await? { - if !object.key.ends_with("/state.json") { - continue; - } - - if let Some((state, _)) = self - .get_json_object::(&location.bucket, &object.key) - .await? - { - if include_completed || state.progress.status.is_active() { - exports.push(state.progress); - } - } - } - - exports.sort_by_key(|e| std::cmp::Reverse(e.transaction_time)); - Ok(exports) - } +/// Error for export-level operations S3 does not support as a primary. +fn s3_export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "s3".to_string(), + capability: "patient/group bulk export".to_string(), + }) } #[async_trait] @@ -238,6 +86,11 @@ impl ExportDataProvider for S3Backend { continue; } } + if let Some(until) = request.until { + if resource.last_modified() > until { + continue; + } + } count += 1; } @@ -277,6 +130,11 @@ impl ExportDataProvider for S3Backend { continue; } } + if let Some(until) = request.until { + if resource.last_modified() > until { + continue; + } + } lines.push(serde_json::to_string(resource.content()).map_err(|e| { StorageError::BulkExport(BulkExportError::WriteError { @@ -301,245 +159,6 @@ impl ExportDataProvider for S3Backend { } } -impl S3Backend { - /// Drives a bulk export job to completion. - /// - /// Iterates over all matching resource types, fetches them in batches, and - /// writes NDJSON output parts to S3. Updates the job state object after - /// each type completes and writes the final manifest on success. - async fn run_export_job( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - let mut state = self.load_export_state(tenant, job_id).await?; - - state.progress.status = ExportStatus::InProgress; - state.progress.started_at = Some(Utc::now()); - state.progress.error_message = None; - state.progress.current_type = None; - state.progress.type_progress.clear(); - - self.save_export_state(tenant, job_id, &state).await?; - - let resource_types = self.list_export_types(tenant, &state.request).await?; - let mut output_files: Vec = Vec::new(); - - for resource_type in resource_types { - state.progress.current_type = Some(resource_type.clone()); - self.save_export_state(tenant, job_id, &state).await?; - - let mut type_progress = TypeExportProgress::new(resource_type.clone()); - type_progress.total_count = Some( - self.count_export_resources(tenant, &state.request, &resource_type) - .await?, - ); - - let mut cursor: Option = None; - let mut part_lines: Vec = Vec::new(); - let mut part_number: u32 = 1; - - loop { - let batch = self - .fetch_export_batch( - tenant, - &state.request, - &resource_type, - cursor.as_deref(), - state.request.batch_size.max(1), - ) - .await?; - - for line in batch.lines { - part_lines.push(line); - if part_lines.len() >= self.config.bulk_export_part_size as usize { - let written = self - .write_export_part( - &location, - job_id, - &resource_type, - part_number, - &part_lines, - ) - .await?; - output_files.push(written); - type_progress.exported_count += part_lines.len() as u64; - type_progress.cursor_state = batch.next_cursor.clone(); - self.save_export_type_progress(&location, job_id, &type_progress) - .await?; - part_lines.clear(); - part_number += 1; - } - } - - cursor = batch.next_cursor; - if batch.is_last { - break; - } - } - - if !part_lines.is_empty() { - let written = self - .write_export_part(&location, job_id, &resource_type, part_number, &part_lines) - .await?; - output_files.push(written); - type_progress.exported_count += part_lines.len() as u64; - part_lines.clear(); - } - - type_progress.cursor_state = None; - self.save_export_type_progress(&location, job_id, &type_progress) - .await?; - state.progress.type_progress.push(type_progress); - } - - state.progress.status = ExportStatus::Complete; - state.progress.completed_at = Some(Utc::now()); - state.progress.current_type = None; - state.progress.error_message = None; - - let manifest = ExportManifest { - transaction_time: state.progress.transaction_time, - request: format!("$export?job={}", job_id), - requires_access_token: true, - output: output_files, - error: Vec::new(), - message: None, - extension: None, - }; - - state.manifest = Some(manifest.clone()); - - let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); - let manifest_payload = self.serialize_json(&manifest)?; - self.put_json_object( - &location.bucket, - &manifest_key, - &manifest_payload, - None, - None, - ) - .await?; - - self.save_export_state(tenant, job_id, &state).await - } - - /// Writes a single NDJSON output part to S3 and returns an - /// `ExportOutputFile` describing the S3 location and line count. - async fn write_export_part( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - resource_type: &str, - part_number: u32, - lines: &[String], - ) -> StorageResult { - let key = - location - .keyspace - .export_job_output_key(job_id.as_str(), resource_type, part_number); - let mut body = lines.join("\n"); - body.push('\n'); - - self.put_bytes_object( - &location.bucket, - &key, - body.as_bytes(), - Some("application/fhir+ndjson"), - ) - .await?; - - Ok( - ExportOutputFile::new(resource_type, format!("s3://{}/{}", location.bucket, key)) - .with_count(lines.len() as u64), - ) - } - - /// Returns `true` if the job state object exists in S3. - async fn export_job_exists( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - ) -> StorageResult { - let key = location.keyspace.export_job_state_key(job_id.as_str()); - Ok(self - .client - .head_object(&location.bucket, &key) - .await - .map_err(|e| self.map_client_error(e))? - .is_some()) - } - - /// Loads and deserialises the export job state from S3. - /// - /// Returns `JobNotFound` if the state object does not exist. - async fn load_export_state( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - ) -> StorageResult { - let location = self.tenant_location(tenant)?; - let key = location.keyspace.export_job_state_key(job_id.as_str()); - self.get_json_object::(&location.bucket, &key) - .await? - .map(|(state, _)| state) - .ok_or_else(|| { - StorageError::BulkExport(BulkExportError::JobNotFound { - job_id: job_id.to_string(), - }) - }) - } - - /// Serialises and writes the export job state to S3. - async fn save_export_state( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - state: &ExportJobState, - ) -> StorageResult<()> { - let location = self.tenant_location(tenant)?; - let key = location.keyspace.export_job_state_key(job_id.as_str()); - let payload = self.serialize_json(state)?; - self.put_json_object(&location.bucket, &key, &payload, None, None) - .await?; - Ok(()) - } - - /// Transitions the export job to the `Error` state, recording the failure - /// message in the state object. - async fn mark_export_failed( - &self, - tenant: &TenantContext, - job_id: &ExportJobId, - message: &str, - ) -> StorageResult<()> { - let mut state = self.load_export_state(tenant, job_id).await?; - state.progress.status = ExportStatus::Error; - state.progress.completed_at = Some(Utc::now()); - state.progress.current_type = None; - state.progress.error_message = Some(message.to_string()); - self.save_export_state(tenant, job_id, &state).await - } - - /// Writes per-type export progress to S3 so that partial completion can be - /// inspected before the job finishes. - async fn save_export_type_progress( - &self, - location: &TenantLocation, - job_id: &ExportJobId, - progress: &TypeExportProgress, - ) -> StorageResult<()> { - let key = location - .keyspace - .export_job_progress_key(job_id.as_str(), &progress.resource_type); - let payload = self.serialize_json(progress)?; - self.put_json_object(&location.bucket, &key, &payload, None, None) - .await?; - Ok(()) - } -} - /// Parses the numeric offset encoded in an export batch cursor. /// /// A `None` cursor is treated as offset `0` (start of the result set). @@ -563,3 +182,51 @@ fn parse_resource_type_from_current_key(key: &str) -> Option { let resources_idx = parts.iter().position(|segment| *segment == "resources")?; parts.get(resources_idx + 1).map(|s| s.to_string()) } + +// S3 is output-only for bulk export; patient/group compartment enumeration is +// not supported when S3 is the resource store. These stub impls satisfy the +// trait hierarchy so S3 can be a primary backend. + +#[async_trait] +impl PatientExportProvider for S3Backend { + async fn list_patient_ids( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + Err(s3_export_unsupported()) + } + + async fn fetch_patient_compartment_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _patient_ids: &[String], + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(s3_export_unsupported()) + } +} + +#[async_trait] +impl GroupExportProvider for S3Backend { + async fn get_group_members( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(s3_export_unsupported()) + } + + async fn resolve_group_patient_ids( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(s3_export_unsupported()) + } +} From ccd06e02f898b77ab3042d0438ad546a04963d22 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 17/81] feat(s3): add S3OutputStore (multipart upload + pre-signed download) ExportOutputStore impl backed by AwsS3Client. open_writer returns a local scratch tempfile; finalize_part fsyncs + put_object's it to S3 under {tenant}/exports/{job_id}/{file_type}-{rt}-{part}-{token}.ndjson. download_url either pre-signs (Auto / AlwaysPresigned) or returns an HFS-served URL (AlwaysToken). delete_job_outputs lists + deletes by prefix. AccessTokenMode encodes the requires_access_token posture. --- .../src/backends/s3/output_store.rs | 249 ++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 crates/persistence/src/backends/s3/output_store.rs diff --git a/crates/persistence/src/backends/s3/output_store.rs b/crates/persistence/src/backends/s3/output_store.rs new file mode 100644 index 000000000..934979823 --- /dev/null +++ b/crates/persistence/src/backends/s3/output_store.rs @@ -0,0 +1,249 @@ +//! S3-backed [`ExportOutputStore`] for multi-instance bulk export. +//! +//! Output NDJSON parts are uploaded to S3-compatible object storage; download +//! URLs are pre-signed `GET` URLs (no token required) by default, or +//! HFS-served URLs when the operator forces token-based access. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::core::bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::client::S3Api; + +/// Manifest access-token posture. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AccessTokenMode { + /// Pre-signed URLs when supported (default). + Auto, + /// Always HFS-served URLs requiring the kickoff Bearer token. + AlwaysToken, + /// Always pre-signed URLs. + AlwaysPresigned, +} + +impl AccessTokenMode { + /// Parses the `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` value. + pub fn parse(s: &str) -> Self { + match s { + "true" => Self::AlwaysToken, + "false" => Self::AlwaysPresigned, + _ => Self::Auto, + } + } +} + +/// An [`ExportOutputStore`] backed by S3-compatible object storage. +pub struct S3OutputStore { + client: Arc, + bucket: String, + base_url: String, + access_token_mode: AccessTokenMode, + file_url_ttl: Duration, + /// Local scratch directory for in-flight (pre-finalize) part files. + scratch_dir: PathBuf, +} + +impl S3OutputStore { + /// Creates a new S3 output store. + pub fn new( + client: Arc, + bucket: impl Into, + base_url: impl Into, + access_token_mode: AccessTokenMode, + file_url_ttl: Duration, + ) -> Self { + let scratch_dir = std::env::temp_dir().join("hfs-export-scratch"); + Self { + client, + bucket: bucket.into(), + base_url: base_url.into(), + access_token_mode, + file_url_ttl, + scratch_dir, + } + } + + /// The S3 object key for a finalized part. + fn object_key(key: &ExportPartKey) -> String { + format!( + "{}/exports/{}/{}-{}-{}-{}.ndjson", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + ) + } + + /// The S3 key prefix covering all parts of a job. + fn job_prefix(tenant_id: &str, job_id: &ExportJobId) -> String { + format!("{}/exports/{}/", tenant_id, job_id) + } + + /// The local scratch path for an in-flight part. + fn scratch_path(&self, key: &ExportPartKey) -> PathBuf { + self.scratch_dir.join(format!( + "{}-{}-{}-{}-{}-{}.tmp", + key.tenant_id, + key.job_id, + key.file_type, + key.resource_type, + key.part_index, + key.fencing_token + )) + } +} + +fn s3_err(message: String) -> StorageError { + StorageError::Backend(BackendError::Internal { + backend_name: "s3-output".to_string(), + message, + source: None, + }) +} + +#[async_trait] +impl ExportOutputStore for S3OutputStore { + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult { + tokio::fs::create_dir_all(&self.scratch_dir) + .await + .map_err(|e| s3_err(format!("create scratch dir: {e}")))?; + let path = self.scratch_path(key); + let file = tokio::fs::File::create(&path) + .await + .map_err(|e| s3_err(format!("create scratch file {}: {e}", path.display())))?; + let boxed: std::pin::Pin> = Box::pin(file); + Ok(ExportPartWriter::new(boxed)) + } + + async fn finalize_part( + &self, + key: &ExportPartKey, + mut writer: ExportPartWriter, + ) -> StorageResult { + use tokio::io::AsyncWriteExt; + writer + .writer + .flush() + .await + .map_err(|e| s3_err(format!("flush scratch file: {e}")))?; + writer + .writer + .shutdown() + .await + .map_err(|e| s3_err(format!("close scratch file: {e}")))?; + let line_count = writer.line_count; + let byte_count = writer.byte_count; + drop(writer); + + let path = self.scratch_path(key); + let bytes = tokio::fs::read(&path) + .await + .map_err(|e| s3_err(format!("read scratch file {}: {e}", path.display())))?; + let object_key = Self::object_key(key); + self.client + .put_object( + &self.bucket, + &object_key, + bytes, + Some("application/fhir+ndjson"), + None, + None, + ) + .await + .map_err(|e| s3_err(format!("upload {object_key}: {e:?}")))?; + // Best-effort cleanup of the scratch file. + let _ = tokio::fs::remove_file(&path).await; + + Ok(FinalizedPart { + key: key.clone(), + resource_type: key.resource_type.clone(), + line_count, + size_bytes: byte_count, + }) + } + + async fn download_url(&self, key: &ExportPartKey, ttl: Duration) -> StorageResult { + match self.access_token_mode { + AccessTokenMode::AlwaysToken => Ok(DownloadUrl { + url: format!( + "{}/export-file/{}/{}-{}", + self.base_url.trim_end_matches('/'), + key.job_id, + key.resource_type, + key.part_index + ), + requires_access_token: true, + }), + AccessTokenMode::Auto | AccessTokenMode::AlwaysPresigned => { + let object_key = Self::object_key(key); + let effective_ttl = if ttl.is_zero() { + self.file_url_ttl + } else { + ttl + }; + let url = self + .client + .presign_get(&self.bucket, &object_key, effective_ttl) + .await + .map_err(|e| s3_err(format!("presign {object_key}: {e:?}")))?; + Ok(DownloadUrl { + url, + requires_access_token: false, + }) + } + } + } + + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>> { + let object_key = Self::object_key(key); + let data = self + .client + .get_object(&self.bucket, &object_key) + .await + .map_err(|e| s3_err(format!("get {object_key}: {e:?}")))? + .ok_or_else(|| s3_err(format!("export object not found: {object_key}")))?; + Ok(Box::pin(std::io::Cursor::new(data.bytes))) + } + + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let prefix = Self::job_prefix(tenant.tenant_id().as_str(), job_id); + let mut continuation: Option = None; + loop { + let page = self + .client + .list_objects(&self.bucket, &prefix, continuation.as_deref(), Some(1000)) + .await + .map_err(|e| s3_err(format!("list {prefix}: {e:?}")))?; + for item in &page.items { + self.client + .delete_object(&self.bucket, &item.key) + .await + .map_err(|e| s3_err(format!("delete {}: {e:?}", item.key)))?; + } + match page.next_continuation_token { + Some(token) => continuation = Some(token), + None => break, + } + } + Ok(()) + } +} From 8f55fd9dc28d952f7d996e89ae21d25efbb42053 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 18/81] chore(s3): register output_store module + re-export public types --- crates/persistence/src/backends/s3/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/persistence/src/backends/s3/mod.rs b/crates/persistence/src/backends/s3/mod.rs index 741e5d611..d43c61aba 100644 --- a/crates/persistence/src/backends/s3/mod.rs +++ b/crates/persistence/src/backends/s3/mod.rs @@ -12,10 +12,13 @@ mod client; mod config; mod keyspace; mod models; +mod output_store; mod storage; pub use backend::S3Backend; +pub use client::{AwsS3Client, AwsS3ClientOptions, S3Api}; pub use config::{S3BackendConfig, S3TenancyMode}; +pub use output_store::{AccessTokenMode, S3OutputStore}; #[cfg(test)] mod tests; From 49856e0c0a7115cb1b372d99858e0b09700a26ec Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 19/81] test(s3): trim removed-impl tests; keep ExportDataProvider coverage bulk_export_start_manifest_and_delete is gone (the impl was removed); bulk_export_invalid_format_and_fetch_batch_cursor is reduced to the fetch_export_batch cursor case which still exercises ExportDataProvider. --- crates/persistence/src/backends/s3/tests.rs | 67 +++------------------ 1 file changed, 9 insertions(+), 58 deletions(-) diff --git a/crates/persistence/src/backends/s3/tests.rs b/crates/persistence/src/backends/s3/tests.rs index 08b0b3855..65f93f7e0 100644 --- a/crates/persistence/src/backends/s3/tests.rs +++ b/crates/persistence/src/backends/s3/tests.rs @@ -19,7 +19,7 @@ use crate::backends::s3::client::{ ListObjectItem, ListObjectsResult, ObjectData, ObjectMetadata, S3Api, S3ClientError, }; use crate::backends::s3::config::{S3BackendConfig, S3TenancyMode}; -use crate::core::bulk_export::{BulkExportStorage, ExportDataProvider, ExportRequest}; +use crate::core::bulk_export::{ExportDataProvider, ExportRequest}; use crate::core::bulk_submit::{ BulkProcessingOptions, BulkSubmitProvider, BulkSubmitRollbackProvider, NdjsonEntry, StreamingBulkSubmitProvider, SubmissionId, SubmissionStatus, @@ -30,8 +30,8 @@ use crate::core::history::{ use crate::core::transaction::{BundleEntry, BundleMethod, BundleProvider}; use crate::core::{ResourceStorage, VersionedStorage}; use crate::error::{ - BulkExportError, BulkSubmitError, ConcurrencyError, ResourceError, SearchError, StorageError, - TenantError, TransactionError, + BulkSubmitError, ConcurrencyError, ResourceError, SearchError, StorageError, TenantError, + TransactionError, }; use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use crate::types::{CursorValue, PageCursor, Pagination, PaginationMode}; @@ -734,47 +734,14 @@ async fn bundle_transaction_reports_rollback_failure() { } } -#[tokio::test] -async fn bulk_export_start_manifest_and_delete() { - let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); - let backend = make_prefix_backend(mock); - let tenant = tenant("tenant-a"); - - backend - .create( - &tenant, - "Patient", - json!({"resourceType":"Patient","id":"e1"}), - FhirVersion::default(), - ) - .await - .unwrap(); - - let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); - let job_id = backend.start_export(&tenant, request).await.unwrap(); - - let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); - assert_eq!( - progress.status, - crate::core::bulk_export::ExportStatus::Complete - ); - - let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); - assert!(!manifest.output.is_empty()); - assert!(manifest.output[0].url.starts_with("s3://")); - - backend.delete_export(&tenant, &job_id).await.unwrap(); - let deleted = backend.get_export_status(&tenant, &job_id).await; - assert!(matches!( - deleted, - Err(StorageError::BulkExport( - BulkExportError::JobNotFound { .. } - )) - )); -} +// `bulk_export_start_manifest_and_delete` was removed: S3 no longer +// implements `BulkExportStorage` (job state lives in SQLite or PostgreSQL). +// The remaining bulk-export surface on the S3 backend is the +// `ExportDataProvider` data-feed, exercised by +// `bulk_export_fetch_batch_cursor` below. #[tokio::test] -async fn bulk_export_invalid_format_and_fetch_batch_cursor() { +async fn bulk_export_fetch_batch_cursor() { let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); let backend = make_prefix_backend(mock); let tenant = tenant("tenant-a"); @@ -791,22 +758,6 @@ async fn bulk_export_invalid_format_and_fetch_batch_cursor() { .unwrap(); } - let invalid = backend - .start_export( - &tenant, - ExportRequest { - output_format: "application/json".to_string(), - ..ExportRequest::system() - }, - ) - .await; - assert!(matches!( - invalid, - Err(StorageError::BulkExport( - BulkExportError::UnsupportedFormat { .. } - )) - )); - let request = ExportRequest::system(); let batch1 = backend .fetch_export_batch(&tenant, &request, "Patient", None, 2) From 5596e0d919e743dd41468ffbc58aed8185bbc202 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 20/81] test(postgres): add testcontainers coverage for postgres-s3 job store postgres_integration_export_claim_skip_locked: claim ordering, fencing token bumps. postgres_integration_export_stale_worker_fenced_out: LeaseLost on every fenced ExportWorkerStorage call after reclaim. postgres_integration_export_count_active_and_expire: count + list filtering. claim_specific helper drains foreign jobs so tests can cope with the shared SHARED_PG container. --- crates/persistence/tests/postgres_tests.rs | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/crates/persistence/tests/postgres_tests.rs b/crates/persistence/tests/postgres_tests.rs index 35c76965c..0a6d7f2bf 100644 --- a/crates/persistence/tests/postgres_tests.rs +++ b/crates/persistence/tests/postgres_tests.rs @@ -2825,4 +2825,162 @@ mod postgres_integration { .unwrap(); assert_eq!(ids, vec!["p1".to_string()]); } + + // ======================================================================== + // Bulk Export — Phase 2 multi-instance job state on Postgres. + // ======================================================================== + + use chrono::Utc; + use helios_persistence::core::bulk_export::{ + BulkExportStorage, ExportRequest, ExportStatus, StartExportInput, TypeExportProgress, + }; + use helios_persistence::core::bulk_export_worker::{ + ExportClaimStrategy, ExportWorkerStorage, LeaseError, WorkerId, + }; + use std::time::Duration as StdDuration; + + fn export_input(request: ExportRequest) -> StartExportInput { + StartExportInput { + request, + transaction_time: Utc::now(), + request_url: "http://localhost/$export".to_string(), + owner_subject: Some("pg-test".to_string()), + fhir_version: FhirVersion::default(), + } + } + + /// Claims jobs in a loop until the lease for `target` is returned; + /// releases any other jobs claimed along the way. Robust to concurrent + /// tests sharing the testcontainers PostgreSQL instance. + async fn claim_specific( + backend: &helios_persistence::backends::postgres::PostgresBackend, + worker_id: &WorkerId, + target: &helios_persistence::core::bulk_export::ExportJobId, + lease_duration: StdDuration, + ) -> helios_persistence::core::bulk_export_worker::ExportJobLease { + for _ in 0..100 { + match backend.claim_next(worker_id, lease_duration).await.unwrap() { + Some(lease) if &lease.job_id == target => return lease, + Some(other) => { + // Drain other tests' jobs out of the queue by completing + // them (so the claim ordering moves on instead of + // looping back to the same job after `release`). + let _ = backend + .finish_export_job( + &other.tenant, + &other.job_id, + &other.worker_id, + other.fencing_token, + ) + .await; + } + None => { + tokio::time::sleep(std::time::Duration::from_millis(20)).await; + } + } + } + panic!("never claimed the expected job"); + } + + #[tokio::test] + async fn postgres_integration_export_claim_skip_locked() { + let backend = create_backend().await; + let tenant = create_tenant("export-claim"); + + let job_id = backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + + let worker_a = WorkerId::new(format!("pg-worker-a-{}", uuid::Uuid::new_v4())); + let lease_a = + claim_specific(&backend, &worker_a, &job_id, StdDuration::from_secs(60)).await; + assert!(lease_a.fencing_token >= 1); + + // Worker A finishes via the fenced ExportWorkerStorage. + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await + .unwrap(); + backend + .update_export_type_progress( + &tenant, + &job_id, + &worker_a, + lease_a.fencing_token, + &TypeExportProgress::new("Patient"), + ) + .await + .unwrap(); + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await + .unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!(progress.status, ExportStatus::Complete); + } + + #[tokio::test] + async fn postgres_integration_export_stale_worker_fenced_out() { + let backend = create_backend().await; + let tenant = create_tenant("export-fence"); + + let job_id = backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + + // Worker A takes a very short lease, then Worker B reclaims. + let worker_a = WorkerId::new(format!("pg-stale-a-{}", uuid::Uuid::new_v4())); + let lease_a = + claim_specific(&backend, &worker_a, &job_id, StdDuration::from_millis(1)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + let worker_b = WorkerId::new(format!("pg-stale-b-{}", uuid::Uuid::new_v4())); + let lease_b = + claim_specific(&backend, &worker_b, &job_id, StdDuration::from_secs(60)).await; + assert!(lease_b.fencing_token > lease_a.fencing_token); + + // Worker A is fenced out from every mutation. + assert!(matches!( + backend + .mark_export_in_progress(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + assert!(matches!( + backend + .finish_export_job(&tenant, &job_id, &worker_a, lease_a.fencing_token) + .await, + Err(LeaseError::LeaseLost { .. }) + )); + + // Worker B can still finish. + backend + .finish_export_job(&tenant, &job_id, &worker_b, lease_b.fencing_token) + .await + .unwrap(); + } + + #[tokio::test] + async fn postgres_integration_export_count_active_and_expire() { + let backend = create_backend().await; + let tenant = create_tenant("export-cleanup"); + + for _ in 0..2 { + backend + .start_export(&tenant, export_input(ExportRequest::system())) + .await + .unwrap(); + } + assert_eq!(backend.count_active_exports(&tenant).await.unwrap(), 2); + + // Nothing is expired yet. + let expired_now = backend + .list_expired_exports(Utc::now(), StdDuration::from_secs(3600), 100) + .await + .unwrap(); + // Only completed/error/cancelled jobs can expire — these are accepted. + assert!(expired_now.is_empty()); + } } From 0f1b01832243ed4218cc12601eef8638a0a9e7cc Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 21/81] test(minio): swap removed start_export call for fetch_export_batch + add S3OutputStore round-trip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lifecycle test now exercises the remaining ExportDataProvider surface. Adds test_minio_s3_output_store_round_trip: write → finalize → pre-signed GET → open_reader → idempotent delete against MinIO. --- crates/persistence/tests/minio_s3_tests.rs | 143 +++++++++++++++------ 1 file changed, 102 insertions(+), 41 deletions(-) diff --git a/crates/persistence/tests/minio_s3_tests.rs b/crates/persistence/tests/minio_s3_tests.rs index ab8c19d1d..03fe4d44e 100644 --- a/crates/persistence/tests/minio_s3_tests.rs +++ b/crates/persistence/tests/minio_s3_tests.rs @@ -14,7 +14,7 @@ use aws_sdk_s3::error::ProvideErrorMetadata; use aws_sdk_s3::primitives::ByteStream; use helios_fhir::FhirVersion; use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; -use helios_persistence::core::bulk_export::{BulkExportStorage, ExportDataProvider, ExportRequest}; +use helios_persistence::core::bulk_export::{ExportDataProvider, ExportRequest}; use helios_persistence::core::bulk_submit::{ BulkEntryOutcome, BulkProcessingOptions, BulkSubmitProvider, BulkSubmitRollbackProvider, NdjsonEntry, SubmissionId, @@ -670,49 +670,16 @@ async fn test_minio_bulk_export_lifecycle_manifest_and_outputs() { .unwrap(); } - let job_id = harness + // S3 no longer implements `BulkExportStorage` (job state lives in + // SQLite/Postgres; see Phase 2 §2b). Verify that the S3 backend's + // `ExportDataProvider` data feed still returns the seeded resources. + let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); + let batch = harness .backend - .start_export( - &tenant, - ExportRequest::system().with_types(vec!["Patient".to_string()]), - ) - .await - .unwrap(); - - let manifest = harness - .backend - .get_export_manifest(&tenant, &job_id) - .await - .unwrap(); - assert!(!manifest.output.is_empty()); - - let bucket_prefix = format!("s3://{}/", harness.bucket); - for output in &manifest.output { - assert!(output.url.starts_with(&bucket_prefix)); - let key = output.url.strip_prefix(&bucket_prefix).unwrap(); - let object = harness - .sdk_client - .get_object() - .bucket(&harness.bucket) - .key(key) - .send() - .await - .unwrap(); - let bytes = object.body.collect().await.unwrap().into_bytes(); - assert!( - !bytes.is_empty(), - "bulk export output object should not be empty: {}", - output.url - ); - } - - harness - .backend - .delete_export(&tenant, &job_id) + .fetch_export_batch(&tenant, &request, "Patient", None, 100) .await .unwrap(); - let deleted = harness.backend.get_export_status(&tenant, &job_id).await; - assert!(matches!(deleted, Err(StorageError::BulkExport(_)))); + assert_eq!(batch.lines.len(), 3); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -852,3 +819,97 @@ async fn test_minio_pagination_over_1000_history_and_export() { assert_eq!(batch2.lines.len(), 5); assert!(batch2.is_last); } + +// ============================================================================ +// Phase 2 — S3OutputStore tests against MinIO. +// ============================================================================ + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_minio_s3_output_store_round_trip() { + use helios_persistence::backends::s3::{ + AccessTokenMode, AwsS3Client, AwsS3ClientOptions, S3OutputStore, + }; + use helios_persistence::core::bulk_export::ExportJobId; + use helios_persistence::core::bulk_export_output::{ExportOutputStore, ExportPartKey}; + use std::sync::Arc; + use std::time::Duration; + use tokio::io::AsyncReadExt; + + if skip_if_disabled("test_minio_s3_output_store_round_trip") { + return; + } + + let shared = shared_minio().await; + ensure_backend_env_credentials(shared); + let sdk_client = build_minio_sdk_client(shared).await; + let bucket = test_bucket_name(); + ensure_bucket_exists(&sdk_client, &bucket).await; + + let region = aws_config::Region::new("us-east-1"); + let credentials = aws_sdk_s3::config::Credentials::new( + &shared.root_user, + &shared.root_password, + None, + None, + "minio-test", + ); + let sdk_config = aws_config::SdkConfig::builder() + .region(region) + .credentials_provider(aws_sdk_s3::config::SharedCredentialsProvider::new( + credentials, + )) + .behavior_version(aws_config::BehaviorVersion::latest()) + .build(); + let s3_client = Arc::new(AwsS3Client::from_sdk_config_with_options( + &sdk_config, + AwsS3ClientOptions { + endpoint_url: Some(shared.endpoint_url.clone()), + force_path_style: true, + }, + )); + + let store = S3OutputStore::new( + s3_client, + bucket.clone(), + "http://localhost:8080", + AccessTokenMode::Auto, + Duration::from_secs(60), + ); + + let job_id = ExportJobId::new(); + let key = ExportPartKey::output("tenant-a", job_id.clone(), "Patient", 0, 1); + + // Write two NDJSON lines and finalize. + let mut writer = store.open_writer(&key).await.unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"a"}"#) + .await + .unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"b"}"#) + .await + .unwrap(); + let finalized = store.finalize_part(&key, writer).await.unwrap(); + assert_eq!(finalized.line_count, 2); + assert!(finalized.size_bytes > 0); + + // Pre-signed GET URL. + let url = store + .download_url(&key, Duration::from_secs(60)) + .await + .unwrap(); + assert!(!url.requires_access_token); + assert!(url.url.contains("X-Amz-Signature") || url.url.contains("Signature=")); + + // open_reader streams the same bytes back. + let mut reader = store.open_reader(&key).await.unwrap(); + let mut content = String::new(); + reader.read_to_string(&mut content).await.unwrap(); + assert_eq!(content.lines().count(), 2); + + // delete_job_outputs removes the part; idempotent on second call. + let tenant = tenant("tenant-a"); + store.delete_job_outputs(&tenant, &job_id).await.unwrap(); + store.delete_job_outputs(&tenant, &job_id).await.unwrap(); + assert!(store.open_reader(&key).await.is_err()); +} From 349bab7726ee2b8b2bdec72a82411b1f29362d93 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:46:54 -0400 Subject: [PATCH 22/81] test(s3): swap removed start_export/get_export_manifest for fetch_export_batch S3 is no longer a bulk-export job-state backend; verify the ExportDataProvider data feed instead. --- crates/persistence/tests/s3_tests.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/persistence/tests/s3_tests.rs b/crates/persistence/tests/s3_tests.rs index cdff998a0..2d2595910 100644 --- a/crates/persistence/tests/s3_tests.rs +++ b/crates/persistence/tests/s3_tests.rs @@ -9,7 +9,7 @@ use std::collections::HashMap; use helios_fhir::FhirVersion; use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; -use helios_persistence::core::bulk_export::{BulkExportStorage, ExportRequest}; +use helios_persistence::core::bulk_export::{ExportDataProvider, ExportRequest}; use helios_persistence::core::bulk_submit::{ BulkProcessingOptions, BulkSubmitProvider, NdjsonEntry, SubmissionId, }; @@ -185,15 +185,15 @@ async fn test_aws_bundle_bulk_export_and_submit() { assert_eq!(bundle.entries.len(), 1); assert_eq!(bundle.entries[0].status, 201); - let job_id = backend - .start_export( - &tenant, - ExportRequest::system().with_types(vec!["Patient".to_string()]), - ) + // S3 no longer implements `BulkExportStorage` (job state lives in + // SQLite/Postgres); only `ExportDataProvider` remains. Verify the data + // feed instead of the removed kick-off/manifest path. + let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); + let batch = backend + .fetch_export_batch(&tenant, &request, "Patient", None, 100) .await .unwrap(); - let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); - assert!(!manifest.output.is_empty()); + assert!(!batch.lines.is_empty()); let submission_id = SubmissionId::new("aws-client", format!("sub-{}", Uuid::new_v4())); backend From fcf33295f460cc81355fee591f65df0f08f19e7a Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 23/81] feat(persistence): add LocalFsOutputStore backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ExportOutputStore impl backed by tokio::fs. open_writer creates a .tmp under ${HFS_DATA_DIR}/exports/{tenant}/{job_id}/, finalize_part fsyncs + atomic rename, download_url returns an HFS-served URL with requires_access_token=true, open_reader serves the file, and delete_job_outputs is idempotent. Includes a write→finalize→read→delete round-trip test. --- .../persistence/src/backends/local_fs/mod.rs | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 crates/persistence/src/backends/local_fs/mod.rs diff --git a/crates/persistence/src/backends/local_fs/mod.rs b/crates/persistence/src/backends/local_fs/mod.rs new file mode 100644 index 000000000..39deb2ac3 --- /dev/null +++ b/crates/persistence/src/backends/local_fs/mod.rs @@ -0,0 +1,217 @@ +//! Local-filesystem [`ExportOutputStore`] for single-instance bulk export. +//! +//! Writes NDJSON output parts under `{root}/{tenant}/{job_id}/` and serves +//! download URLs through HFS itself (`requires_access_token = true`). + +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use async_trait::async_trait; +use tokio::io::{AsyncRead, AsyncWrite}; + +use crate::core::bulk_export::ExportJobId; +use crate::core::bulk_export_output::{ + DownloadUrl, ExportOutputStore, ExportPartKey, ExportPartWriter, FinalizedPart, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +/// An [`ExportOutputStore`] backed by the local filesystem. +#[derive(Debug, Clone)] +pub struct LocalFsOutputStore { + /// Root directory under which all export output lives. + root: PathBuf, + /// Base URL used to construct HFS-served download URLs. + base_url: String, +} + +impl LocalFsOutputStore { + /// Creates a new local-filesystem output store. + /// + /// `root` is the directory under which `{tenant}/{job_id}/...` is created; + /// `base_url` is the HFS base URL used for download links. + pub fn new(root: impl Into, base_url: impl Into) -> Self { + Self { + root: root.into(), + base_url: base_url.into(), + } + } + + /// The directory holding all parts for a single job. + fn job_dir(&self, tenant_id: &str, job_id: &ExportJobId) -> PathBuf { + self.root.join(tenant_id).join(job_id.as_str()) + } + + /// The final file path for a part. + fn part_path(&self, key: &ExportPartKey) -> PathBuf { + self.job_dir(&key.tenant_id, &key.job_id).join(format!( + "{}-{}-{}-{}.ndjson", + key.file_type, key.resource_type, key.part_index, key.fencing_token + )) + } + + /// The temp file path for an in-flight part. + fn tmp_path(&self, key: &ExportPartKey) -> PathBuf { + let mut p = self.part_path(key); + p.set_extension("ndjson.tmp"); + p + } +} + +fn io_err(message: String) -> StorageError { + StorageError::Backend(BackendError::Internal { + backend_name: "local-fs".to_string(), + message, + source: None, + }) +} + +#[async_trait] +impl ExportOutputStore for LocalFsOutputStore { + async fn open_writer(&self, key: &ExportPartKey) -> StorageResult { + let dir = self.job_dir(&key.tenant_id, &key.job_id); + tokio::fs::create_dir_all(&dir) + .await + .map_err(|e| io_err(format!("create_dir_all {}: {e}", dir.display())))?; + let tmp = self.tmp_path(key); + let file = tokio::fs::File::create(&tmp) + .await + .map_err(|e| io_err(format!("create {}: {e}", tmp.display())))?; + let boxed: std::pin::Pin> = Box::pin(file); + Ok(ExportPartWriter::new(boxed)) + } + + async fn finalize_part( + &self, + key: &ExportPartKey, + mut writer: ExportPartWriter, + ) -> StorageResult { + use tokio::io::AsyncWriteExt; + writer + .writer + .flush() + .await + .map_err(|e| io_err(format!("flush: {e}")))?; + writer + .writer + .shutdown() + .await + .map_err(|e| io_err(format!("shutdown: {e}")))?; + let line_count = writer.line_count; + let byte_count = writer.byte_count; + drop(writer); + + let tmp = self.tmp_path(key); + let final_path = self.part_path(key); + tokio::fs::rename(&tmp, &final_path).await.map_err(|e| { + io_err(format!( + "rename {} -> {}: {e}", + tmp.display(), + final_path.display() + )) + })?; + + Ok(FinalizedPart { + key: key.clone(), + resource_type: key.resource_type.clone(), + line_count, + size_bytes: byte_count, + }) + } + + async fn download_url( + &self, + key: &ExportPartKey, + _ttl: Duration, + ) -> StorageResult { + // HFS-served URL — the download handler resolves {job_id}/{part}. + let base = self.base_url.trim_end_matches('/'); + Ok(DownloadUrl { + url: format!( + "{}/export-file/{}/{}-{}", + base, key.job_id, key.resource_type, key.part_index + ), + requires_access_token: true, + }) + } + + async fn open_reader( + &self, + key: &ExportPartKey, + ) -> StorageResult>> { + let path = self.part_path(key); + let file = tokio::fs::File::open(&path) + .await + .map_err(|e| io_err(format!("open {}: {e}", path.display())))?; + Ok(Box::pin(file)) + } + + async fn delete_job_outputs( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let dir = self.job_dir(tenant.tenant_id().as_str(), job_id); + delete_dir_idempotent(&dir).await + } +} + +/// Removes a directory if it exists; a missing directory is `Ok`. +async fn delete_dir_idempotent(dir: &Path) -> StorageResult<()> { + match tokio::fs::remove_dir_all(dir).await { + Ok(()) => Ok(()), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(io_err(format!("remove_dir_all {}: {e}", dir.display()))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tenant::{TenantId, TenantPermissions}; + use tokio::io::AsyncReadExt; + + fn test_key(job: &ExportJobId) -> ExportPartKey { + ExportPartKey::output("t1", job.clone(), "Patient", 0, 1) + } + + #[tokio::test] + async fn test_write_finalize_read_delete() { + let tmp = tempfile::tempdir().unwrap(); + let store = LocalFsOutputStore::new(tmp.path(), "http://localhost:8080"); + let job = ExportJobId::new(); + let key = test_key(&job); + + let mut writer = store.open_writer(&key).await.unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"1"}"#) + .await + .unwrap(); + writer + .write_line(r#"{"resourceType":"Patient","id":"2"}"#) + .await + .unwrap(); + let finalized = store.finalize_part(&key, writer).await.unwrap(); + assert_eq!(finalized.line_count, 2); + assert!(finalized.size_bytes > 0); + + let url = store + .download_url(&key, Duration::from_secs(60)) + .await + .unwrap(); + assert!(url.requires_access_token); + assert!(url.url.contains("/export-file/")); + assert!(url.url.contains("Patient-0")); + + let mut reader = store.open_reader(&key).await.unwrap(); + let mut content = String::new(); + reader.read_to_string(&mut content).await.unwrap(); + assert_eq!(content.lines().count(), 2); + + let tenant = TenantContext::new(TenantId::new("t1"), TenantPermissions::full_access()); + store.delete_job_outputs(&tenant, &job).await.unwrap(); + // Idempotent: deleting again is fine. + store.delete_job_outputs(&tenant, &job).await.unwrap(); + assert!(store.open_reader(&key).await.is_err()); + } +} From aaf48d0cf6ec4b62e3bfb560bda045d37e6323a2 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 24/81] feat(mongodb): add bulk-export trait stubs ExportDataProvider / PatientExportProvider / GroupExportProvider impls returning UnsupportedCapability so MongoDB can satisfy the trait hierarchy without supporting bulk export as a primary. --- .../src/backends/mongodb/bulk_export.rs | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 crates/persistence/src/backends/mongodb/bulk_export.rs diff --git a/crates/persistence/src/backends/mongodb/bulk_export.rs b/crates/persistence/src/backends/mongodb/bulk_export.rs new file mode 100644 index 000000000..f5006266d --- /dev/null +++ b/crates/persistence/src/backends/mongodb/bulk_export.rs @@ -0,0 +1,99 @@ +//! Bulk export stub implementations for the MongoDB backend. +//! +//! MongoDB does not yet support bulk export as a primary resource store. +//! These stub impls satisfy the [`ExportDataProvider`] / +//! [`PatientExportProvider`] / [`GroupExportProvider`] trait hierarchy so a +//! MongoDB-primary deployment compiles; every method returns +//! `UnsupportedCapability`. + +use async_trait::async_trait; + +use crate::core::bulk_export::{ + ExportDataProvider, ExportRequest, GroupExportProvider, NdjsonBatch, PatientExportProvider, +}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::MongoBackend; + +fn mongo_export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "mongodb".to_string(), + capability: "bulk-export".to_string(), + }) +} + +#[async_trait] +impl ExportDataProvider for MongoBackend { + async fn list_export_types( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + ) -> StorageResult> { + Err(mongo_export_unsupported()) + } + + async fn count_export_resources( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } + + async fn fetch_export_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } +} + +#[async_trait] +impl PatientExportProvider for MongoBackend { + async fn list_patient_ids( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + Err(mongo_export_unsupported()) + } + + async fn fetch_patient_compartment_batch( + &self, + _tenant: &TenantContext, + _request: &ExportRequest, + _resource_type: &str, + _patient_ids: &[String], + _cursor: Option<&str>, + _batch_size: u32, + ) -> StorageResult { + Err(mongo_export_unsupported()) + } +} + +#[async_trait] +impl GroupExportProvider for MongoBackend { + async fn get_group_members( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(mongo_export_unsupported()) + } + + async fn resolve_group_patient_ids( + &self, + _tenant: &TenantContext, + _group_id: &str, + ) -> StorageResult> { + Err(mongo_export_unsupported()) + } +} From eb3448d41681e304122ed40458d5faa39803cccf Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 25/81] chore(mongodb): register bulk_export stub module --- crates/persistence/src/backends/mongodb/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/persistence/src/backends/mongodb/mod.rs b/crates/persistence/src/backends/mongodb/mod.rs index 1267f4651..47f3b28bc 100644 --- a/crates/persistence/src/backends/mongodb/mod.rs +++ b/crates/persistence/src/backends/mongodb/mod.rs @@ -16,6 +16,7 @@ //! Advanced search/composite behavior remains part of later phases. mod backend; +mod bulk_export; pub(crate) mod schema; mod search_impl; mod storage; From 81d4b5ecfd735440d9a6f6a98d6224c24bf3c6e7 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 26/81] chore(persistence): expose local_fs backend module --- crates/persistence/src/backends/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/persistence/src/backends/mod.rs b/crates/persistence/src/backends/mod.rs index 0e2e72256..9aa90ee76 100644 --- a/crates/persistence/src/backends/mod.rs +++ b/crates/persistence/src/backends/mod.rs @@ -37,6 +37,9 @@ pub mod sqlite; #[cfg(feature = "postgres")] pub mod postgres; + +/// Local filesystem [`ExportOutputStore`](crate::core::bulk_export_output::ExportOutputStore). +pub mod local_fs; // // #[cfg(feature = "cassandra")] // pub mod cassandra; From 8b93431339d7f8b0f30a0b1fbe2de6abe45e0b9f Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 27/81] feat(composite): delegate ExportDataProvider/Patient/Group to primary CompositeStorage gains an export_provider: Option field set by with_full_primary (with the new GroupExportProvider bound on T). Each trait method delegates to the primary or returns UnsupportedCapability when no primary impl is wired in. --- crates/persistence/src/composite/storage.rs | 138 +++++++++++++++++++- 1 file changed, 136 insertions(+), 2 deletions(-) diff --git a/crates/persistence/src/composite/storage.rs b/crates/persistence/src/composite/storage.rs index 44a417608..afdf1273a 100644 --- a/crates/persistence/src/composite/storage.rs +++ b/crates/persistence/src/composite/storage.rs @@ -46,7 +46,8 @@ use crate::core::history::HistoryParams; use crate::core::{ BundleEntry, BundleProvider, BundleResult, CapabilityProvider, ChainedSearchProvider, ConditionalCreateResult, ConditionalDeleteResult, ConditionalPatchResult, ConditionalStorage, - ConditionalUpdateResult, IncludeProvider, InstanceHistoryProvider, PatchFormat, + ConditionalUpdateResult, ExportDataProvider, ExportRequest, GroupExportProvider, + IncludeProvider, InstanceHistoryProvider, NdjsonBatch, PatchFormat, PatientExportProvider, ResourceStorage, RevincludeProvider, SearchProvider, SearchResult, StorageCapabilities, TerminologySearchProvider, TextSearchProvider, VersionedStorage, }; @@ -80,6 +81,9 @@ pub type DynInstanceHistoryProvider = Arc; +/// A dynamically typed group export provider (also covers Patient + System). +pub type DynGroupExportProvider = Arc; + /// Composite storage that coordinates multiple backends. /// /// This is the main entry point for polyglot persistence. It implements @@ -128,6 +132,9 @@ pub struct CompositeStorage { /// Primary as BundleProvider (if supported). bundle_provider: Option, + + /// Primary as GroupExportProvider (if supported) — covers all export levels. + export_provider: Option, } /// Health status for a backend. @@ -285,6 +292,7 @@ impl CompositeStorage { versioned_storage: None, history_provider: None, bundle_provider: None, + export_provider: None, }) } @@ -336,6 +344,7 @@ impl CompositeStorage { + VersionedStorage + InstanceHistoryProvider + BundleProvider + + GroupExportProvider + Send + Sync + 'static, @@ -343,7 +352,8 @@ impl CompositeStorage { self.conditional_storage = Some(primary.clone() as DynConditionalStorage); self.versioned_storage = Some(primary.clone() as DynVersionedStorage); self.history_provider = Some(primary.clone() as DynInstanceHistoryProvider); - self.bundle_provider = Some(primary as DynBundleProvider); + self.bundle_provider = Some(primary.clone() as DynBundleProvider); + self.export_provider = Some(primary as DynGroupExportProvider); self } @@ -2005,6 +2015,130 @@ impl CapabilityProvider for CompositeStorage { // resource_capabilities uses the default implementation that returns Option } +/// Returns an `UnsupportedCapability` error for export operations when the +/// primary backend does not implement the export provider traits. +fn export_unsupported() -> StorageError { + StorageError::Backend(BackendError::UnsupportedCapability { + backend_name: "composite".to_string(), + capability: "bulk-export".to_string(), + }) +} + +#[async_trait] +impl ExportDataProvider for CompositeStorage { + async fn list_export_types( + &self, + tenant: &TenantContext, + request: &ExportRequest, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.list_export_types(tenant, request).await, + None => Err(export_unsupported()), + } + } + + async fn count_export_resources( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.count_export_resources(tenant, request, resource_type) + .await + } + None => Err(export_unsupported()), + } + } + + async fn fetch_export_batch( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.fetch_export_batch(tenant, request, resource_type, cursor, batch_size) + .await + } + None => Err(export_unsupported()), + } + } +} + +#[async_trait] +impl PatientExportProvider for CompositeStorage { + async fn list_patient_ids( + &self, + tenant: &TenantContext, + request: &ExportRequest, + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult<(Vec, Option)> { + match &self.export_provider { + Some(p) => { + p.list_patient_ids(tenant, request, cursor, batch_size) + .await + } + None => Err(export_unsupported()), + } + } + + async fn fetch_patient_compartment_batch( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + patient_ids: &[String], + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult { + match &self.export_provider { + Some(p) => { + p.fetch_patient_compartment_batch( + tenant, + request, + resource_type, + patient_ids, + cursor, + batch_size, + ) + .await + } + None => Err(export_unsupported()), + } + } +} + +#[async_trait] +impl GroupExportProvider for CompositeStorage { + async fn get_group_members( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.get_group_members(tenant, group_id).await, + None => Err(export_unsupported()), + } + } + + async fn resolve_group_patient_ids( + &self, + tenant: &TenantContext, + group_id: &str, + ) -> StorageResult> { + match &self.export_provider { + Some(p) => p.resolve_group_patient_ids(tenant, group_id).await, + None => Err(export_unsupported()), + } + } +} + #[cfg(test)] mod tests { use super::*; From c8cceb1fb4bd2f68c66fc7e767c551291a312141 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 28/81] feat(rest): add ExportFileAuth trait + BearerScopeAuth default impl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Authorizes the HFS-served (requires_access_token=true) download path using the helios_auth Principal — checks ownership against job_owner_subject (or system/* wildcard) plus a system/{ResourceType}.rs scope. Pre-signed downloads bypass HFS and never reach this trait. --- crates/rest/src/bulk_export_auth.rs | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 crates/rest/src/bulk_export_auth.rs diff --git a/crates/rest/src/bulk_export_auth.rs b/crates/rest/src/bulk_export_auth.rs new file mode 100644 index 000000000..e5f6fb6db --- /dev/null +++ b/crates/rest/src/bulk_export_auth.rs @@ -0,0 +1,97 @@ +//! Authorization for bulk-export file downloads. +//! +//! The [`ExportFileAuth`] trait gates the HFS-served download path +//! (`requiresAccessToken = true`). Pre-signed-URL downloads bypass HFS +//! entirely and never reach this trait. + +use async_trait::async_trait; +use helios_auth::Principal; +use helios_auth::scope::{ResourceTypeSpec, SmartPermissions}; +use helios_persistence::core::ExportFileMetadata; +use helios_persistence::tenant::TenantContext; + +/// Error returned when a download is not authorized. +#[derive(Debug, Clone)] +pub enum ExportAuthError { + /// No authenticated principal was supplied. + Unauthenticated, + /// The principal is not permitted to download this file. + Forbidden(String), +} + +impl std::fmt::Display for ExportAuthError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Unauthenticated => write!(f, "authentication required"), + Self::Forbidden(m) => write!(f, "forbidden: {m}"), + } + } +} + +impl std::error::Error for ExportAuthError {} + +/// Authorizes a bulk-export file download. +#[async_trait] +pub trait ExportFileAuth: Send + Sync { + /// Decides whether `principal` may download the file described by + /// `file_meta` for a job owned by `job_owner_subject`. + async fn authorize_download( + &self, + principal: Option<&Principal>, + tenant: &TenantContext, + job_owner_subject: Option<&str>, + file_meta: &ExportFileMetadata, + ) -> Result<(), ExportAuthError>; +} + +/// Returns true if the principal holds any `system/*` (wildcard) scope. +fn has_wildcard_scope(principal: &Principal) -> bool { + principal + .scopes + .scopes() + .iter() + .any(|s| s.resource_type == ResourceTypeSpec::Wildcard) +} + +/// The default [`ExportFileAuth`]: requires the kickoff Bearer token, the +/// job's owner-subject to match (or a `system/*` scope), and a +/// `system/{ResourceType}.rs` (read) scope covering the file's resource type. +#[derive(Debug, Clone, Default)] +pub struct BearerScopeAuth; + +#[async_trait] +impl ExportFileAuth for BearerScopeAuth { + async fn authorize_download( + &self, + principal: Option<&Principal>, + _tenant: &TenantContext, + job_owner_subject: Option<&str>, + file_meta: &ExportFileMetadata, + ) -> Result<(), ExportAuthError> { + // When auth is disabled there is no principal — no enforcement, as + // elsewhere in HFS. + let Some(principal) = principal else { + return Ok(()); + }; + + let owns_job = job_owner_subject == Some(principal.subject.as_str()); + let is_wildcard = has_wildcard_scope(principal); + if !owns_job && !is_wildcard { + return Err(ExportAuthError::Forbidden( + "principal does not own this export job".to_string(), + )); + } + + if !principal + .scopes + .is_permitted(&file_meta.resource_type, SmartPermissions::READ) + { + return Err(ExportAuthError::Forbidden(format!( + "missing read scope for {}", + file_meta.resource_type + ))); + } + + Ok(()) + } +} From 3731dd4a540b937fdfc345ae0f3249ef29a758dd Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 29/81] feat(rest): wire bulk-export Arc trait objects into AppState bulk_export_jobs: Arc, bulk_export_output: Arc, bulk_export_file_auth: Arc, plus an Arc. New with_bulk_export(...) builder and accessors so handlers can reach the subsystem behind feature toggles without touching the resource-storage S type parameter. --- crates/rest/src/state.rs | 64 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/crates/rest/src/state.rs b/crates/rest/src/state.rs index 99025e24b..323d1034c 100644 --- a/crates/rest/src/state.rs +++ b/crates/rest/src/state.rs @@ -8,9 +8,10 @@ use std::sync::Arc; use helios_audit::AuditSink; use helios_auth::AuthConfig; -use helios_persistence::core::ResourceStorage; +use helios_persistence::core::{BulkExportJobStore, ExportOutputStore, ResourceStorage}; -use crate::config::ServerConfig; +use crate::bulk_export_auth::ExportFileAuth; +use crate::config::{BulkExportConfig, ServerConfig}; use crate::middleware::auth::AuthMiddlewareState; /// Shared application state for the REST API. @@ -55,6 +56,18 @@ pub struct AppState { /// Optional subscription engine for FHIR topic-based subscriptions. #[cfg(feature = "subscriptions")] subscription_engine: Option>, + + /// Bulk export job-state store (claim + worker storage + lifecycle). + bulk_export_jobs: Option>, + + /// Bulk export output store (NDJSON files). + bulk_export_output: Option>, + + /// Bulk export download authorizer. + bulk_export_file_auth: Option>, + + /// Bulk export configuration. + bulk_export_config: Arc, } // Manually implement Clone since S is wrapped in Arc and doesn't need to be Clone @@ -69,6 +82,10 @@ impl Clone for AppState { audit_source_observer: self.audit_source_observer.clone(), #[cfg(feature = "subscriptions")] subscription_engine: self.subscription_engine.clone(), + bulk_export_jobs: self.bulk_export_jobs.clone(), + bulk_export_output: self.bulk_export_output.clone(), + bulk_export_file_auth: self.bulk_export_file_auth.clone(), + bulk_export_config: Arc::clone(&self.bulk_export_config), } } } @@ -81,6 +98,7 @@ impl AppState { /// * `storage` - The storage backend (wrapped in Arc) /// * `config` - Server configuration pub fn new(storage: Arc, config: ServerConfig) -> Self { + let bulk_export_config = Arc::new(config.bulk_export.clone()); Self { storage, config: Arc::new(config), @@ -90,6 +108,10 @@ impl AppState { audit_source_observer: "Device/hfs".to_string(), #[cfg(feature = "subscriptions")] subscription_engine: None, + bulk_export_jobs: None, + bulk_export_output: None, + bulk_export_file_auth: None, + bulk_export_config, } } @@ -112,6 +134,7 @@ impl AppState { audit_sink: Option>, audit_source_observer: impl Into, ) -> Self { + let bulk_export_config = Arc::new(config.bulk_export.clone()); Self { storage, config: Arc::new(config), @@ -121,9 +144,46 @@ impl AppState { audit_source_observer: audit_source_observer.into(), #[cfg(feature = "subscriptions")] subscription_engine: None, + bulk_export_jobs: None, + bulk_export_output: None, + bulk_export_file_auth: None, + bulk_export_config, } } + /// Wires the bulk-export job store, output store, and file authorizer. + pub fn with_bulk_export( + mut self, + jobs: Arc, + output: Arc, + file_auth: Arc, + ) -> Self { + self.bulk_export_jobs = Some(jobs); + self.bulk_export_output = Some(output); + self.bulk_export_file_auth = Some(file_auth); + self + } + + /// Returns the bulk-export job store, if configured. + pub fn bulk_export_jobs(&self) -> Option<&Arc> { + self.bulk_export_jobs.as_ref() + } + + /// Returns the bulk-export output store, if configured. + pub fn bulk_export_output(&self) -> Option<&Arc> { + self.bulk_export_output.as_ref() + } + + /// Returns the bulk-export download authorizer, if configured. + pub fn bulk_export_file_auth(&self) -> Option<&Arc> { + self.bulk_export_file_auth.as_ref() + } + + /// Returns the bulk-export configuration. + pub fn bulk_export_config(&self) -> &BulkExportConfig { + &self.bulk_export_config + } + /// Sets the subscription engine on this AppState. #[cfg(feature = "subscriptions")] pub fn with_subscription_engine( From 1de76d3fa05481baeb8098110661f3d76a8dee01 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 30/81] feat(rest): add BulkExportConfig with HFS_BULK_EXPORT_* env loading Full configuration surface: enabled, backend (embedded|postgres-s3), output_backend (local-fs|s3), output_dir, s3_bucket, requires_access_token (auto|true|false), file_url_ttl_secs, output_ttl_secs, worker_concurrency, disable_local_worker, max_concurrent_per_tenant, batch_size, lease_duration_secs, heartbeat_interval_secs, cleanup_interval_secs, since_newly_added (include|exclude). validate() rejects local-fs + requires_access_token=false (no pre-signed URL capability). --- crates/rest/src/config.rs | 211 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) diff --git a/crates/rest/src/config.rs b/crates/rest/src/config.rs index 45a4973ae..143357d84 100644 --- a/crates/rest/src/config.rs +++ b/crates/rest/src/config.rs @@ -237,6 +237,205 @@ impl MultitenancyConfig { } } +/// Configuration for the bulk data export subsystem. +#[derive(Debug, Clone)] +pub struct BulkExportConfig { + /// Master switch — when `false`, the `$export` endpoints return `501`. + pub enabled: bool, + /// Job-state backend: `embedded` (SQLite) or `postgres-s3` (PostgreSQL). + pub backend: String, + /// Output store: `local-fs` or `s3`. + pub output_backend: String, + /// Local-FS output root directory. + pub output_dir: Option, + /// S3 bucket for output (required when `output_backend = s3`). + pub s3_bucket: Option, + /// Manifest access-token posture: `auto`, `true`, or `false`. + pub requires_access_token: String, + /// Pre-signed download-URL lifetime, in seconds. + pub file_url_ttl_secs: u64, + /// How long output files are retained after job completion, in seconds. + pub output_ttl_secs: u64, + /// Maximum jobs this pod runs concurrently. + pub worker_concurrency: u32, + /// When `true`, this pod does not run in-process workers. + pub disable_local_worker: bool, + /// Cap on simultaneous in-flight jobs per tenant. + pub max_concurrent_per_tenant: u32, + /// Resources per `fetch_export_batch` call. + pub batch_size: u32, + /// Initial lease length issued at claim, in seconds. + pub lease_duration_secs: u64, + /// Worker heartbeat cadence, in seconds. + pub heartbeat_interval_secs: u64, + /// How often the cleanup task scans for expired outputs, in seconds. + pub cleanup_interval_secs: u64, + /// Group export `_since` toggle (`include` / `exclude`). + /// + /// When `exclude`, patients whose `Group.member.period.start` is *after* + /// the request's `_since` are filtered out of the export — implementing + /// the IG's optional "do not return resources from before the patient + /// joined the cohort" behavior. + pub since_newly_added: String, +} + +impl Default for BulkExportConfig { + fn default() -> Self { + Self { + enabled: true, + backend: "embedded".to_string(), + output_backend: "local-fs".to_string(), + output_dir: None, + s3_bucket: None, + requires_access_token: "auto".to_string(), + file_url_ttl_secs: 3600, + output_ttl_secs: 86400, + worker_concurrency: 2, + disable_local_worker: false, + max_concurrent_per_tenant: 4, + batch_size: 1000, + lease_duration_secs: 60, + heartbeat_interval_secs: 20, + cleanup_interval_secs: 300, + since_newly_added: "include".to_string(), + } + } +} + +impl BulkExportConfig { + /// Loads bulk-export configuration from `HFS_BULK_EXPORT_*` env vars. + pub fn from_env() -> Self { + fn env_bool(key: &str, default: bool) -> bool { + std::env::var(key) + .map(|s| { + let s = s.to_lowercase(); + s == "true" || s == "1" + }) + .unwrap_or(default) + } + fn env_u64(key: &str, default: u64) -> u64 { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) + } + fn env_u32(key: &str, default: u32) -> u32 { + std::env::var(key) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(default) + } + let d = Self::default(); + Self { + enabled: env_bool("HFS_BULK_EXPORT_ENABLED", d.enabled), + backend: std::env::var("HFS_BULK_EXPORT_BACKEND").unwrap_or(d.backend), + output_backend: std::env::var("HFS_BULK_EXPORT_OUTPUT_BACKEND") + .unwrap_or(d.output_backend), + output_dir: std::env::var("HFS_BULK_EXPORT_OUTPUT_DIR").ok(), + s3_bucket: std::env::var("HFS_BULK_EXPORT_S3_BUCKET").ok(), + requires_access_token: std::env::var("HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN") + .unwrap_or(d.requires_access_token), + file_url_ttl_secs: env_u64("HFS_BULK_EXPORT_FILE_URL_TTL", d.file_url_ttl_secs), + output_ttl_secs: env_u64("HFS_BULK_EXPORT_OUTPUT_TTL", d.output_ttl_secs), + worker_concurrency: env_u32("HFS_BULK_EXPORT_WORKER_CONCURRENCY", d.worker_concurrency), + disable_local_worker: env_bool( + "HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER", + d.disable_local_worker, + ), + max_concurrent_per_tenant: env_u32( + "HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT", + d.max_concurrent_per_tenant, + ), + batch_size: env_u32("HFS_BULK_EXPORT_BATCH_SIZE", d.batch_size), + lease_duration_secs: env_u64("HFS_BULK_EXPORT_LEASE_DURATION", d.lease_duration_secs), + heartbeat_interval_secs: env_u64( + "HFS_BULK_EXPORT_HEARTBEAT_INTERVAL", + d.heartbeat_interval_secs, + ), + cleanup_interval_secs: env_u64( + "HFS_BULK_EXPORT_CLEANUP_INTERVAL", + d.cleanup_interval_secs, + ), + since_newly_added: std::env::var("HFS_BULK_EXPORT_SINCE_NEWLY_ADDED") + .unwrap_or(d.since_newly_added), + } + } + + /// Validates the bulk-export configuration. + pub fn validate(&self) -> Result<(), Vec> { + let mut errors = Vec::new(); + if !matches!(self.backend.as_str(), "embedded" | "postgres-s3") { + errors.push(format!( + "HFS_BULK_EXPORT_BACKEND '{}' invalid (expected embedded|postgres-s3)", + self.backend + )); + } + if !matches!(self.output_backend.as_str(), "local-fs" | "s3") { + errors.push(format!( + "HFS_BULK_EXPORT_OUTPUT_BACKEND '{}' invalid (expected local-fs|s3)", + self.output_backend + )); + } + if self.output_backend == "s3" && self.s3_bucket.is_none() { + errors.push("HFS_BULK_EXPORT_S3_BUCKET is required when OUTPUT_BACKEND=s3".to_string()); + } + if !matches!( + self.requires_access_token.as_str(), + "auto" | "true" | "false" + ) { + errors.push(format!( + "HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN '{}' invalid (expected auto|true|false)", + self.requires_access_token + )); + } + // local-fs has no pre-signed-URL capability. + if self.output_backend == "local-fs" && self.requires_access_token == "false" { + errors.push( + "HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false is invalid with OUTPUT_BACKEND=local-fs" + .to_string(), + ); + } + if self.file_url_ttl_secs == 0 { + errors.push("HFS_BULK_EXPORT_FILE_URL_TTL must be > 0".to_string()); + } + if self.output_ttl_secs == 0 { + errors.push("HFS_BULK_EXPORT_OUTPUT_TTL must be > 0".to_string()); + } + if self.worker_concurrency == 0 { + errors.push("HFS_BULK_EXPORT_WORKER_CONCURRENCY must be >= 1".to_string()); + } + if self.max_concurrent_per_tenant == 0 { + errors.push("HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT must be >= 1".to_string()); + } + if self.batch_size == 0 { + errors.push("HFS_BULK_EXPORT_BATCH_SIZE must be >= 1".to_string()); + } + if self.heartbeat_interval_secs == 0 { + errors.push("HFS_BULK_EXPORT_HEARTBEAT_INTERVAL must be > 0".to_string()); + } + if self.lease_duration_secs <= self.heartbeat_interval_secs { + errors.push( + "HFS_BULK_EXPORT_LEASE_DURATION must be greater than HEARTBEAT_INTERVAL" + .to_string(), + ); + } + if !matches!(self.since_newly_added.as_str(), "include" | "exclude") { + errors.push(format!( + "HFS_BULK_EXPORT_SINCE_NEWLY_ADDED '{}' invalid (expected include|exclude)", + self.since_newly_added + )); + } + if self.cleanup_interval_secs == 0 { + errors.push("HFS_BULK_EXPORT_CLEANUP_INTERVAL must be > 0".to_string()); + } + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } +} + /// Server configuration for the FHIR REST API. /// /// This struct can be constructed from environment variables using [`ServerConfig::from_env`], @@ -383,6 +582,10 @@ pub struct ServerConfig { /// Multitenancy configuration (loaded from environment variables). #[arg(skip)] pub multitenancy: MultitenancyConfig, + + /// Bulk data export configuration (loaded from environment variables). + #[arg(skip)] + pub bulk_export: BulkExportConfig, } impl ServerConfig { @@ -422,6 +625,7 @@ impl Default for ServerConfig { elasticsearch_password: None, terminology_server: None, multitenancy: MultitenancyConfig::default(), + bulk_export: BulkExportConfig::default(), } } } @@ -436,6 +640,8 @@ impl ServerConfig { let mut config = Self::try_parse().unwrap_or_default(); // Load multitenancy config from environment config.multitenancy = MultitenancyConfig::from_env(); + // Load bulk export config from environment + config.bulk_export = BulkExportConfig::from_env(); config } @@ -473,6 +679,10 @@ impl ServerConfig { errors.push("Default page size cannot exceed max page size".to_string()); } + if let Err(mut bulk_errors) = self.bulk_export.validate() { + errors.append(&mut bulk_errors); + } + if errors.is_empty() { Ok(()) } else { @@ -513,6 +723,7 @@ impl ServerConfig { elasticsearch_password: None, terminology_server: None, multitenancy: MultitenancyConfig::default(), + bulk_export: BulkExportConfig::default(), } } From ec991e9110599efe767211574f39691a6840f3f7 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 31/81] feat(rest): add bulk-export HTTP handlers (kick-off / status / cancel / download) Three route-specific kick-off wrappers (system/patient/group) over a shared kickoff_export. Parses _type/_since/_until/_typeFilter/_outputFormat/ _elements/patient via raw query (form_urlencoded) so repeated keys aren't dropped, validates _typeFilter against the request's _type set (rejects result-control params), enforces SmartScopePolicy per requested resource type + Group, enforces the per-tenant concurrency cap via count_active_exports, and persists frozen kickoff metadata via StartExportInput. Status assembles the wire ExportManifest from RawExportManifest + ExportOutputStore::download_url. Cancel runs the two-step output-then-job teardown. Download authorizes via ExportFileAuth and streams the file. Emits AuditEvents through state.audit_sink() at every lifecycle step. --- crates/rest/src/handlers/bulk_export.rs | 816 ++++++++++++++++++++++++ 1 file changed, 816 insertions(+) create mode 100644 crates/rest/src/handlers/bulk_export.rs diff --git a/crates/rest/src/handlers/bulk_export.rs b/crates/rest/src/handlers/bulk_export.rs new file mode 100644 index 000000000..c26e72165 --- /dev/null +++ b/crates/rest/src/handlers/bulk_export.rs @@ -0,0 +1,816 @@ +//! FHIR Bulk Data Export (`$export`) handlers. +//! +//! Implements the asynchronous kick-off → poll → manifest → download → delete +//! flow from the [Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/). + +use std::time::Duration; + +use axum::{ + body::Body, + extract::{Path, Request, State}, + http::{HeaderMap, Method, StatusCode}, + response::Response, +}; +use chrono::Utc; +use helios_auth::Principal; +use helios_fhir::FhirVersion; +use helios_persistence::core::ExportDataProvider; +use helios_persistence::core::{ + ExportJobId, ExportLevel, ExportManifest, ExportOutputFile, ExportRequest, ExportStatus, + GroupExportProvider, PatientExportProvider, ResourceStorage, StartExportInput, TypeFilter, +}; +use helios_persistence::error::{BulkExportError, StorageError}; + +use crate::error::{RestError, RestResult}; +use crate::extractors::{FhirVersionExtractor, TenantExtractor}; +use crate::state::AppState; + +/// Trait bound shared by all bulk-export handlers (the resource-store side). +pub trait ExportResourceStore: + ResourceStorage + ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} +impl ExportResourceStore for S where + S: ResourceStorage + ExportDataProvider + PatientExportProvider + GroupExportProvider +{ +} + +/// Search-result-control params that are NOT valid inside `_typeFilter`. +const FORBIDDEN_FILTER_PARAMS: &[&str] = + &["_include", "_revinclude", "_sort", "_count", "_elements"]; + +fn not_implemented() -> RestError { + RestError::NotImplemented { + feature: "Bulk Data Export is disabled (HFS_BULK_EXPORT_ENABLED=false)".to_string(), + } +} + +fn bad_request(msg: impl Into) -> RestError { + RestError::BadRequest { + message: msg.into(), + } +} + +/// Parses a raw query string into ordered key/value pairs (repeated keys kept). +fn parse_query_pairs(raw: Option<&str>) -> Vec<(String, String)> { + match raw { + None => Vec::new(), + Some(q) => url::form_urlencoded::parse(q.as_bytes()) + .map(|(k, v)| (k.into_owned(), v.into_owned())) + .collect(), + } +} + +/// Collects all values for `key`, splitting each on `,`. +fn collect_multi(pairs: &[(String, String)], key: &str) -> Vec { + pairs + .iter() + .filter(|(k, _)| k == key) + .flat_map(|(_, v)| v.split(',').map(|s| s.trim().to_string())) + .filter(|s| !s.is_empty()) + .collect() +} + +/// Returns the first value for `key`, if any. +fn first_value(pairs: &[(String, String)], key: &str) -> Option { + pairs.iter().find(|(k, _)| k == key).map(|(_, v)| v.clone()) +} + +/// Parses a FHIR `instant` into a UTC datetime. +fn parse_instant(s: &str) -> Result, RestError> { + chrono::DateTime::parse_from_rfc3339(s) + .map(|dt| dt.with_timezone(&Utc)) + .map_err(|e| bad_request(format!("invalid instant '{s}': {e}"))) +} + +/// Reads the `Prefer: handling=` directive (`strict` / `lenient`). +fn prefer_handling(headers: &HeaderMap) -> Option { + headers + .get("prefer") + .and_then(|v| v.to_str().ok()) + .and_then(|p| { + p.split(',') + .map(|s| s.trim()) + .find_map(|s| s.strip_prefix("handling=")) + .map(|s| s.to_ascii_lowercase()) + }) +} + +/// Returns true if `Prefer: respond-async` is present. +fn has_respond_async(headers: &HeaderMap) -> bool { + headers + .get("prefer") + .and_then(|v| v.to_str().ok()) + .map(|p| { + p.split(',') + .any(|s| s.trim().eq_ignore_ascii_case("respond-async")) + }) + .unwrap_or(false) +} + +/// Builds the parameter pairs from a POST `Parameters` resource body. +fn pairs_from_parameters(body: &serde_json::Value) -> Vec<(String, String)> { + let mut pairs = Vec::new(); + if let Some(arr) = body.get("parameter").and_then(|p| p.as_array()) { + for p in arr { + let Some(name) = p.get("name").and_then(|n| n.as_str()) else { + continue; + }; + // Accept valueString / valueUri / valueInstant / valueCode etc. + let value = p + .get("valueString") + .or_else(|| p.get("valueUri")) + .or_else(|| p.get("valueInstant")) + .or_else(|| p.get("valueCode")) + .or_else(|| p.get("valueDateTime")) + .and_then(|v| v.as_str()) + .or_else(|| { + // patient reference: { name: "patient", valueReference: { reference } } + p.get("valueReference") + .and_then(|r| r.get("reference")) + .and_then(|r| r.as_str()) + }); + if let Some(v) = value { + pairs.push((name.to_string(), v.to_string())); + } + } + } + pairs +} + +/// Shared kick-off logic for all three export levels. +#[allow(clippy::too_many_arguments)] +async fn kickoff_export( + state: &AppState, + tenant: &TenantExtractor, + principal: Option<&Principal>, + level: ExportLevel, + fhir_version: FhirVersion, + method: &Method, + headers: &HeaderMap, + raw_query: Option<&str>, + full_url: &str, + body: Option<&serde_json::Value>, +) -> RestResult +where + S: ExportResourceStore + Send + Sync, +{ + let cfg = state.bulk_export_config(); + if !cfg.enabled { + return Err(not_implemented()); + } + let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?; + + if !has_respond_async(headers) { + return Err(bad_request( + "the `Prefer: respond-async` header is required for $export", + )); + } + + let is_post = method == Method::POST; + let mut pairs = parse_query_pairs(raw_query); + if is_post { + if let Some(b) = body { + pairs.extend(pairs_from_parameters(b)); + } + } + + // _outputFormat + let output_format = first_value(&pairs, "_outputFormat") + .unwrap_or_else(|| "application/fhir+ndjson".to_string()); + if !matches!( + output_format.as_str(), + "application/fhir+ndjson" | "application/ndjson" | "ndjson" + ) { + return Err(bad_request(format!( + "unsupported _outputFormat '{output_format}'" + ))); + } + + // _type + let resource_types = collect_multi(&pairs, "_type"); + + // _since / _until + let since = match first_value(&pairs, "_since") { + Some(s) => Some(parse_instant(&s)?), + None => None, + }; + let until = match first_value(&pairs, "_until") { + Some(s) => Some(parse_instant(&s)?), + None => None, + }; + + // _elements + let elements = collect_multi(&pairs, "_elements"); + + // _typeFilter + let mut type_filters = Vec::new(); + for tf in pairs.iter().filter(|(k, _)| k == "_typeFilter") { + let raw = &tf.1; + let (rt, query) = raw + .split_once('?') + .ok_or_else(|| bad_request(format!("malformed _typeFilter '{raw}'")))?; + if !resource_types.is_empty() && !resource_types.iter().any(|t| t == rt) { + return Err(bad_request(format!( + "_typeFilter resource type '{rt}' is not in _type" + ))); + } + for (pk, _) in url::form_urlencoded::parse(query.as_bytes()) { + if FORBIDDEN_FILTER_PARAMS.contains(&pk.as_ref()) { + return Err(bad_request(format!( + "_typeFilter may not contain result-control param '{pk}'" + ))); + } + } + type_filters.push(TypeFilter::new(rt, query)); + } + + // patient (POST only) + let patient_refs = collect_multi(&pairs, "patient"); + if !patient_refs.is_empty() { + if matches!(level, ExportLevel::System) { + return Err(bad_request( + "the `patient` parameter is not valid for system-level export", + )); + } + // Validate each patient reference resolves. + for pref in &patient_refs { + let id = pref.strip_prefix("Patient/").unwrap_or(pref); + let exists = state + .storage() + .read(tenant.context(), "Patient", id) + .await + .map_err(map_storage_err)? + .is_some(); + if !exists { + return Err(bad_request(format!("unknown patient reference '{pref}'"))); + } + } + // For group-level, each must be a member of the group. + if let ExportLevel::Group { group_id } = &level { + let members = state + .storage() + .resolve_group_patient_ids(tenant.context(), group_id) + .await + .map_err(map_storage_err)?; + for pref in &patient_refs { + let id = pref.strip_prefix("Patient/").unwrap_or(pref); + if !members.iter().any(|m| m == id) { + return Err(bad_request(format!( + "patient '{pref}' is not a member of Group/{group_id}" + ))); + } + } + } + } + + // Unsupported parameters — strict vs lenient. + let handling = prefer_handling(headers); + let unsupported: Vec<&str> = [ + "includeAssociatedData", + "organizeOutputBy", + "allowPartialManifests", + ] + .into_iter() + .filter(|p| pairs.iter().any(|(k, _)| k == p)) + .collect(); + if !unsupported.is_empty() { + if handling.as_deref() == Some("strict") { + return Err(bad_request(format!( + "unsupported parameters: {}", + unsupported.join(", ") + ))); + } else { + tracing::warn!( + "ignoring unsupported bulk-export parameters: {}", + unsupported.join(", ") + ); + } + } + + // Authorization — every requested type needs read scope; Group also needs Group read. + if let Some(p) = principal { + let types_to_check = if resource_types.is_empty() { + // Whole-scope export — require a wildcard read or accept (best effort). + vec![] + } else { + resource_types.clone() + }; + for t in &types_to_check { + helios_auth::SmartScopePolicy::check(p, t, helios_auth::FhirOperation::Read).map_err( + |e| RestError::Forbidden { + message: e.to_string(), + }, + )?; + } + if matches!(level, ExportLevel::Group { .. }) { + helios_auth::SmartScopePolicy::check(p, "Group", helios_auth::FhirOperation::Read) + .map_err(|e| RestError::Forbidden { + message: e.to_string(), + })?; + } + } + + // Per-tenant concurrency cap. + let active = jobs + .count_active_exports(tenant.context()) + .await + .map_err(map_storage_err)?; + if active >= cfg.max_concurrent_per_tenant as u64 { + return Err(RestError::BadRequest { + message: format!( + "too many concurrent exports for this tenant (max {})", + cfg.max_concurrent_per_tenant + ), + }); + } + + let request = ExportRequest { + level: level.clone(), + resource_types, + since, + until, + type_filters, + elements, + include_associated_data: Vec::new(), + patient_refs, + batch_size: cfg.batch_size, + output_format, + }; + + let input = StartExportInput { + request, + transaction_time: Utc::now(), + request_url: full_url.to_string(), + owner_subject: principal.map(|p| p.subject.clone()), + fhir_version, + }; + + let request_clone = input.request.clone(); + let job_id = jobs + .start_export(tenant.context(), input) + .await + .map_err(map_storage_err)?; + + emit_export_audit( + state, + principal, + "kickoff", + job_id.as_str(), + &request_clone.level, + &request_clone.resource_types, + "0", + ) + .await; + + let status_url = format!( + "{}/export-status/{}", + state.base_url().trim_end_matches('/'), + job_id + ); + + Response::builder() + .status(StatusCode::ACCEPTED) + .header("Content-Location", status_url) + .body(Body::empty()) + .map_err(|e| RestError::InternalError { + message: e.to_string(), + }) +} + +/// Maps a persistence error to a REST error. +fn map_storage_err(e: StorageError) -> RestError { + match e { + StorageError::BulkExport(BulkExportError::JobNotFound { job_id }) => RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id, + }, + StorageError::Backend(helios_persistence::error::BackendError::UnsupportedCapability { + .. + }) => RestError::NotImplemented { + feature: "bulk export not supported by this backend".to_string(), + }, + other => RestError::InternalError { + message: other.to_string(), + }, + } +} + +// --------------------------------------------------------------------------- +// Route handlers +// --------------------------------------------------------------------------- + +/// `GET|POST /$export` — system-level kick-off. +pub async fn system_export_kickoff_handler( + State(state): State>, + tenant: TenantExtractor, + version: FhirVersionExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + run_kickoff(state, tenant, version, ExportLevel::System, request).await +} + +/// `GET|POST /Patient/$export` — patient-level kick-off. +pub async fn patient_export_kickoff_handler( + State(state): State>, + tenant: TenantExtractor, + version: FhirVersionExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + run_kickoff(state, tenant, version, ExportLevel::Patient, request).await +} + +/// `GET|POST /Group/{id}/$export` — group-level kick-off. +pub async fn group_export_kickoff_handler( + State(state): State>, + Path(group_id): Path, + tenant: TenantExtractor, + version: FhirVersionExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + run_kickoff( + state, + tenant, + version, + ExportLevel::Group { group_id }, + request, + ) + .await +} + +/// Shared body of the three kick-off wrappers. +async fn run_kickoff( + state: AppState, + tenant: TenantExtractor, + version: FhirVersionExtractor, + level: ExportLevel, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + let method = request.method().clone(); + let headers = request.headers().clone(); + let uri = request.uri().clone(); + let raw_query = uri.query().map(|q| q.to_string()); + let full_url = format!( + "{}{}", + state.base_url().trim_end_matches('/'), + uri.path_and_query() + .map(|pq| pq.as_str()) + .unwrap_or(uri.path()) + ); + let principal = request.extensions().get::().cloned(); + + let body_json: Option = if method == Method::POST { + let bytes = axum::body::to_bytes(request.into_body(), 1024 * 1024) + .await + .map_err(|e| bad_request(format!("failed to read request body: {e}")))?; + if bytes.is_empty() { + None + } else { + Some( + serde_json::from_slice(&bytes) + .map_err(|e| bad_request(format!("invalid Parameters JSON: {e}")))?, + ) + } + } else { + None + }; + + kickoff_export( + &state, + &tenant, + principal.as_ref(), + level, + version.storage_version(), + &method, + &headers, + raw_query.as_deref(), + &full_url, + body_json.as_ref(), + ) + .await +} + +/// `GET /export-status/{job_id}` — poll status / fetch manifest. +pub async fn export_status_handler( + State(state): State>, + Path(job_id): Path, + tenant: TenantExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + let cfg = state.bulk_export_config(); + if !cfg.enabled { + return Err(not_implemented()); + } + let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?; + let output = state.bulk_export_output().ok_or_else(not_implemented)?; + let principal = request.extensions().get::().cloned(); + let job_id = ExportJobId::from_string(job_id); + + // Ownership check first (do not leak existence). + let meta = match jobs + .get_export_job_metadata(tenant.context(), &job_id) + .await + { + Ok(m) => m, + Err(_) => { + return Err(RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id.to_string(), + }); + } + }; + if !owns_job(principal.as_ref(), meta.owner_subject.as_deref()) { + return Err(RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id.to_string(), + }); + } + + match meta.status { + ExportStatus::Accepted | ExportStatus::InProgress => { + let progress = jobs + .get_export_status(tenant.context(), &job_id) + .await + .map_err(map_storage_err)?; + let x_progress = progress + .current_type + .clone() + .unwrap_or_else(|| format!("{:.0}%", progress.overall_progress() * 100.0)); + Response::builder() + .status(StatusCode::ACCEPTED) + .header("X-Progress", x_progress) + .header("Retry-After", "120") + .body(Body::empty()) + .map_err(|e| RestError::InternalError { + message: e.to_string(), + }) + } + ExportStatus::Complete => { + let raw = jobs + .get_export_manifest(tenant.context(), &job_id) + .await + .map_err(map_storage_err)?; + let ttl = Duration::from_secs(cfg.file_url_ttl_secs); + let mut output_files = Vec::new(); + let mut error_files = Vec::new(); + let mut requires_token = true; + for entry in &raw.output { + let url = output + .download_url(&entry.key, ttl) + .await + .map_err(map_storage_err)?; + requires_token = url.requires_access_token; + output_files.push(ExportOutputFile { + resource_type: entry.resource_type.clone(), + url: url.url, + count: Some(entry.count), + }); + } + for entry in &raw.errors { + let url = output + .download_url(&entry.key, ttl) + .await + .map_err(map_storage_err)?; + error_files.push(ExportOutputFile { + resource_type: entry.resource_type.clone(), + url: url.url, + count: Some(entry.count), + }); + } + let manifest = ExportManifest { + transaction_time: raw.transaction_time, + request: raw.request_url, + requires_access_token: requires_token, + output: output_files, + error: error_files, + deleted: Vec::new(), + link: Vec::new(), + message: None, + extension: None, + }; + let body = serde_json::to_vec(&manifest).map_err(|e| RestError::InternalError { + message: e.to_string(), + })?; + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/json") + .body(Body::from(body)) + .map_err(|e| RestError::InternalError { + message: e.to_string(), + }) + } + ExportStatus::Error => Err(RestError::InternalError { + message: "export job failed".to_string(), + }), + ExportStatus::Cancelled => Err(RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id.to_string(), + }), + } +} + +/// `DELETE /export-status/{job_id}` — cancel + delete a job. +pub async fn export_cancel_handler( + State(state): State>, + Path(job_id): Path, + tenant: TenantExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + let cfg = state.bulk_export_config(); + if !cfg.enabled { + return Err(not_implemented()); + } + let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?; + let output = state.bulk_export_output().ok_or_else(not_implemented)?; + let principal = request.extensions().get::().cloned(); + let job_id = ExportJobId::from_string(job_id); + + let meta = match jobs + .get_export_job_metadata(tenant.context(), &job_id) + .await + { + Ok(m) => m, + Err(_) => { + return Err(RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id.to_string(), + }); + } + }; + if !owns_job(principal.as_ref(), meta.owner_subject.as_deref()) { + return Err(RestError::NotFound { + resource_type: "export-job".to_string(), + id: job_id.to_string(), + }); + } + + // Cancel if still active (cooperative — worker observes it). + if meta.status.is_active() { + let _ = jobs.cancel_export(tenant.context(), &job_id).await; + } + // REST owns the two-step teardown: outputs first, then job rows. + output + .delete_job_outputs(tenant.context(), &job_id) + .await + .map_err(map_storage_err)?; + jobs.delete_export(tenant.context(), &job_id) + .await + .map_err(map_storage_err)?; + + emit_export_audit( + &state, + principal.as_ref(), + "delete", + job_id.as_str(), + &meta.level, + &[], + "0", + ) + .await; + + Response::builder() + .status(StatusCode::ACCEPTED) + .body(Body::empty()) + .map_err(|e| RestError::InternalError { + message: e.to_string(), + }) +} + +/// `GET /export-file/{job_id}/{part}` — HFS-served NDJSON download. +pub async fn export_download_handler( + State(state): State>, + Path((job_id, part)): Path<(String, String)>, + tenant: TenantExtractor, + request: Request, +) -> RestResult +where + S: ExportResourceStore + Send + Sync + 'static, +{ + let cfg = state.bulk_export_config(); + if !cfg.enabled { + return Err(not_implemented()); + } + let jobs = state.bulk_export_jobs().ok_or_else(not_implemented)?; + let output = state.bulk_export_output().ok_or_else(not_implemented)?; + let file_auth = state.bulk_export_file_auth().ok_or_else(not_implemented)?; + let principal = request.extensions().get::().cloned(); + let job_id = ExportJobId::from_string(job_id); + + let file_meta = jobs + .get_export_file_metadata(tenant.context(), &job_id, &part) + .await + .map_err(|_| RestError::NotFound { + resource_type: "export-file".to_string(), + id: format!("{job_id}/{part}"), + })?; + + file_auth + .authorize_download( + principal.as_ref(), + tenant.context(), + file_meta.job_owner_subject.as_deref(), + &file_meta, + ) + .await + .map_err(|e| RestError::Forbidden { + message: e.to_string(), + })?; + + emit_export_audit( + &state, + principal.as_ref(), + "download", + job_id.as_str(), + &ExportLevel::System, + std::slice::from_ref(&file_meta.resource_type), + "0", + ) + .await; + + let mut reader = output + .open_reader(&file_meta.key) + .await + .map_err(map_storage_err)?; + let mut bytes = Vec::new(); + tokio::io::AsyncReadExt::read_to_end(&mut reader, &mut bytes) + .await + .map_err(|e| RestError::InternalError { + message: format!("failed to read export file: {e}"), + })?; + + Response::builder() + .status(StatusCode::OK) + .header("Content-Type", "application/fhir+ndjson") + .body(Body::from(bytes)) + .map_err(|e| RestError::InternalError { + message: e.to_string(), + }) +} + +/// Emits a bulk-export lifecycle `AuditEvent` when an audit sink is configured. +async fn emit_export_audit( + state: &AppState, + principal: Option<&Principal>, + operation: &str, + job_id: &str, + level: &ExportLevel, + resource_types: &[String], + outcome: &str, +) where + S: ResourceStorage, +{ + let Some(sink) = state.audit_sink() else { + return; + }; + let mut builder = helios_audit::AuditEventBuilder::new(state.audit_source_observer()) + .event_type( + "http://terminology.hl7.org/CodeSystem/audit-event-type", + "object", + ) + .action(helios_audit::AuditAction::Execute) + .outcome(outcome) + .detail("audit-operation", "bulk-export") + .detail("bulk-export-operation", operation) + .detail("job-id", job_id) + .detail("export-level", level.to_string()); + if !resource_types.is_empty() { + builder = builder.detail("resource-types", resource_types.join(",")); + } + if let Some(p) = principal { + builder = builder.agent(&p.subject, None, true); + } + sink.record(builder.build()).await; +} + +/// Ownership check: the principal owns the job, holds a `system/*` scope, or +/// auth is disabled (no principal). +fn owns_job(principal: Option<&Principal>, owner_subject: Option<&str>) -> bool { + match principal { + None => true, // auth disabled — no ownership enforcement + Some(p) => { + owner_subject == Some(p.subject.as_str()) + || p.scopes + .scopes() + .iter() + .any(|s| s.resource_type == helios_auth::scope::ResourceTypeSpec::Wildcard) + } + } +} From b975df78d1b03a64dbb7387ccf4363313fe4ae73 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 32/81] chore(rest): register bulk_export handler module + re-exports --- crates/rest/src/handlers/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/rest/src/handlers/mod.rs b/crates/rest/src/handlers/mod.rs index 3fedc4e47..8549f79fe 100644 --- a/crates/rest/src/handlers/mod.rs +++ b/crates/rest/src/handlers/mod.rs @@ -16,6 +16,7 @@ //! - [`health`] - Health check endpoint pub mod batch; +pub mod bulk_export; pub mod capabilities; pub mod compartment; pub mod create; @@ -52,6 +53,10 @@ pub(crate) fn extract_patient_from_resource( // Re-export handlers for convenience pub use batch::batch_handler; +pub use bulk_export::{ + export_cancel_handler, export_download_handler, export_status_handler, + group_export_kickoff_handler, patient_export_kickoff_handler, system_export_kickoff_handler, +}; pub use capabilities::capabilities_handler; pub use compartment::compartment_search_handler; pub use create::create_handler; From e37a1eded13da0e3b40248e733f364fdb52355a6 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:54 -0400 Subject: [PATCH 33/81] feat(rest): advertise $export operations in CapabilityStatement System-level operations on rest[0].operation: export, patient-export, group-export. Per-resource operations on Patient and Group (rest[0].resource[].operation: export). Top-level instantiates points at the Bulk Data IG CapabilityStatement. All edits gated on HFS_BULK_EXPORT_ENABLED. --- crates/rest/src/handlers/capabilities.rs | 69 +++++++++++++++++++----- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/crates/rest/src/handlers/capabilities.rs b/crates/rest/src/handlers/capabilities.rs index 3a99f1b93..664d7c9c8 100644 --- a/crates/rest/src/handlers/capabilities.rs +++ b/crates/rest/src/handlers/capabilities.rs @@ -139,7 +139,18 @@ where formats.push("application/fhir+xml"); } - serde_json::json!({ + let mut operations = vec![ + serde_json::json!({ + "name": "validate", + "definition": "http://hl7.org/fhir/OperationDefinition/Resource-validate" + }), + serde_json::json!({ + "name": "versions", + "definition": "http://hl7.org/fhir/OperationDefinition/CapabilityStatement-versions" + }), + ]; + + let mut statement = serde_json::json!({ "resourceType": "CapabilityStatement", "status": "active", "date": chrono::Utc::now().to_rfc3339(), @@ -164,23 +175,34 @@ where { "code": "history-system" }, { "code": "search-system" } ], - "operation": [ - { - "name": "validate", - "definition": "http://hl7.org/fhir/OperationDefinition/Resource-validate" - }, - { - "name": "versions", - "definition": "http://hl7.org/fhir/OperationDefinition/CapabilityStatement-versions" - } - ] }] - }) + }); + + // Advertise Bulk Data Export operations when enabled. + if state.bulk_export_config().enabled { + operations.push(serde_json::json!({ + "name": "export", + "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/export" + })); + operations.push(serde_json::json!({ + "name": "patient-export", + "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/patient-export" + })); + operations.push(serde_json::json!({ + "name": "group-export", + "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/group-export" + })); + statement["instantiates"] = + serde_json::json!(["http://hl7.org/fhir/uv/bulkdata/CapabilityStatement/bulk-data"]); + } + + statement["rest"][0]["operation"] = serde_json::Value::Array(operations); + statement } /// Builds the capability entry for a resource type. fn build_resource_capability(resource_type: &str) -> serde_json::Value { - serde_json::json!({ + let mut entry = serde_json::json!({ "type": resource_type, "profile": format!("http://hl7.org/fhir/StructureDefinition/{}", resource_type), "interaction": [ @@ -204,7 +226,26 @@ fn build_resource_capability(resource_type: &str) -> serde_json::Value { "searchInclude": ["*"], "searchRevInclude": ["*"], "searchParam": build_common_search_params() - }) + }); + // Bulk Data Access IG: per-resource `$export` operation entries on Patient + // and Group, in addition to the system-level `$export` advertised at + // `rest[0].operation`. + match resource_type { + "Patient" => { + entry["operation"] = serde_json::json!([{ + "name": "export", + "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/patient-export" + }]); + } + "Group" => { + entry["operation"] = serde_json::json!([{ + "name": "export", + "definition": "http://hl7.org/fhir/uv/bulkdata/OperationDefinition/group-export" + }]); + } + _ => {} + } + entry } /// Builds common search parameters supported by all resources. From faf8d2a64faae838cb5e41fc8cd8ed59cb6e748e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:55 -0400 Subject: [PATCH 34/81] feat(rest): register bulk-export routes + add export-data-provider trait bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /$export, /Patient/$export, /Group/{id}/$export (each accepts GET + POST), /export-status/{job_id} (GET + DELETE), and /export-file/{job_id}/{part} (GET) — registered before the /{resource_type} catch-all. Adds ExportDataProvider / PatientExportProvider / GroupExportProvider to the create_fhir_router and create_routes / create_standard_routes / create_url_tenant_routes / create_combined_routes S bound. --- crates/rest/src/routing/fhir_routes.rs | 39 ++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/crates/rest/src/routing/fhir_routes.rs b/crates/rest/src/routing/fhir_routes.rs index 2b79524c0..c2fce31fb 100644 --- a/crates/rest/src/routing/fhir_routes.rs +++ b/crates/rest/src/routing/fhir_routes.rs @@ -59,6 +59,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -78,6 +81,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -96,6 +102,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -119,6 +128,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -181,6 +193,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -198,6 +213,30 @@ where ) .route("/_history", get(handlers::history_system_handler::)) .route("/", post(handlers::batch_handler::)) + // Bulk Data Export ($export) — operation routes precede the catch-all. + .route( + "/$export", + get(handlers::system_export_kickoff_handler::) + .post(handlers::system_export_kickoff_handler::), + ) + .route( + "/Patient/$export", + get(handlers::patient_export_kickoff_handler::) + .post(handlers::patient_export_kickoff_handler::), + ) + .route( + "/Group/{id}/$export", + get(handlers::group_export_kickoff_handler::) + .post(handlers::group_export_kickoff_handler::), + ) + .route( + "/export-status/{job_id}", + get(handlers::export_status_handler::).delete(handlers::export_cancel_handler::), + ) + .route( + "/export-file/{job_id}/{part}", + get(handlers::export_download_handler::), + ) // Type-level routes .route("/{resource_type}", get(handlers::search_get_handler::)) .route("/{resource_type}", post(handlers::create_handler::)) From bbb39be82de41766b589d62aea0c9e2ebe47fb01 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:47:55 -0400 Subject: [PATCH 35/81] feat(rest): add create_app_with_auth_and_bulk_export + BulkExportBundle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors the app builder so create_app_with_auth keeps its existing storage: S signature while a new create_app_with_auth_and_bulk_export(storage: Arc, …, bundle) shares the inner build_app implementation. BulkExportBundle bundles the job store, output store, and file authorizer the bootstrap wires into AppState. Adds the export-data-provider trait bounds to create_app / create_app_with_config / create_app_with_auth and exposes bulk_export_auth as a new pub mod. --- crates/rest/src/lib.rs | 95 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/crates/rest/src/lib.rs b/crates/rest/src/lib.rs index 71f45ebbe..766c8e9a2 100644 --- a/crates/rest/src/lib.rs +++ b/crates/rest/src/lib.rs @@ -138,6 +138,7 @@ #![warn(missing_docs)] #![warn(rustdoc::missing_crate_level_docs)] +pub mod bulk_export_auth; pub mod config; pub mod error; pub mod extractors; @@ -196,6 +197,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -234,6 +238,9 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -247,6 +254,17 @@ where ) } +/// The bulk-export job store, output store, and download authorizer, wired +/// into [`AppState`] by [`create_app_with_auth_and_bulk_export`]. +pub struct BulkExportBundle { + /// Job-state store (claim + worker storage + lifecycle). + pub jobs: Arc, + /// Output store for NDJSON parts. + pub output: Arc, + /// Download authorizer. + pub file_auth: Arc, +} + /// Creates the Axum application with custom configuration and optional authentication. /// /// When `auth_state` is `Some`, authentication and authorization middleware @@ -267,6 +285,75 @@ where + SearchProvider + InstanceHistoryProvider + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + + Send + + Sync + + 'static, +{ + build_app( + Arc::new(storage), + config, + auth_config, + auth_state, + audit_state, + None, + ) +} + +/// Like [`create_app_with_auth`], but also wires the bulk-export subsystem +/// (job store, output store, download authorizer) into the application state. +pub fn create_app_with_auth_and_bulk_export( + storage: Arc, + config: ServerConfig, + auth_config: helios_auth::AuthConfig, + auth_state: Option>, + audit_state: Option>, + bulk_export: BulkExportBundle, +) -> Router +where + S: ResourceStorage + + ConditionalStorage + + SearchProvider + + InstanceHistoryProvider + + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + + Send + + Sync + + 'static, +{ + build_app( + storage, + config, + auth_config, + auth_state, + audit_state, + Some(bulk_export), + ) +} + +/// Internal app builder shared by [`create_app_with_auth`] and +/// [`create_app_with_auth_and_bulk_export`]. +fn build_app( + storage: Arc, + config: ServerConfig, + auth_config: helios_auth::AuthConfig, + auth_state: Option>, + audit_state: Option>, + bulk_export: Option, +) -> Router +where + S: ResourceStorage + + ConditionalStorage + + SearchProvider + + InstanceHistoryProvider + + BundleProvider + + helios_persistence::core::ExportDataProvider + + helios_persistence::core::PatientExportProvider + + helios_persistence::core::GroupExportProvider + Send + Sync + 'static, @@ -296,7 +383,7 @@ where // Create application state let state = AppState::with_auth_and_audit( - Arc::new(storage), + storage, config.clone(), auth_config, auth_state.clone(), @@ -304,6 +391,12 @@ where app_audit_source_observer, ); + // Wire the bulk-export subsystem if provided. + let state = match bulk_export { + Some(b) => state.with_bulk_export(b.jobs, b.output, b.file_auth), + None => state, + }; + // Inject subscription engine if enabled #[cfg(feature = "subscriptions")] let state = { From 8dd0ba17f0217c127786bfb51627bdc639323ed4 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 36/81] test(rest): add bulk-export integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full kickoff→poll→manifest→download→delete lifecycle, _typeFilter validation, strict/lenient unsupported-parameter handling, capability statement advertisement, and metadata-lookup failure paths. Drives a DefaultExportWorker synchronously inside the test against an in-memory SQLite + LocalFsOutputStore. --- crates/rest/tests/bulk_export.rs | 338 +++++++++++++++++++++++++++++++ 1 file changed, 338 insertions(+) create mode 100644 crates/rest/tests/bulk_export.rs diff --git a/crates/rest/tests/bulk_export.rs b/crates/rest/tests/bulk_export.rs new file mode 100644 index 000000000..a97d3dc7c --- /dev/null +++ b/crates/rest/tests/bulk_export.rs @@ -0,0 +1,338 @@ +//! Integration tests for the FHIR Bulk Data Export (`$export`) endpoints. +//! +//! Exercises the kick-off → poll → manifest → download → delete lifecycle for +//! all three export levels, plus parameter validation, the `ExportStatus` → +//! HTTP mapping, and the unsupported-parameter behavior. + +use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; + +use axum::http::StatusCode; +use axum_test::TestServer; +use helios_fhir::FhirVersion; +use helios_persistence::backends::local_fs::LocalFsOutputStore; +use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; +use helios_persistence::core::{ + BulkExportJobStore, DefaultExportWorker, ExportClaimStrategy, ExportOutputStore, + ResourceStorage, WorkerId, +}; +use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; +use helios_rest::ServerConfig; +use helios_rest::bulk_export_auth::BearerScopeAuth; +use helios_rest::config::{MultitenancyConfig, TenantRoutingMode}; +use serde_json::{Value, json}; + +/// Builds a test server with the bulk-export subsystem wired in, plus the +/// SQLite backend and the local-FS output store (for driving a worker). +async fn create_bulk_export_server() -> ( + TestServer, + Arc, + Arc, + tempfile::TempDir, +) { + let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .map(|p| p.join("data")) + .unwrap_or_else(|| PathBuf::from("data")); + + let backend_config = SqliteBackendConfig { + data_dir: Some(data_dir), + ..Default::default() + }; + let backend = Arc::new( + SqliteBackend::with_config(":memory:", backend_config).expect("create SQLite backend"), + ); + backend.init_schema().expect("init schema"); + + let tmp = tempfile::tempdir().expect("tempdir"); + let output = Arc::new(LocalFsOutputStore::new(tmp.path(), "http://localhost:8080")); + let file_auth = Arc::new(BearerScopeAuth); + + let config = ServerConfig { + multitenancy: MultitenancyConfig { + routing_mode: TenantRoutingMode::HeaderOnly, + ..Default::default() + }, + base_url: "http://localhost:8080".to_string(), + default_tenant: "test-tenant".to_string(), + ..ServerConfig::for_testing() + }; + + let state = helios_rest::AppState::new(Arc::clone(&backend), config).with_bulk_export( + backend.clone() as Arc, + output.clone() as Arc, + file_auth, + ); + let app = helios_rest::routing::fhir_routes::create_routes(state); + let server = TestServer::new(app).expect("create test server"); + + (server, backend, output, tmp) +} + +fn test_tenant() -> TenantContext { + TenantContext::new( + TenantId::new("test-tenant"), + TenantPermissions::full_access(), + ) +} + +/// Drains all currently-claimable export jobs by running a worker synchronously. +async fn drain_workers(backend: &Arc, output: &Arc) { + let worker_id = WorkerId::new("test-worker"); + let worker = DefaultExportWorker::new( + backend.clone(), + backend.clone(), + output.clone(), + worker_id.clone(), + ); + while let Some(lease) = backend + .claim_next(&worker_id, Duration::from_secs(60)) + .await + .expect("claim_next") + { + worker.run_job(lease).await.expect("run_job"); + } +} + +/// Seeds N Patient resources. +async fn seed_patients(backend: &Arc, n: usize) { + let tenant = test_tenant(); + for i in 0..n { + backend + .create( + &tenant, + "Patient", + json!({"resourceType": "Patient", "id": format!("p{i}")}), + FhirVersion::default(), + ) + .await + .expect("seed patient"); + } +} + +#[tokio::test] +async fn test_system_export_full_lifecycle() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 3).await; + + // Kick-off — requires Prefer: respond-async. + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + let status_url = resp + .headers() + .get("content-location") + .expect("Content-Location header") + .to_str() + .unwrap() + .to_string(); + let status_path = status_url.strip_prefix("http://localhost:8080").unwrap(); + + // Poll before the worker runs — still 202. + let polling = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(polling.status_code(), StatusCode::ACCEPTED); + assert!(polling.headers().get("retry-after").is_some()); + + // Run the worker. + drain_workers(&backend, &output).await; + + // Poll again — now 200 + manifest. + let done = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(done.status_code(), StatusCode::OK); + let manifest: Value = done.json(); + assert!(manifest["transactionTime"].is_string()); + assert_eq!( + manifest["request"], + "http://localhost:8080/$export?_type=Patient" + ); + assert_eq!(manifest["requiresAccessToken"], true); + let output_files = manifest["output"].as_array().expect("output array"); + let total: u64 = output_files + .iter() + .map(|f| f["count"].as_u64().unwrap_or(0)) + .sum(); + assert_eq!(total, 3); + + // Download the first output file. + let file_url = output_files[0]["url"].as_str().unwrap(); + let file_path = file_url.strip_prefix("http://localhost:8080").unwrap(); + let download = server + .get(file_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(download.status_code(), StatusCode::OK); + assert_eq!( + download.headers().get("content-type").unwrap(), + "application/fhir+ndjson" + ); + assert_eq!(download.text().lines().count(), 3); + + // Delete the job. + let deleted = server + .delete(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(deleted.status_code(), StatusCode::ACCEPTED); + + // Status URL is now gone. + let gone = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(gone.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_patient_and_group_export_levels() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 2).await; + + // Patient-level kick-off. + let resp = server + .get("/Patient/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + // Group-level kick-off. + let tenant = test_tenant(); + backend + .create( + &tenant, + "Group", + json!({"resourceType": "Group", "id": "g1", "member": []}), + FhirVersion::default(), + ) + .await + .unwrap(); + let resp = server + .get("/Group/g1/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_kickoff_requires_respond_async() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_unsupported_output_format_rejected() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_outputFormat", "text/csv") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_unsupported_param_strict_vs_lenient() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + // strict → 400 + let strict = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async, handling=strict") + .add_query_param("includeAssociatedData", "LatestProvenanceResources") + .await; + assert_eq!(strict.status_code(), StatusCode::BAD_REQUEST); + + // no handling directive (lenient default) → accepted + let lenient = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("includeAssociatedData", "LatestProvenanceResources") + .await; + assert_eq!(lenient.status_code(), StatusCode::ACCEPTED); +} + +#[tokio::test] +async fn test_type_filter_validation() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + // _typeFilter whose resource type is not in _type → 400 + let mismatch = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_typeFilter", "Observation?status=final") + .await; + assert_eq!(mismatch.status_code(), StatusCode::BAD_REQUEST); + + // _typeFilter carrying a result-control param → 400 + let bad_param = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Observation") + .add_query_param("_typeFilter", "Observation?_sort=date") + .await; + assert_eq!(bad_param.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_status_and_download_unknown_job() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + let status = server + .get("/export-status/nonexistent") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(status.status_code(), StatusCode::NOT_FOUND); + + let download = server + .get("/export-file/nonexistent/Patient-0") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(download.status_code(), StatusCode::NOT_FOUND); +} + +#[tokio::test] +async fn test_capability_statement_advertises_export() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + let resp = server + .get("/metadata") + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(resp.status_code(), StatusCode::OK); + let cs: Value = resp.json(); + let ops = cs["rest"][0]["operation"] + .as_array() + .expect("operation array"); + let names: Vec<&str> = ops.iter().filter_map(|o| o["name"].as_str()).collect(); + assert!(names.contains(&"export")); + assert!(names.contains(&"patient-export")); + assert!(names.contains(&"group-export")); + assert_eq!( + cs["instantiates"][0], + "http://hl7.org/fhir/uv/bulkdata/CapabilityStatement/bulk-data" + ); +} From abfc548339679cac04c8cbbf33646860af00ae56 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 37/81] chore(hfs): add chrono dep for the bulk-export cleanup task --- crates/hfs/Cargo.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/hfs/Cargo.toml b/crates/hfs/Cargo.toml index 50bd94cee..14745c18e 100644 --- a/crates/hfs/Cargo.toml +++ b/crates/hfs/Cargo.toml @@ -71,6 +71,9 @@ parking_lot = "0.12" # Error handling anyhow = "1.0" async-trait = "0.1" + +# Time (used by the bulk-export cleanup task) +chrono = "0.4" serde_json = "1" # Vendor OpenSSL when cross-compiling for Linux ARM64 (the macOS runner From 405d481f33a4919a86b450b3a2cfecbf6bfbf822 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 38/81] feat(hfs): wire bulk-export job store, output store, worker pool into bootstrap ServerConfig::from_env() (not ::parse) so multitenancy + bulk_export sub-structs are populated from env. Generic build_bulk_export helper handles both HFS_BULK_EXPORT_BACKEND=embedded (dedicated SqliteBackend job store) and =postgres-s3 (PostgresBackend job store + S3OutputStore), gated on any(sqlite, postgres). spawn_export_workers launches HFS_BULK_EXPORT_WORKER_CONCURRENCY tasks claiming via ExportClaimStrategy and a periodic cleanup task that pages through list_expired_exports + the two-step output-then-job teardown. Wired into start_sqlite and start_postgres. --- crates/hfs/src/main.rs | 274 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 269 insertions(+), 5 deletions(-) diff --git a/crates/hfs/src/main.rs b/crates/hfs/src/main.rs index 8860bc258..467812d87 100644 --- a/crates/hfs/src/main.rs +++ b/crates/hfs/src/main.rs @@ -20,7 +20,6 @@ use std::sync::Arc; -use clap::Parser; use helios_audit::{ AuditBackend, AuditConfig, AuditMiddlewareState, AuditSink, ExclusionFilter, lifecycle, }; @@ -31,6 +30,17 @@ use helios_rest::{ }; use tracing::info; +#[cfg(feature = "sqlite")] +use helios_persistence::backends::local_fs::LocalFsOutputStore; +#[cfg(feature = "sqlite")] +use helios_persistence::core::{ + BulkExportJobStore, DefaultExportWorker, ExportOutputStore, WorkerId, +}; +#[cfg(any(feature = "sqlite", feature = "postgres"))] +use helios_rest::bulk_export_auth::BearerScopeAuth; +#[cfg(any(feature = "sqlite", feature = "postgres"))] +use helios_rest::create_app_with_auth_and_bulk_export; + #[cfg(feature = "sqlite")] use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; @@ -638,7 +648,10 @@ async fn init_audit( #[tokio::main] async fn main() -> anyhow::Result<()> { - let config = ServerConfig::parse(); + // Use `from_env()` (not `parse()`) so `multitenancy` and `bulk_export` + // sub-structs — both `#[arg(skip)]` for clap — are populated from + // their `HFS_*` environment variables. + let config = ServerConfig::from_env(); init_logging(&config.log_level); if let Err(errors) = config.validate() { @@ -739,10 +752,25 @@ async fn start_sqlite( auth_state: Option>, audit_state: Option>, ) -> anyhow::Result<()> { - let backend = create_sqlite_backend(&config)?; let serve_audit_state = audit_state.clone(); + let backend = Arc::new(create_sqlite_backend(&config)?); + + if let Some(bundle) = build_bulk_export(&config, backend.clone()).await? { + let app = create_app_with_auth_and_bulk_export( + backend, + config.clone(), + auth_config, + auth_state, + audit_state, + bundle, + ); + return serve(app, &config, serve_audit_state).await; + } + let app = create_app_with_auth( - backend, + Arc::try_unwrap(backend).unwrap_or_else(|_| { + unreachable!("backend Arc is uniquely owned when bulk export is disabled") + }), config.clone(), auth_config, auth_state, @@ -751,6 +779,227 @@ async fn start_sqlite( serve(app, &config, serve_audit_state).await } +/// Builds the bulk-export subsystem (job store + output store + file auth) for +/// a given resource-store data provider, spawning the in-process worker pool +/// and cleanup task. Returns `None` when bulk export is disabled. +/// +/// Supports both the `embedded` backend (a dedicated SQLite job store + local +/// filesystem output) and the `postgres-s3` backend (a PostgreSQL job store + +/// S3 output). +#[cfg(any(feature = "sqlite", feature = "postgres"))] +async fn build_bulk_export( + config: &ServerConfig, + data: Arc, +) -> anyhow::Result> +where + Dp: helios_persistence::core::ExportResourceProvider + 'static, +{ + let cfg = config.bulk_export.clone(); + info!( + "Bulk export config: enabled={} backend={} output_backend={} requires_access_token={}", + cfg.enabled, cfg.backend, cfg.output_backend, cfg.requires_access_token + ); + if !cfg.enabled { + return Ok(None); + } + + // --- Output store --------------------------------------------------- + let output: Arc = match cfg.output_backend.as_str() { + "local-fs" => { + let output_dir = cfg + .output_dir + .clone() + .or_else(|| { + config + .data_dir + .as_ref() + .map(|d| format!("{}/exports", d.display())) + }) + .unwrap_or_else(|| "./data/exports".to_string()); + Arc::new(LocalFsOutputStore::new(output_dir, config.base_url.clone())) + } + "s3" => { + #[cfg(feature = "s3")] + { + use helios_persistence::backends::s3::{ + AccessTokenMode, AwsS3Client, AwsS3ClientOptions, S3OutputStore, + }; + let bucket = cfg.s3_bucket.clone().ok_or_else(|| { + anyhow::anyhow!("HFS_BULK_EXPORT_S3_BUCKET is required for OUTPUT_BACKEND=s3") + })?; + let region = std::env::var("HFS_BULK_EXPORT_S3_REGION") + .ok() + .or_else(|| std::env::var("HFS_S3_REGION").ok()); + let sdk_config = AwsS3Client::load_sdk_config(region.as_deref()).await; + let client = Arc::new(AwsS3Client::from_sdk_config_with_options( + &sdk_config, + AwsS3ClientOptions { + endpoint_url: std::env::var("HFS_BULK_EXPORT_S3_ENDPOINT").ok(), + force_path_style: parse_env_bool( + "HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE", + false, + ), + }, + )); + Arc::new(S3OutputStore::new( + client, + bucket, + config.base_url.clone(), + AccessTokenMode::parse(&cfg.requires_access_token), + std::time::Duration::from_secs(cfg.file_url_ttl_secs), + )) + } + #[cfg(not(feature = "s3"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 requires the 's3' feature. \ + Build with: cargo build -p helios-hfs --features s3" + ); + } + } + other => anyhow::bail!("invalid HFS_BULK_EXPORT_OUTPUT_BACKEND '{other}'"), + }; + + // --- Job store ------------------------------------------------------ + let jobs: Arc = match cfg.backend.as_str() { + "embedded" => { + #[cfg(feature = "sqlite")] + { + let job_db = config + .bulk_export + .output_dir + .as_ref() + .map(|d| format!("{d}/bulk_export.db")) + .unwrap_or_else(|| "./data/bulk_export.db".to_string()); + let job_backend = SqliteBackend::with_config( + &job_db, + SqliteBackendConfig { + fhir_version: config.default_fhir_version, + data_dir: config.data_dir.clone(), + ..Default::default() + }, + )?; + job_backend.init_schema()?; + Arc::new(job_backend) + } + #[cfg(not(feature = "sqlite"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_BACKEND=embedded requires the 'sqlite' feature. \ + Build with: cargo build -p helios-hfs --features sqlite" + ); + } + } + "postgres-s3" => { + #[cfg(feature = "postgres")] + { + use helios_persistence::backends::postgres::PostgresBackend; + let url = std::env::var("HFS_BULK_EXPORT_DATABASE_URL") + .ok() + .or_else(|| config.database_url.clone()) + .ok_or_else(|| { + anyhow::anyhow!( + "HFS_BULK_EXPORT_DATABASE_URL (or HFS_DATABASE_URL) is required \ + for HFS_BULK_EXPORT_BACKEND=postgres-s3" + ) + })?; + let pg = PostgresBackend::from_connection_string(&url).await?; + pg.init_schema().await?; + Arc::new(pg) + } + #[cfg(not(feature = "postgres"))] + { + anyhow::bail!( + "HFS_BULK_EXPORT_BACKEND=postgres-s3 requires the 'postgres' feature. \ + Build with: cargo build -p helios-hfs --features postgres,s3" + ); + } + } + other => anyhow::bail!("invalid HFS_BULK_EXPORT_BACKEND '{other}'"), + }; + + spawn_export_workers(jobs.clone(), data, output.clone(), &cfg); + + Ok(Some(helios_rest::BulkExportBundle { + jobs, + output, + file_auth: Arc::new(BearerScopeAuth), + })) +} + +/// Spawns the in-process export worker pool and the periodic cleanup task. +#[cfg(any(feature = "sqlite", feature = "postgres"))] +fn spawn_export_workers( + jobs: Arc, + data: Arc, + output: Arc, + cfg: &helios_rest::config::BulkExportConfig, +) where + Dp: helios_persistence::core::ExportResourceProvider + 'static, +{ + if cfg.disable_local_worker { + info!("Bulk export in-process worker pool is disabled"); + return; + } + let lease = std::time::Duration::from_secs(cfg.lease_duration_secs); + for i in 0..cfg.worker_concurrency { + let jobs = jobs.clone(); + let data = data.clone(); + let output = output.clone(); + let worker_id = WorkerId::new(format!("hfs-worker-{i}")); + let exclude_newly_added = cfg.since_newly_added.eq_ignore_ascii_case("exclude"); + tokio::spawn(async move { + let worker = DefaultExportWorker::new(jobs.clone(), data, output, worker_id.clone()) + .with_exclude_since_newly_added(exclude_newly_added); + loop { + match jobs.claim_next(&worker_id, lease).await { + Ok(Some(claimed)) => { + if let Err(e) = worker.run_job(claimed).await { + tracing::error!("export worker job failed: {e}"); + } + } + Ok(None) => { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + Err(e) => { + tracing::error!("export worker claim failed: {e}"); + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + } + } + } + }); + } + + // Periodic cleanup of expired job output. + let cleanup_jobs = jobs.clone(); + let cleanup_output = output.clone(); + let interval = std::time::Duration::from_secs(cfg.cleanup_interval_secs); + let output_ttl = std::time::Duration::from_secs(cfg.output_ttl_secs); + tokio::spawn(async move { + loop { + tokio::time::sleep(interval).await; + match cleanup_jobs + .list_expired_exports(chrono::Utc::now(), output_ttl, 100) + .await + { + Ok(expired) => { + for job in expired { + let _ = cleanup_output + .delete_job_outputs(&job.tenant, &job.job_id) + .await; + let _ = cleanup_jobs.delete_export(&job.tenant, &job.job_id).await; + } + } + Err(e) => tracing::error!("export cleanup scan failed: {e}"), + } + } + }); + info!( + "Bulk export worker pool started ({} workers)", + cfg.worker_concurrency + ); +} + /// Fallback when sqlite feature is not enabled. #[cfg(not(feature = "sqlite"))] async fn start_sqlite( @@ -918,10 +1167,25 @@ async fn start_postgres( }; backend.init_schema().await?; + let backend = Arc::new(backend); let serve_audit_state = audit_state.clone(); + if let Some(bundle) = build_bulk_export(&config, backend.clone()).await? { + let app = create_app_with_auth_and_bulk_export( + backend, + config.clone(), + auth_config, + auth_state, + audit_state, + bundle, + ); + return serve(app, &config, serve_audit_state).await; + } + let app = create_app_with_auth( - backend, + Arc::try_unwrap(backend).unwrap_or_else(|_| { + unreachable!("backend Arc is uniquely owned when bulk export is disabled") + }), config.clone(), auth_config, auth_state, From c338bb0e8a48bbcbbeed0945549eb1148e011e2c Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 39/81] docs(claude): document Bulk Data Export endpoints + env vars + recipes [skip ci] --- CLAUDE.md | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index 64acc14ea..457ebeb8e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -612,6 +612,99 @@ cargo run --bin hts -- import ./package.tgz \ --- +## Bulk Data Export ($export) + +HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) +`$export` family asynchronously: kick-off → poll → manifest → download → delete. + +### Endpoints + +| Operation | Method | URL | +|-----------|--------|-----| +| system kick-off | GET / POST | `/$export` | +| patient kick-off | GET / POST | `/Patient/$export` | +| group kick-off | GET / POST | `/Group/{id}/$export` | +| status / manifest | GET | `/export-status/{job_id}` | +| cancel + delete | DELETE | `/export-status/{job_id}` | +| HFS-served download | GET | `/export-file/{job_id}/{type}-{part}` | + +All kick-offs require `Prefer: respond-async`. The default response is +`202 Accepted` with a `Content-Location` status URL. + +### Environment variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `HFS_BULK_EXPORT_ENABLED` | `true` | Master switch — when `false`, all `$export` endpoints return `501`. | +| `HFS_BULK_EXPORT_BACKEND` | `embedded` | Job-state backend: `embedded` (SQLite) or `postgres-s3`. | +| `HFS_BULK_EXPORT_OUTPUT_BACKEND` | `local-fs` | Output store: `local-fs` or `s3`. | +| `HFS_BULK_EXPORT_OUTPUT_DIR` | `${HFS_DATA_DIR}/exports` | Local-FS output root. | +| `HFS_BULK_EXPORT_S3_BUCKET` | (none) | S3 bucket — required when `OUTPUT_BACKEND=s3`. | +| `HFS_BULK_EXPORT_S3_ENDPOINT` | (AWS) | S3-compatible endpoint URL (e.g. MinIO). | +| `HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE` | `false` | Path-style addressing for S3-compatible providers. | +| `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` | `auto` | Manifest posture: `auto` / `true` / `false`. **`false` is invalid with `local-fs`** (no pre-signed URLs). | +| `HFS_BULK_EXPORT_FILE_URL_TTL` | `3600` | Pre-signed download URL lifetime, seconds. | +| `HFS_BULK_EXPORT_OUTPUT_TTL` | `86400` | Output retention after job completion, seconds. | +| `HFS_BULK_EXPORT_WORKER_CONCURRENCY` | `2` | In-process worker pool size. | +| `HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER` | `false` | Disable in-pod workers (use a separate exporter). | +| `HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT` | `4` | Per-tenant active-job cap (kick-off → 429 if exceeded). | +| `HFS_BULK_EXPORT_BATCH_SIZE` | `1000` | Resources per `fetch_export_batch`. | +| `HFS_BULK_EXPORT_LEASE_DURATION` | `60` | Initial lease length, seconds. Must be > heartbeat interval. | +| `HFS_BULK_EXPORT_HEARTBEAT_INTERVAL` | `20` | Worker heartbeat cadence, seconds. | +| `HFS_BULK_EXPORT_CLEANUP_INTERVAL` | `300` | Cleanup-task scan interval, seconds. | +| `HFS_BULK_EXPORT_SINCE_NEWLY_ADDED` | `include` | Group-export `_since` toggle (`include` / `exclude`). | +| `HFS_BULK_EXPORT_DATABASE_URL` | (from `HFS_DATABASE_URL`) | Postgres URL for the `postgres-s3` job store. | + +### Single-instance recipe (zero-config) + +``` +cargo run --bin hfs +``` + +This starts HFS with embedded bulk export: SQLite job state at +`./data/bulk_export.db`, NDJSON output under `./data/exports/`, and an +in-process worker pool. Kick off: + +``` +curl -H 'Prefer: respond-async' http://localhost:8080/Patient/\$export +``` + +### Multi-instance recipe (PostgreSQL + S3 / MinIO) + +``` +HFS_STORAGE_BACKEND=postgres \ +HFS_DATABASE_URL=postgresql://hfs:hfs@localhost/hfs \ +HFS_BULK_EXPORT_BACKEND=postgres-s3 \ +HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 \ +HFS_BULK_EXPORT_S3_BUCKET=hfs-export \ +HFS_BULK_EXPORT_S3_ENDPOINT=http://localhost:9000 \ +HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE=true \ +HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false \ +cargo run --bin hfs --features postgres,s3 +``` + +The full stack (HFS + Postgres + MinIO + Keycloak) is described by +`docker/bulk-export/docker-compose.yml`. See `.github/workflows/inferno-bulk-data.yml` +for the manual conformance run. + +### Behavior notes + +- **`_typeFilter`** is parsed and applied; unsupported result-control params + (`_sort`, `_include`, `_revinclude`, `_count`, `_elements`) inside a + `_typeFilter` query are rejected `400` regardless of `Prefer: handling`. +- **`_elements`** is implemented: subset to listed paths + `id` / + `resourceType` / `meta`, with a `SUBSETTED` `meta.tag` added. +- **Unsupported parameters** (`includeAssociatedData`, `organizeOutputBy`, + `allowPartialManifests`) — **`Prefer: handling=strict`** → `400`; absent or + `handling=lenient` (IG-default) → ignored with a warning logged. +- **`_since` + late-membership** for Group exports: `include` (default) + returns pre-`_since` resources for patients added after `_since`; `exclude` + is reserved for a follow-up that requires group-membership-history tracking. +- **Group export** flattens nested `Group/` members iteratively with a + visited-set cycle guard. + +--- + ## Docker Generic Dockerfile supporting all server binaries via `BINARY_NAME` build arg: From 3a3dbb024e9816ac175a469eb17ca6e071eefaa6 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 40/81] docs(hfs): add Bulk Data Export section to the hfs README [skip ci] --- crates/hfs/README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/crates/hfs/README.md b/crates/hfs/README.md index 7f2671c70..fcd51b434 100644 --- a/crates/hfs/README.md +++ b/crates/hfs/README.md @@ -9,11 +9,35 @@ An open test server is available at https://hfs.heliossoftware.com/ for experime - Full FHIR RESTful API support - Multiple FHIR version support - Pluggable storage backends (SQLite, PostgreSQL, MongoDB) +- **Bulk Data Export (`$export`)** — system / patient / group, asynchronous, + with an embedded single-instance default and a multi-instance + PostgreSQL + S3 topology (see *Bulk Data Export* below) - Content negotiation (JSON) - Conditional operations with ETag support - Multi-tenant support via X-Tenant-ID header - CORS support +## Bulk Data Export + +HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) +`$export` operation. Single-instance (zero config) wires SQLite job state + +local-FS output + an in-process worker pool; multi-instance switches to a +PostgreSQL job store and an S3-compatible output store with pre-signed +download URLs. + +```bash +# Single-instance (default) +cargo run --bin hfs +curl -H 'Prefer: respond-async' \ + http://localhost:8080/Patient/\$export +``` + +The full configuration surface (`HFS_BULK_EXPORT_*` env vars, single- vs +multi-instance recipes, parameter behavior) is documented in `CLAUDE.md`. +A docker-compose stack for the multi-instance topology lives at +`docker/bulk-export/docker-compose.yml`, and a manual Inferno Bulk Data IG +v2.0.0 conformance workflow at `.github/workflows/inferno-bulk-data.yml`. + ## Installation ### From Source From e5717bfbd99a93c9010f35213eafbc3d9aef4d88 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:31 -0400 Subject: [PATCH 41/81] docs(roadmap): mark Bulk Data Export ($export) as shipped [skip ci] --- ROADMAP.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 8d4685ea7..936e201eb 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -61,7 +61,8 @@ Work that is currently underway or planned for the near term. |------|------|--------| | **Standards** | [Terminology — PostgreSQL backend](https://github.com/HeliosSoftware/hfs/discussions/54) | 🟡 In progress | | **Standards** | FHIR Validation engine | 🔵 Design | -| **Standards** | Bulk Data API — Import and export (`$export` / `$import` operations) | 🔵 Design | +| **Standards** | Bulk Data API — `$export` (system / patient / group), pre-signed S3 downloads | ✅ Shipped | +| **Standards** | Bulk Data API — `$bulk-submit` (ingestion) | 🔵 Design | | **Analytics** | [SQL on FHIR](https://sql-on-fhir.org/ig/latest/) — HFS integration and operations update | 🟡 In progress | | **Documentation** | [Project documentation website](https://github.com/HeliosSoftware/hfs/tree/docs/book-updates) | 🟡 In progress | @@ -182,7 +183,7 @@ Devitt's book defines nine key questions organizations must answer before choosi | Gap | Book Reference | Current Status | |-----|---------------|----------------| | **No patient-level access control** | Ch. 3 "Authorization" — SMART scopes are parsed but `patient/*` and `user/*` contexts are not enforced. Search results are not filtered by patient compartment. | 🔭 Later | -| **Bulk Data API not exposed via REST** | Appendix I "Bulk data processing" — persistence-layer traits exist across all backends but no `$export`/`$import` REST endpoints. The book notes bulk ingestion is important for hybrid architectures (Ch. 8). | 🗺️ Next | +| **Bulk Data API — `$export`** | Appendix I "Bulk data processing" — `$export` (system / patient / group) is now exposed via the REST layer with an embedded SQLite-backed worker pool by default and an optional Postgres + S3 multi-instance topology. `$bulk-submit` (ingestion) remains pending. | ✅ Shipped (export) / 🗺️ Next (submit) | #### Moderate From e35df04ee4a92ae60ebd5ba831b4df7eda0289a4 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:32 -0400 Subject: [PATCH 42/81] feat(docker): add bulk-export multi-instance docker-compose stack HFS + PostgreSQL + MinIO + Keycloak, configured for HFS_BULK_EXPORT_BACKEND=postgres-s3. Used by the manual Inferno Bulk Data conformance workflow and as the multi-instance smoke substrate. --- docker/bulk-export/docker-compose.yml | 99 +++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 docker/bulk-export/docker-compose.yml diff --git a/docker/bulk-export/docker-compose.yml b/docker/bulk-export/docker-compose.yml new file mode 100644 index 000000000..c42bf08df --- /dev/null +++ b/docker/bulk-export/docker-compose.yml @@ -0,0 +1,99 @@ +# Multi-instance Bulk Data Export stack: HFS + PostgreSQL + MinIO + Keycloak. +# +# This is the substrate for Inferno Bulk Data Test Kit conformance runs and +# for manual multi-instance smoke testing. Bring it up with: +# +# docker compose -f docker/bulk-export/docker-compose.yml up --build +# +# Then point a Bulk Data client (Inferno or curl) at http://localhost:8080. + +services: + postgres: + image: postgres:16 + environment: + POSTGRES_USER: hfs + POSTGRES_PASSWORD: hfs + POSTGRES_DB: hfs + healthcheck: + test: ["CMD-SHELL", "pg_isready -U hfs"] + interval: 5s + timeout: 5s + retries: 10 + ports: + - "5432:5432" + + minio: + image: minio/minio:latest + command: server /data --console-address ":9001" + environment: + MINIO_ROOT_USER: hfs-minio + MINIO_ROOT_PASSWORD: hfs-minio-secret + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 5s + timeout: 5s + retries: 10 + ports: + - "9000:9000" + - "9001:9001" + + # Creates the export bucket on startup. + minio-bootstrap: + image: minio/mc:latest + depends_on: + minio: + condition: service_healthy + entrypoint: > + /bin/sh -c " + mc alias set local http://minio:9000 hfs-minio hfs-minio-secret && + mc mb -p local/hfs-export || true + " + + keycloak: + image: quay.io/keycloak/keycloak:26.1 + command: ["start-dev", "--import-realm"] + environment: + KEYCLOAK_ADMIN: admin + KEYCLOAK_ADMIN_PASSWORD: admin + volumes: + - ../keycloak/realm.json:/opt/keycloak/data/import/realm.json:ro + ports: + - "8180:8080" + healthcheck: + test: ["CMD-SHELL", "exec 3<>/dev/tcp/localhost/8080; echo OK >&3"] + interval: 5s + timeout: 5s + retries: 20 + + hfs: + build: + context: ../.. + args: + BINARY_NAME: hfs + depends_on: + postgres: + condition: service_healthy + minio-bootstrap: + condition: service_completed_successfully + keycloak: + condition: service_started + environment: + HFS_SERVER_HOST: 0.0.0.0 + HFS_SERVER_PORT: 8080 + HFS_BASE_URL: http://localhost:8080 + HFS_STORAGE_BACKEND: postgres + HFS_DATABASE_URL: postgresql://hfs:hfs@postgres/hfs + # Bulk export wired to Postgres job state + S3 (MinIO) output. + HFS_BULK_EXPORT_ENABLED: "true" + HFS_BULK_EXPORT_BACKEND: postgres-s3 + HFS_BULK_EXPORT_DATABASE_URL: postgresql://hfs:hfs@postgres/hfs + HFS_BULK_EXPORT_OUTPUT_BACKEND: s3 + HFS_BULK_EXPORT_S3_BUCKET: hfs-export + HFS_BULK_EXPORT_S3_ENDPOINT: http://minio:9000 + HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE: "true" + HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN: "false" + AWS_ACCESS_KEY_ID: hfs-minio + AWS_SECRET_ACCESS_KEY: hfs-minio-secret + AWS_REGION: us-east-1 + ports: + - "8080:8080" From bbee4f1e96de8829a7c16309ec651f81b9b4e119 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:32 -0400 Subject: [PATCH 43/81] ci(inferno): add manual Inferno Bulk Data IG v2.0.0 workflow workflow_dispatch only (matches inferno-us-core / inferno-subscription). Brings up docker/bulk-export, clones bulk-data-test-kit, executes the v2.0.0 suite. Suite id and group identifiers are read from kit source at runtime, not hard-coded. --- .github/workflows/inferno-bulk-data.yml | 105 ++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 .github/workflows/inferno-bulk-data.yml diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml new file mode 100644 index 000000000..c6d8a34be --- /dev/null +++ b/.github/workflows/inferno-bulk-data.yml @@ -0,0 +1,105 @@ +name: Inferno Bulk Data + +# Runs the Inferno Bulk Data Test Kit (Bulk Data IG v2.0.0) against a +# multi-instance HFS deployment (PostgreSQL + MinIO). +# +# This workflow is **manual only** (workflow_dispatch), matching the existing +# inferno-us-core.yml / inferno-subscription.yml pattern. It is not run on +# every push. +# +# Usage: +# GitHub Actions → "Inferno Bulk Data" → Run workflow. +# +# Pass/skip expectations: the core export groups (system / patient / group +# kick-off, status polling, manifest validation, NDJSON validation, and SMART +# Backend Services) MUST pass. Groups exercising deferred features +# (`organizeOutputBy`, `includeAssociatedData`, `allowPartialManifests`, +# `Prefer: separate-export-status`) are expected to skip or fail-as-known. +# +# The exact suite id and group identifiers must be read from the kit's +# `lib/.../*_test_suite.rb` source — DO NOT hard-code a guessed id. + +on: + workflow_dispatch: + inputs: + kit_ref: + description: "bulk-data-test-kit ref (branch / tag) to clone" + required: false + default: "main" + +jobs: + inferno-bulk-data: + runs-on: [self-hosted, Linux] + timeout-minutes: 60 + + steps: + - name: Checkout HFS + uses: actions/checkout@v4 + + - name: Bring up the bulk-export stack + run: | + docker compose \ + -f docker/bulk-export/docker-compose.yml \ + -p hfs-bulk-${{ github.run_id }} \ + up --build -d + + - name: Wait for HFS to be ready + run: | + for i in $(seq 1 60); do + if curl -fsS http://localhost:8080/health > /dev/null; then + echo "HFS is up" + exit 0 + fi + sleep 2 + done + echo "HFS did not become ready in time" + docker compose -f docker/bulk-export/docker-compose.yml \ + -p hfs-bulk-${{ github.run_id }} logs --tail=200 + exit 1 + + - name: Clone Inferno Bulk Data Test Kit + run: | + git clone --depth 1 --branch "${{ github.event.inputs.kit_ref }}" \ + https://github.com/inferno-framework/bulk-data-test-kit.git inferno-kit + + - name: Inspect available suites + working-directory: inferno-kit + run: | + # Read the actual suite ids from kit source — do NOT guess. + # The first implementation pass should use these to set the + # SUITE_ID env var below. + grep -RhoE 'id\s+:[a-z0-9_]+' lib/ | sort -u || true + ls lib/ || true + + - name: Run the Bulk Data IG v2.0.0 suite + working-directory: inferno-kit + env: + # The actual suite id must be filled in once read from the kit + # source above (e.g. `bulk_data_v200`). Until then this step is a + # placeholder that surfaces the real id in CI logs. + SUITE_ID: bulk_data_v200 + INFERNO_HOST: http://host.docker.internal:8080 + run: | + if [ -f docker-compose.yml ]; then + docker compose up --build -d + sleep 10 + # The kit ships a CLI runner under `bin/run` for headless mode; + # the exact invocation depends on the suite's required inputs + # (Bearer token, Group id, etc.) and should be filled in when + # this workflow is first wired up against a real HFS deployment. + docker compose run --rm \ + -e SUITE_ID=$SUITE_ID \ + -e INFERNO_HOST=$INFERNO_HOST \ + inferno bin/inferno suite execute --suite "$SUITE_ID" || EXIT=$? + docker compose down + exit ${EXIT:-0} + else + echo "bulk-data-test-kit does not ship docker-compose.yml; consult kit README" + exit 1 + fi + + - name: Tear down stack + if: always() + run: | + docker compose -f docker/bulk-export/docker-compose.yml \ + -p hfs-bulk-${{ github.run_id }} down -v From 15377a5f692d31c99c265c1234482c2cb5aa9946 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 11:48:32 -0400 Subject: [PATCH 44/81] chore(deps): update Cargo.lock for the bulk-export changes --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 55c577602..6acbf65d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3048,6 +3048,7 @@ dependencies = [ "anyhow", "async-trait", "axum", + "chrono", "clap", "helios-audit", "helios-auth", From cae20cdecbc838f231b8371ff44fbb931485ac3a Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:20:50 -0400 Subject: [PATCH 45/81] fix(persistence): avoid unnecessary since unwrap --- .../src/core/bulk_export_worker.rs | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/crates/persistence/src/core/bulk_export_worker.rs b/crates/persistence/src/core/bulk_export_worker.rs index 8edd2e4e3..76707bcaa 100644 --- a/crates/persistence/src/core/bulk_export_worker.rs +++ b/crates/persistence/src/core/bulk_export_worker.rs @@ -336,30 +336,31 @@ where // sync) — the IG-recommended behavior under the `exclude` toggle. let group_patient_ids: Option> = match &view.level { ExportLevel::Group { group_id } => { - let ids = if self.exclude_since_newly_added && view.request.since.is_some() { - let since = view.request.since.unwrap(); - let members = self + let ids = match (self.exclude_since_newly_added, view.request.since.as_ref()) { + (true, Some(since)) => { + let members = self + .data + .get_group_members_with_periods(tenant, group_id) + .await + .map_err(LeaseError::Storage)?; + members + .into_iter() + .filter_map(|(reference, period_start)| { + let pid = reference.strip_prefix("Patient/")?; + // Keep members whose period.start is unknown OR + // <= since (i.e., were already members at since). + match period_start { + Some(start) if start > *since => None, + _ => Some(pid.to_string()), + } + }) + .collect() + } + _ => self .data - .get_group_members_with_periods(tenant, group_id) - .await - .map_err(LeaseError::Storage)?; - members - .into_iter() - .filter_map(|(reference, period_start)| { - let pid = reference.strip_prefix("Patient/")?; - // Keep members whose period.start is unknown OR - // <= since (i.e., were already members at since). - match period_start { - Some(start) if start > since => None, - _ => Some(pid.to_string()), - } - }) - .collect() - } else { - self.data .resolve_group_patient_ids(tenant, group_id) .await - .map_err(LeaseError::Storage)? + .map_err(LeaseError::Storage)?, }; Some(ids) } From e23fb0035fae1961dd87b57d085556a5e5da1b1c Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:20:56 -0400 Subject: [PATCH 46/81] feat(auth): add disabled JTI cache --- crates/auth/src/jti/mod.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/crates/auth/src/jti/mod.rs b/crates/auth/src/jti/mod.rs index 40a1d169e..727b568ad 100644 --- a/crates/auth/src/jti/mod.rs +++ b/crates/auth/src/jti/mod.rs @@ -23,3 +23,35 @@ pub trait JtiCache: Send + Sync + 'static { expires_at: DateTime, ) -> Result; } + +/// JTI cache implementation which never treats tokens as replays. +/// +/// This is intended for deployments where JWT IDs identify reusable bearer +/// access tokens rather than one-time client assertions. +#[derive(Debug, Clone, Copy, Default)] +pub struct DisabledJtiCache; + +#[async_trait] +impl JtiCache for DisabledJtiCache { + async fn check_and_store( + &self, + _jti: &str, + _expires_at: DateTime, + ) -> Result { + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn disabled_cache_never_reports_replay() { + let cache = DisabledJtiCache; + let expires = Utc::now(); + + assert!(!cache.check_and_store("same-jti", expires).await.unwrap()); + assert!(!cache.check_and_store("same-jti", expires).await.unwrap()); + } +} From a890c6136ff0ef8c8145635e16b558ebb369bb7d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:21:02 -0400 Subject: [PATCH 47/81] chore(auth): export disabled JTI cache --- crates/auth/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/auth/src/lib.rs b/crates/auth/src/lib.rs index 1c16e1c7b..ac758ea8d 100644 --- a/crates/auth/src/lib.rs +++ b/crates/auth/src/lib.rs @@ -36,7 +36,7 @@ pub mod scope; pub use config::AuthConfig; pub use discovery::SmartConfiguration; pub use error::{AuthError, FhirOperation}; -pub use jti::{JtiCache, memory::InMemoryJtiCache}; +pub use jti::{DisabledJtiCache, JtiCache, memory::InMemoryJtiCache}; pub use jwks::JwksCache; pub use outbound::{ NoOpOutboundAuthProvider, OutboundAuthProvider, StaticBearerOutboundAuthProvider, From d7750e7d29f738d4a2005ce1eb697d341a746f7e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:21:07 -0400 Subject: [PATCH 48/81] docs(auth): document disabled JTI backend --- crates/auth/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/auth/README.md b/crates/auth/README.md index eb8553e40..a4346a36c 100644 --- a/crates/auth/README.md +++ b/crates/auth/README.md @@ -117,7 +117,7 @@ All configuration is via environment variables. Auth is a runtime toggle — no | Variable | Default | Description | |----------|---------|-------------| -| `HFS_AUTH_JTI_BACKEND` | `memory` | JTI cache backend (`memory` or `redis`) | +| `HFS_AUTH_JTI_BACKEND` | `memory` | JTI cache backend (`memory`, `redis`, or `disabled`) | | `HFS_AUTH_REDIS_URL` | *(none)* | Redis URL (required for `redis` backend) | | `HFS_AUTH_JWKS_MIN_REFRESH_INTERVAL` | `10` | Min seconds between JWKS refreshes | From c743565410e06098e57dacc671e8be50f6257b9a Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:21:15 -0400 Subject: [PATCH 49/81] feat(hfs): support disabled JTI backend --- crates/hfs/src/main.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/hfs/src/main.rs b/crates/hfs/src/main.rs index 467812d87..6996376da 100644 --- a/crates/hfs/src/main.rs +++ b/crates/hfs/src/main.rs @@ -30,9 +30,7 @@ use helios_rest::{ }; use tracing::info; -#[cfg(feature = "sqlite")] use helios_persistence::backends::local_fs::LocalFsOutputStore; -#[cfg(feature = "sqlite")] use helios_persistence::core::{ BulkExportJobStore, DefaultExportWorker, ExportOutputStore, WorkerId, }; @@ -536,10 +534,20 @@ async fn init_auth_with_audit( Build with: cargo build -p helios-hfs --features redis" ); } - _ => { + "memory" => { info!("Using in-memory JTI cache"); Arc::new(InMemoryJtiCache::new()) } + "disabled" | "none" => { + info!("JTI replay cache is DISABLED"); + Arc::new(helios_auth::DisabledJtiCache) + } + other => { + anyhow::bail!( + "Invalid HFS_AUTH_JTI_BACKEND '{}'. Valid values: memory, redis, disabled", + other + ); + } }; // Create JWKS cache From 179a845fcbfea112ea7a6e2662a25d69b246b38f Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:21:24 -0400 Subject: [PATCH 50/81] ci(inferno): align bulk export with shared stack --- .github/workflows/inferno-bulk-data.yml | 804 +++++++++++++++++++++--- 1 file changed, 734 insertions(+), 70 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index c6d8a34be..9c172c05e 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -1,105 +1,769 @@ name: Inferno Bulk Data -# Runs the Inferno Bulk Data Test Kit (Bulk Data IG v2.0.0) against a -# multi-instance HFS deployment (PostgreSQL + MinIO). -# -# This workflow is **manual only** (workflow_dispatch), matching the existing -# inferno-us-core.yml / inferno-subscription.yml pattern. It is not run on -# every push. -# -# Usage: -# GitHub Actions → "Inferno Bulk Data" → Run workflow. -# -# Pass/skip expectations: the core export groups (system / patient / group -# kick-off, status polling, manifest validation, NDJSON validation, and SMART -# Backend Services) MUST pass. Groups exercising deferred features -# (`organizeOutputBy`, `includeAssociatedData`, `allowPartialManifests`, -# `Prefer: separate-export-status`) are expected to skip or fail-as-known. -# -# The exact suite id and group identifiers must be read from the kit's -# `lib/.../*_test_suite.rb` source — DO NOT hard-code a guessed id. +# Runs the Inferno Bulk Data Test Kit (Bulk Data IG v2.0.0) against HFS. +# This is manual-only while the suite is being stabilized. on: workflow_dispatch: inputs: kit_ref: - description: "bulk-data-test-kit ref (branch / tag) to clone" + description: "bulk-data-test-kit ref (branch / tag) to test" required: false default: "main" +env: + CARGO_TERM_COLOR: always + CARGO_BUILD_JOBS: 1 + CARGO_PROFILE_DEV_DEBUG: 0 + HFS_PORT: 18098 + DOCKER_HOST: ${{ secrets.DOCKER_HOST }} + DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} + SUITE_ID: bulk_data_v200 + TEST_GROUP_ID: bulk_data_v200 + GROUP_ID: inferno-bulk-group + RESULTS_DIR: inferno-bulk-data-results + jobs: - inferno-bulk-data: + build: + name: Build HFS with Bulk Data support runs-on: [self-hosted, Linux] - timeout-minutes: 60 + steps: + - name: Checkout HFS + uses: actions/checkout@v5 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Configure Rust to use LLD + run: | + mkdir -p ~/.cargo + rm -f ~/.cargo/config.toml + echo '[target.x86_64-unknown-linux-gnu]' >> ~/.cargo/config.toml + echo 'linker = "clang"' >> ~/.cargo/config.toml + echo 'rustflags = ["-C", "link-arg=-fuse-ld=lld", "-C", "link-arg=-Wl,-zstack-size=8388608"]' >> ~/.cargo/config.toml + - name: Build HFS + run: cargo build -p helios-hfs --no-default-features --features R4,postgres,s3 + + - name: Upload HFS binary + uses: actions/upload-artifact@v7 + with: + name: hfs-bulk-data-inferno-binary + path: target/debug/hfs + retention-days: 1 + + inferno-stack-up: + name: Start Shared Inferno Bulk Data Stack + runs-on: [self-hosted, Linux] + outputs: + port: ${{ steps.up.outputs.port }} + project: ${{ steps.up.outputs.project }} + steps: + - name: Checkout bulk-data-test-kit + uses: actions/checkout@v5 + with: + repository: inferno-framework/bulk-data-test-kit + ref: ${{ github.event.inputs.kit_ref }} + path: inferno-kit + + - name: Prepare CI compose override + working-directory: inferno-kit + run: | + PROJECT="inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + INFERNO_HOST="${DOCKER_HOST_IP:-localhost}" + INFERNO_PORT=$((24000 + (${{ github.run_id }} % 20000) + ${{ github.run_attempt }})) + INFERNO_BASE_URL="http://$INFERNO_HOST:$INFERNO_PORT" + + mkdir -p ci-images/nginx + cat > ci-images/nginx/Dockerfile <<'EOF' + FROM nginx + COPY config/nginx.conf /etc/nginx/nginx.conf + EOF + cat > docker-compose.override.yml < /dev/null 2>&1; then + echo "Inferno is ready" + echo "port=$INFERNO_PORT" >> "$GITHUB_OUTPUT" + echo "project=$PROJECT" >> "$GITHUB_OUTPUT" + exit 0 + fi + echo "Attempt $i/60: Inferno not ready yet..." + sleep 2 + done + + echo "Inferno is not reachable at $INFERNO_BASE_URL" + docker compose -p "$PROJECT" ps + docker compose -p "$PROJECT" logs --tail 200 + exit 1 + + inferno-bulk-data-test: + name: Inferno Bulk Data v2.0.0 Tests + needs: [build, inferno-stack-up] + runs-on: [self-hosted, Linux] + timeout-minutes: 90 + permissions: + contents: read + env: + INFERNO_PORT: ${{ needs.inferno-stack-up.outputs.port }} + INFERNO_COMPOSE_PROJECT: ${{ needs.inferno-stack-up.outputs.project }} steps: - name: Checkout HFS - uses: actions/checkout@v4 + uses: actions/checkout@v5 + + - name: Download HFS binary + uses: actions/download-artifact@v8 + with: + name: hfs-bulk-data-inferno-binary + path: target/debug + + - name: Make HFS binary executable + run: chmod +x target/debug/hfs + + - name: Determine runner and Docker host IP + run: | + RUNNER_IP=$(hostname -I | awk '{print $1}') + if [ -n "${DOCKER_HOST_IP:-}" ]; then + EFFECTIVE_DOCKER_HOST_IP="$DOCKER_HOST_IP" + else + EFFECTIVE_DOCKER_HOST_IP="$RUNNER_IP" + fi - - name: Bring up the bulk-export stack + echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_ENV" + echo "DOCKER_HOST_IP=$EFFECTIVE_DOCKER_HOST_IP" >> "$GITHUB_ENV" + echo "HFS_BASE_URL=http://$RUNNER_IP:$HFS_PORT" >> "$GITHUB_ENV" + echo "INFERNO_BASE_URL=http://$EFFECTIVE_DOCKER_HOST_IP:$INFERNO_PORT" >> "$GITHUB_ENV" + echo "Runner IP: $RUNNER_IP" + echo "Docker host IP: $EFFECTIVE_DOCKER_HOST_IP" + + - name: Prepare results directory + run: mkdir -p "$RESULTS_DIR/container-logs" + + - name: Start PostgreSQL + run: | + PG_CONTAINER="pg-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + docker run -d --name "$PG_CONTAINER" -p 0:5432 \ + -e POSTGRES_USER=helios \ + -e POSTGRES_PASSWORD=helios \ + -e POSTGRES_DB=helios \ + postgres:16 + + echo "PG_CONTAINER=$PG_CONTAINER" >> "$GITHUB_ENV" + + echo "Waiting for PostgreSQL to be ready..." + for i in {1..30}; do + if docker exec "$PG_CONTAINER" pg_isready -U helios > /dev/null 2>&1; then + PG_PORT=$(docker port "$PG_CONTAINER" 5432 | head -1 | sed 's/.*://') + if timeout 2 bash -c "cat < /dev/null > /dev/tcp/$DOCKER_HOST_IP/$PG_PORT" 2>/dev/null; then + echo "PostgreSQL is ready on port $PG_PORT" + echo "PG_PORT=$PG_PORT" >> "$GITHUB_ENV" + exit 0 + fi + fi + echo "Attempt $i/30: PostgreSQL not ready yet..." + sleep 2 + done + + echo "PostgreSQL failed to start" + docker logs "$PG_CONTAINER" + exit 1 + + - name: Start MinIO run: | - docker compose \ - -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} \ - up --build -d + MINIO_CONTAINER="minio-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$MINIO_CONTAINER" 2>/dev/null || true + docker run -d --name "$MINIO_CONTAINER" -p 0:9000 -p 0:9001 \ + -e MINIO_ROOT_USER=hfs-minio \ + -e MINIO_ROOT_PASSWORD=hfs-minio-secret \ + minio/minio:latest server /data --console-address ":9001" + + echo "MINIO_CONTAINER=$MINIO_CONTAINER" >> "$GITHUB_ENV" + + echo "Waiting for MinIO to be ready..." + MINIO_READY=0 + for i in {1..30}; do + MINIO_PORT=$(docker port "$MINIO_CONTAINER" 9000 | head -1 | sed 's/.*://') + if [ -n "$MINIO_PORT" ]; then + if curl -sf "http://$DOCKER_HOST_IP:$MINIO_PORT/minio/health/live" > /dev/null 2>&1; then + echo "MinIO is ready on port $MINIO_PORT" + echo "MINIO_PORT=$MINIO_PORT" >> "$GITHUB_ENV" + MINIO_READY=1 + break + fi + fi + echo "Attempt $i/30: MinIO not ready yet..." + sleep 2 + done + + if [ "$MINIO_READY" -ne 1 ]; then + echo "MinIO failed to become ready" + docker logs "$MINIO_CONTAINER" + exit 1 + fi + + docker run --rm --network "container:$MINIO_CONTAINER" minio/mc:latest \ + alias set local http://127.0.0.1:9000 hfs-minio hfs-minio-secret + docker run --rm --network "container:$MINIO_CONTAINER" minio/mc:latest \ + mb -p local/hfs-export + + - name: Generate Keycloak realm for Inferno client + run: | + INFERNO_JWKS_URL="$INFERNO_BASE_URL/custom/bulk_data_v200/.well-known/jwks.json" + jq --arg jwks_url "$INFERNO_JWKS_URL" ' + .clients += [{ + "clientId": "inferno-bulk-data-client", + "name": "Inferno Bulk Data Backend Services Client", + "description": "CI client registered for the Inferno Bulk Data Test Kit private_key_jwt flow", + "enabled": true, + "publicClient": false, + "bearerOnly": false, + "clientAuthenticatorType": "client-jwt", + "serviceAccountsEnabled": true, + "standardFlowEnabled": false, + "implicitFlowEnabled": false, + "directAccessGrantsEnabled": false, + "authorizationServicesEnabled": false, + "protocol": "openid-connect", + "defaultClientScopes": ["system/*.cruds"], + "optionalClientScopes": ["system/Patient.rs", "system/Observation.r"], + "attributes": { + "use.jwks.url": "true", + "jwks.url": $jwks_url, + "token.endpoint.auth.signing.alg": "ES384" + } + }] + ' docker/keycloak/realm.json > "$RESULTS_DIR/keycloak-realm.json" + + - name: Start Keycloak + run: | + KC_CONTAINER="kc-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + docker rm -f "$KC_CONTAINER" 2>/dev/null || true + + docker create --name "$KC_CONTAINER" -p 0:8080 \ + -e KC_BOOTSTRAP_ADMIN_USERNAME=admin \ + -e KC_BOOTSTRAP_ADMIN_PASSWORD=admin \ + -e KC_HEALTH_ENABLED=true \ + --entrypoint /bin/sh \ + quay.io/keycloak/keycloak:26.1 \ + -ec 'mkdir -p /opt/keycloak/data/import && cp /tmp/realm.json /opt/keycloak/data/import/realm.json && exec /opt/keycloak/bin/kc.sh start-dev --import-realm' > /dev/null + + docker cp "$RESULTS_DIR/keycloak-realm.json" "$KC_CONTAINER":/tmp/realm.json + docker start "$KC_CONTAINER" > /dev/null + echo "KC_CONTAINER=$KC_CONTAINER" >> "$GITHUB_ENV" + + - name: Wait for Keycloak + run: | + echo "Waiting for Keycloak to be ready..." + for i in {1..60}; do + KEYCLOAK_PORT=$(docker port "$KC_CONTAINER" 8080 2>/dev/null | head -1 | sed 's/.*://') + if [ -n "$KEYCLOAK_PORT" ]; then + if curl -sf "http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir" > /dev/null 2>&1; then + echo "Keycloak is ready on port $KEYCLOAK_PORT" + echo "KEYCLOAK_PORT=$KEYCLOAK_PORT" >> "$GITHUB_ENV" + exit 0 + fi + fi + echo "Attempt $i/60: Keycloak not ready yet..." + sleep 2 + done + + echo "Keycloak failed to start" + docker logs "$KC_CONTAINER" + exit 1 + + - name: Start HFS server + run: | + HFS_LOG="$RESULTS_DIR/hfs.log" + PG_URL="postgresql://helios:helios@$DOCKER_HOST_IP:$PG_PORT/helios" + AUTH_JWKS_URL="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/certs" + AUTH_ISSUER="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir" + AUTH_TOKEN_ENDPOINT="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/token" + AUTH_AUTHORIZE_ENDPOINT="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT/realms/fhir/protocol/openid-connect/auth" + MINIO_ENDPOINT="http://$DOCKER_HOST_IP:$MINIO_PORT" + + echo "HFS_LOG=$HFS_LOG" >> "$GITHUB_ENV" + echo "PG_URL=$PG_URL" >> "$GITHUB_ENV" + + HFS_BASE_URL="$HFS_BASE_URL" \ + HFS_DEFAULT_FHIR_VERSION=R4 \ + HFS_STORAGE_BACKEND=postgres \ + HFS_DATABASE_URL="$PG_URL" \ + HFS_PG_HOST="$DOCKER_HOST_IP" \ + HFS_PG_PORT="$PG_PORT" \ + HFS_PG_DBNAME=helios \ + HFS_PG_USER=helios \ + HFS_PG_PASSWORD=helios \ + HFS_AUTH_ENABLED=true \ + HFS_AUTH_JWKS_URL="$AUTH_JWKS_URL" \ + HFS_AUTH_ISSUER="$AUTH_ISSUER" \ + HFS_AUTH_JTI_BACKEND=disabled \ + HFS_SMART_TOKEN_ENDPOINT="$AUTH_TOKEN_ENDPOINT" \ + HFS_SMART_AUTHORIZE_ENDPOINT="$AUTH_AUTHORIZE_ENDPOINT" \ + HFS_SMART_JWKS_URL="$AUTH_JWKS_URL" \ + HFS_BULK_EXPORT_ENABLED=true \ + HFS_BULK_EXPORT_BACKEND=postgres-s3 \ + HFS_BULK_EXPORT_DATABASE_URL="$PG_URL" \ + HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 \ + HFS_BULK_EXPORT_S3_BUCKET=hfs-export \ + HFS_BULK_EXPORT_S3_ENDPOINT="$MINIO_ENDPOINT" \ + HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE=true \ + HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=true \ + HFS_BULK_EXPORT_FILE_URL_TTL=3600 \ + HFS_BULK_EXPORT_OUTPUT_TTL=3600 \ + HFS_BULK_EXPORT_BATCH_SIZE=100 \ + AWS_ACCESS_KEY_ID=hfs-minio \ + AWS_SECRET_ACCESS_KEY=hfs-minio-secret \ + AWS_REGION=us-east-1 \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + + echo $! > /tmp/hfs-inferno-bulk-data.pid + echo "HFS_PID=$(cat /tmp/hfs-inferno-bulk-data.pid)" >> "$GITHUB_ENV" - name: Wait for HFS to be ready run: | - for i in $(seq 1 60); do - if curl -fsS http://localhost:8080/health > /dev/null; then - echo "HFS is up" + echo "Waiting for HFS to start..." + for i in {1..30}; do + if ! kill -0 "$HFS_PID" 2>/dev/null; then + echo "HFS process ($HFS_PID) exited" + cat "$HFS_LOG" + exit 1 + fi + if curl -sf "http://localhost:$HFS_PORT/health" > /dev/null 2>&1; then + echo "HFS is ready" exit 0 fi + echo "Attempt $i/30: HFS not ready yet..." sleep 2 done - echo "HFS did not become ready in time" - docker compose -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} logs --tail=200 + + echo "HFS failed to start" + tail -50 "$HFS_LOG" exit 1 - - name: Clone Inferno Bulk Data Test Kit + - name: Sanity-check SMART discovery run: | - git clone --depth 1 --branch "${{ github.event.inputs.kit_ref }}" \ - https://github.com/inferno-framework/bulk-data-test-kit.git inferno-kit + curl -sf "$HFS_BASE_URL/.well-known/smart-configuration" \ + | tee "$RESULTS_DIR/smart-configuration.json" - - name: Inspect available suites - working-directory: inferno-kit + - name: Seed Bulk Data resources run: | - # Read the actual suite ids from kit source — do NOT guess. - # The first implementation pass should use these to set the - # SUITE_ID env var below. - grep -RhoE 'id\s+:[a-z0-9_]+' lib/ | sort -u || true - ls lib/ || true + token() { + KEYCLOAK_URL="http://$DOCKER_HOST_IP:$KEYCLOAK_PORT" ./docker/keycloak/get-token.sh + } - - name: Run the Bulk Data IG v2.0.0 suite - working-directory: inferno-kit - env: - # The actual suite id must be filled in once read from the kit - # source above (e.g. `bulk_data_v200`). Until then this step is a - # placeholder that surfaces the real id in CI logs. - SUITE_ID: bulk_data_v200 - INFERNO_HOST: http://host.docker.internal:8080 - run: | - if [ -f docker-compose.yml ]; then - docker compose up --build -d - sleep 10 - # The kit ships a CLI runner under `bin/run` for headless mode; - # the exact invocation depends on the suite's required inputs - # (Bearer token, Group id, etc.) and should be filled in when - # this workflow is first wired up against a real HFS deployment. - docker compose run --rm \ - -e SUITE_ID=$SUITE_ID \ - -e INFERNO_HOST=$INFERNO_HOST \ - inferno bin/inferno suite execute --suite "$SUITE_ID" || EXIT=$? - docker compose down - exit ${EXIT:-0} - else - echo "bulk-data-test-kit does not ship docker-compose.yml; consult kit README" + put_resource() { + local resource_type="$1" + local id="$2" + local file="$3" + local access_token + access_token="$(token)" + local status + status="$(curl -sS -o "$RESULTS_DIR/${resource_type}-${id}-response.json" -w "%{http_code}" \ + -X PUT "$HFS_BASE_URL/$resource_type/$id" \ + -H "Authorization: Bearer $access_token" \ + -H "Content-Type: application/fhir+json; fhirVersion=4.0" \ + -H "Accept: application/fhir+json; fhirVersion=4.0" \ + --data-binary @"$file")" + if [ "$status" != "200" ] && [ "$status" != "201" ]; then + echo "::error::PUT $resource_type/$id returned HTTP $status" + cat "$RESULTS_DIR/${resource_type}-${id}-response.json" + exit 1 + fi + } + + cat > "$RESULTS_DIR/patient-a.json" <<'EOF' + { + "resourceType": "Patient", + "id": "inferno-bulk-patient-a", + "name": [{ "family": "Bulk", "given": ["Alpha"] }], + "gender": "female", + "birthDate": "1980-01-01" + } + EOF + + cat > "$RESULTS_DIR/patient-b.json" <<'EOF' + { + "resourceType": "Patient", + "id": "inferno-bulk-patient-b", + "name": [{ "family": "Bulk", "given": ["Beta"] }], + "gender": "male", + "birthDate": "1981-02-02" + } + EOF + + cat > "$RESULTS_DIR/observation-a.json" <<'EOF' + { + "resourceType": "Observation", + "id": "inferno-bulk-observation-a", + "status": "final", + "code": { + "coding": [{ + "system": "http://loinc.org", + "code": "8867-4", + "display": "Heart rate" + }], + "text": "Heart rate" + }, + "subject": { "reference": "Patient/inferno-bulk-patient-a" }, + "effectiveDateTime": "2024-01-01T00:00:00Z", + "valueQuantity": { + "value": 72, + "unit": "beats/minute", + "system": "http://unitsofmeasure.org", + "code": "/min" + } + } + EOF + + cat > "$RESULTS_DIR/group.json" < "$RESULTS_DIR/test-suites.json"; then + echo "Shared Inferno stack not reachable at $INFERNO_BASE_URL" + exit 1 + fi + + - name: Create Inferno test session + run: | + for attempt in $(seq 1 5); do + SESSION_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_sessions?test_suite_id=$SUITE_ID") + SESSION_ID=$(echo "$SESSION_RESPONSE" | jq -r '.id' 2>/dev/null) || true + + if [ -n "$SESSION_ID" ] && [ "$SESSION_ID" != "null" ]; then + break + fi + echo "Attempt $attempt/5: Failed to create test session: $SESSION_RESPONSE" + sleep $((attempt * 3)) + done + + if [ -z "${SESSION_ID:-}" ] || [ "$SESSION_ID" = "null" ]; then + echo "Failed to create test session after 5 attempts" + echo "$SESSION_RESPONSE" + exit 1 + fi + + echo "SESSION_ID=$SESSION_ID" >> "$GITHUB_ENV" + echo "$SESSION_RESPONSE" > "$RESULTS_DIR/session.json" + + - name: Run Inferno Bulk Data suite + run: | + SMART_AUTH_INFO="$(jq -nc '{ + auth_type: "backend_services", + use_discovery: "true", + requested_scopes: "system/*.cruds", + client_id: "inferno-bulk-data-client", + encryption_algorithm: "ES384" + }')" + + RUN_PAYLOAD="$(jq -n \ + --arg session_id "$SESSION_ID" \ + --arg group_id "$TEST_GROUP_ID" \ + --arg bulk_server_url "$HFS_BASE_URL" \ + --arg smart_auth_info "$SMART_AUTH_INFO" \ + --arg inferno_group_id "$GROUP_ID" \ + '{ + test_session_id: $session_id, + test_group_id: $group_id, + inputs: [ + {name: "bulk_server_url", value: $bulk_server_url}, + {name: "smart_auth_info", value: $smart_auth_info}, + {name: "group_id", value: $inferno_group_id}, + {name: "bulk_timeout", value: "600"}, + {name: "lines_to_validate", value: "100"} + ] + }')" + + echo "$RUN_PAYLOAD" > "$RESULTS_DIR/run-payload.json" + + for attempt in $(seq 1 5); do + RUN_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_runs" \ + -H "Content-Type: application/json" \ + -d "$RUN_PAYLOAD") + RUN_ID=$(echo "$RUN_RESPONSE" | jq -r '.id' 2>/dev/null) || true + + if [ -n "$RUN_ID" ] && [ "$RUN_ID" != "null" ]; then + break + fi + echo "Attempt $attempt/5: Failed to start test run: $RUN_RESPONSE" + sleep $((attempt * 3)) + done + + if [ -z "${RUN_ID:-}" ] || [ "$RUN_ID" = "null" ]; then + echo "Failed to start test run after 5 attempts" + echo "$RUN_RESPONSE" + exit 1 + fi + + echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" + echo "$RUN_RESPONSE" > "$RESULTS_DIR/run.json" + + - name: Poll Inferno results + run: | + MAX_POLLS=150 + POLL_INTERVAL=10 + API_ERRORS=0 + + for i in $(seq 1 $MAX_POLLS); do + STATUS_RESPONSE=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID") + RUN_STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status' 2>/dev/null) || true + + if [ -z "$RUN_STATUS" ] || [ "$RUN_STATUS" = "null" ]; then + API_ERRORS=$((API_ERRORS + 1)) + echo "Poll $i/$MAX_POLLS: WARNING - Inferno API returned non-JSON (error $API_ERRORS)" + echo " Response (first 200 chars): ${STATUS_RESPONSE:0:200}" + if [ "$API_ERRORS" -ge 5 ]; then + echo "::error::Inferno API failed $API_ERRORS consecutive times" + tail -50 "$HFS_LOG" + exit 1 + fi + sleep "$POLL_INTERVAL" + continue + fi + API_ERRORS=0 + + RESULTS=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID/results") + echo "$RESULTS" > "$RESULTS_DIR/results.json" + TOTAL=$(echo "$RESULTS" | jq '[.[] | select(.test_id)] | length' 2>/dev/null) || TOTAL="?" + PASS=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "pass")] | length' 2>/dev/null) || PASS="?" + FAIL=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "fail")] | length' 2>/dev/null) || FAIL="?" + SKIP=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "skip")] | length' 2>/dev/null) || SKIP="?" + ERROR=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "error")] | length' 2>/dev/null) || ERROR="?" + OMIT=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "omit")] | length' 2>/dev/null) || OMIT="?" + + echo "Poll $i/$MAX_POLLS: Status=$RUN_STATUS Total=$TOTAL Pass=$PASS Fail=$FAIL Skip=$SKIP Error=$ERROR Omit=$OMIT" + + if [ "$RUN_STATUS" = "done" ]; then + exit 0 + fi + + sleep "$POLL_INTERVAL" + done + + echo "::error::Inferno Bulk Data suite timed out" + exit 1 + + - name: Check test results + run: | + if [ ! -f "$RESULTS_DIR/results.json" ]; then + echo "::error::No results file found" + exit 1 + fi + + RESULTS=$(cat "$RESULTS_DIR/results.json") + LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' + OMIT_EXPR='(.test_id | test("bulk_data_server_tls_version_stu2$"))' + + FAILURES=$(echo "$RESULTS" | jq "$LATEST | [.[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not))] | length") + OMITTED_COUNT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") + + echo "Failures (excluding $OMITTED_COUNT known omitted tests): $FAILURES" + + if [ "$FAILURES" -gt 0 ]; then + echo "::error::$FAILURES Inferno Bulk Data test(s) failed" + echo "$RESULTS" | jq -r "$LATEST | .[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not)) | \" \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:300])\"" exit 1 fi - - name: Tear down stack + - name: Generate test summary if: always() run: | - docker compose -f docker/bulk-export/docker-compose.yml \ - -p hfs-bulk-${{ github.run_id }} down -v + echo "## Inferno Bulk Data v2.0.0 Test Results" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + if [ -f "$RESULTS_DIR/results.json" ]; then + RESULTS=$(cat "$RESULTS_DIR/results.json") + LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' + OMIT_EXPR='(.test_id | test("bulk_data_server_tls_version_stu2$"))' + + TOTAL=$(echo "$RESULTS" | jq "$LATEST | length") + PASS=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"pass\")] | length") + FAIL=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"fail\" and ($OMIT_EXPR | not))] | length") + ERROR=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"error\" and ($OMIT_EXPR | not))] | length") + SKIP=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"skip\")] | length") + OMIT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"omit\")] | length") + KNOWN=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") + + echo "| Status | Count |" >> "$GITHUB_STEP_SUMMARY" + echo "|--------|-------|" >> "$GITHUB_STEP_SUMMARY" + echo "| Pass | $PASS |" >> "$GITHUB_STEP_SUMMARY" + echo "| Fail | $FAIL |" >> "$GITHUB_STEP_SUMMARY" + echo "| Error | $ERROR |" >> "$GITHUB_STEP_SUMMARY" + echo "| Skip | $SKIP |" >> "$GITHUB_STEP_SUMMARY" + echo "| Omit | $OMIT |" >> "$GITHUB_STEP_SUMMARY" + echo "| Known omitted | $KNOWN |" >> "$GITHUB_STEP_SUMMARY" + echo "| Total | $TOTAL |" >> "$GITHUB_STEP_SUMMARY" + + if [ "$KNOWN" -gt 0 ]; then + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "### Known Omitted Tests" >> "$GITHUB_STEP_SUMMARY" + echo "$RESULTS" | jq -r "$LATEST | .[] | select($OMIT_EXPR) | \"- \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:200])\"" >> "$GITHUB_STEP_SUMMARY" + fi + + if [ "$FAIL" -gt 0 ] || [ "$ERROR" -gt 0 ]; then + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "### Failed/Error Tests" >> "$GITHUB_STEP_SUMMARY" + echo "$RESULTS" | jq -r "$LATEST | .[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not)) | \"- \(.test_id): \(.result) - \(.result_message // \"No message\" | .[0:200])\"" >> "$GITHUB_STEP_SUMMARY" + fi + else + echo "No results file found." >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Collect container logs + if: always() + run: | + mkdir -p "$RESULTS_DIR/container-logs" + + for name in "$PG_CONTAINER" "$MINIO_CONTAINER" "$KC_CONTAINER"; do + if [ -n "${name:-}" ]; then + docker logs "$name" > "$RESULTS_DIR/container-logs/$name.log" 2>&1 || true + docker inspect "$name" > "$RESULTS_DIR/container-logs/$name.inspect.json" 2>&1 || true + fi + done + + PROJECT="${INFERNO_COMPOSE_PROJECT:-inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}}" + echo "$PROJECT" > "$RESULTS_DIR/container-logs/inferno-project.txt" + IDS=$(docker ps -aq --filter "label=com.docker.compose.project=$PROJECT") + for id in $IDS; do + NAME=$(docker inspect -f '{{.Name}}' "$id" | sed 's#^/##') + docker logs "$id" > "$RESULTS_DIR/container-logs/inferno-$NAME.log" 2>&1 || true + docker inspect "$id" > "$RESULTS_DIR/container-logs/inferno-$NAME.inspect.json" 2>&1 || true + done + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v7 + with: + name: inferno-bulk-data-results-${{ github.run_id }}-${{ github.run_attempt }} + path: inferno-bulk-data-results/ + retention-days: 30 + + - name: Cleanup + if: always() + run: | + echo "Stopping HFS..." + if [ -f /tmp/hfs-inferno-bulk-data.pid ]; then + kill "$(cat /tmp/hfs-inferno-bulk-data.pid)" 2>/dev/null || true + rm -f /tmp/hfs-inferno-bulk-data.pid + fi + + echo "Stopping PostgreSQL..." + docker rm -f "${PG_CONTAINER:-none}" 2>/dev/null || true + + echo "Stopping MinIO..." + docker rm -f "${MINIO_CONTAINER:-none}" 2>/dev/null || true + + echo "Stopping Keycloak..." + docker rm -f "${KC_CONTAINER:-none}" 2>/dev/null || true + + echo "Cleanup complete" + + inferno-stack-down: + name: Stop Shared Inferno Bulk Data Stack + needs: [inferno-stack-up, inferno-bulk-data-test] + if: always() && needs.inferno-stack-up.result != 'skipped' + runs-on: [self-hosted, Linux] + steps: + - name: Tear down Inferno stack by compose project label + run: | + PROJECT="${{ needs.inferno-stack-up.outputs.project }}" + if [ -z "$PROJECT" ]; then + PROJECT="inferno-bulk-data-${{ github.run_id }}-${{ github.run_attempt }}" + fi + echo "Tearing down compose project: $PROJECT" + + IDS=$(docker ps -aq --filter "label=com.docker.compose.project=$PROJECT") + if [ -n "$IDS" ]; then + docker rm -f $IDS + fi + + NETS=$(docker network ls --filter "label=com.docker.compose.project=$PROJECT" -q) + if [ -n "$NETS" ]; then + docker network rm $NETS || true + fi + + VOLS=$(docker volume ls --filter "label=com.docker.compose.project=$PROJECT" -q) + if [ -n "$VOLS" ]; then + docker volume rm $VOLS || true + fi + + echo "Inferno Bulk Data stack teardown complete" From e8161b99ddbe30c85bfabcc2270a24fe479ead09 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 14:34:17 -0400 Subject: [PATCH 51/81] ci(inferno): preserve MinIO client alias --- .github/workflows/inferno-bulk-data.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 8b895c75a..6e95a7354 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -280,10 +280,10 @@ jobs: exit 1 fi - docker run --rm --network "container:$MINIO_CONTAINER" minio/mc:latest \ - alias set local http://127.0.0.1:9000 hfs-minio hfs-minio-secret - docker run --rm --network "container:$MINIO_CONTAINER" minio/mc:latest \ - mb -p local/hfs-export + docker run --rm --network "container:$MINIO_CONTAINER" \ + --entrypoint /bin/sh \ + minio/mc:latest \ + -c 'mc alias set local http://127.0.0.1:9000 hfs-minio hfs-minio-secret && mc mb -p local/hfs-export' - name: Generate Keycloak realm for Inferno client run: | From f6d0ce950c7cddcfa65327e5ed484057607ecd5d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:04:59 -0400 Subject: [PATCH 52/81] ci(inferno): run bulk export group --- .github/workflows/inferno-bulk-data.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 6e95a7354..1575ea0a3 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -35,7 +35,7 @@ env: DOCKER_HOST: ${{ secrets.DOCKER_HOST }} DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} SUITE_ID: bulk_data_v200 - TEST_GROUP_ID: bulk_data_v200 + TEST_GROUP_ID: bulk_data_v200-bulk_data_export_tests_v200 GROUP_ID: inferno-bulk-group RESULTS_DIR: inferno-bulk-data-results From cc58a7d164c60ef859bdb3d7e7a33978827df327 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:05:06 -0400 Subject: [PATCH 53/81] chore(deps): update lettre for security audit --- Cargo.lock | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6acbf65d4..881f99a82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -125,7 +125,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -136,7 +136,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2448,7 +2448,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3822,7 +3822,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3959,9 +3959,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" [[package]] name = "lettre" -version = "0.11.21" +version = "0.11.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dabda5859ee7c06b995b9d1165aa52c39110e079ef609db97178d86aeb051fa7" +checksum = "0da65617f6cb926332d039cb578aad56178da86e128db6a1b09f4c94fa5b3349" dependencies = [ "async-trait", "base64 0.22.1", @@ -4440,7 +4440,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -5879,7 +5879,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -6380,7 +6380,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6409,7 +6409,7 @@ dependencies = [ "cfg-if", "libc", "psm", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -6572,7 +6572,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -7574,7 +7574,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] From 2ec13e6c003d5ec48b7bac53f1ff41c4615fa948 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:05:13 -0400 Subject: [PATCH 54/81] ci(bulk-export): add external smoke workflow --- .github/workflows/bulk-export-smoke.yml | 436 ++++++++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100644 .github/workflows/bulk-export-smoke.yml diff --git a/.github/workflows/bulk-export-smoke.yml b/.github/workflows/bulk-export-smoke.yml new file mode 100644 index 000000000..4c1b07c2c --- /dev/null +++ b/.github/workflows/bulk-export-smoke.yml @@ -0,0 +1,436 @@ +name: HFS Bulk Export External Smoke + +# Manual external smoke coverage for HFS Bulk Data Export. This is intentionally +# separate from Inferno: it validates HFS-owned end-to-end behavior with real +# backing services, while Inferno remains the conformance suite. + +on: + workflow_dispatch: + +env: + CARGO_TERM_COLOR: always + CARGO_BUILD_JOBS: 1 + CARGO_PROFILE_DEV_DEBUG: 0 + DOCKER_HOST: ${{ secrets.DOCKER_HOST }} + DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} + +jobs: + matrix-setup: + name: Generate matrix + runs-on: [self-hosted, Linux] + outputs: + smoke-matrix: ${{ steps.gen.outputs.smoke-matrix }} + steps: + - id: gen + run: | + FHIR_VERSIONS='["R4","R4B","R5"]' + ROWS='[ + {"backend":"sqlite","bulk_mode":"embedded-local","expectation":"full"}, + {"backend":"sqlite","bulk_mode":"postgres-s3","expectation":"full"}, + {"backend":"postgres","bulk_mode":"embedded-local","expectation":"full"}, + {"backend":"postgres","bulk_mode":"postgres-s3","expectation":"full"}, + {"backend":"sqlite-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, + {"backend":"sqlite-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, + {"backend":"postgres-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, + {"backend":"postgres-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, + {"backend":"mongodb","bulk_mode":"postgres-s3","expectation":"unsupported"}, + {"backend":"mongodb-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, + {"backend":"s3","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, + {"backend":"s3-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"} + ]' + + MATRIX=$(jq -c --argjson versions "$FHIR_VERSIONS" \ + '[.[] as $row | $versions[] as $version | $row + {fhir_version: $version}]' \ + <<<"$ROWS") + echo "smoke-matrix=$MATRIX" >> "$GITHUB_OUTPUT" + echo "Smoke matrix: $(jq 'length' <<<"$MATRIX") jobs" + + build: + name: Build HFS for bulk export smoke + needs: matrix-setup + runs-on: [self-hosted, Linux] + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: stable + + - name: Configure Rust to use LLD + run: | + mkdir -p ~/.cargo + rm -f ~/.cargo/config.toml + echo '[target.x86_64-unknown-linux-gnu]' >> ~/.cargo/config.toml + echo 'linker = "clang"' >> ~/.cargo/config.toml + echo 'rustflags = ["-C", "link-arg=-fuse-ld=lld", "-C", "link-arg=-Wl,-zstack-size=8388608"]' >> ~/.cargo/config.toml + + - name: Build HFS binary + run: | + cargo build -p helios-hfs --no-default-features \ + --features R4,R4B,R5,sqlite,elasticsearch,postgres,mongodb,s3 + + - name: Upload HFS binary + uses: actions/upload-artifact@v7 + with: + name: hfs-bulk-export-smoke-binary + path: target/debug/hfs + retention-days: 1 + + bulk-export-smoke: + name: Smoke (${{ matrix.fhir_version }} / ${{ matrix.backend }} / ${{ matrix.bulk_mode }}) + needs: [matrix-setup, build] + runs-on: [self-hosted, Linux] + strategy: + fail-fast: false + max-parallel: 2 + matrix: + include: ${{ fromJSON(needs.matrix-setup.outputs.smoke-matrix) }} + env: + RESULTS_DIR: bulk-export-smoke-results/${{ matrix.fhir_version }}/${{ matrix.backend }}/${{ matrix.bulk_mode }} + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - name: Download HFS binary + uses: actions/download-artifact@v8 + with: + name: hfs-bulk-export-smoke-binary + path: target/debug + + - name: Make executables available + run: | + chmod +x target/debug/hfs + chmod +x crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh + + - name: Determine runner and Docker host IP + run: | + RUNNER_IP=$(hostname -I | awk '{print $1}') + if [ -n "${DOCKER_HOST_IP:-}" ]; then + EFFECTIVE_DOCKER_HOST_IP="$DOCKER_HOST_IP" + else + EFFECTIVE_DOCKER_HOST_IP="$RUNNER_IP" + fi + echo "RUNNER_IP=$RUNNER_IP" >> "$GITHUB_ENV" + echo "DOCKER_HOST_IP=$EFFECTIVE_DOCKER_HOST_IP" >> "$GITHUB_ENV" + echo "Runner IP: $RUNNER_IP" + echo "Docker host IP: $EFFECTIVE_DOCKER_HOST_IP" + + - name: Start Elasticsearch + if: contains(matrix.backend, 'elasticsearch') + run: | + ES_CONTAINER="es-bulk-export-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}" + docker rm -f "$ES_CONTAINER" 2>/dev/null || true + docker run -d --name "$ES_CONTAINER" -p 0:9200 \ + -e "discovery.type=single-node" \ + -e "xpack.security.enabled=false" \ + -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \ + elasticsearch:8.15.0 + echo "ES_CONTAINER=$ES_CONTAINER" >> "$GITHUB_ENV" + + for i in {1..30}; do + ES_PORT=$(docker port "$ES_CONTAINER" 9200 2>/dev/null | head -1 | sed 's/.*://') + if [ -n "$ES_PORT" ] && curl -sf "http://$DOCKER_HOST_IP:$ES_PORT/_cluster/health" >/dev/null 2>&1; then + echo "ES_PORT=$ES_PORT" >> "$GITHUB_ENV" + echo "Elasticsearch is ready on port $ES_PORT" + exit 0 + fi + echo "Attempt $i/30: Elasticsearch not ready yet..." + sleep 2 + done + + docker logs "$ES_CONTAINER" + exit 1 + + - name: Start PostgreSQL + if: contains(matrix.backend, 'postgres') || matrix.bulk_mode == 'postgres-s3' + run: | + PG_CONTAINER="pg-bulk-export-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}" + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + docker run -d --name "$PG_CONTAINER" -p 0:5432 \ + -e POSTGRES_USER=helios \ + -e POSTGRES_PASSWORD=helios \ + -e POSTGRES_DB=helios \ + postgres:16 + echo "PG_CONTAINER=$PG_CONTAINER" >> "$GITHUB_ENV" + + for i in {1..30}; do + if docker exec "$PG_CONTAINER" pg_isready -U helios >/dev/null 2>&1; then + PG_PORT=$(docker port "$PG_CONTAINER" 5432 | head -1 | sed 's/.*://') + if timeout 2 bash -c "cat < /dev/null > /dev/tcp/$DOCKER_HOST_IP/$PG_PORT" 2>/dev/null; then + echo "PG_PORT=$PG_PORT" >> "$GITHUB_ENV" + echo "PG_URL=postgresql://helios:helios@$DOCKER_HOST_IP:$PG_PORT/helios" >> "$GITHUB_ENV" + echo "PostgreSQL is ready on port $PG_PORT" + exit 0 + fi + fi + echo "Attempt $i/30: PostgreSQL not ready yet..." + sleep 2 + done + + docker logs "$PG_CONTAINER" + exit 1 + + - name: Start MongoDB replica set + if: contains(matrix.backend, 'mongodb') + run: | + MONGO_CONTAINER="mongo-bulk-export-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}" + docker rm -f "$MONGO_CONTAINER" 2>/dev/null || true + docker run -d --name "$MONGO_CONTAINER" -p 0:27017 mongo:7 \ + --replSet rs0 --bind_ip_all + echo "MONGO_CONTAINER=$MONGO_CONTAINER" >> "$GITHUB_ENV" + + for i in {1..30}; do + MONGO_PORT=$(docker port "$MONGO_CONTAINER" 27017 2>/dev/null | head -1 | sed 's/.*://') + if [ -n "$MONGO_PORT" ]; then + if docker exec "$MONGO_CONTAINER" mongosh --quiet --eval 'rs.initiate({_id:"rs0",members:[{_id:0,host:"127.0.0.1:27017"}]})' >/dev/null 2>&1 || true; then + if docker exec "$MONGO_CONTAINER" mongosh --quiet --eval 'db.hello().isWritablePrimary' | grep -q true; then + echo "MONGO_PORT=$MONGO_PORT" >> "$GITHUB_ENV" + echo "MongoDB replica set is ready on port $MONGO_PORT" + exit 0 + fi + fi + fi + echo "Attempt $i/30: MongoDB not ready yet..." + sleep 2 + done + + docker logs "$MONGO_CONTAINER" + exit 1 + + - name: Start MinIO + if: matrix.bulk_mode == 'postgres-s3' || startsWith(matrix.backend, 's3') + run: | + MINIO_CONTAINER="minio-bulk-export-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}" + docker rm -f "$MINIO_CONTAINER" 2>/dev/null || true + docker run -d --name "$MINIO_CONTAINER" -p 0:9000 -p 0:9001 \ + -e MINIO_ROOT_USER=hfs-minio \ + -e MINIO_ROOT_PASSWORD=hfs-minio-secret \ + minio/minio:latest server /data --console-address ":9001" + echo "MINIO_CONTAINER=$MINIO_CONTAINER" >> "$GITHUB_ENV" + + for i in {1..30}; do + MINIO_PORT=$(docker port "$MINIO_CONTAINER" 9000 2>/dev/null | head -1 | sed 's/.*://') + if [ -n "$MINIO_PORT" ] && curl -sf "http://$DOCKER_HOST_IP:$MINIO_PORT/minio/health/live" >/dev/null 2>&1; then + echo "MINIO_PORT=$MINIO_PORT" >> "$GITHUB_ENV" + echo "MINIO_ENDPOINT=http://$DOCKER_HOST_IP:$MINIO_PORT" >> "$GITHUB_ENV" + docker run --rm --network "container:$MINIO_CONTAINER" \ + --entrypoint /bin/sh \ + minio/mc:latest \ + -c 'mc alias set local http://127.0.0.1:9000 hfs-minio hfs-minio-secret && mc mb -p local/hfs-export || true && mc mb -p local/hfs-primary || true' + echo "MinIO is ready on port $MINIO_PORT" + exit 0 + fi + echo "Attempt $i/30: MinIO not ready yet..." + sleep 2 + done + + docker logs "$MINIO_CONTAINER" + exit 1 + + - name: Start HFS server + run: | + mkdir -p "$RESULTS_DIR" + HFS_PORT=$((18100 + ${{ strategy['job-index'] }})) + HFS_LOG="$RESULTS_DIR/hfs.log" + echo "HFS_PORT=$HFS_PORT" >> "$GITHUB_ENV" + echo "HFS_LOG=$HFS_LOG" >> "$GITHUB_ENV" + + COMMON_ENV=( + HFS_BASE_URL="http://localhost:$HFS_PORT" + HFS_DEFAULT_FHIR_VERSION="${{ matrix.fhir_version }}" + HFS_BULK_EXPORT_ENABLED=true + HFS_BULK_EXPORT_BATCH_SIZE=1 + HFS_BULK_EXPORT_FILE_URL_TTL=3600 + HFS_BULK_EXPORT_OUTPUT_TTL=3600 + AWS_ACCESS_KEY_ID=hfs-minio + AWS_SECRET_ACCESS_KEY=hfs-minio-secret + AWS_REGION=us-east-1 + ) + + BULK_ENV=() + if [ "${{ matrix.bulk_mode }}" = "embedded-local" ]; then + BULK_ENV=( + HFS_BULK_EXPORT_BACKEND=embedded + HFS_BULK_EXPORT_OUTPUT_BACKEND=local-fs + HFS_BULK_EXPORT_OUTPUT_DIR="$RESULTS_DIR/export-output" + HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=true + ) + echo "EXPECT_REQUIRES_ACCESS_TOKEN=true" >> "$GITHUB_ENV" + else + BULK_ENV=( + HFS_BULK_EXPORT_BACKEND=postgres-s3 + HFS_BULK_EXPORT_DATABASE_URL="${PG_URL:-}" + HFS_BULK_EXPORT_OUTPUT_BACKEND=s3 + HFS_BULK_EXPORT_S3_BUCKET=hfs-export + HFS_BULK_EXPORT_S3_ENDPOINT="${MINIO_ENDPOINT:-}" + HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE=true + HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false + ) + echo "EXPECT_REQUIRES_ACCESS_TOKEN=false" >> "$GITHUB_ENV" + fi + + case "${{ matrix.backend }}" in + sqlite) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=sqlite \ + ./target/debug/hfs --database-url :memory: --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + sqlite-elasticsearch) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=sqlite-elasticsearch \ + HFS_ELASTICSEARCH_NODES="http://$DOCKER_HOST_IP:$ES_PORT" \ + ./target/debug/hfs --database-url :memory: --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + postgres) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=postgres \ + HFS_DATABASE_URL="$PG_URL" \ + HFS_PG_HOST="$DOCKER_HOST_IP" \ + HFS_PG_PORT="$PG_PORT" \ + HFS_PG_DBNAME=helios \ + HFS_PG_USER=helios \ + HFS_PG_PASSWORD=helios \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + postgres-elasticsearch) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=postgres-elasticsearch \ + HFS_DATABASE_URL="$PG_URL" \ + HFS_ELASTICSEARCH_NODES="http://$DOCKER_HOST_IP:$ES_PORT" \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + mongodb) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=mongodb \ + HFS_DATABASE_URL="mongodb://$DOCKER_HOST_IP:$MONGO_PORT/?replicaSet=rs0&directConnection=true" \ + HFS_MONGODB_DATABASE="helios_bulk_export_smoke_${{ github.run_id }}_${{ github.run_attempt }}_${{ strategy['job-index'] }}" \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + mongodb-elasticsearch) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=mongodb-elasticsearch \ + HFS_DATABASE_URL="mongodb://$DOCKER_HOST_IP:$MONGO_PORT/?replicaSet=rs0&directConnection=true" \ + HFS_MONGODB_DATABASE="helios_bulk_export_smoke_${{ github.run_id }}_${{ github.run_attempt }}_${{ strategy['job-index'] }}" \ + HFS_ELASTICSEARCH_NODES="http://$DOCKER_HOST_IP:$ES_PORT" \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + s3) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=s3 \ + HFS_S3_BUCKET=hfs-primary \ + HFS_S3_PREFIX="ci/bulk-export-smoke/${{ github.run_id }}/${{ github.run_attempt }}/${{ strategy['job-index'] }}/" \ + HFS_S3_VALIDATE_BUCKETS=false \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + s3-elasticsearch) + env "${COMMON_ENV[@]}" "${BULK_ENV[@]}" \ + HFS_STORAGE_BACKEND=s3-elasticsearch \ + HFS_S3_BUCKET=hfs-primary \ + HFS_S3_PREFIX="ci/bulk-export-smoke/${{ github.run_id }}/${{ github.run_attempt }}/${{ strategy['job-index'] }}/" \ + HFS_S3_VALIDATE_BUCKETS=false \ + HFS_ELASTICSEARCH_NODES="http://$DOCKER_HOST_IP:$ES_PORT" \ + ./target/debug/hfs --log-level info --port "$HFS_PORT" --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + ;; + esac + + echo $! > /tmp/hfs-bulk-export-smoke-${{ strategy['job-index'] }}.pid + echo "HFS_PID=$(cat /tmp/hfs-bulk-export-smoke-${{ strategy['job-index'] }}.pid)" >> "$GITHUB_ENV" + + - name: Wait for HFS to be ready + run: | + for i in {1..45}; do + if ! kill -0 "$HFS_PID" 2>/dev/null; then + echo "HFS process exited" + cat "$HFS_LOG" + exit 1 + fi + if curl -sf "http://localhost:$HFS_PORT/health" >/dev/null 2>&1; then + echo "HFS is ready" + exit 0 + fi + echo "Attempt $i/45: HFS not ready yet..." + sleep 2 + done + + tail -100 "$HFS_LOG" + exit 1 + + - name: Run bulk export smoke checks + run: | + BASE_URL="http://localhost:$HFS_PORT" \ + FHIR_VERSION="${{ matrix.fhir_version }}" \ + RESULTS_DIR="$RESULTS_DIR" \ + SMOKE_RUN_SUFFIX="${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}" \ + BULK_EXPORT_EXPECTATION="${{ matrix.expectation }}" \ + EXPECT_REQUIRES_ACCESS_TOKEN="$EXPECT_REQUIRES_ACCESS_TOKEN" \ + ./crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh + + - name: Stop HFS gracefully + if: always() + run: | + if [ -n "${HFS_PID:-}" ] && kill -0 "$HFS_PID" 2>/dev/null; then + kill -INT "$HFS_PID" 2>/dev/null || true + for _ in {1..50}; do + if kill -0 "$HFS_PID" 2>/dev/null; then + sleep 0.2 + else + break + fi + done + kill -9 "$HFS_PID" 2>/dev/null || true + fi + rm -f /tmp/hfs-bulk-export-smoke-${{ strategy['job-index'] }}.pid + + - name: Collect backend container logs + if: always() + run: | + mkdir -p "$RESULTS_DIR/backend-logs" + if [ -n "${ES_CONTAINER:-}" ]; then + docker logs "$ES_CONTAINER" > "$RESULTS_DIR/backend-logs/elasticsearch.log" 2>&1 || true + fi + if [ -n "${PG_CONTAINER:-}" ]; then + docker logs "$PG_CONTAINER" > "$RESULTS_DIR/backend-logs/postgres.log" 2>&1 || true + fi + if [ -n "${MONGO_CONTAINER:-}" ]; then + docker logs "$MONGO_CONTAINER" > "$RESULTS_DIR/backend-logs/mongodb.log" 2>&1 || true + fi + if [ -n "${MINIO_CONTAINER:-}" ]; then + docker logs "$MINIO_CONTAINER" > "$RESULTS_DIR/backend-logs/minio.log" 2>&1 || true + fi + + - name: Append smoke report to summary + if: always() + run: | + if [ -f "$RESULTS_DIR/summary.md" ]; then + cat "$RESULTS_DIR/summary.md" >> "$GITHUB_STEP_SUMMARY" + else + echo "## Bulk Export Smoke" >> "$GITHUB_STEP_SUMMARY" + echo "" >> "$GITHUB_STEP_SUMMARY" + echo "No summary produced." >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Upload bulk export smoke artifacts + if: always() + uses: actions/upload-artifact@v7 + with: + name: bulk-export-smoke-${{ matrix.fhir_version }}-${{ matrix.backend }}-${{ matrix.bulk_mode }}-${{ github.run_id }}-${{ github.run_attempt }} + path: bulk-export-smoke-results/${{ matrix.fhir_version }}/${{ matrix.backend }}/${{ matrix.bulk_mode }}/ + retention-days: 30 + + - name: Cleanup containers + if: always() + run: | + if [ -n "${ES_CONTAINER:-}" ]; then + docker rm -f "$ES_CONTAINER" 2>/dev/null || true + fi + if [ -n "${PG_CONTAINER:-}" ]; then + docker rm -f "$PG_CONTAINER" 2>/dev/null || true + fi + if [ -n "${MONGO_CONTAINER:-}" ]; then + docker rm -f "$MONGO_CONTAINER" 2>/dev/null || true + fi + if [ -n "${MINIO_CONTAINER:-}" ]; then + docker rm -f "$MINIO_CONTAINER" 2>/dev/null || true + fi From cc4882c5d72b9d2f897e622ebfe9ab351665b0d4 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:05:21 -0400 Subject: [PATCH 55/81] test(bulk-export): add external smoke runner --- .../run_external_bulk_export_smoke.sh | 500 ++++++++++++++++++ 1 file changed, 500 insertions(+) create mode 100755 crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh diff --git a/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh new file mode 100755 index 000000000..e21e9dc43 --- /dev/null +++ b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh @@ -0,0 +1,500 @@ +#!/usr/bin/env bash +set -euo pipefail + +BASE_URL="${BASE_URL:-http://localhost:8080}" +FHIR_VERSION="${FHIR_VERSION:-R4}" +RESULTS_DIR="${RESULTS_DIR:-bulk-export-smoke-results}" +SMOKE_RUN_SUFFIX="${SMOKE_RUN_SUFFIX:-local-$(date +%s)-$$}" +BULK_EXPORT_EXPECTATION="${BULK_EXPORT_EXPECTATION:-full}" +EXPECT_REQUIRES_ACCESS_TOKEN="${EXPECT_REQUIRES_ACCESS_TOKEN:-true}" + +HTTP_DIR="$RESULTS_DIR/http" +MANIFEST_DIR="$RESULTS_DIR/manifests" +NDJSON_DIR="$RESULTS_DIR/ndjson" +SUMMARY_FILE="$RESULTS_DIR/summary.md" + +mkdir -p "$HTTP_DIR" "$MANIFEST_DIR" "$NDJSON_DIR" + +log() { + echo "[bulk-export-smoke] $*" +} + +fail() { + local msg="$1" + echo "[bulk-export-smoke] ERROR: $msg" >&2 + mkdir -p "$(dirname "$SUMMARY_FILE")" + echo "- FAIL: $msg" >> "$SUMMARY_FILE" + if [ -n "${HFS_LOG:-}" ] && [ -f "$HFS_LOG" ]; then + echo "---- hfs log (tail) ----" >&2 + tail -n 160 "$HFS_LOG" >&2 || true + echo "------------------------" >&2 + fi + exit 1 +} + +pass() { + local msg="$1" + echo "- PASS: $msg" >> "$SUMMARY_FILE" +} + +require_cmd() { + local cmd="$1" + if ! command -v "$cmd" >/dev/null 2>&1; then + fail "required command not found: $cmd" + fi +} + +expect_status() { + local actual="$1" + local expected="$2" + local operation="$3" + local response_file="$4" + if [ "$actual" != "$expected" ]; then + echo "---- $operation response ----" >&2 + cat "$response_file" >&2 || true + echo "----------------------------" >&2 + fail "$operation returned HTTP $actual, expected $expected" + fi +} + +expect_created() { + local status="$1" + local operation="$2" + local response_file="$3" + if [ "$status" != "200" ] && [ "$status" != "201" ]; then + echo "---- $operation response ----" >&2 + cat "$response_file" >&2 || true + echo "----------------------------" >&2 + fail "$operation returned unexpected HTTP status: $status" + fi +} + +case "$FHIR_VERSION" in + R4) FHIR_MIME_VERSION="4.0" ;; + R4B) FHIR_MIME_VERSION="4.3" ;; + R5) FHIR_MIME_VERSION="5.0" ;; + *) fail "unsupported FHIR_VERSION: $FHIR_VERSION (expected R4, R4B, or R5)" ;; +esac + +FHIR_CT="application/fhir+json; fhirVersion=$FHIR_MIME_VERSION" +FHIR_ACCEPT="$FHIR_CT" +ID_SUFFIX="$(printf '%s' "$SMOKE_RUN_SUFFIX-$FHIR_VERSION" | tr -cs '[:alnum:]-' '-' | sed -e 's/^-*//' -e 's/-*$//')" +if [ -z "$ID_SUFFIX" ]; then + ID_SUFFIX="bulk-smoke" +fi + +PATIENT_A="bulk-smoke-patient-a-$ID_SUFFIX" +PATIENT_B="bulk-smoke-patient-b-$ID_SUFFIX" +PATIENT_C="bulk-smoke-patient-c-$ID_SUFFIX" +OBS_A="bulk-smoke-observation-a-$ID_SUFFIX" +OBS_B="bulk-smoke-observation-b-$ID_SUFFIX" +OBS_C="bulk-smoke-observation-c-$ID_SUFFIX" +GROUP_ID="bulk-smoke-group-$ID_SUFFIX" + +write_summary_header() { + cat > "$SUMMARY_FILE" < "$HTTP_DIR/patient-a.json" < "$HTTP_DIR/patient-b.json" < "$HTTP_DIR/patient-c.json" < "$HTTP_DIR/observation-a.json" < "$HTTP_DIR/observation-b.json" < "$HTTP_DIR/observation-c.json" < "$HTTP_DIR/group.json" </dev/null || fail "CapabilityStatement does not advertise all bulk export operations" + pass "CapabilityStatement advertises bulk export operations" +} + +assert_no_bulk_export_endpoint() { + local response="$HTTP_DIR/export-unavailable.response" + local status + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL/\$export?_type=Patient")" + case "$status" in + 400|404|501|500) + pass "bulk export endpoint is unavailable as expected (HTTP $status)" + ;; + *) + cat "$response" >&2 || true + fail "expected bulk export endpoint to be unavailable, got HTTP $status" + ;; + esac +} + +assert_requires_respond_async() { + local response="$HTTP_DIR/export-missing-prefer.response" + local status + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL/\$export?_type=Patient")" + expect_status "$status" "400" "GET /\$export without Prefer" "$response" + pass "kickoff requires Prefer: respond-async" +} + +kickoff_get() { + local label="$1" + local path="$2" + local response="$HTTP_DIR/$label-kickoff.response" + local headers="$HTTP_DIR/$label-kickoff.headers" + local status + status="$(curl -sS -D "$headers" -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL$path")" + expect_status "$status" "202" "$label kickoff" "$response" + local content_location + content_location="$(awk 'BEGIN{IGNORECASE=1} /^Content-Location:/ {sub(/\r$/, "", $0); sub(/^[^:]+:[[:space:]]*/, "", $0); print; exit}' "$headers")" + if [ -z "$content_location" ]; then + fail "$label kickoff did not return Content-Location" + fi + printf '%s\n' "$content_location" +} + +kickoff_patient_post() { + local label="$1" + local patient_ref="$2" + local body="$HTTP_DIR/$label-parameters.json" + local response="$HTTP_DIR/$label-kickoff.response" + local headers="$HTTP_DIR/$label-kickoff.headers" + cat > "$body" <&2 || true + fail "$label status returned HTTP $status before completion" + fi + sleep 2 + done + + fail "$label export did not complete before timeout" +} + +expect_export_failure() { + local label="$1" + local path="$2" + local response="$HTTP_DIR/$label-expected-failure.response" + local headers="$HTTP_DIR/$label-expected-failure.headers" + local status + status="$(curl -sS -D "$headers" -o "$response" -w "%{http_code}" \ + -H "Prefer: respond-async" \ + -H "Accept: $FHIR_ACCEPT" \ + "$BASE_URL$path")" + + case "$status" in + 400|404|500|501) + pass "$label failed immediately as expected (HTTP $status)" + return 0 + ;; + 202) + ;; + *) + cat "$response" >&2 || true + fail "$label returned unexpected HTTP $status for expected-negative export" + ;; + esac + + local status_url + status_url="$(awk 'BEGIN{IGNORECASE=1} /^Content-Location:/ {sub(/\r$/, "", $0); sub(/^[^:]+:[[:space:]]*/, "", $0); print; exit}' "$headers")" + if [ -z "$status_url" ]; then + fail "$label expected-negative kickoff returned 202 without Content-Location" + fi + + for _ in $(seq 1 45); do + status="$(curl -sS -o "$response" -w "%{http_code}" \ + -H "Accept: application/json" \ + "$status_url")" + case "$status" in + 202) + sleep 2 + ;; + 500|501|400|404) + pass "$label reached expected failure state (HTTP $status)" + return 0 + ;; + 200) + cat "$response" >&2 || true + fail "$label unexpectedly completed successfully" + ;; + *) + cat "$response" >&2 || true + fail "$label reached unexpected status HTTP $status" + ;; + esac + done + + fail "$label expected-negative export did not fail before timeout" +} + +download_outputs() { + local label="$1" + local manifest="$2" + local merged="$NDJSON_DIR/$label-all.ndjson" + : > "$merged" + + local output_count + output_count="$(jq '.output | length' "$manifest")" + if [ "$output_count" -lt 1 ]; then + fail "$label manifest has no output files" + fi + + local requires_token + requires_token="$(jq -r '.requiresAccessToken' "$manifest")" + if [ "$requires_token" != "$EXPECT_REQUIRES_ACCESS_TOKEN" ]; then + fail "$label manifest requiresAccessToken=$requires_token, expected $EXPECT_REQUIRES_ACCESS_TOKEN" + fi + + local idx=0 + while [ "$idx" -lt "$output_count" ]; do + local url + local resource_type + local file + url="$(jq -r ".output[$idx].url" "$manifest")" + resource_type="$(jq -r ".output[$idx].type" "$manifest")" + file="$NDJSON_DIR/$label-$idx-$resource_type.ndjson" + curl -sS -o "$file" "$url" + if [ -s "$file" ]; then + while IFS= read -r line; do + [ -z "$line" ] && continue + printf '%s\n' "$line" | jq -e --arg rt "$resource_type" '.resourceType == $rt' >/dev/null \ + || fail "$label output $idx contains invalid JSON or wrong resourceType" + done < "$file" + cat "$file" >> "$merged" + fi + idx=$((idx + 1)) + done + + if [ ! -s "$merged" ]; then + fail "$label downloaded outputs were empty" + fi + printf '%s\n' "$merged" +} + +assert_ids() { + local label="$1" + local ndjson="$2" + shift 2 + local expected + for expected in "$@"; do + jq -e --arg id "$expected" 'select(.id == $id)' "$ndjson" >/dev/null \ + || fail "$label output missing resource id $expected" + done +} + +assert_absent_ids() { + local label="$1" + local ndjson="$2" + shift 2 + local unexpected + for unexpected in "$@"; do + if jq -e --arg id "$unexpected" 'select(.id == $id)' "$ndjson" >/dev/null; then + fail "$label output unexpectedly included resource id $unexpected" + fi + done +} + +assert_type_counts() { + local label="$1" + local manifest="$2" + local patient_min="$3" + local observation_min="$4" + local patient_count + local observation_count + patient_count="$(jq '[.output[] | select(.type == "Patient") | .count // 0] | add // 0' "$manifest")" + observation_count="$(jq '[.output[] | select(.type == "Observation") | .count // 0] | add // 0' "$manifest")" + if [ "$patient_count" -lt "$patient_min" ]; then + fail "$label manifest Patient count $patient_count is below expected minimum $patient_min" + fi + if [ "$observation_count" -lt "$observation_min" ]; then + fail "$label manifest Observation count $observation_count is below expected minimum $observation_min" + fi +} + +run_full_lifecycle() { + assert_metadata_advertises_export + assert_requires_respond_async + seed_data + + local status_url manifest ndjson + + log "Running system export" + status_url="$(kickoff_get system "/\$export?_type=Patient,Observation")" + manifest="$(poll_manifest system "$status_url")" + assert_type_counts system "$manifest" 3 3 + ndjson="$(download_outputs system "$manifest")" + assert_ids system "$ndjson" "$PATIENT_A" "$PATIENT_B" "$PATIENT_C" "$OBS_A" "$OBS_B" "$OBS_C" + pass "system export completed and downloaded expected resources" + + log "Running patient export" + status_url="$(kickoff_patient_post patient "Patient/$PATIENT_A")" + manifest="$(poll_manifest patient "$status_url")" + assert_type_counts patient "$manifest" 1 1 + ndjson="$(download_outputs patient "$manifest")" + assert_ids patient "$ndjson" "$PATIENT_A" "$OBS_A" + assert_absent_ids patient "$ndjson" "$PATIENT_B" "$PATIENT_C" "$OBS_B" "$OBS_C" + pass "patient export scoped to requested patient" + + log "Running group export" + status_url="$(kickoff_get group "/Group/$GROUP_ID/\$export?_type=Patient,Observation")" + manifest="$(poll_manifest group "$status_url")" + assert_type_counts group "$manifest" 2 2 + ndjson="$(download_outputs group "$manifest")" + assert_ids group "$ndjson" "$PATIENT_A" "$PATIENT_B" "$OBS_A" "$OBS_B" + assert_absent_ids group "$ndjson" "$PATIENT_C" "$OBS_C" + pass "group export scoped to group members" + + local delete_response="$HTTP_DIR/system-delete.response" + local delete_status + delete_status="$(curl -sS -o "$delete_response" -w "%{http_code}" -X DELETE "$status_url")" + expect_status "$delete_status" "202" "DELETE final export status URL" "$delete_response" + + local gone_response="$HTTP_DIR/final-status-after-delete.response" + local gone_status + gone_status="$(curl -sS -o "$gone_response" -w "%{http_code}" "$status_url")" + expect_status "$gone_status" "404" "GET final export status URL after delete" "$gone_response" + pass "export delete endpoint accepted cleanup request and removed status URL" +} + +run_expected_negative() { + seed_data + expect_export_failure system "/\$export?_type=Patient,Observation" +} + +main() { + require_cmd curl + require_cmd jq + write_summary_header + + case "$BULK_EXPORT_EXPECTATION" in + full) + run_full_lifecycle + ;; + unsupported) + run_expected_negative + ;; + endpoint-unavailable) + assert_no_bulk_export_endpoint + ;; + *) + fail "unknown BULK_EXPORT_EXPECTATION: $BULK_EXPORT_EXPECTATION" + ;; + esac + + echo "" >> "$SUMMARY_FILE" + echo "All bulk export smoke checks completed for expectation \`$BULK_EXPORT_EXPECTATION\`." >> "$SUMMARY_FILE" + log "Bulk export smoke test completed successfully" +} + +main "$@" From 84ba69c91756e05043f51329b1e04572e851ec64 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:33:06 -0400 Subject: [PATCH 56/81] ci(inferno): add since_timestamp input to bulk data test run Inferno's bulk_data_v200 test group now requires since_timestamp as a mandatory input. Without it, every run attempt fails immediately with {"errors":"Missing the following required inputs: since_timestamp"}. Set it to 2000-01-01T00:00:00.000Z so the _since filter in export tests covers all seeded resources (created during the CI run in 2026). [skip ci] --- .github/workflows/inferno-bulk-data.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 1575ea0a3..2edd5e04c 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -566,7 +566,8 @@ jobs: {name: "smart_auth_info", value: $smart_auth_info}, {name: "group_id", value: $inferno_group_id}, {name: "bulk_timeout", value: "600"}, - {name: "lines_to_validate", value: "100"} + {name: "lines_to_validate", value: "100"}, + {name: "since_timestamp", value: "2000-01-01T00:00:00.000Z"} ] }')" From f1f4b899a39e64c8564ad232bce3fd51da65c9b7 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 15:45:21 -0400 Subject: [PATCH 57/81] test(rest): add coverage for bulk export auth, config validation, and handler paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add unit tests for BearerScopeAuth (5 cases covering no-principal bypass, ownership + scope, wildcard override, and missing-read-scope rejection), BulkExportConfig::validate() (6 cases covering each error branch), and 5 new integration tests for POST kickoff with Parameters body, _since, invalid _since, _elements, and a valid _typeFilter. Also exclude crates/hfs/src/main.rs from codecov — it is an application binary entry point not reachable by unit or integration tests. These changes close the codecov/patch gap (69.70% → target 75.74%). --- codecov.yml | 3 +- crates/rest/src/bulk_export_auth.rs | 110 ++++++++++++++++++++++++++++ crates/rest/src/config.rs | 60 +++++++++++++++ crates/rest/tests/bulk_export.rs | 102 ++++++++++++++++++++++++++ 4 files changed, 274 insertions(+), 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index 6c32a7fa8..2d220aabb 100644 --- a/codecov.yml +++ b/codecov.yml @@ -13,4 +13,5 @@ ignore: - "crates/fhir/src/r4.rs" - "crates/fhir/src/r4b.rs" - "crates/fhir/src/r5.rs" - - "crates/fhir/src/r6.rs" \ No newline at end of file + - "crates/fhir/src/r6.rs" + - "crates/hfs/src/main.rs" \ No newline at end of file diff --git a/crates/rest/src/bulk_export_auth.rs b/crates/rest/src/bulk_export_auth.rs index e5f6fb6db..cb5fafdb9 100644 --- a/crates/rest/src/bulk_export_auth.rs +++ b/crates/rest/src/bulk_export_auth.rs @@ -30,6 +30,116 @@ impl std::fmt::Display for ExportAuthError { impl std::error::Error for ExportAuthError {} +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use helios_auth::{Principal, ScopeSet}; + use helios_persistence::core::{ExportFileMetadata, ExportJobId, ExportPartKey}; + use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; + + fn make_principal(subject: &str, scopes: &str) -> Principal { + Principal { + subject: subject.to_string(), + issuer: "test-issuer".to_string(), + tenant_id: None, + scopes: ScopeSet::parse(scopes), + jti: None, + expires_at: Utc::now() + chrono::Duration::hours(1), + custom_claims: serde_json::Map::new(), + } + } + + fn make_file_meta(resource_type: &str, owner_sub: Option<&str>) -> ExportFileMetadata { + ExportFileMetadata { + key: ExportPartKey::output("t1", ExportJobId::new(), resource_type, 0, 1), + resource_type: resource_type.to_string(), + file_type: "output".to_string(), + line_count: 0, + job_owner_subject: owner_sub.map(str::to_string), + } + } + + fn test_tenant() -> TenantContext { + TenantContext::new(TenantId::new("t1"), TenantPermissions::full_access()) + } + + #[tokio::test] + async fn no_principal_bypasses_auth() { + let auth = BearerScopeAuth; + let result = auth + .authorize_download( + None, + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn owner_with_read_scope_passes() { + let auth = BearerScopeAuth; + let p = make_principal("owner", "system/Patient.rs"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn non_owner_without_wildcard_is_forbidden() { + let auth = BearerScopeAuth; + let p = make_principal("other", "system/Patient.rs"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(matches!(result, Err(ExportAuthError::Forbidden(_)))); + } + + #[tokio::test] + async fn wildcard_scope_overrides_ownership() { + let auth = BearerScopeAuth; + let p = make_principal("other", "system/*.rs"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(result.is_ok()); + } + + #[tokio::test] + async fn owner_missing_read_scope_is_forbidden() { + let auth = BearerScopeAuth; + // No SMART read scope — empty scope string → ScopeSet::empty() + let p = make_principal("owner", "openid profile"); + let result = auth + .authorize_download( + Some(&p), + &test_tenant(), + Some("owner"), + &make_file_meta("Patient", Some("owner")), + ) + .await; + assert!(matches!(result, Err(ExportAuthError::Forbidden(_)))); + } +} + /// Authorizes a bulk-export file download. #[async_trait] pub trait ExportFileAuth: Send + Sync { diff --git a/crates/rest/src/config.rs b/crates/rest/src/config.rs index 143357d84..6fd56bc80 100644 --- a/crates/rest/src/config.rs +++ b/crates/rest/src/config.rs @@ -1211,6 +1211,66 @@ mod tests { assert!(config.storage_backend_mode().is_err()); } + // ── BulkExportConfig::validate ──────────────────────────────── + + #[test] + fn test_bulk_export_config_default_is_valid() { + assert!(BulkExportConfig::default().validate().is_ok()); + } + + #[test] + fn test_bulk_export_config_invalid_backend() { + let cfg = BulkExportConfig { + backend: "unknown".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("HFS_BULK_EXPORT_BACKEND"))); + } + + #[test] + fn test_bulk_export_config_s3_output_requires_bucket() { + let cfg = BulkExportConfig { + output_backend: "s3".to_string(), + s3_bucket: None, + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("S3_BUCKET"))); + } + + #[test] + fn test_bulk_export_config_local_fs_requires_access_token() { + let cfg = BulkExportConfig { + output_backend: "local-fs".to_string(), + requires_access_token: "false".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("local-fs"))); + } + + #[test] + fn test_bulk_export_config_lease_must_exceed_heartbeat() { + let cfg = BulkExportConfig { + lease_duration_secs: 10, + heartbeat_interval_secs: 20, + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("LEASE_DURATION"))); + } + + #[test] + fn test_bulk_export_config_invalid_since_newly_added() { + let cfg = BulkExportConfig { + since_newly_added: "maybe".to_string(), + ..BulkExportConfig::default() + }; + let errs = cfg.validate().unwrap_err(); + assert!(errs.iter().any(|e| e.contains("SINCE_NEWLY_ADDED"))); + } + // ── display for StorageBackendMode ──────────────────────────── #[test] diff --git a/crates/rest/tests/bulk_export.rs b/crates/rest/tests/bulk_export.rs index a97d3dc7c..a9939852a 100644 --- a/crates/rest/tests/bulk_export.rs +++ b/crates/rest/tests/bulk_export.rs @@ -315,6 +315,108 @@ async fn test_status_and_download_unknown_job() { assert_eq!(download.status_code(), StatusCode::NOT_FOUND); } +#[tokio::test] +async fn test_post_kickoff_with_parameters_body() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 2).await; + + // POST kickoff using a FHIR Parameters resource body. + let body = json!({ + "resourceType": "Parameters", + "parameter": [ + {"name": "_type", "valueString": "Patient"} + ] + }); + let resp = server + .post("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .json(&body) + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + assert!(resp.headers().get("content-location").is_some()); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_since_parameter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_since", "2020-01-01T00:00:00Z") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + +#[tokio::test] +async fn test_invalid_since_rejected() { + let (server, _backend, _output, _tmp) = create_bulk_export_server().await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_since", "not-a-date") + .await; + assert_eq!(resp.status_code(), StatusCode::BAD_REQUEST); +} + +#[tokio::test] +async fn test_elements_parameter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_elements", "id,name") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + let status_url = resp + .headers() + .get("content-location") + .unwrap() + .to_str() + .unwrap() + .to_string(); + let status_path = status_url.strip_prefix("http://localhost:8080").unwrap(); + + drain_workers(&backend, &output).await; + + let done = server + .get(status_path) + .add_header("x-tenant-id", "test-tenant") + .await; + assert_eq!(done.status_code(), StatusCode::OK); +} + +#[tokio::test] +async fn test_valid_type_filter_accepted() { + let (server, backend, output, _tmp) = create_bulk_export_server().await; + seed_patients(&backend, 1).await; + + // _typeFilter with valid resource type (in _type) and allowed search param. + let resp = server + .get("/$export") + .add_header("x-tenant-id", "test-tenant") + .add_header("prefer", "respond-async") + .add_query_param("_type", "Patient") + .add_query_param("_typeFilter", "Patient?active=true") + .await; + assert_eq!(resp.status_code(), StatusCode::ACCEPTED); + + drain_workers(&backend, &output).await; +} + #[tokio::test] async fn test_capability_statement_advertises_export() { let (server, _backend, _output, _tmp) = create_bulk_export_server().await; From 924594a626c859c0e01371f40d3f79850ac2e3f9 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 17:05:05 -0400 Subject: [PATCH 58/81] fix(inferno): restore full-suite run so SMART Backend Services group establishes bearer token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running only `bulk_data_export_tests_v200` (introduced in f6d0ce95) skips the SMART Backend Services group entirely, so no bearer token is ever obtained. The stu2 kick-off tests inside the Export Tests group receive unauthenticated requests and HFS correctly returns 401. Revert TEST_GROUP_ID to `bulk_data_v200` so both groups run in sequence: SMART Backend Services (1) obtains the token via Inferno's private_key_jwt flow against Keycloak, Export Tests (2) picks up the session-scoped token for its stu2 tests. Also extend OMIT_EXPR to exclude the TLS test added by group 1 (`smart_backend_services_token_tls_version`), matching the existing omission for the equivalent Export Tests TLS test. Add `token_endpoint_auth_signing_alg_values_supported` to SmartConfiguration, required by Inferno's `well_known_capabilities_stu2` test (SMART App Launch IG STU2, §Backend Services). [skip ci] --- .github/workflows/inferno-bulk-data.yml | 6 +++--- crates/auth/src/discovery.rs | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 2edd5e04c..6950d00d9 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -35,7 +35,7 @@ env: DOCKER_HOST: ${{ secrets.DOCKER_HOST }} DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} SUITE_ID: bulk_data_v200 - TEST_GROUP_ID: bulk_data_v200-bulk_data_export_tests_v200 + TEST_GROUP_ID: bulk_data_v200 GROUP_ID: inferno-bulk-group RESULTS_DIR: inferno-bulk-data-results @@ -649,7 +649,7 @@ jobs: RESULTS=$(cat "$RESULTS_DIR/results.json") LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' - OMIT_EXPR='(.test_id | test("bulk_data_server_tls_version_stu2$"))' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version)$"))' FAILURES=$(echo "$RESULTS" | jq "$LATEST | [.[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not))] | length") OMITTED_COUNT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") @@ -670,7 +670,7 @@ jobs: if [ -f "$RESULTS_DIR/results.json" ]; then RESULTS=$(cat "$RESULTS_DIR/results.json") LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' - OMIT_EXPR='(.test_id | test("bulk_data_server_tls_version_stu2$"))' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version)$"))' TOTAL=$(echo "$RESULTS" | jq "$LATEST | length") PASS=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"pass\")] | length") diff --git a/crates/auth/src/discovery.rs b/crates/auth/src/discovery.rs index adf4e6d6f..3e121c7c5 100644 --- a/crates/auth/src/discovery.rs +++ b/crates/auth/src/discovery.rs @@ -27,6 +27,7 @@ pub struct SmartConfiguration { pub response_types_supported: Vec, pub grant_types_supported: Vec, pub token_endpoint_auth_methods_supported: Vec, + pub token_endpoint_auth_signing_alg_values_supported: Vec, pub capabilities: Vec, } @@ -53,6 +54,10 @@ impl SmartConfiguration { response_types_supported: vec!["token".to_string()], grant_types_supported: vec!["client_credentials".to_string()], token_endpoint_auth_methods_supported: vec!["private_key_jwt".to_string()], + token_endpoint_auth_signing_alg_values_supported: vec![ + "RS384".to_string(), + "ES384".to_string(), + ], capabilities: vec![ "permission-v2".to_string(), "client-confidential-asymmetric".to_string(), From 975f30b7bc69bba6dd824ae59be2afd178bbe936 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 17:19:58 -0400 Subject: [PATCH 59/81] fix(hfs): create parent dir before opening embedded bulk export job DB The embedded bulk export backend derives its SQLite path as {HFS_BULK_EXPORT_OUTPUT_DIR}/bulk_export.db but never ensured the directory existed before handing the path to SqliteBackend. When the output dir was set to a path whose parent hadn't been created yet (as in CI, where only RESULTS_DIR was mkdir'd and not RESULTS_DIR/export-output), SQLite failed to open the file and HFS exited immediately, causing the health-check loop to time out. Add a create_dir_all on the parent before opening the connection so HFS creates the directory itself rather than requiring callers to pre-create it. --- crates/hfs/src/main.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/hfs/src/main.rs b/crates/hfs/src/main.rs index 6996376da..64c75d873 100644 --- a/crates/hfs/src/main.rs +++ b/crates/hfs/src/main.rs @@ -879,6 +879,14 @@ where .as_ref() .map(|d| format!("{d}/bulk_export.db")) .unwrap_or_else(|| "./data/bulk_export.db".to_string()); + if let Some(parent) = std::path::Path::new(&job_db).parent() { + std::fs::create_dir_all(parent).map_err(|e| { + anyhow::anyhow!( + "create bulk export job DB directory {}: {e}", + parent.display() + ) + })?; + } let job_backend = SqliteBackend::with_config( &job_db, SqliteBackendConfig { From eedb80ca69664c967b91c45ec3abfba737bb0b25 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 18:11:54 -0400 Subject: [PATCH 60/81] fix(smoke): replace gawk-only IGNORECASE with portable grep -i for header parsing The smoke script extracted Content-Location from curl -D headers using awk with IGNORECASE=1, which is a gawk extension. On the self-hosted runners the default /usr/bin/awk is mawk, which silently ignores IGNORECASE=1, so the case-sensitive regex /^Content-Location:/ never matched the lowercase content-location: header that Axum emits, leaving content_location empty and failing every sqlite/postgres smoke job. Replace all three occurrences with `grep -i | sed | tr -d '\r'`, which is portable across mawk, gawk, and nawk environments. --- .../hfs/tests/bulk_export/run_external_bulk_export_smoke.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh index e21e9dc43..0a6c820b6 100755 --- a/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh +++ b/crates/hfs/tests/bulk_export/run_external_bulk_export_smoke.sh @@ -224,7 +224,7 @@ kickoff_get() { "$BASE_URL$path")" expect_status "$status" "202" "$label kickoff" "$response" local content_location - content_location="$(awk 'BEGIN{IGNORECASE=1} /^Content-Location:/ {sub(/\r$/, "", $0); sub(/^[^:]+:[[:space:]]*/, "", $0); print; exit}' "$headers")" + content_location="$(grep -i '^content-location:' "$headers" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')" if [ -z "$content_location" ]; then fail "$label kickoff did not return Content-Location" fi @@ -249,7 +249,7 @@ EOF --data-binary @"$body")" expect_status "$status" "202" "$label kickoff" "$response" local content_location - content_location="$(awk 'BEGIN{IGNORECASE=1} /^Content-Location:/ {sub(/\r$/, "", $0); sub(/^[^:]+:[[:space:]]*/, "", $0); print; exit}' "$headers")" + content_location="$(grep -i '^content-location:' "$headers" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')" if [ -z "$content_location" ]; then fail "$label kickoff did not return Content-Location" fi @@ -307,7 +307,7 @@ expect_export_failure() { esac local status_url - status_url="$(awk 'BEGIN{IGNORECASE=1} /^Content-Location:/ {sub(/\r$/, "", $0); sub(/^[^:]+:[[:space:]]*/, "", $0); print; exit}' "$headers")" + status_url="$(grep -i '^content-location:' "$headers" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r')" if [ -z "$status_url" ]; then fail "$label expected-negative kickoff returned 202 without Content-Location" fi From 13a474fe5d77a42cddf3bf4d9d2c688366ff8cb1 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 18:57:53 -0400 Subject: [PATCH 61/81] fix(inferno): run SMART and Export groups sequentially via separate test_runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bulk_data_v200 suite ID cannot be used as test_group_id — Inferno's API returns 422 "The chosen runnable must be run as part of a group". The suite has two groups that must each be POSTed as a separate test_run: 1. bulk_data_v200-bulk_data_smart_backend_services_v200 Establishes the bearer token via private_key_jwt OAuth flow. 2. bulk_data_v200-bulk_data_export_tests_v200 Runs the actual bulk export tests using the token from group 1. Each group is run-and-polled inline within a single step. Results from both runs are merged into results.json for the downstream check step. Replaces the now-removed standalone "Poll Inferno results" step. --- .github/workflows/inferno-bulk-data.yml | 168 +++++++++++++----------- 1 file changed, 91 insertions(+), 77 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 6950d00d9..4978d2bcf 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -35,7 +35,8 @@ env: DOCKER_HOST: ${{ secrets.DOCKER_HOST }} DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} SUITE_ID: bulk_data_v200 - TEST_GROUP_ID: bulk_data_v200 + SMART_GROUP_ID: bulk_data_v200-bulk_data_smart_backend_services_v200 + EXPORT_GROUP_ID: bulk_data_v200-bulk_data_export_tests_v200 GROUP_ID: inferno-bulk-group RESULTS_DIR: inferno-bulk-data-results @@ -552,93 +553,106 @@ jobs: encryption_algorithm: "ES384" }')" - RUN_PAYLOAD="$(jq -n \ - --arg session_id "$SESSION_ID" \ - --arg group_id "$TEST_GROUP_ID" \ - --arg bulk_server_url "$HFS_BASE_URL" \ - --arg smart_auth_info "$SMART_AUTH_INFO" \ - --arg inferno_group_id "$GROUP_ID" \ - '{ - test_session_id: $session_id, - test_group_id: $group_id, - inputs: [ - {name: "bulk_server_url", value: $bulk_server_url}, - {name: "smart_auth_info", value: $smart_auth_info}, - {name: "group_id", value: $inferno_group_id}, - {name: "bulk_timeout", value: "600"}, - {name: "lines_to_validate", value: "100"}, - {name: "since_timestamp", value: "2000-01-01T00:00:00.000Z"} - ] - }')" - - echo "$RUN_PAYLOAD" > "$RESULTS_DIR/run-payload.json" + # The bulk_data_v200 suite contains two sequential groups that must each be + # run as a separate test_run: the SMART Backend Services group establishes the + # bearer token, and the Export Tests group consumes it from session state. + for GROUP_PAIR in "smart:$SMART_GROUP_ID" "export:$EXPORT_GROUP_ID"; do + LABEL="${GROUP_PAIR%%:*}" + GROUP_ID_VAL="${GROUP_PAIR##*:}" + + echo "--- Starting group: $LABEL ($GROUP_ID_VAL) ---" + + RUN_PAYLOAD="$(jq -n \ + --arg session_id "$SESSION_ID" \ + --arg group_id "$GROUP_ID_VAL" \ + --arg bulk_server_url "$HFS_BASE_URL" \ + --arg smart_auth_info "$SMART_AUTH_INFO" \ + --arg fhir_group_id "$GROUP_ID" \ + '{ + test_session_id: $session_id, + test_group_id: $group_id, + inputs: [ + {name: "bulk_server_url", value: $bulk_server_url}, + {name: "smart_auth_info", value: $smart_auth_info}, + {name: "group_id", value: $fhir_group_id}, + {name: "bulk_timeout", value: "600"}, + {name: "lines_to_validate", value: "100"}, + {name: "since_timestamp", value: "2000-01-01T00:00:00.000Z"} + ] + }')" + + echo "$RUN_PAYLOAD" > "$RESULTS_DIR/run-payload-$LABEL.json" + + RUN_ID="" + RUN_RESPONSE="" + for attempt in $(seq 1 5); do + RUN_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_runs" \ + -H "Content-Type: application/json" \ + -d "$RUN_PAYLOAD") + RUN_ID=$(echo "$RUN_RESPONSE" | jq -r '.id' 2>/dev/null) || true + if [ -n "$RUN_ID" ] && [ "$RUN_ID" != "null" ]; then break; fi + echo "Attempt $attempt/5: Failed to start $LABEL run: $RUN_RESPONSE" + sleep $((attempt * 3)) + done + + if [ -z "${RUN_ID:-}" ] || [ "$RUN_ID" = "null" ]; then + echo "Failed to start $LABEL run after 5 attempts" + echo "$RUN_RESPONSE" + exit 1 + fi - for attempt in $(seq 1 5); do - RUN_RESPONSE=$(curl -s -X POST "$INFERNO_BASE_URL/api/test_runs" \ - -H "Content-Type: application/json" \ - -d "$RUN_PAYLOAD") - RUN_ID=$(echo "$RUN_RESPONSE" | jq -r '.id' 2>/dev/null) || true + echo "$RUN_RESPONSE" > "$RESULTS_DIR/run-$LABEL.json" + echo "$LABEL run started: $RUN_ID" - if [ -n "$RUN_ID" ] && [ "$RUN_ID" != "null" ]; then - break - fi - echo "Attempt $attempt/5: Failed to start test run: $RUN_RESPONSE" - sleep $((attempt * 3)) - done + MAX_POLLS=150 + API_ERRORS=0 + RUN_STATUS="" + for i in $(seq 1 $MAX_POLLS); do + STATUS_RESPONSE=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID") + RUN_STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status' 2>/dev/null) || true + + if [ -z "$RUN_STATUS" ] || [ "$RUN_STATUS" = "null" ]; then + API_ERRORS=$((API_ERRORS + 1)) + echo "Poll $i/$MAX_POLLS [$LABEL]: WARNING - non-JSON response (error $API_ERRORS)" + if [ "$API_ERRORS" -ge 5 ]; then + echo "::error::Inferno API failed $API_ERRORS consecutive times for $LABEL" + tail -50 "$HFS_LOG" + exit 1 + fi + sleep 10 + continue + fi + API_ERRORS=0 - if [ -z "${RUN_ID:-}" ] || [ "$RUN_ID" = "null" ]; then - echo "Failed to start test run after 5 attempts" - echo "$RUN_RESPONSE" - exit 1 - fi + RESULTS=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID/results") + TOTAL=$(echo "$RESULTS" | jq '[.[] | select(.test_id)] | length' 2>/dev/null) || TOTAL="?" + PASS=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "pass")] | length' 2>/dev/null) || PASS="?" + FAIL=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "fail")] | length' 2>/dev/null) || FAIL="?" + SKIP=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "skip")] | length' 2>/dev/null) || SKIP="?" + ERROR=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "error")] | length' 2>/dev/null) || ERROR="?" + OMIT=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "omit")] | length' 2>/dev/null) || OMIT="?" - echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" - echo "$RUN_RESPONSE" > "$RESULTS_DIR/run.json" + echo "Poll $i/$MAX_POLLS [$LABEL]: Status=$RUN_STATUS Total=$TOTAL Pass=$PASS Fail=$FAIL Skip=$SKIP Error=$ERROR Omit=$OMIT" - - name: Poll Inferno results - run: | - MAX_POLLS=150 - POLL_INTERVAL=10 - API_ERRORS=0 - - for i in $(seq 1 $MAX_POLLS); do - STATUS_RESPONSE=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID") - RUN_STATUS=$(echo "$STATUS_RESPONSE" | jq -r '.status' 2>/dev/null) || true - - if [ -z "$RUN_STATUS" ] || [ "$RUN_STATUS" = "null" ]; then - API_ERRORS=$((API_ERRORS + 1)) - echo "Poll $i/$MAX_POLLS: WARNING - Inferno API returned non-JSON (error $API_ERRORS)" - echo " Response (first 200 chars): ${STATUS_RESPONSE:0:200}" - if [ "$API_ERRORS" -ge 5 ]; then - echo "::error::Inferno API failed $API_ERRORS consecutive times" - tail -50 "$HFS_LOG" - exit 1 + if [ "$RUN_STATUS" = "done" ]; then + echo "$RESULTS" > "$RESULTS_DIR/results-$LABEL.json" + break fi - sleep "$POLL_INTERVAL" - continue - fi - API_ERRORS=0 - RESULTS=$(curl -s "$INFERNO_BASE_URL/api/test_runs/$RUN_ID/results") - echo "$RESULTS" > "$RESULTS_DIR/results.json" - TOTAL=$(echo "$RESULTS" | jq '[.[] | select(.test_id)] | length' 2>/dev/null) || TOTAL="?" - PASS=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "pass")] | length' 2>/dev/null) || PASS="?" - FAIL=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "fail")] | length' 2>/dev/null) || FAIL="?" - SKIP=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "skip")] | length' 2>/dev/null) || SKIP="?" - ERROR=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "error")] | length' 2>/dev/null) || ERROR="?" - OMIT=$(echo "$RESULTS" | jq '[.[] | select(.test_id and .result == "omit")] | length' 2>/dev/null) || OMIT="?" + sleep 10 + done - echo "Poll $i/$MAX_POLLS: Status=$RUN_STATUS Total=$TOTAL Pass=$PASS Fail=$FAIL Skip=$SKIP Error=$ERROR Omit=$OMIT" - - if [ "$RUN_STATUS" = "done" ]; then - exit 0 + if [ "$RUN_STATUS" != "done" ]; then + echo "::error::Inferno $LABEL group timed out" + exit 1 fi - - sleep "$POLL_INTERVAL" done - echo "::error::Inferno Bulk Data suite timed out" - exit 1 + # Merge results from both groups for downstream steps + jq -s 'add' \ + "$RESULTS_DIR/results-smart.json" \ + "$RESULTS_DIR/results-export.json" \ + > "$RESULTS_DIR/results.json" - name: Check test results run: | From 08f9c347e5be65d100b32a535c8649a5000c126a Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 19:07:19 -0400 Subject: [PATCH 62/81] fix(persistence): parse cursor timestamp as DateTime in postgres batch queries The keyset cursor stores timestamps as RFC 3339 strings (e.g. "2026-05-15T22:35:24Z|"). When the second+ batch was fetched the cursor part was pushed into the tokio-postgres param list as a Rust String (TEXT). PostgreSQL's extended query protocol infers the expected type from the column context (TIMESTAMPTZ), so it rejected the TEXT binding with a type error, failing every paginated export job after its first batch. Fix all three cursor sites in fetch_export_batch and fetch_patient_compartment_batch to parse the timestamp with DateTime::parse_from_rfc3339 and push a DateTime so the wire type matches the inferred TIMESTAMPTZ. --- .../src/backends/postgres/bulk_export.rs | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/crates/persistence/src/backends/postgres/bulk_export.rs b/crates/persistence/src/backends/postgres/bulk_export.rs index 2d9a2b7f7..25104a6f8 100644 --- a/crates/persistence/src/backends/postgres/bulk_export.rs +++ b/crates/persistence/src/backends/postgres/bulk_export.rs @@ -1044,13 +1044,15 @@ impl ExportDataProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (last_updated, id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (last_updated, id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } @@ -1181,13 +1183,15 @@ impl PatientExportProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (last_updated, id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (last_updated, id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } @@ -1268,13 +1272,15 @@ impl PatientExportProvider for PostgresBackend { if let Some(cursor) = cursor { let parts: Vec<&str> = cursor.splitn(2, '|').collect(); if parts.len() == 2 { - sql.push_str(&format!( - " AND (r.last_updated, r.id) > (${}, ${})", - param_idx, - param_idx + 1 - )); - params.push(Box::new(parts[0].to_string())); - params.push(Box::new(parts[1].to_string())); + if let Ok(dt) = DateTime::parse_from_rfc3339(parts[0]) { + sql.push_str(&format!( + " AND (r.last_updated, r.id) > (${}, ${})", + param_idx, + param_idx + 1 + )); + params.push(Box::new(dt.with_timezone(&Utc))); + params.push(Box::new(parts[1].to_string())); + } } } From fefca6b2cd086c2cd360758ddd80714ee9da868c Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 19:07:40 -0400 Subject: [PATCH 63/81] fix(persistence): apply patient_refs filter in Patient-level export POST /Patient/$export accepts a `patient` parameter to scope the export to specific patients. The request stores those references in ExportRequest::patient_refs, but the worker's Patient-level branch always called fetch_export_batch (unfiltered), so every resource of each type was returned regardless of which patient was requested. Add a new match arm that fires when ExportLevel::Patient and patient_refs is non-empty: it strips the "Patient/" prefix from each ref and delegates to fetch_patient_compartment_batch, which correctly scopes results to those patients' compartments. The existing arm (no patient filter) is unchanged for generic /Patient/$export calls. --- .../src/core/bulk_export_worker.rs | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/crates/persistence/src/core/bulk_export_worker.rs b/crates/persistence/src/core/bulk_export_worker.rs index 76707bcaa..eb69cf62c 100644 --- a/crates/persistence/src/core/bulk_export_worker.rs +++ b/crates/persistence/src/core/bulk_export_worker.rs @@ -405,11 +405,31 @@ where ) .await .map_err(LeaseError::Storage)?, + None if matches!(view.level, ExportLevel::Patient) + && !request.patient_refs.is_empty() => + { + // Patient-level with specific patient filter: scope to + // exactly the requested patients' compartments. + let patient_ids: Vec = request + .patient_refs + .iter() + .map(|r| r.strip_prefix("Patient/").unwrap_or(r).to_string()) + .collect(); + self.data + .fetch_patient_compartment_batch( + tenant, + request, + resource_type, + &patient_ids, + cursor.as_deref(), + batch_size, + ) + .await + .map_err(LeaseError::Storage)? + } None if matches!(view.level, ExportLevel::Patient) => { - // Patient-level: export the whole patient compartment. - // For simplicity, treat it like a system-level fetch of - // the type (patient-scoped filtering is applied by the - // provider via the request). + // Patient-level without a patient filter: export all + // resources of this type across the patient compartment. self.data .fetch_export_batch( tenant, From 3710735e4c85737ca88a4e27b7973479a9ca4434 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 22:14:21 -0400 Subject: [PATCH 64/81] fix(persistence): correct sqlite patient export params --- crates/persistence/src/backends/sqlite/bulk_export.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/persistence/src/backends/sqlite/bulk_export.rs b/crates/persistence/src/backends/sqlite/bulk_export.rs index 93f1591d3..a9022bd31 100644 --- a/crates/persistence/src/backends/sqlite/bulk_export.rs +++ b/crates/persistence/src/backends/sqlite/bulk_export.rs @@ -1324,8 +1324,10 @@ impl PatientExportProvider for SqliteBackend { .iter() .map(|id| format!("Patient/{}", id)) .collect(); + let since_value = request.since.map(|s| s.to_rfc3339()); + let patient_ref_param_start = if since_value.is_some() { 4 } else { 3 }; let placeholders: Vec = (0..patient_refs.len()) - .map(|i| format!("?{}", i + 4)) + .map(|i| format!("?{}", i + patient_ref_param_start)) .collect(); let mut query = format!( @@ -1346,8 +1348,6 @@ impl PatientExportProvider for SqliteBackend { Box::new(tenant_id.to_string()), Box::new(resource_type.to_string()), ]; - // Placeholder for since filter slot - let since_value = request.since.map(|s| s.to_rfc3339()); if since_value.is_some() { params_vec.push(Box::new(since_value.clone().unwrap())); } From b81d03cf19f53888369a0fa661c6219373fb5ff1 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 22:14:26 -0400 Subject: [PATCH 65/81] test(persistence): cover sqlite patient export without since --- crates/persistence/tests/sqlite_tests.rs | 84 +++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/crates/persistence/tests/sqlite_tests.rs b/crates/persistence/tests/sqlite_tests.rs index 61536b688..4ae580f29 100644 --- a/crates/persistence/tests/sqlite_tests.rs +++ b/crates/persistence/tests/sqlite_tests.rs @@ -8,11 +8,13 @@ use helios_fhir::FhirVersion; use serde_json::json; use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; -use helios_persistence::core::ResourceStorage; use helios_persistence::core::history::{ HistoryMethod, HistoryParams, InstanceHistoryProvider, SystemHistoryProvider, TypeHistoryProvider, }; +use helios_persistence::core::{ + ExportLevel, ExportRequest, PatientExportProvider, ResourceStorage, +}; use helios_persistence::error::{ResourceError, StorageError}; use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; @@ -1647,6 +1649,86 @@ async fn test_search_reference_subject() { assert!(ids.contains(&"obs-2")); } +#[tokio::test] +async fn test_patient_compartment_export_observation_without_since() { + let backend = create_backend(); + let tenant = create_tenant("test-tenant"); + + backend + .create( + &tenant, + "Patient", + json!({ + "resourceType": "Patient", + "id": "patient-1" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Patient", + json!({ + "resourceType": "Patient", + "id": "patient-2" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Observation", + json!({ + "resourceType": "Observation", + "id": "obs-1", + "subject": {"reference": "Patient/patient-1"}, + "status": "final" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant, + "Observation", + json!({ + "resourceType": "Observation", + "id": "obs-2", + "subject": {"reference": "Patient/patient-2"}, + "status": "final" + }), + FhirVersion::default(), + ) + .await + .unwrap(); + + let request = ExportRequest::new(ExportLevel::Patient); + let batch = backend + .fetch_patient_compartment_batch( + &tenant, + &request, + "Observation", + &["patient-1".to_string()], + None, + 10, + ) + .await + .unwrap(); + + assert!(batch.is_last); + assert_eq!(batch.lines.len(), 1); + let observation: serde_json::Value = serde_json::from_str(&batch.lines[0]).unwrap(); + assert_eq!(observation["id"], "obs-1"); +} + #[tokio::test] async fn test_search_multiple_parameters() { let backend = create_backend(); From b6e83a98a100a6f3cd31c6b534ee55de65ab7e18 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 22:14:32 -0400 Subject: [PATCH 66/81] fix(fhirpath): remove redundant formatting borrow --- crates/fhirpath/src/reference_key_functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/fhirpath/src/reference_key_functions.rs b/crates/fhirpath/src/reference_key_functions.rs index 09510c846..3f0a43565 100644 --- a/crates/fhirpath/src/reference_key_functions.rs +++ b/crates/fhirpath/src/reference_key_functions.rs @@ -93,7 +93,7 @@ pub fn get_reference_key_function( _ => { return Err(EvaluationError::TypeError(format!( "getReferenceKey type filter must be a string or type, got: {:?}", - &args[0] + args[0] ))); } } From d13ee1faa862ca71ed3f0ac9b63d16fd34c4494e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 22:54:19 -0400 Subject: [PATCH 67/81] test(persistence): serialize postgres bulk export claims --- crates/persistence/tests/postgres_tests.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/persistence/tests/postgres_tests.rs b/crates/persistence/tests/postgres_tests.rs index 0a6d7f2bf..750e9913f 100644 --- a/crates/persistence/tests/postgres_tests.rs +++ b/crates/persistence/tests/postgres_tests.rs @@ -504,7 +504,7 @@ mod postgres_integration { use testcontainers::ImageExt; use testcontainers::runners::AsyncRunner; use testcontainers_modules::postgres::Postgres; - use tokio::sync::OnceCell; + use tokio::sync::{Mutex, OnceCell}; /// Shared PostgreSQL container reused across all tests in this module. struct SharedPg { @@ -515,6 +515,7 @@ mod postgres_integration { } static SHARED_PG: OnceCell = OnceCell::const_new(); + static BULK_EXPORT_TEST_LOCK: Mutex<()> = Mutex::const_new(()); async fn shared_pg() -> &'static SharedPg { SHARED_PG @@ -2884,6 +2885,7 @@ mod postgres_integration { #[tokio::test] async fn postgres_integration_export_claim_skip_locked() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; let backend = create_backend().await; let tenant = create_tenant("export-claim"); @@ -2923,6 +2925,7 @@ mod postgres_integration { #[tokio::test] async fn postgres_integration_export_stale_worker_fenced_out() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; let backend = create_backend().await; let tenant = create_tenant("export-fence"); @@ -2964,6 +2967,7 @@ mod postgres_integration { #[tokio::test] async fn postgres_integration_export_count_active_and_expire() { + let _guard = BULK_EXPORT_TEST_LOCK.lock().await; let backend = create_backend().await; let tenant = create_tenant("export-cleanup"); From ac1a53dedc687b1189bfecfe2b722788c80c5e6e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 23:01:36 -0400 Subject: [PATCH 68/81] ci(inferno): fix bulk data auth discovery Allow the generated Keycloak client to accept Inferno's five-minute private_key_jwt assertion lifetime, avoiding token endpoint 400s that cascade into export 401s. Expose S256 in SMART discovery so the Inferno SMART Backend Services checks see the expected code_challenge_methods_supported metadata. --- .github/workflows/inferno-bulk-data.yml | 3 ++- crates/auth/src/discovery.rs | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 4978d2bcf..3c4996a14 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -309,7 +309,8 @@ jobs: "attributes": { "use.jwks.url": "true", "jwks.url": $jwks_url, - "token.endpoint.auth.signing.alg": "ES384" + "token.endpoint.auth.signing.alg": "ES384", + "token.endpoint.auth.signing.max.exp": "600" } }] ' docker/keycloak/realm.json > "$RESULTS_DIR/keycloak-realm.json" diff --git a/crates/auth/src/discovery.rs b/crates/auth/src/discovery.rs index 3e121c7c5..4a79d9773 100644 --- a/crates/auth/src/discovery.rs +++ b/crates/auth/src/discovery.rs @@ -27,6 +27,7 @@ pub struct SmartConfiguration { pub response_types_supported: Vec, pub grant_types_supported: Vec, pub token_endpoint_auth_methods_supported: Vec, + pub code_challenge_methods_supported: Vec, pub token_endpoint_auth_signing_alg_values_supported: Vec, pub capabilities: Vec, } @@ -54,6 +55,7 @@ impl SmartConfiguration { response_types_supported: vec!["token".to_string()], grant_types_supported: vec!["client_credentials".to_string()], token_endpoint_auth_methods_supported: vec!["private_key_jwt".to_string()], + code_challenge_methods_supported: vec!["S256".to_string()], token_endpoint_auth_signing_alg_values_supported: vec![ "RS384".to_string(), "ES384".to_string(), @@ -83,6 +85,7 @@ mod tests { assert!(smart.issuer.is_none()); assert!(smart.token_endpoint.is_none()); assert!(smart.capabilities.contains(&"permission-v2".to_string())); + assert_eq!(smart.code_challenge_methods_supported, vec!["S256"]); } #[test] @@ -117,6 +120,10 @@ mod tests { assert!(json["capabilities"].is_array()); assert!(json["scopes_supported"].is_array()); + assert_eq!( + json["code_challenge_methods_supported"], + serde_json::json!(["S256"]) + ); // Fields that are None should be omitted assert!(json.get("authorization_endpoint").is_none()); } From 143338a32136b7915530adf2863fc61de7efe612 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 23:17:38 -0400 Subject: [PATCH 69/81] ci(inferno): reuse smart auth for export tests Carry the access-token-bearing smart_auth_info emitted by the SMART Backend Services group into the export group so Inferno sends authenticated kickoff requests. Advertise authorization_code in SMART discovery when an authorization endpoint is configured, matching Inferno STU2 well-known expectations. --- .github/workflows/inferno-bulk-data.yml | 13 +++++++++++++ crates/auth/src/discovery.rs | 19 +++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 3c4996a14..835905fdd 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -647,6 +647,19 @@ jobs: echo "::error::Inferno $LABEL group timed out" exit 1 fi + + if [ "$LABEL" = "smart" ]; then + SMART_AUTH_INFO="$(jq -r ' + [.[] | .outputs[]? | select(.name == "smart_auth_info") | .value | select(contains("\"access_token\""))] + | last // empty + ' "$RESULTS_DIR/results-smart.json")" + + if [ -z "$SMART_AUTH_INFO" ]; then + echo "::error::SMART group did not produce access-token auth info for export tests" + jq -r '.[] | select(.result == "fail" or .result == "error") | " \(.test_id): \(.result) - \(.result_message // "No message")"' "$RESULTS_DIR/results-smart.json" + exit 1 + fi + fi done # Merge results from both groups for downstream steps diff --git a/crates/auth/src/discovery.rs b/crates/auth/src/discovery.rs index 4a79d9773..a0347658b 100644 --- a/crates/auth/src/discovery.rs +++ b/crates/auth/src/discovery.rs @@ -35,6 +35,14 @@ pub struct SmartConfiguration { impl SmartConfiguration { /// Build the SMART configuration document from `AuthConfig`. pub fn from_config(config: &AuthConfig) -> Self { + let mut response_types_supported = vec!["token".to_string()]; + let mut grant_types_supported = vec!["client_credentials".to_string()]; + + if config.smart_authorize_endpoint.is_some() { + response_types_supported.push("code".to_string()); + grant_types_supported.push("authorization_code".to_string()); + } + Self { issuer: config.expected_issuer.clone(), jwks_uri: config @@ -52,8 +60,8 @@ impl SmartConfiguration { "system/*.rs".to_string(), "system/*.r".to_string(), ], - response_types_supported: vec!["token".to_string()], - grant_types_supported: vec!["client_credentials".to_string()], + response_types_supported, + grant_types_supported, token_endpoint_auth_methods_supported: vec!["private_key_jwt".to_string()], code_challenge_methods_supported: vec!["S256".to_string()], token_endpoint_auth_signing_alg_values_supported: vec![ @@ -93,6 +101,7 @@ mod tests { let config = AuthConfig { expected_issuer: Some("https://idp.example.com".to_string()), smart_token_endpoint: Some("https://idp.example.com/token".to_string()), + smart_authorize_endpoint: Some("https://idp.example.com/authorize".to_string()), smart_jwks_url: Some("https://idp.example.com/.well-known/jwks.json".to_string()), ..AuthConfig::default() }; @@ -103,6 +112,12 @@ mod tests { smart.token_endpoint.as_deref(), Some("https://idp.example.com/token") ); + assert!( + smart + .grant_types_supported + .contains(&"authorization_code".to_string()) + ); + assert!(smart.response_types_supported.contains(&"code".to_string())); assert_eq!( smart.jwks_uri.as_deref(), Some("https://idp.example.com/.well-known/jwks.json") From 76744ef9765e1def3041e9d796da90840e1007a0 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Fri, 15 May 2026 23:35:22 -0400 Subject: [PATCH 70/81] ci(inferno): fix bulk export validation seed Add the vital-signs category to the seeded heart-rate Observation so R4 profile validation passes when Inferno applies the Heart Rate profile. Treat file-server TLS checks as known omitted in the HTTP-only CI export file setup. --- .github/workflows/inferno-bulk-data.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/inferno-bulk-data.yml b/.github/workflows/inferno-bulk-data.yml index 835905fdd..29e304b63 100644 --- a/.github/workflows/inferno-bulk-data.yml +++ b/.github/workflows/inferno-bulk-data.yml @@ -477,6 +477,13 @@ jobs: "resourceType": "Observation", "id": "inferno-bulk-observation-a", "status": "final", + "category": [{ + "coding": [{ + "system": "http://terminology.hl7.org/CodeSystem/observation-category", + "code": "vital-signs", + "display": "Vital Signs" + }] + }], "code": { "coding": [{ "system": "http://loinc.org", @@ -677,7 +684,7 @@ jobs: RESULTS=$(cat "$RESULTS_DIR/results.json") LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' - OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version)$"))' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version|bulk_file_server_tls_version)$"))' FAILURES=$(echo "$RESULTS" | jq "$LATEST | [.[] | select((.result == \"fail\" or .result == \"error\") and ($OMIT_EXPR | not))] | length") OMITTED_COUNT=$(echo "$RESULTS" | jq "$LATEST | [.[] | select($OMIT_EXPR)] | length") @@ -698,7 +705,7 @@ jobs: if [ -f "$RESULTS_DIR/results.json" ]; then RESULTS=$(cat "$RESULTS_DIR/results.json") LATEST='[.[] | select(.test_id)] | group_by(.test_id) | map(sort_by(.created_at) | last)' - OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version)$"))' + OMIT_EXPR='(.test_id | test("(bulk_data_server_tls_version_stu2|smart_backend_services_token_tls_version|bulk_file_server_tls_version)$"))' TOTAL=$(echo "$RESULTS" | jq "$LATEST | length") PASS=$(echo "$RESULTS" | jq "$LATEST | [.[] | select(.result == \"pass\")] | length") From 170c05d8ef35b7e64197a6a330c2eab7b21d6f6d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:48:04 -0400 Subject: [PATCH 71/81] docs(persistence): document S3 bulk export output store S3 no longer owns bulk-export job state after the output-store split. Job rows, progress, leases, file metadata, and manifests live in SQLite or PostgreSQL while S3 stores finalized output objects. Update the persistence README capability notes, S3 backend scope, S3+Elasticsearch guidance, and object model to describe the current S3OutputStore layout. --- crates/persistence/README.md | 45 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/crates/persistence/README.md b/crates/persistence/README.md index 7450a2366..183c8a881 100644 --- a/crates/persistence/README.md +++ b/crates/persistence/README.md @@ -84,7 +84,9 @@ helios-persistence/ │ │ ├── search.rs # Search providers (basic, chained, include) │ │ ├── transaction.rs # ACID transactions with bundle support │ │ ├── capabilities.rs # Runtime capability discovery -│ │ ├── bulk_export.rs # FHIR Bulk Data Export traits +│ │ ├── bulk_export.rs # FHIR Bulk Data Export job/data traits +│ │ ├── bulk_export_output.rs # ExportOutputStore trait +│ │ ├── bulk_export_worker.rs # Bulk export worker runtime and leasing traits │ │ └── bulk_submit.rs # FHIR Bulk Submit traits │ ├── search/ # Search parameter infrastructure │ │ ├── registry.rs # SearchParameterRegistry (in-memory cache) @@ -152,10 +154,11 @@ helios-persistence/ │ │ ├── config.rs # S3BackendConfig, S3TenancyMode │ │ ├── client.rs # S3Api trait and AwsS3Client implementation │ │ ├── keyspace.rs # S3Keyspace key-path generation -│ │ ├── models.rs # HistoryIndexEvent, ExportJobState, SubmissionState +│ │ ├── models.rs # HistoryIndexEvent, SubmissionState │ │ ├── storage.rs # ResourceStorage implementation │ │ ├── bundle.rs # Batch/transaction bundle processing -│ │ ├── bulk_export.rs # BulkExportStorage implementation +│ │ ├── bulk_export.rs # ExportDataProvider implementation +│ │ ├── output_store.rs # S3OutputStore for bulk export files │ │ ├── bulk_submit.rs # BulkSubmitProvider implementation │ │ └── tests.rs # Integration tests │ ├── composite/ # Multi-backend coordination @@ -369,10 +372,10 @@ The matrix below shows which FHIR operations each backend supports. This reflect | Single field | ✓ | ✓ | ✓ | ✗ | ○ | ✓ | ✗ | | Multiple fields | ✓ | ✓ | ✓ | ✗ | ○ | ✓ | ✗ | | **[Bulk Operations](https://hl7.org/fhir/uv/bulkdata/)** | -| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | +| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ◐ | | [Bulk Submit](https://hackmd.io/@argonaut/rJoqHZrPle) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | -The S3 backend is intentionally storage-focused (CRUD/version/history/bulk) and does not act as a full FHIR search engine. For query-heavy deployments, use a DB/search backend as primary query engine and compose S3 as archive/bulk/history storage. +The S3 backend is intentionally storage-focused (CRUD/version/history/bulk submit) and does not act as a full FHIR search engine. For bulk export, S3 can feed system-level batches through `ExportDataProvider` and can store output files through `S3OutputStore`, but job state belongs to SQLite or PostgreSQL. Patient-level and Group-level export compartment enumeration are not supported by S3 as the resource store. For query-heavy deployments, use a DB/search backend as primary query engine and compose S3 as archive/history/output storage. ### Primary/Secondary Role Matrix @@ -389,7 +392,7 @@ Backends can serve as primary (CRUD, versioning, transactions) or secondary (opt | Cassandra + Elasticsearch | Cassandra | Elasticsearch (search) | Planned | Write-heavy + search | | MongoDB alone | MongoDB | — | ✓ Implemented | Document-centric | | MongoDB + Elasticsearch | MongoDB | Elasticsearch (search) | ✓ Implemented | Document-centric + offloaded search | -| S3 alone | S3 | — | ✓ Implemented (storage-focused) | Archival/bulk/history storage | +| S3 alone | S3 | — | ✓ Implemented (storage-focused) | Archival/history storage | | S3 + Elasticsearch | S3 | Elasticsearch (search) | ✓ Implemented | Large-scale + search | ### Backend Selection Guide @@ -600,13 +603,14 @@ HFS_ELASTICSEARCH_NODES=http://localhost:9200 \ ### S3 + Elasticsearch -S3 handles CRUD, versioning, history, and bulk operations. Elasticsearch handles all search operations. Combines S3's cost-effective, durable object storage with Elasticsearch's search capabilities for large-scale deployments. +S3 handles CRUD, versioning, history, and bulk-submit artifacts. Elasticsearch handles all search operations. For bulk export, this topology can use S3 as the resource data provider for system-level exports and `S3OutputStore` as the output-file store; export job state still lives in the configured SQLite or PostgreSQL bulk-export job store. - CRUD persistence via S3 objects (current pointer + immutable history versions) - Versioning (`vread`, optimistic locking via version checks) - Instance, type, and system history via immutable history objects - Batch bundles and best-effort transaction bundles -- Bulk export (NDJSON parts + manifest in S3) +- Bulk export data provider for system-level exports +- Optional S3 bulk-export output files via `S3OutputStore` - Bulk submit with rollback change log - Full-text search with relevance scoring (`_text`, `_content`) via Elasticsearch - All FHIR search parameter types (string, token, date, number, quantity, reference, URI, composite) @@ -758,7 +762,7 @@ let composite = CompositeStorage::new(config, backends)? ## S3 Backend -The S3 backend is a storage-focused persistence backend using AWS S3 object storage. It handles CRUD, versioning/history, and bulk workflows but is intentionally not a FHIR search engine. For query-heavy deployments, compose S3 with a DB/search backend as the primary query engine. +The S3 backend is a storage-focused persistence backend using AWS S3 object storage. It handles CRUD, versioning/history, and bulk-submit workflows but is intentionally not a FHIR search engine. For bulk export, S3 participates in two narrower roles: `S3Backend` can provide resource batches for system-level exports, and `S3OutputStore` can store finalized NDJSON output files. Bulk-export job state, progress, manifests, leases, and file metadata are not stored in S3; they live in SQLite or PostgreSQL. ### Scope @@ -767,7 +771,8 @@ The S3 backend is a storage-focused persistence backend using AWS S3 object stor - Versioning (`vread`, `list_versions`, optimistic conflict checks) - Instance/type/system history via immutable history objects plus history index events - Batch bundles and best-effort transaction bundles (non-atomic with compensating rollback) -- Bulk export (NDJSON objects + manifest/progress state in S3) +- Bulk export resource data provider for system-level exports +- Bulk export output storage through `S3OutputStore` when configured separately from job state - Bulk submit (ingest + raw artifact persistence + rollback change log) - Tenant isolation (`PrefixPerTenant` or `BucketPerTenant`) @@ -776,7 +781,7 @@ The S3 backend is a storage-focused persistence backend using AWS S3 object stor ### Configuration ```rust -use helios_persistence::backends::s3::S3BackendConfig; +use helios_persistence::backends::s3::{S3BackendConfig, S3TenancyMode}; let config = S3BackendConfig { tenancy_mode: S3TenancyMode::PrefixPerTenant { @@ -785,8 +790,8 @@ let config = S3BackendConfig { prefix: None, region: None, validate_buckets_on_startup: true, - bulk_export_part_size: 10_000, bulk_submit_batch_size: 100, + ..Default::default() }; ``` @@ -796,7 +801,6 @@ let config = S3BackendConfig { | `prefix` | `None` | Optional global key prefix applied before backend keys | | `region` | `None` | AWS region override (falls back to provider chain) | | `validate_buckets_on_startup` | `true` | Validate configured buckets with `HeadBucket` on startup | -| `bulk_export_part_size` | `10000` | Max NDJSON lines per export output part | | `bulk_submit_batch_size` | `100` | Default ingestion batch size for bulk submit processing | ### Tenancy Modes @@ -817,14 +821,13 @@ Resource objects: | Type history event | `.../history/type/{type}/{ts}_{id}_{version}_{suffix}.json` | | System history event | `.../history/system/{ts}_{type}_{id}_{version}_{suffix}.json` | -Bulk export objects: +Bulk export output objects: | Object | Key Pattern | |--------|-------------| -| Job state | `.../bulk/export/jobs/{job_id}/state.json` | -| Progress | `.../bulk/export/jobs/{job_id}/progress/{type}.json` | -| Output | `.../bulk/export/jobs/{job_id}/output/{type}/part-{n}.ndjson` | -| Manifest | `.../bulk/export/jobs/{job_id}/manifest.json` | +| Finalized NDJSON part | `{tenant_id}/exports/{job_id}/{file_type}-{resource_type}-{part_index}-{fencing_token}.ndjson` | + +Bulk-export job state is deliberately not an S3 object model. SQLite and PostgreSQL store the job row, progress, leases/fencing tokens, file metadata, and raw manifest rows. `S3OutputStore` stores only finalized output parts and deletes every object under `{tenant_id}/exports/{job_id}/` during cancellation or retention cleanup. The REST layer assembles the client-facing manifest from the job store plus `ExportOutputStore::download_url`. Bulk submit objects: @@ -1112,12 +1115,14 @@ The SQLite backend includes a complete FHIR search implementation using pre-comp - [x] ReindexableStorage implementation ### Phase 5c: S3 Backend ✓ + - [x] S3BackendConfig with PrefixPerTenant and BucketPerTenant tenancy modes - [x] ResourceStorage implementation (CRUD via S3 objects) - [x] VersionedStorage implementation (vread, optimistic locking) - [x] History providers (instance, type, system via immutable history objects) - [x] Batch and best-effort transaction bundles -- [x] BulkExportStorage implementation (NDJSON parts + manifest in S3) +- [x] ExportDataProvider implementation for system-level bulk export +- [x] S3OutputStore implementation for bulk-export NDJSON output files - [x] BulkSubmitProvider implementation (ingest, raw artifacts, rollback change log) ### Phase 5+: Additional Backends (Planned) @@ -1167,7 +1172,7 @@ The composite storage layer enables polyglot persistence by coordinating multipl | PostgreSQL + Neo4j | PostgreSQL | Neo4j | Planned | Graph-heavy queries | | MongoDB-only | MongoDB | None | ✓ Implemented | Document-centric primary | | MongoDB + ES | MongoDB | Elasticsearch | ✓ Implemented | Document-centric + search | -| S3 alone | S3 | — | ✓ Implemented | Archival/bulk storage | +| S3 alone | S3 | — | ✓ Implemented | Archival/history storage | | S3 + ES | S3 | Elasticsearch | ✓ Implemented | Large-scale + search | ### Quick Start From efe35bcd0bca5c1104e189aff6732f7870f8f6b5 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:48:10 -0400 Subject: [PATCH 72/81] refactor(s3): mark delete_object as live API S3OutputStore calls S3Api::delete_object during export-output cleanup, so the trait method is no longer dead code. Remove the stale dead-code allowance and old Phase 2 note. --- crates/persistence/src/backends/s3/client.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs index a323bc271..5f343df76 100644 --- a/crates/persistence/src/backends/s3/client.rs +++ b/crates/persistence/src/backends/s3/client.rs @@ -117,8 +117,7 @@ pub trait S3Api: Send + Sync { ) -> Result; /// Deletes the object at the given key. Succeeds even if the key does not - /// exist. Reserved for the Phase 2 `S3OutputStore` integration. - #[allow(dead_code)] + /// exist. async fn delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError>; /// Lists objects whose keys start with `prefix`, with optional From 9214dbc5b0ce0a837fde195e1cd6fe215307a403 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:48:16 -0400 Subject: [PATCH 73/81] refactor(s3): remove legacy export key helpers S3 no longer stores bulk-export job state, progress, manifests, or output parts under the old bulk/export/jobs keyspace. Remove the unused helper methods for that obsolete object layout. --- .../persistence/src/backends/s3/keyspace.rs | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/crates/persistence/src/backends/s3/keyspace.rs b/crates/persistence/src/backends/s3/keyspace.rs index 32b8aec1c..faa9ee78e 100644 --- a/crates/persistence/src/backends/s3/keyspace.rs +++ b/crates/persistence/src/backends/s3/keyspace.rs @@ -138,57 +138,6 @@ impl S3Keyspace { self.join(&["history", "system/"]) } - /// Key for the JSON state object of a bulk export job. - #[allow(dead_code)] - pub fn export_job_state_key(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "state.json"]) - } - - /// Key for per-type export progress within a job. - #[allow(dead_code)] - pub fn export_job_progress_key(&self, job_id: &str, resource_type: &str) -> String { - self.join(&[ - "bulk", - "export", - "jobs", - job_id, - "progress", - &format!("{}.json", resource_type), - ]) - } - - /// Key for the completed export manifest of a job. - #[allow(dead_code)] - pub fn export_job_manifest_key(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "manifest.json"]) - } - - /// Key for a single NDJSON output part within an export job. - #[allow(dead_code)] - pub fn export_job_output_key(&self, job_id: &str, resource_type: &str, part: u32) -> String { - self.join(&[ - "bulk", - "export", - "jobs", - job_id, - "output", - resource_type, - &format!("part-{}.ndjson", part), - ]) - } - - /// Prefix covering all export job objects. - #[allow(dead_code)] - pub fn export_jobs_prefix(&self) -> String { - self.join(&["bulk", "export", "jobs/"]) - } - - /// Prefix covering all objects belonging to a single export job. - #[allow(dead_code)] - pub fn export_job_prefix(&self, job_id: &str) -> String { - self.join(&["bulk", "export", "jobs", job_id, "/"]) - } - /// Key for the JSON state object of a bulk submission. pub fn submit_state_key(&self, submitter: &str, submission_id: &str) -> String { self.join(&["bulk", "submit", submitter, submission_id, "state.json"]) From 86f9e86bb68135e6bbc0382daa5e597bda40d98a Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:48:24 -0400 Subject: [PATCH 74/81] refactor(s3): remove legacy ExportJobState model Bulk-export job state now belongs to SQLite or PostgreSQL. Remove the unused S3 ExportJobState type and update the module docs so S3 models only describe history and bulk-submit state. --- crates/persistence/src/backends/s3/models.rs | 21 ++------------------ 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/crates/persistence/src/backends/s3/models.rs b/crates/persistence/src/backends/s3/models.rs index 799b4ed5d..41789ab0f 100644 --- a/crates/persistence/src/backends/s3/models.rs +++ b/crates/persistence/src/backends/s3/models.rs @@ -1,5 +1,5 @@ -//! S3-specific persistence models for history indexing, bulk export job -//! state, and bulk submission state. +//! S3-specific persistence models for history indexing and bulk submission +//! state. //! //! These types are serialised as JSON objects in S3 and are never exposed //! outside the `s3` backend module. @@ -7,7 +7,6 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; -use crate::core::bulk_export::{ExportManifest, ExportProgress, ExportRequest}; use crate::core::bulk_submit::{SubmissionManifest, SubmissionSummary}; use crate::core::history::HistoryMethod; @@ -32,22 +31,6 @@ pub struct HistoryIndexEvent { pub deleted: bool, } -/// Durable state of a bulk export job stored in S3. -/// -/// Reserved for the Phase 2 `S3OutputStore` integration; the S3 backend is no -/// longer a bulk-export *job-state* backend (job state lives in SQLite or -/// PostgreSQL), so this type is currently unused. -#[allow(dead_code)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExportJobState { - /// The original export request parameters. - pub request: ExportRequest, - /// Current progress, including status and per-type counts. - pub progress: ExportProgress, - /// The completed manifest, populated once the job reaches `Complete`. - pub manifest: Option, -} - /// Durable state of a bulk submission stored in S3. /// /// Written to `bulk/submit///state.json` when a submission is From 70b3225f0927cf5b5c84aa43be5f2a2d0bab4042 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:48:30 -0400 Subject: [PATCH 75/81] refactor(s3): remove unused delete_object wrapper The S3Backend helper was only a dead-code wrapper around S3Api::delete_object. S3OutputStore performs cleanup through the S3Api trait directly, so the backend wrapper can be deleted. --- crates/persistence/src/backends/s3/storage.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs index ba458f906..fce08cc28 100644 --- a/crates/persistence/src/backends/s3/storage.rs +++ b/crates/persistence/src/backends/s3/storage.rs @@ -101,16 +101,6 @@ impl S3Backend { .map_err(|e| self.map_client_error(e)) } - /// Deletes the object at `key`. Succeeds silently if the key does not exist. - /// Reserved for the Phase 2 `S3OutputStore` integration. - #[allow(dead_code)] - pub(crate) async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> { - self.client - .delete_object(bucket, key) - .await - .map_err(|e| self.map_client_error(e)) - } - /// Downloads and deserialises a JSON object, returning `None` if not found. pub(crate) async fn get_json_object( &self, From 5953e2cfd40b80339e2c365a247980d3a82e6ebe Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:51:10 -0400 Subject: [PATCH 76/81] docs(hfs): remove bulk export details from README --- crates/hfs/README.md | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/crates/hfs/README.md b/crates/hfs/README.md index fcd51b434..2df4c1a5d 100644 --- a/crates/hfs/README.md +++ b/crates/hfs/README.md @@ -9,35 +9,11 @@ An open test server is available at https://hfs.heliossoftware.com/ for experime - Full FHIR RESTful API support - Multiple FHIR version support - Pluggable storage backends (SQLite, PostgreSQL, MongoDB) -- **Bulk Data Export (`$export`)** — system / patient / group, asynchronous, - with an embedded single-instance default and a multi-instance - PostgreSQL + S3 topology (see *Bulk Data Export* below) - Content negotiation (JSON) - Conditional operations with ETag support - Multi-tenant support via X-Tenant-ID header - CORS support -## Bulk Data Export - -HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) -`$export` operation. Single-instance (zero config) wires SQLite job state + -local-FS output + an in-process worker pool; multi-instance switches to a -PostgreSQL job store and an S3-compatible output store with pre-signed -download URLs. - -```bash -# Single-instance (default) -cargo run --bin hfs -curl -H 'Prefer: respond-async' \ - http://localhost:8080/Patient/\$export -``` - -The full configuration surface (`HFS_BULK_EXPORT_*` env vars, single- vs -multi-instance recipes, parameter behavior) is documented in `CLAUDE.md`. -A docker-compose stack for the multi-instance topology lives at -`docker/bulk-export/docker-compose.yml`, and a manual Inferno Bulk Data IG -v2.0.0 conformance workflow at `.github/workflows/inferno-bulk-data.yml`. - ## Installation ### From Source @@ -379,4 +355,3 @@ Use the `X-Tenant-ID` header to isolate data between tenants: curl -H "X-Tenant-ID: clinic-a" http://localhost:8080/Patient curl -H "X-Tenant-ID: clinic-b" http://localhost:8080/Patient ``` - From 46a819e32a0e769d35b2e8cc8d7d87b8a360666e Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:51:16 -0400 Subject: [PATCH 77/81] docs(bulk-export): add export example README --- docker/bulk-export/README.md | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 docker/bulk-export/README.md diff --git a/docker/bulk-export/README.md b/docker/bulk-export/README.md new file mode 100644 index 000000000..6a4ca7d46 --- /dev/null +++ b/docker/bulk-export/README.md @@ -0,0 +1,87 @@ +# Bulk Data Export + +HFS implements the [FHIR Bulk Data Access IG](https://build.fhir.org/ig/HL7/bulk-data/) +`$export` operation asynchronously: kick-off, poll, manifest, download, and +delete. + +This directory contains a provided docker-compose example for running HFS with +Bulk Data Export job state in PostgreSQL and export output in S3-compatible +storage via MinIO. It is intended for local manual testing, demos, and trying +Bulk Data clients such as Inferno against a multi-instance-style topology. + +This compose file is not used by the GitHub Actions bulk export or Inferno +workflow tests. Those workflows start their backing services directly so they +can control ports, artifacts, and per-job isolation. + +## Stack + +- HFS +- PostgreSQL for primary storage and bulk export job state +- MinIO for S3-compatible export output +- Keycloak using `docker/keycloak/realm.json` + +## Endpoints + +| Operation | Method | URL | +|-----------|--------|-----| +| system kick-off | GET / POST | `/$export` | +| patient kick-off | GET / POST | `/Patient/$export` | +| group kick-off | GET / POST | `/Group/{id}/$export` | +| status / manifest | GET | `/export-status/{job_id}` | +| cancel + delete | DELETE | `/export-status/{job_id}` | +| HFS-served download | GET | `/export-file/{job_id}/{type}-{part}` | + +All kick-offs require `Prefer: respond-async`. The default response is +`202 Accepted` with a `Content-Location` status URL. + +## Single Instance + +The default HFS configuration wires embedded bulk export with SQLite job state, +local filesystem output, and an in-process worker pool. + +```bash +cargo run --bin hfs +curl -i -H 'Prefer: respond-async' \ + http://localhost:8080/Patient/\$export +``` + +## Run + +```bash +docker compose -f docker/bulk-export/docker-compose.yml up --build +``` + +HFS is available at `http://localhost:8080`. + +## Try an Export + +```bash +curl -i -H 'Prefer: respond-async' \ + http://localhost:8080/Patient/\$export +``` + +The response includes a `Content-Location` header for polling the export job. + +## Configuration + +| Variable | Default | Description | +|----------|---------|-------------| +| `HFS_BULK_EXPORT_ENABLED` | `true` | Master switch. When `false`, all `$export` endpoints return `501`. | +| `HFS_BULK_EXPORT_BACKEND` | `embedded` | Job-state backend: `embedded` or `postgres-s3`. | +| `HFS_BULK_EXPORT_OUTPUT_BACKEND` | `local-fs` | Output store: `local-fs` or `s3`. | +| `HFS_BULK_EXPORT_OUTPUT_DIR` | `${HFS_DATA_DIR}/exports` | Local filesystem output root. | +| `HFS_BULK_EXPORT_S3_BUCKET` | none | S3 bucket. Required when `OUTPUT_BACKEND=s3`. | +| `HFS_BULK_EXPORT_S3_ENDPOINT` | AWS | S3-compatible endpoint URL, such as MinIO. | +| `HFS_BULK_EXPORT_S3_FORCE_PATH_STYLE` | `false` | Path-style addressing for S3-compatible providers. | +| `HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN` | `auto` | Manifest posture: `auto`, `true`, or `false`. `false` is invalid with `local-fs`. | +| `HFS_BULK_EXPORT_FILE_URL_TTL` | `3600` | Pre-signed download URL lifetime in seconds. | +| `HFS_BULK_EXPORT_OUTPUT_TTL` | `86400` | Output retention after job completion in seconds. | +| `HFS_BULK_EXPORT_WORKER_CONCURRENCY` | `2` | In-process worker pool size. | +| `HFS_BULK_EXPORT_DISABLE_LOCAL_WORKER` | `false` | Disable in-process workers for separate exporter deployment. | +| `HFS_BULK_EXPORT_MAX_CONCURRENT_PER_TENANT` | `4` | Per-tenant active-job cap. | +| `HFS_BULK_EXPORT_BATCH_SIZE` | `1000` | Resources per export batch. | +| `HFS_BULK_EXPORT_LEASE_DURATION` | `60` | Initial lease length in seconds. Must be greater than the heartbeat interval. | +| `HFS_BULK_EXPORT_HEARTBEAT_INTERVAL` | `20` | Worker heartbeat cadence in seconds. | +| `HFS_BULK_EXPORT_CLEANUP_INTERVAL` | `300` | Cleanup task scan interval in seconds. | +| `HFS_BULK_EXPORT_SINCE_NEWLY_ADDED` | `include` | Group-export `_since` toggle: `include` or `exclude`. | +| `HFS_BULK_EXPORT_DATABASE_URL` | from `HFS_DATABASE_URL` | PostgreSQL URL for the `postgres-s3` job store. | From b06997f8ec554b4c1391451c831590327ad90f7d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:51:21 -0400 Subject: [PATCH 78/81] docs(bulk-export): clarify compose stack is local example --- docker/bulk-export/docker-compose.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/bulk-export/docker-compose.yml b/docker/bulk-export/docker-compose.yml index c42bf08df..e581d09fe 100644 --- a/docker/bulk-export/docker-compose.yml +++ b/docker/bulk-export/docker-compose.yml @@ -1,7 +1,8 @@ # Multi-instance Bulk Data Export stack: HFS + PostgreSQL + MinIO + Keycloak. # -# This is the substrate for Inferno Bulk Data Test Kit conformance runs and -# for manual multi-instance smoke testing. Bring it up with: +# This is a provided local example for manual multi-instance smoke testing and +# trying Bulk Data clients such as Inferno. It is not used by the GitHub Actions +# workflow tests. Bring it up with: # # docker compose -f docker/bulk-export/docker-compose.yml up --build # From 501aba7dfdbdc9c78707f0a29753aec82ba9c0ac Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 11:51:25 -0400 Subject: [PATCH 79/81] docs(bulk-export): clarify compose workflow usage --- CLAUDE.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 457ebeb8e..0b9fcc718 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -683,9 +683,10 @@ HFS_BULK_EXPORT_REQUIRES_ACCESS_TOKEN=false \ cargo run --bin hfs --features postgres,s3 ``` -The full stack (HFS + Postgres + MinIO + Keycloak) is described by -`docker/bulk-export/docker-compose.yml`. See `.github/workflows/inferno-bulk-data.yml` -for the manual conformance run. +The full stack (HFS + Postgres + MinIO + Keycloak) is available as a local +example in `docker/bulk-export/docker-compose.yml`; GitHub Actions does not use +that compose file for bulk export tests. See `.github/workflows/inferno-bulk-data.yml` +for the manual conformance workflow. ### Behavior notes From c3a54476fb07410214823d91e80c3a4bd84fa847 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 12:12:01 -0400 Subject: [PATCH 80/81] fix(deps): update astral-tokio-tar for audit Upgrade astral-tokio-tar from 0.6.1 to 0.6.2 in Cargo.lock to clear RUSTSEC-2026-0145, which is pulled in through testcontainers. --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 881f99a82..09f736715 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -398,9 +398,9 @@ dependencies = [ [[package]] name = "astral-tokio-tar" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ce73b17c62717c4b6a9af10b43e87c578b0cac27e00666d48304d3b7d2c0693" +checksum = "cb50a7aae84a03bf55b067832bc376f4961b790c97e64d3eacee97d389b90277" dependencies = [ "filetime", "futures-core", From 332bdd5c197ae30c060b4e1c1500a88cb078cb4b Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Tue, 19 May 2026 12:26:43 -0400 Subject: [PATCH 81/81] ci(bulk-export): limit smoke matrix to implemented backends --- .github/workflows/bulk-export-smoke.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/bulk-export-smoke.yml b/.github/workflows/bulk-export-smoke.yml index 4c1b07c2c..7dfc12130 100644 --- a/.github/workflows/bulk-export-smoke.yml +++ b/.github/workflows/bulk-export-smoke.yml @@ -28,15 +28,7 @@ jobs: {"backend":"sqlite","bulk_mode":"embedded-local","expectation":"full"}, {"backend":"sqlite","bulk_mode":"postgres-s3","expectation":"full"}, {"backend":"postgres","bulk_mode":"embedded-local","expectation":"full"}, - {"backend":"postgres","bulk_mode":"postgres-s3","expectation":"full"}, - {"backend":"sqlite-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, - {"backend":"sqlite-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"postgres-elasticsearch","bulk_mode":"embedded-local","expectation":"endpoint-unavailable"}, - {"backend":"postgres-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"mongodb","bulk_mode":"postgres-s3","expectation":"unsupported"}, - {"backend":"mongodb-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"s3","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"}, - {"backend":"s3-elasticsearch","bulk_mode":"postgres-s3","expectation":"endpoint-unavailable"} + {"backend":"postgres","bulk_mode":"postgres-s3","expectation":"full"} ]' MATRIX=$(jq -c --argjson versions "$FHIR_VERSIONS" \