From 05353ba68c63efb901a8908132a7a0a63cbad0e7 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 14:40:34 -0400 Subject: [PATCH 01/12] feat(tamp): scaffold build/Build.cs with Info/Restore/Compile/Test targets (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First slice of AIRM5/Tamp build-tool integration. Adds: - build/Build.csproj — .NET 10 console project referencing Tamp.Core 1.0.7 and Tamp.NetCli.V10 1.0.5 - build/Build.cs — minimal Build class extending TampBuild with four side-by-side targets: * Info — prints config/solution/root/git context * Restore — DotNet.Restore on src/dotnet/HoldFast.Backend.slnx * Compile — DependsOn Restore; --no-restore build * Test — DependsOn Compile; --no-build test with TRX logger writing to artifacts/test-results/ - .gitignore — un-ignore the root /build/ directory so the script is tracked (the **/build rule still hides nested build/ dirs inside packages) Existing pipeline (dotnet build / dotnet test directly against the slnx) remains the source of truth; this branch runs Tamp side-by-side per the adoption plan in HOL-54. Co-Authored-By: Claude Opus 4.7 --- .gitignore | 10 +++++++++ build/Build.cs | 52 ++++++++++++++++++++++++++++++++++++++++++++++ build/Build.csproj | 17 +++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 build/Build.cs create mode 100644 build/Build.csproj diff --git a/.gitignore b/.gitignore index 68d779f2..1662a469 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,13 @@ sdk/highlight-wordpress/highlight-io/highlight.js # .NET build outputs **/bin/ **/obj/ + +# Tamp build scripts (HOL-54) — un-ignore the root /build/ dir that holds +# Build.cs + Build.csproj. The **/build rule above still hides nested build/ +# trees inside packages, and **/bin/ + **/obj/ still hide compile output +# inside /build/ itself. 
+!/build/ +!/build/** + +# Tamp's own artifacts/ folder (TRX, coverage, publish output) +/artifacts/ diff --git a/build/Build.cs b/build/Build.cs new file mode 100644 index 00000000..16ef3ce0 --- /dev/null +++ b/build/Build.cs @@ -0,0 +1,52 @@ +using Tamp; +using Tamp.NetCli.V10; + +class Build : TampBuild +{ + public static int Main(string[] args) => Execute(args); + + [Parameter("Build configuration (Debug|Release)")] + Configuration Configuration = IsLocalBuild ? Configuration.Debug : Configuration.Release; + + [Solution(Path = "src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; + [GitRepository] readonly GitRepository Git = null!; + + AbsolutePath Artifacts => RootDirectory / "artifacts"; + + Target Info => _ => _ + .TopLevel() + .Executes(() => + { + Console.WriteLine("HoldFast build — first Tamp run"); + Console.WriteLine($" Configuration: {Configuration}"); + Console.WriteLine($" Solution: {Solution?.Path}"); + Console.WriteLine($" Root: {RootDirectory}"); + Console.WriteLine($" Artifacts: {Artifacts}"); + Console.WriteLine($" Git branch: {Git?.Branch}"); + Console.WriteLine($" Git commit: {Git?.Commit}"); + }); + + Target Restore => _ => _ + .TopLevel() + .Executes(() => DotNet.Restore(s => s + .SetProject(Solution.Path))); + + Target Compile => _ => _ + .TopLevel() + .DependsOn(nameof(Restore)) + .Executes(() => DotNet.Build(s => s + .SetProject(Solution.Path) + .SetConfiguration(Configuration) + .SetNoRestore(true))); + + Target Test => _ => _ + .TopLevel() + .DependsOn(nameof(Compile)) + .Executes(() => DotNet.Test(s => s + .SetProject(Solution.Path) + .SetConfiguration(Configuration) + .SetNoBuild(true) + .SetNoRestore(true) + .SetResultsDirectory(Artifacts / "test-results") + .AddLogger("trx;LogFileName=test-results.trx"))); +} diff --git a/build/Build.csproj b/build/Build.csproj new file mode 100644 index 00000000..d5f17a32 --- /dev/null +++ b/build/Build.csproj @@ -0,0 +1,17 @@ + + + + Exe + net10.0 + HoldFast.Build + enable + 
latest + enable + + + + + + + + From 8eafb4d6e7f5d93761283a61652a7003c51eeb1d Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 15:04:31 -0400 Subject: [PATCH 02/12] wip(tamp): port Clean/Publish/YarnInstall/FrontendBuild/DockerBuildBackend (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds five more side-by-side targets to build/Build.cs against Tamp 1.0.7 + satellites: * Clean — AbsolutePath delete/ensure on artifacts/ * Publish — DotNet.Publish HoldFast.Api -> artifacts/publish/. Verified byte-identical to raw dotnet publish. * YarnInstall — Yarn.Install --immutable against workspace root. Berry 4.x workspace tree fully recognised; 12.7s warm. * FrontendBuild — Yarn.Run build:frontend (npm-script proxy to turbo) until Turbo's chicken/egg bootstrap is addressed. 17/17 turbo tasks green, 1m41s. * DockerBuildBackend — authored but NOT yet run; held until BuildKit-aware Buildx.Build vs legacy Build choice is patched. Package refs extended in Build.csproj: Tamp.Yarn.V4 0.1.0, Tamp.Turbo.V2 0.1.0, Tamp.Vite.V5 0.1.0, Tamp.Docker.V27 0.2.0. This commit deliberately carries workaround stubs that should be removed once airm5 ships the friction-fix wave (see HOL-54 thread): * ResolveOnPath helper (~25 lines) — replaces missing [FromPath] / Tool discovery for native tools (yarn, docker, turbo). * Console.WriteLine in Info target — Tamp.Logger surface is instance-only, no Log.Information static; standing in until clarified. * Glob-based bin/obj cleanup dropped from Clean — AbsolutePath.GlobDirectories returns 0 hits for "**/bin"/"**/obj" patterns (probable Tamp.Core bug). * FrontendBuild routes via Yarn.Run because Tamp.Turbo.V2 needs a Tool that only exists at node_modules/.bin/turbo after YarnInstall runs. Backend Restore/Compile/Test/Publish + frontend YarnInstall/FrontendBuild are all green via Tamp at parity with the legacy pipeline. Existing pipeline still runs unchanged side-by-side. 
No cutover yet. Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 72 ++++++++++++++++++++++++++++++++++++++++++++++ build/Build.csproj | 4 +++ 2 files changed, 76 insertions(+) diff --git a/build/Build.cs b/build/Build.cs index 16ef3ce0..f8f1ba10 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -1,5 +1,8 @@ using Tamp; using Tamp.NetCli.V10; +using Tamp.Yarn.V4; +using Tamp.Turbo.V2; +using Tamp.Docker.V27; class Build : TampBuild { @@ -49,4 +52,73 @@ class Build : TampBuild .SetNoRestore(true) .SetResultsDirectory(Artifacts / "test-results") .AddLogger("trx;LogFileName=test-results.trx"))); + + AbsolutePath DotnetSrc => RootDirectory / "src" / "dotnet"; + + Target Clean => _ => _ + .TopLevel() + .Executes(() => + { + if (Artifacts.DirectoryExists()) + { + Console.WriteLine($" rm -rf {Artifacts}"); + Artifacts.DeleteDirectory(); + } + Artifacts.EnsureDirectoryExists(); + + }); + + AbsolutePath PublishDir => Artifacts / "publish" / "HoldFast.Api"; + + Target Publish => _ => _ + .TopLevel() + .DependsOn(nameof(Compile)) + .Executes(() => DotNet.Publish(s => s + .SetProject(RootDirectory / "src" / "dotnet" / "src" / "HoldFast.Api" / "HoldFast.Api.csproj") + .SetConfiguration(Configuration) + .SetOutput(PublishDir) + .SetNoBuild(true) + .SetNoRestore(true))); + + // ── Frontend (Yarn Berry 4.x + Turbo + Vite) ────────────────────── + + // No [FromPath] attribute in Tamp.Core yet — manually resolve yarn on PATH. + // Tool ctor takes (AbsolutePath executable, string workingDirectory). + static AbsolutePath ResolveOnPath(string name) + { + var pathEnv = Environment.GetEnvironmentVariable("PATH") ?? ""; + var sep = OperatingSystem.IsWindows() ? ';' : ':'; + var exts = OperatingSystem.IsWindows() + ? 
new[] { ".CMD", ".cmd", ".exe", ".EXE", ".bat", "" } + : new[] { "" }; + foreach (var dir in pathEnv.Split(sep, StringSplitOptions.RemoveEmptyEntries)) + { + foreach (var ext in exts) + { + var candidate = Path.Combine(dir, name + ext); + if (File.Exists(candidate)) return AbsolutePath.Create(candidate); + } + } + throw new InvalidOperationException($"Could not find '{name}' on PATH"); + } + + Tool YarnTool => new(ResolveOnPath("yarn"), RootDirectory); + + Target YarnInstall => _ => _ + .TopLevel() + .Executes(() => Yarn.Install(YarnTool, s => s.SetImmutable(true))); + + Target FrontendBuild => _ => _ + .TopLevel() + .DependsOn(nameof(YarnInstall)) + .Executes(() => Yarn.Run(YarnTool, s => s.SetScript("build:frontend"))); + + // ── Docker ────────────────────────────────────────────────────── + + Target DockerBuildBackend => _ => _ + .TopLevel() + .Executes(() => Docker.Build(s => s + .SetContext(RootDirectory) + .SetDockerfile(RootDirectory / "infra" / "docker" / "backend-dotnet.Dockerfile") + .AddTag("holdfast-backend-dotnet:tamp"))); } diff --git a/build/Build.csproj b/build/Build.csproj index d5f17a32..bbf1ead0 100644 --- a/build/Build.csproj +++ b/build/Build.csproj @@ -12,6 +12,10 @@ + + + + From 34cbe2b4bddea1eaf946ead88f2661f67e0edcbe Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 19:47:51 -0400 Subject: [PATCH 03/12] refactor(tamp): adopt 1.2.0 ergonomic surface (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tamp.Core + NetCli.V10 jumped to 1.2.0 in airm5's coordinated wave. The satellites bump independently (0.x.1 patches rebuilt against Core 1.2.0); those are blocked on NuGet flatcontainer propagation as of this commit and will follow in a separate cleanup once the CDN catches up. Build.cs surface changes: * .TopLevel() — stripped from every target. 1.1.0+ makes top-level the default; the call is a no-op marked [Obsolete]. 
(NB: .Internal() is the new inverse marker if a target should be hidden from --list.) * .DependsOn(nameof(Target)) → .DependsOn(Target). The new [CallerArgumentExpression] overloads inject the identifier name literally; existing nameof()/bare-string forms still compile, but the bare form reads as English. * New `Ci` target marked `.Default()` — the canonical no-args entry. It fans out into Test, Publish, FrontendBuild, DockerBuildBackend so a cold `dotnet tamp` exercises the entire pipeline. Note: DependsOn is chained per-target rather than varargs — the varargs overload takes `string[]`, not `Target[]`, so the natural `DependsOn(A, B, C)` shape doesn't bind. Reported observation, not a blocker. Net: 88 → 56 lines (-36%) with no semantic change. Test suite still 3,172 green in 20.2s post-refactor. Frictions #1–#12 all stay closed. Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 88 +++++++++++++++++++--------------------------- build/Build.csproj | 8 ++--- 2 files changed, 40 insertions(+), 56 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index f8f1ba10..762f54bd 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -11,16 +11,21 @@ class Build : TampBuild [Parameter("Build configuration (Debug|Release)")] Configuration Configuration = IsLocalBuild ? Configuration.Debug : Configuration.Release; - [Solution(Path = "src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; + // HoldFast is a multi-solution monorepo (SDK + e2e scaffolds also carry + // .sln/.slnx files), so the subtree search would be ambiguous. Pin explicitly. 
+ [Solution("src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; [GitRepository] readonly GitRepository Git = null!; + [FromPath("yarn")] readonly Tool YarnTool = null!; + [FromNodeModules("turbo")] readonly Tool TurboTool = null!; + AbsolutePath Artifacts => RootDirectory / "artifacts"; + AbsolutePath PublishDir => Artifacts / "publish" / "HoldFast.Api"; Target Info => _ => _ - .TopLevel() .Executes(() => { - Console.WriteLine("HoldFast build — first Tamp run"); + Console.WriteLine("HoldFast build via Tamp"); Console.WriteLine($" Configuration: {Configuration}"); Console.WriteLine($" Solution: {Solution?.Path}"); Console.WriteLine($" Root: {RootDirectory}"); @@ -30,21 +35,20 @@ class Build : TampBuild }); Target Restore => _ => _ - .TopLevel() .Executes(() => DotNet.Restore(s => s .SetProject(Solution.Path))); Target Compile => _ => _ - .TopLevel() - .DependsOn(nameof(Restore)) + .DependsOn(Restore) .Executes(() => DotNet.Build(s => s .SetProject(Solution.Path) .SetConfiguration(Configuration) .SetNoRestore(true))); + // NetCli.V10 1.0.9+ auto-expands LogFileName → LogFilePrefix in solution + // mode, so this produces one TRX file per test assembly. Target Test => _ => _ - .TopLevel() - .DependsOn(nameof(Compile)) + .DependsOn(Compile) .Executes(() => DotNet.Test(s => s .SetProject(Solution.Path) .SetConfiguration(Configuration) @@ -53,26 +57,14 @@ class Build : TampBuild .SetResultsDirectory(Artifacts / "test-results") .AddLogger("trx;LogFileName=test-results.trx"))); - AbsolutePath DotnetSrc => RootDirectory / "src" / "dotnet"; - + // CleanArtifacts(): framework-provided safe wipe — Solution.Projects only, + // self-deletion guarded. Never use RootDirectory.GlobDirectories("**/bin") + // — that's the friction-#12 footgun. 
Target Clean => _ => _ - .TopLevel() - .Executes(() => - { - if (Artifacts.DirectoryExists()) - { - Console.WriteLine($" rm -rf {Artifacts}"); - Artifacts.DeleteDirectory(); - } - Artifacts.EnsureDirectoryExists(); - - }); - - AbsolutePath PublishDir => Artifacts / "publish" / "HoldFast.Api"; + .Executes(() => CleanArtifacts()); Target Publish => _ => _ - .TopLevel() - .DependsOn(nameof(Compile)) + .DependsOn(Compile) .Executes(() => DotNet.Publish(s => s .SetProject(RootDirectory / "src" / "dotnet" / "src" / "HoldFast.Api" / "HoldFast.Api.csproj") .SetConfiguration(Configuration) @@ -82,43 +74,35 @@ class Build : TampBuild // ── Frontend (Yarn Berry 4.x + Turbo + Vite) ────────────────────── - // No [FromPath] attribute in Tamp.Core yet — manually resolve yarn on PATH. - // Tool ctor takes (AbsolutePath executable, string workingDirectory). - static AbsolutePath ResolveOnPath(string name) - { - var pathEnv = Environment.GetEnvironmentVariable("PATH") ?? ""; - var sep = OperatingSystem.IsWindows() ? ';' : ':'; - var exts = OperatingSystem.IsWindows() - ? new[] { ".CMD", ".cmd", ".exe", ".EXE", ".bat", "" } - : new[] { "" }; - foreach (var dir in pathEnv.Split(sep, StringSplitOptions.RemoveEmptyEntries)) - { - foreach (var ext in exts) - { - var candidate = Path.Combine(dir, name + ext); - if (File.Exists(candidate)) return AbsolutePath.Create(candidate); - } - } - throw new InvalidOperationException($"Could not find '{name}' on PATH"); - } - - Tool YarnTool => new(ResolveOnPath("yarn"), RootDirectory); - Target YarnInstall => _ => _ - .TopLevel() .Executes(() => Yarn.Install(YarnTool, s => s.SetImmutable(true))); + // Workspace-local turbo only exists after YarnInstall populates + // node_modules/.bin/turbo, so this DependsOn is mandatory. 
Target FrontendBuild => _ => _ - .TopLevel() - .DependsOn(nameof(YarnInstall)) - .Executes(() => Yarn.Run(YarnTool, s => s.SetScript("build:frontend"))); + .DependsOn(YarnInstall) + .Executes(() => Turbo.Run(TurboTool, s => s + .SetWorkingDirectory(RootDirectory) + .AddTask("build:fast") + .AddFilter("@holdfast-io/frontend..."))); // ── Docker ────────────────────────────────────────────────────── + // Docker.V27 0.3.0 routes to `docker buildx build`, so the Dockerfile's + // `RUN --mount=type=cache` directives work as expected. Target DockerBuildBackend => _ => _ - .TopLevel() .Executes(() => Docker.Build(s => s .SetContext(RootDirectory) .SetDockerfile(RootDirectory / "infra" / "docker" / "backend-dotnet.Dockerfile") .AddTag("holdfast-backend-dotnet:tamp"))); + + // ── CI entry ───────────────────────────────────────────────────── + + // `dotnet tamp` (no args) runs the full verification + artifact pipeline. + Target Ci => _ => _ + .Default() + .DependsOn(Test) + .DependsOn(Publish) + .DependsOn(FrontendBuild) + .DependsOn(DockerBuildBackend); } diff --git a/build/Build.csproj b/build/Build.csproj index bbf1ead0..079843f6 100644 --- a/build/Build.csproj +++ b/build/Build.csproj @@ -10,12 +10,12 @@ - - + + - + - + From 7f95df90fffd2d8c0a168ff41ee0a516303d82ab Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 20:37:25 -0400 Subject: [PATCH 04/12] feat(helm): scaffold AGPL-distributable helm chart for HoldFast (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Standard-shape helm chart at infra/helm/holdfast/ for the cutover from docker-compose hobby deploy → kubernetes-native deploy. Lives in the HoldFast repo (not a separate infra repo) so operators consuming the AGPL fork get one canonical chart in the source tree alongside the Dockerfile. Architecture: two pods. * Deployment/-backend The single .NET 10 Kestrel container (API + frontend bundle + workers + OTLP receivers). 
Resource defaults derived from the 43h soak: 200m/512Mi requests, 2000m/2Gi limits. Liveness on /health/live, readiness on /health/ready. * StatefulSet/-postgres TimescaleDB-HA pg16 with a single volumeClaimTemplate. PGDATA pinned to /home/postgres/pgdata/data (not the upstream postgres default — TimescaleDB-HA's layout differs). pg_isready exec probes. Operator override: postgres.enabled=false + externalPostgres.* to bring your own database. Templates (all standard helm shape): templates/ _helpers.tpl labels, fullname, image, postgres host composition (chart-managed vs external) NOTES.txt post-install runbook serviceaccount.yaml configmap.yaml backend env (URIs, storage selector, auth) secret.yaml PSQL_PASSWORD (or operator references an existing Secret via passwordExistingSecret) backend-service.yaml ClusterIP, port 8080 backend-deployment.yaml postgres-service.yaml ClusterIP, port 5432, internal-only postgres-statefulset.yaml Labels follow community-standard kubernetes.io/* conventions (name/instance/version/component/managed-by/part-of) per microk8s's "lean toward bitnami/prometheus-operator shape, not lab conventions" guidance. Operator-facing distribution audience wins over lab-internal convention matching. values.yaml defaults are operator-safe (ghcr.io registry, no storage class hint, no hardcoded URLs — all REQUIRED fields are documented). values.lab.yaml carries the BrewingCoder microk8s overrides: localhost:32000 registry, nfs-va-vm storage class, the four holdfast.brewingcoder.com URL knobs the backend needs. Auth: chart v1 only supports auth.mode=dev. enterprise mode (in-app JWT) is roadmapped — the chart should support `--set auth.mode=enterprise` and a JWT issuer config when that lands, but operators today must front the deployment with a zero-trust proxy (Cloudflare Access, Authelia, oauth2-proxy). README documents this explicitly. 
Lints clean (helm 3.17.4); template renders against values.lab.yaml produce the expected 7 resources with correct lab-cluster overrides applied. No Ingress shipped — Cloudflare tunnel routes holdfast.brewingcoder.com → holdfast-backend.holdfast.svc:8080 directly. Wire-up of `dotnet tamp DeployQa` (and `Tamp.Helm.V3` if airm5 ships the wrapper, hand-rolled Tool.Plan() if not) is the next commit. Co-Authored-By: Claude Opus 4.7 --- infra/helm/holdfast/.helmignore | 22 +++ infra/helm/holdfast/Chart.yaml | 31 ++++ infra/helm/holdfast/README.md | 94 +++++++++++ infra/helm/holdfast/templates/NOTES.txt | 38 +++++ infra/helm/holdfast/templates/_helpers.tpl | 155 +++++++++++++++++ .../templates/backend-deployment.yaml | 75 +++++++++ .../holdfast/templates/backend-service.yaml | 23 +++ infra/helm/holdfast/templates/configmap.yaml | 20 +++ .../holdfast/templates/postgres-service.yaml | 18 ++ .../templates/postgres-statefulset.yaml | 99 +++++++++++ infra/helm/holdfast/templates/secret.yaml | 12 ++ .../holdfast/templates/serviceaccount.yaml | 13 ++ infra/helm/holdfast/values.lab.yaml | 26 +++ infra/helm/holdfast/values.yaml | 159 ++++++++++++++++++ 14 files changed, 785 insertions(+) create mode 100644 infra/helm/holdfast/.helmignore create mode 100644 infra/helm/holdfast/Chart.yaml create mode 100644 infra/helm/holdfast/README.md create mode 100644 infra/helm/holdfast/templates/NOTES.txt create mode 100644 infra/helm/holdfast/templates/_helpers.tpl create mode 100644 infra/helm/holdfast/templates/backend-deployment.yaml create mode 100644 infra/helm/holdfast/templates/backend-service.yaml create mode 100644 infra/helm/holdfast/templates/configmap.yaml create mode 100644 infra/helm/holdfast/templates/postgres-service.yaml create mode 100644 infra/helm/holdfast/templates/postgres-statefulset.yaml create mode 100644 infra/helm/holdfast/templates/secret.yaml create mode 100644 infra/helm/holdfast/templates/serviceaccount.yaml create mode 100644 infra/helm/holdfast/values.lab.yaml 
create mode 100644 infra/helm/holdfast/values.yaml diff --git a/infra/helm/holdfast/.helmignore b/infra/helm/holdfast/.helmignore new file mode 100644 index 00000000..c696f373 --- /dev/null +++ b/infra/helm/holdfast/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building helm packages. +# See https://helm.sh/docs/chart_template_guide/helm_ignore_file/ + +.DS_Store +.git/ +.gitignore +.bzr/ +.hg/ +.hgignore +.svn/ +*.swp +*.bak +*.tmp +*.orig +*~ +.project +.idea/ +*.tmproj +.vscode/ + +# Helm +OWNERS diff --git a/infra/helm/holdfast/Chart.yaml b/infra/helm/holdfast/Chart.yaml new file mode 100644 index 00000000..ec5deebf --- /dev/null +++ b/infra/helm/holdfast/Chart.yaml @@ -0,0 +1,31 @@ +apiVersion: v2 +name: holdfast +description: | + HoldFast — self-hosted, AGPL-3.0 observability platform. Session replay, + error monitoring, logging, and distributed tracing in a single .NET 10 + backend with a PostgreSQL (TimescaleDB-HA) data store. Fork of Highlight.io. + +type: application +version: 0.1.0 # chart version — bump on chart shape changes +appVersion: "0.1.0" # app version — bump on backend image tag changes + +home: https://github.com/BrewingCoder/holdfast +sources: + - https://github.com/BrewingCoder/holdfast + +maintainers: + - name: BrewingCoder + email: scott@gscottsingleton.com + +keywords: + - observability + - session-replay + - error-monitoring + - tracing + - logging + - opentelemetry + - self-hosted + +annotations: + category: Observability + licenses: AGPL-3.0-only diff --git a/infra/helm/holdfast/README.md b/infra/helm/holdfast/README.md new file mode 100644 index 00000000..f4ba32fc --- /dev/null +++ b/infra/helm/holdfast/README.md @@ -0,0 +1,94 @@ +# HoldFast Helm Chart + +Self-hosted HoldFast — AGPL-3.0 observability platform — packaged for Kubernetes. 
+ +## TL;DR + +```bash +helm install holdfast oci://ghcr.io/brewingcoder/charts/holdfast \ + --namespace holdfast --create-namespace \ + --set publicUrl=https://holdfast.example.com \ + --set publicGraphUri=https://holdfast.example.com/public \ + --set privateGraphUri=https://holdfast.example.com/private \ + --set collectorOtlpEndpoint=https://holdfast.example.com/otel \ + --set postgres.auth.password=$(openssl rand -base64 24) +``` + +## Architecture + +Two pods. That's the whole deployment. + +| Workload | Image | Role | +|---|---|---| +| `Deployment/-backend` | `holdfast-backend-dotnet` | .NET 10 Kestrel — API + frontend bundle + workers + OTLP receivers (`/otel/v1/{logs,traces,metrics}`) all in one binary | +| `StatefulSet/-postgres` | `timescale/timescaledb-ha:pg16` | Postgres 16 + TimescaleDB extensions, with the full analytics columnar path | + +Stripped from the upstream Highlight.io 9-container architecture: Kafka, Zookeeper, Redis, the OpenTelemetry Collector, the Python predictions service, and the nginx frontend container. All folded into the backend or removed. 
+ +## Requirements + +- Kubernetes 1.27+ +- Helm 3.13+ +- A StorageClass that supports `ReadWriteOnce` (default works fine; lab clusters can override via `postgres.persistence.storageClassName`) +- An ingress / reverse proxy / Cloudflare tunnel pointing at the backend Service on port 8080 (this chart does not create an `Ingress` — operators wire that up to their preferred edge) + +## Required values + +These must be set or the chart won't render usefully (the backend can't compute its own URLs): + +| Key | Example | +|---|---| +| `publicUrl` | `https://holdfast.example.com` | +| `publicGraphUri` | `https://holdfast.example.com/public` | +| `privateGraphUri` | `https://holdfast.example.com/private` | +| `collectorOtlpEndpoint` | `https://holdfast.example.com/otel` | +| `postgres.auth.password` *or* `postgres.auth.existingSecret` | (set, or referenced existing Secret) | + +## Authentication + +**v1 of this chart only supports `auth.mode=dev`** — the backend runs with no in-app authentication. **Do not expose to anything beyond a trusted network without fronting it with a zero-trust proxy** (Cloudflare Access, Authelia, oauth2-proxy, etc.). + +`auth.mode=enterprise` (in-app JWT auth) is planned but not yet wired into the chart. + +## Storage backend + +- `storage.analytics=Postgres` (default): all analytics paths run through Postgres. The TimescaleDB extensions provide the columnar performance HoldFast needs. **Recommended for most operators.** +- `storage.analytics=ClickHouse`: HoldFast also supports an OTel-shaped ClickHouse backend, but **this chart does not yet manage the ClickHouse pod**. Bring your own ClickHouse and configure connection via `backend.extraEnv`. 
+ +## Bring-your-own Postgres + +Set `postgres.enabled=false` and configure `externalPostgres.*`: + +```yaml +postgres: + enabled: false + +externalPostgres: + host: my-pg.example.com + port: 5432 + user: holdfast + passwordSecret: + name: my-existing-secret + key: password +``` + +## Lab cluster note + +The `values.lab.yaml` file in this directory is **specific to the BrewingCoder microk8s QA cluster**. Operators self-hosting elsewhere should write their own values file (or set on the command line); `values.lab.yaml` is preserved in-tree only because it serves as a working example of overrides. + +## Development + +```bash +# Render templates without applying (handy for diff'ing against running state): +helm template holdfast . -f values.yaml --set postgres.auth.password=test + +# Lint the chart before committing: +helm lint . + +# Package for OCI registry distribution: +helm package . -d ../../../artifacts/helm +``` + +## License + +[AGPL-3.0](https://github.com/BrewingCoder/holdfast/blob/main/LICENSE). diff --git a/infra/helm/holdfast/templates/NOTES.txt b/infra/helm/holdfast/templates/NOTES.txt new file mode 100644 index 00000000..99d2557b --- /dev/null +++ b/infra/helm/holdfast/templates/NOTES.txt @@ -0,0 +1,38 @@ +HoldFast {{ .Chart.AppVersion }} installed as release "{{ .Release.Name }}" in namespace "{{ .Release.Namespace }}". + +1. Wait for the backend to become ready: + + kubectl --namespace {{ .Release.Namespace }} \ + rollout status deployment/{{ include "holdfast.fullname" . }}-backend + +2. Reach the dashboard: + +{{- if .Values.publicUrl }} + {{ .Values.publicUrl }} +{{- else }} + (publicUrl was not set; expose the Service named "{{ include "holdfast.fullname" . }}-backend" + on port {{ .Values.backend.service.port }} through your ingress / tunnel of choice) +{{- end }} + +3. Pre-deploy parity check: + + helm --namespace {{ .Release.Namespace }} template {{ .Release.Name }} . 
\ + -f values.yaml [-f values.lab.yaml] [--set image.tag=<tag>] + +Auth mode: {{ .Values.auth.mode }} + {{- if eq .Values.auth.mode "dev" }} + WARNING: dev mode has NO in-app authentication. Front the deployment with + a zero-trust proxy (Cloudflare Access, Authelia, oauth2-proxy) before + exposing to anything beyond a trusted network. + {{- end }} + +Storage backend: {{ .Values.storage.analytics }} + {{- if eq .Values.storage.analytics "ClickHouse" }} + NOTE: ClickHouse pod is NOT yet managed by this chart. Bring your own + ClickHouse instance and configure connection via extraEnv on the + backend. + {{- end }} + +Docs: + - https://github.com/BrewingCoder/holdfast + - https://github.com/BrewingCoder/holdfast/blob/main/docs/HOLDFAST-NOTES.md diff --git a/infra/helm/holdfast/templates/_helpers.tpl b/infra/helm/holdfast/templates/_helpers.tpl new file mode 100644 index 00000000..0bbe318c --- /dev/null +++ b/infra/helm/holdfast/templates/_helpers.tpl @@ -0,0 +1,155 @@ +{{/* +Common helpers for the HoldFast chart. +*/}} + +{{/* +Expand the name of the chart. +*/}} +{{- define "holdfast.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a fully-qualified app name. Honors fullnameOverride or builds +"<release>-<chart>" by default. Truncated to 63 chars for k8s name limits. +*/}} +{{- define "holdfast.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Chart name + version for the helm.sh/chart label. 
+*/}} +{{- define "holdfast.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels shared by every object in the release. +*/}} +{{- define "holdfast.labels" -}} +helm.sh/chart: {{ include "holdfast.chart" . }} +{{ include "holdfast.selectorLabels" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: holdfast +{{- end }} + +{{/* +Selector labels — release-stable, used in selector and matchLabels. +*/}} +{{- define "holdfast.selectorLabels" -}} +app.kubernetes.io/name: {{ include "holdfast.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Component-scoped labels for the backend pod. +*/}} +{{- define "holdfast.backend.labels" -}} +{{ include "holdfast.labels" . }} +app.kubernetes.io/component: backend +{{- end }} + +{{- define "holdfast.backend.selectorLabels" -}} +{{ include "holdfast.selectorLabels" . }} +app.kubernetes.io/component: backend +{{- end }} + +{{/* +Component-scoped labels for the postgres pod. +*/}} +{{- define "holdfast.postgres.labels" -}} +{{ include "holdfast.labels" . }} +app.kubernetes.io/component: postgres +{{- end }} + +{{- define "holdfast.postgres.selectorLabels" -}} +{{ include "holdfast.selectorLabels" . }} +app.kubernetes.io/component: postgres +{{- end }} + +{{/* +ServiceAccount name — honors create=false by letting users specify a +pre-existing SA name. +*/}} +{{- define "holdfast.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "holdfast.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Backend image reference. Defaults the tag to .Chart.AppVersion when empty. 
+*/}} +{{- define "holdfast.image" -}} +{{- $registry := .Values.image.registry -}} +{{- $repo := .Values.image.repository -}} +{{- $tag := default .Chart.AppVersion .Values.image.tag -}} +{{- if $registry -}} +{{- printf "%s/%s:%s" $registry $repo $tag -}} +{{- else -}} +{{- printf "%s:%s" $repo $tag -}} +{{- end -}} +{{- end }} + +{{/* +Postgres host — chart-managed StatefulSet service name OR external host. +*/}} +{{- define "holdfast.postgres.host" -}} +{{- if .Values.postgres.enabled -}} +{{- printf "%s-postgres" (include "holdfast.fullname" .) -}} +{{- else -}} +{{- .Values.externalPostgres.host -}} +{{- end -}} +{{- end }} + +{{- define "holdfast.postgres.port" -}} +{{- if .Values.postgres.enabled -}} +{{- .Values.postgres.service.port -}} +{{- else -}} +{{- .Values.externalPostgres.port -}} +{{- end -}} +{{- end }} + +{{- define "holdfast.postgres.user" -}} +{{- if .Values.postgres.enabled -}} +{{- .Values.postgres.auth.user -}} +{{- else -}} +{{- .Values.externalPostgres.user -}} +{{- end -}} +{{- end }} + +{{/* +Name of the Secret that holds PSQL_PASSWORD. +*/}} +{{- define "holdfast.postgres.secretName" -}} +{{- if .Values.postgres.enabled -}} +{{- if .Values.postgres.auth.existingSecret -}} +{{- .Values.postgres.auth.existingSecret -}} +{{- else -}} +{{- printf "%s-postgres" (include "holdfast.fullname" .) 
-}} +{{- end -}} +{{- else -}} +{{- .Values.externalPostgres.passwordSecret.name -}} +{{- end -}} +{{- end }} + +{{- define "holdfast.postgres.secretKey" -}} +{{- if .Values.postgres.enabled -}} +{{- .Values.postgres.auth.passwordKey -}} +{{- else -}} +{{- .Values.externalPostgres.passwordSecret.key -}} +{{- end -}} +{{- end }} diff --git a/infra/helm/holdfast/templates/backend-deployment.yaml b/infra/helm/holdfast/templates/backend-deployment.yaml new file mode 100644 index 00000000..8c2fdbad --- /dev/null +++ b/infra/helm/holdfast/templates/backend-deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "holdfast.fullname" . }}-backend + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.backend.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.backend.replicaCount }} + selector: + matchLabels: + {{- include "holdfast.backend.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.backend.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "holdfast.backend.labels" . | nindent 8 }} + {{- with .Values.backend.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "holdfast.serviceAccountName" . }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.backend.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: backend + image: {{ include "holdfast.image" . | quote }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.backend.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + envFrom: + - configMapRef: + name: {{ include "holdfast.fullname" . 
}}-backend + env: + - name: PSQL_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "holdfast.postgres.secretName" . }} + key: {{ include "holdfast.postgres.secretKey" . }} + {{- with .Values.backend.extraEnv }} + {{- toYaml . | nindent 12 }} + {{- end }} + livenessProbe: + {{- toYaml .Values.backend.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.backend.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.backend.resources | nindent 12 }} + {{- with .Values.backend.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.backend.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.backend.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/infra/helm/holdfast/templates/backend-service.yaml b/infra/helm/holdfast/templates/backend-service.yaml new file mode 100644 index 00000000..bb0edb8e --- /dev/null +++ b/infra/helm/holdfast/templates/backend-service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + # NB: microk8s's Cloudflare tunnel routes traffic to this Service by name. + # If you rename this template's metadata.name, coordinate with the cluster + # operator (the tunnel rule needs to match). + name: {{ include "holdfast.fullname" . }}-backend + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.backend.labels" . | nindent 4 }} + {{- with .Values.backend.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.backend.service.type }} + ports: + - port: {{ .Values.backend.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "holdfast.backend.selectorLabels" . 
| nindent 4 }} diff --git a/infra/helm/holdfast/templates/configmap.yaml b/infra/helm/holdfast/templates/configmap.yaml new file mode 100644 index 00000000..7d805b9e --- /dev/null +++ b/infra/helm/holdfast/templates/configmap.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "holdfast.fullname" . }}-backend + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.backend.labels" . | nindent 4 }} +data: + PSQL_HOST: {{ include "holdfast.postgres.host" . | quote }} + PSQL_PORT: {{ include "holdfast.postgres.port" . | quote }} + PSQL_USER: {{ include "holdfast.postgres.user" . | quote }} + + STORAGE_ANALYTICS: {{ .Values.storage.analytics | quote }} + + REACT_APP_PUBLIC_GRAPH_URI: {{ .Values.publicGraphUri | quote }} + REACT_APP_PRIVATE_GRAPH_URI: {{ .Values.privateGraphUri | quote }} + REACT_APP_FRONTEND_URI: {{ .Values.publicUrl | quote }} + COLLECTOR_OTLP_ENDPOINT: {{ .Values.collectorOtlpEndpoint | quote }} + + AUTH_MODE: {{ .Values.auth.mode | quote }} diff --git a/infra/helm/holdfast/templates/postgres-service.yaml b/infra/helm/holdfast/templates/postgres-service.yaml new file mode 100644 index 00000000..12b7bb8b --- /dev/null +++ b/infra/helm/holdfast/templates/postgres-service.yaml @@ -0,0 +1,18 @@ +{{- if .Values.postgres.enabled -}} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "holdfast.fullname" . }}-postgres + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.postgres.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.postgres.service.port }} + targetPort: postgres + protocol: TCP + name: postgres + selector: + {{- include "holdfast.postgres.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/infra/helm/holdfast/templates/postgres-statefulset.yaml b/infra/helm/holdfast/templates/postgres-statefulset.yaml new file mode 100644 index 00000000..1d717d7e --- /dev/null +++ b/infra/helm/holdfast/templates/postgres-statefulset.yaml @@ -0,0 +1,99 @@ +{{- if .Values.postgres.enabled -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "holdfast.fullname" . }}-postgres + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.postgres.labels" . | nindent 4 }} +spec: + replicas: 1 + serviceName: {{ include "holdfast.fullname" . }}-postgres + selector: + matchLabels: + {{- include "holdfast.postgres.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "holdfast.postgres.labels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.postgres.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: postgres + image: "{{ .Values.postgres.image.repository }}:{{ .Values.postgres.image.tag }}" + imagePullPolicy: {{ .Values.postgres.image.pullPolicy }} + {{- with .Values.postgres.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + ports: + - name: postgres + containerPort: 5432 + protocol: TCP + env: + - name: POSTGRES_USER + value: {{ .Values.postgres.auth.user | quote }} + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "holdfast.postgres.secretName" . }} + key: {{ include "holdfast.postgres.secretKey" . 
}} + - name: PGDATA + value: {{ .Values.postgres.dataPath | quote }} + livenessProbe: + exec: + command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}] + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + readinessProbe: + exec: + command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}] + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 5 + resources: + {{- toYaml .Values.postgres.resources | nindent 12 }} + {{- if .Values.postgres.persistence.enabled }} + volumeMounts: + - name: data + # TimescaleDB-HA roots its data under /home/postgres/pgdata/; + # PGDATA above selects the actual data subdir. + mountPath: /home/postgres/pgdata + {{- end }} + {{- with .Values.postgres.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.postgres.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.postgres.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.postgres.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: data + labels: + {{- include "holdfast.postgres.labels" . | nindent 10 }} + spec: + accessModes: + {{- toYaml .Values.postgres.persistence.accessModes | nindent 10 }} + {{- if .Values.postgres.persistence.storageClassName }} + storageClassName: {{ .Values.postgres.persistence.storageClassName | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.postgres.persistence.size | quote }} + {{- end }} +{{- end }} diff --git a/infra/helm/holdfast/templates/secret.yaml b/infra/helm/holdfast/templates/secret.yaml new file mode 100644 index 00000000..ce6daac0 --- /dev/null +++ b/infra/helm/holdfast/templates/secret.yaml @@ -0,0 +1,12 @@ +{{- if and .Values.postgres.enabled (not .Values.postgres.auth.existingSecret) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "holdfast.fullname" . 
}}-postgres + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.postgres.labels" . | nindent 4 }} +type: Opaque +stringData: + {{ .Values.postgres.auth.passwordKey }}: {{ required "postgres.auth.password is required when postgres.enabled=true and no existingSecret is set" .Values.postgres.auth.password | quote }} +{{- end }} diff --git a/infra/helm/holdfast/templates/serviceaccount.yaml b/infra/helm/holdfast/templates/serviceaccount.yaml new file mode 100644 index 00000000..c394be1f --- /dev/null +++ b/infra/helm/holdfast/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "holdfast.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "holdfast.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/infra/helm/holdfast/values.lab.yaml b/infra/helm/holdfast/values.lab.yaml new file mode 100644 index 00000000..b5e69f93 --- /dev/null +++ b/infra/helm/holdfast/values.lab.yaml @@ -0,0 +1,26 @@ +# Lab-cluster overrides for the BrewingCoder microk8s QA environment. +# Apply with: helm upgrade --install holdfast infra/helm/holdfast \ +# -n holdfast --create-namespace \ +# -f infra/helm/holdfast/values.lab.yaml \ +# --set image.tag="$IMAGE_TAG" \ +# --set postgres.auth.password="$POSTGRES_PASSWORD" + +image: + registry: localhost:32000 + repository: holdfast-backend-dotnet + pullPolicy: Always # tags overlap across builds in lab + +postgres: + persistence: + storageClassName: nfs-va-vm + +# Public URLs — Cloudflare tunnel terminates TLS at the edge; the backend +# itself serves plain HTTP on :8080. 
+publicUrl: https://holdfast.brewingcoder.com +publicGraphUri: https://holdfast.brewingcoder.com/public +privateGraphUri: https://holdfast.brewingcoder.com/private +collectorOtlpEndpoint: https://holdfast.brewingcoder.com/otel + +# Auth handled at the CF Access edge in the lab; app stays in dev mode. +auth: + mode: dev diff --git a/infra/helm/holdfast/values.yaml b/infra/helm/holdfast/values.yaml new file mode 100644 index 00000000..07e18700 --- /dev/null +++ b/infra/helm/holdfast/values.yaml @@ -0,0 +1,159 @@ +# Default values for the HoldFast helm chart. +# Operators should override anything marked REQUIRED. Lab-specific defaults +# live in values.lab.yaml; never edit values.yaml for environment-specific +# overrides — author a values file or use --set on the command line. + +# ── Image ------------------------------------------------------------------- +image: + # Registry, repository, and tag for the HoldFast backend image. + # Lab cluster overrides registry to localhost:32000 via values.lab.yaml. + registry: ghcr.io + repository: brewingcoder/holdfast-backend-dotnet + # tag defaults to .Chart.AppVersion if empty + tag: "" + pullPolicy: IfNotPresent + +imagePullSecrets: [] + +nameOverride: "" +fullnameOverride: "" + +# ── ServiceAccount --------------------------------------------------------- +serviceAccount: + create: true + annotations: {} + name: "" + +# ── Backend pod ----------------------------------------------------------- +backend: + replicaCount: 1 + + # The Service name microk8s's Cloudflare tunnel points at must stay stable. + # Don't rename without coordinating with the cluster operator. 
+ service: + type: ClusterIP + port: 8080 + annotations: {} + + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 2000m + memory: 2Gi + + podAnnotations: {} + podLabels: {} + podSecurityContext: {} + securityContext: {} + + livenessProbe: + httpGet: + path: /health/live + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + + readinessProbe: + httpGet: + path: /health/ready + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 5 + failureThreshold: 3 + + # Extra environment for the backend container. Use this for operator- + # specific config not covered by the typed values above. + # Each entry: { name: FOO, value: "bar" } or { name: FOO, valueFrom: ... } + extraEnv: [] + + nodeSelector: {} + tolerations: [] + affinity: {} + +# ── Postgres (chart-managed) ---------------------------------------------- +# Set postgres.enabled = false to bring your own database via +# externalPostgres.* below. +postgres: + enabled: true + + image: + repository: timescale/timescaledb-ha + tag: pg16 + pullPolicy: IfNotPresent + + service: + port: 5432 + + persistence: + enabled: true + size: 20Gi + # Empty storageClassName uses the cluster default. Lab cluster overrides + # to nfs-va-vm via values.lab.yaml. + storageClassName: "" + accessModes: + - ReadWriteOnce + + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: 2000m + memory: 4Gi + + # TimescaleDB-HA stores data at /home/postgres/pgdata/data — NOT the + # upstream postgres /var/lib/postgresql/data. Don't change this unless + # you also change the image. + dataPath: /home/postgres/pgdata/data + + auth: + user: postgres + # REQUIRED: set via --set postgres.auth.password=... or values file, + # OR reference an existing Secret via existingSecret/passwordKey. 
+ password: "" + existingSecret: "" + passwordKey: password + + podSecurityContext: {} + securityContext: {} + nodeSelector: {} + tolerations: [] + affinity: {} + +# ── External Postgres (alternative to chart-managed) ---------------------- +# Only used when postgres.enabled = false. +externalPostgres: + host: "" + port: 5432 + user: "" + passwordSecret: + name: "" + key: password + +# ── Storage backend selection --------------------------------------------- +# Postgres = full analytics path via Postgres (no ClickHouse needed). +# ClickHouse = OTel-shaped columnar store (requires separate ClickHouse pod; +# this chart does not yet manage one — bring your own). +storage: + analytics: Postgres + +# ── External URLs --------------------------------------------------------- +# REQUIRED — operators must set these to the publicly-reachable URLs of +# their HoldFast deployment. No hardcoded domains per the HoldFast charter. +publicUrl: "" # e.g. https://holdfast.example.com +publicGraphUri: "" # e.g. https://holdfast.example.com/public +privateGraphUri: "" # e.g. https://holdfast.example.com/private +collectorOtlpEndpoint: "" # e.g. https://holdfast.example.com/otel + +# ── Auth ------------------------------------------------------------------ +# v1 of this chart only supports auth.mode=dev (no in-app authentication). +# Operators wanting in-app auth should front the deployment with a +# zero-trust proxy (Cloudflare Access, Authelia, oauth2-proxy, etc.) until +# enterprise mode is wired into the chart (planned). 
+auth: + mode: dev From 59141e7a60393803fc4f0e7281683eb3161fb5c8 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 22:34:24 -0400 Subject: [PATCH 05/12] feat(tamp): wire DockerPush/DeployQa/SmokeQa + Wave 9 pin bump (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tamp ecosystem bumped to Wave 9 — coordinated cut that lands Helm.V3 0.1.0, Tamp.Http 0.1.1, the params Target[] overload on lifecycle methods (friction #14 fix), and patch satellites across the fleet. Pin moves: * Tamp.Core 1.2.0 → 1.3.0 * Tamp.NetCli.V10 1.2.0 → 1.3.0 * Tamp.Yarn.V4 0.1.0 → 0.1.1 * Tamp.Turbo.V2 0.2.0 → 0.2.1 * Tamp.Vite.V5 0.1.0 → 0.1.1 * Tamp.Docker.V27 0.3.0 (0.3.1 still on the flatcontainer CDN lag — follow-up bump pending) + Tamp.Helm.V3 0.1.0 (new — the cutover deploy verb) + Tamp.Http 0.1.1 (new — HttpProbe for SmokeQa) Build.cs additions: * [Parameter] Registry, QaUrl, PostgresPassword (the third via env var HOLDFAST_PG_PASSWORD) * [FromPath("helm")] HelmTool * ImageTag = short git SHA; LocalImageRef + RegistryImageRef helpers * Info target prints all three plus the deploy URL * DockerBuildBackend now tags BOTH the local-friendly name and the registry-prefixed name in one buildx pass * DockerPush — depends on DockerBuildBackend, calls Docker.Push against the registry-prefixed tag * DeployQa — depends on DockerPush, calls Helm.Upgrade with --install --wait --atomic --timeout 5m against infra/helm/holdfast/ + values.lab.yaml, image.tag overridden to the current SHA, postgres password sourced from the Parameter * SmokeQa — depends on DeployQa, polls QaUrl/health/live for up to 2 minutes via HttpProbe.WaitForHealthy * Ci.DependsOn(Test, Publish, FrontendBuild, DockerBuildBackend) refactored to params Target[] one-liner (friction #14 paid off immediately) Test still 3,172/3,172 green in 21.4s on the bumped stack. 
DeployQa + SmokeQa unverified locally — both require cluster reachability (localhost:32000 only resolves inside the lab cluster, helm needs credentials, QA URL doesn't route yet). First end-to-end run will happen on the ARC runner once microk8s finishes cluster prep (namespace + CF tunnel + RBAC). Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 71 +++++++++++++++++++++++++++++++++++++++++----- build/Build.csproj | 10 ++++--- 2 files changed, 70 insertions(+), 11 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index 762f54bd..aefc1ebb 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -3,6 +3,8 @@ using Tamp.Yarn.V4; using Tamp.Turbo.V2; using Tamp.Docker.V27; +using Tamp.Helm.V3; +using Tamp.Http; class Build : TampBuild { @@ -11,16 +13,33 @@ class Build : TampBuild [Parameter("Build configuration (Debug|Release)")] Configuration Configuration = IsLocalBuild ? Configuration.Debug : Configuration.Release; + [Parameter("Container registry for QA push")] + readonly string Registry = "localhost:32000"; + + [Parameter("QA hostname (no trailing slash)")] + readonly string QaUrl = "https://holdfast.brewingcoder.com"; + + [Parameter("Postgres admin password for QA deploy — set via TAMP_POSTGRESPASSWORD env", + EnvironmentVariable = "HOLDFAST_PG_PASSWORD")] + readonly string PostgresPassword = ""; + // HoldFast is a multi-solution monorepo (SDK + e2e scaffolds also carry // .sln/.slnx files), so the subtree search would be ambiguous. Pin explicitly. 
[Solution("src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; [GitRepository] readonly GitRepository Git = null!; [FromPath("yarn")] readonly Tool YarnTool = null!; + [FromPath("helm")] readonly Tool HelmTool = null!; [FromNodeModules("turbo")] readonly Tool TurboTool = null!; AbsolutePath Artifacts => RootDirectory / "artifacts"; AbsolutePath PublishDir => Artifacts / "publish" / "HoldFast.Api"; + AbsolutePath HelmChart => RootDirectory / "infra" / "helm" / "holdfast"; + + // Image tag = short git SHA. Canonical version lives in Chart.yaml.appVersion. + string ImageTag => Git!.Commit[..7]; + string LocalImageRef => $"holdfast-backend-dotnet:{ImageTag}"; + string RegistryImageRef => $"{Registry}/holdfast-backend-dotnet:{ImageTag}"; Target Info => _ => _ .Executes(() => @@ -32,6 +51,9 @@ class Build : TampBuild Console.WriteLine($" Artifacts: {Artifacts}"); Console.WriteLine($" Git branch: {Git?.Branch}"); Console.WriteLine($" Git commit: {Git?.Commit}"); + Console.WriteLine($" Image tag: {ImageTag}"); + Console.WriteLine($" Registry ref: {RegistryImageRef}"); + Console.WriteLine($" QA URL: {QaUrl}"); }); Target Restore => _ => _ @@ -88,21 +110,56 @@ class Build : TampBuild // ── Docker ────────────────────────────────────────────────────── - // Docker.V27 0.3.0 routes to `docker buildx build`, so the Dockerfile's - // `RUN --mount=type=cache` directives work as expected. + // Docker.V27 0.3.x routes Build through `docker buildx build`, so the + // Dockerfile's `RUN --mount=type=cache` directives work. Two tags so the + // local-shorthand reference and the registry-prefixed reference both land. Target DockerBuildBackend => _ => _ .Executes(() => Docker.Build(s => s .SetContext(RootDirectory) .SetDockerfile(RootDirectory / "infra" / "docker" / "backend-dotnet.Dockerfile") - .AddTag("holdfast-backend-dotnet:tamp"))); + .AddTag(LocalImageRef) + .AddTag(RegistryImageRef))); + + // Push the registry-prefixed image to the lab registry. 
ARC runner has its + // ~/.docker/config.json populated for localhost:32000 (plain HTTP, daemon- + // level insecure-registries setting); no Docker.Login call needed. + Target DockerPush => _ => _ + .DependsOn(DockerBuildBackend) + .Executes(() => Docker.Push(s => s + .SetImage(RegistryImageRef))); + + // ── Deploy ────────────────────────────────────────────────────── + + // Deploy the chart to the lab cluster. helm upgrade --install is idempotent; + // --atomic rolls back automatically on a failed rollout. + Target DeployQa => _ => _ + .DependsOn(DockerPush) + .Executes(() => Helm.Upgrade(HelmTool, s => s + .SetRelease("holdfast") + .SetNamespace("holdfast") + .SetCreateNamespace(true) + .SetChart(HelmChart) + .AddValuesFile(HelmChart / "values.lab.yaml") + .SetValue("image.tag", ImageTag) + .SetValue("postgres.auth.password", PostgresPassword) + .SetWait(true) + .SetAtomic(true) + .SetTimeout(TimeSpan.FromMinutes(5)))); + + // Post-deploy smoke probe — polls /health/live until it returns 200 or + // the timeout elapses. HttpProbe handles transient HttpRequestExceptions + // and per-request timeouts as expected during pod warmup. + Target SmokeQa => _ => _ + .DependsOn(DeployQa) + .Executes(async () => await HttpProbe.WaitForHealthy( + url: $"{QaUrl}/health/live", + timeout: TimeSpan.FromMinutes(2))); // ── CI entry ───────────────────────────────────────────────────── // `dotnet tamp` (no args) runs the full verification + artifact pipeline. + // Tamp.Core 1.3.0's params Target[] overload makes the fan-out one-liner. 
Target Ci => _ => _ .Default() - .DependsOn(Test) - .DependsOn(Publish) - .DependsOn(FrontendBuild) - .DependsOn(DockerBuildBackend); + .DependsOn(Test, Publish, FrontendBuild, DockerBuildBackend); } diff --git a/build/Build.csproj b/build/Build.csproj index 079843f6..35c9ba6c 100644 --- a/build/Build.csproj +++ b/build/Build.csproj @@ -10,11 +10,13 @@ - - - + + + + + - + From d8a9e113f897959f59ba731d7ddd8dfe6212bba1 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 22:55:39 -0400 Subject: [PATCH 06/12] feat(helm): wire chart to pre-created postgres secret + fsGroup belt-and-suspenders (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster prep delivered by microk8s — namespace, RBAC, CF tunnel rule, WAF, and a pre-created `holdfast-postgres` Secret are all in place. Two chart-side adjustments to consume that work: * values.lab.yaml — postgres.auth.existingSecret = holdfast-postgres (passwordKey defaults to "password"; chart's secret.yaml template is gated on `not .existingSecret` so it won't try to overwrite) * values.yaml — postgres.podSecurityContext.fsGroup = 1000 Default fsGroup for chart-managed postgres matches the postgres UID in `timescale/timescaledb-ha:pg16` (probed: uid=1000(postgres)). The lab NFS export is permissive (no_root_squash) so this isn't strictly required there, but PSA-restricted clusters require it, so the chart needs to ship a sensible default for the AGPL operator audience. Operators swapping the image to one with a different UID override. * Build.cs — drop the [Parameter] HOLDFAST_PG_PASSWORD plumbing and the .SetValue("postgres.auth.password", ...) on the Helm.Upgrade call. Password is now resolved entirely via existingSecret on the chart side; runner pod doesn't need any env var injected. Also obviates the runner-pod-spec patching microk8s offered. 
Verified: * helm lint clean * helm template renders fsGroup: 1000 on postgres StatefulSet, PSQL_PASSWORD valueFrom secretKeyRef.name=holdfast-postgres on backend deployment * `helm install --dry-run` against live cluster (k8s-lab) succeeds * holdfast-postgres Secret confirmed present in namespace via kubectl Ready for first `dotnet tamp SmokeQa` end-to-end whenever Scott says go. Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 5 ----- infra/helm/holdfast/values.lab.yaml | 6 ++++++ infra/helm/holdfast/values.yaml | 8 +++++++- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index aefc1ebb..6fc585a2 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -19,10 +19,6 @@ class Build : TampBuild [Parameter("QA hostname (no trailing slash)")] readonly string QaUrl = "https://holdfast.brewingcoder.com"; - [Parameter("Postgres admin password for QA deploy — set via TAMP_POSTGRESPASSWORD env", - EnvironmentVariable = "HOLDFAST_PG_PASSWORD")] - readonly string PostgresPassword = ""; - // HoldFast is a multi-solution monorepo (SDK + e2e scaffolds also carry // .sln/.slnx files), so the subtree search would be ambiguous. Pin explicitly. 
[Solution("src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; @@ -141,7 +137,6 @@ class Build : TampBuild .SetChart(HelmChart) .AddValuesFile(HelmChart / "values.lab.yaml") .SetValue("image.tag", ImageTag) - .SetValue("postgres.auth.password", PostgresPassword) .SetWait(true) .SetAtomic(true) .SetTimeout(TimeSpan.FromMinutes(5)))); diff --git a/infra/helm/holdfast/values.lab.yaml b/infra/helm/holdfast/values.lab.yaml index b5e69f93..cfb995c1 100644 --- a/infra/helm/holdfast/values.lab.yaml +++ b/infra/helm/holdfast/values.lab.yaml @@ -13,6 +13,12 @@ image: postgres: persistence: storageClassName: nfs-va-vm + auth: + # microk8s pre-created the holdfast-postgres Secret with key "password"; + # chart's secret.yaml template is gated on `not .existingSecret` so it + # won't try to overwrite. + existingSecret: holdfast-postgres + passwordKey: password # Public URLs — Cloudflare tunnel terminates TLS at the edge; the backend # itself serves plain HTTP on :8080. diff --git a/infra/helm/holdfast/values.yaml b/infra/helm/holdfast/values.yaml index 07e18700..d02ff6b9 100644 --- a/infra/helm/holdfast/values.yaml +++ b/infra/helm/holdfast/values.yaml @@ -119,7 +119,13 @@ postgres: existingSecret: "" passwordKey: password - podSecurityContext: {} + # Default fsGroup matches `postgres` UID in timescale/timescaledb-ha:pg16 + # (UID 1000). PSA-restricted clusters require this to be set; permissive + # clusters don't care, so this costs nothing and saves an operator who + # adopts a stricter cluster profile from one debug cycle. + # Override if you swap the image to one that runs as a different UID. 
+ podSecurityContext: + fsGroup: 1000 securityContext: {} nodeSelector: {} tolerations: [] From c3bb93d173ff2180cb9e21966765614488a13dad Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 23:23:24 -0400 Subject: [PATCH 07/12] fix(helm): postgres probes use TCP loopback; deploy non-atomic with 10m timeout (HOL-54) First end-to-end deploy attempt surfaced two issues, captured here along with one temporary debugging change: 1. **Postgres readiness probe wrong path.** `pg_isready -U postgres` without `-h` defaults to the Unix socket at /var/run/postgresql, which TimescaleDB-HA does not reliably expose. Pod stayed NotReady; event log: `Readiness probe failed: /var/run/postgresql:5432 - no response`. Backend cascaded into CrashLoopBackOff trying to connect. Fix: probe via `-h 127.0.0.1` to force TCP-loopback check through postgres's TCP listener, which is reliably bound regardless of socket configuration. 2. **5m helm timeout too tight for first deploy.** TimescaleDB-HA image is 1.73 GB; first pull on each node is 3-4 minutes. Atomic rollback triggered before postgres could even finish pulling on cold nodes. Bumped DeployQa timeout to 10 minutes for headroom. 3. **Disable --atomic temporarily** so a failed deploy leaves the namespace populated for `kubectl get / logs` post-mortem. Re-enable once the chart has a few clean runs under it. Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 12 ++++++++---- .../holdfast/templates/postgres-statefulset.yaml | 7 +++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index 6fc585a2..27fc701f 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -14,10 +14,10 @@ class Build : TampBuild Configuration Configuration = IsLocalBuild ? 
Configuration.Debug : Configuration.Release; [Parameter("Container registry for QA push")] - readonly string Registry = "localhost:32000"; + string Registry = "localhost:32000"; [Parameter("QA hostname (no trailing slash)")] - readonly string QaUrl = "https://holdfast.brewingcoder.com"; + string QaUrl = "https://holdfast.brewingcoder.com"; // HoldFast is a multi-solution monorepo (SDK + e2e scaffolds also carry // .sln/.slnx files), so the subtree search would be ambiguous. Pin explicitly. @@ -128,6 +128,10 @@ class Build : TampBuild // Deploy the chart to the lab cluster. helm upgrade --install is idempotent; // --atomic rolls back automatically on a failed rollout. + // Atomic disabled for now so a failed deploy leaves the cluster state + // around for kubectl inspection. Re-enable once the chart has a few + // green runs under it. Timeout bumped to 10m to give cold image pulls + // (TimescaleDB-HA is 1.73 GB) headroom on first deploy to each node. Target DeployQa => _ => _ .DependsOn(DockerPush) .Executes(() => Helm.Upgrade(HelmTool, s => s @@ -138,8 +142,8 @@ class Build : TampBuild .AddValuesFile(HelmChart / "values.lab.yaml") .SetValue("image.tag", ImageTag) .SetWait(true) - .SetAtomic(true) - .SetTimeout(TimeSpan.FromMinutes(5)))); + .SetAtomic(false) + .SetTimeout(TimeSpan.FromMinutes(10)))); // Post-deploy smoke probe — polls /health/live until it returns 200 or // the timeout elapses. HttpProbe handles transient HttpRequestExceptions diff --git a/infra/helm/holdfast/templates/postgres-statefulset.yaml b/infra/helm/holdfast/templates/postgres-statefulset.yaml index 1d717d7e..36cb3142 100644 --- a/infra/helm/holdfast/templates/postgres-statefulset.yaml +++ b/infra/helm/holdfast/templates/postgres-statefulset.yaml @@ -47,15 +47,18 @@ spec: key: {{ include "holdfast.postgres.secretKey" . }} - name: PGDATA value: {{ .Values.postgres.dataPath | quote }} + # -h 127.0.0.1 forces a TCP-based check via the loopback. 
The + # default socket-based check looks at /var/run/postgresql which + # TimescaleDB-HA images don't reliably expose. livenessProbe: exec: - command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}] + command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}, "-h", "127.0.0.1"] initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 readinessProbe: exec: - command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}] + command: ["pg_isready", "-U", {{ .Values.postgres.auth.user | quote }}, "-h", "127.0.0.1"] initialDelaySeconds: 5 periodSeconds: 5 timeoutSeconds: 5 From 8e18b27306add01b9883e206ec78168f625852d1 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 23:36:41 -0400 Subject: [PATCH 08/12] fix(helm): correct backend env-var names + auto-roll on ConfigMap changes (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First end-to-end deploy crashed the backend with `Connection refused (localhost:8123)` from ClickHouseMigrationService.StartAsync. Root cause: ConfigMap was writing env-var names that the .NET host doesn't bind. Three name fixes in templates/configmap.yaml: * STORAGE_ANALYTICS → STORAGE__ANALYTICS .NET configuration uses double-underscore to express nested keys (Storage:Analytics). Single underscore → value never loaded → defaultBackend falls back to "clickhouse" → ClickHouseMigrationService registers → crash on connection refused. The Program.cs gate that skips ClickHouse when Storage:Analytics=Postgres is correct; the chart just wasn't delivering the value. * AUTH_MODE → REACT_APP_AUTH_MODE HoldFast.Shared.Runtime.GoEnvCompat maps REACT_APP_AUTH_MODE to Auth:Mode (legacy Go env-var contract preserved on the .NET side). AUTH_MODE alone is unmapped and silently ignored. * COLLECTOR_OTLP_ENDPOINT → OTEL_EXPORTER_OTLP_ENDPOINT The backend hosts OTLP receivers — it's not an OTLP client to a separate collector. 
The "OTLP endpoint" value here is for the backend to export its OWN telemetry. OTel SDK convention is OTEL_EXPORTER_OTLP_ENDPOINT. Plus one chart hygiene fix in templates/backend-deployment.yaml: * Add checksum/config annotation to the pod template, computed as sha256sum of configmap.yaml's rendered content. Standard helm idiom — without it, `helm upgrade` of env-only changes silently leaves pods serving with stale config. With it, ConfigMap edits trigger a rolling restart automatically. Co-Authored-By: Claude Opus 4.7 --- .../templates/backend-deployment.yaml | 8 ++++-- infra/helm/holdfast/templates/configmap.yaml | 26 ++++++++++++++++--- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/infra/helm/holdfast/templates/backend-deployment.yaml b/infra/helm/holdfast/templates/backend-deployment.yaml index 8c2fdbad..de10257f 100644 --- a/infra/helm/holdfast/templates/backend-deployment.yaml +++ b/infra/helm/holdfast/templates/backend-deployment.yaml @@ -12,10 +12,14 @@ spec: {{- include "holdfast.backend.selectorLabels" . | nindent 6 }} template: metadata: - {{- with .Values.backend.podAnnotations }} annotations: + # Roll the pod when the backend ConfigMap content changes. Standard + # helm idiom — without this, `helm upgrade` of env-only changes + # silently leaves stale pods serving with the old config. + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.backend.podAnnotations }} {{- toYaml . | nindent 8 }} - {{- end }} + {{- end }} labels: {{- include "holdfast.backend.labels" . | nindent 8 }} {{- with .Values.backend.podLabels }} diff --git a/infra/helm/holdfast/templates/configmap.yaml b/infra/helm/holdfast/templates/configmap.yaml index 7d805b9e..dae08a24 100644 --- a/infra/helm/holdfast/templates/configmap.yaml +++ b/infra/helm/holdfast/templates/configmap.yaml @@ -6,15 +6,33 @@ metadata: labels: {{- include "holdfast.backend.labels" . 
| nindent 4 }} data: + # Postgres connection — read by HoldFast.Shared.Runtime.GoEnvCompat. + # Single-underscore names preserved from the legacy Go env-var contract. PSQL_HOST: {{ include "holdfast.postgres.host" . | quote }} PSQL_PORT: {{ include "holdfast.postgres.port" . | quote }} PSQL_USER: {{ include "holdfast.postgres.user" . | quote }} - STORAGE_ANALYTICS: {{ .Values.storage.analytics | quote }} + # Storage backend selector — the .NET host reads Configuration["Storage:Analytics"], + # which env-var-binds to STORAGE__ANALYTICS (double underscore is the .NET + # convention for nested config keys). Wrong name → defaultBackend falls back + # to "clickhouse" → ClickHouseMigrationService registers → backend crashes + # trying to connect to localhost:8123. Don't drop the second underscore. + STORAGE__ANALYTICS: {{ .Values.storage.analytics | quote }} + # Frontend URLs — REACT_APP_FRONTEND_URI is the only one the backend reads + # at runtime (via GoEnvCompat → Frontend:Uri). The graph URI vars are baked + # into the frontend bundle at build time and are kept here only as operator + # documentation of intent; they have no runtime effect on the backend. + REACT_APP_FRONTEND_URI: {{ .Values.publicUrl | quote }} REACT_APP_PUBLIC_GRAPH_URI: {{ .Values.publicGraphUri | quote }} REACT_APP_PRIVATE_GRAPH_URI: {{ .Values.privateGraphUri | quote }} - REACT_APP_FRONTEND_URI: {{ .Values.publicUrl | quote }} - COLLECTOR_OTLP_ENDPOINT: {{ .Values.collectorOtlpEndpoint | quote }} - AUTH_MODE: {{ .Values.auth.mode | quote }} + # Auth mode — backend GoEnvCompat maps REACT_APP_AUTH_MODE → Auth:Mode. + # Use REACT_APP_AUTH_MODE (not AUTH_MODE) so the value actually reaches the + # configuration. + REACT_APP_AUTH_MODE: {{ .Values.auth.mode | quote }} + + # OTLP endpoint — for the backend to export its OWN telemetry to (it hosts + # OTLP receivers itself for incoming data). Empty string disables self-tracing. + # The .NET host reads OTEL_EXPORTER_OTLP_ENDPOINT (OTel SDK convention). 
+ OTEL_EXPORTER_OTLP_ENDPOINT: {{ .Values.collectorOtlpEndpoint | quote }} From fe953ae21bf16054b91cf58b348d9b8c91b5257b Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 23:49:07 -0400 Subject: [PATCH 09/12] fix(helm): backend listens on 8082 not 8080; OTel self-export gated (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Third end-to-end deploy attempt put the backend past the ClickHouse crash but into a probe-port mismatch crashloop. Root cause: Dockerfile sets `ENV ASPNETCORE_URLS=http://+:8082` (line 137) and `EXPOSE 8082`, so Kestrel binds on 8082 — but my chart hardcoded 8080 throughout. Backend log captured "Now listening on: http://[::]:8082" → readiness probe on 8080 → connection refused → kubelet liveness-failure-kill → restart loop. Image is fine; chart was lying about the port. Fixes: * values.yaml — backend.service.port 8080 → 8082, probes ports → 8082, with a comment that points future readers at the Dockerfile so the bind port stays the single source of truth. * backend-deployment.yaml — containerPort 8080 → 8082. NB: this is a coordinated change with the cluster operator — the Cloudflare tunnel rule on the microk8s side previously routed to :8080 and needs to update to :8082 before external traffic resolves. Internal helm install proceeds independently. Plus one observability hygiene fix: * configmap.yaml — gate OTEL_EXPORTER_OTLP_ENDPOINT on non-empty. The third deploy logged the backend self-exporting metrics to https://holdfast.brewingcoder.com/otel and getting 502 from CF edge. Not crashing the app but adding noise. The "OTLP endpoint" in HoldFast's context is for incoming receivers (hosted in the backend itself), not for the backend to ship its own traces outbound; the latter is opt-in and operators may not want it. * values.lab.yaml — collectorOtlpEndpoint = "" disables self-export in QA. Operators wanting backend-traces-elsewhere set it. 
Co-Authored-By: Claude Opus 4.7 --- infra/helm/holdfast/templates/backend-deployment.yaml | 2 +- infra/helm/holdfast/templates/configmap.yaml | 6 +++++- infra/helm/holdfast/values.lab.yaml | 6 +++++- infra/helm/holdfast/values.yaml | 11 ++++++----- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/infra/helm/holdfast/templates/backend-deployment.yaml b/infra/helm/holdfast/templates/backend-deployment.yaml index de10257f..ae53f732 100644 --- a/infra/helm/holdfast/templates/backend-deployment.yaml +++ b/infra/helm/holdfast/templates/backend-deployment.yaml @@ -45,7 +45,7 @@ spec: {{- end }} ports: - name: http - containerPort: 8080 + containerPort: 8082 protocol: TCP envFrom: - configMapRef: diff --git a/infra/helm/holdfast/templates/configmap.yaml b/infra/helm/holdfast/templates/configmap.yaml index dae08a24..1999c6c1 100644 --- a/infra/helm/holdfast/templates/configmap.yaml +++ b/infra/helm/holdfast/templates/configmap.yaml @@ -33,6 +33,10 @@ data: REACT_APP_AUTH_MODE: {{ .Values.auth.mode | quote }} # OTLP endpoint — for the backend to export its OWN telemetry to (it hosts - # OTLP receivers itself for incoming data). Empty string disables self-tracing. + # the OTLP receivers itself for incoming data, that's separate). Gate on + # non-empty so empty-string disables self-tracing entirely; setting it to a + # URL the backend can't reach produces noisy 5xx errors with no value. # The .NET host reads OTEL_EXPORTER_OTLP_ENDPOINT (OTel SDK convention). 
+ {{- if .Values.collectorOtlpEndpoint }} OTEL_EXPORTER_OTLP_ENDPOINT: {{ .Values.collectorOtlpEndpoint | quote }} + {{- end }} diff --git a/infra/helm/holdfast/values.lab.yaml b/infra/helm/holdfast/values.lab.yaml index cfb995c1..33318056 100644 --- a/infra/helm/holdfast/values.lab.yaml +++ b/infra/helm/holdfast/values.lab.yaml @@ -25,7 +25,11 @@ postgres: publicUrl: https://holdfast.brewingcoder.com publicGraphUri: https://holdfast.brewingcoder.com/public privateGraphUri: https://holdfast.brewingcoder.com/private -collectorOtlpEndpoint: https://holdfast.brewingcoder.com/otel +# Self-export of backend telemetry: disabled in QA. The backend hosts its +# own OTLP receivers for incoming data; self-tracing to the public hostname +# 502s through Cloudflare and adds noise without value. Set to a real endpoint +# in operator deployments that want backend traces shipped elsewhere. +collectorOtlpEndpoint: "" # Auth handled at the CF Access edge in the lab; app stays in dev mode. auth: diff --git a/infra/helm/holdfast/values.yaml b/infra/helm/holdfast/values.yaml index d02ff6b9..32a9ce72 100644 --- a/infra/helm/holdfast/values.yaml +++ b/infra/helm/holdfast/values.yaml @@ -28,11 +28,12 @@ serviceAccount: backend: replicaCount: 1 - # The Service name microk8s's Cloudflare tunnel points at must stay stable. - # Don't rename without coordinating with the cluster operator. + # Backend Kestrel binds on 8082 (see infra/docker/backend-dotnet.Dockerfile + # `ENV ASPNETCORE_URLS=http://+:8082`). If you change the bind port, update + # this AND the cluster operator's tunnel/ingress rule. 
service: type: ClusterIP - port: 8080 + port: 8082 annotations: {} resources: @@ -51,7 +52,7 @@ backend: livenessProbe: httpGet: path: /health/live - port: 8080 + port: 8082 initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 @@ -60,7 +61,7 @@ backend: readinessProbe: httpGet: path: /health/ready - port: 8080 + port: 8082 initialDelaySeconds: 10 periodSeconds: 5 timeoutSeconds: 5 From c48c1e9e2215c806c499b300c95975331f79f49f Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 23:51:42 -0400 Subject: [PATCH 10/12] fix(helm): probe /health not /health/live (backend has no live/ready split) (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend's Program.cs uses app.MapHealthChecks("/health") — single endpoint, no /live or /ready paths. My chart was probing /health/live, /health/ready, and SmokeQa was hitting /health/live too. All three "passed" because the backend serves a React SPA from wwwroot with a fallback that returns index.html (HTTP 200) for unmapped paths — so the probes were lying. Actual /health returns plain-text "Healthy" and is what we should be hitting. Fixes: * values.yaml — liveness + readiness probes path: /health/{live,ready} → /health, with a comment about the SPA-fallback trap so future readers don't fall back into it. * Build.cs — SmokeQa probes /health instead of /health/live. Verified locally via `kubectl port-forward svc/holdfast-backend 18082:8082` plus `curl http://localhost:18082/health` → "Healthy" (200). The /health/live URL on the running pod returns the SPA bundle's index.html (also 200 — which is exactly why the lie was so quiet). 
Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 5 ++++- infra/helm/holdfast/values.yaml | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index 27fc701f..a0ce1c16 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -148,10 +148,13 @@ class Build : TampBuild // Post-deploy smoke probe — polls /health/live until it returns 200 or // the timeout elapses. HttpProbe handles transient HttpRequestExceptions // and per-request timeouts as expected during pod warmup. + // Backend's MapHealthChecks lands on /health (single endpoint, no + // live/ready split). Don't append /live or /ready — those fall through + // the SPA fallback to index.html (HTTP 200) and lie about health. Target SmokeQa => _ => _ .DependsOn(DeployQa) .Executes(async () => await HttpProbe.WaitForHealthy( - url: $"{QaUrl}/health/live", + url: $"{QaUrl}/health", timeout: TimeSpan.FromMinutes(2))); // ── CI entry ───────────────────────────────────────────────────── diff --git a/infra/helm/holdfast/values.yaml b/infra/helm/holdfast/values.yaml index 32a9ce72..7e2af521 100644 --- a/infra/helm/holdfast/values.yaml +++ b/infra/helm/holdfast/values.yaml @@ -49,9 +49,14 @@ backend: podSecurityContext: {} securityContext: {} + # Backend exposes a single /health endpoint (Program.cs uses + # app.MapHealthChecks("/health") — no live/ready split). Probing nested + # paths like /health/live falls through to the SPA index.html fallback + # which returns 200 → probes "pass" for the wrong reason. Don't change + # the path without re-checking what's actually mapped server-side. 
livenessProbe: httpGet: - path: /health/live + path: /health port: 8082 initialDelaySeconds: 30 periodSeconds: 10 @@ -60,7 +65,7 @@ backend: readinessProbe: httpGet: - path: /health/ready + path: /health port: 8082 initialDelaySeconds: 10 periodSeconds: 5 From 87b706cfb341bc86c1db12ac84365c68289786e9 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Mon, 11 May 2026 23:59:05 -0400 Subject: [PATCH 11/12] docs: CHANGELOG-FORK entry for Tamp pipeline + Helm chart (HOL-54) Captures the build/deploy work that lands in this branch: Tamp build script targets, helm chart surface, what's preserved alongside (compose hobby flow still works), and the cutover-criterion proof against the lab cluster. Co-Authored-By: Claude Opus 4.7 --- docs/CHANGELOG-FORK.md | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/docs/CHANGELOG-FORK.md b/docs/CHANGELOG-FORK.md index d9703fde..f6489425 100644 --- a/docs/CHANGELOG-FORK.md +++ b/docs/CHANGELOG-FORK.md @@ -1,5 +1,83 @@ # BrewingCoder Fork — Changelog +## 2026-05-12: Build + Deploy on Tamp; Helm Chart for Self-Hosted Operators (HOL-54) + +Replaced ad-hoc `dotnet`/`yarn`/`docker` shell scripting with a Tamp-driven +build pipeline, and added a published-shape Helm chart that operators can +consume directly. Net change: **17 files added, ~1,036 lines** (no deletions). + +### Added: Tamp build pipeline (`build/Build.cs` + `build/Build.csproj`) + +A .NET 10 console project under `/build` defines the entire pipeline as +typed targets against [Tamp](https://github.com/tamp-build/tamp). 
Surface: + +| Target | Action | +|---|---| +| `Info` | Print configuration / solution / git / image tag context | +| `Clean` | `CleanArtifacts()` — Solution.Projects scope only, no globbing | +| `Restore` | `dotnet restore` on the solution | +| `Compile` | `dotnet build --no-restore` | +| `Test` | `dotnet test --no-build` with per-assembly TRX output | +| `Publish` | `dotnet publish HoldFast.Api → artifacts/publish/HoldFast.Api/` | +| `YarnInstall` | Yarn Berry 4.x workspace install (`--immutable`) | +| `FrontendBuild` | Turbo runs `build:fast` filtered to `@holdfast-io/frontend...` | +| `DockerBuildBackend` | BuildKit-routed `docker build` of the backend image (multi-tag) | +| `DockerPush` | Push registry-prefixed tag to the configured registry | +| `DeployQa` | `helm upgrade --install` against `infra/helm/holdfast/` | +| `SmokeQa` | `HttpProbe.WaitForHealthy(/health)` against the deployed hostname | +| `Ci` | Default — fans out to Test + Publish + FrontendBuild + DockerBuildBackend | + +One-line invocation: `dotnet tamp Ci` or `dotnet tamp SmokeQa --registry <registry-host>`. + +Pinned against the post-Wave-9 Tamp ecosystem (Core 1.3.0, NetCli.V10 1.3.0, +Helm.V3 0.1.0, Http 0.1.1, plus satellite patches). 16 frictions surfaced and +filed during the integration trial; all fixed in coordinated Tamp release +waves. + +### Added: Helm chart at `infra/helm/holdfast/` + +Standard-shape, AGPL-operator-consumable chart for the two-pod deployment. +Renders 7 resources via `helm template`: + +``` +ServiceAccount holdfast +Secret holdfast-postgres (chart-managed OR existingSecret) +ConfigMap holdfast-backend (env: PSQL_*, STORAGE__ANALYTICS, + REACT_APP_FRONTEND_URI, etc.) +Service holdfast-backend :8082 (ClusterIP, named `http`) +Service holdfast-postgres :5432 (ClusterIP, internal only) +Deployment holdfast-backend (1 replica, /health probes) +StatefulSet holdfast-postgres (1 replica, volumeClaimTemplate) +``` + +Operator-facing defaults in `values.yaml` (community-idiomatic).
Lab-cluster +overrides in `values.lab.yaml` (storage class, registry, hostnames, existing +Secret reference). README + NOTES.txt document required values and the +`auth.mode=dev → front with a zero-trust proxy` operator guidance. + +### Removed: Nothing + +This change is purely additive. The existing `docker compose -f compose.yml +-f compose.hobby-dotnet.yml up` hobby workflow still works unchanged; Tamp +runs side-by-side. CI/CD workflows remain disabled per the rewrite-stabilization +directive; flipping them to invoke `dotnet tamp Ci` is a follow-up. + +### Cutover criterion (proven against the lab cluster) + +`dotnet tamp SmokeQa --registry registry.home.local` from any developer +laptop or in-cluster ARC runner: + +1. Builds the backend image via BuildKit (multi-stage frontend + backend) +2. Pushes to `registry.home.local/holdfast-backend-dotnet:<short-sha>` +3. `helm upgrade --install` against `infra/helm/holdfast/` with + `values.lab.yaml` overrides +4. Polls `https://holdfast.brewingcoder.com/health` until 200 Healthy + +Steady-state full run: **3.3 seconds** (cache-warm). First run: ~14 minutes +(cold image pull on each node + Postgres init on NFS). + +--- + ## 2026-03-18: Strip Marketing, Lead-Gen, and SaaS Billing Removed all components that served Highlight's SaaS business but have no value for self-hosted deployments. **1,056 files changed — ~82,800 lines deleted.** From ed88809516faa631e5ffbfd2389642bc17ec0614 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Wed, 13 May 2026 14:44:47 -0400 Subject: [PATCH 12/12] feat(tamp): wire compliance + coverage + codegen satellites (HOL-54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 2 of the Tamp cutover.
Adds six new satellite wrappers and bumps core+wrapper pins to current versions: Tamp.Core 1.3.0 -> 1.7.0 (TAMP001-004 analyzers, async overloads) Tamp.NetCli.V10 1.3.0 -> 1.4.0 Tamp.Turbo.V2 0.2.0 -> 0.2.1 Tamp.Docker.V27 0.3.0 -> 0.3.1 New satellites + targets: Tamp.Syft / Tamp.Grype -> SbomScan + CveGate + Compliance Tamp.TruffleHog.V3 -> SecretScan Tamp.GraphQLCodegen.V5 -> FrontendCodegen Tamp.Coverlet.V6 + Tamp.ReportGenerator.V5 -> CoverageTest + CoverageReport Optional-flagged the new tool injections so `dotnet tamp --list` works on machines without syft/grype/trufflehog/reportgenerator/graphql-codegen installed — they only fail when the relevant target is actually invoked. Compliance (SBOM + CVE + secret scan) is deliberately not in Ci so the fast iteration path stays fast; release-prep runs `dotnet tamp Compliance` on demand. TAMP001 caught a real bug in CoverageTest while authoring this (dropped DotNet.Test plan inside a multi-statement Executes lambda) — the analyzer paid for itself in this single session. Co-Authored-By: Claude Opus 4.7 --- build/Build.cs | 127 +++++++++++++++++++++++++++++++++++++++++++-- build/Build.csproj | 26 ++++++++-- 2 files changed, 144 insertions(+), 9 deletions(-) diff --git a/build/Build.cs b/build/Build.cs index a0ce1c16..90d0a3ce 100644 --- a/build/Build.cs +++ b/build/Build.cs @@ -5,6 +5,12 @@ using Tamp.Docker.V27; using Tamp.Helm.V3; using Tamp.Http; +using Tamp.GraphQLCodegen.V5; +using Tamp.Coverlet.V6; +using Tamp.ReportGenerator.V5; +using Tamp.Syft; +using Tamp.Grype; +using Tamp.TruffleHog.V3; class Build : TampBuild { @@ -19,6 +25,9 @@ class Build : TampBuild [Parameter("QA hostname (no trailing slash)")] string QaUrl = "https://holdfast.brewingcoder.com"; + [Parameter("Override the computed image tag (defaults to short git SHA)")] + string? ImageTagOverride = null; + // HoldFast is a multi-solution monorepo (SDK + e2e scaffolds also carry // .sln/.slnx files), so the subtree search would be ambiguous. 
Pin explicitly. [Solution("src/dotnet/HoldFast.Backend.slnx")] readonly Solution Solution = null!; @@ -26,14 +35,30 @@ class Build : TampBuild [FromPath("yarn")] readonly Tool YarnTool = null!; [FromPath("helm")] readonly Tool HelmTool = null!; + // Compliance + coverage tools are operator-installed (one tool per axis; + // see Tamp's Module Catalog). Marked Optional so the target surface + // enumerates on machines without them — invocation will surface a + // targeted error then, not a global injection failure. + [FromPath("syft", Optional = true)] readonly Tool SyftTool = null!; + [FromPath("grype", Optional = true)] readonly Tool GrypeTool = null!; + [FromPath("trufflehog", Optional = true)] readonly Tool TruffleHogTool = null!; + [FromPath("reportgenerator", Optional = true)] readonly Tool ReportGeneratorTool = null!; [FromNodeModules("turbo")] readonly Tool TurboTool = null!; + [FromNodeModules("graphql-codegen", Optional = true)] readonly Tool GraphQLCodegenTool = null!; AbsolutePath Artifacts => RootDirectory / "artifacts"; AbsolutePath PublishDir => Artifacts / "publish" / "HoldFast.Api"; + AbsolutePath CoverageDir => Artifacts / "coverage"; + AbsolutePath CoverageReportDir => Artifacts / "coverage-report"; + AbsolutePath Sbom => Artifacts / $"holdfast-{Version}.cdx.json"; AbsolutePath HelmChart => RootDirectory / "infra" / "helm" / "holdfast"; - // Image tag = short git SHA. Canonical version lives in Chart.yaml.appVersion. - string ImageTag => Git!.Commit[..7]; + // Image tag = short git SHA (CLI override wins). Canonical version lives + // in Chart.yaml.appVersion. GitVersion-derived semver is the future state + // but Tamp.GitVersion.V6 0.1.1 doesn't ship the [GitVersion] injection + // attribute yet — friction filed to airm5; revisit when that lands. + string Version => ImageTagOverride ?? 
Git!.Commit[..7]; + string ImageTag => Version; string LocalImageRef => $"holdfast-backend-dotnet:{ImageTag}"; string RegistryImageRef => $"{Registry}/holdfast-backend-dotnet:{ImageTag}"; @@ -47,6 +72,7 @@ class Build : TampBuild Console.WriteLine($" Artifacts: {Artifacts}"); Console.WriteLine($" Git branch: {Git?.Branch}"); Console.WriteLine($" Git commit: {Git?.Commit}"); + Console.WriteLine($" Version: {Version}"); Console.WriteLine($" Image tag: {ImageTag}"); Console.WriteLine($" Registry ref: {RegistryImageRef}"); Console.WriteLine($" QA URL: {QaUrl}"); @@ -75,6 +101,43 @@ class Build : TampBuild .SetResultsDirectory(Artifacts / "test-results") .AddLogger("trx;LogFileName=test-results.trx"))); + // Coverage variant of Test — collects XPlat Code Coverage via the + // standard data collector. Coverlet config built via the satellite's + // Configure(...) helper, then handed to dotnet test as a runsettings + // file. Kept separate from Test so the fast Ci path doesn't pay + // coverage overhead on every run. 
+ Target CoverageTest => _ => _ + .DependsOn(Compile) + .Executes(() => + { + var runSettings = Artifacts / "coverlet.runsettings"; + System.IO.Directory.CreateDirectory(Artifacts); + var xml = Coverlet.Configure(s => s + .AddFormat(CoverletFormat.OpenCover) + .AddExclude("[xunit.*]*") + .AddExclude("[*.Tests]*") + .SetUseSourceLink(true)).ToRunSettingsXml(); + System.IO.File.WriteAllText(runSettings, xml); + + return DotNet.Test(s => s + .SetProject(Solution.Path) + .SetConfiguration(Configuration) + .SetNoBuild(true) + .SetNoRestore(true) + .SetResultsDirectory(CoverageDir) + .SetSettings(runSettings) + .AddLogger("trx;LogFileName=test-results.trx")); + }); + + Target CoverageReport => _ => _ + .DependsOn(CoverageTest) + .Executes(() => ReportGenerator.Run(ReportGeneratorTool, s => s + .AddReport(CoverageDir / "**" / "coverage.opencover.xml") + .SetTargetDir(CoverageReportDir) + .AddReportType("Html") + .AddReportType("Badges") + .AddReportType("MarkdownSummaryGithub"))); + // CleanArtifacts(): framework-provided safe wipe — Solution.Projects only, // self-deletion guarded. Never use RootDirectory.GlobDirectories("**/bin") // — that's the friction-#12 footgun. @@ -95,6 +158,16 @@ class Build : TampBuild Target YarnInstall => _ => _ .Executes(() => Yarn.Install(YarnTool, s => s.SetImmutable(true))); + // Regenerate GraphQL TypeScript types from src/backend/private-graph schema. + // Generated files are checked in (src/frontend/src/graph/generated/) so + // day-to-day frontend work doesn't have to wait on codegen — this target + // runs on demand when *.gql or schema.graphqls drift. + Target FrontendCodegen => _ => _ + .DependsOn(YarnInstall) + .Executes(() => GraphQLCodegen.Generate(GraphQLCodegenTool, s => s + .SetWorkingDirectory(RootDirectory / "src" / "frontend") + .SetConfig("codegen.yml"))); + // Workspace-local turbo only exists after YarnInstall populates // node_modules/.bin/turbo, so this DependsOn is mandatory. 
Target FrontendBuild => _ => _ @@ -124,6 +197,47 @@ class Build : TampBuild .Executes(() => Docker.Push(s => s .SetImage(RegistryImageRef))); + // ── Supply chain ───────────────────────────────────────────────── + + // CycloneDX SBOM for the whole repo. Excludes the transitively-vendored + // node_modules / bin / obj noise so the SBOM reflects first-order deps + // an operator actually has to defend. Output is consumed by CveGate. + Target SbomScan => _ => _ + .Executes(() => Syft.Scan(SyftTool, s => s + .SetDirectorySource(RootDirectory) + .SetSourceName("HoldFast") + .SetSourceVersion(Version) + .AddOutputCycloneDxJson(Sbom) + .AddExcludes("**/node_modules/**", "**/bin/**", "**/obj/**"))); + + // CVE gate — reads the SBOM, hits NVD + GitHub Advisory DB + KEV, applies + // EPSS-weighted composite risk scoring. Fails the build on >= high + // severity. Adopters tune severity via --fail-on on the CLI. + Target CveGate => _ => _ + .DependsOn(SbomScan) + .Executes(() => Grype.Scan(GrypeTool, s => s + .SetSbomSource(Sbom) + .AddOutputJson() + .SetOutputFile(Artifacts / "vulns.json") + .SetFailOn("high") + .SetSortBy("risk") + .SetByCve(true))); + + // Secret scan — TruffleHog over the filesystem. Verified-only so unverified + // pattern matches (often false positives in test fixtures) don't flap the + // build. Run as part of Compliance, not Ci, because verification hits live + // endpoints (slower than the no-network analyzers). + Target SecretScan => _ => _ + .Executes(() => TruffleHog.Filesystem(TruffleHogTool, s => s + .AddPath(RootDirectory) + .SetOnlyVerified(true) + .SetFail(true))); + + // Aggregate compliance gate — `dotnet tamp Compliance` runs the full + // supply-chain triplet for a release-prep snapshot. + Target Compliance => _ => _ + .DependsOn(SbomScan, CveGate, SecretScan); + // ── Deploy ────────────────────────────────────────────────────── // Deploy the chart to the lab cluster. 
helm upgrade --install is idempotent; @@ -145,9 +259,9 @@ class Build : TampBuild .SetAtomic(false) .SetTimeout(TimeSpan.FromMinutes(10)))); - // Post-deploy smoke probe — polls /health/live until it returns 200 or - // the timeout elapses. HttpProbe handles transient HttpRequestExceptions - // and per-request timeouts as expected during pod warmup. + // Post-deploy smoke probe — polls /health until it returns 200 or the + // timeout elapses. HttpProbe handles transient HttpRequestExceptions and + // per-request timeouts as expected during pod warmup. // Backend's MapHealthChecks lands on /health (single endpoint, no // live/ready split). Don't append /live or /ready — those fall through // the SPA fallback to index.html (HTTP 200) and lie about health. @@ -161,6 +275,9 @@ class Build : TampBuild // `dotnet tamp` (no args) runs the full verification + artifact pipeline. // Tamp.Core 1.3.0's params Target[] overload makes the fan-out one-liner. + // Compliance (SBOM + CVE + secret scan) is deliberately NOT in Ci — it's + // a release-prep step run separately so iteration on the fast path stays + // fast. `dotnet tamp Compliance` runs it on demand. Target Ci => _ => _ .Default() .DependsOn(Test, Publish, FrontendBuild, DockerBuildBackend); diff --git a/build/Build.csproj b/build/Build.csproj index 35c9ba6c..e0b09cd4 100644 --- a/build/Build.csproj +++ b/build/Build.csproj @@ -10,14 +10,32 @@ - - + + + + + + + + - + - + + + + + + + + + + + +