From a13e77c3191a15f06c99dd7d3f6f8ff7552f9317 Mon Sep 17 00:00:00 2001 From: BrewingCoder Date: Sat, 9 May 2026 12:25:47 -0400 Subject: [PATCH] infra: swap hobby PG to timescaledb-ha + bump CH max_concurrent_queries to 50 (HOL-35) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related infra tweaks surfaced during HOL-26 / HOL-29 prep work. ## TimescaleDB-HA replaces ankane/pgvector Hobby PG image swaps from ankane/pgvector:v0.5.1 to timescale/timescaledb-ha:pg16. The new image bundles: - TimescaleDB community 2.26.4 (Apache 2 base + Timescale License for some features; AGPL-compatible) - pgvector (replaces the existing ankane/pgvector role for embeddings) - timescaledb_toolkit - pg_partman is NOT in this image, but with TimescaleDB available we use drop_chunks for retention instead Volume layout note: timescaledb-ha lays its data dir at /home/postgres/pgdata/data, NOT the upstream /var/lib/postgresql/data. Updated the volume mount to match. Anyone swapping back to a different PG image needs to revert that path too — added a comment in compose.yml flagging this. Live-verified: TimescaleDB extension active, DevSeed populated 5 workspaces / 5 projects in the new image, HOL-26 PG migrations confirmed applicable. ## CH max_concurrent_queries 20 -> 50 HOL-24 set max_concurrent_queries=20 to constrain idle. Real workload exceeded it - even single SHOW CREATE TABLE queries got rejected with "Too many simultaneous queries" during HOL-29 schema-discovery work, because the backend's six worker hosted services keep CH busy with small queries continuously. 50 is still well below the default of 100 but accommodates the actual concurrency. Insert/select sub-caps bumped proportionally (10 -> 25 each). Live-verified: max_concurrent_queries reads 50 from the running CH container after recreate. 
## Out of scope - Volume migration tooling for production deployments — the swap drops hobby dev data (DevSeed recreates it on first backend start). Real upgrades need a pg_dump/restore plan. Closes HOL-35. Co-Authored-By: Claude Opus 4.7 --- infra/docker/.env.example | 2 +- infra/docker/backend-dotnet.Dockerfile | 8 +++++++- infra/docker/compose.yml | 12 ++++++++++-- infra/docker/config.xml | 13 +++++++++---- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/infra/docker/.env.example b/infra/docker/.env.example index 0f6fff95..3bd6fa5d 100644 --- a/infra/docker/.env.example +++ b/infra/docker/.env.example @@ -21,7 +21,7 @@ COMPOSE_PATH_SEPARATOR=: # Infra dependencies — pinned for reproducibility. CLICKHOUSE_IMAGE_NAME=clickhouse/clickhouse-server:24.3.15.72-alpine -POSTGRES_IMAGE_NAME=ankane/pgvector:v0.5.1 +POSTGRES_IMAGE_NAME=timescale/timescaledb-ha:pg16 # OTEL collector image vars removed — HOL-21 dropped the collector container. # Backend hosts the OTLP receiver at /otel/v1/{logs,traces,metrics}. diff --git a/infra/docker/backend-dotnet.Dockerfile b/infra/docker/backend-dotnet.Dockerfile index ff090405..8d06e683 100644 --- a/infra/docker/backend-dotnet.Dockerfile +++ b/infra/docker/backend-dotnet.Dockerfile @@ -77,11 +77,17 @@ FROM --platform=$BUILDPLATFORM mcr.microsoft.com/dotnet/sdk:10.0 AS backend-buil WORKDIR /src -# Copy project files first for layer caching +# Copy project files first for layer caching. Every project in the slnx +# must be listed here — `dotnet restore` walks the project graph and needs +# each .csproj to exist on disk before generating the assets file. Adding +# a new project means adding a new line here (HOL-25 added Analytics, +# HOL-26 added Data.Postgres). COPY src/dotnet/HoldFast.Backend.slnx . 
+COPY src/dotnet/src/HoldFast.Analytics/HoldFast.Analytics.csproj src/HoldFast.Analytics/ COPY src/dotnet/src/HoldFast.Api/HoldFast.Api.csproj src/HoldFast.Api/ COPY src/dotnet/src/HoldFast.Data/HoldFast.Data.csproj src/HoldFast.Data/ COPY src/dotnet/src/HoldFast.Data.ClickHouse/HoldFast.Data.ClickHouse.csproj src/HoldFast.Data.ClickHouse/ +COPY src/dotnet/src/HoldFast.Data.Postgres/HoldFast.Data.Postgres.csproj src/HoldFast.Data.Postgres/ COPY src/dotnet/src/HoldFast.Domain/HoldFast.Domain.csproj src/HoldFast.Domain/ COPY src/dotnet/src/HoldFast.GraphQL.Private/HoldFast.GraphQL.Private.csproj src/HoldFast.GraphQL.Private/ COPY src/dotnet/src/HoldFast.GraphQL.Public/HoldFast.GraphQL.Public.csproj src/HoldFast.GraphQL.Public/ diff --git a/infra/docker/compose.yml b/infra/docker/compose.yml index 9a823607..df253e94 100644 --- a/infra/docker/compose.yml +++ b/infra/docker/compose.yml @@ -15,7 +15,11 @@ services: postgres: logging: *local-logging container_name: postgres - # a postgres image with pgvector installed + # HOL-35: timescale/timescaledb-ha bundles TimescaleDB + pgvector + + # timescaledb_toolkit (pg_partman is NOT included), replacing + # ankane/pgvector. The swap drops existing hobby dev data (DevSeed + # recreates it); the extra extensions activate once HOL-26's + # 0003_install_extensions migration runs (or run CREATE EXTENSION in psql). image: ${POSTGRES_IMAGE_NAME} restart: on-failure ports: @@ -23,7 +27,11 @@ services: environment: POSTGRES_HOST_AUTH_METHOD: trust volumes: - - postgres-data:/var/lib/postgresql/data + # HOL-35: timescale/timescaledb-ha lays out its data dir at + # /home/postgres/pgdata/data, not the upstream PG default. If + # you swap to a different PG image (e.g. plain postgres:16 or + # ankane/pgvector), revert to /var/lib/postgresql/data. 
+ - postgres-data:/home/postgres/pgdata/data - ../../tools/scripts/migrations/init.sql:/root/init.sql healthcheck: test: ['CMD-SHELL', 'pg_isready -U postgres'] diff --git a/infra/docker/config.xml b/infra/docker/config.xml index c4f88fe8..38f6a0ea 100644 --- a/infra/docker/config.xml +++ b/infra/docker/config.xml @@ -54,10 +54,15 @@ 4 2 - - 20 - 10 - 10 + + 50 + 25 + 25