Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions src/dotnet/src/HoldFast.Api/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,32 @@ req.RequestUri is null ||
// HOL-25: ClickHouseService implements both the legacy IClickHouseService
// and the seven backend-neutral domain stores. Register the singleton once
// and resolve all eight interfaces through it — different callers can hold
// any subset and DI hands back the same instance.
//
// HOL-29: per-domain backend swap. Each store can be toggled independently
// via Storage:Analytics:<StoreName> config (e.g. Storage:Analytics:LogStore =
// postgres). Default is ClickHouse (matches existing behavior). HOL-34 will
// consolidate this into a single Storage:Analytics top-level switch.
builder.Services.AddSingleton<ClickHouseService>();
builder.Services.AddSingleton<IClickHouseService>(sp => sp.GetRequiredService<ClickHouseService>());

// PostgresLogStore registered as concrete type so it can be DI-injected
// either as ILogStore (when LogStore=postgres) or directly for tests/health
// checks without forcing it onto every deployment.
builder.Services.AddSingleton<HoldFast.Data.Postgres.PostgresLogStore>();

// ILogStore must be registered exactly once, inside the branch below. An
// additional unconditional ClickHouse registration would leave a second
// descriptor behind that shows up in IEnumerable<ILogStore> resolution even
// when the Postgres backend is selected.
//
// Any value other than "postgres" (compared case-insensitively), including a
// missing key, selects the ClickHouse-backed store.
var logStoreBackend = builder.Configuration["Storage:Analytics:LogStore"] ?? "clickhouse";
if (logStoreBackend.Equals("postgres", StringComparison.OrdinalIgnoreCase))
{
    builder.Services.AddSingleton<HoldFast.Analytics.ILogStore>(
        sp => sp.GetRequiredService<HoldFast.Data.Postgres.PostgresLogStore>());
}
else
{
    builder.Services.AddSingleton<HoldFast.Analytics.ILogStore>(
        sp => sp.GetRequiredService<ClickHouseService>());
}

// Remaining domain stores visible here still resolve through ClickHouseService.
builder.Services.AddSingleton<HoldFast.Analytics.ITraceStore>(sp => sp.GetRequiredService<ClickHouseService>());
builder.Services.AddSingleton<HoldFast.Analytics.ISessionAnalyticsStore>(sp => sp.GetRequiredService<ClickHouseService>());
builder.Services.AddSingleton<HoldFast.Analytics.IErrorAnalyticsStore>(sp => sp.GetRequiredService<ClickHouseService>());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- HOL-29: logs hypertable + indexes.
--
-- Mirrors ClickHouse's `default.logs` table from src/backend/clickhouse/migrations/
-- 000006_create_logs_new + 000011 (service_version) + 000060 (environment) + the
-- Source column from one of the Source-adding migrations. Single consolidated
-- final-state DDL — fresh installs don't need the historical churn.
--
-- Partitioning: TimescaleDB hypertable with daily chunks (matches CH's
-- `PARTITION BY toDate(Timestamp)`). Retention policy drops chunks > 30 days
-- old, mirroring CH's `TTL Timestamp + toIntervalDay(30)`.
--
-- log_attributes uses JSONB (vs CH's Map) — better PG ergonomics, GIN-indexable
-- for key/value lookups, and round-trips cleanly to Dictionary<string,string>
-- in the .NET layer via Npgsql's built-in JSONB support.

-- One row per log record. Every column is NOT NULL; columns other than
-- timestamp / uuid / project_id carry a default ('' for text, 0 for integers,
-- an empty object for log_attributes) that applies when an INSERT omits them.
CREATE TABLE IF NOT EXISTS analytics.logs (
    timestamp          TIMESTAMPTZ  NOT NULL,
    uuid               UUID         NOT NULL,
    project_id         INTEGER      NOT NULL,
    trace_id           TEXT         NOT NULL  DEFAULT '',
    span_id            TEXT         NOT NULL  DEFAULT '',
    secure_session_id  TEXT         NOT NULL  DEFAULT '',
    trace_flags        INTEGER      NOT NULL  DEFAULT 0,
    severity_text      TEXT         NOT NULL  DEFAULT '',
    severity_number    INTEGER      NOT NULL  DEFAULT 0,
    source             TEXT         NOT NULL  DEFAULT '',
    service_name       TEXT         NOT NULL  DEFAULT '',
    service_version    TEXT         NOT NULL  DEFAULT '',
    body               TEXT         NOT NULL  DEFAULT '',
    -- JSONB rather than ClickHouse's Map type.
    log_attributes     JSONB        NOT NULL  DEFAULT '{}'::jsonb,
    environment        TEXT         NOT NULL  DEFAULT ''
);

-- TimescaleDB hypertable + retention, applied only when the extension exists.
--
-- Two separate safeguards are at work here:
--   * The pg_extension check keeps this migration from failing on plain
--     PostgreSQL, where create_hypertable / add_retention_policy are not
--     defined. Migration 0003 is expected to enable the extension first.
--   * The `if_not_exists` flags make both calls safe to re-run once the
--     hypertable / policy already exist.
--
-- `migrate_data => TRUE` covers the upgrade path from the regular-table
-- fallback below: if TimescaleDB is installed later and this migration is
-- re-run, analytics.logs may already contain rows, and create_hypertable
-- raises an error on a non-empty table unless migrate_data is set. It is a
-- no-op for an empty table (fresh install).
DO $$
BEGIN
    IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN
        -- Daily chunks on the timestamp column.
        PERFORM create_hypertable(
            'analytics.logs',
            'timestamp',
            chunk_time_interval => INTERVAL '1 day',
            if_not_exists       => TRUE,
            migrate_data        => TRUE
        );
        -- Drop chunks older than 30 days (replaces CH's TTL).
        PERFORM add_retention_policy(
            'analytics.logs',
            INTERVAL '30 days',
            if_not_exists => TRUE
        );
        RAISE NOTICE 'HOL-29: logs hypertable + 30-day retention configured';
    ELSE
        -- Plain PostgreSQL: leave analytics.logs as an ordinary table.
        RAISE NOTICE
            'HOL-29: TimescaleDB not installed - logs is a regular table. '
            'Retention falls back to in-app DELETE (DataRetentionWorker).';
    END IF;
END
$$;

-- Primary read path: cursor-paginated listing for one project, newest first.
-- On a hypertable, chunk pruning narrows the time range first; this btree
-- then serves the ordered scan inside each remaining chunk.
CREATE INDEX IF NOT EXISTS idx_logs_project_timestamp_uuid
    ON analytics.logs
    USING btree (project_id, timestamp DESC, uuid DESC);

-- "Logs for this trace" / "logs for this session" dashboard panels.
-- Both indexes are partial: rows still holding the '' default are left out so
-- the index only contains rows that actually reference a trace / session.
CREATE INDEX IF NOT EXISTS idx_logs_trace_id
    ON analytics.logs
    USING btree (trace_id, project_id, timestamp DESC)
    WHERE trace_id <> '';

CREATE INDEX IF NOT EXISTS idx_logs_secure_session_id
    ON analytics.logs
    USING btree (secure_session_id, project_id, timestamp DESC)
    WHERE secure_session_id <> '';

-- Attribute search. A GIN index with the default jsonb operator class serves
-- containment (`log_attributes @> '{"key":"val"}'`) and key-existence
-- (`log_attributes ? 'key'`) predicates.
CREATE INDEX IF NOT EXISTS idx_logs_attributes_gin
    ON analytics.logs
    USING GIN (log_attributes);

-- Catalog comment for analytics.logs. The wording covers both outcomes of the
-- TimescaleDB check in this migration: hypertable + retention policy when the
-- extension is installed, regular table with in-app retention otherwise.
COMMENT ON TABLE analytics.logs IS
    'Application logs ingested via OTLP and written by HoldFast.Worker.LogIngestionWorker. '
    'With TimescaleDB: hypertable with daily chunks and a 30-day retention policy. '
    'Without TimescaleDB: regular table; retention falls back to in-app DELETE (DataRetentionWorker). '
    'Mirrors ClickHouse default.logs schema for cross-backend parity.';
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
-- HOL-29: log key/value catalog tables.
--
-- These power the dashboard's autocomplete UI for the logs filter:
-- - GetLogKeysAsync returns the distinct attribute keys for a project+date range
-- - GetLogKeyValuesAsync returns the distinct values for a (project, key) pair
--
-- ClickHouse used SummingMergeTree + materialized views to maintain these
-- catalogs. PG's equivalent for hobby scale: small tables that PostgresLogStore
-- upserts into inline during WriteLogsAsync. The trade-off vs continuous
-- aggregates is more work per insert, but that work is bounded by the number of
-- unique (project, key, day) tuples in a batch — small for typical workloads.
-- For high-volume deployments a future PR can swap to TimescaleDB continuous
-- aggregates over `analytics.logs.log_attributes`.

-- Attribute-key catalog: one row per (project, key, day) with a running count.
-- The primary key is named explicitly; the name is the same one PostgreSQL
-- would generate by default for this table.
CREATE TABLE IF NOT EXISTS analytics.log_keys (
    project_id  INTEGER  NOT NULL,
    key         TEXT     NOT NULL,
    day         DATE     NOT NULL,
    count       BIGINT   NOT NULL  DEFAULT 0,
    type        TEXT     NOT NULL  DEFAULT 'String',
    CONSTRAINT log_keys_pkey PRIMARY KEY (project_id, key, day)
);

-- Attribute-value catalog: one row per (project, key, day, value) with a
-- running count. The primary key is named explicitly; the name is the same one
-- PostgreSQL would generate by default for this table.
CREATE TABLE IF NOT EXISTS analytics.log_key_values (
    project_id  INTEGER  NOT NULL,
    key         TEXT     NOT NULL,
    day         DATE     NOT NULL,
    value       TEXT     NOT NULL,
    count       BIGINT   NOT NULL  DEFAULT 0,
    CONSTRAINT log_key_values_pkey PRIMARY KEY (project_id, key, day, value)
);

-- Keys autocomplete: list a project's keys for a date range. The log_keys
-- primary key is (project_id, key, day), so a range on day without a key
-- predicate needs its own index with day directly after project_id.
CREATE INDEX IF NOT EXISTS idx_log_keys_project_day
    ON analytics.log_keys (project_id, day DESC);

-- Values autocomplete: lookups by (project_id, key) with a day range are
-- already served by the log_key_values primary key, whose leading columns are
-- (project_id, key, day). A btree can be scanned backward, so it also yields
-- day DESC ordering. A separate (project_id, key, day DESC) index would
-- duplicate that prefix and only add write cost to the inline upserts, so
-- none is created here.

-- Catalog comments for the two autocomplete tables (visible via \dt+ in psql).
COMMENT ON TABLE analytics.log_keys IS
    'Log attribute key catalog, populated inline by HOL-29 PostgresLogStore.WriteLogsAsync. '
    'Used by GetLogKeysAsync to drive the dashboard logs-filter autocomplete. '
    'Mirrors ClickHouse default.log_keys schema.';

COMMENT ON TABLE analytics.log_key_values IS
    'Log attribute (key, value) catalog. Same population strategy as log_keys.';
Loading
Loading