From b0f7b380467f6e4e546181ffd6724f2a0ef10c9a Mon Sep 17 00:00:00 2001 From: cplieger Date: Mon, 8 Jun 2026 20:21:26 +0200 Subject: [PATCH 1/6] refactor: use health.DefaultPath instead of hardcoded marker literal --- go.mod | 2 +- go.sum | 16 ++-------------- health.go | 8 +++----- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/go.mod b/go.mod index 5da0559..d1e92d2 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,6 @@ require pgregory.net/rapid v1.3.0 require ( github.com/cplieger/atomicfile v1.1.0 - github.com/cplieger/health v1.0.2 + github.com/cplieger/health v1.1.0 golang.org/x/sync v0.21.0 ) diff --git a/go.sum b/go.sum index e212684..bd88919 100644 --- a/go.sum +++ b/go.sum @@ -1,21 +1,9 @@ github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= -github.com/cplieger/atomicfile v1.0.0 h1:a5OI9LWwQT2rYwBv2HB632dM0ktroA/o1d3eXDbQqtI= -github.com/cplieger/atomicfile v1.0.0/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/atomicfile v1.0.1 h1:egWe0ngT8ITYALuRSApSPRs5PeRzRfcCc4FbFl3ABXk= -github.com/cplieger/atomicfile v1.0.1/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/atomicfile v1.0.3 h1:yeQlIMsGdYQkTEH5KHd4qnhtiT9kwMKRaXTf4R6wufY= -github.com/cplieger/atomicfile v1.0.3/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= github.com/cplieger/atomicfile v1.1.0 h1:lXTD6YVLAz72j8kTkHJElUo1PeYOhZxUAhzkvQa00H8= github.com/cplieger/atomicfile v1.1.0/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/health v1.0.0 h1:mevbZo10XtMAzfWaYoi6UrcGooFgHJvlhIJwbNQIVoM= -github.com/cplieger/health v1.0.0/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -github.com/cplieger/health v1.0.1 h1:ZzOL6vEVOncHU2i2XDVg4G4v654vFaoCuE+Wvp0mXF0= -github.com/cplieger/health v1.0.1/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -github.com/cplieger/health v1.0.2 h1:tS2XtQVQL4TXEAr8u0xCtjOqJFrOtRzD8pzm/l2DbSU= -github.com/cplieger/health v1.0.2/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +github.com/cplieger/health v1.1.0 h1:s//JW6MZtiHLLAhJhD21wxzeqFP94dqV0KMa+zJRTWM= +github.com/cplieger/health v1.1.0/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= golang.org/x/sync v0.21.0 h1:HLII4xRRTtCRkxYp4HNFF0Js/Og6q2i++KXbg0gHCwM= golang.org/x/sync v0.21.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= pgregory.net/rapid v1.3.0 h1:vBvO0VSqti75J1jjYqpgPNBLKMd1+gxa9fYo7vk/Exc= diff --git a/health.go b/health.go index 5202a49..e07522a 100644 --- a/health.go +++ b/health.go @@ -2,11 +2,9 @@ package main import "github.com/cplieger/health" -// healthMarkerPath is the default marker location. Docker healthchecks -// stat this path; the app creates and removes it at lifecycle points. -// /tmp is conventional because strict-tier compose services mount -// /tmp as tmpfs (see base.yaml -strict templates). -const healthMarkerPath = "/tmp/.healthy" +// healthMarkerPath is the marker location, sourced from the library so +// the literal lives in exactly one place. +const healthMarkerPath = health.DefaultPath // healthMarker wraps *health.Marker for backward-compatible usage in main. type healthMarker = health.Marker From 70db7b90894f622db81df76da2281635091aa8de Mon Sep 17 00:00:00 2001 From: Plieg Date: Mon, 8 Jun 2026 19:07:22 +0200 Subject: [PATCH 2/6] fix: treat unconfirmed cache dir-fsync as a warning, not a save failure atomicfile now returns *WriteError{PhaseDirSync} when the parent-directory fsync fails after a successful rename. cache.json is reconstructible (it is rebuilt from Plex on the next run), so a durability-only failure should not fail the cache write. SaveTo now logs a warning and returns nil on PhaseDirSync, while still propagating genuine write failures. Depends on atomicfile v1.1.0 (PhaseDirSync). CI is red until Renovate bumps the atomicfile dependency after that release; merge only after the bump lands. --- internal/cache/cache.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 48300bd..55a439b 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -121,7 +121,18 @@ func (c *Cache) SaveTo(path string) error { } if err := atomicfile.WriteFile(context.Background(), path, data, atomicfile.WithMode(0o600)); err != nil { - return err + // cache.json is reconstructible: a parent-dir fsync failure + // (PhaseDirSync) means the cache was written to disk but its + // durability across an immediate crash is not guaranteed. Don't + // fail the save for that — the data is present and would be rebuilt + // from Plex on the next run anyway. Surface it as a warning instead. + var we *atomicfile.WriteError + if errors.As(err, &we) && we.Phase == atomicfile.PhaseDirSync { + slog.Warn("cache written but parent-dir fsync unconfirmed; not guaranteed durable across an immediate crash", + "path", path, "error", err) + } else { + return err + } } slog.Debug("cache saved", "path", path, "bytes", len(data)) return nil From 31f0e255e609d168ce18ab31b38dcad8ccd34dc0 Mon Sep 17 00:00:00 2001 From: cplieger Date: Wed, 10 Jun 2026 10:46:30 +0200 Subject: [PATCH 3/6] refactor: deepen atomicfile use (SaveBytes, LoadJSON, stale-temp sweep, bounded CA read) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SaveTo: WriteFile -> SaveBytes (lands on atomicfile's reapable temp scheme so the CleanupStaleTemps below is effective), keeping 70db7b9's PhaseDirSync tolerance — a durability-only dir-fsync failure stays a warning since cache.json is rebuilt from Plex on the next run. - LoadFrom: os.Open + io.LimitReader + ReadAll + Unmarshal core -> LoadJSON, keeping the missing-file=fresh-start and permissive-mode warning. - main: CleanupStaleTemps(/config, 1h) at startup to reap a crash-orphaned cache temp. - plex client: CA-cert os.ReadFile -> atomicfile.ReadBounded (1 MiB cap). --- internal/cache/cache.go | 41 ++++++++++++------------------------ internal/cache/cache_test.go | 10 +++++++-- internal/plex/client.go | 5 +++-- main.go | 5 +++++ 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 55a439b..ad59f83 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -13,7 +13,6 @@ import ( "encoding/json" "errors" "fmt" - "io" "log/slog" "os" "sync" @@ -68,7 +67,7 @@ func New() *Cache { } // LoadFrom reads the cache from the given path. A missing file returns nil -// (fresh start). Capped at maxCacheSize bytes via io.LimitReader. Warns if +// (fresh start). Capped at maxCacheSize bytes via atomicfile.LoadJSON. Warns if // the file has permissive mode bits set, as tokens are stored here. func (c *Cache) LoadFrom(path string) error { c.mu.Lock() @@ -77,33 +76,21 @@ func (c *Cache) LoadFrom(path string) error { c.data.LanguageProfiles = make(map[string]map[string]string) c.data.UserTokens = make(map[string]string) - f, err := os.Open(path) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil - } - return err - } - defer f.Close() - - if info, statErr := f.Stat(); statErr == nil { - if info.Mode().Perm()&0o077 != 0 { - slog.Warn("cache file has permissive mode; user tokens may be "+ - "readable by other host users", - "path", path, - "mode", info.Mode().Perm().String()) + info, statErr := os.Stat(path) + if statErr != nil { + if errors.Is(statErr, os.ErrNotExist) { + return nil // missing file = fresh start } + return statErr } - - data, err := io.ReadAll(io.LimitReader(f, maxCacheSize)) - if err != nil { - return err - } - if int64(len(data)) >= maxCacheSize { - slog.Warn("cache file at size limit, may be truncated", - "path", path, "bytes", len(data), "limit", maxCacheSize) + if info.Mode().Perm()&0o077 != 0 { + slog.Warn("cache file has permissive mode; user tokens may be "+ + "readable by other host users", + "path", path, "mode", info.Mode().Perm().String()) } - return json.Unmarshal(data, &c.data) + // LoadJSON bounds the read at maxCacheSize and unmarshals; an oversize + // file returns an error (caller starts fresh) rather than truncating. + return atomicfile.LoadJSON(context.Background(), path, maxCacheSize, &c.data) } // SaveTo atomically writes the cache to the given path (temp file + rename) @@ -120,7 +107,7 @@ func (c *Cache) SaveTo(path string) error { return fmt.Errorf("marshal: %w", err) } - if err := atomicfile.WriteFile(context.Background(), path, data, atomicfile.WithMode(0o600)); err != nil { + if err := atomicfile.SaveBytes(path, data, 0o600); err != nil { // cache.json is reconstructible: a parent-dir fsync failure // (PhaseDirSync) means the cache was written to disk but its // durability across an immediate crash is not guaranteed. Don't diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go index de2a262..5a3c093 100644 --- a/internal/cache/cache_test.go +++ b/internal/cache/cache_test.go @@ -518,10 +518,16 @@ func TestCacheSaveToEnforces0600Permissions(t *testing.T) { func TestCacheSaveToRejectsBadDir(t *testing.T) { t.Parallel() + // Parent is a regular file, so MkdirAll fails with ENOTDIR even as root; + // SaveTo (via atomicfile.SaveBytes, which auto-creates the dir) must error. + f := filepath.Join(t.TempDir(), "afile") + if err := os.WriteFile(f, []byte("x"), 0o600); err != nil { + t.Fatalf("setup: %v", err) + } c := New() - err := c.SaveTo("/nonexistent/path-never-created/cache.json") + err := c.SaveTo(filepath.Join(f, "subdir", "cache.json")) if err == nil { - t.Fatal("SaveTo() on bad dir should return error, got nil") + t.Fatal("SaveTo() under a file should return error, got nil") } } diff --git a/internal/plex/client.go b/internal/plex/client.go index 6eae168..94f97b9 100644 --- a/internal/plex/client.go +++ b/internal/plex/client.go @@ -11,9 +11,10 @@ import ( "log/slog" "net/http" "net/url" - "os" "strings" "time" + + "github.com/cplieger/atomicfile" ) // maxResponseBody caps the number of bytes read from any single Plex JSON @@ -134,7 +135,7 @@ func newHTTPClient(caCertPath string) (*http.Client, error) { }, } if caCertPath != "" { - pemBytes, err := os.ReadFile(caCertPath) + pemBytes, err := atomicfile.ReadBounded(context.Background(), caCertPath, 1<<20) if err != nil { return nil, fmt.Errorf("reading PLEX_CA_CERT_PATH=%q: %w", caCertPath, err) } diff --git a/main.go b/main.go index 21ea0b7..b0e84b0 100644 --- a/main.go +++ b/main.go @@ -20,10 +20,12 @@ import ( "log/slog" "os" "os/signal" + "path/filepath" "sync" "syscall" "time" + "github.com/cplieger/atomicfile" "github.com/cplieger/plex-language-sync/internal/api" "github.com/cplieger/plex-language-sync/internal/cache" "github.com/cplieger/plex-language-sync/internal/ignore" @@ -101,6 +103,9 @@ func run() int { if err := c.LoadFrom(cachePath); err != nil { slog.Warn("cache load failed, starting fresh", "error", err) } + // Reap any temp orphaned by an interrupted SaveTo so they don't accumulate + // on the persistent /config volume. + atomicfile.CleanupStaleTemps(filepath.Dir(cachePath), time.Hour) // User manager — admin identity + cached shared-user tokens. um := users.NewManager(c) From b3ca21ef773425249ca640abbd0ce0f123658206 Mon Sep 17 00:00:00 2001 From: cplieger Date: Wed, 10 Jun 2026 14:58:06 +0200 Subject: [PATCH 4/6] refactor: drop health.go wrapper, call health library directly The app-side health.go was 28 LOC of single-call passthroughs from a pre-library era. The library API is stable and idiomatic; direct calls in main.go are clearer. Tests for the wrapper duplicated upstream coverage and are removed. From integration audit (matrix review). --- health.go | 28 ---------- health_test.go | 137 ------------------------------------------------- main.go | 5 +- 3 files changed, 3 insertions(+), 167 deletions(-) delete mode 100644 health.go delete mode 100644 health_test.go diff --git a/health.go b/health.go deleted file mode 100644 index e07522a..0000000 --- a/health.go +++ /dev/null @@ -1,28 +0,0 @@ -package main - -import "github.com/cplieger/health" - -// healthMarkerPath is the marker location, sourced from the library so -// the literal lives in exactly one place. -const healthMarkerPath = health.DefaultPath - -// healthMarker wraps *health.Marker for backward-compatible usage in main. -type healthMarker = health.Marker - -// newHealthMarker constructs a marker for path and probes the parent -// directory for writability. On failure it logs a single Warn with a -// fix hint and returns a marker in degraded mode. -func newHealthMarker(path string) *healthMarker { - return health.NewMarker(path) -} - -// runProbe runs in the separate `health` subcommand process. -func runProbe(path string) { - health.RunProbe(path) -} - -// probeCheck implements the health-probe decision without calling -// os.Exit, so it can be unit-tested. -func probeCheck(path string) int { - return health.ProbeCheck(path) -} diff --git a/health_test.go b/health_test.go deleted file mode 100644 index c1a5bcc..0000000 --- a/health_test.go +++ /dev/null @@ -1,137 +0,0 @@ -package main - -import ( - "os" - "path/filepath" - "testing" - - "pgregory.net/rapid" -) - -// TestHealthMarker_SetCreatesAndRemoves covers the happy path: a writable -// dir, Set(true) creates the marker, Set(false) removes it. -func TestHealthMarker_SetCreatesAndRemoves(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - m.Set(true) - if _, err := os.Stat(path); err != nil { - t.Fatalf("marker should exist after Set(true): %v", err) - } - - m.Set(false) - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should not exist after Set(false): %v", err) - } -} - -// TestHealthMarker_Cleanup confirms Cleanup removes the marker and is -// safe to call when the marker already does not exist. -func TestHealthMarker_Cleanup(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - m.Set(true) - m.Cleanup() - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should be gone after Cleanup: %v", err) - } - - // Second cleanup must not error. - m.Cleanup() -} - -// TestHealthMarker_DegradedMode verifies that when the marker directory -// is not writable, the marker enters degraded mode: Set and Cleanup are -// no-ops and no file is ever created. -func TestHealthMarker_DegradedMode(t *testing.T) { - // Create a read-only directory to simulate a compose misconfiguration. - dir := filepath.Join(t.TempDir(), "ro") - if err := os.Mkdir(dir, 0o500); err != nil { - t.Fatalf("mkdir ro: %v", err) - } - - path := filepath.Join(dir, ".healthy") - m := newHealthMarker(path) - - // In a non-writable dir, Set(true) should not create the file. - // If the env bypasses directory mode (root, certain filesystems), skip. - m.Set(true) - if _, err := os.Stat(path); err == nil { - t.Skip("test environment bypasses directory mode; skipping") - } - - m.Cleanup() // must not panic -} - -// TestHealthMarker_Idempotent ensures repeated Set(true) and Set(false) -// calls are safe and converge to the expected file state. -func TestHealthMarker_Idempotent(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - for range 3 { - m.Set(true) - } - if _, err := os.Stat(path); err != nil { - t.Fatalf("marker should exist after repeated Set(true): %v", err) - } - - for range 3 { - m.Set(false) - } - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should not exist after repeated Set(false): %v", err) - } -} - -// TestHealthMarker_Property exercises arbitrary Set sequences and asserts -// that the file state always matches the last Set argument. -func TestHealthMarker_Property(t *testing.T) { - dir := t.TempDir() - rapid.Check(t, func(rt *rapid.T) { - // A fresh subdir per iteration so markers from earlier iterations - // don't leak into later ones. - nonce := rapid.StringMatching(`[a-z0-9]{8}`).Draw(rt, "nonce") - subdir := filepath.Join(dir, nonce) - if err := os.Mkdir(subdir, 0o755); err != nil { - rt.Fatalf("mkdir subdir: %v", err) - } - path := filepath.Join(subdir, ".healthy") - m := newHealthMarker(path) - - calls := rapid.SliceOfN(rapid.Bool(), 1, 30).Draw(rt, "calls") - for _, ok := range calls { - m.Set(ok) - } - last := calls[len(calls)-1] - - _, err := os.Stat(path) - exists := err == nil - if exists != last { - rt.Fatalf("after Set(%v): exists=%v, want %v", - last, exists, last) - } - }) -} - -// TestProbeCheck_Writable confirms the probe succeeds when the marker exists. -func TestProbeCheck_Writable(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - m.Set(true) - - if code := probeCheck(path); code != 0 { - t.Fatalf("probeCheck with marker present: got %d, want 0", code) - } -} - -// TestProbeCheck_Missing confirms the probe fails when marker is absent -// in a writable directory. -func TestProbeCheck_Missing(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - - if code := probeCheck(path); code != 1 { - t.Fatalf("probeCheck with marker absent: got %d, want 1", code) - } -} diff --git a/main.go b/main.go index b0e84b0..04b6992 100644 --- a/main.go +++ b/main.go @@ -26,6 +26,7 @@ import ( "time" "github.com/cplieger/atomicfile" + "github.com/cplieger/health" "github.com/cplieger/plex-language-sync/internal/api" "github.com/cplieger/plex-language-sync/internal/cache" "github.com/cplieger/plex-language-sync/internal/ignore" @@ -57,7 +58,7 @@ const shutdownWaitBudget = 10 * time.Second func main() { if len(os.Args) > 1 && os.Args[1] == "health" { - runProbe(healthMarkerPath) + health.RunProbe(health.DefaultPath) } os.Exit(run()) } @@ -69,7 +70,7 @@ func run() int { cfg := loadConfig() logConfig(&cfg) - marker := newHealthMarker(healthMarkerPath) + marker := health.NewMarker(health.DefaultPath) marker.Set(false) client, err := plex.NewClient(cfg.plexURL, cfg.plexToken, cfg.caCertPath) From 6849c1eba554c9c22134b433d21e37ba82fc2bfe Mon Sep 17 00:00:00 2001 From: cplieger Date: Wed, 10 Jun 2026 19:07:24 +0200 Subject: [PATCH 5/6] refactor: name the CA-cert read-cap constant Replace the bare 1<<20 cap on the PLEX_CA_CERT_PATH bounded read with a named maxCACertSize const, matching the maxSecretSize convention elsewhere. --- internal/plex/client.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/plex/client.go b/internal/plex/client.go index 94f97b9..67a542d 100644 --- a/internal/plex/client.go +++ b/internal/plex/client.go @@ -135,7 +135,8 @@ func newHTTPClient(caCertPath string) (*http.Client, error) { }, } if caCertPath != "" { - pemBytes, err := atomicfile.ReadBounded(context.Background(), caCertPath, 1<<20) + const maxCACertSize = 1 << 20 // 1 MB + pemBytes, err := atomicfile.ReadBounded(context.Background(), caCertPath, maxCACertSize) if err != nil { return nil, fmt.Errorf("reading PLEX_CA_CERT_PATH=%q: %w", caCertPath, err) } From 89c327185753f0a7d1e0a14f76a2f520f42deca7 Mon Sep 17 00:00:00 2001 From: cplieger Date: Wed, 10 Jun 2026 19:14:57 +0200 Subject: [PATCH 6/6] refactor: collapse the shutdown defers into one ordered sequence The three shutdown defers (Cleanup, cache-save, Set(false)) encoded their required order implicitly via LIFO, and Set(false) + Cleanup both removed the health marker. Merge them into a single deferred shutdown routine that reads top-to-bottom in execution order: flag unhealthy, then save the cache. Set(false) already removes the marker, so the separate Cleanup is dropped. Behaviour is unchanged. --- main.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/main.go b/main.go index 04b6992..0adb735 100644 --- a/main.go +++ b/main.go @@ -118,19 +118,19 @@ func run() int { um.InitialRefreshWithRetry(ctx, client, identity.MachineIdentifier, users.DefaultRefreshConfig()) marker.Set(true) - defer marker.Cleanup() - // A failed cache save on shutdown loses the latest learned language - // profiles and user tokens — treat as Error (operator-actionable) - // rather than the Warn used for transient mid-run save failures. + // Shutdown sequence: flag unhealthy first so Docker stops routing health + // probes as passing while the (slow) cache save runs, then persist the + // cache. Set(false) removes the marker, so no separate Cleanup is needed. + // A failed save here loses the latest learned language profiles and user + // tokens, so it is logged at Error (operator-actionable), not the Warn used + // for transient mid-run save failures. defer func() { + marker.Set(false) if err := c.SaveTo(cachePath); err != nil { slog.Error("cache save on shutdown failed, profiles may be lost", "path", cachePath, "error", err) } }() - // Signal unhealthy immediately on shutdown so Docker stops routing - // health probes as passing while cache save / cleanup run. - defer marker.Set(false) // Compose the sync and scheduler subsystems from the concrete // internal/* packages, passing api.* interfaces so the subsystems