diff --git a/go.sum b/go.sum index e3994b6..d39d305 100644 --- a/go.sum +++ b/go.sum @@ -1,25 +1,9 @@ github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= github.com/coder/websocket v1.8.14/go.mod h1:NX3SzP+inril6yawo5CQXx8+fk145lPDC6pumgx0mVg= -github.com/cplieger/atomicfile v1.0.0 h1:a5OI9LWwQT2rYwBv2HB632dM0ktroA/o1d3eXDbQqtI= -github.com/cplieger/atomicfile v1.0.0/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/atomicfile v1.0.1 h1:egWe0ngT8ITYALuRSApSPRs5PeRzRfcCc4FbFl3ABXk= -github.com/cplieger/atomicfile v1.0.1/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/atomicfile v1.0.3 h1:yeQlIMsGdYQkTEH5KHd4qnhtiT9kwMKRaXTf4R6wufY= -github.com/cplieger/atomicfile v1.0.3/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/atomicfile v1.1.0 h1:lXTD6YVLAz72j8kTkHJElUo1PeYOhZxUAhzkvQa00H8= -github.com/cplieger/atomicfile v1.1.0/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= github.com/cplieger/atomicfile v1.2.0 h1:EootDaL3Y4yjxTQiImcgmUXI+E8qtIR0jXDfHxfEZGo= github.com/cplieger/atomicfile v1.2.0/go.mod h1:V9XmBWRC+keTJQk7I4pMGNXJBt5MMcoXrZUFKQR6PnE= -github.com/cplieger/health v1.0.0 h1:mevbZo10XtMAzfWaYoi6UrcGooFgHJvlhIJwbNQIVoM= -github.com/cplieger/health v1.0.0/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -github.com/cplieger/health v1.0.1 h1:ZzOL6vEVOncHU2i2XDVg4G4v654vFaoCuE+Wvp0mXF0= -github.com/cplieger/health v1.0.1/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -github.com/cplieger/health v1.0.2 h1:tS2XtQVQL4TXEAr8u0xCtjOqJFrOtRzD8pzm/l2DbSU= -github.com/cplieger/health v1.0.2/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= github.com/cplieger/health v1.1.0 h1:s//JW6MZtiHLLAhJhD21wxzeqFP94dqV0KMa+zJRTWM= github.com/cplieger/health v1.1.0/go.mod h1:INjkN9qJnR14X5AFqX6ymFaHvx51djYgk2oQb/gJ9u8= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sync v0.21.0 h1:HLII4xRRTtCRkxYp4HNFF0Js/Og6q2i++KXbg0gHCwM= golang.org/x/sync v0.21.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= pgregory.net/rapid v1.3.0 h1:vBvO0VSqti75J1jjYqpgPNBLKMd1+gxa9fYo7vk/Exc= diff --git a/health.go b/health.go deleted file mode 100644 index 5202a49..0000000 --- a/health.go +++ /dev/null @@ -1,30 +0,0 @@ -package main - -import "github.com/cplieger/health" - -// healthMarkerPath is the default marker location. Docker healthchecks -// stat this path; the app creates and removes it at lifecycle points. -// /tmp is conventional because strict-tier compose services mount -// /tmp as tmpfs (see base.yaml -strict templates). -const healthMarkerPath = "/tmp/.healthy" - -// healthMarker wraps *health.Marker for backward-compatible usage in main. -type healthMarker = health.Marker - -// newHealthMarker constructs a marker for path and probes the parent -// directory for writability. On failure it logs a single Warn with a -// fix hint and returns a marker in degraded mode. -func newHealthMarker(path string) *healthMarker { - return health.NewMarker(path) -} - -// runProbe runs in the separate `health` subcommand process. -func runProbe(path string) { - health.RunProbe(path) -} - -// probeCheck implements the health-probe decision without calling -// os.Exit, so it can be unit-tested. -func probeCheck(path string) int { - return health.ProbeCheck(path) -} diff --git a/health_test.go b/health_test.go deleted file mode 100644 index c1a5bcc..0000000 --- a/health_test.go +++ /dev/null @@ -1,137 +0,0 @@ -package main - -import ( - "os" - "path/filepath" - "testing" - - "pgregory.net/rapid" -) - -// TestHealthMarker_SetCreatesAndRemoves covers the happy path: a writable -// dir, Set(true) creates the marker, Set(false) removes it. -func TestHealthMarker_SetCreatesAndRemoves(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - m.Set(true) - if _, err := os.Stat(path); err != nil { - t.Fatalf("marker should exist after Set(true): %v", err) - } - - m.Set(false) - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should not exist after Set(false): %v", err) - } -} - -// TestHealthMarker_Cleanup confirms Cleanup removes the marker and is -// safe to call when the marker already does not exist. -func TestHealthMarker_Cleanup(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - m.Set(true) - m.Cleanup() - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should be gone after Cleanup: %v", err) - } - - // Second cleanup must not error. - m.Cleanup() -} - -// TestHealthMarker_DegradedMode verifies that when the marker directory -// is not writable, the marker enters degraded mode: Set and Cleanup are -// no-ops and no file is ever created. -func TestHealthMarker_DegradedMode(t *testing.T) { - // Create a read-only directory to simulate a compose misconfiguration. - dir := filepath.Join(t.TempDir(), "ro") - if err := os.Mkdir(dir, 0o500); err != nil { - t.Fatalf("mkdir ro: %v", err) - } - - path := filepath.Join(dir, ".healthy") - m := newHealthMarker(path) - - // In a non-writable dir, Set(true) should not create the file. - // If the env bypasses directory mode (root, certain filesystems), skip. - m.Set(true) - if _, err := os.Stat(path); err == nil { - t.Skip("test environment bypasses directory mode; skipping") - } - - m.Cleanup() // must not panic -} - -// TestHealthMarker_Idempotent ensures repeated Set(true) and Set(false) -// calls are safe and converge to the expected file state. -func TestHealthMarker_Idempotent(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - - for range 3 { - m.Set(true) - } - if _, err := os.Stat(path); err != nil { - t.Fatalf("marker should exist after repeated Set(true): %v", err) - } - - for range 3 { - m.Set(false) - } - if _, err := os.Stat(path); !os.IsNotExist(err) { - t.Fatalf("marker should not exist after repeated Set(false): %v", err) - } -} - -// TestHealthMarker_Property exercises arbitrary Set sequences and asserts -// that the file state always matches the last Set argument. -func TestHealthMarker_Property(t *testing.T) { - dir := t.TempDir() - rapid.Check(t, func(rt *rapid.T) { - // A fresh subdir per iteration so markers from earlier iterations - // don't leak into later ones. - nonce := rapid.StringMatching(`[a-z0-9]{8}`).Draw(rt, "nonce") - subdir := filepath.Join(dir, nonce) - if err := os.Mkdir(subdir, 0o755); err != nil { - rt.Fatalf("mkdir subdir: %v", err) - } - path := filepath.Join(subdir, ".healthy") - m := newHealthMarker(path) - - calls := rapid.SliceOfN(rapid.Bool(), 1, 30).Draw(rt, "calls") - for _, ok := range calls { - m.Set(ok) - } - last := calls[len(calls)-1] - - _, err := os.Stat(path) - exists := err == nil - if exists != last { - rt.Fatalf("after Set(%v): exists=%v, want %v", - last, exists, last) - } - }) -} - -// TestProbeCheck_Writable confirms the probe succeeds when the marker exists. -func TestProbeCheck_Writable(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - m := newHealthMarker(path) - m.Set(true) - - if code := probeCheck(path); code != 0 { - t.Fatalf("probeCheck with marker present: got %d, want 0", code) - } -} - -// TestProbeCheck_Missing confirms the probe fails when marker is absent -// in a writable directory. -func TestProbeCheck_Missing(t *testing.T) { - path := filepath.Join(t.TempDir(), ".healthy") - - if code := probeCheck(path); code != 1 { - t.Fatalf("probeCheck with marker absent: got %d, want 1", code) - } -} diff --git a/internal/cache/cache.go b/internal/cache/cache.go index 48300bd..ad59f83 100644 --- a/internal/cache/cache.go +++ b/internal/cache/cache.go @@ -13,7 +13,6 @@ import ( "encoding/json" "errors" "fmt" - "io" "log/slog" "os" "sync" @@ -68,7 +67,7 @@ func New() *Cache { } // LoadFrom reads the cache from the given path. A missing file returns nil -// (fresh start). Capped at maxCacheSize bytes via io.LimitReader. Warns if +// (fresh start). Capped at maxCacheSize bytes via atomicfile.LoadJSON. Warns if // the file has permissive mode bits set, as tokens are stored here. func (c *Cache) LoadFrom(path string) error { c.mu.Lock() @@ -77,33 +76,21 @@ func (c *Cache) LoadFrom(path string) error { c.data.LanguageProfiles = make(map[string]map[string]string) c.data.UserTokens = make(map[string]string) - f, err := os.Open(path) - if err != nil { - if errors.Is(err, os.ErrNotExist) { - return nil - } - return err - } - defer f.Close() - - if info, statErr := f.Stat(); statErr == nil { - if info.Mode().Perm()&0o077 != 0 { - slog.Warn("cache file has permissive mode; user tokens may be "+ - "readable by other host users", - "path", path, - "mode", info.Mode().Perm().String()) + info, statErr := os.Stat(path) + if statErr != nil { + if errors.Is(statErr, os.ErrNotExist) { + return nil // missing file = fresh start } + return statErr } - - data, err := io.ReadAll(io.LimitReader(f, maxCacheSize)) - if err != nil { - return err + if info.Mode().Perm()&0o077 != 0 { + slog.Warn("cache file has permissive mode; user tokens may be "+ + "readable by other host users", + "path", path, "mode", info.Mode().Perm().String()) } - if int64(len(data)) >= maxCacheSize { - slog.Warn("cache file at size limit, may be truncated", - "path", path, "bytes", len(data), "limit", maxCacheSize) - } - return json.Unmarshal(data, &c.data) + // LoadJSON bounds the read at maxCacheSize and unmarshals; an oversize + // file returns an error (caller starts fresh) rather than truncating. + return atomicfile.LoadJSON(context.Background(), path, maxCacheSize, &c.data) } // SaveTo atomically writes the cache to the given path (temp file + rename) @@ -120,8 +107,19 @@ func (c *Cache) SaveTo(path string) error { return fmt.Errorf("marshal: %w", err) } - if err := atomicfile.WriteFile(context.Background(), path, data, atomicfile.WithMode(0o600)); err != nil { - return err + if err := atomicfile.SaveBytes(path, data, 0o600); err != nil { + // cache.json is reconstructible: a parent-dir fsync failure + // (PhaseDirSync) means the cache was written to disk but its + // durability across an immediate crash is not guaranteed. Don't + // fail the save for that — the data is present and would be rebuilt + // from Plex on the next run anyway. Surface it as a warning instead. + var we *atomicfile.WriteError + if errors.As(err, &we) && we.Phase == atomicfile.PhaseDirSync { + slog.Warn("cache written but parent-dir fsync unconfirmed; not guaranteed durable across an immediate crash", + "path", path, "error", err) + } else { + return err + } } slog.Debug("cache saved", "path", path, "bytes", len(data)) return nil diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go index de2a262..5a3c093 100644 --- a/internal/cache/cache_test.go +++ b/internal/cache/cache_test.go @@ -518,10 +518,16 @@ func TestCacheSaveToEnforces0600Permissions(t *testing.T) { func TestCacheSaveToRejectsBadDir(t *testing.T) { t.Parallel() + // Parent is a regular file, so MkdirAll fails with ENOTDIR even as root; + // SaveTo (via atomicfile.SaveBytes, which auto-creates the dir) must error. + f := filepath.Join(t.TempDir(), "afile") + if err := os.WriteFile(f, []byte("x"), 0o600); err != nil { + t.Fatalf("setup: %v", err) + } c := New() - err := c.SaveTo("/nonexistent/path-never-created/cache.json") + err := c.SaveTo(filepath.Join(f, "subdir", "cache.json")) if err == nil { - t.Fatal("SaveTo() on bad dir should return error, got nil") + t.Fatal("SaveTo() under a file should return error, got nil") } } diff --git a/internal/plex/client.go b/internal/plex/client.go index 6eae168..67a542d 100644 --- a/internal/plex/client.go +++ b/internal/plex/client.go @@ -11,9 +11,10 @@ import ( "log/slog" "net/http" "net/url" - "os" "strings" "time" + + "github.com/cplieger/atomicfile" ) // maxResponseBody caps the number of bytes read from any single Plex JSON @@ -134,7 +135,8 @@ func newHTTPClient(caCertPath string) (*http.Client, error) { }, } if caCertPath != "" { - pemBytes, err := os.ReadFile(caCertPath) + const maxCACertSize = 1 << 20 // 1 MB + pemBytes, err := atomicfile.ReadBounded(context.Background(), caCertPath, maxCACertSize) if err != nil { return nil, fmt.Errorf("reading PLEX_CA_CERT_PATH=%q: %w", caCertPath, err) } diff --git a/main.go b/main.go index 21ea0b7..0adb735 100644 --- a/main.go +++ b/main.go @@ -20,10 +20,13 @@ import ( "log/slog" "os" "os/signal" + "path/filepath" "sync" "syscall" "time" + "github.com/cplieger/atomicfile" + "github.com/cplieger/health" "github.com/cplieger/plex-language-sync/internal/api" "github.com/cplieger/plex-language-sync/internal/cache" "github.com/cplieger/plex-language-sync/internal/ignore" @@ -55,7 +58,7 @@ const shutdownWaitBudget = 10 * time.Second func main() { if len(os.Args) > 1 && os.Args[1] == "health" { - runProbe(healthMarkerPath) + health.RunProbe(health.DefaultPath) } os.Exit(run()) } @@ -67,7 +70,7 @@ func run() int { cfg := loadConfig() logConfig(&cfg) - marker := newHealthMarker(healthMarkerPath) + marker := health.NewMarker(health.DefaultPath) marker.Set(false) client, err := plex.NewClient(cfg.plexURL, cfg.plexToken, cfg.caCertPath) @@ -101,6 +104,9 @@ func run() int { if err := c.LoadFrom(cachePath); err != nil { slog.Warn("cache load failed, starting fresh", "error", err) } + // Reap any temp orphaned by an interrupted SaveTo so they don't accumulate + // on the persistent /config volume. + atomicfile.CleanupStaleTemps(filepath.Dir(cachePath), time.Hour) // User manager — admin identity + cached shared-user tokens. um := users.NewManager(c) @@ -112,19 +118,19 @@ func run() int { um.InitialRefreshWithRetry(ctx, client, identity.MachineIdentifier, users.DefaultRefreshConfig()) marker.Set(true) - defer marker.Cleanup() - // A failed cache save on shutdown loses the latest learned language - // profiles and user tokens — treat as Error (operator-actionable) - // rather than the Warn used for transient mid-run save failures. + // Shutdown sequence: flag unhealthy first so Docker stops routing health + // probes as passing while the (slow) cache save runs, then persist the + // cache. Set(false) removes the marker, so no separate Cleanup is needed. + // A failed save here loses the latest learned language profiles and user + // tokens, so it is logged at Error (operator-actionable), not the Warn used + // for transient mid-run save failures. defer func() { + marker.Set(false) if err := c.SaveTo(cachePath); err != nil { slog.Error("cache save on shutdown failed, profiles may be lost", "path", cachePath, "error", err) } }() - // Signal unhealthy immediately on shutdown so Docker stops routing - // health probes as passing while cache save / cleanup run. - defer marker.Set(false) // Compose the sync and scheduler subsystems from the concrete // internal/* packages, passing api.* interfaces so the subsystems