diff --git a/AGENTS.md b/AGENTS.md index b60c794..711d355 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,7 +33,8 @@ Read this before changing `artifact-fs`. ## Non-obvious CLI/runtime behavior - `ARTIFACT_FS_ROOT` is the state root. `artifact-fs daemon --root` is the mount root. They are different things. -- `add-repo` is one-shot: register repo, clone blobless, build the initial snapshot, then exit. It does not mount FUSE or start background goroutines. +- `add-repo` is one-shot by default: register repo, clone blobless, build the initial snapshot, then exit. It does not mount FUSE or start background goroutines. +- `add-repo --async` only registers prepare state. The daemon mounts a gated placeholder, prepares clone/fetch and snapshot in the background, then opens the gate and starts watcher/refresh. - `daemon` is long-running: it mounts registered repos and starts watcher, refresh, and hydrator workers. - `git.CloneBlobless` already populates the git index with `read-tree HEAD`; be careful about extra index resets because they can discard staged state. diff --git a/README.md b/README.md index 38b5dee..a494531 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ Quick start against a public repo: ```bash export ARTIFACT_FS_ROOT=/tmp/artifact-fs-test -# Register and clone (returns immediately) +# Register, clone, and build the initial snapshot ./artifact-fs add-repo \ --name workers-sdk \ --remote https://github.com/cloudflare/workers-sdk.git \ @@ -102,6 +102,44 @@ Use `--hydration-concurrency` to control the number of parallel blob-fetch worke ./artifact-fs daemon --root /tmp --hydration-concurrency 8 ``` +## Async repo preparation + +By default, `add-repo` waits for the blobless clone and initial snapshot before returning. Use `--async` when the daemon should prepare the repo in the background: + +```bash +./artifact-fs add-repo \ + --name workers-sdk \ + --remote https://github.com/cloudflare/workers-sdk.git \ + --branch main \ + --mount-root /tmp \ + --async +``` + +The daemon mounts a placeholder immediately. Operations inside that repo mount, such as `ls`, `less`, or `git -C /tmp/workers-sdk status`, wait until the clone/fetch and snapshot publish have completed. If preparation fails, those operations return an I/O error until preparation is retried: + +```bash +./artifact-fs status --name workers-sdk +./artifact-fs prepare --name workers-sdk +``` + +Async HTTPS remotes must use ambient credentials, such as a configured Git credential helper or repo-local Git config. Inline credentials in the remote URL are rejected for async repositories. + +For workflows that create the gitdir separately, `--prepared-gitdir` makes the async step fetch and prepare an existing gitdir instead of running `git clone`: + +```bash +git init --separate-git-dir /tmp/workers-sdk.git --initial-branch main /tmp/workers-sdk +git -C /tmp/workers-sdk remote add origin https://github.com/cloudflare/workers-sdk.git + +./artifact-fs add-repo \ + --name workers-sdk \ + --branch main \ + --mount-root /tmp \ + --async \ + --prepared-gitdir \ + --git-dir /tmp/workers-sdk.git \ + --fetch-ref main +``` + ## Sandboxes and Containers [`examples/Dockerfile`](examples/Dockerfile) builds artifact-fs and starts a FUSE-mounted repo inside a container. The container requires `--cap-add SYS_ADMIN --device /dev/fuse` for FUSE access. @@ -129,7 +167,7 @@ On hosts with AppArmor enabled (Ubuntu default), add `--security-opt apparmor:un ## Architecture -ArtifactFS has two distinct phases: a one-shot **setup** (`add-repo`) that performs a fast blobless clone of a repo, and a long-running **daemon** that mounts it via FUSE and serves file operations. +ArtifactFS has two distinct phases: a one-shot **setup** (`add-repo`) that registers and usually prepares a fast blobless clone, and a long-running **daemon** that mounts it via FUSE and serves file operations. With `add-repo --async`, setup only registers the repo; the daemon performs clone/fetch and snapshot publishing while FUSE operations wait behind a readiness gate. ``` ┌─────────────────────────────────────────────────┐ @@ -174,7 +212,7 @@ ArtifactFS has two distinct phases: a one-shot **setup** (`add-repo`) that perfo ### Data flow -1. **Clone** -- `add-repo` runs `git clone --filter=blob:none` (blobless). Only commits, trees, and refs are fetched. No file content is downloaded. +1. **Clone/fetch** -- `add-repo` runs `git clone --filter=blob:none` (blobless) unless `--async` is used. In async mode, the daemon performs either the blobless clone or a fetch into a prepared gitdir. Only commits, trees, and refs are fetched. No file content is downloaded. 2. **Index** -- `git ls-tree -r -t -z HEAD` enumerates every path in the tree. Sizes are resolved locally via `git cat-file --batch-check` with `GIT_NO_LAZY_FETCH=1` to avoid network round-trips. The result is bulk-inserted into a SQLite `base_nodes` table as a new generation. diff --git a/e2e_async_test.go b/e2e_async_test.go new file mode 100644 index 0000000..7f49237 --- /dev/null +++ b/e2e_async_test.go @@ -0,0 +1,232 @@ +//go:build !windows + +package main + +import ( + "context" + "log/slog" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/cloudflare/artifact-fs/internal/daemon" + "github.com/cloudflare/artifact-fs/internal/logging" + "github.com/cloudflare/artifact-fs/internal/model" +) + +func TestE2EAsyncPreparedGitDirBlocksUntilReady(t *testing.T) { + if os.Getenv("AFS_RUN_E2E_TESTS") != "1" { + t.Skip("skipping e2e tests (set AFS_RUN_E2E_TESTS=1 to run)") + } + skipIfNoFUSE(t) + + remoteURL := os.Getenv("AFS_E2E_REPO") + if remoteURL == "" { + remoteURL = createLocalTestRepo(t) + } + preparedGitDir, preparedWorktree := createPreparedGitDir(t, remoteURL) + _ = preparedWorktree + + unblock := filepath.Join(t.TempDir(), "unblock-fetch") + installBlockingGitFetchWrapper(t, unblock) + + repo := newAsyncPreparedE2ERepo(t, preparedGitDir, "main") + + waitForCondition(t, 10*time.Second, "async repo preparing", func() (bool, string) { + st, err := repo.svc.Status(context.Background(), repoName) + if err != nil { + return false, err.Error() + } + if st.State == model.PrepareStatePreparing { + return true, "" + } + return false, "state=" + st.State + }) + + done := make(chan error, 1) + go func() { + entries, err := os.ReadDir(repo.mountPath) + if err == nil && len(entries) == 0 { + err = os.ErrNotExist + } + done <- err + }() + + select { + case err := <-done: + t.Fatalf("ReadDir returned before async prepare was released: %v", err) + case <-time.After(500 * time.Millisecond): + } + + if err := os.WriteFile(unblock, []byte("go\n"), 0o644); err != nil { + t.Fatal(err) + } + select { + case err := <-done: + if err != nil { + t.Fatalf("ReadDir after prepare release: %v", err) + } + case <-time.After(30 * time.Second): + t.Fatal("ReadDir did not unblock after async prepare completed") + } + + entries := lsDir(t, repo.mountPath) + assertContains(t, entries, ".git") + assertContains(t, entries, "README.md") + assertGitStatus(t, repo.mountPath, map[string]string{}) +} + +func TestE2EAsyncPreparedGitDirFailureThenRetry(t *testing.T) { + if os.Getenv("AFS_RUN_E2E_TESTS") != "1" { + t.Skip("skipping e2e tests (set AFS_RUN_E2E_TESTS=1 to run)") + } + skipIfNoFUSE(t) + + remoteURL := os.Getenv("AFS_E2E_REPO") + if remoteURL == "" { + remoteURL = createLocalTestRepo(t) + } + preparedGitDir, preparedWorktree := createPreparedGitDir(t, "file://"+filepath.Join(t.TempDir(), "missing.git")) + repo := newAsyncPreparedE2ERepo(t, preparedGitDir, "main") + + waitForCondition(t, 10*time.Second, "async prepare failure", func() (bool, string) { + st, err := repo.svc.Status(context.Background(), repoName) + if err != nil { + return false, err.Error() + } + if st.State == model.PrepareStateFailed && st.PrepareError != "" { + return true, "" + } + return false, "state=" + st.State + " prepare_error=" + st.PrepareError + }) + + if _, err := os.ReadDir(repo.mountPath); err == nil { + t.Fatal("ReadDir unexpectedly succeeded after prepare failure") + } + + gitCmd(t, preparedWorktree, "remote", "set-url", "origin", remoteURL) + if err := repo.svc.Prepare(context.Background(), repoName); err != nil { + t.Fatalf("prepare retry: %v", err) + } + + waitForCondition(t, 30*time.Second, "async prepare retry ready", func() (bool, string) { + st, err := repo.svc.Status(context.Background(), repoName) + if err != nil { + return false, err.Error() + } + if st.State == "mounted" && st.PrepareError == "" { + return true, "" + } + return false, "state=" + st.State + " prepare_error=" + st.PrepareError + }) + + entries := lsDir(t, repo.mountPath) + assertContains(t, entries, "README.md") + assertGitStatus(t, repo.mountPath, map[string]string{}) +} + +func createPreparedGitDir(t *testing.T, remoteURL string) (gitDir string, worktree string) { + t.Helper() + tmp := t.TempDir() + gitDir = filepath.Join(tmp, "prepared.git") + worktree = filepath.Join(tmp, "prepared") + run(t, "", "git", "init", "--separate-git-dir", gitDir, "--initial-branch", "main", worktree) + run(t, worktree, "git", "remote", "add", "origin", remoteURL) + return gitDir, worktree +} + +func installBlockingGitFetchWrapper(t *testing.T, unblockPath string) { + t.Helper() + realGit, err := exec.LookPath("git") + if err != nil { + t.Fatal(err) + } + wrapperDir := t.TempDir() + wrapperPath := filepath.Join(wrapperDir, "git") + script := `#!/bin/sh +for arg in "$@"; do + if [ "$arg" = "fetch" ]; then + while [ ! -f "$AFS_ASYNC_GIT_UNBLOCK" ]; do + sleep 0.05 + done + break + fi +done +exec "$AFS_REAL_GIT" "$@" +` + if err := os.WriteFile(wrapperPath, []byte(script), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("AFS_REAL_GIT", realGit) + t.Setenv("AFS_ASYNC_GIT_UNBLOCK", unblockPath) + t.Setenv("PATH", wrapperDir+string(os.PathListSeparator)+os.Getenv("PATH")) +} + +func newAsyncPreparedE2ERepo(t *testing.T, preparedGitDir string, fetchRef string) *mountedE2ERepo { + t.Helper() + root, err := os.MkdirTemp("", "artifact-fs-e2e-async-root-*") + if err != nil { + t.Fatal(err) + } + mountDir, err := os.MkdirTemp("", "artifact-fs-e2e-async-mount-*") + if err != nil { + _ = os.RemoveAll(root) + t.Fatal(err) + } + mountPath := filepath.Join(mountDir, repoName) + if err := os.MkdirAll(mountPath, 0o755); err != nil { + _ = os.RemoveAll(mountDir) + _ = os.RemoveAll(root) + t.Fatal(err) + } + + ctx, cancel := context.WithCancel(context.Background()) + logger := logging.NewJSONLogger(os.Stderr, slog.LevelWarn) + svc, err := daemon.New(ctx, root, logger) + if err != nil { + cancel() + t.Fatal(err) + } + svc.SetMountRoot(mountDir) + + cfg := model.RepoConfig{ + Name: repoName, + ID: model.RepoID(repoName), + Branch: "main", + RefreshInterval: 5 * time.Minute, + MountRoot: mountDir, + GitDir: preparedGitDir, + PreparedGitDir: true, + FetchRef: fetchRef, + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, daemon.AddRepoOptions{Async: true}); err != nil { + cancel() + _ = svc.Close() + t.Fatalf("add-repo async prepared-gitdir: %v", err) + } + + errCh := make(chan error, 1) + go func() { errCh <- svc.Start(ctx) }() + + if !waitForMount(t, mountPath, 60*time.Second) { + cancel() + _ = svc.Close() + t.Fatal("FUSE mount did not appear within timeout") + } + + repo := &mountedE2ERepo{ + root: root, + mountDir: mountDir, + mountPath: mountPath, + svc: svc, + cancel: cancel, + errCh: errCh, + } + t.Cleanup(func() { + repo.close(t) + }) + return repo +} diff --git a/e2e_setup_darwin_test.go b/e2e_setup_darwin_test.go index 5b63116..cfc5950 100644 --- a/e2e_setup_darwin_test.go +++ b/e2e_setup_darwin_test.go @@ -8,12 +8,24 @@ import ( "testing" ) -// skipIfNoFUSE skips the test if macFUSE is not installed. +// skipIfNoFUSE skips the test if macFUSE is not installed or the +// mount helper cannot be executed by this test process. Some sandboxed +// environments expose the macFUSE bundle but deny exec of mount_macfuse; +// without this preflight every e2e case waits for its mount timeout. func skipIfNoFUSE(t *testing.T) { t.Helper() if _, err := os.Stat("/Library/Filesystems/macfuse.fs"); err != nil { t.Skip("skipping: macFUSE not installed") } + helper := "/Library/Filesystems/macfuse.fs/Contents/Resources/mount_macfuse" + cmd := exec.Command(helper, "--help") + out, err := cmd.CombinedOutput() + if err == nil { + return + } + if strings.Contains(err.Error(), "operation not permitted") || strings.Contains(string(out), "operation not permitted") { + t.Skipf("skipping: macFUSE mount helper is not executable in this environment: %v", err) + } } // configGitSafeDir adds safe.directory entries for the mount path. diff --git a/internal/auth/redact.go b/internal/auth/redact.go index 8531345..48f76f9 100644 --- a/internal/auth/redact.go +++ b/internal/auth/redact.go @@ -1,6 +1,7 @@ package auth import ( + "net" "net/url" "regexp" "strings" @@ -14,7 +15,10 @@ func RedactRemoteURL(raw string) string { } u, err := url.Parse(raw) if err != nil { - return tokenLike.ReplaceAllString(raw, `$1=REDACTED`) + return redactMalformedURL(raw) + } + if u.User == nil && strings.Contains(raw, "@") && isMalformedHTTPUserinfo(raw, u) { + return redactMalformedURL(raw) } if u.User != nil { username := u.User.Username() @@ -22,26 +26,297 @@ func RedactRemoteURL(raw string) string { u.User = url.User("REDACTED") } } - if u.RawQuery != "" { - u.RawQuery = tokenLike.ReplaceAllString(u.RawQuery, `$1=REDACTED`) + if u.RawQuery != "" || u.ForceQuery { + u.RawQuery = "REDACTED" + } + if u.Fragment != "" || strings.Contains(raw, "#") { + u.Fragment = "REDACTED" } return u.String() } +func redactMalformedURL(raw string) string { + redacted := raw + authorityStart := malformedAuthorityStart(redacted) + if authorityStart < 0 { + return redactQueryFragment(tokenLike.ReplaceAllString(redacted, `$1=REDACTED`)) + } + userinfoEnd := len(redacted) + if relEnd := strings.IndexAny(redacted[authorityStart:], "?#"); relEnd >= 0 { + userinfoEnd = authorityStart + relEnd + } + if at := strings.LastIndex(redacted[authorityStart:userinfoEnd], "@"); at >= 0 { + redacted = redacted[:authorityStart] + "REDACTED" + redacted[authorityStart+at:] + } else if userinfoEnd < len(redacted) && strings.Contains(redacted[userinfoEnd:], "@") && redacted[authorityStart:userinfoEnd] != "" && !strings.Contains(redacted[authorityStart:userinfoEnd], "/") { + redacted = redacted[:authorityStart] + "REDACTED" + redacted[userinfoEnd:] + } + redacted = tokenLike.ReplaceAllString(redacted, `$1=REDACTED`) + return redactQueryFragment(redacted) +} + +func redactQueryFragment(raw string) string { + q := strings.Index(raw, "?") + f := strings.Index(raw, "#") + if q >= 0 && (f < 0 || q < f) { + if f >= 0 { + return raw[:q+1] + "REDACTED#REDACTED" + } + return raw[:q+1] + "REDACTED" + } + if f >= 0 { + return raw[:f+1] + "REDACTED" + } + return raw +} + +func HasInlineCredentials(raw string) bool { + if strings.ContainsAny(raw, "?#") { + return true + } + urlLike := strings.Contains(raw, "://") + u, err := url.Parse(raw) + if err != nil { + if malformedAuthorityStart(raw) >= 0 && strings.Contains(raw, "@") { + return true + } + if urlLike && strings.ContainsAny(raw, "@?#") { + return true + } + return tokenLike.MatchString(raw) + } + if u.User == nil && strings.Contains(raw, "@") && isMalformedHTTPUserinfo(raw, u) { + return true + } + if u.RawQuery != "" || u.ForceQuery || u.Fragment != "" || strings.Contains(raw, "#") { + return true + } + if tokenLike.MatchString(u.RawQuery) { + return true + } + if u.User == nil { + return false + } + username := u.User.Username() + _, hasPassword := u.User.Password() + switch strings.ToLower(u.Scheme) { + case "http", "https": + return username != "" || hasPassword + default: + return hasPassword + } +} + +func malformedAuthorityStart(raw string) int { + lower := strings.ToLower(strings.TrimSpace(raw)) + if i := strings.Index(lower, "://"); i >= 0 { + return i + len("://") + } + for _, prefix := range []string{"https://", "http://", "ssh://", "git://", "https:/", "http:/", "ssh:/", "git:/", "https//", "http//", "ssh//", "git//", "https:", "http:", "ssh:", "git:"} { + if strings.HasPrefix(lower, prefix) { + return len(prefix) + } + } + return -1 +} + +func isHTTPRemoteLike(raw string, scheme string) bool { + switch strings.ToLower(scheme) { + case "http", "https": + return true + } + return malformedAuthorityStart(raw) >= 0 +} + +func isMalformedHTTPUserinfo(raw string, u *url.URL) bool { + if !isHTTPRemoteLike(raw, u.Scheme) { + return false + } + if u.Host == "" { + return true + } + if strings.HasPrefix(u.Path, "/@") { + return true + } + if rawUsernameBeforeDelimiter(raw) { + return true + } + if !rawUserinfoCandidateHasPassword(raw) { + return false + } + pathBeforeAt := u.Path + if at := strings.Index(pathBeforeAt, "@"); at >= 0 { + pathBeforeAt = pathBeforeAt[:at] + } + if strings.Count(strings.Trim(pathBeforeAt, "/"), "/") > 0 { + host := u.Hostname() + if strings.Contains(host, ".") || host == "localhost" || net.ParseIP(host) != nil { + return false + } + } + return true +} + +func rawUsernameBeforeDelimiter(raw string) bool { + authorityStart := malformedAuthorityStart(raw) + if authorityStart < 0 { + return false + } + at := strings.LastIndex(raw[authorityStart:], "@") + if at < 0 { + return false + } + at += authorityStart + if relEnd := strings.IndexAny(raw[authorityStart:at], "?#"); relEnd >= 0 { + candidate := raw[authorityStart : authorityStart+relEnd] + return candidate != "" && !strings.Contains(candidate, "/") + } + return false +} + +func rawUserinfoCandidateHasPassword(raw string) bool { + authorityStart := malformedAuthorityStart(raw) + if authorityStart < 0 { + return false + } + userinfoEnd := len(raw) + if relEnd := strings.IndexAny(raw[authorityStart:], "?#"); relEnd >= 0 { + userinfoEnd = authorityStart + relEnd + } + at := strings.LastIndex(raw[authorityStart:userinfoEnd], "@") + if at < 0 { + return false + } + candidate := raw[authorityStart : authorityStart+at] + return strings.Contains(candidate, ":") +} + func RedactString(s string) string { if s == "" { return "" } - s = tokenLike.ReplaceAllString(s, `$1=REDACTED`) // Redact any URL-shaped substring with credentials (not just those with @) - if strings.Contains(s, "://") { + if strings.Contains(s, "://") || strings.ContainsAny(s, "?#@") { parts := strings.Split(s, " ") for i := range parts { - if strings.Contains(parts[i], "://") { - parts[i] = RedactRemoteURL(parts[i]) + if strings.Contains(parts[i], "://") || strings.ContainsAny(parts[i], "?#@") { + parts[i] = redactRemoteToken(parts[i]) } } s = strings.Join(parts, " ") } + s = tokenLike.ReplaceAllString(s, `$1=REDACTED`) return s } + +func redactRemoteToken(token string) string { + start := strings.IndexFunc(token, func(r rune) bool { + return !strings.ContainsRune("'\"([{<", r) + }) + if start < 0 { + return token + } + end := strings.LastIndexFunc(token, func(r rune) bool { + return !strings.ContainsRune("'\")]}>,.:", r) + }) + if end < start { + return token + } + core := token[start : end+1] + return token[:start] + redactRemoteCore(core) + token[end+1:] +} + +func redactRemoteCore(core string) string { + var b strings.Builder + for len(core) > 0 { + sep := strings.IndexAny(core, ",;") + if sep < 0 { + b.WriteString(redactSingleRemoteCore(core)) + break + } + next := strings.TrimLeft(core[sep+1:], " ") + if !hasSeparateRemoteMarker(next) && (separatorInsideUserinfo(core, sep) || !hasRemoteMarker(next)) { + b.WriteString(redactSingleRemoteCore(core)) + break + } + b.WriteString(redactSingleRemoteCore(core[:sep])) + b.WriteByte(core[sep]) + core = core[sep+1:] + } + return b.String() +} + +func separatorInsideUserinfo(core string, sep int) bool { + remoteStart := remoteStartIndex(core[:sep]) + if strings.Contains(core[remoteStart:sep], "@") { + return false + } + authorityStart := authorityStartInCore(core, remoteStart) + if authorityStart < 0 { + return false + } + candidate := core[authorityStart:sep] + if strings.Contains(candidate, "/") || candidate == "" { + return false + } + nextBoundary := len(core) + if relEnd := strings.IndexAny(core[sep+1:], " "); relEnd >= 0 { + nextBoundary = sep + 1 + relEnd + } + return strings.Contains(core[sep+1:nextBoundary], "@") +} + +func authorityStartInCore(core string, remoteStart int) int { + rest := strings.ToLower(core[remoteStart:]) + for _, marker := range []string{"://", ":/", "//", ":"} { + if i := strings.Index(rest, marker); i >= 0 { + return remoteStart + i + len(marker) + } + } + return -1 +} + +func hasSeparateRemoteMarker(s string) bool { + lower := strings.ToLower(s) + for _, marker := range []string{"https://", "http://", "ssh://", "git://"} { + if strings.HasPrefix(lower, marker) { + return true + } + } + return false +} + +func hasRemoteMarker(s string) bool { + if strings.Contains(s, "://") { + return true + } + lower := strings.ToLower(s) + for _, marker := range []string{"https:/", "http:/", "ssh:/", "git:/", "https//", "http//", "ssh//", "git//", "https:", "http:", "ssh:", "git:"} { + if strings.HasPrefix(lower, marker) { + return true + } + } + return false +} + +func redactSingleRemoteCore(core string) string { + remoteStart := remoteStartIndex(core) + return core[:remoteStart] + RedactRemoteURL(core[remoteStart:]) +} + +func remoteStartIndex(s string) int { + best := -1 + for _, marker := range []string{"https://", "http://", "ssh://", "git://", "https:/", "http:/", "ssh:/", "git:/", "https//", "http//", "ssh//", "git//", "https:", "http:", "ssh:", "git:"} { + if i := strings.Index(strings.ToLower(s), marker); i >= 0 && (best < 0 || i < best) { + best = i + } + } + if best >= 0 { + return best + } + if at := strings.Index(s, "@"); at >= 0 && strings.ContainsAny(s[at:], "?#") { + if eq := strings.LastIndex(s[:at], "="); eq >= 0 { + return eq + 1 + } + } + return 0 +} diff --git a/internal/auth/redact_test.go b/internal/auth/redact_test.go index db0629c..61ebee0 100644 --- a/internal/auth/redact_test.go +++ b/internal/auth/redact_test.go @@ -11,6 +11,308 @@ func TestRedactRemoteURL(t *testing.T) { if containsAny(out, []string{"token123", "abc"}) { t.Fatalf("token leaked in output: %s", out) } + + out = RedactRemoteURL("https://github.com/org/repo.git?sig=s3cr3t") + if containsAny(out, []string{"sig", "s3cr3t"}) { + t.Fatalf("query secret leaked in output: %s", out) + } + + out = RedactRemoteURL("https://github.com/org/repo.git#access_token=ghp_secret") + if containsAny(out, []string{"access_token", "ghp_secret"}) { + t.Fatalf("fragment secret leaked in output: %s", out) + } + + out = RedactRemoteURL("https://ghp_secret%zz@github.com/org/repo.git") + if containsAny(out, []string{"ghp_secret", "%zz"}) { + t.Fatalf("malformed userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https://user:pa#ss@example.com/org/repo.git") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("malformed userinfo with fragment delimiter leaked in output: %s", out) + } + + out = RedactRemoteURL("https://ghp_secret#@github.com/org/repo.git") + if containsAny(out, []string{"ghp_secret"}) { + t.Fatalf("malformed username-only userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https://user:pa/ss@example.com/org/repo.git") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("malformed userinfo with path delimiter leaked in output: %s", out) + } + + out = RedactRemoteURL("https://ghp_secret%zz:password=abc@example.com/org/repo.git") + if containsAny(out, []string{"ghp_secret", "%zz", "abc"}) { + t.Fatalf("malformed userinfo with token-like password leaked in output: %s", out) + } + + out = RedactRemoteURL("git@github.com:org/repo.git?sig=s3cr3t") + if containsAny(out, []string{"sig", "s3cr3t"}) { + t.Fatalf("scp-style query secret leaked in output: %s", out) + } + + out = RedactRemoteURL("https://ghp_secret/@github.com/org/repo.git") + if containsAny(out, []string{"ghp_secret"}) { + t.Fatalf("malformed https userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https:/user:ghp_secret@github.com/org/repo.git") + if containsAny(out, []string{"ghp_secret", "user"}) { + t.Fatalf("missing-slash https userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https://user:123/ss@example.com/org/repo.git") + if containsAny(out, []string{"user", "123", "ss"}) { + t.Fatalf("numeric-prefix https userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https://user:123/dir/ss@example.com/org/repo.git") + if containsAny(out, []string{"user", "123", "ss"}) { + t.Fatalf("multi-segment https userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https://user.name:pa/ss@example.com/org/repo.git") + if containsAny(out, []string{"user.name", "pa", "ss"}) { + t.Fatalf("dotted malformed https userinfo leaked in output: %s", out) + } + + out = RedactRemoteURL("https//ghp_secret@github.com/org/repo.git") + if containsAny(out, []string{"ghp_secret"}) { + t.Fatalf("missing-colon https userinfo leaked in output: %s", out) + } + + out = RedactString("fatal: https://ghp_secret%zz:password=abc@example.com/org/repo.git failed") + if containsAny(out, []string{"ghp_secret", "%zz", "abc"}) { + t.Fatalf("redacted string leaked malformed credentials: %s", out) + } + + out = RedactString("fatal: unable to access 'https://user:pass@example.com/org/repo.git/': failed") + if containsAny(out, []string{"user", "pass"}) { + t.Fatalf("quoted URL leaked credentials: %s", out) + } + + out = RedactString("fatal: git@github.com:org/repo.git?sig=s3cr3t failed") + if containsAny(out, []string{"sig", "s3cr3t"}) { + t.Fatalf("scp-style query leaked in redacted string: %s", out) + } + + out = RedactRemoteURL("ssh://git:sec%zzret@example.com/org/repo.git") + if containsAny(out, []string{"sec", "%zz", "ret"}) { + t.Fatalf("malformed ssh userinfo leaked in output: %s", out) + } + + out = RedactString("fatal: ssh:/git:pa/ss@github.com/org/repo.git failed") + if containsAny(out, []string{"pa/ss"}) { + t.Fatalf("malformed ssh-style userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: remote=ssh:/git:pa/ss@github.com/org/repo.git failed") + if containsAny(out, []string{"pa/ss"}) { + t.Fatalf("prefixed malformed ssh-style userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https:/user:ghp_secret@github.com/org/repo.git failed") + if containsAny(out, []string{"ghp_secret", "user"}) { + t.Fatalf("http-like typo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: remote=https:/user:ghp_secret@github.com/org/repo.git failed") + if containsAny(out, []string{"ghp_secret", "user"}) { + t.Fatalf("prefixed http-like typo leaked in redacted string: %s", out) + } + + out = RedactString("remotes=ssh://git@github.com/org/repo.git,https://ghp_secret@example.com/private.git") + if containsAny(out, []string{"ghp_secret"}) { + t.Fatalf("second URL credential leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https://github.com/org/repo.git?sig=abc,def failed") + if containsAny(out, []string{"abc", "def"}) { + t.Fatalf("query value leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https://user:pa,ss@example.com/org/repo.git failed") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("comma-containing userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https://user:pa;http:ss@example.com/org/repo.git failed") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("semicolon-containing userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https://user:pa,http:ss?x@example.com/org/repo.git failed") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("comma/query malformed userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https://user:pa;http:ss#x@example.com/org/repo.git failed") + if containsAny(out, []string{"user", "pa", "ss"}) { + t.Fatalf("semicolon/fragment malformed userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https//ghp_secret,http:ss?x@example.com/org/repo.git failed") + if containsAny(out, []string{"ghp_secret", "ss"}) { + t.Fatalf("missing-colon comma/query malformed userinfo leaked in redacted string: %s", out) + } + + out = RedactString("fatal: https//ghp_secret;http:ss#x@example.com/org/repo.git failed") + if containsAny(out, []string{"ghp_secret", "ss"}) { + t.Fatalf("missing-colon semicolon/fragment malformed userinfo leaked in redacted string: %s", out) + } + + out = RedactRemoteURL("https://github.com/%zz?access_token=abc@def") + if containsAny(out, []string{"abc", "def"}) { + t.Fatalf("malformed query with at sign leaked in output: %s", out) + } +} + +func TestHasInlineCredentials(t *testing.T) { + tests := []struct { + name string + raw string + want bool + }{ + { + name: "https userinfo", + raw: "https://token@example.com/org/repo.git", + want: true, + }, + { + name: "token query parameter", + raw: "https://github.com/org/repo.git?access_token=secret", + want: true, + }, + { + name: "token-like query parameter", + raw: "https://github.com/org/repo.git?x-token-auth=secret", + want: true, + }, + { + name: "unrecognized query secret", + raw: "https://github.com/org/repo.git?sig=s3cr3t", + want: true, + }, + { + name: "benign-looking query", + raw: "https://github.com/org/repo.git?ref=main", + want: true, + }, + { + name: "fragment secret", + raw: "https://github.com/org/repo.git#access_token=secret", + want: true, + }, + { + name: "empty fragment marker", + raw: "https://github.com/org/repo.git#", + want: true, + }, + { + name: "malformed https userinfo path split", + raw: "https://ghp_secret/@github.com/org/repo.git", + want: true, + }, + { + name: "malformed https userinfo missing slash", + raw: "https:/user:ghp_secret@github.com/org/repo.git", + want: true, + }, + { + name: "malformed https userinfo numeric prefix", + raw: "https://user:123/ss@example.com/org/repo.git", + want: true, + }, + { + name: "malformed https userinfo missing colon", + raw: "https//ghp_secret@github.com/org/repo.git", + want: true, + }, + { + name: "malformed https parse error missing colon", + raw: "https//ghp_secret%zz@github.com/org/repo.git", + want: true, + }, + { + name: "empty query marker", + raw: "https://github.com/org/repo.git?", + want: true, + }, + { + name: "malformed userinfo", + raw: "https://user%zz@github.com/org/repo.git", + want: true, + }, + { + name: "malformed query secret", + raw: "https://github.com/org/%zz.git?sig=s3cr3t", + want: true, + }, + { + name: "scp-style ssh", + raw: "git@github.com:org/repo.git", + want: false, + }, + { + name: "scp-style ssh with query", + raw: "git@github.com:org/repo.git?sig=s3cr3t", + want: true, + }, + { + name: "scp-style ssh with fragment", + raw: "git@github.com:org/repo.git#access_token=secret", + want: true, + }, + { + name: "ssh username only", + raw: "ssh://git@github.com/org/repo.git", + want: false, + }, + { + name: "ssh username with query", + raw: "ssh://git@github.com/org/repo.git?ref=main", + want: true, + }, + { + name: "ssh password", + raw: "ssh://git:secret@github.com/org/repo.git", + want: true, + }, + { + name: "plain https remote", + raw: "https://github.com/org/repo.git", + want: false, + }, + { + name: "https path with at sign", + raw: "https://git.example.com/team/repo@2026.git", + want: false, + }, + { + name: "https ported path with at sign", + raw: "https://git.example.com:8443/team/repo@2026.git", + want: false, + }, + { + name: "https path with colon and at sign", + raw: "https://git.example.com/team/repo:v1@2026.git", + want: false, + }, + { + name: "localhost path with colon and at sign", + raw: "https://localhost/team/repo:v1@2026.git", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := HasInlineCredentials(tt.raw); got != tt.want { + t.Fatalf("HasInlineCredentials(%q) = %v, want %v", tt.raw, got, tt.want) + } + }) + } } func containsAny(s string, needles []string) bool { diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 11a87fa..4ff7860 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/cloudflare/artifact-fs/internal/auth" "github.com/cloudflare/artifact-fs/internal/daemon" "github.com/cloudflare/artifact-fs/internal/logging" "github.com/cloudflare/artifact-fs/internal/model" @@ -63,12 +64,27 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) ucli.StringFlag{Name: "branch", Value: "main", Usage: "branch to track"}, ucli.StringFlag{Name: "refresh", Value: "30s", Usage: "refresh interval"}, ucli.StringFlag{Name: "mount-root", Usage: "override mount root"}, + ucli.BoolFlag{Name: "async", Usage: "return after registration and prepare the repo in the daemon"}, + ucli.BoolFlag{Name: "prepared-gitdir", Usage: "use an existing git dir for async preparation"}, + ucli.StringFlag{Name: "git-dir", Usage: "explicit git dir path"}, + ucli.StringFlag{Name: "fetch-ref", Usage: "ref to fetch during async preparation"}, }, Action: withService(ctx, root, stderr, func(c *ucli.Context, svc *daemon.Service) error { name := strings.TrimSpace(c.String("name")) remote := strings.TrimSpace(c.String("remote")) - if name == "" || remote == "" { - return fmt.Errorf("--name and --remote are required") + async := c.Bool("async") + preparedGitDir := c.Bool("prepared-gitdir") + if preparedGitDir && !async { + return fmt.Errorf("--prepared-gitdir requires --async") + } + if preparedGitDir && strings.TrimSpace(c.String("git-dir")) == "" { + return fmt.Errorf("--git-dir is required with --prepared-gitdir") + } + if name == "" { + return fmt.Errorf("--name is required") + } + if remote == "" && !preparedGitDir { + return fmt.Errorf("--remote is required") } d, err := daemon.ParseRefresh(c.String("refresh")) if err != nil { @@ -81,11 +97,18 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) Branch: c.String("branch"), RefreshInterval: d, MountRoot: c.String("mount-root"), + GitDir: c.String("git-dir"), + PreparedGitDir: preparedGitDir, + FetchRef: c.String("fetch-ref"), Enabled: true, } - if err := svc.AddRepo(ctx, cfg); err != nil { + if err := svc.AddRepoWithOptions(ctx, cfg, daemon.AddRepoOptions{Async: async}); err != nil { return err } + if async { + fmt.Fprintf(stdout, "queued %s\n", cfg.Name) + return nil + } fmt.Fprintf(stdout, "added %s\n", cfg.Name) return nil }), @@ -112,6 +135,13 @@ func Run(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) fmt.Fprintf(w, "fetched %s\n", name) return nil }), + nameCommand("prepare", "retry async repository preparation", ctx, root, stderr, stdout, func(c context.Context, svc *daemon.Service, name string, w io.Writer) error { + if err := svc.Prepare(c, name); err != nil { + return err + } + fmt.Fprintf(w, "preparing %s\n", name) + return nil + }), { Name: "list-repos", Usage: "list configured repos", @@ -216,10 +246,11 @@ func nameCommand(name, usage string, ctx context.Context, root string, stderr io } func formatStatusLine(st model.RepoRuntimeState) string { - return fmt.Sprintf("repo=%s state=%s head=%s ref=%s ahead=%d behind=%d diverged=%t last_fetch=%s result=%s hydrated_blobs=%d hydrated_bytes=%d overlay_dirty=%t", + return fmt.Sprintf("repo=%s state=%s head=%s ref=%s ahead=%d behind=%d diverged=%t last_fetch=%s result=%s prepare_error=%s hydrated_blobs=%d hydrated_bytes=%d overlay_dirty=%t", st.RepoID, st.State, st.CurrentHEADOID, st.CurrentHEADRef, st.AheadCount, st.BehindCount, st.Diverged, formatLastFetchAt(st.LastFetchAt), formatLastFetchResult(st.LastFetchResult), + formatPrepareError(st.PrepareError), st.HydratedBlobCount, st.HydratedBlobBytes, st.DirtyOverlay) } @@ -237,6 +268,13 @@ func formatLastFetchResult(result string) string { return result } +func formatPrepareError(err string) string { + if strings.TrimSpace(err) == "" { + return "none" + } + return strings.Join(strings.Fields(auth.RedactString(err)), "_") +} + func stubCommand(name, usage string, stdout io.Writer) ucli.Command { return ucli.Command{ Name: name, diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 05ad6bd..84e9d1a 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -1,6 +1,8 @@ package cli import ( + "bytes" + "context" "strings" "testing" "time" @@ -23,6 +25,7 @@ func TestFormatStatusLineUsesNeverForUnsetFetch(t *testing.T) { for _, want := range []string{ "last_fetch=never", "result=never", + "prepare_error=none", "hydrated_blobs=3", "hydrated_bytes=42", } { @@ -44,3 +47,113 @@ func TestFormatStatusLineFormatsFetchTimestamp(t *testing.T) { t.Fatalf("status line %q missing formatted timestamp", got) } } + +func TestFormatStatusLineKeepsPrepareErrorSingleLine(t *testing.T) { + st := model.RepoRuntimeState{PrepareError: "fatal: clone failed\ntry again\tlater"} + + got := formatStatusLine(st) + if strings.ContainsAny(got, "\n\t") { + t.Fatalf("status line contains raw whitespace: %q", got) + } + if !strings.Contains(got, "prepare_error=fatal:_clone_failed_try_again_later") { + t.Fatalf("status line %q missing normalized prepare error", got) + } +} + +func TestFormatStatusLineRedactsPrepareError(t *testing.T) { + st := model.RepoRuntimeState{PrepareError: "clone https://token@example.com/org/repo.git?access_token=secret failed"} + + got := formatStatusLine(st) + if strings.Contains(got, "token@example.com") || strings.Contains(got, "secret") { + t.Fatalf("status line leaked prepare error credential: %q", got) + } + if !strings.Contains(got, "REDACTED") { + t.Fatalf("status line %q missing redaction marker", got) + } +} + +func TestAddRepoAsyncCLIRegistersWithoutClone(t *testing.T) { + t.Setenv("ARTIFACT_FS_ROOT", t.TempDir()) + var stdout, stderr bytes.Buffer + + code := Run(context.Background(), []string{ + "add-repo", + "--name", "repo", + "--remote", "https://github.com/example/repo.git", + "--branch", "main", + "--async", + }, &stdout, &stderr) + if code != 0 { + t.Fatalf("Run exit = %d, stderr=%q", code, stderr.String()) + } + if got := stdout.String(); got != "queued repo\n" { + t.Fatalf("stdout = %q, want queued repo", got) + } +} + +func TestAddRepoAsyncCLIFlagValidation(t *testing.T) { + tests := []struct { + name string + args []string + want string + }{ + { + name: "prepared_gitdir_requires_async", + args: []string{"add-repo", "--name", "repo", "--prepared-gitdir", "--git-dir", "/tmp/repo.git"}, + want: "--prepared-gitdir requires --async", + }, + { + name: "prepared_gitdir_requires_git_dir", + args: []string{"add-repo", "--name", "repo", "--async", "--prepared-gitdir"}, + want: "--git-dir is required with --prepared-gitdir", + }, + { + name: "async_clone_requires_remote", + args: []string{"add-repo", "--name", "repo", "--async"}, + want: "--remote is required", + }, + { + name: "async_rejects_inline_credentials", + args: []string{"add-repo", "--name", "repo", "--async", "--remote", "https://token@example.com/org/repo.git"}, + want: "async repositories must use ambient credentials", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Setenv("ARTIFACT_FS_ROOT", t.TempDir()) + var stdout, stderr bytes.Buffer + code := Run(context.Background(), tt.args, &stdout, &stderr) + if code == 0 { + t.Fatalf("Run unexpectedly succeeded, stdout=%q", stdout.String()) + } + if !strings.Contains(stderr.String(), tt.want) { + t.Fatalf("stderr = %q, want substring %q", stderr.String(), tt.want) + } + }) + } +} + +func TestPrepareCLIReportsQueuedPrepare(t *testing.T) { + t.Setenv("ARTIFACT_FS_ROOT", t.TempDir()) + var stdout, stderr bytes.Buffer + ctx := context.Background() + code := Run(ctx, []string{ + "add-repo", + "--name", "repo", + "--remote", "https://github.com/example/repo.git", + "--async", + }, &stdout, &stderr) + if code != 0 { + t.Fatalf("add-repo exit = %d, stderr=%q", code, stderr.String()) + } + + stdout.Reset() + stderr.Reset() + code = Run(ctx, []string{"prepare", "--name", "repo"}, &stdout, &stderr) + if code != 0 { + t.Fatalf("prepare exit = %d, stderr=%q", code, stderr.String()) + } + if got := stdout.String(); got != "preparing repo\n" { + t.Fatalf("stdout = %q, want preparing repo", got) + } +} diff --git a/internal/daemon/async_test.go b/internal/daemon/async_test.go new file mode 100644 index 0000000..b75eefa --- /dev/null +++ b/internal/daemon/async_test.go @@ -0,0 +1,597 @@ +package daemon + +import ( + "context" + "errors" + "io" + "log/slog" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/cloudflare/artifact-fs/internal/fusefs" + "github.com/cloudflare/artifact-fs/internal/model" + "github.com/cloudflare/artifact-fs/internal/snapshot" +) + +func TestAddRepoAsyncRegistersWithoutClone(t *testing.T) { + ctx := context.Background() + root := t.TempDir() + svc, err := New(ctx, root, slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + RemoteURL: "https://github.com/example/repo.git", + Branch: "master", + RefreshInterval: time.Minute, + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if got.PrepareState != model.PrepareStatePreparing { + t.Fatalf("PrepareState = %q, want preparing", got.PrepareState) + } + if got.FetchRef != "master" { + t.Fatalf("FetchRef = %q, want master", got.FetchRef) + } + if got.GitDir != filepath.Join(root, "repos", "repo", "git") { + t.Fatalf("GitDir = %q", got.GitDir) + } +} + +func TestAddRepoAsyncRejectsInlineCredentials(t *testing.T) { + ctx := context.Background() + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer func() { + svc.mu.Lock() + delete(svc.preparing, model.RepoID("repo")) + svc.mu.Unlock() + _ = svc.Close() + }() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + RemoteURL: "https://token@example.com/org/repo.git", + Branch: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err == nil { + t.Fatal("expected inline credential error") + } +} + +func TestAddRepoPreparedGitDirValidation(t *testing.T) { + ctx := context.Background() + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + PreparedGitDir: true, + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{}); err == nil { + t.Fatal("expected --prepared-gitdir requires --async error") + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err == nil { + t.Fatal("expected --git-dir required error") + } +} + +func TestSyncReposSkipsResetWhilePrepareWorkerInFlight(t *testing.T) { + ctx := context.Background() + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer func() { + svc.mu.Lock() + delete(svc.preparing, model.RepoID("repo")) + svc.mu.Unlock() + _ = svc.Close() + }() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + RemoteURL: "https://github.com/example/repo.git", + Branch: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + cfg, err = svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + + gate := fusefs.NewReadyGate(true) + rt := &repoRuntime{ + cfg: cfg, + gate: gate, + active: true, + state: model.RepoRuntimeState{ + RepoID: cfg.ID, + State: repoStateMounted, + PrepareError: "", + }, + } + svc.mu.Lock() + svc.running[cfg.ID] = rt + svc.preparing[cfg.ID] = 1 + svc.mu.Unlock() + + if err := svc.syncRepos(ctx); err != nil { + t.Fatal(err) + } + if rt.state.State != repoStateMounted { + t.Fatalf("runtime state = %q, want mounted", rt.state.State) + } + if err := gate.Wait(ctx); err != nil { + t.Fatalf("gate was reset while prepare worker was in flight: %v", err) + } +} + +func TestRestartRunningPrepareSkipsStalePreparingSnapshot(t *testing.T) { + ctx := context.Background() + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer func() { + svc.mu.Lock() + delete(svc.running, model.RepoID("repo")) + delete(svc.preparing, model.RepoID("repo")) + svc.mu.Unlock() + _ = svc.Close() + }() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + RemoteURL: "https://github.com/example/repo.git", + Branch: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + latest, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if err := svc.registry.UpdatePrepareState(ctx, latest.ID, model.PrepareStateReady, ""); err != nil { + t.Fatal(err) + } + stale := latest + stale.PrepareState = model.PrepareStatePreparing + + gate := fusefs.NewReadyGate(true) + rt := &repoRuntime{ + cfg: latest, + gate: gate, + active: true, + state: model.RepoRuntimeState{ + RepoID: latest.ID, + State: repoStateMounted, + }, + } + svc.mu.Lock() + svc.running[latest.ID] = rt + svc.mu.Unlock() + + svc.restartRunningPrepareIfCurrent(ctx, stale, rt, false) + if rt.state.State != repoStateMounted { + t.Fatalf("runtime state = %q, want mounted", rt.state.State) + } + if err := gate.Wait(ctx); err != nil { + t.Fatalf("gate was reset from stale registry snapshot: %v", err) + } + svc.mu.Lock() + _, preparing := svc.preparing[latest.ID] + svc.mu.Unlock() + if preparing { + t.Fatal("started prepare worker from stale registry snapshot") + } +} + +func TestRunPrepareFailurePersistsRedactedError(t *testing.T) { + ctx := context.Background() + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + GitDir: filepath.Join(t.TempDir(), "missing.git"), + PreparedGitDir: true, + FetchRef: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if err := svc.runPrepare(ctx, got); err == nil { + t.Fatal("expected runPrepare failure") + } + got, err = svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if got.PrepareState != model.PrepareStateFailed { + t.Fatalf("PrepareState = %q, want failed", got.PrepareState) + } + if got.PrepareError == "" { + t.Fatal("PrepareError is empty, want persisted failure") + } + + if err := svc.setPrepareState(ctx, got, model.PrepareStateFailed, errors.New("clone https://token@example.com/org/repo.git failed")); err != nil { + t.Fatal(err) + } + got, err = svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if strings.Contains(got.PrepareError, "token") { + t.Fatalf("PrepareError was not redacted: %q", got.PrepareError) + } +} + +func TestStartPrepareWorkerTimesOutAndPersistsFailed(t *testing.T) { + ctx := context.Background() + bin := filepath.Join(t.TempDir(), "bin") + if err := os.MkdirAll(bin, 0o755); err != nil { + t.Fatal(err) + } + gitPath := filepath.Join(bin, "git") + if err := os.WriteFile(gitPath, []byte("#!/bin/sh\nexec sleep 10\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + svc, err := New(ctx, t.TempDir(), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + svc.prepareTimeout = 20 * time.Millisecond + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + RemoteURL: "https://github.com/example/repo.git", + Branch: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + svc.startPrepareWorker(ctx, got) + + got = waitForPrepareState(t, svc, "repo", model.PrepareStateFailed) + if got.PrepareError != "prepare timed out" { + t.Fatalf("PrepareError = %q, want timeout", got.PrepareError) + } + waitFor(t, time.Second, func() bool { + svc.mu.Lock() + defer svc.mu.Unlock() + _, preparing := svc.preparing[got.ID] + return !preparing + }) +} + +func TestRunPreparePreparedGitDirPublishesSnapshotAndMarksReady(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + bare := filepath.Join(tmp, "origin.git") + work := filepath.Join(tmp, "work") + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + + runCmd(t, "git", "init", "--bare", bare) + runCmd(t, "git", "clone", bare, work) + runCmd(t, "git", "-C", work, "checkout", "-b", "master") + if err := os.WriteFile(filepath.Join(work, "README.md"), []byte("hello\n"), 0o644); err != nil { + t.Fatal(err) + } + runCmd(t, "git", "-C", work, "add", "README.md") + runCmd(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "init") + runCmd(t, "git", "-C", work, "push", "origin", "master") + + runCmd(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + runCmd(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "file://"+bare) + + root := filepath.Join(tmp, "artifact-fs") + svc, err := New(ctx, root, slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + RefreshInterval: time.Minute, + GitDir: preparedGitDir, + PreparedGitDir: true, + FetchRef: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if err := svc.runPrepare(ctx, got); err != nil { + t.Fatalf("runPrepare: %v", err) + } + + got, err = svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if got.PrepareState != model.PrepareStateReady { + t.Fatalf("PrepareState = %q, want ready", got.PrepareState) + } + if got.PrepareError != "" { + t.Fatalf("PrepareError = %q, want empty", got.PrepareError) + } + snap, err := snapshot.New(ctx, got.MetaDBPath) + if err != nil { + t.Fatal(err) + } + defer snap.Close() + _, ref, gen, err := snap.ReadState(ctx) + if err != nil { + t.Fatal(err) + } + if ref != "master" { + t.Fatalf("snapshot ref = %q, want master", ref) + } + if gen == 0 { + t.Fatal("snapshot generation = 0, want published generation") + } + if _, ok := snap.GetNode(gen, "README.md"); !ok { + t.Fatal("README.md not found in snapshot") + } +} + +func TestRunPrepareDoesNotOpenGateWhenReadyPersistenceFails(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + preparedGitDir := createPreparedGitDir(t, tmp) + + svc, err := New(ctx, filepath.Join(tmp, "artifact-fs"), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + RefreshInterval: time.Minute, + GitDir: preparedGitDir, + PreparedGitDir: true, + FetchRef: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if err := svc.mountAsyncRepo(ctx, got); err != nil { + t.Fatal(err) + } + svc.mu.Lock() + gate := svc.running[got.ID].gate + svc.mu.Unlock() + if gate == nil { + t.Fatal("runtime gate is nil") + } + if err := svc.registry.Close(); err != nil { + t.Fatal(err) + } + + if err := svc.runPrepare(ctx, got); err == nil { + t.Fatal("expected ready state persistence failure") + } + waitCtx, cancel := context.WithTimeout(ctx, 20*time.Millisecond) + defer cancel() + if err := gate.Wait(waitCtx); !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("gate wait = %v, want deadline because ready was not durably persisted", err) + } +} + +func TestRunPrepareDoesNotMarkSupersededConfigReady(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + firstGitDir := createPreparedGitDir(t, filepath.Join(tmp, "first")) + secondGitDir := createPreparedGitDir(t, filepath.Join(tmp, "second")) + + svc, err := New(ctx, filepath.Join(tmp, "artifact-fs"), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + RefreshInterval: time.Minute, + GitDir: firstGitDir, + PreparedGitDir: true, + FetchRef: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + first, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + cfg.GitDir = secondGitDir + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + + err = svc.runPrepare(ctx, first) + if err == nil || !strings.Contains(err.Error(), "superseded") { + t.Fatalf("runPrepare error = %v, want superseded", err) + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if got.PrepareState != model.PrepareStatePreparing { + t.Fatalf("PrepareState = %q, want preparing", got.PrepareState) + } + if got.GitDir != secondGitDir { + t.Fatalf("GitDir = %q, want newer git dir %q", got.GitDir, secondGitDir) + } +} + +func TestRunPrepareDoesNotMarkSupersededConfigFailed(t *testing.T) { + ctx := context.Background() + tmp := t.TempDir() + svc, err := New(ctx, filepath.Join(tmp, "artifact-fs"), slog.New(slog.NewTextHandler(io.Discard, nil))) + if err != nil { + t.Fatal(err) + } + defer svc.Close() + + cfg := model.RepoConfig{ + Name: "repo", + ID: "repo", + Branch: "master", + RefreshInterval: time.Minute, + GitDir: filepath.Join(tmp, "missing.git"), + PreparedGitDir: true, + FetchRef: "master", + Enabled: true, + } + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + first, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + cfg.GitDir = createPreparedGitDir(t, filepath.Join(tmp, "second")) + if err := svc.AddRepoWithOptions(ctx, cfg, AddRepoOptions{Async: true}); err != nil { + t.Fatal(err) + } + + if err := svc.runPrepare(ctx, first); err == nil { + t.Fatal("expected stale prepare failure") + } + got, err := svc.registry.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if got.PrepareState != model.PrepareStatePreparing { + t.Fatalf("PrepareState = %q, want preparing", got.PrepareState) + } + if got.PrepareError != "" { + t.Fatalf("PrepareError = %q, want empty", got.PrepareError) + } +} + +func createPreparedGitDir(t *testing.T, tmp string) string { + t.Helper() + bare := filepath.Join(tmp, "origin.git") + work := filepath.Join(tmp, "work") + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + + runCmd(t, "git", "init", "--bare", bare) + runCmd(t, "git", "clone", bare, work) + runCmd(t, "git", "-C", work, "checkout", "-b", "master") + if err := os.WriteFile(filepath.Join(work, "README.md"), []byte("hello\n"), 0o644); err != nil { + t.Fatal(err) + } + runCmd(t, "git", "-C", work, "add", "README.md") + runCmd(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "init") + runCmd(t, "git", "-C", work, "push", "origin", "master") + + runCmd(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + runCmd(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "file://"+bare) + return preparedGitDir +} + +func waitForPrepareState(t *testing.T, svc *Service, name string, state string) model.RepoConfig { + t.Helper() + var got model.RepoConfig + waitFor(t, 2*time.Second, func() bool { + var err error + got, err = svc.registry.GetRepo(context.Background(), name) + return err == nil && got.PrepareState == state + }) + return got +} + +func waitFor(t *testing.T, timeout time.Duration, ready func() bool) { + t.Helper() + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if ready() { + return + } + time.Sleep(10 * time.Millisecond) + } + t.Fatal("condition was not met before timeout") +} + +func runCmd(t *testing.T, name string, args ...string) { + t.Helper() + cmd := exec.Command(name, args...) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("%s %v failed: %v\n%s", name, args, err, string(out)) + } +} diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index d650f6a..b83b63b 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -26,15 +26,29 @@ import ( const DefaultHydrationConcurrency = 4 +const ( + defaultPrepareTimeout = 30 * time.Minute + prepareStateWriteTimeout = 5 * time.Second +) + +const ( + repoStateMounted = "mounted" + repoStateUnmounted = "unmounted" + repoStateDegraded = "degraded" +) + type Service struct { root string mountRoot string hydrationConcurrency int + prepareTimeout time.Duration logger *slog.Logger registry *registry.Store git *gitstore.Store mu sync.Mutex running map[model.RepoID]*repoRuntime + preparing map[model.RepoID]int64 + prepareSeq int64 mountFailures map[model.RepoID]*mountFailure } @@ -52,7 +66,9 @@ type repoRuntime struct { hydrator *hydrator.Service resolver *fusefs.Resolver mfs fusefs.MountedFS + gate *fusefs.ReadyGate state model.RepoRuntimeState + active bool } type aheadBehind struct { @@ -61,18 +77,24 @@ type aheadBehind struct { diverged bool } +type AddRepoOptions struct { + Async bool +} + func New(ctx context.Context, root string, logger *slog.Logger) (*Service, error) { reg, err := registry.New(ctx, filepath.Join(root, "config", "repos.sqlite")) if err != nil { return nil, err } svc := &Service{ - root: root, - logger: logger, - registry: reg, - git: gitstore.New(logger), - running: map[model.RepoID]*repoRuntime{}, - mountFailures: map[model.RepoID]*mountFailure{}, + root: root, + logger: logger, + registry: reg, + git: gitstore.New(logger), + prepareTimeout: defaultPrepareTimeout, + running: map[model.RepoID]*repoRuntime{}, + preparing: map[model.RepoID]int64{}, + mountFailures: map[model.RepoID]*mountFailure{}, } svc.git.SetBatchPoolSize(DefaultHydrationConcurrency) return svc, nil @@ -146,9 +168,21 @@ func (s *Service) syncRepos(ctx context.Context) error { continue } s.mu.Lock() - _, running := s.running[repo.ID] + rt, running := s.running[repo.ID] + _, alreadyPreparing := s.preparing[repo.ID] s.mu.Unlock() if running { + s.restartRunningPrepareIfCurrent(ctx, repo, rt, alreadyPreparing) + continue + } + if shouldMountAsync(repo) { + if err := s.mountAsyncRepo(ctx, repo); err != nil { + s.logger.Error("repo async mount failed", "repo", repo.Name, "error", err) + continue + } + if repo.PrepareState == model.PrepareStatePreparing { + s.startPrepareWorker(ctx, repo) + } continue } if mf, ok := s.mountFailures[repo.ID]; ok && time.Since(mf.lastAttempt) < mf.backoff { @@ -196,7 +230,71 @@ func (s *Service) syncRepos(ctx context.Context) error { return nil } +func (s *Service) restartRunningPrepareIfCurrent(ctx context.Context, repo model.RepoConfig, rt *repoRuntime, alreadyPreparing bool) { + if repo.PrepareState != model.PrepareStatePreparing || rt == nil { + return + } + latest, err := s.registry.GetRepo(ctx, repo.Name) + if err != nil { + s.logger.Error("repo prepare state refresh failed", "repo", repo.Name, "error", err) + return + } + if latest.PrepareState != model.PrepareStatePreparing { + return + } + configMatches := samePrepareConfig(rt.cfg, latest) + if alreadyPreparing && configMatches { + return + } + if rt.active || !configMatches { + s.unmount(latest.ID) + if err := s.mountAsyncRepo(ctx, latest); err != nil { + s.logger.Error("repo async remount failed", "repo", latest.Name, "error", err) + return + } + s.supersedePrepare(latest.ID) + s.startPrepareWorker(ctx, latest) + return + } + if alreadyPreparing { + return + } + if s.resetRunningPrepareState(latest) { + s.startPrepareWorker(ctx, latest) + } +} + +func samePrepareConfig(a model.RepoConfig, b model.RepoConfig) bool { + return a.Branch == b.Branch && + a.RemoteURL == b.RemoteURL && + a.PreparedGitDir == b.PreparedGitDir && + a.FetchRef == b.FetchRef && + a.GitDir == b.GitDir && + a.MetaDBPath == b.MetaDBPath && + a.OverlayDir == b.OverlayDir && + a.OverlayDBPath == b.OverlayDBPath && + a.BlobCacheDir == b.BlobCacheDir && + a.MountPath == b.MountPath +} + +func (s *Service) prepareConfigStillCurrent(ctx context.Context, cfg model.RepoConfig) bool { + latest, err := s.registry.GetRepo(ctx, cfg.Name) + if err != nil { + s.logger.Error("repo prepare state refresh failed", "repo", cfg.Name, "error", err) + return false + } + s.fillPaths(&latest) + if strings.TrimSpace(latest.FetchRef) == "" { + latest.FetchRef = latest.Branch + } + return samePrepareConfig(cfg, latest) +} + func (s *Service) AddRepo(ctx context.Context, cfg model.RepoConfig) error { + return s.AddRepoWithOptions(ctx, cfg, AddRepoOptions{}) +} + +func (s *Service) AddRepoWithOptions(ctx context.Context, cfg model.RepoConfig, opts AddRepoOptions) error { if err := model.ValidateRepoName(cfg.Name); err != nil { return err } @@ -207,12 +305,39 @@ func (s *Service) AddRepo(ctx context.Context, cfg model.RepoConfig) error { if cfg.RefreshInterval <= 0 { cfg.RefreshInterval = 30 * time.Second } + explicitGitDir := strings.TrimSpace(cfg.GitDir) != "" s.fillPaths(&cfg) + if strings.TrimSpace(cfg.FetchRef) == "" { + cfg.FetchRef = cfg.Branch + } + cloneURL := cfg.RemoteURL + if cfg.PreparedGitDir && !opts.Async { + return fmt.Errorf("--prepared-gitdir requires --async") + } + if opts.Async { + if strings.TrimSpace(cfg.RemoteURL) == "" && !cfg.PreparedGitDir { + return fmt.Errorf("--remote is required unless --prepared-gitdir is set") + } + if cfg.PreparedGitDir && !explicitGitDir { + return fmt.Errorf("--git-dir is required with --prepared-gitdir") + } + if auth.HasInlineCredentials(cfg.RemoteURL) { + return fmt.Errorf("async repositories must use ambient credentials; remove credentials from --remote") + } + cfg.PrepareState = model.PrepareStatePreparing + cfg.PrepareError = "" + } else if auth.HasInlineCredentials(cfg.RemoteURL) { + cfg.RemoteURL = "" + } if err := s.registry.AddRepo(ctx, cfg); err != nil { return err } + if opts.Async { + return nil + } // Clone and build snapshot so the repo is ready to mount, but don't start // the FUSE server -- that's the daemon's job. + cfg.RemoteURL = cloneURL return s.prepareRepo(ctx, cfg) } @@ -273,6 +398,12 @@ func (s *Service) FetchNow(ctx context.Context, name string) error { if err != nil { return err } + if cfg.PrepareState == model.PrepareStatePreparing { + return fusefs.ErrRepoNotReady + } + if cfg.PrepareState == model.PrepareStateFailed { + return fmt.Errorf("repo prepare failed: %s", cfg.PrepareError) + } if err := s.git.Fetch(ctx, cfg); err != nil { return err } @@ -288,12 +419,48 @@ func (s *Service) FetchNow(ctx context.Context, name string) error { return nil } +func (s *Service) Prepare(ctx context.Context, name string) error { + cfg, err := s.registry.GetRepo(ctx, name) + if err != nil { + return err + } + if !isAsyncRepo(cfg) { + return s.prepareRepo(ctx, cfg) + } + if cfg.PrepareState == model.PrepareStateReady { + return nil + } + cfg.PrepareState = model.PrepareStatePreparing + cfg.PrepareError = "" + if strings.TrimSpace(cfg.FetchRef) == "" { + cfg.FetchRef = cfg.Branch + } + if err := s.registry.UpdatePrepareStateForConfig(ctx, cfg, model.PrepareStatePreparing, ""); err != nil { + return err + } + cfg.PrepareState = model.PrepareStatePreparing + cfg.PrepareError = "" + if s.resetRunningPrepareState(cfg) { + s.startPrepareWorker(ctx, cfg) + } + return nil +} + func (s *Service) Remount(ctx context.Context, name string) error { cfg, err := s.registry.GetRepo(ctx, name) if err != nil { return err } s.unmount(cfg.ID) + if shouldMountAsync(cfg) { + if err := s.mountAsyncRepo(ctx, cfg); err != nil { + return err + } + if cfg.PrepareState == model.PrepareStatePreparing { + s.startPrepareWorker(ctx, cfg) + } + return nil + } return s.mountRepo(ctx, cfg) } @@ -408,7 +575,327 @@ func (s *Service) mountRepo(ctx context.Context, cfg model.RepoConfig) error { state: newRuntimeState(cfg.ID, headOID, headRef, gen), } s.startRuntime(rt) + s.startRepoBackground(rt) + + return nil +} + +func (s *Service) mountAsyncRepo(ctx context.Context, cfg model.RepoConfig) error { + s.fillPaths(&cfg) + if err := os.MkdirAll(cfg.MountPath, 0o755); err != nil { + return err + } + snap, err := snapshot.New(ctx, cfg.MetaDBPath) + if err != nil { + return err + } + headOID, headRef, gen, _ := snap.ReadState(ctx) + ov, err := overlay.New(ctx, cfg) + if err != nil { + snap.Close() + return err + } + h := hydrator.New(s.git) + resolver := &fusefs.Resolver{Snapshot: snap, Overlay: ov} + resolver.SetGeneration(gen) + if headOID != "" { + s.refreshCommitTime(ctx, cfg, headOID, resolver, "commit timestamp unavailable, mtime will use generation fallback") + } + h.SetOnHydrated(func(_ model.RepoID, objectOID string, size int64) { + snap.UpdateSize(resolver.Generation(), objectOID, size) + }) + h.Start(s.hydrationWorkers(), cfg) + + gate := fusefs.NewReadyGate(false) + if cfg.PrepareState == model.PrepareStateFailed { + gate.MarkFailed(prepareGateError(cfg.PrepareError)) + } + engine := &fusefs.Engine{ + Resolver: resolver, + Repo: cfg, + Overlay: ov, + Hydrator: h, + } + + mfs, err := fusefs.MountRepoWithGate(cfg, resolver, engine, gate) + if err != nil { + s.logger.Error("fuse mount failed, running without FUSE", "repo", cfg.Name, "error", err) + mfs = nil + } + runtimeCtx, cancel := context.WithCancel(ctx) + state := cfg.PrepareState + if strings.TrimSpace(state) == "" { + state = model.PrepareStatePreparing + } + rt := &repoRuntime{ + cfg: cfg, + ctx: runtimeCtx, + cancel: cancel, + snapshot: snap, + overlay: ov, + hydrator: h, + resolver: resolver, + mfs: mfs, + gate: gate, + state: model.RepoRuntimeState{ + RepoID: cfg.ID, + CurrentHEADOID: headOID, + CurrentHEADRef: headRef, + SnapshotGeneration: gen, + LastFetchResult: "never", + State: state, + PrepareError: cfg.PrepareError, + }, + } + s.startRuntime(rt) + return nil +} + +func (s *Service) startPrepareWorker(ctx context.Context, cfg model.RepoConfig) { + s.mu.Lock() + if _, ok := s.preparing[cfg.ID]; ok { + s.mu.Unlock() + return + } + s.prepareSeq++ + token := s.prepareSeq + workerCtx := ctx + if rt := s.running[cfg.ID]; rt != nil && rt.ctx != nil { + workerCtx = rt.ctx + } + s.preparing[cfg.ID] = token + s.mu.Unlock() + + go func() { + defer func() { + s.mu.Lock() + if s.preparing[cfg.ID] == token { + delete(s.preparing, cfg.ID) + } + s.mu.Unlock() + }() + prepareCtx, cancel := context.WithTimeout(workerCtx, s.prepareTimeoutDuration()) + defer cancel() + if err := s.runPrepare(prepareCtx, cfg); err != nil { + s.logger.Error("repo prepare failed", "repo", cfg.Name, "error", err) + } + }() +} +func (s *Service) supersedePrepare(id model.RepoID) { + s.mu.Lock() + delete(s.preparing, id) + s.mu.Unlock() +} + +func (s *Service) prepareTimeoutDuration() time.Duration { + if s.prepareTimeout > 0 { + return s.prepareTimeout + } + return defaultPrepareTimeout +} + +func (s *Service) resetRunningPrepareState(cfg model.RepoConfig) bool { + s.mu.Lock() + defer s.mu.Unlock() + rt, ok := s.running[cfg.ID] + if !ok || rt.gate == nil { + return false + } + rt.gate.Reset() + rt.cfg = cfg + rt.state.State = model.PrepareStatePreparing + rt.state.PrepareError = "" + return true +} + +func (s *Service) runPrepare(ctx context.Context, cfg model.RepoConfig) error { + s.fillPaths(&cfg) + if strings.TrimSpace(cfg.FetchRef) == "" { + cfg.FetchRef = cfg.Branch + } + + fail := func(err error) error { + if errors.Is(ctx.Err(), context.Canceled) || errors.Is(err, context.Canceled) { + return err + } + stateErr := err + if errors.Is(ctx.Err(), context.DeadlineExceeded) { + stateErr = errors.New("prepare timed out") + } + stateCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), prepareStateWriteTimeout) + defer cancel() + if !s.prepareConfigStillCurrent(stateCtx, cfg) { + return err + } + _ = s.setPrepareState(stateCtx, cfg, model.PrepareStateFailed, stateErr) + return err + } + + if cfg.PreparedGitDir { + if err := s.git.ValidatePreparedGitDir(ctx, cfg); err != nil { + return fail(err) + } + if err := s.git.FetchRefNonInteractive(ctx, cfg, cfg.FetchRef); err != nil { + return fail(err) + } + if err := s.git.PrepareFetchedBranch(ctx, cfg, cfg.FetchRef); err != nil { + return fail(err) + } + } else { + if strings.TrimSpace(cfg.RemoteURL) == "" { + return fail(errors.New("remote URL is required for async clone")) + } + if _, err := os.Stat(cfg.GitDir); err == nil { + if err := s.git.PrepareExistingCloneNonInteractive(ctx, cfg); err != nil { + return fail(err) + } + } else { + if err := s.git.CloneBloblessNonInteractive(ctx, cfg); err != nil { + return fail(err) + } + if err := s.git.PrepareExistingCloneNonInteractive(ctx, cfg); err != nil { + return fail(err) + } + } + } + + headOID, headRef, err := s.git.ResolveHEAD(ctx, cfg) + if err != nil { + return fail(err) + } + snap, closeSnap, err := s.snapshotForPrepare(ctx, cfg) + if err != nil { + return fail(err) + } + if closeSnap { + defer snap.Close() + } + gen, _, err := s.publishSnapshot(ctx, cfg, snap, headOID, headRef) + if err != nil { + return fail(err) + } + latest, err := s.registry.GetRepo(ctx, cfg.Name) + if err != nil { + return fail(err) + } + s.fillPaths(&latest) + if strings.TrimSpace(latest.FetchRef) == "" { + latest.FetchRef = latest.Branch + } + if !samePrepareConfig(cfg, latest) { + return errors.New("prepare superseded by newer repo config") + } + if err := s.setPrepareStateBeforeReadyGate(ctx, cfg); err != nil { + return fail(err) + } + if err := s.completePreparedRuntime(ctx, cfg, headOID, headRef, gen); err != nil { + return fail(err) + } + return nil +} + +func (s *Service) snapshotForPrepare(ctx context.Context, cfg model.RepoConfig) (*snapshot.Store, bool, error) { + s.mu.Lock() + rt := s.running[cfg.ID] + s.mu.Unlock() + if rt != nil && rt.snapshot != nil { + return rt.snapshot, false, nil + } + snap, err := snapshot.New(ctx, cfg.MetaDBPath) + if err != nil { + return nil, false, err + } + return snap, true, nil +} + +func (s *Service) completePreparedRuntime(ctx context.Context, cfg model.RepoConfig, headOID string, headRef string, gen int64) error { + s.mu.Lock() + rt := s.running[cfg.ID] + if rt != nil && !samePrepareConfig(rt.cfg, cfg) { + s.mu.Unlock() + return registry.ErrRepoChanged + } + s.mu.Unlock() + if rt == nil { + return nil + } + if !s.prepareConfigStillCurrent(ctx, cfg) { + return registry.ErrRepoChanged + } + if !s.prepareConfigStillCurrent(ctx, cfg) { + return registry.ErrRepoChanged + } + baseLookup := func(path string) (model.BaseNode, bool) { + return rt.snapshot.GetNode(gen, path) + } + if err := rt.overlay.Reconcile(ctx, baseLookup); err != nil { + return err + } + if !s.prepareConfigStillCurrent(ctx, cfg) { + return registry.ErrRepoChanged + } + s.refreshCommitTime(ctx, cfg, headOID, rt.resolver, "commit timestamp unavailable") + rt.resolver.SetGeneration(gen) + s.mu.Lock() + if s.running[cfg.ID] != rt || !samePrepareConfig(rt.cfg, cfg) { + s.mu.Unlock() + return registry.ErrRepoChanged + } + rt.cfg = cfg + rt.cfg.PrepareState = model.PrepareStateReady + rt.cfg.PrepareError = "" + setHeadState(&rt.state, headOID, headRef, gen) + rt.state.State = repoStateMounted + rt.state.PrepareError = "" + s.mu.Unlock() + rt.gate.MarkReady() + s.startRepoBackground(rt) + return nil +} + +func (s *Service) setPrepareState(ctx context.Context, cfg model.RepoConfig, state string, stateErr error) error { + return s.applyPrepareState(ctx, cfg, state, stateErr, true) +} + +func (s *Service) setPrepareStateBeforeReadyGate(ctx context.Context, cfg model.RepoConfig) error { + return s.applyPrepareState(ctx, cfg, model.PrepareStateReady, nil, false) +} + +func (s *Service) applyPrepareState(ctx context.Context, cfg model.RepoConfig, state string, stateErr error, applyReadyRuntime bool) error { + msg := "" + if stateErr != nil { + msg = auth.RedactString(stateErr.Error()) + } + if err := s.registry.UpdatePrepareStateForConfig(ctx, cfg, state, msg); err != nil { + return err + } + s.mu.Lock() + if rt, ok := s.running[cfg.ID]; ok { + if !samePrepareConfig(rt.cfg, cfg) { + s.mu.Unlock() + return nil + } + rt.cfg.PrepareState = state + rt.cfg.PrepareError = msg + rt.state.PrepareError = msg + if state != model.PrepareStateReady || applyReadyRuntime { + rt.state.State = runtimeStateForPrepareState(state) + } + if rt.gate != nil { + switch state { + case model.PrepareStateFailed: + rt.gate.MarkFailed(prepareGateError(msg)) + case model.PrepareStateReady: + if applyReadyRuntime { + rt.gate.MarkReady() + } + default: + rt.gate.Reset() + } + } + } + s.mu.Unlock() return nil } @@ -505,9 +992,11 @@ func (s *Service) refreshLoop(rt *repoRuntime) { func (s *Service) readPersistedStatus(ctx context.Context, cfg model.RepoConfig) model.RepoRuntimeState { // One-shot CLI process: reconstruct state from persisted stores and // OS-level mount check since we don't share memory with the daemon. - st := model.RepoRuntimeState{RepoID: cfg.ID, State: "unmounted", LastFetchResult: "never"} - if isMounted(cfg.MountPath) { - st.State = "mounted" + st := model.RepoRuntimeState{RepoID: cfg.ID, State: repoStateUnmounted, LastFetchResult: "never", PrepareError: cfg.PrepareError} + if isPendingOrFailedPrepareState(cfg.PrepareState) { + st.State = cfg.PrepareState + } else if isMounted(cfg.MountPath) { + st.State = repoStateMounted } if cfg.MetaDBPath != "" { if snap, err := snapshot.New(ctx, cfg.MetaDBPath); err == nil { @@ -568,18 +1057,28 @@ func (s *Service) startRuntime(rt *repoRuntime) { s.running[rt.cfg.ID] = rt s.mu.Unlock() + if rt.mfs != nil { + go func() { + _ = rt.mfs.Join(rt.ctx) + }() + } +} + +func (s *Service) startRepoBackground(rt *repoRuntime) { + s.mu.Lock() + if rt.active { + s.mu.Unlock() + return + } + rt.active = true + s.mu.Unlock() + go s.refreshLoop(rt) w := watcher.New(500 * time.Millisecond) go w.Watch(rt.ctx, rt.cfg.GitDir, func() { s.onHEADChanged(rt.ctx, rt) }) - - if rt.mfs != nil { - go func() { - _ = rt.mfs.Join(rt.ctx) - }() - } } func newRuntimeState(repoID model.RepoID, headOID string, headRef string, gen int64) model.RepoRuntimeState { @@ -589,7 +1088,7 @@ func newRuntimeState(repoID model.RepoID, headOID string, headRef string, gen in CurrentHEADRef: headRef, SnapshotGeneration: gen, LastFetchResult: "never", - State: "ready", + State: repoStateMounted, } } @@ -613,13 +1112,13 @@ func markFetchSuccess(st *model.RepoRuntimeState, at time.Time, state aheadBehin func markFetchResult(st *model.RepoRuntimeState, at time.Time, result string) { st.LastFetchResult = result st.LastFetchAt = at - if st.State == "degraded" && result == "ok" { - st.State = "ready" + if st.State == repoStateDegraded && result == "ok" { + st.State = repoStateMounted } } func markFetchFailure(st *model.RepoRuntimeState, result string) { - st.State = "degraded" + st.State = repoStateDegraded st.LastFetchResult = result } @@ -671,14 +1170,21 @@ func (s *Service) stopRuntime(rt *repoRuntime) { if rt.cancel != nil { rt.cancel() } + if rt.gate != nil { + rt.gate.MarkFailed(context.Canceled) + } if rt.mfs != nil { _ = rt.mfs.Unmount() } if rt.hydrator != nil { rt.hydrator.Stop() } - _ = rt.snapshot.Close() - _ = rt.overlay.Close() + if rt.snapshot != nil { + _ = rt.snapshot.Close() + } + if rt.overlay != nil { + _ = rt.overlay.Close() + } } func (s *Service) fillPaths(cfg *model.RepoConfig) { @@ -719,3 +1225,34 @@ func ParseRefresh(v string) (time.Duration, error) { } return d, nil } + +func isAsyncRepo(cfg model.RepoConfig) bool { + return cfg.PreparedGitDir || strings.TrimSpace(cfg.PrepareState) != "" +} + +func shouldMountAsync(cfg model.RepoConfig) bool { + return isPendingOrFailedPrepareState(cfg.PrepareState) +} + +func isPendingOrFailedPrepareState(state string) bool { + switch strings.TrimSpace(state) { + case model.PrepareStatePreparing, model.PrepareStateFailed: + return true + default: + return false + } +} + +func runtimeStateForPrepareState(state string) string { + if state == model.PrepareStateReady { + return repoStateMounted + } + return state +} + +func prepareGateError(msg string) error { + if strings.TrimSpace(msg) == "" { + return fusefs.ErrRepoNotReady + } + return errors.New(msg) +} diff --git a/internal/fusefs/fuse_unix.go b/internal/fusefs/fuse_unix.go index ef1b633..5ac4a50 100644 --- a/internal/fusefs/fuse_unix.go +++ b/internal/fusefs/fuse_unix.go @@ -206,6 +206,20 @@ func (fs *ArtifactFuse) GetInodeAttributes(_ context.Context, op *fuseops.GetIno return err } + if ref.IsRoot { + if fs.resolver != nil { + if mode, size, typ, mtime, ctime, err := fs.resolver.Getattr(ref.Path); err == nil { + op.Attributes = inodeAttrs(mode, uint64(size), typ, mtime, ctime) + op.AttributesExpiration = attrExpiry(time.Second) + return nil + } + } + now := time.Now() + op.Attributes = inodeAttrs(ref.Mode, 4096, "dir", now, now) + op.AttributesExpiration = attrExpiry(time.Second) + return nil + } + if ref.Path == ".git" { op.Attributes = fs.gitFileAttrs() op.AttributesExpiration = attrExpiry(time.Minute) @@ -559,8 +573,12 @@ func (m *mountedFSWrapper) Unmount() error { } func MountRepo(repo model.RepoConfig, resolver *Resolver, engine *Engine) (MountedFS, error) { + return MountRepoWithGate(repo, resolver, engine, nil) +} + +func MountRepoWithGate(repo model.RepoConfig, resolver *Resolver, engine *Engine, gate *ReadyGate) (MountedFS, error) { fsint := NewArtifactFuse(repo, resolver, engine) - server := fuseutil.NewFileSystemServer(fsint) + server := fuseutil.NewFileSystemServer(NewGatedFileSystem(fsint, gate)) mountCfg := &fuse.MountConfig{ FSName: "artifact-fs:" + repo.Name, diff --git a/internal/fusefs/fuse_unix_test.go b/internal/fusefs/fuse_unix_test.go index 4df7438..a03d9eb 100644 --- a/internal/fusefs/fuse_unix_test.go +++ b/internal/fusefs/fuse_unix_test.go @@ -3,8 +3,12 @@ package fusefs import ( + "context" "testing" "time" + + "github.com/cloudflare/artifact-fs/internal/model" + "github.com/jacobsa/fuse/fuseops" ) func TestInodeAttrsPreservesSeparateTimes(t *testing.T) { @@ -46,3 +50,48 @@ func TestGitFileAttrsUsesOneTimestamp(t *testing.T) { t.Fatalf("expected .git attrs to use one timestamp: atime=%v mtime=%v ctime=%v", attr.Atime, attr.Mtime, attr.Ctime) } } + +func TestRootInodeAttributesDoNotRequireResolver(t *testing.T) { + fs := NewArtifactFuse(model.RepoConfig{Name: "repo", GitDir: "/tmp/repo.git"}, nil, nil) + op := &fuseops.GetInodeAttributesOp{Inode: fuseops.RootInodeID} + + if err := fs.GetInodeAttributes(context.Background(), op); err != nil { + t.Fatalf("GetInodeAttributes(root): %v", err) + } + if !op.Attributes.Mode.IsDir() { + t.Fatalf("root mode = %#o, want directory", op.Attributes.Mode) + } + if op.Attributes.Size == 0 { + t.Fatal("root size = 0, want non-zero placeholder size") + } +} + +func TestRootInodeAttributesUseStableResolverAttrsWhenReady(t *testing.T) { + resolver := &Resolver{ + Snapshot: &fakeSnapshot{nodes: map[string]model.BaseNode{ + ".": {Path: ".", Type: "dir", Mode: 0o755, SizeBytes: 4096}, + }}, + Overlay: &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + } + resolver.SetGeneration(7) + resolver.SetCommitTime(1_700_000_000) + fs := NewArtifactFuse(model.RepoConfig{Name: "repo", GitDir: "/tmp/repo.git"}, resolver, nil) + + first := &fuseops.GetInodeAttributesOp{Inode: fuseops.RootInodeID} + if err := fs.GetInodeAttributes(context.Background(), first); err != nil { + t.Fatalf("first GetInodeAttributes(root): %v", err) + } + time.Sleep(2 * time.Millisecond) + second := &fuseops.GetInodeAttributesOp{Inode: fuseops.RootInodeID} + if err := fs.GetInodeAttributes(context.Background(), second); err != nil { + t.Fatalf("second GetInodeAttributes(root): %v", err) + } + + want := time.Unix(1_700_000_000, 0) + if !first.Attributes.Mtime.Equal(want) || !second.Attributes.Mtime.Equal(want) { + t.Fatalf("root mtime = %v then %v, want stable %v", first.Attributes.Mtime, second.Attributes.Mtime, want) + } + if !first.Attributes.Ctime.Equal(second.Attributes.Ctime) { + t.Fatalf("root ctime changed: %v then %v", first.Attributes.Ctime, second.Attributes.Ctime) + } +} diff --git a/internal/fusefs/gated_fs.go b/internal/fusefs/gated_fs.go new file mode 100644 index 0000000..93b0783 --- /dev/null +++ b/internal/fusefs/gated_fs.go @@ -0,0 +1,232 @@ +//go:build !windows + +package fusefs + +import ( + "context" + "syscall" + + "github.com/jacobsa/fuse/fuseops" + "github.com/jacobsa/fuse/fuseutil" +) + +type gatedFileSystem struct { + next fuseutil.FileSystem + gate *ReadyGate +} + +func NewGatedFileSystem(next fuseutil.FileSystem, gate *ReadyGate) fuseutil.FileSystem { + if gate == nil { + return next + } + return &gatedFileSystem{next: next, gate: gate} +} + +func (fs *gatedFileSystem) wait(ctx context.Context) error { + if err := fs.gate.Wait(ctx); err != nil { + return syscall.EIO + } + return nil +} + +func (fs *gatedFileSystem) StatFS(ctx context.Context, op *fuseops.StatFSOp) error { + return fs.next.StatFS(ctx, op) +} + +func (fs *gatedFileSystem) LookUpInode(ctx context.Context, op *fuseops.LookUpInodeOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.LookUpInode(ctx, op) +} + +func (fs *gatedFileSystem) GetInodeAttributes(ctx context.Context, op *fuseops.GetInodeAttributesOp) error { + if op.Inode != fuseops.RootInodeID { + if err := fs.wait(ctx); err != nil { + return err + } + } + return fs.next.GetInodeAttributes(ctx, op) +} + +func (fs *gatedFileSystem) SetInodeAttributes(ctx context.Context, op *fuseops.SetInodeAttributesOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.SetInodeAttributes(ctx, op) +} + +func (fs *gatedFileSystem) ForgetInode(ctx context.Context, op *fuseops.ForgetInodeOp) error { + return fs.next.ForgetInode(ctx, op) +} + +func (fs *gatedFileSystem) BatchForget(ctx context.Context, op *fuseops.BatchForgetOp) error { + return fs.next.BatchForget(ctx, op) +} + +func (fs *gatedFileSystem) MkDir(ctx context.Context, op *fuseops.MkDirOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.MkDir(ctx, op) +} + +func (fs *gatedFileSystem) MkNode(ctx context.Context, op *fuseops.MkNodeOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.MkNode(ctx, op) +} + +func (fs *gatedFileSystem) CreateFile(ctx context.Context, op *fuseops.CreateFileOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.CreateFile(ctx, op) +} + +func (fs *gatedFileSystem) CreateLink(ctx context.Context, op *fuseops.CreateLinkOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.CreateLink(ctx, op) +} + +func (fs *gatedFileSystem) CreateSymlink(ctx context.Context, op *fuseops.CreateSymlinkOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.CreateSymlink(ctx, op) +} + +func (fs *gatedFileSystem) Rename(ctx context.Context, op *fuseops.RenameOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.Rename(ctx, op) +} + +func (fs *gatedFileSystem) RmDir(ctx context.Context, op *fuseops.RmDirOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.RmDir(ctx, op) +} + +func (fs *gatedFileSystem) Unlink(ctx context.Context, op *fuseops.UnlinkOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.Unlink(ctx, op) +} + +func (fs *gatedFileSystem) OpenDir(ctx context.Context, op *fuseops.OpenDirOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.OpenDir(ctx, op) +} + +func (fs *gatedFileSystem) ReadDir(ctx context.Context, op *fuseops.ReadDirOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.ReadDir(ctx, op) +} + +func (fs *gatedFileSystem) ReadDirPlus(ctx context.Context, op *fuseops.ReadDirPlusOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.ReadDirPlus(ctx, op) +} + +func (fs *gatedFileSystem) ReleaseDirHandle(ctx context.Context, op *fuseops.ReleaseDirHandleOp) error { + return fs.next.ReleaseDirHandle(ctx, op) +} + +func (fs *gatedFileSystem) OpenFile(ctx context.Context, op *fuseops.OpenFileOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.OpenFile(ctx, op) +} + +func (fs *gatedFileSystem) ReadFile(ctx context.Context, op *fuseops.ReadFileOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.ReadFile(ctx, op) +} + +func (fs *gatedFileSystem) WriteFile(ctx context.Context, op *fuseops.WriteFileOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.WriteFile(ctx, op) +} + +func (fs *gatedFileSystem) SyncFile(ctx context.Context, op *fuseops.SyncFileOp) error { + return fs.next.SyncFile(ctx, op) +} + +func (fs *gatedFileSystem) FlushFile(ctx context.Context, op *fuseops.FlushFileOp) error { + return fs.next.FlushFile(ctx, op) +} + +func (fs *gatedFileSystem) ReleaseFileHandle(ctx context.Context, op *fuseops.ReleaseFileHandleOp) error { + return fs.next.ReleaseFileHandle(ctx, op) +} + +func (fs *gatedFileSystem) ReadSymlink(ctx context.Context, op *fuseops.ReadSymlinkOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.ReadSymlink(ctx, op) +} + +func (fs *gatedFileSystem) RemoveXattr(ctx context.Context, op *fuseops.RemoveXattrOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.RemoveXattr(ctx, op) +} + +func (fs *gatedFileSystem) GetXattr(ctx context.Context, op *fuseops.GetXattrOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.GetXattr(ctx, op) +} + +func (fs *gatedFileSystem) ListXattr(ctx context.Context, op *fuseops.ListXattrOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.ListXattr(ctx, op) +} + +func (fs *gatedFileSystem) SetXattr(ctx context.Context, op *fuseops.SetXattrOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.SetXattr(ctx, op) +} + +func (fs *gatedFileSystem) Fallocate(ctx context.Context, op *fuseops.FallocateOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.Fallocate(ctx, op) +} + +func (fs *gatedFileSystem) SyncFS(ctx context.Context, op *fuseops.SyncFSOp) error { + if err := fs.wait(ctx); err != nil { + return err + } + return fs.next.SyncFS(ctx, op) +} + +func (fs *gatedFileSystem) Destroy() { + fs.next.Destroy() +} diff --git a/internal/fusefs/gated_fs_test.go b/internal/fusefs/gated_fs_test.go new file mode 100644 index 0000000..123d667 --- /dev/null +++ b/internal/fusefs/gated_fs_test.go @@ -0,0 +1,68 @@ +//go:build !windows + +package fusefs + +import ( + "context" + "errors" + "sync/atomic" + "syscall" + "testing" + "time" + + "github.com/jacobsa/fuse/fuseops" + "github.com/jacobsa/fuse/fuseutil" +) + +type recordingFS struct { + fuseutil.NotImplementedFileSystem + lookups atomic.Int32 +} + +func (fs *recordingFS) LookUpInode(context.Context, *fuseops.LookUpInodeOp) error { + fs.lookups.Add(1) + return nil +} + +func TestGatedFileSystemBlocksUntilReady(t *testing.T) { + next := &recordingFS{} + gate := NewReadyGate(false) + fs := NewGatedFileSystem(next, gate) + + done := make(chan error, 1) + go func() { + done <- fs.LookUpInode(context.Background(), &fuseops.LookUpInodeOp{}) + }() + + select { + case err := <-done: + t.Fatalf("LookUpInode returned before ready: %v", err) + case <-time.After(20 * time.Millisecond): + } + if got := next.lookups.Load(); got != 0 { + t.Fatalf("lookups before ready = %d, want 0", got) + } + + gate.MarkReady() + if err := <-done; err != nil { + t.Fatalf("LookUpInode after ready: %v", err) + } + if got := next.lookups.Load(); got != 1 { + t.Fatalf("lookups after ready = %d, want 1", got) + } +} + +func TestGatedFileSystemFailedGateReturnsEIO(t *testing.T) { + next := &recordingFS{} + gate := NewReadyGate(false) + gate.MarkFailed(errors.New("clone failed")) + fs := NewGatedFileSystem(next, gate) + + err := fs.LookUpInode(context.Background(), &fuseops.LookUpInodeOp{}) + if !errors.Is(err, syscall.EIO) { + t.Fatalf("LookUpInode error = %v, want EIO", err) + } + if got := next.lookups.Load(); got != 0 { + t.Fatalf("lookups after failed gate = %d, want 0", got) + } +} diff --git a/internal/fusefs/ready_gate.go b/internal/fusefs/ready_gate.go new file mode 100644 index 0000000..229fada --- /dev/null +++ b/internal/fusefs/ready_gate.go @@ -0,0 +1,115 @@ +package fusefs + +import ( + "context" + "errors" + "sync" +) + +var ErrRepoNotReady = errors.New("repo is not ready") + +type ReadyGate struct { + mu sync.Mutex + ready bool + err error + done chan struct{} + closed bool +} + +func NewReadyGate(ready bool) *ReadyGate { + g := &ReadyGate{ready: ready, done: make(chan struct{})} + if ready { + close(g.done) + g.closed = true + } + return g +} + +func (g *ReadyGate) Wait(ctx context.Context) error { + if g == nil { + return nil + } + g.mu.Lock() + if g.ready { + g.mu.Unlock() + return nil + } + if g.err != nil { + err := g.err + g.mu.Unlock() + return err + } + done := g.done + g.mu.Unlock() + + select { + case <-ctx.Done(): + return ctx.Err() + case <-done: + } + + g.mu.Lock() + defer g.mu.Unlock() + if g.ready { + return nil + } + if g.err != nil { + return g.err + } + return ErrRepoNotReady +} + +func (g *ReadyGate) Reset() { + if g == nil { + return + } + g.mu.Lock() + defer g.mu.Unlock() + if !g.ready && g.err == nil { + return + } + g.ready = false + g.err = nil + g.done = make(chan struct{}) + g.closed = false +} + +func (g *ReadyGate) MarkReady() { + if g == nil { + return + } + g.mu.Lock() + defer g.mu.Unlock() + if g.ready { + return + } + g.ready = true + g.err = nil + if !g.closed { + close(g.done) + g.closed = true + } +} + +func (g *ReadyGate) MarkFailed(err error) { + if g == nil { + return + } + if err == nil { + err = ErrRepoNotReady + } + g.mu.Lock() + defer g.mu.Unlock() + if g.err != nil { + g.err = err + return + } + if g.ready { + return + } + g.err = err + if !g.closed { + close(g.done) + g.closed = true + } +} diff --git a/internal/fusefs/ready_gate_test.go b/internal/fusefs/ready_gate_test.go new file mode 100644 index 0000000..4800cdc --- /dev/null +++ b/internal/fusefs/ready_gate_test.go @@ -0,0 +1,51 @@ +package fusefs + +import ( + "context" + "errors" + "testing" + "time" +) + +func TestReadyGateWaitsUntilReady(t *testing.T) { + g := NewReadyGate(false) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { + done <- g.Wait(ctx) + }() + + select { + case err := <-done: + t.Fatalf("Wait returned before ready: %v", err) + case <-time.After(20 * time.Millisecond): + } + + g.MarkReady() + if err := <-done; err != nil { + t.Fatalf("Wait returned error after ready: %v", err) + } +} + +func TestReadyGateFailureFailsFastAndCanReset(t *testing.T) { + g := NewReadyGate(false) + g.MarkFailed(errors.New("clone failed")) + + if err := g.Wait(context.Background()); err == nil || err.Error() != "clone failed" { + t.Fatalf("Wait error = %v, want clone failed", err) + } + + g.Reset() + g.MarkReady() + if err := g.Wait(context.Background()); err != nil { + t.Fatalf("Wait after reset/ready: %v", err) + } +} + +func TestReadyGateMarkReadyAfterFailureDoesNotPanic(t *testing.T) { + g := NewReadyGate(false) + g.MarkFailed(errors.New("clone failed")) + g.MarkReady() +} diff --git a/internal/gitstore/gitstore.go b/internal/gitstore/gitstore.go index 5c4418f..1ee9fa2 100644 --- a/internal/gitstore/gitstore.go +++ b/internal/gitstore/gitstore.go @@ -34,6 +34,14 @@ type readBlobResult struct { const maxReadBlobBytes int64 = 1<<31 - 1 +const fetchedFullRefRemoteTrackingRef = "refs/remotes/artifact-fs/fetch-ref" + +type fetchRefInfo struct { + sourceRef string + remoteRef string + branch string +} + func New(logger *slog.Logger) *Store { if logger == nil { logger = slog.Default() @@ -64,6 +72,14 @@ func (s *Store) SetBatchPoolSize(n int) { } func (s *Store) CloneBlobless(ctx context.Context, cfg model.RepoConfig) error { + return s.cloneBlobless(ctx, cfg, nil) +} + +func (s *Store) CloneBloblessNonInteractive(ctx context.Context, cfg model.RepoConfig) error { + return s.cloneBlobless(ctx, cfg, nonInteractiveGitEnv()) +} + +func (s *Store) cloneBlobless(ctx context.Context, cfg model.RepoConfig, extraEnv []string) error { if _, err := os.Stat(cfg.GitDir); err == nil { return nil } @@ -80,17 +96,22 @@ func (s *Store) CloneBlobless(ctx context.Context, cfg model.RepoConfig) error { // Strip credentials from the CLI-visible URL; pass them via a credential helper // so they don't appear in ps output. - safeURL, credHelper := credentialEnv(cfg.RemoteURL) + safeURL, credHelper, err := credentialEnv(cfg.RemoteURL) + if err != nil { + return err + } + env := append([]string{}, extraEnv...) + env = append(env, credHelper...) args := []string{"clone", "--filter=blob:none", "--no-checkout", "--single-branch", "--branch", cfg.Branch, safeURL, target} - if _, err := runGitWithEnv(ctx, "", credHelper, args...); err != nil { + if _, err := runGitWithEnv(ctx, "", env, args...); err != nil { return err } - if err := os.Rename(filepath.Join(target, ".git"), cfg.GitDir); err != nil { + // Populate the index so git status works inside the mount. + if _, err := runGit(ctx, filepath.Join(target, ".git"), "read-tree", "HEAD"); err != nil { return err } - // Populate the index so git status works inside the mount. - if _, err := runGit(ctx, cfg.GitDir, "read-tree", "HEAD"); err != nil { + if err := os.Rename(filepath.Join(target, ".git"), cfg.GitDir); err != nil { return err } return nil @@ -101,6 +122,94 @@ func (s *Store) Fetch(ctx context.Context, repo model.RepoConfig) error { return err } +func (s *Store) FetchRefNonInteractive(ctx context.Context, repo model.RepoConfig, ref string) error { + target, err := fetchRefTarget(repo, ref) + if err != nil { + return err + } + refspec := "+" + target.sourceRef + ":" + target.remoteRef + if target.branch != "" { + refspec = fmt.Sprintf("+refs/heads/%s:refs/remotes/origin/%s", target.branch, target.branch) + } + _, err = runGitWithEnv(ctx, repo.GitDir, nonInteractiveGitEnv(), "fetch", "--filter=blob:none", "origin", refspec) + return err +} + +func (s *Store) PrepareExistingCloneNonInteractive(ctx context.Context, repo model.RepoConfig) error { + if strings.TrimSpace(repo.RemoteURL) == "" { + return errors.New("remote URL is required") + } + safeURL, _, err := credentialEnv(repo.RemoteURL) + if err != nil { + return err + } + if safeURL != repo.RemoteURL { + return errors.New("existing clone remote must use ambient credentials") + } + remoteURL, err := runGit(ctx, repo.GitDir, "remote", "get-url", "origin") + if err != nil { + return err + } + if strings.TrimSpace(remoteURL) != strings.TrimSpace(repo.RemoteURL) { + if _, err := runGitWithEnv(ctx, repo.GitDir, nonInteractiveGitEnv(), "remote", "set-url", "origin", repo.RemoteURL); err != nil { + return err + } + } + if err := s.FetchRefNonInteractive(ctx, repo, repo.FetchRef); err != nil { + return err + } + return s.PrepareFetchedBranch(ctx, repo, repo.FetchRef) +} + +func (s *Store) PrepareFetchedBranch(ctx context.Context, repo model.RepoConfig, ref string) error { + target, err := fetchRefTarget(repo, ref) + if err != nil { + return err + } + oid, err := runGit(ctx, repo.GitDir, "rev-parse", "--verify", target.remoteRef+"^{commit}") + if err != nil { + return fmt.Errorf("remote ref %s missing after fetch: %w", target.remoteRef, err) + } + oid = strings.TrimSpace(oid) + if target.branch == "" { + if _, err := runGit(ctx, repo.GitDir, "update-ref", "--no-deref", "HEAD", oid); err != nil { + return err + } + return s.ReadTreeHEAD(ctx, repo) + } + if _, err := runGit(ctx, repo.GitDir, "update-ref", "refs/heads/"+target.branch, oid); err != nil { + return err + } + if _, err := runGit(ctx, repo.GitDir, "symbolic-ref", "HEAD", "refs/heads/"+target.branch); err != nil { + return err + } + if _, err := runGit(ctx, repo.GitDir, "branch", "--set-upstream-to", "origin/"+target.branch, target.branch); err != nil { + s.logger.Warn("set upstream failed", "repo", repo.Name, "error", err) + } + return s.ReadTreeHEAD(ctx, repo) +} + +func (s *Store) ValidatePreparedGitDir(ctx context.Context, repo model.RepoConfig) error { + if strings.TrimSpace(repo.GitDir) == "" { + return errors.New("git dir is required") + } + st, err := os.Stat(repo.GitDir) + if err != nil { + return err + } + if !st.IsDir() { + return fmt.Errorf("git dir %s is not a directory", repo.GitDir) + } + if _, err := runGit(ctx, repo.GitDir, "rev-parse", "--git-dir"); err != nil { + return err + } + remoteURL, err := runGit(ctx, repo.GitDir, "remote", "get-url", "origin") + if err == nil && remoteHasInlineCredentials(remoteURL) { + return errors.New("prepared git dir origin must use ambient credentials") + } + return nil +} + func (s *Store) ResolveHEAD(ctx context.Context, repo model.RepoConfig) (oid string, ref string, err error) { oid, err = runGit(ctx, repo.GitDir, "rev-parse", "HEAD") if err != nil { @@ -626,41 +735,320 @@ func runGitWithEnv(ctx context.Context, gitDir string, extraEnv []string, args . // credentialEnv returns a sanitized URL (safe for ps) and env vars that // configure a one-shot git credential helper to supply the real credentials. -func credentialEnv(rawURL string) (safeURL string, env []string) { +func credentialEnv(rawURL string) (safeURL string, env []string, err error) { if rawURL == "" { - return "", nil + return "", nil, nil + } + if strings.ContainsAny(rawURL, "?#") { + return "", nil, errors.New("remote URL must not include query or fragment") } u, err := url.Parse(rawURL) - if err != nil || u.User == nil { - return rawURL, nil + if err != nil { + if remoteHasInlineCredentials(rawURL) { + return "", nil, errors.New("malformed remote URL") + } + if rawUserinfoCandidateHasPassword(rawURL) { + return "", nil, errors.New("malformed remote URL") + } + if strings.Contains(rawURL, "://") { + return "", nil, errors.New("malformed remote URL") + } + return rawURL, nil, nil + } + if u.RawQuery != "" || u.ForceQuery || u.Fragment != "" || strings.Contains(rawURL, "#") { + return "", nil, errors.New("remote URL must not include query or fragment") + } + if u.User == nil && strings.Contains(rawURL, "@") && (auth.HasInlineCredentials(rawURL) || malformedUserinfoInRemote(rawURL, u)) { + return "", nil, errors.New("malformed remote URL") + } + if u.User == nil { + return rawURL, nil, nil + } + if !isHTTPRemote(rawURL, u.Scheme) { + if _, hasPassword := u.User.Password(); hasPassword { + return "", nil, errors.New("remote URL includes unsupported inline credentials") + } + return rawURL, nil, nil } username := u.User.Username() password, hasPassword := u.User.Password() if username == "" && !hasPassword { - return rawURL, nil + return rawURL, nil, nil } - // Build a credential helper that prints credentials to stdout. - // Uses printf to avoid shell quoting issues with single quotes in passwords. - var lines []string + credentialUsername := username + credentialPassword := password if hasPassword { - lines = append(lines, "username="+username, "password="+password) + credentialPassword = password } else if username != "" { // Token-as-username pattern (e.g., https://ghp_xxx@github.com) - lines = append(lines, "username="+username, "password="+username) + credentialPassword = username } - // Escape single quotes in the credential payload to prevent shell injection. - payload := strings.Join(lines, "\n") - payload = strings.ReplaceAll(payload, "'", "'\\''") - helper := fmt.Sprintf("!f() { printf '%%s\\n' '%s'; }; f", payload) + helper := "!f() { printf '%s\\n' \"username=$ARTIFACT_FS_GIT_USERNAME\" \"password=$ARTIFACT_FS_GIT_PASSWORD\"; }; f" u.User = nil return u.String(), []string{ "GIT_TERMINAL_PROMPT=0", - "GIT_CONFIG_COUNT=1", + "ARTIFACT_FS_GIT_USERNAME=" + credentialUsername, + "ARTIFACT_FS_GIT_PASSWORD=" + credentialPassword, + "GIT_CONFIG_COUNT=2", "GIT_CONFIG_KEY_0=credential.helper", - "GIT_CONFIG_VALUE_0=" + helper, + "GIT_CONFIG_VALUE_0=", + "GIT_CONFIG_KEY_1=credential.helper", + "GIT_CONFIG_VALUE_1=" + helper, + }, nil +} + +func isHTTPRemote(rawURL string, scheme string) bool { + switch strings.ToLower(scheme) { + case "http", "https": + return true + } + lower := strings.ToLower(strings.TrimSpace(rawURL)) + return strings.HasPrefix(lower, "http:/") || strings.HasPrefix(lower, "https:/") || + strings.HasPrefix(lower, "http//") || strings.HasPrefix(lower, "https//") || + strings.HasPrefix(lower, "http:") || strings.HasPrefix(lower, "https:") +} + +func isMalformedHTTPUserinfo(rawURL string, u *url.URL) bool { + if !isHTTPRemote(rawURL, u.Scheme) { + return false + } + if u.Host == "" { + return true + } + return strings.HasPrefix(u.Path, "/@") +} + +func remoteHasInlineCredentials(rawURL string) bool { + if strings.ContainsAny(rawURL, "?#") { + return true + } + u, err := url.Parse(rawURL) + if err != nil { + return auth.HasInlineCredentials(rawURL) || rawUserinfoCandidateHasPassword(rawURL) + } + if u.User != nil { + _, hasPassword := u.User.Password() + return isHTTPRemote(rawURL, u.Scheme) || hasPassword + } + return strings.Contains(rawURL, "@") && malformedUserinfoInRemote(rawURL, u) +} + +func malformedUserinfoInRemote(rawURL string, u *url.URL) bool { + if isHTTPRemote(rawURL, u.Scheme) { + return auth.HasInlineCredentials(rawURL) + } + if isMalformedHTTPUserinfo(rawURL, u) { + return true + } + if isHTTPRemote(rawURL, u.Scheme) && strings.Contains(u.Hostname(), ".") { + return false + } + return rawUserinfoCandidateHasPassword(rawURL) +} + +func rawUserinfoCandidateHasPassword(raw string) bool { + prefix := raw + start := -1 + if i := strings.LastIndex(prefix, "://"); i >= 0 { + start = i + len("://") + } else if i := strings.Index(prefix, ":/"); i >= 0 { + start = i + len(":/") + } else if i := strings.Index(prefix, "//"); i >= 0 { + start = i + len("//") + } else if i := strings.Index(prefix, ":"); i >= 0 { + start = i + len(":") + } + if start < 0 || start >= len(raw) { + return false + } + endChars := "?#" + if strings.Contains(raw, "://") { + endChars = "/?#" + } + end := len(raw) + if relEnd := strings.IndexAny(raw[start:], endChars); relEnd >= 0 { + end = start + relEnd + } + at := strings.LastIndex(raw[start:end], "@") + if at < 0 { + return false + } + at += start + return strings.Contains(raw[start:at], ":") +} + +func nonInteractiveGitEnv() []string { + return []string{"GIT_TERMINAL_PROMPT=0", "GIT_SSH_COMMAND=" + sshBatchModeCommand(os.Getenv("GIT_SSH_COMMAND"))} +} + +func sshBatchModeCommand(existing string) string { + existing = strings.TrimSpace(existing) + if existing == "" { + return "ssh -o BatchMode=yes" + } + tokens := splitShellFields(existing) + filtered := make([]string, 0, len(tokens)+2) + for i := 0; i < len(tokens); i++ { + tok := tokens[i] + lower := strings.ToLower(tok) + if lower == "-o" && i+1 < len(tokens) && isBatchModeOption(tokens[i+1]) { + i++ + continue + } + if strings.HasPrefix(lower, "-obatchmode=") { + continue + } + filtered = append(filtered, tok) + } + if len(filtered) == 0 { + filtered = append(filtered, "ssh") + } + filtered = append(filtered, "-o", "BatchMode=yes") + for i, tok := range filtered { + filtered[i] = shellQuote(tok) + } + return strings.Join(filtered, " ") +} + +func splitShellFields(s string) []string { + var fields []string + var b strings.Builder + var quote rune + escaped := false + for _, r := range s { + if escaped { + if r == '$' { + b.WriteString(`\$`) + } else { + b.WriteRune(r) + } + escaped = false + continue + } + switch { + case quote != 0: + if r == quote { + quote = 0 + } else if r == '$' && quote == '\'' { + b.WriteString(`\$`) + } else if r == '\\' && quote == '"' { + escaped = true + } else { + b.WriteRune(r) + } + case r == '\'' || r == '"': + quote = r + case r == '\\': + escaped = true + case r == ' ' || r == '\t' || r == '\n': + if b.Len() > 0 { + fields = append(fields, b.String()) + b.Reset() + } + default: + b.WriteRune(r) + } + } + if escaped { + b.WriteRune('\\') + } + if b.Len() > 0 { + fields = append(fields, b.String()) + } + return fields +} + +func isBatchModeOption(opt string) bool { + parts := strings.Fields(strings.ToLower(strings.TrimSpace(opt))) + if len(parts) == 0 { + return false + } + return parts[0] == "batchmode" || strings.HasPrefix(parts[0], "batchmode=") +} + +func shellQuote(s string) string { + if s == "" { + return "''" + } + if strings.Contains(s, "$") { + return doubleQuote(s) + } + if isShellSafe(s) { + return s + } + return "'" + strings.ReplaceAll(s, "'", "'\\''") + "'" +} + +func doubleQuote(s string) string { + var b strings.Builder + b.WriteByte('"') + for i := 0; i < len(s); i++ { + if s[i] == '\\' && i+1 < len(s) && s[i+1] == '$' { + b.WriteString(`\$`) + i++ + continue + } + switch s[i] { + case '\\', '"', '`': + b.WriteByte('\\') + } + b.WriteByte(s[i]) + } + b.WriteByte('"') + return b.String() +} + +func isShellSafe(s string) bool { + for _, r := range s { + if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') { + continue + } + if strings.ContainsRune("@%_+=:,./-~$", r) { + continue + } + return false + } + return true +} + +func fetchRefTarget(repo model.RepoConfig, ref string) (fetchRefInfo, error) { + ref = strings.TrimSpace(ref) + if ref == "" { + ref = strings.TrimSpace(repo.Branch) + } + if ref == "" { + return fetchRefInfo{}, errors.New("fetch ref is required") + } + if branch := branchName(ref); branch != "" { + return fetchRefInfo{ + sourceRef: "refs/heads/" + branch, + remoteRef: "refs/remotes/origin/" + branch, + branch: branch, + }, nil + } + if strings.HasPrefix(ref, "refs/") { + return fetchRefInfo{sourceRef: ref, remoteRef: fetchedFullRefRemoteTrackingRef}, nil + } + return fetchRefInfo{}, errors.New("fetch ref is required") +} + +func branchName(ref string) string { + ref = strings.TrimSpace(ref) + if strings.HasPrefix(ref, "refs/heads/") { + return strings.TrimPrefix(ref, "refs/heads/") + } + if strings.HasPrefix(ref, "refs/remotes/origin/") { + return strings.TrimPrefix(ref, "refs/remotes/origin/") + } + if strings.HasPrefix(ref, "origin/") { + return strings.TrimPrefix(ref, "origin/") + } + if strings.HasPrefix(ref, "refs/") { + return "" } + return ref } func rootNode(repoID model.RepoID) model.BaseNode { diff --git a/internal/gitstore/gitstore_test.go b/internal/gitstore/gitstore_test.go index b6e7d22..4a283d4 100644 --- a/internal/gitstore/gitstore_test.go +++ b/internal/gitstore/gitstore_test.go @@ -214,38 +214,311 @@ func TestReadTreeHEAD(t *testing.T) { } } -func TestCredentialEnvEscapesSingleQuotes(t *testing.T) { +func TestFetchRefNonInteractiveAndPrepareFetchedBranch(t *testing.T) { t.Parallel() - // Password with a single quote should be escaped - safeURL, env := credentialEnv("https://user:p@ss'word@github.com/org/repo.git") + tmp := t.TempDir() + bare := filepath.Join(tmp, "origin.git") + work := filepath.Join(tmp, "work") + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + + run(t, "git", "init", "--bare", bare) + run(t, "git", "clone", bare, work) + run(t, "git", "-C", work, "checkout", "-b", "master") + os.WriteFile(filepath.Join(work, "README.md"), []byte("hello\n"), 0o644) + run(t, "git", "-C", work, "add", "README.md") + run(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "init") + run(t, "git", "-C", work, "push", "origin", "master") + + run(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + run(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "file://"+bare) + + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: preparedGitDir, Branch: "master"} + store := New(nil) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := store.ValidatePreparedGitDir(ctx, cfg); err != nil { + t.Fatalf("ValidatePreparedGitDir: %v", err) + } + if err := store.FetchRefNonInteractive(ctx, cfg, "master"); err != nil { + t.Fatalf("FetchRefNonInteractive: %v", err) + } + if err := store.PrepareFetchedBranch(ctx, cfg, "master"); err != nil { + t.Fatalf("PrepareFetchedBranch: %v", err) + } + oid, ref, err := store.ResolveHEAD(ctx, cfg) + if err != nil { + t.Fatalf("ResolveHEAD: %v", err) + } + if ref != "master" { + t.Fatalf("ref = %q, want master", ref) + } + nodes, err := store.BuildTreeIndex(ctx, cfg, oid) + if err != nil { + t.Fatalf("BuildTreeIndex: %v", err) + } + found := false + for _, n := range nodes { + if n.Path == "README.md" { + found = true + } + } + if !found { + t.Fatal("README.md not found in prepared tree") + } +} + +func TestPrepareExistingCloneNonInteractiveUpdatesBranch(t *testing.T) { + t.Parallel() + tmp := t.TempDir() + bare := filepath.Join(tmp, "origin.git") + work := filepath.Join(tmp, "work") + gitDir := filepath.Join(tmp, "repo.git") + + run(t, "git", "init", "--bare", bare) + run(t, "git", "clone", bare, work) + run(t, "git", "-C", work, "checkout", "-b", "master") + os.WriteFile(filepath.Join(work, "README.md"), []byte("master\n"), 0o644) + run(t, "git", "-C", work, "add", "README.md") + run(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "master") + run(t, "git", "-C", work, "push", "origin", "master") + run(t, "git", "-C", work, "checkout", "-b", "dev") + os.WriteFile(filepath.Join(work, "DEV.md"), []byte("dev\n"), 0o644) + run(t, "git", "-C", work, "add", "DEV.md") + run(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "dev") + run(t, "git", "-C", work, "push", "origin", "dev") + + store := New(nil) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: gitDir, RemoteURL: "file://" + bare, Branch: "master", FetchRef: "master"} + if err := store.CloneBloblessNonInteractive(ctx, cfg); err != nil { + t.Fatalf("CloneBloblessNonInteractive: %v", err) + } + cfg.Branch = "dev" + cfg.FetchRef = "dev" + if err := store.PrepareExistingCloneNonInteractive(ctx, cfg); err != nil { + t.Fatalf("PrepareExistingCloneNonInteractive: %v", err) + } + oid, ref, err := store.ResolveHEAD(ctx, cfg) + if err != nil { + t.Fatalf("ResolveHEAD: %v", err) + } + if ref != "dev" { + t.Fatalf("ref = %q, want dev", ref) + } + nodes, err := store.BuildTreeIndex(ctx, cfg, oid) + if err != nil { + t.Fatalf("BuildTreeIndex: %v", err) + } + found := false + for _, n := range nodes { + if n.Path == "DEV.md" { + found = true + } + } + if !found { + t.Fatal("DEV.md not found after existing clone prepare") + } +} + +func TestPrepareExistingCloneRejectsCredentialedRemoteBeforeSetURL(t *testing.T) { + tmp := t.TempDir() + gitDir := filepath.Join(tmp, "repo.git") + worktree := filepath.Join(tmp, "worktree") + run(t, "git", "init", "--separate-git-dir", gitDir, "--initial-branch", "master", worktree) + run(t, "git", "-C", worktree, "remote", "add", "origin", "https://github.com/org/repo.git") + + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "set-url-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\ncase \"$*\" in *\"remote set-url\"*) : > \"$GIT_SET_URL_MARKER\";; esac\nexec /usr/bin/git \"$@\"\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_SET_URL_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + store := New(nil) + for _, remote := range []string{ + "ssh:/git:secret@github.com/org/repo.git", + "ssh//git:secret@github.com/org/repo.git", + "ssh:/git:pa/ss@github.com/org/repo.git", + } { + t.Run(remote, func(t *testing.T) { + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: gitDir, RemoteURL: remote, Branch: "master", FetchRef: "master"} + err := store.PrepareExistingCloneNonInteractive(context.Background(), cfg) + if err == nil { + t.Fatal("expected credentialed remote rejection") + } + if strings.Contains(err.Error(), "secret") { + t.Fatalf("error leaked credential: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatal("git remote set-url was invoked before rejecting credentialed remote") + } + }) + } +} + +func TestValidatePreparedGitDirRejectsCredentialedOrigin(t *testing.T) { + t.Parallel() + tmp := t.TempDir() + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + run(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + run(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "https://ghp_secret@github.com/org/repo.git") + + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: preparedGitDir, Branch: "master"} + store := New(nil) + err := store.ValidatePreparedGitDir(context.Background(), cfg) + if err == nil { + t.Fatal("expected credentialed origin rejection") + } + if strings.Contains(err.Error(), "ghp_secret") { + t.Fatalf("error leaked origin credential: %v", err) + } +} + +func TestValidatePreparedGitDirRejectsMalformedCredentialedOrigin(t *testing.T) { + t.Parallel() + tmp := t.TempDir() + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + run(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + run(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "ssh:/git:secret@github.com/org/repo.git") + + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: preparedGitDir, Branch: "master"} + store := New(nil) + if err := store.ValidatePreparedGitDir(context.Background(), cfg); err == nil { + t.Fatal("expected malformed credentialed origin rejection") + } +} + +func TestValidatePreparedGitDirAllowsAtInHTTPSOriginPath(t *testing.T) { + t.Parallel() + tmp := t.TempDir() + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + run(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + run(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "https://git.example.com/team/repo@2026.git") + + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: preparedGitDir, Branch: "master"} + store := New(nil) + if err := store.ValidatePreparedGitDir(context.Background(), cfg); err != nil { + t.Fatalf("ValidatePreparedGitDir: %v", err) + } +} + +func TestFetchRefNonInteractiveFullRefPreparesDetachedHEAD(t *testing.T) { + t.Parallel() + tmp := t.TempDir() + bare := filepath.Join(tmp, "origin.git") + work := filepath.Join(tmp, "work") + preparedGitDir := filepath.Join(tmp, "prepared.git") + preparedWorktree := filepath.Join(tmp, "prepared") + + run(t, "git", "init", "--bare", bare) + run(t, "git", "clone", bare, work) + run(t, "git", "-C", work, "checkout", "-b", "master") + os.WriteFile(filepath.Join(work, "README.md"), []byte("hello\n"), 0o644) + run(t, "git", "-C", work, "add", "README.md") + run(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "init") + run(t, "git", "-C", work, "push", "origin", "master") + run(t, "git", "-C", work, "checkout", "-b", "pull-request") + os.WriteFile(filepath.Join(work, "PR.md"), []byte("pull request\n"), 0o644) + run(t, "git", "-C", work, "add", "PR.md") + run(t, "git", "-C", work, "-c", "user.name=test", "-c", "user.email=test@example.com", "commit", "-m", "pr") + run(t, "git", "-C", work, "push", "origin", "HEAD:refs/pull/10/head") + + run(t, "git", "init", "--separate-git-dir", preparedGitDir, "--initial-branch", "master", preparedWorktree) + run(t, "git", "-C", preparedWorktree, "remote", "add", "origin", "file://"+bare) + + cfg := model.RepoConfig{ID: "x", Name: "x", GitDir: preparedGitDir, Branch: "master"} + store := New(nil) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := store.FetchRefNonInteractive(ctx, cfg, "refs/pull/10/head"); err != nil { + t.Fatalf("FetchRefNonInteractive: %v", err) + } + if _, err := runGit(ctx, preparedGitDir, "rev-parse", "--verify", fetchedFullRefRemoteTrackingRef+"^{commit}"); err != nil { + t.Fatalf("expected fetched full ref at safe remote-tracking ref: %v", err) + } + if err := store.PrepareFetchedBranch(ctx, cfg, "refs/pull/10/head"); err != nil { + t.Fatalf("PrepareFetchedBranch: %v", err) + } + oid, ref, err := store.ResolveHEAD(ctx, cfg) + if err != nil { + t.Fatalf("ResolveHEAD: %v", err) + } + if ref != "DETACHED" { + t.Fatalf("ref = %q, want DETACHED", ref) + } + nodes, err := store.BuildTreeIndex(ctx, cfg, oid) + if err != nil { + t.Fatalf("BuildTreeIndex: %v", err) + } + found := false + for _, n := range nodes { + if n.Path == "PR.md" { + found = true + } + } + if !found { + t.Fatal("PR.md not found in prepared tree") + } +} + +func TestCredentialEnvKeepsSecretsOutOfHelperCommand(t *testing.T) { + t.Parallel() + safeURL, env, err := credentialEnv("https://user:p@ss'word@github.com/org/repo.git") + if err != nil { + t.Fatalf("credentialEnv: %v", err) + } if safeURL == "" { t.Fatal("expected non-empty safe URL") } if strings.Contains(safeURL, "p@ss") { t.Fatalf("safe URL should not contain password: %s", safeURL) } - // The credential helper env var should contain escaped quote - found := false + foundHelper := false + foundReset := false + foundPassword := false for _, e := range env { - if val, ok := strings.CutPrefix(e, "GIT_CONFIG_VALUE_0="); ok { - found = true + if e == "GIT_CONFIG_VALUE_0=" { + foundReset = true + } + if val, ok := strings.CutPrefix(e, "GIT_CONFIG_VALUE_1="); ok { + foundHelper = true if strings.Contains(val, "p@ss'word") { - t.Fatalf("unescaped password in helper: %s", val) - } - // Should contain the escaped form - if !strings.Contains(val, `'\''`) { - t.Fatalf("expected escaped single quote in helper, got: %s", val) + t.Fatalf("password leaked in helper command: %s", val) } } + if e == "ARTIFACT_FS_GIT_PASSWORD=p@ss'word" { + foundPassword = true + } } - if !found { - t.Fatal("expected GIT_CONFIG_VALUE_0 in env") + if !foundReset { + t.Fatal("expected empty credential.helper reset") + } + if !foundHelper { + t.Fatal("expected GIT_CONFIG_VALUE_1 in env") + } + if !foundPassword { + t.Fatalf("expected password env var, got %v", env) } } func TestCredentialEnvNoCredentials(t *testing.T) { t.Parallel() - safeURL, env := credentialEnv("https://github.com/org/repo.git") + safeURL, env, err := credentialEnv("https://github.com/org/repo.git") + if err != nil { + t.Fatalf("credentialEnv: %v", err) + } if safeURL != "https://github.com/org/repo.git" { t.Fatalf("expected unchanged URL, got %s", safeURL) } @@ -254,15 +527,390 @@ func TestCredentialEnvNoCredentials(t *testing.T) { } } +func TestCredentialEnvRejectsQueryAndFragment(t *testing.T) { + t.Parallel() + for _, raw := range []string{ + "https://github.com/org/repo.git?access_token=secret", + "https://github.com/org/repo.git#access_token=secret", + "https://github.com/org/repo.git#", + "git@github.com:org/repo.git?access_token=secret", + "git@github.com:org/repo.git#access_token=secret", + } { + t.Run(raw, func(t *testing.T) { + if _, _, err := credentialEnv(raw); err == nil { + t.Fatal("expected query or fragment rejection") + } + }) + } +} + func TestCredentialEnvTokenAsUsername(t *testing.T) { t.Parallel() - safeURL, env := credentialEnv("https://ghp_abc123@github.com/org/repo.git") + safeURL, env, err := credentialEnv("https://ghp_abc123@github.com/org/repo.git") + if err != nil { + t.Fatalf("credentialEnv: %v", err) + } if strings.Contains(safeURL, "ghp_abc123") { t.Fatalf("token should be stripped from safe URL: %s", safeURL) } if len(env) == 0 { t.Fatal("expected credential helper env vars") } + for _, e := range env { + if strings.HasPrefix(e, "GIT_CONFIG_VALUE_1=") && strings.Contains(e, "ghp_abc123") { + t.Fatalf("credential helper command leaked token: %s", e) + } + } +} + +func TestCredentialEnvPreservesSSHUsername(t *testing.T) { + t.Parallel() + safeURL, env, err := credentialEnv("ssh://git@github.com/org/repo.git") + if err != nil { + t.Fatalf("credentialEnv: %v", err) + } + if safeURL != "ssh://git@github.com/org/repo.git" { + t.Fatalf("safe URL = %q, want SSH username preserved", safeURL) + } + if len(env) != 0 { + t.Fatalf("expected no credential helper env for SSH username, got %v", env) + } +} + +func TestCredentialEnvRejectsSSHPassword(t *testing.T) { + t.Parallel() + if _, _, err := credentialEnv("ssh://git:secret@github.com/org/repo.git"); err == nil { + t.Fatal("expected SSH password rejection") + } +} + +func TestCredentialEnvRejectsMalformedSSHPassword(t *testing.T) { + t.Parallel() + for _, raw := range []string{ + "ssh:/git:secret@github.com/org/repo.git", + "ssh:/git:bad%zz@github.com/org/repo.git", + } { + t.Run(raw, func(t *testing.T) { + if _, _, err := credentialEnv(raw); err == nil { + t.Fatal("expected malformed SSH password rejection") + } + }) + } +} + +func TestCloneBloblessRejectsMalformedCredentialURLBeforeGit(t *testing.T) { + tmp := t.TempDir() + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "git-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\n: > \"$GIT_INVOKED_MARKER\"\nexit 1\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_INVOKED_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + cfg := model.RepoConfig{ + GitDir: filepath.Join(tmp, "repo.git"), + RemoteURL: "https://user:bad%zz@example.com/org/repo.git", + Branch: "main", + } + store := New(nil) + err := store.CloneBlobless(context.Background(), cfg) + if err == nil { + t.Fatal("expected malformed remote URL error") + } + if strings.Contains(err.Error(), "bad%zz") || strings.Contains(err.Error(), "user") { + t.Fatalf("error leaked credential URL: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("git was invoked before rejecting malformed URL") + } +} + +func TestCloneBloblessRejectsMalformedHTTPSUserinfoBeforeGit(t *testing.T) { + tmp := t.TempDir() + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "git-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\n: > \"$GIT_INVOKED_MARKER\"\nexit 1\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_INVOKED_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + cfg := model.RepoConfig{ + GitDir: filepath.Join(tmp, "repo.git"), + RemoteURL: "https:/user:ghp_secret@github.com/org/repo.git", + Branch: "main", + } + store := New(nil) + err := store.CloneBlobless(context.Background(), cfg) + if err == nil { + t.Fatal("expected malformed remote URL error") + } + if strings.Contains(err.Error(), "ghp_secret") || strings.Contains(err.Error(), "user") { + t.Fatalf("error leaked credential URL: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("git was invoked before rejecting malformed URL") + } +} + +func TestCloneBloblessRejectsMalformedHTTPParseErrorBeforeGit(t *testing.T) { + tmp := t.TempDir() + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "git-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\n: > \"$GIT_INVOKED_MARKER\"\nexit 1\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_INVOKED_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + cfg := model.RepoConfig{ + GitDir: filepath.Join(tmp, "repo.git"), + RemoteURL: "https//ghp_secret%zz@github.com/org/repo.git", + Branch: "main", + } + store := New(nil) + err := store.CloneBlobless(context.Background(), cfg) + if err == nil { + t.Fatal("expected malformed remote URL error") + } + if strings.Contains(err.Error(), "ghp_secret") || strings.Contains(err.Error(), "%zz") { + t.Fatalf("error leaked credential URL: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("git was invoked before rejecting malformed URL") + } +} + +func TestCloneBloblessRejectsMalformedGitStyleCredentialBeforeGit(t *testing.T) { + tmp := t.TempDir() + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "git-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\n: > \"$GIT_INVOKED_MARKER\"\nexit 1\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_INVOKED_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + cfg := model.RepoConfig{ + GitDir: filepath.Join(tmp, "repo.git"), + RemoteURL: "git:secret@github.com:org/repo.git", + Branch: "main", + } + store := New(nil) + err := store.CloneBlobless(context.Background(), cfg) + if err == nil { + t.Fatal("expected malformed remote URL error") + } + if strings.Contains(err.Error(), "secret") { + t.Fatalf("error leaked credential URL: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("git was invoked before rejecting malformed URL") + } +} + +func TestCloneBloblessRejectsPathSplitHTTPCredentialsBeforeGit(t *testing.T) { + tmp := t.TempDir() + bin := filepath.Join(tmp, "bin") + if err := os.Mkdir(bin, 0o755); err != nil { + t.Fatal(err) + } + marker := filepath.Join(tmp, "git-invoked") + fakeGit := filepath.Join(bin, "git") + if err := os.WriteFile(fakeGit, []byte("#!/bin/sh\n: > \"$GIT_INVOKED_MARKER\"\nexit 1\n"), 0o755); err != nil { + t.Fatal(err) + } + t.Setenv("GIT_INVOKED_MARKER", marker) + t.Setenv("PATH", bin+string(os.PathListSeparator)+os.Getenv("PATH")) + + cfg := model.RepoConfig{ + GitDir: filepath.Join(tmp, "repo.git"), + RemoteURL: "https://user:123/ss@example.com/org/repo.git", + Branch: "main", + } + store := New(nil) + err := store.CloneBlobless(context.Background(), cfg) + if err == nil { + t.Fatal("expected malformed remote URL error") + } + if strings.Contains(err.Error(), "123") || strings.Contains(err.Error(), "ss") { + t.Fatalf("error leaked credential URL: %v", err) + } + if _, statErr := os.Stat(marker); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("git was invoked before rejecting malformed URL") + } +} + +func TestCredentialEnvRejectsHTTPSLikeUserinfoTypos(t *testing.T) { + t.Parallel() + for _, raw := range []string{ + "https:/user:ghp_secret@github.com/org/repo.git", + "https//ghp_secret@github.com/org/repo.git", + } { + t.Run(raw, func(t *testing.T) { + if _, _, err := credentialEnv(raw); err == nil { + t.Fatal("expected malformed HTTP-like remote rejection") + } + }) + } +} + +func TestCredentialEnvAllowsAtInHTTPSPath(t *testing.T) { + t.Parallel() + safeURL, env, err := credentialEnv("https://git.example.com/team/repo:v1@2026.git") + if err != nil { + t.Fatalf("credentialEnv: %v", err) + } + if safeURL != "https://git.example.com/team/repo:v1@2026.git" { + t.Fatalf("safe URL = %q", safeURL) + } + if len(env) != 0 { + t.Fatalf("expected no credential helper env, got %v", env) + } +} + +func TestNonInteractiveGitEnvForcesSSHBatchMode(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", "ssh -o BatchMode=no -i /secrets/deploy_key -o IdentitiesOnly=yes") + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if !strings.Contains(e, "-i /secrets/deploy_key") { + t.Fatalf("expected existing identity option to be preserved, got %q", e) + } + if strings.Contains(e, "BatchMode=no") { + t.Fatalf("expected existing BatchMode option to be replaced, got %q", e) + } + if strings.Contains(e, "BatchMode=yes") { + return + } + break + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) +} + +func TestNonInteractiveGitEnvDefaultSSHBatchMode(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", "") + env := nonInteractiveGitEnv() + for _, e := range env { + if e == "GIT_SSH_COMMAND=ssh -o BatchMode=yes" { + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) +} + +func TestNonInteractiveGitEnvStripsQuotedBatchMode(t *testing.T) { + for _, command := range []string{ + `ssh -o "BatchMode=no" -i /secrets/deploy_key`, + `ssh -o BatchMode="no" -i /secrets/deploy_key`, + `ssh -o "BatchMode"=no -i /secrets/deploy_key`, + `ssh -o 'BatchMode no' -i /secrets/deploy_key`, + `ssh '-o' 'BatchMode=no' -i /secrets/deploy_key`, + `ssh -oBatchMode="no" -i /secrets/deploy_key`, + } { + t.Run(command, func(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", command) + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if strings.Contains(e, "BatchMode=no") || strings.Contains(e, `BatchMode="no"`) { + t.Fatalf("expected quoted BatchMode option to be replaced, got %q", e) + } + if !strings.Contains(e, "-i /secrets/deploy_key") || !strings.Contains(e, "BatchMode=yes") { + t.Fatalf("expected identity and BatchMode=yes, got %q", e) + } + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) + }) + } +} + +func TestNonInteractiveGitEnvPreservesProxyCommand(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", "ssh -o ProxyCommand='ssh -o BatchMode=no bastion' -i /secrets/deploy_key") + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if !strings.Contains(e, "ProxyCommand=ssh -o BatchMode=no bastion") { + t.Fatalf("expected ProxyCommand to be preserved, got %q", e) + } + if !strings.Contains(e, "BatchMode=yes") { + t.Fatalf("expected top-level BatchMode=yes, got %q", e) + } + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) +} + +func TestNonInteractiveGitEnvQuotesShellMetacharacters(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", `ssh -i /tmp/key\ prod -o UserKnownHostsFile=/tmp/known\ hosts`) + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if !strings.Contains(e, "'/tmp/key prod'") || !strings.Contains(e, "'UserKnownHostsFile=/tmp/known hosts'") { + t.Fatalf("expected escaped shell paths to be quoted, got %q", e) + } + if !strings.Contains(e, "BatchMode=yes") { + t.Fatalf("expected BatchMode=yes, got %q", e) + } + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) +} + +func TestNonInteractiveGitEnvPreservesShellExpansion(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", `ssh -i "$HOME/.ssh/deploy key"`) + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if !strings.Contains(e, "$HOME/.ssh/deploy key") { + t.Fatalf("expected HOME expansion to be preserved, got %q", e) + } + if strings.Contains(e, "'$HOME/.ssh/deploy key'") { + t.Fatalf("expected HOME expansion not to be single-quoted, got %q", e) + } + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) +} + +func TestNonInteractiveGitEnvPreservesEscapedDollar(t *testing.T) { + t.Setenv("GIT_SSH_COMMAND", `ssh -i '/tmp/key$prod dir'`) + env := nonInteractiveGitEnv() + for _, e := range env { + if strings.HasPrefix(e, "GIT_SSH_COMMAND=") { + if !strings.Contains(e, `/tmp/key\$prod dir`) { + t.Fatalf("expected escaped dollar to be preserved, got %q", e) + } + if strings.Contains(e, `/tmp/key$prod dir`) { + t.Fatalf("expected literal dollar not to become expandable, got %q", e) + } + return + } + } + t.Fatalf("expected forced GIT_SSH_COMMAND, got %v", env) } func TestSetBatchPoolSizeUpdatesExistingAndNewPools(t *testing.T) { diff --git a/internal/model/types.go b/internal/model/types.go index 7897453..7073e96 100644 --- a/internal/model/types.go +++ b/internal/model/types.go @@ -28,6 +28,10 @@ type RepoConfig struct { MetaDBPath string OverlayDBPath string Enabled bool + PreparedGitDir bool + FetchRef string + PrepareState string + PrepareError string } type RepoRuntimeState struct { @@ -44,8 +48,15 @@ type RepoRuntimeState struct { HydratedBlobBytes int64 DirtyOverlay bool State string + PrepareError string } +const ( + PrepareStatePreparing = "preparing" + PrepareStateReady = "ready" + PrepareStateFailed = "failed" +) + // BaseNode represents a tracked entry from the git tree. Inode IDs are assigned // at runtime by the FUSE layer (monotonic allocation, like tigrisfs). type BaseNode struct { @@ -143,7 +154,9 @@ type Registry interface { type GitStore interface { CloneBlobless(ctx context.Context, cfg RepoConfig) error + CloneBloblessNonInteractive(ctx context.Context, cfg RepoConfig) error Fetch(ctx context.Context, repo RepoConfig) error + FetchRefNonInteractive(ctx context.Context, repo RepoConfig, ref string) error ResolveHEAD(ctx context.Context, repo RepoConfig) (oid string, ref string, err error) BuildTreeIndex(ctx context.Context, repo RepoConfig, headOID string) ([]BaseNode, error) BlobToCache(ctx context.Context, repo RepoConfig, objectOID string, dstPath string) (size int64, err error) @@ -151,6 +164,8 @@ type GitStore interface { ComputeAheadBehind(ctx context.Context, repo RepoConfig) (ahead int, behind int, diverged bool, err error) CommitTimestamp(ctx context.Context, repo RepoConfig, oid string) (int64, error) ReadTreeHEAD(ctx context.Context, repo RepoConfig) error + PrepareFetchedBranch(ctx context.Context, repo RepoConfig, ref string) error + ValidatePreparedGitDir(ctx context.Context, repo RepoConfig) error } type SnapshotStore interface { diff --git a/internal/registry/registry.go b/internal/registry/registry.go index e07e766..6ff1b39 100644 --- a/internal/registry/registry.go +++ b/internal/registry/registry.go @@ -11,12 +11,15 @@ import ( "github.com/cloudflare/artifact-fs/internal/model" ) +var ErrRepoChanged = errors.New("repo config changed") + var migrations = []string{ `CREATE TABLE IF NOT EXISTS repos ( repo_id TEXT PRIMARY KEY, name TEXT NOT NULL UNIQUE, mount_root TEXT NOT NULL, mount_path TEXT NOT NULL, + remote_url TEXT NOT NULL DEFAULT '', remote_url_redacted TEXT NOT NULL, remote_url_secret_ref TEXT, branch TEXT NOT NULL, @@ -27,6 +30,10 @@ var migrations = []string{ meta_db_path TEXT NOT NULL, overlay_db_path TEXT NOT NULL, enabled INTEGER NOT NULL DEFAULT 1, + prepared_gitdir INTEGER NOT NULL DEFAULT 0, + fetch_ref TEXT NOT NULL DEFAULT '', + prepare_state TEXT NOT NULL DEFAULT '', + prepare_error TEXT NOT NULL DEFAULT '', created_at_ns INTEGER NOT NULL, updated_at_ns INTEGER NOT NULL );`, @@ -44,6 +51,10 @@ func New(ctx context.Context, dbPath string) (*Store, error) { if err := meta.ExecMigrations(ctx, db, migrations); err != nil { return nil, err } + if err := ensureRepoColumns(ctx, db); err != nil { + db.Close() + return nil, err + } return &Store{db: db}, nil } @@ -52,12 +63,13 @@ func (s *Store) Close() error { return s.db.Close() } func (s *Store) AddRepo(ctx context.Context, cfg model.RepoConfig) error { now := time.Now().UnixNano() _, err := s.db.ExecContext(ctx, ` - INSERT INTO repos (repo_id, name, mount_root, mount_path, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled, created_at_ns, updated_at_ns) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + INSERT INTO repos (repo_id, name, mount_root, mount_path, remote_url, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled, prepared_gitdir, fetch_ref, prepare_state, prepare_error, created_at_ns, updated_at_ns) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(repo_id) DO UPDATE SET name=excluded.name, mount_root=excluded.mount_root, mount_path=excluded.mount_path, + remote_url=excluded.remote_url, remote_url_redacted=excluded.remote_url_redacted, branch=excluded.branch, refresh_interval_seconds=excluded.refresh_interval_seconds, @@ -67,23 +79,65 @@ func (s *Store) AddRepo(ctx context.Context, cfg model.RepoConfig) error { meta_db_path=excluded.meta_db_path, overlay_db_path=excluded.overlay_db_path, enabled=excluded.enabled, + prepared_gitdir=excluded.prepared_gitdir, + fetch_ref=excluded.fetch_ref, + prepare_state=excluded.prepare_state, + prepare_error=excluded.prepare_error, updated_at_ns=excluded.updated_at_ns - `, string(cfg.ID), cfg.Name, cfg.MountRoot, cfg.MountPath, cfg.RemoteURLRedacted, cfg.Branch, int64(cfg.RefreshInterval.Seconds()), cfg.GitDir, cfg.OverlayDir, cfg.BlobCacheDir, cfg.MetaDBPath, cfg.OverlayDBPath, boolToInt(cfg.Enabled), now, now) + `, string(cfg.ID), cfg.Name, cfg.MountRoot, cfg.MountPath, cfg.RemoteURL, cfg.RemoteURLRedacted, cfg.Branch, int64(cfg.RefreshInterval.Seconds()), cfg.GitDir, cfg.OverlayDir, cfg.BlobCacheDir, cfg.MetaDBPath, cfg.OverlayDBPath, boolToInt(cfg.Enabled), boolToInt(cfg.PreparedGitDir), cfg.FetchRef, cfg.PrepareState, cfg.PrepareError, now, now) + return err +} + +func (s *Store) UpdatePrepareState(ctx context.Context, repoID model.RepoID, state string, prepareErr string) error { + _, err := s.db.ExecContext(ctx, ` + UPDATE repos + SET prepare_state=?, prepare_error=?, updated_at_ns=? + WHERE repo_id=? + `, state, prepareErr, time.Now().UnixNano(), string(repoID)) return err } +func (s *Store) UpdatePrepareStateForConfig(ctx context.Context, cfg model.RepoConfig, state string, prepareErr string) error { + res, err := s.db.ExecContext(ctx, ` + UPDATE repos + SET prepare_state=?, prepare_error=?, updated_at_ns=? + WHERE repo_id=? + AND branch=? + AND remote_url=? + AND prepared_gitdir=? + AND fetch_ref=? + AND git_dir=? + AND overlay_dir=? + AND blob_cache_dir=? + AND meta_db_path=? + AND overlay_db_path=? + AND mount_path=? + `, state, prepareErr, time.Now().UnixNano(), string(cfg.ID), cfg.Branch, cfg.RemoteURL, boolToInt(cfg.PreparedGitDir), cfg.FetchRef, cfg.GitDir, cfg.OverlayDir, cfg.BlobCacheDir, cfg.MetaDBPath, cfg.OverlayDBPath, cfg.MountPath) + if err != nil { + return err + } + rows, err := res.RowsAffected() + if err != nil { + return err + } + if rows == 0 { + return ErrRepoChanged + } + return nil +} + func (s *Store) RemoveRepo(ctx context.Context, name string) error { _, err := s.db.ExecContext(ctx, `DELETE FROM repos WHERE name=?`, name) return err } func (s *Store) GetRepo(ctx context.Context, name string) (model.RepoConfig, error) { - row := s.db.QueryRowContext(ctx, `SELECT repo_id, name, mount_root, mount_path, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled FROM repos WHERE name=?`, name) + row := s.db.QueryRowContext(ctx, `SELECT repo_id, name, mount_root, mount_path, remote_url, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled, prepared_gitdir, fetch_ref, prepare_state, prepare_error FROM repos WHERE name=?`, name) return scanRepo(row) } func (s *Store) ListRepos(ctx context.Context) ([]model.RepoConfig, error) { - rows, err := s.db.QueryContext(ctx, `SELECT repo_id, name, mount_root, mount_path, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled FROM repos ORDER BY name`) + rows, err := s.db.QueryContext(ctx, `SELECT repo_id, name, mount_root, mount_path, remote_url, remote_url_redacted, branch, refresh_interval_seconds, git_dir, overlay_dir, blob_cache_dir, meta_db_path, overlay_db_path, enabled, prepared_gitdir, fetch_ref, prepare_state, prepare_error FROM repos ORDER BY name`) if err != nil { return nil, err } @@ -107,7 +161,8 @@ func scanRepo(s scanner) (model.RepoConfig, error) { var cfg model.RepoConfig var refresh int64 var enabled int - if err := s.Scan(&cfg.ID, &cfg.Name, &cfg.MountRoot, &cfg.MountPath, &cfg.RemoteURLRedacted, &cfg.Branch, &refresh, &cfg.GitDir, &cfg.OverlayDir, &cfg.BlobCacheDir, &cfg.MetaDBPath, &cfg.OverlayDBPath, &enabled); err != nil { + var preparedGitDir int + if err := s.Scan(&cfg.ID, &cfg.Name, &cfg.MountRoot, &cfg.MountPath, &cfg.RemoteURL, &cfg.RemoteURLRedacted, &cfg.Branch, &refresh, &cfg.GitDir, &cfg.OverlayDir, &cfg.BlobCacheDir, &cfg.MetaDBPath, &cfg.OverlayDBPath, &enabled, &preparedGitDir, &cfg.FetchRef, &cfg.PrepareState, &cfg.PrepareError); err != nil { if errors.Is(err, sql.ErrNoRows) { return cfg, fmt.Errorf("repo not found") } @@ -115,6 +170,7 @@ func scanRepo(s scanner) (model.RepoConfig, error) { } cfg.RefreshInterval = time.Duration(refresh) * time.Second cfg.Enabled = enabled == 1 + cfg.PreparedGitDir = preparedGitDir == 1 return cfg, nil } @@ -124,3 +180,43 @@ func boolToInt(v bool) int { } return 0 } + +func ensureRepoColumns(ctx context.Context, db *sql.DB) error { + rows, err := db.QueryContext(ctx, `PRAGMA table_info(repos)`) + if err != nil { + return err + } + defer rows.Close() + cols := map[string]bool{} + for rows.Next() { + var cid int + var name string + var typ string + var notNull int + var defaultValue any + var pk int + if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil { + return err + } + cols[name] = true + } + if err := rows.Err(); err != nil { + return err + } + add := map[string]string{ + "remote_url": `TEXT NOT NULL DEFAULT ''`, + "prepared_gitdir": `INTEGER NOT NULL DEFAULT 0`, + "fetch_ref": `TEXT NOT NULL DEFAULT ''`, + "prepare_state": `TEXT NOT NULL DEFAULT ''`, + "prepare_error": `TEXT NOT NULL DEFAULT ''`, + } + for name, ddl := range add { + if cols[name] { + continue + } + if _, err := db.ExecContext(ctx, fmt.Sprintf(`ALTER TABLE repos ADD COLUMN %s %s`, name, ddl)); err != nil { + return err + } + } + return nil +} diff --git a/internal/registry/registry_test.go b/internal/registry/registry_test.go new file mode 100644 index 0000000..5295361 --- /dev/null +++ b/internal/registry/registry_test.go @@ -0,0 +1,62 @@ +package registry + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/cloudflare/artifact-fs/internal/model" +) + +func TestRepoPrepareFieldsRoundTrip(t *testing.T) { + ctx := context.Background() + store, err := New(ctx, filepath.Join(t.TempDir(), "repos.sqlite")) + if err != nil { + t.Fatal(err) + } + defer store.Close() + + cfg := model.RepoConfig{ + ID: "repo", + Name: "repo", + MountRoot: "/mnt", + MountPath: "/mnt/repo", + RemoteURL: "https://github.com/example/repo.git", + RemoteURLRedacted: "https://github.com/example/repo.git", + Branch: "master", + RefreshInterval: time.Minute, + GitDir: "/git/repo", + OverlayDir: "/overlay/repo", + BlobCacheDir: "/cache/repo", + MetaDBPath: "/meta/repo.sqlite", + OverlayDBPath: "/overlay/repo/meta.sqlite", + Enabled: true, + PreparedGitDir: true, + FetchRef: "master", + PrepareState: model.PrepareStatePreparing, + } + if err := store.AddRepo(ctx, cfg); err != nil { + t.Fatal(err) + } + if err := store.UpdatePrepareState(ctx, cfg.ID, model.PrepareStateFailed, "clone failed"); err != nil { + t.Fatal(err) + } + + got, err := store.GetRepo(ctx, "repo") + if err != nil { + t.Fatal(err) + } + if !got.PreparedGitDir { + t.Fatal("PreparedGitDir = false, want true") + } + if got.FetchRef != "master" { + t.Fatalf("FetchRef = %q, want master", got.FetchRef) + } + if got.PrepareState != model.PrepareStateFailed { + t.Fatalf("PrepareState = %q, want failed", got.PrepareState) + } + if got.PrepareError != "clone failed" { + t.Fatalf("PrepareError = %q, want clone failed", got.PrepareError) + } +}