From 36ce331e6695140062df5a1e3081cc6a911f1fe4 Mon Sep 17 00:00:00 2001 From: Matt Silverlock Date: Sat, 13 Jun 2026 10:40:55 -0400 Subject: [PATCH] fix unknown-size file attrs --- internal/fusefs/fuse_unix.go | 45 +++++++- internal/fusefs/fuse_unix_test.go | 175 ++++++++++++++++++++++++++++++ internal/gitstore/gitstore.go | 27 +++-- 3 files changed, 234 insertions(+), 13 deletions(-) diff --git a/internal/fusefs/fuse_unix.go b/internal/fusefs/fuse_unix.go index ef1b633..c57974d 100644 --- a/internal/fusefs/fuse_unix.go +++ b/internal/fusefs/fuse_unix.go @@ -163,7 +163,7 @@ func (fs *ArtifactFuse) StatFS(_ context.Context, op *fuseops.StatFSOp) error { return nil } -func (fs *ArtifactFuse) LookUpInode(_ context.Context, op *fuseops.LookUpInodeOp) error { +func (fs *ArtifactFuse) LookUpInode(ctx context.Context, op *fuseops.LookUpInodeOp) error { parent, err := fs.requireInode(op.Parent, syscall.ENOENT) if err != nil { return err @@ -182,7 +182,7 @@ func (fs *ArtifactFuse) LookUpInode(_ context.Context, op *fuseops.LookUpInodeOp return nil } - mode, size, typ, mtime, ctime, err := fs.resolver.Getattr(childPath) + mode, size, typ, mtime, ctime, err := fs.resolveAttrs(ctx, childPath) if err != nil { if errors.Is(err, iofs.ErrNotExist) { return syscall.ENOENT @@ -200,7 +200,7 @@ func (fs *ArtifactFuse) LookUpInode(_ context.Context, op *fuseops.LookUpInodeOp return nil } -func (fs *ArtifactFuse) GetInodeAttributes(_ context.Context, op *fuseops.GetInodeAttributesOp) error { +func (fs *ArtifactFuse) GetInodeAttributes(ctx context.Context, op *fuseops.GetInodeAttributesOp) error { ref, err := fs.requireInode(op.Inode, syscall.ESTALE) if err != nil { return err @@ -212,15 +212,50 @@ func (fs *ArtifactFuse) GetInodeAttributes(_ context.Context, op *fuseops.GetIno return nil } - mode, size, typ, mtime, ctime, err := fs.resolver.Getattr(ref.Path) + mode, size, typ, mtime, ctime, err := fs.resolveAttrs(ctx, ref.Path) if err != nil { - return syscall.ENOENT + if errors.Is(err, iofs.ErrNotExist) { + return syscall.ENOENT + } + return syscall.EIO } op.Attributes = inodeAttrs(mode, uint64(size), typ, mtime, ctime) op.AttributesExpiration = attrExpiry(time.Second) return nil } +func (fs *ArtifactFuse) resolveAttrs(ctx context.Context, path string) (mode uint32, size int64, nodeType string, mtime time.Time, ctime time.Time, err error) { + n, err := fs.resolver.ResolvePath(path) + if err != nil { + return 0, 0, "", time.Time{}, time.Time{}, err + } + if n.FromOverlay { + typ := n.Overlay.NodeType() + mt := time.Unix(0, n.Overlay.MtimeUnixNs) + ct := time.Unix(0, n.Overlay.CtimeUnixNs) + return n.Overlay.Mode, n.Overlay.SizeBytes, typ, mt, ct, nil + } + + mode = normalizeMode(n.Base.Mode, n.Base.Type) + size = n.Base.SizeBytes + if n.Base.Type == "file" && n.Base.SizeState != "known" && n.Base.ObjectOID != "" { + _, hydratedSize, hErr := fs.engine.Hydrator.EnsureHydrated(ctx, fs.repo, n.Base) + if hErr != nil { + return 0, 0, "", time.Time{}, time.Time{}, hErr + } + size = hydratedSize + } + + // Base files use the HEAD commit timestamp for mtime so tools like + // make see a stable, meaningful value. + ct := fs.resolver.CommitTime() + if ct == 0 { + ct = fs.resolver.Generation() // fallback: commit time unavailable + } + mt := time.Unix(ct, 0) + return mode, size, n.Base.Type, mt, mt, nil +} + func (fs *ArtifactFuse) SetInodeAttributes(ctx context.Context, op *fuseops.SetInodeAttributesOp) error { ref, err := fs.requireInode(op.Inode, syscall.ESTALE) if err != nil { diff --git a/internal/fusefs/fuse_unix_test.go b/internal/fusefs/fuse_unix_test.go index 4df7438..077eec3 100644 --- a/internal/fusefs/fuse_unix_test.go +++ b/internal/fusefs/fuse_unix_test.go @@ -3,8 +3,14 @@ package fusefs import ( + "context" + "errors" + "syscall" "testing" "time" + + "github.com/cloudflare/artifact-fs/internal/model" + "github.com/jacobsa/fuse/fuseops" ) func TestInodeAttrsPreservesSeparateTimes(t *testing.T) { @@ -46,3 +52,172 @@ func TestGitFileAttrsUsesOneTimestamp(t *testing.T) { t.Fatalf("expected .git attrs to use one timestamp: atime=%v mtime=%v ctime=%v", attr.Atime, attr.Mtime, attr.Ctime) } } + +func TestLookUpInodeHydratesUnknownSizeBaseFileAttributes(t *testing.T) { + repo := model.RepoConfig{ID: "repo"} + base := model.BaseNode{ + RepoID: repo.ID, + Path: "file.txt", + Type: "file", + Mode: 0o100644, + ObjectOID: "blob", + SizeState: "unknown", + } + r := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{"file.txt": base}}, + &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + ) + h := &fakeLookupHydrator{size: 12} + fs := NewArtifactFuse(repo, r, &Engine{Resolver: r, Repo: repo, Hydrator: h}) + op := &fuseops.LookUpInodeOp{Parent: fuseops.RootInodeID, Name: "file.txt"} + + if err := fs.LookUpInode(context.Background(), op); err != nil { + t.Fatalf("LookUpInode: %v", err) + } + if op.Entry.Attributes.Size != uint64(h.size) { + t.Fatalf("lookup size = %d, want hydrated size %d", op.Entry.Attributes.Size, h.size) + } + if h.calls != 1 { + t.Fatalf("EnsureHydrated calls = %d, want 1", h.calls) + } +} + +func TestGetInodeAttributesHydratesUnknownSizeBaseFileAttributes(t *testing.T) { + repo := model.RepoConfig{ID: "repo"} + base := model.BaseNode{ + RepoID: repo.ID, + Path: "file.txt", + Type: "file", + Mode: 0o100644, + ObjectOID: "blob", + SizeState: "unknown", + } + r := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{"file.txt": base}}, + &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + ) + h := &fakeLookupHydrator{size: 12} + fs := NewArtifactFuse(repo, r, &Engine{Resolver: r, Repo: repo, Hydrator: h}) + lookup := &fuseops.LookUpInodeOp{Parent: fuseops.RootInodeID, Name: "file.txt"} + if err := fs.LookUpInode(context.Background(), lookup); err != nil { + t.Fatalf("LookUpInode: %v", err) + } + h.calls = 0 + + op := &fuseops.GetInodeAttributesOp{Inode: lookup.Entry.Child} + if err := fs.GetInodeAttributes(context.Background(), op); err != nil { + t.Fatalf("GetInodeAttributes: %v", err) + } + if op.Attributes.Size != uint64(h.size) { + t.Fatalf("getattr size = %d, want hydrated size %d", op.Attributes.Size, h.size) + } + if h.calls != 1 { + t.Fatalf("EnsureHydrated calls = %d, want 1", h.calls) + } +} + +func TestGetInodeAttributesHydrationFailureReturnsEIO(t *testing.T) { + repo := model.RepoConfig{ID: "repo"} + base := model.BaseNode{ + RepoID: repo.ID, + Path: "file.txt", + Type: "file", + Mode: 0o100644, + ObjectOID: "blob", + SizeState: "unknown", + } + r := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{"file.txt": base}}, + &fakeOverlay{entries: map[string]model.OverlayEntry{}}, + ) + h := &fakeLookupHydrator{size: 12} + fs := NewArtifactFuse(repo, r, &Engine{Resolver: r, Repo: repo, Hydrator: h}) + lookup := &fuseops.LookUpInodeOp{Parent: fuseops.RootInodeID, Name: "file.txt"} + if err := fs.LookUpInode(context.Background(), lookup); err != nil { + t.Fatalf("LookUpInode: %v", err) + } + h.err = errors.New("hydrate failed") + + op := &fuseops.GetInodeAttributesOp{Inode: lookup.Entry.Child} + if err := fs.GetInodeAttributes(context.Background(), op); err != syscall.EIO { + t.Fatalf("GetInodeAttributes err = %v, want EIO", err) + } +} + +func TestLookUpInodeDoesNotHydrateKnownOverlayDirOrSymlinkAttributes(t *testing.T) { + repo := model.RepoConfig{ID: "repo"} + tests := []struct { + name string + base model.BaseNode + overlay map[string]model.OverlayEntry + want uint64 + }{ + { + name: "known base file", + base: model.BaseNode{RepoID: repo.ID, Path: "file.txt", Type: "file", Mode: 0o100644, ObjectOID: "blob", SizeState: "known", SizeBytes: 0}, + want: 0, + }, + { + name: "overlay file", + base: model.BaseNode{RepoID: repo.ID, Path: "file.txt", Type: "file", Mode: 0o100644, ObjectOID: "blob", SizeState: "unknown"}, + overlay: map[string]model.OverlayEntry{ + "file.txt": {Path: "file.txt", Kind: model.OverlayKindModify, Mode: 0o644, SizeBytes: 3}, + }, + want: 3, + }, + { + name: "base dir", + base: model.BaseNode{RepoID: repo.ID, Path: "file.txt", Type: "dir", Mode: 0o40000, ObjectOID: "tree", SizeState: "unknown"}, + want: 4096, + }, + { + name: "base symlink", + base: model.BaseNode{RepoID: repo.ID, Path: "file.txt", Type: "symlink", Mode: 0o120000, ObjectOID: "blob", SizeState: "unknown"}, + want: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := newResolver( + &fakeSnapshot{nodes: map[string]model.BaseNode{"file.txt": tt.base}}, + &fakeOverlay{entries: tt.overlay}, + ) + h := &fakeLookupHydrator{size: 12} + fs := NewArtifactFuse(repo, r, &Engine{Resolver: r, Repo: repo, Hydrator: h}) + op := &fuseops.LookUpInodeOp{Parent: fuseops.RootInodeID, Name: "file.txt"} + + if err := fs.LookUpInode(context.Background(), op); err != nil { + t.Fatalf("LookUpInode: %v", err) + } + if op.Entry.Attributes.Size != tt.want { + t.Fatalf("lookup size = %d, want %d", op.Entry.Attributes.Size, tt.want) + } + if h.calls != 0 { + t.Fatalf("EnsureHydrated calls = %d, want 0", h.calls) + } + }) + } +} + +type fakeLookupHydrator struct { + size int64 + calls int + err error +} + +func (f *fakeLookupHydrator) Enqueue(model.HydrationTask) {} + +func (f *fakeLookupHydrator) EnsureHydrated(_ context.Context, _ model.RepoConfig, _ model.BaseNode) (string, int64, error) { + f.calls++ + if f.err != nil { + return "", 0, f.err + } + return "", f.size, nil +} + +func (f *fakeLookupHydrator) ReadBlob(_ context.Context, _ model.RepoConfig, _ model.BaseNode, _ int64) ([]byte, error) { + return nil, nil +} + +func (f *fakeLookupHydrator) QueueDepth(model.RepoID) int { return 0 } diff --git a/internal/gitstore/gitstore.go b/internal/gitstore/gitstore.go index 5c4418f..e4d6a23 100644 --- a/internal/gitstore/gitstore.go +++ b/internal/gitstore/gitstore.go @@ -171,8 +171,8 @@ func (s *Store) BuildTreeIndex(ctx context.Context, repo model.RepoConfig, headO // pack metadata and doesn't trigger network fetches on blobless clones. if err := s.batchResolveSizes(ctx, repo, nodes, blobOIDs, blobIndex); err != nil { // Non-fatal: sizes remain "unknown" and reads will still work via - // hydration. Log so operators can diagnose size=0 issues. - s.logger.Warn("batch size resolution failed, files will show size 0 until hydrated", "repo", repo.Name, "error", err) + // hydration. Log so operators can diagnose unexpected attr hydration. + s.logger.Warn("batch size resolution failed, some file sizes will resolve on demand", "repo", repo.Name, "error", err) } return addImplicitDirs(repo.ID, nodes), nil } @@ -198,13 +198,15 @@ func (s *Store) batchResolveSizes(ctx context.Context, repo model.RepoConfig, no if err := cmd.Start(); err != nil { return err } + var writeErr error for _, oid := range oids { - fmt.Fprintln(stdin, oid) - } - stdin.Close() - if err := cmd.Wait(); err != nil { - return err + if _, err := fmt.Fprintln(stdin, oid); err != nil { + writeErr = err + break + } } + closeErr := stdin.Close() + waitErr := cmd.Wait() // Output format: " " or " missing" scan := bufio.NewScanner(&outBuf) for scan.Scan() { @@ -223,7 +225,16 @@ func (s *Store) batchResolveSizes(ctx context.Context, repo model.RepoConfig, no nodes[idx].SizeState = "known" } } - return scan.Err() + if err := scan.Err(); err != nil { + return err + } + if writeErr != nil { + return writeErr + } + if closeErr != nil { + return closeErr + } + return waitErr } // BlobToCache fetches a git object and writes it to dstPath in a binary-safe manner.