diff --git a/fileseed.go b/fileseed.go index aff276d..9aa11ea 100644 --- a/fileseed.go +++ b/fileseed.go @@ -219,6 +219,18 @@ func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffs srcAlignStart := (srcOffset/blocksize + 1) * blocksize srcAlignEnd := (srcOffset + srcLength) / blocksize * blocksize + + // If the range is too small to contain a full aligned block, there is + // nothing that can be cloned. Copy the whole range instead. This also + // guards against srcAlignEnd-srcAlignStart underflowing, and against + // calling CloneRange with a zero length, which FICLONERANGE interprets + // as "clone to the end of the source file". Filesystems with large + // blocks, like ZFS with the default 128k recordsize, hit this case + // frequently. + if srcAlignEnd <= srcAlignStart { + return s.copy(dst, src, srcOffset, srcLength, dstOffset) + } + dstAlignStart := (dstOffset/blocksize + 1) * blocksize alignLength := srcAlignEnd - srcAlignStart dstAlignEnd := dstAlignStart + alignLength @@ -237,5 +249,13 @@ func (s *fileSeedSegment) clone(dst, src *os.File, srcOffset, srcLength, dstOffs } copied += c2 // close the aligned blocks - return copied, alignLength, CloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart) + if err := cloneRange(dst, src, srcAlignStart, alignLength, dstAlignStart); err != nil { + // Not every filesystem that passes the CanClone probe can clone every + // range. ZFS for example requires alignment to its record size and + // refuses to clone data that hasn't been committed to disk yet. Fall + // back to copying the blocks. + c3, _, err := s.copy(dst, src, srcAlignStart, alignLength, dstAlignStart) + return copied + c3, 0, err + } + return copied, alignLength, nil } diff --git a/fileseed_test.go b/fileseed_test.go new file mode 100644 index 0000000..801cd5b --- /dev/null +++ b/fileseed_test.go @@ -0,0 +1,91 @@ +package desync + +import ( + "bytes" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Simulates filesystems like ZFS where CanClone succeeds but the actual +// cloning of blocks fails, e.g. due to alignment constraints or because the +// source hasn't been committed to disk yet. The seed segment is expected to +// fall back to copying the data. +func TestFileSeedSegmentCloneFallback(t *testing.T) { + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + return errors.New("simulated clone failure") + } + defer func() { cloneRange = CloneRange }() + + const blocksize = 4096 + size := uint64(3*blocksize + 100) + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + dir := t.TempDir() + srcName := filepath.Join(dir, "seed") + require.NoError(t, os.WriteFile(srcName, data, 0644)) + + dstName := filepath.Join(dir, "out") + dst, err := os.Create(dstName) + require.NoError(t, err) + defer dst.Close() + require.NoError(t, dst.Truncate(int64(size))) + + segment := newFileSeedSegment(srcName, []IndexChunk{{Start: 0, Size: size}}, true) + copied, cloned, err := segment.WriteInto(dst, 0, size, blocksize, true) + require.NoError(t, err) + assert.Equal(t, uint64(0), cloned) + assert.Equal(t, size, copied) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, data, got) +} + +// A segment smaller than a filesystem block contains no cloneable blocks and +// must be copied without touching data outside its range. Large blocks are +// common on ZFS where st_blksize reports the recordsize, 128k by default. +func TestFileSeedSegmentSmallerThanBlock(t *testing.T) { + var cloneCalls int + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + cloneCalls++ + return nil + } + defer func() { cloneRange = CloneRange }() + + const blocksize = 131072 + size := uint64(4096) + data := make([]byte, size) + for i := range data { + data[i] = byte(i + 1) + } + dir := t.TempDir() + srcName := filepath.Join(dir, "seed") + require.NoError(t, os.WriteFile(srcName, data, 0644)) + + // Make the target larger than the segment and fill it with a marker to + // detect writes outside the segment range + dstName := filepath.Join(dir, "out") + require.NoError(t, os.WriteFile(dstName, bytes.Repeat([]byte{0xff}, 3*blocksize), 0644)) + dst, err := os.OpenFile(dstName, os.O_RDWR, 0) + require.NoError(t, err) + defer dst.Close() + + segment := newFileSeedSegment(srcName, []IndexChunk{{Start: 0, Size: size}}, true) + copied, cloned, err := segment.WriteInto(dst, 0, size, blocksize, false) + require.NoError(t, err) + assert.Equal(t, uint64(0), cloned) + assert.Equal(t, size, copied) + assert.Equal(t, 0, cloneCalls) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, data, got[:size]) + assert.Equal(t, bytes.Repeat([]byte{0xff}, 3*blocksize-int(size)), got[size:]) +} diff --git a/nullseed.go b/nullseed.go index 31ff9e5..b535290 100644 --- a/nullseed.go +++ b/nullseed.go @@ -117,7 +117,7 @@ func (s *nullChunkSection) WriteInto(dst *os.File, offset, length, blocksize uin } return s.copy(dst, offset, s.Size()) } - return s.clone(dst, offset, length, blocksize) + return s.clone(dst, offset, length, blocksize, isBlank) } func (s *nullChunkSection) copy(dst *os.File, offset, length uint64) (uint64, uint64, error) { @@ -130,10 +130,21 @@ func (s *nullChunkSection) copy(dst *os.File, offset, length uint64) (uint64, ui return uint64(copied), 0, err } -func (s *nullChunkSection) clone(dst *os.File, offset, length, blocksize uint64) (uint64, uint64, error) { +func (s *nullChunkSection) clone(dst *os.File, offset, length, blocksize uint64, isBlank bool) (uint64, uint64, error) { dstAlignStart := (offset/blocksize + 1) * blocksize dstAlignEnd := (offset + length) / blocksize * blocksize + // If the range is too small to contain a full aligned block, there is + // nothing that can be cloned, and the copies below would write outside + // the range. Write zeros over the whole range instead, or nothing if + // it's still blank. + if dstAlignEnd <= dstAlignStart { + if isBlank { + return 0, 0, nil + } + return s.copy(dst, offset, length) + } + // fill the area before the first aligned block var copied, cloned uint64 c1, _, err := s.copy(dst, offset, dstAlignStart-offset) @@ -149,8 +160,16 @@ func (s *nullChunkSection) clone(dst *os.File, offset, length, blocksize uint64) copied += c2 for blkOffset := dstAlignStart; blkOffset < dstAlignEnd; blkOffset += blocksize { - if err := CloneRange(dst, s.blockfile, 0, blocksize, blkOffset); err != nil { - return copied, cloned, err + if err := cloneRange(dst, s.blockfile, 0, blocksize, blkOffset); err != nil { + // Not every filesystem that passes the CanClone probe can clone + // every range. ZFS for example refuses to clone from the blockfile + // before it has been committed to disk. Fall back to writing zeros, + // or to doing nothing if the target range is still blank. + if isBlank { + return copied, cloned, nil + } + c3, _, err := s.copy(dst, blkOffset, dstAlignEnd-blkOffset) + return copied + c3, cloned, err } cloned += blocksize } diff --git a/nullseed_test.go b/nullseed_test.go new file mode 100644 index 0000000..231f03c --- /dev/null +++ b/nullseed_test.go @@ -0,0 +1,129 @@ +package desync + +import ( + "bytes" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Simulates filesystems like ZFS where CanClone succeeds but the actual +// cloning of blocks fails, e.g. because the zero blockfile hasn't been +// committed to disk yet. The null seed is expected to fall back to writing +// zeros, or to leave a blank target untouched. +func TestNullChunkSectionCloneFallback(t *testing.T) { + defer func() { cloneRange = CloneRange }() + + const blocksize = 4096 + length := uint64(3*blocksize + 100) + dir := t.TempDir() + + blockfile, err := os.CreateTemp(dir, ".tmp-block") + require.NoError(t, err) + defer blockfile.Close() + _, err = blockfile.Write(make([]byte, blocksize)) + require.NoError(t, err) + + newSection := func() *nullChunkSection { + return &nullChunkSection{from: 0, to: length, blockfile: blockfile, canReflink: true} + } + + t.Run("copies zeros when cloning fails", func(t *testing.T) { + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + return errors.New("simulated clone failure") + } + dstName := filepath.Join(dir, "out1") + require.NoError(t, os.WriteFile(dstName, bytes.Repeat([]byte{0xff}, int(length)), 0644)) + dst, err := os.OpenFile(dstName, os.O_RDWR, 0) + require.NoError(t, err) + defer dst.Close() + + copied, cloned, err := newSection().WriteInto(dst, 0, length, blocksize, false) + require.NoError(t, err) + assert.Equal(t, uint64(0), cloned) + assert.Equal(t, length, copied) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, make([]byte, length), got) + }) + + t.Run("leaves blank target untouched when cloning fails", func(t *testing.T) { + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + return errors.New("simulated clone failure") + } + dstName := filepath.Join(dir, "out2") + dst, err := os.Create(dstName) + require.NoError(t, err) + defer dst.Close() + require.NoError(t, dst.Truncate(int64(length))) + + _, cloned, err := newSection().WriteInto(dst, 0, length, blocksize, true) + require.NoError(t, err) + assert.Equal(t, uint64(0), cloned) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, make([]byte, length), got) + }) + + t.Run("section smaller than a block writes only within its range", func(t *testing.T) { + var cloneCalls int + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + cloneCalls++ + return nil + } + const bigBlock = 131072 + from, sectionLen := uint64(1000), uint64(5000) + fileLen := uint64(3 * bigBlock) + dstName := filepath.Join(dir, "out-small") + require.NoError(t, os.WriteFile(dstName, bytes.Repeat([]byte{0xff}, int(fileLen)), 0644)) + dst, err := os.OpenFile(dstName, os.O_RDWR, 0) + require.NoError(t, err) + defer dst.Close() + + section := &nullChunkSection{from: from, to: from + sectionLen, blockfile: blockfile, canReflink: true} + copied, cloned, err := section.WriteInto(dst, from, sectionLen, bigBlock, false) + require.NoError(t, err) + assert.Equal(t, uint64(0), cloned) + assert.Equal(t, sectionLen, copied) + assert.Equal(t, 0, cloneCalls) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, bytes.Repeat([]byte{0xff}, int(from)), got[:from]) + assert.Equal(t, make([]byte, sectionLen), got[from:from+sectionLen]) + assert.Equal(t, bytes.Repeat([]byte{0xff}, int(fileLen-from-sectionLen)), got[from+sectionLen:]) + }) + + t.Run("copies remainder when cloning fails mid-section", func(t *testing.T) { + var calls int + cloneRange = func(dst, src *os.File, srcOffset, srcLength, dstOffset uint64) error { + calls++ + if calls > 1 { + return errors.New("simulated clone failure") + } + // Write the zeros a real clone would have produced + _, err := dst.WriteAt(make([]byte, srcLength), int64(dstOffset)) + return err + } + dstName := filepath.Join(dir, "out3") + require.NoError(t, os.WriteFile(dstName, bytes.Repeat([]byte{0xff}, int(length)), 0644)) + dst, err := os.OpenFile(dstName, os.O_RDWR, 0) + require.NoError(t, err) + defer dst.Close() + + copied, cloned, err := newSection().WriteInto(dst, 0, length, blocksize, false) + require.NoError(t, err) + assert.Equal(t, uint64(blocksize), cloned) + assert.Equal(t, length-blocksize, copied) + + got, err := os.ReadFile(dstName) + require.NoError(t, err) + assert.Equal(t, make([]byte, length), got) + }) +} diff --git a/seed.go b/seed.go index ffc49f3..aa541a2 100644 --- a/seed.go +++ b/seed.go @@ -8,6 +8,10 @@ import ( // DefaultBlockSize is used when the actual filesystem block size cannot be determined automatically const DefaultBlockSize = 4096 +// cloneRange is an indirection over CloneRange that allows tests to simulate +// filesystems where CanClone succeeds but actual cloning fails (e.g. ZFS). +var cloneRange = CloneRange + // Seed represent a source of chunks other than the store. Typically a seed is // another index+blob that present on disk already and is used to copy or clone // existing chunks or blocks into the target from.