From 5cadd07c01d536895d67bfb36698c6e2555a1257 Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Thu, 11 Dec 2025 16:01:31 +0100 Subject: [PATCH] fetch the expected number of rows, instead of stopping at the expected byte offset --- src/dataframe.ts | 10 ++++++---- test/dataframe.test.ts | 16 +++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/dataframe.ts b/src/dataframe.ts index 7c481d2..9467fc8 100644 --- a/src/dataframe.ts +++ b/src/dataframe.ts @@ -185,20 +185,20 @@ export async function csvDataFrame(params: Params): Promise { const extraRows = 3 const fetchRowStart = Math.max(0, rowStart - extraRows) const fetchRowEnd = Math.min(rowEnd + extraRows) + const numRowsToFetch = fetchRowEnd - fetchRowStart const firstByte = estimator.guessByteOffset({ row: fetchRowStart }) - const lastBytePlusOne = estimator.guessByteOffset({ row: fetchRowEnd }) if (firstByte === undefined) { // cannot estimate return } - const lastByte = lastBytePlusOne ? lastBytePlusOne - 1 : firstByte - 1 // fetch at least one row const stats = { parsedRows: 0, alreadyStored: 0, newEmpty: 0, newFull: 0, + valid: 0, ignored: 0, reachedEOF: false, } @@ -248,14 +248,16 @@ export async function csvDataFrame(params: Params): Promise { eventTarget.dispatchEvent(new CustomEvent('resolve')) stats.newFull++ } + if (!isEmpty) { + stats.valid++ + } if (result.meta.byteOffset + result.meta.byteCount >= byteLength) { // end of file stats.reachedEOF = true } - if (result.meta.byteOffset > lastByte) { + if (stats.valid >= numRowsToFetch) { // end of the requested range - stats.ignored += 1 break } } diff --git a/test/dataframe.test.ts b/test/dataframe.test.ts index fd7edc9..7dd2d25 100644 --- a/test/dataframe.test.ts +++ b/test/dataframe.test.ts @@ -435,12 +435,15 @@ describe('csvDataFrame', () => { expect(df.getCell({ row: 10, column: 'a' })).toBeUndefined() await df.fetch?.({ rowStart: 10, rowEnd: 11 }) expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '27' }) // should be 10 - // fetch again, which refreshes the average row size + // fetch again, which might refresh the average row size await df.fetch?.({ rowStart: 10, rowEnd: 11 }) - expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '08' }) // should be 10 - // fetch again, which refreshes the average row size, but does not fetch more rows + expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '10' }) // should be 10 + // fetch again, which might refresh the average row size await df.fetch?.({ rowStart: 10, rowEnd: 11 }) - expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '08' }) // should be 10 + expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '09' }) // should be 10 + // fetch again, which might refresh the average row size + await df.fetch?.({ rowStart: 10, rowEnd: 11 }) + expect(df.getCell({ row: 10, column: 'a' })).toStrictEqual({ value: '09' }) // should be 10 revoke() }) @@ -491,12 +494,11 @@ describe('csvDataFrame', () => { // the first row can always be fetched await df.fetch?.({ rowStart: 0, rowEnd: 5 }) - // note that only one row has actually been fetched - expect(resolveEventCount).toBe(1) + expect(resolveEventCount).toBe(8) // now, the offset for row 30 can be estimated, and rows can be fetched await df.fetch?.({ rowStart: 30, rowEnd: 31 }) - expect(resolveEventCount).toBe(8) + expect(resolveEventCount).toBe(15) revoke() })