Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions arrow-rs/data/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ test-log = { version = "0.2", default-features = false, features = ["trace"] }
uuid = { version = "1.10", features = ["v4"] }

# Remove sample-arrow2 as we're migrating to arrow-rs
# sample-test = "0.2.1"
# sample-std = "0.2.1"
# sample-arrow2 = "0.17.2"
sample-test = "0.2.1"
sample-std = "0.2.1"
sample-arrow-rs = "55.2.0"

reqwest.workspace = true

Expand Down
13 changes: 7 additions & 6 deletions arrow-rs/data/src/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,15 @@ pub mod test {
use crate::test::inferences_schema_a;
// Use arrow-rs transport
use plateau_transport_arrow_rs as transport;
use transport::SchemaChunk;
// Remove sample_arrow2 and sample_std imports for now since we removed these dependencies
// We'll reimplement test functionality using arrow-rs libraries
use sample_arrow_rs::{
array::ArbitraryArray,
chunk::ArbitraryChunk,
datatypes::{sample_flat, ArbitraryDataType},
};
use sample_std::{Chance, Regex};
use tempfile::tempdir;
use test::arrow::test::partial_write;
use transport::SchemaChunk;

impl Config {
pub fn nocommit() -> Self {
Expand Down Expand Up @@ -428,8 +432,6 @@ pub mod test {
}

// nulls=true breaks arrow2's parquet support, but is fine for feather
// XXX - need to create sample-arrow first
/*
pub fn deep_chunk(depth: usize, len: usize, nulls: bool) -> ArbitraryChunk<Regex, Chance> {
let names = Regex::new("[a-z]{4,8}");
let data_type = ArbitraryDataType {
Expand Down Expand Up @@ -457,7 +459,6 @@ pub mod test {
array,
}
}
*/

#[test]
fn test_interrupted_cache_write() -> Result<()> {
Expand Down
75 changes: 24 additions & 51 deletions arrow-rs/data/src/segment/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,10 @@ pub mod test {
// Fix imports to use arrow-rs versions
use plateau_transport_arrow_rs as transport;
use transport::SchemaChunk;
// Temporarily comment out sample_arrow2 dependencies
// use sample_arrow2::chunk::{ChainedChunk, ChainedMultiChunk};
// use sample_std::{Random, Regex, Sample};
// use sample_test::sample_test;
// use crate::segment::test::deep_chunk;
// Use sample-arrow-rs for property-based testing
use crate::segment::test::deep_chunk;
use sample_arrow_rs::chunk::{ChainedChunk, ChainedMultiChunk};
use sample_test::sample_test;
use tempfile::tempdir;

use super::*;
Expand Down Expand Up @@ -537,76 +536,50 @@ pub mod test {
Ok(())
}

// Temporarily comment out tests that depend on sample_arrow2
/*
// Property-based tests using sample-arrow-rs
#[sample_test]
#[test_log::test]
fn arbitrary_chunk(#[sample(deep_chunk(3, 100, true).sample_one())] chunk: ChainedChunk) {
let chunk = chunk.value;
// In arrow-rs, RecordBatch already contains its schema
let batch = chunk.value;
let root = tempdir().unwrap();
let path = root.path().join("testing.arrow");
trace!(?batch);

// The RecordBatch already has a schema embedded
let schema = (*batch.schema()).clone();

use sample_std::Sample;
let mut name = Regex::new("[a-z]{4, 8}");
let mut g = Random::new();

let schema = Schema {
fields: chunk
.iter()
.map(|arr| {
Field::new(
name.generate(&mut g),
arr.data_type().clone(),
arr.validity().is_some(),
)
})
.collect(),
metadata: Metadata::default(),
};
let mut w = Writer::create_path(&path, &schema).unwrap();
w.write_chunk(chunk.clone()).unwrap();
w.write_chunk(batch.clone()).unwrap();
w.end().unwrap();

let r = Reader::open(&path).unwrap();
let chunks = r.collect::<anyhow::Result<Vec<_>>>().unwrap();
assert_eq!(chunks, vec![chunk]);
assert_eq!(chunks, vec![batch]);
}

#[sample_test]
#[test_log::test]
fn arbitrary_many_chunk(
#[sample(deep_chunk(5, 100, true).sample_many(2..10))] chunk: ChainedMultiChunk,
) {
let chunks = chunk.value;
let batches = chunk.value;
trace!(?batches);
let root = tempdir().unwrap();
let path = root.path().join("testing.arrow");

let mut name = Regex::new("[a-z]{4, 8}");
let mut g = Random::new();

let schema = Schema {
fields: chunks
.first()
.unwrap()
.iter()
.map(|arr| {
Field::new(
name.generate(&mut g),
arr.data_type().clone(),
arr.validity().is_some(),
)
})
.collect(),
metadata: Metadata::default(),
};
// Get schema from first batch - all batches should have same schema
let schema = (*batches.first().unwrap().schema()).clone();

let mut w = Writer::create_path(&path, &schema).unwrap();

for chunk in &chunks {
w.write_chunk(chunk.clone()).unwrap();
for batch in &batches {
w.write_chunk(batch.clone()).unwrap();
}
w.end().unwrap();

let r = Reader::open(&path).unwrap();
let actual_chunks = r.collect::<anyhow::Result<Vec<_>>>().unwrap();
assert_eq!(actual_chunks, chunks);
let actual_batches = r.collect::<anyhow::Result<Vec<_>>>().unwrap();
assert_eq!(actual_batches, batches);
}
*/
}
Loading