From ce06e53b3629e1d37cdc707c8dce87f807154c29 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Wed, 2 Aug 2023 17:12:32 +0200 Subject: [PATCH 01/42] also buffer write output, with same size as for read buffer --- rust/src/pcube/compression.rs | 10 ++++++---- rust/src/pcube/mod.rs | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rust/src/pcube/compression.rs b/rust/src/pcube/compression.rs index 61ea73b..8e09bdc 100644 --- a/rust/src/pcube/compression.rs +++ b/rust/src/pcube/compression.rs @@ -1,7 +1,9 @@ -use std::io::{BufReader, Read, Write}; +use std::io::{BufReader, BufWriter, Read, Write}; use flate2::{read::GzDecoder, write::GzEncoder}; +const BUF_SIZE: usize = 1024 * 16384; + /// Compression types supported for `.pcube` files. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Compression { @@ -45,7 +47,7 @@ where { pub fn new(compression: Compression, reader: T) -> Self { match compression { - Compression::None => Self::Uncompressed(BufReader::new(reader)), + Compression::None => Self::Uncompressed(BufReader::with_capacity(BUF_SIZE, reader)), Compression::Gzip => Self::Gzip(GzDecoder::new(reader)), } } @@ -74,7 +76,7 @@ pub enum Writer where T: Write, { - Uncompressed(T), + Uncompressed(BufWriter), Gzip(GzEncoder), } @@ -84,7 +86,7 @@ where { pub fn new(compression: Compression, writer: T) -> Self { match compression { - Compression::None => Self::Uncompressed(writer), + Compression::None => Self::Uncompressed(BufWriter::with_capacity(BUF_SIZE, writer)), Compression::Gzip => Self::Gzip(GzEncoder::new(writer, flate2::Compression::default())), } } diff --git a/rust/src/pcube/mod.rs b/rust/src/pcube/mod.rs index 8dc6175..468649c 100644 --- a/rust/src/pcube/mod.rs +++ b/rust/src/pcube/mod.rs @@ -224,6 +224,8 @@ impl PCubeFile { return Err(e); } + writer.flush()?; + Ok(()) } From d43d13815ff52e42219e1a1ddb0977569d38ceb3 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Tue, 1 Aug 2023 20:12:56 +0200 Subject: [PATCH 02/42] Add counting 
option to converting, so that all cubes are counted --- rust/src/cli/cli.rs | 97 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 84 insertions(+), 13 deletions(-) diff --git a/rust/src/cli/cli.rs b/rust/src/cli/cli.rs index b298e5d..ee7edfa 100644 --- a/rust/src/cli/cli.rs +++ b/rust/src/cli/cli.rs @@ -5,12 +5,15 @@ use std::{ }; use clap::{Args, Parser, Subcommand, ValueEnum}; -use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; -use opencubes::{naive_polycube::NaivePolyCube, pcube::PCubeFile}; -use rayon::prelude::{IntoParallelIterator, ParallelIterator}; +use indicatif::{MultiProgress, ProgressBar, ProgressIterator, ProgressStyle}; +use opencubes::{ + naive_polycube::NaivePolyCube, + pcube::{PCubeFile, RawPCube}, +}; mod enumerate; use enumerate::enumerate; +use rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize) { let time = duration.as_micros(); @@ -37,7 +40,17 @@ fn finish_bar(bar: &ProgressBar, duration: Duration, expansions: usize, n: usize } fn unknown_bar() -> ProgressBar { - let style = ProgressStyle::with_template("[{elapsed_precise}] [{spinner:10.cyan/blue}] {msg}") + unknown_bar_with_pos(false) +} + +fn unknown_bar_with_pos(with_pos: bool) -> ProgressBar { + let template = if with_pos { + "[{elapsed_precise}] [{spinner:10.cyan/blue}] {pos} {msg}" + } else { + "[{elapsed_precise}] [{spinner:10.cyan/blue}] {msg}" + }; + + let style = ProgressStyle::with_template(template) .unwrap() .tick_strings(&[ ">---------", @@ -154,6 +167,13 @@ pub struct ConvertArgs { /// the conversion is complete. #[clap(short, long)] pub output_path: Option, + + /// Count the cubes in stream-oriented files before writing the converted file. + /// + /// Counting requires 2 passes for the conversion to be completed, which + /// can be slow. 
+ #[clap(long, short = 'n')] + pub count: bool, } #[derive(Clone, Args)] @@ -318,7 +338,7 @@ pub fn convert(opts: &ConvertArgs) { // that the longest files are yielded last. let files: BTreeMap<_, _> = opts .path - .iter() + .par_iter() .map(|path| { let input_file = match PCubeFile::new_file(&path) { Ok(f) => f, @@ -327,6 +347,7 @@ pub fn convert(opts: &ConvertArgs) { std::process::exit(1); } }; + (input_file.len(), (input_file, path.to_string())) }) .collect(); @@ -334,18 +355,25 @@ pub fn convert(opts: &ConvertArgs) { // Iterate over the files and do some printing, in-order let files: Vec<_> = files .into_iter() - .map(|(_, (input_file, path))| { + .map(|(len, (input_file, path))| { let output_path = opts.output_path.clone().unwrap_or(path.clone()); - println!("Converting file {}", path); - println!("Final output path: {output_path}"); + multi_bar + .println(format!("Converting file {}", path)) + .unwrap(); + multi_bar + .println(format!("Final output path: {output_path}")) + .unwrap(); + if opts.canonicalize { - println!("Canonicalizing output"); + multi_bar.println("Canonicalizing output").unwrap(); } - println!("Input compression: {:?}", input_file.compression()); - println!("Output compression: {:?}", opts.compression); - - let len = input_file.len(); + multi_bar + .println(format!("Input compression: {:?}", input_file.compression())) + .unwrap(); + multi_bar + .println(format!("Output compression: {:?}", opts.compression)) + .unwrap(); let bar = if let Some(len) = len { make_bar(len as u64) @@ -363,6 +391,24 @@ pub fn convert(opts: &ConvertArgs) { files .into_par_iter() .for_each(|(input_file, path, output_path, len, bar)| { + let len = if opts.count && len.is_none() { + let bar = unknown_bar_with_pos(true); + let counting_bar = multi_bar.add(bar); + counting_bar.set_message(format!("polycubes counted in {path}")); + + let with_progress = PCubeFile::new_file(&path) + .unwrap() + .progress_with(counting_bar.clone()); + + let output = 
Some(with_progress.count()); + + counting_bar.finish_and_clear(); + + output + } else { + input_file.len() + }; + bar.set_message(path.to_string()); let canonical = input_file.canonical(); @@ -376,6 +422,30 @@ pub fn convert(opts: &ConvertArgs) { let mut total_read = 0; let mut last_tick = Instant::now(); + struct InputIter { + inner: I, + len: Option, + } + + impl Iterator for InputIter + where + I: Iterator, + { + type Item = RawPCube; + + fn next(&mut self) -> Option { + self.inner.next() + } + + fn size_hint(&self) -> (usize, Option) { + if let Some(len) = self.len { + (len, Some(len)) + } else { + (0, None) + } + } + } + let input = input_file.filter_map(|v| { total_read += 1; @@ -404,6 +474,7 @@ pub fn convert(opts: &ConvertArgs) { } }); + let input = InputIter { inner: input, len }; let canonical = canonical || opts.canonicalize; match PCubeFile::write_file( From e6fe2381f0cf3e55f370c55333b2bb3533232156 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Fri, 4 Aug 2023 19:44:59 +0200 Subject: [PATCH 03/42] Fix bar & progress for validate --- rust/src/cli/cli.rs | 55 ++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/rust/src/cli/cli.rs b/rust/src/cli/cli.rs index ee7edfa..3fa6f1a 100644 --- a/rust/src/cli/cli.rs +++ b/rust/src/cli/cli.rs @@ -224,33 +224,36 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { let in_memory = !opts.no_in_memory; let n = opts.n; - println!("Validating {}", path); + let file = PCubeFile::new_file(path)?; + let canonical = file.canonical(); + let len = file.len(); + + let bar = if let Some(len) = len { + make_bar(len as u64) + } else { + unknown_bar_with_pos(true) + }; + + bar.set_message("cubes validated"); + + bar.println(format!("Validating {}", path)); let mut uniqueness = match (in_memory, uniqueness) { (true, true) => { - eprintln!("Verifying uniqueness."); + bar.println("Verifying uniqueness."); Some(HashSet::new()) } (false, true) => { + bar.abandon(); 
println!("Cannot verify uniqueness without placing all entries in memory. Re-run with `--no-uniqueness` enabled to run."); std::process::exit(1); } (_, false) => { - eprintln!("Not verifying uniqueness"); + bar.println("Not verifying uniqueness"); None } }; - let file = PCubeFile::new_file(path)?; - let canonical = file.canonical(); - let len = file.len(); - - let bar = if let Some(len) = len { - make_bar(len as u64) - } else { - unknown_bar() - }; - let exit = |msg: &str| { bar.abandon(); println!("{msg}"); @@ -258,21 +261,18 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { }; match (canonical, validate_canonical) { - (true, true) => eprintln!("Verifying entry canonicality. File indicates that entries are canonical."), - (false, true) => eprintln!("Not verifying entry canonicality. File header does not indicate that entries are canonical"), - (true, false) => eprintln!("Not verifying entry canonicality. File header indicates that they are, but check is disabled."), - (false, false) => eprintln!("Not verifying canonicality. File header does not indicate that entries are canonical, and check is disabled.") + (true, true) => bar.println("Verifying entry canonicality. File indicates that entries are canonical."), + (false, true) => bar.println("Not verifying entry canonicality. File header does not indicate that entries are canonical"), + (true, false) => bar.println("Not verifying entry canonicality. File header indicates that they are, but check is disabled."), + (false, false) => bar.println("Not verifying canonicality. 
File header does not indicate that entries are canonical, and check is disabled.") } if let Some(n) = n { - eprintln!("Verifying that all entries are N = {n}"); + bar.println(format!("Verifying that all entries are N = {n}")); } let mut total_read = 0; - let mut last_tick = Instant::now(); - bar.tick(); - for cube in file { let cube = match cube { Ok(c) => NaivePolyCube::from(c), @@ -284,14 +284,7 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { total_read += 1; - if len.is_some() { - bar.inc(1); - } else if last_tick.elapsed() >= Duration::from_millis(66) { - last_tick = Instant::now(); - bar.set_message(format!("{total_read}")); - bar.inc(1); - bar.tick(); - } + bar.inc(1); let mut form: Option = None; let canonical_form = || cube.pcube_canonical_form(); @@ -317,10 +310,10 @@ pub fn validate(opts: &ValidateArgs) -> std::io::Result<()> { exit("Found non-unique polycubes."); } } - - bar.finish(); } + bar.finish(); + println!("Success: {path}, containing {total_read} cubes, is valid"); Ok(()) From eeb64884c83c74374f3b8c79c954eb68a1531d31 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Fri, 4 Aug 2023 19:39:58 +0200 Subject: [PATCH 04/42] (ab)use LEB128 for fixed-width header so we can write the count without having to re-read the whole file Put this in a const Just get rid of this limit, we can handle it without --- rust/src/pcube/mod.rs | 206 +++++++++++++++++++++++++++++------------- 1 file changed, 143 insertions(+), 63 deletions(-) diff --git a/rust/src/pcube/mod.rs b/rust/src/pcube/mod.rs index 468649c..c9885d2 100644 --- a/rust/src/pcube/mod.rs +++ b/rust/src/pcube/mod.rs @@ -2,7 +2,7 @@ use std::{ fs::File, - io::{ErrorKind, Read, Seek, Write}, + io::{ErrorKind, Read, Write}, iter::Peekable, path::Path, }; @@ -93,25 +93,7 @@ where let [orientation, compression] = header; let canonicalized = orientation != 0; - let mut cube_count: u64 = 0; - let mut shift = 0; - loop { - let mut next_byte = [0u8; 1]; - input.read_exact(&mut next_byte)?; - - 
let [next_byte] = next_byte; - - cube_count |= ((next_byte & 0x7F) as u64) << shift; - - shift += 7; - if shift > 64 { - panic!("Cannot load possibly more than u64 cubes..."); - } - - if next_byte & 0x80 == 0 { - break; - } - } + let cube_count = PCubeFile::read_leb128(&mut input)?; let len = if cube_count == 0 { None @@ -177,56 +159,102 @@ impl PCubeFile { Self::new(file) } - /// Write implementation - fn write_impl( - write_magic: bool, - mut cubes: I, - is_canonical: bool, - compression: Compression, - mut write: W, - ) -> std::io::Result<()> - where - I: Iterator, - W: Write, - { - if write_magic { - write.write_all(&MAGIC)?; - } + fn read_leb128(mut reader: impl Read) -> std::io::Result { + let mut cube_count: u64 = 0; + let mut shift = 0; + loop { + let mut next_byte = [0u8; 1]; + reader.read_exact(&mut next_byte)?; - let compression_val = compression.into(); - let orientation_val = if is_canonical { 1 } else { 0 }; + let [next_byte] = next_byte; - write.write_all(&[orientation_val, compression_val])?; + let is_last_byte = (next_byte & 0x80) == 0x00; + let value = (next_byte & 0x7F) as u64; - let mut cube_count = 0; - let (_, max) = cubes.size_hint(); + if shift > 63 && value != 0 || shift > 56 && value > 1 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "Cannot load more than u64 cubes", + )); + } + + cube_count |= value.overflowing_shl(shift).0; + shift += 7; - if let Some(max) = max { - cube_count = max; + if is_last_byte { + break; + } } + return Ok(cube_count); + } + + /// Write a leb128 value + /// + /// If `prefill` is `true`, this function will always + /// write 10 bytes of data describing `number`. 
+ fn write_leb128(mut number: u64, mut writer: impl Write, prefill: bool) -> std::io::Result<()> { let mut ran_once = false; - while cube_count > 0 || !ran_once { + let mut bytes_written = 0; + while number > 0 || !ran_once || (prefill && bytes_written < 10) { ran_once = true; - let mut next_byte = (cube_count as u8) & 0x7F; - cube_count >>= 7; + let mut next_byte = (number as u8) & 0x7F; + number >>= 7; - if cube_count > 0 { + if number > 0 || (prefill && bytes_written != 9) { next_byte |= 0x80; } - write.write_all(&[next_byte])?; + writer.write_all(&[next_byte])?; + bytes_written += 1; } + Ok(()) + } + + /// Write the header + /// + /// If `prefill_len` is `true`, the length is _always_ written + /// as 10 bytes. This way, rewriting the header in-place is possible. + fn write_header( + mut write: impl Write, + magic: [u8; 4], + is_canonical: bool, + compression: Compression, + cube_count: Option, + prefill_len: bool, + ) -> std::io::Result<()> { + let compression_val = compression.into(); + let orientation_val = if is_canonical { 1 } else { 0 }; + + let cube_count = cube_count.unwrap_or(0); + + write.write_all(&magic)?; + write.write_all(&[orientation_val, compression_val])?; + Self::write_leb128(cube_count, &mut write, prefill_len)?; + + Ok(()) + } + + /// Write implementation + fn write_impl(cubes: I, compression: Compression, write: W) -> std::io::Result + where + I: Iterator, + W: Write, + { let mut writer = Writer::new(compression, write); - if let Some(e) = cubes.find_map(|v| v.pack(&mut writer).err()) { + let mut cube_count = 0; + if let Some(e) = cubes + .inspect(|_| cube_count += 1) + .find_map(|v| v.pack(&mut writer).err()) + { return Err(e); } writer.flush()?; - Ok(()) + Ok(cube_count) } /// Write the [`RawPCube`]s produced by `I` into `W`. 
@@ -237,13 +265,43 @@ impl PCubeFile { is_canonical: bool, compression: Compression, cubes: I, - write: W, + mut write: W, + ) -> std::io::Result + where + I: Iterator, + W: std::io::Write, + { + let len = cubes.size_hint().1.map(|v| v as u64); + + Self::write_header(&mut write, MAGIC, is_canonical, compression, len, false)?; + + Self::write_impl(cubes, compression, write) + } + + pub fn write_seekable( + mut seekable: S, + is_canonical: bool, + compression: Compression, + cubes: I, ) -> std::io::Result<()> where + S: std::io::Seek + std::io::Write, I: Iterator, - W: Write, { - Self::write_impl(true, cubes, is_canonical, compression, write) + let len = cubes.size_hint().1.map(|v| v as u64); + let magic = [0, 0, 0, 0]; + Self::write_header(&mut seekable, magic, is_canonical, compression, len, true)?; + + let len = Self::write_impl(cubes, compression, &mut seekable)?; + let len = Some(len as u64); + + // Write magic and cube length at the end + seekable.rewind()?; + Self::write_header(&mut seekable, MAGIC, is_canonical, compression, len, true)?; + + seekable.flush()?; + + Ok(()) } /// Write the [`RawPCube`]s produced by `I` to the file at `path`. 
@@ -266,19 +324,10 @@ impl PCubeFile { where I: Iterator, { - let mut file = std::fs::File::create(path.as_ref())?; - + let file = std::fs::File::create(path.as_ref())?; file.set_len(0)?; - file.seek(std::io::SeekFrom::Start(0))?; - file.write_all(&[0, 0, 0, 0])?; - - Self::write_impl(false, cubes, is_canonical, compression, &mut file)?; - - // Write magic last - file.seek(std::io::SeekFrom::Start(0))?; - file.write_all(&MAGIC)?; - Ok(()) + Self::write_seekable(file, is_canonical, compression, cubes) } } @@ -409,3 +458,34 @@ where } impl AllUniquePolycubeIterator for AllUnique where T: Read {} + +#[test] +pub fn leb128_len() { + let values = [0, 1, 24, 150283, 0x7FFFF_FFFF, u64::MAX - 1, u64::MAX]; + + for value in values { + let mut data = Vec::new(); + PCubeFile::write_leb128(value, &mut data, true).unwrap(); + + assert_eq!(value, PCubeFile::read_leb128(&data[..]).unwrap()); + } + + let mut many_zeros = [0x80; 20]; + many_zeros[19] = 0x00; + + assert!(PCubeFile::read_leb128(&many_zeros[..]).is_ok()); +} + +#[test] +pub fn leb128_unparseable() { + let unparseable_values = [ + &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x02][..], + &[ + 0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01, + ][..], + ]; + + for unparseable in unparseable_values { + assert!(PCubeFile::read_leb128(unparseable).is_err()); + } +} From a37c83c9f96aa7e7f2c2400606f5a5509f41dcd0 Mon Sep 17 00:00:00 2001 From: datdenkikniet Date: Sat, 5 Aug 2023 21:55:46 +0200 Subject: [PATCH 05/42] Converting no longer needs a counting option, as it is always done --- rust/src/cli/cli.rs | 83 +++++---------------------------------------- 1 file changed, 9 insertions(+), 74 deletions(-) diff --git a/rust/src/cli/cli.rs b/rust/src/cli/cli.rs index 3fa6f1a..d50654b 100644 --- a/rust/src/cli/cli.rs +++ b/rust/src/cli/cli.rs @@ -1,15 +1,12 @@ use std::{ collections::{BTreeMap, HashSet}, path::PathBuf, - time::{Duration, Instant}, + time::Duration, }; use clap::{Args, Parser, 
Subcommand, ValueEnum}; -use indicatif::{MultiProgress, ProgressBar, ProgressIterator, ProgressStyle}; -use opencubes::{ - naive_polycube::NaivePolyCube, - pcube::{PCubeFile, RawPCube}, -}; +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use opencubes::{naive_polycube::NaivePolyCube, pcube::PCubeFile}; mod enumerate; use enumerate::enumerate; @@ -167,13 +164,6 @@ pub struct ConvertArgs { /// the conversion is complete. #[clap(short, long)] pub output_path: Option, - - /// Count the cubes in stream-oriented files before writing the converted file. - /// - /// Counting requires 2 passes for the conversion to be completed, which - /// can be slow. - #[clap(long, short = 'n')] - pub count: bool, } #[derive(Clone, Args)] @@ -371,38 +361,20 @@ pub fn convert(opts: &ConvertArgs) { let bar = if let Some(len) = len { make_bar(len as u64) } else { - unknown_bar() + unknown_bar_with_pos(true) }; let bar = multi_bar.add(bar); - (input_file, path, output_path, len, bar) + (input_file, path, output_path, bar) }) .collect(); // Convert, in parallel files .into_par_iter() - .for_each(|(input_file, path, output_path, len, bar)| { - let len = if opts.count && len.is_none() { - let bar = unknown_bar_with_pos(true); - let counting_bar = multi_bar.add(bar); - counting_bar.set_message(format!("polycubes counted in {path}")); - - let with_progress = PCubeFile::new_file(&path) - .unwrap() - .progress_with(counting_bar.clone()); - - let output = Some(with_progress.count()); - - counting_bar.finish_and_clear(); - - output - } else { - input_file.len() - }; - - bar.set_message(path.to_string()); + .for_each(|(input_file, path, output_path, bar)| { + bar.set_message(format!("cubes converted for {path}")); let canonical = input_file.canonical(); let mut output_path_temp = PathBuf::from(&output_path); @@ -412,36 +384,7 @@ pub fn convert(opts: &ConvertArgs) { output_path_temp.pop(); output_path_temp.push(filename); - let mut total_read = 0; - let mut last_tick = Instant::now(); - - 
struct InputIter { - inner: I, - len: Option, - } - - impl Iterator for InputIter - where - I: Iterator, - { - type Item = RawPCube; - - fn next(&mut self) -> Option { - self.inner.next() - } - - fn size_hint(&self) -> (usize, Option) { - if let Some(len) = self.len { - (len, Some(len)) - } else { - (0, None) - } - } - } - let input = input_file.filter_map(|v| { - total_read += 1; - let cube = match v { Ok(v) => Some(v), Err(e) => { @@ -451,14 +394,7 @@ pub fn convert(opts: &ConvertArgs) { } }?; - if len.is_some() { - bar.inc(1); - } else if last_tick.elapsed() >= Duration::from_millis(66) { - last_tick = Instant::now(); - bar.set_message(format!("{total_read}")); - bar.inc(1); - bar.tick(); - } + bar.inc(1); if opts.canonicalize { Some(NaivePolyCube::from(cube).canonical_form().into()) @@ -467,7 +403,6 @@ pub fn convert(opts: &ConvertArgs) { } }); - let input = InputIter { inner: input, len }; let canonical = canonical || opts.canonicalize; match PCubeFile::write_file( @@ -485,7 +420,7 @@ pub fn convert(opts: &ConvertArgs) { if !bar.is_finished() { match std::fs::rename(output_path_temp, output_path) { - Ok(_) => bar.finish_with_message(format!("{path} Done!")), + Ok(_) => bar.finish(), Err(e) => { bar.abandon_with_message(format!("{path} Failed to write final file: {e}")); return; From 15977da327def8382288d9a1c418638cd0b1ea69 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Fri, 28 Jul 2023 16:44:01 +0300 Subject: [PATCH 06/42] Memory mapped file API (mapped_file library) MIT license in mapped_file.hpp and mapped_file.cpp - Supports 64-bit file seeking. (+4GiB files) - Can memory map portions of the opened file or entire file. - Can flush modified read-write mappings back to disk. - Read-write regions will grow the backing file in multiples of 4096-byte blocks. - mapped::file class for accessing a file on disk. - mapped::region class for memory mapping a raw area of a file. 
- mapped::struct_region template for accessing an on-disk structure - mapped::array_region template for accessing an on-disk array of T Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/CMakeLists.txt | 4 + cpp/libraries/mapped_file.cpp | 317 +++++++++++++++++++++++ cpp/libraries/mapped_file.hpp | 467 ++++++++++++++++++++++++++++++++++ 3 files changed, 788 insertions(+) create mode 100644 cpp/libraries/mapped_file.cpp create mode 100644 cpp/libraries/mapped_file.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6151054..05e50f0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -38,6 +38,9 @@ macro(ConfigureTarget Target) ) endmacro() +add_library(mapped_file STATIC "libraries/mapped_file.cpp") +ConfigureTarget(mapped_file) + # Source files add_library(CubeObjs OBJECT "src/cubes.cpp" @@ -50,6 +53,7 @@ ConfigureTarget(CubeObjs) # Build main program add_executable(${PROJECT_NAME} "program.cpp" $) target_link_libraries(${PROJECT_NAME} pthread) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) # Optionally build tests diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp new file mode 100644 index 0000000..a3731cd --- /dev/null +++ b/cpp/libraries/mapped_file.cpp @@ -0,0 +1,317 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "mapped_file.hpp" + +#include +#include +#include +#include + +// POSIX/Linux APIs +#include +#include +#include +#include + +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#endif + +namespace mapped { + +/** + * Mapped file POSIX/Linux compatible implementation + */ +file::file() : fd(-1), fd_size(0) {} + +file::~file() { close(); } + +void file::close() { + if (fd >= 0) { + ::fsync(fd); + ::close(fd); + fd = -1; + fd_size = 0; + } +} + +int file::open(const char* fname) { + close(); + + fd = ::open64(fname, O_RDONLY); + if (fd == -1) { + std::fprintf(stderr, "Error opening file for reading\n"); + return -1; + } + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error opening file for reading\n"); + return -1; + } + fd_size = finfo.st_size; + fd_rw = false; + return 0; +} + +int file::openrw(const char* fname, size_t maxsize, int flags) { + // create new files with "normal" permissions: "-rw-r--r--" + const mode_t fperms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH; + + close(); + + maxsize = roundUp(maxsize); + + if (!flags) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC); + if (fd == -1) { + std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + + fd_rw = true; + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error getting file size:%s\n", std::strerror(errno)); + return -1; + } + return 
truncate(finfo.st_size); + + } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { + fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); + if (fd == -1) { + std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + return truncate(maxsize); + + } else if ((flags & RESIZE) != 0) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); + if (fd == -1) { + std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + return truncate(maxsize); + } else { + std::fprintf(stderr, "Invalid open flags:%s\n", std::strerror(errno)); + return -1; + } +} + +bool file::is_rw() const { return fd_rw; } + +seekoff_t file::size() const { return fd_size; } + +int file::truncate(seekoff_t newsize) { + // resize the backing file + if (newsize != fd_size && ftruncate64(fd, newsize)) { + std::fprintf(stderr, "Error resizing backing file:%s\n", std::strerror(errno)); + return -1; + } + fd_size = newsize; + return 0; +} + +/** + * Mapped region POSIX/Linux compatible implementation. + */ + +region::region(std::shared_ptr src, seekoff_t fpos, len_t size) : mfile(src) { + std::lock_guard lock(mfile->mut); + remap(fpos, size); +} + +region::region(std::shared_ptr src) : mfile(src) { + std::lock_guard lock(mfile->mut); + remap(0, mfile->size()); +} + +region::~region() { + std::lock_guard lock(mfile->mut); + map_fseek = 0; + remap(0, 0); +} + +/** + * This is the core implementation of mapped_file: + * remap(0,0) releases the mapping. + * remap(0, n) mmap roundUp(n) bytes at offset 0 + * remap(0, k) mremap roundUp(n) bytes at offset 0 (grows the existing mapping) + * remap(n, j) munmap old region, mmap new at offset roundDown(n) + * + * In read-write mode the backing file is grown to fit the mapping. 
+ */ +void region::remap(const seekoff_t fpos, const len_t size) { + if (fpos == usr_fseek && size == usr_size) return; // No-op + // check if [fpos, fpos+size] fits into the existing + // mmap() window and only adjust the user region. + if (size && map_ptr && (map_fseek <= fpos && fpos + size <= map_fseek + map_size)) { + usr_fseek = fpos; + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); + usr_size = size; + return; + } + + // if size == 0 or the usr_fseek != fpos, + // we have to unmap the old region first, if any. + if (!!map_ptr && (size == 0 || usr_fseek != fpos)) { + if (::munmap(map_ptr, map_size) == -1) { + std::fprintf(stderr, "Error mapping file memory\n"); + return; + } + map_ptr = nullptr; + map_size = 0; + usr_ptr = nullptr; + usr_size = 0; + if (size == 0) return; + } + // keep what user tried to ask: + usr_fseek = fpos; + usr_size = size; + + if (map_ptr && map_fseek == fpos) { + // this mapping exists already at same map_fseek + // remap it to grow the region. + auto newsize = roundUp(size); + void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); + if (newptr == MAP_FAILED) { + std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); + std::abort(); + return; + } + map_ptr = newptr; + map_size = size; + return; + } + + // create new mapping + if (mfile->is_rw()) { + // RW mapping + auto newsize = roundUp(size); + if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { + // failed. Disk full? + std::abort(); + return; + } + // mmap requires fpos && size to be multiple of PAGE_SIZE + map_fseek = roundDown(fpos); + if (map_fseek < fpos) { + // adjust size to cover. 
+ newsize += PAGE_SIZE; + } + map_size = newsize; + map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); + if (map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::abort(); + return; + } + } else { + // RO mapping + if (mfile->size() < fpos) { + // can't: the backing file is too small. + std::fprintf(stderr, "Error seeking past end of file.\n"); + std::abort(); + return; + } + map_size = roundUp(size); + map_fseek = roundDown(fpos); + // Map the region. (use huge pages, don't reserve backing store) + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); + + if (!map_ptr || map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error mapping file\n"); + std::abort(); + return; + } + } + // adjust the usr_ptr to fix + // any page misalignment. + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); +} + +void region::jump(seekoff_t fpos) { + std::lock_guard lock(mfile->mut); + remap(fpos, map_size); + is_dirty = false; +} + +void region::flushJump(seekoff_t fpos) { + flush(); + std::lock_guard lock(mfile->mut); + remap(fpos, map_size); +} + +void region::flush() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mfile->mut); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + if (msync(map_ptr, map_size, MS_ASYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::sync() { + // only flush if dirty and RW mapped. 
+ std::lock_guard lock(mfile->mut); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + if (msync(map_ptr, map_size, MS_SYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +/* +TODO: +void region::resident(void * paddr, size_t lenght, bool resident) { + // Align paddr to PAGE_SIZE + void * start = reinterpret_cast(uintptr_t(paddr) & ~(PAGE_SIZE-1)); + lenght = roundToPage(lenght); + + if(madvise(start, lenght, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); + } +} + +void region::discard(void * paddr, size_t lenght) { + // get range of pages that may be discarded. + // this is always an subset of [paddr, paddr+lenght] range. + void * start = (void*)roundUp((uintptr_t)paddr, PAGE_SIZE); + lenght = roundDown(lenght, PAGE_SIZE); + + if(start < (char*)paddr + lenght && lenght >= PAGE_SIZE) { + // note: errors are ignored here. + madvise(start, lenght, MADV_REMOVE); + } +} +*/ + +}; // namespace mapped diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp new file mode 100644 index 0000000..0aaefff --- /dev/null +++ b/cpp/libraries/mapped_file.hpp @@ -0,0 +1,467 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef MAPPEDFILE_HPP_INCLUDED +#define MAPPEDFILE_HPP_INCLUDED + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <mutex> +#include <type_traits> + +/** + * Memory mapped file I/O utilities + * - mapped::file class for opening a file + * - mapped::region class for RW/RO memory mapping part of the file instance. + * - mapped::struct_region template for RW/RO accessing part of the file as the specified type. + * - mapped::array_region template for RW/RO accessing part of the file as array of T elements. + * + * @note + * When doing read-only mapping the region instance + * should be const qualified as this restricts + * the region class API to read-only operations and prevents + * accidental modification of the file. + * Use std::make_unique<const region>() in this case. + * + * @note + * When using the read-write features the backing file is resized + * in multiples of PAGE_SIZE blocks even if the actually mapped size is + * something else. + * openrw(...,size,RESIZE) always truncates the file to roundUp(size). + * You should do file->truncate(< sizeInBytes>) to make the file + * size exactly what you want before the file is closed. + * + * Modified regions should flush() or sync() before they are destroyed + * or the modified data may not end up in the file. + * + * TODO: + * - Two region instances should not overlap, + * i.e. the same portion of the file should not be mapped twice. + * (Not sure if this is actually broken now, but you have been warned) + * - Multi-threading support not tested/written.
+ * Currently the same mapped region can be used by multiple threads, + * but it cannot be modified. + * - Better error handling. (exceptions?, error codes?) + * Currently critical errors are printed and std::abort() is called. + * How do we handle system errors that happen in constructors? + */ +namespace mapped { + +const size_t PAGE_SIZE = 4096; + +static inline size_t roundToPage(ptrdiff_t x) { return (std::max<ptrdiff_t>(0, x - 1) & ~(PAGE_SIZE - 1)) + PAGE_SIZE; } + +constexpr inline size_t roundUp(uintptr_t x) { return (x + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); } + +constexpr inline size_t roundDown(uintptr_t x) { return (x & ~(PAGE_SIZE - 1)); } + +/** + * seekoff_t: Position of the file cursor + */ +using seekoff_t = uint64_t; +/** + * len_t: length of file data + */ +using len_t = size_t; + +class file; + +/** + * Memory-mapped region + * @brief + * the region base class implementation memory maps + * a raw memory range from the file. + */ +class region { + protected: + // actually mapped region: + void* map_ptr = nullptr; + size_t map_size = 0; + seekoff_t map_fseek = 0; + // what constructor asked: + void* usr_ptr = nullptr; + size_t usr_size = 0; + seekoff_t usr_fseek = 0; + // todo: maybe use std::weak_ptr? + // that would allow file to be released and + // any existing region(s) would still work. + // (but only if remap() is not called) + std::shared_ptr<file> mfile; + // non-const data access sets is_dirty. + bool is_dirty = false; + + void remap(const seekoff_t fpos, const len_t size); + + public: + /** + * Open memory mapped region into a file. + * @brief + * Seeks at fpos in file and map size bytes + * starting from that position in file. + * @note + * - Seeking past the EOF in file that is read-only will fail. + * The mapped size may extend past EOF but accessing past EOF + * either returns undefined data or program is terminated by OS. + * (EOF is at file->size()) + * - Seeking past the EOF that is read-write + * grows the backing file to fit the mapping.
+ * The backing file is always extended in multiple of PAGE_SIZE bytes. + * @note + * If size and/or fpos are not aligned to multiple of PAGE_SIZE + * they are forcibly aligned internally. This results in + * regionSize() and regionSeek() that may differ compared to + * size() and getSeek(). + * Side-effect is that backing file may grow more than expected. + */ + region(std::shared_ptr<file> src, seekoff_t fpos, len_t size); + + /** + * Open memory mapped region into the file + * @brief + * same as region(myfile, 0, myfile.size()) + * and memory maps the entire file. + */ + explicit region(std::shared_ptr<file> src); + + /** + * Note: even if region was modified, + * destructor will not flush()/sync() before tearing down the mapping. + */ + virtual ~region(); + + /** + * Get data pointer. + */ + const void* data() const { return usr_ptr; } + void* data() { + is_dirty = true; + return usr_ptr; + } + + std::shared_ptr<file> getFile() { return mfile; } + + /** + * Get the seek used to init this region. + */ + seekoff_t getSeek() const { return usr_fseek; } + /** + * Get the size used to init this region. + */ + len_t size() const { return usr_size; } + + /** + * Get page aligned seek <= getSeek() + */ + seekoff_t regionSeek() const { return map_fseek; } + /** + * Get page aligned size >= size() + */ + len_t regionSize() const { return map_size; } + + /** + * Resize the mapped region. + * @note the mapped memory address may move, + * but current contents are preserved. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void resize(len_t newsize); + + // todo: window(len_t virtsize) + // since region() is already lying that it can map + // non-page-aligned offsets and sizes + // window() would grow this over-aligned window + // to arbitrary size and keep the initialized + // user size. + // This allows remap() to just adjust the usr_ptr + // if the region window fits in. + + /** + * Flush mapped memory region into the file.
+ * @brief this is an hint to operating system that + * memory region shall be synchronized to disk. + * It may not wait for this to have completed before returning. + * @note Use sync() instead if you must guarantee the data has + * reached persistent storage. + */ + void flush(); + + /** + * Synchronize modified memory region onto disk. + */ + void sync(); + + /** + * Set memory region to resident/or released. + * @brief setting memory range to non-resident state + * causes system to drop the data from system memory. + * Reading non-resident memory region again causes system to + * fetch data from the disk again. + * @warn if memory region is not flushed before setting + * it non-resident any writes may be discarded to backing file. + */ + // void resident(bool state); + + /** + * Discard memory region. + * @brief discarding memory range causes system + * to reclaim the memory *and* the on-disk area. + * This means the data is lost in the mapped memory region, + * and any data within will not be written onto disk by sync() + * Subsequent reads after discard() return undefined data. + */ + // void discard(); + + /** + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void jump(seekoff_t fpos); + + /** + * Flush the current region and + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void flushJump(seekoff_t fpos); +}; + +static_assert(std::is_move_constructible_v<region>); +static_assert(std::is_move_assignable_v<region>); +static_assert(std::is_swappable_v<region>); + +/** + * Typed region. + * struct_region allows directly accessing an on-disk structure. + * The region size is implicit from the type.
+ */ +template <typename T> +class struct_region : protected region { + public: + using type = typename std::decay<T>::type; + static_assert(std::is_standard_layout_v<type>, "T must be plain-old-data type"); + + /** + * Memory map struct_region at fpos in file. + */ + struct_region(std::shared_ptr<file> f, seekoff_t fpos) : region(f, fpos, sizeof(type)) {} + + type* get() { return static_cast<type*>(data()); } + const type* get() const { return static_cast<const type*>(data()); } + + type* operator->() { return get(); } + const type* operator->() const { return get(); } + + type& operator*() { return *get(); } + const type& operator*() const { return *get(); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::sync; + + // note: size means the sizeof(T) + using region::size; + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T); } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the new position + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Typed array region. + * @brief + * array_region allows directly accessing an on-disk array of structures + * The element size is implicit from the type and length of the array + * is provided by the constructor. + * @provides resize(), operator[], begin(), end() + */ +template <typename T> +class array_region : protected region { + protected: + size_t num_elements = 0; + + public: + using type = typename std::decay<T>::type; + static_assert(std::is_standard_layout_v<type>, "T must be plain-old-data type"); + + /** + * Memory map array_region at fpos in file and map array_size elements. + */ + array_region(std::shared_ptr<file> f, seekoff_t fpos, size_t array_size) : region(f, fpos, sizeof(type) * array_size), num_elements(array_size) {} + + /** + * Get pointer to first mapped element.
+ */ + type* get() { return static_cast<type*>(data()); } + const type* get() const { return static_cast<type*>(data()); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::sync; + + /** + * Resize the mapped array region. + */ + void resize(size_t elements) { + region::resize(sizeof(T) * elements); + num_elements = elements; + } + + /** + * Get number of mapped *elements* + */ + size_t size() const { return num_elements; } + + /** + * Access the array elements + */ + T& operator[](size_t index) { + assert(index < num_elements); + return get()[index]; + } + const T& operator[](size_t index) const { + assert(index < num_elements); + return get()[index]; + } + /** + * Iterators + */ + T* begin() { return get(); } + T* end() { return get() + num_elements; } + const T* begin() const { return get(); } + const T* end() const { return get() + num_elements; } + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T) * num_elements; } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the first element in the array + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Memory-mapped file I/O class. + * @note + * file should be created with std::make_shared<file>() + * as mapped region(s) take shared ownership of the file. + */ +class file : public std::enable_shared_from_this<file> { + private: + std::mutex mut; + int fd; + seekoff_t fd_size; + bool fd_rw; + // the file and region classes are inherently coupled, + // and we don't want to expose the internals. + friend class region; + + public: + enum : int { + CREATE = 0x1, // Create new file, if doesn't exist. + RESIZE = 0x2, // Resize file. + RO = 0x4 // + }; + + file(); + ~file(); + + /** + * Open file in read-only mode. + * @return non-zero if error occurred.
+ */ + int open(const char* file); + + /** + * Create/Open file in read-write mode. + * @param flags + * - CREATE|RESIZE creates or replaces existing file + * that is truncated to maxsize. + * - RESIZE opens existing file and truncates it to + * maxsize. The file must exist already. + * - flags == 0 ignores the maxsize argument and opens + * existing file. + * @warn default open mode discards any previous file contents! + * @return non-zero if error occurred. + */ + int openrw(const char* file, len_t maxsize, int flags = CREATE | RESIZE); + + /** + * Check if file open R/W or RO + */ + bool is_rw() const; + + /** + * Resize the open file to newsize bytes. + * (file must be open in R/W mode) + * @return non-zero if error occurred. + */ + int truncate(seekoff_t newsize); + + /** + * Current length of the file + * The file EOF (end-of-file) is at this position. + */ + seekoff_t size() const; + + // Close the file. + void close(); +}; + +}; // namespace mapped +#endif From cd076b51b3bb9321302c0d9410f49fb2607051ba Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 30 Jul 2023 22:04:24 +0300 Subject: [PATCH 07/42] - fixup region::remap() mremap case not saving the correct size. 
- silence few std::printf's since opening non-existing file is handled by returning -1 Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index a3731cd..698a673 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -62,13 +62,13 @@ int file::open(const char* fname) { fd = ::open64(fname, O_RDONLY); if (fd == -1) { - std::fprintf(stderr, "Error opening file for reading\n"); + //std::fprintf(stderr, "Error opening file for reading\n"); return -1; } struct stat64 finfo; if (fstat64(fd, &finfo)) { - std::fprintf(stderr, "Error opening file for reading\n"); + std::fprintf(stderr, "Error getting file size: %s\n", std::strerror(errno)); return -1; } fd_size = finfo.st_size; @@ -87,7 +87,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { if (!flags) { fd = ::open64(fname, O_RDWR | O_CLOEXEC); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } @@ -103,7 +103,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -112,7 +112,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & RESIZE) != 0) { fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -205,7 +205,7 @@ void 
region::remap(const seekoff_t fpos, const len_t size) { return; } map_ptr = newptr; - map_size = size; + map_size = newsize; return; } @@ -233,7 +233,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { } } else { // RO mapping - if (mfile->size() < fpos) { + if (mfile->size() <= fpos) { // can't: the backing file is too small. std::fprintf(stderr, "Error seeking past end of file.\n"); std::abort(); return; From fc6b4109297b8adbd5afc0951c7082a21d448846 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 30 Jul 2023 22:24:03 +0300 Subject: [PATCH 08/42] fixup missing const in struct_region and array_region Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 0aaefff..59f89ac 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -336,7 +336,7 @@ class array_region : protected region { * Get pointer to first mapped element. */ type* get() { return static_cast<type*>(data()); } - const type* get() const { return static_cast<type*>(data()); } + const type* get() const { return static_cast<const type*>(data()); } using region::flush; using region::getFile; From b0ff53ef9d2f467f88d94bc1b244a832e18465d1 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 04:12:10 +0300 Subject: [PATCH 09/42] libmappedfile: implement oversized mapped region The memory map now supports mapping oversized "window" into the file: - flush(), sync() only flush the user area - jump(), flushJump() have fast path speed up when new user area fits into the oversized window.
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 33 +++++++++++++++++++++------------ cpp/libraries/mapped_file.hpp | 13 ++++++++++--- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 698a673..e69ddbb 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -141,20 +141,21 @@ int file::truncate(seekoff_t newsize) { * Mapped region POSIX/Linux compatible implementation. */ -region::region(std::shared_ptr<file> src, seekoff_t fpos, len_t size) : mfile(src) { +region::region(std::shared_ptr<file> src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { std::lock_guard lock(mfile->mut); - remap(fpos, size); + remap(fpos, size, window); } region::region(std::shared_ptr<file> src) : mfile(src) { std::lock_guard lock(mfile->mut); - remap(0, mfile->size()); + auto sz = mfile->size(); + remap(0, sz, sz); } region::~region() { std::lock_guard lock(mfile->mut); map_fseek = 0; - remap(0, 0); + remap(0, 0, 0); } /** @@ -166,7 +167,7 @@ region::~region() { * * In read-write mode the backing file is grown to fit the mapping. */ -void region::remap(const seekoff_t fpos, const len_t size) { +void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (fpos == usr_fseek && size == usr_size) return; // No-op // check if [fpos, fpos+size] fits into the existing // mmap() window and only adjust the user region. @@ -197,7 +198,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { if (map_ptr && map_fseek == fpos) { // this mapping exists already at same map_fseek // remap it to grow the region.
- auto newsize = roundUp(size); + auto newsize = roundUp(std::max(size, window)); void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); if (newptr == MAP_FAILED) { std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); @@ -212,7 +213,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { // create new mapping if (mfile->is_rw()) { // RW mapping - auto newsize = roundUp(size); + auto newsize = roundUp(std::max(size, window)); if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { // failed. Disk full? std::abort(); @@ -239,7 +240,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { std::abort(); return; } - map_size = roundUp(size); + map_size = roundUp(std::max(size, window)); map_fseek = roundDown(fpos); // Map the region. (use huge pages, don't reserve backing store) map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); @@ -257,14 +258,14 @@ void region::remap(const seekoff_t fpos, const len_t size) { void region::jump(seekoff_t fpos) { std::lock_guard lock(mfile->mut); - remap(fpos, map_size); + remap(fpos, usr_size, map_size); is_dirty = false; } void region::flushJump(seekoff_t fpos) { flush(); std::lock_guard lock(mfile->mut); - remap(fpos, map_size); + remap(fpos, usr_size, map_size); } void region::flush() { @@ -272,7 +273,11 @@ void region::flush() { std::lock_guard lock(mfile->mut); if (is_dirty && mfile->is_rw()) { is_dirty = false; - if (msync(map_ptr, map_size, MS_ASYNC)) { + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if(flush_begin < usr_ptr) + flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_ASYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } @@ -283,7 +288,11 @@ void region::sync() { std::lock_guard lock(mfile->mut); if (is_dirty && mfile->is_rw()) { is_dirty = false; - if (msync(map_ptr, map_size, MS_SYNC)) 
{ + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if(flush_begin < usr_ptr) + flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_SYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 59f89ac..57b1521 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -113,7 +113,7 @@ class region { // non-const data access sets is_dirty. bool is_dirty = false; - void remap(const seekoff_t fpos, const len_t size); + void remap(const seekoff_t fpos, const len_t size, const len_t window); public: /** @@ -121,6 +121,13 @@ class region { * @brief * Seeks at fpos in file and map size bytes * starting from that position in file. + * @param window + * over-extend mapping up to max(size,window) bytes. + * Setting window bigger than size allows more efficient operation: + * [fpos, fpos + window] area is memory mapped + * but region will only operate on the + * [roundDown(fpos), roundup(fpos+size)] + * sub-portion of the memory. * @note * - Seeking past the EOF in file that is read-only will fail. * The mapped size may extend past EOF but accessing past EOF @@ -136,7 +143,7 @@ class region { * size() and getSeek(). * Side-effect is that backing file may grow more than expected. */ - region(std::shared_ptr<file> src, seekoff_t fpos, len_t size); + region(std::shared_ptr<file> src, seekoff_t fpos, len_t size, len_t window = 0); /** * Open memory mapped region into the file @@ -271,7 +278,7 @@ class struct_region : protected region { /** * Memory map struct_region at fpos in file.
*/ - struct_region(std::shared_ptr<file> f, seekoff_t fpos) : region(f, fpos, sizeof(type)) {} + struct_region(std::shared_ptr<file> f, seekoff_t fpos, len_t window = 0) : region(f, fpos, sizeof(type), window) {} type* get() { return static_cast<type*>(data()); } const type* get() const { return static_cast<const type*>(data()); } From adba81838f6d874c3fae65c4e4fb12fabc411d45 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 13:14:26 +0300 Subject: [PATCH 10/42] libmappedfile: Provide writeAt() readAt() API - Provide region::writeAt() and region::readAt() that enable copying data into/from the backing file even if the target area of the backing file is not memory-mapped. - Fixup flushed length in flush() sync() - Run clang-format Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 85 +++++++++++++++++++++++++++++++---- cpp/libraries/mapped_file.hpp | 38 ++++++++++++++-- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index e69ddbb..ef888f9 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -62,7 +62,7 @@ int file::open(const char* fname) { fd = ::open64(fname, O_RDONLY); if (fd == -1) { - //std::fprintf(stderr, "Error opening file for reading\n"); + // std::fprintf(stderr, "Error opening file for reading\n"); return -1; } @@ -87,7 +87,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { if (!flags) { fd = ::open64(fname, O_RDWR | O_CLOEXEC); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } @@ -103,7 +103,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n",
std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -112,7 +112,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & RESIZE) != 0) { fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -275,8 +275,7 @@ void region::flush() { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); auto flush_len = roundUp(usr_size); - if(flush_begin < usr_ptr) - flush_len += PAGE_SIZE; + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; if (msync(flush_begin, flush_len, MS_ASYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } @@ -290,14 +289,84 @@ void region::sync() { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); auto flush_len = roundUp(usr_size); - if(flush_begin < usr_ptr) - flush_len += PAGE_SIZE; + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; if (msync(flush_begin, flush_len, MS_SYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } } +void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { + auto srcmem = (const char*)data; + + std::lock_guard lock(mfile->mut); + if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { + return; + } + + // does write fall out the mapped area begin? 
+ if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t wr = std::min(map_fseek - fpos, datasize); + if (pwrite(mfile->fd, srcmem, wr, fpos) != wr) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied into this mapping: + ssize_t wr = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy((char*)map_ptr + (fpos - map_fseek), srcmem, wr); + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + // does write fall out the mapped area end? + if (datasize) { + // write into backing file after the mapped area: + if (pwrite(mfile->fd, srcmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + } +} + +void region::readAt(seekoff_t fpos, len_t datasize, void* data) { + auto dstmem = (char*)data; + + // does read fall out the mapped area begin? + if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t rd = std::min(map_fseek - fpos, datasize); + if (pread(mfile->fd, dstmem, rd, fpos) != rd) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied from this mapping: + ssize_t rd = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy(dstmem, (char*)map_ptr + (fpos - map_fseek), rd); + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + // does read fall out the mapped area end? 
+ if (datasize) { + // read from backing file after the mapped area: + if (pread(mfile->fd, dstmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + } +} + /* TODO: void region::resident(void * paddr, size_t lenght, bool resident) { diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 57b1521..dd2f61f 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -200,17 +200,18 @@ class region { // todo: window(len_t virtsize) // since region() is already lying that it can map // non-page-aligned offsets and sizes - // window() would grow this over-aligned window + // window() would grow this over-extended the memory mapping // to arbitrary size and keep the initialized // user size. - // This allows remap() to just adjust the usr_ptr - // if the region window fits in. /** * Flush mapped memory region into the file. * @brief this is an hint to operating system that * memory region shall be synchronized to disk. * It may not wait for this to have completed before returning. + * @note only the page aligned region + * [roundDown(data()), roundUp(data()+size())] + * is flushed. * @note Use sync() instead if you must guarantee the data has * reached persistent storage. */ @@ -221,6 +222,33 @@ class region { */ void sync(); + /** + * Write data into the backing file. + * @brief + * writeAt() stores range of bytes into the backing file. + * @note + * The region doesn't need to have this area to be memory-mapped: + * The data that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is written directly + * into the backing file. + * The backing file is grown to fit the data when needed. + */ + void writeAt(seekoff_t fpos, len_t datasize, const void* data); + + /** + * Read data from the backing file. 
+ * @brief + * readAt() reads [fpos, fpos+datasize] range of bytes from the backing file + * @note + * The region doesn't need to have this area memory-mapped + * The read out area that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is read directly + * from the backing file. + */ + void readAt(seekoff_t fpos, len_t datasize, void* data); + /** * Set memory region to resident/or released. * @brief setting memory range to non-resident state @@ -292,7 +320,9 @@ class struct_region : protected region { using region::flush; using region::getFile; using region::getSeek; + using region::readAt; using region::sync; + using region::writeAt; // note: size means the sizeof(T) using region::size; @@ -348,7 +378,9 @@ class array_region : protected region { using region::flush; using region::getFile; using region::getSeek; + using region::readAt; using region::sync; + using region::writeAt; /** * Resize the mapped array region. From 747cca1aa541bfb5a7eb4948c55bc812921cb0ae Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 23:26:33 +0300 Subject: [PATCH 11/42] libmappedfile: Misc changes - Provide FSTUNE flag that attempts to speed up file access when new file created with CREATE|RESIZE. It effectievely sets chattr +X and +A flags on the file. - Make readAt() const qualified. 
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 12 +++++++++++- cpp/libraries/mapped_file.hpp | 9 +++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index ef888f9..5318596 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) @@ -107,6 +110,13 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { return -1; } fd_rw = true; + + if(flags & FSTUNE) { + int flags = 0; + ioctl(fd, FS_IOC_GETFLAGS, &flags); + flags |= FS_NOATIME_FL | FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &flags); + } return truncate(maxsize); } else if ((flags & RESIZE) != 0) { @@ -334,7 +344,7 @@ void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { } } -void region::readAt(seekoff_t fpos, len_t datasize, void* data) { +void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { auto dstmem = (char*)data; // does read fall out the mapped area begin? diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index dd2f61f..7cc3b1e 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -247,7 +247,7 @@ class region { * Any data that falls out this window is read directly * from the backing file. */ - void readAt(seekoff_t fpos, len_t datasize, void* data); + void readAt(seekoff_t fpos, len_t datasize, void* data) const; /** * Set memory region to resident/or released. @@ -452,9 +452,10 @@ class file : public std::enable_shared_from_this { public: enum : int { - CREATE = 0x1, // Create new file, if doesn't exist. - RESIZE = 0x2, // Resize file. - RO = 0x4 // + CREATE = 0x1, //!< Create new file, if doesn't exist. + RESIZE = 0x2, //!< Resize file. 
+ FSTUNE = 0x4 //!< When creating new file attempt to set + //!< file system attributes to improve performance. }; file(); From 9b8f46310181e990ac551bf9c0e5b73959118a8e Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 00:06:19 +0300 Subject: [PATCH 12/42] libmapped_file: Make region moveable - Provide proper move aware object. region objects are now safe to use in STL containers like vector/deque. - Implement region::resident() (not tested) Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 47 +++++++++++++++++++++++++++-------- cpp/libraries/mapped_file.hpp | 38 ++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 5318596..c101f3a 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include // POSIX/Linux APIs @@ -163,7 +164,7 @@ region::region(std::shared_ptr src) : mfile(src) { } region::~region() { - std::lock_guard lock(mfile->mut); + // destructor is not thread-safe. map_fseek = 0; remap(0, 0, 0); } @@ -238,7 +239,30 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { map_size = newsize; map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); if (map_ptr == MAP_FAILED) { + // If this gets triggered we are in deep trouble std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::fprintf(stderr, "Dumping /proc/self/maps:\n"); + // for debugging information try print /proc/self/mmaps contents + // as this explains why we hit some limit of the system. 
+ std::ifstream fmaps("/proc/self/maps"); + std::string buf; + int count = 0; + while(std::getline(fmaps, buf)) { + std::fprintf(stderr, "%s\n", buf.c_str()); + ++count; + } + std::fprintf(stderr, "counted %d memory-maps in process.\n", count); + + + + // todo: if this really is an hard limit of the hardware + // for *number of mmap() areas* this means we forced to: + // - register all regions in ordered list by mapped seek offset in the mapped::file + // - when mmap fails we have to merge adjacent regions + // - reference count the regions + // - data() returned memory address becomes even more unstable: + // it is invalidated by adjacent construction/deconstruction of region objects + // - destruction gets complicated. std::abort(); return; } @@ -377,18 +401,19 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { } } -/* -TODO: -void region::resident(void * paddr, size_t lenght, bool resident) { - // Align paddr to PAGE_SIZE - void * start = reinterpret_cast(uintptr_t(paddr) & ~(PAGE_SIZE-1)); - lenght = roundToPage(lenght); - - if(madvise(start, lenght, resident ? MADV_WILLNEED : MADV_DONTNEED)) { - std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); - } + +void region::resident(bool resident) { + std::lock_guard lock(mfile->mut); + auto _begin = (void*)roundDown((uintptr_t)usr_ptr); + auto _len = roundUp(usr_size); + if (_begin < usr_ptr) _len += PAGE_SIZE; + + if(madvise(_begin, _len, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); + } } +/* void region::discard(void * paddr, size_t lenght) { // get range of pages that may be discarded. // this is always an subset of [paddr, paddr+lenght] range. 
diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 7cc3b1e..5cda7c1 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -115,6 +115,8 @@ class region { void remap(const seekoff_t fpos, const len_t size, const len_t window); + region() {} + public: /** * Open memory mapped region into a file. @@ -159,6 +161,31 @@ class region { */ virtual ~region(); + // region is not copyable + region(const region&) =delete; + region& operator=(const region&) =delete; + + // region is moveable + friend void swap(region& a, region& b) { + using std::swap; + // thread-safety? None. + swap(a.map_ptr,b.map_ptr); + swap(a.map_size,b.map_size); + swap(a.map_fseek,b.map_fseek); + swap(a.usr_ptr,b.usr_ptr); + swap(a.usr_size,b.usr_size); + swap(a.usr_fseek,b.usr_fseek); + swap(a.mfile,b.mfile); + swap(a.is_dirty,b.is_dirty); + } + region(region&& mv) : region() { + swap(*this, mv); + } + region& operator=(region&& mv) { + swap(*this, mv); + return *this; + } + /** * Get data pointer. */ @@ -256,9 +283,11 @@ class region { * Reading non-resident memory region again causes system to * fetch data from the disk again. * @warn if memory region is not flushed before setting - * it non-resident any writes may be discarded to backing file. + * resident(false) any writes may be discarded to backing file. + * @todo: more strict version? + * actually unmap the region() until data() is called. */ - // void resident(bool state); + void resident(bool state); /** * Discard memory region. @@ -323,6 +352,7 @@ class struct_region : protected region { using region::readAt; using region::sync; using region::writeAt; + using region::resident; // note: size means the sizeof(T) using region::size; @@ -347,6 +377,10 @@ class struct_region : protected region { } }; +static_assert(std::is_move_constructible_v>); +static_assert(std::is_move_assignable_v>); +static_assert(std::is_swappable_v>); + /** * Typed array region. 
* @brief From 774760ebf357dc98c04b87f7e728e5ef7e9500af Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 03:55:44 +0300 Subject: [PATCH 13/42] libmappedfile: Implement region::window() - region::window() allows over-extending the memory-mapping The "user mapped" portions stays same but regionSize() is changed. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 8 ++++++++ cpp/libraries/mapped_file.hpp | 16 ++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index c101f3a..f4a02e1 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -290,6 +290,14 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); } +void region::window(len_t window) { + std::lock_guard lock(mfile->mut); + auto usize = usr_size; + // note: remap() does nothing if window == usr_size + remap(usr_fseek, window, window); + usr_size = usize; +} + void region::jump(seekoff_t fpos) { std::lock_guard lock(mfile->mut); remap(fpos, usr_size, map_size); diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 5cda7c1..e9893e1 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -224,12 +224,15 @@ class region { */ void resize(len_t newsize); - // todo: window(len_t virtsize) - // since region() is already lying that it can map - // non-page-aligned offsets and sizes - // window() would grow this over-extended the memory mapping - // to arbitrary size and keep the initialized - // user size. + /** + * @brief over-extend mapping up to max(size(),window) bytes. 
+ * Setting window bigger than size() allows more efficient operation: + * [regionSeek(), regionSeek() + window] area is memory mapped + * but region will only operate on the + * [roundDown(getSeek()), roundUp(getSeek()+size())] + * sub-portion of the memory. + */ + void window(len_t window = 0); /** * Flush mapped memory region into the file. @@ -353,6 +356,7 @@ class struct_region : protected region { using region::sync; using region::writeAt; using region::resident; + using region::window; // note: size means the sizeof(T) using region::size; From 1a5c4265998912160c45ca95774ce9b18595ed40 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 07:11:47 +0300 Subject: [PATCH 14/42] libmapped_file: Tune the memory mapping a bit - For resident() it is better to mark the entire mapped region rather than just the user area. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index f4a02e1..56e1d11 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -252,18 +252,6 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { ++count; } std::fprintf(stderr, "counted %d memory-maps in process.\n", count); - - - - // todo: if this really is an hard limit of the hardware - // for *number of mmap() areas* this means we forced to: - // - register all regions in ordered list by mapped seek offset in the mapped::file - // - when mmap fails we have to merge adjacent regions - // - reference count the regions - // - data() returned memory address becomes even more unstable: - // it is invalidated by adjacent construction/deconstruction of region objects - // - destruction gets complicated. 
- std::abort(); return; } } else { @@ -285,6 +273,11 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { return; } } + + // hint that this memory is accessed in random order. + if(madvise(map_ptr, map_size, MADV_RANDOM)) { + std::fprintf(stderr, "warn: madvice(MADV_RANDOM) failed: %s\n", std::strerror(errno)); + } // adjust the usr_ptr to fix // any page misalignment. usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); @@ -412,11 +405,7 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { void region::resident(bool resident) { std::lock_guard lock(mfile->mut); - auto _begin = (void*)roundDown((uintptr_t)usr_ptr); - auto _len = roundUp(usr_size); - if (_begin < usr_ptr) _len += PAGE_SIZE; - - if(madvise(_begin, _len, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) { std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); } } From 574876b4617eb6c40d45b23e884cff6cfbdd607a Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Fri, 11 Aug 2023 21:28:43 +0300 Subject: [PATCH 15/42] libmappedfile: comment fixups Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index e9893e1..a2fccbd 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -92,8 +92,7 @@ class file; /** * Memory-mapped region * @brief - * the region base class implementation memory maps - * an raw memory range from the file. + * the region base class memory-maps an raw memory range from the file. */ class region { protected: From 3d197a1175a64838e9639ff40d8b763d5d5e70c3 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sat, 12 Aug 2023 05:20:40 +0300 Subject: [PATCH 16/42] libmappedfile: Locking and discard work - Implement more fine-grained locking for region. 
- Implement region::discard() This effectively zero fills memory area within the mapping and punches hole into the backing file. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 62 ++++++++++++++++++++++++----------- cpp/libraries/mapped_file.hpp | 20 +++++++---- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 56e1d11..e7261dc 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -153,18 +153,19 @@ int file::truncate(seekoff_t newsize) { */ region::region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, size, window); } region::region(std::shared_ptr src) : mfile(src) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); auto sz = mfile->size(); remap(0, sz, sz); } region::~region() { // destructor is not thread-safe. + std::lock_guard lock(mtx); map_fseek = 0; remap(0, 0, 0); } @@ -177,6 +178,8 @@ region::~region() { * remap(n, j) munmap old region, mmap new at offset roundDown(n) * * In read-write mode the backing file is grown to fit the mapping. + * + * @warn this->mtx must be held when this function is called. */ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (fpos == usr_fseek && size == usr_size) return; // No-op @@ -225,11 +228,16 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (mfile->is_rw()) { // RW mapping auto newsize = roundUp(std::max(size, window)); + + // take file lock so size() check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { // failed. Disk full? 
std::abort(); return; } + trunclock.unlock(); + // mmap requires fpos && size to be multiple of PAGE_SIZE map_fseek = roundDown(fpos); if (map_fseek < fpos) { @@ -265,7 +273,7 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { map_size = roundUp(std::max(size, window)); map_fseek = roundDown(fpos); // Map the region. (use huge pages, don't reserve backing store) - map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE, mfile->fd, map_fseek); if (!map_ptr || map_ptr == MAP_FAILED) { std::fprintf(stderr, "Error mapping file\n"); @@ -284,7 +292,7 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { } void region::window(len_t window) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); auto usize = usr_size; // note: remap() does nothing if window == usr_size remap(usr_fseek, window, window); @@ -292,20 +300,20 @@ void region::window(len_t window) { } void region::jump(seekoff_t fpos) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, usr_size, map_size); is_dirty = false; } void region::flushJump(seekoff_t fpos) { flush(); - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, usr_size, map_size); } void region::flush() { // only flush if dirty and RW mapped. - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if (is_dirty && mfile->is_rw()) { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); @@ -319,7 +327,7 @@ void region::flush() { void region::sync() { // only flush if dirty and RW mapped. 
- std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if (is_dirty && mfile->is_rw()) { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); @@ -334,10 +342,12 @@ void region::sync() { void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { auto srcmem = (const char*)data; - std::lock_guard lock(mfile->mut); + // take file lock so that file size check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { return; } + trunclock.unlock(); // does write fall out the mapped area begin? if (fpos < map_fseek) { @@ -404,24 +414,36 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { void region::resident(bool resident) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) { std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); } } -/* -void region::discard(void * paddr, size_t lenght) { - // get range of pages that may be discarded. - // this is always an subset of [paddr, paddr+lenght] range. - void * start = (void*)roundUp((uintptr_t)paddr, PAGE_SIZE); - lenght = roundDown(lenght, PAGE_SIZE); - if(start < (char*)paddr + lenght && lenght >= PAGE_SIZE) { - // note: errors are ignored here. - madvise(start, lenght, MADV_REMOVE); +void region::discard(seekoff_t fpos, len_t datasize) { + + auto cur = usr_fseek + fpos; + + if (cur < map_fseek + map_size) { + // max size that can discarded from this mapping: + ssize_t dm = std::min(map_size - (cur - map_fseek), datasize); + + // Have to be careful here: if we delete too much + // caller will not have an good time. + // align size down to page size. 
+ dm = roundDown(dm); + // align file offset up + auto _first = roundUp(cur - map_fseek); + if(_first > cur - map_fseek) + dm -= PAGE_SIZE; + + if(dm >= (signed)PAGE_SIZE) { + if(madvise((char*)map_ptr + _first, dm, MADV_REMOVE)) { + std::fprintf(stderr,"Error discarding memory-map region:%s\n",std::strerror(errno)); + } } + } } -*/ }; // namespace mapped diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index a2fccbd..b86657a 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -92,10 +92,11 @@ class file; /** * Memory-mapped region * @brief - * the region base class memory-maps an raw memory range from the file. + * the base class memory-maps an raw range of bytes from the backing file. */ class region { protected: + std::mutex mtx; // actually mapped region: void* map_ptr = nullptr; size_t map_size = 0; @@ -167,7 +168,9 @@ class region { // region is moveable friend void swap(region& a, region& b) { using std::swap; - // thread-safety? None. + //std::lock(a.mtx,b.mtx); + //std::lock_guard l0(a.mtx, std::adopt_lock); + //std::lock_guard l1(b.mtx, std::adopt_lock); swap(a.map_ptr,b.map_ptr); swap(a.map_size,b.map_size); swap(a.map_fseek,b.map_fseek); @@ -286,8 +289,6 @@ class region { * fetch data from the disk again. * @warn if memory region is not flushed before setting * resident(false) any writes may be discarded to backing file. - * @todo: more strict version? - * actually unmap the region() until data() is called. */ void resident(bool state); @@ -297,9 +298,15 @@ class region { * to reclaim the memory *and* the on-disk area. * This means the data is lost in the mapped memory region, * and any data within will not be written onto disk by sync() - * Subsequent reads after discard() return undefined data. + * Subsequent reads after discard() return zero filled data. + * @note + * The discarded area shall be within the mapped area. + * @param fpos + * file offset from begin of this mapping. 
(getSeek() + fpos) + * @param datasize + * length of the data area to discard. */ - // void discard(); + void discard(seekoff_t fpos, len_t datasize); /** * Seek in the file to fpos position and @@ -356,6 +363,7 @@ class struct_region : protected region { using region::writeAt; using region::resident; using region::window; + using region::discard; // note: size means the sizeof(T) using region::size; From f2b1f8c07765702c40d4ecb9b3577316c97f8531 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 00:06:43 +0300 Subject: [PATCH 17/42] Do const safety pass - The filePointer points into read-only memory from mmap() so apply const to few places to ensure nothing is writing into it. - getCubesByShape() may return pointers to past-end of the mmap() area if shape table entry size is zero. ShapeEntry::offset can be wrong if the size is also zero. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/cube.hpp | 2 +- cpp/include/newCache.hpp | 24 ++++++++++++------------ cpp/src/newCache.cpp | 9 ++++++--- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 83feaa7..f612ef4 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -69,7 +69,7 @@ struct Cube { // Construct from external source. // Cube shares this the memory until modified. // Caller guarantees the memory given will live longer than *this - Cube(XYZ *start, uint8_t n) : bits{1, n}, array(start) {} + Cube(const XYZ *start, uint8_t n) : bits{1, n}, array(const_cast(start)) {} // Copy ctor. 
Cube(const Cube ©) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); } diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 29e622e..04bb3fe 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -18,7 +18,7 @@ class CubeIterator { using reference = Cube&; // or also value_type& // constructor - CubeIterator(uint32_t _n, XYZ* ptr) : n(_n), m_ptr(ptr) {} + CubeIterator(uint32_t _n, const XYZ* ptr) : n(_n), m_ptr(ptr) {} // invalid iterator (can't deference) explicit CubeIterator() : n(0), m_ptr(nullptr) {} @@ -53,13 +53,13 @@ class CubeIterator { private: uint32_t n; - XYZ* m_ptr; + const XYZ* m_ptr; }; class ShapeRange { public: - ShapeRange(XYZ* start, XYZ* stop, uint64_t _cubeLen, XYZ _shape) - : b(_cubeLen, start), e(_cubeLen, stop), size_(((uint64_t)stop - (uint64_t)start) / (_cubeLen * sizeof(XYZ))), shape_(_shape) {} + ShapeRange(const XYZ* start, const XYZ* stop, uint64_t _cubeLen, XYZ _shape) + : b(_cubeLen, start), e(_cubeLen, stop), size_(std::distance(start, stop) / _cubeLen), shape_(_shape) {} CubeIterator begin() { return b; } CubeIterator end() { return e; } @@ -118,26 +118,26 @@ class CacheReader : public ICache { }; CubeIterator begin() { - uint8_t* start = filePointer + shapes[0].offset; - return CubeIterator(header->n, (XYZ*)start); + const uint8_t* start = filePointer + shapes[0].offset; + return CubeIterator(header->n, (const XYZ*)start); } CubeIterator end() { - uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; - return CubeIterator(header->n, (XYZ*)stop); + const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; + return CubeIterator(header->n, (const XYZ*)stop); } ShapeRange getCubesByShape(uint32_t i) override; private: - uint8_t* filePointer; + const uint8_t* filePointer; std::string path_; int fileDescriptor_; uint64_t fileSize_; bool fileLoaded_; - Header dummyHeader; - Header* header; - ShapeEntry* 
shapes; + const Header dummyHeader; + const Header* header; + const ShapeEntry* shapes; }; class FlatCache : public ICache { diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index ef925dc..db57900 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -65,15 +65,18 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; } - XYZ* start = reinterpret_cast(filePointer + shapes[i].offset); - XYZ* end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); + if(shapes[i].size <= 0) { + return ShapeRange(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } + auto start = reinterpret_cast(filePointer + shapes[i].offset); + auto end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); } void CacheReader::unload() { // unmap file from memory if (fileLoaded_) { - if (munmap(filePointer, fileSize_) == -1) { + if (munmap((void*)filePointer, fileSize_) == -1) { // error handling std::printf("error unmapping file\n"); } From 2a39964cf2199a6406a1989dbbf5ec3f77b20835 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 00:28:21 +0300 Subject: [PATCH 18/42] Close the `friend class Workset` trick. - I can actually read how the progress is calculated. 
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 4 +++- cpp/src/cubes.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 04bb3fe..9189182 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -27,6 +27,8 @@ class CubeIterator { const value_type operator*() const { return Cube(m_ptr, n); } // pointer operator->() { return (pointer)m_ptr; } + const XYZ* data() const { return m_ptr; } + // Prefix increment CubeIterator& operator++() { m_ptr += n; @@ -49,7 +51,7 @@ class CubeIterator { friend bool operator<(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != b.m_ptr; }; - friend class Workset; + //friend class Workset; private: uint32_t n; diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index cdcc5b4..2bcd3c1 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -48,7 +48,7 @@ struct Workset { auto a = _begin; _begin += 500; if (_begin > _end) _begin = _end; - return {a, _begin, a < _end, 100 * (float)((uint64_t)a.m_ptr - (uint64_t)_begin_total.m_ptr) / ((uint64_t)_end.m_ptr - (uint64_t)_begin_total.m_ptr)}; + return {a, _begin, a < _end, 100 * float(std::distance(_begin_total.data(), a.data())) / std::distance(_begin_total.data(), _end.data())}; } void expand(const Cube &c) { From f8a5671f4885090b63c22f5048c262229ec24dcb Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 01:00:52 +0300 Subject: [PATCH 19/42] Update newCache to use libmappedfile Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 17 ++++++--- cpp/src/newCache.cpp | 78 +++++++++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/cpp/include/newCache.hpp 
b/cpp/include/newCache.hpp index 9189182..ca3d71f 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -6,6 +6,7 @@ #include "cube.hpp" #include "hashes.hpp" +#include "mapped_file.hpp" class Workset; @@ -119,7 +120,10 @@ class CacheReader : public ICache { uint64_t size; // in bytes should be multiple of XYZ_SIZE }; - CubeIterator begin() { + // Do begin() and end() make sense for CacheReader + // If the cache file provides data for more than single shape? + // The data might not even be mapped contiguously to save memory. + /*CubeIterator begin() { const uint8_t* start = filePointer + shapes[0].offset; return CubeIterator(header->n, (const XYZ*)start); } @@ -127,15 +131,18 @@ class CacheReader : public ICache { CubeIterator end() { const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; return CubeIterator(header->n, (const XYZ*)stop); - } + }*/ + // get shapes at index [0, numShapes()[ ShapeRange getCubesByShape(uint32_t i) override; private: - const uint8_t* filePointer; + std::shared_ptr file_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; + std::unique_ptr> xyz_; + std::string path_; - int fileDescriptor_; - uint64_t fileSize_; bool fileLoaded_; const Header dummyHeader; const Header* header; diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index db57900..cd9726c 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -6,8 +6,7 @@ #include -CacheReader::CacheReader() - : filePointer(nullptr), path_(""), fileDescriptor_(-1), fileSize_(0), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} +CacheReader::CacheReader() : path_(""), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} void CacheReader::printHeader() { if (fileLoaded_) { @@ -33,28 +32,37 @@ int CacheReader::printShapes(void) { int CacheReader::loadFile(const std::string path) { unload(); path_ = path; - fileDescriptor_ = open(path.c_str(), 
O_RDONLY); - if (fileDescriptor_ == -1) { + // open read-only backing file: + file_ = std::make_shared(); + if (file_->open(path.c_str())) { std::printf("error opening file\n"); return 1; } - // get filesize - fileSize_ = lseek(fileDescriptor_, 0, SEEK_END); - lseek(fileDescriptor_, 0, SEEK_SET); - - // memory map file - filePointer = (uint8_t*)mmap(NULL, fileSize_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); - if (filePointer == MAP_FAILED) { - // error handling - std::printf("errorm mapping file memory"); - close(fileDescriptor_); - return 2; + // map the header struct + header_ = std::make_unique>(file_, 0); + header = header_->get(); + + if (header->magic != MAGIC) { + std::printf("error opening file: file not recognized\n"); + return 1; } - header = (Header*)(filePointer); - shapes = (ShapeEntry*)(filePointer + sizeof(Header)); + // map the ShapeEntry array: + shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); + shapes = shapes_->get(); + + size_t datasize = 0; + for (unsigned int i = 0; i < header->numShapes; ++i) { + datasize += shapes[i].size; + } + + // map rest of the file as XYZ data: + if (file_->size() != shapes_->getEndSeek() + datasize) { + std::printf("warn: file size does not match expected value\n"); + } + xyz_ = std::make_unique>(file_, shapes_->getEndSeek(), datasize); fileLoaded_ = true; @@ -65,28 +73,34 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; } - if(shapes[i].size <= 0) { - return ShapeRange(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + if (shapes[i].size <= 0) { + return ShapeRange{nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; + } + // get section start + // note: shapes[i].offset may have bogus offset + // if any earlier shape table entry was empty before i + // so we ignore the offset here. 
+ size_t offset = 0; + for (unsigned int k = 0; k < i; ++k) { + offset += shapes[k].size; } - auto start = reinterpret_cast(filePointer + shapes[i].offset); - auto end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); - return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + auto index = offset / XYZ_SIZE; + auto num_xyz = shapes[i].size / XYZ_SIZE; + // pointers to Cube data: + auto start = xyz_->get() + index; + auto end = xyz_->get() + index + num_xyz; + return ShapeRange{start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; } void CacheReader::unload() { - // unmap file from memory + // unload file from memory if (fileLoaded_) { - if (munmap((void*)filePointer, fileSize_) == -1) { - // error handling - std::printf("error unmapping file\n"); - } - - // close file descriptor - close(fileDescriptor_); + xyz_.reset(); + shapes_.reset(); + header_.reset(); + file_.reset(); fileLoaded_ = false; } - fileDescriptor_ = -1; - filePointer = nullptr; header = &dummyHeader; shapes = nullptr; } From f62780e0f42bb0829d9be5deb4b65f2ebd315944 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 13 Aug 2023 21:04:53 +0300 Subject: [PATCH 20/42] fixup tests not compiling. 
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b30d160..42e0014 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -19,4 +19,5 @@ add_executable(${PROJECT_NAME} $ ${TESTS}) target_link_libraries(GTest::GTest INTERFACE gtest_main) target_link_libraries(${PROJECT_NAME} pthread GTest::GTest) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) From c7609446280216887dced0a172ac8439a1ad0e7c Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 13:32:37 +0300 Subject: [PATCH 21/42] Make DEBUG_PRINT less noisy DEBUG_LEVEL selects the level of debug prints that are compiled in. 0 => Same as not compiling with DEBUG at all. 1 => Only DEBUG_PRINT() 2 => DEBUG1_PRINT() and lower levels are enabled 3 => DEBUG2_PRINT() and lower levels are enabled Change few of the noisiest prints to be silent with DEBUG_LEVEL == 1 Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/hashes.hpp | 2 +- cpp/include/utils.hpp | 35 +++++++++++++++++++++++++++++++---- cpp/src/cubes.cpp | 4 ++-- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 7999d5c..09feeed 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -100,7 +100,7 @@ struct Hashy { DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { auto part = set.second.size(); - DEBUG_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); + DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); sum += part; } return sum; diff --git a/cpp/include/utils.hpp b/cpp/include/utils.hpp index 4cd23e3..f895877 100644 --- a/cpp/include/utils.hpp +++ b/cpp/include/utils.hpp @@ -3,12 +3,39 @@ #define OPENCUBES_UTILS_HPP #include + +// Debug print level: all 
prints enabled +// below DEBUG_LEVEL. +// DEBUG_LEVEL -> 0 all prints disabled. +// DEBUG_LEVEL -> 1 enable DEBUG_PRINTF() statements +// DEBUG_LEVEL -> 2 enable DEBUG1_PRINTF() statements and earlier +// DEBUG_LEVEL -> 3 all prints enabled. +#define DEBUG_LEVEL 1 + #ifdef DEBUG + +#if DEBUG_LEVEL >= 1 #define DEBUG_PRINTF(...) std::printf(__VA_ARGS__) -#else -#define DEBUG_PRINTF(...) \ - do { \ - } while (0) #endif +#if DEBUG_LEVEL >= 2 +#define DEBUG1_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#if DEBUG_LEVEL >= 3 +#define DEBUG2_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#endif + +#ifndef DEBUG_PRINTF +#define DEBUG_PRINTF(...) do {} while (0) #endif +#ifndef DEBUG1_PRINTF +#define DEBUG1_PRINTF(...) do {} while (0) +#endif +#ifndef DEBUG2_PRINTF +#define DEBUG2_PRINTF(...) do {} while (0) +#endif + +#endif \ No newline at end of file diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 2bcd3c1..bea7327 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -87,14 +87,14 @@ struct Workset { std::set_difference(candidates.begin(), end, c.begin(), c.end(), std::back_inserter(tmp)); candidates = std::move(tmp); - DEBUG_PRINTF("candidates: %lu\n\r", candidates.size()); + DEBUG1_PRINTF("candidates: %lu\n\r", candidates.size()); Cube newCube(c.size() + 1); Cube lowestHashCube(newCube.size()); Cube rotatedCube(newCube.size()); for (const auto &p : candidates) { - DEBUG_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); + DEBUG2_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); int ax = (p.x() < 0) ? 1 : 0; int ay = (p.y() < 0) ? 1 : 0; int az = (p.z() < 0) ? 1 : 0; From 064b9739db23f3bdc7df9cb619bf95bd73941a48 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 08:13:52 +0300 Subject: [PATCH 22/42] Hack Cube struct into 8-bytes This is the v3 revision of this hack: Previously the uint8_t bit-field actually caused Cube to be 16-bytes due to padding. Bitpack/Hack the size, is_shared flag and memory address into private struct bits_t.
This halves the Cube struct size. Note: If we get any segfaults from de-referencing the pointer returned by get() helper this hack must be reverted. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/cube.hpp | 74 ++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index f612ef4..e6719a7 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "utils.hpp" @@ -45,20 +46,47 @@ using XYZSet = std::unordered_set>; struct Cube { private: - struct { - uint8_t is_shared : 1; - uint8_t size : 7; // MAX 127 - } bits; - XYZ *array = nullptr; - - static_assert(sizeof(bits) == sizeof(uint8_t)); + // cube memory is stored two ways: + // normal, new'd buffer + // shared, external memory. + + struct bits_t { + uint64_t is_shared : 1; + uint64_t size : 7; // MAX 127 + uint64_t addr : 56; // low 56-bits of memory address. + }; + // fields + bits_t fields; + + static_assert(sizeof(bits_t) == sizeof(void*)); + + static XYZ *get(bits_t key) { + // pointer bit-hacking: + uint64_t addr = key.addr; + return reinterpret_cast(addr); + } + static bits_t put(bool is_shared, int size, XYZ *addr) { + // mask off top byte from the memory address to fit it into bits_t::addr + // on x86-64 it is not used by the hardware (yet). + // This hack actually saves 8 bytes because previously + // the uint8_t caused padding to 16 bytes. + // @note if we get segfaults dereferencing get(fields) + // then this is the problem and this hack must be undone. 
+ uint64_t tmp = reinterpret_cast((void *)addr); + tmp &= 0xffffffffffffff; + bits_t bits; + bits.addr = tmp; + bits.is_shared = is_shared; + bits.size = size; + return bits; + } public: // Empty cube - Cube() : bits{0, 0} {} + Cube() : fields{put(0, 0, nullptr)} {} // Cube with N capacity - explicit Cube(uint8_t N) : bits{0, N}, array(new XYZ[bits.size]) {} + explicit Cube(uint8_t N) : fields{put(0,N, new XYZ[N])} {} // Construct from pieces Cube(std::initializer_list il) : Cube(il.size()) { std::copy(il.begin(), il.end(), begin()); } @@ -69,20 +97,23 @@ struct Cube { // Construct from external source. // Cube shares this the memory until modified. // Caller guarantees the memory given will live longer than *this - Cube(const XYZ *start, uint8_t n) : bits{1, n}, array(const_cast(start)) {} + Cube(const XYZ *start, uint8_t n) : fields{put(1,n,const_cast(start))} {} // Copy ctor. Cube(const Cube &copy) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); } ~Cube() { + bits_t bits = fields; if (!bits.is_shared) { - delete[] array; + delete[] get(bits); } } friend void swap(Cube &a, Cube &b) { using std::swap; - swap(a.array, b.array); - swap(a.bits, b.bits); + bits_t abits = a.fields; + bits_t bbits = b.fields; + a.fields = bbits; + b.fields = abits; } Cube(Cube &&mv) : Cube() { swap(*this, mv); } @@ -98,19 +129,15 @@ struct Cube { return *this; } - size_t size() const { return bits.size; } + size_t size() const { return fields.size; } XYZ *data() { - if (bits.is_shared) { - // lift to RAM: this should never happen really.
- Cube tmp(array, bits.size); - swap(*this, tmp); - std::printf("Bad use of Cube\n"); - } - return array; - } + return get(fields); + } - const XYZ *data() const { return array; } + const XYZ *data() const { + return get(fields); + } XYZ *begin() { return data(); } @@ -140,6 +167,7 @@ struct Cube { } }; +static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube"); static_assert(std::is_move_assignable_v, "Cube must be moveable"); static_assert(std::is_swappable_v, "Cube must swappable"); From ca14b5501559f0fd953da0241a0e8e1b6087bc42 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Fri, 11 Aug 2023 19:54:07 +0300 Subject: [PATCH 23/42] Hashy const qualifiers. - Small changes diffed. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/cube.hpp | 6 +++--- cpp/include/hashes.hpp | 16 ++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index e6719a7..72b9d97 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -47,8 +47,8 @@ using XYZSet = std::unordered_set>; struct Cube { private: // cube memory is stored two ways: - // normal, new'd buffer - // shared, external memory. 
+ // normal, new'd buffer: is_shared == false + // shared, external memory: is_shared == true struct bits_t { uint64_t is_shared : 1; @@ -59,7 +59,7 @@ struct Cube { bits_t fields; static_assert(sizeof(bits_t) == sizeof(void*)); - + // extract the pointer from bits_t static XYZ *get(bits_t key) { // pointer bit-hacking: uint64_t addr = key.addr; diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 09feeed..b154d2a 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -28,7 +28,7 @@ using CubeSet = std::unordered_set>; struct Hashy { struct Subsubhashy { CubeSet set; - std::shared_mutex set_mutex; + mutable std::shared_mutex set_mutex; template void insert(CubeT &&c) { @@ -36,12 +36,16 @@ struct Hashy { set.emplace(std::forward(c)); } - bool contains(const Cube &c) { + bool contains(const Cube &c) const { std::shared_lock lock(set_mutex); - return set.count(c); + auto itr = set.find(c); + if(itr != set.end()) { + return true; + } + return false; } - auto size() { + auto size() const { std::shared_lock lock(set_mutex); return set.size(); } @@ -59,7 +63,7 @@ struct Hashy { // printf("new size %ld\n\r", byshape[shape].size()); } - auto size() { + auto size() const { size_t sum = 0; for (auto &set : byhash) { auto part = set.size(); @@ -95,7 +99,7 @@ struct Hashy { set.insert(std::forward(c)); } - auto size() { + auto size() const { size_t sum = 0; DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { From 74c0dc31db4e191a0a5816b4d9dfa7b290208e66 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sat, 12 Aug 2023 02:40:59 +0300 Subject: [PATCH 24/42] cubes: Refactor thread scheduling - Launching new threads is expensive. Refactor the cubes.cpp threading code so that the started threads are kept running until the main process is complete. - Allow the main thread to do its preparation work in parallel with the running Workset. (The next cache file can be loaded while the old one is being processed.)
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/src/cubes.cpp | 138 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 34 deletions(-) diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index bea7327..ac04e12 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "cache.hpp" #include "cube.hpp" @@ -19,22 +21,27 @@ const int PERF_STEP = 500; struct Workset { std::mutex mu; + + CacheReader cr; CubeIterator _begin_total; CubeIterator _begin; CubeIterator _end; Hashy &hashes; XYZ targetShape, shape, expandDim; bool notSameShape; - Workset(ShapeRange &data, Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) - : _begin_total(data.begin()) - , _begin(data.begin()) - , _end(data.end()) - , hashes(hashes) + Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) + : hashes(hashes) , targetShape(targetShape) , shape(shape) , expandDim(expandDim) , notSameShape(notSameShape) {} + void setRange(ShapeRange &data) { + _begin_total = data.begin(); + _begin = data.begin(); + _end = data.end(); + } + struct Subset { CubeIterator _begin, _end; bool valid; @@ -131,26 +138,69 @@ struct Workset { }; struct Worker { - Workset &ws; + std::shared_ptr ws; int id; - Worker(Workset &ws_, int id_) : ws(ws_), id(id_) {} + int state = 3; // 1 == completed/waiting for job, 2 == processing, 3 == job assigned. 
+ std::mutex mtx; + std::condition_variable cond; + std::condition_variable cond2; + std::thread thr; + + Worker(int id_) : id(id_), thr(&Worker::run, this) {} + ~Worker() { + std::unique_lock lock(mtx); + state = 0; + cond.notify_one(); + lock.unlock(); + thr.join(); + } + + void launch(std::shared_ptr ws_) { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws = ws_; + state = 3; + cond.notify_one(); + } + + void sync() { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws.reset(); + } + void run() { - // std::printf("start %d\n", id); - auto subset = ws.getPart(); - while (subset.valid) { - if (id == 0) { - std::printf(" %5.2f%%\r", subset.percent); - std::flush(std::cout); - } - // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; - for (auto &c : subset) { - // std::printf("%p\n", (void *)&c); - // c.print(); - ws.expand(c); + std::unique_lock lock(mtx); + std::printf("thread nro. %d started.\n", id); + while(state) { + state = 1; + cond2.notify_one(); + while(state == 1) + cond.wait(lock); + if(!state) + return; + state = 2; + // std::printf("start %d\n", id); + auto subset = ws->getPart(); + while (subset.valid) { + if (id == 0) { + std::printf(" %5.2f%%\r", subset.percent); + std::flush(std::cout); + } + // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; + for (auto &c : subset) { + // std::printf("%p\n", (void *)&c); + // c.print(); + ws->expand(c); + } + subset = ws->getPart(); } - subset = ws.getPart(); + // std::printf("finished %d\n", id); } - // std::printf("finished %d\n", id); } }; @@ -185,10 +235,19 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } std::printf("N = %d || generating new cubes from %lu base cubes.\n\r", n, base->size()); hashes.init(n); + + // Start worker threads. 
+ std::deque workers; + for (int i = 0; i < threads; ++i) { + workers.emplace_back(i); + } + + uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); uint32_t totalOutputShapes = hashes.byshape.size(); uint32_t outShapeCount = 0; + auto prevShapes = Hashy::generateShapes(n - 1); for (auto &tup : hashes.byshape) { outShapeCount++; @@ -210,13 +269,14 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (diffy == 1) if (shape.y() == shape.x()) diffx = 1; - std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + auto ws = std::make_shared(hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); if (use_split_cache) { // load cache file only for this shape std::string cachefile = base_path + "cubes_" + std::to_string(n - 1) + "_" + std::to_string(prevShapes[sid].x()) + "-" + std::to_string(prevShapes[sid].y()) + "-" + std::to_string(prevShapes[sid].z()) + ".bin"; - cr.loadFile(cachefile); + ws->cr.loadFile(cachefile); + base = &ws->cr; // cr.printHeader(); } auto s = base->getCubesByShape(sid); @@ -224,20 +284,26 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c std::printf("ERROR caches shape does not match expected shape!\n"); exit(-1); } - // std::printf("starting %d threads\n\r", threads); - std::vector ts; - Workset ws(s, hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); - std::vector workers; - ts.reserve(threads); - workers.reserve(threads); - for (int i = 0; i < threads; ++i) { - workers.emplace_back(ws, i); - ts.emplace_back(&Worker::run, std::ref(workers[i])); + + ws->setRange(s); + + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); } - for (int i = 0; i < threads; ++i) { - ts[i].join(); + std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + // launch the new jobs. + // Because the workset is held by shared_ptr + // main thread can do above preparation work in parallel + // while the jobs are running. 
+ for (auto& thr : workers) { + thr.launch(ws); } } + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); + } std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); totalSum += hashes.byshape[targetShape].size(); if (write_cache && split_cache) { @@ -252,6 +318,10 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } } } + + // Stop the workers. + workers.clear(); + if (write_cache && !split_cache) { Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } From 9ea1c7eaf0ab7b630d1b276f0b85398cd669b0ad Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 13 Aug 2023 20:13:47 +0300 Subject: [PATCH 25/42] CacheWriter class Implement replacement for Cache::save() CacheWriter should produce identical files to the old code, but is slightly faster as it doesn't wait for the file finalization. The old code still exists as reference but nothing is using it except tests. - libmappedfile would allow the serialization process to be parallelized. (WIP, Not implemented yet.) - Move Header ShapeEntry into cacheformat namespace - Implement CacheWriter - Update cubes.cpp to use the new CacheWriter - Cube::copyout() helper. The idea for this helper is that if the cube representation ever becomes something other than a plain XYZ array, callers can still copy the cube out into a plain XYZ buffer. Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/cube.hpp | 7 +++ cpp/include/hashes.hpp | 2 +- cpp/include/newCache.hpp | 74 +++++++++++++++--------- cpp/src/cubes.cpp | 12 ++-- cpp/src/newCache.cpp | 119 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 178 insertions(+), 36 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 72b9d97..e92e570 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -165,6 +165,13 @@ struct Cube { void print() const { for (auto &p : *this) std::printf(" (%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); } + + /** + * Copy cube data into destination buffer.
+ */ + void copyout(int num, XYZ* dest) const { + std::copy_n(begin(), num, dest); + } }; static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube"); diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index b154d2a..49462d2 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -101,7 +101,7 @@ struct Hashy { auto size() const { size_t sum = 0; - DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); + DEBUG1_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { auto part = set.second.size(); DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index ca3d71f..242a273 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -4,11 +4,33 @@ #include #include +#include +#include + #include "cube.hpp" #include "hashes.hpp" #include "mapped_file.hpp" -class Workset; +namespace cacheformat { + static constexpr uint32_t MAGIC = 0x42554350; + static constexpr uint32_t XYZ_SIZE = 3; + static constexpr uint32_t ALL_SHAPES = -1; + + struct Header { + uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 + uint32_t n; // we will never need 32bit but it is nicely aligned + uint32_t numShapes; // defines length of the shapeTable + uint64_t numPolycubes; // total number of polycubes + }; + struct ShapeEntry { + uint8_t dim0; // offset by -1 + uint8_t dim1; // offset by -1 + uint8_t dim2; // offset by -1 + uint8_t reserved; // for alignment + uint64_t offset; // from beginning of file + uint64_t size; // in bytes should be multiple of XYZ_SIZE + }; +}; class CubeIterator { public: @@ -52,7 +74,6 @@ class CubeIterator { friend bool operator<(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != 
b.m_ptr; }; - //friend class Workset; private: uint32_t n; @@ -101,25 +122,6 @@ class CacheReader : public ICache { uint32_t numShapes() override { return header->numShapes; }; operator bool() { return fileLoaded_; } - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - // Do begin() and end() make sense for CacheReader // If the cache file provides data for more than single shape? // The data might not even be mapped contiguously to save memory. @@ -138,15 +140,15 @@ class CacheReader : public ICache { private: std::shared_ptr file_; - std::unique_ptr> header_; - std::unique_ptr> shapes_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; std::unique_ptr> xyz_; std::string path_; bool fileLoaded_; - const Header dummyHeader; - const Header* header; - const ShapeEntry* shapes; + const cacheformat::Header dummyHeader; + const cacheformat::Header* header; + const cacheformat::ShapeEntry* shapes; }; class FlatCache : public ICache { @@ -180,4 +182,24 @@ class FlatCache : public ICache { size_t size() override { return allXYZs.size() / n / sizeof(XYZ); } }; +class CacheWriter { +protected: + // CacheWriter flushes the data in background. + std::deque> m_flushes; +public: + CacheWriter() {} + ~CacheWriter(); + + /** + * Capture snapshot of the Hashy and write cache file. + * The data may not be entirely flushed before save() returns. 
+ */ + void save(std::string path, Hashy &hashes, uint8_t n); + + /** + * Complete all flushes immediately. + */ + void flush(); +}; + #endif diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index ac04e12..7e5cf66 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -10,7 +10,6 @@ #include #include -#include "cache.hpp" #include "cube.hpp" #include "hashes.hpp" #include "newCache.hpp" @@ -216,7 +215,8 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + CacheWriter cw; + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); } @@ -242,6 +242,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.emplace_back(i); } + CacheWriter cw; uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); @@ -307,7 +308,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); totalSum += hashes.byshape[targetShape].size(); if (write_cache && split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + + cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + std::to_string(targetShape.z()) + ".bin", hashes, n); } @@ -323,8 +324,11 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.clear(); if (write_cache && !split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } + + cw.flush(); + auto end = std::chrono::steady_clock::now(); 
auto dt_ms = std::chrono::duration_cast(end - start).count(); std::printf("took %.2f s\033[0K\n\r", dt_ms / 1000.f); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index cd9726c..f7af6ed 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -41,16 +41,16 @@ int CacheReader::loadFile(const std::string path) { } // map the header struct - header_ = std::make_unique>(file_, 0); + header_ = std::make_unique>(file_, 0); header = header_->get(); - if (header->magic != MAGIC) { + if (header->magic != cacheformat::MAGIC) { std::printf("error opening file: file not recognized\n"); return 1; } // map the ShapeEntry array: - shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); + shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); shapes = shapes_->get(); size_t datasize = 0; @@ -84,8 +84,8 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { for (unsigned int k = 0; k < i; ++k) { offset += shapes[k].size; } - auto index = offset / XYZ_SIZE; - auto num_xyz = shapes[i].size / XYZ_SIZE; + auto index = offset / cacheformat::XYZ_SIZE; + auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE; // pointers to Cube data: auto start = xyz_->get() + index; auto end = xyz_->get() + index + num_xyz; @@ -106,3 +106,112 @@ void CacheReader::unload() { } CacheReader::~CacheReader() { unload(); } + +CacheWriter::CacheWriter::~CacheWriter() +{ + flush(); +} + + +void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { + if (hashes.size() == 0) return; + + using namespace mapped; + using namespace cacheformat; + + auto file_ = std::make_shared(); + if (file_->openrw(path.c_str(), 0)) { + std::printf("error opening file\n"); + return; + } + + auto header = std::make_unique>(file_, 0); + (*header)->magic = cacheformat::MAGIC; + (*header)->n = n; + (*header)->numShapes = hashes.byshape.size(); + (*header)->numPolycubes = hashes.size(); + + std::vector keys; + keys.reserve((*header)->numShapes); + for 
(auto &pair : hashes.byshape) keys.push_back(pair.first); + std::sort(keys.begin(), keys.end()); + + auto shapeEntry = std::make_unique>(file_, header->getEndSeek(), (*header)->numShapes); + + uint64_t offset = shapeEntry->getEndSeek(); + size_t num_cubes = 0; + int i = 0; + for (auto &key : keys) { + auto& se = (*shapeEntry)[i++]; + se.dim0 = key.x(); + se.dim1 = key.y(); + se.dim2 = key.z(); + se.reserved = 0; + se.offset = offset; + auto count = hashes.byshape[key].size() ; + num_cubes += count; + se.size = count * XYZ_SIZE * n; + offset += se.size; + } + + // put XYZs + // do this in parallel? + // it takes an long while to write out the file. + // note: we are at peak memory use in this function. + + auto xyz = std::make_unique>(file_, (*shapeEntry)[0].offset, num_cubes * n); + auto put = xyz->get(); + + for (auto &key : keys) { + for (auto &subset : hashes.byshape[key].byhash) { + auto itr = subset.set.begin(); + while(itr != subset.set.end()) { + static_assert(sizeof(XYZ) == XYZ_SIZE); + assert(itr->size() == n); + itr->copyout(n, put); + put += n; + ++itr; + } + } + } + // move the resources into lambda and async launch it. + // the file is finalized in background. + m_flushes.emplace_back(std::async(std::launch::async, [ + file = std::move(file_), + header = std::move(header), + shapeEntry = std::move(shapeEntry), + xyz = std::move(xyz)]() mutable { + // flush. + header->flush(); + shapeEntry->flush(); + xyz->flush(); + // Truncate file to proper size. + file->truncate(xyz->getEndSeek()); + file->close(); + xyz.reset(); + shapeEntry.reset(); + header.reset(); + file.reset(); + })); + + // cleanup completed flushes. 
(don't wait) + auto rm = std::remove_if(m_flushes.begin(), m_flushes.end(), [](auto& fut) { + if(fut.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + fut.get(); + return true; + } + return false; + }); + m_flushes.erase(rm, m_flushes.end()); + + std::printf("saved %s, %d unfinished.\n\r", path.c_str(), (int)m_flushes.size()); +} + +void CacheWriter::flush() +{ + for(auto& fut : m_flushes) { + fut.get(); + } + m_flushes.clear(); +} + From d850fdbea0f8ee5ad2c617c6ace86b7d98167626 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 14 Aug 2023 04:23:15 +0300 Subject: [PATCH 26/42] CacheWriter: Parallel serialization - CacheWriter now uses thread pool and copies the Hashy using worker threads. This would not be possible without libmapped_file. (N=13 completes now in less than 310 seconds, depends on disk) - Add nice progress bar Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 70 ++++++++++------- cpp/src/cubes.cpp | 4 +- cpp/src/newCache.cpp | 164 ++++++++++++++++++++++++++++----------- 3 files changed, 163 insertions(+), 75 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 242a273..888ff14 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -1,36 +1,38 @@ #pragma once #ifndef OPENCUBES_NEWCACHE_HPP #define OPENCUBES_NEWCACHE_HPP +#include #include -#include - #include -#include +#include +#include +#include +#include #include "cube.hpp" #include "hashes.hpp" #include "mapped_file.hpp" namespace cacheformat { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - 
uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; +static constexpr uint32_t MAGIC = 0x42554350; +static constexpr uint32_t XYZ_SIZE = 3; +static constexpr uint32_t ALL_SHAPES = -1; + +struct Header { + uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 + uint32_t n; // we will never need 32bit but it is nicely aligned + uint32_t numShapes; // defines length of the shapeTable + uint64_t numPolycubes; // total number of polycubes +}; +struct ShapeEntry { + uint8_t dim0; // offset by -1 + uint8_t dim1; // offset by -1 + uint8_t dim2; // offset by -1 + uint8_t reserved; // for alignment + uint64_t offset; // from beginning of file + uint64_t size; // in bytes should be multiple of XYZ_SIZE }; +}; // namespace cacheformat class CubeIterator { public: @@ -183,18 +185,32 @@ class FlatCache : public ICache { }; class CacheWriter { -protected: - // CacheWriter flushes the data in background. - std::deque> m_flushes; -public: - CacheWriter() {} + protected: + std::mutex m_mtx; + std::condition_variable m_run; + std::condition_variable m_wait; + bool m_active = true; + + // Jobs that flush and finalize the written file. + std::deque> m_flushes; + + // Temporary copy jobs into the memory mapped file. + std::deque> m_copy; + + // thread pool executing the jobs. + std::deque m_flushers; + + void run(); + + public: + CacheWriter(int num_threads = 8); ~CacheWriter(); /** * Capture snapshot of the Hashy and write cache file. * The data may not be entirely flushed before save() returns. */ - void save(std::string path, Hashy &hashes, uint8_t n); + void save(std::string path, Hashy& hashes, uint8_t n); /** * Complete all flushes immediately. 
diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 7e5cf66..1630bb6 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -215,7 +215,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - CacheWriter cw; + CacheWriter cw(1); cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); @@ -242,7 +242,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.emplace_back(i); } - CacheWriter cw; + CacheWriter cw(threads); uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index f7af6ed..4d95792 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -1,8 +1,4 @@ -#include "../include/newCache.hpp" - -#include -#include -#include +#include "newCache.hpp" #include @@ -107,11 +103,54 @@ void CacheReader::unload() { CacheReader::~CacheReader() { unload(); } -CacheWriter::CacheWriter::~CacheWriter() -{ +CacheWriter::CacheWriter(int num_threads) { + for (int i = 0; i < num_threads; ++i) { + m_flushers.emplace_back(&CacheWriter::run, this); + } +} + +CacheWriter::CacheWriter::~CacheWriter() { flush(); + // stop the threads. + std::unique_lock lock(m_mtx); + m_active = false; + m_run.notify_all(); + lock.unlock(); + for (auto &thr : m_flushers) thr.join(); } +void CacheWriter::run() { + std::unique_lock lock(m_mtx); + while (m_active) { + // do copy jobs: + if (!m_copy.empty()) { + auto task = std::move(m_copy.front()); + m_copy.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + continue; + } + // file flushes: + if (!m_flushes.empty()) { + auto task = std::move(m_flushes.front()); + m_flushes.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + continue; + } + // notify that we are done here. 
+ m_wait.notify_one(); + // wait for jobs. + m_run.wait(lock); + } + m_wait.notify_one(); +} void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { if (hashes.size() == 0) return; @@ -125,7 +164,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { return; } - auto header = std::make_unique>(file_, 0); + auto header = std::make_shared>(file_, 0); (*header)->magic = cacheformat::MAGIC; (*header)->n = n; (*header)->numShapes = hashes.byshape.size(); @@ -136,51 +175,91 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { for (auto &pair : hashes.byshape) keys.push_back(pair.first); std::sort(keys.begin(), keys.end()); - auto shapeEntry = std::make_unique>(file_, header->getEndSeek(), (*header)->numShapes); + auto shapeEntry = std::make_shared>(file_, header->getEndSeek(), (*header)->numShapes); uint64_t offset = shapeEntry->getEndSeek(); size_t num_cubes = 0; int i = 0; for (auto &key : keys) { - auto& se = (*shapeEntry)[i++]; + auto &se = (*shapeEntry)[i++]; se.dim0 = key.x(); se.dim1 = key.y(); se.dim2 = key.z(); se.reserved = 0; se.offset = offset; - auto count = hashes.byshape[key].size() ; + auto count = hashes.byshape[key].size(); num_cubes += count; se.size = count * XYZ_SIZE * n; offset += se.size; } // put XYZs - // do this in parallel? - // it takes an long while to write out the file. - // note: we are at peak memory use in this function. + // Serialize large CubeSet(s) in parallel. 
- auto xyz = std::make_unique>(file_, (*shapeEntry)[0].offset, num_cubes * n); + auto xyz = std::make_shared>(file_, (*shapeEntry)[0].offset, num_cubes * n); auto put = xyz->get(); + auto copyrange = [n](CubeSet::iterator itr, CubeSet::iterator end, XYZ *dest) -> void { + while (itr != end) { + static_assert(sizeof(XYZ) == XYZ_SIZE); + assert(itr->size() == n); + itr->copyout(n, dest); + dest += n; + ++itr; + } + }; + + auto time_start = std::chrono::steady_clock::now(); for (auto &key : keys) { for (auto &subset : hashes.byshape[key].byhash) { auto itr = subset.set.begin(); - while(itr != subset.set.end()) { - static_assert(sizeof(XYZ) == XYZ_SIZE); - assert(itr->size() == n); - itr->copyout(n, put); - put += n; - ++itr; + + ptrdiff_t dist = subset.set.size(); + // distribute if range is large enough. + auto skip = std::max(4096L, std::max(1L, dist / (signed)m_flushers.size())); + while (dist > skip) { + auto start = itr; + auto dest = put; + + auto inc = std::min(dist, skip); + std::advance(itr, inc); + put += n * inc; + dist = std::distance(itr, subset.set.end()); + + auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); + std::printf("writing data %5.2f%% ... \r", done); + std::flush(std::cout); + + std::lock_guard lock(m_mtx); + m_copy.emplace_back(std::bind(copyrange, start, itr, dest)); + m_run.notify_all(); + } + // copy remainder, if any. + if (dist) { + std::lock_guard lock(m_mtx); + m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put)); + m_run.notify_all(); + put += n * dist; + + auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); + std::printf("writing data %5.2f%% ... \r", done); + std::flush(std::cout); } } } - // move the resources into lambda and async launch it. - // the file is finalized in background. 
- m_flushes.emplace_back(std::async(std::launch::async, [ - file = std::move(file_), - header = std::move(header), - shapeEntry = std::move(shapeEntry), - xyz = std::move(xyz)]() mutable { + + // sanity check: + assert(put == (*xyz).get() + num_cubes * n); + + // sync up. + std::unique_lock lock(m_mtx); + while (!m_copy.empty()) { + m_wait.wait(lock); + } + + // move the resources into flush job. + m_flushes.emplace_back(std::bind( + [](auto &&file, auto &&header, auto &&shapeEntry, auto &&xyz) -> void { // flush. header->flush(); shapeEntry->flush(); @@ -188,30 +267,23 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { // Truncate file to proper size. file->truncate(xyz->getEndSeek()); file->close(); + file.reset(); xyz.reset(); shapeEntry.reset(); header.reset(); - file.reset(); - })); + }, + std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz))); + m_run.notify_all(); - // cleanup completed flushes. (don't wait) - auto rm = std::remove_if(m_flushes.begin(), m_flushes.end(), [](auto& fut) { - if(fut.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - fut.get(); - return true; - } - return false; - }); - m_flushes.erase(rm, m_flushes.end()); + auto time_end = std::chrono::steady_clock::now(); + auto dt_ms = std::chrono::duration_cast(time_end - time_start).count(); - std::printf("saved %s, %d unfinished.\n\r", path.c_str(), (int)m_flushes.size()); + std::printf("saved %s, took %.2f s\n\r", path.c_str(), dt_ms / 1000.f); } -void CacheWriter::flush() -{ - for(auto& fut : m_flushes) { - fut.get(); +void CacheWriter::flush() { + std::unique_lock lock(m_mtx); + while (!m_flushes.empty()) { + m_wait.wait(lock); } - m_flushes.clear(); } - From 5e0d24567aed37118035c23bc8e083c9232f810e Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 14 Aug 2023 04:37:48 +0300 Subject: [PATCH 27/42] Remove include/cache.hpp src/cache.cpp The old cache code has been deprecated since CacheWriter arrived: Only user was in 
tests/src/test_cache.cpp so drop the test case because it doesn't have any impact on the main cubes anymore. - Delete include/cache.hpp src/cache.cpp source files. Hopefully they will not be missed. :-) Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/CMakeLists.txt | 1 - cpp/include/cache.hpp | 35 -------- cpp/src/cache.cpp | 163 ----------------------------------- cpp/tests/src/test_cache.cpp | 10 --- 4 files changed, 209 deletions(-) delete mode 100644 cpp/include/cache.hpp delete mode 100644 cpp/src/cache.cpp delete mode 100644 cpp/tests/src/test_cache.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 05e50f0..78e91a2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,7 +44,6 @@ ConfigureTarget(mapped_file) # Source files add_library(CubeObjs OBJECT "src/cubes.cpp" - "src/cache.cpp" "src/rotations.cpp" "src/newCache.cpp" ) diff --git a/cpp/include/cache.hpp b/cpp/include/cache.hpp deleted file mode 100644 index 6c3480d..0000000 --- a/cpp/include/cache.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#ifndef OPENCUBES_CACHE_HPP -#define OPENCUBES_CACHE_HPP -#include - -#include "hashes.hpp" -#include "utils.hpp" - -struct Cache { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - - static void save(std::string path, Hashy& hashes, uint8_t n); - static Hashy load(std::string path, uint32_t extractShape = ALL_SHAPES); - - int 
filedesc; - void* mmap_ptr; -}; - -#endif diff --git a/cpp/src/cache.cpp b/cpp/src/cache.cpp deleted file mode 100644 index 071ff0f..0000000 --- a/cpp/src/cache.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "cache.hpp" - -#include -#include -#include -#include -#include - -#include "utils.hpp" - -/* -==================== -cache file header -==================== - -uint32_t magic = "PCUB" -uint32_t n = cache file for n cubes in a polycube -uint32_t numShapes = number of different shapes in cachefile -------- - -==================== -shapetable: -==================== -shapeEntry { - uint8_t dim0 // offset by -1 - uint8_t dim1 // offset by -1 - uint8_t dim2 // offset by -1 - uint8_t reserved - uint64_t offset in file -} -shapeEntry[numShapes] - - -==================== -XYZ data -==================== - -*/ - -void Cache::save(std::string path, Hashy &hashes, uint8_t n) { - if (hashes.size() == 0) return; - std::ofstream ofs(path, std::ios::binary); - Header header; - header.magic = MAGIC; - header.n = n; - header.numShapes = hashes.byshape.size(); - header.numPolycubes = hashes.size(); - ofs.write((const char *)&header, sizeof(header)); - - std::vector keys; - keys.reserve(header.numShapes); - for (auto &pair : hashes.byshape) keys.push_back(pair.first); - std::sort(keys.begin(), keys.end()); - uint64_t offset = sizeof(Header) + header.numShapes * sizeof(ShapeEntry); - for (auto &key : keys) { - ShapeEntry se; - se.dim0 = key.x(); - se.dim1 = key.y(); - se.dim2 = key.z(); - se.reserved = 0; - se.offset = offset; - se.size = hashes.byshape[key].size() * XYZ_SIZE * n; - offset += se.size; - ofs.write((const char *)&se, sizeof(ShapeEntry)); - } - // put XYZs - for (auto &key : keys) { - for (auto &subset : hashes.byshape[key].byhash) - for (const auto &c : subset.set) { - if constexpr (sizeof(XYZ) == XYZ_SIZE) { - ofs.write((const char *)c.data(), sizeof(XYZ) * c.size()); - } else { - for (const auto &p : c) { - ofs.write((const char *)p.data, XYZ_SIZE); - } - } - } - } - 
- std::printf("saved %s\n\r", path.c_str()); -} - -Hashy Cache::load(std::string path, uint32_t extractShape) { - Hashy cubes; - auto ifs = std::ifstream(path, std::ios::binary); - if (!ifs.is_open()) return cubes; - Header header; - if (!ifs.read((char *)&header, sizeof(header))) { - return cubes; - } - // check magic - if (header.magic != MAGIC) { - return cubes; - } -#ifdef CACHE_LOAD_HEADER_ONLY - std::printf("loading cache file \"%s\" for N = %u", path.c_str(), header.n); - std::printf(", %u shapes, %lu XYZs\n\r", header.numShapes, header.numPolycubes); -#endif - auto cubeSize = XYZ_SIZE * header.n; - DEBUG_PRINTF("cubeSize: %u\n\r", cubeSize); - - for (uint32_t i = 0; i < header.numShapes; ++i) { - ShapeEntry shapeEntry; - if (!ifs.read((char *)&shapeEntry, sizeof(shapeEntry))) { - std::printf("ERROR reading ShapeEntry %u\n\r", i); - exit(-1); - } - if (ALL_SHAPES != extractShape && i != extractShape) continue; -#ifdef CACHE_PRINT_SHAPEENTRIES - std::printf("ShapeEntry %3u: [%2d %2d %2d] offset: 0x%08lx size: 0x%08lx (%ld polycubes)\n\r", i, shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2, - shapeEntry.offset, shapeEntry.size, shapeEntry.size / cubeSize); -#endif - if (shapeEntry.size % cubeSize != 0) { - std::printf("ERROR shape block is not divisible by cubeSize!\n\r"); - exit(-1); - } -#ifndef CACHE_LOAD_HEADER_ONLY - // remember pos in file - auto pos = ifs.tellg(); - - // read XYZ contents - ifs.seekg(shapeEntry.offset); - const uint32_t CHUNK_SIZE = 512 * XYZ_SIZE; - uint8_t buf[CHUNK_SIZE] = {0}; - uint64_t buf_offset = 0; - uint32_t numCubes = shapeEntry.size / cubeSize; - XYZ shape(shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2); - uint64_t readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - for (uint32_t j = 0; j < numCubes; ++j) { - Cube next(header.n); - for (uint32_t k = 0; k < 
header.n; ++k) { - // check if buf contains next XYZ - uint64_t curr_offset = j * cubeSize + k * XYZ_SIZE; - if (curr_offset >= buf_offset + CHUNK_SIZE) { - // std::printf("reload buffer\n\r"); - buf_offset += CHUNK_SIZE; - readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - } - - next.data()[k].data[0] = buf[curr_offset - buf_offset + 0]; - next.data()[k].data[1] = buf[curr_offset - buf_offset + 1]; - next.data()[k].data[2] = buf[curr_offset - buf_offset + 2]; - } - cubes.insert(next, shape); - } - - // restore pos - ifs.seekg(pos); -#endif - } - std::printf(" loaded %lu cubes\n\r", cubes.size()); - return cubes; -} diff --git a/cpp/tests/src/test_cache.cpp b/cpp/tests/src/test_cache.cpp deleted file mode 100644 index ae10cbf..0000000 --- a/cpp/tests/src/test_cache.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include - -#include "cache.hpp" - -TEST(CacheTests, TestCacheLoadDoesNotThrow) { EXPECT_NO_THROW(Cache::load("./test_data.bin")); } - -TEST(CacheTests, TestCacheSaveDoesNotThrow) { - auto data = Cache::load("./test_data.bin"); - EXPECT_NO_THROW(Cache::save("./temp.bin", data, 255)); -} \ No newline at end of file From 95c6a07641e690ab96de3463d286d7cc185b10e8 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 14 Aug 2023 09:56:50 +0300 Subject: [PATCH 28/42] CacheWriter: Fix-up synchronization CacheWriter didn't properly wait for queued job(s) to complete. Fix with counter that is incremented on queue and decremented *after* the task is run. 
Signed-off-by: Jarmo Tiitto Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 2 ++ cpp/src/newCache.cpp | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 888ff14..c24a06b 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -192,9 +192,11 @@ class CacheWriter { bool m_active = true; // Jobs that flush and finalize the written file. + size_t m_num_flushes = 0; std::deque> m_flushes; // Temporary copy jobs into the memory mapped file. + size_t m_num_copys = 0; std::deque> m_copy; // thread pool executing the jobs. diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index 4d95792..93f15b3 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -131,6 +131,7 @@ void CacheWriter::run() { task(); lock.lock(); + --m_num_copys; continue; } // file flushes: @@ -142,6 +143,7 @@ void CacheWriter::run() { task(); lock.lock(); + --m_num_flushes; continue; } // notify that we are done here. @@ -232,12 +234,14 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { std::lock_guard lock(m_mtx); m_copy.emplace_back(std::bind(copyrange, start, itr, dest)); + ++m_num_copys; m_run.notify_all(); } // copy remainder, if any. if (dist) { std::lock_guard lock(m_mtx); m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put)); + ++m_num_copys; m_run.notify_all(); put += n * dist; @@ -253,7 +257,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { // sync up. 
std::unique_lock lock(m_mtx); - while (!m_copy.empty()) { + while (m_num_copys) { m_wait.wait(lock); } @@ -273,6 +277,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { header.reset(); }, std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz))); + ++m_num_flushes; m_run.notify_all(); auto time_end = std::chrono::steady_clock::now(); @@ -283,7 +288,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { void CacheWriter::flush() { std::unique_lock lock(m_mtx); - while (!m_flushes.empty()) { + while (m_num_flushes) { m_wait.wait(lock); } } From e5a7bce4811be3add531d38a8be652c1d75053d9 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Thu, 17 Aug 2023 22:52:27 +0300 Subject: [PATCH 29/42] Update Readme.md to reflect the state of the C++ implementation. The C++ implementation has gained the split cache files and their associated command line parameters since Readme.md was last updated. Document the `./cubes` program usage and how to use the split cache files. Signed-off-by: JATothrim --- cpp/Readme.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/cpp/Readme.md b/cpp/Readme.md index ce7de8c..7e34fba 100644 --- a/cpp/Readme.md +++ b/cpp/Readme.md @@ -1,27 +1,71 @@ # C++ implementation of opencubes - uses list representation of coordinates with ones - hashfunction for coordinate is simple concatination of bytes -- can split problem into threads, but performance can be improoved +- can split problem into threads, but performance can be improved ## usage: ```bash ./cubes -n N ``` -options: +### options: ``` +-n --cube_size +the size of polycube to generate up to +This parameter is required. + -t --threads the number of threads to use while generating This parameter is optional. The default value is '1'. -c --use_cache -whether to load cache files +whether to load cache files. 
+The last N-1 run must have used -w parameter and that process +must have completed without errors. The cache file +must be present under the cache folder. (-f parameter) This parameter is optional. The default value is '0'. -w --write_cache -wheather to save cache files +whether to save cache files +This parameter is optional. The default value is '0'. + +-s --split_cache +whether to save separated cache files per output shape. +requires -w parameter to take affect. +No combined cache file is saved when -s is present. This parameter is optional. The default value is '0'. + +-u --use_split_cache +whether to load separated cache files per output shape. +The last N-1 run must have used -s parameter and that process +must have completed without errors. The split cache file(s) +must be present under the cache folder. (-f parameter) +This parameter is optional. The default value is '0'. + +-f --cache_file_folder +where to store cache files. +This parameter is optional. The default value is './cache/'. ``` +### split cache usage: +Starting with N=9 and beyond it makes sense to use the disk cache system. +To generate starting cache run: +```bash +./cubes -n 9 -w -s +``` + +Above saves of the results into the cache folder (specified with -f parameter) +as split cache files. Next N=10 run can continue processing from where the last N=9 process stopped: +```bash +./cubes -n 10 -w -s -u +``` +The split cache file mode attempts to minimize memory usage. +All following runs can use above command by incrementing the N by one each time. + +If required you can merge the split cache files +back into single file at last run by dropping the `-s` parameter. +Merging the split cache this way however uses vastly more memory. 
+(Tool should be developed to export/merge the split cache files as standard cube format file) + ## building (cmake) To build a release version (with optimisations , default) ```bash From 9658905fcab738ae87eb7976a54ef55508300f25 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Wed, 19 Jul 2023 20:18:30 +0300 Subject: [PATCH 30/42] Add build time configure options. - Imported commit v2 for next branch. - Current `git rev-list -n1 HEAD`, used compiler and build type and critical settings are embedded into the cubes binary. - `cubes -v` prints now how it was built. - CUBES_MAX_N constant now available from "config.hpp" - CONFIG_PACK_CUBE_ADDR now available from "config.hpp" - New options can be added into "config.hpp.in" - Add anti-goof measure for the read-only config.hpp The config defines can be changed at cmake configure time. Signed-off-by: JATothrim --- cpp/CMakeLists.txt | 27 ++++++++++++++++++++++++++- cpp/config.hpp.in | 18 ++++++++++++++++++ cpp/program.cpp | 7 ++++++- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 cpp/config.hpp.in diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 78e91a2..9da6b04 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -3,11 +3,36 @@ project(cubes CXX) # default to release build because speed maters. if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE "Release") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "CMAKE_BUILD_TYPE: Release, Debug or RelWithDebInfo" FORCE) endif() +if(NOT BUILD_CUBES_MAX_N) + set(BUILD_CUBES_MAX_N 20 CACHE STRING "Limit of maximum N Polycubes to be computed" FORCE) +endif() + +if(NOT BUILD_PACK_CUBE_ADDR) + set(BUILD_PACK_CUBE_ADDR 1 CACHE BOOL "Pack Cube struct XYZ memory address into 56-bit field." 
FORCE) +endif() + +# Try extract current HEAD commit-id in git +find_package(Git) +if(GIT_FOUND) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-list -n1 HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT + OUTPUT_VARIABLE CONFIG_GIT_VERSION) + message(STATUS "Set ${CONFIG_GIT_VERSION} to build version info") +endif() + +# generate config.hpp header in build directory. +set(CONFIG_IS_READONLY "Warning: this file is overwritten during build. Do not edit.") +configure_file("config.hpp.in" "config.hpp") + include_directories("include") include_directories("libraries") +include_directories("${PROJECT_BINARY_DIR}") macro(ConfigureTarget Target) # Enable C++17 diff --git a/cpp/config.hpp.in b/cpp/config.hpp.in new file mode 100644 index 0000000..b70f440 --- /dev/null +++ b/cpp/config.hpp.in @@ -0,0 +1,18 @@ +#pragma once +#ifndef OPENCUBES_CONFIG_HPP +#define OPENCUBES_CONFIG_HPP + +// @CONFIG_IS_READONLY@ + +// Version info embedded into the build +#define CONFIG_VERSION "@CONFIG_GIT_VERSION@" +#define CONFIG_BUILDTYPE "@CMAKE_BUILD_TYPE@" +#define CONFIG_COMPILERID "@CMAKE_CXX_COMPILER_ID@ @CMAKE_CXX_COMPILER_VERSION@" + +// Enable Cube struct pointer compaction +#define CONFIG_PACK_CUBE_ADDR @BUILD_PACK_CUBE_ADDR@ + +// Maximum Polycubes N that may be computed +#define CUBES_MAX_N @BUILD_CUBES_MAX_N@ + +#endif diff --git a/cpp/program.cpp b/cpp/program.cpp index d9f0169..5c37ab0 100644 --- a/cpp/program.cpp +++ b/cpp/program.cpp @@ -1,14 +1,16 @@ #include #include "cmdparser.hpp" +#include "config.hpp" #include "cubes.hpp" void configure_arguments(cli::Parser& parser) { - parser.set_required("n", "cube_size", "the size of polycube to generate up to"); + parser.set_optional("n", "cube_size", 1, "the size of polycube to generate up to"); parser.set_optional("t", "threads", 1, "the number of threads to use while generating"); parser.set_optional("c", "use_cache", false, "whether to load cache files"); 
parser.set_optional("w", "write_cache", false, "wheather to save cache files"); parser.set_optional("s", "split_cache", false, "wheather to save in sparate cache files per output shape"); + parser.set_optional("v", "version", false, "print build version info"); parser.set_optional("u", "use_split_cache", false, "use separate cachefile by input shape"); parser.set_optional("f", "cache_file_folder", "./cache/", "where to store cache files"); } @@ -17,6 +19,9 @@ int main(int argc, char** argv) { cli::Parser parser(argc, argv); configure_arguments(parser); parser.run_and_exit_if_error(); + if (parser.get("v")) { + std::printf("Built from %s, %s, %s\n", CONFIG_VERSION, CONFIG_BUILDTYPE, CONFIG_COMPILERID); + } gen(parser.get("n"), parser.get("t"), parser.get("c"), parser.get("w"), parser.get("s"), parser.get("u"), parser.get("f")); return 0; } From b12bb50a7acd5ab8ccb4a4709ab07b10228410f6 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Mon, 21 Aug 2023 18:01:38 +0300 Subject: [PATCH 31/42] Provide configure option for the Cube struct compaction. - CUBES_PACK_CUBE_XYZ_ADDR CMake option. By default do still enable compaction of Cube struct into 8-bytes. If the hack does not work on some system this can be set to OFF to revert the hack on configure time. 
- Add assert into Cube::copyout() Signed-off-by: JATothrim --- cpp/CMakeLists.txt | 6 ++--- cpp/config.hpp.in | 2 +- cpp/include/cube.hpp | 54 +++++++++++++++++++++++++++++++------------- 3 files changed, 42 insertions(+), 20 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 9da6b04..68344fe 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -7,11 +7,11 @@ if(NOT CMAKE_BUILD_TYPE) endif() if(NOT BUILD_CUBES_MAX_N) - set(BUILD_CUBES_MAX_N 20 CACHE STRING "Limit of maximum N Polycubes to be computed" FORCE) + set(BUILD_CUBES_MAX_N 20 CACHE STRING "Limit of maximum N Polycubes to be computed") endif() -if(NOT BUILD_PACK_CUBE_ADDR) - set(BUILD_PACK_CUBE_ADDR 1 CACHE BOOL "Pack Cube struct XYZ memory address into 56-bit field." FORCE) +if(NOT CUBES_PACK_CUBE_XYZ_ADDR) + set(CUBES_PACK_CUBE_XYZ_ADDR ON CACHE BOOL "Pack Cube struct XYZ memory address into 56-bit field.") endif() # Try extract current HEAD commit-id in git diff --git a/cpp/config.hpp.in b/cpp/config.hpp.in index b70f440..695addc 100644 --- a/cpp/config.hpp.in +++ b/cpp/config.hpp.in @@ -10,7 +10,7 @@ #define CONFIG_COMPILERID "@CMAKE_CXX_COMPILER_ID@ @CMAKE_CXX_COMPILER_VERSION@" // Enable Cube struct pointer compaction -#define CONFIG_PACK_CUBE_ADDR @BUILD_PACK_CUBE_ADDR@ +#cmakedefine01 CUBES_PACK_CUBE_XYZ_ADDR // Maximum Polycubes N that may be computed #define CUBES_MAX_N @BUILD_CUBES_MAX_N@ diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index e92e570..a13a2e0 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -3,12 +3,13 @@ #define OPENCUBES_CUBE_HPP #include +#include #include #include #include #include -#include +#include "config.hpp" #include "utils.hpp" struct XYZ { @@ -49,44 +50,66 @@ struct Cube { // cube memory is stored two ways: // normal, new'd buffer: is_shared == false // shared, external memory: is_shared == true - +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 struct bits_t { uint64_t is_shared : 1; uint64_t size : 7; // MAX 127 uint64_t addr : 
56; // low 56-bits of memory address. }; + static_assert(sizeof(bits_t) == sizeof(void *)); +#else + struct bits_t { + uint64_t addr; + uint8_t is_shared : 1; + uint8_t size : 7; // MAX 127 + }; +#endif // fields bits_t fields; - - static_assert(sizeof(bits_t) == sizeof(void*)); // extract the pointer from bits_t static XYZ *get(bits_t key) { // pointer bit-hacking: uint64_t addr = key.addr; +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 +// todo: on x86-64 depending if 5-level-paging is enabled +// either 47-bit or 56-bit should be replicated to the high +// part of the address. Don't know how to do this check yet, +// so the high 8-bits is left zeroed. +// If we get segfaults dereferencing get(fields) +// then CUBES_PACK_CUBE_XYZ_ADDR must be disabled. +#endif return reinterpret_cast(addr); } static bits_t put(bool is_shared, int size, XYZ *addr) { - // mask off top byte from the memory address to fit it into bits_t::addr +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 + // pack the memory address into 56-bits // on x86-64 it is not used by the hardware (yet). // This hack actually saves 8 bytes because previously // the uint8_t caused padding to 16 bytes. - // @note if we get segfaults dereferencing get(fields) - // then this is the problem and this hack must be undone. 
uint64_t tmp = reinterpret_cast((void *)addr); + assert((tmp & ~0xffffffffffffff) == 0 && "BUG: CUBES_PACK_CUBE_XYZ_ADDR should be disabled"); tmp &= 0xffffffffffffff; bits_t bits; bits.addr = tmp; bits.is_shared = is_shared; bits.size = size; return bits; +#else + bits_t bits; + bits.addr = reinterpret_cast((void *)addr); + bits.is_shared = is_shared; + bits.size = size; + return bits; +#endif } + public: // Empty cube Cube() : fields{put(0, 0, nullptr)} {} // Cube with N capacity - explicit Cube(uint8_t N) : fields{put(0,N, new XYZ[N])} {} + explicit Cube(uint8_t N) : fields{put(0, N, new XYZ[N])} {} // Construct from pieces Cube(std::initializer_list il) : Cube(il.size()) { std::copy(il.begin(), il.end(), begin()); } @@ -97,7 +120,7 @@ struct Cube { // Construct from external source. // Cube shares this the memory until modified. // Caller guarantees the memory given will live longer than *this - Cube(const XYZ *start, uint8_t n) : fields{put(1,n,const_cast(start))} {} + Cube(const XYZ *start, uint8_t n) : fields{put(1, n, const_cast(start))} {} // Copy ctor. Cube(const Cube ©) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); } @@ -131,13 +154,9 @@ struct Cube { size_t size() const { return fields.size; } - XYZ *data() { - return get(fields); - } + XYZ *data() { return get(fields); } - const XYZ *data() const { - return get(fields); - } + const XYZ *data() const { return get(fields); } XYZ *begin() { return data(); } @@ -169,12 +188,15 @@ struct Cube { /** * Copy cube data into destination buffer. 
*/ - void copyout(int num, XYZ* dest) const { + void copyout(int num, XYZ *dest) const { + assert(num <= size()); std::copy_n(begin(), num, dest); } }; +#if CUBES_PACK_CUBE_XYZ_ADDR == 1 static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube"); +#endif static_assert(std::is_move_assignable_v, "Cube must be moveable"); static_assert(std::is_swappable_v, "Cube must swappable"); From 37536552066612544e6c000bdf39188cf3a66b70 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Tue, 15 Aug 2023 23:48:23 +0300 Subject: [PATCH 32/42] Hashy refactor Hashy code is somewhat tangled and there is now known possible data-race in `Hashy::insert()`. This issue cannot be permanently fixed without hiding the `Hashy::byshape` under protected/private and preventing direct access to the member. Replacements to the direct member access will come in later changes. - Move Subhashy and Subsubhashy out from Hashy class. Signed-off-by: JATothrim --- cpp/include/hashes.hpp | 81 +++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 49462d2..8ed5c04 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -25,54 +25,55 @@ struct HashCube { using CubeSet = std::unordered_set>; -struct Hashy { - struct Subsubhashy { - CubeSet set; - mutable std::shared_mutex set_mutex; +struct Subsubhashy { + CubeSet set; + mutable std::shared_mutex set_mutex; - template - void insert(CubeT &&c) { - std::lock_guard lock(set_mutex); - set.emplace(std::forward(c)); - } + template + void insert(CubeT &&c) { + std::lock_guard lock(set_mutex); + set.emplace(std::forward(c)); + } - bool contains(const Cube &c) const { - std::shared_lock lock(set_mutex); - auto itr = set.find(c); - if(itr != set.end()) { - return true; - } - return false; + bool contains(const Cube &c) const { + std::shared_lock lock(set_mutex); + auto itr = set.find(c); + if (itr != set.end()) { + return true; } + return false; 
+ } - auto size() const { - std::shared_lock lock(set_mutex); - return set.size(); - } - }; - template - struct Subhashy { - std::array byhash; + auto size() const { + std::shared_lock lock(set_mutex); + return set.size(); + } +}; - template - void insert(CubeT &&c) { - HashCube hash; - auto idx = hash(c) % NUM; - auto &set = byhash[idx]; - if (!set.contains(c)) set.insert(std::forward(c)); - // printf("new size %ld\n\r", byshape[shape].size()); - } +template +struct Subhashy { + std::array byhash; - auto size() const { - size_t sum = 0; - for (auto &set : byhash) { - auto part = set.size(); - sum += part; - } - return sum; + template + void insert(CubeT &&c) { + HashCube hash; + auto idx = hash(c) % NUM; + auto &set = byhash[idx]; + if (!set.contains(c)) set.insert(std::forward(c)); + // printf("new size %ld\n\r", byshape[shape].size()); + } + + auto size() const { + size_t sum = 0; + for (auto &set : byhash) { + auto part = set.size(); + sum += part; } - }; + return sum; + } +}; +struct Hashy { std::map> byshape; static std::vector generateShapes(int n) { From cda5b3a2e69f10dc0b0b77f65e2607c455e07670 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 16 Aug 2023 00:03:58 +0300 Subject: [PATCH 33/42] Hashy refactor: SubsubHashy - Make Subsubhashy a class to note its members aren't directly accessible. - Hide members under protected - Discover class users and fix them. Mainly iterating the SubsubHashy. 
Signed-off-by: JATothrim --- cpp/include/hashes.hpp | 15 ++++++++++++++- cpp/include/newCache.hpp | 2 +- cpp/src/cubes.cpp | 3 +-- cpp/src/newCache.cpp | 8 ++++---- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 8ed5c04..6c543cb 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -25,10 +25,12 @@ struct HashCube { using CubeSet = std::unordered_set>; -struct Subsubhashy { +class Subsubhashy { + protected: CubeSet set; mutable std::shared_mutex set_mutex; + public: template void insert(CubeT &&c) { std::lock_guard lock(set_mutex); @@ -48,6 +50,17 @@ struct Subsubhashy { std::shared_lock lock(set_mutex); return set.size(); } + + void clear() { + std::lock_guard lock(set_mutex); + set.clear(); + set.reserve(1); + } + + auto begin() const { return set.begin(); } + auto end() const { return set.end(); } + auto begin() { return set.begin(); } + auto end() { return set.end(); } }; template diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index c24a06b..453bb9e 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -167,7 +167,7 @@ class FlatCache : public ICache { for (auto& [shape, set] : hashes.byshape) { auto begin = allXYZs.data() + allXYZs.size(); for (auto& subset : set.byhash) { - for (auto& cube : subset.set) + for (auto& cube : subset) // allXYZs.emplace_back(allXYZs.end(), subset.set.begin(), subset.set.end()); std::copy(cube.begin(), cube.end(), std::back_inserter(allXYZs)); } diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 1630bb6..737e398 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -314,8 +314,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } if (split_cache) { for (auto &subset : hashes.byshape[targetShape].byhash) { - subset.set.clear(); - subset.set.reserve(1); + subset.clear(); } } } diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index 93f15b3..26b0514 100644 --- 
a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -214,9 +214,9 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { auto time_start = std::chrono::steady_clock::now(); for (auto &key : keys) { for (auto &subset : hashes.byshape[key].byhash) { - auto itr = subset.set.begin(); + auto itr = subset.begin(); - ptrdiff_t dist = subset.set.size(); + ptrdiff_t dist = subset.size(); // distribute if range is large enough. auto skip = std::max(4096L, std::max(1L, dist / (signed)m_flushers.size())); while (dist > skip) { @@ -226,7 +226,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { auto inc = std::min(dist, skip); std::advance(itr, inc); put += n * inc; - dist = std::distance(itr, subset.set.end()); + dist = std::distance(itr, subset.end()); auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); std::printf("writing data %5.2f%% ... \r", done); @@ -240,7 +240,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { // copy remainder, if any. if (dist) { std::lock_guard lock(m_mtx); - m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put)); + m_copy.emplace_back(std::bind(copyrange, itr, subset.end(), put)); ++m_num_copys; m_run.notify_all(); put += n * dist; From e57e436ea01c53118b0ec582cf7596406c9ec199 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 16 Aug 2023 00:10:39 +0300 Subject: [PATCH 34/42] Hashy refactor: SubHashy - Make Subhashy a class to note its members aren't directly accessible. - Hide members under protected - Discover class users and fix them. 
Signed-off-by: JATothrim --- cpp/include/hashes.hpp | 10 ++++++++-- cpp/include/newCache.hpp | 2 +- cpp/src/cubes.cpp | 2 +- cpp/src/newCache.cpp | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 6c543cb..a0e9d4c 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -64,9 +64,10 @@ class Subsubhashy { }; template -struct Subhashy { +class Subhashy { + protected: std::array byhash; - + public: template void insert(CubeT &&c) { HashCube hash; @@ -84,6 +85,11 @@ struct Subhashy { } return sum; } + + auto begin() const { return byhash.begin(); } + auto end() const { return byhash.end(); } + auto begin() { return byhash.begin(); } + auto end() { return byhash.end(); } }; struct Hashy { diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 453bb9e..1a0a8e8 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -166,7 +166,7 @@ class FlatCache : public ICache { // std::printf("Flatcache %d %p %p\n", n, (void*)allXYZs.data(), (void*)shapes.data()); for (auto& [shape, set] : hashes.byshape) { auto begin = allXYZs.data() + allXYZs.size(); - for (auto& subset : set.byhash) { + for (auto& subset : set) { for (auto& cube : subset) // allXYZs.emplace_back(allXYZs.end(), subset.set.begin(), subset.set.end()); std::copy(cube.begin(), cube.end(), std::back_inserter(allXYZs)); diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 737e398..b8b0779 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -313,7 +313,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes, n); } if (split_cache) { - for (auto &subset : hashes.byshape[targetShape].byhash) { + for (auto &subset : hashes.byshape[targetShape]) { subset.clear(); } } diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index 26b0514..fff4e8d 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -213,7 +213,7 @@ void CacheWriter::save(std::string path, 
Hashy &hashes, uint8_t n) { auto time_start = std::chrono::steady_clock::now(); for (auto &key : keys) { - for (auto &subset : hashes.byshape[key].byhash) { + for (auto &subset : hashes.byshape[key]) { auto itr = subset.begin(); ptrdiff_t dist = subset.size(); From 67ae09a13288f712bc20455f5423e644dcac6964 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 16 Aug 2023 00:32:59 +0300 Subject: [PATCH 35/42] Hashy refactor: Hashy class - Finally fix the potential data-race in Hashy::insert(): insert() uses the at() to lookup/create the shape and it is thread-safe. - Make Hashy a class to note its members aren't directly accessible. - Hide members under protected - Discover class users and fix them. - Added begin(), end(), numShapes() and at() replacing direct member access. Signed-off-by: JATothrim --- cpp/include/hashes.hpp | 34 +++++++++++++++++++++++++++++++--- cpp/include/newCache.hpp | 4 ++-- cpp/src/cubes.cpp | 10 +++++----- cpp/src/newCache.cpp | 8 ++++---- 4 files changed, 42 insertions(+), 14 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index a0e9d4c..7234bd3 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -67,6 +67,7 @@ template class Subhashy { protected: std::array byhash; + public: template void insert(CubeT &&c) { @@ -92,9 +93,12 @@ class Subhashy { auto end() { return byhash.end(); } }; -struct Hashy { +class Hashy { + protected: std::map> byshape; + mutable std::shared_mutex set_mutex; + public: static std::vector generateShapes(int n) { std::vector out; for (int x = 0; x < n; ++x) @@ -109,17 +113,31 @@ struct Hashy { void init(int n) { // create all subhashy which will be needed for N + std::lock_guard lock(set_mutex); for (auto s : generateShapes(n)) byshape[s].size(); std::printf("%ld sets by shape for N=%d\n\r", byshape.size(), n); } + Subhashy<32> &at(XYZ shape) { + std::shared_lock lock(set_mutex); + auto itr = byshape.find(shape); + if (itr != byshape.end()) { + return itr->second; + } + 
lock.unlock(); + // Not sure if this is supposed to happen normally + // if init() creates all subhashys required. + std::lock_guard elock(set_mutex); + return byshape[shape]; + } + template void insert(CubeT &&c, XYZ shape) { - auto &set = byshape[shape]; - set.insert(std::forward(c)); + at(shape).insert(std::forward(c)); } auto size() const { + std::shared_lock lock(set_mutex); size_t sum = 0; DEBUG1_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { @@ -129,5 +147,15 @@ struct Hashy { } return sum; } + + int numShapes() const { + std::shared_lock lock(set_mutex); + return byshape.size(); + } + + auto begin() const { return byshape.begin(); } + auto end() const { return byshape.end(); } + auto begin() { return byshape.begin(); } + auto end() { return byshape.end(); } }; #endif diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 1a0a8e8..1d62940 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -162,9 +162,9 @@ class FlatCache : public ICache { FlatCache() {} FlatCache(Hashy& hashes, uint8_t n) : n(n) { allXYZs.reserve(hashes.size() * n); - shapes.reserve(hashes.byshape.size()); + shapes.reserve(hashes.numShapes()); // std::printf("Flatcache %d %p %p\n", n, (void*)allXYZs.data(), (void*)shapes.data()); - for (auto& [shape, set] : hashes.byshape) { + for (auto& [shape, set] : hashes) { auto begin = allXYZs.data() + allXYZs.size(); for (auto& subset : set) { for (auto& cube : subset) diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index b8b0779..6b60085 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -246,11 +246,11 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); - uint32_t totalOutputShapes = hashes.byshape.size(); + uint32_t totalOutputShapes = hashes.numShapes(); uint32_t outShapeCount = 0; auto prevShapes = Hashy::generateShapes(n - 1); - for (auto &tup : hashes.byshape) { + for 
(auto &tup : hashes) { outShapeCount++; XYZ targetShape = tup.first; std::printf("process output shape %3d/%d [%2d %2d %2d]\n\r", outShapeCount, totalOutputShapes, targetShape.x(), targetShape.y(), targetShape.z()); @@ -305,15 +305,15 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c for (auto& thr : workers) { thr.sync(); } - std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); - totalSum += hashes.byshape[targetShape].size(); + std::printf(" num: %lu\n\r", hashes.at(targetShape).size()); + totalSum += hashes.at(targetShape).size(); if (write_cache && split_cache) { cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + std::to_string(targetShape.z()) + ".bin", hashes, n); } if (split_cache) { - for (auto &subset : hashes.byshape[targetShape]) { + for (auto &subset : hashes.at(targetShape)) { subset.clear(); } } diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index fff4e8d..d54b057 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -169,12 +169,12 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { auto header = std::make_shared>(file_, 0); (*header)->magic = cacheformat::MAGIC; (*header)->n = n; - (*header)->numShapes = hashes.byshape.size(); + (*header)->numShapes = hashes.numShapes(); (*header)->numPolycubes = hashes.size(); std::vector keys; keys.reserve((*header)->numShapes); - for (auto &pair : hashes.byshape) keys.push_back(pair.first); + for (auto &pair : hashes) keys.push_back(pair.first); std::sort(keys.begin(), keys.end()); auto shapeEntry = std::make_shared>(file_, header->getEndSeek(), (*header)->numShapes); @@ -189,7 +189,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { se.dim2 = key.z(); se.reserved = 0; se.offset = offset; - auto count = hashes.byshape[key].size(); + auto count = hashes.at(key).size(); num_cubes += count; se.size = count * XYZ_SIZE * n; 
offset += se.size; @@ -213,7 +213,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { auto time_start = std::chrono::steady_clock::now(); for (auto &key : keys) { - for (auto &subset : hashes.byshape[key]) { + for (auto &subset : hashes.at(key)) { auto itr = subset.begin(); ptrdiff_t dist = subset.size(); From b79f161bab09fb6e74f964c537d69d9742b4a4dc Mon Sep 17 00:00:00 2001 From: JATothrim Date: Sat, 19 Aug 2023 14:23:52 +0300 Subject: [PATCH 36/42] libmappedfile: Provide standalone I/O operations for file Implement few basic operations in mapped::file so that mapped::region is not needed for these: - readAt() and writeAt() - copyAt() is the most interesting because the data copy is done by the operating system. Signed-off-by: JATothrim --- cpp/libraries/mapped_file.cpp | 40 +++++++++++++++++++++++++++++++++++ cpp/libraries/mapped_file.hpp | 23 ++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index e7261dc..f0e4b0f 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -148,6 +148,46 @@ int file::truncate(seekoff_t newsize) { return 0; } +int file::readAt(seekoff_t fpos, len_t size, void* dataout) const +{ + ssize_t rd = pread(fd, dataout, size, fpos); + if (rd != (signed)size) { + std::fprintf(stderr, "Error reading data from file:%s\n", std::strerror(errno)); + return -1; + } + return 0; +} + +int file::writeAt(seekoff_t fpos, len_t size, const void* datain) +{ + std::lock_guard lock(mut); + + ssize_t rd = pwrite(fd, datain, size, fpos); + if (rd != (signed)size) { + std::fprintf(stderr, "Error writing data into file:%s\n", std::strerror(errno)); + return -1; + } + + fd_size = std::max(fd_size, fpos+size); + return 0; +} + +int file::copyAt(std::shared_ptr other, seekoff_t other_fpos, len_t size, seekoff_t dest_fpos) +{ + off64_t srcp = other_fpos; + off64_t dstp = dest_fpos; + ssize_t cpy = ::copy_file_range(other->fd, &srcp, fd, 
&dstp, size, 0); + if (cpy != (signed)size) { + std::fprintf(stderr, "Error copying file data:%s\n", std::strerror(errno)); + return -1; + } + + std::lock_guard lock(mut); + fd_size = std::max(fd_size, dest_fpos+size); + return 0; +} + + /** * Mapped region POSIX/Linux compatible implementation. */ diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index b86657a..ce3c2e3 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -538,6 +538,29 @@ class file : public std::enable_shared_from_this { */ int truncate(seekoff_t newsize); + /** + * Read @size bytes starting at file offset @fpos + * @note copies [fpos, fpos+size] into [dataout, dataout+size] + * @return non-zero if error occurred. + */ + int readAt(seekoff_t fpos, len_t size, void* dataout) const; + + /** + * Write @size bytes starting at file offset @fpos + * @note copies [datain, datain+size] into [fpos, fpos+size] + * @note the file size after writeAt() is std::max(size(), fpos+size) + * @return non-zero if error occurred. + */ + int writeAt(seekoff_t fpos, len_t size, const void* datain); + + /** + * Copy @size bytes starting at file offset @other_fpos + * from @other file copying the data at @dest_fpos in this file. + * @note copies from other:[other_fpos, other_fpos+size] into this:[dest_fpos, dest_fpos+size] + * @note if other is same as *this the destination range cannot overlap with the source range. + */ + int copyAt(std::shared_ptr other, seekoff_t other_fpos, len_t size, seekoff_t dest_fpos); + /** * Current length of the file * The file EOF (end-of-file) is at this position. From 64278c8e1bfd81cca15d2d21a2613fee38947670 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 16 Aug 2023 22:20:25 +0300 Subject: [PATCH 37/42] Hashy CubeSwapper Implement way to temporally dump the cube data into disk storage in order to save system memory. 
For a `./cubes -n 13 -w -s -u` run, the heaptrack tool reports: - total runtime: 26min 18s - peak RSS: 2.4 GB - peak heap memory: 978 MB This confirms that only the std::unordered_set<> internal nodes (and the lookup array) are kept in memory. A slowdown is expected, as accessing an element reads it from the disk. The swap files are named `storage_N.bin` (N being a running counter) in the cache folder. These files are normally deleted as soon as they are no longer needed. Important!! The process can open so many files simultaneously that the system NOFILE limit is reached. This limit should be raised with `ulimit -n 128000` to avoid terminating the program. The minimum number of open file handles is at least: (number of shapes) * 32 - CubeSwapSet is a specialized std::unordered_set<> that stores the cube data in a file. - CubeStorage acts as a pseudo-allocator for the cube data. - CubePtr is the key type inserted into CubeSwapSet. This is only a 64-bit offset into the backing file, and a CubePtr is owned by the CubeStorage that created it. - CubePtr::get(const CubeStorage&) reads out the Cube from the storage. Hashy users are adapted to use it where needed. - Clearing Hashy is now quite fast because there is no memory to be freed for CubePtrs. Subsubhashy::clear() simply deletes the data and the backing file. - Compiling in C++20 mode enables a speed-up by allowing Subsubhashy::contains() to work with both Cube and CubePtr types.
Signed-off-by: JATothrim --- cpp/CMakeLists.txt | 1 + cpp/include/cubeSwapSet.hpp | 178 ++++++++++++++++++++++++++++++++++++ cpp/include/hashes.hpp | 86 +++++++++++++---- cpp/include/newCache.hpp | 5 +- cpp/src/cubeSwapSet.cpp | 121 ++++++++++++++++++++++++ cpp/src/cubes.cpp | 45 ++++----- cpp/src/newCache.cpp | 11 ++- 7 files changed, 393 insertions(+), 54 deletions(-) create mode 100644 cpp/include/cubeSwapSet.hpp create mode 100644 cpp/src/cubeSwapSet.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 68344fe..ad05812 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,7 @@ add_library(CubeObjs OBJECT "src/cubes.cpp" "src/rotations.cpp" "src/newCache.cpp" + "src/cubeSwapSet.cpp" ) ConfigureTarget(CubeObjs) diff --git a/cpp/include/cubeSwapSet.hpp b/cpp/include/cubeSwapSet.hpp new file mode 100644 index 0000000..b7a2e5f --- /dev/null +++ b/cpp/include/cubeSwapSet.hpp @@ -0,0 +1,178 @@ +#pragma once +#ifndef OPENCUBES_CUBE_DISKSWAP_SET_HPP +#define OPENCUBES_CUBE_DISKSWAP_SET_HPP + +#include +#include +#include +#include +#include +#include + +#include "cube.hpp" +#include "mapped_file.hpp" + +/** + * Implement std::unordered_set<> that stores element data in a file. + * + * Cubes stored with size N in the set have constant cost of RAM memory: + * Only the std::unordered_set<> itself and the internals nodes are stored in RAM. + * The element *data* (i.e. XYZ data) is stored in the file. + * The performance cost is that each time the element is accessed + * the data has to be read back from the file. + * (Iterating the entire CubeSwapSet involves reading the entire backing file) + * + * Clearing the CubeSwapSet does not release the backing file space managed by CubeStorage. + * Call to CubeStorage::discard() is required after clearing or destructing + * the CubeSwapSet instance to cleanup the file. + * Elements cannot be removed one-by-one. + */ +class CubeStorage; + +/** + * Overlay that reads the cube data from the backing file. 
+ * CubePtr needs its associated CubeStorage instance to be able to + * access its contents with CubePtr::get() + * The associated CubeStorage owning the CubePtr + * should always be available where CubePtr is used. + */ +class CubePtr { + protected: + mapped::seekoff_t m_seek = 0; + + public: + explicit CubePtr(mapped::seekoff_t offset) : m_seek(offset) {} + CubePtr(const CubePtr& c) : m_seek(c.m_seek) {} + + /** + * Get the Cube pointed by this instance. + */ + Cube get(const CubeStorage& storage) const; + + template + void copyout(const CubeStorage& storage, size_t n, Itr out) const { + auto tmp = get(storage); + std::copy_n(tmp.begin(), n, out); + } + + mapped::seekoff_t seek() const { return m_seek; } +}; + +/** + * Stateful comparator for Cubeptr + */ +class CubePtrEqual { + protected: + const CubeStorage* m_storage = nullptr; + public: + // C++20 feature: + using is_transparent = void; + + CubePtrEqual(const CubeStorage* ctx) : m_storage(ctx) {} + CubePtrEqual(const CubePtrEqual& ctx) : m_storage(ctx.m_storage) {} + + bool operator()(const CubePtr& a, const CubePtr& b) const { return a.get(*m_storage) == b.get(*m_storage); } + + bool operator()(const Cube& a, const CubePtr& b) const { return a == b.get(*m_storage); } + + bool operator()(const CubePtr& a, const Cube& b) const { return a.get(*m_storage) == b; } +}; + +class CubePtrHash { + protected: + const CubeStorage* m_storage = nullptr; + public: + // C++20 feature: + using is_transparent = void; + using transparent_key_equal = CubePtrEqual; + + CubePtrHash(const CubeStorage* ctx) : m_storage(ctx) {} + CubePtrHash(const CubePtrHash& ctx) : m_storage(ctx.m_storage) {} + + size_t operator()(const Cube& x) const { + std::size_t seed = x.size(); + for (auto& p : x) { + auto x = HashXYZ()(p); + seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } + + size_t operator()(const CubePtr& x) const { + auto cube = x.get(*m_storage); + std::size_t seed = cube.size(); + for (auto& p : cube) { + 
auto x = HashXYZ()(p); + seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); + } + return seed; + } +}; + +class CubeStorage { + protected: + std::mutex m_mtx; + std::filesystem::path m_fpath; + std::shared_ptr m_file; + std::unique_ptr m_map; + + static std::atomic m_init_num; + const size_t m_cube_size; + mapped::seekoff_t m_prev_seek = 0; + mapped::seekoff_t m_alloc_seek = 0; + + public: + /** + * Initialize Cube file storage + * @param fname directory where to store the backing file. + * @param n The storage is reserved in n sized chunks. + * This should be equal to Cube::size() that are passed into allocate() + * as no other allocation size is supported. + * @note the backing file creation is delayed until allocate() is called first time. + */ + CubeStorage(std::filesystem::path path, size_t n); + ~CubeStorage(); + + // not copyable + CubeStorage(const CubeStorage&) = delete; + CubeStorage& operator=(const CubeStorage&) = delete; + // move constructible: but only if no allocations exists + CubeStorage(CubeStorage&& mv); + CubeStorage& operator=(CubeStorage&& mv) = delete; + + size_t cubeSize() const { return m_cube_size; } + + /** + * Store Cube data into the backing file. + * Returns CubePtr that can be inserted into CubeSwapSet. + * @note cube.size() must be equal to this->cubeSize() + */ + CubePtr allocate(const Cube& cube); + + /** + * Revert the effect of last allocate() + */ + void cancel_allocation(); + + /** + * Retrieve the cube data from the backing file. + */ + Cube read(const CubePtr& x) const; + + /** + * Drop all stored data. + * Shrinks the backing file to zero size and deletes it. + */ + void discard(); +}; + +/** + * CubeStorage enabled std::unordered_set<> + * + * The CubeSwapSet must be constructed with already initialized + * stateful instances of CubePtrEqual and CubePtrHash functors + * that resolve the CubePtr instance using the CubeStorage instance. 
+ */ +using CubeSwapSet = std::unordered_set; + +#endif \ No newline at end of file diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 7234bd3..79bedfb 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -3,12 +3,15 @@ #define OPENCUBES_HASHES_HPP #include #include +#include +#include #include #include #include #include #include "cube.hpp" +#include "cubeSwapSet.hpp" #include "utils.hpp" struct HashCube { @@ -27,24 +30,30 @@ using CubeSet = std::unordered_set>; class Subsubhashy { protected: - CubeSet set; + CubeStorage set_storage; + CubeSwapSet set; mutable std::shared_mutex set_mutex; public: + explicit Subsubhashy(std::filesystem::path path, size_t n) : set_storage(path, n), set(1, CubePtrHash(&set_storage), CubePtrEqual(&set_storage)) {} + template void insert(CubeT &&c) { std::lock_guard lock(set_mutex); - set.emplace(std::forward(c)); + auto [itr, isnew] = set.emplace(set_storage.allocate(std::forward(c))); + if (!isnew) { + set_storage.cancel_allocation(); + } } +#if __cplusplus > 201703L +// todo: need C++17 equivalent for *generic* +// contains() or find() that accepts both Cube and CubePtr types bool contains(const Cube &c) const { std::shared_lock lock(set_mutex); - auto itr = set.find(c); - if (itr != set.end()) { - return true; - } - return false; + return set.contains(c); } +#endif auto size() const { std::shared_lock lock(set_mutex); @@ -57,27 +66,45 @@ class Subsubhashy { set.reserve(1); } + // Get CubeStorage instance. + // [this->begin(), this->end()] iterated CubePtr's + // Can be resolved with CubePtr::get(this->storage()) + // that returns copy of the data as Cube. 
+ const CubeStorage &storage() const { return set_storage; } + auto begin() const { return set.begin(); } auto end() const { return set.end(); } auto begin() { return set.begin(); } auto end() { return set.end(); } }; -template class Subhashy { protected: - std::array byhash; + std::deque byhash; public: + Subhashy(int NUM, size_t N, std::filesystem::path path) { + for (int i = 0; i < NUM; ++i) { + byhash.emplace_back(path, N); + } + } + template void insert(CubeT &&c) { HashCube hash; - auto idx = hash(c) % NUM; + auto idx = hash(c) % byhash.size(); auto &set = byhash[idx]; - if (!set.contains(c)) set.insert(std::forward(c)); +#if __cplusplus > 201703L + if (set.contains(c)) return; +#endif + set.insert(std::forward(c)); // printf("new size %ld\n\r", byshape[shape].size()); } + void clear() { + for (auto &set : byhash) set.clear(); + } + auto size() const { size_t sum = 0; for (auto &set : byhash) { @@ -95,7 +122,9 @@ class Subhashy { class Hashy { protected: - std::map> byshape; + std::map byshape; + std::filesystem::path base_path; + int N; mutable std::shared_mutex set_mutex; public: @@ -111,24 +140,41 @@ class Hashy { return out; } + explicit Hashy(std::string path = ".") : base_path(path) {} + void init(int n) { // create all subhashy which will be needed for N - std::lock_guard lock(set_mutex); - for (auto s : generateShapes(n)) byshape[s].size(); + N = n; + for (auto s : generateShapes(n)) { + initSubHashy(n, s); + } std::printf("%ld sets by shape for N=%d\n\r", byshape.size(), n); } - Subhashy<32> &at(XYZ shape) { + Subhashy &initSubHashy(int n, XYZ s) { + assert(N == n); + + auto itr = byshape.find(s); + if (itr == byshape.end()) { + auto [itr, isnew] = byshape.emplace(s, Subhashy(32, n, base_path)); + assert(isnew); + itr->second.size(); + return itr->second; + } else { + return itr->second; + } + } + + Subhashy &at(XYZ shape) { std::shared_lock lock(set_mutex); auto itr = byshape.find(shape); if (itr != byshape.end()) { return itr->second; } - 
lock.unlock(); - // Not sure if this is supposed to happen normally - // if init() creates all subhashys required. - std::lock_guard elock(set_mutex); - return byshape[shape]; + // should never get here... + std::printf("BUG: missing shape [%2d %2d %2d]:\n\r", shape.x(), shape.y(), shape.z()); + std::abort(); + return *((Subhashy *)0); } template diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 1d62940..b9705ce 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -167,9 +167,8 @@ class FlatCache : public ICache { for (auto& [shape, set] : hashes) { auto begin = allXYZs.data() + allXYZs.size(); for (auto& subset : set) { - for (auto& cube : subset) - // allXYZs.emplace_back(allXYZs.end(), subset.set.begin(), subset.set.end()); - std::copy(cube.begin(), cube.end(), std::back_inserter(allXYZs)); + for (auto& cubeptr : subset) + cubeptr.copyout(subset.storage(), n, std::back_inserter(allXYZs)); } auto end = allXYZs.data() + allXYZs.size(); // std::printf(" SR %p %p\n", (void*)begin, (void*)end); diff --git a/cpp/src/cubeSwapSet.cpp b/cpp/src/cubeSwapSet.cpp new file mode 100644 index 0000000..1f391ff --- /dev/null +++ b/cpp/src/cubeSwapSet.cpp @@ -0,0 +1,121 @@ +#include "cubeSwapSet.hpp" + +#include + +std::atomic CubeStorage::m_init_num(0); + +CubeStorage::CubeStorage(std::filesystem::path path, size_t n) : m_cube_size(n) { + // Generate file name: + m_fpath = path / ("storage_" + std::to_string(m_init_num.fetch_add(1)) + ".bin"); +} + +CubeStorage::~CubeStorage() { discard(); } + +CubeStorage::CubeStorage(CubeStorage&& mv) + : m_fpath(std::move(mv.m_fpath)), m_file(std::move(mv.m_file)), m_map(std::move(mv.m_map)), m_cube_size(mv.m_cube_size), m_alloc_seek(mv.m_alloc_seek) { + // no allocations can exist in the moved from object: + assert(m_alloc_seek == 0); +} + +CubePtr CubeStorage::allocate(const Cube& cube) { + std::lock_guard lock(m_mtx); + + if (!m_file) { + using namespace mapped; + // file not open yet. 
+ m_file = std::make_shared(); + if (m_file->openrw(m_fpath.c_str(), 0, file::CREATE | file::RESIZE | file::FSTUNE)) { + std::printf("CubeStorage::allocate() ERROR: Failed to create backing file: %s\n", m_fpath.c_str()); + std::abort(); + } + // Map some data. + // todo: mapped::file could provide following: + // m_file->readAt(offset,size,datain) + // m_file->writeAt(offset,size,dataout) + // so that we don't need this mapping for I/O. + // However the mapped::region::readAt() will be faster if + // the area fits in the region window and is accessed multiple times. + m_map = std::make_unique(m_file, 0, PAGE_SIZE); + } + + if (m_cube_size != cube.size()) { + std::printf("CubeStorage::allocate() ERROR: Cube size different than initialized"); + std::abort(); + } + + m_map->writeAt(m_alloc_seek, m_cube_size * sizeof(XYZ), cube.data()); + + auto fpos = m_alloc_seek; + m_prev_seek = m_alloc_seek; + m_alloc_seek += m_cube_size * sizeof(XYZ); + + return CubePtr(fpos); +} + +void CubeStorage::cancel_allocation() { + std::lock_guard lock(m_mtx); + // last allocation was mistake. + if (m_alloc_seek >= m_cube_size * sizeof(XYZ)) m_alloc_seek -= m_cube_size * sizeof(XYZ); + + // allocate() -> cancel_allocation() must be serialized: + assert(m_alloc_seek == m_prev_seek); +} + +Cube CubeStorage::read(const CubePtr& x) const { + // todo: How to speed up: + // Option 1: + // Memory-map the file in 2 MiB aligned chunks: + // This would speed up reading the same data multiple times. + // Chunk is mapped by rounding down the x.seek() to multiple of 2MiB + // and creating 2MiB sized mapping at that file offset. + // Caching the last file offset used we could detect + // when we have do do jump() to the next "reading window". + // -Plus: let the kernel do the caching for us. + // -Plus: no memory overhead. + // -Minus: if implemented with just single memory-map per CubeStorage + // threads can fight about what chunk is currently mapped. 
+ // Option 2: + // Implement fine-grained read-cache with: + // std::unordered_map + // And begin evicting them once the cache is full using + // cache eviction policy. (E.g. least-recently-used LRU) + // The cache should be made to be thread local + // so it won't interfere with other workers. + // -Plus: We decide how much data to keep in memory + // -Plus: No need to remap the memory. + // -Minus: complicated to implement. + Cube tmp(m_cube_size); + m_map->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); + return tmp; +} + +void CubeStorage::discard() { + std::lock_guard lock(m_mtx); + + if (m_file) { + // avoid flushing any more data to disk: + m_map->discard(0, m_map->regionSize()); + m_map.reset(); + m_file->truncate(0); + m_file.reset(); + m_alloc_seek = 0; + + // Try remove the file created... + std::error_code ec; + auto stat = std::filesystem::status(m_fpath, ec); + if (!ec && std::filesystem::is_regular_file(stat)) { + if (!std::filesystem::remove(m_fpath, ec)) { + std::printf("WARN: failed to remove file: %s", m_fpath.c_str()); + } + } else { + std::printf("WARN: failed to get file status: %s", m_fpath.c_str()); + } + } +} + +Cube CubePtr::get(const CubeStorage& storage) const { + // CubePtr::get() is really just an convenience function... + // However this cannot be implemented in the header file because + // CubeStorage definition is not known. 
+ return storage.read(*this); +} diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 6b60085..89b4e12 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -2,13 +2,13 @@ #include #include +#include #include +#include #include #include #include #include -#include -#include #include "cube.hpp" #include "hashes.hpp" @@ -29,11 +29,7 @@ struct Workset { XYZ targetShape, shape, expandDim; bool notSameShape; Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) - : hashes(hashes) - , targetShape(targetShape) - , shape(shape) - , expandDim(expandDim) - , notSameShape(notSameShape) {} + : hashes(hashes), targetShape(targetShape), shape(shape), expandDim(expandDim), notSameShape(notSameShape) {} void setRange(ShapeRange &data) { _begin_total = data.begin(); @@ -139,7 +135,7 @@ struct Workset { struct Worker { std::shared_ptr ws; int id; - int state = 3; // 1 == completed/waiting for job, 2 == processing, 3 == job assigned. + int state = 3; // 1 == completed/waiting for job, 2 == processing, 3 == job assigned. std::mutex mtx; std::condition_variable cond; std::condition_variable cond2; @@ -156,7 +152,7 @@ struct Worker { void launch(std::shared_ptr ws_) { std::unique_lock lock(mtx); - while(state > 1) { + while (state != 1) { cond2.wait(lock); } ws = ws_; @@ -166,7 +162,7 @@ struct Worker { void sync() { std::unique_lock lock(mtx); - while(state > 1) { + while (state != 1) { cond2.wait(lock); } ws.reset(); @@ -175,13 +171,11 @@ struct Worker { void run() { std::unique_lock lock(mtx); std::printf("thread nro. 
%d started.\n", id); - while(state) { + while (state) { state = 1; cond2.notify_one(); - while(state == 1) - cond.wait(lock); - if(!state) - return; + while (state == 1) cond.wait(lock); + if (!state) return; state = 2; // std::printf("start %d\n", id); auto subset = ws->getPart(); @@ -207,7 +201,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (!std::filesystem::is_directory(base_path)) { std::filesystem::create_directory(base_path); } - Hashy hashes; + Hashy hashes(base_path); if (n < 1) return {}; else if (n == 1) { @@ -248,12 +242,13 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c auto start = std::chrono::steady_clock::now(); uint32_t totalOutputShapes = hashes.numShapes(); uint32_t outShapeCount = 0; - auto prevShapes = Hashy::generateShapes(n - 1); - for (auto &tup : hashes) { + + for (const auto &tup : hashes) { outShapeCount++; XYZ targetShape = tup.first; std::printf("process output shape %3d/%d [%2d %2d %2d]\n\r", outShapeCount, totalOutputShapes, targetShape.x(), targetShape.y(), targetShape.z()); + for (uint32_t sid = 0; sid < prevShapes.size(); ++sid) { auto &shape = prevShapes[sid]; int diffx = targetShape.x() - shape.x(); @@ -289,7 +284,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c ws->setRange(s); // Wait for jobs to complete. - for (auto& thr : workers) { + for (auto &thr : workers) { thr.sync(); } std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); @@ -297,25 +292,23 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c // Because the workset is held by shared_ptr // main thread can do above preparation work in parallel // while the jobs are running. - for (auto& thr : workers) { + for (auto &thr : workers) { thr.launch(ws); } } // Wait for jobs to complete. 
- for (auto& thr : workers) { + for (auto &thr : workers) { thr.sync(); } std::printf(" num: %lu\n\r", hashes.at(targetShape).size()); totalSum += hashes.at(targetShape).size(); if (write_cache && split_cache) { cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + - std::to_string(targetShape.z()) + ".bin", - hashes, n); + std::to_string(targetShape.z()) + ".bin", + hashes, n); } if (split_cache) { - for (auto &subset : hashes.at(targetShape)) { - subset.clear(); - } + hashes.at(targetShape).clear(); } } diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index d54b057..cfb078a 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -1,4 +1,5 @@ #include "newCache.hpp" +#include "cubeSwapSet.hpp" #include @@ -201,11 +202,11 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { auto xyz = std::make_shared>(file_, (*shapeEntry)[0].offset, num_cubes * n); auto put = xyz->get(); - auto copyrange = [n](CubeSet::iterator itr, CubeSet::iterator end, XYZ *dest) -> void { + auto copyrange = [n](const CubeStorage& storage, CubeSwapSet::iterator itr, CubeSwapSet::iterator end, XYZ *dest) -> void { while (itr != end) { static_assert(sizeof(XYZ) == XYZ_SIZE); - assert(itr->size() == n); - itr->copyout(n, dest); + assert(storage.cubeSize() == n); + itr->copyout(storage, n, dest); dest += n; ++itr; } @@ -233,14 +234,14 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { std::flush(std::cout); std::lock_guard lock(m_mtx); - m_copy.emplace_back(std::bind(copyrange, start, itr, dest)); + m_copy.emplace_back(std::bind(copyrange, std::ref(subset.storage()), start, itr, dest)); ++m_num_copys; m_run.notify_all(); } // copy remainder, if any. 
if (dist) { std::lock_guard lock(m_mtx); - m_copy.emplace_back(std::bind(copyrange, itr, subset.end(), put)); + m_copy.emplace_back(std::bind(copyrange, std::ref(subset.storage()), itr, subset.end(), put)); ++m_num_copys; m_run.notify_all(); put += n * dist; From ea703294761622e4a5f7d4faae1243dcb9f9c9ea Mon Sep 17 00:00:00 2001 From: JATothrim Date: Sat, 19 Aug 2023 17:18:48 +0300 Subject: [PATCH 38/42] CubeSwapper: I/O optimizations - Thread-local read-cache for CubeStorage: The read-cache is private for each thread that calls CubeStorage::read() The cache is shared by all CubeStorage instances per thread. Entries are evicted from the cache with LRU policy. (least-recently-used) - Massive CacheWriter optimizations: The written CubeStorage file is extremely useful for CacheWriter. CacheWriter now uses mapped::file::copyAt() to merge the CubeStorage file into the saved cache-file as-is. This completely by-passes iterating the CubeSwapSet Cube-by-Cube and makes CacheWriter::save() return without waiting data copy process to actually complete. Once copy job is complete the source CubeStorage file is deleted. CubeStorage::discard() now simply drops reference to the old file. Signed-off-by: JATothrim --- cpp/include/cubeSwapSet.hpp | 162 ++++++++++++++++++---------- cpp/include/hashes.hpp | 27 +++-- cpp/src/cubeSwapSet.cpp | 207 ++++++++++++++++++++++++------------ cpp/src/newCache.cpp | 125 +++++++++++----------- 4 files changed, 318 insertions(+), 203 deletions(-) diff --git a/cpp/include/cubeSwapSet.hpp b/cpp/include/cubeSwapSet.hpp index b7a2e5f..4a5e6f3 100644 --- a/cpp/include/cubeSwapSet.hpp +++ b/cpp/include/cubeSwapSet.hpp @@ -2,39 +2,50 @@ #ifndef OPENCUBES_CUBE_DISKSWAP_SET_HPP #define OPENCUBES_CUBE_DISKSWAP_SET_HPP +#include #include #include -#include -#include +#include #include -#include #include "cube.hpp" #include "mapped_file.hpp" /** - * Implement std::unordered_set<> that stores element data in a file. 
+ * CubeSwapSet: Implement std::unordered_set<> that offloads XYZ data into a file: * - * Cubes stored with size N in the set have constant cost of RAM memory: - * Only the std::unordered_set<> itself and the internals nodes are stored in RAM. + * Cubes stored in the set have reduced cost of memory: + * Only the std::unordered_set<> itself and the internal nodes are stored in RAM. * The element *data* (i.e. XYZ data) is stored in the file. - * The performance cost is that each time the element is accessed - * the data has to be read back from the file. - * (Iterating the entire CubeSwapSet involves reading the entire backing file) + * The performance cost is that each time the set element is accessed + * the data is read back from the file. + * (Iterating the entire CubeSwapSet involves reading the entire file) * - * Clearing the CubeSwapSet does not release the backing file space managed by CubeStorage. - * Call to CubeStorage::discard() is required after clearing or destructing - * the CubeSwapSet instance to cleanup the file. - * Elements cannot be removed one-by-one. + * Features: + * - XYZ data is recorded sequentially into the file and + * the Cube size is not saved in the storage file. + * - Cube XYZ data length is constant in CubeStorage instance. + * - Clearing the CubeSwapSet does not release the file managed by CubeStorage. + * (CubePtr(s) cannot be erased from CubeStorage) + * - CubeStorage::read(const CubePtr&) caches up to 1024 Cubes for each thread. + * This read-cache is maintained by any thread that calls CubePtr::get(). + * CubeStorage::discard() is used to begin writing the XYZ data at new file instance. + * - CacheWriter utilizes the file instance from CubeStorage: + * the CubeSwapSet is not iterated through at all by CacheWriter + * and instead CubeStorage::getFile() is assigned into a copy job and then + * copied into the cache-file with mapped::file::copyAt(). + * The source storage file is deleted once the copy is completed. 
+ * This provides wait-free saving of the cache-file and uses + * minimal amount of system memory. */ class CubeStorage; /** - * Overlay that reads the cube data from the backing file. - * CubePtr needs its associated CubeStorage instance to be able to - * access its contents with CubePtr::get() - * The associated CubeStorage owning the CubePtr - * should always be available where CubePtr is used. + * CubePtr: "File Pointer to Cube" that reads the cube data from file. + * CubePtr needs CubeStorage instance to be able to access + * its contents with CubePtr::get(). + * The associated CubeStorage should always be available + * in context where CubePtr(s) data is accessed. */ class CubePtr { protected: @@ -46,13 +57,23 @@ class CubePtr { /** * Get the Cube pointed by this instance. + * @note The Cube is cached in the thread-local read-cache. + * @warn + * The Cube object is local to calling thread and shall + * not be passed into other threads. + */ + const Cube& get(const CubeStorage& storage) const; + + /** + * Raw data copy. By-passes the thread-local cache. 
*/ - Cube get(const CubeStorage& storage) const; + void copyout(const CubeStorage& storage, size_t n, XYZ* out) const; template void copyout(const CubeStorage& storage, size_t n, Itr out) const { - auto tmp = get(storage); - std::copy_n(tmp.begin(), n, out); + std::vector buff(n); + copyout(storage, n, buff.data()); + std::copy_n(buff.begin(), n, out); } mapped::seekoff_t seek() const { return m_seek; } @@ -64,42 +85,36 @@ class CubePtr { class CubePtrEqual { protected: const CubeStorage* m_storage = nullptr; + public: - // C++20 feature: + // C++20 feature: using is_transparent = void; CubePtrEqual(const CubeStorage* ctx) : m_storage(ctx) {} CubePtrEqual(const CubePtrEqual& ctx) : m_storage(ctx.m_storage) {} - bool operator()(const CubePtr& a, const CubePtr& b) const { return a.get(*m_storage) == b.get(*m_storage); } - - bool operator()(const Cube& a, const CubePtr& b) const { return a == b.get(*m_storage); } - - bool operator()(const CubePtr& a, const Cube& b) const { return a.get(*m_storage) == b; } + bool operator()(const CubePtr& a, const CubePtr& b) const { + // todo: there is possibility that + // a.get() returned cube is *deleted* from the cache by b.get() + // The read-cache size must be at least 3 to avoid this. 
+ return a.get(*m_storage) == b.get(*m_storage); + } }; class CubePtrHash { protected: const CubeStorage* m_storage = nullptr; + public: - // C++20 feature: + // C++20 feature: using is_transparent = void; using transparent_key_equal = CubePtrEqual; CubePtrHash(const CubeStorage* ctx) : m_storage(ctx) {} CubePtrHash(const CubePtrHash& ctx) : m_storage(ctx.m_storage) {} - size_t operator()(const Cube& x) const { - std::size_t seed = x.size(); - for (auto& p : x) { - auto x = HashXYZ()(p); - seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2); - } - return seed; - } - size_t operator()(const CubePtr& x) const { - auto cube = x.get(*m_storage); + auto& cube = x.get(*m_storage); std::size_t seed = cube.size(); for (auto& p : cube) { auto x = HashXYZ()(p); @@ -111,24 +126,24 @@ class CubePtrHash { class CubeStorage { protected: - std::mutex m_mtx; + mutable std::mutex m_mtx; std::filesystem::path m_fpath; std::shared_ptr m_file; - std::unique_ptr m_map; static std::atomic m_init_num; + int m_storage_version = 0; const size_t m_cube_size; - mapped::seekoff_t m_prev_seek = 0; - mapped::seekoff_t m_alloc_seek = 0; + + mapped::seekoff_t m_alloc_seek; public: /** * Initialize Cube file storage - * @param fname directory where to store the backing file. - * @param n The storage is reserved in n sized chunks. - * This should be equal to Cube::size() that are passed into allocate() - * as no other allocation size is supported. - * @note the backing file creation is delayed until allocate() is called first time. + * @param path directory where to write the storage file. + * @param n The storage is written in n sized chunks of XYZ structs. + * This should be equal to Cube::size() that are passed into local() + * Different sized Cubes in same CubeStorage instance will not work. + * @note the file creation is delayed until commit() is called first time. 
*/ CubeStorage(std::filesystem::path path, size_t n); ~CubeStorage(); @@ -136,32 +151,63 @@ class CubeStorage { // not copyable CubeStorage(const CubeStorage&) = delete; CubeStorage& operator=(const CubeStorage&) = delete; - // move constructible: but only if no allocations exists + // move constructible: but only if no allocations exists in mv CubeStorage(CubeStorage&& mv); CubeStorage& operator=(CubeStorage&& mv) = delete; size_t cubeSize() const { return m_cube_size; } /** - * Store Cube data into the backing file. - * Returns CubePtr that can be inserted into CubeSwapSet. - * @note cube.size() must be equal to this->cubeSize() + * Make thread local CubePtr instance. + * @note + * Other thread cannot access the returned CubePtr until commit() is called. + */ + CubePtr local(const Cube& cube) const; + + /** + * Publish the last local() returned CubePtr. + * commit() writes this the data into the file storage + * making it visible to all threads. + */ + void commit(); + + /** + * Discard the last local() returned CubePtr. + */ + void drop() const; + + /** + * Retrieve the cube data from the backing file + * and cache the result for the caller thread. + */ + const Cube& read(const CubePtr& x) const; + + /** + * Copy the cube data from the storage into destination buffer. + */ + void copydata(const CubePtr& x, size_t n, XYZ* destination) const; + + /** + * Explicitly clear the calling thread's read-cache. + * @note this will initialize callers read-cache instance + * if the thread has not used the read-cache yet. + * So only call this from thread that has used to read(). */ - CubePtr allocate(const Cube& cube); + void resetReadCache() const; /** - * Revert the effect of last allocate() + * Get the file name CubeStorage is using. */ - void cancel_allocation(); + std::filesystem::path fileName() const { return m_fpath; } /** - * Retrieve the cube data from the backing file. + * Get the mapped::file instance. 
+ * @note this can be null if nothing has been written to the storage yet. */ - Cube read(const CubePtr& x) const; + std::shared_ptr getFile() const { return m_file; } /** * Drop all stored data. - * Shrinks the backing file to zero size and deletes it. */ void discard(); }; @@ -171,7 +217,7 @@ class CubeStorage { * * The CubeSwapSet must be constructed with already initialized * stateful instances of CubePtrEqual and CubePtrHash functors - * that resolve the CubePtr instance using the CubeStorage instance. + * that resolve the CubePtr(s) using the CubeStorage instance. */ using CubeSwapSet = std::unordered_set; diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 79bedfb..fcbab4e 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -26,7 +26,7 @@ struct HashCube { } }; -using CubeSet = std::unordered_set>; +// using CubeSet = std::unordered_set>; class Subsubhashy { protected: @@ -40,20 +40,22 @@ class Subsubhashy { template void insert(CubeT &&c) { std::lock_guard lock(set_mutex); - auto [itr, isnew] = set.emplace(set_storage.allocate(std::forward(c))); - if (!isnew) { - set_storage.cancel_allocation(); + auto cptr = set_storage.local(std::forward(c)); + auto [itr, isnew] = set.emplace(cptr); + if (isnew) { + set_storage.commit(); + } else { + set_storage.drop(); } } -#if __cplusplus > 201703L -// todo: need C++17 equivalent for *generic* -// contains() or find() that accepts both Cube and CubePtr types bool contains(const Cube &c) const { std::shared_lock lock(set_mutex); - return set.contains(c); + auto cptr = set_storage.local(c); + auto itr = set.find(cptr); + set_storage.drop(); + return itr != set.end(); } -#endif auto size() const { std::shared_lock lock(set_mutex); @@ -64,12 +66,10 @@ class Subsubhashy { std::lock_guard lock(set_mutex); set.clear(); set.reserve(1); + set_storage.discard(); } // Get CubeStorage instance. 
- // [this->begin(), this->end()] iterated CubePtr's - // Can be resolved with CubePtr::get(this->storage()) - // that returns copy of the data as Cube. const CubeStorage &storage() const { return set_storage; } auto begin() const { return set.begin(); } @@ -94,9 +94,8 @@ class Subhashy { HashCube hash; auto idx = hash(c) % byhash.size(); auto &set = byhash[idx]; -#if __cplusplus > 201703L + if (set.contains(c)) return; -#endif set.insert(std::forward(c)); // printf("new size %ld\n\r", byshape[shape].size()); } diff --git a/cpp/src/cubeSwapSet.cpp b/cpp/src/cubeSwapSet.cpp index 1f391ff..a4c8e4c 100644 --- a/cpp/src/cubeSwapSet.cpp +++ b/cpp/src/cubeSwapSet.cpp @@ -1,6 +1,56 @@ #include "cubeSwapSet.hpp" #include +#include +#include + +/** + * thread-local read-cache for Cube(s) + */ +class ThreadCache { + public: + static ThreadCache& get(); + + struct entry { + // read-cache "key" + const CubeStorage* storage; + mapped::seekoff_t seek; + int version; + + friend bool operator==(const entry& a, const entry& b) { return std::tie(a.storage, a.seek, a.version) == std::tie(b.storage, b.seek, b.version); } + }; + + struct state { + // cached data. + Cube cube; + std::list::iterator lru; + }; + + struct entry_hash { + size_t operator()(const entry& x) const { + size_t seed = uintptr_t(x.storage); + seed ^= x.seek + 0x9e3779b9 + (seed << 6) + (seed >> 2); + seed ^= x.version + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + }; + + // Least-recently-used, LRU eviction policy list. + std::list lru; + // trick: make map with reference_wrapper + // as key so we don't need to duplicate the data from the lru list. + // surprisingly C++17 cache.find(entry) works. 
+ std::unordered_map, state, entry_hash, std::equal_to> cache; + + bool local_enabled = false; + mapped::seekoff_t local_seek = -1; + Cube local; +}; + +ThreadCache& ThreadCache::get() { + static thread_local ThreadCache instance; + return instance; +} std::atomic CubeStorage::m_init_num(0); @@ -12,12 +62,20 @@ CubeStorage::CubeStorage(std::filesystem::path path, size_t n) : m_cube_size(n) CubeStorage::~CubeStorage() { discard(); } CubeStorage::CubeStorage(CubeStorage&& mv) - : m_fpath(std::move(mv.m_fpath)), m_file(std::move(mv.m_file)), m_map(std::move(mv.m_map)), m_cube_size(mv.m_cube_size), m_alloc_seek(mv.m_alloc_seek) { + : m_fpath(std::move(mv.m_fpath)), m_file(std::move(mv.m_file)), m_cube_size(mv.m_cube_size), m_alloc_seek(mv.m_alloc_seek) { // no allocations can exist in the moved from object: assert(m_alloc_seek == 0); } -CubePtr CubeStorage::allocate(const Cube& cube) { +CubePtr CubeStorage::local(const Cube& cube) const { + auto& ctx = ThreadCache::get(); + ctx.local = cube; + ctx.local_seek = m_alloc_seek; + ctx.local_enabled = true; + return CubePtr(ctx.local_seek); +} + +void CubeStorage::commit() { std::lock_guard lock(m_mtx); if (!m_file) { @@ -25,97 +83,110 @@ CubePtr CubeStorage::allocate(const Cube& cube) { // file not open yet. m_file = std::make_shared(); if (m_file->openrw(m_fpath.c_str(), 0, file::CREATE | file::RESIZE | file::FSTUNE)) { - std::printf("CubeStorage::allocate() ERROR: Failed to create backing file: %s\n", m_fpath.c_str()); + std::printf("CubeStorage::allocate() ERROR: Failed to create file: %s\n", m_fpath.c_str()); std::abort(); } - // Map some data. - // todo: mapped::file could provide following: - // m_file->readAt(offset,size,datain) - // m_file->writeAt(offset,size,dataout) - // so that we don't need this mapping for I/O. - // However the mapped::region::readAt() will be faster if - // the area fits in the region window and is accessed multiple times. 
- m_map = std::make_unique(m_file, 0, PAGE_SIZE); } - if (m_cube_size != cube.size()) { - std::printf("CubeStorage::allocate() ERROR: Cube size different than initialized"); - std::abort(); - } - - m_map->writeAt(m_alloc_seek, m_cube_size * sizeof(XYZ), cube.data()); + auto& ctx = ThreadCache::get(); + assert(ctx.local_enabled); + assert(ctx.local_seek == m_alloc_seek); + ctx.local_enabled = false; - auto fpos = m_alloc_seek; - m_prev_seek = m_alloc_seek; + m_file->writeAt(m_alloc_seek, m_cube_size * sizeof(XYZ), ctx.local.data()); m_alloc_seek += m_cube_size * sizeof(XYZ); +} - return CubePtr(fpos); +void CubeStorage::drop() const { + auto& ctx = ThreadCache::get(); + assert(ctx.local_enabled); + ctx.local_enabled = false; + ctx.local_seek = -1; } -void CubeStorage::cancel_allocation() { - std::lock_guard lock(m_mtx); - // last allocation was mistake. - if (m_alloc_seek >= m_cube_size * sizeof(XYZ)) m_alloc_seek -= m_cube_size * sizeof(XYZ); +const Cube& CubeStorage::read(const CubePtr& x) const { + // Get thread's cache instance: + auto& ctx = ThreadCache::get(); + + // Check if x is actually the object returned by local(): + if (ctx.local_enabled && x.seek() == ctx.local_seek) { + assert(ctx.local.size() == m_cube_size); + return ctx.local; + } - // allocate() -> cancel_allocation() must be serialized: - assert(m_alloc_seek == m_prev_seek); + ThreadCache::entry key{this, x.seek(), m_storage_version}; + auto itr = ctx.cache.find(key); + if (itr != ctx.cache.end()) { + // cache-hit. + // LRU policy simply moves the element at back of the list: + if (std::next(itr->second.lru) != ctx.lru.end()) { + ctx.lru.splice(itr->second.lru, ctx.lru, ctx.lru.end()); + } + return itr->second.cube; + } else { + // cache-miss. 
+ // Evict entry at front if read-cache is full: + if (ctx.cache.size() >= 1024) { + auto rm = ctx.cache.find(ctx.lru.front()); + ctx.cache.erase(rm); + ctx.lru.pop_front(); + } + + // Read Cube data + Cube tmp(m_cube_size); + m_file->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); + + // Move it into an new read-cache entry: + auto nitr = ctx.lru.insert(ctx.lru.end(), key); + auto [itr, ok] = ctx.cache.emplace(std::ref(*nitr), ThreadCache::state{std::move(tmp), nitr}); + assert(ok); + return itr->second.cube; + } +} + +void CubeStorage::resetReadCache() const { + auto& ctx = ThreadCache::get(); + ctx.cache.clear(); + ctx.lru.clear(); } -Cube CubeStorage::read(const CubePtr& x) const { - // todo: How to speed up: - // Option 1: - // Memory-map the file in 2 MiB aligned chunks: - // This would speed up reading the same data multiple times. - // Chunk is mapped by rounding down the x.seek() to multiple of 2MiB - // and creating 2MiB sized mapping at that file offset. - // Caching the last file offset used we could detect - // when we have do do jump() to the next "reading window". - // -Plus: let the kernel do the caching for us. - // -Plus: no memory overhead. - // -Minus: if implemented with just single memory-map per CubeStorage - // threads can fight about what chunk is currently mapped. - // Option 2: - // Implement fine-grained read-cache with: - // std::unordered_map - // And begin evicting them once the cache is full using - // cache eviction policy. (E.g. least-recently-used LRU) - // The cache should be made to be thread local - // so it won't interfere with other workers. - // -Plus: We decide how much data to keep in memory - // -Plus: No need to remap the memory. - // -Minus: complicated to implement. 
- Cube tmp(m_cube_size); - m_map->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); - return tmp; +void CubeStorage::copydata(const CubePtr& x, size_t n, XYZ* destination) const { + // copydata() doesn't use thread's read-cache + // so local() cannot be active: + assert(!ThreadCache::get().local_enabled); + m_file->readAt(x.seek(), n * sizeof(XYZ), destination); } void CubeStorage::discard() { std::lock_guard lock(m_mtx); if (m_file) { - // avoid flushing any more data to disk: - m_map->discard(0, m_map->regionSize()); - m_map.reset(); - m_file->truncate(0); + // The backing file is kept intact + // so that CacheWriter can process it. m_file.reset(); m_alloc_seek = 0; - - // Try remove the file created... - std::error_code ec; - auto stat = std::filesystem::status(m_fpath, ec); - if (!ec && std::filesystem::is_regular_file(stat)) { - if (!std::filesystem::remove(m_fpath, ec)) { - std::printf("WARN: failed to remove file: %s", m_fpath.c_str()); - } - } else { - std::printf("WARN: failed to get file status: %s", m_fpath.c_str()); - } + // Thread read-cache problem: + // discard() must cause eviction of all entries for each + // thread's read cache that point into this. + // This done by incrementing m_storage_version: + // the entries can't simply be found as they are + // made with m_storage_version - 1 value. + // The entries are eventually evicted by + // the read-cache this way. + ++m_storage_version; } } -Cube CubePtr::get(const CubeStorage& storage) const { +const Cube& CubePtr::get(const CubeStorage& storage) const { // CubePtr::get() is really just an convenience function... // However this cannot be implemented in the header file because // CubeStorage definition is not known. return storage.read(*this); } + +void CubePtr::copyout(const CubeStorage& storage, size_t n, XYZ* out) const { + // CubePtr::copyout() is really just an convenience function... 
+ // However this cannot be implemented in the header file because + // CubeStorage definition is not known. + storage.copydata(*this, n, out); +} \ No newline at end of file diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index cfb078a..8ae2f5b 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -1,8 +1,9 @@ #include "newCache.hpp" -#include "cubeSwapSet.hpp" #include +#include "cubeSwapSet.hpp" + CacheReader::CacheReader() : path_(""), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} void CacheReader::printHeader() { @@ -167,18 +168,24 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { return; } + // Write header: auto header = std::make_shared>(file_, 0); (*header)->magic = cacheformat::MAGIC; (*header)->n = n; (*header)->numShapes = hashes.numShapes(); (*header)->numPolycubes = hashes.size(); + header->flush(); std::vector keys; keys.reserve((*header)->numShapes); for (auto &pair : hashes) keys.push_back(pair.first); std::sort(keys.begin(), keys.end()); + // Write shape table: auto shapeEntry = std::make_shared>(file_, header->getEndSeek(), (*header)->numShapes); + header.reset(); + + static_assert(XYZ_SIZE == sizeof(XYZ), "XYZ_SIZE differs from sizeof(XYZ)"); uint64_t offset = shapeEntry->getEndSeek(); size_t num_cubes = 0; @@ -195,89 +202,79 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { se.size = count * XYZ_SIZE * n; offset += se.size; } + shapeEntry->flush(); // put XYZs - // Serialize large CubeSet(s) in parallel. - - auto xyz = std::make_shared>(file_, (*shapeEntry)[0].offset, num_cubes * n); - auto put = xyz->get(); - - auto copyrange = [n](const CubeStorage& storage, CubeSwapSet::iterator itr, CubeSwapSet::iterator end, XYZ *dest) -> void { - while (itr != end) { - static_assert(sizeof(XYZ) == XYZ_SIZE); - assert(storage.cubeSize() == n); - itr->copyout(storage, n, dest); - dest += n; - ++itr; + // Schedule merging of the cache file. 
+ // CubeSwapSet enables massive optimizations in how + // CacheWriter can merge the SubsubHashy's data into the final cache file: + // - copystorage lambda takes the source file and it's file name from the + // SubsubHashy::storage() returned CubeStorage. + // - mapped::file::copyAt() is used to efficiently copy the source file contents into this cache file + // - Finally the copystorage lambda *deletes* the source storage file + // The main program does not need to wait for this process to complete. + + // copystorage takes shared ownership of the file_ + auto copystorage = [n, file = file_](std::shared_ptr src, std::filesystem::path rmname, size_t num, mapped::seekoff_t dest) -> void { + file->copyAt(src, 0, num * n * sizeof(XYZ), dest); + src.reset(); + + // Try remove the source storage file. + std::error_code ec; + auto stat = std::filesystem::status(rmname, ec); + if (!ec && std::filesystem::is_regular_file(stat)) { + if (!std::filesystem::remove(rmname, ec)) { + std::printf("WARN: failed to remove file: %s", rmname.c_str()); + } + } else { + std::printf("WARN: failed to get file status: %s", rmname.c_str()); } }; + mapped::seekoff_t fileEnd = shapeEntry->getEndSeek(); auto time_start = std::chrono::steady_clock::now(); - for (auto &key : keys) { - for (auto &subset : hashes.at(key)) { - auto itr = subset.begin(); - - ptrdiff_t dist = subset.size(); - // distribute if range is large enough. - auto skip = std::max(4096L, std::max(1L, dist / (signed)m_flushers.size())); - while (dist > skip) { - auto start = itr; - auto dest = put; - - auto inc = std::min(dist, skip); - std::advance(itr, inc); - put += n * inc; - dist = std::distance(itr, subset.end()); - - auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); - std::printf("writing data %5.2f%% ... 
\r", done); - std::flush(std::cout); - - std::lock_guard lock(m_mtx); - m_copy.emplace_back(std::bind(copyrange, std::ref(subset.storage()), start, itr, dest)); - ++m_num_copys; - m_run.notify_all(); - } - // copy remainder, if any. - if (dist) { - std::lock_guard lock(m_mtx); - m_copy.emplace_back(std::bind(copyrange, std::ref(subset.storage()), itr, subset.end(), put)); + for (size_t i = 0; i < keys.size(); ++i) { + auto put = (*shapeEntry)[i].offset; + for (auto &subset : hashes.at(keys[i])) { + ptrdiff_t num = subset.size(); + if (num) { + // By pass iterating the Subsubhashy entirely + // and copy the data from CubeStorage file *directly* into this file. + // the Cube data does end up in different order than when copying one-by-one. + // But we don't care as the order is random already. + // the copy job also deletes the CubeStorage::fileName() file from the disk + // once the data copy completes. + std::unique_lock lock(m_mtx); + m_copy.emplace_back(std::bind(copystorage, subset.storage().getFile(), subset.storage().fileName(), num, put)); ++m_num_copys; m_run.notify_all(); - put += n * dist; - - auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); - std::printf("writing data %5.2f%% ... \r", done); + std::printf("scheduled copy jobs: %*d ... \r", 3, (int)m_num_copys); std::flush(std::cout); } + put += num * n * XYZ_SIZE; } + fileEnd = std::max(fileEnd, put); } + shapeEntry.reset(); - // sanity check: - assert(put == (*xyz).get() + num_cubes * n); - - // sync up. + // sync up a bit. + // don't allow the copy job queue to grow indefinitely + // if the disk can't keep up. std::unique_lock lock(m_mtx); - while (m_num_copys) { + while (m_num_copys > m_flushers.size()) { + std::printf("waiting for %*d copy jobs to complete ... \r", 3, (int)m_num_copys); + std::flush(std::cout); m_wait.wait(lock); } - // move the resources into flush job. + // move the file into flush job. 
m_flushes.emplace_back(std::bind( - [](auto &&file, auto &&header, auto &&shapeEntry, auto &&xyz) -> void { - // flush. - header->flush(); - shapeEntry->flush(); - xyz->flush(); - // Truncate file to proper size. - file->truncate(xyz->getEndSeek()); + [fileEnd](auto &&file) -> void { + file->truncate(fileEnd); file->close(); file.reset(); - xyz.reset(); - shapeEntry.reset(); - header.reset(); }, - std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz))); + std::move(file_))); ++m_num_flushes; m_run.notify_all(); @@ -290,6 +287,8 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { void CacheWriter::flush() { std::unique_lock lock(m_mtx); while (m_num_flushes) { + std::printf("%*d copy jobs total remaining on %*d files ... \r", 3, (int)m_num_copys, 2, (int)m_num_flushes); + std::flush(std::cout); m_wait.wait(lock); } } From 2f86284a5d8a65966b0abd451908a38c26514533 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Fri, 25 Aug 2023 18:52:55 +0300 Subject: [PATCH 39/42] CubeStorage: Memory map 2 MiB area at end of the file. - Memory map 2 MiB region at end of the backing file. This consumes additional 2 MiB of RAM per CubeStorage instance but reduces the number of file::truncate() and systems calls issued by large factor. The mapped region also speeds up CubeStorage::read() if the CubePtr falls into the mapped area as mapped::region::readAt() can simply memcpy the data. - Reduce Subsubhashy::insert() write-lock scope. If the entry is dropped (because another thread inserted it first) unlock immediately before CubeStorage::drop() is called. 
Signed-off-by: JATothrim --- cpp/include/cubeSwapSet.hpp | 13 ++++++++++--- cpp/include/hashes.hpp | 3 ++- cpp/src/cubeSwapSet.cpp | 34 +++++++++++++++++++++++++++------- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/cpp/include/cubeSwapSet.hpp b/cpp/include/cubeSwapSet.hpp index 4a5e6f3..b3395aa 100644 --- a/cpp/include/cubeSwapSet.hpp +++ b/cpp/include/cubeSwapSet.hpp @@ -134,8 +134,13 @@ class CubeStorage { int m_storage_version = 0; const size_t m_cube_size; + mapped::seekoff_t m_reserved_end; + // End of committed data. mapped::seekoff_t m_alloc_seek; + // m_file_head: 2 MiB memory mapped area at end of the file. + std::unique_ptr m_file_head; + public: /** * Initialize Cube file storage @@ -160,7 +165,9 @@ class CubeStorage { /** * Make thread local CubePtr instance. * @note - * Other thread cannot access the returned CubePtr until commit() is called. + * Other thread(s) cannot access the returned CubePtr until commit() is called. + * This requires that external lock is held for the data structure + * if CubePtr is made visible to other thread(s) until this thread calls commit() */ CubePtr local(const Cube& cube) const; @@ -189,9 +196,9 @@ class CubeStorage { /** * Explicitly clear the calling thread's read-cache. - * @note this will initialize callers read-cache instance + * @note this will *initialize* callers read-cache instance * if the thread has not used the read-cache yet. - * So only call this from thread that has used to read(). + * Only call this from thread that has used to read() previously. 
*/ void resetReadCache() const; diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index fcbab4e..cc838ab 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -39,12 +39,13 @@ class Subsubhashy { template void insert(CubeT &&c) { - std::lock_guard lock(set_mutex); + std::unique_lock lock(set_mutex); auto cptr = set_storage.local(std::forward(c)); auto [itr, isnew] = set.emplace(cptr); if (isnew) { set_storage.commit(); } else { + lock.unlock(); set_storage.drop(); } } diff --git a/cpp/src/cubeSwapSet.cpp b/cpp/src/cubeSwapSet.cpp index a4c8e4c..7d5819b 100644 --- a/cpp/src/cubeSwapSet.cpp +++ b/cpp/src/cubeSwapSet.cpp @@ -76,7 +76,7 @@ CubePtr CubeStorage::local(const Cube& cube) const { } void CubeStorage::commit() { - std::lock_guard lock(m_mtx); + std::unique_lock lock(m_mtx); if (!m_file) { using namespace mapped; @@ -86,15 +86,32 @@ void CubeStorage::commit() { std::printf("CubeStorage::allocate() ERROR: Failed to create file: %s\n", m_fpath.c_str()); std::abort(); } + + // memory map 2 MiB chunk for writing. + // This also works as "pre-read-cache" for read(): + // Any CubePtr(s) in this window even if they + // are not yet in thread's read-cache have fast readAt(). + m_file_head = std::make_unique(m_file, 0, 2 * 1024 * 1024); + } + auto datasize = m_cube_size * sizeof(XYZ); + auto write_fpos = m_alloc_seek; + + if(m_reserved_end < m_alloc_seek + datasize) { + // advance the backing file m_file_head to next 2 MiB chunk. 
+ m_reserved_end += 2 * 1024 * 1024; + m_file_head->flushJump(m_reserved_end); } + // advance write offset: + m_alloc_seek = write_fpos + datasize; + // allow parallel m_file_head->writeAt() calls: + lock.unlock(); auto& ctx = ThreadCache::get(); assert(ctx.local_enabled); assert(ctx.local_seek == m_alloc_seek); ctx.local_enabled = false; - m_file->writeAt(m_alloc_seek, m_cube_size * sizeof(XYZ), ctx.local.data()); - m_alloc_seek += m_cube_size * sizeof(XYZ); + m_file_head->writeAt(write_fpos, datasize, ctx.local.data()); } void CubeStorage::drop() const { @@ -134,7 +151,7 @@ const Cube& CubeStorage::read(const CubePtr& x) const { // Read Cube data Cube tmp(m_cube_size); - m_file->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); + m_file_head->readAt(x.seek(), m_cube_size * sizeof(XYZ), tmp.data()); // Move it into an new read-cache entry: auto nitr = ctx.lru.insert(ctx.lru.end(), key); @@ -154,7 +171,7 @@ void CubeStorage::copydata(const CubePtr& x, size_t n, XYZ* destination) const { // copydata() doesn't use thread's read-cache // so local() cannot be active: assert(!ThreadCache::get().local_enabled); - m_file->readAt(x.seek(), n * sizeof(XYZ), destination); + m_file_head->readAt(x.seek(), n * sizeof(XYZ), destination); } void CubeStorage::discard() { @@ -163,11 +180,14 @@ void CubeStorage::discard() { if (m_file) { // The backing file is kept intact // so that CacheWriter can process it. + m_file_head->flush(); + m_file_head.reset(); m_file.reset(); m_alloc_seek = 0; + m_reserved_end = 0; // Thread read-cache problem: - // discard() must cause eviction of all entries for each - // thread's read cache that point into this. + // discard() must cause eviction of all read-cache + // entries for each thread's read cache that point into this. // This done by incrementing m_storage_version: // the entries can't simply be found as they are // made with m_storage_version - 1 value. 
From 55e15f9f2358f335dbfa972288a8676fee8c1da1 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Fri, 25 Aug 2023 23:11:55 +0300 Subject: [PATCH 40/42] Fix-up asserts and debug build. Signed-off-by: JATothrim --- cpp/include/cube.hpp | 2 +- cpp/src/cubeSwapSet.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index a13a2e0..5abf4c7 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -189,7 +189,7 @@ struct Cube { * Copy cube data into destination buffer. */ void copyout(int num, XYZ *dest) const { - assert(num <= size()); + assert(num <= (signed)size()); std::copy_n(begin(), num, dest); } }; diff --git a/cpp/src/cubeSwapSet.cpp b/cpp/src/cubeSwapSet.cpp index 7d5819b..979e7ab 100644 --- a/cpp/src/cubeSwapSet.cpp +++ b/cpp/src/cubeSwapSet.cpp @@ -108,7 +108,7 @@ void CubeStorage::commit() { auto& ctx = ThreadCache::get(); assert(ctx.local_enabled); - assert(ctx.local_seek == m_alloc_seek); + assert(ctx.local_seek == write_fpos); ctx.local_enabled = false; m_file_head->writeAt(write_fpos, datasize, ctx.local.data()); From 713b0630ebbe65a16d893991a19795bc14168d70 Mon Sep 17 00:00:00 2001 From: JATothrim Date: Fri, 25 Aug 2023 23:23:58 +0300 Subject: [PATCH 41/42] CacheReader: Abstract CubeIterator interface Surprisingly, N=14 is not possible with 16 GiB of memory because at a certain point of progress the OS begins to swap *something* and the process grinds to a halt. This happens even if *there is free memory available*, so something is going haywire. I found out that the culprit may be that large (+3 GiB) CacheReader memory mappings are being swapped out of memory. The OS is trying to keep the previously accessed memory in system memory to our detriment. For -t K threads we only need to have K Cubes from the cache-file in memory at once. The only way out of this problem is to not memory map the entire cache file at once and instead read it Cube-by-Cube.
I think @nsch0e would have wanted to implement reading this way from the beginning but he was missing the `mapped::file::readAt()` that works with absolute file offsets and can read the file in parallel. Currently FlatCache and CacheReader use the same CubeIterator and ShapeRange types. This is a problem for implementing a better CubeIterator that reads the Cubes one-by-one from a file because any changes to these would break FlatCache that doesn't use cache files. Start by adding abstract interfaces for CubeIterator and ShapeRange. - ICubeIterator base class interface for Cube iterators - CubeIterator the current implementation for ICubeIterator. - CacheIterator type-erased proxy. This is needed to avoid disrupting the CubeIterator class users too much and make the type-erased iterator work in practice. - IShapeRange base class interface. - Make ICache::getCubesByShape() return a reference to the IShapeRange. - Adapt CubeIterator users to use CacheIterator instead. Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 165 +++++++++++++++++++++++++++++++-------- cpp/src/cubes.cpp | 18 ++--- cpp/src/newCache.cpp | 49 +++++++----- 3 files changed, 172 insertions(+), 60 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index b9705ce..c24cde7 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -34,7 +34,47 @@ struct ShapeEntry { }; }; // namespace cacheformat -class CubeIterator { +/** + * newCache.hpp: provide two versions of the cache: + * + * - FlatCache implements "memory-only" cache and is constructed from Hashy. + * It is needed for boot-strapping the cache files and computing + * cubes without writing any data into disk. + * FlatCache::getCubesByShape() return ShapeRange that points into the Cube data in memory. + * ShapeRange then provides the Cube range as CubeIterator(s). + * + * - CacheReader implements the actual cache file system.
+ * CacheReader::getCubesByShape() return FileShapeRange that + * defines subset shape range from the cache file. + * FileShapeRange then provides the Cube range as CubeFileIterator(s). + */ +class ICubeIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + virtual ~ICubeIterator(){}; + + virtual std::unique_ptr clone() const = 0; + + virtual const value_type operator*() const = 0; + virtual uint64_t seek() const = 0; + virtual ICubeIterator& operator++() = 0; + virtual ICubeIterator& operator+=(int incr) = 0; + + friend bool operator==(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() == b.seek(); }; + friend bool operator<(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() < b.seek(); }; + friend bool operator>(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() > b.seek(); }; + friend bool operator!=(const ICubeIterator& a, const ICubeIterator& b) { return a.seek() != b.seek(); }; +}; + +/** + * Iterator for Cubes stored in some memory area. 
+ */ +class CubeIterator : public ICubeIterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; @@ -48,19 +88,22 @@ class CubeIterator { // invalid iterator (can't deference) explicit CubeIterator() : n(0), m_ptr(nullptr) {} + std::unique_ptr clone() const override { return std::make_unique(*this); } + // derefecence - const value_type operator*() const { return Cube(m_ptr, n); } + const value_type operator*() const override { return Cube(m_ptr, n); } + // pointer operator->() { return (pointer)m_ptr; } - const XYZ* data() const { return m_ptr; } + uint64_t seek() const override { return (uint64_t)m_ptr; } // Prefix increment - CubeIterator& operator++() { + ICubeIterator& operator++() override { m_ptr += n; return *this; } - CubeIterator& operator+=(int incr) { + ICubeIterator& operator+=(int incr) override { m_ptr += n * incr; return *this; } @@ -82,19 +125,88 @@ class CubeIterator { const XYZ* m_ptr; }; -class ShapeRange { +/** + * To avoid complicating the use of the ICubeIterator + * CacheIterator provides type-erased wrapper that can be copied. 
+ */ +class CacheIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + CacheIterator() {} + + template + explicit CacheIterator(Itr&& init) : proxy(std::make_unique>(std::forward(init))) {} + + CacheIterator(const CacheIterator& copy) { + if (copy.proxy) { + proxy = copy.proxy->clone(); + } + } + CacheIterator& operator=(const CacheIterator& x) { + CacheIterator tmp(x); + std::swap(proxy, tmp.proxy); + return *this; + } + CacheIterator(CacheIterator&& copy) =default; + CacheIterator& operator=(CacheIterator&& x) =default; + + const value_type operator*() const { return **proxy; } + + uint64_t seek() const { return proxy->seek(); } + + CacheIterator& operator++() { + ++(*proxy); + return *this; + } + CacheIterator& operator+=(int incr) { + (*proxy) += incr; + return *this; + } + + CacheIterator operator++(int) { + CacheIterator tmp = *this; + ++(*this); + return tmp; + } + + friend bool operator==(const CacheIterator& a, const CacheIterator& b) { return a.seek() == b.seek(); }; + friend bool operator<(const CacheIterator& a, const CacheIterator& b) { return a.seek() < b.seek(); }; + friend bool operator>(const CacheIterator& a, const CacheIterator& b) { return a.seek() > b.seek(); }; + friend bool operator!=(const CacheIterator& a, const CacheIterator& b) { return a.seek() != b.seek(); }; + + private: + std::unique_ptr proxy; +}; + +class IShapeRange { + public: + IShapeRange(){}; + virtual ~IShapeRange() {} + + virtual CacheIterator begin() const = 0; + virtual CacheIterator end() const = 0; + virtual XYZ& shape() = 0; + virtual size_t size() const = 0; +}; + +class ShapeRange : public IShapeRange { public: ShapeRange(const XYZ* start, const XYZ* stop, uint64_t _cubeLen, XYZ _shape) - : b(_cubeLen, start), e(_cubeLen, stop), size_(std::distance(start, stop) / _cubeLen), 
shape_(_shape) {} + : b(CubeIterator(_cubeLen, start)), e(CubeIterator(_cubeLen, stop)), size_(std::distance(start, stop) / _cubeLen), shape_(_shape) {} - CubeIterator begin() { return b; } - CubeIterator end() { return e; } + CacheIterator begin() const override { return b; } + CacheIterator end() const override { return e; } - XYZ& shape() { return shape_; } - auto size() const { return size_; } + XYZ& shape() override { return shape_; } + size_t size() const override { return size_; } private: - CubeIterator b, e; + CacheIterator b, e; uint64_t size_; XYZ shape_; }; @@ -102,7 +214,7 @@ class ShapeRange { class ICache { public: virtual ~ICache(){}; - virtual ShapeRange getCubesByShape(uint32_t i) = 0; + virtual IShapeRange& getCubesByShape(uint32_t i) = 0; virtual uint32_t numShapes() = 0; virtual size_t size() = 0; }; @@ -124,21 +236,8 @@ class CacheReader : public ICache { uint32_t numShapes() override { return header->numShapes; }; operator bool() { return fileLoaded_; } - // Do begin() and end() make sense for CacheReader - // If the cache file provides data for more than single shape? - // The data might not even be mapped contiguously to save memory. 
- /*CubeIterator begin() { - const uint8_t* start = filePointer + shapes[0].offset; - return CubeIterator(header->n, (const XYZ*)start); - } - - CubeIterator end() { - const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; - return CubeIterator(header->n, (const XYZ*)stop); - }*/ - // get shapes at index [0, numShapes()[ - ShapeRange getCubesByShape(uint32_t i) override; + IShapeRange& getCubesByShape(uint32_t i) override; private: std::shared_ptr file_; @@ -146,6 +245,8 @@ class CacheReader : public ICache { std::unique_ptr> shapes_; std::unique_ptr> xyz_; + std::vector shapeRanges; + std::string path_; bool fileLoaded_; const cacheformat::Header dummyHeader; @@ -167,16 +268,18 @@ class FlatCache : public ICache { for (auto& [shape, set] : hashes) { auto begin = allXYZs.data() + allXYZs.size(); for (auto& subset : set) { - for (auto& cubeptr : subset) - cubeptr.copyout(subset.storage(), n, std::back_inserter(allXYZs)); + for (auto& cubeptr : subset) cubeptr.copyout(subset.storage(), n, std::back_inserter(allXYZs)); } auto end = allXYZs.data() + allXYZs.size(); // std::printf(" SR %p %p\n", (void*)begin, (void*)end); shapes.emplace_back(begin, end, n, shape); } + + // Add dummy shape range at back: + shapes.emplace_back(nullptr, nullptr, n, XYZ(0, 0, 0)); } - ShapeRange getCubesByShape(uint32_t i) override { - if (i >= shapes.size()) return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; + IShapeRange& getCubesByShape(uint32_t i) override { + if (i >= shapes.size() - 1) return shapes.back(); return shapes[i]; }; uint32_t numShapes() override { return shapes.size(); }; diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 89b4e12..ec34936 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -22,27 +22,27 @@ struct Workset { std::mutex mu; CacheReader cr; - CubeIterator _begin_total; - CubeIterator _begin; - CubeIterator _end; + CacheIterator _begin_total; + CacheIterator _begin; + CacheIterator _end; Hashy &hashes; 
XYZ targetShape, shape, expandDim; bool notSameShape; Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) : hashes(hashes), targetShape(targetShape), shape(shape), expandDim(expandDim), notSameShape(notSameShape) {} - void setRange(ShapeRange &data) { + void setRange(IShapeRange &data) { _begin_total = data.begin(); _begin = data.begin(); _end = data.end(); } struct Subset { - CubeIterator _begin, _end; + CacheIterator _begin, _end; bool valid; float percent; - auto begin() { return _begin; } - auto end() { return _end; } + CacheIterator begin() { return _begin; } + CacheIterator end() { return _end; } }; Subset getPart() { @@ -50,7 +50,7 @@ struct Workset { auto a = _begin; _begin += 500; if (_begin > _end) _begin = _end; - return {a, _begin, a < _end, 100 * float(std::distance(_begin_total.data(), a.data())) / std::distance(_begin_total.data(), _end.data())}; + return {a, _begin, a < _end, 100 * float(a.seek() - _begin_total.seek() + 1) / (_end.seek() - _begin_total.seek() + 1)}; } void expand(const Cube &c) { @@ -275,7 +275,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c base = &ws->cr; // cr.printHeader(); } - auto s = base->getCubesByShape(sid); + auto& s = base->getCubesByShape(sid); if (shape != s.shape()) { std::printf("ERROR caches shape does not match expected shape!\n"); exit(-1); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index 8ae2f5b..e2d7800 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -51,48 +51,57 @@ int CacheReader::loadFile(const std::string path) { shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); shapes = shapes_->get(); + // Initialize ShapeRanges size_t datasize = 0; for (unsigned int i = 0; i < header->numShapes; ++i) { datasize += shapes[i].size; } - // map rest of the file as XYZ data: if (file_->size() != shapes_->getEndSeek() + datasize) { std::printf("warn: file size does not match expected value\n"); } 
+ xyz_ = std::make_unique>(file_, shapes_->getEndSeek(), datasize); + // Initialize shapeRanges array: + size_t offset = 0; + for (unsigned int i = 0; i < header->numShapes; ++i) { + if (shapes[i].size) { + auto index = offset / cacheformat::XYZ_SIZE; + auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE; + auto start = xyz_->get() + index; + auto end = xyz_->get() + index + num_xyz; + + shapeRanges.emplace_back(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } else { + // table entry has no data. + // shapes[i].offset may have bogus value. + shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } + + offset += shapes[i].size; + } + + // Add dummy entry at back: + shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(0, 0, 0)); + fileLoaded_ = true; return 0; } -ShapeRange CacheReader::getCubesByShape(uint32_t i) { +IShapeRange &CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { - return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; - } - if (shapes[i].size <= 0) { - return ShapeRange{nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; + return shapeRanges.back(); } - // get section start - // note: shapes[i].offset may have bogus offset - // if any earlier shape table entry was empty before i - // so we ignore the offset here. 
- size_t offset = 0; - for (unsigned int k = 0; k < i; ++k) { - offset += shapes[k].size; - } - auto index = offset / cacheformat::XYZ_SIZE; - auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE; - // pointers to Cube data: - auto start = xyz_->get() + index; - auto end = xyz_->get() + index + num_xyz; - return ShapeRange{start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; + + return shapeRanges[i]; } void CacheReader::unload() { // unload file from memory if (fileLoaded_) { + shapeRanges.clear(); xyz_.reset(); shapes_.reset(); header_.reset(); From 37d51e5ee567a8c9a6cf36c0ef441aa7f342205e Mon Sep 17 00:00:00 2001 From: JATothrim Date: Sat, 26 Aug 2023 03:36:26 +0300 Subject: [PATCH 42/42] CacheReader: Implement cache file reading one Cube at time - Remove CacheReader XYZ mapping. - Add CubeReadIterator that reads Cubes one at a time. - FileShapeRange takes the cache file and offsets into the file - Update CacheReader::loadFile() to initialize array of FileShapeRange from the cache file. The result is a celebratory hooray for computing N=14 for the first time with less than 9 GiB of RSS: ```process output shape 99/101 [ 3 5 5] shape 2 5 5 shape 3 4 5 num: 588828 saved ./cache/cubes_14_3-5-5.bin, took 0.01 s process output shape 100/101 [ 4 4 4] shape 3 4 4 shape 4 4 4 num: 3341560 saved ./cache/cubes_14_4-4-4.bin, took 0.11 s process output shape 101/101 [ 4 4 5] shape 3 4 5 shape 4 4 4 num: 752858 saved ./cache/cubes_14_4-4-5.bin, took 0.02 s took 7231.83 s num total cubes: 1039496297``` My nvme disk was not particularly happy with `output shape 80/101 [ 2 3 4]` that produced a +8 GiB file at the end. The disk throttled badly after reaching 60*C... But it did complete eventually at a reasonable pace and memory usage dropped below 7 GiB for the rest of the run. N=15 will require more tuning to the CubeStorage read-cache and a more parallel file system.
btrfs looks to be not very good at this job as writing the storage files in parallel reduces the program to near single threaded speed. Signed-off-by: JATothrim --- cpp/include/newCache.hpp | 78 ++++++++++++++++++++++++++++++++++++++-- cpp/src/newCache.cpp | 26 +++++++------- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index c24cde7..20fd660 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "cube.hpp" #include "hashes.hpp" @@ -125,6 +126,61 @@ class CubeIterator : public ICubeIterator { const XYZ* m_ptr; }; +class CubeReadIterator : public ICubeIterator { + public: + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = Cube; + using pointer = Cube*; // or also value_type* + using reference = Cube&; // or also value_type& + + // constructor + CubeReadIterator(std::shared_ptr file, uint32_t _n, mapped::seekoff_t offset) : n(_n), m_seek(offset), m_file(file) {} + + // invalid iterator (can't deference) + explicit CubeReadIterator() : n(0), m_seek(-1) {} + + std::unique_ptr clone() const override { return std::make_unique(*this); } + + // derefecence + const value_type operator*() const override { return read(); } + + // pointer operator->() { return (pointer)m_seek; } + + uint64_t seek() const override { return (uint64_t)m_seek; } + + // Prefix increment + ICubeIterator& operator++() override { + m_seek += n * sizeof(XYZ); + return *this; + } + + ICubeIterator& operator+=(int incr) override { + m_seek += n * incr * sizeof(XYZ); + return *this; + } + + // Postfix increment + CubeReadIterator operator++(int) { + CubeReadIterator tmp = *this; + ++(*this); + return tmp; + } + + friend bool operator==(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek == b.m_seek; }; + friend bool operator<(const CubeReadIterator& a, const CubeReadIterator& 
b) { return a.m_seek < b.m_seek; }; + friend bool operator>(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek > b.m_seek; }; + friend bool operator!=(const CubeReadIterator& a, const CubeReadIterator& b) { return a.m_seek != b.m_seek; }; + + private: + uint32_t n; + mapped::seekoff_t m_seek; + std::shared_ptr m_file; + + // de-reference is implemented by read() + Cube read() const; +}; + /** * To avoid complicating the use of the ICubeIterator * CacheIterator provides type-erased wrapper that can be copied. @@ -211,6 +267,25 @@ class ShapeRange : public IShapeRange { XYZ shape_; }; +class FileShapeRange : public IShapeRange { + public: + FileShapeRange(std::shared_ptr file, mapped::seekoff_t start, mapped::seekoff_t stop, uint64_t _cubeLen, XYZ _shape) + : b(CubeReadIterator(file, _cubeLen, start)), + e(CubeReadIterator(file, _cubeLen, stop)), + size_((stop - start) / _cubeLen), shape_(_shape) {} + + CacheIterator begin() const override { return b; } + CacheIterator end() const override { return e; } + + XYZ& shape() override { return shape_; } + size_t size() const override { return size_; } + + private: + CacheIterator b, e; + uint64_t size_; + XYZ shape_; +}; + class ICache { public: virtual ~ICache(){}; @@ -243,9 +318,8 @@ class CacheReader : public ICache { std::shared_ptr file_; std::unique_ptr> header_; std::unique_ptr> shapes_; - std::unique_ptr> xyz_; - std::vector shapeRanges; + std::vector shapeRanges; std::string path_; bool fileLoaded_; diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index e2d7800..9e0c54e 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -61,35 +61,34 @@ int CacheReader::loadFile(const std::string path) { std::printf("warn: file size does not match expected value\n"); } - xyz_ = std::make_unique>(file_, shapes_->getEndSeek(), datasize); - // Initialize shapeRanges array: - size_t offset = 0; for (unsigned int i = 0; i < header->numShapes; ++i) { if (shapes[i].size) { - auto index = offset / 
cacheformat::XYZ_SIZE; - auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE; - auto start = xyz_->get() + index; - auto end = xyz_->get() + index + num_xyz; - - shapeRanges.emplace_back(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + auto start = shapes[i].offset; + auto end = start + shapes[i].size; + shapeRanges.emplace_back(file_, start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); } else { // table entry has no data. // shapes[i].offset may have bogus value. - shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); } - - offset += shapes[i].size; } // Add dummy entry at back: - shapeRanges.emplace_back(nullptr, nullptr, header->n, XYZ(0, 0, 0)); + shapeRanges.emplace_back(file_, -1, -1, header->n, XYZ(0, 0, 0)); fileLoaded_ = true; return 0; } +Cube CubeReadIterator::read() const { + Cube tmp(n); + m_file->readAt(m_seek, n * sizeof(XYZ), tmp.data()); + return tmp; +} + + IShapeRange &CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { return shapeRanges.back(); @@ -102,7 +101,6 @@ void CacheReader::unload() { // unload file from memory if (fileLoaded_) { shapeRanges.clear(); - xyz_.reset(); shapes_.reset(); header_.reset(); file_.reset();