From 730b882f49692339c67980403706da297b491a37 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Wed, 23 Mar 2022 10:49:47 +0100 Subject: [PATCH 01/10] Poc to have typed object indexes Same as https://github.com/rust-transit/gtfs-structure/pull/123 but without generational_indexes. I think the indexes are really great for performance since: * all structures are then `Vector` (great random access and cache friendly iterations) * the ids are short (integer vs string) I don't think we need those performance in this crate though. Having only typed index (a think wrapper over the real gtfs identifier) would be more convenient I think. It would: * ease debug (easy to print the gtfs identifier, instead of having a meaningless integer) * ease serialisation (same, we can serialize the string right away) * be a more more close to the gtfs representation This is still *very* early WIP, I'm not convinced at all by the ergonomics, I'd like to keep the property of the Index in https://github.com/rust-transit/gtfs-structure/pull/123 that the `Id` have at least existed at one point (even if I don't plan to ensure this if an object is deleted). --- Cargo.toml | 2 +- src/gtfs.rs | 62 ++++++++++++++++++++++++++------------------------ src/id.rs | 31 +++++++++++++++++++++++++ src/lib.rs | 2 ++ src/objects.rs | 54 +++++++++++++++++++------------------------ 5 files changed, 90 insertions(+), 61 deletions(-) create mode 100644 src/id.rs diff --git a/Cargo.toml b/Cargo.toml index 781861c..16e6742 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ read-url = ["reqwest", "futures"] [dependencies] bytes = "1" csv = "1.1" -derivative = "2.1" +derivative = "2.2.0" serde = "1.0" serde_derive = "1.0" chrono = "0.4" diff --git a/src/gtfs.rs b/src/gtfs.rs index 6859ea1..aa63c66 100644 --- a/src/gtfs.rs +++ b/src/gtfs.rs @@ -1,9 +1,8 @@ -use crate::{objects::*, Error, RawGtfs}; +use crate::{objects::*, Error, Id, RawGtfs}; use chrono::prelude::NaiveDate; use chrono::Duration; use std::collections::{HashMap, HashSet}; use std::convert::TryFrom; -use std::sync::Arc; /// Data structure with all the GTFS objects /// @@ -27,8 +26,8 @@ pub struct Gtfs { pub calendar: HashMap, /// All calendar dates grouped by service_id pub calendar_dates: HashMap>, - /// All stop by `stop_id`. Stops are in an [Arc] because they are also referenced by each [StopTime] - pub stops: HashMap>, + /// All stop by `stop_id` + pub stops: HashMap, Stop>, /// All routes by `route_id` pub routes: HashMap, /// All trips by `trip_id` @@ -49,12 +48,12 @@ impl TryFrom for Gtfs { /// /// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid. fn try_from(raw: RawGtfs) -> Result { + let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?; let stops = to_stop_map( raw.stops?, raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?, raw.pathways.unwrap_or(Ok(Vec::new()))?, )?; - let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?; let trips = create_trips(raw.trips?, raw.stop_times?, frequencies, &stops)?; Ok(Gtfs { @@ -175,10 +174,10 @@ impl Gtfs { } /// Gets a [Stop] by its `stop_id` - pub fn get_stop<'a>(&'a self, id: &str) -> Result<&'a Stop, Error> { + pub fn get_stop<'a>(&'a self, id: &Id) -> Result<&'a Stop, Error> { match self.stops.get(id) { Some(stop) => Ok(stop), - None => Err(Error::ReferenceError(id.to_owned())), + None => Err(Error::ReferenceError(id.to_string())), } } @@ -225,7 +224,7 @@ impl Gtfs { } } -fn to_map(elements: impl IntoIterator) -> HashMap { +fn to_map(elements: impl IntoIterator) -> HashMap { elements .into_iter() .map(|e| (e.id().to_owned(), e)) @@ -236,33 +235,35 @@ fn to_stop_map( stops: Vec, raw_transfers: Vec, raw_pathways: Vec, -) -> Result>, Error> { - let mut stop_map: HashMap = - stops.into_iter().map(|s| (s.id.clone(), s)).collect(); - +) -> Result, Stop>, Error> { + let mut stop_map = HashMap::, Stop>::default(); + for s in stops.into_iter() { + stop_map.insert(Id::must_exists(s.id.clone()), s); + } for transfer in raw_transfers { + // Note: I'm not convinced at all by this Id::must_exists... + // we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity) stop_map - .get(&transfer.to_stop_id) + .get(&Id::must_exists(transfer.to_stop_id.clone())) .ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?; - stop_map - .entry(transfer.from_stop_id.clone()) - .and_modify(|stop| stop.transfers.push(StopTransfer::from(transfer))); + let to_stop_id = Id::must_exists(transfer.to_stop_id.clone()); + let s = stop_map + .get_mut(&Id::must_exists(transfer.from_stop_id.clone())) + .ok_or_else(|| Error::ReferenceError(transfer.from_stop_id.to_string()))?; + s.transfers.push(StopTransfer::from((transfer, to_stop_id))); } for pathway in raw_pathways { stop_map - .get(&pathway.to_stop_id) + .get(&Id::must_exists(pathway.to_stop_id.clone())) .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; - stop_map - .entry(pathway.from_stop_id.clone()) - .and_modify(|stop| stop.pathways.push(Pathway::from(pathway))); + let s = stop_map + .get_mut(&Id::must_exists(pathway.from_stop_id.clone())) + .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; + let stop_id = Id::must_exists(pathway.to_stop_id.clone()); + s.pathways.push(Pathway::from((pathway, stop_id))); } - - let res = stop_map - .into_iter() - .map(|(i, s)| (i, Arc::new(s))) - .collect(); - Ok(res) + Ok(stop_map) } fn to_shape_map(shapes: Vec) -> HashMap> { @@ -292,7 +293,7 @@ fn create_trips( raw_trips: Vec, raw_stop_times: Vec, raw_frequencies: Vec, - stops: &HashMap>, + stops: &HashMap, Stop>, ) -> Result, Error> { let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip { id: rt.id, @@ -312,10 +313,11 @@ fn create_trips( let trip = &mut trips .get_mut(&s.trip_id) .ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?; + let stop_id = Id::must_exists(s.stop_id.clone()); let stop = stops - .get(&s.stop_id) - .ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?; - trip.stop_times.push(StopTime::from(&s, Arc::clone(stop))); + .get(&stop_id) + .ok_or_else(|| Error::ReferenceError(stop_id.to_string()))?; + trip.stop_times.push(StopTime::from(&s, stop_id)); } for trip in &mut trips.values_mut() { diff --git a/src/id.rs b/src/id.rs new file mode 100644 index 0000000..ee0baf4 --- /dev/null +++ b/src/id.rs @@ -0,0 +1,31 @@ +use core::marker::PhantomData; +#[derive(Derivative)] +#[derive(Serialize, Deserialize)] +#[derivative(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct Id { + id: String, + #[serde(skip)] + #[derivative(Debug(bound = ""))] + #[derivative(Debug = "ignore")] + #[derivative(Clone(bound = ""))] + #[derivative(Eq(bound = ""))] + #[derivative(PartialEq(bound = ""))] + #[derivative(Hash(bound = ""))] + _phantom: PhantomData, +} + +impl Id { + pub fn must_exists(s: String) -> Id { + Id { + id: s, + _phantom: PhantomData, + } + } +} + +impl std::ops::Deref for Id { + type Target = str; + fn deref(&self) -> &str { + &self.id + } +} diff --git a/src/lib.rs b/src/lib.rs index ccbd3ff..188e021 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,7 @@ mod enums; pub mod error; mod gtfs; mod gtfs_reader; +mod id; pub(crate) mod objects; mod raw_gtfs; mod serde_helpers; @@ -57,5 +58,6 @@ mod tests; pub use error::Error; pub use gtfs::Gtfs; pub use gtfs_reader::GtfsReader; +pub use id::Id; pub use objects::*; pub use raw_gtfs::RawGtfs; diff --git a/src/objects.rs b/src/objects.rs index 8e4b079..97a93c0 100644 --- a/src/objects.rs +++ b/src/objects.rs @@ -1,5 +1,5 @@ pub use crate::enums::*; -use crate::serde_helpers::*; +use crate::{serde_helpers::*, Id}; use chrono::{Datelike, NaiveDate, Weekday}; use rgb::RGB8; @@ -9,17 +9,11 @@ use std::sync::Arc; /// Objects that have an identifier implement this trait /// /// Those identifier are technical and should not be shown to travellers -pub trait Id { +pub trait WithId { /// Identifier of the object fn id(&self) -> &str; } -impl Id for Arc { - fn id(&self) -> &str { - self.as_ref().id() - } -} - /// Trait to introspect what is the object’s type (stop, route…) pub trait Type { /// What is the type of the object @@ -100,7 +94,7 @@ impl Type for Calendar { } } -impl Id for Calendar { +impl WithId for Calendar { fn id(&self) -> &str { &self.id } @@ -199,7 +193,7 @@ impl Type for Stop { } } -impl Id for Stop { +impl WithId for Stop { fn id(&self) -> &str { &self.id } @@ -258,14 +252,14 @@ pub struct RawStopTime { } /// The moment where a vehicle, running on [Trip] stops at a [Stop]. See -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct StopTime { /// Arrival time of the stop time. /// It's an option since the intermediate stops can have have no arrival /// and this arrival needs to be interpolated pub arrival_time: Option, /// [Stop] where the vehicle stops - pub stop: Arc, + pub stop: Id, /// Departure time of the stop time. /// It's an option since the intermediate stops can have have no departure /// and this departure needs to be interpolated @@ -290,7 +284,7 @@ pub struct StopTime { impl StopTime { /// Creates [StopTime] by linking a [RawStopTime::stop_id] to the actual [Stop] - pub fn from(stop_time_gtfs: &RawStopTime, stop: Arc) -> Self { + pub fn from(stop_time_gtfs: &RawStopTime, stop: Id) -> Self { Self { arrival_time: stop_time_gtfs.arrival_time, departure_time: stop_time_gtfs.departure_time, @@ -362,7 +356,7 @@ impl Type for Route { } } -impl Id for Route { +impl WithId for Route { fn id(&self) -> &str { &self.id } @@ -412,7 +406,7 @@ impl Type for RawTrip { } } -impl Id for RawTrip { +impl WithId for RawTrip { fn id(&self) -> &str { &self.id } @@ -463,7 +457,7 @@ impl Type for Trip { } } -impl Id for Trip { +impl WithId for Trip { fn id(&self) -> &str { &self.id } @@ -514,7 +508,7 @@ impl Type for Agency { } } -impl Id for Agency { +impl WithId for Agency { fn id(&self) -> &str { match &self.id { None => "", @@ -555,7 +549,7 @@ impl Type for Shape { } } -impl Id for Shape { +impl WithId for Shape { fn id(&self) -> &str { &self.id } @@ -582,7 +576,7 @@ pub struct FareAttribute { pub transfer_duration: Option, } -impl Id for FareAttribute { +impl WithId for FareAttribute { fn id(&self) -> &str { &self.id } @@ -655,22 +649,22 @@ pub struct RawTransfer { pub min_transfer_time: Option, } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] /// Transfer information between stops pub struct StopTransfer { /// Stop which to transfer to - pub to_stop_id: String, + pub to_stop_id: Id, /// Type of the transfer pub transfer_type: TransferType, /// Minimum time needed to make the transfer in seconds pub min_transfer_time: Option, } -impl From for StopTransfer { +impl From<(RawTransfer, Id)> for StopTransfer { /// Converts from a [RawTransfer] to a [StopTransfer] - fn from(transfer: RawTransfer) -> Self { + fn from((transfer, to_stop_id): (RawTransfer, Id)) -> Self { Self { - to_stop_id: transfer.to_stop_id, + to_stop_id: to_stop_id, transfer_type: transfer.transfer_type, min_transfer_time: transfer.min_transfer_time, } @@ -755,12 +749,12 @@ pub struct RawPathway { } /// Pathway going from a stop to another. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Pathway { /// Uniquely identifies the pathway pub id: String, /// Location at which the pathway ends - pub to_stop_id: String, + pub to_stop_id: Id, /// Type of pathway between the specified (from_stop_id, to_stop_id) pair pub mode: PathwayMode, /// Indicates in which direction the pathway can be used @@ -781,18 +775,18 @@ pub struct Pathway { pub reversed_signposted_as: Option, } -impl Id for Pathway { +impl WithId for Pathway { fn id(&self) -> &str { &self.id } } -impl From for Pathway { +impl From<(RawPathway, Id)> for Pathway { /// Converts from a [RawPathway] to a [Pathway] - fn from(raw: RawPathway) -> Self { + fn from((raw, to_stop_id): (RawPathway, Id)) -> Self { Self { id: raw.id, - to_stop_id: raw.to_stop_id, + to_stop_id: to_stop_id, mode: raw.mode, is_bidirectional: raw.is_bidirectional, length: raw.length, From 3e175a82942c544364348a74b42dad367ae60b7a Mon Sep 17 00:00:00 2001 From: antoine-de Date: Wed, 23 Mar 2022 23:04:45 +0100 Subject: [PATCH 02/10] try to encapsulate all behind a collection This way and Id must be lookup before being created --- src/gtfs.rs | 56 ++++++++++++++++++++------------------------ src/id.rs | 63 +++++++++++++++++++++++++++++++++++++++++++++++--- src/objects.rs | 4 ++-- src/tests.rs | 15 ++++++++---- 4 files changed, 98 insertions(+), 40 deletions(-) diff --git a/src/gtfs.rs b/src/gtfs.rs index aa63c66..8b9612d 100644 --- a/src/gtfs.rs +++ b/src/gtfs.rs @@ -1,4 +1,4 @@ -use crate::{objects::*, Error, Id, RawGtfs}; +use crate::{id::Collection, objects::*, Error, RawGtfs}; use chrono::prelude::NaiveDate; use chrono::Duration; use std::collections::{HashMap, HashSet}; @@ -27,7 +27,7 @@ pub struct Gtfs { /// All calendar dates grouped by service_id pub calendar_dates: HashMap>, /// All stop by `stop_id` - pub stops: HashMap, Stop>, + pub stops: Collection, /// All routes by `route_id` pub routes: HashMap, /// All trips by `trip_id` @@ -49,7 +49,7 @@ impl TryFrom for Gtfs { /// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid. fn try_from(raw: RawGtfs) -> Result { let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?; - let stops = to_stop_map( + let stops = to_stop_collection( raw.stops?, raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?, raw.pathways.unwrap_or(Ok(Vec::new()))?, @@ -174,11 +174,11 @@ impl Gtfs { } /// Gets a [Stop] by its `stop_id` - pub fn get_stop<'a>(&'a self, id: &Id) -> Result<&'a Stop, Error> { - match self.stops.get(id) { - Some(stop) => Ok(stop), - None => Err(Error::ReferenceError(id.to_string())), - } + pub fn get_stop<'a>(&'a self, raw_id: &str) -> Result<&'a Stop, Error> { + self.stops + .get_by_str(raw_id) + .ok_or_else(|| Error::ReferenceError(raw_id.to_string())) + .map(|(_stop_id, s)| s) } /// Gets a [Trip] by its `trip_id` @@ -231,37 +231,32 @@ fn to_map(elements: impl IntoIterator) -> HashMap, raw_transfers: Vec, raw_pathways: Vec, -) -> Result, Stop>, Error> { - let mut stop_map = HashMap::, Stop>::default(); - for s in stops.into_iter() { - stop_map.insert(Id::must_exists(s.id.clone()), s); - } +) -> Result, Error> { + let mut stop_map: Collection = stops.into_iter().collect(); for transfer in raw_transfers { // Note: I'm not convinced at all by this Id::must_exists... // we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity) - stop_map - .get(&Id::must_exists(transfer.to_stop_id.clone())) + let to_stop_id = stop_map + .get_id(&transfer.to_stop_id) .ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?; - let to_stop_id = Id::must_exists(transfer.to_stop_id.clone()); - let s = stop_map - .get_mut(&Id::must_exists(transfer.from_stop_id.clone())) + let (_, s) = stop_map + .get_mut_by_str(&transfer.from_stop_id) .ok_or_else(|| Error::ReferenceError(transfer.from_stop_id.to_string()))?; s.transfers.push(StopTransfer::from((transfer, to_stop_id))); } for pathway in raw_pathways { - stop_map - .get(&Id::must_exists(pathway.to_stop_id.clone())) - .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; - let s = stop_map - .get_mut(&Id::must_exists(pathway.from_stop_id.clone())) + let to_stop_id = stop_map + .get_id(&pathway.to_stop_id) .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; - let stop_id = Id::must_exists(pathway.to_stop_id.clone()); - s.pathways.push(Pathway::from((pathway, stop_id))); + let (_, s) = stop_map + .get_mut_by_str(&pathway.from_stop_id) + .ok_or_else(|| Error::ReferenceError(pathway.from_stop_id.to_string()))?; + s.pathways.push(Pathway::from((pathway, to_stop_id))); } Ok(stop_map) } @@ -293,7 +288,7 @@ fn create_trips( raw_trips: Vec, raw_stop_times: Vec, raw_frequencies: Vec, - stops: &HashMap, Stop>, + stops: &Collection, ) -> Result, Error> { let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip { id: rt.id, @@ -313,10 +308,9 @@ fn create_trips( let trip = &mut trips .get_mut(&s.trip_id) .ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?; - let stop_id = Id::must_exists(s.stop_id.clone()); - let stop = stops - .get(&stop_id) - .ok_or_else(|| Error::ReferenceError(stop_id.to_string()))?; + let stop_id = stops + .get_id(&s.stop_id) + .ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?; trip.stop_times.push(StopTime::from(&s, stop_id)); } diff --git a/src/id.rs b/src/id.rs index ee0baf4..a4c5050 100644 --- a/src/id.rs +++ b/src/id.rs @@ -1,6 +1,9 @@ +use crate::WithId; use core::marker::PhantomData; -#[derive(Derivative)] -#[derive(Serialize, Deserialize)] +use std::{collections::HashMap, iter::FromIterator}; + +/// Typed Id over a [Collection] +#[derive(Derivative, Serialize, Deserialize)] #[derivative(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] pub struct Id { id: String, @@ -15,12 +18,17 @@ pub struct Id { } impl Id { - pub fn must_exists(s: String) -> Id { + fn must_exists(s: String) -> Id { Id { id: s, _phantom: PhantomData, } } + + /// get as str + pub fn as_str(&self) -> &str { + self + } } impl std::ops::Deref for Id { @@ -29,3 +37,52 @@ impl std::ops::Deref for Id { &self.id } } + +/// Collection with typed Ids +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Collection(HashMap, T>); + +impl Default for Collection { + fn default() -> Self { + Collection(HashMap::default()) + } +} + +impl Collection { + pub fn get_id(&self, raw_id: &str) -> Option> { + let id = Id::must_exists(raw_id.to_owned()); + if self.0.contains_key(&id) { + Some(id) + } else { + None + } + } + pub(crate) fn get_by_str(&self, raw_id: &str) -> Option<(Id, &T)> { + let id = Id::must_exists(raw_id.to_owned()); + self.0.get(&id).map(|v| (id, v)) + } + + pub(crate) fn get_mut_by_str(&mut self, raw_id: &str) -> Option<(Id, &mut T)> { + let id = Id::must_exists(raw_id.to_owned()); + self.0.get_mut(&id).map(|v| (id, v)) + } + + pub fn get(&self, id: &Id) -> Option<&T> { + self.0.get(id) + } + pub fn len(&self) -> usize { + self.0.len() + } +} + +impl FromIterator for Collection { + fn from_iter>(iter: I) -> Self { + let mut c = Self::default(); + + for i in iter { + let _ = c.0.insert(Id::must_exists(i.id().to_owned()), i); + } + + c + } +} diff --git a/src/objects.rs b/src/objects.rs index 97a93c0..c5d99ca 100644 --- a/src/objects.rs +++ b/src/objects.rs @@ -664,7 +664,7 @@ impl From<(RawTransfer, Id)> for StopTransfer { /// Converts from a [RawTransfer] to a [StopTransfer] fn from((transfer, to_stop_id): (RawTransfer, Id)) -> Self { Self { - to_stop_id: to_stop_id, + to_stop_id, transfer_type: transfer.transfer_type, min_transfer_time: transfer.min_transfer_time, } @@ -786,7 +786,7 @@ impl From<(RawPathway, Id)> for Pathway { fn from((raw, to_stop_id): (RawPathway, Id)) -> Self { Self { id: raw.id, - to_stop_id: to_stop_id, + to_stop_id, mode: raw.mode, is_bidirectional: raw.is_bidirectional, length: raw.length, diff --git a/src/tests.rs b/src/tests.rs index 3c038a1..7a649b1 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -149,11 +149,15 @@ fn read_transfers() { assert_eq!( "stop5", - gtfs.get_stop("stop3").unwrap().transfers[0].to_stop_id + gtfs.get_stop("stop3").unwrap().transfers[0] + .to_stop_id + .as_str() ); assert_eq!( "stop2", - gtfs.get_stop("stop1").unwrap().transfers[0].to_stop_id + gtfs.get_stop("stop1").unwrap().transfers[0] + .to_stop_id + .as_str() ); assert_eq!( TransferType::Recommended, @@ -180,7 +184,7 @@ fn read_pathways() { let pathways = >fs.get_stop("stop1").unwrap().pathways; assert_eq!(1, pathways.len()); - assert_eq!("stop3", pathways[0].to_stop_id); + assert_eq!("stop3", pathways[0].to_stop_id.as_str()); assert_eq!(PathwayMode::Walkway, pathways[0].mode); assert_eq!( PathwayDirectionType::Unidirectional, @@ -348,7 +352,10 @@ fn read_interpolated_stops() { assert_eq!(1, gtfs.feed_info.len()); // the second stop have no departure/arrival, it should not cause any problems assert_eq!( - gtfs.trips["trip1"].stop_times[1].stop.name, + gtfs.stops + .get(>fs.trips["trip1"].stop_times[1].stop) + .expect("no stop") + .name, "Stop Point child of 1" ); assert!(gtfs.trips["trip1"].stop_times[1].arrival_time.is_none()); From 974b9a747ec2144fe15ae2c2cb16d56a82418a2e Mon Sep 17 00:00:00 2001 From: antoine-de Date: Tue, 3 May 2022 20:47:40 +0200 Subject: [PATCH 03/10] =?UTF-8?q?Implement=20borrow=20and=20remove=20lots?= =?UTF-8?q?=20of=20`to=5Fowned()`=20=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/id.rs | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/src/id.rs b/src/id.rs index a4c5050..e7b0dae 100644 --- a/src/id.rs +++ b/src/id.rs @@ -18,6 +18,8 @@ pub struct Id { } impl Id { + + /// private method to build an Id that exists in the [Collection] fn must_exists(s: String) -> Id { Id { id: s, @@ -38,6 +40,13 @@ impl std::ops::Deref for Id { } } +// Implements Borrow to be able to look in the hashmap with just a &str instead of a String +impl std::borrow::Borrow for Id { + fn borrow(&self) -> &str { + &self.id + } +} + /// Collection with typed Ids #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Collection(HashMap, T>); @@ -49,32 +58,35 @@ impl Default for Collection { } impl Collection { + + /// Get a typed [Id] from a raw &str + /// An [Id] can be returned only if it exists in the [Collection] pub fn get_id(&self, raw_id: &str) -> Option> { - let id = Id::must_exists(raw_id.to_owned()); - if self.0.contains_key(&id) { - Some(id) - } else { - None - } - } - pub(crate) fn get_by_str(&self, raw_id: &str) -> Option<(Id, &T)> { - let id = Id::must_exists(raw_id.to_owned()); - self.0.get(&id).map(|v| (id, v)) + self.get_by_str(raw_id).map(|(k, _v)| k.clone()) } + /// Get an &[Id] and a reference to the object associated with it if it exists in the [Collection] + pub(crate) fn get_by_str(&self, raw_id: &str) -> Option<(&Id, &T)> { + self.0.get_key_value(raw_id) + } + + /// Get an &[Id] and a mutable reference to the object associated with it if it exists in the [Collection] pub(crate) fn get_mut_by_str(&mut self, raw_id: &str) -> Option<(Id, &mut T)> { - let id = Id::must_exists(raw_id.to_owned()); - self.0.get_mut(&id).map(|v| (id, v)) + self.0.get_mut(raw_id).map(|v| (Id::must_exists(raw_id.to_owned()), v)) } + /// Get the object associated to the typed [Id] pub fn get(&self, id: &Id) -> Option<&T> { self.0.get(id) } + + /// Returns the number of objects in the [Collection] pub fn len(&self) -> usize { self.0.len() } } +// Implements FromIterator to be able to easily build a [Collection] if we know how to associate an object with its [Id] impl FromIterator for Collection { fn from_iter>(iter: I) -> Self { let mut c = Self::default(); From 5bf0b272ea70c1c085d2bc1b7fddb7ae64f9a4d2 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Tue, 3 May 2022 20:54:21 +0200 Subject: [PATCH 04/10] implements AsRef for easier composition --- src/id.rs | 16 +++++++++++----- src/tests.rs | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/id.rs b/src/id.rs index e7b0dae..32f1470 100644 --- a/src/id.rs +++ b/src/id.rs @@ -18,7 +18,6 @@ pub struct Id { } impl Id { - /// private method to build an Id that exists in the [Collection] fn must_exists(s: String) -> Id { Id { @@ -27,12 +26,18 @@ impl Id { } } - /// get as str + /// Extracts a string slice containing the entire [Id] pub fn as_str(&self) -> &str { self } } +impl std::convert::AsRef for Id { + fn as_ref(&self) -> &str { + self + } +} + impl std::ops::Deref for Id { type Target = str; fn deref(&self) -> &str { @@ -58,7 +63,6 @@ impl Default for Collection { } impl Collection { - /// Get a typed [Id] from a raw &str /// An [Id] can be returned only if it exists in the [Collection] pub fn get_id(&self, raw_id: &str) -> Option> { @@ -69,10 +73,12 @@ impl Collection { pub(crate) fn get_by_str(&self, raw_id: &str) -> Option<(&Id, &T)> { self.0.get_key_value(raw_id) } - + /// Get an &[Id] and a mutable reference to the object associated with it if it exists in the [Collection] pub(crate) fn get_mut_by_str(&mut self, raw_id: &str) -> Option<(Id, &mut T)> { - self.0.get_mut(raw_id).map(|v| (Id::must_exists(raw_id.to_owned()), v)) + self.0 + .get_mut(raw_id) + .map(|v| (Id::must_exists(raw_id.to_owned()), v)) } /// Get the object associated to the typed [Id] diff --git a/src/tests.rs b/src/tests.rs index 7a649b1..f5c0d41 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -184,7 +184,7 @@ fn read_pathways() { let pathways = >fs.get_stop("stop1").unwrap().pathways; assert_eq!(1, pathways.len()); - assert_eq!("stop3", pathways[0].to_stop_id.as_str()); + assert_eq!("stop3", pathways[0].to_stop_id.as_ref()); assert_eq!(PathwayMode::Walkway, pathways[0].mode); assert_eq!( PathwayDirectionType::Unidirectional, From cec259d1a5b67d0d35287a1acd1e424ecda16a64 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 14:44:27 +0200 Subject: [PATCH 05/10] implements various iterators --- src/id.rs | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/id.rs b/src/id.rs index 32f1470..2caaea1 100644 --- a/src/id.rs +++ b/src/id.rs @@ -1,6 +1,9 @@ use crate::WithId; use core::marker::PhantomData; -use std::{collections::HashMap, iter::FromIterator}; +use std::{ + collections::{hash_map, HashMap}, + iter::FromIterator, +}; /// Typed Id over a [Collection] #[derive(Derivative, Serialize, Deserialize)] @@ -90,6 +93,26 @@ impl Collection { pub fn len(&self) -> usize { self.0.len() } + + // Return true if the collection has no objects. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Iterates over the ([Id], &T) of the [Collection]. + pub fn iter(&self) -> hash_map::Iter, T> { + self.0.iter() + } + + /// Iterates over the &T of the [Collection]. + pub fn values(&self) -> hash_map::Values<'_, Id, T> { + self.0.values() + } + + /// Iterates over the &mut T of the [Collection]. + pub fn values_mut(&mut self) -> hash_map::ValuesMut<'_, Id, T> { + self.0.values_mut() + } } // Implements FromIterator to be able to easily build a [Collection] if we know how to associate an object with its [Id] @@ -104,3 +127,21 @@ impl FromIterator for Collection { c } } + +impl<'a, T> IntoIterator for &'a Collection { + type Item = (&'a Id, &'a T); + type IntoIter = hash_map::Iter<'a, Id, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl IntoIterator for Collection { + type Item = (Id, T); + type IntoIter = hash_map::IntoIter, T>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} From 7e146eb6255961dd7144136e8ea0f3e74a8b749e Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 14:54:15 +0200 Subject: [PATCH 06/10] implements index on collection --- src/id.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/id.rs b/src/id.rs index 2caaea1..63b35fd 100644 --- a/src/id.rs +++ b/src/id.rs @@ -145,3 +145,22 @@ impl IntoIterator for Collection { self.0.into_iter() } } + +impl std::ops::Index<&Q> for Collection +where + Id: std::borrow::Borrow, + Q: Eq + std::hash::Hash, +{ + type Output = T; + + /// Returns a reference to the value corresponding to the supplied key. + /// + /// # Panics + /// + /// Panics if the key is not present in the [Collection]. + /// This can happens only if the key was removed from the collection at one point. + /// If no key are removed from the [Collection], you can safely use this method. + fn index(&self, k: &Q) -> &Self::Output { + &self.0[k] + } +} From ae424e1aac2f8a5f520d39ad13ec7ee5357d23c6 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 15:05:21 +0200 Subject: [PATCH 07/10] rename methods --- src/gtfs.rs | 11 +++++++++-- src/tests.rs | 37 ++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/gtfs.rs b/src/gtfs.rs index 8b9612d..ff73f2c 100644 --- a/src/gtfs.rs +++ b/src/gtfs.rs @@ -1,4 +1,4 @@ -use crate::{id::Collection, objects::*, Error, RawGtfs}; +use crate::{id::Collection, id::Id, objects::*, Error, RawGtfs}; use chrono::prelude::NaiveDate; use chrono::Duration; use std::collections::{HashMap, HashSet}; @@ -174,7 +174,14 @@ impl Gtfs { } /// Gets a [Stop] by its `stop_id` - pub fn get_stop<'a>(&'a self, raw_id: &str) -> Result<&'a Stop, Error> { + pub fn get_stop<'a>(&'a self, raw_id: &Id) -> Result<&'a Stop, Error> { + self.stops + .get(raw_id) + .ok_or_else(|| Error::ReferenceError(raw_id.to_string())) + } + + /// Gets a [Stop] by a &str + pub fn get_stop_by_raw_id<'a>(&'a self, raw_id: &str) -> Result<&'a Stop, Error> { self.stops .get_by_str(raw_id) .ok_or_else(|| Error::ReferenceError(raw_id.to_string())) diff --git a/src/tests.rs b/src/tests.rs index f5c0d41..5be29de 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -33,22 +33,25 @@ fn read_stop() { assert_eq!(6, gtfs.stops.len()); assert_eq!( LocationType::StopArea, - gtfs.get_stop("stop1").unwrap().location_type + gtfs.get_stop_by_raw_id("stop1").unwrap().location_type ); assert_eq!( LocationType::StopPoint, - gtfs.get_stop("stop2").unwrap().location_type + gtfs.get_stop_by_raw_id("stop2").unwrap().location_type + ); + assert_eq!( + Some(48.796_058), + gtfs.get_stop_by_raw_id("stop2").unwrap().latitude ); - assert_eq!(Some(48.796_058), gtfs.get_stop("stop2").unwrap().latitude); assert_eq!( Some("1".to_owned()), - gtfs.get_stop("stop3").unwrap().parent_station + gtfs.get_stop_by_raw_id("stop3").unwrap().parent_station ); assert_eq!( LocationType::GenericNode, - gtfs.get_stop("stop6").unwrap().location_type + gtfs.get_stop_by_raw_id("stop6").unwrap().location_type ); - assert_eq!(None, gtfs.get_stop("stop6").unwrap().latitude); + assert_eq!(None, gtfs.get_stop_by_raw_id("stop6").unwrap().latitude); } #[test] @@ -144,36 +147,36 @@ fn read_fare_attributes() { #[test] fn read_transfers() { let gtfs = Gtfs::from_path("fixtures/basic").expect("impossible to read gtfs"); - assert_eq!(1, gtfs.get_stop("stop3").unwrap().transfers.len()); - assert_eq!(1, gtfs.get_stop("stop1").unwrap().transfers.len()); + assert_eq!(1, gtfs.get_stop_by_raw_id("stop3").unwrap().transfers.len()); + assert_eq!(1, gtfs.get_stop_by_raw_id("stop1").unwrap().transfers.len()); assert_eq!( "stop5", - gtfs.get_stop("stop3").unwrap().transfers[0] + gtfs.get_stop_by_raw_id("stop3").unwrap().transfers[0] .to_stop_id .as_str() ); assert_eq!( "stop2", - gtfs.get_stop("stop1").unwrap().transfers[0] + gtfs.get_stop_by_raw_id("stop1").unwrap().transfers[0] .to_stop_id .as_str() ); assert_eq!( TransferType::Recommended, - gtfs.get_stop("stop3").unwrap().transfers[0].transfer_type + gtfs.get_stop_by_raw_id("stop3").unwrap().transfers[0].transfer_type ); assert_eq!( TransferType::Impossible, - gtfs.get_stop("stop1").unwrap().transfers[0].transfer_type + gtfs.get_stop_by_raw_id("stop1").unwrap().transfers[0].transfer_type ); assert_eq!( Some(60), - gtfs.get_stop("stop3").unwrap().transfers[0].min_transfer_time + gtfs.get_stop_by_raw_id("stop3").unwrap().transfers[0].min_transfer_time ); assert_eq!( None, - gtfs.get_stop("stop1").unwrap().transfers[0].min_transfer_time + gtfs.get_stop_by_raw_id("stop1").unwrap().transfers[0].min_transfer_time ); } @@ -181,7 +184,7 @@ fn read_transfers() { fn read_pathways() { let gtfs = Gtfs::from_path("fixtures/basic").expect("impossible to read gtfs"); - let pathways = >fs.get_stop("stop1").unwrap().pathways; + let pathways = >fs.get_stop_by_raw_id("stop1").unwrap().pathways; assert_eq!(1, pathways.len()); assert_eq!("stop3", pathways[0].to_stop_id.as_ref()); @@ -237,12 +240,12 @@ fn read_from_gtfs() { assert!(gtfs.get_calendar("service1").is_ok()); assert!(gtfs.get_calendar_date("service1").is_ok()); - assert!(gtfs.get_stop("stop1").is_ok()); + assert!(gtfs.get_stop_by_raw_id("stop1").is_ok()); assert!(gtfs.get_route("1").is_ok()); assert!(gtfs.get_trip("trip1").is_ok()); assert!(gtfs.get_fare_attributes("50").is_ok()); - assert!(gtfs.get_stop("Utopia").is_err()); + assert!(gtfs.get_stop_by_raw_id("Utopia").is_err()); } #[test] From d07f31028d6086930a405b85e60280695ad8013a Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 16:34:31 +0200 Subject: [PATCH 08/10] add a mutable getter on stops --- src/id.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/id.rs b/src/id.rs index 63b35fd..dd588f4 100644 --- a/src/id.rs +++ b/src/id.rs @@ -89,6 +89,11 @@ impl Collection { self.0.get(id) } + /// Get a mutable reference on the object associated to the typed [Id] + pub fn get_mut(&mut self, id: &Id) -> Option<&mut T> { + self.0.get_mut(id) + } + /// Returns the number of objects in the [Collection] pub fn len(&self) -> usize { self.0.len() From 227cc5e74fbc9c84e2b8716e66d4d7d79053e067 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 16:34:59 +0200 Subject: [PATCH 09/10] add a complete example on stops handling --- examples/gtfs_reader.rs | 33 +++++++++++++++++++++++++++++++-- fixtures/basic/stops.txt | 2 +- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/examples/gtfs_reader.rs b/examples/gtfs_reader.rs index f7851ac..4e3219a 100644 --- a/examples/gtfs_reader.rs +++ b/examples/gtfs_reader.rs @@ -2,8 +2,8 @@ fn main() { /* Gtfs::new will try to guess if you provide a path, a local zip file or a remote zip file. You can also use Gtfs::from_path, Gtfs::from_url */ - let gtfs = gtfs_structures::GtfsReader::default() - .read_stop_times(false) + let mut gtfs = gtfs_structures::GtfsReader::default() + .read_stop_times(true) .read("fixtures/basic") .expect("impossible to read gtfs"); gtfs.print_stats(); @@ -12,4 +12,33 @@ fn main() { let route_1 = gtfs.routes.get("1").expect("no route 1"); println!("{}: {:?}", route_1.short_name, route_1); + + // you can access a stop by a &str + let _ = gtfs + .get_stop_by_raw_id("stop1") + .expect("unable to find stop Stop Area"); + + let trip = gtfs.trips.get("trip1").expect("no route 1"); + let stop_id: >fs_structures::Id = + &trip.stop_times.first().expect("no stoptimes").stop; + + // or with a typed id if you have one + + // if no stops have been removed from the gtfs, you can safely access the stops by it's id + let s = >fs.stops[stop_id]; + println!("stop name: {}", &s.name); + + // if some removal have been done, you can also you those method to get an Option + let s = gtfs.get_stop(stop_id).expect("this stop should exists"); + println!("stop description: {}", &s.description); + + // or you can access it via `stops.get` + let s = gtfs.stops.get(stop_id).expect("this stop should exists"); + println!("stop location type: {:?}", &s.location_type); + + let mut s = gtfs + .stops + .get_mut(stop_id) + .expect("this stop should exists"); + s.code = Some("code".into()); } diff --git a/fixtures/basic/stops.txt b/fixtures/basic/stops.txt index d3984fe..24c11e7 100644 --- a/fixtures/basic/stops.txt +++ b/fixtures/basic/stops.txt @@ -1,6 +1,6 @@ stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,wheelchair_boarding stop1,"Stop Area",, 48.796058 ,2.449386,,,1,, -stop2,"StopPoint",,48.796058,2.449386,,,,, +stop2,"StopPoint",some description,48.796058,2.449386,,,,, stop3,"Stop Point child of 1",,48.796058,2.449386,,,0,1, stop4,"StopPoint2",,48.796058,2.449386,,,,, stop5,"Stop Point child of 1 bis",,48.796058,2.449386,,,0,1, From 21da3284a34ce9d308a2372a4c0843603d0796dc Mon Sep 17 00:00:00 2001 From: antoine-de Date: Sun, 15 May 2022 16:36:14 +0200 Subject: [PATCH 10/10] remove useless comment --- src/gtfs.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gtfs.rs b/src/gtfs.rs index ff73f2c..ce8eaef 100644 --- a/src/gtfs.rs +++ b/src/gtfs.rs @@ -245,8 +245,6 @@ fn to_stop_collection( ) -> Result, Error> { let mut stop_map: Collection = stops.into_iter().collect(); for transfer in raw_transfers { - // Note: I'm not convinced at all by this Id::must_exists... - // we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity) let to_stop_id = stop_map .get_id(&transfer.to_stop_id) .ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?;