use std::{ collections::HashMap, error::Error, fmt::Display, path::PathBuf, time::{Duration, SystemTime}, }; use rand::{thread_rng, Rng}; use rand_derive2::RandGen; use serde::Deserialize; use tokio::sync::Mutex; pub struct StationsList { dir: PathBuf, stations: Mutex>, } struct OptStation { updated: Option, station: Option, } impl StationsList { pub fn new_from(dir: PathBuf) -> Result> { let _ = std::fs::create_dir_all(&dir); let mut stations: HashMap = Default::default(); for f in std::fs::read_dir(&dir)? { let f = f?; let fmeta = f.metadata()?; if fmeta.is_file() { let id = f.file_name(); let id = id .to_str() .ok_or_else(|| format!("non-utf8 filename {:?}!", f.file_name()))?; let fp = f.path(); let save = std::fs::read_to_string(&fp)?; let station = if save.trim().is_empty() { None } else { Some(Station::from_save(id, save)?) }; stations.insert( id.trim().to_owned(), OptStation { updated: Some(fmeta.modified().unwrap_or_else(|_| SystemTime::now())), station, }, ); } else if fmeta.is_dir() { } } let mut count_s: usize = 0; let mut count_n: usize = 0; for station in stations.values() { if station.station.is_some() { count_s += 1; } else { count_n += 1; } } eprintln!("Loaded {count_s} stations and {count_n} non-stations from {dir:?}"); Ok(Self { dir, stations: Mutex::new(stations), }) } pub fn station_count(&self) -> usize { self.stations .blocking_lock() .values() .filter(|v| v.station.is_some()) .count() } pub fn add_new_from_departures(&self, deps: &Departures) { let mut stations = self.stations.blocking_lock(); for dep in deps.entries.iter().map(|v| v.iter()).flatten() { for id in [&dep.stop_place.slug, &dep.destination.slug] .into_iter() .chain(dep.via_stops.iter().map(|v| &v.slug)) .map(|v| v.trim()) .filter(|v| !v.is_empty()) { if !stations.contains_key(id) { stations.insert( id.to_owned(), OptStation { updated: None, station: None, }, ); } } } } pub fn query_for_new_stations(&self) -> Result<(), Box> { // maybe find a new station or update an existing one 
let html = reqwest::blocking::get("https://bahnhof.de/en/search")? .error_for_status()? .text()?; for (match_index, match_str) in html.match_indices(r#"href="/en/"#) { let id = &html[match_index + match_str.len()..]; if let Some(end) = id.find(r#"""#) { let id = id[..end].trim(); if !id.contains(['/', '%', '&', '?', '#']) { let mut stations = self.stations.blocking_lock(); Self::requery_station_if_necessary_or_add_new_int(id, &self.dir, &mut stations); } } } Ok(()) } pub fn requery_random_station(&self, cache_count: usize) -> Result<(), Box> { let mut stations = self.stations.blocking_lock(); if !stations.is_empty() { for id in stations .iter() .skip(thread_rng().gen_range(0..stations.len())) .take(cache_count) .map(|v| v.0.clone()) .collect::>() { if Self::requery_station_if_necessary_or_add_new_int(&id, &self.dir, &mut stations) { break; } } } Ok(()) } pub fn requery_station_if_necessary_or_add_new(&self, id: &str) -> bool { Self::requery_station_if_necessary_or_add_new_int( id, &self.dir, &mut self.stations.blocking_lock(), ) } fn requery_station_if_necessary_or_add_new_int( id: &str, dir: &PathBuf, stations: &mut HashMap, ) -> bool { if let Some(s) = stations.get_mut(id) { // recheck stations after a day, and recheck non-stations after a week if s.updated.is_none_or(|updated| { updated.elapsed().is_ok_and(|elapsed| { elapsed > Duration::from_secs( if s.station.is_some() { 1 } else { 7 } * 24 * 60 * 60, ) }) }) { match Station::query_station(id) { Ok(station) => { if let Some(prev) = s.station.take() { if prev == station { eprintln!("Confirmed station {id} (unchanged)"); } else { eprintln!( "Updated station {id} (changed): {prev:?} -> {station:?}" ); if let Err(e) = std::fs::write(dir.join(id), station.to_save()) { eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id)); } } } else { eprintln!("Added new station {id}: non-station -> {station:?}"); if let Err(e) = std::fs::write(dir.join(id), station.to_save()) { eprintln!("[ERR] Couldn't save file {:?}: 
{e}", dir.join(id)); } } s.station = Some(station); } Err(e) => { if s.updated.is_none_or(|updated| { updated.elapsed().is_ok_and(|elapsed| { elapsed > Duration::from_secs(7 * 24 * 60 * 60) }) }) { eprintln!("Error querying station {id} and last updated over a week ago, marking as non-station! Error: {e}"); let _ = stations.remove(id); if let Err(e) = std::fs::write(dir.join(id), "") { eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id)); } } else { eprintln!( "Error querying station {id}, keeping old data for now. Error: {e}" ); } } } true } else { false } } else { stations.insert( id.to_owned(), OptStation { updated: Some(SystemTime::now()), station: match Station::query_station(id) { Ok(station) => { eprintln!("Added new station {id}: nothing -> {station:?}"); if let Err(e) = std::fs::write(dir.join(id), station.to_save()) { eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id)); } Some(station) } Err(e) => { eprintln!("Marked {id} as not a station. Error: {e}"); if let Err(e) = std::fs::write(dir.join(id), "") { eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id)); } None } }, }, ); true } } pub fn get_station(&self, id: &str, map: impl FnOnce(&Station) -> T) -> Option { self.stations .blocking_lock() .get(id) .and_then(|v| v.station.as_ref()) .map(map) } pub fn get_random_station( &self, limit_tries_cuberoot: usize, map: impl Fn(&str, &Station) -> Option, ) -> Option { let stations = self.stations.blocking_lock(); for i in 1..=limit_tries_cuberoot { for i in rand::seq::index::sample( &mut thread_rng(), stations.len(), (i * i).min(stations.len()), ) { if let Some((id, opt)) = stations.iter().nth(i) { if let Some(station) = &opt.station { if let Some(v) = map(id, station) { return Some(v); } } } } } None } pub fn find_stations(&self, name: &str) -> Vec<(String, String)> { let stations = self.stations.blocking_lock(); let name = name.to_lowercase(); if let Some(s) = stations.get(&name).and_then(|v| v.station.as_ref()) { 
vec![(name.to_owned(), s.name.clone())] } else { let mut o = HashMap::::new(); for (id, station) in stations.iter() { if let Some(station) = &station.station { if station.name.to_lowercase() == name { o.insert(id.clone(), (station.name.clone(), 0)); } } } if o.len() < 3 { for (id, station) in stations.iter() { if let Some(station) = &station.station { if station.name.to_lowercase().starts_with(&name) { o.insert( id.clone(), ( station.name.clone(), station.name.len().saturating_sub(name.len()).min(100), ), ); } } } if o.len() < 3 { for (id, station) in stations.iter() { if let Some(station) = &station.station { if let Some(pos) = station.name.to_lowercase().find(&name) { o.insert(id.clone(), (station.name.clone(), (100 + pos).min(200))); } } } } } let mut o = o.into_iter().collect::>(); o.sort_unstable_by_key(|v| v.1 .1); o.into_iter().map(|v| (v.0, v.1 .0)).collect() } } } #[derive(Clone, Debug, PartialEq)] pub struct Station { name: String, eva_numbers: Vec, } impl Station { pub fn name(&self) -> &str { &self.name } fn from_save(id: &str, save: String) -> Result> { let mut name = None; let mut eva_numbers = None; for line in save.lines() { match line .split_once('=') .map_or_else(|| ("", line.trim()), |(a, b)| (a.trim(), b.trim())) { ("name", v) => name = Some(v.to_owned()), ("evanums", v) => eva_numbers = Some(v.to_owned()), ("", line) => match line { line => Err(format!("invalid flag-line in bahnhof {id}: {line}"))?, }, (key, value) => Err(format!( "invalid key-value-line in bahnhof {id}: {key}={value}" ))?, } } Ok(Station { name: name.ok_or_else(|| format!("in station {id}: missing name"))?, eva_numbers: eva_numbers .ok_or_else(|| format!("in station {id}: missing evanums"))? 
.split(',') .map(|v| { v.trim() .parse() .map_err(|e| format!("eva number {v} could not be parsed: {e}")) }) .collect::>()?, }) } fn to_save(&self) -> String { let mut o = String::new(); o.push_str("name="); o.push_str(&self.name); o.push('\n'); if !self.eva_numbers.is_empty() { o.push_str("evanums="); for (i, num) in self.eva_numbers.iter().enumerate() { if i != 0 { o.push(','); } o.push_str(&format!("{num}")); } o.push('\n'); } o } fn query_station(id: &str) -> Result> { let html = reqwest::blocking::get(format!("https://www.bahnhof.de/{id}/departure"))?.text()?; let start_pat = r#"Abfahrt "#; if let (Some(start), Some(end)) = (html.find(start_pat), html.find("")) { let start = start + start_pat.len(); let mut name = if end > start { html[start..end].trim() } else { "" }; // skip next char (some UTF8 dash) if !name.is_empty() { name = name[name.chars().next().unwrap().len_utf8()..].trim(); } let pat = r#", _>>()?; if eva_numbers.is_empty() { Err("no evaNumbers (found empty list)")?; } Ok(Self { name: name.to_owned(), eva_numbers, }) } else { Err("missing evaNumbers")? } } else { Err("missing evaNumbers")? } } else { let start_pat = ""; if let (Some(start), Some(end)) = (html.find(start_pat), html.find("")) { if start + start_pat.len() < end { Err(format!( "missing title `Abfahrt - `: title was `{}`", &html[start + start_pat.len()..end] ))? } else { Err(format!( "missing title `Abfahrt - `: before " ))? } } else { Err(format!( "missing title `Abfahrt - <name>`: no <title> found in `{html}`" ))? } } } pub fn query_departures( &self, id: &str, minutes: u8, filter_transports: FilterTransports, ) -> Result<Departures, Box<dyn Error>> { let _ = id; if self.eva_numbers.is_empty() { Err("station has no eva numbers")?; } let json = reqwest::blocking::get(self.query_departures_url(false, minutes, filter_transports))? .text()?; Ok(serde_json::from_str(&json).map_err(|e| format!("{e}\nin:\n{json}"))?) 
}

    /// Fetches the arrivals board for this station.
    ///
    /// `id` is accepted but not used; the request is built solely from the
    /// station's EVA numbers.
    ///
    /// # Errors
    ///
    /// Returns an error if the station has no EVA numbers, if the HTTP request
    /// or reading its body fails, or if the body cannot be deserialized into
    /// [`Arrivals`] (the raw body is appended to that error message).
    pub fn query_arrivals(
        &self,
        id: &str,
        minutes: u8,
        filter_transports: FilterTransports,
    ) -> Result<Arrivals, Box<dyn Error>> {
        let _ = id;
        if self.eva_numbers.is_empty() {
            return Err("station has no eva numbers".into());
        }
        let url = self.query_departures_url(true, minutes, filter_transports);
        let json = reqwest::blocking::get(url)?.text()?;
        Ok(serde_json::from_str(&json).map_err(|e| format!("{e}\nin:\n{json}"))?)
    }

    /// Builds the board API URL for this station.
    ///
    /// `arrivals` selects the `arrivals` board instead of the `departures`
    /// board. Every EVA number of the station is added as its own
    /// `evaNumbers` query parameter, followed by the duration, the
    /// `filterTransports` parameters rendered by `filter_transports`'s
    /// `Display` impl, and fixed `locale`/`sortBy` parameters.
    fn query_departures_url(
        &self,
        arrivals: bool,
        minutes: u8,
        filter_transports: FilterTransports,
    ) -> String {
        let board = if arrivals { "arrivals" } else { "departures" };
        let mut url = format!("https://www.bahnhof.de/api/boards/{board}?");
        for num in self.eva_numbers.iter() {
            url.push_str(&format!("evaNumbers={num}&"));
        }
        url.push_str(&format!(
            "duration={minutes}{filter_transports}&locale=de&sortBy=TIME_SCHEDULE"
        ));
        url
    }

    /// Counts the board entries in which no train is marked as canceled.
    ///
    /// `arrivals` selects which board is queried. `id` is accepted but not
    /// used.
    ///
    /// # Errors
    ///
    /// Returns an error if the station has no EVA numbers, if the HTTP request
    /// or reading its body fails, or if the body cannot be deserialized into
    /// [`ArrivalsOrDepartures`] (the raw body is appended to that error
    /// message).
    pub fn query_activity_count(
        &self,
        id: &str,
        arrivals: bool,
        minutes: u8,
        filter_transports: FilterTransports,
    ) -> Result<usize, Box<dyn Error>> {
        let _ = id;
        if self.eva_numbers.is_empty() {
            return Err("station has no eva numbers".into());
        }
        // Same URL as the arrivals/departures queries; reuse the shared builder
        // instead of duplicating it here.
        let url = self.query_departures_url(arrivals, minutes, filter_transports);
        let json = reqwest::blocking::get(url)?.text()?;
        let board: ArrivalsOrDepartures =
            serde_json::from_str(&json).map_err(|e| format!("{e}\nin:\n{json}"))?;
        Ok(board
            .entries
            .iter()
            // An entry is a group of trains; skip the group if any of them is canceled.
            .filter(|group| !group.iter().any(|entry| entry.canceled))
            .count())
    }
}

/// Board response reduced to what [`Station::query_activity_count`] needs.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArrivalsOrDepartures {
    pub entries: Vec<Vec<ArrivalOrDeparture>>,
}

/// A single board entry reduced to its cancellation flag.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArrivalOrDeparture {
    pub canceled: bool,
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Departures {
    /// Vec<Vec<_>> because there may be a departure of multiple
/// trains (usually coupled together, but still different trains/routes/destinations)
    pub entries: Vec<Vec<Departure>>,
}

/// One train of a departure-board entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Departure {
    pub canceled: bool,
    pub line_name: String,
    pub stop_place: SomeStation,
    pub destination: SomeStation,
    pub via_stops: Vec<SomeStation>,
}

/// A station as referenced from a board entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SomeStation {
    // pub eva_number: String,
    pub name: String,
    /// Falls back to an empty string when the field is absent from the JSON.
    #[serde(default)]
    pub slug: String,
}

/// Arrivals-board response.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Arrivals {
    pub entries: Vec<Vec<Arrival>>,
}

/// One train of an arrivals-board entry.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Arrival {
    pub canceled: bool,
    pub line_name: String,
    // pub stop_place: SomeStation,
    pub origin: SomeStation,
    // pub via_stops: Vec<SomeStation>,
}

/// Selects which transport categories are requested from the board API.
///
/// Every category set exists in two flavours: the plain variant additionally
/// requests the `UNKNOWN` category, the `…Known` variant does not.
#[derive(Clone, Copy, RandGen, PartialEq, Eq)]
pub enum FilterTransports {
    /// High-speed, regional, local (city train/tram) and bus, plus unknown.
    All,
    /// Like [`Self::All`] without unknown.
    AllKnown,
    /// High-speed, regional and local (city train/tram), plus unknown.
    AllTrains,
    /// Like [`Self::AllTrains`] without unknown.
    AllTrainsKnown,
    /// High-speed and regional, plus unknown.
    Trains,
    /// Like [`Self::Trains`] without unknown.
    TrainsKnown,
    /// Regional and local (city train/tram), plus unknown.
    AllRegionalTrains,
    /// Like [`Self::AllRegionalTrains`] without unknown.
    AllRegionalTrainsKnown,
    /// Regional only, plus unknown.
    RegionalTrains,
    /// Like [`Self::RegionalTrains`] without unknown.
    RegionalTrainsKnown,
    /// High-speed only, plus unknown.
    HighSpeedTrains,
    /// Like [`Self::HighSpeedTrains`] without unknown.
    HighSpeedTrainsKnown,
}

/// The six filters that include the unknown category, one per category set.
pub const ALL_FILTER_TRANSPORTS: [FilterTransports; 6] = [
    FilterTransports::All,
    FilterTransports::AllTrains,
    FilterTransports::Trains,
    FilterTransports::AllRegionalTrains,
    FilterTransports::RegionalTrains,
    FilterTransports::HighSpeedTrains,
];

/// Renders the filter as a sequence of `&filterTransports=…` query parameters
/// (each with a leading `&`), ready to be appended to a board URL.
impl Display for FilterTransports {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if self.allow_high_speed() {
            f.write_str(
                "&filterTransports=HIGH_SPEED_TRAIN&filterTransports=INTERCITY_TRAIN&filterTransports=INTER_REGIONAL_TRAIN",
            )?;
        }
        if self.allow_regional() {
            f.write_str("&filterTransports=REGIONAL_TRAIN")?;
        }
        if self.allow_local() {
            f.write_str("&filterTransports=CITY_TRAIN&filterTransports=TRAM")?;
        }
        if self.allow_bus() {
            f.write_str("&filterTransports=BUS")?;
        }
        if self.allow_unknown() {
            f.write_str("&filterTransports=UNKNOWN")?;
        }
        Ok(())
    }
}

impl FilterTransports {
    /// Whether the `UNKNOWN` category is requested (all non-`Known` variants).
    fn allow_unknown(&self) -> bool {
        match self {
            Self::All
            | Self::AllTrains
            | Self::Trains
            | Self::AllRegionalTrains
            | Self::RegionalTrains
            | Self::HighSpeedTrains => true,
            Self::AllKnown
            | Self::AllTrainsKnown
            | Self::TrainsKnown
            | Self::AllRegionalTrainsKnown
            | Self::RegionalTrainsKnown
            | Self::HighSpeedTrainsKnown => false,
        }
    }

    /// Whether the `BUS` category is requested.
    fn allow_bus(&self) -> bool {
        match self {
            Self::All | Self::AllKnown => true,
            Self::AllTrains
            | Self::AllTrainsKnown
            | Self::Trains
            | Self::TrainsKnown
            | Self::AllRegionalTrains
            | Self::AllRegionalTrainsKnown
            | Self::RegionalTrains
            | Self::RegionalTrainsKnown
            | Self::HighSpeedTrains
            | Self::HighSpeedTrainsKnown => false,
        }
    }

    /// Whether the `CITY_TRAIN` and `TRAM` categories are requested.
    fn allow_local(&self) -> bool {
        match self {
            Self::All
            | Self::AllKnown
            | Self::AllTrains
            | Self::AllTrainsKnown
            | Self::AllRegionalTrains
            | Self::AllRegionalTrainsKnown => true,
            Self::Trains
            | Self::TrainsKnown
            | Self::RegionalTrains
            | Self::RegionalTrainsKnown
            | Self::HighSpeedTrains
            | Self::HighSpeedTrainsKnown => false,
        }
    }

    /// Whether the `REGIONAL_TRAIN` category is requested.
    fn allow_regional(&self) -> bool {
        match self {
            Self::All
            | Self::AllKnown
            | Self::AllTrains
            | Self::AllTrainsKnown
            | Self::Trains
            | Self::TrainsKnown
            | Self::AllRegionalTrains
            | Self::AllRegionalTrainsKnown
            | Self::RegionalTrains
            | Self::RegionalTrainsKnown => true,
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => false,
        }
    }

    /// Whether the `HIGH_SPEED_TRAIN`, `INTERCITY_TRAIN` and
    /// `INTER_REGIONAL_TRAIN` categories are requested.
    fn allow_high_speed(&self) -> bool {
        match self {
            Self::All
            | Self::AllKnown
            | Self::AllTrains
            | Self::AllTrainsKnown
            | Self::Trains
            | Self::TrainsKnown
            | Self::HighSpeedTrains
            | Self::HighSpeedTrainsKnown => true,
            Self::AllRegionalTrains
            | Self::AllRegionalTrainsKnown
            | Self::RegionalTrains
            | Self::RegionalTrainsKnown => false,
        }
    }

    /// Returns the `…Known` variant of the same category set (drops unknown).
    pub fn only_known(&self) -> Self {
        match self {
            Self::All | Self::AllKnown => Self::AllKnown,
            Self::AllTrains | Self::AllTrainsKnown => Self::AllTrainsKnown,
            Self::Trains | Self::TrainsKnown => Self::TrainsKnown,
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => Self::AllRegionalTrainsKnown,
            Self::RegionalTrains | Self::RegionalTrainsKnown => Self::RegionalTrainsKnown,
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => Self::HighSpeedTrainsKnown,
        }
    }

    /// Returns the variant of the same category set that also requests unknown.
    pub fn and_unknown(&self) -> Self {
        match self {
            Self::All | Self::AllKnown => Self::All,
            Self::AllTrains | Self::AllTrainsKnown => Self::AllTrains,
            Self::Trains | Self::TrainsKnown => Self::Trains,
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => Self::AllRegionalTrains,
            Self::RegionalTrains | Self::RegionalTrainsKnown => Self::RegionalTrains,
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => Self::HighSpeedTrains,
        }
    }

    /// Returns a short German description of the category set.
    ///
    /// The plain and the `…Known` variant of a set share the same text.
    pub fn explained(&self) -> &'static str {
        match self {
            Self::All | Self::AllKnown => "Öffis (Zug, S+U, Tram, Bus)",
            Self::AllTrains | Self::AllTrainsKnown => "Schienenverkehr (Zug, S+U, Tram)",
            Self::Trains | Self::TrainsKnown => "Züge (Zug, ohne Tram)",
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => {
                "Deutschland-Ticket (Regio, S+U, Tram)"
            }
            Self::RegionalTrains | Self::RegionalTrainsKnown => "Regionalzüge (Regio, ohne Tram)",
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => "nur Hochgeschwindigkeitszüge",
        }
    }

    /// Misspelled alias of [`Self::explained`], kept so existing callers keep
    /// compiling.
    pub fn explined(&self) -> &'static str {
        self.explained()
    }
}