bahnreise/src/stations_list.rs
2025-03-02 02:53:37 +01:00

712 lines
25 KiB
Rust

use std::{
collections::HashMap,
error::Error,
fmt::Display,
path::PathBuf,
time::{Duration, SystemTime},
};
use rand::{thread_rng, Rng};
use rand_derive2::RandGen;
use serde::Deserialize;
use tokio::sync::Mutex;
/// In-memory cache of station ids, mirrored by one save file per id inside `dir`.
///
/// All accessors take the map through `blocking_lock`.
pub struct StationsList {
/// Directory holding the save files; each file is named after the id it describes.
dir: PathBuf,
/// Every known id mapped to its cached lookup result (station or non-station).
stations: Mutex<HashMap<String, OptStation>>,
}
/// Cache entry for a single id: a resolved station, or an id known not to be one.
struct OptStation {
/// Timestamp assigned when the entry was created: the save file's modification time
/// when loaded from disk, the current time when the id was first queried, or `None`
/// when the id was only seen in departure data and never queried.
updated: Option<SystemTime>,
/// `Some` when the id resolved to a station, `None` for a non-station or a
/// not-yet-queried id.
station: Option<Station>,
}
impl StationsList {
/// Loads the station cache from `dir`, creating the directory first if possible.
///
/// Every regular file in `dir` becomes one entry keyed by its (trimmed) file name:
/// a blank file is a known non-station, anything else is parsed with
/// `Station::from_save`. The file's modification time becomes the entry's `updated`
/// timestamp (falling back to the current time). Non-file entries are ignored.
///
/// # Errors
/// Fails if the directory cannot be listed, a file name is not valid UTF-8, a file
/// cannot be read, or a non-blank save file does not parse.
pub fn new_from(dir: PathBuf) -> Result<Self, Box<dyn Error>> {
    // The result is deliberately ignored; `read_dir` below still fails on a missing directory.
    let _ = std::fs::create_dir_all(&dir);
    let mut stations = HashMap::<String, OptStation>::new();
    for entry in std::fs::read_dir(&dir)? {
        let entry = entry?;
        let meta = entry.metadata()?;
        if !meta.is_file() {
            continue;
        }
        let file_name = entry.file_name();
        let Some(id) = file_name.to_str() else {
            return Err(format!("non-utf8 filename {:?}!", entry.file_name()).into());
        };
        let save = std::fs::read_to_string(entry.path())?;
        let station = if save.trim().is_empty() {
            None
        } else {
            Some(Station::from_save(id, save)?)
        };
        let updated = meta.modified().unwrap_or_else(|_| SystemTime::now());
        stations.insert(
            id.trim().to_owned(),
            OptStation {
                updated: Some(updated),
                station,
            },
        );
    }
    let count_s = stations
        .values()
        .filter(|entry| entry.station.is_some())
        .count();
    let count_n = stations.len() - count_s;
    eprintln!("Loaded {count_s} stations and {count_n} non-stations from {dir:?}");
    Ok(Self {
        dir,
        stations: Mutex::new(stations),
    })
}
/// Number of cached ids that resolved to an actual station (non-stations are not counted).
pub fn station_count(&self) -> usize {
    let stations = self.stations.blocking_lock();
    stations
        .values()
        .map(|entry| usize::from(entry.station.is_some()))
        .sum()
}
/// Registers every station slug mentioned in `deps` that is not cached yet.
///
/// The stop place, destination and via stops of each departure are considered.
/// Slugs are trimmed and blank ones skipped. New ids are stored without timestamp
/// and without station data; ids that are already cached are left untouched.
pub fn add_new_from_departures(&self, deps: &Departures) {
    let mut known = self.stations.blocking_lock();
    for departure in deps.entries.iter().flatten() {
        let slugs = std::iter::once(&departure.stop_place.slug)
            .chain(std::iter::once(&departure.destination.slug))
            .chain(departure.via_stops.iter().map(|stop| &stop.slug));
        for slug in slugs {
            let slug = slug.trim();
            if slug.is_empty() || known.contains_key(slug) {
                continue;
            }
            known.insert(
                slug.to_owned(),
                OptStation {
                    updated: None,
                    station: None,
                },
            );
        }
    }
}
/// Scans the bahnhof.de search page for `/en/<id>` links and looks each id up.
///
/// Every linked id is passed to `requery_station_if_necessary_or_add_new_int`, which
/// either adds it to the cache or refreshes it if it is stale. Link targets containing
/// `/`, `%`, `&`, `?` or `#` are skipped, as are targets that are empty, `.` or `..`:
/// the id doubles as a file name inside the cache directory, and none of those names
/// a file.
///
/// # Errors
/// Fails if the search page cannot be fetched or answers with an error status.
pub fn query_for_new_stations(&self) -> Result<(), Box<dyn Error>> {
    let html = reqwest::blocking::get("https://bahnhof.de/en/search")?
        .error_for_status()?
        .text()?;
    let link_prefix = r#"href="/en/"#;
    for (match_index, _) in html.match_indices(link_prefix) {
        let rest = &html[match_index + link_prefix.len()..];
        // The id runs up to the closing quote of the href attribute.
        let Some(end) = rest.find('"') else {
            continue;
        };
        let id = rest[..end].trim();
        if matches!(id, "" | "." | "..") || id.contains(['/', '%', '&', '?', '#']) {
            continue;
        }
        // Lock per link so the map is released between lookups.
        let mut stations = self.stations.blocking_lock();
        Self::requery_station_if_necessary_or_add_new_int(id, &self.dir, &mut stations);
    }
    Ok(())
}
/// Refreshes at most one stale entry, starting at a random position in the cache.
///
/// Up to `cache_count` consecutive ids (in map iteration order, from a random offset)
/// are offered to `requery_station_if_necessary_or_add_new_int`; the scan stops at the
/// first id for which a query was actually attempted. Always returns `Ok(())`.
pub fn requery_random_station(&self, cache_count: usize) -> Result<(), Box<dyn Error>> {
    let mut stations = self.stations.blocking_lock();
    if stations.is_empty() {
        return Ok(());
    }
    let offset = thread_rng().gen_range(0..stations.len());
    // Copy the candidate ids out so the map can be mutated while walking them.
    let candidates: Vec<String> = stations
        .keys()
        .skip(offset)
        .take(cache_count)
        .cloned()
        .collect();
    for id in candidates {
        let queried =
            Self::requery_station_if_necessary_or_add_new_int(&id, &self.dir, &mut stations);
        if queried {
            break;
        }
    }
    Ok(())
}
/// Locks the cache and refreshes (or creates) the entry for `id`.
///
/// Returns `true` if the station page was queried, `false` if the cached entry was
/// still fresh enough.
pub fn requery_station_if_necessary_or_add_new(&self, id: &str) -> bool {
    let mut stations = self.stations.blocking_lock();
    Self::requery_station_if_necessary_or_add_new_int(id, &self.dir, &mut stations)
}
fn requery_station_if_necessary_or_add_new_int(
id: &str,
dir: &PathBuf,
stations: &mut HashMap<String, OptStation>,
) -> bool {
if let Some(s) = stations.get_mut(id) {
// recheck stations after a day, and recheck non-stations after a week
if s.updated.is_none_or(|updated| {
updated.elapsed().is_ok_and(|elapsed| {
elapsed
> Duration::from_secs(
if s.station.is_some() { 1 } else { 7 } * 24 * 60 * 60,
)
})
}) {
match Station::query_station(id) {
Ok(station) => {
if let Some(prev) = s.station.take() {
if prev == station {
eprintln!("Confirmed station {id} (unchanged)");
} else {
eprintln!(
"Updated station {id} (changed): {prev:?} -> {station:?}"
);
if let Err(e) = std::fs::write(dir.join(id), station.to_save()) {
eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id));
}
}
} else {
eprintln!("Added new station {id}: non-station -> {station:?}");
if let Err(e) = std::fs::write(dir.join(id), station.to_save()) {
eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id));
}
}
s.station = Some(station);
}
Err(e) => {
if s.updated.is_none_or(|updated| {
updated.elapsed().is_ok_and(|elapsed| {
elapsed > Duration::from_secs(7 * 24 * 60 * 60)
})
}) {
eprintln!("Error querying station {id} and last updated over a week ago, marking as non-station! Error: {e}");
let _ = stations.remove(id);
if let Err(e) = std::fs::write(dir.join(id), "") {
eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id));
}
} else {
eprintln!(
"Error querying station {id}, keeping old data for now. Error: {e}"
);
}
}
}
true
} else {
false
}
} else {
stations.insert(
id.to_owned(),
OptStation {
updated: Some(SystemTime::now()),
station: match Station::query_station(id) {
Ok(station) => {
eprintln!("Added new station {id}: nothing -> {station:?}");
if let Err(e) = std::fs::write(dir.join(id), station.to_save()) {
eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id));
}
Some(station)
}
Err(e) => {
eprintln!("Marked {id} as not a station. Error: {e}");
if let Err(e) = std::fs::write(dir.join(id), "") {
eprintln!("[ERR] Couldn't save file {:?}: {e}", dir.join(id));
}
None
}
},
},
);
true
}
}
/// Applies `map` to the cached station for `id`.
///
/// Returns `None` if the id is unknown or cached as a non-station. The cache stays
/// locked while `map` runs.
pub fn get_station<T>(&self, id: &str, map: impl FnOnce(&Station) -> T) -> Option<T> {
    let stations = self.stations.blocking_lock();
    let station = stations.get(id)?.station.as_ref()?;
    Some(map(station))
}
pub fn get_random_station<T>(
&self,
limit_tries_cuberoot: usize,
map: impl Fn(&str, &Station) -> Option<T>,
) -> Option<T> {
let stations = self.stations.blocking_lock();
for i in 1..=limit_tries_cuberoot {
for i in rand::seq::index::sample(
&mut thread_rng(),
stations.len(),
(i * i).min(stations.len()),
) {
if let Some((id, opt)) = stations.iter().nth(i) {
if let Some(station) = &opt.station {
if let Some(v) = map(id, station) {
return Some(v);
}
}
}
}
}
None
}
pub fn find_stations(&self, name: &str) -> Vec<(String, String)> {
let stations = self.stations.blocking_lock();
let name = name.to_lowercase();
if let Some(s) = stations.get(&name).and_then(|v| v.station.as_ref()) {
vec![(name.to_owned(), s.name.clone())]
} else {
let mut o = HashMap::<String, (String, usize)>::new();
for (id, station) in stations.iter() {
if let Some(station) = &station.station {
if station.name.to_lowercase() == name {
o.insert(id.clone(), (station.name.clone(), 0));
}
}
}
if o.len() < 3 {
for (id, station) in stations.iter() {
if let Some(station) = &station.station {
if station.name.to_lowercase().starts_with(&name) {
o.insert(
id.clone(),
(
station.name.clone(),
station.name.len().saturating_sub(name.len()).min(100),
),
);
}
}
}
if o.len() < 3 {
for (id, station) in stations.iter() {
if let Some(station) = &station.station {
if let Some(pos) = station.name.to_lowercase().find(&name) {
o.insert(id.clone(), (station.name.clone(), (100 + pos).min(200)));
}
}
}
}
}
let mut o = o.into_iter().collect::<Vec<_>>();
o.sort_unstable_by_key(|v| v.1 .1);
o.into_iter().map(|v| (v.0, v.1 .0)).collect()
}
}
}
/// A station as scraped from its bahnhof.de page or restored from a save file.
#[derive(Clone, Debug, PartialEq)]
pub struct Station {
/// Station name; taken from the page title when queried, or from the `name=` line of a save file.
name: String,
/// EVA numbers of the station; sent as `evaNumbers` parameters when querying boards.
eva_numbers: Vec<u128>,
}
impl Station {
/// The station's name.
pub fn name(&self) -> &str {
    self.name.as_str()
}
/// Parses the contents of a save file for station `id`.
///
/// The format is one `key=value` pair per line with the keys `name` and `evanums`
/// (a comma-separated list of numbers); keys and values are trimmed. `id` is only
/// used in error messages.
///
/// # Errors
/// Fails on a line without a key, on an unknown key, when `name` or `evanums` is
/// missing, or when an EVA number does not parse.
fn from_save(id: &str, save: String) -> Result<Self, Box<dyn Error>> {
    let mut name: Option<String> = None;
    let mut evanums: Option<String> = None;
    for raw_line in save.lines() {
        // A line without '=' is treated as having an empty key.
        let (key, value) = match raw_line.split_once('=') {
            Some((key, value)) => (key.trim(), value.trim()),
            None => ("", raw_line.trim()),
        };
        match key {
            "name" => name = Some(value.to_owned()),
            "evanums" => evanums = Some(value.to_owned()),
            "" => {
                let line = value;
                return Err(format!("invalid flag-line in bahnhof {id}: {line}").into());
            }
            _ => {
                return Err(
                    format!("invalid key-value-line in bahnhof {id}: {key}={value}").into(),
                );
            }
        }
    }
    let name = name.ok_or_else(|| format!("in station {id}: missing name"))?;
    let evanums = evanums.ok_or_else(|| format!("in station {id}: missing evanums"))?;
    let mut eva_numbers = Vec::new();
    for v in evanums.split(',') {
        let number = v
            .trim()
            .parse()
            .map_err(|e| format!("eva number {v} could not be parsed: {e}"))?;
        eva_numbers.push(number);
    }
    Ok(Station { name, eva_numbers })
}
/// Serializes the station into the save-file format: a `name=` line, followed by an
/// `evanums=` line with the comma-separated EVA numbers if there are any.
fn to_save(&self) -> String {
    let mut out = format!("name={}\n", self.name);
    if !self.eva_numbers.is_empty() {
        let joined = self
            .eva_numbers
            .iter()
            .map(|num| num.to_string())
            .collect::<Vec<_>>()
            .join(",");
        out.push_str("evanums=");
        out.push_str(&joined);
        out.push('\n');
    }
    out
}
/// Fetches `https://www.bahnhof.de/{id}/departure` and extracts the station data.
///
/// The name is taken from a page title of the form `Abfahrt <dash> <name>` (the first
/// character after `Abfahrt ` is dropped), the EVA numbers from the
/// `bf:evaNumbers` meta tag.
///
/// # Errors
/// Fails if the request fails, the title does not start with `Abfahrt `, the meta tag
/// is missing or unterminated, or an EVA number does not parse.
fn query_station(id: &str) -> Result<Self, Box<dyn Error>> {
    let html =
        reqwest::blocking::get(format!("https://www.bahnhof.de/{id}/departure"))?.text()?;
    let title_close = html.find("</title>");
    let departure_title = r#"<title>Abfahrt "#;
    let (Some(start), Some(end)) = (html.find(departure_title), title_close) else {
        // Not a departure page: report what the title looked like instead.
        let plain_title = "<title>";
        let message = match (html.find(plain_title), title_close) {
            (Some(start), Some(end)) if start + plain_title.len() < end => format!(
                "missing title `Abfahrt - <name>`: title was `{}`",
                &html[start + plain_title.len()..end]
            ),
            (Some(_), Some(_)) => {
                String::from("missing title `Abfahrt - <name>`: </title> before <title>")
            }
            _ => format!("missing title `Abfahrt - <name>`: no <title> found in `{html}`"),
        };
        return Err(message.into());
    };
    let start = start + departure_title.len();
    let raw_name = if end > start {
        html[start..end].trim()
    } else {
        ""
    };
    // skip next char (some UTF8 dash)
    let mut name_chars = raw_name.chars();
    name_chars.next();
    let name = name_chars.as_str().trim();

    let meta_open = r#"<meta name="bf:evaNumbers" content=""#;
    let content = html
        .find(meta_open)
        .map(|index| &html[index + meta_open.len()..])
        .and_then(|rest| rest.find('"').map(|end| &rest[..end]))
        .ok_or("missing evaNumbers")?;
    let mut eva_numbers = Vec::new();
    for v in content.trim().split(',') {
        let number = v
            .trim()
            .parse()
            .map_err(|e| format!("eva number {v} could not be parsed: {e}"))?;
        eva_numbers.push(number);
    }
    if eva_numbers.is_empty() {
        return Err("no evaNumbers (found empty list)".into());
    }
    Ok(Self {
        name: name.to_owned(),
        eva_numbers,
    })
}
/// Fetches the departure board of this station for the next `minutes` minutes.
///
/// `id` is accepted but not used; the request is built from the EVA numbers.
///
/// # Errors
/// Fails if the station has no EVA numbers, the request fails, or the response is not
/// the expected JSON (the raw response is included in the error).
pub fn query_departures(
    &self,
    id: &str,
    minutes: u8,
    filter_transports: FilterTransports,
) -> Result<Departures, Box<dyn Error>> {
    let _ = id;
    if self.eva_numbers.is_empty() {
        return Err("station has no eva numbers".into());
    }
    let url = self.query_departures_url(false, minutes, filter_transports);
    let json = reqwest::blocking::get(url)?.text()?;
    match serde_json::from_str(&json) {
        Ok(departures) => Ok(departures),
        Err(e) => Err(format!("{e}\nin:\n{json}").into()),
    }
}
/// Fetches the arrival board of this station for the next `minutes` minutes.
///
/// `id` is accepted but not used; the request is built from the EVA numbers.
///
/// # Errors
/// Fails if the station has no EVA numbers, the request fails, or the response is not
/// the expected JSON (the raw response is included in the error).
pub fn query_arrivals(
    &self,
    id: &str,
    minutes: u8,
    filter_transports: FilterTransports,
) -> Result<Arrivals, Box<dyn Error>> {
    let _ = id;
    if self.eva_numbers.is_empty() {
        return Err("station has no eva numbers".into());
    }
    let url = self.query_departures_url(true, minutes, filter_transports);
    let json = reqwest::blocking::get(url)?.text()?;
    match serde_json::from_str(&json) {
        Ok(arrivals) => Ok(arrivals),
        Err(e) => Err(format!("{e}\nin:\n{json}").into()),
    }
}
/// Builds the boards API URL for this station.
///
/// `arrivals` selects the `arrivals` board instead of `departures`. Every EVA number
/// is added as its own `evaNumbers` parameter, followed by the duration, the transport
/// filter parameters, the locale and the sort order.
fn query_departures_url(
    &self,
    arrivals: bool,
    minutes: u8,
    filter_transports: FilterTransports,
) -> String {
    let board = if arrivals { "arrivals" } else { "departures" };
    let mut url = format!("https://www.bahnhof.de/api/boards/{}?", board);
    url.extend(
        self.eva_numbers
            .iter()
            .map(|num| format!("evaNumbers={num}&")),
    );
    url + &format!("duration={minutes}{filter_transports}&locale=de&sortBy=TIME_SCHEDULE")
}
/// Counts the board entries of this station within the next `minutes` minutes in
/// which no train is marked as canceled.
///
/// `arrivals` selects the arrivals board instead of the departures board. `id` is
/// accepted but not used; the request is built from the EVA numbers.
///
/// # Errors
/// Fails if the station has no EVA numbers, the request fails, or the response is not
/// the expected JSON (the raw response is included in the error).
pub fn query_activity_count(
    &self,
    id: &str,
    arrivals: bool,
    minutes: u8,
    filter_transports: FilterTransports,
) -> Result<usize, Box<dyn Error>> {
    let _ = id;
    if self.eva_numbers.is_empty() {
        Err("station has no eva numbers")?;
    }
    // Same URL as the full board queries; only the `canceled` flags are deserialized.
    let url = self.query_departures_url(arrivals, minutes, filter_transports);
    let json = reqwest::blocking::get(url)?.text()?;
    let board: ArrivalsOrDepartures =
        serde_json::from_str(&json).map_err(|e| format!("{e}\nin:\n{json}"))?;
    Ok(board
        .entries
        .iter()
        .filter(|group| !group.iter().any(|entry| entry.canceled))
        .count())
}
}
/// Minimal view of a boards API response, used by `Station::query_activity_count`,
/// which only needs the cancellation flags.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArrivalsOrDepartures {
/// Board entries; each entry is itself a list of items.
pub entries: Vec<Vec<ArrivalOrDeparture>>,
}
/// A single item of a board entry, reduced to its cancellation flag.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ArrivalOrDeparture {
/// Cancellation flag as delivered in the `canceled` JSON field.
pub canceled: bool,
}
/// Departure board as deserialized by `Station::query_departures`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Departures {
/// `Vec<Vec<_>>` because a single departure may consist of multiple trains (usually
/// coupled together, but still different trains/routes/destinations).
pub entries: Vec<Vec<Departure>>,
}
/// One train within a departure board entry.
///
/// JSON field names are the camelCase forms of the Rust field names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Departure {
/// Cancellation flag as delivered in the `canceled` JSON field.
pub canceled: bool,
/// Value of the `lineName` JSON field (presumably the line designation shown on the
/// board — confirm against a real response).
pub line_name: String,
/// Value of the `stopPlace` JSON field (presumably the station the board belongs
/// to — confirm against a real response).
pub stop_place: SomeStation,
/// Value of the `destination` JSON field.
pub destination: SomeStation,
/// Value of the `viaStops` JSON field.
pub via_stops: Vec<SomeStation>,
}
/// A station reference embedded in a board response.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SomeStation {
// pub eva_number: String,
/// Station name as delivered by the API.
pub name: String,
/// Slug of the station; `StationsList::add_new_from_departures` uses it as the station
/// id. Defaults to an empty string when the field is absent from the JSON.
#[serde(default)]
pub slug: String,
}
/// Arrival board as deserialized by `Station::query_arrivals`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Arrivals {
/// Board entries; each entry is itself a list of arriving trains.
pub entries: Vec<Vec<Arrival>>,
}
/// One train within an arrival board entry.
///
/// JSON field names are the camelCase forms of the Rust field names.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Arrival {
/// Cancellation flag as delivered in the `canceled` JSON field.
pub canceled: bool,
/// Value of the `lineName` JSON field (presumably the line designation shown on the
/// board — confirm against a real response).
pub line_name: String,
// pub stop_place: SomeStation,
/// Value of the `origin` JSON field.
pub origin: SomeStation,
// pub via_stops: Vec<SomeStation>,
}
/// Selects which transport types a board query asks for.
///
/// Every category exists twice: the plain variant additionally requests the `UNKNOWN`
/// transport type, the `...Known` variant does not. The `Display` impl renders the
/// selection as `&filterTransports=...` query parameters.
#[derive(Clone, Copy, RandGen, PartialEq, Eq)]
pub enum FilterTransports {
/// High-speed/intercity/inter-regional, regional, city train, tram and bus, plus `UNKNOWN`.
All,
/// Like `All`, without `UNKNOWN`.
AllKnown,
/// High-speed/intercity/inter-regional, regional, city train and tram (no bus), plus `UNKNOWN`.
AllTrains,
/// Like `AllTrains`, without `UNKNOWN`.
AllTrainsKnown,
/// High-speed/intercity/inter-regional and regional only (no city train, tram or bus), plus `UNKNOWN`.
Trains,
/// Like `Trains`, without `UNKNOWN`.
TrainsKnown,
/// Regional, city train and tram (no high-speed group, no bus), plus `UNKNOWN`.
AllRegionalTrains,
/// Like `AllRegionalTrains`, without `UNKNOWN`.
AllRegionalTrainsKnown,
/// Regional only, plus `UNKNOWN`.
RegionalTrains,
/// Like `RegionalTrains`, without `UNKNOWN`.
RegionalTrainsKnown,
/// High-speed/intercity/inter-regional only, plus `UNKNOWN`.
HighSpeedTrains,
/// Like `HighSpeedTrains`, without `UNKNOWN`.
HighSpeedTrainsKnown,
}
/// One filter per category: the six variants that include `UNKNOWN`; none of the
/// `...Known` variants is listed.
pub const ALL_FILTER_TRANSPORTS: [FilterTransports; 6] = [
FilterTransports::All,
FilterTransports::AllTrains,
FilterTransports::Trains,
FilterTransports::AllRegionalTrains,
FilterTransports::RegionalTrains,
FilterTransports::HighSpeedTrains,
];
impl Display for FilterTransports {
    /// Writes the filter as a run of `&filterTransports=...` query parameters, one group
    /// per enabled transport class, in the order high-speed, regional, local, bus, unknown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let groups: [(bool, &str); 5] = [
            (
                self.allow_high_speed(),
                "&filterTransports=HIGH_SPEED_TRAIN&filterTransports=INTERCITY_TRAIN&filterTransports=INTER_REGIONAL_TRAIN",
            ),
            (self.allow_regional(), "&filterTransports=REGIONAL_TRAIN"),
            (
                self.allow_local(),
                "&filterTransports=CITY_TRAIN&filterTransports=TRAM",
            ),
            (self.allow_bus(), "&filterTransports=BUS"),
            (self.allow_unknown(), "&filterTransports=UNKNOWN"),
        ];
        for (enabled, params) in groups {
            if enabled {
                f.write_str(params)?;
            }
        }
        Ok(())
    }
}
impl FilterTransports {
    /// Whether the `UNKNOWN` transport type is requested: true for every variant
    /// without the `Known` suffix.
    fn allow_unknown(&self) -> bool {
        matches!(
            self,
            Self::All
                | Self::AllTrains
                | Self::Trains
                | Self::AllRegionalTrains
                | Self::RegionalTrains
                | Self::HighSpeedTrains
        )
    }
    /// Whether buses are requested: only for `All` and `AllKnown`.
    fn allow_bus(&self) -> bool {
        matches!(self, Self::All | Self::AllKnown)
    }
    /// Whether local transport (city trains and trams) is requested: true for the
    /// `All`, `AllTrains` and `AllRegionalTrains` categories.
    fn allow_local(&self) -> bool {
        matches!(
            self,
            Self::All
                | Self::AllKnown
                | Self::AllTrains
                | Self::AllTrainsKnown
                | Self::AllRegionalTrains
                | Self::AllRegionalTrainsKnown
        )
    }
    /// Whether regional trains are requested: true for everything except the
    /// high-speed-only category.
    fn allow_regional(&self) -> bool {
        !matches!(self, Self::HighSpeedTrains | Self::HighSpeedTrainsKnown)
    }
    /// Whether the high-speed group (high-speed, intercity, inter-regional) is
    /// requested: true for everything except the two regional categories.
    fn allow_high_speed(&self) -> bool {
        !matches!(
            self,
            Self::AllRegionalTrains
                | Self::AllRegionalTrainsKnown
                | Self::RegionalTrains
                | Self::RegionalTrainsKnown
        )
    }
    /// The same category restricted to known transport types (the `...Known` variant).
    pub fn only_known(&self) -> Self {
        match *self {
            Self::All | Self::AllKnown => Self::AllKnown,
            Self::AllTrains | Self::AllTrainsKnown => Self::AllTrainsKnown,
            Self::Trains | Self::TrainsKnown => Self::TrainsKnown,
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => {
                Self::AllRegionalTrainsKnown
            }
            Self::RegionalTrains | Self::RegionalTrainsKnown => Self::RegionalTrainsKnown,
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => Self::HighSpeedTrainsKnown,
        }
    }
    /// The same category including the `UNKNOWN` transport type (the plain variant).
    pub fn and_unknown(&self) -> Self {
        match *self {
            Self::All | Self::AllKnown => Self::All,
            Self::AllTrains | Self::AllTrainsKnown => Self::AllTrains,
            Self::Trains | Self::TrainsKnown => Self::Trains,
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => Self::AllRegionalTrains,
            Self::RegionalTrains | Self::RegionalTrainsKnown => Self::RegionalTrains,
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => Self::HighSpeedTrains,
        }
    }
    /// Human-readable (German) description of the category; identical for the plain
    /// and the `...Known` variant.
    pub fn explined(&self) -> &'static str {
        // Collapse to the plain variant first so each description is listed once.
        match self.and_unknown() {
            Self::All | Self::AllKnown => "Öffis (Zug, S+U, Tram, Bus)",
            Self::AllTrains | Self::AllTrainsKnown => "Schienenverkehr (Zug, S+U, Tram)",
            Self::Trains | Self::TrainsKnown => "Züge (Zug, ohne Tram)",
            Self::AllRegionalTrains | Self::AllRegionalTrainsKnown => {
                "Deutschland-Ticket (Regio, S+U, Tram)"
            }
            Self::RegionalTrains | Self::RegionalTrainsKnown => {
                "Regionalzüge (Regio, ohne Tram)"
            }
            Self::HighSpeedTrains | Self::HighSpeedTrainsKnown => {
                "nur Hochgeschwindigkeitszüge"
            }
        }
    }
}