From 42ca5ae3f04329ce71ebd110f4ca9bb7b7e18107 Mon Sep 17 00:00:00 2001 From: Mark Date: Wed, 31 Jan 2024 19:20:22 +0100 Subject: [PATCH] refine string functions in stdlib --- mers_lib/src/program/configs/mod.rs | 1 + mers_lib/src/program/configs/with_string.rs | 208 +++++++------------- 2 files changed, 74 insertions(+), 135 deletions(-) diff --git a/mers_lib/src/program/configs/mod.rs b/mers_lib/src/program/configs/mod.rs index a7390f5..08c0453 100755 --- a/mers_lib/src/program/configs/mod.rs +++ b/mers_lib/src/program/configs/mod.rs @@ -7,6 +7,7 @@ use crate::{ program::run::CheckInfo, }; +pub mod util; pub mod with_base; pub mod with_command_running; pub mod with_get; diff --git a/mers_lib/src/program/configs/with_string.rs b/mers_lib/src/program/configs/with_string.rs index 0ba4049..53d3631 100755 --- a/mers_lib/src/program/configs/with_string.rs +++ b/mers_lib/src/program/configs/with_string.rs @@ -1,150 +1,88 @@ -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, RwLock}; -use crate::{ - data::{self, Data, MersType, Type}, - program::run::{CheckInfo, Info}, -}; +use crate::data::{self, Data, MersType, Type}; -use super::Config; +use super::{util, Config}; impl Config { /// `trim: fn` removes leading and trailing whitespace from a string /// `substring: fn` extracts part of a string. usage: (str, start).substring or (str, start, end).substring. start and end may be negative, in which case they become str.len - n: (str, 0, -1) shortens the string by 1. /// `index_of: fn` finds the index of a pattern in a string /// `index_of_rev: fn` finds the last index of a pattern in a string + /// `starts_with: fn` checks if the string starts with the pattern + /// `ends_with: fn` checks if the string ends with the pattern + /// `str_split_once: fn` splits the string at the given pattern, removing that pattern from the string. + /// `str_split_once_rev: fn` like str_split_once, but splits at the last found instance of the pattern instead of the first. 
+ /// `str_split: fn` splits the string at the given pattern, removing that pattern from the string. /// `to_string: fn` turns any argument into a (more or less useful) string representation /// `concat: fn` concatenates all arguments given to it. arg must be an enumerable pub fn with_string(self) -> Self { - self.add_var("trim".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|a, _i| if a.is_included_in(&data::string::StringT) { - Ok(Type::new(data::string::StringT)) - } else { - Err(format!("cannot call trim on non-strings").into()) - }), - run: Arc::new(|a, _i| { - Data::new(data::string::String(a.get().as_any().downcast_ref::().unwrap().0.trim().to_owned())) - }), - inner_statements: None, - })).add_var("concat".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|a, _i| if a.iterable().is_some() { - Ok(Type::new(data::string::StringT)) - } else { - Err(format!("concat called on non-iterable type {a}").into()) - }), - run: Arc::new(|a, _i| Data::new(data::string::String(a.get().iterable().unwrap().map(|v| v.get().to_string()).collect()))), - inner_statements: None, - })).add_var("to_string".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|_a, _i| Ok(Type::new(data::string::StringT))), - run: Arc::new(|a, _i| Data::new(data::string::String(a.get().to_string()))), - inner_statements: None, - })).add_var("index_of".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|a, _i| if a.is_included_in(&data::tuple::TupleT(vec![Type::new(data::string::StringT), Type::new(data::string::StringT)])) { - Ok(Type::newm(vec![ - 
Arc::new(data::tuple::TupleT(vec![])), - Arc::new(data::int::IntT), - ])) - } else { - Err(format!("wrong args for index_of: must be (string, string)").into()) - }), - run: Arc::new(|a, _i| index_of(a, false)), - inner_statements: None, - })).add_var("index_of_rev".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|a, _i| if a.is_included_in(&data::tuple::TupleT(vec![Type::new(data::string::StringT), Type::new(data::string::StringT)])) { - Ok(Type::newm(vec![ - Arc::new(data::tuple::TupleT(vec![])), - Arc::new(data::int::IntT), - ])) - } else { - Err(format!("wrong args for index_of: must be (string, string)").into()) - }), - run: Arc::new(|a, _i| index_of(a, true)), - inner_statements: None, - })).add_var("substring".to_string(), Data::new(data::function::Function { - info: Arc::new(Info::neverused()), - info_check: Arc::new(Mutex::new(CheckInfo::neverused())), - out: Arc::new(|a, _i| { - for t in a.types.iter() { - if let Some(t) = t.as_any().downcast_ref::() { - if t.0.len() != 2 && t.0.len() != 3 { - return Err(format!("cannot call substring with tuple argument of len != 3").into()); - } - if !t.0[0].is_included_in(&data::string::StringT) { - return Err(format!("cannot call substring with tuple argument that isn't (*string*, int, int)").into()); - } - if !t.0[1].is_included_in(&data::int::IntT) { - return Err(format!("cannot call substring with tuple argument that isn't (string, *int*, int)").into()); - } - if t.0.len() > 2 && !t.0[2].is_included_in(&data::int::IntT) { - return Err(format!("cannot call substring with tuple argument that isn't (string, int, *int*)").into()); - } - } else { - return Err(format!("cannot call substring with non-tuple argument.").into()); - } - } - Ok(if a.types.is_empty() { - Type::empty() + self + .add_var("trim".to_string(), Data::new(util::to_mers_func_concrete_string_to_string(|v| v.trim().to_owned()))) + 
.add_var("index_of".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_opt_int(|v, p| v.find(p).map(|v| v as _)))) + .add_var("index_of_rev".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_opt_int(|v, p| v.rfind(p).map(|v| v as _) ))) + .add_var("starts_with".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_bool(|v, p| v.starts_with(p)))) + .add_var("ends_with".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_bool(|v, p| v.ends_with(p)))) + .add_var("str_split_once".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_opt_string_string(|v, p| v.split_once(p).map(|(a, b)| (a.to_owned(), b.to_owned()))))) + .add_var("str_split_once_rev".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_opt_string_string(|v, p| v.rsplit_once(p).map(|(a, b)| (a.to_owned(), b.to_owned()))))) + .add_var("str_split".to_string(), Data::new(util::to_mers_func_concrete_string_string_to_any(Type::new(super::with_list::ListT(Type::new(data::string::StringT))), |v, p| Data::new(super::with_list::List(v.split(p).map(|v| Arc::new(RwLock::new(Data::new(data::string::String(v.to_owned()))))).collect()))))) + .add_var("concat".to_string(), Data::new(util::to_mers_func( + |a| if a.iterable().is_some() { + Ok(Type::new(data::string::StringT)) } else { - Type::new(data::string::StringT) + Err(format!("concat called on non-iterable type {a}").into()) + }, + |a| Data::new(data::string::String(a.get().iterable().unwrap().map(|v| v.get().to_string()).collect()))), + )) + .add_var("to_string".to_string(), Data::new(util::to_mers_func(|_a| Ok(Type::new(data::string::StringT)), + |a| Data::new(data::string::String(a.get().to_string())) + ))) + .add_var("substring".to_string(), Data::new(util::to_mers_func( + |a| { + for t in a.types.iter() { + if let Some(t) = t.as_any().downcast_ref::() { + if t.0.len() != 2 && t.0.len() != 3 { + return Err(format!("cannot call substring with tuple argument of len != 
3").into()); + } + if !t.0[0].is_included_in(&data::string::StringT) { + return Err(format!("cannot call substring with tuple argument that isn't (*string*, int, int)").into()); + } + if !t.0[1].is_included_in(&data::int::IntT) { + return Err(format!("cannot call substring with tuple argument that isn't (string, *int*, int)").into()); + } + if t.0.len() > 2 && !t.0[2].is_included_in(&data::int::IntT) { + return Err(format!("cannot call substring with tuple argument that isn't (string, int, *int*)").into()); + } + } else { + return Err(format!("cannot call substring with non-tuple argument.").into()); + } + } + Ok(if a.types.is_empty() { + Type::empty() + } else { + Type::new(data::string::StringT) + }) + }, + |a| { + let tuple = a.get(); + let tuple = tuple.as_any().downcast_ref::().expect("called substring with non-tuple arg"); + let (s, start, end) = (&tuple.0[0], &tuple.0[1], tuple.0.get(2)); + let s = s.get(); + let s = &s.as_any().downcast_ref::().unwrap().0; + let start = start.get(); + let start = start.as_any().downcast_ref::().unwrap().0; + let start = if start < 0 { s.len().saturating_sub(start.abs() as usize) } else { start as usize }; + let end = end + .map(|end| end.get()) + .map(|end| end.as_any().downcast_ref::().unwrap().0) + .map(|i| if i < 0 { s.len().saturating_sub(i.abs() as usize) } else { i as usize }) + .unwrap_or(usize::MAX); + let end = end.min(s.len()); + if end < start { + return Data::new(data::string::String(String::new())); + } + Data::new(data::string::String(s[start..end].to_owned())) }) - }), - run: Arc::new(|a, _i| { - let tuple = a.get(); - let tuple = tuple.as_any().downcast_ref::().expect("called substring with non-tuple arg"); - let (s, start, end) = (&tuple.0[0], &tuple.0[1], tuple.0.get(2)); - let s = s.get(); - let s = &s.as_any().downcast_ref::().unwrap().0; - let start = start.get(); - let start = start.as_any().downcast_ref::().unwrap().0; - let start = if start < 0 { s.len().saturating_sub(start.abs() as usize) } else { 
start as usize }; - let end = end - .map(|end| end.get()) - .map(|end| end.as_any().downcast_ref::().unwrap().0) - .map(|i| if i < 0 { s.len().saturating_sub(i.abs() as usize) } else { i as usize }) - .unwrap_or(usize::MAX); - let end = end.min(s.len()); - if end < start { - return Data::new(data::string::String(String::new())); - } - Data::new(data::string::String(s[start..end].to_owned())) - - }), - inner_statements: None, - })) - } -} - -fn index_of(a: Data, rev: bool) -> Data { - let a = a.get(); - let a = a - .as_any() - .downcast_ref::() - .expect("index_of called on non-tuple"); - let src = a.0[0].get(); - let src = &src - .as_any() - .downcast_ref::() - .unwrap() - .0; - let pat = a.0[1].get(); - let pat = &pat - .as_any() - .downcast_ref::() - .unwrap() - .0; - let i = if rev { src.rfind(pat) } else { src.find(pat) }; - if let Some(i) = i { - Data::new(data::int::Int(i as _)) - } else { - Data::empty_tuple() + )) } }