changed regex builtin and added binaries to git repo

This commit is contained in:
Mark 2023-06-29 15:16:31 -04:00
parent f0314817a2
commit b6d01c7c25
17 changed files with 245 additions and 228 deletions

1
.gitignore vendored
View File

@ -2,5 +2,4 @@
/mers/Cargo.lock
/mers_libs/*/target
/mers_libs/*/Cargo.lock
/build_scripts/mers
/build_scripts/nu_plugin_mers

5
build_scripts/build_cross.sh Executable file
View File

@ -0,0 +1,5 @@
#!/usr/bin/env sh
# Builds the distributable mers binaries:
#   1. runs build_musl.sh (presumably the musl/Linux build - see that script),
#   2. cross-compiles mers for x86_64-pc-windows-gnu in release mode,
#   3. copies the resulting mers.exe into build_scripts.
#
# All paths are relative, so this script has to be started from inside
# the build_scripts directory.

# Stop at the first failing command. Without this, a failed build (or a failed
# `cd`) would still reach the `cp` below and could copy a stale mers.exe.
set -e

./build_musl.sh
cd ../mers
cargo build --release --target x86_64-pc-windows-gnu
cp target/x86_64-pc-windows-gnu/release/mers.exe ../build_scripts

BIN
build_scripts/mers Executable file

Binary file not shown.

BIN
build_scripts/mers.exe Executable file

Binary file not shown.

View File

@ -157,4 +157,19 @@ replaces occurrences of arg1 in arg0 with arg2
### regex
returns a list of matches of the arg0 regex that were found in the string arg1
given a regex (in string form), this function returns either `Err(string)` or a function which, when called with another string, returns a list of matches found in that string:
lines_regex := regex(".*").assume_no_enum()
fn lines(s string) {
lines_regex.run(s)
}
debug("a string\nwith multiple\nlines!".lines())
This is done because compiling a regex is somewhat slow, so if multiple strings have to be searched by the same regex,
it would be inefficient to recompile it every time. (btw: credit goes to the Regex crate, which is used to implement this)
### split
given two strings, splits the first one at the pattern specified by the second one:
word_count := "a string containing five words".split(" ").len()

View File

@ -1,24 +0,0 @@
input := fs_read("/tmp/pin.txt").assume_no_enum().bytes_to_string().assume_no_enum()
fn prio(s string) {
switch! "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".index_of(s) {
[] [] 0
int n n + 1
}
}
sum := 0
for line input.regex(".*").assume_no_enum() {
left := line.substring(0, line.len() / 2)
right := line.substring(line.len() / 2)
for ch right.regex(".").assume_no_enum() {
if left.contains(ch) {
&sum = sum + prio(ch)
true
}
}
[]
}
println("sum: " + sum.to_string())

View File

@ -1,32 +0,0 @@
input := fs_read("/tmp/pin.txt").assume_no_enum().bytes_to_string().assume_no_enum()
fn prio(s string) {
switch! "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".index_of(s) {
[] [] 0
int n n + 1
}
}
sum := 0
for group input.regex(".*\\n.*\\n.*\\n?").assume_no_enum() {
rucksacks := group.regex(".*").assume_no_enum()
a := rucksacks.get(0).assume1()
b := rucksacks.get(1).assume1()
c := rucksacks.get(2).assume1()
println(a)
println(b)
println(c)
println("---")
badge_type := for ch "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".regex(".").assume_no_enum() {
if a.contains(ch) && b.contains(ch) && c.contains(ch) {
ch
}
}
switch! badge_type {
string s &sum = sum + prio(s)
[] [] []
}
}
println("sum: " + sum.to_string())

View File

@ -1,21 +0,0 @@
input := fs_read("/tmp/pin.txt").assume_no_enum().bytes_to_string().assume_no_enum()
fn get_pair(s string) {
list := s.regex("[^-]+").assume_no_enum()
[
list.get(0).assume1("A").parse_int().assume1("a")
list.get(1).assume1("B").parse_int().assume1("b")
]
}
count := 0
for pair input.regex(".+").assume_no_enum() {
s := pair.regex("[^,]*").assume_no_enum()
s1 := get_pair(s.get(0).assume1("bb"))
s2 := get_pair(s.get(1).assume1("aa"))
if { s1.0 <= s2.0 && s1.1 >= s2.1 } || { s1.0 >= s2.0 && s1.1 <= s2.1 } {
&count = count + 1
}
}
println("count: " + count.to_string())

View File

@ -1,29 +0,0 @@
input := fs_read("/tmp/pin.txt").assume_no_enum().bytes_to_string().assume_no_enum()
fn get_pair(s string) {
list := s.regex("[^-]+").assume_no_enum()
[
list.get(0).assume1().parse_int().assume1()
list.get(1).assume1().parse_int().assume1()
]
}
count := 0
for pair input.regex(".+").assume_no_enum() {
s := pair.regex("[^,]*").assume_no_enum()
s1 := get_pair(s.get(0).assume1())
s2 := get_pair(s.get(1).assume1())
if {
s1.0 <= s2.0 && s2.0 <= s1.1
} || {
s1.0 <= s2.1 && s2.1 <= s1.1
} || {
s2.0 <= s1.0 && s1.0 <= s2.1
} || {
s2.0 <= s1.1 && s1.1 <= s2.1
} {
&count = count + 1
}
}
println("count: " + count.to_string())

View File

@ -16,7 +16,7 @@ fn rnd() {
}
words := rnd()
for word text.regex("\\S+").assume_no_enum()
for word regex("\\S+").assume_no_enum().run(text)
if words > 0 {
print(word + " ")
&words = words - 1

2
mers/Cargo.lock generated
View File

@ -647,7 +647,7 @@ dependencies = [
[[package]]
name = "mers"
version = "0.2.2"
version = "0.2.3"
dependencies = [
"colorize",
"edit",

View File

@ -1,6 +1,6 @@
[package]
name = "mers"
version = "0.2.2"
version = "0.2.3"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,7 +1,7 @@
use std::{io::Write, path::PathBuf, sync::Arc, time::Duration};
use super::{
code_runnable::RStatement,
code_runnable::{RFunction, RFunctionType, RStatement},
global_info::{GSInfo, GlobalScriptInfo},
val_data::{thread::VDataThreadEnum, VData, VDataEnum},
val_type::{VSingleType, VType},
@ -82,6 +82,7 @@ pub enum BuiltinFunction {
Substring,
Replace,
Regex,
Split,
}
impl BuiltinFunction {
@ -145,6 +146,7 @@ impl BuiltinFunction {
"substring" => Self::Substring,
"replace" => Self::Replace,
"regex" => Self::Regex,
"split" => Self::Split,
_ => return None,
})
}
@ -472,7 +474,7 @@ impl BuiltinFunction {
}
}
// two strings
Self::Contains | Self::StartsWith | Self::EndsWith | Self::Regex => {
Self::Contains | Self::StartsWith | Self::EndsWith => {
input.len() == 2
&& input
.iter()
@ -503,6 +505,15 @@ impl BuiltinFunction {
Self::Trim => {
input.len() == 1 && input[0].fits_in(&VSingleType::String.to(), info).is_empty()
}
Self::Regex => {
input.len() == 1 && input[0].fits_in(&VSingleType::String.to(), info).is_empty()
}
Self::Split => {
input.len() == 2
&& input
.iter()
.all(|v| v.fits_in(&VSingleType::String.to(), info).is_empty())
}
}
}
/// for invalid inputs, may panic
@ -781,12 +792,20 @@ impl BuiltinFunction {
Self::Replace => VSingleType::String.to(),
Self::Regex => VType {
types: vec![
// [string ...]
VSingleType::List(VSingleType::String.to()),
// fn((string [string ...]))
VSingleType::Function(vec![(
vec![VSingleType::String.to()],
VSingleType::List(VSingleType::String.to()).to(),
)]),
// Err(string)
VSingleType::EnumVariant(EV_ERR, VSingleType::String.to()),
],
},
Self::Split => VSingleType::List(
// [string ...]
VSingleType::String.to(),
)
.to(),
}
}
pub fn run(&self, args: &Vec<RStatement>, info: &GSInfo) -> VData {
@ -964,39 +983,22 @@ impl BuiltinFunction {
}),
BuiltinFunction::Run => args[0].run(info).operate_on_data_immut(|v| {
if let VDataEnum::Function(f) = v {
if f.inputs.len() != args.len() - 1 {
unreachable!("wrong input count")
}
for (i, var) in f.inputs.iter().enumerate() {
let val = args[i + 1].run(info).clone_data();
var.lock().unwrap().0 = val;
}
f.run(info)
f.run(info, args.iter().skip(1).map(|v| v.run(info)).collect())
} else {
unreachable!()
}
}),
BuiltinFunction::Thread => args[0].run(info).operate_on_data_immut(|v| {
if let VDataEnum::Function(f) = v {
if f.inputs.len() != args.len() - 1 {
unreachable!("wrong input count")
}
let mut run_input_types = vec![];
for (i, var) in f.inputs.iter().enumerate() {
let val = args[i + 1].run(info).clone_data();
run_input_types.push(val.out_single());
var.lock().unwrap().0 = val;
}
let args: Vec<_> = args.into_iter().skip(1).map(|v| v.run(info)).collect();
let out_type = f
.out_by_map(
&run_input_types.iter().map(|v| v.clone().into()).collect(),
&info,
)
.out_by_map(&args.iter().map(|v| v.out()).collect(), &info)
.unwrap();
let info = Arc::clone(info);
let f = Arc::clone(f);
VDataEnum::Thread(
VDataThreadEnum::Running(std::thread::spawn(move || f.run(&info))).to(),
VDataThreadEnum::Running(std::thread::spawn(move || f.run(&info, args)))
.to(),
out_type,
)
.to()
@ -1680,28 +1682,67 @@ impl BuiltinFunction {
})
})
}),
Self::Regex => args[0].run(info).operate_on_data_immut(|a| {
args[1].run(info).operate_on_data_immut(|regex| {
if let (VDataEnum::String(a), VDataEnum::String(regex)) = (a, regex) {
match regex::Regex::new(regex.as_str()) {
Ok(regex) => VDataEnum::List(
Self::Regex => args[0].run(info).operate_on_data_immut(|regex_string| {
if let VDataEnum::String(regex_string) = regex_string {
match regex::RegexBuilder::new(&regex_string).build() {
Ok(regex) => VDataEnum::Function(Arc::new(RFunction {
statement: RFunctionType::Func(Box::new(move |_info, args| {
// this is the function returned by regex().
// it takes one string as its argument
// and returns a list of all matches found in that string
args[0].operate_on_data_immut(|s| {
if let VDataEnum::String(s) = s {
// when the regex function is given a string to operate on,
// it returns all the matches.
VDataEnum::List(
VSingleType::String.to(),
regex
.find_iter(a.as_str())
.map(|v| VDataEnum::String(v.as_str().to_string()).to())
.find_iter(s)
.map(|m| {
VDataEnum::String(
s[m.start()..m.end()].to_owned(),
)
.to()
})
.collect(),
)
.to()
} else {
unreachable!()
}
})
})),
out_map: vec![],
}))
.to(),
Err(e) => VDataEnum::EnumVariant(
Err(err) => VDataEnum::EnumVariant(
EV_ERR,
Box::new(VDataEnum::String(e.to_string()).to()),
Box::new(VDataEnum::String(err.to_string()).to()),
)
.to(),
}
} else {
unreachable!()
}
}),
Self::Split => args[0].run(info).operate_on_data_immut(|a| {
if let VDataEnum::String(a) = a {
args[1].run(info).operate_on_data_immut(|b| {
if let VDataEnum::String(b) = b {
VDataEnum::List(
VSingleType::String.to(),
a.split(b)
.map(|v| VDataEnum::String(v.to_owned()).to())
.collect(),
)
.to()
} else {
unreachable!()
}
})
} else {
unreachable!()
}
}),
}
}

View File

@ -1,4 +1,7 @@
use std::sync::{Arc, Mutex};
use std::{
fmt::Debug,
sync::{Arc, Mutex},
};
use super::{
builtins::BuiltinFunction,
@ -54,13 +57,21 @@ impl RBlock {
}
}
#[derive(Clone, Debug)]
pub struct RFunction {
pub inputs: Vec<Arc<Mutex<(VData, VType)>>>,
pub input_types: Vec<VType>,
pub statement: RStatement,
pub statement: RFunctionType,
pub out_map: Vec<(Vec<VType>, VType)>,
}
impl Debug for RFunction {
    /// Prints only the function's `out_map` (its input-types -> output-type table);
    /// the executable part of the function is not shown.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let out_map = &self.out_map;
        // Deliberately goes through `format_args!` with a plain `{:?}`, so flags
        // given to the outer formatter (e.g. `{:#?}`) are not forwarded.
        f.write_fmt(format_args!("{:?}", out_map))
    }
}
/// The different kinds of executable body an `RFunction` can have.
/// `RFunction::run` matches on this to decide how a call is performed.
pub enum RFunctionType {
/// ignores all args and returns some default value
/// (`RFunction::run` returns the bool `false` for this variant)
Dummy,
/// A function whose body is a statement. The fields are, in order:
/// the input variables (each call assigns the arguments to these before the statement runs),
/// the statement that is run as the body, and the types of the inputs.
Statement(Vec<Arc<Mutex<(VData, VType)>>>, RStatement, Vec<VType>),
/// A Rust closure; a call passes it the script info and the argument values
/// and uses its return value as the result.
Func(Box<dyn Fn(&GSInfo, Vec<VData>) -> VData + Send + Sync>),
}
impl PartialEq for RFunction {
fn eq(&self, _other: &Self) -> bool {
false
@ -70,8 +81,17 @@ impl Eq for RFunction {
fn assert_receiver_is_total_eq(&self) {}
}
impl RFunction {
pub fn run(&self, info: &GSInfo) -> VData {
self.statement.run(info)
pub fn run(&self, info: &GSInfo, args: Vec<VData>) -> VData {
match &self.statement {
RFunctionType::Dummy => VDataEnum::Bool(false).to(),
RFunctionType::Statement(inputs, s, _) => {
for (i, input) in inputs.iter().enumerate() {
input.lock().unwrap().0.assign(args[i].clone_mut());
}
s.run(info)
}
RFunctionType::Func(f) => f(info, args),
}
}
pub fn out_by_map(&self, input_types: &Vec<VType>, info: &GlobalScriptInfo) -> Option<VType> {
// NOTE: This can ONLY use self.out_map, because it's used by the VSingleType.fits_in method.
@ -104,23 +124,31 @@ impl RFunction {
t
})
}
pub fn out_by_statement(&self, input_types: &Vec<VType>, info: &GlobalScriptInfo) -> VType {
let mut actual = Vec::with_capacity(self.inputs.len());
pub fn out_by_statement(
&self,
input_types: &Vec<VType>,
info: &GlobalScriptInfo,
) -> Option<VType> {
if let RFunctionType::Statement(inputs, statement, _) = &self.statement {
let mut actual = Vec::with_capacity(inputs.len());
// simulate these variable types
for (fn_input, c_type) in self.inputs.iter().zip(input_types.iter()) {
for (fn_input, c_type) in inputs.iter().zip(input_types.iter()) {
actual.push(std::mem::replace(
&mut fn_input.lock().unwrap().1,
c_type.clone(),
));
}
// now get the return type if these were the actual types
let out = self.statement.out(info);
let out = statement.out(info);
// reset
for (fn_input, actual) in self.inputs.iter().zip(actual) {
for (fn_input, actual) in inputs.iter().zip(actual) {
fn_input.lock().unwrap().1 = actual;
}
// return
out
Some(out)
} else {
None
}
}
}
@ -201,10 +229,7 @@ impl RStatementEnum {
}
}
Self::FunctionCall(func, args) => {
for (i, input) in func.inputs.iter().enumerate() {
input.lock().unwrap().0.assign(args[i].run(info));
}
func.run(info)
func.run(info, args.iter().map(|s| s.run(info)).collect())
}
Self::BuiltinFunctionCall(v, args) => v.run(args, info),
Self::LibFunctionCall(libid, fnid, args, _) => {
@ -270,7 +295,7 @@ impl RStatementEnum {
}
}
VDataEnum::Function(f) => loop {
if let Some(v) = f.run(info).matches() {
if let Some(v) = f.run(info, vec![]).matches() {
if let Some(v) = in_loop(v).matches() {
oval = v;
break;
@ -430,9 +455,6 @@ impl RScript {
Ok(Self { main, info })
}
pub fn run(&self, args: Vec<VData>) -> VData {
for (input, arg) in self.main.inputs.iter().zip(args.into_iter()) {
input.lock().unwrap().0 = arg;
}
self.main.run(&self.info)
self.main.run(&self.info, args)
}
}

View File

@ -15,7 +15,7 @@ use super::{
builtins::BuiltinFunction,
code_macro::Macro,
code_parsed::{SBlock, SFunction, SStatement, SStatementEnum},
code_runnable::{RBlock, RFunction, RScript, RStatement, RStatementEnum},
code_runnable::{RBlock, RFunction, RFunctionType, RScript, RStatement, RStatementEnum},
fmtgs::FormatGs,
global_info::GSInfo,
};
@ -235,42 +235,55 @@ fn function(
}
let mut o = RFunction {
out_map: vec![],
inputs: input_vars,
statement: RFunctionType::Statement(
input_vars,
statement(&s.statement, ginfo, &mut linfo.clone())?,
input_types,
statement: statement(&s.statement, ginfo, &mut linfo.clone())?,
),
};
o.out_map = {
let mut map = vec![];
let mut indices: Vec<_> = o.input_types.iter().map(|_| 0).collect();
let mut indices: Vec<_> = if let RFunctionType::Statement(_, _, input_types) = &o.statement
{
input_types.iter().map(|_| 0).collect()
} else {
unreachable!()
};
// like counting: advance first index, when we reach the end, reset to zero and advance the next index, ...
loop {
let mut current_types = Vec::with_capacity(o.input_types.len());
if let RFunctionType::Statement(_, _, input_types) = &o.statement {
let mut current_types = Vec::with_capacity(input_types.len());
let mut adv = true;
let mut was_last = o.input_types.is_empty();
for i in 0..o.input_types.len() {
current_types.push(match o.input_types[i].types.get(indices[i]) {
let mut was_last = input_types.is_empty();
for i in 0..input_types.len() {
current_types.push(match input_types[i].types.get(indices[i]) {
Some(v) => v.clone().to(),
None => VType::empty(),
});
if adv {
if indices[i] + 1 < o.input_types[i].types.len() {
if indices[i] + 1 < input_types[i].types.len() {
indices[i] += 1;
adv = false;
} else {
indices[i] = 0;
// we just reset the last index back to 0 - if we don't break
// from the loop, we will just start all over again.
if i + 1 == o.input_types.len() {
if i + 1 == input_types.len() {
was_last = true;
}
}
}
}
let out = o.out_by_statement(&current_types, &ginfo);
let out = o
.out_by_statement(&current_types, &ginfo)
.expect("this should always be a Statement function type");
map.push((current_types, out));
if was_last {
break map;
}
} else {
unreachable!()
}
}
};
Ok(o)
@ -438,12 +451,13 @@ fn statement_adv(
func: &RFunction,
ginfo: &GlobalScriptInfo,
) -> bool {
func.inputs.len() == arg_types.len()
&& func
.inputs
.iter()
.zip(arg_types.iter())
.all(|(fn_in, arg)| arg.fits_in(&fn_in.lock().unwrap().1, ginfo).is_empty())
func.out_by_map(arg_types, ginfo).is_some()
// func.inputs.len() == arg_types.len()
// && func
// .inputs
// .iter()
// .zip(arg_types.iter())
// .all(|(fn_in, arg)| arg.fits_in(&fn_in.lock().unwrap().1, ginfo).is_empty())
}
if let Some(funcs) = linfo.fns.get(v) {
'find_func: {

View File

@ -4,10 +4,9 @@ use std::{
};
use super::{
code_runnable::{RFunction, RStatementEnum},
code_runnable::{RFunction, RFunctionType},
fmtgs::FormatGs,
global_info::{GSInfo, GlobalScriptInfo},
val_data::VDataEnum,
};
use super::global_info::LogMsg;
@ -375,9 +374,7 @@ impl VSingleType {
(Self::Function(a), Self::Function(b)) => 'fnt: {
// since RFunction.out only uses out_map, we can create a dummy RFunction here.
let af = RFunction {
inputs: vec![],
input_types: vec![],
statement: RStatementEnum::Value(VDataEnum::Bool(false).to()).to(),
statement: RFunctionType::Dummy,
out_map: a.clone(),
};
for (ins, out) in b {

View File

@ -29,7 +29,7 @@ fn normal_main() {
let mut args_to_skip = 2;
let mut file = match args.len() {
0 => {
println!("Please provide some arguments, such as the path to a file or \"-e <code>\".");
println!("no arguments, use -h for help");
std::process::exit(100);
}
_ => {
@ -50,8 +50,38 @@ fn normal_main() {
continue;
}
match ch {
'h' => {
eprintln!("~~~~ mers help ~~~~");
eprintln!();
eprintln!(" ~~ cli ~~");
eprintln!("Mers has the following cli options:");
eprintln!("-h shows this Help message");
eprintln!("-e - mers will treat the run argument as code to be Executed rather than a file path");
eprintln!(" mers -e 'println(\"Hello, World!\")'");
eprintln!(
"-c - mers will Check the code for errors, but won't run it"
);
eprintln!("-f - mers will Format the code and print it. useful if you suspect the parser might be misinterpreting your code");
eprintln!(
"+c - use Colors in the output to better visualize things"
);
eprintln!("+C - don't use colors (opposite of +c, redundant since this is the default)");
eprintln!("-v - mers will be more Verbose");
eprintln!("+???+ - customize what mers is verbose about and how - bad syntax, barely useful, don't use it until it gets improved (TODO!)");
eprintln!("-i - launches an Interactive session to play around with (opens your editor and runs code on each file save)");
eprintln!("+t - spawns a new terminal for the editor (if you use a terminal editors, add +t)");
eprintln!(" mers -i+t");
eprintln!("-t - launches the Tutor, which will attempt to Teach you the basics of the language");
eprintln!();
eprintln!(" ~~ getting started ~~");
eprintln!("mers doesn't need a specific structure for directories, just create a UTF-8 text file, write code, and run it:");
eprintln!(" echo 'println(\"Hello, World!\")' > hello.mers");
eprintln!(" mers hello.mers");
return;
}
'e' => execute = true,
'v' => verbose = true,
'c' => run = false,
'f' => {
run = false;
info.log.after_parse.stderr = true;
@ -178,7 +208,7 @@ fn normal_main() {
file.into(),
)
} else {
println!("please provide either a file or -e and a script to run!");
println!("nothing to do - missing arguments?");
std::process::exit(101);
}
}