From 1b79cfc08ff16b9c1d843f8d8475a32097239b90 Mon Sep 17 00:00:00 2001 From: Mark <> Date: Wed, 26 Jun 2024 20:27:29 +0200 Subject: [PATCH] fix some edge cases in parser --- mers/Cargo.toml | 4 +- mers_lib/Cargo.toml | 2 +- mers_lib/src/errors/mod.rs | 1 + mers_lib/src/errors/themes.rs | 3 +- mers_lib/src/parsing/statements.rs | 234 ++++++++++++++++------------- 5 files changed, 132 insertions(+), 112 deletions(-) diff --git a/mers/Cargo.toml b/mers/Cargo.toml index 557d6b8..83945ef 100644 --- a/mers/Cargo.toml +++ b/mers/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mers" -version = "0.8.16" +version = "0.8.20" edition = "2021" license = "MIT OR Apache-2.0" description = "dynamically typed but type-checked programming language" @@ -15,7 +15,7 @@ default = ["colored-output"] colored-output = ["mers_lib/ecolor-term", "mers_lib/pretty-print", "dep:colored"] [dependencies] -mers_lib = "0.8.16" +mers_lib = "0.8.20" # mers_lib = { path = "../mers_lib" } clap = { version = "4.3.19", features = ["derive"] } colored = { version = "2.1.0", optional = true } diff --git a/mers_lib/Cargo.toml b/mers_lib/Cargo.toml index 50bbd73..005292b 100755 --- a/mers_lib/Cargo.toml +++ b/mers_lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mers_lib" -version = "0.8.19" +version = "0.8.20" edition = "2021" license = "MIT OR Apache-2.0" description = "library to use the mers language in other projects" diff --git a/mers_lib/src/errors/mod.rs b/mers_lib/src/errors/mod.rs index 829cbac..61eb64c 100644 --- a/mers_lib/src/errors/mod.rs +++ b/mers_lib/src/errors/mod.rs @@ -81,6 +81,7 @@ pub enum EColor { AsTypeTypeAnnotation, BadCharInTupleType, BadCharInFunctionType, + BadCharAtStartOfStatement, BadTypeFromParsed, TypeAnnotationNoClosingBracket, TryBadSyntax, diff --git a/mers_lib/src/errors/themes.rs b/mers_lib/src/errors/themes.rs index 8169bd2..3b4f20d 100644 --- a/mers_lib/src/errors/themes.rs +++ b/mers_lib/src/errors/themes.rs @@ -70,8 +70,7 @@ pub fn default_theme( UnknownVariable => hard_err, BackslashEscapeUnknown => hard_err, BackslashEscapeEOF | StringEOF | TypeEOF => missing, - BadCharInTupleType => hard_err, - BadCharInFunctionType => hard_err, + BadCharInTupleType | BadCharInFunctionType | BadCharAtStartOfStatement => hard_err, TryBadSyntax => hard_err, TypeAnnotationNoClosingBracket | BracketedRefTypeNoClosingBracket => missing, diff --git a/mers_lib/src/parsing/statements.rs b/mers_lib/src/parsing/statements.rs index 71e2228..3c57b5c 100755 --- a/mers_lib/src/parsing/statements.rs +++ b/mers_lib/src/parsing/statements.rs @@ -112,113 +112,116 @@ pub fn parse( return Ok(None); }; let mut pos_after_first = src.get_pos(); - src.skip_whitespace(); - match src.peek_word_allow_colon() { - ":=" => { - let pos_in_src = src.get_pos(); - src.next_word_allow_colon(); - let source = parse(src, srca)?.ok_or_else(|| { - CheckError::new() - .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) - .msg_str(format!("EOF after `:=`")) - })?; - first = Box::new(program::parsed::init_to::InitTo { - pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), - target: first, - source, - }); - } - "=" => { - let pos_in_src = src.get_pos(); - src.next_word_allow_colon(); - let source = parse(src, srca)?.ok_or_else(|| { - CheckError::new() - .src(vec![( - (first.source_range().start(), src.get_pos(), srca).into(), - None, - )]) - .msg_str(format!("EOF after `=`")) - })?; - first = Box::new(program::parsed::assign_to::AssignTo { - pos_in_src: (pos_in_src, src.get_pos(), srca).into(), - target: first, - source, - }); - } - "->" => { - let pos_in_src = src.get_pos(); - src.next_word_allow_colon(); - let run = match parse(src, srca) { - Ok(Some(v)) => v, - Ok(None) => { - return Err(CheckError::new() + loop { + src.skip_whitespace(); + match src.peek_word_allow_colon() { + ":=" => { + let pos_in_src = src.get_pos(); + src.next_word_allow_colon(); + let source = parse(src, srca)?.ok_or_else(|| { + CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) - .msg_str(format!("EOF after `->`"))) - } - Err(e) => return Err(e), - }; - first = Box::new(program::parsed::function::Function { - pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), - arg: first, - run, - }); - } - _ => loop { - src.skip_whitespace(); - let dot_in_src = src.get_pos(); - if let Some('.') = src.peek_char() { - src.next_char(); - src.skip_whitespace(); - if src.peek_word() == "try" { - src.next_word(); - src.skip_whitespace(); - if let Some('(') = src.next_char() { - let funcs = parse_tuple_without_open(src, srca)?; - first = Box::new(program::parsed::r#try::Try { - pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), - arg: first, - funcs, - }); - pos_after_first = src.get_pos(); - } else { - return Err(CheckError::new() - .msg_str(format!("Expected `(` after `.try`")) - .src(vec![( - (dot_in_src, src.get_pos(), srca).into(), - Some(EColor::TryBadSyntax), - )])); - } - } else { - let chained = match parse_no_chain(src, srca) { - Ok(Some(v)) => v, - Ok(None) => { - return Err(CheckError::new() - .src(vec![((dot_in_src, src.get_pos(), srca).into(), None)]) - .msg_str(format!("EOF after `.`"))) - } - Err(e) => return Err(e), - }; - // allow a.f(b, c) syntax (but not f(a, b, c)) - if let Some('(') = src.peek_char() { - src.next_char(); - let elems = parse_multiple(src, srca, ")")?; - first = Box::new(program::parsed::tuple::Tuple { - pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), - elems: [first].into_iter().chain(elems).collect(), - }); - } - first = Box::new(program::parsed::chain::Chain { - pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), - first, - chained, - }); - pos_after_first = src.get_pos(); - } - } else { - src.set_pos(pos_after_first); + .msg_str(format!("EOF after `:=`")) + })?; + first = Box::new(program::parsed::init_to::InitTo { + pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), + target: first, + source, + }); break; } - }, + "=" => { + let pos_in_src = src.get_pos(); + src.next_word_allow_colon(); + let source = parse(src, srca)?.ok_or_else(|| { + CheckError::new() + .src(vec![( + (first.source_range().start(), src.get_pos(), srca).into(), + None, + )]) + .msg_str(format!("EOF after `=`")) + })?; + first = Box::new(program::parsed::assign_to::AssignTo { + pos_in_src: (pos_in_src, src.get_pos(), srca).into(), + target: first, + source, + }); + break; + } + "->" => { + let pos_in_src = src.get_pos(); + src.next_word_allow_colon(); + let run = match parse(src, srca) { + Ok(Some(v)) => v, + Ok(None) => { + return Err(CheckError::new() + .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) + .msg_str(format!("EOF after `->`"))) + } + Err(e) => return Err(e), + }; + first = Box::new(program::parsed::function::Function { + pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), + arg: first, + run, + }); + break; + } + _ => (), + } + let dot_in_src = src.get_pos(); + if let Some('.') = src.peek_char() { + src.next_char(); + src.skip_whitespace(); + if src.peek_word() == "try" { + src.next_word(); + src.skip_whitespace(); + if let Some('(') = src.next_char() { + let funcs = parse_tuple_without_open(src, srca)?; + first = Box::new(program::parsed::r#try::Try { + pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), + arg: first, + funcs, + }); + pos_after_first = src.get_pos(); + } else { + return Err(CheckError::new() + .msg_str(format!("Expected `(` after `.try`")) + .src(vec![( + (dot_in_src, src.get_pos(), srca).into(), + Some(EColor::TryBadSyntax), + )])); + } + } else { + let chained = match parse_no_chain(src, srca) { + Ok(Some(v)) => v, + Ok(None) => { + return Err(CheckError::new() + .src(vec![((dot_in_src, src.get_pos(), srca).into(), None)]) + .msg_str(format!("EOF after `.`"))) + } + Err(e) => return Err(e), + }; + // allow a.f(b, c) syntax (but not f(a, b, c)) + if let Some('(') = src.peek_char() { + src.next_char(); + let elems = parse_multiple(src, srca, ")")?; + first = Box::new(program::parsed::tuple::Tuple { + pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), + elems: [first].into_iter().chain(elems).collect(), + }); + } + first = Box::new(program::parsed::chain::Chain { + pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), + first, + chained, + }); + pos_after_first = src.get_pos(); + } + } else { + src.set_pos(pos_after_first); + break; + } } if matches!(src.peek_char(), Some(',' | ';')) { src.next_char(); @@ -495,15 +498,17 @@ pub fn parse_no_chain( pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::bool::Bool(false)), }), - "" => return Ok(None), - o => { + o if !o.trim().is_empty() => { let o = o.to_string(); src.section_begin("literals, variables, and other non-keyword things".to_string()); if let Ok(n) = o.parse() { if src.peek_char() == Some('.') { let here = src.get_pos(); src.next_char(); - if let Ok(num) = format!("{o}.{}", src.next_word()).parse() { + let after_dot = src.next_word(); + if let Some(Ok(num)) = + (!after_dot.is_empty()).then_some(format!("{o}.{}", after_dot).parse()) + { Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::float::Float(num)), @@ -546,6 +551,21 @@ pub fn parse_no_chain( } } } + // empty string (after calling .trim()) + _ => { + if src.next_char().is_some() { + // unexpected word-separator character + return Err(CheckError::new() + .src(vec![( + (pos_in_src, src.get_pos(), srca).into(), + Some(EColor::BadCharAtStartOfStatement), + )]) + .msg_str("Unexpected character found at the start of a statement".to_owned())); + } else { + // EOF + return Ok(None); + } + } })) }