use std::{path::PathBuf, sync::Arc}; use super::{Source, SourceFrom, SourcePos}; use crate::{ data::Data, errors::{CheckError, EColor}, program::{ self, parsed::{as_type::AsType, MersStatement}, }, }; pub fn parse( src: &mut Source, srca: &Arc, ) -> Result>, CheckError> { src.section_begin("statement".to_string()); src.skip_whitespace(); // type annotation: // [type] statement // force output type to be `type` // [[name] type] // define `name` as `type` // [[name] := statement] // define `name` as the type of `statement` (`statement` is never executed) if matches!(src.peek_char(), Some('[')) { let pos_in_src = src.get_pos(); src.next_char(); return Ok(Some(if matches!(src.peek_char(), Some('[')) { src.next_char(); // [[... let name = src.next_word(); let name = name.trim().to_owned(); src.skip_whitespace(); if !matches!(src.next_char(), Some(']')) { return Err(CheckError::from(format!( "Expected ']' after type name in [[type_name]]" ))); } src.skip_whitespace(); if src.peek_word_allow_colon() == ":=" { src.next_word_allow_colon(); // [[name] := statement] let statement = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `[[...] := ...]` type definition"))) } Err(e) => return Err(e), }; if !matches!(src.next_char(), Some(']')) { return Err(CheckError::new().msg_str(format!( "Expected ']' after statement in [[type_name] := statement]" ))); } Box::new(program::parsed::custom_type::CustomType { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), name, source: Err(statement), }) } else { // [[name] type] src.skip_whitespace(); let as_type = super::types::parse_type(src, srca)?; src.skip_whitespace(); if !matches!(src.next_char(), Some(']')) { return Err(CheckError::new().msg_str(format!( "Expected ']' after type definition in [[type_name] type_definition]" ))); } Box::new(program::parsed::custom_type::CustomType { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), name, source: Ok(as_type), }) } } else { // [type] statement src.skip_whitespace(); let type_pos_in_src = src.get_pos(); let as_type = super::types::parse_type(src, srca)?; let type_pos_in_src = (type_pos_in_src, src.get_pos(), srca).into(); src.skip_whitespace(); if !matches!(src.next_char(), Some(']')) { return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::TypeAnnotationNoClosingBracket), )]) .msg_str(format!("Missing closing bracket ']' after type annotation"))); } let statement = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `[...]` type annotation"))) } Err(e) => return Err(e), }; Box::new(AsType { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), statement, as_type, type_pos_in_src, expand_type: true, }) })); } let mut first = if let Some(s) = parse_no_chain(src, srca)? { s } else { return Ok(None); }; let mut pos_after_first = src.get_pos(); loop { src.skip_whitespace(); match src.peek_word_allow_colon() { ":=" => { let pos_in_src = src.get_pos(); src.next_word_allow_colon(); let source = parse(src, srca)?.ok_or_else(|| { CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `:=`")) })?; first = Box::new(program::parsed::init_to::InitTo { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), target: first, source, }); break; } "=" => { let pos_in_src = src.get_pos(); src.next_word_allow_colon(); let source = parse(src, srca)?.ok_or_else(|| { CheckError::new() .src(vec![( (first.source_range().start(), src.get_pos(), srca).into(), None, )]) .msg_str(format!("EOF after `=`")) })?; first = Box::new(program::parsed::assign_to::AssignTo { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), target: first, source, }); break; } "->" => { let pos_in_src = src.get_pos(); src.next_word_allow_colon(); let run = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `->`"))) } Err(e) => return Err(e), }; first = Box::new(program::parsed::function::Function { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), arg: first, run, }); break; } _ => (), } let dot_in_src = src.get_pos(); if let Some('.') = src.peek_char() { src.next_char(); src.skip_whitespace(); if src.peek_word() == "try" { src.next_word(); src.skip_whitespace(); if let Some('(') = src.next_char() { let funcs = parse_tuple_without_open(src, srca)?; first = Box::new(program::parsed::r#try::Try { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), arg: first, funcs, }); pos_after_first = src.get_pos(); } else { return Err(CheckError::new() .msg_str(format!("Expected `(` after `.try`")) .src(vec![( (dot_in_src, src.get_pos(), srca).into(), Some(EColor::TryBadSyntax), )])); } } else { let chained = match parse_no_chain(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((dot_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `.`"))) } Err(e) => return Err(e), }; // allow a.f(b, c) syntax (but not f(a, b, c)) if let Some('(') = src.peek_char() { src.next_char(); let elems = parse_multiple(src, srca, ")")?; first = Box::new(program::parsed::tuple::Tuple { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), elems: [first].into_iter().chain(elems).collect(), }); } first = Box::new(program::parsed::chain::Chain { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), first, chained, }); pos_after_first = src.get_pos(); } } else if let Some(':') = src.peek_char() { src.next_char(); let field = src.next_word().to_owned(); first = Box::new(program::parsed::field::Field { pos_in_src: (first.source_range().start(), src.get_pos(), srca).into(), object: first, field, }); pos_after_first = src.get_pos(); } else { src.set_pos(pos_after_first); break; } } if matches!(src.peek_char(), Some(',' | ';')) { src.next_char(); } Ok(Some(first)) } pub fn parse_tuple_without_open( src: &mut Source, srca: &Arc, ) -> Result>, CheckError> { parse_multiple(src, srca, ")") } pub fn parse_multiple( src: &mut Source, srca: &Arc, end: &str, ) -> Result>, CheckError> { src.section_begin("block".to_string()); let mut statements = vec![]; loop { src.skip_whitespace(); if src.peek_char().is_some_and(|ch| end.contains(ch)) { src.next_char(); break; } else if let Some(s) = parse(src, srca)? { statements.push(s); } else { // EOF break; } } Ok(statements) } pub fn parse_no_chain( src: &mut Source, srca: &Arc, ) -> Result>, CheckError> { src.skip_whitespace(); src.section_begin("statement no chain".to_string()); match src.peek_char() { Some('#') => { let pos_in_src = src.get_pos(); src.next_char(); if src.peek_char().is_none() { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after #"))); } if src.peek_char().is_some_and(|ch| ch.is_whitespace()) { src.skip_whitespace(); return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::WhitespaceAfterHashtag), )]) .msg_str(format!("Whitespace after #"))); } match src.next_word() { "include" => { if !src.allow_includes { return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::HashIncludeCantLoadFile), )]) .msg_str(format!("not allowed to use #include (only allowed when source code is read from a file, or if allow_includes is explicitly set)"))); } let end_in_src = src.get_pos(); src.skip_whitespace(); let string_in_src = src.get_pos(); if src.next_char() == Some('"') { let file_path_str = parse_string(src, srca, string_in_src)?; let mut file_path: PathBuf = PathBuf::from(&file_path_str); if !file_path.is_absolute() { if let SourceFrom::File(other_file_path) = srca.src_from() { if let Some(files_dir) = other_file_path.parent() { file_path = files_dir.join(file_path); } } } match Source::new_from_file(file_path) { Ok(mut inner_src) => { let inner_srca = Arc::new(inner_src.clone()); return Ok(Some(Box::new( program::parsed::include_mers::IncludeMers { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), include: match super::parse(&mut inner_src, &inner_srca) { Ok(v) => v, Err(e) => { return Err(CheckError::new().err_with_diff_src(e)) } }, inner_src, }, ))); } Err(e) => { return Err(CheckError::new() .src(vec![ ((pos_in_src, end_in_src, srca).into(), None), ( (string_in_src, src.get_pos(), srca).into(), Some(EColor::HashIncludeCantLoadFile), ), ]) .msg_str(format!("Can't load file '{file_path_str}': {e}"))); } } } else { return Err(CheckError::new() .src(vec![ ((pos_in_src, end_in_src, srca).into(), None), ((string_in_src, src.get_pos(), srca).into(), Some(EColor::HashIncludeNotAString)), ]) .msg_str(format!( "#include must be followed by a string literal like \"file.mers\" (\" expected)." ))); } } other => { let msg = format!("Unknown #statement: {other}"); return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::HashUnknown), )]) .msg_str(msg)); } } } Some('{') => { // try: is this an object? let pos_in_src = src.get_pos(); src.next_char(); let pos_in_src_after_bracket = src.get_pos(); { let mut elems: Vec<(String, _)> = vec![]; loop { src.skip_whitespace(); if src.peek_char() == Some('}') { src.next_char(); for (i, a) in elems.iter().enumerate() { if elems.iter().skip(1 + i).any(|b| a.0 == b.0) { return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::ObjectDuplicateField), )]) .msg_str(format!( "This object contains more than one field named `{}`", a.0 ))); } } return Ok(Some(Box::new(program::parsed::object::Object { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), elems, }))); } let name = src.next_word().to_owned(); src.skip_whitespace(); match src.next_char() { Some(':') if src.next_char().is_some_and(|c| c.is_whitespace()) => elems .push(( name, match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), None, )]) .msg_str(format!("EOF after `:` in object"))) } Err(e) => { return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), None, )]) .msg_str(format!( "Error in statement after `:` in object" )) .err(e)) } }, )), _ => { // not an object (or invalid syntax) src.set_pos(pos_in_src_after_bracket); break; } } } } // if not an object let statements = parse_multiple(src, srca, "}")?; return Ok(Some(Box::new(program::parsed::block::Block { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), statements, }))); } Some('(') => { let pos_in_src = src.get_pos(); src.next_char(); let elems = parse_tuple_without_open(src, srca)?; return Ok(Some(Box::new(program::parsed::tuple::Tuple { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), elems, }))); } Some('"') => { src.section_begin("string literal".to_string()); let pos_in_src = src.get_pos(); src.next_char(); let s = parse_string(src, srca, pos_in_src)?; return Ok(Some(Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::string::String(s)), }))); } _ => {} } let pos_in_src = src.get_pos(); Ok(Some(match src.next_word() { "if" => { src.section_begin("if".to_string()); src.skip_whitespace(); let condition = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF in `if`"))) } Err(e) => return Err(e), }; let on_true = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `if `"))) } Err(e) => return Err(e), }; let on_false = { src.skip_whitespace(); if src.peek_word() == "else" { src.section_begin("else".to_string()); src.next_word(); Some(match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `else`"))) } Err(e) => return Err(e), }) } else { None } }; Box::new(program::parsed::r#if::If { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), condition, on_true, on_false, }) } "loop" => { src.section_begin("loop".to_string()); src.skip_whitespace(); let inner = match parse(src, srca) { Ok(Some(v)) => v, Ok(None) => { return Err(CheckError::new() .src(vec![((pos_in_src, src.get_pos(), srca).into(), None)]) .msg_str(format!("EOF after `loop`"))) } Err(e) => return Err(e), }; Box::new(program::parsed::r#loop::Loop { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), inner, }) } "true" => Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::bool::Bool(true)), }), "false" => Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::bool::Bool(false)), }), o if !o.trim().is_empty() => { let o = o.to_string(); src.section_begin("literals, variables, and other non-keyword things".to_string()); if let Ok(n) = o.parse() { if src.peek_char() == Some('.') { let here = src.get_pos(); src.next_char(); let after_dot = src.next_word(); if let Some(Ok(num)) = (!after_dot.is_empty()).then_some(format!("{o}.{}", after_dot).parse()) { Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::float::Float(num)), }) } else { src.set_pos(here); Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::int::Int(n)), }) } } else { Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::int::Int(n)), }) } } else if let Some(b) = o .ends_with('b') .then(|| o[0..o.len() - 1].parse().ok()) .flatten() { Box::new(program::parsed::value::Value { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), data: Data::new(crate::data::byte::Byte(b)), }) } else { if let Some('&') = o.chars().next() { Box::new(program::parsed::variable::Variable { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), is_ref: true, var: o[1..].to_string(), }) } else { Box::new(program::parsed::variable::Variable { pos_in_src: (pos_in_src, src.get_pos(), srca).into(), is_ref: false, var: o.to_string(), }) } } } // empty string (after calling .trim()) _ => { if src.next_char().is_some() { // unexpected word-separator character return Err(CheckError::new() .src(vec![( (pos_in_src, src.get_pos(), srca).into(), Some(EColor::BadCharAtStartOfStatement), )]) .msg_str("Unexpected character found at the start of a statement".to_owned())); } else { // EOF return Ok(None); } } })) } /// expects to be called *after* a " character is consumed from src pub fn parse_string( src: &mut Source, srca: &Arc, double_quote: SourcePos, ) -> Result { parse_string_custom_end(src, srca, double_quote, '"', '"', "", EColor::StringEOF) } pub fn parse_string_custom_end( src: &mut Source, srca: &Arc, opening: SourcePos, opening_char: char, closing_char: char, string_prefix: &str, eof_color: EColor, ) -> Result { let mut s = String::new(); loop { if let Some(ch) = src.next_char() { if ch == '\\' { let backslash_in_src = src.get_pos(); s.push(match src.next_char() { Some('\\') => '\\', Some('r') => '\r', Some('n') => '\n', Some('t') => '\t', Some('"') => '"', Some(c) if c == closing_char || c == opening_char => c, Some(o) => { return Err(CheckError::new() .src(vec![( (backslash_in_src, src.get_pos(), srca).into(), Some(EColor::BackslashEscapeUnknown), )]) .msg_str(format!("unknown backslash escape '\\{o}'"))); } None => { return Err(CheckError::new() .src(vec![( (backslash_in_src, src.get_pos(), srca).into(), Some(EColor::BackslashEscapeEOF), )]) .msg_str(format!("EOF in backslash escape"))); } }); } else if ch == closing_char { break; } else { s.push(ch); } } else { return Err(CheckError::new() .src(vec![( (opening, src.get_pos(), srca).into(), Some(eof_color), )]) .msg_str(format!( "EOF in {string_prefix}string literal{}", if closing_char != '"' { format!( " {opening_char}...{closing_char} (end string with '{closing_char}')" ) } else { String::new() } ))); } } Ok(s) } pub fn to_string_literal(val: &str, end: char) -> String { val.replace("\\", "\\\\") .replace("\r", "\\r") .replace("\n", "\\n") .replace("\"", "\\\"") .replace(end, format!("\\{end}").as_str()) }