From da22776f24e863550975def277b14d7b1f62c6f7 Mon Sep 17 00:00:00 2001 From: Mark <> Date: Tue, 24 Mar 2026 22:44:54 +0100 Subject: [PATCH] init --- .gitignore | 1 + Cargo.lock | 7 + Cargo.toml | 6 + example.mharkup | 255 +++++++++++++++++++++++ src/doc.rs | 70 +++++++ src/main.rs | 141 +++++++++++++ src/parse.rs | 519 +++++++++++++++++++++++++++++++++++++++++++++ src/to/html.rs | 543 ++++++++++++++++++++++++++++++++++++++++++++++++ src/to/mod.rs | 13 ++ src/to/plain.rs | 98 +++++++++ 10 files changed, 1653 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 example.mharkup create mode 100644 src/doc.rs create mode 100644 src/main.rs create mode 100644 src/parse.rs create mode 100644 src/to/html.rs create mode 100644 src/to/mod.rs create mode 100644 src/to/plain.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..22059e8 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "mharkup" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..9a96073 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "mharkup" +version = "0.1.0" +edition = "2024" + +[dependencies] diff --git a/example.mharkup b/example.mharkup new file mode 100644 index 0000000..9b15b36 --- /dev/null +++ b/example.mharkup @@ -0,0 +1,255 @@ +mharkup example + +an example document which +explains and documents the +features of mharkup. + +# motivation + +mharkup is a weird markup language +to get around things that annoy me +in most markdown flavors. 
+ +## goals + +as someone using mharkup, you + +` can read and write it in your text editor +` can export and `/printable(print) it +` can easily (re)organize your document +` probably won't apply formatting on accident `/backtick() + +# the format + +## documents + +documents are split into sections. +the first section is always the document's title. +sections are separated by an empty line. + +for example, here are the sections you just read: + +[[ +#!mharkup +## documents + +documents are split into sections. +the first section is always the document's title. +sections are separated by an empty line. + +for example, here are the sections you just read: + +`()#!mharkup +## documents +... +]] + +each mharkup file starts with a title, +the first line of the file, which doesn't have +any leading hashtags. a file `+(should) also have a small +description of what it will contain, and the file's +actual content `+(should) be in subdocuments. +for example, here's how this file starts: + +[[ +#!mharkup +mharkup example + +an example document which +explains and documents the +features of mharkup. + +# motivation +... +]] + +## sections + +in most cases, a section is just +a few lines of text. you've probably +already noticed that linebreaks are usually +ignored `-(or, more accurately, replaced with spaces). + +sections beginning with a hashtag (#) are special. +the first line specifies the type of section you want, +and the other lines are the section's content. +you've already seen this with #!mharkup in the +first example. + +the types of sections you can create like this are: + +` #@ quotes +` #! code blocks +` #\ various extensions + +### backticks /here + +in a section, you can use the backtick (``) +to apply text formatting, or you can start a section +with a backtick followed by a space to create a list. 
+here's a list of things the backtick can do: + +` create a list +` represent itself ``: ```` +` add a `-(note): ``-(note) +` mark something that's `~(wrong): ``~(wrong) +` highlight a `_(special) element: ``_(special) +` add emphasis to `+(something): ``+(something) +` highlight something `*(important): ``*(important) +` insert raw `{`text`}: `<`{`text`}>: `2{`<`{`text`}>}}: `2<`2{`<`{`text`}>}}>>: ... +` link `/here(somewhere): ``/here(somewhere) +` use an extension: ``\spec{...} or ``\spec<...> +` combine any of these: `*_+(whoah): ``*_+(whoah) + +## + +you can also insert a subdocument by inserting +a new title section. for example, the backticks +subdocument starts with the following section: + +#!mharkup +### backticks /here + +and these sections are back in the outer +document, at depth 2 instead of 3, because +of an empty title section: + +#!mharkup +## + +since sections are separated by an empty line, +they don't usually contain empty lines themselves. +however, they absolutely can, and for code blocks, +you will definitely need it quite quickly. + +in normal text, where newlines are usually removed, +an empty line can be used to force a linebreak. + +to add an empty line to a section, +enclose it in at least one set of square brackets. +`-(for the following example, to be able to include +the double brackets, the #!mharkup section is enclosed in +3 more sets of square brackets.) + +[[[ +#!mharkup +[[ +#@mark +and that's all, + +i think... +]] +]]] + +[[ +#@mark +and that's all, + +i think... +]] + +the following subdocuments just show how +to use the different section types in more detail. + +### quotes + +#@mark @21st century +i've solved many problems +that noone's ever had. + +in this case, the first line +of the section was #@mark @21st century. + +### code + +#!python +def do_something_productive(): + pass + +in this case, the first line +of the section was #!python. 
+ +### extensions + +#!mharkup +#\texm +\sum_{a=0}^{20} a^2 + +#\texm +\sum_{a=0}^{20} a^2 + +#!mharkup +or inline: `{`\texm} `<`2\texm{ \frac{1}{2} }}> + +or inline: `\texm or `2\texm{ \frac{1}{2} }} + +#!mharkup +#\html +deprecated, but funny + +#\html +deprecated, but funny + +# reasoning + +## why is text formatting so inconvenient? /backtick + +i really don't want to think about how many special +characters i can't use while writing my plaintext documents. +i know that writing `<`*(word)> instead of **word** is inconvenient, +but using *, _, and even \ without any escaping may `-[(?)] already be worth it. + +Also, * and _ can't be nested, so if you took `+(this **cool** section) +and copy-pasted it into an already bold section, you would get +`+(**this **cool** section**) which is just... ew. +Meanwhile `{`*(this `*(cool) section)} works just fine +and produces `*(this `*(cool) section). + +## why does it need to be printable? /printable + +i don't expect to ever print out one of these documents, +but i want them to be exportable to a static format, +one where the user can read every piece of information +in the document without having to interact with their device. +this requirement just happens to line up with printability :) + +## why don't the titles get smaller? /titlesize + +so that you can have as many layers of document +as you want without asking yourself +"is that just text in a

or is it a really small heading?", +and so that you can move subdocuments between layers without having to think about it. + +to illustrate this, see how

and look in your browser: + +#\html +this is a paragraph +

followed by a h7, the smallest heading

+ +[ +crazy, right? and i actually lied. +the "paragraph" is a and the "h7" is a

element :) + +and here's 10 layers of documents in mharkup: +] + +########## whoah, that's a lot + +but it's still perfectly readable + +## why are there no nested lists? /listnesting + +the real answer is, implementing it was hard +and it didn't feel natural with the rest of the syntax. + +another answer is, if you have enough content to +require nested lists, you should just use subdocuments. + +this will also force you to have good titles for each element. +it's sometimes unclear if you should use nested lists or +headings in a markdown document, and my own .md notes are +inconsistent in this way. and when i did use a nested list, +some elements were like titles for the inner list, +and others had their own content and then still contained +another list. it's an inconsistent mess, so not having it in +mharkup isn't too upsetting for me `-[(yet..., i'm a big fan of lists in .md)]. diff --git a/src/doc.rs b/src/doc.rs new file mode 100644 index 0000000..c517dd2 --- /dev/null +++ b/src/doc.rs @@ -0,0 +1,70 @@ +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Document<'a> { + pub title: Text<'a>, + pub depth: usize, + pub link: &'a str, + pub sections: Vec>, +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Section<'a> { + Paragraph(Text<'a>), + List(Vec>), + Code(&'a str, Text<'a>), + Quote(&'a str, &'a str, Text<'a>), + Ext(&'a str, &'a str), + Document(Document<'a>), +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Text<'a>(pub Vec>); + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Component<'a> { + Char(char), + Text(&'a str), + Note(Text<'a>), + Wrong(Text<'a>), + Special(Text<'a>), + Emphasis(Text<'a>), + Important(Text<'a>), + Link(&'a str, Text<'a>), + Ext(&'a str, &'a str), +} + +impl<'a> Text<'a> { + pub fn is_empty(&self) -> bool { + self.0.iter().all(|c| c.is_empty()) + } +} +impl<'a> Component<'a> { + pub fn is_empty(&self) -> bool { + match self { + 
Component::Char(_) => false, + Component::Text(t) => t.is_empty(), + Component::Note(t) + | Component::Wrong(t) + | Component::Special(t) + | Component::Emphasis(t) + | Component::Important(t) => t.is_empty(), + Component::Link(target, t) => target.is_empty() && t.is_empty(), + Component::Ext(_, _) => false, + } + } +} + +impl<'a> Document<'a> { + pub fn find_link_target(&'a self, link: &str) -> Option<&'a Document<'a>> { + if self.link == link { + return Some(self); + } + for child in &self.sections { + if let Section::Document(child) = child + && let Some(target) = child.find_link_target(link) + { + return Some(target); + } + } + None + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..b930388 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,141 @@ +//! # mharkup +//! +//! another markup language +//! +//! ## sections +//! +//! Sections are delimited by double newlines (\n\n). +//! A double newline can be included in any section by surrounding +//! the section with at least one set of square brackets. +//! +//! ## input +//! +//! Must be utf-8 and must end with a linebreak. +//! Linebreaks are \n (no \r). 
+ +use std::{ + io::{Read, Write}, + net::Shutdown, + time::{Duration, SystemTime}, +}; + +use crate::{doc::Document, parse::parse}; + +mod doc; +mod parse; +mod to; + +fn main() { + let args = std::env::args().skip(1).collect::>(); + if args.len() < 2 { + eprintln!("Usage: .mharkup [specs...]"); + eprintln!("output formats and (available specs)"); + eprintln!("` html: a static html+css document (html, texm)"); + eprintln!("` html-http: html with auto-refresh"); + return; + } + let specs = &args[2..]; + let src = || std::fs::read_to_string(&args[0]).unwrap(); + match args[1].to_lowercase().trim() { + "html" => println!( + "{}", + to::html::page(&parse(&src()).unwrap(), specs, &mut to::html::State::new()) + ), + "html-http" => { + let addr = std::env::var("ADDR"); + let addr = addr.as_ref().map_or("localhost:8000", |v| v.as_str()); + let wait = std::env::var("WAIT") + .ok() + .and_then(|v| v.trim().parse().ok()) + .unwrap_or(100); + eprintln!(); + eprintln!("Binding to {addr} (ADDR env var)"); + eprintln!("Checking file mtime every {wait}ms while handling a request (WAIT env var)"); + let listener = std::net::TcpListener::bind(addr).unwrap(); + eprintln!(); + eprintln!("With your browser, open http://{addr}/"); + eprintln!("Without javascript, use http://{addr}/s"); + eprintln!("For a static page, open http://{addr}/1"); + let mut ptime = SystemTime::UNIX_EPOCH + Duration::from_mins(9); + loop { + let mut state = to::html::State::new(); + let mut buf1 = vec![String::new(); 1024]; + let mut buf2 = vec![None; 1024]; + let mut i = 0; + // connection might time out after 30 seconds, + // so send a 304 before that can happen. 
+ let maxchecks = 25 * 1000 / wait; + 'accept_connection: while i < 1024 { + let (mut connection, _) = listener.accept().unwrap(); + let mut buf = [0u8; 8]; + if connection.read_exact(&mut buf).is_err() { + continue; + } + let allow_delay = if let Ok(req) = str::from_utf8(&buf) { + if req.contains(" /u") { + state.head_to_body = "\n"; + true + } else if req.contains(" /1") { + state.head_to_body = "\n"; + false + } else if req.contains(" /s") { + state.head_to_body = + "\n\n"; + true + } else { + state.head_to_body = r#" + +"#; + false + } + } else { + continue; + }; + if allow_delay { + 'delay_connection: { + for _ in 0..maxchecks { + let mtime = + std::fs::metadata(&args[0]).unwrap().modified().unwrap(); + if mtime != ptime { + ptime = mtime; + break 'delay_connection; + } else { + std::thread::sleep(std::time::Duration::from_millis(wait)); + } + } + connection + .write_all( + b"HTTP/1.1 304 Not Modified\r\nContent-Length: 0\r\n\r\n", + ) + .ok(); + connection.shutdown(Shutdown::Both).ok(); + continue 'accept_connection; + } + } + buf1[i] = src(); + let src = unsafe { &*((&buf1[i]) as *const String) }; + buf2[i] = Some(parse(src).unwrap()); + let doc = unsafe { &*(buf2[i].as_ref().unwrap() as *const Document<'_>) }; + i += 1; + let mut http = + "HTTP/1.1 200 Ok\r\nContent-Length: 0000000000\r\n\r\n".to_owned(); + let len = http.len(); + to::html::page_to(doc, specs, &mut state, &mut http); + let content_length = format!("{}", http.len() - len); + http.replace_range(len - 4 - content_length.len()..len - 4, &content_length); + connection.write_all(http.as_bytes()).ok(); + connection.shutdown(Shutdown::Both).ok(); + } + } + } + _ => eprintln!("Unknown format, run without arguments for help"), + } +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..8f161de --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,519 @@ +use crate::doc::{Component, Document, Section, Text}; + +pub fn parse<'a>(mut src: &'a str) -> Result, ParseError<'a>> { + let doc = 
parse_document(&mut src, 0)?.unwrap(); + assert!(src.trim().is_empty()); + Ok(doc) +} + +#[derive(Debug)] +pub struct ParseError<'a>(ParseErr<'a>, usize); +#[derive(Debug)] +#[allow(dead_code)] +pub enum ParseErr<'a> { + InvalidSectionHash, + InvalidHeadingDepth, + BracketsAtStartOfSectionWithNoLinebreakAfter, + BracketedSectionNotTerminated, + BracketedSectionTerminatedByTooManyBrackets, + IncompleteBacktick, + MissingClosing(char), + InvalidBacktickBracketCountSpecifier, + InvalidBacktickModifier, + SubsectionNotAllowedHere, + FileTitleWithLink, + r#__Zzzz(&'a str), +} + +#[derive(Clone, Copy)] +pub struct ParseTextOpts<'a> { + closing: Option<(char, &'a str)>, + remove_linebreaks: bool, + remove_indentation: (usize, usize), +} +#[allow(dead_code)] +impl<'a> ParseTextOpts<'a> { + fn new() -> Self { + Self { + closing: None, + remove_linebreaks: false, + remove_indentation: (0, 0), + } + } + pub fn default(self) -> Self { + Self::new() + } + fn closing(mut self, char: char, str: &'a str) -> Self { + self.closing = Some((char, str)); + self + } + pub fn remove_linebreaks(mut self) -> Self { + self.remove_linebreaks = true; + self + } + pub fn remove_all_indentation(mut self, depth: usize) -> Self { + self.remove_indentation = (depth, depth); + self + } + pub fn remove_all_indentation_2( + mut self, + depth_initial: usize, + depth_linebreak: usize, + ) -> Self { + self.remove_indentation = (depth_initial, depth_linebreak); + self + } + pub fn remove_only_initial_indentation(mut self, depth: usize) -> Self { + self.remove_indentation = (depth, 0); + self + } + pub fn remove_only_linebreak_indentation(mut self, depth: usize) -> Self { + self.remove_indentation = (0, depth); + self + } +} +pub fn parse_text<'a>( + src: &mut &'a str, + opts: impl FnOnce(ParseTextOpts<'static>) -> ParseTextOpts<'static>, +) -> Result, ParseError<'a>> { + parse_text_impl(src, opts(ParseTextOpts::new())) +} +pub fn parse_text_impl<'a>( + src: &mut &'a str, + ptopts: ParseTextOpts<'_>, +) 
-> Result, ParseError<'a>> { + let closing = ptopts.closing; + let remove_linebreaks = ptopts.remove_linebreaks; + let remove_indentation = ptopts.remove_indentation; + let mut components = Vec::new(); + let len = src.len(); + let pos = |src: &str| len - src.len(); + let indentation_bytes = src + .chars() + .take(remove_indentation.0) + .take_while(|ch| ch.is_whitespace() && *ch != '\n') + .map(|ch| ch.len_utf8()) + .sum(); + *src = &src[indentation_bytes..]; + while let Some(i) = src.find(['`', '\n']) { + if let Some((_, closing)) = closing + && let Some(c) = src[..i].find(closing) + { + if c > 0 { + components.push(Component::Text(&src[..c])); + } + *src = &src[c + closing.len()..]; + return Ok(Text(components)); + } + let is_linebreak = src[i..].starts_with('\n'); + if is_linebreak { + if remove_linebreaks { + if src[i + 1..].starts_with('\n') { + // keep one of the two linebreaks + components.push(Component::Text(&src[..i + 1])); + *src = &src[i + 2..]; + } else { + // remove the linebreak, put a space + components.push(Component::Text(&src[..i])); + components.push(Component::Char(' ')); + *src = &src[i + 1..]; + } + } else { + // keep the linebreak + components.push(Component::Text(&src[..i + 1])); + *src = &src[i + 1..]; + } + let indentation_bytes = src + .chars() + .take(remove_indentation.1) + .take_while(|ch| ch.is_whitespace() && *ch != '\n') + .map(|ch| ch.len_utf8()) + .sum(); + *src = &src[indentation_bytes..]; + continue; + } + if i > 0 { + components.push(Component::Text(&src[..i])); + } + *src = &src[i + 1..]; + match src + .chars() + .next() + .ok_or(ParseError(ParseErr::IncompleteBacktick, pos(src)))? 
+ { + '`' => { + *src = &src[1..]; + components.push(Component::Char('`')); + } + ' ' => return Err(ParseError(ParseErr::IncompleteBacktick, pos(src))), + _ => { + let i = src + .chars() + .take_while(|&ch| { + !(ch.is_whitespace() + || closing.is_some_and(|(c, _)| c == ch) + || matches!(ch, '(' | '[' | '{' | '<')) + }) + .map(|ch| ch.len_utf8()) + .sum(); + if let Some(bracket) = src[i..] + .chars() + .next() + .filter(|&ch| !(ch.is_whitespace() || closing.is_some_and(|(c, _)| c == ch))) + { + let mut opts = &src[..i]; + *src = &src[i + bracket.len_utf8()..]; + let numbrackets = { + let i = opts + .find(|ch: char| ch.is_ascii_punctuation()) + .unwrap_or(opts.len()); + if i == 0 { + 1 + } else if let Some(num) = opts[..i].parse().ok().filter(|n| *n > 0) { + opts = &opts[i..]; + num + } else { + return Err(ParseError( + ParseErr::InvalidBacktickBracketCountSpecifier, + pos(src) - opts.len(), + )); + } + }; + let pos = pos(src); + let (closing, raw) = match bracket { + '(' => (')', false), + '[' => (']', false), + '{' => ('}', true), + '<' => ('>', true), + _ => unreachable!(), + }; + let mut pat = closing.to_string(); + if numbrackets > 1 { + pat = pat.repeat(numbrackets); + } + let mut inner = if raw { + if let Some(i) = src.find(&pat) { + let text = &src[..i]; + *src = &src[i + pat.len()..]; + Ok(Component::Text(text)) + } else { + return Err(ParseError(ParseErr::MissingClosing(closing), pos)); + } + } else { + Err(parse_text_impl(src, ptopts.closing(closing, &pat)) + .map_err(|ParseError(e, i)| ParseError(e, i + pos))?) 
+ }; + while let Some(ch) = opts.chars().next() { + opts = &opts[ch.len_utf8()..]; + let opt = if let Some(i) = opts.find(|ch: char| ch.is_ascii_punctuation()) { + let opt = &opts[..i]; + opts = &opts[i..]; + opt + } else { + let opt = opts; + opts = ""; + opt + }; + match ch { + '-' if opt.is_empty() => { + inner = Ok(text(inner, Component::Note)); + } + '~' if opt.is_empty() => { + inner = Ok(text(inner, Component::Wrong)); + } + '_' if opt.is_empty() => { + inner = Ok(text(inner, Component::Special)); + } + '+' if opt.is_empty() => { + inner = Ok(text(inner, Component::Emphasis)); + } + '*' if opt.is_empty() => { + inner = Ok(text(inner, Component::Important)); + } + '/' if !opt.is_empty() => { + inner = Ok(text(inner, |v| Component::Link(opt, v))); + } + '\\' if raw && !opt.is_empty() => { + let mut inner = inner.as_mut().expect("because we are in raw mode"); + loop { + match inner { + Component::Text(raw) => { + *inner = Component::Ext(opt, raw); + break; + } + Component::Link(_, text) + | Component::Note(text) + | Component::Wrong(text) + | Component::Special(text) + | Component::Emphasis(text) + | Component::Important(text) => inner = &mut text.0[0], + Component::Char(_) | Component::Ext(_, _) => { + unreachable!() + } + } + } + } + _ => { + return Err(ParseError( + ParseErr::InvalidBacktickModifier, + pos - opts.len(), + )); + } + } + } + fn text<'a>( + inner: Result, Text<'a>>, + f: impl FnOnce(Text<'a>) -> Component<'a>, + ) -> Component<'a> { + match inner { + Ok(component) => f(Text(vec![component])), + Err(text) => f(text), + } + } + match inner { + Ok(component) => components.push(component), + Err(text) => components.extend(text.0), + } + } else { + return Err(ParseError(ParseErr::IncompleteBacktick, pos(src))); + } + } + } + } + if let Some((ch, closing)) = closing { + if let Some(c) = src.find(closing) { + if c > 0 { + components.push(Component::Text(&src[..c])); + } + *src = &src[c + closing.len()..]; + } else { + return 
Err(ParseError(ParseErr::MissingClosing(ch), pos(src))); + } + } else if !src.is_empty() { + components.push(Component::Text(&src[..])); + *src = ""; + } + Ok(Text(components)) +} + +// NOTE: for a `depth` of 0, this never returns `Ok(Err(_))`. +pub fn parse_document<'a>( + src: &mut &'a str, + min_depth: usize, +) -> Result, (usize, bool)>, ParseError<'a>> { + let mut src = Parser::new(src); + let (title_section, pos) = src.take_section()?; + let depth = match title_section.chars().take_while(|ch| *ch == '#').count() { + d if d >= min_depth => d, + d if d != 0 && d < min_depth => { + return Ok(Err( + if title_section[d..].starts_with(char::is_whitespace) { + // has a title, so go to parent of the depth we want + (d - 1, false) + } else { + // has no title, so continue using old section of that depth + (d, true) + }, + )); + } + _ => return Err(ParseError(ParseErr::InvalidHeadingDepth, pos)), + }; + if let Some(mut title) = if depth == 0 { + Some(title_section) + } else { + title_section[depth..].strip_prefix(char::is_whitespace) + } { + let link = if let Some(i) = title + .starts_with('/') + .then_some(1) + .or_else(|| title.rfind(" /").map(|i| i + 2)) + && let prelink = &title[..i.saturating_sub(2)] + && let link = title[i..].trim_end() + && !link.contains(char::is_whitespace) + { + if depth == 0 { + return Err(ParseError(ParseErr::FileTitleWithLink, pos)); + } + title = prelink; + link + } else { + "" + }; + let sections = match parse_sections(&mut src, depth, true) { + Ok(Ok(sections)) => sections, + Ok(Err(info)) => return Ok(Err(info)), + Err(e) => return Err(e), + }; + Ok(Ok(Document { + title: parse_text(&mut title, |o| o.remove_linebreaks()) + .map_err(|ParseError(e, i)| ParseError(e, i + pos + depth + 1))?, + depth, + link, + sections, + })) + } else { + Err(ParseError(ParseErr::InvalidSectionHash, pos)) + } +} + +fn parse_sections<'a>( + src: &mut Parser<'a, '_>, + depth: usize, + document: bool, +) -> Result>, (usize, bool)>, ParseError<'a>> { + let mut 
sections = Vec::new(); + 'sections: while let (mut section, pos) = src.take_section()? + && !section.is_empty() + { + if section.starts_with('#') { + let posthash = section.trim_start_matches('#'); + match posthash.chars().next() { + // => section hashes + Some(' ' | '\n') | None => { + if !document { + return Err(ParseError(ParseErr::SubsectionNotAllowedHere, pos)); + } + src.reset_to(pos); + match parse_document(src.get(), depth + 1) + .map_err(|ParseError(e, i)| ParseError(e, i + pos))? + { + Ok(child_document) => { + sections.push(Section::Document(child_document)); + } + Err((d, notitle)) => { + // a section of the same depth + if d == depth && notitle { + // terminated a subsection, + // continue parsing here + } else { + // pass to parent for parsing + src.reset_to(pos); + break 'sections; + } + } + } + } + Some('\\') => { + let (spec, inner) = posthash[1..].split_once('\n').unwrap_or((&posthash[1..], "")); + sections.push(Section::Ext(spec.strip_prefix(' ').unwrap_or(spec), inner)); + } + Some('!') => { + let (lang, mut inner) = + posthash[1..].split_once('\n').unwrap_or((&posthash[1..], "")); + sections.push(Section::Code( + lang.strip_prefix(' ').unwrap_or(lang), + parse_text(&mut inner, |o| o.default()) + .map_err(|ParseError(e, i)| ParseError(e, i + pos))?, + )); + } + Some('@') => { + let (attributed, mut inner) = + posthash[1..].split_once('\n').unwrap_or((&posthash[1..], "")); + let attributed = attributed.strip_prefix(' ').unwrap_or(attributed); + let (attributed, context) = + attributed.rsplit_once('@').unwrap_or((attributed, "")); + sections.push(Section::Quote( + attributed.strip_suffix(' ').unwrap_or(attributed), + context.strip_prefix(' ').unwrap_or(context), + parse_text(&mut inner, |o| o.remove_linebreaks()) + .map_err(|ParseError(e, i)| ParseError(e, i + pos))?, + )); + } + _ => return Err(ParseError(ParseErr::InvalidSectionHash, pos)), + } + } else if let Some(contents) = section.strip_prefix("` ") { + let mut pos = pos; + sections.push(Section::List( + 
contents + .split("\n` ") + .map(|mut elem| { + pos += 2; + let p = pos; + pos += elem.len(); + parse_text(&mut elem, |o| { + o.remove_linebreaks().remove_only_linebreak_indentation(2) + }) + .map_err(|ParseError(e, i)| ParseError(e, i + p)) + }) + .collect::, _>>()?, + )); + } else { + sections.push(Section::Paragraph( + parse_text(&mut section, |o| o.remove_linebreaks()) + .map_err(|ParseError(e, i)| ParseError(e, i + pos))?, + )); + } + } + Ok(Ok(sections)) +} + +struct Parser<'a, 'b>(&'b mut &'a str, &'a str); +impl<'a, 'b> Parser<'a, 'b> { + fn new(src: &'b mut &'a str) -> Self { + Self(src, src) + } + fn pos(&self) -> usize { + self.1.len() - self.0.len() + } + fn reset_to(&mut self, pos: usize) { + *self.0 = &self.1[pos..]; + } + fn get<'c>(&'c mut self) -> &'c mut &'a str { + self.0 + } + fn trim_empty_lines(&mut self) { + *self.0 = self.0.trim_start_matches('\n'); + } + fn take_section(&mut self) -> Result<(&'a str, usize), ParseError<'a>> { + self.trim_empty_lines(); + let start = self.pos(); + let brackets = self.0.chars().take_while(|ch| *ch == '[').count(); + if brackets > 0 { + if !self.0[brackets..].starts_with('\n') { + return Err(ParseError( + ParseErr::BracketsAtStartOfSectionWithNoLinebreakAfter, + start, + )); + } + *self.0 = &self.0[brackets..]; + self.trim_empty_lines(); + } + self.take_rest_of_section(brackets) + .map(|section| (section, start)) + .map_err(|e| ParseError(e, start)) + } + fn take_rest_of_section(&mut self, brackets: usize) -> Result<&'a str, ParseErr<'a>> { + if brackets == 0 { + if let Some(i) = self.0.find("\n\n") { + let out = &self.0[0..i]; + *self.0 = &self.0[i + 2..]; + Ok(out) + } else { + let out = &self.0[..]; + *self.0 = ""; + Ok(out) + } + } else { + let pat = "]".repeat(brackets) + "\n"; + let mut k = 0; + while let Some(i) = self.0[k..].find(&pat).map(|i| i + k) { + if self.0[0..i].ends_with('\n') && self.0[i + pat.len()..].starts_with('\n') { + let out = &self.0[0..i - 1]; + *self.0 = &self.0[i + pat.len() + 
1..]; + return Ok(out); + } else if self.0[0..i].ends_with('\n') && self.0[i + pat.len()..].is_empty() { + let out = &self.0[0..i - 1]; + *self.0 = &self.0[i + pat.len()..]; + return Ok(out); + } else if self.0[0..i].ends_with(']') + && self.0[0..i].trim_end_matches(']').ends_with('\n') + { + return Err(ParseErr::BracketedSectionTerminatedByTooManyBrackets); + } else { + k = i + pat.len(); + } + } + Err(ParseErr::BracketedSectionNotTerminated) + } + } +} diff --git a/src/to/html.rs b/src/to/html.rs new file mode 100644 index 0000000..9e215ae --- /dev/null +++ b/src/to/html.rs @@ -0,0 +1,543 @@ +use std::{borrow::Cow, collections::BTreeMap, process::Command}; + +use crate::{ + doc::{Component, Document, Section, Text}, + to::{Spec, SpecsExt}, +}; + +pub struct State<'a> { + pub head_to_body: &'a str, + cache_docs: BTreeMap<&'a Document<'a>, String>, + cache_secs: BTreeMap<&'a Section<'a>, String>, +} +impl<'a> State<'a> { + pub fn new() -> Self { + State { + head_to_body: "\n", + cache_docs: Default::default(), + cache_secs: Default::default(), + } + } +} + +pub fn page<'a>(doc: &'a Document<'a>, specs: &[impl Spec], state: &mut State<'a>) -> String { + let mut out = String::new(); + page_to(doc, specs, state, &mut out); + out +} +pub fn page_to<'a>( + doc: &'a Document<'a>, + specs: &[impl Spec], + state: &mut State<'a>, + out: &mut String, +) { + let doc_start = format!( + r##" + + + + + +{} + +{} +"##, + escape(&crate::to::plain::text_minimal(&doc.title)), + state.head_to_body, + ); + if out.is_empty() { + *out = doc_start; + } else { + out.push_str(&doc_start); + } + doc_to(doc, specs, out, state); + out.push_str("\n\n"); +} + +struct DocArgs<'a, 'b> { + state: &'b mut State<'a>, + root: &'a Document<'a>, + depth: usize, + max_len: Option, +} +fn doc_to<'a>(doc: &'a Document<'a>, specs: &[impl Spec], out: &mut String, state: &mut State<'a>) { + doc_to_impl( + doc, + specs, + out, + &mut DocArgs { + state, + root: doc, + depth: 0, + max_len: None, + }, + ) +} +fn 
doc_preview_to<'a>( + doc: &'a Document<'a>, + root: &'a Document<'a>, + specs: &[impl Spec], + out: &mut String, + state: &mut State<'a>, +) { + doc_to_impl( + doc, + specs, + out, + &mut DocArgs { + state, + root, + depth: doc.depth, + max_len: Some(1024), + }, + ) +} +fn doc_to_impl<'a>( + doc: &'a Document<'a>, + specs: &[impl Spec], + out: &mut String, + doc_args: &mut DocArgs<'a, '_>, +) { + if let Some(prev) = doc_args.state.cache_docs.get(doc) { + out.push_str(prev); + return; + } + let out_start = out.len(); + let depth = doc_args.depth; + let end_at = doc_args.max_len.map(|len| out.len() + len); + for d in depth..=doc.depth { + out.push_str(&format!(r#"

{}"#, '\n')); + } + if !doc.title.is_empty() { + let a = if !doc.link.is_empty() && doc_args.max_len.is_none() { + let link = doc.link.replace(char::is_whitespace, "-"); + let link = escape_quot(&link); + format!( + r##"{}"##, + link, + link, + escape(doc.link), + ) + } else { + String::new() + }; + out.push_str(&format!( + r#"
"#, + doc.depth, + )); + text_to(&doc.title, specs, out, doc_args); + out.push_str(&format!("{a}
\n")); + } + for section in &doc.sections { + if let Some(prev) = doc_args.state.cache_secs.get(section) { + out.push_str(prev); + continue; + } + let section_start = out.len(); + if end_at.is_some_and(|len| out.len() > len) { + break; + } + out.push_str(&format!( + r#"
"y par", + Section::List(..) => "y list", + Section::Code(..) => "y code", + Section::Quote(..) => "y quote", + Section::Ext(..) => "y ext", + Section::Document(..) => "sub", + }, + )); + match section { + Section::Paragraph(text) => { + out.push_str(r#"">
"#); + text_to(text, specs, out, doc_args); + out.push_str("
"); + } + Section::List(list) => { + out.push_str(r#"">
    "#); + for elem in list { + out.push_str("
  • "); + text_to(elem, specs, out, doc_args); + out.push_str("
  • "); + } + out.push_str("
"); + } + Section::Code(lang, text) => { + out.push_str(&format!( + r#" l-{}">
{}
"#,
+                    escape_quot(&lang.replace(char::is_whitespace, "-")),
+                    escape(lang),
+                ));
+                text_to(text, specs, out, doc_args);
+                out.push_str("
"); + } + Section::Quote(attributed, context, text) => { + out.push_str(&format!( + r#" a-{} c-{}">
{}
"#, + escape_quot(&attributed.replace(char::is_whitespace, "-")), + escape_quot(&context.replace(char::is_whitespace, "-")), + escape(attributed), + )); + text_to(text, specs, out, doc_args); + out.push_str(&format!( + r#"
{}
"#, + escape(context), + )); + } + Section::Ext(spec @ "html", source) if specs.has(spec) => { + out.push_str(&format!( + r#" s3 spec e-{}">
{}
"#, + escape_quot(spec), + escape(spec), + )); + out.push_str(source); + out.push_str("
"); + } + Section::Ext(spec @ "texm", source) if specs.has(spec) => { + let l = out.len() + 2; + out.push_str(&format!( + r#" s0 spec e-{}">
{}
"#, + escape_quot(spec), + escape(spec), + )); + let status = texm_to(source, out); + out.replace_range(l..=l, status); + out.push_str("
"); + } + Section::Ext(spec, source) => { + out.push_str(&format!( + r#" nospec e-{}">
{}
"#, + escape_quot(spec), + escape(spec), + )); + out.push_str(&escape(source)); + out.push_str("
"); + } + Section::Document(inner) => { + out.push_str(r#"">"#); + let mut args = DocArgs { + state: doc_args.state, + root: doc_args.root, + max_len: doc_args.max_len.map(|v| v / 2), + depth: doc.depth + 1, + }; + doc_to_impl(inner, specs, out, &mut args); + } + } + out.push_str("
\n"); + doc_args + .state + .cache_secs + .insert(section, out[section_start..].to_owned()); + } + for _ in depth..=doc.depth { + out.push_str("
\n"); + } + doc_args + .state + .cache_docs + .insert(doc, out[out_start..].to_owned()); +} + +fn text_to<'a: 'b, 'b>( + text: &'a Text<'a>, + specs: &[impl Spec], + out: &mut String, + args: &mut DocArgs<'a, 'b>, +) { + for component in &text.0 { + match component { + Component::Char(ch) => escape_to(*ch, out), + Component::Text(text) => out.push_str(escape(text).as_ref()), + Component::Note(text) => { + out.push_str(r#""#); + text_to(text, specs, out, args); + out.push_str(""); + } + Component::Wrong(text) => { + out.push_str(r#""#); + text_to(text, specs, out, args); + out.push_str(""); + } + Component::Special(text) => { + out.push_str(r#""#); + text_to(text, specs, out, args); + out.push_str(""); + } + Component::Emphasis(text) => { + out.push_str(r#""#); + text_to(text, specs, out, args); + out.push_str(""); + } + Component::Important(text) => { + out.push_str(r#""#); + text_to(text, specs, out, args); + out.push_str(""); + } + Component::Link(target, text) => { + let link_target: Option<&'a Document<'a>> = args + .max_len + .is_none() + .then(|| args.root.find_link_target(target)) + .flatten(); + if link_target.is_some() { + out.push_str(r#""#); + } + out.push_str(&format!( + r##""##, + escape_quot(&target.replace(char::is_whitespace, "-")), + )); + text_to(text, specs, out, args); + out.push_str(&format!( + r#"[{}]"#, + escape(target), + )); + out.push_str(""); + if let Some(doc) = link_target { + out.push_str(r#"
"#); + doc_preview_to(doc, args.root, specs, out, args.state); + out.push_str("
/// Appends `ch` to `out`, replacing the HTML-significant characters `&`, `<`
/// and `>` with their character references so they render as literal text.
fn escape_to(ch: char, out: &mut String) {
    match ch {
        '&' => out.push_str("&amp;"),
        '<' => out.push_str("&lt;"),
        '>' => out.push_str("&gt;"),
        ch => out.push(ch),
    }
}

/// Returns `str` with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
///
/// The input is returned borrowed (no allocation) when it contains none of
/// those characters. Escaping is done in a single pass through [`escape_to`],
/// so an ampersand produced by escaping `<` or `>` is never escaped twice.
fn escape(str: &str) -> Cow<'_, str> {
    if !str.contains(|ch: char| matches!(ch, '&' | '<' | '>')) {
        return Cow::Borrowed(str);
    }
    // A handful of spare bytes avoids a reallocation for lightly-escaped input.
    let mut out = String::with_capacity(str.len() + 8);
    for ch in str.chars() {
        escape_to(ch, &mut out);
    }
    Cow::Owned(out)
}

/// Returns `str` with every `"` and `'` prefixed by a backslash.
///
/// The input is returned borrowed when it contains neither quote character.
///
/// NOTE(review): callers interpolate the result into HTML attribute values,
/// where a backslash does not escape a quote; `&quot;` / `&#39;` may be what
/// is wanted here. Behaviour is left unchanged pending confirmation.
fn escape_quot(str: &str) -> Cow<'_, str> {
    let mut out = Cow::Borrowed(str);
    if out.contains('"') {
        out = Cow::Owned(out.replace('"', "\\\""));
    }
    if out.contains('\'') {
        out = Cow::Owned(out.replace('\'', "\\'"));
    }
    out
}

/// Renders the TeX math `source` by running the external `latex2mathml -t`
/// command and appends the outcome to `out`.
///
/// * On success the command's trimmed stdout is appended verbatim and `"5"`
///   is returned.
/// * If the command exits unsuccessfully, `$source$[stderr]` is appended
///   (both parts HTML-escaped) and `"1"` is returned.
/// * If the command cannot be run at all, `$source$` is appended (escaped)
///   and `"1"` is returned.
///
/// The returned one-character string is a status marker for the caller to
/// splice into markup it has already written.
fn texm_to(source: &str, out: &mut String) -> &'static str {
    match Command::new("latex2mathml").args(["-t", source]).output() {
        Ok(res) if res.status.success() => {
            out.push_str(String::from_utf8_lossy(&res.stdout).trim());
            "5"
        }
        Ok(res) => {
            let stderr = String::from_utf8_lossy(&res.stderr);
            out.push('$');
            out.push_str(&escape(source));
            out.push_str("$[");
            out.push_str(&escape(stderr.trim()));
            out.push(']');
            "1"
        }
        Err(_) => {
            out.push('$');
            out.push_str(&escape(source));
            out.push('$');
            "1"
        }
    }
}
fn has(&self, spec: &str) -> bool; +} +impl SpecsExt for [T] { + fn has(&self, spec: &str) -> bool { + self.iter().any(|s| s.as_ref() == spec) + } +} +impl> Spec for T {} diff --git a/src/to/plain.rs b/src/to/plain.rs new file mode 100644 index 0000000..9dfa1c2 --- /dev/null +++ b/src/to/plain.rs @@ -0,0 +1,98 @@ +use crate::{ + doc::{Component, Text}, + to::{Spec, SpecsExt}, +}; + +// pub fn page(doc: &Document<'_>, specs: &Specs) -> String { +// } + +// fn doc_to(doc: &Document<'_>, depth: usize, specs: &Specs, out: &mut String) { +// } + +pub fn text_minimal(text: &Text<'_>) -> String { + let mut out = String::new(); + let specs: &'static [&'static str] = &[]; + text_to(text, false, specs, &mut out); + out +} + +fn text_to(text: &Text<'_>, complete: bool, specs: &[impl Spec], out: &mut String) { + for component in &text.0 { + match component { + Component::Char(ch) => out.push(*ch), + Component::Text(text) => out.push_str(text), + Component::Note(_) => { + if complete { + out.push('-'); + text_to(text, complete, specs, out); + out.push('-'); + } + } + Component::Wrong(text) => { + if complete { + out.push('~'); + text_to(text, complete, specs, out); + out.push('~'); + } + } + Component::Special(text) => { + if complete { + out.push('_'); + } + text_to(text, complete, specs, out); + if complete { + out.push('_'); + } + } + Component::Emphasis(text) => { + if complete { + out.push('*'); + } + text_to(text, complete, specs, out); + if complete { + out.push('*'); + } + } + Component::Important(text) => { + if complete { + out.push_str("**"); + } else { + out.push('*'); + } + text_to(text, complete, specs, out); + if complete { + out.push_str("**"); + } else { + out.push('*'); + } + } + Component::Link(target, text) => { + if complete { + out.push('_'); + } + text_to(text, complete, specs, out); + if complete { + out.push_str("_ (→ "); + out.push_str(target); + out.push(')'); + } + } + Component::Ext(spec, source) => { + if complete { + 
#[allow(clippy::match_single_binding)] + match if specs.has(spec) { *spec } else { "" } { + _ => { + out.push_str("?["); + out.push_str(spec); + out.push(':'); + out.push_str(source); + out.push_str("]?"); + } + } + } else { + out.push('…'); + } + } + } + } +}