use anyhow::{anyhow, bail}; use std::{ borrow::Cow, io::{BufRead, Lines}, iter::Peekable, }; const LISTING_DELIMITER: &str = "----"; const IMAGE_BLOCK_PREFIX: &str = "image::"; const VIDEO_BLOCK_PREFIX: &str = "video::"; struct Converter<'a, 'b, R: BufRead> { iter: &'a mut Peekable>, output: &'b mut String, } impl<'a, 'b, R: BufRead> Converter<'a, 'b, R> { fn new(iter: &'a mut Peekable>, output: &'b mut String) -> Self { Self { iter, output } } fn process(&mut self) -> anyhow::Result<()> { self.process_document_header()?; self.skip_blank_lines()?; self.output.push('\n'); loop { let line = self.iter.peek().unwrap().as_deref().map_err(|e| anyhow!("{e}"))?; if get_title(line).is_some() { let line = self.iter.next().unwrap().unwrap(); let (level, title) = get_title(&line).unwrap(); self.write_title(level, title); } else if get_list_item(line).is_some() { self.process_list()?; } else if line.starts_with('[') { self.process_source_code_block(0)?; } else if line.starts_with(LISTING_DELIMITER) { self.process_listing_block(None, 0)?; } else if line.starts_with('.') { self.process_block_with_title(0)?; } else if line.starts_with(IMAGE_BLOCK_PREFIX) { self.process_image_block(None, 0)?; } else if line.starts_with(VIDEO_BLOCK_PREFIX) { self.process_video_block(None, 0)?; } else { self.process_paragraph(0, |line| line.is_empty())?; } self.skip_blank_lines()?; if self.iter.peek().is_none() { break; } self.output.push('\n'); } Ok(()) } fn process_document_header(&mut self) -> anyhow::Result<()> { self.process_document_title()?; while let Some(line) = self.iter.next() { let line = line?; if line.is_empty() { break; } if !line.starts_with(':') { self.write_line(&line, 0) } } Ok(()) } fn process_document_title(&mut self) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() && let Some((level, title)) = get_title(&line) { let title = process_inline_macros(title)?; if level == 1 { self.write_title(level, &title); return Ok(()); } } bail!("document title not found") } fn process_list(&mut self) -> anyhow::Result<()> { let mut nesting = ListNesting::default(); while let Some(line) = self.iter.peek() { let line = line.as_deref().map_err(|e| anyhow!("{e}"))?; if get_list_item(line).is_some() { let line = self.iter.next().unwrap()?; let line = process_inline_macros(&line)?; let (marker, item) = get_list_item(&line).unwrap(); nesting.set_current(marker); self.write_list_item(item, &nesting); self.process_paragraph(nesting.indent(), |line| { line.is_empty() || get_list_item(line).is_some() || line == "+" })?; } else if line == "+" { let _ = self.iter.next().unwrap()?; let line = self .iter .peek() .ok_or_else(|| anyhow!("list continuation unexpectedly terminated"))?; let line = line.as_deref().map_err(|e| anyhow!("{e}"))?; let indent = nesting.indent(); if line.starts_with('[') { self.write_line("", 0); self.process_source_code_block(indent)?; } else if line.starts_with(LISTING_DELIMITER) { self.write_line("", 0); self.process_listing_block(None, indent)?; } else if line.starts_with('.') { self.write_line("", 0); self.process_block_with_title(indent)?; } else if line.starts_with(IMAGE_BLOCK_PREFIX) { self.write_line("", 0); self.process_image_block(None, indent)?; } else if line.starts_with(VIDEO_BLOCK_PREFIX) { self.write_line("", 0); self.process_video_block(None, indent)?; } else { self.write_line("", 0); let current = nesting.current().unwrap(); self.process_paragraph(indent, |line| { line.is_empty() || get_list_item(line).filter(|(m, _)| m == current).is_some() || line == "+" })?; } } else { break; } self.skip_blank_lines()?; } Ok(()) } fn process_source_code_block(&mut self, level: usize) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() && let Some(styles) = line.strip_prefix("[source").and_then(|s| s.strip_suffix(']')) { let mut styles = styles.split(','); if !styles.next().unwrap().is_empty() { bail!("not a source code block"); } let language = styles.next(); return self.process_listing_block(language, level); } bail!("not a source code block") } fn process_listing_block(&mut self, style: Option<&str>, level: usize) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() && line == LISTING_DELIMITER { self.write_indent(level); self.output.push_str("```"); if let Some(style) = style { self.output.push_str(style); } self.output.push('\n'); while let Some(line) = self.iter.next() { let line = line?; if line == LISTING_DELIMITER { self.write_line("```", level); return Ok(()); } else { self.write_line(&line, level); } } bail!("listing block is not terminated") } bail!("not a listing block") } fn process_block_with_title(&mut self, level: usize) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() { let title = line.strip_prefix('.').ok_or_else(|| anyhow!("extraction of the title failed"))?; let line = self .iter .peek() .ok_or_else(|| anyhow!("target block for the title is not found"))?; let line = line.as_deref().map_err(|e| anyhow!("{e}"))?; if line.starts_with(IMAGE_BLOCK_PREFIX) { return self.process_image_block(Some(title), level); } else if line.starts_with(VIDEO_BLOCK_PREFIX) { return self.process_video_block(Some(title), level); } else { bail!("title for that block type is not supported"); } } bail!("not a title") } fn process_image_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() && let Some((url, attrs)) = parse_media_block(&line, IMAGE_BLOCK_PREFIX) { let alt = if let Some(stripped) = attrs.strip_prefix('"').and_then(|s| s.strip_suffix('"')) { stripped } else { attrs }; if let Some(caption) = caption { self.write_caption_line(caption, level); } self.write_indent(level); self.output.push_str("!["); self.output.push_str(alt); self.output.push_str("]("); self.output.push_str(url); self.output.push_str(")\n"); return Ok(()); } bail!("not a image block") } fn process_video_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> { if let Some(Ok(line)) = self.iter.next() && let Some((url, attrs)) = parse_media_block(&line, VIDEO_BLOCK_PREFIX) { let html_attrs = match attrs { "options=loop" => "controls loop", r#"options="autoplay,loop""# => "autoplay controls loop", _ => bail!("unsupported video syntax"), }; if let Some(caption) = caption { self.write_caption_line(caption, level); } self.write_indent(level); self.output.push_str(r#"\n"); return Ok(()); } bail!("not a video block") } fn process_paragraph

(&mut self, level: usize, predicate: P) -> anyhow::Result<()> where P: Fn(&str) -> bool, { while let Some(line) = self.iter.peek() { let line = line.as_deref().map_err(|e| anyhow!("{e}"))?; if predicate(line) { break; } self.write_indent(level); let line = self.iter.next().unwrap()?; let line = line.trim_start(); let line = process_inline_macros(line)?; if let Some(stripped) = line.strip_suffix('+') { self.output.push_str(stripped); self.output.push('\\'); } else { self.output.push_str(&line); } self.output.push('\n'); } Ok(()) } fn skip_blank_lines(&mut self) -> anyhow::Result<()> { while let Some(line) = self.iter.peek() { if !line.as_deref().unwrap().is_empty() { break; } self.iter.next().unwrap()?; } Ok(()) } fn write_title(&mut self, indent: usize, title: &str) { for _ in 0..indent { self.output.push('#'); } self.output.push(' '); self.output.push_str(title); self.output.push('\n'); } fn write_list_item(&mut self, item: &str, nesting: &ListNesting) { let (marker, indent) = nesting.marker(); self.write_indent(indent); self.output.push_str(marker); self.output.push_str(item); self.output.push('\n'); } fn write_caption_line(&mut self, caption: &str, indent: usize) { self.write_indent(indent); self.output.push('_'); self.output.push_str(caption); self.output.push_str("_\\\n"); } fn write_indent(&mut self, indent: usize) { for _ in 0..indent { self.output.push(' '); } } fn write_line(&mut self, line: &str, indent: usize) { self.write_indent(indent); self.output.push_str(line); self.output.push('\n'); } } pub(crate) fn convert_asciidoc_to_markdown(input: R) -> anyhow::Result where R: BufRead, { let mut output = String::new(); let mut iter = input.lines().peekable(); let mut converter = Converter::new(&mut iter, &mut output); converter.process()?; Ok(output) } fn get_title(line: &str) -> Option<(usize, &str)> { strip_prefix_symbol(line, '=') } fn get_list_item(line: &str) -> Option<(ListMarker, &str)> { const HYPHEN_MARKER: &str = "- "; if let Some(text) = line.strip_prefix(HYPHEN_MARKER) { Some((ListMarker::Hyphen, text)) } else if let Some((count, text)) = strip_prefix_symbol(line, '*') { Some((ListMarker::Asterisk(count), text)) } else if let Some((count, text)) = strip_prefix_symbol(line, '.') { Some((ListMarker::Dot(count), text)) } else { None } } fn strip_prefix_symbol(line: &str, symbol: char) -> Option<(usize, &str)> { let mut iter = line.chars(); if iter.next()? != symbol { return None; } let mut count = 1; loop { match iter.next() { Some(ch) if ch == symbol => { count += 1; } Some(' ') => { break; } _ => return None, } } Some((count, iter.as_str())) } fn parse_media_block<'a>(line: &'a str, prefix: &str) -> Option<(&'a str, &'a str)> { if let Some(line) = line.strip_prefix(prefix) && let Some((url, rest)) = line.split_once('[') && let Some(attrs) = rest.strip_suffix(']') { return Some((url, attrs)); } None } #[derive(Debug)] struct ListNesting(Vec); impl ListNesting { fn current(&mut self) -> Option<&ListMarker> { self.0.last() } fn set_current(&mut self, marker: ListMarker) { let Self(markers) = self; if let Some(index) = markers.iter().position(|m| *m == marker) { markers.truncate(index + 1); } else { markers.push(marker); } } fn indent(&self) -> usize { self.0.iter().map(|m| m.in_markdown().len()).sum() } fn marker(&self) -> (&str, usize) { let Self(markers) = self; let indent = markers.iter().take(markers.len() - 1).map(|m| m.in_markdown().len()).sum(); let marker = match markers.last() { None => "", Some(marker) => marker.in_markdown(), }; (marker, indent) } } impl Default for ListNesting { fn default() -> Self { Self(Vec::::with_capacity(6)) } } #[derive(Debug, PartialEq, Eq)] enum ListMarker { Asterisk(usize), Hyphen, Dot(usize), } impl ListMarker { fn in_markdown(&self) -> &str { match self { ListMarker::Asterisk(_) => "- ", ListMarker::Hyphen => "- ", ListMarker::Dot(_) => "1. ", } } } fn process_inline_macros(line: &str) -> anyhow::Result> { let mut chars = line.char_indices(); loop { let (start, end, a_macro) = match get_next_line_component(&mut chars) { Component::None => break, Component::Text => continue, Component::Macro(s, e, m) => (s, e, m), }; let mut src = line.chars(); let mut processed = String::new(); for _ in 0..start { processed.push(src.next().unwrap()); } processed.push_str(a_macro.process()?.as_str()); for _ in start..end { let _ = src.next().unwrap(); } let mut pos = end; loop { let (start, end, a_macro) = match get_next_line_component(&mut chars) { Component::None => break, Component::Text => continue, Component::Macro(s, e, m) => (s, e, m), }; for _ in pos..start { processed.push(src.next().unwrap()); } processed.push_str(a_macro.process()?.as_str()); for _ in start..end { let _ = src.next().unwrap(); } pos = end; } for ch in src { processed.push(ch); } return Ok(Cow::Owned(processed)); } Ok(Cow::Borrowed(line)) } fn get_next_line_component(chars: &mut std::str::CharIndices<'_>) -> Component { let (start, mut macro_name) = match chars.next() { None => return Component::None, Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text, Some((pos, ch)) => (pos, String::from(ch)), }; loop { match chars.next() { None => return Component::None, Some((_, ch)) if ch == ' ' || !ch.is_ascii() => return Component::Text, Some((_, ':')) => break, Some((_, ch)) => macro_name.push(ch), } } let mut macro_target = String::new(); loop { match chars.next() { None => return Component::None, Some((_, ' ')) => return Component::Text, Some((_, '[')) => break, Some((_, ch)) => macro_target.push(ch), } } let mut attr_value = String::new(); let end = loop { match chars.next() { None => return Component::None, Some((pos, ']')) => break pos + 1, Some((_, ch)) => attr_value.push(ch), } }; Component::Macro(start, end, Macro::new(macro_name, macro_target, attr_value)) } enum Component { None, Text, Macro(usize, usize, Macro), } struct Macro { name: String, target: String, attrs: String, } impl Macro { fn new(name: String, target: String, attrs: String) -> Self { Self { name, target, attrs } } fn process(&self) -> anyhow::Result { let name = &self.name; let text = match name.as_str() { "https" => { let url = &self.target; let anchor_text = &self.attrs; format!("[{anchor_text}](https:{url})") } "image" => { let url = &self.target; let alt = &self.attrs; format!("![{alt}]({url})") } "kbd" => { let keys = self.attrs.split('+').map(|k| Cow::Owned(format!("{k}"))); keys.collect::>().join("+") } "pr" => { let pr = &self.target; let url = format!("https://github.com/rust-lang/rust-analyzer/pull/{pr}"); format!("[`#{pr}`]({url})") } "commit" => { let hash = &self.target; let short = &hash[0..7]; let url = format!("https://github.com/rust-lang/rust-analyzer/commit/{hash}"); format!("[`{short}`]({url})") } "release" => { let date = &self.target; let url = format!("https://github.com/rust-lang/rust-analyzer/releases/{date}"); format!("[`{date}`]({url})") } _ => bail!("macro not supported: {name}"), }; Ok(text) } } #[cfg(test)] mod tests { use super::*; use std::fs::read_to_string; #[test] fn test_asciidoc_to_markdown_conversion() { let input = read_to_string("test_data/input.adoc").unwrap(); let expected = read_to_string("test_data/expected.md").unwrap(); let actual = convert_asciidoc_to_markdown(std::io::Cursor::new(&input)).unwrap(); assert_eq!(actual, expected); } macro_rules! test_inline_macro_processing { ($(( $name:ident, $input:expr, $expected:expr ),)*) => ($( #[test] fn $name() { let input = $input; let actual = process_inline_macros(&input).unwrap(); let expected = $expected; assert_eq!(actual, expected) } )*); } test_inline_macro_processing! { (inline_macro_processing_for_empty_line, "", ""), (inline_macro_processing_for_line_with_no_macro, "foo bar", "foo bar"), ( inline_macro_processing_for_macro_in_line_start, "kbd::[Ctrl+T] foo", "Ctrl+T foo" ), ( inline_macro_processing_for_macro_in_line_end, "foo kbd::[Ctrl+T]", "foo Ctrl+T" ), ( inline_macro_processing_for_macro_in_the_middle_of_line, "foo kbd::[Ctrl+T] foo", "foo Ctrl+T foo" ), ( inline_macro_processing_for_several_macros, "foo kbd::[Ctrl+T] foo kbd::[Enter] foo", "foo Ctrl+T foo Enter foo" ), ( inline_macro_processing_for_several_macros_without_text_in_between, "foo kbd::[Ctrl+T]kbd::[Enter] foo", "foo Ctrl+TEnter foo" ), } }