use anyhow::{anyhow, bail};
use std::{
io::{BufRead, Lines},
iter::Peekable,
};
/// Line that opens and closes an AsciiDoc listing block.
const LISTING_DELIMITER: &str = "----";
/// Prefix that introduces an AsciiDoc block image macro.
const IMAGE_BLOCK_PREFIX: &str = "image::";
/// Prefix that introduces an AsciiDoc block video macro.
const VIDEO_BLOCK_PREFIX: &str = "video::";
/// State of a line-oriented AsciiDoc-to-Markdown conversion.
///
/// The converter borrows both the peekable line iterator it reads from and
/// the string it appends the generated Markdown to.
struct Converter<'a, 'b, R>
where
    R: BufRead,
{
    /// Remaining input lines; peeked to decide which kind of block comes next.
    iter: &'a mut Peekable<Lines<R>>,
    /// Destination buffer for the generated Markdown.
    output: &'b mut String,
}
impl<'a, 'b, R: BufRead> Converter<'a, 'b, R> {
fn new(iter: &'a mut Peekable<Lines<R>>, output: &'b mut String) -> Self {
Self { iter, output }
}
/// Converts the whole document: the header first, then every body block.
///
/// Each body block is preceded by exactly one blank line in the output. A
/// document that ends right after its header is valid and produces the
/// header only.
///
/// # Errors
///
/// Returns an error when the header or a block cannot be parsed, or when
/// the underlying reader fails.
fn process(&mut self) -> anyhow::Result<()> {
    self.process_document_header()?;
    self.skip_blank_lines()?;

    // Peeking in the loop condition ends the conversion cleanly at end of
    // input, including the case where nothing follows the header.
    while let Some(line) = self.iter.peek() {
        let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
        // Blank line between the previous block (or the header) and this one.
        self.output.push('\n');

        if get_title(line).is_some() {
            // The peeked borrow cannot be kept across `write_title`, so the
            // line is consumed and parsed again from the owned string.
            if let Some(line) = self.iter.next().transpose()? {
                if let Some((level, title)) = get_title(&line) {
                    self.write_title(level, title);
                }
            }
        } else if get_list_item(line).is_some() {
            self.process_list()?;
        } else if line.starts_with('[') {
            self.process_source_code_block(0)?;
        } else if line.starts_with(LISTING_DELIMITER) {
            self.process_listing_block(None, 0)?;
        } else if line.starts_with('.') {
            self.process_block_with_title(0)?;
        } else if line.starts_with(IMAGE_BLOCK_PREFIX) {
            self.process_image_block(None, 0)?;
        } else if line.starts_with(VIDEO_BLOCK_PREFIX) {
            self.process_video_block(None, 0)?;
        } else {
            self.process_paragraph(0, |line| line.is_empty())?;
        }

        self.skip_blank_lines()?;
    }
    Ok(())
}
/// Consumes the document header: the level-1 title and the lines directly below it.
///
/// Reading stops after the first empty line or at end of input. Lines that
/// start with `:` are dropped; every other header line is copied to the
/// output unchanged.
///
/// # Errors
///
/// Returns an error when the title is missing or the reader fails.
fn process_document_header(&mut self) -> anyhow::Result<()> {
    self.process_document_title()?;
    // A `for` loop over `self.iter` would keep it borrowed while
    // `write_line` needs `&mut self`, hence the explicit `next()` calls.
    while let Some(header_line) = self.iter.next().transpose()? {
        if header_line.is_empty() {
            break;
        }
        if header_line.starts_with(':') {
            continue;
        }
        self.write_line(&header_line, 0);
    }
    Ok(())
}
/// Consumes the first line of the input and writes it as the document title.
///
/// # Errors
///
/// Returns "document title not found" when the input is empty or its first
/// line is not a level-1 title. A failure of the underlying reader is
/// propagated as is instead of being reported as a missing title.
fn process_document_title(&mut self) -> anyhow::Result<()> {
    if let Some(line) = self.iter.next() {
        let line = line?;
        if let Some((1, title)) = get_title(&line) {
            self.write_title(1, title);
            return Ok(());
        }
    }
    bail!("document title not found")
}
/// Converts a run of list items together with their `+` continuations.
///
/// The list ends at end of input or at the first line that is neither a list
/// item nor a continuation marker. Blank lines between items are skipped.
///
/// # Errors
///
/// Returns an error when a continuation marker is the last line of the
/// input, when an attached block is malformed, or when the reader fails.
fn process_list(&mut self) -> anyhow::Result<()> {
    let mut nesting = ListNesting::new();
    loop {
        let upcoming = match self.iter.peek() {
            Some(entry) => entry.as_deref().map_err(|e| anyhow!("{e}"))?,
            None => break,
        };

        if get_list_item(upcoming).is_some() {
            // Consume the line and parse it again from the owned string; the
            // peeked borrow cannot be held while writing to the output.
            let owned = self.iter.next().unwrap()?;
            let (marker, text) = get_list_item(&owned).unwrap();
            nesting.set_current(marker);
            self.write_list_item(text, &nesting);
            // Remaining lines of the item run until a blank line, the next
            // item, or a continuation marker.
            self.process_paragraph(nesting.indent(), |candidate| {
                candidate.is_empty() || candidate == "+" || get_list_item(candidate).is_some()
            })?;
        } else if upcoming == "+" {
            // Drop the continuation marker itself.
            self.iter.next().unwrap()?;
            let attached = self
                .iter
                .peek()
                .ok_or_else(|| anyhow!("list continuation unexpectedly terminated"))?;
            let attached = attached.as_deref().map_err(|e| anyhow!("{e}"))?;
            let indent = nesting.indent();

            // Every attached block is separated from the item by one blank line.
            self.output.push('\n');
            if attached.starts_with('[') {
                self.process_source_code_block(indent)?;
            } else if attached.starts_with(LISTING_DELIMITER) {
                self.process_listing_block(None, indent)?;
            } else if attached.starts_with('.') {
                self.process_block_with_title(indent)?;
            } else if attached.starts_with(IMAGE_BLOCK_PREFIX) {
                self.process_image_block(None, indent)?;
            } else if attached.starts_with(VIDEO_BLOCK_PREFIX) {
                self.process_video_block(None, indent)?;
            } else {
                // An attached paragraph only stops at an item that uses the
                // same marker as the current item.
                let current = nesting.current().unwrap();
                self.process_paragraph(indent, |candidate| {
                    candidate.is_empty()
                        || candidate == "+"
                        || get_list_item(candidate).filter(|(m, _)| m == current).is_some()
                })?;
            }
        } else {
            break;
        }

        self.skip_blank_lines()?;
    }
    Ok(())
}
/// Consumes a `[source]` or `[source,<language>]` attribute line and converts
/// the listing block that follows it.
///
/// The second comma-separated attribute, when present, is used as the
/// language of the fenced code block. `level` is the number of spaces the
/// block is indented by.
///
/// # Errors
///
/// Returns "not a source code block" when the next line is missing or is not
/// a `[source...]` attribute line. Reader failures and listing block errors
/// are propagated.
fn process_source_code_block(&mut self, level: usize) -> anyhow::Result<()> {
    if let Some(line) = self.iter.next() {
        let line = line?;
        if let Some(styles) = line.strip_prefix("[source").and_then(|s| s.strip_suffix(']')) {
            let mut styles = styles.split(',');
            // Anything between `[source` and the first comma (e.g.
            // `[sourcefoo]`) means this is some other attribute.
            if styles.next() != Some("") {
                bail!("not a source code block");
            }
            let language = styles.next();
            return self.process_listing_block(language, level);
        }
    }
    bail!("not a source code block")
}
/// Converts a `----` delimited listing block into a fenced code block.
///
/// `style`, when given, is appended to the opening fence as the language.
/// Every emitted line is indented by `level` spaces.
///
/// # Errors
///
/// Returns "not a listing block" when the next line is missing or is not the
/// delimiter, and "listing block is not terminated" when the input ends
/// before the closing delimiter. Reader failures are propagated.
fn process_listing_block(&mut self, style: Option<&str>, level: usize) -> anyhow::Result<()> {
    let opening = match self.iter.next() {
        Some(line) => line?,
        None => bail!("not a listing block"),
    };
    if opening != LISTING_DELIMITER {
        bail!("not a listing block");
    }

    self.write_indent(level);
    self.output.push_str("```");
    if let Some(style) = style {
        self.output.push_str(style);
    }
    self.output.push('\n');

    // Explicit `next()` calls: a `for` loop would keep `self.iter` borrowed
    // while `write_line` needs `&mut self`.
    while let Some(line) = self.iter.next() {
        let line = line?;
        if line == LISTING_DELIMITER {
            self.write_line("```", level);
            return Ok(());
        }
        self.write_line(&line, level);
    }
    bail!("listing block is not terminated")
}
/// Consumes a `.Title` line and converts the block it belongs to, using the
/// title as the caption.
///
/// Only image and video blocks can carry a title. `level` is the number of
/// spaces the block is indented by.
///
/// # Errors
///
/// Returns an error when there is no next line ("not a title"), when that
/// line does not start with `.`, when no line follows the title, or when the
/// following block is neither an image nor a video. Reader failures are
/// propagated.
fn process_block_with_title(&mut self, level: usize) -> anyhow::Result<()> {
    let title_line = match self.iter.next() {
        Some(line) => line?,
        None => bail!("not a title"),
    };
    let title = title_line
        .strip_prefix('.')
        .ok_or_else(|| anyhow!("extraction of the title failed"))?;

    let target = self
        .iter
        .peek()
        .ok_or_else(|| anyhow!("target block for the title is not found"))?;
    let target = target.as_deref().map_err(|e| anyhow!("{e}"))?;

    if target.starts_with(IMAGE_BLOCK_PREFIX) {
        self.process_image_block(Some(title), level)
    } else if target.starts_with(VIDEO_BLOCK_PREFIX) {
        self.process_video_block(Some(title), level)
    } else {
        bail!("title for that block type is not supported")
    }
}
fn process_image_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
if let Some(Ok(line)) = self.iter.next() {
if let Some((url, attrs)) = parse_media_block(&line, IMAGE_BLOCK_PREFIX) {
let alt = if let Some(stripped) =
attrs.strip_prefix('"').and_then(|s| s.strip_suffix('"'))
{
stripped
} else {
attrs
};
if let Some(caption) = caption {
self.write_caption_line(caption, level);
}
self.write_indent(level);
self.output.push_str(";
self.output.push_str(url);
self.output.push_str(")\n");
return Ok(());
}
}
bail!("not a image block")
}
/// Converts a `video::<url>[<options>]` line into an HTML `<video>` element.
///
/// Only the attribute lists `options=loop` and `options="autoplay,loop"` are
/// supported. When `caption` is given it is written on its own line above
/// the element. Output is indented by `level` spaces.
///
/// # Errors
///
/// Returns "unsupported video syntax" for any other attribute list and
/// "not a video block" when the next line is missing or is not a video block
/// macro. Reader failures are propagated.
fn process_video_block(&mut self, caption: Option<&str>, level: usize) -> anyhow::Result<()> {
    if let Some(line) = self.iter.next() {
        let line = line?;
        if let Some((url, attrs)) = parse_media_block(&line, VIDEO_BLOCK_PREFIX) {
            let html_attrs = match attrs {
                "options=loop" => "controls loop",
                r#"options="autoplay,loop""# => "autoplay controls loop",
                _ => bail!("unsupported video syntax"),
            };
            if let Some(caption) = caption {
                self.write_caption_line(caption, level);
            }
            self.write_indent(level);
            self.output.push_str(r#"<video src=""#);
            self.output.push_str(url);
            self.output.push_str(r#"" "#);
            self.output.push_str(html_attrs);
            self.output.push_str(">Your browser does not support the video tag.</video>\n");
            return Ok(());
        }
    }
    bail!("not a video block")
}
/// Copies paragraph lines to the output until `predicate` matches the next
/// line or the input ends.
///
/// Each line is written with `level` spaces of indentation after its own
/// leading whitespace has been removed. A hard line break, i.e. a line that
/// ends with a space followed by `+`, is written with a trailing backslash
/// in place of the `+`. A `+` that is not preceded by a space (as in "C++")
/// is ordinary text and is kept.
///
/// `predicate` receives the untrimmed upcoming line and returns `true` for
/// the first line that no longer belongs to the paragraph; that line is left
/// in the input.
///
/// # Errors
///
/// Returns an error when the reader fails.
fn process_paragraph<P>(&mut self, level: usize, predicate: P) -> anyhow::Result<()>
where
    P: Fn(&str) -> bool,
{
    while let Some(line) = self.iter.peek() {
        let line = line.as_deref().map_err(|e| anyhow!("{e}"))?;
        if predicate(line) {
            break;
        }

        self.write_indent(level);
        let line = self.iter.next().unwrap()?;
        let line = line.trim_start();
        // Keep the space in front of the `+` so the output reads `text \`.
        match line.strip_suffix('+').filter(|text| text.ends_with(' ')) {
            Some(text) => {
                self.output.push_str(text);
                self.output.push('\\');
            }
            None => self.output.push_str(line),
        }
        self.output.push('\n');
    }
    Ok(())
}
fn skip_blank_lines(&mut self) -> anyhow::Result<()> {
while let Some(line) = self.iter.peek() {
if !line.as_deref().unwrap().is_empty() {
break;
}
self.iter.next().unwrap()?;
}
Ok(())
}
/// Writes a Markdown heading line: `indent` `#` characters, a space, then `title`.
fn write_title(&mut self, indent: usize, title: &str) {
    self.output.extend(std::iter::repeat('#').take(indent));
    self.output.push(' ');
    self.output.push_str(title);
    self.output.push('\n');
}
/// Writes the first line of a list item, using the marker and indentation
/// that `nesting` reports for the current item.
fn write_list_item(&mut self, item: &str, nesting: &ListNesting) {
    let (bullet, depth) = nesting.marker();
    self.write_indent(depth);
    for piece in [bullet, item, "\n"] {
        self.output.push_str(piece);
    }
}
/// Writes `caption` wrapped in underscores on a line of its own, indented by
/// `indent` spaces and terminated by a backslash before the newline.
fn write_caption_line(&mut self, caption: &str, indent: usize) {
    self.write_indent(indent);
    for piece in ["_", caption, "_\\\n"] {
        self.output.push_str(piece);
    }
}
/// Appends `indent` space characters to the output.
fn write_indent(&mut self, indent: usize) {
    self.output.extend(std::iter::repeat(' ').take(indent));
}
/// Writes `line` preceded by `indent` spaces and followed by a newline.
fn write_line(&mut self, line: &str, indent: usize) {
    self.write_indent(indent);
    for piece in [line, "\n"] {
        self.output.push_str(piece);
    }
}
}
/// Reads an AsciiDoc document from `input` and returns it converted to Markdown.
///
/// # Errors
///
/// Returns an error when the document cannot be converted or when reading
/// from `input` fails.
pub(crate) fn convert_asciidoc_to_markdown<R>(input: R) -> anyhow::Result<String>
where
    R: BufRead,
{
    let mut lines = input.lines().peekable();
    let mut markdown = String::new();
    Converter::new(&mut lines, &mut markdown).process()?;
    Ok(markdown)
}
/// Parses a section title line such as `== Title`.
///
/// Returns the number of leading `=` characters and the text after the
/// separating space, or `None` when `line` is not a title.
fn get_title(line: &str) -> Option<(usize, &str)> {
    const TITLE_SYMBOL: char = '=';
    strip_prefix_symbol(line, TITLE_SYMBOL)
}
/// Parses the first line of a list item.
///
/// Recognizes `- text`, `* text` (with any number of `*`) and `. text` (with
/// any number of `.`). Returns the marker and the item text, or `None` when
/// `line` does not start a list item.
fn get_list_item(line: &str) -> Option<(ListMarker, &str)> {
    const HYPHEN_MARKER: &str = "- ";
    if let Some(text) = line.strip_prefix(HYPHEN_MARKER) {
        return Some((ListMarker::Hyphen, text));
    }
    strip_prefix_symbol(line, '*')
        .map(|(count, text)| (ListMarker::Asterisk(count), text))
        .or_else(|| {
            strip_prefix_symbol(line, '.').map(|(count, text)| (ListMarker::Dot(count), text))
        })
}
/// Splits a line of the form `<symbol repeated n times> <text>`.
///
/// Returns `n` and the text after the single separating space. Returns
/// `None` when `line` does not start with `symbol` or when the run of
/// symbols is not directly followed by a space.
fn strip_prefix_symbol(line: &str, symbol: char) -> Option<(usize, &str)> {
    let rest = line.trim_start_matches(symbol);
    // Every removed character is `symbol`, so the byte difference divided by
    // its encoded length is the number of repetitions.
    let count = (line.len() - rest.len()) / symbol.len_utf8();
    if count == 0 {
        return None;
    }
    rest.strip_prefix(' ').map(|text| (count, text))
}
/// Splits a block macro line of the form `<prefix><url>[<attrs>]`.
///
/// The URL ends at the first `[`; the attribute text is everything between
/// that `[` and the final `]`. Returns `None` when `line` does not start
/// with `prefix`, contains no `[`, or does not end with `]`.
fn parse_media_block<'a>(line: &'a str, prefix: &str) -> Option<(&'a str, &'a str)> {
    let target = line.strip_prefix(prefix)?;
    let (url, tail) = target.split_once('[')?;
    let attrs = tail.strip_suffix(']')?;
    Some((url, attrs))
}
/// Stack of list markers, ordered from the outermost list to the list of the
/// current item.
#[derive(Debug)]
struct ListNesting(Vec<ListMarker>);

impl ListNesting {
    /// Creates an empty nesting stack.
    fn new() -> Self {
        Self(Vec::with_capacity(6))
    }

    /// Returns the marker of the innermost list, if any.
    fn current(&self) -> Option<&ListMarker> {
        self.0.last()
    }

    /// Makes `marker` the innermost list.
    ///
    /// If `marker` is already on the stack, every list nested below it is
    /// closed; otherwise a new nesting level is opened.
    fn set_current(&mut self, marker: ListMarker) {
        let existing = self.0.iter().position(|m| *m == marker);
        match existing {
            Some(index) => self.0.truncate(index + 1),
            None => self.0.push(marker),
        }
    }

    /// Returns the indentation, in spaces, of text that belongs to the
    /// current item: the summed width of all Markdown markers on the stack.
    fn indent(&self) -> usize {
        self.0.iter().map(|m| m.in_markdown().len()).sum()
    }

    /// Returns the Markdown marker of the current item and the indentation,
    /// in spaces, that precedes it.
    ///
    /// An empty stack yields `("", 0)`.
    fn marker(&self) -> (&str, usize) {
        match self.0.split_last() {
            Some((innermost, enclosing)) => {
                let indent = enclosing.iter().map(|m| m.in_markdown().len()).sum();
                (innermost.in_markdown(), indent)
            }
            None => ("", 0),
        }
    }
}
/// Marker that introduces an AsciiDoc list item.
///
/// The number carried by `Asterisk` and `Dot` is how many times the marker
/// character is repeated.
#[derive(Debug, PartialEq, Eq)]
enum ListMarker {
    Asterisk(usize),
    Hyphen,
    Dot(usize),
}

impl ListMarker {
    /// Returns the Markdown list marker, including its trailing space, that
    /// replaces this AsciiDoc marker.
    fn in_markdown(&self) -> &str {
        match self {
            Self::Asterisk(_) | Self::Hyphen => "- ",
            Self::Dot(_) => "1. ",
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// End-to-end check of the converter on a document that exercises the
    /// header, titles, paragraphs with a hard line break, nested lists, list
    /// continuations, listing blocks, images and videos.
    ///
    /// The fixture lines are flush left on purpose: leading whitespace and
    /// blank lines inside the string literals are part of the data.
    #[test]
    fn test_asciidoc_to_markdown_conversion() {
        let input = "\
= Changelog #256
:sectanchors:
:page-layout: post

Hello!

Commit: commit:0123456789abcdef0123456789abcdef01234567[] +
Release: release:2022-01-01[]

== New Features

* pr:1111[] foo bar baz
- hyphen-prefixed list item
* nested list item
** `foo` -> `foofoo`
** `bar` -> `barbar`
* listing in the secondary level
. install
. add to config
+
[source,json]
----
{\"foo\":\"bar\"}
----
* list item with continuation
+
image::https://example.com/animation.gif[]
+
image::https://example.com/animation.gif[\"alt text\"]
+
video::https://example.com/movie.mp4[options=loop]
+
video::https://example.com/movie.mp4[options=\"autoplay,loop\"]
+
.Image
image::https://example.com/animation.gif[]
+
.Video
video::https://example.com/movie.mp4[options=loop]
+
[source,bash]
----
rustup update nightly
----
+
----
This is a plain listing.
----
* single line item followed by empty lines

* multiline list
item followed by empty lines

* multiline list
  item with indent

* multiline list
item not followed by empty lines
* multiline list
item followed by different marker
** foo
** bar
* multiline list
item followed by list continuation
+
paragraph
paragraph

== Another Section

* foo bar baz
* foo bar baz

The highlight of the month is probably pr:1111[].

[source,bash]
----
rustup update nightly
----

[source]
----
rustup update nightly
----

----
This is a plain listing.
----
";
        let expected = "\
# Changelog #256

Hello!

Commit: commit:0123456789abcdef0123456789abcdef01234567[] \\
Release: release:2022-01-01[]

## New Features

- pr:1111[] foo bar baz
  - hyphen-prefixed list item
- nested list item
  - `foo` -> `foofoo`
  - `bar` -> `barbar`
- listing in the secondary level
  1. install
  1. add to config

     ```json
     {\"foo\":\"bar\"}
     ```
- list item with continuation

  ![](https://example.com/animation.gif)

  ![alt text](https://example.com/animation.gif)

  <video src=\"https://example.com/movie.mp4\" controls loop>Your browser does not support the video tag.</video>

  <video src=\"https://example.com/movie.mp4\" autoplay controls loop>Your browser does not support the video tag.</video>

  _Image_\\
  ![](https://example.com/animation.gif)

  _Video_\\
  <video src=\"https://example.com/movie.mp4\" controls loop>Your browser does not support the video tag.</video>

  ```bash
  rustup update nightly
  ```

  ```
  This is a plain listing.
  ```
- single line item followed by empty lines
- multiline list
  item followed by empty lines
- multiline list
  item with indent
- multiline list
  item not followed by empty lines
- multiline list
  item followed by different marker
  - foo
  - bar
- multiline list
  item followed by list continuation

  paragraph
  paragraph

## Another Section

- foo bar baz
- foo bar baz

The highlight of the month is probably pr:1111[].

```bash
rustup update nightly
```

```
rustup update nightly
```

```
This is a plain listing.
```
";
        let actual = convert_asciidoc_to_markdown(std::io::Cursor::new(input)).unwrap();

        assert_eq!(actual, expected);
    }
}