//! Markdown grammar supporting CommonMark and GFM.

/// Entry rule: the whole input, from start to end, as a sequence of lines.
item = _{ SOI ~ line* ~ EOI }

/// A line is either a recognised expression or a bare newline.
line = _{ expr | newline }

/// Top-level alternatives, tried in order.
expr = _{
    comment
  | html
  | meta_info
  | block
  | inline
  | td_tag
}

/// Matches a CJK character
CJK = { HAN | HANGUL | KATAKANA | HIRAGANA | BOPOMOFO }

/// Block elements, e.g.: paragraphs, lists, code blocks, etc.
block = ${
    meta_tags
  | hr
  | list
  | codeblock
  | block_item
  | paragraph
}

/// Inline elements, e.g.: links, images, bold, italic, code, etc.
inline = ${
    wikilinks
  | img
  | link
  | code
  | mark
}

/// One or more inline elements or plain text runs.
paragraph = { (inline | text)+ }

/// Plain text: a string, optionally continued over following lines.
text = @{ string ~ (newline ~ string)* }

/// Matches HTML tags (In some Markdown implementations, HTML tags are allowed)
html = { tag_self | tag_start ~ ws* ~ inner_html* ~ ws* ~ tag_end }

/// Content between an opening and closing tag: nested HTML or raw text.
inner_html = _{ html | inner_text }

/// Any run of characters that contains neither `<` nor `>`.
inner_text = { (!("<" | ">") ~ ANY)+ }

/// Opening tag, e.g.: `<div class="a">`
tag_start = @{ "<" ~ ws* ~ (!("/" | ">") ~ ANY)* ~ ws* ~ ">" }

// NOTE(review): the body of `tag_end` was damaged in the source (empty literal
// and an unbalanced parenthesis); it is restored here to mirror `tag_start`.
/// Closing tag, e.g.: `</div>`
tag_end = @{ "</" ~ ws* ~ (!">" ~ ANY)* ~ ws* ~ ">" }

/// Self-closing tag, e.g.: `<br />`
tag_self = @{ "<" ~ ws* ~ (!(">" | "/>") ~ ANY)* ~ ws* ~ "/>" }

/// Horizontal rule: three or more `-`.
hr = @{ "--" ~ "-"+ }

/// A single identifier character: `_`, `-`, `.` or an ASCII alphanumeric.
identifier = @{ ("_" | "-" | "." | ASCII_ALPHANUMERIC) }

newline    = @{ "\n" | "\r\n" }
space      = @{ " " }
blank_line = @{ space* ~ newline }
ws         = @{ space | newline }

/// 4 spaces or a tab
indent = @{ " "{4, } | "\t" }

/// Fenced code block (the opening fence is pushed on the stack and popped to
/// close it), or one or more indented code lines.
codeblock = ${
    PUSH("```") ~ codeblock_lang ~ codeblock_code ~ POP
  | indent_code+
}

/// Codeblock that used 4 spaces or a tab as indent
indent_code = @{ indent ~ (!"\n" ~ ANY)* ~ newline }

/// Characters directly following the opening fence (zero or more identifier characters).
codeblock_lang = { identifier* }

/// Everything up to (not including) the fence currently on top of the stack.
codeblock_code = { (!(PEEK) ~ ANY)* }

/// Matches table cell
td_tag = @{ space* ~ "|" ~ space* }

/// Heading markers (`#` to `######`) or the `>` marker.
block_prefix = @{ ("######" | "#####" | "####" | "###" | "##" | "#" | ">") }

/// A block prefix, optional spaces, then inline elements and/or strings.
block_item = { (block_prefix ~ space* ~ (inline | string)+) }

/// A list: a first item followed by further items or indented paragraphs.
list = { list_item ~ (list_item | list_paragraph)* }

list_item      = ${ list_prefix ~ (inline | string)+ ~ newline+ }
list_paragraph =  { indent ~ (inline | string)* ~ newline+ }

/// List marker: `*`, `-`, a digit followed by `.`, or a `[ ]` / `[x]` / `[X]` checkbox,
/// with optional leading and trailing spaces.
list_prefix = @{
    (space* ~ ("*" | "-" | ASCII_DIGIT ~ "." | "[" ~ (" " | "x" | "X") ~ "]") ~ " "*)
}

/// Matches meta info for some special Markdown implementations, e.g.: YAML front matter
/// For example:
///
/// ------------------
/// title: Hello World
/// author: Jason Lee
/// tags: Rust, JavaScript
/// ------------------
meta_info = ${ meta_wrap ~ newline ~ meta_pair* ~ meta_wrap ~ newline* }

/// Meta delimiter line: three or more `-`.
meta_wrap = @{ "-"{3, } }

/// One `key: value` line inside the meta block.
meta_pair = ${ meta_key ~ string ~ newline }

/// Key part of a meta pair, including the `:` and any spaces after it.
meta_key = @{ (!(":" | newline) ~ identifier)* ~ ":" ~ " "* }

/// Ignore tags in Markdown, e.g.: "tags: 美国, 中国"
meta_tags = @{ meta_key ~ meta_tags_val+ ~ meta_tags_item ~ newline }

meta_tags_val     = @{ meta_tags_item ~ meta_tags_divider }
meta_tags_item    =  { (!(newline | ",") ~ (CJK | " " | ASCII_ALPHANUMERIC))* }
meta_tags_divider =  { " "* ~ "," ~ " "* }

/// The `!` that turns a link into an image.
image_prefix = @{ "!" }

/// Image: `!` followed by a link.
img = ${ image_prefix ~ link }

/// Matches link, e.g.: `[Hello](/hello)` or `[Hello]`
link = ${ link_string_wrap ~ href? }

link_string_wrap =  { open_bracket ~ (mark* ~ link_string ~ mark*) ~ close_bracket }
link_string      = @{ (!(close_bracket) ~ ANY)* }
open_bracket     = @{ "[" }
close_bracket    = @{ "]" }
href             = @{ paren }

/// Wiki-style link, e.g.: `[[Page]]`
wikilinks = ${ "[[" ~ (!("]]") ~ ANY)* ~ "]]" }

/// Inline code, or text wrapped in a mark delimiter; the opening delimiter is
/// pushed on the stack and the same delimiter is required to close it.
mark = ${ code | PUSH(open_mark) ~ (mark | mark_string) ~ close_mark }

open_mark   = @{ "***" | "**" | "*" | "~~" | "\"" }
close_mark  = @{ POP }
mark_string =  { (!(PEEK | inline) ~ ANY)* }

/// Inline code delimited by backticks; may not span a newline.
code = ${ PUSH(open_code) ~ inner_code ~ close_code }

open_code  = @{ "`" }
close_code = @{ POP }
inner_code = @{ (!(newline | PEEK) ~ ANY)* }

/// A non-empty run of characters up to a newline or the start of an inline element.
string = @{ (!(newline | inline) ~ ANY)+ }

// NOTE(review): the comment delimiters were missing in the source
// (`open_comment` was an empty literal and `close_comment` was undefined);
// they are restored here as the standard HTML comment markers.
/// HTML comment, e.g.: <!-- comment -->
comment = ${ open_comment ~ (!close_comment ~ ANY)* ~ close_comment }

open_comment  = @{ "<!--" }
close_comment = @{ "-->" }

/// Matches link href part, e.g.: `(hello (world))`
paren = {
    open_paren ~ inner_paren ~ paren* ~ inner_paren* ~ close_paren
  | open_paren ~ close_paren
}

inner_paren = { (!(newline | open_paren | close_paren) ~ ANY)+ }
open_paren  = { "(" }
close_paren = { ")" }