2024-08-03 15:50:53 +02:00
|
|
|
-- Adapted from Markdown LPeg lexer by Mitchell to simplify and lex some pandoc
|
|
|
|
-- specifics.
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Localize the global `lexer` module and the LPeg constructors used below.
local lexer = lexer
local P, S, B = lpeg.P, lpeg.S, lpeg.B

-- The lexer object every rule in this file is registered on.
local lex = lexer.new(..., {no_user_word_lists = true})

-- Shared building blocks for the rules below.
local ws = lex:get_rule('whitespace') -- the whitespace rule already present on `lex`
local nl = P('\n') -- a single line feed
local bl = nl * nl -- two consecutive line feeds (blank-line boundary)
local nws = lexer.any - lexer.space -- one character that is not whitespace
|
|
|
|
|
|
|
|
-- Heading.
|
|
|
|
-- ATX-style heading: one to six '#' at the start of a line, a single space, then text that
-- begins with a non-whitespace character and runs to the end of the line, where two line feeds
-- must follow. The '#' run is tagged COMMENT; the text is tagged HEADING.
-- The marker is written as "one '#', then up to five more" so that it can never be empty: a
-- bare `P('#')^-6` also matches zero '#', which would let a line starting with a space qualify
-- whenever no earlier rule had consumed that space.
lex:add_rule('heading',
  lex:tag(lexer.COMMENT, lexer.starts_line(P('#') * P('#')^-5)) *
  P(' ') *
  lex:tag(lexer.HEADING, lexer.to_eol(nws) * #bl))
|
|
|
|
|
2024-07-31 12:05:39 +02:00
|
|
|
-- Blockquote.
|
2024-08-03 15:50:53 +02:00
|
|
|
-- A run of '>' and space characters at the start of a line is tagged COMMENT.
local quote_marker = S('> ')^1
lex:add_rule('blockquote', lex:tag(lexer.COMMENT, lexer.starts_line(quote_marker)))
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Horizontal rule.
|
|
|
|
-- Three or more characters from the set '*', '-', '_' that are immediately preceded by two line
-- feeds and immediately followed by two line feeds; tagged COMMENT.
do
  local rule_char = S('*-_')
  local hr = B(bl) * rule_char^3 * #bl
  lex:add_rule('hr', lex:tag(lexer.COMMENT, hr))
end
|
2024-07-31 12:05:39 +02:00
|
|
|
|
2024-08-03 15:50:53 +02:00
|
|
|
-- Native div.
|
|
|
|
-- A line that starts with three or more ':' is tagged COMMENT through to the end of the line.
local div_fence = lexer.to_eol(P(':')^3)
lex:add_rule("native_div", lex:tag(lexer.COMMENT, lexer.starts_line(div_fence)))
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Code block.
|
|
|
|
-- A line-start match (indentation allowed) that requires a preceding space or tab and then
-- runs to the end of the line.
local indented_prefix = B(' ') + B('\t')
local code_line = lexer.starts_line(indented_prefix * lexer.to_eol(), true)

-- Builds a range that opens with `fence` at the start of a line (indentation allowed) and
-- closes with `fence` at the start of a line.
local function fenced(fence)
  return lexer.range(lexer.starts_line(fence, true), lexer.starts_line(P(fence)))
end
local code_block = fenced('```') + fenced('~~~')

-- Match-time pattern for backtick-delimited inline code: captures the opening run of backticks
-- and searches the subject for the same run preceded by a non-backtick and not followed by a
-- backtick. The match ends right after that run, or at the end of the subject if none is found.
-- NOTE(review): nothing in this file references `code_inline`; the 'block_code' rule below
-- only combines `code_line` and `code_block`. Confirm whether that is intentional.
local code_inline = lpeg.Cmt(lpeg.C(P('`')^1), function(subject, pos, ticks)
  local _, close_end = subject:find('[^`]' .. ticks .. '%f[^`]', pos)
  if not close_end then close_end = #subject end
  return close_end + 1
end)

local block_code = code_line + code_block
lex:add_rule('block_code', lex:tag(lexer.CODE, block_code))
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Escape.
|
|
|
|
-- A backslash plus the single character that follows it is tagged DEFAULT.
local escaped_char = P('\\') * lexer.any
lex:add_rule('escape', lex:tag(lexer.DEFAULT, escaped_char))
|
|
|
|
|
|
|
|
-- Bracket.
|
|
|
|
-- A lone '[' or ']' is tagged COMMENT.
local bracket = P('[') + P(']')
lex:add_rule('brackets', lex:tag(lexer.COMMENT, bracket))
|
|
|
|
|
|
|
|
-- Native span.
|
|
|
|
-- A '{' ... '}' range that directly follows a ']' is tagged COMMENT.
local brace_range = lexer.range('{', '}')
lex:add_rule('native_span', B(']') * lex:tag(lexer.COMMENT, brace_range))
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Footnote.
|
|
|
|
-- Two alternatives, both tagged REFERENCE:
--   1. a '^' that is directly followed by '[';
--   2. a '^' plus one or more label characters, preceded by '[' and followed by ']'.
-- A label character is anything that is not whitespace, '^', '[' or ']'.
do
  local caret = P('^')
  local label_char = lexer.any - (lexer.space + S('^[]'))
  local caret_before_bracket = lex:tag(lexer.REFERENCE, caret) * #P('[')
  local bracketed_label = B('[') * lex:tag(lexer.REFERENCE, caret * label_char^1) * #P(']')
  lex:add_rule('footnote_key', caret_before_bracket + bracketed_label)
end
|
|
|
|
-- @todo footnote reference
|
|
|
|
|
|
|
|
-- Cite.
|
2024-07-31 12:05:39 +02:00
|
|
|
-- Citation key, tagged REFERENCE, that must be preceded by whitespace or '['. It is an optional
-- '-', then '@', then either a bare key (an alphanumeric or '_' followed by any number of
-- alphanumerics or the listed punctuation) or a '{' ... '}' range.
do
  local key_head = lexer.alnum + P('_')
  local key_tail = lexer.alnum + S(':.#$%&-+?<>~/')
  local bare_key = key_head * key_tail^0
  local braced_key = lexer.range('{', '}')
  local citation = P('-')^-1 * P('@') * (bare_key + braced_key)
  lex:add_rule('cite_key', B(lexer.space + P('[')) * lex:tag(lexer.REFERENCE, citation))
end
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Link.
|
|
|
|
-- Inline links of the form [text](target).
--   link_text:   one or more non-']' characters after '[', tagged LINK, accepted only when
--                "](" ... ")" follows.
--   link_target: the '(' ... ')' range directly after ']', tagged COMMENT.
do
  local target = lexer.range('(', ')')
  local text = (lexer.any - P(']'))^1
  lex:add_rule('link_text', B('[') * lex:tag(lexer.LINK, text) * #(P(']') * target))
  lex:add_rule('link_target', B(']') * lex:tag(lexer.COMMENT, target))
end
|
|
|
|
-- @todo link reference
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- Image
|
2024-08-03 15:50:53 +02:00
|
|
|
-- A '!' that is directly followed by '[' is tagged REFERENCE.
local bang = lex:tag(lexer.REFERENCE, P('!'))
lex:add_rule('image_bang', bang * #P('['))
|
2024-07-31 12:05:39 +02:00
|
|
|
|
|
|
|
-- One punctuation or whitespace character.
local punct_space = lexer.punct + lexer.space

--- Builds a pattern for a span enclosed by the delimiter `s`, applying the flanking-delimiter
-- rules from https://github.github.com/gfm/#emphasis-and-strong-emphasis for the cases a plain
-- delimited range cannot express.
-- The span may not cross two consecutive line feeds.
-- @param s Delimiter string, e.g. '*' or '__'.
-- @param not_inword When truthy, a delimiter inside the span only ends the body if it is
--   followed by punctuation or whitespace; otherwise any occurrence of the delimiter ends it.
-- @return LPeg pattern: opening delimiter, body, closing delimiter.
local function flanked_range(s, not_inword)
  local delim = P(s)
  -- A character that is neither the start of the delimiter nor whitespace.
  local fl_char = lexer.any - delim - lexer.space

  -- Opening delimiter: either preceded by punctuation/whitespace (other than the delimiter) and
  -- followed by a flanking character, or followed by a flanking character that is not
  -- punctuation.
  local opener = B(punct_space - delim) * delim * #fl_char + delim * #(fl_char - lexer.punct)

  -- Closing delimiter: either preceded by punctuation and followed by punctuation/whitespace
  -- (other than the delimiter), or preceded by a flanking character.
  local closer = B(lexer.punct) * delim * #(punct_space - delim) + B(fl_char) * delim

  -- What terminates the body.
  local stop = delim
  if not_inword then stop = delim * #punct_space end

  local body = (lexer.any - bl - stop)^0
  return opener * body * closer
end
|
|
|
|
|
|
|
|
-- Strong ('**' / '__') and emphasis ('*' / '_') spans. 'strong' is registered before 'em'.
do
  -- Underscore-delimited spans get extra guards: the span must be preceded by punctuation or
  -- whitespace, or begin with a '_' at the start of a line, and must be followed by
  -- punctuation, whitespace, or the end of input.
  local function underscore_range(delim)
    local before = B(punct_space) + #lexer.starts_line('_')
    local after = #(punct_space + P(-1))
    return before * flanked_range(delim, true) * after
  end

  local strong = flanked_range('**') + underscore_range('__')
  lex:add_rule('strong', lex:tag(lexer.BOLD, strong))

  local em = flanked_range('*') + underscore_range('_')
  lex:add_rule('em', lex:tag(lexer.ITALIC, em))
end
|
|
|
|
|
|
|
|
-- Embedded HTML.
|
|
|
|
-- Embed the 'html' lexer.
-- Start: the html lexer's 'tag' rule when a '<' follows an optionally space-indented line start
-- (up to three ' ' patterns), or the html lexer's 'comment' rule (the comment alternative
-- carries no line-start guard).
-- End: this lexer's whitespace rule at a position where two line feeds follow.
local html = lexer.load('html')
do
  local line_indent = lexer.starts_line(P(' ')^-3)
  local opening_tag = line_indent * #P('<') * html:get_rule('tag')
  local start_rule = opening_tag + html:get_rule('comment')
  local end_rule = #bl * ws
  lex:embed(html, start_rule, end_rule)
end
|
|
|
|
|
|
|
|
-- Embedded YAML.
|
|
|
|
-- Embed the 'yaml' lexer. Both the start and the end of the embedded region are the yaml
-- lexer's 'doc_bounds' rule, excluding positions where this lexer's 'hr' rule matches.
local yaml = lexer.load('yaml')
local yaml_boundary = yaml:get_rule('doc_bounds') - lex:get_rule('hr')
lex:embed(yaml, yaml_boundary, yaml_boundary)

return lex
|