441 lines
15 KiB
Lua
441 lines
15 KiB
Lua
|
--- Replaces plain quotation marks with typographic ones.
|
|||
|
--
|
|||
|
-- # SYNOPSIS
|
|||
|
--
|
|||
|
-- pandoc --lua-filter pandoc-quotes.lua
|
|||
|
--
|
|||
|
--
|
|||
|
-- # DESCRIPTION
|
|||
|
--
|
|||
|
-- pandoc-quotes.lua is a filter for pandoc that replaces non-typographic
|
|||
|
-- quotation marks with typographic ones for languages other than American
|
|||
|
-- English.
|
|||
|
--
|
|||
|
-- You can define which typographic quotation marks to replace plain ones with
|
|||
|
-- by setting either a document's quot-marks, quot-lang, or lang
|
|||
|
-- metadata field. If none of these is set, pandoc-quotes.lua does nothing.
|
|||
|
--
|
|||
|
-- You can add your own mapping of a language to quotation marks or override
|
|||
|
-- the default ones by setting quot-marks-by-lang.
|
|||
|
--
|
|||
|
-- ## quot-marks
|
|||
|
--
|
|||
|
-- A list of four strings, where the first item lists the primary left
|
|||
|
-- quotation mark, the second the primary right quotation mark, the third
|
|||
|
-- the secondary left quotation mark, and the fourth the secondary right
|
|||
|
-- quotation mark.
|
|||
|
--
|
|||
|
-- For example:
|
|||
|
--
|
|||
|
-- ```yaml
|
|||
|
-- ---
|
|||
|
-- quot-marks:
|
|||
|
-- - ''
|
|||
|
-- - ''
|
|||
|
-- - '
|
|||
|
-- - '
|
|||
|
-- ...
|
|||
|
-- ```
|
|||
|
--
|
|||
|
-- You always have to set all four.
|
|||
|
--
|
|||
|
-- If each quotation mark consists of one character only,
|
|||
|
-- you can write the whole list as a simple string.
|
|||
|
--
|
|||
|
-- For example:
|
|||
|
--
|
|||
|
-- ```yaml
|
|||
|
-- ---
|
|||
|
-- quot-marks: ""''
|
|||
|
-- ...
|
|||
|
-- ```
|
|||
|
--
|
|||
|
-- If quot-marks is set, the other fields are ignored.
|
|||
|
--
|
|||
|
--
|
|||
|
-- # quotation-lang
|
|||
|
--
|
|||
|
-- An RFC 5646-like code for the language the quotation marks of
|
|||
|
-- which shall be used (e.g., "pt-BR", "es").
|
|||
|
--
|
|||
|
-- For example:
|
|||
|
--
|
|||
|
-- ```yaml
|
|||
|
-- ---
|
|||
|
-- quot-lang: de-AT
|
|||
|
-- ...
|
|||
|
-- ```
|
|||
|
--
|
|||
|
-- Note: Only the language and the country tags of RFC 5646 are supported.
|
|||
|
-- For example, "it-CH" (i.e., Italian as spoken in Switzerland) is fine,
|
|||
|
-- but "it-756" (also Italian as spoken in Switzerland) will return the
|
|||
|
-- quotation marks for "it" (i.e., Italian as spoken in general).
|
|||
|
--
|
|||
|
-- If quot-marks is set, quot-lang is ignored.
|
|||
|
--
|
|||
|
--
|
|||
|
-- # lang
|
|||
|
--
|
|||
|
-- The format of lang is the same as for quot-lang. If quot-marks
|
|||
|
-- or quot-lang is set, lang is ignored.
|
|||
|
--
|
|||
|
-- For example:
|
|||
|
--
|
|||
|
-- ```yaml
|
|||
|
-- ---
|
|||
|
-- lang: de-AT
|
|||
|
-- ...
|
|||
|
-- ```
|
|||
|
--
|
|||
|
--
|
|||
|
-- # ADDING LANGUAGES
|
|||
|
--
|
|||
|
-- You can add quotation marks for unsupported languages, or override the
|
|||
|
-- defaults, by setting the metadata field quot-marks-by-lang to a maping
|
|||
|
-- of RFC 5646-like language codes (e.g., "pt-BR", "es") to lists of quotation
|
|||
|
-- marks, which are given in the same format as for the quot-marks
|
|||
|
-- metadata field.
|
|||
|
--
|
|||
|
-- For example:
|
|||
|
--
|
|||
|
-- ```yaml
|
|||
|
-- ---
|
|||
|
-- quot-marks-by-lang:
|
|||
|
-- abc-XYZ: ""''
|
|||
|
-- lang: abc-XYZ
|
|||
|
-- ...
|
|||
|
-- ```
|
|||
|
--
|
|||
|
--
|
|||
|
-- # CAVEATS
|
|||
|
--
|
|||
|
-- pandoc represents documents as abstract syntax trees internally, and
|
|||
|
-- quotations are nodes in that tree. However, pandoc-quotes.lua replaces
|
|||
|
-- those nodes with their content, adding proper quotation marks. That is,
|
|||
|
-- pandoc-quotes.lua pushes quotations from the syntax of a document's
|
|||
|
-- representation into its semantics. That being so, you should not
|
|||
|
-- use pandoc-quotes.lua with output formats that represent quotes
|
|||
|
-- syntactically (e.g., HTML, LaTeX, ConTexT). Moroever, filters running after
|
|||
|
-- pandoc-quotes won't recognise quotes. So, it should be the last or
|
|||
|
-- one of the last filters you apply.
|
|||
|
--
|
|||
|
-- Support for quotation marks of different languages is certainly incomplete
|
|||
|
-- and likely erroneous. See <https://github.com/odkr/pandoc-quotes.lua> if
|
|||
|
-- you'd like to help with this.
|
|||
|
--
|
|||
|
-- pandoc-quotes.lua is Unicode-agnostic.
|
|||
|
--
|
|||
|
--
|
|||
|
-- # SEE ALSO
|
|||
|
--
|
|||
|
-- pandoc(1)
|
|||
|
--
|
|||
|
--
|
|||
|
-- # AUTHOR
|
|||
|
--
|
|||
|
-- Copyright 2019 Odin Kroeger
|
|||
|
--
|
|||
|
--
|
|||
|
-- # LICENSE
|
|||
|
--
|
|||
|
-- Permission is hereby granted, free of charge, to any person obtaining a copy
|
|||
|
-- of this software and associated documentation files (the "Software"), to
|
|||
|
-- deal in the Software without restriction, including without limitation the
|
|||
|
-- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|||
|
-- sell copies of the Software, and to permit persons to whom the Software is
|
|||
|
-- furnished to do so, subject to the following conditions:
|
|||
|
--
|
|||
|
-- The above copyright notice and this permission notice shall be included in
|
|||
|
-- all copies or substantial portions of the Software.
|
|||
|
--
|
|||
|
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|||
|
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|||
|
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|||
|
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|||
|
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|||
|
-- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|||
|
-- IN THE SOFTWARE.
|
|||
|
--
|
|||
|
--
|
|||
|
-- @script pandoc-quotes.lua
|
|||
|
-- @release 0.1.10
|
|||
|
-- @author Odin Kroeger
|
|||
|
-- @copyright 2018, 2020 Odin Kroeger
|
|||
|
-- @license MIT
|
|||
|
|
|||
|
|
|||
|
-- # INITIALISATION
|
|||
|
|
|||
|
local M = {}
|
|||
|
|
|||
|
local pairs = pairs
|
|||
|
local require = require
|
|||
|
|
|||
|
local io = io
|
|||
|
local table = table
|
|||
|
local package = package
|
|||
|
|
|||
|
local pandoc = pandoc
|
|||
|
if not pandoc.utils then pandoc.utils = require 'pandoc.utils' end
|
|||
|
|
|||
|
local _ENV = M
|
|||
|
|
|||
|
local text = require 'text'
|
|||
|
|
|||
|
|
|||
|
-- # CONSTANTS
|
|||
|
|
|||
|
--- The name of this script.
|
|||
|
SCRIPT_NAME = 'pandoc-quotes.lua'
|
|||
|
|
|||
|
--- The path seperator of the operating system.
|
|||
|
PATH_SEP = package.config:sub(1, 1)
|
|||
|
|
|||
|
--- The character sequence to end a line.
|
|||
|
if PATH_SEP == '\\' then EOL = '\r\n'
|
|||
|
else EOL = '\n' end
|
|||
|
|
|||
|
|
|||
|
--- A list of mappings from RFC 5646-ish language codes to quotation marks.
|
|||
|
--
|
|||
|
-- I have adopted the list below from:
|
|||
|
-- <https://en.wikipedia.org/w/index.php?title=Quotation_mark&oldid=836731669>
|
|||
|
--
|
|||
|
-- I tried to come up with reasonable defaults for secondary quotes for
|
|||
|
-- language that, according to the Wikipedia, don't have any.
|
|||
|
--
|
|||
|
-- Adding languages:
|
|||
|
--
|
|||
|
-- Add an ordered pair, where the first item is an RFC 5646 language
|
|||
|
-- code (though only the language and country tags are supported) and the
|
|||
|
-- second item is a list of quotation marks, in the following order:
|
|||
|
-- primary left, primary right, secondary left, secondary right.
|
|||
|
--
|
|||
|
-- You have to list four quotation marks, even if the langauge you add does
|
|||
|
-- not use secondary quotation marks. Just come up with something that makes
|
|||
|
-- sense. This is because a user may, rightly, find that just because their
|
|||
|
-- language does not 'officially' have secondary quotation marks, they
|
|||
|
-- are going to use them anyway. And they should get a reasonable result,
|
|||
|
-- not a runtime error.
|
|||
|
--
|
|||
|
-- The order in which languages are listed is meaningless. If you define
|
|||
|
-- variants for a language that is spoken in different countries, also
|
|||
|
-- define a 'default' for the language alone, without the country tag.
|
|||
|
QUOT_MARKS_BY_LANG = {
|
|||
|
ar = {'”', '“', '’', '‘' },
|
|||
|
bs = {'”', '”', '’', '’' },
|
|||
|
bo = {'「', '」', '『', '』' },
|
|||
|
bs = {'”', '”', '’', '’' },
|
|||
|
cn = {'「', '」', '『', '』' },
|
|||
|
cs = {'„', '“', '‚', '‘' },
|
|||
|
cy = {'‘', '’', '“', '”' },
|
|||
|
da = {'»', '«', '›', '‹' },
|
|||
|
de = {'„', '“', '‚', '‘' },
|
|||
|
['de-CH'] = {'«', '»', '‹', '›' },
|
|||
|
el = {'«', '»', '“', '”' },
|
|||
|
en = {'“', '”', '‘', '’' },
|
|||
|
['en-US'] = {'“', '”', '‘', '’' },
|
|||
|
['en-GB'] = {'‘', '’', '“', '”' },
|
|||
|
['en-UK'] = {'‘', '’', '“', '”' },
|
|||
|
['en-CA'] = {'“', '”', '‘', '’' },
|
|||
|
eo = {'“', '”', '‘', '’' },
|
|||
|
es = {'«', '»', '“', '”' },
|
|||
|
et = {'„', '“', '‚', '‘' },
|
|||
|
fi = {'”', '”', '’', '’' },
|
|||
|
fil = {'“', '”', '‘', '’' },
|
|||
|
fa = {'«', '»', '‹', '›' },
|
|||
|
fr = {'«', '»', '‹', '›' },
|
|||
|
ga = {'“', '”', '‘', '’' },
|
|||
|
gd = {'‘', '’', '“', '”' },
|
|||
|
gl = {'«', '»', '‹', '›' },
|
|||
|
he = {'“', '”', '‘', '’' },
|
|||
|
hi = {'“', '”', '‘', '’' },
|
|||
|
hu = {'„', '”', '»', '«' },
|
|||
|
hr = {'„', '“', '‚', '‘' },
|
|||
|
ia = {'“', '”', '‘', '’' },
|
|||
|
id = {'“', '”', '‘', '’' },
|
|||
|
is = {'„', '“', '‚', '‘' },
|
|||
|
it = {'«', '»', '“', '”' },
|
|||
|
['it-CH'] = {'«', '»', '‹', '›' },
|
|||
|
ja = {'「', '」', '『', '』' },
|
|||
|
jbo = {'lu', 'li\'u', 'lu', 'li\'u'},
|
|||
|
ka = {'„', '“', '‚', '‘' },
|
|||
|
khb = {'《', '》', '〈', '〉' },
|
|||
|
kk = {'«', '»', '‹', '›' },
|
|||
|
km = {'«', '»', '‹', '›' },
|
|||
|
ko = {'《', '》', '〈', '〉' },
|
|||
|
['ko-KR'] = {'“', '”', '‘', '’' },
|
|||
|
lt = {'„', '“', '‚', '‘' },
|
|||
|
lv = {'„', '“', '‚', '‘' },
|
|||
|
lo = {'«', '»', '‹', '›' },
|
|||
|
nl = {'„', '”', '‚', '’' },
|
|||
|
mk = {'„', '“', '’', '‘' },
|
|||
|
mn = {'«', '»', '‹', '›' },
|
|||
|
mt = {'“', '”', '‘', '’' },
|
|||
|
no = {'«', '»', '«', '»' },
|
|||
|
pl = {'„', '”', '»', '«' },
|
|||
|
ps = {'«', '»', '‹', '›' },
|
|||
|
pt = {'«', '»', '“', '”' },
|
|||
|
['pt-BR'] = {'“', '”', '‘', '’' },
|
|||
|
rm = {'«', '»', '‹', '›' },
|
|||
|
ro = {'„', '”', '«', '»' },
|
|||
|
ru = {'«', '»', '“', '”' },
|
|||
|
sk = {'„', '“', '‚', '‘' },
|
|||
|
sl = {'»', '«', '„', '“' },
|
|||
|
sq = {'„', '“', '‚', '‘' },
|
|||
|
sr = {'„', '“', '’', '’' },
|
|||
|
sv = {'”', '”', '’', '’' },
|
|||
|
tdd = {'「', '」', '『', '』' },
|
|||
|
ti = {'«', '»', '‹', '›' },
|
|||
|
th = {'“', '”', '‘', '’' },
|
|||
|
thi = {'「', '」', '『', '』' },
|
|||
|
tr = {'«', '»', '‹', '›' },
|
|||
|
ug = {'«', '»', '‹', '›' },
|
|||
|
uk = {'«', '»', '„', '“' },
|
|||
|
uz = {'«', '»', '„', '“' },
|
|||
|
vi = {'“', '”', '‘', '’' },
|
|||
|
wen = {'„', '“', '‚', '‘' },
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
-- # FUNCTIONS
|
|||
|
|
|||
|
--- Prints warnings to STDERR.
|
|||
|
--
|
|||
|
-- Prefixes messages with `SCRIPT_NAME` and ": ".
|
|||
|
-- Also appends an end of line sequence.
|
|||
|
--
|
|||
|
-- @tparam string str A string format to be written to STDERR.
|
|||
|
-- @tparam string ... Arguments to that format.
|
|||
|
function warn (str, ...)
|
|||
|
io.stderr:write(SCRIPT_NAME, ': ', string.format(str, ...), EOL)
|
|||
|
end
|
|||
|
|
|||
|
|
|||
|
--- Applies a function to every element of a list.
|
|||
|
--
|
|||
|
-- @tparam func f The function.
|
|||
|
-- @tparam tab list The list.
|
|||
|
-- @treturn tab The return values of `f`.
|
|||
|
function map (f, list)
|
|||
|
local ret = {}
|
|||
|
for k, v in pairs(list) do ret[k] = f(v) end
|
|||
|
return ret
|
|||
|
end
|
|||
|
|
|||
|
do
|
|||
|
local stringify = pandoc.utils.stringify
|
|||
|
|
|||
|
--- Reads quotation marks from a `quot-marks` metadata field.
|
|||
|
--
|
|||
|
-- @tparam pandoc.MetaValue The content of a metadata field.
|
|||
|
-- Must be either of type pandoc.MetaInlines or pandoc.MetaList.
|
|||
|
-- @treturn[1] {pandoc.Str,pandoc.Str,pandoc.Str,pandoc.Str}
|
|||
|
-- A table of quotation marks
|
|||
|
-- @treturn[2] `nil` if an error occurred.
|
|||
|
-- @treturn[2] string An error message.
|
|||
|
function get_quotation_marks (meta)
|
|||
|
if meta.t == 'MetaInlines' then
|
|||
|
local marks = stringify(meta)
|
|||
|
if text.len(marks) ~= 4 then
|
|||
|
return nil, 'not four quotation marks'
|
|||
|
end
|
|||
|
local ret = {}
|
|||
|
for i = 1, 4 do ret[i] = text.sub(marks, i, i) end
|
|||
|
return ret
|
|||
|
elseif meta.t == 'MetaList' then
|
|||
|
local marks = map(stringify, meta)
|
|||
|
if #marks ~= 4 then
|
|||
|
return nil, 'not four quotation marks'
|
|||
|
end
|
|||
|
return marks
|
|||
|
end
|
|||
|
return nil, 'neither a string nor a list'
|
|||
|
end
|
|||
|
end
|
|||
|
|
|||
|
|
|||
|
do
|
|||
|
local stringify = pandoc.utils.stringify
|
|||
|
|
|||
|
-- Holds the quotation marks for the language of the document.
|
|||
|
-- Common to `configure` and `insert_quot_marks`.
|
|||
|
local QUOT_MARKS = nil
|
|||
|
|
|||
|
--- Determines the quotation marks for the document.
|
|||
|
--
|
|||
|
-- Stores them in `QUOT_MARKS`, which it shares with `insert_quot_marks`.
|
|||
|
-- Prints errors to STDERR.
|
|||
|
--
|
|||
|
-- @tparam pandoc.Meta The document's metadata.
|
|||
|
function configure (meta)
|
|||
|
local quot_marks, lang
|
|||
|
if meta['quot-marks-by-lang'] then
|
|||
|
for k, v in pairs(meta['quot-marks-by-lang']) do
|
|||
|
local quot_marks, err = get_quotation_marks(v)
|
|||
|
if not quot_marks then
|
|||
|
warn('metadata field "quot-marks-by-lang": lang "%s": %s.',
|
|||
|
k, err)
|
|||
|
return
|
|||
|
end
|
|||
|
QUOT_MARKS_BY_LANG[k] = quot_marks
|
|||
|
end
|
|||
|
end
|
|||
|
if meta['quot-marks'] then
|
|||
|
local err
|
|||
|
quot_marks, err = get_quotation_marks(meta['quot-marks'])
|
|||
|
if not quot_marks then
|
|||
|
warn('metadata field "quot-marks": %s.', err)
|
|||
|
return
|
|||
|
end
|
|||
|
elseif meta['quot-lang'] then
|
|||
|
lang = stringify(meta['quot-lang'])
|
|||
|
elseif meta['lang'] then
|
|||
|
lang = stringify(meta['lang'])
|
|||
|
end
|
|||
|
if lang then
|
|||
|
for i = 1, 3 do
|
|||
|
if i == 2 then lang = lang:match '^(%a+)'
|
|||
|
elseif i == 3 then
|
|||
|
local expr = '^' .. lang .. '-'
|
|||
|
for k, v in pairs(QUOT_MARKS_BY_LANG) do
|
|||
|
if k:match(expr) then quot_marks = v break end
|
|||
|
end
|
|||
|
end
|
|||
|
if i < 3 then quot_marks = QUOT_MARKS_BY_LANG[lang] end
|
|||
|
if quot_marks then break end
|
|||
|
end
|
|||
|
end
|
|||
|
if quot_marks then QUOT_MARKS = map(pandoc.Str, quot_marks)
|
|||
|
elseif lang then warn('%s: unknown language.', lang) end
|
|||
|
end
|
|||
|
|
|||
|
|
|||
|
do
|
|||
|
local insert = table.insert
|
|||
|
--- Replaces quoted elements with quoted text.
|
|||
|
--
|
|||
|
-- Uses the quotation marks stored in `QUOT_MARKS`,
|
|||
|
-- which it shares with `configure`.
|
|||
|
--
|
|||
|
-- @tparam pandoc.Quoted quoted A quoted element.
|
|||
|
-- @treturn {pandoc.Str,pandoc.Inline,...,pandoc.Str}
|
|||
|
-- A list with the opening quote (as `pandoc.Str`),
|
|||
|
-- the content of `quoted`, and the closing quote (as `pandoc.Str`).
|
|||
|
function insert_quot_marks (quoted)
|
|||
|
if not QUOT_MARKS then return end
|
|||
|
local quote_type = quoted.quotetype
|
|||
|
local inlines = quoted.content
|
|||
|
local left, right
|
|||
|
if quote_type == 'DoubleQuote' then left, right = 1, 2
|
|||
|
elseif quote_type == 'SingleQuote' then left, right = 3, 4
|
|||
|
else error('unknown quote type') end
|
|||
|
insert(inlines, 1, QUOT_MARKS[left])
|
|||
|
insert(inlines, QUOT_MARKS[right])
|
|||
|
return inlines
|
|||
|
end
|
|||
|
end
|
|||
|
end
|
|||
|
|
|||
|
return {{Meta = configure}, {Quoted = insert_quot_marks}}
|