--- Replaces plain quotation marks with typographic ones. -- -- # SYNOPSIS -- -- pandoc --lua-filter pandoc-quotes.lua -- -- -- # DESCRIPTION -- -- pandoc-quotes.lua is a filter for pandoc that replaces non-typographic -- quotation marks with typographic ones for languages other than American -- English. -- -- You can define which typographic quotation marks to replace plain ones with -- by setting either a document's quot-marks, quot-lang, or lang -- metadata field. If none of these is set, pandoc-quotes.lua does nothing. -- -- You can add your own mapping of a language to quotation marks or override -- the default ones by setting quot-marks-by-lang. -- -- ## quot-marks -- -- A list of four strings, where the first item lists the primary left -- quotation mark, the second the primary right quotation mark, the third -- the secondary left quotation mark, and the fourth the secondary right -- quotation mark. -- -- For example: -- -- ```yaml -- --- -- quot-marks: -- - '' -- - '' -- - ' -- - ' -- ... -- ``` -- -- You always have to set all four. -- -- If each quotation mark consists of one character only, -- you can write the whole list as a simple string. -- -- For example: -- -- ```yaml -- --- -- quot-marks: ""'' -- ... -- ``` -- -- If quot-marks is set, the other fields are ignored. -- -- -- # quotation-lang -- -- An RFC 5646-like code for the language the quotation marks of -- which shall be used (e.g., "pt-BR", "es"). -- -- For example: -- -- ```yaml -- --- -- quot-lang: de-AT -- ... -- ``` -- -- Note: Only the language and the country tags of RFC 5646 are supported. -- For example, "it-CH" (i.e., Italian as spoken in Switzerland) is fine, -- but "it-756" (also Italian as spoken in Switzerland) will return the -- quotation marks for "it" (i.e., Italian as spoken in general). -- -- If quot-marks is set, quot-lang is ignored. -- -- -- # lang -- -- The format of lang is the same as for quot-lang. If quot-marks -- or quot-lang is set, lang is ignored. -- -- For example: -- -- ```yaml -- --- -- lang: de-AT -- ... -- ``` -- -- -- # ADDING LANGUAGES -- -- You can add quotation marks for unsupported languages, or override the -- defaults, by setting the metadata field quot-marks-by-lang to a maping -- of RFC 5646-like language codes (e.g., "pt-BR", "es") to lists of quotation -- marks, which are given in the same format as for the quot-marks -- metadata field. -- -- For example: -- -- ```yaml -- --- -- quot-marks-by-lang: -- abc-XYZ: ""'' -- lang: abc-XYZ -- ... -- ``` -- -- -- # CAVEATS -- -- pandoc represents documents as abstract syntax trees internally, and -- quotations are nodes in that tree. However, pandoc-quotes.lua replaces -- those nodes with their content, adding proper quotation marks. That is, -- pandoc-quotes.lua pushes quotations from the syntax of a document's -- representation into its semantics. That being so, you should not -- use pandoc-quotes.lua with output formats that represent quotes -- syntactically (e.g., HTML, LaTeX, ConTexT). Moroever, filters running after -- pandoc-quotes won't recognise quotes. So, it should be the last or -- one of the last filters you apply. -- -- Support for quotation marks of different languages is certainly incomplete -- and likely erroneous. See if -- you'd like to help with this. -- -- pandoc-quotes.lua is Unicode-agnostic. -- -- -- # SEE ALSO -- -- pandoc(1) -- -- -- # AUTHOR -- -- Copyright 2019 Odin Kroeger -- -- -- # LICENSE -- -- Permission is hereby granted, free of charge, to any person obtaining a copy -- of this software and associated documentation files (the "Software"), to -- deal in the Software without restriction, including without limitation the -- rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -- sell copies of the Software, and to permit persons to whom the Software is -- furnished to do so, subject to the following conditions: -- -- The above copyright notice and this permission notice shall be included in -- all copies or substantial portions of the Software. -- -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -- IN THE SOFTWARE. -- -- -- @script pandoc-quotes.lua -- @release 0.1.10 -- @author Odin Kroeger -- @copyright 2018, 2020 Odin Kroeger -- @license MIT -- # INITIALISATION local M = {} local pairs = pairs local require = require local io = io local table = table local package = package local pandoc = pandoc if not pandoc.utils then pandoc.utils = require 'pandoc.utils' end local _ENV = M local text = require 'text' -- # CONSTANTS --- The name of this script. SCRIPT_NAME = 'pandoc-quotes.lua' --- The path seperator of the operating system. PATH_SEP = package.config:sub(1, 1) --- The character sequence to end a line. if PATH_SEP == '\\' then EOL = '\r\n' else EOL = '\n' end --- A list of mappings from RFC 5646-ish language codes to quotation marks. -- -- I have adopted the list below from: -- -- -- I tried to come up with reasonable defaults for secondary quotes for -- language that, according to the Wikipedia, don't have any. -- -- Adding languages: -- -- Add an ordered pair, where the first item is an RFC 5646 language -- code (though only the language and country tags are supported) and the -- second item is a list of quotation marks, in the following order: -- primary left, primary right, secondary left, secondary right. -- -- You have to list four quotation marks, even if the langauge you add does -- not use secondary quotation marks. Just come up with something that makes -- sense. This is because a user may, rightly, find that just because their -- language does not 'officially' have secondary quotation marks, they -- are going to use them anyway. And they should get a reasonable result, -- not a runtime error. -- -- The order in which languages are listed is meaningless. If you define -- variants for a language that is spoken in different countries, also -- define a 'default' for the language alone, without the country tag. QUOT_MARKS_BY_LANG = { ar = {'”', '“', '’', '‘' }, bs = {'”', '”', '’', '’' }, bo = {'「', '」', '『', '』' }, bs = {'”', '”', '’', '’' }, cn = {'「', '」', '『', '』' }, cs = {'„', '“', '‚', '‘' }, cy = {'‘', '’', '“', '”' }, da = {'»', '«', '›', '‹' }, de = {'„', '“', '‚', '‘' }, ['de-CH'] = {'«', '»', '‹', '›' }, el = {'«', '»', '“', '”' }, en = {'“', '”', '‘', '’' }, ['en-US'] = {'“', '”', '‘', '’' }, ['en-GB'] = {'‘', '’', '“', '”' }, ['en-UK'] = {'‘', '’', '“', '”' }, ['en-CA'] = {'“', '”', '‘', '’' }, eo = {'“', '”', '‘', '’' }, es = {'«', '»', '“', '”' }, et = {'„', '“', '‚', '‘' }, fi = {'”', '”', '’', '’' }, fil = {'“', '”', '‘', '’' }, fa = {'«', '»', '‹', '›' }, fr = {'«', '»', '‹', '›' }, ga = {'“', '”', '‘', '’' }, gd = {'‘', '’', '“', '”' }, gl = {'«', '»', '‹', '›' }, he = {'“', '”', '‘', '’' }, hi = {'“', '”', '‘', '’' }, hu = {'„', '”', '»', '«' }, hr = {'„', '“', '‚', '‘' }, ia = {'“', '”', '‘', '’' }, id = {'“', '”', '‘', '’' }, is = {'„', '“', '‚', '‘' }, it = {'«', '»', '“', '”' }, ['it-CH'] = {'«', '»', '‹', '›' }, ja = {'「', '」', '『', '』' }, jbo = {'lu', 'li\'u', 'lu', 'li\'u'}, ka = {'„', '“', '‚', '‘' }, khb = {'《', '》', '〈', '〉' }, kk = {'«', '»', '‹', '›' }, km = {'«', '»', '‹', '›' }, ko = {'《', '》', '〈', '〉' }, ['ko-KR'] = {'“', '”', '‘', '’' }, lt = {'„', '“', '‚', '‘' }, lv = {'„', '“', '‚', '‘' }, lo = {'«', '»', '‹', '›' }, nl = {'„', '”', '‚', '’' }, mk = {'„', '“', '’', '‘' }, mn = {'«', '»', '‹', '›' }, mt = {'“', '”', '‘', '’' }, no = {'«', '»', '«', '»' }, pl = {'„', '”', '»', '«' }, ps = {'«', '»', '‹', '›' }, pt = {'«', '»', '“', '”' }, ['pt-BR'] = {'“', '”', '‘', '’' }, rm = {'«', '»', '‹', '›' }, ro = {'„', '”', '«', '»' }, ru = {'«', '»', '“', '”' }, sk = {'„', '“', '‚', '‘' }, sl = {'»', '«', '„', '“' }, sq = {'„', '“', '‚', '‘' }, sr = {'„', '“', '’', '’' }, sv = {'”', '”', '’', '’' }, tdd = {'「', '」', '『', '』' }, ti = {'«', '»', '‹', '›' }, th = {'“', '”', '‘', '’' }, thi = {'「', '」', '『', '』' }, tr = {'«', '»', '‹', '›' }, ug = {'«', '»', '‹', '›' }, uk = {'«', '»', '„', '“' }, uz = {'«', '»', '„', '“' }, vi = {'“', '”', '‘', '’' }, wen = {'„', '“', '‚', '‘' }, } -- # FUNCTIONS --- Prints warnings to STDERR. -- -- Prefixes messages with `SCRIPT_NAME` and ": ". -- Also appends an end of line sequence. -- -- @tparam string str A string format to be written to STDERR. -- @tparam string ... Arguments to that format. function warn (str, ...) io.stderr:write(SCRIPT_NAME, ': ', string.format(str, ...), EOL) end --- Applies a function to every element of a list. -- -- @tparam func f The function. -- @tparam tab list The list. -- @treturn tab The return values of `f`. function map (f, list) local ret = {} for k, v in pairs(list) do ret[k] = f(v) end return ret end do local stringify = pandoc.utils.stringify --- Reads quotation marks from a `quot-marks` metadata field. -- -- @tparam pandoc.MetaValue The content of a metadata field. -- Must be either of type pandoc.MetaInlines or pandoc.MetaList. -- @treturn[1] {pandoc.Str,pandoc.Str,pandoc.Str,pandoc.Str} -- A table of quotation marks -- @treturn[2] `nil` if an error occurred. -- @treturn[2] string An error message. function get_quotation_marks (meta) if meta.t == 'MetaInlines' then local marks = stringify(meta) if text.len(marks) ~= 4 then return nil, 'not four quotation marks' end local ret = {} for i = 1, 4 do ret[i] = text.sub(marks, i, i) end return ret elseif meta.t == 'MetaList' then local marks = map(stringify, meta) if #marks ~= 4 then return nil, 'not four quotation marks' end return marks end return nil, 'neither a string nor a list' end end do local stringify = pandoc.utils.stringify -- Holds the quotation marks for the language of the document. -- Common to `configure` and `insert_quot_marks`. local QUOT_MARKS = nil --- Determines the quotation marks for the document. -- -- Stores them in `QUOT_MARKS`, which it shares with `insert_quot_marks`. -- Prints errors to STDERR. -- -- @tparam pandoc.Meta The document's metadata. function configure (meta) local quot_marks, lang if meta['quot-marks-by-lang'] then for k, v in pairs(meta['quot-marks-by-lang']) do local quot_marks, err = get_quotation_marks(v) if not quot_marks then warn('metadata field "quot-marks-by-lang": lang "%s": %s.', k, err) return end QUOT_MARKS_BY_LANG[k] = quot_marks end end if meta['quot-marks'] then local err quot_marks, err = get_quotation_marks(meta['quot-marks']) if not quot_marks then warn('metadata field "quot-marks": %s.', err) return end elseif meta['quot-lang'] then lang = stringify(meta['quot-lang']) elseif meta['lang'] then lang = stringify(meta['lang']) end if lang then for i = 1, 3 do if i == 2 then lang = lang:match '^(%a+)' elseif i == 3 then local expr = '^' .. lang .. '-' for k, v in pairs(QUOT_MARKS_BY_LANG) do if k:match(expr) then quot_marks = v break end end end if i < 3 then quot_marks = QUOT_MARKS_BY_LANG[lang] end if quot_marks then break end end end if quot_marks then QUOT_MARKS = map(pandoc.Str, quot_marks) elseif lang then warn('%s: unknown language.', lang) end end do local insert = table.insert --- Replaces quoted elements with quoted text. -- -- Uses the quotation marks stored in `QUOT_MARKS`, -- which it shares with `configure`. -- -- @tparam pandoc.Quoted quoted A quoted element. -- @treturn {pandoc.Str,pandoc.Inline,...,pandoc.Str} -- A list with the opening quote (as `pandoc.Str`), -- the content of `quoted`, and the closing quote (as `pandoc.Str`). function insert_quot_marks (quoted) if not QUOT_MARKS then return end local quote_type = quoted.quotetype local inlines = quoted.content local left, right if quote_type == 'DoubleQuote' then left, right = 1, 2 elseif quote_type == 'SingleQuote' then left, right = 3, 4 else error('unknown quote type') end insert(inlines, 1, QUOT_MARKS[left]) insert(inlines, QUOT_MARKS[right]) return inlines end end end return {{Meta = configure}, {Quoted = insert_quot_marks}}