-- bavbavhaus.net/broken_links.lua
-- (repository web-view header: 52 lines, 1.7 KiB, Lua;
--  last change 2024-06-21 15:31:13 +02:00)
local l = require("pandoc.logging")
-- Require at least one input file on the command line; abort with a usage
-- message otherwise. Every argument is treated as a file to check.
assert(#arg > 0, "\n" ..
  "[ERROR] usage: pandoc lua broken_links.lua <input-file> [<input-file> ...]")
--- Tell whether a parsed document contains an element whose identifier
-- equals `anchor`. Only Div, Span, Header, CodeBlock and Code elements are
-- inspected.
-- @param doc Pandoc document, as returned by pandoc.read
-- @tparam string anchor identifier to look for (without the leading '#')
-- @treturn boolean true when a matching element was found
local function has_anchor(doc, anchor)
  local found = false
  -- Shared callback for every inspected element type. It returns nothing,
  -- so the walked document is left unmodified.
  local function check(el)
    local attr = el.attr
    if attr and attr.identifier == anchor then found = true end
  end
  -- One traversal with all element types instead of one traversal per type.
  doc:walk({
    Div = check,
    Span = check,
    Header = check,
    CodeBlock = check,
    Code = check,
  })
  return found
end

-- For every file named on the command line: parse it with pandoc.read, visit
-- each Link and print a line for every link to a ".md" file that cannot be
-- opened, or whose "#anchor" matches no identifier in the target file.
for _, path in ipairs(arg) do
  local src = assert(io.open(path), "\n" ..
    "[ERROR] could not open " .. path .. " for reading.")
  local text = src:read("a")
  src:close()

  pandoc.read(text):walk({ Link = function(link)
    local target = link.target
    -- early return for external links
    if target:find("^https?://") then return end
    if target:find("^mailto:") then return end

    -- Split at the FIRST '#': everything before it is the file path,
    -- everything after it the anchor. A target that starts with '#'
    -- (same-document anchor) yields no path and is skipped below.
    local fp, anchor = target:match("^([^#]+)#?(.*)$")
    -- early return for non markdown files
    if not fp or not fp:find("%.md$") then return end

    -- NOTE(review): fp is opened exactly as written in the link, i.e.
    -- relative to the current working directory rather than to the
    -- directory of the linking file -- confirm this matches how the
    -- script is invoked.
    local dest = io.open(fp)
    if dest == nil then
      print("Broken internal link: " .. path .. " -> " .. fp)
      return
    end
    -- No anchor: the file exists, nothing more to verify.
    if anchor == "" then
      dest:close()
      return
    end

    local dest_text = dest:read("a")
    dest:close()
    if not has_anchor(pandoc.read(dest_text), anchor) then
      print("Broken link: " .. path .. " -> " .. target)
    end
  end })
end