Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions src/resources/pandoc/datadir/lpegshortcode.lua
Original file line number Diff line number Diff line change
Expand Up @@ -313,12 +313,64 @@ local function wrap_lpeg_match(pattern, txt)
return txt
end

--- Encode every byte of `str` as two uppercase hexadecimal digits.
-- @tparam string str text to encode
-- @treturn string the hex encoding, twice as long as the input
local function string_to_hex(str)
  local function byte_to_hex(ch)
    return string.format('%02X', string.byte(ch))
  end
  -- Keep only gsub's first result (the rewritten string), not the match count.
  local encoded = str:gsub('.', byte_to_hex)
  return encoded
end

-- Fixed marker that prefixes every hex-encoded shortcode token.
local md_shortcode_2_uuid = "b58fc729-690b-4000-b19f-365a4093b2ff"
-- Lua-pattern form of the marker followed by its trailing hyphen: every "-"
-- is escaped as "%-" so that it matches literally.
local md_shortcode_2_uuid_pattern = md_shortcode_2_uuid:gsub("%-", "%%-") .. "%-"
--- Build the UUID-guarded token for an escaped shortcode.
-- Wraps `s` back into `{{{< ... >}}}` delimiters, hex-encodes that text and
-- frames it as `<uuid>-<hex>-`.
-- @param s inner text of the escaped shortcode
-- @treturn string the guarded, hex-encoded token
local function md_escaped_shortcode_2_fun(s)
  local escaped_source = "{{{<" .. s .. ">}}}"
  return md_shortcode_2_uuid .. "-" .. string_to_hex(escaped_source) .. "-"
end

--- Build the UUID-guarded token for a regular shortcode.
-- Reassembles the shortcode source from `open`, `space`, the entries of `lst`
-- and `close`. Each entry is first run through the `unshortcode` pattern and
-- kept unchanged when that pattern yields nothing. The reassembled text is
-- hex-encoded and framed as `<uuid>-<hex>-`.
-- @param open opening delimiter text
-- @param space text that follows the opening delimiter
-- @param lst sequence of shortcode pieces
-- @param close closing delimiter text
-- @treturn string the guarded, hex-encoded token
local function md_shortcode_2_fun(open, space, lst, close)
  local pieces = { open, space }
  for idx = 1, #lst do
    local item = lst[idx]
    pieces[#pieces + 1] = unshortcode:match(item) or item
  end
  pieces[#pieces + 1] = close
  return md_shortcode_2_uuid .. "-" .. string_to_hex(table.concat(pieces)) .. "-"
end

-- Second shortcode transformation: rewrites each shortcode as a plain,
-- UUID-guarded string. It is designed to survive the Pandoc markdown reader
-- barrier under Pandoc 3.7 and later, and is safe to use in all contexts
-- (including link and image targets). The first shortcode transformation is
-- still needed to actually convert to a span when it is safe to do so.
local md_shortcode_2
do
  -- Pieces of the pattern handed to the parser as `ignore_pattern`; together
  -- they match text of the form
  --   {.hidden .quarto-markdown-envelope-contents render-id="..."}
  local envelope_open = lpeg.P("{.hidden .quarto-markdown-envelope-contents render-id=\"")
  local envelope_close = lpeg.P("\"}")
  local envelope_attr = envelope_open * (lpeg.P(1) - envelope_close)^1 * envelope_close

  md_shortcode_2 = make_shortcode_parser({
    escaped = md_escaped_shortcode_2_fun,
    string = md_string_param,
    keyvalue = md_keyvalue_param,
    shortcode = md_shortcode_2_fun,
    ignore_pattern = envelope_attr
  })
end

return {
lpegs = {
md_shortcode = md_shortcode,
md_shortcode_2 = md_shortcode_2,
md_shortcode_2_uuid = md_shortcode_2_uuid_pattern,
unshortcode = unshortcode -- for undoing shortcodes in non-markdown contexts
},

parse_md_shortcode_2 = function(txt)
return wrap_lpeg_match(md_shortcode_2, txt)
end,

parse_md_shortcode = function(txt)
return wrap_lpeg_match(md_shortcode, txt)
end,
Expand Down
102 changes: 74 additions & 28 deletions src/resources/pandoc/datadir/readqmd.lua
Original file line number Diff line number Diff line change
Expand Up @@ -113,23 +113,20 @@ local function unescape_invalid_tags(str, tags)
return str
end

local function urldecode(url)
if url == nil then
return
end
url = url:gsub("+", " ")
url = url:gsub("%%(%x%x)", function(x)
return string.char(tonumber(x, 16))
end)
url = url:gsub('%&quot%;', '"')
return url
--- Decode a hexadecimal string back into the bytes it represents.
-- The input is consumed two characters at a time; each pair is read as a
-- base-16 number and replaced by the byte with that value.
-- @tparam string hex hexadecimal text
-- @treturn string the decoded text
local function hex_to_string(hex)
  local function pair_to_char(pair)
    return string.char(tonumber(pair, 16))
  end
  -- Keep only gsub's first result (the rewritten string), not the match count.
  local decoded = hex:gsub('..', pair_to_char)
  return decoded
end

local function readqmd(txt, opts)
-- Matches one UUID-guarded shortcode token and captures its hex payload.
local uuid_pattern = "b58fc729%-690b%-4000%-b19f%-365a4093b2ff%-([A-Fa-f0-9]+)%-"
local tags
txt = md_fenced_div.attempt_to_fix_fenced_div(txt)
-- The second result is kept so it can be handed to unescape_invalid_tags later.
txt, tags = escape_invalid_tags(txt)
txt = md_shortcode.parse_md_shortcode(txt)
txt = md_shortcode.parse_md_shortcode_2(txt)
-- No diagnostic output here: the pre-processed text must not be written to
-- stdout on every read.
local flavor = {
format = "markdown",
extensions = {},
Expand All @@ -151,17 +148,27 @@ local function readqmd(txt, opts)
-- so we need to undo that damage here

-- Restores shortcode source text inside an element's `text` field.
-- Rewrites `c.text` in place and returns `c`.
local unshortcode_text = function (c)
-- Text carrying the data-is-shortcode="1" marker goes through
-- unparse_md_shortcode first.
if c.text:match("data%-is%-shortcode%=%\"1%\"") then
c.text = md_shortcode.unparse_md_shortcode(c.text)
end
-- Each UUID-guarded token is replaced by the decoding of its hex payload
-- (gsub passes the pattern's capture to hex_to_string).
c.text = c.text:gsub(uuid_pattern, hex_to_string)
return c
end

local function filter_attrs(el)
for k,v in pairs(el.attributes) do
if type(v) == "string" and v:match("data%-is%-shortcode%=%\"1%\"") then
local new_v = md_shortcode.unparse_md_shortcode(v)
el.attributes[k] = new_v
if type(v) == "string" then
local new_str = v:gsub(uuid_pattern, hex_to_string)
-- We only assign when the value actually changed, as a partial
-- workaround for what appears to be a foundational problem in
-- Pandoc's Lua API when accessing attributes with repeated keys.
-- Quarto is still going to be broken for the case
-- where there are shortcodes inside values of attributes with
-- repeated keys:
--
-- []{k='{{< meta k1 >}}' k='{{< meta k2 >}}'}
--
-- But I don't know how to work around this.
if new_str ~= v then
el.attributes[k] = new_str
end
end
end
return el
Expand All @@ -170,9 +177,7 @@ local function readqmd(txt, opts)
local doc = pandoc.read(txt or "", flavor, opts):walk {
-- CodeBlock handler: restores the block's classes and its text.
CodeBlock = function (cb)
-- Every class name is mapped through restore_invalid_tags.
cb.classes = cb.classes:map(restore_invalid_tags)
-- Text carrying the data-is-shortcode="1" marker goes through
-- unparse_md_shortcode first.
if cb.text:match("data%-is%-shortcode%=%\"1%\"") then
cb.text = md_shortcode.unparse_md_shortcode(cb.text)
end
-- Replace each UUID-guarded token by the decoding of its hex payload.
cb.text = cb.text:gsub(uuid_pattern, hex_to_string)
-- Finally pass the text and the tags gathered by escape_invalid_tags to
-- unescape_invalid_tags.
cb.text = unescape_invalid_tags(cb.text, tags)
return cb
end,
Expand All @@ -184,20 +189,61 @@ local function readqmd(txt, opts)
Div = filter_attrs,
-- Link handler: runs the attributes through filter_attrs, then restores
-- shortcodes in the link target.
Link = function (l)
l = filter_attrs(l)
-- A target carrying the URL-encoded shortcode marker (%22 in place of each
-- double quote) is URL-decoded, passed through unparse_md_shortcode and
-- returned right away, skipping the UUID replacement below.
if l.target:match("data%-is%-shortcode%=%%221%%22") then
l.target = md_shortcode.unparse_md_shortcode(urldecode(l.target))
return l
end
-- Replace each UUID-guarded token by the decoding of its hex payload.
l.target = l.target:gsub(uuid_pattern, hex_to_string)
return l
end,
-- Image handler: runs the attributes through filter_attrs, then restores
-- shortcodes in the image source.
Image = function (i)
i = filter_attrs(i)
-- A source carrying the URL-encoded shortcode marker (%22 in place of each
-- double quote) is URL-decoded, passed through unparse_md_shortcode and
-- returned right away, skipping the UUID replacement below.
if i.src:match("data%-is%-shortcode%=%%221%%22") then
i.src = md_shortcode.unparse_md_shortcode(urldecode(i.src))
return i
end
-- Replace UUID-encoded shortcodes in i.src
i.src = i.src:gsub(uuid_pattern, hex_to_string)
return i
end,
Str = function(str_node)
  -- Expands UUID-guarded shortcode tokens found inside a Str node into the
  -- inlines produced by parsing the decoded shortcode as markdown.
  local text = str_node.text

  -- Fast path: a plain-text search for the marker. Returning nil leaves the
  -- node as it is.
  if text:find("b58fc729-690b-4000-b19f-365a4093b2ff", 1, true) == nil then
    return nil
  end

  local pieces = pandoc.Inlines{}
  local cursor = 1
  local first, last, hex_payload = text:find(uuid_pattern, cursor)

  while first do
    -- Text between the previous token and this one stays a plain Str.
    if first > cursor then
      table.insert(pieces, pandoc.Str(text:sub(cursor, first - 1)))
    end

    -- Decode the payload, turn it into span markdown and read that markdown.
    local source = hex_to_string(hex_payload)
    local span_md = md_shortcode.parse_md_shortcode(source) or ""
    local parsed = pandoc.read(span_md, "markdown")

    -- Only the content of the first block is used; anything else yields
    -- no inlines.
    local head = parsed.blocks[1]
    local inlines = head and head.content or pandoc.Inlines{}
    for _, inline in ipairs(inlines) do
      table.insert(pieces, inline)
    end

    -- Continue searching right after this token.
    cursor = last + 1
    first, last, hex_payload = text:find(uuid_pattern, cursor)
  end

  -- Whatever follows the last token is kept as a trailing Str.
  if cursor <= #text then
    table.insert(pieces, pandoc.Str(text:sub(cursor)))
  end

  return pieces
end
}
return doc
end
Expand Down
Loading