Module:add etymology: Difference between revisions
Jump to navigation
Jump to search
TheNightAvl (talk | contribs) No edit summary |
TheNightAvl (talk | contribs) No edit summary |
||
(22 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
local m_languages = require("Module:languages") | local m_languages = require("Module:languages") | ||
local pos = { | |||
"Adjective", | |||
"Adverb", | |||
"Circumfix", | |||
"Conjunction", | |||
"Contraction", | |||
"Infix", | |||
"Interjection", | |||
"Noun", | |||
"Numeral", | |||
"Participle", | |||
"Particle", | |||
"Phrase", | |||
"Prefix", | |||
"Preposition", | |||
"Pronoun", | |||
"Proper noun", | |||
"Suffix", | |||
"Verb" | |||
} | |||
function export.format(frame) | function export.format(frame) | ||
Line 10: | Line 31: | ||
local language = m_languages.get_by_code(args[1]) | local language = m_languages.get_by_code(args[1]) | ||
local content = current:getContent() | local content = args["content"] or current:getContent() | ||
content = mw.ustring.match(content, "(==%s*" .. language.name .. "%s*==.+) | content = assert(mw.ustring.match(content, "(==%s*" .. language.name .. "%s*==.+)%s==[^=]+==%s") or mw.ustring.match(content, "==%s*" .. language.name .. "%s*==.+"), "No " .. language.name .. " section found in:\n" .. content) | ||
local etymology_i = 1 | local etymology_i = 1 | ||
Line 38: | Line 59: | ||
-- reformats single-etymology pages | -- reformats single-etymology pages | ||
local regex_pronunciation = "(===%s*Pronunciation%s*===.+) | local regex_pronunciation = "(===%s*Pronunciation%s*===.+)%s===[^=]+===%s" | ||
local regex_etymology = "(===%s*{{Etymology%|" .. language.code .. "}}%s*===.+) | local regex_etymology = "(===%s*{{Etymology%|" .. language.code .. "}}%s*===.+)%s===[^=]+===%s" | ||
local section, index = {}, {} | local section, index = {}, {} | ||
index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. language.name .. "%s*== | index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. language.name .. "%s*==%s)") | ||
index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation) | index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation) | ||
index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology) | index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology) | ||
local check_init = 1 | local check_init = 1 | ||
while mw.ustring.find(content, " | --[[ | ||
local match_start, _, section_check = mw.ustring.find(content, " | while mw.ustring.find(content, "===[^=]+===.+", check_init) do | ||
if mw.ustring.find(frame:preprocess(section_check), "^===[^=]+=== | local match_start, _, section_check = mw.ustring.find(content, "(===[^=]+===.+)", check_init) | ||
index.m1 | assert(section_check) | ||
if mw.ustring.find(frame:preprocess(section_check), "^===[^=]+===%s+%<strong class=\"headword\"") then | |||
index.m1 = mw.ustring.find(content, section_check, check_init, true) | |||
section.main = section_check | |||
break | break | ||
else | else | ||
check_init = match_start + | check_init = match_start + 1 | ||
end | end | ||
end | end | ||
if section.main = | ]]-- | ||
for _, ps in ipairs(pos) do | |||
index.m1 = mw.ustring.find(content, "===%s*" .. ps .. "%s*===") | |||
if index.m1 then | |||
section.main = mw.ustring.sub(content, index.m1) | |||
break | |||
end | |||
end | |||
assert(section.main, "Could not locate head template in:\n" .. content) | |||
section.pre = mw.ustring.sub(content, index.h2 + 1, math.min(index.pron1, index.e1, index.m1) - 1) | section.pre = mw.ustring.sub(content, index.h2 + 1, math.min(index.pron1 or math.huge, index.e1 or math.huge, index.m1 or math.huge) - 1) | ||
if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end | if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end | ||
-- strips of any other headings | -- strips of any other headings | ||
Line 78: | Line 110: | ||
end | end | ||
end | end | ||
section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. language.code .. ")}}", "{{Etymology|%1|1}}") | section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. language.code .. ")}}", "{{Etymology|%1|1}}") .. "\n" | ||
else | else | ||
section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ===" | section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ===\n\n" | ||
end | end | ||
Line 87: | Line 119: | ||
section.main = mw.ustring.gsub(section.main, "===", "====") | section.main = mw.ustring.gsub(section.main, "===", "====") | ||
return section.heading .. ("\n" .. (section.pronunciation or "")) .. "\n" .. section.etymology .. (section.pre or "") .. section.main .. "\n\n=== {{Etymology|" .. language.code .. "|2}} ===" | |||
end | end |
Latest revision as of 19:15, 12 August 2024
Underlies {{add etymology}}.
local export = {}
local getArgs = require("Module:Arguments").getArgs
local m_languages = require("Module:languages")
-- Part-of-speech heading names. export.format searches the entry text for a
-- level-3 heading ("=== Name ===") bearing one of these names in order to
-- find where the main (headword) section starts.
local pos = {
"Adjective",
"Adverb",
"Circumfix",
"Conjunction",
"Contraction",
"Infix",
"Interjection",
"Noun",
"Numeral",
"Participle",
"Particle",
"Phrase",
"Prefix",
"Preposition",
"Pronoun",
"Proper noun",
"Suffix",
"Verb"
}
-- Escapes Lua pattern magic characters so that `s` is matched literally when
-- spliced into a pattern. Operates bytewise; every magic character is ASCII,
-- so multi-byte UTF-8 sequences pass through untouched.
local function escape_pattern(s)
	return (string.gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Returns the character index of the earliest part-of-speech heading
-- ("=== Name ===" for any name in `pos`) found in `content`, or nil if none
-- is present. The earliest position in the text wins, regardless of the
-- order of names in `pos`.
local function find_first_pos_heading(content)
	local first
	for _, ps in ipairs(pos) do
		local start = mw.ustring.find(content, "===%s*" .. ps .. "%s*===")
		if start and (first == nil or start < first) then
			first = start
		end
	end
	return first
end

--- Reformats a language section of an entry so that a further etymology
-- section can be added to it.
--
-- Arguments (read from `frame` through getArgs):
--   [1]        language code, resolved with m_languages.get_by_code
--   [2]        if equal to "format", only normalise the etymology headings
--              and return the section without appending a new one
--   entry      optional page title to read instead of the current page
--   content    optional wikitext to process instead of a page's content
--
-- @param frame the Scribunto frame object
-- @return the language section as wikitext: either just normalised
--   (args[2] == "format"), or with a new, empty
--   "=== {{Etymology|code|N}} ===" heading appended at the end
-- Raises an error if the language, the page content, the language section,
-- its heading, or a part-of-speech heading cannot be found.
function export.format(frame)
	local args = getArgs(frame)
	local current = (args["entry"] and mw.title.makeTitle("", args["entry"])) or mw.title.getCurrentTitle()
	local language = m_languages.get_by_code(args[1])
	assert(language, "No language found for code: " .. tostring(args[1]))

	local content = args["content"] or current:getContent()
	-- getContent() yields nil for a page that cannot be read; fail clearly
	-- instead of erroring inside the pattern matching below.
	assert(content, "Could not read page content")

	-- Language name/code may contain pattern magic characters (e.g. "-").
	local name_pat = escape_pattern(language.name)
	local code_pat = escape_pattern(language.code)

	-- Isolate this language's section. The lazy ".-" stops at the *next*
	-- level-2 heading; if there is none, the section runs to the end.
	content = assert(
		mw.ustring.match(content, "(==%s*" .. name_pat .. "%s*==.-)%s==[^=]+==%s")
			or mw.ustring.match(content, "==%s*" .. name_pat .. "%s*==.+"),
		"No " .. language.name .. " section found in:\n" .. content
	)

	local etymology_i = 1
	for etymology_heading in mw.ustring.gmatch(content, "===%s*Etymology%s?[0-9]*%s*===") do -- replaces un-templated etymology headings
		local heading_pat = escape_pattern(etymology_heading)
		if etymology_i == 1 then
			content = mw.ustring.gsub(content, heading_pat, "=== {{Etymology|" .. language.code .. "}} ===")
		elseif etymology_i == 2 then
			-- a second heading exists, so the first one needs a number too
			content = mw.ustring.gsub(content, "{{Etymology%|(" .. code_pat .. ")}}", "{{Etymology|%1|1}}")
		end
		content = mw.ustring.gsub(content, heading_pat, "=== {{Etymology|" .. language.code .. "|" .. etymology_i .. "}} ===")
		etymology_i = etymology_i + 1
	end

	-- recounts etymology headings: the next number follows the last numbered
	-- templated heading found
	for etymology_n in mw.ustring.gmatch(content, "===%s*{{Etymology%|" .. code_pat .. "%|([0-9]+)}}%s*===") do
		etymology_i = tonumber(etymology_n) + 1
	end

	if args[2] == "format" then --if just wanting to format
		return content
	elseif etymology_i > 2 then --if already set up for multiple etymologies
		return content .. "\n\n=== {{Etymology|" .. language.code .. "|" .. etymology_i .. "}} ==="
	end

	-- reformats single-etymology pages
	local regex_pronunciation = "(===%s*Pronunciation%s*===.+)%s===[^=]+===%s"
	local regex_etymology = "(===%s*{{Etymology%|" .. code_pat .. "}}%s*===.+)%s===[^=]+===%s"
	local section, index = {}, {}
	index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. name_pat .. "%s*==%s)")
	assert(index.h2, "Could not locate " .. language.name .. " heading in:\n" .. content)
	index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation)
	index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology)

	-- The main section runs from the first part-of-speech heading to the end.
	index.m1 = find_first_pos_heading(content)
	assert(index.m1, "Could not locate head template in:\n" .. content)
	section.main = mw.ustring.sub(content, index.m1)

	-- Anything between the language heading and the first recognised section.
	section.pre = mw.ustring.sub(content, index.h2 + 1, math.min(index.pron1 or math.huge, index.e1 or math.huge, index.m1) - 1)
	if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end

	-- strips off any other headings: the greedy captures above may span
	-- several level-3 sections, so re-match until only the first remains
	while section.pronunciation do
		local trimmed = mw.ustring.match(section.pronunciation, regex_pronunciation)
		if trimmed == nil then
			break
		end
		section.pronunciation = trimmed
	end
	if section.etymology then
		while true do
			local trimmed = mw.ustring.match(section.etymology, regex_etymology)
			if trimmed == nil then
				break
			end
			section.etymology = trimmed
		end
		section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. code_pat .. ")}}", "{{Etymology|%1|1}}") .. "\n"
	else
		section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ===\n\n"
	end

	-- bump down heading levels
	if section.pre then section.pre = mw.ustring.gsub(section.pre, "===", "====") end
	section.main = mw.ustring.gsub(section.main, "===", "====")

	return section.heading .. ("\n" .. (section.pronunciation or "")) .. "\n" .. section.etymology .. (section.pre or "") .. section.main .. "\n\n=== {{Etymology|" .. language.code .. "|2}} ==="
end
return export
--[[
Debug console test string:
=p.format(mw.getCurrentFrame():newChild{title="whatever",args={"rad", ["entry"] = "ar"}})
]]