Module:add etymology

Revision as of 17:57, 12 August 2024 by TheNightAvl (talk | contribs)

Underlies {{add etymology}}.


local export = {}

local getArgs = require("Module:Arguments").getArgs

local m_languages = require("Module:languages")

-- Names of the part-of-speech headings that export.format searches for
-- (written in wikitext as "=== <name> ===") to locate an entry's main section.
-- The entries are listed alphabetically.
local pos = {
	"Adjective", "Adverb",
	"Circumfix", "Conjunction", "Contraction",
	"Infix", "Interjection",
	"Noun", "Numeral",
	"Participle", "Particle", "Phrase", "Prefix", "Preposition", "Pronoun", "Proper noun",
	"Suffix",
	"Verb",
}

-- Escapes every Lua pattern magic character in `text` so that it can be
-- spliced into a pattern and matched literally (language names and codes
-- frequently contain "-", which is otherwise a lazy quantifier).
local function pattern_escape(text)
	return (string.gsub(text, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Repeatedly re-applies `pattern` (whose single capture is "a section plus
-- anything up to the last following level-3 heading") until it no longer
-- matches, leaving only the leading section without any later headings.
local function trim_to_leading_section(text, pattern)
	local shorter = mw.ustring.match(text, pattern)
	while shorter do
		text = shorter
		shorter = mw.ustring.match(text, pattern)
	end
	return text
end

-- Reformats one language section of an entry so that a further etymology
-- can be added to it, and returns the resulting wikitext.
--
-- Frame arguments (read through Module:Arguments):
--   1        language code, resolved with Module:languages' get_by_code
--   2        if equal to "format", only normalise the etymology headings
--            and return the section without adding a new heading
--   entry    title of the page to read (defaults to the current page)
--   content  wikitext to use instead of the page's own content
--
-- Behaviour:
--   * Un-templated "=== Etymology [n] ===" headings are rewritten to
--     "=== {{Etymology|<code>}} ===" (single) or
--     "=== {{Etymology|<code>|<n>}} ===" (multiple).
--   * If the section already has numbered etymologies, a heading for the
--     next number is appended.
--   * Otherwise the section is rebuilt as: language heading, pronunciation,
--     etymology 1, any text preceding those, the main (part-of-speech)
--     section with all its headings pushed one level down, and an empty
--     "Etymology 2" heading.
--
-- Raises an error if the language, the page content, the language section
-- or a part-of-speech heading cannot be found.
--
-- NOTE(review): in the single-etymology rebuild, a level-3 section that lies
-- after the pronunciation/etymology section but before the first
-- part-of-speech heading is not part of any piece that is concatenated, so
-- it is not carried into the result.
function export.format(frame)
	local args = getArgs(frame)
	local current = (args["entry"] and mw.title.makeTitle("", args["entry"])) or mw.title.getCurrentTitle()
	local language = assert(
		m_languages.get_by_code(args[1]),
		"Unrecognised language code: " .. tostring(args[1])
	)

	-- Literal-matching versions of the name and code for use inside patterns.
	local name_pattern = pattern_escape(language.name)
	local code_pattern = pattern_escape(language.code)

	local content = args["content"] or current:getContent()
	-- getContent() yields nil when the page cannot be read.
	assert(content, "No content found for page: " .. current.prefixedText)

	-- Isolate the language section. The lazy ".-" stops at the *next* level-2
	-- heading; if there is none, the section runs to the end of the page.
	local heading_pattern = "==%s*" .. name_pattern .. "%s*=="
	content = assert(
		mw.ustring.match(content, "(" .. heading_pattern .. ".-)%s==[^=]+==%s")
			or mw.ustring.match(content, heading_pattern .. ".+"),
		"No " .. language.name .. " section found in:\n" .. content
	)

	-- Replace un-templated etymology headings. gmatch iterates over the
	-- section as it was before the loop, so rewriting `content` is safe.
	local etymology_i = 1
	for etymology_heading in mw.ustring.gmatch(content, "===%s*Etymology%s?[0-9]*%s*===") do
		local literal_heading = pattern_escape(etymology_heading)
		if etymology_i == 1 then
			-- Only the first occurrence is replaced so that a second heading
			-- with identical text is still numbered on its own iteration.
			content = mw.ustring.gsub(content, literal_heading, "=== {{Etymology|" .. language.code .. "}} ===", 1)
		else
			if etymology_i == 2 then
				-- A second heading exists: the first one now needs a number.
				content = mw.ustring.gsub(content, "{{Etymology%|(" .. code_pattern .. ")}}", "{{Etymology|%1|1}}")
			end
			content = mw.ustring.gsub(content, literal_heading, "=== {{Etymology|" .. language.code .. "|" .. etymology_i .. "}} ===", 1)
		end
		etymology_i = etymology_i + 1
	end

	-- Recount from the templated, numbered headings: the last one found
	-- determines the number of the next etymology.
	for etymology_n in mw.ustring.gmatch(content, "===%s*{{Etymology%|" .. code_pattern .. "%|([0-9]+)}}%s*===") do
		etymology_i = tonumber(etymology_n) + 1
	end

	if args[2] == "format" then -- only formatting was requested
		return content
	elseif etymology_i > 2 then -- already set up for multiple etymologies
		return content .. "\n\n=== {{Etymology|" .. language.code .. "|" .. etymology_i .. "}} ==="
	end

	-- Reformat a single-etymology section.

	-- Both patterns capture from the named heading up to (but excluding) the
	-- last following level-3 heading; trim_to_leading_section narrows that
	-- down to the named section alone.
	local regex_pronunciation = "(===%s*Pronunciation%s*===.+)%s===[^=]+===%s"
	local regex_etymology = "(===%s*{{Etymology%|" .. code_pattern .. "}}%s*===.+)%s===[^=]+===%s"

	local section, index = {}, {}

	index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. name_pattern .. "%s*==%s)")
	assert(index.h2, "Could not locate language heading in:\n" .. content)
	index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation)
	index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology)

	-- The main section starts at the part-of-speech heading that occurs
	-- earliest in the text, whatever its position in the `pos` list, so that
	-- no earlier part-of-speech section is cut off.
	for _, ps in ipairs(pos) do
		local found = mw.ustring.find(content, "===%s*" .. ps .. "%s*===")
		if found and (index.m1 == nil or found < index.m1) then
			index.m1 = found
		end
	end
	assert(index.m1, "Could not locate head template in:\n" .. content)
	section.main = mw.ustring.sub(content, index.m1)

	-- Text between the language heading and the first recognised section.
	-- Missing pronunciation/etymology sections fall back to the main
	-- section's position so that math.min never receives nil.
	local first_section = math.min(index.pron1 or index.m1, index.e1 or index.m1, index.m1)
	section.pre = mw.ustring.sub(content, index.h2 + 1, first_section - 1)
	if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end

	-- Strip any other headings that the greedy captures dragged along.
	if section.pronunciation then
		section.pronunciation = trim_to_leading_section(section.pronunciation, regex_pronunciation)
	end

	if section.etymology then
		section.etymology = trim_to_leading_section(section.etymology, regex_etymology)
		section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. code_pattern .. ")}}", "{{Etymology|%1|1}}")
	else
		section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ==="
	end

	-- Bump heading levels down by one so they nest under "Etymology 1".
	if section.pre then section.pre = mw.ustring.gsub(section.pre, "===", "====") end
	section.main = mw.ustring.gsub(section.main, "===", "====")

	return section.heading .. ("\n" .. (section.pronunciation or "")) .. "\n" .. section.etymology .. (section.pre or "") .. section.main .. "\n=== {{Etymology|" .. language.code .. "|2}} ==="

end

return export

--[[
Debug console test string:
=p.format(mw.getCurrentFrame():newChild{title="whatever",args={"rad", ["entry"] = "ar"}})
]]