Module:add etymology: Difference between revisions

From Laenkea
Jump to navigation Jump to search
No edit summary
No edit summary
 
(22 intermediate revisions by the same user not shown)
Line 4: Line 4:


local m_languages = require("Module:languages")
local m_languages = require("Module:languages")
local pos = {
"Adjective",
"Adverb",
"Circumfix",
"Conjunction",
"Contraction",
"Infix",
"Interjection",
"Noun",
"Numeral",
"Participle",
"Particle",
"Phrase",
"Prefix",
"Preposition",
"Pronoun",
"Proper noun",
"Suffix",
"Verb"
}


function export.format(frame)
function export.format(frame)
Line 10: Line 31:
local language = m_languages.get_by_code(args[1])
local language = m_languages.get_by_code(args[1])
local content = current:getContent()
local content = args["content"] or current:getContent()
content = mw.ustring.match(content, "(==%s*" .. language.name .. "%s*==.+)\n==[^=]+==\n") or mw.ustring.match(content, "==%s*" .. language.name .. "%s*==.+") or error("No section found")
content = assert(mw.ustring.match(content, "(==%s*" .. language.name .. "%s*==.+)%s==[^=]+==%s") or mw.ustring.match(content, "==%s*" .. language.name .. "%s*==.+"), "No " .. language.name .. " section found in:\n" .. content)


local etymology_i = 1
local etymology_i = 1
Line 38: Line 59:
-- reformats single-etymology pages
-- reformats single-etymology pages
local regex_pronunciation = "(===%s*Pronunciation%s*===.+)\n===[^=]+===\n"
local regex_pronunciation = "(===%s*Pronunciation%s*===.+)%s===[^=]+===%s"
local regex_etymology = "(===%s*{{Etymology%|" .. language.code .. "}}%s*===.+)\n===[^=]+===\n"
local regex_etymology = "(===%s*{{Etymology%|" .. language.code .. "}}%s*===.+)%s===[^=]+===%s"
local section, index = {}, {}
local section, index = {}, {}
index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. language.name .. "%s*==\n)")
index.h1, index.h2, section.heading = mw.ustring.find(content, "(==%s*" .. language.name .. "%s*==%s)")
index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation)
index.pron1, index.pron2, section.pronunciation = mw.ustring.find(content, regex_pronunciation)
index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology)
index.e1, index.e2, section.etymology = mw.ustring.find(content, regex_etymology)
local check_init = 1
local check_init = 1
while mw.ustring.find(content, "\n===[^=]+===\n.+", check_init) do
--[[
local match_start, _, section_check = mw.ustring.find(content, "\n(===[^=]+===\n.+)", check_init)
while mw.ustring.find(content, "===[^=]+===.+", check_init) do
if mw.ustring.find(frame:preprocess(section_check), "^===[^=]+===\n+<strong class=\"headword\"") then
local match_start, _, section_check = mw.ustring.find(content, "(===[^=]+===.+)", check_init)
index.m1, section.main = mw.ustring.find(content, section_check, check_init, true), section_check
assert(section_check)
if mw.ustring.find(frame:preprocess(section_check), "^===[^=]+===%s+%<strong class=\"headword\"") then
index.m1 = mw.ustring.find(content, section_check, check_init, true)
section.main = section_check
break
break
else
else
check_init = match_start + 2
check_init = match_start + 1
end
end
end
end
if section.main == nil then error("Could not locate head template") end
]]--
for _, ps in ipairs(pos) do
index.m1 = mw.ustring.find(content, "===%s*" .. ps .. "%s*===")
if index.m1 then
section.main = mw.ustring.sub(content, index.m1)
break
end
end
assert(section.main, "Could not locate head template in:\n" .. content)
section.pre = mw.ustring.sub(content, index.h2 + 1, math.min(index.pron1, index.e1, index.m1) - 1)
section.pre = mw.ustring.sub(content, index.h2 + 1, math.min(index.pron1 or math.huge, index.e1 or math.huge, index.m1 or math.huge) - 1)
if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end
if mw.ustring.find(section.pre, "%S") == nil then section.pre = nil end
-- strips of any other headings
-- strips of any other headings
Line 78: Line 110:
end
end
end
end
section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. language.code .. ")}}", "{{Etymology|%1|1}}")
section.etymology = mw.ustring.gsub(section.etymology, "{{Etymology%|(" .. language.code .. ")}}", "{{Etymology|%1|1}}") .. "\n"
else
else
section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ==="
section.etymology = "=== {{Etymology|" .. language.code .. "|1}} ===\n\n"
end
end
Line 87: Line 119:
section.main = mw.ustring.gsub(section.main, "===", "====")
section.main = mw.ustring.gsub(section.main, "===", "====")
mw.log("••••••••••••••• PRE-PROCESSED")
return section.heading .. ("\n" .. (section.pronunciation or "")) .. "\n" .. section.etymology .. (section.pre or "") .. section.main .. "\n\n=== {{Etymology|" .. language.code .. "|2}} ==="
mw.log(content)
mw.log("\n••••••••••••••• RESULT")
mw.log("•••••••••••••••••••• HEADING")
mw.log(section.heading)
mw.log("•••••••••••••••••••• PRONUNCIATION")
mw.log(section.pronunciation)
mw.log("•••••••••••••••••••• ETYMOLOGY")
mw.log(section.etymology)
mw.log("•••••••••••••••••••• PRE")
mw.log(section.pre)
mw.log("•••••••••••••••••••• MAIN")
mw.log(section.main)
mw.log("••••••••••••••••••••\n=== {{Etymology|" .. language.code .. "|2}} ===")
return section.heading .. (section.pronunciation or "") .. section.etymology .. (section.pre or "") .. section.main .. "\n\n=== {{Etymology|" .. language.code .. "|2}} ==="


end
end

Latest revision as of 19:15, 12 August 2024

Underlies {{add etymology}}.


local export = {}

local getArgs = require("Module:Arguments").getArgs

local m_languages = require("Module:languages")

-- Part-of-speech names. Each one is searched for as a level-3 heading
-- ("=== Name ===") to find where an entry's main section begins.
local pos = {
	"Adjective", "Adverb",
	"Circumfix", "Conjunction", "Contraction",
	"Infix", "Interjection",
	"Noun", "Numeral",
	"Participle", "Particle", "Phrase", "Prefix", "Preposition", "Pronoun", "Proper noun",
	"Suffix",
	"Verb",
}

-- Escapes Lua pattern magic characters so that `text` is matched literally.
local function escape_pattern(text)
	return (text:gsub("[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Escapes "%" so that `text` is inserted literally when used as a gsub replacement.
local function escape_replacement(text)
	return (text:gsub("%%", "%%%%"))
end

--- Rewrites a language section's wikitext so that a further etymology can be added.
--
-- Arguments (read through getArgs(frame)):
--   [1]        language code, looked up with m_languages.get_by_code
--   [2]        if equal to "format", only the normalised section is returned
--   entry      optional page name to read instead of the current page
--   content    optional wikitext to use instead of fetching the page content
--
-- Returns the wikitext of the language section: with its etymology headings
-- templated and numbered, and (unless [2] is "format") followed by a new,
-- empty "{{Etymology|code|n}}" heading.
--
-- Raises an error if the language, the page content, the language section or a
-- part-of-speech heading cannot be found.
function export.format(frame)
	local args = getArgs(frame)
	local current = (args["entry"] and mw.title.makeTitle("", args["entry"])) or mw.title.getCurrentTitle()
	local language = assert(m_languages.get_by_code(args[1]), "No language found for code: " .. tostring(args[1]))

	local content = args["content"] or current:getContent()
	-- getContent() yields nil for a page that does not exist; fail with a clear message.
	assert(content, "No content found for page: " .. current.prefixedText)

	-- Names and codes may contain pattern magic characters (e.g. "-"), so they
	-- are escaped before being spliced into patterns or replacement strings.
	local code_pattern = escape_pattern(language.code)
	local code_repl = escape_replacement(language.code)
	local heading_pattern = "==%s*" .. escape_pattern(language.name) .. "%s*=="

	-- Lazy ".-" stops at the NEXT level-2 heading; a greedy match would run on to
	-- the last one and drag other languages' sections into the result.
	content = assert(
		mw.ustring.match(content, "(" .. heading_pattern .. ".-)%s==[^=]+==%s")
			or mw.ustring.match(content, heading_pattern .. ".+"),
		"No " .. language.name .. " section found in:\n" .. content
	)

	local etymology_i = 1
	-- replaces un-templated etymology headings
	-- (the matched heading text holds only "=", whitespace, letters and digits,
	-- so it is safe to reuse as a pattern)
	for etymology_heading in mw.ustring.gmatch(content, "===%s*Etymology%s?[0-9]*%s*===") do
		if etymology_i == 1 then
			content = mw.ustring.gsub(content, etymology_heading, "=== {{Etymology|" .. code_repl .. "}} ===")
		elseif etymology_i == 2 then
			-- a second heading exists, so the first (un-numbered) one becomes number 1
			content = mw.ustring.gsub(content, "{{Etymology%|(" .. code_pattern .. ")}}", "{{Etymology|%1|1}}")
		end
		content = mw.ustring.gsub(content, etymology_heading, "=== {{Etymology|" .. code_repl .. "|" .. etymology_i .. "}} ===")
		etymology_i = etymology_i + 1
	end

	-- recounts etymology headings: the next free number follows the last numbered template
	for etymology_n in mw.ustring.gmatch(content, "===%s*{{Etymology%|" .. code_pattern .. "%|([0-9]+)}}%s*===") do
		etymology_i = tonumber(etymology_n) + 1
	end

	if args[2] == "format" then --if just wanting to format
		return content
	elseif etymology_i > 2 then --if already set up for multiple etymologies
		return content .. "\n\n=== {{Etymology|" .. language.code .. "|" .. etymology_i .. "}} ==="
	end

	-- reformats single-etymology pages

	local regex_pronunciation = "(===%s*Pronunciation%s*===.+)%s===[^=]+===%s"
	local regex_etymology = "(===%s*{{Etymology%|" .. code_pattern .. "}}%s*===.+)%s===[^=]+===%s"

	local _, heading_end, heading = mw.ustring.find(content, "(" .. heading_pattern .. "%s)")
	assert(heading_end, "The " .. language.name .. " heading is not followed by whitespace in:\n" .. content)

	local pron_start, _, pronunciation = mw.ustring.find(content, regex_pronunciation)
	local ety_start, _, etymology = mw.ustring.find(content, regex_etymology)

	-- The main section starts at the part-of-speech heading that comes first in
	-- the text (not first in the `pos` list), so that no earlier part-of-speech
	-- section is lost. An earlier approach that detected the headword by
	-- preprocessing each section is no longer used.
	local main_start
	for _, ps in ipairs(pos) do
		local found = mw.ustring.find(content, "===%s*" .. ps .. "%s*===")
		if found and (main_start == nil or found < main_start) then
			main_start = found
		end
	end
	assert(main_start, "Could not locate head template in:\n" .. content)
	local main = mw.ustring.sub(content, main_start)

	-- anything between the language heading and the first recognised section
	local pre = mw.ustring.sub(content, heading_end + 1, math.min(pron_start or math.huge, ety_start or math.huge, main_start) - 1)
	if mw.ustring.find(pre, "%S") == nil then pre = nil end

	-- strips off any other headings: the greedy patterns capture up to the last
	-- level-3 heading, so they are re-applied until only the first section is left
	while pronunciation do
		local trimmed = mw.ustring.match(pronunciation, regex_pronunciation)
		if trimmed == nil then break end
		pronunciation = trimmed
	end

	if etymology then
		while true do
			local trimmed = mw.ustring.match(etymology, regex_etymology)
			if trimmed == nil then break end
			etymology = trimmed
		end
		etymology = mw.ustring.gsub(etymology, "{{Etymology%|(" .. code_pattern .. ")}}", "{{Etymology|%1|1}}") .. "\n"
	else
		etymology = "=== {{Etymology|" .. language.code .. "|1}} ===\n\n"
	end

	-- bump down heading levels
	if pre then pre = mw.ustring.gsub(pre, "===", "====") end
	main = mw.ustring.gsub(main, "===", "====")

	return heading .. "\n" .. (pronunciation or "") .. "\n" .. etymology .. (pre or "") .. main .. "\n\n=== {{Etymology|" .. language.code .. "|2}} ==="

end

return export

--[[
Debug console test string:
=p.format(mw.getCurrentFrame():newChild{title="whatever",args={"rad", ["entry"] = "ar"}})
]]