-- Module:rad-stem
--
-- Revision as of 11:51, 1 December 2023 by TheNightAvl (talk | contribs)
local export = {}
local getArgs = require('Module:Arguments').getArgs

local m_IPA = require("Module:rad-IPA")
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_parameters = require("Module:parameters")

-- Character classes, stored as plain strings so they can be spliced into
-- ustring pattern sets ("[" .. V .. "]") further down in this module.
-- NOTE(review): the linguistic labels below are inferred from the variable
-- names and from how the classes are used in the patterns; confirm with the
-- language description.

-- Vowel letters (used as the vowel set in the resolve_ending patterns and in
-- the "does the nucleus end in a vowel" test of getNounStem).
local V = "aáàâảeéèiíìỉoóòøuúùûūủyýỳỷ"
-- Consonant letters, plus "°" (which resolve_ending strips at the very end).
local C = "bcdðfghħjĵkķlmnņpqrsștvwxzþ°"
-- Presumably voiced / unvoiced obstruents; only used here to build O.
local O_voiced = "bdðgħvxz"
local O_unvoiced = "cfhkķpqsștþ"
local O = O_voiced .. O_unvoiced
-- Presumably nasals; only used here to build R.
local N = "mnņ"
-- Presumably liquids.
local L = "lr"
-- L and N combined; not referenced by the code visible in this file.
local R = L .. N
-- Presumably glides.
local J = "jĵw"

-- Lookup from every recognised stem ending to its declension class code:
--   'C'  heavy, 'CH' heavy spirant, 'G' semi-light, 'V' light,
--   'VH' light spirant.
stem_data = {}
do
	-- Files every ending listed in `endings` under the class code `class`.
	local function register(class, endings)
		for _, ending in ipairs(endings) do
			stem_data[ending] = class
		end
	end

	-- heavy stems --
	register('C', {
		'b', 'c', 'd', 'ð', 'dz', 'f', 'g', 'j', 'k', 'l', 'm',
		'n', 'ņ', 'p', 'r', 's', 'ș', 't', 'v', 'x', 'z', 'þ',
	})

	-- heavy spirants --
	register('CH', {
		'bH', 'cH', 'dH', 'ðH', 'dzH', 'fH', 'gH', 'jH', 'kH', 'lḤ', 'mḤ',
		'nḤ', 'ņḤ', 'pH', 'rḤ', 'sH', 'șH', 'tH', 'vH', 'xH', 'zH', 'þH',
	})

	-- semi-light --
	register('G', {
		'a^eG', 'a^oG', 'eG', 'G', 'ieG', 'iG', 'oG', 'øG', 'uG', 'yG',
	})

	-- light --
	register('V', {
		'a^e', 'a^o', 'ai', 'au', 'e', 'i', 'ie', 'nj^e', 'o', 'ø',
		'oe', 'ove', 't^o', 'u', 'uve', 'Vj', 'y',
	})

	-- light spirant --
	register('VH', { 'H', 'HH', 'Ḥ', 'ḤḤ' })
end

-- Ending paradigms for light ('V') and semi-light ('G') noun stems, keyed by
-- the same ending codes as stem_data.
--
-- Light rows, as consumed by export.getNounStem:
--   [1] nominative ending
--   [2] base vowel (nom. pl. = [2].."re", gen. pl. = [2].."ri"; also the
--       fallback for the optional slots below)
--   [3] optional genitive vowel (".."sk" is appended), default [2]
--   [4] optional dative ending, default [2].."n"
--   [5] optional accusative ending, default [2].."st"
--   [6] optional instrumental ending, default [2].."vúr"
--
-- Semi-light rows: [1] nom, [2] gen, [3] dat, [4] acc, [5] ins,
--   [6] plural base (nom. pl. = [6].."re", gen. pl. = [6].."ri").
--
-- NOTE(review): stem_data also accepts the light endings 'oe', 'ove', 'uve'
-- and 'Vj', which have no row here, while 'áj' has a row but is not a key of
-- stem_data — confirm which side is out of date.
light_noun_data = {
	['a^e'] = {"a", "a", "e", "an", "ast"},
	['a^o'] = {"a", "a", "u", "an", "ast"},
	['ai'] = {"ai", "ai"},
	['áj'] = {"ájr", "áj"},
	['au'] = {"au", "au"},
	['e'] = {"a", "e", "i"},
	['i'] = {"e", "i"},
	['ie'] = {"ía", "ie"},
	['o'] = {"a", "o", "u", "un", "ust"},
	['ø'] = {"a", "ø", "y"},
	['u'] = {"e", "u"},
	['y'] = {"e", "y"},
	
	['a^eG'] = {"ár", "ív", "á", "ád", "aúr", "ai"},
	['a^oG'] = {"ár", "úv", "á", "ád", "aúr", "ai"},
	['eG'] = {"ír", "í", "ea", "íd", "eúr", "í"},
	['iG'] = {"ír", "í", "ea", "íd", "iúr", "í"},
	['ieG'] = {"ỉr", "ỉ", "iea", "ỉd", "ieúr", "ỉ"},
	['oG'] = {"úr", "ív", "oa", "úd", "oúr", "í"},
	['øG'] = {"ýr", "ýj", "øa", "ýd", "øúr", "ý"},
	-- These two rows used to repeat the 'oG' / 'øG' keys, which silently
	-- overwrote the rows above and left 'uG' / 'yG' (both valid in stem_data)
	-- without any paradigm. Like eG/iG, each pair differs only in the
	-- instrumental.
	['uG'] = {"úr", "ív", "oa", "úd", "ủr", "í"},
	['yG'] = {"ýr", "ýj", "øa", "ýd", "yúr", "ý"},
	['G'] = {"jr", "j", "a", "x", "úr", "j"},
}

-- Parts of speech accepted as parameter 1 of export.getStem.
pos_data = { "noun", "verb" }

-- Reports whether any value stored in `haystack` (array or hash part) is
-- equal (==) to `element`. Keys are never compared.
function table.contains(haystack, element)
  local found = false
  for _, candidate in pairs(haystack) do
    if candidate == element then
      found = true
      break
    end
  end
  return found
end

-- this function resolves all consonant clashes
--
-- Takes one provisional inflected form in which "=" marks the boundaries
-- between stem and ending pieces (as assembled by export.getNounStem) and
-- rewrites it into its final spelling.
--
-- The rules below are applied strictly top to bottom, each one operating on
-- the output of the previous one, so their ORDER IS SIGNIFICANT: later rules
-- rely on the "=" markers and intermediate letters left by earlier ones.
-- The last three rules strip any leftover H/Ḥ placeholders, every "=" marker
-- and every "°".
--
-- @param resolution  string to rewrite
-- @return            the rewritten string
function resolve_ending(resolution)
	
	-- Applies one rewrite rule to the enclosing `resolution` (an upvalue):
	-- every match of the ustring pattern `to_Match` is replaced by
	-- `to_Replace` (which may use %1, %2 capture references). Only the first
	-- return value of mw.ustring.gsub is kept.
	local function resolve(to_Match, to_Replace)
		if mw.ustring.find(resolution, to_Match) then
			resolution = mw.ustring.gsub(resolution, to_Match, to_Replace)
			-- mw.log( "/" .. to_Match .. "/ → " .. to_Replace .. " : " .. resolution )
		end
	end
	
	-- resolve heavy spirant stems --
	
	-- NOTE(review): empty pattern and empty replacement — this looks like a
	-- leftover placeholder that leaves the string unchanged; confirm and remove.
	resolve("", "")

	-- vowel + spirant placeholder(s) + final "=e"
	resolve("(ie[HḤ][HḤ]?%=e)$", "íe")
	resolve("(ye[HḤ][HḤ]?%=e)$", "ýe")
	resolve("(a[HḤ][HḤ]?%=e)$", "ai")
	resolve("(e[HḤ][HḤ]?%=e)$", "ei")
	resolve("(ø[HḤ][HḤ]?%=e)$", "øi")
	resolve("(i[HḤ][HḤ]?%=e)$", "iè")
	resolve("(y[HḤ][HḤ]?%=e)$", "yè")
	-- NOTE(review): unlike the rules above, the second placeholder here is
	-- not optional (no "?") — confirm this is intended.
	resolve("(â[HḤ][HḤ]%=e)$", "âi")
	
	resolve("(ieH%=i%=s)", "ies")
	resolve("(ieH%=i)", "eí")
	resolve("([ei][HḤ]%=i)", "=í")
	resolve("(í[HḤ]%=i)", "ỉ")
	resolve("([øy][HḤ]%=i)", "=ý")
	resolve("(ý[HḤ]%=i)", "ỷ")
	
	resolve("([ouû]H%=[ei])", "ui")
	resolve("(aH%=[ei])", "ai")
	resolve("([" .. V .. "])H%=[ei]", "%1je")
	resolve("ieH%=([aú])", "eív%1")
	resolve("([" .. V .. "])H%=([aú])", "%1v%2")
	
	-- resolve semi-light and light spirant stems
	resolve("(%=ý%=s)$", "yes")
	resolve("(%=ú%=s)$", "uos")
	-- NOTE(review): "ỉ" maps to the same output as "ú" on the line above;
	-- this may be a copy-paste slip — confirm the intended result.
	resolve("(%=ỉ%=s)$", "uos")
	resolve("([" .. C .. "][" .. C .. "])%=í%=s$", "%1ies")
	resolve("(%=í%=s)$", "jes")
	
	-- resolve light stems
	resolve("%=tr", "t=s")
	resolve("%=njr", "nj=s")
	resolve("aij%=([ae])$", "aí%1")
	resolve("[uo]v%=([ae])$", "ú%1")
	resolve("o%=a$", "oà")
	
	-- resolve spirants
	-- doubled placeholder ḤḤ before each ending type
	resolve("ḤḤ%=g", "=k")
	resolve("ḤḤ%=d", "=t")
	resolve("ḤḤ%=s", "=s")
	resolve("ḤḤ%=a", "=à")
	resolve("ḤḤ%=úr", "úr")
	resolve("ḤḤ%=e", "=e")
	resolve("ḤḤ%=i", "=șe")
	
	-- doubled placeholder HH before each ending type
	resolve("HH%=g", "=ak")
	resolve("HH%=d", "=at")
	resolve("HH%=s", "=as")
	resolve("HH%=a", "=à")
	resolve("HH%=úr", "úr")
	resolve("HH%=e", "=e")
	resolve("HH%=i", "=așe")
	-- vowel + "=a" / "=à" contractions (the "à" is produced by the rules above)
	resolve("y%=à", "ya")
	resolve("y%=a", "øa")
	resolve("i%=à", "ia")
	resolve("i%=a", "ea")
	resolve("u%=à", "ua")
	resolve("u%=a", "oa")
	resolve("[aà]%=[aà]", "á")
	resolve("[áả]%=[aà]", "ả")
	resolve("â%=([aàáả])", "o%1")
	
	-- single placeholder H / Ḥ before each ending type
	resolve("[ḤH]%=g", "g")
	resolve("[ḤH]%=([ds])", "=%1")
	resolve("Ḥ%=i", "j=i")
	resolve("Ḥ%=e", "ge")
	resolve("H%=i", "ș=i")
	resolve("Ḥ%=([" .. V .. "])", "g%1")

	-- resolve =g/=d
	resolve("([rv])%=g","%1=u")
	resolve("(j%=g)","j")
	resolve("(j%=d)","x")
	
	resolve("([pf]%=g)","f")
	resolve("([pf]%=d)","f=t")
	resolve("(g%=g)","g")
	resolve("(k%=g)","k")
	resolve("(k%=d)","t")
	
	resolve("(m%=g)","n=g")
	
	resolve("b%=([gd])","v=%1")
	
	resolve("(j[ctþ]%=g)","ș=k")
	resolve("(j[ctþ]%=d)","ș=t")
	
	resolve("j[dð]%=([gd])","x=%1")
	resolve("jdz%=([gd])","x=%1")
	
	resolve("([sș]?[șķ]%=g)","ș=k")
	resolve("([sș]?[șķ]%=d)","ș=t")
	
	resolve("[zx]?d?x%=([gd])","x=%1")
	
	resolve("(s?[cstþ]%=g)","s=k")
	resolve("(s?[cstþ]%=d)","s=t")
	
	resolve("z?[zdð]%=([gd])","z=%1")
	resolve("z?dz%=([gd])","z=%1")
	
	-- consonant + m/n/ņ before =g/=d: the m/n/ņ is dropped
	resolve("([" .. C .. "])[mnņ]%=([gd])", "%1=%2")
	
	-- resolve =s
	resolve("([sș])%=s","%1")
	resolve("j%=s","ș")
	
	-- resolve VvC
	resolve("([^eoø])av%=([^" .. V .. "])","%1au=%2")
	resolve("^av%=([^" .. V .. "])","au=%1")
	resolve("(àv%=)","au=")
	
	resolve("([^a])uv%=([^" .. V .. "])","%1ú=%2")
	resolve("([^au])ov%=([^" .. V .. "])","%1ú=%2")
	resolve("^[ou]v%=([^" .. V .. "])","ú=%1")
	resolve("([òù]v%=)","ú=")
	
	resolve("([^i])ev%=([^" .. V .. "])","%1ø=%2")
	resolve("^ev%=([^" .. V .. "])","ø=%1")
	resolve("(èv%=)","ø=")
	
	resolve("([^aeuøâ])iv%=([^" .. V .. "])","%1y=%2")
	resolve("^iv%=([^" .. V .. "])","y=%1")
	resolve("(ìv%=)","y=")
	
	-- resolve CvC
	resolve("mv%=([" .. C .. "])","nu=%1")
	resolve("([" .. C .. "])v%=([" .. C .. "])","%1u=%2")
	resolve("([" .. C .. "])v%=e$","%1u=i")
	resolve("([" .. C .. "])v%=a$","%1u=a")
	
	-- resolve -ûve → -úe
	resolve("ûv%=e", "ú=e")
	
	-- resolve aja, ava
	resolve("([^eoø])a[vj]%=a","%1á")
	resolve("(à[vj]%=a)","á")
	
	-- resolve f/þ~v/ð
	resolve("([" .. V .. O .. L .. J .. "])f%=([" .. V .. J .. "])","%1v=%2")
	resolve("([" .. V .. O .. L .. J .. "])þ%=([" .. V .. J .. "])","%1ð=%2")
	
	-- resolve k/g + front vowel
	resolve("k%=([ie])","c=%1")
	resolve("g%=([ie])","dz=%1")
	
	-- resolve palatal + i
	resolve("([jșxķ])%=i","%1=e")
	
	-- resolve misc.
	resolve("(v%=g)","v")
	-- final clean-up: drop leftover placeholders, boundary markers and "°"
	resolve("[HḤ]", "")
	resolve("(%=)","")
	resolve("(°)","")
	
	return resolution
end

--- Builds the principal parts of a noun from its stem arguments.
--
-- Argument layout (template call: rad-inflection|pos|principal_part|3|4|5|6):
--   args[3] .. args[6] hold, in order, an optional onset, the nucleus, an
--   optional coda and the ending code (a key of stem_data).
--   The nucleus may carry alternations: "x~y" gives the plain form x and the
--   broken form y; "x^z" gives the plain form x and the raised form z
--   (e.g. "o~u^u").
--   Accepted shapes, taken from the format error message below:
--   hv|o~u^u|þ, dv|a^u|n|þ, he|rḤ.
--
-- @param args  argument table; only numeric keys 3 to 6 are read here
-- @return      table with the keys "nom", "gen", "dat", "acc", "ins",
--              "nompl", "genpl", "genpl_short" (each passed through
--              resolve_ending) plus "type" (the stem_data class of the
--              ending). If the detected ending has no class, the forms are
--              all "" and "type" is nil.
-- Raises an error when the arguments do not fit the expected layout or when
-- no paradigm exists for a light / semi-light ending.
function export.getNounStem(args)
	local invalid_format = "Invalid format: Please format the arguments as hv|o~u^u|þ, dv|a^u|n|þ, he|rḤ or lorál|eH"

	local principal_parts = {
		["nom"] = "",
		["gen"] = "",
		["dat"] = "",
		["acc"] = "",
		["ins"] = "",
		["nompl"] = "",
		["genpl"] = "",
		["genpl_short"] = "",
	}

	-- create stem and ending parameters --
	mw.log("Parameters:")
	if not (stem_data[args[4]] or stem_data[args[5]] or stem_data[args[6]]) then
		error(invalid_format)
	end
	-- Arguments 3 and 4 are both read unconditionally below; without them
	-- the code used to fail inside mw.ustring or on a nil concatenation.
	if args[3] == nil or args[4] == nil then
		error(invalid_format)
	end

	local stem, stem_broken, stem_raised
	local v_index = 4		-- max nucleus position

	-- Locate the nucleus: try argument 4 first, then fall back to argument 3.
	while true do
		local nucleus = args[v_index]
		local has_next = args[v_index + 1] ~= nil

		if has_next and mw.ustring.find(nucleus, "([%^%~])") then
			-- nucleus with explicit alternations
			if mw.ustring.find(nucleus, "([%^])") then
				stem = mw.ustring.match(nucleus, "([^%^]+)%^") or ""
				stem_raised = mw.ustring.match(nucleus, "%^([^%^]+)")
			else
				stem = nucleus
				stem_raised = nucleus
			end
			if mw.ustring.find(stem, "([%~])") then
				stem_broken = mw.ustring.match(stem, "%~([^%~%^]+)")
				stem = mw.ustring.match(stem, "([^%~]+)%~")
				-- no "^" part given: the raised form follows the broken form
				if stem_raised == nucleus then
					stem_raised = stem_broken
				end
			else
				stem_broken = stem
			end
			break
		elseif has_next and mw.ustring.find(V, mw.ustring.sub(nucleus, -1), 1, true) then
			-- Plain (non-pattern) search: the last character is user input
			-- and must not be interpreted as a pattern ("." would match any
			-- vowel, "%" would raise a malformed-pattern error).
			stem, stem_raised, stem_broken = nucleus, nucleus, nucleus
			break
		elseif v_index == 3 then
			stem, stem_raised, stem_broken = args[3], args[3], args[3]
			break
		else
			v_index = 3
		end
	end

	-- A nucleus such as "~u" has no plain form; this used to fail later on
	-- a nil concatenation.
	if stem == nil then
		error(invalid_format)
	end

	-- broken stem variant used before the nominative plural "=e" (and in
	-- some light forms): a bare "u" becomes "û"
	local stem_broken_e = stem_broken
	if stem_broken == "u" then
		stem_broken_e = "û"
	end

	-- prepend the onset when the nucleus was found in argument 4
	if v_index > 3 then
		local onset = args[v_index - 1]
		stem = onset .. stem
		stem_broken = onset .. stem_broken
		stem_broken_e = onset .. stem_broken_e
		stem_raised = onset .. stem_raised
	end

	-- args[v_index + 1] is never nil here: the loop only stops at index 4
	-- when argument 5 exists, and argument 4 was checked above. The former
	-- "ending only" and "No valid ending detected." branches could therefore
	-- never run and have been dropped.
	local ending
	if args[v_index + 2] ~= nil then
		-- nucleus | coda | ending
		local coda = args[v_index + 1]
		ending = args[v_index + 2]
		stem = stem .. coda
		stem_broken = stem_broken .. coda
		stem_broken_e = stem_broken_e .. coda
		stem_raised = stem_raised .. coda
	else
		-- nucleus | ending
		ending = args[v_index + 1]
	end

	local class = stem_data[ending]
	local is_heavy = class == 'C' or class == 'CH' or class == 'VH'

	-- consonantal / spirant endings are part of the stem itself
	if is_heavy then
		stem = stem .. ending
		stem_broken = stem_broken .. ending
		stem_broken_e = stem_broken_e .. ending
		stem_raised = stem_raised .. ending
	end

	--
	mw.log("Stem: " .. stem)
	if stem_broken then mw.log("Broken stem: " .. stem_broken ) end
	if stem_broken_e then mw.log("Broken stem (û): " .. stem_broken_e ) end
	if stem_raised then mw.log("Raised stem: " .. stem_raised ) end
	mw.log("Ending: " .. ending)
	--

	-- generate endings from declension type --

	-- HEAVY AND HEAVY SPIRANT --
	if is_heavy then
		if ending == "j" and not (mw.ustring.sub(stem, -2) == "oj" and mw.ustring.sub(stem, -3) ~= "aoj" and mw.ustring.sub(stem, -3) ~= "uoj") then
			principal_parts["nom"] = stem .. "=s"
			principal_parts["acc"] = stem .. "=d"
		else
			principal_parts["nom"] = stem_broken .. "=s"
			principal_parts["acc"] = stem_broken .. "=d"
		end
		if ending == 'v' and stem_raised == stem_broken then
			principal_parts["gen"] = stem .. "=g"
		else
			principal_parts["gen"] = stem_raised .. "=g"
		end
		principal_parts["dat"] = stem .. "=a"
		principal_parts["ins"] = stem .. "=úr"
		principal_parts["nompl"] = stem_broken_e .. "=e"
		principal_parts["genpl"] = stem .. "=i"
		principal_parts["genpl_short"] = principal_parts["genpl"] .. "=s"

	-- LIGHT --
	elseif ending == 'nj^e' then
		principal_parts["nom"] = stem_broken .. "nș"
		principal_parts["gen"] = stem_broken_e .. "njesk"
		principal_parts["dat"] = stem_broken_e .. "=nja"
		principal_parts["acc"] = stem_broken .. "nșt"
		principal_parts["ins"] = stem .. "nivúr"
		principal_parts["nompl"] = stem_broken .. "nxe"
		principal_parts["genpl"] = stem_broken .. "nje"
		principal_parts["genpl_short"] = stem_broken_e .. "nje=s"
	elseif ending == 't^o' then
		principal_parts["nom"] = stem_broken .. "ts"
		principal_parts["gen"] = stem .. "tusk"
		principal_parts["dat"] = stem .. "ta"
		principal_parts["acc"] = stem_broken .. "tst"
		principal_parts["ins"] = stem_broken .. "tvúr"
		principal_parts["nompl"] = stem_broken .. "tse"
		principal_parts["genpl"] = stem_broken .. "tsi"
		principal_parts["genpl_short"] = stem .. "tu=s"
	elseif class == 'V' then
		local forms = light_noun_data[ending]
		-- stem_data accepts some light endings that have no paradigm row;
		-- report that instead of failing on a nil index.
		if forms == nil then
			error("No declension data for ending [" .. ending .. "].")
		end
		local base = forms[2]
		local gen_vowel = forms[3] or base
		principal_parts["nom"] = stem_broken_e .. "=" .. forms[1]
		principal_parts["gen"] = stem .. "=" .. gen_vowel .. "sk"
		principal_parts["dat"] = stem .. "=" .. (forms[4] or (base .. "n"))
		principal_parts["acc"] = stem .. "=" .. (forms[5] or (base .. "st"))
		principal_parts["ins"] = stem .. "=" .. (forms[6] or (base .. "vúr"))
		principal_parts["nompl"] = stem .. "=" .. base .. "re"
		principal_parts["genpl"] = stem .. "=" .. base .. "ri"
		principal_parts["genpl_short"] = stem .. "=" .. gen_vowel .. "=s"

	-- SEMI-LIGHT
	elseif class == 'G' then
		local forms = light_noun_data[ending]
		if forms == nil then
			error("No declension data for ending [" .. ending .. "].")
		end
		principal_parts["nom"] = stem_broken .. forms[1]
		principal_parts["gen"] = stem_broken .. forms[2]
		principal_parts["dat"] = stem .. forms[3]
		principal_parts["acc"] = stem .. forms[4]
		principal_parts["ins"] = stem .. forms[5]
		principal_parts["nompl"] = stem_broken .. forms[6] .. "re"
		principal_parts["genpl"] = stem_broken .. forms[6] .. "ri"
		principal_parts["genpl_short"] = stem_broken .. "=" .. forms[6] .. "=s"
	end

	mw.log("——— Resolving ———")
	-- Only existing keys are reassigned, which is safe during pairs().
	for part, infl in pairs(principal_parts) do
		principal_parts[part] = resolve_ending(infl)
	end

	-- added after resolving so the class code itself is not rewritten
	principal_parts["type"] = class

	mw.logObject(principal_parts)
	return principal_parts
end

--[[
function export.getVerbStem(args)

end
]]--

--- Template entry point: returns one principal part of a word.
--
-- Frame arguments (read through Module:Arguments' getArgs):
--   1    part of speech; must be one of the values in pos_data
--   2    key of the principal part to return (for nouns: "nom", "gen",
--        "dat", "acc", "ins", "nompl", "genpl", "genpl_short" or "type")
--   3+   stem arguments, passed on unchanged to the part-of-speech builder
--
-- @param frame  the frame object of the #invoke call
-- @return       the requested principal part, or nil when argument 2 is not
--               a key of the builder's result
-- Raises an error when argument 1 is missing or not a supported part of
-- speech, or when no builder exists yet for that part of speech.
function export.getStem(frame)
	local args = getArgs(frame)
	local pos = args[1]

	if pos == nil or not table.contains(pos_data, pos) then
		-- The message used to be built by iterating an undefined global
		-- (`data`), so the intended error was never reached; list the
		-- accepted values from pos_data instead.
		local error_text = "Parameter 1 must be:"
		for _, name in ipairs(pos_data) do
			error_text = error_text .. " [" .. name .. "]"
		end
		error(error_text)
	end

	local builder
	if pos == "noun" then
		builder = export.getNounStem
	elseif pos == "verb" then
		-- export.getVerbStem is currently commented out in this module
		builder = export.getVerbStem
	end
	if builder == nil then
		error("Stems for part of speech [" .. pos .. "] are not implemented yet.")
	end

	return builder(args)[args[2]]
end

return export

--[[
Debug console test string:
=p.getStem(mw.getCurrentFrame():newChild{title="whatever",args={"noun", "nom", "dv", "a^u", "n", "þ"}})
]]--