Module:rad-stem: Difference between revisions

Remaining to add: HH, Ḥ and ḤḤ stems
m (TheNightAvl moved page Module:rad-infl to Module:rad-stem without leaving a redirect)
(Remaining to add: HH, Ḥ and ḤḤ stems)
Line 12: Line 12:


-- Character inventories. V and C are spliced into mw.ustring patterns as
-- bracket classes, e.g. "[" .. V .. "]" and "[" .. C .. "]".
-- Each name is declared twice in this text; in Lua the later `local`
-- shadows the earlier one, so the second value is the one in effect.

-- V: vowel letters, including accented forms.
local V = "aáàâảeéèiíìỉoóòøuúùûūủyýỳỷ"
local V = "aáàâảeéèiíìỉoóòøuúùûūủyýỳỷ"
-- C: consonant letters. The second declaration additionally contains "°",
-- a marker character that resolve("(°)", "") strips from the final output.
local C = "bcdðfghħjĵkķlmnņpqrsștvwxzþ"
local C = "bcdðfghħjĵkķlmnņpqrsștvwxzþ°"
-- NOTE(review): presumably the voiced / unvoiced obstruent subsets of C;
-- their use sites are not visible in this excerpt -- confirm.
local O_voiced = "bdðgħvxz"
local O_voiced = "bdðgħvxz"
local O_unvoiced = "cfhkķpqsștþ"
local O_unvoiced = "cfhkķpqsștþ"
Line 23: Line 23:
-- stem_data maps an ending key (one of the template arguments) to a
-- declension class code: 'C', 'CH', 'G', 'V' or 'VH'. The section comments
-- below label these heavy, heavy spirant, semi-light, light and light spirant.
-- The code is read back as stem_data[ending] / stem_data[args[n]] to decide
-- how the stem is assembled and which set of endings is generated.
-- NOTE(review): the header and several rows appear twice here, and some keys
-- (e.g. ['']) are assigned more than one class; repeated keys in a single
-- constructor have no guaranteed winner -- confirm which rows are intended.
stem_data = {
stem_data = {
-- heavy stems --
-- heavy stems --
['ab'] = 'C', ['av'] = 'C', ['b'] = 'C', ['c'] = 'C', ['Cf'] = 'C', ['Cm'] = 'C', ['Cn'] = 'C', ['Cņ'] = 'C', ['Cv'] = 'C', ['Cþ'] = 'C', ['d'] = 'C', ['ð'] = 'C', ['dz'] = 'C', ['eb'] = 'C', ['ev'] = 'C', ['f'] = 'C', ['g'] = 'C', ['ib'] = 'C', ['íb'] = 'C', ['ir'] = 'C', ['iv'] = 'C', ['ív'] = 'C', ['j'] = 'C', ['jð'] = 'C', ['jþ'] = 'C', ['k'] = 'C', ['l'] = 'C', ['lb'] = 'C', ['m'] = 'C', ['mb'] = 'C', ['n'] = 'C', ['ņ'] = 'C', ['ob'] = 'C', ['p'] = 'C', ['r'] = 'C', ['rb'] = 'C', ['s'] = 'C', ['ș'] = 'C', ['t'] = 'C', ['ub'] = 'C', ['uv'] = 'C', ['v'] = 'C', ['x'] = 'C', ['z'] = 'C', ['þ'] = 'C', ['st'] = 'C', ['șt'] = 'C', ['zd'] = 'C', ['xd'] = 'C',
['b'] = 'C', ['c'] = 'C', ['d'] = 'C', ['ð'] = 'C', ['dz'] = 'C', ['f'] = 'C', ['g'] = 'C', ['j'] = 'C', ['k'] = 'C', ['l'] = 'C', ['m'] = 'C', ['n'] = 'C', ['ņ'] = 'C', ['p'] = 'C', ['r'] = 'C', ['s'] = 'C', ['ș'] = 'C', ['t'] = 'C',['v'] = 'C', ['x'] = 'C', ['z'] = 'C', ['þ'] = 'C',
-- heavy spirants --
-- heavy spirants --
['abH'] = 'CH', ['avH'] = 'CH', ['bH'] = 'CH', ['cH'] = 'CH', ['CmḤ'] = 'CH', ['CnḤ'] = 'CH', ['CņḤ'] = 'CH', ['CvH'] = 'CH', ['dH'] = 'CH', ['ðH'] = 'CH', ['dzH'] = 'CH', ['ebH'] = 'CH', ['evH'] = 'CH', ['fH'] = 'CH', ['gH'] = 'CH', ['ibH'] = 'CH', ['íbH'] = 'CH', ['ivH'] = 'CH', ['ívH'] = 'CH', ['jðH'] = 'CH', ['jH'] = 'CH', ['jþH'] = 'CH', ['kH'] = 'CH', ['lbH'] = 'CH', ['lḤ'] = 'CH', ['mbH'] = 'CH', ['mḤ'] = 'CH', ['nḤ'] = 'CH', ['ņḤ'] = 'CH', ['pH'] = 'CH', ['rbH'] = 'CH', ['rḤ'] = 'CH', ['sH'] = 'CH', ['șH'] = 'CH', ['tH'] = 'CH', ['vH'] = 'CH', ['xH'] = 'CH', ['zH'] = 'CH', ['þH'] = 'CH', ['stH'] = 'CH', ['ștH'] = 'CH', ['zdH'] = 'CH', ['xdH'] = 'CH',
['bH'] = 'CH', ['cH'] = 'CH', ['dH'] = 'CH', ['ðH'] = 'CH', ['dzH'] = 'CH', ['fH'] = 'CH', ['gH'] = 'CH', ['jH'] = 'CH', ['kH'] = 'CH', ['lḤ'] = 'CH', ['mḤ'] = 'CH', ['nḤ'] = 'CH', ['ņḤ'] = 'CH', ['pH'] = 'CH', ['rḤ'] = 'CH', ['sH'] = 'CH', ['șH'] = 'CH', ['tH'] = 'CH', ['vH'] = 'CH', ['xH'] = 'CH', ['zH'] = 'CH', ['þH'] = 'CH',
-- semi-light --
-- semi-light --
['aG'] = 'G', ['eG'] = 'G', ['G'] = 'G', [''] = 'G', ['ieG'] = 'G', ['iG'] = 'G', ['oG'] = 'G', ['øG'] = 'G', ['rG̣'] = 'G', ['uG'] = 'G', ['yG'] = 'G',
['a^eG'] = 'G', ['a^oG'] = 'G', ['eG'] = 'G', ['G'] = 'G', ['ieG'] = 'G', ['iG'] = 'G', ['oG'] = 'G', ['øG'] = 'G', ['uG'] = 'G', ['yG'] = 'G',
-- light --
-- light --
['a^e'] = 'V', ['a^o'] = 'V', ['ai'] = 'V', ['aijo'] = 'V', ['au'] = 'V', ['C^V'] = 'V', ['Cvo'] = 'V', ['Cvu'] = 'V', ['e'] = 'V', ['i'] = 'V', ['ie'] = 'V', ['io'] = 'V', ['Je'] = 'V', ['Ji'] = 'V', ['nj^e'] = 'V', ['o'] = 'V', ['ø'] = 'V', ['oe'] = 'V', ['ove'] = 'V', ['t^o'] = 'V', ['u'] = 'V', ['uve'] = 'V', ['Vj'] = 'V', ['y'] = 'V',
['a^e'] = 'V', ['a^o'] = 'V', ['ai'] = 'V', ['au'] = 'V', ['e'] = 'V', ['i'] = 'V', ['ie'] = 'V', ['nj^e'] = 'V', ['o'] = 'V', ['ø'] = 'V', ['oe'] = 'V', ['ove'] = 'V', ['t^o'] = 'V', ['u'] = 'V', ['uve'] = 'V', ['Vj'] = 'V', ['y'] = 'V',
-- light spirant --
-- light spirant --
['a^eH'] = 'VH', ['a^oH'] = 'VH', ['aH'] = 'VH', ['aHH'] = 'VH', ['áHH'] = 'VH', ['âHH'] = 'VH', ['aòH'] = 'VH', ['eH'] = 'VH', ['H'] = 'VH', [''] = 'VH', ['HH'] = 'VH', ['ḤḤ'] = 'VH', ['ieH'] = 'VH', ['ieHH'] = 'VH', ['iH'] = 'VH', ['iḤ'] = 'VH', ['íH'] = 'VH', ['íḤ'] = 'VH', ['iHH'] = 'VH', ['oH'] = 'VH', ['øH'] = 'VH', ['uH'] = 'VH', ['yH'] = 'VH', ['ýH'] = 'VH', ['ýḤ'] = 'VH', ['yHH'] = 'VH',
['H'] = 'VH', ['HH'] = 'VH', [''] = 'VH', ['ḤḤ'] = 'VH',
}
 
-- Ending tables for light ('V') and semi-light ('G') noun stems, keyed by the
-- ending strings that stem_data classifies.
--
-- Rows for 'V' endings (indices 3-6 are optional):
--   [1] nominative ending
--   [2] base vowel; nom. pl. = [2] .. "re", gen. pl. = [2] .. "ri"
--   [3] vowel used before "sk" (genitive) and "=s" (short gen. pl.);
--       falls back to [2]
--   [4] dative ending;        falls back to [2] .. "n"
--   [5] accusative ending;    falls back to [2] .. "st"
--   [6] instrumental ending;  falls back to [2] .. "vúr"
--
-- Rows for 'G' endings (all six entries are read unconditionally):
--   [1] nominative, [2] genitive, [3] dative, [4] accusative,
--   [5] instrumental, [6] plural base (followed by "re", "ri" or "=s")
light_noun_data = {
	-- light --
	['a^e'] = {"a", "a", "e", "an", "ast"},
	['a^o'] = {"a", "a", "u", "an", "ast"},
	['ai'] = {"ai", "ai"},
	['áj'] = {"ájr", "áj"},
	['au'] = {"au", "au"},
	['e'] = {"a", "e", "i"},
	['i'] = {"e", "i"},
	['ie'] = {"ía", "ie"},
	['o'] = {"a", "o", "u", "un", "ust"},
	['ø'] = {"a", "ø", "y"},
	['u'] = {"e", "u"},
	['y'] = {"e", "y"},
	-- semi-light --
	['a^eG'] = {"ár", "ív", "á", "ád", "aúr", "ai"},
	['a^oG'] = {"ár", "úv", "á", "ád", "aúr", "ai"},
	['eG'] = {"ír", "í", "ea", "íd", "eúr", "í"},
	['iG'] = {"ír", "í", "ea", "íd", "iúr", "í"},
	['ieG'] = {"ỉr", "ỉ", "iea", "ỉd", "ieúr", "ỉ"},
	['oG'] = {"úr", "ív", "oa", "úd", "oúr", "í"},
	['øG'] = {"ýr", "ýj", "øa", "ýd", "øúr", "ý"},
	-- NOTE(review): the next two rows were previously keyed 'oG' and 'øG' a
	-- second time. That left the winner of each repeated key unspecified and
	-- left 'uG' / 'yG' (classified 'G' in stem_data) without a row, so the
	-- semi-light branch would index nil for them. They differ from the
	-- 'oG' / 'øG' rows only in the instrumental, the same way 'iG' differs
	-- from 'eG', so they are taken to be the 'uG' / 'yG' rows -- confirm.
	['uG'] = {"úr", "ív", "oa", "úd", "ủr", "í"},
	['yG'] = {"ýr", "ýj", "øa", "ýd", "yúr", "ý"},
	['G'] = {"jr", "j", "a", "x", "úr", "j"},
}
}


Line 55: Line 81:
if mw.ustring.find(resolution, to_Match) then
if mw.ustring.find(resolution, to_Match) then
resolution = mw.ustring.gsub(resolution, to_Match, to_Replace)
resolution = mw.ustring.gsub(resolution, to_Match, to_Replace)
mw.log( "/" .. to_Match .. "/ → " .. to_Replace .. " : " .. resolution )
-- mw.log( "/" .. to_Match .. "/ → " .. to_Replace .. " : " .. resolution )
end
end
end
end
-- resolve heavy spirant stems --
resolve("(ieH%=e)$", "íe")
resolve("(yeH%=e)$", "ýe")
resolve("(aH%=e)$", "ai")
resolve("(eH%=e)$", "ei")
resolve("(øH%=e)$", "øi")
resolve("(iH%=e)$", "iè")
resolve("(yH%=e)$", "yè")
resolve("(ieH%=i%=s)", "ies")
resolve("(ieH%=i)", "eí")
resolve("([ei]H%=i)", "=í")
resolve("(íH%=i)", "ỉ")
resolve("([øy]H%=i)", "=ý")
resolve("(ýH%=i)", "ỷ")
resolve("([ouû]H%=[ei])", "ui")
resolve("(aH%=[ei])", "ai")
resolve("([" .. V .. "])H%=[ei]", "%1je")
resolve("ieH%=([aú])", "eív%1")
resolve("([" .. V .. "])H%=([aú])", "%1v%2")
-- resolve semi-light and light spirant stems
resolve("(%=ý%=s)$", "yes")
resolve("(%=ú%=s)$", "uos")
resolve("(%=ỉ%=s)$", "uos")
resolve("([" .. C .. "][" .. C .. "])%=í%=s$", "%1ies")
resolve("(%=í%=s)$", "jes")
-- resolve light stems
resolve("%=tr", "t=s")
resolve("%=njr", "nj=s")
resolve("aij%=([ae])$", "aí%1")
resolve("[uo]v%=([ae])$", "ú%1")
resolve("o%=a$", "oà")
-- resolve spirants
resolve("[ḤH]%=g", "g")
resolve("[ḤH]%=([ds])", "=%1")
resolve("Ḥ%=i", "j=i")
resolve("Ḥ%=e", "ge")
resolve("H%=i", "ș=i")
resolve("Ḥ%=([" .. V .. "])", "g%1")


-- resolve =g/=d
-- resolve =g/=d
Line 116: Line 188:
-- resolve CvC
-- resolve CvC
resolve("mv%=([" .. C .. "])","nu=%1")
resolve("([" .. C .. "])v%=([" .. C .. "])","%1u=%2")
resolve("([" .. C .. "])v%=([" .. C .. "])","%1u=%2")
resolve("([" .. C .. "])v%=e","%1u=i")
resolve("([" .. C .. "])v%=e$","%1u=i")
resolve("([" .. C .. "])v%=a$","%1u=a")
-- resolve -ûve → -úe
-- resolve -ûve → -úe
Line 137: Line 211:
resolve("([jșxķ])%=i","%1=e")
resolve("([jșxķ])%=i","%1=e")
-- resolve =
-- resolve misc.
resolve("(v%=g)","v")
resolve("(v%=g)","v")
resolve("[HḤ]", "")
resolve("(%=)","")
resolve("(%=)","")
resolve("(°)","")
return resolution
return resolution
Line 164: Line 240:
-- create stem and ending parameters --  
-- create stem and ending parameters --  
mw.log("Parameters:")
mw.log("Parameters:")
if stem_data[args[4]] == 'C' or stem_data[args[4]] or stem_data[args[5]] == 'C' or stem_data[args[5]] or stem_data[args[6]] == 'C' or stem_data[args[6]] == 'CH' then
if stem_data[args[4]] or stem_data[args[5]] or stem_data[args[6]] then
local v_index = 4 -- expected nucleus position
local v_index = 4 -- max nucleus position
while true do
while true do
if mw.ustring.find(args[v_index], "([%^%~])") then
if mw.ustring.find(args[v_index], "([%^%~])") and args[v_index + 1] ~= nil then
if mw.ustring.find(args[v_index], "([%^])") then
if mw.ustring.find(args[v_index], "([%^])") then
stem = mw.ustring.match(args[v_index], "([^%^]+)%^") or ""
stem = mw.ustring.match(args[v_index], "([^%^]+)%^") or ""
Line 188: Line 264:
break
break
else
else
if mw.ustring.find(V, mw.ustring.sub(args[v_index], -1)) then
if mw.ustring.find(V, mw.ustring.sub(args[v_index], -1)) and args[v_index + 1] ~= nil then
stem = args[v_index]
stem = args[v_index]
stem_raised = stem
stem_raised = stem
Line 194: Line 270:
break
break
elseif v_index == 3 then
elseif v_index == 3 then
error()
stem = args[3]
stem_raised = stem
stem_broken = stem
break
else
else
v_index = 3
v_index = 3
Line 207: Line 286:
end
end
stem = stem .. args[v_index + 1]
stem_broken = stem_broken .. args[v_index + 1]
stem_broken_e = stem_broken_e .. args[v_index + 1]
stem_raised = stem_raised .. args[v_index + 1]
if v_index > 3 then
if v_index > 3 then
stem = args[v_index - 1] .. stem
stem = args[v_index - 1] .. stem
Line 217: Line 292:
stem_raised = args[v_index - 1] .. stem_raised
stem_raised = args[v_index - 1] .. stem_raised
end
end
if args[v_index + 2]~= nil then
stem = stem .. args[v_index + 2]
if args[v_index + 2] ~= nil then
stem_broken = stem_broken .. args[v_index + 2]
stem_broken_e = stem_broken_e .. args[v_index + 2]
stem_raised = stem_raised .. args[v_index + 2]
ending = args[v_index + 2]
ending = args[v_index + 2]
else
stem = stem .. args[v_index + 1]
stem_broken = stem_broken .. args[v_index + 1]
stem_broken_e = stem_broken_e .. args[v_index + 1]
stem_raised = stem_raised .. args[v_index + 1]
elseif args[v_index + 1] ~= nil then
ending = args[v_index + 1]
ending = args[v_index + 1]
end
elseif stem_data[args[4]] then
elseif stem_data[args[v_index]] then
stem = args[3] .. args[4]
ending = args[v_index]
ending = args[4]
stem = nil
stem_broken = nil
stem_broken = nil
stem_broken_e = nil
stem_broken_e = nil
stem_raised = nil
stem_raised = nil
elseif stem_data[args[3]] then
else error("No valid ending detected.") end
stem = args[3]
ending = args[3]
stem_broken = nil
stem_broken_e = nil
stem_raised = nil
else
else
error("Invalid format: Please format the arguments as hv|o~u^u|þ, dv|a^u|n|þ, he|rḤ or lorál|eH")
error("Invalid format: Please format the arguments as hv|o~u^u|þ, dv|a^u|n|þ, he|rḤ or lorál|eH")
end
end
if stem_data[ending] == 'C' or stem_data[ending] == 'CH' or stem_data[ending] == 'VH' then
stem = stem .. ending
stem_broken = stem_broken .. ending
stem_broken_e = stem_broken_e .. ending
stem_raised = stem_raised .. ending
end
--
--
mw.log("Stem: " .. stem)
mw.log("Stem: " .. stem)
if stem_broken then mw.log("Broken stem: " .. stem_broken ) end
if stem_broken then mw.log("Broken stem: " .. stem_broken ) end
if stem_broken_e then mw.log("Broken stem (û): " .. stem_broken_e ) end
if stem_raised then mw.log("Raised stem: " .. stem_raised ) end
if stem_raised then mw.log("Raised stem: " .. stem_raised ) end
mw.log("Ending: " .. ending)
mw.log("Ending: " .. ending)
Line 250: Line 332:
-- generate endings from declension type --
-- generate endings from declension type --
if stem_data[ending] == 'C' or stem_data[ending] == 'CH' then -- heavy stems
-- HEAVY AND HEAVY SPIRANT --
if stem_data[ending] == 'C' or stem_data[ending] == 'CH' or stem_data[ending] == 'VH' then
if ending == "j" and not (mw.ustring.sub(stem, -2) == "oj" and mw.ustring.sub(stem, -3) ~= "aoj" and mw.ustring.sub(stem, -3) ~= "uoj") then
if ending == "j" and not (mw.ustring.sub(stem, -2) == "oj" and mw.ustring.sub(stem, -3) ~= "aoj" and mw.ustring.sub(stem, -3) ~= "uoj") then
principle_parts["nom"] = stem .. "=s"
principle_parts["nom"] = stem .. "=s"
Line 267: Line 351:
principle_parts["nompl"] = stem_broken_e .. "=e"
principle_parts["nompl"] = stem_broken_e .. "=e"
principle_parts["genpl"] = stem .. "=i"
principle_parts["genpl"] = stem .. "=i"
principle_parts["genpl_short"] = principle_parts["genpl"]
principle_parts["genpl_short"] = principle_parts["genpl"] .. "=s"
-- LIGHT --
elseif ending == 'nj^e' then
principle_parts["nom"] = stem_broken .. "nș"
principle_parts["gen"] = stem_broken_e .. "njesk"
principle_parts["dat"] = stem_broken_e .. "=" .. "nja"
principle_parts["acc"] = stem_broken .. "nșt"
principle_parts["ins"] =  stem .. "nivúr"
principle_parts["nompl"] = stem_broken .. "nxe"
principle_parts["genpl"] = stem_broken .. "nje"
principle_parts["genpl_short"] = stem_broken_e .. "nje=s"
elseif ending == 't^o' then
principle_parts["nom"] = stem_broken .. "ts"
principle_parts["gen"] = stem .. "tusk"
principle_parts["dat"] = stem .. "ta"
principle_parts["acc"] = stem_broken .. "tst"
principle_parts["ins"] =  stem_broken .. "tvúr"
principle_parts["nompl"] = stem_broken .. "tse"
principle_parts["genpl"] = stem_broken .. "tsi"
principle_parts["genpl_short"] = stem .. "tu=s"
elseif stem_data[ending] == 'V' then
principle_parts["nom"] = stem_broken_e .. "=" .. light_noun_data[ending][1]
principle_parts["gen"] = stem .. "=" .. (light_noun_data[ending][3] or light_noun_data[ending][2]) .. "sk"
principle_parts["dat"] = stem .. "=" .. (light_noun_data[ending][4] or (light_noun_data[ending][2] .. "n"))
principle_parts["acc"] = stem .. "=" .. (light_noun_data[ending][5] or (light_noun_data[ending][2] .. "st"))
principle_parts["ins"] =  stem .. "=" .. (light_noun_data[ending][6] or (light_noun_data[ending][2] .. "vúr"))
principle_parts["nompl"] = stem .. "=" .. light_noun_data[ending][2] .. "re"
principle_parts["genpl"] = stem .. "=" .. light_noun_data[ending][2] .. "ri"
principle_parts["genpl_short"] = stem .. "=" .. (light_noun_data[ending][3] or light_noun_data[ending][2]) .. "=s"
-- SEMI-LIGHT
elseif stem_data[ending] == 'G' then
principle_parts["nom"] = stem_broken .. light_noun_data[ending][1]
principle_parts["gen"] = stem_broken .. light_noun_data[ending][2]
principle_parts["dat"] = stem .. light_noun_data[ending][3]
principle_parts["acc"] = stem .. light_noun_data[ending][4]
principle_parts["ins"] =  stem .. light_noun_data[ending][5]
principle_parts["nompl"] = stem_broken .. light_noun_data[ending][6] .. "re"
principle_parts["genpl"] = stem_broken .. light_noun_data[ending][6] .. "ri"
principle_parts["genpl_short"] = stem_broken .. "=" .. light_noun_data[ending][6] .. "=s"
end
end