-- Source note: this text was captured from a wiki revision-comparison view.
-- Editor: TheNightAvl (10,735 edits); minor edit; no edit summary; tag: Reverted.
-- The comparison starts at line 1 of both revisions.
-- Table of functions this module returns to its callers.
local export = {}
-- Argument-extraction helper taken from Module:Arguments.
local getArgs = require('Module:Arguments').getArgs
-- DATA --

-- Turns a list of strings into a lookup table mapping each string to true.
local function make_set(items)
	local set = {}
	for _, item in ipairs(items) do
		set[item] = true
	end
	return set
end

-- Every letter or letter sequence that is treated as a single grapheme.
local grapheme = make_set{
	"a", "á", "à", "â", "ả", "ai", "âi", "ao", "ào", "aoi", "au",
	"b", "c", "d", "ð", "dx", "dz",
	"e", "é", "è", "ea", "ei",
	"f", "g", "h", "ħ",
	"i", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
	"j", "ĵ", "k", "ķ", "l", "m", "n", "ņ",
	"o", "ó", "ò", "ø", "oa", "øa", "øi",
	"p", "q", "r", "s", "ș", "t", "ts", "tș",
	"u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
	"v", "w", "x",
	"y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea",
	"z", "þ",
	"gj", "lj",
}

-- Graphemes that count as vowels (a syllable marker is placed before each).
local vowel = make_set{
	"a", "á", "à", "â", "ả", "ai", "âi", "ao", "ào", "aoi", "au",
	"e", "é", "è", "ea", "ei",
	"i", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
	"o", "ó", "ò", "ø", "oa", "øa", "øi",
	"u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
	"y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea",
}

-- Graphemes that count as consonants.
local consonant = make_set{
	"b", "c", "d", "ð", "dx", "dz",
	"f", "g", "h", "ħ",
	"j", "ĵ", "k", "ķ", "l", "m", "n", "ņ",
	"p", "q", "r", "s", "ș", "t", "ts", "tș",
	"v", "w", "x", "z", "þ",
	"gj", "lj",
}

-- Punctuation that may sit between graphemes without blocking a syllable break.
local affix = make_set{ "·", "-", "’", "‘" }

local nasal = make_set{ "m", "n", "ņ" }

local glide = make_set{ "j", "ĵ", "w", "gj", "lj" }

-- <Cv> combinations that are uniformly pronounced without /v/ --
local Cv_fixed = make_set{ "b", "f", "p" }

-- consonants that can precede /r/ in a medial onset --
local Cr = make_set{
	"p", "k", "b", "g",
	"ð",
	"f", "ħ", "h",
	"v",
	"þ",
	"t", "d",
}

-- consonants that can precede /l/ in a medial onset --
local Cl = make_set{
	"p", "k", "b", "g",
	"ð",
	"f", "ħ", "h",
	"v",
	"þ",
	"s", "ș", "z", "x",
}

-- consonants that can precede /v/ in a medial onset --
-- NOTE(review): "dʒ" is not in the grapheme table, so this entry can never
-- match a grapheme produced from the tables above; possibly "dx" was meant — confirm.
local Cv = make_set{
	"d", "ð", "dz",
	"dʒ", "g", "ħ", "h",
	"k", "l", "r", "s", "ș",
	"t", "c", "ķ", "ts", "tș",
	"z", "x", "þ", "m", "n",
}

-- consonants that can precede nasals in a medial onset -- (non-glide continuants)
local CN = make_set{
	"ð",
	"f", "ħ", "h",
	"l", "r", "s", "ș",
	"v",
	"z", "x", "þ",
}

-- Lower-case word endings with a fixed, irregular split into graphemes.
local irregular = {
	["eurú"] = {"eu", "r", "ú"},
	["eurov"] = {"eu", "r", "o", "v"},
}

-- Graphemes and vowels that occur only inside the irregular spellings above.
local irregular_grapheme = { "eu", }
local irregular_vowel = { "eu", }
--- Splits a word into graphemes.
-- The word is consumed from its end: at each step the longest suffix that is
-- either an irregular spelling or a known grapheme is taken; if nothing
-- matches, the final single character is taken on its own. Matching is
-- case-insensitive, but the returned pieces keep the capitalisation of the input.
-- Kept global (not local) so that the name stays visible exactly as before.
-- @param word string to split
-- @return array of grapheme strings, in reading order
function graphemise(word)
	mw.log("————— GRAPHEMISING —————")
	local graphemes = {}
	local remaining = mw.ustring.len(word)
	while remaining > 0 do
		for i = 1, remaining do
			local orig_string = mw.ustring.sub(word, i)
			local check_string = mw.ustring.lower(orig_string)
			local pieces = irregular[check_string]
			if pieces then
				mw.log("Irregular spelling recognised: " .. orig_string)
				-- Record which character positions were changed by lower-casing,
				-- i.e. which positions are capitals in the input.
				local capitals = {}
				for j = 1, mw.ustring.len(orig_string) do
					if mw.ustring.sub(orig_string, j, j) ~= mw.ustring.sub(check_string, j, j) then
						capitals[j] = true
					end
				end
				-- Build capitalised copies of the pieces. The shared `irregular`
				-- table is left untouched so later lookups still see lower case.
				-- Lengths are counted in characters, not bytes.
				local cased = {}
				local index = 1
				for j = 1, #pieces do
					local letters = {}
					for k = 1, mw.ustring.len(pieces[j]) do
						local letter = mw.ustring.sub(pieces[j], k, k)
						if capitals[index] then
							letter = mw.ustring.upper(letter)
						end
						letters[k] = letter
						index = index + 1
					end
					cased[j] = table.concat(letters)
				end
				-- Prepend last piece first so the pieces end up in reading order.
				for j = #cased, 1, -1 do
					table.insert(graphemes, 1, cased[j])
					mw.log("<" .. cased[j] .. "> logged.")
				end
				word = mw.ustring.sub(word, 1, i - 1)
				break
			elseif grapheme[check_string] or i == remaining then
				-- Known grapheme, or a lone final character (tested on the
				-- original text so every pass is guaranteed to consume something).
				table.insert(graphemes, 1, orig_string)
				mw.log("<" .. orig_string .. "> logged.")
				word = mw.ustring.sub(word, 1, i - 1)
				break
			end
		end
		remaining = mw.ustring.len(word)
	end
	mw.log("String exhausted.")
	mw.log("<" .. table.concat(graphemes, "><") .. ">")
	return graphemes
end
--- Inserts syllable markers ("σ") into an array of graphemes.
-- A marker is first placed before every vowel, then each marker is moved
-- leftwards over the consonants that may open the syllable. A marker with
-- only consonants/affixes between it and the start of the word, or with a
-- character that is neither consonant nor affix before it, is removed.
-- The array is modified in place and also returned.
-- The shared `grapheme` table is not touched here, so calling this function
-- does not change how later words are split; the irregular vowels are looked
-- up through a call-local set instead of being written into `vowel`.
-- Kept global (not local) so that the name stays visible exactly as before.
-- @param graphemes array of grapheme strings
-- @return the same array, with "σ" elements marking syllable boundaries
function syllabify(graphemes)
	mw.log("————— SYLLABIFYING —————")

	local extra_vowel = {}
	for _, g in ipairs(irregular_vowel) do
		extra_vowel[g] = true
	end
	local function is_vowel(g)
		return g ~= nil and (vowel[g] or extra_vowel[g]) or false
	end

	-- Lower-cased grapheme at a position, or nil when the position is empty.
	local function lowered(position)
		local g = graphemes[position]
		return g and mw.ustring.lower(g)
	end

	local function swap(Pos1, Pos2)
		mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
		local temp = graphemes[Pos1]
		graphemes[Pos1] = graphemes[Pos2]
		graphemes[Pos2] = temp
	end

	-- True when `lead` (the consonant just left of the marker) may join the
	-- onset that starts with `nxt`; `before` is the grapheme left of `lead`.
	local function joins_onset(before, lead, nxt)
		if lead == nil then
			return false
		end
		return (glide[lead] and is_vowel(nxt))                          -- J ↔ σ
			or (consonant[lead] and (is_vowel(nxt) or glide[nxt]))      -- C ↔ σ (J)
			or (consonant[before] and Cv[lead] and nxt == "v")          -- C Cᵥ ↔ σ v
			or (Cv_fixed[lead] and nxt == "v")                          -- Bv ↔ σ v
			or (consonant[before] and CN[lead] and nasal[nxt])          -- C Cₙ ↔ σ N
			or (consonant[before] and Cr[lead] and nxt == "r")          -- C Cᵣ ↔ σ r
			or (consonant[before] and Cl[lead] and nxt == "l")          -- C Cₗ ↔ σ l
			or false
	end

	-- add σ before each vowel
	local i = 1
	while graphemes[i] ~= nil do
		if is_vowel(mw.ustring.lower(graphemes[i])) then
			table.insert(graphemes, i, "σ")
			mw.log("σ inserted in position " .. i)
			i = i + 2
		else
			i = i + 1
		end
	end

	-- Walk right to left. A marker that moves one step left is met again on
	-- the next iteration, so it can keep moving through a cluster.
	for position = #graphemes, 1, -1 do
		if lowered(position) == "σ" then
			local prev = lowered(position - 1)
			local prev2 = lowered(position - 2)
			local prev3 = lowered(position - 3)
			local after = lowered(position + 1)
			if joins_onset(prev2, prev, after) then
				swap(position, position - 1)
			elseif affix[prev] and joins_onset(prev3, prev2, after) then
				-- Same rules with an affix sitting between consonant and
				-- marker: hop over the affix and the consonant.
				-- (Affix characters are in none of the consonant tables, so
				-- trying the direct case first matches the rule order.)
				swap(position, position - 1)
				swap(position - 1, position - 2)
			elseif position == 1 then
				table.remove(graphemes, 1)
			else
				for back = position - 1, 1, -1 do
					local g = graphemes[back]
					local g_lower = mw.ustring.lower(g)
					if is_vowel(g_lower) or g == "σ" then
						break
					elseif back == 1 or (not affix[g] and not consonant[g_lower]) then
						table.remove(graphemes, position)
						break
					end
				end
			end
		end
	end

	return graphemes
end
--- Returns the word in args[1] with its syllable boundaries marked.
-- args[1]: the word (required; raises "Word needed" when absent).
-- args[2]: text placed at each syllable boundary; "|" when not supplied.
-- The divider is inserted literally: each "σ" marker element is replaced
-- before joining, so a "%" in the divider is not read as a pattern escape.
function export.generate(frame)
	local args = getArgs(frame)
	if args[1] == nil then
		error("Word needed")
	end
	local divider = args[2]
	if divider == nil then
		divider = "|"
	end
	local outputSyllables = syllabify(graphemise(args[1]))
	for position, piece in ipairs(outputSyllables) do
		if piece == "σ" then
			outputSyllables[position] = divider
		end
	end
	return table.concat(outputSyllables)
end
--- Returns the graphemes of the word in args[1] as an array, with "σ"
-- elements marking the syllable boundaries.
-- Raises "Word needed" when args[1] is absent.
function export.generate_array(frame)
	local args = getArgs(frame)
	if args[1] == nil then
		error("Word needed")
	end
	return syllabify(graphemise(args[1]))
end
return export

--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
]]