Module:rad-syllables: Difference between revisions
Jump to navigation
Jump to search
TheNightAvl (talk | contribs) m (Added ài) Tags: Mobile edit Mobile web edit |
TheNightAvl (talk | contribs) m (Beautified) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local getArgs = require( | local getArgs = require("Module:Arguments").getArgs | ||
-- DATA -- | -- DATA -- | ||
local grapheme = { | local grapheme = { | ||
["a"] = true, | |||
["á"] = true, | |||
["à"] = true, | |||
["â"] = true, | |||
["ả"] = true, | |||
["ai"] = true, | |||
["ài"] = true, | |||
["âi"] = true, | |||
["ao"] = true, | |||
["ào"] = true, | |||
["aoi"] = true, | |||
["au"] = true, | |||
["b"] = true, | |||
["c"] = true, | |||
["d"] = true, | |||
["ð"] = true, | |||
["dx"] = true, | |||
["dz"] = true, | |||
["e"] = true, | |||
["é"] = true, | |||
["è"] = true, | |||
["ea"] = true, | |||
["ei"] = true, | |||
["f"] = true, | |||
["g"] = true, | |||
["h"] = true, | |||
["ħ"] = true, | |||
["i"] = true, | |||
["ì"] = true, | |||
["í"] = true, | |||
["ỉ"] = true, | |||
["ie"] = true, | |||
["ìe"] = true, | |||
["ỉe"] = true, | |||
["iea"] = true, | |||
["j"] = true, | |||
["ĵ"] = true, | |||
["k"] = true, | |||
["ķ"] = true, | |||
["l"] = true, | |||
["m"] = true, | |||
["n"] = true, | |||
["ņ"] = true, | |||
["o"] = true, | |||
["ó"] = true, | |||
["ò"] = true, | |||
["ø"] = true, | |||
["oa"] = true, | |||
["øa"] = true, | |||
["øi"] = true, | |||
["p"] = true, | |||
["q"] = true, | |||
["r"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["t"] = true, | |||
["ts"] = true, | |||
["tș"] = true, | |||
["u"] = true, | |||
["ú"] = true, | |||
["ù"] = true, | |||
["û"] = true, | |||
["ū"] = true, | |||
["ủ"] = true, | |||
["ui"] = true, | |||
["uo"] = true, | |||
["ủo"] = true, | |||
["uoa"] = true, | |||
["v"] = true, | |||
["w"] = true, | |||
["x"] = true, | |||
["y"] = true, | |||
["ỳ"] = true, | |||
["ý"] = true, | |||
["ỷ"] = true, | |||
["ye"] = true, | |||
["ỷe"] = true, | |||
["yea"] = true, | |||
["z"] = true, | |||
["þ"] = true, | |||
["gj"] = true, | |||
["lj"] = true | |||
} | } | ||
local vowel = { | local vowel = { | ||
["a"] = true, | |||
["á"] = true, | |||
["à"] = true, | |||
["â"] = true, | |||
["ả"] = true, | |||
["ai"] = true, | |||
["ài"] = true, | |||
["âi"] = true, | |||
["ao"] = true, | |||
["ào"] = true, | |||
["aoi"] = true, | |||
["au"] = true, | |||
["e"] = true, | |||
["é"] = true, | |||
["è"] = true, | |||
["ea"] = true, | |||
["ei"] = true, | |||
["i"] = true, | |||
["ì"] = true, | |||
["í"] = true, | |||
["ỉ"] = true, | |||
["ie"] = true, | |||
["ìe"] = true, | |||
["ỉe"] = true, | |||
["iea"] = true, | |||
["o"] = true, | |||
["ó"] = true, | |||
["ò"] = true, | |||
["ø"] = true, | |||
["oa"] = true, | |||
["øa"] = true, | |||
["øi"] = true, | |||
["u"] = true, | |||
["ú"] = true, | |||
["ù"] = true, | |||
["û"] = true, | |||
["ū"] = true, | |||
["ủ"] = true, | |||
["ui"] = true, | |||
["uo"] = true, | |||
["ủo"] = true, | |||
["uoa"] = true, | |||
["y"] = true, | |||
["ỳ"] = true, | |||
["ý"] = true, | |||
["ỷ"] = true, | |||
["ye"] = true, | |||
["ỷe"] = true, | |||
["yea"] = true | |||
} | } | ||
local consonant = { | local consonant = { | ||
["b"] = true, | |||
["c"] = true, | |||
["d"] = true, | |||
["ð"] = true, | |||
["dx"] = true, | |||
["dz"] = true, | |||
["f"] = true, | |||
["g"] = true, | |||
["h"] = true, | |||
["ħ"] = true, | |||
["j"] = true, | |||
["ĵ"] = true, | |||
["k"] = true, | |||
["ķ"] = true, | |||
["l"] = true, | |||
["m"] = true, | |||
["n"] = true, | |||
["ņ"] = true, | |||
["p"] = true, | |||
["q"] = true, | |||
["r"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["t"] = true, | |||
["ts"] = true, | |||
["tș"] = true, | |||
["v"] = true, | |||
["w"] = true, | |||
["x"] = true, | |||
["z"] = true, | |||
["þ"] = true, | |||
["gj"] = true, | |||
["lj"] = true | |||
} | } | ||
local affix = { | local affix = { | ||
["·"] = true, | |||
["-"] = true, | |||
["’"] = true, | |||
["‘"] = true | |||
} | } | ||
local nasal = { | local nasal = { | ||
["m"] = true, | |||
["n"] = true, | |||
["ņ"] = true | |||
} | } | ||
local glide = { | local glide = { | ||
["j"] = true, | |||
["ĵ"] = true, | |||
["w"] = true, | |||
["gj"] = true, | |||
["lj"] = true | |||
} | |||
local sibilant = { | |||
["dz"] = true, | |||
["dʒ"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["c"] = true, | |||
["ķ"] = true, | |||
["ts"] = true, | |||
["tș"] = true, | |||
["z"] = true, | |||
["x"] = true | |||
} | |||
local Cs = { | |||
["d"] = true, | |||
["t"] = true, | |||
["dz"] = true, | |||
["dʒ"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["c"] = true, | |||
["ķ"] = true, | |||
["ts"] = true, | |||
["tș"] = true, | |||
["z"] = true, | |||
["x"] = true | |||
} | |||
local sibilant = { | |||
["s"] = true, | |||
["ș"] = true, | |||
["z"] = true, | |||
["x"] = true | |||
} | } | ||
local Cv_fixed = { -- <Cv> combinations that uniformly pronounced without /v/ -- | local Cv_fixed = { | ||
-- <Cv> combinations that uniformly pronounced without /v/ -- | |||
["b"] = true, | |||
["f"] = true, | |||
["p"] = true | |||
} | } | ||
local Cr = { -- consonants that can precede /r/ in a medial onset -- | local Cr = { | ||
-- consonants that can precede /r/ in a medial onset -- | |||
["p"] = true, | |||
["k"] = true, | |||
["b"] = true, | |||
["g"] = true, | |||
["ð"] = true, | |||
["f"] = true, | |||
["ħ"] = true, | |||
["h"] = true, | |||
["v"] = true, | |||
["þ"] = true, | |||
["t"] = true, | |||
["d"] = true | |||
} | } | ||
local Cl = { -- consonants that can precede /l/ in a medial onset -- | local Cl = { | ||
-- consonants that can precede /l/ in a medial onset -- | |||
["p"] = true, | |||
["k"] = true, | |||
["b"] = true, | |||
["g"] = true, | |||
["ð"] = true, | |||
["f"] = true, | |||
["ħ"] = true, | |||
["h"] = true, | |||
["v"] = true, | |||
["þ"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["z"] = true, | |||
["x"] = true | |||
} | } | ||
local Cv = { -- consonants that can precede /v/ in a medial onset -- | local Cv = { | ||
-- consonants that can precede /v/ in a medial onset -- | |||
["d"] = true, | |||
["ð"] = true, | |||
["dz"] = true, | |||
["dʒ"] = true, | |||
["g"] = true, | |||
["ħ"] = true, | |||
["h"] = true, | |||
["k"] = true, | |||
["l"] = true, | |||
["r"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["t"] = true, | |||
["c"] = true, | |||
["ķ"] = true, | |||
["ts"] = true, | |||
["tș"] = true, | |||
["z"] = true, | |||
["x"] = true, | |||
["þ"] = true, | |||
["m"] = true, | |||
["n"] = true | |||
} | } | ||
local CN = { -- consonants that can precede nasals in a medial onset -- (non-glide continuants) | local CN = { | ||
-- consonants that can precede nasals in a medial onset -- (non-glide continuants) | |||
["ð"] = true, | |||
["f"] = true, | |||
["ħ"] = true, | |||
["h"] = true, | |||
["l"] = true, | |||
["r"] = true, | |||
["s"] = true, | |||
["ș"] = true, | |||
["v"] = true, | |||
["z"] = true, | |||
["x"] = true, | |||
["þ"] = true | |||
} | } | ||
local irregular = { | local irregular = { | ||
["eurú"] = {"eu", "r", "ú"}, | |||
["eurov"] = {"eu", "r", "o", "v"}, | |||
["zeus"] = {"z", "eu", "s"}, | |||
["zeud"] = {"z", "eu", "d"} | |||
} | } | ||
local irregular_grapheme = { "eu" | local irregular_grapheme = {"eu"} | ||
local irregular_vowel = { "eu" | local irregular_vowel = {"eu"} | ||
function graphemise(word) | function graphemise(word) | ||
--mw.log("————— GRAPHEMISING —————") | |||
local graphemes = {} | |||
while mw.ustring.len(word) > 0 do | |||
local limit = mw.ustring.len(word) | |||
for i = 1, limit do | |||
local orig_string = mw.ustring.sub(word, i) | |||
local check_string = mw.ustring.lower(orig_string) | |||
if irregular[check_string] then | |||
--mw.log("Irregular spelling recognised: " .. orig_string) | |||
local capitals = {} | |||
-- get capital data -- | |||
for j = 1, #orig_string do | |||
if mw.ustring.sub(orig_string, j, j) == mw.ustring.upper(mw.ustring.sub(check_string, j, j)) then | |||
capitals[j] = true | |||
end | |||
end | |||
-- ammend irregular data to match capitals -- | |||
local index = 1 | |||
for j = 1, #irregular[check_string] do | |||
local new_data = "" | |||
for k = 1, #irregular[check_string][j] do | |||
local letter = mw.ustring.sub(irregular[check_string][j], k, k) | |||
if capitals[index] then | |||
letter = mw.ustring.upper(letter) | |||
end | |||
new_data = new_data .. letter | |||
index = index + 1 | |||
end | |||
irregular[check_string][j] = new_data | |||
end | |||
for j = 0, #irregular[check_string] - 1 do | |||
table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j]) | |||
--mw.log("<" .. irregular[check_string][#irregular[check_string] - j] .. "> logged.") | |||
end | |||
word = mw.ustring.sub(word, 1, i - 1) | |||
break | |||
elseif grapheme[check_string] then | |||
table.insert(graphemes, 1, orig_string) | |||
--mw.log("<" .. orig_string .. "> logged.") | |||
word = mw.ustring.sub(word, 1, i - 1) | |||
break | |||
elseif mw.ustring.len(check_string) == 1 then | |||
table.insert(graphemes, 1, orig_string) | |||
--mw.log("<" .. orig_string .. "> logged.") | |||
word = mw.ustring.sub(word, 1, i - 1) | |||
break | |||
end | |||
end | |||
end | |||
--mw.log("String exhausted.") | |||
--mw.log("<" .. table.concat(graphemes, "><") .. ">") | |||
return graphemes | |||
end | end | ||
function syllabify(graphemes) | function syllabify(graphemes) | ||
--mw.log("————— SYLLABIFYING —————") | |||
for _, g in ipairs(irregular_grapheme) do | |||
grapheme[g] = true | |||
end | |||
for _, g in ipairs(irregular_vowel) do | |||
vowel[g] = true | |||
end | |||
local function swap(Pos1, Pos2) | |||
--mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1) | |||
local temp = graphemes[Pos1] | |||
graphemes[Pos1] = graphemes[Pos2] | |||
graphemes[Pos2] = temp | |||
end | |||
local i = 1 | |||
while true do -- add σ before each vowel | |||
if graphemes[i] == nil then | |||
break | |||
end | |||
if vowel[mw.ustring.lower(graphemes[i])] then | |||
table.insert(graphemes, i, "σ") | |||
--mw.log("σ inserted in position " .. i) | |||
i = i + 2 | |||
else | |||
i = i + 1 | |||
end | |||
end | |||
i = #graphemes | |||
while true do | |||
local g_current = graphemes[i] | |||
local g_after = graphemes[i + 1] | |||
local g_prev = graphemes[i - 1] | |||
local g_prev2 = graphemes[i - 2] | |||
local g_prev3 = graphemes[i - 3] | |||
if g_current then | |||
g_current = mw.ustring.lower(g_current) | |||
end | |||
if g_after then | |||
g_after = mw.ustring.lower(g_after) | |||
end | |||
if g_prev then | |||
g_prev = mw.ustring.lower(g_prev) | |||
end | |||
if g_prev2 then | |||
g_prev2 = mw.ustring.lower(g_prev2) | |||
end | |||
if g_prev3 then | |||
g_prev3 = mw.ustring.lower(g_prev3) | |||
end | |||
-- J ↔ σ | |||
if glide[g_prev] and g_current == "σ" and vowel[g_after] then | |||
-- (J · ) ↔ σ | |||
swap(i, i - 1) | |||
elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then | |||
-- C ↔ σ (J) | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif | |||
consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and | |||
not (g_prev == "j" and g_after == "j") | |||
then | |||
swap(i, i - 1) | |||
elseif | |||
consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and | |||
not (g_prev2 == "j" and g_after == "j") | |||
then | |||
-- C Cᵥ ↔ σ v | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then | |||
swap(i, i - 1) | |||
elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then | |||
-- Bv ↔ σ v | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then | |||
swap(i, i - 1) | |||
elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then | |||
-- C Cₙ ↔ σ N | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then | |||
swap(i, i - 1) | |||
elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then | |||
-- C Cᵣ ↔ σ r | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then | |||
swap(i, i - 1) | |||
elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then | |||
-- C Cₗ ↔ σ l | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then | |||
swap(i, i - 1) | |||
elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then | |||
-- Cs ↔ σ S | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif Cs[g_prev] and g_current == "σ" and sibilant[g_after] then | |||
swap(i, i - 1) | |||
elseif Cs[g_prev2] and affix[g_prev] and g_current == "σ" and sibilant[g_after] then | |||
swap(i, i - 1) | |||
swap(i - 1, i - 2) | |||
elseif g_current == "σ" then | |||
if i == 1 then | |||
table.remove(graphemes, 1) | |||
else | |||
for j = 1, i - 1 do | |||
if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then | |||
break | |||
elseif i - j == 1 then | |||
table.remove(graphemes, i) | |||
break | |||
elseif not affix[graphemes[i - j]] and not consonant[mw.ustring.lower(graphemes[i - j])] then | |||
table.remove(graphemes, i) | |||
break | |||
end | |||
end | |||
end | |||
end | |||
if i > 1 then | |||
i = i - 1 | |||
else | |||
break | |||
end | |||
end | |||
return graphemes | |||
end | end | ||
function export.generate(frame) | |||
local args = getArgs(frame) | |||
if args[1] == nil then | |||
error("Word needed") | |||
end | |||
local outputSyllables = args[1] | |||
outputSyllables = graphemise(outputSyllables) | |||
outputSyllables = syllabify(outputSyllables) | |||
local divider = args[2] | |||
if divider == nil then | |||
divider = "|" | |||
end | |||
outputSyllables = table.concat(outputSyllables) | |||
outputSyllables = mw.ustring.gsub(outputSyllables, "(σ)", divider) | |||
return outputSyllables | |||
end | end | ||
function export.generate_array(frame) | function export.generate_array(frame) | ||
local args = getArgs(frame) | |||
if args[1] == nil then | |||
error("Word needed") | |||
end | |||
local outputSyllables = args[1] | |||
outputSyllables = graphemise(outputSyllables) | |||
outputSyllables = syllabify(outputSyllables) | |||
return outputSyllables | |||
end | end | ||
Latest revision as of 12:07, 21 July 2024
Used in {{rad-syllables}}. This module produces a syllabification from a single input word:
{{#invoke:rad-syllables|generate|asehņieșe}}
→ a|seh|ņie|șe
You can specify a custom delimiter in the second parameter:
{{#invoke:rad-syllables|generate|asehņieșe|・}}
→ a・seh・ņie・șe
Capitals will be maintained:
{{#invoke:rad-syllables|generate|aSeHņIeȘe}}
→ a|SeH|ņIe|Șe
local export = {}
local getArgs = require("Module:Arguments").getArgs
-- DATA --
-- Lookup set of every spelling unit (single letters, digraphs and trigraphs)
-- that graphemise() accepts as one grapheme. Keys are lowercase.
local grapheme = {}
for _, unit in ipairs({
    "a", "á", "à", "â", "ả", "ai", "ài", "âi", "ao", "ào", "aoi", "au",
    "b", "c", "d", "ð", "dx", "dz",
    "e", "é", "è", "ea", "ei",
    "f", "g", "h", "ħ",
    "i", "ì", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
    "j", "ĵ", "k", "ķ", "l", "m", "n", "ņ",
    "o", "ó", "ò", "ø", "oa", "øa", "øi",
    "p", "q", "r", "s", "ș", "t", "ts", "tș",
    "u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
    "v", "w", "x",
    "y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea",
    "z", "þ", "gj", "lj"
}) do
    grapheme[unit] = true
end
-- Lookup set of the graphemes that count as vowels (syllable nuclei);
-- syllabify() places a syllable marker in front of each of them.
local vowel = {}
for _, nucleus in ipairs({
    "a", "á", "à", "â", "ả", "ai", "ài", "âi", "ao", "ào", "aoi", "au",
    "e", "é", "è", "ea", "ei",
    "i", "ì", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
    "o", "ó", "ò", "ø", "oa", "øa", "øi",
    "u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
    "y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea"
}) do
    vowel[nucleus] = true
end
-- Lookup set of the graphemes that count as consonants (glides included).
local consonant = {}
for _, unit in ipairs({
    "b", "c", "d", "ð", "dx", "dz", "f", "g", "h", "ħ", "j", "ĵ",
    "k", "ķ", "l", "m", "n", "ņ", "p", "q", "r", "s", "ș",
    "t", "ts", "tș", "v", "w", "x", "z", "þ", "gj", "lj"
}) do
    consonant[unit] = true
end
-- Punctuation that may sit between graphemes; the syllabify() rules let a
-- syllable marker move across one of these together with the consonant
-- that precedes it.
local affix = {}
for _, mark in ipairs({"·", "-", "’", "‘"}) do
    affix[mark] = true
end

-- Nasal consonants.
local nasal = {}
for _, unit in ipairs({"m", "n", "ņ"}) do
    nasal[unit] = true
end

-- Glide consonants.
local glide = {}
for _, unit in ipairs({"j", "ĵ", "w", "gj", "lj"}) do
    glide[unit] = true
end
-- Consonants that may precede a sibilant inside a medial onset (used by the
-- "Cs ↔ σ S" rule in syllabify()).
-- NOTE(review): "dʒ" can never be produced by graphemise(), because it is not
-- in the grapheme inventory (the inventory has "dx" instead). The key is kept
-- as-is; confirm whether "dx" was intended.
local Cs = {
    ["d"] = true,
    ["t"] = true,
    ["dz"] = true,
    ["dʒ"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["c"] = true,
    ["ķ"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["z"] = true,
    ["x"] = true
}
-- Sibilant fricatives. An earlier, wider `local sibilant` table (which also
-- listed the affricates) used to be declared above `Cs`; it was shadowed by
-- this declaration before anything read it, so only this table is kept.
local sibilant = {
    ["s"] = true,
    ["ș"] = true,
    ["z"] = true,
    ["x"] = true
}
-- Consonant classes consulted by the medial-onset rules in syllabify().
local Cv_fixed, Cr, Cl, Cv, CN
do
    -- Turn a list of graphemes into a lookup set (grapheme -> true).
    local function set(members)
        local lookup = {}
        for _, member in ipairs(members) do
            lookup[member] = true
        end
        return lookup
    end

    -- <Cv> combinations that are uniformly pronounced without /v/
    Cv_fixed = set({"b", "f", "p"})

    -- consonants that can precede /r/ in a medial onset
    Cr = set({"p", "k", "b", "g", "ð", "f", "ħ", "h", "v", "þ", "t", "d"})

    -- consonants that can precede /l/ in a medial onset
    Cl = set({
        "p", "k", "b", "g", "ð", "f", "ħ", "h", "v", "þ",
        "s", "ș", "z", "x"
    })

    -- consonants that can precede /v/ in a medial onset
    Cv = set({
        "d", "ð", "dz", "dʒ", "g", "ħ", "h", "k", "l", "r", "s", "ș",
        "t", "c", "ķ", "ts", "tș", "z", "x", "þ", "m", "n"
    })

    -- consonants that can precede nasals in a medial onset
    -- (non-glide continuants)
    CN = set({"ð", "f", "ħ", "h", "l", "r", "s", "ș", "v", "z", "x", "þ"})
end
-- Spellings that graphemise() splits as listed here instead of through the
-- regular grapheme inventory. Keys are lowercase; each value is the ordered
-- list of graphemes for that spelling.
local irregular = {}
irregular["eurú"] = {"eu", "r", "ú"}
irregular["eurov"] = {"eu", "r", "o", "v"}
irregular["zeus"] = {"z", "eu", "s"}
irregular["zeud"] = {"z", "eu", "d"}

-- Graphemes / vowels that occur only in the irregular spellings above;
-- syllabify() iterates over these lists.
local irregular_grapheme = {"eu"}
local irregular_vowel = {"eu"}
--- Split a word into its graphemes, preserving the original capitalisation.
--
-- The word is consumed from its end. On every pass the suffixes of the
-- remaining text are tried from longest to shortest; the first suffix whose
-- lowercase form is an irregular spelling, a known grapheme, or a single
-- character is cut off and prepended to the result.
--
-- The function is left global, as in the original module, so that any code
-- relying on that keeps working.
--
-- @param word string to split
-- @return sequence of grapheme strings, in reading order, carrying the
--         capitalisation of the input
function graphemise(word)
    local graphemes = {}
    local remaining = mw.ustring.len(word)
    while remaining > 0 do
        for i = 1, remaining do
            local orig_string = mw.ustring.sub(word, i)
            local check_string = mw.ustring.lower(orig_string)
            local pieces = irregular[check_string]
            if pieces then
                -- Cut the original-case text into slices as long as the
                -- irregular pieces. Lengths are counted in characters (not
                -- bytes) and the shared `irregular` table is only read, so
                -- the capitalisation of one occurrence cannot leak into the
                -- next one.
                local slices = {}
                local position = 1
                for j = 1, #pieces do
                    local length = mw.ustring.len(pieces[j])
                    slices[j] = mw.ustring.sub(orig_string, position, position + length - 1)
                    position = position + length
                end
                -- Prepend back to front so the slices end up in reading order.
                for j = #slices, 1, -1 do
                    table.insert(graphemes, 1, slices[j])
                end
                word = mw.ustring.sub(word, 1, i - 1)
                break
            elseif grapheme[check_string] or mw.ustring.len(check_string) == 1 then
                -- A known grapheme, or an unrecognised single character
                -- (which is passed through as its own grapheme so the loop
                -- always makes progress).
                table.insert(graphemes, 1, orig_string)
                word = mw.ustring.sub(word, 1, i - 1)
                break
            end
        end
        remaining = mw.ustring.len(word)
    end
    return graphemes
end
--- Insert syllable-boundary markers ("σ") into a list of graphemes.
--
-- A marker is first placed in front of every vowel grapheme. The list is
-- then walked from its last element to its first, and each marker is moved
-- leftwards over the consonants (and an optional affix character) that the
-- rules below admit to the onset of the following syllable. Because the
-- index keeps decreasing, a moved marker is examined again at its new
-- position. A marker is deleted when it is the first element, or when the
-- scan to its left reaches the first element or an unrecognised character
-- before meeting a vowel or another marker.
--
-- The function is left global, as in the original module.
--
-- @param graphemes sequence of grapheme strings; modified in place
-- @return the same table, with "σ" entries at the syllable boundaries
function syllabify(graphemes)
    -- The irregular vowels must be recognised as syllable nuclei here. The
    -- grapheme inventory is deliberately NOT extended: this function never
    -- reads it, and adding "eu" to it would change how later graphemise()
    -- calls in the same Lua state split words.
    for _, g in ipairs(irregular_vowel) do
        vowel[g] = true
    end

    local function swap(Pos1, Pos2)
        graphemes[Pos1], graphemes[Pos2] = graphemes[Pos2], graphemes[Pos1]
    end

    -- Lowercase a grapheme for table lookups; nil (out of range) stays nil.
    local function lower(g)
        return g and mw.ustring.lower(g)
    end

    -- add σ before each vowel
    local i = 1
    while graphemes[i] ~= nil do
        if vowel[mw.ustring.lower(graphemes[i])] then
            table.insert(graphemes, i, "σ")
            i = i + 2 -- skip the marker and the vowel it introduces
        else
            i = i + 1
        end
    end

    i = #graphemes
    while true do
        local g_current = lower(graphemes[i])
        local g_after = lower(graphemes[i + 1])
        local g_prev = lower(graphemes[i - 1])
        local g_prev2 = lower(graphemes[i - 2])
        local g_prev3 = lower(graphemes[i - 3])

        if glide[g_prev] and g_current == "σ" and vowel[g_after] then
            -- J ↔ σ
            swap(i, i - 1)
        elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then
            -- (J · ) ↔ σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif
            consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
                not (g_prev == "j" and g_after == "j")
         then
            -- C ↔ σ (J)
            swap(i, i - 1)
        elseif
            consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
                not (g_prev2 == "j" and g_after == "j")
         then
            -- (C · ) ↔ σ (J)
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then
            -- C Cᵥ ↔ σ v
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
            -- C (Cᵥ · ) ↔ σ v
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then
            -- Bv ↔ σ v
            swap(i, i - 1)
        elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
            -- (B · ) ↔ σ v
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then
            -- C Cₙ ↔ σ N
            swap(i, i - 1)
        elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then
            -- C (Cₙ · ) ↔ σ N
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then
            -- C Cᵣ ↔ σ r
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then
            -- C (Cᵣ · ) ↔ σ r
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then
            -- C Cₗ ↔ σ l
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then
            -- C (Cₗ · ) ↔ σ l
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif Cs[g_prev] and g_current == "σ" and sibilant[g_after] then
            -- Cs ↔ σ S
            swap(i, i - 1)
        elseif Cs[g_prev2] and affix[g_prev] and g_current == "σ" and sibilant[g_after] then
            -- (Cs · ) ↔ σ S
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif g_current == "σ" then
            -- No rule moves this marker any further: keep it only if a vowel
            -- or another marker lies somewhere to its left.
            if i == 1 then
                table.remove(graphemes, 1)
            else
                for j = 1, i - 1 do
                    local earlier = graphemes[i - j]
                    if vowel[mw.ustring.lower(earlier)] or earlier == "σ" then
                        break
                    elseif i - j == 1 then
                        -- reached the start of the word without a vowel
                        table.remove(graphemes, i)
                        break
                    elseif not affix[earlier] and not consonant[mw.ustring.lower(earlier)] then
                        -- unrecognised character before any vowel
                        table.remove(graphemes, i)
                        break
                    end
                end
            end
        end

        if i > 1 then
            i = i - 1
        else
            break
        end
    end
    return graphemes
end
--- Syllabify a word and return it as one string with a divider between
-- syllables.
--
-- @param frame frame (or argument table) accepted by Module:Arguments'
--        getArgs; argument 1 is the word, argument 2 an optional divider
--        (defaults to "|")
-- @return the word with the divider inserted at every syllable boundary
-- @raise "Word needed" when argument 1 is missing
function export.generate(frame)
    local args = getArgs(frame)
    if args[1] == nil then
        error("Word needed")
    end
    local divider = args[2]
    if divider == nil then
        divider = "|"
    end
    local pieces = syllabify(graphemise(args[1]))
    -- Substitute the markers before joining instead of running gsub on the
    -- joined string: a gsub replacement string gives "%" a special meaning,
    -- so a divider containing "%" would raise an error or be misread as a
    -- capture reference.
    for index = 1, #pieces do
        if pieces[index] == "σ" then
            pieces[index] = divider
        end
    end
    return table.concat(pieces)
end
--- Syllabify a word and return the raw grapheme list, with "σ" entries
-- marking the syllable boundaries.
--
-- @param frame frame (or argument table) accepted by Module:Arguments'
--        getArgs; argument 1 is the word
-- @return sequence of graphemes and "σ" markers
-- @raise "Word needed" when argument 1 is missing
function export.generate_array(frame)
    local word = getArgs(frame)[1]
    if word == nil then
        error("Word needed")
    end
    local graphemes = graphemise(word)
    local syllabified = syllabify(graphemes)
    return syllabified
end
return export
--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
]]