10,731
edits
TheNightAvl (talk | contribs) m (Fixed typo in irregular data) |
TheNightAvl (talk | contribs) (Added capitalisation mechanism) |
||
Line 78: | Line 78: | ||
} | } | ||
local irregular_grapheme = {"eu"} | local irregular_grapheme = { "eu", } | ||
local irregular_vowel = {"eu"} | local irregular_vowel = { "eu", } | ||
function graphemise(word) | function graphemise(word) | ||
Line 91: | Line 91: | ||
for i = 1, limit do | for i = 1, limit do | ||
local | local orig_string = mw.ustring.sub(word, i) | ||
local check_string = mw.ustring.lower(orig_string) | |||
if irregular[check_string] then | if irregular[check_string] then | ||
mw.log("Irregular spelling recognised: " .. check_string) | mw.log("Irregular spelling recognised: " .. orig_string) | ||
local capitals = {} | |||
-- get capital data -- | |||
for j = 1, #orig_string do | |||
if mw.ustring.sub(orig_string, j, j) == mw.ustring.upper(mw.ustring.sub(check_string, j, j)) then | |||
capitals[j] = true | |||
end | |||
end | |||
-- amend irregular data to match capitals -- | |||
local index = 1 | |||
for j = 1, #irregular[check_string] do | |||
local new_data = "" | |||
for k = 1, #irregular[check_string][j] do | |||
local letter = mw.ustring.sub(irregular[check_string][j], k, k) | |||
if capitals[index] then | |||
letter = mw.ustring.upper(letter) | |||
end | |||
new_data = new_data .. letter | |||
index = index + 1 | |||
end | |||
irregular[check_string][j] = new_data | |||
end | |||
for j = 0, #irregular[check_string] - 1 do | for j = 0, #irregular[check_string] - 1 do | ||
table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j]) | table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j]) | ||
Line 102: | Line 132: | ||
break | break | ||
elseif grapheme[check_string] then | elseif grapheme[check_string] then | ||
table.insert(graphemes, 1, | table.insert(graphemes, 1, orig_string) | ||
mw.log("<" .. | mw.log("<" .. orig_string .. "> logged.") | ||
word = mw.ustring.sub(word, 1, i - 1) | word = mw.ustring.sub(word, 1, i - 1) | ||
break | break | ||
elseif mw.ustring.len(check_string) == 1 then | elseif mw.ustring.len(check_string) == 1 then | ||
table.insert(graphemes, 1, | table.insert(graphemes, 1, orig_string) | ||
mw.log("<" .. | mw.log("<" .. orig_string .. "> logged.") | ||
word = mw.ustring.sub(word, 1, i - 1) | word = mw.ustring.sub(word, 1, i - 1) | ||
break | break | ||
Line 126: | Line 156: | ||
mw.log("————— SYLLABIFYING —————") | mw.log("————— SYLLABIFYING —————") | ||
for _, g in ipairs(irregular_grapheme) do | |||
grapheme[g] = true | |||
end | |||
for _, g in ipairs(irregular_vowel) do | |||
vowel[g] = true | |||
end | |||
local function swap(Pos1, Pos2) | local function swap(Pos1, Pos2) | ||
mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1) | mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1) | ||
local | local temp = graphemes[Pos1] | ||
graphemes[Pos1] = graphemes[Pos2] | graphemes[Pos1] = graphemes[Pos2] | ||
graphemes[Pos2] = | graphemes[Pos2] = temp | ||
end | end | ||
Line 142: | Line 176: | ||
if graphemes[i] == nil then break end | if graphemes[i] == nil then break end | ||
if vowel[graphemes[i]] then | if vowel[mw.ustring.lower(graphemes[i])] then | ||
table.insert(graphemes, i, "σ") | table.insert(graphemes, i, "σ") | ||
mw.log("σ inserted in position " .. i) | mw.log("σ inserted in position " .. i) | ||
Line 158: | Line 192: | ||
local g_prev2 = graphemes[i-2] | local g_prev2 = graphemes[i-2] | ||
local g_prev3 = graphemes[i-3] | local g_prev3 = graphemes[i-3] | ||
if g_current then | |||
g_current = mw.ustring.lower(g_current) | |||
end | |||
if g_after then | |||
g_after = mw.ustring.lower(g_after) | |||
end | |||
if g_prev then | |||
g_prev = mw.ustring.lower(g_prev) | |||
end | |||
if g_prev2 then | |||
g_prev2 = mw.ustring.lower(g_prev2) | |||
end | |||
if g_prev3 then | |||
g_prev3 = mw.ustring.lower(g_prev3) | |||
end | |||
-- J ↔ σ | -- J ↔ σ | ||
Line 221: | Line 271: | ||
else | else | ||
for j = 1, i-1 do | for j = 1, i-1 do | ||
if vowel[graphemes[i - j]] or graphemes[i - j] == "σ" then | if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then | ||
break | break | ||
elseif i - j == 1 then | elseif i - j == 1 then | ||
table.remove(graphemes, i) | table.remove(graphemes, i) | ||
break | break | ||
elseif not affix[graphemes[i-j]] and not consonant[graphemes[i-j]] then | elseif not affix[graphemes[i-j]] and not consonant[mw.ustring.lower(graphemes[i-j])] then | ||
table.remove(graphemes, i) | table.remove(graphemes, i) | ||
break | break |