Module:rad-syllables: Difference between revisions

Added capitalisation mechanism
m (Fixed typo in irregular data)
(Added capitalisation mechanism)
Line 78: Line 78:
}
}


local irregular_grapheme = {"eu"}
local irregular_grapheme = { "eu", }
local irregular_vowel = {"eu"}
local irregular_vowel = { "eu", }


function graphemise(word)
function graphemise(word)
Line 91: Line 91:
for i = 1, limit do
for i = 1, limit do
local check_string = mw.ustring.sub(word, i)
local orig_string = mw.ustring.sub(word, i)
local check_string = mw.ustring.lower(orig_string)
if irregular[check_string] then
if irregular[check_string] then
mw.log("Irregular spelling recognised: " .. check_string)
mw.log("Irregular spelling recognised: " .. orig_string)
local capitals = {}
-- get capital data --
for j = 1, #orig_string do
if mw.ustring.sub(orig_string, j, j) == mw.ustring.upper(mw.ustring.sub(check_string, j, j)) then
capitals[j] = true
end
end
-- amend irregular data to match capitals --
local index = 1
for j = 1, #irregular[check_string] do
local new_data = ""
for k = 1, #irregular[check_string][j] do
local letter = mw.ustring.sub(irregular[check_string][j], k, k)
if capitals[index] then
letter = mw.ustring.upper(letter)
end
new_data = new_data .. letter
index = index + 1
end
irregular[check_string][j] = new_data
end
for j = 0, #irregular[check_string] - 1 do
for j = 0, #irregular[check_string] - 1 do
table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j])
table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j])
Line 102: Line 132:
break
break
elseif grapheme[check_string] then
elseif grapheme[check_string] then
table.insert(graphemes, 1, check_string)
table.insert(graphemes, 1, orig_string)
mw.log("<" .. check_string .. "> logged.")
mw.log("<" .. orig_string .. "> logged.")
word = mw.ustring.sub(word, 1, i - 1)
word = mw.ustring.sub(word, 1, i - 1)
break
break
elseif mw.ustring.len(check_string) == 1 then
elseif mw.ustring.len(check_string) == 1 then
table.insert(graphemes, 1, check_string)
table.insert(graphemes, 1, orig_string)
mw.log("<" .. check_string .. "> logged.")
mw.log("<" .. orig_string .. "> logged.")
word = mw.ustring.sub(word, 1, i - 1)
word = mw.ustring.sub(word, 1, i - 1)
break
break
Line 126: Line 156:
mw.log("————— SYLLABIFYING —————")
mw.log("————— SYLLABIFYING —————")
table.insert(grapheme, irregular_grapheme)
for _, g in ipairs(irregular_grapheme) do
table.insert(vowel, irregular_vowel)
grapheme[g] = true
end
for _, g in ipairs(irregular_vowel) do
vowel[g] = true
end
local function swap(Pos1, Pos2)
local function swap(Pos1, Pos2)
mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
local tmp = graphemes[Pos1]
local temp = graphemes[Pos1]
graphemes[Pos1] = graphemes[Pos2]
graphemes[Pos1] = graphemes[Pos2]
graphemes[Pos2] = tmp
graphemes[Pos2] = temp
end
end


Line 142: Line 176:
if graphemes[i] == nil then break end
if graphemes[i] == nil then break end
if vowel[graphemes[i]] then
if vowel[mw.ustring.lower(graphemes[i])] then
table.insert(graphemes, i, "σ")
table.insert(graphemes, i, "σ")
mw.log("σ inserted in position " .. i)
mw.log("σ inserted in position " .. i)
Line 158: Line 192:
local g_prev2 = graphemes[i-2]
local g_prev2 = graphemes[i-2]
local g_prev3 = graphemes[i-3]
local g_prev3 = graphemes[i-3]
if g_current then
g_current = mw.ustring.lower(g_current)
end
if g_after then
g_after = mw.ustring.lower(g_after)
end
if g_prev then
g_prev = mw.ustring.lower(g_prev)
end
if g_prev2 then
g_prev2 = mw.ustring.lower(g_prev2)
end
if g_prev3 then
g_prev3 = mw.ustring.lower(g_prev3)
end
-- J ↔ σ
-- J ↔ σ
Line 221: Line 271:
else
else
for j = 1, i-1 do
for j = 1, i-1 do
if vowel[graphemes[i - j]] or graphemes[i - j] == "σ" then
if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then
break
break
elseif i - j == 1 then
elseif i - j == 1 then
table.remove(graphemes, i)
table.remove(graphemes, i)
break
break
elseif not affix[graphemes[i-j]] and not consonant[graphemes[i-j]] then
elseif not affix[graphemes[i-j]] and not consonant[mw.ustring.lower(graphemes[i-j])] then
table.remove(graphemes, i)
table.remove(graphemes, i)
break
break