Module:rad-syllables: Difference between revisions

From Laenkea
Jump to navigation Jump to search
mNo edit summary
m (Beautified)
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
local export = {}
local export = {}
local getArgs = require('Module:Arguments').getArgs
local getArgs = require("Module:Arguments").getArgs


-- DATA --
-- DATA --


local grapheme = {
local grapheme = {
["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["f"] = true, ["g"] = true, ["h"] = true, ["ħ"] = true, ["i"] = true, ["í"] = true, ["ỉ"] = true, ["ie"] = true, ["ìe"] = true, ["ỉe"] = true, ["iea"] = true, ["j"] = true, ["ĵ"] = true, ["k"] = true, ["ķ"] = true, ["l"] = true, ["m"] = true, ["n"] = true, ["ņ"] = true, ["o"] = true, ["ó"] = true, ["ò"] = true, ["ø"] = true, ["oa"] = true, ["øa"] = true, ["øi"] = true, ["p"] = true, ["q"] = true, ["r"] = true, ["s"] = true, ["ș"] = true, ["t"] = true, ["ts"] = true, ["tș"] = true, ["u"] = true, ["ú"] = true, ["ù"] = true, ["û"] = true, ["ū"] = true, ["ủ"] = true, ["ui"] = true, ["uo"] = true, ["ủo"] = true, ["uoa"] = true, ["v"] = true, ["w"] = true, ["x"] = true, ["y"] = true, ["ỳ"] = true, ["ý"] = true, ["ỷ"] = true, ["ye"] = true, ["ỷe"] = true, ["yea"] = true, ["z"] = true, ["þ"] = true,
    ["a"] = true,
    ["á"] = true,
["gj"] = true, ["lj"] = true,
    ["à"] = true,
    ["â"] = true,
    ["ả"] = true,
    ["ai"] = true,
    ["ài"] = true,
    ["âi"] = true,
    ["ao"] = true,
    ["ào"] = true,
    ["aoi"] = true,
    ["au"] = true,
    ["b"] = true,
    ["c"] = true,
    ["d"] = true,
    ["ð"] = true,
    ["dx"] = true,
    ["dz"] = true,
    ["e"] = true,
    ["é"] = true,
    ["è"] = true,
    ["ea"] = true,
    ["ei"] = true,
    ["f"] = true,
    ["g"] = true,
    ["h"] = true,
    ["ħ"] = true,
    ["i"] = true,
    ["ì"] = true,
    ["í"] = true,
    ["ỉ"] = true,
    ["ie"] = true,
    ["ìe"] = true,
    ["ỉe"] = true,
    ["iea"] = true,
    ["j"] = true,
    ["ĵ"] = true,
    ["k"] = true,
    ["ķ"] = true,
    ["l"] = true,
    ["m"] = true,
    ["n"] = true,
    ["ņ"] = true,
    ["o"] = true,
    ["ó"] = true,
    ["ò"] = true,
    ["ø"] = true,
    ["oa"] = true,
    ["øa"] = true,
    ["øi"] = true,
    ["p"] = true,
    ["q"] = true,
    ["r"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["t"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["u"] = true,
    ["ú"] = true,
    ["ù"] = true,
    ["û"] = true,
    ["ū"] = true,
    ["ủ"] = true,
    ["ui"] = true,
    ["uo"] = true,
    ["ủo"] = true,
    ["uoa"] = true,
    ["v"] = true,
    ["w"] = true,
    ["x"] = true,
    ["y"] = true,
    ["ỳ"] = true,
    ["ý"] = true,
    ["ỷ"] = true,
    ["ye"] = true,
    ["ỷe"] = true,
    ["yea"] = true,
    ["z"] = true,
    ["þ"] = true,
    ["gj"] = true,
    ["lj"] = true
}
}


local vowel = {
local vowel = {
["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["i"] = true, ["í"] = true, ["ỉ"] = true, ["ie"] = true, ["ìe"] = true, ["ỉe"] = true, ["iea"] = true, ["o"] = true, ["ó"] = true, ["ò"] = true, ["ø"] = true, ["oa"] = true, ["øa"] = true, ["øi"] = true, ["u"] = true, ["ú"] = true, ["ù"] = true, ["û"] = true, ["ū"] = true, ["ủ"] = true, ["ui"] = true, ["uo"] = true, ["ủo"] = true, ["uoa"] = true, ["y"] = true, ["ỳ"] = true, ["ý"] = true, ["ỷ"] = true, ["ye"] = true, ["ỷe"] = true, ["yea"] = true,
    ["a"] = true,
    ["á"] = true,
    ["à"] = true,
    ["â"] = true,
    ["ả"] = true,
    ["ai"] = true,
    ["ài"] = true,
    ["âi"] = true,
    ["ao"] = true,
    ["ào"] = true,
    ["aoi"] = true,
    ["au"] = true,
    ["e"] = true,
    ["é"] = true,
    ["è"] = true,
    ["ea"] = true,
    ["ei"] = true,
    ["i"] = true,
    ["ì"] = true,
    ["í"] = true,
    ["ỉ"] = true,
    ["ie"] = true,
    ["ìe"] = true,
    ["ỉe"] = true,
    ["iea"] = true,
    ["o"] = true,
    ["ó"] = true,
    ["ò"] = true,
    ["ø"] = true,
    ["oa"] = true,
    ["øa"] = true,
    ["øi"] = true,
    ["u"] = true,
    ["ú"] = true,
    ["ù"] = true,
    ["û"] = true,
    ["ū"] = true,
    ["ủ"] = true,
    ["ui"] = true,
    ["uo"] = true,
    ["ủo"] = true,
    ["uoa"] = true,
    ["y"] = true,
    ["ỳ"] = true,
    ["ý"] = true,
    ["ỷ"] = true,
    ["ye"] = true,
    ["ỷe"] = true,
    ["yea"] = true
}
}


local consonant = {
local consonant = {
["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["f"] = true, ["g"] = true, ["h"] = true, ["ħ"] = true, ["j"] = true, ["ĵ"] = true, ["k"] = true, ["ķ"] = true, ["l"] = true, ["m"] = true, ["n"] = true, ["ņ"] = true, ["p"] = true, ["q"] = true, ["r"] = true, ["s"] = true, ["ș"] = true, ["t"] = true, ["ts"] = true, ["tș"] = true, ["v"] = true, ["w"] = true, ["x"] = true, ["z"] = true, ["þ"] = true,
    ["b"] = true,
    ["c"] = true,
["gj"] = true, ["lj"] = true,
    ["d"] = true,
    ["ð"] = true,
    ["dx"] = true,
    ["dz"] = true,
    ["f"] = true,
    ["g"] = true,
    ["h"] = true,
    ["ħ"] = true,
    ["j"] = true,
    ["ĵ"] = true,
    ["k"] = true,
    ["ķ"] = true,
    ["l"] = true,
    ["m"] = true,
    ["n"] = true,
    ["ņ"] = true,
    ["p"] = true,
    ["q"] = true,
    ["r"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["t"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["v"] = true,
    ["w"] = true,
    ["x"] = true,
    ["z"] = true,
    ["þ"] = true,
    ["gj"] = true,
    ["lj"] = true
}
}


local affix = {
local affix = {
["·"] = true, ["-"] = true, ["’"] = true, ["‘"] = true,
    ["·"] = true,
    ["-"] = true,
    ["’"] = true,
    ["‘"] = true
}
}


local nasal = {
local nasal = {
["m"] = true, ["n"] = true, ["ņ"] = true,
    ["m"] = true,
    ["n"] = true,
    ["ņ"] = true
}
}
 
local glide = {
local glide = {
["j"] = true, ["ĵ"] = true, ["w"] = true,
    ["j"] = true,
    ["ĵ"] = true,
["gj"] = true, ["lj"] = true,
    ["w"] = true,
    ["gj"] = true,
    ["lj"] = true
}
 
local sibilant = {
    ["dz"] = true,
    ["dʒ"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["c"] = true,
    ["ķ"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["z"] = true,
    ["x"] = true
}
 
local Cs = {
    ["d"] = true,
    ["t"] = true,
    ["dz"] = true,
    ["dʒ"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["c"] = true,
    ["ķ"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["z"] = true,
    ["x"] = true
}
 
local sibilant = {
    ["s"] = true,
    ["ș"] = true,
    ["z"] = true,
    ["x"] = true
}
}


local Cv_fixed = { -- <Cv> combinations that uniformly pronounced without /v/ --
local Cv_fixed = {
["b"] = true,
    -- <Cv> combinations that uniformly pronounced without /v/ --
["f"] = true,
    ["b"] = true,
["p"] = true,
    ["f"] = true,
    ["p"] = true
}
}


local Cr = { -- consonants that can precede /r/ in a medial onset --
local Cr = {
["p"] = true, ["k"] = true, ["b"] = true, ["g"] = true,  
    -- consonants that can precede /r/ in a medial onset --
    ["p"] = true,
["ð"] = true,
    ["k"] = true,
["f"] = true, ["ħ"] = true, ["h"] = true,
    ["b"] = true,
["v"] = true,
    ["g"] = true,
["þ"] = true,  
    ["ð"] = true,
    ["f"] = true,
["t"] = true, ["d"] = true,
    ["ħ"] = true,
    ["h"] = true,
    ["v"] = true,
    ["þ"] = true,
    ["t"] = true,
    ["d"] = true
}
}


local Cl = { -- consonants that can precede /l/ in a medial onset --
local Cl = {
["p"] = true, ["k"] = true, ["b"] = true, ["g"] = true,  
    -- consonants that can precede /l/ in a medial onset --
    ["p"] = true,
["ð"] = true,
    ["k"] = true,
["f"] = true, ["ħ"] = true, ["h"] = true,
    ["b"] = true,
["v"] = true,
    ["g"] = true,
["þ"] = true,  
    ["ð"] = true,
    ["f"] = true,
["s"] = true, ["ș"] = true, ["z"] = true, ["x"] = true,
    ["ħ"] = true,
    ["h"] = true,
    ["v"] = true,
    ["þ"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["z"] = true,
    ["x"] = true
}
}


local Cv = { -- consonants that can precede /v/ in a medial onset --
local Cv = {
["d"] = true, ["ð"] = true, ["dz"] = true,
    -- consonants that can precede /v/ in a medial onset --
["dʒ"] = true, ["g"] = true, ["ħ"] = true, ["h"] = true,
    ["d"] = true,
["k"] = true, ["l"] = true, ["r"] = true, ["s"] = true, ["ș"] = true,
    ["ð"] = true,
["t"] = true, ["c"] = true, ["ķ"] = true, ["ts"] = true, ["tș"] = true,  
    ["dz"] = true,
["z"] = true, ["x"] = true, ["þ"] = true, ["m"] = true, ["n"] = true,
    ["dʒ"] = true,
    ["g"] = true,
    ["ħ"] = true,
    ["h"] = true,
    ["k"] = true,
    ["l"] = true,
    ["r"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["t"] = true,
    ["c"] = true,
    ["ķ"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["z"] = true,
    ["x"] = true,
    ["þ"] = true,
    ["m"] = true,
    ["n"] = true
}
}


local CN = { -- consonants that can precede nasals in a medial onset -- (non-glide continuants)
local CN = {
["ð"] = true,
    -- consonants that can precede nasals in a medial onset -- (non-glide continuants)
["f"] = true, ["ħ"] = true, ["h"] = true,
    ["ð"] = true,
["l"] = true, ["r"] = true, ["s"] = true, ["ș"] = true,
    ["f"] = true,
["v"] = true,
    ["ħ"] = true,
["z"] = true, ["x"] = true, ["þ"] = true,
    ["h"] = true,
    ["l"] = true,
    ["r"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["v"] = true,
    ["z"] = true,
    ["x"] = true,
    ["þ"] = true
}
}


local irregular = {
local irregular = {
["eurú"] = {"eu", "r", "ú"},
    ["eurú"] = {"eu", "r", "ú"},
["eurov"] = {"eu", "r", "o", "v"},
    ["eurov"] = {"eu", "r", "o", "v"},
["zeus"] = {"z", "eu", "s"},
    ["zeus"] = {"z", "eu", "s"},
["zeud"] = {"z", "eu", "d"},
    ["zeud"] = {"z", "eu", "d"}
}
}


local irregular_grapheme = { "eu", }
local irregular_grapheme = {"eu"}
local irregular_vowel = { "eu", }
local irregular_vowel = {"eu"}


function graphemise(word)
function graphemise(word)
mw.log("————— GRAPHEMISING —————")
    --mw.log("————— GRAPHEMISING —————")


local graphemes = {}
    local graphemes = {}
while mw.ustring.len(word) > 0 do
local limit = mw.ustring.len(word)
for i = 1, limit do
local orig_string = mw.ustring.sub(word, i)
local check_string = mw.ustring.lower(orig_string)
if irregular[check_string] then
mw.log("Irregular spelling recognised: " .. orig_string)
local capitals = {}
-- get capital data --
for j = 1, #orig_string do
if mw.ustring.sub(orig_string, j, j) == mw.ustring.upper(mw.ustring.sub(check_string, j, j)) then
capitals[j] = true
end
end
-- ammend irregular data to match capitals --
local index = 1
for j = 1, #irregular[check_string] do
local new_data = ""
for k = 1, #irregular[check_string][j] do
local letter = mw.ustring.sub(irregular[check_string][j], k, k)
if capitals[index] then
letter = mw.ustring.upper(letter)
end
new_data = new_data .. letter
index = index + 1
end
irregular[check_string][j] = new_data
end
for j = 0, #irregular[check_string] - 1 do
table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j])
mw.log("<" .. irregular[check_string][#irregular[check_string] - j] .. "> logged.")
end
word = mw.ustring.sub(word, 1, i - 1)
break
elseif grapheme[check_string] then
table.insert(graphemes, 1, orig_string)
mw.log("<" .. orig_string .. "> logged.")
word = mw.ustring.sub(word, 1, i - 1)
break
elseif mw.ustring.len(check_string) == 1 then
table.insert(graphemes, 1, orig_string)
mw.log("<" .. orig_string .. "> logged.")
word = mw.ustring.sub(word, 1, i - 1)
break
end
end
end
mw.log("String exhausted.")


mw.log("<" .. table.concat(graphemes, "><") .. ">")
    while mw.ustring.len(word) > 0 do
return graphemes
        local limit = mw.ustring.len(word)
 
        for i = 1, limit do
            local orig_string = mw.ustring.sub(word, i)
            local check_string = mw.ustring.lower(orig_string)
 
            if irregular[check_string] then
                --mw.log("Irregular spelling recognised: " .. orig_string)
                local capitals = {}
 
                -- get capital data --
                for j = 1, #orig_string do
                    if mw.ustring.sub(orig_string, j, j) == mw.ustring.upper(mw.ustring.sub(check_string, j, j)) then
                        capitals[j] = true
                    end
                end
 
                -- ammend irregular data to match capitals --
                local index = 1
                for j = 1, #irregular[check_string] do
                    local new_data = ""
 
                    for k = 1, #irregular[check_string][j] do
                        local letter = mw.ustring.sub(irregular[check_string][j], k, k)
 
                        if capitals[index] then
                            letter = mw.ustring.upper(letter)
                        end
 
                        new_data = new_data .. letter
                        index = index + 1
                    end
 
                    irregular[check_string][j] = new_data
                end
 
                for j = 0, #irregular[check_string] - 1 do
                    table.insert(graphemes, 1, irregular[check_string][#irregular[check_string] - j])
                    --mw.log("<" .. irregular[check_string][#irregular[check_string] - j] .. "> logged.")
                end
                word = mw.ustring.sub(word, 1, i - 1)
                break
            elseif grapheme[check_string] then
                table.insert(graphemes, 1, orig_string)
                --mw.log("<" .. orig_string .. "> logged.")
                word = mw.ustring.sub(word, 1, i - 1)
                break
            elseif mw.ustring.len(check_string) == 1 then
                table.insert(graphemes, 1, orig_string)
                --mw.log("<" .. orig_string .. "> logged.")
                word = mw.ustring.sub(word, 1, i - 1)
                break
            end
        end
    end
 
    --mw.log("String exhausted.")
 
    --mw.log("<" .. table.concat(graphemes, "><") .. ">")
    return graphemes
end
end


function syllabify(graphemes)
function syllabify(graphemes)
mw.log("————— SYLLABIFYING —————")
    --mw.log("————— SYLLABIFYING —————")
for _, g in ipairs(irregular_grapheme) do
grapheme[g] = true
end
for _, g in ipairs(irregular_vowel) do
vowel[g] = true
end
local function swap(Pos1, Pos2)
mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
local temp = graphemes[Pos1]
graphemes[Pos1] = graphemes[Pos2]
graphemes[Pos2] = temp
end


local i = 1
    for _, g in ipairs(irregular_grapheme) do
        grapheme[g] = true
while true do -- add σ before each vowel
    end
    for _, g in ipairs(irregular_vowel) do
if graphemes[i] == nil then break end
        vowel[g] = true
    end
if vowel[mw.ustring.lower(graphemes[i])] then
 
table.insert(graphemes, i, "σ")
    local function swap(Pos1, Pos2)
mw.log("σ inserted in position " .. i)
        --mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
i = i + 2
        local temp = graphemes[Pos1]
else
        graphemes[Pos1] = graphemes[Pos2]
i = i + 1
        graphemes[Pos2] = temp
end
    end
end
 
    local i = 1
i = #graphemes
 
while true do
    while true do -- add σ before each vowel
local g_current = graphemes[i]
        if graphemes[i] == nil then
local g_after = graphemes[i+1]
            break
local g_prev = graphemes[i-1]
        end
local g_prev2 = graphemes[i-2]
 
local g_prev3 = graphemes[i-3]
        if vowel[mw.ustring.lower(graphemes[i])] then
            table.insert(graphemes, i, "σ")
if g_current then
            --mw.log("σ inserted in position " .. i)
g_current = mw.ustring.lower(g_current)
            i = i + 2
end
        else
if g_after then
            i = i + 1
g_after = mw.ustring.lower(g_after)
        end
end
    end
if g_prev then
 
g_prev = mw.ustring.lower(g_prev)
    i = #graphemes
end
    while true do
if g_prev2 then
        local g_current = graphemes[i]
g_prev2 = mw.ustring.lower(g_prev2)
        local g_after = graphemes[i + 1]
end
        local g_prev = graphemes[i - 1]
if g_prev3 then
        local g_prev2 = graphemes[i - 2]
g_prev3 = mw.ustring.lower(g_prev3)
        local g_prev3 = graphemes[i - 3]
end
 
        if g_current then
-- J ↔ σ
            g_current = mw.ustring.lower(g_current)
if glide[g_prev] and g_current == "σ" and vowel[g_after] then
        end
swap(i, i-1)
        if g_after then
            g_after = mw.ustring.lower(g_after)
-- (J · ) ↔ σ
        end
elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then
        if g_prev then
swap(i, i-1)
            g_prev = mw.ustring.lower(g_prev)
swap(i-1, i-2)
        end
        if g_prev2 then
-- C ↔ σ (J)
            g_prev2 = mw.ustring.lower(g_prev2)
elseif consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) then
        end
swap(i, i-1)
        if g_prev3 then
            g_prev3 = mw.ustring.lower(g_prev3)
elseif consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) then
        end
swap(i, i-1)
 
swap(i-1, i-2)
        -- J ↔ σ
        if glide[g_prev] and g_current == "σ" and vowel[g_after] then
-- C Cᵥ ↔ σ v
            -- (J · ) ↔ σ
            swap(i, i - 1)
elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then
        elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then
swap(i, i-1)
            -- C ↔ σ (J)
            swap(i, i - 1)
elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
            swap(i - 1, i - 2)
swap(i, i-1)
        elseif
swap(i-1, i-2)
            consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
                not (g_prev == "j" and g_after == "j")
-- Bv ↔ σ v
        then
elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then
            swap(i, i - 1)
swap(i, i-1)
        elseif
            consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
                not (g_prev2 == "j" and g_after == "j")
swap(i, i-1)
        then
swap(i-1, i-2)
            -- C Cᵥ ↔ σ v
            swap(i, i - 1)
-- C Cₙ ↔ σ N
            swap(i - 1, i - 2)
elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then
        elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then
swap(i, i-1)
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then
            -- Bv ↔ σ v
swap(i, i-1)
            swap(i, i - 1)
swap(i-1, i-2)
            swap(i - 1, i - 2)
        elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then
-- C Cᵣ ↔ σ r
            swap(i, i - 1)
elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then
        elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
swap(i, i-1)
            -- C Cₙ ↔ σ N
            swap(i, i - 1)
elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then
            swap(i - 1, i - 2)
swap(i, i-1)
        elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then
swap(i-1, i-2)
            swap(i, i - 1)
        elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then
-- C Cₗ ↔ σ l
            -- C Cᵣ ↔ σ r
elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then
            swap(i, i - 1)
swap(i, i-1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then
elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then
            swap(i, i - 1)
swap(i, i-1)
        elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then
swap(i-1, i-2)
            -- C Cₗ ↔ σ l
elseif g_current == "σ" then
            swap(i, i - 1)
if i == 1 then
            swap(i - 1, i - 2)
table.remove(graphemes, 1)
        elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then
else
            swap(i, i - 1)
for j = 1, i-1 do
        elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then
if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then
            -- Cs ↔ σ S
break
            swap(i, i - 1)
elseif i - j == 1 then
            swap(i - 1, i - 2)
table.remove(graphemes, i)
        elseif Cs[g_prev] and g_current == "σ" and sibilant[g_after] then
break
            swap(i, i - 1)
elseif not affix[graphemes[i-j]] and not consonant[mw.ustring.lower(graphemes[i-j])] then
        elseif Cs[g_prev2] and affix[g_prev] and g_current == "σ" and sibilant[g_after] then
table.remove(graphemes, i)
            swap(i, i - 1)
break
            swap(i - 1, i - 2)
end
        elseif g_current == "σ" then
end
            if i == 1 then
end
                table.remove(graphemes, 1)
            else
end
                for j = 1, i - 1 do
                    if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then
if i > 1 then
                        break
i = i - 1
                    elseif i - j == 1 then
else break
                        table.remove(graphemes, i)
end
                        break
end
                    elseif not affix[graphemes[i - j]] and not consonant[mw.ustring.lower(graphemes[i - j])] then
                        table.remove(graphemes, i)
return graphemes
                        break
                    end
                end
            end
        end
 
        if i > 1 then
            i = i - 1
        else
            break
        end
    end
 
    return graphemes
end
end


function export.generate(frame)
    local args = getArgs(frame)
    if args[1] == nil then
        error("Word needed")
    end


function export.generate(frame)
    local outputSyllables = args[1]
local args = getArgs(frame)
 
    outputSyllables = graphemise(outputSyllables)
if args[1] == nil then
    outputSyllables = syllabify(outputSyllables)
error("Word needed")
 
end
    local divider = args[2]
 
local outputSyllables = args[1]
    if divider == nil then
        divider = "|"
outputSyllables = graphemise(outputSyllables)
    end
outputSyllables = syllabify(outputSyllables)
local divider = args[2]
if divider == nil then
divider = "|"
end


outputSyllables = table.concat(outputSyllables)
    outputSyllables = table.concat(outputSyllables)
outputSyllables = mw.ustring.gsub(outputSyllables, "(σ)", divider)
    outputSyllables = mw.ustring.gsub(outputSyllables, "(σ)", divider)


return outputSyllables
    return outputSyllables
end
end


function export.generate_array(frame)
function export.generate_array(frame)
local args = getArgs(frame)
    local args = getArgs(frame)
 
if args[1] == nil then
    if args[1] == nil then
error("Word needed")
        error("Word needed")
end
    end
 
local outputSyllables = args[1]
    local outputSyllables = args[1]
 
outputSyllables = graphemise(outputSyllables)
    outputSyllables = graphemise(outputSyllables)
outputSyllables = syllabify(outputSyllables)
    outputSyllables = syllabify(outputSyllables)
 
return outputSyllables
    return outputSyllables
end
end



Latest revision as of 12:07, 21 July 2024

Used in {{rad-syllables}}. This module produces syllabification from a single input: {{#invoke:rad-syllables|generate|asehņieșe}} → a|seh|ņie|șe

You can specify a custom delimiter in the second parameter: {{#invoke:rad-syllables|generate|asehņieșe|・}} → a・seh・ņie・șe

Capitals will be maintained: {{#invoke:rad-syllables|generate|aSeHņIeȘe}} → a|SeH|ņIe|Șe


local export = {}
local getArgs = require("Module:Arguments").getArgs

-- DATA --

-- Set of every recognised grapheme (single letters as well as multi-letter
-- spellings), keyed by lowercase spelling. graphemise() looks candidate
-- substrings up in this set.
local grapheme = {}
for _, spelling in ipairs({
    "a", "á", "à", "â", "ả", "ai", "ài", "âi", "ao", "ào", "aoi", "au",
    "b", "c", "d", "ð", "dx", "dz",
    "e", "é", "è", "ea", "ei",
    "f", "g", "h", "ħ",
    "i", "ì", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
    "j", "ĵ", "k", "ķ", "l", "m", "n", "ņ",
    "o", "ó", "ò", "ø", "oa", "øa", "øi",
    "p", "q", "r", "s", "ș", "t", "ts", "tș",
    "u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
    "v", "w", "x",
    "y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea",
    "z", "þ", "gj", "lj"
}) do
    grapheme[spelling] = true
end

-- Set of vowel graphemes (lowercase). syllabify() places a syllable marker
-- in front of every element found in this set.
local vowel = {}
for _, spelling in ipairs({
    "a", "á", "à", "â", "ả", "ai", "ài", "âi", "ao", "ào", "aoi", "au",
    "e", "é", "è", "ea", "ei",
    "i", "ì", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
    "o", "ó", "ò", "ø", "oa", "øa", "øi",
    "u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
    "y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea"
}) do
    vowel[spelling] = true
end

-- Set of consonant graphemes (lowercase), including the multi-letter
-- spellings "dx", "dz", "ts", "tș", "gj" and "lj".
local consonant = {}
for _, spelling in ipairs({
    "b", "c", "d", "ð", "dx", "dz",
    "f", "g", "h", "ħ",
    "j", "ĵ", "k", "ķ",
    "l", "m", "n", "ņ",
    "p", "q", "r", "s", "ș",
    "t", "ts", "tș",
    "v", "w", "x", "z", "þ",
    "gj", "lj"
}) do
    consonant[spelling] = true
end

-- Punctuation marks that syllabify() treats as affix separators: a syllable
-- marker may be moved across one of these together with the preceding
-- consonant.
local affix = {}
for _, mark in ipairs({"·", "-", "’", "‘"}) do
    affix[mark] = true
end

-- Set of nasal consonant graphemes.
local nasal = {}
for _, spelling in ipairs({"m", "n", "ņ"}) do
    nasal[spelling] = true
end

-- Set of glide graphemes; every member is also listed in `consonant`.
local glide = {}
for _, spelling in ipairs({"j", "ĵ", "w", "gj", "lj"}) do
    glide[spelling] = true
end

-- Consonants tested by syllabify() for the element before the syllable
-- marker when the element after the marker is in `sibilant`.
-- NOTE(review): "dʒ" is not present in `grapheme` (which lists "dx"), so
-- graphemise() never produces it as a single element and this entry cannot
-- match - confirm whether "dx" was intended.
local Cs = {
    ["d"] = true,
    ["t"] = true,
    ["dz"] = true,
    ["dʒ"] = true,
    ["s"] = true,
    ["ș"] = true,
    ["c"] = true,
    ["ķ"] = true,
    ["ts"] = true,
    ["tș"] = true,
    ["z"] = true,
    ["x"] = true
}

-- Set of sibilant graphemes.
-- A second, broader `local sibilant` (also containing dz, dʒ, c, ķ, ts, tș)
-- used to be declared before `Cs`; it was shadowed by this declaration and
-- therefore never read, so it has been removed. This table is the one that
-- was always in effect.
local sibilant = {
    ["s"] = true,
    ["ș"] = true,
    ["z"] = true,
    ["x"] = true
}

-- <Cv> combinations that are uniformly pronounced without /v/
local Cv_fixed = {}
for _, spelling in ipairs({"b", "f", "p"}) do
    Cv_fixed[spelling] = true
end

-- consonants that can precede /r/ in a medial onset
local Cr = {}
for _, spelling in ipairs({
    "p", "k", "b", "g",
    "ð", "f", "ħ", "h", "v", "þ",
    "t", "d"
}) do
    Cr[spelling] = true
end

-- consonants that can precede /l/ in a medial onset
local Cl = {}
for _, spelling in ipairs({
    "p", "k", "b", "g",
    "ð", "f", "ħ", "h", "v", "þ",
    "s", "ș", "z", "x"
}) do
    Cl[spelling] = true
end

-- consonants that can precede /v/ in a medial onset
local Cv = {}
for _, spelling in ipairs({
    "d", "ð", "dz", "dʒ",
    "g", "ħ", "h", "k",
    "l", "r", "s", "ș",
    "t", "c", "ķ", "ts", "tș",
    "z", "x", "þ",
    "m", "n"
}) do
    Cv[spelling] = true
end

-- consonants that can precede nasals in a medial onset (non-glide continuants)
local CN = {}
for _, spelling in ipairs({
    "ð", "f", "ħ", "h",
    "l", "r", "s", "ș",
    "v", "z", "x", "þ"
}) do
    CN[spelling] = true
end

-- Irregular spellings: each key is a lowercase letter sequence and the value
-- lists, in order, the graphemes that graphemise() emits for it instead of
-- applying the normal longest-match lookup.
local irregular = {}
irregular["eurú"] = {"eu", "r", "ú"}
irregular["eurov"] = {"eu", "r", "o", "v"}
irregular["zeus"] = {"z", "eu", "s"}
irregular["zeud"] = {"z", "eu", "d"}

-- Extra spellings that syllabify() registers in `grapheme` and `vowel`
-- respectively before it processes its input.
local irregular_grapheme = {
    "eu"
}
local irregular_vowel = {
    "eu"
}

-- Split `word` into an array of graphemes.
--
-- The word is consumed from its end: on every pass the longest suffix that is
-- either a key of `irregular` or a member of `grapheme` (compared in
-- lowercase) is cut off and prepended to the result. A suffix of exactly one
-- character is always accepted, so unknown characters become single-character
-- elements and the loop always terminates.
--
-- @param word  string to split (UTF-8; handled per character via mw.ustring)
-- @return      array of substrings of `word`, in order, with the original
--              capitalisation preserved
function graphemise(word)
    local ulen, usub, ulower = mw.ustring.len, mw.ustring.sub, mw.ustring.lower
    local graphemes = {}

    while ulen(word) > 0 do
        local limit = ulen(word)

        for i = 1, limit do
            local orig_string = usub(word, i)
            local check_string = ulower(orig_string)
            local parts = irregular[check_string]

            if parts then
                -- Cut the graphemes straight out of the original text so its
                -- capitals are kept. The shared `irregular` table is only
                -- read: writing capitalised forms back into it would leak
                -- them into every later match of the same spelling.
                -- Widths are counted in characters, not bytes, so multibyte
                -- letters keep the slices aligned. This relies on the parts
                -- of an entry concatenating to its key.
                local sliced = {}
                local pos = 1
                for j = 1, #parts do
                    local width = ulen(parts[j])
                    sliced[j] = usub(orig_string, pos, pos + width - 1)
                    pos = pos + width
                end

                -- Prepend back to front so the parts end up in reading order.
                for j = #sliced, 1, -1 do
                    table.insert(graphemes, 1, sliced[j])
                end
                word = usub(word, 1, i - 1)
                break
            elseif grapheme[check_string] or ulen(check_string) == 1 then
                -- Known grapheme, or a lone character nothing else claimed.
                table.insert(graphemes, 1, orig_string)
                word = usub(word, 1, i - 1)
                break
            end
        end
    end

    return graphemes
end

-- Inserts syllable-boundary markers ("σ") into a list of graphemes.
--
-- graphemes: array of grapheme strings (export.generate passes the result of
--            graphemise). The table is modified in place.
-- Returns the same table, now containing "σ" elements at the syllable breaks.
--
-- Works in two passes:
--   1. walking left to right, a "σ" is inserted directly before every element
--      found in `vowel`;
--   2. walking right to left, each "σ" is either shifted leftwards past
--      preceding elements according to the rules below, or removed.
-- All lookups use the lower-cased grapheme; the stored elements keep their case.
function syllabify(graphemes)
    --mw.log("————— SYLLABIFYING —————")

    -- Register every irregular grapheme / vowel in the `grapheme` and `vowel`
    -- lookup tables (all four tables are defined outside this function).
    for _, g in ipairs(irregular_grapheme) do
        grapheme[g] = true
    end
    for _, g in ipairs(irregular_vowel) do
        vowel[g] = true
    end

    -- Exchanges the elements at the two given positions of `graphemes`.
    local function swap(Pos1, Pos2)
        --mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
        local temp = graphemes[Pos1]
        graphemes[Pos1] = graphemes[Pos2]
        graphemes[Pos2] = temp
    end

    local i = 1

    while true do -- add σ before each vowel
        if graphemes[i] == nil then
            break
        end

        if vowel[mw.ustring.lower(graphemes[i])] then
            table.insert(graphemes, i, "σ")
            --mw.log("σ inserted in position " .. i)
            -- skip both the new σ and the vowel that now sits at i + 1
            i = i + 2
        else
            i = i + 1
        end
    end

    -- Second pass: visit every index from the last one down to 1.
    i = #graphemes
    while true do
        -- Window around position i; entries outside the array are nil.
        local g_current = graphemes[i]
        local g_after = graphemes[i + 1]
        local g_prev = graphemes[i - 1]
        local g_prev2 = graphemes[i - 2]
        local g_prev3 = graphemes[i - 3]

        -- Lower-case the window so the table lookups below ignore case.
        if g_current then
            g_current = mw.ustring.lower(g_current)
        end
        if g_after then
            g_after = mw.ustring.lower(g_after)
        end
        if g_prev then
            g_prev = mw.ustring.lower(g_prev)
        end
        if g_prev2 then
            g_prev2 = mw.ustring.lower(g_prev2)
        end
        if g_prev3 then
            g_prev3 = mw.ustring.lower(g_prev3)
        end

        -- Every rule below requires g_current == "σ". The first matching rule
        -- wins. A single swap moves σ one position left; the double swap moves
        -- it two positions left, past an element listed in `affix`.
        if glide[g_prev] and g_current == "σ" and vowel[g_after] then
            -- J ↔ σ: glide directly before σ, vowel after it
            swap(i, i - 1)
        elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then
            -- (J · ) ↔ σ: as above, with an `affix` element between glide and σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif
            consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
                not (g_prev == "j" and g_after == "j")
         then
            -- C ↔ σ (J): consonant before σ, vowel or glide after it,
            -- except when both neighbours are "j"
            swap(i, i - 1)
        elseif
            consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) and
                not (g_prev2 == "j" and g_after == "j")
         then
            -- C ↔ σ (J), with an `affix` element between consonant and σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then
            -- C Cᵥ ↔ σ v: a `Cv` element that itself follows a consonant,
            -- with "v" after σ
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
            -- C Cᵥ ↔ σ v, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then
            -- Bv ↔ σ v: a `Cv_fixed` element before σ and "v" after it;
            -- no condition on what precedes the `Cv_fixed` element
            swap(i, i - 1)
        elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
            -- Bv ↔ σ v, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then
            -- C Cₙ ↔ σ N: a `CN` element that follows a consonant,
            -- with a nasal after σ
            swap(i, i - 1)
        elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then
            -- C Cₙ ↔ σ N, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then
            -- C Cᵣ ↔ σ r: a `Cr` element that follows a consonant,
            -- with "r" after σ
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then
            -- C Cᵣ ↔ σ r, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then
            -- C Cₗ ↔ σ l: a `Cl` element that follows a consonant,
            -- with "l" after σ
            swap(i, i - 1)
        elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then
            -- C Cₗ ↔ σ l, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif Cs[g_prev] and g_current == "σ" and sibilant[g_after] then
            -- Cs ↔ σ S: a `Cs` element before σ and a sibilant after it
            swap(i, i - 1)
        elseif Cs[g_prev2] and affix[g_prev] and g_current == "σ" and sibilant[g_after] then
            -- Cs ↔ σ S, with an `affix` element directly before σ
            swap(i, i - 1)
            swap(i - 1, i - 2)
        elseif g_current == "σ" then
            -- No movement rule applies: decide whether this σ is kept at all.
            if i == 1 then
                -- a marker at the very start of the list is dropped
                table.remove(graphemes, 1)
            else
                -- Scan leftwards from the σ.
                for j = 1, i - 1 do
                    if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then
                        -- a vowel or another σ lies to the left: keep this σ
                        break
                    elseif i - j == 1 then
                        -- reached the first element without meeting a vowel or σ
                        table.remove(graphemes, i)
                        break
                    elseif not affix[graphemes[i - j]] and not consonant[mw.ustring.lower(graphemes[i - j])] then
                        -- met an element that is neither in `affix` nor a consonant
                        -- (note: the `affix` lookup uses the element as stored,
                        -- not lower-cased)
                        table.remove(graphemes, i)
                        break
                    end
                end
            end
        end

        -- Step one position left. A σ that was swapped to i - 1 or i - 2 is
        -- therefore examined again on a later iteration.
        if i > 1 then
            i = i - 1
        else
            break
        end
    end

    return graphemes
end

-- Entry point: syllabifies a word and returns it as one string.
--
-- frame: the frame passed in by the caller; its arguments are read through
--        getArgs.
--   args[1]  the word to syllabify (required)
--   args[2]  the text placed at every syllable break; defaults to "|"
-- Returns the graphemes of the word concatenated into a string, with every
-- "σ" marker produced by syllabify replaced by the divider.
-- Raises "Word needed" when args[1] is missing.
function export.generate(frame)
    local args = getArgs(frame)
    local word = args[1]

    if word == nil then
        error("Word needed")
    end

    local divider = args[2]
    if divider == nil then
        divider = "|"
    end

    local joined = table.concat(syllabify(graphemise(word)))

    -- The divider comes from the caller, so it must be inserted verbatim.
    -- Passed as a replacement *string*, "%" would act as an escape character
    -- (e.g. "%1" would expand to the matched σ); a replacement function has
    -- no such special characters.
    local result = mw.ustring.gsub(joined, "σ", function()
        return divider
    end)

    return result
end

-- Entry point: syllabifies a word and returns the raw grapheme list.
--
-- frame: the frame passed in by the caller; args[1] (read through getArgs)
--        is the word to syllabify.
-- Returns the table produced by syllabify: the word's graphemes with "σ"
-- elements marking the syllable breaks.
-- Raises "Word needed" when args[1] is missing.
function export.generate_array(frame)
    local word = getArgs(frame)[1]

    if word == nil then
        error("Word needed")
    end

    return syllabify(graphemise(word))
end

-- Return the module table to whoever loads this module; export.generate and
-- export.generate_array are reached through it.
return export

--[[
Debug console test string (syllabifies "rjaovs" with the default "|" divider):
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
]]