Module:rad-syllables: Difference between revisions
Jump to navigation
Jump to search
TheNightAvl (talk | contribs) (Created page with "local export = {} local getArgs = require('Module:Arguments').getArgs -- DATA -- local grapheme = { ["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["f"] = true, ["g"] = true, ["h"] = true, [...") |
(No difference)
|
Revision as of 23:25, 20 August 2023
Used in {{rad-syllables}}. This module produces syllabification from a single input:
{{#invoke:rad-syllables|generate|asehņieșe}}
→ a|seh|ņie|șe
You can specify a custom delimiter in the second parameter:
{{#invoke:rad-syllables|generate|asehņieșe|・}}
→ a|seh|ņie|șe
Capitals will be maintained:
{{#invoke:rad-syllables|generate|aSeHņIeȘe}}
→ aSeHņIeȘe
-- Table returned at the end of the module; public entry points are attached to it.
local export = {}
local getArgs = require('Module:Arguments').getArgs
-- DATA --
-- All tables below except `irregular`, `irregular_grapheme` and
-- `irregular_vowel` are sets: the key is a spelling and the value is always true.

-- Every spelling that graphemise accepts as a single grapheme.
local grapheme = {
["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["f"] = true, ["g"] = true, ["h"] = true, ["ħ"] = true, ["i"] = true, ["í"] = true, ["ỉ"] = true, ["ie"] = true, ["ìe"] = true, ["ỉe"] = true, ["iea"] = true, ["j"] = true, ["ĵ"] = true, ["k"] = true, ["ķ"] = true, ["l"] = true, ["m"] = true, ["n"] = true, ["ņ"] = true, ["o"] = true, ["ó"] = true, ["ò"] = true, ["ø"] = true, ["oa"] = true, ["øa"] = true, ["øi"] = true, ["p"] = true, ["q"] = true, ["r"] = true, ["s"] = true, ["ș"] = true, ["t"] = true, ["ts"] = true, ["tș"] = true, ["u"] = true, ["ú"] = true, ["ù"] = true, ["û"] = true, ["ū"] = true, ["ủ"] = true, ["ui"] = true, ["uo"] = true, ["ủo"] = true, ["uoa"] = true, ["v"] = true, ["w"] = true, ["x"] = true, ["y"] = true, ["ỳ"] = true, ["ý"] = true, ["ỷ"] = true, ["ye"] = true, ["ỷe"] = true, ["yea"] = true, ["z"] = true, ["þ"] = true,
}
-- Graphemes in front of which syllabify places a syllable marker.
local vowel = {
["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["i"] = true, ["í"] = true, ["ỉ"] = true, ["ie"] = true, ["ìe"] = true, ["ỉe"] = true, ["iea"] = true, ["o"] = true, ["ó"] = true, ["ò"] = true, ["ø"] = true, ["oa"] = true, ["øa"] = true, ["øi"] = true, ["u"] = true, ["ú"] = true, ["ù"] = true, ["û"] = true, ["ū"] = true, ["ủ"] = true, ["ui"] = true, ["uo"] = true, ["ủo"] = true, ["uoa"] = true, ["y"] = true, ["ỳ"] = true, ["ý"] = true, ["ỷ"] = true, ["ye"] = true, ["ỷe"] = true, ["yea"] = true,
}
-- Graphemes that syllabify lets a syllable marker move leftwards across.
-- Note that the glides (j, ĵ, w) are listed here as well.
local consonant = {
["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["f"] = true, ["g"] = true, ["h"] = true, ["ħ"] = true, ["j"] = true, ["ĵ"] = true, ["k"] = true, ["ķ"] = true, ["l"] = true, ["m"] = true, ["n"] = true, ["ņ"] = true, ["p"] = true, ["q"] = true, ["r"] = true, ["s"] = true, ["ș"] = true, ["t"] = true, ["ts"] = true, ["tș"] = true, ["v"] = true, ["w"] = true, ["x"] = true, ["z"] = true, ["þ"] = true,
}
-- Affix marks; when one sits between a consonant and a syllable marker,
-- syllabify moves the marker across both.
local affix = {
["·"] = true, ["-"] = true,
}
-- Nasal consonants, matched as the second member of the C Cₙ onset rule.
local nasal = {
["m"] = true, ["n"] = true, ["ņ"] = true,
}
-- Glides, which may stand between an onset consonant and the vowel.
local glide = {
["j"] = true, ["ĵ"] = true, ["w"] = true,
}
local Cv_fixed = { -- <Cv> combinations that are uniformly pronounced without /v/ --
["b"] = true,
["f"] = true,
["p"] = true,
}
local Cr = { -- consonants that can precede /r/ in a medial onset --
["p"] = true, ["k"] = true, ["b"] = true, ["g"] = true,
["ð"] = true,
["f"] = true, ["ħ"] = true, ["h"] = true,
["v"] = true,
["þ"] = true,
["t"] = true, ["d"] = true,
}
local Cl = { -- consonants that can precede /l/ in a medial onset --
["p"] = true, ["k"] = true, ["b"] = true, ["g"] = true,
["ð"] = true,
["f"] = true, ["ħ"] = true, ["h"] = true,
["v"] = true,
["þ"] = true,
["s"] = true, ["ș"] = true, ["z"] = true, ["x"] = true,
}
-- NOTE(review): "dʒ" below is not a key of `grapheme` (which has "dx"), so
-- graphemise never produces it as one grapheme and this entry cannot match.
-- Possibly "dx" was intended; confirm.
local Cv = { -- consonants that can precede /v/ in a medial onset --
["d"] = true, ["ð"] = true, ["dz"] = true,
["dʒ"] = true, ["g"] = true, ["ħ"] = true, ["h"] = true,
["k"] = true, ["l"] = true, ["r"] = true, ["s"] = true, ["ș"] = true,
["t"] = true, ["c"] = true, ["ķ"] = true, ["ts"] = true, ["tș"] = true,
["z"] = true, ["x"] = true, ["þ"] = true, ["m"] = true, ["n"] = true,
}
local CN = { -- consonants that can precede nasals in a medial onset -- (non-glide continuants)
["ð"] = true,
["f"] = true, ["ħ"] = true, ["h"] = true,
["l"] = true, ["r"] = true, ["s"] = true, ["ș"] = true,
["v"] = true,
["z"] = true, ["x"] = true, ["þ"] = true,
}
-- Spellings with a fixed grapheme split. graphemise looks the remaining text
-- up here before consulting `grapheme`, and emits the listed graphemes in order.
-- NOTE(review): the "euro" entry expands to four graphemes, ending in a "v"
-- that is not part of the key, so the output gains a letter. Confirm whether
-- this is intended or whether the key should be "eurov".
local irregular = {
["eurú"] = {"eu", "r", "ú"},
["euro"] = {"eu", "r", "o", "v"},
}
-- Graphemes and vowels that only arise through `irregular` entries.
-- Unlike the tables above, these two are plain arrays, not sets.
local irregular_grapheme = {"eu"}
local irregular_vowel = {"eu"}
--- Split a word into graphemes, matching greedily from the END of the word.
--
-- At each step the longest suffix of the still-unprocessed text is taken that
-- is either a key of `irregular` (in which case its predefined grapheme list is
-- emitted) or a key of `grapheme`. A single character that matches neither is
-- emitted unchanged, so unknown letters, capitals and affix marks pass through.
--
-- Declared `local` so the helper does not leak into the global environment.
--
-- @param word  string to split
-- @return      array of grapheme strings, in word order
local function graphemise(word)
	mw.log("————— GRAPHEMISING —————")
	local graphemes = {}
	-- Number of leading characters of `word` that are still unprocessed.
	-- Tracking a length avoids re-slicing and re-measuring the word each pass.
	local remaining = mw.ustring.len(word)
	while remaining > 0 do
		-- Try suffixes of the unprocessed text from longest to shortest.
		for start = 1, remaining do
			local candidate = mw.ustring.sub(word, start, remaining)
			local spelling = irregular[candidate]
			if spelling then
				mw.log("Irregular spelling recognised: " .. candidate)
				-- The result is built back to front, so prepend the
				-- predefined graphemes in reverse order.
				for k = #spelling, 1, -1 do
					table.insert(graphemes, 1, spelling[k])
					mw.log("<" .. spelling[k] .. "> logged.")
				end
				remaining = start - 1
				break
			elseif grapheme[candidate] or start == remaining then
				-- Either a known grapheme, or a lone character that is
				-- accepted as-is (this also guarantees progress).
				table.insert(graphemes, 1, candidate)
				mw.log("<" .. candidate .. "> logged.")
				remaining = start - 1
				break
			end
		end
	end
	mw.log("String exhausted.")
	mw.log("<" .. table.concat(graphemes, "><") .. ">")
	return graphemes
end
--- Insert syllable boundaries into a grapheme sequence and join it into a string.
--
-- Works in two passes over `graphemes`, which is modified in place:
--   1. a marker "σ" is inserted directly before every vowel grapheme;
--   2. walking from the end of the word towards the start, each marker is
--      moved leftwards across the consonants allowed to open that syllable
--      (see the rule comments below). A marker that cannot move is kept only
--      if a vowel or another marker is found to its left before anything
--      other than consonants and affix marks; otherwise it is deleted.
-- Finally the graphemes are concatenated and every "σ" is replaced by "|".
--
-- Entries of `irregular_vowel` count as vowels here. They are merged into a
-- per-call lookup rather than inserted into the module-level sets:
-- `table.insert(vowel, irregular_vowel)` would only append the list itself as
-- an array element, so `vowel["eu"]` would stay nil and the tables would grow
-- on every call. `grapheme` is not consulted in this function, so nothing is
-- added to it.
--
-- Declared `local` so the helper does not leak into the global environment.
--
-- @param graphemes  array of grapheme strings (e.g. the result of graphemise)
-- @return           the word as a single string with "|" between syllables
local function syllabify(graphemes)
	mw.log("————— SYLLABIFYING —————")

	-- Per-call set built from the irregular vowel list.
	local extra_vowel = {}
	for _, v in ipairs(irregular_vowel) do
		extra_vowel[v] = true
	end
	-- True for regular and irregular vowels; a nil argument yields nil.
	local function is_vowel(g)
		return vowel[g] or extra_vowel[g]
	end

	-- Exchange two entries of `graphemes`, logging the move.
	local function swap(Pos1, Pos2)
		mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
		graphemes[Pos1], graphemes[Pos2] = graphemes[Pos2], graphemes[Pos1]
	end

	-- Move the marker at `pos` one place left, or two places when an affix
	-- mark sits between it and the consonant it is joining.
	local function shift(pos, over_affix)
		swap(pos, pos - 1)
		if over_affix then
			swap(pos - 1, pos - 2)
		end
	end

	-- Pass 1: add σ before each vowel.
	local i = 1
	while graphemes[i] ~= nil do
		if is_vowel(graphemes[i]) then
			table.insert(graphemes, i, "σ")
			mw.log("σ inserted in position " .. i)
			i = i + 2 -- skip the marker and the vowel it precedes
		else
			i = i + 1
		end
	end

	-- Pass 2: right-to-left. After a marker moves left it is visited again on
	-- the next iteration, so it can keep absorbing consonants into the onset.
	i = #graphemes
	while true do
		local g_current = graphemes[i]
		local g_after = graphemes[i+1]
		local g_prev = graphemes[i-1]
		local g_prev2 = graphemes[i-2]
		local g_prev3 = graphemes[i-3]

		if g_current == "σ" then
			-- J ↔ σ
			if glide[g_prev] and is_vowel(g_after) then
				shift(i, false)
			-- (J · ) ↔ σ
			elseif glide[g_prev2] and affix[g_prev] and is_vowel(g_after) then
				shift(i, true)
			-- C ↔ σ (J)
			elseif consonant[g_prev] and (is_vowel(g_after) or glide[g_after]) then
				shift(i, false)
			elseif consonant[g_prev2] and affix[g_prev] and (is_vowel(g_after) or glide[g_after]) then
				shift(i, true)
			-- C Cᵥ ↔ σ v
			elseif consonant[g_prev2] and Cv[g_prev] and g_after == "v" then
				shift(i, false)
			elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_after == "v" then
				shift(i, true)
			-- Bv ↔ σ v
			elseif Cv_fixed[g_prev] and g_after == "v" then
				shift(i, false)
			elseif Cv_fixed[g_prev2] and affix[g_prev] and g_after == "v" then
				shift(i, true)
			-- C Cₙ ↔ σ N
			elseif consonant[g_prev2] and CN[g_prev] and nasal[g_after] then
				shift(i, false)
			elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and nasal[g_after] then
				shift(i, true)
			-- C Cᵣ ↔ σ r
			elseif consonant[g_prev2] and Cr[g_prev] and g_after == "r" then
				shift(i, false)
			elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_after == "r" then
				shift(i, true)
			-- C Cₗ ↔ σ l
			elseif consonant[g_prev2] and Cl[g_prev] and g_after == "l" then
				shift(i, false)
			elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_after == "l" then
				shift(i, true)
			-- The marker cannot move any further: decide whether it stays.
			elseif i == 1 then
				-- No boundary is written before the first syllable.
				table.remove(graphemes, 1)
			else
				for k = i - 1, 1, -1 do
					local g = graphemes[k]
					if is_vowel(g) or g == "σ" then
						-- A preceding syllable exists: keep the marker.
						break
					elseif k == 1 or not (affix[g] or consonant[g]) then
						-- Reached the start of the word, or a grapheme that
						-- is neither consonant nor affix: drop the marker.
						table.remove(graphemes, i)
						break
					end
				end
			end
		end

		if i > 1 then
			i = i - 1
		else
			break
		end
	end

	-- gsub also returns a match count; the single-target assignment keeps
	-- only the resulting string.
	local joined = mw.ustring.gsub(table.concat(graphemes), "(σ)", "|")
	return joined
end
--- Entry point for {{#invoke:rad-syllables|generate|word|delimiter}}.
--
-- Reads its arguments through Module:Arguments' getArgs:
--   args[1] (required)  the word to syllabify;
--   args[2] (optional)  text to place between syllables instead of "|".
-- NOTE(review): getArgs normally trims arguments and drops blank ones, so a
-- whitespace-only delimiter would fall back to "|". Confirm if that matters.
--
-- @param frame  Scribunto frame object
-- @return       the syllabified word as a string
-- Raises "Word needed." when args[1] is absent.
function export.generate(frame)
	local args = getArgs(frame)
	local word = args[1]
	if word == nil then
		error("Word needed.")
	end

	-- tostring never yields nil/false, so the and/or idiom is safe here.
	local delimiter = args[2] ~= nil and tostring(args[2]) or "|"
	mw.log("——— Parameters ———")
	mw.log("delimiter = " .. delimiter)

	local syllables = syllabify(graphemise(word))
	if delimiter ~= "|" then
		-- syllabify marks boundaries with "|". Splitting in plain mode and
		-- re-joining swaps in the custom delimiter without treating either
		-- string as a pattern or a gsub replacement template.
		syllables = table.concat(mw.text.split(syllables, "|", true), delimiter)
	end
	return syllables
end
-- Hand the export table (which carries `generate`) back to whoever loads this module.
return export
--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
]]