|
|
Line 1: |
Line 1: |
| local export = {} | | local export = {} |
| local getArgs = require('Module:Arguments').getArgs
| |
|
| |
|
| -- DATA --
| | local m_languages = require("Module:languages") |
| | | local m_parameters = require("Module:parameters") |
| local grapheme = { | | local params = { |
| ["a"] = true, ["á"] = true, ["à"] = true, ["â"] = true, ["ả"] = true, ["ai"] = true, ["âi"] = true, ["ao"] = true, ["ào"] = true, ["aoi"] = true, ["au"] = true, ["b"] = true, ["c"] = true, ["d"] = true, ["ð"] = true, ["dx"] = true, ["dz"] = true, ["e"] = true, ["é"] = true, ["è"] = true, ["ea"] = true, ["ei"] = true, ["f"] = true, ["g"] = true, ["h"] = true, ["ħ"] = true, ["i"] = true, ["í"] = true, ["ỉ"] = true, ["ie"] = true, ["ìe"] = true, ["ỉe"] = true, ["iea"] = true, ["j"] = true, ["ĵ"] = true, ["k"] = true, ["ķ"] = true, ["l"] = true, ["m"] = true, ["n"] = true, ["ņ"] = true, ["o"] = true, ["ó"] = true, ["ò"] = true, ["ø"] = true, ["oa"] = true, ["øa"] = true, ["øi"] = true, ["p"] = true, ["q"] = true, ["r"] = true, ["s"] = true, ["ș"] = true, ["t"] = true, ["ts"] = true, ["tș"] = true, ["u"] = true, ["ú"] = true, ["ù"] = true, ["û"] = true, ["ū"] = true, ["ủ"] = true, ["ui"] = true, ["uo"] = true, ["ủo"] = true, ["uoa"] = true, ["v"] = true, ["w"] = true, ["x"] = true, ["y"] = true, ["ỳ"] = true, ["ý"] = true, ["ỷ"] = true, ["ye"] = true, ["ỷe"] = true, ["yea"] = true, ["z"] = true, ["þ"] = true,
| | [1] = {required = true}, |
|
| | [2] = {list = true, allow_empty = true}, |
| ["gj"] = true, ["lj"] = true,
| | ["caption"] = {}, |
| }
| |
| | |
-- Turns a list of keys into a lookup set: every listed key maps to `true`.
local function to_set(keys)
	local set = {}
	for _, key in ipairs(keys) do
		set[key] = true
	end
	return set
end

-- Vowel graphemes (single letters, digraphs and trigraphs).
local vowel = to_set {
	"a", "á", "à", "â", "ả", "ai", "âi", "ao", "ào", "aoi", "au",
	"e", "é", "è", "ea", "ei",
	"i", "í", "ỉ", "ie", "ìe", "ỉe", "iea",
	"o", "ó", "ò", "ø", "oa", "øa", "øi",
	"u", "ú", "ù", "û", "ū", "ủ", "ui", "uo", "ủo", "uoa",
	"y", "ỳ", "ý", "ỷ", "ye", "ỷe", "yea",
}

-- Consonant graphemes, including the digraphs.
local consonant = to_set {
	"b", "c", "d", "ð", "dx", "dz", "f", "g", "h", "ħ",
	"j", "ĵ", "k", "ķ", "l", "m", "n", "ņ", "p", "q",
	"r", "s", "ș", "t", "ts", "tș", "v", "w", "x", "z", "þ",
	"gj", "lj",
}

-- Punctuation that may sit between graphemes inside a word.
local affix = to_set { "·", "-", "’", "‘" }

-- Nasal consonants.
local nasal = to_set { "m", "n", "ņ" }

-- Glides.
local glide = to_set { "j", "ĵ", "w", "gj", "lj" }

-- <Cv> combinations that are uniformly pronounced without /v/.
local Cv_fixed = to_set { "b", "f", "p" }

-- Consonants that can precede /r/ in a medial onset.
local Cr = to_set {
	"p", "k", "b", "g",
	"ð",
	"f", "ħ", "h",
	"v",
	"þ",
	"t", "d",
}

-- Consonants that can precede /l/ in a medial onset.
local Cl = to_set {
	"p", "k", "b", "g",
	"ð",
	"f", "ħ", "h",
	"v",
	"þ",
	"s", "ș", "z", "x",
}

-- Consonants that can precede /v/ in a medial onset.
-- NOTE(review): "dʒ" is not a key of the `grapheme` table, so it looks like it
-- can never match a graphemised input; possibly "dx" was intended -- confirm.
local Cv = to_set {
	"d", "ð", "dz",
	"dʒ", "g", "ħ", "h",
	"k", "l", "r", "s", "ș",
	"t", "c", "ķ", "ts", "tș",
	"z", "x", "þ", "m", "n",
}

-- Consonants that can precede nasals in a medial onset (non-glide continuants).
local CN = to_set {
	"ð",
	"f", "ħ", "h",
	"l", "r", "s", "ș",
	"v",
	"z", "x", "þ",
}
| |
| | |
| local irregular = {
| |
| ["eurú"] = {"eu", "r", "ú"},
| |
| ["eurov"] = {"eu", "r", "o", "v"},
| |
| } | | } |
|
| |
|
| local irregular_grapheme = { "eu", }
| | function export.make_syllables(language, caption, list) |
| local irregular_vowel = { "eu", }
| | local syllables = "" |
| | |
--- Split a word into graphemes.
-- The word is consumed from its end: on every pass the longest suffix that is
-- either an irregular spelling (key of `irregular`) or a known grapheme (key of
-- `grapheme`) is cut off and put at the front of the result. A single
-- character that matches neither table is kept as a grapheme of its own, so
-- every pass shortens the word. Matching is case-insensitive; the casing of
-- the input is carried over to the returned pieces.
-- @param word string to split
-- @return array of grapheme strings in word order
function graphemise(word)
	mw.log("————— GRAPHEMISING —————")

	local ulen, usub = mw.ustring.len, mw.ustring.sub
	local ulower, uupper = mw.ustring.lower, mw.ustring.upper

	local graphemes = {}

	while ulen(word) > 0 do
		local limit = ulen(word)

		-- i = 1 tests the whole remaining word, i = limit its last character.
		for i = 1, limit do
			local orig_string = usub(word, i)
			local check_string = ulower(orig_string)
			local pieces = irregular[check_string]

			if pieces then
				mw.log("Irregular spelling recognised: " .. orig_string)

				-- Record which character positions are capitals in the input.
				-- Positions are counted in characters, not bytes, so they line
				-- up with the mw.ustring.sub indexing used below.
				local capitals = {}
				for j = 1, ulen(orig_string) do
					if usub(orig_string, j, j) == uupper(usub(check_string, j, j)) then
						capitals[j] = true
					end
				end

				-- Build re-capitalised copies of the pieces. The shared
				-- `irregular` table is left untouched so that later calls
				-- still see the lower-case data.
				local cased = {}
				local index = 1
				for j, piece in ipairs(pieces) do
					local new_data = ""
					for k = 1, ulen(piece) do
						local letter = usub(piece, k, k)
						if capitals[index] then
							letter = uupper(letter)
						end
						new_data = new_data .. letter
						index = index + 1
					end
					cased[j] = new_data
				end

				-- Prepend last piece first so the pieces end up in word order.
				for j = #cased, 1, -1 do
					table.insert(graphemes, 1, cased[j])
					mw.log("<" .. cased[j] .. "> logged.")
				end

				word = usub(word, 1, i - 1)
				break
			elseif grapheme[check_string] or ulen(check_string) == 1 then
				-- Known grapheme, or an unknown single character kept as-is.
				table.insert(graphemes, 1, orig_string)
				mw.log("<" .. orig_string .. "> logged.")
				word = usub(word, 1, i - 1)
				break
			end
		end
	end

	mw.log("String exhausted.")
	mw.log("<" .. table.concat(graphemes, "><") .. ">")

	return graphemes
end
| |
| | |
| function syllabify(graphemes)
| |
| mw.log("————— SYLLABIFYING —————")
| |
|
| |
| for _, g in ipairs(irregular_grapheme) do
| |
| grapheme[g] = true
| |
| end
| |
| for _, g in ipairs(irregular_vowel) do
| |
| vowel[g] = true
| |
| end
| |
|
| |
| local function swap(Pos1, Pos2)
| |
| mw.log(graphemes[Pos1] .. " ↔ " .. graphemes[Pos2] .. " at position " .. Pos1)
| |
| local temp = graphemes[Pos1]
| |
| graphemes[Pos1] = graphemes[Pos2]
| |
| graphemes[Pos2] = temp
| |
| end
| |
| | |
| local i = 1 | | local i = 1 |
| | | local n_i = 1 |
| while true do -- add σ before each vowel | | while list[i] do |
|
| | if list[i] == "" then |
| if graphemes[i] == nil then break end | | syllables = syllables .. ", " |
|
| | n_i = 1 |
| if vowel[mw.ustring.lower(graphemes[i])] then
| |
| table.insert(graphemes, i, "σ") | |
| mw.log("σ inserted in position " .. i)
| |
| i = i + 2 | |
| else | | else |
| i = i + 1 | | if n_i > 1 then syllables = syllables .. "·" end |
| end
| | syllables = syllables .. list[i] |
| end
| | n_i = n_i + 1 |
|
| |
| i = #graphemes
| |
| while true do
| |
| local g_current = graphemes[i]
| |
| local g_after = graphemes[i+1]
| |
| local g_prev = graphemes[i-1]
| |
| local g_prev2 = graphemes[i-2]
| |
| local g_prev3 = graphemes[i-3]
| |
|
| |
| if g_current then
| |
| g_current = mw.ustring.lower(g_current)
| |
| end
| |
| if g_after then
| |
| g_after = mw.ustring.lower(g_after)
| |
| end
| |
| if g_prev then
| |
| g_prev = mw.ustring.lower(g_prev)
| |
| end
| |
| if g_prev2 then
| |
| g_prev2 = mw.ustring.lower(g_prev2)
| |
| end
| |
| if g_prev3 then
| |
| g_prev3 = mw.ustring.lower(g_prev3)
| |
| end
| |
|
| |
| -- J ↔ σ
| |
| if glide[g_prev] and g_current == "σ" and vowel[g_after] then
| |
| swap(i, i-1)
| |
|
| |
| -- (J · ) ↔ σ
| |
| elseif glide[g_prev2] and affix[g_prev] and g_current == "σ" and vowel[g_after] then
| |
| swap(i, i-1) | |
| swap(i-1, i-2)
| |
|
| |
| -- C ↔ σ (J)
| |
| elseif consonant[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) then
| |
| swap(i, i-1)
| |
|
| |
| elseif consonant[g_prev2] and affix[g_prev] and g_current == "σ" and (vowel[g_after] or glide[g_after]) then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
|
| |
| -- C Cᵥ ↔ σ v
| |
|
| |
| elseif consonant[g_prev2] and Cv[g_prev] and g_current == "σ" and g_after == "v" then
| |
| swap(i, i-1)
| |
|
| |
| elseif consonant[g_prev3] and Cv[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
|
| |
| -- Bv ↔ σ v
| |
| elseif Cv_fixed[g_prev] and g_current == "σ" and g_after == "v" then
| |
| swap(i, i-1)
| |
|
| |
| elseif Cv_fixed[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "v" then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
|
| |
| -- C Cₙ ↔ σ N
| |
| elseif consonant[g_prev2] and CN[g_prev] and g_current == "σ" and nasal[g_after] then
| |
| swap(i, i-1)
| |
|
| |
| elseif consonant[g_prev3] and CN[g_prev2] and affix[g_prev] and g_current == "σ" and nasal[g_after] then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
|
| |
| -- C Cᵣ ↔ σ r
| |
| elseif consonant[g_prev2] and Cr[g_prev] and g_current == "σ" and g_after == "r" then
| |
| swap(i, i-1)
| |
|
| |
| elseif consonant[g_prev3] and Cr[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "r" then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
|
| |
| -- C Cₗ ↔ σ l
| |
| elseif consonant[g_prev2] and Cl[g_prev] and g_current == "σ" and g_after == "l" then
| |
| swap(i, i-1)
| |
|
| |
| elseif consonant[g_prev3] and Cl[g_prev2] and affix[g_prev] and g_current == "σ" and g_after == "l" then
| |
| swap(i, i-1)
| |
| swap(i-1, i-2)
| |
| elseif g_current == "σ" then
| |
| if i == 1 then
| |
| table.remove(graphemes, 1)
| |
| else
| |
| for j = 1, i-1 do
| |
| if vowel[mw.ustring.lower(graphemes[i - j])] or graphemes[i - j] == "σ" then
| |
| break
| |
| elseif i - j == 1 then
| |
| table.remove(graphemes, i)
| |
| break
| |
| elseif not affix[graphemes[i-j]] and not consonant[mw.ustring.lower(graphemes[i-j])] then
| |
| table.remove(graphemes, i)
| |
| break
| |
| end
| |
| end
| |
| end
| |
| | |
| end
| |
|
| |
| if i > 1 then
| |
| i = i - 1
| |
| else break
| |
| end | | end |
| | i = i + 1 |
| end | | end |
|
| | return caption .. ': <span style="font-size:110%;">' .. syllables .. '</span>' |
| return graphemes | |
|
| |
| end | | end |
|
| |
|
| | | function export.show_syllables(frame) |
| function export.generate(frame) | | local args = m_parameters.process(frame:getParent().args, params) |
| local args = getArgs(frame) | | return export.make_syllables(m_languages.get_by_code(args[1]), args["caption"] or "Syllabification", args[2]) |
|
| |
| if args[1] == nil then
| |
| error("Word needed")
| |
| end | |
|
| |
| local outputSyllables = args[1]
| |
|
| |
| outputSyllables = graphemise(outputSyllables)
| |
| outputSyllables = syllabify(outputSyllables)
| |
|
| |
| local divider = args[2]
| |
|
| |
| if divider == nil then
| |
| divider = "|"
| |
| end
| |
| | |
| outputSyllables = table.concat(outputSyllables)
| |
| outputSyllables = mw.ustring.gsub(outputSyllables, "(σ)", divider)
| |
| | |
| return outputSyllables
| |
|
| |
| end | | end |
|
| |
|
| function export.generate_array(frame) | | function export.show_hyphens(frame) |
| local args = getArgs(frame) | | local args = m_parameters.process(frame:getParent().args, params) |
| | | return export.make_syllables(m_languages.get_by_code(args[1]), args["caption"] or "Hyphenation", args[2]) |
| if args[1] == nil then
| |
| error("Word needed")
| |
| end
| |
|
| |
| local outputSyllables = args[1]
| |
|
| |
| outputSyllables = graphemise(outputSyllables)
| |
| outputSyllables = syllabify(outputSyllables)
| |
|
| |
| return outputSyllables
| |
|
| |
| end | | end |
|
| |
|
| return export | | return export |
|
| |
| --[[
| |
| Debug console test string:
| |
| =p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
| |
| ]]
| |