Module:rad-IPA
Jump to navigation
Jump to search
See {{rad-IPA}}.
local export = {}
local getArgs = require("Module:Arguments").getArgs
-- DATA --
-- Spelling-to-phone lookup consumed by generate_IPA.
-- Each top-level key is a single lowercase character. A string value is the
-- phone for that character. A table value is a trie keyed by the character(s)
-- that FOLLOW it; the [false] entry is the value used when the character is
-- matched on its own (no continuation applies).
-- Some values ("ù", "ū", "ĵ", "iej", "uoj", "yej") are placeholders that the
-- later resolve_vowels / resolve_consonants passes rewrite into real phones.
local data = {
["a"] = {
["i"] = "ai",
["o"] = {
["i"] = "ɔi",
[false] = "ɔː"
},
["ô"] = "ɔa",
["u"] = "au",
[false] = "a"
},
["á"] = "aː",
["ả"] = "aːː",
["â"] = {
["i"] = "ɤi",
[false] = "ɤ"
},
["b"] = "b",
["c"] = "ts",
["d"] = {
["x"] = "dʒ",
["z"] = "dz",
[false] = "d"
},
["ð"] = "ð",
["e"] = {
["a"] = "æː",
["i"] = "ei",
[false] = "ɛ"
},
["é"] = "eː",
["f"] = "f",
["g"] = "ɡ",
["h"] = "h",
["ħ"] = "ɣ",
["i"] = {
["e"] = {
["a"] = "ia",
-- placeholder, rewritten by resolve_vowels
["j"] = "iej",
[false] = "ie"
},
[false] = "i"
},
["í"] = "iː",
["ỉ"] = {
["e"] = "iːe",
[false] = "iːː"
},
["j"] = "j",
-- placeholder, converted to "j" by resolve_consonants
["ĵ"] = "ĵ",
["k"] = "k",
["ķ"] = "tʃ",
["l"] = "l",
["m"] = "m",
["n"] = "n",
["ņ"] = "ŋ",
["o"] = {
["a"] = "ɔa",
[false] = "ɔ"
},
["ô"] = "ɔ",
["ó"] = "oː",
["ø"] = {
["a"] = "œa",
["i"] = "ei",
[false] = "œ"
},
["p"] = "p",
["q"] = "k",
["r"] = "r",
["s"] = "s",
["ș"] = "ʃ",
["t"] = "t",
["u"] = {
["i"] = "ɤi",
["o"] = {
["a"] = "ua",
-- placeholder, rewritten by resolve_vowels
["j"] = "uoj",
[false] = "uo"
},
[false] = "u"
},
-- placeholder, converted to "u" by resolve_vowels
["ù"] = "ù",
["û"] = "ɤ",
["ú"] = "uː",
["ủ"] = {
["o"] = "uːo",
[false] = "uːː"
},
-- placeholder, converted to "u" by resolve_vowels
["ū"] = "ū",
["v"] = "v",
["w"] = "w",
["x"] = "ʒ",
["y"] = {
["e"] = {
["a"] = "ia",
-- placeholder, rewritten by resolve_vowels
["j"] = "yej",
[false] = "ie"
},
[false] = "i"
},
["ỳ"] = "y",
["z"] = "z",
["þ"] = "θ",
-- boundary and punctuation characters
["·"] = "·",
["-"] = "-",
[" "] = " ",
["."] = "|",
["'"] = "'",
["‿"] = "‿"
}
-- Aliases: these spellings share the entry (the same table or string) of
-- another character.
data["à"] = data["a"]
data["è"] = data["e"]
data["ì"] = data["i"]
data["ò"] = data["o"]
data["ý"] = data["í"]
data["ỷ"] = data["ỉ"]
-- All of these punctuation marks produce the same "|" value as ".".
data[","] = data["."]
data[":"] = data["."]
data[";"] = data["."]
data["!"] = data["."]
data["?"] = data["."]
-- Typographic apostrophes behave like the plain apostrophe.
data["‘"] = data["'"]
data["’"] = data["'"]
-- Respellings applied by generate_IPA to the lowercased input before it is
-- converted: every occurrence of a key is replaced with its value.
-- The keys are passed to mw.ustring.find/gsub as patterns, so a new key must
-- not contain unescaped pattern magic characters.
local irregular = {
["eurú"] = "ørú",
["eurov"] = "ørov",
["euruv"] = "øruv",
["heņre"] = "heņgre",
["nrao"] = "drao",
["nron"] = "dron",
["ryņl"] = "ryņgl",
["zeus"] = "zøs",
["zeud"] = "zød"
}
-- Maps each capital letter to a spelling in the module's own orthography.
-- Not referenced in the part of the file reviewed here; presumably the
-- spelled-out letter names used when an input is read as an initialism
-- (TODO confirm against the caller).
local initialism = {
["A"] = "a",
["Â"] = "â",
["B"] = "bâ",
["C"] = "câ",
["D"] = "dâ",
["Đ"] = "að",
["E"] = "e",
["F"] = "af",
["G"] = "gâ",
["H"] = "ha",
["I"] = "i",
["J"] = "ai",
["K"] = "kâ",
["Ķ"] = "kjâ",
["L"] = "al",
["M"] = "am",
["N"] = "an",
["Ņ"] = "aņ",
["O"] = "o",
["Ø"] = "ø",
["P"] = "pâ",
["Q"] = "ku",
["R"] = "ar",
["S"] = "as",
["Ș"] = "aș",
["T"] = "tâ",
["Þ"] = "aþ",
["U"] = "u",
["Û"] = "û",
["V"] = "au",
["W"] = "au",
["X"] = "ax",
["Y"] = "y",
["Z"] = "az"
}
-- Set of phones (keys mapped to true). process_temp adds the entries of
-- valid_phone_temp to it, and resolve_vowels / resolve_consonants remove
-- placeholder entries again. It is not otherwise read in the part of the file
-- reviewed here.
local valid_phone = {
["a"] = true,
["aː"] = true,
["aːː"] = true,
["æː"] = true,
["ai"] = true,
["au"] = true,
["b"] = true,
["ç"] = true,
["d"] = true,
["ð"] = true,
["dz"] = true,
["dʒ"] = true,
["eː"] = true,
["ei"] = true,
["ɛ"] = true,
["ɤ"] = true,
["ɤi"] = true,
["f"] = true,
["ɡ"] = true,
["ɣ"] = true,
["h"] = true,
["i"] = true,
["iː"] = true,
["iːː"] = true,
["iːe"] = true,
["ia"] = true,
["ie"] = true,
["j"] = true,
["k"] = true,
["l"] = true,
["m"] = true,
["n"] = true,
["ŋ"] = true,
["oː"] = true,
["œ"] = true,
["œa"] = true,
["øi"] = true,
["ɔ"] = true,
["ɔː"] = true,
["ɔa"] = true,
["ɔi"] = true,
["p"] = true,
["r"] = true,
["s"] = true,
["ʃ"] = true,
["t"] = true,
["ts"] = true,
["tʃ"] = true,
["u"] = true,
["uː"] = true,
["uːː"] = true,
["uːo"] = true,
["ua"] = true,
["uo"] = true,
["v"] = true,
["w"] = true,
["y"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʝ"] = true
}
-- Placeholder phones registered as valid by process_temp.
-- NOTE(review): the data table emits the placeholders "iej", "uoj" and "yej",
-- not "iːj", "uːj" and "yːj"; nothing visible here produces the latter.
-- Confirm whether this list is stale.
local valid_phone_temp = {"iːj", "uːj", "yːj", "ù", "ū", "ĵ"}
-- CONSONANT GROUPS --
-- Set of phones treated as consonants by the resolution passes and by the
-- syllabifier (get_syllables). Affricates are single multi-character keys.
local consonant = {
["b"] = true,
["ç"] = true,
["d"] = true,
["ð"] = true,
["dz"] = true,
["dʒ"] = true,
["f"] = true,
["ɡ"] = true,
["ɣ"] = true,
["h"] = true,
["j"] = true,
["k"] = true,
["l"] = true,
["m"] = true,
["n"] = true,
["ŋ"] = true,
["p"] = true,
["r"] = true,
["s"] = true,
["ʃ"] = true,
["t"] = true,
["ts"] = true,
["tʃ"] = true,
["v"] = true,
["w"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʝ"] = true,
["ʋ"] = true,
["ɲ"] = true,
["r̥"] = true
}
-- Placeholder consonants: added to `consonant` by process_temp and removed
-- again at the end of resolve_consonants.
local consonant_temp = {"ĵ"}
-- Velar consonants; resolve_consonants turns "n" into "ŋ" before any of these.
local velar = {
["k"] = true,
["ɡ"] = true,
["ɣ"] = true,
["ŋ"] = true,
-- allophones --
["x"] = true
}
-- Obstruents: the phones that take part in regressive voicing assimilation in
-- resolve_consonants, and that can open a "heavy" coda in tag_syllables.
local obstruent = {
["b"] = true,
["ç"] = true,
["d"] = true,
["ð"] = true,
["dz"] = true,
["dʒ"] = true,
["f"] = true,
["ɡ"] = true,
["ɣ"] = true,
["h"] = true,
["k"] = true,
["p"] = true,
["s"] = true,
["ʃ"] = true,
["t"] = true,
["ts"] = true,
["tʃ"] = true,
["v"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʋ"] = true
}
-- The voiced subset of `obstruent`. An obstruent absent from this set is
-- treated as voiceless by the assimilation rule.
local obstruent_voiced = {
["b"] = true,
["d"] = true,
["ð"] = true,
["dz"] = true,
["dʒ"] = true,
["ɡ"] = true,
["ɣ"] = true,
["v"] = true,
["z"] = true,
["ʒ"] = true
}
-- Maps an obstruent to its voiceless counterpart (voiceless phones map to
-- themselves). Used by resolve_consonants when the last obstruent of a
-- cluster is voiceless.
-- NOTE(review): `obstruent` also contains "ʋ", which has no entry here.
local obstruent_devoice = {
["b"] = "p",
["ç"] = "ç",
["d"] = "t",
["ð"] = "θ",
["dz"] = "ts",
["dʒ"] = "tʃ",
["f"] = "f",
["ɡ"] = "k",
["ɣ"] = "h",
["h"] = "h",
["k"] = "k",
["p"] = "p",
["s"] = "s",
["ʃ"] = "ʃ",
["t"] = "t",
["ts"] = "ts",
["tʃ"] = "tʃ",
["v"] = "f",
["z"] = "s",
["ʒ"] = "ʃ",
["θ"] = "θ",
-- allophones --
["x"] = "x"
}
-- Maps an obstruent to its voiced counterpart (voiced phones map to
-- themselves; "h" is left unchanged). Used by resolve_consonants when the
-- last obstruent of a cluster is voiced.
-- NOTE(review): `obstruent` also contains "ʋ", which has no entry here.
local obstruent_voice = {
["b"] = "b",
["ç"] = "j",
["d"] = "d",
["ð"] = "ð",
["dz"] = "dz",
["dʒ"] = "dʒ",
["f"] = "v",
["ɡ"] = "ɡ",
["ɣ"] = "ɣ",
["h"] = "h",
["k"] = "ɡ",
["p"] = "b",
["s"] = "z",
["ʃ"] = "ʒ",
["t"] = "d",
["ts"] = "dz",
["tʃ"] = "dʒ",
["v"] = "v",
["z"] = "z",
["ʒ"] = "ʒ",
["θ"] = "ð",
-- allophones --
["x"] = "ɣ"
}
-- Affricates; a long nucleus followed by an affricate-initial coda makes a
-- syllable "heavy" in tag_syllables.
local affricate = {
["dz"] = true,
["dʒ"] = true,
["ts"] = true,
["tʃ"] = true
}
-- All sibilants (fricatives and affricates). A following "s" is dropped after
-- any of these in resolve_consonants.
local sibilant = {
["dz"] = true,
["dʒ"] = true,
["s"] = true,
["ʃ"] = true,
["ts"] = true,
["tʃ"] = true,
["z"] = true,
["ʒ"] = true
}
-- Alveolar sibilants; before "ʃ" they are replaced via sibilant_alv_to_post.
local sibilant_alv = {
["dz"] = true,
["s"] = true,
["ts"] = true,
["z"] = true
}
-- Postalveolar sibilants; a following "ʃ" is dropped after any of these.
local sibilant_post = {
["dʒ"] = true,
["ʃ"] = true,
["tʃ"] = true,
["ʒ"] = true
}
-- Alveolar sibilant -> corresponding postalveolar sibilant.
local sibilant_alv_to_post = {
["dz"] = "dʒ",
["s"] = "ʃ",
["ts"] = "tʃ",
["z"] = "ʒ"
}
-- Nasal consonants (used by get_syllables for the C+nasal onset test).
local nasal = {
["m"] = true,
["n"] = true,
["ŋ"] = true,
["ɲ"] = true
}
-- Glides; get_syllables allows an optional glide between an onset consonant
-- and the vowel.
local glide = {
["j"] = true,
["w"] = true
}
-- Placeholder glides: added to `glide` by process_temp and removed again at
-- the end of resolve_consonants.
local glide_temp = {"ĵ"}
-- Iotation: when one of these consonants is immediately followed by "j",
-- resolve_consonants replaces the consonant with the mapped phone and removes
-- the "j".
local iotate = {
["d"] = "dʒ",
["dz"] = "dʒ",
["ɡ"] = "j",
["ɣ"] = "j",
["h"] = "ç",
["k"] = "tʃ",
["l"] = "j",
["s"] = "ʃ",
["t"] = "tʃ",
["ts"] = "tʃ",
["z"] = "ʒ"
}
-- In resolve_consonants a "v" that follows one of these consonants is always
-- removed.
local Cv_fixed = {
-- <Cv> combinations that uniformly pronounced without /v/ --
["b"] = true,
["f"] = true,
["p"] = true
}
-- In resolve_consonants a "v" that follows one of these consonants is removed
-- unless the consonant itself follows a vowel.
local Cv_split = {
-- <Cv> combinations that are only pronounced with /v/ post-vocalically --
["m"] = true,
["n"] = true
}
-- The next four sets are consulted by get_syllables when deciding whether a
-- consonant after an existing coda starts a new syllable as part of a
-- two-consonant onset.
local Cr = {
-- consonants that can precede /r/ in a medial onset --
["p"] = true,
["k"] = true,
["b"] = true,
["ɡ"] = true,
["ç"] = true,
["ð"] = true,
["f"] = true,
["ɣ"] = true,
["h"] = true,
["v"] = true,
["θ"] = true,
["t"] = true,
["d"] = true,
-- allophones --
["x"] = true,
["ʋ"] = true
}
local Cl = {
-- consonants that can precede /l/ in a medial onset --
["p"] = true,
["k"] = true,
["b"] = true,
["ɡ"] = true,
["ç"] = true,
["ð"] = true,
["f"] = true,
["ɣ"] = true,
["h"] = true,
["v"] = true,
["θ"] = true,
["s"] = true,
["ʃ"] = true,
["z"] = true,
["ʒ"] = true,
-- allophones --
["x"] = true,
["ʋ"] = true
}
local Cv = {
-- consonants that can precede /v/ in a medial onset --
["ç"] = true,
["d"] = true,
["ð"] = true,
["dz"] = true,
["dʒ"] = true,
["ɡ"] = true,
["ɣ"] = true,
["h"] = true,
["k"] = true,
["l"] = true,
["r"] = true,
["s"] = true,
["ʃ"] = true,
["t"] = true,
["ts"] = true,
["tʃ"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʝ"] = true
}
local continuant = {
-- consonants that can precede nasals in a medial onset -- (non-glide continuants)
["ç"] = true,
["ð"] = true,
["f"] = true,
["ɣ"] = true,
["h"] = true,
["l"] = true,
["r"] = true,
["s"] = true,
["ʃ"] = true,
["v"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʋ"] = true
}
-- Alias: CN is the same table as `continuant`, named to match Cr / Cl / Cv.
local CN = continuant
-- Set of phones labelled as fricatives. Its contents are exactly those of
-- `continuant` with "h" removed. Not referenced in the part of the file
-- reviewed here.
-- NOTE(review): "l" and "r" are listed although they are not fricatives;
-- confirm whether that is intentional.
local fricative = {
-- excludes [h]
["ç"] = true,
["ð"] = true,
["f"] = true,
["ɣ"] = true,
["l"] = true,
["r"] = true,
["s"] = true,
["ʃ"] = true,
["v"] = true,
["z"] = true,
["ʒ"] = true,
["θ"] = true,
-- allophones --
["x"] = true,
["ʋ"] = true
}
-- VOWEL GROUPS --
-- Set of phones treated as vowels (monophthongs and diphthongs alike) by the
-- resolution passes and as syllable nuclei by get_syllables.
local vowel = {
["a"] = true,
["aː"] = true,
["aːː"] = true,
["æː"] = true,
["ai"] = true,
["au"] = true,
["eː"] = true,
["ei"] = true,
["ɛ"] = true,
["ɤ"] = true,
["ɤi"] = true,
["i"] = true,
["iː"] = true,
["iːː"] = true,
["iːe"] = true,
["ia"] = true,
["ie"] = true,
["oː"] = true,
["œ"] = true,
["œa"] = true,
["øi"] = true,
["ɔ"] = true,
["ɔː"] = true,
["ɔa"] = true,
["ɔi"] = true,
["u"] = true,
["uː"] = true,
["uːː"] = true,
["uːo"] = true,
["ua"] = true,
["uo"] = true,
["y"] = true
}
-- Placeholder vowel phones: process_temp adds them to `vowel`, and
-- resolve_vowels removes them again once every placeholder has been rewritten.
-- "iej", "uoj" and "yej" are the placeholders that the data table actually
-- emits; without them the look-ahead tests in resolve_vowels (for example
-- "is the phone after this consonant a vowel?") treat a following
-- <iej>/<uoj>/<yej> as a non-vowel. The older "iːj"/"uːj"/"yːj" names are kept
-- for backward compatibility.
local vowel_temp = {"iːj", "uːj", "yːj", "iej", "uoj", "yej", "ù", "ū"}
-- Set of vowel phones; not referenced in the part of the file reviewed here.
-- Presumably the vowels after which a hiatus is explicitly marked in the
-- output (TODO confirm against the code that uses it).
local marked_hiatus = {
["a"] = true,
["ai"] = true,
["au"] = true,
["ei"] = true,
["ɛ"] = true,
["ɤ"] = true,
["ɤi"] = true,
["i"] = true,
["iːe"] = true,
["ia"] = true,
["ie"] = true,
["oː"] = true,
["œ"] = true,
["œa"] = true,
["øi"] = true,
["ɔ"] = true,
["ɔa"] = true,
["ɔi"] = true,
["u"] = true,
["uːo"] = true,
["ua"] = true,
["uo"] = true,
["y"] = true
}
-- Nuclei that make a syllable "long" in tag_syllables: the long monophthongs
-- and the diphthongs.
local long = {
["aː"] = true,
["aːː"] = true,
["æː"] = true,
["ai"] = true,
["au"] = true,
["eː"] = true,
["ei"] = true,
["ɤi"] = true,
["iː"] = true,
["iːː"] = true,
["iːe"] = true,
["ia"] = true,
["ie"] = true,
["oː"] = true,
["œa"] = true,
["øi"] = true,
["ɔː"] = true,
["ɔa"] = true,
["ɔi"] = true,
["uː"] = true,
["uːː"] = true,
["uːo"] = true,
["ua"] = true,
["uo"] = true
}
-- Nuclei that make a syllable "weak" in tag_syllables; a weak closed second
-- syllable does not attract stress in trisyllabic words (see get_stress).
local weak = {
["ɤ"] = true,
["i"] = true
}
-- allophonic only --
-- Maps a vowel phone to a nasalised, half-long-marked rendering of it.
-- Not referenced in the part of the file reviewed here. The last six keys
-- ("ɑ", "ä", "äi", "äu", "iɐ", "uɐ") are not members of `vowel`; presumably
-- they are allophones produced by a later phonetic pass (TODO confirm).
local nasalise = {
["a"] = "ãˑ",
["aː"] = "ãːˑ",
["aːː"] = "ãːːˑ",
["æː"] = "æ̃ːˑ",
["ai"] = "ãˑĩ",
["au"] = "ãˑũ",
["eː"] = "ẽːˑ",
["ei"] = "ẽˑĩ",
["ɛ"] = "ɛ̃ˑ",
["ɤ"] = "ɤ̃ˑ",
["ɤi"] = "ɤ̃ˑĩ",
["i"] = "ĩˑ",
["iː"] = "ĩːˑ",
["iːː"] = "ĩːːˑ",
["iːe"] = "ĩːˑẽ",
["ia"] = "ĩˑã",
["ie"] = "ĩˑẽ",
["oː"] = "õːˑ",
["œ"] = "œ̃ˑ",
["œa"] = "œ̃ˑã",
["øi"] = "ø̃ˑĩ",
["ɔ"] = "ɔ̃ˑ",
["ɔː"] = "ɔ̃ːˑ",
["ɔa"] = "ɔ̃ˑã",
["ɔi"] = "ɔ̃ˑĩ",
["u"] = "ũˑ",
["uː"] = "ũːˑ",
["uːː"] = "ũːːˑ",
["uːo"] = "ũːˑõ",
["ua"] = "ũˑã",
["uo"] = "ũˑõ",
["y"] = "ỹˑ",
["ɑ"] = "ɑ̃ˑ",
["ä"] = "ä̃ˑ",
["äi"] = "ä̃ˑĩ",
["äu"] = "ä̃ˑũ",
["iɐ"] = "ĩˑɐ̃",
["uɐ"] = "ũˑɐ̃"
}
-- MISC --
-- Boundary markers that can appear in a phone list: space, "|" (produced from
-- punctuation by the data table), "·" and "‿". Note that "-" is not in this
-- set; get_syllables tests for it separately.
local boundary = {
[" "] = true,
["|"] = true,
["·"] = true,
["‿"] = true
}
--- Registers the placeholder phones in the classification sets.
-- Every entry of valid_phone_temp, vowel_temp, consonant_temp and glide_temp
-- is marked `true` in valid_phone, vowel, consonant and glide respectively.
-- resolve_vowels and resolve_consonants remove the placeholders again.
function process_temp()
	-- Each pair is {list of placeholder names, set to register them in}.
	local registrations = {
		{valid_phone_temp, valid_phone},
		{vowel_temp, vowel},
		{consonant_temp, consonant},
		{glide_temp, glide}
	}
	for _, registration in ipairs(registrations) do
		local names, target = registration[1], registration[2]
		for _, name in ipairs(names) do
			target[name] = true
		end
	end
end
-- PROCESS FUNCTIONS --
-- Holds the phone list built by the most recent generate_IPA call.
local IPA = {}

--- Converts a spelled word or phrase into a list of base phones.
-- The input is lowercased, irregular spellings are respelled, and the text is
-- then consumed from its END: at every step the longest suffix (at most three
-- characters) that is a complete entry of the `data` trie is converted, and
-- its value is prepended to the result.
-- @param word the text to convert
-- @return the list of phones (the same table that is stored in `IPA`)
-- Raises an error for empty input, for a character that has no entry in
-- `data`, and for a trie node that lacks a [false] fallback.
function generate_IPA(word)
	local s = mw.ustring.lower(word)
	-- Replace irregulars --
	for spelling, respelling in pairs(irregular) do
		if mw.ustring.find(s, spelling) then
			s = mw.ustring.gsub(s, "(" .. spelling .. ")", respelling)
		end
	end
	-- deals with Rredeșa and Rrezlieþs
	if mw.ustring.find(s, "(rredeș)") then
		s = mw.ustring.gsub(s, "(a rredeș)", "ar‿redeș")
		s = mw.ustring.gsub(s, "(u rredeș)", "ùr‿redeș")
		s = mw.ustring.gsub(s, "(rredeș)", "redeș")
	elseif mw.ustring.find(s, "(rrezl)") then
		s = mw.ustring.gsub(s, "(u rrezl)", "ùr‿rezl")
		s = mw.ustring.gsub(s, "(rrezl)", "rezl")
	end

	-- Split into characters once; `remaining` is the number of leading
	-- characters that still have to be converted.
	local remaining = mw.ustring.len(s)
	IPA = {}
	local chars = {}
	for pos = 1, remaining do
		chars[pos] = mw.ustring.sub(s, pos, pos)
	end
	if remaining == 0 then
		error("Empty input")
	end

	while remaining > 0 do
		-- Try the three-character suffix first, then two, then one. The
		-- one-character attempt (offset 0) always matches or raises an error.
		local first_offset = -2
		if remaining < 3 then
			first_offset = 1 - remaining
		end
		for offset = first_offset, 0 do
			local head = chars[remaining + offset]
			local node = data[head]
			if node == nil then
				error("'" .. head .. "' is an invalid character")
			end
			-- `matched` is set when a multi-character suffix is fully consumed.
			local matched = false
			if type(node) == "table" then
				if offset == 0 then
					-- A lone character takes the node's [false] fallback.
					if node[false] then
						node = node[false]
					else
						error(chars[remaining] .. " is an invalid character")
					end
				else
					-- Walk the trie along the characters that follow `head`.
					for step = 1, -offset do
						local child = node[chars[remaining + offset + step]]
						if not child then
							-- dead end: no such continuation
							node = nil
							break
						end
						node = child
						local at_end = (step + offset == 0)
						if type(node) == "string" then
							if at_end then
								matched = true
							else
								-- value reached before the suffix was used up
								node = nil
							end
							break
						elseif at_end then
							if node[false] then
								node = node[false]
								matched = true
								break
							else
								error("data[" .. table.concat(node, "][") .. "][false] is missing.")
							end
						end
					end
				end
			end
			if type(node) == "string" and (offset == 0 or matched) then
				table.insert(IPA, 1, node)
				-- drop the converted suffix from the remaining text
				remaining = remaining + offset - 1
				break
			end
		end
	end
	return IPA
end
-- Applies the vowel resolution rules to a phone list, left to right.
-- `phones` is modified in place (working_phones is the same table) and is also
-- returned. As a side effect, the placeholder vowels listed in vowel_temp are
-- removed from the valid_phone and vowel sets before returning.
function resolve_vowels(phones)
local working_phones = phones
-- mw.log("————— BEGINNING VOWEL RESOLUTION —————")
local i = 0
while true do
i = i + 1
-- Snapshot of the phones around position i. These locals go stale when the
-- list is edited; update_p() refreshes them.
local p_prev = working_phones[i - 1]
local p_current = working_phones[i]
local p_next = working_phones[i + 1]
local p_next2 = working_phones[i + 2]
local p_next3 = working_phones[i + 3]
if p_current == nil then
break
end
-- Re-reads the snapshot after the list has been edited.
local function update_p()
p_prev = working_phones[i - 1]
p_current = working_phones[i]
p_next = working_phones[i + 1]
p_next2 = working_phones[i + 2]
p_next3 = working_phones[i + 3]
end
-- Writes a new phone at position i but leaves p_current unchanged, so the
-- remaining rules of this iteration still test the old value.
local function p_Resolve(p_new)
working_phones[i] = p_new
-- mw.log("[" .. p_current .. "] resolved to [" .. p_new .. "] in position ".. i .. ".")
p_new = ""
end
-- Writes a new phone at position i and updates p_current as well, so the
-- remaining rules of this iteration test the new value.
local function p_Convert(p_new)
working_phones[i] = p_new
-- mw.log("[" .. p_current .. "] converted to [" .. p_new .. "] in position ".. i .. ".")
p_current = p_new
p_new = ""
end
-- Deletes the phone after position i and refreshes the snapshot.
local function p_RemoveNext()
table.remove(working_phones, i + 1)
update_p()
end
-- Resolution of [aù] --
-- After "a" the fixed placeholder "ù" becomes an ordinary "u", which makes it
-- subject to the [u] rule below.
if p_prev == "a" and p_current == "ù" then
-- mw.log("<aù> recognised in position " .. i .. ". Converting to resolvable [u].")
p_Convert("u")
p_current = "u"
end
-- Resolution of [u], [ù] and [ū] --
-- "u" becomes "ɤ" before a consonant that is not followed by a vowel (an
-- optional glide may stand between that consonant and the vowel).
if p_current == "u" then
-- mw.log("[u] found in position " .. i .. ". Testing for resolution.")
if consonant[p_next] then
if glide[p_next2] then
if not vowel[p_next3] then
-- mw.log("ɤCj!V environment identified.")
p_Convert("ɤ")
else
-- mw.log("No resolution needed.")
end
elseif not vowel[p_next2] then
-- mw.log("ɤC!V environment identified.")
p_Convert("ɤ")
else
-- mw.log("No resolution needed.")
end
else
-- mw.log("No resolution needed.")
end
elseif p_current == "ù" or p_current == "ū" then
-- The placeholders always surface as plain "u" and never as "ɤ".
-- mw.log("Fixed [u] found in position " .. i .. ".")
p_Convert("u")
end
-- Resolution of <Vj> to diphthongs
-- Applies only when the "j" is not followed by a vowel; the "j" is absorbed
-- into the diphthong.
if p_next == "j" and not vowel[p_next2] then
if p_current == "a" then
p_Resolve("ai")
p_RemoveNext()
elseif p_current == "ɤ" then
p_Resolve("ɤi")
p_RemoveNext()
elseif p_current == "ɛ" then
p_Resolve("ei")
p_RemoveNext()
elseif p_current == "ɔː" then
p_Resolve("ɔi")
p_RemoveNext()
end
end
-- Resolution of <ei> and <øi> --
-- Scans backwards over consonants; if a boundary or the start of the list is
-- reached before any other phone, the "ei" is in a word-initial syllable and
-- becomes "ai".
if p_current == "ei" then
for j = 1, i do
local check_phone = working_phones[i - j]
if boundary[check_phone] or check_phone == nil then
-- mw.log("Initial [ei] found in position" .. i .. ".")
p_Resolve("ai")
break
elseif not consonant[check_phone] then
break
end
end
end
if vowel[p_next] then
-- Resolution of prevocalic <iej>, <uoj> and <yej> --
if p_current == "iej" or p_current == "yej" or p_current == "uoj" then
-- mw.log("Pre-vocalic <" .. p_current .. "> found in position " .. i .. ".")
if p_current == "uoj" then
-- "uoj" splits into "uo" followed by a separate "j"
p_Resolve("uo")
table.insert(working_phones, i + 1, "j")
-- mw.log("[j] inserted to position " .. i + 1 .. ".")
update_p()
else
p_Resolve("iː")
end
end
-- Resolution of prevocalic [ie] and [uo] --
-- The p_next ~= "j" test keeps the "uo" that was just split from "uoj".
if p_current == "ie" then
p_Resolve("iː")
elseif p_current == "uo" and p_next ~= "j" then
p_Resolve("uː")
end
else
-- Resolution of non-prevocalic <iej>, <uoj> and <yej> --
for toResolve, ResolveKey in pairs({["iej"] = "ei", ["uoj"] = "ɔi", ["yej"] = "øi"}) do
if p_current == toResolve then
p_Resolve(ResolveKey)
end
end
end
-- Resolution of [j] after [i]
-- The "j" is deleted; i steps back so that the phone which moved into this
-- position is visited by the next iteration.
if p_current == "j" and (p_prev == "i" or p_prev == "iː" or p_prev == "iːː") then
table.remove(working_phones, i)
i = i - 1
update_p()
end
end
-- Removes placeholder phones from data --
for _, temp in ipairs(vowel_temp) do
valid_phone[temp] = nil
vowel[temp] = nil
end
-- mw.log("Vowel resolution result: [" .. table.concat(working_phones,"][") .. "]")
return working_phones
end
--- Applies the consonant resolution rules to a phone list, left to right.
-- The list is modified in place and also returned. As a side effect, the
-- placeholder consonants listed in consonant_temp are removed from the
-- valid_phone, consonant and glide sets before returning.
--
-- Changes from the previous revision:
--   * `final_obs` is now a local (it used to leak as a global variable);
--   * the phonetic [ʋ] rule excludes a preceding IPA "ɡ". The old test
--     compared against ASCII "g", which never occurs in a phone list, so the
--     exclusion had no effect.
--
-- @param phones list of phones (as produced by generate_IPA / resolve_vowels)
-- @param hj_dv  when falsy, "ç" becomes "ʃ" and a word-initial
--               preconsonantal "j" becomes "ʒ"
-- @param phon   when truthy, the allophonic (phonetic) rules are applied too
-- @return the same table that was passed in as `phones`
function resolve_consonants(phones, hj_dv, phon)
	local working_phones = phones
	-- Vowels before which a "j" is dropped, and vowels after which it is dropped.
	local i_initial = {["i"] = true, ["iː"] = true, ["iːː"] = true, ["ie"] = true, ["iːe"] = true}
	local i_oid = {["i"] = true, ["iː"] = true, ["iːː"] = true}
	-- mw.log("————— BEGINNING CONSONANT RESOLUTION —————")
	local i = 0
	while true do
		i = i + 1
		-- Snapshot of the phones around position i. NOTE(review): the rules
		-- below that call table.remove(working_phones, i) refresh only
		-- p_current, so p_next / p_next2 are stale for the rest of that
		-- iteration; this behaviour is kept as it was.
		local p_prev = working_phones[i - 1]
		local p_current = working_phones[i]
		local p_next = working_phones[i + 1]
		local p_next2 = working_phones[i + 2]
		if p_current == nil then
			break
		end
		-- Writes a new phone at position i without updating p_current.
		local function p_Resolve(p_new)
			working_phones[i] = p_new
			-- mw.log("[" .. p_current .. "] resolved to [" .. p_new .. "] in position ".. i .. ".")
		end
		-- Writes a new phone at position i and updates p_current, so the rules
		-- that follow in this iteration see the new value.
		local function p_Convert(p_new)
			working_phones[i] = p_new
			-- mw.log("[" .. p_current .. "] converted to [" .. p_new .. "] in position ".. i .. ".")
			p_current = p_new
		end
		-- Deletes the phone after position i and refreshes the look-ahead.
		local function p_RemoveNext()
			table.remove(working_phones, i + 1)
			p_next = working_phones[i + 1]
			p_next2 = working_phones[i + 2]
		end
		-- Resolution of iotation --
		if iotate[p_current] and p_next == "j" then
			-- mw.log("[j] removed from position " .. i + 1 .. ".")
			-- mw.log("[" .. p_current .. "][j] found in position " .. i .. ".")
			p_Convert(iotate[p_current])
			p_RemoveNext()
		elseif p_current == "ĵ" then
			-- the placeholder is a "j" that does not iotate the preceding consonant
			p_Convert("j")
		end
		-- Resolution of hjádvanþs --
		if not hj_dv then
			if p_current == "ç" then
				-- mw.log("hj = false:")
				p_Convert("ʃ")
			elseif p_current == "j" and (boundary[p_prev] or p_prev == nil) and consonant[p_next] then
				-- mw.log("hj = false:")
				p_Convert("ʒ")
			end
		end
		-- Resolution of (T)Ss, (T)Sș, ts, ds, tș, dș (progressive voicing assimilation) --
		if p_next == "s" then
			if sibilant[p_current] then
				-- mw.log("[s] removed from position " .. i + 1 .. " following a sibilant.")
				p_RemoveNext()
			elseif p_current == "t" then
				-- mw.log("[t][s] → [ts] in position " .. i .. ".")
				p_Convert("ts")
				p_RemoveNext()
			elseif p_current == "d" then
				p_Convert("dz")
				p_RemoveNext()
				-- mw.log("[d][s] → [dz] in position " .. i .. ".")
			end
		elseif p_next == "ʃ" then
			if sibilant_post[p_current] == true then
				-- mw.log("[ʃ] removed from position " .. i + 1 .. " following a postalveolar sibilant.")
				p_RemoveNext()
			elseif sibilant_alv[p_current] then
				-- mw.log("[" .. p_current .. "][ʃ] → [" .. sibilant_alv_to_post[p_current] .. "] in position " .. i .. ".")
				p_Convert(sibilant_alv_to_post[p_current])
				p_RemoveNext()
			elseif p_current == "t" then
				-- mw.log("[t][ʃ] → [tʃ] in position " .. i .. ".")
				p_Convert("tʃ")
				p_RemoveNext()
			elseif p_current == "d" then
				p_Convert("dʒ")
				p_RemoveNext()
				-- mw.log("[d][ʃ] → [dʒ] in position " .. i .. ".")
			end
		end
		-- Resolution of Cv --
		if p_next == "v" and ((Cv_split[p_current] and not vowel[p_prev]) or Cv_fixed[p_current]) then
			-- mw.log("Cluster [" .. p_current .. "][v] resolved to [" .. p_current .. "] at position " .. i .. ".")
			p_RemoveNext()
		end
		-- Resolution of regressive voicing assimilation --
		if obstruent[p_current] then
			-- mw.log("Obstruent [" .. p_current .. "] found in position " .. i .. ". Searching for cluster.")
			local final_i = i
			-- find voicing of final obstruent in cluster --
			for k = i + 1, #working_phones do
				local check_phone = working_phones[k]
				if obstruent[check_phone] and check_phone ~= "v" then -- /v/ is excluded
					final_i = k
				else
					break
				end
			end
			if final_i ~= i then -- if cluster recognised --
				-- declared local: this used to be assigned to a global
				local final_obs = working_phones[final_i]
				-- mw.log("Final obstruent in cluster is [" .. final_obs .. "], cluster length " .. final_i - i + 1 .. ".")
				-- assimilate --
				if obstruent_voiced[p_current] ~= obstruent_voiced[final_obs] then
					-- mw.log("Voicing mismatch found in cluster at position " .. i .. ".")
					if obstruent_voiced[final_obs] == true then
						p_Convert(obstruent_voice[p_current])
					else
						p_Convert(obstruent_devoice[p_current])
					end
				else
					-- mw.log("No voicing mismatch found.")
				end
			else
				-- mw.log("No cluster found.")
			end
		end
		-- tv and kv
		if p_current == "v" and (p_prev == "t" or p_prev == "k") then
			p_Convert("f")
			-- mw.log("[" .. p_prev .. "][v] → [" .. p_prev .. "][f] in position " .. i .. ".")
		end
		-- j + [i]
		if p_current == "j" and i_initial[p_next] then
			table.remove(working_phones, i)
			-- mw.log("[j] removed from position ".. i .. ".")
			p_current = working_phones[i]
		end
		-- [i] + j
		if p_current == "j" and i_oid[p_prev] then
			table.remove(working_phones, i)
			-- mw.log("[j] removed from position ".. i .. ".")
			p_current = working_phones[i]
		end
		-- Resolution of geminates --
		-- A doubled consonant is reduced to one when it is followed by a
		-- boundary, the end of the list, or a consonant other than a glide.
		if
			p_next == p_current and (boundary[p_next2] or p_next2 == nil or consonant[p_next2]) and p_next2 ~= "j" and
				p_next2 ~= "w" and
				consonant[p_current]
		 then
			p_RemoveNext()
			-- mw.log("Geminate [" .. p_current .. "] removed in coda position at position " .. i .. ".")
		end
		-- Removal of apostrophes --
		if p_current == "'" then
			table.remove(working_phones, i)
			-- mw.log("Apostrophe removed from position ".. i .. ".")
			p_current = working_phones[i]
		end
		-- PHONETIC RESOLUTION --
		if phon then
			-- Cv --
			-- "v" after an obstruent other than "d" or "ɡ" surfaces as [ʋ].
			-- The comparison uses IPA "ɡ" (U+0261), the form that `data` emits
			-- for <g>.
			if p_current == "v" and obstruent[p_prev] and p_prev ~= "d" and p_prev ~= "ɡ" then
				p_Resolve("ʋ")
				-- mw.log("[" .. p_prev .. "][v] → [" .. p_prev .. "][ʋ] in position " .. i .. ".")
			end
			if p_current == "h" and not vowel[p_next] then -- hC
				-- mw.log("phon = true:")
				p_Convert("x")
			elseif p_current == "h" and p_next == "j" then
				-- NOTE(review): this branch cannot be reached, because "j" is
				-- not a vowel and the branch above therefore always wins;
				-- "h" + "j" is normally handled earlier by the iotation rule.
				-- mw.log("phon = true:")
				p_Convert("ç")
				p_RemoveNext()
			end
			if p_current == "n" and p_next == "j" then
				-- mw.log("phon = true:")
				p_Convert("ɲ")
				p_RemoveNext()
			end
			if p_current == "n" and p_next == "n" and p_next2 == "j" then
				-- mw.log("phon = true:")
				p_Convert("ɲ")
			end
			--
			--[[
			if p_current == "r" and p_next == "s" then
				p_Resolve("r̥")
			end
			]]
			-- a glide is inserted between "œ" and a following a-vowel
			if p_current == "œ" and (p_next == "a" or p_next == "aː" or p_next == "aːː") then
				table.insert(working_phones, i + 1, "w")
			end
		end
		-- Resolution of NK --
		if p_current == "n" and velar[p_next] then
			-- mw.log("n + velar cluster found.")
			p_Resolve("ŋ")
		end
		-- RESOLUTION OF MULTIPLE BOUNDARIES --
		if p_current == " " and p_next == "|" then
			p_Convert("|")
			p_RemoveNext()
		elseif p_current == " " and p_next == " " then
			p_RemoveNext()
		elseif p_current == "|" and (p_next == " " or p_next == "|") then
			p_RemoveNext()
		end
	end
	-- Removes placeholder phones from data --
	for _, temp in ipairs(consonant_temp) do
		valid_phone[temp] = nil
		consonant[temp] = nil
		glide[temp] = nil
	end
	-- mw.log("Consonant resolution result: [" .. table.concat(working_phones,"][") .. "]")
	return working_phones
end
-- Splits a resolved phone list into syllables.
-- `phones` is consumed destructively: every phone is removed from the front of
-- the list as it is processed. The result is a list whose entries are either
--   * a table with "nucleus" (a vowel phone) and optional "onset" / "coda"
--     lists of consonants (an empty onset or coda is stored as nil), or
--   * a string: a boundary phone or "-".
-- Raises an error when a phone is neither a consonant, a vowel, a boundary
-- nor "-".
function get_syllables(phones)
local working_phones = phones
local syllables = {}
-- The syllable currently being assembled; an empty nucleus means that no
-- vowel has been seen yet.
local working_syllable = {
["onset"] = {},
["nucleus"] = "",
["coda"] = {}
}
local syllable_no = 1
-- Debug helper: builds the text for a log line. The mw.log call itself is
-- commented out, so this currently has no visible effect.
local function logSyllable()
local onset = ""
local coda = ""
if working_syllable["onset"] then
onset = table.concat(working_syllable["onset"], " ")
end
if working_syllable["coda"] then
coda = table.concat(working_syllable["coda"], " ")
end
-- mw.log( onset .. " [ " .. working_syllable["nucleus"] .. " ] " .. coda )
end
-- mw.log("————— BUILDING SYLLABLES —————")
-- division into syllables --
while #working_phones > 0 do
-- Look-ahead window, captured once per iteration.
local p_current = working_phones[1]
local p_next = working_phones[2]
local p_next2 = working_phones[3]
local p_next3 = working_phones[4]
-- Stores the working syllable in the result and starts a fresh one. Does
-- nothing when the working syllable has no nucleus yet, so consonants
-- collected so far stay in its onset. `check` is true at every call site.
local function register_syllable(check)
if check == true and working_syllable["nucleus"] ~= "" then
syllables[syllable_no] = working_syllable
if #syllables[syllable_no]["onset"] == 0 then
syllables[syllable_no]["onset"] = nil
end
if #syllables[syllable_no]["coda"] == 0 then
syllables[syllable_no]["coda"] = nil
end
-- mw.log("Syllable " .. syllable_no .. " registered:")
logSyllable()
-- mw.log("====================")
syllable_no = syllable_no + 1
working_syllable = {
["onset"] = {},
["nucleus"] = "",
["coda"] = {}
}
end
end
-- Closes the working syllable (if it has a nucleus) and stores the current
-- boundary phone as an entry of its own.
local function register_boundary(check)
if check == true then
register_syllable(true)
syllables[syllable_no] = p_current
table.remove(working_phones, 1)
-- mw.log("Boundary syllable " .. syllable_no .. " registered: '" .. p_current .. "'\n====================")
syllable_no = syllable_no + 1
end
end
-- Moves the current phone into the working syllable: a vowel becomes the
-- nucleus; a consonant joins the onset before a nucleus exists and the coda
-- afterwards. When the input is exhausted the working syllable is stored.
local function register_phone(check)
if check == true then
if vowel[p_current] then
-- mw.log("[" .. p_current .. "] is now the working nucleus.")
working_syllable["nucleus"] = p_current
elseif consonant[p_current] then
-- mw.log("[" .. p_current .. "] appended to the working " .. unit .. ".")
local unit = ""
if working_syllable["nucleus"] == "" then
unit = "onset"
else
unit = "coda"
end
table.insert(working_syllable[unit], p_current)
else
error("Invalid input for function register_phone")
end
table.remove(working_phones, 1)
-- mw.log("Current working syllable:")
-- logSyllable()
if #working_phones == 0 then
register_syllable(true)
end
end
end
if consonant[p_current] then
if working_syllable["nucleus"] == "" then
-- no nucleus yet: the consonant belongs to the onset
register_phone(true)
else
-- A consonant directly before a vowel (or before glide + vowel) starts
-- the next syllable.
if vowel[p_next] or (glide[p_next] and vowel[p_next2]) then -- *C*(J)V
register_syllable(true)
register_phone(true)
elseif #working_syllable["coda"] > 0 then -- C | …
-- The coda already holds a consonant. This consonant starts a new
-- syllable only when it and the next consonant form a permitted onset
-- pair that is followed by a vowel (or by glide + vowel); otherwise it
-- joins the coda.
local function JV(check)
if check == true then
if vowel[p_next2] or (glide[p_next2] and vowel[p_next3]) then -- C | *C*l(J)V
register_syllable(true)
register_phone(true)
else
register_phone(true)
end
end
end
if Cl[p_current] and p_next == "l" then -- C | *C*l …
JV(true) -- C | *C*l(J)V
elseif Cr[p_current] and p_next == "r" then -- C | *C*r …
JV(true) -- C | *C*r(J)V
elseif Cv[p_current] and (p_next == "v" or p_next == "ʋ" or p_next == "f") then -- C | *C*v …
JV(true) -- C | *C*v(J)V
elseif CN[p_current] and nasal[p_next] then -- C | *C*N …
JV(true) -- C | *C*N(J)V
else
register_phone(true)
end
else
-- first consonant after the nucleus: always goes to the coda
register_phone(true)
end
end
elseif vowel[p_current] then
if working_syllable["nucleus"] == "" then
register_phone(true)
else
-- a second vowel (hiatus): close the syllable and start a new one
register_syllable(true)
register_phone(true)
end
elseif boundary[p_current] or p_current == "-" then
register_boundary(true)
else
error("Unrecognised phone in syllabifier: " .. p_current)
end
end
-- mw.log("STRING EXHAUSTED\n====================")
return syllables
end
--- Adds weight tags to every syllable table in a syllable list.
-- String entries (boundaries) are left untouched. Each syllable table
-- receives:
--   checked : true when the syllable has a coda
--   long    : true when the nucleus is in the `long` set
--   weak    : true when the nucleus is in the `weak` set
--   heavy   : set only when a coda exists; true when the nucleus is long and
--             the coda starts with an affricate, or starts with an obstruent
--             and has at least two members
-- @param syllables list as returned by get_syllables (modified in place)
-- @return the same list
function tag_syllables(syllables)
	-- mw.log("————— TAGGING SYLLABLES —————")
	for index = 1, #syllables do
		local syl = syllables[index]
		if type(syl) == "table" then
			local nucleus = syl["nucleus"]
			local coda = syl["coda"]
			local is_long = long[nucleus] and true or false
			syl["checked"] = coda ~= nil
			syl["long"] = is_long
			syl["weak"] = weak[nucleus] and true or false
			if coda then
				local first = coda[1]
				local heavy_coda = affricate[first] or (obstruent[first] and coda[2])
				syl["heavy"] = (is_long and heavy_coda) and true or false
			end
		end
	end
	return syllables
end
--- Assigns stress to the words in a tagged syllable list.
-- The input is consumed destructively. Runs of syllable tables between string
-- entries (boundaries) are treated as words. The stressed syllable of each
-- word receives `stress = true` (primary) or `stress2 = true` (secondary, for
-- a word directly before "·"). A monosyllable is left unmarked unless it
-- directly follows "·" or directly precedes "-".
--
-- Change from the previous revision: the final pass that strips "-" and "·"
-- never terminated when the output was empty (empty input, or input made only
-- of "-" / "·" entries). It is now a single backwards sweep.
--
-- @param syllables list as returned by tag_syllables
-- @return a new list holding the syllable tables and the remaining boundary
--         strings, with every "-" and "·" entry removed
function get_stress(syllables)
	--[[
	Stress in Radestrian is calculated excluding · prefixes, which are stressed as if they were standalone words but with secondary stress. Stress in Radestrian falls on either the first syllable or the second syllable. The first syllable is the default stress position. Second-syllable stress occurs in the following circumstances:
	1:
		σₙ = 2
		σ₁: short, open
		σ₂: heavy
	2:
		σₙ = 3
		σ₁: short
		σ₂: long
	3:
		σₙ = 3
		σ₁: short, open
		σ₂: short, not weak, closed
	4:
		σₙ = 4+
		σ₁: short
		σ₂: long
	5:
		σₙ = 4+
		σ₁: short
		σ₂: short, closed
		σ₃: any
	6:
		σₙ = 4+
		σ₁: short
		σ₂: short, open
		σ₃: short
	]]
	local working_word = {}
	local output = {}
	local test_n = #syllables
	-- mw.log("————— BEGINNING STRESS ASSIGNMENT —————")
	while #syllables > 0 do
		-- basically an iteration of working words
		if type(syllables[1]) == "table" then -- testing for boundary syllable
			-- set up current working word by moving the initial syllables out of the array until a boundary is reached
			for i = 1, test_n do
				table.insert(working_word, syllables[1])
				table.remove(syllables, 1)
				-- mw.log("Syllable " .. i .. " registered to working word.")
				if syllables[1] == nil or type(syllables[1]) == "string" then -- next entry is a boundary or the end
					-- mw.log("Boundary found: continuing to stress assignment.")
					break
				end
			end
			-- INTERNAL FUNCTIONS
			-- Marks syllable `syl` (1 or 2) of the working word as stressed and
			-- moves the word to the output. At this point syllables[1] is the
			-- boundary that FOLLOWS the word and output[#output] is the one
			-- that PRECEDES it.
			local function stress(syl)
				if syl == 1 or syl == 2 then
					if
						(syllables[1] == "-" or output[#output] == "·") and syllables[1] ~= "·" and
							output[#output] ~= "-"
					 then -- following · or preceding - : assign stress even to a monosyllable
						-- mw.log("Primary stress assigned to syllable " .. syl .. ".")
						working_word[syl]["stress"] = true
					elseif #working_word > 1 then -- if monosyllabic, do not add a redundant stress mark
						if output[#output] == "-" then
							-- HYPHEN NO LONGER CAUSES SECONDARY STRESS
						elseif syllables[1] == "·" then -- check if preceding ·
							-- mw.log("Secondary stress assigned to syllable " .. syl .. ".")
							working_word[syl]["stress2"] = true
						else
							-- mw.log("Primary stress assigned to syllable " .. syl .. ".")
							working_word[syl]["stress"] = true
						end
					else
						-- mw.log("Implicit primary stress assigned to monosyllable.")
					end
					-- register the working word
					if #working_word > 0 then
						-- mw.log("Word registered to output.")
						for j = 1, #working_word do
							table.insert(output, working_word[j])
						end
						working_word = {}
					else
						error("An empty word cannot be registered")
					end
				else
					error("Empty or invalid input to stress()")
				end
			end
			-- calculate stress on the working word
			-- mw.log("Syllables in working word: " .. #working_word)
			if #working_word == 0 or working_word == nil then
				error("The working word is empty")
			elseif #working_word == 1 or working_word[1]["long"] then
				-- monosyllabic, or long initial syllable
				stress(1)
			elseif #working_word == 2 then -- disyllabic?
				if working_word[1]["checked"] == false and working_word[2]["heavy"] then -- condition 1
					stress(2)
				else
					stress(1)
				end
			elseif #working_word == 3 then -- trisyllabic?
				-- NOTE(review): condition 3 as described above also requires
				-- an open first syllable, which is not tested here; confirm
				-- which of the two is intended.
				if
					working_word[2]["long"] or -- condition 2
						((not working_word[2]["weak"]) and working_word[2]["checked"]) -- condition 3
				 then
					stress(2)
				else
					stress(1)
				end
			elseif #working_word >= 4 then -- 4+ syllables
				if
					working_word[2]["long"] or -- condition 4
						working_word[2]["checked"] or -- condition 5
						not working_word[3]["long"] -- condition 6
				 then
					stress(2)
				else
					stress(1)
				end
			else
				error("Invalid syllable count")
			end
		elseif syllables[1] == nil then
			error("Empty syllable found in array. The array should contain no empty syllables")
		else -- the entry is a boundary: copy it to the output as it is
			-- mw.log ("Boundary syllable identified.")
			table.insert(output, syllables[1])
			-- mw.log("Boundary registered to output: [" .. syllables[1] .. "]")
			table.remove(syllables, 1)
		end
		-- mw.log("—————")
	end
	-- mw.log("SYLLABLES EXHAUSTED\n==========\nRemoving [·] and [-] from the system.")
	-- Strip the "-" and "·" markers. Iterating backwards keeps the indices
	-- valid while removing, and the loop simply does nothing for an empty
	-- output.
	for i = #output, 1, -1 do
		if output[i] == "-" or output[i] == "·" then
			table.remove(output, i)
		end
	end
	return output
end
-- Re-divides the consonants that sit between each pair of adjacent syllables.
-- For every neighbouring pair of syllable tables where the first has a coda or
-- the second has an onset, both are pooled into a single cluster. The cluster is
-- then split again: its last one, two or three segments (chosen with the glide,
-- nasal, CN, Cr, Cl and Cv lookup tables) become the onset of the second
-- syllable and whatever precedes them becomes the coda of the first.
-- Entries that are not tables (boundary strings) are left untouched.
-- Declared as a global, as before, because export.generate calls it by name.
--   syllables: array of syllable tables and boundary strings; modified in place
--   returns:   the same `syllables` array
function resolve_syllables(syllables)
	-- true for the segments that pattern together in the "Cv" onset rules
	local function is_v_like(phone)
		return phone == "v" or phone == "ʋ" or phone == "f"
	end

	for i = 1, #syllables - 1 do
		local left, right = syllables[i], syllables[i + 1]
		-- Check the types before touching any field, so that boundary strings are never indexed.
		if type(left) == "table" and type(right) == "table" and (left["coda"] or right["onset"]) then
			-- mw.log("Resolving clusters in syllables " .. i .. " and " .. i + 1 .. ".")
			-- get intersyllabic cluster: coda of the left syllable followed by onset of the right one --
			local working_cluster = {}
			if left["coda"] then
				for j = 1, #left["coda"] do
					table.insert(working_cluster, left["coda"][j])
				end
			end
			if right["onset"] then
				for j = 1, #right["onset"] do
					table.insert(working_cluster, right["onset"][j])
				end
			end
			-- mw.log("Cluster to be resolved: " .. table.concat(working_cluster))
			-- the last four segments of the cluster (nil where the cluster is shorter)
			local size = #working_cluster
			local p_last = working_cluster[size]
			local p_last2 = working_cluster[size - 1]
			local p_last3 = working_cluster[size - 2]
			local p_last4 = working_cluster[size - 3]

			-- splits the current cluster so that its last `point` segments form the new onset
			-- and the rest forms the new coda, then re-assigns both to the syllables --
			local function split(point)
				-- test for nil first: comparing nil with a number would raise an unrelated error
				if point == nil or point < 1 or point > 3 then
					error("split() must take integer values between 1 to 3 inclusive")
				end
				local new_coda = {}
				local new_onset = {}
				local boundary = size - point
				for j = 1, boundary do
					table.insert(new_coda, working_cluster[j])
				end
				for j = boundary + 1, size do
					table.insert(new_onset, working_cluster[j])
				end
				-- mw.log("σ" .. i .. " | σ" .. i+1 .. " : … " .. table.concat(new_coda) .. " | " .. table.concat(new_onset) .. " …")
				left["coda"] = new_coda
				right["onset"] = new_onset
			end

			-- determine syllable boundary; the order of these tests is significant --
			if glide[p_last] and nasal[p_last2] and CN[p_last3] and p_last4 then -- C|CNJ
				split(3)
			elseif nasal[p_last] and CN[p_last2] and p_last3 then -- C|CN
				split(2)
			elseif glide[p_last] and p_last2 == "r" and Cr[p_last3] and p_last4 then -- C|CrJ
				split(3)
			elseif p_last == "r" and Cr[p_last2] and p_last3 then -- C|Cr
				split(2)
			elseif glide[p_last] and p_last2 == "l" and Cl[p_last3] and p_last4 then -- C|ClJ
				split(3)
			elseif p_last == "l" and Cl[p_last2] and p_last3 then -- C|Cl
				split(2)
			elseif glide[p_last] and is_v_like(p_last2) and Cv[p_last3] and p_last4 then -- C|CvJ
				split(3)
			elseif is_v_like(p_last) and Cv[p_last2] and p_last3 then -- C|Cv
				split(2)
			elseif glide[p_last] and p_last2 then -- C|CJ
				split(2)
			else -- C|C
				split(1)
			end
		end
	end
	return syllables
end
-- Serialises the processed syllable array into a single IPA string.
-- Each syllable table contributes stress mark + onset + nucleus + coda; the
-- boundary strings " ", "|" and "‿" are written out as " ", " | " and "‿".
-- Before a syllable is written the following adjustments are applied, in order:
--   1. a "." hiatus marker is stored as the coda when an open syllable whose
--      nucleus is listed in `marked_hiatus` is directly followed by an
--      onsetless, unstressed syllable;
--   2. (phon only) /a/ becomes "ɑ" next to a syllable whose nucleus is "æː";
--   3. (phon only) a coda-final consonant identical to the next onset-initial
--      one is dropped and the onset consonant is lengthened with "ː";
--   4. (phon only) a coda-initial nasal before a triggering fricative is
--      removed and the nucleus is replaced by its `nasalise` counterpart.
-- Finally a fixed list of pattern replacements tidies stress marks and merges
-- adjacent identical vowels into long/overlong ones.
--   syllables: array of syllable tables and boundary strings; syllable tables
--              are modified in place by the adjustments above
--   phon:      truthy to apply the phonetic (narrow) adjustments
--   returns:   the assembled string
--   raises:    an error if an entry is neither a table nor a known boundary
function syllables_to_string(syllables, phon)
	-- trigger tests for phonetic nasalisation
	local function is_fricative(phone)
		return fricative[phone]
	end
	local function is_back_fricative(phone)
		return phone == "h" or phone == "x" or phone == "ɣ"
	end

	-- collected output fragments; joined once at the end instead of re-concatenating each turn
	local pieces = {}
	-- mw.log("————— COMPILING OUTPUT —————")
	for i = 1, #syllables do
		local syl = syllables[i]
		if type(syl) == "table" then
			-- Only a following syllable table takes part in the adjustments below;
			-- a boundary string or the end of the array counts as "nothing follows".
			local nxt = syllables[i + 1]
			if type(nxt) ~= "table" then
				nxt = nil
			end

			-- add hiatus marker --
			if
				nxt and syl["coda"] == nil and marked_hiatus[syl["nucleus"]] and
				nxt["onset"] == nil and
				not (nxt["stress"] or nxt["stress2"])
			then
				syl["coda"] = {"."}
				-- mw.log("Hiatus marked between syllables " .. i .. " and " .. i+1 .. ".")
			end

			if phon then
				-- phonetic /a/ backing --
				if syl["nucleus"] == "a" then
					local prev = syllables[i - 1]
					if
						(nxt and nxt["nucleus"] == "æː") or
						(type(prev) == "table" and prev["nucleus"] == "æː")
					then
						syl["nucleus"] = "ɑ"
					end
				end
				--[[ disabled centralisation rules, kept for reference:
				if syllables[i]["nucleus"] == "a" then
					syllables[i]["nucleus"] = "ä"
				end
				if syllables[i]["nucleus"] == "ai" then
					syllables[i]["nucleus"] = "äi"
				end
				if syllables[i]["nucleus"] == "au" then
					syllables[i]["nucleus"] = "äu"
				end
				if syllables[i]["nucleus"] == "ia" then
					syllables[i]["nucleus"] = "iɐ"
				end
				if syllables[i]["nucleus"] == "ua" then
					syllables[i]["nucleus"] = "uɐ"
				end
				]]

				-- gemination --
				if nxt and syl["coda"] and nxt["onset"] then
					local final = syl["coda"][#syl["coda"]]
					-- the nil test stops two empty tables from comparing equal (nil == nil)
					-- and then failing on the concatenation below
					if final ~= nil and final == nxt["onset"][1] then
						table.remove(syl["coda"])
						nxt["onset"][1] = final .. "ː"
					end
				end

				-- phonetic nasalisation --
				local coda = syl["coda"]
				if coda then
					local head = coda[1]
					local triggers = nil
					if head == "ɲ" or head == "n" then
						triggers = is_fricative
					elseif head == "ŋ" then
						triggers = is_back_fricative
					end
					if triggers then
						-- the trigger is either the second coda segment or, when the nasal
						-- is the whole coda, the first segment of the following onset
						local fire = triggers(coda[2])
						if not fire and nxt and nxt["onset"] then
							fire = coda[2] == nil and triggers(nxt["onset"][1])
						end
						if fire then
							-- mw.log("Phonetic nasalisation in syllable " .. i .. ": " .. syllables[i]["nucleus"] .. " → " .. nasalise[syllables[i]["nucleus"]])
							syl["nucleus"] = nasalise[syl["nucleus"]]
							table.remove(coda, 1)
							if head == "ɲ" then
								-- the palatal nasal leaves a nasalised glide behind
								table.insert(coda, 1, "j̃")
							end
						end
					end
				end
			end

			-- assemble the syllable --
			local onset = ""
			local coda = ""
			local stress = ""
			if syl["onset"] then
				onset = table.concat(syl["onset"])
			end
			if syl["coda"] then
				coda = table.concat(syl["coda"])
			end
			if syl["stress"] then
				stress = "ˈ"
			end
			if syl["stress2"] then -- secondary stress takes precedence when both flags are set
				stress = "ˌ"
			end
			pieces[#pieces + 1] = stress .. onset .. syl["nucleus"] .. coda
		elseif syl == " " then
			pieces[#pieces + 1] = " "
		elseif syl == "|" then
			pieces[#pieces + 1] = " | "
		elseif syl == "‿" then
			pieces[#pieces + 1] = "‿"
		else
			error("Unknown phone identified: " .. syllables[i])
		end
	end
	local output = table.concat(pieces)

	-- post rectifications, applied in this order: {pattern, replacement} --
	local rectifications = {
		{"(ˈˌ)", "ˌ"},
		-- vowel length resolution
		{"i%.iː?e", "iːe"},
		{"i%.iːː?", "iːː"},
		{"i%.i", "iː"},
		{"iːiː?e", "iːe"},
		{"iːiː*", "iːː"},
		{"u%.uː?o", "uːo"},
		{"u%.uːː?", "uːː"},
		{"u%.u", "uː"},
		{"uːuː?o", "uːo"},
		{"uːuː*", "uːː"},
		{"([^œɔui])a%.aːː?", "%1aːː"},
		{"([^œɔui])a%.a([^iu])", "%1aː%2"},
		{"aːaː*", "aːː"}
	}
	for _, rule in ipairs(rectifications) do
		-- plain assignment keeps only the string and discards gsub's match count
		output = mw.ustring.gsub(output, rule[1], rule[2])
	end
	return output
end
-- Extracts the rhyme from an IPA transcription.
-- Returns nil if the transcription contains a space or "|" anywhere, or ends
-- in "-" or "·". Otherwise, when a primary stress mark "ˈ" is present, all
-- secondary stress marks are deleted and everything up to and including the
-- last "ˈ" is discarded. Leading characters that are keys of the `consonant`
-- lookup are then stripped one at a time and the remainder is returned.
--   IPA:     transcription string
--   returns: the rhyme string, or nil
function export.get_rhyme(IPA)
	local ufind, ugsub, usub = mw.ustring.find, mw.ustring.gsub, mw.ustring.sub

	-- multi-word input and affixes have no rhyme
	if ufind(IPA, "([ %|])") or ufind(IPA, "([%-·])$") then
		return nil
	end

	-- keep only what follows the primary stress mark
	if ufind(IPA, "(ˈ)") then
		IPA = ugsub(IPA, "(ˌ)", "")
		IPA = ugsub(IPA, "^(.*ˈ)", "")
	end

	-- drop the initial consonants; stops at the first non-consonant or at the empty string
	while consonant[usub(IPA, 1, 1)] do
		IPA = usub(IPA, 2)
	end
	return IPA
end
-- Collects "weak" rhyme candidates from the material after the last stress mark.
-- Returns nil if the transcription contains a space or "|", has no primary
-- stress mark "ˈ", or has nothing after its last stress mark. Otherwise the
-- text after the last stress mark is taken, a "." is inserted between a
-- length mark and a directly following vowel, and the first syllable (initial
-- consonants, vowel, following consonants) is removed. If what remains is
-- empty or already matches one of the terminal shapes tested by
-- `is_terminal`, nil is returned. Otherwise the remainder is walked from the
-- left: every position that starts with a long vowel/diphthong followed by
-- consonants and a short vowel is recorded, and the walk ends with the first
-- terminal shape, which is recorded as well.
--   IPA:     transcription string
--   returns: array of rhyme strings, or nil when there are none
function export.get_weak_rhyme(IPA)
	local ufind, ugsub, umatch = mw.ustring.find, mw.ustring.gsub, mw.ustring.match

	if ufind(IPA, "([ %|])") or ufind(IPA, "ˈ") == nil then
		return nil
	end

	local C_regex = "szlntdrmpbθðhjwçfvʃʒxŋkɡɲɣ%."
	local V_long_regex = "[uyiɔœɛoɤeaæø][uyiɔœɛoɤeaæøː][ːeoi]?"
	local V_short_regex = "[uyiɔœɛɤa]([^ːaeoiu])"
	local C_class = "[" .. C_regex .. "]" -- any single consonant (or ".")
	local V_bare = "[uyiɔœɛɤa]" -- a short vowel without the look-ahead capture

	-- True when `s` has one of the shapes at which the walk below stops.
	local function is_terminal(s)
		if
			ufind(s, "^" .. V_bare .. C_class .. "*$") or
			ufind(s, "^" .. V_long_regex .. C_class .. "*$") or
			ufind(s, "^" .. V_long_regex .. C_class .. "+" .. V_bare .. C_class .. "*$")
		then
			return true
		end
		return ufind(s, "^" .. V_long_regex) ~= nil and
			ufind(s, C_class .. V_short_regex) == nil and
			ufind(s, V_bare .. C_class .. "*$") == nil
	end

	-- Removes a leading vowel (short or long) and the consonant cluster after it.
	local function strip_syllable(s)
		s = ugsub(s, "^" .. V_short_regex, "%1") -- delete short vowel, keep the character after it
		s = ugsub(s, "^" .. V_long_regex, "") -- delete long vowel
		s = ugsub(s, "^" .. C_class .. "+", "") -- delete consonant cluster
		return s
	end

	IPA = umatch(IPA, "[^ˈˌ]+$")
	if IPA == nil then
		-- the transcription ends in a stress mark: nothing to work with
		return nil
	end
	IPA = ugsub(IPA, "ː(" .. V_short_regex .. ")", "ː.%1")
	IPA = ugsub(IPA, "ː(" .. V_long_regex .. ")", "ː.%1")
	IPA = ugsub(IPA, "^" .. C_class .. "+", "") -- delete consonant cluster
	IPA = strip_syllable(IPA)
	-- mw.log(IPA)
	if #IPA == 0 or is_terminal(IPA) then
		return nil
	end

	local IPA_rhymes = {}
	while #IPA > 0 do
		if is_terminal(IPA) then
			table.insert(IPA_rhymes, IPA)
			break
		elseif ufind(IPA, "^" .. V_long_regex .. C_class .. "+" .. V_short_regex) then
			table.insert(IPA_rhymes, IPA)
			IPA = ugsub(IPA, "^" .. V_long_regex, "") -- delete long vowel
		else
			local rest = strip_syllable(IPA)
			if rest == IPA then
				-- The remainder starts with something that is neither a known vowel
				-- nor a known consonant; stop here instead of looping forever.
				break
			end
			IPA = rest
		end
	end

	if #IPA_rhymes > 0 then
		return IPA_rhymes
	end
	return nil
end
-- mw.logObject(p.get_weak_rhyme("ˈliːaijaːrs"))
-- Template entry point: converts the text in the first argument to IPA.
-- Every further positional argument is treated as a flag name. The flags read
-- here are "hj" and "phon" (passed on to the conversion steps), "rhyme"
-- (return only the rhyme), "format" (wrap the result in an IPA <span>, using
-- [...] with "phon" and /.../ otherwise) and "nolarge" (adds the "nolarge"
-- class to that span).
-- A trailing "-" or "·" in the input marks a prefix and a leading "-" marks a
-- suffix; the hyphen is put back on the result, and secondary stress marks are
-- removed from suffixes. The first run of two or more capital letters is
-- spelled out letter by letter using the `initialism` table.
--   frame:   the frame object; arguments are read through getArgs
--   returns: the transcription string, or nil when "rhyme" was requested and
--            export.get_rhyme returned nil
--   raises:  an error when no text is supplied
function export.generate(frame)
	local args = getArgs(frame)
	local outputIPA = args[1]
	if outputIPA == nil then
		-- fail with a readable message instead of a "bad argument to gsub" error
		error("No text to transcribe was supplied in the first parameter")
	end

	-- collect the flag arguments
	local parameters = {}
	local p = 2
	-- mw.log("——— Parameters ———")
	while args[p] do
		parameters[args[p]] = true
		-- mw.log(args[p] .. " = true")
		p = p + 1
	end

	process_temp()
	outputIPA = mw.ustring.gsub(outputIPA, "(% %;)", " ")
	local last_char = mw.ustring.sub(outputIPA, -1)
	local is_prefix = last_char == "-" or last_char == "·"
	local is_suffix = mw.ustring.sub(outputIPA, 1, 1) == "-"

	-- process acronyms
	local capital = "[A-ZÀÁẢÂÈÌÍỈÒØÙÛÚỦÝỶỲŪ]"
	local acronym = mw.ustring.match(outputIPA, capital .. capital .. "+")
	if acronym then
		local spellout = acronym
		for letter, name in pairs(initialism) do
			-- the first letter is replaced bare; every other one is preceded by "·"
			spellout = mw.ustring.gsub(spellout, "^" .. letter, name)
			spellout = mw.ustring.gsub(spellout, letter, "·" .. name)
			-- mw.log(spellout)
		end
		outputIPA = mw.ustring.gsub(outputIPA, acronym .. "%-?", spellout)
	end

	-- begin generation
	outputIPA = generate_IPA(outputIPA)
	outputIPA = resolve_vowels(outputIPA)
	outputIPA = resolve_consonants(outputIPA, parameters["hj"], parameters["phon"])
	outputIPA = get_syllables(outputIPA)
	outputIPA = tag_syllables(outputIPA)
	outputIPA = get_stress(outputIPA)
	outputIPA = resolve_syllables(outputIPA)
	outputIPA = syllables_to_string(outputIPA, parameters["phon"])
	if parameters["rhyme"] then
		outputIPA = export.get_rhyme(outputIPA)
	end
	if outputIPA == nil then
		-- mw.log("Invalid rhyme request.")
		return nil
	end

	-- restore the affix hyphens
	if is_prefix then
		outputIPA = outputIPA .. "-"
	end
	if is_suffix then
		outputIPA = "-" .. outputIPA
		outputIPA = mw.ustring.gsub(outputIPA, "(ˌ)", "")
	end

	-- optional HTML wrapping
	if parameters["format"] then
		local nolarge = ""
		if parameters["nolarge"] then
			nolarge = " nolarge"
		end
		local open, close = "/", "/"
		if parameters["phon"] then
			open, close = "[", "]"
		end
		outputIPA = '<span class="IPA' .. nolarge .. '">' .. open .. outputIPA .. close .. "</span>"
	end
	-- mw.log(outputIPA)
	return outputIPA
end
return export
--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs", "hj"}})
]]