10,794
edits
TheNightAvl (talk | contribs) m (Combined stress conditions) |
TheNightAvl (talk | contribs) m (Beautified) |
||
Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local getArgs = require( | local getArgs = require("Module:Arguments").getArgs | ||
-- DATA -- | -- DATA -- | ||
-- Spelling-to-phone lookup walked by generate_IPA.
-- A string value is the phone for the letter sequence that leads to it. A
-- table value lists the letters that may continue the sequence; its [false]
-- entry is the phone used when the sequence stops at that point.
local data = {
    ["a"] = {
        ["i"] = "ai",
        ["o"] = { ["i"] = "ɔi", [false] = "ɔː" },
        ["ô"] = "ɔa",
        ["u"] = "au",
        [false] = "a"
    },
    ["á"] = "aː",
    ["ả"] = "aːː",
    ["â"] = { ["i"] = "ɤi", [false] = "ɤ" },
    ["b"] = "b",
    ["c"] = "ts",
    ["d"] = { ["x"] = "dʒ", ["z"] = "dz", [false] = "d" },
    ["ð"] = "ð",
    ["e"] = { ["a"] = "æː", ["i"] = "ei", [false] = "ɛ" },
    ["é"] = "eː",
    ["f"] = "f",
    ["g"] = "ɡ",
    ["h"] = "h",
    ["ħ"] = "ɣ",
    ["i"] = {
        ["e"] = { ["a"] = "ia", ["j"] = "iej", [false] = "ie" },
        [false] = "i"
    },
    ["í"] = "iː",
    ["ỉ"] = { ["e"] = "iːe", [false] = "iːː" },
    ["j"] = "j",
    ["ĵ"] = "ĵ",
    ["k"] = "k",
    ["ķ"] = "tʃ",
    ["l"] = "l",
    ["m"] = "m",
    ["n"] = "n",
    ["ņ"] = "ŋ",
    ["o"] = { ["a"] = "ɔa", [false] = "ɔ" },
    ["ô"] = "ɔ",
    ["ó"] = "oː",
    ["ø"] = { ["a"] = "œa", ["i"] = "ei", [false] = "œ" },
    ["p"] = "p",
    ["q"] = "k",
    ["r"] = "r",
    ["s"] = "s",
    ["ș"] = "ʃ",
    ["t"] = "t",
    ["u"] = {
        ["i"] = "ɤi",
        ["o"] = { ["a"] = "ua", ["j"] = "uoj", [false] = "uo" },
        [false] = "u"
    },
    ["ù"] = "ù",
    ["û"] = "ɤ",
    ["ú"] = "uː",
    ["ủ"] = { ["o"] = "uːo", [false] = "uːː" },
    ["ū"] = "ū",
    ["v"] = "v",
    ["w"] = "w",
    ["x"] = "ʒ",
    ["y"] = {
        ["e"] = { ["a"] = "ia", ["j"] = "yej", [false] = "ie" },
        [false] = "i"
    },
    ["ỳ"] = "y",
    ["z"] = "z",
    ["þ"] = "θ",
    -- boundaries and punctuation
    ["·"] = "·",
    ["-"] = "-",
    [" "] = " ",
    ["."] = "|",
    ["'"] = "'",
    ["‿"] = "‿"
}
-- <à> shares the whole entry (same table object) with <a>.
data["à"] = data["a"]
Line 131: | Line 131: | ||
-- Irregular spellings: generate_IPA replaces every occurrence of a key in the
-- lower-cased input with its value before letters are converted to phones.
local irregular = {
    ["eurú"] = "ørú",
    ["eurov"] = "ørov",
    ["euruv"] = "øruv",
    ["heņre"] = "heņgre",
    ["nrao"] = "drao",
    ["nron"] = "dron",
    ["ryņl"] = "ryņgl",
    ["zeus"] = "zøs",
    ["zeud"] = "zød"
}

-- Builds a membership set: every phone in `list` maps to true.
local function phone_set(list)
    local set = {}
    for _, phone in ipairs(list) do
        set[phone] = true
    end
    return set
end

-- Set of recognised phones.
local valid_phone = phone_set({
    "a", "aː", "aːː", "æː", "ai", "au",
    "b", "ç", "d", "ð", "dz", "dʒ",
    "eː", "ei", "ɛ", "ɤ", "ɤi",
    "f", "ɡ", "ɣ", "h",
    "i", "iː", "iːː", "iːe", "ia", "ie",
    "j", "k", "l", "m", "n", "ŋ",
    "oː", "œ", "œa", "øi", "ɔ", "ɔː", "ɔa", "ɔi",
    "p", "r", "s", "ʃ", "t", "ts", "tʃ",
    "u", "uː", "uːː", "uːo", "ua", "uo",
    "v", "w", "y", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʝ"
})
-- Placeholder phones that process_temp adds to valid_phone.
local valid_phone_temp = {"iːj", "uːj", "yːj", "ù", "ū", "ĵ"}
-- CONSONANT GROUPS -- | -- CONSONANT GROUPS -- | ||
-- Builds a membership set: every phone in `list` maps to true.
local function consonant_set(list)
    local set = {}
    for _, phone in ipairs(list) do
        set[phone] = true
    end
    return set
end

local consonant = consonant_set({
    "b", "ç", "d", "ð", "dz", "dʒ", "f", "ɡ", "ɣ", "h", "j", "k", "l", "m",
    "n", "ŋ", "p", "r", "s", "ʃ", "t", "ts", "tʃ", "v", "w", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʝ", "ʋ", "ɲ", "r̥"
})
-- Placeholder consonants that process_temp adds to `consonant`.
local consonant_temp = {"ĵ"}

local velar = consonant_set({
    "k", "ɡ", "ɣ", "ŋ",
    -- allophones --
    "x"
})

local obstruent = consonant_set({
    "b", "ç", "d", "ð", "dz", "dʒ", "f", "ɡ", "ɣ", "h", "k", "p", "s", "ʃ",
    "t", "ts", "tʃ", "v", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʋ"
})

local obstruent_voiced = consonant_set({
    "b", "d", "ð", "dz", "dʒ", "ɡ", "ɣ", "v", "z", "ʒ"
})

-- Obstruent → its voiceless counterpart (voiceless phones map to themselves).
local obstruent_devoice = {
    ["b"] = "p", ["ç"] = "ç", ["d"] = "t", ["ð"] = "θ",
    ["dz"] = "ts", ["dʒ"] = "tʃ", ["f"] = "f", ["ɡ"] = "k",
    ["ɣ"] = "h", ["h"] = "h", ["k"] = "k", ["p"] = "p",
    ["s"] = "s", ["ʃ"] = "ʃ", ["t"] = "t", ["ts"] = "ts",
    ["tʃ"] = "tʃ", ["v"] = "f", ["z"] = "s", ["ʒ"] = "ʃ",
    ["θ"] = "θ",
    -- allophones --
    ["x"] = "x"
}

-- Obstruent → the phone used when it is voiced.
local obstruent_voice = {
    ["b"] = "b", ["ç"] = "j", ["d"] = "d", ["ð"] = "ð",
    ["dz"] = "dz", ["dʒ"] = "dʒ", ["f"] = "v", ["ɡ"] = "ɡ",
    ["ɣ"] = "ɣ", ["h"] = "h", ["k"] = "ɡ", ["p"] = "b",
    ["s"] = "z", ["ʃ"] = "ʒ", ["t"] = "d", ["ts"] = "dz",
    ["tʃ"] = "dʒ", ["v"] = "v", ["z"] = "z", ["ʒ"] = "ʒ",
    ["θ"] = "ð",
    -- allophones --
    ["x"] = "ɣ"
}

local affricate = consonant_set({"dz", "dʒ", "ts", "tʃ"})

local sibilant = consonant_set({"dz", "dʒ", "s", "ʃ", "ts", "tʃ", "z", "ʒ"})

local sibilant_alv = consonant_set({"dz", "s", "ts", "z"})

local sibilant_post = consonant_set({"dʒ", "ʃ", "tʃ", "ʒ"})

-- Alveolar sibilant → postalveolar sibilant.
local sibilant_alv_to_post = {
    ["dz"] = "dʒ", ["s"] = "ʃ", ["ts"] = "tʃ", ["z"] = "ʒ"
}

local nasal = consonant_set({"m", "n", "ŋ", "ɲ"})

local glide = consonant_set({"j", "w"})
-- Placeholder glides that process_temp adds to `glide`.
local glide_temp = {"ĵ"}

-- Consonant → the phone that replaces it together with a following [j].
local iotate = {
    ["d"] = "dʒ", ["dz"] = "dʒ", ["ɡ"] = "j", ["ɣ"] = "j",
    ["h"] = "ç", ["k"] = "tʃ", ["l"] = "j", ["s"] = "ʃ",
    ["t"] = "tʃ", ["ts"] = "tʃ", ["z"] = "ʒ"
}

-- <Cv> combinations that are uniformly pronounced without /v/ --
local Cv_fixed = consonant_set({"b", "f", "p"})

-- <Cv> combinations that are only pronounced with /v/ post-vocalically --
local Cv_split = consonant_set({"m", "n"})

-- consonants that can precede /r/ in a medial onset --
local Cr = consonant_set({
    "p", "k", "b", "ɡ", "ç", "ð", "f", "ɣ", "h", "v", "θ", "t", "d",
    -- allophones --
    "x", "ʋ"
})

-- consonants that can precede /l/ in a medial onset --
local Cl = consonant_set({
    "p", "k", "b", "ɡ", "ç", "ð", "f", "ɣ", "h", "v", "θ", "s", "ʃ", "z", "ʒ",
    -- allophones --
    "x", "ʋ"
})

-- consonants that can precede /v/ in a medial onset --
local Cv = consonant_set({
    "ç", "d", "ð", "dz", "dʒ", "ɡ", "ɣ", "h", "k", "l", "r", "s", "ʃ", "t",
    "ts", "tʃ", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʝ"
})

-- consonants that can precede nasals in a medial onset -- (non-glide continuants)
local continuant = consonant_set({
    "ç", "ð", "f", "ɣ", "h", "l", "r", "s", "ʃ", "v", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʋ"
})
-- Alias: the same table object as `continuant`.
local CN = continuant

-- excludes [h]
-- NOTE(review): this set also lists [l] and [r]; confirm that is intended.
local fricative = consonant_set({
    "ç", "ð", "f", "ɣ", "l", "r", "s", "ʃ", "v", "z", "ʒ", "θ",
    -- allophones --
    "x", "ʋ"
})
Line 364: | Line 551: | ||
-- Builds a membership set: every phone in `list` maps to true.
local function vowel_set(list)
    local set = {}
    for _, phone in ipairs(list) do
        set[phone] = true
    end
    return set
end

local vowel = vowel_set({
    "a", "aː", "aːː", "æː", "ai", "au",
    "eː", "ei", "ɛ", "ɤ", "ɤi",
    "i", "iː", "iːː", "iːe", "ia", "ie",
    "oː", "œ", "œa", "øi", "ɔ", "ɔː", "ɔa", "ɔi",
    "u", "uː", "uːː", "uːo", "ua", "uo",
    "y"
})
-- Placeholder vowels that process_temp adds to `vowel`.
local vowel_temp = {"iːj", "uːj", "yːj", "ù", "ū"}

local marked_hiatus = vowel_set({
    "a", "ai", "au", "ei", "ɛ", "ɤ", "ɤi",
    "i", "iːe", "ia", "ie",
    "oː", "œ", "œa", "øi", "ɔ", "ɔa", "ɔi",
    "u", "uːo", "ua", "uo",
    "y"
})

local long = vowel_set({
    "aː", "aːː", "æː", "ai", "au",
    "eː", "ei", "ɤi",
    "iː", "iːː", "iːe", "ia", "ie",
    "oː", "œa", "øi", "ɔː", "ɔa", "ɔi",
    "uː", "uːː", "uːo", "ua", "uo"
})

local weak = vowel_set({"ɤ", "i"})

-- allophonic only --
-- Vowel → its nasalised transcription.
local nasalise = {
    ["a"] = "ãˑ",
    ["aː"] = "ãːˑ",
    ["aːː"] = "ãːːˑ",
    ["æː"] = "æ̃ːˑ",
    ["ai"] = "ãˑĩ",
    ["au"] = "ãˑũ",
    ["eː"] = "ẽːˑ",
    ["ei"] = "ẽˑĩ",
    ["ɛ"] = "ɛ̃ˑ",
    ["ɤ"] = "ɤ̃ˑ",
    ["ɤi"] = "ɤ̃ˑĩ",
    ["i"] = "ĩˑ",
    ["iː"] = "ĩːˑ",
    ["iːː"] = "ĩːːˑ",
    ["iːe"] = "ĩːˑẽ",
    ["ia"] = "ĩˑã",
    ["ie"] = "ĩˑẽ",
    ["oː"] = "õːˑ",
    ["œ"] = "œ̃ˑ",
    ["œa"] = "œ̃ˑã",
    ["øi"] = "ø̃ˑĩ",
    ["ɔ"] = "ɔ̃ˑ",
    ["ɔː"] = "ɔ̃ːˑ",
    ["ɔa"] = "ɔ̃ˑã",
    ["ɔi"] = "ɔ̃ˑĩ",
    ["u"] = "ũˑ",
    ["uː"] = "ũːˑ",
    ["uːː"] = "ũːːˑ",
    ["uːo"] = "ũːˑõ",
    ["ua"] = "ũˑã",
    ["uo"] = "ũˑõ",
    ["y"] = "ỹˑ",
    ["ɑ"] = "ɑ̃ˑ",
    ["ä"] = "ä̃ˑ",
    ["äi"] = "ä̃ˑĩ",
    ["äu"] = "ä̃ˑũ",
    ["iɐ"] = "ĩˑɐ̃",
    ["uɐ"] = "ũˑɐ̃"
}
-- MISC -- | -- MISC -- | ||
-- Phones that mark a boundary rather than a sound.
local boundary = {}
for _, mark in ipairs({" ", "|", "·", "‿"}) do
    boundary[mark] = true
end
-- Registers the placeholder phones in the lookup sets: each phone listed in a
-- *_temp list is set to true in the matching set (valid_phone, vowel,
-- consonant, glide).
function process_temp()
    local registrations = {
        {valid_phone_temp, valid_phone},
        {vowel_temp, vowel},
        {consonant_temp, consonant},
        {glide_temp, glide}
    }
    for _, registration in ipairs(registrations) do
        local placeholders, target = registration[1], registration[2]
        for k = 1, #placeholders do
            target[placeholders[k]] = true
        end
    end
end
Line 438: | Line 715: | ||
-- Tries to read the letters split_s[first] .. split_s[last] as one spelling
-- in `data`. Returns the phone when the whole span is a recognised spelling,
-- or nil when it is not.
-- Raises an error when split_s[first] has no entry in `data`, or when the
-- span ends on a table entry that has no [false] fallback.
local function generate_IPA_match(split_s, first, last)
    local node = data[split_s[first]]
    if node == nil then
        error("'" .. split_s[first] .. "' is an invalid character")
    end
    if first == last then
        -- Single letter: a table entry resolves through its [false] fallback.
        if type(node) == "table" then
            if node[false] then
                return node[false]
            end
            error(split_s[last] .. " is an invalid character")
        end
        return node
    end
    if type(node) ~= "table" then
        -- A plain one-letter phone cannot start a multi-letter spelling.
        return nil
    end
    for pos = first + 1, last do
        node = node[split_s[pos]]
        if not node then
            return nil -- dead end: this letter cannot continue the spelling
        end
        if type(node) == "string" then
            -- A phone reached before the end of the span is not a match.
            if pos == last then
                return node
            end
            return nil
        elseif pos == last then
            if node[false] then
                return node[false]
            end
            error("data[" .. table.concat(node, "][") .. "][false] is missing.")
        end
    end
end

-- Converts a spelled word into a list of base phones.
-- The word is lower-cased, irregular spellings are substituted, and the string
-- is then consumed from its end: at each step the longest recognised spelling
-- of up to three letters ending at the current end is taken.
-- @param word string to convert
-- @return array of phone strings, in word order
-- Raises "Empty input" for an empty string and an "invalid character" error
-- for a letter that has no entry in `data`.
function generate_IPA(word)
    local s = mw.ustring.lower(word)
    -- Replace irregulars --
    for toReplace, ReplaceKey in pairs(irregular) do
        s = mw.ustring.gsub(s, "(" .. toReplace .. ")", ReplaceKey)
    end
    if mw.ustring.find(s, "(rredeș)") then -- deals with Rredeșa and Rrezlieþs
        s = mw.ustring.gsub(s, "(a rredeș)", "ar‿redeș")
        s = mw.ustring.gsub(s, "(u rredeș)", "ùr‿redeș")
        s = mw.ustring.gsub(s, "(rredeș)", "redeș")
    elseif mw.ustring.find(s, "(rrezl)") then
        s = mw.ustring.gsub(s, "(u rrezl)", "ùr‿rezl")
        s = mw.ustring.gsub(s, "(rrezl)", "rezl")
    end

    local s_len = mw.ustring.len(s)
    if s_len == 0 then
        error("Empty input")
    end
    local split_s = {}
    for i = 1, s_len do
        split_s[i] = mw.ustring.sub(s, i, i)
    end

    -- Kept local: the original assigned to a global `IPA`.
    local IPA = {}
    while s_len > 0 do
        -- Longest candidate first (three letters), shrinking to one letter.
        -- The one-letter candidate always returns a phone or raises, so every
        -- pass consumes at least one letter.
        local first = s_len - 2
        if first < 1 then
            first = 1
        end
        for start = first, s_len do
            local phone = generate_IPA_match(split_s, start, s_len)
            if phone then
                table.insert(IPA, 1, phone)
                s_len = start - 1
                break
            end
        end
    end
    return IPA
end
-- Applies the vowel rules to a phone list, one position at a time.
-- The list is modified in place and the same table is returned. On exit the
-- placeholder phones listed in vowel_temp are removed from valid_phone and
-- vowel.
-- NOTE(review): `data` emits the phones "iej", "uoj" and "yej", but the
-- placeholder lists register "iːj", "uːj" and "yːj", so those three phones are
-- not in `vowel` when the vowel[...] tests below run. Confirm this is intended.
-- @param phones array of phone strings
-- @return the same array, with vowels resolved
function resolve_vowels(phones)
    local list = phones
    -- vowel + [j] (not followed by a vowel) → diphthong
    local offglide_merge = {["a"] = "ai", ["ɤ"] = "ɤi", ["ɛ"] = "ei", ["ɔː"] = "ɔi"}
    -- non-prevocalic <iej>, <uoj>, <yej>
    local closed_form = {["iej"] = "ei", ["uoj"] = "ɔi", ["yej"] = "øi"}
    local i_like = {["i"] = true, ["iː"] = true, ["iːː"] = true}

    local pos = 1
    local before, here, after, after2, after3
    -- Re-reads the window of phones around `pos` from the list.
    local function reload()
        before = list[pos - 1]
        here = list[pos]
        after = list[pos + 1]
        after2 = list[pos + 2]
        after3 = list[pos + 3]
    end

    reload()
    while here ~= nil do
        -- Resolution of [aù] --
        if before == "a" and here == "ù" then
            list[pos] = "u"
            here = "u"
        end

        -- Resolution of [u], [ù] and [ū] --
        if here == "u" then
            if consonant[after] then
                -- Skip over a glide that follows the consonant.
                local follower = after2
                if glide[after2] then
                    follower = after3
                end
                if not vowel[follower] then
                    list[pos] = "ɤ"
                    here = "ɤ"
                end
            end
        elseif here == "ù" or here == "ū" then
            list[pos] = "u"
            here = "u"
        end

        -- Resolution of <Vj> to diphthongs
        if after == "j" and not vowel[after2] then
            local merged = offglide_merge[here]
            if merged then
                list[pos] = merged
                table.remove(list, pos + 1)
                reload()
            end
        end

        -- Resolution of <ei> and <øi> --
        -- Walk back over consonants; [ei] becomes [ai] when a boundary or the
        -- start of the list is reached before any other phone.
        if here == "ei" then
            local back = pos - 1
            while true do
                local earlier = list[back]
                if earlier == nil or boundary[earlier] then
                    list[pos] = "ai" -- `here` deliberately stays "ei"
                    break
                elseif not consonant[earlier] then
                    break
                end
                back = back - 1
            end
        end

        if vowel[after] then
            -- Resolution of prevocalic <iej>, <uoj> and <yej> --
            if here == "uoj" then
                list[pos] = "uo"
                table.insert(list, pos + 1, "j")
                reload()
            elseif here == "iej" or here == "yej" then
                list[pos] = "iː"
            end
            -- Resolution of prevocalic [ie] and [uo] --
            if here == "ie" then
                list[pos] = "iː"
            elseif here == "uo" and after ~= "j" then
                list[pos] = "uː"
            end
        else
            -- Resolution of non-prevocalic <iej>, <uoj> and <yej> --
            local closed = closed_form[here]
            if closed then
                list[pos] = closed
            end
        end

        -- Resolution of [j] after [i]
        if here == "j" and i_like[before] then
            table.remove(list, pos)
            pos = pos - 1
        end

        pos = pos + 1
        reload()
    end

    -- Removes placeholder phones from data --
    for _, temp in ipairs(vowel_temp) do
        valid_phone[temp] = nil
        vowel[temp] = nil
    end
    return list
end
-- Applies the consonant rules to a phone list, one position at a time.
-- The list is modified in place and the same table is returned. On exit the
-- placeholder phones listed in consonant_temp are removed from valid_phone,
-- consonant and glide.
--
-- Fixes relative to the previous revision:
--   * `final_obs` is now a local (it leaked into the global table);
--   * the phonetic Cv rule compares against IPA "ɡ" (the phone `data` emits)
--     instead of ASCII "g", which never matched;
--   * after the current phone is removed, the look-ahead phones are re-read,
--     so later rules in the same pass no longer compare the new current phone
--     with itself;
--   * the unreachable `h` + `j` branch of the phonetic rules is dropped
--     ([h][j] is already handled by iotation, and the preceding
--     `not vowel[p_next]` test caught it first anyway).
--
-- @param phones array of phone strings
-- @param hj_dv  when falsy, [ç] becomes [ʃ] and an initial [j] before a
--               consonant becomes [ʒ]
-- @param phon   when truthy, the phonetic (allophonic) rules are applied too
-- @return the same array, with consonants resolved
function resolve_consonants(phones, hj_dv, phon)
    local working_phones = phones
    local i_initial = {["i"] = true, ["iː"] = true, ["iːː"] = true, ["ie"] = true, ["iːe"] = true}
    local i_oid = {["i"] = true, ["iː"] = true, ["iːː"] = true}
    local i = 0
    while true do
        i = i + 1
        local p_prev = working_phones[i - 1]
        local p_current = working_phones[i]
        local p_next = working_phones[i + 1]
        local p_next2 = working_phones[i + 2]
        if p_current == nil then
            break
        end
        -- Re-reads the phones that follow position i.
        local function refresh_following()
            p_next = working_phones[i + 1]
            p_next2 = working_phones[i + 2]
        end
        -- Writes a new phone at i; later rules still see the old p_current.
        local function p_Resolve(p_new)
            working_phones[i] = p_new
        end
        -- Writes a new phone at i and lets later rules see it.
        local function p_Convert(p_new)
            working_phones[i] = p_new
            p_current = p_new
        end
        local function p_RemoveNext()
            table.remove(working_phones, i + 1)
            refresh_following()
        end
        -- Deletes the phone at i; the phone that followed it becomes current.
        local function p_RemoveCurrent()
            table.remove(working_phones, i)
            p_current = working_phones[i]
            refresh_following()
        end

        -- Resolution of iotation --
        if iotate[p_current] and p_next == "j" then
            p_Convert(iotate[p_current])
            p_RemoveNext()
        elseif p_current == "ĵ" then
            p_Convert("j")
        end

        -- Resolution of hjádvanþs --
        if not hj_dv then
            if p_current == "ç" then
                p_Convert("ʃ")
            elseif p_current == "j" and (boundary[p_prev] or p_prev == nil) and consonant[p_next] then
                p_Convert("ʒ")
            end
        end

        -- Resolution of (T)Ss, (T)Sș, ts, ds, tș, dș (progressive voicing assimilation) --
        if p_next == "s" then
            if sibilant[p_current] then
                p_RemoveNext()
            elseif p_current == "t" then
                p_Convert("ts")
                p_RemoveNext()
            elseif p_current == "d" then
                p_Convert("dz")
                p_RemoveNext()
            end
        elseif p_next == "ʃ" then
            if sibilant_post[p_current] == true then
                p_RemoveNext()
            elseif sibilant_alv[p_current] then
                p_Convert(sibilant_alv_to_post[p_current])
                p_RemoveNext()
            elseif p_current == "t" then
                p_Convert("tʃ")
                p_RemoveNext()
            elseif p_current == "d" then
                p_Convert("dʒ")
                p_RemoveNext()
            end
        end

        -- Resolution of Cv --
        if p_next == "v" and ((Cv_split[p_current] and not vowel[p_prev]) or Cv_fixed[p_current]) then
            p_RemoveNext()
        end

        -- Resolution of regressive voicing assimilation --
        if obstruent[p_current] then
            local final_i = i
            -- find voicing of final obstruent in cluster --
            for k = i + 1, #working_phones do
                local check_phone = working_phones[k]
                if obstruent[check_phone] and check_phone ~= "v" then -- /v/ is excluded
                    final_i = k
                else
                    break
                end
            end
            if final_i ~= i then -- if cluster recognised --
                local final_obs = working_phones[final_i]
                -- assimilate to the voicing of the last obstruent --
                if obstruent_voiced[p_current] ~= obstruent_voiced[final_obs] then
                    if obstruent_voiced[final_obs] == true then
                        p_Convert(obstruent_voice[p_current])
                    else
                        p_Convert(obstruent_devoice[p_current])
                    end
                end
            end
        end

        -- tv and kv
        if p_current == "v" and (p_prev == "t" or p_prev == "k") then
            p_Convert("f")
        end

        -- j + [i]
        if p_current == "j" and i_initial[p_next] then
            p_RemoveCurrent()
        end

        -- [i] + j
        if p_current == "j" and i_oid[p_prev] then
            p_RemoveCurrent()
        end

        -- Resolution of geminates --
        if
            p_next == p_current and (boundary[p_next2] or p_next2 == nil or consonant[p_next2]) and p_next2 ~= "j" and
                p_next2 ~= "w" and
                consonant[p_current]
         then
            p_RemoveNext()
        end

        -- Removal of apostrophes --
        if p_current == "'" then
            p_RemoveCurrent()
        end

        -- PHONETIC RESOLUTION --
        if phon then
            -- Cv --
            if p_current == "v" and obstruent[p_prev] and p_prev ~= "d" and p_prev ~= "ɡ" then
                p_Resolve("ʋ")
            end
            if p_current == "h" and not vowel[p_next] then -- hC
                p_Convert("x")
            end
            if p_current == "n" and p_next == "j" then
                p_Convert("ɲ")
                p_RemoveNext()
            end
            if p_current == "n" and p_next == "n" and p_next2 == "j" then
                p_Convert("ɲ")
            end
            -- (A rule for [r] + [s] was commented out at this point in the
            -- previous revision.)
            if p_current == "œ" and (p_next == "a" or p_next == "aː" or p_next == "aːː") then
                table.insert(working_phones, i + 1, "w")
            end
        end

        -- Resolution of NK --
        if p_current == "n" and velar[p_next] then
            p_Resolve("ŋ")
        end

        -- RESOLUTION OF MULTIPLE BOUNDARIES --
        if p_current == " " and p_next == "|" then
            p_Convert("|")
            p_RemoveNext()
        elseif p_current == " " and p_next == " " then
            p_RemoveNext()
        elseif p_current == "|" and (p_next == " " or p_next == "|") then
            p_RemoveNext()
        end
    end

    -- Removes placeholder phones from data --
    for _, temp in ipairs(consonant_temp) do
        valid_phone[temp] = nil
        consonant[temp] = nil
        glide[temp] = nil
    end
    return working_phones
end
-- Groups a flat list of phones into syllables.
--
-- `phones` is consumed from the front (the table is emptied in place).
-- The result is a sequence whose items are either syllable tables of the
-- form { onset = {...} | nil, nucleus = "<vowel>", coda = {...} | nil }
-- or boundary strings copied through unchanged (anything found in
-- `boundary`, plus "-").
-- Raises an error when a phone is neither a consonant, a vowel nor a boundary.
function get_syllables(phones)
	local queue = phones
	local syllables = {}
	local next_slot = 1

	local function blank_syllable()
		return {
			["onset"] = {},
			["nucleus"] = "",
			["coda"] = {}
		}
	end

	local current = blank_syllable()

	-- Stores the syllable under construction, provided it already has a
	-- nucleus; empty onset/coda lists are stored as nil.
	local function flush()
		if current["nucleus"] == "" then
			return
		end
		syllables[next_slot] = current
		if #current["onset"] == 0 then
			current["onset"] = nil
		end
		if #current["coda"] == 0 then
			current["coda"] = nil
		end
		next_slot = next_slot + 1
		current = blank_syllable()
	end

	-- Closes the syllable under construction (if any) and copies the
	-- boundary at the head of the queue to the result.
	local function take_boundary()
		flush()
		syllables[next_slot] = queue[1]
		table.remove(queue, 1)
		next_slot = next_slot + 1
	end

	-- Moves the phone at the head of the queue into the syllable under
	-- construction: vowels become the nucleus, consonants go to the onset
	-- while there is no nucleus yet and to the coda afterwards.
	local function take_phone()
		local phone = queue[1]
		if vowel[phone] then
			current["nucleus"] = phone
		elseif consonant[phone] then
			local slot = "coda"
			if current["nucleus"] == "" then
				slot = "onset"
			end
			table.insert(current[slot], phone)
		else
			error("Invalid input for function register_phone")
		end
		table.remove(queue, 1)
		-- the last phone of the input closes the final syllable
		if #queue == 0 then
			flush()
		end
	end

	while #queue > 0 do
		local head, second, third, fourth = queue[1], queue[2], queue[3], queue[4]

		if consonant[head] then
			if current["nucleus"] == "" then
				-- still collecting the onset
				take_phone()
			elseif vowel[second] or (glide[second] and vowel[third]) then
				-- *C*(J)V: this consonant opens the next syllable
				flush()
				take_phone()
			else
				-- C | *C*l(J)V, *C*r(J)V, *C*v(J)V, *C*N(J)V: once the coda
				-- already holds a consonant, a permitted two-consonant onset
				-- followed by (glide +) vowel starts the next syllable
				local opens_cluster = false
				if #current["coda"] > 0 then
					local pairs_up =
						(Cl[head] and second == "l") or
						(Cr[head] and second == "r") or
						(Cv[head] and (second == "v" or second == "ʋ" or second == "f")) or
						(CN[head] and nasal[second])
					if pairs_up and (vowel[third] or (glide[third] and vowel[fourth])) then
						opens_cluster = true
					end
				end
				if opens_cluster then
					flush()
				end
				take_phone()
			end
		elseif vowel[head] then
			-- a second vowel always starts a new syllable; flush() does
			-- nothing while the nucleus is still empty
			flush()
			take_phone()
		elseif boundary[head] or head == "-" then
			take_boundary()
		else
			error("Unrecognised phone in syllabifier: " .. head)
		end
	end

	return syllables
end
-- Annotates every syllable table in `syllables` (boundary strings are left
-- untouched) with boolean flags and returns the same list:
--   checked : the syllable has a coda
--   long    : the nucleus is listed in `long`
--   weak    : the nucleus is listed in `weak`
--   heavy   : only set when there is a coda; true when the nucleus is long
--             and the coda either starts with an affricate or starts with an
--             obstruent and has at least two members
function tag_syllables(syllables)
	for index = 1, #syllables do
		local entry = syllables[index]
		if type(entry) == "table" then
			local nucleus = entry["nucleus"]
			local coda = entry["coda"]
			local is_long = long[nucleus] and true or false

			entry["checked"] = coda ~= nil
			entry["long"] = is_long
			entry["weak"] = weak[nucleus] and true or false

			if coda then
				local first = coda[1]
				local cluster_is_heavy = affricate[first] or (obstruent[first] and coda[2])
				entry["heavy"] = (is_long and cluster_is_heavy) and true or false
			end
		end
	end
	return syllables
end
-- Assigns stress to the words contained in `syllables`.
--
-- `syllables` is a sequence of tagged syllable tables (fields read here:
-- "long", "checked", "weak", "heavy") interleaved with boundary strings.
-- The list is consumed from the front (emptied in place). Words are runs of
-- syllable tables between boundaries. The returned list holds the same
-- syllables, with ["stress"] = true or ["stress2"] = true set on the stressed
-- syllable of each word, and the same boundaries except "-" and "·", which
-- are dropped at the end.
-- Raises an error on an empty word or an empty slot in the list.
function get_stress(syllables)
	--[[
	Stress in Radestrian is calculated excluding · prefixes, which are stressed as if they were standalone words but with secondary stress. Stress in Radestrian falls on either the first syllable or the second syllable. The first syllable is the default stress position. Second-syllable stress occurs in seven circumstances:
	(the conditions implemented here are marked "condition N" inline below)
	]]
	local working_word = {}
	local output = {}

	-- Marks syllable `syl` (1 or 2) of the working word as stressed, then
	-- moves the working word to the output.
	local function stress(syl)
		if syl ~= 1 and syl ~= 2 then
			error("Empty or invalid input to stress()")
		end
		local before = output[#output] -- last item already emitted
		local after = syllables[1] -- boundary following the word, if any
		if (after == "-" or before == "·") and after ~= "·" and before ~= "-" then
			-- word is followed by "-" or preceded by "·": primary stress is
			-- marked even on a monosyllable
			working_word[syl]["stress"] = true
		elseif #working_word > 1 then
			if before == "-" or after == "·" then
				-- word is preceded by "-" or followed by "·": secondary stress
				working_word[syl]["stress2"] = true
			else
				working_word[syl]["stress"] = true
			end
		end
		-- otherwise: a plain monosyllable gets no redundant stress mark

		if #working_word == 0 then
			error("An empty word cannot be registered")
		end
		for j = 1, #working_word do
			table.insert(output, working_word[j])
		end
		working_word = {}
	end

	while #syllables > 0 do
		if type(syllables[1]) == "table" then
			-- collect syllables into the working word up to the next
			-- boundary string or the end of the list
			repeat
				table.insert(working_word, syllables[1])
				table.remove(syllables, 1)
			until syllables[1] == nil or type(syllables[1]) == "string"

			local count = #working_word
			if count == 0 then
				error("The working word is empty")
			elseif count == 1 or working_word[1]["long"] then
				-- monosyllabic, or long initial syllable
				stress(1)
			elseif count == 2 then
				if working_word[1]["checked"] == false and working_word[2]["heavy"] then -- condition 1
					stress(2)
				else
					stress(1)
				end
			elseif count == 3 then
				if
					working_word[2]["long"] or -- condition 2
					((not working_word[2]["weak"]) and working_word[2]["checked"]) -- condition 3
				then
					stress(2)
				else
					stress(1)
				end
			else -- four or more syllables
				if
					working_word[2]["long"] or -- condition 4
					working_word[2]["checked"] or -- condition 5
					not working_word[3]["long"] -- condition 6
				then
					stress(2)
				else
					stress(1)
				end
			end
		elseif syllables[1] == nil then
			error("Empty syllable found in array. The array should contain no empty syllables")
		else
			-- boundary: copy it through so the next word can inspect it
			table.insert(output, syllables[1])
			table.remove(syllables, 1)
		end
	end

	-- Drop the "-" and "·" markers now that stress has been decided.
	-- Walking backwards keeps the indices valid while removing and, unlike a
	-- restart-after-each-removal scan, also terminates when `output` is
	-- empty or consists solely of markers.
	for i = #output, 1, -1 do
		if output[i] == "-" or output[i] == "·" then
			table.remove(output, i)
		end
	end
	return output
end
-- Re-divides the consonants that sit between two adjacent syllables.
--
-- For every pair of neighbouring syllable tables (pairs involving a boundary
-- string are skipped) the coda of the first and the onset of the second are
-- joined into one cluster, and the last one, two or three consonants of that
-- cluster become the new onset; the rest becomes the new coda. The list is
-- modified in place and returned.
function resolve_syllables(syllables)
	for i = 1, #syllables - 1 do
		local left = syllables[i]
		local right = syllables[i + 1]
		-- Check the types first: boundary entries are strings and must not
		-- be treated as syllable tables.
		if type(left) == "table" and type(right) == "table" and (left["coda"] or right["onset"]) then
			-- gather the intersyllabic cluster --
			local working_cluster = {}
			if left["coda"] then
				for j = 1, #left["coda"] do
					table.insert(working_cluster, left["coda"][j])
				end
			end
			if right["onset"] then
				for j = 1, #right["onset"] do
					table.insert(working_cluster, right["onset"][j])
				end
			end

			local size = #working_cluster
			local p_last = working_cluster[size]
			local p_last2 = working_cluster[size - 1]
			local p_last3 = working_cluster[size - 2]
			local p_last4 = working_cluster[size - 3]

			-- Gives the last `point` consonants of the cluster to the onset
			-- of the right-hand syllable and the remainder to the coda of
			-- the left-hand one.
			local function split(point)
				if point == nil or point < 1 or point > 3 then
					error("split() must take integer values between 1 to 3 inclusive")
				end
				local new_coda = {}
				local new_onset = {}
				for j = size + 1 - point, size do
					table.insert(new_onset, working_cluster[j])
				end
				for j = 1, size - point do
					table.insert(new_coda, working_cluster[j])
				end
				-- an emptied coda is stored as nil, the same way
				-- get_syllables represents a syllable without a coda
				if #new_coda == 0 then
					new_coda = nil
				end
				left["coda"] = new_coda
				right["onset"] = new_onset
			end

			local function is_v(phone)
				return phone == "v" or phone == "ʋ" or phone == "f"
			end

			-- determine syllable boundary --
			if size > 0 then
				if glide[p_last] and nasal[p_last2] and CN[p_last3] and p_last4 then -- C|CNJ
					split(3)
				elseif nasal[p_last] and CN[p_last2] and p_last3 then -- C|CN
					split(2)
				elseif glide[p_last] and p_last2 == "r" and Cr[p_last3] and p_last4 then -- C|CrJ
					split(3)
				elseif p_last == "r" and Cr[p_last2] and p_last3 then -- C|Cr
					split(2)
				elseif glide[p_last] and p_last2 == "l" and Cl[p_last3] and p_last4 then -- C|ClJ
					split(3)
				elseif p_last == "l" and Cl[p_last2] and p_last3 then -- C|Cl
					split(2)
				elseif glide[p_last] and is_v(p_last2) and Cv[p_last3] and p_last4 then -- C|CvJ
					split(3)
				elseif is_v(p_last) and Cv[p_last2] and p_last3 then -- C|Cv
					split(2)
				elseif glide[p_last] and p_last2 then -- C|CJ
					split(2)
				else -- C|C
					split(1)
				end
			end
		end
	end
	return syllables
end
-- Flattens a list of syllables into the final transcription string.
--
-- `syllables` holds syllable tables (onset / nucleus / coda / stress /
-- stress2) and the boundary strings " ", "|" and "‿"; any other string
-- raises an error. When `phon` is truthy the phonetic adjustments below
-- (/a/ backing, gemination, nasalisation) are applied as well. Syllable
-- tables are modified in place while the string is being built.
-- Returns the transcription after the post-rectification substitutions.
function syllables_to_string(syllables, phon)
	local pieces = {}

	-- Only syllable tables count as neighbours; boundary strings do not.
	local function syllable_at(index)
		local entry = syllables[index]
		if type(entry) == "table" then
			return entry
		end
		return nil
	end

	-- Whether `following` makes the coda nasal `nasal_phone` nasalise the
	-- preceding vowel: [ŋ] before h / x / ɣ, [n] and [ɲ] before a fricative.
	local function triggers_nasalisation(nasal_phone, following)
		if nasal_phone == "ŋ" then
			return following == "h" or following == "x" or following == "ɣ"
		end
		return fricative[following]
	end

	for i = 1, #syllables do
		local syl = syllables[i]
		if type(syl) == "table" then
			local prev_syl = syllable_at(i - 1)
			local next_syl = syllable_at(i + 1)

			-- add hiatus marker: open syllable with a hiatus-marking nucleus
			-- directly before an onsetless, unstressed syllable --
			if
				next_syl and syl["coda"] == nil and marked_hiatus[syl["nucleus"]] and
				next_syl["onset"] == nil and
				not (next_syl["stress"] or next_syl["stress2"])
			then
				syl["coda"] = {"."}
			end

			-- phonetic /a/ backing next to a syllable with [æː] --
			if phon and syl["nucleus"] == "a" then
				if next_syl and next_syl["nucleus"] == "æː" then
					syl["nucleus"] = "ɑ"
				end
				if prev_syl and prev_syl["nucleus"] == "æː" then
					syl["nucleus"] = "ɑ"
				end
				-- (disabled) remaining "a" → "ä"
			end
			-- (disabled) further phonetic vowel adjustments, e.g. "ai" → "äi", … → "uɐ"

			-- gemination: identical consonants across the syllable break
			-- are written once, lengthened, in the following onset --
			if phon and next_syl and syl["coda"] and next_syl["onset"] then
				local coda = syl["coda"]
				local onset = next_syl["onset"]
				if #coda > 0 and coda[#coda] == onset[1] then
					table.remove(coda, #coda)
					onset[1] = onset[1] .. "ː"
				end
			end

			-- phonetic nasalisation --
			if phon and syl["coda"] then
				local coda = syl["coda"]
				local nasal_phone = coda[1]
				if nasal_phone == "ɲ" or nasal_phone == "n" or nasal_phone == "ŋ" then
					-- the trigger is the next coda consonant or, when the
					-- nasal is the whole coda, the first consonant of the
					-- following onset
					local following = coda[2]
					if following == nil and next_syl and next_syl["onset"] then
						following = next_syl["onset"][1]
					end
					if triggers_nasalisation(nasal_phone, following) then
						local nasal_vowel = nasalise[syl["nucleus"]]
						-- NOTE(review): a nucleus without an entry in
						-- `nasalise` is left un-nasalised here rather than
						-- becoming nil (which would fail at concatenation
						-- below) — confirm this is the wanted fallback
						if nasal_vowel then
							syl["nucleus"] = nasal_vowel
							table.remove(coda, 1)
							if nasal_phone == "ɲ" then
								table.insert(coda, 1, "j̃")
							end
						end
					end
				end
			end

			-- assemble the syllable --
			local onset = ""
			local coda = ""
			local stress = ""
			if syl["onset"] then
				onset = table.concat(syl["onset"])
			end
			if syl["coda"] then
				coda = table.concat(syl["coda"])
			end
			if syl["stress"] then
				stress = "ˈ"
			end
			if syl["stress2"] then
				stress = "ˌ"
			end
			pieces[#pieces + 1] = stress .. onset .. syl["nucleus"] .. coda
		elseif syl == " " then
			pieces[#pieces + 1] = " "
		elseif syl == "|" then
			pieces[#pieces + 1] = " | "
		elseif syl == "‿" then
			pieces[#pieces + 1] = "‿"
		else
			error("Unknown phone identified: " .. syl)
		end
	end

	local output = table.concat(pieces)

	-- post rectifications, applied in this order --
	local rewrites = {
		{"(ˈˌ)", "ˌ"},
		-- vowel length resolution
		{"i%.iː?e", "iːe"},
		{"i%.iːː?", "iːː"},
		{"i%.i", "iː"},
		{"iːiː?e", "iːe"},
		{"iːiː*", "iːː"},
		{"u%.uː?o", "uːo"},
		{"u%.uːː?", "uːː"},
		{"u%.u", "uː"},
		{"uːuː?o", "uːo"},
		{"uːuː*", "uːː"},
		{"([^œɔui])a%.aːː?", "%1aːː"},
		{"([^œɔui])a%.a([^iu])", "%1aː%2"},
		{"aːaː*", "aːː"}
	}
	for _, rule in ipairs(rewrites) do
		-- gsub also returns a match count; only the string is kept
		output = mw.ustring.gsub(output, rule[1], rule[2])
	end
	return output
end
-- Reduces a transcription to its rhyme.
--
-- Returns nil when `IPA` contains a space or "|". Otherwise, if a primary
-- stress mark is present, secondary stress marks are deleted and everything
-- up to and including the last primary stress mark is cut off; finally any
-- leading characters listed in `consonant` are stripped and the remainder
-- is returned.
function export.get_rhyme(IPA)
	if mw.ustring.find(IPA, "([ %|])") then
		return nil
	end

	local rhyme = IPA
	if mw.ustring.find(rhyme, "(ˈ)") then
		rhyme = mw.ustring.gsub(rhyme, "(ˌ)", "")
		rhyme = mw.ustring.gsub(rhyme, "^(.*ˈ)", "")
	end

	-- peel consonants off the front one character at a time
	while consonant[mw.ustring.sub(rhyme, 1, 1)] do
		rhyme = mw.ustring.sub(rhyme, 2)
	end
	return rhyme
end
-- Entry point: turns the first argument into a transcription.
--
-- Argument 1 is the text to transcribe; every following positional argument
-- is treated as a flag name. Flags read here: "hj", "phon", "rhyme",
-- "nolarge", "format". Returns the transcription string (wrapped in an IPA
-- <span> when "format" is set), or nil when a rhyme was requested and
-- export.get_rhyme returned nil.
function export.generate(frame)
	local args = getArgs(frame)
	local text = args[1]

	-- collect positional flags until the first missing argument
	local flags = {}
	local index = 2
	while args[index] do
		flags[args[index]] = true
		index = index + 1
	end

	process_temp()

	-- NOTE(review): as written this pattern matches a space followed by ";";
	-- it may originally have targeted the "&nbsp;" entity — confirm
	text = mw.ustring.gsub(text, "(% %;)", " ")

	local final_char = mw.ustring.sub(text, -1)
	local is_prefix = final_char == "-" or final_char == "·"
	local is_suffix = mw.ustring.sub(text, 1, 1) == "-"

	-- transcription pipeline
	local phones = generate_IPA(text)
	phones = resolve_vowels(phones)
	phones = resolve_consonants(phones, flags["hj"], flags["phon"])
	local syllables = get_syllables(phones)
	syllables = tag_syllables(syllables)
	syllables = get_stress(syllables)
	syllables = resolve_syllables(syllables)
	local result = syllables_to_string(syllables, flags["phon"])

	if flags["rhyme"] then
		result = export.get_rhyme(result)
	end
	if result == nil then
		return nil
	end

	-- restore the affix hyphens; suffixes carry no secondary stress mark
	if is_prefix then
		result = result .. "-"
	end
	if is_suffix then
		result = "-" .. result
		result = mw.ustring.gsub(result, "(ˌ)", "")
	end

	if flags["format"] then
		local nolarge = ""
		if flags["nolarge"] then
			nolarge = " nolarge"
		end
		if flags["phon"] then
			result = '<span class="IPA' .. nolarge .. '">[' .. result .. "]</span>"
		else
			result = '<span class="IPA' .. nolarge .. '">/' .. result .. "/</span>"
		end
	end
	return result
end