Module:rad-IPA: Difference between revisions

From Laenkea
Jump to navigation Jump to search
(Added syllabification)
(Added syllable tagging)
Line 101: Line 101:
[false] = "i",
[false] = "i",
},
},
["ỳ"] = "",
["ỳ"] = "y",
["z"] = "z",
["z"] = "z",
["þ"] = "θ",
["þ"] = "θ",
Line 196: Line 196:
["z"] = "z", ["ʒ"] = "ʒ", ["θ"] = "ð",  
["z"] = "z", ["ʒ"] = "ʒ", ["θ"] = "ð",  
}
}
local affricate = {
["dz"] = true, ["dʒ"] = true ,
["ts"] = true, ["tʃ"] = true,
}
local sibilant = {
local sibilant = {
Line 311: Line 316:
vowel[temp] = true
vowel[temp] = true
end
end
local long = {
["a"] = false, ["aː"] = true, ["aːː"] = true, ["æː"] = true, ["ai"] = true, ["au"] = true,
["eː"] = true, ["ei"] = true, ["ɛ"] = false, ["ɤ"] = false, ["ɤi"] = true, ["i"] = false,
["iː"] = true, ["iːː"] = true, ["iːe"] = true, ["ia"] = true, ["ie"] = true, ["oː"] = true,
["œ"] = false, ["œa"] = true, ["øi"] = true, ["ɔ"] = false, ["ɔː"] = true, ["ɔa"] = true,
["ɔi"] = true, ["u"] = false, ["uː"] = true, ["uːː"] = true, ["uːo"] = true, ["ua"] = true,
["uo"] = true, ["y"] = false,
}
local weak = {
["a"] = false, ["aː"] = false, ["aːː"] = false, ["æː"] = false, ["ai"] = false, ["au"] = false,
["eː"] = false, ["ei"] = false, ["ɛ"] = false, ["ɤ"] = true, ["ɤi"] = false, ["i"] = true,
["iː"] = false, ["iːː"] = false, ["iːe"] = false, ["ia"] = false, ["ie"] = false, ["oː"] = false,
["œ"] = false, ["œa"] = false, ["øi"] = false, ["ɔ"] = false, ["ɔː"] = false, ["ɔa"] = false,
["ɔi"] = false, ["u"] = false, ["uː"] = false, ["uːː"] = false, ["uːo"] = false, ["ua"] = false,
["uo"] = false, ["y"] = false,
}
-- MISC --
-- MISC --
Line 330: Line 353:
if mw.ustring.match(s, toReplace) then
if mw.ustring.match(s, toReplace) then
s = mw.ustring.gsub(s, toReplace, ReplaceKey)
s = mw.ustring.gsub(s, toReplace, ReplaceKey)
mw.log("Irregular spelling <" .. toReplace .. "> recognised. Treating as <" .. ReplaceKey .. ">.")
-- generate_IPA: mw.log("Irregular spelling <" .. toReplace .. "> recognised. Treating as <" .. ReplaceKey .. ">.")
end
end
end
end
Line 380: Line 403:
  -- generate_IPA: mw.log("Tabular index recognised.")
  -- generate_IPA: mw.log("Tabular index recognised.")
  for j = 1, 0 - i do
  for j = 1, 0 - i do
--[[
local currentCombo = ""
local currentCombo = ""
-- for logs only
if i == -2 and j == 2 then
if i == -2 and j == 2 then
currentCombo = split_s[s_len + i + j - 2] .. " + " .. split_s[s_len + i + j - 1] .. " + " .. split_s[s_len + i + j]
currentCombo = split_s[s_len + i + j - 2] .. " + " .. split_s[s_len + i + j - 1] .. " + " .. split_s[s_len + i + j]
Line 387: Line 410:
currentCombo = split_s[s_len + i + j - 1] .. " + " .. split_s[s_len + i + j]
currentCombo = split_s[s_len + i + j - 1] .. " + " .. split_s[s_len + i + j]
end
end
-- generate_IPA: mw.log("Testing " .. currentCombo) ]]
-- generate_IPA: mw.log("Testing " .. currentCombo)
--
if getData[split_s[s_len + i + j]] then
if getData[split_s[s_len + i + j]] then
  getData = getData[split_s[s_len + i + j]]
  getData = getData[split_s[s_len + i + j]]
Line 480: Line 504:
-- Resolution of [u], [ù] and [ū] --
-- Resolution of [u], [ù] and [ū] --
if p_current == "u" then
if p_current == "u" then
mw.log("[u] found in position " .. i .. ".")
mw.log("[u] found in position " .. i .. ". Testing for resolution.")
if consonant[p_next] then
if consonant[p_next] then
if glide[p_next2] then
if glide[p_next2] then
Line 486: Line 510:
mw.log("ɤCj!V environment identified.")
mw.log("ɤCj!V environment identified.")
p_Resolve("ɤ")
p_Resolve("ɤ")
else
mw.log("No resolution needed.")
end
end
elseif not vowel[p_next2] and not glide[p_next2] then
elseif not vowel[p_next2] and not glide[p_next2] then
mw.log("ɤC!V environment identified.")
mw.log("ɤC!V environment identified.")
p_Resolve("ɤ")
p_Resolve("ɤ")
else
mw.log("No resolution needed.")
end
end
else
mw.log("No resolution needed.")
end
end
elseif p_current == "ù" or p_current == "ū" then
elseif p_current == "ù" or p_current == "ū" then
Line 550: Line 580:
end
end
 
if boundary[p_current] then
mw.log("——— word boundary detected ———")
end
end
end
Line 710: Line 736:
p_RemoveNext()
p_RemoveNext()
mw.log("Geminate [" .. p_current .. "] removed in final position at position " .. i .. ".")
mw.log("Geminate [" .. p_current .. "] removed in final position at position " .. i .. ".")
end
if boundary[p_current] then
mw.log("——— word boundary detected ———")
end
end
Line 755: Line 777:
end
end
mw.log("————— BEGINNING SYLLABIFICATION —————")
mw.log("————— BUILDING SYLLABLES —————")
-- division into syllables --
-- division into syllables --
Line 775: Line 797:
mw.log("Syllable " .. syllable_no .. " registered:")
mw.log("Syllable " .. syllable_no .. " registered:")
logSyllable()
logSyllable()
mw.log("===== NEW SYLLABLE=====")
mw.log("====================")
syllable_no = syllable_no + 1
syllable_no = syllable_no + 1
working_syllable = {
working_syllable = {
Line 790: Line 812:
syllables[syllable_no] = p_current
syllables[syllable_no] = p_current
table.remove(working_phones, 1)
table.remove(working_phones, 1)
mw.log("Boundary syllable " .. syllable_no .. " registered: '" .. p_current .. "'.")
mw.log("Boundary syllable " .. syllable_no .. " registered: '" .. p_current .. "'\n====================")
syllable_no = syllable_no + 1
syllable_no = syllable_no + 1
end
end
Line 813: Line 835:
end
end
table.remove(working_phones, 1)
table.remove(working_phones, 1)
mw.log("Current test string: " .. table.concat(working_phones))
-- mw.log("Current working syllable:")
mw.log("Current working syllable:")
-- logSyllable()
logSyllable()
if #working_phones == 0 then
if #working_phones == 0 then
Line 822: Line 843:
end
end
end
end
mw.log(" ========= ")
mw.log("Current test string: " .. table.concat(working_phones))
if consonant[p_current] then
if consonant[p_current] then
Line 875: Line 893:
end
end


mw.log("——— STRING EXHAUSTED ———")
mw.log("STRING EXHAUSTED\n====================")
return syllables
end
 
function tag_syllables(syllables)
 
mw.log("————— TAGGING SYLLABLES —————")
local syl_count = #syllables
for i = 1, syl_count do
local syllable = syllables[i]
local function tag_syllable(tag, value)
syllables[i][tag] = value
mw.log("syllables[" .. i .. "][" .. tag .. "] = " .. tostring(value) )
end
-- tag checked syllables --
if type(syllable) == "table" then
if syllable["coda"] == nil then
tag_syllable("checked", false)
else
tag_syllable("checked", true)
end
-- tag long syllables --
if long[syllable["nucleus"]] then
tag_syllable("long", true)
else
tag_syllable("long", false)
end
-- tag weak syllables (nucleus = [i] [ɤ]) --
if weak[syllable["nucleus"]] then
tag_syllable("weak", true)
else
tag_syllable("weak", false)
end
-- tag heavy syllables (nucleus is long coda is an obstruent-initial cluster) -
if syllable["coda"] then
if long[syllable["nucleus"]] and affricate[syllable["coda"][1]] then
tag_syllable("heavy", true)
elseif long[syllable["nucleus"]] and obstruent[syllable["coda"][1]] and syllable["coda"][2] then
tag_syllable("heavy", true)
else
tag_syllable("heavy", false)
end
end
 
mw.log("—————")
end
end
return syllables
return syllables
Line 891: Line 968:
outputIPA = resolve_consonants(outputIPA, hj)
outputIPA = resolve_consonants(outputIPA, hj)
outputIPA = get_syllables(outputIPA)
outputIPA = get_syllables(outputIPA)
outputIPA = tag_syllables(outputIPA)
-- test output only --
-- test output only --
local test_output = ""
for i = 1, #outputIPA do
for i = 1, #outputIPA do
local onset = ""
local onset = ""
Line 907: Line 987:
end
end
mw.log("Syllable " .. i .. ": " .. onset .. " [ " .. outputIPA[i]["nucleus"] .. " ] " .. coda )
test_output = test_output .. " " .. onset .. " [ " .. outputIPA[i]["nucleus"] .. " ] " .. coda
else
else
mw.log("Boundary syllable " .. i .. ": '" .. outputIPA[i] .. "'")
test_output = test_output .. " " .. outputIPA[i]
end
end
end
end
mw.log(test_output)
-- return outputIPA
-- return outputIPA
end
end

Revision as of 19:24, 19 August 2023

See {{rad-IPA}}.


local export = {}
local getArgs = require('Module:Arguments').getArgs

-- DATA --

-- Orthography → base phone lookup, keyed ONE CHARACTER at a time.
--
-- A string value is the phone for that spelling. A table value means the
-- spelling may continue: its keys are the characters that may follow, and its
-- `[false]` entry is the phone used when the spelling stops at that point.
-- generate_IPA indexes this structure with single characters only, so a key
-- longer than one character can never be reached; multi-character spellings
-- must be expressed as nested tables (as <âi> is below).
local data = {
	["a"] = {
		["i"] = "ai",
		["o"] = {
			["i"] = "ɔi",
			[false] = "ɔː",
		},
		["u"] = "au",
		[false] = "a",
	},
	["á"] = "aː",
	["ả"] = "aːː",
	-- <â> alone is [ɤ]; <âi> is the diphthong [ɤi].
	["â"] = {
		["i"] = "ɤi",
		[false] = "ɤ",
	},
	["b"] = "b",
	["c"] = "ts",
	["d"] = {
		["x"] = "dʒ",
		["z"] = "dz",
		[false] = "d",
	},
	["ð"] = "ð",
	["e"] = {
		["a"] = "æː",
		["i"] = "ei",
		[false] = "ɛ",
	},
	["é"] = "eː",
	["f"] = "f",
	["g"] = "ɡ",
	["h"] = "h",
	["ħ"] = "ɣ",
	["i"] = {
		["e"] = {
			["a"] = "ia",
			["j"] = "iej",
			[false] = "ie",
		},
		[false] = "i",
	},
	["í"] = "iː",
	["ỉ"] = {
		["e"] = "iːe",
		[false] = "iːː",
	},
	["j"] = "j",
	["ĵ"] = "ĵ",
	["k"] = "k",
	["ķ"] = "tʃ",
	["l"] = "l",
	["m"] = "m",
	["n"] = "n",
	["ņ"] = "ŋ",
	["o"] = {
		["a"] = "ɔa",
		[false] = "ɔ",
	},
	["ó"] = "oː",
	["ø"] = {
		["a"] = "œa",
		["i"] = "ei",
		[false] = "œ",
	},
	["p"] = "p",
	["q"] = "k",
	["r"] = "r",
	["s"] = "s",
	["ș"] = "ʃ",
	["t"] = "t",
	["u"] = {
		["i"] = "ɤi",
		["o"] = {
			["a"] = "ua",
			["j"] = "uoj",
			[false] = "uo",
		},
		[false] = "u",
	},
	["ù"] = "ù",
	["û"] = "ɤ",
	["ú"] = "uː",
	["ủ"] = {
		["o"] = "uːo",
		[false] = "uːː",
	},
	["ū"] = "ū",
	["v"] = "v",
	["w"] = "w",
	["x"] = "ʒ",
	["y"] = {
		["e"] = {
			["a"] = "ia",
			["j"] = "yej",
			[false] = "ie",
		},
		[false] = "i",
	},
	["ỳ"] = "y",
	["z"] = "z",
	["þ"] = "θ",
	["·"] = "·",
	["-"] = "-",
	[" "] = " ",
	["."] = "|",
}

-- Spelling variants that share the entry (the very same table or string) of
-- another letter.
data["à"] = data["a"]
data["è"] = data["e"]
data["ì"] = data["i"]
data["ò"] = data["o"]
data["ý"] = data["í"]
data["ỷ"] = data["ỉ"]

-- All sentence punctuation maps to the same phone as the full stop.
data[","] = data["."]
data[":"] = data["."]
data[";"] = data["."]
data["!"] = data["."]
data["?"] = data["."]

-- Irregular spellings. generate_IPA searches the lower-cased input for each
-- key and rewrites every match to the associated value before any phones are
-- generated. Keys are passed to mw.ustring.match/gsub as patterns, so a key
-- containing pattern magic characters would need escaping. The table is
-- walked with pairs(), so the order in which entries are applied is
-- unspecified.
local irregular = {
	["eurú"] = "ørú",
	["eurov"] = "ørov",
	["heņre"] = "heņgre",
	["nrao"] = "drao",
	["ryņl"] = "ryņgl",
}

-- ALL PHONES --
-- Set of every phone the module recognises: valid_phone[phone] == true.
local valid_phone = {}
for _, phone in ipairs({
	"a", "aː", "aːː", "æː", "ai", "au",
	"b", "ç", "d", "ð", "dz", "dʒ",
	"eː", "ei", "ɛ", "ɤ", "ɤi",
	"f", "ɡ", "ɣ", "h",
	"i", "iː", "iːː", "iːe", "ia", "ie",
	"j", "k", "l", "m", "n", "ŋ",
	"oː", "œ", "œa", "øi", "ɔ", "ɔː", "ɔa", "ɔi",
	"p", "r", "s", "ʃ", "t", "ts", "tʃ",
	"u", "uː", "uːː", "uːo", "ua", "uo",
	"v", "w", "y", "z", "ʒ", "θ",
}) do
	valid_phone[phone] = true
end

-- Placeholder phones, registered as valid in the same way.
local valid_phone_temp = {"iːj", "uːj", "yːj", "ù", "ū", "ĵ"}
for index = 1, #valid_phone_temp do
	valid_phone[valid_phone_temp[index]] = true
end
		
-- CONSONANT GROUPS --

-- Set of consonant phones: consonant[phone] == true.
local consonant = {}
for _, phone in ipairs({
	"b", "ç", "d", "ð", "dz", "dʒ", "f", "ɡ", "ɣ", "h",
	"j", "k", "l", "m", "n", "ŋ", "p", "r", "s", "ʃ",
	"t", "ts", "tʃ", "v", "w", "z", "ʒ", "θ",
}) do
	consonant[phone] = true
end

-- Placeholder consonants; resolve_consonants iterates this list by name.
local consonant_temp = {"ĵ"}
for index = 1, #consonant_temp do
	consonant[consonant_temp[index]] = true
end
		
-- obstruent[phone] == true for every obstruent; obstruent_voiced[phone] is
-- true for the voiced ones and an explicit false for the voiceless ones.
local obstruent = {}
local obstruent_voiced = {}

for _, phone in ipairs({"b", "d", "ð", "dz", "dʒ", "ɡ", "ɣ", "v", "z", "ʒ"}) do
	obstruent[phone] = true
	obstruent_voiced[phone] = true
end

for _, phone in ipairs({"ç", "f", "h", "k", "p", "s", "ʃ", "t", "ts", "tʃ", "θ"}) do
	obstruent[phone] = true
	obstruent_voiced[phone] = false
end

-- Voiceless value of each obstruent.
local obstruent_devoice = {
	-- voiced → voiceless
	["b"] = "p",
	["d"] = "t",
	["ð"] = "θ",
	["dz"] = "ts",
	["dʒ"] = "tʃ",
	["ɡ"] = "k",
	["ɣ"] = "h",
	["v"] = "f",
	["z"] = "s",
	["ʒ"] = "ʃ",
	-- voiceless phones map to themselves
	["ç"] = "ç",
	["f"] = "f",
	["h"] = "h",
	["k"] = "k",
	["p"] = "p",
	["s"] = "s",
	["ʃ"] = "ʃ",
	["t"] = "t",
	["ts"] = "ts",
	["tʃ"] = "tʃ",
	["θ"] = "θ",
}

-- Voiced value of each obstruent.
local obstruent_voice = {
	-- voiceless → voiced ([h] has no distinct voiced value here)
	["ç"] = "j",
	["f"] = "v",
	["h"] = "h",
	["k"] = "ɡ",
	["p"] = "b",
	["s"] = "z",
	["ʃ"] = "ʒ",
	["t"] = "d",
	["ts"] = "dz",
	["tʃ"] = "dʒ",
	["θ"] = "ð",
	-- voiced phones map to themselves
	["b"] = "b",
	["d"] = "d",
	["ð"] = "ð",
	["dz"] = "dz",
	["dʒ"] = "dʒ",
	["ɡ"] = "ɡ",
	["ɣ"] = "ɣ",
	["v"] = "v",
	["z"] = "z",
	["ʒ"] = "ʒ",
}
		
-- Set of affricate phones.
local affricate = {}
for _, phone in ipairs({"dz", "dʒ", "ts", "tʃ"}) do
	affricate[phone] = true
end

-- Alveolar sibilant → postalveolar sibilant.
local sibilant_alv_to_post = {
	["dz"] = "dʒ",
	["s"] = "ʃ",
	["ts"] = "tʃ",
	["z"] = "ʒ",
}

-- The three sibilant sets follow from the map above: its keys are the
-- alveolar sibilants, its values the postalveolar ones, and `sibilant` is
-- the union of both.
local sibilant = {}
local sibilant_alv = {}
local sibilant_post = {}
for alveolar, postalveolar in pairs(sibilant_alv_to_post) do
	sibilant_alv[alveolar] = true
	sibilant_post[postalveolar] = true
	sibilant[alveolar] = true
	sibilant[postalveolar] = true
end

-- Set of nasal phones.
local nasal = {}
for _, phone in ipairs({"m", "n", "ŋ"}) do
	nasal[phone] = true
end

-- Set of glide phones, plus the placeholder glide(s).
local glide = {}
for _, phone in ipairs({"j", "w"}) do
	glide[phone] = true
end

local glide_temp = {"ĵ"}
for index = 1, #glide_temp do
	glide[glide_temp[index]] = true
end

-- Result of merging a consonant with a following [j]
-- (applied by resolve_consonants).
local iotate = {
	["d"] = "dʒ",
	["dz"] = "dʒ",
	["t"] = "tʃ",
	["ts"] = "tʃ",
	["k"] = "tʃ",
	["s"] = "ʃ",
	["z"] = "ʒ",
	["h"] = "ç",
	["ɡ"] = "j",
	["ɣ"] = "j",
	["l"] = "j",
}

-- Consonants after which a following /v/ is always dropped.
local Cv_fixed = {}
for _, phone in ipairs({"b", "f", "p"}) do
	Cv_fixed[phone] = true
end

-- Consonants after which a following /v/ is kept only post-vocalically.
local Cv_split = {}
for _, phone in ipairs({"m", "n"}) do
	Cv_split[phone] = true
end

-- Consonants that can precede /r/ (Cr) or /l/ (Cl) in a medial onset.
-- Both sets share the same core; Cr adds the plain coronal stops and Cl
-- adds the sibilant fricatives.
local Cr = {}
local Cl = {}
for _, phone in ipairs({"p", "k", "b", "ɡ", "ç", "ð", "f", "ɣ", "h", "v", "θ"}) do
	Cr[phone] = true
	Cl[phone] = true
end
for _, phone in ipairs({"t", "d"}) do
	Cr[phone] = true
end
for _, phone in ipairs({"s", "ʃ", "z", "ʒ"}) do
	Cl[phone] = true
end

-- Consonants that can precede /v/ in a medial onset.
local Cv = {}
for _, phone in ipairs({
	"ç", "d", "ð", "dz", "dʒ", "ɡ", "ɣ", "h", "k", "l",
	"r", "s", "ʃ", "t", "ts", "tʃ", "z", "ʒ", "θ",
}) do
	Cv[phone] = true
end

-- Non-glide continuants: the consonants that can precede a nasal in a
-- medial onset. CN is a second name for the same table.
local continuant = {}
for _, phone in ipairs({
	"ç", "ð", "f", "ɣ", "h", "l", "r", "s", "ʃ", "v", "z", "ʒ", "θ",
}) do
	continuant[phone] = true
end
local CN = continuant

-- VOWEL GROUPS --

-- vowel[phone] == true for every vowel phone.
-- long[phone]  is true for long vowels and diphthongs, false for short vowels.
-- weak[phone]  is true only for [ɤ] and [i], false for every other vowel.
local vowel, long, weak = {}, {}, {}

for _, phone in ipairs({
	"a", "aː", "aːː", "æː", "ai", "au",
	"eː", "ei", "ɛ", "ɤ", "ɤi", "i",
	"iː", "iːː", "iːe", "ia", "ie", "oː",
	"œ", "œa", "øi", "ɔ", "ɔː", "ɔa",
	"ɔi", "u", "uː", "uːː", "uːo", "ua",
	"uo", "y",
}) do
	vowel[phone] = true
	long[phone] = true
	weak[phone] = false
end

-- The short vowels are the only non-long nuclei.
for _, phone in ipairs({"a", "ɛ", "ɤ", "i", "œ", "ɔ", "u", "y"}) do
	long[phone] = false
end

weak["ɤ"] = true
weak["i"] = true

-- Placeholder vowels: members of `vowel` only, with no long/weak entry.
-- NOTE(review): `data` emits the placeholders "iej", "uoj" and "yej", which
-- are not in this list — confirm that "iːj", "uːj" and "yːj" are still the
-- intended names.
local vowel_temp = {"iːj", "uːj", "yːj", "ù", "ū"}
for index = 1, #vowel_temp do
	vowel[vowel_temp[index]] = true
end
		
-- MISC --

-- Phones that mark a boundary rather than a sound.
local boundary = {
	[" "] = true,
	["|"] = true,
	["·"] = true,
}

-- PROCESS FUNCTIONS --

-- Result table of the most recent generate_IPA call.
local IPA = {}

-- Converts an orthographic string into a sequence of base phones.
--
-- The input is lower-cased and irregular spellings are respelled. The string
-- is then consumed from its END: at each step the longest spelling (at most
-- three characters) that ends exactly at the current end of the string and
-- has an entry in `data` is converted, prepended to the result and cut off.
--
-- @param word  the string to transcribe
-- @return      the module-level `IPA` table: a sequence of phone strings,
--              which may still contain placeholder phones (e.g. "ù", "ĵ",
--              "iej") that the resolve_* functions rewrite later
-- Raises "Empty input." for nil or "", and an error naming the character for
-- any character that has no entry in `data`.
function generate_IPA(word)
	-- getArgs hands blank arguments over as nil; fail with a readable message
	-- instead of a type error from mw.ustring.lower.
	if word == nil or word == "" then
		error("Empty input.")
	end

	local s = mw.ustring.lower(word)

	-- Replace irregulars --

	for toReplace, ReplaceKey in pairs(irregular) do
		if mw.ustring.match(s, toReplace) then
			s = mw.ustring.gsub(s, toReplace, ReplaceKey)
			-- generate_IPA: mw.log("Irregular spelling <" .. toReplace .. "> recognised. Treating as <" .. ReplaceKey .. ">.")
		end
	end

	local s_len = mw.ustring.len(s)
	IPA = {}

	-- One entry per character. Only the tail of `s` is ever cut off, so the
	-- indices 1..s_len stay valid for the whole run.
	local split_s = {}
	for i = 1, s_len do
		split_s[i] = mw.ustring.sub(s, i, i)
	end

	while s_len > 0 do
		local getData
		local multiMatch = false

		-- `i` is the offset of the candidate's first character from the end
		-- of the string: -2 = three characters, 0 = the last character only.
		local i_iteration = -2
		if s_len < 3 then
			i_iteration = 1 - s_len
		end

		for i = i_iteration, 0 do
			local first = split_s[s_len + i]
			getData = data[first]

			if getData == nil then
				error("'" .. first .. "' is an invalid character.")
			end

			while type(getData) == "table" do
				if i == 0 then
					-- Single character: take its stand-alone value.
					if getData[false] then
						getData = getData[false]
					else
						error(split_s[s_len] .. " is an invalid character.")
					end
				else
					-- Walk the nested table along the following characters.
					for j = 1, 0 - i do
						local nextData = getData[split_s[s_len + i + j]]
						if nextData then
							getData = nextData
							if type(getData) == "string" then
								if j + i == 0 then
									-- The spelling ends exactly at the end of the string.
									multiMatch = true
								else
									-- The spelling ends too early: dead end.
									getData = {}
								end
								break
							elseif j + i == 0 then
								if getData[false] then
									getData = getData[false]
									multiMatch = true
									break
								end
								-- Name the key path that lacks its stand-alone entry.
								error("data[" .. table.concat(split_s, "][", s_len + i, s_len) .. "][false] is missing.")
							end
							-- Otherwise the target is still a table: keep walking.
						else
							-- No such continuation: dead end.
							getData = {}
							break
						end
					end
					-- A table left over here is a dead end; try a shorter candidate.
					if type(getData) == "table" then break end
				end
			end

			if type(getData) == "string" and (i == 0 or multiMatch == true) then
				table.insert(IPA, 1, getData)
				s = mw.ustring.sub(s, 1, s_len + i - 1)
				s_len = mw.ustring.len(s)
				break
			end
		end
	end

	mw.log("Base generation result: [" .. table.concat(IPA,"][") .. "]")
	return IPA
end

-- Rewrites context-dependent and placeholder vowels in a phone sequence.
--
-- Handles, in order for each position: <aù>, the [u]/[ɤ] alternation, the
-- fixed-[u] placeholders "ù" and "ū", initial [ei] → [ai], and the
-- "iej"/"uoj"/"yej" and [ie]/[uo] alternations, which depend on whether a
-- vowel follows.
--
-- @param phones  sequence of phone strings; it is modified in place
-- @return        the same table
--
-- The shared `vowel` and `valid_phone` tables are deliberately left
-- untouched. The look-ahead tests below rely on the placeholder entries, so
-- deleting them here would make every later call in the same environment
-- classify upcoming placeholders differently from the first call.
function resolve_vowels(phones)
	local working_phones = phones
	mw.log("————— BEGINNING VOWEL RESOLUTION —————")

	-- Non-prevocalic values of the <iej>, <uoj> and <yej> placeholders.
	local non_prevocalic = {["iej"] = "ei", ["uoj"] = "ɔi", ["yej"] = "øi"}

	local i = 0
	while true do
		i = i + 1
		local p_prev = working_phones[i - 1]
		local p_current = working_phones[i]
		local p_next = working_phones[i + 1]
		local p_next2 = working_phones[i + 2]
		local p_next3 = working_phones[i + 3]

		if p_current == nil then break end

		-- Final rewrite: later tests at this position still see the old phone.
		local function p_Resolve(p_new)
			working_phones[i] = p_new
			mw.log("[" .. p_current .. "] resolved to [" .. p_new .. "] in position ".. i .. ".")
		end

		-- Intermediate rewrite: later tests at this position see the new phone.
		local function p_Convert(p_new)
			working_phones[i] = p_new
			mw.log("[" .. p_current .. "] converted to [" .. p_new .. "] in position ".. i .. ".")
			p_current = p_new
		end

		-- Resolution of [aù] --
		if p_prev == "a" and p_current == "ù" then
			mw.log("<aù> recognised in position " .. i .. ". Converting to resolvable [u].")
			p_Convert("u")
		end

		-- Resolution of [u], [ù] and [ū] --
		if p_current == "u" then
			mw.log("[u] found in position " .. i .. ". Testing for resolution.")
			if consonant[p_next] then
				if glide[p_next2] then
					if not vowel[p_next3] then
						mw.log("ɤCj!V environment identified.")
						p_Resolve("ɤ")
					else
						mw.log("No resolution needed.")
					end
				elseif not vowel[p_next2] and not glide[p_next2] then
					mw.log("ɤC!V environment identified.")
					p_Resolve("ɤ")
				else
					mw.log("No resolution needed.")
				end
			else
				mw.log("No resolution needed.")
			end
		elseif p_current == "ù" or p_current == "ū" then
			mw.log("Fixed [u] found in position " .. i .. ".")
			working_phones[i] = "u"
		end

		-- Resolution of <ei> and  <øi> --
		if p_current == "ei" then
			-- Scan backwards over consonants; index 0 is nil and therefore
			-- counts as the start of the string.
			for j = 1, i do
				local check_phone = working_phones[i - j]
				if boundary[check_phone] or check_phone == nil then
					mw.log("Initial [ei] found in position" .. i .. ".")
					p_Resolve("ai")
					break
				elseif not consonant[check_phone] then
					break
				end
			end
		end

		if vowel[p_next] then

			-- Resolution of prevocalic <iej>, <uoj> and <yej> --

			if p_current == "iej" or p_current == "yej" or p_current == "uoj" then
				mw.log("Pre-vocalic <" .. p_current .. "> found in position " .. i .. ".")
				if p_current == "uoj" then
					p_Resolve("uo")
					table.insert(working_phones, i + 1, "j")
					mw.log("[j] inserted to position " .. i + 1 .. ".")
				else
					p_Resolve("iː")
				end
			end

			-- Resolution of prevocalic [ie] and [uo] --

			if p_current == "ie" then
				p_Resolve("iː")
			elseif p_current == "uo" then
				p_Resolve("uː")
			end

		else

			-- Resolution of non-prevocalic <iej>, <uoj> and <yej> --

			if non_prevocalic[p_current] then
				p_Resolve(non_prevocalic[p_current])
			end

		end

	end

	mw.log("Vowel resolution result: [" .. table.concat(working_phones,"][") .. "]")
	return working_phones

end

-- Applies the consonant rules to a phone sequence, position by position:
-- iotation (C + [j]), the hjádvanþs variants, merging of sibilant and
-- stop + sibilant sequences, /v/-dropping after certain consonants,
-- regressive voicing assimilation inside obstruent clusters, and removal of
-- a geminate before a boundary or the end of the string.
--
-- @param phones  sequence of phone strings; it is modified in place
-- @param hj_dv   when exactly `false`, [ç] becomes [ʃ] and a [j] that starts
--                a word before a consonant becomes [ʒ]; any other value
--                leaves both untouched
-- @return        the same table
--
-- The shared `consonant`, `glide` and `valid_phone` tables are deliberately
-- left untouched: deleting the "ĵ" placeholder from them would make every
-- later call in the same environment classify an upcoming "ĵ" differently
-- from the first call.
function resolve_consonants(phones, hj_dv)
	local working_phones = phones
	mw.log("————— BEGINNING CONSONANT RESOLUTION —————")

	local i = 0
	while true do
		i = i + 1
		local p_prev = working_phones[i - 1]
		local p_current = working_phones[i]
		local p_next = working_phones[i + 1]
		local p_next2 = working_phones[i + 2]
		local p_next3 = working_phones[i + 3]

		if p_current == nil then break end

		-- Rewrites the current phone; later rules at this position see it.
		local function p_Convert(p_new)
			working_phones[i] = p_new
			mw.log("[" .. p_current .. "] converted to [" .. p_new .. "] in position ".. i .. ".")
			p_current = p_new
		end

		-- Deletes the following phone and refreshes the look-ahead.
		local function p_RemoveNext()
			table.remove(working_phones, i + 1)
			p_next = working_phones[i + 1]
			p_next2 = working_phones[i + 2]
			p_next3 = working_phones[i + 3]
		end

		-- Resolution of iotation --

		if iotate[p_current] and p_next == "j" then
			mw.log("[" .. p_current .. "][j] found in position " .. i .. ".")
			p_Convert(iotate[p_current])
			p_RemoveNext()
			mw.log("[j] removed from position " .. i + 1 .. ".")
		elseif p_current == "ĵ" then
			p_Convert("j")
		end

		-- Resolution of hjádvanþs --

		if hj_dv == false then
			if p_current == "ç" then
				mw.log("hjádvanþs = false:")
				p_Convert("ʃ")
			elseif p_current == "j" and (boundary[p_prev] or p_prev == nil) and consonant[p_next] then
				mw.log("hjádvanþs = false:")
				p_Convert("ʒ")
			end
		end

		-- Resolution of (T)Ss, (T)Sș, ts, ds, tș, dș (progressive voicing assimilation) --

		if p_next == "s" then
			if sibilant[p_current] then
				p_RemoveNext()
				mw.log("[s] removed from position " .. i + 1 .. " following a sibilant.")
			elseif p_current == "t" then
				p_Convert("ts")
				p_RemoveNext()
				mw.log("[t][s] → [ts] in position " .. i .. ".")
			elseif p_current == "d" then
				p_Convert("dz")
				p_RemoveNext()
				mw.log("[d][s] → [dz] in position " .. i .. ".")
			end
		elseif p_next == "ʃ" then
			if sibilant_post[p_current] == true then
				p_RemoveNext()
				mw.log("[ʃ] removed from position " .. i + 1 .. " following a postalveolar sibilant.")
			elseif sibilant_alv[p_current] then
				mw.log("[" .. p_current .. "][ʃ] → [" .. sibilant_alv_to_post[p_current] .. "] in position " .. i .. ".")
				p_Convert(sibilant_alv_to_post[p_current])
				p_RemoveNext()
			elseif p_current == "t" then
				p_Convert("tʃ")
				p_RemoveNext()
				mw.log("[t][ʃ] → [tʃ] in position " .. i .. ".")
			elseif p_current == "d" then
				p_Convert("dʒ")
				p_RemoveNext()
				mw.log("[d][ʃ] → [dʒ] in position " .. i .. ".")
			end
		end

		-- Resolution of Cv --

		if p_next == "v" and ((Cv_split[p_current] and not vowel[p_prev]) or Cv_fixed[p_current]) then
			mw.log("Cluster [" .. p_current .. "][v] resolved to [" .. p_current .. "] at position " .. i .. ".")
			p_RemoveNext()
		end

		-- Resolution of regressive voicing assimilation --

		if obstruent[p_current] == true then
			mw.log("Obstruent [" .. p_current .. "] found in position " .. i .. ". Searching for cluster.")
			local final_i = i

			-- find the last obstruent of the cluster --
			-- `~=` is required here: `not check_phone == "v"` parses as
			-- `(not check_phone) == "v"`, which is always false and would stop
			-- the scan immediately, so no cluster would ever be found.
			for j = i + 1, #working_phones do
				local check_phone = working_phones[j]
				if obstruent[check_phone] == true and check_phone ~= "v" then -- /v/ is excluded
					final_i = j
				else
					break
				end
			end

			if final_i > i then -- if cluster recognised --
				local final_obs = working_phones[final_i]
				mw.log("Final obstruent in cluster is [" .. final_obs .. "], cluster length " .. final_i - i + 1 .. ".")

				-- assimilate to the voicing of the final obstruent --

				if obstruent_voiced[p_current] ~= obstruent_voiced[final_obs] then
					mw.log("Voicing mismatch found in cluster at position " .. i .. ".")
					if obstruent_voiced[final_obs] == true then
						p_Convert(obstruent_voice[p_current])
					else
						p_Convert(obstruent_devoice[p_current])
					end
				else
					mw.log("No voicing mismatch found.")
				end
			else
				mw.log("No cluster found.")
			end
		end

		-- Resolution of geminates --

		if p_next == p_current and (boundary[p_next2] or p_next2 == nil) and consonant[p_current] then
			p_RemoveNext()
			mw.log("Geminate [" .. p_current .. "] removed in final position at position " .. i .. ".")
		end

	end

	mw.log("Consonant resolution result: [" .. table.concat(working_phones,"][") .. "]")
	return working_phones

end

-- Groups a resolved phone sequence into syllables.
--
-- @param phones  sequence of phone strings; it is CONSUMED (emptied from the
--                front) while the syllables are built
-- @return        a sequence whose entries are either
--                  * a table { onset = {phones}|nil, nucleus = string,
--                    coda = {phones}|nil } (empty onsets/codas are stored
--                    as nil), or
--                  * a string, for a boundary phone or "-"
-- Raises an error for a phone that is neither a vowel, a consonant, a
-- boundary nor "-".
function get_syllables(phones)
	local working_phones = phones
	local syllables = {}
	local syllable_no = 1

	local function new_syllable()
		return {
			["onset"] = {},
			["nucleus"] = "",
			["coda"] = {},
		}
	end

	local working_syllable = new_syllable()

	local function logSyllable()
		local onset = ""
		local coda = ""

		if working_syllable["onset"] then
			onset = table.concat(working_syllable["onset"], " ")
		end
		if working_syllable["coda"] then
			coda = table.concat(working_syllable["coda"], " ")
		end

		mw.log( onset .. " [ " .. working_syllable["nucleus"] .. " ] " .. coda )
	end

	-- Stores the working syllable and starts a fresh one. Nothing is stored
	-- when no phone has been collected yet, so a boundary at the very start
	-- of the input, or directly after another boundary, does not produce a
	-- phantom empty syllable.
	local function register_syllable()
		if working_syllable["nucleus"] == ""
			and #working_syllable["onset"] == 0
			and #working_syllable["coda"] == 0 then
			return
		end

		if #working_syllable["onset"] == 0 then
			working_syllable["onset"] = nil
		end
		if #working_syllable["coda"] == 0 then
			working_syllable["coda"] = nil
		end
		syllables[syllable_no] = working_syllable

		mw.log("Syllable " .. syllable_no .. " registered:")
		logSyllable()
		mw.log("====================")
		syllable_no = syllable_no + 1
		working_syllable = new_syllable()
	end

	-- Closes the working syllable and stores the boundary phone as an entry
	-- of its own.
	local function register_boundary(phone)
		register_syllable()
		syllables[syllable_no] = phone
		table.remove(working_phones, 1)
		mw.log("Boundary syllable " .. syllable_no .. " registered: '" .. phone .. "'\n====================")
		syllable_no = syllable_no + 1
	end

	-- Adds the phone to the working syllable: a vowel becomes the nucleus, a
	-- consonant goes to the onset before a nucleus exists and to the coda
	-- afterwards. The last phone of the input also closes the syllable.
	local function register_phone(phone)
		if vowel[phone] then
			working_syllable["nucleus"] = phone
			mw.log("[" .. phone .. "] is now the working nucleus.")
		elseif consonant[phone] then
			local unit = "coda"
			if working_syllable["nucleus"] == "" then
				unit = "onset"
			end
			table.insert(working_syllable[unit], phone)
			mw.log("[" .. phone .. "] appended to the working " .. unit .. ".")
		else
			error("Invalid input for function register_phone")
		end
		table.remove(working_phones, 1)

		if #working_phones == 0 then
			register_syllable()
		end
	end

	mw.log("————— BUILDING SYLLABLES —————")

	-- division into syllables --
	while #working_phones > 0 do
		local p_current = working_phones[1]
		local p_next = working_phones[2]
		local p_next2 = working_phones[3]
		local p_next3 = working_phones[4]

		if consonant[p_current] then
			-- Decide whether this consonant opens a new syllable.
			local opens_syllable = false

			if working_syllable["nucleus"] ~= "" then
				if vowel[p_next] or (glide[p_next] and vowel[p_next2]) then -- *C*(J)V
					opens_syllable = true
				elseif #working_syllable["coda"] > 0 then -- C | …
					-- A permitted two-consonant onset (Cl, Cr, Cv or CN) …
					local onset_cluster = (Cl[p_current] and p_next == "l")
						or (Cr[p_current] and p_next == "r")
						or (Cv[p_current] and p_next == "v")
						or (CN[p_current] and nasal[p_next])
					-- … that is itself followed by (J)V: C | *C*C(J)V
					if onset_cluster and (vowel[p_next2] or (glide[p_next2] and vowel[p_next3])) then
						opens_syllable = true
					end
				end
			end

			if opens_syllable then
				register_syllable()
			end
			register_phone(p_current)
		elseif vowel[p_current] then
			-- A second vowel always starts a new syllable.
			if working_syllable["nucleus"] ~= "" then
				register_syllable()
			end
			register_phone(p_current)
		elseif boundary[p_current] or p_current == "-" then
			register_boundary(p_current)
		else
			error("Unrecognised phone in syllabifier: " .. p_current)
		end
	end

	mw.log("STRING EXHAUSTED\n====================")

	return syllables
end

-- Adds four boolean tags to every syllable table in the list; string entries
-- are skipped.
--
--   checked  the syllable has a coda
--   long     the nucleus is flagged in `long`
--   weak     the nucleus is flagged in `weak` ([i] or [ɤ])
--   heavy    the nucleus is long AND the coda either starts with an
--            affricate or is an obstruent-initial cluster of two or more
--            phones
--
-- All four tags are always written, so `heavy` is `false` (rather than
-- missing) on syllables without a coda.
--
-- @param syllables  sequence of syllable tables and boundary strings;
--                   the syllable tables are modified in place
-- @return           the same sequence
function tag_syllables(syllables)

	mw.log("————— TAGGING SYLLABLES —————")

	for i = 1, #syllables do

		local syllable = syllables[i]

		if type(syllable) == "table" then

			local function tag_syllable(tag, value)
				syllable[tag] = value
				mw.log("syllables[" .. i .. "][" .. tag .. "] = " .. tostring(value) )
			end

			local nucleus = syllable["nucleus"]
			local coda = syllable["coda"]
			local is_long = long[nucleus] == true

			-- tag checked syllables --
			tag_syllable("checked", coda ~= nil)

			-- tag long syllables --
			tag_syllable("long", is_long)

			-- tag weak syllables (nucleus = [i] [ɤ]) --
			tag_syllable("weak", weak[nucleus] == true)

			-- tag heavy syllables --
			local is_heavy = false
			if is_long and coda then
				if affricate[coda[1]] then
					is_heavy = true
				elseif obstruent[coda[1]] and coda[2] then
					is_heavy = true
				end
			end
			tag_syllable("heavy", is_heavy)

			mw.log("—————")

		end

	end

	return syllables
end

-- Module entry point. Runs the whole pipeline on the first argument and
-- writes a one-line summary of the resulting syllables to the debug log.
-- The second argument is the hjádvanþs switch: a missing value or the
-- string "false" is passed on as boolean false, anything else unchanged.
-- Nothing is returned yet (test output only).
function export.generate(frame)
	local args = getArgs(frame)
	local outputIPA = generate_IPA(args[1])

	local hj = args[2] or false
	if hj == "false" then
		hj = false
	end

	outputIPA = tag_syllables(get_syllables(resolve_consonants(resolve_vowels(outputIPA), hj)))

	-- test output only --

	local pieces = {}
	for i = 1, #outputIPA do
		local entry = outputIPA[i]

		if type(entry) == "table" then
			-- table.concat always yields a string, so `and … or ""` is safe here
			local onset = entry["onset"] and table.concat(entry["onset"], " ") or ""
			local coda = entry["coda"] and table.concat(entry["coda"], " ") or ""

			pieces[#pieces + 1] = " • " .. onset .. " [ " .. entry["nucleus"] .. " ] " .. coda
		else
			pieces[#pieces + 1] = " • " .. entry
		end
	end
	mw.log(table.concat(pieces))
	-- return outputIPA
end

return export

--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs"}})
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"rjaovs", "true"}}) (for hjádvanþs)
]]