-- Module:rad-IPA
--
-- Revision as of 21:20, 17 August 2023 by TheNightAvl (talk | contribs) (Added more vowel resolution)
--
-- See {{rad-IPA}}.


local export = {}
local getArgs = require('Module:Arguments').getArgs

-- Orthography-to-phone lookup consulted by generate_IPA.
--
-- Every top-level key is a single character. A string value is the phone for
-- that character on its own. A table value describes multi-character
-- spellings: its keys are the characters that may follow, and its [false]
-- entry is the phone used when the character stands alone. Nesting goes at
-- most two levels deep (three-character spellings).
--
-- Some values ("ù", "ū", "ỳ", "ĵ", "iej", "uoj", "yej") are not final phones
-- but intermediate markers passed on to later processing.
local data = {
	["a"] = {
		["i"] = "ai",
		["o"] = {
			["i"] = "ɔi",
			[false] = "ɔː",
		},
		["u"] = "au",
		[false] = "a",
	},
	["á"] = "aː",
	["ả"] = "aːː",
	-- Nested rather than written as a flat ["âi"] key: lookups are done one
	-- character at a time, so a two-character top-level key can never match.
	["â"] = {
		["i"] = "ɤi",
		[false] = "ɤ",
	},
	["b"] = "b",
	["c"] = "ts",
	["d"] = {
		["x"] = "dʒ",
		["z"] = "dz",
		[false] = "d",
	},
	["ð"] = "ð",
	["e"] = {
		["a"] = "æː",
		["i"] = "ei",
		[false] = "ɛ",
	},
	["é"] = "eː",
	["f"] = "f",
	["g"] = "ɡ",
	["h"] = "h",
	["ħ"] = "ɣ",
	["i"] = {
		["e"] = {
			["a"] = "ia",
			["j"] = "iej",
			[false] = "ie",
		},
		[false] = "i",
	},
	["í"] = "iː",
	["ỉ"] = {
		["e"] = "iːe",
		[false] = "iːː",
	},
	["j"] = "j",
	["ĵ"] = "ĵ",
	["k"] = "k",
	["ķ"] = "tʃ",
	["l"] = "l",
	["m"] = "m",
	["n"] = "n",
	["ņ"] = "ŋ",
	["o"] = {
		["a"] = "ɔa",
		[false] = "ɔ",
	},
	["ó"] = "oː",
	["ø"] = {
		["a"] = "œa",
		["i"] = "ei",
		[false] = "œ",
	},
	["p"] = "p",
	["q"] = "k",
	["r"] = "r",
	["s"] = "s",
	["ș"] = "ʃ",
	["t"] = "t",
	["u"] = {
		["i"] = "ɤi",
		["o"] = {
			["a"] = "ua",
			["j"] = "uoj",
			[false] = "uo",
		},
		[false] = "u",
	},
	["ù"] = "ù",
	["û"] = "ɤ",
	["ú"] = "uː",
	["ủ"] = {
		["o"] = "uːo",
		[false] = "uːː",
	},
	["ū"] = "ū",
	["v"] = "v",
	["w"] = "w",
	["x"] = "ʒ",
	["y"] = {
		["e"] = {
			["a"] = "ia",
			["j"] = "yej",
			[false] = "ie",
		},
		[false] = "i",
	},
	["ỳ"] = "ỳ",
	["z"] = "z",
	["þ"] = "θ",
	["·"] = "·",
	[" "] = " ",
	["."] = "|",
}

-- Spelling variants that share the entry (the very same table or string) of
-- another character.
data["à"] = data["a"]
data["è"] = data["e"]
data["ì"] = data["i"]
data["ò"] = data["o"]
data["ý"] = data["í"]
data["ỷ"] = data["ỉ"]

-- The other punctuation marks map to the same "|" value as the full stop.
data[","] = data["."]
data["!"] = data["."]
data["?"] = data["."]

-- Turns a list of phone strings into a membership set (phone -> true).
local function set_of(phones)
	local set = {}
	for _, phone in ipairs(phones) do
		set[phone] = true
	end
	return set
end

-- Set of recognised phone strings.
-- NOTE(review): the three "temporary" entries are spelled "iːj"/"uːj"/"yːj",
-- whereas the data table emits "iej"/"uoj"/"yej" — confirm which is intended.
local valid_phone = set_of {
	"a", "aː", "aːː", "æː", "ai", "au", "b",
	"ç", "d", "ð", "eː", "ei", "ɛ", "ɤ",
	"ɤi", "f", "ɡ", "ɣ", "h", "i", "iː",
	"iːː", "iːe", "ia", "ie", "j", "k", "l",
	"m", "n", "ŋ", "oː", "œ", "œa", "øi",
	"ɔ", "ɔː", "ɔa", "ɔi", "p", "r", "s",
	"ʃ", "t", "u", "uː", "uːː", "uːo", "ua",
	"uo", "v", "w", "y", "z", "ʒ", "θ",

	-- Temporary phones --
	"iːj", "uːj", "yːj",
}

-- Phones that resolve_vowels treats as consonants.
local consonant = set_of {
	"b", "ç", "d", "ð", "f",
	"ɡ", "ɣ", "h", "j", "k",
	"l", "m", "n", "ŋ", "p",
	"r", "s", "ʃ", "t", "v",
	"w", "y", "z", "ʒ", "θ",
}

-- Phones that resolve_vowels treats as vowels.
local vowel = set_of {
	"a", "aː", "aːː", "æː", "ai", "au",
	"eː", "ei", "ɛ", "ɤ", "ɤi", "i",
	"iː", "iːː", "iːe", "ia", "ie", "oː",
	"œ", "œa", "øi", "ɔ", "ɔː", "ɔa",
	"ɔi", "u", "uː", "uːː", "uːo", "ua",
	"uo",

	-- Temporary phones --
	"iːj", "uːj", "yːj",
}

-- Glides; both also appear in the consonant set.
local glide = set_of { "j", "w" }

-- Separator symbols: space, "|" and "·".
local boundary = set_of { " ", "|", "·" }

-- Module-level phone list; starts out empty.
local IPA = {}

-- PROCESS FUNCTIONS --

-- Looks up the spelling chars[first..last] in `data`.
-- Returns the phone when the whole window is a listed spelling, or nil when
-- it is not (so the caller can retry with a shorter window).
-- Raises an error if chars[first] has no entry in `data`, or if the window
-- ends on a nested table that lacks a [false] entry.
local function lookup_window(chars, first, last)
	local node = data[chars[first]]
	if node == nil then
		error("'" .. chars[first] .. "' is an invalid character.")
	end

	for pos = first + 1, last do
		-- A string here means the spelling ended before the window did.
		if type(node) ~= "table" then
			return nil
		end
		node = node[chars[pos]]
		if node == nil then
			return nil
		end
	end

	if type(node) == "table" then
		local standalone = node[false]
		if not standalone then
			if first == last then
				error(chars[last] .. " is an invalid character.")
			end
			error("data[" .. table.concat(chars, "][", first, last) .. "][false] is missing.")
		end
		return standalone
	end
	return node
end

--- Converts an orthographic string into a list of base phones.
-- The string is consumed from its end: at each step the longest spelling
-- (three characters, then two, then one) that ends at the current position
-- and is listed in `data` is converted and put at the front of the result.
-- @param word string to convert
-- @return a new array of phone strings, in reading order
-- Raises an error for nil/empty input, input that is not valid UTF-8, or a
-- character with no entry in `data`.
function generate_IPA(word)
	if word == nil or word == "" then
		error("Empty input.")
	end

	local remaining = mw.ustring.len(word)
	if remaining == nil then
		-- mw.ustring.len yields nil rather than a number for malformed UTF-8.
		error("Input is not valid UTF-8.")
	end

	local chars = {}
	for pos = 1, remaining do
		chars[pos] = mw.ustring.sub(word, pos, pos)
	end

	local phones = {}

	while remaining > 0 do
		local widest = math.min(3, remaining)
		for width = widest, 1, -1 do
			local first = remaining - width + 1
			local phone = lookup_window(chars, first, remaining)
			if phone ~= nil then
				table.insert(phones, 1, phone)
				remaining = first - 1
				break
			end
			-- A one-character window either matches or raises an error, so
			-- every pass of the outer loop consumes at least one character.
		end
	end

	mw.log("Base generation result: [" .. table.concat(phones, "][") .. "]")
	return phones
end

--- Applies the context-dependent vowel rules to a phone list.
-- The list is modified in place (entries are replaced, and a [j] may be
-- inserted) and the same table is returned.
-- @param phones array of phone strings as produced by generate_IPA
-- @return the same array, after resolution
function resolve_vowels(phones)
	local working_phones = phones
	mw.log("————— BEGINNING VOWEL RESOLUTION —————")

	-- Overwrites one slot and logs the change; `verb` is the word used in
	-- the log line ("resolved" or "converted").
	local function set_phone(index, old, new, verb)
		working_phones[index] = new
		mw.log("[" .. old .. "] " .. verb .. " to [" .. new .. "] in position " .. index .. ".")
	end

	-- The length is re-read on every pass: a numeric `for` would fix its
	-- limit up front and skip the tail of the list once a [j] is inserted.
	local i = 1
	while i <= #working_phones do
		local p_prev = working_phones[i - 1]
		local p_current = working_phones[i]
		local p_next = working_phones[i + 1]
		local p_next2 = working_phones[i + 2]
		local p_next3 = working_phones[i + 3]

		-- RESOLUTION OF [aù] --
		-- After [a], the marker [ù] becomes a plain [u] that the next rule
		-- may still change.
		if p_prev == "a" and p_current == "ù" then
			mw.log("<aù> recognised in position " .. i .. ". Converting to resolvable [u].")
			set_phone(i, p_current, "u", "converted")
			p_current = "u"
		end

		-- RESOLUTION OF [u], [ù] and [ū] --
		if p_current == "u" then
			mw.log("[u] found in position " .. i .. ".")
			if consonant[p_next] then
				if glide[p_next2] then
					-- consonant + glide, not followed by a vowel
					if not vowel[p_next3] then
						mw.log("ɤCj!V environment identified.")
						set_phone(i, p_current, "ɤ", "resolved")
					end
				elseif not vowel[p_next2] then
					-- consonant not followed by a vowel
					mw.log("ɤC!V environment identified.")
					set_phone(i, p_current, "ɤ", "resolved")
				end
			end
		elseif p_current == "ù" or p_current == "ū" then
			mw.log("Fixed [u] found in position " .. i .. ".")
			working_phones[i] = "u"
		end

		-- RESOLUTION OF <ei> and  <øi> --
		-- Walk backwards over consonants; reaching a boundary symbol or the
		-- start of the list turns [ei] into [ai].
		if p_current == "ei" then
			for j = 1, i do
				local check_phone = working_phones[i - j]
				if check_phone == nil or boundary[check_phone] then
					mw.log("Initial [ei] found in position " .. i .. ".")
					set_phone(i, p_current, "ai", "resolved")
					break
				elseif not consonant[check_phone] then
					break
				end
			end
		end

		-- RESOLUTION OF <iej>, <uoj> and <yej> --
		if vowel[p_next] then
			-- Before a vowel the marker splits into a diphthong plus [j].
			if p_current == "iej" or p_current == "yej" or p_current == "uoj" then
				mw.log("Pre-vocalic <" .. p_current .. "> found in position " .. i .. ".")
				if p_current == "uoj" then
					set_phone(i, p_current, "uo", "resolved")
				else
					set_phone(i, p_current, "ie", "resolved")
				end
				table.insert(working_phones, i + 1, "j")
				mw.log("[j] inserted to position " .. i + 1 .. ".")
			end
		elseif p_current == "iej" then
			set_phone(i, p_current, "ei", "resolved")
		elseif p_current == "uoj" then
			set_phone(i, p_current, "ɔi", "resolved")
		elseif p_current == "yej" then
			set_phone(i, p_current, "øi", "resolved")
		end

		i = i + 1
	end

	mw.log("Vowel resolution result: [" .. table.concat(working_phones, "][") .. "]")
	return working_phones
end

--- Template entry point: transcribes the first argument.
-- Reads argument 1 via getArgs, runs it through generate_IPA and then
-- resolve_vowels, and returns the phones each wrapped in square brackets,
-- e.g. "[b][ai]".
function export.generate(frame)
	local word = getArgs(frame)[1]
	local phones = resolve_vowels(generate_IPA(word))
	return "[" .. table.concat(phones, "][") .. "]"
end

return export