-- Module:lfv-IPA
--
-- Revision as of 20:35, 11 November 2023 by TheNightAvl (talk | contribs)
local export = {}
local getArgs = require('Module:Arguments').getArgs

-- DATA --

-- Orthography → phoneme lookup table.
-- Keys are input letter sequences of one to four characters; values are the
-- IPA strings inserted into the working transcription. generate_IPA tries the
-- longest key first (4, then 3, 2, 1 characters), so multi-letter keys such as
-- "hlj" take precedence over their single-letter parts.
local data = {
	-- vowels --
	["a"] = "a", ["e"] = "e", ["ê"] = "ɛ",
	["i"] = "i", ["o"] = "o", ["ô"] = "ɔ",
	["ø"] = "ø", ["u"] = "u", ["y"] = "y",
	
	-- single consonants and the digraphs lj / nj --
	["b"] = "b",
	["d"] = "d",
	["ð"] = "ð",
	["f"] = "f",
	["g"] = "ɡ",
	["ǧ"] = "ɣ",
	["h"] = "x",
	["j"] = "j",
	["k"] = "k",
	["l"] = "l",
	["lj"] = "ʎ",
	["m"] = "m",
	["n"] = "n",
	["nj"] = "ɲ",
	["ñ"] = "ŋ",
	["p"] = "p",
	["r"] = "r",
	["s"] = "s",
	["š"] = "ʃ",
	["t"] = "t",
	["þ"] = "θ",
	["v"] = "v",
	["w"] = "w",
	["ŵ"] = "ɥ",
	["z"] = "z",
	["ž"] = "ʒ",
	
	-- h + sonorant: written with the ring diacritic (same marking the
	-- `devoice` table uses) --
	["hl"] = "l̥",
	["hlj"] = "ʎ̥",
	["hm"] = "m̥",
	["hn"] = "n̥",
	["hnj"] = "ɲ̊",
	["hñ"] = "ŋ̊",
	["hr"] = "r̥",
	
	-- clusters realised as lateral fricatives --
	["tl"] = "ɬ",
	["dl"] = "ɮ",
	["ls"] = "ɬ",
	["lz"] = "ɮ",
	["lš"] = "ɬɬ",
	["lž"] = "ɮɮ",

	-- sequences that yield doubled (geminate) output; "·" inside a key is
	-- part of the spelling being matched --
	["l·s"] = "ɬɬ",
	["l·z"] = "ɮɮ",
	["llj"] = "ʎʎ",
	["nnj"] = "ɲɲ",
	["rtl"] = "ɬɬ",
	["rdl"] = "ɮɮ",
	["ltl"] = "ɬɬ",
	["ldl"] = "ɮɮ",

	-- sonorant + h + same sonorant --
	["lhl"] = "l̥l̥",
	["lhlj"] = "ʎʎ",
	["mhm"] = "mm",
	["nhn"] = "nn",
	["nhnj"] = "ɲɲ",
	["ñhñ"] = "ŋŋ",
	["rhr"] = "rr",
	
	-- word space and pause; "|" is padded to " | " at the very end of
	-- generate_IPA --
	[" "] = " ",
	["."] = "|",
	
	--[[change the character you want to use to mark secondary stress in the input→]] ["_"] = "ˌ",
}
-- A comma marks the same boundary as a full stop.
data[","] = data["."]

-- Set of phones treated as vowels by the resolution rules
-- (phone → true; anything absent is nil).
local vowel = {}
for _, phone in ipairs({ "a", "e", "ɛ", "i", "o", "ɔ", "ø", "u", "y" }) do
	vowel[phone] = true
end

-- Set of phones treated as consonants by the resolution rules
-- (phone → true; anything absent is nil).
local consonant = {}
for _, phone in ipairs({
	"b", "d", "ð", "f", "ɡ", "ɣ", "x",
	"j", "k", "l", "ʎ", "m", "n", "ɲ",
	"ŋ", "p", "r", "s", "ʃ", "t", "θ",
	"v", "w", "ɥ", "z", "ʒ", "ɬ", "ɮ",
}) do
	consonant[phone] = true
end

-- Set of working-transcription elements that separate words:
-- the space and the pause mark "|".
local boundary = {}
boundary[" "] = true
boundary["|"] = true

-- Replacement applied to a ring-marked sonorant when it stands between two
-- vowels: the plain sonorant, doubled.
local intervocalic_voicing = {}
intervocalic_voicing["ʎ̥"] = "ʎʎ"
intervocalic_voicing["m̥"] = "mm"
intervocalic_voicing["n̥"] = "nn"
intervocalic_voicing["ɲ̊"] = "ɲɲ"
intervocalic_voicing["ŋ̊"] = "ŋŋ"
intervocalic_voicing["r̥"] = "rr"

-- Ring-marked counterpart of each phone that is subject to terminal
-- devoicing (plain phone → same phone with a ring diacritic).
local devoice = {}
devoice["d"] = "d̥"
devoice["ð"] = "ð̥"
devoice["ɡ"] = "ɡ̊"
devoice["ɣ"] = "ɣ̊"
devoice["l"] = "l̥"
devoice["ʎ"] = "ʎ̥"
devoice["r"] = "r̥"
devoice["z"] = "z̥"
devoice["ʒ"] = "ʒ̊"
devoice["ɮ"] = "ɮ̊"

-- Set of voiceless obstruents; a following member of this set triggers
-- regressive devoicing of r and l in the phonetic pass.
local voiceless = {}
for _, phone in ipairs({ "f", "x", "k", "p", "s", "ʃ", "t", "θ", "ɬ" }) do
	voiceless[phone] = true
end

--- Transcribe an orthographic string into IPA.
--
-- The input is tokenised with the `data` table (longest match first), then a
-- single left-to-right pass rewrites the resulting phones in place.
--
-- @param word  string to transcribe; it is matched against `data` as-is, so
--              the caller is expected to have lower-cased it already
-- @param phon  truthy for a phonetic transcription (allophony, devoicing,
--              glides, stress marks); falsy for a bare phonemic one
-- @param col   truthy to additionally apply the colloquial rules
-- @return      the transcription as a single string, without enclosing
--              slashes or brackets
-- Raises an error if the input contains a character that is neither in
-- `data` nor one of "-" / "·".
--
-- Declared local: it is a private helper of this module and must not leak
-- into the shared global environment.
local function generate_IPA(word, phon, col)

	-- Sequence of phones; each element is one unit for the rules below.
	local working_IPA = {}
	-- Set when the input starts or ends with "-"; suppresses primary stress.
	local is_affix = false
	
	-- base generation -- 
	while #word > 0 do
		local matched = false
		
		-- Longest match first, so e.g. "hlj" wins over "hl" + "j".
		for length = 4, 1, -1 do
			local grapheme = mw.ustring.sub(word, 1, length)
			if data[grapheme] then
				table.insert(working_IPA, data[grapheme])
				word = mw.ustring.sub(word, length + 1)
				matched = true
				break
			end
		end
		
		if not matched then
			local character = mw.ustring.sub(word, 1, 1)
			if character == "-" then
				if #working_IPA > 0 and #word > 1 then			-- if not at the beginning and not at the end
					-- word-internal hyphen: secondary stress
					table.insert(working_IPA, "ˌ")
				else
					-- leading/trailing hyphen: the input is an affix
					table.insert(working_IPA, "-")
					is_affix = true
				end
				word = mw.ustring.sub(word, 2)
			elseif character == "·" then
				-- A bare "·" produces no output; it is simply skipped.
				word = mw.ustring.sub(word, 2)
			else
				error("The character '" .. character .. "' is not recognised.")
			end
		end
	end
	
	-- Pull the secondary stress marks out of the sequence (so that they do not
	-- sit between neighbouring phones while the rules run) and remember where
	-- to put them back. Also drop a space that directly follows a boundary.
	local i = 1
	local secondary_stress = {}
	
	while working_IPA[i] do
		if working_IPA[i] == "ˌ" then
			table.insert(secondary_stress, i)
			table.remove(working_IPA, i)
			i = i - 1
		elseif working_IPA[i] == " " and boundary[working_IPA[i-1]] then
			table.remove(working_IPA, i)
			i = i - 1
		end
		i = i + 1
	end
	
	-- resolve --
	
	i = 1
	
	-- Indices of the phones that receive a primary stress mark. The mark is
	-- only attached after the loop: prefixing "ˈ" in place would turn e.g. "a"
	-- into "ˈa", which no longer matches the vowel/consonant sets when later
	-- iterations look back at it. Elements are only ever removed at or after
	-- the current index, so recorded indices stay valid.
	local primary_stress = {}
	
	while working_IPA[i] do
	
		local p_prev = working_IPA[i-1]
		local p_current = working_IPA[i]
		local p_next = working_IPA[i+1]
		
		-- Replace the current phone and refresh the three context variables.
		local function p_resolve(phone)
			working_IPA[i] = phone
			p_prev = working_IPA[i-1]
			p_current = working_IPA[i]
			p_next = working_IPA[i+1]
		end
		
		-- Delete the phone after the current one (when `check` is truthy) and
		-- shift the stored secondary stress positions that lie beyond it.
		local function remove_next(check)
			if check then
				table.remove(working_IPA, i+1)
				p_next = working_IPA[i+1]
				for index, position in ipairs(secondary_stress) do
					if position > i + 1 then
						secondary_stress[index] = secondary_stress[index] - 1
					end
				end
			end
		end
		
		-- intervocalic voicing and gemination of voiceless sonorants --
		if vowel[p_prev] and intervocalic_voicing[p_current] ~= nil and vowel[p_next] then
			p_resolve(intervocalic_voicing[p_current])
		end
		
		-- account for <CCu/iV> spelling --
		-- u / i between a doubled consonant and a vowel is read as a glide.
		if consonant[p_prev] and working_IPA[i-2] == p_prev and vowel[p_next] then
			if p_current == "u" then
				p_resolve("w")
			elseif p_current == "i" then
				p_resolve("j")
			end
		end
		
		-- colloquial only --
		if col then
		
			-- rʃ → ʃʃ
			if p_current == "r" and p_next == "ʃ" then
				p_resolve("ʃ")
			end
			
			-- l l i V → ʎʎ V
			if p_current == "l" and p_next == "l" and working_IPA[i+2] == "i" and vowel[working_IPA[i+3]] then
				p_resolve("ʎʎ")
				remove_next(true)
				remove_next(true)
			end
			
			-- n n i V → ɲɲ V
			if p_current == "n" and p_next == "n" and working_IPA[i+2] == "i" and vowel[working_IPA[i+3]] then
				p_resolve("ɲɲ")
				remove_next(true)
				remove_next(true)
			end
			
			-- x d → ð, with the x turning into length on the preceding vowel
			if p_current == "x" and p_next == "d" then
				working_IPA[i+1] = "ð"
				-- After vowel + i/u the x is dropped and the first vowel is
				-- lengthened. In a phonetic transcription the i/u has already
				-- been rewritten to i̯/u̯ by the previous iteration, so the
				-- desyllabified forms must be accepted as well; otherwise this
				-- branch can never be taken.
				if vowel[working_IPA[i-2]] and (p_prev == "i" or p_prev == "u" or p_prev == "i̯" or p_prev == "u̯") then
					table.remove(working_IPA, i)
					for index, position in ipairs(secondary_stress) do
						if position > i then
							secondary_stress[index] = secondary_stress[index] - 1
						end
					end
					working_IPA[i-2] = working_IPA[i-2] .. "ː"
					-- The ð has moved into the current slot.
					p_current = working_IPA[i]
					p_next = working_IPA[i+1]
				elseif vowel[p_prev] then
					p_resolve("ː")
				end
			end
		end
		
		-- phonetic only --
		if phon then
			
			-- l̥ → ɬ --
			if p_current == "l̥" then
				p_resolve("ɬ")
			elseif p_current == "l̥l̥" then
				p_resolve("ɬɬ")
			end
			
			-- terminal devoicing --
			-- Applies before a boundary or at the very end, except before a
			-- space that is followed by a vowel.
			
			if devoice[p_current] and (boundary[p_next] or p_next == nil) then
				if not (p_next == " " and vowel[working_IPA[i+2]]) then
					p_resolve(devoice[p_current])
				end
			end
			
			-- regressive devoicing --
			
			if p_current == "r" and voiceless[p_next] then
				p_resolve("r̥")
			elseif p_current == "l" and voiceless[p_next] then
				p_resolve("l̥")
			end
			
			-- /v/ --
			if p_current == "v" then
				if consonant[p_prev] and p_prev ~= "u̯" then
					p_resolve("ʋ")
				elseif (consonant[p_prev] or boundary[p_prev] or p_prev == nil) and consonant[p_next] then
					p_resolve("ʋ")
				elseif consonant[p_next] or boundary[p_next] or p_next == nil then
					p_resolve("u̯")
				end
			end
			
			-- /w/ --
			if p_current == "w" and p_prev ~= "u̯" then
				p_resolve("u̯")
			end
			
			-- /ɥ/ --
			if p_current == "ɥ" then
				p_resolve("y̯")
			end
			
			-- /j/ --
			if p_current == "j" and p_prev ~= "i̯" then
				p_resolve("i̯")
			end
			
			-- /xj/ --
			if p_current == "x" and p_next == "j" then
					p_resolve("ç")
			end
			
			-- V{i u} --
			if vowel[p_prev] and p_current == "i" then
					p_resolve("i̯")
			elseif vowel[p_prev] and p_current == "u" then
					p_resolve("u̯")
			end
			
			-- {i u} V --
			-- Prevocalic i/u loses its syllabicity only if a vowel occurs
			-- earlier in the same word (scan back to the nearest boundary).
			if vowel[p_next] and (p_current == "i" or p_current == "u") then
				local desyllabify = false
				
				for checker = 1, i-1 do
					if vowel[working_IPA[i - checker]] then
						desyllabify = true
						break
					elseif boundary[working_IPA[i - checker]] or working_IPA[i - checker] == nil then
						break
					end
				end
				
				if desyllabify then
					if p_current == "u" then
						p_resolve("u̯")
					elseif p_current == "i" then
						p_resolve("i̯")
					end
				end
			end
			
			-- stress --
			-- The first phone of every word is stressed unless the input is
			-- an affix. Only the position is recorded here (see
			-- `primary_stress` above); a boundary marker itself never
			-- carries stress.
			if (boundary[p_prev] or p_prev == nil) and is_affix == false and not boundary[p_current] then
				primary_stress[#primary_stress + 1] = i
			end
			
		end
		
		i = i + 1
	end
	
	-- primary stress --
	-- Must run before the secondary stress marks are re-inserted, because
	-- those insertions shift the indices.
	for _, position in ipairs(primary_stress) do
		working_IPA[position] = "ˈ" .. working_IPA[position]
	end
	
	-- secondary stress --
	-- Stored positions refer to the sequence without any marks in it, so each
	-- insertion pushes the remaining positions one slot to the right.
	if phon then
		while secondary_stress[1] do
			table.insert(working_IPA, secondary_stress[1], "ˌ")
			table.remove(secondary_stress,1)
			for no, value in ipairs(secondary_stress) do
				secondary_stress[no] = value + 1
			end
		end
	end
	
	working_IPA = table.concat(working_IPA)
	
	if phon then
	
		-- /xd/ --
		-- Before a devoiced d the x surfaces as h; before a plain d both the h
		-- and the ɦ variant are given, separated by "~".
		if mw.ustring.find(working_IPA, "(xd̥)") then
			working_IPA = mw.ustring.gsub(working_IPA, "(xd̥)", "hd̥")
		elseif mw.ustring.find(working_IPA, "(xd)") then
			working_IPA = mw.ustring.gsub(working_IPA, "(xd)", "hd") .. "~" .. mw.ustring.gsub(working_IPA, "(xd)", "ɦd")
		end
	
	end
	
	-- Set pause marks off with spaces.
	working_IPA = mw.ustring.gsub(working_IPA, "%|", " | ")
	
	return working_IPA
	
end

--- Template entry point: transcribe a word and optionally wrap it in markup.
--
-- Positional arguments (read through Module:Arguments' getArgs):
--   1      the text to transcribe; if it is missing or is itself one of the
--          flags below, the current page's subpage text is used instead and
--          every positional argument is read as a flag
--   2...   flags:
--            phon               phonetic transcription
--            col / colloquial   colloquial rules (implies phon)
--            cite               "/phonemic/ [phonetic]" wrapped in an IPA
--                               span; the bracketed part is omitted when it
--                               differs from the phonemic one only by a
--                               leading stress mark
--            format             wrap the result in an IPA span with // or []
--            nolarge            add the "nolarge" class to that span
--
-- @param frame  the frame object passed in by #invoke
-- @return       the transcription, as plain text or as an HTML span
function export.generate(frame)
	local args = getArgs(frame)
	local outputIPA = ""
	
	local parameters = {}
	-- Every flag this function reads must be listed here; otherwise a call
	-- that passes only flags would have its first flag transcribed as if it
	-- were the word. ("format" used to be missing.)
	local valid_parameter = {
		["phon"] = true,
		["col"] = true,
		["colloquial"] = true,
		["nolarge"] = true,
		["cite"] = true,
		["format"] = true,
	}
	local p = 2
	
	if valid_parameter[args[1]] or args[1] == nil then
		outputIPA = mw.title.getCurrentTitle().subpageText
		p = 1
	else
		outputIPA = args[1]
	end
	
	-- Collect the flags; stops at the first missing positional argument.
	while args[p] do
		parameters[args[p]] = true
		p = p + 1
	end
	
	local colloquial = parameters["colloquial"] or parameters["col"] or false
	if colloquial then
		parameters["phon"] = true
	end
	
	local nolarge = ""
	if parameters["nolarge"] then
		nolarge = " nolarge"
	end
	
	-- Normalise the input: non-breaking space entities become plain spaces,
	-- and everything is lower-cased.
	outputIPA = mw.ustring.gsub(outputIPA, "(%&nbsp%;)", " ")
	outputIPA = mw.ustring.lower(outputIPA)
	
	if parameters["cite"] then
		local broad = generate_IPA(outputIPA, false, false)
		local narrow = generate_IPA(outputIPA, true, colloquial)
		
		if "ˈ" .. broad ~= narrow then
			outputIPA = "<span class=\"IPA" .. nolarge .. "\">/" .. broad .. "/ [" .. narrow .. "]</span>"
		else
			outputIPA = "<span class=\"IPA" .. nolarge .. "\">/" .. broad .. "/</span>"
		end
		
	else
		outputIPA = generate_IPA(outputIPA, parameters["phon"] or false, colloquial)
		
		if parameters["phon"] and parameters["format"] then
			outputIPA = "<span class=\"IPA" .. nolarge .. "\">[" .. outputIPA .. "]</span>"
		elseif parameters["format"] then
			outputIPA = "<span class=\"IPA" .. nolarge .. "\">/" .. outputIPA .. "/</span>"
		end
	end
	
	return outputIPA
	
end

return export

--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"kwilliehda", "phon"}})
]]