-- Module:lfv-IPA

local export = {}
local getArgs = require('Module:Arguments').getArgs

-- DATA --

-- Character used in the input to mark secondary stress.
-- Change the value below to use a different marker character.
local secondary_stress_marker = "_"

-- Orthography → IPA lookup used by the base-generation pass of generate_IPA.
-- Keys are 1 to 5 characters long; the parser always tries the longest key first.
local data = {
	-- vowels
	["a"] = "a", ["e"] = "e", ["ê"] = "ɛ",
	["i"] = "i", ["o"] = "o", ["ô"] = "ɔ",
	["ø"] = "ø", ["u"] = "u", ["y"] = "y",

	-- single consonants and the digraphs <lj>, <nj>
	["b"] = "b", ["d"] = "d", ["ð"] = "ð", ["f"] = "f",
	["g"] = "ɡ", ["ǧ"] = "ɣ", ["h"] = "x", ["j"] = "j",
	["k"] = "k", ["l"] = "l", ["lj"] = "ʎ", ["m"] = "m",
	["n"] = "n", ["nj"] = "ɲ", ["ñ"] = "ŋ", ["p"] = "p",
	["r"] = "r", ["s"] = "s", ["š"] = "ʃ", ["t"] = "t",
	["þ"] = "θ", ["v"] = "v", ["w"] = "w", ["ŵ"] = "ɥ",
	["z"] = "z", ["ž"] = "ʒ",

	-- <h> + sonorant
	["hl"] = "ɬ", ["hlj"] = "ʎ̥", ["hm"] = "m̥", ["hn"] = "n̥",
	["hnj"] = "ɲ̊", ["hñ"] = "ŋ̊", ["hr"] = "r̥",

	-- clusters that yield lateral fricatives
	["tl"] = "ɬ", ["dl"] = "ɮ",
	["ls"] = "ɬ", ["lz"] = "ɮ",
	["lš"] = "ɬɬ", ["lž"] = "ɮɮ",

	-- the same clusters written with a secondary-stress mark inside them
	["tˌl"] = "ˌɬ", ["dˌl"] = "ˌɮ",
	["lˌs"] = "ˌɬ", ["lˌz"] = "ˌɮ",
	["lˌš"] = "ɬˌɬ", ["lˌž"] = "ɮˌɮ",

	-- sequences that yield doubled consonants
	["l·s"] = "ɬɬ", ["l·z"] = "ɮɮ",
	["llj"] = "ʎʎ", ["nnj"] = "ɲɲ",
	["rtl"] = "ɬɬ", ["rdl"] = "ɮɮ",
	["ltl"] = "ɬɬ", ["ldl"] = "ɮɮ",

	-- ... and their variants with a secondary-stress mark inside
	["l·ˌs"] = "ɬˌɬ", ["l·ˌz"] = "ɮˌɮ",
	["lˌlj"] = "ʎˌʎ", ["nˌnj"] = "ɲˌɲ",
	["rtˌl"] = "ɬˌɬ", ["rdˌl"] = "ɮˌɮ",
	["ltˌl"] = "ɬˌɬ", ["ldˌl"] = "ɮˌɮ",

	-- sonorant + <h> + sonorant sequences (and <nm>)
	["lhl"] = "ɬɬ", ["lhlj"] = "ʎʎ",
	["mhm"] = "mm", ["nhm"] = "mm",
	["nhn"] = "nn", ["nhnj"] = "ɲɲ",
	["ñhñ"] = "ŋŋ", ["rhr"] = "rr",
	["nm"] = "mm",

	-- ... and their variants with a secondary-stress mark inside
	["lˌhl"] = "ɬˌɬ", ["lˌhlj"] = "ʎˌʎ",
	["mˌhm"] = "mˌm", ["nˌhm"] = "mˌm",
	["nˌhn"] = "nˌn", ["nˌhnj"] = "ɲˌɲ",
	["ñˌhñ"] = "ŋˌŋ", ["rˌhr"] = "rˌr",
	["nˌm"] = "mˌm",

	-- word separator and pause marks ("," behaves exactly like ".")
	[" "] = " ",
	["."] = "|",
	[","] = "|",

	-- a lone secondary-stress mark is passed through unchanged
	["ˌ"] = "ˌ",
}

-- Affix lists live in a separate data page. generate_IPA walks both lists with
-- ipairs and uses every entry as a ustring pattern anchored to the word edge.
local affixes = mw.loadData("Module:lfv-IPA/affixes")
local prefixes = affixes.prefixes
local suffixes = affixes.suffixes

-- Set of IPA vowel symbols. Each symbol is registered twice: plain and with a
-- leading primary-stress mark ("ˈ"), because generate_IPA prepends "ˈ" to the
-- first phone of a word and later rules must still recognise it as a vowel.
--
-- The set is built from a list rather than by adding the stressed keys while
-- iterating the table with pairs(): assigning new keys during a pairs()
-- traversal is undefined behaviour in Lua.
local vowel = {}
for _, v in ipairs({"a", "e", "ɛ", "i", "o", "ɔ", "ø", "u", "y"}) do
	vowel[v] = true
	vowel["ˈ" .. v] = true
end

-- Orthographic vowel letters, spliced into ustring character classes.
local ortho_vowels = "aeêioôøuy"

-- Set of IPA consonant symbols. Each symbol is registered twice: plain and
-- with a leading primary-stress mark ("ˈ"), because generate_IPA prepends "ˈ"
-- to the first phone of a word and later rules must still recognise it.
--
-- The set is built from a list rather than by adding the stressed keys while
-- iterating the table with pairs(): assigning new keys during a pairs()
-- traversal is undefined behaviour in Lua.
local consonant = {}
for _, c in ipairs({
	"b", "d", "ð", "f", "ɡ", "ɣ", "x", "j", "k", "l", "ʎ", "m", "n", "ɲ",
	"ŋ", "p", "r", "s", "ʃ", "t", "θ", "v", "w", "ɥ", "z", "ʒ", "ɬ", "ɮ",
}) do
	consonant[c] = true
	consonant["ˈ" .. c] = true
end

-- Orthographic consonant letters, spliced into a ustring character class.
local ortho_consonants = "bdðfgǧhjklmnñprsštþvwŵzž"
-- Orthographic multigraphs that must move as a unit when a secondary-stress
-- mark is shifted in front of them (see generate_IPA).
local ortho_digraphs = {"hlj", "hl", "hm", "hnj", "hn", "hñ", "hr","lj", "ls","lz","nj"}

-- Phones that delimit words: the space and the pause bar produced for "." and ",".
local boundary = {}
for _, mark in ipairs({" ", "|"}) do
	boundary[mark] = true
end

-- Replacement for a voiceless sonorant standing between two vowels: the voiced
-- counterpart, doubled. When the voiceless phone carries a leading
-- secondary-stress mark, the mark ends up between the two halves instead.
local intervocalic_voicing = {}
for _, pair in ipairs({
	{"ʎ̥", "ʎ"},
	{"m̥", "m"},
	{"n̥", "n"},
	{"ɲ̊", "ɲ"},
	{"ŋ̊", "ŋ"},
	{"r̥", "r"},
}) do
	local unvoiced, voiced = pair[1], pair[2]
	intervocalic_voicing[unvoiced] = voiced .. voiced
	intervocalic_voicing["ˌ" .. unvoiced] = voiced .. "ˌ" .. voiced
end

-- Voiced phone → the same phone carrying a voiceless diacritic.
-- Consulted by the terminal-devoicing rule in generate_IPA.
local devoice = {
	["d"] = "d̥", ["ð"] = "ð̥",
	["ɡ"] = "ɡ̊", ["ɣ"] = "ɣ̊",
	["l"] = "l̥", ["ʎ"] = "ʎ̥",
	["r"] = "r̥",
	["z"] = "z̥", ["ʒ"] = "ʒ̊",
	["ɮ"] = "ɮ̊",
}

-- Set of voiceless phones that trigger devoicing of a preceding /r/ or /l/.
local voiceless = {}
for _, phone in ipairs({"f", "x", "k", "p", "s", "ʃ", "t", "θ", "ɬ"}) do
	voiceless[phone] = true
end

-- Set of velar phones; /n/ becomes [ŋ] when one of these follows it.
local velar = {}
for _, phone in ipairs({"ɡ", "ɣ", "x", "k"}) do
	velar[phone] = true
end

--[[
Transcribes an orthographic string into IPA.

Parameters:
	word    (string)  orthographic input; export.generate lower-cases it first
	phon    (boolean) when truthy, apply the "phonetic only" rules, the primary
	                  stress mark and the secondary stress marks
	col     (boolean) when truthy, also apply the colloquial rules
	nopref  (boolean) when exactly true, do not add automatic secondary stress
	                  after a recognised prefix
	nosuff  (boolean) when exactly true, do not add automatic secondary stress
	                  before a recognised suffix

Returns the transcription as a string. Raises an error when the input
contains a character that is neither in `data` nor "-" nor "·".

NOTE(review): this function is (and always was) a global, not a local.
It is left global here so that any external code relying on the name keeps
working; consider making it local once that has been checked.
]]
function generate_IPA(word, phon, col, nopref, nosuff)

	local working_IPA = {}
	local is_affix = false

	-- The marker is configurable, so escape any ASCII punctuation in it before
	-- using it as a Lua pattern (e.g. "." or "%" would otherwise be magic).
	local marker_pattern = secondary_stress_marker:gsub("%p", "%%%0")
	word = word:gsub(marker_pattern, "ˌ")

	-- auto secondary stress --
	if phon then
		if nopref ~= true then
			for _, pref in ipairs(prefixes) do
				if mw.ustring.find(word, "^" .. pref) then
					word = mw.ustring.gsub(word, "^" .. pref, pref .. "ˌ")
					break
				end
			end
		end
		if nosuff ~= true then
			for _, suff in ipairs(suffixes) do
				if mw.ustring.find(word, suff .. "$") then
					word = mw.ustring.gsub(word, suff .. "$", "ˌ" .. suff)
					break
				end
			end
		end
		-- A manual mark next to an automatic one would block the shifts below.
		word = mw.ustring.gsub(word, "ˌˌ+", "ˌ")
		-- Move a mark that sits between a consonant (or multigraph) and a vowel
		-- in front of that consonant.
		for _, digraph in ipairs(ortho_digraphs) do
			word = mw.ustring.gsub(word, digraph .. "ˌ([" .. ortho_vowels .. "])", "ˌ" .. digraph .. "%1")
		end
		word = mw.ustring.gsub(word, "([" .. ortho_consonants .. "])ˌ([" .. ortho_vowels .. "])", "ˌ%1%2")
	end

	-- Collapse runs of secondary-stress marks BEFORE parsing. (This used to run
	-- after the parsing loop, when `word` was already empty, and with a
	-- byte-based pattern, so it never had any effect.)
	word = mw.ustring.gsub(word, "ˌˌ+", "ˌ")

	-- base generation --
	-- Longest match first: try the leading 5, 4, 3, 2, then 1 characters.
	while #word > 0 do
		local matched = false
		for len = 5, 1, -1 do
			local chunk = mw.ustring.sub(word, 1, len)
			if data[chunk] then
				table.insert(working_IPA, data[chunk])
				word = mw.ustring.sub(word, len + 1)
				matched = true
				break
			end
		end

		if not matched then
			local first = mw.ustring.sub(word, 1, 1)
			if first == "-" then
				if #working_IPA > 0 and #word > 1 then			-- if not at the beginning and not at the end
					table.insert(working_IPA, "ˌ")
				else
					-- leading or trailing hyphen: the input is an affix
					table.insert(working_IPA, "-")
					is_affix = true
				end
				word = mw.ustring.sub(word, 2)
			elseif first == "·" then
				-- separator dot outside a known cluster: silently dropped
				word = mw.ustring.sub(word, 2)
			else
				error("The character '" .. first .. "' is not recognised.")
			end
		end
	end

	-- Pull stand-alone secondary-stress marks out of the phone list, remembering
	-- where they were, and drop a space that directly follows another boundary.
	local i = 1
	local secondary_stress = {}

	while working_IPA[i] do
		if working_IPA[i] == "ˌ" then
			-- adjacent marks (e.g. a hyphen next to a manual mark) share the
			-- same index; record that index only once
			if secondary_stress[#secondary_stress] ~= i then
				table.insert(secondary_stress, i)
			end
			table.remove(working_IPA, i)
			i = i - 1
		elseif working_IPA[i] == " " and boundary[working_IPA[i-1]] then
			table.remove(working_IPA, i)
			i = i - 1
		end
		i = i + 1
	end

	-- resolve --

	i = 1

	while working_IPA[i] do

		local p_prev = working_IPA[i-1]
		local p_current = working_IPA[i]
		local p_next = working_IPA[i+1]

		-- Replace the current phone and refresh the three-phone window.
		local function p_resolve(phone)
			working_IPA[i] = phone
			p_prev = working_IPA[i-1]
			p_current = working_IPA[i]
			p_next = working_IPA[i+1]
		end

		-- Delete the phone after the current one and shift the recorded
		-- secondary-stress positions that lie beyond it.
		local function remove_next(check)
			if check then
				table.remove(working_IPA, i+1)
				p_next = working_IPA[i+1]
				for index, position in ipairs(secondary_stress) do
					if position > i + 1 then
						secondary_stress[index] = secondary_stress[index] - 1
					end
				end
			end
		end

		-- Debug aid; disabled because it dumped the whole table on every iteration.
		-- mw.logObject(working_IPA)

		-- intervocalic voicing and gemination of voiceless sonorants --
		if vowel[p_prev] and intervocalic_voicing[p_current] ~= nil and vowel[p_next] then
			p_resolve(intervocalic_voicing[p_current])
		end

		-- account for <CCu/iV> spelling --
		if consonant[p_prev] and working_IPA[i-2] == p_prev and vowel[p_next] then
			if p_current == "u" then
				p_resolve("w")
			elseif p_current == "i" then
				p_resolve("j")
			end
		end

		-- n → ŋ --
		if p_current == "n" and velar[p_next] then
			p_resolve("ŋ")
		end

		-- colloquial only --
		if col then

			if p_current == "r" and p_next == "ʃ" then
				p_resolve("ʃ")
			end

			if p_current == "l" and p_next == "l" and working_IPA[i+2] == "i" and vowel[working_IPA[i+3]] then
				p_resolve("ʎʎ")
				remove_next(true)
				remove_next(true)
			end

			if p_current == "n" and p_next == "n" and working_IPA[i+2] == "i" and vowel[working_IPA[i+3]] then
				p_resolve("ɲɲ")
				remove_next(true)
				remove_next(true)
			end

			if p_current == "x" and p_next == "d" then
				working_IPA[i+1] = "ð"
				if vowel[working_IPA[i-2]] and (p_prev == "i" or p_prev == "u") then
					-- vowel + i/u + x + d: drop the x and lengthen the first vowel
					table.remove(working_IPA, i)
					for index, position in ipairs(secondary_stress) do
						if position > i then
							secondary_stress[index] = secondary_stress[index] - 1
						end
					end
					working_IPA[i-2] = working_IPA[i-2] .. "ː"
					p_current = working_IPA[i]
					p_next = working_IPA[i+1]
				elseif vowel[p_prev] then
					-- vowel + x + d: the x turns into a length mark
					p_resolve("ː")
				end
			end
		end

		-- phonetic only --
		if phon then

			-- terminal devoicing --
			-- (not applied before a space when the next word starts with a vowel)

			if devoice[p_current] and (boundary[p_next] or p_next == nil) then
				if not (p_next == " " and vowel[working_IPA[i+2]]) then
					p_resolve(devoice[p_current])
				end
			end

			-- regressive devoicing --

			if p_current == "r" and voiceless[p_next] then
				p_resolve("r̥")
			elseif p_current == "l" and voiceless[p_next] then
				p_resolve("l̥")
			end

			-- /v/ --
			if p_current == "v" then
				if consonant[p_prev] and p_prev ~= "u̯" then
					p_resolve("ʋ")
				elseif (consonant[p_prev] or boundary[p_prev] or p_prev == nil) and consonant[p_next] then
					p_resolve("ʋ")
				elseif consonant[p_next] or boundary[p_next] or p_next == nil then
					p_resolve("u̯")
				end
			end

			-- /w/ --
			if p_current == "w" and p_prev ~= "u̯" then
				p_resolve("u̯")
			end

			-- /ɥ/ --
			if p_current == "ɥ" then
				p_resolve("y̯")
			end

			-- /j/ --
			if p_current == "j" and p_prev ~= "i̯" then
				p_resolve("i̯")
			end

			-- /xj/ --
			if p_current == "x" and p_next == "j" then
					p_resolve("ç")
			end

			-- V{i u} --
			if vowel[p_prev] and p_current == "i" then
					p_resolve("i̯")
			elseif vowel[p_prev] and p_current == "u" then
					p_resolve("u̯")
			end

			-- {i u} V --
			-- Only desyllabify when an earlier vowel exists in the same word.
			if vowel[p_next] and (p_current == "i" or p_current == "u") then
				local desyllabify = false

				for checker = 1, i-1 do
					if vowel[working_IPA[i - checker]] then
						desyllabify = true
						break
					elseif boundary[working_IPA[i - checker]] or working_IPA[i - checker] == nil then
						break
					end
				end

				if desyllabify then
					if p_current == "u" then
						p_resolve("u̯")
					elseif p_current == "i" then
						p_resolve("i̯")
					end
				end
			end

			-- stress --
			-- Primary stress goes on the first phone of every word, unless the
			-- input was marked as an affix by a leading/trailing hyphen.
			if (boundary[p_prev] or p_prev == nil) and is_affix == false then
				p_resolve("ˈ" .. p_current)
			end

		end

		i = i + 1
	end

	-- secondary stress --
	-- Re-insert the marks; each insertion shifts the remaining positions by one.
	if phon then
		while secondary_stress[1] do
			table.insert(working_IPA, secondary_stress[1], "ˌ")
			table.remove(secondary_stress,1)
			for no, value in ipairs(secondary_stress) do
				secondary_stress[no] = value + 1
			end
		end
	end

	working_IPA = table.concat(working_IPA)

	if phon then

		-- /xd/ /xɡ/ --
		-- Before a devoiced stop only [h] is given; otherwise both the [h] and
		-- the [ɦ] variants are returned, joined by "~".
		if mw.ustring.find(working_IPA, "(x[dɡ][̥̊])") then
			working_IPA = mw.ustring.gsub(working_IPA, "(x)([dɡ][̥̊])", "h%2")
		elseif mw.ustring.find(working_IPA, "(x[dɡ])") then
			working_IPA = mw.ustring.gsub(working_IPA, "(x)([dɡ])", "h%2") .. "~" .. mw.ustring.gsub(working_IPA, "(x)([dɡ])", "ɦ%2")
		end

	end

	-- pad pause bars with spaces
	working_IPA = mw.ustring.gsub(working_IPA, "%|", " | ")

	return working_IPA

end

--[[
Template entry point.

Positional arguments (read through Module:Arguments):
	1      the word to transcribe. If it is absent, or is itself one of the
	       flag names below, the current page's subpageText is transcribed
	       and every positional argument is treated as a flag.
	rest   flags, in any order:
	       phon              narrow transcription
	       col / colloquial  colloquial rules (implies phon)
	       cite              output "/broad/ [narrow]" in an IPA span; the
	                         bracketed part is omitted when the narrow form is
	                         just the broad form with a leading "ˈ"
	       format            wrap the result in an IPA span with // or []
	       nolarge           add the "nolarge" class to that span
	       nopref / nosuff   disable automatic prefix / suffix secondary stress

Returns the transcription, wrapped in a <span> when `cite` or `format` is set.
]]
function export.generate(frame)
	local args = getArgs(frame)
	local outputIPA = ""

	local parameters = {}
	local valid_parameter = {
		["phon"] = true,
		["col"] = true,
		["colloquial"] = true,
		["nolarge"] = true,
		["cite"] = true,
		["nopref"] = true,
		["nosuff"] = true,
		-- "format" is tested as a flag below, so it must be recognised here
		-- too; otherwise a leading |format would be transcribed as a word.
		["format"] = true,
	}
	local p = 2

	if valid_parameter[args[1]] or args[1] == nil then
		outputIPA = mw.title.getCurrentTitle().subpageText
		p = 1
	else
		outputIPA = args[1]
	end

	-- mw.log("——— Parameters ———")
	while args[p] do
		parameters[args[p]] = true
		-- mw.log(args[p] .. " = true")
		p = p + 1
	end

	-- colloquial output is always phonetic
	local col = parameters["colloquial"] or parameters["col"] or false
	if col then
		parameters["phon"] = true
	end
	local phon = parameters["phon"] or false
	local nopref = parameters["nopref"] or false
	local nosuff = parameters["nosuff"] or false

	local nolarge = ""
	if parameters["nolarge"] then
		nolarge = " nolarge"
	end
	local span_open = "<span class=\"IPA" .. nolarge .. "\">"

	outputIPA = mw.ustring.gsub(outputIPA, "(%&nbsp%;)", " ")
	outputIPA = mw.ustring.lower(outputIPA)

	if parameters["cite"] then
		local broad = generate_IPA(outputIPA, false, false, nopref, nosuff)
		local narrow = generate_IPA(outputIPA, true, col, nopref, nosuff)

		if "ˈ" .. broad ~= narrow then
			outputIPA = span_open .. "/" .. broad .. "/ [" .. narrow .. "]</span>"
		else
			outputIPA = span_open .. "/" .. broad .. "/</span>"
		end

	else
		outputIPA = generate_IPA(outputIPA, phon, col, nopref, nosuff)

		if parameters["format"] then
			if phon then
				outputIPA = span_open .. "[" .. outputIPA .. "]</span>"
			else
				outputIPA = span_open .. "/" .. outputIPA .. "/</span>"
			end
		end
	end

	-- mw.log(outputIPA)
	return outputIPA

end

return export

--[[
Debug console test string:
=p.generate(mw.getCurrentFrame():newChild{title="whatever",args={"kwilliehda", "phon"}})
]]