Module:descendants

From Laenkea
Jump to navigation Jump to search

Underlies {{descendant}} and {{descendant tree}}.


-- Table of functions exported by this module; returned at the end of the file.
local export = {}
-- Argument normaliser; applied to the `frame` received by each entry point below.
local getArgs = require("Module:Arguments").getArgs
-- Provides full_link, used to render each descendant term.
local m_links = require("Module:links")
-- Provides get_by_code, used to look up a language object (with .name and .lineage) from its code.
local m_languages = require("Module:languages")
-- Provides parse, used to split a value into its base text and a table of inline modifiers.
local m_inline = require("Module:inline")
-- Provides contains, used to test membership in the list of language codes.
local m_table = require("Module:table")
-- Note appended (in italics) after the terms when the "see" argument is set.
local see_further_text = "see there for further descendants"

--- Render one descendant entry: optional borrowing arrow, language name and linked terms.
--
-- `frame` is passed through getArgs, so it may be a frame object or (as done by
-- export.desctree) a plain table of arguments. Arguments read:
--   1                    language code; may carry a tree path ("ryn>rad-o_rad") and/or a
--                        trailing disambiguation number ("rad2"), both of which are stripped
--   2, 3, ...            terms, each parsed by m_inline.parse for inline modifiers
--                        (alt, anchor/a, t, pos, nolink, hypo/hypothetical)
--   borrowed / bor / b   prepend the borrowing arrow
--   see                  append the "see there for further descendants" note (only with terms)
--   noname               suppress the language name
--   lost                 text shown in place of the terms when none are given (default "lost")
--
-- Returns the wikitext/HTML string for the entry.
-- Raises an error when the language code is missing, has nothing after its last path
-- symbol, or cannot be resolved while its name is needed.
function export.descendant(frame)
	local args = getArgs(frame)

	local code = args[1]
	if code == nil then error("No language code in the first parameter") end

	-- In a tree path only the last segment names the language of this entry.
	if mw.ustring.find(code, "[%>%_]") then
		code = mw.ustring.match(code, "[%>%_]([^%>%_]+)$")
		if code == nil then
			error("No language code after the last path symbol in [" .. args[1] .. "]")
		end
	end
	-- Drop a trailing disambiguation number; gsub is a no-op when there is none.
	-- (Single-target assignment discards gsub's second result, the match count.)
	code = mw.ustring.gsub(code, "[0-9]+$", "")

	local lang = m_languages.get_by_code(code)
	local borrowed = args["borrowed"] or args["bor"] or args["b"]
	local lostcap = args["lost"] or "lost"

	local pieces = {}
	if borrowed then
		table.insert(pieces, "<span class=\"desc-arrow\" title=\"borrowed\">→</span>")
	end
	if not args["noname"] then
		-- Only insist on a resolved language where its name is actually required.
		if lang == nil then error("Unknown language code [" .. code .. "]") end
		table.insert(pieces, lang.name .. ":&nbsp;")
	end

	if args[2] then
		-- Collect one formatted link per positional term, stopping at the first gap.
		local links = {}
		local i = 2
		while args[i] do
			local term, term_args = m_inline.parse(args[i])
			table.insert(links, m_links.full_link({
				language = lang,
				term = term,
				alt = term_args.alt,
				anchor = term_args.anchor or term_args.a,
				gloss = term_args.t,
				pos = term_args.pos,
				nolink = term_args.nolink,
				hypo = term_args.hypo or term_args.hypothetical,
				nobold = true,
			}))
			i = i + 1
		end
		table.insert(pieces, table.concat(links, ", "))
		if args["see"] then
			table.insert(pieces, " (''" .. see_further_text .. "'')")
		end
	else
		-- No terms at all: show a dash with the "lost" caption instead.
		table.insert(pieces, "— (''" .. lostcap .. "'')")
	end

	return table.concat(pieces)
end

--- Arrange a flat list of language codes into a nested ancestry tree.
--
-- `list` is an array of code strings. A code may be:
--   * a plain language code ("rad"),
--   * a code with a trailing number marking a doublet ("rad2"),
--   * an ad hoc path whose segments are joined by ">" or "_" ("ryn>rad-o_rad"),
--     optionally starting with ">".
-- For path codes the ancestors are the successive path prefixes. For all other codes
-- the ancestors are those members of `list` found in the language's `lineage` table
-- (as returned by m_languages.get_by_code).
--
-- Returns a table keyed by code, where each value is the (possibly empty) table of that
-- code's direct children, nested recursively. Codes with no ancestor in `list` are roots.
-- May emit page warnings through mw.addWarning when doublet numbering looks inconsistent.
function export.langtree(list)

	-- Doublet sanity check: for every listed code that is `test` plus a number, the same
	-- number appended to `base` should be listed too; otherwise warn on behalf of `lang`.
	local function mismatch_check(lang, base, test)
		for _, entry in ipairs(list) do
			local suffix = mw.ustring.match(entry, "[0-9]+$")
			if suffix then
				local stem = mw.ustring.gsub(entry, "[0-9]+$", "")
				if stem == test and not m_table.contains(list, base .. suffix) then
					mw.addWarning("Multiple possible ancestors found for [" .. lang .. "] and mismatch of corresponding descendants detected ([" .. entry .. "] but no [" .. base .. suffix .. "]); please double-check numbering and identical code calls, as these may not have been properly triaged.")
				end
			end
		end
	end

	-- build in-set ancestry: ancestors[code] = array of codes from `list` (or path prefixes)
	local ancestors = {}
	for _, lang in ipairs(list) do
		local found = {}
		ancestors[lang] = found

		if mw.ustring.find(lang, "[%>%_]") then

			-- Ad hoc ancestors: every proper prefix of the path that ends before a delimiter.
			-- A leading ">" is not captured by the segment pattern, so it is seeded once here;
			-- re-adding it for every segment would corrupt prefixes of paths with 3+ segments.
			local ancestor_path = mw.ustring.find(lang, "^%>") and ">" or ""
			for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
				ancestor_path = ancestor_path .. piece
				table.insert(found, ancestor_path)
				ancestor_path = ancestor_path .. delim
			end

		elseif mw.ustring.find(lang, "[0-9]+$") then

			-- Numbered doublet without an explicit path, e.g. rad2.
			local lang_temp, index = mw.ustring.match(lang, "^([^0-9]+)([0-9]+)$")
			local lineage = m_languages.get_by_code(lang_temp).lineage
			for _, test in ipairs(list) do
				if mw.ustring.find(test, "[^0-9]" .. index .. "$") then
					-- Same-numbered entry (e.g. rad-o2 for rad2): compare by its base code.
					local test_temp = mw.ustring.gsub(test, index .. "$", "")
					if lineage[test_temp] then
						table.insert(found, test)
					end
				elseif lineage[test] and not m_table.contains(list, test .. index) then
					-- Unnumbered ancestor (e.g. rad-o) with no same-numbered doublet listed.
					mismatch_check(lang, lang_temp, test)
					table.insert(found, test)
				end
			end

		else

			-- Plain code: any listed code present in this language's lineage is an ancestor.
			local lineage = m_languages.get_by_code(lang).lineage
			for _, test in ipairs(list) do
				if lineage[test] then
					mismatch_check(lang, lang, test)
					table.insert(found, test)
				end
			end

		end
	end

	-- Codes without any in-set ancestor become the roots of the tree.
	-- (Clearing an existing key while traversing with pairs is permitted.)
	local structure = {}
	for lang, lang_ancestors in pairs(ancestors) do
		if #lang_ancestors == 0 then
			structure[lang] = {}
			ancestors[lang] = nil
		end
	end

	-- Codes that have already received at least one child.
	local swept = {}

	-- Attach to each node of `superordinate` the codes whose only remaining ancestor is
	-- that node, then recurse into the node's children.
	local function search_and_sort(superordinate)
		for mother, children in pairs(superordinate) do
			-- Discard ancestors that are already placed higher up. Iterate downwards so that
			-- table.remove never shifts an unvisited element past the cursor.
			for _, remaining in pairs(ancestors) do
				for i = #remaining, 1, -1 do
					if swept[remaining[i]] then table.remove(remaining, i) end
				end
			end
			for lang, remaining in pairs(ancestors) do
				if #remaining == 1 and remaining[1] == mother then
					children[lang] = {}
					swept[mother] = true
					ancestors[lang] = nil
				end
			end
			search_and_sort(children)
		end
	end

	search_and_sort(structure)
	return structure

end

--- Render a full descendant tree as wikitext list markup.
--
-- Positional arguments form a stream such as
--   ryn:term1 | term2 | rad:term1 | ...
-- A value of the form "code:term" starts a new language; values without a code add
-- further terms to the current language. Codes may carry inline modifiers (parsed by
-- m_inline.parse) and ad hoc paths joined by ">" (borrowed) or "_" (inherited).
-- A code starting with "_" or ">>" is appended to the previous code as its descendant.
-- A code that was already used is renumbered (with a page warning) so it stays unique.
--
-- Named arguments: indent / in - text prepended to every output line.
--
-- Returns the list markup, one "*"-prefixed line per language, nested according to
-- export.langtree. Raises an error when the first parameter has no language code or
-- when "_"/">>" is used with no preceding code.
function export.desctree(frame)
	local args = getArgs(frame)
	local code
	local terms = {}  -- code -> arguments for export.descendant (terms plus named options)
	local langs = {}  -- codes in order of first appearance
	local indenter = args["indent"] or args["in"] or ""

	for i, term in ipairs(args) do --ryn:term1|term2|term3|rad:term1|term2 ...
		local lang_change = mw.ustring.match(term, "^([^%:%<]+)%:") or mw.ustring.match(term, "^(.+%>)%:")
		if i == 1 and lang_change == nil then error("No language code in the first parameter") end
		if lang_change then
			-- Everything after "code:" is the first term of the new language.
			term = mw.ustring.sub(term, mw.ustring.len(lang_change) + 2)

			local auto_ancestor = mw.ustring.sub(lang_change, 1, 1) == "_"
			local auto_bor = mw.ustring.sub(lang_change, 1, 2) == ">>"
			if auto_ancestor or auto_bor then
				if code == nil then error("No preceding language code specified") end
				-- ">>x" becomes "<previous>>x"; "_x" becomes "<previous>_x".
				if auto_bor then lang_change = mw.ustring.sub(lang_change, 2) end
				lang_change = code .. lang_change
			end

			local lang_args
			code, lang_args = m_inline.parse(lang_change)

			-- Keep codes unique: a repeated code gets the first free numeric suffix.
			if terms[code] then
				local base_code, old_i = mw.ustring.match(code, "^(.*[^0-9])([0-9]*)$")
				local new_i = 2
				while terms[base_code .. new_i] do
					new_i = new_i + 1
				end
				code = base_code .. new_i
				mw.addWarning("Duplicate code detected: converted [" .. base_code .. (old_i or "") .. "] to [" .. code .. "]")
			end
			terms[code] = {}
			table.insert(langs, code)

			-- Split off the path leading to the final segment; a ">" delimiter marks a borrowing.
			local from, delim = mw.ustring.match(code, "^(.*)([%>%_])([^%>%_%:0-9]+)[0-9]*$")
			lang_args["from"] = from
			if delim and delim == ">" then lang_args["bor"] = true end

			-- Carry the inline modifiers over as named arguments of this language.
			for lang_arg, lang_val in pairs(lang_args) do
				terms[code][lang_arg] = lang_val
			end
		end
		table.insert(terms[code], term)
	end

	-- Render every language's line up front; the tree walk below only orders them.
	local items = {}
	for lang, lang_args in pairs(terms) do
		table.insert(lang_args, 1, lang)
		items[lang] = export.descendant(lang_args)
	end

	-- BEGIN TRIAGE --

	local structure = export.langtree(langs)
	local outs = {}

	-- Emit the children of `tab` at the given list depth, then recurse into each child.
	-- Declared local so it does not leak into the global environment.
	local function process_structure(tab, depth)
		-- Sibling order: plain codes, borrowed codes, proto-languages, borrowed
		-- proto-languages; alphabetical within each group.
		local sorter = {}
		local sorter_proto = {}
		local sorter_bor = {}
		local sorter_bor_proto = {}
		for key, _ in pairs(tab) do
			local is_proto = mw.ustring.find(key, "%-pro[0-9]*$")
			local is_borrowed = mw.ustring.find(key, "%>[^%>%_%:]+$")
			local bucket
			if is_proto then
				bucket = is_borrowed and sorter_bor_proto or sorter_proto
			else
				bucket = is_borrowed and sorter_bor or sorter
			end
			table.insert(bucket, key)
		end
		table.sort(sorter)
		table.sort(sorter_proto)
		table.sort(sorter_bor)
		table.sort(sorter_bor_proto)
		for _, key in ipairs(sorter_bor) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_proto) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_bor_proto) do table.insert(sorter, key) end

		-- One asterisk per nesting level produces wikitext list formatting.
		local bullets = string.rep("*", depth)
		for _, key in ipairs(sorter) do
			table.insert(outs, indenter .. bullets .. " " .. items[key])
			process_structure(tab[key], depth + 1)
		end
	end

	process_structure(structure, 1)

	local render = table.concat(outs, "\n")

	-- Strip a single leading whitespace character, if any; the parentheses drop
	-- gsub's second result (the substitution count).
	return (mw.ustring.gsub(render, "^%s", ""))

end

-- Expose descendant, langtree and desctree to callers of this module.
return export

--[[
Debug console test string:
=p.descendant(mw.getCurrentFrame():newChild{title="whatever",args={"rad"}})
=p.desctree(mw.getCurrentFrame():newChild{title="whatever",args={"ryn>rad-o:1", "aeg:2", "3", "ryn:4", "ryn>rad-o_rad:5", ">rad-pro:6", "hrd-pro:7", "ryn-o:8", "lfv-pro:9", "lfv-o:10", "lfv:11", "rad-pro:12", "kil<lost:oh no>:", "bal<see>:13"}})
mw.logObject(p.langtree({"rad", "rad-o", "ryn", "hrd-pro", "lfv"}))
]]--