Module:descendants: Difference between revisions

From Laenkea
Jump to navigation Jump to search
(Fixed bug in which descendants of direct borrowings were not showing up)
No edit summary
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
local export = {}
local export = {}
local getArgs = require("Module:Arguments").getArgs
local getArgs = require("Module:Arguments").getArgs
Line 79: Line 77:
for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
ancestor_path = ancestor_path .. piece
ancestor_path = ancestor_path .. piece
if mw.ustring.gmatch(lang, "^%>") then
if mw.ustring.match(lang, "^%>") then
ancestor_path = ">" .. ancestor_path
ancestor_path = ">" .. ancestor_path
end
end

Latest revision as of 20:50, 13 July 2024

Underlies {{descendant}} and {{descendant tree}}.


local export = {}
local getArgs = require("Module:Arguments").getArgs
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_inline = require("Module:inline")
local m_table = require("Module:table")
-- Parenthetical note appended after a descendant's terms when the `see` argument is set.
local see_further_text = "see there for further descendants"

-- Renders one descendant entry: an optional borrowing arrow, the language
-- name, and either the linked terms or a "lost" placeholder.
--
-- Arguments (read through Module:Arguments, so `frame` may be a frame or,
-- as export.desctree does, a plain table):
--   1                 language code; may be a path such as "ryn>rad-o_rad"
--                     (only the last segment is used) and may carry a
--                     trailing number (stripped before lookup)
--   2, 3, ...         terms, each parsed by Module:inline for modifiers
--   borrowed/bor/b    if set, prefix the entry with a borrowing arrow
--   see               if set (and terms exist), append the "see there" note
--   noname            if set, omit the language name
--   lost              replacement caption for the default "lost"
--
-- Returns the wikitext string for the entry.
-- Raises an error if no usable language code can be derived from argument 1.
function export.descendant(frame)
	local args = getArgs(frame)

	local code = args[1]
	if code == nil then
		error("No language code specified")
	end

	-- For a path, the language being displayed is the final segment.
	if mw.ustring.find(code, "[%>%_]") then
		code = mw.ustring.match(code, "[%>%_]([^%>%_]+)$")
		if code == nil then
			error("No language code found at the end of the path [" .. args[1] .. "]")
		end
	end

	-- Drop a trailing doublet number (e.g. "rad2" -> "rad"); gsub's second
	-- return value (the count) is discarded by the single-target assignment.
	code = mw.ustring.gsub(code, "[0-9]+$", "")

	local lang = m_languages.get_by_code(code)
	-- NOTE(review): get_by_code may already raise for unknown codes; this
	-- guard only matters if it returns nil instead.
	if lang == nil then
		error("Unknown language code [" .. code .. "]")
	end

	local borrowed = args["borrowed"] or args["bor"] or args["b"]
	local see_desc = args["see"]
	local noname = args["noname"]
	local lostcap = args["lost"] or "lost"

	local out = ""
	if borrowed then
		out = "<span class=\"desc-arrow\" title=\"borrowed\">→</span>" .. out
	end
	if not noname then
		out = out .. lang.name .. ":&nbsp;"
	end

	if args[2] then
		-- Link every positional term from 2 onward, stopping at the first gap.
		local terms = {}
		local i = 2
		while args[i] do
			local term, term_args = m_inline.parse(args[i])
			table.insert(terms, m_links.full_link({
				language = lang,
				term = term,
				alt = term_args.alt,
				anchor = term_args.anchor or term_args.a,
				gloss = term_args.t,
				pos = term_args.pos,
				nolink = term_args.nolink,
				hypo = term_args.hypo or term_args.hypothetical,
				nobold = true,
			}))
			i = i + 1
		end
		out = out .. table.concat(terms, ", ")
		if see_desc then
			out = out .. " (''" .. see_further_text .. "'')"
		end
	else
		-- No terms given: show a dash with the "lost" caption.
		out = out .. "— (''" .. lostcap .. "'')"
	end

	return out
end

-- Arranges a flat list of language codes into a nested tree of descendants.
--
-- `list` is a sequence of codes. A code may be:
--   * a plain code ("rad"), whose ancestors are looked up in the `lineage`
--     table of its Module:languages object;
--   * a numbered doublet ("rad2"), matched against identically numbered
--     ancestors where those exist;
--   * an ad hoc path ("ryn>rad-o_rad"), whose ancestors are its own prefixes.
--
-- Returns a table keyed by root codes; each value is a table of that code's
-- direct children, nested recursively (leaves are empty tables).
-- May emit wiki warnings via mw.addWarning when doublet numbering looks
-- inconsistent.
function export.langtree(list)

	-- build in-set ancestry
	local ancestors = {}
	for _, lang in ipairs(list) do

		-- Warns when `test` has numbered doublets in the list that have no
		-- correspondingly numbered `base` entry.
		local function mismatch_check(base, test)
			for _, test_index in ipairs(list) do
				if mw.ustring.find(test_index, "[0-9]+$") and mw.ustring.gsub(test_index, "[0-9]+$", "") == test and not m_table.contains(list, base .. mw.ustring.match(test_index, "[0-9]+$")) then
					mw.addWarning("Multiple possible ancestors found for [" .. lang .. "] and mismatch of corresponding descendants detected ([" .. test_index .. "] but no [" .. base .. mw.ustring.match(test_index, "[0-9]+$") .. "]); please double-check numbering and identical code calls, as these may not have been properly triaged.")
				end
			end
		end

		-- process ancestors
		ancestors[lang] = {}

		if mw.ustring.find(lang, "[%>%_]") then

			-- process ad hoc ancestors: every proper prefix of the path that
			-- ends just before a delimiter is an ancestor.
			-- The gmatch pattern skips a leading ">", so restore it exactly
			-- once here; re-adding it on every iteration would corrupt the
			-- second and later prefixes (">a>b" would become ">>a>b").
			local ancestor_path = ""
			if mw.ustring.match(lang, "^%>") then
				ancestor_path = ">"
			end
			for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
				ancestor_path = ancestor_path .. piece
				table.insert(ancestors[lang], ancestor_path)
				ancestor_path = ancestor_path .. delim
			end

		else -- if ad hoc ancestors unspecified, i.e. the code does not contain the path symbols > or _

			if mw.ustring.find(lang, "[0-9]+$") then
				local lang_temp, index = mw.ustring.match(lang, "^([^0-9]+)([0-9]+)$")
				-- The language object does not change inside the loop, so look it up once.
				local lineage = m_languages.get_by_code(lang_temp).lineage
				for _, test in ipairs(list) do
					if mw.ustring.find(test, "[^0-9]" .. index .. "$") then -- matches existing historical doublets with identical, e.g. rad-o2, if rad2 is present
						local test_temp = mw.ustring.gsub(test, index .. "$", "") -- get base
						if lineage[test_temp] then
							table.insert(ancestors[lang], test)
						end
					elseif lineage[test] and not m_table.contains(list, test .. index) then -- if test is still an ancestor to current language, e.g. rad-o, if rad2 is present, and checks if rad-o2 is not present
						mismatch_check(lang_temp, test)
						table.insert(ancestors[lang], test)
					end
				end
			else
				local lineage = m_languages.get_by_code(lang).lineage
				for _, test in ipairs(list) do
					if lineage[test] then
						mismatch_check(lang, test)
						table.insert(ancestors[lang], test)
					end
				end
			end
		end
	end

	-- Codes with no ancestor inside the list become the roots of the tree.
	local structure = {}
	for lang, _ in pairs(ancestors) do
		if #ancestors[lang] == 0 then
			structure[lang] = {}
			ancestors[lang] = nil
		end
	end

	-- Codes that have already received at least one child.
	local swept = {}

	-- Walks the tree depth-first. Before each node is handled, ancestors that
	-- have already been swept are dropped from every pending list; a pending
	-- code whose only remaining ancestor is the current node is attached to it.
	local function search_and_sort(superordinate)
		for mother, it in pairs(superordinate) do
			for _, lang_ancestors in pairs(ancestors) do
				-- Walk backwards so that table.remove does not shift
				-- not-yet-visited entries past the loop index.
				for i = #lang_ancestors, 1, -1 do
					if swept[lang_ancestors[i]] then
						table.remove(lang_ancestors, i)
					end
				end
			end
			for lang, lang_ancestors in pairs(ancestors) do
				if #lang_ancestors == 1 and lang_ancestors[1] == mother then
					superordinate[mother][lang] = {}
					swept[mother] = true
					ancestors[lang] = nil
				end
			end
			search_and_sort(it)
		end
	end

	search_and_sort(structure)
	return structure

end

-- Renders a whole descendant tree as a wikitext bulleted list.
--
-- Positional arguments are terms; a term prefixed with "code:" starts a new
-- language, and following unprefixed terms belong to that language, e.g.
--   ryn:term1|term2|rad:term1|...
-- A code beginning with "_" or ">>" is appended to the preceding code to form
-- an ad hoc path (inherited or borrowed respectively). Inline modifiers on the
-- code (parsed by Module:inline) are forwarded to export.descendant.
-- Named arguments: indent / in — string placed before each line's asterisks.
--
-- Returns the rendered list as a single string.
-- Raises an error if the first parameter has no language code, or if a
-- relative code ("_x", ">>x") has no preceding code.
function export.desctree(frame)
	local args = getArgs(frame)
	local code
	local terms = {}
	local langs = {}
	local indenter = args["indent"] or args["in"] or ""

	for i, term in ipairs(args) do --ryn:term1|term2|term3|rad:term1|term2 ...
		local lang_change = mw.ustring.match(term, "^([^%:%<]+)%:") or mw.ustring.match(term, "^(.+%>)%:")
		if i == 1 and lang_change == nil then error("No language code in the first parameter") end
		if lang_change then
			-- Strip "code:" from the front, leaving only the term.
			term = mw.ustring.sub(term, mw.ustring.len(lang_change) + 2)

			local auto_ancestor = mw.ustring.sub(lang_change, 1, 1) == "_"
			local auto_bor = mw.ustring.sub(lang_change, 1, 2) == ">>"
			if auto_ancestor or auto_bor then
				if code == nil then error("No preceding language code specified") end
				-- ">>x" becomes "<previous>>x": drop one of the two ">" first.
				if auto_bor then lang_change = mw.ustring.sub(lang_change, 2) end
				lang_change = code .. lang_change
			end

			local lang_args
			code, lang_args = m_inline.parse(lang_change)
			-- A repeated code gets the next free number appended so that both
			-- entries survive as separate keys.
			if terms[code] then
				local base_code, old_i = mw.ustring.match(code, "^(.*[^0-9])([0-9]*)$")
				local new_i = 2
				while terms[base_code .. new_i] do
					new_i = new_i + 1
				end
				code = base_code .. new_i
				mw.addWarning("Duplicate code detected: converted [" .. base_code .. (old_i or "") .. "] to [" .. code .. "]")
			end
			terms[code] = {}
			table.insert(langs, code)

			-- For a path, record what precedes the last delimiter as "from";
			-- a ">" as that delimiter marks the entry as borrowed.
			local from, delim = mw.ustring.match(code, "^(.*)([%>%_])([^%>%_%:0-9]+)[0-9]*$")
			lang_args["from"] = from
			if delim == ">" then lang_args["bor"] = true end
			-- process arguments
			for lang_arg, lang_val in pairs(lang_args) do
				terms[code][lang_arg] = lang_val
			end
		end
		table.insert(terms[code], term)
	end

	-- Render each language's line; the code itself becomes argument 1.
	local items = {}
	for lang, lang_args in pairs(terms) do
		table.insert(lang_args, 1, lang)
		items[lang] = export.descendant(lang_args)
	end

	-- BEGIN TRIAGE --

	local structure = export.langtree(langs)

	local outs = {}
	local depth = 0

	-- Emits the lines for one level of the tree, then recurses into each
	-- child. Declared local so that it does not leak into the global table.
	local function process_structure(tab)
		depth = depth + 1

		-- Order within a level: plain codes, borrowed codes, proto-languages,
		-- borrowed proto-languages; each group sorted alphabetically.
		local sorter = {}
		local sorter_proto = {}
		local sorter_bor = {}
		local sorter_bor_proto = {}
		for key, _ in pairs(tab) do
			local is_proto = mw.ustring.find(key, "%-pro[0-9]*$")
			local is_borrowed = mw.ustring.find(key, "%>[^%>%_%:]+$")
			if is_proto then
				if is_borrowed then table.insert(sorter_bor_proto, key) else table.insert(sorter_proto, key) end
			else
				if is_borrowed then table.insert(sorter_bor, key) else table.insert(sorter, key) end
			end
		end
		table.sort(sorter)
		table.sort(sorter_proto)
		table.sort(sorter_bor)
		table.sort(sorter_bor_proto)
		for _, key in ipairs(sorter_bor) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_proto) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_bor_proto) do table.insert(sorter, key) end

		for _, key in ipairs(sorter) do
			-- One asterisk per nesting level gives wikitext list indentation.
			table.insert(outs, indenter .. string.rep("*", depth) .. " " .. items[key])
			process_structure(tab[key])
		end
		depth = depth - 1
	end

	process_structure(structure)

	-- Assign to a single local so gsub's second result (the replacement
	-- count) is dropped and only the string is returned.
	local render = mw.ustring.gsub(table.concat(outs, "\n"), "^%s", "")

	return render

end

-- Hand the table of exported functions back to whatever loads this module.
return export

--[[
Debug console test string:
=p.descendant(mw.getCurrentFrame():newChild{title="whatever",args={"rad"}})
=p.desctree(mw.getCurrentFrame():newChild{title="whatever",args={"ryn>rad-o:1", "aeg:2", "3", "ryn:4", "ryn>rad-o_rad:5", ">rad-pro:6", "hrd-pro:7", "ryn-o:8", "lfv-pro:9", "lfv-o:10", "lfv:11", "rad-pro:12", "kil<lost:oh no>:", "bal<see>:13"}})
mw.logObject(p.langtree({"rad", "rad-o", "ryn", "hrd-pro", "lfv"}))
]]--