Module:descendants: Difference between revisions

From Laenkea
Jump to navigation Jump to search
No edit summary
No edit summary
 
(12 intermediate revisions by the same user not shown)
Line 4: Line 4:
local m_languages = require("Module:languages")
local m_languages = require("Module:languages")
local m_inline = require("Module:inline")
local m_inline = require("Module:inline")
local m_table = require("Module:table")
local see_further_text = "see there for further descendants"


function export.descendant(frame)
function export.descendant(frame)
Line 36: Line 38:
gloss = term_args.t,
gloss = term_args.t,
pos = term_args.pos,
pos = term_args.pos,
nolink = term_args.nolink,
hypo = term_args.hypo or term_args.hypothetical,
nobold = true,
nobold = true,
})
})
Line 42: Line 46:
end
end
out = out .. table.concat(terms, ", ")
out = out .. table.concat(terms, ", ")
if see_desc then out = out .. " (''see there for further descendants'')" end
if see_desc then out = out .. " (''" .. see_further_text .. "'')" end
else
else
out = out .. "— (''" .. lostcap .. "'')"
out = out .. "— (''" .. lostcap .. "'')"
Line 51: Line 55:


function export.langtree(list)
function export.langtree(list)
 
-- build in-set ancestry
-- build in-set ancestry
local ancestors = {}
local ancestors = {}
for _, lang in ipairs(list) do
for _, lang in ipairs(list) do
local function mismatch_check(base, test) -- function for processing doublets
for _, test_index in ipairs(list) do
if mw.ustring.find(test_index, "[0-9]+$") and mw.ustring.gsub(test_index, "[0-9]+$", "") == test and not m_table.contains(list, base .. mw.ustring.match(test_index, "[0-9]+$")) then
mw.addWarning("Multiple possible ancestors found for [" .. lang .. "] and mismatch of corresponding descendants detected ([" .. test_index .. "] but no [" .. base .. mw.ustring.match(test_index, "[0-9]+$") .. "]); please double-check numbering and identical code calls, as these may not have been properly triaged.")
end
end
end
-- process ancestors
ancestors[lang] = {}
ancestors[lang] = {}
if mw.ustring.find(lang, "[%>%_]") then
if mw.ustring.find(lang, "[%>%_]") then
local ancestor_temp = ""
for piece in mw.ustring.gmatch(lang, "([^%>%_]+[%>%_])") do
-- process ad hoc ancestors
ancestor_temp = ancestor_temp .. piece
local ancestor_path = ""
ancestor_add = mw.ustring.gsub(ancestor_temp, "[%>%_]$", "")
for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
table.insert(ancestors[lang], ancestor_add)
ancestor_path = ancestor_path .. piece
if mw.ustring.match(lang, "^%>") then
ancestor_path = ">" .. ancestor_path
end
table.insert(ancestors[lang], ancestor_path)
ancestor_path = ancestor_path .. delim
end
end
elseif mw.ustring.find(lang, "[0-9]+$") then
local lang_temp, index = mw.ustring.match(lang, "^([^0-9]+)([0-9]+)$")
else -- if ad hoc ancestors unspecified, i.e. the code does not contain the path symbols > or _
for _, test in ipairs(list) do
if mw.ustring.find(test, index .. "$") then
if mw.ustring.find(lang, "[0-9]+$") then
local test_temp = mw.ustring.gsub(test, index .. "$", "")
local lang_temp, index = mw.ustring.match(lang, "^([^0-9]+)([0-9]+)$")
if m_languages.get_by_code(lang_temp).lineage[test_temp] then
for _, test in ipairs(list) do
if mw.ustring.find(test, "[^0-9]" .. index .. "$") then -- matches existing historical doublets with identical, e.g. rad-o2, if rad2 is present
local test_temp = mw.ustring.gsub(test, index .. "$", "") -- get base
if m_languages.get_by_code(lang_temp).lineage[test_temp] then
table.insert(ancestors[lang], test)
end
elseif m_languages.get_by_code(lang_temp).lineage[test] and not m_table.contains(list, test .. index) then -- if test is still an ancestor to current language, e.g. rad-o, if rad2 is present, and checks if rad-o2 is not present
mismatch_check(lang_temp, test)
table.insert(ancestors[lang], test)
table.insert(ancestors[lang], test)
end
end
elseif m_languages.get_by_code(lang_temp).lineage[test] then
end
local nope = false
else
for _, contain_test in ipairs(list) do
for _, test in ipairs(list) do
if contain_test == test .. index then nope = true end
if m_languages.get_by_code(lang).lineage[test] then
mismatch_check(lang, test)
table.insert(ancestors[lang], test)
end
end
if not nope then table.insert(ancestors[lang], test) end
end
end
else
for _, test in ipairs(list) do
if m_languages.get_by_code(lang).lineage[test] then
table.insert(ancestors[lang], test)
end
end
end
end
Line 129: Line 151:
for i, term in ipairs(args) do --ryn:term1|term2|term3|rad:term1|term2 ...
for i, term in ipairs(args) do --ryn:term1|term2|term3|rad:term1|term2 ...
local lang_change = mw.ustring.match(term, "^([^%:%<]+)%:")
local lang_change = mw.ustring.match(term, "^([^%:%<]+)%:") or mw.ustring.match(term, "^(.+%>)%:")
local lang_change_arged = mw.ustring.match(term, "^(.+%>)%:")
if i == 1 and lang_change == nil then error("No language code in the first parameter") end
local from_lang, index
if lang_change then
if i == 1 and not (lang_change or lang_change_arged) then error("Language code needs to be specified in the first parameter as xyz(<bor>):term₁") end
term = mw.ustring.sub(term, mw.ustring.len(lang_change) + 2)
if lang_change or lang_change_arged then
if lang_change then
local auto_ancestor, auto_bor = mw.ustring.sub(lang_change, 1, 1) == "_", mw.ustring.sub(lang_change, 1, 2) == ">>"
current_lang = lang_change
if auto_ancestor or auto_bor then
term = mw.ustring.gsub(term, "^[^%:]+%:%s*", "")
if code == nil then error("No preceding language code specified") end
elseif lang_change_arged then
if auto_bor then lang_change = mw.ustring.sub(lang_change, 2) end
current_lang = mw.ustring.match(lang_change_arged, "^[^%<]+")
lang_change = code .. lang_change
term = mw.ustring.gsub(term, "^.+%>%:%s*", "")
end
end
if mw.ustring.find(current_lang, "[%>%_][^%:]") then
from_lang, current_lang = mw.ustring.match(current_lang, "^(.*[%>%_])([^%>%_%:]+)$")
local lang_args = {}
if mw.ustring.match(from_lang, "%>$") then
code, lang_args = m_inline.parse(lang_change)
if lang_change_arged then
-- if new language, build table
lang_change_arged = lang_change_arged .. "<bor>"
if terms[code] then
else
local base_code, old_i = mw.ustring.match(code, "^(.*[^0-9])([0-9]*)$")
lang_change_arged = "<bor>"
local new_i = 2
end
while terms[base_code .. new_i] do
new_i = new_i + 1
end
end
code = base_code .. new_i
mw.addWarning("Duplicate code detected: converted [" .. base_code .. (old_i or "") .. "] to [" .. code .. "]")
end
end
if mw.ustring.find(current_lang, "[0-9]+$") then
terms[code] = {}
current_lang, index = mw.ustring.match(current_lang, "^(.*)([0-9]+)$")
table.insert(langs, code)
end
current_lang = m_languages.get_by_code(current_lang)
current_lang = code
code = current_lang.code
if from_lang then code = from_lang .. code end
local delim
if index then code = code .. index end
lang_args["from"], delim, current_lang = mw.ustring.match(current_lang, "^(.*)([%>%_])([^%>%_%:0-9]+)[0-9]*$")
if terms[code] == nil then
if delim and delim == ">" then lang_args["bor"] = true end
terms[code] = {}
-- process arguments
table.insert(langs, code)
for lang_arg, lang_val in pairs(lang_args) do
end
    terms[code][lang_arg] = lang_val
end
if lang_change_arged then
for lang_arg in mw.ustring.gmatch(lang_change_arged, "%<([^%>]+)%>") do
if mw.ustring.find(lang_arg, "%:") then
local before, after = mw.ustring.match(lang_arg, "^([^%:]+)%:([^%:]+)$")
terms[code][before] = after
else
terms[code][lang_arg] = true
end
end
end
end
end
if from_lang then terms[code]["from"] = from_lang end
table.insert(terms[code], term)
table.insert(terms[code], term)
end
end
Line 185: Line 199:
-- BEGIN TRIAGE --
-- BEGIN TRIAGE --
 
local structure = export.langtree(langs)
local structure = export.langtree(langs)
 
local outs = {}
local outs = {}
Line 199: Line 213:
local sorter_bor_proto = {}
local sorter_bor_proto = {}
for key, _ in pairs(tab) do
for key, _ in pairs(tab) do
if mw.ustring.find(key, "%-pro[0-9]*$") then
local is_proto = mw.ustring.find(key, "%-pro[0-9]*$")
if mw.ustring.find(key, "%>[^%>%_%:]+$") then
local is_borrowed = mw.ustring.find(key, "%>[^%>%_%:]+$")
table.insert(sorter_bor_proto, key)
if is_proto then
else
if is_borrowed then table.insert(sorter_bor_proto, key) else table.insert(sorter_proto, key) end
table.insert(sorter_proto, key)
end
else
else
if mw.ustring.find(key, "%>[^%>%_%:]+$") then
if is_borrowed then table.insert(sorter_bor, key) else table.insert(sorter, key) end
table.insert(sorter_bor, key)
else
table.insert(sorter, key)
end
end
end
end
end
Line 223: Line 231:
for _, key in ipairs(sorter) do
for _, key in ipairs(sorter) do
local out = " " .. items[key]
local out = " " .. items[key]
for i = 1, it do
for i = 1, it do out = "*" .. out end -- this asterisk is for list formatting
out = "*" .. out
end
out = indenter .. out
out = indenter .. out
table.insert(outs, out)
table.insert(outs, out)
Line 248: Line 254:
Debug console test string:
Debug console test string:
=p.descendant(mw.getCurrentFrame():newChild{title="whatever",args={"rad"}})
=p.descendant(mw.getCurrentFrame():newChild{title="whatever",args={"rad"}})
=p.desctree(mw.getCurrentFrame():newChild{title="whatever",args={"ryn>rad-o:1", "aeg:2", "3", "ryn:4", "ryn>rad-o_rad:5", ">rad-pro:6", "hrd-pro:7", "ryn-o:8", "lfv-pro:9", "lfv-o:10", "lfv:11", "rad-pro:12"}})
=p.desctree(mw.getCurrentFrame():newChild{title="whatever",args={"ryn>rad-o:1", "aeg:2", "3", "ryn:4", "ryn>rad-o_rad:5", ">rad-pro:6", "hrd-pro:7", "ryn-o:8", "lfv-pro:9", "lfv-o:10", "lfv:11", "rad-pro:12", "kil<lost:oh no>:", "bal<see>:13"}})
mw.logObject(p.langtree({"rad", "rad-o", "ryn", "hrd-pro", "lfv"}))
]]--
]]--

Latest revision as of 20:50, 13 July 2024

Underlies {{descendant}} and {{descendant tree}}.


local export = {}
local getArgs = require("Module:Arguments").getArgs
local m_links = require("Module:links")
local m_languages = require("Module:languages")
local m_inline = require("Module:inline")
local m_table = require("Module:table")
local see_further_text = "see there for further descendants"

--- Render one descendant entry: optional borrowing arrow, language name, and linked terms.
-- Arguments are read through getArgs, so `frame` may be a frame object or
-- (as export.desctree does) a plain argument table.
--   [1]             language code; an ad hoc ancestor path ("ryn>rad-o_rad") and a
--                   trailing doublet index ("rad2") are stripped before lookup
--   [2], [3], ...   terms, each parsed by m_inline.parse for inline modifiers
--   borrowed/bor/b  prefix the entry with a "borrowed" arrow
--   see             append the "see there for further descendants" note
--   noname          omit the language name
--   lost            caption shown when no terms are given (defaults to "lost")
-- Returns the wikitext string for the entry.
-- Raises an error when no usable language code can be extracted from [1].
function export.descendant(frame)
	local args = getArgs(frame)

	local code = args[1]
	if code == nil then error("No language code specified") end
	if mw.ustring.find(code, "[%>%_]") then
		-- keep only the final segment of an ancestor path
		code = mw.ustring.match(code, "[%>%_]([^%>%_]+)$")
		if code == nil then
			error("No language code after the last path symbol in [" .. args[1] .. "]")
		end
	end
	-- drop a trailing doublet index; parentheses discard gsub's replacement count
	code = (mw.ustring.gsub(code, "[0-9]+$", ""))
	local lang = m_languages.get_by_code(code)

	local borrowed = args["borrowed"] or args["bor"] or args["b"]
	local see_desc = args["see"]
	local noname = args["noname"]
	local lostcap = args["lost"] or "lost"

	local pieces = {}
	if borrowed then
		table.insert(pieces, "<span class=\"desc-arrow\" title=\"borrowed\">→</span>")
	end
	if not noname then
		table.insert(pieces, lang.name .. ":&nbsp;")
	end

	if args[2] then
		local links = {}
		local i = 2
		-- stops at the first missing positional argument
		while args[i] do
			local term, term_args = m_inline.parse(args[i])
			table.insert(links, m_links.full_link({
				language = lang,
				term = term,
				alt = term_args.alt,
				anchor = term_args.anchor or term_args.a,
				gloss = term_args.t,
				pos = term_args.pos,
				nolink = term_args.nolink,
				hypo = term_args.hypo or term_args.hypothetical,
				nobold = true,
			}))
			i = i + 1
		end
		table.insert(pieces, table.concat(links, ", "))
		if see_desc then
			table.insert(pieces, " (''" .. see_further_text .. "'')")
		end
	else
		-- no terms at all: show the "lost" caption instead
		table.insert(pieces, "— (''" .. lostcap .. "'')")
	end

	return table.concat(pieces)
end

--- Arrange a flat list of language codes into a nested ancestry tree.
-- `list` is a sequence of code strings. A code may be
--   * a plain language code ("rad"),
--   * a code with a trailing doublet index ("rad2"), or
--   * an ad hoc path whose segments are joined by ">" or "_" ("ryn>rad-o_rad"),
--     in which case every proper prefix of the path is taken as an ancestor.
-- For codes without a path, ancestors are the other list members found in the
-- `lineage` table of the object returned by m_languages.get_by_code
-- (presumably keyed by ancestor code -- confirm against Module:languages).
-- Returns a table of tables: each key is a code, each value the subtree of the
-- codes attached beneath it. Codes with no in-list ancestor are the roots.
function export.langtree(list)

	-- build in-set ancestry
	local ancestors = {}
	for _, lang in ipairs(list) do

		-- Warn when an indexed doublet of `test` exists in the list (e.g. rad-o2)
		-- but the correspondingly indexed form of `base` (e.g. rad2) does not.
		local function mismatch_check(base, test)
			for _, test_index in ipairs(list) do
				local suffix = mw.ustring.match(test_index, "[0-9]+$")
				if suffix and mw.ustring.gsub(test_index, "[0-9]+$", "") == test and not m_table.contains(list, base .. suffix) then
					mw.addWarning("Multiple possible ancestors found for [" .. lang .. "] and mismatch of corresponding descendants detected ([" .. test_index .. "] but no [" .. base .. suffix .. "]); please double-check numbering and identical code calls, as these may not have been properly triaged.")
				end
			end
		end

		-- process ancestors
		ancestors[lang] = {}

		if mw.ustring.find(lang, "[%>%_]") then

			-- process ad hoc ancestors: each prefix of the path that ends just
			-- before a delimiter is an ancestor of `lang`
			local ancestor_path = ""
			-- gmatch below skips a leading ">", so restore it exactly once here;
			-- re-adding it on every iteration would yield ">>a_b" for ">a_b_c",
			-- which is not a prefix of the code and matches nothing in the list
			if mw.ustring.find(lang, "^%>") then
				ancestor_path = ">"
			end
			for piece, delim in mw.ustring.gmatch(lang, "([^%>%_]+)([%>%_])") do
				ancestor_path = ancestor_path .. piece
				table.insert(ancestors[lang], ancestor_path)
				ancestor_path = ancestor_path .. delim
			end

		else -- if ad hoc ancestors unspecified, i.e. the code does not contain the path symbols > or _

			if mw.ustring.find(lang, "[0-9]+$") then
				local lang_temp, index = mw.ustring.match(lang, "^([^0-9]+)([0-9]+)$")
				-- looked up once per language rather than once per candidate
				local lineage = m_languages.get_by_code(lang_temp).lineage
				for _, test in ipairs(list) do
					if mw.ustring.find(test, "[^0-9]" .. index .. "$") then -- matches existing historical doublets with identical index, e.g. rad-o2, if rad2 is present
						local test_temp = mw.ustring.gsub(test, index .. "$", "") -- get base
						if lineage[test_temp] then
							table.insert(ancestors[lang], test)
						end
					elseif lineage[test] and not m_table.contains(list, test .. index) then -- if test is still an ancestor to current language, e.g. rad-o, if rad2 is present, and checks if rad-o2 is not present
						mismatch_check(lang_temp, test)
						table.insert(ancestors[lang], test)
					end
				end
			else
				local lineage = m_languages.get_by_code(lang).lineage
				for _, test in ipairs(list) do
					if lineage[test] then
						mismatch_check(lang, test)
						table.insert(ancestors[lang], test)
					end
				end
			end
		end
	end

	-- codes with no in-list ancestor become the roots of the tree
	local structure = {}
	for lang, _ in pairs(ancestors) do
		if #ancestors[lang] == 0 then
			structure[lang] = {}
			ancestors[lang] = nil
		end
	end

	-- codes that already have at least one child attached
	local swept = {}
	-- Attach to each node of `superordinate` every remaining code whose only
	-- outstanding ancestor is that node, then recurse into the node's subtree.
	local function search_and_sort(superordinate)
		for mother, it in pairs(superordinate) do
			-- discard ancestors that are already placed higher up; iterate
			-- downward so table.remove does not shift unvisited entries past
			-- the loop index (a forward loop skips the element after each removal)
			for _, lang_ancestors in pairs(ancestors) do
				for i = #lang_ancestors, 1, -1 do
					if swept[lang_ancestors[i]] then table.remove(lang_ancestors, i) end
				end
			end
			for lang, lang_ancestors in pairs(ancestors) do
				if #lang_ancestors == 1 and lang_ancestors[1] == mother then
					superordinate[mother][lang] = {}
					swept[mother] = true
					-- clearing an existing key during pairs traversal is permitted
					ancestors[lang] = nil
				end
			end
			search_and_sort(it)
		end
	end

	search_and_sort(structure)
	return structure

end

--- Render a whole descendant tree as a nested wikitext bullet list.
-- Positional arguments are a flat stream of terms; an argument of the form
-- "code:term" (optionally "code<inline args>:term") switches the current
-- language, and the following bare arguments belong to that language, e.g.
--   ryn:term1 | term2 | term3 | rad:term1 | term2 ...
-- A code starting with "_" or ">>" is appended to the preceding code as an
-- inherited ("_") or borrowed (">") descendant of it.
-- Named arguments: indent / in -- string prefixed to every output line.
-- Returns the rendered list, one entry per line.
-- Raises an error if the first argument carries no language code, or if a
-- relative ("_" / ">>") code is used before any code has been set.
function export.desctree(frame)
	local args = getArgs(frame)
	local code
	local terms = {} -- code -> { term, term, ..., <named entry args> }
	local langs = {} -- codes in order of first appearance
	local indenter = args["indent"] or args["in"] or ""

	for i, term in ipairs(args) do --ryn:term1|term2|term3|rad:term1|term2 ...
		local lang_change = mw.ustring.match(term, "^([^%:%<]+)%:") or mw.ustring.match(term, "^(.+%>)%:")
		if i == 1 and lang_change == nil then error("No language code in the first parameter") end
		if lang_change then
			-- drop the "code:" prefix (+2 skips the colon as well)
			term = mw.ustring.sub(term, mw.ustring.len(lang_change) + 2)

			local auto_ancestor = mw.ustring.sub(lang_change, 1, 1) == "_"
			local auto_bor = mw.ustring.sub(lang_change, 1, 2) == ">>"
			if auto_ancestor or auto_bor then
				if code == nil then error("No preceding language code specified") end
				-- ">>x" collapses to ">x" before being appended to the previous code
				if auto_bor then lang_change = mw.ustring.sub(lang_change, 2) end
				lang_change = code .. lang_change
			end

			local lang_args
			code, lang_args = m_inline.parse(lang_change)
			-- a repeated code is renamed with the first free numeric index (from 2)
			if terms[code] then
				local base_code, old_i = mw.ustring.match(code, "^(.*[^0-9])([0-9]*)$")
				local new_i = 2
				while terms[base_code .. new_i] do
					new_i = new_i + 1
				end
				code = base_code .. new_i
				mw.addWarning("Duplicate code detected: converted [" .. base_code .. (old_i or "") .. "] to [" .. code .. "]")
			end
			terms[code] = {}
			table.insert(langs, code)

			-- split off the ancestor path; the last delimiter decides borrowing
			local from, delim = mw.ustring.match(code, "^(.*)([%>%_])([^%>%_%:0-9]+)[0-9]*$")
			lang_args["from"] = from
			if delim and delim == ">" then lang_args["bor"] = true end
			-- process arguments
			for lang_arg, lang_val in pairs(lang_args) do
				terms[code][lang_arg] = lang_val
			end
		end
		table.insert(terms[code], term)
	end

	-- render each language's entry, passing the code as positional argument 1
	local items = {}
	for lang, lang_args in pairs(terms) do
		table.insert(lang_args, 1, lang)
		items[lang] = export.descendant(lang_args)
	end

	-- BEGIN TRIAGE --

	local structure = export.langtree(langs)

	local outs = {}

	-- Emit the entries of `tab` at nesting level `depth`, each followed by its
	-- subtree. Declared local so it does not leak into the global environment.
	local function process_structure(tab, depth)
		local sorter = {}
		local sorter_proto = {}
		local sorter_bor = {}
		local sorter_bor_proto = {}
		for key, _ in pairs(tab) do
			local is_proto = mw.ustring.find(key, "%-pro[0-9]*$")
			local is_borrowed = mw.ustring.find(key, "%>[^%>%_%:]+$")
			if is_proto then
				if is_borrowed then table.insert(sorter_bor_proto, key) else table.insert(sorter_proto, key) end
			else
				if is_borrowed then table.insert(sorter_bor, key) else table.insert(sorter, key) end
			end
		end
		table.sort(sorter)
		table.sort(sorter_proto)
		table.sort(sorter_bor)
		table.sort(sorter_bor_proto)
		-- final order: inherited, borrowed, proto, borrowed proto
		for _, key in ipairs(sorter_bor) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_proto) do table.insert(sorter, key) end
		for _, key in ipairs(sorter_bor_proto) do table.insert(sorter, key) end

		local bullets = string.rep("*", depth) -- these asterisks are for list formatting
		for _, key in ipairs(sorter) do
			table.insert(outs, indenter .. bullets .. " " .. items[key])
			process_structure(tab[key], depth + 1)
		end
	end

	process_structure(structure, 1)

	local render = table.concat(outs, "\n")

	-- strip a single leading whitespace character; parentheses discard gsub's count
	return (mw.ustring.gsub(render, "^%s", ""))

end

return export

--[[
Debug console test string:
=p.descendant(mw.getCurrentFrame():newChild{title="whatever",args={"rad"}})
=p.desctree(mw.getCurrentFrame():newChild{title="whatever",args={"ryn>rad-o:1", "aeg:2", "3", "ryn:4", "ryn>rad-o_rad:5", ">rad-pro:6", "hrd-pro:7", "ryn-o:8", "lfv-pro:9", "lfv-o:10", "lfv:11", "rad-pro:12", "kil<lost:oh no>:", "bal<see>:13"}})
mw.logObject(p.langtree({"rad", "rad-o", "ryn", "hrd-pro", "lfv"}))
]]--