Module:etymology

Revision as of 10:25, 20 February 2024 by Maria (talk | contribs)

The etymology module provides functionality for various etymology templates.


-- Table of public functions; it is returned at the end of the module.
local export = {}

local m_languages = require("Module:languages")
local m_links = require("Module:links")
local m_parameters = require("Module:parameters")

-- Parameter specification handed to m_parameters.process for templates
-- that take no term.
--   1      : language code (looked up with m_languages.get_by_code)
--   nocap  : forwarded to export.format_glossary as its nocap argument
--   notext : when set, no text is produced
--   nocat  : when set, no categories are produced
local no_term_params = {
    [1] = { required = true },
    nocap = { type = "boolean" },
    notext = { type = "boolean" },
    nocat = { type = "boolean" },
}
-- Per-template settings for the no-term templates, keyed by template name.
--   categories : category name suffixes; the language name is prepended
--   glossary   : when truthy, the displayed text links to the glossary
local no_term_data = {
    onomatopoeic = { categories = { "onomatopoeias" }, glossary = true },
    unknown = { categories = { "terms with unknown etymologies" } },
}

-- Parameter specification handed to m_parameters.process for templates
-- that reference a single term in the entry's own language.
--   1          : language code
--   2          : the term that is linked
--   3 / alt    : passed to m_links.full_link as `alt`
--   4 / t      : passed to m_links.full_link as `gloss`
--   pos        : passed to m_links.full_link as `pos`
--   nocap, notext, nocat : boolean switches for capitalisation, text
--                          and categories
local single_term_params = {
    [1] = { required = true },
    [2] = { required = true },
    [3] = { alias_of = "alt" },
    [4] = { alias_of = "t" },
    alt = {},
    t = {},
    pos = {},
    nocap = { type = "boolean" },
    notext = { type = "boolean" },
    nocat = { type = "boolean" },
}
-- Per-template settings for the single-term templates, keyed by template
-- name. Every entry links its label to the glossary and contributes one
-- category suffix, to which the language name is prepended.
local single_term_data = {
    abbreviation = { glossary = true, categories = { "abbreviations" } },
    ["back-formation"] = { glossary = true, categories = { "back-formations" } },
    clipping = { glossary = true, categories = { "clippings" } },
    doublet = { glossary = true, categories = { "doublets" } },
    ellipsis = { glossary = true, categories = { "ellipses" } },
    initialism = { glossary = true, categories = { "initialisms" } },
    rebracketing = { glossary = true, categories = { "rebracketings" } },
    reduplication = { glossary = true, categories = { "reduplications" } },
}

-- Parameter specification handed to m_parameters.process for templates
-- that derive a term from another language.
--   1          : code of the entry's language
--   2          : code of the source language
--   3          : the source term that is linked
--   4 / alt    : passed to m_links.full_link as `alt`
--   5 / t      : passed to m_links.full_link as `gloss`
--   pos        : passed to m_links.full_link as `pos`
--   nocap, notext, nocat : boolean switches for capitalisation, text
--                          and categories
local derived_term_params = {
    [1] = { required = true },
    [2] = { required = true },
    [3] = { required = true },
    [4] = { alias_of = "alt" },
    [5] = { alias_of = "t" },
    alt = {},
    t = {},
    pos = {},
    nocap = { type = "boolean" },
    notext = { type = "boolean" },
    nocat = { type = "boolean" },
}
-- Per-template settings for the derived-term templates, keyed by template
-- name.
--   silent     : when set, no lead-in text is printed before the link
--   glossary   : true links the label to the glossary under the template
--                name; a string is used as the glossary anchor instead
--   categories : category stems; the entry language name is prepended and
--                the source language name appended
local derived_term_data = {
    -- Templates that only print the linked term.
    borrowed = { silent = true, categories = { "terms borrowed from" } },
    derived = { silent = true, categories = { "terms derived from" } },
    inherited = { silent = true, categories = { "terms inherited from" } },

    -- Templates that print a glossary-linked label first.
    calque = { glossary = "calque", categories = { "terms calqued from" } },
    ["learned borrowing"] = {
        glossary = true,
        categories = { "terms borrowed from", "learned borrowings from" },
    },
    ["orthographic borrowing"] = {
        glossary = true,
        categories = { "terms borrowed from", "orthographic borrowings from" },
    },
    ["semantic loan"] = {
        glossary = true,
        categories = { "terms derived from", "semantic loans from" },
    },
    ["phono-semantic matching"] = {
        glossary = true,
        categories = { "terms derived from", "phono-semantic matchings from" },
    },
    transliteration = {
        glossary = true,
        categories = { "terms derived from", "transliterations of" },
    },
}

-- Parameter specification handed to m_parameters.process for the affix
-- template.
--   1     : code of the entry's language
--   2...  : the parts of the word, collected into a list
--   t, l, alt, pos, noaff : per-part lists (holes allowed), indexed like
--           the parts: gloss, source language code, alternative display,
--           part of speech, and "do not classify this part as an affix"
--   nocat : when set, no categories are produced
local affix_params = {
    [1] = { required = true },
    [2] = { list = true },
    t = { list = true, allow_holes = true },
    l = { list = true, allow_holes = true },
    alt = { list = true, allow_holes = true },
    pos = { list = true, allow_holes = true },
    noaff = { list = true, allow_holes = true, type = "boolean" },
    nocat = { type = "boolean" },
}
-- Set of characters that mark the attaching edge of an affix.
local affix_delimiter = {}
for _, mark in ipairs({ "-", "·" }) do
    affix_delimiter[mark] = true
end

-- Truthy when both the first and the last character of `word` are affix
-- delimiters.
local function is_infix(word)
    local leading = affix_delimiter[mw.ustring.sub(word, 1, 1)]
    if not leading then
        return leading
    end
    return affix_delimiter[mw.ustring.sub(word, -1)]
end

-- Truthy when the last character of `word` is an affix delimiter.
local function is_prefix(word)
    local last_char = mw.ustring.sub(word, -1)
    return affix_delimiter[last_char]
end

-- Truthy when the first character of `word` is an affix delimiter.
local function is_suffix(word)
    local first_char = mw.ustring.sub(word, 1, 1)
    return affix_delimiter[first_char]
end

-- Appends one "[[Category:...]]" link per entry of `categories` to `out`
-- and returns the result. With no categories, `out` is returned as is.
function export.format_etymology(out, categories)
    local links = {}
    for position, name in ipairs(categories) do
        links[position] = "[[Category:" .. name .. "]]"
    end
    if #links == 0 then
        return out
    end
    return out .. table.concat(links)
end

-- Formats the label of an etymology template.
--   text         : the label to display (callers in this module pass the
--                  template name)
--   glossary_arg : falsy for plain text; true to link to the glossary
--                  anchor named `text`; a string to link to that anchor
--   nocap_arg    : when truthy, the label is left uncapitalised
-- Returns the label, wrapped in an [[Appendix:Glossary#...]] link when
-- `glossary_arg` is truthy.
function export.format_glossary(text, glossary_arg, nocap_arg)
    local display = text
    if not nocap_arg then
        -- The "%l" class of mw.ustring.gsub matches any Unicode lowercase
        -- letter, so the replacement must be Unicode-aware as well:
        -- string.upper only converts ASCII and would leave a non-ASCII
        -- initial untouched. The assignment keeps only gsub's first result.
        display = mw.ustring.gsub(text, "^%l", mw.ustring.upper)
    end
    if not glossary_arg then
        return display
    end
    local anchor = type(glossary_arg) == "string" and glossary_arg or text
    return "[[Appendix:Glossary#" .. anchor .. "|" .. display .. "]]"
end

-- Builds a full category name: "<language_to.name> <category>", followed
-- by " <language_from.name>" when a source language is given.
function export.hydrate_category(category, language_to, language_from)
    local prefixed = language_to.name .. " " .. category
    if not language_from then
        return prefixed
    end
    return prefixed .. " " .. language_from.name
end

-- Applies export.hydrate_category to every entry of `categories` and
-- returns the results as a new list in the same order.
function export.hydrate_categories(categories, language_to, language_from)
    local hydrated = {}
    for position, stem in ipairs(categories) do
        hydrated[position] = export.hydrate_category(
            stem,
            language_to,
            language_from
        )
    end
    return hydrated
end

-- Renders a template that takes no term.
--   template : key into no_term_data
--   frame    : invocation frame; the arguments are read from its parent
-- Returns the label (unless notext is set) followed by the category links
-- (unless nocat is set).
function export.no_term_etymology(template, frame)
    local data = no_term_data[template]
    local args = m_parameters.process(frame:getParent().args, no_term_params)
    local language = m_languages.get_by_code(args[1])

    local categories = {}
    if not args.nocat then
        categories = export.hydrate_categories(data.categories, language)
    end

    local text = ""
    if not args.notext then
        text = export.format_glossary(template, data.glossary, args.nocap)
    end

    return export.format_etymology(text, categories)
end

-- Renders a template that references one term in the entry's own language.
--   template : key into single_term_data
--   frame    : invocation frame; the arguments are read from its parent
-- Returns "<Label> of <link>" (the label part is dropped when notext is
-- set) followed by the category links (unless nocat is set).
function export.single_term_etymology(template, frame)
    local data = single_term_data[template]
    local args = m_parameters.process(frame:getParent().args, single_term_params)
    local language = m_languages.get_by_code(args[1])

    local lead_in = ""
    if not args.notext then
        lead_in = export.format_glossary(template, data.glossary, args.nocap) .. " of "
    end

    local link_spec = {
        term = args[2],
        language = language,
        alt = args.alt,
        gloss = args.t,
        pos = args.pos,
        nobold = true,
    }
    local body = lead_in .. m_links.full_link(link_spec, "term")

    local categories = {}
    if not args.nocat then
        categories = export.hydrate_categories(data.categories, language)
    end

    return export.format_etymology(body, categories)
end

-- Renders a template that derives the entry from a term in another language.
--   template : key into derived_term_data
--   frame    : invocation frame; the arguments are read from its parent
-- Returns the link to the source term, preceded by "<Label> of " unless
-- notext is set or the template is marked silent, and followed by the
-- category links (unless nocat is set).
function export.derived_term_etymology(template, frame)
    local data = derived_term_data[template]
    local args = m_parameters.process(frame:getParent().args, derived_term_params)
    local language_to = m_languages.get_by_code(args[1])
    local language_from = m_languages.get_by_code(args[2])

    local lead_in = ""
    if not (args.notext or data.silent) then
        lead_in = export.format_glossary(template, data.glossary, args.nocap) .. " of "
    end

    local link_spec = {
        term = args[3],
        language = language_from,
        alt = args.alt,
        gloss = args.t,
        pos = args.pos,
        showlanguage = true,
        nobold = true,
    }
    local body = lead_in .. m_links.full_link(link_spec, "term")

    local categories = {}
    if not args.nocat then
        categories = export.hydrate_categories(data.categories, language_to, language_from)
    end

    return export.format_etymology(body, categories)
end

-- Renders the affix template: the parts of the word joined by " + ",
-- followed by category links.
--   frame : invocation frame; the arguments are read from its parent
--
-- Categories (all skipped when nocat is set):
--   * every part with a source language (l<i>) adds
--     "<language> terms derived from <source language>";
--   * every part shaped like an infix/prefix/suffix, and not marked with
--     noaff<i>, counts as an affix; when it has no source language it also
--     adds "<language> terms infixed/prefixed/suffixed with <part>";
--   * two or more parts with no affix among them add
--     "<language> compound terms".
function export.affix_etymology(frame)
    local args = m_parameters.process(frame:getParent().args, affix_params)
    local pre_out, categories = {}, {}
    local language_to = m_languages.get_by_code(args[1])
    local categorize = not args["nocat"]
    local n_parts, n_affixes = 0, 0
    for i, term in ipairs(args[2]) do
        n_parts = n_parts + 1
        local language_from = nil
        if args["l"][i] then language_from = m_languages.get_by_code(args["l"][i]) end
        if categorize then
            -- Derivation does not depend on affix classification: noaff
            -- must only stop the part from being treated as an affix, not
            -- drop the "derived from" category of a foreign part.
            if language_from then
                table.insert(categories, language_to.name .. " terms derived from " .. language_from.name)
            end
            -- Infix is tested first because an infix also satisfies the
            -- prefix and suffix tests.
            local affix_kind = nil
            if not args["noaff"][i] then
                if is_infix(term) then
                    affix_kind = "infixed"
                elseif is_prefix(term) then
                    affix_kind = "prefixed"
                elseif is_suffix(term) then
                    affix_kind = "suffixed"
                end
            end
            if affix_kind then
                n_affixes = n_affixes + 1
                -- Affix categories are only produced for parts in the
                -- entry's own language.
                if not language_from then
                    local cite_term = term
                    -- presumably `proto` marks a reconstructed language whose
                    -- terms are cited with a leading asterisk; confirm
                    -- against Module:languages
                    if language_to.proto then cite_term = "*" .. cite_term end
                    table.insert(categories, language_to.name .. " terms " .. affix_kind .. " with " .. cite_term)
                end
            end
        end
        table.insert(pre_out, m_links.full_link({
            term = term,
            language = language_from or language_to,
            alt = args["alt"][i],
            gloss = args["t"][i],
            pos = args["pos"][i],
            showlanguage = (language_from and true),
            nobold = true,
        }, "term"))
    end
    if categorize and n_parts > 1 and n_affixes == 0 then
        table.insert(categories, language_to.name .. " compound terms")
    end
    return export.format_etymology(table.concat(pre_out, " + "), categories)
end

-- Template entry point. The first argument of the invocation names the
-- sub-template; the call is routed to the matching renderer. Raises an
-- error when the name is not recognised.
function export.show(frame)
    local template = frame.args[1]

    local renderer
    if no_term_data[template] then
        renderer = export.no_term_etymology
    elseif single_term_data[template] then
        renderer = export.single_term_etymology
    elseif derived_term_data[template] then
        renderer = export.derived_term_etymology
    elseif template == "affix" then
        -- The affix renderer takes only the frame.
        return export.affix_etymology(frame)
    else
        error("No such sub-template type is defined!")
    end

    return renderer(template, frame)
end

-- Hand the table of public functions to whoever loads this module.
return export