Module:etymology: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
TheNightAvl (talk | contribs) No edit summary |
||
(45 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local m_inline = require("Module:inline") | |||
local m_languages = require("Module:languages") | local m_languages = require("Module:languages") | ||
local m_links = require("Module:links") | local m_links = require("Module:links") | ||
local m_parameters = require("Module:parameters") | local m_parameters = require("Module:parameters") | ||
local plaenk_alt = mw.loadData("Module:lnk-pro-morph/data").alt | |||
local function root_reminder(code) return "Have you forgotten to use <code><nowiki>{{root|" .. code .. "|lnk-pro}}</nowiki></code>? Remember to use it even in derived terms and compounds." end | |||
local new_pos_data = { | |||
["adjective"] = { | |||
categories = {"adjectivisations"}, | |||
}, | |||
["noun"] = { | |||
categories = {"nominalisations"}, | |||
}, | |||
["verb"] = { | |||
categories = {"verbalisations"}, | |||
}, | |||
} | |||
local new_pos_aliases = { | |||
["adj"] = "adjective", | |||
["n"] = "noun", | |||
["v"] = "verb", | |||
} | |||
local no_term_params = { | local no_term_params = { | ||
[1] = {required = true}, | [1] = {required = true}, | ||
["nocap"] = {type = "boolean"}, | ["nocap"] = {type = "boolean"}, | ||
["nc"] = {alias_of = "nocap"}, | |||
["notext"] = {type = "boolean"}, | ["notext"] = {type = "boolean"}, | ||
["nocat"] = {type = "boolean"}, | ["nocat"] = {type = "boolean"}, | ||
Line 14: | Line 36: | ||
["onomatopoeic"] = { | ["onomatopoeic"] = { | ||
categories = {"onomatopoeias"}, | categories = {"onomatopoeias"}, | ||
text = true, | |||
}, | }, | ||
["unknown"] = { | ["unknown"] = { | ||
categories = {"terms with unknown etymologies"}, | categories = {"terms with unknown etymologies"}, | ||
text = function(nocap) return (nocap and "u" or "U") .. "nknown" end, | |||
}, | |||
["uncertain"] = { | |||
categories = {"terms with uncertain etymologies"}, | |||
text = function(nocap) return (nocap and "u" or "U") .. "ncertain" end, | |||
}, | }, | ||
} | } | ||
local single_term_params = { | local single_term_params = { | ||
[1] = {required = true}, | |||
[2] = {required = true}, | |||
[3] = {alias_of = "alt"}, | |||
[4] = {alias_of = "t"}, | |||
["alt"] = {}, | |||
["t"] = {}, | |||
["anchor"] = {}, | |||
["a"] = {alias_of = "anchor"}, | |||
["pos"] = {}, | |||
["newpos"] = {}, | |||
["nocap"] = {type = "boolean"}, | |||
["nc"] = {alias_of = "nocap"}, | |||
["notext"] = {type = "boolean"}, | |||
["nocat"] = {type = "boolean"}, | |||
} | } | ||
local single_term_data = { | local single_term_data = { | ||
["abbreviation"] = { | ["abbreviation"] = { | ||
text = true, | |||
categories = {"abbreviations"} | categories = {"abbreviations"} | ||
}, | }, | ||
["back-formation"] = { | ["back-formation"] = { | ||
text = true, | |||
preposition = "from", | |||
categories = {"back-formations"} | categories = {"back-formations"} | ||
}, | }, | ||
["clipping"] = { | ["clipping"] = { | ||
text = true, | |||
categories = {"clippings"}, | categories = {"clippings"}, | ||
}, | |||
["contraction"] = { | |||
text = true, | |||
categories = {"contractions"}, | |||
}, | |||
["deadjectival"] = { | |||
text = true, | |||
preposition = "from", | |||
categories = {"deadjectivals"}, | |||
}, | |||
["denominal"] = { | |||
text = true, | |||
preposition = "from", | |||
categories = {"denominals"}, | |||
}, | |||
["deverbal"] = { | |||
text = true, | |||
preposition = "from", | |||
categories = {"deverbals"}, | |||
}, | }, | ||
["doublet"] = { | ["doublet"] = { | ||
text = true, | |||
categories = {"doublets"}, | |||
}, | }, | ||
["ellipsis"] = { | ["ellipsis"] = { | ||
text = true, | |||
categories = {"ellipses"}, | categories = {"ellipses"}, | ||
}, | |||
["grammaticalisation"] = { | |||
text = true, | |||
categories = {"grammaticalisations"}, | |||
}, | }, | ||
["initialism"] = { | ["initialism"] = { | ||
text = true, | |||
categories = {"initialisms"}, | categories = {"initialisms"}, | ||
}, | }, | ||
["rebracketing"] = { | ["rebracketing"] = { | ||
text = true, | |||
categories = {"rebracketings"}, | categories = {"rebracketings"}, | ||
}, | }, | ||
["reduplication"] = { | ["reduplication"] = { | ||
text = true, | |||
categories = {"reduplications"}, | categories = {"reduplications"}, | ||
}, | }, | ||
["univerbation"] = { | |||
text = true, | |||
categories = {"univerbations"}, | |||
}, | |||
} | |||
local multi_term_params = { | |||
[1] = {required = true}, | |||
[2] = {list = true}, | |||
["alt"] = {list = true, allow_holes = true}, | |||
["anchor"] = {list = true, allow_holes = true}, | |||
["a"] = {alias_of = "anchor"}, | |||
["t"] = {list = true, allow_holes = true}, | |||
["pos"] = {list = true, allow_holes = true}, | |||
["nocap"] = {type = "boolean"}, | |||
["nc"] = {alias_of = "nocap"}, | |||
["notext"] = {type = "boolean"}, | |||
["nocat"] = {type = "boolean"}, | |||
} | |||
local multi_term_data = { | |||
["blend"] = { | |||
text = true, | |||
categories = {"blends"}, | |||
}, | |||
} | } | ||
Line 75: | Line 154: | ||
[5] = {alias_of = "t"}, | [5] = {alias_of = "t"}, | ||
["alt"] = {}, | ["alt"] = {}, | ||
["anchor"] = {}, | |||
["a"] = {alias_of = "anchor"}, | |||
["t"] = {}, | ["t"] = {}, | ||
["pos"] = {}, | ["pos"] = {}, | ||
["nocap"] = {type = "boolean"}, | ["nocap"] = {type = "boolean"}, | ||
["nc"] = {alias_of = "nocap"}, | |||
["notext"] = {type = "boolean"}, | ["notext"] = {type = "boolean"}, | ||
["nocat"] = {type = "boolean"}, | ["nocat"] = {type = "boolean"}, | ||
Line 83: | Line 165: | ||
local derived_term_data = { | local derived_term_data = { | ||
["borrowed"] = { | ["borrowed"] = { | ||
categories = {"terms borrowed from"}, | categories = {"terms borrowed from"}, | ||
}, | }, | ||
["calque"] = { | ["calque"] = { | ||
text = true, | |||
categories = {"terms calqued from"}, | categories = {"terms calqued from"}, | ||
}, | }, | ||
["derived"] = { | ["derived"] = { | ||
categories = {"terms derived from"}, | categories = {"terms derived from"}, | ||
}, | }, | ||
["inherited"] = { | ["inherited"] = { | ||
categories = {"terms inherited from"}, | |||
}, | }, | ||
["learned borrowing"] = { | ["learned borrowing"] = { | ||
text = true, | |||
categories = {"terms borrowed from", "learned borrowings from"}, | categories = {"terms borrowed from", "learned borrowings from"}, | ||
}, | }, | ||
["orthographic borrowing"] = { | ["orthographic borrowing"] = { | ||
text = true, | |||
categories = {"terms borrowed from", "orthographic borrowings from"}, | categories = {"terms borrowed from", "orthographic borrowings from"}, | ||
}, | }, | ||
["semantic loan"] = { | ["semantic loan"] = { | ||
text = true, | |||
categories = {"terms derived from", "semantic loans from"}, | categories = {"terms derived from", "semantic loans from"}, | ||
}, | }, | ||
["phono-semantic matching"] = { | ["phono-semantic matching"] = { | ||
text = true, | |||
categories = {"terms derived from", "phono-semantic matchings from"}, | categories = {"terms derived from", "phono-semantic matchings from"}, | ||
}, | }, | ||
["transliteration"] = { | ["transliteration"] = { | ||
text = true, | |||
categories = {"terms derived from", "transliterations of"}, | categories = {"terms derived from", "transliterations of"}, | ||
}, | }, | ||
Line 123: | Line 202: | ||
[1] = {required = true}, | [1] = {required = true}, | ||
[2] = {list = true}, | [2] = {list = true}, | ||
["type"] = {}, | |||
["t"] = {list = true, allow_holes = true}, | ["t"] = {list = true, allow_holes = true}, | ||
["l"] = {list = true, allow_holes = true}, | ["l"] = {list = true, allow_holes = true}, | ||
["alt"] = {list = true, allow_holes = true}, | ["alt"] = {list = true, allow_holes = true}, | ||
["anchor"] = {list = true, allow_holes = true}, | |||
["a"] = {alias_of = "anchor"}, | |||
["pos"] = {list = true, allow_holes = true}, | ["pos"] = {list = true, allow_holes = true}, | ||
["noaff"] = {list = true, allow_holes = true, type = "boolean"}, | ["noaff"] = {list = true, allow_holes = true, type = "boolean"}, | ||
["root"] = {alias_of = "noaff"}, | |||
["nolink"] = {list = true, allow_holes = true, type = "boolean"}, | |||
["hypo"] = {list = true, allow_holes = true, type = "boolean"}, | |||
["hypothetical"] = {alias_of = "hypo"}, | |||
["notext"] = {type = "boolean"}, | |||
["nocap"] = {type = "boolean"}, | |||
["nc"] = {alias_of = "nocap"}, | |||
["nocat"] = {type = "boolean"}, | ["nocat"] = {type = "boolean"}, | ||
} | |||
local affix_data = { | |||
["affix"] = {}, | |||
["surface analysis"] = { | |||
text = function(nocap) return (nocap and "b" or "B") .. "y [[Appendix:Glossary#surface analysis|surface analysis]], " end | |||
} | |||
} | } | ||
local affix_delimiter = { | local affix_delimiter = { | ||
["-"] = true, | ["-"] = true, | ||
["·"] = true, | ["·"] = true, | ||
} | |||
local affix_delimiter_pattern = "[%-·]" | |||
local affix_compound_data = { | |||
["alliterative"] = { | |||
categories = {"alliterative compounds"}, | |||
}, | |||
["antonymous"] = { | |||
categories = {"antonymous compounds"}, | |||
}, | |||
["bahuvrihi"] = { | |||
categories = {"bahuvrihi compounds"}, | |||
}, | |||
["coordinative"] = { | |||
categories = {"coordinative compounds"}, | |||
}, | |||
["descriptive"] = { | |||
categories = {"descriptive compounds"}, | |||
}, | |||
["determinative"] = { | |||
categories = {"determinative compounds"}, | |||
}, | |||
["dvandva"] = { | |||
categories = {"dvandva compounds"}, | |||
}, | |||
["endocentric"] = { | |||
categories = {"endocentric compounds"}, | |||
}, | |||
["exocentric"] = { | |||
categories = {"exocentric compounds"}, | |||
}, | |||
["karmadharaya"] = { | |||
categories = {"karmadharaya compounds"}, | |||
}, | |||
["rhyming"] = { | |||
categories = {"rhyming compounds"}, | |||
}, | |||
["synonymous"] = { | |||
categories = {"synonymous compounds"}, | |||
}, | |||
["tatpurusa"] = { | |||
categories = {"tatpurusa compounds"}, | |||
}, | |||
} | |||
local affix_compound_aliases = { | |||
allit = "alliterative", | |||
ant = "antonymous", | |||
bahu = "bahuvrihi", | |||
bv = "bahuvrihi", | |||
coord = "coordinative", | |||
desc = "descriptive", | |||
det = "determinative", | |||
dva = "dvandva", | |||
endo = "endocentric", | |||
exo = "exocentric", | |||
karma = "karmadharaya", | |||
kd = "karmadharaya", | |||
rhy = "rhyming", | |||
syn = "synonymous", | |||
tat = "tatpurusa", | |||
tp = "tatpurusa", | |||
} | } | ||
local function is_circumfix(word) | |||
return mw.ustring.find(word, affix_delimiter_pattern .. " " .. affix_delimiter_pattern) ~= nil | |||
end | |||
local function is_infix(word) | local function is_infix(word) | ||
Line 147: | Line 306: | ||
end | end | ||
function | local function format_etymology(out, categories) | ||
for _, category in ipairs(categories) do | for _, category in ipairs(categories) do | ||
out = out .. "[[Category:" .. category .. "]]" | out = out .. "[[Category:" .. category .. "]]" | ||
Line 154: | Line 313: | ||
end | end | ||
function | local function format_solo_text(label, text_data, nocap_arg) | ||
if not | if not text_data then return "" end | ||
if type(text_data) == "function" then return text_data(nocap_arg) end | |||
return "[[Appendix:Glossary#" .. label .. "|" .. (nocap_arg and label or mw.ustring.gsub(label, "^%l", string.upper)) .. "]]" | |||
end | |||
local function format_prefixed_text(label, text_data, nocap_arg, preposition) | |||
if not text_data then return "" end | |||
if type(text_data) == "function" then return text_data(nocap_arg) end | |||
return "[[Appendix:Glossary#" .. label .. "|" .. (nocap_arg and label or mw.ustring.gsub(label, "^%l", string.upper)) .. "]] " .. (preposition or "of") .. " " | |||
end | end | ||
function | local function hydrate_category(category, language_to, language_from, newpos) | ||
local new_category = language_to.name .. " " .. category | local new_category = language_to.name .. " " .. category | ||
if language_from then new_category = new_category .. " " .. language_from.name end | if language_from then new_category = new_category .. " " .. language_from.name end | ||
Line 167: | Line 331: | ||
end | end | ||
function | local function hydrate_categories(categories, language_to, language_from, newpos) | ||
local new_categories = {} | local new_categories = {} | ||
for i, category in ipairs(categories) do | for i, category in ipairs(categories) do | ||
new_categories[i] = | new_categories[i] = language_to.name .. " " .. category .. (language_from and (" " .. language_from.name) or "") | ||
end | |||
if newpos then | |||
newpos = new_pos_aliases[newpos] or newpos | |||
local newpos_data = new_pos_data[newpos] | |||
for _, category in ipairs(newpos_data["categories"]) do table.insert(new_categories, language_to.name .. " " .. category) end | |||
end | end | ||
return new_categories | return new_categories | ||
end | end | ||
function | local function no_term_etymology(template, frame) | ||
local data, args = no_term_data[template], m_parameters.process(frame:getParent().args, no_term_params) | local data, args = no_term_data[template], m_parameters.process(frame:getParent().args, no_term_params) | ||
local out, categories = "", {} | local out, categories = "", {} | ||
local language = m_languages.get_by_code(args[1]) | local language = m_languages.get_by_code(args[1]) | ||
if not args["nocat"] then categories = | if not args["nocat"] then categories = hydrate_categories(data["categories"], language) end | ||
if not args["notext"] then out = | if not args["notext"] then out = format_solo_text(template, data["text"], args["nocap"]) end | ||
return | return format_etymology(out, categories) | ||
end | end | ||
function | local function single_term_etymology(template, frame) | ||
local data, args = single_term_data[template], m_parameters.process(frame:getParent().args, single_term_params) | local data, args = single_term_data[template], m_parameters.process(frame:getParent().args, single_term_params) | ||
local out, categories = "", {} | local out, categories = "", {} | ||
local language = m_languages.get_by_code(args[1]) | local language = m_languages.get_by_code(args[1]) | ||
if not args["notext"] then out = out .. | if not args["notext"] then out = out .. format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) end | ||
out = out .. m_links.full_link({ | out = out .. m_links.full_link({ | ||
term = args[2], | term = args[2], | ||
Line 195: | Line 364: | ||
gloss = args["t"], | gloss = args["t"], | ||
pos = args["pos"], | pos = args["pos"], | ||
anchor = args["anchor"], | |||
nobold = true, | nobold = true, | ||
},"term") | }, "term") | ||
if not args["nocat"] then categories = | if not args["nocat"] then | ||
return | categories = hydrate_categories(data["categories"], language, nil, args["newpos"]) | ||
mw.addWarning(root_reminder(args[1])) | |||
end | |||
return format_etymology(out, categories) | |||
end | end | ||
function | local function multi_term_etymology(template, frame) | ||
local data, args = multi_term_data[template], m_parameters.process(frame:getParent().args, multi_term_params) | |||
local out, categories = "", {} | |||
local language = m_languages.get_by_code(args[1]) | |||
if not args["notext"] then out = out .. format_prefixed_text(template, data["text"], args["nocap"]) end | |||
for i, word in ipairs(args[2]) do | |||
local i_term, i_args = m_inline.parse(word) | |||
if i > 1 then out = out .. " + " end | |||
out = out .. m_links.full_link({ | |||
term = i_term, | |||
language = language, | |||
alt = args["alt"][i] or i_args["alt"], | |||
gloss = args["t"][i] or i_args["t"], | |||
pos = args["pos"][i] or i_args["pos"], | |||
anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"], | |||
}, "term") | |||
end | |||
if not args["nocat"] then | |||
categories = hydrate_categories(data["categories"], language, nil, args["preposition"]) | |||
mw.addWarning(root_reminder(args[1])) | |||
end | |||
return format_etymology(out, categories) | |||
end | |||
local function derived_term_etymology(template, frame) | |||
local data, args = derived_term_data[template], m_parameters.process(frame:getParent().args, derived_term_params) | local data, args = derived_term_data[template], m_parameters.process(frame:getParent().args, derived_term_params) | ||
local out, categories = "", {} | local out, categories = "", {} | ||
local language_to = m_languages.get_by_code(args[1]) | local language_to = m_languages.get_by_code(args[1]) | ||
local language_from = m_languages.get_by_code(args[2]) | local language_from = m_languages.get_by_code(args[2]) | ||
if (not args["notext"]) and (not data["silent"]) then out = out .. | local term = args[3] | ||
local alt | |||
if args[2] == "lnk-pro" and plaenk_alt[term] then | |||
alt = term | |||
term = plaenk_alt[term] | |||
end | |||
if (not args["notext"]) and (not data["silent"]) then out = out .. format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) end | |||
out = out .. m_links.full_link({ | out = out .. m_links.full_link({ | ||
term = | term = term, | ||
language = language_from, | language = language_from, | ||
alt = args["alt"], | alt = args["alt"] or alt, | ||
gloss = args["t"], | gloss = args["t"], | ||
pos = args["pos"], | pos = args["pos"], | ||
anchor = args["anchor"], | |||
showlanguage = true, | showlanguage = true, | ||
nobold = true, | nobold = true, | ||
},"term") | }, "term") | ||
if not args["nocat"] then categories = | if not args["nocat"] then | ||
return | categories = hydrate_categories(data["categories"], language_to, language_from) | ||
mw.addWarning(root_reminder(args[1])) | |||
end | |||
return format_etymology(out, categories) | |||
end | end | ||
function | local function affix_etymology(template, frame) | ||
local args = m_parameters.process(frame:getParent().args, affix_params) | local data, args = affix_data[template], m_parameters.process(frame:getParent().args, affix_params) | ||
local pre_out, categories = {}, {} | local pre_out, categories = {}, {} | ||
local language_to = m_languages.get_by_code(args[1]) | local language_to = m_languages.get_by_code(args[1]) | ||
local n_parts, n_affixes = 0, 0 | local n_parts, n_affixes = 0, 0 | ||
for i, term in ipairs(args[2]) do | for i, term in ipairs(args[2]) do | ||
local i_term, i_args = m_inline.parse(term) | |||
local i_alt | |||
if args[1] == "lnk-pro" and plaenk_alt[i_term] then | |||
i_alt = i_term | |||
i_term = plaenk_alt[i_term] | |||
end | |||
n_parts = n_parts + 1 | n_parts = n_parts + 1 | ||
local language_from = nil | local language_from = nil | ||
if args["l"][i] then language_from = m_languages.get_by_code(args["l"][i]) end | if args["l"][i] or i_args["l"] then language_from = m_languages.get_by_code(args["l"][i] or i_args["l"]) end | ||
local cite_term = | local cite_term = i_term | ||
if (language_from and language_from.proto) or ((not language_from) and language_to.proto) then cite_term = " | if (language_from and language_from.proto) or ((not language_from) and language_to.proto) then cite_term = "*" .. cite_term end | ||
if not args["nocat"] then | if not args["nocat"] then | ||
if args["noaff"][i] then | if args["noaff"][i] or i_args["noaff"] then | ||
-- | -- this is a marked non-affix, don't let it be classified as one! | ||
elseif language_from then | elseif language_from then | ||
table.insert(categories, language_to.name .. " terms derived from " .. language_from.name) | |||
if is_infix(i_term) or is_prefix(i_term) or is_suffix(i_term) or is_circumfix(i_term) then n_affixes = n_affixes + 1 end | |||
elseif is_infix( | elseif is_infix(i_term) then | ||
table.insert(categories, language_to.name .. " terms infixed with " .. cite_term) | table.insert(categories, language_to.name .. " terms infixed with " .. cite_term) | ||
n_affixes = n_affixes + 1 | n_affixes = n_affixes + 1 | ||
elseif is_prefix( | elseif is_prefix(i_term) then | ||
table.insert(categories, language_to.name .. " terms prefixed with " .. cite_term) | table.insert(categories, language_to.name .. " terms prefixed with " .. cite_term) | ||
n_affixes = n_affixes + 1 | n_affixes = n_affixes + 1 | ||
elseif is_suffix( | elseif is_suffix(i_term) then | ||
table.insert(categories, language_to.name .. " terms suffixed with " .. cite_term) | table.insert(categories, language_to.name .. " terms suffixed with " .. cite_term) | ||
n_affixes = n_affixes + 1 | n_affixes = n_affixes + 1 | ||
elseif is_circumfix(i_term) then | |||
table.insert(categories, language_to.name .. " terms circumfixed with " .. cite_term) | |||
n_affixes = n_affixes + 1 | |||
end | end | ||
end | end | ||
table.insert(pre_out, m_links.full_link({ | table.insert(pre_out, m_links.full_link({ | ||
term = i_term, | |||
language = language_from or language_to, | |||
alt = args["alt"][i] or i_args["alt"] or i_alt, | |||
gloss = args["t"][i] or i_args["t"], | |||
pos = args["pos"][i] or i_args["pos"], | |||
anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"], | |||
nolink = args["nolink"][i] or i_args["nolink"], | |||
hypo = args["hypo"][i] or i_args["hypo"], | |||
showlanguage = (language_from and true), | |||
nobold = true, | |||
}, "term")) | |||
end | end | ||
if (not args["nocat"]) and (n_parts > 1 and n_affixes == 0) then | if (not args["nocat"]) and (n_parts > 1 and n_affixes == 0) then | ||
table.insert(categories, language_to.name .. " compound terms") | table.insert(categories, language_to.name .. " compound terms") | ||
if args["type"] then | |||
local affix_type = affix_compound_data[affix_compound_aliases[args["type"]] or args["type"]] | |||
if affix_type then | |||
for _, category in ipairs(affix_type["categories"]) do | |||
table.insert(categories, language_to.name .. " " .. category) | |||
end | |||
end | |||
end | |||
end | |||
local out = table.concat(pre_out, " + ") | |||
if not args["notext"] then | |||
out = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) .. out | |||
end | end | ||
if not args["nocat"] then mw.addWarning(root_reminder(args[1])) end | |||
return format_etymology(out, categories) | |||
end | end | ||
function export.show(frame) | function export.show(frame) | ||
local template = frame.args[1] | local template = frame.args[1] | ||
if no_term_data[template] then return | if no_term_data[template] then return no_term_etymology(template, frame) end | ||
if single_term_data[template] then return | if single_term_data[template] then return single_term_etymology(template, frame) end | ||
if derived_term_data[template] then return | if multi_term_data[template] then return multi_term_etymology(template, frame) end | ||
if template | if derived_term_data[template] then return derived_term_etymology(template, frame) end | ||
if affix_data[template] then return affix_etymology(template, frame) end | |||
error("No such sub-template type is defined!") | error("No such sub-template type is defined!") | ||
end | end | ||
return export | return export |
Latest revision as of 16:20, 1 August 2024
The etymology module provides functionality for various etymology templates:
{{abbreviation}}, {{abbr}}
{{affix}}, {{aff}}
{{back-formation}}, {{back-form}}, {{bf}}
{{blend}}
{{borrowed}}, {{borrow}}, {{bor}}
{{calque}}, {{clq}}, {{cal}}
{{clipping}}
{{contraction}}, {{contr}}
{{deadjectival}}, {{deadj}}
{{denominal}}, {{den}}
{{derived}}, {{derive}}, {{der}}
{{deverbal}}, {{dev}}
{{doublet}}, {{dbt}}
{{ellipsis}}
{{grammaticalisation}}, {{gram}}
{{inherited}}, {{inherit}}, {{inh}}
{{initialism}}, {{init}}
{{learned borrowing}}, {{lbor}}
{{onomatopoeic}}, {{onom}}
{{orthographic borrowing}}, {{obor}}
{{phono-semantic matching}}, {{psm}}
{{rebracketing}}, {{rbr}}
{{reduplication}}, {{rdp}}
{{semantic loan}}, {{sl}}
{{transliteration}}, {{translit}}
{{uncertain}}, {{unc}}
{{univerbation}}, {{univ}}
{{unknown}}, {{unk}}
local export = {}
local m_inline = require("Module:inline")
local m_languages = require("Module:languages")
local m_links = require("Module:links")
local m_parameters = require("Module:parameters")
local plaenk_alt = mw.loadData("Module:lnk-pro-morph/data").alt
--- Build the reminder text that asks the editor to add a {{root}} call.
-- @param code language code spliced into the suggested template invocation
-- @return the reminder message (callers in this module hand it to mw.addWarning)
local function root_reminder(code)
	local suggestion = "<code><nowiki>{{root|" .. code .. "|lnk-pro}}</nowiki></code>"
	return "Have you forgotten to use " .. suggestion .. "? Remember to use it even in derived terms and compounds."
end
-- Extra categories contributed by a `newpos` argument, keyed by the
-- canonical part-of-speech name. hydrate_categories prefixes each entry
-- with the target language name.
local new_pos_data = {
	adjective = { categories = { "adjectivisations" } },
	noun = { categories = { "nominalisations" } },
	verb = { categories = { "verbalisations" } },
}
-- Short forms accepted for `newpos`, mapped to the keys of new_pos_data.
local new_pos_aliases = {
	adj = "adjective",
	n = "noun",
	v = "verb",
}
-- Parameter specification handed to m_parameters.process by
-- no_term_etymology. Positional 1 is read there as the language code;
-- `nc` is declared as an alias of `nocap`.
local no_term_params = {
	[1] = { required = true },
	nocap = { type = "boolean" },
	nc = { alias_of = "nocap" },
	notext = { type = "boolean" },
	nocat = { type = "boolean" },
}
-- Templates that take no term, keyed by template name.
--   categories: category stems completed by hydrate_categories
--   text:       `true` for the default glossary link, or a function of
--               `nocap` returning the literal text (see format_solo_text)
local no_term_data = {
	onomatopoeic = {
		categories = { "onomatopoeias" },
		text = true,
	},
	unknown = {
		categories = { "terms with unknown etymologies" },
		text = function(nocap)
			local initial = nocap and "u" or "U"
			return initial .. "nknown"
		end,
	},
	uncertain = {
		categories = { "terms with uncertain etymologies" },
		text = function(nocap)
			local initial = nocap and "u" or "U"
			return initial .. "ncertain"
		end,
	},
}
-- Parameter specification handed to m_parameters.process by
-- single_term_etymology. Positional 1 is read there as the language code
-- and positional 2 as the term; positionals 3 and 4 are declared as
-- aliases of `alt` and `t`.
local single_term_params = {
	[1] = { required = true },
	[2] = { required = true },
	[3] = { alias_of = "alt" },
	[4] = { alias_of = "t" },
	alt = {},
	t = {},
	anchor = {},
	a = { alias_of = "anchor" },
	pos = {},
	newpos = {},
	nocap = { type = "boolean" },
	nc = { alias_of = "nocap" },
	notext = { type = "boolean" },
	nocat = { type = "boolean" },
}
-- Templates that link exactly one term, keyed by template name.
--   text:        `true` for the default glossary link (see format_prefixed_text)
--   preposition: word placed between the label and the term; the formatter
--                falls back to "of" when this is absent
--   categories:  category stems completed by hydrate_categories
local single_term_data = {
	abbreviation = { text = true, categories = { "abbreviations" } },
	["back-formation"] = { text = true, preposition = "from", categories = { "back-formations" } },
	clipping = { text = true, categories = { "clippings" } },
	contraction = { text = true, categories = { "contractions" } },
	deadjectival = { text = true, preposition = "from", categories = { "deadjectivals" } },
	denominal = { text = true, preposition = "from", categories = { "denominals" } },
	deverbal = { text = true, preposition = "from", categories = { "deverbals" } },
	doublet = { text = true, categories = { "doublets" } },
	ellipsis = { text = true, categories = { "ellipses" } },
	grammaticalisation = { text = true, categories = { "grammaticalisations" } },
	initialism = { text = true, categories = { "initialisms" } },
	rebracketing = { text = true, categories = { "rebracketings" } },
	reduplication = { text = true, categories = { "reduplications" } },
	univerbation = { text = true, categories = { "univerbations" } },
}
-- Parameter specification for templates that list several terms
-- (passed to m_parameters.process by multi_term_etymology).
-- Positional 2 is declared as a list; the per-term options are lists
-- that allow holes.
local multi_term_params = {
	[1] = { required = true },
	[2] = { list = true },
	alt = { list = true, allow_holes = true },
	anchor = { list = true, allow_holes = true },
	a = { alias_of = "anchor" },
	t = { list = true, allow_holes = true },
	pos = { list = true, allow_holes = true },
	nocap = { type = "boolean" },
	nc = { alias_of = "nocap" },
	notext = { type = "boolean" },
	nocat = { type = "boolean" },
}
-- Templates that list several terms, keyed by template name. Fields have
-- the same meaning as in single_term_data.
local multi_term_data = {
	blend = { text = true, categories = { "blends" } },
}
-- Parameter specification for templates that relate a term to a term in
-- another language. The first three positionals are required; positionals
-- 4 and 5 are declared as aliases of `alt` and `t`.
local derived_term_params = {
	[1] = { required = true },
	[2] = { required = true },
	[3] = { required = true },
	[4] = { alias_of = "alt" },
	[5] = { alias_of = "t" },
	alt = {},
	anchor = {},
	a = { alias_of = "anchor" },
	t = {},
	pos = {},
	nocap = { type = "boolean" },
	nc = { alias_of = "nocap" },
	notext = { type = "boolean" },
	nocat = { type = "boolean" },
}
-- Templates relating a term to a source-language term, keyed by template
-- name. Entries without `text` produce no leading label. The category
-- stems end in "from"/"of" because hydrate_categories appends the source
-- language name when one is supplied.
local derived_term_data = {
	borrowed = { categories = { "terms borrowed from" } },
	calque = { text = true, categories = { "terms calqued from" } },
	derived = { categories = { "terms derived from" } },
	inherited = { categories = { "terms inherited from" } },
	["learned borrowing"] = {
		text = true,
		categories = { "terms borrowed from", "learned borrowings from" },
	},
	["orthographic borrowing"] = {
		text = true,
		categories = { "terms borrowed from", "orthographic borrowings from" },
	},
	["semantic loan"] = {
		text = true,
		categories = { "terms derived from", "semantic loans from" },
	},
	["phono-semantic matching"] = {
		text = true,
		categories = { "terms derived from", "phono-semantic matchings from" },
	},
	transliteration = {
		text = true,
		categories = { "terms derived from", "transliterations of" },
	},
}
-- Parameter specification for the affix-style templates. Positional 2 is
-- declared as a list of parts; the per-part options are lists that allow
-- holes. `root` and `hypothetical` are declared as aliases of `noaff`
-- and `hypo`.
local affix_params = {
	[1] = { required = true },
	[2] = { list = true },
	type = {},
	t = { list = true, allow_holes = true },
	l = { list = true, allow_holes = true },
	alt = { list = true, allow_holes = true },
	anchor = { list = true, allow_holes = true },
	a = { alias_of = "anchor" },
	pos = { list = true, allow_holes = true },
	noaff = { list = true, allow_holes = true, type = "boolean" },
	root = { alias_of = "noaff" },
	nolink = { list = true, allow_holes = true, type = "boolean" },
	hypo = { list = true, allow_holes = true, type = "boolean" },
	hypothetical = { alias_of = "hypo" },
	notext = { type = "boolean" },
	nocap = { type = "boolean" },
	nc = { alias_of = "nocap" },
	nocat = { type = "boolean" },
}
-- Affix-style templates, keyed by template name. "affix" has no leading
-- text; "surface analysis" supplies a function of `nocap` that returns
-- the introductory phrase, including its trailing ", ".
local affix_data = {
	affix = {},
	["surface analysis"] = {
		text = function(nocap)
			local initial = nocap and "b" or "B"
			return initial .. "y [[Appendix:Glossary#surface analysis|surface analysis]], "
		end,
	},
}
-- Characters treated as affix boundary markers. The set is consulted by
-- is_infix / is_prefix / is_suffix on the first or last character of a
-- term; the pattern form of the same set is used by is_circumfix.
local affix_delimiter = { ["-"] = true, ["·"] = true }
local affix_delimiter_pattern = "[%-·]"
-- Compound subtypes keyed by canonical name; each contributes one
-- category stem.
local affix_compound_data = {
	alliterative = { categories = { "alliterative compounds" } },
	antonymous = { categories = { "antonymous compounds" } },
	bahuvrihi = { categories = { "bahuvrihi compounds" } },
	coordinative = { categories = { "coordinative compounds" } },
	descriptive = { categories = { "descriptive compounds" } },
	determinative = { categories = { "determinative compounds" } },
	dvandva = { categories = { "dvandva compounds" } },
	endocentric = { categories = { "endocentric compounds" } },
	exocentric = { categories = { "exocentric compounds" } },
	karmadharaya = { categories = { "karmadharaya compounds" } },
	rhyming = { categories = { "rhyming compounds" } },
	synonymous = { categories = { "synonymous compounds" } },
	tatpurusa = { categories = { "tatpurusa compounds" } },
}
-- Abbreviations for compound subtypes, mapped to the keys of
-- affix_compound_data.
local affix_compound_aliases = {
	["allit"] = "alliterative",
	["ant"] = "antonymous",
	["bahu"] = "bahuvrihi",
	["bv"] = "bahuvrihi",
	["coord"] = "coordinative",
	["desc"] = "descriptive",
	["det"] = "determinative",
	["dva"] = "dvandva",
	["endo"] = "endocentric",
	["exo"] = "exocentric",
	["karma"] = "karmadharaya",
	["kd"] = "karmadharaya",
	["rhy"] = "rhyming",
	["syn"] = "synonymous",
	["tat"] = "tatpurusa",
	["tp"] = "tatpurusa",
}
--- Report whether `word` contains two affix delimiters separated by a
-- single space.
-- @param word term text to inspect
-- @return true when the delimiter-space-delimiter sequence occurs, else false
local function is_circumfix(word)
	local spaced_delimiters = affix_delimiter_pattern .. " " .. affix_delimiter_pattern
	local match_start = mw.ustring.find(word, spaced_delimiters)
	return match_start ~= nil
end
--- Report whether `word` both starts and ends with an affix delimiter.
-- @param word term text to inspect
-- @return true when both ends are delimiters, otherwise nil
local function is_infix(word)
	local first_char = mw.ustring.sub(word, 1, 1)
	local last_char = mw.ustring.sub(word, -1)
	return affix_delimiter[first_char] and affix_delimiter[last_char]
end
--- Report whether `word` ends with an affix delimiter.
-- A word delimited at both ends also satisfies this check.
-- @param word term text to inspect
-- @return true when the last character is a delimiter, otherwise nil
local function is_prefix(word)
	local last_char = mw.ustring.sub(word, -1)
	return affix_delimiter[last_char]
end
--- Report whether `word` starts with an affix delimiter.
-- A word delimited at both ends also satisfies this check.
-- @param word term text to inspect
-- @return true when the first character is a delimiter, otherwise nil
local function is_suffix(word)
	local first_char = mw.ustring.sub(word, 1, 1)
	return affix_delimiter[first_char]
end
--- Append a category wikilink for every entry of `categories` to `out`.
-- @param out already rendered etymology text
-- @param categories sequence of full category names
-- @return `out` followed by one "[[Category:...]]" link per category, in order
local function format_etymology(out, categories)
	local links = {}
	for index, category in ipairs(categories) do
		links[index] = "[[Category:" .. category .. "]]"
	end
	return out .. table.concat(links)
end
--- Render the label text for a template that stands on its own.
-- @param label template name; used as the glossary anchor and display text
-- @param text_data falsy for no text, a function of `nocap` for custom
--   text, or any other truthy value for the default glossary link
-- @param nocap_arg when truthy, the display text keeps its original case
-- @return the rendered text ("" when `text_data` is falsy)
local function format_solo_text(label, text_data, nocap_arg)
	if not text_data then
		return ""
	elseif type(text_data) == "function" then
		return text_data(nocap_arg)
	end

	local display = label
	if not nocap_arg then
		-- Upper-case a leading lowercase letter, if there is one.
		display = mw.ustring.gsub(label, "^%l", string.upper)
	end
	return "[[Appendix:Glossary#" .. label .. "|" .. display .. "]]"
end
--- Render the label text that precedes a linked term.
-- @param label template name; used as the glossary anchor and display text
-- @param text_data falsy for no text, a function of `nocap` for custom
--   text, or any other truthy value for the default glossary link
-- @param nocap_arg when truthy, the display text keeps its original case
-- @param preposition word placed after the link; "of" when omitted
-- @return the rendered text ("" when `text_data` is falsy); the default
--   form ends with the preposition and a trailing space
local function format_prefixed_text(label, text_data, nocap_arg, preposition)
	if not text_data then
		return ""
	elseif type(text_data) == "function" then
		return text_data(nocap_arg)
	end

	local display = label
	if not nocap_arg then
		-- Upper-case a leading lowercase letter, if there is one.
		display = mw.ustring.gsub(label, "^%l", string.upper)
	end
	local glossary_link = "[[Appendix:Glossary#" .. label .. "|" .. display .. "]] "
	return glossary_link .. (preposition or "of") .. " "
end
--- Complete a single category stem with language names.
-- @param category category stem, e.g. "terms borrowed from"
-- @param language_to table whose `name` prefixes the category
-- @param language_from optional table whose `name` is appended
-- @param newpos not used by this function
-- @return "<language_to.name> <category>[ <language_from.name>]"
local function hydrate_category(category, language_to, language_from, newpos)
	local source_suffix = ""
	if language_from then
		source_suffix = " " .. language_from.name
	end
	return language_to.name .. " " .. category .. source_suffix
end
-- Builds the full category names for an etymology entry.
--
-- categories:    sequence of category fragments (e.g. "compound terms").
-- language_to:   language object; its `name` prefixes every category.
-- language_from: optional language object; when given, its `name` is appended
--                to each entry derived from `categories`.
-- newpos:        optional part-of-speech key or alias. It is resolved through
--                new_pos_aliases and looked up in new_pos_data; that entry's
--                categories are appended, prefixed with language_to.name only.
-- Returns a new sequence; `categories` itself is not modified.
-- Raises a descriptive error when `newpos` is given but matches no entry of
-- new_pos_data (previously this failed with "attempt to index a nil value").
local function hydrate_categories(categories, language_to, language_from, newpos)
    local prefix = language_to.name .. " "
    local suffix = language_from and (" " .. language_from.name) or ""
    local new_categories = {}
    for i, category in ipairs(categories) do
        new_categories[i] = prefix .. category .. suffix
    end
    if newpos then
        local canonical = new_pos_aliases[newpos] or newpos
        local newpos_data = new_pos_data[canonical]
        if not newpos_data then
            error("Unrecognised part of speech \"" .. tostring(newpos) .. "\" given for a part-of-speech change.")
        end
        for _, category in ipairs(newpos_data["categories"]) do
            new_categories[#new_categories + 1] = prefix .. category
        end
    end
    return new_categories
end
-- Renders a template that takes no term (only a language code in args[1]).
-- Reads the parent frame's arguments, emits the template's text unless
-- `notext` is set, and appends its categories unless `nocat` is set.
local function no_term_etymology(template, frame)
    local data = no_term_data[template]
    local args = m_parameters.process(frame:getParent().args, no_term_params)
    local language = m_languages.get_by_code(args[1])

    local categories = {}
    if not args["nocat"] then
        categories = hydrate_categories(data["categories"], language)
    end

    local text = ""
    if not args["notext"] then
        text = format_solo_text(template, data["text"], args["nocap"])
    end

    return format_etymology(text, categories)
end
-- Renders a template that links exactly one term (args[2]) in the language
-- given by args[1]. The lead-in text is skipped when `notext` is set; when
-- `nocat` is not set, categories are added (including any `newpos`
-- categories) and a root-template reminder warning is raised.
local function single_term_etymology(template, frame)
    local data = single_term_data[template]
    local args = m_parameters.process(frame:getParent().args, single_term_params)
    local language = m_languages.get_by_code(args[1])

    local lead = ""
    if not args["notext"] then
        lead = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"])
    end

    local link_spec = {
        term = args[2],
        language = language,
        alt = args["alt"],
        gloss = args["t"],
        pos = args["pos"],
        anchor = args["anchor"],
        nobold = true,
    }
    local out = lead .. m_links.full_link(link_spec, "term")

    local categories = {}
    if not args["nocat"] then
        categories = hydrate_categories(data["categories"], language, nil, args["newpos"])
        mw.addWarning(root_reminder(args[1]))
    end

    return format_etymology(out, categories)
end
-- Renders a template that links several terms (the list in args[2]) in the
-- language given by args[1], joined with " + ". Each list entry is run
-- through m_inline.parse; per-index arguments (alt, t, pos, anchor) take
-- precedence over the inline modifiers parsed from the entry.
local function multi_term_etymology(template, frame)
    local data = multi_term_data[template]
    local args = m_parameters.process(frame:getParent().args, multi_term_params)
    local language = m_languages.get_by_code(args[1])

    local lead = ""
    if not args["notext"] then
        lead = format_prefixed_text(template, data["text"], args["nocap"])
    end

    local links = {}
    for index, word in ipairs(args[2]) do
        local parsed_term, inline = m_inline.parse(word)
        links[index] = m_links.full_link({
            term = parsed_term,
            language = language,
            alt = args["alt"][index] or inline["alt"],
            gloss = args["t"][index] or inline["t"],
            pos = args["pos"][index] or inline["pos"],
            anchor = args["anchor"][index] or inline["anchor"] or inline["a"],
        }, "term")
    end
    local out = lead .. table.concat(links, " + ")

    local categories = {}
    if not args["nocat"] then
        -- NOTE(review): args["preposition"] is passed in the slot that
        -- hydrate_categories treats as `newpos`; single_term_etymology passes
        -- args["newpos"] here. Confirm against multi_term_params which is intended.
        categories = hydrate_categories(data["categories"], language, nil, args["preposition"])
        mw.addWarning(root_reminder(args[1]))
    end

    return format_etymology(out, categories)
end
-- Renders a derivation template: args[1] is the code of the language being
-- described, args[2] the code of the source language and args[3] the source
-- term. When the source code is "lnk-pro" and the term has an entry in
-- plaenk_alt, the link targets that entry and the typed term becomes the
-- default display text (an explicit `alt` argument still wins).
local function derived_term_etymology(template, frame)
    local data = derived_term_data[template]
    local args = m_parameters.process(frame:getParent().args, derived_term_params)
    local language_to = m_languages.get_by_code(args[1])
    local language_from = m_languages.get_by_code(args[2])

    local term = args[3]
    local fallback_alt
    local canonical = args[2] == "lnk-pro" and plaenk_alt[term]
    if canonical then
        fallback_alt = term
        term = canonical
    end

    local lead = ""
    if not (args["notext"] or data["silent"]) then
        lead = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"])
    end

    local link_spec = {
        term = term,
        language = language_from,
        alt = args["alt"] or fallback_alt,
        gloss = args["t"],
        pos = args["pos"],
        anchor = args["anchor"],
        showlanguage = true,
        nobold = true,
    }
    local out = lead .. m_links.full_link(link_spec, "term")

    local categories = {}
    if not args["nocat"] then
        categories = hydrate_categories(data["categories"], language_to, language_from)
        mw.addWarning(root_reminder(args[1]))
    end

    return format_etymology(out, categories)
end
-- Renders an affix/compound template: args[1] is the language code and
-- args[2] the list of parts, which are linked and joined with " + ".
--
-- Unless `nocat` is set, each part that is not marked `noaff` is classified
-- (infix, prefix, suffix, circumfix - tested in that order):
--   * a part with its own language (`l`) adds a "terms derived from"
--     category, and still counts as an affix if it looks like one;
--   * otherwise an affix-shaped part adds the matching "terms ...fixed with"
--     category.
-- When there is more than one part and none counted as an affix, the entry is
-- categorised as a compound, plus any categories for the compound `type`.
local function affix_etymology(template, frame)
    local data = affix_data[template]
    local args = m_parameters.process(frame:getParent().args, affix_params)
    local language_to = m_languages.get_by_code(args[1])
    local want_categories = not args["nocat"]

    local links, categories = {}, {}
    local part_count, affix_count = 0, 0

    for index, raw_part in ipairs(args[2]) do
        local part, inline = m_inline.parse(raw_part)

        -- NOTE(review): the plaenk_alt substitution keys off the target
        -- language code (args[1]), even for parts that carry their own `l`
        -- language - confirm this is intended.
        local part_alt
        local canonical = args[1] == "lnk-pro" and plaenk_alt[part]
        if canonical then
            part_alt = part
            part = canonical
        end
        part_count = part_count + 1

        local language_from = nil
        local language_code = args["l"][index] or inline["l"]
        if language_code then
            language_from = m_languages.get_by_code(language_code)
        end

        -- Category names cite the part with a leading "*" when the language
        -- the part belongs to has a truthy `proto` field.
        local cite_term = part
        if (language_from or language_to).proto then
            cite_term = "*" .. cite_term
        end

        if want_categories and not (args["noaff"][index] or inline["noaff"]) then
            -- Parts marked `noaff` never reach this point, so they are never
            -- classified as affixes.
            local relation
            if is_infix(part) then
                relation = " terms infixed with "
            elseif is_prefix(part) then
                relation = " terms prefixed with "
            elseif is_suffix(part) then
                relation = " terms suffixed with "
            elseif is_circumfix(part) then
                relation = " terms circumfixed with "
            end

            if language_from then
                table.insert(categories, language_to.name .. " terms derived from " .. language_from.name)
            elseif relation then
                table.insert(categories, language_to.name .. relation .. cite_term)
            end
            if relation then
                affix_count = affix_count + 1
            end
        end

        table.insert(links, m_links.full_link({
            term = part,
            language = language_from or language_to,
            alt = args["alt"][index] or inline["alt"] or part_alt,
            gloss = args["t"][index] or inline["t"],
            pos = args["pos"][index] or inline["pos"],
            anchor = args["anchor"][index] or inline["anchor"] or inline["a"],
            nolink = args["nolink"][index] or inline["nolink"],
            hypo = args["hypo"][index] or inline["hypo"],
            showlanguage = (language_from and true),
            nobold = true,
        }, "term"))
    end

    if want_categories and part_count > 1 and affix_count == 0 then
        table.insert(categories, language_to.name .. " compound terms")
        local type_key = args["type"]
        if type_key then
            local compound = affix_compound_data[affix_compound_aliases[type_key] or type_key]
            if compound then
                for _, category in ipairs(compound["categories"]) do
                    table.insert(categories, language_to.name .. " " .. category)
                end
            end
        end
    end

    local out = table.concat(links, " + ")
    if not args["notext"] then
        out = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) .. out
    end
    if want_categories then
        mw.addWarning(root_reminder(args[1]))
    end
    return format_etymology(out, categories)
end
-- Module entry point. frame.args[1] names the sub-template; the first data
-- table (checked in the order listed below) that has an entry for it decides
-- which renderer handles the call. Raises an error when no table knows the
-- name.
function export.show(frame)
    local template = frame.args[1]
    local dispatch = {
        { no_term_data, no_term_etymology },
        { single_term_data, single_term_etymology },
        { multi_term_data, multi_term_etymology },
        { derived_term_data, derived_term_etymology },
        { affix_data, affix_etymology },
    }
    for _, route in ipairs(dispatch) do
        local known, render = route[1], route[2]
        if known[template] then
            return render(template, frame)
        end
    end
    error("No such sub-template type is defined!")
end
return export