Module:etymology: Difference between revisions

From Laenkea
Jump to navigation Jump to search
No edit summary
No edit summary
 
(45 intermediate revisions by 3 users not shown)
Line 1: Line 1:
local export = {}
local export = {}


local m_inline = require("Module:inline")
local m_languages = require("Module:languages")
local m_languages = require("Module:languages")
local m_links = require("Module:links")
local m_links = require("Module:links")
local m_parameters = require("Module:parameters")
local m_parameters = require("Module:parameters")
local plaenk_alt = mw.loadData("Module:lnk-pro-morph/data").alt
local function root_reminder(code) return "Have you forgotten to use <code><nowiki>{{root|" .. code .. "|lnk-pro}}</nowiki></code>? Remember to use it even in derived terms and compounds." end
local new_pos_data = {
["adjective"] = {
categories = {"adjectivisations"},
},
["noun"] = {
categories = {"nominalisations"},
},
["verb"] = {
categories = {"verbalisations"},
},
}
local new_pos_aliases = {
["adj"] = "adjective",
["n"] = "noun",
["v"] = "verb",
}


local no_term_params = {
local no_term_params = {
     [1] = {required = true},
     [1] = {required = true},
     ["nocap"] = {type = "boolean"},
     ["nocap"] = {type = "boolean"},
    ["nc"] = {alias_of = "nocap"},
     ["notext"] = {type = "boolean"},
     ["notext"] = {type = "boolean"},
     ["nocat"] = {type = "boolean"},
     ["nocat"] = {type = "boolean"},
Line 14: Line 36:
     ["onomatopoeic"] = {
     ["onomatopoeic"] = {
         categories = {"onomatopoeias"},
         categories = {"onomatopoeias"},
         glossary = true,
         text = true,
     },
     },
     ["unknown"] = {
     ["unknown"] = {
         categories = {"terms with unknown etymologies"},
         categories = {"terms with unknown etymologies"},
        text = function(nocap) return (nocap and "u" or "U") .. "nknown" end,
    },
    ["uncertain"] = {
        categories = {"terms with uncertain etymologies"},
        text = function(nocap) return (nocap and "u" or "U") .. "ncertain" end,
     },
     },
}
}


local single_term_params = {
local single_term_params = {
[1] = {required = true},
    [1] = {required = true},
[2] = {required = true},
    [2] = {required = true},
[3] = {alias_of = "alt"},
    [3] = {alias_of = "alt"},
[4] = {alias_of = "t"},
    [4] = {alias_of = "t"},
["alt"] = {},
    ["alt"] = {},
["t"] = {},
    ["t"] = {},
["pos"] = {},
    ["anchor"] = {},
["nocap"] = {type = "boolean"},
    ["a"] = {alias_of = "anchor"},
["notext"] = {type = "boolean"},
    ["pos"] = {},
["nocat"] = {type = "boolean"},
    ["newpos"] = {},
    ["nocap"] = {type = "boolean"},
    ["nc"] = {alias_of = "nocap"},
    ["notext"] = {type = "boolean"},
    ["nocat"] = {type = "boolean"},
}
}
local single_term_data = {
local single_term_data = {
     ["abbreviation"] = {
     ["abbreviation"] = {
         glossary = true,
         text = true,
         categories = {"abbreviations"}
         categories = {"abbreviations"}
     },
     },
     ["back-formation"] = {
     ["back-formation"] = {
         glossary = true,
         text = true,
        preposition = "from",
         categories = {"back-formations"}
         categories = {"back-formations"}
     },
     },
     ["clipping"] = {
     ["clipping"] = {
         glossary = true,
         text = true,
         categories = {"clippings"},
         categories = {"clippings"},
    },
    ["contraction"] = {
    text = true,
    categories = {"contractions"},
    },
    ["deadjectival"] = {
    text = true,
    preposition = "from",
    categories = {"deadjectivals"},
    },
    ["denominal"] = {
text = true,
preposition = "from",
categories = {"denominals"},
    },
    ["deverbal"] = {
    text = true,
    preposition = "from",
    categories = {"deverbals"},
     },
     },
     ["doublet"] = {
     ["doublet"] = {
    glossary = true,
        text = true,
    categories = {"doublets"},
        categories = {"doublets"},
     },
     },
     ["ellipsis"] = {
     ["ellipsis"] = {
         glossary = true,
         text = true,
         categories = {"ellipses"},
         categories = {"ellipses"},
    },
    ["grammaticalisation"] = {
        text = true,
        categories = {"grammaticalisations"},
     },
     },
     ["initialism"] = {
     ["initialism"] = {
         glossary = true,
         text = true,
         categories = {"initialisms"},
         categories = {"initialisms"},
     },
     },
     ["rebracketing"] = {
     ["rebracketing"] = {
         glossary = true,
         text = true,
         categories = {"rebracketings"},
         categories = {"rebracketings"},
     },
     },
     ["reduplication"] = {
     ["reduplication"] = {
         glossary = true,
         text = true,
         categories = {"reduplications"},
         categories = {"reduplications"},
     },
     },
    ["univerbation"] = {
        text = true,
        categories = {"univerbations"},
    },
}
local multi_term_params = {
[1] = {required = true},
[2] = {list = true},
["alt"] = {list = true, allow_holes = true},
["anchor"] = {list = true, allow_holes = true},
    ["a"] = {alias_of = "anchor"},
["t"] = {list = true, allow_holes = true},
["pos"] = {list = true, allow_holes = true},
["nocap"] = {type = "boolean"},
["nc"] = {alias_of = "nocap"},
["notext"] = {type = "boolean"},
["nocat"] = {type = "boolean"},
}
local multi_term_data = {
["blend"] = {
text = true,
categories = {"blends"},
},
}
}


Line 75: Line 154:
     [5] = {alias_of = "t"},
     [5] = {alias_of = "t"},
     ["alt"] = {},
     ["alt"] = {},
    ["anchor"] = {},
    ["a"] = {alias_of = "anchor"},
     ["t"] = {},
     ["t"] = {},
     ["pos"] = {},
     ["pos"] = {},
     ["nocap"] = {type = "boolean"},
     ["nocap"] = {type = "boolean"},
    ["nc"] = {alias_of = "nocap"},
     ["notext"] = {type = "boolean"},
     ["notext"] = {type = "boolean"},
     ["nocat"] = {type = "boolean"},
     ["nocat"] = {type = "boolean"},
Line 83: Line 165:
local derived_term_data = {
local derived_term_data = {
     ["borrowed"] = {
     ["borrowed"] = {
        silent = true,
         categories = {"terms borrowed from"},
         categories = {"terms borrowed from"},
     },
     },
     ["calque"] = {
     ["calque"] = {
         glossary = "calque",
         text = true,
         categories = {"terms calqued from"},
         categories = {"terms calqued from"},
     },
     },
     ["derived"] = {
     ["derived"] = {
        silent = true,
         categories = {"terms derived from"},
         categories = {"terms derived from"},
     },
     },
     ["inherited"] = {
     ["inherited"] = {
    silent = true,
        categories = {"terms inherited from"},
    categories = {"terms inherited from"},
     },
     },
     ["learned borrowing"] = {
     ["learned borrowing"] = {
         glossary = true,
         text = true,
         categories = {"terms borrowed from", "learned borrowings from"},
         categories = {"terms borrowed from", "learned borrowings from"},
     },
     },
     ["orthographic borrowing"] = {
     ["orthographic borrowing"] = {
         glossary = true,
         text = true,
         categories = {"terms borrowed from", "orthographic borrowings from"},
         categories = {"terms borrowed from", "orthographic borrowings from"},
     },
     },
     ["semantic loan"] = {
     ["semantic loan"] = {
         glossary = true,
         text = true,
         categories = {"terms derived from", "semantic loans from"},
         categories = {"terms derived from", "semantic loans from"},
     },
     },
     ["phono-semantic matching"] = {
     ["phono-semantic matching"] = {
         glossary = true,
         text = true,
         categories = {"terms derived from", "phono-semantic matchings from"},
         categories = {"terms derived from", "phono-semantic matchings from"},
     },
     },
     ["transliteration"] = {
     ["transliteration"] = {
         glossary = true,
         text = true,
         categories = {"terms derived from", "transliterations of"},
         categories = {"terms derived from", "transliterations of"},
     },
     },
Line 123: Line 202:
     [1] = {required = true},
     [1] = {required = true},
     [2] = {list = true},
     [2] = {list = true},
    ["type"] = {},
     ["t"] = {list = true, allow_holes = true},
     ["t"] = {list = true, allow_holes = true},
     ["l"] = {list = true, allow_holes = true},
     ["l"] = {list = true, allow_holes = true},
     ["alt"] = {list = true, allow_holes = true},
     ["alt"] = {list = true, allow_holes = true},
    ["anchor"] = {list = true, allow_holes = true},
    ["a"] = {alias_of = "anchor"},
     ["pos"] = {list = true, allow_holes = true},
     ["pos"] = {list = true, allow_holes = true},
     ["noaff"] = {list = true, allow_holes = true, type = "boolean"},
     ["noaff"] = {list = true, allow_holes = true, type = "boolean"},
    ["root"] = {alias_of = "noaff"},
    ["nolink"] = {list = true, allow_holes = true, type = "boolean"},
    ["hypo"] = {list = true, allow_holes = true, type = "boolean"},
    ["hypothetical"] = {alias_of = "hypo"},
    ["notext"] = {type = "boolean"},
    ["nocap"] = {type = "boolean"},
    ["nc"] = {alias_of = "nocap"},
     ["nocat"] = {type = "boolean"},
     ["nocat"] = {type = "boolean"},
}
local affix_data = {
    ["affix"] = {},
    ["surface analysis"] = {
        text = function(nocap) return (nocap and "b" or "B") .. "y [[Appendix:Glossary#surface analysis|surface analysis]], " end
    }
}
}
local affix_delimiter = {
local affix_delimiter = {
     ["-"] = true,
     ["-"] = true,
     ["·"] = true,
     ["·"] = true,
}
local affix_delimiter_pattern = "[%-·]"
local affix_compound_data = {
["alliterative"] = {
categories = {"alliterative compounds"},
},
["antonymous"] = {
categories = {"antonymous compounds"},
},
["bahuvrihi"] = {
categories = {"bahuvrihi compounds"},
},
["coordinative"] = {
categories = {"coordinative compounds"},
},
["descriptive"] = {
categories = {"descriptive compounds"},
},
["determinative"] = {
categories = {"determinative compounds"},
},
["dvandva"] = {
categories = {"dvandva compounds"},
},
["endocentric"] = {
categories = {"endocentric compounds"},
},
["exocentric"] = {
categories = {"exocentric compounds"},
},
["karmadharaya"] = {
categories = {"karmadharaya compounds"},
},
["rhyming"] = {
categories = {"rhyming compounds"},
},
["synonymous"] = {
categories = {"synonymous compounds"},
},
["tatpurusa"] = {
categories = {"tatpurusa compounds"},
},
}
local affix_compound_aliases = {
allit = "alliterative",
ant = "antonymous",
bahu = "bahuvrihi",
bv = "bahuvrihi",
coord = "coordinative",
desc = "descriptive",
det = "determinative",
dva = "dvandva",
endo = "endocentric",
exo = "exocentric",
karma = "karmadharaya",
kd = "karmadharaya",
rhy = "rhyming",
syn = "synonymous",
tat = "tatpurusa",
tp = "tatpurusa",
}
}
local function is_circumfix(word)
return mw.ustring.find(word, affix_delimiter_pattern .. " " .. affix_delimiter_pattern) ~= nil
end


local function is_infix(word)
local function is_infix(word)
Line 147: Line 306:
end
end


function export.format_etymology(out, categories)
local function format_etymology(out, categories)
     for _, category in ipairs(categories) do
     for _, category in ipairs(categories) do
         out = out .. "[[Category:" .. category .. "]]"
         out = out .. "[[Category:" .. category .. "]]"
Line 154: Line 313:
end
end


function export.format_glossary(text, glossary_arg, nocap_arg)
local function format_solo_text(label, text_data, nocap_arg)
     if not glossary_arg then return nocap_arg and text or mw.ustring.gsub(text, "^%l", string.upper) end
     if not text_data then return "" end
     local glossary_link = type(glossary_arg) == "string" and glossary_arg or text
    if type(text_data) == "function" then return text_data(nocap_arg) end
     local glossary_display = nocap_arg and text or mw.ustring.gsub(text, "^%l", string.upper)
    return "[[Appendix:Glossary#" .. label .. "|" .. (nocap_arg and label or mw.ustring.gsub(label, "^%l", string.upper)) .. "]]"
    return "[[Appendix:Glossary#" .. glossary_link .. "|" .. glossary_display .. "]]"
end
 
local function format_prefixed_text(label, text_data, nocap_arg, preposition)
    if not text_data then return "" end
     if type(text_data) == "function" then return text_data(nocap_arg) end
     return "[[Appendix:Glossary#" .. label .. "|" .. (nocap_arg and label or mw.ustring.gsub(label, "^%l", string.upper)) .. "]] " .. (preposition or "of") .. " "
end
end


function export.hydrate_category(category, language_to, language_from)
local function hydrate_category(category, language_to, language_from, newpos)
     local new_category = language_to.name .. " " .. category
     local new_category = language_to.name .. " " .. category
     if language_from then new_category = new_category .. " " .. language_from.name end
     if language_from then new_category = new_category .. " " .. language_from.name end
Line 167: Line 331:
end
end


function export.hydrate_categories(categories, language_to, language_from)
local function hydrate_categories(categories, language_to, language_from, newpos)
     local new_categories = {}
     local new_categories = {}
     for i, category in ipairs(categories) do
     for i, category in ipairs(categories) do
         new_categories[i] = export.hydrate_category(category, language_to, language_from)
         new_categories[i] = language_to.name .. " " .. category .. (language_from and (" " .. language_from.name) or "")
    end
    if newpos then
    newpos = new_pos_aliases[newpos] or newpos
    local newpos_data = new_pos_data[newpos]
    for _, category in ipairs(newpos_data["categories"]) do table.insert(new_categories, language_to.name .. " " .. category) end
     end
     end
     return new_categories
     return new_categories
end
end


function export.no_term_etymology(template, frame)
local function no_term_etymology(template, frame)
     local data, args = no_term_data[template], m_parameters.process(frame:getParent().args, no_term_params)
     local data, args = no_term_data[template], m_parameters.process(frame:getParent().args, no_term_params)
     local out, categories = "", {}
     local out, categories = "", {}
     local language = m_languages.get_by_code(args[1])
     local language = m_languages.get_by_code(args[1])
     if not args["nocat"] then categories = export.hydrate_categories(data["categories"], language) end
     if not args["nocat"] then categories = hydrate_categories(data["categories"], language) end
     if not args["notext"] then out = export.format_glossary(template, data["glossary"], args["nocap"]) end
     if not args["notext"] then out = format_solo_text(template, data["text"], args["nocap"]) end
     return export.format_etymology(out, categories)
     return format_etymology(out, categories)
end
end


function export.single_term_etymology(template, frame)
local function single_term_etymology(template, frame)
     local data, args = single_term_data[template], m_parameters.process(frame:getParent().args, single_term_params)
     local data, args = single_term_data[template], m_parameters.process(frame:getParent().args, single_term_params)
     local out, categories = "", {}
     local out, categories = "", {}
     local language = m_languages.get_by_code(args[1])
     local language = m_languages.get_by_code(args[1])
     if not args["notext"] then out = out .. export.format_glossary(template, data["glossary"], args["nocap"]) .. " of " end
     if not args["notext"] then out = out .. format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) end
     out = out .. m_links.full_link({
     out = out .. m_links.full_link({
         term = args[2],
         term = args[2],
Line 195: Line 364:
         gloss = args["t"],
         gloss = args["t"],
         pos = args["pos"],
         pos = args["pos"],
        anchor = args["anchor"],
         nobold = true,
         nobold = true,
     },"term")
     }, "term")
     if not args["nocat"] then categories = export.hydrate_categories(data["categories"], language) end
     if not args["nocat"] then
     return export.format_etymology(out, categories)
    categories = hydrate_categories(data["categories"], language, nil, args["newpos"])
    mw.addWarning(root_reminder(args[1]))
end
     return format_etymology(out, categories)
end
end


function export.derived_term_etymology(template, frame)
local function multi_term_etymology(template, frame)
local data, args = multi_term_data[template], m_parameters.process(frame:getParent().args, multi_term_params)
local out, categories = "", {}
local language = m_languages.get_by_code(args[1])
if not args["notext"] then out = out .. format_prefixed_text(template, data["text"], args["nocap"]) end
for i, word in ipairs(args[2]) do
local i_term, i_args = m_inline.parse(word)
if i > 1 then out = out .. " + " end
out = out .. m_links.full_link({
term = i_term,
language = language,
alt = args["alt"][i] or i_args["alt"],
gloss = args["t"][i] or i_args["t"],
pos = args["pos"][i] or i_args["pos"],
anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"],
}, "term")
end
if not args["nocat"] then
categories = hydrate_categories(data["categories"], language, nil, args["preposition"])
mw.addWarning(root_reminder(args[1]))
end
return format_etymology(out, categories)
end
 
local function derived_term_etymology(template, frame)
     local data, args = derived_term_data[template], m_parameters.process(frame:getParent().args, derived_term_params)
     local data, args = derived_term_data[template], m_parameters.process(frame:getParent().args, derived_term_params)
     local out, categories = "", {}
     local out, categories = "", {}
     local language_to = m_languages.get_by_code(args[1])
     local language_to = m_languages.get_by_code(args[1])
     local language_from = m_languages.get_by_code(args[2])
     local language_from = m_languages.get_by_code(args[2])
     if (not args["notext"]) and (not data["silent"]) then out = out .. export.format_glossary(template, data["glossary"], args["nocap"]) .. " of " end
    local term = args[3]
    local alt
    if args[2] == "lnk-pro" and plaenk_alt[term] then
    alt = term
    term = plaenk_alt[term]
end
     if (not args["notext"]) and (not data["silent"]) then out = out .. format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) end
     out = out .. m_links.full_link({
     out = out .. m_links.full_link({
         term = args[3],
         term = term,
         language = language_from,
         language = language_from,
         alt = args["alt"],
         alt = args["alt"] or alt,
         gloss = args["t"],
         gloss = args["t"],
         pos = args["pos"],
         pos = args["pos"],
        anchor = args["anchor"],
         showlanguage = true,
         showlanguage = true,
         nobold = true,
         nobold = true,
     },"term")
     }, "term")
     if not args["nocat"] then categories = export.hydrate_categories(data["categories"], language_to, language_from) end
     if not args["nocat"] then
     return export.format_etymology(out, categories)
    categories = hydrate_categories(data["categories"], language_to, language_from)
    mw.addWarning(root_reminder(args[1]))
    end
     return format_etymology(out, categories)
end
end


function export.affix_etymology(frame)
local function affix_etymology(template, frame)
     local args = m_parameters.process(frame:getParent().args, affix_params)
     local data, args = affix_data[template], m_parameters.process(frame:getParent().args, affix_params)
     local pre_out, categories = {}, {}
     local pre_out, categories = {}, {}
     local language_to = m_languages.get_by_code(args[1])
     local language_to = m_languages.get_by_code(args[1])
     local n_parts, n_affixes = 0, 0
     local n_parts, n_affixes = 0, 0
     for i, term in ipairs(args[2]) do
     for i, term in ipairs(args[2]) do
        local i_term, i_args = m_inline.parse(term)
        local i_alt
        if args[1] == "lnk-pro" and plaenk_alt[i_term] then
        i_alt = i_term
        i_term = plaenk_alt[i_term]
    end
         n_parts = n_parts + 1
         n_parts = n_parts + 1
         local language_from = nil
         local language_from = nil
         if args["l"][i] then language_from = m_languages.get_by_code(args["l"][i]) end
         if args["l"][i] or i_args["l"] then language_from = m_languages.get_by_code(args["l"][i] or i_args["l"]) end
         local cite_term = term
         local cite_term = i_term
         if (language_from and language_from.proto) or ((not language_from) and language_to.proto) then cite_term = "*" .. cite_term end
         if (language_from and language_from.proto) or ((not language_from) and language_to.proto) then cite_term = "&#42;" .. cite_term end
         if not args["nocat"] then
         if not args["nocat"] then
             if args["noaff"][i] then
             if args["noaff"][i] or i_args["noaff"] then
                 --cool, this is a marked non-affix, don't let it be classified as one!
                 -- this is a marked non-affix, don't let it be classified as one!
             elseif language_from then
             elseif language_from then
            table.insert(categories, language_to.name .. " terms derived from " .. language_from.name)
                table.insert(categories, language_to.name .. " terms derived from " .. language_from.name)
            if is_infix(term) or is_prefix(term) or is_suffix(term) then n_affixes = n_affixes + 1 end
                if is_infix(i_term) or is_prefix(i_term) or is_suffix(i_term) or is_circumfix(i_term) then n_affixes = n_affixes + 1 end
             elseif is_infix(term) then
             elseif is_infix(i_term) then
                 table.insert(categories, language_to.name .. " terms infixed with " .. cite_term)
                 table.insert(categories, language_to.name .. " terms infixed with " .. cite_term)
                 n_affixes = n_affixes + 1
                 n_affixes = n_affixes + 1
             elseif is_prefix(term) then
             elseif is_prefix(i_term) then
                 table.insert(categories, language_to.name .. " terms prefixed with " .. cite_term)
                 table.insert(categories, language_to.name .. " terms prefixed with " .. cite_term)
                 n_affixes = n_affixes + 1
                 n_affixes = n_affixes + 1
             elseif is_suffix(term) then
             elseif is_suffix(i_term) then
                 table.insert(categories, language_to.name .. " terms suffixed with " .. cite_term)
                 table.insert(categories, language_to.name .. " terms suffixed with " .. cite_term)
                 n_affixes = n_affixes + 1
                 n_affixes = n_affixes + 1
            elseif is_circumfix(i_term) then
            table.insert(categories, language_to.name .. " terms circumfixed with " .. cite_term)
            n_affixes = n_affixes + 1
             end
             end
         end
         end
         table.insert(pre_out, m_links.full_link({
         table.insert(pre_out, m_links.full_link({
            term = term,
            term = i_term,
            language = language_from or language_to,
            language = language_from or language_to,
            alt = args["alt"][i],
            alt = args["alt"][i] or i_args["alt"] or i_alt,
            gloss = args["t"][i],
            gloss = args["t"][i] or i_args["t"],
            pos = args["pos"][i],
            pos = args["pos"][i] or i_args["pos"],
            showlanguage = (language_from and true),
            anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"],
            nobold = true,
            nolink = args["nolink"][i] or i_args["nolink"],
        },"term"))
            hypo = args["hypo"][i] or i_args["hypo"],
            showlanguage = (language_from and true),
            nobold = true,
        }, "term"))
     end
     end
     if (not args["nocat"]) and (n_parts > 1 and n_affixes == 0) then
     if (not args["nocat"]) and (n_parts > 1 and n_affixes == 0) then
         table.insert(categories, language_to.name .. " compound terms")
         table.insert(categories, language_to.name .. " compound terms")
        if args["type"] then
        local affix_type = affix_compound_data[affix_compound_aliases[args["type"]] or args["type"]]
        if affix_type then
        for _, category in ipairs(affix_type["categories"]) do
        table.insert(categories, language_to.name .. " " .. category)
        end
        end
        end
    end
    local out = table.concat(pre_out, " + ")
    if not args["notext"] then
        out = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) .. out
     end
     end
     return export.format_etymology(table.concat(pre_out, " + "), categories)
     if not args["nocat"] then mw.addWarning(root_reminder(args[1])) end
    return format_etymology(out, categories)
end
end


function export.show(frame)
function export.show(frame)
     local template = frame.args[1]
     local template = frame.args[1]
     if no_term_data[template] then return export.no_term_etymology(template, frame) end
     if no_term_data[template] then return no_term_etymology(template, frame) end
     if single_term_data[template] then return export.single_term_etymology(template, frame) end
     if single_term_data[template] then return single_term_etymology(template, frame) end
     if derived_term_data[template] then return export.derived_term_etymology(template, frame) end
    if multi_term_data[template] then return multi_term_etymology(template, frame) end
     if template == "affix" then return export.affix_etymology(frame) end
     if derived_term_data[template] then return derived_term_etymology(template, frame) end
     if affix_data[template] then return affix_etymology(template, frame) end
     error("No such sub-template type is defined!")
     error("No such sub-template type is defined!")
end
end


return export
return export

Latest revision as of 16:20, 1 August 2024

The etymology module provides functionality for various etymology templates:


local export = {}

local m_inline = require("Module:inline")
local m_languages = require("Module:languages")
local m_links = require("Module:links")
local m_parameters = require("Module:parameters")
local plaenk_alt = mw.loadData("Module:lnk-pro-morph/data").alt

--- Build the editor-facing reminder about the `{{root}}` template.
-- The result is used as the message for `mw.addWarning` by the etymology
-- handlers in this module.
-- @param code language code spliced into the suggested template call
-- @return the reminder text (contains HTML/nowiki markup)
local function root_reminder(code)
    local suggestion = "<code><nowiki>{{root|" .. code .. "|lnk-pro}}</nowiki></code>"
    return "Have you forgotten to use " .. suggestion .. "? Remember to use it even in derived terms and compounds."
end

-- Extra categories for terms flagged (through the `newpos` parameter) as
-- having moved into a new part of speech. Keyed by the full part-of-speech
-- name; each category string gets the language name prepended when used.
local new_pos_data = {
    adjective = {categories = {"adjectivisations"}},
    noun = {categories = {"nominalisations"}},
    verb = {categories = {"verbalisations"}},
}
-- Abbreviated spellings accepted in place of the keys of `new_pos_data`.
local new_pos_aliases = {
    adj = "adjective",
    n = "noun",
    v = "verb",
}

-- Parameter specification (for Module:parameters' `process`) used by the
-- etymology types that take no source term.
local no_term_params = {
    [1] = {required = true},     -- language code of the entry
    nocap = {type = "boolean"},  -- keep the leading letter lower-case
    nc = {alias_of = "nocap"},
    notext = {type = "boolean"}, -- emit no visible text
    nocat = {type = "boolean"},  -- emit no categories
}
-- Etymology types that stand alone, without a source term.
--   categories: category names; the language name is prepended when used
--   text:       `true` to show a glossary link built from the type name, or
--               a function(nocap) returning the literal text to show
local no_term_data = {
    onomatopoeic = {
        categories = {"onomatopoeias"},
        text = true,
    },
    unknown = {
        categories = {"terms with unknown etymologies"},
        text = function(nocap)
            if nocap then return "unknown" end
            return "Unknown"
        end,
    },
    uncertain = {
        categories = {"terms with uncertain etymologies"},
        text = function(nocap)
            if nocap then return "uncertain" end
            return "Uncertain"
        end,
    },
}

-- Parameter specification (for Module:parameters' `process`) used by the
-- etymology types that point at exactly one term of the same language.
local single_term_params = {
    [1] = {required = true}, -- language code
    [2] = {required = true}, -- the term being referred to
    [3] = {alias_of = "alt"},
    [4] = {alias_of = "t"},
    alt = {},
    t = {},
    anchor = {},
    a = {alias_of = "anchor"},
    pos = {},
    newpos = {},             -- looked up in new_pos_data / new_pos_aliases
    nocap = {type = "boolean"},
    nc = {alias_of = "nocap"},
    notext = {type = "boolean"},
    nocat = {type = "boolean"},
}
-- Etymology types that refer to exactly one term of the same language.
--   text:        `true` to show a glossary link built from the type name
--   preposition: word placed between that link and the term; "of" is used
--                when the field is absent
--   categories:  category names; the language name is prepended when used
local single_term_data = {
    abbreviation = {
        text = true,
        categories = {"abbreviations"},
    },
    ["back-formation"] = {
        text = true,
        preposition = "from",
        categories = {"back-formations"},
    },
    clipping = {
        text = true,
        categories = {"clippings"},
    },
    contraction = {
        text = true,
        categories = {"contractions"},
    },
    deadjectival = {
        text = true,
        preposition = "from",
        categories = {"deadjectivals"},
    },
    denominal = {
        text = true,
        preposition = "from",
        categories = {"denominals"},
    },
    deverbal = {
        text = true,
        preposition = "from",
        categories = {"deverbals"},
    },
    doublet = {
        text = true,
        categories = {"doublets"},
    },
    ellipsis = {
        text = true,
        categories = {"ellipses"},
    },
    grammaticalisation = {
        text = true,
        categories = {"grammaticalisations"},
    },
    initialism = {
        text = true,
        categories = {"initialisms"},
    },
    rebracketing = {
        text = true,
        categories = {"rebracketings"},
    },
    reduplication = {
        text = true,
        categories = {"reduplications"},
    },
    univerbation = {
        text = true,
        categories = {"univerbations"},
    },
}

-- Parameter specification (for Module:parameters' `process`) used by the
-- etymology types that combine several terms of the same language.
-- The per-term options are lists indexed in step with the terms in [2].
local multi_term_params = {
    [1] = {required = true}, -- language code
    [2] = {list = true},     -- the terms, in order
    alt = {list = true, allow_holes = true},
    anchor = {list = true, allow_holes = true},
    a = {alias_of = "anchor"},
    t = {list = true, allow_holes = true},
    pos = {list = true, allow_holes = true},
    nocap = {type = "boolean"},
    nc = {alias_of = "nocap"},
    notext = {type = "boolean"},
    nocat = {type = "boolean"},
}
-- Etymology types that combine several terms of the same language.
-- Fields have the same meaning as in `single_term_data`.
local multi_term_data = {
    blend = {
        text = true,
        categories = {"blends"},
    },
}

-- Parameter specification (for Module:parameters' `process`) used by the
-- etymology types that derive a term from a term in another language.
local derived_term_params = {
    [1] = {required = true}, -- code of the language the entry belongs to
    [2] = {required = true}, -- code of the language the term comes from
    [3] = {required = true}, -- the source term
    [4] = {alias_of = "alt"},
    [5] = {alias_of = "t"},
    alt = {},
    anchor = {},
    a = {alias_of = "anchor"},
    t = {},
    pos = {},
    nocap = {type = "boolean"},
    nc = {alias_of = "nocap"},
    notext = {type = "boolean"},
    nocat = {type = "boolean"},
}
-- Etymology types that derive a term from a term in another language.
--   text:       `true` to show a glossary link built from the type name;
--               entries without it show only the linked term
--   categories: category names; the entry's language name is prepended and
--               the source language's name appended when used
local derived_term_data = {
    borrowed = {categories = {"terms borrowed from"}},
    calque = {
        text = true,
        categories = {"terms calqued from"},
    },
    derived = {categories = {"terms derived from"}},
    inherited = {categories = {"terms inherited from"}},
    ["learned borrowing"] = {
        text = true,
        categories = {"terms borrowed from", "learned borrowings from"},
    },
    ["orthographic borrowing"] = {
        text = true,
        categories = {"terms borrowed from", "orthographic borrowings from"},
    },
    ["semantic loan"] = {
        text = true,
        categories = {"terms derived from", "semantic loans from"},
    },
    ["phono-semantic matching"] = {
        text = true,
        categories = {"terms derived from", "phono-semantic matchings from"},
    },
    transliteration = {
        text = true,
        categories = {"terms derived from", "transliterations of"},
    },
}

-- Parameter specification (for Module:parameters' `process`) used by the
-- affix-style etymology types. Per-part options are lists indexed in step
-- with the parts given in [2].
local affix_params = {
    [1] = {required = true}, -- language code of the entry
    [2] = {list = true},     -- the parts (affixes and bases), in order
    type = {},               -- compound type; see affix_compound_data
    t = {list = true, allow_holes = true},
    l = {list = true, allow_holes = true},      -- per-part language code
    alt = {list = true, allow_holes = true},
    anchor = {list = true, allow_holes = true},
    a = {alias_of = "anchor"},
    pos = {list = true, allow_holes = true},
    noaff = {list = true, allow_holes = true, type = "boolean"}, -- part must not count as an affix
    root = {alias_of = "noaff"},
    nolink = {list = true, allow_holes = true, type = "boolean"},
    hypo = {list = true, allow_holes = true, type = "boolean"},
    hypothetical = {alias_of = "hypo"},
    notext = {type = "boolean"},
    nocap = {type = "boolean"},
    nc = {alias_of = "nocap"},
    nocat = {type = "boolean"},
}
-- Template kinds handled by affix_etymology. A "text" function receives the
-- nocap flag and returns the lead-in placed before the joined parts.
local affix_data = {
    affix = {},
    ["surface analysis"] = {
        text = function(nocap)
            local initial = "B"
            if nocap then initial = "b" end
            return initial .. "y [[Appendix:Glossary#surface analysis|surface analysis]], "
        end,
    },
}
-- Characters that mark the attaching edge of an affix, as a lookup set and as
-- a single-character class for pattern matching.
local affix_delimiter = {}
for _, mark in ipairs({"-", "·"}) do
    affix_delimiter[mark] = true
end
local affix_delimiter_pattern = "[%-·]"
-- Compound subtypes selectable through the "type" parameter of affix templates.
-- Every subtype contributes exactly one category stem, "<subtype> compounds".
local affix_compound_data = {}
for _, kind in ipairs({
    "alliterative",
    "antonymous",
    "bahuvrihi",
    "coordinative",
    "descriptive",
    "determinative",
    "dvandva",
    "endocentric",
    "exocentric",
    "karmadharaya",
    "rhyming",
    "synonymous",
    "tatpurusa",
}) do
    affix_compound_data[kind] = {categories = {kind .. " compounds"}}
end
-- Shorthand spellings accepted for the compound subtypes, mapped to the
-- canonical key used in affix_compound_data.
local affix_compound_aliases = {}
for canonical, shorthands in pairs({
    alliterative = {"allit"},
    antonymous = {"ant"},
    bahuvrihi = {"bahu", "bv"},
    coordinative = {"coord"},
    descriptive = {"desc"},
    determinative = {"det"},
    dvandva = {"dva"},
    endocentric = {"endo"},
    exocentric = {"exo"},
    karmadharaya = {"karma", "kd"},
    rhyming = {"rhy"},
    synonymous = {"syn"},
    tatpurusa = {"tat", "tp"},
}) do
    for _, shorthand in ipairs(shorthands) do
        affix_compound_aliases[shorthand] = canonical
    end
end

-- True when the word contains two delimiter characters separated by a single
-- space (for example "ge- -t"), which is how a circumfix is written.
local function is_circumfix(word)
    local two_halves = affix_delimiter_pattern .. " " .. affix_delimiter_pattern
    local found_at = mw.ustring.find(word, two_halves)
    return found_at ~= nil
end

-- Truthy when both the first and the last character of the word are affix
-- delimiters (for example "-um-").
local function is_infix(word)
    local leading = affix_delimiter[mw.ustring.sub(word, 1, 1)]
    if not leading then return leading end
    return affix_delimiter[mw.ustring.sub(word, -1)]
end

-- Truthy when the word ends in an affix delimiter (for example "un-").
local function is_prefix(word)
    local final_char = mw.ustring.sub(word, -1)
    return affix_delimiter[final_char]
end

-- Truthy when the word begins with an affix delimiter (for example "-s").
local function is_suffix(word)
    local first_char = mw.ustring.sub(word, 1, 1)
    return affix_delimiter[first_char]
end

-- Appends one "[[Category:...]]" wikilink per category name to the rendered
-- text, in list order, and returns the combined wikitext.
local function format_etymology(out, categories)
    local links = {}
    for index, category in ipairs(categories) do
        links[index] = "[[Category:" .. category .. "]]"
    end
    if #links == 0 then return out end
    return out .. table.concat(links)
end

-- Builds the text for an etymology that consists of a label only.
--   label     - glossary anchor and display text (the template kind)
--   text_data - falsy: no text at all ("" is returned);
--               function: called with nocap_arg, its result is returned as-is;
--               anything else: a link to Appendix:Glossary#<label> is produced
--   nocap_arg - when truthy the label is shown as written, otherwise its
--               first letter is capitalised
local function format_solo_text(label, text_data, nocap_arg)
    if not text_data then return "" end
    if type(text_data) == "function" then return text_data(nocap_arg) end
    local shown = label
    if not nocap_arg then
        -- "%l" is matched Unicode-aware by mw.ustring.gsub, so the replacement
        -- must be Unicode-aware too; string.upper only handles ASCII bytes and
        -- would leave a non-ASCII initial letter in lower case.
        shown = mw.ustring.gsub(label, "^%l", mw.ustring.upper)
    end
    return "[[Appendix:Glossary#" .. label .. "|" .. shown .. "]]"
end

-- Builds the lead-in text placed before a linked term, e.g. "Calque of ".
--   label       - glossary anchor and display text (the template kind)
--   text_data   - falsy: no text at all ("" is returned);
--                 function: called with nocap_arg, its result is returned as-is;
--                 anything else: a link to Appendix:Glossary#<label> followed by
--                 the preposition and a trailing space
--   nocap_arg   - when truthy the label is shown as written, otherwise its
--                 first letter is capitalised
--   preposition - word placed after the link; defaults to "of"
local function format_prefixed_text(label, text_data, nocap_arg, preposition)
    if not text_data then return "" end
    if type(text_data) == "function" then return text_data(nocap_arg) end
    local shown = label
    if not nocap_arg then
        -- "%l" is matched Unicode-aware by mw.ustring.gsub, so the replacement
        -- must be Unicode-aware too; string.upper only handles ASCII bytes and
        -- would leave a non-ASCII initial letter in lower case.
        shown = mw.ustring.gsub(label, "^%l", mw.ustring.upper)
    end
    return "[[Appendix:Glossary#" .. label .. "|" .. shown .. "]] " .. (preposition or "of") .. " "
end

-- Expands a single category stem into "<language_to name> <stem>", followed by
-- " <language_from name>" when a source language is given.
-- The newpos parameter is accepted but not used by this function.
local function hydrate_category(category, language_to, language_from, newpos)
    local hydrated = language_to.name .. " " .. category
    if not language_from then return hydrated end
    return hydrated .. " " .. language_from.name
end

-- Expands category stems into full category names for an entry.
--   categories    - list of stems; nil is treated as an empty list
--   language_to   - language of the entry; its name prefixes every category
--   language_from - optional source language; its name is appended to every stem
--   newpos        - optional part-of-speech key or alias; when given, the
--                   categories listed for it in new_pos_data are added as well
-- Returns a new list of category names.
-- Raises an error when newpos is given but is not a known key or alias.
local function hydrate_categories(categories, language_to, language_from, newpos)
    local new_categories = {}
    local suffix = language_from and (" " .. language_from.name) or ""
    -- Data entries without a category list simply contribute nothing.
    for i, category in ipairs(categories or {}) do
        new_categories[i] = language_to.name .. " " .. category .. suffix
    end
    if newpos then
        local newpos_data = new_pos_data[new_pos_aliases[newpos] or newpos]
        if not newpos_data then
            -- Without this check a mistyped value fails with an opaque
            -- "attempt to index a nil value" error.
            error("Unrecognised part of speech \"" .. tostring(newpos) .. "\" given for newpos")
        end
        for _, category in ipairs(newpos_data["categories"]) do
            new_categories[#new_categories + 1] = language_to.name .. " " .. category
        end
    end
    return new_categories
end

-- Renders an etymology that names no term: an optional label text plus the
-- categories configured for the template kind in no_term_data.
-- The "nocat" and "notext" arguments suppress categories and text respectively.
local function no_term_etymology(template, frame)
    local data = no_term_data[template]
    local args = m_parameters.process(frame:getParent().args, no_term_params)
    local language = m_languages.get_by_code(args[1])

    local categories = {}
    if not args["nocat"] then
        categories = hydrate_categories(data["categories"], language)
    end

    local out = ""
    if not args["notext"] then
        out = format_solo_text(template, data["text"], args["nocap"])
    end

    return format_etymology(out, categories)
end

-- Renders an etymology built around one term in the entry's own language:
-- optional lead-in text, a link to the term, and the configured categories.
-- Unless "nocat" is set, a preview warning produced by root_reminder is added too.
local function single_term_etymology(template, frame)
    local data = single_term_data[template]
    local args = m_parameters.process(frame:getParent().args, single_term_params)
    local language = m_languages.get_by_code(args[1])

    local lead = ""
    if not args["notext"] then
        lead = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"])
    end

    local link_spec = {
        term = args[2],
        language = language,
        alt = args["alt"],
        gloss = args["t"],
        pos = args["pos"],
        anchor = args["anchor"],
        nobold = true,
    }
    local out = lead .. m_links.full_link(link_spec, "term")

    if args["nocat"] then
        return format_etymology(out, {})
    end
    local categories = hydrate_categories(data["categories"], language, nil, args["newpos"])
    mw.addWarning(root_reminder(args[1]))
    return format_etymology(out, categories)
end

-- Renders an etymology built from several terms in the entry's own language:
-- optional lead-in text, the terms linked and joined with " + ", and the
-- configured categories.
-- Each term may carry inline modifiers (split off by m_inline.parse); the
-- per-index list arguments take precedence over the inline values.
-- Unless "nocat" is set, a preview warning produced by root_reminder is added too.
local function multi_term_etymology(template, frame)
    local data, args = multi_term_data[template], m_parameters.process(frame:getParent().args, multi_term_params)
    local language = m_languages.get_by_code(args[1])

    local links = {}
    for i, word in ipairs(args[2]) do
        local i_term, i_args = m_inline.parse(word)
        links[i] = m_links.full_link({
            term = i_term,
            language = language,
            alt = args["alt"][i] or i_args["alt"],
            gloss = args["t"][i] or i_args["t"],
            pos = args["pos"][i] or i_args["pos"],
            anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"],
        }, "term")
    end

    local out = table.concat(links, " + ")
    if not args["notext"] then
        out = format_prefixed_text(template, data["text"], args["nocap"]) .. out
    end

    local categories = {}
    if not args["nocat"] then
        -- The fourth argument of hydrate_categories is the new part of speech.
        -- This call used to pass args["preposition"], which is never a valid
        -- part of speech; it now passes args["newpos"] like single_term_etymology.
        -- NOTE(review): this is nil unless multi_term_params declares "newpos" - confirm.
        categories = hydrate_categories(data["categories"], language, nil, args["newpos"])
        mw.addWarning(root_reminder(args[1]))
    end
    return format_etymology(out, categories)
end

-- Renders an etymology that derives the entry from a term in another language:
-- optional lead-in text, a link to the source term (with its language shown),
-- and categories combining both languages.
-- For source language "lnk-pro", a term listed in plaenk_alt is linked under
-- the mapped form while the form as written is used as fallback display text.
-- Unless "nocat" is set, a preview warning produced by root_reminder is added too.
local function derived_term_etymology(template, frame)
    local data = derived_term_data[template]
    local args = m_parameters.process(frame:getParent().args, derived_term_params)
    local language_to = m_languages.get_by_code(args[1])
    local language_from = m_languages.get_by_code(args[2])

    local term, written_form = args[3], nil
    local mapped = args[2] == "lnk-pro" and plaenk_alt[term]
    if mapped then
        written_form, term = term, mapped
    end

    local lead = ""
    if not (args["notext"] or data["silent"]) then
        lead = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"])
    end

    local link_spec = {
        term = term,
        language = language_from,
        alt = args["alt"] or written_form,
        gloss = args["t"],
        pos = args["pos"],
        anchor = args["anchor"],
        showlanguage = true,
        nobold = true,
    }
    local out = lead .. m_links.full_link(link_spec, "term")

    if args["nocat"] then
        return format_etymology(out, {})
    end
    local categories = hydrate_categories(data["categories"], language_to, language_from)
    mw.addWarning(root_reminder(args[1]))
    return format_etymology(out, categories)
end

-- Renders an etymology made of several parts (affixes and/or stems).
-- Each part in args[2] becomes a link; the links are joined with " + " and
-- optionally preceded by the lead-in text configured for the template kind.
-- Categories are collected per part ("prefixed with", "suffixed with", ...),
-- and an entry with more than one part and no affix among them is categorised
-- as a compound, optionally refined by the "type" argument.
-- Per-part options may come from the indexed list arguments or from inline
-- modifiers returned by m_inline.parse; the list arguments take precedence.
local function affix_etymology(template, frame)
    local data, args = affix_data[template], m_parameters.process(frame:getParent().args, affix_params)
    local pre_out, categories = {}, {}
    local language_to = m_languages.get_by_code(args[1])
    -- n_parts counts every part; n_affixes is only maintained while categories
    -- are enabled, which is also the only case in which it is read below.
    local n_parts, n_affixes = 0, 0
    for i, term in ipairs(args[2]) do
        local i_term, i_args = m_inline.parse(term)
        local i_alt
        -- For entries in "lnk-pro", a part listed in plaenk_alt is linked under
        -- the mapped form and the form as written becomes fallback display text.
        if args[1] == "lnk-pro" and plaenk_alt[i_term] then
        	i_alt = i_term
        	i_term = plaenk_alt[i_term]
    	end
        n_parts = n_parts + 1
        -- A part may belong to a different language than the entry ("l").
        local language_from = nil
        if args["l"][i] or i_args["l"] then language_from = m_languages.get_by_code(args["l"][i] or i_args["l"]) end
        -- Category names cite the part with a leading asterisk (HTML entity &#42;)
        -- when the language the part belongs to has its "proto" field set.
        local cite_term = i_term
        if (language_from and language_from.proto) or ((not language_from) and language_to.proto) then cite_term = "&#42;" .. cite_term end
        if not args["nocat"] then
            -- Branch order matters: a foreign-language part only yields a
            -- "derived from" category, and the infix test must precede the
            -- prefix and suffix tests because an infix satisfies both of them.
            -- NOTE(review): a part marked noaff never reaches the language_from
            -- branch, so it adds no "terms derived from" category - confirm intended.
            if args["noaff"][i] or i_args["noaff"] then
                -- this is a marked non-affix, don't let it be classified as one!
            elseif language_from then
                table.insert(categories, language_to.name .. " terms derived from " .. language_from.name)
                if is_infix(i_term) or is_prefix(i_term) or is_suffix(i_term) or is_circumfix(i_term) then n_affixes = n_affixes + 1 end
            elseif is_infix(i_term) then
                table.insert(categories, language_to.name .. " terms infixed with " .. cite_term)
                n_affixes = n_affixes + 1
            elseif is_prefix(i_term) then
                table.insert(categories, language_to.name .. " terms prefixed with " .. cite_term)
                n_affixes = n_affixes + 1
            elseif is_suffix(i_term) then
                table.insert(categories, language_to.name .. " terms suffixed with " .. cite_term)
                n_affixes = n_affixes + 1
            elseif is_circumfix(i_term) then
            	table.insert(categories, language_to.name .. " terms circumfixed with " .. cite_term)
            	n_affixes = n_affixes + 1
            end
        end
        -- The language name is only displayed for parts from another language.
        table.insert(pre_out,  m_links.full_link({
	            term = i_term,
	            language = language_from or language_to,
	            alt = args["alt"][i] or i_args["alt"] or i_alt,
	            gloss = args["t"][i] or i_args["t"],
	            pos = args["pos"][i] or i_args["pos"],
	            anchor = args["anchor"][i] or i_args["anchor"] or i_args["a"],
	            nolink = args["nolink"][i] or i_args["nolink"],
	            hypo = args["hypo"][i] or i_args["hypo"],
	            showlanguage = (language_from and true),
	            nobold = true,
	        }, "term"))
    end
    -- Several parts with no affix among them: treat the entry as a compound.
    if (not args["nocat"]) and (n_parts > 1 and n_affixes == 0) then
        table.insert(categories, language_to.name .. " compound terms")
        if args["type"] then
        	-- Accept either a shorthand or the canonical subtype name; an
        	-- unrecognised value adds no extra category.
        	local affix_type = affix_compound_data[affix_compound_aliases[args["type"]] or args["type"]]
        	if affix_type then
        		for _, category in ipairs(affix_type["categories"]) do
        			table.insert(categories, language_to.name .. " " .. category)
        		end
        	end
        end
    end
    local out = table.concat(pre_out, " + ")
    if not args["notext"] then
        out = format_prefixed_text(template, data["text"], args["nocap"], data["preposition"]) .. out
    end
    -- The root reminder is shown as a warning whenever categories are enabled.
    if not args["nocat"] then mw.addWarning(root_reminder(args[1])) end
    return format_etymology(out, categories)
end

-- Module entry point. The first invocation argument names the template kind;
-- the first handler family whose data table knows that kind renders it.
-- Raises an error when no family defines the kind.
function export.show(frame)
    local template = frame.args[1]
    -- Checked in this order, so a kind defined in several tables goes to the earliest one.
    local families = {
        {no_term_data, no_term_etymology},
        {single_term_data, single_term_etymology},
        {multi_term_data, multi_term_etymology},
        {derived_term_data, derived_term_etymology},
        {affix_data, affix_etymology},
    }
    for _, family in ipairs(families) do
        local known_kinds, render = family[1], family[2]
        if known_kinds[template] then
            return render(template, frame)
        end
    end
    error("No such sub-template type is defined!")
end

return export