Modul:etymon
Roman
Dokumentasi untuk modul ini dapat dibuat di Modul:etymon/doc
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
--- Wrap `display_text` in a wikilink to an anchor of Appendix:Glossary.
-- Spaces in `term` are replaced by underscores to form the anchor.
-- @param term          glossary anchor name; when nil/false no link is made
-- @param display_text  text shown for the link
-- @return the wikilink string, or `display_text` unchanged when `term` is absent
local function make_glossary_link(term, display_text)
    if term then
        -- Assigning to a single local keeps only gsub's first result (the string).
        local anchor = term:gsub(" ", "_")
        return "[[Appendix:Glossary#" .. anchor .. "|" .. display_text .. "]]"
    end
    return display_text
end
--- Build the descriptor table stored for one derivation keyword.
-- @param text      sentence-initial wording, e.g. "Borrowed from"
-- @param phrase    stored unchanged in the `phrase` field
-- @param is_group  stored in `is_group`; nil is normalised to false
-- @param abbrev    optional abbreviation, stored only when truthy
-- @param glossary  optional glossary anchor; when given, the part of `text`
--                  before a trailing " of"/" from" (or all of `text` if neither
--                  is present) is turned into a glossary link
-- @return table with `phrase`, `is_group`, `text` and, when supplied,
--         `glossary` and `abbrev`
local function create_keyword_entry(text, phrase, is_group, abbrev, glossary)
    local linked_text = text
    if glossary then
        -- Lazy match: everything before the first whitespace run followed by "of"/"from".
        local head = text:match("^(.-)%s+of") or text:match("^(.-)%s+from") or text
        local tail = text:sub(#head + 1)
        linked_text = make_glossary_link(glossary, head) .. tail
    end
    return {
        phrase = phrase,
        is_group = is_group or false,
        text = linked_text,
        -- `x or nil` leaves the field unset for nil/false, as an explicit `if` would.
        glossary = glossary or nil,
        abbrev = abbrev or nil,
    }
end
-- Derivation keywords recognised in etymon parameter lists, keyed by keyword name.
-- Each value comes from create_keyword_entry(text, phrase, is_group, abbrev, glossary).
local keywords = {
    -- Plain derivation and group keywords.
    from = create_keyword_entry("From", "from", false),
    uder = create_keyword_entry("From", "from", false),
    inh = create_keyword_entry("Inherited from", "from", false, nil, "inherited"),
    af = create_keyword_entry("From", "from", true),
    blend = create_keyword_entry("Blend of", "a blend of", true, "blend", "blend"),

    -- Borrowing keywords.
    bor = create_keyword_entry("Borrowed from", "borrowed from", false, "bor.", "loanword"),
    lbor = create_keyword_entry("Learned borrowing from", "a learned borrowing from", false, "lbor.", "learned borrowing"),
    obor = create_keyword_entry("Orthographic borrowing from", "an orthographic borrowing from", false, "obor.", "orthographic borrowing"),
    slbor = create_keyword_entry("Semi-learned borrowing from", "a semi-learned borrowing from", false, "slbor.", "semi-learned borrowing"),
    ubor = create_keyword_entry("Unadapted borrowing from", "an unadapted borrowing from", false, "ubor.", "unadapted borrowing"),

    -- Remaining keywords.
    der = create_keyword_entry("Derived from", "from", false, "der.", "derived terms"),
    calque = create_keyword_entry("Calque of", "a calque of", false, "calq.", "calque"),
    sl = create_keyword_entry("Semantic loan of", "a semantic loan of", false, "sl.", "semantic loan"),
    bf = create_keyword_entry("Back-formation from", "a back-formation from", false, "bf.", "back-formation"),
    translit = create_keyword_entry("Transliteration of", "borrowed from", false, "translit.", "transliteration"),
    vrd = create_keyword_entry("Vṛddhi derivative of", "a vṛddhi derivative of", false, "vṛd.", "vṛddhi derivative"),
    aphetic = create_keyword_entry("Aphetic form of", "an aphetic form of", false, "aph.", "aphesis"),
    influence = create_keyword_entry("Influenced by", "influenced by", false, "influ.", "contamination"),
}
-- Static configuration for this module; also handed to the tree renderer.
local config = {}

-- Languages for which trees and texts are refused, keyed by language code.
-- `ref` is appended to the error message as a pointer to the relevant discussion.
config.unallowed_langs = {
    zh = { ref = "[[Wiktionary:Beer parlour/2025/May#Template:etymon for Chinese]]" },
}

-- Named colours: CSS custom properties, each with a literal fallback value.
config.colors = {
    GREY = "var(--wikt-palette-grey, #9e9e9e)",
    LIGHTGREY = "var(--wikt-palette-lightgrey, #ccc)",
    BEIGE = "var(--wikt-palette-beige, #fffbf2)",
    CYAN = "var(--wikt-palette-cyan, #eaffff)",
    PINK = "var(--wikt-palette-pink, #ffe0f0)",
    BLACK = "var(--wikt-palette-black, #202122)",
}
-- Mutable state shared by the functions of this module during one invocation.
local state = {
    -- Lookup caches filled while scraping pages.
    argsOf = {},               -- "<lang>:<page>:<id>" -> template arguments, or a status string
    disambiguationCount = {},  -- "<lang>:<page>" -> list of etymon IDs found on that page
    singleEtymons = {},        -- "<lang>:<page>" -> arguments of the only etymon template found

    -- Flags raised anywhere in the tree.
    currentPageHasInlineEtymology = false,
    currentPageHasRedundantEtymology = false,
    usedIdlessEtymon = false,

    -- The same flags, raised only while processing the top-level node (depth 0).
    topLevelHasInlineEtymology = false,
    topLevelRedundantEtymology = false,
    topLevelIdlessEtymon = false,

    -- Statistics gathered while building the tree.
    maxDepthReached = 0,
    totalNodes = 0,
    languageCount = {},        -- language code -> number of nodes in that language
}
-- Registry mapping short aliases to the full names of the modules this file
-- loads through Loader.init.
local Loader = {
    modules = {
        anchors = "Module:anchors",
        debug = "Module:debug",
        etydate = "Module:etydate",
        etymology = "Module:etymology",
        headword_data = "Module:headword/data",
        languages = "Module:languages",
        languages_errorgetby = "Module:languages/errorGetBy",
        links = "Module:links",
        pages = "Module:pages",
        parameters = "Module:parameters",
        parameters_data = "Module:parameters/data",
        parse_utilities = "Module:parse utilities",
        references = "Module:references",
        string_utilities = "Module:string utilities",
        template_parser = "Module:template parser",
        utilities = "Module:utilities",
    },
}
local templatestyles_module_name = "Module:TemplateStyles"

--- Forward all arguments to the TemplateStyles module, loading it on first use.
-- The first call replaces this local with the loaded module's value, so later
-- calls go to it directly without passing through `require` again.
local function templatestyles(...)
    local loaded = require(templatestyles_module_name)
    templatestyles = loaded
    return loaded(...)
end
--- Create a lazy accessor for the modules listed in `self.modules`.
-- Indexing the returned table with a registered alias yields a proxy table.
-- Indexing that proxy with a member name requires the module (once per alias)
-- and caches the looked-up member on the proxy with rawset.
-- Unregistered aliases yield nil.
-- @return the accessor table
function Loader.init(self)
    local cache = {}

    -- Require the module behind `alias` the first time it is needed.
    local function load_module(alias)
        if not cache[alias] then
            cache[alias] = require(self.modules[alias])
        end
        return cache[alias]
    end

    -- Build the per-module proxy whose members are resolved on demand.
    local function new_proxy(alias)
        return setmetatable({}, {
            __index = function(proxy, member)
                local value = load_module(alias)[member]
                rawset(proxy, member, value)
                return value
            end,
        })
    end

    return setmetatable({}, {
        __index = function(registry, alias)
            if not self.modules[alias] then
                return nil
            end
            local proxy = new_proxy(alias)
            -- Store the proxy so this metamethod is not hit again for the alias.
            rawset(registry, alias, proxy)
            return proxy
        end,
    })
end
-- Lazy module accessor: M.<alias>.<member> requires the module registered under
-- <alias> in Loader.modules on first access and returns the requested member.
local M = Loader:init()
-- Small helpers shared by the rest of the module.
local Util = {}

--- Look up a language object by code.
-- @param code      language code passed to M.languages.getByCode
-- @param no_error  when truthy, return the bare lookup result; otherwise fall
--                  back to M.languages_errorgetby.code when the lookup yields
--                  nothing
-- @return whatever the lookup (or the fallback) returns
function Util.get_lang(code, no_error)
    if not no_error then
        return M.languages.getByCode(code, nil, true) or M.languages_errorgetby.code(code, true, true)
    end
    return M.languages.getByCode(code, nil, true)
end
--- Normalise a language for cache keys and lookups.
-- A language in family "zhx" but not in family "qfa-cnt" is replaced by the
-- language with code "zh"; every other language is returned unchanged.
-- @param lang  language object providing `inFamily`
-- @return the normalised language object
function Util.get_norm_lang(lang)
    local fold_into_zh = lang:inFamily("zhx") and not lang:inFamily("qfa-cnt")
    if not fold_into_zh then
        return lang
    end
    return M.languages.getByCode("zh")
end
--- Format a term link via M.links.full_link with the "term" face.
-- For language code "zh" the `tr` field of `params` is overwritten with an
-- empty string first (this mutates the caller's table).
-- @param params  link parameter table; must contain `lang`
-- @return the result of M.links.full_link
function Util.make_link(params)
    local code = params.lang:getCode()
    if code == "zh" then
        params.tr = ""
    end
    return M.links.full_link(params, "term")
end
-- Memoised result of Util.is_content_page; nil until first computed.
local is_content_page_cached

--- Report whether the current page is a content page, per M.pages.is_content_page.
-- The answer is computed once and reused. The cache test is against nil so
-- that a cached `false` is honoured.
function Util.is_content_page()
    if is_content_page_cached ~= nil then
        return is_content_page_cached
    end
    is_content_page_cached = M.pages.is_content_page(mw.title.getCurrentTitle())
    return is_content_page_cached
end
-- Memoised `page` table from the headword data module.
local page_data_cached

--- Return the `page` field of the data loaded from Loader.modules.headword_data.
-- The field is loaded on first use and kept for later calls.
function Util.get_page_data()
    if page_data_cached then
        return page_data_cached
    end
    page_data_cached = mw.loadData(Loader.modules.headword_data).page
    return page_data_cached
end
--- Convert a raw reference specification with M.references.parse_references.
-- @param refs_text  raw reference text; may be nil or empty
-- @return "" when there is nothing to parse, otherwise the parser's result
local function parse_etymon_references(refs_text)
    if refs_text and refs_text ~= "" then
        return M.references.parse_references(refs_text)
    end
    return ""
end
--- Walk a data tree depth-first and store the parsed form of each node's `ref`
-- in `parsed_ref`. Nodes without `ref` are left untouched.
-- @param node  tree node; `children`, when present, is a sequence of nodes
local function parse_tree_references(node)
    local raw_ref = node.ref
    if raw_ref then
        node.parsed_ref = parse_etymon_references(raw_ref)
    end
    -- A node without children is treated as having an empty child list.
    for _, child in ipairs(node.children or {}) do
        parse_tree_references(child)
    end
end
--- Test whether a raw parameter names a keyword.
-- A keyword is any key of `keywords` or one of "afeq", "root" and "conf".
-- @param param             raw parameter string
-- @param allow_colon_less  when truthy, a parameter without the leading ":"
--                          is also tested
-- @return a truthy value (the `keywords` entry, or true) for a keyword,
--         false otherwise
local function is_keyword(param, allow_colon_less)
    local name
    if param:sub(1, 1) == ":" then
        name = param:sub(2)
    elseif allow_colon_less then
        name = param
    else
        return false
    end
    return keywords[name] or name == "afeq" or name == "root" or name == "conf"
end
--- Extract the keyword name from a raw parameter.
-- A parameter starting with ":" yields everything after the colon, without
-- checking that it is a known keyword. A colon-less parameter is returned only
-- when `allow_colon_less` is truthy and it is a known keyword.
-- @return the keyword name, or nil
local function get_keyword(param, allow_colon_less)
    if param:sub(1, 1) == ":" then
        return param:sub(2)
    end
    if not allow_colon_less then
        return nil
    end
    if keywords[param] or param == "afeq" or param == "root" or param == "conf" then
        return param
    end
    return nil
end
-- Namespace for the functions that parse etymon parameters and fetch their data.
local DataRetriever = {}

-- Inline modifiers accepted on an etymon parameter, e.g. `term<id:...><t:...>`.
-- Passed as `param_mods` to M.parse_utilities.parse_inline_modifiers.
local etymon_param_mods = {
    -- Identification and inline etymology.
    id = {},
    ety = {},
    -- Display-related modifiers.
    alt = {},
    t = {},
    tr = {},
    ts = {},
    pos = {},
    -- Annotations.
    unc = { type = "boolean" },
    ref = {},
}
--- Parse one etymon parameter of the form `[langcode:]term[<mod:value>...]`.
-- @param param        raw parameter string
-- @param contextLang  language used when the parameter has no language prefix
-- @return nil for keywords (and when the prefixed language lookup yields
--         nothing); otherwise a table with `lang`, `term` and any parsed
--         inline modifiers. An `id` beginning with "!" has the "!" removed and
--         sets `override = true`.
function DataRetriever.try_parse_etymon(param, contextLang)
    if is_keyword(param) then
        return nil
    end

    -- Default to the context language; an explicit "code:" prefix overrides it.
    local lang, rest = contextLang, param
    local prefix, remainder = param:match("^([a-zA-Z.%-]+):(.*)$")
    if prefix then
        lang = Util.get_lang(prefix, false)
        if not lang then
            return nil
        end
        rest = remainder
    end

    -- Without "<" there are no inline modifiers to parse.
    if not rest:find("<", 1, true) then
        return { lang = lang, term = M.string_utilities.trim(rest) }
    end

    -- The term is everything before the first "<"; the argument the parser
    -- hands to generate_obj is deliberately ignored in favour of it.
    local bare_term = rest:match("^(.-)<") or rest
    local parsed = M.parse_utilities.parse_inline_modifiers(rest, {
        param_mods = etymon_param_mods,
        generate_obj = function(ignored_term)
            return { term = M.string_utilities.trim(bare_term) }
        end,
    })

    local raw_id = parsed.id
    if raw_id and raw_id:match("^!") then
        parsed.id = raw_id:sub(2)
        parsed.override = true
    end
    parsed.lang = lang
    return parsed
end
--- Scan a page for `etymon` templates of one language and record them in `state`.
--
-- Results are stored as follows (L2_key is "<langcode>:<etymonPage>"):
--   * state.argsOf[L2_key .. ":" .. id]  - arguments of each template that has an `id`
--   * state.disambiguationCount[L2_key]  - list of the IDs found
--   * state.singleEtymons[L2_key]        - arguments of the template when exactly one
--                                          (with or without `id`) was found
--   * state.argsOf[key]                  - "redlink" when the page has no content,
--                                          "missing" when the language section or the
--                                          requested entry is absent
-- When the page is a redirect, the redirect target is scanned once as well,
-- and the IDs found there are appended to the redirecting page's ID list.
--
-- @param etymonPage      page name used to build cache keys
-- @param etymonTitle     title object of that page
-- @param key             cache key under which the requested entry is expected
-- @param etymonLang      language object (getFullName / getFullCode are used)
-- @param etymonId        requested ID; nil for an ID-less lookup
-- @param redirectedFrom  L2_key of the redirecting page when following a redirect
function DataRetriever.scrape_page(etymonPage, etymonTitle, key, etymonLang, etymonId, redirectedFrom)
    local content = etymonTitle:getContent()
    if not content then
        state.argsOf[key] = "redlink"
        return
    end

    -- Only a non-redirect page is narrowed to the level-2 section of the language.
    local redirectTarget = etymonTitle.redirectTarget
    if not redirectTarget then
        content = M.pages.get_section(content, etymonLang:getFullName(), 2)
        if not content then
            state.argsOf[key] = "missing"
            return
        end
    end

    local etymonLangcode = etymonLang:getFullCode()
    local L2_key = etymonLangcode .. ":" .. etymonPage
    local found_templates_for_lang = {}
    local found_ids = {}
    for template in M.template_parser.find_templates(content) do
        if template:get_name() == "etymon" then
            local templateArgs = template:get_arguments()
            if templateArgs[1] == etymonLangcode then
                if templateArgs.id then
                    state.argsOf[L2_key .. ":" .. templateArgs.id] = templateArgs
                    table.insert(found_ids, templateArgs.id)
                end
                table.insert(found_templates_for_lang, templateArgs)
            end
        end
    end

    state.disambiguationCount[L2_key] = found_ids
    if #found_templates_for_lang == 1 then
        state.singleEtymons[L2_key] = found_templates_for_lang[1]
    end

    -- When following a redirect, expose the target's IDs under the source page too.
    if redirectedFrom and state.disambiguationCount[L2_key] then
        state.disambiguationCount[redirectedFrom] = state.disambiguationCount[redirectedFrom] or {}
        for _, id in ipairs(state.disambiguationCount[L2_key]) do
            table.insert(state.disambiguationCount[redirectedFrom], id)
        end
    end

    if state.argsOf[key] then
        return
    elseif redirectedFrom or not redirectTarget then
        -- Either a redirect was already followed once, or there is none to follow.
        state.argsOf[key] = "missing"
        return
    end

    -- ID-less lookups pass etymonId = nil. Concatenating nil raises an error,
    -- so an empty suffix is used, matching the "<lang>:<page>:" key such
    -- lookups are started with.
    -- NOTE(review): state.singleEtymons is not propagated to the redirecting
    -- page, so an ID-less link to a redirect still resolves to nothing; confirm
    -- whether that is intended.
    local idSuffix = etymonId or ""
    etymonPage = redirectTarget.prefixedText
    DataRetriever.scrape_page(etymonPage, redirectTarget, L2_key .. ":" .. idSuffix, etymonLang, etymonId, L2_key)
    state.argsOf[key] = state.argsOf[etymonLangcode .. ":" .. etymonPage .. ":" .. idSuffix]
end
--- Resolve the template arguments of the entry an etymon refers to.
-- @param etymon_data  parsed etymon (`term`, `lang`, optional `id`)
-- @param isTopLevel   truthy when called for a direct parameter of the
--                     invoking template
-- @return "nolink" for id "?"; with an ID, the cached `state.argsOf` value
--         for that entry (a table, a status string, or nil); without an ID,
--         the page's single etymon entry, or "missing" when the page lists no
--         IDs
-- Raises an error for an invalid page title, or when an ID-less link targets
-- a page with more than one ID.
function DataRetriever.get_args(etymon_data, isTopLevel)
    local wanted_id = etymon_data.id
    if wanted_id == "?" then
        return "nolink"
    end

    local page = M.links.get_link_page(etymon_data.term, etymon_data.lang)
    local normLang = Util.get_norm_lang(etymon_data.lang)
    local base_key = normLang:getFullCode() .. ":" .. page

    -- Explicit ID: scrape the page on a cache miss, then answer from the cache.
    if wanted_id then
        local key = base_key .. ":" .. wanted_id
        if state.argsOf[key] == nil then
            local title = mw.title.new(page)
            if not title then error('Invalid page title "' .. page .. '" encountered.') end
            DataRetriever.scrape_page(page, title, key, normLang, wanted_id)
        end
        return state.argsOf[key]
    end

    -- No ID: record that fact, then decide from how many IDs the page offers.
    state.usedIdlessEtymon = true
    if isTopLevel then
        state.topLevelIdlessEtymon = true
    end
    if state.disambiguationCount[base_key] == nil then
        local title = mw.title.new(page)
        if not title then error('Invalid page title "' .. page .. '" encountered.') end
        DataRetriever.scrape_page(page, title, base_key .. ":", normLang, nil)
    end

    local ids = state.disambiguationCount[base_key] or {}
    local count = #ids
    if count == 0 then
        return "missing"
    end
    if count == 1 then
        return state.singleEtymons[base_key]
    end

    -- Several candidates: list the non-empty IDs in the error message.
    local quoted = {}
    for _, id in ipairs(ids) do
        if id and id ~= "" then
            table.insert(quoted, "\"" .. id .. "\"")
        end
    end
    local suggestion_text = ""
    if #quoted > 0 then
        suggestion_text = " Available IDs: " .. table.concat(quoted, ", ") .. "."
    end
    error("Etymology link to '[[" .. page .. "]]' is ambiguous. The page has " ..
        count .. " etymon templates for " .. normLang:getCanonicalName() ..
        ". Please specify an ID." .. suggestion_text)
end
--- Recursively build the etymology data tree for one entry.
-- @param lang   language object of the entry
-- @param title  term/page title of the entry
-- @param args   template arguments of the entry (a table whose element 1 is the
--               language code and whose `id` is the etymon ID), or a status
--               string / nil when the entry could not be resolved
-- @param seen   set of keys already expanded; shared by the whole recursion
-- @param depth  recursion depth; 0 (the default) marks the top-level call
-- @return node table with `lang`, `title`, `id`, `args`, `children`, `status`;
--         child nodes additionally carry the fields copied from their etymon spec
function DataRetriever.build_data_tree(lang, title, args, seen, depth)
seen = seen or {}
depth = depth or 0
local isTopLevel = (depth == 0)
-- Statistics later reported by export.main.
if depth > state.maxDepthReached then
state.maxDepthReached = depth
end
state.totalNodes = state.totalNodes + 1
local langCode = lang:getCode()
state.languageCount[langCode] = (state.languageCount[langCode] or 0) + 1
local currId = (type(args) == "table" and args.id) or ""
local key = Util.get_norm_lang(lang):getFullCode() .. ":" .. M.links.get_link_page(title, lang) .. ":" .. currId
local node = { lang = lang, title = title, id = currId, args = args, children = {}, status = "ok" }
-- Leaf cases: the entry was not resolved to a table, or it was already expanded.
-- NOTE(review): when `seen[key]` triggers with table `args`, `status` becomes that
-- table rather than a string; confirm the renderers expect this.
if type(args) ~= "table" or seen[key] then
node.status = args or "missing"
return node
end
seen[key] = true
-- `derType` is the keyword currently in effect for the etymons that follow it.
-- `confidence` is assigned below but never read within this function.
local derType, confidence, ignoreEtymons = "from", "conf", false
-- args[1] is the language code, so parameters start at index 2.
for i = 2, #args do
local param = args[i]
if is_keyword(param) then
local keyword = get_keyword(param)
if keyword == "conf" then
confidence = keyword
else
derType = keyword
confidence = "conf"
-- Etymons following `:afeq` are not added to the tree.
ignoreEtymons = (keyword == "afeq")
end
elseif param:sub(1, 1) == ":" then
error("Invalid keyword '" .. param .. "'. Did you mean a valid keyword like ':bor', ':inh', etc.?")
else
local etymon_data = DataRetriever.try_parse_etymon(param, lang)
if etymon_data and not ignoreEtymons then
local etymonArgs = DataRetriever.get_args(etymon_data, isTopLevel)
if etymon_data.ety then
-- The inline <ety:...> is used only when the lookup returned "redlink" or
-- "missing"; otherwise it is merely flagged as redundant.
if etymonArgs == "redlink" or etymonArgs == "missing" then
state.currentPageHasInlineEtymology = true
if isTopLevel then
state.topLevelHasInlineEtymology = true
end
local ety_string = etymon_data.ety
local segments = M.parse_utilities.parse_balanced_segment_run(ety_string, "<", ">")
-- The first segment is the keyword; it may be written without a leading colon.
local keyword = M.string_utilities.trim(segments[1])
if not is_keyword(keyword, true) then
error("Invalid keyword '" .. keyword .. "' in inline etymology <ety:" .. keyword .. ">")
end
local inline_params = {}
-- Even-indexed segments are taken as parameters, minus their first and last character.
for j = 2, #segments, 2 do
local parent_wrapper = segments[j]
if parent_wrapper and parent_wrapper ~= "" then
table.insert(inline_params, parent_wrapper:sub(2, -2))
end
end
-- Synthesise an argument table shaped like the `args` of a real template.
etymonArgs = { etymon_data.lang:getCode() }
table.insert(etymonArgs, ":" .. keyword)
for _, p in ipairs(inline_params) do
table.insert(etymonArgs, p)
end
etymonArgs.id = etymon_data.id
else
state.currentPageHasRedundantEtymology = true
if isTopLevel then
state.topLevelRedundantEtymology = true
end
end
end
local childNode = DataRetriever.build_data_tree(etymon_data.lang, etymon_data.term, etymonArgs, seen,
depth + 1)
childNode.target_key = Util.get_norm_lang(etymon_data.lang):getFullCode() ..
":" .. M.links.get_link_page(etymon_data.term, etymon_data.lang)
-- Copy the per-link fields from the etymon spec; `id` replaces the value set
-- by the recursive call.
childNode.id = etymon_data.id
childNode.t = etymon_data.t
childNode.tr = etymon_data.tr
childNode.ts = etymon_data.ts
childNode.pos = etymon_data.pos
childNode.alt = etymon_data.alt
childNode.ref = etymon_data.ref
childNode.derType = derType
childNode.is_uncertain = etymon_data.unc
childNode.override = etymon_data.override
table.insert(node.children, childNode)
end
end
end
return node
end
local Validator = {}

--- Validate the arguments of an etymon invocation, raising an error on the
-- first problem found.
-- @param lang    language object of the entry (getFullCode is used)
-- @param params  sequence of raw positional parameters (keywords and etymon specs)
-- @param id      optional etymon ID: at least two characters, and different
--                from `title` and from the current page name
-- @param title   title the ID is compared against
-- @param pos     optional value; must be exactly one of "prefix", "suffix",
--                "interfix", "infix" or "root"
function Validator.run(lang, params, id, title, pos)
    if id then
        if mw.ustring.len(id) < 2 then error("The `id` parameter must have at least two characters.") end
        if id == title or id == Util.get_page_data().pagename then
            error(
                "The `id` parameter must not be the same as the page title.")
        end
    end

    -- Exact set membership. A pattern-based substring search would accept
    -- values such as "....." or "prefix suffix", and a value containing "%"
    -- could raise a malformed-pattern error instead of this one.
    local allowed_pos = { prefix = true, suffix = true, interfix = true, infix = true, root = true }
    if pos and not allowed_pos[pos] then
        error(
            "Unknown value provided for `pos`.")
    end

    local currentKeyword = "from"
    local etymonsInGroup = {}

    -- Close the current keyword group: group keywords other than `af` need at
    -- least two members.
    local function checkGroup()
        if keywords[currentKeyword] and keywords[currentKeyword].is_group and currentKeyword ~= 'af' and #etymonsInGroup <= 1 then
            error("Detected `:" .. currentKeyword .. "` group with fewer than two etymons.")
        end
        etymonsInGroup = {}
    end

    for _, param in ipairs(params) do
        if param:sub(1, 1) == ":" and not is_keyword(param) then
            error("Invalid keyword '" .. param .. "'. Did you mean a valid keyword like ':bor', ':inh', etc.?")
        end
        if is_keyword(param) then
            -- `:conf` does not start a new group.
            if get_keyword(param) ~= "conf" then
                checkGroup()
                currentKeyword = get_keyword(param)
            end
        else
            local etymon_data = DataRetriever.try_parse_etymon(param, lang)
            -- Every non-keyword parameter counts as a group member, parsed or not.
            table.insert(etymonsInGroup, param)
            if etymon_data then
                local paramLang = etymon_data.lang
                if currentKeyword == "from" and paramLang:getFullCode() ~= lang:getFullCode() then
                    error("`:from` is for same-language derivation, but language does not match.")
                elseif currentKeyword == "inh" then
                    M.etymology.check_ancestor(lang, paramLang)
                end
            end
        end
    end
    checkGroup()
end
--- Emit tracking keys for a numeric statistic and for the first range it falls into.
-- Keys have the form "etymon/<base_key>/<suffix>" and, when `langCode` is given,
-- also "etymon/lang/<langCode>/<base_key>/<suffix>". The suffix is first the raw
-- value, then the label of the first matching range (if any).
-- @param track_func  function called once per tracking key
-- @param base_key    name of the statistic
-- @param value       number to report
-- @param ranges      sequence of { min=, max=, exact=, label= } tables, tested in
--                    order; `exact` is consulted only when neither `min` nor
--                    `max` is set
-- @param langCode    optional language code for the per-language keys
local function track_ranges(track_func, base_key, value, ranges, langCode)
    local function emit(suffix)
        track_func("etymon/" .. base_key .. "/" .. suffix)
        if langCode then
            track_func("etymon/lang/" .. langCode .. "/" .. base_key .. "/" .. suffix)
        end
    end

    local function in_range(range)
        local lo, hi = range.min, range.max
        if lo and hi then
            return value >= lo and value <= hi
        end
        if lo then
            return value >= lo
        end
        if hi then
            return value <= hi
        end
        if range.exact then
            return value == range.exact
        end
        return false
    end

    emit(value)
    for _, range in ipairs(ranges) do
        if in_range(range) then
            emit(range.label)
            break
        end
    end
end
--- Entry point for the template: reads the parent frame's arguments, validates
-- them, builds the etymology data tree, and returns the rendered output.
-- The output is the anchor element, optionally followed by the tree and/or text
-- rendering and the formatted categories.
-- @param frame  frame object; the arguments are taken from its parent
-- @return the concatenated output string
function export.main(frame)
local parentArgs = frame:getParent().args
local allArgs = M.parameters.process(parentArgs, mw.loadData(Loader.modules.parameters_data).etymon)
local lang, id, title, text, tree, exnihilo, etydate, pos, args =
allArgs[1], allArgs.id, allArgs.title, allArgs.text, allArgs.tree,
allArgs.exnihilo, allArgs.etydate, allArgs.pos, allArgs[2]
-- Without an explicit title, use the page name, prefixed with "*" in the
-- Reconstruction namespace.
if not title then
local pdata = Util.get_page_data()
title = pdata.pagename
if pdata.namespace == "Reconstruction" then title = "*" .. title end
end
Validator.run(lang, args, id, title, pos)
-- Give `args` the shape used for scraped templates: language code at index 1, `id` field set.
table.insert(args, 1, lang:getCode()); args.id = id
state.argsOf[lang:getCode() .. ":" .. title .. ":" .. (id or "")] = args
local etyDataTree = DataRetriever.build_data_tree(lang, title, args)
parse_tree_references(etyDataTree)
local output = {}
-- Anchor element carrying the language and ID as data attributes.
table.insert(output, tostring(mw.html.create("li")
:attr("id", M.anchors.language_anchor(lang, id))
:attr("class", "etymonid")
:attr("data-lang", lang:getCode())
:attr("data-id", id or "")
:css("list-style", "none")
:allDone()))
-- Refuse tree/text output for languages listed in config.unallowed_langs.
local disallowed = config.unallowed_langs[lang:getCode()]
if disallowed then
local error_text = " for " .. lang:getFullName()
if disallowed["ref"] then
error_text = error_text .. "; see " .. disallowed["ref"]
else
error_text = error_text .. "."
end
if tree then error("Etymology trees are not allowed" .. error_text) end
if text then error("Etymology texts are not allowed" .. error_text) end
end
-- Parse the `etydate` argument and attach the formatted date to the root node.
if etydate then
local etydate_param_mods = {
ref = { list = true, type = "references", allow_holes = true },
refn = { list = true, allow_holes = true },
nocap = { type = "boolean" },
}
-- Splits the date text on commas into trimmed specs.
local function generate_etydate_obj(etydate_text)
local etydate_specs = {}
for spec in etydate_text:gmatch("[^,]+") do
table.insert(etydate_specs, mw.text.trim(spec))
end
return { [1] = etydate_specs }
end
local parsed_etydate = M.parse_utilities.parse_inline_modifiers(etydate,
{ param_mods = etydate_param_mods, generate_obj = generate_etydate_obj })
local etydate_args = {
[1] = parsed_etydate[1],
nocap = parsed_etydate.nocap or false,
ref = parsed_etydate.ref or {},
refn = parsed_etydate.refn or { maxindex = 0 }
}
-- `refn` is always a table here; recompute `maxindex` as its largest numeric key.
if etydate_args.refn then
local max = 0
for k, v in pairs(etydate_args.refn) do
if type(k) == "number" and k > max then
max = k
end
end
etydate_args.refn.maxindex = max
end
etyDataTree.etydate = M.etydate.format_etydate(etydate_args)
end
-- Tree rendering, preceded by its stylesheet.
if tree then
local tree_renderer = require('Module:etymon/tree')
table.insert(output, templatestyles("Module:etymon/styles.css"))
table.insert(output, tree_renderer.render(etyDataTree, config, keywords, Util.make_link))
end
-- Text rendering. The `text` value selects the (usePlus, maxDepth) pair:
-- "++" -> (true, false), "+" -> (true, 1), "-" -> (false, 1), anything else -> (false, false).
if text then
local text_renderer = require('Module:etymon/text')
local usePlus, maxDepth
if text == "++" then
usePlus, maxDepth = true, false
elseif text == "+" then
usePlus, maxDepth = true, 1
elseif text == "-" then
usePlus, maxDepth = false, 1
else
usePlus, maxDepth = false, false
end
table.insert(output, text_renderer.render(etyDataTree, keywords, Util.make_link, usePlus, maxDepth))
end
-- Tracking of tree statistics: only on content pages and only when the tree has
-- at least one level below the root.
if Util.is_content_page() and state.maxDepthReached > 0 then
local track = require("Module:debug/track")
local langCode = lang:getCode()
-- Range tables are tested in order and the first match wins (see track_ranges).
local depth_ranges = {
{ min = 50, label = "extremely-deep" },
{ min = 20, label = "20+" },
{ min = 10, max = 19, label = "10-19" },
{ min = 5, max = 9, label = "5-9" },
{ min = 3, max = 4, label = "3-4" },
{ max = 2, label = "1-2" }
}
local node_ranges = {
{ min = 100, label = "extremely-large" },
{ min = 50, label = "50+" },
{ min = 20, max = 49, label = "20-49" },
{ min = 10, max = 19, label = "10-19" },
{ min = 5, max = 9, label = "5-9" },
{ max = 4, label = "1-4" }
}
local language_ranges = {
{ min = 10, label = "10+" },
{ min = 5, max = 9, label = "5-9" },
{ min = 3, max = 4, label = "3-4" },
{ exact = 2, label = "2" },
{ exact = 1, label = "1" }
}
track_ranges(track, "depth", state.maxDepthReached, depth_ranges, langCode)
track_ranges(track, "nodes", state.totalNodes, node_ranges, langCode)
-- Count the distinct language codes seen while building the tree.
local uniqueLanguages = 0
for _ in pairs(state.languageCount) do
uniqueLanguages = uniqueLanguages + 1
end
track_ranges(track, "unique-languages", uniqueLanguages, language_ranges, langCode)
end
-- Categories are collected on content pages only.
local categories = {}
if Util.is_content_page() then
local category_renderer = require('Module:etymon/categories')
categories = category_renderer.render(lang, etyDataTree, state.disambiguationCount, Util.get_norm_lang)
if tree then table.insert(categories, lang:getCanonicalName() .. " entries with etymology trees") end
if text then table.insert(categories, lang:getCanonicalName() .. " entries with etymology texts") end
if exnihilo then table.insert(categories, lang:getCanonicalName() .. " terms coined ex nihilo") end
-- Maintenance categories use the top-level flags only.
if state.topLevelHasInlineEtymology then
table.insert(categories, "Pages with inline etymon for redlinks")
end
if state.topLevelRedundantEtymology then
table.insert(categories, "Pages with redundant inline etymon")
end
if state.topLevelIdlessEtymon then
table.insert(categories, "Pages using etymon with no ID")
end
end
-- No categories are emitted for language code "zh".
if #categories > 0 and lang:getCode() ~= "zh" then
table.insert(output, M.utilities.format_categories(categories, lang))
end
return table.concat(output)
end
return export