Loncat ke isi

Modul:etymon

Deri Wikikamus

Dokumentasi untuk modul ini dapat dibuat di Modul:etymon/doc

-- Public interface of this module; `export.main` is attached below and the table is returned at the end of the file.
local export = {}

--- Wrap `display_text` in a wikilink to an anchor on Appendix:Glossary.
-- @param term          glossary anchor name; spaces are turned into underscores.
--                      When falsy, no link is made.
-- @param display_text  text shown for the link.
-- @return the wikilink string, or `display_text` unchanged when `term` is falsy.
local function make_glossary_link(term, display_text)
	if term then
		-- Keep only the first gsub result (the substituted string, not the count).
		local anchor = term:gsub(" ", "_")
		return "[[Appendix:Glossary#" .. anchor .. "|" .. display_text .. "]]"
	end
	return display_text
end

--- Build one record for the `keywords` table.
-- @param text      capitalised label (e.g. "Borrowed from").
-- @param phrase    running-text wording for the same relation.
-- @param is_group  truthy when the keyword introduces a group of etymons; stored as a boolean.
-- @param abbrev    optional abbreviation; the field is absent when not given.
-- @param glossary  optional glossary anchor. When given, the part of `text` before a
--                  whitespace-preceded "of"/"from" (or the whole text if neither occurs)
--                  is turned into a glossary link and the remainder is appended unchanged.
-- @return table with `text`, `phrase`, `is_group` and, when supplied, `abbrev` / `glossary`.
local function create_keyword_entry(text, phrase, is_group, abbrev, glossary)
	local shown_text = text

	if glossary then
		local head = text:match("^(.-)%s+of") or text:match("^(.-)%s+from") or text
		local tail = text:sub(#head + 1)
		shown_text = make_glossary_link(glossary, head) .. tail
	end

	return {
		text = shown_text,
		phrase = phrase,
		is_group = is_group or false,
		-- `or nil` keeps a falsy argument from creating the field at all.
		abbrev = abbrev or nil,
		glossary = glossary or nil,
	}
end

-- Derivation keywords accepted in the positional parameters of the `etymon` template
-- (written there with a leading colon, e.g. `:bor`; see is_keyword below).
-- Every value is built by create_keyword_entry(text, phrase, is_group, abbrev, glossary).
-- Entries with is_group = true are checked in Validator.run: except for `af`, such a
-- group must contain at least two etymons.
local keywords = {
	["from"] = create_keyword_entry("From", "from", false),
	["uder"] = create_keyword_entry("From", "from", false),
	["inh"] = create_keyword_entry("Inherited from", "from", false, nil, "inherited"),
	["af"] = create_keyword_entry("From", "from", true),
	["blend"] = create_keyword_entry("Blend of", "a blend of", true, "blend", "blend"),
	["bor"] = create_keyword_entry("Borrowed from", "borrowed from", false, "bor.", "loanword"),
	["lbor"] = create_keyword_entry("Learned borrowing from", "a learned borrowing from", false, "lbor.", "learned borrowing"),
	["obor"] = create_keyword_entry("Orthographic borrowing from", "an orthographic borrowing from", false, "obor.", "orthographic borrowing"),
	["slbor"] = create_keyword_entry("Semi-learned borrowing from", "a semi-learned borrowing from", false, "slbor.", "semi-learned borrowing"),
	["ubor"] = create_keyword_entry("Unadapted borrowing from", "an unadapted borrowing from", false, "ubor.", "unadapted borrowing"),
	["der"] = create_keyword_entry("Derived from", "from", false, "der.", "derived terms"),
	["calque"] = create_keyword_entry("Calque of", "a calque of", false, "calq.", "calque"),
	["sl"] = create_keyword_entry("Semantic loan of", "a semantic loan of", false, "sl.", "semantic loan"),
	["bf"] = create_keyword_entry("Back-formation from", "a back-formation from", false, "bf.", "back-formation"),
	-- NOTE(review): the running-text phrase here is "borrowed from", unlike the label; confirm this is intended.
	["translit"] = create_keyword_entry("Transliteration of", "borrowed from", false, "translit.", "transliteration"),
	["vrd"] = create_keyword_entry("Vṛddhi derivative of", "a vṛddhi derivative of", false, "vṛd.", "vṛddhi derivative"),
	["aphetic"] = create_keyword_entry("Aphetic form of", "an aphetic form of", false, "aph.", "aphesis"),
	["influence"] = create_keyword_entry("Influenced by", "influenced by", false, "influ.", "contamination")
}

-- Static configuration. The whole table is handed to the tree renderer in export.main.
local config = {
	-- Language codes for which export.main raises an error when tree or text output
	-- is requested; `ref`, when present, is appended to that error message.
	unallowed_langs = {
		["zh"] = { ref = "[[Wiktionary:Beer parlour/2025/May#Template:etymon for Chinese]]" }
	},
	-- Named colours as CSS values: a custom property with a literal fallback.
	colors = {
		GREY = "var(--wikt-palette-grey, #9e9e9e)",
		LIGHTGREY = "var(--wikt-palette-lightgrey, #ccc)",
		BEIGE = "var(--wikt-palette-beige, #fffbf2)",
		CYAN = "var(--wikt-palette-cyan, #eaffff)",
		PINK = "var(--wikt-palette-pink, #ffe0f0)",
		BLACK = "var(--wikt-palette-black, #202122)"
	}
}

-- Mutable module-level state shared by DataRetriever and export.main.
local state = {
	-- "langcode:page:id" -> template argument table, or a status string ("redlink" / "missing").
	argsOf = {},
	-- "langcode:page" -> list of the ids found on that page for that language.
	disambiguationCount = {},
	-- "langcode:page" -> arguments of the page's only etymon template for that language
	-- (set only when exactly one was found).
	singleEtymons = {},
	-- Set when an inline <ety:...> modifier is used for an etymon whose lookup gave "redlink"/"missing".
	currentPageHasInlineEtymology = false,
	-- Set when an inline <ety:...> modifier is supplied but the lookup did not give "redlink"/"missing".
	currentPageHasRedundantEtymology = false,
	-- Set when an etymon without an id is looked up.
	usedIdlessEtymon = false,
	-- The three flags below mirror the three above, but are only set at depth 0 of the tree.
	topLevelHasInlineEtymology = false,
	topLevelRedundantEtymology = false,
	topLevelIdlessEtymon = false,
	-- Statistics collected by build_data_tree and reported through tracking in export.main.
	maxDepthReached = 0,
	totalNodes = 0,
	-- language code -> number of tree nodes in that language.
	languageCount = {},
}

-- Lazy module loader. `Loader.modules` maps the short names used as `M.<name>`
-- to the module titles passed to require() / mw.loadData().
local Loader = {}
Loader.modules = {
	anchors = "Module:anchors",
	etydate = "Module:etydate",
	etymology = "Module:etymology",
	headword_data = "Module:headword/data",
	languages = "Module:languages",
	languages_errorgetby = "Module:languages/errorGetBy",
	links = "Module:links",
	pages = "Module:pages",
	parameters = "Module:parameters",
	parameters_data = "Module:parameters/data",
	string_utilities = "Module:string utilities",
	template_parser = "Module:template parser",
	utilities = "Module:utilities",
	debug = "Module:debug",
	parse_utilities = "Module:parse utilities",
	references = "Module:references",
}

local templatestyles_module_name = "Module:TemplateStyles"

--- Self-replacing stub: the first call requires the TemplateStyles module, rebinds
-- the local `templatestyles` to it, and forwards the arguments. Later calls made
-- through this local go straight to the loaded module.
local function templatestyles(...)
	local loaded = require(templatestyles_module_name)
	templatestyles = loaded
	return loaded(...)
end

--- Create the lazy accessor used as `M`.
-- Indexing the returned table with a short name listed in `self.modules` yields a
-- proxy table; indexing that proxy with a member name requires the real module on
-- first use and stores the member on the proxy. Unknown short names yield nil.
-- @param self  the Loader table (provides `modules`).
-- @return the accessor table.
function Loader.init(self)
	local cache = {}

	-- require() the module behind `short_name` once and remember the result.
	local function load_module(short_name)
		local mod = cache[short_name]
		if not mod then
			mod = require(self.modules[short_name])
			cache[short_name] = mod
		end
		return mod
	end

	-- Proxy whose members are fetched from the real module on first access.
	local function make_proxy(short_name)
		return setmetatable({}, {
			__index = function(proxy, member)
				local value = load_module(short_name)[member]
				rawset(proxy, member, value)
				return value
			end,
		})
	end

	return setmetatable({}, {
		__index = function(registry, short_name)
			if not self.modules[short_name] then
				return nil
			end
			local proxy = make_proxy(short_name)
			rawset(registry, short_name, proxy)
			return proxy
		end,
	})
end

-- Lazy accessor: `M.<name>.<member>` requires the module listed under <name> in
-- Loader.modules the first time one of its members is read.
local M = Loader:init()

-- Namespace for small helpers used throughout this module.
local Util = {}

--- Look up a language object by code.
-- @param code      language code.
-- @param no_error  when truthy, return whatever M.languages.getByCode returns;
--                  otherwise, when that result is falsy, return the result of
--                  M.languages_errorgetby.code instead (presumably it raises — verify).
function Util.get_lang(code, no_error)
	local get_by_code = M.languages.getByCode
	if not no_error then
		return get_by_code(code, nil, true) or M.languages_errorgetby.code(code, true, true)
	end
	return get_by_code(code, nil, true)
end

--- Normalise a language for key building: a language that is in family "zhx"
-- but not in family "qfa-cnt" is replaced by the language with code "zh";
-- every other language is returned unchanged.
function Util.get_norm_lang(lang)
	if not lang:inFamily("zhx") then
		return lang
	end
	if lang:inFamily("qfa-cnt") then
		return lang
	end
	return M.languages.getByCode("zh")
end

--- Format a term link through M.links.full_link with the "term" face.
-- For language code "zh" the `tr` field of `params` is overwritten with an
-- empty string first (this mutates the caller's table).
function Util.make_link(params)
	local is_zh = params.lang:getCode() == "zh"
	if is_zh then
		params.tr = ""
	end
	return M.links.full_link(params, "term")
end

local is_content_page_cached

--- Result of M.pages.is_content_page for the current title, memoised after the
-- first call (a cached `false` is kept; only nil triggers recomputation).
function Util.is_content_page()
	if is_content_page_cached ~= nil then
		return is_content_page_cached
	end
	is_content_page_cached = M.pages.is_content_page(mw.title.getCurrentTitle())
	return is_content_page_cached
end

local page_data_cached

--- The `page` table of the headword data module, loaded with mw.loadData on
-- first use and reused afterwards.
function Util.get_page_data()
	page_data_cached = page_data_cached or mw.loadData(Loader.modules.headword_data).page
	return page_data_cached
end

--- Parse a reference specification through M.references.parse_references.
-- @param refs_text  raw reference text; may be nil or empty.
-- @return "" when there is nothing to parse, otherwise the parser's result.
local function parse_etymon_references(refs_text)
	local has_text = refs_text and refs_text ~= ""
	if has_text then
		return M.references.parse_references(refs_text)
	end
	return ""
end

--- Walk a data tree depth-first and, for every node carrying a `ref`, store the
-- parsed form in `node.parsed_ref`.
-- @param node  tree node; `children` (optional) is a list of further nodes.
local function parse_tree_references(node)
	local raw_ref = node.ref
	if raw_ref then
		node.parsed_ref = parse_etymon_references(raw_ref)
	end

	for _, descendant in ipairs(node.children or {}) do
		parse_tree_references(descendant)
	end
end

--- Tell whether a parameter names a keyword.
-- @param param             parameter text.
-- @param allow_colon_less  when truthy, a bare name without the leading ":" is also accepted.
-- @return a truthy value (the `keywords` entry, or true for "afeq" / "root" / "conf")
--         when `param` is a keyword, false otherwise.
local function is_keyword(param, allow_colon_less)
	local name
	if param:sub(1, 1) == ":" then
		name = param:sub(2)
	elseif allow_colon_less then
		name = param
	else
		return false
	end
	return keywords[name] or name == "afeq" or name == "root" or name == "conf"
end

--- Extract the keyword name from a parameter.
-- A parameter starting with ":" yields everything after the colon without any
-- validity check. A bare name is returned only when `allow_colon_less` is truthy
-- and the name is a known keyword ("afeq", "root", "conf" or a `keywords` key).
-- @return the keyword name, or nil.
local function get_keyword(param, allow_colon_less)
	if param:sub(1, 1) == ":" then
		return param:sub(2)
	end
	if not allow_colon_less then
		return nil
	end
	local known = keywords[param] or param == "afeq" or param == "root" or param == "conf"
	if known then
		return param
	end
	return nil
end

-- Namespace for the functions that parse etymon parameters, scrape pages and build the data tree.
local DataRetriever = {}

-- Inline modifiers accepted after an etymon term (e.g. `<id:...>`, `<ety:...>`).
-- Passed as `param_mods` to M.parse_utilities.parse_inline_modifiers in
-- DataRetriever.try_parse_etymon.
local etymon_param_mods = {
	id = {},
	t = {},
	tr = {},
	ts = {},
	pos = {},
	alt = {},
	ety = {},
	unc = { type = "boolean" },
	ref = {},
}

--- Parse one positional parameter as an etymon specification.
-- Accepted shape: `[langcode:]term[<mod:value>...]`. Without a language prefix,
-- `contextLang` is used.
-- @param param        raw parameter text.
-- @param contextLang  language object used when no prefix is present.
-- @return nil when `param` is a keyword (or the language lookup returns a falsy value);
--         otherwise a table with `lang`, `term` and any parsed modifier fields.
--         An id written as `!xyz` is stored as `xyz` with `override = true`.
function DataRetriever.try_parse_etymon(param, contextLang)
	if is_keyword(param) then
		return nil
	end

	-- Default to the context language; a "code:" prefix overrides it.
	local lang, rest = contextLang, param
	local lang_code, rest_match = param:match("^([a-zA-Z.%-]+):(.*)$")
	if lang_code then
		lang = Util.get_lang(lang_code, false)
		if not lang then return nil end
		rest = rest_match
	end

	local has_modifiers = rest:find("<", 1, true)
	if has_modifiers then
		-- The term is whatever precedes the first "<".
		local term_text = rest:match("^(.-)<") or rest

		local parsed = M.parse_utilities.parse_inline_modifiers(rest, {
			param_mods = etymon_param_mods,
			-- The argument supplied by the parser is ignored in favour of `term_text`.
			generate_obj = function(_)
				return { term = M.string_utilities.trim(term_text) }
			end,
		})

		local raw_id = parsed.id
		if raw_id and raw_id:match("^!") then
			parsed.id = raw_id:sub(2)
			parsed.override = true
		end

		parsed.lang = lang
		return parsed
	end

	return {
		lang = lang,
		term = M.string_utilities.trim(rest)
	}
end

--- Scan a page for `etymon` templates of one language and record them in `state`.
--
-- Results are written to:
--   * state.argsOf["lang:page:id"]       - arguments of each template that has an id;
--   * state.disambiguationCount["lang:page"] - list of the ids found;
--   * state.singleEtymons["lang:page"]   - arguments of the only template, if exactly one;
--   * state.argsOf[key]                  - "redlink" (page has no content) or "missing"
--                                          (no matching language section / template) on failure.
-- If the page is a redirect and the requested key was not found on it, the redirect
-- target is scraped once and its result is copied to `key`.
--
-- @param etymonPage      page name used to build the cache keys.
-- @param etymonTitle     title object for that page.
-- @param key             cache key the caller will read from state.argsOf.
-- @param etymonLang      language object whose section and templates are wanted.
-- @param etymonId        requested id, or nil for an ID-less lookup.
-- @param redirectedFrom  "lang:page" key of the redirecting page when following a redirect.
function DataRetriever.scrape_page(etymonPage, etymonTitle, key, etymonLang, etymonId, redirectedFrom)
	local content = etymonTitle:getContent()
	if not content then
		state.argsOf[key] = "redlink"
		return
	end

	-- For a redirect the raw content is kept as is; otherwise only the
	-- level-2 section of the requested language is scanned.
	local redirectTarget = etymonTitle.redirectTarget
	if not redirectTarget then
		content = M.pages.get_section(content, etymonLang:getFullName(), 2)
		if not content then
			state.argsOf[key] = "missing"
			return
		end
	end

	local etymonLangcode = etymonLang:getFullCode()
	local L2_key = etymonLangcode .. ":" .. etymonPage
	local found_templates_for_lang = {}
	local found_ids = {}

	for template in M.template_parser.find_templates(content) do
		if template:get_name() == "etymon" then
			local templateArgs = template:get_arguments()
			if templateArgs[1] == etymonLangcode then
				if templateArgs.id then
					state.argsOf[L2_key .. ":" .. templateArgs.id] = templateArgs
					table.insert(found_ids, templateArgs.id)
				end
				table.insert(found_templates_for_lang, templateArgs)
			end
		end
	end

	state.disambiguationCount[L2_key] = found_ids

	if #found_templates_for_lang == 1 then
		state.singleEtymons[L2_key] = found_templates_for_lang[1]
	end

	-- When this page was reached through a redirect, its ids also count for the redirecting page.
	if redirectedFrom and state.disambiguationCount[L2_key] then
		state.disambiguationCount[redirectedFrom] = state.disambiguationCount[redirectedFrom] or {}
		for _, id in ipairs(state.disambiguationCount[L2_key]) do
			table.insert(state.disambiguationCount[redirectedFrom], id)
		end
	end

	if state.argsOf[key] then
		return
	elseif redirectedFrom or not redirectTarget then
		-- Not found, and there is no (further) redirect to follow.
		state.argsOf[key] = "missing"
		return
	end

	-- ID-less lookups pass etymonId = nil; concatenating nil would raise
	-- "attempt to concatenate a nil value", so use an empty id in the keys
	-- (matching the "lang:page:" key such callers pass in).
	-- NOTE(review): state.singleEtymons is not copied to the redirecting page, so an
	-- ID-less link through a redirect may still resolve to nothing — confirm intent.
	local idSuffix = etymonId or ""
	etymonPage = redirectTarget.prefixedText
	DataRetriever.scrape_page(etymonPage, redirectTarget, L2_key .. ":" .. idSuffix, etymonLang, etymonId, L2_key)
	state.argsOf[key] = state.argsOf[etymonLangcode .. ":" .. etymonPage .. ":" .. idSuffix]
end

--- Resolve the template arguments of the entry an etymon points to.
-- @param etymon_data  table from try_parse_etymon (`term`, `lang`, optional `id`).
-- @param isTopLevel   truthy when the etymon belongs to the template being rendered.
-- @return the argument table of the target's etymon template, or a status string:
--         "nolink" (id is "?"), "redlink" / "missing" (from scraping); may be nil
--         when nothing was recorded for the target.
-- Raises an error when no id was given and the target page has several ids.
function DataRetriever.get_args(etymon_data, isTopLevel)
	local id = etymon_data.id
	if id == "?" then
		return "nolink"
	end

	local page = M.links.get_link_page(etymon_data.term, etymon_data.lang)
	local normLang = Util.get_norm_lang(etymon_data.lang)

	-- Scrape `page` and record the results under `cache_key`.
	local function scrape(cache_key, wanted_id)
		local title = mw.title.new(page)
		if not title then error('Invalid page title "' .. page .. '" encountered.') end
		DataRetriever.scrape_page(page, title, cache_key, normLang, wanted_id)
	end

	if id then
		local key = normLang:getFullCode() .. ":" .. page .. ":" .. id
		if state.argsOf[key] == nil then
			scrape(key, id)
		end
		return state.argsOf[key]
	end

	-- No id given: remember that, then rely on the page having a single candidate.
	state.usedIdlessEtymon = true
	if isTopLevel then
		state.topLevelIdlessEtymon = true
	end

	local base_key = normLang:getFullCode() .. ":" .. page
	if state.disambiguationCount[base_key] == nil then
		scrape(base_key .. ":", nil)
	end

	local ids = state.disambiguationCount[base_key] or {}
	local count = #ids

	if count < 1 then
		return "missing"
	end
	if count == 1 then
		return state.singleEtymons[base_key]
	end

	-- Several ids: list the non-empty ones in the error message.
	local quoted = {}
	for _, candidate in ipairs(ids) do
		if candidate and candidate ~= "" then
			quoted[#quoted + 1] = "\"" .. candidate .. "\""
		end
	end

	local suggestion_text = ""
	if #quoted > 0 then
		suggestion_text = " Available IDs: " .. table.concat(quoted, ", ") .. "."
	end

	error("Etymology link to '[[" .. page .. "]]' is ambiguous. The page has " ..
		count .. " etymon templates for " .. normLang:getCanonicalName() ..
		". Please specify an ID." .. suggestion_text)
end

--- Recursively build the etymology data tree for one entry.
-- @param lang   language object of the entry.
-- @param title  term / page title of the entry.
-- @param args   argument table of the entry's etymon template (args[1] is the language
--               code, further positional values are keywords and etymons, `id` is the
--               entry id), or a status string / nil when no template is available.
-- @param seen   set of "lang:page:id" keys already expanded on this walk (cycle guard);
--               created on the first call.
-- @param depth  current depth; 0 (the default) for the top-level entry.
-- @return node table: { lang, title, id, args, children, status, ... }. `status` is "ok",
--         or the status string / args value when the node was not expanded.
-- Also updates the statistics and flags in `state`.
function DataRetriever.build_data_tree(lang, title, args, seen, depth)
	seen = seen or {}
	depth = depth or 0
	local isTopLevel = (depth == 0)

	if depth > state.maxDepthReached then
		state.maxDepthReached = depth
	end

	state.totalNodes = state.totalNodes + 1

	local langCode = lang:getCode()
	state.languageCount[langCode] = (state.languageCount[langCode] or 0) + 1

	local currId = (type(args) == "table" and args.id) or ""
	local key = Util.get_norm_lang(lang):getFullCode() .. ":" .. M.links.get_link_page(title, lang) .. ":" .. currId
	local node = { lang = lang, title = title, id = currId, args = args, children = {}, status = "ok" }

	-- Leaf: nothing to expand (status string / nil), or this entry was already expanded.
	if type(args) ~= "table" or seen[key] then
		node.status = args or "missing"
		return node
	end

	seen[key] = true
	-- `derType` is the keyword currently in effect; after `:afeq` the following etymons are skipped.
	local derType, confidence, ignoreEtymons = "from", "conf", false

	-- args[1] is the language code, so parsing starts at index 2.
	for i = 2, #args do
		local param = args[i]

		if is_keyword(param) then
			local keyword = get_keyword(param)
			if keyword == "conf" then
				confidence = keyword
			else
				derType = keyword
				confidence = "conf"
				ignoreEtymons = (keyword == "afeq")
			end
		elseif param:sub(1, 1) == ":" then
			error("Invalid keyword '" .. param .. "'. Did you mean a valid keyword like ':bor', ':inh', etc.?")
		else
			local etymon_data = DataRetriever.try_parse_etymon(param, lang)
			if etymon_data and not ignoreEtymons then
				local etymonArgs = DataRetriever.get_args(etymon_data, isTopLevel)
				if etymon_data.ety then
					if etymonArgs == "redlink" or etymonArgs == "missing" then
						-- The target has no usable template: synthesise its arguments
						-- from the inline <ety:keyword<etymon>...> modifier.
						state.currentPageHasInlineEtymology = true
						if isTopLevel then
							state.topLevelHasInlineEtymology = true
						end
						local ety_string = etymon_data.ety
						local segments = M.parse_utilities.parse_balanced_segment_run(ety_string, "<", ">")
						-- The first segment is the keyword (a colon-less form is accepted here).
						local keyword = M.string_utilities.trim(segments[1])

						if not is_keyword(keyword, true) then
							error("Invalid keyword '" .. keyword .. "' in inline etymology <ety:" .. keyword .. ">")
						end

						-- Every second segment is taken as a delimited etymon; its first and
						-- last characters (the "<" and ">") are stripped.
						local inline_params = {}
						for j = 2, #segments, 2 do
							local parent_wrapper = segments[j]
							if parent_wrapper and parent_wrapper ~= "" then
								table.insert(inline_params, parent_wrapper:sub(2, -2))
							end
						end
						-- Same layout as a scraped template: language code, keyword, etymons, id.
						etymonArgs = { etymon_data.lang:getCode() }
						table.insert(etymonArgs, ":" .. keyword)
						for _, p in ipairs(inline_params) do
							table.insert(etymonArgs, p)
						end
						etymonArgs.id = etymon_data.id
					else
						-- An inline etymology was given although the lookup did not fail.
						state.currentPageHasRedundantEtymology = true
						if isTopLevel then
							state.topLevelRedundantEtymology = true
						end
					end
				end

				local childNode = DataRetriever.build_data_tree(etymon_data.lang, etymon_data.term, etymonArgs, seen,
					depth + 1)
				-- Annotate the child with the data given at the point of reference.
				childNode.target_key = Util.get_norm_lang(etymon_data.lang):getFullCode() ..
					":" .. M.links.get_link_page(etymon_data.term, etymon_data.lang)
				childNode.id = etymon_data.id
				childNode.t = etymon_data.t
				childNode.tr = etymon_data.tr
				childNode.ts = etymon_data.ts
				childNode.pos = etymon_data.pos
				childNode.alt = etymon_data.alt
				childNode.ref = etymon_data.ref
				childNode.derType = derType
				childNode.is_uncertain = etymon_data.unc
				childNode.override = etymon_data.override
				table.insert(node.children, childNode)
			end
		end
	end

	return node
end

local Validator = {}

-- Values accepted for the `pos` parameter.
local valid_pos = {
	prefix = true,
	suffix = true,
	interfix = true,
	infix = true,
	root = true,
}

--- Validate the arguments of the template being rendered; raises an error on the first problem.
-- Checks performed:
--   * `id` has at least two characters and differs from the title / page name;
--   * `pos`, when given, is exactly one of prefix, suffix, interfix, infix, root;
--   * every ":xyz" parameter is a known keyword;
--   * group keywords (is_group, except `af`) are followed by at least two etymons;
--   * etymons under `from` are in the entry's own language, and etymons under `inh`
--     pass M.etymology.check_ancestor.
-- @param lang    language object of the entry.
-- @param params  list of positional parameters (keywords and etymons).
-- @param id      entry id, or nil.
-- @param title   entry title.
-- @param pos     value of the `pos` parameter, or nil.
function Validator.run(lang, params, id, title, pos)
	if id then
		if mw.ustring.len(id) < 2 then error("The `id` parameter must have at least two characters.") end
		if id == title or id == Util.get_page_data().pagename then
			error(
				"The `id` parameter must not be the same as the page title.")
		end
	end
	-- Exact lookup: the value must not be interpreted as a Lua pattern, and a
	-- multi-word value such as "prefix suffix" must not slip through.
	if pos and not valid_pos[pos] then
		error(
			"Unknown value provided for `pos`.")
	end

	local currentKeyword = "from"
	local etymonsInGroup = {}

	-- Called whenever a group ends (new keyword or end of parameters).
	local function checkGroup()
		local entry = keywords[currentKeyword]
		if entry and entry.is_group and currentKeyword ~= 'af' and #etymonsInGroup <= 1 then
			error("Detected `:" .. currentKeyword .. "` group with fewer than two etymons.")
		end
		etymonsInGroup = {}
	end

	for _, param in ipairs(params) do
		if param:sub(1, 1) == ":" and not is_keyword(param) then
			error("Invalid keyword '" .. param .. "'. Did you mean a valid keyword like ':bor', ':inh', etc.?")
		end

		if is_keyword(param) then
			-- `:conf` does not start a new group.
			if get_keyword(param) ~= "conf" then
				checkGroup()
				currentKeyword = get_keyword(param)
			end
		else
			local etymon_data = DataRetriever.try_parse_etymon(param, lang)
			-- The parameter counts towards the current group whether or not it parsed.
			table.insert(etymonsInGroup, param)
			if etymon_data then
				local paramLang = etymon_data.lang
				if currentKeyword == "from" and paramLang:getFullCode() ~= lang:getFullCode() then
					error("`:from` is for same-language derivation, but language does not match.")
				elseif currentKeyword == "inh" then
					M.etymology.check_ancestor(lang, paramLang)
				end
			end
		end
	end
	checkGroup()
end

--- Emit tracking keys for a numeric statistic: one for the raw value and one for
-- the first matching range label, each optionally repeated under a per-language prefix.
-- @param track_func  function called with each tracking key.
-- @param base_key    statistic name used in the key path.
-- @param value       the number being tracked.
-- @param ranges      ordered list of { min=, max=, exact=, label= }; the first match wins.
--                    `min`/`max` bounds are inclusive; `exact` is only consulted when
--                    neither bound is set.
-- @param langCode    optional language code; when truthy, each key is also emitted
--                    as "etymon/lang/<code>/...".
local function track_ranges(track_func, base_key, value, ranges, langCode)
	local global_prefix = "etymon/" .. base_key .. "/"
	local lang_prefix = langCode and ("etymon/lang/" .. langCode .. "/" .. base_key .. "/")

	local function emit(suffix)
		track_func(global_prefix .. suffix)
		if lang_prefix then
			track_func(lang_prefix .. suffix)
		end
	end

	emit(value)

	for _, range in ipairs(ranges) do
		local lo, hi = range.min, range.max
		local hit
		if lo and hi then
			hit = value >= lo and value <= hi
		elseif lo then
			hit = value >= lo
		elseif hi then
			hit = value <= hi
		elseif range.exact then
			hit = value == range.exact
		else
			hit = false
		end

		if hit then
			emit(range.label)
			break
		end
	end
end

--- Template entry point.
-- Reads the parent frame's arguments, validates them, builds the etymology data tree
-- and returns the concatenated output: an anchor element, then (depending on the
-- `tree` / `text` arguments) the rendered tree and/or text, then categories.
-- @param frame  the invoking frame; arguments are taken from frame:getParent().
-- @return the output wikitext / HTML as one string.
function export.main(frame)
	local parentArgs = frame:getParent().args
	local allArgs = M.parameters.process(parentArgs, mw.loadData(Loader.modules.parameters_data).etymon)
	local lang, id, title, text, tree, exnihilo, etydate, pos, args =
		allArgs[1], allArgs.id, allArgs.title, allArgs.text, allArgs.tree,
		allArgs.exnihilo, allArgs.etydate, allArgs.pos, allArgs[2]

	-- Default the title to the current page name; pages in the Reconstruction
	-- namespace get a leading "*".
	if not title then
		local pdata = Util.get_page_data()
		title = pdata.pagename
		if pdata.namespace == "Reconstruction" then title = "*" .. title end
	end

	Validator.run(lang, args, id, title, pos)

	-- Give `args` the same layout as scraped template arguments (language code at
	-- index 1, `id` as a named field) and register it in the cache.
	table.insert(args, 1, lang:getCode()); args.id = id
	state.argsOf[lang:getCode() .. ":" .. title .. ":" .. (id or "")] = args

	local etyDataTree = DataRetriever.build_data_tree(lang, title, args)

	parse_tree_references(etyDataTree)

	-- Anchor element carrying the language and id as data attributes.
	local output = {}
	table.insert(output, tostring(mw.html.create("li")
		:attr("id", M.anchors.language_anchor(lang, id))
		:attr("class", "etymonid")
		:attr("data-lang", lang:getCode())
		:attr("data-id", id or "")
		:css("list-style", "none")
		:allDone()))

	-- Languages listed in config.unallowed_langs may not request tree or text output.
	local disallowed = config.unallowed_langs[lang:getCode()]
	if disallowed then
		local error_text = " for " .. lang:getFullName()
		if disallowed["ref"] then
			error_text = error_text .. "; see " .. disallowed["ref"]
		else
			error_text = error_text .. "."
		end
		if tree then error("Etymology trees are not allowed" .. error_text) end
		if text then error("Etymology texts are not allowed" .. error_text) end
	end

	-- Optional `etydate` argument: a comma-separated list of date specs with inline
	-- modifiers (ref, refn, nocap), formatted through M.etydate.format_etydate.
	if etydate then
		local etydate_param_mods = {
			ref = { list = true, type = "references", allow_holes = true },
			refn = { list = true, allow_holes = true },
			nocap = { type = "boolean" },
		}

		-- Split the text before the modifiers on commas and trim each piece.
		local function generate_etydate_obj(etydate_text)
			local etydate_specs = {}
			for spec in etydate_text:gmatch("[^,]+") do
				table.insert(etydate_specs, mw.text.trim(spec))
			end
			return { [1] = etydate_specs }
		end

		local parsed_etydate = M.parse_utilities.parse_inline_modifiers(etydate,
			{ param_mods = etydate_param_mods, generate_obj = generate_etydate_obj })

		local etydate_args = {
			[1] = parsed_etydate[1],
			nocap = parsed_etydate.nocap or false,
			ref = parsed_etydate.ref or {},
			refn = parsed_etydate.refn or { maxindex = 0 }
		}

		-- Recompute `maxindex` as the largest numeric key present in `refn`.
		if etydate_args.refn then
			local max = 0
			for k, v in pairs(etydate_args.refn) do
				if type(k) == "number" and k > max then
					max = k
				end
			end
			etydate_args.refn.maxindex = max
		end

		etyDataTree.etydate = M.etydate.format_etydate(etydate_args)
	end

	if tree then
		local tree_renderer = require('Module:etymon/tree')
		table.insert(output, templatestyles("Module:etymon/styles.css"))
		table.insert(output, tree_renderer.render(etyDataTree, config, keywords, Util.make_link))
	end

	if text then
		local text_renderer = require('Module:etymon/text')
		-- Map the `text` argument to the renderer's (usePlus, maxDepth) options:
		-- "++" -> (true, false), "+" -> (true, 1), "-" -> (false, 1), anything else -> (false, false).
		local usePlus, maxDepth
		if text == "++" then
			usePlus, maxDepth = true, false
		elseif text == "+" then
			usePlus, maxDepth = true, 1
		elseif text == "-" then
			usePlus, maxDepth = false, 1
		else
			usePlus, maxDepth = false, false
		end
		table.insert(output, text_renderer.render(etyDataTree, keywords, Util.make_link, usePlus, maxDepth))
	end

	-- Tracking of tree statistics; only on content pages and only when the tree has
	-- at least one level below the entry itself.
	if Util.is_content_page() and state.maxDepthReached > 0 then
		local track = require("Module:debug/track")
		local langCode = lang:getCode()

		-- Range lists are ordered: track_ranges uses the first entry that matches.
		local depth_ranges = {
			{ min = 50, label = "extremely-deep" },
			{ min = 20, label = "20+" },
			{ min = 10, max = 19,     label = "10-19" },
			{ min = 5,  max = 9,      label = "5-9" },
			{ min = 3,  max = 4,      label = "3-4" },
			{ max = 2,  label = "1-2" }
		}

		local node_ranges = {
			{ min = 100, label = "extremely-large" },
			{ min = 50, label = "50+" },
			{ min = 20, max = 49,     label = "20-49" },
			{ min = 10, max = 19,     label = "10-19" },
			{ min = 5,  max = 9,      label = "5-9" },
			{ max = 4,  label = "1-4" }
		}

		local language_ranges = {
			{ min = 10,  label = "10+" },
			{ min = 5,   max = 9,      label = "5-9" },
			{ min = 3,   max = 4,      label = "3-4" },
			{ exact = 2, label = "2" },
			{ exact = 1, label = "1" }
		}

		track_ranges(track, "depth", state.maxDepthReached, depth_ranges, langCode)

		track_ranges(track, "nodes", state.totalNodes, node_ranges, langCode)

		-- Number of distinct language codes seen while building the tree.
		local uniqueLanguages = 0
		for _ in pairs(state.languageCount) do
			uniqueLanguages = uniqueLanguages + 1
		end
		track_ranges(track, "unique-languages", uniqueLanguages, language_ranges, langCode)
	end

	-- Categories are only produced on content pages.
	local categories = {}
	if Util.is_content_page() then
		local category_renderer = require('Module:etymon/categories')
		categories = category_renderer.render(lang, etyDataTree, state.disambiguationCount, Util.get_norm_lang)
		if tree then table.insert(categories, lang:getCanonicalName() .. " entries with etymology trees") end
		if text then table.insert(categories, lang:getCanonicalName() .. " entries with etymology texts") end
		if exnihilo then table.insert(categories, lang:getCanonicalName() .. " terms coined ex nihilo") end
		if state.topLevelHasInlineEtymology then
			table.insert(categories, "Pages with inline etymon for redlinks")
		end
		if state.topLevelRedundantEtymology then
			table.insert(categories, "Pages with redundant inline etymon")
		end
		if state.topLevelIdlessEtymon then
			table.insert(categories, "Pages using etymon with no ID")
		end
	end

	-- No categories are emitted for language code "zh".
	if #categories > 0 and lang:getCode() ~= "zh" then
		table.insert(output, M.utilities.format_categories(categories, lang))
	end

	return table.concat(output)
end

return export