Loncat ke isi

Modul:utilities

Deri Wikikamus

Dokumentasi untuk modul ini dapat dibuat di Modul:utilities/doc

-- Export table: the public functions of this module are attached to U,
-- and U is what the module returns at the end of the file.
local U = {}

-- Script helper module; U.sortkeyStrip calls Scripts.detect(text) and looks
-- the result up in the stripScripts whitelist.
local Scripts = require("Modul:scripts")

-- Internal helper: render a category link in the "Bangsaan" namespace.
-- name:    category name placed after the "Bangsaan:" prefix.
-- sortkey: optional sort key; when nil, false or "" the link is emitted
--          without the "|sortkey" part.
-- Returns the wikitext link as a string.
local function makeCat(name, sortkey)
	if not sortkey or sortkey == "" then
		return ("[[Bangsaan:%s]]"):format(name)
	end
	return ("[[Bangsaan:%s|%s]]"):format(name, sortkey)
end

---------------------------------------------------------------------
-- Whitelist of scripts for which stripping diacritics is appropriate.
-- U.sortkeyStrip looks up the value returned by Scripts.detect(text) in
-- this table; text whose detected script is not listed here is returned
-- unchanged.
-- NOTE(review): keys look like ISO 15924 script codes — confirm against
-- Modul:scripts.
---------------------------------------------------------------------
local stripScripts = {
	Latn = true,
	Arab = true,
	Hebr = true,
	Syrc = true,
	Thaa = true,
}

---------------------------------------------------------------------
-- Helper: pull a usable (non-empty) string out of a table.
-- Lookup order:
--   1. t.args[1]            (frame-like table, first positional argument)
--   2. t.args["text"]       (frame-like table, named argument "text")
--   3. any other non-empty string value in t.args (pairs order, which is
--      unspecified, decides which one wins)
--   4. t[1]                 (plain array-like table)
-- Returns nil when t is not a table or nothing usable is found.
---------------------------------------------------------------------
local function extractStringFromTable(t)
	if type(t) ~= "table" then
		return nil
	end

	-- A value is usable only if it is a string with at least one character.
	local function usable(value)
		return type(value) == "string" and value ~= ""
	end

	local args = t.args
	if type(args) == "table" then
		local first = args[1]
		if usable(first) then
			return first
		end

		local named = args["text"]
		if usable(named) then
			return named
		end

		for _, candidate in pairs(args) do
			if usable(candidate) then
				return candidate
			end
		end
	end

	local head = t[1]
	if usable(head) then
		return head
	end

	return nil
end

---------------------------------------------------------------------
-- Strip combining diacritics from a sortkey.
--
-- text: a string, or a table (frame / args-like) from which a string is
--       extracted via extractStringFromTable; a table with no usable
--       string is treated as "".
--
-- Returns:
--   * text itself when it is nil, false or "";
--   * text unchanged when Scripts.detect(text) is not in stripScripts;
--   * otherwise text with every code point in the ranges below removed.
--     When mw.ustring.toNFD is available the text is decomposed (NFD)
--     first, so precomposed letters lose their marks too, and recomposed
--     (NFC) afterwards.
--
-- NOTE(review): only the five generic combining-mark ranges listed in
-- markRanges are removed; marks in other blocks (for example Arabic
-- U+064B-U+065F) are outside these ranges — confirm that is intended for
-- the non-Latin scripts in the whitelist.
---------------------------------------------------------------------
function U.sortkeyStrip(text)
	if type(text) == "table" then
		text = extractStringFromTable(text) or ""
	end

	if not text or text == "" then
		return text
	end

	-- Scripts outside the whitelist are passed through untouched.
	if not stripScripts[Scripts.detect(text)] then
		return text
	end

	-- Inclusive code point ranges that get removed.
	local markRanges = {
		{ 0x0300, 0x036F },
		{ 0x1AB0, 0x1AFF },
		{ 0x1DC0, 0x1DFF },
		{ 0x20D0, 0x20FF },
		{ 0xFE20, 0xFE2F },
	}

	if not (mw.ustring and mw.ustring.toNFD) then
		-- No normalization available: filter code points one by one.
		local kept = {}
		for _, cp in ipairs({ mw.ustring.codepoint(text, 1, -1) }) do
			local isMark = false
			for _, range in ipairs(markRanges) do
				if cp >= range[1] and cp <= range[2] then
					isMark = true
					break
				end
			end
			if not isMark then
				kept[#kept + 1] = mw.ustring.char(cp)
			end
		end
		return table.concat(kept)
	end

	-- Build a single character class "[lo1-hi1lo2-hi2...]" from the ranges.
	local classParts = { "[" }
	for _, range in ipairs(markRanges) do
		classParts[#classParts + 1] =
			mw.ustring.char(range[1]) .. "-" .. mw.ustring.char(range[2])
	end
	classParts[#classParts + 1] = "]"

	-- gsub also returns a match count; the local assignment keeps only the string.
	local stripped = mw.ustring.gsub(mw.ustring.toNFD(text), table.concat(classParts), "")
	return mw.ustring.toNFC(stripped)
end

---------------------------------------------------------------------
-- Build a sortkey from the current page title.
-- Takes title.subpageText (falling back to title.text), keeps only the
-- part after the last "/", trims surrounding whitespace, and runs the
-- result through U.sortkeyStrip.
-- Returns "" when the title text is empty; returns the unstripped segment
-- when stripping yields nil or "".
---------------------------------------------------------------------
function U.getSortkey()
	local current = mw.title.getCurrentTitle()
	local pageName = current.subpageText or current.text or ""
	if pageName == "" then
		return ""
	end

	-- Last path segment, then trimmed; fall back to the full name if the
	-- trimmed segment turns out empty.
	local segment = mw.ustring.match(pageName, "([^/]+)$") or pageName
	segment = mw.ustring.match(segment, "^%s*(.-)%s*$") or segment
	if segment == "" then
		segment = pageName
	end

	local key = U.sortkeyStrip(segment)
	if key and key ~= "" then
		return key
	end
	return segment
end

---------------------------------------------------------------------
-- Category link without language code.
-- text:    category name; nil or "" yields U.errorcat({ "sablonan" }).
-- sortkey: optional sort key, passed through U.sortkeyStrip first.
-- Returns the category wikitext produced by makeCat.
---------------------------------------------------------------------
function U.cat(text, sortkey)
	local hasName = text and text ~= ""
	if hasName then
		local key = U.sortkeyStrip(sortkey)
		return makeCat(text, key)
	end
	return U.errorcat({ "sablonan" })
end

---------------------------------------------------------------------
-- Category link with language code: the category name is "lang:text".
-- lang, text: both required; if either is nil or "" the result is
--             U.errorcat({ "basa" }).
-- sortkey:    optional sort key, passed through U.sortkeyStrip first.
---------------------------------------------------------------------
function U.langcat(lang, text, sortkey)
	local missingLang = not lang or lang == ""
	local missingText = not text or text == ""
	if missingLang or missingText then
		return U.errorcat({ "basa" })
	end

	local key = U.sortkeyStrip(sortkey)
	return makeCat(lang .. ":" .. text, key)
end

---------------------------------------------------------------------
-- Convert a category tag into a visible link to the category page
-- (no categorization): a leading "[[" becomes "[[:".
-- cat: category wikitext such as "[[Bangsaan:Name]]"; nil or "" yields "".
-- Returns exactly one value, the rewritten string. string.gsub also
-- returns the number of substitutions; that count is deliberately dropped
-- so it cannot leak into the output (Scribunto concatenates every value an
-- invoked function returns) or into a caller's argument list.
---------------------------------------------------------------------
function U.catlink(cat)
	if not cat or cat == "" then
		return ""
	end
	-- Assigning to a single local truncates gsub's (string, count) pair.
	local link = cat:gsub("^%[%[", "[[:")
	return link
end

---------------------------------------------------------------------
-- Generic error message + tracking category.
-- frame: either a frame-like table with .args, or a plain array-like
--        table. Element 1 is the error type key ("basa", "sablonan",
--        "pentol", "pengujungan"; anything else uses the default entry),
--        element 2 is an optional suffix appended to both the message and
--        the category name.
-- Returns the error markup followed by a "Bangsaan:... galat" category
-- link when the current page is in namespace 0 or 114, and "" in every
-- other namespace.
---------------------------------------------------------------------
function U.errorcat(frame)
	local args = frame.args or frame
	local kind = args[1] or ""
	local extra = args[2] or ""

	-- Per error type: { message shown to the reader, category name prefix }.
	local known = {
		basa        = { "Ngablu: Isian kode basa kaga' aci.", "Halaman dengen basa" },
		sablonan    = { "Ngablu: Isian sablonan kaga' aci.", "Halaman dengen sablonan" },
		pentol      = { "Ngablu: Isian pentol kata kaga' aci.", "Halaman dengen pentol" },
		pengujungan = { "Ngablu: Isian pengujungan kaga' aci.", "Halaman dengen pengujungan" },
		default     = { "Ngablu: Ada nyang salah.", "Halaman" },
	}

	local entry = known[kind] or known["default"]
	local msg, base = entry[1], entry[2]

	if extra ~= "" then
		msg = msg .. " (" .. extra .. ")"
		base = base .. " " .. extra
	end

	-- Outside namespaces 0 and 114 nothing is emitted at all.
	local ns = mw.title.getCurrentTitle().namespace
	if ns ~= 0 and ns ~= 114 then
		return ""
	end

	return string.format(
		"<strong class='error'>%s</strong>[[Bangsaan:%s galat]]",
		U.safeEscape(msg),
		base
	)
end

---------------------------------------------------------------------
-- Generate rhyme categories.
-- lang:      language code handed to U.langcat.
-- rhymes:    array of rhyme strings (nil is treated as an empty list);
--            empty or false entries are skipped.
-- syllables: optional array parallel to rhymes; a non-empty entry at the
--            same index adds "/<entry> ucap" to the category name.
-- sortkey:   optional sort key, stripped once via U.sortkeyStrip.
-- Returns the category links joined with newlines ("" if there are none).
---------------------------------------------------------------------
function U.rhymecat(lang, rhymes, syllables, sortkey)
	local links = {}
	local key = U.sortkeyStrip(sortkey)

	for idx, rhyme in ipairs(rhymes or {}) do
		local usable = rhyme and rhyme ~= ""
		if usable then
			local name = "Pengujungan/" .. rhyme
			local count = syllables and syllables[idx]
			if count and count ~= "" then
				name = name .. "/" .. count .. " ucap"
			end
			table.insert(links, U.langcat(lang, name, key))
		end
	end

	return table.concat(links, "\n")
end

---------------------------------------------------------------------
-- Etymology category generator.
-- lang1:     language code of the entry; nil or "" yields
--            U.errorcat({ "basa" }).
-- etymType:  category name stem; nil or "" yields
--            U.errorcat({ "sablonan", "etimologi" }). (This parameter was
--            previously named `type`, which shadowed the builtin type()
--            inside the function; arguments are positional, so callers
--            are unaffected.)
-- lang2text: optional source-language text; when non-empty the category
--            name becomes "<etymType> deri <lang2text>".
-- sortkey:   optional sort key. It is passed on untouched because
--            U.langcat already runs it through U.sortkeyStrip; stripping
--            it here as well only repeated the same work.
-- Returns the category wikitext from U.langcat, or the error markup.
---------------------------------------------------------------------
function U.etymcat(lang1, etymType, lang2text, sortkey)
	if not lang1 or lang1 == "" then
		return U.errorcat({ "basa" })
	end
	if not etymType or etymType == "" then
		return U.errorcat({ "sablonan", "etimologi" })
	end

	local catName = etymType
	if lang2text and lang2text ~= "" then
		catName = etymType .. " deri " .. lang2text
	end

	return U.langcat(lang1, catName, sortkey)
end

---------------------------------------------------------------------
-- Wrappers for template usage: each takes a frame and forwards its
-- positional arguments to the corresponding function.
---------------------------------------------------------------------
-- U.getCat: frame.args[1] = category name, frame.args[2] = sortkey.
function U.getCat(frame)
	local name, sortkey = frame.args[1], frame.args[2]
	return U.cat(name, sortkey)
end

-- Frame wrapper for U.langcat:
-- frame.args[1] = language code, [2] = category text, [3] = sortkey.
function U.getLangcat(frame)
	local lang, text, sortkey = frame.args[1], frame.args[2], frame.args[3]
	return U.langcat(lang, text, sortkey)
end

-- Frame wrapper for U.etymcat: frame.args[1] through frame.args[4] are
-- forwarded, in order, as its four arguments.
function U.getEtymcat(frame)
	local a = frame.args
	local first, second, third, fourth = a[1], a[2], a[3], a[4]
	return U.etymcat(first, second, third, fourth)
end

-- Frame wrapper for U.errorcat: the frame is handed over as-is, and
-- U.errorcat reads its .args itself.
function U.getErrorcat(frame)
	local rendered = U.errorcat(frame)
	return rendered
end

---------------------------------------------------------------------
-- Safe text escaping for strings that are embedded in wikitext output
-- (used by U.errorcat for the error message).
-- text: value to escape; nil or false yields "". Other values are
--       converted with tostring() before escaping.
-- Returns the escaped string.
--
-- The original implementation relied solely on mw.text.escape, which is
-- not among the functions documented for the Scribunto text library, so
-- the text was returned unescaped. That hook is still honoured if an
-- environment provides it; otherwise mw.text.nowiki is used, which
-- replaces wikitext/HTML-significant characters with entities. If neither
-- exists the text is returned unchanged, as before.
---------------------------------------------------------------------
function U.safeEscape(text)
	if not text then
		return ""
	end
	text = tostring(text)

	local textLib = mw.text
	if textLib then
		if textLib.escape then
			return textLib.escape(text)
		end
		if textLib.nowiki then
			return textLib.nowiki(text)
		end
	end
	return text
end

return U