Modulo:string utilities

Provides some utility functions for manipulating strings.



string_utilities.format( str, tbl )

This function, unlike string.format and mw.ustring.format, takes just two parameters—a format string and a table—and replaces all instances of {param_name} in the format string with the table's entry for param_name. The opening and closing brace characters can be escaped with {\op} and {\cl}, respectively. A table key beginning with a backslash can be referenced by doubling the initial backslash.

  • string_utilities.format("{foo} fish, {bar} fish, {baz} fish, {quux} fish", {["foo"]="one", ["bar"]="two", ["baz"]="red", ["quux"]="blue"})
    produces: "one fish, two fish, red fish, blue fish"
  • string_utilities.format("The set {\\op}1, 2, 3{\\cl} contains {\\\\hello} elements.", {["\\hello"]="three"})
    produces: "The set {1, 2, 3} contains three elements."
    • Note that the single and double backslashes should be entered as double and quadruple backslashes when quoted in a literal string.

-- Name of this module; used as the prefix of the error messages raised below.
local module_name = "string_utilities"
-- Table that collects the module's public functions.
local export = {}

-- Maps the name inside a {\name} escape sequence to the literal character
-- that export.format() substitutes for it.
local format_escapes = {
    ["op"] = "{",
    ["cl"] = "}",
}

-- Replace every {param_name} in `str` with `tbl[param_name]`.
--
-- A name preceded by exactly one backslash is an escape sequence and is
-- looked up in format_escapes instead ({\op} gives "{", {\cl} gives "}").
-- A name preceded by two backslashes is looked up in `tbl` under a key that
-- itself begins with a single backslash.
--
-- Raises an error for an unrecognized escape sequence, or when `tbl` has no
-- (truthy) entry for a name. Returns only the resulting string; the
-- substitution count returned by string.gsub is discarded by the parentheses.
function export.format(str, tbl)
    -- p1 is the first optional backslash. `name` is everything after it and
    -- begins with p2, the second optional backslash. Exactly one backslash
    -- in total therefore marks an escape sequence.
    return (string.gsub(str, "{(\\?)((\\?)[^{}]*)}", function (p1, name, p2)
        if #p1 + #p2 == 1 then
            return format_escapes[name] or error(module_name .. ".format: unrecognized escape sequence '{\\" .. name .. "}'")
        else
            return tbl[name] or error(module_name .. ".format: '" .. name .. "' not found in table")
        end
    end))
end

-- Reimplementation of mw.ustring.split() that includes any capturing
-- groups in the splitting pattern. This works like Python's re.split()
-- function, except that it has Lua's behavior when the split pattern
-- is empty (i.e. advancing by one character at a time; Python returns the
-- whole remainder of the string).
--
-- Returns a list containing, in order, each piece of `str` between matches
-- of `pattern`, followed by the values captured by that match (if any).
function export.capturing_split(str, pattern)
    local ret = {}
    -- Positions used by mw.ustring are counted in code points, so the
    -- end-of-string checks must compare against the code point length rather
    -- than the byte length. mw.ustring.len() returns nil for invalid UTF-8;
    -- fall back to the byte length in that case.
    local len = mw.ustring.len(str) or #str
    -- (.-) corresponds to (.*?) in Python or Perl; () captures the
    -- current position after matching.
    pattern = "(.-)" .. pattern .. "()"
    local start = 1
    while true do
        -- Did we reach the end of the string?
        if start > len then
            table.insert(ret, "")
            return ret
        end
        -- match() returns all captures as multiple return values;
        -- we need to insert into a table to get them all.
        local captures = {mw.ustring.match(str, pattern, start)}
        -- If no match, add the remainder of the string.
        if #captures == 0 then
            table.insert(ret, mw.ustring.sub(str, start))
            return ret
        end
        -- The trailing () capture is the position just past the match.
        local newstart = table.remove(captures)
        -- Special case: If we don't advance by any characters, then advance
        -- by one character; this avoids an infinite loop, and makes splitting
        -- by an empty string work the way mw.ustring.split() does. If we
        -- reach the end of the string this way, return immediately, so we
        -- don't get a final empty string.
        if newstart == start then
            table.insert(ret, mw.ustring.sub(str, start, start))
            -- Discard the (empty) leading (.-) capture.
            table.remove(captures, 1)
            start = start + 1
            if start > len then
                return ret
            end
        else
            -- The leading (.-) capture is the text before the separator.
            table.insert(ret, table.remove(captures, 1))
            start = newstart
        end
        -- Insert any captures from the splitting pattern.
        for _, x in ipairs(captures) do
            table.insert(ret, x)
        end
    end
end

-- Hand the table of public functions back to whoever loads this module.
return export