Module:Category handler

From MINR.ORG WIKI
Revision as of 11:39, 25 June 2013 by Mr. Stradivarius (talk) (add blacklist check)

Documentation for this module may be created at Module:Category handler/doc

-- Configuration data.
-- Each entry maps an internal option name to the argument name that is
-- looked up in the argument table (the helpers below read options as
-- args[cfg.<name>]). Edit the string on the right-hand side to rename a
-- parameter without touching the rest of the code.
local cfg = {
    nocat = 'nocat',
    categories = 'categories',
    subpage = 'subpage',
    page = 'page',
    category2 = 'category2',
    all = 'all',
    main = 'main',
    other = 'other',
}

-- The categorisation blacklist: a list of Lua patterns. A page whose
-- "Namespace:Title" string matches any entry is reported as blacklisted
-- unless the caller's options switch the blacklist check off.
-- Write a namespace name that contains a space with an underscore
-- instead (e.g. "Wikipedia_talk"), as the entries below do; the rest of
-- the title is written with spaces. Remember to escape pattern magic
-- characters such as "-" with "%".
cfg.blacklist = {
    -- The main page itself.
    '^Main Page$',

    -- Pages that are excluded together with all of their subpages
    -- (base page pattern first, then its subpage pattern).
    '^Wikipedia:Cascade%-protected items$', '^Wikipedia:Cascade%-protected items/.*$',
    '^User:UBX$',                           '^User:UBX/.*$', -- userbox "template" space
    '^User_talk:UBX$',                      '^User_talk:UBX/.*$',

    -- Only the subpages are excluded here; the base page may still be
    -- categorised.
    '^Wikipedia:Template messages/.*$',

    -- Archives: any title containing "/archive" or "/Archive".
    '/[aA]rchive',
}

-- Module start.
-- "args" holds the processed arguments; it is filled in by p.main and
-- read by the helper functions below.
local args = {}
-- "p" is the table of exported functions returned at the end of the file.
local p = {}

-- Get the page object to work on.
-- When the "page" argument is set, the title object for that page is
-- requested; otherwise the title object of the current page is used.
-- Returns nil if mw.title.new raises an error (the call goes through
-- pcall because it can fail once the expensive function count limit has
-- been exceeded), and passes through whatever mw.title.new returns
-- otherwise.
local function getPageObject()
    local pageName = args[cfg.page]
    if not pageName then
        -- No explicit page requested: fall back to the current page.
        return mw.title.getCurrentTitle()
    end

    local ok, titleObject = pcall(mw.title.new, pageName)
    if ok then
        return titleObject
    end
    return nil
end

-- Decide whether a category should be returned for the page.
-- pageObject: the title object to test, or nil.
-- Returns nothing when there is no page object, false when one of the
-- suppressing options applies, and true otherwise.
local function needsCategory( pageObject )
    if not pageObject then return end

    -- "nocat=true" switches categorisation off outright.
    if args[cfg.nocat] == 'true' then
        return false
    end

    -- "category2" suppresses categorisation when it is present with any
    -- value other than "yes" (this includes the blank string).
    local category2 = args[cfg.category2]
    if category2 and category2 ~= 'yes' then
        return false
    end

    -- "subpage=no" excludes subpages; "subpage=only" excludes everything
    -- that is not a subpage.
    local subpage = args[cfg.subpage]
    if subpage == 'no' and pageObject.isSubpage then
        return false
    end
    if subpage == 'only' and not pageObject.isSubpage then
        return false
    end

    return true
end

-- Decide whether the blacklist has to be consulted.
-- The check is skipped when categorisation has been requested
-- explicitly, i.e. when "nocat" is "false", "categories" is "yes" or
-- "category2" is "yes". Returns true if the blacklist must be checked
-- and false if it can be skipped.
local function needsBlacklistCheck()
    local explicitlyRequested = args[cfg.nocat] == 'false'
        or args[cfg.categories] == 'yes'
        or args[cfg.category2] == 'yes'
    return not explicitlyRequested
end

-- Searches the blacklist to find a match with the page object. The
-- string searched is the namespace plus the title, including subpages.
-- Underscores and spaces are treated as equivalent: underscores are
-- converted to spaces in both the title and each blacklist pattern
-- before matching, so a pattern may be written in either form and the
-- result does not depend on which separator the title object uses.
--
-- pageObject: a title object (its nsText and text fields are read),
--     or nil.
-- Returns true if a pattern matches, false if none does, and nothing if
-- no page object was supplied.
local function findBlacklistMatch(pageObject)
    if not pageObject then return end

    -- Build the string to check: "Namespace:Title", or just "Title"
    -- when the namespace text is blank.
    local title = pageObject.nsText
    if #title > 0 then
        title = title .. ':' .. pageObject.text
    else
        title = pageObject.text
    end

    -- Normalise the word separator. The surrounding parentheses drop
    -- gsub's second return value (the substitution count).
    title = ( title:gsub( '_', ' ' ) )

    -- Check the blacklist, applying the same normalisation to every
    -- pattern so that both sides use spaces.
    for _, pattern in ipairs( cfg.blacklist ) do
        local normalisedPattern = ( pattern:gsub( '_', ' ' ) )
        if mw.ustring.match( title, normalisedPattern ) then
            return true
        end
    end
    return false
end

-- Core logic, run after p.main has filled in the argument table.
-- Returns the result of findBlacklistMatch when the page needs a
-- category and the blacklist check is required; returns nothing in
-- every other case.
local function _main()
    local title = getPageObject()
    if needsCategory( title ) then
        if needsBlacklistCheck() then
            return findBlacklistMatch( title )
        end
    end
end

-- Process the arguments.
--
-- Entry point of the module. Collects the caller's arguments into the
-- module-level "args" table, normalises them and then runs _main().
--
-- frame: either the current frame object (when called via #invoke) or a
--     plain table of arguments (when called from another Lua module).
--     Any other value, including nil, is treated as "no arguments".
-- Returns whatever _main() returns.
function p.main(frame)
    -- Start every call with an empty argument table. "args" is shared
    -- module-level state, so without this reset the arguments of an
    -- earlier call would still be visible when the module is called
    -- again from the same Lua environment.
    args = {}

    -- If called via #invoke, use the args passed into the invoking
    -- template, or the args passed to #invoke if any exist. Otherwise
    -- assume args are being passed directly in.
    local origArgs
    if frame ~= nil and frame == mw.getCurrentFrame() then
        local parent = frame:getParent()
        origArgs = parent and parent.args or {}
        -- One pairs() step is enough to find out whether #invoke was
        -- given any arguments of its own.
        for _ in pairs( frame.args ) do
            origArgs = frame.args
            break
        end
    elseif type( frame ) == 'table' then
        origArgs = frame
    else
        origArgs = {}
    end

    -- The following don't need blank values preserved:
    -- nocat
    -- categories
    -- subpage
    -- page
    -- positional parameters (1-10)

    -- The following *do* need blank values preserved
    -- category2
    -- all
    -- other
    -- main
    -- all the namespace parameters

    -- Trim whitespace and remove blank arguments for the following args:
    -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
    -- All other arguments are trimmed but kept even when blank.
    for k, v in pairs( origArgs ) do
        -- Arguments passed directly from Lua may be numbers or booleans
        -- (e.g. nocat = true); treat them like their string form so
        -- that they can be trimmed and compared with 'true', 'yes' etc.
        local valueType = type( v )
        if valueType == 'number' or valueType == 'boolean' then
            v = tostring( v )
            valueType = 'string'
        end
        if valueType == 'string' then
            v = mw.text.trim( v ) -- Trim whitespace.
        end

        if type( k ) == 'number'
            or k == cfg.nocat
            or k == cfg.categories
            or k == cfg.subpage
            or k == cfg.page then
            if v ~= '' then
                args[k] = v
            end
        else
            args[k] = v
        end
    end

    -- Lower-case "nocat", "categories", "category2", and "subpage" so
    -- that the comparisons in the helper functions are case-insensitive.
    local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
    for _, name in ipairs( lowercase ) do
        local value = args[name]
        if type( value ) == 'string' then
            args[name] = mw.ustring.lower( value )
        end
    end

    return _main()
end

-- Export the module table; its only public function is p.main.
return p