Difference between revisions of "Module:Category handler"

From MINR.ORG WIKI
(add blacklist check)
(add the rest of the features, improve the comments)
Line 1: Line 1:
-- Configuration data.
+
----------------------------------------------------------------------
 +
--                                                                  --
 +
--                        CATEGORY HANDLER                        --
 +
--                                                                  --
 +
--      This module implements the {{category handler}} template    --
 +
--      in Lua, with a few improvements: all namespaces and all    --
 +
--      namespace aliases are supported, and namespace names are    --
 +
--      detected automatically for the local wiki. This module      --
 +
--      requires [[Module:Namespace detect]] to be available on    --
 +
--      the local wiki. It can be configured for different wikis    --
 +
--      by altering the values in the "cfg" table.                  --
 +
--                                                                  --
 +
----------------------------------------------------------------------
 +
 
 +
----------------------------------------------------------------------
 +
--                      Configuration data                         --
 +
--      Language-specific parameter names and values can be set    --
 +
--      here.                                                       --
 +
----------------------------------------------------------------------
 +
 
 
local cfg = {}
 
local cfg = {}
  
 +
-- cfg.nocat is the parameter name to suppress categorisation.
 +
-- cfg.nocatTrue is the value to suppress categorisation, and
 +
-- cfg.nocatFalse is the value to both categorise and to skip the
 +
-- blacklist check.
 
cfg.nocat = 'nocat'     
 
cfg.nocat = 'nocat'     
 +
cfg.nocatTrue = 'true'
 +
cfg.nocatFalse = 'false'
 +
 +
-- The parameter name for the legacy "categories" parameter.
 
cfg.categories = 'categories'
 
cfg.categories = 'categories'
 +
cfg.categoriesYes = 'yes'
 +
 +
-- The parameter name for the legacy "category2" parameter. This
 +
-- skips the blacklist if set to the cfg.category2Yes value, and
 +
-- suppresses categorisation if present but equal to anything other
 +
-- than cfg.category2Yes.
 +
cfg.category2 = 'category2'
 +
cfg.category2Yes = 'yes'
 +
 +
-- cfg.subpage is the parameter name to specify how to behave on
 +
-- subpages. cfg.subpageNo is the value to specify to not
 +
-- categorise on subpages; cfg.only is the value to specify to only
 +
-- categorise on subpages.
 
cfg.subpage = 'subpage'
 
cfg.subpage = 'subpage'
cfg.page = 'page'
+
cfg.subpageNo = 'no'
cfg.category2 = 'category2'
+
cfg.subpageOnly = 'only'
 +
 
 +
-- The parameter for data to return in all namespaces.
 
cfg.all = 'all'
 
cfg.all = 'all'
cfg.main = 'main'
+
 
 +
-- The parameter name for data to return if no data is specified for
 +
-- the namespace that is detected. This must be the same as the
 +
-- cfg.other parameter in [[Module:Namespace detect]].
 
cfg.other = 'other'
 
cfg.other = 'other'
 +
 +
-- The parameter name used to specify a page other than the current
 +
-- page; used for testing and demonstration. This must be the same
 +
-- as the cfg.page parameter in [[Module:Namespace detect]].
 +
cfg.page = 'page'
  
 
-- The categorisation blacklist. Pages that match Lua patterns in this
 
-- The categorisation blacklist. Pages that match Lua patterns in this
Line 34: Line 84:
 
}
 
}
  
-- Module start.
+
-- This is a table of namespaces to categorise by default.
 +
cfg.defaultNamespaces = {
 +
    0, -- Main
 +
    6, -- File
 +
    12, -- Help
 +
    14 -- Category
 +
}
 +
 
 +
----------------------------------------------------------------------
 +
--                    End configuration data                      --
 +
----------------------------------------------------------------------
 +
 
 +
-- Get dependent modules and declare the table of functions that we will
 +
-- return.
 +
local NamespaceDetect = require('Module:Namespace detect')
 
local p = {}
 
local p = {}
local args = {}
 
  
-- Get the page object. This will return the page object for the page
+
----------------------------------------------------------------------
-- specified, or nil if there are errors in the title or if the
+
--                         Local functions                          --
-- expensive function count has been exceeded.
+
--     The following are internal functions, which we do not want  --
local function getPageObject()
+
--      to be accessible from other modules.                       --
    -- Get the title object for args.page if it is specified. Otherwise
+
----------------------------------------------------------------------
    -- get the title object for the current page.
 
    if args[cfg.page] then
 
        -- Get the page object, passing the function through pcall
 
        -- in case we are over the expensive function count limit.
 
        local noError, pageObject = pcall(mw.title.new, args[cfg.page])
 
        if not noError then
 
            return nil
 
        else
 
            return pageObject
 
        end
 
    else
 
        return mw.title.getCurrentTitle()
 
    end   
 
end
 
  
 
-- Find whether we need to return a category or not.
 
-- Find whether we need to return a category or not.
local function needsCategory( pageObject )
+
local function needsCategory( pageObject, args )
     if not pageObject then return end
+
    -- If there is no pageObject available, then that either means that we are over
     if args[cfg.nocat] == 'true'
+
    -- the expensive function limit or that the title specified was invalid. Invalid
         or ( args[cfg.category2] and args[cfg.category2] ~= 'yes' )
+
    -- titles will probably only be a problem during testing, so choose the best
         or ( args[cfg.subpage] == 'no' and pageObject.isSubpage )
+
    -- default for being over the expensive function limit, i.e. categorise the page.
         or ( args[cfg.subpage] == 'only' and not pageObject.isSubpage ) then
+
     if not pageObject then  
 +
        return true
 +
    end
 +
    -- Only categorise if the relevant options are set.
 +
     if args[cfg.nocat] == cfg.nocatTrue
 +
         or ( args[cfg.category2] and args[cfg.category2] ~= cfg.category2Yes )
 +
         or ( args[cfg.subpage] == cfg.subpageNo and pageObject.isSubpage )
 +
         or ( args[cfg.subpage] == cfg.subpageOnly and not pageObject.isSubpage ) then
 
         return false
 
         return false
 
     else
 
     else
Line 72: Line 128:
  
 
-- Find whether we need to check the blacklist or not.
 
-- Find whether we need to check the blacklist or not.
local function needsBlacklistCheck()
+
local function needsBlacklistCheck( args )
     if args[cfg.nocat] == 'false'
+
     if args[cfg.nocat] == cfg.nocatFalse
         or args[cfg.categories] == 'yes'
+
         or args[cfg.categories] == cfg.categoriesYes
         or args[cfg.category2] == 'yes' then
+
         or args[cfg.category2] == cfg.category2Yes then
 
         return false
 
         return false
 
     else
 
     else
Line 85: Line 141:
 
-- string searched is the namespace plus the title, including subpages.
 
-- string searched is the namespace plus the title, including subpages.
 
-- Returns true if there is a match, otherwise returns false.
 
-- Returns true if there is a match, otherwise returns false.
local function findBlacklistMatch(pageObject)
+
local function findBlacklistMatch( pageObject )
 
     if not pageObject then return end
 
     if not pageObject then return end
 
      
 
      
Line 106: Line 162:
 
end
 
end
  
local function _main()
+
-- Find whether any namespace parameters have been specified.
     local pageObject = getPageObject()
+
-- Mappings is the table of parameter mappings taken from
     if not needsCategory( pageObject ) then return end
+
-- [[Module:Namespace detect]].
     if needsBlacklistCheck() then
+
local function nsParamsExist( mappings, args )
         return findBlacklistMatch( pageObject )
+
    if args[cfg.all] or args[cfg.other] then
 +
        return true
 +
    end
 +
    for ns, params in pairs( mappings ) do
 +
        for i, param in ipairs( params ) do
 +
            if args[param] then
 +
                return true
 +
            end
 +
        end
 +
    end
 +
    return false
 +
end
 +
 
 +
-- The main structure of the module. Checks whether we need to categorise,
 +
-- and then passes the relevant arguments to [[Module:Namespace detect]].
 +
local function _main( args )
 +
    -- Get the page object and argument mappings from
 +
    -- [[Module:Namespace detect]], to save us from having to rewrite the
 +
    -- code.
 +
     local pageObject = NamespaceDetect.getPageObject()
 +
    local mappings = NamespaceDetect.getParamMappings()
 +
   
 +
    -- Check if we need a category or not, and return nothing if not.
 +
     if not needsCategory( pageObject, args ) then return end
 +
   
 +
    local ret = '' -- The string to return.
 +
     if needsBlacklistCheck( args ) and not findBlacklistMatch( pageObject ) then
 +
         if not nsParamsExist( mappings, args ) then
 +
            -- No namespace parameters exist; basic usage.
 +
            local ndargs = {}
 +
            for _, nsid in ipairs( cfg.defaultNamespaces ) do
 +
                ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
 +
            end
 +
            ndargs.page = args.page
 +
            local ndresult = NamespaceDetect.main( ndargs )
 +
            if ndresult then
 +
                ret = ret .. ndresult
 +
            end
 +
        else
 +
            -- Namespace parameters exist; advanced usage.
 +
            -- If the all parameter is specified, return it.
 +
            if args.all then
 +
                ret = ret .. args.all
 +
            end
 +
           
 +
            -- Get the arguments to pass to [[Module:Namespace detect]].
 +
            local ndargs = {}
 +
            for ns, params in pairs( mappings ) do
 +
                for _, param in ipairs( params ) do
 +
                    ndargs[param] = args[param] or args[cfg.other] or nil
 +
                end
 +
            end
 +
            if args.other then
 +
                ndargs.other = args.other
 +
            end
 +
            if args.page then
 +
                ndargs.page = args.page
 +
            end
 +
            local data = NamespaceDetect.main( ndargs )
 +
           
 +
            -- Work out what to return based on the result of the namespace
 +
            -- detect call.
 +
            local datanum = tonumber( data )
 +
            if type( datanum ) == 'number' then
 +
                -- "data" is a number, so return that positional parameter.
 +
                -- Remove non-positive integer values, as only positive integers
 +
                -- from 1-10 were used with the old template.
 +
                if datanum > 0
 +
                    and math.floor( datanum ) == datanum
 +
                    and args[datanum] then
 +
                    ret = ret .. args[ datanum ]
 +
                end
 +
            else
 +
                -- "data" is not a number, so return it as it is.
 +
                if type(data) == 'string' then
 +
                    ret = ret .. data
 +
                end
 +
            end
 +
        end
 
     end
 
     end
 +
    return ret
 
end
 
end
  
-- Process the arguments.
+
----------------------------------------------------------------------
function p.main(frame)
+
--                        Global functions                          --
 +
--      The following functions are global, because we want them    --
 +
--      to be accessible from #invoke and from other Lua modules.  --
 +
--      At the moment only the main function is here. It processes  --
 +
--      the arguments and passes them to the _main function.         --
 +
----------------------------------------------------------------------
 +
 
 +
function p.main( frame )
 
     -- If called via #invoke, use the args passed into the invoking
 
     -- If called via #invoke, use the args passed into the invoking
 
     -- template, or the args passed to #invoke if any exist. Otherwise
 
     -- template, or the args passed to #invoke if any exist. Otherwise
Line 129: Line 271:
 
         origArgs = frame
 
         origArgs = frame
 
     end
 
     end
   
 
    -- The following don't need blank values preserved:
 
    -- nocat
 
    -- categories
 
    -- subpage
 
    -- page
 
    -- positional parameters (1-10)
 
   
 
    -- The following *do* need blank values preserved
 
    -- category2
 
    -- all
 
    -- other
 
    -- main
 
    -- all the namespace parameters
 
  
 
     -- Trim whitespace and remove blank arguments for the following args:
 
     -- Trim whitespace and remove blank arguments for the following args:
 
     -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
 
     -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
     for k, v in pairs(origArgs) do
+
    local args = {}
 +
     for k, v in pairs( origArgs ) do
 
         v = mw.text.trim(v) -- Trim whitespace.
 
         v = mw.text.trim(v) -- Trim whitespace.
 
         if type(k) == 'number'
 
         if type(k) == 'number'
Line 161: Line 290:
 
     end
 
     end
 
      
 
      
     -- Lower-case "nocat", "categories", "category2", and "subpage".
+
     -- Lower-case "nocat", "categories", "category2", and "subpage". These
 +
    -- parameters are put in lower case whenever they appear in the old
 +
    -- template, so we can just do it once here and save ourselves some work.
 
     local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
 
     local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
 
     for _, v in ipairs( lowercase ) do
 
     for _, v in ipairs( lowercase ) do
Line 169: Line 300:
 
     end
 
     end
 
      
 
      
     return _main()
+
     return _main( args )
 
end
 
end
  
 
return p
 
return p

Revision as of 10:36, 1 July 2013

Documentation for this module may be created at Module:Category handler/doc

----------------------------------------------------------------------
--                                                                  --
--                         CATEGORY HANDLER                         --
--                                                                  --
--      This module implements the {{category handler}} template    --
--      in Lua, with a few improvements: all namespaces and all     --
--      namespace aliases are supported, and namespace names are    --
--      detected automatically for the local wiki. This module      --
--      requires [[Module:Namespace detect]] to be available on     --
--      the local wiki. It can be configured for different wikis    --
--      by altering the values in the "cfg" table.                  --
--                                                                  --
----------------------------------------------------------------------

----------------------------------------------------------------------
--                      Configuration data                          --
--      Language-specific parameter names and values can be set     --
--      here.                                                       --
----------------------------------------------------------------------

-- Table holding every configurable parameter name and parameter value
-- used by this module. The values compared against "nocat", "categories",
-- "category2" and "subpage" are matched after those arguments have been
-- lower-cased in p.main, so they should be written in lower case here.
local cfg = {}

-- cfg.nocat is the parameter name to suppress categorisation.
-- cfg.nocatTrue is the value to suppress categorisation, and
-- cfg.nocatFalse is the value to both categorise and to skip the
-- blacklist check.
cfg.nocat = 'nocat'    
cfg.nocatTrue = 'true'
cfg.nocatFalse = 'false'

-- The parameter name for the legacy "categories" parameter.
-- cfg.categoriesYes is the value that skips the blacklist check.
cfg.categories = 'categories'
cfg.categoriesYes = 'yes'

-- The parameter name for the legacy "category2" parameter. This
-- skips the blacklist if set to the cfg.category2Yes value, and
-- suppresses categorisation if present but equal to anything other
-- than cfg.category2Yes. Blank values of this parameter are not removed
-- by p.main, so a blank "category2" also suppresses categorisation.
cfg.category2 = 'category2'
cfg.category2Yes = 'yes'

-- cfg.subpage is the parameter name to specify how to behave on
-- subpages. cfg.subpageNo is the value to specify to not
-- categorise on subpages; cfg.subpageOnly is the value to specify to
-- only categorise on subpages.
cfg.subpage = 'subpage'
cfg.subpageNo = 'no'
cfg.subpageOnly = 'only'

-- The parameter for data to return in all namespaces.
cfg.all = 'all'

-- The parameter name for data to return if no data is specified for
-- the namespace that is detected. This must be the same as the
-- cfg.other parameter in [[Module:Namespace detect]].
cfg.other = 'other'

-- The parameter name used to specify a page other than the current
-- page; used for testing and demonstration. This must be the same
-- as the cfg.page parameter in [[Module:Namespace detect]].
cfg.page = 'page'

-- The categorisation blacklist. Pages that match Lua patterns in this
-- list will not be categorised unless the appropriate options are set.
-- If the namespace name has a space in, it must be written with an
-- underscore, e.g. "Wikipedia_talk". Other parts of the title can have
-- either underscores or spaces.
--
-- Each entry is tested with mw.ustring.match against the page's nsText,
-- a colon and the page's text (or just the text when nsText is blank).
-- Entries are Lua patterns, so magic characters such as "-" must be
-- escaped with "%". An entry without a "^" anchor can match anywhere in
-- the title.
cfg.blacklist = {
    '^Main Page$', -- don't categorise the main page.
    
    -- Don't categorise the following pages or their subpages.
    '^Wikipedia:Cascade%-protected items$',
    '^Wikipedia:Cascade%-protected items/.*$',
    '^User:UBX$', -- The userbox "template" space.
    '^User:UBX/.*$',
    '^User_talk:UBX$',
    '^User_talk:UBX/.*$',
    
    -- Don't categorise subpages of these pages, but allow
    -- categorisation of the base page.
    '^Wikipedia:Template messages/.*$',
    
    -- Don't categorise archives. This pattern is unanchored, so it matches
    -- "/Archive" or "/archive" at any position in the title.
    '/[aA]rchive'
}

-- This is a table of namespaces to categorise by default. The entries are
-- namespace IDs. They are only used when no namespace parameters, "all"
-- or "other" are supplied: the first positional parameter is then offered
-- to [[Module:Namespace detect]] under the lower-cased name of each of
-- these namespaces.
cfg.defaultNamespaces = {
    0, -- Main
    6, -- File
    12, -- Help
    14 -- Category
}

----------------------------------------------------------------------
--                     End configuration data                       --
----------------------------------------------------------------------

-- Get dependent modules and declare the table of functions that we will
-- return.
local NamespaceDetect = require('Module:Namespace detect')
local p = {}

----------------------------------------------------------------------
--                         Local functions                          --
--      The following are internal functions, which we do not want  --
--      to be accessible from other modules.                        --
----------------------------------------------------------------------

-- Decide whether the page may be categorised at all, before any blacklist
-- or namespace handling takes place.
--   pageObject: the title object for the page, or a false value if none
--               could be obtained.
--   args:       the cleaned argument table.
-- Returns true if categorisation may go ahead, false if it is suppressed.
local function needsCategory( pageObject, args )
    -- A missing pageObject means that either the expensive function limit
    -- was exceeded or the title given was invalid. Invalid titles should
    -- only turn up during testing, so use the best default for the
    -- expensive function case, which is to categorise the page.
    if not pageObject then
        return true
    end

    -- Each of the following options suppresses categorisation by itself.
    if args[cfg.nocat] == cfg.nocatTrue then
        return false
    end

    local category2 = args[cfg.category2]
    if category2 and category2 ~= cfg.category2Yes then
        return false
    end

    local subpage = args[cfg.subpage]
    if subpage == cfg.subpageNo and pageObject.isSubpage then
        return false
    end
    if subpage == cfg.subpageOnly and not pageObject.isSubpage then
        return false
    end

    return true
end

-- Report whether the blacklist has to be consulted for this call.
--   args: the cleaned argument table.
-- Returns false when any of the "skip the blacklist" options is set
-- (nocat = cfg.nocatFalse, categories = cfg.categoriesYes or
-- category2 = cfg.category2Yes), and true otherwise.
local function needsBlacklistCheck( args )
    local skipBlacklist = args[cfg.nocat] == cfg.nocatFalse
        or args[cfg.categories] == cfg.categoriesYes
        or args[cfg.category2] == cfg.category2Yes
    return not skipBlacklist
end

-- Test the page against every pattern in cfg.blacklist. The string that
-- is tested is the namespace text, a colon and the page text (subpages
-- included); pages with a blank namespace text are tested on the page
-- text alone.
--   pageObject: the title object for the page.
-- Returns true if a pattern matches and false if none does. Returns
-- nothing at all when no pageObject is supplied.
local function findBlacklistMatch( pageObject )
    if not pageObject then return end

    -- Build the full title to test.
    local nsText = pageObject.nsText
    local fullTitle = pageObject.text
    if #nsText > 0 then
        fullTitle = nsText .. ':' .. fullTitle
    end

    -- Stop at the first blacklist entry that matches.
    for _, blacklisted in ipairs( cfg.blacklist ) do
        if mw.ustring.match( fullTitle, blacklisted ) then
            return true
        end
    end
    return false
end

-- Report whether the caller supplied any namespace-related parameter,
-- i.e. "all", "other", or any parameter name listed in the mappings.
--   mappings: the table of parameter mappings taken from
--             [[Module:Namespace detect]]; each value is a list of
--             parameter names.
--   args:     the cleaned argument table.
-- Returns true if at least one such parameter is set, otherwise false.
local function nsParamsExist( mappings, args )
    if not ( args[cfg.all] or args[cfg.other] ) then
        for _, aliases in pairs( mappings ) do
            for _, alias in ipairs( aliases ) do
                if args[alias] then
                    return true
                end
            end
        end
        return false
    end
    return true
end

-- The main structure of the module. Checks whether we need to categorise,
-- and then passes the relevant arguments to [[Module:Namespace detect]].
--   args: the cleaned argument table built by p.main.
-- Returns nothing when categorisation is suppressed by needsCategory.
-- Otherwise returns a string, which is blank when the page is blacklisted
-- or when no data applies to the detected namespace.
local function _main( args )
    -- Get the page object and argument mappings from
    -- [[Module:Namespace detect]], to save us from having to rewrite the
    -- code.
    -- NOTE(review): getPageObject is called without arguments, so whether
    -- the "page" parameter is honoured for the checks below depends on
    -- how [[Module:Namespace detect]] obtains the page name - confirm.
    local pageObject = NamespaceDetect.getPageObject()
    local mappings = NamespaceDetect.getParamMappings()

    -- Check if we need a category or not, and return nothing if not.
    if not needsCategory( pageObject, args ) then return end

    local ret = '' -- The string to return.

    -- The blacklist only blocks categorisation when the check is actually
    -- required. When one of the "skip the blacklist" options is set
    -- (e.g. nocat = cfg.nocatFalse) we must still categorise, so the
    -- blacklist is not consulted at all in that case.
    if needsBlacklistCheck( args ) and findBlacklistMatch( pageObject ) then
        return ret
    end

    if not nsParamsExist( mappings, args ) then
        -- No namespace parameters exist; basic usage. Offer the first
        -- positional parameter for each of the default namespaces.
        -- NOTE(review): the name of the main namespace (ID 0) is presumably
        -- the blank string, in which case the key generated for it may not
        -- match the parameter name that [[Module:Namespace detect]] uses
        -- for the main namespace - confirm.
        local ndargs = {}
        for _, nsid in ipairs( cfg.defaultNamespaces ) do
            ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
        end
        ndargs[cfg.page] = args[cfg.page]
        local ndresult = NamespaceDetect.main( ndargs )
        if ndresult then
            ret = ret .. ndresult
        end
        return ret
    end

    -- Namespace parameters exist; advanced usage.
    -- If the all parameter is specified, return it.
    if args[cfg.all] then
        ret = ret .. args[cfg.all]
    end

    -- Get the arguments to pass to [[Module:Namespace detect]]. Namespace
    -- parameters that were not specified fall back to the "other" value.
    local ndargs = {}
    for _, params in pairs( mappings ) do
        for _, param in ipairs( params ) do
            ndargs[param] = args[param] or args[cfg.other]
        end
    end
    if args[cfg.other] then
        ndargs[cfg.other] = args[cfg.other]
    end
    if args[cfg.page] then
        ndargs[cfg.page] = args[cfg.page]
    end
    local data = NamespaceDetect.main( ndargs )

    -- Work out what to return based on the result of the namespace
    -- detect call.
    local datanum = tonumber( data )
    if datanum then
        -- "data" is a number, so return that positional parameter.
        -- Remove non-positive integer values, as only positive integers
        -- from 1-10 were used with the old template.
        if datanum > 0
            and math.floor( datanum ) == datanum
            and args[datanum] then
            ret = ret .. args[datanum]
        end
    elseif type( data ) == 'string' then
        -- "data" is not a number, so return it as it is.
        ret = ret .. data
    end
    return ret
end

----------------------------------------------------------------------
--                        Global functions                          --
--      The following functions are global, because we want them    --
--      to be accessible from #invoke and from other Lua modules.   --
--      At the moment only the main function is here. It processes  --
--      the arguments and passes them to the _main function.         --
----------------------------------------------------------------------

-- Entry point for #invoke and for other Lua modules.
--   frame: the frame object when called via #invoke, or a plain table of
--          arguments when called directly from another Lua module.
-- Cleans up the arguments and returns the result of _main.
function p.main( frame )
    -- If called via #invoke, use the args passed into the invoking
    -- template, or the args passed to #invoke if any exist. Otherwise
    -- assume args are being passed directly in.
    local origArgs
    if frame == mw.getCurrentFrame() then
        origArgs = frame:getParent().args
        -- One iteration is enough to find out whether #invoke itself was
        -- given any arguments.
        for _ in pairs( frame.args ) do
            origArgs = frame.args
            break
        end
    else
        -- Tolerate being called from Lua without an argument table.
        origArgs = frame or {}
    end

    -- Trim whitespace and remove blank arguments for the following args:
    -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
    -- All other arguments are trimmed but keep their blank values.
    local args = {}
    for k, v in pairs( origArgs ) do
        -- Arguments passed directly from Lua are not guaranteed to be
        -- strings. Numbers are converted to strings; any other non-string
        -- value is passed through untouched rather than handed to the
        -- string functions.
        if type( v ) == 'number' then
            v = tostring( v )
        end
        if type( v ) == 'string' then
            v = mw.text.trim( v ) -- Trim whitespace.
        end
        if type(k) == 'number'
            or k == cfg.nocat
            or k == cfg.categories
            or k == cfg.subpage
            or k == cfg.page then
            if v ~= '' then
                args[k] = v
            end
        else
            args[k] = v
        end
    end

    -- Lower-case "nocat", "categories", "category2", and "subpage". These
    -- parameters are put in lower case whenever they appear in the old
    -- template, so we can just do it once here and save ourselves some work.
    local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
    for _, v in ipairs( lowercase ) do
        if type( args[v] ) == 'string' then
            args[v] = mw.ustring.lower( args[v] )
        end
    end

    return _main( args )
end

return p