Module:Category handler: Difference between revisions

From the Croc Wiki, the Croc encyclopedia
Jump to navigation · Jump to search
Content added Content deleted
(add blacklist check)
(add the rest of the features, improve the comments)
Line 1: Line 1:
----------------------------------------------------------------------
-- Configuration data.
-- --
-- CATEGORY HANDLER --
-- --
-- This module implements the {{category handler}} template --
-- in Lua, with a few improvements: all namespaces and all --
-- namespace aliases are supported, and namespace names are --
-- detected automatically for the local wiki. This module --
-- requires [[Module:Namespace detect]] to be available on --
-- the local wiki. It can be configured for different wikis --
-- by altering the values in the "cfg" table. --
-- --
----------------------------------------------------------------------

----------------------------------------------------------------------
-- Configuration data --
-- Language-specific parameter names and values can be set --
-- here. --
----------------------------------------------------------------------

local cfg = {}
local cfg = {}


-- cfg.nocat is the parameter name to suppress categorisation.
-- cfg.nocatTrue is the value to suppress categorisation, and
-- cfg.nocatFalse is the value to both categorise and to skip the
-- blacklist check.
cfg.nocat = 'nocat'
cfg.nocat = 'nocat'
cfg.nocatTrue = 'true'
cfg.nocatFalse = 'false'

-- The parameter name for the legacy "categories" parameter.
cfg.categories = 'categories'
cfg.categories = 'categories'
cfg.subpage = 'subpage'
cfg.categoriesYes = 'yes'

cfg.page = 'page'
-- The parameter name for the legacy "category2" parameter. This
-- skips the blacklist if set to the cfg.category2Yes value, and
-- suppresses categorisation if present but equal to anything other
-- than cfg.category2Yes.
cfg.category2 = 'category2'
cfg.category2 = 'category2'
cfg.category2Yes = 'yes'

-- cfg.subpage is the parameter name to specify how to behave on
-- subpages. cfg.subpageNo is the value to specify to not
-- categorise on subpages; cfg.only is the value to specify to only
-- categorise on subpages.
cfg.subpage = 'subpage'
cfg.subpageNo = 'no'
cfg.subpageOnly = 'only'

-- The parameter for data to return in all namespaces.
cfg.all = 'all'
cfg.all = 'all'

cfg.main = 'main'
-- The parameter name for data to return if no data is specified for
-- the namespace that is detected. This must be the same as the
-- cfg.other parameter in [[Module:Namespace detect]].
cfg.other = 'other'
cfg.other = 'other'

-- The parameter name used to specify a page other than the current
-- page; used for testing and demonstration. This must be the same
-- as the cfg.page parameter in [[Module:Namespace detect]].
cfg.page = 'page'


-- The categorisation blacklist. Pages that match Lua patterns in this
-- The categorisation blacklist. Pages that match Lua patterns in this
Line 34: Line 84:
}
}


-- This is a table of namespaces to categorise by default.
-- Module start.
cfg.defaultNamespaces = {
0, -- Main
6, -- File
12, -- Help
14 -- Category
}

----------------------------------------------------------------------
-- End configuration data --
----------------------------------------------------------------------

-- Get dependent modules and declare the table of functions that we will
-- return.
local NamespaceDetect = require('Module:Namespace detect')
local p = {}
local p = {}
local args = {}


----------------------------------------------------------------------
-- Get the page object. This will return the page object for the page
-- Local functions --
-- specified, or nil if there are errors in the title or if the
-- The following are internal functions, which we do not want --
-- expensive function count has been exceeded.
-- to be accessible from other modules. --
local function getPageObject()
----------------------------------------------------------------------
-- Get the title object for args.page if it is specified. Otherwise
-- get the title object for the current page.
if args[cfg.page] then
-- Get the page object, passing the function through pcall
-- in case we are over the expensive function count limit.
local noError, pageObject = pcall(mw.title.new, args[cfg.page])
if not noError then
return nil
else
return pageObject
end
else
return mw.title.getCurrentTitle()
end
end


-- Find whether we need to return a category or not.
-- Find whether we need to return a category or not.
local function needsCategory( pageObject )
local function needsCategory( pageObject, args )
if not pageObject then return end
-- If there is no pageObject available, then that either means that we are over
-- the expensive function limit or that the title specified was invalid. Invalid
if args[cfg.nocat] == 'true'
-- titles will probably only be a problem during testing, so choose the best
or ( args[cfg.category2] and args[cfg.category2] ~= 'yes' )
-- default for being over the expensive function limit, i.e. categorise the page.
or ( args[cfg.subpage] == 'no' and pageObject.isSubpage )
or ( args[cfg.subpage] == 'only' and not pageObject.isSubpage ) then
if not pageObject then
return true
end
-- Only categorise if the relevant options are set.
if args[cfg.nocat] == cfg.nocatTrue
or ( args[cfg.category2] and args[cfg.category2] ~= cfg.category2Yes )
or ( args[cfg.subpage] == cfg.subpageNo and pageObject.isSubpage )
or ( args[cfg.subpage] == cfg.subpageOnly and not pageObject.isSubpage ) then
return false
return false
else
else
Line 72: Line 128:


-- Find whether we need to check the blacklist or not.
-- Find whether we need to check the blacklist or not.
local function needsBlacklistCheck()
local function needsBlacklistCheck( args )
if args[cfg.nocat] == 'false'
if args[cfg.nocat] == cfg.nocatFalse
or args[cfg.categories] == 'yes'
or args[cfg.categories] == cfg.categoriesYes
or args[cfg.category2] == 'yes' then
or args[cfg.category2] == cfg.category2Yes then
return false
return false
else
else
Line 85: Line 141:
-- string searched is the namespace plus the title, including subpages.
-- string searched is the namespace plus the title, including subpages.
-- Returns true if there is a match, otherwise returns false.
-- Returns true if there is a match, otherwise returns false.
local function findBlacklistMatch(pageObject)
local function findBlacklistMatch( pageObject )
if not pageObject then return end
if not pageObject then return end
Line 106: Line 162:
end
end


-- Find whether any namespace parameters have been specified.
local function _main()
-- Mappings is the table of parameter mappings taken from
local pageObject = getPageObject()
-- [[Module:Namespace detect]].
if not needsCategory( pageObject ) then return end
local function nsParamsExist( mappings, args )
if needsBlacklistCheck() then
if args[cfg.all] or args[cfg.other] then
return findBlacklistMatch( pageObject )
return true
end
end
for ns, params in pairs( mappings ) do
for i, param in ipairs( params ) do
if args[param] then
return true
end
end
end
return false
end
end


-- The main structure of the module. Checks whether we need to categorise,
-- Process the arguments.
-- and then passes the relevant arguments to [[Module:Namespace detect]].
function p.main(frame)
local function _main( args )
-- Get the page object and argument mappings from
-- [[Module:Namespace detect]], to save us from having to rewrite the
-- code.
local pageObject = NamespaceDetect.getPageObject()
local mappings = NamespaceDetect.getParamMappings()
-- Check if we need a category or not, and return nothing if not.
if not needsCategory( pageObject, args ) then return end
local ret = '' -- The string to return.
if needsBlacklistCheck( args ) and not findBlacklistMatch( pageObject ) then
if not nsParamsExist( mappings, args ) then
-- No namespace parameters exist; basic usage.
local ndargs = {}
for _, nsid in ipairs( cfg.defaultNamespaces ) do
ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
end
ndargs.page = args.page
local ndresult = NamespaceDetect.main( ndargs )
if ndresult then
ret = ret .. ndresult
end
else
-- Namespace parameters exist; advanced usage.
-- If the all parameter is specified, return it.
if args.all then
ret = ret .. args.all
end
-- Get the arguments to pass to [[Module:Namespace detect]].
local ndargs = {}
for ns, params in pairs( mappings ) do
for _, param in ipairs( params ) do
ndargs[param] = args[param] or args[cfg.other] or nil
end
end
if args.other then
ndargs.other = args.other
end
if args.page then
ndargs.page = args.page
end
local data = NamespaceDetect.main( ndargs )
-- Work out what to return based on the result of the namespace
-- detect call.
local datanum = tonumber( data )
if type( datanum ) == 'number' then
-- "data" is a number, so return that positional parameter.
-- Remove non-positive integer values, as only positive integers
-- from 1-10 were used with the old template.
if datanum > 0
and math.floor( datanum ) == datanum
and args[datanum] then
ret = ret .. args[ datanum ]
end
else
-- "data" is not a number, so return it as it is.
if type(data) == 'string' then
ret = ret .. data
end
end
end
end
return ret
end

----------------------------------------------------------------------
-- Global functions --
-- The following functions are global, because we want them --
-- to be accessible from #invoke and from other Lua modules. --
-- At the moment only the main function is here. It processes --
-- the arguments and passes them to the _main function. --
----------------------------------------------------------------------

function p.main( frame )
-- If called via #invoke, use the args passed into the invoking
-- If called via #invoke, use the args passed into the invoking
-- template, or the args passed to #invoke if any exist. Otherwise
-- template, or the args passed to #invoke if any exist. Otherwise
Line 129: Line 271:
origArgs = frame
origArgs = frame
end
end
-- The following don't need blank values preserved:
-- nocat
-- categories
-- subpage
-- page
-- positional parameters (1-10)
-- The following *do* need blank values preserved
-- category2
-- all
-- other
-- main
-- all the namespace parameters


-- Trim whitespace and remove blank arguments for the following args:
-- Trim whitespace and remove blank arguments for the following args:
-- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
-- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
local args = {}
for k, v in pairs(origArgs) do
for k, v in pairs( origArgs ) do
v = mw.text.trim(v) -- Trim whitespace.
v = mw.text.trim(v) -- Trim whitespace.
if type(k) == 'number'
if type(k) == 'number'
Line 161: Line 290:
end
end
-- Lower-case "nocat", "categories", "category2", and "subpage".
-- Lower-case "nocat", "categories", "category2", and "subpage". These
-- parameters are put in lower case whenever they appear in the old
-- template, so we can just do it once here and save ourselves some work.
local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
for _, v in ipairs( lowercase ) do
for _, v in ipairs( lowercase ) do
Line 169: Line 300:
end
end
return _main()
return _main( args )
end
end



Revision as of 09:36, July 1, 2013

Documentation for this module may be created at Module:Category handler/doc

----------------------------------------------------------------------
--                                                                  --
--                         CATEGORY HANDLER                         --
--                                                                  --
--      This module implements the {{category handler}} template    --
--      in Lua, with a few improvements: all namespaces and all     --
--      namespace aliases are supported, and namespace names are    --
--      detected automatically for the local wiki. This module      --
--      requires [[Module:Namespace detect]] to be available on     --
--      the local wiki. It can be configured for different wikis    --
--      by altering the values in the "cfg" table.                  --
--                                                                  --
----------------------------------------------------------------------

----------------------------------------------------------------------
--                      Configuration data                          --
--      Language-specific parameter names and values can be set     --
--      here.                                                       --
----------------------------------------------------------------------

local cfg = {
    -- Name of the parameter that controls suppression of categorisation.
    nocat = 'nocat',
    -- Value of the nocat parameter that suppresses categorisation.
    nocatTrue = 'true',
    -- Value of the nocat parameter that categorises the page and also
    -- skips the blacklist check.
    nocatFalse = 'false',

    -- Name of the legacy "categories" parameter, and the value of it
    -- that skips the blacklist check.
    categories = 'categories',
    categoriesYes = 'yes',

    -- Name of the legacy "category2" parameter. Setting it to the
    -- category2Yes value skips the blacklist check; setting it to
    -- anything else suppresses categorisation.
    category2 = 'category2',
    category2Yes = 'yes',

    -- Name of the parameter that controls behaviour on subpages.
    -- subpageNo is the value meaning "do not categorise subpages";
    -- subpageOnly is the value meaning "categorise subpages only".
    subpage = 'subpage',
    subpageNo = 'no',
    subpageOnly = 'only',

    -- Name of the parameter holding data to return in all namespaces.
    all = 'all',

    -- Name of the parameter holding data to return when nothing is
    -- specified for the detected namespace. This must be the same as
    -- the cfg.other parameter in [[Module:Namespace detect]].
    other = 'other',

    -- Name of the parameter used to specify a page other than the
    -- current page, for testing and demonstration. This must be the
    -- same as the cfg.page parameter in [[Module:Namespace detect]].
    page = 'page',

    -- The categorisation blacklist. A page whose "Namespace:Title"
    -- string matches any of these Lua patterns is not categorised
    -- unless one of the blacklist-skipping options is set. A namespace
    -- name containing a space must be written with an underscore,
    -- e.g. "Wikipedia_talk".
    blacklist = {
        -- The main page is never categorised.
        '^Main Page$',

        -- These pages and all of their subpages are excluded.
        '^Wikipedia:Cascade%-protected items$',
        '^Wikipedia:Cascade%-protected items/.*$',
        '^User:UBX$', -- The userbox "template" space.
        '^User:UBX/.*$',
        '^User_talk:UBX$',
        '^User_talk:UBX/.*$',

        -- Only the subpages of these pages are excluded; the base
        -- page itself may still be categorised.
        '^Wikipedia:Template messages/.*$',

        -- Archives are excluded.
        '/[aA]rchive',
    },

    -- Namespace numbers that are categorised by default when no
    -- namespace parameters are supplied.
    defaultNamespaces = {
        0, -- Main
        6, -- File
        12, -- Help
        14, -- Category
    },
}

----------------------------------------------------------------------
--                     End configuration data                       --
----------------------------------------------------------------------

-- Get dependent modules and declare the table of functions that we will
-- return.
local NamespaceDetect = require('Module:Namespace detect')
local p = {}

----------------------------------------------------------------------
--                         Local functions                          --
--      The following are internal functions, which we do not want  --
--      to be accessible from other modules.                        --
----------------------------------------------------------------------

-- Find whether we need to return a category or not.
--   pageObject: the title object of the page being checked, or nil if
--               no title object could be obtained.
--   args:       the table of processed arguments.
-- Returns false if categorisation has been suppressed by the nocat,
-- category2 or subpage options, and true otherwise.
local function needsCategory( pageObject, args )
    -- The nocat and category2 options do not depend on the page, so they
    -- are honoured even when no page object is available. (Previously a
    -- missing page object caused an explicit nocat=true to be ignored.)
    if args[cfg.nocat] == cfg.nocatTrue then
        return false
    end
    local category2 = args[cfg.category2]
    if category2 and category2 ~= cfg.category2Yes then
        return false
    end

    -- If there is no pageObject available, then that either means that we
    -- are over the expensive function limit or that the title specified
    -- was invalid. Invalid titles will probably only be a problem during
    -- testing, so choose the best default for being over the expensive
    -- function limit, i.e. categorise the page. The subpage options
    -- cannot be evaluated without a page object in any case.
    if not pageObject then
        return true
    end

    -- Apply the subpage restrictions.
    local subpage = args[cfg.subpage]
    if subpage == cfg.subpageNo and pageObject.isSubpage then
        return false
    end
    if subpage == cfg.subpageOnly and not pageObject.isSubpage then
        return false
    end
    return true
end

-- Decide whether the blacklist has to be consulted.
--   args: the table of processed arguments.
-- Returns false when any of the blacklist-skipping options is set
-- (nocat equal to cfg.nocatFalse, categories equal to cfg.categoriesYes,
-- or category2 equal to cfg.category2Yes), and true otherwise.
local function needsBlacklistCheck( args )
    local skipBlacklist = args[cfg.nocat] == cfg.nocatFalse
        or args[cfg.categories] == cfg.categoriesYes
        or args[cfg.category2] == cfg.category2Yes
    return not skipBlacklist
end

-- Tests the page against every pattern in cfg.blacklist.
--   pageObject: the title object to test, or nil.
-- The string tested is the page text, prefixed with the namespace text
-- and a colon when the namespace text is not empty.
-- Returns true if any pattern matches and false if none does. Returns
-- nothing (nil) when no page object was supplied.
local function findBlacklistMatch( pageObject )
    if not pageObject then
        return
    end

    -- Build the full title, leaving out the "namespace:" prefix when
    -- the namespace text is the blank string.
    local nsText = pageObject.nsText
    local fullTitle
    if #nsText == 0 then
        fullTitle = pageObject.text
    else
        fullTitle = nsText .. ':' .. pageObject.text
    end

    -- The first matching pattern settles the result.
    for _, blacklisted in ipairs( cfg.blacklist ) do
        if mw.ustring.match( fullTitle, blacklisted ) then
            return true
        end
    end
    return false
end

-- Reports whether the caller supplied any namespace-related parameter.
--   mappings: the parameter mappings obtained from
--             [[Module:Namespace detect]]; each value is an array of
--             parameter names.
--   args:     the table of processed arguments.
-- Returns true if the "all" or "other" parameter is present, or if any
-- parameter name listed in the mappings is present; otherwise false.
local function nsParamsExist( mappings, args )
    local hasGeneralParam = args[cfg.all] or args[cfg.other]
    if hasGeneralParam then
        return true
    end
    for _, paramNames in pairs( mappings ) do
        for _, paramName in ipairs( paramNames ) do
            if args[paramName] then
                return true
            end
        end
    end
    return false
end

-- The main structure of the module. Checks whether we need to categorise,
-- and then passes the relevant arguments to [[Module:Namespace detect]].
--   args: the table of processed arguments, as built by p.main.
-- Returns nil if categorisation is suppressed, the blank string if the
-- page is blacklisted, and otherwise the text selected for the page's
-- namespace (which may also be the blank string).
local function _main( args )
    -- Get the page object and argument mappings from
    -- [[Module:Namespace detect]], to save us from having to rewrite the
    -- code. The "page" argument is passed on so that the subpage and
    -- blacklist checks look at the page under test rather than at the
    -- current page.
    -- NOTE(review): assumes getPageObject accepts an optional page name
    -- as its first argument - confirm against the local copy of
    -- [[Module:Namespace detect]]. An unused extra argument is harmless.
    local pageObject = NamespaceDetect.getPageObject( args[cfg.page] )
    local mappings = NamespaceDetect.getParamMappings()

    -- Check if we need a category or not, and return nothing if not.
    if not needsCategory( pageObject, args ) then return end

    -- Stop only if the blacklist applies AND the page is on it. When the
    -- blacklist check is skipped (nocat=false, categories=yes or
    -- category2=yes) we must carry on and categorise; the previous
    -- condition returned the blank string in that case.
    if needsBlacklistCheck( args ) and findBlacklistMatch( pageObject ) then
        return ''
    end

    local ret = '' -- The string to return.
    if not nsParamsExist( mappings, args ) then
        -- No namespace parameters exist; basic usage. The first positional
        -- parameter is used for each of the default namespaces.
        -- NOTE(review): if the main namespace's name is the blank string
        -- on this wiki, this sets ndargs[''] for namespace 0 - confirm
        -- that [[Module:Namespace detect]] handles that key as intended.
        local ndargs = {}
        for _, nsid in ipairs( cfg.defaultNamespaces ) do
            ndargs[ mw.ustring.lower( mw.site.namespaces[ nsid ].name ) ] = args[1]
        end
        ndargs[cfg.page] = args[cfg.page]
        local ndresult = NamespaceDetect.main( ndargs )
        if ndresult then
            ret = ret .. ndresult
        end
    else
        -- Namespace parameters exist; advanced usage.
        -- If the all parameter is specified, return it.
        if args[cfg.all] then
            ret = ret .. args[cfg.all]
        end

        -- Get the arguments to pass to [[Module:Namespace detect]]. Each
        -- namespace parameter falls back to the "other" parameter when it
        -- has not been specified itself.
        local ndargs = {}
        for _, params in pairs( mappings ) do
            for _, param in ipairs( params ) do
                ndargs[param] = args[param] or args[cfg.other]
            end
        end
        if args[cfg.other] then
            ndargs[cfg.other] = args[cfg.other]
        end
        if args[cfg.page] then
            ndargs[cfg.page] = args[cfg.page]
        end
        local data = NamespaceDetect.main( ndargs )

        -- Work out what to return based on the result of the namespace
        -- detect call.
        local datanum = tonumber( data )
        if type( datanum ) == 'number' then
            -- "data" is a number, so return that positional parameter.
            -- Remove non-positive integer values, as only positive integers
            -- from 1-10 were used with the old template.
            if datanum > 0
                and math.floor( datanum ) == datanum
                and args[datanum] then
                ret = ret .. args[ datanum ]
            end
        elseif type( data ) == 'string' then
            -- "data" is not a number, so return it as it is.
            ret = ret .. data
        end
    end
    return ret
end

----------------------------------------------------------------------
--                        Global functions                          --
--      The following functions are global, because we want them    --
--      to be accessible from #invoke and from other Lua modules.   --
--      At the moment only the main function is here. It processes  --
--      the arguments and passes them to the _main function.        --
----------------------------------------------------------------------

-- Entry point for #invoke and for other Lua modules.
--   frame: the frame object when called via #invoke; otherwise a plain
--          table of arguments passed directly from Lua.
-- Normalises the arguments (trimming, blank removal, lower-casing) and
-- returns the result of _main for them.
function p.main( frame )
    -- If called via #invoke, use the args passed into the invoking
    -- template, or the args passed to #invoke if any exist. Otherwise
    -- assume args are being passed directly in.
    local origArgs
    if frame == mw.getCurrentFrame() then
        origArgs = frame:getParent().args
        -- A single iteration of pairs is used purely as a "does
        -- frame.args contain anything?" test.
        for _ in pairs( frame.args ) do
            origArgs = frame.args
            break
        end
    else
        origArgs = frame
    end

    -- Trim whitespace from every string argument, and remove blank
    -- arguments for the following args:
    -- 1, 2, 3 etc., "nocat", "categories", "subpage", and "page".
    -- All other arguments keep their blank values.
    local args = {}
    for k, v in pairs( origArgs ) do
        -- Only strings can be trimmed; values of other types, which can
        -- arrive when called directly from Lua, are passed through
        -- unchanged instead of raising an error.
        if type( v ) == 'string' then
            v = mw.text.trim( v )
        end
        local removeIfBlank = type( k ) == 'number'
            or k == cfg.nocat
            or k == cfg.categories
            or k == cfg.subpage
            or k == cfg.page
        if not ( removeIfBlank and v == '' ) then
            args[k] = v
        end
    end

    -- Lower-case "nocat", "categories", "category2", and "subpage". These
    -- parameters are put in lower case whenever they appear in the old
    -- template, so we can just do it once here and save ourselves some work.
    local lowercase = { cfg.nocat, cfg.categories, cfg.category2, cfg.subpage }
    for _, name in ipairs( lowercase ) do
        local value = args[name]
        -- Guard against non-string values for the same reason as above.
        if type( value ) == 'string' then
            args[name] = mw.ustring.lower( value )
        end
    end

    return _main( args )
end

return p