Module:Delink: Difference between revisions

From the Croc Wiki, the Croc encyclopedia
Jump to navigation · Jump to search
Content added Content deleted
(split the search algorithm out into its own function, add data type checks)
(Try parsing wikilinks and URLs on the same pass)
Line 115: Line 115:
end
end


local function delinkLinkClass(s, pattern, delinkFunction)
local function _delink(args)
if not type(s) == "string" then
local text = args[1] or ""
-- Iterate over the text string, and replace any matched text with the
error("Attempt to de-link non-string input.", 2)
end
if not ( type(pattern) == "string" and mw.ustring.sub(pattern, 1, 1) == "^" ) then
error('Invalid pattern detected. Patterns must begin with "^".', 2)
end
-- Iterate over the text string, and replace any matched text. using the
-- delink function. We need to iterate character by character rather
-- delink function. We need to iterate character by character rather
-- than just use gsub, otherwise nested links aren't detected properly.
-- than just use gsub, otherwise nested links aren't detected properly.
local result = ""
local result = ""
while mw.ustring.len(s) > 0 do
while mw.ustring.len(text) > 0 do
-- Replace text using one iteration of gsub.
-- Replace text using one iteration of gsub.
s = mw.ustring.gsub(s, pattern, delinkFunction, 1)
text = mw.ustring.gsub(text, "^%[%[.-%]%]", delinkWikilink, 1) -- De-link wikilinks.
text = mw.ustring.gsub(text, "^%[.-%]", delinkURL, 1) -- De-link URLs
-- Append the left-most character to the result string.
-- Append the left-most character to the result string.
result = result .. mw.ustring.sub(s, 1, 1)
result = result .. mw.ustring.sub(text, 1, 1)
s = mw.ustring.sub(s, 2, -1)
text = mw.ustring.sub(text, 2, -1)
end
end
result = mw.ustring.gsub(result, "%s+", " ") -- Remove extra whitespace.
return result
return result
end

local function _delink(args)
local text = args[1] or ""
text = delinkLinkClass(text, "^%[%[.-%]%]", delinkWikilink) -- De-link wikilinks.
text = delinkLinkClass(text, "^%[.-%]", delinkURL) -- De-link URLs.
text = mw.ustring.gsub(text, "%s+", " ") -- Remove extra whitespace.
return text
end
end


Line 159: Line 148:
args = frame
args = frame
end
end
return _delink(args)
return _delink(args)
end
end

Revision as of 09:24, April 4, 2013

Documentation for this module may be created at Module:Delink/doc

-- This module de-links most wikitext.

-- Table of functions exported by this module (returned at the end of the
-- file). Declared local so that loading the module does not create a global.
local p = {}

local function delinkURL(s)
    -- Converts a single-bracketed chunk of text such as "[http://x.org label]"
    -- into its display text. The caller is expected to have dealt with
    -- internal wikilinks already, so s is text between two square brackets.
    -- Text that does not start with a recognised URL prefix is returned
    -- unchanged; an external link with no label yields the empty string.
    local match = mw.ustring.match

    -- Recognised URL prefixes, tried in order; the first one that is followed
    -- by at least one valid URL character wins.
    local schemes = {"//", "http://", "https://", "ftp://", "gopher://", "mailto:", "news:", "irc://"}
    local scheme
    for _, candidate in ipairs(schemes) do
        if match(s, '^%[' .. candidate ..'[^"%s].*%]' ) then
            scheme = candidate
            break
        end
    end

    -- Not an external link: leave the text alone.
    if not scheme then
        return s
    end

    -- Everything after the URL prefix and before the final square bracket.
    local rest = match(s, "^%[" .. scheme .. "(.*)%]")
    -- Everything from the first URL separator character ("<> ) onwards.
    local label = match(rest, '^.-(["<> ].*)') or ""
    -- Drop leading whitespace (the separator may have been a space).
    return match(label, "^%s*(%S.*)$") or ""
end

local function delinkReversePipeTrick(s)
    -- Handles links written with the reverse pipe trick, i.e. "[[|text]]".
    -- Returns the text after the leading pipe, or s unchanged when s contains
    -- more than one pipe or does not actually start with "[[|".
    if mw.ustring.match(s, "^%[%[|.*|") then -- Check for multiple pipes.
        return s
    end
    -- The pattern is anchored so that a "[[|" appearing later in s (for
    -- example in "[[x[[|y]]") does not cause the text in front of it to be
    -- silently discarded.
    return mw.ustring.match(s, "^%[%[|(.*)%]%]") or s
end

local function delinkPipeTrick(s)
    -- Computes the display text of a link that uses the pipe trick, i.e. a
    -- link of the form "[[target|]]". Colons, brackets and commas are handled
    -- per [[Help:Pipe trick]].
    local match = mw.ustring.match

    -- Take the link target: if there is a colon, only the text after the
    -- first colon; otherwise everything between "[[" and "|]]".
    local targetPattern = match(s, ":") and "%[%[.-:(.*)|%]%]" or "%[%[(.*)|%]%]"
    local target = match(s, targetPattern)

    -- A trailing bracketed part is stripped; brackets trump commas.
    if match(target, "%(.-%)$") then
        return match(target, "(.-) ?%(.-%)$")
    end

    -- With no trailing brackets, keep only the text before the first comma.
    if match(target, ",") then
        return match(target, "(.-),.*$")
    end

    return target
end

local function delinkWikilink(s)
    -- Converts one wikilink, passed as the whole "[[...]]" span, into plain
    -- text. Returns:
    --   * s unchanged if the title area is not a valid title or contains a
    --     string that is not allowed in wikilinks;
    --   * "" if the text before the first colon is a known language tag or
    --     is "Category", "File" or "Image";
    --   * otherwise the display text of the link.

    -- Deal with the reverse pipe trick. The check is anchored: a "[[|" that
    -- appears later in s belongs to a nested link, and treating the whole
    -- span as a reverse pipe trick would throw away the text in front of it.
    if mw.ustring.match(s, "^%[%[|") then
        return delinkReversePipeTrick(s)
    end
    
    -- Check for bad titles. To do this we need to find the
    -- title area of the link, i.e. the part before any pipes.
    local titlearea
    if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link.
        titlearea = mw.ustring.match(s, "^%[%[(.-)|.*%]%]")
    else
        titlearea = mw.ustring.match(s, "^%[%[(.-)%]%]")
    end
    -- If the title area is not a valid title, return the whole string.
    -- Use pcall in case we're over the expensive functions limit.
    local goodcall, title = pcall(mw.title.new, titlearea, "")
    if not (goodcall and title) then
        return s
    end
    -- Check for characters that are allowed in titles but not in wikilinks.
    local other_invalid_link_strings = { '&#x0000;' }
    for _, v in ipairs(other_invalid_link_strings) do
        if mw.ustring.match(titlearea, v) then
            return s
        end
    end
    
    -- Check for categories, interwikis, and files.
    local colonprefix = mw.ustring.match(s, "%[%[(.-):.*%]%]") or "" -- Get the text before the first colon.
    if mw.language.isKnownLanguageTag(colonprefix)
    or mw.ustring.match(colonprefix, "^[Cc]ategory$")
    or mw.ustring.match(colonprefix, "^[Ff]ile$")
    or mw.ustring.match(colonprefix, "^[Ii]mage$") then
        return ""
    end
    
    -- Remove the colon if the link is using the [[Help:Colon trick]].
    -- Anchored for the same reason as the reverse pipe trick check: only a
    -- colon directly after the opening brackets of this link counts.
    if mw.ustring.match(s, "^%[%[:") then
        s = "[[" .. mw.ustring.match(s, "^%[%[:(.*%]%])")
    end
    
    -- Deal with links using the [[Help:Pipe trick]].
    if mw.ustring.match(s, "^%[%[[^|]*|%]%]") then
        return delinkPipeTrick(s)
    end
    
    -- Find the display area of the wikilink
    local display
    if mw.ustring.match(s, "|") then -- Find if we're dealing with a piped link.
        display = mw.ustring.match(s, "^%[%[.-|(.+)%]%]")
    else
        display = mw.ustring.match(s, "^%[%[(.-)%]%]")
    end

    return display
end

local function _delink(args)
    -- De-links the wikitext in args[1] (treated as "" when absent) and
    -- returns the result with every run of whitespace collapsed to a single
    -- space.
    local text = args[1] or ""

    -- We scan from left to right rather than using one global gsub,
    -- otherwise nested links aren't detected properly: replacement text is
    -- left at the front of `text` and is examined again on the next pass.
    --
    -- Both patterns are anchored and start with "[", so they can only match
    -- when `text` begins with a square bracket. Everything up to the next
    -- "[" can therefore be copied in one piece instead of one character at a
    -- time. "[" is a single byte in UTF-8 and never occurs inside a
    -- multi-byte sequence, so the byte-oriented string functions cut the
    -- text on a character boundary.
    local pieces = {}
    while text ~= "" do
        -- Try to replace a link at the very start of the remaining text.
        text = mw.ustring.gsub(text, "^%[%[.-%]%]", delinkWikilink, 1) -- De-link wikilinks.
        text = mw.ustring.gsub(text, "^%[.-%]", delinkURL, 1) -- De-link URLs
        -- Always consume the left-most character, plus everything after it
        -- that precedes the next "[".
        local nextBracket = string.find(text, "[", 2, true)
        if nextBracket then
            pieces[#pieces + 1] = string.sub(text, 1, nextBracket - 1)
            text = string.sub(text, nextBracket)
        else
            pieces[#pieces + 1] = text
            text = ""
        end
    end

    -- Remove extra whitespace. The parentheses drop gsub's second return
    -- value (the replacement count).
    return (mw.ustring.gsub(table.concat(pieces), "%s+", " "))
end

function p.delink(frame)
    -- Entry point. Works out which argument table to use and passes it on
    -- to _delink, returning whatever _delink returns.

    -- Anything other than the current frame means we're being called from
    -- another module or from the debug console, so assume the args were
    -- passed in directly.
    if frame ~= mw.getCurrentFrame() then
        return _delink(frame)
    end

    -- We're being called via #invoke. Default to the args that were passed
    -- into the template, but prefer the args of the #invoke itself if the
    -- invoking template supplied at least one.
    local args = frame:getParent().args
    for _ in pairs(frame.args) do
        args = frame.args
        break
    end

    return _delink(args)
end

return p