Difference between revisions of "Module:Excerpt"

From Donjon Wiki
Jump to navigation Jump to search
m (1 revision imported)
 
Line 1: Line 1:
-- Module:Excerpt implements the Excerpt template
-- Get localized data
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
local d = require("Module:Excerpt/i18n")
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
-- License: CC-BY-SA-3.0


local Transcluder = require( 'Module:Transcluder' )
local p = {}


local yesno = require( 'Module:Yesno' )
-- Helper function to interpret a template-style flag as a boolean
-- @param value : Any value, typically a template argument
-- @return false for nil, false, "", "0", "false" and "no"; true for everything else
local function is(value)
	if value == nil or value == false then
		return false
	end
	-- Strings that template users write to mean "off"
	local falsyStrings = { [""] = true, ["0"] = true, ["false"] = true, ["no"] = true }
	return not falsyStrings[value]
end


local ok, config = pcall( require, 'Module:Excerpt/config' )
-- Error handling function
if not ok then config = {} end
-- Throws a Lua error or returns an empty string if error reporting is disabled
errors = true -- show errors by default
-- @param message : Key into d.errors, or a literal format string when no such key exists
-- @param value : Value substituted into the message by mw.ustring.format
-- @return "" when error reporting is disabled; otherwise raises and does not return
local function luaError(message, value)
	if is(errors) then
		local template = d.errors[message] or message or ""
		-- Level 2 attributes the error to the function that called luaError
		error(mw.ustring.format(template, value), 2)
	end
	return "" -- error reporting is disabled
end


local p = {}
-- Error handling function
-- Builds a wiki friendly error instead of throwing a Lua error
-- @param message : Key into d.errors, or a literal format string when no such key exists
-- @param value : Value substituted into the message by mw.ustring.format
-- @return An mw.html div with class "error", or "" if error reporting is disabled
local function wikiError(message, value)
	if not is(errors) then
		return "" -- error reporting is disabled
	end
	local template = d.errors[message] or message or ""
	local wikitext = mw.ustring.format(template, value)
	wikitext = d.errors.prefix .. wikitext
	if mw.title.getCurrentTitle().isContentPage then
		-- Only content pages are added to the tracking category
		local category = mw.title.new(d.errorsCategory, 'Category')
		if category then
			wikitext = wikitext .. '[[' .. category.prefixedText .. ']]'
		end
	end
	return mw.html.create('div'):addClass('error'):wikitext(wikitext)
end


-- Helper function to get arguments
-- Helper function to match from a list regular expressions
local args
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
local function getArg( key, default )
local function matchAny(text, pre, list, post, init)
local value = args[ key ]
local match = {}
if value and mw.text.trim( value ) ~= '' then
for i = 1, #list do
return value
match = { mw.ustring.match(text, pre .. list[i] .. post, init) }
if match[1] then return unpack(match) end
end
end
return default
return nil
end
end


-- Helper function to handle errors
-- Helper function to convert imagemaps into standard images
local function getError( message, value )
local function convertImageMap(imagemap)
if type( message ) == 'string' then
local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
message = Transcluder.getError( message, value )
if image then
return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s*", "", 1) .. "]]"
else
return "" -- remove entire block if image can't be extracted
end
end
if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then
end
message:node( '[[Category:' .. config.categories.errors .. ']]' )
 
-- Helper function to convert a comma-separated list of numbers or min-max ranges into a list of booleans
-- For example: "1,3-5" to {1=true,2=false,3=true,4=true,5=true}
local function numberFlags(str)
if not str then return {} end
local flags = {}
local ranges = mw.text.split(str, ",") -- parse ranges: "1,3-5" to {"1","3-5"}
for _, r in pairs(ranges) do
local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" to min=3 max=5
if not max then min, max = mw.ustring.match(r, "^%s*((%d+))%s*$") end -- "1" to min=1 max=1
if max then
for p = min, max do flags[p] = true end
end
end
end
return message
return flags
end
end


-- Helper function to get localized messages
-- Helper function to convert template arguments into an array of arguments fit for get()
local function getMessage( key )
local function parseArgs(frame)
local ok, TNT = pcall( require, 'Module:TNT' )
local args = {}
if not ok then return key end
for key, value in pairs(frame:getParent().args) do args[key] = value end
return TNT.format( 'I18n/Module:Excerpt.tab', key )
for key, value in pairs(frame.args) do args[key] = value end -- args from a Lua call have priority over parent args from template
args.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs: "1,3-5" to {"1","3-5"}
args.fileflags = numberFlags(args["files"] or "") -- parse file numbers
return args
end
end


-- Main entry point for templates
-- Helper function to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT
function p.main( frame )
local function stripTemplate(t)
args = Transcluder.parseArgs( frame )
-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)
if matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]") then return "" end


-- Make sure the requested page exists
-- If template is wanted but produces an unwanted reference then return the string with |shortref or |ref removed
local page = getArg( 1 )
local noRef = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
if not page or page == '{{{1}}}' then return getError( 'no-page' ) end
noRef = mw.ustring.gsub(noRef, "|%s*ref%s*%f[|}]", "")
 
-- If a wanted template has unwanted nested templates, purge them too
noRef = mw.ustring.sub(noRef, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noRef, 3), "%b{}", stripTemplate)
 
-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
noRef = mw.ustring.gsub(noRef, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")
 
-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot|English|英語|eigo}} → English
noRef = mw.ustring.gsub(noRef, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")
 
if noRef ~= t then return noRef end
 
return nil -- not an unwanted template: keep
end
 
-- Get a page's content, following redirects
-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found
-- For file pages, returns the content of the file description page
local function getContent(page)
local title = mw.title.new(page)
local title = mw.title.new(page)
if not title then return getError( 'invalid-title', page ) end
if not title then return false, false end
if title.isRedirect then title = title.redirectTarget end
 
if not title.exists then return getError( 'page-not-found', page ) end
local target = title.redirectTarget
page = title.prefixedText
if target then title = target end
 
return title:getContent(), title.prefixedText
end
 
-- Collect every wikitable ({| ... |}) found in the text
-- @param text : Wikitext to scan
-- @param options : Unused
-- @return The tables joined by line feeds, or "" if there are none
local function getTables(text, options)
	local found = {}
	-- %b{} yields every balanced {...} span; only those opening with {| are tables
	for block in mw.ustring.gmatch(text, "%b{}") do
		local opener = mw.ustring.sub(block, 1, 2)
		if opener == '{|' then
			found[#found + 1] = block
		end
	end
	return table.concat(found, '\n')
end
 
-- Get the lists only
-- @param text : Wikitext to scan
-- @param options : Unused
-- @return Every line starting with * or #, each preceded by a line feed, or "" if there are none
local function getLists(text, options)
	local lists = {}
	-- Prefix a line feed so that a list item on the very first line is found too
	for item in mw.ustring.gmatch("\n" .. text, "\n[*#][^\n]+") do
		lists[#lists + 1] = item
	end
	-- Each item already carries its own leading line feed; joining with another one
	-- would put a blank line between items, which splits the list and restarts # numbering
	return table.concat(lists)
end
 
-- Decide whether an image may be shown in an excerpt
-- @param image : Wikitext containing a File:(name) or Image:(name) reference
-- @return true if the file has an image extension, exists, and its description does not mention "non-free"; false otherwise
local function checkImage(image)
	-- match File:(name) or Image:(name)
	local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*")
	if not page then
		return false
	end

	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg, audio, etc.)
	local imageExtensions = {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}
	if not matchAny(page, "%.", imageExtensions, "%s*$") then
		return false
	end

	-- Check the local wiki: get the file description and title after following any redirect
	local description, fileTitle = getContent(page)
	if not fileTitle or fileTitle == "" then
		return false -- the image doesn't exist
	end

	-- Check Commons: with no local description, expand the file page as a transclusion instead
	if not description or description == "" then
		description = mw.getCurrentFrame():preprocess("{{" .. fileTitle .. "}}")
	end

	-- Filter non-free images
	local unusable = not description or description == "" or mw.ustring.match(description, "[Nn]on%-free")
	if unusable then
		return false
	end
	return true
end
 
-- Extract a [[File:...]] or [[Image:...]] link from the text
-- @param text : Wikitext to search
-- @param start : If truthy, the link must be at the very start of the text; otherwise it may be anywhere
-- @return The link including any trailing whitespace, or nil if none was found
local function parseImage(text, start)
	local anchor = start and "^" or "" -- a true flag restricts search to start of string
	local image = matchAny(text, anchor .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ...
	if not image then
		return image
	end
	-- The candidate runs to the end of the text, so cut it back to the balanced [[...]];
	-- this also copes with wikilinks nested in the caption
	return (mw.ustring.match(image, "%b[]%s*"))
end
 
-- Cut a caption at its terminator: a | (end of parameter) or } (end of infobox)
-- Terminators inside nested [..] and {..} are skipped
-- @param caption : Text starting at the caption, possibly followed by further parameters
-- @return The caption up to but excluding its terminator, the whole text if there is none, or nil if caption is nil
local function parseCaption(caption)
	if not caption then return nil end
	local total = mw.ustring.len(caption)
	local beyond = total + 1 -- stands in for "not found" so comparisons never see nil
	local cursor = 1
	while cursor <= total do
		local linkFrom, linkTo = mw.ustring.find(caption, "%b[]", cursor)
		local templateFrom, templateTo = mw.ustring.find(caption, "%b{}", cursor)
		local stop = mw.ustring.find(caption, "[|}]", cursor) or beyond
		linkFrom = linkFrom or beyond
		templateFrom = templateFrom or beyond
		if linkFrom < templateFrom and linkFrom < stop then
			cursor = linkTo + 1 -- a wikilink comes first: jump over it
		elseif templateFrom < stop then
			cursor = templateTo + 1 -- a template comes first: jump over it
		else
			-- the terminator comes before the next wikilink or template
			return mw.ustring.sub(caption, 1, stop - 1)
		end
	end
	return caption -- No terminator found: return entire caption
end


-- Set variables from the template parameters
-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
local section = getArg( 2, mw.ustring.match( getArg( 1 ), '[^#]+#(.+)' ) )
local function argImage(text)
local hat = yesno( getArg( 'hat', true ) )
local token = nil
local edit = yesno( getArg( 'edit', true ) )
local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
local this = getArg( 'this' )
if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image
local only = getArg( 'only' )
local files = getArg( 'files', getArg( 'file', ( only == 'file' and 1 ) ) )
local lists = getArg( 'lists', getArg( 'list', ( only == 'list' and 1 ) ) )
local tables = getArg( 'tables', getArg( 'table', ( only == 'table' and 1 ) ) )
local templates = getArg( 'templates', getArg( 'template', ( only == 'template' and 1 ) ) )
local paragraphs = getArg( 'paragraphs', getArg( 'paragraph', ( only == 'paragraph' and 1 ) ) )
local references = getArg( 'references' )
local subsections = not yesno( getArg( 'subsections' ) )
local noLinks = not yesno( getArg( 'links', true ) )
local noBold = not yesno( getArg( 'bold' ) )
local onlyFreeFiles = yesno( getArg( 'onlyfreefiles', true ) )
local briefDates = yesno( getArg( 'briefdates', false ) )
local inline = yesno( getArg( 'inline' ) )
local quote = yesno( getArg( 'quote' ) )
local more = yesno( getArg( 'more' ) )
local class = getArg( 'class' )
local displaytitle = getArg( 'displaytitle' ) or page


-- Build the hatnote
-- ensure image map is captured
if hat and not inline then
text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')
if this then
 
hat = this
-- find all images
elseif quote then
local hasImages = false
hat = getMessage( 'this' )
local images = {}
elseif only then
local captureFrom = 1
hat = getMessage( only )
while captureFrom < mw.ustring.len(text) do
local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", captureFrom)
if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
local lcArgName = mw.ustring.lower(argname)
if mw.ustring.find(lcArgName, "caption")
or mw.ustring.find(lcArgName, "size")
or mw.ustring.find(lcArgName, "upright") then
image = nil
end
end
if image then
hasImages = true
images[position] = image
captureFrom = position
else
else
hat = getMessage( 'section' )
captureFrom = mw.ustring.len(text)
end
end
hat = hat .. ' ' .. getMessage( 'excerpt' ) .. ' '
end
if section then
captureFrom = 1
hat = hat .. '[[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. displaytitle
while captureFrom < mw.ustring.len(text) do
.. ' § ' .. mw.ustring.gsub( section, '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", captureFrom)
if image then
hasImages = true
images[position] = image
captureFrom = position
else
else
hat = hat .. '[[:' .. page .. '|' .. displaytitle .. ']].'
captureFrom = mw.ustring.len(text)
end
end
if edit then
end
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
captureFrom = 1
hat = hat .. title:fullUrl( 'action=edit' ) .. ' ' .. mw.message.new( 'editsection' ):plain()
while captureFrom < mw.ustring.len(text) do
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", captureFrom)
if image then
hasImages = true
if not images[position] then
images[position] = image
end
captureFrom = position
else
captureFrom = mw.ustring.len(text)
end
end
if config.hat then
end
hat = config.hat .. hat .. '}}'
 
hat = frame:preprocess( hat )
if not hasImages then return nil end
 
-- find all captions
local captions = {}
captureFrom = 1
while captureFrom < mw.ustring.len(text) do
local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", captureFrom)
if caption then
-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
caption = mw.text.trim(caption)
local captionStart = mw.ustring.sub(caption, 1, 1)
if captionStart == '|' or captionStart == '}' then caption = nil end
end
if caption then
-- find nearest image, and use same index for captions table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not captions[i] then
captions[i] = parseCaption(caption)
end
end
end
captureFrom = position
else
else
hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
captureFrom = mw.ustring.len(text)
end
end
 
-- find all alt text
local altTexts = {}
for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
if altText then
 
-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
local lookFrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
mw.ustring.match(altText, ".*%[%b[]%]()") or 1)
 
local length = mw.ustring.len(altText)
local afterText = math.min( -- find position after whichever comes first: end of string, }} or |
mw.ustring.match(altText, "()}}", lookFrom) or length+1,
mw.ustring.match(altText, "()|", lookFrom) or length+1)
altText = mw.ustring.sub(altText, 1, afterText-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}
 
altText = mw.text.trim(altText)
local altTextStart = mw.ustring.sub(altText, 1, 1)
if altTextStart == '|' or altTextStart == '}' then altText = nil end
end
if altText then
-- find nearest image, and use same index for altTexts table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not altTexts[i] then
altTexts[i] = altText
end
end
end
end
end
else
hat = nil
end
end


-- Build the "Read more" link
-- find all image sizes
if more and not inline then
local imageSizes = {}
more = "'''[[" .. page .. '#' .. ( section or '' ) .. "|" .. getMessage( 'more' ) .. "]]'''"
for position, imageSizeMatch in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
more = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( more )
local imageSize = mw.ustring.match(imageSizeMatch, "=%s*([^}|\n]*)")
else
if imageSize then
more = nil
imageSize = mw.text.trim(imageSize )
local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)
if imageSizeStart == '|' or imageSizeStart == '}' then imageSize = nil end
end
if imageSize then
-- find nearest image, and use same index for imageSizes table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not imageSizes[i] then
imageSizes[i] = imageSize
end
end
end
end
end
end


-- Build the options for Module:Transcluder out of the template parameters and the desired defaults
-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
local options = {
local keys = {}
files = files,
for key, val in pairs(images) do
lists = lists,
table.insert(keys, key)
tables = tables,
end
paragraphs = paragraphs,
table.sort(keys)
sections = subsections,
categories = 0,
references = references,
only = only and mw.text.trim( only, 's' ) .. 's',
noLinks = noLinks,
noBold = noBold,
noSelfLinks = true,
noNonFreeFiles = onlyFreeFiles,
noBehaviorSwitches = true,
fixReferences = true,
linkBold = true,
}


-- Get the excerpt itself
-- add in relevant optional parameters for each image: caption, alt text and image size
local title = page .. '#' .. ( section or '' )
local imageTokens = {}
local ok, excerpt = pcall( Transcluder.get, title, options )
for _, index in ipairs(keys) do
if not ok then return getError( excerpt ) end
local image = images[index]
if mw.text.trim( excerpt ) == '' and not only then
local token = parseImage(image, true) -- look for image=[[File:...]] etc.
if section then return getError( 'section-empty', section ) else return getError( 'lead-empty' ) end
if not token then
image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
token = "[[" -- Add File: unless name already begins File: or Image:
if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
token = token .. "File:"
end
token = token .. image
local caption = captions[index]
if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
local alt = altTexts[index]
if alt then token = token .. "|alt=" .. alt end
local image_size = imageSizes[index]
if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
token = token .. "]]"
end
token = mw.ustring.gsub(token, "\n","") .. "\n"
table.insert(imageTokens, token)
end
end
return imageTokens
end


-- Fix birth and death dates, but only in the first paragraph
local function modifyImage(image, fileArgs)
if briefDates then
if fileArgs then
local startpos = 1 -- skip initial templates
for _, filearg in pairs(mw.text.split(fileArgs, "|")) do -- handle fileArgs=left|border etc.
local s
local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
local e = 0
local group = {fa} -- group of "border" is ["border"]...
repeat
for _, g in pairs(d.imageParams) do
startpos = e + 1
for _, a in pairs(g) do
s, e = mw.ustring.find( excerpt, "%s*%b{}%s*", startpos )
if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
until not s or s > startpos
s, e = mw.ustring.find( excerpt, "%b()", startpos ) -- get (...), which may be (year–year)
if s and s < startpos + 100 then -- look only near the start
local year1, conjunction, year2 = mw.ustring.match( mw.ustring.sub( excerpt, s, e ), '(%d%d%d+)(.-)(%d%d%d+)' )
if year1 and year2 and (mw.ustring.match( conjunction, '[%-–—]' ) or mw.ustring.match( conjunction, '{{%s*[sS]nd%s*}}' )) then
local y1 = tonumber(year1)
local y2 = tonumber(year2)
if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( "%Y" )) then
excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. "" .. year2 .. mw.ustring.sub( excerpt, e )
end
end
end
end
for _, a in pairs(group) do
image = mw.ustring.gsub(image, "|%s*" .. a .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
image = mw.ustring.gsub(image, "|%s*" .. a .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
end
image = mw.ustring.gsub(image, "([|%]])", "|" .. filearg .. "%1", 1) -- replace "|" by "|left|" etc.
end
end
return image
end
-- a basic parser to trim down extracted wikitext
--  @param text : Wikitext to be processed
--  @param options : A table of options...
--          options.paraflags : Which number paragraphs to keep, as either a string (e.g. '1,3-5') or a table (e.g. {1=true,2=false,3=true,4=true,5=true}). If not present, all paragraphs will be kept.
--          options.fileflags : Which files to keep, as either a string (e.g. '1,3-5') or a table (e.g. {1=true,2=false,3=true,4=true,5=true}).
--          options.fileargs : args for the [[File:]] syntax, such as 'left'
-- options.filesOnly : only return the files and not the prose
local function parse(text, options)
local allParagraphs = true -- keep all paragraphs?
if options.paraflags then
if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
for _, v in pairs(options.paraflags) do
if v then allParagraphs = false end -- if any para specifically requested, don't keep all
end
end
end
if is(options.filesOnly) then
allParagraphs = false
options.paraflags = {}
end
end


-- If no file was found, try to get one from the infobox
local maxfile = 0 -- for efficiency, stop checking images after this many have been found
local fileNamespaces = Transcluder.getNamespaces( 'File' )
if options.fileflags then
if ( ( only == 'file' or only == 'files' ) or ( not only and ( files ~= '0' or not files ) ) ) and -- caller asked for files
if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
not Transcluder.matchAny( excerpt, '%[%[', fileNamespaces, ':' ) and -- and there are no files in Transcluder's output
for k, v in pairs(options.fileflags) do
config.captions -- and we have the config option required to try finding files in templates
if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
then
end
-- We cannot distinguish the infobox from the other templates so we search them all
end
local infobox = Transcluder.getTemplates( excerpt );
infobox = table.concat( infobox )
local fileArgs = options.fileargs and mw.text.trim(options.fileargs)
local parameters = Transcluder.getParameters( infobox )
if fileArgs == '' then fileArgs = nil end
local file, captions, caption
 
for _, pair in pairs( config.captions ) do
local leadStart = nil -- have we found some text yet?
file = pair[1]
local t = "" -- the stripped down output text
file = parameters[file]
local fileText = "" -- output text with concatenated [[File:Foo|...]]\n entries
if file and Transcluder.matchAny( file, '^.*%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.*' ) then
local files = 0 -- how many images so far
file = mw.ustring.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
local paras = 0 -- how many paragraphs so far
captions = pair[2]
local startLine = true -- at the start of a line (no non-spaces found since last \n)?
for _, p in pairs( captions ) do
 
if parameters[ p ] then caption = parameters[ p ] break end
text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
 
-- Add named files
local f = options.files
if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
f = "[[File:" .. f .. "]]"
f = modifyImage(f, "thumb")
f = modifyImage(f, fileArgs)
if checkImage(f) then fileText = fileText .. f .. "\n" end
end
 
repeat -- loop around parsing a template, image or paragraph
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started
 
local line = mw.ustring.match(text, "[^\n]*")
if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
token = nil
end
end
 
if token then -- found a template which is not the prefix to a line of text
 
if is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
t = t .. token -- keep tables
 
elseif mw.ustring.sub(token, 1, 3) == '{{#' then
t = t .. token -- keep parser functions
 
elseif leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
if not is(options.filesOnly) and not startLine then t = t .. token end
 
elseif matchAny(token, "^{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
t = t .. token -- keep wanted block templates
 
elseif files < maxfile then -- discard template, but if we are still collecting images...
local images = argImage(token) or {}
if not images then
local image = parseImage(token, false) -- look for embedded [[File:...]], |image=, etc.
if image then table.insert(images, image) end
end
end
excerpt = '[[File:' .. file .. '|thumb|' .. ( caption or '' ) .. ']]' .. excerpt
for _, image in ipairs(images) do
if ( onlyFreeFiles ) then
if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
excerpt = Transcluder.removeNonFreeFiles( excerpt )
files = files + 1 -- count the file, whether displaying it or not
if options.fileflags and options.fileflags[files] then -- if displaying this image
image = modifyImage(image, "thumb")
image = modifyImage(image, fileArgs)
fileText = fileText .. image
end
end
end
end
break
end
end
end
else -- the next token in text is not a template
token = parseImage(text, true)
if token then -- the next token in text looks like an image
if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
files = files + 1
if options.fileflags and options.fileflags[files] then
local image = token -- copy token for manipulation by adding |right etc. without changing the original
image = modifyImage(image, fileArgs)
fileText = fileText .. image
end
end
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterEnd = mw.ustring.len(text) + 1
local blankPosition = mw.ustring.find(text, "\n%s*\n") or afterEnd -- position of next paragraph delimiter (or end of text)
local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterEnd,
mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterEnd,
blankPosition)
token = mw.ustring.sub(text, 1, endPosition-1)
if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line
token = token .. mw.ustring.match(text, "\n%s*\n", blankPosition)
end
local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'
if not isHatnote then
leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
paras = paras + 1
if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
end
end -- of "else got a paragraph"
end -- of "else not a template"
 
if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
until not text or text == "" or not token or token == "" -- loop until all text parsed
 
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
 
return fileText .. text
end
 
-- Remove from the wikitext everything that should not appear in an excerpt
-- @param text : Wikitext to be cleaned
-- @param options : A table of options...
--          options.keepSubsections : if truthy, headings and the text after them are kept
--          options.keepRefs : if truthy, <ref> tags and unwanted templates are kept
-- @return The cleaned wikitext
local function cleanupText(text, options)
	-- Delete every match of the pattern from the working text
	local function remove(pattern)
		text = mw.ustring.gsub(text, pattern, "")
	end

	remove("<!%-%-.-%-%->") -- HTML comments
	remove("<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- noinclude bits

	-- avoid the expensive onlyinclude searches if possible
	if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then
		remove("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text between onlyinclude sections
		remove("^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>") -- text before first onlyinclude section
		remove("</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*") -- text after last onlyinclude section
	end

	if not is(options.keepSubsections) then
		remove("\n==.*") -- first ==Heading== and everything after it
		remove("^==.*") -- ...even if the lead is empty
	end

	if not is(options.keepRefs) then
		remove("<%s*[Rr][Ee][Ff][^>]-/%s*>") -- refs cited elsewhere
		remove("<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>") -- refs
		text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- unwanted templates such as references
	end

	remove("<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>") -- musical scores
	-- convert imagemaps into standard images
	text = mw.ustring.gsub(text, "<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap)
	remove("%s*{{%s*[Tt][Oo][Cc].-}}") -- most common tables of contents
	remove("%s*__[A-Z]*TOC__") -- TOC behavior switches
	text = mw.ustring.gsub(text, "\n%s*{{%s*[Pp]p%-.-}}", "\n") -- protection templates
	remove("%s*{{[^{|}]*[Ss]idebar%s*}}") -- most sidebars
	remove("%s*{{[^{|}]*%-[Ss]tub%s*}}") -- most stub templates
	remove("%s*%[%[%s*:?[Cc]ategory:.-%]%]") -- categories
	remove("^:[^\n]+\n") -- DIY hatnote indented with a colon
	return text
end
 
-- Parse a ==Section== from a page
-- @param text : Wikitext of the page
-- @param section : Heading text to look for; it is URI-decoded before matching
-- @param mainOnly : If truthy, the section ends at the next heading of any level;
--                   otherwise it ends at the next heading of the same or a higher level, so subsections are kept
-- @return The content of the section. If the section is missing or blank, the result of luaError
--         (which raises, or returns "" when error reporting is disabled)
local function getSection(text, section, mainOnly)
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
	-- Pad with a line feed on both sides: the heading pattern needs one before the heading
	-- (which a heading on the first line lacks) and one after the heading line
	local level, content = mw.ustring.match("\n" .. text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
	if not content then return luaError("sectionNotFound", section) end
	local nextSection
	if mainOnly then
		nextSection = "\n==.*" -- Main part of section terminates at any level of header
	else
		nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
	end
	content = mw.ustring.gsub(content, nextSection, "") -- remove later sections with headings at this level or higher
	if mw.ustring.match(content, "^%s*$") then return luaError("sectionEmpty", section) end
	return content
end
-- Parse a <section begin="Name of the fragment">
-- @todo Implement custom parsing of fragments rather than relying on #lst
-- @param page : Name of the page holding the fragment
-- @param fragment : Name of the fragment
-- @return The text produced by the #lst parser function. If that text is blank, the result of luaError
--         (which raises, or returns "" when error reporting is disabled)
local function getFragment(page, fragment)
	local text = mw.getCurrentFrame():callParserFunction('#lst', page, fragment)
	local isBlank = mw.ustring.match(text, "^%s*$")
	if isBlank then
		return luaError("fragmentEmpty", fragment)
	end
	return text
end


-- Unlike other elements, templates are filtered here
-- Remove unmatched <tag> or </tag> tags
-- because we had to search the infoboxes for files
local function fixTags(text, tag)
local trash
local startCount = 0
if only and ( only == 'template' or only == 'templates' ) then
for i in mw.ustring.gmatch(text, "<%s*" .. tag .. "%f[^%w_].->") do startCount = startCount + 1 end
trash, excerpt = Transcluder.getTemplates( excerpt, templates );
 
else -- Remove blacklisted templates
local endCount = 0
local blacklist = config.blacklist and table.concat( config.blacklist, ',' ) or ''
for i in mw.ustring.gmatch(text, "<%s*/" .. tag .. "%f[^%w_].->") do endCount = endCount + 1 end
if templates then
 
if string.sub( templates, 1, 1 ) == '-' then --Unwanted templates. Append to blacklist
if startCount > endCount then -- more <tag> than </tag>: remove the last few <tag>s
blacklist = templates .. ',' .. blacklist
local i = 0
else --Wanted templates. Replaces blacklist and acts as whitelist
text = mw.ustring.gsub(text, "<%s*" .. tag .. "%f[^%w_].->", function(t)
blacklist = templates
i = i + 1
if i > endCount then return "" else return nil end
end) -- "end" here terminates the anonymous replacement function(t) passed to gsub
elseif endCount > startCount then -- more </tag> than <tag>: remove the first few </tag>s
text = mw.ustring.gsub(text, "<%s*/" .. tag .. "%f[^%w_].->", "", endCount - startCount)
end
return text
end
 
-- Remove unterminated {{ or }} together with everything that follows them
-- Balanced templates are first protected by threading escape characters (\27) between
-- their braces, so that only unbalanced doubled braces remain visible to the removal step
-- @param text Wikitext to repair
-- @return The wikitext up to (and excluding) the first unmatched {{ or }}
local function fixTemplates(text)
	local previous
	repeat -- keep hiding until a pass changes nothing, which takes care of nested templates
		previous = text
		text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- hide the outer braces of each matched {{...}}
		text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- hide }} that sits inside <math>...</math>
	until text == previous
	text = string.gsub(text, "([{}])%1[^\27].*", "") -- an unhidden {{ or }} followed by more text: drop it and the rest
	text = string.gsub(text, "([{}])%1$", "") -- an unhidden {{ or }} at the very end of the text
	text = mw.ustring.gsub(text, "\27", "") -- strip the escape characters again
	return text
end
 
-- Remove unterminated [[ or ]] together with everything that follows them
-- Balanced links, including nested ones such as [[File:Example.jpg|Some [[nested]] link.]],
-- are first protected with escape characters (\27) so that only unbalanced brackets remain visible
-- @param text Wikitext to repair
-- @return The wikitext up to (and excluding) the first unmatched [[ or ]]
local function fixLinks(text)
	local previous
	repeat -- keep hiding until a pass changes nothing, which takes care of nested links
		previous = text
		text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
	until text == previous
	text = string.gsub(text, "([%[%]])%1[^\27].*", "") -- an unhidden [[ or ]] followed by more text: drop it and the rest
	text = string.gsub(text, "([%[%]])%1$", "") -- an unhidden [[ or ]] at the very end of the text
	text = mw.ustring.gsub(text, "\27", "") -- strip the escape characters again
	return text
end
 
-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
-- and <ref group="Bar"> for <ref>
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
local function fixRefs(text, page, full)
if not full then full = getContent(page) end
local refNames = {}
local refName
local refBody
local position = 1
while position < mw.ustring.len(text) do
refName, position = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>]+)[\"']?[^>]*/%s*>()", position)
if refName then
refName = mw.text.trim(refName)
if not refNames[refName] then -- make sure we process each ref name only once
table.insert(refNames, refName)
refName = mw.ustring.gsub(refName, "[%^%$%(%)%.%[%]%*%+%-%?%%]", "%%%0") -- escape special characters
refBody = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. refName .. "%s*[\"']?[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>")
if not refBody then -- the ref body is not in the excerpt
refBody = mw.ustring.match(full, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. refName .. "%s*[\"']?[^/>]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>")
if refBody then -- the ref body was found elsewhere
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. refName .. "%s*[\"']?[^>]*/?%s*>", refBody, 1)
end
end
end
end
else
else
blacklist = '-' .. blacklist
position = mw.ustring.len(text)
end
end
trash, excerpt = Transcluder.getTemplates( excerpt, blacklist );
end
end
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>/]+)[\"']?[^>/]*(/?)%s*>", '<ref name="' .. page .. ' %1" %2>')
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*group%s*=%s*[\"']?[^\"'>/]+[\"']%s*>", '<ref>')
return text
end


-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
-- Replace the bold title or synonym near the start of the article by a wikilink to the article
excerpt = mw.text.trim( excerpt )
function linkBold(text, page)
excerpt = string.gsub( excerpt, '\n\n\n+', '\n\n' )
local lang = mw.language.getContentLanguage()
excerpt = '\n' .. excerpt .. '\n'
local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
or mw.ustring.find(text, "'''" .. lang:lcfirst(page) .. "'''", 1, true) -- plain search: special characters in page represent themselves
if position then
local length = mw.ustring.len(page)
text = mw.ustring.sub(text, 1, position + 2) .. "[[" .. mw.ustring.sub(text, position + 3, position + length + 2) .. "]]" .. mw.ustring.sub(text, position + length + 3, -1) -- link it
else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
text = mw.ustring.gsub(text, "()'''(.-'*)'''", function(a, b)
if not mw.ustring.find(b, "%[") then -- if not wikilinked
return "'''[[" .. page .. "|" .. b .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
else
return nil -- instruct gsub to make no change
end
end, 1) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
end
return text
end


-- Remove nested categories
-- Main function for modules
excerpt = frame:preprocess( excerpt )
local function get(page, options)
local categories, excerpt = Transcluder.getCategories( excerpt, options.categories )
if options.errors then errors = options.errors end


-- Add tracking categories
if not page or page == "" then return luaError("noPage") end
if config.categories then
 
local contentCategory = config.categories.content
local text
if contentCategory and mw.title.getCurrentTitle().isContentPage then
page, section = mw.ustring.match(page, "([^#]+)#?([^#]*)")
excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
text, page = getContent(page)
end
if not page then return luaError("noPage") end
local namespaceCategory = config.categories[ mw.title.getCurrentTitle().namespace ]
if not text then return luaError("pageNotFound", page) end
if namespaceCategory then
local full = text -- save the full text for later
excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
 
end
if is(options.fragment) then
text = getFragment(page, options.fragment)
end
end


-- Load the styles
if is(section) then
local styles
text = getSection(text, section)
if config.styles then
styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
end
end


-- Combine and return the elements
-- Strip text of all undesirables
if inline then
text = cleanupText(text, options)
return mw.text.trim( excerpt )
text = parse(text, options)
end
 
local tag = 'div'
-- Replace the bold title or synonym near the start of the article by a wikilink to the article
if quote then
text = linkBold(text, page)
tag = 'blockquote'
 
-- Remove '''bold text''' if requested
if is(options.nobold) then text = mw.ustring.gsub(text, "'''", "") end
 
-- Keep only tables if requested
if is(options.tablesOnly) then text = getTables(text) end
 
-- Keep only lists if requested
if is(options.listsOnly) then text = getLists(text) end
 
-- Seek and destroy unterminated templates, links and tags
text = fixTemplates(text)
text = fixLinks(text)
text = fixTags(text, "div")
 
-- Fix broken references
if is(options.keepRefs) then text = fixRefs(text, page, full) end
 
return text
end
 
-- Main invocation function for templates
local function main(frame)
local args = parseArgs(frame)
local page = args[1]
local ok, text = pcall(get, page, args)
if not ok then
text = d.errors.prefix .. text
if d.errorsCategory and d.errorsCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
text = text .. '[[' .. d.errorsCategory .. ']]'
end
return mw.html.create('div'):addClass('error'):wikitext(text)
end
end
excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
return frame:preprocess(text)
local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( class )
return block:node( styles ):node( hat ):node( excerpt ):node( more )
end
end
-- Entry points for templates
-- Each entry point is a thin wrapper that forwards exactly the arguments it names to the local function
function p.main(frame) return main(frame) end
function p.wikiError(message, value) return wikiError(message, value) end
-- Entry points for other Lua modules
function p.get(page, options) return get(page, options) end
function p.getContent(page) return getContent(page) end
-- Only text and section are forwarded here, so getSection's third parameter is not reachable through this wrapper
function p.getSection(text, section) return getSection(text, section) end
function p.getTables(text, options) return getTables(text, options) end
function p.getLists(text, options) return getLists(text, options) end
function p.parse(text, options) return parse(text, options) end
function p.parseImage(text, start) return parseImage(text, start) end
function p.parseArgs(frame) return parseArgs(frame) end
function p.argImage(text) return argImage(text) end
function p.checkImage(image) return checkImage(image) end
function p.cleanupText(text, options) return cleanupText(text, options) end
function p.luaError(message, value) return luaError(message, value) end
function p.is(value) return is(value) end
function p.numberFlags(str) return numberFlags(str) end


-- Entry points for backwards compatibility
-- p.lead and p.excerpt are aliases of p.main; the lowercase names are aliases of the camelCase entry points above
function p.lead( frame ) return p.main( frame ) end
function p.getsection(text, section) return getSection(text, section) end
function p.excerpt( frame ) return p.main( frame ) end
function p.parseimage(text, start) return parseImage(text, start) end
function p.checkimage(image) return checkImage(image) end
function p.argimage(text) return argImage(text) end
function p.numberflags(str) return numberFlags(str) end


return p
return p

Latest revision as of 16:44, 7 January 2024

Documentation for this module may be created at Module:Excerpt/doc

-- Get localized data
local d = require("Module:Excerpt/i18n")

local p = {}

-- Helper function to test for truthy and falsy values
-- nil, false, "", "0", "false" and "no" count as falsy; every other value (including the number 0) is truthy
-- @return true or false, always a real boolean
local function is(value)
	local falsyString = value == "" or value == "0" or value == "false" or value == "no"
	return not (not value or falsyString)
end

-- Error handling function
-- Raises a Lua error, or returns an empty string if error reporting is disabled
-- @param message Key into d.errors; when no such key exists the message itself is used as the format string
-- @param value Optional value substituted into the message by mw.ustring.format
errors = true -- show errors by default
local function luaError(message, value)
	if is(errors) then
		local template = d.errors[message] or message or ""
		local formatted = mw.ustring.format(template, value)
		error(formatted, 2) -- level 2 attributes the error to the function that called luaError
	end
	return "" -- error reporting is disabled
end

-- Error handling function
-- Builds a wiki friendly error, or returns an empty string if error reporting is disabled
-- @param message Key into d.errors; when no such key exists the message itself is used as the format string
-- @param value Optional value substituted into the message by mw.ustring.format
-- @return An mw.html <div class="error"> holding d.errors.prefix followed by the message; on content
--         pages a link to the category named by d.errorsCategory is appended when that title is valid
local function wikiError(message, value)
	if not is(errors) then return "" end -- error reporting is disabled
	local template = d.errors[message] or message or ""
	local formatted = mw.ustring.format(template, value)
	local wikitext = d.errors.prefix .. formatted
	if mw.title.getCurrentTitle().isContentPage then
		local errorsCategory = mw.title.new(d.errorsCategory, 'Category')
		if errorsCategory then
			wikitext = wikitext .. '[[' .. errorsCategory.prefixedText .. ']]'
		end
	end
	return mw.html.create('div'):addClass('error'):wikitext(wikitext)
end

-- Helper function to match against a list of alternative patterns
-- Tries pre..list[1]..post, then pre..list[2]..post, and so on, stopping at the first success
-- @param text String to search
-- @param pre Pattern fragment placed before each alternative
-- @param list Sequence of alternative pattern fragments
-- @param post Pattern fragment placed after each alternative
-- @param init Optional start position, handed to mw.ustring.match
-- @return Everything mw.ustring.match returned for the first alternative that matched, or nil
local function matchAny(text, pre, list, post, init)
	for index = 1, #list do
		local pattern = pre .. list[index] .. post
		local captures = { mw.ustring.match(text, pattern, init) }
		if captures[1] then
			return unpack(captures)
		end
	end
	return nil
end

-- Helper function to convert an imagemap block into a standard image link
-- Looks for a line that starts (after ">" or a newline) with one of d.fileNamespaces
-- @param imagemap The imagemap wikitext
-- @return "<!--imagemap-->[[...]]" built from that line, or "" when no such line is found
local function convertImageMap(imagemap)
	local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
	if not image then
		return "" -- remove entire block if image can't be extracted
	end
	local fileLink = mw.ustring.gsub(image, "[>\n]%s*", "", 1) -- drop the leading ">" or newline and any spaces
	return "<!--imagemap-->[[" .. fileLink .. "]]"
end

-- Helper function to convert a comma-separated list of numbers or min-max ranges into a set of flags
-- For example: "1,3-5" to {[1]=true, [3]=true, [4]=true, [5]=true}; numbers that are not listed are absent (nil)
-- Items that are neither a number nor a range are ignored, and a reversed range such as "5-3" selects nothing
-- @param str The list as a string; nil yields an empty table
-- @return A table keyed by number whose values are all true
local function numberFlags(str)
	if not str then return {} end
	local flags = {}
	for _, item in ipairs(mw.text.split(str, ",")) do -- parse ranges: "1,3-5" to {"1","3-5"}
		local first, last = mw.ustring.match(item, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" to first=3 last=5
		if not first then
			first = mw.ustring.match(item, "^%s*(%d+)%s*$") -- "1" to first=1 last=1
			last = first
		end
		-- convert explicitly rather than relying on the numeric for loop coercing strings;
		-- digits that tonumber cannot read make the item be skipped instead of raising an error
		local from, to = tonumber(first), tonumber(last)
		if from and to then
			for n = from, to do flags[n] = true end
		end
	end
	return flags
end

-- Helper function to convert template arguments into a table of arguments fit for get()
-- Arguments of the parent frame (the calling template) are copied first, then those of the
-- frame itself, so arguments given directly to the module take priority
-- @param frame The frame object of the invocation
-- @return A plain table of arguments with two extra fields:
--         paraflags, built by numberFlags from the "paragraphs" argument, and
--         fileflags, built by numberFlags from the "files" argument
local function parseArgs(frame)
	local args = {}
	local parent = frame:getParent()
	if parent then -- there is no parent when frame is itself the outermost frame
		for key, value in pairs(parent.args) do args[key] = value end
	end
	for key, value in pairs(frame.args) do args[key] = value end -- args from a Lua call have priority over parent args from template
	args.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraph numbers: "1,3-5" to {[1]=true,[3]=true,[4]=true,[5]=true}
	args.fileflags = numberFlags(args["files"] or "") -- parse file numbers the same way
	return args
end

-- Helper function to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT
-- Designed as a gsub replacement callback for "%b{}" matches:
--   returns "" to delete the template, a string to replace it, or nil to leave it untouched
-- @param t The text of one balanced {...} match
local function stripTemplate(t)
	-- Templates listed in d.unwantedInlineTemplates are deleted outright
	if matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]") then
		return ""
	end

	-- A wanted template may still produce an unwanted reference: drop its |shortref and |ref flags
	local stripped = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
	stripped = mw.ustring.gsub(stripped, "|%s*ref%s*%f[|}]", "")

	-- Purge unwanted templates nested inside this one; the first two characters are set aside
	-- so that the recursion works on the inner text rather than on the same match again
	local opening = mw.ustring.sub(stripped, 1, 2)
	local inner = mw.ustring.gsub(mw.ustring.sub(stripped, 3), "%b{}", stripTemplate)
	stripped = opening .. inner

	-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
	stripped = mw.ustring.gsub(stripped, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")

	-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot|English|英語|eigo}} → English
	stripped = mw.ustring.gsub(stripped, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")

	if stripped == t then
		return nil -- nothing changed: keep the template as it is
	end
	return stripped
end

-- Get a page's content, following redirects
-- For file pages, returns the content of the file description page
-- @param page Page name, handed to mw.title.new
-- @return content, name: whatever title:getContent() yields for the page (or for its redirect target),
--         and that title's prefixedText; false, false when mw.title.new gives no title object
local function getContent(page)
	local title = mw.title.new(page)
	if not title then
		return false, false
	end
	title = title.redirectTarget or title -- continue with the target if this page is a redirect
	return title:getContent(), title.prefixedText
end

-- Get the tables only
-- Scans every balanced {...} block and keeps those that open with "{|"
-- @param text Wikitext to scan
-- @param options Accepted for symmetry with the other extractors; not used
-- @return The kept tables joined by newlines ("" if there are none)
local function getTables(text, options)
	local found = {}
	for block in mw.ustring.gmatch(text, "%b{}") do
		local opensTable = mw.ustring.sub(block, 1, 2) == '{|'
		if opensTable then
			found[#found + 1] = block
		end
	end
	return table.concat(found, '\n')
end

-- Get the lists only
-- Keeps every line that follows a newline, starts with * or # and has at least one more character
-- Each kept item retains its leading newline, and the items are then joined by a further newline
-- @param text Wikitext to scan
-- @param options Accepted for symmetry with the other extractors; not used
-- @return The kept list lines ("" if there are none)
local function getLists(text, options)
	local items = {}
	for item in mw.ustring.gmatch(text, "\n[*#][^\n]+") do
		items[#items + 1] = item
	end
	return table.concat(items, '\n')
end

-- Check image for suitability
-- @param image Wikitext containing a File:/Image: reference (any of d.fileNamespaces)
-- @return true only if the reference names an image file type, getContent resolves its title,
--         and its description does not contain "Non-free"/"non-free"; false otherwise
local function checkImage(image)
	-- match File:(name) or Image:(name)
	local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*")
	if not page then return false end

	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg, audio, etc.)
	local fileTypes = {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}
	local isImageType = matchAny(page, "%.", fileTypes, "%s*$")
	if not isImageType then return false end

	-- Check the local wiki: description and title after following any redirect
	local fileDescription, fileTitle = getContent(page)
	if not fileTitle or fileTitle == "" then
		return false -- the image doesn't exist
	end

	-- No local description: expand the file page as a template instead (check Commons)
	if not fileDescription or fileDescription == "" then
		fileDescription = mw.getCurrentFrame():preprocess("{{" .. fileTitle .. "}}")
	end

	-- Filter non-free images, and files that still have no description
	local unusable = not fileDescription or fileDescription == "" or mw.ustring.match(fileDescription, "[Nn]on%-free")
	if unusable then return false end

	return true
end

-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)
-- @param text Wikitext to search
-- @param start When truthy, the image link must begin at the first character of text
-- @return The balanced [...] image link plus any whitespace after it, or nil if none is found
local function parseImage(text, start)
	local anchor = ""
	if start then anchor = "^" end -- a true flag restricts search to start of string
	local fromImage = matchAny(text, anchor .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ... to end of text
	if not fromImage then
		return nil
	end
	return mw.ustring.match(fromImage, "%b[]%s*") -- balanced brackets handle wikilinks nested in the caption
end

-- Parse a caption, which ends at a | (end of parameter) or } (end of infobox) but may contain nested [..] and {..}
-- @param caption Text that starts at the caption and may run on past its end
-- @return The caption up to, but excluding, the first | or } that lies outside any balanced
--         [...] or {...}; the whole text if no such terminator exists; nil if caption is nil
local function parseCaption(caption)
	if not caption then return nil end
	local length = mw.ustring.len(caption)
	local notFound = length + 1 -- stands in for "no match", so comparisons never see nil
	local cursor = 1
	while cursor <= length do
		local linkFrom, linkTo = mw.ustring.find(caption, "%b[]", cursor)
		local templateFrom, templateTo = mw.ustring.find(caption, "%b{}", cursor)
		local terminator = mw.ustring.find(caption, "[|}]", cursor) or notFound
		linkFrom = linkFrom or notFound
		templateFrom = templateFrom or notFound
		if linkFrom < templateFrom and linkFrom < terminator then
			cursor = linkTo + 1 -- a wikilink comes first: skip over it
		elseif templateFrom < terminator then
			cursor = templateTo + 1 -- a template comes first: skip over it
		else
			return mw.ustring.sub(caption, 1, terminator - 1) -- the caption ends before any further link or template
		end
	end
	return caption -- No terminator found: return entire caption
end

-- Attempt to construct [[File:...]] blocks from {{infobox ... |image= ...}}
-- @param text Wikitext of one template call
-- @return nil if the text has no "|" or no "=", or if no image-like argument is found;
--         otherwise a sequence of image links, each ending in a newline, in order of appearance
-- Captions, alt texts and image sizes are attached to the nearest image that starts before them.
-- NOTE(review): the first image scan stops at the first *image* argument that is really a
-- caption/size/upright setting, leaving later images to the file-extension scan — confirm this is intended
local function argImage(text)
	local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
	if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image

	-- ensure image map is captured
	text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')

	-- images is keyed by the position in text at which each image value starts
	local images = {}
	local hasImages = false

	-- Store value in store, under the key of the nearest image that starts before position,
	-- unless that image already has an entry. Nothing is stored when no image precedes
	-- position, or when an image starts exactly at position.
	local function attachToPrecedingImage(store, position, value)
		local i = position
		while i > 0 and not images[i] do
			i = i - 1
			if images[i] and not store[i] then store[i] = value end
		end
	end

	-- find all images: first, arguments whose name contains "image"
	local captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", captureFrom)
		if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
			local lcArgName = mw.ustring.lower(argname)
			if mw.ustring.find(lcArgName, "caption")
			 or mw.ustring.find(lcArgName, "size")
			 or mw.ustring.find(lcArgName, "upright") then
				image = nil
			end
		end
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end
	-- second, arguments whose name contains "photo"
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", captureFrom)
		if image then
			hasImages = true
			images[position] = image
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end
	-- third, any argument whose value ends in something that looks like a file extension
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", captureFrom)
		if image then
			hasImages = true
			if not images[position] then -- keep what an earlier scan already found at this position
				images[position] = image
			end
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end

	if not hasImages then return nil end

	-- find all captions
	local captions = {}
	captureFrom = 1
	while captureFrom < mw.ustring.len(text) do
		local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", captureFrom)
		if caption then
			-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
			local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
			if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
			caption = mw.text.trim(caption)
			local captionStart = mw.ustring.sub(caption, 1, 1)
			if captionStart == '|' or captionStart == '}' then caption = nil end -- the argument was empty
		end
		if caption then
			attachToPrecedingImage(captions, position, parseCaption(caption))
			captureFrom = position
		else
			captureFrom = mw.ustring.len(text)
		end
	end

	-- find all alt text
	local altTexts = {}
	for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
		-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
		local lookFrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
		 mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
		 mw.ustring.match(altText, ".*%[%b[]%]()") or 1)

		local length = mw.ustring.len(altText)
		local afterText = math.min( -- find position after whichever comes first: end of string, }} or |
		 mw.ustring.match(altText, "()}}", lookFrom) or length+1,
		 mw.ustring.match(altText, "()|", lookFrom) or length+1)
		altText = mw.ustring.sub(altText, 1, afterText-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}

		altText = mw.text.trim(altText)
		local altTextStart = mw.ustring.sub(altText, 1, 1)
		if altTextStart ~= '|' and altTextStart ~= '}' then
			attachToPrecedingImage(altTexts, position, altText)
		end
	end

	-- find all image sizes
	-- The pattern already captures the value that follows "=", so it is used directly.
	-- (The value used to be searched for a further "=", which it never contains, so no size was ever applied.)
	local imageSizes = {}
	for position, imageSize in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
		imageSize = mw.text.trim(imageSize)
		if mw.ustring.match(imageSize, "^%d+$") then imageSize = imageSize .. "px" end -- bare number: treat as pixels
		-- only accept well-formed sizes (250px, x100px, 200x100px): anything else placed after
		-- the caption in the file link would be taken for the caption
		if mw.ustring.match(imageSize, "^%d*x?%d+px$") then
			attachToPrecedingImage(imageSizes, position, imageSize)
		end
	end

	-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
	local keys = {}
	for key, val in pairs(images) do
		table.insert(keys, key)
	end
	table.sort(keys)

	-- add in relevant optional parameters for each image: caption, alt text and image size
	local imageTokens = {}
	for _, index in ipairs(keys) do
		local image = images[index]
		local token = parseImage(image, true) -- look for image=[[File:...]] etc.
		if not token then
			image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
			token = "[[" -- Add File: unless name already begins File: or Image:
			if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
				token = token .. "File:"
			end
			token = token .. image
			local caption = captions[index]
			if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
			local alt = altTexts[index]
			if alt then token = token .. "|alt=" .. alt end
			local image_size = imageSizes[index]
			if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
			token = token .. "]]"
		end
		token = mw.ustring.gsub(token, "\n","") .. "\n" -- one image per line
		table.insert(imageTokens, token)
	end
	return imageTokens
end

-- Apply extra file arguments to an image link, replacing any conflicting arguments already there
-- @param image An image link such as "[[File:Foo.jpg|right|Caption]]"
-- @param fileArgs Pipe-separated arguments such as "left|border|upright=0.75"; nil leaves the image unchanged
-- @return The image link with every argument of fileArgs inserted as its first option
-- For each argument, the other members of its group in d.imageParams (e.g. right/left/center/none)
-- are removed from the link first; an argument that is in no group only replaces itself.
-- Names are compared literally against d.imageParams, so they are escaped before being used in
-- patterns (otherwise names such as "text-top" never match) — assumes the entries of
-- d.imageParams are plain names, not patterns; TODO confirm against Module:Excerpt/i18n
local function modifyImage(image, fileArgs)
	if not fileArgs then return image end
	for _, filearg in ipairs(mw.text.split(fileArgs, "|")) do -- handle fileArgs=left|border etc.
		local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
		local group = {fa} -- group of "border" is ["border"]...
		for _, g in pairs(d.imageParams) do
			for _, a in pairs(g) do
				if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
			end
		end
		for _, a in pairs(group) do
			local name = mw.ustring.gsub(a, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") -- escape pattern magic characters
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
			image = mw.ustring.gsub(image, "|%s*" .. name .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
		end

		-- replace the first "|" or "]" by "|left|" etc.; a function is used as the replacement so
		-- that a "%" in the argument is inserted literally instead of being read as a capture reference
		image = mw.ustring.gsub(image, "([|%]])", function(delimiter)
			return "|" .. filearg .. delimiter
		end, 1)
	end
	return image
end

-- A basic parser to trim down extracted wikitext
-- Walks the text token by token (template or table, image, paragraph), keeping or discarding each one
--   @param text : Wikitext to be processed
--   @param options : A table of options...
--          options.paraflags : Which number paragraphs to keep, as either a string (e.g. '1,3-5') or a table (e.g. {[1]=true,[3]=true}). If not present, all paragraphs will be kept.
--          options.fileflags : Which number files to keep, as either a string (e.g. '1,3-5') or a table (e.g. {[1]=true,[3]=true})
--          options.files : If this is a file name rather than a number list, that file is added first
--          options.fileargs : args for the [[File:]] syntax, such as 'left'
--          options.filesOnly : only return the files and not the prose
--          options.keepTables : keep {| tables |} instead of discarding them
--   @return The kept [[File:...]] links followed by the kept text, with trailing line feeds removed
--   Note: options.paraflags and options.fileflags are converted to tables in place
local function parse(text, options)
	local allParagraphs = true -- keep all paragraphs?
	if options.paraflags then
		if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
		for _, v in pairs(options.paraflags) do
			if v then allParagraphs = false end -- if any para specifically requested, don't keep all
		end
	end
	if is(options.filesOnly) then
		allParagraphs = false
		options.paraflags = {}
	end

	local maxfile = 0 -- for efficiency, stop checking images after this many have been found
	if options.fileflags then
		if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
		for k, v in pairs(options.fileflags) do
			if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
		end
	end

	local fileArgs = options.fileargs and mw.text.trim(options.fileargs)
	if fileArgs == '' then fileArgs = nil end

	local leadStart = nil -- have we found some text yet?
	local t = "" -- the stripped down output text
	local fileText = "" -- output text with concatenated [[File:Foo|...]]\n entries
	local files = 0 -- how many images so far
	local paras = 0 -- how many paragraphs so far
	local startLine = true -- at the start of a line (no non-spaces found since last \n)?

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space

	-- Add named files
	local f = options.files
	if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
		f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
		f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
		f = "[[File:" .. f .. "]]"
		f = modifyImage(f, "thumb")
		f = modifyImage(f, fileArgs)
		if checkImage(f) then fileText = fileText .. f .. "\n" end
	end

	repeat -- loop around parsing a template, image or paragraph
		local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
		if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started

		local line = mw.ustring.match(text, "[^\n]*")
		if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
			line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
			line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
			-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
			if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
				token = nil
			end
		end

		if token then -- found a template which is not the prefix to a line of text

			if is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
				t = t .. token -- keep tables

			elseif mw.ustring.sub(token, 1, 3) == '{{#' then
				t = t .. token -- keep parser functions

			elseif leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
				if not is(options.filesOnly) and not startLine then t = t .. token end

			elseif matchAny(token, "^{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
				t = t .. token -- keep wanted block templates

			elseif files < maxfile then -- discard template, but if we are still collecting images...
				local images = argImage(token) or {} -- look for |image= etc.
				-- fall back to an embedded [[File:...]] when no image argument was found
				-- (this used to test "not images", which can never hold after "or {}")
				if #images == 0 then
					local image = parseImage(token, false)
					if image then table.insert(images, image) end
				end
				for _, image in ipairs(images) do
					if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
						files = files + 1 -- count the file, whether displaying it or not
						if options.fileflags and options.fileflags[files] then -- if displaying this image
							image = modifyImage(image, "thumb")
							image = modifyImage(image, fileArgs)
							fileText = fileText .. image
						end
					end
				end
			end
		else -- the next token in text is not a template
			token = parseImage(text, true)
			if token then -- the next token in text looks like an image
				if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
					files = files + 1
					if options.fileflags and options.fileflags[files] then
						local image = token -- copy token for manipulation by adding |right etc. without changing the original
						image = modifyImage(image, fileArgs)
						fileText = fileText .. image
					end
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterEnd = mw.ustring.len(text) + 1
				local blankPosition = mw.ustring.find(text, "\n%s*\n") or afterEnd -- position of next paragraph delimiter (or end of text)
				local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
				 mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterEnd,
				 mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterEnd,
				 blankPosition)
				token = mw.ustring.sub(text, 1, endPosition-1)
				if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankPosition)
				end
				local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'
				if not isHatnote then
					leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
					paras = paras + 1
					if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
				end
			end -- of "else got a paragraph"
		end -- of "else not a template"

		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
		startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
	until not text or text == "" or not token or token == "" -- loop until all text parsed

	text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line

	return fileText .. text
end

-- Strip wikitext of the parts that should not show up in an excerpt
-- @param text Wikitext to clean
-- @param options Table of options; keepSubsections and keepRefs are read here
-- @return The cleaned wikitext
local function cleanupText(text, options)
	local gsub = mw.ustring.gsub

	-- Run a list of {pattern, replacement} rules over the text, in the order given
	local function apply(rules)
		for _, rule in ipairs(rules) do
			text = gsub(text, rule[1], rule[2])
		end
	end

	apply{
		{"<!%-%-.-%-%->", ""}, -- HTML comments
		{"<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", ""}, -- noinclude bits
	}

	-- The onlyinclude substitutions are costly, so only attempt them when the word occurs at all
	if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then
		apply{
			{"</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", ""}, -- text between onlyinclude sections
			{"^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", ""}, -- text before the first onlyinclude section
			{"</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*", ""}, -- text after the last onlyinclude section
		}
	end

	if not is(options.keepSubsections) then
		apply{
			{"\n==.*", ""}, -- the first ==Heading== and everything after it
			{"^==.*", ""}, -- same, when the lead is empty and the text opens with a heading
		}
	end

	if not is(options.keepRefs) then
		apply{
			{"<%s*[Rr][Ee][Ff][^>]-/%s*>", ""}, -- self-closing refs, whose body is cited elsewhere
			{"<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", ""}, -- refs with a body
			{"%b{}", stripTemplate}, -- let stripTemplate decide about each brace-delimited chunk
		}
	end

	apply{
		{"<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>", ""}, -- musical scores
		{"<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap}, -- imagemaps are handed to convertImageMap
		{"%s*{{%s*[Tt][Oo][Cc].-}}", ""}, -- most common table of contents templates
		{"%s*__[A-Z]*TOC__", ""}, -- TOC behavior switches
		{"\n%s*{{%s*[Pp]p%-.-}}", "\n"}, -- protection templates
		{"%s*{{[^{|}]*[Ss]idebar%s*}}", ""}, -- most sidebars
		{"%s*{{[^{|}]*%-[Ss]tub%s*}}", ""}, -- most stub templates
		{"%s*%[%[%s*:?[Cc]ategory:.-%]%]", ""}, -- categories
		{"^:[^\n]+\n", ""}, -- DIY hatnote indented with a colon
	}

	return text
end

-- Parse a ==Section== from a page
-- @param text Wikitext of the page
-- @param section Title of the wanted section; it is URI-decoded first (%26 → & etc.)
-- @param mainOnly If truthy, the section ends at the next heading of ANY level;
--        otherwise it ends at the next heading of the same or a higher level, so subsections are kept
-- @return The wikitext that follows the section heading. If the section is missing or blank,
--         the result of luaError (which throws unless error reporting is disabled)
local function getSection(text, section, mainOnly)
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
	-- The pattern anchors the heading on a preceding line feed and needs one after the heading line,
	-- so pad both ends: this way a heading on the very first or very last line is found too
	local level, content = mw.ustring.match("\n" .. text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
	if not content then return luaError("sectionNotFound", section) end
	local nextSection
	if mainOnly then
		nextSection = "\n==.*" -- Main part of section terminates at any level of header
	else
		nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
	end
	content = mw.ustring.gsub(content, nextSection, "") -- remove later sections with headings at this level or higher
	if mw.ustring.match(content, "^%s*$") then return luaError("sectionEmpty", section) end
	return content
end

-- Parse a <section begin="Name of the fragment">
-- The work is delegated to the #lst parser function, called on the current frame
-- @todo Implement custom parsing of fragments rather than relying on #lst
-- @param page Title of the page holding the fragment
-- @param fragment Name of the fragment
-- @return The text given back by #lst, or the result of luaError when that text is blank
local function getFragment(page, fragment)
	local transcluded = mw.getCurrentFrame():callParserFunction('#lst', page, fragment)
	if not mw.ustring.match(transcluded, "^%s*$") then
		return transcluded
	end
	return luaError("fragmentEmpty", fragment)
end

-- Remove unmatched <tag> or </tag> tags
-- Surplus opening tags are dropped from the end of the text, surplus closing tags from the start
-- @param text Wikitext to repair
-- @param tag Tag name; it is spliced into the patterns as given
-- @return The text with as many opening as closing tags
local function fixTags(text, tag)
	-- The frontier %f[^%w_] makes sure the tag name ends there, so "div" does not match "<divider>"
	local openPattern = "<%s*" .. tag .. "%f[^%w_].->"
	local closePattern = "<%s*/" .. tag .. "%f[^%w_].->"

	-- Number of matches of a pattern in the text
	local function count(pattern)
		local n = 0
		for _ in mw.ustring.gmatch(text, pattern) do n = n + 1 end
		return n
	end

	local opened = count(openPattern)
	local closed = count(closePattern)

	if closed > opened then -- too many </tag>: drop the first few
		text = mw.ustring.gsub(text, closePattern, "", closed - opened)
	elseif opened > closed then -- too many <tag>: drop the last few
		local seen = 0
		text = mw.ustring.gsub(text, openPattern, function()
			seen = seen + 1
			if seen > closed then return "" end
			return nil -- keep this opening tag
		end)
	end
	return text
end

-- Remove unterminated {{templates}}
-- Balanced pairs are first hidden behind escape characters (written E below), then whatever
-- doubled brace is still visible is cut away together with the text after it
-- @param text Wikitext to repair
-- @return The repaired wikitext
local function fixTemplates(text)
	local previous
	repeat -- keep hiding until a pass changes nothing, so that nested templates get hidden too
		previous = text
		text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E
		text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>
	until text == previous
	text = text:gsub("([{}])%1[^\27].*", "") -- cut an unmatched {{ or }} and all that follows, sparing }E}E etc.
	text = text:gsub("([{}])%1$", "") -- cut an unmatched {{ or }} sitting at the very end
	text = mw.ustring.gsub(text, "\27", "") -- bring the hidden pairs back: E{E{ → {{, etc.
	return text
end

-- Remove unterminated [[wikilinks]]
-- Balanced pairs are first hidden behind escape characters (written E below), then whatever
-- doubled bracket is still visible is cut away together with the text after it
-- @param text Wikitext to repair
-- @return The repaired wikitext
local function fixLinks(text)
	local previous
	repeat -- keep hiding until a pass changes nothing; covers nesting like [[File:Example.jpg|Some [[nested]] link.]]
		previous = text
		text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
	until text == previous
	text = text:gsub("([%[%]])%1[^\27].*", "") -- cut an unmatched [[ or ]] and all that follows, sparing ]E]E etc.
	text = text:gsub("([%[%]])%1$", "") -- cut an unmatched [[ or ]] sitting at the very end
	text = mw.ustring.gsub(text, "\27", "") -- bring the hidden pairs back: ]E]E → ]], etc.
	return text
end

-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
-- and <ref group="Bar"> for <ref>
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
-- @param text Excerpt wikitext
-- @param page Title of the excerpted page, used as prefix for the reference names
-- @param full Full wikitext of the page; fetched with getContent(page) when omitted
-- @return The excerpt with its references fixed
local function fixRefs(text, page, full)
	if not full then full = getContent(page) end
	local seen = {} -- set of the reference names already processed
	local position = 1
	while position < mw.ustring.len(text) do
		local refName
		-- next self-closing <ref name="..." />; the trailing () capture gives the position right after it
		refName, position = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>]+)[\"']?[^>]*/%s*>()", position)
		if refName then
			refName = mw.text.trim(refName)
			if not seen[refName] then -- make sure we process each ref name only once
				seen[refName] = true
				local escapedName = mw.ustring.gsub(refName, "[%^%$%(%)%.%[%]%*%+%-%?%%]", "%%%0") -- escape special characters
				local bodyPattern = "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escapedName .. "%s*[\"']?[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>"
				if not mw.ustring.match(text, bodyPattern) then -- the ref body is not in the excerpt
					local refBody = full and mw.ustring.match(full, bodyPattern)
					if refBody then -- the ref body was found elsewhere
						-- Use a function as replacement: a replacement STRING would have every "%" of the
						-- reference body (such as the %20 of a URL) interpreted as a capture reference
						text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escapedName .. "%s*[\"']?[^>]*/?%s*>", function() return refBody end, 1)
					end
				end
			end
		else
			position = mw.ustring.len(text) -- no more self-closing refs: end the loop
		end
	end
	-- Prefix the page title to every reference name, dropping any other attribute such as the group.
	-- Again a function replacement, so that a "%" in the page title is taken literally
	text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>/]+)[\"']?[^>/]*(/?)%s*>", function(name, slash)
		return '<ref name="' .. page .. ' ' .. name .. '" ' .. slash .. '>'
	end)
	text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*group%s*=%s*[\"']?[^\"'>/]+[\"']%s*>", '<ref>')
	return text
end

-- Replace the bold title or synonym near the start of the article by a wikilink to the article
-- NOTE(review): this function is declared global, unlike its neighbours; left as is because
-- code outside this view may refer to it by its global name — confirm before making it local
-- @param text Wikitext of the excerpt
-- @param page Title of the article
-- @return The text with the bold title (or else the first unlinked bold text) wikilinked
function linkBold(text, page)
	local contentLang = mw.language.getContentLanguage()

	-- Position of '''title''' in the text; plain search, so special characters in the title represent themselves
	local function findBold(title)
		return mw.ustring.find(text, "'''" .. title .. "'''", 1, true)
	end

	-- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	local start = findBold(contentLang:ucfirst(page)) or findBold(contentLang:lcfirst(page))

	if not start then
		-- no bold title: treat the first bold text as a synonym of the title (e.g. a person's birth name)
		return (mw.ustring.gsub(text, "()'''(.-'*)'''", function(_, bold)
			if mw.ustring.find(bold, "%[") then
				return nil -- already wikilinked: instruct gsub to make no change
			end
			return "'''[[" .. page .. "|" .. bold .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
		end, 1))
	end

	-- wrap the title, which sits right after the three opening quotes, in [[ ]]
	local titleStart = start + 3
	local titleEnd = start + mw.ustring.len(page) + 2
	return mw.ustring.sub(text, 1, start + 2)
		.. "[[" .. mw.ustring.sub(text, titleStart, titleEnd) .. "]]"
		.. mw.ustring.sub(text, titleEnd + 1, -1)
end

-- Main function for modules
-- @param page Title of the page to excerpt, optionally followed by #Section
-- @param options Optional table of options. Read here: errors, fragment, nobold, tablesOnly,
--        listsOnly and keepRefs; the whole table is also passed on to cleanupText and parse
-- @return The wikitext of the excerpt. Failures are reported through luaError,
--         which throws unless error reporting is disabled
local function get(page, options)
	options = options or {} -- callers from other modules may omit the options
	if options.errors then errors = options.errors end

	if not page or page == "" then return luaError("noPage") end

	local text, section -- keep section local, so that it does not leak into the global environment
	page, section = mw.ustring.match(page, "([^#]+)#?([^#]*)") -- split "Page#Section"
	text, page = getContent(page)
	if not page then return luaError("noPage") end
	if not text then return luaError("pageNotFound", page) end
	local full = text -- save the full text for later

	if is(options.fragment) then
		text = getFragment(page, options.fragment)
	end

	if is(section) then
		text = getSection(text, section)
	end

	-- Strip text of all undesirables
	text = cleanupText(text, options)
	text = parse(text, options)

	-- Replace the bold title or synonym near the start of the article by a wikilink to the article
	text = linkBold(text, page)

	-- Remove '''bold text''' if requested
	if is(options.nobold) then text = mw.ustring.gsub(text, "'''", "") end

	-- Keep only tables if requested
	if is(options.tablesOnly) then text = getTables(text) end

	-- Keep only lists if requested
	if is(options.listsOnly) then text = getLists(text) end

	-- Seek and destroy unterminated templates, links and tags
	text = fixTemplates(text)
	text = fixLinks(text)
	text = fixTags(text, "div")

	-- Fix broken references
	if is(options.keepRefs) then text = fixRefs(text, page, full) end

	return text
end

-- Main invocation function for templates
-- Runs get in protected mode, so that a Lua error becomes a wiki friendly message
-- @param frame Frame object of the invocation
-- @return The preprocessed excerpt, or an mw.html <div class="error"> holding the error message
local function main(frame)
	local args = parseArgs(frame)
	local ok, result = pcall(get, args[1], args)
	if ok then
		return frame:preprocess(result)
	end

	-- get failed, and result holds the error message
	local message = d.errors.prefix .. result
	local category = d.errorsCategory
	if category and category ~= "" and mw.title.getCurrentTitle().isContentPage then
		message = message .. '[[' .. category .. ']]' -- categorize content pages only
	end
	return mw.html.create('div'):addClass('error'):wikitext(message)
end

-- Entry points for templates
p.main = function(frame) return main(frame) end
p.wikiError = function(message, value) return wikiError(message, value) end

-- Entry points for other Lua modules
-- Each wrapper forwards only the arguments it lists
p.get = function(page, options) return get(page, options) end
p.getContent = function(page) return getContent(page) end
p.getSection = function(text, section) return getSection(text, section) end
p.getTables = function(text, options) return getTables(text, options) end
p.getLists = function(text, options) return getLists(text, options) end
p.parse = function(text, options) return parse(text, options) end
p.parseImage = function(text, start) return parseImage(text, start) end
p.parseArgs = function(frame) return parseArgs(frame) end
p.argImage = function(text) return argImage(text) end
p.checkImage = function(image) return checkImage(image) end
p.cleanupText = function(text, options) return cleanupText(text, options) end
p.luaError = function(message, value) return luaError(message, value) end
p.is = function(value) return is(value) end
p.numberFlags = function(str) return numberFlags(str) end

-- Entry points for backwards compatibility (all lower case aliases)
p.getsection = function(text, section) return getSection(text, section) end
p.parseimage = function(text, start) return parseImage(text, start) end
p.checkimage = function(image) return checkImage(image) end
p.argimage = function(text) return argImage(text) end
p.numberflags = function(str) return numberFlags(str) end

return p