Module:Ref info

From MattWiki
Jump to: navigation, search

Documentation for this module may be created at Module:Ref info/doc

-- This module counts the number of times that various reference tags and cs1|2 templates appear.
-- {{#invoke:ref info|ref_info}}
-- {{ref info}}
-- {{ref info|aristotle}}

local p = {}

-- These are the canonical template names, written as Lua patterns so that
-- common capitalisation variants match.  Redirects are only covered where a
-- pattern has been written for them.

-- cs1 template names; each is the part that follows 'cite ' in {{cite ...}}
local cs1_template_list = {
	'ar[Xx]iv',
	'[Aa][Vv] media',
	'[Aa][Vv] media notes',
	'book',
	'conference',
	'[Dd][Vv][Dd] notes',
	'encyclopedia',
	'episode',
	'interview',
	'journal',
	'magazine',
	'mailing list',
	'map',
	'news',
	'newsgroup',
	'podcast',
	'press release',
	'report',
	'sign',
	'speech',
	'serial',
	'techreport',
	'thesis',
	'web',
};

-- inline cleanup templates
local cleanup_template_list = {
	'[Cc]itation needed',
	'[Dd]isputed inline',
	'[Dd]ubious',
	'[Ff]ailed verification',
};

-- dead link template and the names that redirect to it
local dead_link_template_list = {
	'[Dd]ea?d[%- ]?[Ll]inks?',
	'[Dd]ead ?cite',
	'[Dd]ead page',
	'[Dd]ead ?url',
	'[Dd]ead%-inline',
	'404',
	'[Bb]ad ?link',
	'[Bb]roken ?link',
	'[Dd][Ll]',
	'[Ll]ink ?broken',
	'[Dd]ead',
}

--[[--------------------------< C O U N T _ P A T T E R N >----------------------------------------------------

General purpose counter: returns the number of times that pattern matches in text (the unparsed article text).

The count is taken from the second return value of mw.ustring.gsub(); the substituted string is discarded.

]]

local function count_pattern (text, pattern)
	return (select (2, mw.ustring.gsub (text, pattern, '%1')));				-- keep only the match count
end


--[[--------------------------< C O U N T _ C S 1 >------------------------------------------------------------

Using the list of cs1 templates, make a count of just those references or templates as dictated by base_pattern.

base_pattern is a string.format() format string holding a single %s; each entry of cs1_template_list is
substituted for that %s to make the Lua pattern that is counted in text.  Returns the sum of the counts for
all of the listed templates.

]]

local function count_cs1 (text, base_pattern)
	local total = 0;

	for _, template in ipairs (cs1_template_list) do
		local pattern = string.format (base_pattern, template);				-- make a pattern for the selected cs1 template; local so that it does not leak into the global environment
		local count = select (2, mw.ustring.gsub (text, pattern, '%1'));	-- count occurrences of that pattern
		total = total + count;												-- accumulate a total
	end
	return total;
end


--[[--------------------------< C O U N T _ C L E A N U P >----------------------------------------------------

Using the list of cleanup templates, make a count of those templates as dictated by base_pattern.

base_pattern is a string.format() format string holding a single %s; each entry of cleanup_template_list is
substituted for that %s to make the Lua pattern that is counted in text.  Returns the sum of the counts for
all of the listed templates.

]]

local function count_cleanup (text, base_pattern)
	local total = 0;

	for _, template in ipairs (cleanup_template_list) do
		local pattern = string.format (base_pattern, template);				-- make a pattern for the selected cleanup template; local so that it does not leak into the global environment
		local count = select (2, mw.ustring.gsub (text, pattern, '%1'));	-- count occurrences of that pattern
		total = total + count;												-- accumulate a total
	end
	return total;
end


--[[--------------------------< C O U N T _ D E A D _ L I N K S >----------------------------------------------

Using the list of dead link templates, make a count of those templates as dictated by base_pattern.

base_pattern is a string.format() format string holding a single %s; each entry of dead_link_template_list
is substituted for that %s to make the Lua pattern that is counted in text.  Returns the sum of the counts
for all of the listed templates.

]]

local function count_dead_links (text, base_pattern)
	local total = 0;

	for _, template in ipairs (dead_link_template_list) do
		local pattern = string.format (base_pattern, template);				-- make a pattern for the selected dead link template; local so that it does not leak into the global environment
		local count = select (2, mw.ustring.gsub (text, pattern, '%1'));	-- count occurrences of that pattern
		total = total + count;												-- accumulate a total
	end
	return total;
end


--[[--------------------------< H A S _ L D R >----------------------------------------------------------------

returns the string 'yes' if the article uses list defined references, 'no' otherwise.  ldr uses
{{reflist |refs=...}} or <references>...</references>.  Here we do simple 'find's to make the determination.

It is also possible to do ldr with {{refbegin}} ... {{refend}}; that form is not detected here.

the pattern value is passed to this function (so that it has the same signature as the counting functions)
but is ignored

]]

local function has_ldr (text, pattern)
	if mw.ustring.find (text, '{{%s*[Rr]eflist[^}]*|%s*refs%s*=%s*[^}|]+') then	-- does page use {{Reflist |refs=...}}?
		return 'yes';
	end
	if mw.ustring.find (text, '<references>[^<]+') then							-- does page use <references>...</references>?
		return 'yes';
	end
	return 'no';
end


--[[--------------------------< O B J E C T S   T A B L E >----------------------------------------------------

Here we define various properties and values necessary to the counting of referencing objects

Each entry is keyed by the object's name (the same names are listed in the order table) and has these fields:
	func	– the function that is called as func (text, pattern) to do the search
	pattern	– the Lua pattern handed to func; where func is count_cs1, count_cleanup, or count_dead_links the
			  pattern is a string.format() format string (magic % doubled, one %s for the template name)
	count	– initial value; overwritten with whatever func returns (a number, or 'yes' / 'no' for ldr)
	label	– text (may include markup) displayed as the row header for this object

]]

local objects = {
	['unnamed_refs'] = {														-- count unnamed ref tags
		['func'] = count_pattern,												-- the function that does the work for this object
		['pattern'] = '(<ref>)',												-- a pattern that the function uses to find and count this object
		['count'] = 0,															-- the returned result (called count because that is the most common but can be 'yes' or 'no' etc)
		['label'] = 'unnamed refs'										-- a label and markup for displaying the result; used with string.format()
		},
	['named_refs'] = {															-- count named ref tags; this pattern also matches self closed refs
		['func'] = count_pattern,
		['pattern'] = '(<ref%s+name%s*=%s*[%a%d%p ]+>)',
		['count'] = 0,
		['label'] = 'named refs'
		},
	['self_closed_refs'] = {													-- count self closed ref tags
		['func'] = count_pattern,
		['pattern'] = '(<ref%s*name%s*=%s*["%a%d%p ]+/>)',
		['count'] = 0,
		['label'] = 'self closed'
		},
	['r_templates'] = {															-- count R templates (wrapper for self closed refs)
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Rr]%s*|)',
		['count'] = 0,
		['label'] = 'R templates'
		},
	['refn_templates'] = {															-- count Refn templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Rr]efn%s*|)',
		['count'] = 0,
		['label'] = 'Refn templates'
		},
	['bare_url_refs'] = {														-- count bare url refs
		['func'] = count_pattern,												-- TODO: separate function to detect protocol relative urls?
		['pattern'] = '(<ref[^>]*>%s*http[^<%s]+%s*</ref>)',
		['count'] = 0,
		['label'] = '<span style="font-size:inherit" class="error">bare url refs</span>'
		},
	['ext_link_refs'] = {														-- count unlabeled external link refs
		['func'] = count_pattern,												-- TODO: separate function to detect protocol relative urls?
		['pattern'] = '(<ref[^>]*>%[%s*http[^%]<%s]+%][^<]*</ref>)',
		['count'] = 0,
		['label'] = '<span style="font-size:inherit" class="error">bare ext link refs</span>'
		},
	['cs1_like_refs'] = {														-- count cs1 refs and refs that look like cs1 (cite something)
		['func'] = count_pattern,
		['pattern'] = '(<ref[^>]*>[^<{]*{{%s*[Cc]ite%s+[^|]+)',
		['count'] = 0,
		['label'] = 'cs1-like refs'
		},
	['cs1_refs'] = {															-- count cs1 refs only
		['func'] = count_cs1,
		['pattern'] = '(<ref[^>]*>[^<{]*{{%%s*[Cc]ite%%s+%s%%s*|)',				-- will be modified in the func by string.format()
		['count'] = 0,
		['label'] = 'cs1 refs'
		},
	['cs1_like_templates'] = {													-- count cs1 templates and templates that look like cs1
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Cc]ite%s+[^|]+)',
		['count'] = 0,
		['label'] = 'cs1-like templates'
		},
	['cs1_templates'] = {														-- count cs1 templates only
		['func'] = count_cs1,
		['pattern'] = '({{%%s*[Cc]ite%%s+%s%%s*|)',								-- will be modified in the func by string.format()
		['count'] = 0,
		['label'] = 'cs1 templates'
		},
	['cs2_refs'] = {															-- count cs2 refs
		['func'] = count_pattern,
		['pattern'] = '(<ref[^>]*>[^<{]*{{%s*[Cc]itation%s*|)',
		['count'] = 0,
		['label'] = 'cs2 refs'
		},
	['cs2_templates'] = {														-- count cs2 templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Cc]itation%s*|)',
		['count'] = 0,
		['label'] = 'cs2 templates'
		},
	['vcite_refs'] = {															-- count vancite, vcite, and vcite2 refs
		['func'] = count_pattern,
		['pattern'] = '(<ref[^>]*>[^<{]*{{%s*[Vv]a?n?cite2?%s+[^|]+)',
		['count'] = 0,
		['label'] = 'vcite refs'
		},
	['vcite_templates'] = {														-- count vancite, vcite, and vcite2 templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Vv]a?n?cite2?%s+[^|]+)',
		['count'] = 0,
		['label'] = 'vcite templates'
		},
	['wikicite_templates'] = {													-- count wikicite templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Ww]ikicite%s*|)',
		['count'] = 0,
		['label'] = 'wikicite templates'
		},
	['harv_refs'] = {															-- count harv refs
		['func'] = count_pattern,
		['pattern'] = '(<ref[^>]*>[^<{]*{{%s*[Hh]arv[nbcolptx]*%s*|)',
		['count'] = 0,
		['label'] = 'harv refs'
		},
	['harv_templates'] = {														-- count harv templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Hh]arv[nbcolptx]*%s*|)',
		['count'] = 0,
		['label'] = 'harv templates'
		},
	['sfn_templates'] = {														-- count sfn templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Ss]fn[mp]?%s*|)',
		['count'] = 0,
		['label'] = 'sfn templates'
		},
	['rp_templates'] = {														-- count rp templates
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Rr]p%s*|)',
		['count'] = 0,
		['label'] = 'rp templates'
		},
	['ldr'] = {																	-- does this article use list defined references?
		['func'] = has_ldr,
		['pattern'] = '',														-- uses multiple patterns which are defined in the function
		['count'] = 'no',
		['label'] = 'uses ldr'
		},
	['refbegin_templates'] = {													-- count refbegin templates - bibliography lists
		['func'] = count_pattern,
		['pattern'] = '({{%s*[Rr]efbegin)',
		['count'] = 0,
		['label'] = 'refbegin templates'
		},
	['cleanup_templates'] = {													-- count cleanup templates
		['func'] = count_cleanup,
		['pattern'] = '({{%%s*%s)',												-- will be modified in the func by string.format()
		['count'] = 0,
		['label'] = 'cleanup templates'
		},
	['dead_link_templates'] = {													-- count deadlink templates (includes redirects)
		['func'] = count_dead_links,
		['pattern'] = '({{%%s*%s%%s*|)',										-- will be modified in the func by string.format()
		['count'] = 0,
		['label'] = 'dead link templates'
		},
	}
																				-- here we set the order in which the objects are processed
local order = {
	-- reference tags: these three are always output
	'unnamed_refs',
	'named_refs',
	'self_closed_refs',
	-- everything below only produces output when its count is not 0 or not 'no'
	'r_templates',
	'refn_templates',
	'bare_url_refs',
	'ext_link_refs',
	'cs1_refs',
	'cs1_templates',
	'cs1_like_refs',
	'cs1_like_templates',
	'cs2_refs',
	'cs2_templates',
	'vcite_refs',
	'vcite_templates',
	'wikicite_templates',
	'harv_refs',
	'harv_templates',
	'sfn_templates',
	'rp_templates',
	'ldr',
	'refbegin_templates',
	'cleanup_templates',
	'dead_link_templates',
};


--[[--------------------------< P . R E F _ I N F O >----------------------------------------------------------

the working part of Template:Ref info

frame.args[1]		– optional; title of the page to inspect.  When omitted the current page is inspected.
frame.args.style	– optional; css appended to the style attribute of the output table.

Returns wikitext: a wikitable with one row per referencing object, or an error message <span> when the page
content cannot be read (title is not valid, page does not exist, page is empty).

]]


function p.ref_info(frame)
	local style = frame.args.style or '';										-- styling css for output table
	local page_title_object;

	if frame.args[1] then
		page_title_object = mw.title.new(frame.args[1]);						-- title object for the page specified in the template call; nil when the title is not valid
	else
		page_title_object = mw.title.getCurrentTitle();							-- title object for the current page
	end

	local text = page_title_object and page_title_object:getContent();			-- the unparsed content of the selected page; nil when there is no title object or no content
	if nil == text then
		return string.format ('<span style="font-size:100%%" class="error">{{ref info}} – page is empty or does not exist: %s</span>', frame.args[1] or 'no page');
	end

	local nstitle = page_title_object.prefixedText;								-- the title of the page (with namespace)
	local title = page_title_object.text;										-- the title of the page (without namespace)

	for _, name in ipairs (order) do											-- loop through order and search for the related objects
		local object = objects[name];											-- the selected object; local so that it does not leak into the global environment
		object.count = object.func (text, object.pattern);						-- do the search and store the result
	end
																				-- for those that count duplicates remove the duplicates from the counts
	objects['named_refs'].count = objects['named_refs'].count - objects['self_closed_refs'].count;
	objects['cs1_like_refs'].count = objects['cs1_like_refs'].count - objects['cs1_refs'].count;
	objects['cs1_like_templates'].count = objects['cs1_like_templates'].count - objects['cs1_templates'].count;

	local output = {															-- output table header is the first element
		string.format ('{| class="wikitable" style="text-align:right; %s"\n|+reference info for [[%s|%s]]', style, nstitle, title)
		};

	for i, name in ipairs (order) do											-- loop through order and output from the related objects
		local object = objects[name];
		local count = object.count;												-- can be a string ('yes' / 'no') or a number
		local show = i <= 3														-- first three (reference tags) are always output
			or ('string' == type (count) and 'no' ~= count)						-- a string and not 'no'
			or ('number' == type (count) and 0 < count);						-- a number greater than zero

		if show then
			table.insert (output, string.format ('%s\n|%s', object.label, count));
		end
	end

	return table.concat (output, '\n|-\n! scope="row" | ') .. '\n|}';			-- the separator starts a new row and opens its header cell
end

return p