Changes

Jump to: navigation, search

Module:Citation/CS1/Utilities

10,686 bytes added, 2 years ago
update per RfC;
local u = {}
 
-- Module-level state shared with the other CS1 modules through the export table.
local z = {
	error_cats_t = {};			-- for categorizing citations that contain errors
	error_ids_t = {};			-- list of error identifiers; used to prevent duplication of certain errors; local to this module
	error_msgs_t = {};			-- sequence table of error messages
	maint_cats_t = {};			-- for categorizing citations that aren't erroneous per se, but could use a little work
	prop_cats_t = {};			-- for categorizing citations based on certain properties, language of source for instance
	prop_keys_t = {};			-- for adding classes to the citation's <cite> tag
};
]]
-- Returns true when <var> is neither nil nor the empty string; false otherwise.
-- Note that the boolean false and the number 0 both count as "set".
local function is_set( var )
	return var ~= nil and var ~= '';
end
]]
-- Searches the sequence <haystack> for <needle>.
-- Returns the 1-based index of the first element equal to <needle>;
-- returns false when <needle> is nil or is not present.
local function in_array( needle, haystack )
	if needle == nil then
		return false;
	end
	for n, v in ipairs( haystack ) do
		if v == needle then
			return n;
		end
	end																					-- closes the for loop (this end was missing)
	return false;
end
 
 
--[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------
 
When <str> is wholly wrapped in accept-as-written markup, return <str> without markup and true; return <str> and false else
 
with allow_empty = false, <str> must have at least one character inside the markup
with allow_empty = true, the markup frame in <str> can be empty, like (()), to distinguish an empty template parameter from the specific condition "has no applicable value" in citation-context.
 
After further evaluation the two cases might be merged at a later stage, but should be kept separated for now.
 
]]
 
local function has_accept_as_written (str, allow_empty)
	if not is_set (str) then
		return str, false;																-- nothing to unwrap
	end

	local pattern = '^%(%((.+)%)%)$';													-- default: at least one character inside the markup
	if true == allow_empty then
		pattern = '^%(%((.*)%)%)$';														-- allows (()) to be an empty set
	end

	local unwrapped, hits = str:gsub (pattern, '%1');									-- <hits> is 1 when the whole string was wrapped, 0 otherwise
	return unwrapped, hits ~= 0;
end
--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
Populates numbered arguments in a message string using an argument table. <args> may be a single string or a sequence table of multiple strings.
]]
local function substitute( msg, args )
	if not args then
		return msg;																		-- no arguments supplied; message is returned untouched
	end
	return mw.message.newRawMessage( msg, args ):plain() or msg;						-- fall back to <msg> should the expansion yield a falsy value
end
--[[--------------------------< E R R O R _ C O M M E N T >----------------------------------------------------
Wraps error messages with CSS markup according to the state of hidden. <content> may be a single string or a sequence table of multiple strings.
]]
local function error_comment( content, hidden )
	-- choose the hidden or the visible presentation template, then substitute <content> into it
	return substitute( hidden and cfg.presentation['hidden-error'] or cfg.presentation['visible-error'], content );
end


--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------

Converts a hyphen to a dash under certain conditions. The hyphen must separate like items; unlike items are
returned unmodified. These forms are modified:
	letter - letter (A - B)
	digit - digit (4-5)
	digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
	letterdigit - letterdigit (A1-A5) (an optional separator between letter and digit is supported – a.1-a.5 or a-1-a-5)
	digitletter - digitletter (5a - 5d) (an optional separator between letter and digit is supported – 5.a-5.d or 5-a-5-d)

any other forms are returned unmodified.

str may be a comma- or semicolon-separated list

Because the final statement returns the result of a chained gsub() directly, callers receive the converted string
followed by gsub()'s replacement count.

]]

local function hyphen_to_dash (str)
	if not is_set (str) then
		return str;
	end

	local accept;																		-- boolean

	-- replace commas and semicolons inside accept-as-written markup with similar (fullwidth) unicode characters
	-- so that they are ignored during the split; they are restored just before returning
	str = str:gsub ("(%(%(.-%)%))", function(m) return m:gsub(",", "，"):gsub(";", "；") end);
	str = str:gsub ('&[nm]dash;', {['&ndash;'] = '–', ['&mdash;'] = '—'});				-- replace &mdash; and &ndash; entities with their characters; semicolon mucks up the text.split
	str = str:gsub ('&#45;', '-');														-- replace HTML numeric entity with hyphen character
	str = str:gsub ('&nbsp;', ' ');														-- replace &nbsp; entity with generic keyboard space character

	local out = {};
	local list = mw.text.split (str, '%s*[,;]%s*');										-- split str at comma or semicolon separators if there are any

	for _, item in ipairs (list) do														-- for each item in the list
		item, accept = has_accept_as_written (item);									-- remove accept-this-as-written markup when it wraps all of item
		if not accept and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%-–—]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if item:match ('^%a+[%.%-]?%d+%s*%-%s*%a+[%.%-]?%d+$') or					-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				item:match ('^%d+[%.%-]?%a+%s*%-%s*%d+[%.%-]?%a+$') or					-- digitletter hyphen digitletter (optional separator between digit and letter)
				item:match ('^%d+[%.%-]%d+%s*%-%s*%d+[%.%-]%d+$') or					-- digit separator digit hyphen digit separator digit
				item:match ('^%d+%s*%-%s*%d+$') or										-- digit hyphen digit
				item:match ('^%a+%s*%-%s*%a+$') then									-- letter hyphen letter
					item = item:gsub ('(%w*[%.%-]?%w+)%s*%-%s*(%w*[%.%-]?%w+)', '%1–%2');	-- replace hyphen, remove extraneous space characters
			else
				item = mw.ustring.gsub (item, '%s*[–—]%s*', '–');						-- for endash or emdash separated ranges, replace em with en, remove extraneous whitespace
			end
		end
		table.insert (out, item);														-- add the (possibly modified) item to the output table
	end

	local temp_str;
	temp_str, accept = has_accept_as_written (table.concat (out, ', '));				-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept then
		temp_str = has_accept_as_written (str);											-- when global markup removed, return original str; do it this way to suppress boolean second return value
	end

	return temp_str:gsub("，", ","):gsub("；", ";");										-- restore the protected commas and semicolons
end
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------
Makes a wikilink; when both link and display text are provided, returns a wikilink in the form [[L|D]]; if only link is provided (or link and display are the same), returns a wikilink in the form [[L]]; if neither is provided or link is omitted, returns an empty string.
]=]
local function make_wikilink (link, display)
	if not is_set (link) then
		return '';																		-- no link target, nothing to build
	end

	if is_set (display) and link ~= display then
		return table.concat ({'[[', link, '|', display, ']]'});							-- piped form: distinct display text supplied
	else
		return table.concat ({'[[', link, ']]'});										-- simple form: display missing or same as link
	end
end
--[[--------------------------< S E T _ M E S S A G E >--------------------------------------------------------
Sets an error message using the ~/Configuration error_conditions{} table along with arguments supplied in the function
call, inserts the resulting message in z.error_msgs_t{} sequence table, and returns the error message. The actual
placement of the message in the rendered output is the responsibility of the calling function.

	<error_id> – key value for appropriate error handler in ~/Configuration error_conditions{} table
	<arguments> – may be a single string or a sequence table of multiple strings to be substituted into error_conditions[error_id].message
	<raw> – boolean
		true –	causes this function to return the error message not wrapped in visible-error, hidden-error span tag;
				returns error_conditions[error_id].hidden as a second return value
				does not add message to z.error_msgs_t sequence table
		false, nil – adds message wrapped in visible-error, hidden-error span tag to z.error_msgs_t
				returns the error message wrapped in visible-error, hidden-error span tag; there is no second return value
	<prefix> – string to be prepended to <message>		-- TODO: remove support for these unused(?) arguments?
	<suffix> – string to be appended to <message>

TODO: change z.error_cats_t and z.maint_cats_t to have the form cat_name = true? this to avoid dups without having to
have an extra table
]]
local added_maint_cats = {}																-- list of maintenance categories that have been added to z.maint_cats_t; TODO: figure out how to delete this table

-- Looks up <error_id> in cfg.error_conditions and records the corresponding category and message.
--   * undefined <error_id>: raises a Lua error
--   * condition with a category but no message: the category is added once to z.maint_cats_t;
--     the function then returns nothing
--   * condition with a message: the category (when set) is added to z.error_cats_t, the message is built and
--     either returned raw together with error_state.hidden (<raw> is true) or wrapped, appended to
--     z.error_msgs_t, and returned
local function set_message (error_id, arguments, raw, prefix, suffix)
	local error_state = cfg.error_conditions[error_id];

	prefix = prefix or '';
	suffix = suffix or '';

	if error_state == nil then
		error (cfg.messages['undefined_error'] .. ': ' .. error_id);					-- because missing error handler in Module:Citation/CS1/Configuration

	elseif is_set (error_state.category) then
		if error_state.message then														-- when error_state.message defined, this is an error message
			table.insert (z.error_cats_t, error_state.category);
		else
			if not added_maint_cats[error_id] then
				added_maint_cats[error_id] = true;										-- note that we've added this category
				table.insert (z.maint_cats_t, substitute (error_state.category, arguments));	-- make cat name then add to table
			end
			return;																		-- because no message, nothing more to do
		end
	end

	local message = substitute (error_state.message, arguments);

	message = table.concat (															-- append a parenthesized link to the help page section for this error
		{
		message,
		' (',
		make_wikilink (
			table.concat (
				{
				cfg.messages['help page link'],
				'#',
				error_state.anchor
				}),
			cfg.messages['help page label']),
		')'
		});

	z.error_ids_t[error_id] = true;
	if z.error_ids_t['err_citation_missing_title'] and									-- if missing-title error already noted
		in_array (error_id, {'err_bare_url_missing_title', 'err_trans_missing_title'}) then		-- and this error is one of these
			return '', false;															-- don't bother because one flavor of missing title is sufficient
	end

	message = table.concat ({prefix, message, suffix});

	if true == raw then
		return message, error_state.hidden;												-- return message not wrapped in visible-error, hidden-error span tag
	end

	message = error_comment (message, error_state.hidden);								-- wrap message in visible-error, hidden-error span tag
	table.insert (z.error_msgs_t, message);												-- add it to the messages sequence table
	return message;																		-- and done; return value generally not used but is used as a flag in various functions of ~/Identifiers
end
alias – one of the list of possible aliases in the aliases lists from Module:Citation/CS1/Configuration
index – for enumerated parameters, identifies which one
enumerated – true/false flag used to choose how enumerated aliases are examined
value – value associated with an alias that has previously been selected; nil if not yet selected
selected – the alias that has previously been selected; nil if not yet selected
end
if is_set(args[alias]) then -- alias is in the template's argument list
if value ~= nil and selected ~= alias then -- if we have already selected one of the aliases
local skip;
for _, v in ipairs(error_list) do -- spin through the error list to see if we've added this alias
if v == alias then
skip = true;
end
if not skip then -- has not been added so
table.insert( error_list, alias ); -- add error alias to the error list
end
else
--[[--------------------------< A D D _ M A I N T _ C A T >------------------------------------------------------
Adds a category to z.maint_cats_t using names from the configuration file with additional text if any. To prevent duplication, the added_maint_cats table lists the categories by key that have been added to z.maint_cats_t.
]]
local added_maint_cats = {}																-- list of maintenance categories that have been added to z.maint_cats_t

local function add_maint_cat (key, arguments)
	if not added_maint_cats [key] then
		added_maint_cats [key] = true;													-- note that we've added this category
		table.insert (z.maint_cats_t, substitute (cfg.maint_cats [key], arguments));	-- make name then add to table
	end
end


--[[--------------------------< A D D _ P R O P _ C A T >--------------------------------------------------------

Adds a category to z.prop_cats_t using names from the configuration file with additional text if any.

foreign_lang_source and foreign_lang_source_2 keys have a language code appended to them so that multiple languages
may be categorized but multiples of the same language are not categorized.

added_prop_cats is a table declared in page scope variables above

]]

local added_prop_cats = {};																-- list of property categories that have been added to z.prop_cats_t

local function add_prop_cat (key, arguments, key_modifier)
	local key_modified = key .. (key_modifier or '');									-- modify <key> with <key_modifier> if present and not nil

	if not added_prop_cats [key_modified] then
		added_prop_cats [key_modified] = true;											-- note that we've added this category
		table.insert (z.prop_cats_t, substitute (cfg.prop_cats [key], arguments));		-- make name then add to table
		table.insert (z.prop_keys_t, 'cs1-prop-' .. key);								-- convert key to class for use in the citation's <cite> tag
	end
end
]]
-- Protects <str> so it can be wrapped in wiki italic markup: a leading or trailing apostrophe is fenced with an
-- empty <span></span> and newlines are replaced with spaces.
-- Unset input is returned unchanged. Otherwise the result of gsub() is returned directly, so callers receive the
-- cleaned string followed by the replacement count.
local function safe_for_italics( str )
	if not is_set (str) then
		return str;
	end

	if str:sub (1, 1) == "'" then str = "<span></span>" .. str; end					-- keep a leading apostrophe from merging with the markup
	if str:sub (-1, -1) == "'" then str = str .. "<span></span>"; end					-- same for a trailing apostrophe

	return str:gsub( '\n', ' ' );														-- Remove newlines as they break italics.
end
-- Applies the presentation template cfg.presentation[key] to <str>.
-- Returns an empty string when <str> is not set; for the two italic-title keys <str> is first passed through
-- safe_for_italics().
local function wrap_style (key, str)
	if not is_set( str ) then
		return "";
	elseif in_array( key, { 'italic-title', 'trans-italic-title' } ) then
		str = safe_for_italics( str );
	end

	return substitute( cfg.presentation[key], {str} );
end


--[[--------------------------< M A K E _ S E P _ L I S T >------------------------------------------------------------

make a separated list of items using provided separators.
	<sep_list> - typically '<comma><space>'
	<sep_list_pair> - typically '<space>and<space>'
	<sep_list_end> - typically '<comma><space>and<space>' or '<comma><space>&<space>'

defaults to cfg.presentation['sep_list'], cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end']
if <sep_list_end> is specified, <sep_list> and <sep_list_pair> must also be supplied

]]

local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
	local list = '';

	if not sep_list then																-- set the defaults
		sep_list = cfg.presentation['sep_list'];
		sep_list_pair = cfg.presentation['sep_list_pair'];
		sep_list_end = cfg.presentation['sep_list_end'];
	end

	if 2 >= count then
		list = table.concat (list_seq, sep_list_pair);									-- insert separator between two items; returns list_seq[1] when only one item
	elseif 2 < count then
		list = table.concat (list_seq, sep_list, 1, count - 1);							-- concatenate all but last item with plain list separator
		list = table.concat ({list, list_seq[count]}, sep_list_end);					-- concatenate last item onto end of <list> with final separator
	end

	return list;
end
]]
-- Chooses one value from <args> among the parameter names in <aliases_list>.
-- Each alias is tested with is_alias_used(); aliases containing '#' are tested in enumerated form and, when
-- <index> is 1, in non-enumerated form as well. Aliases that is_alias_used() places in error_list are
-- reported through set_message() under <error_condition>, unless <error_condition> is 'none'.
-- Returns the selected value and the name of the selected parameter.
local function select_one( args, aliases_list, error_condition, index )
	local value = nil;																	-- the value assigned to the selected parameter
	local selected = '';																-- the name of the parameter we have chosen
	local error_list = {};																-- filled in by is_alias_used()

	if index ~= nil then index = tostring(index); end

	for _, alias in ipairs( aliases_list ) do											-- for each alias in the aliases list
		if alias:match ('#') then														-- if this alias can be enumerated
			if '1' == index then														-- when index is 1 test for enumerated and non-enumerated aliases
				value, selected = is_alias_used (args, alias, index, false, value, selected, error_list);	-- first test for non-enumerated alias
			end
			value, selected = is_alias_used (args, alias, index, true, value, selected, error_list);		-- test for enumerated alias
		else
			value, selected = is_alias_used (args, alias, index, false, value, selected, error_list);		-- test for non-enumerated alias
		end
	end

	if #error_list > 0 and 'none' ~= error_condition then								-- for cases where this code is used outside of extract_names()
		for i, v in ipairs( error_list ) do
			error_list[i] = wrap_style ('parameter', v);								-- style each redundant parameter name
		end

		table.insert (error_list, wrap_style ('parameter', selected));					-- the selected parameter goes last in the list
		set_message ( error_condition, { make_sep_list (#error_list, error_list) } );
	end

	return value, selected;
end
The str:gsub() returns either A|B from a [[A|B]] or B from [[B]] or B from B (no wikilink markup).
In l(), l:gsub() removes the link and pipe (if they exist); the second :gsub() trims whitespace from the label
if str was wrapped in wikilink markup. Presumably, this is because without wikimarkup in str, there is no match
in the initial gsub, the replacement function l() doesn't get called.
local function remove_wiki_link (str)
	-- reduces the content of one [[...]] to its trimmed label
	local function label_of (inner)
		local label = inner:gsub( "^[^|]*|(.*)$", "%1" );								-- drop the link target and pipe when present
		label = label:gsub("^%s*(.-)%s*$", "%1");										-- trim surrounding whitespace
		return label;
	end

	local unlinked = str:gsub( "%[%[([^%[%]]*)%]%]", label_of );						-- assignment discards gsub's replacement count
	return unlinked;
end
--[=[-------------------------< I S _ W I K I L I N K >--------------------------------------------------------

Determines if str is a wikilink, extracts, and returns the wikilink type, link text, and display text parts.
If str is a complex wikilink ([[L|D]]):
	returns wl_type 2 and D and L from [[L|D]];
if str is a simple wikilink ([[D]]):
	returns wl_type 1 and D from [[D]] and L as empty string;
if not a wikilink:
	returns wl_type 0, str as D, and L as empty string.

trims leading and trailing whitespace and pipes from L and D ([[L|]] and [[|D]] are accepted by MediaWiki and
treated like [[D]]; while [[|D|]] is not accepted by MediaWiki, here, we accept it and return D without the pipes).

]=]

local function is_wikilink (str)
	local D, L;																			-- display and link parts; kept local so they do not leak as globals
	local wl_type = 2;																	-- assume that str is a complex wikilink [[L|D]]

	if not str:match ('^%[%[[^%]]+%]%]$') then											-- is str some sort of a wikilink (must have some sort of content)
		return 0, str, '';																-- not a wikilink; return wl_type as 0, str as D, and empty string as L
	end

	L, D = str:match ('^%[%[([^|]+)|([^%]]+)%]%]$');									-- get L and D from [[L|D]]

	if not is_set (D) then																-- if no separate display
		D = str:match ('^%[%[([^%]]*)|*%]%]$');											-- get D from [[D]] or [[D|]]
		wl_type = 1;
	end
	if not is_set (D) then																-- no wikilink markup
		D = str;																		-- return the string as D
		wl_type = 0;																	-- but say that it is not a wikilink
	end
	D = mw.text.trim (D, '%s|');														-- trim white space and pipe characters
	L = L and mw.text.trim (L, '%s|');
	return wl_type, D, L or '';
end
 
 
--[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
 
Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to
markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
 
Returns the argument without wiki markup and a number; the number is more-or-less meaningless except as a flag
to indicate that markup was replaced; do not rely on it as an indicator of how many of any kind of markup was
removed; returns the argument and nil when no markup removed
 
]]
 
local function strip_apostrophe_markup (argument)
	if not is_set (argument) then
		return argument, nil;															-- no argument, nothing to do
	end

	if not argument:find ("''", 1, true) then											-- without a double apostrophe there is no markup to strip
		return argument, nil;
	end

	-- apostrophe runs, longest first; each entry is {plain text to look for, pattern handed to gsub}
	local runs = {
		{"'''''", "%'%'%'%'%'"},														-- bold italic (5)
		{"''''", "%'%'%'%'"},															-- italic start and end without content (4)
		{"'''", "%'%'%'"},																-- bold (3)
		{"''", "%'%'"},																	-- italic (2)
	};

	local flag;																			-- replacement count from the most recent gsub
	local stripped;
	repeat
		stripped = false;
		for _, run in ipairs (runs) do
			if argument:find (run[1], 1, true) then
				argument, flag = argument:gsub (run[2], "");							-- remove all instances of this run
				stripped = true;
				break;																	-- start over from the longest run
			end
		end
	until not stripped;

	return argument, flag;																-- done
end
--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]
return {
	add_maint_cat = add_maint_cat,														-- return exported functions and tables
	add_prop_cat = add_prop_cat,
	error_comment = error_comment,
	has_accept_as_written = has_accept_as_written,
	hyphen_to_dash = hyphen_to_dash,
	in_array = in_array,
	is_set = is_set,
	is_wikilink = is_wikilink,
	make_sep_list = make_sep_list,
	make_wikilink = make_wikilink,
	remove_wiki_link = remove_wiki_link,
	safe_for_italics = safe_for_italics,
	select_one = select_one,
	set_message = set_message,
	set_selected_modules = set_selected_modules,
	strip_apostrophe_markup = strip_apostrophe_markup,
	substitute = substitute,
	wrap_style = wrap_style,

	z = z,																				-- exported table
}

Navigation menu