Latest revision |
Your text |
Line 1: |
Line 1: |
− | --[[
| |
− |
| |
− | This module is intended to provide access to basic string functions.
| |
− |
| |
− | Most of the functions provided here can be invoked with named parameters,
| |
− | unnamed parameters, or a mixture. If named parameters are used, Mediawiki will
| |
− | automatically remove any leading or trailing whitespace from the parameter.
| |
− | Depending on the intended use, it may be advantageous to either preserve or
| |
− | remove such whitespace.
| |
− |
| |
− | Global options
| |
− | ignore_errors: If set to 'true' or 1, any error condition will result in
| |
− | an empty string being returned rather than an error message.
| |
− |
| |
− | error_category: If an error occurs, specifies the name of a category to
| |
− | include with the error message. The default category is
| |
− | [[Category:Errors reported by Module String]].
| |
− |
| |
− | no_category: If set to 'true' or 1, no category will be added if an error
| |
− | is generated.
| |
− |
| |
− | Unit tests for this module are available at Module:String/tests.
| |
− | ]]
| |
− |
| |
| local str = {} | | local str = {} |
| | | |
--[[
len

Reports the length of the target string, as computed by mw.ustring.len.

Usage:
{{#invoke:String|len|target_string|}}
OR
{{#invoke:String|len|s=target_string}}

Parameters
    s: The string whose length to report. A missing argument is treated as
       the empty string.

If invoked using named parameters, MediaWiki will automatically remove any
leading or trailing whitespace from the target string.
]]
function str.len( frame )
    local args = str._getParameters( frame.args, { 's' } )
    return mw.ustring.len( args.s or '' )
end
| | | |
--[[
sub

Returns the substring of the target string between two indices (inclusive).

Usage:
{{#invoke:String|sub|target_string|start_index|end_index}}
OR
{{#invoke:String|sub|s=target_string|i=start_index|j=end_index}}

Parameters
    s: The string to return a subset of
    i: The first index of the substring to return, defaults to 1.
    j: The last index of the substring to return, defaults to the last character.

The first character of the string has index 1. A negative i or j counts from
the end of the string, so -1 selects the last character.

An error message is returned when either index falls outside the string, or
when the end index precedes the start index.
]]
function str.sub( frame )
    local args = str._getParameters( frame.args, { 's', 'i', 'j' } )
    local text = args.s or ''
    local first = tonumber( args.i ) or 1
    local last = tonumber( args.j ) or -1
    local length = mw.ustring.len( text )

    -- Rewrite negative indices as absolute positions so both range checks
    -- below can work on plain 1-based values.
    if first < 0 then
        first = length + first + 1
    end
    if last < 0 then
        last = length + last + 1
    end

    if first > length or last > length or first < 1 or last < 1 then
        return str._error( 'String subset index out of range' )
    elseif last < first then
        return str._error( 'String subset indices out of order' )
    end

    return mw.ustring.sub( text, first, last )
end
| |
− | | |
--[[
sublength

Implements the features of {{str sub old}} and is kept in order to maintain
those older templates.

Reads the named arguments directly from frame.args:
    s:   The source string
    i:   Zero-based offset of the first character to return, defaults to 0
    len: Number of characters to return; when absent or not numeric, the
         rest of the string is returned
]]
function str.sublength( frame )
    local args = frame.args
    local offset = tonumber( args.i ) or 0
    local count = tonumber( args.len )

    -- Leave the end index nil when no usable length was given, so that
    -- mw.ustring.sub runs to the end of the string.
    local last
    if count then
        last = offset + count
    end

    return mw.ustring.sub( args.s, offset + 1, last )
end
| | | |
--[[
match

This function returns a substring from the source string that matches a
specified pattern.

Usage:
{{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch_output}}
OR
{{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index
    |match=match_number|plain=plain_flag|nomatch=nomatch_output}}

Parameters
    s: The string to search
    pattern: The pattern or string to find within the string
    start: The index within the source string to start the search. The first
        character of the string has index 1. A negative value counts from
        the end of the string. Defaults to 1.
    match: In some cases it may be possible to make multiple matches on a single
        string. This specifies which match to return, where the first match is
        match = 1. If a negative number is specified then a match is returned
        counting from the last match. Hence match = -1 is the same as requesting
        the last match. Defaults to 1.
    plain: A flag indicating that the pattern should be understood as plain
        text. Defaults to false.
    nomatch: If no match is found, output the "nomatch" value rather than an error.

If invoked using named parameters, MediaWiki will automatically remove any leading or
trailing whitespace from each string. In some circumstances this is desirable, in
other cases one may want to preserve the whitespace.

An error is generated if the source string or the pattern is empty, if start_index is
out of range for the string being queried, if match_number is 0, or if no match is
found (and no nomatch value was supplied). If one adds the parameter
ignore_errors=true, then the error will be suppressed and an empty string will be
returned on any failure.

For information on constructing Lua patterns, a form of regular expression, see:

* http://www.lua.org/manual/5.1/manual.html#5.4.1
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns
* http://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns
]]
function str.match( frame )
    local new_args = str._getParameters( frame.args, {'s', 'pattern', 'start', 'match', 'plain', 'nomatch'} )
    local s = new_args['s'] or ''
    local start = tonumber( new_args['start'] ) or 1
    local plain_flag = str._getBoolean( new_args['plain'] or false )
    local pattern = new_args['pattern'] or ''
    local match_index = math.floor( tonumber( new_args['match'] ) or 1 )
    local nomatch = new_args['nomatch']

    if s == '' then
        return str._error( 'Target string is empty' )
    end
    if pattern == '' then
        return str._error( 'Pattern string is empty' )
    end
    if math.abs( start ) < 1 or math.abs( start ) > mw.ustring.len( s ) then
        return str._error( 'Requested start is out of range' )
    end
    if match_index == 0 then
        return str._error( 'Match index is out of range' )
    end
    if plain_flag then
        pattern = str._escapePattern( pattern )
    end

    local result
    if match_index == 1 then
        -- Finding the first match is the simple case
        result = mw.ustring.match( s, pattern, start )
    else
        -- gmatch is called without a start position, so the subject is
        -- trimmed instead. The check must also catch negative start values
        -- (already validated above): previously only start > 1 was trimmed,
        -- so a negative start was silently ignored here although the
        -- match_index == 1 branch honours it.
        if start ~= 1 then
            s = mw.ustring.sub( s, start )
        end

        local iterator = mw.ustring.gmatch( s, pattern )
        if match_index > 0 then
            -- Forward search: stop at the requested occurrence
            for w in iterator do
                match_index = match_index - 1
                if match_index == 0 then
                    result = w
                    break
                end
            end
        else
            -- Reverse search: collect every match, then index from the end
            local result_table = {}
            local count = 1
            for w in iterator do
                result_table[count] = w
                count = count + 1
            end

            result = result_table[ count + match_index ]
        end
    end

    if result == nil then
        if nomatch == nil then
            return str._error( 'Match not found' )
        else
            return nomatch
        end
    else
        return result
    end
end
| |
− | | |
--[[
pos

Returns the single character found at a given position of the target string.

Usage:
{{#invoke:String|pos|target_string|index_value}}
OR
{{#invoke:String|pos|target=target_string|pos=index_value}}

Parameters
    target: The string to search
    pos: The index for the character to return

If invoked using named parameters, MediaWiki will automatically remove any
leading or trailing whitespace from the target string. In some circumstances
this is desirable, in other cases one may want to preserve the whitespace.

The first character has an index value of 1. A negative index selects a
character counting backwards from the end of the string, so pos = -1 asks for
the last character.

An index of zero (also used when pos is missing or not numeric), or one whose
magnitude exceeds the length of the string, returns an error.
]]
function str.pos( frame )
    local args = str._getParameters( frame.args, { 'target', 'pos' } )
    local text = args.target or ''
    local index = tonumber( args.pos ) or 0

    if index == 0 or math.abs( index ) > mw.ustring.len( text ) then
        return str._error( 'String index out of range' )
    end

    return mw.ustring.sub( text, index, index )
end
| |
− | | |
--[[
str_find

This function duplicates the behavior of {{str_find}}, including all of its quirks.
It is provided in order to support existing templates, but is NOT RECOMMENDED for
new code and templates. New code is recommended to use the "find" function instead.

Returns the first index in "source" that is a match to "target". Indexing is 1-based,
and the function returns -1 if the "target" string is not present in "source".
The target is always searched for as plain text.

Important Note: If the "target" string is empty / missing, this function returns a
value of "1", which is generally unexpected behavior, and must be accounted for
separately.
]]
function str.str_find( frame )
    local args = str._getParameters( frame.args, { 'source', 'target' } )
    local haystack = args.source or ''
    local needle = args.target or ''

    if needle == '' then
        return 1
    end

    -- The fourth argument (true) requests a plain-text search; `or` keeps
    -- only the start index and substitutes -1 when nothing was found.
    return mw.ustring.find( haystack, needle, 1, true ) or -1
end
| |
− | | |
--[[
find

This function allows one to search for a target string or pattern within another
string.

Usage:
{{#invoke:String|find|source_str|target_string|start_index|plain_flag}}
OR
{{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}}

Parameters
    source: The string to search
    target: The string or pattern to find within source
    start: The index within the source string to start the search, defaults to 1
    plain: Boolean flag indicating that target should be understood as plain
        text and not as a Lua style regular expression, defaults to true

If invoked using named parameters, MediaWiki will automatically remove any leading or
trailing whitespace from the parameter. In some circumstances this is desirable, in
other cases one may want to preserve the whitespace.

This function returns the first index >= "start" where "target" can be found
within "source". Indices are 1-based. If "target" is not found, then this
function returns 0. If either "source" or "target" are missing / empty, this
function also returns 0.

This function should be safe for UTF-8 strings.
]]
function str.find( frame )
    local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain'} )
    local source_str = new_args['source'] or ''
    local pattern = new_args['target'] or ''
    local start_pos = tonumber( new_args['start'] ) or 1

    -- Apply the default only when the flag is really absent. The former
    -- `new_args['plain'] or true` also replaced an explicit boolean false
    -- (as a Lua caller may pass) with true.
    local plain = new_args['plain']
    if plain == nil then
        plain = true
    end

    if source_str == '' or pattern == '' then
        return 0
    end

    plain = str._getBoolean( plain )

    -- Only the start index of the match is reported; 0 signals "not found".
    local start = mw.ustring.find( source_str, pattern, start_pos, plain )
    if start == nil then
        start = 0
    end

    return start
end
| |
− | | |
--[[
replace

This function allows one to replace a target string or pattern within another
string.

Usage:
{{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}}
OR
{{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string|
   count=replacement_count|plain=plain_flag}}

Parameters
    source: The string to search
    pattern: The string or pattern to find within source
    replace: The replacement text
    count: The number of occurrences to replace, defaults to all.
    plain: Boolean flag indicating that pattern should be understood as plain
        text and not as a Lua style regular expression, defaults to true

If either "source" or "pattern" is missing / empty, the source string is
returned unchanged.
]]
function str.replace( frame )
    local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain'} )
    local source_str = new_args['source'] or ''
    local pattern = new_args['pattern'] or ''
    local replace = new_args['replace'] or ''
    local count = tonumber( new_args['count'] )

    -- Apply the default only when the flag is really absent. The former
    -- `new_args['plain'] or true` also replaced an explicit boolean false
    -- (as a Lua caller may pass) with true.
    local plain = new_args['plain']
    if plain == nil then
        plain = true
    end

    if source_str == '' or pattern == '' then
        return source_str
    end
    plain = str._getBoolean( plain )

    if plain then
        pattern = str._escapePattern( pattern )
        -- In the replacement text only '%' is special, so that is all that
        -- needs escaping.
        replace = mw.ustring.gsub( replace, "%%", "%%%%" )
    end

    local result

    if count ~= nil then
        result = mw.ustring.gsub( source_str, pattern, replace, count )
    else
        result = mw.ustring.gsub( source_str, pattern, replace )
    end

    -- Assigning to a single local above drops gsub's substitution count, so
    -- only the resulting string is returned.
    return result
end
| |
− | | |
--[[
rep

Simple function to pipe string.rep to templates.

Reads two unnamed arguments from frame.args:
    1: The string to repeat; a missing argument is treated as the empty string
    2: The number of repetitions; an error message is returned when this
       cannot be converted to a number
]]
function str.rep( frame )
    local args = frame.args
    local repetitions = tonumber( args[2] )

    if repetitions then
        return string.rep( args[1] or '', repetitions )
    end

    return str._error( 'function rep expects a number as second parameter, received "' .. ( args[2] or '' ) .. '"' )
end
| |
− | | |
--[[
Helper function that populates the argument list, given that the user may mix
named and unnamed parameters. This matters because named parameters are not
identical to unnamed parameters due to string trimming, and when dealing with
strings we sometimes want to either preserve or remove that whitespace
depending on the application.

For each name in arg_list (in order), the value is taken from
frame_args[name] if present; otherwise the next not-yet-consumed positional
argument is used. Returns a new table keyed by the names in arg_list.
]]
function str._getParameters( frame_args, arg_list )
    local resolved = {}
    local next_positional = 1

    for _, name in ipairs( arg_list ) do
        local value = frame_args[name]
        if value == nil then
            -- No named value: consume one positional slot, even when that
            -- slot turns out to be empty as well.
            value = frame_args[next_positional]
            next_positional = next_positional + 1
        end
        resolved[name] = value
    end

    return resolved
end
| |
− | | |
--[[
Helper function to handle error messages.

Builds the text returned to the page when a function of this module fails,
honouring the global options read from the current frame's arguments:
    ignore_errors:  when true, an empty string is returned instead
    error_category: category to attach, defaults to
                    'Errors reported by Module String'
    no_category:    when true (or when error_category is empty), no category
                    link is attached
]]
function str._error( error_str )
    local args = mw.getCurrentFrame().args

    if str._getBoolean( args.ignore_errors or false ) then
        return ''
    end

    local category = args.error_category or 'Errors reported by Module String'
    local message = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'

    if category == '' or str._getBoolean( args.no_category or false ) then
        return message
    end

    return '[[Category:' .. category .. ']]' .. message
end
| |
− | | |
--[[
Helper function to interpret boolean strings.

A boolean argument is returned unchanged. A string is compared
case-insensitively: 'false', 'no', '0' and the empty string yield false, any
other string yields true. Any other type raises an error.
]]
function str._getBoolean( boolean_str )
    local kind = type( boolean_str )

    if kind == 'boolean' then
        return boolean_str
    end
    if kind ~= 'string' then
        error( 'No boolean value found' )
    end

    -- Lookup set of the spellings that count as false
    local falsy = { ['false'] = true, ['no'] = true, ['0'] = true, [''] = true }
    return not falsy[ boolean_str:lower() ]
end
| |
− | | |
--[[
Helper function that escapes all pattern characters so that they will be treated
as plain text.

Each of the characters ( ) . % + - * ? [ ^ $ ] in pattern_str is prefixed
with '%'. Returns the escaped string only.
]]
function str._escapePattern( pattern_str )
    -- gsub returns the new string AND the substitution count; the enclosing
    -- parentheses truncate that to the string alone, so the count cannot leak
    -- into a caller's argument list or return values.
    return ( mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ) )
end
| | | |
| return str | | return str |