class MatchData
MatchData
encapsulates the result of matching a Regexp
against string. It is returned by Regexp#match
and String#match
, and also stored in a global variable returned by Regexp.last_match
.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html' m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0"> m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html" m.regexp # => /(\d\.?)+/ # entire matched substring: m[0] # => "2.5.0" # Working with unnamed captures m = url.match(%r{([^/]+)/([^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"] # Working with named captures m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"} m[:version] # => "2.5.0" m.values_at(:version, :module) # => ["2.5.0", "MatchData"] # Numerical indexes are working, too m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence¶ ↑
Parts of last MatchData
(returned by Regexp.last_match
) are also aliased as global variables:
-
$~
isRegexp.last_match
; -
$&
isRegexp.last_match
[ 0 ]
; -
$1
,$2
, and so on areRegexp.last_match
[ i ]
(captures by number); -
$`
isRegexp.last_match
.pre_match
; -
$'
isRegexp.last_match
.post_match
; -
$+
isRegexp.last_match
[ -1 ]
(the last capture).
See also “Special global variables” section in Regexp
documentation.
Public Instance Methods
Returns true
if object
is another MatchData object whose target string, regexp, match, and captures are the same as self
, false
otherwise.
When arguments index
, +start and length
, or range
are given, returns match and captures in the style of Array#[]
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m[1, 2] # => ["H", "X"] m[1..3] # => ["H", "X", "113"] m[-3, 2] # => ["X", "113"]
When string or symbol argument name
is given, returns the matched substring for the given name:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m['foo'] # => "h" m[:bar] # => "ge"
If multiple captures have the same name, returns the last matched substring.
m = /(?<foo>.)(?<foo>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" foo:"oge"> m[:foo] #=> "oge" m = /\W(?<foo>.+)|\w(?<foo>.+)|(?<foo>.+)/.match("hoge") #<MatchData "hoge" foo:nil foo:"oge" foo:nil> m[:foo] #=> "oge"
static VALUE match_aref(int argc, VALUE *argv, VALUE match) { VALUE idx, length; match_check(match); rb_scan_args(argc, argv, "11", &idx, &length); if (NIL_P(length)) { if (FIXNUM_P(idx)) { return rb_reg_nth_match(FIX2INT(idx), match); } else { int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx); if (num >= 0) { return rb_reg_nth_match(num, match); } else { return match_ary_aref(match, idx, Qnil); } } } else { long beg = NUM2LONG(idx); long len = NUM2LONG(length); long num_regs = RMATCH_REGS(match)->num_regs; if (len < 0) { return Qnil; } if (beg < 0) { beg += num_regs; if (beg < 0) return Qnil; } else if (beg > num_regs) { return Qnil; } if (beg+len > num_regs) { len = num_regs - beg; } return match_ary_subseq(match, beg, len, Qnil); } }
Returns the offset (in characters) of the beginning of the specified match.
When non-negative integer argument n
is given, returns the offset of the beginning of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.begin(0) # => 1 m[3] # => "113" m.begin(3) # => 3 m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.begin(0) # => 0 m[3] # => "с" m.begin(3) # => 2
When string or symbol argument name
is given, returns the offset of the beginning for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.begin('foo') # => 0 m[:bar] # => "g" m.begin(:bar) # => 2
Related: MatchData#end
, MatchData#offset
, MatchData#byteoffset
.
static VALUE match_begin(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); backref_number_check(regs, i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg); }
Returns a two-element array containing the beginning and ending byte-based offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.byteoffset(0) #=> [1, 7] m.byteoffset(4) #=> [6, 7] m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.byteoffset(:foo) #=> [0, 1] p m.byteoffset(:bar) #=> [2, 3]
static VALUE match_byteoffset(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); backref_number_check(regs, i); if (BEG(i) < 0) return rb_assoc_new(Qnil, Qnil); return rb_assoc_new(LONG2NUM(BEG(i)), LONG2NUM(END(i))); }
Returns the array of captures, which are all matches except m[0]
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.captures # => ["H", "X", "113", "8"]
Related: MatchData.to_a
.
static VALUE match_captures(VALUE match) { return match_array(match, 1); }
Returns a hash of the named captures for the given names.
m = /(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/.match("18:37:22") m.deconstruct_keys([:hours, :minutes]) # => {:hours => "18", :minutes => "37"} m.deconstruct_keys(nil) # => {:hours => "18", :minutes => "37", :seconds => "22"}
Returns an empty hash if no named captures were defined:
m = /(\d{2}):(\d{2}):(\d{2})/.match("18:37:22") m.deconstruct_keys(nil) # => {}
static VALUE match_deconstruct_keys(VALUE match, VALUE keys) { VALUE h; long i; match_check(match); if (NIL_P(RMATCH(match)->regexp)) { return rb_hash_new_with_size(0); } if (NIL_P(keys)) { h = rb_hash_new_with_size(onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp))); struct MEMO *memo; memo = MEMO_NEW(h, match, 1); onig_foreach_name(RREGEXP_PTR(RMATCH(match)->regexp), match_named_captures_iter, (void*)memo); return h; } Check_Type(keys, T_ARRAY); if (onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)) < RARRAY_LEN(keys)) { return rb_hash_new_with_size(0); } h = rb_hash_new_with_size(RARRAY_LEN(keys)); for (i=0; i<RARRAY_LEN(keys); i++) { VALUE key = RARRAY_AREF(keys, i); VALUE name; Check_Type(key, T_SYMBOL); name = rb_sym2str(key); int num = NAME_TO_NUMBER(RMATCH_REGS(match), RMATCH(match)->regexp, RMATCH(match)->regexp, RSTRING_PTR(name), RSTRING_END(name)); if (num >= 0) { rb_hash_aset(h, key, rb_reg_nth_match(num, match)); } else { return h; } } return h; }
Returns the offset (in characters) of the end of the specified match.
When non-negative integer argument n
is given, returns the offset of the end of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.end(0) # => 7 m[3] # => "113" m.end(3) # => 6 m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.end(0) # => 3 m[3] # => "с" m.end(3) # => 3
When string or symbol argument name
is given, returns the offset of the end for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.end('foo') # => 1 m[:bar] # => "g" m.end(:bar) # => 3
Related: MatchData#begin
, MatchData#offset
, MatchData#byteoffset
.
static VALUE match_end(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); backref_number_check(regs, i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end); }
Returns the integer hash value for self
, based on the target string, regexp, match, and captures.
See also Object#hash
.
static VALUE match_hash(VALUE match) { const struct re_registers *regs; st_index_t hashval; match_check(match); hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str)); hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match))); regs = RMATCH_REGS(match); hashval = rb_hash_uint(hashval, regs->num_regs); hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg))); hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end))); hashval = rb_hash_end(hashval); return ST2FIX(hashval); }
Returns a string representation of self
:
m = /.$/.match("foo") # => #<MatchData "o"> m.inspect # => "#<MatchData \"o\">" m = /(.)(.)(.)/.match("foo") # => #<MatchData "foo" 1:"f" 2:"o" 3:"o"> m.inspect # => "#<MatchData \"foo\" 1:\"f\" 2:\"o\ m = /(.)(.)?(.)/.match("fo") # => #<MatchData "fo" 1:"f" 2:nil 3:"o"> m.inspect # => "#<MatchData \"fo\" 1:\"f\" 2:nil 3:\"o\">"
Related: MatchData#to_s
.
static VALUE match_inspect(VALUE match) { VALUE cname = rb_class_path(rb_obj_class(match)); VALUE str; int i; struct re_registers *regs = RMATCH_REGS(match); int num_regs = regs->num_regs; struct backref_name_tag *names; VALUE regexp = RMATCH(match)->regexp; if (regexp == 0) { return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match); } else if (NIL_P(regexp)) { return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">", cname, rb_reg_nth_match(0, match)); } names = ALLOCA_N(struct backref_name_tag, num_regs); MEMZERO(names, struct backref_name_tag, num_regs); onig_foreach_name(RREGEXP_PTR(regexp), match_inspect_name_iter, names); str = rb_str_buf_new2("#<"); rb_str_append(str, cname); for (i = 0; i < num_regs; i++) { VALUE v; rb_str_buf_cat2(str, " "); if (0 < i) { if (names[i].name) rb_str_buf_cat(str, (const char *)names[i].name, names[i].len); else { rb_str_catf(str, "%d", i); } rb_str_buf_cat2(str, ":"); } v = rb_reg_nth_match(i, match); if (NIL_P(v)) rb_str_buf_cat2(str, "nil"); else rb_str_buf_append(str, rb_str_inspect(v)); } rb_str_buf_cat2(str, ">"); return str; }
Returns the matched substring corresponding to the given argument.
When non-negative argument n
is given, returns the matched substring for the n
th match:
m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil> m.match(0) # => "HX1138" m.match(4) # => "8" m.match(5) # => nil
When string or symbol argument name
is given, returns the matched substring for the given name:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.match('foo') # => "h" m.match(:bar) # => "ge"
static VALUE match_nth(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); backref_number_check(regs, i); long start = BEG(i), end = END(i); if (start < 0) return Qnil; return rb_str_subseq(RMATCH(match)->str, start, end - start); }
Returns the length (in characters) of the matched substring corresponding to the given argument.
When non-negative argument n
is given, returns the length of the matched substring for the n
th match:
m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil> m.match_length(0) # => 6 m.match_length(4) # => 1 m.match_length(5) # => nil
When string or symbol argument name
is given, returns the length of the matched substring for the named match:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.match_length('foo') # => 1 m.match_length(:bar) # => 2
static VALUE match_nth_length(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); backref_number_check(regs, i); if (BEG(i) < 0) return Qnil; update_char_offset(match); const struct rmatch_offset *const ofs = &RMATCH_EXT(match)->char_offset[i]; return LONG2NUM(ofs->end - ofs->beg); }
Returns a hash of the named captures; each key is a capture name; each value is its captured string or nil
:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.named_captures # => {"foo"=>"h", "bar"=>"ge"} m = /(?<a>.)(?<b>.)/.match("01") # => #<MatchData "01" a:"0" b:"1"> m.named_captures #=> {"a" => "0", "b" => "1"} m = /(?<a>.)(?<b>.)?/.match("0") # => #<MatchData "0" a:"0" b:nil> m.named_captures #=> {"a" => "0", "b" => nil} m = /(?<a>.)(?<a>.)/.match("01") # => #<MatchData "01" a:"0" a:"1"> m.named_captures #=> {"a" => "1"}
If keyword argument symbolize_names
is given a true value, the keys in the resulting hash are Symbols:
m = /(?<a>.)(?<a>.)/.match("01") # => #<MatchData "01" a:"0" a:"1"> m.named_captures(symbolize_names: true) #=> {:a => "1"}
static VALUE match_named_captures(int argc, VALUE *argv, VALUE match) { VALUE hash; struct MEMO *memo; match_check(match); if (NIL_P(RMATCH(match)->regexp)) return rb_hash_new(); VALUE opt; VALUE symbolize_names = 0; rb_scan_args(argc, argv, "0:", &opt); if (!NIL_P(opt)) { static ID keyword_ids[1]; VALUE symbolize_names_val; if (!keyword_ids[0]) { keyword_ids[0] = rb_intern_const("symbolize_names"); } rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val); if (!UNDEF_P(symbolize_names_val) && RTEST(symbolize_names_val)) { symbolize_names = 1; } } hash = rb_hash_new(); memo = MEMO_NEW(hash, match, symbolize_names); onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo); return hash; }
Returns an array of the capture names (see Named Captures):
m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"o" baz:"g"> m.names # => ["foo", "bar", "baz"] m = /foo/.match('foo') # => #<MatchData "foo"> m.names # => [] # No named captures.
Equivalent to:
m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge") m.regexp.names # => ["foo", "bar", "baz"]
static VALUE match_names(VALUE match) { match_check(match); if (NIL_P(RMATCH(match)->regexp)) return rb_ary_new_capa(0); return rb_reg_names(RMATCH(match)->regexp); }
Returns a 2-element array containing the beginning and ending offsets (in characters) of the specified match.
When non-negative integer argument n
is given, returns the starting and ending offsets of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.offset(0) # => [1, 7] m[3] # => "113" m.offset(3) # => [3, 6] m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.offset(0) # => [0, 3] m[3] # => "с" m.offset(3) # => [2, 3]
When string or symbol argument name
is given, returns the starting and ending offsets for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.offset('foo') # => [0, 1] m[:bar] # => "g" m.offset(:bar) # => [2, 3]
Related: MatchData#byteoffset
, MatchData#begin
, MatchData#end
.
static VALUE match_offset(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); backref_number_check(regs, i); if (BEG(i) < 0) return rb_assoc_new(Qnil, Qnil); update_char_offset(match); return rb_assoc_new(LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg), LONG2NUM(RMATCH_EXT(match)->char_offset[i].end)); }
Returns the substring of the target string from the end of the first match in self
(that is, self[0]
) to the end of the string; equivalent to regexp global variable $'
:
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.post_match # => ": The Movie"\
Related: MatchData.pre_match
.
VALUE rb_reg_match_post(VALUE match) { VALUE str; long pos; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = RMATCH(match)->str; pos = END(0); str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); return str; }
Returns the substring of the target string from its beginning up to the first match in self
(that is, self[0]
); equivalent to regexp global variable $`
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.pre_match # => "T"
Related: MatchData#post_match
.
VALUE rb_reg_match_pre(VALUE match) { VALUE str; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0)); return str; }
Returns the regexp that produced the match:
m = /a.*b/.match("abc") # => #<MatchData "ab"> m.regexp # => /a.*b/
static VALUE match_regexp(VALUE match) { VALUE regexp; match_check(match); regexp = RMATCH(match)->regexp; if (NIL_P(regexp)) { VALUE str = rb_reg_nth_match(0, match); regexp = rb_reg_regcomp(rb_reg_quote(str)); RB_OBJ_WRITE(match, &RMATCH(match)->regexp, regexp); } return regexp; }
Returns size of the match array:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.size # => 5
static VALUE match_size(VALUE match) { match_check(match); return INT2FIX(RMATCH_REGS(match)->num_regs); }
Returns the target string if it was frozen; otherwise, returns a frozen copy of the target string:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.string # => "THX1138."
static VALUE match_string(VALUE match) { match_check(match); return RMATCH(match)->str; /* str is frozen */ }
Returns the array of matches:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.to_a # => ["HX1138", "H", "X", "113", "8"]
Related: MatchData#captures
.
static VALUE match_to_a(VALUE match) { return match_array(match, 0); }
Returns the matched string:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.to_s # => "HX1138" m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.to_s # => "hoge"
Related: MatchData.inspect
.
static VALUE match_to_s(VALUE match) { VALUE str = rb_reg_last_match(match_check(match)); if (NIL_P(str)) str = rb_str_new(0,0); return str; }
Returns match and captures at the given indexes
, which may include any mixture of:
-
Integers.
-
Ranges.
-
Names (strings and symbols).
Examples:
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.values_at(0, 2, -2) # => ["HX1138", "X", "113"] m.values_at(1..2, -1) # => ["H", "X", "8"] m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2") # => #<MatchData "1 + 2" a:"1" op:"+" b:"2"> m.values_at(0, 1..2, :a, :b, :op) # => ["1 + 2", "1", "+", "1", "2", "+"]
static VALUE match_values_at(int argc, VALUE *argv, VALUE match) { VALUE result; int i; match_check(match); result = rb_ary_new2(argc); for (i=0; i<argc; i++) { if (FIXNUM_P(argv[i])) { rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match)); } else { int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]); if (num >= 0) { rb_ary_push(result, rb_reg_nth_match(num, match)); } else { match_ary_aref(match, argv[i], result); } } } return result; }