class MatchData
MatchData
encapsulates the result of matching a Regexp
against string. It is returned by Regexp#match
and String#match
, and also stored in a global variable returned by Regexp.last_match
.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html' m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0"> m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html" m.regexp # => /(\d\.?)+/ # entire matched substring: m[0] # => "2.5.0" # Working with unnamed captures m = url.match(%r{([^/]+)/([^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"] # Working with named captures m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"} m[:version] # => "2.5.0" m.values_at(:version, :module) # => ["2.5.0", "MatchData"] # Numerical indexes are working, too m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence¶ ↑
Parts of last MatchData
(returned by Regexp.last_match
) are also aliased as global variables:
-
$~
isRegexp.last_match
; -
$&
isRegexp.last_match[0]
; -
$1
,$2
, and so on areRegexp.last_match[i]
(captures by number); -
$`
isRegexp.last_match.pre_match
; -
$'
isRegexp.last_match.post_match
; -
$+
isRegexp.last_match[-1]
(the last capture).
See also “Special global variables” section in Regexp
documentation.
Public Instance Methods
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE match_equal(VALUE match1, VALUE match2) { const struct re_registers *regs1, *regs2; if (match1 == match2) return Qtrue; if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse; if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse; if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse; if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse; regs1 = RMATCH_REGS(match1); regs2 = RMATCH_REGS(match2); if (regs1->num_regs != regs2->num_regs) return Qfalse; if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse; if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse; return Qtrue; }
Match Reference – MatchData
acts as an array, and may be accessed using the normal array indexing techniques. mtch[0]
is equivalent to the special variable $&
, and returns the entire matched string. mtch[1]
, mtch[2]
, and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] #=> "HX1138" m[1, 2] #=> ["H", "X"] m[1..3] #=> ["H", "X", "113"] m[-3, 2] #=> ["X", "113"] m = /(?<foo>a+)b/.match("ccaaab") m #=> #<MatchData "aaab" foo:"aaa"> m["foo"] #=> "aaa" m[:foo] #=> "aaa"
static VALUE match_aref(int argc, VALUE *argv, VALUE match) { VALUE idx, length; match_check(match); rb_scan_args(argc, argv, "11", &idx, &length); if (NIL_P(length)) { if (FIXNUM_P(idx)) { return rb_reg_nth_match(FIX2INT(idx), match); } else { int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx); if (num >= 0) { return rb_reg_nth_match(num, match); } else { return match_ary_aref(match, idx, Qnil); } } } else { long beg = NUM2LONG(idx); long len = NUM2LONG(length); long num_regs = RMATCH_REGS(match)->num_regs; if (len < 0) { return Qnil; } if (beg < 0) { beg += num_regs; if (beg < 0) return Qnil; } else if (beg > num_regs) { return Qnil; } else if (beg+len > num_regs) { len = num_regs - beg; } return match_ary_subseq(match, beg, len, Qnil); } }
Returns the offset of the start of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.begin(0) #=> 1 m.begin(2) #=> 2 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.begin(:foo) #=> 0 p m.begin(:bar) #=> 2
static VALUE match_begin(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg); }
Returns the array of captures; equivalent to mtch.to_a[1..-1]
.
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures f1 #=> "H" f2 #=> "X" f3 #=> "113" f4 #=> "8"
static VALUE match_captures(VALUE match) { return match_array(match, 1); }
Returns the offset of the character immediately following the end of the nth element of the match array in the string. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.end(0) #=> 7 m.end(2) #=> 3 m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.end(:foo) #=> 1 p m.end(:bar) #=> 3
static VALUE match_end(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return Qnil; update_char_offset(match); return INT2FIX(RMATCH(match)->rmatch->char_offset[i].end); }
Equality—Two matchdata are equal if their target strings, patterns, and matched positions are identical.
static VALUE match_equal(VALUE match1, VALUE match2) { const struct re_registers *regs1, *regs2; if (match1 == match2) return Qtrue; if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse; if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse; if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse; if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse; regs1 = RMATCH_REGS(match1); regs2 = RMATCH_REGS(match2); if (regs1->num_regs != regs2->num_regs) return Qfalse; if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse; if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse; return Qtrue; }
Produce a hash based on the target string, regexp and matched positions of this matchdata.
See also Object#hash.
static VALUE match_hash(VALUE match) { const struct re_registers *regs; st_index_t hashval; match_check(match); hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str)); hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match))); regs = RMATCH_REGS(match); hashval = rb_hash_uint(hashval, regs->num_regs); hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg))); hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end))); hashval = rb_hash_end(hashval); return ST2FIX(hashval); }
Returns a printable version of mtch.
puts /.$/.match("foo").inspect #=> #<MatchData "o"> puts /(.)(.)(.)/.match("foo").inspect #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> puts /(.)(.)?(.)/.match("fo").inspect #=> #<MatchData "fo" 1:"f" 2:nil 3:"o"> puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
static VALUE match_inspect(VALUE match) { VALUE cname = rb_class_path(rb_obj_class(match)); VALUE str; int i; struct re_registers *regs = RMATCH_REGS(match); int num_regs = regs->num_regs; struct backref_name_tag *names; VALUE regexp = RMATCH(match)->regexp; if (regexp == 0) { return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match); } else if (NIL_P(regexp)) { return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">", cname, rb_reg_nth_match(0, match)); } names = ALLOCA_N(struct backref_name_tag, num_regs); MEMZERO(names, struct backref_name_tag, num_regs); onig_foreach_name(RREGEXP_PTR(regexp), match_inspect_name_iter, names); str = rb_str_buf_new2("#<"); rb_str_append(str, cname); for (i = 0; i < num_regs; i++) { VALUE v; rb_str_buf_cat2(str, " "); if (0 < i) { if (names[i].name) rb_str_buf_cat(str, (const char *)names[i].name, names[i].len); else { rb_str_catf(str, "%d", i); } rb_str_buf_cat2(str, ":"); } v = rb_reg_nth_match(i, match); if (v == Qnil) rb_str_buf_cat2(str, "nil"); else rb_str_buf_append(str, rb_str_inspect(v)); } rb_str_buf_cat2(str, ">"); return str; }
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE match_size(VALUE match) { match_check(match); return INT2FIX(RMATCH_REGS(match)->num_regs); }
Returns a Hash
using named capture.
A key of the hash is a name of the named captures. A value of the hash is a string of last successful capture of corresponding group.
m = /(?<a>.)(?<b>.)/.match("01") m.named_captures #=> {"a" => "0", "b" => "1"} m = /(?<a>.)(?<b>.)?/.match("0") m.named_captures #=> {"a" => "0", "b" => nil} m = /(?<a>.)(?<a>.)/.match("01") m.named_captures #=> {"a" => "1"} m = /(?<a>x)|(?<a>y)/.match("x") m.named_captures #=> {"a" => "x"}
static VALUE match_named_captures(VALUE match) { VALUE hash; struct MEMO *memo; match_check(match); if (NIL_P(RMATCH(match)->regexp)) return rb_hash_new(); hash = rb_hash_new(); memo = MEMO_NEW(hash, match, 0); onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo); return hash; }
Returns a list of names of captures as an array of strings. It is same as mtch.regexp.names.
/(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").names #=> ["foo", "bar", "baz"] m = /(?<x>.)(?<y>.)?/.match("a") #=> #<MatchData "a" x:"a" y:nil> m.names #=> ["x", "y"]
static VALUE match_names(VALUE match) { match_check(match); if (NIL_P(RMATCH(match)->regexp)) return rb_ary_new_capa(0); return rb_reg_names(RMATCH(match)->regexp); }
Returns a two-element array containing the beginning and ending offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.offset(0) #=> [1, 7] m.offset(4) #=> [6, 7] m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.offset(:foo) #=> [0, 1] p m.offset(:bar) #=> [2, 3]
static VALUE match_offset(VALUE match, VALUE n) { int i = match_backref_number(match, n); struct re_registers *regs = RMATCH_REGS(match); match_check(match); if (i < 0 || regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (BEG(i) < 0) return rb_assoc_new(Qnil, Qnil); update_char_offset(match); return rb_assoc_new(INT2FIX(RMATCH(match)->rmatch->char_offset[i].beg), INT2FIX(RMATCH(match)->rmatch->char_offset[i].end)); }
Returns the portion of the original string after the current match. Equivalent to the special variable $'
.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.post_match #=> ": The Movie"
VALUE rb_reg_match_post(VALUE match) { VALUE str; long pos; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = RMATCH(match)->str; pos = END(0); str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; }
Returns the portion of the original string before the current match. Equivalent to the special variable $`
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.pre_match #=> "T"
VALUE rb_reg_match_pre(VALUE match) { VALUE str; struct re_registers *regs; if (NIL_P(match)) return Qnil; match_check(match); regs = RMATCH_REGS(match); if (BEG(0) == -1) return Qnil; str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0)); if (OBJ_TAINTED(match)) OBJ_TAINT(str); return str; }
Returns the regexp.
m = /a.*b/.match("abc") m.regexp #=> /a.*b/
static VALUE match_regexp(VALUE match) { VALUE regexp; match_check(match); regexp = RMATCH(match)->regexp; if (NIL_P(regexp)) { VALUE str = rb_reg_nth_match(0, match); regexp = rb_reg_regcomp(rb_reg_quote(str)); RMATCH(match)->regexp = regexp; } return regexp; }
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
static VALUE match_size(VALUE match) { match_check(match); return INT2FIX(RMATCH_REGS(match)->num_regs); }
Returns a frozen copy of the string passed in to match
.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.string #=> "THX1138."
static VALUE match_string(VALUE match) { match_check(match); return RMATCH(match)->str; /* str is frozen */ }
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a
is called when expanding *
variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = * /(.)(.)(\d+)(\d)/.match("THX1138.") all #=> "HX1138" f1 #=> "H" f2 #=> "X" f3 #=> "113"
static VALUE match_to_a(VALUE match) { return match_array(match, 0); }
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_s #=> "HX1138"
static VALUE match_to_s(VALUE match) { VALUE str = rb_reg_last_match(match); match_check(match); if (NIL_P(str)) str = rb_str_new(0,0); if (OBJ_TAINTED(match)) OBJ_TAINT(str); if (OBJ_TAINTED(RMATCH(match)->str)) OBJ_TAINT(str); return str; }
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.to_a #=> ["HX1138", "H", "X", "113", "8"] m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"] m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2") m.to_a #=> ["1 + 2", "1", "+", "2"] m.values_at(:a, :b, :op) #=> ["1", "2", "+"]
static VALUE match_values_at(int argc, VALUE *argv, VALUE match) { VALUE result; int i; match_check(match); result = rb_ary_new2(argc); for (i=0; i<argc; i++) { if (FIXNUM_P(argv[i])) { rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match)); } else { int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]); if (num >= 0) { rb_ary_push(result, rb_reg_nth_match(num, match)); } else { match_ary_aref(match, argv[i], result); } } } return result; }