Feature #5588 » 5588_negative_lookahead.patch
| include/ruby/oniguruma.h | ||
|---|---|---|
| 
     #define ONIG_OPTION_NONE                 0U 
   | 
||
| 
     #define ONIG_OPTION_IGNORECASE           1U 
   | 
||
| 
     #define ONIG_OPTION_EXTEND               (ONIG_OPTION_IGNORECASE         << 1) 
   | 
||
| 
     #define ONIG_OPTION_MULTILINE            (ONIG_OPTION_EXTEND             << 1) 
   | 
||
| 
     #define ONIG_OPTION_NEGATE               (ONIG_OPTION_EXTEND             << 1) 
   | 
||
| 
     #define ONIG_OPTION_MULTILINE            (ONIG_OPTION_NEGATE             << 1) 
   | 
||
| 
     #define ONIG_OPTION_SINGLELINE           (ONIG_OPTION_MULTILINE          << 1) 
   | 
||
| 
     #define ONIG_OPTION_FIND_LONGEST         (ONIG_OPTION_SINGLELINE         << 1) 
   | 
||
| 
     #define ONIG_OPTION_FIND_NOT_EMPTY       (ONIG_OPTION_FIND_LONGEST       << 1) 
   | 
||
| re.c | ||
|---|---|---|
| 
     #define KCODE_FIXED FL_USER4 
   | 
||
| 
     #define ARG_REG_OPTION_MASK \ 
   | 
||
| 
         (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) 
   | 
||
| 
         (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND|ONIG_OPTION_NEGATE) 
   | 
||
| 
     #define ARG_ENCODING_FIXED    16 
   | 
||
| 
     #define ARG_ENCODING_NONE     32 
   | 
||
| ... | ... | |
| 
           case 'm': 
   | 
||
| 
     	val = ONIG_OPTION_MULTILINE; 
   | 
||
| 
     	break; 
   | 
||
| 
           case 'v': 
   | 
||
| 
     	val = ONIG_OPTION_NEGATE; 
   | 
||
| 
     	break; 
   | 
||
| 
           default: 
   | 
||
| 
     	val = 0; 
   | 
||
| 
     	break; 
   | 
||
| ... | ... | |
| 
     } 
   | 
||
| 
     static char * 
   | 
||
| 
     option_to_str(char str[4], int options) 
   | 
||
| 
     option_to_str(char str[5], int options) 
   | 
||
| 
     { 
   | 
||
| 
         char *p = str; 
   | 
||
| 
         if (options & ONIG_OPTION_MULTILINE) *p++ = 'm'; 
   | 
||
| 
         if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i'; 
   | 
||
| 
         if (options & ONIG_OPTION_EXTEND) *p++ = 'x'; 
   | 
||
| 
         if (options & ONIG_OPTION_NEGATE) *p++ = 'v'; 
   | 
||
| 
         *p = 0; 
   | 
||
| 
         return str; 
   | 
||
| 
     } 
   | 
||
| ... | ... | |
| 
         rb_reg_expr_str(str, s, len, enc, resenc); 
   | 
||
| 
         rb_str_buf_cat2(str, "/"); 
   | 
||
| 
         if (re) { 
   | 
||
| 
     	char opts[4]; 
   | 
||
| 
     	char opts[5]; 
   | 
||
| 
     	rb_reg_check(re); 
   | 
||
| 
     	if (*option_to_str(opts, RREGEXP(re)->ptr->options)) 
   | 
||
| 
     	    rb_str_buf_cat2(str, opts); 
   | 
||
| ... | ... | |
| 
      *  generally more readable version of <i>rxp</i>. 
   | 
||
| 
      * 
   | 
||
| 
      *      r1 = /ab+c/ix           #=> /ab+c/ix 
   | 
||
| 
      *      s1 = r1.to_s            #=> "(?ix-m:ab+c)" 
   | 
||
| 
      *      r2 = Regexp.new(s1)     #=> /(?ix-m:ab+c)/ 
   | 
||
| 
      *      s1 = r1.to_s            #=> "(?ix-mv:ab+c)" 
   | 
||
| 
      *      r2 = Regexp.new(s1)     #=> /(?ix-mv:ab+c)/ 
   | 
||
| 
      *      r1 == r2                #=> false 
   | 
||
| 
      *      r1.source               #=> "ab+c" 
   | 
||
| 
      *      r2.source               #=> "(?ix-m:ab+c)" 
   | 
||
| 
      *      r2.source               #=> "(?ix-mv:ab+c)" 
   | 
||
| 
      */ 
   | 
||
| 
     static VALUE 
   | 
||
| 
     rb_reg_to_s(VALUE re) 
   | 
||
| 
     { 
   | 
||
| 
         int options, opt; 
   | 
||
| 
         const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; 
   | 
||
| 
         const int embeddable = ARG_REG_OPTION_MASK; 
   | 
||
| 
         long len; 
   | 
||
| 
         const UChar* ptr; 
   | 
||
| 
         VALUE str = rb_str_buf_new2("(?"); 
   | 
||
| 
         char optbuf[5]; 
   | 
||
| 
         char optbuf[6]; 
   | 
||
| 
         rb_encoding *enc = rb_enc_get(re); 
   | 
||
| 
         rb_reg_check(re); 
   | 
||
| ... | ... | |
| 
     static VALUE 
   | 
||
| 
     rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err) 
   | 
||
| 
     { 
   | 
||
| 
         char opts[6]; 
   | 
||
| 
         char opts[7]; 
   | 
||
| 
         VALUE desc = rb_str_buf_new2(err); 
   | 
||
| 
         rb_encoding *resenc = rb_default_internal_encoding(); 
   | 
||
| 
         if (resenc == NULL) resenc = rb_default_external_encoding(); 
   | 
||
| ... | ... | |
| 
      *  options are propagated, and new options may not be specified (a change as of 
   | 
||
| 
      *  Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or 
   | 
||
| 
      *  more of the constants <code>Regexp::EXTENDED</code>, 
   | 
||
| 
      *  <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>, 
   | 
||
| 
      *  <em>or</em>-ed together. Otherwise, if <i>options</i> is not 
   | 
||
| 
      *  <code>nil</code>, the regexp will be case insensitive. 
   | 
||
| 
      *  When the <i>lang</i> parameter is `n' or `N' sets the regexp no encoding. 
   | 
||
| 
      *  <code>Regexp::IGNORECASE</code>, <code>Regexp::MULTILINE</code>, and 
   | 
||
| 
      *  <code>Regexp::NEGATED</code>, <em>or</em>-ed together. Otherwise, if 
   | 
||
| 
      *  <i>options</i> is not <code>nil</code>, the regexp will be case 
   | 
||
| 
      *  insensitive.  When the <i>lang</i> parameter is `n' or `N', the regexp 
   | 
||
| 
      *  is set to have no encoding. 
   | 
||
| 
      * 
   | 
||
| 
      *     r1 = Regexp.new('^a-z+:\\s+\w+')           #=> /^a-z+:\s+\w+/ 
   | 
||
| 
      *     r2 = Regexp.new('cat', true)               #=> /cat/i 
   | 
||
| ... | ... | |
| 
      *     Regexp.union("a+b*c")                #=> /a\+b\*c/ 
   | 
||
| 
      *     Regexp.union("skiing", "sledding")   #=> /skiing|sledding/ 
   | 
||
| 
      *     Regexp.union(["skiing", "sledding"]) #=> /skiing|sledding/ 
   | 
||
| 
      *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mix:dogs)|(?i-mx:cats)/ 
   | 
||
| 
      *     Regexp.union(/dogs/, /cats/i)        #=> /(?-mixv:dogs)|(?i-mxv:cats)/ 
   | 
||
| 
      */ 
   | 
||
| 
     static VALUE 
   | 
||
| 
     rb_reg_s_union_m(VALUE self, VALUE args) 
   | 
||
| ... | ... | |
| 
         /* see Regexp.options and Regexp.new */ 
   | 
||
| 
         rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND)); 
   | 
||
| 
         /* see Regexp.options and Regexp.new */ 
   | 
||
| 
         rb_define_const(rb_cRegexp, "NEGATED", INT2FIX(ONIG_OPTION_NEGATE)); 
   | 
||
| 
         /* see Regexp.options and Regexp.new */ 
   | 
||
| 
         rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE)); 
   | 
||
| 
         /* see Regexp.options and Regexp.new */ 
   | 
||
| 
         rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED)); 
   | 
||
| regexec.c | ||
|---|---|---|
| 
       if (r != ONIG_MISMATCH) 
   | 
||
| 
         fprintf(stderr, "onig_search: error %d\n", r); 
   | 
||
| 
     #endif 
   | 
||
| 
       return r; 
   | 
||
| 
       goto negate; 
   | 
||
| 
      mismatch_no_msa: 
   | 
||
| 
       r = ONIG_MISMATCH; 
   | 
||
| ... | ... | |
| 
       if (r != ONIG_MISMATCH) 
   | 
||
| 
         fprintf(stderr, "onig_search: error %d\n", r); 
   | 
||
| 
     #endif 
   | 
||
| 
       return r; 
   | 
||
| 
       goto negate; 
   | 
||
| 
      match: 
   | 
||
| 
       ONIG_STATE_DEC_THREAD(reg); 
   | 
||
| 
       MATCH_ARG_FREE(msa); 
   | 
||
| 
       return s - str; 
   | 
||
| 
       r = s - str; 
   | 
||
| 
       /* fall through */ 
   | 
||
| 
      negate: 
   | 
||
| 
       if (r >= ONIG_MISMATCH && IS_NEGATE(reg->options)) 
   | 
||
| 
         return r == ONIG_MISMATCH ? ONIG_NORMAL : ONIG_MISMATCH; 
   | 
||
| 
       return r; 
   | 
||
| 
     } 
   | 
||
| 
     extern OnigEncoding 
   | 
||
| regint.h | ||
|---|---|---|
| 
     #define IS_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE) 
   | 
||
| 
     #define IS_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE) 
   | 
||
| 
     #define IS_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND) 
   | 
||
| 
     #define IS_NEGATE(option)         ((option) & ONIG_OPTION_NEGATE) 
   | 
||
| 
     #define IS_FIND_LONGEST(option)   ((option) & ONIG_OPTION_FIND_LONGEST) 
   | 
||
| 
     #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) 
   | 
||
| 
     #define IS_FIND_CONDITION(option) ((option) & \ 
   | 
||
| regparse.c | ||
|---|---|---|
| 
     #ifdef USE_POSIXLINE_OPTION 
   | 
||
| 
         case 'p': 
   | 
||
| 
     #endif 
   | 
||
| 
         case '-': case 'i': case 'm': case 's': case 'x': 
   | 
||
| 
         case '-': case 'i': case 'm': case 's': case 'x': case 'v': 
   | 
||
| 
           { 
   | 
||
| 
     	int neg = 0; 
   | 
||
| ... | ... | |
| 
     	  case '-':  neg = 1; break; 
   | 
||
| 
     	  case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break; 
   | 
||
| 
     	  case 'v':  ONOFF(option, ONIG_OPTION_NEGATE,     neg); break; 
   | 
||
| 
     	  case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; 
   | 
||
| 
     	  case 's': 
   | 
||
| 
     	    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { 
   | 
||
| ... | ... | |
| 
     	    if (r < 0) return r; 
   | 
||
| 
     	    *np = node_new_option(option); 
   | 
||
| 
     	    CHECK_NULL_RETURN_MEMERR(*np); 
   | 
||
| 
                 /* expand "(?v:r)" into "(?:(?!r).)" */ 
   | 
||
| 
                 if (IS_NEGATE(option)) { 
   | 
||
| 
                   Node *seq, *nla, *any; 
   | 
||
| 
                   /* build "(?!r)" */ 
   | 
||
| 
                   nla = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); 
   | 
||
| 
                   CHECK_NULL_RETURN_MEMERR(nla); 
   | 
||
| 
                   NANCHOR(nla)->target = target; 
   | 
||
| 
                   /* build "." */ 
   | 
||
| 
                   any = node_new_anychar(); 
   | 
||
| 
                   if (IS_NULL(any)) { 
   | 
||
| 
                     onig_node_free(nla); 
   | 
||
| 
                     return ONIGERR_MEMORY; 
   | 
||
| 
                   } 
   | 
||
| 
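                   /* put "(?!r)" and "." in sequence */ 
                   /* NOTE(review): seq holds nla as its first element; if onig_node_free() releases list elements recursively, the error path below that frees nla, any and seq would free nla twice -- confirm, and free only any and seq there */ 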
   | 
||
| 
                   seq = node_new_list(nla, NULL); 
   | 
||
| 
                   if (IS_NULL(seq)) { 
   | 
||
| 
                     onig_node_free(nla); 
   | 
||
| 
                     onig_node_free(any); 
   | 
||
| 
                     return ONIGERR_MEMORY; 
   | 
||
| 
                   } 
   | 
||
| 
                   NCDR(seq) = node_new_list(any, NULL); 
   | 
||
| 
                   if (IS_NULL(NCDR(seq))) { 
   | 
||
| 
                     onig_node_free(nla); 
   | 
||
| 
                     onig_node_free(any); 
   | 
||
| 
                     onig_node_free(seq); 
   | 
||
| 
                     return ONIGERR_MEMORY; 
   | 
||
| 
                   } 
   | 
||
| 
                   target = seq; 
   | 
||
| 
                 } 
   | 
||
| 
     	    NENCLOSE(*np)->target = target; 
   | 
||
| 
     	    *src = p; 
   | 
||
| 
     	    return 0; 
   | 
||
| test/ruby/test_regexp.rb | ||
|---|---|---|
| 
       end 
   | 
||
| 
       def test_to_s 
   | 
||
| 
         assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s 
   | 
||
| 
         assert_equal '(?-mixv:\x00)', Regexp.new("\0").to_s 
   | 
||
| 
       end 
   | 
||
| 
       def test_union 
   | 
||
| ... | ... | |
| 
       end 
   | 
||
| 
       def test_to_s2 
   | 
||
| 
         assert_equal('(?-mix:foo)', /(?:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?m-ix:foo)', /(?:foo)/m.to_s) 
   | 
||
| 
         assert_equal('(?mi-x:foo)', /(?:foo)/mi.to_s) 
   | 
||
| 
         assert_equal('(?mix:foo)', /(?:foo)/mix.to_s) 
   | 
||
| 
         assert_equal('(?m-ix:foo)', /(?m-ix:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mi-x:foo)', /(?mi-x:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mix:foo)', /(?mix:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mix:)', /(?mix)/.to_s) 
   | 
||
| 
         assert_equal('(?-mix:(?mix:foo) )', /(?mix:foo) /.to_s) 
   | 
||
| 
         assert_equal('(?-mixv:foo)', /(?:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?m-ixv:foo)', /(?:foo)/m.to_s) 
   | 
||
| 
         assert_equal('(?mi-xv:foo)', /(?:foo)/mi.to_s) 
   | 
||
| 
         assert_equal('(?mix-v:foo)', /(?:foo)/mix.to_s) 
   | 
||
| 
         assert_equal('(?mixv:foo)', /(?:foo)/mixv.to_s) 
   | 
||
| 
         assert_equal('(?m-ixv:foo)', /(?m-ixv:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mi-xv:foo)', /(?mi-xv:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mix-v:foo)', /(?mix-v:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mixv:foo)', /(?mixv:foo)/.to_s) 
   | 
||
| 
         assert_equal('(?mixv:)', /(?mixv)/.to_s) 
   | 
||
| 
         assert_equal('(?-mixv:(?mixv:foo) )', /(?mixv:foo) /.to_s) 
   | 
||
| 
       end 
   | 
||
| 
       def test_casefold_p 
   | 
||
| ... | ... | |
| 
       def test_options 
   | 
||
| 
         assert_equal(Regexp::IGNORECASE, /a/i.options) 
   | 
||
| 
         assert_equal(Regexp::EXTENDED, /a/x.options) 
   | 
||
| 
         assert_equal(Regexp::NEGATED, /a/v.options) 
   | 
||
| 
         assert_equal(Regexp::MULTILINE, /a/m.options) 
   | 
||
| 
       end 
   | 
||
| ... | ... | |
| 
         assert_equal(/foo/, Regexp.union(/foo/)) 
   | 
||
| 
         assert_equal(/foo/, Regexp.union([/foo/])) 
   | 
||
| 
         assert_equal(/\t/, Regexp.union("\t")) 
   | 
||
| 
         assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/)) 
   | 
||
| 
         assert_equal(/(?-mixv:\u3042)|(?-mixv:\u3042)/, Regexp.union(/\u3042/, /\u3042/)) 
   | 
||
| 
         assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")]) 
   | 
||
| 
       end 
   | 
||
| ... | ... | |
| 
         assert_match(/invalid hex escape/, error.message) 
   | 
||
| 
         assert_equal(1, error.message.scan(/.*invalid .*escape.*/i).size, bug3539) 
   | 
||
| 
       end 
   | 
||
| 
       def test_negated_regexp_creation 
   | 
||
| 
         assert_nothing_raised { eval("/ruby/v") } 
   | 
||
| 
         assert_nothing_raised { eval("/(?v:ruby)/") } 
   | 
||
| 
         assert_nothing_raised { eval("/(?-v:ruby)/") } 
   | 
||
| 
         negated = Regexp.new("ruby", Regexp::NEGATED) 
   | 
||
| 
         assert_equal(/ruby/v, negated) 
   | 
||
| 
         assert_equal(/ruby/v, Regexp.new(negated)) 
   | 
||
| 
         assert_equal(/ruby/v, Regexp.new(Regexp.new(negated))) 
   | 
||
| 
         assert_equal(/(?v-mix:ruby)/, Regexp.new(negated.to_s)) 
   | 
||
| 
       end 
   | 
||
| 
       def test_negated_regexp_matching 
   | 
||
| 
         assert_match(/ruby/, "ruby") 
   | 
||
| 
         assert_match(/ruby/, "rubyperl") 
   | 
||
| 
         assert_match(/ruby/, "perlruby") 
   | 
||
| 
         assert_no_match(/ruby/, "perl") 
   | 
||
| 
         assert_match(/(?-v:ruby)/, "ruby") 
   | 
||
| 
         assert_match(/(?-v:ruby)/, "rubyperl") 
   | 
||
| 
         assert_match(/(?-v:ruby)/, "perlruby") 
   | 
||
| 
         assert_no_match(/(?-v:ruby)/, "perl") 
   | 
||
| 
         assert_no_match(/ruby/v, "ruby") 
   | 
||
| 
         assert_no_match(/ruby/v, "rubyperl") 
   | 
||
| 
         assert_no_match(/ruby/v, "perlruby") 
   | 
||
| 
         assert_match(/ruby/v, "perl") 
   | 
||
| 
         assert_match(/(?v:ruby)/, "ruby") 
   | 
||
| 
         assert(/(?v:ruby)/ =~ "ruby") 
   | 
||
| 
         assert_equal(["r", "u", "by"], [$`, $&, $']) 
   | 
||
| 
         assert_no_match(/^(?v:ruby)/, "ruby") 
   | 
||
| 
         assert(/^(?v:ruby)/ !~ "ruby") 
   | 
||
| 
         assert_equal([nil, nil, nil], [$`, $&, $']) 
   | 
||
| 
         assert_match(/(?v:ruby)/, "rubyperl") 
   | 
||
| 
         assert(/(?v:ruby)/ =~ "rubyperl") 
   | 
||
| 
         assert_equal(["r", "u", "byperl"], [$`, $&, $']) 
   | 
||
| 
         assert_no_match(/^(?v:ruby)/, "rubyperl") 
   | 
||
| 
         assert(/^(?v:ruby)/ !~ "rubyperl") 
   | 
||
| 
         assert_equal([nil, nil, nil], [$`, $&, $']) 
   | 
||
| 
         assert_match(/(?v:ruby)/, "perlruby") 
   | 
||
| 
         assert(/(?v:ruby)/ =~ "perlruby") 
   | 
||
| 
         assert_equal(["", "p", "erlruby"], [$`, $&, $']) 
   | 
||
| 
         assert_match(/^(?v:ruby)/, "perlruby") 
   | 
||
| 
         assert(/^(?v:ruby)/ =~ "perlruby") 
   | 
||
| 
         assert_equal(["", "p", "erlruby"], [$`, $&, $']) 
   | 
||
| 
         assert_match(/(?v:ruby)/, "perl") 
   | 
||
| 
         assert(/(?v:ruby)/ =~ "perl") 
   | 
||
| 
         assert_equal(["", "p", "erl"], [$`, $&, $']) 
   | 
||
| 
         assert_match(/^(?v:ruby)/, "perl") 
   | 
||
| 
         assert(/^(?v:ruby)/ =~ "perl") 
   | 
||
| 
         assert_equal(["", "p", "erl"], [$`, $&, $']) 
   | 
||
| 
         assert_no_match(/a(?v:b)c/, "abc") 
   | 
||
| 
         assert(/a(?v:b)c/ !~ "abc") 
   | 
||
| 
         assert_equal([nil, nil, nil], [$`, $&, $']) 
   | 
||
| 
         assert_no_match(/a(?v:b)c/, "ac") 
   | 
||
| 
         assert_match(/a(?v:b)?c/, "ac") 
   | 
||
| 
         assert_match(/a(?v:b)c/, "axc") 
   | 
||
| 
         assert_no_match(/a(?v:b)c/, "axbc") 
   | 
||
| 
         assert_no_match(/a(?v:b)+c/, "axbc") 
   | 
||
| 
         assert_match(/a(?v:b)bc/, "axbc") 
   | 
||
| 
         assert_no_match(/a(?v:b)c/, "ab_c") 
   | 
||
| 
         assert_no_match(/a(?v:b)c/, "a_bc") 
   | 
||
| 
         assert_match(/a(?v:b)bc/, "a_bc") 
   | 
||
| 
         assert_no_match(/"[^<&"]*"/, '"aa<&a"') 
   | 
||
| 
         assert_no_match(/"(?v:[<&"])*"/, '"aa<&a"') 
   | 
||
| 
         assert_no_match(/"(?v:<|&|")*"/, '"aa<&a"') 
   | 
||
| 
         languages = %w[ruby perl python lisp smalltalk] 
   | 
||
| 
         assert_equal %w[perl lisp smalltalk], languages.grep(/l/) 
   | 
||
| 
         assert_equal %w[ruby python], languages.grep(/l/v) 
   | 
||
| 
         assert_equal %w[ruby perl python smalltalk], languages.grep(/^(?v:l)/) 
   | 
||
| 
       end 
   | 
||
| 
     end 
   | 
||
- « Previous
 - 1
 - 2
 - 3
 - 4
 - Next »