Project

General

Profile

Bug #6719 » 0001-altered-documentation-for-String.sub-where-verbage-w.patch

blackwatertepes (Tyler Kuhn), 07/11/2012 06:47 AM

View differences:

string.c
79 79

  
80 80
#define STR_SET_LEN(str, n) do { \
81 81
    if (STR_EMBED_P(str)) {\
82
	STR_SET_EMBED_LEN((str), (n));\
82
  STR_SET_EMBED_LEN((str), (n));\
83 83
    }\
84 84
    else {\
85
	RSTRING(str)->as.heap.len = (n);\
85
  RSTRING(str)->as.heap.len = (n);\
86 86
    }\
87 87
} while (0)
88 88

  
89 89
#define STR_DEC_LEN(str) do {\
90 90
    if (STR_EMBED_P(str)) {\
91
	long n = RSTRING_LEN(str);\
92
	n--;\
93
	STR_SET_EMBED_LEN((str), n);\
91
  long n = RSTRING_LEN(str);\
92
  n--;\
93
  STR_SET_EMBED_LEN((str), n);\
94 94
    }\
95 95
    else {\
96
	RSTRING(str)->as.heap.len--;\
96
  RSTRING(str)->as.heap.len--;\
97 97
    }\
98 98
} while (0)
99 99

  
100 100
#define RESIZE_CAPA(str,capacity) do {\
101 101
    if (STR_EMBED_P(str)) {\
102
	if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
103
	    char *tmp = ALLOC_N(char, (capacity)+1);\
104
	    memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
105
	    RSTRING(str)->as.heap.ptr = tmp;\
106
	    RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
102
  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
103
      char *tmp = ALLOC_N(char, (capacity)+1);\
104
      memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
105
      RSTRING(str)->as.heap.ptr = tmp;\
106
      RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
107 107
            STR_SET_NOEMBED(str);\
108
	    RSTRING(str)->as.heap.aux.capa = (capacity);\
109
	}\
108
      RSTRING(str)->as.heap.aux.capa = (capacity);\
109
  }\
110 110
    }\
111 111
    else {\
112
	REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
113
	if (!STR_NOCAPA_P(str))\
114
	    RSTRING(str)->as.heap.aux.capa = (capacity);\
112
  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
113
  if (!STR_NOCAPA_P(str))\
114
      RSTRING(str)->as.heap.aux.capa = (capacity);\
115 115
    }\
116 116
} while (0)
117 117

  
......
232 232
    const char *p = s;
233 233

  
234 234
    if (*cr == ENC_CODERANGE_BROKEN)
235
	return e - s;
235
  return e - s;
236 236

  
237 237
    if (rb_enc_to_index(enc) == 0) {
238
	/* enc is ASCII-8BIT.  ASCII-8BIT string never be broken. */
239
	p = search_nonascii(p, e);
240
	*cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
241
	return e - s;
238
  /* enc is ASCII-8BIT.  ASCII-8BIT string never be broken. */
239
  p = search_nonascii(p, e);
240
  *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
241
  return e - s;
242 242
    }
243 243
    else if (rb_enc_asciicompat(enc)) {
244
	p = search_nonascii(p, e);
245
	if (!p) {
246
	    if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
247
	    return e - s;
248
	}
249
	while (p < e) {
250
	    int ret = rb_enc_precise_mbclen(p, e, enc);
251
	    if (!MBCLEN_CHARFOUND_P(ret)) {
252
		*cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
253
		return p - s;
254
	    }
255
	    p += MBCLEN_CHARFOUND_LEN(ret);
256
	    if (p < e) {
257
		p = search_nonascii(p, e);
258
		if (!p) {
259
		    *cr = ENC_CODERANGE_VALID;
260
		    return e - s;
261
		}
262
	    }
263
	}
264
	*cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
265
	return p - s;
244
  p = search_nonascii(p, e);
245
  if (!p) {
246
      if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
247
      return e - s;
248
  }
249
  while (p < e) {
250
      int ret = rb_enc_precise_mbclen(p, e, enc);
251
      if (!MBCLEN_CHARFOUND_P(ret)) {
252
    *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
253
    return p - s;
254
      }
255
      p += MBCLEN_CHARFOUND_LEN(ret);
256
      if (p < e) {
257
    p = search_nonascii(p, e);
258
    if (!p) {
259
        *cr = ENC_CODERANGE_VALID;
260
        return e - s;
261
    }
262
      }
263
  }
264
  *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
265
  return p - s;
266 266
    }
267 267
    else {
268
	while (p < e) {
269
	    int ret = rb_enc_precise_mbclen(p, e, enc);
270
	    if (!MBCLEN_CHARFOUND_P(ret)) {
271
		*cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
272
		return p - s;
273
	    }
274
	    p += MBCLEN_CHARFOUND_LEN(ret);
275
	}
276
	*cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
277
	return p - s;
268
  while (p < e) {
269
      int ret = rb_enc_precise_mbclen(p, e, enc);
270
      if (!MBCLEN_CHARFOUND_P(ret)) {
271
    *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
272
    return p - s;
273
      }
274
      p += MBCLEN_CHARFOUND_LEN(ret);
275
  }
276
  *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
277
  return p - s;
278 278
    }
279 279
}
280 280

  
......
293 293
    str_enc_copy(dest, src);
294 294
    switch (ENC_CODERANGE(src)) {
295 295
      case ENC_CODERANGE_7BIT:
296
	ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
297
	break;
296
  ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
297
  break;
298 298
      case ENC_CODERANGE_VALID:
299
	if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
300
	    search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
301
	    ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
302
	else
303
	    ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
304
	break;
299
  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
300
      search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
301
      ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
302
  else
303
      ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
304
  break;
305 305
      default:
306
	if (RSTRING_LEN(dest) == 0) {
307
	    if (!rb_enc_asciicompat(STR_ENC_GET(src)))
308
		ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
309
	    else
310
		ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
311
	}
312
	break;
306
  if (RSTRING_LEN(dest) == 0) {
307
      if (!rb_enc_asciicompat(STR_ENC_GET(src)))
308
    ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
309
      else
310
    ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
311
  }
312
  break;
313 313
    }
314 314
}
315 315

  
......
326 326
    int cr = ENC_CODERANGE(str);
327 327

  
328 328
    if (cr == ENC_CODERANGE_UNKNOWN) {
329
	rb_encoding *enc = STR_ENC_GET(str);
329
  rb_encoding *enc = STR_ENC_GET(str);
330 330
        cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
331 331
        ENC_CODERANGE_SET(str, cr);
332 332
    }
......
349 349
str_mod_check(VALUE s, const char *p, long len)
350 350
{
351 351
    if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
352
	rb_raise(rb_eRuntimeError, "string modified");
352
  rb_raise(rb_eRuntimeError, "string modified");
353 353
    }
354 354
}
355 355

  
......
357 357
rb_str_capacity(VALUE str)
358 358
{
359 359
    if (STR_EMBED_P(str)) {
360
	return RSTRING_EMBED_LEN_MAX;
360
  return RSTRING_EMBED_LEN_MAX;
361 361
    }
362 362
    else if (STR_NOCAPA_P(str)) {
363
	return RSTRING(str)->as.heap.len;
363
  return RSTRING(str)->as.heap.len;
364 364
    }
365 365
    else {
366
	return RSTRING(str)->as.heap.aux.capa;
366
  return RSTRING(str)->as.heap.aux.capa;
367 367
    }
368 368
}
369 369

  
......
386 386
    VALUE str;
387 387

  
388 388
    if (len < 0) {
389
	rb_raise(rb_eArgError, "negative string size (or size too big)");
389
  rb_raise(rb_eArgError, "negative string size (or size too big)");
390 390
    }
391 391

  
392 392
    str = str_alloc(klass);
393 393
    if (len > RSTRING_EMBED_LEN_MAX) {
394
	RSTRING(str)->as.heap.aux.capa = len;
395
	RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
396
	STR_SET_NOEMBED(str);
394
  RSTRING(str)->as.heap.aux.capa = len;
395
  RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
396
  STR_SET_NOEMBED(str);
397 397
    }
398 398
    else if (len == 0) {
399
	ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
399
  ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
400 400
    }
401 401
    if (ptr) {
402
	memcpy(RSTRING_PTR(str), ptr, len);
402
  memcpy(RSTRING_PTR(str), ptr, len);
403 403
    }
404 404
    STR_SET_LEN(str, len);
405 405
    RSTRING_PTR(str)[len] = '\0';
......
432 432
rb_str_new_cstr(const char *ptr)
433 433
{
434 434
    if (!ptr) {
435
	rb_raise(rb_eArgError, "NULL pointer given");
435
  rb_raise(rb_eArgError, "NULL pointer given");
436 436
    }
437 437
    return rb_str_new(ptr, strlen(ptr));
438 438
}
......
486 486
    if (!from) from = rb_enc_get(str);
487 487
    if (from == to) return str;
488 488
    if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
489
	to == rb_ascii8bit_encoding()) {
490
	if (STR_ENC_GET(str) != to) {
491
	    str = rb_str_dup(str);
492
	    rb_enc_associate(str, to);
493
	}
494
	return str;
489
  to == rb_ascii8bit_encoding()) {
490
  if (STR_ENC_GET(str) != to) {
491
      str = rb_str_dup(str);
492
      rb_enc_associate(str, to);
493
  }
494
  return str;
495 495
    }
496 496

  
497 497
    len = RSTRING_LEN(str);
......
504 504
    sp = (unsigned char*)RSTRING_PTR(str);
505 505
    dp = (unsigned char*)RSTRING_PTR(newstr);
506 506
    ret = rb_econv_convert(ec, &sp, (unsigned char*)RSTRING_END(str),
507
			   &dp, (unsigned char*)RSTRING_END(newstr), 0);
507
         &dp, (unsigned char*)RSTRING_END(newstr), 0);
508 508
    rb_econv_close(ec);
509 509
    switch (ret) {
510 510
      case econv_destination_buffer_full:
511
	/* destination buffer short */
512
	len = len < 2 ? 2 : len * 2;
513
	rb_str_resize(newstr, len);
514
	goto retry;
511
  /* destination buffer short */
512
  len = len < 2 ? 2 : len * 2;
513
  rb_str_resize(newstr, len);
514
  goto retry;
515 515

  
516 516
      case econv_finished:
517
	len = dp - (unsigned char*)RSTRING_PTR(newstr);
518
	rb_str_set_len(newstr, len);
519
	rb_enc_associate(newstr, to);
520
	return newstr;
517
  len = dp - (unsigned char*)RSTRING_PTR(newstr);
518
  rb_str_set_len(newstr, len);
519
  rb_enc_associate(newstr, to);
520
  return newstr;
521 521

  
522 522
      default:
523
	/* some error, return original */
524
	return str;
523
  /* some error, return original */
524
  return str;
525 525
    }
526 526
}
527 527

  
......
538 538

  
539 539
    str = rb_tainted_str_new(ptr, len);
540 540
    if (eenc == rb_usascii_encoding() &&
541
	rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
542
	rb_enc_associate(str, rb_ascii8bit_encoding());
543
	return str;
541
  rb_enc_str_coderange(str) != ENC_CODERANGE_7BIT) {
542
  rb_enc_associate(str, rb_ascii8bit_encoding());
543
  return str;
544 544
    }
545 545
    rb_enc_associate(str, eenc);
546 546
    return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
......
604 604
str_replace_shared(VALUE str2, VALUE str)
605 605
{
606 606
    if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
607
	STR_SET_EMBED(str2);
608
	memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
609
	STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
607
  STR_SET_EMBED(str2);
608
  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
609
  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
610 610
    }
611 611
    else {
612
	str = rb_str_new_frozen(str);
613
	FL_SET(str2, STR_NOEMBED);
614
	RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
615
	RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
616
	RSTRING(str2)->as.heap.aux.shared = str;
617
	FL_SET(str2, ELTS_SHARED);
612
  str = rb_str_new_frozen(str);
613
  FL_SET(str2, STR_NOEMBED);
614
  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
615
  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
616
  RSTRING(str2)->as.heap.aux.shared = str;
617
  FL_SET(str2, ELTS_SHARED);
618 618
    }
619 619
    rb_enc_cr_str_exact_copy(str2, str);
620 620

  
......
655 655
    RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
656 656
    RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
657 657
    if (STR_SHARED_P(str)) {
658
	VALUE shared = RSTRING(str)->as.heap.aux.shared;
659
	assert(OBJ_FROZEN(shared));
660
	FL_SET(str2, ELTS_SHARED);
661
	RSTRING(str2)->as.heap.aux.shared = shared;
658
  VALUE shared = RSTRING(str)->as.heap.aux.shared;
659
  assert(OBJ_FROZEN(shared));
660
  FL_SET(str2, ELTS_SHARED);
661
  RSTRING(str2)->as.heap.aux.shared = shared;
662 662
    }
663 663
    else {
664
	FL_SET(str, ELTS_SHARED);
665
	RSTRING(str)->as.heap.aux.shared = str2;
664
  FL_SET(str, ELTS_SHARED);
665
  RSTRING(str)->as.heap.aux.shared = str2;
666 666
    }
667 667
    rb_enc_cr_str_exact_copy(str2, str);
668 668
    OBJ_INFECT(str2, str);
......
677 677
    if (OBJ_FROZEN(orig)) return orig;
678 678
    klass = rb_obj_class(orig);
679 679
    if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
680
	long ofs;
681
	assert(OBJ_FROZEN(str));
682
	ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
683
	if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
684
	    (!OBJ_TAINTED(str) && OBJ_TAINTED(orig)) ||
685
	    ENCODING_GET(str) != ENCODING_GET(orig)) {
686
	    str = str_new3(klass, str);
687
	    RSTRING(str)->as.heap.ptr += ofs;
688
	    RSTRING(str)->as.heap.len -= ofs;
689
	    rb_enc_cr_str_exact_copy(str, orig);
690
	    OBJ_INFECT(str, orig);
691
	}
680
  long ofs;
681
  assert(OBJ_FROZEN(str));
682
  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
683
  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
684
      (!OBJ_TAINTED(str) && OBJ_TAINTED(orig)) ||
685
      ENCODING_GET(str) != ENCODING_GET(orig)) {
686
      str = str_new3(klass, str);
687
      RSTRING(str)->as.heap.ptr += ofs;
688
      RSTRING(str)->as.heap.len -= ofs;
689
      rb_enc_cr_str_exact_copy(str, orig);
690
      OBJ_INFECT(str, orig);
691
  }
692 692
    }
693 693
    else if (STR_EMBED_P(orig)) {
694
	str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
695
	rb_enc_cr_str_exact_copy(str, orig);
696
	OBJ_INFECT(str, orig);
694
  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
695
  rb_enc_cr_str_exact_copy(str, orig);
696
  OBJ_INFECT(str, orig);
697 697
    }
698 698
    else if (STR_ASSOC_P(orig)) {
699
	VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
700
	FL_UNSET(orig, STR_ASSOC);
701
	str = str_new4(klass, orig);
702
	FL_SET(str, STR_ASSOC);
703
	RSTRING(str)->as.heap.aux.shared = assoc;
699
  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
700
  FL_UNSET(orig, STR_ASSOC);
701
  str = str_new4(klass, orig);
702
  FL_SET(str, STR_ASSOC);
703
  RSTRING(str)->as.heap.aux.shared = assoc;
704 704
    }
705 705
    else {
706
	str = str_new4(klass, orig);
706
  str = str_new4(klass, orig);
707 707
    }
708 708
    OBJ_FREEZE(str);
709 709
    return str;
......
719 719
}
720 720

  
721 721
RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len),
722
	   rb_str_new_with_class, (obj, ptr, len))
722
     rb_str_new_with_class, (obj, ptr, len))
723 723
#define rb_str_new5 rb_str_new_with_class
724 724

  
725 725
static VALUE
......
739 739
    VALUE str = str_alloc(rb_cString);
740 740

  
741 741
    if (capa < STR_BUF_MIN_SIZE) {
742
	capa = STR_BUF_MIN_SIZE;
742
  capa = STR_BUF_MIN_SIZE;
743 743
    }
744 744
    FL_SET(str, STR_NOEMBED);
745 745
    RSTRING(str)->as.heap.aux.capa = capa;
......
790 790
rb_str_free(VALUE str)
791 791
{
792 792
    if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
793
	xfree(RSTRING(str)->as.heap.ptr);
793
  xfree(RSTRING(str)->as.heap.ptr);
794 794
    }
795 795
}
796 796

  
......
798 798
rb_str_memsize(VALUE str)
799 799
{
800 800
    if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
801
	return RSTRING(str)->as.heap.aux.capa;
801
  return RSTRING(str)->as.heap.aux.capa;
802 802
    }
803 803
    else {
804
	return 0;
804
  return 0;
805 805
    }
806 806
}
807 807

  
......
824 824
    str_discard(str);
825 825
    OBJ_INFECT(str, str2);
826 826
    if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
827
	STR_SET_EMBED(str);
828
	memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
829
	STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
827
  STR_SET_EMBED(str);
828
  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
829
  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
830 830
        rb_enc_associate(str, enc);
831 831
        ENC_CODERANGE_SET(str, cr);
832
	return;
832
  return;
833 833
    }
834 834
    STR_SET_NOEMBED(str);
835 835
    STR_UNSET_NOCAPA(str);
836 836
    RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
837 837
    RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
838 838
    if (STR_NOCAPA_P(str2)) {
839
	FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
840
	RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
839
  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
840
  RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
841 841
    }
842 842
    else {
843
	RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
843
  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
844 844
    }
845
    STR_SET_EMBED(str2);	/* abandon str2 */
845
    STR_SET_EMBED(str2);  /* abandon str2 */
846 846
    RSTRING_PTR(str2)[0] = 0;
847 847
    STR_SET_EMBED_LEN(str2, 0);
848 848
    rb_enc_associate(str, enc);
......
857 857
    VALUE str;
858 858

  
859 859
    if (RB_TYPE_P(obj, T_STRING)) {
860
	return obj;
860
  return obj;
861 861
    }
862 862
    str = rb_funcall(obj, id_to_s, 0);
863 863
    if (!RB_TYPE_P(str, T_STRING))
864
	return rb_any_to_s(obj);
864
  return rb_any_to_s(obj);
865 865
    if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
866 866
    return str;
867 867
}
......
873 873

  
874 874
    len = RSTRING_LEN(str2);
875 875
    if (STR_ASSOC_P(str2)) {
876
	str2 = rb_str_new4(str2);
876
  str2 = rb_str_new4(str2);
877 877
    }
878 878
    if (STR_SHARED_P(str2)) {
879
	VALUE shared = RSTRING(str2)->as.heap.aux.shared;
880
	assert(OBJ_FROZEN(shared));
881
	STR_SET_NOEMBED(str);
882
	RSTRING(str)->as.heap.len = len;
883
	RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
884
	FL_SET(str, ELTS_SHARED);
885
	FL_UNSET(str, STR_ASSOC);
886
	RSTRING(str)->as.heap.aux.shared = shared;
879
  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
880
  assert(OBJ_FROZEN(shared));
881
  STR_SET_NOEMBED(str);
882
  RSTRING(str)->as.heap.len = len;
883
  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
884
  FL_SET(str, ELTS_SHARED);
885
  FL_UNSET(str, STR_ASSOC);
886
  RSTRING(str)->as.heap.aux.shared = shared;
887 887
    }
888 888
    else {
889
	str_replace_shared(str, str2);
889
  str_replace_shared(str, str2);
890 890
    }
891 891

  
892 892
    OBJ_INFECT(str, str2);
......
927 927
    VALUE orig;
928 928

  
929 929
    if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
930
	rb_str_replace(str, orig);
930
  rb_str_replace(str, orig);
931 931
    return str;
932 932
}
933 933

  
......
942 942
    }
943 943
    else if (rb_enc_asciicompat(enc)) {
944 944
        c = 0;
945
	if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
946
	    while (p < e) {
947
		if (ISASCII(*p)) {
948
		    q = search_nonascii(p, e);
949
		    if (!q)
950
			return c + (e - p);
951
		    c += q - p;
952
		    p = q;
953
		}
954
		p += rb_enc_fast_mbclen(p, e, enc);
955
		c++;
956
	    }
957
	}
958
	else {
959
	    while (p < e) {
960
		if (ISASCII(*p)) {
961
		    q = search_nonascii(p, e);
962
		    if (!q)
963
			return c + (e - p);
964
		    c += q - p;
965
		    p = q;
966
		}
967
		p += rb_enc_mbclen(p, e, enc);
968
		c++;
969
	    }
970
	}
945
  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
946
      while (p < e) {
947
    if (ISASCII(*p)) {
948
        q = search_nonascii(p, e);
949
        if (!q)
950
      return c + (e - p);
951
        c += q - p;
952
        p = q;
953
    }
954
    p += rb_enc_fast_mbclen(p, e, enc);
955
    c++;
956
      }
957
  }
958
  else {
959
      while (p < e) {
960
    if (ISASCII(*p)) {
961
        q = search_nonascii(p, e);
962
        if (!q)
963
      return c + (e - p);
964
        c += q - p;
965
        p = q;
966
    }
967
    p += rb_enc_mbclen(p, e, enc);
968
    c++;
969
      }
970
  }
971 971
        return c;
972 972
    }
973 973

  
......
992 992

  
993 993
    *cr = 0;
994 994
    if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
995
	return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
995
  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
996 996
    }
997 997
    else if (rb_enc_asciicompat(enc)) {
998
	c = 0;
999
	while (p < e) {
1000
	    if (ISASCII(*p)) {
1001
		q = search_nonascii(p, e);
1002
		if (!q) {
1003
		    if (!*cr) *cr = ENC_CODERANGE_7BIT;
1004
		    return c + (e - p);
1005
		}
1006
		c += q - p;
1007
		p = q;
1008
	    }
1009
	    ret = rb_enc_precise_mbclen(p, e, enc);
1010
	    if (MBCLEN_CHARFOUND_P(ret)) {
1011
		*cr |= ENC_CODERANGE_VALID;
1012
		p += MBCLEN_CHARFOUND_LEN(ret);
1013
	    }
1014
	    else {
1015
		*cr = ENC_CODERANGE_BROKEN;
1016
		p++;
1017
	    }
1018
	    c++;
1019
	}
1020
	if (!*cr) *cr = ENC_CODERANGE_7BIT;
1021
	return c;
998
  c = 0;
999
  while (p < e) {
1000
      if (ISASCII(*p)) {
1001
    q = search_nonascii(p, e);
1002
    if (!q) {
1003
        if (!*cr) *cr = ENC_CODERANGE_7BIT;
1004
        return c + (e - p);
1005
    }
1006
    c += q - p;
1007
    p = q;
1008
      }
1009
      ret = rb_enc_precise_mbclen(p, e, enc);
1010
      if (MBCLEN_CHARFOUND_P(ret)) {
1011
    *cr |= ENC_CODERANGE_VALID;
1012
    p += MBCLEN_CHARFOUND_LEN(ret);
1013
      }
1014
      else {
1015
    *cr = ENC_CODERANGE_BROKEN;
1016
    p++;
1017
      }
1018
      c++;
1019
  }
1020
  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1021
  return c;
1022 1022
    }
1023 1023

  
1024 1024
    for (c=0; p<e; c++) {
1025
	ret = rb_enc_precise_mbclen(p, e, enc);
1026
	if (MBCLEN_CHARFOUND_P(ret)) {
1027
	    *cr |= ENC_CODERANGE_VALID;
1028
	    p += MBCLEN_CHARFOUND_LEN(ret);
1029
	}
1030
	else {
1031
	    *cr = ENC_CODERANGE_BROKEN;
1025
  ret = rb_enc_precise_mbclen(p, e, enc);
1026
  if (MBCLEN_CHARFOUND_P(ret)) {
1027
      *cr |= ENC_CODERANGE_VALID;
1028
      p += MBCLEN_CHARFOUND_LEN(ret);
1029
  }
1030
  else {
1031
      *cr = ENC_CODERANGE_BROKEN;
1032 1032
            if (p + rb_enc_mbminlen(enc) <= e)
1033 1033
                p += rb_enc_mbminlen(enc);
1034 1034
            else
1035 1035
                p = e;
1036
	}
1036
  }
1037 1037
    }
1038 1038
    if (!*cr) *cr = ENC_CODERANGE_7BIT;
1039 1039
    return c;
......
1090 1090
    if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1091 1091
        enc == rb_utf8_encoding()) {
1092 1092

  
1093
	VALUE len = 0;
1094
	if ((int)sizeof(VALUE) * 2 < e - p) {
1095
	    const VALUE *s, *t;
1096
	    const VALUE lowbits = sizeof(VALUE) - 1;
1097
	    s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1098
	    t = (const VALUE*)(~lowbits & (VALUE)e);
1099
	    while (p < (const char *)s) {
1100
		if (is_utf8_lead_byte(*p)) len++;
1101
		p++;
1102
	    }
1103
	    while (s < t) {
1104
		len += count_utf8_lead_bytes_with_word(s);
1105
		s++;
1106
	    }
1107
	    p = (const char *)s;
1108
	}
1109
	while (p < e) {
1110
	    if (is_utf8_lead_byte(*p)) len++;
1111
	    p++;
1112
	}
1113
	return (long)len;
1093
  VALUE len = 0;
1094
  if ((int)sizeof(VALUE) * 2 < e - p) {
1095
      const VALUE *s, *t;
1096
      const VALUE lowbits = sizeof(VALUE) - 1;
1097
      s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1098
      t = (const VALUE*)(~lowbits & (VALUE)e);
1099
      while (p < (const char *)s) {
1100
    if (is_utf8_lead_byte(*p)) len++;
1101
    p++;
1102
      }
1103
      while (s < t) {
1104
    len += count_utf8_lead_bytes_with_word(s);
1105
    s++;
1106
      }
1107
      p = (const char *)s;
1108
  }
1109
  while (p < e) {
1110
      if (is_utf8_lead_byte(*p)) len++;
1111
      p++;
1112
  }
1113
  return (long)len;
1114 1114
    }
1115 1115
#endif
1116 1116
    n = rb_enc_strlen_cr(p, e, enc, &cr);
......
1174 1174
rb_str_empty(VALUE str)
1175 1175
{
1176 1176
    if (RSTRING_LEN(str) == 0)
1177
	return Qtrue;
1177
  return Qtrue;
1178 1178
    return Qfalse;
1179 1179
}
1180 1180

  
......
1199 1199
    str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1200 1200
    memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1201 1201
    memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1202
	   RSTRING_PTR(str2), RSTRING_LEN(str2));
1202
     RSTRING_PTR(str2), RSTRING_LEN(str2));
1203 1203
    RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1204 1204

  
1205 1205
    if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1206
	OBJ_TAINT(str3);
1206
  OBJ_TAINT(str3);
1207 1207
    ENCODING_CODERANGE_SET(str3, rb_enc_to_index(enc),
1208
			   ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
1208
         ENC_CODERANGE_AND(ENC_CODERANGE(str1), ENC_CODERANGE(str2)));
1209 1209
    return str3;
1210 1210
}
1211 1211

  
......
1229 1229

  
1230 1230
    len = NUM2LONG(times);
1231 1231
    if (len < 0) {
1232
	rb_raise(rb_eArgError, "negative argument");
1232
  rb_raise(rb_eArgError, "negative argument");
1233 1233
    }
1234 1234
    if (len && LONG_MAX/len <  RSTRING_LEN(str)) {
1235
	rb_raise(rb_eArgError, "argument too big");
1235
  rb_raise(rb_eArgError, "argument too big");
1236 1236
    }
1237 1237

  
1238 1238
    str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
......
1274 1274
    volatile VALUE tmp = rb_check_array_type(arg);
1275 1275

  
1276 1276
    if (!NIL_P(tmp)) {
1277
	return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1277
  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1278 1278
    }
1279 1279
    return rb_str_format(1, &arg, str);
1280 1280
}
......
1283 1283
str_modifiable(VALUE str)
1284 1284
{
1285 1285
    if (FL_TEST(str, STR_TMPLOCK)) {
1286
	rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1286
  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1287 1287
    }
1288 1288
    rb_check_frozen(str);
1289 1289
    if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4)
1290
	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1290
  rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1291 1291
}
1292 1292

  
1293 1293
static inline int
......
1309 1309
    if (len > capa) len = capa;
1310 1310
    ptr = ALLOC_N(char, capa + 1);
1311 1311
    if (RSTRING_PTR(str)) {
1312
	memcpy(ptr, RSTRING_PTR(str), len);
1312
  memcpy(ptr, RSTRING_PTR(str), len);
1313 1313
    }
1314 1314
    STR_SET_NOEMBED(str);
1315 1315
    STR_UNSET_NOCAPA(str);
......
1325 1325
rb_str_modify(VALUE str)
1326 1326
{
1327 1327
    if (!str_independent(str))
1328
	str_make_independent(str);
1328
  str_make_independent(str);
1329 1329
    ENC_CODERANGE_CLEAR(str);
1330 1330
}
1331 1331

  
......
1333 1333
rb_str_modify_expand(VALUE str, long expand)
1334 1334
{
1335 1335
    if (expand < 0) {
1336
	rb_raise(rb_eArgError, "negative expanding string size");
1336
  rb_raise(rb_eArgError, "negative expanding string size");
1337 1337
    }
1338 1338
    if (!str_independent(str)) {
1339
	str_make_independent_expand(str, expand);
1339
  str_make_independent_expand(str, expand);
1340 1340
    }
1341 1341
    else if (expand > 0) {
1342
	long len = RSTRING_LEN(str);
1343
	long capa = len + expand;
1344
	if (!STR_EMBED_P(str)) {
1345
	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1346
	    RSTRING(str)->as.heap.aux.capa = capa;
1347
	}
1348
	else if (capa > RSTRING_EMBED_LEN_MAX) {
1349
	    str_make_independent_expand(str, expand);
1350
	}
1342
  long len = RSTRING_LEN(str);
1343
  long capa = len + expand;
1344
  if (!STR_EMBED_P(str)) {
1345
      REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1346
      RSTRING(str)->as.heap.aux.capa = capa;
1347
  }
1348
  else if (capa > RSTRING_EMBED_LEN_MAX) {
1349
      str_make_independent_expand(str, expand);
1350
  }
1351 1351
    }
1352 1352
    ENC_CODERANGE_CLEAR(str);
1353 1353
}
......
1357 1357
str_modify_keep_cr(VALUE str)
1358 1358
{
1359 1359
    if (!str_independent(str))
1360
	str_make_independent(str);
1360
  str_make_independent(str);
1361 1361
    if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1362
	/* Force re-scan later */
1363
	ENC_CODERANGE_CLEAR(str);
1362
  /* Force re-scan later */
1363
  ENC_CODERANGE_CLEAR(str);
1364 1364
}
1365 1365

  
1366 1366
static inline void
......
1368 1368
{
1369 1369
    str_modifiable(str);
1370 1370
    if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1371
	xfree(RSTRING_PTR(str));
1372
	RSTRING(str)->as.heap.ptr = 0;
1373
	RSTRING(str)->as.heap.len = 0;
1371
  xfree(RSTRING_PTR(str));
1372
  RSTRING(str)->as.heap.ptr = 0;
1373
  RSTRING(str)->as.heap.len = 0;
1374 1374
    }
1375 1375
}
1376 1376

  
......
1380 1380
    /* sanity check */
1381 1381
    rb_check_frozen(str);
1382 1382
    if (STR_ASSOC_P(str)) {
1383
	/* already associated */
1384
	rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1383
  /* already associated */
1384
  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1385 1385
    }
1386 1386
    else {
1387
	if (STR_SHARED_P(str)) {
1388
	    VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1389
	    str_make_independent(str);
1390
	    if (STR_ASSOC_P(assoc)) {
1391
		assoc = RSTRING(assoc)->as.heap.aux.shared;
1392
		rb_ary_concat(assoc, add);
1393
		add = assoc;
1394
	    }
1395
	}
1396
	else if (STR_EMBED_P(str)) {
1397
	    str_make_independent(str);
1398
	}
1399
	else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1400
	    RESIZE_CAPA(str, RSTRING_LEN(str));
1401
	}
1402
	FL_SET(str, STR_ASSOC);
1403
	RBASIC(add)->klass = 0;
1404
	RSTRING(str)->as.heap.aux.shared = add;
1387
  if (STR_SHARED_P(str)) {
1388
      VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1389
      str_make_independent(str);
1390
      if (STR_ASSOC_P(assoc)) {
1391
    assoc = RSTRING(assoc)->as.heap.aux.shared;
1392
    rb_ary_concat(assoc, add);
1393
    add = assoc;
1394
      }
1395
  }
1396
  else if (STR_EMBED_P(str)) {
1397
      str_make_independent(str);
1398
  }
1399
  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1400
      RESIZE_CAPA(str, RSTRING_LEN(str));
1401
  }
1402
  FL_SET(str, STR_ASSOC);
1403
  RBASIC(add)->klass = 0;
1404
  RSTRING(str)->as.heap.aux.shared = add;
1405 1405
    }
1406 1406
}
1407 1407

  
......
1410 1410
{
1411 1411
    if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1412 1412
    if (STR_ASSOC_P(str)) {
1413
	return RSTRING(str)->as.heap.aux.shared;
1413
  return RSTRING(str)->as.heap.aux.shared;
1414 1414
    }
1415 1415
    return Qfalse;
1416 1416
}
......
1420 1420
{
1421 1421
    rb_encoding *enc = rb_enc_get(str);
1422 1422
    if (!rb_enc_asciicompat(enc)) {
1423
	rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1423
  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1424 1424
    }
1425 1425
}
1426 1426

  
......
1429 1429
{
1430 1430
    VALUE s = *ptr;
1431 1431
    if (!RB_TYPE_P(s, T_STRING)) {
1432
	s = rb_str_to_str(s);
1433
	*ptr = s;
1432
  s = rb_str_to_str(s);
1433
  *ptr = s;
1434 1434
    }
1435 1435
    return s;
1436 1436
}
......
1450 1450
    long len = RSTRING_LEN(str);
1451 1451

  
1452 1452
    if (!s || memchr(s, 0, len)) {
1453
	rb_raise(rb_eArgError, "string contains null byte");
1453
  rb_raise(rb_eArgError, "string contains null byte");
1454 1454
    }
1455 1455
    if (s[len]) {
1456
	rb_str_modify(str);
1457
	s = RSTRING_PTR(str);
1458
	s[RSTRING_LEN(str)] = 0;
1456
  rb_str_modify(str);
1457
  s = RSTRING_PTR(str);
1458
  s[RSTRING_LEN(str)] = 0;
1459 1459
    }
1460 1460
    return s;
1461 1461
}
......
1507 1507
            if (ISASCII(*p)) {
1508 1508
                p2 = search_nonascii(p, e2);
1509 1509
                if (!p2) {
1510
		    nth -= e2 - p;
1511
		    *nthp = nth;
1510
        nth -= e2 - p;
1511
        *nthp = nth;
1512 1512
                    return (char *)e2;
1513 1513
                }
1514 1514
                nth -= p2 - p;
......
1544 1544
str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1545 1545
{
1546 1546
    if (singlebyte)
1547
	p += nth;
1547
  p += nth;
1548 1548
    else {
1549
	p = str_nth_len(p, e, &nth, enc);
1549
  p = str_nth_len(p, e, &nth, enc);
1550 1550
    }
1551 1551
    if (!p) return 0;
1552 1552
    if (p > e) p = e;
......
1566 1566
rb_str_offset(VALUE str, long pos)
1567 1567
{
1568 1568
    return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1569
		      STR_ENC_GET(str), single_byte_optimizable(str));
1569
          STR_ENC_GET(str), single_byte_optimizable(str));
1570 1570
}
1571 1571

  
1572 1572
#ifdef NONASCII_MASK
......
1575 1575
{
1576 1576
    long nth = *nthp;
1577 1577
    if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1578
	const VALUE *s, *t;
1579
	const VALUE lowbits = sizeof(VALUE) - 1;
1580
	s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1581
	t = (const VALUE*)(~lowbits & (VALUE)e);
1582
	while (p < (const char *)s) {
1583
	    if (is_utf8_lead_byte(*p)) nth--;
1584
	    p++;
1585
	}
1586
	do {
1587
	    nth -= count_utf8_lead_bytes_with_word(s);
1588
	    s++;
1589
	} while (s < t && (int)sizeof(VALUE) <= nth);
1590
	p = (char *)s;
1578
  const VALUE *s, *t;
1579
  const VALUE lowbits = sizeof(VALUE) - 1;
1580
  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1581
  t = (const VALUE*)(~lowbits & (VALUE)e);
1582
  while (p < (const char *)s) {
1583
      if (is_utf8_lead_byte(*p)) nth--;
1584
      p++;
1585
  }
1586
  do {
1587
      nth -= count_utf8_lead_bytes_with_word(s);
1588
      s++;
1589
  } while (s < t && (int)sizeof(VALUE) <= nth);
1590
  p = (char *)s;
1591 1591
    }
1592 1592
    while (p < e) {
1593
	if (is_utf8_lead_byte(*p)) {
1594
	    if (nth == 0) break;
1595
	    nth--;
1596
	}
1597
	p++;
1593
  if (is_utf8_lead_byte(*p)) {
1594
      if (nth == 0) break;
1595
      nth--;
1596
  }
1597
  p++;
1598 1598
    }
1599 1599
    *nthp = nth;
1600 1600
    return (char *)p;
......
1615 1615
    if (single_byte_optimizable(str) || pos < 0)
1616 1616
        return pos;
1617 1617
    else {
1618
	char *p = RSTRING_PTR(str);
1618
  char *p = RSTRING_PTR(str);
1619 1619
        return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1620 1620
    }
1621 1621
}
......
1651 1651

  
1652 1652
    if (len < 0) return 0;
1653 1653
    if (!blen) {
1654
	len = 0;
1654
  len = 0;
1655 1655
    }
1656 1656
    if (single_byte_optimizable(str)) {
1657
	if (beg > blen) return 0;
1658
	if (beg < 0) {
1659
	    beg += blen;
1660
	    if (beg < 0) return 0;
1661
	}
1662
	if (beg + len > blen)
1663
	    len = blen - beg;
1664
	if (len < 0) return 0;
1665
	p = s + beg;
1666
	goto end;
1657
  if (beg > blen) return 0;
1658
  if (beg < 0) {
1659
      beg += blen;
1660
      if (beg < 0) return 0;
1661
  }
1662
  if (beg + len > blen)
1663
      len = blen - beg;
1664
  if (len < 0) return 0;
1665
  p = s + beg;
1666
  goto end;
1667 1667
    }
1668 1668
    if (beg < 0) {
1669
	if (len > -beg) len = -beg;
1670
	if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1671
	    beg = -beg;
1672
	    while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1673
	    p = e;
1674
	    if (!p) return 0;
1675
	    while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1676
	    if (!p) return 0;
1677
	    len = e - p;
1678
	    goto end;
1679
	}
1680
	else {
1681
	    slen = str_strlen(str, enc);
1682
	    beg += slen;
1683
	    if (beg < 0) return 0;
1684
	    p = s + beg;
1685
	    if (len == 0) goto end;
1686
	}
1669
  if (len > -beg) len = -beg;
1670
  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1671
      beg = -beg;
1672
      while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1673
      p = e;
1674
      if (!p) return 0;
1675
      while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1676
      if (!p) return 0;
1677
      len = e - p;
1678
      goto end;
1679
  }
1680
  else {
1681
      slen = str_strlen(str, enc);
1682
      beg += slen;
1683
      if (beg < 0) return 0;
1684
      p = s + beg;
1685
      if (len == 0) goto end;
1686
  }
1687 1687
    }
1688 1688
    else if (beg > 0 && beg > RSTRING_LEN(str)) {
1689
	return 0;
1689
  return 0;
1690 1690
    }
1691 1691
    if (len == 0) {
1692
	if (beg > str_strlen(str, enc)) return 0;
1693
	p = s + beg;
1692
  if (beg > str_strlen(str, enc)) return 0;
1693
  p = s + beg;
1694 1694
    }
1695 1695
#ifdef NONASCII_MASK
1696 1696
    else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
......
1701 1701
    }
1702 1702
#endif
1703 1703
    else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1704
	int char_sz = rb_enc_mbmaxlen(enc);
1704
  int char_sz = rb_enc_mbmaxlen(enc);
1705 1705

  
1706
	p = s + beg * char_sz;
1707
	if (p > e) {
1708
	    return 0;
1709
	}
1706
  p = s + beg * char_sz;
1707
  if (p > e) {
1708
      return 0;
1709
  }
1710 1710
        else if (len * char_sz > e - p)
1711 1711
            len = e - p;
1712 1712
        else
1713
	    len *= char_sz;
1713
      len *= char_sz;
1714 1714
    }
1715 1715
    else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1716
	if (beg > 0) return 0;
1717
	len = 0;
1716
  if (beg > 0) return 0;
1717
  len = 0;
1718 1718
    }
1719 1719
    else {
1720
	len = str_offset(p, e, len, enc, 0);
1720
  len = str_offset(p, e, len, enc, 0);
1721 1721
    }
1722 1722
  end:
1723 1723
    *lenp = len;
......
1732 1732

  
1733 1733
    if (!p) return Qnil;
1734 1734
    if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1735
	str2 = rb_str_new4(str);
1736
	str2 = str_new3(rb_obj_class(str2), str2);
1737
	RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1738
	RSTRING(str2)->as.heap.len = len;
1735
  str2 = rb_str_new4(str);
1736
  str2 = str_new3(rb_obj_class(str2), str2);
1737
  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1738
  RSTRING(str2)->as.heap.len = len;
1739 1739
    }
1740 1740
    else {
1741
	str2 = rb_str_new5(str, p, len);
1742
	rb_enc_cr_str_copy_for_substr(str2, str);
1743
	OBJ_INFECT(str2, str);
1741
  str2 = rb_str_new5(str, p, len);
1742
  rb_enc_cr_str_copy_for_substr(str2, str);
1743
  OBJ_INFECT(str2, str);
1744 1744
    }
1745 1745

  
1746 1746
    return str2;
......
1750 1750
rb_str_freeze(VALUE str)
1751 1751
{
1752 1752
    if (STR_ASSOC_P(str)) {
1753
	VALUE ary = RSTRING(str)->as.heap.aux.shared;
1754
	OBJ_FREEZE(ary);
1753
  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1754
  OBJ_FREEZE(ary);
1755 1755
    }
1756 1756
    return rb_obj_freeze(str);
1757 1757
}
......
1763 1763
rb_str_locktmp(VALUE str)
1764 1764
{
1765 1765
    if (FL_TEST(str, STR_TMPLOCK)) {
1766
	rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1766
  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1767 1767
    }
1768 1768
    FL_SET(str, STR_TMPLOCK);
1769 1769
    return str;
......
1773 1773
rb_str_unlocktmp(VALUE str)
1774 1774
{
1775 1775
    if (!FL_TEST(str, STR_TMPLOCK)) {
1776
	rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1776
  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1777 1777
    }
1778 1778
    FL_UNSET(str, STR_TMPLOCK);
1779 1779
    return str;
......
1786 1786

  
1787 1787
    str_modifiable(str);
1788 1788
    if (STR_SHARED_P(str)) {
1789
	rb_raise(rb_eRuntimeError, "can't set length of shared string");
1789
  rb_raise(rb_eRuntimeError, "can't set length of shared string");
1790 1790
    }
1791 1791
    if (len > (capa = (long)rb_str_capacity(str))) {
1792
	rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1792
  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1793 1793
    }
1794 1794
    STR_SET_LEN(str, len);
1795 1795
    RSTRING_PTR(str)[len] = '\0';
......
1802 1802
    int independent;
1803 1803

  
1804 1804
    if (len < 0) {
1805
	rb_raise(rb_eArgError, "negative string size (or size too big)");
1805
  rb_raise(rb_eArgError, "negative string size (or size too big)");
1806 1806
    }
1807 1807

  
1808 1808
    independent = str_independent(str);
1809 1809
    ENC_CODERANGE_CLEAR(str);
1810 1810
    slen = RSTRING_LEN(str);
1811 1811
    if (len != slen) {
1812
	if (STR_EMBED_P(str)) {
1813
	    if (len <= RSTRING_EMBED_LEN_MAX) {
1814
		STR_SET_EMBED_LEN(str, len);
1815
		RSTRING(str)->as.ary[len] = '\0';
1816
		return str;
1817
	    }
1818
	    str_make_independent_expand(str, len - slen);
1819
	    STR_SET_NOEMBED(str);
1820
	}
1821
	else if (len <= RSTRING_EMBED_LEN_MAX) {
1822
	    char *ptr = RSTRING(str)->as.heap.ptr;
1823
	    STR_SET_EMBED(str);
1824
	    if (slen > len) slen = len;
1825
	    if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1826
	    RSTRING(str)->as.ary[len] = '\0';
1827
	    STR_SET_EMBED_LEN(str, len);
1828
	    if (independent) xfree(ptr);
1829
	    return str;
1830
	}
1831
	else if (!independent) {
1832
	    str_make_independent_expand(str, len - slen);
1833
	}
1834
	else if (slen < len || slen - len > 1024) {
1835
	    REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1836
	}
1837
	if (!STR_NOCAPA_P(str)) {
1838
	    RSTRING(str)->as.heap.aux.capa = len;
1839
	}
1840
	RSTRING(str)->as.heap.len = len;
1841
	RSTRING(str)->as.heap.ptr[len] = '\0';	/* sentinel */
1812
  if (STR_EMBED_P(str)) {
1813
      if (len <= RSTRING_EMBED_LEN_MAX) {
1814
    STR_SET_EMBED_LEN(str, len);
1815
    RSTRING(str)->as.ary[len] = '\0';
1816
    return str;
1817
      }
1818
      str_make_independent_expand(str, len - slen);
1819
      STR_SET_NOEMBED(str);
1820
  }
1821
  else if (len <= RSTRING_EMBED_LEN_MAX) {
1822
      char *ptr = RSTRING(str)->as.heap.ptr;
1823
      STR_SET_EMBED(str);
1824
      if (slen > len) slen = len;
1825
      if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1826
      RSTRING(str)->as.ary[len] = '\0';
1827
      STR_SET_EMBED_LEN(str, len);
1828
      if (independent) xfree(ptr);
1829
      return str;
1830
  }
1831
  else if (!independent) {
1832
      str_make_independent_expand(str, len - slen);
1833
  }
1834
  else if (slen < len || slen - len > 1024) {
1835
      REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1836
  }
1837
  if (!STR_NOCAPA_P(str)) {
1838
      RSTRING(str)->as.heap.aux.capa = len;
1839
  }
1840
  RSTRING(str)->as.heap.len = len;
1841
  RSTRING(str)->as.heap.ptr[len] = '\0';  /* sentinel */
1842 1842
    }
1843 1843
    return str;
1844 1844
}
......
1854 1854
    rb_str_modify(str);
1855 1855
    if (len == 0) return 0;
1856 1856
    if (STR_ASSOC_P(str)) {
1857
	FL_UNSET(str, STR_ASSOC);
1858
	capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1857
  FL_UNSET(str, STR_ASSOC);
1858
  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1859 1859
    }
1860 1860
    else if (STR_EMBED_P(str)) {
1861
	capa = RSTRING_EMBED_LEN_MAX;
1861
  capa = RSTRING_EMBED_LEN_MAX;
1862 1862
    }
1863 1863
    else {
1864
	capa = RSTRING(str)->as.heap.aux.capa;
1864
  capa = RSTRING(str)->as.heap.aux.capa;
1865 1865
    }
1866 1866
    if (RSTRING_LEN(str) >= LONG_MAX - len) {
1867
	rb_raise(rb_eArgError, "string sizes too big");
1867
  rb_raise(rb_eArgError, "string sizes too big");
1868 1868
    }
1869 1869
    total = RSTRING_LEN(str)+len;
1870 1870
    if (capa <= total) {
1871
	while (total > capa) {
1872
	    if (capa + 1 >= LONG_MAX / 2) {
1873
		capa = (total + 4095) / 4096;
1874
		break;
1875
	    }
1876
	    capa = (capa + 1) * 2;
1877
	}
1878
	RESIZE_CAPA(str, capa);
1871
  while (total > capa) {
1872
      if (capa + 1 >= LONG_MAX / 2) {
1873
    capa = (total + 4095) / 4096;
1874
    break;
1875
      }
1876
      capa = (capa + 1) * 2;
1877
  }
1878
  RESIZE_CAPA(str, capa);
1879 1879
    }
1880 1880
    if (off != -1) {
1881 1881
        ptr = RSTRING_PTR(str) + off;
......
1894 1894
{
1895 1895
    if (len == 0) return str;
1896 1896
    if (len < 0) {
1897
	rb_raise(rb_eArgError, "negative string size (or size too big)");
1897
  rb_raise(rb_eArgError, "negative string size (or size too big)");
1898 1898
    }
1899 1899
    return str_buf_cat(str, ptr, len);
1900 1900
}
......
1909 1909
rb_str_cat(VALUE str, const char *ptr, long len)
1910 1910
{
1911 1911
    if (len < 0) {
1912
	rb_raise(rb_eArgError, "negative string size (or size too big)");
1912
  rb_raise(rb_eArgError, "negative string size (or size too big)");
1913 1913
    }
1914 1914
    if (STR_ASSOC_P(str)) {
1915
	char *p;
1916
	rb_str_modify_expand(str, len);
1917
	p = RSTRING(str)->as.heap.ptr;
1918
	memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1919
	len = RSTRING(str)->as.heap.len += len;
1920
	p[len] = '\0'; /* sentinel */
1921
	return str;
1915
  char *p;
1916
  rb_str_modify_expand(str, len);
1917
  p = RSTRING(str)->as.heap.ptr;
1918
  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1919
  len = RSTRING(str)->as.heap.len += len;
1920
  p[len] = '\0'; /* sentinel */
1921
  return str;
1922 1922
    }
1923 1923

  
1924 1924
    return rb_str_buf_cat(str, ptr, len);
......
1960 1960
            }
1961 1961
            goto incompatible;
1962 1962
        }
1963
	if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
1964
	    ptr_cr = coderange_scan(ptr, len, ptr_enc);
1965
	}
1963
  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
1964
      ptr_cr = coderange_scan(ptr, len, ptr_enc);
1965
  }
1966 1966
        if (str_cr == ENC_CODERANGE_UNKNOWN) {
1967 1967
            if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
1968 1968
                str_cr = rb_enc_str_coderange(str);
......
1997 1997
    }
1998 1998
    else if (str_cr == ENC_CODERANGE_VALID) {
1999 1999
        res_encindex = str_encindex;
2000
	if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2001
	    res_cr = str_cr;
2002
	else
2003
	    res_cr = ptr_cr;
2000
  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2001
      res_cr = str_cr;
2002
  else
2003
      res_cr = ptr_cr;
2004 2004
    }
2005 2005
    else { /* str_cr == ENC_CODERANGE_BROKEN */
2006 2006
        res_encindex = str_encindex;
......
2009 2009
    }
2010 2010

  
2011 2011
    if (len < 0) {
2012
	rb_raise(rb_eArgError, "negative string size (or size too big)");
2012
  rb_raise(rb_eArgError, "negative string size (or size too big)");
2013 2013
    }
2014 2014
    str_buf_cat(str, ptr, len);
2015 2015
    ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
......
2111 2111
    rb_encoding *enc = STR_ENC_GET(str1);
2112 2112

  
2113 2113
    if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2114
	if (rb_num_to_uint(str2, &code) == 0) {
2115
	}
2116
	else if (FIXNUM_P(str2)) {
2117
	    rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2118
	}
2119
	else {
2120
	    rb_raise(rb_eRangeError, "bignum out of char range");
2121
	}
2114
  if (rb_num_to_uint(str2, &code) == 0) {
2115
  }
2116
  else if (FIXNUM_P(str2)) {
2117
      rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2118
  }
2119
  else {
2120
      rb_raise(rb_eRangeError, "bignum out of char range");
2121
  }
2122 2122
    }
2123 2123
    else {
2124
	return rb_str_append(str1, str2);
2124
  return rb_str_append(str1, str2);
2125 2125
    }
2126 2126

  
2127 2127
    if (enc == rb_usascii_encoding()) {
2128
	/* US-ASCII automatically extended to ASCII-8BIT */
2129
	char buf[1];
2130
	buf[0] = (char)code;
2131
	if (code > 0xFF) {
2132
	    rb_raise(rb_eRangeError, "%u out of char range", code);
2133
	}
2134
	rb_str_cat(str1, buf, 1);
2135
	if (code > 127) {
2136
	    rb_enc_associate(str1, rb_ascii8bit_encoding());
2137
	    ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
2138
	}
2128
  /* US-ASCII automatically extended to ASCII-8BIT */
2129
  char buf[1];
2130
  buf[0] = (char)code;
2131
  if (code > 0xFF) {
2132
      rb_raise(rb_eRangeError, "%u out of char range", code);
2133
  }
2134
  rb_str_cat(str1, buf, 1);
2135
  if (code > 127) {
2136
      rb_enc_associate(str1, rb_ascii8bit_encoding());
2137
      ENC_CODERANGE_SET(str1, ENC_CODERANGE_VALID);
2138
  }
2139 2139
    }
2140 2140
    else {
2141
	long pos = RSTRING_LEN(str1);
2142
	int cr = ENC_CODERANGE(str1);
2143
	int len;
2144
	char *buf;
2145

  
2146
	switch (len = rb_enc_codelen(code, enc)) {
2147
	  case ONIGERR_INVALID_CODE_POINT_VALUE:
2148
	    rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
... This diff was truncated because it exceeds the maximum size that can be displayed.