Project

General

Profile

Feature #2447 » symbol_table_without_string.patch

mame (Yusuke Endoh), 12/06/2009 11:24 PM

View differences:

parse.y
9216 9216
#define ENABLE_SELECTOR_NAMESPACE 0
9217 9217
#endif
9218 9218

  
9219
struct sym_entry {
9220
    rb_encoding *enc;
9221
    int only_7bit_and_len;
9222
    const char *name;
9223
};
9224
#define SYM_INFO_LEN(obj) ((obj)->only_7bit_and_len >> 1)
9225
#define SYM_INFO_ONLY_7BIT(obj) ((obj)->only_7bit_and_len & 1)
9226

  
9227
static st_index_t
9228
sym_hash(struct sym_entry *sym)
9229
{
9230
    return rb_memhash(sym->name, SYM_INFO_LEN(sym)) ^ (SYM_INFO_ONLY_7BIT(sym) ? 0 : (VALUE)sym->enc);
9231
}
9232

  
9233
static inline int
9234
sym_comparable(struct sym_entry *sym1, struct sym_entry *sym2)
9235
{
9236
    if (SYM_INFO_LEN(sym1) == 0) return TRUE;
9237
    if (SYM_INFO_LEN(sym2) == 0) return TRUE;
9238
    if (sym1->enc == sym2->enc) return TRUE;
9239
    if (SYM_INFO_ONLY_7BIT(sym1)) {
9240
	if (SYM_INFO_ONLY_7BIT(sym2)) return TRUE;
9241
	if (rb_enc_asciicompat(sym2->enc))
9242
	    return TRUE;
9243
    }
9244
    if (SYM_INFO_ONLY_7BIT(sym2)) {
9245
	if (rb_enc_asciicompat(sym1->enc))
9246
	    return TRUE;
9247
    }
9248
    return FALSE;
9249
}
9250

  
9251
static int
9252
sym_hash_cmp(struct sym_entry *sym1, struct sym_entry *sym2)
9253
{
9254
    long len;
9255
    if (!sym_comparable(sym1, sym2)) return 1;
9256
    if (SYM_INFO_LEN(sym1) == SYM_INFO_LEN(sym2) &&
9257
	memcmp(sym1->name, sym2->name, SYM_INFO_LEN(sym1)) == 0) {
9258
	return 0;
9259
    }
9260
    return 1;
9261
}
9262

  
9219 9263
static struct symbols {
9220 9264
    ID last_id;
9221 9265
    st_table *sym_id;
......
9224 9268
    st_table *ivar2_id;
9225 9269
    st_table *id_ivar2;
9226 9270
#endif
9227
    VALUE op_sym[tLAST_TOKEN];
9271
    struct sym_entry op_sym[tLAST_TOKEN];
9228 9272
} global_symbols = {tLAST_ID};
9229 9273

  
9230 9274
static const struct st_hash_type symhash = {
9231
    rb_str_hash_cmp,
9232
    rb_str_hash,
9275
    sym_hash_cmp,
9276
    sym_hash,
9233 9277
};
9234 9278

  
9235 9279
#if ENABLE_SELECTOR_NAMESPACE
......
9275 9319
void
9276 9320
rb_gc_mark_symbols(void)
9277 9321
{
9278
    rb_mark_tbl(global_symbols.id_str);
9279
    rb_gc_mark_locations(global_symbols.op_sym,
9280
			 global_symbols.op_sym + tLAST_TOKEN);
9281 9322
}
9282 9323
#endif /* !RIPPER */
9283 9324

  
......
9416 9457
    return m == e;
9417 9458
}
9418 9459

  
9460
static inline void
9461
setup_sym_entry(struct sym_entry *sym, const char *name, long len, rb_encoding *enc)
9462
{
9463
    int ascii = rb_coderange_scan(name, len, enc) == ENC_CODERANGE_7BIT;
9464

  
9465
    sym->enc = enc;
9466
    sym->name = name;
9467
    sym->only_7bit_and_len = (len << 1) | ascii;
9468
}
9469

  
9419 9470
static ID
9420 9471
register_symid(ID id, const char *name, long len, rb_encoding *enc)
9421 9472
{
9422
    VALUE str = rb_enc_str_new(name, len, enc);
9423
    OBJ_FREEZE(str);
9424
    st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
9425
    st_add_direct(global_symbols.id_str, id, (st_data_t)str);
9473
    struct sym_entry *sym = ALLOC(struct sym_entry);
9474
    char *buf = ALLOC_N(char, len + 1);
9475
    memcpy(buf, name, len);
9476
    buf[len] = 0;
9477
    setup_sym_entry(sym, buf, len, enc);
9478
    sym->name = buf;
9479

  
9480
    st_add_direct(global_symbols.sym_id, (st_data_t)sym, id);
9481
    st_add_direct(global_symbols.id_str, id, (st_data_t)sym);
9426 9482
    return id;
9427 9483
}
9428 9484

  
......
9432 9488
    const char *m = name;
9433 9489
    const char *e = m + len;
9434 9490
    unsigned char c;
9435
    VALUE str;
9436 9491
    ID id;
9437 9492
    long last;
9438 9493
    int mb;
9439
    struct RString fake_str;
9440
    fake_str.basic.flags = T_STRING|RSTRING_NOEMBED|FL_FREEZE;
9441
    fake_str.basic.klass = rb_cString;
9442
    fake_str.as.heap.len = len;
9443
    fake_str.as.heap.ptr = (char *)name;
9444
    fake_str.as.heap.aux.capa = len;
9445
    str = (VALUE)&fake_str;
9446
    rb_enc_associate(str, enc);
9447

  
9448
    if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id))
9494
    struct sym_entry sym;
9495

  
9496
    setup_sym_entry(&sym, name, len, enc);
9497

  
9498
    if (st_lookup(global_symbols.sym_id, (st_data_t)&sym, (st_data_t *)&id))
9449 9499
	return id;
9450 9500

  
9451 9501
    if (rb_cString && !rb_enc_asciicompat(enc)) {
......
9581 9631
    return id;
9582 9632
}
9583 9633

  
9584
VALUE
9585
rb_id2str(ID id)
9634
static struct sym_entry *
9635
find_sym_entry(ID id)
9586 9636
{
9587 9637
    st_data_t data;
9588 9638

  
......
9590 9640
	int i = 0;
9591 9641

  
9592 9642
	if (id < INT_MAX && rb_ispunct((int)id)) {
9593
	    VALUE str = global_symbols.op_sym[i = (int)id];
9594
	    if (!str) {
9643
	    struct sym_entry *sym = &global_symbols.op_sym[i = (int)id];
9644
	    if (!sym->name) {
9595 9645
		char name[2];
9596 9646
		name[0] = (char)id;
9597 9647
		name[1] = 0;
9598
		str = rb_usascii_str_new(name, 1);
9599
		OBJ_FREEZE(str);
9600
		global_symbols.op_sym[i] = str;
9648
		setup_sym_entry(sym, name, 1, rb_usascii_encoding());
9649
		sym->name = strdup(name);
9601 9650
	    }
9602
	    return str;
9651
	    return sym;
9603 9652
	}
9604 9653
	for (i = 0; i < op_tbl_count; i++) {
9605 9654
	    if (op_tbl[i].token == id) {
9606
		VALUE str = global_symbols.op_sym[i];
9607
		if (!str) {
9608
		    str = rb_usascii_str_new2(op_tbl[i].name);
9609
		    OBJ_FREEZE(str);
9610
		    global_symbols.op_sym[i] = str;
9655
		struct sym_entry *sym = &global_symbols.op_sym[i];
9656
		if (!sym->name) {
9657
		    const char *name = op_tbl[i].name;
9658
		    setup_sym_entry(sym, name, strlen(name), rb_usascii_encoding());
9611 9659
		}
9612
		return str;
9660
		return sym;
9613 9661
	    }
9614 9662
	}
9615 9663
    }
9616 9664

  
9617 9665
    if (st_lookup(global_symbols.id_str, id, &data)) {
9618
        VALUE str = (VALUE)data;
9619
        if (RBASIC(str)->klass == 0)
9620
            RBASIC(str)->klass = rb_cString;
9621
	return str;
9666
	return (struct sym_entry *)data;
9622 9667
    }
9623 9668

  
9624 9669
    if (is_attrset_id(id)) {
......
9633 9678
	rb_str_cat(str, "=", 1);
9634 9679
	rb_intern_str(str);
9635 9680
	if (st_lookup(global_symbols.id_str, id, &data)) {
9636
            VALUE str = (VALUE)data;
9637
            if (RBASIC(str)->klass == 0)
9638
                RBASIC(str)->klass = rb_cString;
9639
            return str;
9681
	    return (struct sym_entry *)data;
9640 9682
        }
9641 9683
    }
9642 9684
    return 0;
9643 9685
}
9644 9686

  
9687
VALUE
9688
rb_id2str(ID id)
9689
{
9690
    struct sym_entry *sym = find_sym_entry(id);
9691

  
9692
    if (!sym) return 0;
9693
    return rb_enc_str_new(sym->name, SYM_INFO_LEN(sym), sym->enc);
9694
}
9695

  
9645 9696
const char *
9646 9697
rb_id2name(ID id)
9647 9698
{
9648
    VALUE str = rb_id2str(id);
9699
    struct sym_entry *sym = find_sym_entry(id);
9649 9700

  
9650
    if (!str) return 0;
9651
    return RSTRING_PTR(str);
9701
    if (!sym) return 0;
9702
    return sym->name;
9652 9703
}
9653 9704

  
9654 9705
static int
string.c
174 174
    return NULL;
175 175
}
176 176

  
177
static int
178
coderange_scan(const char *p, long len, rb_encoding *enc)
177
int
178
rb_coderange_scan(const char *p, long len, rb_encoding *enc)
179 179
{
180 180
    const char *e = p + len;
181 181

  
......
324 324

  
325 325
    if (cr == ENC_CODERANGE_UNKNOWN) {
326 326
	rb_encoding *enc = STR_ENC_GET(str);
327
        cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
327
        cr = rb_coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
328 328
        ENC_CODERANGE_SET(str, cr);
329 329
    }
330 330
    return cr;
......
1815 1815
            ptr_cr = ENC_CODERANGE_UNKNOWN;
1816 1816
        }
1817 1817
        else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
1818
            ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
1818
            ptr_cr = rb_coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
1819 1819
        }
1820 1820
    }
1821 1821
    else {
......
1832 1832
            goto incompatible;
1833 1833
        }
1834 1834
	if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
1835
	    ptr_cr = coderange_scan(ptr, len, ptr_enc);
1835
	    ptr_cr = rb_coderange_scan(ptr, len, ptr_enc);
1836 1836
	}
1837 1837
        if (str_cr == ENC_CODERANGE_UNKNOWN) {
1838 1838
            if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
......
3460 3460
        if (!enc) {
3461 3461
            rb_encoding *str_enc = STR_ENC_GET(str);
3462 3462
	    p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3463
	    if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3464
		coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3463
	    if (rb_coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3464
		rb_coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3465 3465
                rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
3466 3466
			 rb_enc_name(str_enc),
3467 3467
			 rb_enc_name(STR_ENC_GET(repl)));