Feature #1831 » ruby-core-24593.patch
include/ruby/oniguruma.h | ||
---|---|---|
/* syntax (behavior) warning */
|
||
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
|
||
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
|
||
#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
|
||
/* meta character specifiers (onig_set_meta_char()) */
|
||
#define ONIG_META_CHAR_ESCAPE 0
|
regparse.c | ||
---|---|---|
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
|
||
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
|
||
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
|
||
ONIG_SYN_WARN_CC_DUP |
|
||
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
|
||
, ONIG_OPTION_NONE
|
||
,
|
||
... | ... | |
onig_verb_warn = f;
|
||
}
|
||
static void CC_DUP_WARN(ScanEnv *env);
|
||
static void
|
||
bbuf_free(BBuf* bbuf)
|
||
{
|
||
... | ... | |
(OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
|
||
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
|
||
add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
|
||
add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
|
||
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
|
||
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
|
||
... | ... | |
} while (0)
|
||
/*
 * Set bit `pos` in bitset `bs`; if that bit is already set, call
 * CC_DUP_WARN(env) first.
 *
 * The expansion refers to an identifier named `env` that is not a macro
 * parameter, so it must be in scope wherever the macro is used.
 * Both `bs` and `pos` are expanded more than once.
 */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
|
||
if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
|
||
BS_ROOM(bs, pos) |= BS_BIT(pos); \
|
||
} while (0)
|
||
#define BITSET_IS_EMPTY(bs,empty) do {\
|
||
int i;\
|
||
empty = 1;\
|
||
... | ... | |
} while (0)
|
||
/*
 * Set every bit from `from` to `to` (inclusive) in `bs`, never going past
 * SINGLE_BYTE_SIZE - 1.
 *
 * NOTE(review): this hunk shows two signature lines and two loop-body lines.
 * The +/- markers are not visible here; presumably the first line of each
 * pair is the pre-patch text and the second (taking `env`, using
 * BITSET_SET_BIT_CHKDUP) is its replacement -- confirm against the raw patch.
 */
static void
|
||
bitset_set_range(BitSetRef bs, int from, int to)
|
||
bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
|
||
{
|
||
int i;
|
||
for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
|
||
BITSET_SET_BIT(bs, i);
|
||
BITSET_SET_BIT_CHKDUP(bs, i);
|
||
}
|
||
}
|
||
... | ... | |
}
|
||
static int
|
||
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
|
||
add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
|
||
{
|
||
int r, inc_n, pos;
|
||
int low, high, bound, x;
|
||
... | ... | |
if (inc_n != 1) {
|
||
if (from > data[low*2])
|
||
from = data[low*2];
|
||
else CC_DUP_WARN(env);
|
||
if (to < data[(high - 1)*2 + 1])
|
||
to = data[(high - 1)*2 + 1];
|
||
else CC_DUP_WARN(env);
|
||
}
|
||
if (inc_n != 0 && (OnigCodePoint )high < n) {
|
||
... | ... | |
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
|
||
}
|
||
return add_code_range_to_buf(pbuf, from, to);
|
||
return add_code_range_to_buf(pbuf, env, from, to);
|
||
}
|
||
static int
|
||
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
|
||
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
|
||
{
|
||
int r, i, n;
|
||
OnigCodePoint pre, from, *data, to = 0;
|
||
... | ... | |
from = data[i*2];
|
||
to = data[i*2+1];
|
||
if (pre <= from - 1) {
|
||
r = add_code_range_to_buf(pbuf, pre, from - 1);
|
||
r = add_code_range_to_buf(pbuf, env, pre, from - 1);
|
||
if (r != 0) return r;
|
||
}
|
||
if (to == ~((OnigCodePoint )0)) break;
|
||
pre = to + 1;
|
||
}
|
||
if (to < ~((OnigCodePoint )0)) {
|
||
r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
|
||
r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
|
||
}
|
||
return r;
|
||
}
|
||
... | ... | |
static int
|
||
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
|
||
BBuf* bbuf2, int not2, BBuf** pbuf)
|
||
BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
|
||
{
|
||
int r;
|
||
OnigCodePoint i, n1, *data1;
|
||
... | ... | |
return bbuf_clone(pbuf, bbuf2);
|
||
}
|
||
else {
|
||
return not_code_range_buf(enc, bbuf2, pbuf);
|
||
return not_code_range_buf(enc, bbuf2, pbuf, env);
|
||
}
|
||
}
|
||
}
|
||
... | ... | |
r = bbuf_clone(pbuf, bbuf2);
|
||
}
|
||
else if (not1 == 0) { /* 1 OR (not 2) */
|
||
r = not_code_range_buf(enc, bbuf2, pbuf);
|
||
r = not_code_range_buf(enc, bbuf2, pbuf, env);
|
||
}
|
||
if (r != 0) return r;
|
||
for (i = 0; i < n1; i++) {
|
||
from = data1[i*2];
|
||
to = data1[i*2+1];
|
||
r = add_code_range_to_buf(pbuf, from, to);
|
||
r = add_code_range_to_buf(pbuf, env, from, to);
|
||
if (r != 0) return r;
|
||
}
|
||
return 0;
|
||
}
|
||
static int
|
||
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
|
||
and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
|
||
OnigCodePoint* data, int n)
|
||
{
|
||
int i, r;
|
||
... | ... | |
else if (from2 <= to1) {
|
||
if (to2 < to1) {
|
||
if (from1 <= from2 - 1) {
|
||
r = add_code_range_to_buf(pbuf, from1, from2-1);
|
||
r = add_code_range_to_buf(pbuf, env, from1, from2-1);
|
||
if (r != 0) return r;
|
||
}
|
||
from1 = to2 + 1;
|
||
... | ... | |
if (from1 > to1) break;
|
||
}
|
||
if (from1 <= to1) {
|
||
r = add_code_range_to_buf(pbuf, from1, to1);
|
||
r = add_code_range_to_buf(pbuf, env, from1, to1);
|
||
if (r != 0) return r;
|
||
}
|
||
return 0;
|
||
}
|
||
static int
|
||
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
|
||
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
|
||
{
|
||
int r;
|
||
OnigCodePoint i, j, n1, n2, *data1, *data2;
|
||
... | ... | |
if (to2 < from1) continue;
|
||
from = MAX(from1, from2);
|
||
to = MIN(to1, to2);
|
||
r = add_code_range_to_buf(pbuf, from, to);
|
||
r = add_code_range_to_buf(pbuf, env, from, to);
|
||
if (r != 0) return r;
|
||
}
|
||
}
|
||
... | ... | |
for (i = 0; i < n1; i++) {
|
||
from1 = data1[i*2];
|
||
to1 = data1[i*2+1];
|
||
r = and_code_range1(pbuf, from1, to1, data2, n2);
|
||
r = and_code_range1(pbuf, env, from1, to1, data2, n2);
|
||
if (r != 0) return r;
|
||
}
|
||
}
|
||
... | ... | |
}
|
||
static int
|
||
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
|
||
and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
|
||
{
|
||
OnigEncoding enc = env->enc;
|
||
int r, not1, not2;
|
||
BBuf *buf1, *buf2, *pbuf;
|
||
BitSetRef bsr1, bsr2;
|
||
... | ... | |
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
|
||
if (not1 != 0 && not2 != 0) {
|
||
r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
|
||
r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
|
||
}
|
||
else {
|
||
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
|
||
r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
|
||
if (r == 0 && not1 != 0) {
|
||
BBuf *tbuf;
|
||
r = not_code_range_buf(enc, pbuf, &tbuf);
|
||
r = not_code_range_buf(enc, pbuf, &tbuf, env);
|
||
if (r != 0) {
|
||
bbuf_free(pbuf);
|
||
return r;
|
||
... | ... | |
}
|
||
static int
|
||
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
|
||
or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
|
||
{
|
||
OnigEncoding enc = env->enc;
|
||
int r, not1, not2;
|
||
BBuf *buf1, *buf2, *pbuf;
|
||
BitSetRef bsr1, bsr2;
|
||
... | ... | |
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
|
||
if (not1 != 0 && not2 != 0) {
|
||
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
|
||
r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
|
||
}
|
||
else {
|
||
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
|
||
r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
|
||
if (r == 0 && not1 != 0) {
|
||
BBuf *tbuf;
|
||
r = not_code_range_buf(enc, pbuf, &tbuf);
|
||
r = not_code_range_buf(enc, pbuf, &tbuf, env);
|
||
if (r != 0) {
|
||
bbuf_free(pbuf);
|
||
return r;
|
||
... | ... | |
}
|
||
}
|
||
/*
 * Report "character class has duplicated range" for the pattern held in
 * `env`, through the onig_warn callback.
 *
 * Does nothing when onig_warn is onig_null_warn or when ruby_verbose is not
 * true (RTEST). Otherwise the warning is issued only if the syntax has the
 * ONIG_SYN_WARN_CC_DUP behavior bit and that bit is not yet recorded in
 * env->warnings_flag; the bit is recorded before the message is emitted, so
 * later calls with the same flag state are silent.
 */
static void
|
||
CC_DUP_WARN(ScanEnv *env)
|
||
{
|
||
UChar buf[WARN_BUFSIZE];
|
||
if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
|
||
if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
|
||
!((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
|
||
/* mark as already warned before emitting the message */
(env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
|
||
onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
|
||
env->pattern, env->pattern_end,
|
||
(UChar* )"character class has duplicated range");
|
||
(*onig_warn)((char* )buf);
|
||
}
|
||
}
|
||
static UChar*
|
||
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
|
||
UChar **next, OnigEncoding enc)
|
||
... | ... | |
static int
|
||
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
|
||
OnigEncoding enc ARG_UNUSED,
|
||
ScanEnv* env,
|
||
OnigCodePoint sb_out, const OnigCodePoint mbr[])
|
||
{
|
||
int i, r;
|
||
... | ... | |
if (j >= sb_out) {
|
||
if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
|
||
else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
|
||
r = add_code_range_to_buf(&(cc->mbuf), j,
|
||
r = add_code_range_to_buf(&(cc->mbuf), env, j,
|
||
ONIGENC_CODE_RANGE_TO(mbr, i));
|
||
if (r != 0) return r;
|
||
i++;
|
||
... | ... | |
goto sb_end;
|
||
}
|
||
BITSET_SET_BIT(cc->bs, j);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, j);
|
||
}
|
||
}
|
||
sb_end:
|
||
for ( ; i < n; i++) {
|
||
r = add_code_range_to_buf(&(cc->mbuf),
|
||
r = add_code_range_to_buf(&(cc->mbuf), env,
|
||
ONIGENC_CODE_RANGE_FROM(mbr, i),
|
||
ONIGENC_CODE_RANGE_TO(mbr, i));
|
||
if (r != 0) return r;
|
||
... | ... | |
if (j >= sb_out) {
|
||
goto sb_end2;
|
||
}
|
||
BITSET_SET_BIT(cc->bs, j);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, j);
|
||
}
|
||
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
|
||
}
|
||
for (j = prev; j < sb_out; j++) {
|
||
BITSET_SET_BIT(cc->bs, j);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, j);
|
||
}
|
||
sb_end2:
|
||
... | ... | |
for (i = 0; i < n; i++) {
|
||
if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
|
||
r = add_code_range_to_buf(&(cc->mbuf), prev,
|
||
r = add_code_range_to_buf(&(cc->mbuf), env, prev,
|
||
ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
|
||
if (r != 0) return r;
|
||
}
|
||
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
|
||
}
|
||
if (prev < 0x7fffffff) {
|
||
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
|
||
r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
|
||
if (r != 0) return r;
|
||
}
|
||
}
|
||
... | ... | |
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
|
||
if (r == 0) {
|
||
return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
|
||
return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
|
||
}
|
||
else if (r != ONIG_NO_SUPPORT_CONFIG) {
|
||
return r;
|
||
... | ... | |
if (not != 0) {
|
||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
||
BITSET_SET_BIT(cc->bs, c);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
||
}
|
||
else {
|
||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
||
BITSET_SET_BIT(cc->bs, c);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
}
|
||
break;
|
||
... | ... | |
if (not != 0) {
|
||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
||
BITSET_SET_BIT(cc->bs, c);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
}
|
||
else {
|
||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
|
||
BITSET_SET_BIT(cc->bs, c);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
||
}
|
||
... | ... | |
case ONIGENC_CTYPE_WORD:
|
||
if (not == 0) {
|
||
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
|
||
if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
|
||
}
|
||
... | ... | |
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
|
||
if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
|
||
&& ! ONIGENC_IS_CODE_WORD(enc, c))
|
||
BITSET_SET_BIT(cc->bs, c);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, c);
|
||
}
|
||
}
|
||
break;
|
||
... | ... | |
if (*state == CCS_VALUE && *type != CCV_CLASS) {
|
||
if (*type == CCV_SB)
|
||
BITSET_SET_BIT(cc->bs, (int )(*vs));
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
|
||
else if (*type == CCV_CODE_POINT) {
|
||
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
|
||
if (r < 0) return r;
|
||
... | ... | |
switch (*state) {
|
||
case CCS_VALUE:
|
||
if (*type == CCV_SB)
|
||
BITSET_SET_BIT(cc->bs, (int )(*vs));
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
|
||
else if (*type == CCV_CODE_POINT) {
|
||
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
|
||
if (r < 0) return r;
|
||
... | ... | |
else
|
||
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
|
||
}
|
||
bitset_set_range(cc->bs, (int )*vs, (int )v);
|
||
bitset_set_range(env, cc->bs, (int )*vs, (int )v);
|
||
}
|
||
else {
|
||
r = add_code_range(&(cc->mbuf), env, *vs, v);
|
||
... | ... | |
else
|
||
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
|
||
}
|
||
bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
||
bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
|
||
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
|
||
if (r < 0) return r;
|
||
#if 0
|
||
... | ... | |
r = parse_char_class(&anode, tok, &p, end, env);
|
||
if (r != 0) goto cc_open_err;
|
||
acc = NCCLASS(anode);
|
||
r = or_cclass(cc, acc, env->enc);
|
||
r = or_cclass(cc, acc, env);
|
||
onig_node_free(anode);
|
||
cc_open_err:
|
||
... | ... | |
state = CCS_START;
|
||
if (IS_NOT_NULL(prev_cc)) {
|
||
r = and_cclass(prev_cc, cc, env->enc);
|
||
r = and_cclass(prev_cc, cc, env);
|
||
if (r != 0) goto err;
|
||
bbuf_free(cc->mbuf);
|
||
}
|
||
... | ... | |
}
|
||
if (IS_NOT_NULL(prev_cc)) {
|
||
r = and_cclass(prev_cc, cc, env->enc);
|
||
r = and_cclass(prev_cc, cc, env);
|
||
if (r != 0) goto err;
|
||
bbuf_free(cc->mbuf);
|
||
cc = prev_cc;
|
||
... | ... | |
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
|
||
if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
|
||
BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
|
||
BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
|
||
else
|
||
add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
|
||
}
|
||
... | ... | |
add_code_range(&(cc->mbuf), env, *to, *to);
|
||
}
|
||
else {
|
||
BITSET_SET_BIT(bs, *to);
|
||
BITSET_SET_BIT_CHKDUP(bs, *to);
|
||
}
|
||
}
|
||
#else
|
||
... | ... | |
BITSET_CLEAR_BIT(bs, *to);
|
||
}
|
||
else
|
||
BITSET_SET_BIT(bs, *to);
|
||
BITSET_SET_BIT_CHKDUP(bs, *to);
|
||
}
|
||
}
|
||
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
|
regparse.h | ||
---|---|---|
int curr_max_regnum;
|
||
int has_recursion;
|
||
#endif
|
||
int warnings_flag;
|
||
} ScanEnv;
|
||