From 549432978fb92f790f356aacc54ce2c04752f469 Mon Sep 17 00:00:00 2001 From: Hiroshi Shirosaki Date: Mon, 11 Sep 2017 17:22:34 +0900 Subject: [PATCH] dir.c: performance fix with braces Braces were expended before ruby_glob0(). This caused to call replace_real_basename() for same plain patterns repeatedly. Move blace expansion into glob_helper() in ruby_glob0() to reduce replace_real_basename() call. This fix changes the order of glob results. --- dir.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 138 insertions(+), 30 deletions(-) diff --git a/dir.c b/dir.c index 71eb935..c9b7689 100644 --- a/dir.c +++ b/dir.c @@ -1527,7 +1527,7 @@ do_opendir(const int basefd, const char *path, int flags, rb_encoding *enc, } /* Globing pattern */ -enum glob_pattern_type { PLAIN, ALPHA, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; +enum glob_pattern_type { PLAIN, ALPHA, BRACE, MAGICAL, RECURSIVE, MATCH_ALL, MATCH_DIR }; /* Return nonzero if S has any special globbing chars in it. */ static enum glob_pattern_type @@ -1535,15 +1535,20 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc) { const int escape = !(flags & FNM_NOESCAPE); int hasalpha = 0; + int hasmagical = 0; register char c; while (p < pend && (c = *p++) != 0) { switch (c) { + case '{': + return BRACE; + case '*': case '?': case '[': - return MAGICAL; + hasmagical = 1; + break; case '\\': if (escape && p++ >= pend) @@ -1568,7 +1573,7 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc) p = Next(p-1, pend, enc); } - return hasalpha ? ALPHA : PLAIN; + return hasmagical ? MAGICAL : hasalpha ? ALPHA : PLAIN; } /* Find separator in globbing pattern. */ @@ -1589,6 +1594,13 @@ find_dirsep(const char *p, const char *pend, int flags, rb_encoding *enc) open = 0; continue; + case '{': + open = 1; + continue; + case '}': + open = 0; + continue; + case '/': if (!open) return (char *)p-1; @@ -1664,6 +1676,21 @@ glob_make_pattern(const char *p, const char *e, int flags, rb_encoding *enc) const enum glob_pattern_type non_magic = (USE_NAME_ON_FS || FNM_SYSCASE) ? PLAIN : ALPHA; char *buf; + if (magic == BRACE) { + /* brace pattern is parsed after expansion */ + buf = GLOB_ALLOC_N(char, e-p+1); + if (!buf) { + GLOB_FREE(tmp); + goto error; + } + memcpy(buf, p, e-p); + buf[e-p] = '\0'; + tmp->type = BRACE; + tmp->str = buf; + *tail = tmp; + tmp->next = 0; + return list; + } if (!(FNM_SYSCASE || magic > non_magic) && !recursive && *m) { const char *m2; while (has_magic(m+1, m2 = find_dirsep(m+1, e, flags, enc), flags, enc) <= non_magic && @@ -1994,6 +2021,58 @@ dirent_match(const char *pat, rb_encoding *enc, const char *name, const struct d return 0; } +struct push_glob_args { + int fd; + const char *path; + size_t baselen; + size_t namelen; + int dirsep; /* '/' should be placed before appending child entry's name to 'path'. */ + rb_pathtype_t pathtype; /* type of 'path' */ + int flags; + const ruby_glob_funcs_t *funcs; + VALUE arg; +}; + +struct dirent_brace_args { + const char *name; + const struct dirent *dp; + int flags; +}; + +static int +dirent_match_brace(const char *pattern, VALUE val, void *enc) +{ + struct dirent_brace_args *arg = (struct dirent_brace_args *)val; + + return dirent_match(pattern, enc, arg->name, arg->dp, arg->flags); +} + +/* join paths from pattern list of glob_make_pattern() */ +static const char* +join_path_from_pattern(struct glob_pattern **beg) +{ + struct glob_pattern *p; + const char *path = ""; + + for (p = *beg; p; p = p->next) { + const char *str; + switch (p->type) { + case RECURSIVE: + str = "**"; + break; + default: + str = p->str; + } + path = join_path(path, strlen(path), (p != *beg), str, strlen(str)); + } + return path; +} + +static int push_caller(const char *path, VALUE val, void *enc); + +static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, + rb_encoding *enc, VALUE var); + static int glob_helper( int fd, @@ -2012,7 +2091,7 @@ glob_helper( struct stat st; int status = 0; struct glob_pattern **cur, **new_beg, **new_end; - int plain = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; + int plain = 0, brace = 0, magical = 0, recursive = 0, match_all = 0, match_dir = 0; int escape = !(flags & FNM_NOESCAPE); size_t pathlen = baselen + namelen; const char *base = path; @@ -2036,6 +2115,9 @@ glob_helper( magical = 1; #endif break; + case BRACE: + brace = 1; + break; case MAGICAL: magical = 2; break; @@ -2050,6 +2132,20 @@ glob_helper( } } + if (brace) { + struct push_glob_args args; + const char* brace_path = join_path_from_pattern(beg); + args.fd = fd; + args.path = path; + args.baselen = baselen; + args.namelen = namelen; + args.dirsep = dirsep; + args.flags = flags; + args.funcs = funcs; + args.arg = arg; + return ruby_brace_expand(brace_path, flags, push_caller, (VALUE)&args, enc, Qfalse); + } + if (*base) { if (match_all && pathtype == path_unknown) { if (do_lstat(fd, base, &st, flags, enc) == 0) { @@ -2186,6 +2282,7 @@ glob_helper( for (cur = beg; cur < end; ++cur) { struct glob_pattern *p = *cur; + struct dirent_brace_args args; if (p->type == RECURSIVE) { if (new_pathtype == path_directory || /* not symlink but real directory */ new_pathtype == path_exist) { @@ -2195,6 +2292,14 @@ glob_helper( p = p->next; /* 0 times recursion */ } switch (p->type) { + case BRACE: + args.name = name; + args.dp = dp; + args.flags = flags; + if (ruby_brace_expand(p->str, flags, dirent_match_brace, + (VALUE)&args, enc, Qfalse) > 0) + *new_end++ = p->next; + break; case ALPHA: # if USE_NAME_ON_FS == USE_NAME_ON_FS_BY_FNMATCH if (plainname) { @@ -2293,6 +2398,24 @@ glob_helper( } static int +push_caller(const char *path, VALUE val, void *enc) +{ + struct push_glob_args *arg = (struct push_glob_args *)val; + struct glob_pattern *list; + int status; + + list = glob_make_pattern(path, path + strlen(path), arg->flags, enc); + if (!list) { + return -1; + } + status = glob_helper(arg->fd, arg->path, arg->baselen, arg->namelen, arg->dirsep, + arg->pathtype, &list, &list + 1, arg->flags, arg->funcs, + arg->arg, enc); + glob_free_pattern(list); + return status; +} + +static int ruby_glob0(const char *path, int fd, const char *base, int flags, const ruby_glob_funcs_t *funcs, VALUE arg, rb_encoding *enc) @@ -2484,25 +2607,11 @@ ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg) return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding()); } -struct push_glob_args { - struct glob_args glob; - int flags; - int fd; -}; - -static int -push_caller(const char *path, VALUE val, void *enc) -{ - struct push_glob_args *arg = (struct push_glob_args *)val; - - return ruby_glob0(path, arg->fd, arg->glob.base, arg->flags, &rb_glob_funcs, - (VALUE)&arg->glob, enc); -} - static int push_glob(VALUE ary, VALUE str, VALUE base, int flags) { - struct push_glob_args args; + struct glob_args args; + int fd; rb_encoding *enc = rb_enc_get(str); #if defined _WIN32 || defined __APPLE__ @@ -2513,30 +2622,29 @@ push_glob(VALUE ary, VALUE str, VALUE base, int flags) if (rb_enc_to_index(enc) == ENCINDEX_US_ASCII) enc = rb_ascii8bit_encoding(); flags |= GLOB_VERBOSE; - args.glob.func = push_pattern; - args.glob.value = ary; - args.glob.enc = enc; - args.glob.base = 0; - args.flags = flags; - args.fd = AT_FDCWD; + args.func = push_pattern; + args.value = ary; + args.enc = enc; + args.base = 0; + fd = AT_FDCWD; if (!NIL_P(base)) { if (!RB_TYPE_P(base, T_STRING) || !rb_enc_check(str, base)) { struct dir_data *dirp = DATA_PTR(base); if (!dirp->dir) dir_closed(); #ifdef HAVE_DIRFD - if ((args.fd = dirfd(dirp->dir)) == -1) + if ((fd = dirfd(dirp->dir)) == -1) rb_sys_fail_path(dir_inspect(base)); #endif base = dirp->path; } - args.glob.base = RSTRING_PTR(base); + args.base = RSTRING_PTR(base); } #if defined _WIN32 || defined __APPLE__ enc = rb_utf8_encoding(); #endif - return ruby_brace_expand(RSTRING_PTR(str), flags, - push_caller, (VALUE)&args, enc, str); + return ruby_glob0(RSTRING_PTR(str), fd, args.base, flags, &rb_glob_funcs, + (VALUE)&args, enc); } static VALUE -- 2.7.4