Feature #13873
closedOptimize Dir.glob with FNM_EXTGLOB
Description
Rails often queries Dir.glob with complex pattern.
On accessing Rails site, it calls glob as follows on querying templates.
Note that Dir[] insists File::FNM_EXTGLOB.
def find_template_paths(query)
Dir[query].uniq.reject do |filename|
File.directory?(filename) ||
# deals with case-insensitive file systems.
!File.fnmatch(query, filename, File::FNM_EXTGLOB)
end
end
For example on accessing index page of Post Model (/posts), it calls following glob:
Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]'
It calls many stat as follows:
% sudo strace ./miniruby58882 -e'Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]'|&grep newfstatat|wc -l
400
Because current glob implementation handles braces in early stage,
then calls stat for each combination of brace patterns instead of using readdir results.
Below is a patch to use readdir instead of hundreds of syscalls.
Note that this patch changes the order of the result as follows:
1) Failure:
TestDir#test_glob [/home/naruse/ruby/test/ruby/test_dir.rb:157]:
<["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}",
"/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a"]> expected but was
<["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a",
"/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}"]>.
diff --git a/dir.c b/dir.c
index b7afaec4e0..7ca84fa8c5 100644
--- a/dir.c
+++ b/dir.c
@@ -290,6 +290,8 @@ bracket(
#define UNESCAPE(p) (escape && *(p) == '\\' ? (p) + 1 : (p))
#define ISEND(p) (!*(p) || (pathname && *(p) == '/'))
#define RETURN(val) return *pcur = p, *scur = s, (val);
+#define FNMATCH_ALLOC_N(type, n) ((type *)malloc(sizeof(type) * (n)))
+#define FNMATCH_FREE(ptr) free(ptr)
static int
fnmatch_helper(
@@ -348,6 +350,56 @@ fnmatch_helper(
}
goto failed;
}
+
+ case '{': {
+ size_t len = pend - p;
+ char *buf = FNMATCH_ALLOC_N(char, len);
+ const char *rbrace = NULL;
+ while (p < pend) {
+ const char *t = ++p;
+ int nest = 0;
+ while (p < pend && !(*p == ',' && nest == 0)) {
+ if (*p == '{') nest++;
+ if (*p == '}') {
+ if (nest == 0) {
+ if (!rbrace) rbrace = p;
+ goto rest;
+ }
+ nest--;
+ }
+ if (*p == '\\' && escape) {
+ if (++p >= pend) break;
+ }
+ Inc(p, pend, enc);
+ }
+ if (!rbrace) {
+ rbrace = p;
+ while (rbrace < pend && !(*rbrace == '}' && nest == 0)) {
+ if (*rbrace == '{') nest++;
+ if (*rbrace == '}') nest--;
+ if (*rbrace == '\\' && escape) {
+ if (++p >= pend) break;
+ }
+ Inc(rbrace, pend, enc);
+ }
+ }
+rest:
+ memcpy(buf, t, p-t);
+ buf[p-t]=0;
+ strlcpy(buf+(p-t), rbrace+1, len-(p-t));
+ {
+ const char *pp = buf, *ss = s;
+ r = fnmatch_helper((const char **)&pp, &ss, flags|FNM_DOTMATCH, enc);
+ }
+ if (r == 0) {
+ p = buf;
+ FNMATCH_FREE(buf);
+ RETURN(0);
+ }
+ if (p >= rbrace) break;
+ }
+ FNMATCH_FREE(buf);
+ }
}
/* ordinary */
@@ -1426,6 +1478,12 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc)
case '[':
return MAGICAL;
+ case '{':
+ if (flags & FNM_EXTGLOB) {
+ return MAGICAL;
+ }
+ break;
+
case '\\':
if (escape && p++ >= pend)
continue;
@@ -2272,6 +2330,13 @@ push_pattern(const char *path, VALUE ary, void *enc)
rb_ary_push(ary, name);
}
+struct push_glob_args {
+ struct glob_args glob;
+ int flags;
+ int fd;
+};
+static int push_caller(const char *path, VALUE val, void *enc);
+
static int
ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
rb_encoding *enc, VALUE var)
@@ -2280,7 +2345,7 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
const char *p = str;
const char *pend = p + strlen(p);
const char *s = p;
- const char *lbrace = 0, *rbrace = 0;
+ const char *lbrace = NULL, *rbrace = NULL;
int nest = 0, status = 0;
while (*p) {
@@ -2299,9 +2364,18 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
if (lbrace && rbrace) {
size_t len = strlen(s) + 1;
- char *buf = GLOB_ALLOC_N(char, len);
+ char *buf;
long shift;
+ if (func == push_caller && !strchr(lbrace, '/')) {
+ /* Now it reaches file basename entry. */
+ /* Handle braces in glob_helper */
+ struct push_glob_args *a = (struct push_glob_args *)arg;
+ a->flags |= FNM_EXTGLOB;
+ return glob_call_func(func, s, arg, enc);
+ }
+
+ buf = GLOB_ALLOC_N(char, len);
if (!buf) return -1;
memcpy(buf, s, lbrace-s);
shift = (lbrace-s);
@@ -2365,12 +2439,6 @@ ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg)
return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding());
}
-struct push_glob_args {
- struct glob_args glob;
- int flags;
- int fd;
-};
-
static int
push_caller(const char *path, VALUE val, void *enc)
{
Updated by naruse (Yui NARUSE) over 7 years ago
- Related to Bug #13167: Dir.glob is 25x slower since Ruby 2.2 added
Updated by matz (Yukihiro Matsumoto) over 7 years ago
Agreed.
Matz
Updated by naruse (Yui NARUSE) over 7 years ago
- Status changed from Open to Closed
Applied in changeset trunk|r60253.
Dir.glob with FNM_EXTGLOB is optimized [Feature #13873]
The order of resulted array is changed in some cases.