Project

General

Profile

Actions

Feature #13873

closed

Optimize Dir.glob with FNM_EXTGLOB

Added by naruse (Yui NARUSE) over 6 years ago. Updated over 6 years ago.

Status:
Closed
Assignee:
-
Target version:
-
[ruby-core:82660]

Description

Rails often queries Dir.glob with complex pattern.

On accessing Rails site, it calls glob as follows on querying templates.
Note that Dir[] insists File::FNM_EXTGLOB.

      def find_template_paths(query)
        Dir[query].uniq.reject do |filename|
          File.directory?(filename) ||
            # deals with case-insensitive file systems.
            !File.fnmatch(query, filename, File::FNM_EXTGLOB)
        end
      end

https://github.com/rails/rails/blob/6f1c18308ebffc97d51440cdeed7be71de58f26a/actionview/lib/action_view/template/resolver.rb#L247

For example on accessing index page of Post Model (/posts), it calls following glob:
Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]'

It calls many stat as follows:

% sudo strace ./miniruby58882 -e'Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]'|&grep newfstatat|wc -l
400

Because current glob implementation handles braces in early stage,
then calls stat for each combination of brace patterns instead of using readdir results.

Below is a patch to use readdir instead of hundreds of syscalls.
Note that this patch changes the order of the result as follows:

  1) Failure:
TestDir#test_glob [/home/naruse/ruby/test/ruby/test_dir.rb:157]:
<["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}",
 "/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a"]> expected but was
<["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a",
 "/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}"]>.
diff --git a/dir.c b/dir.c
index b7afaec4e0..7ca84fa8c5 100644
--- a/dir.c
+++ b/dir.c
@@ -290,6 +290,8 @@ bracket(
 #define UNESCAPE(p) (escape && *(p) == '\\' ? (p) + 1 : (p))
 #define ISEND(p) (!*(p) || (pathname && *(p) == '/'))
 #define RETURN(val) return *pcur = p, *scur = s, (val);
+#define FNMATCH_ALLOC_N(type, n) ((type *)malloc(sizeof(type) * (n)))
+#define FNMATCH_FREE(ptr) free(ptr)
 
 static int
 fnmatch_helper(
@@ -348,6 +350,56 @@ fnmatch_helper(
 	    }
 	    goto failed;
 	  }
+
+	  case '{': {
+	    size_t len = pend - p;
+	    char *buf = FNMATCH_ALLOC_N(char, len);
+	    const char *rbrace = NULL;
+	    while (p < pend) {
+		const char *t = ++p;
+		int nest = 0;
+		while (p < pend && !(*p == ',' && nest == 0)) {
+		    if (*p == '{') nest++;
+		    if (*p == '}') {
+			if (nest == 0) {
+			    if (!rbrace) rbrace = p;
+			    goto rest;
+			}
+			nest--;
+		    }
+		    if (*p == '\\' && escape) {
+			if (++p >= pend) break;
+		    }
+		    Inc(p, pend, enc);
+		}
+		if (!rbrace) {
+		    rbrace = p;
+		    while (rbrace < pend && !(*rbrace == '}' && nest == 0)) {
+			if (*rbrace == '{') nest++;
+			if (*rbrace == '}') nest--;
+			if (*rbrace == '\\' && escape) {
+			    if (++p >= pend) break;
+			}
+			Inc(rbrace, pend, enc);
+		    }
+		}
+rest:
+		memcpy(buf, t, p-t);
+		buf[p-t]=0;
+		strlcpy(buf+(p-t), rbrace+1, len-(p-t));
+		{
+		    const char *pp = buf, *ss = s;
+		    r = fnmatch_helper((const char **)&pp, &ss, flags|FNM_DOTMATCH, enc);
+		}
+		if (r == 0) {
+		    p = buf;
+		    FNMATCH_FREE(buf);
+		    RETURN(0);
+		}
+		if (p >= rbrace) break;
+	    }
+	    FNMATCH_FREE(buf);
+	  }
 	}
 
 	/* ordinary */
@@ -1426,6 +1478,12 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc)
 	  case '[':
 	    return MAGICAL;
 
+	  case '{':
+	    if (flags & FNM_EXTGLOB) {
+	      return MAGICAL;
+	    }
+	    break;
+
 	  case '\\':
 	    if (escape && p++ >= pend)
 		continue;
@@ -2272,6 +2330,13 @@ push_pattern(const char *path, VALUE ary, void *enc)
     rb_ary_push(ary, name);
 }
 
+struct push_glob_args {
+    struct glob_args glob;
+    int flags;
+    int fd;
+};
+static int push_caller(const char *path, VALUE val, void *enc);
+
 static int
 ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
 		  rb_encoding *enc, VALUE var)
@@ -2280,7 +2345,7 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
     const char *p = str;
     const char *pend = p + strlen(p);
     const char *s = p;
-    const char *lbrace = 0, *rbrace = 0;
+    const char *lbrace = NULL, *rbrace = NULL;
     int nest = 0, status = 0;
 
     while (*p) {
@@ -2299,9 +2364,18 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg,
 
     if (lbrace && rbrace) {
 	size_t len = strlen(s) + 1;
-	char *buf = GLOB_ALLOC_N(char, len);
+	char *buf;
 	long shift;
 
+	if (func == push_caller && !strchr(lbrace, '/')) {
+	    /* Now it reaches file basename entry. */
+	    /* Handle braces in glob_helper */
+	    struct push_glob_args *a = (struct push_glob_args *)arg;
+	    a->flags |= FNM_EXTGLOB;
+	    return glob_call_func(func, s, arg, enc);
+	}
+
+	buf = GLOB_ALLOC_N(char, len);
 	if (!buf) return -1;
 	memcpy(buf, s, lbrace-s);
 	shift = (lbrace-s);
@@ -2365,12 +2439,6 @@ ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg)
     return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding());
 }
 
-struct push_glob_args {
-    struct glob_args glob;
-    int flags;
-    int fd;
-};
-
 static int
 push_caller(const char *path, VALUE val, void *enc)
 {

Related issues 1 (0 open1 closed)

Related to Ruby master - Bug #13167: Dir.glob is 25x slower since Ruby 2.2Closedh.shirosaki (Hiroshi Shirosaki)Actions
Actions

Also available in: Atom PDF

Like0
Like0Like0Like0