Project

General

Profile

Actions

Feature #19129

closed

Radix_Sort for arrays of fixnums (implemented)

Added by e8c (Viktor Reznov) almost 2 years ago. Updated almost 2 years ago.

Status:
Feedback
Assignee:
-
Target version:
-
[ruby-core:110729]

Description

Code is already written, all in one listing (git diff, test file, and results of tests):

$ cat ../ruby_sort/test.rb 
#!/bin/ruby

srand 0
r = Array.new 1e7.to_i do rand -2 ** 40...2 ** 40 end

puts

5.times do

    a = r.clone
    t = Time.now
    a.sort!
    puts "\tRun ##{_1 + 1}: %.3f s" % [Time.now - t]
    
end

puts; exit unless $*[0]

p (r.sort { _1 <=> _2 }) == (r.sort)

puts

$ ./miniruby ../ruby_sort/test.rb  # qsort

	Run #1: 2.209 s
	Run #2: 2.259 s
	Run #3: 2.229 s
	Run #4: 2.196 s
	Run #5: 2.226 s

$ ./miniruby ../ruby_sort/test.rb  # rsort

	Run #1: 0.328 s
	Run #2: 0.351 s
	Run #3: 0.352 s
	Run #4: 0.323 s
	Run #5: 0.342 s

diff --git a/array.c b/array.c
index a33c43bdbf..fe52291c21 100644
--- a/array.c
+++ b/array.c
@@ -3521,6 +3521,83 @@ sort_2(const void *ap, const void *bp, void *dummy)
     return n;
 }
 
+#if __linux__ && __SIZEOF_POINTER__ == 8
+
+static int rsort(void *const _p, const long l) {
+
+    for (const VALUE *p = _p, *const P = p + 64; p < P;)
+        if (!FIXNUM_P(*p++)) return 1;
+
+    uint64_t F[8][256] = {}, *a = _p, *b = malloc(8 * l);
+    if (b == NULL) return 1;
+
+    for (uint64_t *p = a + 64, *p2 = b + 64, *const P = p + (l - 64); p < P; p2++) {
+        *p2 = *p ^ (1UL << 63);
+        for (int i = 0; i < 8; i++)
+            F[i][(*p2 >> i * 8) & 255]++;
+        if (!FIXNUM_P(*p++)) {
+            free(b);
+            return 1;
+        }
+    }
+
+    for (uint64_t *p = a, *p2 = b, *const P = p + 64; p < P; p2++) {
+        *p2 = *p++ ^ (1UL << 63);
+        for (int i = 0; i < 8; i++)
+            F[i][(*p2 >> i * 8) & 255]++;
+    }
+
+    { uint64_t *t = a; a = b, b = t; }
+
+    uint64_t skip[8] = {}; int last = 10;
+
+    for (int i = 0; i < 8; i++) {
+        uint64_t x = 0, t, *o = F[i];
+        for (int j = 0; j < 256; j++) {
+            if ((t = o[j]) == (uint64_t)l) {
+                skip[i] = 1;
+                break;
+            }
+            x = (o[j] = x) + t;
+        }
+    }
+
+    for (int i = 7; i >= 0; --i)
+        if (skip[i] == 0) {
+            last = i;
+            break;
+        }
+
+    if (last == 10) {
+        free(a);
+        return 0;
+    }
+
+    for (int i = 0; i < 8; i++) {
+        if (skip[i]) continue;
+        uint64_t *o = F[i];
+        if (i < last)
+            for (uint64_t *p = a, *const P = p + l; p < P; p++) {
+                b[o[(*p >> i * 8) & 255]++] = *p;
+            }
+        else
+            for (uint64_t *p = a, *const P = p + l; p < P; p++) {
+                b[o[(*p >> i * 8) & 255]++] = *p ^ (1UL << 63);
+            }
+        o = a, a = b, b = o;
+    }
+
+    if (a != _p) {
+        memcpy(_p, a, 8 * l);
+        b = a;
+    }
+
+    free(b);
+    return 0;
+}
+
+#endif
+
 /*
  *  call-seq:
  *    array.sort! -> self
@@ -3577,8 +3654,20 @@ rb_ary_sort_bang(VALUE ary)
         data.cmp_opt.opt_methods = 0;
         data.cmp_opt.opt_inited = 0;
         RARRAY_PTR_USE(tmp, ptr, {
+
+#if __linux__ && __SIZEOF_POINTER__ == 8
+
+            if ((len < 10000 || rb_block_given_p()) || rsort(ptr, len))
+                ruby_qsort(ptr, len, sizeof(VALUE),
+                           rb_block_given_p() ? sort_1 : sort_2, &data);
+
+#else
+
             ruby_qsort(ptr, len, sizeof(VALUE),
-                       rb_block_given_p()?sort_1:sort_2, &data);
+                       rb_block_given_p() ? sort_1 : sort_2, &data);
+
+#endif
+
         }); /* WB: no new reference */
         rb_ary_modify(ary);
         if (ARY_EMBED_P(tmp)) {

From: https://github.com/alantudyk/xPNG/blob/8dcb85cde20a41030fd1de8402a7aa0047936a25/until_fork/ruby_sort.txt

No time to create a pull request.

Actions #1

Updated by e8c (Viktor Reznov) almost 2 years ago

  • Description updated (diff)
Actions #2

Updated by e8c (Viktor Reznov) almost 2 years ago

  • Description updated (diff)

Updated by e8c (Viktor Reznov) almost 2 years ago

And doubles (merge_sort): https://github.com/alantudyk/ruby/commit/5b3d6c994f133698228358ca91d3e8965fbb45fe

$ ../build/miniruby double.rb 

	Run #1: 1.337 s
	Run #2: 1.340 s
	Run #3: 1.340 s
	Run #4: 1.340 s
	Run #5: 1.341 s

$ ./double.rb 

	Run #1: 3.836 s
	Run #2: 3.843 s
	Run #3: 3.823 s
	Run #4: 3.837 s
	Run #5: 3.844 s

I promise not to use this issue as a personal blog.

Updated by nobu (Nobuyoshi Nakada) almost 2 years ago

That idea is interesting.

I have a few questions.

  • How is Linux concerned?
  • What are tons of magic numbers?
Actions #5

Updated by nobu (Nobuyoshi Nakada) almost 2 years ago

  • Status changed from Open to Feedback
Actions

Also available in: Atom PDF

Like0
Like0Like0Like0Like0Like0