From 258a472b70b9d7356d02a841864b8f9af16ed831 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 26 Apr 2019 12:53:38 -0700 Subject: [PATCH] Use realpath(3) instead of custom realpath implementation if available One reason to do this is simplicity, as this approach is ~30 lines of code instead of ~200. Performance wise, this performs 25%-115% better, using the following benchmark on OpenBSD 6.5: ```ruby require 'benchmark' f = File pwd = Dir.pwd Dir.mkdir('b') unless f.directory?('b') f.write('b/a', '') unless f.file?('b/a') args = [ ["b/a", nil], ["#{pwd}/b/a", nil], ['a', 'b'], ["#{pwd}/b/a", 'b'], ["b/a", pwd] ] args.each do |path, base| print "File.realpath(#{path.inspect}, #{base.inspect}): ".ljust(50) puts Benchmark.measure{100000.times{f.realpath(path, base)}} end ``` Before: ``` File.realpath("b/a", nil): 4.330000 2.990000 7.320000 ( 7.316244) File.realpath("/home/testr/ruby/b/a", nil): 3.560000 2.680000 6.240000 ( 6.240951) File.realpath("a", "b"): 4.370000 3.080000 7.450000 ( 7.452511) File.realpath("/home/testr/ruby/b/a", "b"): 3.730000 2.640000 6.370000 ( 6.371979) File.realpath("b/a", "/home/testr/ruby"): 3.590000 2.630000 6.220000 ( 6.226824) ``` After: ``` File.realpath("b/a", nil): 1.370000 2.030000 3.400000 ( 3.400775) File.realpath("/home/testr/ruby/b/a", nil): 1.260000 2.770000 4.030000 ( 4.024957) File.realpath("a", "b"): 2.090000 1.990000 4.080000 ( 4.080284) File.realpath("/home/testr/ruby/b/a", "b"): 1.400000 2.620000 4.020000 ( 4.015505) File.realpath("b/a", "/home/testr/ruby"): 2.150000 2.760000 4.910000 ( 4.910634) ``` By using realpath(3), we can better integrate with system security features such as OpenBSD's unveil(2) system call. This change passes all tests except for one assertion related to taintedness. Previously, if either argument to File.realpath is an absolute path, then the returned value is considered not tainted. However, I believe that behavior to be incorrect, because if there is a symlink anywhere in the path, the returned value can contain a section that was taken from the file system (unreliable source) that was not marked as untainted. Example: ```ruby Dir.mkdir('b') unless File.directory?('b') File.write('b/a', '') unless File.file?('b/a') File.symlink('b', 'c') unless File.symlink?('c') path = File.realpath('c/a'.untaint, Dir.pwd.untaint) path # "/home/testr/ruby/b/a" path.tainted? # should be true, as 'b' comes from file system ``` I believe it is safer to always mark the output of realpath as tainted to prevent this issue, which is what this commit does. --- configure.ac | 1 + file.c | 49 ++++++++++++++++++++++++++++++++++++++---- test/ruby/test_file.rb | 2 +- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 9997ee255f..c1f04b6cf9 100644 --- a/configure.ac +++ b/configure.ac @@ -1845,6 +1845,7 @@ AC_CHECK_FUNCS(pwrite) AC_CHECK_FUNCS(qsort_r) AC_CHECK_FUNCS(qsort_s) AC_CHECK_FUNCS(readlink) +AC_CHECK_FUNCS(realpath) AC_CHECK_FUNCS(round) AC_CHECK_FUNCS(sched_getaffinity) AC_CHECK_FUNCS(seekdir) diff --git a/file.c b/file.c index 7ab2f2a026..e95d2c83ef 100644 --- a/file.c +++ b/file.c @@ -4181,9 +4181,47 @@ realpath_rec(long *prefixlenp, VALUE *resolvedp, const char *unresolved, VALUE f return 0; } +static VALUE rb_file_join(VALUE ary); + static VALUE rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode) { +#ifdef HAVE_REALPATH + rb_encoding *origenc; + char resolved_buf[PATH_MAX]; + VALUE unresolved_path; + VALUE resolved; + + unresolved_path = rb_str_dup_frozen(path); + origenc = rb_enc_get(unresolved_path); + if (*RSTRING_PTR(unresolved_path) != '/' && !NIL_P(basedir)) { + unresolved_path = rb_file_join(rb_ary_new_from_args(2, basedir, unresolved_path)); + } + unresolved_path = TO_OSPATH(unresolved_path); + + if(realpath(RSTRING_PTR(unresolved_path), resolved_buf) == NULL) { + if (mode == RB_REALPATH_CHECK) { + return Qnil; + } + rb_sys_fail_path(unresolved_path); + } + resolved = rb_enc_str_new_cstr(resolved_buf, origenc); + + if (mode == RB_REALPATH_STRICT || mode == RB_REALPATH_CHECK) { + struct stat st; + + if (rb_stat(resolved, &st) < 0) { + if (mode == RB_REALPATH_STRICT) { + rb_sys_fail_path(unresolved_path); + } + return Qnil; + } + } + + if(rb_enc_str_coderange(resolved) == ENC_CODERANGE_BROKEN) { + rb_enc_associate(resolved, rb_ascii8bit_encoding()); + } +#else long prefixlen; VALUE resolved; VALUE unresolved_path; @@ -4261,6 +4299,8 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode if (realpath_rec(&prefixlen, &resolved, path_names, Qnil, loopcheck, mode, 1)) return Qnil; + RB_GC_GUARD(curdir); + if (origenc != rb_enc_get(resolved)) { if (rb_enc_str_asciionly_p(resolved)) { rb_enc_associate(resolved, origenc); @@ -4269,10 +4309,13 @@ rb_check_realpath_internal(VALUE basedir, VALUE path, enum rb_realpath_mode mode resolved = rb_str_conv_enc(resolved, NULL, origenc); } } +#endif - OBJ_INFECT(resolved, unresolved_path); + /* Always taint, as a path component not in path or basedir could + be returned due to it being the referent of a symlink. + */ + rb_obj_taint(resolved); RB_GC_GUARD(unresolved_path); - RB_GC_GUARD(curdir); return resolved; } @@ -4697,8 +4740,6 @@ rb_file_s_split(VALUE klass, VALUE path) return rb_assoc_new(rb_file_dirname(path), rb_file_s_basename(1,&path)); } -static VALUE rb_file_join(VALUE ary); - static VALUE file_inspect_join(VALUE ary, VALUE arg, int recur) { diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb index 5e9574cf32..36c154d36c 100644 --- a/test/ruby/test_file.rb +++ b/test/ruby/test_file.rb @@ -298,7 +298,7 @@ def test_realpath_taintedness assert_predicate(File.realpath(base, dir), :tainted?) base.untaint dir.untaint - assert_not_predicate(File.realpath(base, dir), :tainted?) + assert_predicate(File.realpath(base, dir), :tainted?) assert_predicate(Dir.chdir(dir) {File.realpath(base)}, :tainted?) } end -- 2.21.0