diff --git a/configure.in b/configure.in index e8b1cc7..8463bf7 100644 --- a/configure.in +++ b/configure.in @@ -1186,7 +1186,7 @@ main() AC_CHECK_FUNCS(cygwin_conv_path) AC_LIBOBJ([langinfo]) ], -[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp $LIBS" +[mingw*], [ LIBS="-lshell32 -lws2_32 -limagehlp -lshlwapi $LIBS" ac_cv_header_a_out_h=no ac_cv_header_pwd_h=no ac_cv_header_utime_h=no diff --git a/file.c b/file.c index 4050067..c30e3b2 100644 --- a/file.c +++ b/file.c @@ -2882,8 +2882,9 @@ append_fspath(VALUE result, VALUE fname, char *dir, rb_encoding **enc, rb_encodi return buf + dirlen; } -static VALUE -file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) +#ifndef _WIN32 +VALUE +rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result) { const char *s, *b, *fend; char *buf, *p, *pend, *root; @@ -2945,7 +2946,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) /* specified drive, but not full path */ int same = 0; if (!NIL_P(dname) && !not_same_drive(dname, s[0])) { - file_expand_path(dname, Qnil, abs_mode, result); + rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); BUFINIT(); if (has_drive_letter(p) && TOLOWER(p[0]) == TOLOWER(s[0])) { /* ok, same drive */ @@ -2969,7 +2970,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) #endif else if (!rb_is_absolute_path(s)) { if (!NIL_P(dname)) { - file_expand_path(dname, Qnil, abs_mode, result); + rb_file_expand_path_internal(dname, Qnil, abs_mode, long_name, result); rb_enc_associate(result, rb_enc_check(result, fname)); BUFINIT(); p = pend; @@ -3222,6 +3223,7 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) ENC_CODERANGE_CLEAR(result); return result; } +#endif /* _WIN32 */ #define EXPAND_PATH_BUFFER() rb_usascii_str_new(0, MAXPATHLEN + 2) @@ -3232,14 +3234,21 @@ file_expand_path(VALUE fname, VALUE dname, int abs_mode, VALUE result) static VALUE 
file_expand_path_1(VALUE fname) { - return file_expand_path(fname, Qnil, 0, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, Qnil, 0, 0, EXPAND_PATH_BUFFER()); } VALUE rb_file_expand_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return file_expand_path(fname, dname, 0, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, dname, 0, 1, EXPAND_PATH_BUFFER()); +} + +VALUE +rb_file_expand_path_fast(VALUE fname, VALUE dname) +{ + check_expand_path_args(fname, dname); + return rb_file_expand_path_internal(fname, dname, 0, 0, EXPAND_PATH_BUFFER()); } /* @@ -3276,7 +3285,7 @@ VALUE rb_file_absolute_path(VALUE fname, VALUE dname) { check_expand_path_args(fname, dname); - return file_expand_path(fname, dname, 1, EXPAND_PATH_BUFFER()); + return rb_file_expand_path_internal(fname, dname, 1, 1, EXPAND_PATH_BUFFER()); } /* @@ -5250,7 +5259,7 @@ rb_find_file_ext_safe(VALUE *filep, const char *const *ext, int safe_level) RB_GC_GUARD(str) = rb_get_path_check(str, safe_level); if (RSTRING_LEN(str) == 0) continue; - file_expand_path(fname, str, 0, tmp); + rb_file_expand_path_internal(fname, str, 0, 0, tmp); if (rb_file_load_ok(RSTRING_PTR(tmp))) { *filep = copy_path_class(tmp, *filep); return (int)(j+1); @@ -5309,7 +5318,7 @@ rb_find_file_safe(VALUE path, int safe_level) VALUE str = RARRAY_PTR(load_path)[i]; RB_GC_GUARD(str) = rb_get_path_check(str, safe_level); if (RSTRING_LEN(str) > 0) { - file_expand_path(path, str, 0, tmp); + rb_file_expand_path_internal(path, str, 0, 0, tmp); f = RSTRING_PTR(tmp); if (rb_file_load_ok(f)) goto found; } @@ -5544,4 +5553,8 @@ Init_File(void) rb_define_method(rb_cStat, "setuid?", rb_stat_suid, 0); rb_define_method(rb_cStat, "setgid?", rb_stat_sgid, 0); rb_define_method(rb_cStat, "sticky?", rb_stat_sticky, 0); + +#ifdef _WIN32 + rb_w32_init_file(); +#endif } diff --git a/internal.h b/internal.h index c091a1c..ed4864c 100644 --- a/internal.h +++ b/internal.h @@ -105,8 +105,15 @@ VALUE 
rb_home_dir(const char *user, VALUE result); VALUE rb_realpath_internal(VALUE basedir, VALUE path, int strict); void rb_file_const(const char*, VALUE); int rb_file_load_ok(const char *); +VALUE rb_file_expand_path_fast(VALUE, VALUE); +VALUE rb_file_expand_path_internal(VALUE, VALUE, int, int, VALUE); void Init_File(void); +#ifdef _WIN32 +/* file.c, win32/file.c */ +void rb_w32_init_file(void); +#endif + /* gc.c */ void Init_heap(void); void *ruby_mimmalloc(size_t size); diff --git a/load.c b/load.c index fd5f862..b21d17c 100644 --- a/load.c +++ b/load.c @@ -43,7 +43,7 @@ rb_get_expanded_load_path(void) ary = rb_ary_new2(RARRAY_LEN(load_path)); for (i = 0; i < RARRAY_LEN(load_path); ++i) { - VALUE path = rb_file_expand_path(RARRAY_PTR(load_path)[i], Qnil); + VALUE path = rb_file_expand_path_fast(RARRAY_PTR(load_path)[i], Qnil); rb_str_freeze(path); rb_ary_push(ary, path); } @@ -233,7 +233,7 @@ rb_feature_provided(const char *feature, const char **loading) if (*feature == '.' && (feature[1] == '/' || strncmp(feature+1, "./", 2) == 0)) { - fullpath = rb_file_expand_path(rb_str_new2(feature), Qnil); + fullpath = rb_file_expand_path_fast(rb_str_new2(feature), Qnil); feature = RSTRING_PTR(fullpath); } if (ext && !strchr(ext, '/')) { diff --git a/test/ruby/test_file_exhaustive.rb b/test/ruby/test_file_exhaustive.rb index 7467bff..0570530 100644 --- a/test/ruby/test_file_exhaustive.rb +++ b/test/ruby/test_file_exhaustive.rb @@ -14,6 +14,7 @@ class TestFileExhaustive < Test::Unit::TestCase def setup @dir = Dir.mktmpdir("rubytest-file") + @rootdir = "#{DRIVE}/" File.chown(-1, Process.gid, @dir) @file = make_tmp_filename("file") @zerofile = make_tmp_filename("zerofile") @@ -450,6 +451,12 @@ class TestFileExhaustive < Test::Unit::TestCase assert_equal(expected.force_encoding(cp), File.expand_path(a.dup.force_encoding(cp)), cp) end + path = "\u3042\u3044\u3046\u3048\u304a".encode("EUC-JP") + assert_equal("#{Dir.pwd}/#{path}".encode("CP932"), 
File.expand_path(path).encode("CP932")) + + path = "\u3042\u3044\u3046\u3048\u304a".encode("CP51932") + assert_equal("#{Dir.pwd}/#{path}", File.expand_path(path)) + assert_incompatible_encoding {|d| File.expand_path(d)} end @@ -460,7 +467,13 @@ class TestFileExhaustive < Test::Unit::TestCase begin bug3630 = '[ruby-core:31537]' home = ENV["HOME"] + home_drive = ENV["HOMEDRIVE"] + home_path = ENV["HOMEPATH"] + user_profile = ENV["USERPROFILE"] ENV["HOME"] = nil + ENV["HOMEDRIVE"] = nil + ENV["HOMEPATH"] = nil + ENV["USERPROFILE"] = nil assert_raise(ArgumentError) { File.expand_path("~") } ENV["HOME"] = "~" assert_raise(ArgumentError, bug3630) { File.expand_path("~") } @@ -468,9 +481,187 @@ class TestFileExhaustive < Test::Unit::TestCase assert_raise(ArgumentError, bug3630) { File.expand_path("~") } ensure ENV["HOME"] = home + ENV["HOMEDRIVE"] = home_drive + ENV["HOMEPATH"] = home_path + ENV["USERPROFILE"] = user_profile + end + end + + def test_expand_path_resolve_empty_string_current_directory + assert_equal(Dir.pwd, File.expand_path("")) + end + + def test_expand_path_resolve_dot_current_directory + assert_equal(Dir.pwd, File.expand_path(".")) + end + + def test_expand_path_resolve_file_name_relative_current_directory + assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo")) + end + + def test_ignore_nil_dir_string + assert_equal(File.join(Dir.pwd, "foo"), File.expand_path("foo", nil)) + end + + def test_expand_path_resolve_file_name_and_dir_string_relative + assert_equal(File.join(Dir.pwd, "bar", "foo"), + File.expand_path("foo", "bar")) + end + + def test_expand_path_cleanup_dots_file_name + bug = "[ruby-talk:18512]" + + assert_equal(File.join(Dir.pwd, ".a"), File.expand_path(".a"), bug) + assert_equal(File.join(Dir.pwd, "..a"), File.expand_path("..a"), bug) + + if DRIVE + # cleanup dots only on Windows + assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a."), bug) + skip "FIXME" + assert_equal(File.join(Dir.pwd, "a"), File.expand_path("a.."), bug) 
+ else + assert_equal(File.join(Dir.pwd, "a."), File.expand_path("a."), bug) + assert_equal(File.join(Dir.pwd, "a.."), File.expand_path("a.."), bug) end end + def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_a_complete_path + assert_equal(@dir, File.expand_path("", "#{@dir}")) + assert_equal(File.join(@dir, "a"), File.expand_path("a", "#{@dir}")) + assert_equal(File.join(@dir, "a"), File.expand_path("../a", "#{@dir}/xxx")) + assert_equal(@rootdir, File.expand_path(".", "#{@rootdir}")) + end + + def test_expand_path_ignores_supplied_dir_if_path_contains_a_drive_letter + assert_equal(@rootdir, File.expand_path(@rootdir, "D:/")) + end if DRIVE + + def test_expand_path_removes_trailing_slashes_from_absolute_path + assert_equal(File.join(@rootdir, "foo"), File.expand_path("#{@rootdir}foo/")) + assert_equal(File.join(@rootdir, "foo.rb"), File.expand_path("#{@rootdir}foo.rb/")) + end + + def test_expand_path_removes_trailing_spaces_from_absolute_path + assert_equal(File.join(@rootdir, "a"), File.expand_path("#{@rootdir}a ")) + end if DRIVE + + def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_dir_s_drive + assert_match(%r"\Az:/foo\z"i, File.expand_path('/foo', "z:/bar")) + end if DRIVE + + def test_expand_path_converts_a_pathname_which_starts_with_a_slash_and_unc_pathname + assert_equal("//foo", File.expand_path('//foo', "//bar")) + assert_equal("//bar/foo", File.expand_path('/foo', "//bar")) + assert_equal("//foo", File.expand_path('//foo', "/bar")) + end if DRIVE + + def test_expand_path_converts_a_dot_with_unc_dir + assert_equal("//", File.expand_path('.', "//")) + end + + def test_expand_path_preserves_unc_path_root + assert_equal("//", File.expand_path("//")) + assert_equal("//", File.expand_path("//.")) + assert_equal("//", File.expand_path("//..")) + end + + def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_host_share + assert_match(%r"\A//host/share/foo\z"i, File.expand_path('/foo', 
"//host/share/bar")) + end if DRIVE + + def test_expand_path_converts_a_pathname_which_starts_with_a_slash_using_a_current_drive + assert_match(%r"\A#{DRIVE}/foo\z"i, File.expand_path('/foo')) + end + + def test_expand_path_returns_tainted_strings_or_not + assert_equal(true, File.expand_path('foo').tainted?) + assert_equal(true, File.expand_path('foo'.taint).tainted?) + assert_equal(true, File.expand_path('/foo'.taint).tainted?) + assert_equal(true, File.expand_path('foo', 'bar').tainted?) + assert_equal(true, File.expand_path('foo', '/bar'.taint).tainted?) + assert_equal(true, File.expand_path('foo'.taint, '/bar').tainted?) + assert_equal(true, File.expand_path('~').tainted?) + + if DRIVE + assert_equal(true, File.expand_path('/foo').tainted?) + assert_equal(false, File.expand_path('//foo').tainted?) + assert_equal(true, File.expand_path('C:/foo'.taint).tainted?) + assert_equal(false, File.expand_path('C:/foo').tainted?) + assert_equal(true, File.expand_path('foo', '/bar').tainted?) + assert_equal(true, File.expand_path('foo', 'C:/bar'.taint).tainted?) + assert_equal(true, File.expand_path('foo'.taint, 'C:/bar').tainted?) + assert_equal(false, File.expand_path('foo', 'C:/bar').tainted?) + assert_equal(false, File.expand_path('C:/foo/../bar').tainted?) + assert_equal(false, File.expand_path('foo', '//bar').tainted?) + else + assert_equal(false, File.expand_path('/foo').tainted?) + assert_equal(false, File.expand_path('foo', '/bar').tainted?) 
+ end + end + + def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_home_as_base + old_home = ENV["HOME"] + home = ENV["HOME"] = "#{DRIVE}/UserHome" + assert_equal(home, File.expand_path("~")) + assert_equal(home, File.expand_path("~", "C:/FooBar")) + assert_equal(File.join(home, "a"), File.expand_path("~/a", "C:/FooBar")) + ensure + ENV["HOME"] = old_home + end + + def test_expand_path_converts_a_pathname_to_an_absolute_pathname_using_unc_home + old_home = ENV["HOME"] + unc_home = ENV["HOME"] = "//UserHome" + assert_equal(unc_home, File.expand_path("~")) + ensure + ENV["HOME"] = old_home + end if DRIVE + + def test_expand_path_does_not_modify_a_home_string_argument + old_home = ENV["HOME"] + home = ENV["HOME"] = "#{DRIVE}/UserHome" + str = "~/a" + assert_equal("#{home}/a", File.expand_path(str)) + assert_equal("~/a", str) + ensure + ENV["HOME"] = old_home + end + + def test_expand_path_raises_argument_error_for_any_supplied_username + bug = '[ruby-core:39597]' + assert_raise(ArgumentError, bug) { File.expand_path("~anything") } + end if DRIVE + + def test_expand_path_raises_a_type_error_if_not_passed_a_string_type + assert_raise(TypeError) { File.expand_path(1) } + assert_raise(TypeError) { File.expand_path(nil) } + assert_raise(TypeError) { File.expand_path(true) } + end + + def test_expand_path_expands_dot_dir + assert_equal("#{DRIVE}/dir", File.expand_path("#{DRIVE}/./dir")) + end + + def test_expand_path_does_not_modify_the_string_argument + str = "./a/b/../c" + assert_equal("#{Dir.pwd}/a/c", File.expand_path(str, Dir.pwd)) + assert_equal("./a/b/../c", str) + end + + def test_expand_path_returns_a_string_when_passed_a_string_subclass + sub = Class.new(String) + str = sub.new "./a/b/../c" + path = File.expand_path(str, Dir.pwd) + assert_equal("#{Dir.pwd}/a/c", path) + assert_instance_of(String, path) + end + + def test_expand_path_accepts_objects_that_have_a_to_path_method + klass = Class.new { def to_path; "a/b/c"; end } + obj = klass.new + 
assert_equal("#{Dir.pwd}/a/b/c", File.expand_path(obj)) + end + def test_basename assert_equal(File.basename(@file).sub(/\.test$/, ""), File.basename(@file, ".test")) assert_equal("", s = File.basename("")) diff --git a/win32/Makefile.sub b/win32/Makefile.sub index c0cdfa9..979890d 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -226,7 +226,7 @@ EXTLIBS = EXTSOLIBS = !endif !if !defined(LIBS) -LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib $(EXTLIBS) +LIBS = oldnames.lib user32.lib advapi32.lib shell32.lib ws2_32.lib imagehlp.lib shlwapi.lib $(EXTLIBS) !endif !if !defined(MISSING) MISSING = acosh.obj cbrt.obj crypt.obj erf.obj ffs.obj langinfo.obj lgamma_r.obj strlcat.obj strlcpy.obj tgamma.obj win32/win32.obj win32/file.obj setproctitle.obj diff --git a/win32/file.c b/win32/file.c index 955f91b..885f8fe 100644 --- a/win32/file.c +++ b/win32/file.c @@ -1,10 +1,621 @@ #include "ruby/ruby.h" +#include "ruby/encoding.h" #include +#include +#include #ifndef INVALID_FILE_ATTRIBUTES # define INVALID_FILE_ATTRIBUTES ((DWORD)-1) #endif +/* cache 'encoding name' => 'code page' into a hash */ +static VALUE rb_code_page; + +#define IS_DIR_SEPARATOR_P(c) (c == L'\\' || c == L'/') +#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P(c[0]) && IS_DIR_SEPARATOR_P(c[1])) + +/* MultiByteToWideChar() doesn't work with code page 51932 */ +#define INVALID_CODE_PAGE 51932 +#define PATH_BUFFER_SIZE MAX_PATH * 2 + +#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj))) + +static inline void +replace_wchar(wchar_t *s, int find, int replace) +{ + while (*s != 0) { + if (*s == find) + *s = replace; + s++; + } +} + +/* Convert str from multibyte char to wchar with specified code page */ +static inline void +convert_mb_to_wchar(VALUE str, wchar_t **wstr, wchar_t **wstr_pos, size_t *wstr_len, UINT code_page) +{ + size_t len; + + if (NIL_P(str)) + return; + + len = MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, NULL, 
0) + 1; + *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t)); + if (wstr_pos) + *wstr_pos = *wstr; + + MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, *wstr, len); + *wstr_len = len - 2; +} + +static inline void +convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page) +{ + size_t len; + + len = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL); + *str = (char *)xmalloc(len * sizeof(char)); + WideCharToMultiByte(code_page, 0, wstr, -1, *str, len, NULL, NULL); + + /* do not count terminator as part of the string length */ + *str_len = len - 1; +} + +/* + Return user's home directory using combinations of environment variables. + Memory allocated by this function should be manually freed afterwards. + + Try: + HOME, HOMEDRIVE + HOMEPATH and USERPROFILE environment variables + TODO: Special Folders - Profile and Personal +*/ +static wchar_t * +home_dir() +{ + wchar_t *buffer = NULL; + size_t buffer_len = 0, len = 0; + size_t home_env = 0; + + /* + GetEnvironmentVariableW when used with NULL will return the required + buffer size and its terminating character. 
+ http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188(v=vs.85).aspx + */ + + if (len = GetEnvironmentVariableW(L"HOME", NULL, 0)) { + buffer_len = len; + home_env = 1; + } else if (len = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) { + buffer_len = len; + if (len = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0)) { + buffer_len += len; + home_env = 2; + } else { + buffer_len = 0; + } + } else if (len = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) { + buffer_len = len; + home_env = 3; + } + + /* allocate buffer */ + if (home_env) + buffer = (wchar_t *)xmalloc(buffer_len * sizeof(wchar_t)); + + switch (home_env) { + case 1: + /* HOME */ + GetEnvironmentVariableW(L"HOME", buffer, buffer_len); + break; + case 2: + /* HOMEDRIVE + HOMEPATH */ + len = GetEnvironmentVariableW(L"HOMEDRIVE", buffer, buffer_len); + GetEnvironmentVariableW(L"HOMEPATH", buffer + len, buffer_len - len); + break; + case 3: + /* USERPROFILE */ + GetEnvironmentVariableW(L"USERPROFILE", buffer, buffer_len); + break; + default: + break; + } + + if (home_env) { + /* sanitize backslashes with forwardslashes */ + replace_wchar(buffer, L'\\', L'/'); + + return buffer; + } + + return NULL; +} + +/* Remove trailing invalid ':$DATA' of the path. 
*/ +static inline size_t +remove_invalid_alternative_data(wchar_t *wfullpath, size_t size) { + static const wchar_t prime[] = L":$DATA"; + enum { prime_len = (sizeof(prime) / sizeof(wchar_t)) -1 }; + + if (size <= prime_len || _wcsnicmp(wfullpath + size - prime_len, prime, prime_len) != 0) + return size; + + /* alias of stream */ + /* get rid of a bug of x64 VC++ */ + if (wfullpath[size - (prime_len + 1)] == ':') { + /* remove trailing '::$DATA' */ + size -= prime_len + 1; /* prime */ + wfullpath[size] = L'\0'; + } else { + /* remove trailing ':$DATA' of paths like '/aa:a:$DATA' */ + wchar_t *pos = wfullpath + size - (prime_len + 1); + while (!IS_DIR_SEPARATOR_P(*pos) && pos != wfullpath) { + if (*pos == L':') { + size -= prime_len; /* alternative */ + wfullpath[size] = L'\0'; + break; + } + pos--; + } + } + return size; +} + +/* Return system code page. */ +static inline UINT +system_code_page() { + return AreFileApisANSI() ? CP_ACP : CP_OEMCP; +} + +/* + Return code page number of the encoding. + Cache code page into a hash for performance since finding the code page in + Encoding#names is slow. 
+*/ +static UINT +code_page(rb_encoding *enc) +{ + VALUE code_page_value, name_key; + VALUE encoding, names_ary = Qundef, name; + char *enc_name; + struct RString fake_str; + ID names; + long i; + + if (!enc) + return system_code_page(); + + enc_name = (char *)rb_enc_name(enc); + + fake_str.basic.flags = T_STRING|RSTRING_NOEMBED; + fake_str.basic.klass = rb_cString; + fake_str.as.heap.len = strlen(enc_name); + fake_str.as.heap.ptr = enc_name; + fake_str.as.heap.aux.capa = fake_str.as.heap.len; + name_key = (VALUE)&fake_str; + ENCODING_CODERANGE_SET(name_key, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + + code_page_value = rb_hash_lookup(rb_code_page, name_key); + if (code_page_value != Qnil) + return (UINT)FIX2INT(code_page_value); + + name_key = rb_usascii_str_new2(enc_name); + + encoding = rb_enc_from_encoding(enc); + if (!NIL_P(encoding)) { + CONST_ID(names, "names"); + names_ary = rb_funcall(encoding, names, 0); + } + + if (enc == rb_usascii_encoding()) { + UINT code_page = 20127; + rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); + return code_page; + } + else if (enc == rb_ascii8bit_encoding()) { + UINT code_page = 437; + rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); + return code_page; + } + + if (names_ary != Qundef) { + for (i = 0; i < RARRAY_LEN(names_ary); i++) { + name = RARRAY_PTR(names_ary)[i]; + if (strncmp("CP", RSTRING_PTR(name), 2) == 0) { + int code_page = atoi(RSTRING_PTR(name) + 2); + if (code_page != 0) { + rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page)); + return (UINT)code_page; + } + } + } + } + + rb_hash_aset(rb_code_page, name_key, INT2FIX(INVALID_CODE_PAGE)); + return INVALID_CODE_PAGE; +} + +static inline VALUE +fix_string_encoding(VALUE str, rb_encoding *encoding) +{ + VALUE result, tmp; + + tmp = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding); + result = rb_str_encode(tmp, rb_enc_from_encoding(rb_utf8_encoding()), 0, Qnil); + + return result; +} + +/* + Replace the last part of the path to 
long name. + We try to avoid to call FindFirstFileW() since it takes long time. +*/ +static inline size_t +replace_to_long_name(wchar_t **wfullpath, size_t size, int heap) { + WIN32_FIND_DATAW find_data; + HANDLE find_handle; + + /* + Skip long name conversion if the path is already a long name. + A short name is in 8.3 format. + http://en.wikipedia.org/wiki/8.3_filename + This check can be skipped for directory components that have file + extensions longer than 3 characters, or total lengths longer than + 12 characters. + http://msdn.microsoft.com/en-us/library/windows/desktop/aa364980(v=vs.85).aspx + */ + size_t const max_short_name_size = 8 + 1 + 3; + size_t const max_extension_size = 3; + size_t path_len = 1, extension_len = 0; + wchar_t *pos = *wfullpath; + + if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') { + /* root path doesn't need short name expansion */ + return size; + } + + pos = *wfullpath + size - 1; + while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) { + if (!extension_len && *pos == L'.') { + extension_len = path_len - 1; + } + if (path_len > max_short_name_size || extension_len > max_extension_size) { + return size; + } + path_len++; + pos--; + } + + find_handle = FindFirstFileW(*wfullpath, &find_data); + if (find_handle != INVALID_HANDLE_VALUE) { + size_t trail_pos = wcslen(*wfullpath); + size_t file_len = wcslen(find_data.cFileName); + + FindClose(find_handle); + while (trail_pos > 0) { + if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos])) + break; + trail_pos--; + } + size = trail_pos + 1 + file_len; + if ((size + 1) > sizeof(*wfullpath) / sizeof((*wfullpath)[0])) { /* NOTE(review): *wfullpath is a wchar_t *, so this ratio is pointer size / wchar_t size, not the capacity of the caller's buffer -- confirm the intended bound */ + wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t)); + wcsncpy(buf, *wfullpath, trail_pos + 1); + if (heap) + xfree(*wfullpath); + *wfullpath = buf; + } + wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1); + } + return size; +} + +VALUE +rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE 
result) +{ + size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0; + size_t buffer_len = 0; + char *fullpath = NULL; + wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL; + wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL; + UINT path_cp, cp; + VALUE path = fname, dir = dname; + wchar_t wfullpath_buffer[PATH_BUFFER_SIZE]; + wchar_t path_drive = L'\0', dir_drive = L'\0'; + int ignore_dir = 0; + rb_encoding *path_encoding; + int tainted = 0; + + /* tainted if path is tainted */ + tainted = OBJ_TAINTED(path); + + /* get path encoding */ + if (NIL_P(dir)) { + path_encoding = rb_enc_get(path); + } else { + path_encoding = rb_enc_check(path, dir); + } + + cp = path_cp = code_page(path_encoding); + + /* work around invalid code page */ + if (path_cp == INVALID_CODE_PAGE) { + cp = CP_UTF8; + if (!NIL_P(path)) { + path = fix_string_encoding(path, path_encoding); + } + } + + /* convert char * to wchar_t */ + convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp); + + /* determine if we need the user's home directory */ + /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */ + if (abs_mode == 0 && ((wpath_len == 1 && wpath_pos[0] == L'~') || + (wpath_len >= 2 && wpath_pos[0] == L'~' && IS_DIR_SEPARATOR_P(wpath_pos[1])))) { + /* tainted if expanding '~' */ + tainted = 1; + + whome = home_dir(); + if (whome == NULL) { + xfree(wpath); + rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'"); + } + whome_len = wcslen(whome); + + if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) { + xfree(wpath); /* NOTE(review): whome returned by home_dir() is not freed before raising on this path -- confirm whether it leaks */ + rb_raise(rb_eArgError, "non-absolute home"); + } + + /* use filesystem encoding if expanding home dir */ + path_encoding = rb_filesystem_encoding(); + cp = path_cp = system_code_page(); + + /* ignore dir since we are expanding home */ + ignore_dir = 1; + + /* exclude ~ from the result */ + wpath_pos++; + wpath_len--; + + /* exclude separator if present */ + if (wpath_len && 
IS_DIR_SEPARATOR_P(wpath_pos[0])) { + wpath_pos++; + wpath_len--; + } + } else if (wpath_len >= 2 && wpath_pos[1] == L':') { + if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) { + /* ignore dir since path contains a drive letter and a root slash */ + ignore_dir = 1; + } else { + /* determine if we ignore dir or not later */ + path_drive = wpath_pos[0]; + } + } else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') { + wchar_t *wuser = wpath_pos + 1; + wchar_t *pos = wuser; + char *user; + + /* tainted if expanding '~' */ + tainted = 1; + + while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0') + pos++; + + *pos = '\0'; + convert_wchar_to_mb(wuser, &user, &size, cp); + + /* convert to VALUE and set the path encoding */ + if (path_cp == INVALID_CODE_PAGE) { + VALUE tmp = rb_enc_str_new(user, size, rb_utf8_encoding()); + result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil); + rb_str_resize(tmp, 0); + } else { + result = rb_enc_str_new(user, size, path_encoding); + } + + xfree(wpath); + if (user) + xfree(user); + + rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result)); + } + + /* convert dir */ + if (!ignore_dir && !NIL_P(dir)) { + /* fix string encoding */ + if (path_cp == INVALID_CODE_PAGE) { + dir = fix_string_encoding(dir, path_encoding); + } + + /* convert char * to wchar_t */ + convert_mb_to_wchar(dir, &wdir, NULL, &wdir_len, cp); + + if (wdir_len >= 2 && wdir[1] == L':') { + dir_drive = wdir[0]; + if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { + wdir_len = 2; + } + } else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) { + /* UNC path */ + if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) { + /* cut the UNC path tail to '//host/share' */ + size_t separators = 0; + size_t pos = 2; + while (pos < wdir_len && separators < 2) { + if (IS_DIR_SEPARATOR_P(wdir[pos])) { + separators++; + } + pos++; + } + if (separators == 2) + wdir_len = pos - 1; + } + } + } + + /* determine if we ignore dir or not */ + if (!ignore_dir && 
path_drive && dir_drive) { + if (towupper(path_drive) == towupper(dir_drive)) { + /* exclude path drive letter to use dir */ + wpath_pos += 2; + wpath_len -= 2; + } else { + /* ignore dir since path drive is different from dir drive */ + ignore_dir = 1; + wdir_len = 0; + } + } + + if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) { + /* ignore dir since path has UNC root */ + ignore_dir = 1; + wdir_len = 0; + } else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) && + !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) { + /* ignore dir since path has root slash and dir doesn't have drive or UNC root */ + ignore_dir = 1; + wdir_len = 0; + } + + buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1; + + buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t)); + + /* add home */ + if (whome_len) { + wcsncpy(buffer_pos, whome, whome_len); + buffer_pos += whome_len; + } + + /* Add separator if required */ + if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { + buffer_pos[0] = L'\\'; + buffer_pos++; + } + + if (wdir_len) { + /* tainted if dir is used and dir is tainted */ + if (!tainted && OBJ_TAINTED(dir)) + tainted = 1; + + wcsncpy(buffer_pos, wdir, wdir_len); + buffer_pos += wdir_len; + } + + /* add separator if required */ + if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) { + buffer_pos[0] = L'\\'; + buffer_pos++; + } + + /* now deal with path */ + if (wpath_len) { + wcsncpy(buffer_pos, wpath_pos, wpath_len); + buffer_pos += wpath_len; + } + + /* GetFullPathNameW requires at least "." 
to determine current directory */ + if (wpath_len == 0) { + buffer_pos[0] = L'.'; + buffer_pos++; + } + + /* Ensure buffer is NUL-terminated */ + buffer_pos[0] = L'\0'; + + /* tainted if path is relative */ + if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer))) + tainted = 1; + + /* FIXME: Make this more robust */ + /* Determine the required buffer size */ + size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL); + if (size > PATH_BUFFER_SIZE) { + // allocate enough memory to contain the response + wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t)); + size = GetFullPathNameW(buffer, size, wfullpath, NULL); + } else { + wfullpath = wfullpath_buffer; + } + + /* Remove any trailing slashes */ + if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) && + wfullpath[size - 2] != L':' && + !(size == 2 && IS_DIR_UNC_P(wfullpath))) { + size -= 1; + wfullpath[size] = L'\0'; + } + + /* Remove any trailing dot */ + if (wfullpath[size - 1] == L'.') { + size -= 1; + wfullpath[size] = L'\0'; + } + + /* remove trailing invalid ':$DATA' */ + size = remove_invalid_alternative_data(wfullpath, size); + + /* Replace the trailing path component with its long name */ + if (long_name) + size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer)); + + /* replace backslashes with forward slashes */ + replace_wchar(wfullpath, L'\\', L'/'); + + /* convert to char *. NOTE(review): size is overwritten here with the multibyte length and is then passed below as the wide-character count as well -- confirm this is intended for non-ASCII paths */ + size = WideCharToMultiByte(cp, 0, wfullpath, size, NULL, 0, NULL, NULL); + if (size > (size_t)RSTRING_LEN(result)) { + rb_str_modify(result); + rb_str_resize(result, size); + } + + WideCharToMultiByte(cp, 0, wfullpath, size, RSTRING_PTR(result), size, NULL, NULL); + rb_str_set_len(result, size); + + /* convert to VALUE and set the path encoding */ + if (path_cp == INVALID_CODE_PAGE) { + VALUE tmp; + size_t len; + + rb_enc_associate(result, rb_utf8_encoding()); + ENC_CODERANGE_CLEAR(result); + tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil); + len = 
RSTRING_LEN(tmp); + rb_str_modify(result); + rb_str_resize(result, len); + memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len); + rb_str_resize(tmp, 0); + } + rb_enc_associate(result, path_encoding); + ENC_CODERANGE_CLEAR(result); + + /* makes the result object tainted if expanding tainted strings or returning modified path */ + if (tainted) + OBJ_TAINT(result); + + /* TODO: better cleanup */ + if (buffer) + xfree(buffer); + + if (wpath) + xfree(wpath); + + if (wdir) + xfree(wdir); + + if (whome) + xfree(whome); + + if (wfullpath && wfullpath != wfullpath_buffer) + xfree(wfullpath); + + if (fullpath) + xfree(fullpath); + + return result; +} + int rb_file_load_ok(const char *path) { @@ -27,3 +638,12 @@ rb_file_load_ok(const char *path) } return ret; } + +void +rb_w32_init_file(void) +{ + rb_code_page = rb_hash_new(); + + /* prevent GC removing rb_code_page */ + rb_gc_register_mark_object(rb_code_page); +}