From a044fc25f7dc12836c154c3efeb62168bb7794c4 Mon Sep 17 00:00:00 2001 From: Gareth Adams Date: Wed, 19 Jun 2019 13:21:58 +0200 Subject: [PATCH 1/2] open-uri.rb: Treat HTTPS the same as HTTP Previously, OpenURI followed guidance in RFC2616/3.7.1: > When no explicit charset parameter is provided by the sender, media > subtypes of the "text" type are defined to have a default charset > value of "ISO-8859-1" when received via HTTP. However, this RFC was written before HTTP over TLS (HTTPS) was standardized, and OpenURI was never updated to treat HTTPS traffic the same way. As a result, HTTPS documents received a different default from HTTP documents. This commit removes the scheme check so that all text/* documents processed by OpenURI are treated the same way. In theory this processing gets applied to FTP URIs too, but there's no mechanism in OpenURI for FTP documents to have Content-Type metadata attached to them, so this ends up being a no-op. Fixes: https://bugs.ruby-lang.org/issues/15933 --- lib/open-uri.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/open-uri.rb b/lib/open-uri.rb index 38f074ef59..e772658d2b 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -543,7 +543,7 @@ def content_type # It can be used to guess charset. # # If charset parameter and block is not given, - # nil is returned except text type in HTTP. + # nil is returned except text type. # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1. def charset type, *parameters = content_type_parse @@ -551,8 +551,7 @@ def charset pair.last.downcase elsif block_given? 
yield - elsif type && %r{\Atext/} =~ type && - @base_uri && /\Ahttp\z/i =~ @base_uri.scheme + elsif type && %r{\Atext/} =~ type "iso-8859-1" # RFC2616 3.7.1 else nil -- 2.15.1 From 0b3e61f566ca09a0f8b8847eb2de661c243a51f8 Mon Sep 17 00:00:00 2001 From: Gareth Adams Date: Wed, 19 Jun 2019 13:28:21 +0200 Subject: [PATCH 2/2] open-uri.rb: Change default charset for text/* to UTF-8 Replaces the default ISO-8859-1 charset previously defined in RFC2616 (now obsoleted by RFC7231) with a UTF-8 charset as defined in RFC6838. Fixes: https://bugs.ruby-lang.org/issues/15933 --- lib/open-uri.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/open-uri.rb b/lib/open-uri.rb index e772658d2b..dd9e07e3d2 100644 --- a/lib/open-uri.rb +++ b/lib/open-uri.rb @@ -544,7 +544,7 @@ def content_type # # If charset parameter and block is not given, # nil is returned except text type. - # In that case, "iso-8859-1" is returned as defined by RFC2616 3.7.1. + # In that case, "utf-8" is returned as defined by RFC6838 4.2.1. def charset type, *parameters = content_type_parse if pair = parameters.assoc('charset') @@ -552,7 +552,7 @@ def charset elsif block_given? yield elsif type && %r{\Atext/} =~ type - "iso-8859-1" # RFC2616 3.7.1 + "utf-8" # RFC6838 4.2.1 else nil end -- 2.15.1