Project

General

Profile

Bug #1787 » lib_uri_common-u.patch

drry (drry  ), 07/19/2009 12:34 AM

View differences:

lib/uri/common.rb (working copy)
# Character-class fragments for the URI grammar. Each value is the inside
# of a regexp bracket expression and is meant to be interpolated as
# "[#{...}]" (see ESCAPED below).

# alpha = lowalpha | upalpha
ALPHA = "a-zA-Z"
# digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
#         "8" | "9"
DIGIT = "0-9"
# alphanum = alpha | digit
ALNUM = "#{ALPHA}#{DIGIT}"
# hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
#       "a" | "b" | "c" | "d" | "e" | "f"
HEX = "#{DIGIT}a-fA-F"
# escaped = "%" hex hex
ESCAPED = "%[#{HEX}]{2}"
# mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
......
# Splits +uri+ into its nine components.
#
# +uri+ is tried against @regexp[:ABS_URI] first and @regexp[:REL_URI]
# second; the empty string is accepted as a "null" URI.
#
# Returns the array
#   [scheme, userinfo, host, port, registry, path, opaque, query, fragment]
# Components that did not match are nil, except that +path+ becomes ''
# when neither a path nor an opaque part is present.
#
# Raises InvalidURIError when +uri+ matches neither pattern, or when an
# absolute match has no scheme or has none of opaque/path/host/registry.
def split(uri)
  case uri
  when ''
    # null uri: nothing to extract, every component stays nil
  when @regexp[:ABS_URI]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric

    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

    unless scheme
      raise InvalidURIError,
            "bad URI(absolute but no scheme): #{uri}"
    end
    if [opaque, path, host, registry].none?
      raise InvalidURIError,
            "bad URI(absolute but no path): #{uri}"
    end
  when @regexp[:REL_URI]
    scheme = nil
    opaque = nil

    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # The path is whichever of rel_segment / abs_path matched, or their
    # concatenation when both did; it stays nil when neither matched.
    path = if rel_segment && abs_path
             rel_segment + abs_path
           else
             rel_segment || abs_path
           end

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path ||= '' unless opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port, # X
    registry,             # X
    path,                 # Y
    opaque,               # Y
    query,
    fragment
  ]
end
# Builds a URI object from the string +uri+.
#
# The string is decomposed with #split. When the scheme (upcased) is a key
# of URI.scheme_list, the class registered there is instantiated; otherwise
# Generic is used. The constructor receives the nine components in the
# order scheme, userinfo, host, port, registry, path, opaque, query,
# fragment, followed by this object itself as the last argument.
#
# Any error raised by #split propagates to the caller.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = split(uri)

  klass = if scheme && URI.scheme_list.include?(scheme.upcase)
            URI.scheme_list[scheme.upcase]
          else
            Generic
          end

  klass.new(scheme, userinfo, host, port,
            registry, path, opaque, query,
            fragment, self)
end
# Joins several URI strings into one URI object.
#
# The first argument is parsed with #parse; every following argument is
# then merged, in order and exactly once, into the running result via
# +merge+. Returns the final merged object.
def join(*str)
  base, *references = str
  references.inject(parse(base)) { |merged, reference| merged.merge(reference) }
end
# Scans +str+ for URIs using the pattern returned by make_regexp(schemes).
#
# With a block: yields each whole match (not its capture groups) once and
# returns nil. Without a block: returns an array of the whole matches, in
# order of appearance.
def extract(str, schemes = nil, &block)
  pattern = make_regexp(schemes)

  if block_given?
    str.scan(pattern) { yield Regexp.last_match(0) }
    nil
  else
    found = []
    # The block form of scan is used so the whole match is collected even
    # when the pattern contains capture groups.
    str.scan(pattern) { found << Regexp.last_match(0) }
    found
  end
end
# Returns the regexp used to find absolute URIs.
#
# Without +schemes+ the prebuilt @regexp[:ABS_URI_REF] is returned as is.
# With +schemes+ (a list accepted by Regexp.union) a new extended-mode
# regexp is built that only matches @pattern[:X_ABS_URI] where one of the
# given schemes followed by ":" starts the match.
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  /(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/x
end
......
# hostname = *( domainlabel "." ) toplabel [ "." ]
unless hostname
ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?"
ret[:HOSTNAME] = hostname = "(?:#{domlabel}\\.)*#{toplabel}\\.?"
end
# RFC 2373, APPENDIX B:
......
# allowed too. Here is a replacement.
#
# IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
#
# Each octet is limited to 0..255 by three alternatives: one to three
# digits with an optional leading 0 or 1 (0-199), 200-249, and 250-255.
v4digit = "(?:[01]?[#{PATTERN::DIGIT}]{1,2}|2[0-4][#{PATTERN::DIGIT}]|25[0-5])"
ipv4addr = "#{v4digit}\\." * 3 + v4digit
ret[:IPV4ADDR] = ipv4addr
# hex4 = 1*4HEXDIG
hex4 = "[#{PATTERN::HEX}]{1,4}"
# lastpart = hex4 | IPv4address
......
# host = hostname | IPv4address | IPv6reference (RFC 2732)
ret[:HOST] = host = "(?:#{hostname}|#{ipv4addr}|#{ipv6ref})"
# port = *digit
# Double-quoted so that PATTERN::DIGIT is interpolated into the class.
port = "[#{PATTERN::DIGIT}]*"
# hostport = host [ ":" port ]
ret[:HOSTPORT] = hostport = "#{host}(?::#{port})?"
......
ret[:REL_SEGMENT] = rel_segment = "(?:[#{unreserved};@&=+$,]|#{escaped})+"
# scheme = alpha *( alpha | digit | "+" | "-" | "." )
ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}\\d]*"
ret[:SCHEME] = scheme = "[#{PATTERN::ALPHA}][-+.#{PATTERN::ALPHA}#{PATTERN::DIGIT}]*"
# abs_path = "/" path_segments
ret[:ABS_PATH] = abs_path = "/#{path_segments}"
......
ret[:URI_REF] = uri_ref = "(?:#{abs_uri}|#{rel_uri})?(?:##{fragment})?"
ret[:X_ABS_URI] = "
(#{scheme}): (?# 1: scheme)
(#{scheme}): (?# 1: scheme)
(?:
(#{opaque_part}) (?# 2: opaque)
(#{opaque_part}) (?# 2: opaque)
|
(?:(?:
//(?:
(?:(?:(#{userinfo})@)? (?# 3: userinfo)
(?:(#{host})(?::(\\d*))?))? (?# 4: host, 5: port)
(?:(?:(#{userinfo})@)? (?# 3: userinfo)
(?:(#{host}) (?# 4: host)
(?::([#{PATTERN::DIGIT}]*))?))? (?# 5: port)
|
(#{reg_name}) (?# 6: registry)
(#{reg_name}) (?# 6: registry)
)
|
(?!//)) (?# XXX: '//' is the mark for hostport)
(#{abs_path})? (?# 7: path)
)(?:\\?(#{query}))? (?# 8: query)
(?!//)) (?# XXX: '//' is the mark for hostport)
(#{abs_path})? (?# 7: path)
)(?:\\?(#{query}))? (?# 8: query)
)
(?:\\#(#{fragment}))? (?# 9: fragment)
(?:\\#(#{fragment}))? (?# 9: fragment)
"
ret[:X_REL_URI] = "
......
(?:
//
(?:
(?:(#{userinfo})@)? (?# 1: userinfo)
(#{host})?(?::(\\d*))? (?# 2: host, 3: port)
(?:(#{userinfo})@)? (?# 1: userinfo)
(#{host})? (?# 2: host)
(?::([#{PATTERN::DIGIT}]*))? (?# 3: port)
|
(#{reg_name}) (?# 4: registry)
(#{reg_name}) (?# 4: registry)
)
)
|
(#{rel_segment}) (?# 5: rel_segment)
(#{rel_segment}) (?# 5: rel_segment)
)?
(#{abs_path})? (?# 6: abs_path)
(?:\\?(#{query}))? (?# 7: query)
(?:\\#(#{fragment}))? (?# 8: fragment)
(#{abs_path})? (?# 6: abs_path)
(?:\\?(#{query}))? (?# 7: query)
(?:\\#(#{fragment}))? (?# 8: fragment)
"
ret
(1-1/6)