Skip to content

Commit

Permalink
Add Content-Disposition parameter parser
Browse files Browse the repository at this point in the history
The ReDoS fix in ee25ab9 breaks valid
requests, because colons are valid inside parameter values.  You cannot
use a regexp scan and ensure correct behavior, since values inside
parameters can be escaped.  Issues like this are the reason for the
famous "now they have two problems" quote regarding regexps.

Add a basic parser for parameters in Content-Disposition.  This parser
is based purely on String#{index,slice!,[],==}, usually with string
arguments for #index (though one case uses a simple regexp).  There
are two loops (one nested in the other), but the use of slice! ensures
that forward progress is always made on each loop iteration.

In addition to fixing the bug introduced by the security fix, this
removes multiple separate passes over the mime head, one pass to get
the parameter name for Content-Disposition, and a separate pass to get
the filename. It removes the get_filename method, though some of the
code is kept in a smaller normalize_filename method.

This removes 18 separate regexp contents that were previously used
just for the separate parse to find the filename for the content
disposition.

Fixes rack#2076
  • Loading branch information
jeremyevans committed Apr 28, 2023
1 parent ab360dd commit 645b5b4
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 50 deletions.
132 changes: 82 additions & 50 deletions lib/rack/multipart/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,9 @@ class BoundaryTooLongError < StandardError

EOL = "\r\n"
MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni
TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/
CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i
VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/
BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i
MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni
MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:[^:]*;\s*name=(#{VALUE})/ni
MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:(.*)(?=#{EOL}(\S|\z))/ni
MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni
# Updated definitions from RFC 2231
ATTRIBUTE_CHAR = %r{[^ \x00-\x1f\x7f)(><@,;:\\"/\[\]?='*%]}
ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/
SECTION = /\*[0-9]+/
REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/
REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/
EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/
EXTENDED_OTHER_VALUE = /%[0-9a-fA-F][0-9a-fA-F]|#{ATTRIBUTE_CHAR}/
EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/
EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/
EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/
EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/
EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/
DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/
RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i

class Parser
BUFSIZE = 1_048_576
Expand Down Expand Up @@ -316,13 +297,89 @@ def handle_mime_head
if @sbuf.scan_until(@head_regex)
head = @sbuf[1]
content_type = head[MULTIPART_CONTENT_TYPE, 1]
if name = head[MULTIPART_CONTENT_DISPOSITION, 1]
name = dequote(name)
if disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]
# ignore actual content-disposition value (should always be form-data)
i = disposition.index(';')
disposition.slice!(0, i+1)
param = nil

# Parse parameter list
while i = disposition.index('=')
# Found end of parameter name, ensure forward progress in loop
param = disposition.slice!(0, i+1)

# Remove ending equals and preceding whitespace from parameter name
param.chomp!('=')
param.lstrip!

if disposition[0] == '"'
# Parameter value is quoted, parse it, handling backslash escapes
disposition.slice!(0, 1)
value = String.new

while i = disposition.index(/(["\\])/)
c = $1

# Append all content until ending quote or escape
value << disposition.slice!(0, i)

# Remove either backslash or ending quote,
# ensures forward progress in loop
disposition.slice!(0, 1)

# stop parsing parameter value if found ending quote
break if c == '"'

escaped_char = disposition.slice!(0, 1)
if param == 'filename' && escaped_char != '"'
# Possible IE uploaded filename, append both escape backslash and value
value << c << escaped_char
else
# Other only append escaped value
value << escaped_char
end
end
else
if i = disposition.index(';')
# Parameter value unquoted (which may be invalid), value ends at semicolon
value = disposition.slice!(0, i)
else
# If no ending semicolon, assume remainder of line is value and stop
# parsing
disposition.strip!
value = disposition
disposition = ''
end
end

case param
when 'name'
name = value
when 'filename'
filename = value
when 'filename*'
filename_star = value
# else
# ignore other parameters
end

# skip trailing semicolon, to proceed to next parameter
if i = disposition.index(';')
disposition.slice!(0, i+1)
end
end
else
name = head[MULTIPART_CONTENT_ID, 1]
end

filename = get_filename(head)
if filename_star
encoding, _, filename = filename_star.split("'", 3)
filename = normalize_filename(filename || '')
filename.force_encoding(::Encoding.find(encoding))
elsif filename
filename = $1 if filename =~ /^"(.*)"$/
filename = normalize_filename(filename)
end

if name.nil? || name.empty?
name = filename || "#{content_type || TEXT_PLAIN}[]".dup
Expand Down Expand Up @@ -367,39 +424,14 @@ def consume_boundary
end
end

def get_filename(head)
filename = nil
case head
when RFC2183
params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]

if filename = params['filename*']
encoding, _, filename = filename.split("'", 3)
elsif filename = params['filename']
filename = $1 if filename =~ /^"(.*)"$/
end
when BROKEN
filename = $1
filename = $1 if filename =~ /^"(.*)"$/
end

return unless filename

def normalize_filename(filename)
if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
filename = Utils.unescape_path(filename)
end

filename.scrub!

if filename !~ /\\[^\\"]/
filename = filename.gsub(/\\(.)/, '\1')
end

if encoding
filename.force_encoding ::Encoding.find(encoding)
end

filename
filename.split(/[\/\\]/).last || String.new
end

CHARSET = "charset"
Expand Down
80 changes: 80 additions & 0 deletions test/spec_multipart.rb
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,86 @@ def rd.rewind; end
Timeout::timeout(10) { Rack::Multipart.parse_multipart(env) }
end

content_disposition_parse = lambda do |params|
boundary = '---------------------------932620571087722842402766118'

data = StringIO.new
data.write("--#{boundary}")
data.write("\r\n")
data.write("Content-Disposition: form-data;#{params}")
data.write("\r\n")
data.write("content-type:application/pdf\r\n")
data.write("\r\n")
data.write("--#{boundary}--\r\n")
data.rewind

fixture = {
"CONTENT_TYPE" => "multipart/form-data; boundary=#{boundary}",
"CONTENT_LENGTH" => data.length.to_s,
:input => data,
}

env = Rack::MockRequest.env_for '/', fixture
Rack::Multipart.parse_multipart(env)
end

# see https://github.com/rack/rack/issues/2076
it "parse content-disposition with modification date before name parameter" do
x = content_disposition_parse.call(' filename="sample.sql"; modification-date="Wed, 26 Apr 2023 11:01:34 GMT"; size=24; name="file"')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "sample.sql"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with colon in parameter value before name parameter" do
x = content_disposition_parse.call(' filename="sam:ple.sql"; name="file"')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "sam:ple.sql"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with name= in parameter value before name parameter" do
x = content_disposition_parse.call('filename="name=bar"; name="file"')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "name=bar"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with unquoted parameter values" do
x = content_disposition_parse.call('filename=sam:ple.sql; name=file')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "sam:ple.sql"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with backslash escaped parameter values" do
x = content_disposition_parse.call('filename="foo\"bar"; name=file')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "foo\"bar"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with IE full paths in filename" do
x = content_disposition_parse.call('filename="c:\foo\bar"; name=file;')
x.keys.must_equal ["file"]
x["file"][:filename].must_equal "bar"
x["file"][:name].must_equal "file"
end

it "parse content-disposition with escaped parameter values in name" do
x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"')
x.keys.must_equal ["file\\-xfoo"]
x["file\\-xfoo"][:filename].must_equal "bar"
x["file\\-xfoo"][:name].must_equal "file\\-xfoo"
end

it "parse content-disposition with escaped parameter values in name" do
x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"')
x.keys.must_equal ["file\\-xfoo"]
x["file\\-xfoo"][:filename].must_equal "bar"
x["file\\-xfoo"][:name].must_equal "file\\-xfoo"
end

it 'raises an EOF error on content-length mismatch' do
env = Rack::MockRequest.env_for("/", multipart_fixture(:empty))
env['rack.input'] = StringIO.new
Expand Down

0 comments on commit 645b5b4

Please sign in to comment.