From 4167f0ce67e42b082605bca75c7bdfd01eb23804 Mon Sep 17 00:00:00 2001 From: John Lees-Miller Date: Thu, 12 Sep 2019 22:01:38 +0100 Subject: [PATCH 1/2] Validate entry sizes when extracting --- README.md | 67 +++++++++++++++++++++++++++++++-------- lib/zip.rb | 4 ++- lib/zip/entry.rb | 7 ++++ lib/zip/errors.rb | 1 + test/file_extract_test.rb | 62 ++++++++++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 321b811..a193c24 100644 --- a/README.md +++ b/README.md @@ -129,12 +129,15 @@ After this entries in zip archive will be saved in ordered state. ### Reading a Zip file ```ruby +MAX_SIZE = 1024**2 # 1MiB (but of course you can increase this) Zip::File.open('foo.zip') do |zip_file| # Handle entries one by one zip_file.each do |entry| - # Extract to file/directory/symlink puts "Extracting #{entry.name}" - entry.extract(dest_file) + raise 'File too large when extracted' if entry.size > MAX_SIZE + + # Extract to file or directory based on name in the archive + entry.extract # Read into memory content = entry.get_input_stream.read @@ -142,6 +145,7 @@ Zip::File.open('foo.zip') do |zip_file| # Find specific entry entry = zip_file.glob('*.csv').first + raise 'File too large when extracted' if entry.size > MAX_SIZE puts entry.get_input_stream.read end ``` @@ -212,6 +216,29 @@ In some zip date of files stored in incorrect format. You can hide warning about Zip.warn_invalid_date = false ``` +By default, `rubyzip`'s `extract` method checks that an entry's reported uncompressed size is not (significantly) smaller than its actual size. This is to help you protect your application against [zip bombs](https://en.wikipedia.org/wiki/Zip_bomb). Before `extract`ing an entry, you should check that its size is in the range you expect. For example, if your application supports processing up to 100 files at once, each up to 10MiB, your zip extraction code might look like: + +```ruby +MAX_FILE_SIZE = 10 * 1024**2 # 10MiB +MAX_FILES = 100 +Zip::File.open('foo.zip') do |zip_file| + num_files = 0 + zip_file.each do |entry| + num_files += 1 if entry.file? + raise 'Too many extracted files' if num_files > MAX_FILES + raise 'File too large when extracted' if entry.size > MAX_FILE_SIZE + entry.extract + end +end +``` + +If you need to extract zip files that report incorrect uncompressed sizes and you really trust them not too be too large, you can disable this setting with +```ruby +Zip.validate_entry_sizes = false +``` + +Note that if you use the lower level `Zip::InputStream` interface, `rubyzip` does *not* check the entry `size`s. In this case, the caller is responsible for making sure it does not read more data than expected from the input stream. + You can set the default compression level like so: ```ruby diff --git a/lib/zip.rb b/lib/zip.rb index e6ebad3..25d3764 100644 --- a/lib/zip.rb +++ b/lib/zip.rb @@ -37,7 +37,7 @@ end module Zip extend self - attr_accessor :unicode_names, :on_exists_proc, :continue_on_exists_proc, :sort_entries, :default_compression, :write_zip64_support, :warn_invalid_date + attr_accessor :unicode_names, :on_exists_proc, :continue_on_exists_proc, :sort_entries, :default_compression, :write_zip64_support, :warn_invalid_date, :validate_entry_sizes def reset! @_ran_once = false @@ -48,6 +48,7 @@ module Zip @default_compression = ::Zlib::DEFAULT_COMPRESSION @write_zip64_support = false @warn_invalid_date = true + @validate_entry_sizes = true end def setup diff --git a/lib/zip/entry.rb b/lib/zip/entry.rb index 80160b5..bd3e4f3 100644 --- a/lib/zip/entry.rb +++ b/lib/zip/entry.rb @@ -583,9 +583,16 @@ def create_file(dest_path, _continue_on_exists_proc = proc { Zip.continue_on_exi get_input_stream do |is| set_extra_attributes_on_path(dest_path) + bytes_written = 0 buf = '' while buf = is.sysread(::Zip::Decompressor::CHUNK_SIZE, buf) os << buf + + next unless ::Zip.validate_entry_sizes + bytes_written += buf.bytesize + if bytes_written > size + raise ::Zip::EntrySizeError, "Entry #{name} should be #{size}B but is larger when inflated" + end end end end diff --git a/lib/zip/errors.rb b/lib/zip/errors.rb index b2bcccd..364c6ee 100644 --- a/lib/zip/errors.rb +++ b/lib/zip/errors.rb @@ -4,6 +4,7 @@ class EntryExistsError < Error; end class DestinationFileExistsError < Error; end class CompressionMethodError < Error; end class EntryNameError < Error; end + class EntrySizeError < Error; end class InternalError < Error; end # Backwards compatibility with v1 (delete in v2) diff --git a/test/file_extract_test.rb b/test/file_extract_test.rb index 57833fc..6103aea 100644 --- a/test/file_extract_test.rb +++ b/test/file_extract_test.rb @@ -10,6 +10,10 @@ def setup ::File.delete(EXTRACTED_FILENAME) if ::File.exist?(EXTRACTED_FILENAME) end + def teardown + ::Zip.reset! + end + def test_extract ::Zip::File.open(TEST_ZIP.zip_name) { |zf| @@ -87,4 +91,61 @@ def test_extractNonEntry2 assert(!File.exist?(outFile)) end + def test_extract_incorrect_size + # The uncompressed size fields in the zip file cannot be trusted. This makes + # it harder for callers to validate the sizes of the files they are + # extracting, which can lead to denial of service. See also + # https://en.wikipedia.org/wiki/Zip_bomb + Dir.mktmpdir do |tmp| + real_zip = File.join(tmp, 'real.zip') + fake_zip = File.join(tmp, 'fake.zip') + file_name = 'a' + true_size = 500_000 + fake_size = 1 + + ::Zip::File.open(real_zip, ::Zip::File::CREATE) do |zf| + zf.get_output_stream(file_name) do |os| + os.write 'a' * true_size + end + end + + compressed_size = nil + ::Zip::File.open(real_zip) do |zf| + a_entry = zf.find_entry(file_name) + compressed_size = a_entry.compressed_size + assert_equal true_size, a_entry.size + end + + true_size_bytes = [compressed_size, true_size, file_name.size].pack('LLS') + fake_size_bytes = [compressed_size, fake_size, file_name.size].pack('LLS') + + data = File.binread(real_zip) + assert data.include?(true_size_bytes) + data.gsub! true_size_bytes, fake_size_bytes + + File.open(fake_zip, 'wb') do |file| + file.write data + end + + Dir.chdir tmp do + ::Zip::File.open(fake_zip) do |zf| + a_entry = zf.find_entry(file_name) + assert_equal fake_size, a_entry.size + + ::Zip.validate_entry_sizes = false + a_entry.extract + assert_equal true_size, File.size(file_name) + FileUtils.rm file_name + + ::Zip.validate_entry_sizes = true + error = assert_raises ::Zip::EntrySizeError do + a_entry.extract + end + assert_equal \ + 'Entry a should be 1B but is larger when inflated', + error.message + end + end + end + end end From 97cb6aefe6d12bd2429d7a2e119ccb26f259d71d Mon Sep 17 00:00:00 2001 From: John Lees-Miller Date: Wed, 18 Sep 2019 18:34:23 +0100 Subject: [PATCH 2/2] Warn when an entry size is invalid --- lib/zip/entry.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/zip/entry.rb b/lib/zip/entry.rb index bd3e4f3..677e49e 100644 --- a/lib/zip/entry.rb +++ b/lib/zip/entry.rb @@ -604,14 +604,19 @@ def create_file(dest_path, _continue_on_exists_proc = proc { Zip.continue_on_exi set_extra_attributes_on_path(dest_path) bytes_written = 0 + warned = false buf = '' while buf = is.sysread(::Zip::Decompressor::CHUNK_SIZE, buf) os << buf - - next unless ::Zip.validate_entry_sizes bytes_written += buf.bytesize - if bytes_written > size - raise ::Zip::EntrySizeError, "Entry #{name} should be #{size}B but is larger when inflated" + if bytes_written > size && !warned + message = "Entry #{name} should be #{size}B but is larger when inflated" + if ::Zip.validate_entry_sizes + raise ::Zip::EntrySizeError, message + else + puts "WARNING: #{message}" + warned = true + end end end end