Skip to content

Commit

Permalink
Merge pull request #7122 from rubygems/martinemde/digestio-opaque-etag
Browse files Browse the repository at this point in the history
Opaque ETags in Compact Index requests
  • Loading branch information
martinemde committed Nov 27, 2023
2 parents 058b156 + a3ef6d9 commit 0339d53
Show file tree
Hide file tree
Showing 17 changed files with 703 additions and 145 deletions.
1 change: 1 addition & 0 deletions Manifest.txt
Expand Up @@ -51,6 +51,7 @@ bundler/lib/bundler/cli/update.rb
bundler/lib/bundler/cli/viz.rb
bundler/lib/bundler/compact_index_client.rb
bundler/lib/bundler/compact_index_client/cache.rb
bundler/lib/bundler/compact_index_client/cache_file.rb
bundler/lib/bundler/compact_index_client/gem_parser.rb
bundler/lib/bundler/compact_index_client/updater.rb
bundler/lib/bundler/constants.rb
Expand Down
21 changes: 14 additions & 7 deletions bundler/lib/bundler/compact_index_client.rb
Expand Up @@ -5,7 +5,13 @@

module Bundler
class CompactIndexClient
# NOTE: MD5 is here not because we expect a server to respond with it, but
# because we use it to generate the etag on first request during the upgrade
# to the compact index client that uses opaque etags saved to files.
# Remove once 2.5.0 has been out for a while.
SUPPORTED_DIGESTS = { "sha-256" => :SHA256, "md5" => :MD5 }.freeze
DEBUG_MUTEX = Thread::Mutex.new

def self.debug
return unless ENV["DEBUG_COMPACT_INDEX"]
DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") }
Expand All @@ -14,6 +20,7 @@ def self.debug
class Error < StandardError; end

require_relative "compact_index_client/cache"
require_relative "compact_index_client/cache_file"
require_relative "compact_index_client/updater"

attr_reader :directory
Expand Down Expand Up @@ -54,13 +61,13 @@ def sequentially

def names
Bundler::CompactIndexClient.debug { "/names" }
update(@cache.names_path, "names")
update("names", @cache.names_path, @cache.names_etag_path)
@cache.names
end

def versions
Bundler::CompactIndexClient.debug { "/versions" }
update(@cache.versions_path, "versions")
update("versions", @cache.versions_path, @cache.versions_etag_path)
versions, @info_checksums_by_name = @cache.versions
versions
end
Expand All @@ -76,36 +83,36 @@ def dependencies(names)
def update_and_parse_checksums!
Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" }
return @info_checksums_by_name if @parsed_checksums
update(@cache.versions_path, "versions")
update("versions", @cache.versions_path, @cache.versions_etag_path)
@info_checksums_by_name = @cache.checksums
@parsed_checksums = true
end

private

def update(local_path, remote_path)
def update(remote_path, local_path, local_etag_path)
Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" }
unless synchronize { @endpoints.add?(remote_path) }
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
return
end
@updater.update(local_path, url(remote_path))
@updater.update(url(remote_path), local_path, local_etag_path)
end

def update_info(name)
Bundler::CompactIndexClient.debug { "update_info(#{name})" }
path = @cache.info_path(name)
checksum = @updater.checksum_for_file(path)
unless existing = @info_checksums_by_name[name]
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" }
return
end
checksum = SharedHelpers.checksum_for_file(path, :MD5)
if checksum == existing
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" }
return
end
Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" }
update(path, "info/#{name}")
update("info/#{name}", path, @cache.info_etag_path(name))
end

def url(path)
Expand Down
30 changes: 25 additions & 5 deletions bundler/lib/bundler/compact_index_client/cache.rb
Expand Up @@ -9,11 +9,8 @@ class Cache

def initialize(directory)
@directory = Pathname.new(directory).expand_path
info_roots.each do |dir|
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
end
end
info_roots.each {|dir| mkdir(dir) }
mkdir(info_etag_root)
end

def names
Expand All @@ -24,6 +21,10 @@ def names_path
directory.join("names")
end

def names_etag_path
directory.join("names.etag")
end

def versions
versions_by_name = Hash.new {|hash, key| hash[key] = [] }
info_checksums_by_name = {}
Expand All @@ -49,6 +50,10 @@ def versions_path
directory.join("versions")
end

def versions_etag_path
directory.join("versions.etag")
end

def checksums
checksums = {}

Expand Down Expand Up @@ -76,8 +81,19 @@ def info_path(name)
end
end

def info_etag_path(name)
name = name.to_s
info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}")
end

private

def mkdir(dir)
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
end
end

def lines(path)
return [] unless path.file?
lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n")
Expand All @@ -96,6 +112,10 @@ def info_roots
directory.join("info-special-characters"),
]
end

def info_etag_root
directory.join("info-etags")
end
end
end
end
153 changes: 153 additions & 0 deletions bundler/lib/bundler/compact_index_client/cache_file.rb
@@ -0,0 +1,153 @@
# frozen_string_literal: true

require_relative "../vendored_fileutils"
require "rubygems/package"

module Bundler
  class CompactIndexClient
    # Write cache files in a way that is robust to concurrent modifications.
    # All writes go to a process-unique ".<pid>.tmp" sibling of the target file,
    # which atomically replaces the original via FileUtils.mv on commit.
    # If digests are given, the checksums will be verified before the swap.
    class CacheFile
      # Mode used for the temp file when the original does not yet exist.
      DEFAULT_FILE_MODE = 0o644
      private_constant :DEFAULT_FILE_MODE

      class Error < RuntimeError; end
      # Raised when reopening or committing a file that was already closed.
      class ClosedError < Error; end

      # Raised by commit! when computed digests differ from the expected ones.
      class DigestMismatchError < Error
        # @param digests [Hash] the base64 digests actually calculated
        # @param expected_digests [Hash] the digests the caller demanded
        def initialize(digests, expected_digests)
          super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
        end
      end

      # Initialize with a copy of the original file, then yield the instance.
      # The copy is streamed through the digest IO (see #open), so the digests
      # reflect the pre-existing content and can later be extended by #append.
      def self.copy(path, &block)
        new(path) do |file|
          file.initialize_digests

          SharedHelpers.filesystem_access(path, :read) do
            path.open("rb") do |s|
              file.open {|f| IO.copy_stream(s, f) }
            end
          end

          yield file
        end
      end

      # Write data to a temp file, then replace the original file with it verifying the digests if given.
      # Returns nil without touching the filesystem when data is nil.
      def self.write(path, data, digests = nil)
        return unless data
        new(path) do |file|
          file.digests = digests
          file.write(data)
        end
      end

      # original_path: the file being replaced; path: the temp file written to.
      attr_reader :original_path, :path

      # Remembers the original file's permissions (or DEFAULT_FILE_MODE when it
      # doesn't exist yet) and derives a pid-suffixed temp path so concurrent
      # processes never collide on the same temp file. When a block is given,
      # yields self and guarantees the temp file is cleaned up via #close.
      def initialize(original_path, &block)
        @original_path = original_path
        @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE
        # $$ is the current process id, keeping temp names unique per process.
        @path = original_path.sub(/$/, ".#{$$}.tmp")
        return unless block_given?
        begin
          yield self
        ensure
          close
        end
      end

      # Size of the temp file (not the original).
      def size
        path.size
      end

      # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
      def initialize_digests(keys = nil)
        @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
        @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new }
      end

      # reset the digests so they don't contain any previously read data
      def reset_digests
        @digests&.each_value(&:reset)
      end

      # set the digests that will be verified at the end
      # Passing nil disables digest tracking entirely; otherwise the tracked
      # digest set is narrowed (or created) to match the expected algorithms.
      def digests=(expected_digests)
        @expected_digests = expected_digests

        if @expected_digests.nil?
          @digests = nil
        elsif @digests
          @digests = @digests.slice(*@expected_digests.keys)
        else
          initialize_digests(@expected_digests.keys)
        end
      end

      # remove this method when we stop generating md5 digests for legacy etags
      def md5
        @digests && @digests["md5"]
      end

      # true when at least one digest is currently being tracked
      def digests?
        @digests&.any?
      end

      # Open the temp file for writing, reusing original permissions, yielding the IO object.
      # When digests are tracked, the IO is wrapped in Gem::Package::DigestIO so
      # every write also updates the digests.
      def open(write_mode = "wb", perm = @perm, &block)
        raise ClosedError, "Cannot reopen closed file" if @closed
        SharedHelpers.filesystem_access(path, :write) do
          path.open(write_mode, perm) do |f|
            yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f
          end
        end
      end

      # Returns false without appending when no digests since appending is too error prone to do without digests.
      # On digest mismatch the temp file is NOT committed and false is returned.
      def append(data)
        return false unless digests?
        open("a") {|f| f.write data }
        verify && commit
      end

      # Overwrite the temp file with data and commit, raising on digest mismatch.
      # Digests are reset first so they cover only this write.
      def write(data)
        reset_digests
        open {|f| f.write data }
        commit!
      end

      # Like #commit, but raises DigestMismatchError when verification fails.
      def commit!
        verify || raise(DigestMismatchError.new(@base64digests, @expected_digests))
        commit
      end

      # Verify the digests, returning true on match, false on mismatch.
      # Trivially true when nothing was expected or nothing was tracked.
      # Consumes the digests: they are finalized to base64 and cleared, so
      # verify can only meaningfully run once per write cycle.
      def verify
        return true unless @expected_digests && digests?
        @base64digests = @digests.transform_values!(&:base64digest)
        @digests = nil
        @base64digests.all? {|algo, digest| @expected_digests[algo] == digest }
      end

      # Replace the original file with the temp file without verifying digests.
      # The file is permanently closed.
      def commit
        raise ClosedError, "Cannot commit closed file" if @closed
        SharedHelpers.filesystem_access(original_path, :write) do
          FileUtils.mv(path, original_path)
        end
        @closed = true
      end

      # Remove the temp file without replacing the original file.
      # The file is permanently closed.
      # Idempotent: calling close after commit (or close) is a no-op.
      def close
        return if @closed
        FileUtils.remove_file(path) if @path&.file?
        @closed = true
      end
    end
  end
end

0 comments on commit 0339d53

Please sign in to comment.