From 9fa014710d22e30c0be05bddc78e073373def5bd Mon Sep 17 00:00:00 2001 From: Mike McQuaid <mike@mikemcquaid.com> Date: Thu, 23 Feb 2017 10:15:06 +0000 Subject: [PATCH] audit: further refactor http content checks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check homepages and don鈥檛 check mirrors unless `鈥攕trict`. --- Library/Homebrew/dev-cmd/audit.rb | 144 ++++++++++++++---------------- 1 file changed, 69 insertions(+), 75 deletions(-) diff --git a/Library/Homebrew/dev-cmd/audit.rb b/Library/Homebrew/dev-cmd/audit.rb index 493f1eb09c..65b109f3b0 100644 --- a/Library/Homebrew/dev-cmd/audit.rb +++ b/Library/Homebrew/dev-cmd/audit.rb @@ -174,30 +174,62 @@ class FormulaAuditor @specs = %w[stable devel head].map { |s| formula.send(s) }.compact end - def url_status_code(url, range: false) - # The system Curl is too old and unreliable with HTTPS homepages on - # Yosemite and below. - return "200" unless DevelopmentTools.curl_handles_most_https_homepages? + def self.check_http_content(url, user_agents: [:default]) + details = nil + user_agent = nil + user_agents.each do |ua| + details = http_content_headers_and_checksum(url, user_agent: ua) + user_agent = ua + break if details[:status].to_s.start_with?("2") + end - extra_args = [ - "--connect-timeout", "15", - "--output", "/dev/null", - "--write-out", "%{http_code}" - ] - extra_args << "--range" << "0-0" if range - extra_args << url - - status_code = nil - [:browser, :default].each do |user_agent| - args = curl_args( - extra_args: extra_args, - show_output: true, - user_agent: user_agent, - ) - status_code = Open3.popen3(*args) { |_, stdout, _, _| stdout.read } - break if status_code.start_with? "2" - end - status_code + return "The URL #{url} is not reachable" unless details[:status] + unless details[:status].start_with? "2" + return "The URL #{url} is not reachable (HTTP status code #{details[:status]})" + end + + return unless url.start_with? "http:" + + secure_url = url.sub "http", "https" + secure_details = + http_content_headers_and_checksum(secure_url, user_agent: user_agent) + + if !details[:status].to_s.start_with?("2") || + !secure_details[:status].to_s.start_with?("2") + return + end + + etag_match = details[:etag] && + details[:etag] == secure_details[:etag] + content_length_match = + details[:content_length] && + details[:content_length] == secure_details[:content_length] + file_match = details[:file_hash] == secure_details[:file_hash] + + return if !etag_match && !content_length_match && !file_match + "The URL #{url} could use HTTPS rather than HTTP" + end + + def self.http_content_headers_and_checksum(url, user_agent: :default) + args = curl_args( + extra_args: ["--connect-timeout", "15", "--include", url], + show_output: true, + user_agent: user_agent, + ) + output = Open3.popen3(*args) { |_, stdout, _, _| stdout.read } + + status_code = :unknown + while status_code == :unknown || status_code.to_s.start_with?("3") + headers, _, output = output.partition("\r\n\r\n") + status_code = headers[%r{HTTP\/.* (\d+)}, 1] + end + + { + status: status_code, + etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2], + content_length: headers[/Content-Length: (\d+)/, 1], + file_hash: Digest::SHA256.digest(output), + } end def audit_style @@ -619,9 +651,13 @@ class FormulaAuditor return unless @online - status_code = url_status_code(homepage) - return if status_code.start_with? "2" - problem "The homepage #{homepage} is not reachable (HTTP status code #{status_code})" + # The system Curl is too old and unreliable with HTTPS homepages on + # Yosemite and below. + return unless DevelopmentTools.curl_handles_most_https_homepages? + if http_content_problem = FormulaAuditor.check_http_content(homepage, + user_agents: [:browser, :default]) + problem http_content_problem + end end def audit_bottle_spec @@ -671,11 +707,11 @@ class FormulaAuditor %w[Stable Devel HEAD].each do |name| next unless spec = formula.send(name.downcase) - ra = ResourceAuditor.new(spec, online: @online).audit + ra = ResourceAuditor.new(spec, online: @online, strict: @strict).audit problems.concat ra.problems.map { |problem| "#{name}: #{problem}" } spec.resources.each_value do |resource| - ra = ResourceAuditor.new(resource, online: @online).audit + ra = ResourceAuditor.new(resource, online: @online, strict: @strict).audit problems.concat ra.problems.map { |problem| "#{name} resource #{resource.name.inspect}: #{problem}" } @@ -1231,6 +1267,7 @@ class ResourceAuditor @using = resource.using @specs = resource.specs @online = options[:online] + @strict = options[:strict] @problems = [] end @@ -1492,7 +1529,10 @@ class ResourceAuditor urls.each do |url| strategy = DownloadStrategyDetector.detect(url) if strategy <= CurlDownloadStrategy && !url.start_with?("file") - check_http_content url + next if !@strict && mirrors.include?(url) + if http_content_problem = FormulaAuditor.check_http_content(url) + problem http_content_problem + end elsif strategy <= GitDownloadStrategy unless Utils.git_remote_exists url problem "The URL #{url} is not a valid git URL" @@ -1505,53 +1545,7 @@ class ResourceAuditor end end - def check_http_content(url) - details = get_content_details(url) - - if details[:status].nil? - problem "The URL #{url} is not reachable" - elsif !details[:status].start_with? "2" - problem "The URL #{url} is not reachable (HTTP status code #{details[:status]})" - end - - return unless url.start_with? "http:" - - secure_url = url.sub "http", "https" - secure_details = get_content_details(secure_url) - - if !details[:status].to_s.start_with?("2") || - !secure_details[:status].to_s.start_with?("2") - return - end - - etag_match = details[:etag] && - details[:etag] == secure_details[:etag] - content_length_match = - details[:content_length] && - details[:content_length] == secure_details[:content_length] - file_match = details[:file_hash] == secure_details[:file_hash] - - return if !etag_match && !content_length_match && !file_match - problem "The URL #{url} could use HTTPS rather than HTTP" - end - def problem(text) @problems << text end - - def get_content_details(url) - out = {} - output, = curl_output "--connect-timeout", "15", "--include", url - status_code = :unknown - while status_code == :unknown || status_code.to_s.start_with?("3") - headers, _, output = output.partition("\r\n\r\n") - status_code = headers[%r{HTTP\/.* (\d+)}, 1] - end - - out[:status] = status_code - out[:etag] = headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2] - out[:content_length] = headers[/Content-Length: (\d+)/, 1] - out[:file_hash] = Digest::SHA256.digest output - out - end end -- GitLab