From 9fa014710d22e30c0be05bddc78e073373def5bd Mon Sep 17 00:00:00 2001
From: Mike McQuaid <mike@mikemcquaid.com>
Date: Thu, 23 Feb 2017 10:15:06 +0000
Subject: [PATCH] audit: further refactor http content checks.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check homepages and don鈥檛 check mirrors unless `鈥攕trict`.
---
 Library/Homebrew/dev-cmd/audit.rb | 144 ++++++++++++++----------------
 1 file changed, 69 insertions(+), 75 deletions(-)

diff --git a/Library/Homebrew/dev-cmd/audit.rb b/Library/Homebrew/dev-cmd/audit.rb
index 493f1eb09c..65b109f3b0 100644
--- a/Library/Homebrew/dev-cmd/audit.rb
+++ b/Library/Homebrew/dev-cmd/audit.rb
@@ -174,30 +174,62 @@ class FormulaAuditor
     @specs = %w[stable devel head].map { |s| formula.send(s) }.compact
   end
 
-  def url_status_code(url, range: false)
-    # The system Curl is too old and unreliable with HTTPS homepages on
-    # Yosemite and below.
-    return "200" unless DevelopmentTools.curl_handles_most_https_homepages?
+  def self.check_http_content(url, user_agents: [:default])
+    details = nil
+    user_agent = nil
+    user_agents.each do |ua|
+      details = http_content_headers_and_checksum(url, user_agent: ua)
+      user_agent = ua
+      break if details[:status].to_s.start_with?("2")
+    end
 
-    extra_args = [
-      "--connect-timeout", "15",
-      "--output", "/dev/null",
-      "--write-out", "%{http_code}"
-    ]
-    extra_args << "--range" << "0-0" if range
-    extra_args << url
-
-    status_code = nil
-    [:browser, :default].each do |user_agent|
-      args = curl_args(
-        extra_args: extra_args,
-        show_output: true,
-        user_agent: user_agent,
-      )
-      status_code = Open3.popen3(*args) { |_, stdout, _, _| stdout.read }
-      break if status_code.start_with? "2"
-    end
-    status_code
+    return "The URL #{url} is not reachable" unless details[:status]
+    unless details[:status].start_with? "2"
+      return "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
+    end
+
+    return unless url.start_with? "http:"
+
+    secure_url = url.sub "http", "https"
+    secure_details =
+      http_content_headers_and_checksum(secure_url, user_agent: user_agent)
+
+    if !details[:status].to_s.start_with?("2") ||
+       !secure_details[:status].to_s.start_with?("2")
+      return
+    end
+
+    etag_match = details[:etag] &&
+                 details[:etag] == secure_details[:etag]
+    content_length_match =
+      details[:content_length] &&
+      details[:content_length] == secure_details[:content_length]
+    file_match = details[:file_hash] == secure_details[:file_hash]
+
+    return if !etag_match && !content_length_match && !file_match
+    "The URL #{url} could use HTTPS rather than HTTP"
+  end
+
+  def self.http_content_headers_and_checksum(url, user_agent: :default)
+    args = curl_args(
+      extra_args: ["--connect-timeout", "15", "--include", url],
+      show_output: true,
+      user_agent: user_agent,
+    )
+    output = Open3.popen3(*args) { |_, stdout, _, _| stdout.read }
+
+    status_code = :unknown
+    while status_code == :unknown || status_code.to_s.start_with?("3")
+      headers, _, output = output.partition("\r\n\r\n")
+      status_code = headers[%r{HTTP\/.* (\d+)}, 1]
+    end
+
+    {
+      status: status_code,
+      etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2],
+      content_length: headers[/Content-Length: (\d+)/, 1],
+      file_hash: Digest::SHA256.digest(output),
+    }
   end
 
   def audit_style
@@ -619,9 +651,13 @@ class FormulaAuditor
 
     return unless @online
 
-    status_code = url_status_code(homepage)
-    return if status_code.start_with? "2"
-    problem "The homepage #{homepage} is not reachable (HTTP status code #{status_code})"
+    # The system Curl is too old and unreliable with HTTPS homepages on
+    # Yosemite and below.
+    return unless DevelopmentTools.curl_handles_most_https_homepages?
+    if http_content_problem = FormulaAuditor.check_http_content(homepage,
+                                             user_agents: [:browser, :default])
+      problem http_content_problem
+    end
   end
 
   def audit_bottle_spec
@@ -671,11 +707,11 @@ class FormulaAuditor
     %w[Stable Devel HEAD].each do |name|
       next unless spec = formula.send(name.downcase)
 
-      ra = ResourceAuditor.new(spec, online: @online).audit
+      ra = ResourceAuditor.new(spec, online: @online, strict: @strict).audit
       problems.concat ra.problems.map { |problem| "#{name}: #{problem}" }
 
       spec.resources.each_value do |resource|
-        ra = ResourceAuditor.new(resource, online: @online).audit
+        ra = ResourceAuditor.new(resource, online: @online, strict: @strict).audit
         problems.concat ra.problems.map { |problem|
           "#{name} resource #{resource.name.inspect}: #{problem}"
         }
@@ -1231,6 +1267,7 @@ class ResourceAuditor
     @using    = resource.using
     @specs    = resource.specs
     @online   = options[:online]
+    @strict   = options[:strict]
     @problems = []
   end
 
@@ -1492,7 +1529,10 @@ class ResourceAuditor
     urls.each do |url|
       strategy = DownloadStrategyDetector.detect(url)
       if strategy <= CurlDownloadStrategy && !url.start_with?("file")
-        check_http_content url
+        next if !@strict && mirrors.include?(url)
+        if http_content_problem = FormulaAuditor.check_http_content(url)
+          problem http_content_problem
+        end
       elsif strategy <= GitDownloadStrategy
         unless Utils.git_remote_exists url
           problem "The URL #{url} is not a valid git URL"
@@ -1505,53 +1545,7 @@ class ResourceAuditor
     end
   end
 
-  def check_http_content(url)
-    details = get_content_details(url)
-
-    if details[:status].nil?
-      problem "The URL #{url} is not reachable"
-    elsif !details[:status].start_with? "2"
-      problem "The URL #{url} is not reachable (HTTP status code #{details[:status]})"
-    end
-
-    return unless url.start_with? "http:"
-
-    secure_url = url.sub "http", "https"
-    secure_details = get_content_details(secure_url)
-
-    if !details[:status].to_s.start_with?("2") ||
-       !secure_details[:status].to_s.start_with?("2")
-      return
-    end
-
-    etag_match = details[:etag] &&
-                 details[:etag] == secure_details[:etag]
-    content_length_match =
-      details[:content_length] &&
-      details[:content_length] == secure_details[:content_length]
-    file_match = details[:file_hash] == secure_details[:file_hash]
-
-    return if !etag_match && !content_length_match && !file_match
-    problem "The URL #{url} could use HTTPS rather than HTTP"
-  end
-
   def problem(text)
     @problems << text
   end
-
-  def get_content_details(url)
-    out = {}
-    output, = curl_output "--connect-timeout", "15", "--include", url
-    status_code = :unknown
-    while status_code == :unknown || status_code.to_s.start_with?("3")
-      headers, _, output = output.partition("\r\n\r\n")
-      status_code = headers[%r{HTTP\/.* (\d+)}, 1]
-    end
-
-    out[:status] = status_code
-    out[:etag] = headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2]
-    out[:content_length] = headers[/Content-Length: (\d+)/, 1]
-    out[:file_hash] = Digest::SHA256.digest output
-    out
-  end
 end
-- 
GitLab