From 343091c828d1e572829b86253d79b326c1986bcd Mon Sep 17 00:00:00 2001
From: Andrew Janke <floss@apjanke.net>
Date: Tue, 5 Apr 2016 17:23:24 -0400
Subject: [PATCH] Merge pull request #27 from apjanke/test-bot-xml-filter

test-bot: revise Step output transcoding and XML character filtering
---
 Library/Homebrew/cmd/test-bot.rb     | 85 +++++++++++++++++++---------
 Library/Homebrew/extend/fileutils.rb | 11 ++++
 2 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/Library/Homebrew/cmd/test-bot.rb b/Library/Homebrew/cmd/test-bot.rb
index fb28c5f3d6..067e7da97c 100644
--- a/Library/Homebrew/cmd/test-bot.rb
+++ b/Library/Homebrew/cmd/test-bot.rb
@@ -3,23 +3,24 @@
 # Usage: brew test-bot [options...] <pull-request|formula>
 #
 # Options:
-# --keep-logs:     Write and keep log files under ./brewbot/
+# --keep-logs:     Write and keep log files under ./brewbot/.
 # --cleanup:       Clean the Homebrew directory. Very dangerous. Use with care.
 # --clean-cache:   Remove all cached downloads. Use with care.
 # --skip-setup:    Don't check the local system is setup correctly.
 # --skip-homebrew: Don't check Homebrew's files and tests are all valid.
 # --junit:         Generate a JUnit XML test results file.
-# --no-bottle:     Run brew install without --build-bottle
+# --no-bottle:     Run brew install without --build-bottle.
 # --keep-old:      Run brew bottle --keep-old to build new bottles for a single platform.
-# --legacy         Bulid formula from legacy Homebrew/homebrew repo.
+# --legacy         Build formula from legacy Homebrew/homebrew repo.
 #                  (TODO remove it when it's not longer necessary)
-# --HEAD:          Run brew install with --HEAD
-# --local:         Ask Homebrew to write verbose logs under ./logs/ and set HOME to ./home/
-# --tap=<tap>:     Use the git repository of the given tap
+# --HEAD:          Run brew install with --HEAD.
+# --local:         Ask Homebrew to write verbose logs under ./logs/ and set HOME to ./home/.
+# --tap=<tap>:     Use the git repository of the given tap.
 # --dry-run:       Just print commands, don't run them.
 # --fail-fast:     Immediately exit on a failing step.
-# --verbose:       Print out all logs in realtime
-# --fast:          Don't install any packages but run e.g. audit anyway.
+# --verbose:       Print test step output in realtime. Has the side effect of passing output
+#                  as raw bytes instead of re-encoding in UTF-8.
+# --fast:          Don't install any packages, but run e.g. audit anyway.
 #
 # --ci-master:           Shortcut for Homebrew master branch CI options.
 # --ci-pr:               Shortcut for Homebrew pull request CI options.
@@ -38,6 +39,10 @@ module Homebrew
   BYTES_IN_1_MEGABYTE = 1024*1024
   HOMEBREW_TAP_REGEX = %r{^([\w-]+)/homebrew-([\w-]+)$}
 
+  def ruby_has_encoding?
+    String.method_defined?(:force_encoding)
+  end
+
   def resolve_test_tap
     if tap = ARGV.value("tap")
       return Tap.fetch(tap)
@@ -142,7 +147,9 @@ module Homebrew
       end
 
       verbose = ARGV.verbose?
-      @output = ""
+      # Step may produce arbitrary output and we read it bytewise, so must
+      # buffer it as binary and convert to UTF-8 once complete
+      output = Homebrew.ruby_has_encoding? ? "".encode!("BINARY") : ""
       working_dir = Pathname.new(@command.first == "git" ? @repository : Dir.pwd)
       read, write = IO.pipe
 
@@ -155,13 +162,14 @@ module Homebrew
           working_dir.cd { exec(*@command) }
         end
         write.close
-        while buf = read.read(1)
+        while buf = read.readpartial(4096)
           if verbose
             print buf
             $stdout.flush
           end
-          @output << buf
+          output << buf
         end
+      rescue EOFError
       ensure
         read.close
       end
@@ -171,8 +179,9 @@ module Homebrew
       @status = $?.success? ? :passed : :failed
       puts_result
 
-      if has_output?
-        @output = fix_encoding(@output)
+
+      unless output.empty?
+        @output = fix_encoding!(output)
         puts @output if (failed? || @puts_output_on_success) && !verbose
         File.write(log_file_path, @output) if ARGV.include? "--keep-logs"
       end
@@ -182,20 +191,20 @@ module Homebrew
 
     private
 
-    if String.method_defined?(:force_encoding)
-      def fix_encoding(str)
-        return str if str.valid_encoding?
+    if Homebrew.ruby_has_encoding?
+      def fix_encoding!(str)
         # Assume we are starting from a "mostly" UTF-8 string
         str.force_encoding(Encoding::UTF_8)
+        return str if str.valid_encoding?
         str.encode!(Encoding::UTF_16, :invalid => :replace)
         str.encode!(Encoding::UTF_8)
       end
     elsif require "iconv"
-      def fix_encoding(str)
+      def fix_encoding!(str)
         Iconv.conv("UTF-8//IGNORE", "UTF-8", str)
       end
     else
-      def fix_encoding(str)
+      def fix_encoding!(str)
         str
       end
     end
@@ -943,14 +952,7 @@ module Homebrew
           testcase.add_attribute "time", step.time
 
           if step.has_output?
-            # Remove invalid XML CData characters from step output.
-            output = step.output.delete("\000\a\b\e\f\x2\x1f")
-
-            if output.bytesize > BYTES_IN_1_MEGABYTE
-              output = "truncated output to 1MB:\n" \
-                + output.slice(-BYTES_IN_1_MEGABYTE, BYTES_IN_1_MEGABYTE)
-            end
-
+            output = sanitize_output_for_xml(step.output)
             cdata = REXML::CData.new output
 
             if step.passed?
@@ -981,4 +983,35 @@ module Homebrew
 
     Homebrew.failed = any_errors
   end
+
+  def sanitize_output_for_xml(output)
+    unless output.empty?
+      # Remove invalid XML CData characters from step output.
+      if ruby_has_encoding?
+        # This is the regex for valid XML chars, but only works in Ruby 2.0+
+        # /[\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/
+        # For 1.9 compatibility, use the inverse of that, which stays under \u10000
+        # invalid_xml_pat = /[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]/
+        # But Ruby won't allow you to reference surrogates, so we have:
+        invalid_xml_pat = /[\x00-\x08\x0B\x0C\x0E-\x1F\uFFFE\uFFFF]/
+        output = output.gsub(invalid_xml_pat, "\uFFFD")
+      else
+        output = output.delete("\000\a\b\e\f\x2\x1f")
+      end
+
+      # Truncate to 1MB
+      if output.bytesize > BYTES_IN_1_MEGABYTE
+        if ruby_has_encoding?
+          binary_output = output.force_encoding("BINARY")
+          output = binary_output.slice(-BYTES_IN_1_MEGABYTE, BYTES_IN_1_MEGABYTE)
+          fix_encoding!(output)
+        else
+          output = output.slice(-BYTES_IN_1_MEGABYTE, BYTES_IN_1_MEGABYTE)
+        end
+        output = "truncated output to 1MB:\n" + output
+      end
+    end
+    output
+  end
 end
+
diff --git a/Library/Homebrew/extend/fileutils.rb b/Library/Homebrew/extend/fileutils.rb
index 74da1f6e68..ee6735e27a 100644
--- a/Library/Homebrew/extend/fileutils.rb
+++ b/Library/Homebrew/extend/fileutils.rb
@@ -133,3 +133,14 @@ module FileUtils
     ENV.update(removed)
   end
 end
+
+# Shim File.write for Ruby 1.8.7, where it's absent
+unless File.respond_to?(:write)
+  class File
+    def self.write(filename, contents)
+      File.open(filename, 'w') do |file|
+        file.write contents
+      end
+    end
+  end
+end
-- 
GitLab