From 43acf5c8a46ef21487f98693640e0a439a3ae2d8 Mon Sep 17 00:00:00 2001
From: Junegunn Choi <junegunn.c@gmail.com>
Date: Fri, 15 Nov 2013 20:40:57 +0900
Subject: [PATCH] Extended mode

- Implement prefix caching of extended mode
- Improved ranking algorithm for extended mode
- Fix nfc conversion bug
---
 README.md        |  13 ++++
 Rakefile         |   7 ++
 fzf              |  79 +++++++++++++++-------
 test/test_fzf.rb | 170 ++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 230 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index d5764293..acfe21d6 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,19 @@ The following readline key bindings should also work as expected.
 If you enable multi-select mode with `-m` option, you can select multiple items
 with TAB or Shift-TAB key.
 
+### Extended mode
+
+With `-x` or `--extended` option, fzf will start in "extended mode".
+
+In extended mode, you can specify multiple patterns delimited by spaces, such as: `^music .mp3$ sbtrkt !rmx`
+
+| Token    | Description                   | Match type           |
+| -------- | ----------------------------- | -------------------- |
+| `^music` | Items that start with `music` | prefix-exact-match   |
+| `.mp3$`  | Items that end with `.mp3`    | suffix-exact-match   |
+| `sbtrkt` | Items that match `sbtrkt`     | fuzzy-match          |
+| `!rmx`   | Items that do not match `rmx` | invert-fuzzy-match   |
+
 Usage as Vim plugin
 -------------------
 
diff --git a/Rakefile b/Rakefile
index 29955274..1c999e7c 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1 +1,8 @@
 require "bundler/gem_tasks"
+require 'rake/testtask'
+
+Rake::TestTask.new(:test) do |test|
+  test.pattern = 'test/**/test_*.rb'
+  test.verbose = true
+end
+
diff --git a/fzf b/fzf
index 12b61367..978feaee 100755
--- a/fzf
+++ b/fzf
@@ -68,7 +68,7 @@ class FZF
   def initialize argv, source = $stdin
     usage 0 unless (%w[--help -h] & argv).empty?
     @rxflag = argv.delete('+i') ? 0 : Regexp::IGNORECASE
-    @sort   = %w[+s --no-sort].map  { |e| argv.delete e }.compact.empty? ?
+    @sort   = %w[+s --no-sort].map   { |e| argv.delete e }.compact.empty? ?
       ENV.fetch('FZF_DEFAULT_SORT', 500).to_i : nil
     @color  = %w[+c --no-color].map  { |e| argv.delete e }.compact.empty?
     @multi  = !%w[-m --multi].map    { |e| argv.delete e }.compact.empty?
@@ -149,6 +149,12 @@ class FZF
         ret
       end
 
+      def self.to_nfc arr
+        [NFC_BEGIN + arr[0] * JJCOUNT +
+         (arr[1] || 0) * JONGSUNGS +
+         (arr[2] || 0)].pack('U*')
+      end
+
       def self.nfc str, offsets = []
         ret  = ''
         omap = []
@@ -165,9 +171,7 @@ class FZF
               next
             else
               omap[-1] = omap[-1] + 1
-              ret << [NFC_BEGIN + pend[0] * JJCOUNT +
-                                  (pend[1] || 0) * JONGSUNGS +
-                                  (pend[2] || 0)].pack('U*')
+              ret << to_nfc(pend)
               pend.clear
             end
           end
@@ -177,6 +181,7 @@ class FZF
             ret << c
           end
         end
+        ret << to_nfc(pend) unless pend.empty?
         return [ret,
                 offsets.map { |pair|
                   b, e = pair
@@ -324,8 +329,14 @@ class FZF
   def sort_by_rank list
     list.sort_by { |tuple|
       line, offsets = tuple
-      matchlen = (offsets.map { |pair| pair.last  }.max || 0) -
-                 (offsets.map { |pair| pair.first }.min || 0)
+      matchlen = 0
+      pe = nil
+      offsets.sort.each do |pair|
+        b, e = pair
+        b    = pe if pe && pe > b
+        pe   = e
+        matchlen += e - b
+      end
       [matchlen, line.length, line]
     }
   end
@@ -453,7 +464,7 @@ class FZF
 
   def start_search
     main     = Thread.current
-    matcher  = (@xmode ? XFuzzyMatcher : FuzzyMatcher).new @rxflag
+    matcher  = (@xmode ? ExtendedFuzzyMatcher : FuzzyMatcher).new @rxflag
     searcher = Thread.new {
       lists   = []
       events  = {}
@@ -654,15 +665,14 @@ class FZF
         end
         @stdout.puts got
       end
-      exit 0
     end
   end
 
   class FuzzyMatcher < Matcher
-    attr_reader :cache, :rxflag
+    attr_reader :caches, :rxflag
 
     def initialize rxflag
-      @cache  = Hash.new { |h, k| h[k] = {} }
+      @caches = Hash.new { |h, k| h[k] = {} }
       @regexp = {}
       @rxflag = rxflag
     end
@@ -680,7 +690,7 @@ class FZF
     def match list, q, prefix, suffix
       regexp = fuzzy_regex q
 
-      cache = @cache[list.object_id]
+      cache = @caches[list.object_id]
       prefix_cache = nil
       (prefix.length - 1).downto(1) do |len|
         break if prefix_cache = cache[prefix[0, len]]
@@ -702,28 +712,49 @@ class FZF
     end
   end
 
-  class XFuzzyMatcher < FuzzyMatcher
+  class ExtendedFuzzyMatcher < FuzzyMatcher
+    def initialize rxflag
+      super
+      require 'set'
+      @regexps = {}
+    end
+
     def match list, q, prefix, suffix
-      regexps = q.strip.split(/\s+/).map { |w|
+      q = q.strip
+
+      regexps = @regexps[q] ||= q.split(/\s+/).map { |w|
         invert =
           if w =~ /^!/
             w = w[1..-1]
             true
           end
 
-        [ case w
-          when ''
-            nil
-          when /^\^/
-            w.length > 1 ? Regexp.new('^' << w[1..-1], rxflag) : nil
-          when /\$$/
-            w.length > 1 ? Regexp.new(w[0..-2] << '$', rxflag) : nil
-          else
-            fuzzy_regex w
-          end, invert ]
+        [ @regexp[w] ||=
+            case w
+            when ''
+              nil
+            when /^\^/
+              w.length > 1 ?
+                Regexp.new('^' << Regexp.escape(w[1..-1]), rxflag) : nil
+            when /\$$/
+              w.length > 1 ?
+                Regexp.new(Regexp.escape(w[0..-2]) << '$', rxflag) : nil
+            else
+              fuzzy_regex w
+            end, invert ]
       }.select { |pair| pair.first }
 
-      list.map { |line|
+      # Look for prefix cache
+      cache  = @caches[list.object_id]
+      prefix = prefix.strip.sub(/\$\S+$/, '').sub(/!\S+$/, '')
+      prefix_cache = nil
+      (prefix.length - 1).downto(1) do |len|
+        break if prefix_cache = cache[Set[@regexps[prefix[0, len]]]]
+      end
+
+      cache[Set[regexps]] ||= (prefix_cache ?
+                               prefix_cache.map { |e| e.first } :
+                               list).map { |line|
         offsets = []
         regexps.all? { |pair|
           regexp, invert = pair
diff --git a/test/test_fzf.rb b/test/test_fzf.rb
index 2af60b8a..2ea3f5a9 100644
--- a/test/test_fzf.rb
+++ b/test/test_fzf.rb
@@ -56,11 +56,15 @@ class TestFZF < MiniTest::Unit::TestCase
 
   def test_trim
     fzf = FZF.new []
-    assert_equal ['사.',   6], fzf.trim('가나다라마바사.', 4, true)
-    assert_equal ['바사.', 5], fzf.trim('가나다라마바사.', 5, true)
-    assert_equal ['가나',  6], fzf.trim('가나다라마바사.', 4, false)
-    assert_equal ['가나',  6], fzf.trim('가나다라마바사.', 5, false)
-    assert_equal ['가나a', 6], fzf.trim('가나ab라마바사.', 5, false)
+    assert_equal ['사.',     6], fzf.trim('가나다라마바사.', 4, true)
+    assert_equal ['바사.',   5], fzf.trim('가나다라마바사.', 5, true)
+    assert_equal ['바사.',   5], fzf.trim('가나다라마바사.', 6, true)
+    assert_equal ['마바사.', 4], fzf.trim('가나다라마바사.', 7, true)
+    assert_equal ['가나',    6], fzf.trim('가나다라마바사.', 4, false)
+    assert_equal ['가나',    6], fzf.trim('가나다라마바사.', 5, false)
+    assert_equal ['가나a',   6], fzf.trim('가나ab라마바사.', 5, false)
+    assert_equal ['가나ab',  5], fzf.trim('가나ab라마바사.', 6, false)
+    assert_equal ['가나ab',  5], fzf.trim('가나ab라마바사.', 7, false)
   end
 
   def test_format
@@ -107,7 +111,7 @@ class TestFZF < MiniTest::Unit::TestCase
       juicily
       juiciness
       juicy]
-    assert matcher.cache.empty?
+    assert matcher.caches.empty?
     assert_equal(
       [["juice",     [[0, 1]]],
        ["juiceful",  [[0, 1]]],
@@ -115,10 +119,10 @@ class TestFZF < MiniTest::Unit::TestCase
        ["juicily",   [[0, 1]]],
        ["juiciness", [[0, 1]]],
        ["juicy",     [[0, 1]]]], matcher.match(list, 'j', '', '').sort)
-    assert !matcher.cache.empty?
-    assert_equal [list.object_id], matcher.cache.keys
-    assert_equal 1, matcher.cache[list.object_id].length
-    assert_equal 6, matcher.cache[list.object_id]['j'].length
+    assert !matcher.caches.empty?
+    assert_equal [list.object_id], matcher.caches.keys
+    assert_equal 1, matcher.caches[list.object_id].length
+    assert_equal 6, matcher.caches[list.object_id]['j'].length
 
     assert_equal(
       [["juicily",   [[0, 5]]],
@@ -128,14 +132,96 @@ class TestFZF < MiniTest::Unit::TestCase
       [["juicily",   [[2, 5]]],
        ["juiciness", [[2, 5]]]], matcher.match(list, 'ii', '', '').sort)
 
-    assert_equal 3, matcher.cache[list.object_id].length
-    assert_equal 2, matcher.cache[list.object_id]['ii'].length
+    assert_equal 3, matcher.caches[list.object_id].length
+    assert_equal 2, matcher.caches[list.object_id]['ii'].length
 
     # TODO : partial_cache
   end
 
+  def test_fuzzy_matcher_case_sensitive
+    assert_equal [['Fruit', [[0, 5]]]],
+      FZF::FuzzyMatcher.new(0).match(%w[Fruit Grapefruit], 'Fruit', '', '').sort
+
+    assert_equal [["Fruit", [[0, 5]]], ["Grapefruit", [[5, 10]]]],
+      FZF::FuzzyMatcher.new(Regexp::IGNORECASE).
+      match(%w[Fruit Grapefruit], 'Fruit', '', '').sort
+  end
+
+  def test_extended_fuzzy_matcher
+    matcher = FZF::ExtendedFuzzyMatcher.new Regexp::IGNORECASE
+    list = %w[
+      juice
+      juiceful
+      juiceless
+      juicily
+      juiciness
+      juicy
+      _juice]
+    match = proc { |q, prefix|
+      matcher.match(list, q, prefix, '').sort.map { |p| [p.first, p.last.sort] }
+    }
+
+    assert matcher.caches.empty?
+    3.times do
+      ['y j', 'j y'].each do |pat|
+        (0..pat.length - 1).each do |prefix_length|
+          prefix = pat[0, prefix_length]
+          assert_equal(
+            [["juicily", [[0, 1], [6, 7]]],
+             ["juicy",   [[0, 1], [4, 5]]]],
+            match.call(pat, prefix))
+        end
+      end
+
+      # $
+      assert_equal [["juiceful",  [[7, 8]]]], match.call('l$', '')
+      assert_equal [["juiceful",  [[7, 8]]],
+                    ["juiceless", [[5, 6]]],
+                    ["juicily",   [[5, 6]]]], match.call('l', '')
+
+      # ^
+      assert_equal list.length,     match.call('j', '').length
+      assert_equal list.length - 1, match.call('^j', '').length
+
+      # !
+      assert_equal 0, match.call('!j', '').length
+
+      # ! + ^
+      assert_equal [["_juice", []]], match.call('!^j', '')
+
+      # ! + $
+      assert_equal list.length - 1, match.call('!l$', '').length
+
+      # ! + f
+      assert_equal [["juicy", [[4, 5]]]], match.call('y !l', '')
+    end
+    assert !matcher.caches.empty?
+  end
+
+  def test_xfuzzy_matcher_prefix_cache
+    matcher = FZF::ExtendedFuzzyMatcher.new Regexp::IGNORECASE
+    list = %w[
+      a.java
+      b.java
+      java.jive
+      c.java$
+      d.java
+    ]
+    2.times do
+      assert_equal 5, matcher.match(list, 'java',   'java',   '').length
+      assert_equal 3, matcher.match(list, 'java$',  'java$',  '').length
+      assert_equal 1, matcher.match(list, 'java$$', 'java$$', '').length
+
+      assert_equal 0, matcher.match(list, '!java',  '!java',  '').length
+      assert_equal 4, matcher.match(list, '!^jav',  '!^jav',  '').length
+      assert_equal 4, matcher.match(list, '!^java', '!^java', '').length
+      assert_equal 2, matcher.match(list, '!^java !b !c', '!^java', '').length
+    end
+  end
+
   def test_sort_by_rank
-    matcher = FZF::FuzzyMatcher.new Regexp::IGNORECASE
+    matcher  = FZF::FuzzyMatcher.new Regexp::IGNORECASE
+    xmatcher = FZF::ExtendedFuzzyMatcher.new Regexp::IGNORECASE
     list = %w[
       0____1
       0_____1
@@ -146,7 +232,61 @@ class TestFZF < MiniTest::Unit::TestCase
       0______1
       ___01___
     ]
-    assert_equal %w[01 01_ _01_ ___01___ ____0_1 0____1 0_____1 0______1],
-        FZF.new([]).sort_by_rank(matcher.match(list, '01', '', '')).map(&:first)
+    assert_equal(
+      [["01",       [[0, 2]]],
+       ["01_",      [[0, 2]]],
+       ["_01_",     [[1, 3]]],
+       ["___01___", [[3, 5]]],
+       ["____0_1",  [[4, 7]]],
+       ["0____1",   [[0, 6]]],
+       ["0_____1",  [[0, 7]]],
+       ["0______1", [[0, 8]]]],
+      FZF.new([]).sort_by_rank(matcher.match(list, '01', '', '')))
+
+    assert_equal(
+      [["01",       [[0, 1], [1, 2]]],
+       ["01_",      [[0, 1], [1, 2]]],
+       ["_01_",     [[1, 2], [2, 3]]],
+       ["0____1",   [[0, 1], [5, 6]]],
+       ["0_____1",  [[0, 1], [6, 7]]],
+       ["____0_1",  [[4, 5], [6, 7]]],
+       ["0______1", [[0, 1], [7, 8]]],
+       ["___01___", [[3, 4], [4, 5]]]],
+      FZF.new([]).sort_by_rank(xmatcher.match(list, '0 1', '', '')))
+
+    assert_equal(
+      [["_01_",     [[1, 3], [0, 4]]],
+       ["0____1",   [[0, 6], [1, 3]]],
+       ["0_____1",  [[0, 7], [1, 3]]],
+       ["0______1", [[0, 8], [1, 3]]],
+       ["___01___", [[3, 5], [0, 2]]],
+       ["____0_1",  [[4, 7], [0, 2]]]],
+      FZF.new([]).sort_by_rank(xmatcher.match(list, '01 __', '', '')))
+  end
+
+  if RUBY_PLATFORM =~ /darwin/
+    NFD = '한글'
+    def test_nfc
+      assert_equal 6, NFD.length
+      assert_equal ["한글", [[0, 1], [1, 2]]],
+        FZF::UConv.nfc(NFD, [[0, 3], [3, 6]])
+
+      nfd2 = 'before' + NFD + 'after'
+      assert_equal 6 + 6 + 5, nfd2.length
+
+      nfc, offsets = FZF::UConv.nfc(nfd2, [[4, 14], [9, 13]])
+      o1, o2 = offsets
+      assert_equal 'before한글after', nfc
+      assert_equal 're한글af',        nfc[(o1.first...o1.last)]
+      assert_equal '글a',             nfc[(o2.first...o2.last)]
+    end
+
+    def test_nfd
+      nfc = '한글'
+      nfd = FZF::UConv.nfd(nfc)
+      assert_equal 6, nfd.length
+      assert_equal NFD, nfd
+    end
   end
 end
+
-- 
GitLab