From d9c8a9a880dd9fc28c9fb466bd1c81cd6a8a159c Mon Sep 17 00:00:00 2001
From: Junegunn Choi <junegunn.c@gmail.com>
Date: Sun, 14 Aug 2016 04:23:37 +0900
Subject: [PATCH] [perf] Remove memory copy when using string delimiter

---
 src/tokenizer.go       | 12 ++++++------
 src/util/chars.go      | 36 ++++++++++++++++++++++++++++++++++++
 src/util/chars_test.go | 25 +++++++++++++++++++++++++
 3 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/src/tokenizer.go b/src/tokenizer.go
index eec19898..e48f48bf 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -140,13 +140,13 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
 		return withPrefixLengths(tokens, prefixLength)
 	}
 
-	var tokens []string
 	if delimiter.str != nil {
-		tokens = strings.Split(text.ToString(), *delimiter.str)
-		for i := 0; i < len(tokens)-1; i++ {
-			tokens[i] = tokens[i] + *delimiter.str
-		}
-	} else if delimiter.regex != nil {
+		return withPrefixLengths(text.Split(*delimiter.str), 0)
+	}
+
+	// FIXME performance
+	var tokens []string
+	if delimiter.regex != nil {
 		str := text.ToString()
 		for len(str) > 0 {
 			loc := delimiter.regex.FindStringIndex(str)
diff --git a/src/util/chars.go b/src/util/chars.go
index 6034ee53..12417c66 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -118,3 +118,39 @@ func (chars *Chars) Slice(b int, e int) Chars {
 	}
 	return Chars{bytes: chars.bytes[b:e]}
 }
+
+// Split cuts chars into consecutive tokens at each occurrence of delimiter.
+// Each delimiter stays attached to the end of the token it terminates, and an
+// empty delimiter yields one token per character. No empty token is emitted
+// after a final delimiter, but the result always holds at least one token.
+func (chars *Chars) Split(delimiter string) []Chars {
+	sep := []rune(delimiter)
+	sepLen := len(sep)
+	total := chars.Length()
+	// An empty delimiter still has to consume one character per token
+	step := Max(sepLen, 1)
+	tokens := make([]Chars, 0, 1)
+	start := 0
+	// Past total-sepLen the delimiter can no longer fit in what remains
+	for pos := 0; pos < total && pos+sepLen <= total; {
+		found := true
+		for i := 0; i < sepLen; i++ {
+			if chars.Get(pos+i) != sep[i] {
+				found = false
+				break
+			}
+		}
+		if !found {
+			pos++
+			continue
+		}
+		// The token runs up to and including the delimiter
+		pos += step
+		tokens = append(tokens, chars.Slice(start, pos))
+		start = pos
+	}
+	if len(tokens) == 0 || start < total {
+		tokens = append(tokens, chars.Slice(start, total))
+	}
+	return tokens
+}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
index 2cb6fc76..12c629d5 100644
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@@ -55,3 +55,28 @@ func TestTrimLength(t *testing.T) {
 	check("  h   o  ", 5)
 	check("         ", 0)
 }
+
+// TestSplit checks that Split keeps each delimiter attached to the token it ends.
+func TestSplit(t *testing.T) {
+	check := func(str string, delim string, tokens ...string) {
+		input := ToChars([]byte(str))
+		result := input.Split(delim)
+		if len(result) != len(tokens) {
+			t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s",
+				str, len(result), len(tokens), result)
+		}
+		for idx := 0; idx < len(result) && idx < len(tokens); idx++ { // stay in bounds of both
+			if got := result[idx].ToString(); got != tokens[idx] {
+				t.Errorf("Invalid Split result for '%s': %s (expected %s)", str, got, tokens[idx])
+			}
+		}
+	}
+	check("abc:def::", ":", "abc:", "def:", ":")
+	check("abc:def::", "-", "abc:def::")
+	check("abc", "", "a", "b", "c")
+	check("abc", "a", "a", "bc")
+	check("abc", "ab", "ab", "c")
+	check("abc", "abc", "abc")
+	check("abc", "abcd", "abc")
+	check("", "abcd", "")
+}
-- 
GitLab