From 5e72709613b816531c1e0aed6a710257e08bb5d8 Mon Sep 17 00:00:00 2001
From: Junegunn Choi <junegunn.c@gmail.com>
Date: Tue, 18 Jul 2017 02:17:05 +0900
Subject: [PATCH] Speed up initial scanning with bitwise AND operation

---
 src/util/chars.go | 58 +++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/util/chars.go b/src/util/chars.go
index 8325cf44..5e702004 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -6,6 +6,11 @@ import (
 	"unsafe"
 )
 
+const (
+	overflow64 uint64 = 0x8080808080808080 // high (non-ASCII) bit of each byte in an 8-byte word
+	overflow32 uint32 = 0x80808080         // high (non-ASCII) bit of each byte in a 4-byte word
+)
+
 type Chars struct {
 	slice           []byte // or []rune
 	inBytes         bool
@@ -17,33 +22,54 @@ type Chars struct {
 	Index int32
 }
 
-// ToChars converts byte array into rune array
-func ToChars(bytes []byte) Chars {
-	var runes []rune
-	inBytes := true
-	numBytes := len(bytes)
-	for i := 0; i < numBytes; {
-		if bytes[i] < utf8.RuneSelf {
-			if !inBytes {
-				runes = append(runes, rune(bytes[i]))
-			}
-			i++
-		} else {
-			if inBytes {
-				inBytes = false
-				runes = make([]rune, i, numBytes)
-				for j := 0; j < i; j++ {
-					runes[j] = rune(bytes[j])
-				}
-			}
-			r, sz := utf8.DecodeRune(bytes[i:])
-			i += sz
-			runes = append(runes, r)
+// checkAscii reports whether bytes consists solely of ASCII characters.
+// When it does not, the second result is an offset at or before the first
+// non-ASCII byte (the start of the word or the byte where it was detected),
+// so every byte preceding that offset is ASCII. It is 0 for all-ASCII input.
+func checkAscii(bytes []byte) (bool, int) {
+	i := 0
+	// A byte is non-ASCII iff its high bit is set, so a whole word can be
+	// tested with a single AND. "<=" lets a final, exactly word-sized chunk
+	// take the wide path as well; the load still ends within the slice.
+	for ; i <= len(bytes)-8; i += 8 {
+		if (overflow64 & *(*uint64)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
+		}
+	}
+	// At most one 4-byte word can remain after the loop above.
+	for ; i <= len(bytes)-4; i += 4 {
+		if (overflow32 & *(*uint32)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
 		}
 	}
+	// Fewer than four bytes are left; check them one at a time.
+	for ; i < len(bytes); i++ {
+		if bytes[i] >= utf8.RuneSelf {
+			return false, i
+		}
+	}
+	return true, 0
+}
+
+// ToChars wraps bytes in a Chars. All-ASCII input keeps the given byte slice
+// as is; any other input is decoded as UTF-8 into a rune slice.
+func ToChars(bytes []byte) Chars {
+	inBytes, bytesUntil := checkAscii(bytes)
 	if inBytes {
 		return Chars{slice: bytes, inBytes: inBytes}
 	}
+
+	// Bytes before bytesUntil are known to be ASCII and are widened directly;
+	// only the remainder has to go through the UTF-8 decoder.
+	runes := make([]rune, bytesUntil, len(bytes))
+	for i := 0; i < bytesUntil; i++ {
+		runes[i] = rune(bytes[i])
+	}
+	for i := bytesUntil; i < len(bytes); {
+		r, sz := utf8.DecodeRune(bytes[i:])
+		i += sz
+		runes = append(runes, r)
+	}
 	return RunesToChars(runes)
 }
 
-- 
GitLab