Skip to content
Snippets Groups Projects
Unverified Commit 5e727096 authored by Junegunn Choi's avatar Junegunn Choi
Browse files

Speed up initial scanning with bitwise AND operation

parent 9e85cba0
No related branches found
No related tags found
No related merge requests found
......@@ -6,6 +6,11 @@ import (
"unsafe"
)
// overflow64 has the most significant bit set in each of its eight bytes.
// ANDing it with a 64-bit word yields a non-zero value exactly when at least
// one byte of that word is 0x80 or greater.
const overflow64 = uint64(0x8080808080808080)

// overflow32 is the four-byte counterpart of overflow64.
const overflow32 = uint32(0x80808080)
type Chars struct {
slice []byte // or []rune
inBytes bool
......@@ -17,33 +22,42 @@ type Chars struct {
Index int32
}
// ToChars converts byte array into rune array
func ToChars(bytes []byte) Chars {
var runes []rune
inBytes := true
numBytes := len(bytes)
for i := 0; i < numBytes; {
if bytes[i] < utf8.RuneSelf {
if !inBytes {
runes = append(runes, rune(bytes[i]))
}
i++
} else {
if inBytes {
inBytes = false
runes = make([]rune, i, numBytes)
for j := 0; j < i; j++ {
runes[j] = rune(bytes[j])
}
}
r, sz := utf8.DecodeRune(bytes[i:])
i += sz
runes = append(runes, r)
// checkAscii reports whether bytes contains only ASCII bytes, i.e. bytes
// below utf8.RuneSelf.
//
// If a non-ASCII byte is found it returns false together with an offset that
// is at or before the first such byte. The scan tests eight and then four
// bytes at a time, so the offset is the start of the word containing the
// byte, not necessarily the byte itself. Every byte before the returned
// offset is ASCII. For all-ASCII input, including empty input, it returns
// true and 0.
//
// NOTE(review): the word loads read bytes at arbitrary alignment through
// unsafe.Pointer; confirm this is acceptable on every targeted architecture.
func checkAscii(bytes []byte) (bool, int) {
	i := 0
	// Use <= so that the final complete 8-byte word is tested as a word
	// instead of falling through to the narrower loops below.
	for ; i <= len(bytes)-8; i += 8 {
		if (overflow64 & *(*uint64)(unsafe.Pointer(&bytes[i]))) != 0 {
			return false, i
		}
	}
	// At most seven bytes remain; test one 4-byte word if it fits.
	for ; i <= len(bytes)-4; i += 4 {
		if (overflow32 & *(*uint32)(unsafe.Pointer(&bytes[i]))) != 0 {
			return false, i
		}
	}
	// Fewer than four bytes remain; check them one at a time.
	for ; i < len(bytes); i++ {
		if bytes[i] >= utf8.RuneSelf {
			return false, i
		}
	}
	return true, 0
}
// ToChars wraps a byte slice in a Chars value.
//
// If checkAscii reports that the input is pure ASCII, the returned Chars
// holds the given slice itself and is flagged inBytes. Otherwise the input is
// converted to runes and the result is built with RunesToChars.
func ToChars(bytes []byte) Chars {
	ascii, prefixLen := checkAscii(bytes)
	if ascii {
		return Chars{slice: bytes, inBytes: true}
	}

	// There are never more runes than bytes, so this capacity avoids regrowth.
	runes := make([]rune, 0, len(bytes))

	// Every byte before prefixLen is ASCII and can be widened directly.
	for _, b := range bytes[:prefixLen] {
		runes = append(runes, rune(b))
	}

	// Decode the remainder as UTF-8, consuming one rune per iteration.
	for rest := bytes[prefixLen:]; len(rest) > 0; {
		r, size := utf8.DecodeRune(rest)
		runes = append(runes, r)
		rest = rest[size:]
	}
	return RunesToChars(runes)
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment