Skip to content
Snippets Groups Projects
Unverified Commit a16d8f66 authored by Junegunn Choi's avatar Junegunn Choi
Browse files

Normalize pattern string before passing it to Algo function

parent 45793d75
No related branches found
No related tags found
No related merge requests found
......@@ -246,21 +246,9 @@ func normalizeRune(r rune) rune {
return r
}
// normalizeRunes returns a copy of runes in which every rune that has a
// positive entry in the normalized table is replaced by its mapped value.
// The input slice is never modified.
func normalizeRunes(runes []rune) []rune {
	ret := make([]rune, len(runes))
	copy(ret, runes)
	for idx, r := range runes {
		// Runes outside [0x00C0, 0x2184] are kept as is without consulting
		// the table at all.
		if r < 0x00C0 || r > 0x2184 {
			continue
		}
		// Look the rune up once and reuse the result rather than indexing
		// the map a second time for the assignment.
		if n := normalized[r]; n > 0 {
			ret[idx] = n
		}
	}
	return ret
}
// Algo is the function signature shared by the matching functions such as
// FuzzyMatchV2.
//
// Algo functions make two assumptions about "pattern"; it is up to the
// caller to prepare the pattern accordingly before the call:
//  1. "pattern" is given in lowercase if "caseSensitive" is false
//  2. "pattern" is already normalized if "normalize" is true
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
......@@ -283,10 +271,6 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
}
if normalize {
pattern = normalizeRunes(pattern)
}
// Reuse the pre-allocated integer slices to avoid creating garbage that would need to be swept
offset16 := 0
offset32 := 0
......@@ -539,10 +523,6 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Ch
lenRunes := text.Length()
lenPattern := len(pattern)
if normalize {
pattern = normalizeRunes(pattern)
}
for index := 0; index < lenRunes; index++ {
char := text.Get(indexAt(index, lenRunes, forward))
// This is considerably faster than blindly applying strings.ToLower to the
......@@ -626,10 +606,6 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
return Result{-1, -1, 0}, nil
}
if normalize {
pattern = normalizeRunes(pattern)
}
// For simplicity, only look at the bonus at the first character position
pidx := 0
bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
......@@ -693,10 +669,6 @@ func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
return Result{-1, -1, 0}, nil
}
if normalize {
pattern = normalizeRunes(pattern)
}
for index, r := range pattern {
char := text.Get(index)
if !caseSensitive {
......@@ -726,10 +698,6 @@ func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Cha
return Result{-1, -1, 0}, nil
}
if normalize {
pattern = normalizeRunes(pattern)
}
for index, r := range pattern {
char := text.Get(index + diff)
if !caseSensitive {
......
......@@ -406,3 +406,19 @@ var normalized map[rune]rune = map[rune]rune{
0x028F: 'Y', // , LATIN LETTER SMALL CAPITAL
0x1D22: 'Z', // , LATIN LETTER SMALL CAPITAL
}
// NormalizeRunes normalizes latin script letters. It returns a new slice of
// the same length in which every rune that has a positive entry in the
// normalized table is replaced by its mapped value; all other runes are
// copied unchanged. The input slice is never modified.
func NormalizeRunes(runes []rune) []rune {
	ret := make([]rune, len(runes))
	copy(ret, runes)
	for idx, r := range runes {
		// Runes outside [0x00C0, 0x2184] are kept as is without consulting
		// the table at all.
		if r < 0x00C0 || r > 0x2184 {
			continue
		}
		// Look the rune up once and reuse the result rather than indexing
		// the map a second time for the assignment.
		if n := normalized[r]; n > 0 {
			ret[idx] = n
		}
	}
	return ret
}
......@@ -95,7 +95,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
termSets := []termSet{}
if extended {
termSets = parseTerms(fuzzy, caseMode, asString)
termSets = parseTerms(fuzzy, caseMode, normalize, asString)
Loop:
for _, termSet := range termSets {
for idx, term := range termSet {
......@@ -140,7 +140,7 @@ func BuildPattern(fuzzy bool, fuzzyAlgo algo.Algo, extended bool, caseMode Case,
return ptr
}
func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet {
tokens := _splitRegex.Split(str, -1)
sets := []termSet{}
set := termSet{}
......@@ -196,10 +196,14 @@ func parseTerms(fuzzy bool, caseMode Case, str string) []termSet {
sets = append(sets, set)
set = termSet{}
}
textRunes := []rune(text)
if normalize {
textRunes = algo.NormalizeRunes(textRunes)
}
set = append(set, term{
typ: typ,
inv: inv,
text: []rune(text),
text: textRunes,
caseSensitive: caseSensitive,
origText: origText})
switchSet = true
......
......@@ -15,7 +15,7 @@ func init() {
}
func TestParseTermsExtended(t *testing.T) {
terms := parseTerms(true, CaseSmart,
terms := parseTerms(true, CaseSmart, false,
"| aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$ | ^iii$ ^xxx | 'yyy | | zzz$ | !ZZZ |")
if len(terms) != 9 ||
terms[0][0].typ != termFuzzy || terms[0][0].inv ||
......@@ -50,7 +50,7 @@ func TestParseTermsExtended(t *testing.T) {
}
func TestParseTermsExtendedExact(t *testing.T) {
terms := parseTerms(false, CaseSmart,
terms := parseTerms(false, CaseSmart, false,
"aaa 'bbb ^ccc ddd$ !eee !'fff !^ggg !hhh$")
if len(terms) != 8 ||
terms[0][0].typ != termExact || terms[0][0].inv || len(terms[0][0].text) != 3 ||
......@@ -66,7 +66,7 @@ func TestParseTermsExtendedExact(t *testing.T) {
}
func TestParseTermsEmpty(t *testing.T) {
terms := parseTerms(true, CaseSmart, "' $ ^ !' !^ !$")
terms := parseTerms(true, CaseSmart, false, "' $ ^ !' !^ !$")
if len(terms) != 0 {
t.Errorf("%s", terms)
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment