fix: replace should replace the longest match

This commit is contained in:
hudahai 2023-02-04 15:31:16 +08:00 committed by Kevin Wan
parent b6f1bce695
commit d9a732a273
3 changed files with 73 additions and 61 deletions

View File

@ -98,3 +98,45 @@ func (n *node) find(chars []rune) []scope {
return scopes return scopes
} }
// longestMatch walks the trie downwards from n, consuming chars beginning at
// index start, and reports the deepest node flagged as end that the walk
// passed through.
//
// The three results are:
//   - matched is true: used is the depth of the deepest end node reached, or
//     start when no end node was reached but n itself is flagged as end;
//     jump is nil.
//   - matched is false and jump is non-nil: the walk stopped on a rune with
//     no child, and a node on the fail chain has a child for that rune. jump
//     is that child and used is the index just past the offending rune minus
//     jump.depth.
//   - matched is false and jump is nil: used is the index just past the
//     offending rune, or len(chars) when the input ran out without a
//     mismatch.
func (n *node) longestMatch(chars []rune, start int) (used int, jump *node, matched bool) {
	var (
		longest *node
		pos     = n
		total   = len(chars)
	)

	for idx := start; idx < total; idx++ {
		ch := chars[idx]

		if next, found := pos.children[ch]; found {
			// Keep descending and remember the deepest keyword end so far.
			pos = next
			if pos.end {
				longest = pos
			}
			continue
		}

		// Mismatch: a keyword already seen on this walk takes priority.
		switch {
		case longest != nil:
			return longest.depth, nil, true
		case n.end:
			return start, nil, true
		}

		// Nothing matched; follow the fail links looking for a node that
		// can continue with the current rune.
		var fallback *node
		for probe := pos; probe.fail != nil; probe = probe.fail {
			if candidate, found := probe.fail.children[ch]; found {
				fallback = candidate
				break
			}
		}
		if fallback == nil {
			return idx + 1, nil, false
		}

		return idx + 1 - fallback.depth, fallback, false
	}

	// Input exhausted without a mismatch.
	switch {
	case longest != nil:
		// the longest matched node
		return longest.depth, nil, true
	case n.end:
		// the starting node itself is the last matched node
		return start, nil, true
	default:
		return total, nil, false
	}
}

View File

@ -1,6 +1,8 @@
package stringx package stringx
import "strings" import (
"bytes"
)
type ( type (
// Replacer interface wraps the Replace method. // Replacer interface wraps the Replace method.
@ -30,68 +32,27 @@ func NewReplacer(mapping map[string]string) Replacer {
// Replace replaces text with given substitutes. // Replace replaces text with given substitutes.
func (r *replacer) Replace(text string) string { func (r *replacer) Replace(text string) string {
var builder strings.Builder var buf bytes.Buffer
var start int target := []rune(text)
chars := []rune(text)
size := len(chars)
for start < size {
cur := r.node cur := r.node
nextStart := 0
if start > 0 { for len(target) != 0 {
builder.WriteString(string(chars[:start])) used, jump, matched := cur.longestMatch(target, nextStart)
} if matched {
replaced := r.mapping[string(target[:used])]
for i := start; i < size; i++ { target = append([]rune(replaced), target[used:]...)
child, ok := cur.children[chars[i]]
if ok {
cur = child
} else if cur == r.node {
builder.WriteRune(chars[i])
// cur already points to root, set start only
start = i + 1
continue
} else {
curDepth := cur.depth
cur = cur.fail
child, ok = cur.children[chars[i]]
if !ok {
// write this path
builder.WriteString(string(chars[i-curDepth : i+1]))
// go to root
cur = r.node cur = r.node
start = i + 1 } else {
continue buf.WriteString(string(target[:used]))
} target = target[used:]
if jump != nil {
failDepth := cur.depth cur = jump
// write path before jump nextStart = jump.depth
builder.WriteString(string(chars[start : start+curDepth-failDepth])) } else {
start += curDepth - failDepth cur = r.node
cur = child nextStart = 0
}
if cur.end {
val := string(chars[i+1-cur.depth : i+1])
builder.WriteString(r.mapping[val])
builder.WriteString(string(chars[i+1:]))
// only matching this path, all previous paths are done
if start >= i+1-cur.depth && i+1 >= size {
return builder.String()
}
chars = []rune(builder.String())
size = len(chars)
builder.Reset()
break
} }
} }
if !cur.end {
builder.WriteString(string(chars[start:]))
return builder.String()
} }
} return buf.String()
return string(chars)
} }

View File

@ -51,6 +51,15 @@ func TestReplacer_ReplaceMultiMatches(t *testing.T) {
assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五")) assert.Equal(t, "零一23四五一23四五", NewReplacer(mapping).Replace("零一二三四五一二三四五"))
} }
// TestReplacer_ReplaceLongestMatching verifies that when one keyword is a
// prefix of another, the longer keyword is the one that gets substituted,
// while a standalone occurrence of the shorter keyword is still replaced.
func TestReplacer_ReplaceLongestMatching(t *testing.T) {
	const (
		input = "日本的首都在日本"
		want  = "东京在japan"
	)

	r := NewReplacer(map[string]string{
		"日本":    "japan",
		"日本的首都": "东京",
	})

	assert.Equal(t, want, r.Replace(input))
}
func TestReplacer_ReplaceJumpToFail(t *testing.T) { func TestReplacer_ReplaceJumpToFail(t *testing.T) {
mapping := map[string]string{ mapping := map[string]string{
"bcdf": "1235", "bcdf": "1235",