mirror of
https://github.com/zeromicro/go-zero.git
synced 2025-01-23 09:00:20 +08:00
refactor: simplify stringx.Replacer, and avoid potential infinite loops (#2877)
* simplify replace * backup * refactor: simplify stringx.Replacer * chore: add comments and const * chore: add more tests * chore: rename variable
This commit is contained in:
parent
9be17a2d28
commit
cddf3875cf
@ -96,101 +96,3 @@ func (n *node) find(chars []rune) []scope {
|
||||
|
||||
return scopes
|
||||
}
|
||||
|
||||
func (n *node) longestMatch(chars []rune, paths []*node) (uselessLen, matchLen int, nextPaths []*node) {
|
||||
cur := n
|
||||
var longestMatched *node
|
||||
|
||||
for i := len(paths); i < len(chars); i++ {
|
||||
char := chars[i]
|
||||
child, ok := cur.children[char]
|
||||
if ok {
|
||||
cur = child
|
||||
if cur.end {
|
||||
longestMatched = cur
|
||||
}
|
||||
paths = append(paths, cur)
|
||||
} else {
|
||||
if longestMatched != nil {
|
||||
return 0, longestMatched.depth, nil
|
||||
}
|
||||
|
||||
if n.end {
|
||||
return 0, n.depth, nil
|
||||
}
|
||||
|
||||
// new path match
|
||||
var jump *node
|
||||
// old path pre longest preMatch
|
||||
preMatch, preStart := findMatch(paths)
|
||||
icur := cur
|
||||
for icur.fail != nil {
|
||||
jump, ok = icur.fail.children[char]
|
||||
if ok {
|
||||
break
|
||||
}
|
||||
|
||||
icur = icur.fail
|
||||
}
|
||||
|
||||
switch {
|
||||
case preMatch != nil && jump != nil:
|
||||
if jumpStart := i - jump.depth + 1; preStart < jumpStart {
|
||||
return preStart, preMatch.depth, nil
|
||||
} else {
|
||||
return jumpStart, 0, append(paths[jumpStart:], jump)
|
||||
}
|
||||
case preMatch != nil && jump == nil:
|
||||
return preStart, preMatch.depth, nil
|
||||
case preMatch == nil && jump != nil:
|
||||
return i - jump.depth + 1, 0, append(paths[i-jump.depth+1:], jump)
|
||||
case preMatch == nil && jump == nil:
|
||||
return i + 1, 0, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// this longest matched node
|
||||
if longestMatched != nil {
|
||||
return 0, longestMatched.depth, nil
|
||||
}
|
||||
|
||||
if n.end {
|
||||
return 0, n.depth, nil
|
||||
}
|
||||
|
||||
match, start := findMatch(paths)
|
||||
if match != nil {
|
||||
return start, match.depth, nil
|
||||
}
|
||||
|
||||
return len(chars), 0, nil
|
||||
}
|
||||
|
||||
func findMatch(path []*node) (*node, int) {
|
||||
var result *node
|
||||
var start int
|
||||
|
||||
for i := len(path) - 1; i >= 0; i-- {
|
||||
icur := path[i]
|
||||
var cur *node
|
||||
for icur.fail != nil {
|
||||
if icur.fail.end {
|
||||
cur = icur.fail
|
||||
break
|
||||
}
|
||||
icur = icur.fail
|
||||
}
|
||||
if cur != nil {
|
||||
if result == nil {
|
||||
result = cur
|
||||
start = i - result.depth + 1
|
||||
} else if curStart := i - cur.depth + 1; curStart < start {
|
||||
result = cur
|
||||
start = curStart
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result, start
|
||||
}
|
||||
|
@ -6,15 +6,6 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestLongestMatchGuardedCondition(t *testing.T) {
|
||||
n := new(node)
|
||||
n.end = true
|
||||
uselessLen, matchLen, jump := n.longestMatch([]rune(""), nil)
|
||||
assert.Equal(t, 0, uselessLen)
|
||||
assert.Nil(t, jump)
|
||||
assert.Equal(t, 0, matchLen)
|
||||
}
|
||||
|
||||
func TestFuzzNodeCase1(t *testing.T) {
|
||||
keywords := []string{
|
||||
"cs8Zh",
|
||||
@ -202,228 +193,3 @@ func BenchmarkNodeFind(b *testing.B) {
|
||||
trie.find([]rune("日本AV演员兼电视、电影演员。无名氏AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演"))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase0(t *testing.T) {
|
||||
// match the longest word
|
||||
keywords := []string{
|
||||
"a",
|
||||
"ab",
|
||||
"abc",
|
||||
"abcd",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, jump := trie.longestMatch([]rune("abcef"), nil)
|
||||
assert.Equal(t, 0, uselessLen)
|
||||
assert.Equal(t, 3, matchLen)
|
||||
assert.Nil(t, jump)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase1(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcde",
|
||||
"bcde",
|
||||
"cde",
|
||||
"de",
|
||||
|
||||
"b",
|
||||
"bc",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, jump := trie.longestMatch([]rune("abcdf"), nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 2, matchLen)
|
||||
assert.Nil(t, jump)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase2(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcde",
|
||||
"bcde",
|
||||
"cde",
|
||||
"de",
|
||||
|
||||
"c",
|
||||
"cd",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, jump := trie.longestMatch([]rune("abcdf"), nil)
|
||||
assert.Equal(t, 2, uselessLen)
|
||||
assert.Equal(t, 2, matchLen)
|
||||
assert.Nil(t, jump)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase3(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcde",
|
||||
"bcde",
|
||||
"cde",
|
||||
"de",
|
||||
|
||||
"b",
|
||||
"bc",
|
||||
"c",
|
||||
"cd",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, jump := trie.longestMatch([]rune("abcdf"), nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 2, matchLen)
|
||||
assert.Nil(t, jump)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase4(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcde",
|
||||
"bcdf",
|
||||
"bcd",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, paths := trie.longestMatch([]rune("abcdf"), nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 0, matchLen)
|
||||
assert.Equal(t, 4, len(paths))
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase5(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcdef",
|
||||
"bcde",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, paths := trie.longestMatch([]rune("abcde"), nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 4, matchLen)
|
||||
assert.Nil(t, paths)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase6(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcde",
|
||||
"bc",
|
||||
"d",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
uselessLen, matchLen, jump := trie.longestMatch([]rune("abcd"), nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 2, matchLen)
|
||||
assert.Nil(t, jump)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase7(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcdeg",
|
||||
"cdef",
|
||||
"bcde",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
word := []rune("abcdef")
|
||||
uselessLen, matchLen, paths := trie.longestMatch(word, nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 4, matchLen)
|
||||
assert.Nil(t, paths)
|
||||
uselessLen, matchLen, paths = trie.longestMatch(word[uselessLen+matchLen:], paths)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 0, matchLen)
|
||||
assert.Nil(t, paths)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase8(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcdeg",
|
||||
"cdef",
|
||||
"cde",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
word := []rune("abcdef")
|
||||
uselessLen, matchLen, paths := trie.longestMatch(word, nil)
|
||||
assert.Equal(t, 2, uselessLen)
|
||||
assert.Equal(t, 0, matchLen)
|
||||
assert.NotNil(t, paths)
|
||||
}
|
||||
|
||||
func TestNode_longestMatchCase9(t *testing.T) {
|
||||
keywords := []string{
|
||||
"abcdeg",
|
||||
"cdef",
|
||||
"cde",
|
||||
"cd",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
|
||||
word := []rune("abcde")
|
||||
uselessLen, matchLen, paths := trie.longestMatch(word, nil)
|
||||
assert.Equal(t, 2, uselessLen)
|
||||
assert.Equal(t, 3, matchLen)
|
||||
assert.Nil(t, paths)
|
||||
}
|
||||
|
||||
func TestNode_jump(t *testing.T) {
|
||||
keywords := []string{
|
||||
"de",
|
||||
"fe",
|
||||
}
|
||||
trie := new(node)
|
||||
for _, keyword := range keywords {
|
||||
trie.add(keyword)
|
||||
}
|
||||
trie.build()
|
||||
target := []rune("dfe")
|
||||
|
||||
uselessLen, matchLen, paths := trie.longestMatch(target, nil)
|
||||
assert.Equal(t, 1, uselessLen)
|
||||
assert.Equal(t, 0, matchLen)
|
||||
assert.NotNil(t, paths)
|
||||
uselessLen, matchLen, paths = paths[len(paths)-1].longestMatch(target[uselessLen+matchLen:], paths)
|
||||
assert.Equal(t, 0, uselessLen)
|
||||
assert.Equal(t, 2, matchLen)
|
||||
assert.Nil(t, paths)
|
||||
}
|
||||
|
@ -1,9 +1,14 @@
|
||||
package stringx
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// replace more than once to avoid overlapped keywords after replace.
|
||||
// only try 2 times to avoid too many or infinite loops.
|
||||
const replaceTimes = 2
|
||||
|
||||
type (
|
||||
// Replacer interface wraps the Replace method.
|
||||
Replacer interface {
|
||||
@ -32,29 +37,48 @@ func NewReplacer(mapping map[string]string) Replacer {
|
||||
|
||||
// Replace replaces text with given substitutes.
|
||||
func (r *replacer) Replace(text string) string {
|
||||
var buf strings.Builder
|
||||
var paths []*node
|
||||
target := []rune(text)
|
||||
cur := r.node
|
||||
|
||||
for len(target) != 0 {
|
||||
uselessLen, matchLen, nextPaths := cur.longestMatch(target, paths)
|
||||
if uselessLen > 0 {
|
||||
buf.WriteString(string(target[:uselessLen]))
|
||||
target = target[uselessLen:]
|
||||
}
|
||||
if matchLen > 0 {
|
||||
replaced := r.mapping[string(target[:matchLen])]
|
||||
target = append([]rune(replaced), target[matchLen:]...)
|
||||
}
|
||||
if len(nextPaths) != 0 {
|
||||
cur = nextPaths[len(nextPaths)-1]
|
||||
paths = nextPaths
|
||||
} else {
|
||||
cur = r.node
|
||||
paths = nil
|
||||
for i := 0; i < replaceTimes; i++ {
|
||||
var replaced bool
|
||||
if text, replaced = r.doReplace(text); !replaced {
|
||||
return text
|
||||
}
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
return text
|
||||
}
|
||||
|
||||
func (r *replacer) doReplace(text string) (string, bool) {
|
||||
chars := []rune(text)
|
||||
scopes := r.find(chars)
|
||||
if len(scopes) == 0 {
|
||||
return text, false
|
||||
}
|
||||
|
||||
sort.Slice(scopes, func(i, j int) bool {
|
||||
if scopes[i].start < scopes[j].start {
|
||||
return true
|
||||
}
|
||||
if scopes[i].start == scopes[j].start {
|
||||
return scopes[i].stop > scopes[j].stop
|
||||
}
|
||||
return false
|
||||
})
|
||||
|
||||
var buf strings.Builder
|
||||
var index int
|
||||
for i := 0; i < len(scopes); i++ {
|
||||
scp := &scopes[i]
|
||||
if scp.start < index {
|
||||
continue
|
||||
}
|
||||
|
||||
buf.WriteString(string(chars[index:scp.start]))
|
||||
buf.WriteString(r.mapping[string(chars[scp.start:scp.stop])])
|
||||
index = scp.stop
|
||||
}
|
||||
if index < len(chars) {
|
||||
buf.WriteString(string(chars[index:]))
|
||||
}
|
||||
|
||||
return buf.String(), true
|
||||
}
|
||||
|
@ -219,3 +219,13 @@ func TestReplacer_ReplaceLongestMatch(t *testing.T) {
|
||||
})
|
||||
assert.Equal(t, "东京是东京", replacer.Replace("日本的首都是东京"))
|
||||
}
|
||||
|
||||
func TestReplacer_ReplaceIndefinitely(t *testing.T) {
|
||||
mapping := map[string]string{
|
||||
"日本的首都": "东京",
|
||||
"东京": "日本的首都",
|
||||
}
|
||||
assert.NotPanics(t, func() {
|
||||
NewReplacer(mapping).Replace("日本的首都是东京")
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user