mirror of
https://github.com/zeromicro/go-zero.git
synced 2025-02-02 16:28:39 +08:00
add keywords utility example
This commit is contained in:
parent
22e75cdf78
commit
418f8f6666
BIN
doc/images/trie.png
Normal file
BIN
doc/images/trie.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 91 KiB |
61
doc/keywords.md
Normal file
61
doc/keywords.md
Normal file
@ -0,0 +1,61 @@
|
||||
# 高效的关键词替换和敏感词过滤工具
|
||||
|
||||
## 1. 算法介绍
|
||||
|
||||
利用高效的Trie树建立关键词树,如下图所示,然后依次查找字符串中的相连字符是否形成树的一条路径。
|
||||
|
||||
<img src="images/trie.png" alt="trie" width="250" height="250">
|
||||
|
||||
发现掘金上[这篇文章](https://juejin.im/post/6844903750490914829)写得比较详细,可以一读,具体原理在此不详述。
|
||||
|
||||
## 2. 关键词替换
|
||||
|
||||
```go
|
||||
replacer := stringx.NewReplacer(map[string]string{
|
||||
"PHP": "PPT",
|
||||
"世界上": "吹牛",
|
||||
})
|
||||
fmt.Println(replacer.Replace("PHP是世界上最好的语言!"))
|
||||
```
|
||||
|
||||
可以得到:
|
||||
```
|
||||
PPT是吹牛最好的语言!
|
||||
```
|
||||
|
||||
示例代码见`example/stringx/replace/replace.go`
|
||||
|
||||
## 3. 敏感词过滤
|
||||
|
||||
```go
|
||||
filter := stringx.NewTrie([]string{
|
||||
"AV演员",
|
||||
"苍井空",
|
||||
"AV",
|
||||
"日本AV女优",
|
||||
"AV演员色情",
|
||||
}, stringx.WithMask('?'))
|
||||
safe, keywords, found := filter.Filter("日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演")
|
||||
fmt.Println(safe)
|
||||
fmt.Println(keywords)
|
||||
fmt.Println(found)
|
||||
```
|
||||
|
||||
可以得到:
|
||||
|
||||
```
|
||||
日本????兼电视、电影演员。?????女优是xx出道, ??????们最精彩的表演是??????表演
|
||||
[苍井空 日本AV女优 AV演员色情 AV AV演员]
|
||||
true
|
||||
```
|
||||
|
||||
示例代码见`example/stringx/filter/filter.go`
|
||||
|
||||
## 4. Benchmark
|
||||
|
||||
```
|
||||
| Sentences | Keywords | Regex | Go-Zero |
|
||||
|-----------|----------|----------|----------|
|
||||
| 10000     | 10000    | 16min10s | 27.2ms   |
|
||||
```
|
||||
|
21
example/stringx/filter/filter.go
Normal file
21
example/stringx/filter/filter.go
Normal file
@ -0,0 +1,21 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tal-tech/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
func main() {
|
||||
filter := stringx.NewTrie([]string{
|
||||
"AV演员",
|
||||
"苍井空",
|
||||
"AV",
|
||||
"日本AV女优",
|
||||
"AV演员色情",
|
||||
}, stringx.WithMask('?'))
|
||||
safe, keywords, found := filter.Filter("日本AV演员兼电视、电影演员。苍井空AV女优是xx出道, 日本AV女优们最精彩的表演是AV演员色情表演")
|
||||
fmt.Println(safe)
|
||||
fmt.Println(keywords)
|
||||
fmt.Println(found)
|
||||
}
|
15
example/stringx/replace/replace.go
Normal file
15
example/stringx/replace/replace.go
Normal file
@ -0,0 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/tal-tech/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
func main() {
|
||||
replacer := stringx.NewReplacer(map[string]string{
|
||||
"PHP": "PPT",
|
||||
"世界上": "吹牛",
|
||||
})
|
||||
fmt.Println(replacer.Replace("PHP是世界上最好的语言!"))
|
||||
}
|
Loading…
Reference in New Issue
Block a user