Skip to content

Commit

Permalink
fix unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
zmh-program committed Dec 12, 2023
1 parent 0d45c5b commit 86995a8
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
6 changes: 5 additions & 1 deletion adapter/oneapi/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ func getRobustnessResult(chunk string) string {

matches := compile.FindStringSubmatch(chunk)
if len(matches) > 1 {
return matches[1]
partial := matches[1]
// if is the unicode character
if strings.HasPrefix(partial, "\\u") {
return utils.DecodeUnicode(partial)
}
} else {
return ""
}
Expand Down
14 changes: 14 additions & 0 deletions utils/char.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,17 @@ func ExtractImageUrls(data string) []string {
re := regexp.MustCompile(`(https?://\S+\.(?:png|jpg|jpeg|gif|webp))`)
return re.FindAllString(data, -1)
}

// unicodeEscapeRun matches one or more back-to-back \uXXXX escapes. Matching
// a whole run (rather than a single escape) lets DecodeUnicode see both halves
// of a UTF-16 surrogate pair at once. Compiled once at package scope so the
// pattern is not rebuilt on every call.
var unicodeEscapeRun = regexp.MustCompile(`(?:\\u[0-9a-fA-F]{4})+`)

// DecodeUnicode replaces every \uXXXX escape sequence in data with the
// character it denotes and returns the resulting string. Text that is not part
// of a well-formed escape (exactly four hex digits after `\u`) is left as is.
//
// Escapes are interpreted as UTF-16 code units: a high surrogate immediately
// followed by a low surrogate (e.g. `\ud83d\ude00`) is combined into a single
// code point. A surrogate without its partner cannot be represented in UTF-8
// and is emitted as U+FFFD, the Unicode replacement character.
func DecodeUnicode(data string) string {
	const (
		escapeLen = 6 // len(`\uXXXX`)

		highSurrogateMin = 0xD800
		highSurrogateMax = 0xDBFF
		lowSurrogateMin  = 0xDC00
		lowSurrogateMax  = 0xDFFF
		supplementaryMin = 0x10000
	)

	return unicodeEscapeRun.ReplaceAllStringFunc(data, func(run string) string {
		// Parse every escape in the run into its 16-bit code unit.
		units := make([]rune, 0, len(run)/escapeLen)
		for i := 0; i+escapeLen <= len(run); i += escapeLen {
			val, err := strconv.ParseUint(run[i+2:i+escapeLen], 16, 16)
			if err != nil {
				// Unreachable given the pattern, but never corrupt the input.
				return run
			}
			units = append(units, rune(val))
		}

		decoded := make([]rune, 0, len(units))
		for i := 0; i < len(units); i++ {
			unit := units[i]
			if unit >= highSurrogateMin && unit <= highSurrogateMax && i+1 < len(units) {
				if low := units[i+1]; low >= lowSurrogateMin && low <= lowSurrogateMax {
					// Combine the surrogate pair into one supplementary code point.
					decoded = append(decoded, supplementaryMin+((unit-highSurrogateMin)<<10)+(low-lowSurrogateMin))
					i++
					continue
				}
			}
			decoded = append(decoded, unit)
		}

		// Converting []rune to string UTF-8 encodes each code point; lone
		// surrogates are invalid runes and become U+FFFD.
		return string(decoded)
	})
}

0 comments on commit 86995a8

Please sign in to comment.