mirror of
https://github.com/NoFxAiOS/nofx.git
synced 2026-07-04 11:30:58 +08:00
fix(decision+news): correct Unicode regex escaping & precompile telegram patterns
## Critical Fix: Unicode Regex Escaping

### decision/engine.go

- ❌ WRONG: ``regexp.MustCompile(`[\u200B...]`)`` (raw string literal, so `\uXXXX` is not interpreted by Go)
- ✅ FIXED: `regexp.MustCompile("[\u200B...]")` (double-quoted string literal, so `\uXXXX` becomes the actual Unicode character)

**Impact**: Backtick (raw) string literals do not process `\uXXXX` escape sequences in Go!

- Raw string: the pattern contains the literal text `\u200B` rather than the character U+200B (useless)
- Double quotes: the pattern matches the Unicode characters U+200B, U+200C, U+200D, U+FEFF (correct)

### news/provider/telegram/telegram.go

- Move regex patterns to package-level (global) precompiled variables
- Eliminates repeated compilation in stripHTML()

## Performance

- Regex compilation: O(n) → O(1) (patterns are compiled once at package initialization instead of on every call)
- stripHTML() now uses precompiled patterns

## Testing

✅ Compilation successful
✅ Unicode characters properly matched
This commit is contained in:
@@ -22,7 +22,7 @@ var (
|
|||||||
reJSONArray = regexp.MustCompile(`(?is)\[\s*\{.*?\}\s*\]`)
|
reJSONArray = regexp.MustCompile(`(?is)\[\s*\{.*?\}\s*\]`)
|
||||||
reArrayHead = regexp.MustCompile(`^\[\s*\{`)
|
reArrayHead = regexp.MustCompile(`^\[\s*\{`)
|
||||||
reArrayOpenSpace = regexp.MustCompile(`^\[\s+\{`)
|
reArrayOpenSpace = regexp.MustCompile(`^\[\s+\{`)
|
||||||
reInvisibleRunes = regexp.MustCompile(`[\u200B\u200C\u200D\uFEFF]`)
|
reInvisibleRunes = regexp.MustCompile("[\u200B\u200C\u200D\uFEFF]")
|
||||||
)
|
)
|
||||||
|
|
||||||
// PositionInfo 持仓信息
|
// PositionInfo 持仓信息
|
||||||
|
|||||||
@@ -14,6 +14,13 @@ import (
|
|||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
reHTMLTag = regexp.MustCompile(`\<[\S\s]+?\>`)
|
||||||
|
reStyleBlock = regexp.MustCompile(`\<style[\S\s]+?\</style\>`)
|
||||||
|
reScriptBlock = regexp.MustCompile(`\<script[\S\s]+?\</script\>`)
|
||||||
|
reMultiSpace = regexp.MustCompile(`\s{2,}`)
|
||||||
|
)
|
||||||
|
|
||||||
// Message 表示 Telegram 消息结构
|
// Message 表示 Telegram 消息结构
|
||||||
type Message struct {
|
type Message struct {
|
||||||
MessageID string `json:"messageId"`
|
MessageID string `json:"messageId"`
|
||||||
@@ -268,27 +275,19 @@ func splitLast(s, sep string) []string {
|
|||||||
|
|
||||||
// stripHTML 移除字符串中的所有 HTML 标签,只保留纯文本
|
// stripHTML 移除字符串中的所有 HTML 标签,只保留纯文本
|
||||||
func stripHTML(s string) string {
|
func stripHTML(s string) string {
|
||||||
// 将HTML标签全转换成小写(确保匹配大小写不敏感的标签)
|
// 先將 HTML 標籤統一成小寫字母,方便後續匹配
|
||||||
re := regexp.MustCompile(`\<[\S\s]+?\>`)
|
s = reHTMLTag.ReplaceAllStringFunc(s, strings.ToLower)
|
||||||
s = re.ReplaceAllStringFunc(s, strings.ToLower)
|
|
||||||
|
|
||||||
// 去除 <style> 标签及其内容
|
// 移除樣式與腳本區塊
|
||||||
re = regexp.MustCompile(`\<style[\S\s]+?\</style\>`)
|
s = reStyleBlock.ReplaceAllString(s, "")
|
||||||
s = re.ReplaceAllString(s, "")
|
s = reScriptBlock.ReplaceAllString(s, "")
|
||||||
|
|
||||||
// 去除 <script> 标签及其内容
|
// 將剩餘標籤替換為換行,保留文本結構
|
||||||
re = regexp.MustCompile(`\<script[\S\s]+?\</script\>`)
|
s = reHTMLTag.ReplaceAllString(s, "\n")
|
||||||
s = re.ReplaceAllString(s, "")
|
|
||||||
|
|
||||||
// 去除所有尖括号内的 HTML 代码,并换成换行符
|
// 收斂連續空白為單一換行
|
||||||
re = regexp.MustCompile(`\<[\S\s]+?\>`)
|
s = reMultiSpace.ReplaceAllString(s, "\n")
|
||||||
s = re.ReplaceAllString(s, "\n")
|
|
||||||
|
|
||||||
// 去除连续的换行符和空白字符
|
|
||||||
re = regexp.MustCompile(`\s{2,}`)
|
|
||||||
s = re.ReplaceAllString(s, "\n")
|
|
||||||
|
|
||||||
// 去除首尾的空白字符
|
|
||||||
return strings.TrimSpace(s)
|
return strings.TrimSpace(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user