From db7c0359f438a5724d09bf3502d09d75431e5cd7 Mon Sep 17 00:00:00 2001 From: ZhouYongyou <128128010+zhouyongyou@users.noreply.github.com> Date: Wed, 5 Nov 2025 01:02:49 +0800 Subject: [PATCH] fix(decision+news): correct Unicode regex escaping & precompile telegram patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Critical Fix: Unicode Regex Escaping ### decision/engine.go - ❌ WRONG: `regexp.MustCompile(`[\u200B...]`)` (raw string, escapes not interpreted by the Go compiler) - ✅ FIXED: `regexp.MustCompile("[\u200B...]")` (double quotes, escapes expanded to the actual Unicode characters) **Impact**: Raw (backtick) string literals don't interpret \uXXXX escape sequences in Go! - Raw string: passes the literal text "\u200B" to the regexp engine; Go's RE2 syntax has no \u escape (only \x{200B}), so the pattern is rejected instead of matching the intended characters - Double quotes: the compiler expands the escapes, so the pattern matches Unicode characters U+200B, U+200C, U+200D, U+FEFF (correct) ### news/provider/telegram/telegram.go - Move regex patterns to package-level precompiled variables - Eliminates repeated compilation in stripHTML() ## Performance - Regex compilation: once per stripHTML() call → once at package initialization - stripHTML() now uses precompiled patterns ## Testing ✅ Compilation successful ✅ Unicode characters properly matched --- decision/engine.go | 2 +- news/provider/telegram/telegram.go | 33 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/decision/engine.go b/decision/engine.go index a8dbdd5e..7f28738f 100644 --- a/decision/engine.go +++ b/decision/engine.go @@ -22,7 +22,7 @@ var ( reJSONArray = regexp.MustCompile(`(?is)\[\s*\{.*?\}\s*\]`) reArrayHead = regexp.MustCompile(`^\[\s*\{`) reArrayOpenSpace = regexp.MustCompile(`^\[\s+\{`) - reInvisibleRunes = regexp.MustCompile(`[\u200B\u200C\u200D\uFEFF]`) + reInvisibleRunes = regexp.MustCompile("[\u200B\u200C\u200D\uFEFF]") ) // PositionInfo 持仓信息 diff --git a/news/provider/telegram/telegram.go b/news/provider/telegram/telegram.go index 821cefa5..9d4ed252 100644 --- a/news/provider/telegram/telegram.go +++ b/news/provider/telegram/telegram.go @@ -14,6 +14,13 @@ import ( "github.com/samber/lo" ) +var ( + reHTMLTag = 
regexp.MustCompile(`\<[\S\s]+?\>`) + reStyleBlock = regexp.MustCompile(`\<style[\S\s]+?\</style\>`) + reScriptBlock = regexp.MustCompile(`\<script[\S\s]+?\</script\>`) + reMultiSpace = regexp.MustCompile(`\s{2,}`) +) + // Message 表示 Telegram 消息结构 type Message struct { MessageID string `json:"messageId"` @@ -268,27 +275,19 @@ func splitLast(s, sep string) []string { // stripHTML 移除字符串中的所有 HTML 标签,只保留纯文本 func stripHTML(s string) string { - // 将HTML标签全转换成小写(确保匹配大小写不敏感的标签) - re := regexp.MustCompile(`\<[\S\s]+?\>`) - s = re.ReplaceAllStringFunc(s, strings.ToLower) + // 先將 HTML 標籤統一成小寫字母,方便後續匹配 + s = reHTMLTag.ReplaceAllStringFunc(s, strings.ToLower) - // 去除