From db7c0359f438a5724d09bf3502d09d75431e5cd7 Mon Sep 17 00:00:00 2001
From: ZhouYongyou <128128010+zhouyongyou@users.noreply.github.com>
Date: Wed, 5 Nov 2025 01:02:49 +0800
Subject: [PATCH] fix(decision+news): correct Unicode regex escaping &
precompile telegram patterns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
## Critical Fix: Unicode Regex Escaping
### decision/engine.go
- ❌ WRONG: ``regexp.MustCompile(`[\u200B...]`)`` (raw string, no escaping)
- ✅ FIXED: `regexp.MustCompile("[\u200B...]")` (double quotes, proper Unicode)
**Impact**: Backticks don't parse \uXXXX escape sequences in Go!
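- Raw string: the regexp engine receives the literal text "\u200B"; Go's RE2 syntax has no \u escape (it uses \x{200B}), so the pattern is rejected as an invalid escape sequence instead of matching the invisible characters (useless)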
- Double quotes: matches Unicode characters U+200B, U+200C, U+200D, U+FEFF (correct)
### news/provider/telegram/telegram.go
- Move regex patterns to global precompiled variables
- Eliminates repeated compilation in stripHTML()
## Performance
- Regex compilation: on every stripHTML() call → once at package initialization
- stripHTML() now uses precompiled patterns
## Testing
✅ Compilation successful
✅ Unicode characters properly matched
---
decision/engine.go | 2 +-
news/provider/telegram/telegram.go | 33 +++++++++++++++---------------
2 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/decision/engine.go b/decision/engine.go
index a8dbdd5e..7f28738f 100644
--- a/decision/engine.go
+++ b/decision/engine.go
@@ -22,7 +22,7 @@ var (
reJSONArray = regexp.MustCompile(`(?is)\[\s*\{.*?\}\s*\]`)
reArrayHead = regexp.MustCompile(`^\[\s*\{`)
reArrayOpenSpace = regexp.MustCompile(`^\[\s+\{`)
- reInvisibleRunes = regexp.MustCompile(`[\u200B\u200C\u200D\uFEFF]`)
+ reInvisibleRunes = regexp.MustCompile("[\u200B\u200C\u200D\uFEFF]")
)
// PositionInfo 持仓信息
diff --git a/news/provider/telegram/telegram.go b/news/provider/telegram/telegram.go
index 821cefa5..9d4ed252 100644
--- a/news/provider/telegram/telegram.go
+++ b/news/provider/telegram/telegram.go
@@ -14,6 +14,13 @@ import (
"github.com/samber/lo"
)
+var (
+ reHTMLTag = regexp.MustCompile(`\<[\S\s]+?\>`)
+ reStyleBlock = regexp.MustCompile(`\`)
- s = re.ReplaceAllString(s, "")
+ // 移除樣式與腳本區塊
+ s = reStyleBlock.ReplaceAllString(s, "")
+ s = reScriptBlock.ReplaceAllString(s, "")
- // 去除 `)
- s = re.ReplaceAllString(s, "")
+ // 將剩餘標籤替換為換行,保留文本結構
+ s = reHTMLTag.ReplaceAllString(s, "\n")
- // 去除所有尖括号内的 HTML 代码,并换成换行符
- re = regexp.MustCompile(`\<[\S\s]+?\>`)
- s = re.ReplaceAllString(s, "\n")
+ // 收斂連續空白為單一換行
+ s = reMultiSpace.ReplaceAllString(s, "\n")
- // 去除连续的换行符和空白字符
- re = regexp.MustCompile(`\s{2,}`)
- s = re.ReplaceAllString(s, "\n")
-
- // 去除首尾的空白字符
return strings.TrimSpace(s)
}