From e229b567ad7e8b7ea8ef00ee1cb2dace93fd513a Mon Sep 17 00:00:00 2001 From: Chaoying <32626585+Chaoyingz@users.noreply.github.com> Date: Thu, 24 Mar 2022 19:49:25 +0800 Subject: [PATCH] Support feature names contain Chinese punctuation (#1003) --- qlib/utils/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py index 8852155bc..de60b18b7 100644 --- a/qlib/utils/__init__.py +++ b/qlib/utils/__init__.py @@ -274,9 +274,11 @@ def parse_field(field): if not isinstance(field, str): field = str(field) + # Chinese Punctuation Regex: \uff08 = （, \uff09 = ）, \uff1a = ：, \u3001 = 、 + chinese_punctuation_regex = r"\u3001\uff1a\uff08\uff09" for pattern, new in [ - (r"\$\$(\w+)", r'PFeature("\1")'), # $$ must be before $ - (r"\$(\w+)", rf'Feature("\1")'), + (rf"\$\$([\w{chinese_punctuation_regex}]+)", r'PFeature("\1")'), # $$ must be before $ + (rf"\$([\w{chinese_punctuation_regex}]+)", r'Feature("\1")'), (r"(\w+\s*)\(", r"Operators.\1("), ]: # Features # Operators field = re.sub(pattern, new, field)