Connect TrainTask & Rolling & DDG-DA (#1599)

* Connect train task to ddg-da & rolling * Pylint & black formatting * Formatting
2026-07-05 20:11:08 +08:00 · 2023-07-17 09:58:58 +08:00
parent 5e0873ca81
commit 1c9841b15e
14 changed files with 584 additions and 356 deletions
--- a/qlib/finco/.env.example
+++ b/qlib/finco/.env.example
@@ -15,4 +15,6 @@ MAX_TOKENS=1600
 MAX_RETRY=120

 CONTINOUS_MODE=True
-DEBUG_MODE=True
+DEBUG_MODE=True
+
+# TEMPERATURE=
--- a/qlib/finco/init.py
+++ b/qlib/finco/init.py
@@ -10,4 +10,4 @@ def get_finco_path() -> Path:
    return the template path
    Because the template path is located in the folder. We don't know where it is located. So __file__ for this module will be used.
    """
-    return DIRNAME
+    return DIRNAME
--- a/qlib/finco/conf.py
+++ b/qlib/finco/conf.py
@@ -1,5 +1,5 @@
 # TODO: use pydantic for other modules in Qlib
-from pydantic import BaseSettings
+# from pydantic_settings import BaseSettings
 from qlib.finco.utils import SingletonBaseClass

 import os
--- a/qlib/finco/context.py
+++ b/qlib/finco/context.py
@@ -0,0 +1,97 @@
+from dataclasses import dataclass, field
+import copy
+from pathlib import Path
+from typing import Optional, List
+from qlib.finco.log import FinCoLog
+from qlib.typehint import Literal
+
+from qlib.finco.utils import similarity
+
+
+@dataclass
+class Design:
+    plan: str
+    classes: str
+    decision: str
+
+
+@dataclass
+class Exp:
+    """Experiment"""
+
+    # compoments
+    dataset: Optional[Design] = None
+    datahandler: Optional[Design] = None
+    model: Optional[Design] = None
+    record: Optional[Design] = None
+    strategy: Optional[Design] = None
+    backtest: Optional[Design] = None
+
+    # basic
+    template: Optional[Path] = None
+
+    # rolling strategy. None indicates no rolling
+    rolling: Optional[Literal["base", "ddgda"]] = None
+
+
+@dataclass
+class StructContext:
+    """Part of the context have clear meaning and structure, so they will be saved here and can be easily retrieved and understood"""
+
+    # TODO: move more content in WorkflowContextManager.context to here
+    workspace: Path
+    exp_list: List[Exp] = field(default_factory=list)  # the planned experiments
+
+
+class WorkflowContextManager:
+    """Context Manager stores the context of the workflow"""
+
+    """All context are key value pairs which saves the input, output and status of the whole workflow"""
+
+    def __init__(self, workspace: Path) -> None:
+        self.context = {}
+        self.logger = FinCoLog()
+        # this context is public
+        self.struct_context = StructContext(workspace)  # TODO: move more content in context to here
+        self.set_context("workspace", workspace)  # TODO: remove me
+
+    def set_context(self, key, value):
+        if key in self.context:
+            self.logger.warning("The key already exists in the context, the value will be overwritten")
+        self.context[key] = value
+
+    def get_context(self, key):
+        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
+        if key not in self.context:
+            self.logger.warning("The key doesn't exist in the context")
+            return None
+        return self.context[key]
+
+    def update_context(self, key, new_value):
+        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
+        if key not in self.context:
+            self.logger.warning("The key doesn't exist in the context")
+        self.context.update({key: new_value})
+
+    def get_all_context(self):
+        """return a deep copy of the context"""
+        """TODO: do we need to return a deep copy?"""
+        return copy.deepcopy(self.context)
+
+    def retrieve(self, query: str) -> dict:
+        if query in self.context.keys():
+            return {query: self.context.get(query)}
+
+        # Note: retrieve information from context by string similarity maybe abandon in future
+        scores = {}
+        for k, v in self.context.items():
+            scores.update({k: max(similarity(query, k), similarity(query, v))})
+        max_score_key = max(scores, key=scores.get)
+        return {max_score_key: self.context.get(max_score_key)}
+
+    def clear(self, reserve: list = None):
+        if reserve is None:
+            reserve = []
+
+        _context = {k: self.get_context(k) for k in reserve}
+        self.context = _context
--- a/qlib/finco/knowledge.py
+++ b/qlib/finco/knowledge.py
@@ -65,6 +65,7 @@ class YamlStorage(Storage):
    This class is responsible for storage and loading of Knowledge related data in yaml format.

    """
+
    DEFAULT_NAME = "storage.yml"

    def __init__(self, path: Union[str, Path]):
@@ -82,7 +83,7 @@ class YamlStorage(Storage):
    def save(self, **kwargs):
        """use pickle as the default save method"""
        Path.mkdir(self.path.parent, exist_ok=True)
-        with open(self.path, 'w') as f:
+        with open(self.path, "w") as f:
            yaml.dump(self.documents, f)


@@ -190,9 +191,14 @@ class ExperimentKnowledge(Knowledge):
    def brief(self):
        docs = []
        for recorder in self.storage.recs:
-            docs.append({"exp_name": self.storage.exp.name, "record_info": recorder.info,
-                         "config": recorder.load_object("config"),
-                         "context_summary": recorder.load_object("context_summary")})
+            docs.append(
+                {
+                    "exp_name": self.storage.exp.name,
+                    "record_info": recorder.info,
+                    "config": recorder.load_object("config"),
+                    "context_summary": recorder.load_object("context_summary"),
+                }
+            )
        return docs


@@ -295,7 +301,7 @@ class InfrastructureKnowledge(Knowledge):

        """
        functions = []
-        for py_file_path in Path(directory).rglob('*.py'):
+        for py_file_path in Path(directory).rglob("*.py"):
            for _functions in self.get_functions_with_docstrings(py_file_path):
                functions.append(_functions)

@@ -314,7 +320,7 @@ class InfrastructureKnowledge(Knowledge):
        docstring = None
        for line in lines:
            if line.strip().startswith("def ") or line.strip().startswith("class "):
-                func = line.strip().split(' ')[1].split('(')[0]
+                func = line.strip().split(" ")[1].split("(")[0]
                if func.startswith("__"):
                    continue
                if current_func is not None:
@@ -336,7 +342,6 @@ class InfrastructureKnowledge(Knowledge):


 class Topic:
-
    def __init__(self, name: str, describe: Template):
        self.name = name
        self.describe = describe
@@ -347,9 +352,7 @@ class Topic:
    def summarize(self, docs: list):
        self.logger.info(f"Summarize topic: \nname: {self.name}\ndescribe: {self.describe.module}")
        prompt_workflow_selection = self.describe.render(docs=docs)
-        response = APIBackend().build_messages_and_create_chat_completion(
-            user_prompt=prompt_workflow_selection
-        )
+        response = APIBackend().build_messages_and_create_chat_completion(user_prompt=prompt_workflow_selection)

        self.knowledge = response
        self.docs = docs
@@ -395,22 +398,26 @@ class KnowledgeBase:

    def load_practice_knowledge(self, path: Path) -> PracticeKnowledge:
        self.practice_knowledge = PracticeKnowledge(
-            YamlStorage(path.joinpath(f"{self.KT_PRACTICE}/{YamlStorage.DEFAULT_NAME}")))
+            YamlStorage(path.joinpath(f"{self.KT_PRACTICE}/{YamlStorage.DEFAULT_NAME}"))
+        )
        return self.practice_knowledge

    def load_execute_knowledge(self, path: Path) -> ExecuteKnowledge:
        self.execute_knowledge = ExecuteKnowledge(
-            YamlStorage(path.joinpath(f"{self.KT_EXECUTE}/{YamlStorage.DEFAULT_NAME}")))
+            YamlStorage(path.joinpath(f"{self.KT_EXECUTE}/{YamlStorage.DEFAULT_NAME}"))
+        )
        return self.execute_knowledge

    def load_finance_knowledge(self, path: Path) -> FinanceKnowledge:
        self.finance_knowledge = FinanceKnowledge(
-            YamlStorage(path.joinpath(f"{self.KT_FINANCE}/{YamlStorage.DEFAULT_NAME}")))
+            YamlStorage(path.joinpath(f"{self.KT_FINANCE}/{YamlStorage.DEFAULT_NAME}"))
+        )
        return self.finance_knowledge

    def load_infrastructure_knowledge(self, path: Path) -> InfrastructureKnowledge:
        self.infrastructure_knowledge = InfrastructureKnowledge(
-            YamlStorage(path.joinpath(f"{self.KT_INFRASTRUCTURE}/{YamlStorage.DEFAULT_NAME}")))
+            YamlStorage(path.joinpath(f"{self.KT_INFRASTRUCTURE}/{YamlStorage.DEFAULT_NAME}"))
+        )
        return self.infrastructure_knowledge

    def get_knowledge(self, knowledge_type: str = None):
@@ -423,8 +430,12 @@ class KnowledgeBase:
        elif knowledge_type == self.KT_INFRASTRUCTURE:
            knowledge = self.infrastructure_knowledge.knowledge
        else:
-            knowledge = self.execute_knowledge.knowledge + self.practice_knowledge.knowledge \
-                        + self.finance_knowledge.knowledge + self.infrastructure_knowledge.knowledge
+            knowledge = (
+                self.execute_knowledge.knowledge
+                + self.practice_knowledge.knowledge
+                + self.finance_knowledge.knowledge
+                + self.infrastructure_knowledge.knowledge
+            )
        return knowledge

    def query(self, knowledge_type: str = None, content: str = None, n: int = 5):
@@ -450,8 +461,9 @@ class KnowledgeBase:
        similar_n_docs = [knowledge[i] for i in similar_n_indexes]

        prompt = Template(
-            """find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'. 
-            Just return the most relevant item I provided, no more explain. For example: {'function': 'config.resolve_path', 'docstring': None}""")
+            """find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'.
+            Just return the most relevant item I provided, no more explain. For example: {'function': 'config.resolve_path', 'docstring': None}"""
+        )
        prompt_workflow_selection = prompt.render(content=content, docs=similar_n_docs)
        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=prompt_workflow_selection, system_prompt="You are an excellent assistant."
--- a/qlib/finco/llm.py
+++ b/qlib/finco/llm.py
@@ -43,7 +43,7 @@ class APIBackend(SingletonBaseClass):
                "content": system_prompt,
            }
        ]
-        messages.extend(former_messages[-1*cfg.max_past_message_include:])
+        messages.extend(former_messages[-1 * cfg.max_past_message_include :])
        messages.append(
            {
                "role": "user",
@@ -82,7 +82,6 @@ class APIBackend(SingletonBaseClass):
        temperature: float = None,
        max_tokens: Optional[int] = None,
    ) -> str:
-
        if self.debug_mode:
            key = json.dumps(messages)
            if key in self.cache:
--- a/qlib/finco/log.py
+++ b/qlib/finco/log.py
@@ -12,6 +12,7 @@ class LogColors:
    """
    ANSI color codes for use in console output.
    """
+
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
@@ -58,8 +59,11 @@ def formatting_log(logger, title="Info"):
    a context manager, print liens before and after a function
    """
    length = {"Start": 120, "Task": 120, "Info": 60, "Interact": 60, "End": 120}.get(title, 60)
-    color, bold = (LogColors.YELLOW, LogColors.BOLD) \
-        if title in ["Start", "Task", "Info", "Interact", "End"] else (LogColors.CYAN, "")
+    color, bold = (
+        (LogColors.YELLOW, LogColors.BOLD)
+        if title in ["Start", "Task", "Info", "Interact", "End"]
+        else (LogColors.CYAN, "")
+    )
    logger.info("")
    logger.info(f"{color}{bold}{'-'} {title} {'-' * (length - len(title))}{LogColors.END}")
    yield
@@ -99,12 +103,12 @@ class FinCoLog(SingletonBaseClass):
                    f"{LogColors.MAGENTA}{LogColors.BOLD}Role:{LogColors.END} "
                    f"{LogColors.CYAN}{m['role']}{LogColors.END}\n"
                    + f"{LogColors.MAGENTA}{LogColors.BOLD}Content:{LogColors.END} "
-                      f"{LogColors.CYAN}{m['content']}{LogColors.END}\n")
+                    f"{LogColors.CYAN}{m['content']}{LogColors.END}\n"
+                )

    def log_response(self, response: str):
        with formatting_log(self.logger, "GPT Response"):
-            self.logger.info(
-                f"{LogColors.CYAN}{response}{LogColors.END}\n")
+            self.logger.info(f"{LogColors.CYAN}{response}{LogColors.END}\n")

    # TODO:
    # It looks wierd if we only have logger
@@ -118,14 +122,13 @@ class FinCoLog(SingletonBaseClass):
    def plain_info(self, *args):
        for arg in args:
            self.logger.info(
-                f"{LogColors.YELLOW}{LogColors.BOLD}Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END}")
+                f"{LogColors.YELLOW}{LogColors.BOLD}Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END}"
+            )

    def warning(self, *args):
        for arg in args:
-            self.logger.warning(
-                f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}")
+            self.logger.warning(f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}")

    def error(self, *args):
        for arg in args:
-            self.logger.error(
-                f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}")
+            self.logger.error(f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}")
--- a/qlib/finco/prompt_template.py
+++ b/qlib/finco/prompt_template.py
@@ -10,8 +10,9 @@ from qlib.finco import get_finco_path
 class PromptTemplate(SingletonBaseClass):
    def __init__(self) -> None:
        super().__init__()
-        _template = yaml.load(open(Path.joinpath(get_finco_path(), "prompt_template.yaml"), "r"),
-                              Loader=yaml.FullLoader)
+        _template = yaml.load(
+            open(Path.joinpath(get_finco_path(), "prompt_template.yaml"), "r"), Loader=yaml.FullLoader
+        )
        for k, v in _template.items():
            if k == "mods":
                continue
@@ -28,5 +29,5 @@ class PromptTemplate(SingletonBaseClass):
            file_path = Path(file_path)
        Path.mkdir(file_path.parent, exist_ok=True)

-        with open(file_path, 'w') as f:
+        with open(file_path, "w") as f:
            yaml.dump(self.__dict__, f)
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
--- a/qlib/finco/tpl/sl/workflow_config.yaml
+++ b/qlib/finco/tpl/sl/workflow_config.yaml
@@ -10,13 +10,25 @@ data_handler_config: &data_handler_config
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
 port_analysis_config: &port_analysis_config
    strategy:
        class: TopkDropoutStrategy
        module_path: qlib.contrib.strategy
        kwargs:
-            model: <MODEL>
-            dataset: <DATASET>
+            signal: <PRED>
            topk: 50
            n_drop: 5
    backtest:
@@ -32,18 +44,11 @@ port_analysis_config: &port_analysis_config
            min_cost: 5
 task:
    model:
-        class: LGBModel
-        module_path: qlib.contrib.model.gbdt
+        class: LinearModel
+        module_path: qlib.contrib.model.linear
        kwargs:
-            loss: mse
-            colsample_bytree: 0.8879
-            learning_rate: 0.2
-            subsample: 0.8789
-            lambda_l1: 205.6999
-            lambda_l2: 580.9768
-            max_depth: 8
-            num_leaves: 210
-            num_threads: 20
+            estimator: ridge
+            alpha: 0.05
    dataset:
        class: DatasetH
        module_path: qlib.data.dataset
@@ -65,7 +70,7 @@ task:
        - class: SigAnaRecord
          module_path: qlib.workflow.record_temp
          kwargs: 
-            ana_long_short: False
+            ana_long_short: True
            ann_scaler: 252
        - class: PortAnaRecord
          module_path: qlib.workflow.record_temp
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -20,6 +20,7 @@ class SingletonBaseClass(metaclass=SingletonMeta):
    This class becomes necessary

    """
+
    # TODO: Add move this class to Qlib's general utils.


@@ -42,4 +43,4 @@ def similarity(text1, text2):

 def random_string(length=10):
    letters = string.ascii_letters + string.digits
-    return ''.join(random.choice(letters) for i in range(length))
+    return "".join(random.choice(letters) for i in range(length))
--- a/qlib/finco/workflow.py
+++ b/qlib/finco/workflow.py
@@ -1,67 +1,40 @@
 import sys
-import copy
 import shutil
-from pathlib import Path
 from typing import List
-
-from qlib.finco.task import HighLevelPlanTask, SummarizeTask, TrainTask
+from pathlib import Path
+from qlib.finco.task import HighLevelPlanTask, SummarizeTask
 from qlib.finco.prompt_template import PromptTemplate, Template
 from qlib.finco.log import FinCoLog, LogColors
-from qlib.finco.utils import similarity
 from qlib.finco.llm import APIBackend
 from qlib.finco.conf import Config
 from qlib.finco.knowledge import KnowledgeBase, Topic
+from qlib.finco.context import WorkflowContextManager


-class WorkflowContextManager:
-    """Context Manager stores the context of the workflow"""
-
-    """All context are key value pairs which saves the input, output and status of the whole workflow"""
-
-    def __init__(self) -> None:
-        self.context = {}
-        self.logger = FinCoLog()
-
-    def set_context(self, key, value):
-        if key in self.context:
-            self.logger.warning("The key already exists in the context, the value will be overwritten")
-        self.context[key] = value
-
-    def get_context(self, key):
-        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
-        if key not in self.context:
-            self.logger.warning("The key doesn't exist in the context")
-            return None
-        return self.context[key]
-
-    def update_context(self, key, new_value):
-        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
-        if key not in self.context:
-            self.logger.warning("The key doesn't exist in the context")
-        self.context.update({key: new_value})
-
-    def get_all_context(self):
-        """return a deep copy of the context"""
-        """TODO: do we need to return a deep copy?"""
-        return copy.deepcopy(self.context)
-
-    def retrieve(self, query: str) -> dict:
-        if query in self.context.keys():
-            return {query: self.context.get(query)}
-
-        # Note: retrieve information from context by string similarity maybe abandon in future
-        scores = {}
-        for k, v in self.context.items():
-            scores.update({k: max(similarity(query, k), similarity(query, v))})
-        max_score_key = max(scores, key=scores.get)
-        return {max_score_key: self.context.get(max_score_key)}
-
-    def clear(self, reserve: list = None):
-        if reserve is None:
-            reserve = []
-
-        _context = {k: self.get_context(k) for k in reserve}
-        self.context = _context
+# TODO: it is not necessary in current phase
+# class TaskDAG:
+#     """
+#     This is a Task manager. it maintains a graph and a stack stucture to manager the task
+#     The reason why the DGA relationship is maintained outside instead of inside the task is that
+#     - To make the creating of task simpler(user don't have to care about the relation-ship)
+#     - To manage the relation ship when poping and executing the tasks is relatively easier instead of scattering them everywhere
+#     """
+#     def __init__(self) -> None:
+#         self._finished = []
+#         self._stack = []
+#         self._dag = defaultdict(list)  # from id(object) -> list of id(object)
+#
+#     def pop(self):
+#         return  self._stack.pop(0)
+#
+#     def push(self, task: Union[Task, List[Task]], parent: Optional[Task] = None):
+#         if isinstance(task, Task):
+#             task = [task]
+#         if parent is not None:
+#             self._dag
+#
+#     def done(self) -> bool:
+#         return len(self._stack) == 0


 class WorkflowManager:
@@ -78,8 +51,7 @@ class WorkflowManager:
        self._confirm_and_rm()

        self.prompt_template = PromptTemplate()
-        self.context = WorkflowContextManager()
-        self.context.set_context("workspace", self._workspace)
+        self.context = WorkflowContextManager(workspace=self._workspace)
        self.default_user_prompt = "Please help me build a low turnover strategy that focus more on longterm return in China A csi300. Please help to use lightgbm model."

    def _confirm_and_rm(self):
@@ -92,7 +64,8 @@ class WorkflowManager:
                    f"If you do not need to delete {self._workspace},"
                    f" please change the workspace dir or rename existing files\n"
                    f"Are you sure you want to delete, yes(Y/y), no (N/n):",
-                    color=LogColors.WHITE)
+                    color=LogColors.WHITE,
+                )
            )
            if str(flag) not in ["Y", "y"]:
                sys.exit()
@@ -150,10 +123,12 @@ class WorkflowManager:
            # TODO: sort the task list based on the priority of the task
            # task_list = sorted(task_list, key=lambda x: x.task_type)
            t = task_list.pop(0)
-            self.logger.info(f"Task finished: {[str(task) for task in task_finished]}",
-                             f"Task in queue: {task_list_info}",
-                             f"Executing task: {str(t)}",
-                             title="Task")
+            self.logger.info(
+                f"Task finished: {[str(task) for task in task_finished]}",
+                f"Task in queue: {task_list_info}",
+                f"Executing task: {str(t)}",
+                title="Task",
+            )

            t.assign_context_manager(self.context)
            res = t.execute()
@@ -174,9 +149,10 @@ class LearnManager:
        self.epoch = 0
        self.wm = WorkflowManager()

-        self.topics = [Topic(name=topic, describe=self.wm.prompt_template.get(f"Topic_{topic}")) for topic in
-                       self.__DEFAULT_TOPICS]
-        self.knowledge_base = KnowledgeBase(workdir=Path.cwd().joinpath('knowledge'))
+        self.topics = [
+            Topic(name=topic, describe=self.wm.prompt_template.get(f"Topic_{topic}")) for topic in self.__DEFAULT_TOPICS
+        ]
+        self.knowledge_base = KnowledgeBase(workdir=Path.cwd().joinpath("knowledge"))
        self.knowledge_base.execute_knowledge.add([])
        self.knowledge_base.query(knowledge_type="infrastructure", content="resolve_path")

@@ -209,14 +185,17 @@ class LearnManager:

        for task in task_finished:
            prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
-                summary=summary, brief=knowledge_of_topics,
+                summary=summary,
+                brief=knowledge_of_topics,
                task_finished=[str(t) for t in task_finished],
-                task=task.__class__.__name__, system=task.system.render(), user_prompt=user_prompt
+                task=task.__class__.__name__,
+                system=task.system.render(),
+                user_prompt=user_prompt,
            )

            response = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=prompt_workflow_selection,
-                system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render()
+                system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
            )

            # todo: response assertion
--- a/scripts/finco/cmd.sh
+++ b/scripts/finco/cmd.sh
@@ -12,4 +12,4 @@ if [ -e $DIR/cridential.sh ]; then
 fi

 # run the command
-python -m qlib.finco.cli "please help me build a low turnover strategy that focus more on longterm return"
+python -m qlib.finco.cli "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown."
--- a/setup.py
+++ b/setup.py
@@ -176,11 +176,12 @@ setup(
        ],
        "finco": [
            # finco is not necessary for all Qlib users; So a single require section is used for it.
-            "openapi",
+            "openai",
            "pydantic",  # Please add it to basic requirements after the design of pydantic is state.
+            "pydantic-settings",
            "python-dotenv",  # I don't think this is necessary if we use pydantic.
            "fuzzywuzzy",
-            "python-Levenshtein"    # not necessary but accelerate fuzzywuzzy calculation
+            "python-Levenshtein",  # not necessary but accelerate fuzzywuzzy calculation
        ],
    },
    include_package_data=True,