Merge pull request #1536 from microsoft/xuyang1/add_debug_mode_to_save_cache

Add a debug mode that caches prompt responses to speed up the debugging process
2026-07-01 01:51:18 +08:00 · 2023-06-01 19:44:17 +08:00
parent d46b4c1ebf 5f37f32184
commit e376648860
6 changed files with 122 additions and 112 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@ dist/
 qlib/VERSION.txt
 qlib/data/_libs/expanding.cpp
 qlib/data/_libs/rolling.cpp
+qlib/finco/prompt_cache.json
 examples/estimator/estimator_example/
 examples/rl/data/
 examples/rl/checkpoints/
--- a/qlib/finco/.env.example
+++ b/qlib/finco/.env.example
@@ -0,0 +1,18 @@
+
+OPENAI_API_KEY=your_api_key
+
+# USE_AZURE=True
+# AZURE_API_BASE=your_api_base
+# AZURE_API_VERSION=your_api_version
+
+# Using gpt-4 means more tokens but a longer wait time
+# MODEL=gpt-4
+# MAX_TOKENS=1600
+# MAX_RETRY=1000
+
+
+MAX_TOKENS=1600
+MAX_RETRY=120
+
+CONTINOUS_MODE=True
+DEBUG_MODE=True
--- a/qlib/finco/conf.py
+++ b/qlib/finco/conf.py
@@ -1,15 +1,10 @@
 # TODO: use pydantic for other modules in Qlib
 from pydantic import (BaseSettings)
+from qlib.finco.utils import Singleton

 import os

-class Config():
-    _instance = None
-    def __new__(cls, *args, **kwargs):  
-        if cls._instance is None:  
-            cls._instance = super().__new__(cls, *args, **kwargs)  
-        return cls._instance  
-  
+class Config(Singleton):
    def __init__(self):
        self.use_azure = os.getenv("USE_AZURE") == "True"
        self.temperature = 0.5 if os.getenv("TEMPERATURE") is None else float(os.getenv("TEMPERATURE"))
@@ -23,4 +18,5 @@ class Config():

        self.max_retry = int(os.getenv("MAX_RETRY")) if os.getenv("MAX_RETRY") is not None else None

-        self.continous_mode = os.getenv("CONTINOUS_MODE") == "True" if os.getenv("CONTINOUS_MODE") is not None else False
+        self.continous_mode = os.getenv("CONTINOUS_MODE") == "True" if os.getenv("CONTINOUS_MODE") is not None else False
+        self.debug_mode = os.getenv("DEBUG_MODE") == "True" if os.getenv("DEBUG_MODE") is not None else False
--- a/qlib/finco/llm.py
+++ b/qlib/finco/llm.py
@@ -1,65 +1,96 @@
+import os
 import time
 import openai
+import json
 from typing import Optional
+from qlib.log import get_module_logger
 from qlib.finco.conf import Config
+from qlib.finco.utils import Singleton


-def example():
-    response = openai.ChatCompletion.create(
-        engine="gpt-35-turbo",  # The deployment name you chose when you deployed the ChatGPT or GPT-4 model.
-        # engine="gpt-4",  # NOTE: this raises this error: openai.error.RateLimitError: Requests to the Creates a completion for the chat message Operation under Azure OpenAI API version 2023-05-15 have exceeded call rate limit of your current OpenAI S0 pricing tier
-        # engine="gpt-4-32k",  # This works for only;
-        messages=[
-            {"role": "system", "content": "Assistant is a large language model trained by OpenAI."},
-            {"role": "user", "content": "Who were the founders of Microsoft?"},
-        ],
-    )
-    print(response)
+class APIBackend(Singleton):
+    def __init__(self):
+        self.cfg = Config()
+        openai.api_key = self.cfg.openai_api_key
+        if self.cfg.use_azure:
+            openai.api_type = "azure"
+            openai.api_base = self.cfg.azure_api_base
+            openai.api_version = self.cfg.azure_api_version
+        self.use_azure = self.cfg.use_azure

-def try_create_chat_completion(max_retry=10, **kwargs):
-    cfg = Config()
-    max_retry = cfg.max_retry if cfg.max_retry is not None else max_retry
-    for i in range(max_retry):
-        try:
-            response = create_chat_completion(**kwargs)
-            return response
-        except openai.error.RateLimitError as e:
-            print(e)
-            print(f"Retrying {i+1}th time...")
-            time.sleep(1)
-            continue
-    raise Exception(f"Failed to create chat completion after {max_retry} retries.")
-
-def create_chat_completion(
-    messages,
-    model = None,
-    temperature: float = None,
-    max_tokens: Optional[int] = None,
-) -> str:
-    cfg = Config()
-
-    if temperature is None:
-        temperature = cfg.temperature
-    if max_tokens is None:
-        max_tokens = cfg.max_tokens
+        self.debug_mode = False
+        if self.cfg.debug_mode:
+            self.debug_mode = True
+            cwd = os.getcwd()
+            self.cache_file_location = os.path.join(cwd, "prompt_cache.json")
+            self.cache = json.load(open(self.cache_file_location, "r")) if os.path.exists(self.cache_file_location) else {}
    
-    openai.api_key = cfg.openai_api_key
-    if cfg.use_azure:
-        openai.api_type = "azure"
-        openai.api_base = cfg.azure_api_base
-        openai.api_version = cfg.azure_api_version
-        response = openai.ChatCompletion.create(
-            engine=cfg.model,
-            messages=messages,
-            max_tokens=cfg.max_tokens,
-        )
-    else:
-        response = openai.ChatCompletion.create(
-            model=cfg.model,
-            messages=messages,
-        )
-    resp = response.choices[0].message["content"]
-    return resp
+    def build_messages_and_create_chat_completion(self, user_prompt, system_prompt=None):
+        """build the messages to avoid implementing several redundant lines of code"""
+        cfg = Config()
+        # TODO: system prompt should always be provided. In development stage we can use default value
+        if system_prompt is None:
+            try:
+                system_prompt = cfg.system_prompt
+            except AttributeError:
+                get_module_logger("finco").warning("system_prompt is not set, using default value.")
+                system_prompt = "You are an AI assistant who helps to answer user's questions about finance."
+        messages = [
+            {
+                "role": "system",
+                "content": system_prompt,
+            },
+            {
+                "role": "user",
+                "content": user_prompt,
+            },
+        ]
+        response = self.try_create_chat_completion(messages=messages)
+        return response

-if __name__ == "__main__":
-    create_chat_completion()
+    def try_create_chat_completion(self, max_retry=10, **kwargs):
+        max_retry = self.cfg.max_retry if self.cfg.max_retry is not None else max_retry
+        for i in range(max_retry):
+            try:
+                response = self.create_chat_completion(**kwargs)
+                return response
+            except openai.error.RateLimitError as e:
+                print(e)
+                print(f"Retrying {i+1}th time...")
+                time.sleep(1)
+                continue
+        raise Exception(f"Failed to create chat completion after {max_retry} retries.")
+
+    def create_chat_completion(
+        self,
+        messages,
+        model = None,
+        temperature: float = None,
+        max_tokens: Optional[int] = None,
+    ) -> str:
+        
+        if self.debug_mode:
+            if messages[1]["content"] in self.cache:
+                return self.cache[messages[1]["content"]]
+
+        if temperature is None:
+            temperature = self.cfg.temperature
+        if max_tokens is None:
+            max_tokens = self.cfg.max_tokens
+        
+        if self.cfg.use_azure:
+            response = openai.ChatCompletion.create(
+                engine=self.cfg.model,
+                messages=messages,
+                max_tokens=self.cfg.max_tokens,
+            )
+        else:
+            response = openai.ChatCompletion.create(
+                model=self.cfg.model,
+                messages=messages,
+            )
+        resp = response.choices[0].message["content"]
+        if self.debug_mode:
+            self.cache[messages[1]["content"]] = resp
+            json.dump(self.cache, open(self.cache_file_location, "w"))
+        return resp
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
@@ -8,7 +8,7 @@ import re
 import logging

 from qlib.log import get_module_logger
-from qlib.finco.utils import build_messages_and_create_chat_completion
+from qlib.finco.llm import APIBackend

 class Task():
    """
@@ -100,9 +100,7 @@ workflow: reinforcement learning
        prompt_workflow_selection = Template(
            self.__DEFAULT_WORKFLOW_USER_PROMPT
        ).render(user_prompt=user_prompt)
-        response = build_messages_and_create_chat_completion(prompt_workflow_selection, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
-        # TODO: use the above line instead of the following line before release!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-        # response = 'workflow: supervised learning'
+        response = APIBackend().build_messages_and_create_chat_completion(prompt_workflow_selection, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
        self.save_chat_history_to_context_manager(prompt_workflow_selection, response, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
        workflow = response.split(":")[1].strip().lower()
        self.executed = True
@@ -179,10 +177,7 @@ components:
        prompt_plan_all = Template(
            self.__DEFAULT_WORKFLOW_USER_PROMPT
        ).render(user_prompt=user_prompt)
-        response = build_messages_and_create_chat_completion(prompt_plan_all, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
-        # TODO: use upper lines instead of the following line before release!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-        # response = "components:\n- Dataset: (Default) I will use the default dataset module in Qlib. Because it is a suitable dataset to get a long-term return in China A stock market.\n\n- Model: (Personized) I will implement a CustomTransformerModel inherited from qlib.model.base and use the transformer model with 10 MLP layers before the head. Because it meets the user's requirement of using transformer model with 10 MLP layers before the head to achieve better long-term return.\n\n- Record: (Default) I will use SignalRecord in qlib.workflow.record_temp and SigAnaRecord in qlib.workflow.record_temp to save all the signals and the analysis results. Because user needs to check the metrics to determine whether the system meets the requirements.\n\n- Strategy: (Default) I will use TopkDropoutStrategy in qlib.contrib.strategy. Because it is a more robust strategy which saves turnover fee.\n\n- Backtest: (Default) I will use the default backtest module in Qlib. Because it can tell the user a more real performance result of the model we build."
-
+        response = APIBackend().build_messages_and_create_chat_completion(prompt_plan_all, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
        self.save_chat_history_to_context_manager(prompt_plan_all, response, self.__DEFAULT_WORKFLOW_SYSTEM_PROMPT)
        if "components" not in response:
            self.logger.warning("The response is not in the correct format, which probably means the answer is not correct")
@@ -310,19 +305,7 @@ target component: {{target_component}}
            backtest_plan=prompt_element_dict["Backtest_plan"],
            target_component=self.target_componet
        )
-        response = build_messages_and_create_chat_completion(config_prompt, self.__DEFAULT_CONFIG_ACTION_SYSTEM_PROMPT)
-        # TODO use the upper lines to replace the following lines after debug
-        # if self.target_componet == "Dataset":
-        #     response = 'Config:\n```yaml\ndataset:\n    class: FeatureDataset\n    module_path: qlib.data.dataset\n    kwargs:\n        handler_config:\n            class: Alpha360\n            module_path: qlib.contrib.data.handler\n        segment:\n            train:\n                start_time: 2010-01-01\n                end_time: 2017-12-31\n            valid:\n                start_time: 2018-01-01\n                end_time: 2018-12-31\n            test:\n                start_time: 2019-01-01\n                end_time: 2019-12-31\n        scenario: Alpha360\n        feature:\n            label: \n                class: RegressionLabel\n                module_path: qlib.contrib.data.label\n                kwargs:\n                    label_name: f01\n                    direction: 1\n            transform:\n                class: Sequence\n                module_path: qlib.contrib.transform\n                kwargs:\n                    len_seq: 10\n                    step: 5\n                    mode: slide\n                    group: single\n            custom:\n                transform:\n                    class: CustomTransform\n                    module_path: qlib.contrib.data.transform\n                    kwargs:\n                        # customized feature processing for your features\n                        ma_windows:[5,10,20]\n                        macd_windows:[5,10,20]\n                        rsi_windows:[5,10]\n        train_loader:\n            class: PyTorchLoader\n            module_path: qlib.data.dataloader\n            kwargs:\n                batch_size: 4096\n                num_workers: 1\n        valid_loader:\n            class: PyTorchLoader\n            module_path: qlib.data.dataloader\n            kwargs:\n                batch_size: 4096\n                num_workers: 1   \n```   \nReason: I use the FeatureDataset class because the user did not provide the csv data format and we can assume the data is in the default 
format of Qlib. The FeatureDataset loads data by segment and we can choose the segment of train, valid and test for our model. In addition, I use the customized transform to process the features. With the customized transform, we could add more feature engineering procedures according to our own requirements.\n\nImprove suggestion: Since you are trying to build a low turnover strategy, you may consider using the subseries selection method to improve the efficiency of your model. Moreover, you can fine-tune the parameters in the customized transform according to your own requirements.'
-        # elif self.target_componet == "Model":
-        #     response = "Config:\n```yaml\nmodel:\nclass: CustomTransformerModel\nmodule_path: user_module.CustomModel \nkwargs:\n    transformer_stack_kwargs:\n        num_heads: 8\n        num_layers: 6\n        hidden_dim: 256\n        dropout: 0.1\n    mlp_last_dim: 128\n    mlp_activation: relu\n    mlp_layers: 10\n```\n`user_module` should be replaced with your own module path where you define the `CustomTransformerModel`.\n\nReason: \n- `CustomTransformerModel` is used to meet the user's requirement of using a transformer model with 10 MLP layers before the head, which might capture long-term signals more effectively.\n- `num_heads` and `num_layers` are set to 8 and 6 respectively as they are commonly used values in transformer model for financial time series, and they allow the model to capture complex patterns in the data with sufficient capacity.\n- `hidden_dim` is set to 256 to control the model's capacity, balance the computational efficiency, and avoid overfitting to the training data.\n- `mlp_last_dim` is set to 128 to avoid too much dimensionality reduction, and the activation function `relu` is commonly used in financial time series modeling.\n- `mlp_layers` is set to 10 to satisfy the user's requirement of using 10 MLP layers before the head, and it provides the model with enough capacity to capture long-term signals.\n\nImprove suggestion: \n- You can try to adjust the transformer stack hyperparameters, such as `num_heads`, `num_layers`, `hidden_dim`, or the MLP hyperparameters, such as `mlp_last_dim`, `mlp_activation`, `mlp_layers`, to improve the model's performance.\n- You can also try experimenting with different optimizers (`Adam`, `SGD`, etc.) or learning rates to optimize the training procedure.\n- Fine-tuning the hyperparameters may be based on observing the validation metrics and balancing computational efficiency with model performance."
-        # elif self.target_componet == "Record":
-        #     response = 'Config:\n```yaml\nrecord:\nclass: SignalRecord\nmodule_path: qlib.workflow.record_temp\nkwargs:\n    mode: "train"\n    dump_path: "./dump_dir/"\n    std_q: 10\n    max_keep_days: 365\n    clear_cache: True\n```\nReason: I choose the SignalRecord because it saves all the signals and is suitable for the task of building a low turnover strategy with a focus on long-term return. The std_q and max_keep_days are set to 10 and 365 respectively to enable the system to remove signals that are not performing well and keep signals that are performing well for up to a year. The clear_cache setting is set to True to ensure that the cache is cleared before each new run.\nImprove suggestion: Since the user wants to focus on long-term return, it might be worth considering increasing the max_keep_days value to 730 to keep signals for up to two years. Also, it is worth considering using a more advanced record module such as SigAnaRecord or MetaRecord to get more detailed analysis results.'
-        # elif self.target_componet == "Strategy":
-        #     response = 'Config:\n```yaml\nstrategy:\nclass: TopkDropoutStrategy\nmodule_path: qlib.contrib.strategy.strategy\nkwargs:\n    topk: 20\n    threshold: 0.025\n    max_orders: 5\n    dropout: 0.25\n    warmup_length: 100\n```\nReason: Since the user wants a low turnover strategy, I choose TopkDropoutStrategy as it saves turnover fee. The hyperparameters are set to default in Qlib, which is suitable for general use.\nImprove suggestion: For a more customized strategy, the user can try adjusting the hyperparameters of TopkDropoutStrategy, such as increasing the topk value to expand the stock pool, or modifying the threshold to adjust the confidence level of the model. Additionally, the user can explore other strategies in `qlib.contrib.strategy` module to achieve better long-term return.'
-        # elif self.target_componet == "Backtest":
-        #     response = 'Config:\n```yaml\nbacktest:\nclass: NormalBacktest\nmodule_path: qlib.backtest.backtest\nkwargs:\n    start_time: "2008-01-01"\n    end_time: "2021-12-31"\n    rebalance_period: "month"\n    benchmark: "SH000300"\n    trade_cost: 0.0015\n    min_cost: 5\n    return_mean: false\n```\nReason: I choose the default NormalBacktest in Qlib because it is a straightforward backtest method for evaluating long-term investment strategies. It has a reasonable trade cost and can perform monthly rebalance, making it suitable for evaluating the low turnover strategy that the user requires.\n\nImprove suggestion: If the user wants to fine-tune the performance of the backtest, they can consider adjusting the trade cost and minimum cost (min_cost) parameters to better reflect their trading environment. Additionally, they may experiment with other backtest methods in Qlib, such as LongShortBacktest or DelayEvalBacktest, to see if they can further improve the performance of their strategy.'
-        
+        response = APIBackend().build_messages_and_create_chat_completion(config_prompt, self.__DEFAULT_CONFIG_ACTION_SYSTEM_PROMPT)
        self.save_chat_history_to_context_manager(config_prompt, response, self.__DEFAULT_CONFIG_ACTION_SYSTEM_PROMPT)
        res = re.search(r"Config:(.*)Reason:(.*)Improve suggestion:(.*)", response, re.S)
        assert res is not None and len(res.groups()) == 3, "The response of config action task is not in the correct format"
@@ -459,7 +442,7 @@ target component: {{target_component}}
            target_component=self.target_component,
            user_config=config
        )
-        response = build_messages_and_create_chat_completion(implement_prompt, self.__DEFAULT_IMPLEMENT_ACTION_SYSTEM_PROMPT)
+        response = APIBackend().build_messages_and_create_chat_completion(implement_prompt, self.__DEFAULT_IMPLEMENT_ACTION_SYSTEM_PROMPT)
        self.save_chat_history_to_context_manager(implement_prompt, response, self.__DEFAULT_IMPLEMENT_ACTION_SYSTEM_PROMPT)

        res = re.search(r"Code:(.*)Explanation:(.*)Modified config:(.*)", response, re.S)
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -1,7 +1,11 @@
 import json
-from qlib.finco.llm import try_create_chat_completion
-from qlib.finco.conf import Config
-from qlib.log import get_module_logger
+
+class Singleton():
+    _instance = None
+    def __new__(cls, *args, **kwargs):  
+        if cls._instance is None:  
+            cls._instance = super().__new__(cls, *args, **kwargs)  
+        return cls._instance  

 def parse_json(response):
    try:
@@ -10,26 +14,3 @@ def parse_json(response):
        pass

    raise Exception(f"Failed to parse response: {response}, please report it or help us to fix it.")
-
-def build_messages_and_create_chat_completion(user_prompt, system_prompt=None):
-    """build the messages to avoid implementing several redundant lines of code"""
-    cfg = Config()
-    # TODO: system prompt should always be provided. In development stage we can use default value
-    if system_prompt is None:
-        try:
-            system_prompt = cfg.system_prompt
-        except AttributeError:
-            get_module_logger("finco").warning("system_prompt is not set, using default value.")
-            system_prompt = "You are an AI assistant who helps to answer user's questions about finance."
-    messages = [
-        {
-            "role": "system",
-            "content": system_prompt,
-        },
-        {
-            "role": "user",
-            "content": user_prompt,
-        },
-    ]
-    response = try_create_chat_completion(messages=messages)
-    return response