optimize log (#1612 )

optimize workflow and output format
Merge pull request #1609 from microsoft/xuyang1/finetune_prompts
2026-06-29 17:11:20 +08:00 · 2023-08-01 18:57:48 +08:00 · 2023-07-20 12:15:04 +08:00 · 2023-07-19 20:01:07 +08:00 · 2023-07-19 20:00:09 +08:00 · 2023-07-18 21:47:58 +08:00
23 changed files with 14006 additions and 122 deletions
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -486,5 +486,8 @@ class QlibConfig(Config):
        return self._registered


+DEFAULT_QLIB_DOT_PATH = Path("~/.qlib/").expanduser()
+
+
 # global config
 C = QlibConfig(_default_config)
--- a/qlib/finco/conf.py
+++ b/qlib/finco/conf.py
@@ -13,7 +13,7 @@ class Config(SingletonBaseClass):

    def __init__(self):
        self.use_azure = os.getenv("USE_AZURE") == "True"
-        self.temperature = 0.5 if os.getenv("TEMPERATURE") is None else float(os.getenv("TEMPERATURE"))
+        self.temperature = 0 if os.getenv("TEMPERATURE") is None else float(os.getenv("TEMPERATURE"))
        self.max_tokens = 800 if os.getenv("MAX_TOKENS") is None else int(os.getenv("MAX_TOKENS"))

        self.openai_api_key = os.getenv("OPENAI_API_KEY")
--- a/qlib/finco/demo_081e403e5712.yml
+++ b/qlib/finco/demo_081e403e5712.yml
--- a/qlib/finco/demo_3a6439665713.yml
+++ b/qlib/finco/demo_3a6439665713.yml
--- a/qlib/finco/demo_c15e087a5706.yml
+++ b/qlib/finco/demo_c15e087a5706.yml
--- a/qlib/finco/demo_failed.yml
+++ b/qlib/finco/demo_failed.yml
--- a/qlib/finco/demo_milestone.yml
+++ b/qlib/finco/demo_milestone.yml
--- a/qlib/finco/knowledge.py
+++ b/qlib/finco/knowledge.py
@@ -215,6 +215,8 @@ class PracticeKnowledge(Knowledge):
        self.summarize()

    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        s = "\n".join(docs)
+        logger.info(f'Add to Practice Knowledge:\n {s}')
        storage = self.get_storage(storage_name)
        if storage is None:
            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
@@ -238,10 +240,6 @@ class FinanceKnowledge(Knowledge):
        storage = self.get_storage(YamlStorage.DEFAULT_NAME)
        if len(storage.documents) == 0:
            docs = self.read_files_in_directory(self.workdir.joinpath(self.name))
-            docs.extend([
-                {"content": "[Success]: XXXX, the results looks reasonable  # Keywords: supervised learning, data"},
-                {"content": "[Fail]: XXXX, it raise memory error due to  YYYYY  "
-                            "# Keywords: supervised learning, data"}])
            self.add(docs)
        self.summarize()

@@ -378,20 +376,27 @@ class InfrastructureKnowledge(Knowledge):


 class Topic:
-    def __init__(self, name: str, describe: Template):
+    def __init__(self, name: str, system: Template, user: Template):
        self.name = name
-        self.describe = describe
+        self.system_prompt_template = system
+        self.user_prompt_template = user
        self.docs = []
        self.knowledge = None
        self.logger = FinCoLog()

-    def summarize(self, docs: list):
-        self.logger.info(f"Summarize Topic \nname: {self.name}\ndescribe: {self.describe.module}")
-        prompt_workflow_selection = self.describe.render(docs=docs)
-        response = APIBackend().build_messages_and_create_chat_completion(user_prompt=prompt_workflow_selection)
+    def summarize(self, practice_knowlege, user_intention, target, diffrence, target_metrics):
+        system_prompt = self.system_prompt_template.render(topic=self.name)
+        user_prompt = self.user_prompt_template.render(
+            experiment_1_info = practice_knowlege[0],
+            experiment_2_info = practice_knowlege[1],
+            user_intention=user_intention,
+            target=target,
+            diffrence=diffrence,
+            target_metrics=target_metrics)
+        response = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_prompt, system_prompt=system_prompt)

        self.knowledge = response
-        self.docs = docs
+        self.docs = practice_knowlege
        self.logger.info(f"Summary of {self.name}:\n{self.knowledge}")


@@ -483,27 +488,48 @@ class KnowledgeBase(SingletonBaseClass):
        # literal search/semantic search

        knowledge = self.get_knowledge(knowledge_type=knowledge_type)
-        if len(knowledge) == 0:
+        if len(knowledge) == 0 or knowledge_type == "infrastructure":
            return ""

+        if knowledge_type == "practice":
+            knowledge = [line for line in knowledge if line.startswith("practice_knowledge on")]
+
        scores = []
        for k in knowledge:
            scores.append(similarity(str(k), content))
        sorted_indexes = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        similar_n_indexes = sorted_indexes[:n]
-        similar_n_docs = [knowledge[i] for i in similar_n_indexes]
+        similar_n_docs = "\n".join([knowledge[i] for i in similar_n_indexes])

-        prompt = Template(
+        user_prompt_template = Template(
 """
-find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'. 
-Just return the most relevant item I provided, no more explain.
-please treat the docs as sentences and always response no less than 5 relevant sentences.
-List all the relevant sentences in number index without any interaction and conversation.
+query: '{{query}}' 
+paragraph:
+{{paragraph}}.
 """
        )
-        prompt_workflow_selection = prompt.render(content=content, docs=similar_n_docs)
+        user_prompt = user_prompt_template.render(query=content, paragraph=similar_n_docs)
+        system_prompt = """
+You are an assistant who find relevant sentences from a long paragraph to fit user's query sentence. Relevant means the sentence might provide userful information to explain user's query sentence. People after reading the relevant sentences might have a better understanding of the query sentence.
+
+Please response no less than ten sentences, if paragraph is not enough, you can return less than ten. Don't pop out irrelevant sentences. Please list the sentences in a number index instead of a whole paragraph.
+
+Example input:
+query: what is the best model for image classification?
+paragraph:
+Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
+Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
+The solar system is a collection of celestial bodies, including the Sun, planets, moons, and other objects, that orbit around the Sun due to its gravitational pull.
+A car is a wheeled vehicle, typically powered by an engine or electric motor, used for transportation of people and goods.
+ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
+
+Example output:
+1. ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
+2. Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
+3. Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
+"""
        response = APIBackend().build_messages_and_create_chat_completion(
-            user_prompt=prompt_workflow_selection, system_prompt="You are an excellent assistant."
+            user_prompt=user_prompt, system_prompt=system_prompt
        )

        return response
--- a/qlib/finco/knowledge/finance/gpt-generated.txt
+++ b/qlib/finco/knowledge/finance/gpt-generated.txt
@@ -36,6 +36,8 @@ However, incorporating a rolling meta-controller also comes with potential chall

 Meta controller like rolling or DDGDA is proved to be relevant to the maximum drawdown but effect is still unknown. Because seeing the latest data is always good to avoid some new patterns recently. A static model is not good at doing it.

+If rolling has been proved in practice knowledge that it's useful in minimizing max drawdown or maximize dropdown, always try DDGDA in the following experiments to further minimize the max drawdown.
+
 DDG-DA, which stands for Data Distribution Generation for Predictable Concept Drift Adaptation, is a research paper focused on addressing the challenge of concept drift in machine learning and data mining. Concept drift occurs when the statistical properties of a target variable change over time, causing the model's performance to degrade. This is a common issue in various domains, including finance, where market conditions and economic factors can change rapidly.

 The DDG-DA paper proposes a framework for generating synthetic datasets that simulate concept drift in a controlled and predictable manner. By creating these datasets, researchers can better understand how concept drift affects the performance of their machine learning models and develop strategies for adapting to these changes.
--- a/qlib/finco/llm.py
+++ b/qlib/finco/llm.py
@@ -1,11 +1,44 @@
+import re
 import os
 import time
 import openai
 import json
-from typing import Optional
+import yaml
+from typing import Optional, Tuple, Union
 from qlib.finco.conf import Config
 from qlib.finco.utils import SingletonBaseClass
 from qlib.finco.log import FinCoLog
+from qlib.config import DEFAULT_QLIB_DOT_PATH
+from pathlib import Path
+
+
+class ConvManager:
+    """
+    Conversation manager of the LLM.
+
+    Each conversation is dumped as JSON to `<path>/<n>.json` so that it can be exported for debugging.
+    `0.json` always holds the most recently appended conversation; older dumps get larger numbers.
+    """
+
+    def __init__(self, path: Union[Path, str] = DEFAULT_QLIB_DOT_PATH / "llm_conv", recent_n: int = 10) -> None:
+        """
+        Parameters
+        ----------
+        path : Union[Path, str]
+            directory that stores the dumped conversations; it is created when missing.
+        recent_n : int
+            number of lowest-numbered dumps that are shifted by one index on every rotation.
+        """
+        self.path = Path(path)
+        self.path.mkdir(parents=True, exist_ok=True)
+        self.recent_n = recent_n
+
+    def _rotate_files(self):
+        """Rename `<n>.json` to `<n+1>.json` for the `recent_n` lowest-numbered dumps, freeing `0.json`."""
+        pairs = []
+        for f in self.path.glob("*.json"):
+            # fullmatch + escaped dot: only names that are exactly `<digits>.json` take part in the rotation
+            m = re.fullmatch(r"(\d+)\.json", f.name)
+            if m is not None:
+                pairs.append((int(m.group(1)), f))
+        pairs.sort(key=lambda x: x[0])
+        # Walk from the highest selected index down so a file is never moved onto a sibling that still
+        # has to be moved. `replace` overwrites an existing target on every platform, whereas `rename`
+        # raises FileExistsError on Windows.
+        for n, f in pairs[: self.recent_n][::-1]:
+            f.replace(self.path / f"{n + 1}.json")
+
+    def append(self, conv: Tuple[list, str]):
+        """
+        Rotate the existing dumps and write `conv` to `0.json`.
+
+        Parameters
+        ----------
+        conv : Tuple[list, str]
+            the conversation to dump; it must be JSON serializable.
+        """
+        self._rotate_files()
+        # the context manager closes the file handle even when serialization fails
+        with open(self.path / "0.json", "w") as fp:
+            json.dump(conv, fp)
+        # TODO: preserve line breaks to make it more convenient to edit the file directly.


 class APIBackend(SingletonBaseClass):
@@ -54,6 +87,8 @@ class APIBackend(SingletonBaseClass):
        response = self.try_create_chat_completion(messages=messages, **kwargs)
        fcl.log_message(messages)
        fcl.log_response(response)
+        if self.debug_mode:
+            ConvManager().append((messages, response))
        return response

    def try_create_chat_completion(self, max_retry=10, **kwargs):
--- a/qlib/finco/log.py
+++ b/qlib/finco/log.py
@@ -2,6 +2,7 @@
 This module is based on Qlib's logger module and provides some interactive functions.
 """
 import logging
+import time

 from typing import Dict, List
 from qlib.finco.utils import SingletonBaseClass
@@ -58,15 +59,18 @@ def formatting_log(logger, title="Info"):
    """
    a context manager, print lines before and after a function
    """
-    length = {"Start": 120, "Task": 120, "Info": 60, "Interact": 60, "End": 120}.get(title, 60)
+    length = {"Start": 90, "Round": 90, "Task": 90, "Info": 60, "Interact": 60, "End": 90}.get(title, 60)
    color, bold = (
        (LogColors.YELLOW, LogColors.BOLD)
-        if title in ["Start", "Task", "Info", "Interact", "End"]
+        if title in ["Start", "Round", "Task", "Info", "Interact", "End"]
        else (LogColors.CYAN, "")
    )
    logger.info("")
    logger.info(f"{color}{bold}{'-'} {title} {'-' * (length - len(title))}{LogColors.END}")
+
    yield
+    if color == LogColors.YELLOW:
+        time.sleep(2)
    logger.info("")


@@ -109,6 +113,7 @@ class FinCoLog(SingletonBaseClass):
    def log_response(self, response: str):
        with formatting_log(self.logger, "GPT Response"):
            self.logger.info(f"{LogColors.CYAN}{response}{LogColors.END}\n")
+            time.sleep(1)

    # TODO:
    # It looks weird if we only have logger
--- a/qlib/finco/prompt_cache0719.json
+++ b/qlib/finco/prompt_cache0719.json
--- a/qlib/finco/prompt_template.yaml
+++ b/qlib/finco/prompt_template.yaml
@@ -37,26 +37,19 @@ IdeaTask_system : |-
  Rolling or DDGDA is a kind of data controller which applies custom weight to data in time dimension. So set Data as target module when rolling or DDGDA is used in business level. Never do research both on rolling or DDGDA.
  We often use linear model as default model supervised learning because it trains very fast. If the user didn't plan to achieve very high accuracy, use default model and datahandler is a good choice to save time.

-  User will tell you the knowledge type and content in the conversation, if user said "following lists the {practice or finance} knowledge:", you should memorize and understand them then answer "OK" without any other words, finally, user will tell you the research intention, you should answer exactly the same format as the input without any interaction or conversation.
+  User will tell you the type and content of knowledge and the research intention, you should answer exactly the same format as the input without any interaction or conversation.

  Example input:
-  Input 1:
  following lists the practice knowledge:
  …
  …
-  Output 1:
-  OK

-  Input 2:
  following lists the finance knowledge:
  …
  …
-  Output 2:
-  OK
-  Input 3:
-  Research intention:
-  build an US stock market daily portfolio in quantitative investment and maximize the excess return.
-  Output 3:
+  Research intention: build an US stock market daily portfolio in quantitative investment and maximize the excess return.
+
+  Example output:
  Target: maximize the excess return
  Deliverables: a daily quantitative investment strategy in US stock market. A model will be included in the strategy.
  Thinking directions:
@@ -68,7 +61,11 @@ IdeaTask_system : |-
    Because the user wants to maximize the excess return and more complicated model often extracts more deep pattern from the data. So try a more complicated DNN model to get more excess return than a simple linear model.

 IdeaTask_user : |-
-  pass
+  following lists the practice knowledge:
+  {{ practice_knowledge }}
+  following lists the finance knowledge:
+  {{ finance_knowledge }}
+  Research intention: {{ user_intention }}

 HighLevelPlanTask_system: |- 
  You are an Quant investment Research and development assistant whose job is to determine high level plans to testify user's research intention.
@@ -1057,13 +1054,29 @@ ImplementActionTask_user : |-
  target component: {{target_component}}

 SummarizeTask_system : |-
-  You are an expert in quant domain.
-  Your task is to help user to analysis the output of qlib, your main focus is on the backtesting metrics of 
-  user strategies. Warnings reported during runtime can be ignored if deemed appropriate.
-  your information including the strategy's backtest log and runtime log. 
-  You may receive some scripts of the codes as well, you can use them to analysis the output.
-  At the same time, you can also use your knowledge of the Microsoft/Qlib project and finance to complete your tasks.
-  If there are any abnormal areas in the log or scripts, please also point them out.
+  You are an expert in quant domain. Your task is to help user to analyze the output of two experiments in Qlib, your main focus is on the backtesting metrics of user strategies.
+
+  User has conducted two experiments, which differ only in a very small part.
+  On each experiment, user will give you:
+  1. user's intention why doing these experiments
+  2. The id to differ the experiments
+  3. The yaml config of the experiment
+  4. A small description of the experiment
+  5. the backtest metrics of the experiment
+
+  Finally, user will tell you the target of doing these experiments, difference between the two experiments and target metrics from the user.
+
+  User will provide a figure path where the user has generated some images; please include them in your report.
+
+  You should understand user's intention and target, compare the relevant metrics of the two experiments based on user's intention, give conclusion to the target.
+
+  Please make a table to compare the metrics of two experiments, and make it easy to read, like calculating some increase or highlighting some key metrics.
+
+  You should give the user a summary of each experiment, conclusions, and recommendations for further research experiments, and you should make the report longer.
+
+  Notice: 
+  1. max_drawdown might be presented as a negative number or a positive number; a better max_drawdown (also known as a lower max_drawdown) means its absolute value is smaller, so don't compare the raw max_drawdown numbers, compare their absolute values instead. This is very important because a misunderstanding might cause a totally wrong conclusion!!!
+  2. try not to say the two experiments perform similarly, because small progress also means better; even if the two experiments perform similarly, you should still point out which one is better.
  
  Example output 1:
  The matrix in log shows that your strategy's max draw down is a bit large, based on your annualized return, 
@@ -1103,8 +1116,19 @@ SummarizeTask_system : |-
  Don't list data user doesn't provide.

 SummarizeTask_user : |-
-  Here is my information: '{{information}}'
-  My intention is: {{user_intention}}. Please provide me with a summary and   recommendation based on my intention and the information I have provided. There are some figures which absolute path are: {{figure_path}}, You must display these images in markdown using the appropriate image format.
+  Here is my results on two experiments:
+  experiment 1:
+  {{experiment_1_info}}
+  experiment 2:
+  {{experiment_2_info}}
+  target:
+  {{ target }}
+  difference:
+  {{ difference }}
+  target metrics:
+  {{ target_metrics }}
+  My intention is: {{user_intention}}. 
+  Please provide me with a summary and recommendation based on my intention and the information I have provided. There are some figures whose absolute paths are: {{figure_path}}. You must display these images in markdown using the appropriate image format.

 SummarizeTask_context_system : |-
  Your purpose is to find out the important information offered by user. You can just show the data provided by user in markdown format.
@@ -1131,11 +1155,38 @@ LearnManager_user : |-
  If you have no idea how to optimize the system prompt, you can just return the original system prompt.
  you will adjust {{task}}'s system prompt to:

-Topic_IC : |-
-  Summarize the influence of parameters on IC: {{docs}}. (Example response: Max draw-down become larger over time)
+Topic_user : |-
+  experiment 1:
+  {{experiment_1_info}}
+  experiment 2:
+  {{experiment_2_info}}
+  target:
+  {{ target }}
+  difference:
+  {{ difference }}
+  target metrics:
+  {{ target_metrics }}
+  My intention is: {{user_intention}}. 

-Topic_MaxDropDown : |-
-  Summarize the influence of parameters on max dropdown: {{docs}}. (Example response: Max draw-down become larger over time)
+Topic_system : |-
+  Your job is to summarize the influence of parameters on max dropdown.

-Topic_RollingModel : |-
-  What conclusion can you draw from: {{docs}}. Answer questions as concisely as possible. (Example response: rolling model is good at making the Max draw-down smaller.)
+  User has conducted two experiments, which differ only in a very small part.
+  On each experiment, user will give you:
+  1. user's intention why doing these experiments
+  2. The id to differ the experiments
+  3. The yaml config of the experiment
+  4. A small description of the experiment
+  5. the backtest metrics of the experiment
+
+  Finally, user will tell you the target of doing these experiments, difference between the two experiments and target metrics from the user.
+
+  You should compare the metrics of two experiments and give a conclusion on the effect that the difference between the experiments has on the topic {{ topic }}.
+
+  Notice: max_drawdown might be presented as a negative number or a positive number; a better max_drawdown means its absolute value is smaller, so don't compare the raw max_drawdown numbers, compare their absolute values instead. This is very important because a misunderstanding might cause a totally wrong conclusion!!!
+
+  After that, you should give a small explanation to your conclusion.
+
+  Example output format:
+  rolling model is good at making the Max draw-down smaller.
+  explanation: ...
--- a/qlib/finco/record.txt
+++ b/qlib/finco/record.txt
@@ -0,0 +1,2 @@
+conda activate qlib38
+python cli_learn.py "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown while maintaining return."
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
@@ -1,4 +1,5 @@
 import os
+import time

 from pathlib import Path
 import io
@@ -19,6 +20,7 @@ from qlib.workflow import R
 from qlib.finco.log import FinCoLog, LogColors
 from qlib.finco.conf import Config
 from qlib.finco.knowledge import KnowledgeBase
+from qlib.finco.utils import directory_tree

 from qlib.finco.context import Design, Exp, WorkflowContextManager

@@ -176,27 +178,23 @@ class IdeaTask(PlanTask):
        practice_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_PRACTICE, content=user_intention)
        finance_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_FINANCE, content=user_intention)

+        if practice_knowledge == "":
+            practice_knowledge = "So far no former experiments have been done, so no practice knowledge is available."
+        else:
+            practice_knowledge = "\nSo you can try other advanced algorithm."
+            finance_knowledge += "\n11. When rolling is proved to be working in reducing max drawdown, it's a good idea to try DDGDA as controllerto to further improve performance."
+
+
        system_prompt = self.system.render()

        former_messages = []
-        for knowlege in [practice_knowledge, finance_knowledge]:
-            if knowlege != '':
-                knowlege_type = "practice" if knowlege is practice_knowledge else "finance"
-                user_prompt = ""
-                user_prompt += f"following lists the {knowlege_type} knowledge:\n"
-                user_prompt += f"{knowlege}\n"
-                response = APIBackend().build_messages_and_create_chat_completion(
-                    user_prompt, system_prompt, former_messages=former_messages
-                )
-                assert "ok" in response.lower(), "The response is not ok"
-                self.save_chat_history_to_context_manager(
-                    user_prompt, response, system_prompt
-                )
-                former_messages = self._context_manager.get_context("chat_history")[self.__class__.__name__]['None'][1:]
-        user_prompt = f"""\nResearch intention:\n{user_intention}"""
+        user_prompt = self.user.render(practice_knowledge=practice_knowledge, finance_knowledge=finance_knowledge, user_intention=user_intention)
        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt, system_prompt, former_messages=former_messages
        )
+        self.save_chat_history_to_context_manager(user_prompt, response, system_prompt)
+        
+        time.sleep(3)

        re_search_pattern = f"Target: (.*)Deliverables:(.*)Thinking directions:(.*)Business level:(.*)Algorithm level:(.*)Details:(.*)"
        re_search_res = re.search(re_search_pattern, response, re.S)
@@ -260,7 +258,13 @@ class HighLevelPlanTask(PlanTask):
        ), "The response of config action task is not in the correct format"

        self._context_manager.set_context("high_level_workflow", res.group(1).strip())
+
        self._context_manager.set_context("high_level_experiments", res.group(2).strip())
+        experiment_description_search_res = re.search("1.(.*)2.(.*)", res.group(2).strip(), re.S)
+        assert experiment_description_search_res is not None, "The experiment description is not in the correct format"
+        self._context_manager.set_context("experiments_desc_1", experiment_description_search_res.group(1).strip())
+        self._context_manager.set_context("experiments_desc_2", experiment_description_search_res.group(2).strip())
+
        self._context_manager.set_context("high_level_metrics", res.group(3).strip())

        if "supervised learning" in self._context_manager.get_context("high_level_workflow").lower():
@@ -279,7 +283,7 @@ class SLPlanTask(PlanTask):

    def execute(self):
        workflow = self._context_manager.get_context("high_level_workflow")
-        assert workflow.lower() == "supervised learning", "The workflow is not supervised learning"
+        assert "supervised learning" in workflow.lower(), "The workflow is not supervised learning"

        target = self._context_manager.get_context("target")
        deliverable = self._context_manager.get_context("deliverable")
@@ -354,6 +358,7 @@ class SLPlanTask(PlanTask):
                assert decision in ["Default", "Personized"], f"The decision of {name} is not correct"
            # TODO: the structured experiments should replace
            self._context_manager.struct_context.exp_list.append(exp)
+        self._context_manager.set_context("experiments_difference", match_res.group(experiment_count + 1))

        # 1) create a workspace
        # TODO: we have to make choice between `sl` and  `sl-cfg`
@@ -413,7 +418,7 @@ class TrainTask(Task):

    def execute(self):
        workflow_config = f"experiment_{self._experiment_index}.yaml"
-
+        time.sleep(2)
        workspace = self._context_manager.get_context("workspace")
        workflow_path = workspace.joinpath(workflow_config)
        with workflow_path.open() as f:
@@ -433,6 +438,8 @@ class TrainTask(Task):
        R.set_uri(Path(workspace).joinpath("mlruns").as_uri())
        if not self._rolling:
            command = f"qrun {str(workflow_path)}"
+            self.logger.plain_info(f"Run the command: {command}")
+            
            try:
                # Run the command and capture the output
                workspace = self._context_manager.get_context("workspace")
@@ -473,29 +480,36 @@ class TrainTask(Task):
                    return ret_list
        elif not self._ddgda:
            command = f"python -m qlib.contrib.rolling base --conf_path {workflow_path} run"
+            self.logger.plain_info(f"Run the command: {command}")
            # Run the command and capture the output
            workspace = self._context_manager.struct_context.workspace
            subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace), shell=True
            )
            # todo: dont manage record by id, experiment_id=2 doesnt contains metrics
-            try:
-                exp = R.get_exp(experiment_id="3")
-            except qlib.utils.exceptions.ExpAlreadyExistError:
-                exp = R.get_exp(experiment_id="2")
+            exp = R.get_exp(experiment_name="Experiment")

        else:
            command = f"python -m qlib.contrib.rolling ddgda --conf_path {workflow_path} run"
+            self.logger.plain_info(f"Run the command: {command}")
            # Run the command and capture the output
            workspace = self._context_manager.struct_context.workspace
            subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace), shell=True
            )
-            try:
-                exp = R.get_exp(experiment_id="3")
-            except qlib.utils.exceptions.ExpAlreadyExistError:
-                exp = R.get_exp(experiment_id="2")
+            exp = R.get_exp(experiment_name="Experiment")
+            
+        with open(rf"{workspace}/script.sh", "a") as fw:
+            fw.write(command)
+            fw.write("\n")
+            fw.write("\n")
+            fw.flush()

+        with open(rf"{workspace}/README.md", "w") as fw:
+            fw.write(f"\n")
+            fw.flush()
+
+        self.logger.plain_info(f"Workspace output:\n{directory_tree(workspace, max_depth=1)}")
        # first recorder is the latest
        recorder = exp.list_recorders(rtype=exp.RT_L)[0]
        self._context_manager.set_context(f"experiment_{self._experiment_index}_recorder", recorder)
@@ -742,6 +756,8 @@ class HyperparameterFinetuneActionTask(ActionTask):
            self._context_manager.set_context(f"experiment_{experiment_id}_ddgda", ddgda_res)
            self._context_manager.set_context(f"experiment_{experiment_id}_config_finetune_reason", reason_res)

+        import shutil
+        shutil.copytree(r"/home/xuyang/workspace/qlib/qlib/finco/mlruns", r"/home/xuyang/workspace/qlib/qlib/finco/finco_workspace/mlruns")
        return return_tasks


@@ -1160,16 +1176,12 @@ class SummarizeTask(Task):
    def execute(self) -> Any:
        workspace = self._context_manager.get_context("workspace")
        user_intention = self._context_manager.get_context("user_intention")
-
-        file_info = self.get_info_from_file(workspace)
-        context_info = self.get_info_from_context()  # too long context make response unstable.
+        target = self._context_manager.get_context(f"target")
+        diffrence = self._context_manager.get_context(f"experiments_difference")
+        target_metrics = self._context_manager.get_context(f"high_level_metrics")

        figure_path = self.get_figure_path(workspace)

-        # todo: remove 'be' after test
-        be = APIBackend()
-        be.debug_mode = False
-
        def _get_value_from_info(info: list, k: str):
            for i in info:
                if k in i.keys():
@@ -1179,40 +1191,55 @@ class SummarizeTask(Task):
        experiment_count = self._context_manager.get_context("experiment_count")
        for exp_id in range(1, experiment_count + 1):
            recorder = self._context_manager.get_context(f"experiment_{exp_id}_recorder")
-            reason = self._context_manager.get_context(f"experiment_{exp_id}_config_finetune_reason")
+            experiments_desc = self._context_manager.get_context(f"experiments_desc_{exp_id}")
            workflow_yaml = self._context_manager.get_context(f"workflow_{exp_id}_yaml")
            record_info = [{"metrics": recorder.list_metrics()}]

-            information = context_info + file_info + record_info
+            # information = context_info + file_info + record_info

-            context_summary = {}
-            for key in self.__DEFAULT_SUMMARIZE_CONTEXT:
-                prompt_workflow_selection = self.summarize_context_user.render(
-                    key=key, value=_get_value_from_info(info=information, k=key)
-                )
-                response = be.build_messages_and_create_chat_completion(
-                    user_prompt=prompt_workflow_selection, system_prompt=self.summarize_context_system.render()
-                )
-                context_summary.update({key: response})
+            # context_summary = {}
+            # for key in self.__DEFAULT_SUMMARIZE_CONTEXT:
+            #     prompt_workflow_selection = self.summarize_context_user.render(
+            #         key=key, value=_get_value_from_info(info=information, k=key)
+            #     )
+            #     response = be.build_messages_and_create_chat_completion(
+            #         user_prompt=prompt_workflow_selection, system_prompt=self.summarize_context_system.render()
+            #     )
+            #     context_summary.update({key: response})

-            recorder.save_objects(context_summary=context_summary)
+            # recorder.save_objects(context_summary=context_summary)

            prompt_workflow_selection = self.summarize_metrics_user.render(
                information=_get_value_from_info(info=record_info, k="metrics"), user_prompt=user_intention
            )
-            metrics_response = be.build_messages_and_create_chat_completion(
+            metrics_response = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=prompt_workflow_selection, system_prompt=self.summarize_metrics_system.render()
            )

-            KnowledgeBase().practice_knowledge.add([{"user_intention": user_intention, "experiment_id": exp_id,
-                                                     "workflow": workflow_yaml, "reason": reason,
-                                                     "experiment_metrics": metrics_response}])
+            experiment_practice_knowledge = f"""
+user_intention: {user_intention},
+experiment_id: {exp_id},
+workflow yaml: 
+```yaml
+{yaml.safe_dump(workflow_yaml)},
+```
+experiments description: 
+{experiments_desc},
+experiment_metrics: 
+{metrics_response}
+"""
+            KnowledgeBase().practice_knowledge.add([experiment_practice_knowledge])

        prompt_workflow_selection = self.user.render(
-            information=file_info + KnowledgeBase().practice_knowledge.knowledge[-2:],
-            figure_path=figure_path, user_prompt=user_intention
+            experiment_1_info = KnowledgeBase().practice_knowledge.knowledge[-2],
+            experiment_2_info = KnowledgeBase().practice_knowledge.knowledge[-1],
+            figure_path=figure_path, 
+            user_intention=user_intention,
+            target=target,
+            diffrence=diffrence,
+            target_metrics=target_metrics
        )
-        response = be.build_messages_and_create_chat_completion(
+        response = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=prompt_workflow_selection, system_prompt=self.system.render()
        )
        self._context_manager.set_context("summary", response)
--- a/qlib/finco/tpl/sl/workflow_config.yaml
+++ b/qlib/finco/tpl/sl/workflow_config.yaml
@@ -10,6 +10,7 @@ data_handler_config: &data_handler_config
    fit_start_time: 2008-01-01
    fit_end_time: 2014-12-31
    instruments: *market
+    label: ["Ref($close, -21) / Ref($close, -1) - 1"]
    infer_processors:
        - class: RobustZScoreNorm
          kwargs:
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -2,6 +2,8 @@ import json
 import string
 import random

+from typing import List
+from pathlib import Path
 from fuzzywuzzy import fuzz


@@ -44,3 +46,26 @@ def similarity(text1, text2):
 def random_string(length=10):
    letters = string.ascii_letters + string.digits
    return "".join(random.choice(letters) for i in range(length))
+
+
+def directory_tree(root_dif, max_depth=None):
+    """Render the contents of a directory as a text tree, one entry per line.
+
+    :param root_dif: directory to list; a ``str`` or a ``Path`` (converted with ``Path()``).
+    :param max_depth: number of levels to descend; ``None`` or ``0`` means no limit.
+    :return: the tree lines joined with newlines (empty string for an empty directory).
+    """
+    def _directory_tree(root_dir, padding="", deep=1, max_d=None) -> List:
+        # A falsy max_d disables the limit; otherwise stop below the requested depth.
+        if max_d and deep > max_d:
+            return []
+        _output = []
+        files = sorted(root_dir.iterdir())
+        for i, file in enumerate(files):
+            is_last = i == len(files) - 1
+            _output.append(padding + ("└── " if is_last else "├── ") + file.name)
+            if file.is_dir():
+                child_padding = padding + ("    " if is_last else "│   ")
+                _output.extend(_directory_tree(file, child_padding, deep=deep + 1, max_d=max_d))
+        return _output
+    return "\n".join(_directory_tree(Path(root_dif), max_d=max_depth))
--- a/qlib/finco/workflow.py
+++ b/qlib/finco/workflow.py
@@ -1,4 +1,5 @@
 import sys
+import time
 import shutil
 from typing import List

@@ -55,7 +56,7 @@ class WorkflowManager:
        self.prompt_template = PromptTemplate()
        self.context = WorkflowContextManager(workspace=self._workspace)
        self.context.set_context("workspace", self._workspace)
-        self.default_user_prompt = "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown."
+        self.default_user_prompt = "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown while maintaining return."

    def _confirm_and_rm(self):
        # if workspace exists, please confirm and remove it. Otherwise exit.
@@ -146,20 +147,21 @@ class WorkflowManager:


 class LearnManager:
-    __DEFAULT_TOPICS = ["IC", "MaxDropDown", "RollingModel"]
+    __DEFAULT_TOPICS = ["RollingModel"]

    def __init__(self):
        self.epoch = 0
        self.wm = WorkflowManager()

        self.topics = [
-            Topic(name=topic, describe=self.wm.prompt_template.get(f"Topic_{topic}")) for topic in self.__DEFAULT_TOPICS
+            Topic(name=topic, system=self.wm.prompt_template.get(f"Topic_system"), user=self.wm.prompt_template.get(f"Topic_user")) for topic in self.__DEFAULT_TOPICS
        ]
        self.knowledge_base = KnowledgeBase()

    def run(self, prompt):
        # todo: add early stop condition
        for i in range(10):
+            self.wm.logger.info(f"Round: {self.epoch+1}", title="Round")
            self.wm.run(prompt)
            self.learn()
            self.epoch += 1
@@ -181,25 +183,30 @@ class LearnManager:
        user_intention = self.wm.context.get_context("user_intention")
        summary = self.wm.context.get_context("summary")

-        [topic.summarize(self.knowledge_base.practice_knowledge.knowledge[-2:]) for topic in self.topics]
-        [self.knowledge_base.practice_knowledge.add([{"practice_knowledge": topic.knowledge}]) for topic in self.topics]
-        knowledge_of_topics = [{topic.name: topic.knowledge} for topic in self.topics]
+        
+        target = self.wm.context.get_context(f"target")
+        diffrence = self.wm.context.get_context(f"experiments_difference")
+        target_metrics = self.wm.context.get_context(f"high_level_metrics")

-        for task in task_finished:
-            prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
-                summary=summary,
-                brief=knowledge_of_topics,
-                task_finished=[str(t) for t in task_finished],
-                task=task.__class__.__name__, system=task.system.render(), user_intention=user_intention
-            )
+        [topic.summarize(self.knowledge_base.practice_knowledge.knowledge[-2:], user_intention, target, diffrence, target_metrics) for topic in self.topics]
+        [self.knowledge_base.practice_knowledge.add([f"practice_knowledge on {topic.name}:\,{topic.knowledge}"]) for topic in self.topics]
+        # knowledge_of_topics = [{topic.name: topic.knowledge} for topic in self.topics]

-            response = APIBackend().build_messages_and_create_chat_completion(
-                user_prompt=prompt_workflow_selection,
-                system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
-            )
+        # for task in task_finished:
+        #     prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
+        #         summary=summary,
+        #         brief=knowledge_of_topics,
+        #         task_finished=[str(t) for t in task_finished],
+        #         task=task.__class__.__name__, system=task.system.render(), user_intention=user_intention
+        #     )

-            # todo: response assertion
-            task.prompt_template.update(key=f"{task.__class__.__name__}_system", value=Template(response))
+        #     response = APIBackend().build_messages_and_create_chat_completion(
+        #         user_prompt=prompt_workflow_selection,
+        #         system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
+        #     )
+
+        #     # todo: response assertion
+        #     task.prompt_template.update(key=f"{task.__class__.__name__}_system", value=Template(response))

        self.wm.prompt_template.save(Path.joinpath(workspace, f"prompts/checkpoint_{self.epoch}.yml"))
        self.wm.context.clear(reserve=["workspace"])
--- a/qlib/utils/mod.py
+++ b/qlib/utils/mod.py
@@ -206,6 +206,9 @@ def find_all_classes(module_path: Union[str, ModuleType], cls: type) -> List[typ
        >>> from qlib.data.dataset.handler import DataHandler
        >>> find_all_classes("qlib.contrib.data.handler", DataHandler)
        [<class 'qlib.contrib.data.handler.Alpha158'>, <class 'qlib.contrib.data.handler.Alpha158vwap'>, <class 'qlib.contrib.data.handler.Alpha360'>, <class 'qlib.contrib.data.handler.Alpha360vwap'>, <class 'qlib.data.dataset.handler.DataHandlerLP'>]
+        >>> from qlib.contrib.rolling.base import Rolling
+        >>> find_all_classes("qlib.contrib.rolling", Rolling)
+        [<class 'qlib.contrib.rolling.base.Rolling'>, <class 'qlib.contrib.rolling.ddgda.DDGDA'>]

    TODO:
    - skip import error
@@ -220,7 +223,7 @@ def find_all_classes(module_path: Union[str, ModuleType], cls: type) -> List[typ

    def _append_cls(obj):
        # Leverage the closure trick to reuse code
-        if isinstance(obj, type) and issubclass(obj, cls) and cls not in cls_list:
+        if isinstance(obj, type) and issubclass(obj, cls) and obj not in cls_list:
            cls_list.append(obj)

    for attr in dir(mod):
--- a/tests/finco/knowledge/execute/storage.yml
+++ b/tests/finco/knowledge/execute/storage.yml
@@ -0,0 +1,4 @@
+- content: '[Success]: XXXX, the results looks reasonable  # Keywords: supervised
+    learning, data'
+- content: '[Fail]: XXXX, it raise memory error due to  YYYYY  # Keywords: supervised
+    learning, data'
--- a/tests/finco/knowledge/finance/storage.yml
+++ b/tests/finco/knowledge/finance/storage.yml
@@ -0,0 +1 @@
+[]
--- a/tests/finco/knowledge/infrastructure/storage.yml
+++ b/tests/finco/knowledge/infrastructure/storage.yml
--- a/tests/finco/test_sumarize.py
+++ b/tests/finco/test_sumarize.py
@@ -9,6 +9,7 @@ from qlib.finco.task import SummarizeTask
 from qlib.finco.workflow import WorkflowContextManager
 from qlib.finco.llm import APIBackend
 from qlib.finco.workflow import WorkflowManager
+from qlib.finco.knowledge import PracticeKnowledge, YamlStorage

 load_dotenv(verbose=True, override=True)

@@ -61,6 +62,9 @@ class TestSummarize(unittest.TestCase):
        resp = task.get_info_from_file("")
        print(resp)

+    def test_practice_knowledge(self):  # smoke test: only exercises PracticeKnowledge.add(); nothing is asserted
+        pk = PracticeKnowledge(YamlStorage(path.joinpath(Path.cwd().joinpath("knowledge")/f"{self.KT_PRACTICE}/{YamlStorage.DEFAULT_NAME}")))  # NOTE(review): `path` and `self.KT_PRACTICE` are not defined in the visible part of this file - confirm they exist
+        pk.add(["test1", "test2"])  # two plain-string documents

 if __name__ == "__main__":
    unittest.main()
Author	SHA1	Message	Date
Fivele-Li	5af99e1d3f	optimize log (#1612 )	2023-08-01 18:57:48 +08:00
Fivele-Li	70a066baf8	optimize workflow and output format	2023-07-20 12:15:04 +08:00
Xu Yang	f93f331a3b	Merge pull request #1609 from microsoft/xuyang1/finetune_prompts finetune prompts	2023-07-19 20:01:07 +08:00
Xu Yang	561086d9e1	commit	2023-07-19 20:00:09 +08:00
Young	8eb129358b	Add prompt logger	2023-07-18 21:47:58 +08:00
Xu Yang	ce8cb517e9	hot fix one small bug in template	2023-07-18 11:52:43 +08:00
Xu Yang	1c5a73aa81	small refinement in finance knowledge	2023-07-17 21:33:40 +08:00
Xu Yang	d909d54362	Merge pull request #1603 from microsoft/xuyang1/add_idea_task add idea task and round1	2023-07-17 20:38:43 +08:00
you-n-g	b21e044513	Fix find class bug (#1601 )	2023-07-17 20:09:13 +08:00