optimize log

Merge pull request #1609 from microsoft/xuyang1/finetune_prompts
finetune prompts
2026-07-21 19:27:36 +08:00 · 2023-07-20 12:45:07 +08:00 · 2023-07-19 20:01:07 +08:00 · 2023-07-19 20:00:09 +08:00 · 2023-07-18 21:47:58 +08:00 · 2023-07-18 11:52:43 +08:00
33 changed files with 4441 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,10 @@ dist/
 qlib/VERSION.txt
 qlib/data/_libs/expanding.cpp
 qlib/data/_libs/rolling.cpp
+qlib/finco/prompt_cache.json
+qlib/finco/finco_workspace/
+qlib/finco/knowledge/*/knowledge.pkl
+qlib/finco/knowledge/*/storage.yml
 examples/estimator/estimator_example/
 examples/rl/data/
 examples/rl/checkpoints/
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -486,5 +486,8 @@ class QlibConfig(Config):
        return self._registered


+DEFAULT_QLIB_DOT_PATH = Path("~/.qlib/").expanduser()
+
+
 # global config
 C = QlibConfig(_default_config)
--- a/qlib/contrib/analyzer.py
+++ b/qlib/contrib/analyzer.py
@@ -0,0 +1,111 @@
+import logging
+import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+
+from ..log import get_module_logger
+from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec
+
+logger = get_module_logger("analysis", logging.INFO)
+
+
+class AnalyzerTemp:
+    def __init__(self, recorder, output_dir=None, **kwargs):
+        self.recorder = recorder
+        self.output_dir = Path(output_dir) if output_dir else "./"
+
+    def load(self, name: str):
+        """
+        It behaves the same as self.recorder.load_object.
+        But it is an easier interface because users don't have to care about `get_path` and `artifact_path`
+
+        Parameters
+        ----------
+        name : str
+            the name for the file to be load.
+
+        Return
+        ------
+        The stored records.
+        """
+        return self.recorder.load_object(name)
+
+    def analyse(self, **kwargs):
+        """
+        Analyse data index, distribution .etc
+
+        Parameters
+        ----------
+
+
+        Return
+        ------
+        The handled data.
+        """
+        raise NotImplementedError(f"Please implement the `analysis` method.")
+
+
+class HFAnalyzer(AnalyzerTemp):
+    """
+    This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "HFAnalyzerTable.jpeg"
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def analyse(self):
+        pred = self.load("pred.pkl")
+        label = self.load("label.pkl")
+
+        long_pre, short_pre = calc_long_short_prec(pred.iloc[:, 0], label.iloc[:, 0], is_alpha=True)
+        ic, ric = calc_ic(pred.iloc[:, 0], label.iloc[:, 0])
+        metrics = {
+            "IC": ic.mean(),
+            "ICIR": ic.mean() / ic.std(),
+            "Rank IC": ric.mean(),
+            "Rank ICIR": ric.mean() / ric.std(),
+            "Long precision": long_pre.mean(),
+            "Short precision": short_pre.mean(),
+        }
+
+        long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], label.iloc[:, 0])
+        metrics.update(
+            {
+                "Long-Short Average Return": long_short_r.mean(),
+                "Long-Short Average Sharpe": long_short_r.mean() / long_short_r.std(),
+            }
+        )
+
+        table = [[k, v] for (k, v) in metrics.items()]
+        plt.table(cellText=table, loc="center")
+        plt.axis("off")
+        plt.savefig(self.output_dir.joinpath("HFAnalyzerTable.jpeg"))
+        plt.clf()
+
+        plt.scatter(np.arange(0, len(pred)), pred.iloc[:, 0])
+        plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
+        plt.title("HFAnalyzer")
+        plt.savefig(self.output_dir.joinpath("HFAnalyzer.jpeg"))
+        return "HFAnalyzer.jpeg"
+
+
+class SignalAnalyzer(AnalyzerTemp):
+    """
+    This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "signalAnalysis.jpeg"
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def analyse(self, dataset=None, **kwargs):
+        label = self.load("label.pkl")
+
+        plt.hist(label)
+        plt.title("SignalAnalyzer")
+        plt.savefig(self.output_dir.joinpath("signalAnalysis.jpeg"))
+
+        return "signalAnalysis.jpeg"
--- a/qlib/contrib/data/handler.py
+++ b/qlib/contrib/data/handler.py
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

+from typing import Optional
+from qlib.utils.data import update_config
 from ...data.dataset.handler import DataHandlerLP
 from ...data.dataset.processor import Processor
 from ...utils import get_callable_kwargs
@@ -57,12 +59,13 @@ class Alpha360(DataHandlerLP):
        fit_end_time=None,
        filter_pipe=None,
        inst_processors=None,
+        data_loader: Optional[dict] = None,
        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

-        data_loader = {
+        _data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": {
@@ -74,12 +77,14 @@ class Alpha360(DataHandlerLP):
                "inst_processors": inst_processors,
            },
        }
+        if data_loader is not None:
+            update_config(_data_loader, data_loader)

        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            data_loader=data_loader,
+            data_loader=_data_loader,
            learn_processors=learn_processors,
            infer_processors=infer_processors,
            **kwargs
@@ -153,12 +158,13 @@ class Alpha158(DataHandlerLP):
        process_type=DataHandlerLP.PTYPE_A,
        filter_pipe=None,
        inst_processors=None,
+        data_loader: Optional[dict] = None,
        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

-        data_loader = {
+        _data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": {
@@ -170,11 +176,13 @@ class Alpha158(DataHandlerLP):
                "inst_processors": inst_processors,
            },
        }
+        if data_loader is not None:
+            update_config(_data_loader, data_loader)
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            data_loader=data_loader,
+            data_loader=_data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            process_type=process_type,
--- a/qlib/finco/.env.example
+++ b/qlib/finco/.env.example
@@ -0,0 +1,20 @@
+
+OPENAI_API_KEY=your_api_key
+
+# USE_AZURE=True
+# AZURE_API_BASE=your_api_base
+# AZURE_API_VERSION=your_api_version
+
+# gpt-4 supports more tokens but takes longer to respond
+# MODEL=gpt-4
+# MAX_TOKENS=1600
+# MAX_RETRY=1000
+
+
+MAX_TOKENS=1600
+MAX_RETRY=120
+
+CONTINOUS_MODE=True
+DEBUG_MODE=True
+
+# TEMPERATURE=
--- a/qlib/finco/README.md
+++ b/qlib/finco/README.md
@@ -0,0 +1,22 @@
+# This is an experimental branch of "`FI`nancial `CO`pilot of `Qlib`"
+
+## Installation
+
+- To run this module, first install Qlib by following the instructions in [install-from-source](/README.md#install-from-source), or install the `finco` branch directly:
+
+```sh
+python -m pip install git+https://github.com/microsoft/qlib.git@finco
+```
+
+- Then install the other dependencies of finco:
+```sh
+python -m pip install pydantic openai python-dotenv
+```
+
+## Quick run
+
+To run this module, you can start the workflow easily with one command:
+
+```sh
+cd qlib/finco; python cli.py "your prompt"
+```
--- a/qlib/finco/init.py
+++ b/qlib/finco/init.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from pathlib import Path
+
+DIRNAME = Path(__file__).absolute().resolve().parent
+
+
+def get_finco_path() -> Path:
+    """
+    Return the directory that contains this module (the module-level `DIRNAME`).
+
+    The install location of the package is not known in advance, so `__file__` of this module is used to find it.
+    """
+    return DIRNAME
--- a/qlib/finco/cli.py
+++ b/qlib/finco/cli.py
@@ -0,0 +1,15 @@
+import fire
+from qlib.finco.workflow import WorkflowManager
+from dotenv import load_dotenv
+from qlib import auto_init
+
+
+def main(prompt=None):
+    load_dotenv(verbose=True, override=True)
+    wm = WorkflowManager()
+    wm.run(prompt)
+
+
+if __name__ == "__main__":
+    auto_init()
+    fire.Fire(main)
--- a/qlib/finco/cli_learn.py
+++ b/qlib/finco/cli_learn.py
@@ -0,0 +1,15 @@
+import fire
+from qlib.finco.workflow import LearnManager
+from dotenv import load_dotenv
+from qlib import auto_init
+
+
+def main(prompt=None):
+    load_dotenv(verbose=True, override=True)
+    lm = LearnManager()
+    lm.run(prompt)
+
+
+if __name__ == "__main__":
+    auto_init()
+    fire.Fire(main)
--- a/qlib/finco/conf.py
+++ b/qlib/finco/conf.py
@@ -0,0 +1,32 @@
+# TODO: use pydantic for other modules in Qlib
+# from pydantic_settings import BaseSettings
+from qlib.finco.utils import SingletonBaseClass
+
+import os
+
+
+class Config(SingletonBaseClass):
+    """
+    This config is for fast demo purpose.
+    Please use BaseSettings insetead in the future
+    """
+
+    def __init__(self):
+        self.use_azure = os.getenv("USE_AZURE") == "True"
+        self.temperature = 0.5 if os.getenv("TEMPERATURE") is None else float(os.getenv("TEMPERATURE"))
+        self.max_tokens = 800 if os.getenv("MAX_TOKENS") is None else int(os.getenv("MAX_TOKENS"))
+
+        self.openai_api_key = os.getenv("OPENAI_API_KEY")
+        self.use_azure = os.getenv("USE_AZURE") == "True"
+        self.azure_api_base = os.getenv("AZURE_API_BASE")
+        self.azure_api_version = os.getenv("AZURE_API_VERSION")
+        self.model = os.getenv("MODEL") or ("gpt-35-turbo" if self.use_azure else "gpt-3.5-turbo")
+
+        self.max_retry = int(os.getenv("MAX_RETRY")) if os.getenv("MAX_RETRY") is not None else None
+
+        self.continuous_mode = (
+            os.getenv("CONTINOUS_MODE") == "True" if os.getenv("CONTINOUS_MODE") is not None else False
+        )
+        self.debug_mode = os.getenv("DEBUG_MODE") == "True" if os.getenv("DEBUG_MODE") is not None else False
+        self.workspace = os.getenv("WORKSPACE") if os.getenv("WORKSPACE") is not None else "./finco_workspace"
+        self.max_past_message_include = int(os.getenv("MAX_PAST_MESSAGE_INCLUDE") or 6) // 2 * 2
--- a/qlib/finco/context.py
+++ b/qlib/finco/context.py
@@ -0,0 +1,97 @@
+from dataclasses import dataclass, field
+import copy
+from pathlib import Path
+from typing import Optional, List
+from qlib.finco.log import FinCoLog
+from qlib.typehint import Literal
+
+from qlib.finco.utils import similarity
+
+
+@dataclass
+class Design:
+    """The design of one component of an experiment (it is the type of the component fields of `Exp`)."""
+
+    # NOTE(review): the three fields are presumably free text describing the plan, the candidate classes
+    # and the final decision for the component - confirm against the code that fills them.
+    plan: str
+    classes: str
+    decision: str
+
+
+@dataclass
+class Exp:
+    """Experiment: the optional design of each component plus the basic settings of the experiment."""
+
+    # components
+    dataset: Optional[Design] = None
+    datahandler: Optional[Design] = None
+    model: Optional[Design] = None
+    record: Optional[Design] = None
+    strategy: Optional[Design] = None
+    backtest: Optional[Design] = None
+
+    # basic
+    template: Optional[Path] = None
+
+    # rolling strategy. None indicates no rolling
+    rolling: Optional[Literal["base", "ddgda"]] = None
+
+
+@dataclass
+class StructContext:
+    """Part of the context have clear meaning and structure, so they will be saved here and can be easily retrieved and understood"""
+
+    # TODO: move more content in WorkflowContextManager.context to here
+    workspace: Path
+    exp_list: List[Exp] = field(default_factory=list)  # the planned experiments
+
+
+class WorkflowContextManager:
+    """Context Manager stores the context of the workflow"""
+
+    """All context are key value pairs which saves the input, output and status of the whole workflow"""
+
+    def __init__(self, workspace: Path) -> None:
+        self.context = {}
+        self.logger = FinCoLog()
+        # this context is public
+        self.struct_context = StructContext(workspace)  # TODO: move more content in context to here
+        self.set_context("workspace", workspace)  # TODO: remove me
+
+    def set_context(self, key, value):
+        if key in self.context:
+            self.logger.warning("The key already exists in the context, the value will be overwritten")
+        self.context[key] = value
+
+    def get_context(self, key):
+        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
+        if key not in self.context:
+            self.logger.warning("The key doesn't exist in the context")
+            return None
+        return self.context[key]
+
+    def update_context(self, key, new_value):
+        # NOTE: if the key doesn't exist, return None. In the future, we may raise an error to detect abnormal behavior
+        if key not in self.context:
+            self.logger.warning("The key doesn't exist in the context")
+        self.context.update({key: new_value})
+
+    def get_all_context(self):
+        """return a deep copy of the context"""
+        """TODO: do we need to return a deep copy?"""
+        return copy.deepcopy(self.context)
+
+    def retrieve(self, query: str) -> dict:
+        if query in self.context.keys():
+            return {query: self.context.get(query)}
+
+        # Note: retrieve information from context by string similarity maybe abandon in future
+        scores = {}
+        for k, v in self.context.items():
+            scores.update({k: max(similarity(query, k), similarity(query, v))})
+        max_score_key = max(scores, key=scores.get)
+        return {max_score_key: self.context.get(max_score_key)}
+
+    def clear(self, reserve: list = None):
+        if reserve is None:
+            reserve = []
+
+        _context = {k: self.get_context(k) for k in reserve}
+        self.context = _context
--- a/qlib/finco/knowledge.py
+++ b/qlib/finco/knowledge.py
@@ -0,0 +1,539 @@
+from pathlib import Path
+from jinja2 import Template
+from typing import List, Union
+import pickle
+import yaml
+
+from qlib.workflow import R
+from qlib.finco.log import FinCoLog
+from qlib.finco.llm import APIBackend
+from qlib.finco.utils import similarity, random_string, SingletonBaseClass
+
+logger = FinCoLog()
+
+
+class Storage:
+    """
+    This class is responsible for storage and loading of Knowledge related data.
+
+    """
+
+    def __init__(self, path: Union[str, Path], name: str = None):
+        self.path = path if isinstance(path, Path) else Path(path)
+        self.name = name if name else self.path.name
+        self.source = None
+
+        # todo: get document by key
+        self.documents = []
+
+    def add(self, documents: List):
+        self.documents.extend(documents)
+        self.save()
+
+    def load(self, **kwargs):
+        raise NotImplementedError(f"Please implement the `load` method.")
+
+    def save(self, **kwargs):
+        raise NotImplementedError(f"Please implement the `save` method.")
+
+
+class PickleStorage(Storage):
+    """
+    This class is responsible for storage and loading of Knowledge related data in pickle format.
+
+    """
+
+    def __init__(self, path: Union[str, Path]):
+        super().__init__(path)
+
+    @classmethod
+    def load(cls, path: Union[str, Path]):
+        """use pickle as the default load method"""
+        path = path if isinstance(path, Path) else Path(path)
+        with open(path, "rb") as f:
+            return pickle.load(f)
+
+    def save(self, **kwargs):
+        """use pickle as the default save method"""
+        Path.mkdir(self.path.parent, exist_ok=True)
+        with open(self.path, "wb") as f:
+            pickle.dump(self, f)
+
+
+class YamlStorage(Storage):
+    """
+    This class is responsible for storage and loading of Knowledge related data in yaml format.
+
+    """
+
+    DEFAULT_NAME = "storage.yml"
+
+    def __init__(self, path: Union[str, Path]):
+        super().__init__(path)
+        assert self.path.name, "Yaml storage should specify file name."
+        self.load()
+
+    def load(self):
+        """load data from yaml format file"""
+        try:
+            self.documents = yaml.safe_load(self.path.open())
+        except FileNotFoundError:
+            logger.warning(f"YamlStorage: file {self.path} doesn't exist.")
+
+    def save(self, **kwargs):
+        """use pickle as the default save method"""
+        Path.mkdir(self.path.parent, exist_ok=True, parents=True)
+        with open(self.path, 'w') as f:
+            yaml.dump(self.documents, f)
+
+
+class ExperimentStorage(Storage):
+    """
+    This class is responsible for storage and loading of mlflow related data.
+
+    """
+
+    def __init__(self, exp_name, path=None):
+        super().__init__(path=path)
+        self.exp_name = exp_name
+        self.exp = None
+        self.recs = []
+        self.docs = []
+
+    def load(self, exp_name, rec_id=None):
+        recs = []
+        self.exp = R.get_exp(experiment_name=exp_name)
+        for r in self.exp.list_recorders(rtype=self.exp.RT_L):
+            if rec_id is not None and r.id != rec_id:
+                continue
+            recs.append(r)
+        self.recs.extend(recs)
+
+
+class Knowledge:
+    """
+    Use to handle knowledge in finCo such as experiment and outside domain information
+    """
+
+    def __init__(self, storages: Union[List[Storage], Storage], name: str = None):
+        self.name = name if name else random_string()
+        self.workdir = Path.cwd().joinpath("knowledge")
+        self.storages = [storages] if isinstance(storages, Storage) else storages
+        self.knowledge = []
+
+    def get_storage(self, name: str):
+        """
+        return first storage matched given name, else return None
+        """
+        for storage in self.storages:
+            if storage.name == name:
+                return storage
+        return None
+
+    def summarize(self, **kwargs):
+        """
+        summarize storage data to knowledge, default knowledge is storage.documents
+
+        Parameters
+        ----------
+
+        Return
+        ------
+        """
+        knowledge = []
+        for storage in self.storages:
+            knowledge.extend(storage.documents)
+        self.knowledge = knowledge
+
+    @classmethod
+    def load(cls, path: Union[str, Path]):
+        """
+        Load knowledge in memory
+        use pickle as the default file type
+        Parameters
+        ----------
+
+        Return
+        ------
+        """
+        """"""
+        path = path if isinstance(path, Path) else Path(path)
+        with open(path, "rb") as f:
+            return pickle.load(f)
+
+    def brief(self, **kwargs):
+        """
+        Return a brief summary of knowledge
+
+        Parameters
+        ----------
+
+        Return
+        ------
+        """
+        raise NotImplementedError(f"Please implement the `load` method.")
+
+    def save(self, **kwargs):
+        """save knowledge persistently"""
+        # todo: storages save index only
+        Path.mkdir(self.workdir.joinpath(self.name), exist_ok=True)
+        with open(self.workdir.joinpath(self.name).joinpath("knowledge.pkl"), "wb") as f:
+            pickle.dump(self, f)
+
+
+class ExperimentKnowledge(Knowledge):
+    """
+    Handle knowledge from experiments
+    """
+
+    def __init__(self, storages: Union[List[ExperimentStorage], ExperimentStorage]):
+        super().__init__(storages=storages)
+        self.storage = storages
+
+    def brief(self):
+        docs = []
+        for recorder in self.storage.recs:
+            docs.append(
+                {
+                    "exp_name": self.storage.exp.name,
+                    "record_info": recorder.info,
+                    "config": recorder.load_object("config"),
+                    "context_summary": recorder.load_object("context_summary"),
+                }
+            )
+        return docs
+
+
+class PracticeKnowledge(Knowledge):
+    """
+    some template sentence for now
+    """
+
+    def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
+        super().__init__(storages=storages, name="practice")
+
+        self.summarize()
+
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        s = "\n".join(docs)
+        logger.info(f'Add to Practice Knowledge:\n {s}')
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
+        self.save()
+
+
+class FinanceKnowledge(Knowledge):
+    """
+    Knowledge from articles
+    """
+
+    def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
+        super().__init__(storages=storages, name="finance")
+
+        storage = self.get_storage(YamlStorage.DEFAULT_NAME)
+        if len(storage.documents) == 0:
+            docs = self.read_files_in_directory(self.workdir.joinpath(self.name))
+            self.add(docs)
+        self.summarize()
+
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
+        self.save()
+
+    @staticmethod
+    def read_files_in_directory(directory) -> List:
+        """
+        read all .txt files under directory
+        """
+        # todo: split article in trunks
+        file_contents = []
+        for file_path in Path(directory).rglob("*.txt"):
+            if file_path.is_file():
+                file_content = file_path.read_text(encoding="utf-8")
+                file_contents.append(file_content)
+        return file_contents
+
+
+class ExecuteKnowledge(Knowledge):
+    """
+    Config and associate execution result(pass or error message). We can regard the example in prompt as pass execution
+    """
+
+    def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
+        super().__init__(storages=storages, name="execute")
+        self.summarize()
+
+        storage = self.get_storage(YamlStorage.DEFAULT_NAME)
+        if len(storage.documents) == 0:
+            docs = [{"content": "[Success]: XXXX, the results looks reasonable  # Keywords: supervised learning, data"},
+                    {"content": "[Fail]: XXXX, it raise memory error due to  YYYYY  "
+                                "# Keywords: supervised learning, data"}]
+            self.add(docs)
+        self.summarize()
+
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
+        self.save()
+
+
+class InfrastructureKnowledge(Knowledge):
+    """
+    Knowledge from sentences, docstring, and code
+    """
+
+    def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
+        super().__init__(storages=storages, name="infrastructure")
+
+        storage = self.get_storage(YamlStorage.DEFAULT_NAME)
+        if len(storage.documents) == 0:
+            docs = self.get_functions_and_docstrings(Path(__file__).parent.parent.parent)
+            docs.extend([{"docstring": "All the models can be import from `qlib.contrib.models`  "
+                                       "# Keywords: supervised learning"},
+                         {"docstring": "The API to run rolling models can be found in …   #Keywords: control"},
+                         {"docstring": "Here are a list of Qlib’s available analyzers.    #KEYWORDS: analysis"}])
+            self.add(docs)
+        self.summarize()
+
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
+        self.save()
+
+    def get_functions_and_docstrings(self, directory) -> List:
+        """
+        get all method and docstring in .py files under directory
+
+        """
+        functions = []
+        for py_file_path in Path(directory).rglob("*.py"):
+            for _functions in self.get_functions_with_docstrings(py_file_path):
+                functions.append(_functions)
+
+        return functions
+
+    @staticmethod
+    def get_functions_with_docstrings(file_path):
+        """
+        Extract method name and docstring using string matching method
+        """
+        with open(file_path, "r", encoding="utf8") as f:
+            lines = f.readlines()
+
+        functions = []
+        current_func = None
+        docstring = None
+        for line in lines:
+            if line.strip().startswith("def ") or line.strip().startswith("class "):
+                func = line.strip().split(" ")[1].split("(")[0]
+                if func.startswith("__"):
+                    continue
+                if current_func is not None:
+                    docstring = docstring.replace('"""', "") if docstring else docstring
+                    functions.append({"function": current_func, "docstring": docstring})
+                current_func = f"{file_path.name.split('.')[0]}.{func}"
+                docstring = None
+            elif current_func is not None and docstring is None and line.strip().startswith('"""'):
+                docstring = line
+            elif current_func is not None and docstring is not None:
+                docstring += line.strip()
+                if line.strip().endswith('"""'):
+                    docstring = docstring.replace('"""', "") if docstring else docstring
+                    functions.append({"function": current_func, "docstring": docstring})
+                    current_func = None
+                    docstring = None
+
+        return functions
+
+
+class Topic:
+    def __init__(self, name: str, system: Template, user: Template):
+        self.name = name
+        self.system_prompt_template = system
+        self.user_prompt_template = user
+        self.docs = []
+        self.knowledge = None
+        self.logger = FinCoLog()
+
+    def summarize(self, practice_knowlege, user_intention, target, diffrence, target_metrics):
+        system_prompt = self.system_prompt_template.render(topic=self.name)
+        user_prompt = self.user_prompt_template.render(
+            experiment_1_info = practice_knowlege[0],
+            experiment_2_info = practice_knowlege[1],
+            user_intention=user_intention,
+            target=target,
+            diffrence=diffrence,
+            target_metrics=target_metrics)
+        response = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_prompt, system_prompt=system_prompt)
+
+        self.knowledge = response
+        self.docs = practice_knowlege
+        self.logger.info(f"Summary of {self.name}:\n{self.knowledge}")
+
+
+class KnowledgeBase(SingletonBaseClass):
+    """
+    Load knowledge, offer brief information of knowledge and common handle interfaces
+    """
+
+    KT_EXECUTE = "execute"
+    KT_PRACTICE = "practice"
+    KT_FINANCE = "finance"
+    KT_INFRASTRUCTURE = "infrastructure"
+
+    def __init__(self, workdir=None):
+        self.logger = FinCoLog()
+        self.workdir = Path(workdir) if workdir else Path.cwd()
+
+        if not self.workdir.exists():
+            self.logger.warning(f"{self.workdir} not exist, create empty directory.")
+            Path.mkdir(self.workdir)
+
+        self.practice_knowledge = self.load_practice_knowledge(self.workdir)
+        self.execute_knowledge = self.load_execute_knowledge(self.workdir)
+        self.finance_knowledge = self.load_finance_knowledge(self.workdir)
+        self.infrastructure_knowledge = self.load_infrastructure_knowledge(self.workdir)
+
+    def load_experiment_knowledge(self, path) -> List:
+        # similar to practice knowledge, not use for now
+        if isinstance(path, str):
+            path = Path(path)
+
+        knowledge = []
+        path = path if path.name == "mlruns" else path.joinpath("mlruns")
+        # todo: check the influence of set uri
+        R.set_uri(path.as_uri())
+        for exp_name in R.list_experiments():
+            knowledge.append(ExperimentKnowledge(storages=ExperimentStorage(exp_name=exp_name)))
+
+        self.logger.plain_info(f"Load knowledge from: {path} finished.")
+        return knowledge
+
+    def load_practice_knowledge(self, path: Path) -> PracticeKnowledge:
+        self.practice_knowledge = PracticeKnowledge(
+            YamlStorage(path.joinpath(Path.cwd().joinpath("knowledge")/f"{self.KT_PRACTICE}/{YamlStorage.DEFAULT_NAME}")))
+        return self.practice_knowledge
+
+    def load_execute_knowledge(self, path: Path) -> ExecuteKnowledge:
+        self.execute_knowledge = ExecuteKnowledge(
+            YamlStorage(path.joinpath(Path.cwd().joinpath("knowledge")/f"{self.KT_EXECUTE}/{YamlStorage.DEFAULT_NAME}")))
+        return self.execute_knowledge
+
+    def load_finance_knowledge(self, path: Path) -> FinanceKnowledge:
+        self.finance_knowledge = FinanceKnowledge(
+            YamlStorage(path.joinpath(Path.cwd().joinpath("knowledge")/f"{self.KT_FINANCE}/{YamlStorage.DEFAULT_NAME}")))
+        return self.finance_knowledge
+
+    def load_infrastructure_knowledge(self, path: Path) -> InfrastructureKnowledge:
+        self.infrastructure_knowledge = InfrastructureKnowledge(
+            YamlStorage(path.joinpath(Path.cwd().joinpath("knowledge")/f"{self.KT_INFRASTRUCTURE}/{YamlStorage.DEFAULT_NAME}")))
+        return self.infrastructure_knowledge
+
+    def get_knowledge(self, knowledge_type: str = None):
+        if knowledge_type == self.KT_EXECUTE:
+            knowledge = self.execute_knowledge.knowledge
+        elif knowledge_type == self.KT_PRACTICE:
+            knowledge = self.practice_knowledge.knowledge
+        elif knowledge_type == self.KT_FINANCE:
+            knowledge = self.finance_knowledge.knowledge
+        elif knowledge_type == self.KT_INFRASTRUCTURE:
+            knowledge = self.infrastructure_knowledge.knowledge
+        else:
+            knowledge = (
+                    self.execute_knowledge.knowledge
+                    + self.practice_knowledge.knowledge
+                    + self.finance_knowledge.knowledge
+                    + self.infrastructure_knowledge.knowledge
+            )
+        return knowledge
+
+    def query(self, knowledge_type: str = None, content: str = None, n: int = 5):
+        """
+
+        @param knowledge_type: self.KT_EXECUTE, self.KT_PRACTICE or self.KT_FINANCE
+        @param content: content to query KnowledgeBase
+        @param n: top n knowledge to ask ChatGPT
+        @return:
+        """
+        # todo: replace list with persistent storage strategy such as ES/pinecone to enable
+        # literal search/semantic search
+
+        knowledge = self.get_knowledge(knowledge_type=knowledge_type)
+        if len(knowledge) == 0 or knowledge_type == "infrastructure":
+            return ""
+
+        if knowledge_type == "practice":
+            knowledge = [line for line in knowledge if line.startswith("practice_knowledge on")]
+
+        scores = []
+        for k in knowledge:
+            scores.append(similarity(str(k), content))
+        sorted_indexes = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
+        similar_n_indexes = sorted_indexes[:n]
+        similar_n_docs = "\n".join([knowledge[i] for i in similar_n_indexes])
+
+        user_prompt_template = Template(
+"""
+query: '{{query}}' 
+paragraph:
+{{paragraph}}.
+"""
+        )
+        user_prompt = user_prompt_template.render(query=content, paragraph=similar_n_docs)
+        system_prompt = """
+You are an assistant who find relevant sentences from a long paragraph to fit user's query sentence. Relevant means the sentence might provide userful information to explain user's query sentence. People after reading the relevant sentences might have a better understanding of the query sentence.
+
+Please response no less than ten sentences, if paragraph is not enough, you can return less than ten. Don't pop out irrelevant sentences. Please list the sentences in a number index instead of a whole paragraph.
+
+Example input:
+query: what is the best model for image classification?
+paragraph:
+Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
+Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
+The solar system is a collection of celestial bodies, including the Sun, planets, moons, and other objects, that orbit around the Sun due to its gravitational pull.
+A car is a wheeled vehicle, typically powered by an engine or electric motor, used for transportation of people and goods.
+ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
+
+Example output:
+1. ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
+2. Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
+3. Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
+"""
+        response = APIBackend().build_messages_and_create_chat_completion(
+            user_prompt=user_prompt, system_prompt=system_prompt
+        )
+
+        return response
+
+
+# perhaps init KnowledgeBase in other place
+KnowledgeBase(workdir=Path.cwd().joinpath('knowledge'))
--- a/qlib/finco/knowledge/finance/gpt-generated.txt
+++ b/qlib/finco/knowledge/finance/gpt-generated.txt
@@ -0,0 +1,47 @@
+Quantitative investment research, often referred to as "quant," is an investment approach that uses mathematical and statistical models to analyze financial data and identify investment opportunities. This method relies heavily on computer algorithms and advanced data analysis techniques to develop trading strategies and make investment decisions.
+
+One of the key aspects of quant investment research is the development of predictive models to forecast asset prices, market movements, and other financial variables. These models are typically built using historical data and refined through rigorous testing and validation processes.
+
+In quant investment research, various metrics are used to evaluate the performance of a model or strategy. Some common metrics include annual return, information coefficient, maximum drawdown, and cumulative sum (cumsum) return.
+
+Annual return is a measure of an investment's performance over the course of a year and is expressed as a percentage. It is an important metric to consider but can be controversial as higher annual returns are often associated with higher risks.
+
+Maximum drawdown is the largest peak-to-trough decline in an investment's value over a specified period. It is a measure of the strategy's risk and can be controversial since increasing annual return often leads to a more dynamic strategy with larger drawdowns.
+
+Information coefficient (IC) is a measure of the relationship between predicted returns and actual returns. A higher IC indicates a stronger relationship and suggests a more effective predictive model.
+
+Cumulative sum return is the total return generated by an investment over a given period. It is useful for evaluating the overall performance of a strategy and is particularly relevant when comparing multiple strategies over the same time frame.
+
+Another important aspect of quant investment research is portfolio optimization, which involves determining the optimal allocation of assets to maximize returns while minimizing risk.
+
+Quantitative researchers often use techniques such as factor analysis to identify underlying drivers of asset returns. This helps them to build more robust models and better understand the relationships between various financial variables.
+
+Machine learning has become increasingly popular in quant investment research, as it offers new ways to identify patterns and relationships in large datasets. Techniques such as neural networks, decision trees, and clustering algorithms are commonly used in this field.
+
+Backtesting is a critical step in the development of a quantitative investment strategy. It involves applying a model or algorithm to historical data to see how it would have performed under various market conditions.
+
+Risk management is a crucial component of quant investment research. Quantitative researchers must carefully consider the potential risks associated with their models and strategies and take steps to mitigate these risks.
+
+In recent years, there has been a growing interest in alternative data sources for quant investment research. These can include social media sentiment, satellite imagery, and other non-traditional data sources that may provide unique insights into market trends and investment opportunities.
+
+Despite its reliance on data and mathematical models, quant investment research is not without its challenges and limitations. Some critics argue that it is inherently backward-looking, as it relies on historical data to make predictions about future market movements.
+
+Furthermore, quant models may struggle to adapt to rapidly changing market conditions and may be susceptible to overfitting, which occurs when a model is tailored too closely to the training data and performs poorly on new, unseen data.
+
+In conclusion, quantitative investment research is a growing field that combines advanced mathematical and statistical techniques with financial expertise to develop sophisticated investment strategies and models. While it offers significant potential for generating returns and managing risk, it also comes with its own set of challenges and limitations that must be carefully considered.
+
+A rolling meta-controller is a valuable tool in quantitative investment research, providing several benefits such as increased adaptability, enhanced risk management, and improved performance. By continuously updating the parameters of underlying trading models based on recent data, this approach helps strategies stay relevant in dynamic market conditions and reduces the risk of overfitting. Additionally, a rolling meta-controller can manage a diverse portfolio of trading models, leading to a more robust and diversified investment strategy.
+
+However, incorporating a rolling meta-controller also comes with potential challenges, such as increased complexity, higher computational requirements, and the risk of parameter instability. It is essential for quantitative researchers to carefully weigh these factors when deciding whether to include a rolling meta-controller in their trading strategies. By balancing the potential advantages and disadvantages, researchers can optimize their strategies to achieve their specific investment objectives and risk tolerances.
+
+Meta controllers like rolling or DDGDA have been proved to be relevant to the maximum drawdown, but their exact effect is still unknown. Seeing the latest data is always good for adapting to patterns that emerged recently, which a static model is not good at.
+
+If rolling has been proved in practice knowledge that it's useful in minimizing max drawdown or maximize dropdown, always try DDGDA in the following experiments to further minimize the max drawdown.
+
+DDG-DA, which stands for Data Distribution Generation for Predictable Concept Drift Adaptation, is a research paper focused on addressing the challenge of concept drift in machine learning and data mining. Concept drift occurs when the statistical properties of a target variable change over time, causing the model's performance to degrade. This is a common issue in various domains, including finance, where market conditions and economic factors can change rapidly.
+
+The DDG-DA paper proposes a framework for generating synthetic datasets that simulate concept drift in a controlled and predictable manner. By creating these datasets, researchers can better understand how concept drift affects the performance of their machine learning models and develop strategies for adapting to these changes.
+
+The main idea behind DDG-DA is to create synthetic data distributions that mimic the underlying data generating process while controlling the extent of concept drift. This is achieved by using a combination of data transformation techniques, such as scaling, rotation, and translation of the original data distribution.
+
+By generating synthetic datasets with controlled concept drift, researchers can evaluate and compare the performance of various adaptation techniques in a more systematic and controlled manner. This can lead to the development of more robust and adaptive machine learning models that can better handle changing data distributions, ultimately improving the performance of these models in real-world applications, such as finance and investment.
--- a/qlib/finco/llm.py
+++ b/qlib/finco/llm.py
@@ -0,0 +1,139 @@
+import re
+import os
+import time
+import openai
+import json
+import yaml
+from typing import Optional, Tuple, Union
+from qlib.finco.conf import Config
+from qlib.finco.utils import SingletonBaseClass
+from qlib.finco.log import FinCoLog
+from qlib.config import DEFAULT_QLIB_DOT_PATH
+from pathlib import Path
+
+
+class ConvManager:
+    """
+    Conversation manager of LLM.
+
+    Every conversation is dumped as a JSON file into `path`, which makes it convenient to export
+    conversations for debugging. The newest conversation is always `0.json`; on every append the
+    existing `<n>.json` files are shifted to `<n+1>.json`.
+    """
+
+    def __init__(self, path: Union[Path, str] = DEFAULT_QLIB_DOT_PATH / "llm_conv", recent_n: int = 10) -> None:
+        """
+        Parameters
+        ----------
+        path : Union[Path, str]
+            directory that stores the dumped conversations; it is created when missing.
+        recent_n : int
+            number of files (those with the smallest indices) that are shifted on each rotation.
+        """
+        self.path = Path(path)
+        self.path.mkdir(parents=True, exist_ok=True)
+        self.recent_n = recent_n
+
+    def _rotate_files(self):
+        """Shift `<n>.json` to `<n+1>.json` for the `recent_n` smallest indices so that `0.json` becomes free."""
+        pairs = []
+        for f in self.path.glob("*.json"):
+            # fullmatch + escaped dot: only names that are exactly `<digits>.json` take part in the rotation
+            m = re.fullmatch(r"(\d+)\.json", f.name)
+            if m is not None:
+                pairs.append((int(m.group(1)), f))
+        pairs.sort(key=lambda x: x[0])
+        # Walk from the largest index down, so a file is never moved onto one that still has to be shifted.
+        for n, f in pairs[: self.recent_n][::-1]:
+            # `replace` overwrites an existing target on every platform (`rename` raises on Windows).
+            f.replace(self.path / f"{n+1}.json")
+
+    def append(self, conv: Tuple[list, str]):
+        """
+        Store a new conversation as `0.json` after rotating the existing files.
+
+        Parameters
+        ----------
+        conv : Tuple[list, str]
+            the conversation to dump; it is passed to `json.dump` unchanged.
+        """
+        self._rotate_files()
+        # use a context manager so the file is flushed and closed deterministically
+        with open(self.path / "0.json", "w") as fp:
+            json.dump(conv, fp)
+        # TODO: reserve line breaks to make it more convenient to edit the file directly.
+
+
+class APIBackend(SingletonBaseClass):
+    """
+    Wrapper around the OpenAI chat completion API.
+
+    It configures the `openai` module from `Config`, retries failed requests and, when `debug_mode` is
+    enabled in the config, caches responses in `prompt_cache.json` under the current working directory.
+    """
+
+    def __init__(self):
+        self.cfg = Config()
+        openai.api_key = self.cfg.openai_api_key
+        if self.cfg.use_azure:
+            openai.api_type = "azure"
+            openai.api_base = self.cfg.azure_api_base
+            openai.api_version = self.cfg.azure_api_version
+        self.use_azure = self.cfg.use_azure
+
+        self.debug_mode = False
+        if self.cfg.debug_mode:
+            self.debug_mode = True
+            cwd = os.getcwd()
+            self.cache_file_location = os.path.join(cwd, "prompt_cache.json")
+            self.cache = {}
+            if os.path.exists(self.cache_file_location):
+                # close the cache file right after loading instead of leaking the handle
+                with open(self.cache_file_location, "r") as f:
+                    self.cache = json.load(f)
+
+    def build_messages_and_create_chat_completion(
+        self, user_prompt, system_prompt=None, former_messages=None, **kwargs
+    ):
+        """
+        build the messages to avoid implementing several redundant lines of code
+
+        Parameters
+        ----------
+        user_prompt :
+            content of the final `user` message.
+        system_prompt :
+            content of the `system` message. When it is None, `Config().system_prompt` is used and, if that
+            attribute is missing, a built-in default.
+        former_messages :
+            earlier messages of the conversation; at most the last `max_past_message_include` of them are
+            inserted between the system message and the user message.
+        **kwargs :
+            forwarded to `try_create_chat_completion`.
+
+        Returns
+        -------
+        the response returned by `try_create_chat_completion`.
+        """
+        cfg = Config()
+        # TODO: system prompt should always be provided. In development stage we can use default value
+        if system_prompt is None:
+            try:
+                system_prompt = cfg.system_prompt
+            except AttributeError:
+                FinCoLog().warning("system_prompt is not set, using default value.")
+                system_prompt = "You are an AI assistant who helps to answer user's questions about finance."
+        messages = [
+            {
+                "role": "system",
+                "content": system_prompt,
+            }
+        ]
+        n_past = cfg.max_past_message_include
+        # `lst[-0:]` is the whole list, so a limit of 0 must be handled explicitly to include nothing.
+        if former_messages and n_past > 0:
+            messages.extend(former_messages[-n_past:])
+        messages.append(
+            {
+                "role": "user",
+                "content": user_prompt,
+            }
+        )
+        fcl = FinCoLog()
+        response = self.try_create_chat_completion(messages=messages, **kwargs)
+        fcl.log_message(messages)
+        fcl.log_response(response)
+        if self.debug_mode:
+            ConvManager().append((messages, response))
+        return response
+
+    def try_create_chat_completion(self, max_retry=10, **kwargs):
+        """
+        Call `create_chat_completion` and retry on rate-limit, timeout and API errors.
+
+        Parameters
+        ----------
+        max_retry : int
+            number of attempts; it is only used when `self.cfg.max_retry` is None.
+        **kwargs :
+            forwarded to `create_chat_completion`.
+
+        Raises
+        ------
+        Exception
+            when every attempt failed with one of the retried errors.
+        """
+        max_retry = self.cfg.max_retry if self.cfg.max_retry is not None else max_retry
+        for i in range(max_retry):
+            try:
+                response = self.create_chat_completion(**kwargs)
+                return response
+            except (openai.error.RateLimitError, openai.error.Timeout, openai.error.APIError) as e:
+                print(e)
+                print(f"Retrying {i+1}th time...")
+                time.sleep(1)
+                continue
+        raise Exception(f"Failed to create chat completion after {max_retry} retries.")
+
+    def create_chat_completion(
+        self,
+        messages,
+        model=None,
+        temperature: float = None,
+        max_tokens: Optional[int] = None,
+    ) -> str:
+        """
+        Send `messages` to the chat completion endpoint and return the content of the first choice.
+
+        In debug mode the response is looked up in / stored into the prompt cache, keyed by the
+        JSON-serialized messages.
+        """
+        if self.debug_mode:
+            key = json.dumps(messages)
+            if key in self.cache:
+                return self.cache[key]
+
+        # NOTE(review): `model`, `temperature` and `max_tokens` are resolved here but never forwarded to the
+        # API calls below, which only read `self.cfg`. Confirm whether they are meant to be passed on.
+        if temperature is None:
+            temperature = self.cfg.temperature
+        if max_tokens is None:
+            max_tokens = self.cfg.max_tokens
+
+        if self.cfg.use_azure:
+            response = openai.ChatCompletion.create(
+                engine=self.cfg.model,
+                messages=messages,
+                max_tokens=self.cfg.max_tokens,
+            )
+        else:
+            response = openai.ChatCompletion.create(
+                model=self.cfg.model,
+                messages=messages,
+            )
+        resp = response.choices[0].message["content"]
+        if self.debug_mode:
+            self.cache[key] = resp
+            # write through a context manager so the cache file is flushed and closed
+            with open(self.cache_file_location, "w") as f:
+                json.dump(self.cache, f)
+        return resp
--- a/qlib/finco/log.py
+++ b/qlib/finco/log.py
@@ -0,0 +1,139 @@
+"""
+This module will be based on Qlib's logger module and provides some interactive logging functions.
+"""
+import logging
+import time
+
+from typing import Dict, List
+from qlib.finco.utils import SingletonBaseClass
+from contextlib import contextmanager
+
+
+class LogColors:
+    """
+    ANSI escape sequences used to colour and style console output.
+    """
+
+    RED = "\033[91m"
+    GREEN = "\033[92m"
+    YELLOW = "\033[93m"
+    BLUE = "\033[94m"
+    MAGENTA = "\033[95m"
+    CYAN = "\033[96m"
+    WHITE = "\033[97m"
+    GRAY = "\033[90m"
+    BLACK = "\033[30m"
+
+    BOLD = "\033[1m"
+    ITALIC = "\033[3m"
+
+    END = "\033[0m"
+
+    @classmethod
+    def get_all_colors(cls):
+        """Return the values of all non-dunder, non-callable class attributes, in the order given by `dir`."""
+        values = []
+        for attr in dir(cls):
+            if attr.startswith("__"):
+                continue
+            if callable(getattr(cls, attr)):
+                continue
+            values.append(getattr(cls, attr))
+        return values
+
+    def render(self, text: str, color: str = "", style: str = ""):
+        """
+        Wrap `text` in the given color and style codes.
+
+        An `END` code is appended once for the color and once for the style. Passing text that is already
+        rendered is not recommended.
+        """
+        # NOTE: this lookup is repeated on every call, which is not ideal for a frequently used method.
+        known_codes = self.get_all_colors()
+        # NOTE: colors and font styles are validated against the same list; they could be distinguished.
+        if color:
+            assert color in known_codes, f"color should be in: {known_codes} but now is: {color}"
+        if style:
+            assert style in known_codes, f"style should be in: {known_codes} but now is: {style}"
+
+        colored = f"{color}{text}{self.END}"
+        return f"{style}{colored}{self.END}"
+
+
+@contextmanager
+def formatting_log(logger, title="Info"):
+    """
+    Context manager that logs a coloured header line before the wrapped code and a blank line after it.
+
+    For the built-in titles the header is yellow and bold, and the manager sleeps two seconds before
+    logging the trailing blank line; any other title gets a cyan header and no sleep.
+    """
+    header_widths = {"Start": 90, "Round": 90, "Task": 90, "Info": 60, "Interact": 60, "End": 90}
+    length = header_widths.get(title, 60)
+    if title in ["Start", "Round", "Task", "Info", "Interact", "End"]:
+        color, bold = LogColors.YELLOW, LogColors.BOLD
+    else:
+        color, bold = LogColors.CYAN, ""
+    dashes = "-" * (length - len(title))
+
+    logger.info("")
+    logger.info(f"{color}{bold}- {title} {dashes}{LogColors.END}")
+
+    yield
+    if color == LogColors.YELLOW:
+        time.sleep(2)
+    logger.info("")
+
+
+class FinCoLog(SingletonBaseClass):
+    """Interactive console logger that prints coloured, sectioned messages."""
+
+    # TODO:
+    # - config to file logger and save it into workspace
+    def __init__(self) -> None:
+        # TODO:  merge these with Qlib's default logger.
+        #  We can do the same thing by changing the default log dict of Qlib.
+        #  Reference: https://github.com/microsoft/qlib/blob/main/qlib/config.py#L155
+        stream_handler = logging.StreamHandler()
+        stream_handler.setFormatter(logging.Formatter("%(message)s"))
+
+        self.logger = logging.Logger("interactive")
+        self.logger.addHandler(stream_handler)
+        self.logger.setLevel(logging.INFO)
+
+    def log_message(self, messages: List[Dict[str, str]]):
+        """
+        Log every message of a chat request inside a "GPT Messages" section.
+
+        `messages` is a list of dicts, each with a "role" and a "content" key, e.g.
+        [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ]
+        """
+        with formatting_log(self.logger, "GPT Messages"):
+            for msg in messages:
+                role_part = (
+                    f"{LogColors.MAGENTA}{LogColors.BOLD}Role:{LogColors.END} "
+                    f"{LogColors.CYAN}{msg['role']}{LogColors.END}\n"
+                )
+                content_part = (
+                    f"{LogColors.MAGENTA}{LogColors.BOLD}Content:{LogColors.END} "
+                    f"{LogColors.CYAN}{msg['content']}{LogColors.END}\n"
+                )
+                self.logger.info(role_part + content_part)
+
+    def log_response(self, response: str):
+        """Log the model response inside a "GPT Response" section, then pause for one second."""
+        with formatting_log(self.logger, "GPT Response"):
+            self.logger.info(f"{LogColors.CYAN}{response}{LogColors.END}\n")
+            time.sleep(1)
+
+    # TODO:
+    # It looks weird if we only have logger
+    def info(self, *args, plain=False, title="Info"):
+        """
+        Log each argument in white inside a section named `title`.
+
+        With `plain=True` the call is delegated to `plain_info` and no section is opened.
+        """
+        if plain:
+            return self.plain_info(*args)
+        with formatting_log(self.logger, title):
+            for item in args:
+                line = f"{LogColors.WHITE}{item}{LogColors.END}"
+                self.logger.info(line)
+
+    def plain_info(self, *args):
+        """Log each argument on its own line, prefixed with a yellow bold "Info:" tag."""
+        for item in args:
+            line = f"{LogColors.YELLOW}{LogColors.BOLD}Info:{LogColors.END}{LogColors.WHITE}{item}{LogColors.END}"
+            self.logger.info(line)
+
+    def warning(self, *args):
+        """Log each argument at warning level, prefixed with a blue bold "Warning:" tag."""
+        for item in args:
+            line = f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{item}"
+            self.logger.warning(line)
+
+    def error(self, *args):
+        """Log each argument at error level, prefixed with a red bold "Error:" tag."""
+        for item in args:
+            line = f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{item}"
+            self.logger.error(line)
--- a/qlib/finco/prompt_template.py
+++ b/qlib/finco/prompt_template.py
@@ -0,0 +1,33 @@
+from typing import Union
+from pathlib import Path
+from jinja2 import Template
+import yaml
+
+from qlib.finco.utils import SingletonBaseClass
+from qlib.finco import get_finco_path
+
+
+class PromptTemplate(SingletonBaseClass):
+    """
+    Prompt templates loaded from `prompt_template.yaml` in the finco package directory.
+
+    Every top-level key of the YAML file except `mods` becomes an attribute holding a jinja2 `Template`.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        template_file = Path.joinpath(get_finco_path(), "prompt_template.yaml")
+        # open through a context manager so the file handle is closed after parsing
+        with open(template_file, "r") as f:
+            _template = yaml.load(f, Loader=yaml.FullLoader)
+        for k, v in _template.items():
+            if k == "mods":
+                continue
+            self.__setattr__(k, Template(v))
+
+    def get(self, key: str):
+        """Return the template stored under `key`, or an empty `Template` when there is none."""
+        return self.__dict__.get(key, Template(""))
+
+    def update(self, key: str, value):
+        """Store `value` under `key`, replacing any existing template of that name."""
+        self.__setattr__(key, value)
+
+    def save(self, file_path: Union[str, Path]):
+        """
+        Dump all attributes of this object to `file_path` as YAML.
+
+        Parameters
+        ----------
+        file_path : Union[str, Path]
+            target file; missing parent directories are created.
+        """
+        file_path = Path(file_path)
+        # parents=True: the target may sit several levels below a directory that does not exist yet
+        file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        with open(file_path, "w") as f:
+            yaml.dump(self.__dict__, f)
--- a/qlib/finco/prompt_template.yaml
+++ b/qlib/finco/prompt_template.yaml
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
--- a/qlib/finco/tpl/README.md
+++ b/qlib/finco/tpl/README.md
@@ -0,0 +1,12 @@
+This is a set of templates that should be copied for a new project.
+
+Here are the explanations for the templates folder.
+
+| folder | explanations                                                     |
+|--------|------------------------------------------------------------------|
+| sl     | Default configuration for supervised learning                    |
+| sl-cfg | Like configuration in sl. But the dataset is highly configurable |
+
+
+# TODO
+- [ ] [Copier](https://copier.readthedocs.io/en/stable/#quick-start) may be useful if the generation process becomes complicated
--- a/qlib/finco/tpl/init.py
+++ b/qlib/finco/tpl/init.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from pathlib import Path
+
+# Directory that contains this module; resolved once at import time.
+DIRNAME = Path(__file__).absolute().resolve().parent
+
+
+def get_tpl_path() -> Path:
+    """
+    Return the directory of the project templates.
+
+    The templates live next to this module and the install location is not known in advance, so the
+    path is derived from this module's `__file__`.
+    """
+    return DIRNAME
--- a/qlib/finco/tpl/sl-cfg/workflow_config.yaml
+++ b/qlib/finco/tpl/sl-cfg/workflow_config.yaml
--- a/qlib/finco/tpl/sl/workflow_config.yaml
+++ b/qlib/finco/tpl/sl/workflow_config.yaml
@@ -0,0 +1,79 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+experiment_name: finCo
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    label: ["Ref($close, -21) / Ref($close, -1) - 1"]
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: LinearModel
+        module_path: qlib.contrib.model.linear
+        kwargs:
+            estimator: ridge
+            alpha: 0.05
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: True
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -0,0 +1,71 @@
+import json
+import string
+import random
+
+from typing import List
+from pathlib import Path
+from fuzzywuzzy import fuzz
+
+
+class SingletonMeta(type):
+    """
+    Metaclass that makes every class using it create a single instance only.
+
+    The instance is stored on the class itself as `_instance`, so a subclass of a singleton class gets
+    its own instance instead of inheriting the one already created for its parent.
+    """
+
+    _instance = None
+
+    def __call__(cls, *args, **kwargs):
+        # Look only at the class' own namespace: a plain `cls._instance` lookup follows the MRO and would
+        # hand the parent's instance to a subclass once the parent has been instantiated.
+        if cls.__dict__.get("_instance") is None:
+            cls._instance = super(SingletonMeta, cls).__call__(*args, **kwargs)
+        return cls.__dict__["_instance"]
+
+
+class SingletonBaseClass(metaclass=SingletonMeta):
+    """
+    Convenience base class for singletons.
+
+    It exists so that a singleton can be declared with `class A(SingletonBaseClass)` instead of
+    `class A(metaclass=SingletonMeta)`.
+    """
+
+    # TODO: move this class to Qlib's general utils.
+
+
+def parse_json(response):
+    """
+    Parse `response` as JSON.
+
+    Parameters
+    ----------
+    response :
+        the text to parse; it is passed to `json.loads`.
+
+    Returns
+    -------
+    the decoded Python object.
+
+    Raises
+    ------
+    Exception
+        when `response` is not valid JSON; the original decode error is attached as the cause.
+    """
+    try:
+        return json.loads(response)
+    except json.decoder.JSONDecodeError as err:
+        # chain the decode error so the failing position stays visible in the traceback
+        raise Exception(f"Failed to parse response: {response}, please report it or help us to fix it.") from err
+
+
+def similarity(text1, text2):
+    """
+    Return `fuzz.ratio` of the two texts; any argument that is not a `str` is treated as an empty string.
+    """
+    left, right = (value if isinstance(value, str) else "" for value in (text1, text2))
+
+    # Maybe we can use other similarity algorithm such as tfidf
+    return fuzz.ratio(left, right)
+
+
+def random_string(length=10):
+    """Return a random string of `length` characters drawn from ASCII letters and digits."""
+    alphabet = string.ascii_letters + string.digits
+    picked = []
+    for _ in range(length):
+        picked.append(random.choice(alphabet))
+    return "".join(picked)
+
+
+def directory_tree(root_dif, max_depth=None):
+    """
+    Render the content of a directory as a text tree.
+
+    Parameters
+    ----------
+    root_dif :
+        the directory to render; it must provide `iterdir()` (e.g. a `pathlib.Path`).
+    max_depth :
+        number of levels to descend; a falsy value means no limit.
+
+    Returns
+    -------
+    str
+        one line per entry, joined with newlines; entries of each directory are listed in sorted order.
+    """
+
+    def _walk(folder, prefix, level) -> List:
+        if max_depth and level > max_depth:
+            return []
+
+        entries = sorted(folder.iterdir())
+        last_index = len(entries) - 1
+        lines = []
+        for index, entry in enumerate(entries):
+            # the last entry of a directory closes the branch; the others keep the vertical guide
+            if index == last_index:
+                branch, child_prefix = '└── ', prefix + "    "
+            else:
+                branch, child_prefix = '├── ', prefix + "│   "
+            lines.append(prefix + branch + entry.name)
+            if entry.is_dir():
+                lines.extend(_walk(entry, child_prefix, level + 1))
+        return lines
+
+    return '\n'.join(_walk(root_dif, "", 1))
--- a/qlib/finco/workflow.py
+++ b/qlib/finco/workflow.py
@@ -0,0 +1,212 @@
+import sys
+import time
+import shutil
+from typing import List
+
+from pathlib import Path
+
+from qlib.finco.task import IdeaTask, SummarizeTask
+from qlib.finco.prompt_template import PromptTemplate, Template
+from qlib.finco.log import FinCoLog, LogColors
+from qlib.finco.llm import APIBackend
+from qlib.finco.conf import Config
+from qlib.finco.knowledge import KnowledgeBase, Topic
+from qlib.finco.context import WorkflowContextManager
+
+
+# TODO: it is not necessary in current phase
+# class TaskDAG:
+#     """
+#     This is a Task manager. It maintains a graph and a stack structure to manage the tasks.
+#     The reason why the DAG relationship is maintained outside instead of inside the task is that
+#     - To make the creation of tasks simpler (users don't have to care about the relationship)
+#     - To manage the relationship when popping and executing the tasks is relatively easier instead of scattering it everywhere
+#     """
+#     def __init__(self) -> None:
+#         self._finished = []
+#         self._stack = []
+#         self._dag = defaultdict(list)  # from id(object) -> list of id(object)
+#
+#     def pop(self):
+#         return  self._stack.pop(0)
+#
+#     def push(self, task: Union[Task, List[Task]], parent: Optional[Task] = None):
+#         if isinstance(task, Task):
+#             task = [task]
+#         if parent is not None:
+#             self._dag
+#
+#     def done(self) -> bool:
+#         return len(self._stack) == 0
+
+
+class WorkflowManager:
+    """Manager of the whole task automation workflow, including its workspace, context and task queue."""
+
+    def __init__(self, workspace=None) -> None:
+        """
+        Parameters
+        ----------
+        workspace : optional
+            directory of the workflow; `<current working directory>/finco_workspace` is used when it is None.
+        """
+        self.logger = FinCoLog()
+
+        self._workspace = Path.cwd() / "finco_workspace" if workspace is None else Path(workspace)
+        self.conf = Config()
+        self._confirm_and_rm()
+
+        self.prompt_template = PromptTemplate()
+        self.context = WorkflowContextManager(workspace=self._workspace)
+        self.context.set_context("workspace", self._workspace)
+        self.default_user_prompt = "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown while maintaining return."
+
+    def _confirm_and_rm(self):
+        """
+        Remove an existing workspace.
+
+        In continuous mode the workspace is removed without asking. Otherwise the user is asked to
+        confirm; any answer other than "Y"/"y" exits the program.
+        """
+        if not self._workspace.exists():
+            return
+
+        if self.conf.continuous_mode:
+            shutil.rmtree(self._workspace)
+            return
+
+        self.logger.info(title="Interact")
+        question = (
+            f"Will be deleted: \n\t{self._workspace}\n"
+            f"If you do not need to delete {self._workspace},"
+            f" please change the workspace dir or rename existing files\n"
+            f"Are you sure you want to delete, yes(Y/y), no (N/n):"
+        )
+        answer = input(LogColors().render(question, color=LogColors.WHITE))
+        if str(answer) not in ["Y", "y"]:
+            sys.exit()
+        # confirmed: remove self._workspace
+        shutil.rmtree(self._workspace)
+
+    def set_context(self, key, value):
+        """Shortcut for `set_context` of the context manager."""
+        self.context.set_context(key, value)
+
+    def get_context(self) -> WorkflowContextManager:
+        """Return the context manager of this workflow."""
+        return self.context
+
+    def run(self, prompt: str) -> Path:
+        """
+        Generate a codebase based on the prompt by executing the task queue until it is empty.
+
+        Parameters
+        ----------
+        prompt: str
+            the prompt user gives; the default user prompt is used when it is None.
+
+        Returns
+        -------
+        Path
+            The workspace path. The workflow manager is expected to produce output that includes a
+            codebase containing generated code, results, and reports in that location.
+
+            The output path should follow a specific format:
+            - TODO: design
+              There is a summarized report where user can start from.
+        """
+
+        # NOTE: The following items are not designed to make the workflow very flexible.
+        # - The generated tasks can't be changed after getting new information from the execution results.
+        #   - But it is required in some cases, if we want to build a external dataset, it maybe have to plan like autogpt...
+
+        # NOTE: default user prompt might be changed in the future and exposed to the user
+        user_intention = self.default_user_prompt if prompt is None else prompt
+        self.set_context("user_intention", user_intention)
+        self.logger.info(f"user_intention: {self.get_context().get_context('user_intention')}", title="Start")
+
+        # NOTE: list may not be enough for general task list
+        task_list = [IdeaTask(), SummarizeTask()]
+        task_finished = []
+        while task_list:
+            # snapshot of the queue taken before the current task is removed from it
+            queued_names = [str(task) for task in task_list]
+
+            # task list is not long, so sort it is not a big problem
+            # TODO: sort the task list based on the priority of the task
+            # task_list = sorted(task_list, key=lambda x: x.task_type)
+            current = task_list.pop(0)
+            finished_names = [str(task) for task in task_finished]
+            self.logger.info(
+                f"Task finished: {finished_names}",
+                f"Task in queue: {queued_names}",
+                f"Executing task: {str(current)}",
+                title="Task",
+            )
+
+            current.assign_context_manager(self.context)
+            new_tasks = current.execute()
+            current.summarize()
+            task_finished.append(current)
+            self.context.set_context("task_finished", task_finished)
+            self.logger.plain_info(f"{str(current)} finished.\n\n\n")
+
+            # tasks produced by the current one run before the ones that were already waiting
+            task_list = new_tasks + task_list
+
+        return self._workspace
+
+
+class LearnManager:
+    """
+    Run the workflow repeatedly and, after every round, summarize practice knowledge for each topic.
+    """
+
+    __DEFAULT_TOPICS = ["RollingModel"]
+
+    def __init__(self):
+        self.epoch = 0
+        self.wm = WorkflowManager()
+
+        topic_system = self.wm.prompt_template.get("Topic_system")
+        topic_user = self.wm.prompt_template.get("Topic_user")
+        self.topics = [Topic(name=topic, system=topic_system, user=topic_user) for topic in self.__DEFAULT_TOPICS]
+        self.knowledge_base = KnowledgeBase()
+
+    def run(self, prompt):
+        """
+        Run ten rounds of workflow execution followed by learning.
+
+        Parameters
+        ----------
+        prompt :
+            the user prompt handed to `WorkflowManager.run` in every round.
+        """
+        # todo: add early stop condition
+        for _ in range(10):
+            self.wm.logger.info(f"Round: {self.epoch+1}", title="Round")
+            self.wm.run(prompt)
+            self.learn()
+            self.epoch += 1
+
+    def learn(self):
+        """
+        Summarize the finished round into practice knowledge and checkpoint the prompt templates.
+
+        Each topic is summarized from the last two practice-knowledge entries and the round's context;
+        the results are added to the practice knowledge. The prompt templates are then saved to
+        `<workspace>/prompts/checkpoint_<epoch>.yml` and the context is cleared except for `workspace`.
+        """
+        workspace = self.wm.context.get_context("workspace")
+
+        def _drop_duplicate_task(_task: List):
+            # keep the first task of every task class
+            unique_task = {}
+            for obj in _task:
+                task_name = obj.__class__.__name__
+                if task_name not in unique_task:
+                    unique_task[task_name] = obj
+            return list(unique_task.values())
+
+        # one task maybe run several times in workflow
+        # NOTE: `task_finished` and `summary` are only consumed by the disabled block below.
+        task_finished = _drop_duplicate_task(self.wm.context.get_context("task_finished"))
+
+        user_intention = self.wm.context.get_context("user_intention")
+        summary = self.wm.context.get_context("summary")
+
+        target = self.wm.context.get_context("target")
+        difference = self.wm.context.get_context("experiments_difference")
+        target_metrics = self.wm.context.get_context("high_level_metrics")
+
+        practice_knowledge = self.knowledge_base.practice_knowledge
+        # summarize every topic first, then store the results, so each summary sees the same knowledge
+        for topic in self.topics:
+            topic.summarize(practice_knowledge.knowledge[-2:], user_intention, target, difference, target_metrics)
+        for topic in self.topics:
+            # The backslash is escaped explicitly: the text is the same as before, without relying on the
+            # invalid escape sequence `\,`. NOTE(review): a newline may have been intended here - confirm.
+            practice_knowledge.add([f"practice_knowledge on {topic.name}:\\,{topic.knowledge}"])
+        # knowledge_of_topics = [{topic.name: topic.knowledge} for topic in self.topics]
+
+        # for task in task_finished:
+        #     prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
+        #         summary=summary,
+        #         brief=knowledge_of_topics,
+        #         task_finished=[str(t) for t in task_finished],
+        #         task=task.__class__.__name__, system=task.system.render(), user_intention=user_intention
+        #     )
+
+        #     response = APIBackend().build_messages_and_create_chat_completion(
+        #         user_prompt=prompt_workflow_selection,
+        #         system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
+        #     )
+
+        #     # todo: response assertion
+        #     task.prompt_template.update(key=f"{task.__class__.__name__}_system", value=Template(response))
+
+        self.wm.prompt_template.save(Path.joinpath(workspace, f"prompts/checkpoint_{self.epoch}.yml"))
+        self.wm.context.clear(reserve=["workspace"])
--- a/qlib/utils/mod.py
+++ b/qlib/utils/mod.py
@@ -206,6 +206,9 @@ def find_all_classes(module_path: Union[str, ModuleType], cls: type) -> List[typ
        >>> from qlib.data.dataset.handler import DataHandler
        >>> find_all_classes("qlib.contrib.data.handler", DataHandler)
        [<class 'qlib.contrib.data.handler.Alpha158'>, <class 'qlib.contrib.data.handler.Alpha158vwap'>, <class 'qlib.contrib.data.handler.Alpha360'>, <class 'qlib.contrib.data.handler.Alpha360vwap'>, <class 'qlib.data.dataset.handler.DataHandlerLP'>]
+        >>> from qlib.contrib.rolling.base import Rolling
+        >>> find_all_classes("qlib.contrib.rolling", Rolling)
+        [<class 'qlib.contrib.rolling.base.Rolling'>, <class 'qlib.contrib.rolling.ddgda.DDGDA'>]

    TODO:
    - skip import error
@@ -220,7 +223,7 @@ def find_all_classes(module_path: Union[str, ModuleType], cls: type) -> List[typ

    def _append_cls(obj):
        # Leverage the closure trick to reuse code
-        if isinstance(obj, type) and issubclass(obj, cls) and cls not in cls_list:
+        if isinstance(obj, type) and issubclass(obj, cls) and obj not in cls_list:
            cls_list.append(obj)

    for attr in dir(mod):
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -18,7 +18,7 @@ from ..utils import fill_placeholder, flatten_dict, class_casting, get_date_by_s
 from ..utils.time import Freq
 from ..utils.data import deepcopy_basic_type
 from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec
-
+from qlib.contrib.analyzer import HFAnalyzer, SignalAnalyzer

 logger = get_module_logger("workflow", logging.INFO)

@@ -155,6 +155,9 @@ class RecordTemp:
                with class_casting(self, self.depend_cls):
                    self.check(include_self=True)

+    def analyse(self):
+        """Placeholder hook that always raises; a concrete record class is expected to provide its own `analyse`."""
+        raise NotImplementedError("Please implement the `analyse` method.")
+

 class SignalRecord(RecordTemp):
    """
--- a/scripts/finco/README.md
+++ b/scripts/finco/README.md
@@ -0,0 +1,15 @@
+
+
+# Requirements
+
+
+Use the following command to install the project together with its FinCo dependencies.
+```
+pip install -e '.[finco]'
+```
+
+
+# TODOs
+
+- [ ] Select the appropriate LLM API
+  - Which API is more suitable for meeting our requirements - the original API or an alternative like LangChain?
--- a/scripts/finco/cmd.sh
+++ b/scripts/finco/cmd.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Launch the FinCo CLI with a sample prompt, loading optional credentials first.
+set -x # echo each command before running it
+set -e # abort on the first failing command
+
+# Resolve the directory that holds this script (readlink -f follows symlinks).
+DIR="$(
+	cd "$(dirname "$(readlink -f "$0")")" || exit
+	pwd -P
+)"
+# Load the credentials if the optional file exists.
+# The path is quoted so that a checkout directory containing spaces or glob
+# characters is not split or expanded by the shell.
+if [ -e "$DIR/cridential.sh" ]; then
+	source "$DIR/cridential.sh"
+fi
+
+# run the command
+python -m qlib.finco.cli "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown."
--- a/scripts/finco/cridential.sh.example
+++ b/scripts/finco/cridential.sh.example
@@ -0,0 +1,3 @@
+export OPENAI_API_TYPE=azure  # This is only necessary for Azure OpenAI
+export OPENAI_API_KEY=
+export OPENAI_API_BASE=
--- a/setup.py
+++ b/setup.py
@@ -174,6 +174,15 @@ setup(
            "tianshou<=0.4.10",
            "torch",
        ],
+        "finco": [
+            # finco is not needed by all Qlib users, so it gets its own separate extras section.
+            "openai",
+            "pydantic",  # Please add it to basic requirements after the design of pydantic is stable.
+            "pydantic-settings",
+            "python-dotenv",  # I don't think this is necessary if we use pydantic.
+            "fuzzywuzzy",
+            "python-Levenshtein",  # not necessary but accelerate fuzzywuzzy calculation
+        ],
    },
    include_package_data=True,
    classifiers=[
--- a/tests/finco/test_cfg.py
+++ b/tests/finco/test_cfg.py
@@ -0,0 +1,71 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import unittest
+import shutil
+import difflib
+from qlib.finco.tpl import get_tpl_path
+import ruamel.yaml as yaml
+
+from qlib.data.dataset.handler import DataHandlerLP
+from qlib.utils import init_instance_by_config
+from qlib.tests import TestAutoData
+
+from pathlib import Path
+from qlib.finco.tpl import get_tpl_path
+from qlib.finco.task import YamlEditTask
+
+DIRNAME = Path(__file__).absolute().resolve().parent
+
+
+class FincoTpl(TestAutoData):
+    """Tests for the FinCo workflow templates located via get_tpl_path()."""
+
+    def test_tpl_consistence(self):
+        """Motivation: make sure the configurable template is consistent with the default config"""
+        tpl_p = get_tpl_path()
+        with (tpl_p / "sl" / "workflow_config.yaml").open("rb") as fp:
+            config = yaml.safe_load(fp)
+        # init_data_handler
+        hd: DataHandlerLP = init_instance_by_config(config["task"]["dataset"]["kwargs"]["handler"])
+        # NOTE: The config in workflow_config.yaml is generated by the following code:
+        # dump in yaml format to file without auto linebreak
+        # print(yaml.dump(hd.data_loader.fields, width=10000, stream=open("_tmp", "w")))
+
+        with (tpl_p / "sl-cfg" / "workflow_config.yaml").open("rb") as fp:
+            config = yaml.safe_load(fp)
+        hd_ds: DataHandlerLP = init_instance_by_config(config["task"]["dataset"]["kwargs"]["handler"])
+        # Both handlers must declare the same data-loader fields ...
+        self.assertEqual(hd_ds.data_loader.fields, hd.data_loader.fields)
+
+        # ... and fetch the same data; NaNs are filled with 0.0 first, presumably so that missing cells compare equal.
+        check = hd_ds.fetch().fillna(0.0) == hd.fetch().fillna(0.0)
+        self.assertTrue(check.all().all())
+
+    def test_update_yaml(self):
+        """Copy the "sl" workflow config next to this test file and execute a YamlEditTask on the copy.
+
+        NOTE(review): no assertion is made on the edited file, and the copied file is not removed afterwards.
+        """
+        p = get_tpl_path() / "sl" / "workflow_config.yaml"
+        p_new = DIRNAME / "_test_config.yaml"
+        shutil.copy(p, p_new)
+        updated_content = """
+class: LGBModelTest
+module_path: qlib.contrib.model.gbdt
+kwargs:
+    loss: mse
+    colsample_bytree: 1.8879
+    learning_rate: 0.3
+    subsample: 0.8790
+    lambda_l1: 205.7000
+    lambda_l2: 580.9769
+    max_depth: 9
+    num_leaves: 211
+    num_threads: 21
+"""
+        t = YamlEditTask(p_new, "task.model", updated_content)
+        t.execute()
+        # NOTE: the format is changed by ruamel.yaml, so it can't be compared by text directly.
+        # print the diff between p and p_new with difflib
+        # with p.open("r") as fp:
+        #     content = fp.read()
+        # with p_new.open("r") as fp:
+        #     content_new = fp.read()
+        # for line in difflib.unified_diff(content, content_new, fromfile="original", tofile="new", lineterm=""):
+        #     print(line)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/finco/test_sumarize.py
+++ b/tests/finco/test_sumarize.py
@@ -0,0 +1,66 @@
+import unittest
+import os
+import shutil
+
+from dotenv import load_dotenv
+# pydantic supports loading .env files, so load_dotenv will be deprecated in the future.
+
+from qlib.finco.task import SummarizeTask
+from qlib.finco.workflow import WorkflowContextManager
+from qlib.finco.llm import APIBackend
+from qlib.finco.workflow import WorkflowManager
+
+load_dotenv(verbose=True, override=True)
+
+
+class TestSummarize(unittest.TestCase):
+    """Smoke tests around SummarizeTask, APIBackend and WorkflowManager.
+
+    NOTE(review): these tests only print their results; none of them asserts on the output.
+    """
+
+    def test_chat(self):
+        """Send one system + user message pair through APIBackend and print the response."""
+        messages = [
+            {
+                "role": "system",
+                "content": "Your are a professional financial assistant.",
+            },
+            {
+                "role": "user",
+                "content": "How to write a perfect quant strategy.",
+            },
+        ]
+        response = APIBackend().try_create_chat_completion(messages=messages)
+        print(response)
+
+    def test_execution(self):
+        """Execute a SummarizeTask whose context holds `workspace` and `user_prompt`, then print the result."""
+        task = SummarizeTask()
+        context = WorkflowContextManager()
+        context.set_context("workspace", "../../examples/benchmarks/Linear")
+        context.set_context("user_prompt", "My main focus is on the performance of the strategy's return."
+                                           "Please summarize the information and give me some advice.")
+        task.assign_context_manager(context)
+        resp = task.execute()
+        print(resp)
+
+    def test_generate_batch_result(self):
+        """Run the workflow 10 times with the default user prompt and collect the generated reports.
+
+        After each run, an existing finCoReport.md in the workspace's parent directory is moved to
+        summaries/finCoReport{i}.md.
+        """
+        wm = WorkflowManager()
+
+        prompt = wm.default_user_prompt
+        # prompt = ""
+
+        # Reports are looked up in the parent directory of the configured workspace.
+        workdir = os.path.dirname(wm.get_context().get_context("workspace"))
+        summaries_path = os.path.join(workdir, "summaries")
+
+        if not os.path.exists(summaries_path):
+            os.makedirs(summaries_path)
+
+        for i in range(10):
+            wm.run(prompt)
+            if os.path.exists(f"{workdir}/finCoReport.md"):
+                shutil.move(f"{workdir}/finCoReport.md", f"{workdir}/summaries/finCoReport{i}.md")
+
+    def test_parse2txt(self):
+        """Call SummarizeTask.get_info_from_file with an empty string and print what it returns."""
+        task = SummarizeTask()
+        resp = task.get_info_from_file("")
+        print(resp)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/finco/test_utils.py
+++ b/tests/finco/test_utils.py
@@ -0,0 +1,23 @@
+import unittest
+from qlib.finco.utils import SingletonBaseClass
+
+
+class SingletonTest(unittest.TestCase):
+
+    def test_singleton(self):
+        # `closure_checker` grows by one entry every time A.__init__ actually runs.
+        closure_checker = []
+
+        class A(SingletonBaseClass):
+
+            def __init__(self) -> None:
+                closure_checker.append(0)
+
+        A()
+        self.assertEqual(len(closure_checker), 1)
+        # A second instantiation must not run __init__ again, so the length stays at 1.
+        A()
+        self.assertEqual(len(closure_checker), 1)
+
+
+if __name__ == "__main__":
+    unittest.main()
Author	SHA1	Message	Date
Fivele-Li	753c272202	optimize log	2023-07-20 12:45:07 +08:00
Xu Yang	f93f331a3b	Merge pull request #1609 from microsoft/xuyang1/finetune_prompts finetune prompts	2023-07-19 20:01:07 +08:00
Xu Yang	561086d9e1	commit	2023-07-19 20:00:09 +08:00
Young	8eb129358b	Add prompt logger	2023-07-18 21:47:58 +08:00
Xu Yang	ce8cb517e9	hot fix one small bug in template	2023-07-18 11:52:43 +08:00
Xu Yang	1c5a73aa81	small refinement in finance knowledge	2023-07-17 21:33:40 +08:00
Xu Yang	d909d54362	Merge pull request #1603 from microsoft/xuyang1/add_idea_task add idea task and round1	2023-07-17 20:38:43 +08:00
Xu Yang	13c63eee0a	merge into one commit	2023-07-17 20:33:47 +08:00
you-n-g	b21e044513	Fix find class bug (#1601 )	2023-07-17 20:09:13 +08:00
Fivele-Li	8c1905d1d7	Optimize KnowledgeBase to complete workflow (#1598 ) * optimize KnowledgeBase to complete workflow; * Update Knowledge methods of handle data IO; * Update task to handle multi recorders; * Integrate Knowledge to workflow; * optimize KnowledgeBase to complete workflow * Update TrainTask & AnalyseTask's recorder method; * Update SummarizeTask; * Update Workflow & Topic prompt;	2023-07-17 18:17:04 +08:00
you-n-g	1c9841b15e	Connect TrainTask & Rolling & DDG-DA (#1599 ) * Connect train task to ddg-da & rolling * Pylint & black formatting * Formatting	2023-07-17 09:58:58 +08:00
Xu Yang	5e0873ca81	Merge pull request #1592 from Fivele-Li/update_knowledge_module update knowledge module;	2023-07-16 11:36:31 +08:00
Cadenza-Li	8a56cf69b4	add KnowledgeBase to workflow; * Update CMDTask prompt example for Windows OS; * Windows OS decode output of subprocess in gbk by default, specify encoding format explict; * Add KnowledgeBase's 4 knowledge types to corresponding task;	2023-07-14 22:25:43 +08:00
you-n-g	a19e616bc3	Update test_utils.py	2023-07-14 16:43:43 +08:00
Cadenza-Li	025859acba	Merge branch 'finco' into update_knowledge_module	2023-07-14 16:19:57 +08:00
Xu Yang	e5f685ce08	merge all commit (#1593 ) Co-authored-by: Xu Yang <xuyang1@microsoft.com>	2023-07-14 16:17:24 +08:00
Cadenza-Li	b9b6938e71	Merge branch 'finco' into update_knowledge_module	2023-07-14 14:20:21 +08:00
Young	51a9403b15	Merge remote-tracking branch 'origin/main' into finco	2023-07-14 12:16:51 +08:00
Cadenza-Li	37d83fd747	update knowledge module; * Knowledge.storage to storages list; * optimize Knowledge & Storage save and load method; * optimize Knowledge query prompt;	2023-07-13 17:20:22 +08:00
Cadenza-Li	d7ab6935dd	update knowledge module; * add storage class; * new practice,execute,finance,infrastructure knowledge; * add query method to KnowledgeBase;	2023-07-12 17:23:47 +08:00
Fivele-Li	effed382e9	Optimize prompt for entire learn loop (#1589 ) * Adjust prompt and fix cases * adjust summarizeTask & learn prompts; * fix typos & drop duplicate task method; * adjust learn prompts;	2023-07-11 18:13:52 +08:00
Fivele-Li	86ffd1799d	Add knowledge module and tune summarizeTask (#1582 ) * Add knowledge module * add KnowledgeExperiment add KnowledgeBase; * add knowledge associate prompts to template; * Add Topic class * add Topic to summarize knowledge; * add recorder's metric to summarizeTask; --------- Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-07-06 11:39:36 +08:00
Young	aef11536e3	rename & test	2023-07-04 20:28:08 +08:00
Xu Yang	8b0fdf1623	Merge pull request #1581 from microsoft/xuyang1/fix_singleton_bug fix singleton bug	2023-07-04 16:51:51 +08:00
Xu Yang	9a36f8da20	fix singleton bug	2023-07-04 16:20:02 +08:00
Xu Yang	b7757d5008	Merge pull request #1580 from microsoft/xuyang1/refine_workflow_to_increase_success_rate refine workflow to increase success rate	2023-07-03 17:59:54 +08:00
Xu Yang	ee5e5cfdd8	remove useless code	2023-07-03 17:57:13 +08:00
Xu Yang	6cb87ecfd1	refine code to use qrun	2023-07-03 17:56:22 +08:00
Xu Yang	9119bcdd3c	Merge pull request #1576 from microsoft/xuyang1/add_config_and_code_dump_task refine workflow and prompts	2023-06-30 14:43:49 +08:00
Xu Yang	4fccf8112d	fix one workflow	2023-06-30 14:33:41 +08:00
Xu Yang	73bd79ca1a	merge into one commit	2023-06-30 14:23:40 +08:00
Fivele-Li	7e84f3aae2	Add backtest and backforward task (#1568 ) * * add TrainTask & BacktestTask; * add BackForwardTask; * adjust prompt_template.yaml which default config failed to backtest; * run workflow in loop * add update method to prompt_template.py * remove debug code * Adjust Learn Process * add LearnManager class & use LearnManager to update system prompt; * use qrun to replace recorder for training and backtesting; * Adjust analyser * analyser independent of recorder; * rename analyser's workspace attribution; * analyser load variable by recorder. --------- Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-06-30 10:04:43 +08:00
Fivele-Li	1326ac614d	Add docs to context and retrieve (#1566 ) * add analyser docstring to context; * add retrieve method to context manager; * add notes to retrieve	2023-06-24 21:47:27 +08:00
Fivele-Li	f12184cc0f	Add analyser task and optimize interact (#1552 ) * * optimize interact * add AnalyserTask * optimize logger format and add render feature * format optimize	2023-06-16 11:42:45 +08:00
Xu Yang	a70386ad52	Merge pull request #1550 from microsoft/xuyang1/refine_task_prompts add datahandler and design action task according to component	2023-06-14 14:52:42 +08:00
Xu Yang	74619ed8d8	fix using defaut in record strategy and backtest	2023-06-14 14:52:16 +08:00
Fivele-Li	1a523df007	Optimize log and interact of FinCo (#1549 ) * use FinCoLog for a better interact experience * addition file changes * optimize format * optimize format	2023-06-14 14:48:17 +08:00
Xu Yang	f9cc8a5aaa	remove useless prompt	2023-06-14 10:46:38 +08:00
Xu Yang	7762c5a1fd	add datahandler and design action task according to component	2023-06-13 23:28:27 +08:00
Xu Yang	fa7ef29281	Merge pull request #1548 from microsoft/xuyang1/add_dump_to_file_task add simple readme & move prompt templates to outer yaml file to make the code clean	2023-06-13 15:29:13 +08:00
Xu Yang	429c9a7c66	format	2023-06-13 15:27:59 +08:00
Xu Yang	80fbc00792	move prompt templates to yaml file to make code clean	2023-06-13 15:21:19 +08:00
Xu Yang	01accec24c	update code	2023-06-12 16:25:16 +08:00
Fivele-Li	1d88830b0d	Add recorder task and visualize (#1542 ) * add recorder task * add batch generate summarize report unittest. * * add recorder to RecorderTask; * add matplot figure to analyzer.py * add image to markdown; * Add some log * update figure path. --------- Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-06-12 15:48:00 +08:00
you-n-g	ad7498e287	Edit yaml task (#1538 ) * Edit yaml task * update comments	2023-06-02 00:44:41 +08:00
you-n-g	73d51f05b4	Init workspace and CMDTask (#1537 ) * Update setup.py and config * WIP * init_workspace and CMDTask * Delete test_sumarize.py	2023-06-01 23:32:35 +08:00
Fivele-Li	3b56b8e6c0	Optimize summarize task prompt and others (#1533 ) * 1.update prompt; 2.update fetch information method. * 1.update prompt; 2.save result to markdown; * 1.get context info from context_manager; 2.run the entire process successfully.	2023-06-01 21:22:24 +08:00
you-n-g	40e0c329ba	Add configurable dataset (#1535 )	2023-06-01 20:05:02 +08:00
Xu Yang	e376648860	Merge pull request #1536 from microsoft/xuyang1/add_debug_mode_to_save_cache add a debug mode to speed up debug process	2023-06-01 19:44:17 +08:00
Xu Yang	5f37f32184	update code	2023-06-01 19:38:26 +08:00
Xu Yang	d46b4c1ebf	Merge pull request #1534 from microsoft/xuyang1/add_code_implementation_task add code implementation task	2023-06-01 18:13:05 +08:00
Xu Yang	0515524b51	add code implementation code	2023-06-01 18:04:31 +08:00
Xu Yang	cda32d5703	Merge pull request #1532 from microsoft/xuyang1/add-plan-and-config-task-implementation add the initial version of plan and config task implementation	2023-06-01 11:20:04 +08:00
Xu Yang	e2332a004b	imporove some words in prompt	2023-06-01 01:09:14 +08:00
Xu Yang	08d9dbccc9	update v1 code containing SLplan and config action	2023-06-01 00:36:04 +08:00
Fivele-Li	e7cd93a36d	add base method for summarization; (#1530 )	2023-05-31 15:50:34 +08:00
Xu Yang	3919678028	split task into workflow and task to make the strcture more clear	2023-05-31 11:45:25 +08:00
Xu Yang	421b1403b2	Merge pull request #1528 from microsoft/xuyang1/refine_task_and_implement_workflow_task_as_example Xuyang1/refine task and implement workflow task as example	2023-05-31 11:36:36 +08:00
Xu Yang	94102fb742	remove tasktype variable	2023-05-31 11:35:54 +08:00
Cadenza-Li	74a5d7c8af	add parse method for summarization;	2023-05-31 00:08:21 +08:00
Xu Yang	ce39b4b6f8	add qlib auto init so logger can display info	2023-05-30 21:52:35 +08:00
Xu Yang	2af35d9c89	second commit	2023-05-30 20:20:16 +08:00
Xu Yang	f37643550b	first round	2023-05-30 20:19:58 +08:00
Xu Yang	55611aa43e	Merge pull request #1527 from microsoft/xuyang1/add_openai_api_support add openai interface support	2023-05-30 13:44:10 +08:00
Xu Yang	f24253efd2	add openai interface support	2023-05-30 13:42:01 +08:00
Young	7c4f3b8a7d	Initial interface for discussion	2023-05-24 12:18:31 +08:00