From 7c4f3b8a7d7d0374a03e88acabb4c6f8b12bc79b Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Wed, 24 May 2023 12:18:31 +0800
Subject: [PATCH] Initial interface for discussion

---
 qlib/finco/__init__.py                  |   0
 qlib/finco/cli.py                       |  11 +++
 qlib/finco/conf.py                      |   8 ++
 qlib/finco/llm.py                       |  14 +++
 qlib/finco/task.py                      | 120 ++++++++++++++++++++++++
 qlib/finco/tpls/README.md               |   6 ++
 qlib/finco/tpls/sl/workflow_config.yaml |  72 ++++++++++++++
 scripts/finco/README.md                 |  14 +++
 scripts/finco/cmd.sh                    |  15 +++
 scripts/finco/cridential.sh.example     |   3 +
 10 files changed, 263 insertions(+)
 create mode 100644 qlib/finco/__init__.py
 create mode 100644 qlib/finco/cli.py
 create mode 100644 qlib/finco/conf.py
 create mode 100644 qlib/finco/llm.py
 create mode 100644 qlib/finco/task.py
 create mode 100644 qlib/finco/tpls/README.md
 create mode 100644 qlib/finco/tpls/sl/workflow_config.yaml
 create mode 100644 scripts/finco/README.md
 create mode 100644 scripts/finco/cmd.sh
 create mode 100644 scripts/finco/cridential.sh.example

diff --git a/qlib/finco/__init__.py b/qlib/finco/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/qlib/finco/cli.py b/qlib/finco/cli.py
new file mode 100644
index 000000000..8947bc6dc
--- /dev/null
+++ b/qlib/finco/cli.py
@@ -0,0 +1,11 @@
+import fire
+from qlib.finco.task import WorkflowManager
+
+
+def main(prompt):
+    """Build a `WorkflowManager` and run it on the user-supplied `prompt`."""
+    WorkflowManager().run(prompt)
+
+
+if __name__ == "__main__":
+    fire.Fire(main)  # hand `main` to python-fire so `prompt` is taken from the command line
diff --git a/qlib/finco/conf.py b/qlib/finco/conf.py
new file mode 100644
index 000000000..6ca90443e
--- /dev/null
+++ b/qlib/finco/conf.py
@@ -0,0 +1,8 @@
+# TODO: use pydantic for other modules in Qlib
+from pydantic import BaseSettings
+
+
+class Conf(BaseSettings):
+    """Module-specific settings for finco; no fields are defined yet."""
+
+    ...
diff --git a/qlib/finco/llm.py b/qlib/finco/llm.py
new file mode 100644
index 000000000..5e573a93a
--- /dev/null
+++ b/qlib/finco/llm.py
@@ -0,0 +1,14 @@
+import openai
+
+
+def example():
+    """Send one hard-coded two-message chat request via `openai.ChatCompletion.create` and print the response."""
+    response = openai.ChatCompletion.create(
+        engine="gpt-35-turbo",  # The deployment name you chose when you deployed the ChatGPT or GPT-4 model.
+        # Other deployments tried: engine="gpt-4" raised openai.error.RateLimitError (requests under Azure OpenAI API version 2023-05-15 exceeded the call rate limit of the current OpenAI S0 pricing tier); engine="gpt-4-32k" carried the note "This works for only;" (meaning unclear -- confirm with the author).
+        messages=[
+            {"role": "system", "content": "Assistant is a large language model trained by OpenAI."},
+            {"role": "user", "content": "Who were the founders of Microsoft?"},
+        ],
+    )
+    print(response)
diff --git a/qlib/finco/task.py b/qlib/finco/task.py
new file mode 100644
index 000000000..4a7f8415b
--- /dev/null
+++ b/qlib/finco/task.py
@@ -0,0 +1,120 @@
+from pathlib import Path
+from typing import Any, List
+from qlib.typehint import Literal
+
+
+class Task:
+    """The user's intention, which was initially represented by a prompt, is achieved through a sequence of tasks.
+
+    Some thoughts:
+    - Do we have to create a new concept of Action besides Task?
+        - Most actions directly modify the disk, with their interfaces taking in and outputting text. The LLM's interface similarly takes in and outputs text.
+        - Some actions will run some commands.
+
+    Maybe we can just categorize tasks as follows?
+    - Planning task (it is at a high level and difficult to execute directly; therefore, it should be further divided)
+    - Action Task
+        - CMD Task: it is expected to run a cmd
+        - Edit Task: it is supposed to edit the code base directly.
+    """
+
+    def __init__(self, context=None) -> None:
+        """Accept an optional initial `context`; this base class does not store it."""
+
+    def summarize(self) -> str:
+        """After the execution of the task, it is supposed to generate some context about the execution"""
+        return ""
+
+    def update_context(self, latest_context):
+        """Hook that receives the latest workflow context; a no-op in this base class."""
+
+    def execute(self) -> Any:
+        """Run the task and return its result (None in this base class); this is the name `WorkflowManager.run` calls."""
+
+    execution = execute  # backward-compatible alias for the method's original name
+
+
+class PlanTask(Task):  # a task whose `execute` returns further tasks instead of a status
+    def execute(self) -> List[Task]:
+        return []  # this base implementation returns no follow-up tasks
+
+
+class WorkflowTask(PlanTask):
+    """Make the choice of which main workflow (RL, SL) will be used."""
+
+    def execute(self):
+        return []  # TODO: return the chosen workflow's tasks; a list (not None) keeps `task_list.extend(res)` in the manager valid
+
+
+class SLTask(PlanTask):
+    def execute(self):
+        """Return a list of the tasks of interest; copying the template project may be a part of the task."""
+        return []
+
+    # The method was originally misspelled; keep that name working for any existing caller.
+    exeute = execute
+
+
+class ActionTask(Task):  # a task whose `execute` reports a "fail"/"success" status string
+    def execute(self) -> Literal["fail", "success"]:
+        return "success"  # this base implementation always reports success
+
+
+class WorkflowManager:
+    """This manages the whole task automation workflow including tasks and actions"""
+
+    def __init__(self, name="project", output_path=None) -> None:
+        """Resolve the output location (`output_path` if given, else `<cwd>/<name>`) and start with an empty context."""
+        if output_path is None:
+            self._output_path = Path.cwd() / name
+        else:
+            self._output_path = Path(output_path)
+        self._context = []
+
+    def add_context(self, task_res):
+        self._context.append(task_res)
+
+    def get_context(self):
+        """Return a copy of the task summaries collected so far. TODO: context manager?"""
+        return list(self._context)
+
+    def run(self, prompt: str) -> Path:
+        """
+        The workflow manager is supposed to generate a codebase based on the prompt
+
+        Parameters
+        ----------
+        prompt: str
+            the prompt user gives
+
+        Returns
+        -------
+        Path
+            The workflow manager is expected to produce output that includes a codebase containing generated code, results, and reports in a designated location.
+            The path is returned
+
+            The output path should follow a specific format:
+            - TODO: design
+              There is a summarized report where user can start from.
+        """
+
+        # NOTE: The following items are not designed to make the workflow very flexible.
+        # - The generated tasks can't be changed after getting new information from the execution results.
+        #   - But it is required in some cases, if we want to build an external dataset, it may have to plan like autogpt...
+
+        # NOTE: list may not be enough for general task list
+        task_list = [WorkflowTask(prompt)]
+        while task_list:
+            t = task_list.pop(0)
+            t.update_context(self.get_context())
+            res = t.execute()
+            if isinstance(t, PlanTask):
+                task_list.extend(res)
+            elif isinstance(t, ActionTask):
+                if res != "success":
+                    ...
+                    # TODO: handle the unexpected execution Error
+            else:
+                raise NotImplementedError("Unsupported action type")
+            self.add_context(t.summarize())
+        return self._output_path
diff --git a/qlib/finco/tpls/README.md b/qlib/finco/tpls/README.md
new file mode 100644
index 000000000..b7b74547a
--- /dev/null
+++ b/qlib/finco/tpls/README.md
@@ -0,0 +1,6 @@
+This is a set of templates that should be copied for a new project.
+
+
+
+# TODO
+- [ ] [Copier](https://copier.readthedocs.io/en/stable/#quick-start) may be useful if the generation process becomes complicated
diff --git a/qlib/finco/tpls/sl/workflow_config.yaml b/qlib/finco/tpls/sl/workflow_config.yaml
new file mode 100644
index 000000000..2d441dea9
--- /dev/null
+++ b/qlib/finco/tpls/sl/workflow_config.yaml
@@ -0,0 +1,72 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            model: <MODEL> 
+            dataset: <DATASET>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
diff --git a/scripts/finco/README.md b/scripts/finco/README.md
new file mode 100644
index 000000000..3e5b626c1
--- /dev/null
+++ b/scripts/finco/README.md
@@ -0,0 +1,14 @@
+
+
+# Requirements
+
+```
+pydantic
+openai
+```
+
+
+# TODOs
+
+- [ ] Select the appropriate LLM API
+  - Which API is more suitable for meeting our requirements - the original API or an alternative like LangChain?
diff --git a/scripts/finco/cmd.sh b/scripts/finco/cmd.sh
new file mode 100644
index 000000000..06175863d
--- /dev/null
+++ b/scripts/finco/cmd.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e # exit immediately if a command fails
+
+DIR="$(
+	cd "$(dirname "$(readlink -f "$0")")" || exit
+	pwd -P
+)"
+# load the credentials (file is named cridential.sh) before tracing is enabled, so `set -x` does not echo the API key
+if [ -e "$DIR/cridential.sh" ]; then
+	source "$DIR/cridential.sh"
+fi
+set -x # show command
+
+# run the command
+python -m qlib.finco.cli "please help me build a low turnover strategy that focus more on longterm return"
diff --git a/scripts/finco/cridential.sh.example b/scripts/finco/cridential.sh.example
new file mode 100644
index 000000000..1ecf2ad17
--- /dev/null
+++ b/scripts/finco/cridential.sh.example
@@ -0,0 +1,3 @@
+export OPENAI_API_TYPE=azure  # This is only necessary for Azure OpenAI
+export OPENAI_API_KEY=
+export OPENAI_API_BASE=