From 8a56cf69b4bad651339bf313529e95bf713db168 Mon Sep 17 00:00:00 2001 From: Cadenza-Li <128388363+Fivele-Li@users.noreply.github.com> Date: Fri, 14 Jul 2023 22:25:43 +0800 Subject: [PATCH] add KnowledgeBase to workflow; * Update CMDTask prompt example for Windows OS; * Windows OS decodes output of subprocess in gbk by default, specify encoding format explicitly; * Add KnowledgeBase's 4 knowledge types to corresponding task; --- qlib/finco/knowledge.py | 10 +++++-- qlib/finco/prompt_template.yaml | 6 ++++ qlib/finco/task.py | 53 ++++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/qlib/finco/knowledge.py b/qlib/finco/knowledge.py index 6920b6295..7a8633e56 100644 --- a/qlib/finco/knowledge.py +++ b/qlib/finco/knowledge.py @@ -223,9 +223,10 @@ class FinanceKnowledge(Knowledge): def __init__(self, storages: Union[List[YamlStorage], YamlStorage]): super().__init__(storages=storages, name="finance") - docs = self.read_files_in_directory(self.workdir.joinpath(self.name)) - self.add(docs) - self.summarize() + storage = self.get_storage(YamlStorage.DEFAULT_NAME) + if len(storage.documents) == 0: + docs = self.read_files_in_directory(self.workdir.joinpath(self.name)) + self.add(docs) def add(self, docs: List): storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME)) @@ -438,6 +439,9 @@ class KnowledgeBase: # literal search/semantic search knowledge = self.get_knowledge(knowledge_type=knowledge_type) + if len(knowledge) == 0: + return "" + scores = [] for k in knowledge: scores.append(similarity(str(k), content)) diff --git a/qlib/finco/prompt_template.yaml b/qlib/finco/prompt_template.yaml index 2ae4bba41..b15bf8a9c 100644 --- a/qlib/finco/prompt_template.yaml +++ b/qlib/finco/prompt_template.yaml @@ -216,6 +216,12 @@ CMDTask_system : |- Example output: cp -r a/b/c d/e/f + Example input: + - User intention: Copy the folder from a/b/c to d/e/f + - User OS: Windows + Example output: + xcopy /Y /f 
a/b/c d/e/f + CMDTask_user : |- Example input: - User intention: "{{cmd_intention}}" diff --git a/qlib/finco/task.py b/qlib/finco/task.py index 7951408fe..f92cbed02 100644 --- a/qlib/finco/task.py +++ b/qlib/finco/task.py @@ -9,6 +9,7 @@ import re import subprocess import platform import inspect +from jinja2 import Template from qlib.finco.llm import APIBackend from qlib.finco.tpl import get_tpl_path @@ -17,6 +18,7 @@ from qlib.contrib.analyzer import HFAnalyzer, SignalAnalyzer from qlib.workflow import R from qlib.finco.log import FinCoLog, LogColors from qlib.finco.conf import Config +from qlib.finco.knowledge import KnowledgeBase, Topic COMPONENT_LIST = ["Dataset", "DataHandler", "Model", "Record", "Strategy", "Backtest"] @@ -176,8 +178,14 @@ class HighLevelPlanTask(PlanTask): assert thinking_detail is not None, "The thinking detail is not provided" assert user_intention is not None, "The user intention is not provided" + practice_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_PRACTICE, content=user_intention) + finance_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_FINANCE, content=user_intention) + system_prompt = self.system.render() - user_prompt = self.user.render(target=target, deliverable=deliverable, business_level=business_level, algorithm_level=algorithm_level, thinking_detail=thinking_detail, user_intention=user_intention) + user_prompt = self.user.render(target=target, deliverable=deliverable, business_level=business_level, + algorithm_level=algorithm_level, thinking_detail=thinking_detail, + practice_knowledge=practice_knowledge, finance_knowledge=finance_knowledge, + user_intention=user_intention) response = APIBackend().build_messages_and_create_chat_completion( user_prompt, system_prompt @@ -229,8 +237,14 @@ class SLPlanTask(PlanTask): experiment_count = max([i for i in range(10) if f"{i}." 
in experiments]) + infrastructure_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_INFRASTRUCTURE, + content=experiments) + system_prompt = self.system.render() - user_prompt = self.user.render(target=target, deliverable=deliverable, business_level=business_level, algorithm_level=algorithm_level, thinking_detail=thinking_detail, user_intention=user_intention, experiments=experiments) + user_prompt = self.user.render(target=target, deliverable=deliverable, business_level=business_level, + algorithm_level=algorithm_level, thinking_detail=thinking_detail, + infrastructure_knowledge=infrastructure_knowledge, + user_intention=user_intention, experiments=experiments) former_messages = [] if self.replan: @@ -341,11 +355,14 @@ class TrainTask(Task): try: # Run the command and capture the output workspace = self._context_manager.get_context("workspace") - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace)) + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, + text=True, encoding="utf8", cwd=str(workspace)) except subprocess.CalledProcessError as e: print(f"An error occurred while running the subprocess: {e.stderr} {e.stdout}") real_error = e.stderr+e.stdout + KnowledgeBase().execute_knowledge.add([real_error]) + if "data" in e.stdout.lower() or "handler" in e.stdout.lower(): return [HyperparameterActionTask("Dataset", regenerate=True, error=real_error), HyperparameterActionTask("DataHandler", regenerate=True, error=real_error), @@ -432,11 +449,9 @@ class AnalysisTask(Task): else "workflow_config.yaml" ) workspace = self._context_manager.get_context("workspace") - workflow_path = workspace.joinpath(workflow_config) - with workflow_path.open() as f: - workflow = yaml.safe_load(f) - experiment_name = workflow["experiment_name"] if "experiment_name" in workflow else "workflow" + # todo: analysis multi experiment(get recorder by id) + 
experiment_name = "workflow" R.set_uri(Path.joinpath(workspace, 'mlruns').as_uri()) tasks = [] @@ -650,11 +665,19 @@ class HyperparameterActionTask(ActionTask): hyperparameters.remove("dataset") hyperparameters.remove("recorder") target_component_classes_and_hyperparameters.append((module_path, class_name, hyperparameters)) + + execute_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_EXECUTE, + content=target_component_plan) + infrastructure_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_INFRASTRUCTURE, + content=target_component_plan) + user_prompt = self.user.render( user_requirement=user_prompt, target_component_plan=target_component_plan, target_component=self.target_component, - target_component_classes_and_hyperparameters=target_component_classes_and_hyperparameters + target_component_classes_and_hyperparameters=target_component_classes_and_hyperparameters, + execute_knowledge=execute_knowledge, + infrastructure_knowledge=infrastructure_knowledge ) former_messages = [] if self.regenerate: @@ -987,7 +1010,9 @@ class SummarizeTask(Task): file_info = self.get_info_from_file(workspace) context_info = self.get_info_from_context() # too long context make response unstable. 
- record_info = self.get_info_from_recorder(workspace, workflow_yaml["experiment_name"]) + + # todo: experiments perhaps have the same name, summarize experiment by loop + record_info = self.get_info_from_recorder(workspace, "workflow") figure_path = self.get_figure_path(workspace) information = context_info + file_info + record_info @@ -1012,7 +1037,7 @@ class SummarizeTask(Task): ) context_summary.update({key: response}) - recorder = R.get_recorder(experiment_name=workflow_yaml["experiment_name"]) + recorder = R.get_recorder(experiment_name="workflow") recorder.save_objects(context_summary=context_summary) prompt_workflow_selection = self.summarize_metrics_user.render( @@ -1029,6 +1054,14 @@ class SummarizeTask(Task): user_prompt=prompt_workflow_selection, system_prompt=self.system.render() ) + KnowledgeBase().practice_knowledge.add([{"user_intention": user_prompt, + "experiment_metrics": metrics_response}]) + + # notes: summarize after all experiment added to KnowledgeBase + topic = Topic(name="rollingModel", describe=Template("What conclusion can you draw")) + topic.summarize(KnowledgeBase().practice_knowledge.knowledge) + self.logger.info(f"Summary of topic: {topic.name}: {topic.knowledge}") + self._context_manager.set_context("summary", response) self.save_markdown(content=response, path=workspace) self.logger.info(f"Report has saved to {self.__DEFAULT_REPORT_NAME}", title="End")