diff --git a/qlib/contrib/analyzer.py b/qlib/contrib/analyzer.py
index ef63679b6..304a32fba 100644
--- a/qlib/contrib/analyzer.py
+++ b/qlib/contrib/analyzer.py
@@ -35,6 +35,8 @@ class AnalyzerTemp:
 class HFAnalyzer(AnalyzerTemp):
     """
     This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "HFAnalyzer.jpeg"
     """
 
     def __init__(self, **kwargs):
@@ -70,11 +72,14 @@ class HFAnalyzer(AnalyzerTemp):
         plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
         plt.title("HFAnalyzer")
         plt.savefig(self.workspace.joinpath("HFAnalyzer.jpeg"))
+        return "HFAnalyzer.jpeg"
 
 
 class SignalAnalyzer(AnalyzerTemp):
     """
     This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "signalAnalysis.jpeg"
     """
 
     def __init__(self, **kwargs):
@@ -101,4 +106,4 @@ class SignalAnalyzer(AnalyzerTemp):
         plt.title("SignalAnalyzer")
         plt.savefig(self.workspace.joinpath("signalAnalysis.jpeg"))
 
-        return raw_label
+        return "signalAnalysis.jpeg"
diff --git a/qlib/finco/task.py b/qlib/finco/task.py
index 364fc5987..e35dc8e34 100644
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
@@ -282,6 +282,13 @@ class AnalysisTask(Task):
     def __init__(self):
         super().__init__()
 
+    def assign_context_manager(self, context_manager):
+        """Attach ``context_manager`` and copy every ``__ANALYZERS_DOCS`` entry into it."""
+        # TODO: add docstring to context temperature, perhaps store them in non runtime place is better.
+        self._context_manager = context_manager
+        for k, v in self.__ANALYZERS_DOCS.items():
+            self._context_manager.set_context(k, v)
+
     def execute(self):
         prompt = self.user.render(
             user_prompt=self._context_manager.get_context("user_prompt")
@@ -325,7 +332,7 @@ class AnalysisTask(Task):
 
         for task in tasks:
             resp = task.analyse()
-            self._context_manager.set_context(task.__class__.__name__, resp)
+            self._context_manager.set_context(resp, task.__class__.__doc__)
 
         return []
 
@@ -582,9 +589,6 @@ class SummarizeTask(Task):
     def summarize(self) -> str:
         return ""
 
-    def interact(self) -> Any:
-        return
-
     def get_info_from_file(self, path) -> List:
         """
         read specific type of files under path
@@ -594,11 +598,11 @@ class SummarizeTask(Task):
         for root, dirs, files in os.walk(path):
             for filename in files:
                 file_path = os.path.join(root, filename)
-                file_list.append(file_path)
+                file_list.append(Path(file_path))
 
         result = []
         for file in file_list:
-            postfix = file.split(".")[-1]
+            postfix = file.name.split(".")[-1]
             if postfix in ["py", "log", "yaml"]:
                 with open(file) as f:
                     content = f.read()
@@ -606,7 +610,8 @@ class SummarizeTask(Task):
                     # in case of too large file
                     # TODO: Perhaps summarization method instead of truncation would be a better approach
                     result.append(
-                        {"file": file, "content": content[: self.__MAX_LENGTH_OF_FILE]}
+                        {"file": file.name, "content": content[: self.__MAX_LENGTH_OF_FILE],
+                         "additional": self._context_manager.retrieve(file.name)}
                     )
 
         return result
@@ -636,7 +641,9 @@ class SummarizeTask(Task):
             for filename in files:
                 postfix = filename.split(".")[-1]
                 if postfix in ["jpeg"]:
-                    file_list.append(str(Path(self.workspace).joinpath(filename)))
+                    description = self._context_manager.retrieve(filename)
+                    file_list.append({"file_name": filename, "description": description,
+                                      "path": str(Path(self.workspace).joinpath(filename))})
         return file_list
 
     def save_markdown(self, content: str):
diff --git a/qlib/finco/utils.py b/qlib/finco/utils.py
index 234ef0165..4741eb69a 100644
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -1,5 +1,7 @@
 import json
 
+from fuzzywuzzy import fuzz
+
 
 class Singleton:
     _instance = None
@@ -17,3 +19,12 @@ def parse_json(response):
         pass
 
     raise Exception(f"Failed to parse response: {response}, please report it or help us to fix it.")
+
+
+def similarity(text1, text2):
+    """Return the ``fuzz.ratio`` score of two strings; any non-string input is compared as ``""``."""
+    text1 = text1 if isinstance(text1, str) else ""
+    text2 = text2 if isinstance(text2, str) else ""
+
+    # Maybe we can use other similarity algorithm such as tfidf
+    return fuzz.ratio(text1, text2)
diff --git a/qlib/finco/workflow.py b/qlib/finco/workflow.py
index 631a0e951..2aae166c3 100644
--- a/qlib/finco/workflow.py
+++ b/qlib/finco/workflow.py
@@ -1,10 +1,11 @@
 import sys
 import copy
-from pathlib import Path
 import shutil
+from pathlib import Path
 
 from qlib.finco.task import WorkflowTask, PlanTask, ActionTask, SummarizeTask, RecorderTask, AnalysisTask
 from qlib.finco.log import FinCoLog, LogColors
+from qlib.finco.utils import similarity
 
 
 class WorkflowContextManager:
@@ -39,6 +40,23 @@ class WorkflowContextManager:
         """TODO: do we need to return a deep copy?"""
         return copy.deepcopy(self.context)
 
+    def retrieve(self, query: str) -> dict:
+        """Return the one context entry that best matches ``query`` as ``{key: value}``.
+
+        An exact key match wins; otherwise the entry whose key or value scores highest
+        under ``similarity`` is returned. An empty context yields ``{}``.
+        """
+        if query in self.context:
+            return {query: self.context[query]}
+        if not self.context:
+            # Nothing to rank: max() over an empty mapping would raise ValueError.
+            return {}
+
+        # NOTE: similarity-based retrieval is a stopgap and may be abandoned in the future.
+        scores = {k: max(similarity(query, k), similarity(query, v)) for k, v in self.context.items()}
+        max_score_key = max(scores, key=scores.get)
+        return {max_score_key: self.context[max_score_key]}
+
 
 class WorkflowManager:
     """This manange the whole task automation workflow including tasks and actions"""
diff --git a/setup.py b/setup.py
index ca9873451..bf533cfe4 100644
--- a/setup.py
+++ b/setup.py
@@ -178,6 +178,8 @@ setup(
             "openapi",
             "pydantic",  # Please add it to basic requirements after the design of pydantic is state.
             "python-dotenv",  # I don't think this is necessary if we use pydantic.
+            "fuzzywuzzy",
+            "python-Levenshtein",  # optional, but speeds up fuzzywuzzy calculations
         ],
     },
     include_package_data=True,