Add docs to context and retrieve (#1566)

* add analyser docstring to context; * add retrieve method to context manager; * add notes to retrieve
2026-07-01 18:11:18 +08:00 · 2023-06-24 21:47:27 +08:00
parent f12184cc0f
commit 1326ac614d
5 changed files with 45 additions and 10 deletions
--- a/qlib/contrib/analyzer.py
+++ b/qlib/contrib/analyzer.py
@@ -35,6 +35,8 @@ class AnalyzerTemp:
 class HFAnalyzer(AnalyzerTemp):
    """
    This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "HFAnalyzer.jpeg"
    """

    def __init__(self, **kwargs):
@@ -70,11 +72,14 @@ class HFAnalyzer(AnalyzerTemp):
        plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
        plt.title("HFAnalyzer")
        plt.savefig(self.workspace.joinpath("HFAnalyzer.jpeg"))
+        return "HFAnalyzer.jpeg"


 class SignalAnalyzer(AnalyzerTemp):
    """
    This is the Signal Analysis class that generates the analysis results such as IC and IR.
+
+    default output image filename is "signalAnalysis.jpeg"
    """

    def __init__(self, **kwargs):
@@ -101,4 +106,4 @@ class SignalAnalyzer(AnalyzerTemp):
        plt.title("SignalAnalyzer")
        plt.savefig(self.workspace.joinpath("signalAnalysis.jpeg"))

-        return raw_label
+        return "signalAnalysis.jpeg"
--- a/qlib/finco/task.py
+++ b/qlib/finco/task.py
@@ -282,6 +282,12 @@ class AnalysisTask(Task):
    def __init__(self):
        super().__init__()

+    def assign_context_manager(self, context_manager):
+        # TODO: add the analyzer docstrings to the context temporarily; storing them somewhere outside of runtime may be better.
+        self._context_manager = context_manager
+        for k, v in self.__ANALYZERS_DOCS.items():
+            self._context_manager.set_context(k, v)
+
    def execute(self):
        prompt = self.user.render(
            user_prompt=self._context_manager.get_context("user_prompt")
@@ -325,7 +331,7 @@ class AnalysisTask(Task):

            for task in tasks:
                resp = task.analyse()
-                self._context_manager.set_context(task.__class__.__name__, resp)
+                self._context_manager.set_context(resp, task.__class__.__doc__)

        return []

@@ -582,9 +588,6 @@ class SummarizeTask(Task):
    def summarize(self) -> str:
        return ""

-    def interact(self) -> Any:
-        return
-
    def get_info_from_file(self, path) -> List:
        """
        read specific type of files under path
@@ -594,11 +597,11 @@ class SummarizeTask(Task):
        for root, dirs, files in os.walk(path):
            for filename in files:
                file_path = os.path.join(root, filename)
-                file_list.append(file_path)
+                file_list.append(Path(file_path))

        result = []
        for file in file_list:
-            postfix = file.split(".")[-1]
+            postfix = file.name.split(".")[-1]
            if postfix in ["py", "log", "yaml"]:
                with open(file) as f:
                    content = f.read()
@@ -606,7 +609,8 @@ class SummarizeTask(Task):
                    # in case of too large file
                    # TODO: Perhaps summarization method instead of truncation would be a better approach
                    result.append(
-                        {"file": file, "content": content[: self.__MAX_LENGTH_OF_FILE]}
+                        {"file": file.name, "content": content[: self.__MAX_LENGTH_OF_FILE],
+                         "additional": self._context_manager.retrieve(file.name)}
                    )

        return result
@@ -636,7 +640,9 @@ class SummarizeTask(Task):
            for filename in files:
                postfix = filename.split(".")[-1]
                if postfix in ["jpeg"]:
-                    file_list.append(str(Path(self.workspace).joinpath(filename)))
+                    description = self._context_manager.retrieve(filename)
+                    file_list.append({"file_name": filename, "description": description,
+                                      "path": str(Path(self.workspace).joinpath(filename))})
        return file_list

    def save_markdown(self, content: str):
--- a/qlib/finco/utils.py
+++ b/qlib/finco/utils.py
@@ -1,5 +1,7 @@
 import json

+from fuzzywuzzy import fuzz
+

 class Singleton:
    _instance = None
@@ -17,3 +19,11 @@ def parse_json(response):
        pass

    raise Exception(f"Failed to parse response: {response}, please report it or help us to fix it.")
+
+
+def similarity(text1, text2):
+    text1 = text1 if isinstance(text1, str) else ""
+    text2 = text2 if isinstance(text2, str) else ""
+
+    # Maybe we can use another similarity algorithm, such as TF-IDF
+    return fuzz.ratio(text1, text2)
--- a/qlib/finco/workflow.py
+++ b/qlib/finco/workflow.py
@@ -1,10 +1,11 @@
 import sys
 import copy
-from pathlib import Path
 import shutil
+from pathlib import Path

 from qlib.finco.task import WorkflowTask, PlanTask, ActionTask, SummarizeTask, RecorderTask, AnalysisTask
 from qlib.finco.log import FinCoLog, LogColors
+from qlib.finco.utils import similarity


 class WorkflowContextManager:
@@ -39,6 +40,17 @@ class WorkflowContextManager:
        """TODO: do we need to return a deep copy?"""
        return copy.deepcopy(self.context)

+    def retrieve(self, query: str) -> dict:
+        if query in self.context.keys():
+            return {query: self.context.get(query)}
+
+        # Note: retrieving information from the context by string similarity may be abandoned in the future
+        scores = {}
+        for k, v in self.context.items():
+            scores.update({k: max(similarity(query, k), similarity(query, v))})
+        max_score_key = max(scores, key=scores.get)
+        return {max_score_key: self.context.get(max_score_key)}
+

 class WorkflowManager:
    """This manange the whole task automation workflow including tasks and actions"""
--- a/setup.py
+++ b/setup.py
@@ -178,6 +178,8 @@ setup(
            "openapi",
            "pydantic",  # Please add it to basic requirements after the design of pydantic is state.
            "python-dotenv",  # I don't think this is necessary if we use pydantic.
+            "fuzzywuzzy",
+            "python-Levenshtein"    # not necessary, but accelerates fuzzywuzzy calculation
        ],
    },
    include_package_data=True,