mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-01 18:11:18 +08:00
Add docs to context and retrieve (#1566)
* add analyser docstring to context; * add retrieve method to context manager; * add notes to retrieve
This commit is contained in:
@@ -35,6 +35,8 @@ class AnalyzerTemp:
|
||||
class HFAnalyzer(AnalyzerTemp):
|
||||
"""
|
||||
This is the Signal Analysis class that generates the analysis results such as IC and IR.
|
||||
|
||||
default output image filename is "HFAnalyzerTable.jpeg"
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
@@ -70,11 +72,14 @@ class HFAnalyzer(AnalyzerTemp):
|
||||
plt.scatter(np.arange(0, len(label)), label.iloc[:, 0])
|
||||
plt.title("HFAnalyzer")
|
||||
plt.savefig(self.workspace.joinpath("HFAnalyzer.jpeg"))
|
||||
return "HFAnalyzer.jpeg"
|
||||
|
||||
|
||||
class SignalAnalyzer(AnalyzerTemp):
|
||||
"""
|
||||
This is the Signal Analysis class that generates the analysis results such as IC and IR.
|
||||
|
||||
default output image filename is "signalAnalysis.jpeg"
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
@@ -101,4 +106,4 @@ class SignalAnalyzer(AnalyzerTemp):
|
||||
plt.title("SignalAnalyzer")
|
||||
plt.savefig(self.workspace.joinpath("signalAnalysis.jpeg"))
|
||||
|
||||
return raw_label
|
||||
return "signalAnalysis.jpeg"
|
||||
|
||||
@@ -282,6 +282,12 @@ class AnalysisTask(Task):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def assign_context_manager(self, context_manager):
|
||||
# TODO: docstrings are added to the context temporarily; storing them in a non-runtime place may be better.
|
||||
self._context_manager = context_manager
|
||||
for k, v in self.__ANALYZERS_DOCS.items():
|
||||
self._context_manager.set_context(k, v)
|
||||
|
||||
def execute(self):
|
||||
prompt = self.user.render(
|
||||
user_prompt=self._context_manager.get_context("user_prompt")
|
||||
@@ -325,7 +331,7 @@ class AnalysisTask(Task):
|
||||
|
||||
for task in tasks:
|
||||
resp = task.analyse()
|
||||
self._context_manager.set_context(task.__class__.__name__, resp)
|
||||
self._context_manager.set_context(resp, task.__class__.__doc__)
|
||||
|
||||
return []
|
||||
|
||||
@@ -582,9 +588,6 @@ class SummarizeTask(Task):
|
||||
def summarize(self) -> str:
|
||||
return ""
|
||||
|
||||
def interact(self) -> Any:
|
||||
return
|
||||
|
||||
def get_info_from_file(self, path) -> List:
|
||||
"""
|
||||
read specific type of files under path
|
||||
@@ -594,11 +597,11 @@ class SummarizeTask(Task):
|
||||
for root, dirs, files in os.walk(path):
|
||||
for filename in files:
|
||||
file_path = os.path.join(root, filename)
|
||||
file_list.append(file_path)
|
||||
file_list.append(Path(file_path))
|
||||
|
||||
result = []
|
||||
for file in file_list:
|
||||
postfix = file.split(".")[-1]
|
||||
postfix = file.name.split(".")[-1]
|
||||
if postfix in ["py", "log", "yaml"]:
|
||||
with open(file) as f:
|
||||
content = f.read()
|
||||
@@ -606,7 +609,8 @@ class SummarizeTask(Task):
|
||||
# in case of too large file
|
||||
# TODO: Perhaps summarization method instead of truncation would be a better approach
|
||||
result.append(
|
||||
{"file": file, "content": content[: self.__MAX_LENGTH_OF_FILE]}
|
||||
{"file": file.name, "content": content[: self.__MAX_LENGTH_OF_FILE],
|
||||
"additional": self._context_manager.retrieve(file.name)}
|
||||
)
|
||||
|
||||
return result
|
||||
@@ -636,7 +640,9 @@ class SummarizeTask(Task):
|
||||
for filename in files:
|
||||
postfix = filename.split(".")[-1]
|
||||
if postfix in ["jpeg"]:
|
||||
file_list.append(str(Path(self.workspace).joinpath(filename)))
|
||||
description = self._context_manager.retrieve(filename)
|
||||
file_list.append({"file_name": filename, "description": description,
|
||||
"path": str(Path(self.workspace).joinpath(filename))})
|
||||
return file_list
|
||||
|
||||
def save_markdown(self, content: str):
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import json
|
||||
|
||||
from fuzzywuzzy import fuzz
|
||||
|
||||
|
||||
class Singleton:
|
||||
_instance = None
|
||||
@@ -17,3 +19,11 @@ def parse_json(response):
|
||||
pass
|
||||
|
||||
raise Exception(f"Failed to parse response: {response}, please report it or help us to fix it.")
|
||||
|
||||
|
||||
def similarity(text1, text2):
    """
    Score how alike two pieces of text are using ``fuzz.ratio``.

    Any argument that is not a ``str`` is replaced by the empty string
    before the comparison, so non-text values never raise here.

    Parameters
    ----------
    text1, text2
        The values to compare.

    Returns
    -------
    The result of ``fuzz.ratio`` on the two (possibly substituted) strings.
    """
    left, right = (item if isinstance(item, str) else "" for item in (text1, text2))

    # Another similarity algorithm (e.g. TF-IDF) could be swapped in here.
    return fuzz.ratio(left, right)
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import sys
|
||||
import copy
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from qlib.finco.task import WorkflowTask, PlanTask, ActionTask, SummarizeTask, RecorderTask, AnalysisTask
|
||||
from qlib.finco.log import FinCoLog, LogColors
|
||||
from qlib.finco.utils import similarity
|
||||
|
||||
|
||||
class WorkflowContextManager:
|
||||
@@ -39,6 +40,17 @@ class WorkflowContextManager:
|
||||
"""TODO: do we need to return a deep copy?"""
|
||||
return copy.deepcopy(self.context)
|
||||
|
||||
def retrieve(self, query: str) -> dict:
    """
    Look up the context entry that best matches ``query``.

    An exact key match wins. Otherwise every entry is scored with
    ``similarity`` against both its key and its value, and the entry with
    the highest score is returned (the first one in iteration order on ties).

    Parameters
    ----------
    query : str
        The key, or text resembling a key or value, to search for.

    Returns
    -------
    dict
        A single-item dict ``{key: value}`` for the matched entry, or an
        empty dict when the context holds no entries at all.
    """
    if query in self.context:
        return {query: self.context.get(query)}

    # Nothing to compare against: ``max`` below would raise ValueError on an
    # empty mapping, so report "no match" explicitly instead.
    if not self.context:
        return {}

    # NOTE: retrieving information from the context by string similarity is a
    # stopgap and may be abandoned in the future.
    scores = {
        key: max(similarity(query, key), similarity(query, value))
        for key, value in self.context.items()
    }
    best_key = max(scores, key=scores.get)
    return {best_key: self.context.get(best_key)}
|
||||
|
||||
|
||||
class WorkflowManager:
|
||||
"""This manages the whole task automation workflow, including tasks and actions"""
|
||||
|
||||
2
setup.py
2
setup.py
@@ -178,6 +178,8 @@ setup(
|
||||
"openapi",
|
||||
"pydantic", # Please add it to the basic requirements once the design of pydantic is stable.
|
||||
"python-dotenv", # I don't think this is necessary if we use pydantic.
|
||||
"fuzzywuzzy",
|
||||
"python-Levenshtein" # not necessary but accelerate fuzzywuzzy calculation
|
||||
],
|
||||
},
|
||||
include_package_data=True,
|
||||
|
||||
Reference in New Issue
Block a user