1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 14:01:28 +08:00

Merge pull request #1609 from microsoft/xuyang1/finetune_prompts

finetune prompts
This commit is contained in:
Xu Yang
2023-07-19 20:01:07 +08:00
committed by GitHub
5 changed files with 211 additions and 115 deletions

View File

@@ -238,10 +238,6 @@ class FinanceKnowledge(Knowledge):
storage = self.get_storage(YamlStorage.DEFAULT_NAME)
if len(storage.documents) == 0:
docs = self.read_files_in_directory(self.workdir.joinpath(self.name))
docs.extend([
{"content": "[Success]: XXXX, the results looks reasonable # Keywords: supervised learning, data"},
{"content": "[Fail]: XXXX, it raise memory error due to YYYYY "
"# Keywords: supervised learning, data"}])
self.add(docs)
self.summarize()
@@ -378,20 +374,27 @@ class InfrastructureKnowledge(Knowledge):
class Topic:
def __init__(self, name: str, describe: Template):
def __init__(self, name: str, system: Template, user: Template):
self.name = name
self.describe = describe
self.system_prompt_template = system
self.user_prompt_template = user
self.docs = []
self.knowledge = None
self.logger = FinCoLog()
def summarize(self, docs: list):
self.logger.info(f"Summarize Topic \nname: {self.name}\ndescribe: {self.describe.module}")
prompt_workflow_selection = self.describe.render(docs=docs)
response = APIBackend().build_messages_and_create_chat_completion(user_prompt=prompt_workflow_selection)
def summarize(self, practice_knowlege, user_intention, target, diffrence, target_metrics):
system_prompt = self.system_prompt_template.render(topic=self.name)
user_prompt = self.user_prompt_template.render(
experiment_1_info = practice_knowlege[0],
experiment_2_info = practice_knowlege[1],
user_intention=user_intention,
target=target,
diffrence=diffrence,
target_metrics=target_metrics)
response = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_prompt, system_prompt=system_prompt)
self.knowledge = response
self.docs = docs
self.docs = practice_knowlege
self.logger.info(f"Summary of {self.name}:\n{self.knowledge}")
@@ -483,27 +486,48 @@ class KnowledgeBase(SingletonBaseClass):
# literal search/semantic search
knowledge = self.get_knowledge(knowledge_type=knowledge_type)
if len(knowledge) == 0:
if len(knowledge) == 0 or knowledge_type == "infrastructure":
return ""
if knowledge_type == "practice":
knowledge = [line for line in knowledge if line.startswith("practice_knowledge on")]
scores = []
for k in knowledge:
scores.append(similarity(str(k), content))
sorted_indexes = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
similar_n_indexes = sorted_indexes[:n]
similar_n_docs = [knowledge[i] for i in similar_n_indexes]
similar_n_docs = "\n".join([knowledge[i] for i in similar_n_indexes])
prompt = Template(
user_prompt_template = Template(
"""
find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'.
Just return the most relevant item I provided, no more explain.
please treat the docs as sentences and always response no less than 5 relevant sentences.
List all the relevant sentences in number index without any interaction and conversation.
query: '{{query}}'
paragraph:
{{paragraph}}.
"""
)
prompt_workflow_selection = prompt.render(content=content, docs=similar_n_docs)
user_prompt = user_prompt_template.render(query=content, paragraph=similar_n_docs)
system_prompt = """
You are an assistant who find relevant sentences from a long paragraph to fit user's query sentence. Relevant means the sentence might provide userful information to explain user's query sentence. People after reading the relevant sentences might have a better understanding of the query sentence.
Please response no less than ten sentences, if paragraph is not enough, you can return less than ten. Don't pop out irrelevant sentences. Please list the sentences in a number index instead of a whole paragraph.
Example input:
query: what is the best model for image classification?
paragraph:
Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
The solar system is a collection of celestial bodies, including the Sun, planets, moons, and other objects, that orbit around the Sun due to its gravitational pull.
A car is a wheeled vehicle, typically powered by an engine or electric motor, used for transportation of people and goods.
ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
Example output:
1. ResNet, short for Residual Network, is a type of deep learning architecture designed to improve the accuracy and training speed of neural networks for image recognition tasks.
2. Image classification is the process of identifying and categorizing objects within an image into different groups or classes.
3. Machine learning is a type of artificial intelligence that enables computers to learn and make decisions without being explicitly programmed.
"""
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt="You are an excellent assistant."
user_prompt=user_prompt, system_prompt=system_prompt
)
return response

View File

@@ -26,7 +26,6 @@ class ConvManager:
def _rotate_files(self):
pairs = []
for f in self.path.glob("*.json"):
print(f)
m = re.match(r"(\d+).json", f.name)
if m is not None:
n = int(m.group(1))

View File

@@ -37,26 +37,19 @@ IdeaTask_system : |-
Rolling or DDGDA is a kind of data controller which applies custom weights to data in the time dimension. So set Data as target module when rolling or DDGDA is used in business level. Never do research both on rolling or DDGDA.
We often use linear model as default model supervised learning because it trains very fast. If the user didn't plan to achieve very high accuracy, use default model and datahandler is a good choice to save time.
User will tell you the knowledge type and content in the conversation, if user said "following lists the {practice or finance} knowledge:", you should memorize and understand them then answer "OK" without any other words, finally, user will tell you the research intention, you should answer exactly the same format as the input without any interaction or conversation.
User will tell you the type and content of knowledge and the research intention, you should answer exactly the same format as the input without any interaction or conversation.
Example input:
Input 1:
following lists the practice knowledge:
Output 1:
OK
Input 2:
following lists the finance knowledge:
Output 2:
OK
Input 3:
Research intention:
build an US stock market daily portfolio in quantitative investment and maximize the excess return.
Output 3:
Research intention: build an US stock market daily portfolio in quantitative investment and maximize the excess return.
Example output:
Target: maximize the excess return
Deliverables: a daily quantitative investment strategy in US stock market. A model will be included in the strategy.
Thinking directions:
@@ -68,7 +61,11 @@ IdeaTask_system : |-
Because the user wants to maximize the excess return and more complicated model often extracts more deep pattern from the data. So try a more complicated DNN model to get more excess return than a simple linear model.
IdeaTask_user : |-
pass
following lists the practice knowledge:
{{ practice_knowledge }}
following lists the finance knowledge:
{{ finance_knowledge }}
Research intention: {{ user_intention }}
HighLevelPlanTask_system: |-
You are a Quant investment research and development assistant whose job is to determine high-level plans to verify the user's research intention.
@@ -1057,13 +1054,29 @@ ImplementActionTask_user : |-
target component: {{target_component}}
SummarizeTask_system : |-
You are an expert in quant domain.
Your task is to help user to analysis the output of qlib, your main focus is on the backtesting metrics of
user strategies. Warnings reported during runtime can be ignored if deemed appropriate.
your information including the strategy's backtest log and runtime log.
You may receive some scripts of the codes as well, you can use them to analysis the output.
At the same time, you can also use your knowledge of the Microsoft/Qlib project and finance to complete your tasks.
If there are any abnormal areas in the log or scripts, please also point them out.
You are an expert in the quant domain. Your task is to help the user analyze the output of two experiments in Qlib; your main focus is on the backtesting metrics of the user's strategies.
User has conducted two experiments, which differ only in a very small part.
On each experiment, user will give you:
1. user's intention for doing these experiments
2. The id to differ the experiments
3. The yaml config of the experiment
4. A small description of the experiment
5. the backtest metrics of the experiment
Finally, user will tell you the target of doing these experiments, the difference between the two experiments and the target metrics from the user.
User will provide a figure path which user has generated some images, please include them in your report.
You should understand user's intention and target, compare the relevant metrics of the two experiments based on user's intention, give conclusion to the target.
Please make a table to compare the metrics of the two experiments, and make it easy to read, for example by calculating the increase or highlighting some key metrics.
You should give a summary of each experiment, conclusions, and recommendations for further research experiments to the user, and you should make the report longer.
Notice:
1. max_drawdown might be presented as a negative number or a positive number; a better max_drawdown (also known as a lower max_drawdown) means its absolute value is smaller, so don't compare the raw max_drawdown numbers, compare their absolute values instead. This is very important because a misunderstanding might cause a totally wrong conclusion!!!
2. try not to say the two experiments perform similarly, because small progress also means better; even if the two experiments perform similarly, you should still point out which one is better.
Example output 1:
The matrix in log shows that your strategy's max draw down is a bit large, based on your annualized return,
@@ -1103,8 +1116,19 @@ SummarizeTask_system : |-
Don't list data user doesn't provide.
SummarizeTask_user : |-
Here is my information: '{{information}}'
My intention is: {{user_intention}}. Please provide me with a summary and recommendation based on my intention and the information I have provided. There are some figures which absolute path are: {{figure_path}}, You must display these images in markdown using the appropriate image format.
Here is my results on two experiments:
experiment 1:
{{experiment_1_info}}
experiment 2:
{{experiment_2_info}}
target:
{{ target }}
difference:
{{ difference }}
target metrics:
{{ target_metrics }}
My intention is: {{user_intention}}.
Please provide me with a summary and recommendation based on my intention and the information I have provided. There are some figures which absolute path are: {{figure_path}}, You must display these images in markdown using the appropriate image format.
SummarizeTask_context_system : |-
Your purpose is to find out the important information offered by user. You can just show the data provided by user in markdown format.
@@ -1131,11 +1155,38 @@ LearnManager_user : |-
If you have no idea how to optimize the system prompt, you can just return the original system prompt.
you will adjust {{task}}'s system prompt to:
Topic_IC : |-
Summarize the influence of parameters on IC: {{docs}}. (Example response: Max draw-down become larger over time)
Topic_user : |-
experiment 1:
{{experiment_1_info}}
experiment 2:
{{experiment_2_info}}
target:
{{ target }}
difference:
{{ difference }}
target metrics:
{{ target_metrics }}
My intention is: {{user_intention}}.
Topic_MaxDropDown : |-
Summarize the influence of parameters on max dropdown: {{docs}}. (Example response: Max draw-down become larger over time)
Topic_system : |-
Your job is to summarize the influence of parameters on max dropdown.
Topic_RollingModel : |-
What conclusion can you draw from: {{docs}}. Answer questions as concisely as possible. (Example response: rolling model is good at making the Max draw-down smaller.)
User has conducted two experiments, which differ only in a very small part.
On each experiment, user will give you:
1. user's intention for doing these experiments
2. The id to differ the experiments
3. The yaml config of the experiment
4. A small description of the experiment
5. the backtest metrics of the experiment
Finally, user will tell you the target of doing these experiments, the difference between the two experiments and the target metrics from the user.
You should compare the metrics of the two experiments and give a conclusion on the effect of the difference between the experiments on the topic {{ topic }}.
Notice: max_drawdown might be presented as a negative number or a positive number; a better max_drawdown means its absolute value is smaller, so don't compare the raw max_drawdown numbers, compare their absolute values instead. This is very important because a misunderstanding might cause a totally wrong conclusion!!!
After that, you should give a small explanation to your conclusion.
Example output format:
rolling model is good at making the Max draw-down smaller.
explanation: ...

View File

@@ -176,27 +176,21 @@ class IdeaTask(PlanTask):
practice_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_PRACTICE, content=user_intention)
finance_knowledge = KnowledgeBase().query(knowledge_type=KnowledgeBase.KT_FINANCE, content=user_intention)
if practice_knowledge == "":
practice_knowledge = "So far no former experiments have been done, so no practice knowledge is available."
else:
practice_knowledge = "\nSo you can try other advanced algorithm."
finance_knowledge += "\n11. When rolling is proved to be working in reducing max drawdown, it's a good idea to try DDGDA as controllerto to further improve performance."
system_prompt = self.system.render()
former_messages = []
for knowlege in [practice_knowledge, finance_knowledge]:
if knowlege != '':
knowlege_type = "practice" if knowlege is practice_knowledge else "finance"
user_prompt = ""
user_prompt += f"following lists the {knowlege_type} knowledge:\n"
user_prompt += f"{knowlege}\n"
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt, system_prompt, former_messages=former_messages
)
assert "ok" in response.lower(), "The response is not ok"
self.save_chat_history_to_context_manager(
user_prompt, response, system_prompt
)
former_messages = self._context_manager.get_context("chat_history")[self.__class__.__name__]['None'][1:]
user_prompt = f"""\nResearch intention:\n{user_intention}"""
user_prompt = self.user.render(practice_knowledge=practice_knowledge, finance_knowledge=finance_knowledge, user_intention=user_intention)
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt, system_prompt, former_messages=former_messages
)
self.save_chat_history_to_context_manager(user_prompt, response, system_prompt)
re_search_pattern = f"Target: (.*)Deliverables:(.*)Thinking directions:(.*)Business level:(.*)Algorithm level:(.*)Details:(.*)"
re_search_res = re.search(re_search_pattern, response, re.S)
@@ -260,7 +254,13 @@ class HighLevelPlanTask(PlanTask):
), "The response of config action task is not in the correct format"
self._context_manager.set_context("high_level_workflow", res.group(1).strip())
self._context_manager.set_context("high_level_experiments", res.group(2).strip())
experiment_description_search_res = re.search("1.(.*)2.(.*)", res.group(2).strip(), re.S)
assert experiment_description_search_res is not None, "The experiment description is not in the correct format"
self._context_manager.set_context("experiments_desc_1", experiment_description_search_res.group(1).strip())
self._context_manager.set_context("experiments_desc_2", experiment_description_search_res.group(2).strip())
self._context_manager.set_context("high_level_metrics", res.group(3).strip())
if "supervised learning" in self._context_manager.get_context("high_level_workflow").lower():
@@ -354,6 +354,7 @@ class SLPlanTask(PlanTask):
assert decision in ["Default", "Personized"], f"The decision of {name} is not correct"
# TODO: the strctured experiments should replace
self._context_manager.struct_context.exp_list.append(exp)
self._context_manager.set_context("experiments_difference", match_res.group(experiment_count + 1))
# 1) create a workspace
# TODO: we have to make choice between `sl` and `sl-cfg`
@@ -479,10 +480,7 @@ class TrainTask(Task):
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace), shell=True
)
# todo: dont manage record by id, experiment_id=2 doesnt contains metrics
try:
exp = R.get_exp(experiment_id="3")
except qlib.utils.exceptions.ExpAlreadyExistError:
exp = R.get_exp(experiment_id="2")
exp = R.get_exp(experiment_name="Experiment")
else:
command = f"python -m qlib.contrib.rolling ddgda --conf_path {workflow_path} run"
@@ -491,11 +489,17 @@ class TrainTask(Task):
subprocess.run(
command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace), shell=True
)
try:
exp = R.get_exp(experiment_id="3")
except qlib.utils.exceptions.ExpAlreadyExistError:
exp = R.get_exp(experiment_id="2")
exp = R.get_exp(experiment_name="Experiment")
with open(rf"{workspace}/script.sh", "a") as fw:
fw.write(command)
fw.write("\n")
fw.write("\n")
fw.flush()
with open(rf"{workspace}/README.md", "w") as fw:
fw.write(f"\n")
fw.flush()
# first recorder is the latest
recorder = exp.list_recorders(rtype=exp.RT_L)[0]
self._context_manager.set_context(f"experiment_{self._experiment_index}_recorder", recorder)
@@ -742,6 +746,8 @@ class HyperparameterFinetuneActionTask(ActionTask):
self._context_manager.set_context(f"experiment_{experiment_id}_ddgda", ddgda_res)
self._context_manager.set_context(f"experiment_{experiment_id}_config_finetune_reason", reason_res)
import shutil
shutil.copytree(r"/home/xuyang/workspace/qlib/qlib/finco/mlruns", r"/home/xuyang/workspace/qlib/qlib/finco/finco_workspace/mlruns")
return return_tasks
@@ -1160,16 +1166,12 @@ class SummarizeTask(Task):
def execute(self) -> Any:
workspace = self._context_manager.get_context("workspace")
user_intention = self._context_manager.get_context("user_intention")
file_info = self.get_info_from_file(workspace)
context_info = self.get_info_from_context() # too long context make response unstable.
target = self._context_manager.get_context(f"target")
diffrence = self._context_manager.get_context(f"experiments_difference")
target_metrics = self._context_manager.get_context(f"high_level_metrics")
figure_path = self.get_figure_path(workspace)
# todo: remove 'be' after test
be = APIBackend()
be.debug_mode = False
def _get_value_from_info(info: list, k: str):
for i in info:
if k in i.keys():
@@ -1179,40 +1181,55 @@ class SummarizeTask(Task):
experiment_count = self._context_manager.get_context("experiment_count")
for exp_id in range(1, experiment_count + 1):
recorder = self._context_manager.get_context(f"experiment_{exp_id}_recorder")
reason = self._context_manager.get_context(f"experiment_{exp_id}_config_finetune_reason")
experiments_desc = self._context_manager.get_context(f"experiments_desc_{exp_id}")
workflow_yaml = self._context_manager.get_context(f"workflow_{exp_id}_yaml")
record_info = [{"metrics": recorder.list_metrics()}]
information = context_info + file_info + record_info
# information = context_info + file_info + record_info
context_summary = {}
for key in self.__DEFAULT_SUMMARIZE_CONTEXT:
prompt_workflow_selection = self.summarize_context_user.render(
key=key, value=_get_value_from_info(info=information, k=key)
)
response = be.build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt=self.summarize_context_system.render()
)
context_summary.update({key: response})
# context_summary = {}
# for key in self.__DEFAULT_SUMMARIZE_CONTEXT:
# prompt_workflow_selection = self.summarize_context_user.render(
# key=key, value=_get_value_from_info(info=information, k=key)
# )
# response = be.build_messages_and_create_chat_completion(
# user_prompt=prompt_workflow_selection, system_prompt=self.summarize_context_system.render()
# )
# context_summary.update({key: response})
recorder.save_objects(context_summary=context_summary)
# recorder.save_objects(context_summary=context_summary)
prompt_workflow_selection = self.summarize_metrics_user.render(
information=_get_value_from_info(info=record_info, k="metrics"), user_prompt=user_intention
)
metrics_response = be.build_messages_and_create_chat_completion(
metrics_response = APIBackend().build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt=self.summarize_metrics_system.render()
)
KnowledgeBase().practice_knowledge.add([{"user_intention": user_intention, "experiment_id": exp_id,
"workflow": workflow_yaml, "reason": reason,
"experiment_metrics": metrics_response}])
experiment_practice_knowledge = f"""
user_intention: {user_intention},
experiment_id: {exp_id},
workflow yaml:
```yaml
{yaml.safe_dump(workflow_yaml)},
```
experiments description:
{experiments_desc},
experiment_metrics:
{metrics_response}
"""
KnowledgeBase().practice_knowledge.add([experiment_practice_knowledge])
prompt_workflow_selection = self.user.render(
information=file_info + KnowledgeBase().practice_knowledge.knowledge[-2:],
figure_path=figure_path, user_prompt=user_intention
experiment_1_info = KnowledgeBase().practice_knowledge.knowledge[-2],
experiment_2_info = KnowledgeBase().practice_knowledge.knowledge[-1],
figure_path=figure_path,
user_intention=user_intention,
target=target,
diffrence=diffrence,
target_metrics=target_metrics
)
response = be.build_messages_and_create_chat_completion(
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt=self.system.render()
)
self._context_manager.set_context("summary", response)

View File

@@ -55,7 +55,7 @@ class WorkflowManager:
self.prompt_template = PromptTemplate()
self.context = WorkflowContextManager(workspace=self._workspace)
self.context.set_context("workspace", self._workspace)
self.default_user_prompt = "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown."
self.default_user_prompt = "build an A-share stock market daily portfolio in quantitative investment and minimize the maximum drawdown while maintaining return."
def _confirm_and_rm(self):
# if workspace exists, please confirm and remove it. Otherwise exit.
@@ -153,7 +153,7 @@ class LearnManager:
self.wm = WorkflowManager()
self.topics = [
Topic(name=topic, describe=self.wm.prompt_template.get(f"Topic_{topic}")) for topic in self.__DEFAULT_TOPICS
Topic(name=topic, system=self.wm.prompt_template.get(f"Topic_system"), user=self.wm.prompt_template.get(f"Topic_user")) for topic in self.__DEFAULT_TOPICS
]
self.knowledge_base = KnowledgeBase()
@@ -181,25 +181,30 @@ class LearnManager:
user_intention = self.wm.context.get_context("user_intention")
summary = self.wm.context.get_context("summary")
[topic.summarize(self.knowledge_base.practice_knowledge.knowledge[-2:]) for topic in self.topics]
[self.knowledge_base.practice_knowledge.add([{"practice_knowledge": topic.knowledge}]) for topic in self.topics]
knowledge_of_topics = [{topic.name: topic.knowledge} for topic in self.topics]
target = self.wm.context.get_context(f"target")
diffrence = self.wm.context.get_context(f"experiments_difference")
target_metrics = self.wm.context.get_context(f"high_level_metrics")
for task in task_finished:
prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
summary=summary,
brief=knowledge_of_topics,
task_finished=[str(t) for t in task_finished],
task=task.__class__.__name__, system=task.system.render(), user_intention=user_intention
)
[topic.summarize(self.knowledge_base.practice_knowledge.knowledge[-2:], user_intention, target, diffrence, target_metrics) for topic in self.topics]
[self.knowledge_base.practice_knowledge.add([f"practice_knowledge on {topic.name}:\,{topic.knowledge}"]) for topic in self.topics]
# knowledge_of_topics = [{topic.name: topic.knowledge} for topic in self.topics]
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection,
system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
)
# for task in task_finished:
# prompt_workflow_selection = self.wm.prompt_template.get(f"{self.__class__.__name__}_user").render(
# summary=summary,
# brief=knowledge_of_topics,
# task_finished=[str(t) for t in task_finished],
# task=task.__class__.__name__, system=task.system.render(), user_intention=user_intention
# )
# todo: response assertion
task.prompt_template.update(key=f"{task.__class__.__name__}_system", value=Template(response))
# response = APIBackend().build_messages_and_create_chat_completion(
# user_prompt=prompt_workflow_selection,
# system_prompt=self.wm.prompt_template.get(f"{self.__class__.__name__}_system").render(),
# )
# # todo: response assertion
# task.prompt_template.update(key=f"{task.__class__.__name__}_system", value=Template(response))
self.wm.prompt_template.save(Path.joinpath(workspace, f"prompts/checkpoint_{self.epoch}.yml"))
self.wm.context.clear(reserve=["workspace"])