mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Update script
This commit is contained in:
37
README.md
37
README.md
@@ -192,24 +192,6 @@ The automatic workflow may not suite the research workflow of all Quant research
|
||||
|
||||
# [Quant Model Zoo](examples/benchmarks)
|
||||
|
||||
## Run a single model
|
||||
`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
|
||||
- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
|
||||
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
|
||||
- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
|
||||
|
||||
## Run multiple models
|
||||
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* now. Other OS will be supported in the future.)
|
||||
|
||||
The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.
|
||||
|
||||
Here is an example of running all the models for 10 iterations:
|
||||
```bash
|
||||
python run_all_model.py 10
|
||||
```
|
||||
|
||||
It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
|
||||
|
||||
Here is a list of models built on `Qlib`.
|
||||
- [GBDT based on LightGBM](qlib/contrib/model/gbdt.py)
|
||||
- [GBDT based on Catboost](qlib/contrib/model/catboost_model.py)
|
||||
@@ -226,6 +208,25 @@ Here is a list of models built on `Qlib`.
|
||||
|
||||
Your PR of new Quant models is highly welcomed.
|
||||
|
||||
## Run a single model
|
||||
`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
|
||||
- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
|
||||
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
|
||||
- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
|
||||
|
||||
## Run multiple models
|
||||
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* now. Other OS will be supported in the future.)
|
||||
|
||||
The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. (**Note**: the script will erase your previous experiment records created by running itself.)
|
||||
|
||||
Here is an example of running all the models for 10 iterations:
|
||||
```bash
|
||||
python run_all_model.py 10
|
||||
```
|
||||
|
||||
It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
|
||||
|
||||
|
||||
# Quant Dataset Zoo
|
||||
Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`.
|
||||
- [Alpha360](./qlib/contrib/data/handler.py)
|
||||
|
||||
@@ -69,7 +69,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
|
||||
"class": "MLflowExpManager",
|
||||
"module_path": "qlib.workflow.expm",
|
||||
"kwargs": {
|
||||
"uri": "python_execution_path/mlruns"),
|
||||
"uri": "python_execution_path/mlruns",
|
||||
"default_exp_name": "Experiment",
|
||||
}
|
||||
}
|
||||
@@ -4,18 +4,20 @@
|
||||
import os
|
||||
import sys
|
||||
import fire
|
||||
import time
|
||||
import venv
|
||||
import glob
|
||||
import shutil
|
||||
import signal
|
||||
import inspect
|
||||
import tempfile
|
||||
import traceback
|
||||
import functools
|
||||
import statistics
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from operator import xor
|
||||
from subprocess import Popen, PIPE
|
||||
from threading import Thread
|
||||
from pprint import pprint
|
||||
from urllib.parse import urlparse
|
||||
from urllib.request import urlretrieve
|
||||
|
||||
import qlib
|
||||
from qlib.config import REG_CN
|
||||
@@ -23,144 +25,50 @@ from qlib.workflow import R
|
||||
from qlib.workflow.cli import workflow
|
||||
from qlib.utils import exists_qlib_data
|
||||
|
||||
|
||||
# init qlib
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data"
|
||||
exp_manager = {
|
||||
"class": "MLflowExpManager",
|
||||
"module_path": "qlib.workflow.expm",
|
||||
"kwargs": {
|
||||
"uri": "file:" + str(Path(os.getcwd()).resolve() / "run_all_model_records"),
|
||||
"default_exp_name": "Experiment",
|
||||
},
|
||||
}
|
||||
if not exists_qlib_data(provider_uri):
|
||||
print(f"Qlib data is not found in {provider_uri}")
|
||||
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
|
||||
from get_data import GetData
|
||||
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager)
|
||||
shutil.rmtree(str(Path(os.getcwd()).resolve() / "run_all_model_records"))
|
||||
|
||||
# decorator to check the arguments
|
||||
def only_allow_defined_args(function_to_decorate):
    """Decorate a function so that unknown keyword arguments are rejected.

    The wrapped function raises ``ValueError`` when it is called with a
    keyword argument that is not one of the decorated function's named
    parameters (positional-or-keyword and keyword-only, ``self`` excluded).
    Functions that declare ``**kwargs`` accept arbitrary keywords by
    definition, so no check is applied to them.

    :param function_to_decorate: the callable whose keyword arguments are checked.
    :return: a wrapper with the same metadata (via ``functools.wraps``).
    :raises ValueError: at call time, for the first unknown keyword argument seen.
    """
    # The signature cannot change after decoration, so inspect it only once.
    argspec = inspect.getfullargspec(function_to_decorate)
    # A list (not a set) keeps declaration order, which makes the error
    # message deterministic and easier to read.
    valid_names = [name for name in argspec.args + argspec.kwonlyargs if name != "self"]
    accepts_any_keyword = argspec.varkw is not None

    @functools.wraps(function_to_decorate)
    def _return_wrapped(*args, **kwargs):
        """Internal wrapper function."""
        if not accepts_any_keyword:
            for arg_name in kwargs:
                if arg_name not in valid_names:
                    raise ValueError("Unknown argument seen '%s', expected: [%s]" % (arg_name, ", ".join(valid_names)))
        return function_to_decorate(*args, **kwargs)

    return _return_wrapped
|
||||
|
||||
|
||||
class ExtendedEnvBuilder(venv.EnvBuilder):
|
||||
"""
|
||||
Thie class is modified based on https://docs.python.org/3/library/venv.html.
|
||||
This builder installs setuptools and pip so that you can pip or
|
||||
easy_install other packages into the created virtual environment.
|
||||
# function to handle ctrl z and ctrl c
|
||||
def handler(signum, frame):
    """Signal handler that kills the current process immediately.

    :param signum: number of the received signal (unused).
    :param frame: interrupted stack frame (unused).

    The process sends SIGKILL to itself, so no cleanup code runs afterwards.
    ``os.kill`` is used instead of shelling out to ``kill -9`` so that no
    extra shell process has to be spawned from inside a signal handler.
    """
    os.kill(os.getpid(), signal.SIGKILL)
|
||||
|
||||
:param nodist: If true, setuptools and pip are not installed into the
|
||||
created virtual environment.
|
||||
:param nopip: If true, pip is not installed into the created
|
||||
virtual environment.
|
||||
:param progress: If setuptools or pip are installed, the progress of the
|
||||
installation can be monitored by passing a progress
|
||||
callable. If specified, it is called with two
|
||||
arguments: a string indicating some progress, and a
|
||||
context indicating where the string is coming from.
|
||||
The context argument can have one of three values:
|
||||
'main', indicating that it is called from virtualize()
|
||||
itself, and 'stdout' and 'stderr', which are obtained
|
||||
by reading lines from the output streams of a subprocess
|
||||
which is used to install the app.
|
||||
|
||||
If a callable is not specified, default progress
|
||||
information is output to sys.stderr.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.nodist = kwargs.pop("nodist", False)
|
||||
self.nopip = kwargs.pop("nopip", False)
|
||||
self.progress = kwargs.pop("progress", None)
|
||||
self.verbose = kwargs.pop("verbose", False)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def post_setup(self, context):
|
||||
"""
|
||||
Set up any packages which need to be pre-installed into the
|
||||
virtual environment being created.
|
||||
|
||||
:param context: The information for the virtual environment
|
||||
creation request being processed.
|
||||
"""
|
||||
os.environ["VIRTUAL_ENV"] = context.env_dir
|
||||
if not self.nodist:
|
||||
self.install_setuptools(context)
|
||||
# Can't install pip without setuptools
|
||||
if not self.nopip and not self.nodist:
|
||||
self.install_pip(context)
|
||||
|
||||
def reader(self, stream, context):
|
||||
"""
|
||||
Read lines from a subprocess' output stream and either pass to a progress
|
||||
callable (if specified) or write progress information to sys.stderr.
|
||||
"""
|
||||
progress = self.progress
|
||||
while True:
|
||||
s = stream.readline()
|
||||
if not s:
|
||||
break
|
||||
if progress is not None:
|
||||
progress(s, context)
|
||||
else:
|
||||
if not self.verbose:
|
||||
sys.stderr.write(".")
|
||||
else:
|
||||
sys.stderr.write(s.decode("utf-8"))
|
||||
sys.stderr.flush()
|
||||
stream.close()
|
||||
|
||||
def install_script(self, context, name, url):
|
||||
_, _, path, _, _, _ = urlparse(url)
|
||||
fn = os.path.split(path)[-1]
|
||||
binpath = context.bin_path
|
||||
distpath = os.path.join(binpath, fn)
|
||||
# Download script into the virtual environment's binaries folder
|
||||
urlretrieve(url, distpath)
|
||||
progress = self.progress
|
||||
if self.verbose:
|
||||
term = "\n"
|
||||
else:
|
||||
term = ""
|
||||
if progress is not None:
|
||||
progress("Installing %s ...%s" % (name, term), "main")
|
||||
else:
|
||||
sys.stderr.write("Installing %s ...%s" % (name, term))
|
||||
sys.stderr.flush()
|
||||
# Install in the virtual environment
|
||||
args = [context.env_exe, fn]
|
||||
p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath)
|
||||
t1 = Thread(target=self.reader, args=(p.stdout, "stdout"))
|
||||
t1.start()
|
||||
t2 = Thread(target=self.reader, args=(p.stderr, "stderr"))
|
||||
t2.start()
|
||||
p.wait()
|
||||
t1.join()
|
||||
t2.join()
|
||||
if progress is not None:
|
||||
progress("done.", "main")
|
||||
else:
|
||||
sys.stderr.write("done.\n")
|
||||
# Clean up - no longer needed
|
||||
os.unlink(distpath)
|
||||
|
||||
def install_setuptools(self, context):
|
||||
"""
|
||||
Install setuptools in the virtual environment.
|
||||
|
||||
:param context: The information for the virtual environment
|
||||
creation request being processed.
|
||||
"""
|
||||
url = "https://bootstrap.pypa.io/ez_setup.py"
|
||||
self.install_script(context, "setuptools", url)
|
||||
# clear up the setuptools archive which gets downloaded
|
||||
pred = lambda o: o.startswith("setuptools-") and o.endswith(".tar.gz")
|
||||
files = filter(pred, os.listdir(context.bin_path))
|
||||
for f in files:
|
||||
f = os.path.join(context.bin_path, f)
|
||||
os.unlink(f)
|
||||
|
||||
def install_pip(self, context):
|
||||
"""
|
||||
Install pip in the virtual environment.
|
||||
|
||||
:param context: The information for the virtual environment
|
||||
creation request being processed.
|
||||
"""
|
||||
url = "https://bootstrap.pypa.io/get-pip.py"
|
||||
self.install_script(context, "pip", url)
|
||||
|
||||
signal.signal(signal.SIGTSTP, handler)
|
||||
signal.signal(signal.SIGINT, handler)
|
||||
|
||||
# function to calculate the mean and std of a list in the results dictionary
|
||||
def cal_mean_std(results) -> dict:
|
||||
@@ -174,6 +82,36 @@ def cal_mean_std(results) -> dict:
|
||||
return mean_std
|
||||
|
||||
|
||||
# function to create an anaconda environment
|
||||
def create_env():
    """Create a fresh conda environment inside a new temporary directory.

    The environment is created with ``conda create --prefix <dir> python=3.7 -y``
    run through ``execute``; progress messages are written to ``sys.stderr``.

    :return: a tuple ``(env_path, python_path, conda_activate)`` where
        ``env_path`` is the absolute path of the new environment,
        ``python_path`` is ``<env_path>/bin/python`` and ``conda_activate`` is
        ``$CONDA_PREFIX/bin/activate``.
    :raises KeyError: if the ``CONDA_PREFIX`` environment variable is not set.
    """
    env_path = Path(tempfile.mkdtemp()).absolute()
    sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
    execute(f"conda create --prefix {env_path} python=3.7 -y")
    sys.stderr.write("\n")

    # TODO: FIX ME! (kept from the original; presumably refers to the
    # hard-coded "bin/" layout of both paths below -- confirm)
    python_path = env_path.joinpath("bin", "python")
    conda_activate = Path(os.environ["CONDA_PREFIX"]).joinpath("bin", "activate")
    return env_path, python_path, conda_activate
|
||||
|
||||
|
||||
# function to execute the cmd
|
||||
def execute(cmd):
    """Run ``cmd`` through the shell, echoing its stdout as it is produced.

    Each stdout line is written to ``sys.stdout``. When a line contains
    backspace characters (``"\\b"``), only the text before the first one is
    written at once; after a short pause ten backspaces plus the middle
    segments of the line are emitted (presumably to replay progress-style
    output -- confirm against the tools being run).

    The command's stderr is captured, replayed on ``sys.stderr`` once the
    command has finished, and returned on failure. Previously stderr was never
    piped, so ``p.stderr`` was always ``None`` and a failed command looked the
    same as a successful one to the caller.

    :param cmd: the shell command line to run (executed with ``shell=True``).
    :return: ``None`` if the command exits with status 0; otherwise the text
        the command wrote to stderr, or a short message carrying the exit
        status when stderr was empty. The failure value is never ``None``.
    """
    # stderr is sent to a temporary file rather than a pipe: stdout is drained
    # line by line below, and an undrained stderr pipe could fill up and block
    # the child process.
    with tempfile.TemporaryFile() as err_file:
        with subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=err_file, bufsize=1, universal_newlines=True, shell=True
        ) as p:
            for line in p.stdout:
                head, *rest = line.split("\b")
                sys.stdout.write(head)
                if rest:  # the line contained at least one "\b"
                    sys.stdout.flush()
                    time.sleep(0.1)
                    sys.stdout.write("\b" * 10 + "\b".join(rest[:-1]))
        # Leaving the Popen context waits for the child, so returncode is set
        # and everything it wrote to stderr is in the file.
        err_file.seek(0)
        err_text = err_file.read().decode(errors="replace")

    if err_text:
        # Keep the command's diagnostics visible to the user.
        sys.stderr.write(err_text)
        sys.stderr.flush()

    if p.returncode != 0:
        return err_text or f"command exited with status {p.returncode}"
    return None
|
||||
|
||||
|
||||
# function to get all the folders benchmark folder
|
||||
def get_all_folders(models, exclude) -> dict:
|
||||
folders = dict()
|
||||
@@ -212,11 +150,12 @@ def get_all_results(folders) -> dict:
|
||||
result["information_ratio_with_cost"] = list()
|
||||
result["max_drawdown_with_cost"] = list()
|
||||
for recorder_id in recorders:
|
||||
recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
|
||||
metrics = recorder.list_metrics()
|
||||
result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
|
||||
result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
|
||||
result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
|
||||
if recorders[recorder_id]["status"] == "FINISHED":
|
||||
recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
|
||||
metrics = recorder.list_metrics()
|
||||
result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
|
||||
result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
|
||||
result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
|
||||
results[fn] = result
|
||||
return results
|
||||
|
||||
@@ -237,6 +176,7 @@ def gen_and_save_md_table(metrics):
|
||||
|
||||
|
||||
# function to run the all the models
|
||||
@only_allow_defined_args
|
||||
def run(times=1, models=None, exclude=False):
|
||||
"""
|
||||
Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
|
||||
@@ -275,53 +215,46 @@ def run(times=1, models=None, exclude=False):
|
||||
"""
|
||||
# get all folders
|
||||
folders = get_all_folders(models, exclude)
|
||||
# set up
|
||||
compatible = True
|
||||
if sys.version_info < (3, 3):
|
||||
compatible = False
|
||||
elif not hasattr(sys, "base_prefix"):
|
||||
compatible = False
|
||||
if not compatible:
|
||||
raise ValueError("This script is only for use with " "Python 3.3 or later")
|
||||
if os.name == "nt":
|
||||
use_symlinks = False
|
||||
else:
|
||||
use_symlinks = True
|
||||
builder = ExtendedEnvBuilder(
|
||||
system_site_packages=False,
|
||||
clear=False,
|
||||
symlinks=use_symlinks,
|
||||
upgrade=False,
|
||||
nodist=False,
|
||||
nopip=False,
|
||||
verbose=False,
|
||||
)
|
||||
# init error messages:
|
||||
errors = dict()
|
||||
# run all the model for iterations
|
||||
for fn in folders:
|
||||
# create env
|
||||
temp_dir = tempfile.mkdtemp()
|
||||
env_path = Path(temp_dir).absolute()
|
||||
sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
|
||||
builder.create(str(env_path))
|
||||
python_path = env_path / "bin" / "python" # TODO: FIX ME!
|
||||
sys.stderr.write("\n")
|
||||
# create env by anaconda
|
||||
env_path, python_path, conda_activate = create_env()
|
||||
# get all files
|
||||
sys.stderr.write("Retrieving files...\n")
|
||||
yaml_path, req_path = get_all_files(folders[fn])
|
||||
sys.stderr.write("\n")
|
||||
# install requirements.txt
|
||||
sys.stderr.write("Installing requirements.txt...\n")
|
||||
os.system(f"{python_path} -m pip install -r {req_path}")
|
||||
execute(f"{python_path} -m pip install -r {req_path}")
|
||||
sys.stderr.write("\n")
|
||||
# setup gpu for tft
|
||||
if fn == "TFT":
|
||||
execute(
|
||||
f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn"
|
||||
)
|
||||
sys.stderr.write("\n")
|
||||
# install qlib
|
||||
sys.stderr.write("Installing qlib...\n")
|
||||
os.system(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME!
|
||||
os.system(f"{python_path} -m pip install -e git+https://github.com/you-n-g/qlib#egg=pyqlib") # TODO: FIX ME!
|
||||
execute(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME!
|
||||
if fn == "TFT":
|
||||
execute(
|
||||
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
|
||||
) # TODO: FIX ME!
|
||||
else:
|
||||
execute(
|
||||
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
|
||||
) # TODO: FIX ME!
|
||||
sys.stderr.write("\n")
|
||||
# run workflow_by_config for multiple times
|
||||
for i in range(times):
|
||||
sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n")
|
||||
os.system(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
|
||||
errs = execute(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
|
||||
if errs is not None:
|
||||
_errs = errors.get(fn, {})
|
||||
_errs.update({i: errs})
|
||||
errors[fn] = _errs
|
||||
sys.stderr.write("\n")
|
||||
# remove env
|
||||
sys.stderr.write(f"Deleting the environment: {env_path}...\n")
|
||||
@@ -335,13 +268,12 @@ def run(times=1, models=None, exclude=False):
|
||||
# generating md table
|
||||
sys.stderr.write(f"Generating markdown table...\n")
|
||||
gen_and_save_md_table(results)
|
||||
sys.stderr.write("\n")
|
||||
# print erros
|
||||
sys.stderr.write(f"Here are some of the errors of the models...\n")
|
||||
pprint(errors)
|
||||
sys.stderr.write("\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
rc = 1
|
||||
try:
|
||||
fire.Fire(run) # run all the model
|
||||
rc = 0
|
||||
except Exception as e:
|
||||
print("Error: %s" % e, file=sys.stderr)
|
||||
sys.exit(rc)
|
||||
fire.Fire(run) # run all the model
|
||||
|
||||
@@ -22,5 +22,4 @@ scikit_learn==0.23.2
|
||||
torch==1.6.0
|
||||
tqdm==4.49.0
|
||||
yahooquery==2.2.7
|
||||
mlflow==1.12.1
|
||||
pytorch-tabnet==2.0.1
|
||||
mlflow==1.12.1
|
||||
Reference in New Issue
Block a user