1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

Update script

This commit is contained in:
Jactus
2020-11-27 19:46:52 +08:00
parent e4e730bada
commit 2311af5e47
4 changed files with 127 additions and 195 deletions

View File

@@ -192,24 +192,6 @@ The automatic workflow may not suite the research workflow of all Quant research
# [Quant Model Zoo](examples/benchmarks)
## Run a single model
`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
## Run multiple models
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* now. Other OS will be supported in the future.)
The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.
Here is an example of running all the models for 10 iterations:
```python
python run_all_model.py 10
```
It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
Here is a list of models built on `Qlib`.
- [GBDT based on LightGBM](qlib/contrib/model/gbdt.py)
- [GBDT based on Catboost](qlib/contrib/model/catboost_model.py)
@@ -226,6 +208,25 @@ Here is a list of models built on `Qlib`.
Your PR of new Quant models is highly welcomed.
## Run a single model
`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
## Run multiple models
`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* now. Other OS will be supported in the future.)
The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. (**Note**: the script will erase your previous experiment records created by running itself.)
Here is an example of running all the models for 10 iterations:
```python
python run_all_model.py 10
```
It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
# Quant Dataset Zoo
Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`.
- [Alpha360](./qlib/contrib/data/handler.py)

View File

@@ -69,7 +69,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
"class": "MLflowExpManager",
"module_path": "qlib.workflow.expm",
"kwargs": {
"uri": "python_execution_path/mlruns"),
"uri": "python_execution_path/mlruns",
"default_exp_name": "Experiment",
}
}

View File

@@ -4,18 +4,20 @@
import os
import sys
import fire
import time
import venv
import glob
import shutil
import signal
import inspect
import tempfile
import traceback
import functools
import statistics
import subprocess
from pathlib import Path
from operator import xor
from subprocess import Popen, PIPE
from threading import Thread
from pprint import pprint
from urllib.parse import urlparse
from urllib.request import urlretrieve
import qlib
from qlib.config import REG_CN
@@ -23,144 +25,50 @@ from qlib.workflow import R
from qlib.workflow.cli import workflow
from qlib.utils import exists_qlib_data
# init qlib
provider_uri = "~/.qlib/qlib_data/cn_data"
exp_manager = {
"class": "MLflowExpManager",
"module_path": "qlib.workflow.expm",
"kwargs": {
"uri": "file:" + str(Path(os.getcwd()).resolve() / "run_all_model_records"),
"default_exp_name": "Experiment",
},
}
if not exists_qlib_data(provider_uri):
print(f"Qlib data is not found in {provider_uri}")
sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
from get_data import GetData
GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
qlib.init(provider_uri=provider_uri, region=REG_CN)
qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager)
shutil.rmtree(str(Path(os.getcwd()).resolve() / "run_all_model_records"))
# decorator to check the arguments
def only_allow_defined_args(function_to_decorate):
    """Decorator that rejects keyword arguments the wrapped function does not declare.

    On every call the wrapper inspects the signature of ``function_to_decorate``
    and collects its positional-or-keyword and keyword-only parameter names
    (``self`` is excluded). If any keyword passed by the caller is not among
    those names, a ``ValueError`` is raised before the wrapped function runs.
    Positional arguments are forwarded unchecked.

    :param function_to_decorate: the callable whose keyword arguments are validated.
    :return: a wrapper with the same metadata as ``function_to_decorate``.
    :raises ValueError: (from the wrapper) when an unknown keyword argument is seen.
    """

    @functools.wraps(function_to_decorate)
    def _return_wrapped(*args, **kwargs):
        """Internal wrapper function."""
        spec = inspect.getfullargspec(function_to_decorate)
        allowed = set(spec.args + spec.kwonlyargs)
        # ``self`` is bound implicitly and must never be passed by keyword
        allowed.discard("self")
        for keyword in kwargs:
            if keyword in allowed:
                continue
            raise ValueError("Unknown argument seen '%s', expected: [%s]" % (keyword, ", ".join(allowed)))
        return function_to_decorate(*args, **kwargs)

    return _return_wrapped
class ExtendedEnvBuilder(venv.EnvBuilder):
"""
Thie class is modified based on https://docs.python.org/3/library/venv.html.
This builder installs setuptools and pip so that you can pip or
easy_install other packages into the created virtual environment.
# function to handle ctrl z and ctrl c
def handler(signum, frame):
os.system("kill -9 %d" % os.getpid())
:param nodist: If true, setuptools and pip are not installed into the
created virtual environment.
:param nopip: If true, pip is not installed into the created
virtual environment.
:param progress: If setuptools or pip are installed, the progress of the
installation can be monitored by passing a progress
callable. If specified, it is called with two
arguments: a string indicating some progress, and a
context indicating where the string is coming from.
The context argument can have one of three values:
'main', indicating that it is called from virtualize()
itself, and 'stdout' and 'stderr', which are obtained
by reading lines from the output streams of a subprocess
which is used to install the app.
If a callable is not specified, default progress
information is output to sys.stderr.
"""
def __init__(self, *args, **kwargs):
self.nodist = kwargs.pop("nodist", False)
self.nopip = kwargs.pop("nopip", False)
self.progress = kwargs.pop("progress", None)
self.verbose = kwargs.pop("verbose", False)
super().__init__(*args, **kwargs)
def post_setup(self, context):
"""
Set up any packages which need to be pre-installed into the
virtual environment being created.
:param context: The information for the virtual environment
creation request being processed.
"""
os.environ["VIRTUAL_ENV"] = context.env_dir
if not self.nodist:
self.install_setuptools(context)
# Can't install pip without setuptools
if not self.nopip and not self.nodist:
self.install_pip(context)
def reader(self, stream, context):
"""
Read lines from a subprocess' output stream and either pass to a progress
callable (if specified) or write progress information to sys.stderr.
"""
progress = self.progress
while True:
s = stream.readline()
if not s:
break
if progress is not None:
progress(s, context)
else:
if not self.verbose:
sys.stderr.write(".")
else:
sys.stderr.write(s.decode("utf-8"))
sys.stderr.flush()
stream.close()
def install_script(self, context, name, url):
_, _, path, _, _, _ = urlparse(url)
fn = os.path.split(path)[-1]
binpath = context.bin_path
distpath = os.path.join(binpath, fn)
# Download script into the virtual environment's binaries folder
urlretrieve(url, distpath)
progress = self.progress
if self.verbose:
term = "\n"
else:
term = ""
if progress is not None:
progress("Installing %s ...%s" % (name, term), "main")
else:
sys.stderr.write("Installing %s ...%s" % (name, term))
sys.stderr.flush()
# Install in the virtual environment
args = [context.env_exe, fn]
p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath)
t1 = Thread(target=self.reader, args=(p.stdout, "stdout"))
t1.start()
t2 = Thread(target=self.reader, args=(p.stderr, "stderr"))
t2.start()
p.wait()
t1.join()
t2.join()
if progress is not None:
progress("done.", "main")
else:
sys.stderr.write("done.\n")
# Clean up - no longer needed
os.unlink(distpath)
def install_setuptools(self, context):
"""
Install setuptools in the virtual environment.
:param context: The information for the virtual environment
creation request being processed.
"""
url = "https://bootstrap.pypa.io/ez_setup.py"
self.install_script(context, "setuptools", url)
# clear up the setuptools archive which gets downloaded
pred = lambda o: o.startswith("setuptools-") and o.endswith(".tar.gz")
files = filter(pred, os.listdir(context.bin_path))
for f in files:
f = os.path.join(context.bin_path, f)
os.unlink(f)
def install_pip(self, context):
"""
Install pip in the virtual environment.
:param context: The information for the virtual environment
creation request being processed.
"""
url = "https://bootstrap.pypa.io/get-pip.py"
self.install_script(context, "pip", url)
signal.signal(signal.SIGTSTP, handler)
signal.signal(signal.SIGINT, handler)
# function to calculate the mean and std of a list in the results dictionary
def cal_mean_std(results) -> dict:
@@ -174,6 +82,36 @@ def cal_mean_std(results) -> dict:
return mean_std
# function to create a temporary anaconda environment
def create_env():
    """Create a throw-away conda environment in a fresh temporary directory.

    A new directory is made with ``tempfile.mkdtemp`` and a Python 3.7 conda
    environment is created at that prefix by shelling out to ``conda create``.

    :return: a tuple ``(env_path, python_path, conda_activate)`` where
        ``env_path`` is the absolute path of the new environment,
        ``python_path`` is ``env_path/bin/python`` and ``conda_activate`` is
        ``$CONDA_PREFIX/bin/activate``.
    :raises KeyError: if the ``CONDA_PREFIX`` environment variable is not set.
    """
    # create env
    env_path = Path(tempfile.mkdtemp()).absolute()
    sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
    execute(f"conda create --prefix {env_path} python=3.7 -y")
    sys.stderr.write("\n")

    # POSIX-style layout is assumed for the interpreter location
    python_path = env_path.joinpath("bin", "python")  # TODO: FIX ME!

    # get anaconda activate path
    conda_root = Path(os.environ["CONDA_PREFIX"])
    conda_activate = conda_root.joinpath("bin", "activate")  # TODO: FIX ME!
    return env_path, python_path, conda_activate
# function to execute the cmd
def execute(cmd):
    """Run ``cmd`` through the shell, echo its stdout, and report failure.

    The child's stdout is read line by line and forwarded to ``sys.stdout``.
    Lines containing backspace characters (``"\\b"``, as emitted by progress
    spinners) are replayed with a short pause so the in-place update stays
    readable. The child's stderr is not captured; it goes wherever the parent's
    stderr goes.

    :param cmd: the shell command line to run (executed with ``shell=True``,
        so it must come from a trusted source).
    :return: ``None`` when the command exits with status 0; otherwise a string
        describing the failure (exit status and command), so callers can test
        ``result is not None`` to detect errors.
    """
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True, shell=True) as p:
        for line in p.stdout:
            pieces = line.split("\b")
            sys.stdout.write(pieces[0])
            if len(pieces) > 1:
                # the line carries backspaces: show the first frame, pause, then redraw
                sys.stdout.flush()
                time.sleep(0.1)
                sys.stdout.write("\b" * 10 + "\b".join(pieces[1:-1]))

    # Leaving the ``with`` block waits for the child, so ``returncode`` is set here.
    if p.returncode != 0:
        # stderr is not piped, so ``p.stderr`` is always None; returning it would
        # make every failure look like a success to the caller.
        return f"Command exited with return code {p.returncode}: {cmd}"
    return None
# function to get all the folders in the benchmark folder
def get_all_folders(models, exclude) -> dict:
folders = dict()
@@ -212,11 +150,12 @@ def get_all_results(folders) -> dict:
result["information_ratio_with_cost"] = list()
result["max_drawdown_with_cost"] = list()
for recorder_id in recorders:
recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
metrics = recorder.list_metrics()
result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
if recorders[recorder_id]["status"] == "FINISHED":
recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
metrics = recorder.list_metrics()
result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
results[fn] = result
return results
@@ -237,6 +176,7 @@ def gen_and_save_md_table(metrics):
# function to run all the models
@only_allow_defined_args
def run(times=1, models=None, exclude=False):
"""
Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
@@ -275,53 +215,46 @@ def run(times=1, models=None, exclude=False):
"""
# get all folders
folders = get_all_folders(models, exclude)
# set up
compatible = True
if sys.version_info < (3, 3):
compatible = False
elif not hasattr(sys, "base_prefix"):
compatible = False
if not compatible:
raise ValueError("This script is only for use with " "Python 3.3 or later")
if os.name == "nt":
use_symlinks = False
else:
use_symlinks = True
builder = ExtendedEnvBuilder(
system_site_packages=False,
clear=False,
symlinks=use_symlinks,
upgrade=False,
nodist=False,
nopip=False,
verbose=False,
)
# init error messages:
errors = dict()
# run all the model for iterations
for fn in folders:
# create env
temp_dir = tempfile.mkdtemp()
env_path = Path(temp_dir).absolute()
sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
builder.create(str(env_path))
python_path = env_path / "bin" / "python" # TODO: FIX ME!
sys.stderr.write("\n")
# create env by anaconda
env_path, python_path, conda_activate = create_env()
# get all files
sys.stderr.write("Retrieving files...\n")
yaml_path, req_path = get_all_files(folders[fn])
sys.stderr.write("\n")
# install requirements.txt
sys.stderr.write("Installing requirements.txt...\n")
os.system(f"{python_path} -m pip install -r {req_path}")
execute(f"{python_path} -m pip install -r {req_path}")
sys.stderr.write("\n")
# setup gpu for tft
if fn == "TFT":
execute(
f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn"
)
sys.stderr.write("\n")
# install qlib
sys.stderr.write("Installing qlib...\n")
os.system(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME!
os.system(f"{python_path} -m pip install -e git+https://github.com/you-n-g/qlib#egg=pyqlib") # TODO: FIX ME!
execute(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME!
if fn == "TFT":
execute(
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
) # TODO: FIX ME!
else:
execute(
f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
) # TODO: FIX ME!
sys.stderr.write("\n")
# run workflow_by_config for multiple times
for i in range(times):
sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n")
os.system(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
errs = execute(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
if errs is not None:
_errs = errors.get(fn, {})
_errs.update({i: errs})
errors[fn] = _errs
sys.stderr.write("\n")
# remove env
sys.stderr.write(f"Deleting the environment: {env_path}...\n")
@@ -335,13 +268,12 @@ def run(times=1, models=None, exclude=False):
# generating md table
sys.stderr.write(f"Generating markdown table...\n")
gen_and_save_md_table(results)
sys.stderr.write("\n")
# print erros
sys.stderr.write(f"Here are some of the errors of the models...\n")
pprint(errors)
sys.stderr.write("\n")
if __name__ == "__main__":
rc = 1
try:
fire.Fire(run) # run all the model
rc = 0
except Exception as e:
print("Error: %s" % e, file=sys.stderr)
sys.exit(rc)
fire.Fire(run) # run all the model

View File

@@ -22,5 +22,4 @@ scikit_learn==0.23.2
torch==1.6.0
tqdm==4.49.0
yahooquery==2.2.7
mlflow==1.12.1
pytorch-tabnet==2.0.1
mlflow==1.12.1