diff --git a/README.md b/README.md index c890afaca..dc9df109b 100644 --- a/README.md +++ b/README.md @@ -192,24 +192,6 @@ The automatic workflow may not suite the research workflow of all Quant research # [Quant Model Zoo](examples/benchmarks) -## Run a single model -`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best: -- User can use the tool `qrun` mentioned above to run a model's workflow based from a config file. -- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder. -- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). - -## Run multiple models -`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supprots *Linux* now. Other OS will be supported in the future.) - -The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. - -Here is an example of running all the models for 10 iterations: -```python -python run_all_model.py 10 -``` - -It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). - Here is a list of models built on `Qlib`. - [GBDT based on LightGBM](qlib/contrib/model/gbdt.py) - [GBDT based on Catboost](qlib/contrib/model/catboost_model.py) @@ -226,6 +208,25 @@ Here is a list of models built on `Qlib`. Your PR of new Quant models is highly welcomed. 
+## Run a single model +`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best: +- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file. +- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder. +- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). + +## Run multiple models +`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* now. Other OS will be supported in the future.) + +The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. (**Note**: the script will erase your previous experiment records created by running itself.) + +Here is an example of running all the models for 10 iterations: +```python +python run_all_model.py 10 +``` + +It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). + + # Quant Dataset Zoo Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`. - [Alpha360](./qlib/contrib/data/handler.py) diff --git a/docs/start/initialization.rst b/docs/start/initialization.rst index 423d7edf8..5615556b6 100644 --- a/docs/start/initialization.rst +++ b/docs/start/initialization.rst @@ -69,7 +69,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. 
The follo "class": "MLflowExpManager", "module_path": "qlib.workflow.expm", "kwargs": { - "uri": "python_execution_path/mlruns"), + "uri": "python_execution_path/mlruns", "default_exp_name": "Experiment", } } \ No newline at end of file diff --git a/examples/run_all_model.py b/examples/run_all_model.py index 2f6c4299e..c02077b32 100644 --- a/examples/run_all_model.py +++ b/examples/run_all_model.py @@ -4,18 +4,20 @@ import os import sys import fire +import time import venv import glob import shutil +import signal +import inspect import tempfile +import traceback +import functools import statistics +import subprocess from pathlib import Path from operator import xor -from subprocess import Popen, PIPE -from threading import Thread from pprint import pprint -from urllib.parse import urlparse -from urllib.request import urlretrieve import qlib from qlib.config import REG_CN @@ -23,144 +25,50 @@ from qlib.workflow import R from qlib.workflow.cli import workflow from qlib.utils import exists_qlib_data + # init qlib provider_uri = "~/.qlib/qlib_data/cn_data" +exp_manager = { + "class": "MLflowExpManager", + "module_path": "qlib.workflow.expm", + "kwargs": { + "uri": "file:" + str(Path(os.getcwd()).resolve() / "run_all_model_records"), + "default_exp_name": "Experiment", + }, +} if not exists_qlib_data(provider_uri): print(f"Qlib data is not found in {provider_uri}") sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts"))) from get_data import GetData GetData().qlib_data(target_dir=provider_uri, region=REG_CN) -qlib.init(provider_uri=provider_uri, region=REG_CN) +qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager) +shutil.rmtree(str(Path(os.getcwd()).resolve() / "run_all_model_records")) + +# decorator to check the arguments +def only_allow_defined_args(function_to_decorate): + @functools.wraps(function_to_decorate) + def _return_wrapped(*args, **kwargs): + """Internal wrapper function.""" + argspec = 
inspect.getfullargspec(function_to_decorate) + valid_names = set(argspec.args + argspec.kwonlyargs) + if "self" in valid_names: + valid_names.remove("self") + for arg_name in kwargs: + if arg_name not in valid_names: + raise ValueError("Unknown argument seen '%s', expected: [%s]" % (arg_name, ", ".join(valid_names))) + return function_to_decorate(*args, **kwargs) + + return _return_wrapped -class ExtendedEnvBuilder(venv.EnvBuilder): - """ - Thie class is modified based on https://docs.python.org/3/library/venv.html. - This builder installs setuptools and pip so that you can pip or - easy_install other packages into the created virtual environment. +# function to handle ctrl z and ctrl c +def handler(signum, frame): + os.system("kill -9 %d" % os.getpid()) - :param nodist: If true, setuptools and pip are not installed into the - created virtual environment. - :param nopip: If true, pip is not installed into the created - virtual environment. - :param progress: If setuptools or pip are installed, the progress of the - installation can be monitored by passing a progress - callable. If specified, it is called with two - arguments: a string indicating some progress, and a - context indicating where the string is coming from. - The context argument can have one of three values: - 'main', indicating that it is called from virtualize() - itself, and 'stdout' and 'stderr', which are obtained - by reading lines from the output streams of a subprocess - which is used to install the app. - - If a callable is not specified, default progress - information is output to sys.stderr. - """ - - def __init__(self, *args, **kwargs): - self.nodist = kwargs.pop("nodist", False) - self.nopip = kwargs.pop("nopip", False) - self.progress = kwargs.pop("progress", None) - self.verbose = kwargs.pop("verbose", False) - super().__init__(*args, **kwargs) - - def post_setup(self, context): - """ - Set up any packages which need to be pre-installed into the - virtual environment being created. 
- - :param context: The information for the virtual environment - creation request being processed. - """ - os.environ["VIRTUAL_ENV"] = context.env_dir - if not self.nodist: - self.install_setuptools(context) - # Can't install pip without setuptools - if not self.nopip and not self.nodist: - self.install_pip(context) - - def reader(self, stream, context): - """ - Read lines from a subprocess' output stream and either pass to a progress - callable (if specified) or write progress information to sys.stderr. - """ - progress = self.progress - while True: - s = stream.readline() - if not s: - break - if progress is not None: - progress(s, context) - else: - if not self.verbose: - sys.stderr.write(".") - else: - sys.stderr.write(s.decode("utf-8")) - sys.stderr.flush() - stream.close() - - def install_script(self, context, name, url): - _, _, path, _, _, _ = urlparse(url) - fn = os.path.split(path)[-1] - binpath = context.bin_path - distpath = os.path.join(binpath, fn) - # Download script into the virtual environment's binaries folder - urlretrieve(url, distpath) - progress = self.progress - if self.verbose: - term = "\n" - else: - term = "" - if progress is not None: - progress("Installing %s ...%s" % (name, term), "main") - else: - sys.stderr.write("Installing %s ...%s" % (name, term)) - sys.stderr.flush() - # Install in the virtual environment - args = [context.env_exe, fn] - p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath) - t1 = Thread(target=self.reader, args=(p.stdout, "stdout")) - t1.start() - t2 = Thread(target=self.reader, args=(p.stderr, "stderr")) - t2.start() - p.wait() - t1.join() - t2.join() - if progress is not None: - progress("done.", "main") - else: - sys.stderr.write("done.\n") - # Clean up - no longer needed - os.unlink(distpath) - - def install_setuptools(self, context): - """ - Install setuptools in the virtual environment. - - :param context: The information for the virtual environment - creation request being processed. 
- """ - url = "https://bootstrap.pypa.io/ez_setup.py" - self.install_script(context, "setuptools", url) - # clear up the setuptools archive which gets downloaded - pred = lambda o: o.startswith("setuptools-") and o.endswith(".tar.gz") - files = filter(pred, os.listdir(context.bin_path)) - for f in files: - f = os.path.join(context.bin_path, f) - os.unlink(f) - - def install_pip(self, context): - """ - Install pip in the virtual environment. - - :param context: The information for the virtual environment - creation request being processed. - """ - url = "https://bootstrap.pypa.io/get-pip.py" - self.install_script(context, "pip", url) +signal.signal(signal.SIGTSTP, handler) +signal.signal(signal.SIGINT, handler) # function to calculate the mean and std of a list in the results dictionary def cal_mean_std(results) -> dict: @@ -174,6 +82,36 @@ def cal_mean_std(results) -> dict: return mean_std +# function to create a fresh anaconda environment in a temporary directory +def create_env(): + # create env + temp_dir = tempfile.mkdtemp() + env_path = Path(temp_dir).absolute() + sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n") + execute(f"conda create --prefix {env_path} python=3.7 -y") + python_path = env_path / "bin" / "python" # TODO: FIX ME! + sys.stderr.write("\n") + # get anaconda activate path + conda_activate = Path(os.environ["CONDA_PREFIX"]) / "bin" / "activate" # TODO: FIX ME! 
+ return env_path, python_path, conda_activate + + +# function to execute the cmd in a shell, echoing its stdout; returns None on success, or an error message (str) if the command exits with a non-zero return code (stderr is not piped, so p.stderr is always None and cannot be returned) +def execute(cmd): + with subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=1, universal_newlines=True, shell=True) as p: + for line in p.stdout: + sys.stdout.write(line.split("\b")[0]) + if "\b" in line: + sys.stdout.flush() + time.sleep(0.1) + sys.stdout.write("\b" * 10 + "\b".join(line.split("\b")[1:-1])) + + if p.wait() != 0: + return f"Command exited with return code {p.returncode}: {cmd}" + else: + return None + + # function to get all the folders benchmark folder def get_all_folders(models, exclude) -> dict: folders = dict() @@ -212,11 +150,12 @@ def get_all_results(folders) -> dict: result["information_ratio_with_cost"] = list() result["max_drawdown_with_cost"] = list() for recorder_id in recorders: - recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn) - metrics = recorder.list_metrics() - result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"]) - result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"]) - result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"]) + if recorders[recorder_id]["status"] == "FINISHED": + recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn) + metrics = recorder.list_metrics() + result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"]) + result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"]) + result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"]) results[fn] = result return results @@ -237,6 +176,7 @@ def gen_and_save_md_table(metrics): # function to run the all the models +@only_allow_defined_args def run(times=1, models=None, exclude=False): """ Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future. 
@@ -275,53 +215,46 @@ def run(times=1, models=None, exclude=False): """ # get all folders folders = get_all_folders(models, exclude) - # set up - compatible = True - if sys.version_info < (3, 3): - compatible = False - elif not hasattr(sys, "base_prefix"): - compatible = False - if not compatible: - raise ValueError("This script is only for use with " "Python 3.3 or later") - if os.name == "nt": - use_symlinks = False - else: - use_symlinks = True - builder = ExtendedEnvBuilder( - system_site_packages=False, - clear=False, - symlinks=use_symlinks, - upgrade=False, - nodist=False, - nopip=False, - verbose=False, - ) + # init error messages: + errors = dict() # run all the model for iterations for fn in folders: - # create env - temp_dir = tempfile.mkdtemp() - env_path = Path(temp_dir).absolute() - sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n") - builder.create(str(env_path)) - python_path = env_path / "bin" / "python" # TODO: FIX ME! - sys.stderr.write("\n") + # create env by anaconda + env_path, python_path, conda_activate = create_env() # get all files sys.stderr.write("Retrieving files...\n") yaml_path, req_path = get_all_files(folders[fn]) sys.stderr.write("\n") # install requirements.txt sys.stderr.write("Installing requirements.txt...\n") - os.system(f"{python_path} -m pip install -r {req_path}") + execute(f"{python_path} -m pip install -r {req_path}") sys.stderr.write("\n") + # setup gpu for tft + if fn == "TFT": + execute( + f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn" + ) + sys.stderr.write("\n") # install qlib sys.stderr.write("Installing qlib...\n") - os.system(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME! - os.system(f"{python_path} -m pip install -e git+https://github.com/you-n-g/qlib#egg=pyqlib") # TODO: FIX ME! + execute(f"{python_path} -m pip install --upgrade cython") # TODO: FIX ME! 
+ if fn == "TFT": + execute( + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e git+https://github.com/you-n-g/qlib#egg=pyqlib" + ) # TODO: FIX ME! + else: + execute( + f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/you-n-g/qlib#egg=pyqlib" + ) # TODO: FIX ME! sys.stderr.write("\n") # run workflow_by_config for multiple times for i in range(times): sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n") - os.system(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}") + errs = execute(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}") + if errs is not None: + _errs = errors.get(fn, {}) + _errs.update({i: errs}) + errors[fn] = _errs sys.stderr.write("\n") # remove env sys.stderr.write(f"Deleting the environment: {env_path}...\n") @@ -335,13 +268,12 @@ def run(times=1, models=None, exclude=False): # generating md table sys.stderr.write(f"Generating markdown table...\n") gen_and_save_md_table(results) + sys.stderr.write("\n") + # print errors + sys.stderr.write(f"Here are some of the errors of the models...\n") + pprint(errors) + sys.stderr.write("\n") if __name__ == "__main__": - rc = 1 - try: - fire.Fire(run) # run all the model - rc = 0 - except Exception as e: - print("Error: %s" % e, file=sys.stderr) - sys.exit(rc) + fire.Fire(run) # run all the model diff --git a/requirements.txt b/requirements.txt index d3511d780..638ce22f4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,5 +22,4 @@ scikit_learn==0.23.2 torch==1.6.0 tqdm==4.49.0 yahooquery==2.2.7 -mlflow==1.12.1 -pytorch-tabnet==2.0.1 \ No newline at end of file +mlflow==1.12.1 \ No newline at end of file