Update script

2026-07-21 11:17:34 +08:00 · 2020-11-27 19:46:52 +08:00
parent e4e730bada
commit 2311af5e47
4 changed files with 127 additions and 195 deletions
--- a/README.md
+++ b/README.md
@@ -192,24 +192,6 @@ The automatic workflow may not suite the research workflow of all Quant research

 # [Quant Model Zoo](examples/benchmarks)

-## Run a single model
-`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
- User can use the tool `qrun` mentioned above to run a model's workflow based from a config file.
- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
-
-## Run multiple models
-`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supprots *Linux* now. Other OS will be supported in the future.)
-
-The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. 
-
-Here is an example of running all the models for 10 iterations:
-```python
-python run_all_model.py 10
-```
-
-It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). 
-
 Here is a list of models built on `Qlib`.
 - [GBDT based on LightGBM](qlib/contrib/model/gbdt.py)
 - [GBDT based on Catboost](qlib/contrib/model/catboost_model.py)
@@ -226,6 +208,25 @@ Here is a list of models built on `Qlib`.

 Your PR of new Quant models is highly welcomed.

+## Run a single model
+`Qlib` provides three different ways to run a single model. Users can pick the one that best fits their case:
+- Users can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
+- Users can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
+- Users can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
+
+## Run multiple models
+`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* for now. Other operating systems will be supported in the future.)
+
+The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored. (**Note**: the script will erase your previous experiment records created by running itself.)
+
+Here is an example of running all the models for 10 iterations:
+```bash
+python run_all_model.py 10
+```
+
+It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). 
+
+
 # Quant Dataset Zoo
 Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`.
 - [Alpha360](./qlib/contrib/data/handler.py)
--- a/docs/start/initialization.rst
+++ b/docs/start/initialization.rst
@@ -69,7 +69,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
            "class": "MLflowExpManager",
            "module_path": "qlib.workflow.expm",
            "kwargs": {
-                "uri": "python_execution_path/mlruns"),
+                "uri": "python_execution_path/mlruns",
                "default_exp_name": "Experiment",
            }
        }
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -4,18 +4,20 @@
 import os
 import sys
 import fire
+import time
 import venv
 import glob
 import shutil
+import signal
+import inspect
 import tempfile
+import traceback
+import functools
 import statistics
+import subprocess
 from pathlib import Path
 from operator import xor
-from subprocess import Popen, PIPE
-from threading import Thread
 from pprint import pprint
-from urllib.parse import urlparse
-from urllib.request import urlretrieve

 import qlib
 from qlib.config import REG_CN
@@ -23,144 +25,50 @@ from qlib.workflow import R
 from qlib.workflow.cli import workflow
 from qlib.utils import exists_qlib_data

+
 # init qlib
 provider_uri = "~/.qlib/qlib_data/cn_data"
+# folder (under the current working directory) that holds the experiment records of this script
+exp_path = Path(os.getcwd()).resolve() / "run_all_model_records"
+# experiment manager config handed to `qlib.init`; records are written to `exp_path` as a file store
+exp_manager = {
+    "class": "MLflowExpManager",
+    "module_path": "qlib.workflow.expm",
+    "kwargs": {
+        "uri": "file:" + str(exp_path),
+        "default_exp_name": "Experiment",
+    },
+}
 if not exists_qlib_data(provider_uri):
     print(f"Qlib data is not found in {provider_uri}")
     sys.path.append(str(Path(__file__).resolve().parent.parent.joinpath("scripts")))
     from get_data import GetData
 
     GetData().qlib_data(target_dir=provider_uri, region=REG_CN)
-qlib.init(provider_uri=provider_uri, region=REG_CN)
+qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager=exp_manager)
+# erase the records of previous runs; guard the call because `shutil.rmtree` raises
+# FileNotFoundError when the folder does not exist (e.g. the very first run)
+if exp_path.is_dir():
+    shutil.rmtree(str(exp_path))
+
+# decorator to check the arguments
+def only_allow_defined_args(function_to_decorate):
+    """Reject keyword arguments that ``function_to_decorate`` does not declare.
+
+    The wrapped function is called unchanged when every keyword passed by the
+    caller is one of its positional-or-keyword or keyword-only parameters
+    (``self`` excluded). Functions that declare ``**kwargs`` accept any keyword.
+
+    :param function_to_decorate: the callable whose keyword arguments are checked.
+    :return: a wrapper with the same metadata (via ``functools.wraps``).
+    :raises ValueError: from the wrapper, when an undeclared keyword is passed.
+    """
+    # the signature never changes, so inspect it once here instead of on every call
+    argspec = inspect.getfullargspec(function_to_decorate)
+    valid_names = set(argspec.args + argspec.kwonlyargs)
+    valid_names.discard("self")
+    # a function declaring **kwargs takes any keyword, so there is nothing to reject
+    accepts_any_keyword = argspec.varkw is not None
+
+    @functools.wraps(function_to_decorate)
+    def _return_wrapped(*args, **kwargs):
+        """Internal wrapper function."""
+        if not accepts_any_keyword:
+            for arg_name in kwargs:
+                if arg_name not in valid_names:
+                    # sort the names so the message is stable between runs
+                    raise ValueError(
+                        "Unknown argument seen '%s', expected: [%s]" % (arg_name, ", ".join(sorted(valid_names)))
+                    )
+        return function_to_decorate(*args, **kwargs)
+
+    return _return_wrapped


-class ExtendedEnvBuilder(venv.EnvBuilder):
-    """
-    Thie class is modified based on https://docs.python.org/3/library/venv.html.
-    This builder installs setuptools and pip so that you can pip or
-    easy_install other packages into the created virtual environment.
+# function to handle ctrl z and ctrl c
+def handler(signum, frame):
+    """Signal handler that kills the current process with SIGKILL.
+
+    :param signum: number of the received signal (unused).
+    :param frame: stack frame that was interrupted (unused).
+    """
+    # send SIGKILL directly instead of spawning a shell just to run `kill -9 <pid>`
+    os.kill(os.getpid(), signal.SIGKILL)

-    :param nodist: If true, setuptools and pip are not installed into the
-                   created virtual environment.
-    :param nopip: If true, pip is not installed into the created
-                  virtual environment.
-    :param progress: If setuptools or pip are installed, the progress of the
-                     installation can be monitored by passing a progress
-                     callable. If specified, it is called with two
-                     arguments: a string indicating some progress, and a
-                     context indicating where the string is coming from.
-                     The context argument can have one of three values:
-                     'main', indicating that it is called from virtualize()
-                     itself, and 'stdout' and 'stderr', which are obtained
-                     by reading lines from the output streams of a subprocess
-                     which is used to install the app.
-
-                     If a callable is not specified, default progress
-                     information is output to sys.stderr.
-    """
-
-    def __init__(self, *args, **kwargs):
-        self.nodist = kwargs.pop("nodist", False)
-        self.nopip = kwargs.pop("nopip", False)
-        self.progress = kwargs.pop("progress", None)
-        self.verbose = kwargs.pop("verbose", False)
-        super().__init__(*args, **kwargs)
-
-    def post_setup(self, context):
-        """
-        Set up any packages which need to be pre-installed into the
-        virtual environment being created.
-
-        :param context: The information for the virtual environment
-                        creation request being processed.
-        """
-        os.environ["VIRTUAL_ENV"] = context.env_dir
-        if not self.nodist:
-            self.install_setuptools(context)
-        # Can't install pip without setuptools
-        if not self.nopip and not self.nodist:
-            self.install_pip(context)
-
-    def reader(self, stream, context):
-        """
-        Read lines from a subprocess' output stream and either pass to a progress
-        callable (if specified) or write progress information to sys.stderr.
-        """
-        progress = self.progress
-        while True:
-            s = stream.readline()
-            if not s:
-                break
-            if progress is not None:
-                progress(s, context)
-            else:
-                if not self.verbose:
-                    sys.stderr.write(".")
-                else:
-                    sys.stderr.write(s.decode("utf-8"))
-                sys.stderr.flush()
-        stream.close()
-
-    def install_script(self, context, name, url):
-        _, _, path, _, _, _ = urlparse(url)
-        fn = os.path.split(path)[-1]
-        binpath = context.bin_path
-        distpath = os.path.join(binpath, fn)
-        # Download script into the virtual environment's binaries folder
-        urlretrieve(url, distpath)
-        progress = self.progress
-        if self.verbose:
-            term = "\n"
-        else:
-            term = ""
-        if progress is not None:
-            progress("Installing %s ...%s" % (name, term), "main")
-        else:
-            sys.stderr.write("Installing %s ...%s" % (name, term))
-            sys.stderr.flush()
-        # Install in the virtual environment
-        args = [context.env_exe, fn]
-        p = Popen(args, stdout=PIPE, stderr=PIPE, cwd=binpath)
-        t1 = Thread(target=self.reader, args=(p.stdout, "stdout"))
-        t1.start()
-        t2 = Thread(target=self.reader, args=(p.stderr, "stderr"))
-        t2.start()
-        p.wait()
-        t1.join()
-        t2.join()
-        if progress is not None:
-            progress("done.", "main")
-        else:
-            sys.stderr.write("done.\n")
-        # Clean up - no longer needed
-        os.unlink(distpath)
-
-    def install_setuptools(self, context):
-        """
-        Install setuptools in the virtual environment.
-
-        :param context: The information for the virtual environment
-                        creation request being processed.
-        """
-        url = "https://bootstrap.pypa.io/ez_setup.py"
-        self.install_script(context, "setuptools", url)
-        # clear up the setuptools archive which gets downloaded
-        pred = lambda o: o.startswith("setuptools-") and o.endswith(".tar.gz")
-        files = filter(pred, os.listdir(context.bin_path))
-        for f in files:
-            f = os.path.join(context.bin_path, f)
-            os.unlink(f)
-
-    def install_pip(self, context):
-        """
-        Install pip in the virtual environment.
-
-        :param context: The information for the virtual environment
-                        creation request being processed.
-        """
-        url = "https://bootstrap.pypa.io/get-pip.py"
-        self.install_script(context, "pip", url)

+# route ctrl z (SIGTSTP) and ctrl c (SIGINT) to `handler`, which kills this process
+signal.signal(signal.SIGTSTP, handler)
+signal.signal(signal.SIGINT, handler)

 # function to calculate the mean and std of a list in the results dictionary
 def cal_mean_std(results) -> dict:
@@ -174,6 +82,36 @@ def cal_mean_std(results) -> dict:
    return mean_std


+# function to create a fresh anaconda environment in a temporary folder
+def create_env():
+    """Create a conda environment (python 3.7) inside a new temporary directory.
+
+    The result of the ``conda create`` command is not checked here.
+
+    :return: tuple ``(env_path, python_path, conda_activate)`` where ``env_path`` is the
+             absolute path of the new environment, ``python_path`` is ``env_path/bin/python``
+             and ``conda_activate`` is ``$CONDA_PREFIX/bin/activate``.
+    :raises KeyError: if the ``CONDA_PREFIX`` environment variable is not set.
+    """
+    # the environment lives in its own temporary folder
+    env_path = Path(tempfile.mkdtemp()).absolute()
+    sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
+    execute(f"conda create --prefix {env_path} python=3.7 -y")
+    sys.stderr.write("\n")
+    python_path = env_path / "bin" / "python"  # TODO: FIX ME!
+    # get anaconda activate path from the currently active conda installation
+    conda_root = Path(os.environ["CONDA_PREFIX"])
+    conda_activate = conda_root / "bin" / "activate"  # TODO: FIX ME!
+    return env_path, python_path, conda_activate
+
+
+# function to execute the cmd
+def execute(cmd):
+    """Run ``cmd`` through the shell, echo its output and report failures.
+
+    Standard output is streamed to ``sys.stdout`` line by line; lines containing
+    backspaces (progress animations) are replayed with a short pause. Standard error
+    is collected and written to ``sys.stderr`` once the command has finished.
+
+    :param cmd: the shell command line to run (executed with ``shell=True``, so it must
+                come from a trusted source).
+    :return: ``None`` when the command exits with code 0, otherwise the captured stderr
+             text (or a short message with the return code when stderr was empty).
+    """
+    # stderr goes to a temporary file rather than a pipe: a pipe that is only read after
+    # stdout is exhausted could fill up and block the child process
+    with tempfile.TemporaryFile() as err_file:
+        with subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=err_file, bufsize=1, universal_newlines=True, shell=True
+        ) as p:
+            for line in p.stdout:
+                head, *rest = line.split("\b")
+                sys.stdout.write(head)
+                if rest:
+                    sys.stdout.flush()
+                    time.sleep(0.1)
+                    sys.stdout.write("\b" * 10 + "\b".join(rest[:-1]))
+        # leaving the Popen context waits for the process, so stderr is complete here
+        err_file.seek(0)
+        err_text = err_file.read().decode(errors="replace")
+
+    if err_text:
+        sys.stderr.write(err_text)
+    if p.returncode != 0:
+        # never return None on failure, otherwise callers cannot tell it from success
+        return err_text or f"Command exited with return code {p.returncode}: {cmd}"
+    return None
+
+
 # function to get all the folders benchmark folder
 def get_all_folders(models, exclude) -> dict:
    folders = dict()
@@ -212,11 +150,12 @@ def get_all_results(folders) -> dict:
        result["information_ratio_with_cost"] = list()
        result["max_drawdown_with_cost"] = list()
        for recorder_id in recorders:
-            recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
-            metrics = recorder.list_metrics()
-            result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
-            result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
-            result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
+            if recorders[recorder_id]["status"] == "FINISHED":
+                recorder = R.get_recorder(recorder_id=recorder_id, experiment_name=fn)
+                metrics = recorder.list_metrics()
+                result["annualized_return_with_cost"].append(metrics["excess_return_with_cost.annualized_return"])
+                result["information_ratio_with_cost"].append(metrics["excess_return_with_cost.information_ratio"])
+                result["max_drawdown_with_cost"].append(metrics["excess_return_with_cost.max_drawdown"])
        results[fn] = result
    return results

@@ -237,6 +176,7 @@ def gen_and_save_md_table(metrics):


 # function to run the all the models
+@only_allow_defined_args
 def run(times=1, models=None, exclude=False):
    """
    Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
@@ -275,53 +215,46 @@ def run(times=1, models=None, exclude=False):
    """
    # get all folders
    folders = get_all_folders(models, exclude)
-    # set up
-    compatible = True
-    if sys.version_info < (3, 3):
-        compatible = False
-    elif not hasattr(sys, "base_prefix"):
-        compatible = False
-    if not compatible:
-        raise ValueError("This script is only for use with " "Python 3.3 or later")
-    if os.name == "nt":
-        use_symlinks = False
-    else:
-        use_symlinks = True
-    builder = ExtendedEnvBuilder(
-        system_site_packages=False,
-        clear=False,
-        symlinks=use_symlinks,
-        upgrade=False,
-        nodist=False,
-        nopip=False,
-        verbose=False,
-    )
+    # init error messages:
+    errors = dict()
    # run all the model for iterations
    for fn in folders:
-        # create env
-        temp_dir = tempfile.mkdtemp()
-        env_path = Path(temp_dir).absolute()
-        sys.stderr.write(f"Creating Virtual Environment with path: {env_path}...\n")
-        builder.create(str(env_path))
-        python_path = env_path / "bin" / "python"  # TODO: FIX ME!
-        sys.stderr.write("\n")
+        # create env by anaconda
+        env_path, python_path, conda_activate = create_env()
        # get all files
        sys.stderr.write("Retrieving files...\n")
        yaml_path, req_path = get_all_files(folders[fn])
        sys.stderr.write("\n")
        # install requirements.txt
        sys.stderr.write("Installing requirements.txt...\n")
-        os.system(f"{python_path} -m pip install -r {req_path}")
+        execute(f"{python_path} -m pip install -r {req_path}")
        sys.stderr.write("\n")
+        # setup gpu for tft
+        if fn == "TFT":
+            execute(
+                f"conda install -y --prefix {env_path} anaconda cudatoolkit=10.0 && conda install -y --prefix {env_path} cudnn"
+            )
+            sys.stderr.write("\n")
        # install qlib
        sys.stderr.write("Installing qlib...\n")
-        os.system(f"{python_path} -m pip install --upgrade cython")  # TODO: FIX ME!
-        os.system(f"{python_path} -m pip install -e git+https://github.com/you-n-g/qlib#egg=pyqlib")  # TODO: FIX ME!
+        execute(f"{python_path} -m pip install --upgrade cython")  # TODO: FIX ME!
+        if fn == "TFT":
+            execute(
+                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall --ignore-installed PyYAML -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
+            )  # TODO: FIX ME!
+        else:
+            execute(
+                f"cd {env_path} && {python_path} -m pip install --upgrade --force-reinstall -e git+https://github.com/you-n-g/qlib#egg=pyqlib"
+            )  # TODO: FIX ME!
        sys.stderr.write("\n")
        # run workflow_by_config for multiple times
        for i in range(times):
            sys.stderr.write(f"Running the model: {fn} for iteration {i+1}...\n")
-            os.system(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
+            errs = execute(f"{python_path} {env_path / 'src/pyqlib/qlib/workflow/cli.py'} {yaml_path} {fn}")
+            if errs is not None:
+                _errs = errors.get(fn, {})
+                _errs.update({i: errs})
+                errors[fn] = _errs
            sys.stderr.write("\n")
        # remove env
        sys.stderr.write(f"Deleting the environment: {env_path}...\n")
@@ -335,13 +268,12 @@ def run(times=1, models=None, exclude=False):
    # generating md table
    sys.stderr.write(f"Generating markdown table...\n")
    gen_and_save_md_table(results)
+    sys.stderr.write("\n")
+    # print errors
+    sys.stderr.write(f"Here are some of the errors of the models...\n")
+    pprint(errors)
+    sys.stderr.write("\n")


 if __name__ == "__main__":
-    rc = 1
-    try:
-        fire.Fire(run)  # run all the model
-        rc = 0
-    except Exception as e:
-        print("Error: %s" % e, file=sys.stderr)
-    sys.exit(rc)
+    fire.Fire(run)  # run all the model
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,5 +22,4 @@ scikit_learn==0.23.2
 torch==1.6.0
 tqdm==4.49.0
 yahooquery==2.2.7
-mlflow==1.12.1
-pytorch-tabnet==2.0.1
+mlflow==1.12.1