From 4fcfde7cfbb1dda3a7c5fa88ebe9b768bf0f89e8 Mon Sep 17 00:00:00 2001 From: zhupr Date: Wed, 20 Jan 2021 14:41:50 +0800 Subject: [PATCH] Initialization is split into: set_config and config_based_on_C --- qlib/__init__.py | 70 +++++++++--------------------------------- qlib/config.py | 3 ++ qlib/data/data.py | 9 +++--- qlib/utils/__init__.py | 56 +++++++++++++++++++++++++++++++-- 4 files changed, 76 insertions(+), 62 deletions(-) diff --git a/qlib/__init__.py b/qlib/__init__.py index f79b8c4f5..1f0c73949 100644 --- a/qlib/__init__.py +++ b/qlib/__init__.py @@ -6,88 +6,48 @@ __version__ = "0.6.0.dev" import os -import re -import sys -import copy import yaml import logging import platform import subprocess -from pathlib import Path -from .utils import can_use_cache, init_instance_by_config, get_module_by_module_path -from .workflow.utils import experiment_exit_handler +from .utils import set_config, config_based_on_c + # init qlib def init(default_conf="client", **kwargs): - from .config import C, REG_CN, REG_US, QlibConfig - from .data.data import register_all_wrappers - from .log import get_module_logger, set_log_with_config + from .config import C + from .log import get_module_logger from .data.cache import H - from .workflow import R, QlibRecorder - C.reset() H.clear() - _logging_config = C.logging_config - if "logging_config" in kwargs: - _logging_config = kwargs["logging_config"] - - # set global config - if _logging_config: - set_log_with_config(_logging_config) - # FIXME: this logger ignored the level in config - LOG = get_module_logger("Initialization", level=logging.INFO) - LOG.info(f"default_conf: {default_conf}.") + logger = get_module_logger("Initialization", level=logging.INFO) - C.set_mode(default_conf) - C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN)) - - for k, v in kwargs.items(): - C[k] = v - if k not in C: - LOG.warning("Unrecognized config %s" % k) - - C.resolve_path() - - if not (C["expression_cache"] is None and C["dataset_cache"] is None): - # check redis - if not can_use_cache(): - LOG.warning( - f"redis connection failed(host={C['redis_host']} port={C['redis_port']}), cache will not be used!" - ) - C["expression_cache"] = None - C["dataset_cache"] = None + set_config(C, default_conf, **kwargs) # check path if server/local - if C.get_uri_type() == QlibConfig.LOCAL_URI: + if C.get_uri_type() == C.LOCAL_URI: if not os.path.exists(C["provider_uri"]): if C["auto_mount"]: - LOG.error( + logger.error( f"Invalid provider uri: {C['provider_uri']}, please check if a valid provider uri has been set. This path does not exist." ) else: - LOG.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted") - elif C.get_uri_type() == QlibConfig.NFS_URI: + logger.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted") + elif C.get_uri_type() == C.NFS_URI: _mount_nfs_uri(C) else: raise NotImplementedError(f"This type of URI is not supported") - LOG.info("qlib successfully initialized based on %s settings." % default_conf) - register_all_wrappers() - - LOG.info(f"data_path={C.get_data_path()}") - if "flask_server" in C: - LOG.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}") + logger.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}") - # set up QlibRecorder - exp_manager = init_instance_by_config(C["exp_manager"]) - qr = QlibRecorder(exp_manager) - R.register(qr) - # clean up experiment when python program ends - experiment_exit_handler() + config_based_on_c(C) + + logger.info("qlib successfully initialized based on %s settings." % default_conf) + logger.info(f"data_path={C.get_data_path()}") def _mount_nfs_uri(C): diff --git a/qlib/config.py b/qlib/config.py index 869ea99c9..c757fdb32 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -59,6 +59,9 @@ class Config: def update(self, *args, **kwargs): self.__dict__["_config"].update(*args, **kwargs) + def set_conf_from_C(self, config_c): + self.update(**config_c.__dict__["_config"]) + # REGION CONST REG_CN = "cn" diff --git a/qlib/data/data.py b/qlib/data/data.py index c142b6b26..89a2502f9 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -25,7 +25,7 @@ from ..log import get_module_logger from ..utils import parse_field, read_bin, hash_args, normalize_cache_fields from .base import Feature from .cache import DiskDatasetCache, DiskExpressionCache -from ..utils import Wrapper, init_instance_by_config, register_wrapper, get_module_by_module_path +from ..utils import Wrapper, init_instance_by_config, register_wrapper, get_module_by_module_path, config_based_on_c class CalendarProvider(abc.ABC): @@ -481,11 +481,10 @@ class DatasetProvider(abc.ABC): """ # FIXME: Windows OS or MacOS using spawn: https://docs.python.org/3.8/library/multiprocessing.html?highlight=spawn#contexts-and-start-methods - global C - C = g_config # NOTE: This place is compatible with windows, windows multi-process is spawn if getattr(ExpressionD, "_provider", None) is None: - register_all_wrappers() + C.set_conf_from_C(g_config) + config_based_on_c(g_config) obj = dict() for field in column_names: @@ -1043,7 +1042,7 @@ DatasetD = Wrapper() D = Wrapper() -def register_all_wrappers(): +def register_all_wrappers(C): """register_all_wrappers""" logger = get_module_logger("data") module = get_module_by_module_path("qlib.data") diff --git a/qlib/utils/__init__.py b/qlib/utils/__init__.py index b08f9426d..17a12ae9c 100644 --- a/qlib/utils/__init__.py +++ b/qlib/utils/__init__.py @@ -15,6 +15,7 @@ import bisect import shutil import difflib import hashlib +import logging import datetime import requests import tempfile @@ -26,8 +27,9 @@ import pandas as pd from pathlib import Path from typing import Union, Tuple -from ..config import C -from ..log import get_module_logger +from ..config import C, REG_CN +from ..log import get_module_logger, set_log_with_config + log = get_module_logger("utils") @@ -728,3 +730,53 @@ def load_dataset(path_or_obj): elif extension == ".csv": return pd.read_csv(path_or_obj, parse_dates=True, index_col=[0, 1]) raise ValueError(f"unsupported file type `{extension}`") + + +def set_config(config_c, default_conf="client", **kwargs): + + config_c.reset() + + _logging_config = config_c.logging_config + if "logging_config" in kwargs: + _logging_config = kwargs["logging_config"] + + # set global config + if _logging_config: + set_log_with_config(_logging_config) + + # FIXME: this logger ignored the level in config + logger = get_module_logger("Initialization", level=logging.INFO) + logger.info(f"default_conf: {default_conf}.") + + config_c.set_mode(default_conf) + config_c.set_region(kwargs.get("region", config_c["region"] if "region" in config_c else REG_CN)) + + for k, v in kwargs.items(): + if k not in config_c: + logger.warning("Unrecognized config %s" % k) + config_c[k] = v + + config_c.resolve_path() + + if not (config_c["expression_cache"] is None and config_c["dataset_cache"] is None): + # check redis + if not can_use_cache(): + logger.warning( + f"redis connection failed(host={config_c['redis_host']} port={config_c['redis_port']}), cache will not be used!" + ) + config_c["expression_cache"] = None + config_c["dataset_cache"] = None + + +def config_based_on_c(config_c): + from ..data.data import register_all_wrappers + from ..workflow import R, QlibRecorder + from ..workflow.utils import experiment_exit_handler + + register_all_wrappers(config_c) + # set up QlibRecorder + exp_manager = init_instance_by_config(config_c["exp_manager"]) + qr = QlibRecorder(exp_manager) + R.register(qr) + # clean up experiment when python program ends + experiment_exit_handler()