1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 17:41:18 +08:00

Initialization is split into two steps: `set_config` and `config_based_on_c`

This commit is contained in:
zhupr
2021-01-20 14:41:50 +08:00
parent ecdfe49fd1
commit 4fcfde7cfb
4 changed files with 76 additions and 62 deletions

View File

@@ -6,88 +6,48 @@ __version__ = "0.6.0.dev"
import os
import re
import sys
import copy
import yaml
import logging
import platform
import subprocess
from pathlib import Path
from .utils import can_use_cache, init_instance_by_config, get_module_by_module_path
from .workflow.utils import experiment_exit_handler
from .utils import set_config, config_based_on_c
# init qlib
def init(default_conf="client", **kwargs):
# Initialize qlib: configure the global config object `C` from `default_conf`
# plus keyword overrides, validate the provider URI, and register the recorder.
# NOTE(review): this span appears to interleave removed and added lines of a
# diff with indentation stripped (duplicate imports, both `LOG` and `logger`,
# both `QlibConfig.LOCAL_URI` and `C.LOCAL_URI` comparisons) — confirm which
# lines belong to the current revision before relying on these comments.
from .config import C, REG_CN, REG_US, QlibConfig
from .data.data import register_all_wrappers
from .log import get_module_logger, set_log_with_config
from .config import C
from .log import get_module_logger
from .data.cache import H
from .workflow import R, QlibRecorder
# Reset the global config and clear `H` (imported from .data.cache).
C.reset()
H.clear()
# A `logging_config` keyword argument takes precedence over C.logging_config.
_logging_config = C.logging_config
if "logging_config" in kwargs:
_logging_config = kwargs["logging_config"]
# set global config
if _logging_config:
set_log_with_config(_logging_config)
# FIXME: this logger ignored the level in config
LOG = get_module_logger("Initialization", level=logging.INFO)
LOG.info(f"default_conf: {default_conf}.")
logger = get_module_logger("Initialization", level=logging.INFO)
C.set_mode(default_conf)
# Region comes from kwargs, else from C if it already has one, else REG_CN.
C.set_region(kwargs.get("region", C["region"] if "region" in C else REG_CN))
# NOTE(review): here the membership test runs after `C[k] = v`, so the warning
# presumably never fires; `set_config` performs the test before assigning.
for k, v in kwargs.items():
C[k] = v
if k not in C:
LOG.warning("Unrecognized config %s" % k)
C.resolve_path()
# When at least one cache is configured, fall back to no caching if
# can_use_cache() returns a falsy value.
if not (C["expression_cache"] is None and C["dataset_cache"] is None):
# check redis
if not can_use_cache():
LOG.warning(
f"redis connection failed(host={C['redis_host']} port={C['redis_port']}), cache will not be used!"
)
C["expression_cache"] = None
C["dataset_cache"] = None
# Applies mode, region, keyword overrides and the cache check to `C`.
set_config(C, default_conf, **kwargs)
# check path if server/local
if C.get_uri_type() == QlibConfig.LOCAL_URI:
if C.get_uri_type() == C.LOCAL_URI:
if not os.path.exists(C["provider_uri"]):
if C["auto_mount"]:
LOG.error(
logger.error(
f"Invalid provider uri: {C['provider_uri']}, please check if a valid provider uri has been set. This path does not exist."
)
else:
LOG.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
elif C.get_uri_type() == QlibConfig.NFS_URI:
logger.warning(f"auto_path is False, please make sure {C['mount_path']} is mounted")
elif C.get_uri_type() == C.NFS_URI:
_mount_nfs_uri(C)
else:
# Any URI type other than local or NFS is rejected.
raise NotImplementedError(f"This type of URI is not supported")
LOG.info("qlib successfully initialized based on %s settings." % default_conf)
register_all_wrappers()
LOG.info(f"data_path={C.get_data_path()}")
if "flask_server" in C:
LOG.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
logger.info(f"flask_server={C['flask_server']}, flask_port={C['flask_port']}")
# set up QlibRecorder
exp_manager = init_instance_by_config(C["exp_manager"])
qr = QlibRecorder(exp_manager)
R.register(qr)
# clean up experiment when python program ends
experiment_exit_handler()
# Registers the data wrappers, the QlibRecorder and the exit handler from `C`.
config_based_on_c(C)
logger.info("qlib successfully initialized based on %s settings." % default_conf)
logger.info(f"data_path={C.get_data_path()}")
def _mount_nfs_uri(C):

View File

@@ -59,6 +59,9 @@ class Config:
def update(self, *args, **kwargs):
    """Forward all arguments to the ``update`` method of the ``_config`` store.

    The store is read straight from ``self.__dict__`` rather than through
    normal attribute access.
    """
    store = self.__dict__["_config"]
    store.update(*args, **kwargs)
def set_conf_from_C(self, config_c):
    """Copy every entry of ``config_c``'s ``_config`` store into this object.

    Entries are passed to ``self.update`` as keyword arguments, so existing
    keys are overwritten and keys absent from ``config_c`` are left untouched.
    """
    incoming = config_c.__dict__["_config"]
    self.update(**incoming)
# REGION CONST
REG_CN = "cn"

View File

@@ -25,7 +25,7 @@ from ..log import get_module_logger
from ..utils import parse_field, read_bin, hash_args, normalize_cache_fields
from .base import Feature
from .cache import DiskDatasetCache, DiskExpressionCache
from ..utils import Wrapper, init_instance_by_config, register_wrapper, get_module_by_module_path
from ..utils import Wrapper, init_instance_by_config, register_wrapper, get_module_by_module_path, config_based_on_c
class CalendarProvider(abc.ABC):
@@ -481,11 +481,10 @@ class DatasetProvider(abc.ABC):
"""
# FIXME: Windows OS or MacOS using spawn: https://docs.python.org/3.8/library/multiprocessing.html?highlight=spawn#contexts-and-start-methods
global C
C = g_config
# NOTE: This place is compatible with windows, windows multi-process is spawn
if getattr(ExpressionD, "_provider", None) is None:
register_all_wrappers()
C.set_conf_from_C(g_config)
config_based_on_c(g_config)
obj = dict()
for field in column_names:
@@ -1043,7 +1042,7 @@ DatasetD = Wrapper()
D = Wrapper()
def register_all_wrappers():
def register_all_wrappers(C):
"""register_all_wrappers"""
logger = get_module_logger("data")
module = get_module_by_module_path("qlib.data")

View File

@@ -15,6 +15,7 @@ import bisect
import shutil
import difflib
import hashlib
import logging
import datetime
import requests
import tempfile
@@ -26,8 +27,9 @@ import pandas as pd
from pathlib import Path
from typing import Union, Tuple
from ..config import C
from ..log import get_module_logger
from ..config import C, REG_CN
from ..log import get_module_logger, set_log_with_config
log = get_module_logger("utils")
@@ -728,3 +730,53 @@ def load_dataset(path_or_obj):
elif extension == ".csv":
return pd.read_csv(path_or_obj, parse_dates=True, index_col=[0, 1])
raise ValueError(f"unsupported file type `{extension}`")
def set_config(config_c, default_conf="client", **kwargs):
    """Reset ``config_c`` and populate it from a mode name plus overrides.

    Parameters
    ----------
    config_c :
        Config object to mutate in place. It is reset first, then has its
        mode, region and individual keys set, and finally its paths resolved.
    default_conf : str
        Mode name handed to ``config_c.set_mode``.
    **kwargs :
        Per-key overrides. ``logging_config`` replaces the config's own
        logging settings and ``region`` selects the region; every key is
        also written into ``config_c``.
    """
    config_c.reset()

    # An explicit `logging_config` keyword wins over the config's own value.
    logging_conf = kwargs.get("logging_config", config_c.logging_config)
    # set global config
    if logging_conf:
        set_log_with_config(logging_conf)

    # FIXME: this logger ignored the level in config
    logger = get_module_logger("Initialization", level=logging.INFO)
    logger.info(f"default_conf: {default_conf}.")

    config_c.set_mode(default_conf)
    fallback_region = config_c["region"] if "region" in config_c else REG_CN
    config_c.set_region(kwargs.get("region", fallback_region))

    for key, value in kwargs.items():
        # Test membership before assigning so unknown keys are reported.
        if key not in config_c:
            logger.warning("Unrecognized config %s" % key)
        config_c[key] = value

    config_c.resolve_path()

    # Nothing to verify when both caches are already disabled.
    if config_c["expression_cache"] is None and config_c["dataset_cache"] is None:
        return
    # check redis
    if can_use_cache():
        return
    logger.warning(
        f"redis connection failed(host={config_c['redis_host']} port={config_c['redis_port']}), cache will not be used!"
    )
    config_c["expression_cache"] = None
    config_c["dataset_cache"] = None
def config_based_on_c(config_c):
    """Run the initialization steps that depend on a populated config.

    Registers the data wrappers with ``config_c``, builds the experiment
    manager from ``config_c["exp_manager"]``, registers a ``QlibRecorder``
    wrapping it on ``R``, and calls ``experiment_exit_handler``.
    """
    # Imported inside the function, as in the original code.
    from ..data.data import register_all_wrappers
    from ..workflow import R, QlibRecorder
    from ..workflow.utils import experiment_exit_handler

    register_all_wrappers(config_c)

    # set up QlibRecorder
    recorder = QlibRecorder(init_instance_by_config(config_c["exp_manager"]))
    R.register(recorder)

    # clean up experiment when python program ends
    experiment_exit_handler()