1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 01:21:18 +08:00
Files
qlib/qlib/config.py
2021-04-07 03:33:27 +00:00

344 lines
11 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
About the configs
=================
The config will based on _default_config.
Two modes are supported
- client
- server
"""
import os
import re
import copy
import logging
import multiprocessing
from pathlib import Path
class Config:
def __init__(self, default_conf):
self.__dict__["_default_config"] = copy.deepcopy(default_conf) # avoiding conflictions with __getattr__
self.reset()
def __getitem__(self, key):
return self.__dict__["_config"][key]
def __getattr__(self, attr):
if attr in self.__dict__["_config"]:
return self.__dict__["_config"][attr]
raise AttributeError(f"No such {attr} in self._config")
def get(self, key, default=None):
return self.__dict__["_config"].get(key, default)
def __setitem__(self, key, value):
self.__dict__["_config"][key] = value
def __setattr__(self, attr, value):
self.__dict__["_config"][attr] = value
def __contains__(self, item):
return item in self.__dict__["_config"]
def __getstate__(self):
return self.__dict__
def __setstate__(self, state):
self.__dict__.update(state)
def __str__(self):
return str(self.__dict__["_config"])
def __repr__(self):
return str(self.__dict__["_config"])
def reset(self):
self.__dict__["_config"] = copy.deepcopy(self._default_config)
def update(self, *args, **kwargs):
self.__dict__["_config"].update(*args, **kwargs)
def set_conf_from_C(self, config_c):
self.update(**config_c.__dict__["_config"])
# REGION CONST
REG_CN = "cn"
REG_US = "us"
NUM_USABLE_CPU = max(multiprocessing.cpu_count() - 2, 1)
_default_config = {
# data provider config
"calendar_provider": "LocalCalendarProvider",
"instrument_provider": "LocalInstrumentProvider",
"feature_provider": "LocalFeatureProvider",
"expression_provider": "LocalExpressionProvider",
"dataset_provider": "LocalDatasetProvider",
"provider": "LocalProvider",
# config it in qlib.init()
"provider_uri": "",
# cache
"expression_cache": None,
"dataset_cache": None,
"calendar_cache": None,
# for simple dataset cache
"local_cache_path": None,
"kernels": NUM_USABLE_CPU,
# How many tasks belong to one process. Recommend 1 for high-frequency data and None for daily data.
"maxtasksperchild": None,
"default_disk_cache": 1, # 0:skip/1:use
"mem_cache_size_limit": 500,
# memory cache expire second, only in used 'DatasetURICache' and 'client D.calendar'
# default 1 hour
"mem_cache_expire": 60 * 60,
# memory cache space limit, default 5GB, only in used client
"mem_cache_space_limit": 1024 * 1024 * 1024 * 5,
# cache dir name
"dataset_cache_dir_name": "dataset_cache",
"features_cache_dir_name": "features_cache",
# redis
# in order to use cache
"redis_host": "127.0.0.1",
"redis_port": 6379,
"redis_task_db": 1,
# This value can be reset via qlib.init
"logging_level": logging.INFO,
# Global configuration of qlib log
# logging_level can control the logging level more finely
"logging_config": {
"version": 1,
"formatters": {
"logger_format": {
"format": "[%(process)s:%(threadName)s](%(asctime)s) %(levelname)s - %(name)s - [%(filename)s:%(lineno)d] - %(message)s"
}
},
"filters": {
"field_not_found": {
"()": "qlib.log.LogFilter",
"param": [".*?WARN: data not found for.*?"],
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": logging.DEBUG,
"formatter": "logger_format",
"filters": ["field_not_found"],
}
},
"loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"]}},
},
# Default config for experiment manager
"exp_manager": {
"class": "MLflowExpManager",
"module_path": "qlib.workflow.expm",
"kwargs": {
"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"),
"default_exp_name": "Experiment",
},
},
# Default config for MongoDB
"mongo": {
"task_url": "mongodb://localhost:27017/",
"task_db_name": "default_task_db",
},
}
MODE_CONF = {
"server": {
# data provider config
"calendar_provider": "LocalCalendarProvider",
"instrument_provider": "LocalInstrumentProvider",
"feature_provider": "LocalFeatureProvider",
"expression_provider": "LocalExpressionProvider",
"dataset_provider": "LocalDatasetProvider",
"provider": "LocalProvider",
# config it in qlib.init()
"provider_uri": "",
# redis
"redis_host": "127.0.0.1",
"redis_port": 6379,
"redis_task_db": 1,
"kernels": NUM_USABLE_CPU,
# cache
"expression_cache": "DiskExpressionCache",
"dataset_cache": "DiskDatasetCache",
"mount_path": None,
},
"client": {
# data provider config
"calendar_provider": "LocalCalendarProvider",
"instrument_provider": "LocalInstrumentProvider",
"feature_provider": "LocalFeatureProvider",
"expression_provider": "LocalExpressionProvider",
"dataset_provider": "LocalDatasetProvider",
"provider": "LocalProvider",
# config it in user's own code
"provider_uri": "~/.qlib/qlib_data/cn_data",
# cache
# Using parameter 'remote' to announce the client is using server_cache, and the writing access will be disabled.
"expression_cache": "DiskExpressionCache",
"dataset_cache": "DiskDatasetCache",
"calendar_cache": None,
# client config
"kernels": NUM_USABLE_CPU,
"mount_path": None,
"auto_mount": False, # The nfs is already mounted on our server[auto_mount: False].
# The nfs should be auto-mounted by qlib on other
# serversS(such as PAI) [auto_mount:True]
"timeout": 100,
"logging_level": logging.INFO,
"region": REG_CN,
## Custom Operator
"custom_ops": [],
},
}
HIGH_FREQ_CONFIG = {
"provider_uri": "~/.qlib/qlib_data/yahoo_cn_1min",
"dataset_cache": None,
"expression_cache": "DiskExpressionCache",
"region": REG_CN,
}
_default_region_config = {
REG_CN: {
"trade_unit": 100,
"limit_threshold": 0.099,
"deal_price": "vwap",
},
REG_US: {
"trade_unit": 1,
"limit_threshold": None,
"deal_price": "close",
},
}
class QlibConfig(Config):
# URI_TYPE
LOCAL_URI = "local"
NFS_URI = "nfs"
def __init__(self, default_conf):
super().__init__(default_conf)
self._registered = False
def set_mode(self, mode):
# raise KeyError
self.update(MODE_CONF[mode])
# TODO: update region based on kwargs
def set_region(self, region):
# raise KeyError
self.update(_default_region_config[region])
def resolve_path(self):
# resolve path
if self["mount_path"] is not None:
self["mount_path"] = str(Path(self["mount_path"]).expanduser().resolve())
if self.get_uri_type() == QlibConfig.LOCAL_URI:
self["provider_uri"] = str(Path(self["provider_uri"]).expanduser().resolve())
def get_uri_type(self):
is_win = re.match("^[a-zA-Z]:.*", self["provider_uri"]) is not None # such as 'C:\\data', 'D:'
is_nfs_or_win = (
re.match("^[^/]+:.+", self["provider_uri"]) is not None
) # such as 'host:/data/' (User may define short hostname by themselves or use localhost)
if is_nfs_or_win and not is_win:
return QlibConfig.NFS_URI
else:
return QlibConfig.LOCAL_URI
def get_data_path(self):
if self.get_uri_type() == QlibConfig.LOCAL_URI:
return self["provider_uri"]
elif self.get_uri_type() == QlibConfig.NFS_URI:
return self["mount_path"]
else:
raise NotImplementedError(f"This type of uri is not supported")
def set(self, default_conf="client", **kwargs):
from .utils import set_log_with_config, get_module_logger, can_use_cache
self.reset()
_logging_config = self.logging_config
if "logging_config" in kwargs:
_logging_config = kwargs["logging_config"]
# set global config
if _logging_config:
set_log_with_config(_logging_config)
# FIXME: this logger ignored the level in config
logger = get_module_logger("Initialization", level=logging.INFO)
logger.info(f"default_conf: {default_conf}.")
self.set_mode(default_conf)
self.set_region(kwargs.get("region", self["region"] if "region" in self else REG_CN))
for k, v in kwargs.items():
if k not in self:
logger.warning("Unrecognized config %s" % k)
self[k] = v
self.resolve_path()
if not (self["expression_cache"] is None and self["dataset_cache"] is None):
# check redis
if not can_use_cache():
logger.warning(
f"redis connection failed(host={self['redis_host']} port={self['redis_port']}), cache will not be used!"
)
self["expression_cache"] = None
self["dataset_cache"] = None
def register(self):
from .utils import init_instance_by_config
from .data.ops import register_all_ops
from .data.data import register_all_wrappers
from .workflow import R, QlibRecorder
from .workflow.utils import experiment_exit_handler
register_all_ops(self)
register_all_wrappers(self)
# set up QlibRecorder
exp_manager = init_instance_by_config(self["exp_manager"])
qr = QlibRecorder(exp_manager)
R.register(qr)
# clean up experiment when python program ends
experiment_exit_handler()
# Supporting user reset qlib version (useful when user want to connect to qlib server with old version)
self.reset_qlib_version()
self._registered = True
def reset_qlib_version(self):
import qlib
reset_version = self.get("qlib_reset_version", None)
if reset_version is not None:
qlib.__version__ = reset_version
else:
qlib.__version__ = getattr(qlib, "__version__bak")
# Due to a bug? that converting __version__ to _QlibConfig__version__bak
# Using __version__bak instead of __version__
@property
def registered(self):
return self._registered
# global config
C = QlibConfig(_default_config)