mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-05 20:11:08 +08:00
Update black formatter
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
- name: Lint with Black
|
||||
run: |
|
||||
cd ..
|
||||
python -m black qlib -l 120
|
||||
python -m black qlib -l 120 --check
|
||||
|
||||
- name: Unit tests with Pytest
|
||||
run: |
|
||||
|
||||
@@ -22,7 +22,6 @@ from qlib.utils import init_instance_by_config
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
|
||||
# use default data
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||
if not exists_qlib_data(provider_uri):
|
||||
@@ -37,15 +36,14 @@ if __name__ == "__main__":
|
||||
MARKET = "csi300"
|
||||
BENCHMARK = "SH000300"
|
||||
|
||||
|
||||
###################################
|
||||
# train model
|
||||
###################################
|
||||
DATA_HANDLER_CONFIG = {
|
||||
"start_time": "2008-01-01",
|
||||
"end_time": "2020-08-01",
|
||||
"fit_start_time":"2008-01-01",
|
||||
"fit_end_time":"2014-12-31",
|
||||
"fit_start_time": "2008-01-01",
|
||||
"fit_end_time": "2014-12-31",
|
||||
"instruments": MARKET,
|
||||
}
|
||||
|
||||
@@ -72,31 +70,37 @@ if __name__ == "__main__":
|
||||
"max_depth": 8,
|
||||
"num_leaves": 210,
|
||||
"num_threads": 20,
|
||||
}
|
||||
},
|
||||
},
|
||||
"dataset": {
|
||||
"class": "DatasetH",
|
||||
"module_path": "qlib.data.dataset",
|
||||
"kwargs": {
|
||||
'handler': {
|
||||
"handler": {
|
||||
"class": "Alpha158",
|
||||
"module_path": "qlib.contrib.data.handler",
|
||||
"kwargs": DATA_HANDLER_CONFIG
|
||||
"kwargs": DATA_HANDLER_CONFIG,
|
||||
},
|
||||
'segments': {
|
||||
'train': ("2008-01-01", "2014-12-31"),
|
||||
'valid': ("2015-01-01", "2016-12-31",),
|
||||
'test': ("2017-01-01", "2020-08-01",),
|
||||
}
|
||||
}
|
||||
"segments": {
|
||||
"train": ("2008-01-01", "2014-12-31"),
|
||||
"valid": (
|
||||
"2015-01-01",
|
||||
"2016-12-31",
|
||||
),
|
||||
"test": (
|
||||
"2017-01-01",
|
||||
"2020-08-01",
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
# You shoud record the data in specific sequence
|
||||
# "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
|
||||
}
|
||||
|
||||
# model = train_model(task)
|
||||
model = init_instance_by_config(task['model'])
|
||||
dataset = init_instance_by_config(task['dataset'])
|
||||
model = init_instance_by_config(task["model"])
|
||||
dataset = init_instance_by_config(task["dataset"])
|
||||
|
||||
model.fit(dataset)
|
||||
|
||||
|
||||
@@ -281,7 +281,6 @@ class Alpha158(DataHandlerLP):
|
||||
|
||||
|
||||
class Alpha158vwap(Alpha158):
|
||||
|
||||
def get_feature_config(self):
|
||||
conf = {
|
||||
"kbar": {},
|
||||
|
||||
@@ -9,17 +9,17 @@ from ...data.dataset.handler import DataHandlerLP
|
||||
|
||||
|
||||
class CatBoostModel(Model):
|
||||
"""CatBoost Model"""
|
||||
"""CatBoost Model"""
|
||||
|
||||
def __init__(self, loss="RMSE", **kwargs):
|
||||
# There are more options
|
||||
if loss not in {"RMSE", "Logloss"}:
|
||||
raise NotImplementedError
|
||||
self._params = {"loss_function": loss}
|
||||
self._params.update(kwargs)
|
||||
self.model = None
|
||||
def __init__(self, loss="RMSE", **kwargs):
|
||||
# There are more options
|
||||
if loss not in {"RMSE", "Logloss"}:
|
||||
raise NotImplementedError
|
||||
self._params = {"loss_function": loss}
|
||||
self._params.update(kwargs)
|
||||
self.model = None
|
||||
|
||||
def fit(
|
||||
def fit(
|
||||
self,
|
||||
dataset: DatasetH,
|
||||
num_boost_round=1000,
|
||||
@@ -27,48 +27,42 @@ class CatBoostModel(Model):
|
||||
verbose_eval=20,
|
||||
evals_result=dict(),
|
||||
**kwargs
|
||||
):
|
||||
df_train, df_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
|
||||
):
|
||||
df_train, df_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
|
||||
)
|
||||
x_train, y_train = df_train["feature"], df_train["label"]
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
x_train, y_train = df_train["feature"], df_train["label"]
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
|
||||
# CatBoost needs 1D array as its label
|
||||
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
|
||||
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
|
||||
else:
|
||||
raise ValueError("CatBoost doesn't support multi-label training")
|
||||
# CatBoost needs 1D array as its label
|
||||
if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
|
||||
y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
|
||||
else:
|
||||
raise ValueError("CatBoost doesn't support multi-label training")
|
||||
|
||||
train_pool = Pool(data = x_train, label = y_train_1d)
|
||||
valid_pool = Pool(data = x_valid, label = y_valid_1d)
|
||||
train_pool = Pool(data=x_train, label=y_train_1d)
|
||||
valid_pool = Pool(data=x_valid, label=y_valid_1d)
|
||||
|
||||
#Initialize the catboost model
|
||||
self._params['iterations'] = num_boost_round
|
||||
self._params['early_stopping_rounds'] = early_stopping_rounds
|
||||
self._params['verbose_eval'] = verbose_eval
|
||||
self._params['task_type'] = "GPU" if get_gpu_device_count() > 0 else "CPU"
|
||||
self.model = CatBoost(self._params, **kwargs)
|
||||
# Initialize the catboost model
|
||||
self._params["iterations"] = num_boost_round
|
||||
self._params["early_stopping_rounds"] = early_stopping_rounds
|
||||
self._params["verbose_eval"] = verbose_eval
|
||||
self._params["task_type"] = "GPU" if get_gpu_device_count() > 0 else "CPU"
|
||||
self.model = CatBoost(self._params, **kwargs)
|
||||
|
||||
#train the model
|
||||
self.model.fit(
|
||||
train_pool,
|
||||
eval_set = valid_pool,
|
||||
use_best_model = True,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
evals_result = self.model.get_evals_result()
|
||||
evals_result["train"] = list(evals_result["learn"].values())[0]
|
||||
evals_result["valid"] = list(evals_result["validation"].values())[0]
|
||||
# train the model
|
||||
self.model.fit(train_pool, eval_set=valid_pool, use_best_model=True, **kwargs)
|
||||
|
||||
evals_result = self.model.get_evals_result()
|
||||
evals_result["train"] = list(evals_result["learn"].values())[0]
|
||||
evals_result["valid"] = list(evals_result["validation"].values())[0]
|
||||
|
||||
def predict(self, dataset):
|
||||
if self.model is None:
|
||||
raise ValueError("model is not fitted yet!")
|
||||
x_test = dataset.prepare("test", col_set="feature")
|
||||
return pd.Series(self.model.predict(np.squeeze(x_test.values)), index=x_test.index)
|
||||
|
||||
|
||||
def predict(self, dataset):
|
||||
if self.model is None:
|
||||
raise ValueError("model is not fitted yet!")
|
||||
x_test = dataset.prepare("test", col_set="feature")
|
||||
return pd.Series(self.model.predict(np.squeeze(x_test.values)), index=x_test.index)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cat = CatBoostModel()
|
||||
if __name__ == "__main__":
|
||||
cat = CatBoostModel()
|
||||
|
||||
@@ -159,9 +159,7 @@ class DNNModelPytorch(Model):
|
||||
x_valid, y_valid = df_valid["feature"], df_valid["label"]
|
||||
|
||||
try:
|
||||
wdf_train, wdf_valid = dataset.prepare(
|
||||
["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L
|
||||
)
|
||||
wdf_train, wdf_valid = dataset.prepare(["train", "valid"], col_set=["weight"], data_key=DataHandlerLP.DK_L)
|
||||
w_train, w_valid = wdf_train["weight"], wdf_valid["weight"]
|
||||
except:
|
||||
w_train = pd.DataFrame(np.ones_like(y_train.values), index=y_train.index)
|
||||
|
||||
@@ -65,8 +65,9 @@ class DataHandler(Serializable):
|
||||
|
||||
self.data_loader = init_instance_by_config(
|
||||
data_loader,
|
||||
None if (isinstance(data_loader, dict) and 'module_path' in data_loader) else data_loader_module,
|
||||
accept_types=DataLoader)
|
||||
None if (isinstance(data_loader, dict) and "module_path" in data_loader) else data_loader_module,
|
||||
accept_types=DataLoader,
|
||||
)
|
||||
|
||||
self.instruments = instruments
|
||||
self.start_time = start_time
|
||||
|
||||
@@ -14,6 +14,7 @@ class DataLoader(abc.ABC):
|
||||
"""
|
||||
DataLoader is designed for loading raw data from original data source.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def load(self, instruments, start_time=None, end_time=None) -> pd.DataFrame:
|
||||
"""
|
||||
@@ -53,6 +54,7 @@ class DLWParser(DataLoader):
|
||||
|
||||
Extracting this class so that QlibDataLoader and other dataloaders(such as QdbDataLoader) can share the fields
|
||||
"""
|
||||
|
||||
def __init__(self, config: Tuple[list, tuple, dict]):
|
||||
"""
|
||||
Parameters
|
||||
@@ -113,7 +115,8 @@ class DLWParser(DataLoader):
|
||||
grp: self.load_group_df(instruments, exprs, names, start_time, end_time)
|
||||
for grp, (exprs, names) in self.fields.items()
|
||||
},
|
||||
axis=1)
|
||||
axis=1,
|
||||
)
|
||||
else:
|
||||
exprs, names = self.fields
|
||||
df = self.load_group_df(instruments, exprs, names, start_time, end_time)
|
||||
@@ -122,6 +125,7 @@ class DLWParser(DataLoader):
|
||||
|
||||
class QlibDataLoader(DLWParser):
|
||||
"""Same as QlibDataLoader. The fields can be define by config"""
|
||||
|
||||
def __init__(self, config: Tuple[list, tuple, dict], filter_pipe=None):
|
||||
"""
|
||||
Parameters
|
||||
|
||||
@@ -195,7 +195,7 @@ def get_cls_kwargs(config: Union[dict, str], module) -> (type, dict):
|
||||
|
||||
|
||||
def init_instance_by_config(
|
||||
config: Union[str, dict, object], module=None, accept_types: Union[type, Tuple[type]] = tuple([]), **kwargs
|
||||
config: Union[str, dict, object], module=None, accept_types: Union[type, Tuple[type]] = tuple([]), **kwargs
|
||||
) -> object:
|
||||
"""
|
||||
get initialized instance with config
|
||||
|
||||
@@ -5,8 +5,8 @@ from joblib import Parallel, delayed
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def datetime_groupby_apply(df, apply_func, axis=0, level='datetime', resample_rule="M", n_jobs=-1, skip_group=False):
|
||||
""" datetime_groupby_apply
|
||||
def datetime_groupby_apply(df, apply_func, axis=0, level="datetime", resample_rule="M", n_jobs=-1, skip_group=False):
|
||||
"""datetime_groupby_apply
|
||||
This function will apply the `apply_func` on the datetime level index.
|
||||
|
||||
Parameters
|
||||
@@ -26,12 +26,14 @@ def datetime_groupby_apply(df, apply_func, axis=0, level='datetime', resample_ru
|
||||
Returns:
|
||||
pd.DataFrame
|
||||
"""
|
||||
|
||||
def _naive_group_apply(df):
|
||||
return df.groupby(axis=axis, level=level).apply(apply_func)
|
||||
|
||||
if n_jobs != 1:
|
||||
dfs = Parallel(n_jobs=n_jobs)(delayed(_naive_group_apply)(sub_df)
|
||||
for idx, sub_df in df.resample(resample_rule, axis=axis, level=level))
|
||||
dfs = Parallel(n_jobs=n_jobs)(
|
||||
delayed(_naive_group_apply)(sub_df) for idx, sub_df in df.resample(resample_rule, axis=axis, level=level)
|
||||
)
|
||||
return pd.concat(dfs, axis=axis).sort_index()
|
||||
else:
|
||||
return _naive_group_apply(df)
|
||||
|
||||
@@ -6,6 +6,7 @@ from .expm import MLflowExpManager
|
||||
from ..utils import Wrapper
|
||||
from ..config import C
|
||||
|
||||
|
||||
class QlibRecorder:
|
||||
"""
|
||||
A global system that helps to manage the experiments.
|
||||
|
||||
@@ -8,6 +8,7 @@ from ..log import get_module_logger
|
||||
|
||||
logger = get_module_logger("workflow", "INFO")
|
||||
|
||||
|
||||
class Experiment:
|
||||
"""
|
||||
Thie is the `Experiment` class for each experiment being run. The API is designed
|
||||
@@ -17,22 +18,22 @@ class Experiment:
|
||||
self.name = None
|
||||
self.id = None
|
||||
self.active_recorder = None # only one recorder can running each time
|
||||
self.recorders = dict() # recorder id -> object
|
||||
self.recorders = dict() # recorder id -> object
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.info)
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return str(self.info)
|
||||
return str(self.info)
|
||||
|
||||
@property
|
||||
def info(self):
|
||||
output = dict()
|
||||
output['class'] = "Experiment"
|
||||
output['id'] = self.id
|
||||
output['name'] = self.name
|
||||
output['active_recorder'] = self.active_recorder.id
|
||||
output['recorders'] = list(self.recorders.keys())
|
||||
output["class"] = "Experiment"
|
||||
output["id"] = self.id
|
||||
output["name"] = self.name
|
||||
output["active_recorder"] = self.active_recorder.id
|
||||
output["recorders"] = list(self.recorders.keys())
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
@@ -137,7 +138,6 @@ class MLflowExperiment(Experiment):
|
||||
run = self.active_recorder.start_run()
|
||||
# store the recorder
|
||||
self.recorders[self.active_recorder.id] = recorder
|
||||
|
||||
return self.active_recorder
|
||||
|
||||
def end(self, status):
|
||||
@@ -147,7 +147,7 @@ class MLflowExperiment(Experiment):
|
||||
|
||||
def create_recorder(self):
|
||||
num = len(self.recorders)
|
||||
name = "Recorder_{}".format(num+1)
|
||||
name = "Recorder_{}".format(num + 1)
|
||||
recorder = MLflowRecorder(name, self.id)
|
||||
return recorder
|
||||
|
||||
@@ -170,9 +170,7 @@ class MLflowExperiment(Experiment):
|
||||
if self.recorders[rid].name == recorder_name:
|
||||
return self.recorders[rid]
|
||||
elif self.active_recorder is None:
|
||||
raise Exception('No valid active recorder exists. Please make sure the experiment is running.')
|
||||
raise Exception("No valid active recorder exists. Please make sure the experiment is running.")
|
||||
else:
|
||||
logger.info(
|
||||
"No experiment id or name is given. Return the current active experiment."
|
||||
)
|
||||
return self.active_recorder
|
||||
logger.info("No experiment id or name is given. Return the current active experiment.")
|
||||
return self.active_recorder
|
||||
|
||||
@@ -184,9 +184,7 @@ class MLflowExpManager(ExpManager):
|
||||
else:
|
||||
if experiment_name not in self.experiments:
|
||||
if mlflow.get_experiment_by_name(experiment_name) is not None:
|
||||
logger.info(
|
||||
"The experiment has already been created before. Try to resume the experiment..."
|
||||
)
|
||||
logger.info("The experiment has already been created before. Try to resume the experiment...")
|
||||
experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
|
||||
else:
|
||||
experiment_id = mlflow.create_experiment(experiment_name)
|
||||
@@ -216,11 +214,9 @@ class MLflowExpManager(ExpManager):
|
||||
if self.experiments[name].id == experiment_id:
|
||||
return self.experiments[name]
|
||||
elif self.active_experiment is None:
|
||||
raise Exception('No valid active experiment exists. Please make sure experiment manager is running.')
|
||||
raise Exception("No valid active experiment exists. Please make sure experiment manager is running.")
|
||||
else:
|
||||
logger.info(
|
||||
"No experiment id or name is given. Return the current active experiment."
|
||||
)
|
||||
logger.info("No experiment id or name is given. Return the current active experiment.")
|
||||
return self.active_experiment
|
||||
|
||||
def delete_exp(self, experiment_id):
|
||||
|
||||
@@ -12,7 +12,7 @@ from ..utils import init_instance_by_config, get_module_by_module_path
|
||||
|
||||
class RecordTemp:
|
||||
"""
|
||||
This is the Records Template class that enables user to generate experiment results such as IC and
|
||||
This is the Records Template class that enables user to generate experiment results such as IC and
|
||||
backtest in a certain format.
|
||||
"""
|
||||
|
||||
@@ -116,8 +116,8 @@ class PortAnaRecord(SignalRecord):
|
||||
|
||||
def __init__(self, recorder, config, **kwargs):
|
||||
self.recorder = recorder
|
||||
self.strategy_config = config['strategy']
|
||||
self.backtest_config = config['backtest']
|
||||
self.strategy_config = config["strategy"]
|
||||
self.backtest_config = config["backtest"]
|
||||
self.strategy = init_instance_by_config(self.strategy_config)
|
||||
self.artifact_path = "portfolio_analysis"
|
||||
|
||||
|
||||
@@ -20,21 +20,21 @@ class Recorder:
|
||||
self.name = name
|
||||
self.experiment_id = experiment_id
|
||||
self.status = "SCHEDULED"
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.info)
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return str(self.info)
|
||||
return str(self.info)
|
||||
|
||||
@property
|
||||
def info(self):
|
||||
output = dict()
|
||||
output['class'] = "Recorder"
|
||||
output['id'] = self.id
|
||||
output['name'] = self.name
|
||||
output['experiment_id'] = self.experiment_id
|
||||
output['status'] = self.status
|
||||
output["class"] = "Recorder"
|
||||
output["id"] = self.id
|
||||
output["name"] = self.name
|
||||
output["experiment_id"] = self.experiment_id
|
||||
output["status"] = self.status
|
||||
|
||||
def set_recorder_name(self, rname):
|
||||
self.recorder_name = rname
|
||||
@@ -188,16 +188,16 @@ class MLflowRecorder(Recorder):
|
||||
client = mlflow.tracking.MlflowClient(tracking_uri=self._uri)
|
||||
if local_path is not None:
|
||||
client.log_artifacts(self.id, local_path, artifact_path)
|
||||
elif kwargs.get('data') is not None and kwargs.get('name') is not None:
|
||||
data, name = kwargs.get('data'), kwargs.get('name')
|
||||
elif kwargs.get("data") is not None and kwargs.get("name") is not None:
|
||||
data, name = kwargs.get("data"), kwargs.get("name")
|
||||
self.fm.save_obj(data, name)
|
||||
client.log_artifact(self.id, self.fm.path / name, artifact_path)
|
||||
elif kwargs.get('data_name_list') is not None:
|
||||
data_name_list = kwargs.get('data_name_list')
|
||||
elif kwargs.get("data_name_list") is not None:
|
||||
data_name_list = kwargs.get("data_name_list")
|
||||
self.fm.save_objs(data_name_list)
|
||||
client.log_artifacts(self.id, self.fm.path, artifact_path)
|
||||
else:
|
||||
raise Exception('Please provide valid arguments in order to save object properly.')
|
||||
raise Exception("Please provide valid arguments in order to save object properly.")
|
||||
|
||||
def load_object(self, name):
|
||||
client = mlflow.tracking.MlflowClient(tracking_uri=self._uri)
|
||||
|
||||
Reference in New Issue
Block a user