mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-02 10:31:00 +08:00
Merge branch 'main' of github.com:you-n-g/qlib into main
This commit is contained in:
@@ -5,8 +5,10 @@
|
||||
**GitHub**: https://github.com/google-research/google-research/tree/master/tft
|
||||
|
||||
## Run the Workflow
|
||||
Users can follow the ``workflow_by_code_tft.py`` to run the benchmark. Please be **aware** that this script can only support Python 3.5 - 3.8.
|
||||
Users can follow the ``workflow_by_code_tft.py`` to run the benchmark.
|
||||
|
||||
### Notes
|
||||
1. The model must run in GPU, or an error will be raised.
|
||||
2. New datasets should be registered in ``data_formatters``, for detail please visit the source.
|
||||
1. Please be **aware** that this script can only support `Python 3.5 - 3.8`.
|
||||
2. If the CUDA version on your machine is not 10.0, please run the following commands on your machine: `conda install anaconda cudatoolkit=10.0` and `conda install cudnn`.
|
||||
3. The model must run on a GPU; otherwise, an error will be raised.
|
||||
4. New datasets should be registered in ``data_formatters``; for details, please see the source code.
|
||||
|
||||
@@ -10,6 +10,7 @@ import shutil
|
||||
import tempfile
|
||||
import statistics
|
||||
from pathlib import Path
|
||||
from operator import xor
|
||||
from subprocess import Popen, PIPE
|
||||
from threading import Thread
|
||||
from pprint import pprint
|
||||
@@ -174,11 +175,22 @@ def cal_mean_std(results) -> dict:
|
||||
|
||||
|
||||
# function to get all the folders in the benchmarks folder
|
||||
def get_all_folders(models, exclude) -> dict:
    """Collect the entries of the ``benchmarks`` folder selected by ``models``.

    Parameters
    ----------
    models : str, list, tuple, set or None
        Names to select (matched case-insensitively against entry names).
        A string is split on commas and may be wrapped in square brackets,
        e.g. ``"[dnn, tft]"``. ``None`` selects every entry.
    exclude : bool
        When truthy the selection is inverted: every entry that is NOT
        named in ``models`` is returned instead.

    Returns
    -------
    dict
        Maps each selected entry name to its resolved absolute path (str).
        ``benchmarks`` is looked up relative to the current working directory.

    Raises
    ------
    ValueError
        If ``models`` is none of the supported types.
    """
    if isinstance(models, str):
        wanted = {m.lower().strip("[ ]") for m in models.split(",")}
    elif isinstance(models, (list, tuple, set)):
        # Tuples/sets are accepted as well as lists, and surrounding
        # whitespace is ignored just like in the string form.
        wanted = {m.lower().strip() for m in models}
    elif models is None:
        # No filter requested: every entry counts as "named".
        wanted = None
    else:
        raise ValueError("Input models type is not supported. Please provide str or list without space.")

    inverted = bool(exclude)
    folders = dict()
    for f in os.scandir("benchmarks"):
        named = True if wanted is None else f.name.lower() in wanted
        # Keep the entry when exactly one of "named" / "exclude" holds.
        if xor(named, inverted):
            folders[f.name] = str((Path("benchmarks") / f.name).resolve())
    return folders
|
||||
|
||||
|
||||
@@ -225,13 +237,44 @@ def gen_and_save_md_table(metrics):
|
||||
|
||||
|
||||
# function to run all the models
|
||||
def run(times=1):
|
||||
def run(times=1, models=None, exclude=False):
|
||||
"""
|
||||
Please be aware that this function can only work under Linux. MacOS and Windows will be supported in the future.
|
||||
Any PR to enhance this method is highly welcomed.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
times : int
|
||||
determines how many times the model should be running.
|
||||
models : str or list
|
||||
determines the specific model or list of models to run or exclude.
|
||||
exclude : boolean
|
||||
determines whether the model being used is excluded or included.
|
||||
|
||||
Usage:
|
||||
-------
|
||||
Here are some use cases of the function in the bash:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Case 1 - run all models multiple times
|
||||
python run_all_model.py 3
|
||||
|
||||
# Case 2 - run specific models multiple times
|
||||
python run_all_model.py 3 dnn
|
||||
|
||||
# Case 3 - run all models except those given as arguments, multiple times
|
||||
python run_all_model.py 3 [dnn,tft,lstm] True
|
||||
|
||||
# Case 4 - run specific models for one time
|
||||
python run_all_model.py --models=[dnn,lightgbm]
|
||||
|
||||
# Case 5 - run all models except those given as arguments, one time
|
||||
python run_all_model.py --models=[dnn,tft,sfm] --exclude=True
|
||||
|
||||
"""
|
||||
# get all folders
|
||||
folders = get_all_folders()
|
||||
folders = get_all_folders(models, exclude)
|
||||
# set up
|
||||
compatible = True
|
||||
if sys.version_info < (3, 3):
|
||||
|
||||
@@ -10,6 +10,28 @@ from inspect import getfullargspec
|
||||
import copy
|
||||
|
||||
|
||||
def check_transform_proc(proc_l, fit_start_time, fit_end_time):
    """Normalize a list of processor specs, injecting the fit window where needed.

    Parameters
    ----------
    proc_l : list
        Processor specs. Each item is either a ``Processor`` instance or a
        config that ``get_cls_kwargs`` can resolve against ``processor_module``.
    fit_start_time, fit_end_time
        Fit window handed to every processor class whose constructor accepts
        both ``fit_start_time`` and ``fit_end_time``.

    Returns
    -------
    list
        ``Processor`` instances are passed through unchanged; every other item
        becomes ``{"class": <class name>, "kwargs": <kwargs dict>}``.

    Raises
    ------
    AssertionError
        If a processor needs the fit window but either bound is ``None``.
    """
    new_l = []
    for p in proc_l:
        if isinstance(p, Processor):
            # Already-built processors need no configuration.
            new_l.append(p)
            continue

        klass, pkwargs = get_cls_kwargs(p, processor_module)
        # Work on a copy so the update below never writes into the caller's
        # config. NOTE(review): presumably `get_cls_kwargs` can hand back the
        # very dict stored in `p` (e.g. an entry of a shared module-level
        # default list) - confirm against its implementation.
        pkwargs = dict(pkwargs or {})

        args = getfullargspec(klass).args
        if "fit_start_time" in args and "fit_end_time" in args:
            assert (
                fit_start_time is not None and fit_end_time is not None
            ), "Make sure `fit_start_time` and `fit_end_time` are not None."
            pkwargs.update(
                {
                    "fit_start_time": fit_start_time,
                    "fit_end_time": fit_end_time,
                }
            )
        new_l.append({"class": klass.__name__, "kwargs": pkwargs})
    return new_l
|
||||
|
||||
|
||||
class ALPHA360_Denoise(DataHandlerLP):
|
||||
def __init__(self, instruments="csi500", start_time=None, end_time=None, fit_start_time=None, fit_end_time=None):
|
||||
data_loader = {
|
||||
@@ -83,8 +105,31 @@ class ALPHA360_Denoise(DataHandlerLP):
|
||||
return fields, names
|
||||
|
||||
|
||||
_DEFAULT_LEARN_PROCESSORS = [
|
||||
{"class": "DropnaLabel"},
|
||||
{"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
|
||||
]
|
||||
_DEFAULT_INFER_PROCESSORS = [
|
||||
{"class": "ProcessInf", "kwargs": {}},
|
||||
{"class": "ZScoreNorm", "kwargs": {}},
|
||||
{"class": "Fillna", "kwargs": {}},
|
||||
]
|
||||
|
||||
|
||||
class ALPHA360(DataHandlerLP):
|
||||
def __init__(self, instruments="csi500", start_time=None, end_time=None, fit_start_time=None, fit_end_time=None):
|
||||
def __init__(
|
||||
self,
|
||||
instruments="csi500",
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
infer_processors=_DEFAULT_INFER_PROCESSORS,
|
||||
learn_processors=_DEFAULT_LEARN_PROCESSORS,
|
||||
fit_start_time=None,
|
||||
fit_end_time=None,
|
||||
):
|
||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||
|
||||
data_loader = {
|
||||
"class": "QlibDataLoader",
|
||||
"kwargs": {
|
||||
@@ -95,16 +140,6 @@ class ALPHA360(DataHandlerLP):
|
||||
},
|
||||
}
|
||||
|
||||
learn_processors = [
|
||||
{"class": "DropnaLabel", "kwargs": {"fields_group": "label"}},
|
||||
{"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}},
|
||||
]
|
||||
infer_processors = [
|
||||
{"class": "ProcessInf", "kwargs": {}},
|
||||
{"class": "ZscoreNorm", "kwargs": {"fit_start_time": fit_start_time, "fit_end_time": fit_end_time}},
|
||||
{"class": "Fillna", "kwargs": {}},
|
||||
]
|
||||
|
||||
super().__init__(
|
||||
instruments,
|
||||
start_time,
|
||||
@@ -168,33 +203,12 @@ class Alpha158(DataHandlerLP):
|
||||
start_time=None,
|
||||
end_time=None,
|
||||
infer_processors=[],
|
||||
learn_processors=["DropnaLabel", {"class": "CSZScoreNorm", "kwargs": {"fields_group": "label"}}],
|
||||
learn_processors=_DEFAULT_LEARN_PROCESSORS,
|
||||
fit_start_time=None,
|
||||
fit_end_time=None,
|
||||
):
|
||||
def check_transform_proc(proc_l):
|
||||
new_l = []
|
||||
for p in proc_l:
|
||||
if not isinstance(p, Processor):
|
||||
klass, pkwargs = get_cls_kwargs(p, processor_module)
|
||||
args = getfullargspec(klass).args
|
||||
if "fit_start_time" in args and "fit_end_time" in args:
|
||||
assert (
|
||||
fit_start_time is not None and fit_end_time is not None
|
||||
), "Make sure `fit_start_time` and `fit_end_time` are not None."
|
||||
pkwargs.update(
|
||||
{
|
||||
"fit_start_time": fit_start_time,
|
||||
"fit_end_time": fit_end_time,
|
||||
}
|
||||
)
|
||||
new_l.append({"class": klass.__name__, "kwargs": pkwargs})
|
||||
else:
|
||||
new_l.append(p)
|
||||
return new_l
|
||||
|
||||
infer_processors = check_transform_proc(infer_processors)
|
||||
learn_processors = check_transform_proc(learn_processors)
|
||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||
|
||||
data_loader = {
|
||||
"class": "QlibDataLoader",
|
||||
|
||||
@@ -28,14 +28,10 @@ class GRU(Model):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_dim : int
|
||||
input dimension
|
||||
output_dim : int
|
||||
output dimension
|
||||
layers : tuple
|
||||
layer sizes
|
||||
lr : float
|
||||
learning rate
|
||||
d_feat : int
|
||||
input dimension for each time step
|
||||
metric: str
|
||||
the evaluate metric used in early stop
|
||||
optimizer : str
|
||||
optimizer name
|
||||
GPU : str
|
||||
@@ -112,10 +108,6 @@ class GRU(Model):
|
||||
)
|
||||
)
|
||||
|
||||
if loss not in {"mse", "binary"}:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss))
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
self.gru_model = GRUModel(
|
||||
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
|
||||
)
|
||||
@@ -251,7 +243,6 @@ class GRU(Model):
|
||||
# train
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
# return
|
||||
|
||||
for step in range(self.n_epochs):
|
||||
self.logger.info("Epoch%d:", step)
|
||||
|
||||
@@ -28,14 +28,10 @@ class LSTM(Model):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
input_dim : int
|
||||
input dimension
|
||||
output_dim : int
|
||||
output dimension
|
||||
layers : tuple
|
||||
layer sizes
|
||||
lr : float
|
||||
learning rate
|
||||
d_feat : int
|
||||
input dimension for each time step
|
||||
metric: str
|
||||
the evaluate metric used in early stop
|
||||
optimizer : str
|
||||
optimizer name
|
||||
GPU : str
|
||||
@@ -112,10 +108,6 @@ class LSTM(Model):
|
||||
)
|
||||
)
|
||||
|
||||
if loss not in {"mse", "binary"}:
|
||||
raise NotImplementedError("loss {} is not supported!".format(loss))
|
||||
self._scorer = mean_squared_error if loss == "mse" else roc_auc_score
|
||||
|
||||
self.lstm_model = LSTMModel(
|
||||
d_feat=self.d_feat, hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout
|
||||
)
|
||||
@@ -251,7 +243,6 @@ class LSTM(Model):
|
||||
# train
|
||||
self.logger.info("training...")
|
||||
self._fitted = True
|
||||
# return
|
||||
|
||||
for step in range(self.n_epochs):
|
||||
self.logger.info("Epoch%d:", step)
|
||||
|
||||
@@ -166,7 +166,9 @@ class MinMaxNorm(Processor):
|
||||
return df
|
||||
|
||||
|
||||
class ZscoreNorm(Processor):
|
||||
class ZScoreNorm(Processor):
|
||||
"""ZScore Normalization"""
|
||||
|
||||
def __init__(self, fit_start_time, fit_end_time, fields_group=None):
|
||||
self.fit_start_time = fit_start_time
|
||||
self.fit_end_time = fit_end_time
|
||||
@@ -193,6 +195,42 @@ class ZscoreNorm(Processor):
|
||||
return df
|
||||
|
||||
|
||||
class RobustZScoreNorm(Processor):
    """Robust ZScore Normalization

    Use robust statistics for Z-Score normalization:
        mean(x) = median(x)
        std(x) = MAD(x) * 1.4826

    Reference:
        https://en.wikipedia.org/wiki/Median_absolute_deviation
    """

    def __init__(self, fit_start_time, fit_end_time, fields_group=None, clip_outlier=True):
        """Store the fit window and normalization options.

        Parameters
        ----------
        fit_start_time, fit_end_time
            Bounds of the ``datetime`` slice used by :meth:`fit`.
        fields_group
            Passed to ``get_group_columns`` to choose the columns to normalize.
        clip_outlier : bool
            When true, normalized values are clipped to ``[-3, 3]``.
        """
        self.fit_start_time = fit_start_time
        self.fit_end_time = fit_end_time
        self.fields_group = fields_group
        self.clip_outlier = clip_outlier

    def fit(self, df):
        """Estimate the per-column median and scaled MAD on the fit window."""
        df = fetch_df_by_index(df, slice(self.fit_start_time, self.fit_end_time), level="datetime")
        self.cols = get_group_columns(df, self.fields_group)
        X = df[self.cols].values
        # NaN-aware median plays the role of the mean.
        self.mean_train = np.nanmedian(X, axis=0)
        # Median absolute deviation around that median plays the role of the std.
        self.std_train = np.nanmedian(np.abs(X - self.mean_train), axis=0)
        # EPS is added before scaling; presumably a small constant that keeps
        # the later division away from zero - confirm where EPS is defined.
        self.std_train += EPS
        self.std_train *= 1.4826

    def __call__(self, df):
        """Normalize ``self.cols`` of ``df`` in place and return ``df``.

        Only the normalized columns are clipped; columns outside
        ``self.cols`` are left untouched.
        """
        # Build a new frame instead of mutating a column selection in place.
        X = (df[self.cols] - self.mean_train) / self.std_train
        if self.clip_outlier:
            X = X.clip(-3, 3)
        df[self.cols] = X
        return df
|
||||
|
||||
|
||||
class CSZScoreNorm(Processor):
|
||||
"""Cross Sectional ZScore Normalization"""
|
||||
|
||||
|
||||
@@ -27,9 +27,9 @@ def sys_config(config, config_path):
|
||||
Parameters
|
||||
----------
|
||||
config : dict
|
||||
configuration of the workflow
|
||||
configuration of the workflow.
|
||||
config_path : str
|
||||
configuration of the path
|
||||
configuration of the path.
|
||||
"""
|
||||
sys_config = config.get("sys", {})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user