mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Adjust rolling api (#1594)
* Intermediate version * Fix yaml template & Successfully run rolling * Be compatible with benchmark * Get same results with previous linear model * Black formatting * Update black * Update the placeholder mechanism * Update CI * Update CI * Upgrade Black * Fix CI and simplify code * Fix CI * Move the data processing caching mechanism into utils. * Adjusting DDG-DA * Organize import
This commit is contained in:
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -35,9 +35,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
@@ -89,4 +87,4 @@ task:
|
||||
- class: PortAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
config: *port_analysis_config
|
||||
config: *port_analysis_config
|
||||
|
||||
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -48,7 +48,6 @@ class Avg15minHandler(DataHandlerLP):
|
||||
)
|
||||
|
||||
def loader_config(self):
|
||||
|
||||
# Results for dataset: df: pd.DataFrame
|
||||
# len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
|
||||
# df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
|
||||
|
||||
@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -33,9 +33,7 @@ port_analysis_config: &port_analysis_config
|
||||
kwargs:
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
backtest:
|
||||
verbose: False
|
||||
limit_threshold: 0.095
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -31,9 +31,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -41,9 +41,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -29,9 +29,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,8 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,8 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -30,9 +30,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
@@ -95,4 +93,4 @@ task:
|
||||
- class: PortAnaRecord
|
||||
module_path: qlib.workflow.record_temp
|
||||
kwargs:
|
||||
config: *port_analysis_config
|
||||
config: *port_analysis_config
|
||||
|
||||
@@ -139,7 +139,6 @@ class GenericDataFormatter(abc.ABC):
|
||||
# Sanity checks first.
|
||||
# Ensure only one ID and time column exist
|
||||
def _check_single_column(input_type):
|
||||
|
||||
length = len([tup for tup in column_definition if tup[2] == input_type])
|
||||
|
||||
if length != 1:
|
||||
|
||||
@@ -78,7 +78,6 @@ class ExperimentConfig:
|
||||
|
||||
@property
|
||||
def hyperparam_iterations(self):
|
||||
|
||||
return 240 if self.experiment == "volatility" else 60
|
||||
|
||||
def make_data_formatter(self):
|
||||
|
||||
@@ -88,7 +88,6 @@ class HyperparamOptManager:
|
||||
params_file = os.path.join(self.hyperparam_folder, "params.csv")
|
||||
|
||||
if os.path.exists(results_file) and os.path.exists(params_file):
|
||||
|
||||
self.results = pd.read_csv(results_file, index_col=0)
|
||||
self.saved_params = pd.read_csv(params_file, index_col=0)
|
||||
|
||||
@@ -178,7 +177,6 @@ class HyperparamOptManager:
|
||||
return parameters
|
||||
|
||||
for _ in range(self._max_tries):
|
||||
|
||||
parameters = _get_next()
|
||||
name = self._get_name(parameters)
|
||||
|
||||
|
||||
@@ -475,7 +475,6 @@ class TemporalFusionTransformer:
|
||||
|
||||
embeddings = []
|
||||
for i in range(num_categorical_variables):
|
||||
|
||||
embedding = tf.keras.Sequential(
|
||||
[
|
||||
tf.keras.layers.InputLayer([time_steps]),
|
||||
@@ -680,7 +679,6 @@ class TemporalFusionTransformer:
|
||||
|
||||
data_map = {}
|
||||
for _, sliced in data.groupby(id_col):
|
||||
|
||||
col_mappings = {"identifier": [id_col], "time": [time_col], "outputs": [target_col], "inputs": input_cols}
|
||||
|
||||
for k in col_mappings:
|
||||
@@ -954,7 +952,6 @@ class TemporalFusionTransformer:
|
||||
"""
|
||||
|
||||
with tf.variable_scope(self.name):
|
||||
|
||||
transformer_layer, all_inputs, attention_components = self._build_base_graph()
|
||||
|
||||
outputs = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.output_size * len(self.quantiles)))(
|
||||
|
||||
@@ -16,9 +16,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -6,7 +6,6 @@ from qlib.utils import init_instance_by_config
|
||||
|
||||
|
||||
def main(seed, config_file="configs/config_alstm.yaml"):
|
||||
|
||||
# set random seed
|
||||
with open(config_file) as f:
|
||||
config = yaml.safe_load(f)
|
||||
@@ -30,7 +29,6 @@ def main(seed, config_file="configs/config_alstm.yaml"):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# set params from cmd
|
||||
parser = argparse.ArgumentParser(allow_abbrev=False)
|
||||
parser.add_argument("--seed", type=int, default=1000, help="random seed")
|
||||
|
||||
@@ -96,7 +96,6 @@ class MTSDatasetH(DatasetH):
|
||||
drop_last=False,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
assert horizon > 0, "please specify `horizon` to avoid data leakage"
|
||||
|
||||
self.seq_len = seq_len
|
||||
@@ -111,7 +110,6 @@ class MTSDatasetH(DatasetH):
|
||||
super().__init__(handler, segments, **kwargs)
|
||||
|
||||
def setup_data(self, handler_kwargs: dict = None, **kwargs):
|
||||
|
||||
super().setup_data()
|
||||
|
||||
# change index to <code, date>
|
||||
|
||||
@@ -45,7 +45,6 @@ class TRAModel(Model):
|
||||
avg_params=True,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
|
||||
@@ -93,7 +92,6 @@ class TRAModel(Model):
|
||||
self.global_step = -1
|
||||
|
||||
def train_epoch(self, data_set):
|
||||
|
||||
self.model.train()
|
||||
self.tra.train()
|
||||
|
||||
@@ -146,7 +144,6 @@ class TRAModel(Model):
|
||||
return total_loss
|
||||
|
||||
def test_epoch(self, data_set, return_pred=False):
|
||||
|
||||
self.model.eval()
|
||||
self.tra.eval()
|
||||
data_set.eval()
|
||||
@@ -204,7 +201,6 @@ class TRAModel(Model):
|
||||
return metrics, preds
|
||||
|
||||
def fit(self, dataset, evals_result=dict()):
|
||||
|
||||
train_set, valid_set, test_set = dataset.prepare(["train", "valid", "test"])
|
||||
|
||||
best_score = -1
|
||||
@@ -380,7 +376,6 @@ class LSTM(nn.Module):
|
||||
self.output_size = hidden_size
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
x = self.input_drop(x)
|
||||
|
||||
if self.training and self.noise_level > 0:
|
||||
@@ -464,7 +459,6 @@ class Transformer(nn.Module):
|
||||
self.output_size = hidden_size
|
||||
|
||||
def forward(self, x):
|
||||
|
||||
x = self.input_drop(x)
|
||||
|
||||
if self.training and self.noise_level > 0:
|
||||
@@ -514,7 +508,6 @@ class TRA(nn.Module):
|
||||
self.predictors = nn.Linear(input_size, num_states)
|
||||
|
||||
def forward(self, hidden, hist_loss):
|
||||
|
||||
preds = self.predictors(hidden)
|
||||
|
||||
if self.num_states == 1:
|
||||
|
||||
@@ -57,9 +57,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -51,9 +51,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -36,9 +36,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -28,9 +28,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,9 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -21,9 +21,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -16,12 +16,12 @@ Though the dataset is different, the conclusion remains the same. By applying `D
|
||||
# Run the Code
|
||||
Users can try `DDG-DA` by running the following command:
|
||||
```bash
|
||||
python workflow.py run_all
|
||||
python workflow.py run
|
||||
```
|
||||
|
||||
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `forecast_model` parameter when `DDG-DA` initializes. For example, users can try `LightGBM` forecasting models by running the following command:
|
||||
```bash
|
||||
python workflow.py --forecast_model="gbdt" run_all
|
||||
python workflow.py --conf_path=../workflow_config_lightgbm_Alpha158.yaml run
|
||||
```
|
||||
|
||||
# Results
|
||||
|
||||
@@ -1,305 +1,40 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
from pathlib import Path
|
||||
from qlib.model.meta.task import MetaTask
|
||||
from qlib.contrib.meta.data_selection.model import MetaModelDS
|
||||
from qlib.contrib.meta.data_selection.dataset import InternalData, MetaDatasetDS
|
||||
from qlib.data.dataset.handler import DataHandlerLP
|
||||
from typing import Union
|
||||
|
||||
import pandas as pd
|
||||
import fire
|
||||
import sys
|
||||
import pickle
|
||||
from typing import Optional
|
||||
|
||||
from qlib import auto_init
|
||||
from qlib.model.trainer import TrainerR
|
||||
from qlib.typehint import Literal
|
||||
from qlib.utils import init_instance_by_config
|
||||
from qlib.workflow import R
|
||||
from qlib.contrib.rolling.ddgda import DDGDA
|
||||
from qlib.tests.data import GetData
|
||||
|
||||
DIRNAME = Path(__file__).absolute().resolve().parent
|
||||
sys.path.append(str(DIRNAME.parent / "baseline"))
|
||||
from rolling_benchmark import RollingBenchmark # NOTE: sys.path is changed for import RollingBenchmark
|
||||
BENCH_DIR = DIRNAME.parent / "baseline"
|
||||
|
||||
|
||||
class DDGDA:
|
||||
"""
|
||||
please run `python workflow.py run_all` to run the full workflow of the experiment
|
||||
class DDGDABench(DDGDA):
|
||||
# The config in the README.md
|
||||
CONF_LIST = [
|
||||
BENCH_DIR / "workflow_config_linear_Alpha158.yaml",
|
||||
BENCH_DIR / "workflow_config_lightgbm_Alpha158.yaml",
|
||||
]
|
||||
|
||||
**NOTE**
|
||||
before running the example, please clean your previous results with following command
|
||||
- `rm -r mlruns`
|
||||
"""
|
||||
DEFAULT_CONF = CONF_LIST[0] # Linear by default due to efficiency
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
sim_task_model: Literal["linear", "gbdt"] = "gbdt",
|
||||
forecast_model: Literal["linear", "gbdt"] = "linear",
|
||||
h_path: Optional[str] = None,
|
||||
test_end: Optional[str] = None,
|
||||
train_start: Optional[str] = None,
|
||||
meta_1st_train_end: Optional[str] = None,
|
||||
task_ext_conf: Optional[dict] = None,
|
||||
alpha: float = 0.01,
|
||||
proxy_hd: str = "handler_proxy.pkl",
|
||||
):
|
||||
"""
|
||||
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
|
||||
# This code is for being compatible with the previous old code
|
||||
conf_path = Path(conf_path)
|
||||
super().__init__(conf_path=conf_path, horizon=horizon, working_dir=DIRNAME, **kwargs)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
train_start: Optional[str]
|
||||
the start datetime for data. It is used in training start time (for both tasks & meta learing)
|
||||
test_end: Optional[str]
|
||||
the end datetime for data. It is used in test end time
|
||||
meta_1st_train_end: Optional[str]
|
||||
the datetime of training end of the first meta_task
|
||||
alpha: float
|
||||
Setting the L2 regularization for ridge
|
||||
The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
|
||||
"""
|
||||
self.step = 20
|
||||
# NOTE:
|
||||
# the horizon must match the meaning in the base task template
|
||||
self.horizon = 20
|
||||
self.meta_exp_name = "DDG-DA"
|
||||
self.sim_task_model = sim_task_model # The model to capture the distribution of data.
|
||||
self.forecast_model = forecast_model # downstream forecasting models' type
|
||||
self.rb_kwargs = {
|
||||
"h_path": h_path,
|
||||
"test_end": test_end,
|
||||
"train_start": train_start,
|
||||
"task_ext_conf": task_ext_conf,
|
||||
}
|
||||
self.alpha = alpha
|
||||
self.meta_1st_train_end = meta_1st_train_end
|
||||
self.proxy_hd = proxy_hd
|
||||
|
||||
def get_feature_importance(self):
|
||||
# this must be lightGBM, because it needs to get the feature importance
|
||||
rb = RollingBenchmark(model_type="gbdt", **self.rb_kwargs)
|
||||
task = rb.basic_task()
|
||||
|
||||
with R.start(experiment_name="feature_importance"):
|
||||
model = init_instance_by_config(task["model"])
|
||||
dataset = init_instance_by_config(task["dataset"])
|
||||
model.fit(dataset)
|
||||
|
||||
fi = model.get_feature_importance()
|
||||
|
||||
# Because the model use numpy instead of dataframe for training lightgbm
|
||||
# So the we must use following extra steps to get the right feature importance
|
||||
df = dataset.prepare(segments=slice(None), col_set="feature", data_key=DataHandlerLP.DK_R)
|
||||
cols = df.columns
|
||||
fi_named = {cols[int(k.split("_")[1])]: imp for k, imp in fi.to_dict().items()}
|
||||
|
||||
return pd.Series(fi_named)
|
||||
|
||||
def dump_data_for_proxy_model(self):
|
||||
"""
|
||||
Dump data for training meta model.
|
||||
The meta model will be trained upon the proxy forecasting model.
|
||||
This dataset is for the proxy forecasting model.
|
||||
"""
|
||||
topk = 30
|
||||
fi = self.get_feature_importance()
|
||||
col_selected = fi.nlargest(topk)
|
||||
|
||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
||||
task = rb.basic_task()
|
||||
dataset = init_instance_by_config(task["dataset"])
|
||||
prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
|
||||
|
||||
feature_df = prep_ds["feature"]
|
||||
label_df = prep_ds["label"]
|
||||
|
||||
feature_selected = feature_df.loc[:, col_selected.index]
|
||||
|
||||
feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
|
||||
lambda df: (df - df.mean()).div(df.std())
|
||||
)
|
||||
feature_selected = feature_selected.fillna(0.0)
|
||||
|
||||
df_all = {
|
||||
"label": label_df.reindex(feature_selected.index),
|
||||
"feature": feature_selected,
|
||||
}
|
||||
df_all = pd.concat(df_all, axis=1)
|
||||
df_all.to_pickle(DIRNAME / "fea_label_df.pkl")
|
||||
|
||||
# dump data in handler format for aligning the interface
|
||||
handler = DataHandlerLP(
|
||||
data_loader={
|
||||
"class": "qlib.data.dataset.loader.StaticDataLoader",
|
||||
"kwargs": {"config": DIRNAME / "fea_label_df.pkl"},
|
||||
}
|
||||
)
|
||||
handler.to_pickle(DIRNAME / self.proxy_hd, dump_all=True)
|
||||
|
||||
@property
|
||||
def _internal_data_path(self):
|
||||
return DIRNAME / f"internal_data_s{self.step}.pkl"
|
||||
|
||||
def dump_meta_ipt(self):
|
||||
"""
|
||||
Dump data for training meta model.
|
||||
This function will dump the input data for meta model
|
||||
"""
|
||||
# According to the experiments, the choice of the model type is very important for achieving good results
|
||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
||||
sim_task = rb.basic_task()
|
||||
|
||||
if self.sim_task_model == "gbdt":
|
||||
sim_task["model"].setdefault("kwargs", {}).update({"early_stopping_rounds": None, "num_boost_round": 150})
|
||||
|
||||
exp_name_sim = f"data_sim_s{self.step}"
|
||||
|
||||
internal_data = InternalData(sim_task, self.step, exp_name=exp_name_sim)
|
||||
internal_data.setup(trainer=TrainerR)
|
||||
|
||||
with self._internal_data_path.open("wb") as f:
|
||||
pickle.dump(internal_data, f)
|
||||
|
||||
def train_meta_model(self, fill_method="max"):
|
||||
"""
|
||||
training a meta model based on a simplified linear proxy model;
|
||||
"""
|
||||
|
||||
# 1) leverage the simplified proxy forecasting model to train meta model.
|
||||
# - Only the dataset part is important, in current version of meta model will integrate the
|
||||
rb = RollingBenchmark(model_type=self.sim_task_model, **self.rb_kwargs)
|
||||
sim_task = rb.basic_task()
|
||||
# the train_start for training meta model does not necessarily align with final rolling
|
||||
train_start = "2008-01-01" if self.rb_kwargs.get("train_start") is None else self.rb_kwargs.get("train_start")
|
||||
train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
|
||||
test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
proxy_forecast_model_task = {
|
||||
# "model": "qlib.contrib.model.linear.LinearModel",
|
||||
"dataset": {
|
||||
"class": "qlib.data.dataset.DatasetH",
|
||||
"kwargs": {
|
||||
"handler": f"file://{(DIRNAME / self.proxy_hd).absolute()}",
|
||||
"segments": {
|
||||
"train": (train_start, train_end),
|
||||
"test": (test_start, sim_task["dataset"]["kwargs"]["segments"]["test"][1]),
|
||||
},
|
||||
},
|
||||
},
|
||||
# "record": ["qlib.workflow.record_temp.SignalRecord"]
|
||||
}
|
||||
# the proxy_forecast_model_task will be used to create meta tasks.
|
||||
# The test date of first task will be 2011-01-01. Each test segment will be about 20days
|
||||
# The tasks include all training tasks and test tasks.
|
||||
|
||||
# 2) preparing meta dataset
|
||||
kwargs = dict(
|
||||
task_tpl=proxy_forecast_model_task,
|
||||
step=self.step,
|
||||
segments=0.62, # keep test period consistent with the dataset yaml
|
||||
trunc_days=1 + self.horizon,
|
||||
hist_step_n=30,
|
||||
fill_method=fill_method,
|
||||
rolling_ext_days=0,
|
||||
)
|
||||
# NOTE:
|
||||
# the input of meta model (internal data) are shared between proxy model and final forecasting model
|
||||
# but their task test segment are not aligned! It worked in my previous experiment.
|
||||
# So the misalignment will not affect the effectiveness of the method.
|
||||
with self._internal_data_path.open("rb") as f:
|
||||
internal_data = pickle.load(f)
|
||||
|
||||
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
||||
|
||||
# 3) train and logging meta model
|
||||
with R.start(experiment_name=self.meta_exp_name):
|
||||
R.log_params(**kwargs)
|
||||
mm = MetaModelDS(
|
||||
step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
|
||||
)
|
||||
mm.fit(md)
|
||||
R.save_objects(model=mm)
|
||||
|
||||
@property
|
||||
def _task_path(self):
|
||||
return DIRNAME / f"tasks_s{self.step}.pkl"
|
||||
|
||||
def meta_inference(self):
|
||||
"""
|
||||
Leverage meta-model for inference:
|
||||
- Given
|
||||
- baseline tasks
|
||||
- input for meta model(internal data)
|
||||
- meta model (its learnt knowledge on proxy forecasting model is expected to transfer to normal forecasting model)
|
||||
"""
|
||||
# 1) get meta model
|
||||
exp = R.get_exp(experiment_name=self.meta_exp_name)
|
||||
rec = exp.list_recorders(rtype=exp.RT_L)[0]
|
||||
meta_model: MetaModelDS = rec.load_object("model")
|
||||
|
||||
# 2)
|
||||
# we are transfer to knowledge of meta model to final forecasting tasks.
|
||||
# Create MetaTaskDataset for the final forecasting tasks
|
||||
# Aligning the setting of it to the MetaTaskDataset when training Meta model is necessary
|
||||
|
||||
# 2.1) get previous config
|
||||
param = rec.list_params()
|
||||
trunc_days = int(param["trunc_days"])
|
||||
step = int(param["step"])
|
||||
hist_step_n = int(param["hist_step_n"])
|
||||
fill_method = param.get("fill_method", "max")
|
||||
|
||||
rb = RollingBenchmark(model_type=self.forecast_model, **self.rb_kwargs)
|
||||
task_l = rb.create_rolling_tasks()
|
||||
|
||||
# 2.2) create meta dataset for final dataset
|
||||
kwargs = dict(
|
||||
task_tpl=task_l,
|
||||
step=step,
|
||||
segments=0.0, # all the tasks are for testing
|
||||
trunc_days=trunc_days,
|
||||
hist_step_n=hist_step_n,
|
||||
fill_method=fill_method,
|
||||
task_mode=MetaTask.PROC_MODE_TRANSFER,
|
||||
)
|
||||
|
||||
with self._internal_data_path.open("rb") as f:
|
||||
internal_data = pickle.load(f)
|
||||
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
||||
|
||||
# 3) meta model make inference and get new qlib task
|
||||
new_tasks = meta_model.inference(mds)
|
||||
with self._task_path.open("wb") as f:
|
||||
pickle.dump(new_tasks, f)
|
||||
|
||||
def train_and_eval_tasks(self):
|
||||
"""
|
||||
Training the tasks generated by meta model
|
||||
Then evaluate it
|
||||
"""
|
||||
with self._task_path.open("rb") as f:
|
||||
tasks = pickle.load(f)
|
||||
rb = RollingBenchmark(rolling_exp="rolling_ds", model_type=self.forecast_model, **self.rb_kwargs)
|
||||
rb.train_rolling_tasks(tasks)
|
||||
rb.ens_rolling()
|
||||
rb.update_rolling_rec()
|
||||
|
||||
def run_all(self):
|
||||
# 1) file: handler_proxy.pkl (self.proxy_hd)
|
||||
self.dump_data_for_proxy_model()
|
||||
# 2)
|
||||
# file: internal_data_s20.pkl
|
||||
# mlflow: data_sim_s20, models for calculating meta_ipt
|
||||
self.dump_meta_ipt()
|
||||
# 3) meta model will be stored in `DDG-DA`
|
||||
self.train_meta_model()
|
||||
# 4) new_tasks are saved in "tasks_s20.pkl" (reweighter is added)
|
||||
self.meta_inference()
|
||||
# 5) load the saved tasks and train model
|
||||
self.train_and_eval_tasks()
|
||||
for f in self.CONF_LIST:
|
||||
if conf_path.samefile(f):
|
||||
break
|
||||
else:
|
||||
self.logger.warning("Model type is not in the benchmark!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
GetData().qlib_data(exists_skip=True)
|
||||
auto_init()
|
||||
fire.Fire(DDGDA)
|
||||
fire.Fire(DDGDABench)
|
||||
|
||||
@@ -5,11 +5,12 @@ This is the framework of periodically Rolling Retrain (RR) forecasting models. R
|
||||
## Run the Code
|
||||
Users can try RR by running the following command:
|
||||
```bash
|
||||
python rolling_benchmark.py run_all
|
||||
python rolling_benchmark.py run
|
||||
```
|
||||
|
||||
The default forecasting models are `Linear`. Users can choose other forecasting models by changing the `model_type` parameter.
|
||||
For example, users can try `LightGBM` forecasting models by running the following command:
|
||||
```bash
|
||||
python rolling_benchmark.py --model_type="gbdt" run_all
|
||||
```
|
||||
python rolling_benchmark.py --conf_path=workflow_config_lightgbm_Alpha158.yaml run
|
||||
|
||||
```
|
||||
|
||||
@@ -1,161 +1,33 @@
|
||||
# Copyright (c) Microsoft Corporation.
|
||||
# Licensed under the MIT License.
|
||||
from typing import Optional
|
||||
from qlib.model.ens.ensemble import RollingEnsemble
|
||||
from qlib.utils import init_instance_by_config
|
||||
import fire
|
||||
import yaml
|
||||
import pandas as pd
|
||||
from qlib import auto_init
|
||||
from pathlib import Path
|
||||
from tqdm.auto import tqdm
|
||||
from qlib.model.trainer import TrainerR
|
||||
from qlib.log import get_module_logger
|
||||
from qlib.utils.data import update_config
|
||||
from qlib.workflow import R
|
||||
from typing import Union
|
||||
|
||||
import fire
|
||||
|
||||
from qlib import auto_init
|
||||
from qlib.contrib.rolling.base import Rolling
|
||||
from qlib.tests.data import GetData
|
||||
|
||||
DIRNAME = Path(__file__).absolute().resolve().parent
|
||||
from qlib.workflow.task.gen import task_generator, RollingGen
|
||||
from qlib.workflow.task.collect import RecorderCollector
|
||||
from qlib.workflow.record_temp import PortAnaRecord, SigAnaRecord
|
||||
|
||||
|
||||
class RollingBenchmark:
|
||||
"""
|
||||
**NOTE**
|
||||
before running the example, please clean your previous results with following command
|
||||
- `rm -r mlruns`
|
||||
class RollingBenchmark(Rolling):
|
||||
# The config in the README.md
|
||||
CONF_LIST = [DIRNAME / "workflow_config_linear_Alpha158.yaml", DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"]
|
||||
|
||||
"""
|
||||
DEFAULT_CONF = CONF_LIST[0]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
rolling_exp: str = "rolling_models",
|
||||
model_type: str = "linear",
|
||||
h_path: Optional[str] = None,
|
||||
train_start: Optional[str] = None,
|
||||
test_end: Optional[str] = None,
|
||||
task_ext_conf: Optional[dict] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
rolling_exp : str
|
||||
The name for the experiments for rolling
|
||||
model_type : str
|
||||
The model to be boosted.
|
||||
h_path : Optional[str]
|
||||
the dumped data handler;
|
||||
test_end : Optional[str]
|
||||
the test end for the data. It is typically used together with the handler
|
||||
train_start : Optional[str]
|
||||
the train start for the data. It is typically used together with the handler.
|
||||
task_ext_conf : Optional[dict]
|
||||
some option to update the
|
||||
"""
|
||||
self.step = 20
|
||||
self.horizon = 20
|
||||
self.rolling_exp = rolling_exp
|
||||
self.model_type = model_type
|
||||
self.h_path = h_path
|
||||
self.train_start = train_start
|
||||
self.test_end = test_end
|
||||
self.logger = get_module_logger("RollingBenchmark")
|
||||
self.task_ext_conf = task_ext_conf
|
||||
def __init__(self, conf_path: Union[str, Path] = DEFAULT_CONF, horizon=20, **kwargs) -> None:
|
||||
# This code is for being compatible with the previous old code
|
||||
conf_path = Path(conf_path)
|
||||
super().__init__(conf_path=conf_path, horizon=horizon, **kwargs)
|
||||
|
||||
def basic_task(self):
|
||||
"""For fast training rolling"""
|
||||
if self.model_type == "gbdt":
|
||||
conf_path = DIRNAME / "workflow_config_lightgbm_Alpha158.yaml"
|
||||
# dump the processed data on to disk for later loading to speed up the processing
|
||||
h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
|
||||
elif self.model_type == "linear":
|
||||
# We use ridge regression to stabilize the performance
|
||||
conf_path = DIRNAME / "workflow_config_linear_Alpha158.yaml"
|
||||
h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
|
||||
for f in self.CONF_LIST:
|
||||
if conf_path.samefile(f):
|
||||
break
|
||||
else:
|
||||
raise AssertionError("Model type is not supported!")
|
||||
|
||||
if self.h_path is not None:
|
||||
h_path = Path(self.h_path)
|
||||
|
||||
with conf_path.open("r") as f:
|
||||
conf = yaml.safe_load(f)
|
||||
|
||||
# modify dataset horizon
|
||||
conf["task"]["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
|
||||
"Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
|
||||
]
|
||||
|
||||
task = conf["task"]
|
||||
|
||||
if self.task_ext_conf is not None:
|
||||
task = update_config(task, self.task_ext_conf)
|
||||
|
||||
if not h_path.exists():
|
||||
h_conf = task["dataset"]["kwargs"]["handler"]
|
||||
h = init_instance_by_config(h_conf)
|
||||
h.to_pickle(h_path, dump_all=True)
|
||||
|
||||
task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
|
||||
task["record"] = ["qlib.workflow.record_temp.SignalRecord"]
|
||||
|
||||
if self.train_start is not None:
|
||||
seg = task["dataset"]["kwargs"]["segments"]["train"]
|
||||
task["dataset"]["kwargs"]["segments"]["train"] = pd.Timestamp(self.train_start), seg[1]
|
||||
|
||||
if self.test_end is not None:
|
||||
seg = task["dataset"]["kwargs"]["segments"]["test"]
|
||||
task["dataset"]["kwargs"]["segments"]["test"] = seg[0], pd.Timestamp(self.test_end)
|
||||
self.logger.info(task)
|
||||
return task
|
||||
|
||||
def create_rolling_tasks(self):
|
||||
task = self.basic_task()
|
||||
task_l = task_generator(
|
||||
task, RollingGen(step=self.step, trunc_days=self.horizon + 1)
|
||||
) # the last two days should be truncated to avoid information leakage
|
||||
return task_l
|
||||
|
||||
def train_rolling_tasks(self, task_l=None):
|
||||
if task_l is None:
|
||||
task_l = self.create_rolling_tasks()
|
||||
trainer = TrainerR(experiment_name=self.rolling_exp)
|
||||
trainer(task_l)
|
||||
|
||||
COMB_EXP = "rolling"
|
||||
|
||||
def ens_rolling(self):
|
||||
rc = RecorderCollector(
|
||||
experiment=self.rolling_exp,
|
||||
artifacts_key=["pred", "label"],
|
||||
process_list=[RollingEnsemble()],
|
||||
# rec_key_func=lambda rec: (self.COMB_EXP, rec.info["id"]),
|
||||
artifacts_path={"pred": "pred.pkl", "label": "label.pkl"},
|
||||
)
|
||||
res = rc()
|
||||
with R.start(experiment_name=self.COMB_EXP):
|
||||
R.log_params(exp_name=self.rolling_exp)
|
||||
R.save_objects(**{"pred.pkl": res["pred"], "label.pkl": res["label"]})
|
||||
|
||||
def update_rolling_rec(self):
|
||||
"""
|
||||
Evaluate the combined rolling results
|
||||
"""
|
||||
for _, rec in R.list_recorders(experiment_name=self.COMB_EXP).items():
|
||||
for rt_cls in SigAnaRecord, PortAnaRecord:
|
||||
rt = rt_cls(recorder=rec, skip_existing=True)
|
||||
rt.generate()
|
||||
print(f"Your evaluation results can be found in the experiment named `{self.COMB_EXP}`.")
|
||||
|
||||
def run_all(self):
|
||||
# the results will be save in mlruns.
|
||||
# 1) each rolling task is saved in rolling_models
|
||||
self.train_rolling_tasks()
|
||||
# 2) combined rolling tasks and evaluation results are saved in rolling
|
||||
self.ens_rolling()
|
||||
self.update_rolling_rec()
|
||||
self.logger.warning("Model type is not in the benchmark!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -14,8 +14,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
model: <MODEL>
|
||||
dataset: <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -27,9 +27,7 @@ port_analysis_config: &port_analysis_config
|
||||
class: TopkDropoutStrategy
|
||||
module_path: qlib.contrib.strategy
|
||||
kwargs:
|
||||
signal:
|
||||
- <MODEL>
|
||||
- <DATASET>
|
||||
signal: <PRED>
|
||||
topk: 50
|
||||
n_drop: 5
|
||||
backtest:
|
||||
|
||||
@@ -14,7 +14,6 @@ class HighFreqHandler(DataHandlerLP):
|
||||
fit_end_time=None,
|
||||
drop_raw=True,
|
||||
):
|
||||
|
||||
infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
|
||||
learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ from highfreq_ops import get_calendar_day, DayLast, FFillNan, BFillNan, Date, Se
|
||||
|
||||
|
||||
class HighfreqWorkflow:
|
||||
|
||||
SPEC_CONF = {"custom_ops": [DayLast, FFillNan, BFillNan, Date, Select, IsNull, Cut], "expression_cache": None}
|
||||
|
||||
MARKET = "all"
|
||||
|
||||
@@ -35,7 +35,6 @@ def objective(trial):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data"
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||
qlib.init(provider_uri=provider_uri, region="cn")
|
||||
|
||||
@@ -38,7 +38,6 @@ def objective(trial):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data"
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||
qlib.init(provider_uri=provider_uri, region=REG_CN)
|
||||
|
||||
@@ -11,7 +11,6 @@ from qlib.tests.config import CSI300_GBDT_TASK
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# use default data
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||
|
||||
@@ -9,7 +9,6 @@ from qlib.model.riskmodel import StructuredCovEstimator
|
||||
|
||||
|
||||
def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||
|
||||
universe = D.features(D.instruments("csi300"), ["$close"], start_time=start_time).swaplevel().sort_index()
|
||||
|
||||
price_all = (
|
||||
@@ -20,7 +19,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||
riskmodel = StructuredCovEstimator()
|
||||
|
||||
for i in range(T - 1, len(price_all)):
|
||||
|
||||
date = price_all.index[i]
|
||||
ref_date = price_all.index[i - T + 1]
|
||||
|
||||
@@ -47,7 +45,6 @@ def prepare_data(riskdata_root="./riskdata", T=240, start_time="2016-01-01"):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
import qlib
|
||||
|
||||
qlib.init(provider_uri="~/.qlib/qlib_data/cn_data")
|
||||
|
||||
@@ -13,7 +13,6 @@ from qlib.tests.data import GetData
|
||||
|
||||
|
||||
class RollingDataWorkflow:
|
||||
|
||||
MARKET = "csi300"
|
||||
start_time = "2010-01-01"
|
||||
end_time = "2019-12-31"
|
||||
@@ -93,7 +92,6 @@ class RollingDataWorkflow:
|
||||
dataset = init_instance_by_config(dataset_config)
|
||||
|
||||
for rolling_offset in range(self.rolling_cnt):
|
||||
|
||||
print(f"===========rolling{rolling_offset} start===========")
|
||||
if rolling_offset:
|
||||
dataset.config(
|
||||
|
||||
@@ -17,7 +17,6 @@ from qlib.tests.config import CSI300_BENCH, CSI300_GBDT_TASK
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# use default data
|
||||
provider_uri = "~/.qlib/qlib_data/cn_data" # target_dir
|
||||
GetData().qlib_data(target_dir=provider_uri, region=REG_CN, exists_skip=True)
|
||||
|
||||
Reference in New Issue
Block a user