1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-30 09:31:18 +08:00

Compare commits

..

3 Commits

Author SHA1 Message Date
Young
36950b905d Update Qlib Version 2022-06-15 14:48:54 +08:00
you-n-g
58540f76ee Csi500 example (#1126)
* Stage code

* Update results and scripts
2022-06-15 10:18:13 +08:00
YaOzI
3e6e2865ce Fixed a few mixed Chinese punctuation typos (#1123) 2022-06-14 20:12:14 +08:00
14 changed files with 219 additions and 15 deletions

View File

@@ -8,7 +8,7 @@ on:
jobs:
build:
timeout-minutes: 120
runs-on: ${{ matrix.os }}
strategy:
matrix:

View File

@@ -9,7 +9,7 @@ on:
jobs:
build:
timeout-minutes: 120
runs-on: ${{ matrix.os }}
strategy:
matrix:

View File

@@ -66,7 +66,7 @@ TopkDropoutStrategy
- Adopt the ``Topk-Drop`` algorithm to calculate the target amount of each stock
.. note::
There are two parameters for the ``Topk-Drop`` algorithm
There are two parameters for the ``Topk-Drop`` algorithm:
- `Topk`: The number of stocks held
- `Drop`: The number of stocks sold on each trading day

View File

@@ -1,3 +1,3 @@
pandas==1.1.2
numpy==1.21.0
lightgbm==3.1.0
lightgbm

View File

@@ -0,0 +1,72 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
model: <MODEL>
dataset: <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.2
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha158
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -0,0 +1,80 @@
qlib_init:
provider_uri: "~/.qlib/qlib_data/cn_data"
region: cn
market: &market csi500
benchmark: &benchmark SH000905
data_handler_config: &data_handler_config
start_time: 2008-01-01
end_time: 2020-08-01
fit_start_time: 2008-01-01
fit_end_time: 2014-12-31
instruments: *market
infer_processors: []
learn_processors:
- class: DropnaLabel
- class: CSRankNorm
kwargs:
fields_group: label
label: ["Ref($close, -2) / Ref($close, -1) - 1"]
port_analysis_config: &port_analysis_config
strategy:
class: TopkDropoutStrategy
module_path: qlib.contrib.strategy
kwargs:
signal:
- <MODEL>
- <DATASET>
topk: 50
n_drop: 5
backtest:
start_time: 2017-01-01
end_time: 2020-08-01
account: 100000000
benchmark: *benchmark
exchange_kwargs:
limit_threshold: 0.095
deal_price: close
open_cost: 0.0005
close_cost: 0.0015
min_cost: 5
task:
model:
class: LGBModel
module_path: qlib.contrib.model.gbdt
kwargs:
loss: mse
colsample_bytree: 0.8879
learning_rate: 0.0421
subsample: 0.8789
lambda_l1: 205.6999
lambda_l2: 580.9768
max_depth: 8
num_leaves: 210
num_threads: 20
dataset:
class: DatasetH
module_path: qlib.data.dataset
kwargs:
handler:
class: Alpha360
module_path: qlib.contrib.data.handler
kwargs: *data_handler_config
segments:
train: [2008-01-01, 2014-12-31]
valid: [2015-01-01, 2016-12-31]
test: [2017-01-01, 2020-08-01]
record:
- class: SignalRecord
module_path: qlib.workflow.record_temp
kwargs:
model: <MODEL>
dataset: <DATASET>
- class: SigAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
ana_long_short: False
ann_scaler: 252
- class: PortAnaRecord
module_path: qlib.workflow.record_temp
kwargs:
config: *port_analysis_config

View File

@@ -20,7 +20,9 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
> NOTE:
> We have very limited resources to implement and finetune the models. We did our best to compare these models fairly, but some models may have greater potential than the table below suggests. Contributions that explore their potential are highly welcome.
## Alpha158 dataset
## Results on CSI300
### Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------------------------------------|-------------------------------------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
@@ -44,7 +46,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
| DoubleEnsemble(Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4340±0.00 | 0.0523±0.00 | 0.4284±0.01 | 0.1168±0.01 | 1.3384±0.12 | -0.1036±0.01 |
## Alpha360 dataset
### Alpha360 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|-------------------------------------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
@@ -79,6 +81,38 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
- Signal-based evaluation: IC, ICIR, Rank IC, Rank ICIR
- Portfolio-based metrics: Annualized Return, Information Ratio, Max Drawdown
## Results on CSI500
The results on CSI500 are not complete. PRs for models on CSI500 are welcome!
Transferring previous models from CSI300 to CSI500 is quite easy. You can try models with just a few commands below.
```
cd examples/benchmarks/LightGBM
pip install -r requirements.txt
# create new config and set the benchmark to csi500
cp workflow_config_lightgbm_Alpha158.yaml workflow_config_lightgbm_Alpha158_csi500.yaml
sed -i "s/csi300/csi500/g" workflow_config_lightgbm_Alpha158_csi500.yaml
sed -i "s/SH000300/SH000905/g" workflow_config_lightgbm_Alpha158_csi500.yaml
# you can either run the model once
qrun workflow_config_lightgbm_Alpha158_csi500.yaml
# or run it multiple times automatically and get the summarized results.
cd ../../
python run_all_model.py run 3 lightgbm Alpha158 csi500 # for models with randomness, please run it 20 times.
```
### Alpha158 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| LightGBM | Alpha158 | 0.0377±0.00 | 0.3860±0.00 | 0.0448±0.00 | 0.4675±0.00 | 0.1151±0.00 | 1.3884±0.00 | -0.0898±0.00 |
### Alpha360 dataset
| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
|------------|----------|-------------|-------------|-------------|-------------|-------------------|-------------------|--------------|
| LightGBM | Alpha360 | 0.0400±0.00 | 0.3605±0.00 | 0.0536±0.00 | 0.5431±0.00 | 0.0505±0.00 | 0.7658±0.02 | -0.1880±0.00 |
# Contributing

View File

@@ -117,8 +117,10 @@ def get_all_folders(models, exclude) -> dict:
# function to get all the files under the model folder
def get_all_files(folder_path, dataset) -> (str, str):
yaml_path = str(Path(f"{folder_path}") / f"*{dataset}*.yaml")
def get_all_files(folder_path, dataset, universe="") -> (str, str):
if universe != "":
universe = f"_{universe}"
yaml_path = str(Path(f"{folder_path}") / f"*{dataset}{universe}.yaml")
req_path = str(Path(f"{folder_path}") / f"*.txt")
yaml_file = glob.glob(yaml_path)
req_file = glob.glob(req_path)
@@ -224,6 +226,7 @@ class ModelRunner:
times=1,
models=None,
dataset="Alpha360",
universe="",
exclude=False,
qlib_uri: str = "git+https://github.com/microsoft/qlib#egg=pyqlib",
exp_folder_name: str = "run_all_model_records",
@@ -245,6 +248,9 @@ class ModelRunner:
determines whether the model being used is excluded or included.
dataset : str
determines the dataset to be used for each model.
universe : str
the stock universe of the dataset.
default "" indicates that no universe suffix is appended when matching the config file name
qlib_uri : str
the uri to install qlib with pip
it could be a url on the web or a local path (NOTE: the local path must be an absolute path)
@@ -259,6 +265,15 @@ class ModelRunner:
-------
Here are some use cases of the function in the bash:
The run_all_models will decide which config to run based on `models`, `dataset` and `universe`
Example 1):
models="lightgbm", dataset="Alpha158", universe="" will result in running the following config
examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
models="lightgbm", dataset="Alpha158", universe="csi500" will result in running the following config
examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_csi500.yaml
.. code-block:: bash
# Case 1 - run all models multiple times
@@ -279,6 +294,9 @@ class ModelRunner:
# Case 6 - run other models except those given as arguments for one time
python run_all_model.py run --models=[mlp,tft,sfm] --exclude=True
# Case 7 - run lightgbm model on csi500.
python run_all_model.py run 3 lightgbm Alpha158 csi500
"""
self._init_qlib(exp_folder_name)
@@ -290,7 +308,7 @@ class ModelRunner:
for fn in folders:
# get all files
sys.stderr.write("Retrieving files...\n")
yaml_path, req_path = get_all_files(folders[fn], dataset)
yaml_path, req_path = get_all_files(folders[fn], dataset, universe=universe)
if yaml_path is None:
sys.stderr.write(f"There is no {dataset}.yaml file in {folders[fn]}")
continue

View File

@@ -2,7 +2,7 @@
# Licensed under the MIT License.
from pathlib import Path
__version__ = "0.8.5.99"
__version__ = "0.8.6"
__version__bak = __version__ # This version is backup for QlibConfig.reset_qlib_version
import os
from typing import Union

View File

@@ -113,7 +113,7 @@ _default_config = {
# "~/.qlib/stock_data/cn_data"
# # dict
# {"day": "~/.qlib/stock_data/cn_data", "1min": "~/.qlib/stock_data/cn_data_1min"}
# NOTE: provider_uri priority
# NOTE: provider_uri priority:
# 1. backend_config: backend_obj["kwargs"]["provider_uri"]
# 2. backend_config: backend_obj["kwargs"]["provider_uri_map"]
# 3. qlib.init: provider_uri

View File

@@ -217,7 +217,7 @@ class MetaDatasetDS(MetaTaskDataset):
----------
task_tpl : Union[dict, list]
Decide what tasks are used.
- dict : the task template the prepared task is generated with `step`, `trunc_days` and `RollingGen`
- dict : the task template, the prepared task is generated with `step`, `trunc_days` and `RollingGen`
- list : when list, use the list of tasks directly
the list is supposed to be sorted according to the timeline
step : int

View File

@@ -53,7 +53,7 @@ class TabnetModel(Model):
"""
TabNet model for Qlib
Args
Args:
ps: probability to generate the bernoulli mask
"""
# set hyper-parameters.

View File

@@ -24,7 +24,7 @@ class FileStorageMixin:
"""
# NOTE: provider_uri priority
# NOTE: provider_uri priority:
# 1. self._provider_uri : if provider_uri is provided.
# 2. provider_uri in qlib.config.C

View File

@@ -488,7 +488,7 @@ class DumpDataUpdate(DumpDataBase):
except Exception:
error_code[futures[_future]] = traceback.format_exc()
p_bar.update()
logger.info(f"dump bin errors {error_code}")
logger.info(f"dump bin errors: {error_code}")
logger.info("end of features dump.\n")