mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 14:01:28 +08:00
* Add docs for qlib.rl * Update docs for qlib.rl * Add homepage introduct to RL framework * Update index Link * Fix Icon * typo * Update catelog * Update docs for qlib.rl * Update docs for qlib.rl * Update figure * Update docs for qlib.rl * Update setup.py * FIx setup.py * Update docs and fix some typos * Fix the reference to RL docs * Update framework.svg * Update framework.svg * Update framework.svg * Update docs for qlibrl. * Update docs for qlibrl. * Update docs for Qlibrl. * Update docs for qlibrl. * Update docs for qlibrl. * Update docs for qlibrl. * Add new framework * Update jpg * Update framework.svg * Update framework.svg * Update Qlib framework and description * Update grammar * Update README.md * Update README.md * Update docs/component/rl.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Update docs/component/rl.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Update docs for qlib.rl * Change theme for docs. * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl. * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
176 lines
7.5 KiB
ReStructuredText
176 lines
7.5 KiB
ReStructuredText
|
|
Quick Start
|
|
============
|
|
.. currentmodule:: qlib
|
|
|
|
QlibRL provides an example implementation of a single-asset order execution task. The following is an example config file for training with QlibRL.
|
|
|
|
.. code-block:: yaml
|
|
|
|
simulator:
|
|
# Each step contains 30mins
|
|
time_per_step: 30
|
|
# Upper bound of volume. It should be null or a float between 0 and 1; if it is a float, the upper bound is calculated as that proportion of the market volume.
|
|
vol_limit: null
|
|
env:
|
|
# Concurrent environment workers.
|
|
concurrency: 1
|
|
# dummy or subproc or shmem. Corresponding to `parallelism in tianshou <https://tianshou.readthedocs.io/en/master/api/tianshou.env.html#vectorenv>`_.
|
|
parallel_mode: dummy
|
|
action_interpreter:
|
|
class: CategoricalActionInterpreter
|
|
kwargs:
|
|
# Candidate actions. It can be a list of length L: [a_1, a_2,..., a_L], or an integer n, in which case a list of length n+1 is auto-generated, i.e., [0, 1/n, 2/n,..., n/n].
|
|
values: 14
|
|
# Total number of steps (an upper-bound estimation)
|
|
max_step: 8
|
|
module_path: qlib.rl.order_execution.interpreter
|
|
state_interpreter:
|
|
class: FullHistoryStateInterpreter
|
|
kwargs:
|
|
# Number of dimensions in data.
|
|
data_dim: 6
|
|
# Equal to the total number of records. For example, in SAOE per minute, data_ticks is the length of the day in minutes.
|
|
data_ticks: 240
|
|
# The total number of steps (an upper-bound estimation). For example, 390min / 30min-per-step = 13 steps.
|
|
max_step: 8
|
|
# Provider of the processed data.
|
|
processed_data_provider:
|
|
class: PickleProcessedDataProvider
|
|
module_path: qlib.rl.data.pickle_styled
|
|
kwargs:
|
|
data_dir: ./data/pickle_dataframe/feature
|
|
module_path: qlib.rl.order_execution.interpreter
|
|
reward:
|
|
class: PAPenaltyReward
|
|
kwargs:
|
|
# The penalty for a large volume in a short time.
|
|
penalty: 100.0
|
|
module_path: qlib.rl.order_execution.reward
|
|
data:
|
|
source:
|
|
order_dir: ./data/training_order_split
|
|
data_dir: ./data/pickle_dataframe/backtest
|
|
# number of time indexes
|
|
total_time: 240
|
|
# start time index
|
|
default_start_time: 0
|
|
# end time index
|
|
default_end_time: 240
|
|
proc_data_dim: 6
|
|
num_workers: 0
|
|
queue_size: 20
|
|
network:
|
|
class: Recurrent
|
|
module_path: qlib.rl.order_execution.network
|
|
policy:
|
|
class: PPO
|
|
kwargs:
|
|
lr: 0.0001
|
|
module_path: qlib.rl.order_execution.policy
|
|
runtime:
|
|
seed: 42
|
|
use_cuda: false
|
|
trainer:
|
|
max_epoch: 2
|
|
# Number of times the policy is updated on each batch of collected data
|
|
repeat_per_collect: 5
|
|
earlystop_patience: 2
|
|
# Episodes per collect at training.
|
|
episode_per_collect: 20
|
|
batch_size: 16
|
|
# Perform validation every n iterations
|
|
val_every_n_epoch: 1
|
|
checkpoint_path: ./checkpoints
|
|
checkpoint_every_n_iters: 1
|
|
|
|
|
|
And the config file for backtesting:
|
|
|
|
.. code-block:: yaml
|
|
|
|
order_file: ./data/backtest_orders.csv
|
|
start_time: "9:45"
|
|
end_time: "14:44"
|
|
qlib:
|
|
provider_uri_1min: ./data/bin
|
|
feature_root_dir: ./data/pickle
|
|
# feature generated by today's information
|
|
feature_columns_today: [
|
|
"$open", "$high", "$low", "$close", "$vwap", "$volume",
|
|
]
|
|
# feature generated by yesterday's information
|
|
feature_columns_yesterday: [
|
|
"$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
|
|
]
|
|
exchange:
|
|
# the expression for buying and selling stock limitation
|
|
limit_threshold: ['$close == 0', '$close == 0']
|
|
# deal price for buying and selling
|
|
deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
|
|
volume_threshold:
|
|
# volume limits for both buying and selling; "cum" means that this is a cumulative value over time
|
|
all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
|
|
# the volume limits of buying
|
|
buy: ["current", "$close"]
|
|
# the volume limits of selling, "current" means that this is a real-time value and will not accumulate over time
|
|
sell: ["current", "$close"]
|
|
strategies:
|
|
30min:
|
|
class: TWAPStrategy
|
|
module_path: qlib.contrib.strategy.rule_strategy
|
|
kwargs: {}
|
|
1day:
|
|
class: SAOEIntStrategy
|
|
module_path: qlib.rl.order_execution.strategy
|
|
kwargs:
|
|
state_interpreter:
|
|
class: FullHistoryStateInterpreter
|
|
module_path: qlib.rl.order_execution.interpreter
|
|
kwargs:
|
|
max_step: 8
|
|
data_ticks: 240
|
|
data_dim: 6
|
|
processed_data_provider:
|
|
class: PickleProcessedDataProvider
|
|
module_path: qlib.rl.data.pickle_styled
|
|
kwargs:
|
|
data_dir: ./data/pickle_dataframe/feature
|
|
action_interpreter:
|
|
class: CategoricalActionInterpreter
|
|
module_path: qlib.rl.order_execution.interpreter
|
|
kwargs:
|
|
values: 14
|
|
max_step: 8
|
|
network:
|
|
class: Recurrent
|
|
module_path: qlib.rl.order_execution.network
|
|
kwargs: {}
|
|
policy:
|
|
class: PPO
|
|
module_path: qlib.rl.order_execution.policy
|
|
kwargs:
|
|
lr: 1.0e-4
|
|
# Local path to the latest model. The model is generated during training, so please run training first if you want to run backtest with a trained policy. You could also remove this parameter to run backtest with a randomly initialized policy.
|
|
weight_file: ./checkpoints/latest.pth
|
|
# Concurrent environment workers.
|
|
concurrency: 5
|
|
|
|
With the above config files, you can start training the agent by the following command:
|
|
|
|
.. code-block:: console
|
|
|
|
$ python -m qlib.rl.contrib.train_onpolicy --config_path train_config.yml
|
|
|
|
After the training, you can backtest with the following command:
|
|
|
|
.. code-block:: console
|
|
|
|
$ python -m qlib.rl.contrib.backtest --config_path backtest_config.yml
|
|
|
|
In this example, :class:`~qlib.rl.order_execution.simulator_qlib.SingleAssetOrderExecution` and :class:`~qlib.rl.order_execution.simulator_simple.SingleAssetOrderExecutionSimple` serve as examples of simulators, :class:`qlib.rl.order_execution.interpreter.FullHistoryStateInterpreter` and :class:`qlib.rl.order_execution.interpreter.CategoricalActionInterpreter` as examples of interpreters, :class:`qlib.rl.order_execution.policy.PPO` as an example of a policy, and :class:`qlib.rl.order_execution.reward.PAPenaltyReward` as an example of a reward.
|
|
For the single asset order execution task, if developers have already defined their simulator/interpreters/reward function/policy, they could launch the training and backtest pipeline by simply modifying the corresponding settings in the config files.
|
|
The details about the example can be found `here <https://github.com/microsoft/qlib/blob/main/examples/rl/README.md>`_.
|
|
|
|
In the future, we will provide more examples for different scenarios such as RL-based portfolio construction.
|