# Mirror of https://github.com/microsoft/qlib.git
# Synced 2026-07-04 11:30:57 +08:00
#
# Commit message:
#   * Migrate amc4th training
#   * Refine RL example scripts
#   * Resolve PR comments
#   Co-authored-by: luocy16 <luocy16@mails.tsinghua.edu.cn>
#
# 60 lines, 1.3 KiB, YAML
# Simulator settings.
simulator:
  # NOTE(review): 30 * max_step (8, set on both interpreters) equals
  # data.source.total_time (240); presumably intentional — confirm the unit.
  time_per_step: 30
  # Explicit null: no volume limit value is configured here.
  vol_limit: null
# Environment settings.
env:
  concurrency: 1
  # NOTE(review): `dummy` presumably selects a non-parallel execution mode;
  # verify against the consumer of this config.
  parallel_mode: dummy
# Action interpreter, given as a class name, the module path it is looked up
# in, and its keyword arguments.
# NOTE(review): presumably instantiated as `module_path.class(**kwargs)` —
# confirm against the loader.
action_interpreter:
  class: CategoricalActionInterpreter
  kwargs:
    values: 14
    # Same value as state_interpreter.kwargs.max_step.
    max_step: 8
  module_path: qlib.rl.order_execution.interpreter
# State interpreter, given as a class name, the module path it is looked up
# in, and its keyword arguments.
# This section carries two `class`/`kwargs`/`module_path` triples. Duplicate
# keys in one mapping are invalid, so the first triple is nested as the
# `processed_data_provider` argument and the trailing `module_path` belongs to
# the state interpreter itself.
state_interpreter:
  class: FullHistoryStateInterpreter
  kwargs:
    # Same value as data.source.proc_data_dim.
    data_dim: 6
    # Same value as data.source.total_time.
    data_ticks: 240
    # Same value as action_interpreter.kwargs.max_step.
    max_step: 8
    # Nested object spec passed as a keyword argument.
    processed_data_provider:
      class: PickleProcessedDataProvider
      module_path: qlib.rl.data.pickle_styled
      kwargs:
        data_dir: ./data/pickle_dataframe/feature
  module_path: qlib.rl.order_execution.interpreter
# Reward, given as a class name, the module path it is looked up in, and its
# keyword arguments.
reward:
  class: PAPenaltyReward
  kwargs:
    penalty: 100.0
  module_path: qlib.rl.order_execution.reward
# Data settings: where orders and market data are read from, plus loader
# options.
# NOTE(review): the boundary of `source` is inferred — the path, time-range
# and dimension keys are grouped under it, while `num_workers` and
# `queue_size` are kept directly under `data`. Confirm against the consumer.
data:
  source:
    order_dir: ./data/training_order_split
    data_dir: ./data/pickle_dataframe/backtest
    total_time: 240
    default_start_time: 0
    # Same value as total_time, so the default range spans 0..240.
    default_end_time: 240
    # Same value as state_interpreter.kwargs.data_dim.
    proc_data_dim: 6
  num_workers: 0
  queue_size: 20
# Network, given as a class name and the module path it is looked up in.
# No `kwargs` are supplied for it in this config.
network:
  class: Recurrent
  module_path: qlib.rl.order_execution.network
# Policy, given as a class name, the module path it is looked up in, and its
# keyword arguments.
policy:
  class: PPO
  kwargs:
    # NOTE(review): presumably the optimizer learning rate — confirm.
    lr: 0.0001
  module_path: qlib.rl.order_execution.policy
# Runtime settings.
runtime:
  seed: 42
  use_cuda: false
# Trainer settings.
# NOTE(review): with max_epoch and earlystop_patience both set to 2 this looks
# like a short smoke-test run rather than a full training schedule — confirm.
trainer:
  max_epoch: 2
  repeat_per_collect: 5
  earlystop_patience: 2
  episode_per_collect: 20
  batch_size: 16
  val_every_n_epoch: 1
  checkpoint_path: ./checkpoints
  checkpoint_every_n_iters: 1