Add docs for qlib.rl (#1322)

* Add docs for qlib.rl * Update docs for qlib.rl * Add homepage introduct to RL framework * Update index Link * Fix Icon * typo * Update catelog * Update docs for qlib.rl * Update docs for qlib.rl * Update figure * Update docs for qlib.rl * Update setup.py * FIx setup.py * Update docs and fix some typos * Fix the reference to RL docs * Update framework.svg * Update framework.svg * Update framework.svg * Update docs for qlibrl. * Update docs for qlibrl. * Update docs for Qlibrl. * Update docs for qlibrl. * Update docs for qlibrl. * Update docs for qlibrl. * Add new framework * Update jpg * Update framework.svg * Update framework.svg * Update Qlib framework and description * Update grammar * Update README.md * Update README.md * Update docs/component/rl.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Update docs/component/rl.rst Co-authored-by: you-n-g <you-n-g@users.noreply.github.com> * Update docs for qlib.rl * Change theme for docs. * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl. * Update docs for qlib.rl * Update docs for qlib.rl * Update docs for qlib.rl Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
2026-07-22 03:37:34 +08:00 · 2022-11-10 21:10:44 +08:00
parent 35794846ff
commit e182124e75
22 changed files with 492 additions and 134 deletions
--- a/examples/rl/README.md
+++ b/examples/rl/README.md
@@ -41,7 +41,7 @@ data
 Run:

 ```
-python ../../qlib/rl/contrib/train_onpolicy.py --config_path ./experiment_config/training/config.yml
+python -m qlib.rl.contrib.train_onpolicy --config_path ./experiment_config/training/config.yml
 ```

 After training, checkpoints will be stored under `checkpoints/`.
@@ -49,7 +49,7 @@ After training, checkpoints will be stored under `checkpoints/`.
 ## Run backtest

 ```
-python ../../qlib/rl/contrib/backtest.py --config_path ./experiment_config/backtest/config.py
+python -m qlib.rl.contrib.backtest --config_path ./experiment_config/backtest/config.yml
 ```

 The backtest workflow will use the trained model in `checkpoints/`. The backtest summary can be found in `outputs/`.
--- a/examples/rl/experiment_config/backtest/config.py
+++ b/examples/rl/experiment_config/backtest/config.py
@@ -1,53 +0,0 @@
-_base_ = ["./twap.yml"]
-
-strategies = {
-    "_delete_": True,
-    "30min": {
-        "class": "TWAPStrategy",
-        "module_path": "qlib.contrib.strategy.rule_strategy",
-        "kwargs": {},
-    },
-    "1day": {
-        "class": "SAOEIntStrategy",
-        "module_path": "qlib.rl.order_execution.strategy",
-        "kwargs": {
-            "state_interpreter": {
-                "class": "FullHistoryStateInterpreter",
-                "module_path": "qlib.rl.order_execution.interpreter",
-                "kwargs": {
-                    "max_step": 8,
-                    "data_ticks": 240,
-                    "data_dim": 6,
-                    "processed_data_provider": {
-                        "class": "PickleProcessedDataProvider",
-                        "module_path": "qlib.rl.data.pickle_styled",
-                        "kwargs": {
-                            "data_dir": "./data/pickle_dataframe/feature",
-                        },
-                    },
-                },
-            },
-            "action_interpreter": {
-                "class": "CategoricalActionInterpreter",
-                "module_path": "qlib.rl.order_execution.interpreter",
-                "kwargs": {
-                    "values": 14,
-                    "max_step": 8,
-                },
-            },
-            "network": {
-                "class": "Recurrent",
-                "module_path": "qlib.rl.order_execution.network",
-                "kwargs": {},
-            },
-            "policy": {
-                "class": "PPO",
-                "module_path": "qlib.rl.order_execution.policy",
-                "kwargs": {
-                    "lr": 1.0e-4,
-                    "weight_file": "./checkpoints/latest.pth",
-                },
-            },
-        },
-    },
-}
--- a/examples/rl/experiment_config/backtest/config.yml
+++ b/examples/rl/experiment_config/backtest/config.yml
@@ -0,0 +1,57 @@
+order_file: ./data/backtest_orders.csv
+start_time: "9:45"
+end_time: "14:44"
+qlib:
+  provider_uri_1min: ./data/bin
+  feature_root_dir: ./data/pickle
+  feature_columns_today: [
+    "$open", "$high", "$low", "$close", "$vwap", "$volume",
+  ]
+  feature_columns_yesterday: [
+    "$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
+  ]
+exchange:
+  limit_threshold: ['$close == 0', '$close == 0']
+  deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
+  volume_threshold:
+    all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
+    buy: ["current", "$close"]
+    sell: ["current", "$close"]
+strategies: 
+  30min: 
+    class: TWAPStrategy
+    module_path: qlib.contrib.strategy.rule_strategy
+    kwargs: {}
+  1day: 
+    class: SAOEIntStrategy
+    module_path: qlib.rl.order_execution.strategy
+    kwargs:
+      state_interpreter:
+        class: FullHistoryStateInterpreter
+        module_path: qlib.rl.order_execution.interpreter
+        kwargs:
+          max_step: 8
+          data_ticks: 240
+          data_dim: 6
+          processed_data_provider:
+            class: PickleProcessedDataProvider
+            module_path: qlib.rl.data.pickle_styled
+            kwargs:
+              data_dir: ./data/pickle_dataframe/feature
+      action_interpreter: 
+        class: CategoricalActionInterpreter
+        module_path: qlib.rl.order_execution.interpreter
+        kwargs: 
+          values: 14
+          max_step: 8
+      network: 
+          class: Recurrent
+          module_path: qlib.rl.order_execution.network
+          kwargs: {}
+      policy: 
+          class: PPO
+          module_path: qlib.rl.order_execution.policy
+          kwargs: 
+            lr: 1.0e-4
+            weight_file: ./checkpoints/latest.pth
+concurrency: 5
--- a/examples/rl/experiment_config/backtest/twap.yml
+++ b/examples/rl/experiment_config/backtest/twap.yml
@@ -1,21 +0,0 @@
-order_file: ./data/backtest_orders.csv
-start_time: "9:45"
-end_time: "14:44"
-qlib:
-  provider_uri_1min: ./data/bin
-  feature_root_dir: ./data/pickle
-  feature_columns_today: [
-    "$open", "$high", "$low", "$close", "$vwap", "$volume",
-  ]
-  feature_columns_yesterday: [
-    "$open_v1", "$high_v1", "$low_v1", "$close_v1", "$vwap_v1", "$volume_v1",
-  ]
-exchange:
-  limit_threshold: ['$close == 0', '$close == 0']
-  deal_price: ["If($close == 0, $vwap, $close)", "If($close == 0, $vwap, $close)"]
-  volume_threshold:
-    all: ["cum", "0.2 * DayCumsum($volume, '9:45', '14:44')"]
-    buy: ["current", "$close"]
-    sell: ["current", "$close"]
-strategies: {}  # Placeholder
-concurrency: 5