pass the whole workflow

2026-07-27 14:02:41 +08:00 · 2020-10-28 14:07:33 +00:00
parent 1a9ee6cef8
commit a50c9008b8
10 changed files with 296 additions and 221 deletions
--- a/examples/workflow_by_code.py
+++ b/examples/workflow_by_code.py
@@ -16,7 +16,8 @@ from qlib.contrib.evaluate import (
 )
 from qlib.utils import exists_qlib_data

-from qlib.model.learner import train_model
+# from qlib.model.learner import train_model
+from qlib.utils import init_instance_by_config


 if __name__ == "__main__":
@@ -57,13 +58,6 @@ if __name__ == "__main__":
        "test_end_time": "2020-08-01",
    }

-    # use default DataHandler
-    # custom DataHandler, refer to: TODO: DataHandler API url
-    handler = Alpha158(**DATA_HANDLER_CONFIG)
-
-    data = handler.fetch(slice('2008-01-01', '2014-12-31'), data_key=handler.DK_I)
-    print(data)
-
    task = {
        "model": {
            "class": "LGBModel",
@@ -80,59 +74,33 @@ if __name__ == "__main__":
                "num_threads": 20,
            }
        },
-        "data": {
-            "dataset": {
-                "class": "DatasetH",
-                "module_path": "qlib.data.dataset",
-                "kwargs": {
-                    'handler': {
-                        "class": "Alpha158",
-                        "kwargs": DATA_HANDLER_CONFIG
-                    },
-                    "train_start_time": "2008-01-01",
-                    "train_end_time": "2014-12-31",
-                    "validate_start_time": "2015-01-01",
-                    "validate_end_time": "2016-12-31",
-                    "test_start_time": "2017-01-01",
-                    "test_end_time": "2020-08-01",
+        "dataset": {
+            "class": "DatasetH",
+            "module_path": "qlib.data.dataset",
+            "kwargs": {
+                'handler': {
+                    "class": "Alpha158",
+                    "module_path": "qlib.contrib.data.handler",
+                    "kwargs": DATA_HANDLER_CONFIG
+                },
+                'segments': {
+                    'train': ("2008-01-01", "2014-12-31"),
+                    'valid': ("2015-01-01", "2016-12-31",),
+                    'test': ("2017-01-01", "2020-08-01",),
                }
            }
-        },
+        }
        # You should record the data in a specific sequence
        # "record": ['SignalRecord', 'SigAnaRecord', 'PortAnaRecord'],
    }

-    model = train_model(task)
+    # model = train_model(task)
+    model = init_instance_by_config(task['model'])
+    dataset = init_instance_by_config(task['dataset'])

+    model.fit(dataset)

-
-    sys.exit(0)   # I have tested the code above  ---------------------------------------------
-
-    x_train, y_train, x_validate, y_validate, x_test, y_test = Alpha158(**DATA_HANDLER_CONFIG).get_split_data(
-        **TRAINER_CONFIG
-    )
-
-    MODEL_CONFIG = {
-        "loss": "mse",
-        "colsample_bytree": 0.8879,
-        "learning_rate": 0.0421,
-        "subsample": 0.8789,
-        "lambda_l1": 205.6999,
-        "lambda_l2": 580.9768,
-        "max_depth": 8,
-        "num_leaves": 210,
-        "num_threads": 20,
-    }
-    # use default model
-    # custom Model, refer to: TODO: Model API url
-    model = LGBModel(**MODEL_CONFIG)
-    model.fit(x_train, y_train, x_validate, y_validate)
-    _pred = model.predict(x_test)
-    _pred = pd.DataFrame(_pred, index=x_test.index, columns=y_test.columns)
-
-    # backtest requires pred_score
-    pred_score = pd.DataFrame(index=_pred.index)
-    pred_score["score"] = _pred.iloc(axis=1)[0]
+    pred_score = model.predict(dataset)

    # save pred_score to file
    pred_score_path = Path("~/tmp/qlib/pred_score.pkl").expanduser()