From 78b77e302b9cab90100d05c6c534e2ed13980860 Mon Sep 17 00:00:00 2001
From: you-n-g <you-n-g@users.noreply.github.com>
Date: Tue, 1 Jul 2025 21:32:11 +0800
Subject: [PATCH] feat: use pydantic-settings for MLflow config and update
 dependencies (#1962)

* feat: use pydantic-settings for MLflow config and update dependencies

* docs
---
 pyproject.toml            |  5 +++++
 qlib/config.py            | 35 +++++++++++++++++++++++++++++++++--
 qlib/workflow/__init__.py | 13 +++++++++++++
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4409fa719..b902edfe2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,9 @@ dependencies = [
   "pyyaml",
   "numpy",
   "pandas>=0.24",
+  # I encountered an error where set_uri does not work when downloading artifacts in mlflow 3.1.1;
+  # earlier versions of mlflow do not have this problem.
+  # However, when I switched to a 2.*.* version, a different and even stranger error occurred...
   "mlflow",
   "filelock>=3.16.0",
   "redis",
@@ -45,6 +48,7 @@ dependencies = [
   "jupyter",
   "nbconvert",
   "pyarrow",
+  "pydantic-settings",
 ]
 
 [project.optional-dependencies]
@@ -90,6 +94,7 @@ test = [
 ]
 analysis = [
   "plotly",
+  "statsmodels",
 ]
 
 [tool.setuptools]
diff --git a/qlib/config.py b/qlib/config.py
index 2fa7d4535..a0b4aad28 100644
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -27,6 +27,37 @@ from qlib.constant import REG_CN, REG_US, REG_TW
 if TYPE_CHECKING:
     from qlib.utils.time import Freq
 
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class MLflowSettings(BaseSettings):
+    uri: str = "file:" + str(Path(os.getcwd()).resolve() / "mlruns")
+    default_exp_name: str = "Experiment"
+
+
+class QSettings(BaseSettings):
+    """
+    Qlib's settings.
+    It tries to provide default settings for most of Qlib's components.
+    But it would be a long journey to provide comprehensive settings for all of Qlib's components.
+
+    Here are some design guidelines:
+    - The priority of settings is:
+        - Actively passed-in settings, like `qlib.init(provider_uri=...)`
+        - The default settings
+            - QSettings tries to provide default settings for most of Qlib's components.
+    """
+
+    mlflow: MLflowSettings = MLflowSettings()
+
+    model_config = SettingsConfigDict(
+        env_prefix="QLIB_",
+        env_nested_delimiter="_",
+    )
+
+
+QSETTINGS = QSettings()
+
 
 class Config:
     def __init__(self, default_conf):
@@ -187,8 +218,8 @@ _default_config = {
         "class": "MLflowExpManager",
         "module_path": "qlib.workflow.expm",
         "kwargs": {
-            "uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"),
-            "default_exp_name": "Experiment",
+            "uri": QSETTINGS.mlflow.uri,
+            "default_exp_name": QSETTINGS.mlflow.default_exp_name,
         },
     },
     "pit_record_type": {
diff --git a/qlib/workflow/__init__.py b/qlib/workflow/__init__.py
index 9947c9805..a29e471c0 100644
--- a/qlib/workflow/__init__.py
+++ b/qlib/workflow/__init__.py
@@ -1,5 +1,18 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
+"""
+Motivation of this design (instead of using mlflow directly):
+- Better design than MLflow's native design
+    - We have a recorder object with many methods (more intuitive), instead of passing a run_id every time as in MLflow.
+        - So the recorder's interfaces, like log and start, are more intuitive.
+- Provide richer and more tailored features than native MLflow
+    - Logging the code diff at the start of a run.
+    - log_object and load_object work on Python objects directly, instead of log_artifact and download_artifact.
+- (weak) Allow diverse backend support
+
+To be honest, design always adds burdens. For example,
+- You need to create an experiment before you can get a recorder. (In MLflow, experiments are more like tags, and you often just use a run_id in many interfaces without first defining an experiment.)
+"""
 
 from contextlib import contextmanager
 from typing import Text, Optional, Any, Dict