1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

feat: use pydantic-settings for MLflow config and update dependencies (#1962)

* feat: use pydantic-settings for MLflow config and update dependencies

* docs
This commit is contained in:
you-n-g
2025-07-01 21:32:11 +08:00
committed by GitHub
parent 38f02d25dc
commit 78b77e302b
3 changed files with 51 additions and 2 deletions

View File

@@ -27,6 +27,9 @@ dependencies = [
"pyyaml",
"numpy",
"pandas>=0.24",
# I encountered an error where set_uri does not work when downloading artifacts in mlflow 3.1.1;
# earlier versions of mlflow do not have this problem.
# But when I switched to a 2.*.* version, another error occurred, which is even stranger...
"mlflow",
"filelock>=3.16.0",
"redis",
@@ -45,6 +48,7 @@ dependencies = [
"jupyter",
"nbconvert",
"pyarrow",
"pydantic-settings",
]
[project.optional-dependencies]
@@ -90,6 +94,7 @@ test = [
]
analysis = [
"plotly",
"statsmodels",
]
[tool.setuptools]

View File

@@ -27,6 +27,37 @@ from qlib.constant import REG_CN, REG_US, REG_TW
if TYPE_CHECKING:
from qlib.utils.time import Freq
from pydantic_settings import BaseSettings, SettingsConfigDict
class MLflowSettings(BaseSettings):
    """MLflow-related defaults that are fed into the experiment manager config."""

    # Tracking URI: a local ``mlruns`` directory below the working directory
    # that is current when this module is imported.
    uri: str = f"file:{Path(os.getcwd()).resolve() / 'mlruns'}"
    # Default experiment name handed to the experiment manager.
    default_exp_name: str = "Experiment"
class QSettings(BaseSettings):
    """
    Qlib's settings.

    It tries to provide default settings for most of Qlib's components,
    but it will be a long journey to provide comprehensive settings for all of them.

    Here are some design guidelines:
    - The priority of settings is
        - Actively passed-in settings, like `qlib.init(provider_uri=...)`
        - The default settings
    - QSettings tries to provide default settings for most of Qlib's components.

    Fields can be overridden through environment variables of the form
    ``QLIB_<FIELD>_<SUBFIELD>``, e.g. ``QLIB_MLFLOW_URI`` or
    ``QLIB_MLFLOW_DEFAULT_EXP_NAME``.
    """

    mlflow: MLflowSettings = MLflowSettings()

    model_config = SettingsConfigDict(
        env_prefix="QLIB_",
        env_nested_delimiter="_",
        # The nested delimiter "_" also occurs inside field names such as
        # `default_exp_name`. Splitting only once (field -> sub-field) prevents
        # QLIB_MLFLOW_DEFAULT_EXP_NAME from being parsed as the nested path
        # mlflow.default.exp.name instead of the `default_exp_name` field.
        # NOTE(review): `env_nested_max_split` needs a recent pydantic-settings
        # (2.8+, please confirm); consider pinning a lower bound in pyproject.toml.
        env_nested_max_split=1,
    )
# Module-level settings instance, created once when this module is imported.
QSETTINGS = QSettings()
class Config:
def __init__(self, default_conf):
@@ -187,8 +218,8 @@ _default_config = {
"class": "MLflowExpManager",
"module_path": "qlib.workflow.expm",
"kwargs": {
"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"),
"default_exp_name": "Experiment",
"uri": QSETTINGS.mlflow.uri,
"default_exp_name": QSETTINGS.mlflow.default_exp_name,
},
},
"pit_record_type": {

View File

@@ -1,5 +1,18 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
Motivation of this design (instead of using mlflow directly):
- Better design than mlflow native design
- we have a recorder object with a lot of methods (more intuitive), instead of using run_id every time as in mlflow
- So the recorder's interfaces like log, start, will be more intuitive.
- Provide richer and more tailored features than native mlflow
- Logging code diff at the start of run.
- log_object and load_object work with Python objects directly, instead of log_artifact and download_artifact
- (weak) Allow diverse backend support
To be honest, design always adds burdens. For example,
- You need to create an experiment before you can get a recorder. (In MLflow, experiments are more like tags, and you often just use a run_id in many interfaces without first defining an experiment.)
"""
from contextlib import contextmanager
from typing import Text, Optional, Any, Dict