From 39634b2158251972bc861de2a445579dadad4437 Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Wed, 28 Jan 2026 22:19:43 +0800 Subject: [PATCH] fix(security): address reported unsafe pickle.load usages (#2099) --- qlib/contrib/rolling/ddgda.py | 5 +++-- qlib/contrib/tuner/tuner.py | 4 ++-- qlib/data/cache.py | 7 ++++--- qlib/data/dataset/loader.py | 4 ++-- qlib/utils/mod.py | 6 +++--- qlib/utils/objm.py | 3 ++- qlib/workflow/task/manage.py | 3 ++- 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/qlib/contrib/rolling/ddgda.py b/qlib/contrib/rolling/ddgda.py index b62820cce..0fe01d045 100644 --- a/qlib/contrib/rolling/ddgda.py +++ b/qlib/contrib/rolling/ddgda.py @@ -14,6 +14,7 @@ from qlib.model.meta.task import MetaTask from qlib.model.trainer import TrainerR from qlib.typehint import Literal from qlib.utils import init_instance_by_config +from qlib.utils.pickle_utils import restricted_pickle_load from qlib.workflow import R from qlib.workflow.task.utils import replace_task_handler_with_cache @@ -298,7 +299,7 @@ class DDGDA(Rolling): # but their task test segment are not aligned! It worked in my previous experiment. # So the misalignment will not affect the effectiveness of the method. with self._internal_data_path.open("rb") as f: - internal_data = pickle.load(f) + internal_data = restricted_pickle_load(f) md = MetaDatasetDS(exp_name=internal_data, **kwargs) @@ -360,7 +361,7 @@ class DDGDA(Rolling): ) with self._internal_data_path.open("rb") as f: - internal_data = pickle.load(f) + internal_data = restricted_pickle_load(f) mds = MetaDatasetDS(exp_name=internal_data, **kwargs) # 3) meta model make inference and get new qlib task diff --git a/qlib/contrib/tuner/tuner.py b/qlib/contrib/tuner/tuner.py index 7705ce8b7..9009f5721 100644 --- a/qlib/contrib/tuner/tuner.py +++ b/qlib/contrib/tuner/tuner.py @@ -8,7 +8,6 @@ import os import yaml import json import copy -import pickle import logging import importlib import subprocess @@ -18,6 +17,7 @@ import numpy as np from abc import abstractmethod from ...log import get_module_logger, TimeInspector +from ...utils.pickle_utils import restricted_pickle_load from hyperopt import fmin, tpe from hyperopt import STATUS_OK, STATUS_FAIL @@ -136,7 +136,7 @@ class QLibTuner(Tuner): exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id)) exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME) with open(exp_result_path, "rb") as fp: - analysis_df = pickle.load(fp) + analysis_df = restricted_pickle_load(fp) # 4. Get the backtest factor which user want to optimize, if user want to maximize the factor, then reverse the result res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor] diff --git a/qlib/data/cache.py b/qlib/data/cache.py index 9ba87f3d2..fbf6e839d 100644 --- a/qlib/data/cache.py +++ b/qlib/data/cache.py @@ -30,6 +30,7 @@ from ..utils import ( normalize_cache_fields, normalize_cache_instruments, ) +from ..utils.pickle_utils import restricted_pickle_load from ..log import get_module_logger from .base import Feature @@ -225,7 +226,7 @@ class CacheUtils: cache_path = Path(cache_path) meta_path = cache_path.with_suffix(".meta") with meta_path.open("rb") as f: - d = pickle.load(f) + d = restricted_pickle_load(f) with meta_path.open("wb") as f: try: d["meta"]["last_visit"] = str(time.time()) @@ -592,7 +593,7 @@ class DiskExpressionCache(ExpressionCache): with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"): with meta_path.open("rb") as f: - d = pickle.load(f) + d = restricted_pickle_load(f) instrument = d["info"]["instrument"] field = d["info"]["field"] freq = d["info"]["freq"] @@ -959,7 +960,7 @@ class DiskDatasetCache(DatasetCache): im = DiskDatasetCache.IndexManager(cp_cache_uri) with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"): with meta_path.open("rb") as f: - d = pickle.load(f) + d = restricted_pickle_load(f) instruments = d["info"]["instruments"] fields = d["info"]["fields"] freq = d["info"]["freq"] diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index d283cb4f6..2f3615a63 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import abc -import pickle from pathlib import Path import warnings import pandas as pd @@ -11,6 +10,7 @@ from typing import Tuple, Union, List, Dict from qlib.data import D from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point +from qlib.utils.pickle_utils import restricted_pickle_load from qlib.log import get_module_logger from qlib.utils.serial import Serializable @@ -283,7 +283,7 @@ class StaticDataLoader(DataLoader, Serializable): self._data = pd.read_parquet(self._config, engine="pyarrow") else: with Path(self._config).open("rb") as f: - self._data = pickle.load(f) + self._data = restricted_pickle_load(f) elif isinstance(self._config, pd.DataFrame): self._data = self._config diff --git a/qlib/utils/mod.py b/qlib/utils/mod.py index 12fbc5870..5cb2ed3f4 100644 --- a/qlib/utils/mod.py +++ b/qlib/utils/mod.py @@ -11,7 +11,6 @@ import contextlib import importlib import os from pathlib import Path -import pickle import pkgutil import re import sys @@ -20,6 +19,7 @@ from typing import Any, Dict, List, Tuple, Union from urllib.parse import urlparse from qlib.typehint import InstConf +from qlib.utils.pickle_utils import restricted_pickle_load def get_module_by_module_path(module_path: Union[str, ModuleType]): @@ -168,10 +168,10 @@ def init_instance_by_config( pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc with open(os.path.normpath(pr_path), "rb") as f: - return pickle.load(f) + return restricted_pickle_load(f) else: with config.open("rb") as f: - return pickle.load(f) + return restricted_pickle_load(f) klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module) diff --git a/qlib/utils/objm.py b/qlib/utils/objm.py index aa9bed564..227adc7f3 100644 --- a/qlib/utils/objm.py +++ b/qlib/utils/objm.py @@ -6,6 +6,7 @@ import tempfile from pathlib import Path from qlib.config import C +from qlib.utils.pickle_utils import restricted_pickle_load class ObjManager: @@ -116,7 +117,7 @@ class FileManager(ObjManager): def load_obj(self, name): with (self.path / name).open("rb") as f: - return pickle.load(f) + return restricted_pickle_load(f) def exists(self, name): return (self.path / name).exists() diff --git a/qlib/workflow/task/manage.py b/qlib/workflow/task/manage.py index 7fe9f58d6..59815b5e2 100644 --- a/qlib/workflow/task/manage.py +++ b/qlib/workflow/task/manage.py @@ -28,6 +28,7 @@ from tqdm.cli import tqdm from .utils import get_mongodb from ...config import C +from ...utils.pickle_utils import restricted_pickle_loads class TaskManager: @@ -131,7 +132,7 @@ class TaskManager: for prefix in self.ENCODE_FIELDS_PREFIX: for k in list(task.keys()): if k.startswith(prefix): - task[k] = pickle.loads(task[k]) + task[k] = restricted_pickle_loads(task[k]) return task def _dict_to_str(self, flt):