1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix(security): address reported unsafe pickle.load usages (#2099)

This commit is contained in:
Linlang
2026-01-28 22:19:43 +08:00
committed by GitHub
parent 16acb76aba
commit 39634b2158
7 changed files with 18 additions and 14 deletions

View File

@@ -14,6 +14,7 @@ from qlib.model.meta.task import MetaTask
from qlib.model.trainer import TrainerR from qlib.model.trainer import TrainerR
from qlib.typehint import Literal from qlib.typehint import Literal
from qlib.utils import init_instance_by_config from qlib.utils import init_instance_by_config
from qlib.utils.pickle_utils import restricted_pickle_load
from qlib.workflow import R from qlib.workflow import R
from qlib.workflow.task.utils import replace_task_handler_with_cache from qlib.workflow.task.utils import replace_task_handler_with_cache
@@ -298,7 +299,7 @@ class DDGDA(Rolling):
# but their task test segments are not aligned! It worked in my previous experiment. # but their task test segments are not aligned! It worked in my previous experiment.
# So the misalignment will not affect the effectiveness of the method. # So the misalignment will not affect the effectiveness of the method.
with self._internal_data_path.open("rb") as f: with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f) internal_data = restricted_pickle_load(f)
md = MetaDatasetDS(exp_name=internal_data, **kwargs) md = MetaDatasetDS(exp_name=internal_data, **kwargs)
@@ -360,7 +361,7 @@ class DDGDA(Rolling):
) )
with self._internal_data_path.open("rb") as f: with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f) internal_data = restricted_pickle_load(f)
mds = MetaDatasetDS(exp_name=internal_data, **kwargs) mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) meta model make inference and get new qlib task # 3) meta model make inference and get new qlib task

View File

@@ -8,7 +8,6 @@ import os
import yaml import yaml
import json import json
import copy import copy
import pickle
import logging import logging
import importlib import importlib
import subprocess import subprocess
@@ -18,6 +17,7 @@ import numpy as np
from abc import abstractmethod from abc import abstractmethod
from ...log import get_module_logger, TimeInspector from ...log import get_module_logger, TimeInspector
from ...utils.pickle_utils import restricted_pickle_load
from hyperopt import fmin, tpe from hyperopt import fmin, tpe
from hyperopt import STATUS_OK, STATUS_FAIL from hyperopt import STATUS_OK, STATUS_FAIL
@@ -136,7 +136,7 @@ class QLibTuner(Tuner):
exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id)) exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id))
exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME) exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME)
with open(exp_result_path, "rb") as fp: with open(exp_result_path, "rb") as fp:
analysis_df = pickle.load(fp) analysis_df = restricted_pickle_load(fp)
# 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, reverse the result # 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, reverse the result
res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor] res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor]

View File

@@ -30,6 +30,7 @@ from ..utils import (
normalize_cache_fields, normalize_cache_fields,
normalize_cache_instruments, normalize_cache_instruments,
) )
from ..utils.pickle_utils import restricted_pickle_load
from ..log import get_module_logger from ..log import get_module_logger
from .base import Feature from .base import Feature
@@ -225,7 +226,7 @@ class CacheUtils:
cache_path = Path(cache_path) cache_path = Path(cache_path)
meta_path = cache_path.with_suffix(".meta") meta_path = cache_path.with_suffix(".meta")
with meta_path.open("rb") as f: with meta_path.open("rb") as f:
d = pickle.load(f) d = restricted_pickle_load(f)
with meta_path.open("wb") as f: with meta_path.open("wb") as f:
try: try:
d["meta"]["last_visit"] = str(time.time()) d["meta"]["last_visit"] = str(time.time())
@@ -592,7 +593,7 @@ class DiskExpressionCache(ExpressionCache):
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"): with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"):
with meta_path.open("rb") as f: with meta_path.open("rb") as f:
d = pickle.load(f) d = restricted_pickle_load(f)
instrument = d["info"]["instrument"] instrument = d["info"]["instrument"]
field = d["info"]["field"] field = d["info"]["field"]
freq = d["info"]["freq"] freq = d["info"]["freq"]
@@ -959,7 +960,7 @@ class DiskDatasetCache(DatasetCache):
im = DiskDatasetCache.IndexManager(cp_cache_uri) im = DiskDatasetCache.IndexManager(cp_cache_uri)
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"): with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"):
with meta_path.open("rb") as f: with meta_path.open("rb") as f:
d = pickle.load(f) d = restricted_pickle_load(f)
instruments = d["info"]["instruments"] instruments = d["info"]["instruments"]
fields = d["info"]["fields"] fields = d["info"]["fields"]
freq = d["info"]["freq"] freq = d["info"]["freq"]

View File

@@ -2,7 +2,6 @@
# Licensed under the MIT License. # Licensed under the MIT License.
import abc import abc
import pickle
from pathlib import Path from pathlib import Path
import warnings import warnings
import pandas as pd import pandas as pd
@@ -11,6 +10,7 @@ from typing import Tuple, Union, List, Dict
from qlib.data import D from qlib.data import D
from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
from qlib.utils.pickle_utils import restricted_pickle_load
from qlib.log import get_module_logger from qlib.log import get_module_logger
from qlib.utils.serial import Serializable from qlib.utils.serial import Serializable
@@ -283,7 +283,7 @@ class StaticDataLoader(DataLoader, Serializable):
self._data = pd.read_parquet(self._config, engine="pyarrow") self._data = pd.read_parquet(self._config, engine="pyarrow")
else: else:
with Path(self._config).open("rb") as f: with Path(self._config).open("rb") as f:
self._data = pickle.load(f) self._data = restricted_pickle_load(f)
elif isinstance(self._config, pd.DataFrame): elif isinstance(self._config, pd.DataFrame):
self._data = self._config self._data = self._config

View File

@@ -11,7 +11,6 @@ import contextlib
import importlib import importlib
import os import os
from pathlib import Path from pathlib import Path
import pickle
import pkgutil import pkgutil
import re import re
import sys import sys
@@ -20,6 +19,7 @@ from typing import Any, Dict, List, Tuple, Union
from urllib.parse import urlparse from urllib.parse import urlparse
from qlib.typehint import InstConf from qlib.typehint import InstConf
from qlib.utils.pickle_utils import restricted_pickle_load
def get_module_by_module_path(module_path: Union[str, ModuleType]): def get_module_by_module_path(module_path: Union[str, ModuleType]):
@@ -168,10 +168,10 @@ def init_instance_by_config(
pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc
with open(os.path.normpath(pr_path), "rb") as f: with open(os.path.normpath(pr_path), "rb") as f:
return pickle.load(f) return restricted_pickle_load(f)
else: else:
with config.open("rb") as f: with config.open("rb") as f:
return pickle.load(f) return restricted_pickle_load(f)
klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module) klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)

View File

@@ -6,6 +6,7 @@ import tempfile
from pathlib import Path from pathlib import Path
from qlib.config import C from qlib.config import C
from qlib.utils.pickle_utils import restricted_pickle_load
class ObjManager: class ObjManager:
@@ -116,7 +117,7 @@ class FileManager(ObjManager):
def load_obj(self, name): def load_obj(self, name):
with (self.path / name).open("rb") as f: with (self.path / name).open("rb") as f:
return pickle.load(f) return restricted_pickle_load(f)
def exists(self, name): def exists(self, name):
return (self.path / name).exists() return (self.path / name).exists()

View File

@@ -28,6 +28,7 @@ from tqdm.cli import tqdm
from .utils import get_mongodb from .utils import get_mongodb
from ...config import C from ...config import C
from ...utils.pickle_utils import restricted_pickle_loads
class TaskManager: class TaskManager:
@@ -131,7 +132,7 @@ class TaskManager:
for prefix in self.ENCODE_FIELDS_PREFIX: for prefix in self.ENCODE_FIELDS_PREFIX:
for k in list(task.keys()): for k in list(task.keys()):
if k.startswith(prefix): if k.startswith(prefix):
task[k] = pickle.loads(task[k]) task[k] = restricted_pickle_loads(task[k])
return task return task
def _dict_to_str(self, flt): def _dict_to_str(self, flt):