1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00

fix(security): address reported unsafe pickle.load usages (#2099)

This commit is contained in:
Linlang
2026-01-28 22:19:43 +08:00
committed by GitHub
parent 16acb76aba
commit 39634b2158
7 changed files with 18 additions and 14 deletions

View File

@@ -14,6 +14,7 @@ from qlib.model.meta.task import MetaTask
from qlib.model.trainer import TrainerR
from qlib.typehint import Literal
from qlib.utils import init_instance_by_config
from qlib.utils.pickle_utils import restricted_pickle_load
from qlib.workflow import R
from qlib.workflow.task.utils import replace_task_handler_with_cache
@@ -298,7 +299,7 @@ class DDGDA(Rolling):
# but their task test segments are not aligned! It worked in my previous experiment.
# So the misalignment will not affect the effectiveness of the method.
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
internal_data = restricted_pickle_load(f)
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
@@ -360,7 +361,7 @@ class DDGDA(Rolling):
)
with self._internal_data_path.open("rb") as f:
internal_data = pickle.load(f)
internal_data = restricted_pickle_load(f)
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
# 3) the meta model makes inferences and gets the new qlib task

View File

@@ -8,7 +8,6 @@ import os
import yaml
import json
import copy
import pickle
import logging
import importlib
import subprocess
@@ -18,6 +17,7 @@ import numpy as np
from abc import abstractmethod
from ...log import get_module_logger, TimeInspector
from ...utils.pickle_utils import restricted_pickle_load
from hyperopt import fmin, tpe
from hyperopt import STATUS_OK, STATUS_FAIL
@@ -136,7 +136,7 @@ class QLibTuner(Tuner):
exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id))
exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME)
with open(exp_result_path, "rb") as fp:
analysis_df = pickle.load(fp)
analysis_df = restricted_pickle_load(fp)
# 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, then reverse the result
res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor]

View File

@@ -30,6 +30,7 @@ from ..utils import (
normalize_cache_fields,
normalize_cache_instruments,
)
from ..utils.pickle_utils import restricted_pickle_load
from ..log import get_module_logger
from .base import Feature
@@ -225,7 +226,7 @@ class CacheUtils:
cache_path = Path(cache_path)
meta_path = cache_path.with_suffix(".meta")
with meta_path.open("rb") as f:
d = pickle.load(f)
d = restricted_pickle_load(f)
with meta_path.open("wb") as f:
try:
d["meta"]["last_visit"] = str(time.time())
@@ -592,7 +593,7 @@ class DiskExpressionCache(ExpressionCache):
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"):
with meta_path.open("rb") as f:
d = pickle.load(f)
d = restricted_pickle_load(f)
instrument = d["info"]["instrument"]
field = d["info"]["field"]
freq = d["info"]["freq"]
@@ -959,7 +960,7 @@ class DiskDatasetCache(DatasetCache):
im = DiskDatasetCache.IndexManager(cp_cache_uri)
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"):
with meta_path.open("rb") as f:
d = pickle.load(f)
d = restricted_pickle_load(f)
instruments = d["info"]["instruments"]
fields = d["info"]["fields"]
freq = d["info"]["freq"]

View File

@@ -2,7 +2,6 @@
# Licensed under the MIT License.
import abc
import pickle
from pathlib import Path
import warnings
import pandas as pd
@@ -11,6 +10,7 @@ from typing import Tuple, Union, List, Dict
from qlib.data import D
from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
from qlib.utils.pickle_utils import restricted_pickle_load
from qlib.log import get_module_logger
from qlib.utils.serial import Serializable
@@ -283,7 +283,7 @@ class StaticDataLoader(DataLoader, Serializable):
self._data = pd.read_parquet(self._config, engine="pyarrow")
else:
with Path(self._config).open("rb") as f:
self._data = pickle.load(f)
self._data = restricted_pickle_load(f)
elif isinstance(self._config, pd.DataFrame):
self._data = self._config

View File

@@ -11,7 +11,6 @@ import contextlib
import importlib
import os
from pathlib import Path
import pickle
import pkgutil
import re
import sys
@@ -20,6 +19,7 @@ from typing import Any, Dict, List, Tuple, Union
from urllib.parse import urlparse
from qlib.typehint import InstConf
from qlib.utils.pickle_utils import restricted_pickle_load
def get_module_by_module_path(module_path: Union[str, ModuleType]):
@@ -168,10 +168,10 @@ def init_instance_by_config(
pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc
with open(os.path.normpath(pr_path), "rb") as f:
return pickle.load(f)
return restricted_pickle_load(f)
else:
with config.open("rb") as f:
return pickle.load(f)
return restricted_pickle_load(f)
klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)

View File

@@ -6,6 +6,7 @@ import tempfile
from pathlib import Path
from qlib.config import C
from qlib.utils.pickle_utils import restricted_pickle_load
class ObjManager:
@@ -116,7 +117,7 @@ class FileManager(ObjManager):
def load_obj(self, name):
with (self.path / name).open("rb") as f:
return pickle.load(f)
return restricted_pickle_load(f)
def exists(self, name):
return (self.path / name).exists()

View File

@@ -28,6 +28,7 @@ from tqdm.cli import tqdm
from .utils import get_mongodb
from ...config import C
from ...utils.pickle_utils import restricted_pickle_loads
class TaskManager:
@@ -131,7 +132,7 @@ class TaskManager:
for prefix in self.ENCODE_FIELDS_PREFIX:
for k in list(task.keys()):
if k.startswith(prefix):
task[k] = pickle.loads(task[k])
task[k] = restricted_pickle_loads(task[k])
return task
def _dict_to_str(self, flt):