mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
fix(security): address reported unsafe pickle.load usages (#2099)
This commit is contained in:
@@ -14,6 +14,7 @@ from qlib.model.meta.task import MetaTask
|
|||||||
from qlib.model.trainer import TrainerR
|
from qlib.model.trainer import TrainerR
|
||||||
from qlib.typehint import Literal
|
from qlib.typehint import Literal
|
||||||
from qlib.utils import init_instance_by_config
|
from qlib.utils import init_instance_by_config
|
||||||
|
from qlib.utils.pickle_utils import restricted_pickle_load
|
||||||
from qlib.workflow import R
|
from qlib.workflow import R
|
||||||
from qlib.workflow.task.utils import replace_task_handler_with_cache
|
from qlib.workflow.task.utils import replace_task_handler_with_cache
|
||||||
|
|
||||||
@@ -298,7 +299,7 @@ class DDGDA(Rolling):
|
|||||||
# but their task test segments are not aligned! It worked in my previous experiment.
|
# but their task test segments are not aligned! It worked in my previous experiment.
|
||||||
# So the misalignment will not affect the effectiveness of the method.
|
# So the misalignment will not affect the effectiveness of the method.
|
||||||
with self._internal_data_path.open("rb") as f:
|
with self._internal_data_path.open("rb") as f:
|
||||||
internal_data = pickle.load(f)
|
internal_data = restricted_pickle_load(f)
|
||||||
|
|
||||||
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
md = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
||||||
|
|
||||||
@@ -360,7 +361,7 @@ class DDGDA(Rolling):
|
|||||||
)
|
)
|
||||||
|
|
||||||
with self._internal_data_path.open("rb") as f:
|
with self._internal_data_path.open("rb") as f:
|
||||||
internal_data = pickle.load(f)
|
internal_data = restricted_pickle_load(f)
|
||||||
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
mds = MetaDatasetDS(exp_name=internal_data, **kwargs)
|
||||||
|
|
||||||
# 3) meta model make inference and get new qlib task
|
# 3) meta model make inference and get new qlib task
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import os
|
|||||||
import yaml
|
import yaml
|
||||||
import json
|
import json
|
||||||
import copy
|
import copy
|
||||||
import pickle
|
|
||||||
import logging
|
import logging
|
||||||
import importlib
|
import importlib
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -18,6 +17,7 @@ import numpy as np
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
|
|
||||||
from ...log import get_module_logger, TimeInspector
|
from ...log import get_module_logger, TimeInspector
|
||||||
|
from ...utils.pickle_utils import restricted_pickle_load
|
||||||
from hyperopt import fmin, tpe
|
from hyperopt import fmin, tpe
|
||||||
from hyperopt import STATUS_OK, STATUS_FAIL
|
from hyperopt import STATUS_OK, STATUS_FAIL
|
||||||
|
|
||||||
@@ -136,7 +136,7 @@ class QLibTuner(Tuner):
|
|||||||
exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id))
|
exp_result_dir = os.path.join(self.ex_dir, QLibTuner.EXP_RESULT_DIR.format(estimator_ex_id))
|
||||||
exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME)
|
exp_result_path = os.path.join(exp_result_dir, QLibTuner.EXP_RESULT_NAME)
|
||||||
with open(exp_result_path, "rb") as fp:
|
with open(exp_result_path, "rb") as fp:
|
||||||
analysis_df = pickle.load(fp)
|
analysis_df = restricted_pickle_load(fp)
|
||||||
|
|
||||||
# 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, reverse the result
|
# 4. Get the backtest factor that the user wants to optimize; if the user wants to maximize the factor, reverse the result
|
||||||
res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor]
|
res = analysis_df.loc[self.optim_config.report_type].loc[self.optim_config.report_factor]
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ from ..utils import (
|
|||||||
normalize_cache_fields,
|
normalize_cache_fields,
|
||||||
normalize_cache_instruments,
|
normalize_cache_instruments,
|
||||||
)
|
)
|
||||||
|
from ..utils.pickle_utils import restricted_pickle_load
|
||||||
|
|
||||||
from ..log import get_module_logger
|
from ..log import get_module_logger
|
||||||
from .base import Feature
|
from .base import Feature
|
||||||
@@ -225,7 +226,7 @@ class CacheUtils:
|
|||||||
cache_path = Path(cache_path)
|
cache_path = Path(cache_path)
|
||||||
meta_path = cache_path.with_suffix(".meta")
|
meta_path = cache_path.with_suffix(".meta")
|
||||||
with meta_path.open("rb") as f:
|
with meta_path.open("rb") as f:
|
||||||
d = pickle.load(f)
|
d = restricted_pickle_load(f)
|
||||||
with meta_path.open("wb") as f:
|
with meta_path.open("wb") as f:
|
||||||
try:
|
try:
|
||||||
d["meta"]["last_visit"] = str(time.time())
|
d["meta"]["last_visit"] = str(time.time())
|
||||||
@@ -592,7 +593,7 @@ class DiskExpressionCache(ExpressionCache):
|
|||||||
|
|
||||||
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"):
|
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:expression-{cache_uri}"):
|
||||||
with meta_path.open("rb") as f:
|
with meta_path.open("rb") as f:
|
||||||
d = pickle.load(f)
|
d = restricted_pickle_load(f)
|
||||||
instrument = d["info"]["instrument"]
|
instrument = d["info"]["instrument"]
|
||||||
field = d["info"]["field"]
|
field = d["info"]["field"]
|
||||||
freq = d["info"]["freq"]
|
freq = d["info"]["freq"]
|
||||||
@@ -959,7 +960,7 @@ class DiskDatasetCache(DatasetCache):
|
|||||||
im = DiskDatasetCache.IndexManager(cp_cache_uri)
|
im = DiskDatasetCache.IndexManager(cp_cache_uri)
|
||||||
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"):
|
with CacheUtils.writer_lock(self.r, f"{str(C.dpm.get_data_uri())}:dataset-{cache_uri}"):
|
||||||
with meta_path.open("rb") as f:
|
with meta_path.open("rb") as f:
|
||||||
d = pickle.load(f)
|
d = restricted_pickle_load(f)
|
||||||
instruments = d["info"]["instruments"]
|
instruments = d["info"]["instruments"]
|
||||||
fields = d["info"]["fields"]
|
fields = d["info"]["fields"]
|
||||||
freq = d["info"]["freq"]
|
freq = d["info"]["freq"]
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
# Licensed under the MIT License.
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
import abc
|
import abc
|
||||||
import pickle
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import warnings
|
import warnings
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@@ -11,6 +10,7 @@ from typing import Tuple, Union, List, Dict
|
|||||||
|
|
||||||
from qlib.data import D
|
from qlib.data import D
|
||||||
from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
|
from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
|
||||||
|
from qlib.utils.pickle_utils import restricted_pickle_load
|
||||||
from qlib.log import get_module_logger
|
from qlib.log import get_module_logger
|
||||||
from qlib.utils.serial import Serializable
|
from qlib.utils.serial import Serializable
|
||||||
|
|
||||||
@@ -283,7 +283,7 @@ class StaticDataLoader(DataLoader, Serializable):
|
|||||||
self._data = pd.read_parquet(self._config, engine="pyarrow")
|
self._data = pd.read_parquet(self._config, engine="pyarrow")
|
||||||
else:
|
else:
|
||||||
with Path(self._config).open("rb") as f:
|
with Path(self._config).open("rb") as f:
|
||||||
self._data = pickle.load(f)
|
self._data = restricted_pickle_load(f)
|
||||||
elif isinstance(self._config, pd.DataFrame):
|
elif isinstance(self._config, pd.DataFrame):
|
||||||
self._data = self._config
|
self._data = self._config
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,6 @@ import contextlib
|
|||||||
import importlib
|
import importlib
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pickle
|
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
@@ -20,6 +19,7 @@ from typing import Any, Dict, List, Tuple, Union
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from qlib.typehint import InstConf
|
from qlib.typehint import InstConf
|
||||||
|
from qlib.utils.pickle_utils import restricted_pickle_load
|
||||||
|
|
||||||
|
|
||||||
def get_module_by_module_path(module_path: Union[str, ModuleType]):
|
def get_module_by_module_path(module_path: Union[str, ModuleType]):
|
||||||
@@ -168,10 +168,10 @@ def init_instance_by_config(
|
|||||||
|
|
||||||
pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc
|
pr_path = os.path.join(pr.netloc, path) if bool(pr.path) else pr.netloc
|
||||||
with open(os.path.normpath(pr_path), "rb") as f:
|
with open(os.path.normpath(pr_path), "rb") as f:
|
||||||
return pickle.load(f)
|
return restricted_pickle_load(f)
|
||||||
else:
|
else:
|
||||||
with config.open("rb") as f:
|
with config.open("rb") as f:
|
||||||
return pickle.load(f)
|
return restricted_pickle_load(f)
|
||||||
|
|
||||||
klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)
|
klass, cls_kwargs = get_callable_kwargs(config, default_module=default_module)
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import tempfile
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from qlib.config import C
|
from qlib.config import C
|
||||||
|
from qlib.utils.pickle_utils import restricted_pickle_load
|
||||||
|
|
||||||
|
|
||||||
class ObjManager:
|
class ObjManager:
|
||||||
@@ -116,7 +117,7 @@ class FileManager(ObjManager):
|
|||||||
|
|
||||||
def load_obj(self, name):
|
def load_obj(self, name):
|
||||||
with (self.path / name).open("rb") as f:
|
with (self.path / name).open("rb") as f:
|
||||||
return pickle.load(f)
|
return restricted_pickle_load(f)
|
||||||
|
|
||||||
def exists(self, name):
|
def exists(self, name):
|
||||||
return (self.path / name).exists()
|
return (self.path / name).exists()
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ from tqdm.cli import tqdm
|
|||||||
|
|
||||||
from .utils import get_mongodb
|
from .utils import get_mongodb
|
||||||
from ...config import C
|
from ...config import C
|
||||||
|
from ...utils.pickle_utils import restricted_pickle_loads
|
||||||
|
|
||||||
|
|
||||||
class TaskManager:
|
class TaskManager:
|
||||||
@@ -131,7 +132,7 @@ class TaskManager:
|
|||||||
for prefix in self.ENCODE_FIELDS_PREFIX:
|
for prefix in self.ENCODE_FIELDS_PREFIX:
|
||||||
for k in list(task.keys()):
|
for k in list(task.keys()):
|
||||||
if k.startswith(prefix):
|
if k.startswith(prefix):
|
||||||
task[k] = pickle.loads(task[k])
|
task[k] = restricted_pickle_loads(task[k])
|
||||||
return task
|
return task
|
||||||
|
|
||||||
def _dict_to_str(self, flt):
|
def _dict_to_str(self, flt):
|
||||||
|
|||||||
Reference in New Issue
Block a user