From c276de40406022690af5fefe7ce0b9c9d11d8090 Mon Sep 17 00:00:00 2001
From: Pengrong Zhu <zhu.pengrong@foxmail.com>
Date: Tue, 7 Dec 2021 19:04:23 +0800
Subject: [PATCH] Fix backtest (#719)

* modify FileStorage to support multiple freqs

* modify backtest's sample documentation

* change the logging level of read data exception from error to debug

* fix the backtest exception when volume is 0 or np.nan

* fix test_storage.py

* add backtest_daily

* modify backtest_daily's docstring

* add __repr__/__str__ to Position

* fix the bug of nested_decision_execution example

Co-authored-by: Young <afe.young@gmail.com>
Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
---
 docs/component/strategy.rst                   | 140 ++++++++++---
 .../nested_decision_execution/workflow.py     | 192 +++++++++++++++++-
 qlib/backtest/__init__.py                     |   6 +-
 qlib/backtest/account.py                      |  18 +-
 qlib/backtest/exchange.py                     |   8 +-
 qlib/backtest/executor.py                     |   2 +-
 qlib/backtest/position.py                     |   6 +
 qlib/backtest/utils.py                        |   4 +-
 qlib/config.py                                |  45 ++--
 qlib/contrib/evaluate.py                      | 189 ++++++++++-------
 .../report/analysis_position/report.py        |  55 ++++-
 .../report/analysis_position/risk_analysis.py |  70 +++++--
 qlib/data/base.py                             |   2 +-
 qlib/data/data.py                             |  35 +---
 qlib/data/ops.py                              |   6 +-
 qlib/data/storage/file_storage.py             |  98 +++++----
 qlib/data/storage/storage.py                  |   3 +-
 tests/storage_tests/test_storage.py           |  15 +-
 tests/test_all_pipeline.py                    |   1 +
 19 files changed, 663 insertions(+), 232 deletions(-)
diff --git a/docs/component/strategy.rst b/docs/component/strategy.rst
index c9d002ca1..5e58dcf37 100644
--- a/docs/component/strategy.rst
+++ b/docs/component/strategy.rst
@@ -84,31 +84,125 @@ Usage & Example
 ====================
 ``Portfolio Strategy`` can be specified in the ``Intraday Trading(Backtest)``, the example is as follows.
 
-.. code-block:: python
+- daily
 
-    from qlib.contrib.strategy.strategy import TopkDropoutStrategy
-    from qlib.contrib.evaluate import backtest
-    STRATEGY_CONFIG = {
-        "topk": 50,
-        "n_drop": 5,
-    }
-    BACKTEST_CONFIG = {
-        "limit_threshold": 0.095,
-        "account": 100000000,
-        "benchmark": BENCHMARK,
-        "deal_price": "close",
-        "open_cost": 0.0005,
-        "close_cost": 0.0015,
-        "min_cost": 5,
-        
-    }
-    # use default strategy
-    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
+    .. code-block:: python
+
+        from pprint import pprint
+
+        import qlib
+        import pandas as pd
+        from qlib.utils.time import Freq
+        from qlib.utils import flatten_dict
+        from qlib.contrib.evaluate import backtest_daily
+        from qlib.contrib.evaluate import risk_analysis
+        from qlib.contrib.strategy import TopkDropoutStrategy
+
+        # init qlib
+        qlib.init(provider_uri=<qlib data dir>)
+
+        CSI300_BENCH = "SH000300"
+        STRATEGY_CONFIG = {
+            "topk": 50,
+            "n_drop": 5,
+            # pred_score, pd.Series
+            "signal": pred_score,
+        }
+
+        analysis_freq = "{0}{1}".format(*Freq.parse("day"))
+        strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
+        report_normal, positions_normal = backtest_daily(
+            start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_obj
+        )
+        analysis = dict()
+        analysis["excess_return_without_cost"] = risk_analysis(
+            report_normal["return"] - report_normal["bench"], freq=analysis_freq
+        )
+        analysis["excess_return_with_cost"] = risk_analysis(
+            report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
+        )
+
+        analysis_df = pd.concat(analysis)  # type: pd.DataFrame
+        pprint(analysis_df)
+
+
+
+- nested decision execution
+
+    .. code-block:: python
+
+        from pprint import pprint
+
+        import qlib
+        import pandas as pd
+        from qlib.utils.time import Freq
+        from qlib.utils import flatten_dict
+        from qlib.backtest import backtest, executor
+        from qlib.contrib.evaluate import risk_analysis
+        from qlib.contrib.strategy import TopkDropoutStrategy
+
+        # init qlib
+        qlib.init(provider_uri=<qlib data dir>)
+
+        CSI300_BENCH = "SH000300"
+        FREQ = "day"
+        STRATEGY_CONFIG = {
+            "topk": 50,
+            "n_drop": 5,
+            # pred_score, pd.Series
+            "signal": pred_score,
+        }
+
+        EXECUTOR_CONFIG = {
+            "time_per_step": "day",
+            "generate_portfolio_metrics": True,
+        }
+
+        backtest_config = {
+            "start_time": "2017-01-01",
+            "end_time": "2020-08-01",
+            "account": 100000000,
+            "benchmark": CSI300_BENCH,
+            "exchange_kwargs": {
+                "freq": FREQ,
+                "limit_threshold": 0.095,
+                "deal_price": "close",
+                "open_cost": 0.0005,
+                "close_cost": 0.0015,
+                "min_cost": 5,
+            },
+        }
+
+        # strategy object
+        strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
+        # executor object
+        executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
+        # backtest
+        portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
+        analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
+        # backtest info
+        report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)
+
+        # analysis
+        analysis = dict()
+        analysis["excess_return_without_cost"] = risk_analysis(
+            report_normal["return"] - report_normal["bench"], freq=analysis_freq
+        )
+        analysis["excess_return_with_cost"] = risk_analysis(
+            report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
+        )
+
+        analysis_df = pd.concat(analysis)  # type: pd.DataFrame
+        # log metrics
+        analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
+        # print out results
+        pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
+        pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
+        pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
+        pprint(analysis["excess_return_without_cost"])
+        pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
+        pprint(analysis["excess_return_with_cost"])
 
-    # pred_score is the `prediction score` output by Model
-    report_normal, positions_normal = backtest(
-        pred_score, strategy=strategy, **BACKTEST_CONFIG
-    )
 
 To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.
 
diff --git a/examples/nested_decision_execution/workflow.py b/examples/nested_decision_execution/workflow.py
index d7f5fc813..6cd642e42 100644
--- a/examples/nested_decision_execution/workflow.py
+++ b/examples/nested_decision_execution/workflow.py
@@ -1,9 +1,105 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.
+"""
+The expected result of `backtest` in the current version is as follows
+
+'The following are analysis results of benchmark return(1day).'
+                       risk
+mean               0.000651
+std                0.012472
+annualized_return  0.154967
+information_ratio  0.805422
+max_drawdown      -0.160445
+'The following are analysis results of the excess return without cost(1day).'
+                       risk
+mean               0.001258
+std                0.007575
+annualized_return  0.299303
+information_ratio  2.561219
+max_drawdown      -0.068386
+'The following are analysis results of the excess return with cost(1day).'
+                       risk
+mean               0.001110
+std                0.007575
+annualized_return  0.264280
+information_ratio  2.261392
+max_drawdown      -0.071842
+[1706497:MainThread](2021-12-07 14:08:30,263) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_30minute.
+pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of benchmark return(30minute).'
+                       risk
+mean               0.000078
+std                0.003646
+annualized_return  0.148787
+information_ratio  0.935252
+max_drawdown      -0.142830
+('The following are analysis results of the excess return without '
+ 'cost(30minute).')
+                       risk
+mean               0.000174
+std                0.003343
+annualized_return  0.331867
+information_ratio  2.275019
+max_drawdown      -0.074752
+'The following are analysis results of the excess return with cost(30minute).'
+                       risk
+mean               0.000155
+std                0.003343
+annualized_return  0.294536
+information_ratio  2.018860
+max_drawdown      -0.075579
+[1706497:MainThread](2021-12-07 14:08:30,277) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_5minute.p
+kl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of benchmark return(5minute).'
+                       risk
+mean               0.000015
+std                0.001460
+annualized_return  0.172170
+information_ratio  1.103439
+max_drawdown      -0.144807
+'The following are analysis results of the excess return without cost(5minute).'
+                       risk
+mean               0.000028
+std                0.001412
+annualized_return  0.319771
+information_ratio  2.119563
+max_drawdown      -0.077426
+'The following are analysis results of the excess return with cost(5minute).'
+                       risk
+mean               0.000025
+std                0.001412
+annualized_return  0.281536
+information_ratio  1.866091
+max_drawdown      -0.078194
+[1706497:MainThread](2021-12-07 14:08:30,287) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day
+.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(1day).'
+        value
+ffr  0.945821
+pa   0.000324
+pos  0.542882
+[1706497:MainThread](2021-12-07 14:08:30,293) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_30mi
+nute.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(30minute).'
+        value
+ffr  0.982910
+pa   0.000037
+pos  0.500806
+[1706497:MainThread](2021-12-07 14:08:30,302) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_5min
+ute.pkl' has been saved as the artifact of the Experiment 2
+'The following are analysis results of indicators(5minute).'
+        value
+ffr  0.991017
+pa   0.000000
+pos  0.000000
+[1706497:MainThread](2021-12-07 14:08:30,627) INFO - qlib.timer - [log.py:113] - Time cost: 0.014s | waiting `async_log` Done
+"""
 
 
+from copy import deepcopy
 import qlib
 import fire
+import pandas as pd
 from qlib.config import REG_CN, HIGH_FREQ_CONFIG
 from qlib.data import D
 from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
@@ -14,6 +110,13 @@ from qlib.backtest import collect_data
 
 
 class NestedDecisionExecutionWorkflow:
+    # TODO: add test for nested workflow.
+    # 1) comparing same backtest
+    # - Basic test idea: the shared accumulated values are equal in multiple levels
+    #   - Aligning the profit calculation between multiple levels and single levels.
+    # 2) comparing different backtest
+    # - Basic test idea:
+    #   - the daily backtest will be similar to the multi-level one (the data quality makes this gap smaller)
 
     market = "csi300"
     benchmark = "SH000300"
@@ -167,8 +270,6 @@ class NestedDecisionExecutionWorkflow:
             par = PortAnaRecord(
                 recorder,
                 self.port_analysis_config,
-                risk_analysis_freq=["day", "30min", "5min"],
-                indicator_analysis_freq=["day", "30min", "5min"],
                 indicator_analysis_method="value_weighted",
             )
             par.generate()
@@ -199,6 +300,93 @@ class NestedDecisionExecutionWorkflow:
         for trade_decision in data_generator:
             print(trade_decision)
 
+    # the code below is for checking; users don't have to care about it
+    def check_diff_freq(self):
+        self._init_qlib()
+        exp = R.get_exp(experiment_name="backtest")
+        rec = next(iter(exp.list_recorders().values()))  # assuming this will get the latest recorder
+        for check_key in "account", "total_turnover", "total_cost":
+            check_key = "total_cost"
+
+            acc_dict = {}
+            for freq in ["30minute", "5minute", "1day"]:
+                acc_dict[freq] = rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")[check_key]
+            acc_df = pd.DataFrame(acc_dict)
+            acc_resam = acc_df.resample("1d").last().dropna()
+            assert (acc_resam["30minute"] == acc_resam["1day"]).all()
+
+    def backtest_only_daily(self):
+        """
+        This backtest is used for comparing the nested execution and single layer execution
+        Due to the low quality of the daily-level and minute-level data, they are hardly comparable.
+        So it is used for detecting serious bugs which make the results differ greatly.
+
+        .. code-block:: shell
+
+            [1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
+            has been saved as the artifact of the Experiment 2
+            'The following are analysis results of benchmark return(1day).'
+                                   risk
+            mean               0.000651
+            std                0.012472
+            annualized_return  0.154967
+            information_ratio  0.805422
+            max_drawdown      -0.160445
+            'The following are analysis results of the excess return without cost(1day).'
+                                   risk
+            mean               0.001375
+            std                0.006103
+            annualized_return  0.327204
+            information_ratio  3.475016
+            max_drawdown      -0.024927
+            'The following are analysis results of the excess return with cost(1day).'
+                                   risk
+            mean               0.001184
+            std                0.006091
+            annualized_return  0.281801
+            information_ratio  2.998749
+            max_drawdown      -0.029568
+            [1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
+            pkl' has been saved as the artifact of the Experiment 2
+            'The following are analysis results of indicators(1day).'
+                 value
+            ffr    1.0
+            pa     0.0
+            pos    0.0
+            [1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done
+
+        """
+        self._init_qlib()
+        model = init_instance_by_config(self.task["model"])
+        dataset = init_instance_by_config(self.task["dataset"])
+        self._train_model(model, dataset)
+        strategy_config = {
+            "class": "TopkDropoutStrategy",
+            "module_path": "qlib.contrib.strategy.signal_strategy",
+            "kwargs": {
+                "signal": (model, dataset),
+                "topk": 50,
+                "n_drop": 5,
+            },
+        }
+        pa_conf = deepcopy(self.port_analysis_config)
+        pa_conf["strategy"] = strategy_config
+        pa_conf["executor"] = {
+            "class": "SimulatorExecutor",
+            "module_path": "qlib.backtest.executor",
+            "kwargs": {
+                "time_per_step": "day",
+                "generate_portfolio_metrics": True,
+                "verbose": True,
+            },
+        }
+        pa_conf["backtest"]["benchmark"] = self.benchmark
+
+        with R.start(experiment_name="backtest"):
+            recorder = R.get_recorder()
+            par = PortAnaRecord(recorder, pa_conf)
+            par.generate()
+
 
 if __name__ == "__main__":
     fire.Fire(NestedDecisionExecutionWorkflow)
diff --git a/qlib/backtest/__init__.py b/qlib/backtest/__init__.py
index 7c479f28d..b1d92c5a5 100644
--- a/qlib/backtest/__init__.py
+++ b/qlib/backtest/__init__.py
@@ -186,8 +186,10 @@ def get_strategy_executor(
     trade_exchange = get_exchange(**exchange_kwargs)
 
     common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange)
-    trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy, common_infra=common_infra)
-    trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
+    trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy)
+    trade_strategy.reset_common_infra(common_infra)
+    trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor)
+    trade_executor.reset_common_infra(common_infra)
 
     return trade_strategy, trade_executor
 
diff --git a/qlib/backtest/account.py b/qlib/backtest/account.py
index aa503ebc2..41fd43cb8 100644
--- a/qlib/backtest/account.py
+++ b/qlib/backtest/account.py
@@ -29,7 +29,10 @@ rtn & earning in the Account
 
 
 class AccumulatedInfo:
-    """accumulated trading info, including accumulated return/cost/turnover"""
+    """
+    accumulated trading info, including accumulated return/cost/turnover
+    AccumulatedInfo should be shared across different levels
+    """
 
     def __init__(self):
         self.reset()
@@ -62,6 +65,11 @@ class AccumulatedInfo:
 
 
 class Account:
+    """
+    The correctness of the metrics of Account in nested execution depends on the shallow copy of `trade_account` in qlib/backtest/executor.py:NestedExecutor
+    Each level of executor has its own Account object when calculating metrics, but the position object is shared across all the Account objects.
+    """
+
     def __init__(
         self,
         init_cash: float = 1e9,
@@ -95,6 +103,8 @@ class Account:
         self.init_vars(init_cash, position_dict, freq, benchmark_config)
 
     def init_vars(self, init_cash, position_dict, freq: str, benchmark_config: dict):
+        # 1) the following variables are shared by multiple layers
+        # - you will see a shallow copy instead of deepcopy in the NestedExecutor;
         self.init_cash = init_cash
         self.current_position: BasePosition = init_instance_by_config(
             {
@@ -106,6 +116,9 @@ class Account:
                 "module_path": "qlib.backtest.position",
             }
         )
+        self.accum_info = AccumulatedInfo()
+
+        # 2) following variables are not shared between layers
         self.portfolio_metrics = None
         self.hist_positions = {}
         self.reset(freq=freq, benchmark_config=benchmark_config)
@@ -119,7 +132,8 @@ class Account:
     def reset_report(self, freq, benchmark_config):
         # portfolio related metrics
         if self.is_port_metr_enabled():
-            self.accum_info = AccumulatedInfo()
+            # NOTE:
+            # `accum_info` and `current_position` are shared here
             self.portfolio_metrics = PortfolioMetrics(freq, benchmark_config)
             self.hist_positions = {}
 
diff --git a/qlib/backtest/exchange.py b/qlib/backtest/exchange.py
index cc88528fd..81037880b 100644
--- a/qlib/backtest/exchange.py
+++ b/qlib/backtest/exchange.py
@@ -231,7 +231,7 @@ class Exchange:
                 self.extra_quote["limit_buy"] = False
                 self.logger.warning("No limit_buy set for extra_quote. All stock will be able to be bought.")
             assert set(self.extra_quote.columns) == set(self.quote_df.columns) - {"$change"}
-            self.quote_df = pd.concat([self.quote_df, extra_quote], sort=False, axis=0)
+            self.quote_df = pd.concat([self.quote_df, self.extra_quote], sort=False, axis=0)
 
     LT_TP_EXP = "(exp)"  # Tuple[str, str]
     LT_FLT = "float"  # float
@@ -736,7 +736,11 @@ class Exchange:
 
         # TODO: the adjusted cost ratio can be overestimated as deal_amount will be clipped in the next steps
         trade_val = order.deal_amount * trade_price
-        adj_cost_ratio = self.impact_cost * (trade_val / total_trade_val) ** 2
+        if not total_trade_val or np.isnan(total_trade_val):
+            # TODO: assert trade_val == 0, f"trade_val != 0, total_trade_val: {total_trade_val}; order info: {order}"
+            adj_cost_ratio = self.impact_cost
+        else:
+            adj_cost_ratio = self.impact_cost * (trade_val / total_trade_val) ** 2
 
         if order.direction == Order.SELL:
             cost_ratio = self.close_cost + adj_cost_ratio
diff --git a/qlib/backtest/executor.py b/qlib/backtest/executor.py
index 44f3e8db0..a46bb2ee7 100644
--- a/qlib/backtest/executor.py
+++ b/qlib/backtest/executor.py
@@ -130,7 +130,7 @@ class BaseExecutor:
 
         if common_infra.has("trade_account"):
             # NOTE: there is a trick in the code.
-            # copy is used instead of deepcopy. So positions are shared
+            # shallow copy is used instead of deepcopy. So positions are shared
             self.trade_account: Account = copy.copy(common_infra.get("trade_account"))
             self.trade_account.reset(freq=self.time_per_step, port_metr_enabled=self.generate_portfolio_metrics)
 
diff --git a/qlib/backtest/position.py b/qlib/backtest/position.py
index 7abe85381..8ad2e957f 100644
--- a/qlib/backtest/position.py
+++ b/qlib/backtest/position.py
@@ -223,6 +223,12 @@ class BasePosition:
         """
         raise NotImplementedError(f"Please implement the `settle_commit` method")
 
+    def __str__(self):
+        return self.__dict__.__str__()
+
+    def __repr__(self):
+        return self.__dict__.__repr__()
+
 
 class Position(BasePosition):
     """Position
diff --git a/qlib/backtest/utils.py b/qlib/backtest/utils.py
index 5db7658b0..9aadccc9b 100644
--- a/qlib/backtest/utils.py
+++ b/qlib/backtest/utils.py
@@ -70,7 +70,7 @@ class TradeCalendarManager:
         - If self.trade_step >= self.self.trade_len, it means the trading is finished
         - If self.trade_step < self.self.trade_len, it means the number of trading step finished is self.trade_step
         """
-        return self.trade_step >= self.trade_len - 1
+        return self.trade_step >= self.trade_len
 
     def step(self):
         if self.finished():
@@ -222,7 +222,7 @@ class CommonInfrastructure(BaseInfrastructure):
 
 
 class LevelInfrastructure(BaseInfrastructure):
-    """level instrastructure is created by executor, and then shared to strategies on the same level"""
+    """level infrastructure is created by executor, and then shared to strategies on the same level"""
 
     def get_support_infra(self):
         """
diff --git a/qlib/config.py b/qlib/config.py
index 117b302fb..d95cede31 100644
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -176,8 +176,6 @@ _default_config = {
     # if min_data_shift == 0, use default market time [9:30, 11:29, 1:00, 2:59]
     # if min_data_shift != 0, use shifted market time [9:30, 11:29, 1:00, 2:59] - shift*minute
     "min_data_shift": 0,
-    # whether to display the ops warning log, default False
-    "ops_warning_log": False,
 }
 
 MODE_CONF = {
@@ -246,8 +244,8 @@ HIGH_FREQ_CONFIG = {
 _default_region_config = {
     REG_CN: {
         "trade_unit": 100,
-        "limit_threshold": 0.099,
-        "deal_price": "vwap",
+        "limit_threshold": 0.095,
+        "deal_price": "close",
     },
     REG_US: {
         "trade_unit": 1,
@@ -272,6 +270,20 @@ class QlibConfig(Config):
             self.provider_uri = provider_uri
             self.mount_path = mount_path
 
+        @staticmethod
+        def format_provider_uri(provider_uri: Union[str, dict, Path]) -> dict:
+            if provider_uri is None:
+                raise ValueError("provider_uri cannot be None")
+            if isinstance(provider_uri, (str, dict, Path)):
+                if not isinstance(provider_uri, dict):
+                    provider_uri = {QlibConfig.DEFAULT_FREQ: provider_uri}
+            else:
+                raise TypeError(f"provider_uri does not support {type(provider_uri)}")
+            for freq, _uri in provider_uri.items():
+                if QlibConfig.DataPathManager.get_uri_type(_uri) == QlibConfig.LOCAL_URI:
+                    provider_uri[freq] = str(Path(_uri).expanduser().resolve())
+            return provider_uri
+
         @staticmethod
         def get_uri_type(uri: Union[str, Path]):
             uri = uri if isinstance(uri, str) else str(uri.expanduser().resolve())
@@ -318,11 +330,7 @@ class QlibConfig(Config):
     def resolve_path(self):
         # resolve path
         _mount_path = self["mount_path"]
-        _provider_uri = self["provider_uri"]
-        if _provider_uri is None:
-            raise ValueError("provider_uri cannot be None")
-        if not isinstance(_provider_uri, dict):
-            _provider_uri = {self.DEFAULT_FREQ: _provider_uri}
+        _provider_uri = self.DataPathManager.format_provider_uri(self["provider_uri"])
         if not isinstance(_mount_path, dict):
             _mount_path = {_freq: _mount_path for _freq in _provider_uri.keys()}
 
@@ -331,10 +339,7 @@ class QlibConfig(Config):
         assert len(_miss_freq) == 0, f"mount_path is missing freq: {_miss_freq}"
 
         # resolve
-        for _freq, _uri in _provider_uri.items():
-            # provider_uri
-            if self.DataPathManager.get_uri_type(_uri) == QlibConfig.LOCAL_URI:
-                _provider_uri[_freq] = str(Path(_uri).expanduser().resolve())
+        for _freq in _provider_uri.keys():
             # mount_path
             _mount_path[_freq] = (
                 _mount_path[_freq]
@@ -344,20 +349,6 @@ class QlibConfig(Config):
         self["provider_uri"] = _provider_uri
         self["mount_path"] = _mount_path
 
-    def get_uri_type(self):
-        path = self["provider_uri"]
-        if isinstance(path, Path):
-            path = str(path)
-        is_win = re.match("^[a-zA-Z]:.*", path) is not None  # such as 'C:\\data', 'D:'
-        is_nfs_or_win = (
-            re.match("^[^/]+:.+", path) is not None
-        )  # such as 'host:/data/'   (User may define short hostname by themselves or use localhost)
-
-        if is_nfs_or_win and not is_win:
-            return QlibConfig.NFS_URI
-        else:
-            return QlibConfig.LOCAL_URI
-
     def set(self, default_conf: str = "client", **kwargs):
         """
         configure qlib based on the input parameters
diff --git a/qlib/contrib/evaluate.py b/qlib/contrib/evaluate.py
index 31d24d8f5..5a74757de 100644
--- a/qlib/contrib/evaluate.py
+++ b/qlib/contrib/evaluate.py
@@ -3,15 +3,18 @@
 
 from __future__ import division
 from __future__ import print_function
-from logging import warn
 
 import numpy as np
 import pandas as pd
 import warnings
+from typing import Union
+
 from ..log import get_module_logger
-from ..backtest import get_exchange, backtest as backtest_func
 from ..utils import get_date_range
 from ..utils.resam import Freq
+from ..strategy.base import BaseStrategy
+from ..backtest import get_exchange, position, backtest as backtest_func, executor as _executor
+
 
 from ..data import D
 from ..config import C
@@ -117,84 +120,129 @@ def indicator_analysis(df, method="mean"):
 
 
 # This is the API for compatibility for legacy code
-def backtest(pred, account=1e9, shift=1, benchmark="SH000905", verbose=True, **kwargs):
-    """This function will help you set a reasonable Exchange and provide default value for strategy
+def backtest_daily(
+    start_time: Union[str, pd.Timestamp],
+    end_time: Union[str, pd.Timestamp],
+    strategy: Union[str, dict, BaseStrategy],
+    executor: Union[str, dict, _executor.BaseExecutor] = None,
+    account: Union[float, int, position.Position] = 1e8,
+    benchmark: str = "SH000300",
+    exchange_kwargs: dict = None,
+    pos_type: str = "Position",
+):
+    """initialize the strategy and executor, then execute the backtest of daily frequency
+
     Parameters
     ----------
+    start_time : Union[str, pd.Timestamp]
+        closed start time for backtest
+        **NOTE**: This will be applied to the outermost executor's calendar.
+    end_time : Union[str, pd.Timestamp]
+        closed end time for backtest
+        **NOTE**: This will be applied to the outermost executor's calendar.
+        E.g. Executor[day](Executor[1min]),   setting `end_time == 20XX0301` will include all the minutes on 20XX0301
+    strategy : Union[str, dict, BaseStrategy]
+        for initializing outermost portfolio strategy. Please refer to the docs of init_instance_by_config for more information.
 
-    - **backtest workflow related or commmon arguments**
+        E.g.
 
-    pred : pandas.DataFrame
-        predict should has <datetime, instrument> index and one `score` column.
-    account : float
-        init account value.
-    shift : int
-        whether to shift prediction by one day.
-    benchmark : str
-        benchmark code, default is SH000905 CSI 500.
-    verbose : bool
-        whether to print log.
+        .. code-block:: python
+
+            # dict
+            strategy = {
+                "class": "TopkDropoutStrategy",
+                "module_path": "qlib.contrib.strategy.signal_strategy",
+                "kwargs": {
+                    "signal": (model, dataset),
+                    "topk": 50,
+                    "n_drop": 5,
+                },
+            }
+            # BaseStrategy
+            pred_score = pd.read_pickle("score.pkl")["score"]
+            STRATEGY_CONFIG = {
+                "topk": 50,
+                "n_drop": 5,
+                "signal": pred_score,
+            }
+            strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
+            # str example.
+            # 1) specify a pickle object
+            #     - path like 'file:///<path to pickle file>/obj.pkl'
+            # 2) specify a class name
+            #     - "ClassName":  getattr(module, "ClassName")() will be used.
+            # 3) specify module path with class name
+            #     - "a.b.c.ClassName" getattr(<a.b.c.module>, "ClassName")() will be used.
-    - **strategy related arguments**
 
-    strategy : Strategy()
-        strategy used in backtest.
-    topk : int (Default value: 50)
-        top-N stocks to buy.
-    margin : int or float(Default value: 0.5)
-        - if isinstance(margin, int):
+    executor : Union[str, dict, BaseExecutor]
+        for initializing the outermost executor.
+    benchmark: str
+        the benchmark for reporting.
+    account : Union[float, int, Position]
+        information for describing how to create the account
+        For `float` or `int`:
+            Using Account with only initial cash
+        For `Position`:
+            Using Account with a Position
+    exchange_kwargs : dict
+        the kwargs for initializing Exchange
+        E.g.
 
-            sell_limit = margin
+        .. code-block:: python
 
-        - else:
+            exchange_kwargs = {
+                "freq": freq,
+                "limit_threshold": None, # limit_threshold is None, using C.limit_threshold
+                "deal_price": None, # deal_price is None, using C.deal_price
+                "open_cost": 0.0005,
+                "close_cost": 0.0015,
+                "min_cost": 5,
+            }
 
-            sell_limit = pred_in_a_day.count() * margin
+    pos_type : str
+        the type of Position.
 
-        buffer margin, in single score_mode, continue holding stock if it is in nlargest(sell_limit).
-        sell_limit should be no less than topk.
-    n_drop : int
-        number of stocks to be replaced in each trading date.
-    risk_degree: float
-        0-1, 0.95 for example, use 95% money to trade.
-    str_type: 'amount', 'weight' or 'dropout'
-        strategy type: TopkAmountStrategy ,TopkWeightStrategy or TopkDropoutStrategy.
-
-    - **exchange related arguments**
-
-    exchange: Exchange()
-        pass the exchange for speeding up.
-    subscribe_fields: list
-        subscribe fields.
-    open_cost : float
-        open transaction cost. The default value is 0.002(0.2%).
-    close_cost : float
-        close transaction cost. The default value is 0.002(0.2%).
-    min_cost : float
-        min transaction cost.
-    trade_unit : int
-        100 for China A.
-    deal_price: str
-        dealing price type: 'close', 'open', 'vwap'.
-    limit_threshold : float
-        limit move 0.1 (10%) for example, long and short with same limit.
-    extract_codes: bool
-        will we pass the codes extracted from the pred to the exchange.
-
-        .. note:: This will be faster with offline qlib.
-
-    - **executor related arguments**
-
-    executor : BaseExecutor()
-        executor used in backtest.
-    verbose : bool
-        whether to print log.
+    Returns
+    -------
+    report_normal: pd.DataFrame
+        backtest report
+    positions_normal: pd.DataFrame
+        backtest positions
 
     """
-    warnings.warn("this function is deprecated, please use backtest function in qlib.backtest", DeprecationWarning)
-    report_dict = backtest_func(
-        pred=pred, account=account, shift=shift, benchmark=benchmark, verbose=verbose, return_order=False, **kwargs
+    freq = "day"
+    if executor is None:
+        executor_config = {
+            "time_per_step": freq,
+            "generate_portfolio_metrics": True,
+        }
+        executor = _executor.SimulatorExecutor(**executor_config)
+    _exchange_kwargs = {
+        "freq": freq,
+        "limit_threshold": None,
+        "deal_price": None,
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5,
+    }
+    if exchange_kwargs is not None:
+        _exchange_kwargs.update(exchange_kwargs)
+
+    portfolio_metric_dict, indicator_dict = backtest_func(
+        start_time=start_time,
+        end_time=end_time,
+        strategy=strategy,
+        executor=executor,
+        account=account,
+        benchmark=benchmark,
+        exchange_kwargs=_exchange_kwargs,
+        pos_type=pos_type,
     )
-    return report_dict.get("report_df"), report_dict.get("positions")
+    analysis_freq = "{0}{1}".format(*Freq.parse(freq))
+
+    report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)
+
+    return report_normal, positions_normal
 
 
 def long_short_backtest(
@@ -327,7 +375,12 @@ def t_run():
     pred["datetime"] = pd.to_datetime(pred["datetime"])
     pred = pred.set_index([pred.columns[0], pred.columns[1]])
     pred = pred.iloc[:9000]
-    report_df, positions = backtest(pred=pred)
+    strategy_config = {
+        "topk": 50,
+        "n_drop": 5,
+        "signal": pred,
+    }
+    report_df, positions = backtest_daily(start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_config)
     print(report_df.head())
     print(positions.keys())
     print(positions[list(positions.keys())[0]])
diff --git a/qlib/contrib/report/analysis_position/report.py b/qlib/contrib/report/analysis_position/report.py
index 6b83f0734..daefb5295 100644
--- a/qlib/contrib/report/analysis_position/report.py
+++ b/qlib/contrib/report/analysis_position/report.py
@@ -171,20 +171,55 @@ def report_graph(report_df: pd.DataFrame, show_notebook: bool = True) -> [list,
 
             .. code-block:: python
 
-                from qlib.contrib.evaluate import backtest
+                import qlib
+                import pandas as pd
+                from qlib.utils.time import Freq
+                from qlib.utils import flatten_dict
+                from qlib.backtest import backtest, executor
+                from qlib.contrib.evaluate import risk_analysis
                 from qlib.contrib.strategy import TopkDropoutStrategy
 
-                # backtest parameters
-                bparas = {}
-                bparas['limit_threshold'] = 0.095
-                bparas['account'] = 1000000000
+                # init qlib
+                qlib.init(provider_uri=<qlib data dir>)
 
-                sparas = {}
-                sparas['topk'] = 50
-                sparas['n_drop'] = 230
-                strategy = TopkDropoutStrategy(**sparas)
+                CSI300_BENCH = "SH000300"
+                FREQ = "day"
+                STRATEGY_CONFIG = {
+                    "topk": 50,
+                    "n_drop": 5,
+                    # pred_score, pd.Series
+                    "signal": pred_score,
+                }
 
-                report_normal_df, _ = backtest(pred_df, strategy, **bparas)
+                EXECUTOR_CONFIG = {
+                    "time_per_step": "day",
+                    "generate_portfolio_metrics": True,
+                }
+
+                backtest_config = {
+                    "start_time": "2017-01-01",
+                    "end_time": "2020-08-01",
+                    "account": 100000000,
+                    "benchmark": CSI300_BENCH,
+                    "exchange_kwargs": {
+                        "freq": FREQ,
+                        "limit_threshold": 0.095,
+                        "deal_price": "close",
+                        "open_cost": 0.0005,
+                        "close_cost": 0.0015,
+                        "min_cost": 5,
+                    },
+                }
+
+                # strategy object
+                strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
+                # executor object
+                executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
+                # backtest
+                portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
+                analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
+                # backtest info
+                report_normal_df, positions_normal = portfolio_metric_dict.get(analysis_freq)
 
                 qcr.analysis_position.report_graph(report_normal_df)
 
diff --git a/qlib/contrib/report/analysis_position/risk_analysis.py b/qlib/contrib/report/analysis_position/risk_analysis.py
index 051c78035..31de152dd 100644
--- a/qlib/contrib/report/analysis_position/risk_analysis.py
+++ b/qlib/contrib/report/analysis_position/risk_analysis.py
@@ -170,32 +170,65 @@ def risk_analysis_graph(
 
             .. code-block:: python
 
-                from qlib.contrib.evaluate import risk_analysis, backtest, long_short_backtest
+                import qlib
+                import pandas as pd
+                from qlib.utils.time import Freq
+                from qlib.utils import flatten_dict
+                from qlib.backtest import backtest, executor
+                from qlib.contrib.evaluate import risk_analysis
                 from qlib.contrib.strategy import TopkDropoutStrategy
                 from qlib.contrib.report import analysis_position
 
-                # backtest parameters
-                bparas = {}
-                bparas['limit_threshold'] = 0.095
-                bparas['account'] = 1000000000
+                # init qlib
+                qlib.init(provider_uri=<qlib data dir>)
 
-                sparas = {}
-                sparas['topk'] = 50
-                sparas['n_drop'] = 230
-                strategy = TopkDropoutStrategy(**sparas)
+                CSI300_BENCH = "SH000300"
+                FREQ = "day"
+                STRATEGY_CONFIG = {
+                    "topk": 50,
+                    "n_drop": 5,
+                    # pred_score, pd.Series
+                    "signal": pred_score,
+                }
 
-                report_normal_df, positions = backtest(pred_df, strategy, **bparas)
-                # long_short_map = long_short_backtest(pred_df)
-                # report_long_short_df = pd.DataFrame(long_short_map)
+                EXECUTOR_CONFIG = {
+                    "time_per_step": "day",
+                    "generate_portfolio_metrics": True,
+                }
 
+                backtest_config = {
+                    "start_time": "2017-01-01",
+                    "end_time": "2020-08-01",
+                    "account": 100000000,
+                    "benchmark": CSI300_BENCH,
+                    "exchange_kwargs": {
+                        "freq": FREQ,
+                        "limit_threshold": 0.095,
+                        "deal_price": "close",
+                        "open_cost": 0.0005,
+                        "close_cost": 0.0015,
+                        "min_cost": 5,
+                    },
+                }
+
+                # strategy object
+                strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
+                # executor object
+                executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
+                # backtest
+                portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
+                analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
+                # backtest info
+                report_normal_df, positions_normal = portfolio_metric_dict.get(analysis_freq)
                 analysis = dict()
-                # analysis['pred_long'] = risk_analysis(report_long_short_df['long'])
-                # analysis['pred_short'] = risk_analysis(report_long_short_df['short'])
-                # analysis['pred_long_short'] = risk_analysis(report_long_short_df['long_short'])
-                analysis['excess_return_without_cost'] = risk_analysis(report_normal_df['return'] - report_normal_df['bench'])
-                analysis['excess_return_with_cost'] = risk_analysis(report_normal_df['return'] - report_normal_df['bench'] - report_normal_df['cost'])
-                analysis_df = pd.concat(analysis)
+                analysis["excess_return_without_cost"] = risk_analysis(
+                    report_normal_df["return"] - report_normal_df["bench"], freq=analysis_freq
+                )
+                analysis["excess_return_with_cost"] = risk_analysis(
+                    report_normal_df["return"] - report_normal_df["bench"] - report_normal_df["cost"], freq=analysis_freq
+                )
 
+                analysis_df = pd.concat(analysis)  # type: pd.DataFrame
                 analysis_position.risk_analysis_graph(analysis_df, report_normal_df)
 
 
diff --git a/qlib/data/base.py b/qlib/data/base.py
index 99c553357..f768f7067 100644
--- a/qlib/data/base.py
+++ b/qlib/data/base.py
@@ -155,7 +155,7 @@ class Expression(abc.ABC):
         try:
             series = self._load_internal(instrument, start_index, end_index, freq)
         except Exception as e:
-            get_module_logger("data").error(
+            get_module_logger("data").debug(
                 f"Loading data error: instrument={instrument}, expression={str(self)}, "
                 f"start_index={start_index}, end_index={end_index}, freq={freq}. "
                 f"error info: {str(e)}"
diff --git a/qlib/data/data.py b/qlib/data/data.py
index 1414e0d98..b2618435d 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -58,34 +58,13 @@ class ProviderBackendMixin:
         backend = copy.deepcopy(backend)
 
         # set default storage kwargs
+        # NOTE: provider_uri priority:
+        #   1. backend_config: backend_obj["kwargs"]["provider_uri"]
+        #   2. qlib.init: provider_uri
         backend_kwargs = backend.setdefault("kwargs", {})
-        # default provider_uri map
-        if "provider_uri" not in backend_kwargs:
-            # if the user has no uri configured, use: uri = uri_map[freq]
-            # NOTE: provider_uri priority：
-            #   1. backend_config: backend_obj["kwargs"]["provider_uri"]
-            #   2. backend_config: backend_obj["kwargs"]["provider_uri_map"]
-            #   3. qlib.init: provider_uri
-            provider_uri_map = backend_kwargs.setdefault("provider_uri_map", {})
-            freq = kwargs.get("freq", "day")
-            if freq not in provider_uri_map:
-                # NOTE: uri
-                #   1. If `freq` in C.dpm.provider_uri.keys(), uri = C.dpm.provider_uri[freq]
-                #   2. If `freq` not in C.dpm.provider_uri.keys()
-                #       - Get the `min_freq` closest to `freq` from C.dpm.provider_uri.keys(), uri = C.dpm.provider_uri[min_freq]
-                # NOTE: In Storage, only CalendarStorage is supported
-                #   1. If `uri` does not exist
-                #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
-                #       - Read data from `min_uri` and resample to `freq`
-                try:
-                    _uri = C.dpm.get_data_uri(freq)
-                except KeyError:
-                    # provider_uri is dict and freq not in list(provider_uri.keys())
-                    # use the nearest freq greater than 0
-                    min_freq = Freq.get_recent_freq(freq, C.dpm.provider_uri.keys())
-                    _uri = C.dpm.get_data_uri(freq) if min_freq is None else C.dpm.get_data_uri(min_freq)
-                provider_uri_map[freq] = _uri
-            backend_kwargs["provider_uri"] = provider_uri_map[freq]
+        provider_uri = backend_kwargs.get("provider_uri", None)
+        provider_uri = C.dpm.provider_uri if provider_uri is None else C.dpm.format_provider_uri(provider_uri)
+        backend_kwargs["provider_uri"] = provider_uri
         backend.setdefault("kwargs", {}).update(**kwargs)
         return init_instance_by_config(backend)
 
@@ -730,7 +709,7 @@ class LocalExpressionProvider(ExpressionProvider):
         try:
             series = expression.load(instrument, max(0, start_index - lft_etd), end_index + rght_etd, freq)
         except Exception as e:
-            get_module_logger("data").error(
+            get_module_logger("data").debug(
                 f"Loading expression error: "
                 f"instrument={instrument}, field=({field}), start_time={start_time}, end_time={end_time}, freq={freq}. "
                 f"error info: {str(e)}"
diff --git a/qlib/data/ops.py b/qlib/data/ops.py
index f6b7a1fbe..7eb3a005f 100644
--- a/qlib/data/ops.py
+++ b/qlib/data/ops.py
@@ -324,11 +324,11 @@ class NpPairOperator(PairOperator):
         try:
             res = getattr(np, self.func)(series_left, series_right)
         except ValueError as e:
-            get_module_logger("ops").error(warning_info)
+            get_module_logger("ops").debug(warning_info)
             raise ValueError(f"{str(e)}. \n\t{warning_info}")
         else:
-            if check_length and len(series_left) != len(series_right) and C.ops_warning_log:
-                get_module_logger("ops").warning(warning_info)
+            if check_length and len(series_left) != len(series_right):
+                get_module_logger("ops").debug(warning_info)
         return res
 
 
diff --git a/qlib/data/storage/file_storage.py b/qlib/data/storage/file_storage.py
index 5114ced3c..c1ab01b43 100644
--- a/qlib/data/storage/file_storage.py
+++ b/qlib/data/storage/file_storage.py
@@ -10,23 +10,44 @@ import pandas as pd
 
 from qlib.utils.time import Freq
 from qlib.utils.resam import resam_calendar
+from qlib.config import C
 from qlib.log import get_module_logger
 from qlib.data.storage import CalendarStorage, InstrumentStorage, FeatureStorage, CalVT, InstKT, InstVT
-from qlib.data.cache import H
 
 logger = get_module_logger("file_storage")
 
 
 class FileStorageMixin:
+    """FileStorageMixin, applicable to FileXXXStorage
+    Subclasses need to have provider_uri, freq, storage_name, file_name attributes
+
+    """
+
+    @property
+    def dpm(self):
+        return C.DataPathManager(self.provider_uri, None)
+
+    @property
+    def support_freq(self) -> List[str]:
+        _v = "_support_freq"
+        if hasattr(self, _v):
+            return getattr(self, _v)
+        if len(self.provider_uri) == 1 and C.DEFAULT_FREQ in self.provider_uri:
+            freq = filter(
+                lambda _freq: not _freq.endswith("_future"),
+                map(lambda x: x.stem, self.dpm.get_data_uri(C.DEFAULT_FREQ).joinpath("calendars").glob("*.txt")),
+            )
+        else:
+            freq = self.provider_uri.keys()
+        freq = list(freq)
+        setattr(self, _v, freq)
+        return freq
+
     @property
     def uri(self) -> Path:
-        _provider_uri = self.kwargs.get("provider_uri", None)
-        if _provider_uri is None:
-            raise ValueError(
-                f"The `provider_uri` parameter is not found in {self.__class__.__name__}, "
-                f'please specify `provider_uri` in the "provider\'s backend"'
-            )
-        return Path(_provider_uri).expanduser().joinpath(f"{self.storage_name}s", self.file_name)
+        if self.freq not in self.support_freq:
+            raise ValueError(f"{self.storage_name}: {self.provider_uri} does not contain data for {self.freq}")
+        return self.dpm.get_data_uri(self.freq).joinpath(f"{self.storage_name}s", self.file_name)
 
     def check(self):
         """check self.uri
@@ -40,10 +61,19 @@ class FileStorageMixin:
 
 
 class FileCalendarStorage(FileStorageMixin, CalendarStorage):
-    def __init__(self, freq: str, future: bool, **kwargs):
+    def __init__(self, freq: str, future: bool, provider_uri: dict, **kwargs):
         super(FileCalendarStorage, self).__init__(freq, future, **kwargs)
         self.future = future
-        self.file_name = f"{freq}_future.txt" if future else f"{freq}.txt".lower()
+        self.provider_uri = C.DataPathManager.format_provider_uri(provider_uri)
+        self.resample_freq = None
+
+    @property
+    def file_name(self) -> str:
+        return f"{self.use_freq}_future.txt" if self.future else f"{self.use_freq}.txt".lower()
+
+    @property
+    def use_freq(self) -> str:
+        return self.freq if self.resample_freq is None else self.resample_freq
 
     def _read_calendar(self, skip_rows: int = 0, n_rows: int = None) -> List[CalVT]:
         if not self.uri.exists():
@@ -59,28 +89,26 @@ class FileCalendarStorage(FileStorageMixin, CalendarStorage):
             np.savetxt(fp, values, fmt="%s", encoding="utf-8")
 
     @property
-    def data(self) -> List[CalVT]:
-        # NOTE: uri
-        #   1. If `uri` does not exist
-        #       - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
-        #       - Read data from `min_uri` and resample to `freq`
-        try:
-            self.check()
-            _calendar = self._read_calendar()
-        except ValueError:
-            freq_list = self._get_storage_freq()
-            _freq = Freq.get_recent_freq(self.freq, freq_list)
-            if _freq is None:
-                raise ValueError(f"can't find a freq from {freq_list} that can resample to {self.freq}!")
-            self.file_name = f"{_freq}_future.txt" if self.future else f"{_freq}.txt".lower()
-            # The cache is useful for the following cases
-            # - multiple frequencies are sampled from the same calendar
-            cache_key = self.uri
-            if cache_key not in H["c"]:
-                H["c"][cache_key] = self._read_calendar()
-            _calendar = H["c"][cache_key]
-            _calendar = resam_calendar(np.array(list(map(pd.Timestamp, _calendar))), _freq, self.freq)
+    def uri(self) -> Path:
+        freq = self.freq
+        if freq not in self.support_freq:
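+            # NOTE: `self.freq` is not one of the stored frequencies (`self.support_freq`)
+            #   1. Fall back to the stored freq returned by `Freq.get_recent_freq`
+            #       - Record it in `self.resample_freq` so `use_freq`/`file_name` point at that calendar file
+            #       - `data` then reads that calendar and resamples it to `self.freq`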
 
+            freq = Freq.get_recent_freq(freq, self.support_freq)
+            if freq is None:
+                raise ValueError(f"can't find a freq from {self.support_freq} that can resample to {self.freq}!")
+            self.resample_freq = freq
+        return self.dpm.get_data_uri(self.use_freq).joinpath(f"{self.storage_name}s", self.file_name)
+
+    @property
+    def data(self) -> List[CalVT]:
+        self.check()
+        _calendar = self._read_calendar()
+        if self.resample_freq is not None:
+            _calendar = resam_calendar(np.array(list(map(pd.Timestamp, _calendar))), self.resample_freq, self.freq)
         return _calendar
 
     def _get_storage_freq(self) -> List[str]:
@@ -135,8 +163,9 @@ class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
     INSTRUMENT_END_FIELD = "end_datetime"
     SYMBOL_FIELD_NAME = "instrument"
 
-    def __init__(self, market: str, **kwargs):
-        super(FileInstrumentStorage, self).__init__(market, **kwargs)
+    def __init__(self, market: str, freq: str, provider_uri: dict, **kwargs):
+        super(FileInstrumentStorage, self).__init__(market, freq, **kwargs)
+        self.provider_uri = C.DataPathManager.format_provider_uri(provider_uri)
         self.file_name = f"{market.lower()}.txt"
 
     def _read_instrument(self) -> Dict[InstKT, InstVT]:
@@ -223,8 +252,9 @@ class FileInstrumentStorage(FileStorageMixin, InstrumentStorage):
 
 
 class FileFeatureStorage(FileStorageMixin, FeatureStorage):
-    def __init__(self, instrument: str, field: str, freq: str, **kwargs):
+    def __init__(self, instrument: str, field: str, freq: str, provider_uri: dict, **kwargs):
         super(FileFeatureStorage, self).__init__(instrument, field, freq, **kwargs)
+        self.provider_uri = C.DataPathManager.format_provider_uri(provider_uri)
         self.file_name = f"{instrument.lower()}/{field.lower()}.{freq.lower()}.bin"
 
     def clear(self):
diff --git a/qlib/data/storage/storage.py b/qlib/data/storage/storage.py
index 8426ebe66..fd388d4ce 100644
--- a/qlib/data/storage/storage.py
+++ b/qlib/data/storage/storage.py
@@ -195,8 +195,9 @@ class CalendarStorage(BaseStorage):
 
 
 class InstrumentStorage(BaseStorage):
-    def __init__(self, market: str, **kwargs):
+    def __init__(self, market: str, freq: str, **kwargs):
         self.market = market
+        self.freq = freq
         self.kwargs = kwargs
 
     @property
diff --git a/tests/storage_tests/test_storage.py b/tests/storage_tests/test_storage.py
index 95502875b..50b16a041 100644
--- a/tests/storage_tests/test_storage.py
+++ b/tests/storage_tests/test_storage.py
@@ -75,7 +75,7 @@ class TestStorage(TestAutoData):
 
         """
 
-        instrument = InstrumentStorage(market="csi300", provider_uri=self.provider_uri)
+        instrument = InstrumentStorage(market="csi300", provider_uri=self.provider_uri, freq="day")
 
         for inst, spans in instrument.data.items():
             assert isinstance(inst, str) and isinstance(
@@ -88,7 +88,7 @@ class TestStorage(TestAutoData):
 
         print(f"instrument['SH600000']: {instrument['SH600000']}")
 
-        instrument = InstrumentStorage(market="csi300", provider_uri="not_found")
+        instrument = InstrumentStorage(market="csi300", provider_uri="not_found", freq="day")
         with self.assertRaises(ValueError):
             print(instrument.data)
 
@@ -163,8 +163,9 @@ class TestStorage(TestAutoData):
 
         feature = FeatureStorage(instrument="SH600004", field="close", freq="day", provider_uri="not_fount")
 
-        assert feature[0] == (None, None), "FeatureStorage does not exist, feature[i] should return `(None, None)`"
-        assert feature[:].empty, "FeatureStorage does not exist, feature[:] should return `pd.Series(dtype=np.float32)`"
-        assert (
-            feature.data.empty
-        ), "FeatureStorage does not exist, feature.data should return `pd.Series(dtype=np.float32)`"
+        with self.assertRaises(ValueError):
+            print(feature[0])
+        with self.assertRaises(ValueError):
+            print(feature[:].empty)
+        with self.assertRaises(ValueError):
+            print(feature.data.empty)
diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py
index de15d8722..179ab1e4a 100644
--- a/tests/test_all_pipeline.py
+++ b/tests/test_all_pipeline.py
@@ -201,6 +201,7 @@ class TestAllFlow(TestAutoData):
             0.10,
             "backtest failed",
         )
+        self.assertTrue(not analyze_df.isna().any().any(), "backtest failed")
 
     def test_3_expmanager(self):
         pass_default, pass_current, uri_path = fake_experiment()