mirror of
https://github.com/microsoft/qlib.git
synced 2026-06-06 05:51:17 +08:00
Fix the Errors with unexpected indentation when building Qlib's documentation (#1352)
* Fix ERROR: Unexpected indentation in qlib/data/dataset/handler.py * Fix ERROR: Unexpected indentation in qlib/data/dataset/__init__.py * Fix ERROR: Unexpected indentation in ../qlib/data/cache.py * Fix ERROR: Unexpected indentation in qlib/model/meta/task.py * Fix ERROR: Unexpected indentation in qlib/model/meta/dataset.py * Fix ERROR: Unexpected indentation in qlib/workflow/online/manager.py * Fix ERROR: Unexpected indentation in qlib/workflow/online/update.py * Fix ERROR: Unexpected indentation in /qlib/workflow/__init__.py * Fix ERROR: Unexpected indentation in qlib/data/base.py * Fix ERROR: Unexpected indentation in qlib/data/dataset/loader.py * Fix ERROR: Unexpected indentation in qlib/contrib/evaluate.py * Fix ERROR: Unexpected indentation in qlib/workflow/record_temp.py * Fix ERROR: Unexpected indentation in qlib/workflow/task/gen.py * Fix ERROR: Unexpected indentation in qlib/strategy/base.py * Fix qlib/data/dataset/handler.py * Retest
This commit is contained in:
@@ -187,9 +187,13 @@ def backtest_daily(
|
||||
the benchmark for reporting.
|
||||
account : Union[float, int, Position]
|
||||
information describing how to create the account
|
||||
|
||||
For `float` or `int`:
|
||||
|
||||
Using Account with only initial cash
|
||||
|
||||
For `Position`:
|
||||
|
||||
Using Account with a Position
|
||||
exchange_kwargs : dict
|
||||
the kwargs for initializing Exchange
|
||||
@@ -283,8 +287,8 @@ def long_short_backtest(
|
||||
NOTE: This will be faster with offline qlib.
|
||||
:return: The result of backtest, it is represented by a dict.
|
||||
{ "long": long_returns(excess),
|
||||
"short": short_returns(excess),
|
||||
"long_short": long_short_returns}
|
||||
"short": short_returns(excess),
|
||||
"long_short": long_short_returns}
|
||||
"""
|
||||
if get_level_index(pred, level="datetime") == 1:
|
||||
pred = pred.swaplevel().sort_index()
|
||||
|
||||
@@ -16,8 +16,10 @@ class Expression(abc.ABC):
|
||||
|
||||
Expression is designed to handle the calculation of data with the format below
|
||||
data with two dimension for each instrument,
|
||||
|
||||
- feature
|
||||
- time: it could be observation time or period time.
|
||||
|
||||
- period time is designed for Point-in-time database. For example, the period time may be 2014Q4; its value can be observed multiple times (different values may be observed at different times due to amendments).
|
||||
"""
|
||||
|
||||
@@ -142,9 +144,12 @@ class Expression(abc.ABC):
|
||||
This function is responsible for loading feature/expression based on the expression engine.
|
||||
|
||||
The concrete implementation will be separated into two parts:
|
||||
|
||||
1) caching data, handle errors.
|
||||
|
||||
- This part is shared by all the expressions and implemented in Expression
|
||||
2) processing and calculating data based on the specific expression.
|
||||
|
||||
- This part is different in each expression and implemented in each expression
|
||||
|
||||
Expression Engine is shared by different data.
|
||||
|
||||
@@ -394,7 +394,7 @@ class DatasetCache(BaseProviderCache):
|
||||
|
||||
.. note:: The server use redis_lock to make sure
|
||||
read-write conflicts will not be triggered
|
||||
but client readers are not considered.
|
||||
but client readers are not considered.
|
||||
"""
|
||||
if disk_cache == 0:
|
||||
# skip cache
|
||||
|
||||
@@ -205,8 +205,9 @@ class DatasetH(Dataset):
|
||||
col_set : str
|
||||
The col_set will be passed to self.handler when fetching data.
|
||||
TODO: make it automatic:
|
||||
- select DK_I for test data
|
||||
- select DK_L for training data.
|
||||
|
||||
- select DK_I for test data
|
||||
- select DK_L for training data.
|
||||
data_key : str
|
||||
The data to fetch: DK_*
|
||||
Default is DK_I, which indicate fetching data for **inference**.
|
||||
|
||||
@@ -160,13 +160,17 @@ class DataHandler(Serializable):
|
||||
selector : Union[pd.Timestamp, slice, str]
|
||||
describe how to select data by index
|
||||
It can be categorized as follows
|
||||
|
||||
- fetch single index
|
||||
- fetch a range of index
|
||||
|
||||
- a slice range
|
||||
- pd.Index for specific indexes
|
||||
|
||||
The following conflicts may occur
|
||||
- Does [20200101", "20210101"] mean selecting this slice or these two days?
|
||||
|
||||
- Does ["20200101", "20210101"] mean selecting this slice or these two days?
|
||||
|
||||
- slice have higher priorities
|
||||
|
||||
level : Union[str, int]
|
||||
@@ -178,7 +182,8 @@ class DataHandler(Serializable):
|
||||
|
||||
select a set of meaningful, pd.Index columns.(e.g. features, columns)
|
||||
|
||||
if col_set == CS_RAW:
|
||||
- if col_set == CS_RAW:
|
||||
|
||||
the raw dataset will be returned.
|
||||
|
||||
- if isinstance(col_set, List[str]):
|
||||
@@ -186,8 +191,10 @@ class DataHandler(Serializable):
|
||||
select several sets of meaningful columns, the returned data has multiple levels
|
||||
|
||||
proc_func: Callable
|
||||
|
||||
- Give a hook for processing data before fetching
|
||||
- An example to explain the necessity of the hook:
|
||||
|
||||
- A Dataset learned some processors to process data which is related to data segmentation
|
||||
- It will apply them every time when preparing data.
|
||||
- The learned processor require the dataframe remains the same format when fitting and applying
|
||||
@@ -326,18 +333,23 @@ class DataHandlerLP(DataHandler):
|
||||
DataHandler with **(L)earnable (P)rocessor**
|
||||
|
||||
This handler will produce three pieces of data in pd.DataFrame format.
|
||||
|
||||
- DK_R / self._data: the raw data loaded from the loader
|
||||
- DK_I / self._infer: the data processed for inference
|
||||
- DK_L / self._learn: the data processed for learning model.
|
||||
|
||||
The motivation of using different processor workflows for learning and inference
|
||||
Here are some examples.
|
||||
|
||||
- The instrument universe for learning and inference may be different.
|
||||
- The processing of some samples may rely on label (for example, some samples hit the limit may need extra processing or be dropped).
|
||||
These processors only apply to the learning phase.
|
||||
|
||||
- These processors only apply to the learning phase.
|
||||
|
||||
Tips to improve the performance of data handler
|
||||
|
||||
- To reduce the memory cost
|
||||
|
||||
- `drop_raw=True`: this will modify the data inplace on raw data;
|
||||
"""
|
||||
|
||||
@@ -482,12 +494,18 @@ class DataHandlerLP(DataHandler):
|
||||
Notation: (data) [processor]
|
||||
|
||||
# data processing flow of self.process_type == DataHandlerLP.PTYPE_I
|
||||
(self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)
|
||||
\
|
||||
-[infer_processors]-(_infer_df)
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
(self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)
|
||||
\\
|
||||
-[infer_processors]-(_infer_df)
|
||||
|
||||
# data processing flow of self.process_type == DataHandlerLP.PTYPE_A
|
||||
(self._data)-[shared_processors]-(_shared_df)-[infer_processors]-(_infer_df)-[learn_processors]-(_learn_df)
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
(self._data)-[shared_processors]-(_shared_df)-[infer_processors]-(_infer_df)-[learn_processors]-(_learn_df)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
@@ -278,7 +278,9 @@ class DataLoaderDH(DataLoader):
|
||||
- If you just want to load data from single datahandler, you can write them in single data handler
|
||||
|
||||
TODO: What make this module not that easy to use.
|
||||
|
||||
- For online scenario
|
||||
|
||||
- The underlying data handler should be configured, but the data loader doesn't provide such an interface & hook.
|
||||
"""
|
||||
|
||||
|
||||
@@ -12,11 +12,15 @@ class MetaTaskDataset(Serializable, metaclass=abc.ABCMeta):
|
||||
A dataset fetching the data in a meta-level.
|
||||
|
||||
A Meta Dataset is responsible for
|
||||
|
||||
- input tasks(e.g. Qlib tasks) and prepare meta tasks
|
||||
|
||||
- meta task contains more information than normal tasks (e.g. input data for meta model)
|
||||
|
||||
The learnt pattern could transfer to other meta dataset. The following cases should be supported
|
||||
|
||||
- A meta-model trained on meta-dataset A and then applied to meta-dataset B
|
||||
|
||||
- Some pattern are shared between meta-dataset A and B, so meta-input on meta-dataset A are used when meta model are applied on meta-dataset-B
|
||||
"""
|
||||
|
||||
|
||||
@@ -11,9 +11,11 @@ class MetaTask:
|
||||
It serves as a component as in MetaDatasetDS
|
||||
|
||||
The data processing is different
|
||||
|
||||
- the processed input may be different between training and testing
|
||||
|
||||
- When training, the X, y, X_test, y_test in training tasks are necessary (# PROC_MODE_FULL #)
|
||||
but not necessary in test tasks. (# PROC_MODE_TEST #)
|
||||
but not necessary in test tasks. (# PROC_MODE_TEST #)
|
||||
- When the meta model can be transferred into other dataset, only meta_info is necessary (# PROC_MODE_TRANSFER #)
|
||||
"""
|
||||
|
||||
@@ -24,6 +26,7 @@ class MetaTask:
|
||||
def __init__(self, task: dict, meta_info: object, mode: str = PROC_MODE_FULL):
|
||||
"""
|
||||
The `__init__` func is responsible for
|
||||
|
||||
- store the task
|
||||
- store the origin input data for
|
||||
- process the input data for meta data
|
||||
|
||||
@@ -36,6 +36,7 @@ class BaseStrategy:
|
||||
outer_trade_decision : BaseTradeDecision, optional
|
||||
the trade decision of outer strategy which this strategy relies, and it will be traded in
|
||||
[start_time, end_time], by default None
|
||||
|
||||
- If the strategy is used to split trade decision, it will be used
|
||||
- If the strategy is used for portfolio management, it can be ignored
|
||||
level_infra : LevelInfrastructure, optional
|
||||
@@ -45,11 +46,13 @@ class BaseStrategy:
|
||||
|
||||
trade_exchange : Exchange
|
||||
exchange that provides market info, used to deal order and generate report
|
||||
|
||||
- If `trade_exchange` is None, self.trade_exchange will be set with common_infra
|
||||
- It allows different trade_exchanges is used in different executions.
|
||||
- For example:
|
||||
|
||||
- In daily execution, both daily exchange and minutely are usable, but the daily exchange is
|
||||
recommended because it run faster.
|
||||
recommended because it run faster.
|
||||
- In minutely execution, the daily exchange is not usable, only the minutely exchange is recommended.
|
||||
"""
|
||||
|
||||
@@ -137,6 +140,7 @@ class BaseStrategy:
|
||||
----------
|
||||
execute_result : List[object], optional
|
||||
the executed result for trade decision, by default None
|
||||
|
||||
- When call the generate_trade_decision firstly, `execute_result` could be None
|
||||
"""
|
||||
raise NotImplementedError("generate_trade_decision is not implemented!")
|
||||
|
||||
@@ -350,6 +350,7 @@ class QlibRecorder:
|
||||
Method to reset the current uri of current experiment manager.
|
||||
|
||||
NOTE:
|
||||
|
||||
- When the uri refers to a file path, please use the absolute path instead of strings like "~/mlruns/"
|
||||
The backend doesn't support strings like this.
|
||||
"""
|
||||
|
||||
@@ -78,7 +78,9 @@ For simplicity
|
||||
|
||||
|
||||
# Can we simplify current workflow?
|
||||
|
||||
- Can reduce the number of state of tasks?
|
||||
|
||||
- For each task, we have three phases (i.e. task, partly trained task, final trained task)
|
||||
"""
|
||||
|
||||
|
||||
@@ -82,19 +82,23 @@ class RecordUpdater(metaclass=ABCMeta):
|
||||
class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
|
||||
"""
|
||||
Dataset-Based Updater
|
||||
|
||||
- Providing updating feature for Updating data based on Qlib Dataset
|
||||
|
||||
Assumption
|
||||
- Based on Qlib dataset
|
||||
- The data to be updated is a multi-level index pd.DataFrame. For example label , prediction.
|
||||
|
||||
LABEL0
|
||||
datetime instrument
|
||||
2021-05-10 SH600000 0.006965
|
||||
SH600004 0.003407
|
||||
... ...
|
||||
2021-05-28 SZ300498 0.015748
|
||||
SZ300676 -0.001321
|
||||
- Based on Qlib dataset
|
||||
- The data to be updated is a multi-level index pd.DataFrame. For example label, prediction.
|
||||
|
||||
.. code-block::
|
||||
|
||||
LABEL0
|
||||
datetime instrument
|
||||
2021-05-10 SH600000 0.006965
|
||||
SH600004 0.003407
|
||||
... ...
|
||||
2021-05-28 SZ300498 0.015748
|
||||
SZ300676 -0.001321
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -111,6 +115,7 @@ class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
|
||||
Init PredUpdater.
|
||||
|
||||
Expected behavior in following cases:
|
||||
|
||||
- if `to_date` is greater than the max date in the calendar, the data will be updated to the latest date
|
||||
- if there are data before `from_date` or after `to_date`, only the data between `from_date` and `to_date` are affected.
|
||||
|
||||
@@ -118,11 +123,15 @@ class DSBasedUpdater(RecordUpdater, metaclass=ABCMeta):
|
||||
record : Recorder
|
||||
to_date :
|
||||
update the prediction to the `to_date`
|
||||
|
||||
if to_date is None:
|
||||
|
||||
data will be updated to the latest date.
|
||||
from_date :
|
||||
the update will start from `from_date`
|
||||
|
||||
if from_date is None:
|
||||
|
||||
the updating will occur on the next tick after the latest data in historical data
|
||||
hist_ref : int
|
||||
Sometimes, the dataset will have historical depends.
|
||||
|
||||
@@ -349,7 +349,9 @@ class PortAnaRecord(ACRecordTemp):
|
||||
This is the Portfolio Analysis Record class that generates the analysis results such as those of backtest. This class inherits the ``RecordTemp`` class.
|
||||
|
||||
The following files will be stored in recorder
|
||||
|
||||
- report_normal.pkl & positions_normal.pkl:
|
||||
|
||||
- The return report and detailed positions of the backtest, returned by `qlib/contrib/evaluate.py:backtest`
|
||||
- port_analysis.pkl : The risk analysis of your portfolio, returned by `qlib/contrib/evaluate.py:risk_analysis`
|
||||
"""
|
||||
|
||||
@@ -94,7 +94,9 @@ def handler_mod(task: dict, rolling_gen):
|
||||
"""
|
||||
Help to modify the handler end time when using RollingGen
|
||||
It tries to handle the following case
|
||||
|
||||
- Handler's data end_time is earlier than dataset's test_data's segments.
|
||||
|
||||
- To handle this, handler's data's end_time is extended.
|
||||
|
||||
If the handler's end_time is None, then it is not necessary to change its end time.
|
||||
|
||||
Reference in New Issue
Block a user