mirror of
https://github.com/microsoft/qlib.git
synced 2026-07-02 18:40:58 +08:00
update docs and fix duplicated pred bug
This commit is contained in:
@@ -50,6 +50,9 @@ class DataHandler(Serializable):
|
||||
SH600004 13.313329 11800983.0 13.313329 13.317701 0.183632 0.0042
|
||||
SH600005 37.796539 12231662.0 38.258602 37.919757 0.970325 0.0289
|
||||
|
||||
|
||||
Tips for improving the performance of datahandler
|
||||
- Fetching data with `col_set=CS_RAW` will return the raw data and may prevent pandas from copying the data when calling `loc`
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -257,6 +260,10 @@ class DataHandler(Serializable):
|
||||
class DataHandlerLP(DataHandler):
|
||||
"""
|
||||
DataHandler with **(L)earnable (P)rocessor**
|
||||
|
||||
Tips for improving the performance of the data handler
|
||||
- To reduce the memory cost
|
||||
- `drop_raw=True`: this will modify the data in place on the raw data;
|
||||
"""
|
||||
|
||||
# data key
|
||||
|
||||
@@ -46,7 +46,12 @@ class RollingEnsemble:
|
||||
pred_l = []
|
||||
for rec in rec_l:
|
||||
pred_l.append(rec.load_object("pred.pkl").iloc[:, 0])
|
||||
pred = pd.concat(pred_l).sort_index()
|
||||
# Make sure the predictions are sorted according to the rolling start time
|
||||
pred_l.sort(key=lambda pred: pred.index.get_level_values("datetime").min())
|
||||
pred = pd.concat(pred_l)
|
||||
# If there are duplicated predictions, we use the latest prediction
|
||||
pred = pred[~pred.index.duplicated(keep="last")]
|
||||
pred = pred.sort_index()
|
||||
reduce_group[k] = pred
|
||||
|
||||
return reduce_group
|
||||
|
||||
Reference in New Issue
Block a user