diff --git a/qlib/data/dataset/handler.py b/qlib/data/dataset/handler.py
index 2889c4465..25d02fdf6 100644
--- a/qlib/data/dataset/handler.py
+++ b/qlib/data/dataset/handler.py
@@ -50,6 +50,9 @@ class DataHandler(Serializable):
                 SH600004    13.313329  11800983.0       13.313329        13.317701    0.183632  0.0042
                 SH600005    37.796539  12231662.0       38.258602        37.919757    0.970325  0.0289
 
+
+    Tips for improving the performance of DataHandler
+    - Fetching data with `col_set=CS_RAW` will return the raw data and may prevent pandas from copying the data when calling `loc`
     """
 
     def __init__(
@@ -257,6 +260,10 @@ class DataHandler(Serializable):
 class DataHandlerLP(DataHandler):
     """
     DataHandler with **(L)earnable (P)rocessor**
+
+    Tips for improving the performance of the data handler
+    - To reduce the memory cost
+        - `drop_raw=True`: this will modify the raw data in place;
     """
 
     # data key
diff --git a/qlib/workflow/task/collect.py b/qlib/workflow/task/collect.py
index 7cdca30fa..6c4e45c72 100644
--- a/qlib/workflow/task/collect.py
+++ b/qlib/workflow/task/collect.py
@@ -46,7 +46,12 @@ class RollingEnsemble:
             pred_l = []
             for rec in rec_l:
                 pred_l.append(rec.load_object("pred.pkl").iloc[:, 0])
-            pred = pd.concat(pred_l).sort_index()
+            # Make sure the predictions are sorted according to the rolling start time
+            pred_l.sort(key=lambda pred: pred.index.get_level_values("datetime").min())
+            pred = pd.concat(pred_l)
+            # If there are duplicated predictions, we use the latest prediction
+            pred = pred[~pred.index.duplicated(keep="last")]
+            pred = pred.sort_index()
             reduce_group[k] = pred
 
         return reduce_group