From e7a1b5ea1ff4412792eeb606a639e0dde422053a Mon Sep 17 00:00:00 2001
From: Linlang <30293408+SunsetWolf@users.noreply.github.com>
Date: Wed, 2 Apr 2025 18:50:52 +0800
Subject: [PATCH] fix col name error when fetch data (#1904)

* fix col name error when fetch data

* fix col name error when fetch data

* fix install qlib error

* optimize code

* optimize code

* optimize code
---
 pyproject.toml              | 4 ++++
 qlib/data/dataset/loader.py | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 275d632be..b7dc17187 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,9 @@ description = "A Quantitative-research Platform"
 requires-python = ">=3.8.0"
 readme = {file = "README.md", content-type = "text/markdown"}
 
+# osqp, a dependency of cvxpy, released version 1.0.2 on 2025-04-02.
+# That release causes errors when installing qlib, so osqp is pinned below it.
+# refs: https://github.com/osqp/osqp/issues/728
 dependencies = [
   "pyyaml",
   "numpy",
@@ -39,6 +42,7 @@ dependencies = [
   "loguru",
   "lightgbm",
   "gym",
+  "osqp<1.0.2",
   "cvxpy",
   "joblib",
   "matplotlib",
diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py
index fab194433..d283cb4f6 100644
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -339,6 +339,10 @@ class NestedDataLoader(DataLoader):
             if df_full is None:
                 df_full = df_current
             else:
+                current_columns = df_current.columns.tolist()
+                full_columns = df_full.columns.tolist()
+                columns_to_drop = [col for col in current_columns if col in full_columns]
+                df_full.drop(columns=columns_to_drop, inplace=True)
                 df_full = pd.merge(df_full, df_current, left_index=True, right_index=True, how=self.join)
         return df_full.sort_index(axis=1)