From e7a1b5ea1ff4412792eeb606a639e0dde422053a Mon Sep 17 00:00:00 2001 From: Linlang <30293408+SunsetWolf@users.noreply.github.com> Date: Wed, 2 Apr 2025 18:50:52 +0800 Subject: [PATCH] fix col name error when fetching data (#1904) * fix col name error when fetching data * fix col name error when fetching data * fix qlib installation error * optimize code * optimize code * optimize code --- pyproject.toml | 4 ++++ qlib/data/dataset/loader.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 275d632be..b7dc17187 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,9 @@ description = "A Quantitative-research Platform" requires-python = ">=3.8.0" readme = {file = "README.md", content-type = "text/markdown"} +# osqp, a dependency of cvxpy, released version 1.0.2 on 2025-04-02. +# That release causes errors when installing qlib, so osqp is pinned below 1.0.2. +# refs: https://github.com/osqp/osqp/issues/728 dependencies = [ "pyyaml", "numpy", @@ -39,6 +42,7 @@ dependencies = [ "loguru", "lightgbm", "gym", + "osqp<1.0.2", "cvxpy", "joblib", "matplotlib", diff --git a/qlib/data/dataset/loader.py b/qlib/data/dataset/loader.py index fab194433..d283cb4f6 100644 --- a/qlib/data/dataset/loader.py +++ b/qlib/data/dataset/loader.py @@ -339,6 +339,10 @@ class NestedDataLoader(DataLoader): if df_full is None: df_full = df_current else: + current_columns = df_current.columns.tolist() + full_columns = df_full.columns.tolist() + columns_to_drop = [col for col in current_columns if col in full_columns] + df_full.drop(columns=columns_to_drop, inplace=True) df_full = pd.merge(df_full, df_current, left_index=True, right_index=True, how=self.join) return df_full.sort_index(axis=1)