optimize log

Merge pull request #1609 from microsoft/xuyang1/finetune_prompts
finetune prompts
2026-07-27 22:12:47 +08:00 · 2023-07-20 12:45:07 +08:00 · 2023-07-19 20:01:07 +08:00 · 2023-07-19 20:00:09 +08:00 · 2023-07-18 21:47:58 +08:00 · 2023-07-18 11:52:43 +08:00
169 changed files with 5623 additions and 3472 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,8 +0,0 @@
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-.Python
-.env
-.git
-
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -12,54 +12,53 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, macos-13, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-        exclude:
-          - os: macos-13
-            python-version: "3.11"
-          - os: macos-13
-            python-version: "3.12"
+        os: [windows-latest, macos-11]
+        # FIXME: macos-latest currently raises an error.
+        # Python 3.6 is not supported because it lacks the required annotations support: https://stackoverflow.com/a/52890129
+        python-version: [3.7, 3.8]

    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
-        make dev
-    - name: Build wheel on ${{ matrix.os }}
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build wheel on Windows
      run: |
-        make build
-    - name: Upload to PyPi
+        pip install numpy
+        pip install cython
+        python setup.py bdist_wheel
+    - name: Build and publish
      env:
-        TWINE_USERNAME: __token__
-        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
-        twine check dist/*.whl
-        twine upload dist/*.whl --verbose
+        twine upload dist/*

  deploy_with_manylinux:
    runs-on: ubuntu-latest
    steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      with:
-        python-version: ${{ matrix.python-version }}
+    - uses: actions/checkout@v2
    - name: Build wheel on Linux
-      uses: RalfG/python-wheels-manylinux-build@v0.7.1-manylinux2014_x86_64
+      uses: RalfG/python-wheels-manylinux-build@v0.3.1-manylinux2010_x86_64
      with:
-        python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312'
+        # Python 3.6 is not supported because it lacks the required annotations support: https://stackoverflow.com/a/52890129
+        python-versions: 'cp37-cp37m cp38-cp38'
        build-requirements: 'numpy cython'
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
    - name: Install dependencies
      run: |
-        python -m pip install twine
-    - name: Upload to PyPi
+        pip install twine  
+    - name: Build and publish
      env:
-        TWINE_USERNAME: __token__
-        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
-        twine check dist/pyqlib-*-manylinux*.whl
-        twine upload dist/pyqlib-*-manylinux*.whl --verbose
+        twine upload dist/pyqlib-*-manylinux*.whl
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@@ -6,14 +6,8 @@ on:
    branches:
      - main

-permissions:
-  contents: read
-
 jobs:
  update_release_draft:
-    permissions:
-      contents: write
-      pull-requests: read
    runs-on: ubuntu-latest
    steps:
      # Drafts your next Release notes as Pull Requests are merged into "master"
--- a/.github/workflows/test_qlib_from_pip.yml
+++ b/.github/workflows/test_qlib_from_pip.yml
@@ -8,22 +8,31 @@ on:

 jobs:
  build:
+    if: ${{ false }}  # FIXME: temporarily disabled because we are rushing a feature
    timeout-minutes: 120

    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
-        # In github action, using python 3.7, pip install will not match the latest version of the package.
-        # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
-        # All things considered, we have removed python 3.7.
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Python 3.6 is not supported because it lacks the required annotations support: https://stackoverflow.com/a/52890129
+        python-version: [3.7, 3.8]

    steps:
    - name: Test qlib from pip
      uses: actions/checkout@v3

+    # The Python 3.7 release that CI installs on macOS is 3.7.17, which causes a "_bz not found" error.
+    # So we pin a more specific Python 3.7 version on macOS.
+    # refs: https://github.com/actions/setup-python/issues/682
    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
+
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
@@ -35,9 +44,12 @@ jobs:
    - name: Qlib installation test
      run: |
        python -m pip install pyqlib
+        # Specify the numpy version because the numpy upgrade caused the CI test to fail, 
+        # and this line of code will be removed when the next version of qlib is released.
+        python -m pip install "numpy<1.23"

    - name: Install Lightgbm for MacOS
-      if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
+      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
      run: |
        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
        HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
--- a/.github/workflows/test_qlib_from_source.yml
+++ b/.github/workflows/test_qlib_from_source.yml
@@ -14,27 +14,37 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
-        # In github action, using python 3.7, pip install will not match the latest version of the package.
-        # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
-        # All things considered, we have removed python 3.7.
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Python 3.6 is not supported because it lacks the required annotations support: https://stackoverflow.com/a/52890129
+        python-version: [3.7, 3.8]

    steps:
    - name: Test qlib from source
      uses: actions/checkout@v3

+    # The Python 3.7 release that CI installs on macOS is 3.7.17, which causes a "_bz not found" error.
+    # So we pin a more specific Python 3.7 version on macOS.
+    # refs: https://github.com/actions/setup-python/issues/682
    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
+
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Update pip to the latest version
+      # pip 23.1, released on Apr. 15, 2023, caused CI to fail; please refer to #1495 for detailed logs.
+      # The pip version is temporarily pinned to 23.0.
      run: |
-        python -m pip install --upgrade pip
+        python -m pip install pip==23.0

    - name: Installing pytorch for macos
-      if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
+      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
      run: |
        python -m pip install torch torchvision torchaudio

@@ -50,33 +60,84 @@ jobs:

    - name: Set up Python tools
      run: |
-        make dev
+        python -m pip install --upgrade cython
+        python -m pip install -e .[dev]

    - name: Lint with Black
+      # Python 3.7 gets an older version of black, so we run the black check only on higher Python versions.
+      if: (matrix.python-version != '3.7')
      run: |
-        make black
+        pip install -U black  # follow the latest version of black, previous Qlib dependency will downgrade black
+        black . -l 120 --check --diff

    - name: Make html with sphinx
-      # Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04.
-      if: ${{ matrix.os == 'ubuntu-22.04' }}
      run: |
-        make docs-gen
+        cd docs 
+        sphinx-build -W --keep-going -b html . _build
+        cd ..

+    # Check Qlib with pylint
+    # TODO: These problems we will solve in the future. Important among them are: W0221, W0223, W0237, E1102
+      # C0103: invalid-name
+      # C0209: consider-using-f-string
+      # R0402: consider-using-from-import
+      # R1705: no-else-return
+      # R1710: inconsistent-return-statements
+      # R1725: super-with-arguments
+      # R1735: use-dict-literal
+      # W0102: dangerous-default-value
+      # W0212: protected-access
+      # W0221: arguments-differ
+      # W0223: abstract-method
+      # W0231: super-init-not-called
+      # W0237: arguments-renamed
+      # W0612: unused-variable
+      # W0621: redefined-outer-name
+      # W0622: redefined-builtin
+      # FIXME: specify exception type
+      # W0703: broad-except
+      # W1309: f-string-without-interpolation
+      # E1102: not-callable
+      # E1136: unsubscriptable-object
+    # References for parameters: https://github.com/PyCQA/pylint/issues/4577#issuecomment-1000245962
+    # We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
    - name: Check Qlib with pylint
      run: |
-        make pylint
+        pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"

+    # The following flake8 error codes were ignored:
+      # E501 line too long
+        # Description: We have used black to limit the length of each line to 120.
+      # F541 f-string is missing placeholders
+        # Description: The same thing is done when using pylint for detection.
+      # E266 too many leading '#' for block comment
+        # Description: To make the code more readable, a lot of "#" is used.
+        # This error code appears centrally in:
+          # qlib/backtest/executor.py
+          # qlib/data/ops.py
+          # qlib/utils/__init__.py
+      # E402 module level import not at top of file
+        # Description: There are times when module level import is not available at the top of the file.
+      # W503 line break before binary operator
+        # Description: Since black formats the length of each line of code, it has to perform a line break when a line of arithmetic is too long.
+      # E731 do not assign a lambda expression, use a def
+        # Description: Restricts the use of lambda expressions, but at some point lambda expressions are required.
+      # E203 whitespace before ':'
+        # Description: If there is whitespace before ":", it cannot pass the black check.
    - name: Check Qlib with flake8
      run: |
-        make flake8
+        flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 --per-file-ignores="__init__.py:F401,F403" qlib

+    # https://github.com/python/mypy/issues/10600
    - name: Check Qlib with mypy
      run: |
-        make mypy
+        mypy qlib --install-types --non-interactive || true
+        mypy qlib --verbose
    
    - name: Check Qlib ipynb with nbqa
      run: |
-        make nbqa
+        nbqa black . -l 120 --check --diff
+        nbqa pylint . --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136,W0719,W0104,W0404,C0412,W0611,C0410 --const-rgx='[a-z_][a-z0-9_]{2,30}$'

    - name: Test data downloads
      run: |
@@ -84,7 +145,7 @@ jobs:
        python scripts/get_data.py download_data --file_name rl_data.zip --target_dir tests/.data/rl

    - name: Install Lightgbm for MacOS
-      if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
+      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
      run: |
        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
        HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
@@ -94,9 +155,11 @@ jobs:
        brew unlink libomp
        brew install libomp.rb

+    # Run after data downloads
    - name: Check Qlib ipynb with nbconvert
      run: |
-        make nbconvert
+        # add more ipynb files in future
+        jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb

    - name: Test workflow by config (install from source)
      run: |
--- a/.github/workflows/test_qlib_from_source_slow.yml
+++ b/.github/workflows/test_qlib_from_source_slow.yml
@@ -14,31 +14,43 @@ jobs:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
-        # In github action, using python 3.7, pip install will not match the latest version of the package.
-        # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
-        # All things considered, we have removed python 3.7.
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Python 3.6 is not supported because it lacks the required annotations support: https://stackoverflow.com/a/52890129
+        python-version: [3.7, 3.8]

    steps:
    - name: Test qlib from source slow
      uses: actions/checkout@v3

+    # The Python 3.7 release that CI installs on macOS is 3.7.17, which causes a "_bz not found" error.
+    # So we pin a more specific Python 3.7 version on macOS.
+    # refs: https://github.com/actions/setup-python/issues/682
    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os == 'macos-latest' && matrix.python-version == '3.7') || (matrix.os == 'macos-11' && matrix.python-version == '3.7')
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.7.16"
+
+    - name: Set up Python ${{ matrix.python-version }}
+      if: (matrix.os != 'macos-latest' || matrix.python-version != '3.7') && (matrix.os != 'macos-11' || matrix.python-version != '3.7')
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}

    - name: Set up Python tools
+      # pip 23.1, released on Apr. 15, 2023, caused CI to fail; please refer to #1495 for detailed logs.
+      # The pip version is temporarily pinned to 23.0.
      run: |
-        make dev
+        python -m pip install pip==23.0
+        pip install --upgrade cython numpy
+        pip install -e .[dev]

    - name: Downloads dependencies data
      run: |
        python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn

    - name: Install Lightgbm for MacOS
-      if: ${{ matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'macos-15' }}
+      if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
      run: |
        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
        HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,10 @@ dist/
 qlib/VERSION.txt
 qlib/data/_libs/expanding.cpp
 qlib/data/_libs/rolling.cpp
+qlib/finco/prompt_cache.json
+qlib/finco/finco_workspace/
+qlib/finco/knowledge/*/knowledge.pkl
+qlib/finco/knowledge/*/storage.yml
 examples/estimator/estimator_example/
 examples/rl/data/
 examples/rl/checkpoints/
@@ -49,4 +53,3 @@ tags

 ./pretrain
 .idea/
-.aider*
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -5,12 +5,6 @@
 # Required
 version: 2

-# Set the version of Python and other tools you might need
-build:
-  os: ubuntu-22.04
-  tools:
-    python: "3.8"
-
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
  configuration: docs/conf.py
@@ -20,6 +14,7 @@ formats: all

 # Optionally set the version of Python and requirements required to build your docs
 python:
+  version: 3.7
  install:
    - requirements: docs/requirements.txt
    - method: pip
--- a/31
+++ b/31
@@ -1,31 +0,0 @@
-FROM continuumio/miniconda3:latest
-
-WORKDIR /qlib
-
-COPY . .
-
-RUN apt-get update && \
-    apt-get install -y build-essential
-
-RUN conda create --name qlib_env python=3.8 -y
-RUN echo "conda activate qlib_env" >> ~/.bashrc
-ENV PATH /opt/conda/envs/qlib_env/bin:$PATH
-
-RUN python -m pip install --upgrade pip
-
-RUN python -m pip install numpy==1.23.5
-RUN python -m pip install pandas==1.5.3
-RUN python -m pip install importlib-metadata==5.2.0
-RUN python -m pip install "cloudpickle<3"
-RUN python -m pip install scikit-learn==1.3.2
-
-RUN python -m pip install cython packaging tables matplotlib statsmodels
-RUN python -m pip install pybind11 cvxpy
-
-ARG IS_STABLE="yes"
-
-RUN if [ "$IS_STABLE" = "yes" ]; then \
-        python -m pip install pyqlib; \
-    else \
-        python setup.py install; \
-    fi
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1 @@
-exclude tests/*
-include qlib/*
-include qlib/*/*
-include qlib/*/*/*
-include qlib/*/*/*/*
-include qlib/*/*/*/*/*
+include qlib/VERSION.txt
--- a/195
+++ b/195
@@ -1,195 +0,0 @@
-.PHONY: clean deepclean prerequisite dependencies lightgbm rl develop lint docs package test analysis all install dev black pylint flake8 mypy nbqa nbconvert lint build upload docs-gen
-#You can modify it according to your terminal
-SHELL := /bin/bash
-
-########################################################################################
-# Variables
-########################################################################################
-
-# Documentation target directory, will be adapted to specific folder for readthedocs.
-PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT/html" || echo "public")
-
-SO_DIR := qlib/data/_libs
-SO_FILES := $(wildcard $(SO_DIR)/*.so)
-
-########################################################################################
-# Development Environment Management
-########################################################################################
-# Remove common intermediate files.
-clean:
-	-rm -rf \
-		$(PUBLIC_DIR) \
-		qlib/data/_libs/*.cpp \
-		qlib/data/_libs/*.so \
-		mlruns \
-		public \
-		build \
-		.coverage \
-		.mypy_cache \
-		.pytest_cache \
-		.ruff_cache \
-		Pipfile* \
-		coverage.xml \
-		dist \
-		release-notes.md
-
-	find . -name '*.egg-info' -print0 | xargs -0 rm -rf
-	find . -name '*.pyc' -print0 | xargs -0 rm -f
-	find . -name '*.swp' -print0 | xargs -0 rm -f
-	find . -name '.DS_Store' -print0 | xargs -0 rm -f
-	find . -name '__pycache__' -print0 | xargs -0 rm -rf
-
-# Remove pre-commit hook, virtual environment alongside itermediate files.
-deepclean: clean
-	if command -v pre-commit > /dev/null 2>&1; then pre-commit uninstall --hook-type pre-push; fi
-	if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi
-
-# Prerequisite section
-# What this code does is compile two Cython modules, rolling and expanding, using setuptools and Cython,
-# and builds them as binary expansion modules that can be imported directly into Python.
-# Since pyproject.toml can't do that, we compile it here.
-prerequisite:
-	@if [ -n "$(SO_FILES)" ]; then \
-		echo "Shared library files exist, skipping build."; \
-	else \
-		echo "No shared library files found, building..."; \
-		pip install --upgrade setuptools wheel; \
-		python -m pip install cython numpy; \
-		python -c "from setuptools import setup, Extension; from Cython.Build import cythonize; import numpy; extensions = [Extension('qlib.data._libs.rolling', ['qlib/data/_libs/rolling.pyx'], language='c++', include_dirs=[numpy.get_include()]), Extension('qlib.data._libs.expanding', ['qlib/data/_libs/expanding.pyx'], language='c++', include_dirs=[numpy.get_include()])]; setup(ext_modules=cythonize(extensions, language_level='3'), script_args=['build_ext', '--inplace'])"; \
-	fi
-
-# Install the package in editable mode.
-dependencies:
-	python -m pip install -e .
-
-lightgbm:
-	python -m pip install lightgbm --prefer-binary
-
-rl:
-	python -m pip install -e .[rl]
-
-develop:
-	python -m pip install -e .[dev]
-
-lint:
-	python -m pip install -e .[lint]
-
-docs:
-	python -m pip install -e .[docs]
-
-package:
-	python -m pip install -e .[package]
-
-test:
-	python -m pip install -e .[test]
-
-analysis:
-	python -m pip install -e .[analysis]
-
-all:
-	python -m pip install -e .[dev,lint,docs,package,test,analysis,rl]
-
-install: prerequisite dependencies
-
-dev: prerequisite all
-
-########################################################################################
-# Lint and pre-commit
-########################################################################################
-
-# Check lint with black.
-black:
-	black . -l 120 --check --diff
-
-# Check code folder with pylint.
-# TODO: These problems we will solve in the future. Important among them are: W0221, W0223, W0237, E1102
-# 	C0103: invalid-name
-# 	C0209: consider-using-f-string
-# 	R0402: consider-using-from-import
-# 	R1705: no-else-return
-# 	R1710: inconsistent-return-statements
-# 	R1725: super-with-arguments
-# 	R1735: use-dict-literal
-# 	W0102: dangerous-default-value
-# 	W0212: protected-access
-# 	W0221: arguments-differ
-# 	W0223: abstract-method
-# 	W0231: super-init-not-called
-# 	W0237: arguments-renamed
-# 	W0612: unused-variable
-# 	W0621: redefined-outer-name
-# 	W0622: redefined-builtin
-# 	FIXME: specify exception type
-# 	W0703: broad-except
-# 	W1309: f-string-without-interpolation
-# 	E1102: not-callable
-# 	E1136: unsubscriptable-object
-# 	W4904: deprecated-class
-# 	R0917: too-many-positional-arguments
-# 	E1123: unexpected-keyword-arg
-# References for disable error: https://pylint.pycqa.org/en/latest/user_guide/messages/messages_overview.html
-# We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
-# References for parameters: https://github.com/PyCQA/pylint/issues/4577#issuecomment-1000245962
-pylint:
-	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,W4904,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1730,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' qlib --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
-	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,E1123,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0246,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' scripts --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
-
-# Check code with flake8.
-# The following flake8 error codes were ignored:
-# E501 line too long
-# 	Description: We have used black to limit the length of each line to 120.
-# F541 f-string is missing placeholders
-# 	Description: The same thing is done when using pylint for detection.
-# E266 too many leading '#' for block comment
-# 	Description: To make the code more readable, a lot of "#" is used.
-#         This error code appears centrally in:
-# 			qlib/backtest/executor.py
-# 			qlib/data/ops.py
-# 			qlib/utils/__init__.py
-# E402 module level import not at top of file
-# 	Description: There are times when module level import is not available at the top of the file.
-# W503 line break before binary operator
-# 	Description: Since black formats the length of each line of code, it has to perform a line break when a line of arithmetic is too long.
-# E731 do not assign a lambda expression, use a def
-# 	Description: Restricts the use of lambda expressions, but at some point lambda expressions are required.
-# E203 whitespace before ':'
-# 	Description: If there is whitespace before ":", it cannot pass the black check.
-flake8:
-	flake8 --ignore=E501,F541,E266,E402,W503,E731,E203 --per-file-ignores="__init__.py:F401,F403" qlib
-
-# Check code with mypy.
-# https://github.com/python/mypy/issues/10600
-mypy:
-	mypy qlib --install-types --non-interactive
-	mypy qlib --verbose
-
-# Check ipynb with nbqa.
-nbqa:
-	nbqa black . -l 120 --check --diff
-	nbqa pylint . --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136,W0719,W0104,W0404,C0412,W0611,C0410 --const-rgx='[a-z_][a-z0-9_]{2,30}'
-
-# Check ipynb with nbconvert.(Run after data downloads)
-# TODO: Add more ipynb files in future
-nbconvert:
-	jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb
-
-lint: black pylint flake8 mypy nbqa
-
-########################################################################################
-# Package
-########################################################################################
-
-# Build the package.
-build:
-	python -m build --wheel
-
-# Upload the package.
-upload:
-	python -m twine upload dist/*
-
-########################################################################################
-# Documentation
-########################################################################################
-
-docs-gen:
-	python -m sphinx.cmd.build -W docs $(PUBLIC_DIR)
--- a/README.md
+++ b/README.md
@@ -8,30 +8,9 @@
 [![Join the chat at https://gitter.im/Microsoft/qlib](https://badges.gitter.im/Microsoft/qlib.svg)](https://gitter.im/Microsoft/qlib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

 ## :newspaper: **What's NEW!** &nbsp;   :sparkling_heart: 
-
 Recent released features
-
-### Introducing <a href="https://github.com/microsoft/RD-Agent"><img src="docs/_static/img/rdagent_logo.png" alt="RD_Agent" style="height: 2em"></a>: LLM-Based Autonomous Evolving Agents for Industrial Data-Driven R&D
-
-We are excited to announce the release of **RD-Agent**📢, a powerful tool that supports automated factor mining and model optimization in quant investment R&D.
-
-RD-Agent is now available on [GitHub](https://github.com/microsoft/RD-Agent), and we welcome your star🌟!
-
-To learn more, please visit our [♾️Demo page](https://rdagent.azurewebsites.net/). Here, you will find demo videos in both English and Chinese to help you better understand the scenario and usage of RD-Agent.
-
-We have prepared several demo videos for you:
-| Scenario | Demo video (English) | Demo video (中文) |
-| --                      | ------    | ------    |
-| Quant Factor Mining | [Link](https://rdagent.azurewebsites.net/factor_loop?lang=en) | [Link](https://rdagent.azurewebsites.net/factor_loop?lang=zh) |
-| Quant Factor Mining from reports | [Link](https://rdagent.azurewebsites.net/report_factor?lang=en) | [Link](https://rdagent.azurewebsites.net/report_factor?lang=zh) |
-| Quant Model Optimization | [Link](https://rdagent.azurewebsites.net/model_loop?lang=en) | [Link](https://rdagent.azurewebsites.net/model_loop?lang=zh) |
-
-***
-
 | Feature | Status |
 | --                      | ------    |
-| BPQP for End-to-end learning | 📈Coming soon!([Under review](https://github.com/microsoft/qlib/pull/1863)) |
-| 🔥LLM-driven Auto Quant Factory🔥 | 🚀 Released in [♾️RD-Agent](https://github.com/microsoft/RD-Agent) on Aug 8, 2024 |
 | KRNN and Sandwich models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1414/) on May 26, 2023 |
 | Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
 | RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316),[#1299](https://github.com/microsoft/qlib/pull/1299),[#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076)|
@@ -61,7 +40,7 @@ We have prepared several demo videos for you:
 Features released before 2021 are not listed here.

 <p align="center">
-  <img src="docs/_static/img/logo/1.png" />
+  <img src="http://fintech.msra.cn/images_v070/logo/1.png" />
 </p>

 Qlib is an open-source, AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning.
@@ -153,18 +132,17 @@ Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** shows how
 ## Installation

 This table demonstrates the supported Python version of `Qlib`:
-|               | install with pip      | install from source  |        plot        |
-| ------------- |:---------------------:|:--------------------:|:------------------:|
+|               | install with pip           | install from source  | plot |
+| ------------- |:---------------------:|:--------------------:|:----:|
+| Python 3.7    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
 | Python 3.8    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
-| Python 3.9    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
-| Python 3.10   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
-| Python 3.11   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
-| Python 3.12   | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
+| Python 3.9    | :x:                   | :heavy_check_mark:   | :x: |

 **Note**: 
-1. **Conda** is suggested for managing your Python environment. In some cases, using Python outside of a `conda` environment may result in missing header files, causing the installation failure of certain packages.
-2. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.8 or higher, or use `conda`'s Python to install ``Qlib`` from source.
-3. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
+1. **Conda** is suggested for managing your Python environment.
+1. Please note that installing cython in Python 3.6 will raise errors when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
+1. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtest and plot most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
+1. `Qlib` requires the `tables` package, and `hdf5` in `tables` does not support Python 3.9.

 ### Install with pip
 Users can easily install ``Qlib`` by pip according to the following command.
@@ -182,34 +160,19 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor

  ```bash
  pip install numpy
-  pip install --upgrade cython
+  pip install --upgrade  cython
  ```

 * Clone the repository and install ``Qlib`` as follows.
    ```bash
    git clone https://github.com/microsoft/qlib.git && cd qlib
-    pip install .  # `pip install -e .[dev]` is recommended for development. check details in docs/developer/code_standard_and_dev_guide.rst
+    pip install .
    ```
+  **Note**:  You can install Qlib with `python setup.py install` as well. But it is not the recommended approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.

 **Tips**: If you fail to install `Qlib` or run the examples in your environment,  comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.

-**Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully. 
-
 ## Data Preparation
-❗ Due to more restrict data security policy. The offical dataset is disabled temporarily. You can try [this data source](https://github.com/chenditc/investment_data/releases) contributed by the community.
-Here is an example to download the data updated on 20240809.
-```bash
-wget https://github.com/chenditc/investment_data/releases/download/2024-08-09/qlib_bin.tar.gz
-mkdir -p ~/.qlib/qlib_data/cn_data
-tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=1
-rm -f qlib_bin.tar.gz
-```
-
-The official dataset below will resume in short future.
-
-
----
-
 Load and prepare data by running the following code:

 ### Get with module
@@ -293,38 +256,6 @@ We recommend users to prepare their own data if they have a high-quality dataset
  ```
 -->

-## Docker images
-1. Pulling a docker image from a docker hub repository
-    ```bash
-    docker pull pyqlib/qlib_image_stable:stable
-    ```
-2. Start a new Docker container
-    ```bash
-    docker run -it --name <container name> -v <Mounted local directory>:/app qlib_image_stable
-    ```
-3. At this point you are in the docker environment and can run the qlib scripts. An example:
-    ```bash
-    >>> python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
-    >>> python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
-    ```
-4. Exit the container
-    ```bash
-    >>> exit
-    ```
-5. Restart the container
-    ```bash
-    docker start -i -a <container name>
-    ```
-6. Stop the container
-    ```bash
-    docker stop <container name>
-    ```
-7. Delete the container
-    ```bash
-    docker rm <container name>
-    ```
-8. If you want to know more information, please refer to the [documentation](https://qlib.readthedocs.io/en/latest/developer/how_to_build_image.html).
-
 ## Auto Quant Research Workflow
 Qlib provides a tool named `qrun` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 

@@ -358,22 +289,22 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
    ```
    Here are detailed documents for `qrun` and [workflow](https://qlib.readthedocs.io/en/latest/component/workflow.html).

-2. Graphical Reports Analysis: First, run `python -m pip install .[analysis]` to install the required dependencies. Then run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports. 
+2. Graphical Reports Analysis: Run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports.
    - Forecasting signal (model prediction) analysis
      - Cumulative Return of groups
-      ![Cumulative Return](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_cumulative_return.png)
+      ![Cumulative Return](http://fintech.msra.cn/images_v070/analysis/analysis_model_cumulative_return.png?v=0.1)
      - Return distribution
-      ![long_short](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_long_short.png)
+      ![long_short](http://fintech.msra.cn/images_v070/analysis/analysis_model_long_short.png?v=0.1)
      - Information Coefficient (IC)
-      ![Information Coefficient](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_IC.png)
-      ![Monthly IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_monthly_IC.png)
-      ![IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_NDQ.png)
+      ![Information Coefficient](http://fintech.msra.cn/images_v070/analysis/analysis_model_IC.png?v=0.1)
+      ![Monthly IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_monthly_IC.png?v=0.1)
+      ![IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_NDQ.png?v=0.1)
      - Auto Correlation of forecasting signal (model prediction)
-      ![Auto Correlation](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_auto_correlation.png)
+      ![Auto Correlation](http://fintech.msra.cn/images_v070/analysis/analysis_model_auto_correlation.png?v=0.1)

    - Portfolio analysis
      - Backtest return
-      ![Report](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/report.png)
+      ![Report](http://fintech.msra.cn/images_v070/analysis/report.png?v=0.1)
      <!-- 
      - Score IC
      ![Score IC](docs/_static/img/score_ic.png)
@@ -390,7 +321,7 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
 The automatic workflow may not suit the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.

 # Main Challenges & Solutions in Quant Research
-Quant investment is a very unique scenario with lots of key challenges to be solved.
+Quant investment is a very unique scenario with lots of key challenges to be solved.
 Currently, Qlib provides some solutions for several of them.

 ## Forecasting: Finding Valuable Signals/Patterns
@@ -429,7 +360,7 @@ Here is a list of models built on `Qlib`.

 Your PR of new Quant models is highly welcomed.

-The performance of each model on the `Alpha158` and `Alpha360` datasets can be found [here](examples/benchmarks/README.md).
+The performance of each model on the `Alpha158` and `Alpha360` datasets can be found [here](examples/benchmarks/README.md).

 ### Run a single model
 All the models listed above are runnable with ``Qlib``. Users can find the config files we provide and some details about the model through the [benchmarks](examples/benchmarks) folder. More information can be retrieved at the model files listed above.
@@ -552,7 +483,7 @@ Qlib data are stored in a compact format, which is efficient to be combined into
 Join IM discussion groups:
 |[Gitter](https://gitter.im/Microsoft/qlib)|
 |----|
-|![image](https://github.com/microsoft/qlib/blob/main/docs/_static/img/qrcode/gitter_qr.png)|
+|![image](http://fintech.msra.cn/images_v070/qrcode/gitter_qr.png)|

 # Contributing
 We appreciate all contributions and thank all the contributors!
--- a/build_docker_image.sh
+++ b/build_docker_image.sh
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-docker_user="your_dockerhub_username"
-
-read -p "Do you want to build the nightly version of the qlib image? (default is stable) (yes/no): " answer;
-answer=$(echo "$answer" | tr '[:upper:]' '[:lower:]')
-
-if [ "$answer" = "yes" ]; then
-    # Build the nightly version of the qlib image
-    docker build --build-arg IS_STABLE=no -t qlib_image -f ./Dockerfile .
-    image_tag="nightly"
-else
-    # Build the stable version of the qlib image
-    docker build -t qlib_image -f ./Dockerfile .
-    image_tag="stable"
-fi
-
-read -p "Is it uploaded to docker hub? (default is no) (yes/no): " answer;
-answer=$(echo "$answer" | tr '[:upper:]' '[:lower:]')
-
-if [ "$answer" = "yes" ]; then
-    # Log in to Docker Hub
-    # If you are a new docker hub user, please verify your email address before proceeding with this step.
-    docker login
-    # Tag the Docker image
-    docker tag qlib_image "$docker_user/qlib_image:$image_tag"
-    # Push the Docker image to Docker Hub
-    docker push "$docker_user/qlib_image:$image_tag"
-else
-    echo "Not uploaded to docker hub."
-fi
--- a/docs/_static/img/rdagent_logo.png
+++ b/docs/_static/img/rdagent_logo.png
--- a/docs/component/data.rst
+++ b/docs/component/data.rst
@@ -52,7 +52,7 @@ Also, ``Qlib`` provides a high-frequency dataset. Users can run a high-frequency
 Qlib Format Dataset
 -------------------
 ``Qlib`` has provided an off-the-shelf dataset in `.bin` format, users could use the script ``scripts/get_data.py`` to download the China-Stock dataset as follows. User can also use numpy to load `.bin` file to validate data.
-The price volume data look different from the actual dealing price because of they are **adjusted** (`adjusted price <https://www.investopedia.com/terms/a/adjusted_closing_price.asp>`_).  And then you may find that the adjusted price may be different from different data sources. This is because different data sources may vary in the way of adjusting prices. Qlib normalize the price on first trading day of each stock to 1 when adjusting them.
+The price volume data look different from the actual dealing price because they are **adjusted** (`adjusted price <https://www.investopedia.com/terms/a/adjusted_closing_price.asp>`_).  And then you may find that the adjusted price may be different from different data sources. This is because different data sources may vary in the way of adjusting prices. Qlib normalizes the price on the first trading day of each stock to 1 when adjusting them.
 Users can leverage `$factor` to get the original trading price (e.g. `$close / $factor` to get the original close price).

 Here are some discussions about the price adjusting of Qlib. 
@@ -140,13 +140,12 @@ Users can also provide their own data in CSV format. However, the CSV data **mus

        where the data are in the following format:

-            +-----------+-------+
-            | symbol    | close |
-            +===========+=======+
-            | SH600000  | 120   |
-            +-----------+-------+
+        .. code-block::

- CSV file **must** include a column for the date, and when dumping the data, user must specify the date column name. Here is an example:
+            symbol,close
+            SH600000,120
+
+- CSV file **must** include a column for the date, and when dumping the data, users must specify the date column name. Here is an example:

    .. code-block:: bash

@@ -154,13 +153,11 @@ Users can also provide their own data in CSV format. However, the CSV data **mus

    where the data are in the following format:

-        +---------+------------+-------+------+----------+
-        | symbol  | date       | close | open | volume   |
-        +=========+============+=======+======+==========+
-        | SH600000| 2020-11-01 | 120   | 121  | 12300000 |
-        +---------+------------+-------+------+----------+
-        | SH600000| 2020-11-02 | 123   | 120  | 12300000 |
-        +---------+------------+-------+------+----------+
+    .. code-block::
+
+        symbol,date,close,open,volume
+        SH600000,2020-11-01,120,121,12300000
+        SH600000,2020-11-02,123,120,12300000


 Supposed that users prepare their CSV format data in the directory ``~/.qlib/csv_data/my_data``, they can run the following command to start the conversion.
--- a/docs/component/model.rst
+++ b/docs/component/model.rst
@@ -86,7 +86,7 @@ Example
            },
        }

-        # model initialization
+        # model initialization
        model = init_instance_by_config(task["model"])
        dataset = init_instance_by_config(task["dataset"])

--- a/docs/conf.py
+++ b/docs/conf.py
@@ -123,6 +123,7 @@ html_logo = "_static/img/logo/1.png"
 html_theme_options = {
    "logo_only": True,
    "collapse_navigation": False,
+    "display_version": False,
    "navigation_depth": 4,
 }

--- a/docs/developer/code_standard_and_dev_guide.rst
+++ b/docs/developer/code_standard_and_dev_guide.rst
@@ -60,4 +60,4 @@ The `[dev]` option will help you to install some related packages when developin

 .. code-block:: bash

-    pip install -e ".[dev]"
+    pip install -e .[dev]
--- a/docs/developer/how_to_build_image.rst
+++ b/docs/developer/how_to_build_image.rst
@@ -1,81 +0,0 @@
-.. _docker_image:
-
-==================
-Build Docker Image
-==================
-
-Dockerfile
-==========
-
-There is a **Dockerfile** file in the root directory of the project from which you can build the docker image. There are two build methods in Dockerfile to choose from.
-When executing the build command, use the ``--build-arg`` parameter to control the image version. The ``--build-arg`` parameter defaults to ``yes``, which builds the ``stable`` version of the qlib image.
-
-1.For the ``stable`` version, use ``pip install pyqlib`` to build the qlib image.
-
-.. code-block:: bash
-
-    docker build --build-arg IS_STABLE=yes -t <image name> -f ./Dockerfile .
-
-.. code-block:: bash
-
-    docker build -t <image name> -f ./Dockerfile .
-
-2. For the ``nightly`` version, use current source code to build the qlib image.
-
-.. code-block:: bash
-
-    docker build --build-arg IS_STABLE=no -t <image name> -f ./Dockerfile .
-
-Auto build of qlib images
-=========================
-
-1. There is a **build_docker_image.sh** file in the root directory of your project, which can be used to automatically build docker images and upload them to your docker hub repository(Optional, configuration required).
-
-.. code-block:: bash
-
-    sh build_docker_image.sh
-    >>> Do you want to build the nightly version of the qlib image? (default is stable) (yes/no):
-    >>> Is it uploaded to docker hub? (default is no) (yes/no):
-
-2. If you want to upload the built image to your docker hub repository, you need to edit your **build_docker_image.sh** file first, fill in ``docker_user`` in the file, and then execute this file.
-
-How to use qlib images
-======================
-1. Start a new Docker container
-
-.. code-block:: bash
-
-    docker run -it --name <container name> -v <Mounted local directory>:/app <image name>
-
-2. At this point you are in the docker environment and can run the qlib scripts. An example:
-
-.. code-block:: bash
-
-    >>> python scripts/get_data.py qlib_data --name qlib_data_simple --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
-    >>> python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
-
-3. Exit the container
-
-.. code-block:: bash
-
-    >>> exit
-
-4. Restart the container
-
-.. code-block:: bash
-
-    docker start -i -a <container name>
-
-5. Stop the container
-
-.. code-block:: bash
-
-    docker stop -i -a <container name>
-
-6. Delete the container
-
-.. code-block:: bash
-
-    docker rm <container name>
-
-7. For more information on using docker see the `docker documentation <https://docs.docker.com/reference/cli/docker/>`_.
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -61,7 +61,6 @@ Document Structure
   :caption: FOR DEVELOPERS:

   Code Standard & Development Guidance <developer/code_standard_and_dev_guide.rst>
-   How to build image <developer/how_to_build_image.rst>

 .. toctree::
   :maxdepth: 3
--- a/docs/introduction/introduction.rst
+++ b/docs/introduction/introduction.rst
@@ -36,7 +36,7 @@ Name                         Description
                             the training process of models which enable algorithms controlling the
                             training process.

-`Learning Framework` layer   The `Forecast Model` and `Trading Agent` are trainable. They are trained
+`Learning Framework` layer   The `Forecast Model` and `Trading Agent` are learnable. They are learned
                             based on the `Learning Framework` layer and then applied to multiple scenarios
                             in `Workflow` layer. The supported learning paradigms can be categorized into
                             reinforcement learning and supervised learning.  The learning framework
@@ -51,7 +51,7 @@ Name                         Description
                             modules.  With these signals `Decision Generator` will generate the target
                             trading decisions(i.e. portfolio, orders)
                             If RL-based Strategies are adopted, the `Policy` is learned in a end-to-end way,
-                             the trading decisions are generated directly.
+                             the trading decisions are generated directly.
                             Decisions will be executed by `Execution Env`
                             (i.e. the trading market).  There may be multiple levels of `Strategy`
                             and `Executor` (e.g. an *order executor trading strategy and intraday order executor*
--- a/docs/introduction/quick.rst
+++ b/docs/introduction/quick.rst
@@ -16,7 +16,7 @@ This ``Quick Start`` guide tries to demonstrate
 Installation
 ============

-Users can easily install ``Qlib`` according to the following steps:
+Users can easily install ``Qlib`` according to the following steps:

 - Before installing ``Qlib`` from source, users need to install some dependencies:

--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,4 +5,3 @@ scipy
 scikit-learn
 pandas
 tianshou
-sphinx_rtd_theme
--- a/examples/benchmarks/GeneralPtNN/README.md
+++ b/examples/benchmarks/GeneralPtNN/README.md
@@ -1,19 +0,0 @@
-
-
-# Introduction
-
-What is GeneralPtNN
- Fix previous design that fail to support both Time-series and tabular data
- Now you can just replace the Pytorch model structure to run a NN model.
-
-We provide an example to demonstrate the effectiveness of the current design.
- `workflow_config_gru.yaml` align with previous results [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset)
-  - `workflow_config_gru2mlp.yaml` to demonstrate we can convert config from time-series to tabular data with minimal changes
-    - You only have to change the net & dataset class to make the conversion.
- `workflow_config_mlp.yaml` achieved similar functionality with [MLP](../README.md#Alpha158-dataset)
-
-# TODO
-
- We will align existing models to current design.
-
- The result of `workflow_config_mlp.yaml` is different with the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method compared to previous implementations. Specificly, GeneralPtNN controls training according to epoches, whereas previous methods controlled by max_steps. 
--- a/examples/benchmarks/GeneralPtNN/workflow_config_gru.yaml
+++ b/examples/benchmarks/GeneralPtNN/workflow_config_gru.yaml
@@ -1,100 +0,0 @@
-qlib_init:
-    provider_uri: "~/.qlib/qlib_data/cn_data"
-    region: cn
-market: &market csi300
-benchmark: &benchmark SH000300
-data_handler_config: &data_handler_config
-    start_time: 2008-01-01
-    end_time: 2020-08-01
-    fit_start_time: 2008-01-01
-    fit_end_time: 2014-12-31
-    instruments: *market
-    infer_processors:
-        - class: FilterCol
-          kwargs:
-              fields_group: feature
-              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
-                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
-                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
-                        ]
-        - class: RobustZScoreNorm
-          kwargs:
-              fields_group: feature
-              clip_outlier: true
-        - class: Fillna
-          kwargs:
-              fields_group: feature
-    learn_processors:
-        - class: DropnaLabel
-        - class: CSRankNorm
-          kwargs:
-              fields_group: label
-    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
-
-port_analysis_config: &port_analysis_config
-    strategy:
-        class: TopkDropoutStrategy
-        module_path: qlib.contrib.strategy
-        kwargs:
-            signal: <PRED>
-            topk: 50
-            n_drop: 5
-    backtest:
-        start_time: 2017-01-01
-        end_time: 2020-08-01
-        account: 100000000
-        benchmark: *benchmark
-        exchange_kwargs:
-            limit_threshold: 0.095
-            deal_price: close
-            open_cost: 0.0005
-            close_cost: 0.0015
-            min_cost: 5
-task:
-    model:
-        class: GeneralPTNN
-        module_path: qlib.contrib.model.pytorch_general_nn
-        kwargs:
-            n_epochs: 200
-            lr: 2e-4
-            early_stop: 10
-            batch_size: 800
-            metric: loss
-            loss: mse
-            n_jobs: 20
-            GPU: 0
-            pt_model_uri: "qlib.contrib.model.pytorch_gru_ts.GRUModel"
-            pt_model_kwargs: {
-                "d_feat": 20,
-                "hidden_size": 64,
-                "num_layers": 2,
-                "dropout": 0.,
-            }
-    dataset:
-        class: TSDatasetH
-        module_path: qlib.data.dataset
-        kwargs:
-            handler:
-                class: Alpha158
-                module_path: qlib.contrib.data.handler
-                kwargs: *data_handler_config
-            segments:
-                train: [2008-01-01, 2014-12-31]
-                valid: [2015-01-01, 2016-12-31]
-                test: [2017-01-01, 2020-08-01]
-            step_len: 20
-    record: 
-        - class: SignalRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            model: <MODEL>
-            dataset: <DATASET>
-        - class: SigAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            ana_long_short: False
-            ann_scaler: 252
-        - class: PortAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            config: *port_analysis_config
--- a/examples/benchmarks/GeneralPtNN/workflow_config_gru2mlp.yaml
+++ b/examples/benchmarks/GeneralPtNN/workflow_config_gru2mlp.yaml
@@ -1,93 +0,0 @@
-qlib_init:
-    provider_uri: "~/.qlib/qlib_data/cn_data"
-    region: cn
-market: &market csi300
-benchmark: &benchmark SH000300
-data_handler_config: &data_handler_config
-    start_time: 2008-01-01
-    end_time: 2020-08-01
-    fit_start_time: 2008-01-01
-    fit_end_time: 2014-12-31
-    instruments: *market
-    infer_processors:
-        - class: FilterCol
-          kwargs:
-              fields_group: feature
-              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
-                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
-                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
-                        ]
-        - class: RobustZScoreNorm
-          kwargs:
-              fields_group: feature
-              clip_outlier: true
-        - class: Fillna
-          kwargs:
-              fields_group: feature
-    learn_processors:
-        - class: DropnaLabel
-        - class: CSRankNorm
-          kwargs:
-              fields_group: label
-    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
-
-port_analysis_config: &port_analysis_config
-    strategy:
-        class: TopkDropoutStrategy
-        module_path: qlib.contrib.strategy
-        kwargs:
-            signal: <PRED>
-            topk: 50
-            n_drop: 5
-    backtest:
-        start_time: 2017-01-01
-        end_time: 2020-08-01
-        account: 100000000
-        benchmark: *benchmark
-        exchange_kwargs:
-            limit_threshold: 0.095
-            deal_price: close
-            open_cost: 0.0005
-            close_cost: 0.0015
-            min_cost: 5
-task:
-    model:
-        class: GeneralPTNN
-        module_path: qlib.contrib.model.pytorch_general_nn
-        kwargs:
-            lr: 1e-3
-            n_epochs: 1
-            batch_size: 800
-            loss: mse
-            optimizer: adam
-            pt_model_uri: "qlib.contrib.model.pytorch_nn.Net"
-            pt_model_kwargs: 
-                input_dim: 20
-                layers: [20,]
-    dataset:
-        class: DatasetH
-        module_path: qlib.data.dataset
-        kwargs:
-            handler:
-                class: Alpha158
-                module_path: qlib.contrib.data.handler
-                kwargs: *data_handler_config
-            segments:
-                train: [2008-01-01, 2014-12-31]
-                valid: [2015-01-01, 2016-12-31]
-                test: [2017-01-01, 2020-08-01]
-    record: 
-        - class: SignalRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            model: <MODEL>
-            dataset: <DATASET>
-        - class: SigAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            ana_long_short: False
-            ann_scaler: 252
-        - class: PortAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            config: *port_analysis_config
--- a/examples/benchmarks/GeneralPtNN/workflow_config_mlp.yaml
+++ b/examples/benchmarks/GeneralPtNN/workflow_config_mlp.yaml
@@ -1,98 +0,0 @@
-qlib_init:
-    provider_uri: "~/.qlib/qlib_data/cn_data"
-    region: cn
-market: &market csi300
-benchmark: &benchmark SH000300
-data_handler_config: &data_handler_config
-    start_time: 2008-01-01
-    end_time: 2020-08-01
-    fit_start_time: 2008-01-01
-    fit_end_time: 2014-12-31
-    instruments: *market
-    infer_processors: [
-        {
-            "class" : "DropCol", 
-            "kwargs":{"col_list": ["VWAP0"]}
-        },
-        {
-             "class" : "CSZFillna", 
-             "kwargs":{"fields_group": "feature"}
-        }
-    ]
-    learn_processors: [
-        {
-            "class" : "DropCol", 
-            "kwargs":{"col_list": ["VWAP0"]}
-        },
-        {
-            "class" : "DropnaProcessor", 
-            "kwargs":{"fields_group": "feature"}
-        },
-        "DropnaLabel",
-        {
-            "class": "CSZScoreNorm", 
-            "kwargs": {"fields_group": "label"}
-        }
-    ]
-    process_type: "independent"
-
-port_analysis_config: &port_analysis_config
-    strategy:
-        class: TopkDropoutStrategy
-        module_path: qlib.contrib.strategy
-        kwargs:
-            signal: <PRED>
-            topk: 50
-            n_drop: 5
-    backtest:
-        start_time: 2017-01-01
-        end_time: 2020-08-01
-        account: 100000000
-        benchmark: *benchmark
-        exchange_kwargs:
-            limit_threshold: 0.095
-            deal_price: close
-            open_cost: 0.0005
-            close_cost: 0.0015
-            min_cost: 5
-task:
-    model:
-        class: GeneralPTNN
-        module_path: qlib.contrib.model.pytorch_general_nn
-        kwargs:
-            # FIXME: wrong parameters.
-            lr: 2e-3
-            batch_size: 8192
-            loss: mse
-            weight_decay: 0.0002
-            optimizer: adam
-            pt_model_uri: "qlib.contrib.model.pytorch_nn.Net"
-            pt_model_kwargs: 
-                input_dim: 157
-    dataset:
-        class: DatasetH
-        module_path: qlib.data.dataset
-        kwargs:
-            handler:
-                class: Alpha158
-                module_path: qlib.contrib.data.handler
-                kwargs: *data_handler_config
-            segments:
-                train: [2008-01-01, 2014-12-31]
-                valid: [2015-01-01, 2016-12-31]
-                test: [2017-01-01, 2020-08-01]
-    record: 
-        - class: SignalRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            model: <MODEL>
-            dataset: <DATASET>
-        - class: SigAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            ana_long_short: False
-            ann_scaler: 252
-        - class: PortAnaRecord
-          module_path: qlib.workflow.record_temp
-          kwargs: 
-            config: *port_analysis_config
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -136,7 +136,7 @@ If you want to contribute your new models, you can follow the steps below.
    - `README.md`: a brief introduction to your models
    - `workflow_config_<model name>_<dataset>.yaml`: a configuration which can read by `qrun`. You are encouraged to run your model in all datasets.
 3. You can integrate your model as a module [in this folder](https://github.com/microsoft/qlib/tree/main/qlib/contrib/model).
-4. Please update your results in the above **Benchmark Tables**, e.g. [Alpha360](#alpha158-dataset), [Alpha158](#alpha158-dataset)(the values of each metric are the mean and std calculated based on **20 Runs** with different random seeds. You can accomplish the above operations through the automated [script](https://github.com/microsoft/qlib/blob/main/examples/run_all_model.py) provided by Qlib, and get the final result in the .md file. if you don't have enough computational resource, you can ask for help in the PR).
+4. Please update your results in the above **Benchmark Tables**, e.g. [Alpha360](#alpha360-dataset), [Alpha158](#alpha158-dataset) (the values of each metric are the mean and std calculated based on **20 Runs** with different random seeds. You can accomplish the above operations through the automated [script](https://github.com/microsoft/qlib/blob/main/examples/run_all_model.py#LL286C22-L286C22) provided by Qlib, and get the final result in the .md file. If you don't have enough computational resources, you can ask for help in the PR).
 5. Update the info in the index page in the [news list](https://github.com/microsoft/qlib#newspaper-whats-new----sparkling_heart) and [model list](https://github.com/microsoft/qlib#quant-model-paper-zoo).

 Finally, you can send PR for review. ([here is an example](https://github.com/microsoft/qlib/pull/1040))
--- a/examples/benchmarks/TRA/example.py
+++ b/examples/benchmarks/TRA/example.py
@@ -1,15 +1,14 @@
 import argparse

 import qlib
-from ruamel.yaml import YAML
+import ruamel.yaml as yaml
 from qlib.utils import init_instance_by_config


 def main(seed, config_file="configs/config_alstm.yaml"):
    # set random seed
    with open(config_file) as f:
-        yaml = YAML(typ="safe", pure=True)
-        config = yaml.load(f)
+        config = yaml.safe_load(f)

    # seed_suffix = "/seed1000" if "init" in config_file else f"/seed{seed}"
    seed_suffix = ""
--- a/examples/benchmarks/TRA/src/model.py
+++ b/examples/benchmarks/TRA/src/model.py
@@ -324,6 +324,7 @@ class TRAModel(Model):


 class LSTM(nn.Module):
+
    """LSTM Model

    Args:
@@ -413,6 +414,7 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
+
    """Transformer Model

    Args:
@@ -473,6 +475,7 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
+
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/examples/benchmarks_dynamic/DDG-DA/workflow.py
+++ b/examples/benchmarks_dynamic/DDG-DA/workflow.py
@@ -1,6 +1,5 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-import os
 from pathlib import Path
 from typing import Union

@@ -36,10 +35,6 @@ class DDGDABench(DDGDA):


 if __name__ == "__main__":
-    kwargs = {}
-    if os.environ.get("PROVIDER_URI", "") == "":
-        GetData().qlib_data(exists_skip=True)
-    else:
-        kwargs["provider_uri"] = os.environ["PROVIDER_URI"]
-    auto_init(**kwargs)
+    GetData().qlib_data(exists_skip=True)
+    auto_init()
    fire.Fire(DDGDABench)
--- a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
+++ b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
@@ -1,6 +1,5 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-import os
 from pathlib import Path
 from typing import Union

@@ -32,10 +31,6 @@ class RollingBenchmark(Rolling):


 if __name__ == "__main__":
-    kwargs = {}
-    if os.environ.get("PROVIDER_URI", "") == "":
-        GetData().qlib_data(exists_skip=True)
-    else:
-        kwargs["provider_uri"] = os.environ["PROVIDER_URI"]
-    auto_init(**kwargs)
+    GetData().qlib_data(exists_skip=True)
+    auto_init()
    fire.Fire(RollingBenchmark)
--- a/examples/data_demo/data_cache_demo.py
+++ b/examples/data_demo/data_cache_demo.py
@@ -9,8 +9,8 @@ from copy import deepcopy
 from pathlib import Path
 import pickle
 from pprint import pprint
-from ruamel.yaml import YAML
 import subprocess
+import yaml
 from qlib.log import TimeInspector

 from qlib import init
@@ -30,8 +30,7 @@ if __name__ == "__main__":
        subprocess.run(f"qrun {config_path}", shell=True)

    # 2) dump handler
-    yaml = YAML(typ="safe", pure=True)
-    task_config = yaml.load(config_path.open())
+    task_config = yaml.safe_load(config_path.open())
    hd_conf = task_config["task"]["dataset"]["kwargs"]["handler"]
    pprint(hd_conf)
    hd: DataHandlerLP = init_instance_by_config(hd_conf)
--- a/examples/data_demo/data_mem_resuse_demo.py
+++ b/examples/data_demo/data_mem_resuse_demo.py
@@ -9,9 +9,10 @@ from copy import deepcopy
 from pathlib import Path
 import pickle
 from pprint import pprint
-from ruamel.yaml import YAML
 import subprocess

+import yaml
+
 from qlib import init
 from qlib.data.dataset.handler import DataHandlerLP
 from qlib.log import TimeInspector
@@ -28,8 +29,7 @@ if __name__ == "__main__":
    exp_name = "data_mem_reuse_demo"

    config_path = DIRNAME.parent / "benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml"
-    yaml = YAML(typ="safe", pure=True)
-    task_config = yaml.load(config_path.open())
+    task_config = yaml.safe_load(config_path.open())

    # 1) without using processed data in memory
    with TimeInspector.logt("The original time without reusing processed data in memory:"):
--- a/examples/orderbook_data/README.md
+++ b/examples/orderbook_data/README.md
@@ -16,7 +16,7 @@ Current version of script with default value tries to connect localhost **via de

 Run following command to install necessary libraries
 ```
-pip install pytest coverage gdown
+pip install pytest coverage
 pip install arctic  # NOTE: pip may fail to resolve the right package dependency !!! Please make sure the dependency are satisfied.
 ```

@@ -27,12 +27,13 @@ pip install arctic  # NOTE: pip may fail to resolve the right package dependency
 2. Please follow following steps to download example data
 ```bash
 cd examples/orderbook_data/
-gdown https://drive.google.com/uc?id=15nZF7tFT_eKVZAcMFL1qPS4jGyJflH7e  # Proxies may be necessary here.
-python ../../scripts/get_data.py _unzip --file_path highfreq_orderbook_example_data.zip --target_dir .
+wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
+tar xf highfreq_orderboook_example_data.tar.bz2
 ```

 3. Please import the example data to your mongo db
 ```bash
+cd examples/orderbook_data/
 python create_dataset.py initialize_library  # Initialization Libraries
 python create_dataset.py import_data  # Initialization Libraries
 ```
@@ -41,6 +42,7 @@ python create_dataset.py import_data  # Initialization Libraries

 After importing these data, you run `example.py` to create some high-frequency features.
 ```bash
+cd examples/orderbook_data/
 pytest -s --disable-warnings example.py   # If you want run all examples
 pytest -s --disable-warnings example.py::TestClass::test_exp_10  # If you want to run specific example
 ```
--- a/examples/portfolio/README.md
+++ b/examples/portfolio/README.md
@@ -20,7 +20,7 @@ We use China stock market data for our example.
 1. Prepare CSI300 weight:

   ```bash
-   wget https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/csi300_weight.zip
+   wget http://fintech.msra.cn/stock_data/downloads/csi300_weight.zip
   unzip -d ~/.qlib/qlib_data/cn_data csi300_weight.zip
   rm -f csi300_weight.zip
   ```
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -6,6 +6,7 @@ import sys
 import fire
 import time
 import glob
+import yaml
 import shutil
 import signal
 import inspect
@@ -14,7 +15,6 @@ import functools
 import statistics
 import subprocess
 from datetime import datetime
-from ruamel.yaml import YAML
 from pathlib import Path
 from operator import xor
 from pprint import pprint
@@ -188,8 +188,7 @@ def gen_and_save_md_table(metrics, dataset):
 # read yaml, remove seed kwargs of model, and then save file in the temp_dir
 def gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir):
    with open(yaml_path, "r") as fp:
-        yaml = YAML(typ="safe", pure=True)
-        config = yaml.load(fp)
+        config = yaml.safe_load(fp)
    try:
        del config["task"]["model"]["kwargs"]["seed"]
    except KeyError:
--- a/examples/workflow_by_code.ipynb
+++ b/examples/workflow_by_code.ipynb
@@ -161,7 +161,7 @@
    "    },\n",
    "}\n",
    "\n",
-    "# model initialization\n",
+    "# model initialization\n",
    "model = init_instance_by_config(task[\"model\"])\n",
    "dataset = init_instance_by_config(task[\"dataset\"])\n",
    "\n",
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,93 +0,0 @@
-[build-system]
-requires = ["setuptools", "cython", "numpy>=1.24.0"]
-build-backend = "setuptools.build_meta"
-
-[project]
-classifiers = [
-  "Operating System :: POSIX :: Linux",
-  "Operating System :: Microsoft :: Windows",
-  "Operating System :: MacOS",
-  "License :: OSI Approved :: MIT License",
-  "Development Status :: 3 - Alpha",
-  "Programming Language :: Python",
-  "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.8",
-  "Programming Language :: Python :: 3.9",
-  "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11",
-  "Programming Language :: Python :: 3.12",
-]
-name = "pyqlib"
-dynamic = ["version"]
-description = "A Quantitative-research Platform"
-requires-python = ">=3.8.0"
-readme = {file = "README.md", content-type = "text/markdown"}
-
-dependencies = [
-  "pyyaml",
-  "numpy",
-  "pandas",
-  "mlflow",
-  "filelock>=3.16.0",
-  "redis",
-  "dill",
-  "fire",
-  "ruamel.yaml>=0.17.38",
-  "python-redis-lock",
-  "tqdm",
-  "pymongo",
-  "loguru",
-  "lightgbm",
-  "gym",
-  "cvxpy",
-  "joblib",
-  "matplotlib",
-  "jupyter",
-  "nbconvert",
-]
-
-[project.optional-dependencies]
-dev = [
-  "pytest",
-  "statsmodels",
-]
-# On macos-13 system, when using python version greater than or equal to 3.10,
-# pytorch can't fully support Numpy version above 2.0, so, when you want to install torch,
-# it will limit the version of Numpy less than 2.0.
-rl = [
-  "tianshou<=0.4.10",
-  "torch",
-  "numpy<2.0.0",
-]
-lint = [
-  "black",
-  "pylint",
-  "mypy<1.5.0",
-  "flake8",
-  "nbqa",
-]
-docs = [
-  "sphinx",
-  "sphinx_rtd_theme",
-  "readthedocs_sphinx_ext",
-]
-package = [
-  "twine",
-  "build",
-]
-# test_pit dependency packages
-test = [
-  "yahooquery",
-  "baostock",
-]
-analysis = [
-  "plotly",
-]
-
-[tool.setuptools]
-packages = [
-  "qlib",
-]
-
-[project.scripts]
-qrun = "qlib.workflow.cli:run"
--- a/qlib/init.py
+++ b/qlib/init.py
@@ -2,11 +2,11 @@
 # Licensed under the MIT License.
 from pathlib import Path

-__version__ = "0.9.6.99"
+__version__ = "0.9.2.99"
 __version__bak = __version__  # This version is backup for QlibConfig.reset_qlib_version
 import os
 from typing import Union
-from ruamel.yaml import YAML
+import yaml
 import logging
 import platform
 import subprocess
@@ -176,8 +176,7 @@ def init_from_yaml_conf(conf_path, **kwargs):
        config = {}
    else:
        with open(conf_path) as f:
-            yaml = YAML(typ="safe", pure=True)
-            config = yaml.load(f)
+            config = yaml.safe_load(f)
    config.update(kwargs)
    default_conf = config.pop("default_conf", "client")
    init(default_conf, **config)
@@ -273,8 +272,7 @@ def auto_init(**kwargs):
        logger = get_module_logger("Initialization")
        conf_pp = pp / "config.yaml"
        with conf_pp.open() as f:
-            yaml = YAML(typ="safe", pure=True)
-            conf = yaml.load(f)
+            conf = yaml.safe_load(f)

        conf_type = conf.get("conf_type", "origin")
        if conf_type == "origin":
--- a/qlib/backtest/init.py
+++ b/qlib/backtest/init.py
@@ -162,15 +162,13 @@ def create_account_instance(
        init_cash=init_cash,
        position_dict=position_dict,
        pos_type=pos_type,
-        benchmark_config=(
-            {}
-            if benchmark is None
-            else {
-                "benchmark": benchmark,
-                "start_time": start_time,
-                "end_time": end_time,
-            }
-        ),
+        benchmark_config={}
+        if benchmark is None
+        else {
+            "benchmark": benchmark,
+            "start_time": start_time,
+            "end_time": end_time,
+        },
    )


--- a/qlib/backtest/high_performance_ds.py
+++ b/qlib/backtest/high_performance_ds.py
@@ -278,7 +278,7 @@ class BaseSingleMetric:
        raise NotImplementedError(f"Please implement the `empty` method")

    def add(self, other: BaseSingleMetric, fill_value: float = None) -> BaseSingleMetric:
-        """Replace np.nan with fill_value in two metrics and add them."""
+        """Replace np.NaN with fill_value in two metrics and add them."""

        raise NotImplementedError(f"Please implement the `add` method")

@@ -412,7 +412,7 @@ class BaseOrderIndicator:
        metrics : Union[str, List[str]]
            all metrics needs to be sumed.
        fill_value : float, optional
-            fill np.nan with value. By default None.
+            fill np.NaN with value. By default None.
        """

        raise NotImplementedError(f"Please implement the 'sum_all_indicators' method")
--- a/qlib/backtest/report.py
+++ b/qlib/backtest/report.py
@@ -325,9 +325,9 @@ class Indicator:

    def _update_order_fulfill_rate(self) -> None:
        def func(deal_amount, amount):
-            # deal_amount is np.nan or None when there is no inner decision. So full fill rate is 0.
+            # deal_amount is np.NaN or None when there is no inner decision. So full fill rate is 0.
            tmp_deal_amount = deal_amount.reindex(amount.index, 0)
-            tmp_deal_amount = tmp_deal_amount.replace({np.nan: 0})
+            tmp_deal_amount = tmp_deal_amount.replace({np.NaN: 0})
            return tmp_deal_amount / amount

        self.order_indicator.transfer(func, "ffr")
@@ -354,8 +354,8 @@ class Indicator:
        )

        def func(trade_price, deal_amount):
-            # trade_price is np.nan instead of inf when deal_amount is zero.
-            tmp_deal_amount = deal_amount.replace({0: np.nan})
+            # trade_price is np.NaN instead of inf when deal_amount is zero.
+            tmp_deal_amount = deal_amount.replace({0: np.NaN})
            return trade_price / tmp_deal_amount

        self.order_indicator.transfer(func, "trade_price")
@@ -425,7 +425,7 @@ class Indicator:
        assert isinstance(price_s, idd.SingleData)
        price_s = price_s.loc[(price_s > 1e-08).data.astype(bool)]
        # NOTE ~(price_s < 1e-08) is different from price_s >= 1e-8
-        #   ~(np.nan < 1e-8) -> ~(False)  -> True
+        #   ~(np.NaN < 1e-8) -> ~(False)  -> True

        assert isinstance(price_s, idd.SingleData)
        if agg == "vwap":
@@ -622,11 +622,9 @@ class Indicator:
            print(
                "[Indicator({}) {}]: FFR: {}, PA: {}, POS: {}".format(
                    freq,
-                    (
-                        trade_start_time
-                        if isinstance(trade_start_time, str)
-                        else trade_start_time.strftime("%Y-%m-%d %H:%M:%S")
-                    ),
+                    trade_start_time
+                    if isinstance(trade_start_time, str)
+                    else trade_start_time.strftime("%Y-%m-%d %H:%M:%S"),
                    fulfill_rate,
                    price_advantage,
                    positive_rate,
--- a/qlib/config.py
+++ b/qlib/config.py
@@ -173,11 +173,7 @@ _default_config = {
                "filters": ["field_not_found"],
            }
        },
-        # Normally this should be set to `False` to avoid duplicated logging [1].
-        # However, due to bug in pytest, it requires log message to propagate to root logger to be captured by `caplog` [2].
-        # [1] https://github.com/microsoft/qlib/pull/1661
-        # [2] https://github.com/pytest-dev/pytest/issues/3697
-        "loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"], "propagate": False}},
+        "loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"]}},
        # To let qlib work with other packages, we shouldn't disable existing loggers.
        # Note that this param is default to True according to the documentation of logging.
        "disable_existing_loggers": False,
@@ -490,5 +486,8 @@ class QlibConfig(Config):
        return self._registered


+DEFAULT_QLIB_DOT_PATH = Path("~/.qlib/").expanduser()
+
+
 # global config
 C = QlibConfig(_default_config)
--- a/qlib/contrib/analyzer.py
+++ b/qlib/contrib/analyzer.py
@@ -0,0 +1,111 @@
+import logging
+import matplotlib.pyplot as plt
+from pathlib import Path
+import numpy as np
+
+from ..log import get_module_logger
+from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec
+
+logger = get_module_logger("analysis", logging.INFO)
+
+
+class AnalyzerTemp:
+    """Base class for analyzers that read a recorder's stored objects and write figures to `output_dir`."""
+
+    def __init__(self, recorder, output_dir=None, **kwargs):
+        # `**kwargs` is accepted so subclasses can forward extra options; it is not used here.
+        self.recorder = recorder
+        # Always keep a Path: subclasses call `self.output_dir.joinpath(...)`, which a plain str lacks.
+        self.output_dir = Path(output_dir) if output_dir else Path("./")
+
+    def load(self, name: str):
+        """
+        Load a stored object by delegating to `self.recorder.load_object`.
+        Callers don't have to care about `get_path` and `artifact_path`.
+
+        Parameters
+        ----------
+        name : str
+            the name of the file to be loaded.
+
+        Return
+        ------
+        The stored records.
+        """
+        return self.recorder.load_object(name)
+
+    def analyse(self, **kwargs):
+        """
+        Analyse the recorded data (index, distribution, etc.); subclasses must override this.
+
+        Return
+        ------
+        The handled data.
+        """
+        raise NotImplementedError("Please implement the `analyse` method.")
+
+
+class HFAnalyzer(AnalyzerTemp):
+    """
+    Signal analysis that computes IC / Rank IC, long-short precision and long-short return metrics.
+
+    It writes "HFAnalyzerTable.jpeg" (metric table) and "HFAnalyzer.jpeg" (pred/label scatter) to `output_dir`.
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def analyse(self, **kwargs):
+        """Compute the metrics, save both figures and return the scatter figure's filename."""
+        # `Path(...)` keeps this working even if `output_dir` was stored as a plain str.
+        out_dir = Path(self.output_dir)
+        pred = self.load("pred.pkl").iloc[:, 0]
+        label = self.load("label.pkl").iloc[:, 0]
+
+        long_pre, short_pre = calc_long_short_prec(pred, label, is_alpha=True)
+        ic, ric = calc_ic(pred, label)
+        long_short_r, _ = calc_long_short_return(pred, label)
+        metrics = {
+            "IC": ic.mean(),
+            "ICIR": ic.mean() / ic.std(),
+            "Rank IC": ric.mean(),
+            "Rank ICIR": ric.mean() / ric.std(),
+            "Long precision": long_pre.mean(),
+            "Short precision": short_pre.mean(),
+            "Long-Short Average Return": long_short_r.mean(),
+            "Long-Short Average Sharpe": long_short_r.mean() / long_short_r.std(),
+        }
+
+        # Figure 1: the metrics rendered as a two-column table.
+        plt.table(cellText=[[k, v] for k, v in metrics.items()], loc="center")
+        plt.axis("off")
+        plt.savefig(out_dir / "HFAnalyzerTable.jpeg")
+        plt.clf()
+
+        # Figure 2: predictions and labels plotted against their row position.
+        plt.scatter(np.arange(0, len(pred)), pred)
+        plt.scatter(np.arange(0, len(label)), label)
+        plt.title("HFAnalyzer")
+        plt.savefig(out_dir / "HFAnalyzer.jpeg")
+        plt.clf()  # don't leak this scatter into whatever is drawn next on the global pyplot figure
+        return "HFAnalyzer.jpeg"
+
+
+class SignalAnalyzer(AnalyzerTemp):
+    """
+    Label analysis that plots a histogram of the stored "label.pkl" object.
+
+    The output image filename is "signalAnalysis.jpeg".
+    """
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def analyse(self, dataset=None, **kwargs):
+        """Save the label histogram under `output_dir` and return its filename (`dataset` is unused)."""
+        label = self.load("label.pkl")
+        plt.hist(label)
+        plt.title("SignalAnalyzer")
+        plt.savefig(Path(self.output_dir) / "signalAnalysis.jpeg")  # Path(): output_dir may be a plain str
+        plt.clf()  # reset the global pyplot figure so later plots start clean
+        return "signalAnalysis.jpeg"
--- a/qlib/contrib/data/handler.py
+++ b/qlib/contrib/data/handler.py
@@ -1,7 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

-from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
+from typing import Optional
+from qlib.utils.data import update_config
 from ...data.dataset.handler import DataHandlerLP
 from ...data.dataset.processor import Processor
 from ...utils import get_callable_kwargs
@@ -58,16 +59,17 @@ class Alpha360(DataHandlerLP):
        fit_end_time=None,
        filter_pipe=None,
        inst_processors=None,
-        **kwargs,
+        data_loader: Optional[dict] = None,
+        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

-        data_loader = {
+        _data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": {
-                    "feature": Alpha360DL.get_feature_config(),
+                    "feature": self.get_feature_config(),
                    "label": kwargs.pop("label", self.get_label_config()),
                },
                "filter_pipe": filter_pipe,
@@ -75,20 +77,67 @@ class Alpha360(DataHandlerLP):
                "inst_processors": inst_processors,
            },
        }
+        if data_loader is not None:
+            update_config(_data_loader, data_loader)

        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            data_loader=data_loader,
+            data_loader=_data_loader,
            learn_processors=learn_processors,
            infer_processors=infer_processors,
-            **kwargs,
+            **kwargs
        )

    def get_label_config(self):
        return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]

+    @staticmethod
+    def get_feature_config():
+        """Build the Alpha360 feature expressions together with their column names.
+
+        Alpha360 tries to provide a dataset made of original price data: the prices
+        and volume of the last 60 days. Six groups are emitted in this order:
+        close, open, high, low, vwap, volume. Inside each group the lags run from
+        59 down to 1 and the un-lagged value comes last, giving 6 * 60 = 360 features.
+
+        To make it easier to learn models from this dataset, every price is divided
+        by the latest $close and every volume by the latest $volume (with 1e-12 added
+        to the denominator). So the latest normalized $close is 1 (named CLOSE0) and
+        the latest normalized $volume is 1 (named VOLUME0); if further normalization
+        is executed (e.g. centralization), CLOSE0 and VOLUME0 will be 0.
+
+        Returns
+        -------
+        tuple of (list, list)
+            The expression strings and the matching feature names, index-aligned.
+        """
+
+        # (raw field, denominator expression), in output order.
+        groups = [
+            ("close", "$close"),
+            ("open", "$close"),
+            ("high", "$close"),
+            ("low", "$close"),
+            ("vwap", "$close"),
+            ("volume", "($volume+1e-12)"),
+        ]
+
+        fields = []
+        names = []
+        for col, denom in groups:
+            prefix = col.upper()  # e.g. "close" -> "CLOSE"
+            # Lagged values first: lag 59 down to lag 1.
+            for lag in range(59, 0, -1):
+                fields.append("Ref($%s, %d)/%s" % (col, lag, denom))
+                names.append("%s%d" % (prefix, lag))
+            # Then the latest value, named <PREFIX>0.
+            fields.append("$%s/%s" % (col, denom))
+            names.append("%s0" % prefix)
+
+        return fields, names
+

 class Alpha360vwap(Alpha360):
    def get_label_config(self):
@@ -109,12 +158,13 @@ class Alpha158(DataHandlerLP):
        process_type=DataHandlerLP.PTYPE_A,
        filter_pipe=None,
        inst_processors=None,
-        **kwargs,
+        data_loader: Optional[dict] = None,
+        **kwargs
    ):
        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)

-        data_loader = {
+        _data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": {
@@ -126,15 +176,17 @@ class Alpha158(DataHandlerLP):
                "inst_processors": inst_processors,
            },
        }
+        if data_loader is not None:
+            update_config(_data_loader, data_loader)
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
-            data_loader=data_loader,
+            data_loader=_data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            process_type=process_type,
-            **kwargs,
+            **kwargs
        )

    def get_feature_config(self):
@@ -146,11 +198,242 @@ class Alpha158(DataHandlerLP):
            },
            "rolling": {},
        }
-        return Alpha158DL.get_feature_config(conf)
+        return self.parse_config_to_fields(conf)

    def get_label_config(self):
        return ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"]

+    @staticmethod
+    def parse_config_to_fields(config):
+        """Create factors from config and return (fields, names): expression strings and their matching names.
+
+        config = {
+            'kbar': {}, # whether to use some hard-coded kbar features
+            'price': { # whether to use raw price features
+                'windows': [0, 1, 2, 3, 4], # use price at n days ago
+                'feature': ['OPEN', 'HIGH', 'LOW'] # which price field to use
+            },
+            'volume': { # whether to use raw volume features
+                'windows': [0, 1, 2, 3, 4], # use volume at n days ago
+            },
+            'rolling': { # whether to use rolling operator based features
+                'windows': [5, 10, 20, 30, 60], # rolling windows size
+                'include': ['ROC', 'MA', 'STD'], # rolling operator to use
+                # if include is None, every rolling operator not listed in exclude is used
+                'exclude': ['RANK'], # rolling operator not to use
+            }
+        }
+        """
+        fields = []
+        names = []
+        if "kbar" in config:
+            fields += [
+                "($close-$open)/$open",
+                "($high-$low)/$open",
+                "($close-$open)/($high-$low+1e-12)",
+                "($high-Greater($open, $close))/$open",
+                "($high-Greater($open, $close))/($high-$low+1e-12)",
+                "(Less($open, $close)-$low)/$open",
+                "(Less($open, $close)-$low)/($high-$low+1e-12)",
+                "(2*$close-$high-$low)/$open",
+                "(2*$close-$high-$low)/($high-$low+1e-12)",
+            ]
+            names += [
+                "KMID",
+                "KLEN",
+                "KMID2",
+                "KUP",
+                "KUP2",
+                "KLOW",
+                "KLOW2",
+                "KSFT",
+                "KSFT2",
+            ]
+        if "price" in config:
+            windows = config["price"].get("windows", range(5))
+            feature = config["price"].get("feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"])
+            for field in feature:
+                field = field.lower()
+                fields += ["Ref($%s, %d)/$close" % (field, d) if d != 0 else "$%s/$close" % field for d in windows]
+                names += [field.upper() + str(d) for d in windows]
+        if "volume" in config:
+            windows = config["volume"].get("windows", range(5))
+            fields += ["Ref($volume, %d)/($volume+1e-12)" % d if d != 0 else "$volume/($volume+1e-12)" for d in windows]
+            names += ["VOLUME" + str(d) for d in windows]
+        if "rolling" in config:
+            windows = config["rolling"].get("windows", [5, 10, 20, 30, 60])
+            include = config["rolling"].get("include", None)
+            exclude = config["rolling"].get("exclude", [])
+            # `exclude`: rolling operators listed here are skipped (unnecessary fields)
+            # `include`: if not None, only rolling operators listed here are used (necessary fields)
+
+            def use(x):
+                return x not in exclude and (include is None or x in include)
+
+            # Some factor ref: https://guorn.com/static/upload/file/3/134065454575605.pdf
+            if use("ROC"):
+                # https://www.investopedia.com/terms/r/rateofchange.asp
+                # Rate of change, the close price d days ago divided by latest close price to remove unit
+                fields += ["Ref($close, %d)/$close" % d for d in windows]
+                names += ["ROC%d" % d for d in windows]
+            if use("MA"):
+                # https://www.investopedia.com/ask/answers/071414/whats-difference-between-moving-average-and-weighted-moving-average.asp
+                # Simple Moving Average, the simple moving average in the past d days, divided by latest close price to remove unit
+                fields += ["Mean($close, %d)/$close" % d for d in windows]
+                names += ["MA%d" % d for d in windows]
+            if use("STD"):
+                # The standard deviation of close price for the past d days, divided by latest close price to remove unit
+                fields += ["Std($close, %d)/$close" % d for d in windows]
+                names += ["STD%d" % d for d in windows]
+            if use("BETA"):
+                # The rate of close price change in the past d days, divided by latest close price to remove unit
+                # For example, price increase 10 dollar per day in the past d days, then Slope will be 10.
+                fields += ["Slope($close, %d)/$close" % d for d in windows]
+                names += ["BETA%d" % d for d in windows]
+            if use("RSQR"):
+                # The R-square value of linear regression for the past d days, represents how linear the trend is
+                fields += ["Rsquare($close, %d)" % d for d in windows]
+                names += ["RSQR%d" % d for d in windows]
+            if use("RESI"):
+                # The residual for linear regression for the past d days, represent the trend linearity for past d days.
+                fields += ["Resi($close, %d)/$close" % d for d in windows]
+                names += ["RESI%d" % d for d in windows]
+            if use("MAX"):
+                # The max price for past d days, divided by latest close price to remove unit
+                fields += ["Max($high, %d)/$close" % d for d in windows]
+                names += ["MAX%d" % d for d in windows]
+            if use("LOW"):
+                # The min low price for past d days / latest close price; NOTE: selected by key "LOW" but named MIN%d
+                fields += ["Min($low, %d)/$close" % d for d in windows]
+                names += ["MIN%d" % d for d in windows]
+            if use("QTLU"):
+                # The 80% quantile of past d day's close price, divided by latest close price to remove unit
+                # Used with MIN and MAX
+                fields += ["Quantile($close, %d, 0.8)/$close" % d for d in windows]
+                names += ["QTLU%d" % d for d in windows]
+            if use("QTLD"):
+                # The 20% quantile of past d day's close price, divided by latest close price to remove unit
+                fields += ["Quantile($close, %d, 0.2)/$close" % d for d in windows]
+                names += ["QTLD%d" % d for d in windows]
+            if use("RANK"):
+                # Get the percentile of current close price in past d day's close price.
+                # Represent the current price level comparing to past N days, add additional information to moving average.
+                fields += ["Rank($close, %d)" % d for d in windows]
+                names += ["RANK%d" % d for d in windows]
+            if use("RSV"):
+                # Represent the price position between upper and lower resistance price for past d days.
+                fields += ["($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" % (d, d, d) for d in windows]
+                names += ["RSV%d" % d for d in windows]
+            if use("IMAX"):
+                # The number of days between current date and previous highest price date.
+                # Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
+                # The indicator measures the time between highs and the time between lows over a time period.
+                # The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
+                fields += ["IdxMax($high, %d)/%d" % (d, d) for d in windows]
+                names += ["IMAX%d" % d for d in windows]
+            if use("IMIN"):
+                # The number of days between current date and previous lowest price date.
+                # Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
+                # The indicator measures the time between highs and the time between lows over a time period.
+                # The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
+                fields += ["IdxMin($low, %d)/%d" % (d, d) for d in windows]
+                names += ["IMIN%d" % d for d in windows]
+            if use("IMXD"):
+                # The time gap between the previous highest-price date and the previous lowest-price date.
+                # Large value suggest downward momentum.
+                fields += ["(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) for d in windows]
+                names += ["IMXD%d" % d for d in windows]
+            if use("CORR"):
+                # The correlation between absolute close price and log scaled trading volume
+                fields += ["Corr($close, Log($volume+1), %d)" % d for d in windows]
+                names += ["CORR%d" % d for d in windows]
+            if use("CORD"):
+                # The correlation between price change ratio and log scaled volume change ratio
+                fields += ["Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d for d in windows]
+                names += ["CORD%d" % d for d in windows]
+            if use("CNTP"):
+                # The percentage of days in past d days that price goes up.
+                fields += ["Mean($close>Ref($close, 1), %d)" % d for d in windows]
+                names += ["CNTP%d" % d for d in windows]
+            if use("CNTN"):
+                # The percentage of days in past d days that price goes down.
+                fields += ["Mean($close<Ref($close, 1), %d)" % d for d in windows]
+                names += ["CNTN%d" % d for d in windows]
+            if use("CNTD"):
+                # The difference between the percentage of up days and the percentage of down days in past d days
+                fields += ["Mean($close>Ref($close, 1), %d)-Mean($close<Ref($close, 1), %d)" % (d, d) for d in windows]
+                names += ["CNTD%d" % d for d in windows]
+            if use("SUMP"):
+                # The total gain / the absolute total price changed
+                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
+                fields += [
+                    "Sum(Greater($close-Ref($close, 1), 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
+                    for d in windows
+                ]
+                names += ["SUMP%d" % d for d in windows]
+            if use("SUMN"):
+                # The total loss / the absolute total price changed
+                # Can be derived from SUMP by SUMN = 1 - SUMP
+                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
+                fields += [
+                    "Sum(Greater(Ref($close, 1)-$close, 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
+                    for d in windows
+                ]
+                names += ["SUMN%d" % d for d in windows]
+            if use("SUMD"):
+                # The diff ratio between total gain and total loss
+                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
+                fields += [
+                    "(Sum(Greater($close-Ref($close, 1), 0), %d)-Sum(Greater(Ref($close, 1)-$close, 0), %d))"
+                    "/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d, d)
+                    for d in windows
+                ]
+                names += ["SUMD%d" % d for d in windows]
+            if use("VMA"):
+                # Simple Volume Moving average: https://www.barchart.com/education/technical-indicators/volume_moving_average
+                fields += ["Mean($volume, %d)/($volume+1e-12)" % d for d in windows]
+                names += ["VMA%d" % d for d in windows]
+            if use("VSTD"):
+                # The standard deviation for volume in past d days, divided by latest volume to remove unit
+                fields += ["Std($volume, %d)/($volume+1e-12)" % d for d in windows]
+                names += ["VSTD%d" % d for d in windows]
+            if use("WVMA"):
+                # The volume weighted price change volatility
+                fields += [
+                    "Std(Abs($close/Ref($close, 1)-1)*$volume, %d)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, %d)+1e-12)"
+                    % (d, d)
+                    for d in windows
+                ]
+                names += ["WVMA%d" % d for d in windows]
+            if use("VSUMP"):
+                # The total volume increase / the absolute total volume changed
+                fields += [
+                    "Sum(Greater($volume-Ref($volume, 1), 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
+                    % (d, d)
+                    for d in windows
+                ]
+                names += ["VSUMP%d" % d for d in windows]
+            if use("VSUMN"):
+                # The total volume decrease / the absolute total volume changed
+                # Can be derived from VSUMP by VSUMN = 1 - VSUMP
+                fields += [
+                    "Sum(Greater(Ref($volume, 1)-$volume, 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
+                    % (d, d)
+                    for d in windows
+                ]
+                names += ["VSUMN%d" % d for d in windows]
+            if use("VSUMD"):
+                # The diff ratio between total volume increase and total volume decrease
+                # RSI indicator for volume
+                fields += [
+                    "(Sum(Greater($volume-Ref($volume, 1), 0), %d)-Sum(Greater(Ref($volume, 1)-$volume, 0), %d))"
+                    "/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)" % (d, d, d)
+                    for d in windows
+                ]
+                names += ["VSUMD%d" % d for d in windows]
+
+        return fields, names
+

 class Alpha158vwap(Alpha158):
    def get_label_config(self):
--- a/qlib/contrib/data/loader.py
+++ b/qlib/contrib/data/loader.py
@@ -1,310 +0,0 @@
-from qlib.data.dataset.loader import QlibDataLoader
-
-
-class Alpha360DL(QlibDataLoader):
-    """Dataloader to get Alpha360"""
-
-    def __init__(self, config=None, **kwargs):
-        _config = {
-            "feature": self.get_feature_config(),
-        }
-        if config is not None:
-            _config.update(config)
-        super().__init__(config=_config, **kwargs)
-
-    @staticmethod
-    def get_feature_config():
-        # NOTE:
-        # Alpha360 tries to provide a dataset with original price data
-        # the original price data includes the prices and volume in the last 60 days.
-        # To make it easier to learn models from this dataset, all the prices and volume
-        # are normalized by the latest price and volume data ( dividing by $close, $volume)
-        # So the latest normalized $close will be 1 (with name CLOSE0), the latest normalized $volume will be 1 (with name VOLUME0)
-        # If further normalization are executed (e.g. centralization),  CLOSE0 and VOLUME0 will be 0.
-        fields = []
-        names = []
-
-        for i in range(59, 0, -1):
-            fields += ["Ref($close, %d)/$close" % i]
-            names += ["CLOSE%d" % i]
-        fields += ["$close/$close"]
-        names += ["CLOSE0"]
-        for i in range(59, 0, -1):
-            fields += ["Ref($open, %d)/$close" % i]
-            names += ["OPEN%d" % i]
-        fields += ["$open/$close"]
-        names += ["OPEN0"]
-        for i in range(59, 0, -1):
-            fields += ["Ref($high, %d)/$close" % i]
-            names += ["HIGH%d" % i]
-        fields += ["$high/$close"]
-        names += ["HIGH0"]
-        for i in range(59, 0, -1):
-            fields += ["Ref($low, %d)/$close" % i]
-            names += ["LOW%d" % i]
-        fields += ["$low/$close"]
-        names += ["LOW0"]
-        for i in range(59, 0, -1):
-            fields += ["Ref($vwap, %d)/$close" % i]
-            names += ["VWAP%d" % i]
-        fields += ["$vwap/$close"]
-        names += ["VWAP0"]
-        for i in range(59, 0, -1):
-            fields += ["Ref($volume, %d)/($volume+1e-12)" % i]
-            names += ["VOLUME%d" % i]
-        fields += ["$volume/($volume+1e-12)"]
-        names += ["VOLUME0"]
-
-        return fields, names
-
-
-class Alpha158DL(QlibDataLoader):
-    """Dataloader to get Alpha158"""
-
-    def __init__(self, config=None, **kwargs):
-        _config = {
-            "feature": self.get_feature_config(),
-        }
-        if config is not None:
-            _config.update(config)
-        super().__init__(config=_config, **kwargs)
-
-    @staticmethod
-    def get_feature_config(
-        config={
-            "kbar": {},
-            "price": {
-                "windows": [0],
-                "feature": ["OPEN", "HIGH", "LOW", "VWAP"],
-            },
-            "rolling": {},
-        }
-    ):
-        """create factors from config
-
-        config = {
-            'kbar': {}, # whether to use some hard-code kbar features
-            'price': { # whether to use raw price features
-                'windows': [0, 1, 2, 3, 4], # use price at n days ago
-                'feature': ['OPEN', 'HIGH', 'LOW'] # which price field to use
-            },
-            'volume': { # whether to use raw volume features
-                'windows': [0, 1, 2, 3, 4], # use volume at n days ago
-            },
-            'rolling': { # whether to use rolling operator based features
-                'windows': [5, 10, 20, 30, 60], # rolling windows size
-                'include': ['ROC', 'MA', 'STD'], # rolling operator to use
-                #if include is None we will use default operators
-                'exclude': ['RANK'], # rolling operator not to use
-            }
-        }
-        """
-        fields = []
-        names = []
-        if "kbar" in config:
-            fields += [
-                "($close-$open)/$open",
-                "($high-$low)/$open",
-                "($close-$open)/($high-$low+1e-12)",
-                "($high-Greater($open, $close))/$open",
-                "($high-Greater($open, $close))/($high-$low+1e-12)",
-                "(Less($open, $close)-$low)/$open",
-                "(Less($open, $close)-$low)/($high-$low+1e-12)",
-                "(2*$close-$high-$low)/$open",
-                "(2*$close-$high-$low)/($high-$low+1e-12)",
-            ]
-            names += [
-                "KMID",
-                "KLEN",
-                "KMID2",
-                "KUP",
-                "KUP2",
-                "KLOW",
-                "KLOW2",
-                "KSFT",
-                "KSFT2",
-            ]
-        if "price" in config:
-            windows = config["price"].get("windows", range(5))
-            feature = config["price"].get("feature", ["OPEN", "HIGH", "LOW", "CLOSE", "VWAP"])
-            for field in feature:
-                field = field.lower()
-                fields += ["Ref($%s, %d)/$close" % (field, d) if d != 0 else "$%s/$close" % field for d in windows]
-                names += [field.upper() + str(d) for d in windows]
-        if "volume" in config:
-            windows = config["volume"].get("windows", range(5))
-            fields += ["Ref($volume, %d)/($volume+1e-12)" % d if d != 0 else "$volume/($volume+1e-12)" for d in windows]
-            names += ["VOLUME" + str(d) for d in windows]
-        if "rolling" in config:
-            windows = config["rolling"].get("windows", [5, 10, 20, 30, 60])
-            include = config["rolling"].get("include", None)
-            exclude = config["rolling"].get("exclude", [])
-            # `exclude` in dataset config unnecessary filed
-            # `include` in dataset config necessary field
-
-            def use(x):
-                return x not in exclude and (include is None or x in include)
-
-            # Some factor ref: https://guorn.com/static/upload/file/3/134065454575605.pdf
-            if use("ROC"):
-                # https://www.investopedia.com/terms/r/rateofchange.asp
-                # Rate of change, the price change in the past d days, divided by latest close price to remove unit
-                fields += ["Ref($close, %d)/$close" % d for d in windows]
-                names += ["ROC%d" % d for d in windows]
-            if use("MA"):
-                # https://www.investopedia.com/ask/answers/071414/whats-difference-between-moving-average-and-weighted-moving-average.asp
-                # Simple Moving Average, the simple moving average in the past d days, divided by latest close price to remove unit
-                fields += ["Mean($close, %d)/$close" % d for d in windows]
-                names += ["MA%d" % d for d in windows]
-            if use("STD"):
-                # The standard diviation of close price for the past d days, divided by latest close price to remove unit
-                fields += ["Std($close, %d)/$close" % d for d in windows]
-                names += ["STD%d" % d for d in windows]
-            if use("BETA"):
-                # The rate of close price change in the past d days, divided by latest close price to remove unit
-                # For example, price increase 10 dollar per day in the past d days, then Slope will be 10.
-                fields += ["Slope($close, %d)/$close" % d for d in windows]
-                names += ["BETA%d" % d for d in windows]
-            if use("RSQR"):
-                # The R-sqaure value of linear regression for the past d days, represent the trend linear
-                fields += ["Rsquare($close, %d)" % d for d in windows]
-                names += ["RSQR%d" % d for d in windows]
-            if use("RESI"):
-                # The redisdual for linear regression for the past d days, represent the trend linearity for past d days.
-                fields += ["Resi($close, %d)/$close" % d for d in windows]
-                names += ["RESI%d" % d for d in windows]
-            if use("MAX"):
-                # The max price for past d days, divided by latest close price to remove unit
-                fields += ["Max($high, %d)/$close" % d for d in windows]
-                names += ["MAX%d" % d for d in windows]
-            if use("LOW"):
-                # The low price for past d days, divided by latest close price to remove unit
-                fields += ["Min($low, %d)/$close" % d for d in windows]
-                names += ["MIN%d" % d for d in windows]
-            if use("QTLU"):
-                # The 80% quantile of past d day's close price, divided by latest close price to remove unit
-                # Used with MIN and MAX
-                fields += ["Quantile($close, %d, 0.8)/$close" % d for d in windows]
-                names += ["QTLU%d" % d for d in windows]
-            if use("QTLD"):
-                # The 20% quantile of past d day's close price, divided by latest close price to remove unit
-                fields += ["Quantile($close, %d, 0.2)/$close" % d for d in windows]
-                names += ["QTLD%d" % d for d in windows]
-            if use("RANK"):
-                # Get the percentile of current close price in past d day's close price.
-                # Represent the current price level comparing to past N days, add additional information to moving average.
-                fields += ["Rank($close, %d)" % d for d in windows]
-                names += ["RANK%d" % d for d in windows]
-            if use("RSV"):
-                # Represent the price position between upper and lower resistent price for past d days.
-                fields += ["($close-Min($low, %d))/(Max($high, %d)-Min($low, %d)+1e-12)" % (d, d, d) for d in windows]
-                names += ["RSV%d" % d for d in windows]
-            if use("IMAX"):
-                # The number of days between current date and previous highest price date.
-                # Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
-                # The indicator measures the time between highs and the time between lows over a time period.
-                # The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
-                fields += ["IdxMax($high, %d)/%d" % (d, d) for d in windows]
-                names += ["IMAX%d" % d for d in windows]
-            if use("IMIN"):
-                # The number of days between current date and previous lowest price date.
-                # Part of Aroon Indicator https://www.investopedia.com/terms/a/aroon.asp
-                # The indicator measures the time between highs and the time between lows over a time period.
-                # The idea is that strong uptrends will regularly see new highs, and strong downtrends will regularly see new lows.
-                fields += ["IdxMin($low, %d)/%d" % (d, d) for d in windows]
-                names += ["IMIN%d" % d for d in windows]
-            if use("IMXD"):
-                # The time period between previous lowest-price date occur after highest price date.
-                # Large value suggest downward momemtum.
-                fields += ["(IdxMax($high, %d)-IdxMin($low, %d))/%d" % (d, d, d) for d in windows]
-                names += ["IMXD%d" % d for d in windows]
-            if use("CORR"):
-                # The correlation between absolute close price and log scaled trading volume
-                fields += ["Corr($close, Log($volume+1), %d)" % d for d in windows]
-                names += ["CORR%d" % d for d in windows]
-            if use("CORD"):
-                # The correlation between price change ratio and volume change ratio
-                fields += ["Corr($close/Ref($close,1), Log($volume/Ref($volume, 1)+1), %d)" % d for d in windows]
-                names += ["CORD%d" % d for d in windows]
-            if use("CNTP"):
-                # The percentage of days in past d days that price go up.
-                fields += ["Mean($close>Ref($close, 1), %d)" % d for d in windows]
-                names += ["CNTP%d" % d for d in windows]
-            if use("CNTN"):
-                # The percentage of days in past d days that price go down.
-                fields += ["Mean($close<Ref($close, 1), %d)" % d for d in windows]
-                names += ["CNTN%d" % d for d in windows]
-            if use("CNTD"):
-                # The diff between past up day and past down day
-                fields += ["Mean($close>Ref($close, 1), %d)-Mean($close<Ref($close, 1), %d)" % (d, d) for d in windows]
-                names += ["CNTD%d" % d for d in windows]
-            if use("SUMP"):
-                # The total gain / the absolute total price changed
-                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
-                fields += [
-                    "Sum(Greater($close-Ref($close, 1), 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
-                    for d in windows
-                ]
-                names += ["SUMP%d" % d for d in windows]
-            if use("SUMN"):
-                # The total lose / the absolute total price changed
-                # Can be derived from SUMP by SUMN = 1 - SUMP
-                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
-                fields += [
-                    "Sum(Greater(Ref($close, 1)-$close, 0), %d)/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d)
-                    for d in windows
-                ]
-                names += ["SUMN%d" % d for d in windows]
-            if use("SUMD"):
-                # The diff ratio between total gain and total lose
-                # Similar to RSI indicator. https://www.investopedia.com/terms/r/rsi.asp
-                fields += [
-                    "(Sum(Greater($close-Ref($close, 1), 0), %d)-Sum(Greater(Ref($close, 1)-$close, 0), %d))"
-                    "/(Sum(Abs($close-Ref($close, 1)), %d)+1e-12)" % (d, d, d)
-                    for d in windows
-                ]
-                names += ["SUMD%d" % d for d in windows]
-            if use("VMA"):
-                # Simple Volume Moving average: https://www.barchart.com/education/technical-indicators/volume_moving_average
-                fields += ["Mean($volume, %d)/($volume+1e-12)" % d for d in windows]
-                names += ["VMA%d" % d for d in windows]
-            if use("VSTD"):
-                # The standard deviation for volume in past d days.
-                fields += ["Std($volume, %d)/($volume+1e-12)" % d for d in windows]
-                names += ["VSTD%d" % d for d in windows]
-            if use("WVMA"):
-                # The volume weighted price change volatility
-                fields += [
-                    "Std(Abs($close/Ref($close, 1)-1)*$volume, %d)/(Mean(Abs($close/Ref($close, 1)-1)*$volume, %d)+1e-12)"
-                    % (d, d)
-                    for d in windows
-                ]
-                names += ["WVMA%d" % d for d in windows]
-            if use("VSUMP"):
-                # The total volume increase / the absolute total volume changed
-                fields += [
-                    "Sum(Greater($volume-Ref($volume, 1), 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
-                    % (d, d)
-                    for d in windows
-                ]
-                names += ["VSUMP%d" % d for d in windows]
-            if use("VSUMN"):
-                # The total volume increase / the absolute total volume changed
-                # Can be derived from VSUMP by VSUMN = 1 - VSUMP
-                fields += [
-                    "Sum(Greater(Ref($volume, 1)-$volume, 0), %d)/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)"
-                    % (d, d)
-                    for d in windows
-                ]
-                names += ["VSUMN%d" % d for d in windows]
-            if use("VSUMD"):
-                # The diff ratio between total volume increase and total volume decrease
-                # RSI indicator for volume
-                fields += [
-                    "(Sum(Greater($volume-Ref($volume, 1), 0), %d)-Sum(Greater(Ref($volume, 1)-$volume, 0), %d))"
-                    "/(Sum(Abs($volume-Ref($volume, 1)), %d)+1e-12)" % (d, d, d)
-                    for d in windows
-                ]
-                names += ["VSUMD%d" % d for d in windows]
-
-        return fields, names
--- a/qlib/contrib/eva/alpha.py
+++ b/qlib/contrib/eva/alpha.py
@@ -3,7 +3,6 @@ Here is a batch of evaluation functions.

 The interface should be redesigned carefully in the future.
 """
-
 import pandas as pd
 from typing import Tuple
 from qlib import get_module_logger
--- a/qlib/contrib/meta/data_selection/dataset.py
+++ b/qlib/contrib/meta/data_selection/dataset.py
@@ -243,7 +243,7 @@ class MetaDatasetDS(MetaTaskDataset):
        trunc_days: int = None,
        rolling_ext_days: int = 0,
        exp_name: Union[str, InternalData],
-        segments: Union[Dict[Text, Tuple], float, str],
+        segments: Union[Dict[Text, Tuple], float],
        hist_step_n: int = 10,
        task_mode: str = MetaTask.PROC_MODE_FULL,
        fill_method: str = "max",
@@ -271,16 +271,12 @@ class MetaDatasetDS(MetaTaskDataset):
            - str: the name of the experiment to store the performance of data
            - InternalData: a prepared internal data
        segments: Union[Dict[Text, Tuple], float]
-            if the segment is a Dict
-                the segments to divide data
-                both left and right are included
+            the segments to divide data
+            both left and right
            if segments is a float:
                the float represents the percentage of data for training
-            if segments is a string:
-                it will try its best to put its data in training and ensure that the date `segments` is in the test set
        hist_step_n: int
            length of historical steps for the meta infomation
-            Number of steps of the data similarity information
        task_mode : str
            Please refer to the docs of MetaTask
        """
@@ -387,30 +383,10 @@ class MetaDatasetDS(MetaTaskDataset):
        if isinstance(self.segments, float):
            train_task_n = int(len(self.meta_task_l) * self.segments)
            if segment == "train":
-                train_tasks = self.meta_task_l[:train_task_n]
-                get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
-                return train_tasks
+                return self.meta_task_l[:train_task_n]
            elif segment == "test":
-                test_tasks = self.meta_task_l[train_task_n:]
-                get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
-                return test_tasks
+                return self.meta_task_l[train_task_n:]
            else:
                raise NotImplementedError(f"This type of input is not supported")
-        elif isinstance(self.segments, str):
-            train_tasks = []
-            test_tasks = []
-            for t in self.meta_task_l:
-                test_end = t.task["dataset"]["kwargs"]["segments"]["test"][1]
-                if test_end is None or pd.Timestamp(test_end) < pd.Timestamp(self.segments):
-                    train_tasks.append(t)
-                else:
-                    test_tasks.append(t)
-            get_module_logger("MetaDatasetDS").info(f"The first train meta task: {train_tasks[0]}")
-            get_module_logger("MetaDatasetDS").info(f"The first test meta task: {test_tasks[0]}")
-            if segment == "train":
-                return train_tasks
-            elif segment == "test":
-                return test_tasks
-            raise NotImplementedError(f"This type of input is not supported")
        else:
            raise NotImplementedError(f"This type of input is not supported")
--- a/qlib/contrib/meta/data_selection/model.py
+++ b/qlib/contrib/meta/data_selection/model.py
@@ -53,12 +53,7 @@ class MetaModelDS(MetaTaskModel):
        max_epoch=100,
        seed=43,
        alpha=0.0,
-        loss_skip_thresh=50,
    ):
-        """
-        loss_skip_size: int
-            The number of threshold to skip the loss calculation for each day.
-        """
        self.step = step
        self.hist_step_n = hist_step_n
        self.clip_method = clip_method
@@ -68,7 +63,6 @@ class MetaModelDS(MetaTaskModel):
        self.max_epoch = max_epoch
        self.fitted = False
        self.alpha = alpha
-        self.loss_skip_thresh = loss_skip_thresh
        torch.manual_seed(seed)

    def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False):
@@ -94,14 +88,12 @@ class MetaModelDS(MetaTaskModel):
                criterion = nn.MSELoss()
                loss = criterion(pred, meta_input["y_test"])
            elif self.criterion == "ic_loss":
-                criterion = ICLoss(self.loss_skip_thresh)
+                criterion = ICLoss()
                try:
-                    loss = criterion(pred, meta_input["y_test"], meta_input["test_idx"])
+                    loss = criterion(pred, meta_input["y_test"], meta_input["test_idx"], skip_size=50)
                except ValueError as e:
                    get_module_logger("MetaModelDS").warning(f"Exception `{e}` when calculating IC loss")
                    continue
-            else:
-                raise ValueError(f"Unknown criterion: {self.criterion}")

            assert not np.isnan(loss.detach().item()), "NaN loss!"

--- a/qlib/contrib/meta/data_selection/utils.py
+++ b/qlib/contrib/meta/data_selection/utils.py
@@ -10,11 +10,7 @@ from qlib.log import get_module_logger


 class ICLoss(nn.Module):
-    def __init__(self, skip_size=50):
-        super().__init__()
-        self.skip_size = skip_size
-
-    def forward(self, pred, y, idx):
+    def forward(self, pred, y, idx, skip_size=50):
        """forward.
        FIXME:
        - Some times it will be a slightly different from the result from `pandas.corr()`
@@ -37,7 +33,7 @@ class ICLoss(nn.Module):
        skip_n = 0
        for start_i, end_i in zip(diff_point, diff_point[1:]):
            pred_focus = pred[start_i:end_i]  # TODO: just for fake
-            if pred_focus.shape[0] < self.skip_size:
+            if pred_focus.shape[0] < skip_size:
                # skip some days which have very small amount of stock.
                skip_n += 1
                continue
@@ -54,7 +50,6 @@ class ICLoss(nn.Module):
            )
            ic_all += ic_day
        if len(diff_point) - 1 - skip_n <= 0:
-            __import__("ipdb").set_trace()
            raise ValueError("No enough data for calculating IC")
        if skip_n > 0:
            get_module_logger("ICLoss").info(
--- a/qlib/contrib/model/catboost_model.py
+++ b/qlib/contrib/model/catboost_model.py
@@ -33,7 +33,7 @@ class CatBoostModel(Model, FeatureInt):
        verbose_eval=20,
        evals_result=dict(),
        reweighter=None,
-        **kwargs,
+        **kwargs
    ):
        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
--- a/qlib/contrib/model/double_ensemble.py
+++ b/qlib/contrib/model/double_ensemble.py
@@ -31,7 +31,7 @@ class DEnsembleModel(Model, FeatureInt):
        sub_weights=None,
        epochs=100,
        early_stopping_rounds=None,
-        **kwargs,
+        **kwargs
    ):
        self.base_model = base_model  # "gbm" or "mlp", specifically, we use lgbm for "gbm"
        self.num_models = num_models  # the number of sub-models
--- a/qlib/contrib/model/linear.py
+++ b/qlib/contrib/model/linear.py
@@ -63,7 +63,6 @@ class LinearModel(Model):
                df_train = pd.concat([df_train, df_valid])
            except KeyError:
                get_module_logger("LinearModel").info("include_valid=True, but valid does not exist")
-        df_train = df_train.dropna()
        if df_train.empty:
            raise ValueError("Empty data from dataset, please check your dataset config.")
        if reweighter is not None:
--- a/qlib/contrib/model/pytorch_adarnn.py
+++ b/qlib/contrib/model/pytorch_adarnn.py
@@ -56,7 +56,7 @@ class ADARNN(Model):
        n_splits=2,
        GPU=0,
        seed=None,
-        **_,
+        **_
    ):
        # Set logger.
        self.logger = get_module_logger("ADARNN")
@@ -154,7 +154,10 @@ class ADARNN(Model):
        self.model.train()
        criterion = nn.MSELoss()
        dist_mat = torch.zeros(self.num_layers, self.len_seq).to(self.device)
-        out_weight_list = None
+        len_loader = np.inf
+        for loader in train_loader_list:
+            if len(loader) < len_loader:
+                len_loader = len(loader)
        for data_all in zip(*train_loader_list):
            #  for data_all in zip(*train_loader_list):
            self.train_optimizer.zero_grad()
@@ -568,7 +571,6 @@ class TransferLoss:
        Returns:
            [tensor] -- transfer loss
        """
-        loss = None
        if self.loss_type in ("mmd_lin", "mmd"):
            mmdloss = MMD_loss(kernel_type="linear")
            loss = mmdloss(X, Y)
--- a/qlib/contrib/model/pytorch_add.py
+++ b/qlib/contrib/model/pytorch_add.py
@@ -63,7 +63,7 @@ class ADD(Model):
        mu=0.05,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("ADD")
--- a/qlib/contrib/model/pytorch_alstm.py
+++ b/qlib/contrib/model/pytorch_alstm.py
@@ -52,7 +52,7 @@ class ALSTM(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("ALSTM")
--- a/qlib/contrib/model/pytorch_alstm_ts.py
+++ b/qlib/contrib/model/pytorch_alstm_ts.py
@@ -56,7 +56,7 @@ class ALSTM(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("ALSTM")
@@ -160,10 +160,6 @@ class ALSTM(Model):

        if self.metric in ("", "loss"):
            return -self.loss_fn(pred[mask], label[mask])
-        elif self.metric == "mse":
-            mask = ~torch.isnan(label)
-            weight = torch.ones_like(label)
-            return -self.mse(pred[mask], label[mask], weight[mask])

        raise ValueError("unknown metric `%s`" % self.metric)

--- a/qlib/contrib/model/pytorch_gats.py
+++ b/qlib/contrib/model/pytorch_gats.py
@@ -56,7 +56,7 @@ class GATs(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("GATs")
--- a/qlib/contrib/model/pytorch_gats_ts.py
+++ b/qlib/contrib/model/pytorch_gats_ts.py
@@ -73,7 +73,7 @@ class GATs(Model):
        GPU=0,
        n_jobs=10,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("GATs")
--- a/qlib/contrib/model/pytorch_general_nn.py
+++ b/qlib/contrib/model/pytorch_general_nn.py
@@ -1,358 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-from __future__ import division
-from __future__ import print_function
-
-from torch.utils.data import DataLoader
-
-
-import numpy as np
-import pandas as pd
-from typing import Union
-import copy
-
-import torch
-import torch.optim as optim
-
-from qlib.data.dataset.weight import Reweighter
-
-from .pytorch_utils import count_parameters
-from ...model.base import Model
-from ...data.dataset import DatasetH, TSDatasetH
-from ...data.dataset.handler import DataHandlerLP
-from ...utils import (
-    init_instance_by_config,
-    get_or_create_path,
-)
-from ...log import get_module_logger
-
-from ...model.utils import ConcatDataset
-
-
-class GeneralPTNN(Model):
-    """
-    Motivation:
-        We want to provide a Qlib General Pytorch Model Adaptor
-        You can reuse it for all kinds of Pytorch models.
-        It should include the training and predict process
-
-    Parameters
-    ----------
-    d_feat : int
-        input dimension for each time step
-    metric: str
-        the evaluation metric used in early stop
-    optimizer : str
-        optimizer name
-    GPU : str
-        the GPU ID(s) used for training
-    """
-
-    def __init__(
-        self,
-        n_epochs=200,
-        lr=0.001,
-        metric="",
-        batch_size=2000,
-        early_stop=20,
-        loss="mse",
-        weight_decay=0.0,
-        optimizer="adam",
-        n_jobs=10,
-        GPU=0,
-        seed=None,
-        pt_model_uri="qlib.contrib.model.pytorch_gru_ts.GRUModel",
-        pt_model_kwargs={
-            "d_feat": 6,
-            "hidden_size": 64,
-            "num_layers": 2,
-            "dropout": 0.0,
-        },
-    ):
-        # Set logger.
-        self.logger = get_module_logger("GeneralPTNN")
-        self.logger.info("GeneralPTNN pytorch version...")
-
-        # set hyper-parameters.
-        self.n_epochs = n_epochs
-        self.lr = lr
-        self.metric = metric
-        self.batch_size = batch_size
-        self.early_stop = early_stop
-        self.optimizer = optimizer.lower()
-        self.loss = loss
-        self.weight_decay = weight_decay
-        self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
-        self.n_jobs = n_jobs
-        self.seed = seed
-
-        self.pt_model_uri, self.pt_model_kwargs = pt_model_uri, pt_model_kwargs
-        self.dnn_model = init_instance_by_config({"class": pt_model_uri, "kwargs": pt_model_kwargs})
-
-        self.logger.info(
-            "GeneralPTNN parameters setting:"
-            "\nn_epochs : {}"
-            "\nlr : {}"
-            "\nmetric : {}"
-            "\nbatch_size : {}"
-            "\nearly_stop : {}"
-            "\noptimizer : {}"
-            "\nloss_type : {}"
-            "\ndevice : {}"
-            "\nn_jobs : {}"
-            "\nuse_GPU : {}"
-            "\nweight_decay : {}"
-            "\nseed : {}"
-            "\npt_model_uri: {}"
-            "\npt_model_kwargs: {}".format(
-                n_epochs,
-                lr,
-                metric,
-                batch_size,
-                early_stop,
-                optimizer.lower(),
-                loss,
-                self.device,
-                n_jobs,
-                self.use_gpu,
-                weight_decay,
-                seed,
-                pt_model_uri,
-                pt_model_kwargs,
-            )
-        )
-
-        if self.seed is not None:
-            np.random.seed(self.seed)
-            torch.manual_seed(self.seed)
-
-        self.logger.info("model:\n{:}".format(self.dnn_model))
-        self.logger.info("model size: {:.4f} MB".format(count_parameters(self.dnn_model)))
-
-        if optimizer.lower() == "adam":
-            self.train_optimizer = optim.Adam(self.dnn_model.parameters(), lr=self.lr, weight_decay=weight_decay)
-        elif optimizer.lower() == "gd":
-            self.train_optimizer = optim.SGD(self.dnn_model.parameters(), lr=self.lr, weight_decay=weight_decay)
-        else:
-            raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
-
-        self.fitted = False
-        self.dnn_model.to(self.device)
-
-    @property
-    def use_gpu(self):
-        return self.device != torch.device("cpu")
-
-    def mse(self, pred, label, weight):
-        loss = weight * (pred - label) ** 2
-        return torch.mean(loss)
-
-    def loss_fn(self, pred, label, weight=None):
-        mask = ~torch.isnan(label)
-
-        if weight is None:
-            weight = torch.ones_like(label)
-
-        if self.loss == "mse":
-            return self.mse(pred[mask], label[mask], weight[mask])
-
-        raise ValueError("unknown loss `%s`" % self.loss)
-
-    def metric_fn(self, pred, label):
-        mask = torch.isfinite(label)
-
-        if self.metric in ("", "loss"):
-            return -self.loss_fn(pred[mask], label[mask])
-
-        raise ValueError("unknown metric `%s`" % self.metric)
-
-    def _get_fl(self, data: torch.Tensor):
-        """
-        get feature and label from data
-        - Handle the different data shape of time series and tabular data
-
-        Parameters
-        ----------
-        data : torch.Tensor
-            input data which maybe 3 dimension or 2 dimension
-            - 3dim: [batch_size, time_step, feature_dim]
-            - 2dim: [batch_size, feature_dim]
-
-        Returns
-        -------
-        Tuple[torch.Tensor, torch.Tensor]
-        """
-        if data.dim() == 3:
-            # it is a time series dataset
-            feature = data[:, :, 0:-1].to(self.device)
-            label = data[:, -1, -1].to(self.device)
-        elif data.dim() == 2:
-            # it is a tabular dataset
-            feature = data[:, 0:-1].to(self.device)
-            label = data[:, -1].to(self.device)
-        else:
-            raise ValueError("Unsupported data shape.")
-        return feature, label
-
-    def train_epoch(self, data_loader):
-        self.dnn_model.train()
-
-        for data, weight in data_loader:
-            feature, label = self._get_fl(data)
-
-            pred = self.dnn_model(feature.float())
-            loss = self.loss_fn(pred, label, weight.to(self.device))
-
-            self.train_optimizer.zero_grad()
-            loss.backward()
-            torch.nn.utils.clip_grad_value_(self.dnn_model.parameters(), 3.0)
-            self.train_optimizer.step()
-
-    def test_epoch(self, data_loader):
-        self.dnn_model.eval()
-
-        scores = []
-        losses = []
-
-        for data, weight in data_loader:
-            feature, label = self._get_fl(data)
-
-            with torch.no_grad():
-                pred = self.dnn_model(feature.float())
-                loss = self.loss_fn(pred, label, weight.to(self.device))
-                losses.append(loss.item())
-
-                score = self.metric_fn(pred, label)
-                scores.append(score.item())
-
-        return np.mean(losses), np.mean(scores)
-
-    def fit(
-        self,
-        dataset: Union[DatasetH, TSDatasetH],
-        evals_result=dict(),
-        save_path=None,
-        reweighter=None,
-    ):
-        ists = isinstance(dataset, TSDatasetH)  # is this time series dataset
-
-        dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
-        dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
-        if dl_train.empty or dl_valid.empty:
-            raise ValueError("Empty data from dataset, please check your dataset config.")
-
-        if reweighter is None:
-            wl_train = np.ones(len(dl_train))
-            wl_valid = np.ones(len(dl_valid))
-        elif isinstance(reweighter, Reweighter):
-            wl_train = reweighter.reweight(dl_train)
-            wl_valid = reweighter.reweight(dl_valid)
-        else:
-            raise ValueError("Unsupported reweighter type.")
-
-        # Preprocess for data.  To align to Dataset Interface for DataLoader
-        if ists:
-            dl_train.config(fillna_type="ffill+bfill")  # process nan brought by dataloader
-            dl_valid.config(fillna_type="ffill+bfill")  # process nan brought by dataloader
-        else:
-            # If it is a tabular, we convert the dataframe to numpy to be indexable by DataLoader
-            dl_train = dl_train.values
-            dl_valid = dl_valid.values
-
-        train_loader = DataLoader(
-            ConcatDataset(dl_train, wl_train),
-            batch_size=self.batch_size,
-            shuffle=True,
-            num_workers=self.n_jobs,
-            drop_last=True,
-        )
-        valid_loader = DataLoader(
-            ConcatDataset(dl_valid, wl_valid),
-            batch_size=self.batch_size,
-            shuffle=False,
-            num_workers=self.n_jobs,
-            drop_last=True,
-        )
-        del dl_train, dl_valid, wl_train, wl_valid
-
-        save_path = get_or_create_path(save_path)
-
-        stop_steps = 0
-        train_loss = 0
-        best_score = -np.inf
-        best_epoch = 0
-        evals_result["train"] = []
-        evals_result["valid"] = []
-
-        # train
-        self.logger.info("training...")
-        self.fitted = True
-
-        for step in range(self.n_epochs):
-            self.logger.info("Epoch%d:", step)
-            self.logger.info("training...")
-            self.train_epoch(train_loader)
-            self.logger.info("evaluating...")
-            train_loss, train_score = self.test_epoch(train_loader)
-            val_loss, val_score = self.test_epoch(valid_loader)
-            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
-            evals_result["train"].append(train_score)
-            evals_result["valid"].append(val_score)
-
-            if step == 0:
-                best_param = copy.deepcopy(self.dnn_model.state_dict())
-            if val_score > best_score:
-                best_score = val_score
-                stop_steps = 0
-                best_epoch = step
-                best_param = copy.deepcopy(self.dnn_model.state_dict())
-            else:
-                stop_steps += 1
-                if stop_steps >= self.early_stop:
-                    self.logger.info("early stop")
-                    break
-
-        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
-        self.dnn_model.load_state_dict(best_param)
-        torch.save(best_param, save_path)
-
-        if self.use_gpu:
-            torch.cuda.empty_cache()
-
-    def predict(
-        self,
-        dataset: Union[DatasetH, TSDatasetH],
-        batch_size=None,
-        n_jobs=None,
-    ):
-        if not self.fitted:
-            raise ValueError("model is not fitted yet!")
-
-        dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
-
-        if isinstance(dataset, TSDatasetH):
-            dl_test.config(fillna_type="ffill+bfill")  # process nan brought by dataloader
-            index = dl_test.get_index()
-        else:
-            # If it is a tabular, we convert the dataframe to numpy to be indexable by DataLoader
-            index = dl_test.index
-            dl_test = dl_test.values
-
-        test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
-        self.dnn_model.eval()
-        preds = []
-
-        for data in test_loader:
-            feature, _ = self._get_fl(data)
-            feature = feature.to(self.device)
-
-            with torch.no_grad():
-                pred = self.dnn_model(feature.float()).detach().cpu().numpy()
-
-            preds.append(pred)
-
-        preds_concat = np.concatenate(preds)
-        if preds_concat.ndim != 1:
-            preds_concat = preds_concat.ravel()
-
-        return pd.Series(preds_concat, index=index)
--- a/qlib/contrib/model/pytorch_gru.py
+++ b/qlib/contrib/model/pytorch_gru.py
@@ -1,25 +1,25 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.

+
 from __future__ import division
 from __future__ import print_function
-import copy
-from typing import Text, Union

 import numpy as np
 import pandas as pd
+from typing import Text, Union
+import copy
+from ...utils import get_or_create_path
+from ...log import get_module_logger
+
 import torch
 import torch.nn as nn
 import torch.optim as optim

-from qlib.workflow import R
-
+from .pytorch_utils import count_parameters
+from ...model.base import Model
 from ...data.dataset import DatasetH
 from ...data.dataset.handler import DataHandlerLP
-from ...log import get_module_logger
-from ...model.base import Model
-from ...utils import get_or_create_path
-from .pytorch_utils import count_parameters


 class GRU(Model):
@@ -52,7 +52,7 @@ class GRU(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("GRU")
@@ -212,31 +212,16 @@ class GRU(Model):
        evals_result=dict(),
        save_path=None,
    ):
-        # prepare training and validation data
-        dfs = {
-            k: dataset.prepare(
-                k,
-                col_set=["feature", "label"],
-                data_key=DataHandlerLP.DK_L,
-            )
-            for k in ["train", "valid"]
-            if k in dataset.segments
-        }
-        df_train, df_valid = dfs.get("train", pd.DataFrame()), dfs.get("valid", pd.DataFrame())
+        df_train, df_valid, df_test = dataset.prepare(
+            ["train", "valid", "test"],
+            col_set=["feature", "label"],
+            data_key=DataHandlerLP.DK_L,
+        )
+        if df_train.empty or df_valid.empty:
+            raise ValueError("Empty data from dataset, please check your dataset config.")

-        # check if training data is empty
-        if df_train.empty:
-            raise ValueError("Empty training data from dataset, please check your dataset config.")
-
-        df_train = df_train.dropna()
        x_train, y_train = df_train["feature"], df_train["label"]
-
-        # check if validation data is provided
-        if not df_valid.empty:
-            df_valid = df_valid.dropna()
-            x_valid, y_valid = df_valid["feature"], df_valid["label"]
-        else:
-            x_valid, y_valid = None, None
+        x_valid, y_valid = df_valid["feature"], df_valid["label"]

        save_path = get_or_create_path(save_path)
        stop_steps = 0
@@ -250,42 +235,32 @@ class GRU(Model):
        self.logger.info("training...")
        self.fitted = True

-        best_param = copy.deepcopy(self.gru_model.state_dict())
        for step in range(self.n_epochs):
            self.logger.info("Epoch%d:", step)
            self.logger.info("training...")
            self.train_epoch(x_train, y_train)
            self.logger.info("evaluating...")
            train_loss, train_score = self.test_epoch(x_train, y_train)
+            val_loss, val_score = self.test_epoch(x_valid, y_valid)
+            self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
            evals_result["train"].append(train_score)
+            evals_result["valid"].append(val_score)

-            # evaluate on validation data if provided
-            if x_valid is not None and y_valid is not None:
-                val_loss, val_score = self.test_epoch(x_valid, y_valid)
-                self.logger.info("train %.6f, valid %.6f" % (train_score, val_score))
-                evals_result["valid"].append(val_score)
-
-                if val_score > best_score:
-                    best_score = val_score
-                    stop_steps = 0
-                    best_epoch = step
-                    best_param = copy.deepcopy(self.gru_model.state_dict())
-                else:
-                    stop_steps += 1
-                    if stop_steps >= self.early_stop:
-                        self.logger.info("early stop")
-                        break
+            if val_score > best_score:
+                best_score = val_score
+                stop_steps = 0
+                best_epoch = step
+                best_param = copy.deepcopy(self.gru_model.state_dict())
+            else:
+                stop_steps += 1
+                if stop_steps >= self.early_stop:
+                    self.logger.info("early stop")
+                    break

        self.logger.info("best score: %.6lf @ %d" % (best_score, best_epoch))
        self.gru_model.load_state_dict(best_param)
        torch.save(best_param, save_path)

-        # Logging
-        rec = R.get_recorder()
-        for k, v_l in evals_result.items():
-            for i, v in enumerate(v_l):
-                rec.log_metrics(step=i, **{k: v})
-
        if self.use_gpu:
            torch.cuda.empty_cache()

--- a/qlib/contrib/model/pytorch_gru_ts.py
+++ b/qlib/contrib/model/pytorch_gru_ts.py
@@ -54,7 +54,7 @@ class GRU(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("GRU")
--- a/qlib/contrib/model/pytorch_hist.py
+++ b/qlib/contrib/model/pytorch_hist.py
@@ -59,7 +59,7 @@ class HIST(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("HIST")
@@ -256,7 +256,7 @@ class HIST(Model):
            raise ValueError("Empty data from dataset, please check your dataset config.")

        if not os.path.exists(self.stock2concept):
-            url = "https://github.com/SunsetWolf/qlib_dataset/releases/download/v0/qlib_csi300_stock2concept.npy"
+            url = "http://fintech.msra.cn/stock_data/downloads/qlib_csi300_stock2concept.npy"
            urllib.request.urlretrieve(url, self.stock2concept)

        stock_index = np.load(self.stock_index, allow_pickle=True).item()
--- a/qlib/contrib/model/pytorch_igmtf.py
+++ b/qlib/contrib/model/pytorch_igmtf.py
@@ -55,7 +55,7 @@ class IGMTF(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("IGMTF")
--- a/qlib/contrib/model/pytorch_krnn.py
+++ b/qlib/contrib/model/pytorch_krnn.py
@@ -255,7 +255,7 @@ class KRNN(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("KRNN")
--- a/qlib/contrib/model/pytorch_localformer.py
+++ b/qlib/contrib/model/pytorch_localformer.py
@@ -44,7 +44,7 @@ class LocalformerModel(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # set hyper-parameters.
        self.d_model = d_model
--- a/qlib/contrib/model/pytorch_localformer_ts.py
+++ b/qlib/contrib/model/pytorch_localformer_ts.py
@@ -42,7 +42,7 @@ class LocalformerModel(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # set hyper-parameters.
        self.d_model = d_model
--- a/qlib/contrib/model/pytorch_lstm.py
+++ b/qlib/contrib/model/pytorch_lstm.py
@@ -51,7 +51,7 @@ class LSTM(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("LSTM")
--- a/qlib/contrib/model/pytorch_lstm_ts.py
+++ b/qlib/contrib/model/pytorch_lstm_ts.py
@@ -53,7 +53,7 @@ class LSTM(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("LSTM")
--- a/qlib/contrib/model/pytorch_sandwich.py
+++ b/qlib/contrib/model/pytorch_sandwich.py
@@ -35,7 +35,7 @@ class SandwichModel(nn.Module):
        rnn_layers,
        dropout,
        device,
-        **params,
+        **params
    ):
        """Build a Sandwich model

@@ -129,7 +129,7 @@ class Sandwich(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("Sandwich")
--- a/qlib/contrib/model/pytorch_sfm.py
+++ b/qlib/contrib/model/pytorch_sfm.py
@@ -212,7 +212,7 @@ class SFM(Model):
        optimizer="gd",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("SFM")
--- a/qlib/contrib/model/pytorch_tcn.py
+++ b/qlib/contrib/model/pytorch_tcn.py
@@ -56,7 +56,7 @@ class TCN(Model):
        optimizer="adam",
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("TCN")
--- a/qlib/contrib/model/pytorch_tcn_ts.py
+++ b/qlib/contrib/model/pytorch_tcn_ts.py
@@ -54,7 +54,7 @@ class TCN(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("TCN")
--- a/qlib/contrib/model/pytorch_tcts.py
+++ b/qlib/contrib/model/pytorch_tcts.py
@@ -58,7 +58,7 @@ class TCTS(Model):
        mode="soft",
        seed=None,
        lowest_valid_performance=0.993,
-        **kwargs,
+        **kwargs
    ):
        # Set logger.
        self.logger = get_module_logger("TCTS")
--- a/qlib/contrib/model/pytorch_tra.py
+++ b/qlib/contrib/model/pytorch_tra.py
@@ -511,6 +511,7 @@ class TRAModel(Model):


 class RNN(nn.Module):
+
    """RNN Model

    Args:
@@ -600,6 +601,7 @@ class PositionalEncoding(nn.Module):


 class Transformer(nn.Module):
+
    """Transformer Model

    Args:
@@ -647,6 +649,7 @@ class Transformer(nn.Module):


 class TRA(nn.Module):
+
    """Temporal Routing Adaptor (TRA)

    TRA takes historical prediction errors & latent representation as inputs,
--- a/qlib/contrib/model/pytorch_transformer.py
+++ b/qlib/contrib/model/pytorch_transformer.py
@@ -43,7 +43,7 @@ class TransformerModel(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # set hyper-parameters.
        self.d_model = d_model
--- a/qlib/contrib/model/pytorch_transformer_ts.py
+++ b/qlib/contrib/model/pytorch_transformer_ts.py
@@ -41,7 +41,7 @@ class TransformerModel(Model):
        n_jobs=10,
        GPU=0,
        seed=None,
-        **kwargs,
+        **kwargs
    ):
        # set hyper-parameters.
        self.d_model = d_model
--- a/qlib/contrib/model/xgboost.py
+++ b/qlib/contrib/model/xgboost.py
@@ -28,7 +28,7 @@ class XGBModel(Model, FeatureInt):
        verbose_eval=20,
        evals_result=dict(),
        reweighter=None,
-        **kwargs,
+        **kwargs
    ):
        df_train, df_valid = dataset.prepare(
            ["train", "valid"],
@@ -63,7 +63,7 @@ class XGBModel(Model, FeatureInt):
            early_stopping_rounds=early_stopping_rounds,
            verbose_eval=verbose_eval,
            evals_result=evals_result,
-            **kwargs,
+            **kwargs
        )
        evals_result["train"] = list(evals_result["train"].values())[0]
        evals_result["valid"] = list(evals_result["valid"].values())[0]
--- a/qlib/contrib/online/manager.py
+++ b/qlib/contrib/online/manager.py
@@ -4,10 +4,10 @@
 # pylint: skip-file
 # flake8: noqa

+import yaml
 import pathlib
 import pandas as pd
 import shutil
-from ruamel.yaml import YAML
 from ...backtest.account import Account
 from .user import User
 from .utils import load_instance, save_instance
@@ -110,8 +110,7 @@ class UserManager:
            raise ValueError("User data for {} already exists".format(user_id))

        with config_file.open("r") as fp:
-            yaml = YAML(typ="safe", pure=True)
-            config = yaml.load(fp)
+            config = yaml.safe_load(fp)
        # load model
        model = init_instance_by_config(config["model"])

--- a/qlib/contrib/online/utils.py
+++ b/qlib/contrib/online/utils.py
@@ -6,8 +6,8 @@

 import pathlib
 import pickle
+import yaml
 import pandas as pd
-from ruamel.yaml import YAML
 from ...data import D
 from ...config import C
 from ...log import get_module_logger
@@ -91,8 +91,7 @@ def prepare(um, today, user_id, exchange_config=None):
    dates.append(get_next_trading_date(dates[-1], future=True))
    if exchange_config:
        with pathlib.Path(exchange_config).open("r") as fp:
-            yaml = YAML(typ="safe", pure=True)
-            exchange_paras = yaml.load(fp)
+            exchange_paras = yaml.safe_load(fp)
    else:
        exchange_paras = {}
    trade_exchange = Exchange(trade_dates=dates, **exchange_paras)
--- a/qlib/contrib/report/data/ana.py
+++ b/qlib/contrib/report/data/ana.py
@@ -1,17 +1,5 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
-"""
-Here we have a comprehensive set of analysis classes.
-
-Here is an example.
-
-.. code-block:: python
-
-    from qlib.contrib.report.data.ana import FeaMeanStd
-    fa = FeaMeanStd(ret_df)
-    fa.plot_all(wspace=0.3, sub_figsize=(12, 3), col_n=5)
-
-"""
 import pandas as pd
 import numpy as np
 from qlib.contrib.report.data.base import FeaAnalyser
@@ -164,7 +152,6 @@ class FeaSkewTurt(NumFeaAnalyser):
        self._kurt[col].plot(ax=right_ax, label="kurt", color="green")
        right_ax.set_xlabel("")
        right_ax.set_ylabel("kurt")
-        right_ax.grid(None)  # set the grid to None to avoid two layer of grid

        h1, l1 = ax.get_legend_handles_labels()
        h2, l2 = right_ax.get_legend_handles_labels()
@@ -184,15 +171,12 @@ class FeaMeanStd(NumFeaAnalyser):
        ax.set_xlabel("")
        ax.set_ylabel("mean")
        ax.legend()
-        ax.tick_params(axis="x", rotation=90)

        right_ax = ax.twinx()

        self._std[col].plot(ax=right_ax, label="std", color="green")
        right_ax.set_xlabel("")
        right_ax.set_ylabel("std")
-        right_ax.tick_params(axis="x", rotation=90)
-        right_ax.grid(None)  # set the grid to None to avoid two layer of grid

        h1, l1 = ax.get_legend_handles_labels()
        h2, l2 = right_ax.get_legend_handles_labels()
--- a/qlib/contrib/report/data/base.py
+++ b/qlib/contrib/report/data/base.py
@@ -14,24 +14,6 @@ from qlib.contrib.report.utils import sub_fig_generator

 class FeaAnalyser:
    def __init__(self, dataset: pd.DataFrame):
-        """
-
-        Parameters
-        ----------
-        dataset : pd.DataFrame
-
-            We often have multiple columns for dataset. Each column corresponds to one sub figure.
-            There will be a datatime column in the index levels.
-            Aggretation will be used for more summarized metrics overtime.
-            Here is an example of data:
-
-            .. code-block::
-
-                                            return
-                datetime   instrument
-                2007-02-06 equity_tpx     0.010087
-                           equity_spx     0.000786
-        """
        self._dataset = dataset
        with TimeInspector.logt("calc_stat_values"):
            self.calc_stat_values()
--- a/qlib/contrib/report/graph.py
+++ b/qlib/contrib/report/graph.py
@@ -176,7 +176,7 @@ class HeatmapGraph(BaseGraph):
                x=self._df.columns,
                y=self._df.index,
                z=self._df.values.tolist(),
-                **self._graph_kwargs,
+                **self._graph_kwargs
            )
        ]
        return _data
@@ -213,7 +213,7 @@ class SubplotsGraph:
        sub_graph_layout: dict = None,
        sub_graph_data: list = None,
        subplots_kwargs: dict = None,
-        **kwargs,
+        **kwargs
    ):
        """

@@ -355,7 +355,7 @@ class SubplotsGraph:
                        df=self._df.loc[:, [column_name]],
                        name_dict={column_name: temp_name},
                        graph_kwargs=_graph_kwargs,
-                    ),
+                    )
                )
            else:
                raise TypeError()
--- a/qlib/contrib/report/utils.py
+++ b/qlib/contrib/report/utils.py
@@ -4,7 +4,7 @@ import matplotlib.pyplot as plt
 import pandas as pd


-def sub_fig_generator(sub_figsize=(3, 3), col_n=10, row_n=1, wspace=None, hspace=None, sharex=False, sharey=False):
+def sub_fig_generator(sub_fs=(3, 3), col_n=10, row_n=1, wspace=None, hspace=None, sharex=False, sharey=False):
    """sub_fig_generator.
    it will return a generator, each row contains <col_n> sub graph

@@ -13,7 +13,7 @@ def sub_fig_generator(sub_figsize=(3, 3), col_n=10, row_n=1, wspace=None, hspace

    Parameters
    ----------
-    sub_figsize :
+    sub_fs :
        the figure size of each subgraph in <col_n> * <row_n> subgraphs
    col_n :
        the number of subgraph in each row;  It will generating a new graph after generating <col_n> of subgraphs.
@@ -33,7 +33,7 @@ def sub_fig_generator(sub_figsize=(3, 3), col_n=10, row_n=1, wspace=None, hspace

    while True:
        fig, axes = plt.subplots(
-            row_n, col_n, figsize=(sub_figsize[0] * col_n, sub_figsize[1] * row_n), sharex=sharex, sharey=sharey
+            row_n, col_n, figsize=(sub_fs[0] * col_n, sub_fs[1] * row_n), sharex=sharex, sharey=sharey
        )
        plt.subplots_adjust(wspace=wspace, hspace=hspace)
        axes = axes.reshape(row_n, col_n)
--- a/qlib/contrib/rolling/base.py
+++ b/qlib/contrib/rolling/base.py
@@ -2,11 +2,11 @@
 # Licensed under the MIT License.
 from copy import deepcopy
 from pathlib import Path
-from ruamel.yaml import YAML
 from typing import List, Optional, Union

 import fire
 import pandas as pd
+import yaml

 from qlib import auto_init
 from qlib.log import get_module_logger
@@ -73,8 +73,8 @@ class Rolling:
            The horizon of the prediction target.
            This is used to override the prediction horizon of the file.
        h_path : Optional[str]
-            It is other data source that is dumped as a handler. It will override the data handler section in the config.
-            If it is not given, it will create a customized cache for the handler when `enable_handler_cache=True`
+            the dumped data handler;
+            It may come from other data source. It will override the data handler in the config.
        test_end : Optional[str]
            the test end for the data. It is typically used together with the handler
            You can do the same thing with task_ext_conf in a more complicated way
@@ -117,10 +117,9 @@ class Rolling:

    def _raw_conf(self) -> dict:
        with self.conf_path.open("r") as f:
-            yaml = YAML(typ="safe", pure=True)
-            return yaml.load(f)
+            return yaml.safe_load(f)

-    def _replace_handler_with_cache(self, task: dict):
+    def _replace_hanler_with_cache(self, task: dict):
        """
        Due to the data processing part in original rolling is slow. So we have to
        This class tries to add more feature
@@ -160,20 +159,13 @@ class Rolling:
            # - get horizon automatically from the expression!!!!
            raise NotImplementedError(f"This type of input is not supported")
        else:
-            if enable_handler_cache and self.h_path is not None:
-                self.logger.info("Fail to override the horizon due to data handler cache")
-            else:
-                self.logger.info("The prediction horizon is overrided")
-                if isinstance(task["dataset"]["kwargs"]["handler"], dict):
-                    task["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
-                        "Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
-                    ]
-                else:
-                    self.logger.warning("Try to automatically configure the lablel but failed.")
+            self.logger.info("The prediction horizon is overrided")
+            task["dataset"]["kwargs"]["handler"]["kwargs"]["label"] = [
+                "Ref($close, -{}) / Ref($close, -1) - 1".format(self.horizon + 1)
+            ]

-        if self.h_path is not None or enable_handler_cache:
-            # if we already have provided data source or we want to create one
-            task = self._replace_handler_with_cache(task)
+        if enable_handler_cache:
+            task = self._replace_hanler_with_cache(task)
        task = self._update_start_end_time(task)

        if self.task_ext_conf is not None:
@@ -181,16 +173,6 @@ class Rolling:
        self.logger.info(task)
        return task

-    def run_basic_task(self):
-        """
-        Run the basic task without rolling.
-        This is for fast testing for model tunning.
-        """
-        task = self.basic_task()
-        print(task)
-        trainer = TrainerR(experiment_name=self.exp_name)
-        trainer([task])
-
    def get_task_list(self) -> List[dict]:
        """return a batch of tasks for rolling."""
        task = self.basic_task()
--- a/qlib/contrib/rolling/ddgda.py
+++ b/qlib/contrib/rolling/ddgda.py
@@ -80,11 +80,6 @@ class DDGDA(Rolling):
        sim_task_model: UTIL_MODEL_TYPE = "gbdt",
        meta_1st_train_end: Optional[str] = None,
        alpha: float = 0.01,
-        loss_skip_thresh: int = 50,
-        fea_imp_n: Optional[int] = 30,
-        meta_data_proc: Optional[str] = "V01",
-        segments: Union[float, str] = 0.62,
-        hist_step_n: int = 30,
        working_dir: Optional[Union[str, Path]] = None,
        **kwargs,
    ):
@@ -99,15 +94,6 @@ class DDGDA(Rolling):
        alpha: float
            Setting the L2 regularization for ridge
            The `alpha` is only passed to MetaModelDS (it is not passed to sim_task_model currently..)
-        loss_skip_thresh: int
-            The thresh to skip the loss calculation for each day. If the number of item is less than it, it will skip the loss on that day.
-        meta_data_proc : Optional[str]
-            How we process the meta dataset for learning meta model.
-        segments : Union[float, str]
-            if segments is a float:
-                The ratio of training data in the meta task dataset
-            if segments is a string:
-                it will try its best to put its data in training and ensure that the date `segments` is in the test set
        """
        # NOTE:
        # the horizon must match the meaning in the base task template
@@ -118,22 +104,14 @@ class DDGDA(Rolling):
        super().__init__(**kwargs)
        self.working_dir = self.conf_path.parent if working_dir is None else Path(working_dir)
        self.proxy_hd = self.working_dir / "handler_proxy.pkl"
-        self.fea_imp_n = fea_imp_n
-        self.meta_data_proc = meta_data_proc
-        self.loss_skip_thresh = loss_skip_thresh
-        self.segments = segments
-        self.hist_step_n = hist_step_n

    def _adjust_task(self, task: dict, astype: UTIL_MODEL_TYPE):
        """
-        Base on the original task, we need to do some extra things.
-
+        some task are use for special purpose.
        For example:
        - GBDT for calculating feature importance
        - Linear or GBDT for calculating similarity
        - Datset (well processed) that aligned to Linear that for meta learning
-
-        So we may need to change the dataset and model for the special purpose and other settings remains the same.
        """
        # NOTE: here is just for aligning with previous implementation
        # It is not necessary for the current implementation
@@ -141,16 +119,12 @@ class DDGDA(Rolling):
        if astype == "gbdt":
            task["model"] = LGBM_MODEL
            if isinstance(handler, dict):
-                # We don't need preprocessing when using GBDT model
                for k in ["infer_processors", "learn_processors"]:
                    if k in handler.setdefault("kwargs", {}):
                        handler["kwargs"].pop(k)
        elif astype == "linear":
            task["model"] = LINEAR_MODEL
-            if isinstance(handler, dict):
-                handler["kwargs"].update(PROC_ARGS)
-            else:
-                self.logger.warning("The handler can't be adjusted.")
+            handler["kwargs"].update(PROC_ARGS)
        else:
            raise ValueError(f"astype not supported: {astype}")
        return task
@@ -181,15 +155,12 @@ class DDGDA(Rolling):
        The meta model will be trained upon the proxy forecasting model.
        This dataset is for the proxy forecasting model.
        """
-
+        topk = 30
+        fi = self._get_feature_importance()
+        col_selected = fi.nlargest(topk)
        # NOTE: adjusting to `self.sim_task_model` just for aligning with previous implementation.
-        # In previous version. The data for proxy model is using sim_task_model's way for processing
        task = self._adjust_task(self.basic_task(enable_handler_cache=False), self.sim_task_model)
        task = replace_task_handler_with_cache(task, self.working_dir)
-        # if self.meta_data_proc is not None:
-        # else:
-        #     # Otherwise, we don't need futher processing
-        #     task = self.basic_task()

        dataset = init_instance_by_config(task["dataset"])
        prep_ds = dataset.prepare(slice(None), col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
@@ -197,18 +168,12 @@ class DDGDA(Rolling):
        feature_df = prep_ds["feature"]
        label_df = prep_ds["label"]

-        if self.fea_imp_n is not None:
-            fi = self._get_feature_importance()
-            col_selected = fi.nlargest(self.fea_imp_n)
-            feature_selected = feature_df.loc[:, col_selected.index]
-        else:
-            feature_selected = feature_df
+        feature_selected = feature_df.loc[:, col_selected.index]

-        if self.meta_data_proc == "V01":
-            feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
-                lambda df: (df - df.mean()).div(df.std())
-            )
-            feature_selected = feature_selected.fillna(0.0)
+        feature_selected = feature_selected.groupby("datetime", group_keys=False).apply(
+            lambda df: (df - df.mean()).div(df.std())
+        )
+        feature_selected = feature_selected.fillna(0.0)

        df_all = {
            "label": label_df.reindex(feature_selected.index),
@@ -258,10 +223,7 @@ class DDGDA(Rolling):
        # 1) leverage the simplified proxy forecasting model to train meta model.
        # - Only the dataset part is important, in current version of meta model will integrate the

-        # NOTE:
-        # - The train_start for training meta model does not necessarily align with final rolling
-        #   But please select a right time to make sure the finnal rolling tasks are not leaked in the training data.
-        # - The test_start is automatically aligned to the next day of test_end.  Validation is ignored.
+        # the train_start for training meta model does not necessarily align with final rolling
        train_start = "2008-01-01" if self.train_start is None else self.train_start
        train_end = "2010-12-31" if self.meta_1st_train_end is None else self.meta_1st_train_end
        test_start = (pd.Timestamp(train_end) + pd.Timedelta(days=1)).strftime("%Y-%m-%d")
@@ -287,9 +249,9 @@ class DDGDA(Rolling):
        kwargs = dict(
            task_tpl=proxy_forecast_model_task,
            step=self.step,
-            segments=self.segments,  # keep test period consistent with the dataset yaml
+            segments=0.62,  # keep test period consistent with the dataset yaml
            trunc_days=1 + self.horizon,
-            hist_step_n=self.hist_step_n,
+            hist_step_n=30,
            fill_method=fill_method,
            rolling_ext_days=0,
        )
@@ -306,13 +268,7 @@ class DDGDA(Rolling):
        with R.start(experiment_name=self.meta_exp_name):
            R.log_params(**kwargs)
            mm = MetaModelDS(
-                step=self.step,
-                hist_step_n=kwargs["hist_step_n"],
-                lr=0.001,
-                max_epoch=30,
-                seed=43,
-                alpha=self.alpha,
-                loss_skip_thresh=self.loss_skip_thresh,
+                step=self.step, hist_step_n=kwargs["hist_step_n"], lr=0.001, max_epoch=30, seed=43, alpha=self.alpha
            )
            mm.fit(md)
            R.save_objects(model=mm)
--- a/qlib/contrib/strategy/signal_strategy.py
+++ b/qlib/contrib/strategy/signal_strategy.py
@@ -373,6 +373,7 @@ class WeightStrategyBase(BaseSignalStrategy):


 class EnhancedIndexingStrategy(WeightStrategyBase):
+
    """Enhanced Indexing Strategy

    Enhanced indexing combines the arts of active management and passive management,
--- a/qlib/contrib/tuner/config.py
+++ b/qlib/contrib/tuner/config.py
@@ -4,9 +4,9 @@
 # pylint: skip-file
 # flake8: noqa

+import yaml
 import copy
 import os
-from ruamel.yaml import YAML


 class TunerConfigManager:
@@ -16,8 +16,7 @@ class TunerConfigManager:
        self.config_path = config_path

        with open(config_path) as fp:
-            yaml = YAML(typ="safe", pure=True)
-            config = yaml.load(fp)
+            config = yaml.safe_load(fp)
        self.config = copy.deepcopy(config)

        self.pipeline_ex_config = PipelineExperimentConfig(config.get("experiment", dict()), self)
--- a/qlib/data/client.py
+++ b/qlib/data/client.py
@@ -35,7 +35,7 @@ class Client:
    def connect_server(self):
        """Connect to server."""
        try:
-            self.sio.connect(f"ws://{self.server_host}:{self.server_port}")
+            self.sio.connect("ws://" + self.server_host + ":" + str(self.server_port))
        except socketio.exceptions.ConnectionError:
            self.logger.error("Cannot connect to server - check your network or server status")

--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -536,6 +536,7 @@ class DatasetProvider(abc.ABC):
        """
        if len(fields) == 0:
            raise ValueError("fields cannot be empty")
+        fields = fields.copy()
        column_names = [str(f) for f in fields]
        return column_names

@@ -616,7 +617,7 @@ class DatasetProvider(abc.ABC):

        data = pd.DataFrame(obj)
        if not data.empty and not np.issubdtype(data.index.dtype, np.dtype("M")):
-            # If the underlaying provides the data not in datetime format, we'll convert it into datetime format
+            # If the underlaying provides the data not in datatime formmat, we'll convert it into datetime format
            _calendar = Cal.calendar(freq=freq)
            data.index = _calendar[data.index.values.astype(int)]
        data.index.names = ["datetime"]
--- a/qlib/data/dataset/init.py
+++ b/qlib/data/dataset/init.py
@@ -403,7 +403,7 @@ class TSDataSampler:
            np.full((1, self.data_arr.shape[1]), np.nan, dtype=self.data_arr.dtype),
            axis=0,
        )
-        self.nan_idx = len(self.data_arr) - 1  # The last line is all NaN; setting it to -1 can cause bug #1716
+        self.nan_idx = -1  # The last line is all NaN

        # the data type will be changed
        # The index of usable data is between start_idx and end_idx
--- a/qlib/data/dataset/loader.py
+++ b/qlib/data/dataset/loader.py
@@ -7,7 +7,7 @@ from pathlib import Path
 import warnings
 import pandas as pd

-from typing import Tuple, Union, List, Dict
+from typing import Tuple, Union, List

 from qlib.data import D
 from qlib.utils import load_dataset, init_instance_by_config, time_to_slc_point
@@ -41,7 +41,6 @@ class DataLoader(abc.ABC):
        ----------
        instruments : str or dict
            it can either be the market name or the config file of instruments generated by InstrumentProvider.
-            If the value of instruments is None, it means that no filtering is done.
        start_time : str
            start of the time range.
        end_time : str
@@ -51,11 +50,6 @@ class DataLoader(abc.ABC):
        -------
        pd.DataFrame:
            data load from the under layer source
-
-        Raise
-        -----
-        KeyError:
-            if the instruments filter is not supported, raise KeyError
        """


@@ -253,14 +247,10 @@ class StaticDataLoader(DataLoader, Serializable):

    def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
        self._maybe_load_raw_data()
-
-        # 1) Filter by instruments
        if instruments is None:
            df = self._data
        else:
            df = self._data.loc(axis=0)[:, instruments]
-
-        # 2) Filter by Datetime
        if start_time is None and end_time is None:
            return df  # NOTE: avoid copy by loc
        # pd.Timestamp(None) == NaT, use NaT as index can not fetch correct thing, so do not change None.
@@ -285,61 +275,6 @@ class StaticDataLoader(DataLoader, Serializable):
            self._data = self._config


-class NestedDataLoader(DataLoader):
-    """
-    We have multiple DataLoader, we can use this class to combine them.
-    """
-
-    def __init__(self, dataloader_l: List[Dict], join="left") -> None:
-        """
-
-        Parameters
-        ----------
-        dataloader_l : list[dict]
-            A list of dataloader, for exmaple
-
-            .. code-block:: python
-
-                nd = NestedDataLoader(
-                    dataloader_l=[
-                        {
-                            "class": "qlib.contrib.data.loader.Alpha158DL",
-                        }, {
-                            "class": "qlib.contrib.data.loader.Alpha360DL",
-                            "kwargs": {
-                                "config": {
-                                    "label": ( ["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
-                                }
-                            }
-                        }
-                    ]
-                )
-        join :
-            it will pass to pd.concat when merging it.
-        """
-        super().__init__()
-        self.data_loader_l = [
-            (dl if isinstance(dl, DataLoader) else init_instance_by_config(dl)) for dl in dataloader_l
-        ]
-        self.join = join
-
-    def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
-        df_full = None
-        for dl in self.data_loader_l:
-            try:
-                df_current = dl.load(instruments, start_time, end_time)
-            except KeyError:
-                warnings.warn(
-                    "If the value of `instruments` cannot be processed, it will set instruments to None to get all the data."
-                )
-                df_current = dl.load(instruments=None, start_time=start_time, end_time=end_time)
-            if df_full is None:
-                df_full = df_current
-            else:
-                df_full = pd.merge(df_full, df_current, left_index=True, right_index=True, how=self.join)
-        return df_full.sort_index(axis=1)
-
-
 class DataLoaderDH(DataLoader):
    """DataLoaderDH
    DataLoader based on (D)ata (H)andler
--- a/qlib/data/dataset/processor.py
+++ b/qlib/data/dataset/processor.py
@@ -318,13 +318,9 @@ class CSZScoreNorm(Processor):
        # try not modify original dataframe
        if not isinstance(self.fields_group, list):
            self.fields_group = [self.fields_group]
-        # depress warning by references:
-        # https://stackoverflow.com/questions/20625582/how-to-deal-with-settingwithcopywarning-in-pandas
-        # https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#getting-and-setting-options
-        with pd.option_context("mode.chained_assignment", None):
-            for g in self.fields_group:
-                cols = get_group_columns(df, g)
-                df[cols] = df[cols].groupby("datetime", group_keys=False).apply(self.zscore_func)
+        for g in self.fields_group:
+            cols = get_group_columns(df, g)
+            df[cols] = df[cols].groupby("datetime", group_keys=False).apply(self.zscore_func)
        return df


--- a/qlib/data/dataset/storage.py
+++ b/qlib/data/dataset/storage.py
@@ -104,24 +104,15 @@ class HashingStockStorage(BaseHandlerStorage):
        """

        stock_selector = slice(None)
-        time_selector = slice(None)  # by default not filter by time.

        if level is None:
-            # For directly applying.
            if isinstance(selector, tuple) and self.stock_level < len(selector):
-                # full selector format
                stock_selector = selector[self.stock_level]
-                time_selector = selector[1 - self.stock_level]
            elif isinstance(selector, (list, str)) and self.stock_level == 0:
-                # only stock selector
                stock_selector = selector
        elif level in ("instrument", self.stock_level):
            if isinstance(selector, tuple):
-                # NOTE: How could the stock level selector be a tuple?
                stock_selector = selector[0]
-                raise TypeError(
-                    "I forget why would this case appear. But I think it does not make sense. So we raise a error for that case."
-                )
            elif isinstance(selector, (list, str)):
                stock_selector = selector

@@ -129,7 +120,7 @@ class HashingStockStorage(BaseHandlerStorage):
            raise TypeError(f"stock selector must be type str|list, or slice(None), rather than {stock_selector}")

        if stock_selector == slice(None):
-            return self.hash_df, time_selector
+            return self.hash_df

        if isinstance(stock_selector, str):
            stock_selector = [stock_selector]
@@ -138,7 +129,7 @@ class HashingStockStorage(BaseHandlerStorage):
        for each_stock in sorted(stock_selector):
            if each_stock in self.hash_df:
                select_dict[each_stock] = self.hash_df[each_stock]
-        return select_dict, time_selector
+        return select_dict

    def fetch(
        self,
@@ -147,13 +138,10 @@ class HashingStockStorage(BaseHandlerStorage):
        col_set: Union[str, List[str]] = DataHandler.CS_ALL,
        fetch_orig: bool = True,
    ) -> pd.DataFrame:
-        fetch_stock_df_list, time_selector = self._fetch_hash_df_by_stock(selector=selector, level=level)
-        fetch_stock_df_list = list(fetch_stock_df_list.values())
+        fetch_stock_df_list = list(self._fetch_hash_df_by_stock(selector=selector, level=level).values())
        for _index, stock_df in enumerate(fetch_stock_df_list):
            fetch_col_df = fetch_df_by_col(df=stock_df, col_set=col_set)
-            fetch_index_df = fetch_df_by_index(
-                df=fetch_col_df, selector=time_selector, level="datetime", fetch_orig=fetch_orig
-            )
+            fetch_index_df = fetch_df_by_index(df=fetch_col_df, selector=selector, level=level, fetch_orig=fetch_orig)
            fetch_stock_df_list[_index] = fetch_index_df
        if len(fetch_stock_df_list) == 0:
            index_names = ("instrument", "datetime") if self.stock_level == 0 else ("datetime", "instrument")
--- a/qlib/data/dataset/utils.py
+++ b/qlib/data/dataset/utils.py
@@ -9,7 +9,7 @@ if TYPE_CHECKING:
    from qlib.data.dataset import DataHandler


-def get_level_index(df: pd.DataFrame, level: Union[str, int]) -> int:
+def get_level_index(df: pd.DataFrame, level=Union[str, int]) -> int:
    """

    get the level index of `df` given `level`
--- a/qlib/data/filter.py
+++ b/qlib/data/filter.py
@@ -164,7 +164,6 @@ class SeriesDFilter(BaseDFilter):
        timestamp = []
        _lbool = None
        _ltime = None
-        _cur_start = None
        for _ts, _bool in timestamp_series.items():
            # there is likely to be NAN when the filter series don't have the
            # bool value, so we just change the NAN into False
--- a/qlib/finco/.env.example
+++ b/qlib/finco/.env.example
@@ -0,0 +1,20 @@
+
+OPENAI_API_KEY=your_api_key
+
+# USE_AZURE=True
+# AZURE_API_BASE=your_api_base
+# AZURE_API_VERSION=your_api_version
+
+# Using gpt-4 allows more tokens but means a longer wait time
+# MODEL=gpt-4
+# MAX_TOKENS=1600
+# MAX_RETRY=1000
+
+
+MAX_TOKENS=1600
+MAX_RETRY=120
+
+CONTINOUS_MODE=True
+DEBUG_MODE=True
+
+# TEMPERATURE=
--- a/qlib/finco/README.md
+++ b/qlib/finco/README.md
@@ -0,0 +1,22 @@
+# This is an experimental branch of "`FI`nancial `CO`pilot of `Qlib`"
+
+## Installation
+
+- To run this module, first install Qlib by following the instructions in [install-from-source](/README.md#install-from-source), or run:
+
+```sh
+python -m pip install git+https://github.com/microsoft/qlib.git@finco
+```
+
+- Then install the other dependencies of finco:
+```sh
+python -m pip install pydantic openai python-dotenv
+```
+
+## Quick run
+
+To run this module, you can start the workflow easily with one command:
+
+```sh
+cd qlib/finco; python cli.py "your prompt"
+```
--- a/qlib/finco/init.py
+++ b/qlib/finco/init.py
@@ -0,0 +1,13 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+from pathlib import Path
+
+DIRNAME = Path(__file__).absolute().resolve().parent
+
+
+def get_finco_path() -> Path:
+    """
+    Return the directory of this module (the finco folder), where the templates are located.
+    The install location is not known in advance, so it is derived from this module's `__file__`.
+    """
+    return DIRNAME
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Fivele-Li	753c272202	optimize log	2023-07-20 12:45:07 +08:00
Xu Yang	f93f331a3b	Merge pull request #1609 from microsoft/xuyang1/finetune_prompts finetune prompts	2023-07-19 20:01:07 +08:00
Xu Yang	561086d9e1	commit	2023-07-19 20:00:09 +08:00
Young	8eb129358b	Add prompt logger	2023-07-18 21:47:58 +08:00
Xu Yang	ce8cb517e9	hot fix one small bug in template	2023-07-18 11:52:43 +08:00
Xu Yang	1c5a73aa81	small refinement in finance knowledge	2023-07-17 21:33:40 +08:00
Xu Yang	d909d54362	Merge pull request #1603 from microsoft/xuyang1/add_idea_task add idea task and round1	2023-07-17 20:38:43 +08:00
Xu Yang	13c63eee0a	merge into one commit	2023-07-17 20:33:47 +08:00
you-n-g	b21e044513	Fix find class bug (#1601 )	2023-07-17 20:09:13 +08:00
Fivele-Li	8c1905d1d7	Optimize KnowledgeBase to complete workflow (#1598 ) * optimize KnowledgeBase to complete workflow; * Update Knowledge methods of handle data IO; * Update task to handle multi recorders; * Integrate Knowledge to workflow; * optimize KnowledgeBase to complete workflow * Update TrainTask & AnalyseTask's recorder method; * Update SummarizeTask; * Update Workflow & Topic prompt;	2023-07-17 18:17:04 +08:00
you-n-g	1c9841b15e	Connect TrainTask & Rolling & DDG-DA (#1599 ) * Connect train task to ddg-da & rolling * Pylint & black formatting * Formatting	2023-07-17 09:58:58 +08:00
Xu Yang	5e0873ca81	Merge pull request #1592 from Fivele-Li/update_knowledge_module update knowledge module;	2023-07-16 11:36:31 +08:00
Cadenza-Li	8a56cf69b4	add KnowledgeBase to workflow; * Update CMDTask prompt example for Windows OS; * Windows OS decode output of subprocess in gbk by default, specify encoding format explict; * Add KnowledgeBase's 4 knowledge types to corresponding task;	2023-07-14 22:25:43 +08:00
you-n-g	a19e616bc3	Update test_utils.py	2023-07-14 16:43:43 +08:00
Cadenza-Li	025859acba	Merge branch 'finco' into update_knowledge_module	2023-07-14 16:19:57 +08:00
Xu Yang	e5f685ce08	merge all commit (#1593 ) Co-authored-by: Xu Yang <xuyang1@microsoft.com>	2023-07-14 16:17:24 +08:00
Cadenza-Li	b9b6938e71	Merge branch 'finco' into update_knowledge_module	2023-07-14 14:20:21 +08:00
Young	51a9403b15	Merge remote-tracking branch 'origin/main' into finco	2023-07-14 12:16:51 +08:00
Cadenza-Li	37d83fd747	update knowledge module; * Knowledge.storage to storages list; * optimize Knowledge & Storage save and load method; * optimize Knowledge query prompt;	2023-07-13 17:20:22 +08:00
Cadenza-Li	d7ab6935dd	update knowledge module; * add storage class; * new practice,execute,finance,infrastructure knowledge; * add query method to KnowledgeBase;	2023-07-12 17:23:47 +08:00
Fivele-Li	effed382e9	Optimize prompt for entire learn loop (#1589 ) * Adjust prompt and fix cases * adjust summarizeTask & learn prompts; * fix typos & drop duplicate task method; * adjust learn prompts;	2023-07-11 18:13:52 +08:00
Fivele-Li	86ffd1799d	Add knowledge module and tune summarizeTask (#1582 ) * Add knowledge module * add KnowledgeExperiment add KnowledgeBase; * add knowledge associate prompts to template; * Add Topic class * add Topic to summarize knowledge; * add recorder's metric to summarizeTask; --------- Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-07-06 11:39:36 +08:00
Young	aef11536e3	rename & test	2023-07-04 20:28:08 +08:00
Xu Yang	8b0fdf1623	Merge pull request #1581 from microsoft/xuyang1/fix_singleton_bug fix singleton bug	2023-07-04 16:51:51 +08:00
Xu Yang	9a36f8da20	fix singleton bug	2023-07-04 16:20:02 +08:00
Xu Yang	b7757d5008	Merge pull request #1580 from microsoft/xuyang1/refine_workflow_to_increase_success_rate refine workflow to increase success rate	2023-07-03 17:59:54 +08:00
Xu Yang	ee5e5cfdd8	remove useless code	2023-07-03 17:57:13 +08:00
Xu Yang	6cb87ecfd1	refine code to use qrun	2023-07-03 17:56:22 +08:00
Xu Yang	9119bcdd3c	Merge pull request #1576 from microsoft/xuyang1/add_config_and_code_dump_task refine workflow and prompts	2023-06-30 14:43:49 +08:00
Xu Yang	4fccf8112d	fix one workflow	2023-06-30 14:33:41 +08:00
Xu Yang	73bd79ca1a	merge into one commit	2023-06-30 14:23:40 +08:00
Fivele-Li	7e84f3aae2	Add backtest and backforward task (#1568 ) * * add TrainTask & BacktestTask; * add BackForwardTask; * adjust prompt_template.yaml which default config failed to backtest; * run workflow in loop * add update method to prompt_template.py * remove debug code * Adjust Learn Process * add LearnManager class & use LearnManager to update system prompt; * use qrun to replace recorder for training and backtesting; * Adjust analyser * analyser independent of recorder; * rename analyser's workspace attribution; * analyser load variable by recorder. --------- Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-06-30 10:04:43 +08:00
Fivele-Li	1326ac614d	Add docs to context and retrieve (#1566 ) * add analyser docstring to context; * add retrieve method to context manager; * add notes to retrieve	2023-06-24 21:47:27 +08:00
Fivele-Li	f12184cc0f	Add analyser task and optimize interact (#1552 ) * * optimize interact * add AnalyserTask * optimize logger format and add render feature * format optimize	2023-06-16 11:42:45 +08:00
Xu Yang	a70386ad52	Merge pull request #1550 from microsoft/xuyang1/refine_task_prompts add datahandler and design action task according to component	2023-06-14 14:52:42 +08:00
Xu Yang	74619ed8d8	fix using defaut in record strategy and backtest	2023-06-14 14:52:16 +08:00
Fivele-Li	1a523df007	Optimize log and interact of FinCo (#1549 ) * use FinCoLog for a better interact experience * addition file changes * optimize format * optimize format	2023-06-14 14:48:17 +08:00
Xu Yang	f9cc8a5aaa	remove useless prompt	2023-06-14 10:46:38 +08:00
Xu Yang	7762c5a1fd	add datahandler and design action task according to component	2023-06-13 23:28:27 +08:00
Xu Yang	fa7ef29281	Merge pull request #1548 from microsoft/xuyang1/add_dump_to_file_task add simple readme & move prompt templates to outer yaml file to make the code clean	2023-06-13 15:29:13 +08:00
Xu Yang	429c9a7c66	format	2023-06-13 15:27:59 +08:00
Xu Yang	80fbc00792	move prompt templates to yaml file to make code clean	2023-06-13 15:21:19 +08:00
Xu Yang	01accec24c	update code	2023-06-12 16:25:16 +08:00
Fivele-Li	1d88830b0d	Add recorder task and visualize (#1542 ) * add recorder task * add batch generate summarize report unittest. * * add recorder to RecorderTask; * add matplot figure to analyzer.py * add image to markdown; * Add some log * update figure path. --------- Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: Cadenza-Li <362237642@qq.com>	2023-06-12 15:48:00 +08:00
you-n-g	ad7498e287	Edit yaml task (#1538 ) * Edit yaml task * update comments	2023-06-02 00:44:41 +08:00
you-n-g	73d51f05b4	Init workspace and CMDTask (#1537 ) * Update setup.py and config * WIP * init_workspace and CMDTask * Delete test_sumarize.py	2023-06-01 23:32:35 +08:00
Fivele-Li	3b56b8e6c0	Optimize summarize task prompt and others (#1533 ) * 1.update prompt; 2.update fetch information method. * 1.update prompt; 2.save result to markdown; * 1.get context info from context_manager; 2.run the entire process successfully.	2023-06-01 21:22:24 +08:00
you-n-g	40e0c329ba	Add configurable dataset (#1535 )	2023-06-01 20:05:02 +08:00
Xu Yang	e376648860	Merge pull request #1536 from microsoft/xuyang1/add_debug_mode_to_save_cache add a debug mode to speed up debug process	2023-06-01 19:44:17 +08:00
Xu Yang	5f37f32184	update code	2023-06-01 19:38:26 +08:00
Xu Yang	d46b4c1ebf	Merge pull request #1534 from microsoft/xuyang1/add_code_implementation_task add code implementation task	2023-06-01 18:13:05 +08:00
Xu Yang	0515524b51	add code implementation code	2023-06-01 18:04:31 +08:00
Xu Yang	cda32d5703	Merge pull request #1532 from microsoft/xuyang1/add-plan-and-config-task-implementation add the initial version of plan and config task implementation	2023-06-01 11:20:04 +08:00
Xu Yang	e2332a004b	imporove some words in prompt	2023-06-01 01:09:14 +08:00
Xu Yang	08d9dbccc9	update v1 code containing SLplan and config action	2023-06-01 00:36:04 +08:00
Fivele-Li	e7cd93a36d	add base method for summarization; (#1530 )	2023-05-31 15:50:34 +08:00
Xu Yang	3919678028	split task into workflow and task to make the strcture more clear	2023-05-31 11:45:25 +08:00
Xu Yang	421b1403b2	Merge pull request #1528 from microsoft/xuyang1/refine_task_and_implement_workflow_task_as_example Xuyang1/refine task and implement workflow task as example	2023-05-31 11:36:36 +08:00
Xu Yang	94102fb742	remove tasktype variable	2023-05-31 11:35:54 +08:00
Cadenza-Li	74a5d7c8af	add parse method for summarization;	2023-05-31 00:08:21 +08:00
Xu Yang	ce39b4b6f8	add qlib auto init so logger can display info	2023-05-30 21:52:35 +08:00
Xu Yang	2af35d9c89	second commit	2023-05-30 20:20:16 +08:00
Xu Yang	f37643550b	first round	2023-05-30 20:19:58 +08:00
Xu Yang	55611aa43e	Merge pull request #1527 from microsoft/xuyang1/add_openai_api_support add openai interface support	2023-05-30 13:44:10 +08:00
Xu Yang	f24253efd2	add openai interface support	2023-05-30 13:42:01 +08:00
Young	7c4f3b8a7d	Initial interface for discussion	2023-05-24 12:18:31 +08:00