fix(rl): 🐛 dataFrame selection problem in high-freq execution workflow (#1348 )

Update README.md
Merge pull request #632 from Mingzhe-Han/high-freq-execution
2026-06-29 00:51:19 +08:00 · 2022-11-11 11:32:22 +08:00 · 2022-01-24 17:27:42 +08:00 · 2021-10-03 21:13:13 +08:00 · 2021-09-17 14:21:57 +00:00 · 2021-09-17 11:09:07 +08:00
280 changed files with 27511 additions and 6571 deletions
--- a/.github/ISSUE_TEMPLATE/bug-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -0,0 +1,41 @@
+---
+name: "\U0001F41B Bug Report"
+about: Submit a bug report to help us improve Qlib
+labels: bug
+
+---
+
+## 🐛 Bug Description
+
+<!-- A clear and concise description of what the bug is. -->
+
+## To Reproduce
+
+Steps to reproduce the behavior:
+
+1.
+1.
+1.
+
+
+## Expected Behavior
+
+<!-- A clear and concise description of what you expected to happen. -->
+
+## Screenshot
+
+<!-- A screenshot of the error message or anything else that shouldn't appear. -->
+
+## Environment
+
+**Note**: Users can run `cd scripts && python collect_info.py all` under the project directory to get system information
+and paste it here directly.
+
+ - Qlib version:
+ - Python version:
+ - OS (`Windows`, `Linux`, `MacOS`):
+ - Commit number (optional, please provide it if you are using the dev version):
+
+## Additional Notes
+
+<!-- Add any other information about the problem here. -->
--- a/.github/ISSUE_TEMPLATE/documentation.md
+++ b/.github/ISSUE_TEMPLATE/documentation.md
@@ -0,0 +1,9 @@
+---
+name: "\U0001F4D6 Documentation"
+about: Report an issue related to documentation
+
+---
+
+## 📖 Documentation
+
+<!-- Please specify whether it's tutorial part or API reference part, and describe it.-->
--- a/.github/ISSUE_TEMPLATE/feature-request.md
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,25 @@
+---
+name: "\U0001F31FFeature Request"
+about: Request for a new Qlib feature
+labels: enhancement
+
+---
+
+## 🌟 Feature Description
+<!-- A clear and concise description of the feature proposal -->
+
+## Motivation
+
+1. Application scenario
+2. Related works (Papers, Github repos etc.):
+3. Any other relevant and important information:
+
+<!-- Please describe why the feature is important. -->
+
+## Alternatives
+
+<!-- A short description of any alternative solutions or features you've considered. -->
+
+## Additional Notes
+
+<!-- Add any other context or screenshots about the feature request here. -->
--- a/.github/ISSUE_TEMPLATE/question.md
+++ b/.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,10 @@
+---
+name: "❓Questions & Help"
+about: Have some questions? We can offer help.
+labels: question
+
+---
+
+## ❓ Questions and Help
+
+We sincerely suggest that you carefully read the [documentation](http://qlib.readthedocs.io/) of our library as well as the official [paper](https://arxiv.org/abs/2009.11189). After that, if you still feel puzzled, please describe the question clearly under this issue.
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,24 @@
+<!--- Provide a general summary of your changes in the Title above -->
+
+## Description
+<!--- Describe your changes in detail -->
+
+## Motivation and Context
+<!--- Are there any related issues? If so, please put the link here. -->
+<!--- Why is this change required? What problem does it solve? -->
+
+## How Has This Been Tested?
+- [ ] Pass the test by running: `pytest qlib/tests/test_all_pipeline.py` under the parent directory of `qlib`.
+- [ ] If you are adding a new feature, test on your own test scripts.
+
+<!--- **ATTENTION**: If you are adding a new feature, please make sure your code is **correctly tested**. If our test scripts do not cover your cases, please provide your own test scripts under the `tests` folder and test them. More information about test scripts can be found [here](https://docs.python.org/3/library/unittest.html#basic-example), or you could refer to those we provide under the `tests` folder. -->
+
+## Screenshots of Test Results (if appropriate):
+1. Pipeline test:
+2. Your own tests:
+
+## Types of changes
+<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
+- [ ] Fix bugs
+- [ ] Add new feature
+- [ ] Update documentation
--- a/.github/brew_install.sh
+++ b/.github/brew_install.sh
@@ -0,0 +1,649 @@
+#!/bin/bash
+set -u
+
+# First check if the OS is Linux.
+if [[ "$(uname)" = "Linux" ]]; then
+  HOMEBREW_ON_LINUX=1
+fi
+
+# On macOS, this script installs to /usr/local only.
+# On Linux, it installs to /home/linuxbrew/.linuxbrew if you have sudo access
+# and ~/.linuxbrew otherwise.
+# To install elsewhere (which is unsupported)
+# you can untar https://github.com/Homebrew/brew/tarball/master
+# anywhere you like.
+if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+  HOMEBREW_PREFIX="/usr/local"
+  HOMEBREW_REPOSITORY="/usr/local/Homebrew"
+  HOMEBREW_CACHE="${HOME}/Library/Caches/Homebrew"
+
+  STAT="stat -f"
+  CHOWN="/usr/sbin/chown"
+  CHGRP="/usr/bin/chgrp"
+  GROUP="admin"
+  TOUCH="/usr/bin/touch"
+else
+  HOMEBREW_PREFIX_DEFAULT="/home/linuxbrew/.linuxbrew"
+  HOMEBREW_CACHE="${HOME}/.cache/Homebrew"
+
+  STAT="stat --printf"
+  CHOWN="/bin/chown"
+  CHGRP="/bin/chgrp"
+  GROUP="$(id -gn)"
+  TOUCH="/bin/touch"
+fi
+BREW_REPO="https://github.com/Homebrew/brew"
+
+# TODO: bump version when new macOS is released
+MACOS_LATEST_SUPPORTED="10.15"
+# TODO: bump version when new macOS is released
+MACOS_OLDEST_SUPPORTED="10.13"
+
+# For Homebrew on Linux
+REQUIRED_RUBY_VERSION=2.6  # https://github.com/Homebrew/brew/pull/6556
+REQUIRED_GLIBC_VERSION=2.13  # https://docs.brew.sh/Homebrew-on-Linux#requirements
+
+# no analytics during installation
+export HOMEBREW_NO_ANALYTICS_THIS_RUN=1
+export HOMEBREW_NO_ANALYTICS_MESSAGE_OUTPUT=1
+
+# string formatters
+if [[ -t 1 ]]; then
+  tty_escape() { printf "\033[%sm" "$1"; }
+else
+  tty_escape() { :; }
+fi
+tty_mkbold() { tty_escape "1;$1"; }
+tty_underline="$(tty_escape "4;39")"
+tty_blue="$(tty_mkbold 34)"
+tty_red="$(tty_mkbold 31)"
+tty_bold="$(tty_mkbold 39)"
+tty_reset="$(tty_escape 0)"
+
+have_sudo_access() {
+  local -a args
+  if [[ -n "${SUDO_ASKPASS-}" ]]; then
+    args=("-A")
+  fi
+
+  if [[ -z "${HAVE_SUDO_ACCESS-}" ]]; then
+    if [[ -n "${args[*]-}" ]]; then
+      /usr/bin/sudo "${args[@]}" -l mkdir &>/dev/null
+    else
+      /usr/bin/sudo -l mkdir &>/dev/null
+    fi
+    HAVE_SUDO_ACCESS="$?"
+  fi
+
+  if [[ -z "${HOMEBREW_ON_LINUX-}" ]] && [[ "$HAVE_SUDO_ACCESS" -ne 0 ]]; then
+    abort "Need sudo access on macOS (e.g. the user $USER to be an Administrator)!"
+  fi
+
+  return "$HAVE_SUDO_ACCESS"
+}
+
+shell_join() {
+  local arg
+  printf "%s" "$1"
+  shift
+  for arg in "$@"; do
+    printf " "
+    printf "%s" "${arg// /\ }"
+  done
+}
+
+chomp() {
+  printf "%s" "${1/"$'\n'"/}"
+}
+
+ohai() {
+  printf "${tty_blue}==>${tty_bold} %s${tty_reset}\n" "$(shell_join "$@")"
+}
+
+# Print a red "Warning: <message>" line for the first argument.
+# The message is written to stderr so that diagnostics stay out of any
+# captured or piped stdout.
+warn() {
+  printf "${tty_red}Warning${tty_reset}: %s\n" "$(chomp "$1")" >&2
+}
+
+# Print the error message in $1 and terminate the script with exit status 1.
+# The message goes to stderr so it is not lost when stdout is captured or piped.
+abort() {
+  printf "%s\n" "$1" >&2
+  exit 1
+}
+
+execute() {
+  if ! "$@"; then
+    abort "$(printf "Failed during: %s" "$(shell_join "$@")")"
+  fi
+}
+
+execute_sudo() {
+  local -a args=("$@")
+  if [[ -n "${SUDO_ASKPASS-}" ]]; then
+    args=("-A" "${args[@]}")
+  fi
+  if have_sudo_access; then
+    ohai "/usr/bin/sudo" "${args[@]}"
+    execute "/usr/bin/sudo" "${args[@]}"
+  else
+    ohai "${args[@]}"
+    execute "${args[@]}"
+  fi
+}
+
+getc() {
+  local save_state
+  save_state=$(/bin/stty -g)
+  /bin/stty raw -echo
+  IFS= read -r -n 1 -d '' "$@"
+  /bin/stty "$save_state"
+}
+
+wait_for_user() {
+  local c
+  echo
+  echo "Press RETURN to continue or any other key to abort"
+  getc c
+  # we test for \r and \n because some stuff does \r instead
+  if ! [[ "$c" == $'\r' || "$c" == $'\n' ]]; then
+    exit 1
+  fi
+}
+
+major_minor() {
+  echo "${1%%.*}.$(x="${1#*.}"; echo "${x%%.*}")"
+}
+
+if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+  macos_version="$(major_minor "$(/usr/bin/sw_vers -productVersion)")"
+fi
+
+version_gt() {
+  [[ "${1%.*}" -gt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -gt "${2#*.}" ]]
+}
+version_ge() {
+  [[ "${1%.*}" -gt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -ge "${2#*.}" ]]
+}
+version_lt() {
+  [[ "${1%.*}" -lt "${2%.*}" ]] || [[ "${1%.*}" -eq "${2%.*}" && "${1#*.}" -lt "${2#*.}" ]]
+}
+
+should_install_command_line_tools() {
+  if [[ -n "${HOMEBREW_ON_LINUX-}" ]]; then
+    return 1
+  fi
+
+  if version_gt "$macos_version" "10.13"; then
+    ! [[ -e "/Library/Developer/CommandLineTools/usr/bin/git" ]]
+  else
+    ! [[ -e "/Library/Developer/CommandLineTools/usr/bin/git" ]] ||
+      ! [[ -e "/usr/include/iconv.h" ]]
+  fi
+}
+
+get_permission() {
+  $STAT "%A" "$1"
+}
+
+user_only_chmod() {
+  [[ -d "$1" ]] && [[ "$(get_permission "$1")" != "755" ]]
+}
+
+exists_but_not_writable() {
+  [[ -e "$1" ]] && ! [[ -r "$1" && -w "$1" && -x "$1" ]]
+}
+
+get_owner() {
+  $STAT "%u" "$1"
+}
+
+file_not_owned() {
+  [[ "$(get_owner "$1")" != "$(id -u)" ]]
+}
+
+get_group() {
+  $STAT "%g" "$1"
+}
+
+file_not_grpowned() {
+  [[ " $(id -G "$USER") " != *" $(get_group "$1") "*  ]]
+}
+
+# Please sync with 'test_ruby()' in 'Library/Homebrew/utils/ruby.sh' from Homebrew/brew repository.
+test_ruby () {
+  if [[ ! -x $1 ]]
+  then
+    return 1
+  fi
+
+  "$1" --enable-frozen-string-literal --disable=gems,did_you_mean,rubyopt -rrubygems -e \
+    "abort if Gem::Version.new(RUBY_VERSION.to_s.dup).to_s.split('.').first(2) != \
+              Gem::Version.new('$REQUIRED_RUBY_VERSION').to_s.split('.').first(2)" 2>/dev/null
+}
+
+# Return 0 when none of the `ruby` executables listed by `which -a ruby`
+# passes test_ruby, and 1 as soon as one of them does.
+no_usable_ruby() {
+  local ruby_exec
+  # Split the `which -a` output on newlines only. IFS is declared local so the
+  # caller's value is restored on every exit path, including the early
+  # `return 1`; a leaked newline-only IFS would stop later unquoted expansions
+  # such as $STAT from splitting on spaces.
+  local IFS=$'\n'
+  for ruby_exec in $(which -a ruby); do
+    if test_ruby "$ruby_exec"; then
+      return 1
+    fi
+  done
+  return 0
+}
+
+outdated_glibc() {
+  local glibc_version
+  glibc_version=$(ldd --version | head -n1 | grep -o '[0-9.]*$' | grep -o '^[0-9]\+\.[0-9]\+')
+  version_lt "$glibc_version" "$REQUIRED_GLIBC_VERSION"
+}
+
+if [[ -n "${HOMEBREW_ON_LINUX-}" ]] && no_usable_ruby && outdated_glibc
+then
+    abort "$(cat <<-EOFABORT
+	Homebrew requires Ruby $REQUIRED_RUBY_VERSION which was not found on your system.
+	Homebrew portable Ruby requires Glibc version $REQUIRED_GLIBC_VERSION or newer,
+	and your Glibc version is too old.
+	See ${tty_underline}https://docs.brew.sh/Homebrew-on-Linux#requirements${tty_reset}
+	Install Ruby $REQUIRED_RUBY_VERSION and add its location to your PATH.
+	EOFABORT
+    )"
+fi
+
+# USER isn't always set so provide a fall back for the installer and subprocesses.
+if [[ -z "${USER-}" ]]; then
+  USER="$(chomp "$(id -un)")"
+  export USER
+fi
+
+# Invalidate sudo timestamp before exiting (if it wasn't active before).
+if ! /usr/bin/sudo -n -v 2>/dev/null; then
+  trap '/usr/bin/sudo -k' EXIT
+fi
+
+# Things can fail later if `pwd` doesn't exist.
+# Also sudo prints a warning message for no good reason
+cd "/usr" || exit 1
+
+####################################################################### script
+if ! command -v git >/dev/null; then
+    abort "$(cat <<EOABORT
+You must install Git before installing Homebrew. See:
+  ${tty_underline}https://docs.brew.sh/Installation${tty_reset}
+EOABORT
+)"
+fi
+
+if ! command -v curl >/dev/null; then
+    abort "$(cat <<EOABORT
+You must install cURL before installing Homebrew. See:
+  ${tty_underline}https://docs.brew.sh/Installation${tty_reset}
+EOABORT
+)"
+fi
+
+if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+ have_sudo_access
+else
+  if [[ -n "${CI-}" ]] || [[ -w "$HOMEBREW_PREFIX_DEFAULT" ]] || [[ -w "/home/linuxbrew" ]] || [[ -w "/home" ]]; then
+    HOMEBREW_PREFIX="$HOMEBREW_PREFIX_DEFAULT"
+  else
+    trap exit SIGINT
+    if [[ $(/usr/bin/sudo -n -l mkdir 2>&1) != *"mkdir"* ]]; then
+      ohai "Select the Homebrew installation directory"
+      echo "- ${tty_bold}Enter your password${tty_reset} to install to ${tty_underline}${HOMEBREW_PREFIX_DEFAULT}${tty_reset} (${tty_bold}recommended${tty_reset})"
+      echo "- ${tty_bold}Press Control-D${tty_reset} to install to ${tty_underline}$HOME/.linuxbrew${tty_reset}"
+      echo "- ${tty_bold}Press Control-C${tty_reset} to cancel installation"
+    fi
+    if have_sudo_access; then
+      HOMEBREW_PREFIX="$HOMEBREW_PREFIX_DEFAULT"
+    else
+      HOMEBREW_PREFIX="$HOME/.linuxbrew"
+    fi
+    trap - SIGINT
+  fi
+  HOMEBREW_REPOSITORY="${HOMEBREW_PREFIX}/Homebrew"
+fi
+
+if [[ "$UID" == "0" ]]; then
+  abort "Don't run this as root!"
+elif [[ -d "$HOMEBREW_PREFIX" && ! -x "$HOMEBREW_PREFIX" ]]; then
+  abort "$(cat <<EOABORT
+The Homebrew prefix, ${HOMEBREW_PREFIX}, exists but is not searchable. If this is
+not intentional, please restore the default permissions and try running the
+installer again:
+    sudo chmod 775 ${HOMEBREW_PREFIX}
+EOABORT
+)"
+fi
+
+if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+  if version_lt "$macos_version" "10.7"; then
+    abort "$(cat <<EOABORT
+Your Mac OS X version is too old. See:
+  ${tty_underline}https://github.com/mistydemeo/tigerbrew${tty_reset}
+EOABORT
+)"
+  elif version_lt "$macos_version" "10.10"; then
+    abort "Your OS X version is too old"
+  elif version_gt "$macos_version" "$MACOS_LATEST_SUPPORTED" || \
+    version_lt "$macos_version" "$MACOS_OLDEST_SUPPORTED"; then
+    who="We"
+    what=""
+    if version_gt "$macos_version" "$MACOS_LATEST_SUPPORTED"; then
+      what="pre-release version"
+    else
+      who+=" (and Apple)"
+      what="old version"
+    fi
+    ohai "You are using macOS ${macos_version}."
+    ohai "${who} do not provide support for this ${what}."
+
+    echo "$(cat <<EOS
+This installation may not succeed.
+After installation, you will encounter build failures with some formulae.
+Please create pull requests instead of asking for help on Homebrew\'s GitHub,
+Discourse, Twitter or IRC. You are responsible for resolving any issues you
+experience while you are running this ${what}.
+EOS
+)
+"
+  fi
+fi
+
+ohai "This script will install:"
+echo "${HOMEBREW_PREFIX}/bin/brew"
+echo "${HOMEBREW_PREFIX}/share/doc/homebrew"
+echo "${HOMEBREW_PREFIX}/share/man/man1/brew.1"
+echo "${HOMEBREW_PREFIX}/share/zsh/site-functions/_brew"
+echo "${HOMEBREW_PREFIX}/etc/bash_completion.d/brew"
+echo "${HOMEBREW_REPOSITORY}"
+
+# Keep relatively in sync with
+# https://github.com/Homebrew/brew/blob/master/Library/Homebrew/keg.rb
+directories=(bin etc include lib sbin share opt var
+             Frameworks
+             etc/bash_completion.d lib/pkgconfig
+             share/aclocal share/doc share/info share/locale share/man
+             share/man/man1 share/man/man2 share/man/man3 share/man/man4
+             share/man/man5 share/man/man6 share/man/man7 share/man/man8
+             var/log var/homebrew var/homebrew/linked
+             bin/brew)
+group_chmods=()
+for dir in "${directories[@]}"; do
+  if exists_but_not_writable "${HOMEBREW_PREFIX}/${dir}"; then
+    group_chmods+=("${HOMEBREW_PREFIX}/${dir}")
+  fi
+done
+
+# zsh refuses to read from these directories if group writable
+directories=(share/zsh share/zsh/site-functions)
+zsh_dirs=()
+for dir in "${directories[@]}"; do
+  zsh_dirs+=("${HOMEBREW_PREFIX}/${dir}")
+done
+
+directories=(bin etc include lib sbin share var opt
+             share/zsh share/zsh/site-functions
+             var/homebrew var/homebrew/linked
+             Cellar Caskroom Homebrew Frameworks)
+mkdirs=()
+for dir in "${directories[@]}"; do
+  if ! [[ -d "${HOMEBREW_PREFIX}/${dir}" ]]; then
+    mkdirs+=("${HOMEBREW_PREFIX}/${dir}")
+  fi
+done
+
+user_chmods=()
+if [[ "${#zsh_dirs[@]}" -gt 0 ]]; then
+  for dir in "${zsh_dirs[@]}"; do
+    if user_only_chmod "${dir}"; then
+      user_chmods+=("${dir}")
+    fi
+  done
+fi
+
+chmods=()
+if [[ "${#group_chmods[@]}" -gt 0 ]]; then
+  chmods+=("${group_chmods[@]}")
+fi
+if [[ "${#user_chmods[@]}" -gt 0 ]]; then
+  chmods+=("${user_chmods[@]}")
+fi
+
+chowns=()
+chgrps=()
+if [[ "${#chmods[@]}" -gt 0 ]]; then
+  for dir in "${chmods[@]}"; do
+    if file_not_owned "${dir}"; then
+      chowns+=("${dir}")
+    fi
+    if file_not_grpowned "${dir}"; then
+      chgrps+=("${dir}")
+    fi
+  done
+fi
+
+if [[ "${#group_chmods[@]}" -gt 0 ]]; then
+  ohai "The following existing directories will be made group writable:"
+  printf "%s\n" "${group_chmods[@]}"
+fi
+if [[ "${#user_chmods[@]}" -gt 0 ]]; then
+  ohai "The following existing directories will be made writable by user only:"
+  printf "%s\n" "${user_chmods[@]}"
+fi
+if [[ "${#chowns[@]}" -gt 0 ]]; then
+  ohai "The following existing directories will have their owner set to ${tty_underline}${USER}${tty_reset}:"
+  printf "%s\n" "${chowns[@]}"
+fi
+if [[ "${#chgrps[@]}" -gt 0 ]]; then
+  ohai "The following existing directories will have their group set to ${tty_underline}${GROUP}${tty_reset}:"
+  printf "%s\n" "${chgrps[@]}"
+fi
+if [[ "${#mkdirs[@]}" -gt 0 ]]; then
+  ohai "The following new directories will be created:"
+  printf "%s\n" "${mkdirs[@]}"
+fi
+
+if should_install_command_line_tools; then
+  ohai "The Xcode Command Line Tools will be installed."
+fi
+
+if [[ -t 0 && -z "${CI-}" ]]; then
+  wait_for_user
+fi
+
+if [[ -d "${HOMEBREW_PREFIX}" ]]; then
+  if [[ "${#chmods[@]}" -gt 0 ]]; then
+    execute_sudo "/bin/chmod" "u+rwx" "${chmods[@]}"
+  fi
+  if [[ "${#group_chmods[@]}" -gt 0 ]]; then
+    execute_sudo "/bin/chmod" "g+rwx" "${group_chmods[@]}"
+  fi
+  if [[ "${#user_chmods[@]}" -gt 0 ]]; then
+    execute_sudo "/bin/chmod" "755" "${user_chmods[@]}"
+  fi
+  if [[ "${#chowns[@]}" -gt 0 ]]; then
+    execute_sudo "$CHOWN" "$USER" "${chowns[@]}"
+  fi
+  if [[ "${#chgrps[@]}" -gt 0 ]]; then
+    execute_sudo "$CHGRP" "$GROUP" "${chgrps[@]}"
+  fi
+else
+  execute_sudo "/bin/mkdir" "-p" "${HOMEBREW_PREFIX}"
+  if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+    execute_sudo "$CHOWN" "root:wheel" "${HOMEBREW_PREFIX}"
+  else
+    execute_sudo "$CHOWN" "$USER:$GROUP" "${HOMEBREW_PREFIX}"
+  fi
+fi
+
+if [[ "${#mkdirs[@]}" -gt 0 ]]; then
+  execute_sudo "/bin/mkdir" "-p" "${mkdirs[@]}"
+  execute_sudo "/bin/chmod" "g+rwx" "${mkdirs[@]}"
+  execute_sudo "$CHOWN" "$USER" "${mkdirs[@]}"
+  execute_sudo "$CHGRP" "$GROUP" "${mkdirs[@]}"
+fi
+
+if ! [[ -d "${HOMEBREW_CACHE}" ]]; then
+  if [[ -z "${HOMEBREW_ON_LINUX-}" ]]; then
+    execute_sudo "/bin/mkdir" "-p" "${HOMEBREW_CACHE}"
+  else
+    execute "/bin/mkdir" "-p" "${HOMEBREW_CACHE}"
+  fi
+fi
+if exists_but_not_writable "${HOMEBREW_CACHE}"; then
+  execute_sudo "/bin/chmod" "g+rwx" "${HOMEBREW_CACHE}"
+fi
+if file_not_owned "${HOMEBREW_CACHE}"; then
+  execute_sudo "$CHOWN" "$USER" "${HOMEBREW_CACHE}"
+fi
+if file_not_grpowned "${HOMEBREW_CACHE}"; then
+  execute_sudo "$CHGRP" "$GROUP" "${HOMEBREW_CACHE}"
+fi
+if [[ -d "${HOMEBREW_CACHE}" ]]; then
+  execute "$TOUCH" "${HOMEBREW_CACHE}/.cleaned"
+fi
+
+if should_install_command_line_tools && version_ge "$macos_version" "10.13"; then
+  ohai "Searching online for the Command Line Tools"
+  # This temporary file prompts the 'softwareupdate' utility to list the Command Line Tools
+  clt_placeholder="/tmp/.com.apple.dt.CommandLineTools.installondemand.in-progress"
+  execute_sudo "$TOUCH" "$clt_placeholder"
+
+  clt_label_command="/usr/sbin/softwareupdate -l |
+                      grep -B 1 -E 'Command Line Tools' |
+                      awk -F'*' '/^ *\\*/ {print \$2}' |
+                      sed -e 's/^ *Label: //' -e 's/^ *//' |
+                      sort -V |
+                      tail -n1"
+  clt_label="$(chomp "$(/bin/bash -c "$clt_label_command")")"
+
+  if [[ -n "$clt_label" ]]; then
+    ohai "Installing $clt_label"
+    execute_sudo "/usr/sbin/softwareupdate" "-i" "$clt_label"
+    execute_sudo "/bin/rm" "-f" "$clt_placeholder"
+    execute_sudo "/usr/bin/xcode-select" "--switch" "/Library/Developer/CommandLineTools"
+  fi
+fi
+
+# Headless install may have failed, so fallback to original 'xcode-select' method
+if should_install_command_line_tools && test -t 0; then
+  ohai "Installing the Command Line Tools (expect a GUI popup):"
+  execute_sudo "/usr/bin/xcode-select" "--install"
+  echo "Press any key when the installation has completed."
+  getc
+  execute_sudo "/usr/bin/xcode-select" "--switch" "/Library/Developer/CommandLineTools"
+fi
+
+if [[ -z "${HOMEBREW_ON_LINUX-}" ]] && ! output="$(/usr/bin/xcrun clang 2>&1)" && [[ "$output" == *"license"* ]]; then
+  abort "$(cat <<EOABORT
+You have not agreed to the Xcode license.
+Before running the installer again please agree to the license by opening
+Xcode.app or running:
+    sudo xcodebuild -license
+EOABORT
+)"
+fi
+
+ohai "Downloading and installing Homebrew..."
+(
+  # This subshell is not a function body, so `return` is invalid here: bash
+  # would only print an error and carry on, running the git commands below in
+  # the wrong directory. `exit 1` leaves the subshell with a failure status so
+  # the trailing `|| exit 1` stops the installer.
+  cd "${HOMEBREW_REPOSITORY}" >/dev/null || exit 1
+
+  # we do it in four steps to avoid merge errors when reinstalling
+  execute "git" "init" "-q"
+
+  # "git remote add" will fail if the remote is defined in the global config
+  execute "git" "config" "remote.origin.url" "${BREW_REPO}"
+  execute "git" "config" "remote.origin.fetch" "+refs/heads/*:refs/remotes/origin/*"
+
+  # ensure we don't munge line endings on checkout
+  execute "git" "config" "core.autocrlf" "false"
+
+  execute "git" "fetch" "origin" "--force"
+  execute "git" "fetch" "origin" "--tags" "--force"
+
+  execute "git" "reset" "--hard" "origin/master"
+
+  execute "ln" "-sf" "${HOMEBREW_REPOSITORY}/bin/brew" "${HOMEBREW_PREFIX}/bin/brew"
+
+) || exit 1
+
+if [[ ":${PATH}:" != *":${HOMEBREW_PREFIX}/bin:"* ]]; then
+  warn "${HOMEBREW_PREFIX}/bin is not in your PATH."
+fi
+
+ohai "Installation successful!"
+echo
+
+# Use the shell's audible bell.
+if [[ -t 1 ]]; then
+  printf "\a"
+fi
+
+# Use an extra newline and bold to avoid this being missed.
+ohai "Homebrew has enabled anonymous aggregate formulae and cask analytics."
+echo "$(cat <<EOS
+${tty_bold}Read the analytics documentation (and how to opt-out) here:
+  ${tty_underline}https://docs.brew.sh/Analytics${tty_reset}
+No analytics data has been sent yet (or will be during this \`install\` run).
+EOS
+)
+"
+
+ohai "Homebrew is run entirely by unpaid volunteers. Please consider donating:"
+echo "$(cat <<EOS
+  ${tty_underline}https://github.com/Homebrew/brew#donations${tty_reset}
+EOS
+)
+"
+
+(
+  # Record in the repository's git config that the analytics notices were shown.
+  # `exit 1` (not `return`, which is invalid outside a function) leaves the
+  # subshell on failure so the trailing `|| exit 1` stops the installer instead
+  # of running `git config` in the wrong directory.
+  cd "${HOMEBREW_REPOSITORY}" >/dev/null || exit 1
+  execute "git" "config" "--replace-all" "homebrew.analyticsmessage" "true"
+  execute "git" "config" "--replace-all" "homebrew.caskanalyticsmessage" "true"
+) || exit 1
+
+ohai "Next steps:"
+echo "- Run \`brew help\` to get started"
+echo "- Further documentation: "
+echo "    ${tty_underline}https://docs.brew.sh${tty_reset}"
+
+if [[ -n "${HOMEBREW_ON_LINUX-}" ]]; then
+  case "$SHELL" in
+    */bash*)
+      if [[ -r "$HOME/.bash_profile" ]]; then
+        shell_profile="$HOME/.bash_profile"
+      else
+        shell_profile="$HOME/.profile"
+      fi
+      ;;
+    */zsh*)
+      shell_profile="$HOME/.zprofile"
+      ;;
+    *)
+      shell_profile="$HOME/.profile"
+      ;;
+  esac
+
+  echo "- Install the Homebrew dependencies if you have sudo access:"
+
+  if [[ $(command -v apt-get) ]]; then
+    echo "    sudo apt-get install build-essential"
+  elif [[ $(command -v yum) ]]; then
+    echo "    sudo yum groupinstall 'Development Tools'"
+  elif [[ $(command -v pacman) ]]; then
+    echo "    sudo pacman -S base-devel"
+  elif [[ $(command -v apk) ]]; then
+    echo "    sudo apk add build-base"
+  fi
+
+  cat <<EOS
+    See ${tty_underline}https://docs.brew.sh/linux${tty_reset} for more information
+- Add Homebrew to your ${tty_bold}PATH${tty_reset} in ${tty_underline}${shell_profile}${tty_reset}:
+    echo 'eval \$(${HOMEBREW_PREFIX}/bin/brew shellenv)' >> ${shell_profile}
+    eval \$(${HOMEBREW_PREFIX}/bin/brew shellenv)
+- We recommend that you install GCC:
+    brew install gcc
+
+EOS
+fi
--- a/.github/release-drafter.yml
+++ b/.github/release-drafter.yml
@@ -0,0 +1,33 @@
+name-template: 'v$RESOLVED_VERSION 🌈'
+tag-template: 'v$RESOLVED_VERSION'
+categories:
+  - title: '🌟 Features'
+    labels:
+      - 'feature'
+      - 'enhancement'
+  - title: '🐛 Bug Fixes'
+    labels:
+      - 'fix'
+      - 'bugfix'
+      - 'bug'
+  # A list of labels must use the plural `labels` key, as the other categories do.
+  - title: '📚 Documentation'
+    labels:
+      - 'doc'
+      - 'documentation'
+change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
+change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
+version-resolver:
+  major:
+    labels:
+      - 'major'
+  minor:
+    labels:
+      - 'minor'
+  patch:
+    labels:
+      - 'patch'
+  default: patch
+template: |
+  ## Changes
+
+  $CHANGES
--- a/.github/stale.yml
+++ b/.github/stale.yml
@@ -0,0 +1,62 @@
+# Configuration for probot-stale - https://github.com/probot/stale
+
+# Number of days of inactivity before an Issue or Pull Request becomes stale
+daysUntilStale: 60
+
+# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
+# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
+daysUntilClose: 7
+
+# Only issues or pull requests with all of these labels are checked for staleness. Defaults to `[]` (disabled)
+onlyLabels: []
+
+# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
+exemptLabels:
+  - bug
+  - pinned
+  - security
+  - "[Status] Maybe Later"
+
+# Set to true to ignore issues in a project (defaults to false)
+exemptProjects: false
+
+# Set to true to ignore issues in a milestone (defaults to false)
+exemptMilestones: false
+
+# Set to true to ignore issues with an assignee (defaults to false)
+exemptAssignees: false
+
+# Label to use when marking as stale
+staleLabel: wontfix
+
+# Comment to post when marking as stale. Set to `false` to disable
+markComment: >
+  This issue has been automatically marked as stale because it has not had
+  recent activity. It will be closed if no further activity occurs. Thank you
+  for your contributions.
+
+# Comment to post when removing the stale label.
+# unmarkComment: >
+#   Your comment here.
+
+# Comment to post when closing a stale Issue or Pull Request.
+# closeComment: >
+#   Your comment here.
+
+# Limit the number of actions per hour, from 1-30. Default is 30
+limitPerRun: 30
+
+# Limit to only `issues` or `pulls`
+# only: issues
+
+# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
+# pulls:
+#   daysUntilStale: 30
+#   markComment: >
+#     This pull request has been automatically marked as stale because it has not had
+#     recent activity. It will be closed if no further activity occurs. Thank you
+#     for your contributions.
+
+# issues:
+#   exemptLabels:
+#     - confirmed
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,61 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  deploy_with_bdist_wheel:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [windows-latest, macos-latest]
+        python-version: [3.6, 3.7, 3.8]
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build wheel on Windows
+      run: |
+        pip install numpy
+        pip install cython
+        python setup.py bdist_wheel
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        twine upload dist/*
+        
+  deploy_with_manylinux:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build wheel on Linux
+      uses: RalfG/python-wheels-manylinux-build@v0.3.1-manylinux2010_x86_64
+      with:
+        python-versions: 'cp36-cp36m cp37-cp37m cp38-cp38'
+        build-requirements: 'numpy cython'
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.7
+    - name: Install dependencies
+      run: |
+        pip install twine  
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        twine upload dist/pyqlib-*-manylinux*.whl
--- a/.github/workflows/release-drafter.yml
+++ b/.github/workflows/release-drafter.yml
@@ -0,0 +1,16 @@
+name: Release Drafter
+
+on:
+  push:
+    # branches to consider in the event; optional, defaults to all
+    branches:
+      - main
+
+jobs:
+  update_release_draft:
+    runs-on: ubuntu-latest
+    steps:
+      # Drafts your next Release notes as Pull Requests are merged into "main"
+      - uses: release-drafter/release-drafter@v5.11.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -0,0 +1,118 @@
+name: Test 
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [windows-latest, ubuntu-16.04, ubuntu-18.04, ubuntu-20.04, macos-latest]
+        python-version: [3.6, 3.7, 3.8, 3.9]
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Lint with Black
+      run: |
+        cd ..
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install black
+          $CONDA\\python.exe -m black qlib -l 120 --check --diff
+        else
+          sudo $CONDA/bin/python -m pip install black
+          $CONDA/bin/python -m black qlib -l 120 --check --diff
+        fi
+      shell: bash
+
+    # Test Qlib installed with pip
+    - name: Install Qlib with pip
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install pyqlib --ignore-installed ruamel.yaml --user
+        else
+          sudo $CONDA/bin/python -m pip install pyqlib --ignore-installed ruamel.yaml
+        fi
+      shell: bash
+    
+    - name: Install Lightgbm for MacOS
+      if: runner.os == 'macOS'
+      run: |
+        /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Microsoft/qlib/main/.github/brew_install.sh)"
+        HOMEBREW_NO_AUTO_UPDATE=1 brew install lightgbm
+
+    - name: Test data downloads
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        else
+          $CONDA/bin/python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --interval 1d --region cn
+        fi
+      shell: bash
+
+    - name: Test workflow by config (install from pip)
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml 
+          $CONDA\\python.exe -m pip uninstall -y pyqlib
+        else
+          $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml 
+          sudo $CONDA/bin/python -m pip uninstall -y pyqlib
+        fi
+      shell: bash
+      
+    # Test Qlib installed from source
+    - name: Install Qlib from source
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install --upgrade cython
+          $CONDA\\python.exe -m pip install numpy jupyter jupyter_contrib_nbextensions
+          $CONDA\\python.exe -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while installing locally won't
+          $CONDA\\python.exe setup.py install
+        else
+          sudo $CONDA/bin/python -m pip install --upgrade cython
+          sudo $CONDA/bin/python -m pip install numpy jupyter jupyter_contrib_nbextensions
+          sudo $CONDA/bin/python -m pip install -U scipy scikit-learn # installing without this line will cause errors on GitHub Actions, while installing locally won't
+          sudo $CONDA/bin/python setup.py install
+        fi
+      shell: bash
+
+    - name: Install test dependencies
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pip install --upgrade pip
+          $CONDA\\python.exe -m pip install black pytest
+        else
+          sudo $CONDA/bin/python -m pip install --upgrade pip
+          sudo $CONDA/bin/python -m pip install black pytest
+        fi
+      shell: bash 
+
+    - name: Unit tests with Pytest
+      run: |
+        cd tests
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe -m pytest . --durations=0
+        else
+          $CONDA/bin/python -m pytest . --durations=0
+        fi
+      shell: bash
+
+    - name: Test workflow by config (install from source)
+      run: |
+        if [ "$RUNNER_OS" == "Windows" ]; then
+          $CONDA\\python.exe qlib\\workflow\\cli.py examples\\benchmarks\\LightGBM\\workflow_config_lightgbm_Alpha158.yaml 
+        else
+          $CONDA/bin/python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml 
+        fi
+      shell: bash
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/

 *.pyc
+*.pyd
 *.so
 *.ipynb
 .ipynb_checkpoints
@@ -31,3 +32,5 @@ mlruns/

 tags

+.pytest_cache/
+.vscode/
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -0,0 +1,21 @@
+# .readthedocs.yml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Build documentation in the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+
+# Build all formats
+formats: all
+
+# Optionally set the version of Python and requirements required to build your docs
+python:
+  version: 3.7
+  install:
+    - requirements: docs/requirements.txt
+    - method: setuptools
+      path: .
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -114,7 +114,7 @@ Version 0.4.1
 Version 0.4.2
 --------------------
 - Refactor DataHandler
- Add ``ALPHA360`` DataHandler
+- Add ``Alpha360`` DataHandler


 Version 0.4.3
@@ -150,3 +150,15 @@ Version 0.4.6
 - Some bugs are fixed
    - The default config in `Version 0.4.5` is not friendly to daily frequency data.
    - Backtest error in TopkWeightStrategy when `WithInteract=True`.
+
+
+Version 0.5.0
+--------------------
+- First opensource version
+    - Refine the docs, code
+    - Add baselines
+    - public data crawler
+
+Version greater than Version 0.5.0
+----------------------------------
+Please refer to `Github release Notes <https://github.com/microsoft/qlib/releases>`_
--- a/README.md
+++ b/README.md
@@ -1,44 +1,59 @@
+[![Python Versions](https://img.shields.io/pypi/pyversions/pyqlib.svg?logo=python&logoColor=white)](https://pypi.org/project/pyqlib/#files)
+[![Platform](https://img.shields.io/badge/platform-linux%20%7C%20windows%20%7C%20macos-lightgrey)](https://pypi.org/project/pyqlib/#files)
+[![PyPI Versions](https://img.shields.io/pypi/v/pyqlib)](https://pypi.org/project/pyqlib/#history)
+[![Upload Python Package](https://github.com/microsoft/qlib/workflows/Upload%20Python%20Package/badge.svg)](https://pypi.org/project/pyqlib/)
+[![Github Actions Test Status](https://github.com/microsoft/qlib/workflows/Test/badge.svg?branch=main)](https://github.com/microsoft/qlib/actions)
+[![Documentation Status](https://readthedocs.org/projects/qlib/badge/?version=latest)](https://qlib.readthedocs.io/en/latest/?badge=latest)
+[![License](https://img.shields.io/pypi/l/pyqlib)](LICENSE)
+[![Join the chat at https://gitter.im/Microsoft/qlib](https://badges.gitter.im/Microsoft/qlib.svg)](https://gitter.im/Microsoft/qlib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

-<div style="align: center">
-<img src="docs/_static/img/logo/white_bg_rec+word.png" />
-</div>
+
+<p align="center">
+  <img src="http://fintech.msra.cn/images_v060/logo/1.png" />
+</p>


 Qlib is an AI-oriented quantitative investment platform, which aims to realize the potential, empower the research, and create the value of AI technologies in quantitative investment.

-With Qlib, you can easily try your ideas to create better Quant investment strategies.
+It contains the full ML pipeline of data processing, model training, back-testing; and covers the entire chain of quantitative investment: alpha seeking, risk modeling, portfolio optimization, and order execution. 
+
+With Qlib, users can easily try ideas to create better Quant investment strategies.

 For more details, please refer to our paper ["Qlib: An AI-oriented Quantitative Investment Platform"](https://arxiv.org/abs/2009.11189).

-
 - [Framework of Qlib](#framework-of-qlib)
- [Quick Start](#Quick-Start)
-  - [Installation](#Installation)
-  - [Data Preparation](#Data-Preparation)
-  - [Auto Quant Research Workflow with](#Auto-Quant-Research-Workflow)
-  - [Building Customized Quant Research Workflow by Code](#Building-Customized-Quant-Research-Workflow-by-Code)
- [More About Qlib](#More-About-Qlib)
- [Offline mode and online mode of data server](#Offline-Mode-and-Online-Mode-of-the-Data-Server)
-  - [Performance of Qlib Data Server](#Performance-of-Qlib-Data-Server)
- [Contributing](#Contributing)
+- [Quick Start](#quick-start)
+  - [Installation](#installation)
+  - [Data Preparation](#data-preparation)
+  - [Auto Quant Research Workflow](#auto-quant-research-workflow)
+  - [Building Customized Quant Research Workflow by Code](#building-customized-quant-research-workflow-by-code)
+- [**Quant Model Zoo**](#quant-model-zoo)
+  - [Run a single model](#run-a-single-model)
+  - [Run multiple models](#run-multiple-models)
+- [**Quant Dataset Zoo**](#quant-dataset-zoo)
+- [High-frequency execution](#high-frequency-execution)
+- [More About Qlib](#more-about-qlib)
+- [Offline Mode and Online Mode](#offline-mode-and-online-mode)
+  - [Performance of Qlib Data Server](#performance-of-qlib-data-server)
+- [Related Reports](#related-reports)
+- [Contributing](#contributing)



 # Framework of Qlib
+
 <div style="align: center">
-<img src="docs/_static/img/framework.png" />
+<img src="http://fintech.msra.cn/images_v060/framework.png?v=0.1" />
 </div>


 At the module level, Qlib is a platform that consists of the above components. The components are designed as loose-coupled modules and each component could be used stand-alone.

-| Name                | Description                                                                                                                                                                                                                                                   |
-| ------              | -----                                                                                                                                                                                                                                                         |
-| `Data layer`        | `DataServer` focuses on providing high-performance infrastructure for users to manage and retrieve raw data. `DataEnhancement` will preprocess the data and provide the best dataset to be fed into the models.                                                    |
-| `Interday Model`    | `Interday model` focuses on producing prediction scores (aka. _alpha_). Models are trained by `Model Creator` and managed by `Model Manager`. Users could choose one or multiple models for prediction. Multiple models could be combined with `Ensemble` module. |
-| `Interday Strategy` | `Portfolio Generator` will take prediction scores as input and output the orders based on the current position to achieve the target portfolio.                                                                                                                      |
-| `Intraday Trading`  | `Order Executor` is responsible for executing orders output by `Interday Strategy` and returning the executed results.                                                                                                                                        |
-| `Analysis`          | Users could get a detailed analysis report of forecasting signals and portfolios in this part.                                                                                                                                                                     |
+| Name                   | Description                                                                                                                                                                                                                                                                                                                                                             |
+| ------                 | -----                                                                                                                                                                                                                                                                                                                                                                   |
+| `Infrastructure` layer | `Infrastructure` layer provides underlying support for Quant research. `DataServer` provides high-performance infrastructure for users to manage and retrieve raw data. `Trainer` provides flexible interface to control the training process of models which enable algorithms controlling the training process.                                                       |
+| `Workflow` layer       | `Workflow` layer covers the whole workflow of quantitative investment. `Information Extractor` extracts data for models. `Forecast Model` focuses on producing all kinds of forecast signals (e.g. _alpha_, risk) for other modules. With these signals `Portfolio Generator` will generate the target portfolio and produce orders to be executed by `Order Executor`. |
+| `Interface` layer      | `Interface` layer tries to present a user-friendly interface for the underlying system. `Analyser` module will provide users detailed analysis reports of forecasting signals, portfolios and execution results                                                                                                                                                                 |

 * The modules with hand-drawn style are under development and will be released in the future.
 * The modules with dashed borders are highly user-customizable and extendible.
@@ -48,36 +63,70 @@ At the module level, Qlib is a platform that consists of the above components. T

 This quick start guide tries to demonstrate
 1. It's very easy to build a complete Quant research workflow and try your ideas with _Qlib_.
-1. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
+2. Though with *public data* and *simple models*, machine learning technologies **work very well** in practical Quant investment.
+
+Here is a quick **[demo](https://terminalizer.com/view/3f24561a4470)** that shows how to install ``Qlib`` and run LightGBM with ``qrun``. **But**, please make sure you have already prepared the data following the [instruction](#data-preparation).
+

 ## Installation

-Users can easily intsall ``Qlib`` according to the following steps:
+This table demonstrates the supported Python version of `Qlib`:
+|               | install with pip           | install from source  | plot |
+| ------------- |:---------------------:|:--------------------:|:----:|
+| Python 3.6    | :heavy_check_mark:    | :heavy_check_mark: (only with `Anaconda`)                  | :heavy_check_mark: |
+| Python 3.7    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
+| Python 3.8    | :heavy_check_mark:    | :heavy_check_mark:   | :heavy_check_mark: |
+| Python 3.9    | :x:                   | :heavy_check_mark:   | :x: |

-* Before installing ``Qlib`` from source, you need to install some dependencies:
+**Note**: 
+1. Please pay attention that installing cython in Python 3.6 will raise some error when installing ``Qlib`` from source. If users use Python 3.6 on their machines, it is recommended to *upgrade* Python to version 3.7 or use `conda`'s Python to install ``Qlib`` from source.
+2. For Python 3.9, `Qlib` supports running workflows such as training models, doing backtests and plotting most of the related figures (those included in [notebook](examples/workflow_by_code.ipynb)). However, plotting for the *model performance* is not supported for now and we will fix this when the dependent packages are upgraded in the future.
+
+### Install with pip
+Users can easily install ``Qlib`` by pip according to the following command.
+
+```bash
+  pip install pyqlib
+```
+
+**Note**: pip will install the latest stable qlib. However, the main branch of qlib is in active development. If you want to test the latest scripts or functions in the main branch, please install qlib with the methods below.
+
+### Install from source
+Also, users can install the latest dev version of ``Qlib`` from the source code according to the following steps:
+
+* Before installing ``Qlib`` from source, users need to install some dependencies:

  ```bash
  pip install numpy
  pip install --upgrade  cython
  ```

-* Clone the repository and install ``Qlib``:
-
-  ```bash
-  git clone https://github.com/microsoft/qlib.git && cd qlib
-  python setup.py install
-  ```
+* Clone the repository and install ``Qlib`` as follows.
+  * If you haven't installed qlib by the command ``pip install pyqlib`` before:
+    ```bash
+    git clone https://github.com/microsoft/qlib.git && cd qlib
+    python setup.py install
+    ```
+  * If you have already installed the stable version by the command ``pip install pyqlib``:
+    ```bash
+    git clone https://github.com/microsoft/qlib.git && cd qlib
+    pip install .
+    ```
+  **Note**: **Only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.

+**Tips**: If you fail to install `Qlib` or run the examples in your environment,  comparing your steps and the [CI workflow](.github/workflows/test.yml) may help you find the problem.

 ## Data Preparation
 Load and prepare data by running the following code:
  ```bash
-  python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
+  python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
  ```

 This dataset is created by public data collected by [crawler scripts](scripts/data_collector/), which have been released in
 the same repository.
-Users could create the same dataset with it.
+Users could create the same dataset with it. 
+
+*Please pay **ATTENTION** that the data is collected from [Yahoo Finance](https://finance.yahoo.com/lookup) and the data might not be perfect. We recommend users to prepare their own data if they have a high-quality dataset. For more information, users can refer to the [related document](https://qlib.readthedocs.io/en/latest/component/data.html#converting-csv-format-into-qlib-format)*.

 <!-- 
 - Run the initialization code and get stock data:
@@ -107,50 +156,54 @@ Users could create the same dataset with it.
 -->

 ## Auto Quant Research Workflow
-Qlib provides a tool named `Estimator` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 
+Qlib provides a tool named `qrun` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). You can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 

-1. Quant Research Workflow: Run  `Estimator` with [estimator_config.yaml](examples/estimator/estimator_config.yaml) as following.
+1. Quant Research Workflow: Run `qrun` with lightgbm workflow config ([workflow_config_lightgbm_Alpha158.yaml](examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml)) as follows.
    ```bash
      cd examples  # Avoid running program under the directory contains `qlib`
-      estimator -c estimator/estimator_config.yaml
+      qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
    ```
-    The result of `Estimator` is as follows, please refer to please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. 
+    If users want to use `qrun` under debug mode, please use the following command:
+    ```bash
+    python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+    ```
+    The result of `qrun` is as follows, please refer to [Intraday Trading](https://qlib.readthedocs.io/en/latest/component/backtest.html) for more details about the result. 

    ```bash

-                                                      risk
-    excess_return_without_cost mean               0.000605
-                               std                0.005481
-                               annualized_return  0.152373
-                               information_ratio  1.751319
-                               max_drawdown      -0.059055
-    excess_return_with_cost    mean               0.000410
-                               std                0.005478
-                               annualized_return  0.103265
-                               information_ratio  1.187411
-                               max_drawdown      -0.075024
-
-
+    'The following are analysis results of the excess return without cost.'
+                           risk
+    mean               0.000708
+    std                0.005626
+    annualized_return  0.178316
+    information_ratio  1.996555
+    max_drawdown      -0.081806
+    'The following are analysis results of the excess return with cost.'
+                           risk
+    mean               0.000512
+    std                0.005626
+    annualized_return  0.128982
+    information_ratio  1.444287
+    max_drawdown      -0.091078
    ```
-    Here are detailed documents for [Estimator](https://qlib.readthedocs.io/en/latest/component/estimator.html).
+    Here are detailed documents for `qrun` and [workflow](https://qlib.readthedocs.io/en/latest/component/workflow.html).

-2. Graphical Reports Analysis: Run `examples/estimator/analyze_from_estimator.ipynb` with `jupyter notebook` to get graphical reports
+2. Graphical Reports Analysis: Run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports
    - Forecasting signal (model prediction) analysis
      - Cumulative Return of groups
-      ![Cumulative Return](docs/_static/img/analysis/analysis_model_cumulative_return.png)
+      ![Cumulative Return](http://fintech.msra.cn/images_v060/analysis/analysis_model_cumulative_return.png?v=0.1)
      - Return distribution
-      ![long_short](docs/_static/img/analysis/analysis_model_long_short.png)
+      ![long_short](http://fintech.msra.cn/images_v060/analysis/analysis_model_long_short.png?v=0.1)
      - Information Coefficient (IC)
-      ![Information Coefficient](docs/_static/img/analysis/analysis_model_IC.png)        
-      ![Monthly IC](docs/_static/img/analysis/analysis_model_monthly_IC.png)        
-      ![IC](docs/_static/img/analysis/analysis_model_NDQ.png)
+      ![Information Coefficient](http://fintech.msra.cn/images_v060/analysis/analysis_model_IC.png?v=0.1)        
+      ![Monthly IC](http://fintech.msra.cn/images_v060/analysis/analysis_model_monthly_IC.png?v=0.1)
+      ![IC](http://fintech.msra.cn/images_v060/analysis/analysis_model_NDQ.png?v=0.1)
      - Auto Correlation of forecasting signal (model prediction)
-      ![Auto Correlation](docs/_static/img/analysis/analysis_model_auto_correlation.png)
-
+      ![Auto Correlation](http://fintech.msra.cn/images_v060/analysis/analysis_model_auto_correlation.png?v=0.1)

    - Portfolio analysis
      - Backtest return
-      ![Report](docs/_static/img/analysis/report.png)
+      ![Report](http://fintech.msra.cn/images_v060/analysis/report.png?v=0.1)
      <!-- 
      - Score IC
      ![Score IC](docs/_static/img/score_ic.png)
@@ -163,9 +216,68 @@ Qlib provides a tool named `Estimator` to run the whole workflow automatically (
      -->

 ## Building Customized Quant Research Workflow by Code
-The automatic workflow may not suite the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/train_backtest_analyze.ipynb) is a demo for customized Quant research workflow by code
+The automatic workflow may not suit the research workflow of all Quant researchers. To support a flexible Quant research workflow, Qlib also provides a modularized interface to allow researchers to build their own workflow by code. [Here](examples/workflow_by_code.ipynb) is a demo for customized Quant research workflow by code.


+# [Quant Model Zoo](examples/benchmarks)
+
+Here is a list of models built on `Qlib`.
+- [GBDT based on XGBoost (Tianqi Chen, et al. 2016)](qlib/contrib/model/xgboost.py)
+- [GBDT based on LightGBM (Guolin Ke, et al. 2017)](qlib/contrib/model/gbdt.py)
+- [GBDT based on Catboost (Liudmila Prokhorenkova, et al. 2017)](qlib/contrib/model/catboost_model.py)
+- [MLP based on pytorch](qlib/contrib/model/pytorch_nn.py)
+- [LSTM based on pytorch (Sepp Hochreiter, et al. 1997)](qlib/contrib/model/pytorch_lstm.py)
+- [GRU based on pytorch (Kyunghyun Cho, et al. 2014)](qlib/contrib/model/pytorch_gru.py)
+- [ALSTM based on pytorch (Yao Qin, et al. 2017)](qlib/contrib/model/pytorch_alstm.py)
+- [GATs based on pytorch (Petar Velickovic, et al. 2017)](qlib/contrib/model/pytorch_gats.py)
+- [SFM based on pytorch (Liheng Zhang, et al. 2017)](qlib/contrib/model/pytorch_sfm.py)
+- [TFT based on tensorflow (Bryan Lim, et al. 2019)](examples/benchmarks/TFT/tft.py)
+- [TabNet based on pytorch (Sercan O. Arik, et al. 2019)](qlib/contrib/model/pytorch_tabnet.py)
+
+Your PR of new Quant models is highly welcomed.
+
+The performance of each model on the `Alpha158` and `Alpha360` dataset can be found [here](examples/benchmarks/README.md).
+
+## Run a single model
+All the models listed above are runnable with ``Qlib``. Users can find the config files we provide and some details about the model through the [benchmarks](examples/benchmarks) folder. More information can be retrieved at the model files listed above.
+
+`Qlib` provides three different ways to run a single model, users can pick the one that fits their cases best:
+- User can use the tool `qrun` mentioned above to run a model's workflow based on a config file.
+- User can create a `workflow_by_code` python script based on the [one](examples/workflow_by_code.py) listed in the `examples` folder.
+
+- User can use the script [`run_all_model.py`](examples/run_all_model.py) listed in the `examples` folder to run a model. Here is an example of the specific shell command to be used: `python run_all_model.py --models=lightgbm`, where the `--models` argument can take any number of models listed above (the available models can be found in [benchmarks](examples/benchmarks/)). For more use cases, please refer to the file's [docstrings](examples/run_all_model.py).
+
+## Run multiple models
+`Qlib` also provides a script [`run_all_model.py`](examples/run_all_model.py) which can run multiple models for several iterations. (**Note**: the script only supports *Linux* for now. Other OS will be supported in the future. Besides, it doesn't support running the same model multiple times in parallel either, and this will be fixed in future development too.)
+
+The script will create a unique virtual environment for each model, and delete the environments after training. Thus, only experiment results such as `IC` and `backtest` results will be generated and stored.
+
+Here is an example of running all the models for 10 iterations:
+```bash
+python run_all_model.py 10
+```
+
+It also provides the API to run specific models at once. For more use cases, please refer to the file's [docstrings](examples/run_all_model.py). 
+
+
+# Quant Dataset Zoo
+Dataset plays a very important role in Quant. Here is a list of the datasets built on `Qlib`:
+
+| Dataset                                    | US Market | China Market |
+| --                                         | --        | --           |
+| [Alpha360](./qlib/contrib/data/handler.py) |  √        |  √           |
+| [Alpha158](./qlib/contrib/data/handler.py) |  √        |  √           |
+
+[Here](https://qlib.readthedocs.io/en/latest/advanced/alpha.html) is a tutorial to build dataset with `Qlib`.
+Your PR to build new Quant dataset is highly welcomed.
+
+# High-Frequency Execution
+High-frequency order execution is a fundamental problem in quantitative finance.
+It aims at fulfilling a specific trading order, either liquidation or acquirement, for a given instrument.
+AI has the potential to mine patterns from a huge mass of high-frequency market data and help traders make better decisions during order execution.
+Here is a list of solutions built on `Qlib`.
+- [Universal Trading for Order Execution with Oracle Policy Distillation](examples/trade/)
+

 # More About Qlib
 The detailed documents are organized in [docs](docs/).
@@ -183,12 +295,12 @@ Qlib is in active and continuing development. Our plan is in the roadmap, which



-# Offline Mode and Online Mode of the Data Server
-The data server of Qlib can either deployed as offline mode or online mode. The default mode is offline mode.
+# Offline Mode and Online Mode
+The data server of Qlib can be deployed in either `Offline` mode or `Online` mode. The default mode is offline mode.

-Under offline mode, the data will be deployed locally. 
+Under `Offline` mode, the data will be deployed locally. 

-Under online mode, the data will be deployed as a shared data service. The data and their cache will be shared by all the clients. The data retrieval performance is expected to be improved due to a higher rate of cache hits. It will consume less disk space, too. The documents of the online mode can be found in [Qlib-Server](https://qlib-server.readthedocs.io/). The online mode can be deployed automatically with [Azure CLI based scripts](https://qlib-server.readthedocs.io/en/latest/build.html#one-click-deployment-in-azure). The source code of online data server can be found in [qlib-server repository](https://github.com/microsoft/qlib-server).
+Under `Online` mode, the data will be deployed as a shared data service. The data and their cache will be shared by all the clients. The data retrieval performance is expected to be improved due to a higher rate of cache hits. It will consume less disk space, too. The documents of the online mode can be found in [Qlib-Server](https://qlib-server.readthedocs.io/). The online mode can be deployed automatically with [Azure CLI based scripts](https://qlib-server.readthedocs.io/en/latest/build.html#one-click-deployment-in-azure). The source code of online data server can be found in [Qlib-Server repository](https://github.com/microsoft/qlib-server).

 ## Performance of Qlib Data Server
 The performance of data processing is important to data-driven methods like AI technologies. As an AI-oriented platform, Qlib provides a solution for data storage and data processing. To demonstrate the performance of Qlib data server, we
@@ -209,7 +321,11 @@ Such overheads greatly slow down the data loading process.
 Qlib data are stored in a compact format, which is efficient to be combined into arrays for scientific computation.


-
+# Related Reports
+- [Guide To Qlib: Microsoft’s AI Investment Platform](https://analyticsindiamag.com/qlib/)
+- [【华泰金工林晓明团队】微软AI量化投资平台Qlib体验——华泰人工智能系列之四十](https://mp.weixin.qq.com/s/Brcd7im4NibJOJzZfMn6tQ)
+- [微软也搞AI量化平台？还是开源的！](https://mp.weixin.qq.com/s/47bP5YwxfTp2uTHjUBzJQQ)
+- [微矿Qlib：业内首个AI量化投资开源平台](https://mp.weixin.qq.com/s/vsJv7lsgjEi-ALYUz4CvtQ)


 # Contributing
--- a/docs/FAQ/FAQ.rst
+++ b/docs/FAQ/FAQ.rst
@@ -0,0 +1,72 @@
+
+Qlib FAQ
+############
+
+Qlib Frequently Asked Questions
+================================
+.. contents::
+    :depth: 1
+    :local:
+    :backlinks: none
+
+------
+
+
+1. RuntimeError: An attempt has been made to start a new process before the current process has finished its bootstrapping phase...
+------------------------------------------------------------------------------------------------------------------------------------
+
+.. code-block:: console
+
+    RuntimeError:
+            An attempt has been made to start a new process before the
+            current process has finished its bootstrapping phase.
+
+            This probably means that you are not using fork to start your
+            child processes and you have forgotten to use the proper idiom
+            in the main module:
+
+                if __name__ == '__main__':
+                    freeze_support()
+                    ...
+
+            The "freeze_support()" line can be omitted if the program
+            is not going to be frozen to produce an executable.
+
+This is caused by a limitation of multiprocessing on Windows. Please refer to `here <https://stackoverflow.com/a/24374798>`_ for more info.
+
+**Solution**: Call ``D.features`` inside the ``if __name__ == '__main__'`` clause of the main module. For example:
+
+.. code-block:: python
+
+    import qlib
+    from qlib.data import D
+
+
+    if __name__ == "__main__":
+        qlib.init()
+        instruments = ["SH600000"]
+        fields = ["$close", "$change"]
+        df = D.features(instruments, fields, start_time='2010-01-01', end_time='2012-12-31')
+        print(df.head())
+
+
+
+2. qlib.data.cache.QlibCacheException: It sees the key(...) of the redis lock has existed in your redis db now.
+-----------------------------------------------------------------------------------------------------------------
+
+It seems the key of the redis lock already exists in your redis db. You can use the following commands to clear your redis keys and then rerun your commands:
+
+.. code-block:: console
+
+    $ redis-cli
+    > select 1
+    > flushdb
+
+If the issue is not resolved, use ``keys *`` to find if multiple keys exist. If so, try using ``flushall`` to clear all the keys.
+
+.. note::
+
+    ``qlib.config.redis_task_db`` defaults to ``1``; users can change it with ``qlib.init(redis_task_db=<other_db>)``.
+
+
+Also, feel free to post a new issue in our GitHub repository. We always check each issue carefully and try our best to solve them.
--- a/docs/_static/demo.sh
+++ b/docs/_static/demo.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+git clone https://github.com/microsoft/qlib.git
+cd qlib
+ls
+pip install pyqlib
+# or
+# pip install numpy
+# pip install --upgrade cython
+# python setup.py install
+cd examples
+ls
+qrun benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
--- a/docs/_static/img/analysis/analysis_model_IC.png
+++ b/docs/_static/img/analysis/analysis_model_IC.png
--- a/docs/_static/img/analysis/analysis_model_NDQ.png
+++ b/docs/_static/img/analysis/analysis_model_NDQ.png
--- a/docs/_static/img/analysis/analysis_model_auto_correlation.png
+++ b/docs/_static/img/analysis/analysis_model_auto_correlation.png
--- a/docs/_static/img/analysis/analysis_model_cumulative_return.png
+++ b/docs/_static/img/analysis/analysis_model_cumulative_return.png
--- a/docs/_static/img/analysis/analysis_model_long_short.png
+++ b/docs/_static/img/analysis/analysis_model_long_short.png
--- a/docs/_static/img/analysis/analysis_model_monthly_IC.png
+++ b/docs/_static/img/analysis/analysis_model_monthly_IC.png
--- a/docs/_static/img/analysis/report.png
+++ b/docs/_static/img/analysis/report.png
--- a/docs/_static/img/analysis/risk_analysis_annualized_return.png
+++ b/docs/_static/img/analysis/risk_analysis_annualized_return.png
--- a/docs/_static/img/analysis/risk_analysis_bar.png
+++ b/docs/_static/img/analysis/risk_analysis_bar.png
--- a/docs/_static/img/analysis/risk_analysis_information_ratio.png
+++ b/docs/_static/img/analysis/risk_analysis_information_ratio.png
--- a/docs/_static/img/analysis/risk_analysis_max_drawdown.png
+++ b/docs/_static/img/analysis/risk_analysis_max_drawdown.png
--- a/docs/_static/img/analysis/risk_analysis_std.png
+++ b/docs/_static/img/analysis/risk_analysis_std.png
--- a/docs/_static/img/analysis/score_ic.png
+++ b/docs/_static/img/analysis/score_ic.png
--- a/docs/_static/img/framework.png
+++ b/docs/_static/img/framework.png
--- a/docs/advanced/alpha.rst
+++ b/docs/advanced/alpha.rst
@@ -1,4 +1,5 @@
 .. _alpha:
+
 ===========================
 Building Formulaic Alphas 
 ===========================
@@ -49,56 +50,37 @@ Users can use ``Data Handler`` to build formulaic alphas `MACD` in qlib:

 .. code-block:: python

-    >>> from qlib.contrib.estimator.handler import QLibDataHandler
-    >>> fields = ['(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'] # MACD
-    >>> names = ['MACD']
-    >>> labels = ['Ref($vwap, -2)/Ref($vwap, -1) - 1'] # label
-    >>> label_names = ['LABEL']
-    >>> data_handler = QLibDataHandler(start_date='2010-01-01', end_date='2017-12-31', fields=fields, names=names, labels=labels, label_names=label_names)
-    >>> TRAINER_CONFIG = {
-    ...     "train_start_date": "2007-01-01",
-    ...     "train_end_date": "2014-12-31",
-    ...     "validate_start_date": "2015-01-01",
-    ...     "validate_end_date": "2016-12-31",
-    ...  "test_start_date": "2017-01-01",
-    ...  "test_end_date": "2020-08-01",
-    ... }
-    >>> feature_train, label_train, feature_validate, label_validate, feature_test, label_test = data_handler.get_split_data(**TRAINER_CONFIG)
-    >>> print(feature_train, label_train)
-                                MACD
-    instrument  datetime            
-    SH600004    2012-01-04 -0.030853
-                2012-01-05 -0.030452
-                2012-01-06 -0.028252
-                2012-01-09 -0.024507
-                2012-01-10 -0.019744
-    ...                         ...
-    SZ300273    2014-12-25  0.031339
-                2014-12-26  0.029695
-                2014-12-29  0.025577
-                2014-12-30  0.020493
-                2014-12-31  0.017089
-
-    [605882 rows x 1 columns]
-                               label
-    instrument  datetime            
-    SH600004    2012-01-04  0.003021
-                2012-01-05  0.017434
-                2012-01-06  0.015490
-                2012-01-09  0.002324
-                2012-01-10 -0.002542
-    ...                         ...
-    SZ300273    2014-12-25 -0.032454
-                2014-12-26 -0.016638
-                2014-12-29  0.008263
-                2014-12-30 -0.011985
-                2014-12-31  0.047797
-
-    [605882 rows x 1 columns]
+    >>> from qlib.data.dataset.loader import QlibDataLoader
+    >>> MACD_EXP = '(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'
+    >>> fields = [MACD_EXP] # MACD
+    >>> names = ['MACD']
+    >>> labels = ['Ref($close, -2)/Ref($close, -1) - 1'] # label
+    >>> label_names = ['LABEL']
+    >>> data_loader_config = {
+    ...     "feature": (fields, names),
+    ...     "label": (labels, label_names)
+    ... }
+    >>> data_loader = QlibDataLoader(config=data_loader_config)
+    >>> df = data_loader.load(instruments='csi300', start_time='2010-01-01', end_time='2017-12-31')
+    >>> print(df)
+                            feature     label
+                               MACD     LABEL
+    datetime   instrument                    
+    2010-01-04 SH600000   -0.011547 -0.019672
+               SH600004    0.002745 -0.014721
+               SH600006    0.010133  0.002911
+               SH600008   -0.001113  0.009818
+               SH600009    0.025878 -0.017758
+    ...                         ...       ...
+    2017-12-29 SZ300124    0.007306 -0.005074
+               SZ300136   -0.013492  0.056352
+               SZ300144   -0.000966  0.011853
+               SZ300251    0.004383  0.021739
+               SZ300315   -0.030557  0.012455

 Reference
 ===========

-To kown more about ``Data Handler``, please refer to `Data Handler <../component/data.html>`_
+To learn more about ``Data Loader``, please refer to `Data Loader <../component/data.html#data-loader>`_

-To kown more about ``Data Api``, please refer to `Data Api <../component/data.html>`_
+To learn more about ``Data API``, please refer to `Data API <../component/data.html>`_
--- a/docs/advanced/server.rst
+++ b/docs/advanced/server.rst
@@ -1,4 +1,5 @@
 .. _server:
+
 =================================
 ``Online`` & ``Offline`` mode
 =================================
--- a/docs/component/backtest.rst
+++ b/docs/component/backtest.rst
@@ -1,4 +1,5 @@
 .. _backtest:
+
 ============================================
 Intraday Trading: Model&Strategy Testing
 ============================================
@@ -12,7 +13,7 @@ Introduction

 .. note::

-    ``Intraday Trading`` uses ``Order Executor`` to trade and execute orders output by ``Interday Strategy``. ``Order Executor`` is a component in `Qlib Framework <../introduction/introduction.html#framework>`_, which can execute orders. ``Vwap Executor`` and ``Close Executor`` is supported by ``Qlib`` now. In the future, ``Qlib`` will support ``HighFreq Executor`` also. 
+    ``Intraday Trading`` uses ``Order Executor`` to trade and execute orders output by ``Portfolio Strategy``. ``Order Executor`` is a component in `Qlib Framework <../introduction/introduction.html#framework>`_, which can execute orders. ``VWAP Executor`` and ``Close Executor`` are supported by ``Qlib`` now. In the future, ``Qlib`` will support ``HighFreq Executor`` also. 



@@ -31,34 +32,34 @@ The simple example of the default strategy is as follows.
    # pred_score is the prediction score
    report, positions = backtest(pred_score, topk=50, n_drop=0.5, verbose=False, limit_threshold=0.0095)

-To know more about backtesting with a specific strategy, please refer to `Strategy <strategy.html>`_.
+To know more about backtesting with a specific ``Strategy``, please refer to `Portfolio Strategy <strategy.html>`_.

-To know more about the prediction score `pred_score` output by ``Model``, please refer to `Interday Model: Model Training & Prediction <model.html>`_.
+To know more about the prediction score `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

 Prediction Score
 -----------------

-The `prediction score` is a pandas DataFrame. Its index is <instrument(str), datetime(pd.Timestamp)> and it must
+The `prediction score` is a pandas DataFrame. Its index is <datetime(pd.Timestamp), instrument(str)> and it must
 contains a `score` column.

 A prediction sample is shown as follows.

 .. code-block:: python

-    instrument datetime   score
-    SH600000   2019-01-04 -0.505488
-    SZ002531   2019-01-04 -0.320391
-    SZ000999   2019-01-04  0.583808
-    SZ300569   2019-01-04  0.819628
-    SZ001696   2019-01-04 -0.137140
-    ...                         ...
-    SZ000996   2019-04-30 -1.027618
-    SH603127   2019-04-30  0.225677
-    SH603126   2019-04-30  0.462443
-    SH603133   2019-04-30 -0.302460
-    SZ300760   2019-04-30 -0.126383
+      datetime instrument     score
+    2019-01-04   SH600000 -0.505488
+    2019-01-04   SZ002531 -0.320391
+    2019-01-04   SZ000999  0.583808
+    2019-01-04   SZ300569  0.819628
+    2019-01-04   SZ001696 -0.137140
+                 ...            ...
+    2019-04-30   SZ000996 -1.027618
+    2019-04-30   SH603127  0.225677
+    2019-04-30   SH603126  0.462443
+    2019-04-30   SH603133 -0.302460
+    2019-04-30   SZ300760 -0.126383

-``Model`` module can make predictions, please refer to `Model <model.html>`_.
+``Forecast Model`` module can make predictions, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

 Backtest Result
 ------------------
@@ -110,4 +111,4 @@ The backtest results are in the following form:
 Reference
 ==============

-To know more about ``Intraday Trading``, please refer to `Backtest API <../reference/api.html>`_.
+To know more about ``Intraday Trading``, please refer to `Intraday Trading <../reference/api.html#module-qlib.contrib.evaluate>`_.
--- a/docs/component/data.rst
+++ b/docs/component/data.rst
@@ -1,6 +1,7 @@
 .. _data:
+
 ================================
-Data Layer: Data Framework&Usage
+Data Layer: Data Framework & Usage
 ================================

 Introduction
@@ -14,7 +15,9 @@ The introduction of ``Data Layer`` includes the following parts.

 - Data Preparation
 - Data API
+- Data Loader
 - Data Handler
+- Dataset
 - Cache
 - Data and Cache File Structure

@@ -26,17 +29,34 @@ Qlib Format Data
 ------------------

 We've specially designed a data structure to manage financial data, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information.
-Such data will be stored with filename suffix `.bin` (We'll call them `.bin` file, `.bin` format or qlib format). `.bin` file is designed for scientific computing on finance data
+Such data will be stored with filename suffix `.bin` (We'll call them `.bin` file, `.bin` format, or qlib format). `.bin` file is designed for scientific computing on finance data.
+
+``Qlib`` provides two different off-the-shelf datasets, which can be accessed through this `link <https://github.com/microsoft/qlib/blob/main/qlib/contrib/data/handler.py>`_:
+
+========================  =================  ================
+Dataset                   US Market          China Market
+========================  =================  ================
+Alpha360                  √                  √
+
+Alpha158                  √                  √
+========================  =================  ================
+

 Qlib Format Dataset
 --------------------
-``Qlib`` has provided an off-the-shelf dataset in `.bin` format, users could use the script ``scripts/get_data.py`` to download the dataset as follows.
+``Qlib`` has provided an off-the-shelf dataset in `.bin` format, users could use the script ``scripts/get_data.py`` to download the China-Stock dataset as follows.

 .. code-block:: bash

-    python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
+    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

-After running the above command, users can find china-stock data in Qlib format in the ``~/.qlib/csv_data/cn_data`` directory.
+In addition to China-Stock data, ``Qlib`` also includes a US-Stock dataset, which can be downloaded with the following command:
+
+.. code-block:: bash
+
+    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/us_data --region us
+
+After running the above commands, users can find china-stock and us-stock data in ``Qlib`` format in the ``~/.qlib/qlib_data/cn_data`` directory and ``~/.qlib/qlib_data/us_data`` directory respectively.

 ``Qlib`` also provides the scripts in ``scripts/data_collector`` to help users crawl the latest data on the Internet and convert it to qlib format.

@@ -45,45 +65,96 @@ When ``Qlib`` is initialized with this dataset, users could build and evaluate t
 Converting CSV Format into Qlib Format
 -------------------------------------------

-``Qlib`` has provided the script ``scripts/dump_bin.py`` to convert data in CSV format into `.bin` files(Qlib format).
+``Qlib`` has provided the script ``scripts/dump_bin.py`` to convert **any** data in CSV format into `.bin` files (``Qlib`` format) as long as they are in the correct format.

-
-Users can download the china-stock data in CSV format as follows for reference to the CSV format.
+Users can download the demo china-stock data in CSV format as follows for reference to the CSV format.

 .. code-block:: bash

    python scripts/get_data.py csv_data_cn --target_dir ~/.qlib/csv_data/cn_data

+Users can also provide their own data in CSV format. However, the CSV data **must satisfy** the following criteria:
+
+- CSV file is named after a specific stock *or* the CSV file includes a column of the stock name
+
+    - Name the CSV file after a stock: `SH600000.csv`, `AAPL.csv` (not case sensitive).
+    
+    - CSV file includes a column of the stock name. User **must** specify the column name when dumping the data. Here is an example:
+
+        .. code-block:: bash
+
+            python scripts/dump_bin.py dump_all ... --symbol_field_name symbol
+        
+        where the data are in the following format:
+
+        .. code-block:: 
+
+            symbol,close
+            SH600000,120
+
+- CSV file **must** include a column for the date, and when dumping the data, user must specify the date column name. Here is an example:
+
+    .. code-block:: bash
+
+        python scripts/dump_bin.py dump_all ... --date_field_name date
+    
+    where the data are in the following format:
+
+    .. code-block:: 
+
+        symbol,date,close,open,volume
+        SH600000,2020-11-01,120,121,12300000
+        SH600000,2020-11-02,123,120,12300000
+

 Supposed that users prepare their CSV format data in the directory ``~/.qlib/csv_data/my_data``, they can run the following command to start the conversion.

 .. code-block:: bash

-    python scripts/dump_bin.py dump --csv_path  ~/.qlib/csv_data/my_data --qlib_dir ~/.qlib/qlib_data/my_data --include_fields open,close,high,low,volume,factor
+    python scripts/dump_bin.py dump_all --csv_path  ~/.qlib/csv_data/my_data --qlib_dir ~/.qlib/qlib_data/my_data --include_fields open,close,high,low,volume,factor
+
+For other supported parameters when dumping the data into `.bin` file, users can refer to the information by running the following commands:
+
+.. code-block:: bash
+
+    python scripts/dump_bin.py dump_all --help

 After conversion, users can find their Qlib format data in the directory `~/.qlib/qlib_data/my_data`.

 .. note::

-    The arguments of `--include_fields` should correspond with the columns names of CSV files. The columns names of dataset provided by ``Qlib`` includes open,close,high,low,volume,factor.
+    The arguments of `--include_fields` should correspond with the column names of CSV files. The column names of the dataset provided by ``Qlib`` should include at least open, close, high, low, volume and factor.
    
    - `open`
-        The opening price
+        The adjusted opening price
    - `close`
-        The closing price
+        The adjusted closing price
    - `high`
-        The highest price
+        The adjusted highest price
    - `low`
-        The lowest price
+        The adjusted lowest price
    - `volume`
-        The trading volume
+        The adjusted trading volume
    - `factor`
-        The Restoration factor
+        The Restoration factor. Normally, ``factor = adjusted_price / original_price``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_

+    In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended. 

-China-Stock Mode & US-Stock Mode
+Multiple Stock Modes
 --------------------------------

+``Qlib`` now provides two different stock modes for users: China-Stock Mode & US-Stock Mode. Here are some different settings of these two modes:
+
+==============  =================  ================
+Region          Trade Unit         Limit Threshold
+==============  =================  ================
+China           100                0.099
+
+US              1                  None
+==============  =================  ================
+
+The `trade unit` defines the unit number of stocks that can be used in a trade, and the `limit threshold` defines the bound set to the percentage of ups and downs of a stock.
+
 - If users use ``Qlib`` in china-stock mode, china-stock data is required. Users can use ``Qlib`` in china-stock mode according to the following steps:
    - Download china-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in china-stock mode
@@ -95,9 +166,8 @@ China-Stock Mode & US-Stock Mode
            qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)
        

- If users use ``Qlib`` in US-stock mode, US-stock data is required. ``Qlib`` does not provide a script to download US-stock data. Users can use ``Qlib`` in US-stock mode according to the following steps:
-    - Prepare data in CSV format
-    - Convert data from CSV format to Qlib format,  please refer to section `Converting CSV Format into Qlib Format <#converting-csv-format-into-qlib-format>`_.
+- If users use ``Qlib`` in US-stock mode, US-stock data is required. ``Qlib`` also provides a script to download US-stock data. Users can use ``Qlib`` in US-stock mode according to the following steps:
+    - Download us-stock in qlib format, please refer to section `Qlib Format Dataset <#qlib-format-dataset>`_.
    - Initialize ``Qlib`` in US-stock mode
        Supposed that users prepare their Qlib format data in the directory ``~/.qlib/csv_data/us_data``. Users only need to initialize ``Qlib`` as follows.
        
@@ -124,9 +194,10 @@ Feature

 - `ExpressionOps`
    `ExpressionOps` will use operator for feature construction.
-    To know more about  ``Operator``, please refer to `Operator API <../reference/api.html>`_.
+    To know more about  ``Operator``, please refer to `Operator API <../reference/api.html#module-qlib.data.ops>`_.
+    Also, ``Qlib`` allows users to define their own custom ``Operator``; an example is given in ``tests/test_register_ops.py``.

-To know more about  ``Feature``, please refer to `Feature API <../reference/api.html>`_.
+To know more about  ``Feature``, please refer to `Feature API <../reference/api.html#module-qlib.data.base>`_.

 Filter
 -------------------
@@ -139,123 +210,168 @@ Filter
    Expression dynamic instrument filter. Filter the instruments based on a certain expression. An expression rule indicating a certain feature field is required.
    
    - `basic features filter`: rule_expression = '$close/$open>5'
-    - `cross-sectional features filter` : rule_expression = '$rank($close)<10'
+    - `cross-sectional features filter` \: rule_expression = '$rank($close)<10'
    - `time-sequence features filter`: rule_expression = '$Ref($close, 3)>100'

-To know more about ``Filter``, please refer to `Filter API <../reference/api.html>`_.
+To know more about ``Filter``, please refer to `Filter API <../reference/api.html#module-qlib.data.filter>`_.

-
-API
+Reference
 -------------

-To know more about ``Data API``, please refer to `Data API <../reference/api.html>`_.
+To know more about ``Data API``, please refer to `Data API <../reference/api.html#data>`_.
+
+
+Data Loader
+=================
+
+``Data Loader`` in ``Qlib`` is designed to load raw data from the original data source. It will be loaded and used in the ``Data Handler`` module.
+
+QlibDataLoader
+---------------
+
+The ``QlibDataLoader`` class in ``Qlib`` is such an interface that allows users to load raw data from the ``Qlib`` data source.
+
+StaticDataLoader
+----------------
+
+The ``StaticDataLoader`` class in ``Qlib`` is such an interface that allows users to load raw data from file or as provided.
+
+
+Interface
+------------
+
+Here are some interfaces of the ``DataLoader`` base class, which ``QlibDataLoader`` and ``StaticDataLoader`` implement:
+
+.. autoclass:: qlib.data.dataset.loader.DataLoader
+    :members:
+
+API
+-----------
+
+To know more about ``Data Loader``, please refer to `Data Loader API <../reference/api.html#module-qlib.data.dataset.loader>`_.
+

 Data Handler
 =================

-Users can use ``Data Handler`` in an automatic workflow by ``Estimator``, refer to `Estimator <estimator.html>`_ for more details. 
+The ``Data Handler`` module in ``Qlib`` is designed to handle those common data processing methods which will be used by most of the models.

-Also, ``Data Handler`` can be used as an independent module, by which users can easily preprocess data(standardization, remove NaN, etc.) and build datasets. It is a subclass of ``qlib.contrib.estimator.handler.BaseDataHandler``, which provides some interfaces as follows.
+Users can use ``Data Handler`` in an automatic workflow by ``qrun``, refer to `Workflow: Workflow Management <workflow.html>`_ for more details. 

-Base Class & Interface
+DataHandlerLP
+--------------
+
+In addition to using ``Data Handler`` in an automatic workflow with ``qrun``, ``Data Handler`` can be used as an independent module, by which users can easily preprocess data (standardization, remove NaN, etc.) and build datasets. 
+
+In order to achieve this, ``Qlib`` provides a base class `qlib.data.dataset.DataHandlerLP <../reference/api.html#qlib.data.dataset.handler.DataHandlerLP>`_. The core idea of this class is that we will have some learnable ``Processors`` which can learn the parameters of data processing (e.g., parameters for zscore normalization). When new data comes in, these `trained` ``Processors`` can then process the new data and thus processing real-time data in an efficient way becomes possible. More information about ``Processors`` will be listed in the next subsection.
+
+
+Interface
 ----------------------

-Qlib provides a base class `qlib.contrib.estimator.BaseDataHandler <../reference/api.html#class-qlib.contrib.estimator.BaseDataHandler>`_, which provides the following interfaces:
+Here are some important interfaces that ``DataHandlerLP`` provides:

- `setup_feature`    
-    Implement the interface to load the data features.
+.. autoclass:: qlib.data.dataset.handler.DataHandlerLP
+    :members: __init__, fetch, get_cols

- `setup_label`   
-    Implement the interface to load the data labels and calculate the users' labels. 
+If users want to load features and labels by config, users can inherit ``qlib.data.dataset.handler.ConfigDataHandler``. ``Qlib`` also provides some preprocessing methods in this subclass.

- `setup_processed_data`    
-    Implement the interface for data preprocessing, such as preparing feature columns, discarding blank lines, and so on.
-
-Qlib also provides two functions to help users init the data handler, users can override them for users' needs.
-
- `_init_kwargs`
-    Users can init the kwargs of the data handler in this function, some kwargs may be used when init the raw df.
-    Kwargs are the other attributes in data.args, like dropna_label, dropna_feature
-
- `_init_raw_df`
-    Users can init the raw df, feature names, and label names of data handler in this function. 
-    If the index of feature df and label df are not same, users need to override this method to merge them (e.g. inner, left, right merge).
-
-If users want to load features and labels by config, users can inherit ``qlib.contrib.estimator.handler.ConfigDataHandler``, ``Qlib`` also have provided some preprocess method in this subclass.
 If users want to use qlib data, `QLibDataHandler` is recommended. Users can inherit their custom class from `QLibDataHandler`, which is also a subclass of `ConfigDataHandler`.


-Usage
--------------
+Processor
+----------

-``Data Handler`` can be used as a single module, which provides the following mehtods:
+The ``Processor`` module in ``Qlib`` is designed to be learnable and it is responsible for handling data processing such as `normalization` and `drop none/nan features/labels`.

- `get_split_data`
-    - According to the start and end dates, return features and labels of the pandas DataFrame type used for the 'Model'
-
- `get_rolling_data`
-    - According to the start and end dates, and `rolling_period`, an iterator is returned, which can be used to traverse the features and labels used for rolling.
+``Qlib`` provides the following ``Processors``:

+- ``DropnaProcessor``: `processor` that drops N/A features.
+- ``DropnaLabel``: `processor` that drops N/A labels.
+- ``TanhProcess``: `processor` that uses `tanh` to process noise data.
+- ``ProcessInf``: `processor` that handles infinity values, which will be replaced by the mean of the column.
+- ``Fillna``: `processor` that handles N/A values, which will fill the N/A value by 0 or other given number.
+- ``MinMaxNorm``: `processor` that applies min-max normalization.
+- ``ZscoreNorm``: `processor` that applies z-score normalization.
+- ``RobustZScoreNorm``: `processor` that applies robust z-score normalization.
+- ``CSZScoreNorm``: `processor` that applies cross sectional z-score normalization.
+- ``CSRankNorm``: `processor` that applies cross sectional rank normalization.
+- ``CSZFillna``: `processor` that fills N/A values in a cross sectional way by the mean of the column.

+Users can also create their own `processor` by inheriting the base class of ``Processor``. Please refer to the implementation of all the processors for more information (`Processor Link <https://github.com/microsoft/qlib/blob/main/qlib/data/dataset/processor.py>`_). 

+To know more about ``Processor``, please refer to `Processor API <../reference/api.html#module-qlib.data.dataset.processor>`_.

 Example
 --------------

-``Data Handler`` can be run with ``estimator`` by modifying the configuration file, and can also be used as a single module. 
+``Data Handler`` can be run with ``qrun`` by modifying the configuration file, and can also be used as a single module. 

-Know more about how to run ``Data Handler`` with ``estimator``, please refer to `Estimator <estimator.html#about-data>`_.
+To know more about how to run ``Data Handler`` with ``qrun``, please refer to `Workflow: Workflow Management <workflow.html>`_.

-Qlib provides implemented data handler `QLibDataHandlerClose`. The following example shows how to run `QLibDataHandlerV1` as a single module. 
+Qlib provides implemented data handler `Alpha158`. The following example shows how to run `Alpha158` as a single module.

 .. note:: Users need to initialize ``Qlib`` with `qlib.init` first, please refer to `initialization <../start/initialization.html>`_.


 .. code-block:: Python

-    from qlib.contrib.estimator.handler import QLibDataHandlerClose
-    from qlib.contrib.model.gbdt import LGBModel
+    import qlib
+    from qlib.contrib.data.handler import Alpha158

-    DATA_HANDLER_CONFIG = {
-        "dropna_label": True,
-        "start_date": "2007-01-01",
-        "end_date": "2020-08-01",
-        "market": "csi300",
+    data_handler_config = {
+        "start_time": "2008-01-01",
+        "end_time": "2020-08-01",
+        "fit_start_time": "2008-01-01",
+        "fit_end_time": "2014-12-31",
+        "instruments": "csi300",
    }

-    TRAINER_CONFIG = {
-        "train_start_date": "2007-01-01",
-        "train_end_date": "2014-12-31",
-        "validate_start_date": "2015-01-01",
-        "validate_end_date": "2016-12-31",
-        "test_start_date": "2017-01-01",
-        "test_end_date": "2020-08-01",
-    }
+    if __name__ == "__main__":
+        qlib.init()
+        h = Alpha158(**data_handler_config)

-    exampleDataHandler = QLibDataHandlerClose(**DATA_HANDLER_CONFIG)
+        # get all the columns of the data
+        print(h.get_cols())

-    # example of 'get_split_data'
-    x_train, y_train, x_validate, y_validate, x_test, y_test = exampleDataHandler.get_split_data(**TRAINER_CONFIG)
+        # fetch all the labels
+        print(h.fetch(col_set="label"))

-    # example of 'get_rolling_data'
-
-    for (x_train, y_train, x_validate, y_validate, x_test, y_test) in exampleDataHandler.get_rolling_data(**TRAINER_CONFIG):
-        print(x_train, y_train, x_validate, y_validate, x_test, y_test) 
-
-
-.. note:: (x_train, y_train, x_validate, y_validate, x_test, y_test) can be used as arguments for the ``fit``, ``predict``, and ``score`` methods of the 'Model' , please refer to `Model <model.html#Interface>`_.
-
-Also, the above example has been given in ``examples.estimator.train_backtest_analyze.ipynb``.
+        # fetch all the features
+        print(h.fetch(col_set="feature"))

 API
 ---------

-To know more about ``Data Handler``, please refer to `Data Handler API <../reference/api.html#handler>`_.
+To know more about ``Data Handler``, please refer to `Data Handler API <../reference/api.html#module-qlib.data.dataset.handler>`_.
+
+
+Dataset
+=================
+
+The ``Dataset`` module in ``Qlib`` aims to prepare data for model training and inferencing.
+
+The motivation of this module is that we want to maximize the flexibility of different models to handle data that are suitable for themselves. This module gives the model the flexibility to process their data in a unique way. For instance, models such as ``GBDT`` may work well on data that contains `nan` or `None` value, while neural networks such as ``MLP`` will break down on such data. 
+
+If a user's model needs to process its data in a different way, the user could implement their own ``Dataset`` class. If the model's
+data processing is not special, ``DatasetH`` can be used directly.
+
+The ``DatasetH`` class is the `dataset` with `Data Handler`. Here is the most important interface of the class:
+
+.. autoclass:: qlib.data.dataset.__init__.DatasetH
+    :members:
+
+API
+---------
+
+To know more about ``Dataset``, please refer to `Dataset API <../reference/api.html#module-qlib.data.dataset.__init__>`_.
+
+

 Cache
 ==========

-``Cache`` is an optional module that helps accelerate providing data by saving some frequently-used data as cache file. ``Qlib`` provides a `Memcache` class to cache the most-frequently-used data in memory, an inheritable `ExpressionCache` class and an inheritable `DatasetCache` class.
+``Cache`` is an optional module that helps accelerate providing data by saving some frequently-used data as cache file. ``Qlib`` provides a `Memcache` class to cache the most-frequently-used data in memory, an inheritable `ExpressionCache` class, and an inheritable `DatasetCache` class.

 Global Memory Cache
 ---------------------
@@ -297,14 +413,14 @@ The following shows the details about the interfaces:
 .. autoclass:: qlib.data.cache.DatasetCache
    :members:

-``Qlib`` has currently provided implemented disk cache `DiskDatasetCache` which inherits from `DatasetCache` . The datasets data will be stored in the disk.
+``Qlib`` has currently provided implemented disk cache `DiskDatasetCache` which inherits from `DatasetCache` . The datasets' data will be stored in the disk.



 Data and Cache File Structure
 ==================================

-We've specially designed a file structure to manage data and cache, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information.The file structure of data and cache is listed as follows.
+We've specially designed a file structure to manage data and cache, please refer to the `File storage design section in Qlib paper <https://arxiv.org/abs/2009.11189>`_ for detailed information. The file structure of data and cache is listed as follows.

 .. code-block:: json

@@ -336,5 +452,3 @@ We've specially designed a file structure to manage data and cache, please refer
                - .index : an assorted index file recording the line index of all calendars
            - ...

-
-.. TODO: refer to paper
--- a/docs/component/estimator.rst
+++ b/docs/component/estimator.rst
@@ -1,692 +0,0 @@
-.. _estimator:
-=================================
-Estimator: Workflow Management
-=================================
-.. currentmodule:: qlib
-
-Introduction
-===================
-
-The components in `Qlib Framework <../introduction/introduction.html#framework>`_ are designed in a loosely-coupled way. Users could build their own Quant research workflow with these components like `Example <https://github.com/microsoft/qlib/blob/main/examples/train_and_backtest.py>`_
-
-
-Besides, ``Qlib`` provides more user-friendly interfaces named ``Estimator`` to automatically run the whole workflow defined by configuration.  A concrete execution of the whole workflow is called an `experiment`.
-With ``Estimator``, user can easily run an `experiment`, which includes the following steps:
-
- Data
-    - Loading
-    - Processing
-    - Slicing
- Model
-    - Training and inference(static or rolling)
-    - Saving & loading
- Evaluation(Back-testing)
-
-For each `experiment`, ``Qlib`` will capture the model training details, performance evaluation results and basic information (e.g. names, ids). The captured data will be stored in backend-storage (disk or database).
-
-Complete Example
-===================
-
-Before getting into details, here is a complete example of ``Estimator``, which defines the workflow in typical Quant research.
-Below is a typical config file of ``Estimator``.
-
-.. code-block:: YAML
-
-    experiment:
-      name: estimator_example
-      observer_type: file_storage
-      mode: train
-    model:
-      class: LGBModel
-      module_path: qlib.contrib.model.gbdt
-      args:
-        loss: mse
-        colsample_bytree: 0.8879
-        learning_rate: 0.0421
-        subsample: 0.8789
-        lambda_l1: 205.6999
-        lambda_l2: 580.9768
-        max_depth: 8
-        num_leaves: 210
-        num_threads: 20
-    data:
-      class: QLibDataHandlerClose
-      args:
-        dropna_label: True
-      filter:
-        market: csi500
-    trainer:
-      class: StaticTrainer
-      args:
-        rolling_period: 360
-        train_start_date: 2007-01-01
-        train_end_date: 2014-12-31
-        validate_start_date: 2015-01-01
-        validate_end_date: 2016-12-31
-        test_start_date: 2017-01-01
-        test_end_date: 2020-08-01
-    strategy:
-      class: TopkDropoutStrategy
-      args:
-        topk: 50
-        n_drop: 5
-    backtest:
-      normal_backtest_args:
-        verbose: False
-        limit_threshold: 0.095
-        account: 100000000
-        benchmark: SH000905
-        deal_price: close
-        open_cost: 0.0005
-        close_cost: 0.0015
-        min_cost: 5
-    qlib_data:
-      # when testing, please modify the following parameters according to the specific environment
-      provider_uri: "~/.qlib/qlib_data/cn_data"
-      region: "cn"
-
-After saving the config into `configuration.yaml`, users could start the workflow and test their ideas with a single command below.
-
-.. code-block:: bash
-
-    estimator -c configuration.yaml
-
-.. note:: `estimator` will be placed in your $PATH directory when installing ``Qlib``.
-
-
-
-Configuration File
-===================
-
-Let's get into details of ``Estimator`` in this section.
-
-Before using ``estimator``, users need to prepare a configuration file. The following content shows how to prepare each part of the configuration file.
-
-Experiment Section
--------------------
-
-At first, the configuration file needs to contain a section named `experiment` about the basic information. This section describes how `estimator` tracks and persists current `experiment`. ``Qlib`` used `sacred`, a lightweight open-source tool, to configure, organize, generate logs, and manage experiment results. Partial behaviors of `sacred` will base on the `experiment` section.
-
-Following files will be saved by `sacred` after `estimator` finish an `experiment`:
-
- `model.bin`, model binary file
- `pred.pkl`, model prediction result file
- `analysis.pkl`, backtest performance analysis file
- `positions.pkl`, backtest position records file
- `run`, the experiment information object, usually contains some meta information such as the experiment name, experiment date, etc.
-
-Here is the typical configuration of `experiment section`
-
-.. code-block:: YAML
-
-     experiment:
-        name: test_experiment
-        observer_type: mongo
-        mongo_url: mongodb://MONGO_URL
-        db_name: public
-        finetune: false
-        exp_info_path: /home/test_user/exp_info.json
-        mode: test
-        loader:
-            id: 677
-	
-
-The meaning of each field is as follows:
-
- `name`   
-    The experiment name, str type, `sacred <https://github.com/IDSIA/sacred>_` will use this experiment name as an identifier for some important internal processes. Users can find this field in `run` object of `sacred`.  The default value is `test_experiment`.
-
- `observer_type`
-    Observer type, str type, there are two choices which include `file_storage` and `mongo` respectively. If `file_storage` is selected, all the above-mentioned managed contents will be stored in the `dir` directory, separated by the number of times of experiments as a subfolder. If it is `mongo`, the content will be stored in the database. The default is `file_storage`.
-
-    - For `file_storage` observer.
-        - `dir`
-            Directory URL, str type, directory for `file_storage` observer type, files captured and managed by sacred with `file_storage` observer will be saved to this directory, which is the same directory as `config.json` by default.
-
-    - For `mongo` observer.
-        - `mongo_url`
-            Database URL, str type, required if the observer type is `mongo`.
-
-        - `db_name`    
-            Database name, str type, required if the observer type is `mongo`.
-
- `finetune`
-    ``Estimator``'s behaviors to train models will base on this flag.
-    If you just want to train models from scratch each time instead of based on existing models, please leave `finetune=false`. Otherwise please read the
-    details below.
-
-    The following table is the processing logic for different situations.
-
-    ==========  ===========================================   ====================================    ===========================================  ==========================================
-      .            Static                                                                             Rolling
-      .            finetune:true                              finetune:false                          finetune:true                                finetune:false
-    ==========  ===========================================   ====================================    ===========================================  ==========================================
-    Train       - Need to provide model (Static or Rolling)   - No need to provide model              - Need to provide model (Static or Rolling)  - Need to provide model (Static or Rolling)
-                - The args in model section will be           - The args in model section will be     - The args in model section will be          - The args in model section will be
-                  used for finetuning                           used for training                       used for finetuning                          used for finetuning
-                - Update based on the provided model          - Train model from scratch              - Update based on the provided model         - Based on the provided model update
-                  and parameters                                                                        and parameters                             - Train model from scratch
-                                                                                                      - **Each rolling time slice is based on**    - **Train each rolling time slice**
-                                                                                                        **a model updated from the previous**        **separately**
-                                                                                                        **time**        
-    Test        - Model must exist, otherwise an exception will be raised.
-                - For `StaticTrainer`, users need to train a model and record 'exp_info' for 'Test'.
-                - For `RollingTrainer`, users need to train a set of models until the latest time, and record 'exp_info' for 'Test'.
-    ==========  =============================================================================================================================================================================
-
-    .. note::
-
-        1. finetune parameters: share model.args parameters.
-
-        2. provide model: from `loader.model_index`, load the index of the model(starting from 0).
-
-        3. If `loader.model_index` is None:
-            - In 'Static Finetune=True', if provide 'Rolling', use the last model to update.
-
-            - For `RollingTrainer` with Finetune=True.
-
-                - If `StaticTrainer` is used in loader, the model will be used for initialization for finetuning.
-
-                - If `RollingTrainer` is used in loader, the existing models will be used without any modification and the new models will be initialized with the model in the last period and finetune one by one.
-
-
- `exp_info_path`
-    save path of experiment info, str type, save the experiment info and model `prediction score` after the experiment is finished. Optional parameter, the default value is `<config_file_dir>/ex_name/exp_info.json`.
-
- `mode`
-    `train` or `test`, str type.
-        - `test mode` is designed for inference. Under `test mode`, it will load the model according to the parameters of `loader` and skip model training.
-        - `train model`  is the default value. It will train new models by default and 
-    Please note that when it fails to load model, it will fall back to `fit` model.
-    
-    .. note::
-
-        if users choose ` test mode`, they need to make sure:
-        - The loader of `test_start_date` must be less than or equal to the current `test_start_date`.
-        - If other parameters of the `loader` model args are different, a warning will appear.
-
-
- `loader`
-    If you just want to train models from scratch each time instead of based on existing models, please ignore `loader` section. Otherwise please read the
-    details below.
-
-    The `loader` section only works when the `mode` is `test` or `finetune` is `true`.
-
-    - `model_index`
-        Model index, int type. The index of the loaded model in loader_models (starting at 0) for the first `finetune`. The default value is None.
-
-    - `exp_info_path`
-        Loader model experiment info path, str type. If the field exists, the following parameters will be parsed from `exp_info_path`, and the following parameters will not work. One of this field and `id` must exist at least .
-
-    - `id`
-        The experiment id of the model that needs to be loaded, int type. If the `mode` is `test`, this value is required. This field and `exp_info_path` must exist one.
-
-    - `name`
-        The experiment name of the model that needs to be loaded, str type. The default value is the current experiment `name`.
-
-    - `observer_type`
-        The experiment observer type of the model that needs to be loaded, str type. The default value is the current experiment `observer_type`.
-	
-        .. note:: The observer type is a concept of the `sacred` module, which determines how files, standard input, and output which are managed by sacred are stored.
-        
-        
-        - `file_storage`
-            If `observer_type` is `file_storage`, the config may be as follows.
-
-            .. code-block:: YAML
-
-                experiment:
-                    name: test_experiment
-                    dir: <path to a directory> # default is dir of `config.yml`
-                    observer_type: file_storage
-        - `mongo`
-            If `observer_type` is `mongo`, the config may be as follows.
-
-            .. code-block:: YAML
-
-                experiment:
-                    name: test_experiment
-                    observer_type: mongo
-                    mongo_url: mongodb://MONGO_URL
-                    db_name: public
-
-            Users need to indicate `mongo_url` and `db_name` for a mongo observer.
-            
-            .. note::
-
-                If users choose the mongo observer, they need to make sure:
-                    - Have an environment with the mongodb installed and a mongo database dedicated to storing the results of the experiments.
-                    - The python environment (the version of python and package) to run the experiments and the one to fetch the results are consistent.
-
-Model Section
-----------------
-
-Users can use a specified model by configuration with hyper-parameters.
-
-Custom Models
-~~~~~~~~~~~~~~~~~
-
-Qlib supports custom models, but it must be a subclass of the `qlib.contrib.model.Model`, the config for a custom model may be as following.
-
-.. code-block:: YAML
-
-    model:
-        class: SomeModel
-        module_path: /tmp/my_experment/custom_model.py
-        args:
-            loss: binary
-
-
-The class `SomeModel` should be in the module `custom_model`, and ``Qlib`` could parse the `module_path` to load the class.
-
-To know more about ``Model``, please refer to `Model <model.html>`_.
-
-Data Section
-----------------
-
-``Data Handler`` can be used to load raw data, prepare features and label columns, preprocess data (standardization, remove NaN, etc.), split training, validation, and test sets. It is a subclass of `qlib.contrib.estimator.handler.BaseDataHandler`.
-
-Users can use the specified data handler by config as follows.
-
-.. code-block:: YAML
-
-    data:
-        class: QLibDataHandlerClose
-        args:
-            start_date: 2005-01-01
-            end_date: 2018-04-30  
-            dropna_label: True
-        filter:
-            market: csi500
-            filter_pipeline:
-              -
-                class: NameDFilter
-                module_path: qlib.filter
-                args:
-                  name_rule_re: S(?!Z3)
-                  fstart_time: 2018-01-01
-                  fend_time: 2018-12-11
-              -
-                class: ExpressionDFilter
-                module_path: qlib.filter
-                args:
-                  rule_expression: $open/$factor<=45
-                  fstart_time: 2018-01-01
-                  fend_time: 2018-12-11
-
- `class`    
-    Data handler class, str type, which should be a subclass of `qlib.contrib.estimator.handler.BaseDataHandler`, and implements 5 important interfaces for loading features, loading raw data, preprocessing raw data, slicing train, validation, and test data. The default value is `ALPHA360`. If users want to write a data handler to retrieve the data in ``Qlib``, `QlibDataHandler` is suggested.
-
- `module_path`    
-   The module path, str type, absolute url is also supported, indicates the path of the `class` implementation of the data processor class. The default value is `qlib.contrib.estimator.handler`.
-
- `args`
-    Parameters used for ``Data Handler`` initialization.
-
-    - `train_start_date`
-        Training start time, str type, the default value is `2005-01-01`.
-
-    - `start_date`
-        Data start date, str type. 
-
-    - `end_date`
-        Data end date, str type. the data from start_date to end_date decides which part of data will be loaded in `datahandler`, users can only use these data in the following parts.
-
-    - `dropna_feature` (Optional in args)
-        Drop Nan feature, bool type, the default value is False. 
-
-    - `dropna_label` (Optional in args)
-        Drop Nan label, bool type, the default value is True. Some multi-label tasks will use this.
-
-    - `normalize_method` (Optional in args)
-        Normalize data by a given method. str type. ``Qlib`` gives two normalizing methods, `MinMax` and `Std`.
-        If users want to build their own method, please override `_process_normalize_feature`.
-  
- `filter`
-    Dynamically filtering the stocks based on the filter pipeline.
-
-    - `market`
-        index name, str type, the default value is `csi500`.
-
-    - `filter_pipeline`
-        Filter rule list, list type, the default value is []. Can be customized according to users' needs.
-
-        - `class`
-            Filter class name, str type.
-
-        - `module_path`
-            The module path, str type.
-
-        - `args`
-            The filter class parameters, these parameters are set according to the `class`, and all the parameters as kwargs to `class`.
-
-Custom Data Handler
-~~~~~~~~~~~~~~~~~~~~~~
-
-Qlib support custom data handler, but it must be a subclass of the ``qlib.contrib.estimator.handler.BaseDataHandler``, the config for custom data handler may be as follows.
-
-.. code-block:: YAML
-
-    data:
-        class: SomeDataHandler
-        module_path: /tmp/my_experment/custom_data_handler.py
-        args:
-            start_date: 2005-01-01
-            end_date: 2018-04-30  
-
-The class `SomeDataHandler` should be in the module `custom_data_handler`, and ``Qlib`` could parse the `module_path` to load the class.
-
-If users want to load features and labels by config, they can inherit ``qlib.contrib.estimator.handler.ConfigDataHandler``, ``Qlib`` also has provided some preprocess methods in this subclass.
-If users want to use qlib data, `QLibDataHandler` is recommended, from which users can inherit the custom class. `QLibDataHandler` is also a subclass of `ConfigDataHandler`.
-
-To know more about ``Data Handler``, please refer to `Data Framework&Usage <data.html>`_.
-
-Trainer Section
-----------------
-
-Users can specify the trainer ``Trainer`` by the config file, which is a subclass of ``qlib.contrib.estimator.trainer.BaseTrainer`` and implement three important interfaces for training the model, restoring the model, and getting model predictions as follows.
-
- `train`    
-    Implement this interface to train the model.
-
- `load`   
-    Implement this interface to recover the model from disk.
-
- `get_pred`   
-    Implement this interface to get model prediction results.
-
-Qlib have provided two implemented trainer,
-
- `StaticTrainer`   
-    The static trainer will be trained using the training, validation, and test data of the data processor static slicing.
-
- `RollingTrainer`    
-    The rolling trainer will use the rolling iterator of the data processor to split data for rolling training.
-
-
-Users can specify `trainer` with the configuration file:
-
-.. code-block:: YAML
-
-    trainer:
-        class: StaticTrainer # or RollingTrainer
-        args:
-            rolling_period: 360
-            train_start_date: 2005-01-01
-            train_end_date: 2014-12-31
-            validate_start_date: 2015-01-01 
-            validate_end_date: 2016-06-30
-            test_start_date: 2016-07-01
-            test_end_date: 2017-07-31
-
- `class`   
-    Trainer class, which should be a subclass of `qlib.contrib.estimator.trainer.BaseTrainer`, and needs to implement three important interfaces, the default value is `StaticTrainer`.
-
- `module_path`    
-    The module path, str type, absolute url is also supported, indicates the path of the trainer class implementation.
-
- `args`
-    Parameters used for ``Trainer`` initialization.
-
-    - `rolling_period`    
-        The rolling period, integer type, indicates how many time steps need rolling when rolling the data. The default value is `60`. Only used in `RollingTrainer`.
-
-    - `train_start_date`
-        Training start time, str type.
-
-    - `train_end_date`      
-        Training end time, str type.
-
-    - `validate_start_date`    
-        Validation start time, str type.
-
-    - `validate_end_date`    
-        Validation end time, str type.
-
-    - `test_start_date`    
-        Test start time, str type.
-
-    - `test_end_date`     
-        Test end time, str type. If `test_end_date` is `-1` or greater than the last date of the data, the last date of the data will be used as `test_end_date`.
-
-Custom Trainer
-~~~~~~~~~~~~~~~~~~
-
-Qlib supports custom trainer, but it must be a subclass of the `qlib.contrib.estimator.trainer.BaseTrainer`, the config for a custom trainer may be as following:
-
-.. code-block:: YAML
-
-    trainer:
-        class: SomeTrainer
-        module_path: /tmp/my_experment/custom_trainer.py
-        args:
-            train_start_date: 2005-01-01
-            train_end_date: 2014-12-31
-            validate_start_date: 2015-01-01
-            validate_end_date: 2016-06-30
-            test_start_date: 2016-07-01
-            test_end_date: 2017-07-31
-
-
-The class `SomeTrainer` should be in the module `custom_trainer`, and ``Qlib`` could parse the `module_path` to load the class.
-
-Strategy Section
-----------------
-
-Users can specify strategy through a config file, for example:
-
-.. code-block:: YAML
-
-    strategy :
-        class: TopkDropoutStrategy
-        args:
-            topk: 50
-            n_drop: 5
-
- `class`
-    The strategy class, str type, should be a subclass of `qlib.contrib.strategy.strategy.BaseStrategy`. The default value is `TopkDropoutStrategy`.
-
- `module_path`
-    The module location, str type, absolute url is also supported, and absolute path is also supported, indicates the location of the policy class implementation.
-
- `args`
-    Parameters used for ``Trainer`` initialization.
-
-    - `topk`    
-        The number of stocks in the portfolio
-
-    - `n_drop`    
-        Number of stocks to be replaced in each trading date
-
-Custom Strategy
-^^^^^^^^^^^^^^^^^^^
-
-Qlib supports custom strategy, but it must be a subclass of the ``qlib.contrib.strategy.strategy.BaseStrategy``, the config for custom strategy may be as following:
-
-
-.. code-block:: YAML
-
-    strategy :
-        class: SomeStrategy
-        module_path: /tmp/my_experment/custom_strategy.py
-
-The class `SomeStrategy` should be in the module `custom_strategy`, and ``Qlib`` could parse the `module_path` to load the class.
-
-To know more about ``Strategy``, please refer to `Strategy <strategy.html>`_.
-
-Backtest Section
-----------------
-
-Users can specify `backtest` through a config file, for example:
-
-.. code-block:: YAML
-
-    backtest :
-        normal_backtest_args:
-            topk: 50
-            benchmark: SH000905
-            account: 500000
-            deal_price: close
-            min_cost: 5
-            subscribe_fields:
-              - $close
-              - $change
-              - $factor
-
- `normal_backtest_args`
-    Normal backtest parameters. All the parameters in this section will be passed to the ``qlib.contrib.evaluate.backtest`` function in the form of `**kwargs`.
-
-    - `benchmark`
-        Stock index symbol, str, or list type, the default value is `None`.
-
-        .. note::
-
-            * If `benchmark` is None, it will use the average change of the day of all stocks in 'pred' as the 'bench'.
-
-            * If `benchmark` is list, it will use the daily average change of the stock pool in the list as the 'bench'.
-
-            * If `benchmark` is str, it will use the daily change as the 'bench'.
-
-
-    - `account`
-        Backtest initial cash, integer type. The `account` in `strategy` section is deprecated. It only works when `account` is not set in `backtest` section. It will be overridden by `account` in the `backtest` section. The default value is 1e9.
-
-    - `deal_price`
-        Order transaction price field, str type, the default value is vwap.
-
-    - `min_cost`
-        Min transaction cost, float type, the default value is 5.
-
-    - `subscribe_fields`
-        Subscribe quote fields, array type, the default value is [`deal_price`, $close, $change, $factor].
-
-
-Qlib Data Section
--------------------
-
-The `qlib_data` field describes the parameters of qlib initialization.
-
-.. code-block:: YAML
-
-    qlib_data:
-      # when testing, please modify the following parameters according to the specific environment
-      provider_uri: "~/.qlib/qlib_data/cn_data"
-      region: "cn"
-    
- `provider_uri`
-    The local directory where the data loaded by 'get_data.py' is stored.
- `region`
-    - If region == ``qlib.config.REG_CN``, 'qlib' will be initialized in US-stock mode. 
-    - If region == ``qlib.config.REG_US``, 'qlib' will be initialized in china-stock mode.
-
-Please refer to `Initialization <../start/initialization.html>`_.
-
-Experiment Result
-===================
-
-Form of Experimental Result
----------------------------
-The result of the experiment is also the result of the ``Interdat Trading(Backtest)``, please refer to `Interday Trading <backtest.html>`_.
-
-
-Get Experiment Result
----------------------------
-
-Base Class & Interface
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Users can check the experiment results from file storage directly, or check the experiment results from the database, or get the experiment results through two interfaces of a base class `Fetcher` provided by ``Qlib``.
-
-The `Fetcher` provides the following interface
-    - `get_experiments(self, exp_name=None):`   
-        The interface takes one parameters. The `exp_name` is the experiment name, the default is all experiments. Users can get the returned dictionary with a list of ids and test end date as follows.
-
-        .. code-block:: JSON
-
-            {
-                "ex_a": [
-                    {
-                        "id": 1,
-                        "test_end_date": "2017-01-01"
-                    }
-                ],
-                "ex_b": [
-                    ...
-                ]
-            }
-
-
-    - `get_experiment(exp_name, exp_id, fields=None)`
-        The interface takes three parameters. The first parameter is the experiment name, the second parameter is the experiment id, and the third parameter is list of fields. The default value of `fields` is None, which means all fields.
-        
-
-        .. note::
-            Currently supported fields:
-                ['model', 'analysis', 'positions', 'report_normal', 'pred', 'task_config', 'label']
-
-        Users can get the returned dictionary as follows.
-
-        .. code-block:: JSON
-
-            {
-                'analysis': analysis_df,
-                'pred': pred_df,
-                'positions': positions_dic,
-                'report_normal': report_normal_df,
-            }
-
-Implemented `Fetcher` s & Examples
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-``Qlib`` provides two implemented `Fetcher` s as follows.
-
-`FileFetcher`
-^^^^^^^^^^^^^^^
-
-The `FileFetcher` is a subclass of `Fetcher`, which could fetch files from `file_storage` observer. The following is an example:
-.. code-block:: python
-
-    >>> from qlib.contrib.estimator.fetcher import FileFetcher
-    >>> f = FileFetcher(experiments_dir=r'./')
-    >>> print(f.get_experiments())
-    {
-        'test_experiment': [
-            {
-                'id': '1',
-                'config': ...
-            }, 
-            {   
-                'id': '2',
-                'config': ...
-            }, 
-            {   
-                'id': '3', 
-                'config': ...
-            }
-        ]
-    }
-    >>> print(f.get_experiment('test_experiment', '1'))
-                                                    risk
-    excess_return_without_cost mean               0.000605
-                               std                0.005481
-                               annualized_return  0.152373
-                               information_ratio  1.751319
-                               max_drawdown      -0.059055
-    excess_return_with_cost    mean               0.000410
-                               std                0.005478
-                               annualized_return  0.103265
-                               information_ratio  1.187411
-                               max_drawdown      -0.075024
-
-
-
-`MongoFetcher`
-^^^^^^^^^^^^^^^
-
-The `FileFetcher` is a subclass of `Fetcher`, which could fetch files from `mongo` observer. Users should initialize the fetcher with `mongo_url`. The following is an example:
-
-.. code-block:: python
-
-    >>> from qlib.contrib.estimator.fetcher import MongoFetcher
-    >>> f = MongoFetcher(mongo_url=..., db_name=...)
-
--- a/docs/component/model.rst
+++ b/docs/component/model.rst
@@ -1,170 +1,109 @@
 .. _model:
+
 ============================================
-Interday Model: Model Training & Prediction
+Forecast Model: Model Training & Prediction
 ============================================

 Introduction
 ===================

-``Interday Model`` is designed to make the `prediction score` about stocks. Users can use the ``Interday Model`` in an automatic workflow by ``Estimator``, please refer to `Estimator <estimator.html>`_.  
+``Forecast Model`` is designed to make the `prediction score` about stocks. Users can use the ``Forecast Model`` in an automatic workflow by ``qrun``, please refer to `Workflow: Workflow Management <workflow.html>`_.  

-Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Interday Model`` can be used as an independent module also.
+Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Forecast Model`` can be used as an independent module also.

 Base Class & Interface
 ======================

-``Qlib`` provides a base class `qlib.contrib.model.base.Model <../reference/api.html#module-qlib.contrib.model.base>`_ from which all models should inherit.
+``Qlib`` provides a base class `qlib.model.base.Model <../reference/api.html#module-qlib.model.base>`_ from which all models should inherit.

 The base class provides the following interfaces:

- `__init__(**kwargs)`
-    - Initialization.
-    - If users use ``Estimator`` to start an `experiment`, the parameter of `__init__` method shoule be consistent with the hyperparameters in the configuration file.
+.. autoclass:: qlib.model.base.Model
+    :members:

- `fit(self, x_train, y_train, x_valid, y_valid, w_train=None, w_valid=None, **kwargs)`
-    - Train model.
-    - Parameter:
-        - `x_train`, pd.DataFrame type, train feature
-            The following example explains the value of `x_train`:
-
-            .. code-block:: YAML
-                                
-                                        KMID      KLEN      KMID2     KUP       KUP2
-                instrument  datetime                                                       
-                SH600004    2012-01-04  0.000000  0.017685  0.000000  0.012862  0.727275   
-                            2012-01-05 -0.006473  0.025890 -0.250001  0.012945  0.499998   
-                            2012-01-06  0.008117  0.019481  0.416666  0.008117  0.416666   
-                            2012-01-09  0.016051  0.025682  0.624998  0.006421  0.250001   
-                            2012-01-10  0.017323  0.026772  0.647057  0.003150  0.117648   
-                ...                         ...       ...       ...       ...       ...   
-                SZ300273    2014-12-25 -0.005295  0.038697 -0.136843  0.016293  0.421052   
-                            2014-12-26 -0.022486  0.041701 -0.539215  0.002453  0.058824   
-                            2014-12-29 -0.031526  0.039092 -0.806451  0.000000  0.000000   
-                            2014-12-30 -0.010000  0.032174 -0.310811  0.013913  0.432433   
-                            2014-12-31  0.010917  0.020087  0.543479  0.001310  0.065216   
-
-            
-            `x_train` is a pandas DataFrame, whose index is MultiIndex <instrument(str), datetime(pd.Timestamp)>. Each column of `x_train` corresponds to a feature, and the column name is the feature name. 
-            
-            .. note::
-            
-                The number and names of the columns are determined by the data handler, please refer to `Data Handler <data.html#data-handler>`_ and `Estimator Data <estimator.html#about-data>`_.
-            
-        - `y_train`, pd.DataFrame type, train label
-            The following example explains the value of `y_train`:
-
-             .. code-block:: YAML
-                                
-                                        LABEL
-                instrument  datetime            
-                SH600004    2012-01-04 -0.798456
-                            2012-01-05 -1.366716
-                            2012-01-06 -0.491026
-                            2012-01-09  0.296900
-                            2012-01-10  0.501426
-                ...                         ...
-                SZ300273    2014-12-25 -0.465540
-                            2014-12-26  0.233864
-                            2014-12-29  0.471368
-                            2014-12-30  0.411914
-                            2014-12-31  1.342723
-            
-            `y_train` is a pandas DataFrame, whose index is MultiIndex <instrument(str), datetime(pd.Timestamp)>. The `LABEL` column represents the value of train label.
-
-            .. note::
-
-                The number and names of the columns are determined by the ``Data Handler``, please refer to `Data Handler <data.html#data-handler>`_.
-
-        - `x_valid`, pd.DataFrame type, validation feature
-            The format of `x_valid` is same as `x_train`
-
-
-        - `y_valid`, pd.DataFrame type, validation label
-            The format of `y_valid` is same as `y_train`
-
-        - `w_train`(Optional args, default is None), pd.DataFrame type, train weight
-            `w_train` is a pandas DataFrame, whose shape and index is same as `x_train`. The float value in `w_train` represents the weight of the feature at the same position in `x_train`.
-
-        - `w_train`(Optional args, default is None), pd.DataFrame type, validation weight
-            `w_train` is a pandas DataFrame, whose shape and index is the same as `x_valid`. The float value in `w_train` represents the weight of the feature at the same position in `x_train`.
-
- `predict(self, x_test, **kwargs)`
-    - Predict test data 'x_test'
-    - Parameter:
-        - `x_test`, pd.DataFrame type, test features
-            The form of `x_test` is same as `x_train` in 'fit' method.
-    - Return: 
-        - `label`, np.ndarray type, test label
-            The label of `x_test` that predicted by model.
-
- `score(self, x_test, y_test, w_test=None, **kwargs)`
-    - Evaluate model with test feature/label
-    - Parameter:
-        - `x_test`, pd.DataFrame type, test feature
-            The format of `x_test` is same as `x_train` in `fit` method.
-        
-        - `x_test`, pd.DataFrame type, test label
-            The format of `y_test` is same as `y_train` in `fit` method.
-
-        - `w_test`, pd.DataFrame type, test weight
-            The format of `w_test` is same as `w_train` in `fit` method.
-    - Return: float type, evaluation score
-
-For other interfaces such as `save`, `load`, `finetune`, please refer to `Model API <../reference/api.html#module-qlib.contrib.model.base>`_.
+``Qlib`` also provides a base class `qlib.model.base.ModelFT <../reference/api.html#qlib.model.base.ModelFT>`_, which includes the method for finetuning the model.
+    
+For other interfaces such as `finetune`, please refer to `Model API <../reference/api.html#module-qlib.model.base>`_.

 Example
 ==================

-``Qlib`` provides ``LightGBM`` and ``DNN`` models as the baseline, the following steps show how to run`` LightGBM`` as an independent module.
+``Qlib``'s `Model Zoo` includes models such as ``LightGBM``, ``MLP``, ``LSTM``, etc. These models are treated as the baselines of ``Forecast Model``. The following steps show how to run ``LightGBM`` as an independent module.

- Initialize ``Qlib`` with `qlib.init` first, please refer to `initialization <../start/initialization.html>`_.
+- Initialize ``Qlib`` with `qlib.init` first, please refer to `Initialization <../start/initialization.html>`_.
 - Run the following code to get the `prediction score` `pred_score`
    .. code-block:: Python

-        from qlib.contrib.estimator.handler import QLibDataHandlerClose
        from qlib.contrib.model.gbdt import LGBModel
+        from qlib.contrib.data.handler import Alpha158
+        from qlib.utils import init_instance_by_config, flatten_dict
+        from qlib.workflow import R
+        from qlib.workflow.record_temp import SignalRecord, PortAnaRecord

-        DATA_HANDLER_CONFIG = {
-            "dropna_label": True,
-            "start_date": "2007-01-01",
-            "end_date": "2020-08-01",
-            "market": MARKET,
+        market = "csi300"
+        benchmark = "SH000300"
+
+        data_handler_config = {
+            "start_time": "2008-01-01",
+            "end_time": "2020-08-01",
+            "fit_start_time": "2008-01-01",
+            "fit_end_time": "2014-12-31",
+            "instruments": market,
        }

-        TRAINER_CONFIG = {
-            "train_start_date": "2007-01-01",
-            "train_end_date": "2014-12-31",
-            "validate_start_date": "2015-01-01",
-            "validate_end_date": "2016-12-31",
-            "test_start_date": "2017-01-01",
-            "test_end_date": "2020-08-01",
+        task = {
+            "model": {
+                "class": "LGBModel",
+                "module_path": "qlib.contrib.model.gbdt",
+                "kwargs": {
+                    "loss": "mse",
+                    "colsample_bytree": 0.8879,
+                    "learning_rate": 0.0421,
+                    "subsample": 0.8789,
+                    "lambda_l1": 205.6999,
+                    "lambda_l2": 580.9768,
+                    "max_depth": 8,
+                    "num_leaves": 210,
+                    "num_threads": 20,
+                },
+            },
+            "dataset": {
+                "class": "DatasetH",
+                "module_path": "qlib.data.dataset",
+                "kwargs": {
+                    "handler": {
+                        "class": "Alpha158",
+                        "module_path": "qlib.contrib.data.handler",
+                        "kwargs": data_handler_config,
+                    },
+                    "segments": {
+                        "train": ("2008-01-01", "2014-12-31"),
+                        "valid": ("2015-01-01", "2016-12-31"),
+                        "test": ("2017-01-01", "2020-08-01"),
+                    },
+                },
+            },
        }
+        
+        # model initialization
+        model = init_instance_by_config(task["model"])
+        dataset = init_instance_by_config(task["dataset"])

-        x_train, y_train, x_validate, y_validate, x_test, y_test = QLibDataHandlerClose(
-            **DATA_HANDLER_CONFIG
-        ).get_split_data(**TRAINER_CONFIG)
+        # start exp
+        with R.start(experiment_name="workflow"):
+            # train
+            R.log_params(**flatten_dict(task))
+            model.fit(dataset)

+            # prediction
+            recorder = R.get_recorder()
+            sr = SignalRecord(model, dataset, recorder)
+            sr.generate()

-        MODEL_CONFIG = {
-            "loss": "mse",
-            "colsample_bytree": 0.8879,
-            "learning_rate": 0.0421,
-            "subsample": 0.8789,
-            "lambda_l1": 205.6999,
-            "lambda_l2": 580.9768,
-            "max_depth": 8,
-            "num_leaves": 210,
-            "num_threads": 20,
-        }
-        # use default model
-        # custom Model, refer to: TODO: Model API url
-        model = LGBModel(**MODEL_CONFIG)
-        model.fit(x_train, y_train, x_validate, y_validate)
-        _pred = model.predict(x_test)
-        pred_score = pd.DataFrame(index=_pred.index)
-        pred_score["score"] = _pred.iloc(axis=1)[0]
-
-    .. note:: `QLibDataHandlerClose` is the data handler provided by ``Qlib``, please refer to `Data Handler <data.html#data-handler>`_.
+    .. note:: 
+        
+        `Alpha158` is the data handler provided by ``Qlib``, please refer to `Data Handler <data.html#data-handler>`_.
+        `SignalRecord` is the `Record Template` in ``Qlib``, please refer to `Record Template <recorder.html#record-template>`_.

 Also, the above example has been given in ``examples/train_backtest_analyze.ipynb``.

@@ -176,4 +115,4 @@ Qlib supports custom models. If users are interested in customizing their own mo

 API
 ===================
-Please refer to `Model API <../reference/api.html#module-qlib.contrib.model.base>`_.
+Please refer to `Model API <../reference/api.html#module-qlib.model.base>`_.
--- a/docs/component/recorder.rst
+++ b/docs/component/recorder.rst
@@ -0,0 +1,99 @@
+.. _recorder:
+
+====================================
+Qlib Recorder: Experiment Management
+====================================
+.. currentmodule:: qlib
+
+Introduction
+===================
+``Qlib`` contains an experiment management system named ``QlibRecorder``, which is designed to help users handle experiments and analyse results in an efficient way.
+
+There are three components of the system:
+
+- `ExperimentManager`
+    a class that manages experiments.
+
+- `Experiment`
+    a class of experiment, and each instance of it is responsible for a single experiment.
+
+- `Recorder`
+    a class of recorder, and each instance of it is responsible for a single run.
+
+Here is a general view of the structure of the system:
+
+.. code-block::
+
+    ExperimentManager
+        - Experiment 1
+            - Recorder 1
+            - Recorder 2
+            - ...
+        - Experiment 2
+            - Recorder 1
+            - Recorder 2
+            - ...
+        - ...
+This experiment management system defines a set of interfaces and provides a concrete implementation ``MLflowExpManager``, which is based on the machine learning platform: ``MLFlow`` (`link <https://mlflow.org/>`_).
+
+If users set the implementation of ``ExpManager`` to be ``MLflowExpManager``, they can use the command `mlflow ui` to visualize and check the experiment results. For more information, please refer to the related documents `here <https://www.mlflow.org/docs/latest/cli.html#mlflow-ui>`_.
+
+Qlib Recorder
+===================
+``QlibRecorder`` provides a high level API for users to use the experiment management system. The interfaces are wrapped in the variable ``R`` in ``Qlib``, and users can directly use ``R`` to interact with the system. The following command shows how to import ``R`` in Python:
+
+.. code-block:: Python
+
+        from qlib.workflow import R
+
+``QlibRecorder`` includes several common APIs for managing `experiments` and `recorders` within a workflow. For more available APIs, please refer to the following section about `Experiment Manager`, `Experiment` and `Recorder`.
+
+Here are the available interfaces of ``QlibRecorder``:
+
+.. autoclass:: qlib.workflow.__init__.QlibRecorder
+    :members:
+
+Experiment Manager
+===================
+
+The ``ExpManager`` module in ``Qlib`` is responsible for managing different experiments. Most of the APIs of ``ExpManager`` are similar to ``QlibRecorder``, and the most important API will be the ``get_exp`` method. Users can directly refer to the documents above for some detailed information about how to use the ``get_exp`` method.
+
+.. autoclass:: qlib.workflow.expm.ExpManager
+    :members: get_exp, list_experiments
+
+For other interfaces such as `create_exp`, `delete_exp`, please refer to `Experiment Manager API <../reference/api.html#experiment-manager>`_.
+
+Experiment
+===================
+
+The ``Experiment`` class is solely responsible for a single experiment, and it will handle any operations that are related to an experiment. Basic methods such as `start` and `end` for an experiment are included. Besides, methods related to `recorders` are also available: such methods include `get_recorder` and `list_recorders`.
+
+.. autoclass:: qlib.workflow.exp.Experiment
+    :members: get_recorder, list_recorders
+
+For other interfaces such as `search_records`, `delete_recorder`, please refer to `Experiment API <../reference/api.html#experiment>`_.
+
+``Qlib`` also provides a default ``Experiment``, which will be created and used under certain situations when users use the APIs such as `log_metrics` or `get_exp`. If the default ``Experiment`` is used, there will be related logged information when running ``Qlib``. Users are able to change the name of the default ``Experiment``, which is set to be '`Experiment`' by default, in the config file of ``Qlib`` or during ``Qlib``'s `initialization <../start/initialization.html#parameters>`_.
+
+Recorder
+===================
+
+The ``Recorder`` class is responsible for a single recorder. It will handle some detailed operations such as ``log_metrics``, ``log_params`` of a single run. It is designed to help users easily track results and things being generated during a run.
+
+Here are some important APIs that are not included in the ``QlibRecorder``:
+
+.. autoclass:: qlib.workflow.recorder.Recorder
+    :members: list_artifacts, list_metrics, list_params, list_tags
+
+For other interfaces such as `save_objects`, `load_object`, please refer to `Recorder API <../reference/api.html#recorder>`_.
+
+Record Template
+===================
+
+The ``RecordTemp`` class enables generating experiment results such as IC and backtest in a certain format. We have provided three different `Record Template` classes:
+
+- ``SignalRecord``: This class generates the `prediction` results of the model.
+- ``SigAnaRecord``: This class generates the `IC`, `ICIR`, `Rank IC` and `Rank ICIR` of the model.
+- ``PortAnaRecord``: This class generates the results of `backtest`. For detailed information about `backtest` as well as the available `strategy`, users can refer to `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.
+
+For more information about the APIs, please refer to `Record Template API <../reference/api.html#module-qlib.workflow.record_temp>`_.
--- a/docs/component/report.rst
+++ b/docs/component/report.rst
@@ -1,12 +1,13 @@
 .. _report:
+
 ==========================================
-Aanalysis: Evaluation & Results Analysis
+Analysis: Evaluation & Results Analysis
 ==========================================

 Introduction
 ===================

-``Aanalysis`` is designed to show the graphical reports of ``Intraday Trading`` , which helps users to evaluate and analyse investment portfolios visually. The following are some graphics to view:
+``Analysis`` is designed to show the graphical reports of ``Intraday Trading`` , which helps users to evaluate and analyse investment portfolios visually. The following are some graphics to view:

 - analysis_position
    - report_graph
@@ -51,7 +52,7 @@ API
 Graphical Result
 ~~~~~~~~~~~~~~~~

-.. note:: 
+.. note::

    - Axis X: Trading day
    - Axis Y: 
@@ -100,7 +101,7 @@ Graphical Result
    - Axis Y: 
        - `ic`
            The `Pearson correlation coefficient` series between `label` and `prediction score`.
-            In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. Please refer to `Data API Featrue <data.html>`_ for more details.
+            In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. Please refer to `Data Feature <data.html#feature>`_ for more details.
                
        - `rank_ic`
            The `Spearman's rank correlation coefficient` series between `label` and `prediction score`.
@@ -108,35 +109,35 @@ Graphical Result
 .. image:: ../_static/img/analysis/score_ic.png 


-Usage of `analysis_position.cumulative_return`
----------------------------------------------
-
-API
-~~~~~~~~~~~~~~~~
-
-.. automodule:: qlib.contrib.report.analysis_position.cumulative_return
-    :members:
-
-Graphical Result
-~~~~~~~~~~~~~~~~~
-
-.. note:: 
-
-    - Axis X: Trading day
-    - Axis Y:
-        - Above axis Y: `(((Ref($close, -1)/$close - 1) * weight).sum() / weight.sum()).cumsum()`
-        - Below axis Y: Daily weight sum
-    - In the **sell** graph, `y < 0` stands for profit; in other cases, `y > 0` stands for profit.
-    - In the **buy_minus_sell** graph, the **y** value of the **weight** graph at the bottom is `buy_weight + sell_weight`.
-    - In each graph, the **red line** in the histogram on the right represents the average.                                                                                                        
-
-.. image:: ../_static/img/analysis/cumulative_return_buy.png 
-
-.. image:: ../_static/img/analysis/cumulative_return_sell.png 
-
-.. image:: ../_static/img/analysis/cumulative_return_buy_minus_sell.png 
-
-.. image:: ../_static/img/analysis/cumulative_return_hold.png 
+.. Usage of `analysis_position.cumulative_return`
+.. ----------------------------------------------
+..
+.. API
+.. ~~~~~~~~~~~~~~~~
+..
+.. .. automodule:: qlib.contrib.report.analysis_position.cumulative_return
+..     :members:
+..
+.. Graphical Result
+.. ~~~~~~~~~~~~~~~~~
+..
+.. .. note:: 
+..
+..     - Axis X: Trading day
+..     - Axis Y:
+..         - Above axis Y: `(((Ref($close, -1)/$close - 1) * weight).sum() / weight.sum()).cumsum()`
+..         - Below axis Y: Daily weight sum
+..     - In the **sell** graph, `y < 0` stands for profit; in other cases, `y > 0` stands for profit.
+..     - In the **buy_minus_sell** graph, the **y** value of the **weight** graph at the bottom is `buy_weight + sell_weight`.
+..     - In each graph, the **red line** in the histogram on the right represents the average.
+..
+.. .. image:: ../_static/img/analysis/cumulative_return_buy.png 
+..
+.. .. image:: ../_static/img/analysis/cumulative_return_sell.png 
+..
+.. .. image:: ../_static/img/analysis/cumulative_return_buy_minus_sell.png 
+..
+.. .. image:: ../_static/img/analysis/cumulative_return_hold.png 


 Usage of `analysis_position.risk_analysis`
@@ -152,7 +153,7 @@ API
 Graphical Result
 ~~~~~~~~~~~~~~~~~

-.. note:: 
+.. note::

    - general graphics
        - `std`
@@ -178,10 +179,10 @@ Graphical Result
                The `Maximum Drawdown` of `CAR` (cumulative abnormal return) with cost.


-.. image:: ../_static/img/analysis/risk_analysis_bar.png 
+.. image:: ../_static/img/analysis/risk_analysis_bar.png
    :align: center

-.. note:: 
+.. note::

    - annualized_return/max_drawdown/information_ratio/std graphics
        - Axis X: Trading days grouped by month
@@ -220,42 +221,42 @@ Graphical Result
 .. image:: ../_static/img/analysis/risk_analysis_std.png 
    :align: center

-
-Usage of `analysis_position.rank_label`
----------------------------------------------
-
-API
-~~~~~
-
-.. automodule:: qlib.contrib.report.analysis_position.rank_label
-    :members:
-
-
-Graphical Result
-~~~~~~~~~~~~~~~~~
-
-.. note:: 
-
-    - hold/sell/buy graphics:
-        - Axis X: Trading day
-        - Axis Y: 
-            Average `ranking ratio`of `label` for stocks that is held/sold/bought on the trading day.
-
-            In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. The `ranking ratio` can be formulated as follows.
-            .. math::
-                
-                ranking\ ratio = \frac{Ascending\ Ranking\ of\ label}{Number\ of\ Stocks\ in\ the\ Portfolio}
-
-.. image:: ../_static/img/analysis/rank_label_hold.png 
-    :align: center
-
-.. image:: ../_static/img/analysis/rank_label_buy.png 
-    :align: center
-
-.. image:: ../_static/img/analysis/rank_label_sell.png 
-    :align: center
-
-
+..
+.. Usage of `analysis_position.rank_label`
+.. ----------------------------------------------
+..
+.. API
+.. ~~~~~
+..
+.. .. automodule:: qlib.contrib.report.analysis_position.rank_label
+..     :members:
+..
+..
+.. Graphical Result
+.. ~~~~~~~~~~~~~~~~~
+..
+.. .. note:: 
+..
+..     - hold/sell/buy graphics:
+..         - Axis X: Trading day
+..         - Axis Y: 
+..             Average `ranking ratio`of `label` for stocks that is held/sold/bought on the trading day.
+..
+..             In the above example, the `label` is formulated as `Ref($close, -1)/$close - 1`. The `ranking ratio` can be formulated as follows.
+..             .. math::
+..                 
+..                 ranking\ ratio = \frac{Ascending\ Ranking\ of\ label}{Number\ of\ Stocks\ in\ the\ Portfolio}
+..
+.. .. image:: ../_static/img/analysis/rank_label_hold.png 
+..     :align: center
+..
+.. .. image:: ../_static/img/analysis/rank_label_buy.png 
+..     :align: center
+..
+.. .. image:: ../_static/img/analysis/rank_label_sell.png 
+..     :align: center
+..
+..

 Usage of `analysis_model.analysis_model_performance`
 -----------------------------------------------------
--- a/docs/component/strategy.rst
+++ b/docs/component/strategy.rst
@@ -1,17 +1,18 @@
 .. _strategy:
+
 ========================================
-Interday Strategy: Portfolio Management
+Portfolio Strategy: Portfolio Management
 ========================================
 .. currentmodule:: qlib

 Introduction
 ===================

-``Interday Strategy`` is designed to adopt different trading strategies, which means that users can adopt different algorithms to generate investment portfolios based on the prediction scores of the ``Interday Model``. Users can use the ``Interday Strategy`` in an automatic workflow by ``Estimator``, please refer to `Estimator <estimator.html>`_.  
+``Portfolio Strategy`` is designed to adopt different portfolio strategies, which means that users can adopt different algorithms to generate investment portfolios based on the prediction scores of the ``Forecast Model``. Users can use the ``Portfolio Strategy`` in an automatic workflow through the ``Workflow`` module, please refer to `Workflow: Workflow Management <workflow.html>`_.

-Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Interday Strategy`` can be used as an independent module also.
+Because the components in ``Qlib`` are designed in a loosely-coupled way, ``Portfolio Strategy`` can be used as an independent module also.

-``Qlib`` provides several implemented trading strategies. Also, ``Qlib`` supports custom strategy, users can customize strategies according to their own needs.
+``Qlib`` provides several implemented portfolio strategies. Also, ``Qlib`` supports custom strategy, users can customize strategies according to their own needs.

 Base Class & Interface
 ======================
@@ -25,19 +26,20 @@ Qlib provides a base class ``qlib.contrib.strategy.BaseStrategy``. All strategy
    Return the proportion of your total value you will use in investment. Dynamically risk_degree will result in Market timing.

 - `generate_order_list`
-    Rerturn the order list. 
+    Return the order list. 

 Users can inherit `BaseStrategy` to customize their strategy class.

 WeightStrategyBase
 --------------------

-Qlib alse provides a class ``qlib.contrib.strategy.WeightStrategyBase`` that is a subclass of `BaseStrategy`. 
+Qlib also provides a class ``qlib.contrib.strategy.WeightStrategyBase`` that is a subclass of `BaseStrategy`. 

 `WeightStrategyBase` only focuses on the target positions, and automatically generates an order list based on positions. It provides the `generate_target_weight_position` interface.

 - `generate_target_weight_position`
-    - According to the current position and trading date to generate the target position. The cash is not considered.
+    - Generate the target position according to the current position and trading date. The cash is not considered in
+      the output weight distribution.
    - Return the target position.

    .. note::
@@ -80,7 +82,7 @@ TopkDropoutStrategy

 Usage & Example
 ====================
-``Interday Strategy`` can be specified in the ``Intraday Trading(Backtest)``, the example is as follows.
+``Portfolio Strategy`` can be specified in the ``Intraday Trading(Backtest)``, the example is as follows.

 .. code-block:: python

@@ -95,11 +97,13 @@ Usage & Example
        "limit_threshold": 0.095,
        "account": 100000000,
        "benchmark": BENCHMARK,
-        "deal_price": "vwap",
+        "deal_price": "close",
+        "open_cost": 0.0005,
+        "close_cost": 0.0015,
+        "min_cost": 5,
+        
    }
-
    # use default strategy
-    # custom Strategy, refer to: TODO: Strategy API url
    strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)

    # pred_score is the `prediction score` output by Model
@@ -107,12 +111,12 @@ Usage & Example
        pred_score, strategy=strategy, **BACKTEST_CONFIG
    )

-Also, the above example has been given in ``examples\train_backtest_analyze.ipynb``.
+Also, the above example has been given in ``examples/train_backtest_analyze.ipynb``.

-To know more about the `prediction score` `pred_score` output by ``Interday Model``, please refer to `Interday Model: Model Training & Prediction <model.html>`_.
+To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.

 To know more about ``Intraday Trading``, please refer to `Intraday Trading: Model&Strategy Testing <backtest.html>`_.

 Reference
 ===================
-To know more about ``Interday Strategy``, please refer to `Strategy API <../reference/api.html>`_.
+To know more about ``Portfolio Strategy``, please refer to `Strategy API <../reference/api.html#module-qlib.contrib.strategy.strategy>`_.
--- a/docs/component/workflow.rst
+++ b/docs/component/workflow.rst
@@ -0,0 +1,269 @@
+.. _workflow:
+
+=================================
+Workflow: Workflow Management
+=================================
+.. currentmodule:: qlib
+
+Introduction
+===================
+
+The components in `Qlib Framework <../introduction/introduction.html#framework>`_ are designed in a loosely-coupled way. Users could build their own Quant research workflow with these components like `Example <https://github.com/microsoft/qlib/blob/main/examples/workflow_by_code.py>`_.
+
+
+Besides, ``Qlib`` provides a more user-friendly interface named ``qrun`` to automatically run the whole workflow defined by a configuration file. Running the whole workflow is called an `execution`.
+With ``qrun``, users can easily start an `execution`, which includes the following steps:
+
+- Data
+    - Loading
+    - Processing
+    - Slicing
+- Model
+    - Training and inference
+    - Saving & loading
+- Evaluation
+    - Forecast signal analysis
+    - Backtest
+
+For each `execution`, ``Qlib`` has a complete system to track all the information as well as the artifacts generated during the training, inference and evaluation phases. For more information about how ``Qlib`` handles this, please refer to the related document: `Recorder: Experiment Management <../component/recorder.html>`_.
+
+Complete Example
+===================
+
+Before getting into details, here is a complete example of ``qrun``, which defines the workflow in typical Quant research.
+Below is a typical config file of ``qrun``.
+
+.. code-block:: YAML
+
+    qlib_init:
+        provider_uri: "~/.qlib/qlib_data/cn_data"
+        region: cn
+    market: &market csi300
+    benchmark: &benchmark SH000300
+    data_handler_config: &data_handler_config
+        start_time: 2008-01-01
+        end_time: 2020-08-01
+        fit_start_time: 2008-01-01
+        fit_end_time: 2014-12-31
+        instruments: *market
+    port_analysis_config: &port_analysis_config
+        strategy:
+            class: TopkDropoutStrategy
+            module_path: qlib.contrib.strategy.strategy
+            kwargs:
+                topk: 50
+                n_drop: 5
+        backtest:
+            verbose: False
+            limit_threshold: 0.095
+            account: 100000000
+            benchmark: *benchmark
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+    task:
+        model:
+            class: LGBModel
+            module_path: qlib.contrib.model.gbdt
+            kwargs:
+                loss: mse
+                colsample_bytree: 0.8879
+                learning_rate: 0.0421
+                subsample: 0.8789
+                lambda_l1: 205.6999
+                lambda_l2: 580.9768
+                max_depth: 8
+                num_leaves: 210
+                num_threads: 20
+        dataset:
+            class: DatasetH
+            module_path: qlib.data.dataset
+            kwargs:
+                handler:
+                    class: Alpha158
+                    module_path: qlib.contrib.data.handler
+                    kwargs: *data_handler_config
+                segments:
+                    train: [2008-01-01, 2014-12-31]
+                    valid: [2015-01-01, 2016-12-31]
+                    test: [2017-01-01, 2020-08-01]
+        record:
+            - class: SignalRecord
+              module_path: qlib.workflow.record_temp
+              kwargs: {}
+            - class: PortAnaRecord
+              module_path: qlib.workflow.record_temp
+              kwargs:
+                  config: *port_analysis_config
+
+After saving the config into `configuration.yaml`, users could start the workflow and test their ideas with a single command below.
+
+.. code-block:: bash
+
+    qrun configuration.yaml
+
+If users want to use ``qrun`` under debug mode, please use the following command:
+
+.. code-block:: bash
+
+    python -m pdb qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+
+.. note:: 
+
+    `qrun` will be placed in your $PATH directory when installing ``Qlib``.
+
+.. note:: 
+        
+    The symbol `&` in a `yaml` file stands for an anchor of a field, which is useful when other fields include this parameter as part of their value (the symbol `*` refers back to an anchor). Taking the configuration file above as an example, users can directly change the value of `market` and `benchmark` without traversing the entire configuration file.
+
+
+Configuration File
+===================
+
+Let's get into details of ``qrun`` in this section.
+
+Before using ``qrun``, users need to prepare a configuration file. The following content shows how to prepare each part of the configuration file.
+
+Qlib Init Section
+--------------------
+
+At first, the configuration file needs to contain several basic parameters which will be used for qlib initialization.
+
+.. code-block:: YAML
+
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+
+The meaning of each field is as follows:
+
+- `provider_uri`
+    Type: str. The URI of the Qlib data. For example, it could be the location where the data loaded by ``get_data.py`` are stored.
+
+- `region`
+    - If `region` == "us", ``Qlib`` will be initialized in US-stock mode. 
+    - If `region` == "cn", ``Qlib`` will be initialized in China-stock mode.
+
+    .. note:: 
+        
+        The value of `region` should be aligned with the data stored in `provider_uri`.
+
+
+Task Section
+--------------------
+
+The `task` field in the configuration corresponds to a `task`, which contains the parameters of three different subsections: `Model`, `Dataset` and `Record`.
+
+Model Section
+~~~~~~~~~~~~~~~~~~~~
+
+In the `task` field, the `model` section describes the parameters of the model to be used for training and inference. For more information about the base ``Model`` class, please refer to `Qlib Model <../component/model.html>`_.
+
+.. code-block:: YAML
+
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+
+The meaning of each field is as follows:
+
+- `class`
+    Type: str. The name for the model class.
+
+- `module_path`
+    Type: str. The path for the model in qlib.
+
+- `kwargs`
+    The keyword arguments for the model. Please refer to the specific model implementation for more information: `models <https://github.com/microsoft/qlib/blob/main/qlib/contrib/model>`_.
+
+.. note:: 
+        
+    ``Qlib`` provides a util named ``init_instance_by_config`` to initialize any class inside ``Qlib`` from a configuration that includes the fields `class`, `module_path` and `kwargs`.
+
+Dataset Section
+~~~~~~~~~~~~~~~~~~~~
+
+The `dataset` field describes the parameters for the ``Dataset`` module in ``Qlib`` as well as those for the module ``DataHandler``. For more information about the ``Dataset`` module, please refer to `Qlib Data <../component/data.html#dataset>`_.
+
+The keyword arguments configuration of the ``DataHandler`` is as follows:
+
+.. code-block:: YAML
+
+    data_handler_config: &data_handler_config
+        start_time: 2008-01-01
+        end_time: 2020-08-01
+        fit_start_time: 2008-01-01
+        fit_end_time: 2014-12-31
+        instruments: *market
+
+Users can refer to the document of `DataHandler <../component/data.html#datahandler>`_ for more information about the meaning of each field in the configuration.
+
+Here is the configuration for the ``Dataset`` module which will take care of data preprocessing and slicing during the training and testing phase.
+
+.. code-block:: YAML
+
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+
+Record Section
+~~~~~~~~~~~~~~~~~~~~
+
+The `record` field is about the parameters of the ``Record`` module in ``Qlib``. ``Record`` is responsible for tracking the training process and results such as `Information Coefficient (IC)` and `backtest` in a standard format.
+
+The following script is the configuration of `backtest` and the `strategy` used in `backtest`:
+
+.. code-block:: YAML
+
+    port_analysis_config: &port_analysis_config
+        strategy:
+            class: TopkDropoutStrategy
+            module_path: qlib.contrib.strategy.strategy
+            kwargs:
+                topk: 50
+                n_drop: 5
+        backtest:
+            verbose: False
+            limit_threshold: 0.095
+            account: 100000000
+            benchmark: *benchmark
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+
+For more information about the meaning of each field in configuration of `strategy` and `backtest`, users can look up the documents: `Strategy <../component/strategy.html>`_ and `Backtest <../component/backtest.html>`_.
+
+Here are the configuration details of different `Record Template` classes such as ``SignalRecord`` and ``PortAnaRecord``:
+
+.. code-block:: YAML
+
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
+
+For more information about the ``Record`` module in ``Qlib``, users can refer to the related document: `Record <../component/recorder.html#record-template>`_.
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -53,7 +53,6 @@ source_suffix = ".rst"
 master_doc = "index"


-
 # General information about the project.
 project = u"QLib"
 copyright = u"Microsoft"
@@ -64,9 +63,9 @@ author = u"Microsoft"
 # built documents.
 #
 # The short X.Y version.
-version = pkg_resources.get_distribution("qlib").version
+version = pkg_resources.get_distribution("pyqlib").version
 # The full version, including alpha/beta/rc tags.
-release = pkg_resources.get_distribution("qlib").version
+release = pkg_resources.get_distribution("pyqlib").version

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -104,8 +103,7 @@ todo_include_todos = True
 #
 html_theme = "sphinx_rtd_theme"

-html_logo = '_static/img/logo/1.png'
-
+html_logo = "_static/img/logo/1.png"


 # Theme options are theme-specific and customize the look and feel of a theme
@@ -126,7 +124,7 @@ html_theme_options = {
    "logo_only": True,
    "collapse_navigation": False,
    "display_version": False,
-    "navigation_depth": 3,
+    "navigation_depth": 4,
 }

 # Add any paths that contain custom static files (such as style sheets) here,
@@ -161,15 +159,12 @@ latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
-
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
-
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
-
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
@@ -231,3 +226,8 @@ epub_exclude_files = ["search.html"]

 autodoc_member_order = "bysource"
 autodoc_default_flags = ["members"]
+autodoc_default_options = {
+    "members": True,
+    "member-order": "bysource",
+    "special-members": "__init__",
+}
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -35,12 +35,13 @@ Document Structure
   :maxdepth: 3
   :caption: COMPONENTS:

-   Estimator: Workflow Management <component/estimator.rst>
+   Workflow: Workflow Management <component/workflow.rst>
   Data Layer: Data Framework&Usage <component/data.rst>
-   Interday Model: Model Training & Prediction <component/model.rst>
-   Interday Strategy: Portfolio Management <component/strategy.rst>
+   Forecast Model: Model Training & Prediction <component/model.rst>
+   Strategy: Portfolio Management <component/strategy.rst>
   Intraday Trading: Model&Strategy Testing <component/backtest.rst>
-   Aanalysis: Evaluation & Results Analysis <component/report.rst>
+   Qlib Recorder: Experiment Management <component/recorder.rst>
+   Analysis: Evaluation & Results Analysis <component/report.rst>

 .. toctree::
   :maxdepth: 3
@@ -48,12 +49,18 @@ Document Structure
   
   Building Formulaic Alphas <advanced/alpha.rst>
   Online & Offline mode <advanced/server.rst>
+
 .. toctree::
   :maxdepth: 3
   :caption: REFERENCE:

   API <reference/api.rst>

+.. toctree::
+   :maxdepth: 3
+
+   FAQ <FAQ/FAQ.rst>
+
 .. toctree::
   :maxdepth: 3
   :caption: Change Log:
--- a/docs/introduction/introduction.rst
+++ b/docs/introduction/introduction.rst
@@ -21,27 +21,27 @@ Framework

 At the module level, Qlib is a platform that consists of above components. The components are designed as loose-coupled modules and each component could be used stand-alone.

-======================  ==============================================================================
-Name                    Description
-======================  ==============================================================================
-`Data layer`            `DataServer` focuses on providing high-performance infrastructure for users to
-                        manage and retrieve raw data. `DataEnhancement` will preprocess the data and 
-                        provide the best dataset to be fed into the models.

-`Interday Model`        `Interday model` focuses on producing prediction scores (aka. `alpha`). Models
-                        are trained by `Model Creator` and managed by `Model Manager`. Users could 
-                        choose one or multiple models for prediction. Multiple models could be combined
-                        with `Ensemble` module.

-`Interday Strategy`     `Portfolio Generator` will take prediction scores as input and output the 
-                        orders based on the current position to achieve the target portfolio.
+========================  ==============================================================================
+Name                      Description
+========================  ==============================================================================
+`Infrastructure` layer    `Infrastructure` layer provides underlying support for Quant research.
+                          `DataServer` provides high-performance infrastructure for users to manage 
+                          and retrieve raw data. `Trainer` provides flexible interface to control
+                          the training process of models which enable algorithms controlling the
+                          training process.

-`Intraday Trading`      `Order Executor` is responsible for executing orders output by 
-                        `Interday Strategy` and returning the executed results.
+`Workflow` layer          `Workflow` layer covers the whole workflow of quantitative investment.
+                          `Information Extractor` extracts data for models. `Forecast Model` focuses
+                          on producing all kinds of forecast signals (e.g. *alpha*, risk) for other
+                          modules. With these signals `Portfolio Generator` will generate the target
+                          portfolio and produce orders to be executed by `Order Executor`.

-`Analysis`              Users could get a detailed analysis report of forecasting signals and portfolios
-                        in this part.
-======================  ==============================================================================
+`Interface` layer         `Interface` layer tries to present a user-friendly interface for the underlying
+                          system. `Analyser` module will provide users detailed analysis reports of
+                          forecasting signals, portfolios and execution results.
+========================  ==============================================================================

 - The modules with hand-drawn style are under development and will be released in the future.
 - The modules with dashed borders are highly user-customizable and extendible.
--- a/docs/introduction/quick.rst
+++ b/docs/introduction/quick.rst
@@ -40,27 +40,28 @@ Load and prepare data by running the following code:

 .. code-block::

-    python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
+    python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn

 This dataset is created by public data collected by crawler scripts in ``scripts/data_collector/``, which have been released in the same repository. Users could create the same dataset with it.

-To kown more about `prepare data`, please refer to `Data Preparation <../component/data.html>`_.
+To know more about `prepare data`, please refer to `Data Preparation <../component/data.html#data-preparation>`_.

 Auto Quant Research Workflow
 ====================================

-``Qlib`` provides a tool named ``Estimator`` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). Users can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 
+``Qlib`` provides a tool named ``qrun`` to run the whole workflow automatically (including building dataset, training models, backtest and evaluation). Users can start an auto quant research workflow and have a graphical reports analysis according to the following steps: 

 - Quant Research Workflow: 
-    - Run  ``Estimator`` with `estimator_config.yaml` as following.
+    - Run ``qrun`` with a config file of the LightGBM model `workflow_config_lightgbm.yaml` as follows.
+
        .. code-block:: 

            cd examples  # Avoid running program under the directory contains `qlib`
-            estimator -c estimator/estimator_config.yaml
+            qrun benchmarks/LightGBM/workflow_config_lightgbm.yaml


-    - Estimator result
-        The result of ``Estimator`` is as follows, which is also the result of ``Interday Trading``. Please refer to please refer to `Interdat Trading <../component/backtest.html>`_. for more details about the result.
+    - Workflow result
+        The result of ``qrun`` is as follows, which is also the typical result of ``Forecast model(alpha)``. Please refer to `Intraday Trading <../component/backtest.html>`_ for more details about the result.

        .. code-block:: python
        
@@ -77,17 +78,17 @@ Auto Quant Research Workflow
                                       max_drawdown      -0.075024

        
-    To know more about `Estimator`, please refer to `Estimator <../component/estimator.html>`_.
+    To know more about `workflow` and `qrun`, please refer to `Workflow: Workflow Management <../component/workflow.html>`_.

 - Graphical Reports Analysis:
-    - Run ``examples/estimator/analyze_from_estimator.ipynb`` with jupyter notebook
-        Users can have portfolio analysis or prediction score (model prediction) analysis by run ``examples/estimator/analyze_from_estimator.ipynb``.
+    - Run ``examples/workflow_by_code.ipynb`` with jupyter notebook
+        Users can have portfolio analysis or prediction score (model prediction) analysis by running ``examples/workflow_by_code.ipynb``.
    - Graphical Reports
-        Users can get graphical reports about the analysis, please refer to `Aanalysis: Evaluation & Results Analysis <../component/report.html>`_ for more details.
+        Users can get graphical reports about the analysis, please refer to `Analysis: Evaluation & Results Analysis <../component/report.html>`_ for more details.



 Custom Model Integration
 ===============================================

-``Qlib`` provides ``lightGBM`` and ``Dnn`` model as the baseline of ``Interday Model``. In addition to the default model, users can integrate their own custom models into ``Qlib``. If users are interested in the custom model, please refer to `Custom Model Integration <../start/integration.html>`_.
+``Qlib`` provides a batch of models (such as ``lightGBM`` and ``MLP`` models) as examples of ``Forecast Model``. In addition to the default model, users can integrate their own custom models into ``Qlib``. If users are interested in the custom model, please refer to `Custom Model Integration <../start/integration.html>`_.
--- a/docs/reference/api.rst
+++ b/docs/reference/api.rst
@@ -23,16 +23,13 @@ Filter
 .. automodule:: qlib.data.filter
    :members:

-Feature
--------------------
-
 Class
-~~~~~~~~~~~~~~~~~~~~
+--------------------
 .. automodule:: qlib.data.base
    :members:

 Operator
-~~~~~~~~~~~~~~~~~~~~
+--------------------
 .. automodule:: qlib.data.ops
    :members:
 	       
@@ -56,19 +53,36 @@ Cache
 .. autoclass:: qlib.data.cache.DiskDatasetCache
    :members:

+Dataset
+---------------
+
+Dataset Class
+~~~~~~~~~~~~~~~~~~~~
+.. automodule:: qlib.data.dataset.__init__
+    :members:
+
+Data Loader
+~~~~~~~~~~~~~~~~~~~~
+.. automodule:: qlib.data.dataset.loader
+    :members:
+
+Data Handler
+~~~~~~~~~~~~~~~~~~~~
+.. automodule:: qlib.data.dataset.handler
+    :members:
+
+Processor
+~~~~~~~~~~~~~~~~~~~~
+.. automodule:: qlib.data.dataset.processor
+    :members:
+

 Contrib
 ====================

-
-Data Handler
---------------
-.. automodule:: qlib.contrib.estimator.handler
-    :members:
-
 Model
 --------------------
-.. automodule:: qlib.contrib.model.base
+.. automodule:: qlib.model.base
    :members:

 Strategy
@@ -116,3 +130,26 @@ Report
    :members:


+Workflow
+====================
+
+
+Experiment Manager
+--------------------
+.. autoclass:: qlib.workflow.expm.ExpManager
+    :members:
+
+Experiment
+--------------------
+.. autoclass:: qlib.workflow.exp.Experiment
+    :members:
+
+Recorder
+--------------------
+.. autoclass:: qlib.workflow.recorder.Recorder
+    :members:
+
+Record Template
+--------------------
+.. automodule:: qlib.workflow.record_temp
+    :members:
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1 +1,5 @@
-Cython==0.29.21
+Cython
+cmake
+numpy
+scipy
+scikit-learn
--- a/docs/start/getdata.rst
+++ b/docs/start/getdata.rst
@@ -1,4 +1,5 @@
 .. _getdata:
+
 =============================
 Data Retrieval
 =============================
--- a/docs/start/initialization.rst
+++ b/docs/start/initialization.rst
@@ -1,4 +1,5 @@
 .. _initialization:
+
 ====================
 Qlib Initialization
 ====================
@@ -11,14 +12,16 @@ Initialization

 Please follow the steps below to initialize ``Qlib``.

- Download and prepare the Data: execute the following command to download stock data.
+Download and prepare the Data: execute the following command to download stock data. Please note that the data is collected from `Yahoo Finance <https://finance.yahoo.com/lookup>`_ and might not be perfect. We recommend users to prepare their own data if they have high-quality datasets. Please refer to `Data <../component/data.html#converting-csv-format-into-qlib-format>`_ for more information about customized datasets.
+    
    .. code-block:: bash
    
-        python scripts/get_data.py qlib_data_cn --target_dir ~/.qlib/qlib_data/cn_data
-    Please refer to `Raw Data  <../component/data.html>`_ for more information about ``get_data.py``,
+        python scripts/get_data.py qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn
+        
+Please refer to `Data Preparation <../component/data.html#data-preparation>`_ for more information about `get_data.py`.


- Initialize Qlib before calling other APIs: run following code in python.
+Initialize Qlib before calling other APIs: run following code in python.

    .. code-block:: Python

@@ -28,7 +31,8 @@ Please follow the steps below to initialize ``Qlib``.
        provider_uri = "~/.qlib/qlib_data/cn_data"  # target_dir
        qlib.init(provider_uri=provider_uri, region=REG_CN)
    
-
+.. note::
+   Do not import the qlib package in the repository directory of ``Qlib``, otherwise, errors may occur.

 Parameters
 -------------------
@@ -43,7 +47,7 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
        - ``qlib.config.REG_US``: US stock market.
        - ``qlib.config.REG_CN``: China stock market.

-        Different modse will result in different trading limitations and costs.
+        Different modes will result in different trading limitations and costs.
 - `redis_host`
    Type: str, optional parameter(default: "127.0.0.1"), host of `redis`
        The lock and cache mechanism relies on redis.
@@ -57,3 +61,17 @@ Besides `provider_uri` and `region`, `qlib.init` has other parameters. The follo
    .. note::
        
        If Qlib fails to connect redis via `redis_host` and `redis_port`, cache mechanism will not be used! Please refer to `Cache <../component/data.html#cache>`_ for details.
+- `exp_manager`
+    Type: dict, optional parameter, the setting of `experiment manager` to be used in qlib. Users can specify an experiment manager class, as well as the tracking URI for all the experiments. However, please be aware that we only support input of a dictionary in the following style for `exp_manager`. For more information about `exp_manager`, users can refer to `Recorder: Experiment Management <../component/recorder.html>`_.
+    
+    .. code-block:: Python
+
+        # For example, if you want to set your tracking_uri to a <specific folder>, you can initialize qlib below
+        qlib.init(provider_uri=provider_uri, region=REG_CN, exp_manager= {
+            "class": "MLflowExpManager",
+            "module_path": "qlib.workflow.expm",
+            "kwargs": {
+                "uri": "python_execution_path/mlruns",
+                "default_exp_name": "Experiment",
+            }
+        })
--- a/docs/start/installation.rst
+++ b/docs/start/installation.rst
@@ -1,4 +1,5 @@
 .. _installation:
+
 ====================
 Installation
 ====================
@@ -12,7 +13,14 @@ Installation

   `Qlib` supports both `Windows` and `Linux`. It's recommended to use `Qlib` in `Linux`. ``Qlib`` supports Python3, which is up to Python3.8.

-Please follow the steps below to install ``Qlib``:
+Users can easily install ``Qlib`` by pip according to the following command:
+
+.. code-block:: bash
+
+   pip install pyqlib
+
+
+Also, users can install ``Qlib`` from the source code according to the following steps:

 - Enter the root directory of ``Qlib``, in which the file ``setup.py`` exists.
 - Then, please execute the following command to install the environment dependencies and install ``Qlib``:
@@ -24,12 +32,8 @@ Please follow the steps below to install ``Qlib``:
      $ git clone https://github.com/microsoft/qlib.git && cd qlib
      $ python setup.py install

-
 .. note::
   It's recommended to use anaconda/miniconda to setup the environment. ``Qlib`` needs lightgbm and pytorch packages, use pip to install them.
-
-.. note::
-   Do not import qlib in the root directory of ``Qlib``, otherwise, errors may occur.
   


--- a/docs/start/integration.rst
+++ b/docs/start/integration.rst
@@ -5,22 +5,22 @@ Custom Model Integration
 Introduction
 ===================

-``Qlib`` provides ``lightGBM`` and ``Dnn`` model as the baseline of ``Interday Model``. In addition to the default model, users can integrate their own custom models into ``Qlib``.
+``Qlib``'s `Model Zoo` includes models such as ``LightGBM``, ``MLP``, ``LSTM``, etc. These models are examples of ``Forecast Model``. In addition to the default models ``Qlib`` provides, users can integrate their own custom models into ``Qlib``.

 Users can integrate their own custom models according to the following steps.

- Define a custom model class, which should be a subclass of the `qlib.contrib.model.base.Model <../reference/api.html#module-qlib.contrib.model.base>`_.
+- Define a custom model class, which should be a subclass of the `qlib.model.base.Model <../reference/api.html#module-qlib.model.base>`_.
 - Write a configuration file that describes the path and parameters of the custom model.
 - Test the custom model.

 Custom Model Class
 ===========================
-The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/api.html#module-qlib.contrib.model.base>`_ and override the methods in it.
+The Custom models need to inherit `qlib.model.base.Model <../reference/api.html#module-qlib.model.base>`_ and override the methods in it.

 - Override the `__init__` method
    - ``Qlib`` passes the initialized parameters to the \_\_init\_\_ method.
-    - The parameter must be consistent with the hyperparameters in the configuration file.
-    - Code Example: In the following example, the hyperparameter filed of the configuration file should contain parameters such as `loss:mse`.
+    - The hyperparameters of the model in the configuration must be consistent with those defined in the `__init__` method.
+    - Code Example: In the following example, the hyperparameters of the model in the configuration file should contain parameters such as `loss:mse`.
    .. code-block:: Python

        def __init__(self, loss='mse', **kwargs):
@@ -31,80 +31,79 @@ The Custom models need to inherit `qlib.contrib.model.base.Model <../reference/a
            self._model = None

 - Override the `fit` method
-    - ``Qlib`` calls the fit method to train the model
-    - The parameters must include training feature `x_train`, training label `y_train`, test feature `x_valid`, test label `y_valid` at least.
-    - The parameters could include some optional parameters with default values, such as train weight `w_train`, test weight `w_valid` and `num_boost_round = 1000`.
+    - ``Qlib`` calls the fit method to train the model.
+    - The parameters must include `dataset`, as defined in the interface; the training and validation data are prepared from it.
+    - The parameters could include some `optional` parameters with default values, such as `num_boost_round = 1000` for `GBDT`.
    - Code Example: In the following example, `num_boost_round = 1000` is an optional parameter.
    .. code-block:: Python
    
-        def fit(self, x_train:pd.DataFrame, y_train:pd.DataFrame, x_valid:pd.DataFrame, y_valid:pd.DataFrame,
-            w_train:pd.DataFrame = None, w_valid:pd.DataFrame = None, num_boost_round = 1000, **kwargs):
+        def fit(self, dataset: DatasetH, num_boost_round = 1000, **kwargs):
+
+            # prepare dataset for lgb training and evaluation
+            df_train, df_valid = dataset.prepare(
+                ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+            )
+            x_train, y_train = df_train["feature"], df_train["label"]
+            x_valid, y_valid = df_valid["feature"], df_valid["label"]

            # Lightgbm need 1D array as its label
            if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
-                y_train_1d, y_valid_1d = np.squeeze(y_train.values), np.squeeze(y_valid.values)
+                y_train, y_valid = np.squeeze(y_train.values), np.squeeze(y_valid.values)
            else:
-                raise ValueError('LightGBM doesn\'t support multi-label training')
+                raise ValueError("LightGBM doesn't support multi-label training")

-            w_train_weight = None if w_train is None else w_train.values
-            w_valid_weight = None if w_valid is None else w_valid.values
+            dtrain = lgb.Dataset(x_train.values, label=y_train)
+            dvalid = lgb.Dataset(x_valid.values, label=y_valid)

-            dtrain = lgb.Dataset(x_train.values, label=y_train_1d, weight=w_train_weight)
-            dvalid = lgb.Dataset(x_valid.values, label=y_valid_1d, weight=w_valid_weight)
-            self._model = lgb.train(
-                self._params, 
-                dtrain, 
+            # fit the model
+            self.model = lgb.train(
+                self.params,
+                dtrain,
                num_boost_round=num_boost_round,
                valid_sets=[dtrain, dvalid],
-                valid_names=['train', 'valid'],
+                valid_names=["train", "valid"],
+                early_stopping_rounds=early_stopping_rounds,
+                verbose_eval=verbose_eval,
+                evals_result=evals_result,
                **kwargs
            )

 - Override the `predict` method
-    - The parameters include the test features.
+    - The parameters must include the parameter `dataset`, which will be used to get the test dataset.
    - Return the `prediction score`.
-    - Please refer to `qlib.contrib.model.base.Model <../reference/api.html#module-qlib.contrib.model.base>`_ for the parameter types of the fit method.
-    - Code Example: In the following example, users need to use dnn to predict the label(such as `preds`) of test data `x_test` and return it.
+    - Please refer to `Model API <../reference/api.html#module-qlib.model.base>`_ for the parameter types of the fit method.
+    - Code Example: In the following example, users need to use `LightGBM` to predict the label(such as `preds`) of test data `x_test` and return it.
    .. code-block:: Python

-        def predict(self, x_test:pd.DataFrame, **kwargs)-> numpy.ndarray:
-            if self._model is None:
-                raise ValueError('model is not fitted yet!')
-            return self._model.predict(x_test.values)
+        def predict(self, dataset: DatasetH, **kwargs)-> pandas.Series:
+            if self.model is None:
+                raise ValueError("model is not fitted yet!")
+            x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
+            return pd.Series(self.model.predict(x_test.values), index=x_test.index)

- Override the `score` method
-    - The parameters include the test features and test labels.
-    - Return the evaluation score of the model. It's recommended to adopt the loss between labels and `prediction score`.
-    - Code Example: In the following example, users need to calculate the weighted loss with test data `x_test`,  test label `y_test` and the weight `w_test`.
+- Override the `finetune` method (Optional)
+    - This method is optional. Users who want to use it on their own models should inherit the ``ModelFT`` base class, which includes the interface of `finetune`.
+    - The parameters must include the parameter `dataset`.
+    - Code Example: In the following example, users will use `LightGBM` as the model and finetune it.
    .. code-block:: Python

-        def score(self, x_test:pd.Dataframe, y_test:pd.Dataframe, w_test:pd.DataFrame = None) -> float:
-            # Remove rows from x, y and w, which contain Nan in any columns in y_test.
-            x_test, y_test, w_test = drop_nan_by_y_index(x_test, y_test, w_test)
-            preds = self.predict(x_test)
-            w_test_weight = None if w_test is None else w_test.values
-            scorer = mean_squared_error if self.loss_type == 'mse' else roc_auc_score
-            return scorer(y_test.values, preds, sample_weight=w_test_weight)
-
- Override the `save` method & `load` method
-    - The `save` method parameter includes the a `filename` that represents an absolute path, user need to save model into the path.
-    - The `load` method parameter includes the a `buffer` read from the `filename` passed in the `save` method, users need to load model from the `buffer`.
-    - Code Example:
-    .. code-block:: Python
-
-        def save(self, filename):
-            if self._model is None:
-                raise ValueError('model is not fitted yet!')
-            self._model.save_model(filename)
-
-        def load(self, buffer):
-            self._model = lgb.Booster(params={'model_str': buffer.decode('utf-8')})
-
+        def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
+            # Based on existing model and finetune by train more rounds
+            dtrain, _ = self._prepare_data(dataset)
+            self.model = lgb.train(
+                self.params,
+                dtrain,
+                num_boost_round=num_boost_round,
+                init_model=self.model,
+                valid_sets=[dtrain],
+                valid_names=["train"],
+                verbose_eval=verbose_eval,
+            )

 Configuration File
 =======================

-The configuration file is described in detail in the `estimator <../component/estimator.html#complete-example>`_ document. In order to integrate the custom model into ``Qlib``, users need to modify the "model" field in the configuration file.
+The configuration file is described in detail in the `Workflow <../component/workflow.html#complete-example>`_ document. In order to integrate the custom model into ``Qlib``, users need to modify the "model" field in the configuration file. The configuration describes which model to use and how to initialize it.

 - Example: The following example describes the `model` field of configuration file about the custom lightgbm model mentioned above, where `module_path` is the module path, `class` is the class name, and `args` is the hyperparameter passed into the __init__ method. All parameters in the field is passed to `self._params` by `\*\*kwargs` in `__init__` except `loss = mse`. 

@@ -124,23 +123,23 @@ The configuration file is described in detail in the `estimator <../component/es
            num_leaves: 210
            num_threads: 20

-Users could find configuration file of the baseline of the ``Model`` in ``qlib/examples/estimator/estimator_config.yaml`` and ``qlib/examples/estimator/estimator_config_dnn.yaml``
+Users can find the configuration files of the baselines of the ``Model`` in ``examples/benchmarks``. All the configurations of different models are listed under the corresponding model folder.

 Model Testing
 =====================
-Assuming that the configuration file is ``examples/estimator/estimator_config.yaml``, users can run the following command to test the custom model:
+Assuming that the configuration file is ``examples/benchmarks/LightGBM/workflow_config_lightgbm.yaml``, users can run the following command to test the custom model:

 .. code-block:: bash

    cd examples  # Avoid running program under the directory contains `qlib`
-    estimator -c estimator/estimator_config.yaml
+    qrun benchmarks/LightGBM/workflow_config_lightgbm.yaml

-.. note:: ``estimator`` is a built-in command of ``Qlib``.
+.. note:: ``qrun`` is a built-in command of ``Qlib``.

-Also, ``Model`` can also be tested as a single module. An example has been given in ``examples/train_backtest_analyze.ipynb``. 
+``Model`` can also be tested as a single module. An example has been given in ``examples/workflow_by_code.ipynb``.


 Reference
 =====================

-To know more about ``Model``, please refer to `Interday Model: Model Training & Prediction <../component/model.html>`_ and `Model API <../reference/api.html#module-qlib.contrib.model.base>`_.
+To know more about ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <../component/model.html>`_ and `Model API <../reference/api.html#module-qlib.model.base>`_.
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,8 +1,8 @@
 # Requirements

-Here is the minimal hardware requirements to run the example.
+Here are the minimal hardware requirements to run the `workflow_by_code` example.
 - Memory: 16G
- Free Dick: 5G
+- Free Disk: 5G


 # NOTE
--- a/examples/benchmarks/ALSTM/README.md
+++ b/examples/benchmarks/ALSTM/README.md
@@ -0,0 +1,8 @@
+# ALSTM
+
+- ALSTM contains a temporal attentive aggregation layer based on normal LSTM.
+
+- Paper: A dual-stage attention-based recurrent neural network for time series prediction.
+
+  [https://www.ijcai.org/Proceedings/2017/0366.pdf](https://www.ijcai.org/Proceedings/2017/0366.pdf)
+
--- a/examples/benchmarks/ALSTM/requirements.txt
+++ b/examples/benchmarks/ALSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha158.yaml
@@ -0,0 +1,93 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
+++ b/examples/benchmarks/ALSTM/workflow_config_alstm_Alpha360.yaml
@@ -0,0 +1,83 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: ALSTM
+        module_path: qlib.contrib.model.pytorch_alstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 1e-3
+            early_stop: 20
+            batch_size: 800
+            metric: loss
+            loss: mse
+            GPU: 0
+            rnn_type: GRU
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/CatBoost/README.md
+++ b/examples/benchmarks/CatBoost/README.md
@@ -0,0 +1,3 @@
+# CatBoost
+* Code: [https://github.com/catboost/catboost](https://github.com/catboost/catboost)
+* Paper: CatBoost: unbiased boosting with categorical features. [https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf](https://proceedings.neurips.cc/paper/2018/file/14491b756b3a51daac41c24863285549-Paper.pdf).
--- a/examples/benchmarks/CatBoost/requirements.txt
+++ b/examples/benchmarks/CatBoost/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.1.2
+numpy==1.17.4
+catboost==0.24.3
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha158.yaml
@@ -0,0 +1,65 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: CatBoostModel
+        module_path: qlib.contrib.model.catboost_model
+        kwargs:
+            loss: RMSE
+            learning_rate: 0.0421
+            subsample: 0.8789
+            max_depth: 6
+            num_leaves: 100
+            thread_count: 20
+            grow_policy: Lossguide
+            bootstrap_type: Poisson
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
+++ b/examples/benchmarks/CatBoost/workflow_config_catboost_Alpha360.yaml
@@ -0,0 +1,72 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: CatBoostModel
+        module_path: qlib.contrib.model.catboost_model
+        kwargs:
+            loss: RMSE
+            learning_rate: 0.0421
+            subsample: 0.8789
+            max_depth: 6
+            num_leaves: 100
+            thread_count: 20
+            grow_policy: Lossguide
+            bootstrap_type: Poisson
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GATs/README.md
+++ b/examples/benchmarks/GATs/README.md
@@ -0,0 +1,5 @@
+# GATs
+* Graph Attention Networks(GATs) leverage masked self-attentional layers on graph-structured data. The nodes in stacked layers have different weights and they are able to attend over their
+neighborhoods’ features, without requiring any kind of costly matrix operation (such as inversion) or depending on knowing the graph structure upfront.
+* The code used in Qlib is our own implementation in PyTorch.
+* Paper: Graph Attention Networks https://arxiv.org/pdf/1710.10903.pdf
--- a/examples/benchmarks/GATs/requirements.txt
+++ b/examples/benchmarks/GATs/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GATs
+        module_path: qlib.contrib.model.pytorch_gats_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.7
+            n_epochs: 200
+            lr: 1e-4
+            early_stop: 10
+            metric: loss
+            loss: mse
+            base_model: LSTM
+            with_pretrain: True
+            model_path: "benchmarks/LSTM/csi300_lstm_ts.pkl"
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
+++ b/examples/benchmarks/GATs/workflow_config_gats_Alpha360.yaml
@@ -0,0 +1,84 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GATs
+        module_path: qlib.contrib.model.pytorch_gats
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.7
+            n_epochs: 200
+            lr: 1e-4
+            early_stop: 20
+            metric: loss
+            loss: mse
+            base_model: LSTM
+            with_pretrain: True
+            model_path: "benchmarks/LSTM/model_lstm_csi300.pkl"
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GRU/csi300_gru_ts.pkl
+++ b/examples/benchmarks/GRU/csi300_gru_ts.pkl
--- a/examples/benchmarks/GRU/model_gru_csi300.pkl
+++ b/examples/benchmarks/GRU/model_gru_csi300.pkl
--- a/examples/benchmarks/GRU/requirements.txt
+++ b/examples/benchmarks/GRU/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GRU
+        module_path: qlib.contrib.model.pytorch_gru_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 2e-4
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
+++ b/examples/benchmarks/GRU/workflow_config_gru_Alpha360.yaml
@@ -0,0 +1,82 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: GRU
+        module_path: qlib.contrib.model.pytorch_gru
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 0.001  # decimal form on purpose: YAML 1.1 loaders (e.g. PyYAML) read a dot-less "1e-3" as a string
+            early_stop: 20
+            batch_size: 800
+            metric: loss
+            loss: mse
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LSTM/csi300_lstm_ts.pkl
+++ b/examples/benchmarks/LSTM/csi300_lstm_ts.pkl
--- a/examples/benchmarks/LSTM/model_lstm_csi300.pkl
+++ b/examples/benchmarks/LSTM/model_lstm_csi300.pkl
--- a/examples/benchmarks/LSTM/requirements.txt
+++ b/examples/benchmarks/LSTM/requirements.txt
@@ -0,0 +1,4 @@
+numpy==1.17.4
+pandas==1.1.2
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha158.yaml
@@ -0,0 +1,92 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LSTM
+        module_path: qlib.contrib.model.pytorch_lstm_ts
+        kwargs:
+            d_feat: 20
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 0.001  # decimal form on purpose: YAML 1.1 loaders (e.g. PyYAML) read a dot-less "1e-3" as a string
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
+++ b/examples/benchmarks/LSTM/workflow_config_lstm_Alpha360.yaml
@@ -0,0 +1,82 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LSTM
+        module_path: qlib.contrib.model.pytorch_lstm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            num_layers: 2
+            dropout: 0.0
+            n_epochs: 200
+            lr: 0.001  # decimal form on purpose: YAML 1.1 loaders (e.g. PyYAML) read a dot-less "1e-3" as a string
+            early_stop: 20
+            batch_size: 800
+            metric: loss
+            loss: mse
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LightGBM/README.md
+++ b/examples/benchmarks/LightGBM/README.md
@@ -0,0 +1,4 @@
+# LightGBM
+* Code: [https://github.com/microsoft/LightGBM](https://github.com/microsoft/LightGBM)
+* Paper: LightGBM: A Highly Efficient Gradient Boosting
+Decision Tree. [https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf](https://proceedings.neurips.cc/paper/2017/file/6449f44a102fde848669bdd9eb6b76fa-Paper.pdf).
--- a/examples/benchmarks/LightGBM/requirements.txt
+++ b/examples/benchmarks/LightGBM/requirements.txt
@@ -0,0 +1,3 @@
+pandas==1.1.2
+numpy==1.17.4
+lightgbm==3.1.0
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
@@ -0,0 +1,66 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.2
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha360.yaml
@@ -0,0 +1,73 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: []
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/Linear/requirements.txt
+++ b/examples/benchmarks/Linear/requirements.txt
@@ -0,0 +1,3 @@
+numpy>=1.17.4
+pandas>=1.0.1
+scikit-learn>=0.23.1
--- a/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
+++ b/examples/benchmarks/Linear/workflow_config_linear_Alpha158.yaml
@@ -0,0 +1,72 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: LinearModel
+        module_path: qlib.contrib.model.linear
+        kwargs:
+            estimator: ols
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record:
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            ana_long_short: True
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
--- a/examples/benchmarks/MLP/requirements.txt
+++ b/examples/benchmarks/MLP/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha158.yaml
@@ -0,0 +1,95 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors: [
+        {
+            "class" : "DropCol", 
+            "kwargs":{"col_list": ["VWAP0"]}
+        },
+        {
+             "class" : "CSZFillna", 
+             "kwargs":{"fields_group": "feature"}
+        }
+    ]
+    learn_processors: [
+        {
+            "class" : "DropCol", 
+            "kwargs":{"col_list": ["VWAP0"]}
+        },
+        {
+            "class" : "DropnaProcessor", 
+            "kwargs":{"fields_group": "feature"}
+        },
+        "DropnaLabel",
+        {
+            "class": "CSZScoreNorm", 
+            "kwargs": {"fields_group": "label"}
+        }
+    ]
+    process_type: "independent"
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DNNModelPytorch
+        module_path: qlib.contrib.model.pytorch_nn
+        kwargs:
+            loss: mse
+            input_dim: 157
+            output_dim: 1
+            lr: 0.002
+            lr_decay: 0.96
+            lr_decay_steps: 100
+            optimizer: adam
+            max_steps: 8000
+            batch_size: 8192
+            GPU: 0
+            weight_decay: 0.0002
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
+++ b/examples/benchmarks/MLP/workflow_config_mlp_Alpha360.yaml
@@ -0,0 +1,82 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: DNNModelPytorch
+        module_path: qlib.contrib.model.pytorch_nn
+        kwargs:
+            loss: mse
+            input_dim: 360
+            output_dim: 1
+            lr: 0.002
+            lr_decay: 0.96
+            lr_decay_steps: 100
+            optimizer: adam
+            max_steps: 8000
+            batch_size: 4096
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -0,0 +1,35 @@
+# Benchmarks Performance
+
+Here are the results of each benchmark model running on Qlib's `Alpha360` and `Alpha158` datasets, respectively, using China's A-share stock data (CSI300). Each metric is reported as the mean and standard deviation over 20 runs.
+
+The numbers shown below demonstrate the performance of the entire `workflow` of each model. We will update the `workflow` as well as models in the near future for better results.
+
+## Alpha360 dataset
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
+|---|---|---|---|---|---|---|---|---|
+| Linear | Alpha360 | 0.0150±0.00 | 0.1049±0.00| 0.0284±0.00 | 0.1970±0.00 | -0.0659±0.00 | -0.7072±0.00| -0.2955±0.00 |
+| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha360 | 0.0397±0.00 | 0.2878±0.00| 0.0470±0.00 | 0.3703±0.00 | 0.0342±0.00 | 0.4092±0.00| -0.1057±0.00 |
+| XGBoost (Tianqi Chen, et al.) | Alpha360 | 0.0400±0.00 | 0.3031±0.00| 0.0461±0.00 | 0.3862±0.00 | 0.0528±0.00 | 0.6307±0.00| -0.1113±0.00 |
+| LightGBM (Guolin Ke, et al.) | Alpha360 | 0.0399±0.00 | 0.3075±0.00| 0.0492±0.00 | 0.4019±0.00 | 0.0323±0.00 | 0.4370±0.00| -0.0917±0.00 |
+| MLP | Alpha360 | 0.0285±0.00 | 0.1981±0.02| 0.0402±0.00 | 0.2993±0.02 | 0.0073±0.02 | 0.0880±0.22| -0.1446±0.03 |
+| GRU (Kyunghyun Cho, et al.) | Alpha360 | 0.0490±0.01 | 0.3787±0.05| 0.0581±0.00 | 0.4664±0.04 | 0.0726±0.02 | 0.9817±0.34| -0.0902±0.03 |
+| LSTM (Sepp Hochreiter, et al.) | Alpha360 | 0.0443±0.01 | 0.3401±0.05| 0.0536±0.01 | 0.4248±0.05 | 0.0627±0.03 | 0.8441±0.48| -0.0882±0.03 |
+| ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
+| GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
+
+## Alpha158 dataset
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
+|---|---|---|---|---|---|---|---|---|
+| Linear | Alpha158 | 0.0393±0.00 | 0.2980±0.00| 0.0475±0.00 | 0.3546±0.00 | 0.0795±0.00 | 1.0712±0.00| -0.1449±0.00 |
+| CatBoost (Liudmila Prokhorenkova, et al.) | Alpha158 | 0.0503±0.00 | 0.3586±0.00| 0.0483±0.00 | 0.3667±0.00 | 0.1080±0.00 | 1.1561±0.00| -0.0787±0.00 |
+| XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
+| LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
+| MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
+| TabNet with pretrain (Sercan O. Arik, et al.) | Alpha158 | 0.0344±0.00 | 0.205±0.11| 0.0398±0.00 | 0.3479±0.01 | 0.0827±0.02 | 1.1141±0.32| -0.0925±0.02 |
+| TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
+| GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
+| LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
+| ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
+| GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
+
+- The selected 20 features are based on the feature importance of a lightgbm-based model.
--- a/examples/benchmarks/SFM/README.md
+++ b/examples/benchmarks/SFM/README.md
@@ -0,0 +1,3 @@
+# State-Frequency-Memory
+- State Frequency Memory (SFM) is a novel recurrent network that uses Discrete Fourier Transform to decompose the hidden states of memory cells and capture the multi-frequency trading patterns from past market data to make stock price predictions. 
+- Paper: Stock Price Prediction via Discovering Multi-Frequency Trading Patterns. [http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf](http://www.eecs.ucf.edu/~gqi/publications/kdd2017_stock.pdf)
--- a/examples/benchmarks/SFM/requirements.txt
+++ b/examples/benchmarks/SFM/requirements.txt
@@ -0,0 +1,4 @@
+pandas==1.1.2
+numpy==1.17.4
+scikit_learn==0.23.2
+torch==1.7.0
--- a/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
+++ b/examples/benchmarks/SFM/workflow_config_sfm_Alpha360.yaml
@@ -0,0 +1,85 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: SFM
+        module_path: qlib.contrib.model.pytorch_sfm
+        kwargs:
+            d_feat: 6
+            hidden_size: 64
+            output_dim: 32
+            freq_dim: 25
+            dropout_W: 0.5
+            dropout_U: 0.5
+            n_epochs: 20
+            lr: 0.001  # decimal form on purpose: YAML 1.1 loaders (e.g. PyYAML) read a dot-less "1e-3" as a string
+            batch_size: 1600
+            early_stop: 20
+            eval_steps: 5
+            loss: mse
+            optimizer: adam
+            GPU: 0
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/examples/benchmarks/TFT/README.md
+++ b/examples/benchmarks/TFT/README.md
@@ -0,0 +1,14 @@
+# Temporal Fusion Transformers Benchmark
+## Source
+**Reference**: Lim, Bryan, et al. "Temporal fusion transformers for interpretable multi-horizon time series forecasting." arXiv preprint arXiv:1912.09363 (2019).
+
+**GitHub**: https://github.com/google-research/google-research/tree/master/tft
+
+## Run the Workflow
+Users can follow the ``workflow_by_code_tft.py`` to run the benchmark. 
+
+### Notes
+1. Please be **aware** that this script can only support `Python 3.5 - 3.8`.
+2. If the CUDA version on your machine is not 10.0, please remember to run the following commands `conda install anaconda cudatoolkit=10.0` and `conda install cudnn` on your machine.
+3. The model must run on a GPU; otherwise an error will be raised.
+4. New datasets should be registered in ``data_formatters``, for detail please visit the source.
--- a/examples/benchmarks/TFT/data_formatters/init.py
+++ b/examples/benchmarks/TFT/data_formatters/init.py
@@ -0,0 +1,14 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/examples/benchmarks/TFT/data_formatters/base.py
+++ b/examples/benchmarks/TFT/data_formatters/base.py
@@ -0,0 +1,223 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Default data formatting functions for experiments.
+
+For new datasets, inherit from GenericDataFormatter and implement
+all abstract functions.
+
+These dataset-specific methods:
+1) Define the column and input types for tabular dataframes used by model
+2) Perform the necessary input feature engineering & normalisation steps
+3) Reverts the normalisation for predictions
+4) Are responsible for train, validation and test splits
+
+
+"""
+
+import abc
+import enum
+
+
+# Type definitions
+class DataTypes(enum.IntEnum):
+    """Defines numerical types of each column.
+
+    Used as the second element of each column-definition tuple.
+    """
+
+    # Members are plain integers (IntEnum), so they compare equal to 0/1/2.
+    REAL_VALUED = 0
+    CATEGORICAL = 1
+    DATE = 2
+
+
+class InputTypes(enum.IntEnum):
+    """Defines input types of each column.
+
+    Used as the third element of each column-definition tuple.
+    """
+
+    # NOTE(review): the roles of the first four members are inferred from
+    # their names and from how the formatter groups them - confirm against
+    # the TFT model code.
+    TARGET = 0
+    OBSERVED_INPUT = 1
+    KNOWN_INPUT = 2
+    STATIC_INPUT = 3
+    ID = 4  # Single column used as an entity identifier
+    TIME = 5  # Single column exclusively used as a time index
+
+
+class GenericDataFormatter(abc.ABC):
+    """Abstract base class for all data formatters.
+
+    Subclasses implement the abstract methods below to perform
+    dataset-specific manipulations.
+
+    A column definition is a sequence of ``(name, data_type, input_type)``
+    tuples, where ``data_type`` is compared against ``DataTypes`` members and
+    ``input_type`` against ``InputTypes`` members.
+
+    Subclasses must also assign ``self._num_classes_per_cat_input``; this base
+    class only reads it (see ``num_classes_per_cat_input``).
+    """
+
+    @abc.abstractmethod
+    def set_scalers(self, df):
+        """Calibrates scalers using the data supplied."""
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def transform_inputs(self, df):
+        """Performs feature transformation."""
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def format_predictions(self, df):
+        """Reverts any normalisation to give predictions in original scale."""
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def split_data(self, df):
+        """Performs the default train, validation and test splits."""
+        raise NotImplementedError()
+
+    @property
+    @abc.abstractmethod
+    def _column_definition(self):
+        """Defines order, input type and data type of each column."""
+        raise NotImplementedError()
+
+    @abc.abstractmethod
+    def get_fixed_params(self):
+        """Defines the fixed parameters used by the model for training.
+
+        Requires the following keys:
+          'total_time_steps': Defines the total number of time steps used by TFT
+          'num_encoder_steps': Determines length of LSTM encoder (i.e. history)
+          'num_epochs': Maximum number of epochs for training
+          'early_stopping_patience': Early stopping param for keras
+          'multiprocessing_workers': # of cpus for data processing
+
+
+        Returns:
+          A dictionary of fixed parameters, e.g.:
+
+          fixed_params = {
+              'total_time_steps': 252 + 5,
+              'num_encoder_steps': 252,
+              'num_epochs': 100,
+              'early_stopping_patience': 5,
+              'multiprocessing_workers': 5,
+          }
+        """
+        raise NotImplementedError()
+
+    # Shared functions across data-formatters
+    @property
+    def num_classes_per_cat_input(self):
+        """Returns number of categories per relevant input.
+
+        This is subsequently required for keras embedding layers.
+
+        The value is whatever the subclass stored in
+        ``self._num_classes_per_cat_input``.
+        """
+        return self._num_classes_per_cat_input
+
+    def get_num_samples_for_calibration(self):
+        """Gets the default number of training and validation samples.
+
+        Use to sub-sample the data for network calibration and a value of -1 uses
+        all available samples.
+
+        Returns:
+          Tuple of (training samples, validation samples)
+        """
+        return -1, -1
+
+    def get_column_definition(self):
+        """Returns formatted column definition in order expected by the TFT.
+
+        The result is ordered as: the ID column, the TIME column, all
+        real-valued columns, then all categorical columns. Within each group
+        the original order is kept. Columns that are neither ID/TIME nor
+        real-valued/categorical are not part of the result.
+
+        Returns:
+          List of ``(name, data_type, input_type)`` tuples.
+
+        Raises:
+          ValueError: If there is not exactly one ID column and exactly one
+            TIME column.
+        """
+
+        column_definition = self._column_definition
+        index_types = {InputTypes.ID, InputTypes.TIME}
+
+        identifier = [tup for tup in column_definition if tup[2] == InputTypes.ID]
+        time = [tup for tup in column_definition if tup[2] == InputTypes.TIME]
+
+        # Sanity checks first: exactly one ID and one TIME column must exist.
+        for input_type, matches in ((InputTypes.ID, identifier), (InputTypes.TIME, time)):
+            if len(matches) != 1:
+                raise ValueError("Illegal number of inputs ({}) of type {}".format(len(matches), input_type))
+
+        real_inputs = [
+            tup for tup in column_definition if tup[1] == DataTypes.REAL_VALUED and tup[2] not in index_types
+        ]
+        categorical_inputs = [
+            tup for tup in column_definition if tup[1] == DataTypes.CATEGORICAL and tup[2] not in index_types
+        ]
+
+        return identifier + time + real_inputs + categorical_inputs
+
+    def _get_input_columns(self):
+        """Returns names of all input columns (everything except ID and TIME)."""
+        return [tup[0] for tup in self.get_column_definition() if tup[2] not in {InputTypes.ID, InputTypes.TIME}]
+
+    def _get_tft_input_indices(self):
+        """Returns the relevant indexes and input sizes required by TFT.
+
+        All locations are positions within lists that exclude the ID and TIME
+        columns. The two ``known_*`` entries are positions within the
+        real-valued-only and categorical-only sub-lists respectively; the
+        other location entries are positions within the full input list.
+
+        Returns:
+          Dictionary with the keys 'input_size', 'output_size',
+          'category_counts', 'input_obs_loc', 'static_input_loc',
+          'known_regular_inputs' and 'known_categorical_inputs'.
+        """
+
+        index_types = {InputTypes.ID, InputTypes.TIME}
+
+        # Functions
+        def _extract_tuples_from_data_type(data_type, defn):
+            return [tup for tup in defn if tup[1] == data_type and tup[2] not in index_types]
+
+        def _get_locations(input_types, defn):
+            return [i for i, tup in enumerate(defn) if tup[2] in input_types]
+
+        # Start extraction
+        column_definition = [tup for tup in self.get_column_definition() if tup[2] not in index_types]
+
+        categorical_inputs = _extract_tuples_from_data_type(DataTypes.CATEGORICAL, column_definition)
+        real_inputs = _extract_tuples_from_data_type(DataTypes.REAL_VALUED, column_definition)
+
+        known_types = {InputTypes.STATIC_INPUT, InputTypes.KNOWN_INPUT}
+        target_locations = _get_locations({InputTypes.TARGET}, column_definition)
+
+        locations = {
+            "input_size": len(self._get_input_columns()),
+            "output_size": len(target_locations),
+            "category_counts": self.num_classes_per_cat_input,
+            "input_obs_loc": target_locations,
+            "static_input_loc": _get_locations({InputTypes.STATIC_INPUT}, column_definition),
+            "known_regular_inputs": _get_locations(known_types, real_inputs),
+            "known_categorical_inputs": _get_locations(known_types, categorical_inputs),
+        }
+
+        return locations
+
+    def get_experiment_params(self):
+        """Returns fixed model parameters for experiments.
+
+        The result is the mapping from ``get_fixed_params`` extended with
+        'column_definition' and the entries of ``_get_tft_input_indices``.
+
+        Raises:
+          ValueError: If a required key is missing from the fixed parameters.
+        """
+
+        required_keys = [
+            "total_time_steps",
+            "num_encoder_steps",
+            "num_epochs",
+            "early_stopping_patience",
+            "multiprocessing_workers",
+        ]
+
+        # Work on a copy so a subclass that returns a shared dict (e.g. a class
+        # attribute) does not get it mutated by the updates below.
+        fixed_params = dict(self.get_fixed_params())
+
+        for k in required_keys:
+            if k not in fixed_params:
+                raise ValueError("Field {}".format(k) + " missing from fixed parameter definitions!")
+
+        fixed_params["column_definition"] = self.get_column_definition()
+
+        fixed_params.update(self._get_tft_input_indices())
+
+        return fixed_params
--- a/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
+++ b/examples/benchmarks/TFT/data_formatters/qlib_Alpha158.py
@@ -0,0 +1,229 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Custom formatting functions for Alpha158 dataset.
+
+Defines dataset specific column definitions and data transformations.
+"""
+
+import data_formatters.base
+import libs.utils as utils
+import sklearn.preprocessing
+
+GenericDataFormatter = data_formatters.base.GenericDataFormatter
+DataTypes = data_formatters.base.DataTypes
+InputTypes = data_formatters.base.InputTypes
+
+
+class Alpha158Formatter(GenericDataFormatter):
+    """Defines and formats data for the Alpha158 dataset.
+
+    Attributes:
+      _column_definition: Defines input and data type of column used in the
+        experiment.
+      identifiers: Entity identifiers used in experiments.
+    """
+
+    _column_definition = [
+        ("instrument", DataTypes.CATEGORICAL, InputTypes.ID),
+        ("LABEL0", DataTypes.REAL_VALUED, InputTypes.TARGET),
+        ("date", DataTypes.DATE, InputTypes.TIME),
+        ("month", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
+        ("day_of_week", DataTypes.CATEGORICAL, InputTypes.KNOWN_INPUT),
+        # Selected features
+        ("RESI5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("WVMA5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("KLEN", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("ROC60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RESI10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("VSTD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("WVMA60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("STD5", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("RSQR20", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD60", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORD10", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("CORR20", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("KLOW", DataTypes.REAL_VALUED, InputTypes.OBSERVED_INPUT),
+        ("const", DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
+    ]
+
+    def __init__(self):
+        """Initialises formatter with all scalers unset."""
+
+        self.identifiers = None
+        self._real_scalers = None
+        self._cat_scalers = None
+        self._target_scaler = None
+        self._num_classes_per_cat_input = None
+
+    def split_data(self, df, valid_boundary=2016, test_boundary=2018):
+        """Splits data frame into training-validation-test data frames.
+
+        This also calibrates scaling object, and transforms data for each split.
+        The split is made on the ``year`` column of ``df``.
+
+        Args:
+          df: Source data frame to split.
+          valid_boundary: Starting year for validation data
+          test_boundary: Starting year for test data
+
+        Returns:
+          Tuple of transformed (train, valid, test) data.
+        """
+
+        print("Formatting train-valid-test splits.")
+
+        index = df["year"]
+        train = df.loc[index < valid_boundary]
+        valid = df.loc[(index >= valid_boundary) & (index < test_boundary)]
+        test = df.loc[index >= test_boundary]
+
+        # Scalers are calibrated on the training split only.
+        self.set_scalers(train)
+
+        # Build a real tuple (as documented) rather than a lazy generator, so
+        # the transforms run here and the result can be indexed or re-iterated.
+        return tuple(self.transform_inputs(data) for data in (train, valid, test))
+
+    def set_scalers(self, df):
+        """Calibrates scalers using the data supplied.
+
+        Sets ``identifiers``, the real-valued scaler, the target scaler, the
+        per-column categorical encoders and the category counts.
+
+        Args:
+          df: Data to use to calibrate scalers.
+        """
+        print("Setting scalers with training data...")
+
+        column_definitions = self.get_column_definition()
+        id_column = utils.get_single_col_by_input_type(InputTypes.ID, column_definitions)
+        target_column = utils.get_single_col_by_input_type(InputTypes.TARGET, column_definitions)
+
+        # Extract identifiers in case required
+        self.identifiers = list(df[id_column].unique())
+
+        # Format real scalers
+        real_inputs = utils.extract_cols_from_data_type(
+            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        data = df[real_inputs].values
+        self._real_scalers = sklearn.preprocessing.StandardScaler().fit(data)
+        # Fitted on the single target column; used to un-scale predictions.
+        self._target_scaler = sklearn.preprocessing.StandardScaler().fit(df[[target_column]].values)
+
+        # Format categorical scalers
+        categorical_inputs = utils.extract_cols_from_data_type(
+            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        categorical_scalers = {}
+        num_classes = []
+        for col in categorical_inputs:
+            # Set all to str so that we don't have mixed integer/string columns
+            srs = df[col].apply(str)
+            categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(srs.values)
+            num_classes.append(srs.nunique())
+
+        # Set categorical scaler outputs
+        self._cat_scalers = categorical_scalers
+        self._num_classes_per_cat_input = num_classes
+
+    def transform_inputs(self, df):
+        """Performs feature transformations.
+
+        This includes both feature engineering, preprocessing and normalisation.
+
+        Args:
+          df: Data frame to transform.
+
+        Returns:
+          Transformed data frame (a copy; ``df`` itself is not modified).
+
+        Raises:
+          ValueError: If the scalers have not been calibrated yet.
+        """
+        # Both scalers are used below, so either one missing is an error.
+        if self._real_scalers is None or self._cat_scalers is None:
+            raise ValueError("Scalers have not been set!")
+
+        output = df.copy()
+
+        column_definitions = self.get_column_definition()
+
+        real_inputs = utils.extract_cols_from_data_type(
+            DataTypes.REAL_VALUED, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+        categorical_inputs = utils.extract_cols_from_data_type(
+            DataTypes.CATEGORICAL, column_definitions, {InputTypes.ID, InputTypes.TIME}
+        )
+
+        # Format real inputs
+        output[real_inputs] = self._real_scalers.transform(df[real_inputs].values)
+
+        # Format categorical inputs
+        for col in categorical_inputs:
+            string_df = df[col].apply(str)
+            output[col] = self._cat_scalers[col].transform(string_df)
+
+        return output
+
+    def format_predictions(self, predictions):
+        """Reverts any normalisation to give predictions in original scale.
+
+        Every column except ``forecast_time`` and ``identifier`` is passed
+        through the inverse of the target scaler.
+
+        Args:
+          predictions: Dataframe of model predictions.
+
+        Returns:
+          Data frame of unnormalised predictions.
+
+        Raises:
+          ValueError: If the target scaler has not been calibrated yet.
+        """
+        if self._target_scaler is None:
+            raise ValueError("Scalers have not been set!")
+
+        output = predictions.copy()
+
+        for col in predictions.columns:
+            if col not in {"forecast_time", "identifier"}:
+                # The target scaler was fitted on a single (n, 1) column, so
+                # hand it a 2-D column and flatten the result again; this
+                # works whether or not the scaler accepts 1-D input.
+                values = predictions[col].values.reshape(-1, 1)
+                output[col] = self._target_scaler.inverse_transform(values).ravel()
+
+        return output
+
+    # Default params
+    def get_fixed_params(self):
+        """Returns fixed model parameters for experiments."""
+
+        fixed_params = {
+            "total_time_steps": 6 + 6,
+            "num_encoder_steps": 6,
+            "num_epochs": 100,
+            "early_stopping_patience": 10,
+            "multiprocessing_workers": 5,
+        }
+
+        return fixed_params
+
+    def get_default_model_params(self):
+        """Returns default optimised model parameters."""
+
+        model_params = {
+            "dropout_rate": 0.4,
+            "hidden_layer_size": 160,
+            "learning_rate": 0.0001,
+            "minibatch_size": 128,
+            "max_gradient_norm": 0.0135,
+            "num_heads": 1,
+            "stack_size": 1,
+        }
+
+        return model_params
--- a/examples/benchmarks/TFT/expt_settings/init.py
+++ b/examples/benchmarks/TFT/expt_settings/init.py
@@ -0,0 +1,14 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/examples/benchmarks/TFT/expt_settings/configs.py
+++ b/examples/benchmarks/TFT/expt_settings/configs.py
@@ -0,0 +1,95 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Default configs for TFT experiments.
+
+Contains the default output paths for data, serialised models and predictions
+for the main experiments used in the publication.
+"""
+
+import os
+
+import data_formatters.qlib_Alpha158
+
+
+class ExperimentConfig:
+    """Defines experiment configs and paths to outputs.
+
+    Attributes:
+      root_folder: Root folder to contain all experimental outputs.
+      experiment: Name of experiment to run.
+      data_folder: Folder to store data for experiment.
+      model_folder: Folder to store serialised models.
+      results_folder: Folder to store results.
+      data_csv_path: Path to primary data csv file used in experiment.
+      hyperparam_iterations: Default number of random search iterations for
+        experiment.
+    """
+
+    default_experiments = ["Alpha158"]
+
+    def __init__(self, experiment="Alpha158", root_folder=None):
+        """Creates configs based on default experiment chosen.
+
+        The output folders are created on construction if they do not exist.
+
+        Args:
+          experiment: Name of experiment; must be in ``default_experiments``.
+            The default used to be "volatility", which is not a registered
+            experiment and therefore always raised.
+          root_folder: Root folder to save all outputs of training. Defaults
+            to an ``outputs`` folder next to this package.
+
+        Raises:
+          ValueError: If ``experiment`` is not a recognised experiment.
+        """
+
+        if experiment not in self.default_experiments:
+            raise ValueError("Unrecognised experiment={}".format(experiment))
+
+        # Defines all relevant paths
+        if root_folder is None:
+            root_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "outputs")
+            print("Using root folder {}".format(root_folder))
+
+        self.root_folder = root_folder
+        self.experiment = experiment
+        self.data_folder = os.path.join(root_folder, "data", experiment)
+        self.model_folder = os.path.join(root_folder, "saved_models", experiment)
+        self.results_folder = os.path.join(root_folder, "results", experiment)
+
+        # Creates folders if they don't exist. exist_ok avoids the race between
+        # an exists() check and makedirs() when several workers start together.
+        for relevant_directory in [self.root_folder, self.data_folder, self.model_folder, self.results_folder]:
+            os.makedirs(relevant_directory, exist_ok=True)
+
+    @property
+    def data_csv_path(self):
+        """Path of the csv file holding the data of the current experiment."""
+        csv_map = {
+            "Alpha158": "Alpha158.csv",
+        }
+
+        return os.path.join(self.data_folder, csv_map[self.experiment])
+
+    @property
+    def hyperparam_iterations(self):
+        """Number of random search iterations: 240 for "volatility", else 60."""
+
+        return 240 if self.experiment == "volatility" else 60
+
+    def make_data_formatter(self):
+        """Gets a data formatter object for experiment.
+
+        Returns:
+          Default DataFormatter per experiment.
+        """
+
+        data_formatter_class = {
+            "Alpha158": data_formatters.qlib_Alpha158.Alpha158Formatter,
+        }
+
+        return data_formatter_class[self.experiment]()
--- a/examples/benchmarks/TFT/libs/init.py
+++ b/examples/benchmarks/TFT/libs/init.py
@@ -0,0 +1,14 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/examples/benchmarks/TFT/libs/hyperparam_opt.py
+++ b/examples/benchmarks/TFT/libs/hyperparam_opt.py
@@ -0,0 +1,430 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Classes used for hyperparameter optimisation.
+
+Two main classes exist:
+1) HyperparamOptManager used for optimisation on a single machine/GPU.
+2) DistributedHyperparamOptManager for multiple GPUs on different machines.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os
+import shutil
+import libs.utils as utils
+import numpy as np
+import pandas as pd
+
+Deque = collections.deque
+
+
+class HyperparamOptManager:
+    """Manages hyperparameter optimisation using random search for a single GPU.
+
+    ``results`` and ``saved_params`` hold one COLUMN per tested configuration,
+    keyed by the name produced by ``_get_name``; the rows of ``results`` are
+    ``loss`` and ``info``.
+
+    Attributes:
+      param_ranges: Discrete hyperparameter range for random search.
+      results: Dataframe of validation results.
+      fixed_params: Fixed model parameters per experiment.
+      saved_params: Dataframe of parameters trained.
+      best_score: Minimum validation loss observed thus far.
+      optimal_name: Key to best configuration.
+      hyperparam_folder: Where to save optimisation outputs.
+    """
+
+    def __init__(self, param_ranges, fixed_params, model_folder, override_w_fixed_params=True):
+        """Instantiates model.
+
+        Args:
+          param_ranges: Discrete hyperparameter range for random search.
+          fixed_params: Fixed model parameters per experiment.
+          model_folder: Folder to store optimisation artifacts.
+          override_w_fixed_params: Whether to override serialised fixed model
+            parameters with new supplied values.
+        """
+
+        self.param_ranges = param_ranges
+
+        self._max_tries = 1000
+        self.results = pd.DataFrame()
+        self.fixed_params = fixed_params
+        self.saved_params = pd.DataFrame()
+
+        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
+        self.best_score = np.inf
+        self.optimal_name = ""
+
+        # Setup
+        # Create folder for saving if its not there
+        self.hyperparam_folder = model_folder
+        utils.create_folder_if_not_exist(self.hyperparam_folder)
+
+        self._override_w_fixed_params = override_w_fixed_params
+
+    def load_results(self):
+        """Loads results from previous hyperparameter optimisation.
+
+        Reads ``results.csv`` and ``params.csv`` from ``hyperparam_folder`` and
+        restores ``best_score`` and ``optimal_name`` from them.
+
+        Returns:
+          A boolean indicating if previous results can be loaded.
+        """
+        print("Loading results from", self.hyperparam_folder)
+
+        results_file = os.path.join(self.hyperparam_folder, "results.csv")
+        params_file = os.path.join(self.hyperparam_folder, "params.csv")
+
+        if os.path.exists(results_file) and os.path.exists(params_file):
+
+            self.results = pd.read_csv(results_file, index_col=0)
+            self.saved_params = pd.read_csv(params_file, index_col=0)
+
+            if not self.results.empty:
+                # Row-wise assignment: .loc is the label indexer for a whole
+                # row (.at is meant for a single scalar cell).
+                self.results.loc["loss"] = self.results.loc["loss"].apply(float)
+                self.best_score = self.results.loc["loss"].min()
+
+                is_optimal = self.results.loc["loss"] == self.best_score
+                self.optimal_name = self.results.T[is_optimal].index[0]
+
+                return True
+
+        return False
+
+    def _get_params_from_name(self, name):
+        """Returns previously saved parameters given a key.
+
+        When ``override_w_fixed_params`` was set, the current fixed parameters
+        replace the saved values of the same keys.
+        """
+        selected_params = dict(self.saved_params[name])
+
+        if self._override_w_fixed_params:
+            for k in self.fixed_params:
+                selected_params[k] = self.fixed_params[k]
+
+        return selected_params
+
+    def get_best_params(self):
+        """Returns the optimal hyperparameters thus far."""
+
+        return self._get_params_from_name(self.optimal_name)
+
+    def clear(self):
+        """Clears all previous results and saved parameters.
+
+        Deletes and recreates ``hyperparam_folder`` and resets the in-memory
+        results, including the best score and the name of the best
+        configuration, so no stale key survives the reset.
+        """
+        shutil.rmtree(self.hyperparam_folder)
+        os.makedirs(self.hyperparam_folder)
+        self.results = pd.DataFrame()
+        self.saved_params = pd.DataFrame()
+        self.best_score = np.inf
+        self.optimal_name = ""
+
+    def _check_params(self, params):
+        """Checks that parameter map is properly defined.
+
+        Raises:
+          ValueError: If ``params`` has unknown keys or lacks any key of
+            ``param_ranges`` or ``fixed_params``.
+        """
+
+        valid_fields = list(self.param_ranges.keys()) + list(self.fixed_params.keys())
+        invalid_fields = [k for k in params if k not in valid_fields]
+        missing_fields = [k for k in valid_fields if k not in params]
+
+        if invalid_fields:
+            raise ValueError("Invalid Fields Found {} - Valid ones are {}".format(invalid_fields, valid_fields))
+        if missing_fields:
+            raise ValueError("Missing Fields Found {} - Valid ones are {}".format(missing_fields, valid_fields))
+
+    def _get_name(self, params):
+        """Returns a unique key for the supplied set of params.
+
+        The key is the string values of ``params`` joined with "_" in sorted
+        key order.
+        """
+
+        self._check_params(params)
+
+        return "_".join([str(params[k]) for k in sorted(params.keys())])
+
+    def get_next_parameters(self, ranges_to_skip=None):
+        """Returns the next set of parameters to optimise.
+
+        Args:
+          ranges_to_skip: Explicitly defines a set of keys to skip. Defaults
+            to the names of all configurations already present in ``results``.
+
+        Raises:
+          ValueError: If ``param_ranges`` is not a dict, or if no untested
+            configuration was drawn within the maximum number of tries.
+        """
+        if ranges_to_skip is None:
+            # Configurations are stored as COLUMNS of `results` (the index
+            # only holds "loss"/"info"), so the tested names are the columns.
+            ranges_to_skip = set(self.results.columns)
+
+        if not isinstance(self.param_ranges, dict):
+            raise ValueError("Only works for random search!")
+
+        param_range_keys = sorted(self.param_ranges.keys())
+
+        def _get_next():
+            """Returns next hyperparameter set per try."""
+
+            parameters = {k: np.random.choice(self.param_ranges[k]) for k in param_range_keys}
+
+            # Adds fixed params
+            for k in self.fixed_params:
+                parameters[k] = self.fixed_params[k]
+
+            return parameters
+
+        for _ in range(self._max_tries):
+
+            parameters = _get_next()
+            name = self._get_name(parameters)
+
+            if name not in ranges_to_skip:
+                return parameters
+
+        raise ValueError("Exceeded max number of hyperparameter searches!!")
+
+    def update_score(self, parameters, loss, model, info=""):
+        """Updates the results from last optimisation run.
+
+        Records the run as a new column of ``results`` and ``saved_params``
+        and writes both frames to ``hyperparam_folder``.
+
+        Args:
+          parameters: Hyperparameters used in optimisation.
+          loss: Validation loss obtained. NaN is treated as infinity.
+          model: Model to serialise if required (may be None).
+          info: Any ancillary information to tag on to results.
+
+        Returns:
+          Boolean flag indicating if the model is the best seen so far.
+        """
+
+        if np.isnan(loss):
+            loss = np.inf
+
+        if not os.path.isdir(self.hyperparam_folder):
+            os.makedirs(self.hyperparam_folder)
+
+        name = self._get_name(parameters)
+
+        # The very first run always counts as optimal.
+        is_optimal = self.results.empty or loss < self.best_score
+
+        # save the first model
+        if is_optimal:
+            # Try saving first, before updating info
+            if model is not None:
+                print("Optimal model found, updating")
+                model.save(self.hyperparam_folder)
+            self.best_score = loss
+            self.optimal_name = name
+
+        self.results[name] = pd.Series({"loss": loss, "info": info})
+        self.saved_params[name] = pd.Series(parameters)
+
+        self.results.to_csv(os.path.join(self.hyperparam_folder, "results.csv"))
+        self.saved_params.to_csv(os.path.join(self.hyperparam_folder, "params.csv"))
+
+        return is_optimal
+
+
+class DistributedHyperparamOptManager(HyperparamOptManager):
+    """Manages distributed hyperparameter optimisation across many gpus."""
+
+    def __init__(
+        self,
+        param_ranges,
+        fixed_params,
+        root_model_folder,
+        worker_number,
+        search_iterations=1000,
+        num_iterations_per_worker=5,
+        clear_serialised_params=False,
+    ):
+        """Instantiates optimisation manager.
+
+        This hyperparameter optimisation pre-generates #search_iterations
+        hyperparameter combinations and serialises them
+        at the start. At runtime, each worker goes through their own set of
+        parameter ranges. The pregeneration
+        allows for multiple workers to run in parallel on different machines without
+        resulting in parameter overlaps.
+
+        Args:
+          param_ranges: Discrete hyperparameter range for random search.
+          fixed_params: Fixed model parameters per experiment.
+          root_model_folder: Folder to store optimisation artifacts.
+          worker_number: Worker index defining which set of hyperparameters to
+            test.
+          search_iterations: Maximum number of random search iterations.
+          num_iterations_per_worker: How many iterations are handled per worker.
+          clear_serialised_params: Whether to regenerate hyperparameter
+            combinations.
+
+        Raises:
+          ValueError: If ``worker_number`` exceeds the number of workers
+            implied by the two iteration counts, or exceeds
+            ``search_iterations``.
+        """
+
+        max_workers = int(np.ceil(search_iterations / num_iterations_per_worker))
+
+        # Sanity checks
+        if worker_number > max_workers:
+            # Report the offending worker number together with the limit; the
+            # message previously printed the limit where it said "Worker number".
+            raise ValueError(
+                "Worker number ({}) cannot be larger than the total number of workers ({})!".format(
+                    worker_number, max_workers
+                )
+            )
+        if worker_number > search_iterations:
+            raise ValueError(
+                "Worker number ({}) cannot be larger than the max search iterations ({})!".format(
+                    worker_number, search_iterations
+                )
+            )
+
+        print("*** Creating hyperparameter manager for worker {} ***".format(worker_number))
+
+        # Each worker keeps its artifacts in its own sub-folder.
+        hyperparam_folder = os.path.join(root_model_folder, str(worker_number))
+        super().__init__(param_ranges, fixed_params, hyperparam_folder, override_w_fixed_params=True)
+
+        # The pre-generated combinations live in a folder shared by all workers.
+        serialised_ranges_folder = os.path.join(root_model_folder, "hyperparams")
+        if clear_serialised_params:
+            print("Regenerating hyperparameter list")
+            if os.path.exists(serialised_ranges_folder):
+                shutil.rmtree(serialised_ranges_folder)
+
+        utils.create_folder_if_not_exist(serialised_ranges_folder)
+
+        # NOTE(review): load_serialised_hyperparam_df reads
+        # self.serialised_ranges_folder, which is not assigned here (only the
+        # file path below is stored) - confirm against the rest of the class.
+        self.serialised_ranges_path = os.path.join(serialised_ranges_folder, "ranges_{}.csv".format(search_iterations))
+        self.hyperparam_folder = hyperparam_folder  # override
+        self.worker_num = worker_number
+        self.total_search_iterations = search_iterations
+        self.num_iterations_per_worker = num_iterations_per_worker
+        self.global_hyperparam_df = self.load_serialised_hyperparam_df()
+        self.worker_search_queue = self._get_worker_search_queue()
+
+    @property
+    def optimisation_completed(self):
+        """True once this worker's search queue holds no more combinations."""
+        return not self.worker_search_queue
+
+    def get_next_parameters(self):
+        """Pops the next queued combination and returns it as a parameter dict.
+
+        The combination name is popped from `worker_search_queue` and looked up
+        in `global_hyperparam_df`; every key of `fixed_params` then replaces the
+        stored value of the same name.
+
+        Returns:
+          Dictionary of hyperparameters to optimise next.
+        """
+        next_name = self.worker_search_queue.pop()
+        chosen = self.global_hyperparam_df.loc[next_name, :].to_dict()
+
+        # Fixed parameters always take precedence over the serialised values.
+        for key in self.fixed_params:
+            fixed_value = self.fixed_params[key]
+            print("Overriding saved {}: {}".format(key, fixed_value))
+            chosen[key] = fixed_value
+
+        return chosen
+
+    def load_serialised_hyperparam_df(self):
+        """Loads serialised hyperparameter ranges from file.
+
+        Reads `self.serialised_ranges_path` when that file exists; otherwise the
+        combinations are regenerated (and written out) through
+        `update_serialised_hyperparam_df`.
+
+        Returns:
+          DataFrame containing hyperparameter combinations.
+        """
+        print(
+            "Loading params for {} search iterations form {}".format(
+                self.total_search_iterations, self.serialised_ranges_path
+            )
+        )
+
+        # Test for the CSV file itself: the existence of its containing folder
+        # says nothing about whether the combinations were ever written.
+        if os.path.exists(self.serialised_ranges_path):
+            df = pd.read_csv(self.serialised_ranges_path, index_col=0)
+        else:
+            print("Unable to load - regenerating serach ranges instead")
+            df = self.update_serialised_hyperparam_df()
+
+        return df
+
+    def update_serialised_hyperparam_df(self):
+        """Rebuilds the full set of combinations and writes it to the CSV path.
+
+        Returns:
+          DataFrame containing hyperparameter combinations.
+        """
+        regenerated = self._generate_full_hyperparam_df()
+        target_path = self.serialised_ranges_path
+
+        message = "Serialising params for {} search iterations to {}".format(
+            self.total_search_iterations, target_path
+        )
+        print(message)
+
+        regenerated.to_csv(target_path)
+        return regenerated
+
+    def _generate_full_hyperparam_df(self):
+        """Draws `total_search_iterations` combinations into one DataFrame.
+
+        Returns:
+          DataFrame containing hyperparameter combinations, indexed by the name
+          `_get_name` assigns to each combination.
+        """
+        # Fixed seed so the generated list of combinations is reproducible.
+        np.random.seed(131)
+
+        names, rows = [], []
+        for _ in range(self.total_search_iterations):
+            # The names gathered so far are handed to the parent class sampler.
+            candidate = super().get_next_parameters(names)
+            names.append(self._get_name(candidate))
+            rows.append(candidate)
+
+        return pd.DataFrame(rows, index=names)
+
+    def clear(self):  # reset when cleared
+        """Clears stored results via the parent class, then rebuilds the queue."""
+        super().clear()
+        rebuilt_queue = self._get_worker_search_queue()
+        self.worker_search_queue = rebuilt_queue
+
+    def load_results(self):
+        """Loads saved results and, on success, re-queues what is outstanding.
+
+        Returns:
+          Boolean indicating if results were successfully loaded.
+        """
+        loaded = super().load_results()
+        if not loaded:
+            return loaded
+
+        # Results changed, so the outstanding combinations must be recomputed.
+        self.worker_search_queue = self._get_worker_search_queue()
+        return loaded
+
+    def _get_worker_search_queue(self):
+        """Builds the queue of combinations this worker has yet to evaluate.
+
+        Returns:
+          Queue of hyperparameter combinations outstanding.
+        """
+        tagged = self.assign_worker_numbers(self.global_hyperparam_df)
+        mine = tagged[tagged["worker"] == self.worker_num]
+
+        # Keep only names that do not already appear as a results column.
+        pending = []
+        for name in mine.index:
+            if name not in self.results.columns:
+                pending.append(name)
+
+        return Deque(pending)
+
+    def assign_worker_numbers(self, df):
+        """Tags each parameter combination with the 1-based worker that runs it.
+
+        Args:
+          df: DataFrame of parameter combinations.
+
+        Returns:
+          Copy of `df` with an added "worker" column.
+        """
+        tagged = df.copy()
+
+        per_worker = self.num_iterations_per_worker
+        worker_count = int(np.ceil(self.total_search_iterations / per_worker))
+
+        # One run of identical ids per worker: 1, 1, ..., 2, 2, ...
+        id_runs = [np.full(per_worker, worker_id) for worker_id in range(1, worker_count + 1)]
+        all_ids = np.concatenate(id_runs)
+
+        # The last run may overshoot the number of rows, so trim to fit.
+        tagged["worker"] = all_ids[: len(tagged)]
+        return tagged
--- a/examples/benchmarks/TFT/libs/tft_model.py
+++ b/examples/benchmarks/TFT/libs/tft_model.py
--- a/examples/benchmarks/TFT/libs/utils.py
+++ b/examples/benchmarks/TFT/libs/utils.py
@@ -0,0 +1,224 @@
+# coding=utf-8
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Generic helper functions used across codebase."""
+
+import os
+import pathlib
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
+
+
+# Generic.
+def get_single_col_by_input_type(input_type, column_definition):
+    """Finds the one column whose input type equals `input_type`.
+
+    Args:
+      input_type: Input type of column to extract
+      column_definition: Column definition list for experiment; each entry is
+        read as a sequence with the name at position 0 and the input type at
+        position 2
+
+    Returns:
+      Name of the single matching column.
+
+    Raises:
+      ValueError: If no column, or more than one column, matches.
+    """
+    matches = []
+    for entry in column_definition:
+        if entry[2] == input_type:
+            matches.append(entry[0])
+
+    if len(matches) == 1:
+        return matches[0]
+
+    raise ValueError("Invalid number of columns for {}".format(input_type))
+
+
+def extract_cols_from_data_type(data_type, column_definition, excluded_input_types):
+    """Lists the columns of one data type, skipping excluded input types.
+
+    Args:
+      data_type: DataType of columns to extract.
+      column_definition: Column definition to use; each entry is read as a
+        sequence of (name, data type, input type).
+      excluded_input_types: Set of input types to exclude
+
+    Returns:
+      List of names for columns with data type specified.
+    """
+    selected = []
+    for entry in column_definition:
+        if not entry[1] == data_type:
+            continue
+        if entry[2] in excluded_input_types:
+            continue
+        selected.append(entry[0])
+    return selected
+
+
+# Loss functions.
+def tensorflow_quantile_loss(y, y_pred, quantile):
+    """Computes quantile loss for tensorflow.
+
+    Standard quantile loss as defined in the "Training Procedure" section of
+    the main TFT paper
+
+    Args:
+      y: Targets
+      y_pred: Predictions
+      quantile: Quantile to use for loss calculations (between 0 & 1)
+
+    Returns:
+      Tensor for quantile loss, summed over the last axis.
+
+    Raises:
+      ValueError: If `quantile` is below 0 or above 1.
+    """
+    # Reject out-of-range quantiles before building any tensorflow ops.
+    if quantile < 0 or quantile > 1:
+        raise ValueError("Illegal quantile value={}! Values should be between 0 and 1.".format(quantile))
+
+    shortfall = y - y_pred
+    # Under-prediction is weighted by `quantile`, over-prediction by the rest.
+    under_term = quantile * tf.maximum(shortfall, 0.0)
+    over_term = (1.0 - quantile) * tf.maximum(-shortfall, 0.0)
+
+    return tf.reduce_sum(under_term + over_term, axis=-1)
+
+
+def numpy_normalised_quantile_loss(y, y_pred, quantile):
+    """Computes normalised quantile loss for numpy arrays.
+
+    Uses the q-Risk metric as defined in the "Training Procedure" section of the
+    main TFT paper.
+
+    Args:
+      y: Targets (numpy array or pandas object)
+      y_pred: Predictions, compatible with `y` under subtraction
+      quantile: Quantile to use for loss calculations (between 0 & 1)
+
+    Returns:
+      Float for normalised quantile loss.
+    """
+    prediction_underflow = y - y_pred
+    weighted_errors = quantile * np.maximum(prediction_underflow, 0.0) + (1.0 - quantile) * np.maximum(
+        -prediction_underflow, 0.0
+    )
+
+    quantile_loss = weighted_errors.mean()
+    # np.abs accepts plain ndarrays as well as pandas objects, whereas the
+    # `.abs()` method is not available on ndarrays.
+    normaliser = np.abs(y).mean()
+
+    return 2 * quantile_loss / normaliser
+
+
+# OS related functions.
+def create_folder_if_not_exist(directory):
+    """Ensures `directory` exists, creating missing parent folders as well.
+
+    Args:
+      directory: Folder path to create.
+    """
+    target = pathlib.Path(directory)
+    # exist_ok turns the call into a no-op when the folder is already there.
+    target.mkdir(parents=True, exist_ok=True)
+
+
+# Tensorflow related functions.
+def get_default_tensorflow_config(tf_device="gpu", gpu_id=0):
+    """Builds a tensorflow config for running graphs on CPU or GPU.
+
+    Besides building the config, this sets the CUDA_* environment variables
+    that control which devices are visible.
+
+    Args:
+      tf_device: 'cpu' or 'gpu'; any value other than 'cpu' takes the GPU path
+      gpu_id: GPU ID to use if relevant
+
+    Returns:
+      Tensorflow config.
+    """
+    if tf_device != "cpu":
+        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
+        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
+
+        print("Selecting GPU ID={}".format(gpu_id))
+
+        gpu_config = tf.ConfigProto(log_device_placement=False)
+        gpu_config.gpu_options.allow_growth = True
+        return gpu_config
+
+    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # for training on cpu
+    return tf.ConfigProto(log_device_placement=False, device_count={"GPU": 0})
+
+
+def save(tf_session, model_folder, cp_name, scope=None):
+    """Writes a Tensorflow checkpoint named `<cp_name>.ckpt` into `model_folder`.
+
+    With a `scope`, only the trainable variables collected under that scope
+    are passed to the saver; without one, a default `tf.train.Saver()` is used.
+
+    Args:
+      tf_session: Session containing graph
+      model_folder: Folder to save models
+      cp_name: Name of Tensorflow checkpoint
+      scope: Variable scope containing variables to save
+    """
+    if scope is not None:
+        scoped_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
+        saver = tf.train.Saver(var_list=scoped_vars, max_to_keep=100000)
+    else:
+        saver = tf.train.Saver()
+
+    checkpoint_target = os.path.join(model_folder, "{0}.ckpt".format(cp_name))
+    written_to = saver.save(tf_session, checkpoint_target)
+    print("Model saved to: {0}".format(written_to))
+
+
+def load(tf_session, model_folder, cp_name, scope=None, verbose=False):
+    """Restores variables from checkpoint `<cp_name>.ckpt` into `tf_session`.
+
+    The checkpoint's tensors are always printed first via
+    `print_weights_in_checkpoint`.
+
+    Args:
+      tf_session: Session to load graph into
+      model_folder: Folder containing serialised model
+      cp_name: Name of Tensorflow checkpoint
+      scope: Variable scope to use; when given, only the global variables
+        collected under it are restored.
+      verbose: Whether to print additional debugging information.
+    """
+    checkpoint_file = os.path.join(model_folder, "{0}.ckpt".format(cp_name))
+
+    print("Loading model from {0}".format(checkpoint_file))
+
+    print_weights_in_checkpoint(model_folder, cp_name)
+
+    def _graph_node_names():
+        """Names of all nodes currently in the default graph."""
+        return {node.name for node in tf.get_default_graph().as_graph_def().node}
+
+    names_before = _graph_node_names()
+
+    if scope is not None:
+        scoped_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
+        saver = tf.train.Saver(var_list=scoped_vars, max_to_keep=100000)
+    else:
+        saver = tf.train.Saver()
+
+    saver.restore(tf_session, checkpoint_file)
+    names_after = _graph_node_names()
+
+    if verbose:
+        # Node-name differences between the graph before and after restoring.
+        print("Restored {0}".format(",".join(names_before.difference(names_after))))
+        print("Existing {0}".format(",".join(names_after.difference(names_before))))
+        print("All {0}".format(",".join(names_after)))
+
+    print("Done.")
+
+
+def print_weights_in_checkpoint(model_folder, cp_name):
+    """Prints every tensor stored in checkpoint `<cp_name>.ckpt`.
+
+    Args:
+      model_folder: Folder containing checkpoint
+      cp_name: Name of checkpoint
+    """
+    checkpoint_file = os.path.join(model_folder, "{0}.ckpt".format(cp_name))
+
+    print_tensors_in_checkpoint_file(
+        file_name=checkpoint_file,
+        tensor_name="",
+        all_tensors=True,
+        all_tensor_names=True,
+    )
--- a/examples/benchmarks/TFT/requirements.txt
+++ b/examples/benchmarks/TFT/requirements.txt
@@ -0,0 +1,3 @@
+tensorflow-gpu==1.15.0
+numpy == 1.19.4
+pandas==1.1.0
--- a/examples/benchmarks/TFT/tft.py
+++ b/examples/benchmarks/TFT/tft.py
@@ -0,0 +1,291 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import tensorflow.compat.v1 as tf
+import data_formatters.base
+import expt_settings.configs
+import libs.hyperparam_opt
+import libs.tft_model
+import libs.utils as utils
+import os
+import datetime as dte
+
+
+from qlib.model.base import ModelFT
+from qlib.data.dataset import DatasetH
+from qlib.data.dataset.handler import DataHandlerLP
+
+
+# Names of the datasets TFTModel.fit accepts; a dataset missing from this list
+# is rejected there. To register a new dataset, add its name here.
+ALLOW_DATASET = ["Alpha158", "Alpha360"]
+# Per-dataset column configuration, keyed by dataset name. "feature_col" lists
+# the feature columns selected by process_qlib_data and "label_col" names the
+# label column. To register a new dataset, add its configuration here.
+DATASET_SETTING = {
+    "Alpha158": {
+        "feature_col": [
+            "RESI5",
+            "WVMA5",
+            "RSQR5",
+            "KLEN",
+            "RSQR10",
+            "CORR5",
+            "CORD5",
+            "CORR10",
+            "ROC60",
+            "RESI10",
+            "VSTD5",
+            "RSQR60",
+            "CORR60",
+            "WVMA60",
+            "STD5",
+            "RSQR20",
+            "CORD60",
+            "CORD10",
+            "CORR20",
+            "KLOW",
+        ],
+        "label_col": "LABEL0",
+    },
+    "Alpha360": {
+        "feature_col": [
+            "HIGH0",
+            "LOW0",
+            "OPEN0",
+            "CLOSE1",
+            "HIGH1",
+            "VOLUME1",
+            "LOW1",
+            "VOLUME3",
+            "OPEN1",
+            "VOLUME4",
+            "CLOSE2",
+            "CLOSE4",
+            "VOLUME5",
+            "LOW2",
+            "CLOSE3",
+            "VOLUME2",
+            "HIGH2",
+            "LOW4",
+            "VOLUME8",
+            "VOLUME11",
+        ],
+        "label_col": "LABEL0",
+    },
+}
+
+
+def get_shifted_label(data_df, shifts=5, col_shift="LABEL0"):
+    """Shifts one column by `shifts` rows separately within each instrument.
+
+    Args:
+      data_df: DataFrame that can be grouped by "instrument".
+      shifts: Number of rows to shift by, passed to `DataFrame.shift`.
+      col_shift: Name of the column to shift.
+
+    Returns:
+      Single-column DataFrame holding the shifted values.
+    """
+
+    def _shift_group(group):
+        return group.shift(shifts)
+
+    single_col = data_df[[col_shift]]
+    per_instrument = single_col.groupby("instrument")
+    return per_instrument.apply(_shift_group)
+
+
+def fill_test_na(test_df):
+    """Fills missing feature values with the mean of the same datetime group.
+
+    Columns whose name contains "label" (case-insensitive) are left untouched.
+    The input frame is not modified; a filled copy is returned.
+
+    Args:
+      test_df: DataFrame that can be grouped by "datetime".
+
+    Returns:
+      Copy of `test_df` with the feature columns filled.
+    """
+    filled = test_df.copy()
+    is_feature = ~filled.columns.str.contains("label", case=False)
+
+    def _fill_with_group_mean(group):
+        return group.fillna(group.mean())
+
+    feature_part = filled.loc[:, is_feature]
+    filled.loc[:, is_feature] = feature_part.groupby("datetime").apply(_fill_with_group_mean)
+    return filled
+
+
+def process_qlib_data(df, dataset, fillna=False):
+    """Prepare data to fit the TFT model.
+
+    Args:
+      df: Original DataFrame.
+      dataset: Key into DATASET_SETTING selecting the feature and label columns.
+      fillna: Whether to fill the data with the mean values.
+
+    Returns:
+      Transformed DataFrame with the added columns "date", "day_of_week",
+      "month", "year" and "const".
+
+    """
+    settings = DATASET_SETTING[dataset]
+    # Several features selected manually
+    wanted_cols = settings["feature_col"] + [settings["label_col"]]
+
+    selected = df.loc[:, wanted_cols]
+    if fillna:
+        selected = fill_test_na(selected)
+
+    # Swap the two index levels, sort, then move the first level into a column.
+    # NOTE(review): presumably the input index is (datetime, instrument), which
+    # would leave datetime as the remaining index - confirm against callers.
+    selected = selected.swaplevel().sort_index().reset_index(level=0)
+
+    dates = pd.to_datetime(selected.index)
+    derived_cols = (
+        ("date", dates),
+        ("day_of_week", dates.dayofweek),
+        ("month", dates.month),
+        ("year", dates.year),
+        ("const", 1.0),
+    )
+    for name, values in derived_cols:
+        selected[name] = values
+    return selected
+
+
+def process_predicted(df, col_name):
+    """Transform the TFT predicted data into Qlib format.
+
+    Args:
+      df: Original DataFrame with "forecast_time", "identifier" and "t+4"
+        columns.
+      col_name: New column name given to the "t+4" column.
+
+    Returns:
+      Single-column DataFrame indexed by ("datetime", "instrument") and sorted
+      by that index.
+
+    """
+    renames = {"forecast_time": "datetime", "identifier": "instrument", "t+4": col_name}
+    renamed = df.copy().rename(columns=renames)
+    indexed = renamed.set_index(["datetime", "instrument"]).sort_index()
+    return indexed[[col_name]]
+
+
+def format_score(forecast_df, col_name="pred", label_shift=5):
+    """Converts a TFT forecast frame into a NaN-free score series.
+
+    Args:
+      forecast_df: Forecast DataFrame accepted by `process_predicted`.
+      col_name: Name given to the score column.
+      label_shift: Number of rows to shift back by, per instrument.
+
+    Returns:
+      Series named `col_name` with rows left empty by the shift dropped.
+    """
+    scores = process_predicted(forecast_df, col_name=col_name)
+    # The shift is negated, i.e. applied in the opposite direction.
+    realigned = get_shifted_label(scores, shifts=-label_shift, col_shift=col_name)
+    return realigned.dropna()[col_name]
+
+
+def transform_df(df, col_name="LABEL0"):
+    """Flattens a frame with "feature" and "label" column groups into one frame.
+
+    Args:
+      df: DataFrame exposing its features under df["feature"] and its label
+        under df["label"].
+      col_name: Name of the label column in the result.
+
+    Returns:
+      New DataFrame with the feature columns plus the label stored as
+      `col_name`. It does not share data with `df`.
+    """
+    # Copy first: selecting "feature" can hand back a slice/view of `df`, and
+    # adding a column to that triggers SettingWithCopyWarning and leaves the
+    # result aliased to the caller's data.
+    df_res = df["feature"].copy()
+    df_res[col_name] = df["label"]
+    return df_res
+
+
+class TFTModel(ModelFT):
+    """TFT Model
+
+    Qlib model wrapper around `libs.tft_model.TemporalFusionTransformer`.
+    `fit` trains on the "train" and "valid" segments of a dataset and keeps the
+    TensorFlow graph and session on the instance; `predict` reuses them to score
+    the "test" segment.
+    """
+
+    def __init__(self, **kwargs):
+        """Stores the model settings.
+
+        Args:
+          **kwargs: Overrides for the default settings. "DATASET" (default
+            "Alpha158") selects the DATASET_SETTING entry to use and
+            "label_shift" (default 5) is the number of rows labels are shifted
+            by.
+        """
+        self.model = None
+        self.params = {"DATASET": "Alpha158", "label_shift": 5}
+        self.params.update(kwargs)
+
+    def _prepare_data(self, dataset: DatasetH):
+        """Returns the flattened train and valid frames of `dataset`."""
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        return transform_df(df_train), transform_df(df_valid)
+
+    def fit(self, dataset: DatasetH, MODEL_FOLDER="qlib_tft_model", USE_GPU_ID=0, **kwargs):
+        """Trains the TFT model and saves it under `MODEL_FOLDER`.
+
+        Args:
+          dataset: Dataset providing the "train" and "valid" segments.
+          MODEL_FOLDER: Folder for model artifacts; created when missing. The
+            trained model is saved into its "saved_model" sub-folder.
+          USE_GPU_ID: GPU ID handed to the tensorflow config.
+          **kwargs: Accepted but not used.
+
+        Raises:
+          AssertionError: If the configured dataset is not in ALLOW_DATASET.
+          ValueError: If the data formatter is not a GenericDataFormatter.
+        """
+        DATASET = self.params["DATASET"]
+        LABEL_SHIFT = self.params["label_shift"]
+
+        # Validate before indexing DATASET_SETTING, otherwise an unsupported
+        # name fails with a bare KeyError instead of this explanation.
+        if DATASET not in ALLOW_DATASET:
+            raise AssertionError("The dataset is not supported, please make a new formatter to fit this dataset")
+        LABEL_COL = DATASET_SETTING[DATASET]["label_col"]
+
+        dtrain, dvalid = self._prepare_data(dataset)
+        dtrain.loc[:, LABEL_COL] = get_shifted_label(dtrain, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
+        dvalid.loc[:, LABEL_COL] = get_shifted_label(dvalid, shifts=LABEL_SHIFT, col_shift=LABEL_COL)
+
+        train = process_qlib_data(dtrain, DATASET, fillna=True).dropna()
+        valid = process_qlib_data(dvalid, DATASET, fillna=True).dropna()
+
+        ExperimentConfig = expt_settings.configs.ExperimentConfig
+        config = ExperimentConfig(DATASET)
+        self.data_formatter = config.make_data_formatter()
+        self.model_folder = MODEL_FOLDER
+        self.gpu_id = USE_GPU_ID
+        self.label_shift = LABEL_SHIFT
+        self.expt_name = DATASET
+        self.label_col = LABEL_COL
+
+        # The GPU path is always selected; the CPU branch below is kept so the
+        # flag can be flipped by hand.
+        use_gpu = (True, self.gpu_id)
+        # ===========================Training Process===========================
+        ModelClass = libs.tft_model.TemporalFusionTransformer
+        if not isinstance(self.data_formatter, data_formatters.base.GenericDataFormatter):
+            raise ValueError(
+                "Data formatters should inherit from "
+                + "GenericDataFormatter! Type={}".format(type(self.data_formatter))
+            )
+
+        # Remembered so the caller's keras session can be put back afterwards.
+        default_keras_session = tf.keras.backend.get_session()
+
+        if use_gpu[0]:
+            self.tf_config = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=use_gpu[1])
+        else:
+            self.tf_config = utils.get_default_tensorflow_config(tf_device="cpu")
+
+        self.data_formatter.set_scalers(train)
+
+        # Sets up default params
+        fixed_params = self.data_formatter.get_experiment_params()
+        params = self.data_formatter.get_default_model_params()
+
+        # Merge the default model parameters with the fixed experiment
+        # parameters; the fixed ones win on conflicting keys.
+        params = {**params, **fixed_params}
+
+        if not os.path.exists(self.model_folder):
+            os.makedirs(self.model_folder)
+        params["model_folder"] = self.model_folder
+
+        print("*** Begin training ***")
+
+        tf.reset_default_graph()
+
+        self.tf_graph = tf.Graph()
+        with self.tf_graph.as_default():
+            self.sess = tf.Session(config=self.tf_config)
+            tf.keras.backend.set_session(self.sess)
+            try:
+                self.model = ModelClass(params, use_cudnn=use_gpu[0])
+                self.sess.run(tf.global_variables_initializer())
+                self.model.fit(train_df=train, valid_df=valid)
+                print("*** Finished training ***")
+                saved_model_dir = self.model_folder + "/" + "saved_model"
+                if not os.path.exists(saved_model_dir):
+                    os.makedirs(saved_model_dir)
+                self.model.save(saved_model_dir)
+            finally:
+                # Put the previous session back even when training fails.
+                tf.keras.backend.set_session(default_keras_session)
+        print("Training completed at {}.".format(dte.datetime.now()))
+        # ===========================Training Process===========================
+
+    def predict(self, dataset):
+        """Scores the "test" segment of `dataset` with the fitted model.
+
+        Args:
+          dataset: Dataset providing the "test" segment.
+
+        Returns:
+          The mean of the formatted p50 and p90 forecasts.
+
+        Raises:
+          ValueError: If `fit` has not been called yet.
+        """
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        d_test = dataset.prepare("test", col_set=["feature", "label"])
+        d_test = transform_df(d_test)
+        d_test.loc[:, self.label_col] = get_shifted_label(d_test, shifts=self.label_shift, col_shift=self.label_col)
+        test = process_qlib_data(d_test, self.expt_name, fillna=True).dropna()
+
+        # ===========================Predicting Process===========================
+        default_keras_session = tf.keras.backend.get_session()
+
+        # Sets up default params
+        # NOTE(review): the merged params are not used below; the calls are kept
+        # in case the formatter methods have side effects - confirm and remove.
+        fixed_params = self.data_formatter.get_experiment_params()
+        params = self.data_formatter.get_default_model_params()
+        params = {**params, **fixed_params}
+
+        print("*** Begin predicting ***")
+        tf.reset_default_graph()
+
+        with self.tf_graph.as_default():
+            tf.keras.backend.set_session(self.sess)
+            try:
+                output_map = self.model.predict(test, return_targets=True)
+                targets = self.data_formatter.format_predictions(output_map["targets"])
+                p50_forecast = self.data_formatter.format_predictions(output_map["p50"])
+                p90_forecast = self.data_formatter.format_predictions(output_map["p90"])
+            finally:
+                # Put the previous session back even when prediction fails.
+                tf.keras.backend.set_session(default_keras_session)
+
+        predict50 = format_score(p50_forecast, "pred", 1)
+        predict90 = format_score(p90_forecast, "pred", 1)
+        predict = (predict50 + predict90) / 2  # self.label_shift
+        # ===========================Predicting Process===========================
+        return predict
+
+    def finetune(self, dataset: DatasetH):
+        """
+        finetune model (currently a no-op)
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset for finetuning
+        """
+        pass
--- a/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
+++ b/examples/benchmarks/TFT/workflow_config_tft_Alpha158.yaml
@@ -0,0 +1,58 @@
+sys:
+    rel_path: .
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: TFTModel
+        module_path: tft
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
--- a/Show More
+++ b/Show More