Supporting Arctic Backend Provider & Orderbook, Tick Data Example (#744)

* change weight_decay & batchsize * del weight_decay * big weight_decay * mid weight_decay * small layer * 2 layer * full layer * no weight decay * divide into two data source * change parse field * delete some debug * add Toperator * new format of arctic * fix cache bug to arctic read * fix connection problem * add some operator * final version for arcitc * clear HZ cache * remove not used function * add topswrappers * successfully import data and run first test * A simpler version to support arctic * Successfully run all high-freq expressions * Black format and fix add docs * Add docs for download and test data * update scripts and docs * Add docs * fix bug * Refine docs * fix test bug * fix CI error * clean code Co-authored-by: bxdd <bxddream@gmail.com> Co-authored-by: wangwenxi.handsome <wangwenxi.handsome@gmail.com> Co-authored-by: Young <afe.young@gmail.com>
2026-07-03 02:50:58 +08:00 · 2022-01-18 09:13:11 +08:00
parent 7f274b1e4e
commit 2bb8a4ce0e
16 changed files with 923 additions and 90 deletions
--- a/examples/orderbook_data/README.md
+++ b/examples/orderbook_data/README.md
@@ -0,0 +1,51 @@
+# Introduction
+
+This example demonstrates how Qlib supports data that does not have a fixed, shared frequency.
+
+For example,
+- Daily price and volume data are fixed-frequency data: the records arrive at a fixed frequency (i.e. daily).
+- Orders are not fixed-frequency data; they may arrive at any point in time.
+
+To support such non-fixed-frequency data, Qlib implements an Arctic-based backend.
+Here is an example to import and query data based on this backend.
+
+# Installation
+
+Please refer to [the installation docs](https://docs.mongodb.com/manual/installation/) of MongoDB.
+With its default values, the current version of the script tries to connect to localhost **via the default port without authentication**.
+
+Run the following command to install the necessary libraries
+```
+pip install pytest
+```
+
+# Importing example data
+
+
+1. (Optional) Please follow the first part of [this section](https://github.com/microsoft/qlib#data-preparation) to **get 1min data** of Qlib.
+2. Please follow the steps below to download the example data
+```bash
+cd examples/orderbook_data/
+wget http://fintech.msra.cn/stock_data/downloads/highfreq_orderboook_example_data.tar.bz2
+tar xf highfreq_orderboook_example_data.tar.bz2
+```
+
+3. Please import the example data into your MongoDB database
+```bash
+cd examples/orderbook_data/
+python create_dataset.py initialize_library  # Initialize the libraries
+python create_dataset.py import_data  # Import the example data
+```
+
+# Query Examples
+
+After importing the data, you can run `example.py` to create some high-frequency features.
+```bash
+cd examples/orderbook_data/
+pytest -s --disable-warnings example.py   # If you want to run all examples
+pytest -s --disable-warnings example.py::TestClass::test_exp_10  # If you want to run specific example
+```
+
+
+# Known limitations
+Computing expressions across different frequencies is not supported yet.
--- a/examples/orderbook_data/create_dataset.py
+++ b/examples/orderbook_data/create_dataset.py
@@ -0,0 +1,315 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""
+    NOTE:
+    - This script is a demo of importing example data into Qlib
+    - !!!!!!!!!!!!!!!TODO!!!!!!!!!!!!!!!!!!!:
+        - Its structure is not well designed and very ugly; contributions that make importing datasets easier are welcome
+"""
+from datetime import date, datetime as dt
+import os
+from pathlib import Path
+import random
+import shutil
+import time
+import traceback
+
+from arctic import Arctic, chunkstore
+import arctic
+from arctic import Arctic, CHUNK_STORE
+from arctic.chunkstore.chunkstore import CHUNK_SIZE
+import fire
+from joblib import Parallel, delayed, parallel
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+from pandas.core.indexes.datetimes import date_range
+from pymongo.mongo_client import MongoClient
+
+# Directory that contains this script; every working path below is built relative to it.
+DIRNAME = Path(__file__).absolute().resolve().parent
+
+# CONFIG
+N_JOBS = -1  # passed to joblib.Parallel; NOTE: joblib treats -1 as "use all CPUs" (-2 would leave one CPU free)
+LOG_FILE_PATH = DIRNAME / "log_file"  # error logs written while importing
+DATA_PATH = DIRNAME / "raw_data"  # scratch folder the daily archives are copied to and extracted in
+DATABASE_PATH = DIRNAME / "orig_data"  # folder holding the `<date>_<doc_type>.tar.gz` archives
+DATA_INFO_PATH = DIRNAME / "data_info"  # per-day logs with the number of files found
+DATA_FINISH_INFO_PATH = DIRNAME / "./data_finish_info"  # per-day logs written once a day has been imported
+DOC_TYPE = ["Tick", "Order", "OrderQueue", "Transaction", "Day", "Minute"]  # supported document types
+MAX_SIZE = 3000 * 1024 * 1024 * 1024  # quota passed to `arc.set_quota` for every library (3000 GiB if the unit is bytes)
+ALL_STOCK_PATH = DATABASE_PATH / "all.txt"  # tab-separated file; its first column lists the symbols to import
+ARCTIC_SRV = "127.0.0.1"  # host handed to `Arctic(...)` / `MongoClient(...)`
+
+
+def get_library_name(doc_type):
+    """Return the library name used for `doc_type`.
+
+    The name is the lower-cased document type, except that "Tick"
+    (compared case-insensitively) maps to the plural "ticks".
+    """
+    lowered = str.lower(doc_type)
+    return "ticks" if lowered == "tick" else lowered
+
+
+def is_stock(exchange_place, code):
+    """Tell whether `code` passes the stock filter of its exchange.
+
+    - "SH": the code must start with "6"
+    - "SZ": the code must start with "0" or with "30"
+    - any other exchange: always accepted
+    """
+    if exchange_place == "SH":
+        return code[0] == "6"
+    if exchange_place == "SZ":
+        return code[0] == "0" or code[:2] == "30"
+    return True
+
+
+def add_one_stock_daily_data(filepath, type, exchange_place, arc, date):
+    """
+    Load one per-stock csv file and store its rows in the library of the given document type.
+
+    Rows whose `date`/`time` columns cannot be turned into a timestamp are dropped.
+
+    exchange_place: "SZ" OR "SH"
+    type: "tick", "orderbook", ... (mapped to a library name by `get_library_name`)
+    filepath: the path of csv; the file name without ".csv" is used as the stock code
+    arc: arclink created by a process
+    date: only used in the progress messages
+
+    Returns the positional indices of the dropped rows, or None when the code is rejected by `is_stock`.
+    """
+    code = os.path.split(filepath)[-1].split(".csv")[0]
+    if not is_stock(exchange_place, code):
+        return
+
+    df = pd.read_csv(filepath, encoding="gbk", dtype={"code": str})
+
+    def format_time(day, hms):
+        day = str(day)
+        hms = str(hms)
+        # A leading "1" is taken as a two-digit hour (>=10); otherwise the hour has a single digit.
+        hour_len = 2 if hms[0] == "1" else 1
+        minute_end = hour_len + 2
+        second_end = minute_end + 2
+        return (
+            "-".join([day[0:4], day[4:6], day[6:8]])
+            + " "
+            + ":".join([hms[:hour_len], hms[hour_len:minute_end], hms[minute_end:second_end] + "." + hms[second_end:]])
+        )
+
+    ## Discard the entire row if a wrong timestamp is encountered.
+    ## Every timestamp is parsed exactly once: good ones are kept, bad ones only record their row number.
+    parsed_timestamps = []
+    error_index_list = []
+    for index, (day, hms) in enumerate(zip(df["date"], df["time"])):
+        try:
+            parsed_timestamps.append(pd.Timestamp(format_time(day, hms)))
+        except Exception:
+            error_index_list.append(index)  ## The row number of the error line
+
+    # to-do: writing to logs
+
+    if error_index_list:
+        print("error: {}, {}".format(filepath, len(error_index_list)))
+
+    # read_csv yields a default 0..n-1 index, so the recorded row numbers are also the labels to drop
+    df = df.drop(error_index_list)
+    df = df.drop(columns=["date", "time", "name", "code", "wind_code"])
+    df["date"] = pd.to_datetime(pd.DatetimeIndex(parsed_timestamps))
+    df.set_index("date", inplace=True)
+
+    if str.lower(type) == "orderqueue":
+        ## extract ab1~ab50: join the first `ab_items` values of each row into one comma-separated string
+        df["ab"] = [
+            ",".join([str(int(row["ab" + str(i + 1)])) for i in range(0, row["ab_items"])])
+            for _, row in df.iterrows()
+        ]
+        df = df.drop(columns=["ab" + str(i) for i in range(1, 51)])
+
+    lib_name = get_library_name(type)
+    # arc.initialize_library(lib_name, lib_type=CHUNK_STORE)
+    lib = arc[lib_name]
+
+    symbol = "".join([exchange_place, code])
+    if symbol in lib.list_symbols():
+        print("update {0}, date={1}".format(symbol, date))
+        if df.empty:
+            # nothing to update with
+            return error_index_list
+        lib.update(symbol, df, chunk_size="D")
+    else:
+        print("write {0}, date={1}".format(symbol, date))
+        lib.write(symbol, df, chunk_size="D")
+    return error_index_list
+
+
+def add_one_stock_daily_data_wrapper(filepath, type, exchange_place, index, date):
+    """
+    Import one csv file through its own Arctic connection and log problems instead of raising.
+
+    - The rows dropped because of a bad timestamp are appended to a "temp_timestamp_error_*" file.
+    - Any exception is printed and appended, with its traceback, to a "temp_fail_*" file.
+    Both files live in LOG_FILE_PATH and are named after the process id, the date and the type.
+
+    index: position of the file in the task list; every 100th file prints a progress line
+    """
+    pid = os.getpid()
+    code = os.path.split(filepath)[-1].split(".csv")[0]
+    arc = Arctic(ARCTIC_SRV)
+    try:
+        if index % 100 == 0:
+            print("index = {}, filepath = {}".format(index, filepath))
+        error_index_list = add_one_stock_daily_data(filepath, type, exchange_place, arc, date)
+        if error_index_list:
+            log_path = os.path.join(LOG_FILE_PATH, "temp_timestamp_error_{0}_{1}_{2}.txt".format(pid, date, type))
+            # `with` closes the log even if the write fails
+            with open(log_path, "a+") as f:
+                f.write("{}, {}, {}\n".format(filepath, error_index_list, exchange_place + "_" + code))
+
+    except Exception as e:
+        info = traceback.format_exc()
+        print("error:" + str(e))
+        log_path = os.path.join(LOG_FILE_PATH, "temp_fail_{0}_{1}_{2}.txt".format(pid, date, type))
+        with open(log_path, "a+") as f:
+            f.write("fail:" + str(filepath) + "\n" + str(e) + "\n" + str(info) + "\n")
+
+    finally:
+        arc.reset()
+
+
+def add_data(tick_date, doc_type, stock_name_dict):
+    """
+    Import all csv files of one day and one document type.
+
+    Steps: copy `<tick_date>_<doc_type>.tar.gz` from DATABASE_PATH to DATA_PATH, extract its SH and SZ
+    folders, import every csv whose code is listed in `stock_name_dict`, then remove the extracted files.
+    Failures are printed and appended to a "temp_fail_*" file in LOG_FILE_PATH instead of being raised.
+
+    tick_date: the day as a string (e.g. "20201231"); it is part of the archive and folder names
+    doc_type: one of DOC_TYPE; anything else only prints a message
+    stock_name_dict: {"SH": [codes], "SZ": [codes]}; only these codes are imported
+    """
+    pid = os.getpid()
+
+    if doc_type not in DOC_TYPE:
+        print("doc_type not in {}".format(DOC_TYPE))
+        return
+    try:
+        begin_time = time.time()
+        folder_name = tick_date + "_" + doc_type
+        archive_name = folder_name + ".tar.gz"
+        os.system(f"cp {DATABASE_PATH}/{archive_name} {DATA_PATH}/")
+
+        for exchange in ("SH", "SZ"):
+            os.system(f"tar -xvzf {DATA_PATH}/{archive_name} -C {DATA_PATH}/ {folder_name}/{exchange}")
+        for sub_path in (
+            "",
+            f"/{folder_name}",
+            f"/{folder_name}/SH",
+            f"/{folder_name}/SZ",
+            f"/{folder_name}/SH/{tick_date}",
+            f"/{folder_name}/SZ/{tick_date}",
+        ):
+            os.system(f"chmod 777 {DATA_PATH}{sub_path}")
+
+        print("tick_date={}".format(tick_date))
+
+        temp_data_path_sh = os.path.join(DATA_PATH, folder_name, "SH", tick_date)
+        temp_data_path_sz = os.path.join(DATA_PATH, folder_name, "SZ", tick_date)
+
+        # Keep only the files that look like stocks of the exchange AND are listed in stock_name_dict.
+        if os.path.exists(temp_data_path_sz):
+            sz_files = {
+                i.split(".csv")[0] for i in os.listdir(temp_data_path_sz) if i[:2] == "30" or i[0] == "0"
+            } & set(stock_name_dict["SZ"])
+        else:
+            sz_files = set()
+        sz_file_nums = len(sz_files)
+
+        if os.path.exists(temp_data_path_sh):
+            sh_files = {i.split(".csv")[0] for i in os.listdir(temp_data_path_sh) if i[0] == "6"} & set(
+                stock_name_dict["SH"]
+            )
+        else:
+            sh_files = set()
+        sh_file_nums = len(sh_files)
+        print("sz_file_nums:{}, sh_file_nums:{}".format(sz_file_nums, sh_file_nums))
+
+        with (DATA_INFO_PATH / "data_info_log_{}_{}".format(doc_type, tick_date)).open("w+") as f:
+            f.write("sz:{}, sh:{}, date:{}:".format(sz_file_nums, sh_file_nums, tick_date) + "\n")
+
+        # SH is imported before SZ, each exchange with its own pool of workers.
+        for exchange, data_path, codes in (
+            ("SH", temp_data_path_sh, sh_files),
+            ("SZ", temp_data_path_sz, sz_files),
+        ):
+            if codes:
+                # write is not thread-safe, update may be thread-safe
+                Parallel(n_jobs=N_JOBS)(
+                    delayed(add_one_stock_daily_data_wrapper)(
+                        os.path.join(data_path, name + ".csv"), doc_type, exchange, index, tick_date
+                    )
+                    for index, name in enumerate(codes)
+                )
+
+        os.system(f"rm -f {DATA_PATH}/{archive_name}")
+        os.system(f"rm -rf {DATA_PATH}/{folder_name}")
+        total_time = time.time() - begin_time
+        with (DATA_FINISH_INFO_PATH / "data_info_finish_log_{}_{}".format(doc_type, tick_date)).open("w+") as f:
+            f.write("finish: date:{}, consume_time:{}, end_time: {}".format(tick_date, total_time, time.time()) + "\n")
+
+    except Exception as e:
+        info = traceback.format_exc()
+        print("date error:" + str(e))
+        log_path = os.path.join(LOG_FILE_PATH, "temp_fail_{0}_{1}_{2}.txt".format(pid, tick_date, doc_type))
+        with open(log_path, "a+") as f:
+            f.write("fail:" + str(tick_date) + "\n" + str(e) + "\n" + str(info) + "\n")
+
+
+class DSCreator:
+    """Dataset creator: each public method is exposed as a command by `fire.Fire(DSCreator)`."""
+
+    def clear(self):
+        """Drop the database named "arctic" on the server at ARCTIC_SRV."""
+        client = MongoClient(ARCTIC_SRV)
+        client.drop_database("arctic")
+
+    def initialize_library(self):
+        """Initialize one chunk-store library for every document type in DOC_TYPE."""
+        arc = Arctic(ARCTIC_SRV)
+        for doc_type in DOC_TYPE:
+            arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
+
+    def _get_empty_folder(self, fp: Path):
+        """Make sure `fp` is an existing, empty folder; anything already in it is deleted."""
+        fp = Path(fp)
+        if fp.exists():
+            shutil.rmtree(fp)
+        fp.mkdir(parents=True, exist_ok=True)
+
+    def import_data(self, doc_type_l=("Tick", "Transaction", "Order")):
+        """
+        Import the example data of the given document types.
+
+        doc_type_l: the document types to import; a single name given as a plain string is accepted too
+        """
+        if isinstance(doc_type_l, str):
+            # a bare string would otherwise be iterated character by character
+            doc_type_l = [doc_type_l]
+
+        # clear all the old files
+        for fp in LOG_FILE_PATH, DATA_INFO_PATH, DATA_FINISH_INFO_PATH, DATA_PATH:
+            self._get_empty_folder(fp)
+
+        arc = Arctic(ARCTIC_SRV)
+        for doc_type in DOC_TYPE:
+            # arc.initialize_library(get_library_name(doc_type), lib_type=CHUNK_STORE)
+            arc.set_quota(get_library_name(doc_type), MAX_SIZE)
+        arc.reset()
+
+        # The stock list does not depend on the document type, so it is read only once.
+        with open(ALL_STOCK_PATH, "r") as f:
+            stock_name_list = [line.split("\t")[0] for line in f]
+        stock_name_dict = {
+            "SH": [stock_name[2:] for stock_name in stock_name_list if "SH" in stock_name],
+            "SZ": [stock_name[2:] for stock_name in stock_name_list if "SZ" in stock_name],
+        }
+
+        # doc_type = 'Day'
+        for doc_type in doc_type_l:
+            date_list = sorted({int(path.split("_")[0]) for path in os.listdir(DATABASE_PATH) if doc_type in path})
+            date_list = [str(date) for date in date_list]
+
+            lib_name = get_library_name(doc_type)
+            a = Arctic(ARCTIC_SRV)
+            # a.initialize_library(lib_name, lib_type=CHUNK_STORE)
+
+            lib = a[lib_name]
+            stock_name_exist = set(lib.list_symbols())  # set: constant-time membership test below
+            initialize_count = 0
+            for stock_name in stock_name_list:
+                if stock_name not in stock_name_exist:
+                    initialize_count += 1
+                    # A placeholder for stocks
+                    pdf = pd.DataFrame(index=[pd.Timestamp("1900-01-01")])
+                    pdf.index.name = "date"  # an col named date is necessary
+                    lib.write(stock_name, pdf)
+            print("initialize count: {}".format(initialize_count))
+            print("tasks: {}".format(date_list))
+            a.reset()
+
+            # date_list = [files.split("_")[0] for files in os.listdir("./raw_data_price") if "tar" in files]
+            # print(len(date_list))
+            # NOTE: the dates discovered above are replaced by this single hard-coded day.
+            date_list = ["20201231"]  # for test
+            Parallel(n_jobs=min(2, len(date_list)))(
+                delayed(add_data)(date, doc_type, stock_name_dict) for date in date_list
+            )
+
+
+if __name__ == "__main__":
+    fire.Fire(DSCreator)
--- a/examples/orderbook_data/example.py
+++ b/examples/orderbook_data/example.py
@@ -0,0 +1,308 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+from arctic.arctic import Arctic
+import qlib
+from qlib.data import D
+import unittest
+
+
+class TestClass(unittest.TestCase):
+    """
+    Query examples for tick, transaction and order data served by the Arctic feature provider.
+
+    Every test only prints the resulting DataFrame; nothing is asserted about its content.
+
+    Useful commands
+    - run all tests: pytest examples/orderbook_data/example.py
+    - run a single test:  pytest -s --pdb --disable-warnings examples/orderbook_data/example.py::TestClass::test_basic01
+    """
+
+    def setUp(self):
+        """
+        Configure for arctic
+        """
+        provider_uri = "~/.qlib/qlib_data/yahoo_cn_1min"
+        qlib.init(
+            provider_uri=provider_uri,
+            mem_cache_size_limit=1024 ** 3 * 2,
+            mem_cache_type="sizeof",
+            kernels=1,
+            expression_provider={"class": "LocalExpressionProvider", "kwargs": {"time2idx": False}},
+            feature_provider={"class": "ArcticFeatureProvider", "kwargs": {"uri": "127.0.0.1"}},
+            dataset_provider={
+                "class": "LocalDatasetProvider",
+                "kwargs": {
+                    "align_time": False,  # Order book is not fixed, so it can't be align to a shared fixed frequency calendar
+                },
+            },
+        )
+        # self.stocks_list = ["SH600519"]
+        self.stocks_list = ["SZ000725"]
+
+    def test_basic(self):
+        """Query raw ask/bid fields at tick frequency within a time range."""
+        # NOTE: this data contains a lot of zeros in $askX and $bidX
+        df = D.features(
+            self.stocks_list,
+            fields=["$ask1", "$ask2", "$bid1", "$bid2"],
+            freq="ticks",
+            start_time="20201230",
+            end_time="20210101",
+        )
+        print(df)
+
+    def test_basic_without_time(self):
+        """Query a raw tick field without giving a time range."""
+        df = D.features(self.stocks_list, fields=["$ask1"], freq="ticks")
+        print(df)
+
+    def test_basic01(self):
+        """Resample a tick field to 1 minute, keeping the last value."""
+        df = D.features(
+            self.stocks_list,
+            fields=["TResample($ask1, '1min', 'last')"],
+            freq="ticks",
+            start_time="20201230",
+            end_time="20210101",
+        )
+        print(df)
+
+    def test_basic02(self):
+        """Query a raw field at transaction frequency."""
+        df = D.features(
+            self.stocks_list,
+            fields=["$function_code"],
+            freq="transaction",
+            start_time="20201230",
+            end_time="20210101",
+        )
+        print(df)
+
+    def test_basic03(self):
+        """Query a raw field at order frequency."""
+        df = D.features(
+            self.stocks_list,
+            fields=["$function_code"],
+            freq="order",
+            start_time="20201230",
+            end_time="20210101",
+        )
+        print(df)
+
+    # Here are some popular expressions for high-frequency
+    # 1) some shared expression
+    expr_sum_buy_ask_1 = "(TResample($ask1, '1min', 'last') + TResample($bid1, '1min', 'last'))"
+    total_volume = (
+        "TResample("
+        + "+".join([f"${name}{i}" for i in range(1, 11) for name in ["asize", "bsize"]])
+        + ", '1min', 'sum')"
+    )
+
+    @staticmethod
+    def total_func(name, method):
+        """Return the expression resampling the sum of `$<name>1` .. `$<name>10` to 1 minute with `method`."""
+        return "TResample(" + "+".join([f"${name}{i}" for i in range(1, 11)]) + ",'1min', '{}')".format(method)
+
+    def test_exp_01(self):
+        exprs = []
+        names = []
+        for name in ["asize", "bsize"]:
+            for i in range(1, 11):
+                exprs.append(f"TResample(${name}{i}, '1min', 'mean') / ({self.total_volume})")
+                names.append(f"v_{name}_{i}")
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    # 2) some often used papers;
+    def test_exp_02(self):
+        spread_func = (
+            lambda index: f"2 * TResample($ask{index} - $bid{index}, '1min', 'last') / {self.expr_sum_buy_ask_1}"
+        )
+        mid_func = (
+            lambda index: f"2 * TResample(($ask{index} + $bid{index})/2, '1min', 'last') / {self.expr_sum_buy_ask_1}"
+        )
+
+        exprs = []
+        names = []
+        for i in range(1, 11):
+            exprs.extend([spread_func(i), mid_func(i)])
+            names.extend([f"p_spread_{i}", f"p_mid_{i}"])
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    @classmethod
+    def _exp_03_fields(cls):
+        """Build the (expressions, column names) lists queried by `test_exp_03`."""
+
+        def expr3_func1(name, index_left, index_right):
+            return f"2 * TResample(Abs(${name}{index_left} - ${name}{index_right}), '1min', 'last') / {cls.expr_sum_buy_ask_1}"
+
+        exprs = []
+        names = []
+        for name in ["ask", "bid"]:
+            for i in range(1, 10):
+                # accumulate one expression per adjacent level pair (these used to be overwritten)
+                exprs.append(expr3_func1(name, i + 1, i))
+                names.append(f"p_diff_{name}_{i}_{i+1}")
+        exprs.extend([expr3_func1("ask", 10, 1), expr3_func1("bid", 1, 10)])
+        names.extend(["p_diff_ask_10_1", "p_diff_bid_1_10"])
+        return exprs, names
+
+    def test_exp_03(self):
+        exprs, names = self._exp_03_fields()
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    def test_exp_04(self):
+        exprs = []
+        names = []
+        for name in ["asize", "bsize"]:
+            exprs.append(f"(({ self.total_func(name, 'mean')}) / 10) / {self.total_volume}")
+            names.append(f"v_avg_{name}")
+
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    def test_exp_05(self):
+        exprs = [
+            f"2 * Sub({ self.total_func('ask', 'last')}, {self.total_func('bid', 'last')})/{self.expr_sum_buy_ask_1}",
+            f"Sub({ self.total_func('asize', 'mean')}, {self.total_func('bsize', 'mean')})/{self.total_volume}",
+        ]
+        names = ["p_accspread", "v_accspread"]
+
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    #  (p|v)_diff_(ask|bid|asize|bsize)_(time_interval)
+    def test_exp_06(self):
+        t = 3
+        expr6_price_func = (
+            lambda name, index, method: f'2 * (TResample(${name}{index}, "{t}s", "{method}") - Ref(TResample(${name}{index}, "{t}s", "{method}"), 1)) / {t}'
+        )
+        exprs = []
+        names = []
+        for i in range(1, 11):
+            for name in ["bid", "ask"]:
+                exprs.append(
+                    f"TResample({expr6_price_func(name, i, 'last')}, '1min', 'mean') / {self.expr_sum_buy_ask_1}"
+                )
+                names.append(f"p_diff_{name}{i}_{t}s")
+
+        for i in range(1, 11):
+            for name in ["asize", "bsize"]:
+                exprs.append(f"TResample({expr6_price_func(name, i, 'mean')}, '1min', 'mean') / {self.total_volume}")
+                names.append(f"v_diff_{name}{i}_{t}s")
+
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+    # TODOs:
+    # Following expressions may be implemented in the future
+    # expr7_2 = lambda funccode, bsflag, time_interval: \
+    #     "TResample(TRolling(TEq(@transaction.function_code,  {}) & TEq(@transaction.bs_flag ,{}), '{}s', 'sum') / \
+    #     TRolling(@transaction.function_code, '{}s', 'count') , '1min', 'mean')".format(ord(funccode), bsflag,time_interval,time_interval)
+    # create_dataset(7, "SH600000", [expr7_2("C")] + [expr7(funccode, ordercode) for funccode in ['B','S'] for ordercode in ['0','1']])
+    # create_dataset(7,  ["SH600000"], [expr7_2("C", 48)] )
+
+    @staticmethod
+    def expr7_init(funccode, ordercode, time_interval):
+        """Rolling share of records whose function code and order kind match the given characters."""
+        # NOTE: based on order frequency (i.e. freq="order")
+        return f"Rolling(Eq($function_code,  {ord(funccode)}) & Eq($order_kind ,{ord(ordercode)}), '{time_interval}s', 'sum') / Rolling($function_code, '{time_interval}s', 'count')"
+
+    # (la|lb|ma|mb|ca|cb)_intensity_(time_interval)
+    def test_exp_07_1(self):
+        # NOTE: based on transaction frequency (i.e. freq="transaction")
+        expr7_3 = (
+            lambda funccode, code, time_interval: f"TResample(Rolling(Eq($function_code,  {ord(funccode)}) & {code}($ask_order, $bid_order) , '{time_interval}s', 'sum')   / Rolling($function_code, '{time_interval}s', 'count') , '1min', 'mean')"
+        )
+
+        exprs = [expr7_3("C", "Gt", "3"), expr7_3("C", "Lt", "3")]
+        names = ["ca_intensity_3s", "cb_intensity_3s"]
+
+        df = D.features(self.stocks_list, fields=exprs, freq="transaction")
+        df.columns = names
+        print(df)
+
+    # Maps the function/order code characters to the letters used in the column names
+    trans_dict = {"B": "a", "S": "b", "0": "l", "1": "m"}
+
+    def test_exp_07_2(self):
+        # NOTE: based on order frequency
+        # NOTE(review): `expr7_init` is documented for freq="order" and the other tests using it query
+        # freq="order", but this one queries freq="transaction" -- confirm which frequency is intended.
+        expr7 = (
+            lambda funccode, ordercode, time_interval: f"TResample({self.expr7_init(funccode, ordercode, time_interval)}, '1min', 'mean')"
+        )
+
+        exprs = []
+        names = []
+        for funccode in ["B", "S"]:
+            for ordercode in ["0", "1"]:
+                exprs.append(expr7(funccode, ordercode, "3"))
+                names.append(self.trans_dict[ordercode] + self.trans_dict[funccode] + "_intensity_3s")
+        df = D.features(self.stocks_list, fields=exprs, freq="transaction")
+        df.columns = names
+        print(df)
+
+    @staticmethod
+    def expr7_3_init(funccode, code, time_interval):
+        """Rolling share of records matching the function code and the `code` comparison of ask/bid order."""
+        # NOTE: It depends on transaction frequency
+        return f"Rolling(Eq($function_code,  {ord(funccode)}) & {code}($ask_order, $bid_order) , '{time_interval}s', 'sum') / Rolling($function_code, '{time_interval}s', 'count')"
+
+    # (la|lb|ma|mb|ca|cb)_relative_intensity_(time_interval_small)_(time_interval_big)
+    def test_exp_08_1(self):
+        expr8_1 = (
+            lambda funccode, ordercode, time_interval_short, time_interval_long: f"TResample(Gt({self.expr7_init(funccode, ordercode, time_interval_short)},{self.expr7_init(funccode, ordercode, time_interval_long)}), '1min', 'mean')"
+        )
+
+        exprs = []
+        names = []
+        for funccode in ["B", "S"]:
+            for ordercode in ["0", "1"]:
+                exprs.append(expr8_1(funccode, ordercode, "10", "900"))
+                names.append(self.trans_dict[ordercode] + self.trans_dict[funccode] + "_relative_intensity_10s_900s")
+
+        df = D.features(self.stocks_list, fields=exprs, freq="order")
+        df.columns = names
+        print(df)
+
+    def test_exp_08_2(self):
+        # NOTE: It depends on transaction frequency
+        expr8_2 = (
+            lambda funccode, ordercode, time_interval_short, time_interval_long: f"TResample(Gt({self.expr7_3_init(funccode, ordercode, time_interval_short)},{self.expr7_3_init(funccode, ordercode, time_interval_long)}), '1min', 'mean')"
+        )
+
+        exprs = [expr8_2("C", "Gt", "10", "900"), expr8_2("C", "Lt", "10", "900")]
+        names = ["ca_relative_intensity_10s_900s", "cb_relative_intensity_10s_900s"]
+
+        df = D.features(self.stocks_list, fields=exprs, freq="transaction")
+        df.columns = names
+        print(df)
+
+    ## v9(la|lb|ma|mb|ca|cb)_diff_intensity_(time_interval1)_(time_interval2)
+    # 1) calculating the original data
+    # 2) Resample data to 3s and calculate the changing rate
+    # 3) Resample data to 1min
+
+    def test_exp_09_trans(self):
+        exprs = [
+            f'TResample(Div(Sub(TResample({self.expr7_3_init("C", "Gt", "3")}, "3s", "last"), Ref(TResample({self.expr7_3_init("C", "Gt", "3")}, "3s","last"), 1)), 3), "1min", "mean")',
+            f'TResample(Div(Sub(TResample({self.expr7_3_init("C", "Lt", "3")}, "3s", "last"), Ref(TResample({self.expr7_3_init("C", "Lt", "3")}, "3s","last"), 1)), 3), "1min", "mean")',
+        ]
+        names = ["ca_diff_intensity_3s_3s", "cb_diff_intensity_3s_3s"]
+        df = D.features(self.stocks_list, fields=exprs, freq="transaction")
+        df.columns = names
+        print(df)
+
+    def test_exp_09_order(self):
+        exprs = []
+        names = []
+        for funccode in ["B", "S"]:
+            for ordercode in ["0", "1"]:
+                exprs.append(
+                    f'TResample(Div(Sub(TResample({self.expr7_init(funccode, ordercode, "3")}, "3s", "last"), Ref(TResample({self.expr7_init(funccode, ordercode, "3")},"3s", "last"), 1)), 3) ,"1min", "mean")'
+                )
+                names.append(self.trans_dict[ordercode] + self.trans_dict[funccode] + "_diff_intensity_3s_3s")
+        df = D.features(self.stocks_list, fields=exprs, freq="order")
+        df.columns = names
+        print(df)
+
+    @staticmethod
+    def _exp_10_fields():
+        """Build the (expressions, column names) lists queried by `test_exp_10`."""
+        exprs = []
+        names = []
+        for i in [5, 10, 30, 60]:
+            exprs.append(
+                f'TResample(Ref(TResample($ask1 + $bid1, "1s", "ffill"), {-i}) / TResample($ask1 + $bid1, "1s", "ffill") - 1, "1min", "mean" )'
+            )
+            # one plain string per expression (a generator object used to be appended here)
+            names.append(f"lag_{i}_change_rate")
+        return exprs, names
+
+    def test_exp_10(self):
+        exprs, names = self._exp_10_fields()
+        df = D.features(self.stocks_list, fields=exprs, freq="ticks")
+        df.columns = names
+        print(df)
+
+
+if __name__ == "__main__":
+    unittest.main()