1
0
mirror of https://github.com/microsoft/qlib.git synced 2026-06-06 05:51:17 +08:00
Files
qlib/scripts/data_collector/pit/test_pit.py
bxdd faa99f30fa Support Point-in-time Data Operation (#343)
* add period ops class

* black format

* add pit data read

* fix bug in period ops

* update ops runnable

* update PIT test example

* black format

* update PIT test

* update test_PIT

* update code format

* add check_feature_exist

* black format

* optimize the PIT Algorithm

* fix bug

* update example

* update test_PIT name

* add pit collector

* black format

* fix bugs

* fix try

* fix bug & add dump_pit.py

* Successfully run and understand PIT

* Add some docs and remove a bug

* mv crypto collector

* black format

* Run successfully after merging master

* Pass test and fix code

* remove useless PIT code

* fix PYlint

* Rename

Co-authored-by: Young <afe.young@gmail.com>
2022-03-10 14:27:52 +08:00

195 lines
9.6 KiB
Python

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
import qlib
from qlib.data import D
import unittest
class TestPIT(unittest.TestCase):
    """Regression tests for point-in-time (PIT) queries made through ``D.features``.

    NOTE!!!!!!
    The assertions in this test assume that the user followed the command below and
    downloaded only 2 stocks.
    `python collector.py download_data --source_dir ./csv_pit --start 2000-01-01 --end 2020-01-01 --interval quarterly --symbol_flt_regx "^(600519|000725).*"`

    Expected values are stored as the printed form of the pandas result and are
    compared with all whitespace removed (see ``check_same``), so the layout of
    the expected-output literals below does not matter.
    """

    def setUp(self):
        """Initialise qlib with its default settings before every test."""
        # For easier debugging, initialise with a single kernel instead:
        #   qlib.init(kernels=1)
        qlib.init()

    def to_str(self, obj) -> str:
        """Return ``str(obj)`` with every whitespace character removed."""
        return "".join(str(obj).split())

    def check_same(self, a, b):
        """Assert that ``a`` and ``b`` have the same printed form, ignoring whitespace.

        Both arguments are passed through ``to_str``; the test fails with the
        usual ``assertEqual`` message when the results differ.
        """
        self.assertEqual(self.to_str(a), self.to_str(b))

    def test_query(self):
        """Check plain ``P(...)`` field queries for sh600519 over 2019-01-01..2019-07-19."""
        instruments = ["sh600519"]
        fields = ["P($$roewa_q)", "P($$yoyni_q)"]
        # Mao Tai published 2019Q2 report at 2019-07-13 & 2019-07-18
        # - http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index
        data = D.features(instruments, fields, start_time="2019-01-01", end_time="20190719", freq="day")

        print(data)

        # Summary statistics of the whole queried window.
        res = """
P($$roewa_q) P($$yoyni_q)
count 133.000000 133.000000
mean 0.196412 0.277930
std 0.097591 0.030262
min 0.000000 0.243892
25% 0.094737 0.243892
50% 0.255220 0.304181
75% 0.255220 0.305041
max 0.344644 0.305041
"""
        self.check_same(data.describe(), res)

        # Last five rows: the values change on 2019-07-15 and again on 2019-07-18.
        res = """
P($$roewa_q) P($$yoyni_q)
instrument datetime
sh600519 2019-07-15 0.000000 0.305041
2019-07-16 0.000000 0.305041
2019-07-17 0.000000 0.305041
2019-07-18 0.175322 0.252650
2019-07-19 0.175322 0.252650
"""
        self.check_same(data.tail(), res)

    def test_no_exist_data(self):
        """Check that an instrument without PIT values yields NaN rather than an error.

        ``sh601988`` does not match the download filter quoted in the class
        docstring, and its PIT columns are expected to be NaN.
        """
        fields = ["P($$roewa_q)", "P($$yoyni_q)", "$close"]
        data = D.features(["sh600519", "sh601988"], fields, start_time="2019-01-01", end_time="20190719", freq="day")
        data["$close"] = 1  # in case of different dataset gives different values
        print(data)
        expect = """
P($$roewa_q) P($$yoyni_q) $close
instrument datetime
sh600519 2019-01-02 0.25522 0.243892 1
2019-01-03 0.25522 0.243892 1
2019-01-04 0.25522 0.243892 1
2019-01-07 0.25522 0.243892 1
2019-01-08 0.25522 0.243892 1
... ... ... ...
sh601988 2019-07-15 NaN NaN 1
2019-07-16 NaN NaN 1
2019-07-17 NaN NaN 1
2019-07-18 NaN NaN 1
2019-07-19 NaN NaN 1
[266 rows x 3 columns]
"""
        self.check_same(data, expect)

    def test_expr(self):
        """Check ``Mean`` / ``Ref`` / arithmetic expressions evaluated inside ``P(...)``."""
        fields = [
            "P(Mean($$roewa_q, 1))",
            "P($$roewa_q)",
            "P(Mean($$roewa_q, 2))",
            "P(Ref($$roewa_q, 1))",
            "P((Ref($$roewa_q, 1) +$$roewa_q) / 2)",
        ]
        instruments = ["sh600519"]
        data = D.features(instruments, fields, start_time="2019-01-01", end_time="20190719", freq="day")
        expect = """
P(Mean($$roewa_q, 1)) P($$roewa_q) P(Mean($$roewa_q, 2)) P(Ref($$roewa_q, 1)) P((Ref($$roewa_q, 1) +$$roewa_q) / 2)
instrument datetime
sh600519 2019-07-01 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-02 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-03 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-04 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-05 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-08 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-09 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-10 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-11 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-12 0.094737 0.094737 0.219691 0.344644 0.219691
2019-07-15 0.000000 0.000000 0.047369 0.094737 0.047369
2019-07-16 0.000000 0.000000 0.047369 0.094737 0.047369
2019-07-17 0.000000 0.000000 0.047369 0.094737 0.047369
2019-07-18 0.175322 0.175322 0.135029 0.094737 0.135029
2019-07-19 0.175322 0.175322 0.135029 0.094737 0.135029
"""
        self.check_same(data.tail(15), expect)

    def test_unlimit(self):
        """Check queries that omit ``start_time`` and/or ``end_time``.

        Neither call may raise. The second result is reduced to the first
        occurrence of each distinct value and compared with the expected list.
        """
        # fields = ["P(Mean($$roewa_q, 1))", "P($$roewa_q)", "P(Mean($$roewa_q, 2))", "P(Ref($$roewa_q, 1))", "P((Ref($$roewa_q, 1) +$$roewa_q) / 2)"]
        fields = ["P($$roewa_q)"]
        instruments = ["sh600519"]
        _ = D.features(instruments, fields, freq="day")  # this should not raise error
        data = D.features(instruments, fields, end_time="20200101", freq="day")  # this should not raise error
        s = data.iloc[:, 0]
        # You can check the expected value based on the content in `docs/advanced/PIT.rst`
        expect = """
instrument datetime
sh600519 1999-11-10 NaN
2007-04-30 0.090219
2007-08-17 0.139330
2007-10-23 0.245863
2008-03-03 0.347900
2008-03-13 0.395989
2008-04-22 0.100724
2008-08-28 0.249968
2008-10-27 0.334120
2009-03-25 0.390117
2009-04-21 0.102675
2009-08-07 0.230712
2009-10-26 0.300730
2010-04-02 0.335461
2010-04-26 0.083825
2010-08-12 0.200545
2010-10-29 0.260986
2011-03-21 0.307393
2011-04-25 0.097411
2011-08-31 0.248251
2011-10-18 0.318919
2012-03-23 0.403900
2012-04-11 0.403925
2012-04-26 0.112148
2012-08-10 0.264847
2012-10-26 0.370487
2013-03-29 0.450047
2013-04-18 0.099958
2013-09-02 0.210442
2013-10-16 0.304543
2014-03-25 0.394328
2014-04-25 0.083217
2014-08-29 0.164503
2014-10-30 0.234085
2015-04-21 0.078494
2015-08-28 0.137504
2015-10-26 0.201709
2016-03-24 0.264205
2016-04-21 0.073664
2016-08-29 0.136576
2016-10-31 0.188062
2017-04-17 0.244385
2017-04-25 0.080614
2017-07-28 0.151510
2017-10-26 0.254166
2018-03-28 0.329542
2018-05-02 0.088887
2018-08-02 0.170563
2018-10-29 0.255220
2019-03-29 0.344644
2019-04-25 0.094737
2019-07-15 0.000000
2019-07-18 0.175322
2019-10-16 0.255819
Name: P($$roewa_q), dtype: float32
"""
        # `duplicated()` flags every repeat of an earlier value, so the mask keeps
        # only the first row on which each distinct value appears.
        self.check_same(s[~s.duplicated().values], expect)

    def test_expr2(self):
        """Smoke-test mixed PIT and non-PIT expressions.

        This test makes no assertions; it only requires that the query and the
        two ``print`` calls complete without raising.
        """
        instruments = ["sh600519"]
        fields = ["P($$roewa_q)", "P($$yoyni_q)"]
        fields += ["P(($$roewa_q / $$yoyni_q) / Ref($$roewa_q / $$yoyni_q, 1) - 1)"]
        fields += ["P(Sum($$yoyni_q, 4))"]
        fields += ["$close", "P($$roewa_q) * $close"]
        data = D.features(instruments, fields, start_time="2019-01-01", end_time="2020-01-01", freq="day")
        print(data)
        print(data.describe())
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()