From cc8339acd925a2df0027ae64bb0b8a4a360ed504 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Wed, 2 Jun 2021 16:49:52 +0800 Subject: [PATCH] Add a few comments --- rl_orders | Bin 0 -> 3464 bytes rl_playground.py | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 rl_orders diff --git a/rl_orders b/rl_orders new file mode 100644 index 0000000000000000000000000000000000000000..7902b901c000bfd82fb7fcc0386c588f3f78cbb4 GIT binary patch literal 3464 zcmai$eM}Q~7{^;->?j+k)cJzoL`)bn&F}ioCT}CMX-(Nc=HkTe`a-UQzU+G4Bus`Q zI>BQSJ%TO}L`B0UZjDnjmStp_EHR5j&`k+(i2|Dw#>|9GmWBPU9rq*4-LLmYFYP_~ z-1m8&@9%k97u&MuNk#Z7=QFwFx2oKBjh%8-vaSHD@i9&p!*h=nhwn%DXZG@YU=$Hx zeU3_-+sSi8=}SfcOhCtTag@gw^s+p+%p3Iht2GWE zVH$dr2E9}wiP)^8GkM_zf_6G4Qc*e%7IFswTD`%@(*z_fDI|2)$?me_$pzt9dbveG zub@-uH2P_Jtwl+v!=Uwrv709+a%EvAg9!UZb-%Y|UHDQ}%7Dpe7Gb78?v?ND zOvJ;c2G2?=6Z~Qzvqjjb>Wh7UcMV5+Dwk$~(<%vrH3Pt?6Je)8NAJDdB)qbph&f3Z zR7L`Tl>J^@(iBX}!o$vOJcNOvZZ?uRz2~V7cv$N`ZODF0d10vz53|RWOTvWXWLR*}S6m-_MO`qBw?_Vz zOvwfaHPdTR&GPqMiNE!58D3eX@~EUTAx)hQS%Yc(Zt9o#=kT!BHPTZ$G zyve?R77v@K@XIPQk;rE=LqiGwG)HMbd#Oea21ql4)sN1mzA(KGudM4wP7)?GK&vN3 zpB-%P?w@Rk#lub{zbe^Hp=M?if;GDQ>ANoK@vzksJ+jK+zXnm+{qJwi7HzqS_u2N2 z?U8{0?M*?M&Wx<__>JOEE~Uiam2G|PUCA_e=-?4BqrI6j=WQ+x#p7X*KD#TKrl5?> zMmylzdy8|`Em7uK_o75;Wx{bHkuvqQnz8(kpTaA9N7X5>%z(zWZ+emj5i%p_1m(_;NTX0w)?lfma&mM zJgkmgFAFm**a5@A(LP&wl!fx_;d;4l0>gssba_1WTw7=aU$fym6_POMGq?lL)OfK! zcs@f#;?qp&|4=qfok=U!?D*>1*`lPCcvwS^^mYpGO%nNxFPkXaoCNT&+GB*IvhX|u zNLk6u!L7u)O?cSd99D7(3pInIR!q~dZQOS8Oq80b_n(qe28RZM{b0Er^8OlS1gRdL Jo`< @dataclass class EpisodicState: """ - A simplified data structure for RL-related components to process observations and rewards + A simplified data structure as the input of RL-related components to calculate observations and rewards. + Some of the metrics info are calculated on-the-fly in this class. 
""" # requirements stock_id: int @@ -181,6 +182,7 @@ class SingleOrderEnv(gym.Env): return self.observation.observation_space def retrieve_backtest_data(self, field: str): + # Retrieve backtest data for RL-specific use (including reward calculation) return D.features( [self.cur_order.stock_id], ['$open', '$close', '$high', '$low', '$volume'], @@ -190,6 +192,7 @@ class SingleOrderEnv(gym.Env): )[field].to_numpy() def initialize_state(self): + # Synchronize the executor's state to EpisodicState self.executor.reset(start_time=self.cur_order.start_time, end_time=self.cur_order.end_time) state = EpisodicState( stock_id=self.cur_order.stock_id, @@ -207,6 +210,7 @@ class SingleOrderEnv(gym.Env): return state def update_state(self, exec_vol): + # Synchronize exec_vol to the executor, then synchronize back to EpisodicState calendar = self.executor.trade_calendar state = self.ep_state @@ -273,6 +277,7 @@ class SingleOrderEnv(gym.Env): 'ins': self.ep_state.stock_id, 'date': self.ep_state.start_time, } + # TODO: collect logs pprint(info) return self.observation(self.ep_state), reward, self.ep_state.done, info @@ -327,13 +332,18 @@ def _main(): ) return SingleOrderEnv( observation, action, reward_fn, - iter(DataLoader(QlibOrderDataset('rl.pkl'), batch_size=None, shuffle=True)), executor) + iter(DataLoader(QlibOrderDataset('rl_orders'), batch_size=None, shuffle=True)), executor) policy = DummyPolicy() + # This cannot be replaced with SubprocVectorEnv, which fails with: + # File "/xxx/qlib/qlib/data/data.py", line 462, in dataset_processor + # p = Pool(processes=workers) + # AssertionError: daemonic processes are not allowed to have children envs = DummyVectorEnv([dummy_env for _ in range(4)]) test_collector = Collector(policy, envs) policy.eval() + # TODO: create a queue for all orders and make it auto-complete when all the orders are processed test_collector.collect(n_episode=10)