61/main.py
2024-10-14 07:37:45 +00:00

135 lines
4.2 KiB
Python

#!/usr/bin/env python
# coding: utf-8
# In[16]:
from datetime import datetime, timedelta
import pandas as pd
from tms_data_interface import SQLQueryInterface
# SQL template executed by Scenario.logic. It is a str.format template with
# three placeholders:
#   {order_10m}      - name of the orders table
#   {trade_data_1b}  - name of the trades table
#   {time_window_s}  - look-back window, in seconds, before each trade time
#
# Result columns, in SELECT order:
#   TRADER_ID, trade_time_window, net_volume, order_count,
#   total_trade_volume, order_trade_ratio, volume_percentage
#
# Structure:
#   n              - per trader and trade DATE_TIME: net volume of 'buy' orders
#                    placed within the look-back window before that trade
#                    ('New' adds volume; 'Cancelled' and 'Fulfilled' subtract
#                    it), plus the count of matching orders.
#   t              - per trader and DATE_TIME: summed TRADE_VOLUME of 'buy'
#                    trades; LEFT JOINed, so missing values become 0 via
#                    COALESCE and order_trade_ratio falls back to 0.
#   net_volume_all - the same net-volume sum as n, but grouped by trade
#                    DATE_TIME only (all traders), used as the denominator of
#                    volume_percentage.
#
# NOTE(review): the EXISTS filter in "relevant_trades" looks like it is
# satisfied by row t1 itself (same TRADER_ID, DATE_TIME inside its own window),
# so it probably filters nothing - confirm the intended condition.
# NOTE(review): the "Step 6" SQL comment mentions opposite-side trades, but the
# filter is TRADE_SIDE = 'buy', the same side as the orders - confirm.
query = """
SELECT
n.TRADER_ID,
n.trade_time_window,
n.net_volume,
n.order_count, -- Include number of orders
COALESCE(t.total_trade_volume, 0) AS total_trade_volume,
CASE
WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume
ELSE 0 -- or another value to indicate no trades
END AS order_trade_ratio,
CASE
WHEN net_volume_all.total_net_volume_all > 0 THEN
(n.net_volume / net_volume_all.total_net_volume_all) * 100
ELSE 0
END AS volume_percentage -- Calculate volume percentage
FROM (
-- Step 2: Subquery for net_order_volume
SELECT
o.TRADER_ID,
t.DATE_TIME AS trade_time_window,
SUM(CASE
WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME
WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME
WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME
ELSE 0 END
) AS net_volume,
COUNT(o.ORDER_ID) AS order_count -- Count the number of orders
FROM {order_10m} o
JOIN {trade_data_1b} t
ON o.TRADER_ID = t.TRADER_ID
WHERE o.SIDE = 'buy'
AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME
GROUP BY o.TRADER_ID, t.DATE_TIME
) AS n
LEFT JOIN (
-- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)
SELECT
t.TRADER_ID,
t.DATE_TIME,
SUM(t.TRADE_VOLUME) AS total_trade_volume
FROM (
-- Step 5: Subquery for relevant_trades
SELECT t1.*
FROM {trade_data_1b} t1
WHERE t1.TRADE_SIDE = 'buy'
AND EXISTS (
SELECT 1
FROM {trade_data_1b} t2
WHERE t2.TRADER_ID = t1.TRADER_ID
AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME
)
) AS t
GROUP BY t.DATE_TIME, t.TRADER_ID
) AS t
ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME
-- New subquery for total net volume for all traders in the same time window
LEFT JOIN (
SELECT
t.DATE_TIME AS trade_time_window,
SUM(CASE
WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME
WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME
WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME
ELSE 0 END
) AS total_net_volume_all
FROM {order_10m} o
JOIN {trade_data_1b} t
ON o.TRADER_ID = t.TRADER_ID
WHERE o.SIDE = 'buy'
AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME
GROUP BY t.DATE_TIME
) AS net_volume_all
ON n.trade_time_window = net_volume_all.trade_time_window
ORDER BY n.trade_time_window
"""
from tms_data_interface import SQLQueryInterface
class Scenario:
    """Run the module-level ``query`` template and return its rows as a DataFrame.

    The query is executed through a shared ``SQLQueryInterface`` bound to the
    ``trade_schema`` schema. The order side is fixed to ``'buy'`` inside the
    query template itself; it is not configurable through ``logic``.
    """

    # Shared query interface; instantiated once when the class body executes.
    seq = SQLQueryInterface(schema="trade_schema")

    def logic(self, **kwargs):
        """Execute the scenario query and build the result DataFrame.

        Keyword Args:
            validation_window: Numeric look-back window. It is divided by 1000
                and truncated to an integer number of seconds before being
                substituted into the query (presumably the caller passes
                milliseconds - confirm against callers).

        Returns:
            pandas.DataFrame with the columns ``focal_ID``,
            ``trade_time_window``, ``net_volume``, ``order_count``,
            ``total_trade_volume``, ``order_trade_ratio`` and
            ``volume_percentage`` (one per query result column, in SELECT
            order), plus the constant columns ``Segment`` ('Default'),
            ``SAR_FLAG`` ('N') and ``Risk`` ('Low Risk'). Rows containing any
            missing value are dropped.

        Raises:
            TypeError: If ``validation_window`` is missing or is not a number.
        """
        validation_window = kwargs.get('validation_window')
        if validation_window is None:
            # Fail with a clear message instead of the opaque
            # "unsupported operand type(s) for /: 'NoneType' and 'int'".
            raise TypeError(
                "logic() missing required keyword argument: 'validation_window'"
            )

        # The query template expects its window in whole seconds. The int()
        # coercion also guarantees only a plain integer is interpolated into
        # the SQL text.
        time_window_s = int(validation_window / 1000)

        query_start_time = datetime.now()
        print("Query start time :", query_start_time)

        row_list = self.seq.execute_raw(
            query.format(
                trade_data_1b="trade_10m_v3",
                order_10m='order_10m',
                time_window_s=time_window_s,
            )
        )

        # Column labels applied positionally to the query's SELECT list;
        # TRADER_ID is exposed as 'focal_ID'.
        cols = [
            'focal_ID',
            'trade_time_window',
            'net_volume',
            'order_count',
            'total_trade_volume',
            'order_trade_ratio',
            'volume_percentage',
        ]
        final_scenario_df = pd.DataFrame(row_list, columns=cols)

        # Constant classification columns attached to every result row.
        final_scenario_df['Segment'] = 'Default'
        final_scenario_df['SAR_FLAG'] = 'N'
        final_scenario_df['Risk'] = 'Low Risk'

        # Discard rows where any query column came back NULL/NaN.
        final_scenario_df.dropna(inplace=True)
        return final_scenario_df
# In[17]:
# scenario = Scenario()
# scenario.logic(validation_window=300000)
# In[ ]: