diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index f8d7d2d..cec154d 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,9 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from datetime import datetime, timedelta\n", @@ -12,71 +14,83 @@ "from tms_data_interface import SQLQueryInterface\n", "\n", "query = \"\"\"\n", - "WITH time_windows AS (\n", - " SELECT\n", - " -- End time is the current trade time\n", - " date_time AS end_time,\n", + "SELECT \n", + " n.TRADER_ID,\n", + " n.trade_time_window,\n", + " n.net_volume,\n", + " n.order_count, -- Include number of orders\n", + " COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n", + " CASE \n", + " WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n", + " ELSE 0 -- or another value to indicate no trades\n", + " END AS order_trade_ratio,\n", + " CASE \n", + " WHEN net_volume_all.total_net_volume_all > 0 THEN \n", + " (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n", + " ELSE 0 \n", + " END AS volume_percentage -- Calculate volume percentage\n", + "FROM (\n", + " -- Step 2: Subquery for net_order_volume\n", + " SELECT \n", + " o.TRADER_ID,\n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS net_volume,\n", + " COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY o.TRADER_ID, t.DATE_TIME\n", + ") AS n\n", + "LEFT JOIN (\n", + " -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n", + " SELECT \n", + " t.TRADER_ID,\n", + " t.DATE_TIME,\n", + " SUM(t.TRADE_VOLUME) AS total_trade_volume\n", + " FROM (\n", + " -- Step 5: Subquery for relevant_trades\n", + " SELECT t1.*\n", + " FROM {trade_data_1b} t1\n", + " WHERE t1.TRADE_SIDE = 'buy'\n", + " AND EXISTS (\n", + " SELECT 1\n", + " FROM {trade_data_1b} t2\n", + " WHERE t2.TRADER_ID = t1.TRADER_ID\n", + " AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n", + " )\n", + " ) AS t\n", + " GROUP BY t.DATE_TIME, t.TRADER_ID\n", + ") AS t \n", + "ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n", "\n", - " -- Subtract seconds from the end_time using date_add() with negative integer interval\n", - " date_add('second', -{time_window_s}, date_time) AS start_time,\n", + "-- New subquery for total net volume for all traders in the same time window\n", + "LEFT JOIN (\n", + " SELECT \n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS total_net_volume_all\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY t.DATE_TIME\n", + ") AS net_volume_all\n", + "ON n.trade_time_window = net_volume_all.trade_time_window\n", "\n", - " -- Trade details\n", - " trade_price,\n", - " trade_volume,\n", - " trader_id,\n", - "\n", - " -- Calculate minimum price within the time window\n", - " MIN(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS min_price,\n", - "\n", - " -- Calculate maximum price within the time window\n", - " MAX(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS max_price,\n", - "\n", - " -- Calculate total trade volume within the time window\n", - " SUM(trade_volume) OVER ( \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS total_volume,\n", - "\n", - " -- Calculate participant's trade volume within the time window\n", - " SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n", - " PARTITION BY trader_id \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS participant_volume\n", - " FROM\n", - " {trade_data_1b}\n", - ")\n", - "SELECT\n", - " -- Select the time window details\n", - " start_time,\n", - " end_time,\n", - "\n", - " -- Select the participant (trader) ID\n", - " trader_id AS \"Participant\",\n", - "\n", - " -- Select the calculated min and max prices\n", - " min_price,\n", - " max_price,\n", - "\n", - " -- Calculate the price change percentage\n", - " (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n", - "\n", - " -- Calculate the participant's volume as a percentage of total volume\n", - " (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n", - "\n", - " -- Participant volume\n", - " participant_volume,\n", - "\n", - " -- Select the total volume within the window\n", - " total_volume AS \"Total Volume\"\n", - "FROM\n", - " time_windows\n", + "ORDER BY n.trade_time_window\n", + "limit 1000\n", "\"\"\"\n", "\n", "\n", @@ -86,26 +100,24 @@ " seq = SQLQueryInterface(schema=\"trade_schema\")\n", " def logic(self, **kwargs):\n", " validation_window = kwargs.get('validation_window')\n", + " spoofing_side = kwargs.get('buy')\n", " time_window_s = int(validation_window/1000)\n", " query_start_time = datetime.now()\n", " print(\"Query start time :\",query_start_time)\n", " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n", + " order_10m = 'order_10m',\n", " time_window_s = time_window_s)\n", " )\n", " cols = [\n", - " 'START_DATE_TIME',\n", - " 'END_DATE_TIME',\n", - " 'Focal_id',\n", - " 'MIN_PRICE',\n", - " 'MAX_PRICE',\n", - " 'PRICE_CHANGE_PCT',\n", - " 'PARTICIPANT_VOLUME_PCT',\n", - " 'PARTICIPANT_VOLUME',\n", - " 'TOTAL_VOLUME',\n", + " 'TRADER_ID',\n", + " 'trade_time_window',\n", + " 'net_volume',\n", + " 'order_count',\n", + " 'total_trade_volume',\n", + " 'order_trade_ratio',\n", + " 'volume_percentage'\n", " ]\n", " final_scenario_df = pd.DataFrame(row_list, columns = cols)\n", - " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n", - " final_scenario_df['TOTAL_VOLUME'] * 100\n", " final_scenario_df['Segment'] = 'Default'\n", " final_scenario_df['SAR_FLAG'] = 'N'\n", " final_scenario_df['Risk'] = 'Low Risk'\n", @@ -113,6 +125,35 @@ " # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n", " return final_scenario_df\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query start time : 2024-10-14 06:23:07.242919\n" + ] + } + ], + "source": [ + "scenario = Scenario()\n", + "scenario.logic(validation_window=300000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/main.ipynb b/main.ipynb index afa552e..7ef04be 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,72 +14,83 @@ "from tms_data_interface import SQLQueryInterface\n", "\n", "query = \"\"\"\n", - "WITH time_windows AS (\n", - " SELECT\n", - " -- End time is the current trade time\n", - " date_time AS end_time,\n", + "SELECT \n", + " n.TRADER_ID,\n", + " n.trade_time_window,\n", + " n.net_volume,\n", + " n.order_count, -- Include number of orders\n", + " COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n", + " CASE \n", + " WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n", + " ELSE 0 -- or another value to indicate no trades\n", + " END AS order_trade_ratio,\n", + " CASE \n", + " WHEN net_volume_all.total_net_volume_all > 0 THEN \n", + " (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n", + " ELSE 0 \n", + " END AS volume_percentage -- Calculate volume percentage\n", + "FROM (\n", + " -- Step 2: Subquery for net_order_volume\n", + " SELECT \n", + " o.TRADER_ID,\n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS net_volume,\n", + " COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY o.TRADER_ID, t.DATE_TIME\n", + ") AS n\n", + "LEFT JOIN (\n", + " -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n", + " SELECT \n", + " t.TRADER_ID,\n", + " t.DATE_TIME,\n", + " SUM(t.TRADE_VOLUME) AS total_trade_volume\n", + " FROM (\n", + " -- Step 5: Subquery for relevant_trades\n", + " SELECT t1.*\n", + " FROM {trade_data_1b} t1\n", + " WHERE t1.TRADE_SIDE = 'buy'\n", + " AND EXISTS (\n", + " SELECT 1\n", + " FROM {trade_data_1b} t2\n", + " WHERE t2.TRADER_ID = t1.TRADER_ID\n", + " AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n", + " )\n", + " ) AS t\n", + " GROUP BY t.DATE_TIME, t.TRADER_ID\n", + ") AS t \n", + "ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n", "\n", - " -- Subtract seconds from the end_time using date_add() with negative integer interval\n", - " date_add('second', -{time_window_s}, date_time) AS start_time,\n", + "-- New subquery for total net volume for all traders in the same time window\n", + "LEFT JOIN (\n", + " SELECT \n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS total_net_volume_all\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY t.DATE_TIME\n", + ") AS net_volume_all\n", + "ON n.trade_time_window = net_volume_all.trade_time_window\n", "\n", - " -- Trade details\n", - " trade_price,\n", - " trade_volume,\n", - " trader_id,\n", - "\n", - " -- Calculate minimum price within the time window\n", - " MIN(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS min_price,\n", - "\n", - " -- Calculate maximum price within the time window\n", - " MAX(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS max_price,\n", - "\n", - " -- Calculate total trade volume within the time window\n", - " SUM(trade_volume) OVER ( \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS total_volume,\n", - "\n", - " -- Calculate participant's trade volume within the time window\n", - " SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n", - " PARTITION BY trader_id \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS participant_volume\n", - " FROM\n", - " {trade_data_1b}\n", - ")\n", - "SELECT\n", - " -- Select the time window details\n", - " start_time,\n", - " end_time,\n", - "\n", - " -- Select the participant (trader) ID\n", - " trader_id AS \"Participant\",\n", - "\n", - " -- Select the calculated min and max prices\n", - " min_price,\n", - " max_price,\n", - "\n", - " -- Calculate the price change percentage\n", - " (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n", - "\n", - " -- Calculate the participant's volume as a percentage of total volume\n", - " (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n", - "\n", - " -- Participant volume\n", - " participant_volume,\n", - "\n", - " -- Select the total volume within the window\n", - " total_volume AS \"Total Volume\"\n", - "FROM\n", - " time_windows\n", - " limit 1000\n", + "ORDER BY n.trade_time_window\n", + "limit 1000\n", "\"\"\"\n", "\n", "\n", @@ -89,26 +100,24 @@ " seq = SQLQueryInterface(schema=\"trade_schema\")\n", " def logic(self, **kwargs):\n", " validation_window = kwargs.get('validation_window')\n", + " spoofing_side = kwargs.get('buy')\n", " time_window_s = int(validation_window/1000)\n", " query_start_time = datetime.now()\n", " print(\"Query start time :\",query_start_time)\n", " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n", + " order_10m = 'order_10m',\n", " time_window_s = time_window_s)\n", " )\n", " cols = [\n", - " 'START_DATE_TIME',\n", - " 'END_DATE_TIME',\n", - " 'Focal_id',\n", - " 'MIN_PRICE',\n", - " 'MAX_PRICE',\n", - " 'PRICE_CHANGE_PCT',\n", - " 'PARTICIPANT_VOLUME_PCT',\n", - " 'PARTICIPANT_VOLUME',\n", - " 'TOTAL_VOLUME',\n", + " 'TRADER_ID',\n", + " 'trade_time_window',\n", + " 'net_volume',\n", + " 'order_count',\n", + " 'total_trade_volume',\n", + " 'order_trade_ratio',\n", + " 'volume_percentage'\n", " ]\n", " final_scenario_df = pd.DataFrame(row_list, columns = cols)\n", - " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n", - " final_scenario_df['TOTAL_VOLUME'] * 100\n", " final_scenario_df['Segment'] = 'Default'\n", " final_scenario_df['SAR_FLAG'] = 'N'\n", " final_scenario_df['Risk'] = 'Low Risk'\n", @@ -119,14 +128,261 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c", - "metadata": {}, - "outputs": [], + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query start time : 2024-10-14 06:23:07.242919\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TRADER_IDtrade_time_windownet_volumeorder_counttotal_trade_volumeorder_trade_ratiovolume_percentageSegmentSAR_FLAGRisk
030977282072024-01-01 00:03:00-92.0192-1.00.0DefaultNLow Risk
132286453222024-01-01 00:06:00-689.01689-1.00.0DefaultNLow Risk
227018727272024-01-01 00:09:00-42.0142-1.00.0DefaultNLow Risk
316590566552024-01-01 00:11:00-167.01167-1.00.0DefaultNLow Risk
416612888872024-01-01 00:13:00-756.01756-1.00.0DefaultNLow Risk
.................................
99533821979852024-01-01 02:30:00-159.01159-1.00.0DefaultNLow Risk
99611290089902024-01-01 02:30:00-582.01582-1.00.0DefaultNLow Risk
99729441228932024-01-01 02:30:00-65.0165-1.00.0DefaultNLow Risk
99829108764052024-01-01 02:30:00-117.01117-1.00.0DefaultNLow Risk
99918169422262024-01-01 02:30:00-732.01732-1.00.0DefaultNLow Risk
\n", + "

1000 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " TRADER_ID trade_time_window net_volume order_count \\\n", + "0 3097728207 2024-01-01 00:03:00 -92.0 1 \n", + "1 3228645322 2024-01-01 00:06:00 -689.0 1 \n", + "2 2701872727 2024-01-01 00:09:00 -42.0 1 \n", + "3 1659056655 2024-01-01 00:11:00 -167.0 1 \n", + "4 1661288887 2024-01-01 00:13:00 -756.0 1 \n", + ".. ... ... ... ... \n", + "995 3382197985 2024-01-01 02:30:00 -159.0 1 \n", + "996 1129008990 2024-01-01 02:30:00 -582.0 1 \n", + "997 2944122893 2024-01-01 02:30:00 -65.0 1 \n", + "998 2910876405 2024-01-01 02:30:00 -117.0 1 \n", + "999 1816942226 2024-01-01 02:30:00 -732.0 1 \n", + "\n", + " total_trade_volume order_trade_ratio volume_percentage Segment \\\n", + "0 92 -1.0 0.0 Default \n", + "1 689 -1.0 0.0 Default \n", + "2 42 -1.0 0.0 Default \n", + "3 167 -1.0 0.0 Default \n", + "4 756 -1.0 0.0 Default \n", + ".. ... ... ... ... \n", + "995 159 -1.0 0.0 Default \n", + "996 582 -1.0 0.0 Default \n", + "997 65 -1.0 0.0 Default \n", + "998 117 -1.0 0.0 Default \n", + "999 732 -1.0 0.0 Default \n", + "\n", + " SAR_FLAG Risk \n", + "0 N Low Risk \n", + "1 N Low Risk \n", + "2 N Low Risk \n", + "3 N Low Risk \n", + "4 N Low Risk \n", + ".. ... ... \n", + "995 N Low Risk \n", + "996 N Low Risk \n", + "997 N Low Risk \n", + "998 N Low Risk \n", + "999 N Low Risk \n", + "\n", + "[1000 rows x 10 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "scenario = Scenario()\n", - "scenario.logic()" + "scenario.logic(validation_window=300000)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/main.py b/main.py index 1efd05c..ee202cf 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# In[2]: +# In[16]: from datetime import datetime, timedelta @@ -9,72 +9,83 @@ import pandas as pd from tms_data_interface import SQLQueryInterface query = """ -WITH time_windows AS ( - SELECT - -- End time is the current trade time - date_time AS end_time, +SELECT + n.TRADER_ID, + n.trade_time_window, + n.net_volume, + n.order_count, -- Include number of orders + COALESCE(t.total_trade_volume, 0) AS total_trade_volume, + CASE + WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume + ELSE 0 -- or another value to indicate no trades + END AS order_trade_ratio, + CASE + WHEN net_volume_all.total_net_volume_all > 0 THEN + (n.net_volume / net_volume_all.total_net_volume_all) * 100 + ELSE 0 + END AS volume_percentage -- Calculate volume percentage +FROM ( + -- Step 2: Subquery for net_order_volume + SELECT + o.TRADER_ID, + t.DATE_TIME AS trade_time_window, + SUM(CASE + WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME + WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME + WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME + ELSE 0 END + ) AS net_volume, + COUNT(o.ORDER_ID) AS order_count -- Count the number of orders + FROM {order_10m} o + JOIN {trade_data_1b} t + ON o.TRADER_ID = t.TRADER_ID + WHERE o.SIDE = 'buy' + AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME + GROUP BY o.TRADER_ID, t.DATE_TIME +) AS n +LEFT JOIN ( + -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing) + SELECT + t.TRADER_ID, + t.DATE_TIME, + SUM(t.TRADE_VOLUME) AS total_trade_volume + FROM ( + -- Step 5: Subquery for relevant_trades + SELECT t1.* + FROM {trade_data_1b} t1 + WHERE t1.TRADE_SIDE = 'buy' + AND EXISTS ( + SELECT 1 + FROM {trade_data_1b} t2 + WHERE t2.TRADER_ID = t1.TRADER_ID + AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME + ) + ) AS t + GROUP BY t.DATE_TIME, t.TRADER_ID +) AS t +ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME - -- Subtract seconds from the end_time using date_add() with negative integer interval - date_add('second', -{time_window_s}, date_time) AS start_time, +-- New subquery for total net volume for all traders in the same time window +LEFT JOIN ( + SELECT + t.DATE_TIME AS trade_time_window, + SUM(CASE + WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME + WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME + WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME + ELSE 0 END + ) AS total_net_volume_all + FROM {order_10m} o + JOIN {trade_data_1b} t + ON o.TRADER_ID = t.TRADER_ID + WHERE o.SIDE = 'buy' + AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME + GROUP BY t.DATE_TIME +) AS net_volume_all +ON n.trade_time_window = net_volume_all.trade_time_window - -- Trade details - trade_price, - trade_volume, - trader_id, - - -- Calculate minimum price within the time window - MIN(trade_price) OVER ( - ORDER BY date_time - RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW - ) AS min_price, - - -- Calculate maximum price within the time window - MAX(trade_price) OVER ( - ORDER BY date_time - RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW - ) AS max_price, - - -- Calculate total trade volume within the time window - SUM(trade_volume) OVER ( - ORDER BY date_time - RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW - ) AS total_volume, - - -- Calculate participant's trade volume within the time window - SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER ( - PARTITION BY trader_id - ORDER BY date_time - RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW - ) AS participant_volume - FROM - {trade_data_1b} -) -SELECT - -- Select the time window details - start_time, - end_time, - - -- Select the participant (trader) ID - trader_id AS "Participant", - - -- Select the calculated min and max prices - min_price, - max_price, - - -- Calculate the price change percentage - (max_price - min_price) / NULLIF(min_price, 0) * 100 AS "Price Change (%)", - - -- Calculate the participant's volume as a percentage of total volume - (participant_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)", - - -- Participant volume - participant_volume, - - -- Select the total volume within the window - total_volume AS "Total Volume" -FROM - time_windows - limit 1000 +ORDER BY n.trade_time_window +limit 1000 """ @@ -84,26 +95,24 @@ class Scenario: seq = SQLQueryInterface(schema="trade_schema") def logic(self, **kwargs): validation_window = kwargs.get('validation_window') + spoofing_side = kwargs.get('buy') time_window_s = int(validation_window/1000) query_start_time = datetime.now() print("Query start time :",query_start_time) row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_10m_v3", + order_10m = 'order_10m', time_window_s = time_window_s) ) cols = [ - 'START_DATE_TIME', - 'END_DATE_TIME', - 'Focal_id', - 'MIN_PRICE', - 'MAX_PRICE', - 'PRICE_CHANGE_PCT', - 'PARTICIPANT_VOLUME_PCT', - 'PARTICIPANT_VOLUME', - 'TOTAL_VOLUME', + 'TRADER_ID', + 'trade_time_window', + 'net_volume', + 'order_count', + 'total_trade_volume', + 'order_trade_ratio', + 'volume_percentage' ] final_scenario_df = pd.DataFrame(row_list, columns = cols) - final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\ - final_scenario_df['TOTAL_VOLUME'] * 100 final_scenario_df['Segment'] = 'Default' final_scenario_df['SAR_FLAG'] = 'N' final_scenario_df['Risk'] = 'Low Risk' @@ -112,9 +121,15 @@ class Scenario: return final_scenario_df -# In[ ]: +# In[17]: scenario = Scenario() -scenario.logic() +scenario.logic(validation_window=300000) + + +# In[ ]: + + +