diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index f8d7d2d..cec154d 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,9 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from datetime import datetime, timedelta\n", @@ -12,71 +14,83 @@ "from tms_data_interface import SQLQueryInterface\n", "\n", "query = \"\"\"\n", - "WITH time_windows AS (\n", - " SELECT\n", - " -- End time is the current trade time\n", - " date_time AS end_time,\n", + "SELECT \n", + " n.TRADER_ID,\n", + " n.trade_time_window,\n", + " n.net_volume,\n", + " n.order_count, -- Include number of orders\n", + " COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n", + " CASE \n", + " WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n", + " ELSE 0 -- or another value to indicate no trades\n", + " END AS order_trade_ratio,\n", + " CASE \n", + " WHEN net_volume_all.total_net_volume_all > 0 THEN \n", + " (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n", + " ELSE 0 \n", + " END AS volume_percentage -- Calculate volume percentage\n", + "FROM (\n", + " -- Step 2: Subquery for net_order_volume\n", + " SELECT \n", + " o.TRADER_ID,\n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS net_volume,\n", + " COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY o.TRADER_ID, t.DATE_TIME\n", + ") AS n\n", + "LEFT JOIN (\n", + " -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n", + " SELECT \n", + " t.TRADER_ID,\n", + " t.DATE_TIME,\n", + " SUM(t.TRADE_VOLUME) AS total_trade_volume\n", + " FROM (\n", + " -- Step 5: Subquery for relevant_trades\n", + " SELECT t1.*\n", + " FROM {trade_data_1b} t1\n", + " WHERE t1.TRADE_SIDE = 'buy'\n", + " AND EXISTS (\n", + " SELECT 1\n", + " FROM {trade_data_1b} t2\n", + " WHERE t2.TRADER_ID = t1.TRADER_ID\n", + " AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n", + " )\n", + " ) AS t\n", + " GROUP BY t.DATE_TIME, t.TRADER_ID\n", + ") AS t \n", + "ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n", "\n", - " -- Subtract seconds from the end_time using date_add() with negative integer interval\n", - " date_add('second', -{time_window_s}, date_time) AS start_time,\n", + "-- New subquery for total net volume for all traders in the same time window\n", + "LEFT JOIN (\n", + " SELECT \n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS total_net_volume_all\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY t.DATE_TIME\n", + ") AS net_volume_all\n", + "ON n.trade_time_window = net_volume_all.trade_time_window\n", "\n", - " -- Trade details\n", - " trade_price,\n", - " trade_volume,\n", - " trader_id,\n", - "\n", - " -- Calculate minimum price within the time window\n", - " MIN(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS min_price,\n", - "\n", - " -- Calculate maximum price within the time window\n", - " MAX(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS max_price,\n", - "\n", - " -- Calculate total trade volume within the time window\n", - " SUM(trade_volume) OVER ( \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS total_volume,\n", - "\n", - " -- Calculate participant's trade volume within the time window\n", - " SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n", - " PARTITION BY trader_id \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS participant_volume\n", - " FROM\n", - " {trade_data_1b}\n", - ")\n", - "SELECT\n", - " -- Select the time window details\n", - " start_time,\n", - " end_time,\n", - "\n", - " -- Select the participant (trader) ID\n", - " trader_id AS \"Participant\",\n", - "\n", - " -- Select the calculated min and max prices\n", - " min_price,\n", - " max_price,\n", - "\n", - " -- Calculate the price change percentage\n", - " (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n", - "\n", - " -- Calculate the participant's volume as a percentage of total volume\n", - " (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n", - "\n", - " -- Participant volume\n", - " participant_volume,\n", - "\n", - " -- Select the total volume within the window\n", - " total_volume AS \"Total Volume\"\n", - "FROM\n", - " time_windows\n", + "ORDER BY n.trade_time_window\n", + "limit 1000\n", "\"\"\"\n", "\n", "\n", @@ -86,26 +100,24 @@ " seq = SQLQueryInterface(schema=\"trade_schema\")\n", " def logic(self, **kwargs):\n", " validation_window = kwargs.get('validation_window')\n", + " spoofing_side = kwargs.get('buy')\n", " time_window_s = int(validation_window/1000)\n", " query_start_time = datetime.now()\n", " print(\"Query start time :\",query_start_time)\n", " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n", + " order_10m = 'order_10m',\n", " time_window_s = time_window_s)\n", " )\n", " cols = [\n", - " 'START_DATE_TIME',\n", - " 'END_DATE_TIME',\n", - " 'Focal_id',\n", - " 'MIN_PRICE',\n", - " 'MAX_PRICE',\n", - " 'PRICE_CHANGE_PCT',\n", - " 'PARTICIPANT_VOLUME_PCT',\n", - " 'PARTICIPANT_VOLUME',\n", - " 'TOTAL_VOLUME',\n", + " 'TRADER_ID',\n", + " 'trade_time_window',\n", + " 'net_volume',\n", + " 'order_count',\n", + " 'total_trade_volume',\n", + " 'order_trade_ratio',\n", + " 'volume_percentage'\n", " ]\n", " final_scenario_df = pd.DataFrame(row_list, columns = cols)\n", - " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n", - " final_scenario_df['TOTAL_VOLUME'] * 100\n", " final_scenario_df['Segment'] = 'Default'\n", " final_scenario_df['SAR_FLAG'] = 'N'\n", " final_scenario_df['Risk'] = 'Low Risk'\n", @@ -113,6 +125,35 @@ " # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n", " return final_scenario_df\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query start time : 2024-10-14 06:23:07.242919\n" + ] + } + ], + "source": [ + "scenario = Scenario()\n", + "scenario.logic(validation_window=300000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/main.ipynb b/main.ipynb index afa552e..7ef04be 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,72 +14,83 @@ "from tms_data_interface import SQLQueryInterface\n", "\n", "query = \"\"\"\n", - "WITH time_windows AS (\n", - " SELECT\n", - " -- End time is the current trade time\n", - " date_time AS end_time,\n", + "SELECT \n", + " n.TRADER_ID,\n", + " n.trade_time_window,\n", + " n.net_volume,\n", + " n.order_count, -- Include number of orders\n", + " COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n", + " CASE \n", + " WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n", + " ELSE 0 -- or another value to indicate no trades\n", + " END AS order_trade_ratio,\n", + " CASE \n", + " WHEN net_volume_all.total_net_volume_all > 0 THEN \n", + " (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n", + " ELSE 0 \n", + " END AS volume_percentage -- Calculate volume percentage\n", + "FROM (\n", + " -- Step 2: Subquery for net_order_volume\n", + " SELECT \n", + " o.TRADER_ID,\n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS net_volume,\n", + " COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY o.TRADER_ID, t.DATE_TIME\n", + ") AS n\n", + "LEFT JOIN (\n", + " -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n", + " SELECT \n", + " t.TRADER_ID,\n", + " t.DATE_TIME,\n", + " SUM(t.TRADE_VOLUME) AS total_trade_volume\n", + " FROM (\n", + " -- Step 5: Subquery for relevant_trades\n", + " SELECT t1.*\n", + " FROM {trade_data_1b} t1\n", + " WHERE t1.TRADE_SIDE = 'buy'\n", + " AND EXISTS (\n", + " SELECT 1\n", + " FROM {trade_data_1b} t2\n", + " WHERE t2.TRADER_ID = t1.TRADER_ID\n", + " AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n", + " )\n", + " ) AS t\n", + " GROUP BY t.DATE_TIME, t.TRADER_ID\n", + ") AS t \n", + "ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n", "\n", - " -- Subtract seconds from the end_time using date_add() with negative integer interval\n", - " date_add('second', -{time_window_s}, date_time) AS start_time,\n", + "-- New subquery for total net volume for all traders in the same time window\n", + "LEFT JOIN (\n", + " SELECT \n", + " t.DATE_TIME AS trade_time_window,\n", + " SUM(CASE \n", + " WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n", + " WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n", + " ELSE 0 END\n", + " ) AS total_net_volume_all\n", + " FROM {order_10m} o\n", + " JOIN {trade_data_1b} t\n", + " ON o.TRADER_ID = t.TRADER_ID\n", + " WHERE o.SIDE = 'buy'\n", + " AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n", + " GROUP BY t.DATE_TIME\n", + ") AS net_volume_all\n", + "ON n.trade_time_window = net_volume_all.trade_time_window\n", "\n", - " -- Trade details\n", - " trade_price,\n", - " trade_volume,\n", - " trader_id,\n", - "\n", - " -- Calculate minimum price within the time window\n", - " MIN(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS min_price,\n", - "\n", - " -- Calculate maximum price within the time window\n", - " MAX(trade_price) OVER (\n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS max_price,\n", - "\n", - " -- Calculate total trade volume within the time window\n", - " SUM(trade_volume) OVER ( \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS total_volume,\n", - "\n", - " -- Calculate participant's trade volume within the time window\n", - " SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n", - " PARTITION BY trader_id \n", - " ORDER BY date_time \n", - " RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n", - " ) AS participant_volume\n", - " FROM\n", - " {trade_data_1b}\n", - ")\n", - "SELECT\n", - " -- Select the time window details\n", - " start_time,\n", - " end_time,\n", - "\n", - " -- Select the participant (trader) ID\n", - " trader_id AS \"Participant\",\n", - "\n", - " -- Select the calculated min and max prices\n", - " min_price,\n", - " max_price,\n", - "\n", - " -- Calculate the price change percentage\n", - " (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n", - "\n", - " -- Calculate the participant's volume as a percentage of total volume\n", - " (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n", - "\n", - " -- Participant volume\n", - " participant_volume,\n", - "\n", - " -- Select the total volume within the window\n", - " total_volume AS \"Total Volume\"\n", - "FROM\n", - " time_windows\n", - " limit 1000\n", + "ORDER BY n.trade_time_window\n", + "limit 1000\n", "\"\"\"\n", "\n", "\n", @@ -89,26 +100,24 @@ " seq = SQLQueryInterface(schema=\"trade_schema\")\n", " def logic(self, **kwargs):\n", " validation_window = kwargs.get('validation_window')\n", + " spoofing_side = kwargs.get('buy')\n", " time_window_s = int(validation_window/1000)\n", " query_start_time = datetime.now()\n", " print(\"Query start time :\",query_start_time)\n", " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n", + " order_10m = 'order_10m',\n", " time_window_s = time_window_s)\n", " )\n", " cols = [\n", - " 'START_DATE_TIME',\n", - " 'END_DATE_TIME',\n", - " 'Focal_id',\n", - " 'MIN_PRICE',\n", - " 'MAX_PRICE',\n", - " 'PRICE_CHANGE_PCT',\n", - " 'PARTICIPANT_VOLUME_PCT',\n", - " 'PARTICIPANT_VOLUME',\n", - " 'TOTAL_VOLUME',\n", + " 'TRADER_ID',\n", + " 'trade_time_window',\n", + " 'net_volume',\n", + " 'order_count',\n", + " 'total_trade_volume',\n", + " 'order_trade_ratio',\n", + " 'volume_percentage'\n", " ]\n", " final_scenario_df = pd.DataFrame(row_list, columns = cols)\n", - " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n", - " final_scenario_df['TOTAL_VOLUME'] * 100\n", " final_scenario_df['Segment'] = 'Default'\n", " final_scenario_df['SAR_FLAG'] = 'N'\n", " final_scenario_df['Risk'] = 'Low Risk'\n", @@ -119,14 +128,261 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c", - "metadata": {}, - "outputs": [], + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Query start time : 2024-10-14 06:23:07.242919\n" + ] + }, + { + "data": { + "text/html": [ + "
| \n", + " | TRADER_ID | \n", + "trade_time_window | \n", + "net_volume | \n", + "order_count | \n", + "total_trade_volume | \n", + "order_trade_ratio | \n", + "volume_percentage | \n", + "Segment | \n", + "SAR_FLAG | \n", + "Risk | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "3097728207 | \n", + "2024-01-01 00:03:00 | \n", + "-92.0 | \n", + "1 | \n", + "92 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 1 | \n", + "3228645322 | \n", + "2024-01-01 00:06:00 | \n", + "-689.0 | \n", + "1 | \n", + "689 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 2 | \n", + "2701872727 | \n", + "2024-01-01 00:09:00 | \n", + "-42.0 | \n", + "1 | \n", + "42 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 3 | \n", + "1659056655 | \n", + "2024-01-01 00:11:00 | \n", + "-167.0 | \n", + "1 | \n", + "167 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 4 | \n", + "1661288887 | \n", + "2024-01-01 00:13:00 | \n", + "-756.0 | \n", + "1 | \n", + "756 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 995 | \n", + "3382197985 | \n", + "2024-01-01 02:30:00 | \n", + "-159.0 | \n", + "1 | \n", + "159 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 996 | \n", + "1129008990 | \n", + "2024-01-01 02:30:00 | \n", + "-582.0 | \n", + "1 | \n", + "582 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 997 | \n", + "2944122893 | \n", + "2024-01-01 02:30:00 | \n", + "-65.0 | \n", + "1 | \n", + "65 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 998 | \n", + "2910876405 | \n", + "2024-01-01 02:30:00 | \n", + "-117.0 | \n", + "1 | \n", + "117 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
| 999 | \n", + "1816942226 | \n", + "2024-01-01 02:30:00 | \n", + "-732.0 | \n", + "1 | \n", + "732 | \n", + "-1.0 | \n", + "0.0 | \n", + "Default | \n", + "N | \n", + "Low Risk | \n", + "
1000 rows × 10 columns
\n", + "