{ "cells": [ { "cell_type": "code", "execution_count": 16, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] }, "outputs": [], "source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"# SQL template for the scenario. Placeholders filled by Scenario.logic via str.format:\n",
"#   order_10m      - order table name\n",
"#   trade_data_1b  - trade table name\n",
"#   time_window_s  - look-back window in whole seconds\n",
"# One output row per (TRADER_ID, trade timestamp): net buy-order volume in the\n",
"# window before the trade, the order count, the trader's buy trade volume at that\n",
"# timestamp, their ratio, and the trader's share of all traders' net volume.\n",
"#\n",
"# NOTE(review): the EXISTS filter in the 'relevant_trades' subquery is satisfied by\n",
"# the row itself whenever time_window_s >= 0 (t1 lies inside its own window), so it\n",
"# currently filters nothing - confirm the intended condition.\n",
"# NOTE(review): the SQL comment says 'opposite side trades', but both the order\n",
"# filter and the trade filter select 'buy' - confirm which side is intended.\n",
"query = \"\"\"\n",
"SELECT\n",
"    n.TRADER_ID,\n",
"    n.trade_time_window,\n",
"    n.net_volume,\n",
"    n.order_count,  -- Include number of orders\n",
"    COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n",
"    CASE\n",
"        WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n",
"        ELSE 0  -- or another value to indicate no trades\n",
"    END AS order_trade_ratio,\n",
"    CASE\n",
"        WHEN net_volume_all.total_net_volume_all > 0 THEN\n",
"            (n.net_volume / net_volume_all.total_net_volume_all) * 100\n",
"        ELSE 0\n",
"    END AS volume_percentage  -- Calculate volume percentage\n",
"FROM (\n",
"    -- Step 2: Subquery for net_order_volume\n",
"    SELECT\n",
"        o.TRADER_ID,\n",
"        t.DATE_TIME AS trade_time_window,\n",
"        SUM(CASE\n",
"                WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
"                WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
"                WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
"                ELSE 0 END\n",
"        ) AS net_volume,\n",
"        COUNT(o.ORDER_ID) AS order_count  -- Count the number of orders\n",
"    FROM {order_10m} o\n",
"    JOIN {trade_data_1b} t\n",
"        ON o.TRADER_ID = t.TRADER_ID\n",
"    WHERE o.SIDE = 'buy'\n",
"        AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
"    GROUP BY o.TRADER_ID, t.DATE_TIME\n",
") AS n\n",
"LEFT JOIN (\n",
"    -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n",
"    SELECT\n",
"        t.TRADER_ID,\n",
"        t.DATE_TIME,\n",
"        SUM(t.TRADE_VOLUME) AS total_trade_volume\n",
"    FROM (\n",
"        -- Step 5: Subquery for relevant_trades\n",
"        SELECT t1.*\n",
"        FROM {trade_data_1b} t1\n",
"        WHERE t1.TRADE_SIDE = 'buy'\n",
"            AND EXISTS (\n",
"                SELECT 1\n",
"                FROM {trade_data_1b} t2\n",
"                WHERE t2.TRADER_ID = t1.TRADER_ID\n",
"                    AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n",
"            )\n",
"    ) AS t\n",
"    GROUP BY t.DATE_TIME, t.TRADER_ID\n",
") AS t\n",
"ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n",
"\n",
"-- New subquery for total net volume for all traders in the same time window\n",
"LEFT JOIN (\n",
"    SELECT\n",
"        t.DATE_TIME AS trade_time_window,\n",
"        SUM(CASE\n",
"                WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
"                WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
"                WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
"                ELSE 0 END\n",
"        ) AS total_net_volume_all\n",
"    FROM {order_10m} o\n",
"    JOIN {trade_data_1b} t\n",
"        ON o.TRADER_ID = t.TRADER_ID\n",
"    WHERE o.SIDE = 'buy'\n",
"        AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
"    GROUP BY t.DATE_TIME\n",
") AS net_volume_all\n",
"ON n.trade_time_window = net_volume_all.trade_time_window\n",
"\n",
"ORDER BY n.trade_time_window, n.TRADER_ID\n",
"limit 1000\n",
"\"\"\"\n",
"\n",
"\n",
"class Scenario:\n",
"    \"\"\"Scenario that runs the module-level ``query`` against ``trade_schema``.\n",
"\n",
"    ``logic`` returns one row per (trader, trade timestamp) from the query,\n",
"    with constant segment / SAR / risk labels appended.\n",
"    \"\"\"\n",
"\n",
"    # Query interface shared by all instances; created when the class body runs.\n",
"    seq = SQLQueryInterface(schema=\"trade_schema\")\n",
"\n",
"    def logic(self, **kwargs):\n",
"        \"\"\"Execute the scenario query and return the labelled result frame.\n",
"\n",
"        Keyword Args:\n",
"            validation_window: look-back window in milliseconds (required).\n",
"                It is divided by 1000 and truncated to whole seconds before\n",
"                being substituted into the query.\n",
"\n",
"        Returns:\n",
"            pandas.DataFrame with columns focal_ID, trade_time_window,\n",
"            net_volume, order_count, total_trade_volume, order_trade_ratio,\n",
"            volume_percentage, Segment, SAR_FLAG and Risk. Rows containing\n",
"            any null value are dropped.\n",
"\n",
"        Raises:\n",
"            TypeError: if ``validation_window`` is missing, None or non-numeric.\n",
"        \"\"\"\n",
"        validation_window = kwargs.get('validation_window')\n",
"        if validation_window is None:\n",
"            # Fail with a clear message instead of the opaque `None / 1000` TypeError.\n",
"            raise TypeError(\n",
"                \"logic() requires a numeric 'validation_window' keyword argument (milliseconds)\"\n",
"            )\n",
"        # NOTE(review): the side is hard-coded to 'buy' inside the query; the previous\n",
"        # code read kwargs.get('buy') into a local that was never used.\n",
"        # int() also guarantees that only an integer literal reaches the formatted SQL.\n",
"        time_window_s = int(validation_window / 1000)\n",
"\n",
"        query_start_time = datetime.now()\n",
"        print(\"Query start time :\", query_start_time)\n",
"        row_list = self.seq.execute_raw(\n",
"            query.format(\n",
"                trade_data_1b='trade_10m_v3',\n",
"                order_10m='order_10m',\n",
"                time_window_s=time_window_s,\n",
"            )\n",
"        )\n",
"\n",
"        # Column names in the order of the query's SELECT list\n",
"        # (TRADER_ID is exposed as focal_ID).\n",
"        cols = [\n",
"            'focal_ID',\n",
"            'trade_time_window',\n",
"            'net_volume',\n",
"            'order_count',\n",
"            'total_trade_volume',\n",
"            'order_trade_ratio',\n",
"            'volume_percentage',\n",
"        ]\n",
"        final_scenario_df = pd.DataFrame(row_list, columns=cols)\n",
"        final_scenario_df['Segment'] = 'Default'\n",
"        final_scenario_df['SAR_FLAG'] = 'N'\n",
"        final_scenario_df['Risk'] = 'Low Risk'\n",
"        # Return a new frame rather than mutating in place.\n",
"        return final_scenario_df.dropna()\n"
] }, { "cell_type": "code", "execution_count": 17, "id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Query start time : 2024-10-14 06:23:07.242919\n" ] }, { "data": { "text/html": [ "
| \n", " | TRADER_ID | \n", "trade_time_window | \n", "net_volume | \n", "order_count | \n", "total_trade_volume | \n", "order_trade_ratio | \n", "volume_percentage | \n", "Segment | \n", "SAR_FLAG | \n", "Risk | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "3097728207 | \n", "2024-01-01 00:03:00 | \n", "-92.0 | \n", "1 | \n", "92 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 1 | \n", "3228645322 | \n", "2024-01-01 00:06:00 | \n", "-689.0 | \n", "1 | \n", "689 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 2 | \n", "2701872727 | \n", "2024-01-01 00:09:00 | \n", "-42.0 | \n", "1 | \n", "42 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 3 | \n", "1659056655 | \n", "2024-01-01 00:11:00 | \n", "-167.0 | \n", "1 | \n", "167 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 4 | \n", "1661288887 | \n", "2024-01-01 00:13:00 | \n", "-756.0 | \n", "1 | \n", "756 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 995 | \n", "3382197985 | \n", "2024-01-01 02:30:00 | \n", "-159.0 | \n", "1 | \n", "159 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 996 | \n", "1129008990 | \n", "2024-01-01 02:30:00 | \n", "-582.0 | \n", "1 | \n", "582 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 997 | \n", "2944122893 | \n", "2024-01-01 02:30:00 | \n", "-65.0 | \n", "1 | \n", "65 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 998 | \n", "2910876405 | \n", "2024-01-01 02:30:00 | \n", "-117.0 | \n", "1 | \n", "117 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
| 999 | \n", "1816942226 | \n", "2024-01-01 02:30:00 | \n", "-732.0 | \n", "1 | \n", "732 | \n", "-1.0 | \n", "0.0 | \n", "Default | \n", "N | \n", "Low Risk | \n", "
1000 rows × 10 columns
\n", "