61/main.ipynb
2024-10-15 07:39:06 +00:00

438 lines
15 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"# SQL template for the spoofing scenario, filled in with str.format(). Placeholders:\n",
"#   {trade_data_1b}           trade table name\n",
"#   {spoofing_time_window_s}  order look-back window, whole seconds\n",
"#   {trade_time_window_s}     trade look-back window, whole seconds\n",
"#   {spoofing_side}           side of the suspected spoof orders ('buy' or 'sell')\n",
"query = \"\"\"\n",
"WITH\n",
"-- Pair each trade with every spoofing-side order (from any trader) whose timestamp\n",
"-- falls inside the look-back window that ends at the trade timestamp.\n",
"trade_window AS (\n",
"    SELECT\n",
"        t.trade_id,\n",
"        t.trader_id,\n",
"        t.date_time AS trade_time,\n",
"        t.trade_side,\n",
"        t.trade_volume,\n",
"        o.trader_id AS order_trader_id,\n",
"        o.date_time AS order_time,\n",
"        o.order_volume,\n",
"        o.order_status,\n",
"        o.order_price,\n",
"        o.side AS order_side\n",
"    FROM\n",
"        {trade_data_1b} t\n",
"    -- Written as INNER JOIN: the WHERE filter on o.side already discarded the\n",
"    -- NULL-extended rows of the former LEFT JOIN, so the result set is unchanged.\n",
"    INNER JOIN\n",
"        order_10m o ON o.date_time BETWEEN t.date_time - INTERVAL '{spoofing_time_window_s}' SECOND\n",
"                                       AND t.date_time\n",
"    WHERE\n",
"        o.side = '{spoofing_side}'\n",
"),\n",
"\n",
"-- Net spoofing-side order volume placed by the trader who executed the trade\n",
"net_order_volume_cte AS (\n",
"    SELECT\n",
"        trader_id,\n",
"        trade_id,\n",
"        trade_time,\n",
"        SUM(CASE\n",
"                WHEN order_status = 'new' THEN order_volume\n",
"                WHEN order_status = 'cancelled' THEN -order_volume\n",
"                WHEN order_status = 'fulfilled' THEN -order_volume\n",
"                ELSE 0\n",
"            END) AS net_order_volume,\n",
"        COUNT(*) AS num_orders\n",
"    FROM trade_window\n",
"    WHERE order_trader_id = trader_id  -- keep only the orders of the executing trader\n",
"    GROUP BY trader_id, trade_id, trade_time\n",
"),\n",
"\n",
"-- Net spoofing-side order volume across all traders in the same window\n",
"net_order_volume_all_cte AS (\n",
"    SELECT\n",
"        trade_id,\n",
"        SUM(CASE\n",
"                WHEN order_status = 'new' THEN order_volume\n",
"                WHEN order_status = 'cancelled' THEN -order_volume\n",
"                WHEN order_status = 'fulfilled' THEN -order_volume\n",
"                ELSE 0\n",
"            END) AS net_order_volume_all\n",
"    FROM trade_window\n",
"    GROUP BY trade_id\n",
"),\n",
"\n",
"-- Total volume traded on the opposite side (e.g. sell when spoofing on buy) during\n",
"-- the trade look-back window that ends at each trade.\n",
"-- The window has to compare two different rows (w against t): comparing\n",
"-- t.date_time with itself is always true and applies no window at all.\n",
"-- NOTE(review): window trades are limited to the same trader as the focal trade;\n",
"-- confirm this matches the intended definition of the order/trade ratio.\n",
"opposite_trade_volume_cte AS (\n",
"    SELECT\n",
"        t.trader_id,\n",
"        t.trade_id,\n",
"        SUM(w.trade_volume) AS total_trade_volume\n",
"    FROM {trade_data_1b} t\n",
"    INNER JOIN {trade_data_1b} w\n",
"        ON w.trader_id = t.trader_id\n",
"       AND w.date_time BETWEEN t.date_time - INTERVAL '{trade_time_window_s}' SECOND\n",
"                           AND t.date_time\n",
"    WHERE\n",
"        w.trade_side = CASE WHEN '{spoofing_side}' = 'buy' THEN 'sell' ELSE 'buy' END\n",
"    GROUP BY t.trader_id, t.trade_id\n",
")\n",
"\n",
"-- Final result with calculated spoofing indicators\n",
"SELECT\n",
"    n.trade_id,\n",
"    n.trader_id,\n",
"    n.trade_time,\n",
"    n.num_orders,\n",
"    n.net_order_volume,\n",
"    CASE\n",
"        WHEN o.total_trade_volume > 0 THEN n.net_order_volume / o.total_trade_volume\n",
"        ELSE NULL\n",
"    END AS order_trade_ratio,\n",
"    CASE\n",
"        WHEN a.net_order_volume_all > 0 THEN n.net_order_volume / a.net_order_volume_all\n",
"        ELSE NULL\n",
"    END AS volume_percentage\n",
"FROM\n",
"    net_order_volume_cte n\n",
"LEFT JOIN\n",
"    opposite_trade_volume_cte o ON n.trade_id = o.trade_id AND n.trader_id = o.trader_id\n",
"LEFT JOIN\n",
"    net_order_volume_all_cte a ON n.trade_id = a.trade_id\n",
"WHERE\n",
"    n.net_order_volume > 0  -- only positive net order volumes (potential spoofing)\n",
"-- NOTE(review): there is no ORDER BY, so LIMIT keeps an arbitrary 1000 rows\n",
"LIMIT 1000\n",
"\"\"\"\n",
"\n",
"class Scenario:\n",
"    \"\"\"Spoofing scenario that runs the module-level SQL template ``query``.\"\"\"\n",
"\n",
"    # Query interface shared by all instances; created when the class body runs.\n",
"    seq = SQLQueryInterface(schema=\"internal\")\n",
"\n",
"    # Only these literals may be interpolated into the SQL text as the spoofing side.\n",
"    _VALID_SPOOFING_SIDES = ('buy', 'sell')\n",
"\n",
"    def logic(self, **params):\n",
"        \"\"\"Format and execute the spoofing query and return its rows as a DataFrame.\n",
"\n",
"        Recognised keyword parameters:\n",
"            spoofing_time_window: order look-back window in milliseconds (default 300000).\n",
"            trade_time_window: trade look-back window in milliseconds (default 300000).\n",
"            spoofing_side: 'buy' or 'sell' (default 'buy').\n",
"\n",
"        Returns:\n",
"            pandas.DataFrame with the seven query columns (the trader id is exposed as\n",
"            'focal_id') plus constant 'segment', 'sar_flag' and 'risk' columns.\n",
"\n",
"        Raises:\n",
"            ValueError: if spoofing_side is not 'buy' or 'sell'.\n",
"        \"\"\"\n",
"        spoofing_time_window = params.get('spoofing_time_window', 300000)  # default to 300,000 ms (5 minutes)\n",
"        trade_time_window = params.get('trade_time_window', 300000)\n",
"        spoofing_side = params.get('spoofing_side', 'buy')\n",
"\n",
"        # NOTE(review): 'use_volume_for_order_trade_ratio', 'ignore_trade_after_spoofing' and\n",
"        # 'ignore_price_improvement' used to be read here but were never applied to the query.\n",
"        # They are still accepted through **params and ignored.\n",
"\n",
"        # The side is spliced into the SQL text, so accept only the known literals.\n",
"        if spoofing_side not in self._VALID_SPOOFING_SIDES:\n",
"            raise ValueError(\n",
"                f\"spoofing_side must be one of {self._VALID_SPOOFING_SIDES}, got {spoofing_side!r}\"\n",
"            )\n",
"\n",
"        # Convert time windows from milliseconds to whole seconds. int() also guarantees\n",
"        # that only a number is interpolated into the INTERVAL literals.\n",
"        spoofing_time_window_s = int(spoofing_time_window / 1000)\n",
"        trade_time_window_s = int(trade_time_window / 1000)\n",
"\n",
"        query_start_time = datetime.now()\n",
"        print(\"Query start time:\", query_start_time)\n",
"\n",
"        # Execute the query with the parameters passed from `params`\n",
"        row_list = self.seq.execute_raw(query.format(\n",
"            trade_data_1b=\"trade_10m_v3\",  # Replace with actual table name\n",
"            spoofing_time_window_s=spoofing_time_window_s,\n",
"            trade_time_window_s=trade_time_window_s,\n",
"            spoofing_side=spoofing_side\n",
"        ))\n",
"\n",
"        # Column names, in the order of the final SELECT list of the query\n",
"        cols = [\n",
"            'trade_id', 'focal_id', 'trade_time', 'num_orders',\n",
"            'net_order_volume', 'order_trade_ratio', 'volume_percentage'\n",
"        ]\n",
"\n",
"        # Create a DataFrame from the query result\n",
"        final_scenario_df = pd.DataFrame(row_list, columns=cols)\n",
"\n",
"        # Constant columns added to every row\n",
"        final_scenario_df['segment'] = 'Default'\n",
"        final_scenario_df['sar_flag'] = 'N'\n",
"        final_scenario_df['risk'] = 'Low Risk'\n",
"\n",
"        return final_scenario_df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Query start time : 2024-10-14 07:40:43.846637\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>focal_ID</th>\n",
" <th>trade_time_window</th>\n",
" <th>net_volume</th>\n",
" <th>order_count</th>\n",
" <th>total_trade_volume</th>\n",
" <th>order_trade_ratio</th>\n",
" <th>volume_percentage</th>\n",
" <th>Segment</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>Risk</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3097728207</td>\n",
" <td>2024-01-01 00:03:00</td>\n",
" <td>-92.0</td>\n",
" <td>1</td>\n",
" <td>92</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3228645322</td>\n",
" <td>2024-01-01 00:06:00</td>\n",
" <td>-689.0</td>\n",
" <td>1</td>\n",
" <td>689</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2701872727</td>\n",
" <td>2024-01-01 00:09:00</td>\n",
" <td>-42.0</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1659056655</td>\n",
" <td>2024-01-01 00:11:00</td>\n",
" <td>-167.0</td>\n",
" <td>1</td>\n",
" <td>167</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1661288887</td>\n",
" <td>2024-01-01 00:13:00</td>\n",
" <td>-756.0</td>\n",
" <td>1</td>\n",
" <td>756</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>1945772682</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-854.0</td>\n",
" <td>1</td>\n",
" <td>854</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>2137478041</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-926.0</td>\n",
" <td>1</td>\n",
" <td>926</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>7138329164</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-433.0</td>\n",
" <td>1</td>\n",
" <td>433</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>1867007441</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-626.0</td>\n",
" <td>1</td>\n",
" <td>626</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>2347906349</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-69.0</td>\n",
" <td>1</td>\n",
" <td>69</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" focal_ID trade_time_window net_volume order_count \\\n",
"0 3097728207 2024-01-01 00:03:00 -92.0 1 \n",
"1 3228645322 2024-01-01 00:06:00 -689.0 1 \n",
"2 2701872727 2024-01-01 00:09:00 -42.0 1 \n",
"3 1659056655 2024-01-01 00:11:00 -167.0 1 \n",
"4 1661288887 2024-01-01 00:13:00 -756.0 1 \n",
".. ... ... ... ... \n",
"95 1945772682 2024-01-01 00:43:00 -854.0 1 \n",
"96 2137478041 2024-01-01 00:43:00 -926.0 1 \n",
"97 7138329164 2024-01-01 00:43:00 -433.0 1 \n",
"98 1867007441 2024-01-01 00:43:00 -626.0 1 \n",
"99 2347906349 2024-01-01 00:43:00 -69.0 1 \n",
"\n",
" total_trade_volume order_trade_ratio volume_percentage Segment \\\n",
"0 92 -1.0 0.0 Default \n",
"1 689 -1.0 0.0 Default \n",
"2 42 -1.0 0.0 Default \n",
"3 167 -1.0 0.0 Default \n",
"4 756 -1.0 0.0 Default \n",
".. ... ... ... ... \n",
"95 854 -1.0 0.0 Default \n",
"96 926 -1.0 0.0 Default \n",
"97 433 -1.0 0.0 Default \n",
"98 626 -1.0 0.0 Default \n",
"99 69 -1.0 0.0 Default \n",
"\n",
" SAR_FLAG Risk \n",
"0 N Low Risk \n",
"1 N Low Risk \n",
"2 N Low Risk \n",
"3 N Low Risk \n",
"4 N Low Risk \n",
".. ... ... \n",
"95 N Low Risk \n",
"96 N Low Risk \n",
"97 N Low Risk \n",
"98 N Low Risk \n",
"99 N Low Risk \n",
"\n",
"[100 rows x 10 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scenario = Scenario()\n",
"# Pass a keyword that logic() actually reads; the former 'validation_window' keyword\n",
"# was silently dropped by **params. 300000 ms equals the default, so the result is unchanged.\n",
"scenario.logic(spoofing_time_window=300000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}