61/main.ipynb
2024-10-14 07:37:45 +00:00

409 lines
14 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"query = \"\"\"\n",
"SELECT \n",
" n.TRADER_ID,\n",
" n.trade_time_window,\n",
" n.net_volume,\n",
" n.order_count, -- Include number of orders\n",
" COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n",
" CASE \n",
" WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n",
" ELSE 0 -- or another value to indicate no trades\n",
" END AS order_trade_ratio,\n",
" CASE \n",
" WHEN net_volume_all.total_net_volume_all > 0 THEN \n",
" (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n",
" ELSE 0 \n",
" END AS volume_percentage -- Calculate volume percentage\n",
"FROM (\n",
" -- Step 2: Subquery for net_order_volume\n",
" SELECT \n",
" o.TRADER_ID,\n",
" t.DATE_TIME AS trade_time_window,\n",
" SUM(CASE \n",
" WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
" ELSE 0 END\n",
" ) AS net_volume,\n",
" COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n",
" FROM {order_10m} o\n",
" JOIN {trade_data_1b} t\n",
" ON o.TRADER_ID = t.TRADER_ID\n",
" WHERE o.SIDE = 'buy'\n",
" AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
" GROUP BY o.TRADER_ID, t.DATE_TIME\n",
") AS n\n",
"LEFT JOIN (\n",
" -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n",
" SELECT \n",
" t.TRADER_ID,\n",
" t.DATE_TIME,\n",
" SUM(t.TRADE_VOLUME) AS total_trade_volume\n",
" FROM (\n",
" -- Step 5: Subquery for relevant_trades\n",
" SELECT t1.*\n",
" FROM {trade_data_1b} t1\n",
" WHERE t1.TRADE_SIDE = 'buy'\n",
" AND EXISTS (\n",
" SELECT 1\n",
" FROM {trade_data_1b} t2\n",
" WHERE t2.TRADER_ID = t1.TRADER_ID\n",
" AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n",
" )\n",
" ) AS t\n",
" GROUP BY t.DATE_TIME, t.TRADER_ID\n",
") AS t \n",
"ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n",
"\n",
"-- New subquery for total net volume for all traders in the same time window\n",
"LEFT JOIN (\n",
" SELECT \n",
" t.DATE_TIME AS trade_time_window,\n",
" SUM(CASE \n",
" WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
" ELSE 0 END\n",
" ) AS total_net_volume_all\n",
" FROM {order_10m} o\n",
" JOIN {trade_data_1b} t\n",
" ON o.TRADER_ID = t.TRADER_ID\n",
" WHERE o.SIDE = 'buy'\n",
" AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
" GROUP BY t.DATE_TIME\n",
") AS net_volume_all\n",
"ON n.trade_time_window = net_volume_all.trade_time_window\n",
"\n",
"ORDER BY n.trade_time_window\n",
"\"\"\"\n",
"\n",
"\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"class Scenario:\n",
" seq = SQLQueryInterface(schema=\"trade_schema\")\n",
" def logic(self, **kwargs):\n",
" validation_window = kwargs.get('validation_window')\n",
" spoofing_side = kwargs.get('buy')\n",
" time_window_s = int(validation_window/1000)\n",
" query_start_time = datetime.now()\n",
" print(\"Query start time :\",query_start_time)\n",
" row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n",
" order_10m = 'order_10m',\n",
" time_window_s = time_window_s)\n",
" )\n",
" cols = [\n",
" 'focal_ID',\n",
" 'trade_time_window',\n",
" 'net_volume',\n",
" 'order_count',\n",
" 'total_trade_volume',\n",
" 'order_trade_ratio',\n",
" 'volume_percentage'\n",
" ]\n",
" final_scenario_df = pd.DataFrame(row_list, columns = cols)\n",
" final_scenario_df['Segment'] = 'Default'\n",
" final_scenario_df['SAR_FLAG'] = 'N'\n",
" final_scenario_df['Risk'] = 'Low Risk'\n",
" final_scenario_df.dropna(inplace=True)\n",
" # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n",
" return final_scenario_df\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Query start time : 2024-10-14 06:23:07.242919\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>TRADER_ID</th>\n",
" <th>trade_time_window</th>\n",
" <th>net_volume</th>\n",
" <th>order_count</th>\n",
" <th>total_trade_volume</th>\n",
" <th>order_trade_ratio</th>\n",
" <th>volume_percentage</th>\n",
" <th>Segment</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>Risk</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3097728207</td>\n",
" <td>2024-01-01 00:03:00</td>\n",
" <td>-92.0</td>\n",
" <td>1</td>\n",
" <td>92</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3228645322</td>\n",
" <td>2024-01-01 00:06:00</td>\n",
" <td>-689.0</td>\n",
" <td>1</td>\n",
" <td>689</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2701872727</td>\n",
" <td>2024-01-01 00:09:00</td>\n",
" <td>-42.0</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1659056655</td>\n",
" <td>2024-01-01 00:11:00</td>\n",
" <td>-167.0</td>\n",
" <td>1</td>\n",
" <td>167</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1661288887</td>\n",
" <td>2024-01-01 00:13:00</td>\n",
" <td>-756.0</td>\n",
" <td>1</td>\n",
" <td>756</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>3382197985</td>\n",
" <td>2024-01-01 02:30:00</td>\n",
" <td>-159.0</td>\n",
" <td>1</td>\n",
" <td>159</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>1129008990</td>\n",
" <td>2024-01-01 02:30:00</td>\n",
" <td>-582.0</td>\n",
" <td>1</td>\n",
" <td>582</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>2944122893</td>\n",
" <td>2024-01-01 02:30:00</td>\n",
" <td>-65.0</td>\n",
" <td>1</td>\n",
" <td>65</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>2910876405</td>\n",
" <td>2024-01-01 02:30:00</td>\n",
" <td>-117.0</td>\n",
" <td>1</td>\n",
" <td>117</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>1816942226</td>\n",
" <td>2024-01-01 02:30:00</td>\n",
" <td>-732.0</td>\n",
" <td>1</td>\n",
" <td>732</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" TRADER_ID trade_time_window net_volume order_count \\\n",
"0 3097728207 2024-01-01 00:03:00 -92.0 1 \n",
"1 3228645322 2024-01-01 00:06:00 -689.0 1 \n",
"2 2701872727 2024-01-01 00:09:00 -42.0 1 \n",
"3 1659056655 2024-01-01 00:11:00 -167.0 1 \n",
"4 1661288887 2024-01-01 00:13:00 -756.0 1 \n",
".. ... ... ... ... \n",
"995 3382197985 2024-01-01 02:30:00 -159.0 1 \n",
"996 1129008990 2024-01-01 02:30:00 -582.0 1 \n",
"997 2944122893 2024-01-01 02:30:00 -65.0 1 \n",
"998 2910876405 2024-01-01 02:30:00 -117.0 1 \n",
"999 1816942226 2024-01-01 02:30:00 -732.0 1 \n",
"\n",
" total_trade_volume order_trade_ratio volume_percentage Segment \\\n",
"0 92 -1.0 0.0 Default \n",
"1 689 -1.0 0.0 Default \n",
"2 42 -1.0 0.0 Default \n",
"3 167 -1.0 0.0 Default \n",
"4 756 -1.0 0.0 Default \n",
".. ... ... ... ... \n",
"995 159 -1.0 0.0 Default \n",
"996 582 -1.0 0.0 Default \n",
"997 65 -1.0 0.0 Default \n",
"998 117 -1.0 0.0 Default \n",
"999 732 -1.0 0.0 Default \n",
"\n",
" SAR_FLAG Risk \n",
"0 N Low Risk \n",
"1 N Low Risk \n",
"2 N Low Risk \n",
"3 N Low Risk \n",
"4 N Low Risk \n",
".. ... ... \n",
"995 N Low Risk \n",
"996 N Low Risk \n",
"997 N Low Risk \n",
"998 N Low Risk \n",
"999 N Low Risk \n",
"\n",
"[1000 rows x 10 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# scenario = Scenario()\n",
"# scenario.logic(validation_window=300000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}