61/.ipynb_checkpoints/main-checkpoint.ipynb
2024-10-14 07:49:12 +00:00

409 lines
14 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 21,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"query = \"\"\"\n",
"SELECT \n",
" n.TRADER_ID,\n",
" n.trade_time_window,\n",
" n.net_volume,\n",
" n.order_count, -- Include number of orders\n",
" COALESCE(t.total_trade_volume, 0) AS total_trade_volume,\n",
" CASE \n",
" WHEN COALESCE(t.total_trade_volume, 0) > 0 THEN n.net_volume / t.total_trade_volume\n",
" ELSE 0 -- or another value to indicate no trades\n",
" END AS order_trade_ratio,\n",
" CASE \n",
" WHEN net_volume_all.total_net_volume_all > 0 THEN \n",
" (n.net_volume / net_volume_all.total_net_volume_all) * 100 \n",
" ELSE 0 \n",
" END AS volume_percentage -- Calculate volume percentage\n",
"FROM (\n",
" -- Step 2: Subquery for net_order_volume\n",
" SELECT \n",
" o.TRADER_ID,\n",
" t.DATE_TIME AS trade_time_window,\n",
" SUM(CASE \n",
" WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
" ELSE 0 END\n",
" ) AS net_volume,\n",
" COUNT(o.ORDER_ID) AS order_count -- Count the number of orders\n",
" FROM {order_10m} o\n",
" JOIN {trade_data_1b} t\n",
" ON o.TRADER_ID = t.TRADER_ID\n",
" WHERE o.SIDE = 'buy'\n",
" AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
" GROUP BY o.TRADER_ID, t.DATE_TIME\n",
") AS n\n",
"LEFT JOIN (\n",
" -- Step 6: Subquery for total_trade_volume (opposite side trades after spoofing)\n",
" SELECT \n",
" t.TRADER_ID,\n",
" t.DATE_TIME,\n",
" SUM(t.TRADE_VOLUME) AS total_trade_volume\n",
" FROM (\n",
" -- Step 5: Subquery for relevant_trades\n",
" SELECT t1.*\n",
" FROM {trade_data_1b} t1\n",
" WHERE t1.TRADE_SIDE = 'buy'\n",
" AND EXISTS (\n",
" SELECT 1\n",
" FROM {trade_data_1b} t2\n",
" WHERE t2.TRADER_ID = t1.TRADER_ID\n",
" AND t2.DATE_TIME BETWEEN t1.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t1.DATE_TIME\n",
" )\n",
" ) AS t\n",
" GROUP BY t.DATE_TIME, t.TRADER_ID\n",
") AS t \n",
"ON n.TRADER_ID = t.TRADER_ID AND n.trade_time_window = t.DATE_TIME\n",
"\n",
"-- New subquery for total net volume for all traders in the same time window\n",
"LEFT JOIN (\n",
" SELECT \n",
" t.DATE_TIME AS trade_time_window,\n",
" SUM(CASE \n",
" WHEN o.ORDER_STATUS = 'New' THEN o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Cancelled' THEN -o.ORDER_VOLUME\n",
" WHEN o.ORDER_STATUS = 'Fulfilled' THEN -o.ORDER_VOLUME\n",
" ELSE 0 END\n",
" ) AS total_net_volume_all\n",
" FROM {order_10m} o\n",
" JOIN {trade_data_1b} t\n",
" ON o.TRADER_ID = t.TRADER_ID\n",
" WHERE o.SIDE = 'buy'\n",
" AND o.DATE_TIME BETWEEN t.DATE_TIME - INTERVAL '{time_window_s}' SECOND AND t.DATE_TIME\n",
" GROUP BY t.DATE_TIME\n",
") AS net_volume_all\n",
"ON n.trade_time_window = net_volume_all.trade_time_window\n",
"\n",
"ORDER BY n.trade_time_window\n",
"\"\"\"\n",
"\n",
"\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"class Scenario:\n",
"    \"\"\"Run the module-level SQL template `query` and return its rows as a DataFrame.\n",
"\n",
"    The template is filled with fixed table names and a look-back window\n",
"    derived from the `validation_window` keyword argument.\n",
"    \"\"\"\n",
"\n",
"    # Built once when the class body executes and shared by every instance.\n",
"    seq = SQLQueryInterface(schema=\"trade_schema\")\n",
"\n",
"    def logic(self, **kwargs):\n",
"        \"\"\"Execute the scenario query and return one DataFrame row per result record.\n",
"\n",
"        Keyword Args:\n",
"            validation_window: Look-back window in milliseconds (required). It is\n",
"                divided by 1000 and truncated to whole seconds before being\n",
"                substituted into the SQL template.\n",
"\n",
"        Returns:\n",
"            pandas.DataFrame holding the seven query columns (TRADER_ID is exposed\n",
"            as `focal_ID`) plus the constant columns `Segment`, `SAR_FLAG` and\n",
"            `Risk`. Rows containing any null value are dropped.\n",
"\n",
"        Raises:\n",
"            TypeError: if `validation_window` is missing or None.\n",
"        \"\"\"\n",
"        validation_window = kwargs.get('validation_window')\n",
"        if validation_window is None:\n",
"            # Fail with an actionable message instead of the opaque\n",
"            # \"unsupported operand\" error the division below would raise.\n",
"            raise TypeError(\"logic() requires a 'validation_window' keyword argument (milliseconds)\")\n",
"        # int() guarantees that only a plain integer is spliced into the SQL text.\n",
"        time_window_s = int(validation_window / 1000)\n",
"\n",
"        # NOTE(review): the order/trade side is hard-coded to 'buy' inside `query`;\n",
"        # no keyword argument currently selects it.\n",
"        query_start_time = datetime.now()\n",
"        print(\"Query start time :\", query_start_time)\n",
"        row_list = self.seq.execute_raw(\n",
"            query.format(\n",
"                trade_data_1b=\"trade_10m_v3\",\n",
"                order_10m='order_10m',\n",
"                time_window_s=time_window_s,\n",
"            )\n",
"        )\n",
"\n",
"        # Column names follow the SELECT list order of `query`.\n",
"        cols = [\n",
"            'focal_ID',\n",
"            'trade_time_window',\n",
"            'net_volume',\n",
"            'order_count',\n",
"            'total_trade_volume',\n",
"            'order_trade_ratio',\n",
"            'volume_percentage',\n",
"        ]\n",
"        final_scenario_df = pd.DataFrame(row_list, columns=cols)\n",
"        final_scenario_df['Segment'] = 'Default'\n",
"        final_scenario_df['SAR_FLAG'] = 'N'\n",
"        final_scenario_df['Risk'] = 'Low Risk'\n",
"        # Non-inplace dropna returns a new frame with every row that has a null removed.\n",
"        return final_scenario_df.dropna()\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "b5c4307f-bc35-47e2-b680-fd1df2168d6c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Query start time : 2024-10-14 07:40:43.846637\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>focal_ID</th>\n",
" <th>trade_time_window</th>\n",
" <th>net_volume</th>\n",
" <th>order_count</th>\n",
" <th>total_trade_volume</th>\n",
" <th>order_trade_ratio</th>\n",
" <th>volume_percentage</th>\n",
" <th>Segment</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>Risk</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3097728207</td>\n",
" <td>2024-01-01 00:03:00</td>\n",
" <td>-92.0</td>\n",
" <td>1</td>\n",
" <td>92</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3228645322</td>\n",
" <td>2024-01-01 00:06:00</td>\n",
" <td>-689.0</td>\n",
" <td>1</td>\n",
" <td>689</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2701872727</td>\n",
" <td>2024-01-01 00:09:00</td>\n",
" <td>-42.0</td>\n",
" <td>1</td>\n",
" <td>42</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1659056655</td>\n",
" <td>2024-01-01 00:11:00</td>\n",
" <td>-167.0</td>\n",
" <td>1</td>\n",
" <td>167</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1661288887</td>\n",
" <td>2024-01-01 00:13:00</td>\n",
" <td>-756.0</td>\n",
" <td>1</td>\n",
" <td>756</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>95</th>\n",
" <td>1945772682</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-854.0</td>\n",
" <td>1</td>\n",
" <td>854</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>96</th>\n",
" <td>2137478041</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-926.0</td>\n",
" <td>1</td>\n",
" <td>926</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>97</th>\n",
" <td>7138329164</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-433.0</td>\n",
" <td>1</td>\n",
" <td>433</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>1867007441</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-626.0</td>\n",
" <td>1</td>\n",
" <td>626</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" <tr>\n",
" <th>99</th>\n",
" <td>2347906349</td>\n",
" <td>2024-01-01 00:43:00</td>\n",
" <td>-69.0</td>\n",
" <td>1</td>\n",
" <td>69</td>\n",
" <td>-1.0</td>\n",
" <td>0.0</td>\n",
" <td>Default</td>\n",
" <td>N</td>\n",
" <td>Low Risk</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" focal_ID trade_time_window net_volume order_count \\\n",
"0 3097728207 2024-01-01 00:03:00 -92.0 1 \n",
"1 3228645322 2024-01-01 00:06:00 -689.0 1 \n",
"2 2701872727 2024-01-01 00:09:00 -42.0 1 \n",
"3 1659056655 2024-01-01 00:11:00 -167.0 1 \n",
"4 1661288887 2024-01-01 00:13:00 -756.0 1 \n",
".. ... ... ... ... \n",
"95 1945772682 2024-01-01 00:43:00 -854.0 1 \n",
"96 2137478041 2024-01-01 00:43:00 -926.0 1 \n",
"97 7138329164 2024-01-01 00:43:00 -433.0 1 \n",
"98 1867007441 2024-01-01 00:43:00 -626.0 1 \n",
"99 2347906349 2024-01-01 00:43:00 -69.0 1 \n",
"\n",
" total_trade_volume order_trade_ratio volume_percentage Segment \\\n",
"0 92 -1.0 0.0 Default \n",
"1 689 -1.0 0.0 Default \n",
"2 42 -1.0 0.0 Default \n",
"3 167 -1.0 0.0 Default \n",
"4 756 -1.0 0.0 Default \n",
".. ... ... ... ... \n",
"95 854 -1.0 0.0 Default \n",
"96 926 -1.0 0.0 Default \n",
"97 433 -1.0 0.0 Default \n",
"98 626 -1.0 0.0 Default \n",
"99 69 -1.0 0.0 Default \n",
"\n",
" SAR_FLAG Risk \n",
"0 N Low Risk \n",
"1 N Low Risk \n",
"2 N Low Risk \n",
"3 N Low Risk \n",
"4 N Low Risk \n",
".. ... ... \n",
"95 N Low Risk \n",
"96 N Low Risk \n",
"97 N Low Risk \n",
"98 N Low Risk \n",
"99 N Low Risk \n",
"\n",
"[100 rows x 10 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# scenario = Scenario()\n",
"# scenario.logic(validation_window=300000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36b1b24a-aeca-4d22-a2b3-6e04aca31695",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}