System save at 11/10/2024 11:55 by user_client2024

This commit is contained in:
user_client2024 2024-10-11 06:25:54 +00:00
parent 511222ac72
commit 05d1197ef0
2 changed files with 442 additions and 35 deletions

View File

@ -2,11 +2,228 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "IndentationError",
"evalue": "expected an indented block after function definition on line 6 (1665995538.py, line 8)",
"output_type": "error",
"traceback": [
"\u001b[0;36m Cell \u001b[0;32mIn[1], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m import pandas as pd\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block after function definition on line 6\n"
]
}
],
"source": [
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"class Scenario:\n",
"\tseq = SQLQueryInterface()\n",
"\n",
"\tdef logic(self, **kwargs):\n",
"\t\t# from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"query = \"\"\"\n",
"WITH trade_data AS (\n",
" SELECT \n",
" trader_id,\n",
" date_time,\n",
" trade_price,\n",
" trade_volume,\n",
" -- Create a time window for each trade\n",
" date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n",
" date_time AS window_end\n",
" FROM {trade_data_1b}\n",
"),\n",
"\n",
"aggregated_trades AS (\n",
" SELECT \n",
" td.trader_id,\n",
" td.window_start,\n",
" td.window_end,\n",
" SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n",
" OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n",
" SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n",
" OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n",
" SUM(trade_volume) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n",
" MAX(trade_price) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n",
" MIN(trade_price) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n",
" COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n",
" FROM trade_data td\n",
")\n",
"\n",
"SELECT \n",
" window_start AS start_time,\n",
" window_end AS end_time,\n",
" trader_id AS \"Participant\",\n",
" lowest_price AS min_price,\n",
" highest_price AS max_price,\n",
" (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n",
" buy_volume AS participant_volume,\n",
" total_volume,\n",
" (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n",
"FROM aggregated_trades\n",
"WHERE buy_volume > 0 OR sell_volume > 0\n",
"\"\"\"\n",
"\n",
"class Scenario:\n",
" seq = SQLQueryInterface(schema=\"internal\")\n",
"\n",
" def logic(self, **kwargs):\n",
" validation_window = kwargs.get('validation_window')\n",
" time_window_s = int(validation_window / 1000)\n",
" query_start_time = datetime.now()\n",
" print(\"Query start time :\", query_start_time)\n",
"\n",
" row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_data_2b\",\n",
" time_window_s=time_window_s)\n",
" )\n",
"\n",
" cols = [\n",
" 'START_DATE_TIME',\n",
" 'END_DATE_TIME',\n",
" 'PARTICIPANT',\n",
" 'MIN_PRICE',\n",
" 'MAX_PRICE',\n",
" 'PRICE_CHANGE (%)',\n",
" 'PARTICIPANT_VOLUME',\n",
" 'TOTAL_VOLUME',\n",
" 'VOLUME (%)',\n",
" ]\n",
"\n",
" final_scenario_df = pd.DataFrame(row_list, columns=cols)\n",
" final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n",
" final_scenario_df['TOTAL_VOLUME'] * 100\n",
"\n",
" # Adding additional columns\n",
" final_scenario_df['Segment'] = 'Default'\n",
" final_scenario_df['SAR_FLAG'] = 'N'\n",
" final_scenario_df['Risk'] = 'Low Risk'\n",
"\n",
" return final_scenario_df\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "90c70e46-71a0-44a6-8090-f53aad3193c3",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pandas as pd\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"# SQL query to aggregate trade data and compute metrics\n",
"query = \"\"\"\n",
"WITH trade_data AS (\n",
" SELECT \n",
" trader_id,\n",
" date_time,\n",
" trade_price,\n",
" trade_volume,\n",
" -- Create a time window for each trade\n",
" date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n",
" date_time AS window_end\n",
" FROM {trade_data_1b}\n",
"),\n",
"\n",
"aggregated_trades AS (\n",
" SELECT \n",
" td.trader_id,\n",
" td.window_start,\n",
" td.window_end,\n",
" SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n",
" OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n",
" SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n",
" OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n",
" SUM(trade_volume) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n",
" MAX(trade_price) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n",
" MIN(trade_price) OVER (ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n",
" COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n",
" RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n",
" FROM trade_data td\n",
")\n",
"\n",
"SELECT \n",
" window_start AS start_time,\n",
" window_end AS end_time,\n",
" trader_id AS \"Participant\",\n",
" lowest_price AS min_price,\n",
" highest_price AS max_price,\n",
" (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n",
" buy_volume AS participant_volume,\n",
" total_volume,\n",
" (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n",
"FROM aggregated_trades\n",
"WHERE buy_volume > 0 OR sell_volume > 0\n",
"\"\"\"\n",
"\n",
"class Scenario:\n",
" seq = SQLQueryInterface(schema=\"internal\")\n",
"\n",
" def logic(self, **kwargs):\n",
" validation_window = kwargs.get('validation_window')\n",
" time_window_s = int(validation_window / 1000)\n",
" \n",
" query_start_time = datetime.now()\n",
" print(\"Query start time:\", query_start_time)\n",
"\n",
" row_list = self.seq.execute_raw(query.format(\n",
" trade_data_1b=\"trade_data_2b\",\n",
" time_window_s=time_window_s\n",
" ))\n",
"\n",
" cols = [\n",
" 'START_DATE_TIME',\n",
" 'END_DATE_TIME',\n",
" 'PARTICIPANT',\n",
" 'MIN_PRICE',\n",
" 'MAX_PRICE',\n",
" 'PRICE_CHANGE (%)',\n",
" 'PARTICIPANT_VOLUME',\n",
" 'TOTAL_VOLUME',\n",
" 'VOLUME (%)',\n",
" ]\n",
"\n",
" final_scenario_df = pd.DataFrame(row_list, columns=cols)\n",
" final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n",
" final_scenario_df['TOTAL_VOLUME'] * 100\n",
"\n",
" # Adding additional columns\n",
" final_scenario_df['Segment'] = 'Default'\n",
" final_scenario_df['SAR_FLAG'] = 'N'\n",
" final_scenario_df['Risk'] = 'Low Risk'\n",
"\n",
" return final_scenario_df\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caee5554-5254-4388-bf24-029281d77890",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": "from tms_data_interface import SQLQueryInterface\n\nclass Scenario:\n\tseq = SQLQueryInterface()\n\n\tdef logic(self, **kwargs):\n\t\t# Write your code here\n" "source": []
} }
], ],
"metadata": { "metadata": {
@ -25,7 +242,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.13" "version": "3.11.8"
} }
}, },
"nbformat": 4, "nbformat": 4,

194
main.py
View File

@ -1,7 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
# In[ ]: # In[1]:
from tms_data_interface import SQLQueryInterface from tms_data_interface import SQLQueryInterface
@ -10,5 +10,195 @@ class Scenario:
seq = SQLQueryInterface() seq = SQLQueryInterface()
def logic(self, **kwargs): def logic(self, **kwargs):
# Write your code here # from datetime import datetime, timedelta
import pandas as pd
from tms_data_interface import SQLQueryInterface
query = """
WITH trade_data AS (
SELECT
trader_id,
date_time,
trade_price,
trade_volume,
-- Create a time window for each trade
date_time - INTERVAL '1 second' * {time_window_s} AS window_start,
date_time AS window_end
FROM {trade_data_1b}
),
aggregated_trades AS (
SELECT
td.trader_id,
td.window_start,
td.window_end,
SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END)
OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,
SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END)
OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,
SUM(trade_volume) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,
MAX(trade_price) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,
MIN(trade_price) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,
COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades
FROM trade_data td
)
SELECT
window_start AS start_time,
window_end AS end_time,
trader_id AS "Participant",
lowest_price AS min_price,
highest_price AS max_price,
(highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS "Price Change (%)",
buy_volume AS participant_volume,
total_volume,
(buy_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)"
FROM aggregated_trades
WHERE buy_volume > 0 OR sell_volume > 0
"""
class Scenario:
seq = SQLQueryInterface(schema="internal")
def logic(self, **kwargs):
validation_window = kwargs.get('validation_window')
time_window_s = int(validation_window / 1000)
query_start_time = datetime.now()
print("Query start time :", query_start_time)
row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_data_2b",
time_window_s=time_window_s)
)
cols = [
'START_DATE_TIME',
'END_DATE_TIME',
'PARTICIPANT',
'MIN_PRICE',
'MAX_PRICE',
'PRICE_CHANGE (%)',
'PARTICIPANT_VOLUME',
'TOTAL_VOLUME',
'VOLUME (%)',
]
final_scenario_df = pd.DataFrame(row_list, columns=cols)
final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \
final_scenario_df['TOTAL_VOLUME'] * 100
# Adding additional columns
final_scenario_df['Segment'] = 'Default'
final_scenario_df['SAR_FLAG'] = 'N'
final_scenario_df['Risk'] = 'Low Risk'
return final_scenario_df
# In[2]:
from datetime import datetime
import pandas as pd
from tms_data_interface import SQLQueryInterface
# SQL query to aggregate trade data and compute metrics
query = """
WITH trade_data AS (
SELECT
trader_id,
date_time,
trade_price,
trade_volume,
-- Create a time window for each trade
date_time - INTERVAL '1 second' * {time_window_s} AS window_start,
date_time AS window_end
FROM {trade_data_1b}
),
aggregated_trades AS (
SELECT
td.trader_id,
td.window_start,
td.window_end,
SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END)
OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,
SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END)
OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,
SUM(trade_volume) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,
MAX(trade_price) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,
MIN(trade_price) OVER (ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,
COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time
RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades
FROM trade_data td
)
SELECT
window_start AS start_time,
window_end AS end_time,
trader_id AS "Participant",
lowest_price AS min_price,
highest_price AS max_price,
(highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS "Price Change (%)",
buy_volume AS participant_volume,
total_volume,
(buy_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)"
FROM aggregated_trades
WHERE buy_volume > 0 OR sell_volume > 0
"""
class Scenario:
seq = SQLQueryInterface(schema="internal")
def logic(self, **kwargs):
validation_window = kwargs.get('validation_window')
time_window_s = int(validation_window / 1000)
query_start_time = datetime.now()
print("Query start time:", query_start_time)
row_list = self.seq.execute_raw(query.format(
trade_data_1b="trade_data_2b",
time_window_s=time_window_s
))
cols = [
'START_DATE_TIME',
'END_DATE_TIME',
'PARTICIPANT',
'MIN_PRICE',
'MAX_PRICE',
'PRICE_CHANGE (%)',
'PARTICIPANT_VOLUME',
'TOTAL_VOLUME',
'VOLUME (%)',
]
final_scenario_df = pd.DataFrame(row_list, columns=cols)
final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \
final_scenario_df['TOTAL_VOLUME'] * 100
# Adding additional columns
final_scenario_df['Segment'] = 'Default'
final_scenario_df['SAR_FLAG'] = 'N'
final_scenario_df['Risk'] = 'Low Risk'
return final_scenario_df
# In[ ]: