From 05d1197ef06e402955a8809951c4c50406b6e32a Mon Sep 17 00:00:00 2001 From: user_client2024 Date: Fri, 11 Oct 2024 06:25:54 +0000 Subject: [PATCH] System save at 11/10/2024 11:55 by user_client2024 --- main.ipynb | 283 ++++++++++++++++++++++++++++++++++++++++++++++------- main.py | 194 +++++++++++++++++++++++++++++++++++- 2 files changed, 442 insertions(+), 35 deletions(-) diff --git a/main.ipynb b/main.ipynb index 3277afb..898f991 100644 --- a/main.ipynb +++ b/main.ipynb @@ -1,33 +1,250 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", - "metadata": {}, - "outputs": [], - "source": "from tms_data_interface import SQLQueryInterface\n\nclass Scenario:\n\tseq = SQLQueryInterface()\n\n\tdef logic(self, **kwargs):\n\t\t# Write your code here\n" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "ename": "IndentationError", + "evalue": "expected an indented block after function definition on line 6 (1665995538.py, line 8)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[1], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m import pandas as pd\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block after function definition on line 6\n" + ] + } + ], + "source": [ + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "class Scenario:\n", + "\tseq = SQLQueryInterface()\n", + "\n", + "\tdef logic(self, **kwargs):\n", + "\t\t# from datetime import datetime, timedelta\n", + "import pandas as pd\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "query = \"\"\"\n", + "WITH trade_data AS (\n", + " SELECT \n", + " trader_id,\n", + " date_time,\n", + " trade_price,\n", + " trade_volume,\n", + " -- Create a time window for each trade\n", + " date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n", + " date_time AS window_end\n", + " FROM {trade_data_1b}\n", + "),\n", + "\n", + "aggregated_trades AS (\n", + " SELECT \n", + " td.trader_id,\n", + " td.window_start,\n", + " td.window_end,\n", + " SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n", + " SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n", + " SUM(trade_volume) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n", + " MAX(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n", + " MIN(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n", + " COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n", + " FROM trade_data td\n", + ")\n", + "\n", + "SELECT \n", + " window_start AS start_time,\n", + " window_end AS end_time,\n", + " trader_id AS \"Participant\",\n", + " lowest_price AS min_price,\n", + " highest_price AS max_price,\n", + " (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n", + " buy_volume AS participant_volume,\n", + " total_volume,\n", + " (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n", + "FROM aggregated_trades\n", + "WHERE buy_volume > 0 OR sell_volume > 0\n", + "\"\"\"\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"internal\")\n", + "\n", + " def logic(self, **kwargs):\n", + " validation_window = kwargs.get('validation_window')\n", + " time_window_s = int(validation_window / 1000)\n", + " query_start_time = datetime.now()\n", + " print(\"Query start time :\", query_start_time)\n", + "\n", + " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_data_2b\",\n", + " time_window_s=time_window_s)\n", + " )\n", + "\n", + " cols = [\n", + " 'START_DATE_TIME',\n", + " 'END_DATE_TIME',\n", + " 'PARTICIPANT',\n", + " 'MIN_PRICE',\n", + " 'MAX_PRICE',\n", + " 'PRICE_CHANGE (%)',\n", + " 'PARTICIPANT_VOLUME',\n", + " 'TOTAL_VOLUME',\n", + " 'VOLUME (%)',\n", + " ]\n", + "\n", + " final_scenario_df = pd.DataFrame(row_list, columns=cols)\n", + " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n", + " final_scenario_df['TOTAL_VOLUME'] * 100\n", + "\n", + " # Adding additional columns\n", + " final_scenario_df['Segment'] = 'Default'\n", + " final_scenario_df['SAR_FLAG'] = 'N'\n", + " final_scenario_df['Risk'] = 'Low Risk'\n", + "\n", + " return final_scenario_df\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "90c70e46-71a0-44a6-8090-f53aad3193c3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "import pandas as pd\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "# SQL query to aggregate trade data and compute metrics\n", + "query = \"\"\"\n", + "WITH trade_data AS (\n", + " SELECT \n", + " trader_id,\n", + " date_time,\n", + " trade_price,\n", + " trade_volume,\n", + " -- Create a time window for each trade\n", + " date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n", + " date_time AS window_end\n", + " FROM {trade_data_1b}\n", + "),\n", + "\n", + "aggregated_trades AS (\n", + " SELECT \n", + " td.trader_id,\n", + " td.window_start,\n", + " td.window_end,\n", + " SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n", + " SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n", + " SUM(trade_volume) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n", + " MAX(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n", + " MIN(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n", + " COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n", + " FROM trade_data td\n", + ")\n", + "\n", + "SELECT \n", + " window_start AS start_time,\n", + " window_end AS end_time,\n", + " trader_id AS \"Participant\",\n", + " lowest_price AS min_price,\n", + " highest_price AS max_price,\n", + " (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n", + " buy_volume AS participant_volume,\n", + " total_volume,\n", + " (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n", + "FROM aggregated_trades\n", + "WHERE buy_volume > 0 OR sell_volume > 0\n", + "\"\"\"\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"internal\")\n", + "\n", + " def logic(self, **kwargs):\n", + " validation_window = kwargs.get('validation_window')\n", + " time_window_s = int(validation_window / 1000)\n", + " \n", + " query_start_time = datetime.now()\n", + " print(\"Query start time:\", query_start_time)\n", + "\n", + " row_list = self.seq.execute_raw(query.format(\n", + " trade_data_1b=\"trade_data_2b\",\n", + " time_window_s=time_window_s\n", + " ))\n", + "\n", + " cols = [\n", + " 'START_DATE_TIME',\n", + " 'END_DATE_TIME',\n", + " 'PARTICIPANT',\n", + " 'MIN_PRICE',\n", + " 'MAX_PRICE',\n", + " 'PRICE_CHANGE (%)',\n", + " 'PARTICIPANT_VOLUME',\n", + " 'TOTAL_VOLUME',\n", + " 'VOLUME (%)',\n", + " ]\n", + "\n", + " final_scenario_df = pd.DataFrame(row_list, columns=cols)\n", + " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n", + " final_scenario_df['TOTAL_VOLUME'] * 100\n", + "\n", + " # Adding additional columns\n", + " final_scenario_df['Segment'] = 'Default'\n", + " final_scenario_df['SAR_FLAG'] = 'N'\n", + " final_scenario_df['Risk'] = 'Low Risk'\n", + "\n", + " return final_scenario_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caee5554-5254-4388-bf24-029281d77890", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/main.py b/main.py index f220144..171681e 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# In[ ]: +# In[1]: from tms_data_interface import SQLQueryInterface @@ -10,5 +10,195 @@ class Scenario: seq = SQLQueryInterface() def logic(self, **kwargs): - # Write your code here + # from datetime import datetime, timedelta +import pandas as pd +from tms_data_interface import SQLQueryInterface + +query = """ +WITH trade_data AS ( + SELECT + trader_id, + date_time, + trade_price, + trade_volume, + -- Create a time window for each trade + date_time - INTERVAL '1 second' * {time_window_s} AS window_start, + date_time AS window_end + FROM {trade_data_1b} +), + +aggregated_trades AS ( + SELECT + td.trader_id, + td.window_start, + td.window_end, + SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) + OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume, + SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) + OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume, + SUM(trade_volume) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume, + MAX(trade_price) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price, + MIN(trade_price) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price, + COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades + FROM trade_data td +) + +SELECT + window_start AS start_time, + window_end AS end_time, + trader_id AS "Participant", + lowest_price AS min_price, + highest_price AS max_price, + (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS "Price Change (%)", + buy_volume AS participant_volume, + total_volume, + (buy_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)" +FROM aggregated_trades +WHERE buy_volume > 0 OR sell_volume > 0 +""" + +class Scenario: + seq = SQLQueryInterface(schema="internal") + + def logic(self, **kwargs): + validation_window = kwargs.get('validation_window') + time_window_s = int(validation_window / 1000) + query_start_time = datetime.now() + print("Query start time :", query_start_time) + + row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_data_2b", + time_window_s=time_window_s) + ) + + cols = [ + 'START_DATE_TIME', + 'END_DATE_TIME', + 'PARTICIPANT', + 'MIN_PRICE', + 'MAX_PRICE', + 'PRICE_CHANGE (%)', + 'PARTICIPANT_VOLUME', + 'TOTAL_VOLUME', + 'VOLUME (%)', + ] + + final_scenario_df = pd.DataFrame(row_list, columns=cols) + final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \ + final_scenario_df['TOTAL_VOLUME'] * 100 + + # Adding additional columns + final_scenario_df['Segment'] = 'Default' + final_scenario_df['SAR_FLAG'] = 'N' + final_scenario_df['Risk'] = 'Low Risk' + + return final_scenario_df + + + +# In[2]: + + +from datetime import datetime +import pandas as pd +from tms_data_interface import SQLQueryInterface + +# SQL query to aggregate trade data and compute metrics +query = """ +WITH trade_data AS ( + SELECT + trader_id, + date_time, + trade_price, + trade_volume, + -- Create a time window for each trade + date_time - INTERVAL '1 second' * {time_window_s} AS window_start, + date_time AS window_end + FROM {trade_data_1b} +), + +aggregated_trades AS ( + SELECT + td.trader_id, + td.window_start, + td.window_end, + SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) + OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume, + SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) + OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume, + SUM(trade_volume) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume, + MAX(trade_price) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price, + MIN(trade_price) OVER (ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price, + COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time + RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades + FROM trade_data td +) + +SELECT + window_start AS start_time, + window_end AS end_time, + trader_id AS "Participant", + lowest_price AS min_price, + highest_price AS max_price, + (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS "Price Change (%)", + buy_volume AS participant_volume, + total_volume, + (buy_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)" +FROM aggregated_trades +WHERE buy_volume > 0 OR sell_volume > 0 +""" + +class Scenario: + seq = SQLQueryInterface(schema="internal") + + def logic(self, **kwargs): + validation_window = kwargs.get('validation_window') + time_window_s = int(validation_window / 1000) + + query_start_time = datetime.now() + print("Query start time:", query_start_time) + + row_list = self.seq.execute_raw(query.format( + trade_data_1b="trade_data_2b", + time_window_s=time_window_s + )) + + cols = [ + 'START_DATE_TIME', + 'END_DATE_TIME', + 'PARTICIPANT', + 'MIN_PRICE', + 'MAX_PRICE', + 'PRICE_CHANGE (%)', + 'PARTICIPANT_VOLUME', + 'TOTAL_VOLUME', + 'VOLUME (%)', + ] + + final_scenario_df = pd.DataFrame(row_list, columns=cols) + final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \ + final_scenario_df['TOTAL_VOLUME'] * 100 + + # Adding additional columns + final_scenario_df['Segment'] = 'Default' + final_scenario_df['SAR_FLAG'] = 'N' + final_scenario_df['Risk'] = 'Low Risk' + + return final_scenario_df + + +# In[ ]: + + +