diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 3277afb..4045375 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -1,33 +1,135 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", - "metadata": {}, - "outputs": [], - "source": "from tms_data_interface import SQLQueryInterface\n\nclass Scenario:\n\tseq = SQLQueryInterface()\n\n\tdef logic(self, **kwargs):\n\t\t# Write your code here\n" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "90c70e46-71a0-44a6-8090-f53aad3193c3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "import pandas as pd\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "# SQL query to aggregate trade data and compute metrics\n", + "query = \"\"\"\n", + "WITH trade_data AS (\n", + " SELECT \n", + " trader_id,\n", + " date_time,\n", + " trade_price,\n", + " trade_volume,\n", + " -- Create a time window for each trade\n", + " date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n", + " date_time AS window_end\n", + " FROM {trade_data_1b}\n", + "),\n", + "\n", + "aggregated_trades AS (\n", + " SELECT \n", + " td.trader_id,\n", + " td.window_start,\n", + " td.window_end,\n", + " SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n", + " SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n", + " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n", + " SUM(trade_volume) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n", + " MAX(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n", + " MIN(trade_price) OVER (ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n", + " COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", + " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n", + " FROM trade_data td\n", + ")\n", + "\n", + "SELECT \n", + " window_start AS start_time,\n", + " window_end AS end_time,\n", + " trader_id AS \"Participant\",\n", + " lowest_price AS min_price,\n", + " highest_price AS max_price,\n", + " (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n", + " buy_volume AS participant_volume,\n", + " total_volume,\n", + " (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n", + "FROM aggregated_trades\n", + "WHERE buy_volume > 0 OR sell_volume > 0\n", + "\"\"\"\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"internal\")\n", + "\n", + " def logic(self, **kwargs):\n", + " validation_window = kwargs.get('validation_window')\n", + " time_window_s = int(validation_window / 1000)\n", + " \n", + " query_start_time = datetime.now()\n", + " print(\"Query start time:\", query_start_time)\n", + "\n", + " row_list = self.seq.execute_raw(query.format(\n", + " trade_data_1b=\"trade_data_2b\",\n", + " time_window_s=time_window_s\n", + " ))\n", + "\n", + " cols = [\n", + " 'START_DATE_TIME',\n", + " 'END_DATE_TIME',\n", + " 'PARTICIPANT',\n", + " 'MIN_PRICE',\n", + " 'MAX_PRICE',\n", + " 'PRICE_CHANGE (%)',\n", + " 'PARTICIPANT_VOLUME',\n", + " 'TOTAL_VOLUME',\n", + " 'VOLUME (%)',\n", + " ]\n", + "\n", + " final_scenario_df = pd.DataFrame(row_list, columns=cols)\n", + " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n", + " final_scenario_df['TOTAL_VOLUME'] * 100\n", + "\n", + " # Adding additional columns\n", + " final_scenario_df['Segment'] = 'Default'\n", + " final_scenario_df['SAR_FLAG'] = 'N'\n", + " final_scenario_df['Risk'] = 'Low Risk'\n", + "\n", + " return final_scenario_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caee5554-5254-4388-bf24-029281d77890", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/main.ipynb b/main.ipynb index 898f991..4045375 100644 --- a/main.ipynb +++ b/main.ipynb @@ -1,120 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "ename": "IndentationError", - "evalue": "expected an indented block after function definition on line 6 (1665995538.py, line 8)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Cell \u001b[0;32mIn[1], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m import pandas as pd\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m expected an indented block after function definition on line 6\n" - ] - } - ], - "source": [ - "from tms_data_interface import SQLQueryInterface\n", - "\n", - "class Scenario:\n", - "\tseq = SQLQueryInterface()\n", - "\n", - "\tdef logic(self, **kwargs):\n", - "\t\t# from datetime import datetime, timedelta\n", - "import pandas as pd\n", - "from tms_data_interface import SQLQueryInterface\n", - "\n", - "query = \"\"\"\n", - "WITH trade_data AS (\n", - " SELECT \n", - " trader_id,\n", - " date_time,\n", - " trade_price,\n", - " trade_volume,\n", - " -- Create a time window for each trade\n", - " date_time - INTERVAL '1 second' * {time_window_s} AS window_start,\n", - " date_time AS window_end\n", - " FROM {trade_data_1b}\n", - "),\n", - "\n", - "aggregated_trades AS (\n", - " SELECT \n", - " td.trader_id,\n", - " td.window_start,\n", - " td.window_end,\n", - " SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) \n", - " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume,\n", - " SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) \n", - " OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume,\n", - " SUM(trade_volume) OVER (ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume,\n", - " MAX(trade_price) OVER (ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price,\n", - " MIN(trade_price) OVER (ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price,\n", - " COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time \n", - " RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades\n", - " FROM trade_data td\n", - ")\n", - "\n", - "SELECT \n", - " window_start AS start_time,\n", - " window_end AS end_time,\n", - " trader_id AS \"Participant\",\n", - " lowest_price AS min_price,\n", - " highest_price AS max_price,\n", - " (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS \"Price Change (%)\",\n", - " buy_volume AS participant_volume,\n", - " total_volume,\n", - " (buy_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\"\n", - "FROM aggregated_trades\n", - "WHERE buy_volume > 0 OR sell_volume > 0\n", - "\"\"\"\n", - "\n", - "class Scenario:\n", - " seq = SQLQueryInterface(schema=\"internal\")\n", - "\n", - " def logic(self, **kwargs):\n", - " validation_window = kwargs.get('validation_window')\n", - " time_window_s = int(validation_window / 1000)\n", - " query_start_time = datetime.now()\n", - " print(\"Query start time :\", query_start_time)\n", - "\n", - " row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_data_2b\",\n", - " time_window_s=time_window_s)\n", - " )\n", - "\n", - " cols = [\n", - " 'START_DATE_TIME',\n", - " 'END_DATE_TIME',\n", - " 'PARTICIPANT',\n", - " 'MIN_PRICE',\n", - " 'MAX_PRICE',\n", - " 'PRICE_CHANGE (%)',\n", - " 'PARTICIPANT_VOLUME',\n", - " 'TOTAL_VOLUME',\n", - " 'VOLUME (%)',\n", - " ]\n", - "\n", - " final_scenario_df = pd.DataFrame(row_list, columns=cols)\n", - " final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \\\n", - " final_scenario_df['TOTAL_VOLUME'] * 100\n", - "\n", - " # Adding additional columns\n", - " final_scenario_df['Segment'] = 'Default'\n", - " final_scenario_df['SAR_FLAG'] = 'N'\n", - " final_scenario_df['Risk'] = 'Low Risk'\n", - "\n", - " return final_scenario_df\n", - " " - ] - }, { "cell_type": "code", "execution_count": 2, diff --git a/main.py b/main.py index 171681e..9bf5d6a 100644 --- a/main.py +++ b/main.py @@ -1,106 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -# In[1]: - - -from tms_data_interface import SQLQueryInterface - -class Scenario: - seq = SQLQueryInterface() - - def logic(self, **kwargs): - # from datetime import datetime, timedelta -import pandas as pd -from tms_data_interface import SQLQueryInterface - -query = """ -WITH trade_data AS ( - SELECT - trader_id, - date_time, - trade_price, - trade_volume, - -- Create a time window for each trade - date_time - INTERVAL '1 second' * {time_window_s} AS window_start, - date_time AS window_end - FROM {trade_data_1b} -), - -aggregated_trades AS ( - SELECT - td.trader_id, - td.window_start, - td.window_end, - SUM(CASE WHEN trade_side = 'buy' THEN trade_volume ELSE 0 END) - OVER (PARTITION BY td.trader_id ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS buy_volume, - SUM(CASE WHEN trade_side = 'sell' THEN trade_volume ELSE 0 END) - OVER (PARTITION BY td.trader_id ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS sell_volume, - SUM(trade_volume) OVER (ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS total_volume, - MAX(trade_price) OVER (ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS highest_price, - MIN(trade_price) OVER (ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS lowest_price, - COUNT(*) OVER (PARTITION BY td.trader_id ORDER BY td.date_time - RANGE BETWEEN {time_window_s} PRECEDING AND CURRENT ROW) AS number_of_trades - FROM trade_data td -) - -SELECT - window_start AS start_time, - window_end AS end_time, - trader_id AS "Participant", - lowest_price AS min_price, - highest_price AS max_price, - (highest_price - lowest_price) / NULLIF(lowest_price, 0) * 100 AS "Price Change (%)", - buy_volume AS participant_volume, - total_volume, - (buy_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)" -FROM aggregated_trades -WHERE buy_volume > 0 OR sell_volume > 0 -""" - -class Scenario: - seq = SQLQueryInterface(schema="internal") - - def logic(self, **kwargs): - validation_window = kwargs.get('validation_window') - time_window_s = int(validation_window / 1000) - query_start_time = datetime.now() - print("Query start time :", query_start_time) - - row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_data_2b", - time_window_s=time_window_s) - ) - - cols = [ - 'START_DATE_TIME', - 'END_DATE_TIME', - 'PARTICIPANT', - 'MIN_PRICE', - 'MAX_PRICE', - 'PRICE_CHANGE (%)', - 'PARTICIPANT_VOLUME', - 'TOTAL_VOLUME', - 'VOLUME (%)', - ] - - final_scenario_df = pd.DataFrame(row_list, columns=cols) - final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME'] / \ - final_scenario_df['TOTAL_VOLUME'] * 100 - - # Adding additional columns - final_scenario_df['Segment'] = 'Default' - final_scenario_df['SAR_FLAG'] = 'N' - final_scenario_df['Risk'] = 'Low Risk' - - return final_scenario_df - - - # In[2]: