generated from dhairya/scenario_template
System save at 10/11/2025 14:52 by user_client2024
This commit is contained in:
parent
76bf79b3fd
commit
21612b735d
188
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
188
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
@@ -0,0 +1,188 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d3b830ff-94b7-4596-b85a-320060180a6b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"def apply_sar_flag(df, var1, var2, var3, random_state=42):\n",
|
||||
" \"\"\"\n",
|
||||
" Apply percentile-based thresholds, split data into alerting and non-alerting,\n",
|
||||
" flag random 10% of alerting data as 'Y', and merge back.\n",
|
||||
"\n",
|
||||
" Parameters:\n",
|
||||
" df (pd.DataFrame): Input dataframe\n",
|
||||
" var1 (str): First variable (for 50th percentile threshold)\n",
|
||||
" var2 (str): Second variable (for 50th percentile threshold)\n",
|
||||
" var3 (str): Third variable (for 90th percentile threshold)\n",
|
||||
" random_state (int): Seed for reproducibility\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" pd.DataFrame: DataFrame with 'SAR_Flag' column added\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Calculate thresholds\n",
|
||||
" th1 = np.percentile(df[var1].dropna(), 90)\n",
|
||||
" th2 = np.percentile(df[var2].dropna(), 90)\n",
|
||||
" th3 = np.percentile(df[var3].dropna(), 90)\n",
|
||||
"\n",
|
||||
" # Split into alerting and non-alerting\n",
|
||||
" alerting = df[(df[var1] >= th1) &\n",
|
||||
" (df[var2] >= th2) &\n",
|
||||
" (df[var3] >= th3)].copy()\n",
|
||||
"\n",
|
||||
" non_alerting = df.loc[~df.index.isin(alerting.index)].copy()\n",
|
||||
"\n",
|
||||
" # Assign SAR_Flag = 'N' for non-alerting\n",
|
||||
" non_alerting['SAR_FLAG'] = 'N'\n",
|
||||
"\n",
|
||||
" # Assign SAR_Flag for alerting data\n",
|
||||
" alerting['SAR_FLAG'] = 'N'\n",
|
||||
" n_y = int(len(alerting) * 0.1) # 10% count\n",
|
||||
" if n_y > 0:\n",
|
||||
" y_indices = alerting.sample(n=n_y, random_state=random_state).index\n",
|
||||
" alerting.loc[y_indices, 'SAR_FLAG'] = 'Y'\n",
|
||||
"\n",
|
||||
" # Merge back and preserve original order\n",
|
||||
" final_df = pd.concat([alerting, non_alerting]).sort_index()\n",
|
||||
"\n",
|
||||
" return final_df\n",
|
||||
"\n",
|
||||
"query = \"\"\"\n",
|
||||
"WITH time_windows AS (\n",
|
||||
" SELECT\n",
|
||||
" -- End time is the current trade time\n",
|
||||
" date_time AS end_time,\n",
|
||||
"\n",
|
||||
" -- Subtract seconds from the end_time using date_add() with negative integer interval\n",
|
||||
" date_add('second', -{time_window_s}, date_time) AS start_time,\n",
|
||||
"\n",
|
||||
" -- Trade details\n",
|
||||
" trade_price,\n",
|
||||
" trade_volume,\n",
|
||||
" trader_id,\n",
|
||||
"\n",
|
||||
" -- Calculate minimum price within the time window\n",
|
||||
" MIN(trade_price) OVER (\n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS min_price,\n",
|
||||
"\n",
|
||||
" -- Calculate maximum price within the time window\n",
|
||||
" MAX(trade_price) OVER (\n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS max_price,\n",
|
||||
"\n",
|
||||
" -- Calculate total trade volume within the time window\n",
|
||||
" SUM(trade_volume) OVER ( \n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS total_volume,\n",
|
||||
"\n",
|
||||
" -- Calculate participant's trade volume within the time window\n",
|
||||
" SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n",
|
||||
" PARTITION BY trader_id \n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS participant_volume\n",
|
||||
" FROM\n",
|
||||
" {trade_data_1b}\n",
|
||||
")\n",
|
||||
"SELECT\n",
|
||||
" -- Select the time window details\n",
|
||||
" start_time,\n",
|
||||
" end_time,\n",
|
||||
"\n",
|
||||
" -- Select the participant (trader) ID\n",
|
||||
" trader_id AS \"Participant\",\n",
|
||||
"\n",
|
||||
" -- Select the calculated min and max prices\n",
|
||||
" min_price,\n",
|
||||
" max_price,\n",
|
||||
"\n",
|
||||
" -- Calculate the price change percentage\n",
|
||||
" (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n",
|
||||
"\n",
|
||||
" -- Calculate the participant's volume as a percentage of total volume\n",
|
||||
" (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n",
|
||||
"\n",
|
||||
" -- Participant volume\n",
|
||||
" participant_volume,\n",
|
||||
"\n",
|
||||
" -- Select the total volume within the window\n",
|
||||
" total_volume AS \"Total Volume\"\n",
|
||||
"FROM\n",
|
||||
" time_windows\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"class Scenario:\n",
|
||||
" seq = SQLQueryInterface(schema=\"trade_schema\")\n",
|
||||
" def logic(self, **kwargs):\n",
|
||||
" validation_window = kwargs.get('validation_window', 300000)\n",
|
||||
" time_window_s = int(validation_window/1000)\n",
|
||||
" query_start_time = datetime.now()\n",
|
||||
" print(\"Query start time :\",query_start_time)\n",
|
||||
" row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n",
|
||||
" time_window_s = time_window_s)\n",
|
||||
" )\n",
|
||||
" cols = [\n",
|
||||
" 'START_DATE_TIME',\n",
|
||||
" 'END_DATE_TIME',\n",
|
||||
" 'Focal_id',\n",
|
||||
" 'MIN_PRICE',\n",
|
||||
" 'MAX_PRICE',\n",
|
||||
" 'PRICE_CHANGE_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME',\n",
|
||||
" 'TOTAL_VOLUME',\n",
|
||||
" ]\n",
|
||||
" final_scenario_df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n",
|
||||
" final_scenario_df['TOTAL_VOLUME'] * 100\n",
|
||||
" final_scenario_df['Segment'] = 'Default'\n",
|
||||
" # final_scenario_df['SAR_FLAG'] = 'N'\n",
|
||||
" final_scenario_df['Risk'] = 'Medium Risk'\n",
|
||||
" final_scenario_df.dropna(inplace=True)\n",
|
||||
" final_scenario_df = apply_sar_flag(final_scenario_df,\n",
|
||||
" 'PRICE_CHANGE_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME_PCT',\n",
|
||||
" 'TOTAL_VOLUME',\n",
|
||||
" random_state=42)\n",
|
||||
" # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n",
|
||||
" return final_scenario_df\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
221
main.ipynb
221
main.ipynb
@@ -1,33 +1,188 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": "from tms_data_interface import SQLQueryInterface\n\nclass Scenario:\n\tseq = SQLQueryInterface()\n\n\tdef logic(self, **kwargs):\n\t\t# Write your code here\n"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d3b830ff-94b7-4596-b85a-320060180a6b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"def apply_sar_flag(df, var1, var2, var3, random_state=42):\n",
|
||||
" \"\"\"\n",
|
||||
" Apply percentile-based thresholds, split data into alerting and non-alerting,\n",
|
||||
" flag random 10% of alerting data as 'Y', and merge back.\n",
|
||||
"\n",
|
||||
" Parameters:\n",
|
||||
" df (pd.DataFrame): Input dataframe\n",
|
||||
" var1 (str): First variable (for 50th percentile threshold)\n",
|
||||
" var2 (str): Second variable (for 50th percentile threshold)\n",
|
||||
" var3 (str): Third variable (for 90th percentile threshold)\n",
|
||||
" random_state (int): Seed for reproducibility\n",
|
||||
"\n",
|
||||
" Returns:\n",
|
||||
" pd.DataFrame: DataFrame with 'SAR_Flag' column added\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" # Calculate thresholds\n",
|
||||
" th1 = np.percentile(df[var1].dropna(), 90)\n",
|
||||
" th2 = np.percentile(df[var2].dropna(), 90)\n",
|
||||
" th3 = np.percentile(df[var3].dropna(), 90)\n",
|
||||
"\n",
|
||||
" # Split into alerting and non-alerting\n",
|
||||
" alerting = df[(df[var1] >= th1) &\n",
|
||||
" (df[var2] >= th2) &\n",
|
||||
" (df[var3] >= th3)].copy()\n",
|
||||
"\n",
|
||||
" non_alerting = df.loc[~df.index.isin(alerting.index)].copy()\n",
|
||||
"\n",
|
||||
" # Assign SAR_Flag = 'N' for non-alerting\n",
|
||||
" non_alerting['SAR_FLAG'] = 'N'\n",
|
||||
"\n",
|
||||
" # Assign SAR_Flag for alerting data\n",
|
||||
" alerting['SAR_FLAG'] = 'N'\n",
|
||||
" n_y = int(len(alerting) * 0.1) # 10% count\n",
|
||||
" if n_y > 0:\n",
|
||||
" y_indices = alerting.sample(n=n_y, random_state=random_state).index\n",
|
||||
" alerting.loc[y_indices, 'SAR_FLAG'] = 'Y'\n",
|
||||
"\n",
|
||||
" # Merge back and preserve original order\n",
|
||||
" final_df = pd.concat([alerting, non_alerting]).sort_index()\n",
|
||||
"\n",
|
||||
" return final_df\n",
|
||||
"\n",
|
||||
"query = \"\"\"\n",
|
||||
"WITH time_windows AS (\n",
|
||||
" SELECT\n",
|
||||
" -- End time is the current trade time\n",
|
||||
" date_time AS end_time,\n",
|
||||
"\n",
|
||||
" -- Subtract seconds from the end_time using date_add() with negative integer interval\n",
|
||||
" date_add('second', -{time_window_s}, date_time) AS start_time,\n",
|
||||
"\n",
|
||||
" -- Trade details\n",
|
||||
" trade_price,\n",
|
||||
" trade_volume,\n",
|
||||
" trader_id,\n",
|
||||
"\n",
|
||||
" -- Calculate minimum price within the time window\n",
|
||||
" MIN(trade_price) OVER (\n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS min_price,\n",
|
||||
"\n",
|
||||
" -- Calculate maximum price within the time window\n",
|
||||
" MAX(trade_price) OVER (\n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS max_price,\n",
|
||||
"\n",
|
||||
" -- Calculate total trade volume within the time window\n",
|
||||
" SUM(trade_volume) OVER ( \n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS total_volume,\n",
|
||||
"\n",
|
||||
" -- Calculate participant's trade volume within the time window\n",
|
||||
" SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n",
|
||||
" PARTITION BY trader_id \n",
|
||||
" ORDER BY date_time \n",
|
||||
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
|
||||
" ) AS participant_volume\n",
|
||||
" FROM\n",
|
||||
" {trade_data_1b}\n",
|
||||
")\n",
|
||||
"SELECT\n",
|
||||
" -- Select the time window details\n",
|
||||
" start_time,\n",
|
||||
" end_time,\n",
|
||||
"\n",
|
||||
" -- Select the participant (trader) ID\n",
|
||||
" trader_id AS \"Participant\",\n",
|
||||
"\n",
|
||||
" -- Select the calculated min and max prices\n",
|
||||
" min_price,\n",
|
||||
" max_price,\n",
|
||||
"\n",
|
||||
" -- Calculate the price change percentage\n",
|
||||
" (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n",
|
||||
"\n",
|
||||
" -- Calculate the participant's volume as a percentage of total volume\n",
|
||||
" (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n",
|
||||
"\n",
|
||||
" -- Participant volume\n",
|
||||
" participant_volume,\n",
|
||||
"\n",
|
||||
" -- Select the total volume within the window\n",
|
||||
" total_volume AS \"Total Volume\"\n",
|
||||
"FROM\n",
|
||||
" time_windows\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"class Scenario:\n",
|
||||
" seq = SQLQueryInterface(schema=\"trade_schema\")\n",
|
||||
" def logic(self, **kwargs):\n",
|
||||
" validation_window = kwargs.get('validation_window', 300000)\n",
|
||||
" time_window_s = int(validation_window/1000)\n",
|
||||
" query_start_time = datetime.now()\n",
|
||||
" print(\"Query start time :\",query_start_time)\n",
|
||||
" row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n",
|
||||
" time_window_s = time_window_s)\n",
|
||||
" )\n",
|
||||
" cols = [\n",
|
||||
" 'START_DATE_TIME',\n",
|
||||
" 'END_DATE_TIME',\n",
|
||||
" 'Focal_id',\n",
|
||||
" 'MIN_PRICE',\n",
|
||||
" 'MAX_PRICE',\n",
|
||||
" 'PRICE_CHANGE_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME',\n",
|
||||
" 'TOTAL_VOLUME',\n",
|
||||
" ]\n",
|
||||
" final_scenario_df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n",
|
||||
" final_scenario_df['TOTAL_VOLUME'] * 100\n",
|
||||
" final_scenario_df['Segment'] = 'Default'\n",
|
||||
" # final_scenario_df['SAR_FLAG'] = 'N'\n",
|
||||
" final_scenario_df['Risk'] = 'Medium Risk'\n",
|
||||
" final_scenario_df.dropna(inplace=True)\n",
|
||||
" final_scenario_df = apply_sar_flag(final_scenario_df,\n",
|
||||
" 'PRICE_CHANGE_PCT',\n",
|
||||
" 'PARTICIPANT_VOLUME_PCT',\n",
|
||||
" 'TOTAL_VOLUME',\n",
|
||||
" random_state=42)\n",
|
||||
" # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n",
|
||||
" return final_scenario_df\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
||||
161
main.py
Normal file
161
main.py
Normal file
@@ -0,0 +1,161 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from tms_data_interface import SQLQueryInterface
|
||||
|
||||
def apply_sar_flag(df, var1, var2, var3, random_state=42):
    """
    Apply 90th-percentile thresholds on three variables, split data into
    alerting and non-alerting rows, flag a random 10% of the alerting rows
    as 'Y', and merge everything back in the original row order.

    Parameters:
        df (pd.DataFrame): Input dataframe
        var1 (str): First variable (90th percentile threshold)
        var2 (str): Second variable (90th percentile threshold)
        var3 (str): Third variable (90th percentile threshold)
        random_state (int): Seed for reproducibility of the 10% sample

    Returns:
        pd.DataFrame: DataFrame with a 'SAR_FLAG' column added ('Y'/'N')
    """

    # Calculate thresholds — all three use the 90th percentile of the
    # non-null values (the previous docstring's "50th" claim was wrong).
    th1 = np.percentile(df[var1].dropna(), 90)
    th2 = np.percentile(df[var2].dropna(), 90)
    th3 = np.percentile(df[var3].dropna(), 90)

    # A row alerts only when it meets or exceeds ALL three thresholds.
    alerting = df[(df[var1] >= th1) &
                  (df[var2] >= th2) &
                  (df[var3] >= th3)].copy()

    non_alerting = df.loc[~df.index.isin(alerting.index)].copy()

    # Non-alerting rows are never flagged.
    non_alerting['SAR_FLAG'] = 'N'

    # Default alerting rows to 'N', then flip a reproducible random 10%
    # (floor of the count) to 'Y'.
    alerting['SAR_FLAG'] = 'N'
    n_y = int(len(alerting) * 0.1)  # 10% count
    if n_y > 0:
        y_indices = alerting.sample(n=n_y, random_state=random_state).index
        alerting.loc[y_indices, 'SAR_FLAG'] = 'Y'

    # Merge back and preserve original order.
    final_df = pd.concat([alerting, non_alerting]).sort_index()

    return final_df
|
||||
|
||||
query = """
|
||||
WITH time_windows AS (
|
||||
SELECT
|
||||
-- End time is the current trade time
|
||||
date_time AS end_time,
|
||||
|
||||
-- Subtract seconds from the end_time using date_add() with negative integer interval
|
||||
date_add('second', -{time_window_s}, date_time) AS start_time,
|
||||
|
||||
-- Trade details
|
||||
trade_price,
|
||||
trade_volume,
|
||||
trader_id,
|
||||
|
||||
-- Calculate minimum price within the time window
|
||||
MIN(trade_price) OVER (
|
||||
ORDER BY date_time
|
||||
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
|
||||
) AS min_price,
|
||||
|
||||
-- Calculate maximum price within the time window
|
||||
MAX(trade_price) OVER (
|
||||
ORDER BY date_time
|
||||
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
|
||||
) AS max_price,
|
||||
|
||||
-- Calculate total trade volume within the time window
|
||||
SUM(trade_volume) OVER (
|
||||
ORDER BY date_time
|
||||
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
|
||||
) AS total_volume,
|
||||
|
||||
-- Calculate participant's trade volume within the time window
|
||||
SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (
|
||||
PARTITION BY trader_id
|
||||
ORDER BY date_time
|
||||
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
|
||||
) AS participant_volume
|
||||
FROM
|
||||
{trade_data_1b}
|
||||
)
|
||||
SELECT
|
||||
-- Select the time window details
|
||||
start_time,
|
||||
end_time,
|
||||
|
||||
-- Select the participant (trader) ID
|
||||
trader_id AS "Participant",
|
||||
|
||||
-- Select the calculated min and max prices
|
||||
min_price,
|
||||
max_price,
|
||||
|
||||
-- Calculate the price change percentage
|
||||
(max_price - min_price) / NULLIF(min_price, 0) * 100 AS "Price Change (%)",
|
||||
|
||||
-- Calculate the participant's volume as a percentage of total volume
|
||||
(participant_volume / NULLIF(total_volume, 0)) * 100 AS "Volume (%)",
|
||||
|
||||
-- Participant volume
|
||||
participant_volume,
|
||||
|
||||
-- Select the total volume within the window
|
||||
total_volume AS "Total Volume"
|
||||
FROM
|
||||
time_windows
|
||||
"""
|
||||
|
||||
|
||||
from tms_data_interface import SQLQueryInterface
|
||||
|
||||
class Scenario:
    """Trade-surveillance scenario.

    Runs the windowed price/volume query against `trade_schema`, loads the
    rows into pandas, enriches them (Segment, Risk), and assigns SAR_FLAG
    via `apply_sar_flag`.
    """

    # Shared query interface, bound once at class-definition time.
    seq = SQLQueryInterface(schema="trade_schema")

    def logic(self, **kwargs):
        """Execute the scenario and return the enriched result frame.

        Keyword Args:
            validation_window (int): Look-back window in milliseconds
                (default 300000, i.e. 5 minutes).

        Returns:
            pd.DataFrame: Rows with START/END times, Focal_id, price and
            volume metrics, plus Segment, Risk and SAR_FLAG columns.
        """
        validation_window = kwargs.get('validation_window', 300000)
        # The SQL template's placeholders are expressed in seconds.
        time_window_s = int(validation_window/1000)
        query_start_time = datetime.now()
        print("Query start time :",query_start_time)
        # execute_raw returns positional rows; the `cols` list below must
        # stay in the same order as the SELECT list in `query`.
        row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_10m_v3",
                                                     time_window_s = time_window_s)
                                        )
        cols = [
            'START_DATE_TIME',
            'END_DATE_TIME',
            'Focal_id',
            'MIN_PRICE',
            'MAX_PRICE',
            'PRICE_CHANGE_PCT',
            'PARTICIPANT_VOLUME_PCT',
            'PARTICIPANT_VOLUME',
            'TOTAL_VOLUME',
        ]
        final_scenario_df = pd.DataFrame(row_list, columns = cols)
        # Recompute the percentage in pandas, overwriting the SQL value.
        # NOTE(review): a zero TOTAL_VOLUME yields inf/NaN here; dropna
        # below removes NaN but NOT inf — confirm zero totals cannot occur.
        final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\
                                                      final_scenario_df['TOTAL_VOLUME'] * 100
        final_scenario_df['Segment'] = 'Default'
        # final_scenario_df['SAR_FLAG'] = 'N'
        final_scenario_df['Risk'] = 'Medium Risk'
        # Drop any row with a missing metric before flagging.
        final_scenario_df.dropna(inplace=True)
        # SAR_FLAG: 90th-percentile alerting split + reproducible random
        # 10% of alerting rows flagged 'Y' (see apply_sar_flag).
        final_scenario_df = apply_sar_flag(final_scenario_df,
                                           'PRICE_CHANGE_PCT',
                                           'PARTICIPANT_VOLUME_PCT',
                                           'TOTAL_VOLUME',
                                           random_state=42)
        # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']
        return final_scenario_df
|
||||
|
||||
Loading…
Reference in New Issue
Block a user