System save at 10/11/2025 14:52 by user_client2024

This commit is contained in:
user_client2024 2025-11-10 09:22:23 +00:00
parent 76bf79b3fd
commit 21612b735d
3 changed files with 537 additions and 33 deletions

View File

@ -0,0 +1,188 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "d3b830ff-94b7-4596-b85a-320060180a6b",
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime, timedelta\n",
"import pandas as pd\n",
"import numpy as np\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"def apply_sar_flag(df, var1, var2, var3, random_state=42):\n",
" \"\"\"\n",
" Apply percentile-based thresholds, split data into alerting and non-alerting,\n",
" flag random 10% of alerting data as 'Y', and merge back.\n",
"\n",
" Parameters:\n",
" df (pd.DataFrame): Input dataframe\n",
" var1 (str): First variable (for 50th percentile threshold)\n",
" var2 (str): Second variable (for 50th percentile threshold)\n",
" var3 (str): Third variable (for 90th percentile threshold)\n",
" random_state (int): Seed for reproducibility\n",
"\n",
" Returns:\n",
" pd.DataFrame: DataFrame with 'SAR_Flag' column added\n",
" \"\"\"\n",
"\n",
" # Calculate thresholds\n",
" th1 = np.percentile(df[var1].dropna(), 90)\n",
" th2 = np.percentile(df[var2].dropna(), 90)\n",
" th3 = np.percentile(df[var3].dropna(), 90)\n",
"\n",
" # Split into alerting and non-alerting\n",
" alerting = df[(df[var1] >= th1) &\n",
" (df[var2] >= th2) &\n",
" (df[var3] >= th3)].copy()\n",
"\n",
" non_alerting = df.loc[~df.index.isin(alerting.index)].copy()\n",
"\n",
" # Assign SAR_Flag = 'N' for non-alerting\n",
" non_alerting['SAR_FLAG'] = 'N'\n",
"\n",
" # Assign SAR_Flag for alerting data\n",
" alerting['SAR_FLAG'] = 'N'\n",
" n_y = int(len(alerting) * 0.1) # 10% count\n",
" if n_y > 0:\n",
" y_indices = alerting.sample(n=n_y, random_state=random_state).index\n",
" alerting.loc[y_indices, 'SAR_FLAG'] = 'Y'\n",
"\n",
" # Merge back and preserve original order\n",
" final_df = pd.concat([alerting, non_alerting]).sort_index()\n",
"\n",
" return final_df\n",
"\n",
"query = \"\"\"\n",
"WITH time_windows AS (\n",
" SELECT\n",
" -- End time is the current trade time\n",
" date_time AS end_time,\n",
"\n",
" -- Subtract seconds from the end_time using date_add() with negative integer interval\n",
" date_add('second', -{time_window_s}, date_time) AS start_time,\n",
"\n",
" -- Trade details\n",
" trade_price,\n",
" trade_volume,\n",
" trader_id,\n",
"\n",
" -- Calculate minimum price within the time window\n",
" MIN(trade_price) OVER (\n",
" ORDER BY date_time \n",
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
" ) AS min_price,\n",
"\n",
" -- Calculate maximum price within the time window\n",
" MAX(trade_price) OVER (\n",
" ORDER BY date_time \n",
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
" ) AS max_price,\n",
"\n",
" -- Calculate total trade volume within the time window\n",
" SUM(trade_volume) OVER ( \n",
" ORDER BY date_time \n",
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
" ) AS total_volume,\n",
"\n",
" -- Calculate participant's trade volume within the time window\n",
" SUM(CASE WHEN trader_id = trader_id THEN trade_volume ELSE 0 END) OVER (\n",
" PARTITION BY trader_id \n",
" ORDER BY date_time \n",
" RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW\n",
" ) AS participant_volume\n",
" FROM\n",
" {trade_data_1b}\n",
")\n",
"SELECT\n",
" -- Select the time window details\n",
" start_time,\n",
" end_time,\n",
"\n",
" -- Select the participant (trader) ID\n",
" trader_id AS \"Participant\",\n",
"\n",
" -- Select the calculated min and max prices\n",
" min_price,\n",
" max_price,\n",
"\n",
" -- Calculate the price change percentage\n",
" (max_price - min_price) / NULLIF(min_price, 0) * 100 AS \"Price Change (%)\",\n",
"\n",
" -- Calculate the participant's volume as a percentage of total volume\n",
" (participant_volume / NULLIF(total_volume, 0)) * 100 AS \"Volume (%)\",\n",
"\n",
" -- Participant volume\n",
" participant_volume,\n",
"\n",
" -- Select the total volume within the window\n",
" total_volume AS \"Total Volume\"\n",
"FROM\n",
" time_windows\n",
"\"\"\"\n",
"\n",
"\n",
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"class Scenario:\n",
" seq = SQLQueryInterface(schema=\"trade_schema\")\n",
" def logic(self, **kwargs):\n",
" validation_window = kwargs.get('validation_window', 300000)\n",
" time_window_s = int(validation_window/1000)\n",
" query_start_time = datetime.now()\n",
" print(\"Query start time :\",query_start_time)\n",
" row_list = self.seq.execute_raw(query.format(trade_data_1b=\"trade_10m_v3\",\n",
" time_window_s = time_window_s)\n",
" )\n",
" cols = [\n",
" 'START_DATE_TIME',\n",
" 'END_DATE_TIME',\n",
" 'Focal_id',\n",
" 'MIN_PRICE',\n",
" 'MAX_PRICE',\n",
" 'PRICE_CHANGE_PCT',\n",
" 'PARTICIPANT_VOLUME_PCT',\n",
" 'PARTICIPANT_VOLUME',\n",
" 'TOTAL_VOLUME',\n",
" ]\n",
" final_scenario_df = pd.DataFrame(row_list, columns = cols)\n",
" final_scenario_df['PARTICIPANT_VOLUME_PCT'] = final_scenario_df['PARTICIPANT_VOLUME']/\\\n",
" final_scenario_df['TOTAL_VOLUME'] * 100\n",
" final_scenario_df['Segment'] = 'Default'\n",
" # final_scenario_df['SAR_FLAG'] = 'N'\n",
" final_scenario_df['Risk'] = 'Medium Risk'\n",
" final_scenario_df.dropna(inplace=True)\n",
" final_scenario_df = apply_sar_flag(final_scenario_df,\n",
" 'PRICE_CHANGE_PCT',\n",
" 'PARTICIPANT_VOLUME_PCT',\n",
" 'TOTAL_VOLUME',\n",
" random_state=42)\n",
" # final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']\n",
" return final_scenario_df\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
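For reference, the windowed aggregation in the query above maps directly onto pandas time-based rolling windows. Below is a minimal sketch, assuming a DataFrame indexed by trade timestamp with trade_price, trade_volume, and trader_id columns; the data and the 5-second window are synthetic stand-ins for the {time_window_s} placeholder.

import pandas as pd

# Synthetic trades indexed by timestamp (stand-in for date_time)
trades = pd.DataFrame(
    {
        "trade_price": [100.0, 101.5, 99.0, 100.5],
        "trade_volume": [10, 5, 20, 8],
        "trader_id": ["T1", "T2", "T1", "T2"],
    },
    index=pd.to_datetime([
        "2025-01-01 09:00:00",
        "2025-01-01 09:00:02",
        "2025-01-01 09:00:04",
        "2025-01-01 09:00:09",
    ]),
)

window = "5s"  # analogue of the {time_window_s} placeholder

# Market-wide aggregates over the trailing window, like the unpartitioned
# MIN/MAX/SUM ... OVER (ORDER BY date_time RANGE ...) in the query
trades["min_price"] = trades["trade_price"].rolling(window).min()
trades["max_price"] = trades["trade_price"].rolling(window).max()
trades["total_volume"] = trades["trade_volume"].rolling(window).sum()

# Per-trader volume, the analogue of
# SUM(trade_volume) OVER (PARTITION BY trader_id ORDER BY date_time RANGE ...)
trades["participant_volume"] = trades.groupby("trader_id")["trade_volume"].transform(
    lambda s: s.rolling(window).sum()
)

print(trades)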

View File

@ -1,33 +1,188 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {},
"outputs": [],
"source": "from tms_data_interface import SQLQueryInterface\n\nclass Scenario:\n\tseq = SQLQueryInterface()\n\n\tdef logic(self, **kwargs):\n\t\t# Write your code here\n"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

main.py Normal file
View File

@ -0,0 +1,161 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from tms_data_interface import SQLQueryInterface
def apply_sar_flag(df, var1, var2, var3, random_state=42):
"""
Apply percentile-based thresholds, split the data into alerting and non-alerting,
flag a random 10% of the alerting rows as 'Y', and merge the two back together.
Parameters:
df (pd.DataFrame): Input dataframe
var1 (str): First variable (for 90th percentile threshold)
var2 (str): Second variable (for 90th percentile threshold)
var3 (str): Third variable (for 90th percentile threshold)
random_state (int): Seed for reproducibility
Returns:
pd.DataFrame: DataFrame with 'SAR_FLAG' column added
"""
# Calculate thresholds
th1 = np.percentile(df[var1].dropna(), 90)
th2 = np.percentile(df[var2].dropna(), 90)
th3 = np.percentile(df[var3].dropna(), 90)
# Split into alerting and non-alerting
alerting = df[(df[var1] >= th1) &
(df[var2] >= th2) &
(df[var3] >= th3)].copy()
non_alerting = df.loc[~df.index.isin(alerting.index)].copy()
# Assign SAR_FLAG = 'N' for non-alerting
non_alerting['SAR_FLAG'] = 'N'
# Default alerting rows to 'N' before sampling the 'Y' subset
alerting['SAR_FLAG'] = 'N'
n_y = int(len(alerting) * 0.1) # 10% count
if n_y > 0:
y_indices = alerting.sample(n=n_y, random_state=random_state).index
alerting.loc[y_indices, 'SAR_FLAG'] = 'Y'
# Merge back and preserve original order
final_df = pd.concat([alerting, non_alerting]).sort_index()
return final_df
query = """
WITH time_windows AS (
SELECT
-- End time is the current trade time
date_time AS end_time,
-- Subtract seconds from the end_time using date_add() with negative integer interval
date_add('second', -{time_window_s}, date_time) AS start_time,
-- Trade details
trade_price,
trade_volume,
trader_id,
-- Calculate minimum price within the time window
MIN(trade_price) OVER (
ORDER BY date_time
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
) AS min_price,
-- Calculate maximum price within the time window
MAX(trade_price) OVER (
ORDER BY date_time
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
) AS max_price,
-- Calculate total trade volume within the time window
SUM(trade_volume) OVER (
ORDER BY date_time
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
) AS total_volume,
-- Calculate the participant's own trade volume within the time window;
-- PARTITION BY trader_id already restricts the sum to this trader, so no CASE is needed
SUM(trade_volume) OVER (
PARTITION BY trader_id
ORDER BY date_time
RANGE BETWEEN INTERVAL '{time_window_s}' SECOND PRECEDING AND CURRENT ROW
) AS participant_volume
FROM
{trade_data_1b}
)
SELECT
-- Select the time window details
start_time,
end_time,
-- Select the participant (trader) ID
trader_id AS "Participant",
-- Select the calculated min and max prices
min_price,
max_price,
-- Calculate the price change percentage
(max_price - min_price) / NULLIF(min_price, 0) * 100 AS "Price Change (%)",
-- Calculate the participant's volume as a percentage of total volume
(CAST(participant_volume AS DOUBLE) / NULLIF(total_volume, 0)) * 100 AS "Volume (%)",
-- Participant volume
participant_volume,
-- Select the total volume within the window
total_volume AS "Total Volume"
FROM
time_windows
"""
class Scenario:
seq = SQLQueryInterface(schema="trade_schema")
def logic(self, **kwargs):
# validation_window is supplied in milliseconds; convert to whole seconds
validation_window = kwargs.get('validation_window', 300000)
time_window_s = int(validation_window / 1000)
query_start_time = datetime.now()
print("Query start time:", query_start_time)
row_list = self.seq.execute_raw(query.format(trade_data_1b="trade_10m_v3",
time_window_s=time_window_s)
)
cols = [
'START_DATE_TIME',
'END_DATE_TIME',
'Focal_id',
'MIN_PRICE',
'MAX_PRICE',
'PRICE_CHANGE_PCT',
'PARTICIPANT_VOLUME_PCT',
'PARTICIPANT_VOLUME',
'TOTAL_VOLUME',
]
final_scenario_df = pd.DataFrame(row_list, columns=cols)
# Recompute the volume percentage from the raw columns
final_scenario_df['PARTICIPANT_VOLUME_PCT'] = (final_scenario_df['PARTICIPANT_VOLUME']
/ final_scenario_df['TOTAL_VOLUME'] * 100)
final_scenario_df['Segment'] = 'Default'
# final_scenario_df['SAR_FLAG'] = 'N'
final_scenario_df['Risk'] = 'Medium Risk'
final_scenario_df.dropna(inplace=True)
final_scenario_df = apply_sar_flag(final_scenario_df,
'PRICE_CHANGE_PCT',
'PARTICIPANT_VOLUME_PCT',
'TOTAL_VOLUME',
random_state=42)
# final_scenario_df['RUN_DATE'] = final_scenario_df['END_DATE']
return final_scenario_df
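
As a quick sanity check, apply_sar_flag above can be exercised on a synthetic frame. This is a minimal sketch with made-up data; the column names are reused from the scenario. With independent uniform columns, roughly 0.1% of rows clear all three 90th-percentile thresholds, of which about 10% are sampled as 'Y'.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 100_000
demo = pd.DataFrame({
    "PRICE_CHANGE_PCT": rng.uniform(0, 50, n),
    "PARTICIPANT_VOLUME_PCT": rng.uniform(0, 100, n),
    "TOTAL_VOLUME": rng.integers(1, 10_000, n).astype(float),
})

flagged = apply_sar_flag(
    demo,
    "PRICE_CHANGE_PCT",
    "PARTICIPANT_VOLUME_PCT",
    "TOTAL_VOLUME",
    random_state=42,
)

# ~100 of the 100k rows should exceed all three thresholds, so expect ~10 'Y' flags
print(flagged["SAR_FLAG"].value_counts())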