From cc3022fd4d92d0f560863c59195f5db47a68100b Mon Sep 17 00:00:00 2001
From: user_client2024 <shalini.v@solytics-partners.com>
Date: Thu, 27 Nov 2025 04:36:36 +0000
Subject: [PATCH] System save at 27/11/2025 10:06 by user_client2024

---
 .ipynb_checkpoints/main-checkpoint.ipynb | 22 ++++++++++++++++++++++
 main.ipynb                               | 22 ++++++++++++++++++++++
 main.py                                  | 22 ++++++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb
index e553d03..87a7667 100644
--- a/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -149,6 +149,28 @@
     "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
     "        df = pd.DataFrame(row_list, columns = cols)\n",
     "        # df['Segment'] = 'Individual'\n",
+    "        \n",
+    "        p98 = (\n",
+    "        df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
+    "              .quantile(0.98)\n",
+    "              .reset_index()\n",
+    "              .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
+    "        )\n",
+    "\n",
+    "        # Merge percentile back to main dataframe\n",
+    "        df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
+    "\n",
+    "        # Step 2: Identify population above 98th percentile\n",
+    "        high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
+    "\n",
+    "        # Step 3: From this high-risk population, select 0.1% random sample\n",
+    "        sample_fraction = 0.001    # 0.1%\n",
+    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
+    "\n",
+    "        # Step 4: Assign SAR_FLAG\n",
+    "        df[\"SAR_FLAG\"] = \"N\"              # default for all\n",
+    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to random 0.1% above 98th percentile\n",
+    "\n",
     "        return df"
    ]
   },
diff --git a/main.ipynb b/main.ipynb
index e553d03..87a7667 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -149,6 +149,28 @@
     "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
     "        df = pd.DataFrame(row_list, columns = cols)\n",
     "        # df['Segment'] = 'Individual'\n",
+    "        \n",
+    "        p98 = (\n",
+    "        df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
+    "              .quantile(0.98)\n",
+    "              .reset_index()\n",
+    "              .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
+    "        )\n",
+    "\n",
+    "        # Merge percentile back to main dataframe\n",
+    "        df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
+    "\n",
+    "        # Step 2: Identify population above 98th percentile\n",
+    "        high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
+    "\n",
+    "        # Step 3: From this high-risk population, select 0.1% random sample\n",
+    "        sample_fraction = 0.001    # 0.1%\n",
+    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
+    "\n",
+    "        # Step 4: Assign SAR_FLAG\n",
+    "        df[\"SAR_FLAG\"] = \"N\"              # default for all\n",
+    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to random 0.1% above 98th percentile\n",
+    "\n",
     "        return df"
    ]
   },
diff --git a/main.py b/main.py
index a92d4e7..f3efe41 100644
--- a/main.py
+++ b/main.py
@@ -102,6 +102,28 @@ class Scenario:
                 "Segment", "Risk", "SAR_FLAG"]
         df = pd.DataFrame(row_list, columns = cols)
         # df['Segment'] = 'Individual'
+        # Step 1: Compute each Segment's 98th percentile of Total_Wire_Deposit_Amt
+        p98 = (
+        df.groupby("Segment")["Total_Wire_Deposit_Amt"]
+              .quantile(0.98)
+              .reset_index()
+              .rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"})
+        )
+
+        # Merge percentile back to main dataframe
+        df = df.merge(p98, on="Segment", how="left")
+
+        # Step 2: Identify population above 98th percentile
+        high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"]
+
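+        # Step 3: Draw a 0.1% random sample of the rows above their segment's 98th percentile (NOTE(review): frac=0.001 is likely to select zero rows when only a few hundred rows qualify -- confirm intended)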
+        sample_fraction = 0.001    # 0.1%
+        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
+
+        # Step 4: Assign SAR_FLAG
+        df["SAR_FLAG"] = "N"              # default for all
+        df.loc[high_pop_indices, "SAR_FLAG"] = "Y"   # assign Y to random 0.1% above 98th percentile
+
         return df