diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb
index d4445a1..d0ae983 100644
--- a/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
    "metadata": {
     "tags": []
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
    "metadata": {
     "tags": []
@@ -157,7 +157,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 34,
    "id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
    "metadata": {
     "tags": []
@@ -180,247 +180,79 @@
     "                \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
     "                \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
     "        df = pd.DataFrame(row_list, columns = cols)\n",
+    "        \n",
+    "        # Step 1: Compute the 0.98 quantile (98th percentile) per Segment for all 3 fields; the P90_* columns hold these values\n",
+    "        percentiles = (\n",
+    "            df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
+    "                                   \"Debit_transaction_amount\",\n",
+    "                                   \"Wash_Ratio\"]]\n",
+    "              .quantile(0.98)\n",
+    "              .reset_index()\n",
+    "        )\n",
+    "\n",
+    "        # Rename columns for clarity\n",
+    "        percentiles = percentiles.rename(columns={\n",
+    "            \"Credit_transaction_amount\": \"P90_Credit\",\n",
+    "            \"Debit_transaction_amount\": \"P90_Debit\",\n",
+    "            \"Wash_Ratio\": \"P90_Wash\"\n",
+    "        })\n",
+    "\n",
+    "        # Step 2: Merge back to main df\n",
+    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
+    "\n",
+    "        # Step 3: Identify customers above their Segment's 0.98-quantile threshold in ANY of the 3 metrics\n",
+    "        high_pop = (\n",
+    "            (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
+    "            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
+    "            (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
+    "        )\n",
+    "\n",
+    "        # Step 4: Randomly select a 10% sample (frac=0.1) from the high-risk population\n",
+    "        sample_fraction = 0.1   # fraction, not percent: 0.1 == 10%; use 0.001 if 0.1% is intended\n",
+    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
+    "\n",
+    "        # Step 5: Set SAR_FLAG values\n",
+    "        df[\"SAR_FLAG\"] = \"N\"   # default for all\n",
+    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to the randomly sampled high-risk rows\n",
     "        return df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 40,
    "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Focal_id</th>\n",
-       "      <th>Credit_transaction_amount</th>\n",
-       "      <th>Total_no_of_credit_transactions</th>\n",
-       "      <th>Debit_transaction_amount</th>\n",
-       "      <th>Total_no_of_debit_transactions</th>\n",
-       "      <th>Wash_Ratio</th>\n",
-       "      <th>Segment</th>\n",
-       "      <th>Risk</th>\n",
-       "      <th>SAR_FLAG</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>PN808624</td>\n",
-       "      <td>4.601504e+09</td>\n",
-       "      <td>3239</td>\n",
-       "      <td>4.461280e+09</td>\n",
-       "      <td>3129</td>\n",
-       "      <td>1.031431</td>\n",
-       "      <td>Corporate Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>PN663041</td>\n",
-       "      <td>2.106224e+09</td>\n",
-       "      <td>1573</td>\n",
-       "      <td>2.281829e+09</td>\n",
-       "      <td>1563</td>\n",
-       "      <td>0.923042</td>\n",
-       "      <td>Corporate Banking</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>PN525913</td>\n",
-       "      <td>1.057799e+09</td>\n",
-       "      <td>776</td>\n",
-       "      <td>1.223876e+09</td>\n",
-       "      <td>850</td>\n",
-       "      <td>0.864302</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>PN440274</td>\n",
-       "      <td>4.806265e+09</td>\n",
-       "      <td>3506</td>\n",
-       "      <td>4.972813e+09</td>\n",
-       "      <td>3599</td>\n",
-       "      <td>0.966508</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>PN213026</td>\n",
-       "      <td>3.982349e+09</td>\n",
-       "      <td>2809</td>\n",
-       "      <td>4.122674e+09</td>\n",
-       "      <td>2783</td>\n",
-       "      <td>0.965963</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10009</th>\n",
-       "      <td>PN774741</td>\n",
-       "      <td>3.373466e+07</td>\n",
-       "      <td>250</td>\n",
-       "      <td>2.443148e+07</td>\n",
-       "      <td>381</td>\n",
-       "      <td>1.380787</td>\n",
-       "      <td>Priority Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10010</th>\n",
-       "      <td>PN868326</td>\n",
-       "      <td>3.785344e+07</td>\n",
-       "      <td>259</td>\n",
-       "      <td>2.408309e+07</td>\n",
-       "      <td>352</td>\n",
-       "      <td>1.571785</td>\n",
-       "      <td>Ultra High NetWorth</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>Y</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10011</th>\n",
-       "      <td>PN667837</td>\n",
-       "      <td>3.330357e+07</td>\n",
-       "      <td>256</td>\n",
-       "      <td>2.676301e+07</td>\n",
-       "      <td>359</td>\n",
-       "      <td>1.244388</td>\n",
-       "      <td>Mass Market</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10012</th>\n",
-       "      <td>PN809566</td>\n",
-       "      <td>3.890076e+07</td>\n",
-       "      <td>276</td>\n",
-       "      <td>2.554121e+07</td>\n",
-       "      <td>400</td>\n",
-       "      <td>1.523059</td>\n",
-       "      <td>Others</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10013</th>\n",
-       "      <td>PN739647</td>\n",
-       "      <td>3.505184e+07</td>\n",
-       "      <td>223</td>\n",
-       "      <td>2.232980e+07</td>\n",
-       "      <td>381</td>\n",
-       "      <td>1.569734</td>\n",
-       "      <td>Others</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10014 rows × 9 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "       Focal_id  Credit_transaction_amount  Total_no_of_credit_transactions  \\\n",
-       "0      PN808624               4.601504e+09                             3239   \n",
-       "1      PN663041               2.106224e+09                             1573   \n",
-       "2      PN525913               1.057799e+09                              776   \n",
-       "3      PN440274               4.806265e+09                             3506   \n",
-       "4      PN213026               3.982349e+09                             2809   \n",
-       "...         ...                        ...                              ...   \n",
-       "10009  PN774741               3.373466e+07                              250   \n",
-       "10010  PN868326               3.785344e+07                              259   \n",
-       "10011  PN667837               3.330357e+07                              256   \n",
-       "10012  PN809566               3.890076e+07                              276   \n",
-       "10013  PN739647               3.505184e+07                              223   \n",
-       "\n",
-       "       Debit_transaction_amount  Total_no_of_debit_transactions  Wash_Ratio  \\\n",
-       "0                  4.461280e+09                            3129    1.031431   \n",
-       "1                  2.281829e+09                            1563    0.923042   \n",
-       "2                  1.223876e+09                             850    0.864302   \n",
-       "3                  4.972813e+09                            3599    0.966508   \n",
-       "4                  4.122674e+09                            2783    0.965963   \n",
-       "...                         ...                             ...         ...   \n",
-       "10009              2.443148e+07                             381    1.380787   \n",
-       "10010              2.408309e+07                             352    1.571785   \n",
-       "10011              2.676301e+07                             359    1.244388   \n",
-       "10012              2.554121e+07                             400    1.523059   \n",
-       "10013              2.232980e+07                             381    1.569734   \n",
-       "\n",
-       "                   Segment         Risk SAR_FLAG  \n",
-       "0        Corporate Banking  Medium Risk        N  \n",
-       "1        Corporate Banking     Low Risk        N  \n",
-       "2       Whole Sale Banking     Low Risk        N  \n",
-       "3       Whole Sale Banking  Medium Risk        N  \n",
-       "4       Whole Sale Banking  Medium Risk        N  \n",
-       "...                    ...          ...      ...  \n",
-       "10009     Priority Banking  Medium Risk        N  \n",
-       "10010  Ultra High NetWorth  Medium Risk        Y  \n",
-       "10011          Mass Market  Medium Risk        N  \n",
-       "10012               Others     Low Risk        N  \n",
-       "10013               Others     Low Risk        N  \n",
-       "\n",
-       "[10014 rows x 9 columns]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# sen = Scenario()\n",
-    "# sen.logic()"
+    "# a = sen.logic()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
-   "metadata": {},
+   "execution_count": 39,
+   "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
-   "source": []
+   "source": [
+    "# a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# a[a[\"SAR_FLAG\"] == \"Y\"]"
+   ]
   }
  ],
  "metadata": {
diff --git a/main.ipynb b/main.ipynb
index d4445a1..d0ae983 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
    "metadata": {
     "tags": []
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
    "metadata": {
     "tags": []
@@ -157,7 +157,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 34,
    "id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
    "metadata": {
     "tags": []
@@ -180,247 +180,79 @@
     "                \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
     "                \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
     "        df = pd.DataFrame(row_list, columns = cols)\n",
+    "        \n",
+    "        # Step 1: Compute the 0.98 quantile (98th percentile) per Segment for all 3 fields; the P90_* columns hold these values\n",
+    "        percentiles = (\n",
+    "            df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
+    "                                   \"Debit_transaction_amount\",\n",
+    "                                   \"Wash_Ratio\"]]\n",
+    "              .quantile(0.98)\n",
+    "              .reset_index()\n",
+    "        )\n",
+    "\n",
+    "        # Rename columns for clarity\n",
+    "        percentiles = percentiles.rename(columns={\n",
+    "            \"Credit_transaction_amount\": \"P90_Credit\",\n",
+    "            \"Debit_transaction_amount\": \"P90_Debit\",\n",
+    "            \"Wash_Ratio\": \"P90_Wash\"\n",
+    "        })\n",
+    "\n",
+    "        # Step 2: Merge back to main df\n",
+    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
+    "\n",
+    "        # Step 3: Identify customers above their Segment's 0.98-quantile threshold in ANY of the 3 metrics\n",
+    "        high_pop = (\n",
+    "            (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
+    "            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
+    "            (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
+    "        )\n",
+    "\n",
+    "        # Step 4: Randomly select a 10% sample (frac=0.1) from the high-risk population\n",
+    "        sample_fraction = 0.1   # fraction, not percent: 0.1 == 10%; use 0.001 if 0.1% is intended\n",
+    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
+    "\n",
+    "        # Step 5: Set SAR_FLAG values\n",
+    "        df[\"SAR_FLAG\"] = \"N\"   # default for all\n",
+    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to the randomly sampled high-risk rows\n",
     "        return df"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 40,
    "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
    "metadata": {
     "tags": []
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Focal_id</th>\n",
-       "      <th>Credit_transaction_amount</th>\n",
-       "      <th>Total_no_of_credit_transactions</th>\n",
-       "      <th>Debit_transaction_amount</th>\n",
-       "      <th>Total_no_of_debit_transactions</th>\n",
-       "      <th>Wash_Ratio</th>\n",
-       "      <th>Segment</th>\n",
-       "      <th>Risk</th>\n",
-       "      <th>SAR_FLAG</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>PN808624</td>\n",
-       "      <td>4.601504e+09</td>\n",
-       "      <td>3239</td>\n",
-       "      <td>4.461280e+09</td>\n",
-       "      <td>3129</td>\n",
-       "      <td>1.031431</td>\n",
-       "      <td>Corporate Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>PN663041</td>\n",
-       "      <td>2.106224e+09</td>\n",
-       "      <td>1573</td>\n",
-       "      <td>2.281829e+09</td>\n",
-       "      <td>1563</td>\n",
-       "      <td>0.923042</td>\n",
-       "      <td>Corporate Banking</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>PN525913</td>\n",
-       "      <td>1.057799e+09</td>\n",
-       "      <td>776</td>\n",
-       "      <td>1.223876e+09</td>\n",
-       "      <td>850</td>\n",
-       "      <td>0.864302</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>PN440274</td>\n",
-       "      <td>4.806265e+09</td>\n",
-       "      <td>3506</td>\n",
-       "      <td>4.972813e+09</td>\n",
-       "      <td>3599</td>\n",
-       "      <td>0.966508</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>PN213026</td>\n",
-       "      <td>3.982349e+09</td>\n",
-       "      <td>2809</td>\n",
-       "      <td>4.122674e+09</td>\n",
-       "      <td>2783</td>\n",
-       "      <td>0.965963</td>\n",
-       "      <td>Whole Sale Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10009</th>\n",
-       "      <td>PN774741</td>\n",
-       "      <td>3.373466e+07</td>\n",
-       "      <td>250</td>\n",
-       "      <td>2.443148e+07</td>\n",
-       "      <td>381</td>\n",
-       "      <td>1.380787</td>\n",
-       "      <td>Priority Banking</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10010</th>\n",
-       "      <td>PN868326</td>\n",
-       "      <td>3.785344e+07</td>\n",
-       "      <td>259</td>\n",
-       "      <td>2.408309e+07</td>\n",
-       "      <td>352</td>\n",
-       "      <td>1.571785</td>\n",
-       "      <td>Ultra High NetWorth</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>Y</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10011</th>\n",
-       "      <td>PN667837</td>\n",
-       "      <td>3.330357e+07</td>\n",
-       "      <td>256</td>\n",
-       "      <td>2.676301e+07</td>\n",
-       "      <td>359</td>\n",
-       "      <td>1.244388</td>\n",
-       "      <td>Mass Market</td>\n",
-       "      <td>Medium Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10012</th>\n",
-       "      <td>PN809566</td>\n",
-       "      <td>3.890076e+07</td>\n",
-       "      <td>276</td>\n",
-       "      <td>2.554121e+07</td>\n",
-       "      <td>400</td>\n",
-       "      <td>1.523059</td>\n",
-       "      <td>Others</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10013</th>\n",
-       "      <td>PN739647</td>\n",
-       "      <td>3.505184e+07</td>\n",
-       "      <td>223</td>\n",
-       "      <td>2.232980e+07</td>\n",
-       "      <td>381</td>\n",
-       "      <td>1.569734</td>\n",
-       "      <td>Others</td>\n",
-       "      <td>Low Risk</td>\n",
-       "      <td>N</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10014 rows × 9 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "       Focal_id  Credit_transaction_amount  Total_no_of_credit_transactions  \\\n",
-       "0      PN808624               4.601504e+09                             3239   \n",
-       "1      PN663041               2.106224e+09                             1573   \n",
-       "2      PN525913               1.057799e+09                              776   \n",
-       "3      PN440274               4.806265e+09                             3506   \n",
-       "4      PN213026               3.982349e+09                             2809   \n",
-       "...         ...                        ...                              ...   \n",
-       "10009  PN774741               3.373466e+07                              250   \n",
-       "10010  PN868326               3.785344e+07                              259   \n",
-       "10011  PN667837               3.330357e+07                              256   \n",
-       "10012  PN809566               3.890076e+07                              276   \n",
-       "10013  PN739647               3.505184e+07                              223   \n",
-       "\n",
-       "       Debit_transaction_amount  Total_no_of_debit_transactions  Wash_Ratio  \\\n",
-       "0                  4.461280e+09                            3129    1.031431   \n",
-       "1                  2.281829e+09                            1563    0.923042   \n",
-       "2                  1.223876e+09                             850    0.864302   \n",
-       "3                  4.972813e+09                            3599    0.966508   \n",
-       "4                  4.122674e+09                            2783    0.965963   \n",
-       "...                         ...                             ...         ...   \n",
-       "10009              2.443148e+07                             381    1.380787   \n",
-       "10010              2.408309e+07                             352    1.571785   \n",
-       "10011              2.676301e+07                             359    1.244388   \n",
-       "10012              2.554121e+07                             400    1.523059   \n",
-       "10013              2.232980e+07                             381    1.569734   \n",
-       "\n",
-       "                   Segment         Risk SAR_FLAG  \n",
-       "0        Corporate Banking  Medium Risk        N  \n",
-       "1        Corporate Banking     Low Risk        N  \n",
-       "2       Whole Sale Banking     Low Risk        N  \n",
-       "3       Whole Sale Banking  Medium Risk        N  \n",
-       "4       Whole Sale Banking  Medium Risk        N  \n",
-       "...                    ...          ...      ...  \n",
-       "10009     Priority Banking  Medium Risk        N  \n",
-       "10010  Ultra High NetWorth  Medium Risk        Y  \n",
-       "10011          Mass Market  Medium Risk        N  \n",
-       "10012               Others     Low Risk        N  \n",
-       "10013               Others     Low Risk        N  \n",
-       "\n",
-       "[10014 rows x 9 columns]"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# sen = Scenario()\n",
-    "# sen.logic()"
+    "# a = sen.logic()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
-   "metadata": {},
+   "execution_count": 39,
+   "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
-   "source": []
+   "source": [
+    "# a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# a[a[\"SAR_FLAG\"] == \"Y\"]"
+   ]
   }
  ],
  "metadata": {
diff --git a/main.py b/main.py
index 2f5e9af..98e9fe8 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-# In[1]:
+# In[4]:
 
 
 import pandas as pd
@@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema")
 seq.execute_raw("show tables")
 
 
-# In[7]:
+# In[6]:
 
 
 query = """
@@ -114,7 +114,7 @@ query = """
 """
 
 
-# In[8]:
+# In[34]:
 
 
 from tms_data_interface import SQLQueryInterface
@@ -133,18 +133,58 @@ class Scenario:
                 "Debit_transaction_amount", "Total_no_of_debit_transactions",
                 "Wash_Ratio", "Segment", "Risk", "SAR_FLAG"]
         df = pd.DataFrame(row_list, columns = cols)
+        
+        # Step 1: Compute the 0.98 quantile (98th percentile) per Segment for all 3 fields; the P90_* columns hold these values
+        percentiles = (
+            df.groupby("Segment")[["Credit_transaction_amount",
+                                   "Debit_transaction_amount",
+                                   "Wash_Ratio"]]
+              .quantile(0.98)
+              .reset_index()
+        )
+
+        # Rename columns for clarity
+        percentiles = percentiles.rename(columns={
+            "Credit_transaction_amount": "P90_Credit",
+            "Debit_transaction_amount": "P90_Debit",
+            "Wash_Ratio": "P90_Wash"
+        })
+
+        # Step 2: Merge back to main df
+        df = df.merge(percentiles, on="Segment", how="left")
+
+        # Step 3: Identify customers above their Segment's 0.98-quantile threshold in ANY of the 3 metrics
+        high_pop = (
+            (df["Credit_transaction_amount"] > df["P90_Credit"]) |
+            (df["Debit_transaction_amount"] > df["P90_Debit"]) |
+            (df["Wash_Ratio"] > df["P90_Wash"])
+        )
+
+        # Step 4: Randomly select a 10% sample (frac=0.1) from the high-risk population
+        sample_fraction = 0.1   # fraction, not percent: 0.1 == 10%; use 0.001 if 0.1% is intended
+        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
+
+        # Step 5: Set SAR_FLAG values
+        df["SAR_FLAG"] = "N"   # default for all
+        df.loc[high_pop_indices, "SAR_FLAG"] = "Y"   # assign Y to the randomly sampled high-risk rows
         return df
 
 
-# In[9]:
+# In[40]:
 
 
 # sen = Scenario()
-# sen.logic()
+# a = sen.logic()
 
 
-# In[ ]:
+# In[39]:
 
 
+# a
 
 
+# In[38]:
+
+
+# a[a["SAR_FLAG"] == "Y"]
+

	Focal_id	Credit_transaction_amount	Total_no_of_credit_transactions	Debit_transaction_amount	Total_no_of_debit_transactions	Wash_Ratio	Segment	Risk	SAR_FLAG
0	PN808624	4.601504e+09	3239	4.461280e+09	3129	1.031431	Corporate Banking	Medium Risk	N
1	PN663041	2.106224e+09	1573	2.281829e+09	1563	0.923042	Corporate Banking	Low Risk	N
2	PN525913	1.057799e+09	776	1.223876e+09	850	0.864302	Whole Sale Banking	Low Risk	N
3	PN440274	4.806265e+09	3506	4.972813e+09	3599	0.966508	Whole Sale Banking	Medium Risk	N
4	PN213026	3.982349e+09	2809	4.122674e+09	2783	0.965963	Whole Sale Banking	Medium Risk	N
...	...	...	...	...	...	...	...	...	...
10009	PN774741	3.373466e+07	250	2.443148e+07	381	1.380787	Priority Banking	Medium Risk	N
10010	PN868326	3.785344e+07	259	2.408309e+07	352	1.571785	Ultra High NetWorth	Medium Risk	Y
10011	PN667837	3.330357e+07	256	2.676301e+07	359	1.244388	Mass Market	Medium Risk	N
10012	PN809566	3.890076e+07	276	2.554121e+07	400	1.523059	Others	Low Risk	N
10013	PN739647	3.505184e+07	223	2.232980e+07	381	1.569734	Others	Low Risk	N