System save at 27/11/2025 10:06 by user_client2024

2025-11-27 04:36:36 +00:00 · 2025-11-27 04:36:36 +00:00 · cc3022fd4d
commit cc3022fd4d
parent b66a89f986
3 changed files with 66 additions and 0 deletions
--- a/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main-checkpoint.ipynb
@ -149,6 +149,28 @@
    "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
    "        df = pd.DataFrame(row_list, columns = cols)\n",
    "        # df['Segment'] = 'Individual'\n",
    "        \n",
    "        p98 = (\n",
    "        df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
    "              .quantile(0.98)\n",
    "              .reset_index()\n",
    "              .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
    "        )\n",
    "\n",
    "        # Merge percentile back to main dataframe\n",
    "        df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 2: Identify population above 98th percentile\n",
    "        high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
    "\n",
    "        # Step 3: From this high-risk population, select 0.1% random sample\n",
    "        sample_fraction = 0.001    # 0.1%\n",
    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
    "\n",
    "        # Step 4: Assign SAR_FLAG\n",
    "        df[\"SAR_FLAG\"] = \"N\"              # default for all\n",
    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to random 0.1% above 98th percentile\n",
    "\n",
    "        return df"
   ]
  },
--- a/main.ipynb
+++ b/main.ipynb
@ -149,6 +149,28 @@
    "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
    "        df = pd.DataFrame(row_list, columns = cols)\n",
    "        # df['Segment'] = 'Individual'\n",
    "        \n",
    "        p98 = (\n",
    "        df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
    "              .quantile(0.98)\n",
    "              .reset_index()\n",
    "              .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
    "        )\n",
    "\n",
    "        # Merge percentile back to main dataframe\n",
    "        df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 2: Identify population above 98th percentile\n",
    "        high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
    "\n",
    "        # Step 3: From this high-risk population, select 0.1% random sample\n",
    "        sample_fraction = 0.001    # 0.1%\n",
    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
    "\n",
    "        # Step 4: Assign SAR_FLAG\n",
    "        df[\"SAR_FLAG\"] = \"N\"              # default for all\n",
    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to random 0.1% above 98th percentile\n",
    "\n",
    "        return df"
   ]
  },
--- a/main.py
+++ b/main.py
@ -102,6 +102,28 @@ class Scenario:
                "Segment", "Risk", "SAR_FLAG"]
        df = pd.DataFrame(row_list, columns = cols)
        # df['Segment'] = 'Individual'
        p98 = (
        df.groupby("Segment")["Total_Wire_Deposit_Amt"]
              .quantile(0.98)
              .reset_index()
              .rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"})
        )
        # Step 1: Merge each segment's 98th-percentile value (computed above) back onto the main dataframe
        df = df.merge(p98, on="Segment", how="left")
        # Step 2: Identify rows whose amount is strictly above their own segment's 98th percentile
        high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"]
        # Step 3: From this high-risk population, select a random 0.1% sample (fixed seed for reproducibility)
        sample_fraction = 0.001    # 0.1%
        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
        # Step 4: Assign SAR_FLAG
        df["SAR_FLAG"] = "N"              # default for all
        df.loc[high_pop_indices, "SAR_FLAG"] = "Y"   # assign Y to random 0.1% above 98th percentile
        return df