From cc3022fd4d92d0f560863c59195f5db47a68100b Mon Sep 17 00:00:00 2001 From: user_client2024 Date: Thu, 27 Nov 2025 04:36:36 +0000 Subject: [PATCH] System save at 27/11/2025 10:06 by user_client2024 --- .ipynb_checkpoints/main-checkpoint.ipynb | 22 ++++++++++++++++++++++ main.ipynb | 22 ++++++++++++++++++++++ main.py | 22 ++++++++++++++++++++++ 3 files changed, 66 insertions(+) diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index e553d03..87a7667 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -149,6 +149,28 @@ " \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " # df['Segment'] = 'Individual'\n", + " \n", + " p98 = (\n", + " df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n", + " .quantile(0.98)\n", + " .reset_index()\n", + " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", + " )\n", + "\n", + " # Merge percentile back to main dataframe\n", + " df = df.merge(p98, on=\"Segment\", how=\"left\")\n", + "\n", + " # Step 2: Identify population above 98th percentile\n", + " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", + "\n", + " # Step 3: From this high-risk population, select 0.1% random sample\n", + " sample_fraction = 0.001 # 0.1%\n", + " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", + "\n", + " # Step 4: Assign SAR_FLAG\n", + " df[\"SAR_FLAG\"] = \"N\" # default for all\n", + " df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n", + "\n", " return df" ] }, diff --git a/main.ipynb b/main.ipynb index e553d03..87a7667 100644 --- a/main.ipynb +++ b/main.ipynb @@ -149,6 +149,28 @@ " \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " # df['Segment'] = 'Individual'\n", + " \n", + " p98 = (\n", + " df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n", + " .quantile(0.98)\n", + " .reset_index()\n", + " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", + " )\n", + "\n", + " # Merge percentile back to main dataframe\n", + " df = df.merge(p98, on=\"Segment\", how=\"left\")\n", + "\n", + " # Step 2: Identify population above 98th percentile\n", + " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", + "\n", + " # Step 3: From this high-risk population, select 0.1% random sample\n", + " sample_fraction = 0.001 # 0.1%\n", + " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", + "\n", + " # Step 4: Assign SAR_FLAG\n", + " df[\"SAR_FLAG\"] = \"N\" # default for all\n", + " df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n", + "\n", " return df" ] }, diff --git a/main.py b/main.py index a92d4e7..f3efe41 100644 --- a/main.py +++ b/main.py @@ -102,6 +102,28 @@ class Scenario: "Segment", "Risk", "SAR_FLAG"] df = pd.DataFrame(row_list, columns = cols) # df['Segment'] = 'Individual' + + p98 = ( + df.groupby("Segment")["Total_Wire_Deposit_Amt"] + .quantile(0.98) + .reset_index() + .rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"}) + ) + + # Merge percentile back to main dataframe + df = df.merge(p98, on="Segment", how="left") + + # Step 2: Identify population above 98th percentile + high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"] + + # Step 3: From this high-risk population, select 0.1% random sample + sample_fraction = 0.001 # 0.1% + high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index + + # Step 4: Assign SAR_FLAG + df["SAR_FLAG"] = "N" # default for all + df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to random 0.1% above 98th percentile + return df