System save at 27/11/2025 10:06 by user_client2024

This commit is contained in:
user_client2024 2025-11-27 04:36:36 +00:00
parent b66a89f986
commit cc3022fd4d
3 changed files with 66 additions and 0 deletions

View File

@ -149,6 +149,28 @@
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
" df = pd.DataFrame(row_list, columns = cols)\n",
" # df['Segment'] = 'Individual'\n",
" \n",
" p98 = (\n",
" df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
" .quantile(0.98)\n",
" .reset_index()\n",
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
" )\n",
"\n",
" # Merge percentile back to main dataframe\n",
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
"\n",
" # Step 2: Identify population above 98th percentile\n",
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
"\n",
" # Step 3: From this high-risk population, select 0.1% random sample\n",
" sample_fraction = 0.001 # 0.1%\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 4: Assign SAR_FLAG\n",
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n",
"\n",
" return df"
]
},

View File

@ -149,6 +149,28 @@
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
" df = pd.DataFrame(row_list, columns = cols)\n",
" # df['Segment'] = 'Individual'\n",
" \n",
" p98 = (\n",
" df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
" .quantile(0.98)\n",
" .reset_index()\n",
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
" )\n",
"\n",
" # Merge percentile back to main dataframe\n",
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
"\n",
" # Step 2: Identify population above 98th percentile\n",
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
"\n",
" # Step 3: From this high-risk population, select 0.1% random sample\n",
" sample_fraction = 0.001 # 0.1%\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 4: Assign SAR_FLAG\n",
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n",
"\n",
" return df"
]
},

22
main.py
View File

@ -102,6 +102,28 @@ class Scenario:
"Segment", "Risk", "SAR_FLAG"]
df = pd.DataFrame(row_list, columns = cols)
# df['Segment'] = 'Individual'
p98 = (
df.groupby("Segment")["Total_Wire_Deposit_Amt"]
.quantile(0.98)
.reset_index()
.rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"})
)
# Step 1: Merge the per-segment 98th percentile (computed above) back onto the main dataframe as P98_Value
df = df.merge(p98, on="Segment", how="left")
# Step 2: Identify population above 98th percentile
high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"]
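# Step 3: From this high-risk population, select a 0.1% random sample (NOTE(review): with frac=0.001 the sample size rounds to zero unless more than roughly 500 rows qualify - confirm this is intended)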
sample_fraction = 0.001 # 0.1%
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
# Step 4: Assign SAR_FLAG
df["SAR_FLAG"] = "N" # default for all
df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to random 0.1% above 98th percentile
return df