generated from user_client2024/184
System save at 27/11/2025 10:06 by user_client2024
This commit is contained in:
parent
b66a89f986
commit
cc3022fd4d
@ -149,6 +149,28 @@
|
|||||||
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||||
" # df['Segment'] = 'Individual'\n",
|
" # df['Segment'] = 'Individual'\n",
|
||||||
|
" \n",
|
||||||
|
" p98 = (\n",
|
||||||
|
" df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
|
||||||
|
" .quantile(0.98)\n",
|
||||||
|
" .reset_index()\n",
|
||||||
|
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Merge percentile back to main dataframe\n",
|
||||||
|
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 2: Identify population above 98th percentile\n",
|
||||||
|
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 3: From this high-risk population, select 0.1% random sample\n",
|
||||||
|
" sample_fraction = 0.001 # 0.1%\n",
|
||||||
|
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 4: Assign SAR_FLAG\n",
|
||||||
|
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||||
|
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n",
|
||||||
|
"\n",
|
||||||
" return df"
|
" return df"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
22
main.ipynb
22
main.ipynb
@ -149,6 +149,28 @@
|
|||||||
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||||
" # df['Segment'] = 'Individual'\n",
|
" # df['Segment'] = 'Individual'\n",
|
||||||
|
" \n",
|
||||||
|
" p98 = (\n",
|
||||||
|
" df.groupby(\"Segment\")[\"Total_Wire_Deposit_Amt\"]\n",
|
||||||
|
" .quantile(0.98)\n",
|
||||||
|
" .reset_index()\n",
|
||||||
|
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Merge percentile back to main dataframe\n",
|
||||||
|
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 2: Identify population above 98th percentile\n",
|
||||||
|
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 3: From this high-risk population, select 0.1% random sample\n",
|
||||||
|
" sample_fraction = 0.001 # 0.1%\n",
|
||||||
|
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 4: Assign SAR_FLAG\n",
|
||||||
|
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||||
|
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to random 0.1% above 98th percentile\n",
|
||||||
|
"\n",
|
||||||
" return df"
|
" return df"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
22
main.py
22
main.py
@ -102,6 +102,28 @@ class Scenario:
|
|||||||
"Segment", "Risk", "SAR_FLAG"]
|
"Segment", "Risk", "SAR_FLAG"]
|
||||||
df = pd.DataFrame(row_list, columns = cols)
|
df = pd.DataFrame(row_list, columns = cols)
|
||||||
# df['Segment'] = 'Individual'
|
# df['Segment'] = 'Individual'
|
||||||
|
|
||||||
|
p98 = (
|
||||||
|
df.groupby("Segment")["Total_Wire_Deposit_Amt"]
|
||||||
|
.quantile(0.98)
|
||||||
|
.reset_index()
|
||||||
|
.rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Merge percentile back to main dataframe
|
||||||
|
df = df.merge(p98, on="Segment", how="left")
|
||||||
|
|
||||||
|
# Step 2: Identify population above 98th percentile
|
||||||
|
high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"]
|
||||||
|
|
||||||
|
# Step 3: From this high-risk population, select 0.1% random sample
|
||||||
|
sample_fraction = 0.001 # 0.1%
|
||||||
|
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
|
||||||
|
|
||||||
|
# Step 4: Assign SAR_FLAG
|
||||||
|
df["SAR_FLAG"] = "N" # default for all
|
||||||
|
df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to random 0.1% above 98th percentile
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user