generated from user_client2024/77
System save at 27/11/2025 11:27 by user_client2024
This commit is contained in:
parent
33c217b504
commit
b152e9fbc9
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -57,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -157,7 +157,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 34,
|
||||
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -180,247 +180,79 @@
|
||||
" \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
|
||||
" \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" \n",
|
||||
" # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
|
||||
" percentiles = (\n",
|
||||
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
|
||||
" \"Debit_transaction_amount\",\n",
|
||||
" \"Wash_Ratio\"]]\n",
|
||||
" .quantile(0.98)\n",
|
||||
" .reset_index()\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Rename columns for clarity\n",
|
||||
" percentiles = percentiles.rename(columns={\n",
|
||||
" \"Credit_transaction_amount\": \"P90_Credit\",\n",
|
||||
" \"Debit_transaction_amount\": \"P90_Debit\",\n",
|
||||
" \"Wash_Ratio\": \"P90_Wash\"\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Step 2: Merge back to main df\n",
|
||||
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||
"\n",
|
||||
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
|
||||
" high_pop = (\n",
|
||||
" (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
|
||||
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
|
||||
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
|
||||
" sample_fraction = 0.1 # 0.1%\n",
|
||||
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||
"\n",
|
||||
" # Step 5: Set SAR_FLAG values\n",
|
||||
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 40,
|
||||
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Focal_id</th>\n",
|
||||
" <th>Credit_transaction_amount</th>\n",
|
||||
" <th>Total_no_of_credit_transactions</th>\n",
|
||||
" <th>Debit_transaction_amount</th>\n",
|
||||
" <th>Total_no_of_debit_transactions</th>\n",
|
||||
" <th>Wash_Ratio</th>\n",
|
||||
" <th>Segment</th>\n",
|
||||
" <th>Risk</th>\n",
|
||||
" <th>SAR_FLAG</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>PN808624</td>\n",
|
||||
" <td>4.601504e+09</td>\n",
|
||||
" <td>3239</td>\n",
|
||||
" <td>4.461280e+09</td>\n",
|
||||
" <td>3129</td>\n",
|
||||
" <td>1.031431</td>\n",
|
||||
" <td>Corporate Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>PN663041</td>\n",
|
||||
" <td>2.106224e+09</td>\n",
|
||||
" <td>1573</td>\n",
|
||||
" <td>2.281829e+09</td>\n",
|
||||
" <td>1563</td>\n",
|
||||
" <td>0.923042</td>\n",
|
||||
" <td>Corporate Banking</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>PN525913</td>\n",
|
||||
" <td>1.057799e+09</td>\n",
|
||||
" <td>776</td>\n",
|
||||
" <td>1.223876e+09</td>\n",
|
||||
" <td>850</td>\n",
|
||||
" <td>0.864302</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>PN440274</td>\n",
|
||||
" <td>4.806265e+09</td>\n",
|
||||
" <td>3506</td>\n",
|
||||
" <td>4.972813e+09</td>\n",
|
||||
" <td>3599</td>\n",
|
||||
" <td>0.966508</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>PN213026</td>\n",
|
||||
" <td>3.982349e+09</td>\n",
|
||||
" <td>2809</td>\n",
|
||||
" <td>4.122674e+09</td>\n",
|
||||
" <td>2783</td>\n",
|
||||
" <td>0.965963</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10009</th>\n",
|
||||
" <td>PN774741</td>\n",
|
||||
" <td>3.373466e+07</td>\n",
|
||||
" <td>250</td>\n",
|
||||
" <td>2.443148e+07</td>\n",
|
||||
" <td>381</td>\n",
|
||||
" <td>1.380787</td>\n",
|
||||
" <td>Priority Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10010</th>\n",
|
||||
" <td>PN868326</td>\n",
|
||||
" <td>3.785344e+07</td>\n",
|
||||
" <td>259</td>\n",
|
||||
" <td>2.408309e+07</td>\n",
|
||||
" <td>352</td>\n",
|
||||
" <td>1.571785</td>\n",
|
||||
" <td>Ultra High NetWorth</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>Y</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10011</th>\n",
|
||||
" <td>PN667837</td>\n",
|
||||
" <td>3.330357e+07</td>\n",
|
||||
" <td>256</td>\n",
|
||||
" <td>2.676301e+07</td>\n",
|
||||
" <td>359</td>\n",
|
||||
" <td>1.244388</td>\n",
|
||||
" <td>Mass Market</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10012</th>\n",
|
||||
" <td>PN809566</td>\n",
|
||||
" <td>3.890076e+07</td>\n",
|
||||
" <td>276</td>\n",
|
||||
" <td>2.554121e+07</td>\n",
|
||||
" <td>400</td>\n",
|
||||
" <td>1.523059</td>\n",
|
||||
" <td>Others</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10013</th>\n",
|
||||
" <td>PN739647</td>\n",
|
||||
" <td>3.505184e+07</td>\n",
|
||||
" <td>223</td>\n",
|
||||
" <td>2.232980e+07</td>\n",
|
||||
" <td>381</td>\n",
|
||||
" <td>1.569734</td>\n",
|
||||
" <td>Others</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10014 rows × 9 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
|
||||
"0 PN808624 4.601504e+09 3239 \n",
|
||||
"1 PN663041 2.106224e+09 1573 \n",
|
||||
"2 PN525913 1.057799e+09 776 \n",
|
||||
"3 PN440274 4.806265e+09 3506 \n",
|
||||
"4 PN213026 3.982349e+09 2809 \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 PN774741 3.373466e+07 250 \n",
|
||||
"10010 PN868326 3.785344e+07 259 \n",
|
||||
"10011 PN667837 3.330357e+07 256 \n",
|
||||
"10012 PN809566 3.890076e+07 276 \n",
|
||||
"10013 PN739647 3.505184e+07 223 \n",
|
||||
"\n",
|
||||
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
|
||||
"0 4.461280e+09 3129 1.031431 \n",
|
||||
"1 2.281829e+09 1563 0.923042 \n",
|
||||
"2 1.223876e+09 850 0.864302 \n",
|
||||
"3 4.972813e+09 3599 0.966508 \n",
|
||||
"4 4.122674e+09 2783 0.965963 \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 2.443148e+07 381 1.380787 \n",
|
||||
"10010 2.408309e+07 352 1.571785 \n",
|
||||
"10011 2.676301e+07 359 1.244388 \n",
|
||||
"10012 2.554121e+07 400 1.523059 \n",
|
||||
"10013 2.232980e+07 381 1.569734 \n",
|
||||
"\n",
|
||||
" Segment Risk SAR_FLAG \n",
|
||||
"0 Corporate Banking Medium Risk N \n",
|
||||
"1 Corporate Banking Low Risk N \n",
|
||||
"2 Whole Sale Banking Low Risk N \n",
|
||||
"3 Whole Sale Banking Medium Risk N \n",
|
||||
"4 Whole Sale Banking Medium Risk N \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 Priority Banking Medium Risk N \n",
|
||||
"10010 Ultra High NetWorth Medium Risk Y \n",
|
||||
"10011 Mass Market Medium Risk N \n",
|
||||
"10012 Others Low Risk N \n",
|
||||
"10013 Others Low Risk N \n",
|
||||
"\n",
|
||||
"[10014 rows x 9 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
"# a = sen.logic()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
|
||||
"metadata": {},
|
||||
"execution_count": 39,
|
||||
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
288
main.ipynb
288
main.ipynb
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -57,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 6,
|
||||
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -157,7 +157,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 34,
|
||||
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -180,247 +180,79 @@
|
||||
" \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
|
||||
" \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" \n",
|
||||
" # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
|
||||
" percentiles = (\n",
|
||||
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
|
||||
" \"Debit_transaction_amount\",\n",
|
||||
" \"Wash_Ratio\"]]\n",
|
||||
" .quantile(0.98)\n",
|
||||
" .reset_index()\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Rename columns for clarity\n",
|
||||
" percentiles = percentiles.rename(columns={\n",
|
||||
" \"Credit_transaction_amount\": \"P90_Credit\",\n",
|
||||
" \"Debit_transaction_amount\": \"P90_Debit\",\n",
|
||||
" \"Wash_Ratio\": \"P90_Wash\"\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Step 2: Merge back to main df\n",
|
||||
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||
"\n",
|
||||
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
|
||||
" high_pop = (\n",
|
||||
" (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
|
||||
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
|
||||
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
|
||||
" sample_fraction = 0.1 # 0.1%\n",
|
||||
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||
"\n",
|
||||
" # Step 5: Set SAR_FLAG values\n",
|
||||
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 40,
|
||||
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Focal_id</th>\n",
|
||||
" <th>Credit_transaction_amount</th>\n",
|
||||
" <th>Total_no_of_credit_transactions</th>\n",
|
||||
" <th>Debit_transaction_amount</th>\n",
|
||||
" <th>Total_no_of_debit_transactions</th>\n",
|
||||
" <th>Wash_Ratio</th>\n",
|
||||
" <th>Segment</th>\n",
|
||||
" <th>Risk</th>\n",
|
||||
" <th>SAR_FLAG</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>PN808624</td>\n",
|
||||
" <td>4.601504e+09</td>\n",
|
||||
" <td>3239</td>\n",
|
||||
" <td>4.461280e+09</td>\n",
|
||||
" <td>3129</td>\n",
|
||||
" <td>1.031431</td>\n",
|
||||
" <td>Corporate Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>PN663041</td>\n",
|
||||
" <td>2.106224e+09</td>\n",
|
||||
" <td>1573</td>\n",
|
||||
" <td>2.281829e+09</td>\n",
|
||||
" <td>1563</td>\n",
|
||||
" <td>0.923042</td>\n",
|
||||
" <td>Corporate Banking</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>PN525913</td>\n",
|
||||
" <td>1.057799e+09</td>\n",
|
||||
" <td>776</td>\n",
|
||||
" <td>1.223876e+09</td>\n",
|
||||
" <td>850</td>\n",
|
||||
" <td>0.864302</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>PN440274</td>\n",
|
||||
" <td>4.806265e+09</td>\n",
|
||||
" <td>3506</td>\n",
|
||||
" <td>4.972813e+09</td>\n",
|
||||
" <td>3599</td>\n",
|
||||
" <td>0.966508</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>PN213026</td>\n",
|
||||
" <td>3.982349e+09</td>\n",
|
||||
" <td>2809</td>\n",
|
||||
" <td>4.122674e+09</td>\n",
|
||||
" <td>2783</td>\n",
|
||||
" <td>0.965963</td>\n",
|
||||
" <td>Whole Sale Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10009</th>\n",
|
||||
" <td>PN774741</td>\n",
|
||||
" <td>3.373466e+07</td>\n",
|
||||
" <td>250</td>\n",
|
||||
" <td>2.443148e+07</td>\n",
|
||||
" <td>381</td>\n",
|
||||
" <td>1.380787</td>\n",
|
||||
" <td>Priority Banking</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10010</th>\n",
|
||||
" <td>PN868326</td>\n",
|
||||
" <td>3.785344e+07</td>\n",
|
||||
" <td>259</td>\n",
|
||||
" <td>2.408309e+07</td>\n",
|
||||
" <td>352</td>\n",
|
||||
" <td>1.571785</td>\n",
|
||||
" <td>Ultra High NetWorth</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>Y</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10011</th>\n",
|
||||
" <td>PN667837</td>\n",
|
||||
" <td>3.330357e+07</td>\n",
|
||||
" <td>256</td>\n",
|
||||
" <td>2.676301e+07</td>\n",
|
||||
" <td>359</td>\n",
|
||||
" <td>1.244388</td>\n",
|
||||
" <td>Mass Market</td>\n",
|
||||
" <td>Medium Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10012</th>\n",
|
||||
" <td>PN809566</td>\n",
|
||||
" <td>3.890076e+07</td>\n",
|
||||
" <td>276</td>\n",
|
||||
" <td>2.554121e+07</td>\n",
|
||||
" <td>400</td>\n",
|
||||
" <td>1.523059</td>\n",
|
||||
" <td>Others</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>10013</th>\n",
|
||||
" <td>PN739647</td>\n",
|
||||
" <td>3.505184e+07</td>\n",
|
||||
" <td>223</td>\n",
|
||||
" <td>2.232980e+07</td>\n",
|
||||
" <td>381</td>\n",
|
||||
" <td>1.569734</td>\n",
|
||||
" <td>Others</td>\n",
|
||||
" <td>Low Risk</td>\n",
|
||||
" <td>N</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>10014 rows × 9 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
|
||||
"0 PN808624 4.601504e+09 3239 \n",
|
||||
"1 PN663041 2.106224e+09 1573 \n",
|
||||
"2 PN525913 1.057799e+09 776 \n",
|
||||
"3 PN440274 4.806265e+09 3506 \n",
|
||||
"4 PN213026 3.982349e+09 2809 \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 PN774741 3.373466e+07 250 \n",
|
||||
"10010 PN868326 3.785344e+07 259 \n",
|
||||
"10011 PN667837 3.330357e+07 256 \n",
|
||||
"10012 PN809566 3.890076e+07 276 \n",
|
||||
"10013 PN739647 3.505184e+07 223 \n",
|
||||
"\n",
|
||||
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
|
||||
"0 4.461280e+09 3129 1.031431 \n",
|
||||
"1 2.281829e+09 1563 0.923042 \n",
|
||||
"2 1.223876e+09 850 0.864302 \n",
|
||||
"3 4.972813e+09 3599 0.966508 \n",
|
||||
"4 4.122674e+09 2783 0.965963 \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 2.443148e+07 381 1.380787 \n",
|
||||
"10010 2.408309e+07 352 1.571785 \n",
|
||||
"10011 2.676301e+07 359 1.244388 \n",
|
||||
"10012 2.554121e+07 400 1.523059 \n",
|
||||
"10013 2.232980e+07 381 1.569734 \n",
|
||||
"\n",
|
||||
" Segment Risk SAR_FLAG \n",
|
||||
"0 Corporate Banking Medium Risk N \n",
|
||||
"1 Corporate Banking Low Risk N \n",
|
||||
"2 Whole Sale Banking Low Risk N \n",
|
||||
"3 Whole Sale Banking Medium Risk N \n",
|
||||
"4 Whole Sale Banking Medium Risk N \n",
|
||||
"... ... ... ... \n",
|
||||
"10009 Priority Banking Medium Risk N \n",
|
||||
"10010 Ultra High NetWorth Medium Risk Y \n",
|
||||
"10011 Mass Market Medium Risk N \n",
|
||||
"10012 Others Low Risk N \n",
|
||||
"10013 Others Low Risk N \n",
|
||||
"\n",
|
||||
"[10014 rows x 9 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
"# a = sen.logic()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
|
||||
"metadata": {},
|
||||
"execution_count": 39,
|
||||
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# a"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
52
main.py
52
main.py
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[1]:
|
||||
# In[4]:
|
||||
|
||||
|
||||
import pandas as pd
|
||||
@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema")
|
||||
seq.execute_raw("show tables")
|
||||
|
||||
|
||||
# In[7]:
|
||||
# In[6]:
|
||||
|
||||
|
||||
query = """
|
||||
@ -114,7 +114,7 @@ query = """
|
||||
"""
|
||||
|
||||
|
||||
# In[8]:
|
||||
# In[34]:
|
||||
|
||||
|
||||
from tms_data_interface import SQLQueryInterface
|
||||
@ -133,18 +133,58 @@ class Scenario:
|
||||
"Debit_transaction_amount", "Total_no_of_debit_transactions",
|
||||
"Wash_Ratio", "Segment", "Risk", "SAR_FLAG"]
|
||||
df = pd.DataFrame(row_list, columns = cols)
|
||||
|
||||
# Step 1: Compute the 98th percentile (quantile 0.98) per Segment for all 3 fields (stored in the P90_* columns)
|
||||
percentiles = (
|
||||
df.groupby("Segment")[["Credit_transaction_amount",
|
||||
"Debit_transaction_amount",
|
||||
"Wash_Ratio"]]
|
||||
.quantile(0.98)
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
# Rename columns for clarity
|
||||
percentiles = percentiles.rename(columns={
|
||||
"Credit_transaction_amount": "P90_Credit",
|
||||
"Debit_transaction_amount": "P90_Debit",
|
||||
"Wash_Ratio": "P90_Wash"
|
||||
})
|
||||
|
||||
# Step 2: Merge back to main df
|
||||
df = df.merge(percentiles, on="Segment", how="left")
|
||||
|
||||
# Step 3: Identify customers above their Segment's 98th-percentile threshold in ANY of the 3 metrics
|
||||
high_pop = (
|
||||
(df["Credit_transaction_amount"] > df["P90_Credit"]) |
|
||||
(df["Debit_transaction_amount"] > df["P90_Debit"]) |
|
||||
(df["Wash_Ratio"] > df["P90_Wash"])
|
||||
)
|
||||
|
||||
# Step 4: Randomly sample a fraction of the high-risk population (NOTE: frac=0.1 selects 10%, not 0.1%; use 0.001 if 0.1% is intended)
|
||||
sample_fraction = 0.1 # NOTE: as a pandas `frac` this is 10% of rows, not 0.1% - confirm intended rate
|
||||
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
|
||||
|
||||
# Step 5: Set SAR_FLAG values
|
||||
df["SAR_FLAG"] = "N" # default for all
|
||||
df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to the randomly sampled high-risk rows
|
||||
return df
|
||||
|
||||
|
||||
# In[9]:
|
||||
# In[40]:
|
||||
|
||||
|
||||
# sen = Scenario()
|
||||
# sen.logic()
|
||||
# a = sen.logic()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
# In[39]:
|
||||
|
||||
|
||||
# a
|
||||
|
||||
|
||||
# In[38]:
|
||||
|
||||
|
||||
# a[a["SAR_FLAG"] == "Y"]
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user