generated from user_client2024/203
Initial commit
This commit is contained in:
commit
35c02db444
176
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
176
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
" select final.CUSTOMER_NUMBER_main as Focal_id,\n",
|
||||||
|
" CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
|
||||||
|
" final.Cash_deposit_count,\n",
|
||||||
|
" final.SEGMENT,\n",
|
||||||
|
" final.RISK,\n",
|
||||||
|
" final.SAR_FLAG\n",
|
||||||
|
"from \n",
|
||||||
|
"(\n",
|
||||||
|
" (\n",
|
||||||
|
" select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
|
||||||
|
" subquery.Cash_deposit_total,\n",
|
||||||
|
" subquery.Cash_deposit_count\n",
|
||||||
|
" from \n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_1, \n",
|
||||||
|
" sum(transaction_amount) as Cash_deposit_total, \n",
|
||||||
|
" count(*) as Cash_deposit_count\n",
|
||||||
|
" from \n",
|
||||||
|
" (\n",
|
||||||
|
" select * \n",
|
||||||
|
" from {trans_data} trans_table \n",
|
||||||
|
" left join {acc_data} acc_table\n",
|
||||||
|
" on trans_table.benef_account_number = acc_table.account_number\n",
|
||||||
|
" ) trans\n",
|
||||||
|
" where account_number not in ('None')\n",
|
||||||
|
" and transaction_desc = 'CASH RELATED TRANSACTION'\n",
|
||||||
|
" group by customer_number\n",
|
||||||
|
" ) subquery\n",
|
||||||
|
" ) main \n",
|
||||||
|
" left join \n",
|
||||||
|
" (\n",
|
||||||
|
" select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
|
||||||
|
" cd.SEGMENT,\n",
|
||||||
|
" cd.RISK,\n",
|
||||||
|
" case\n",
|
||||||
|
" when ad.SAR_FLAG is NULL then 'N'\n",
|
||||||
|
" else ad.SAR_FLAG\n",
|
||||||
|
" end as SAR_FLAG \n",
|
||||||
|
" from\n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_3, \n",
|
||||||
|
" business_segment as SEGMENT,\n",
|
||||||
|
" case\n",
|
||||||
|
" when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
|
||||||
|
" when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
|
||||||
|
" when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
|
||||||
|
" else 'Unknown Risk'\n",
|
||||||
|
" end AS RISK\n",
|
||||||
|
" from {cust_data}\n",
|
||||||
|
" ) cd \n",
|
||||||
|
" left join\n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_4, \n",
|
||||||
|
" sar_flag as SAR_FLAG\n",
|
||||||
|
" from {alert_data}\n",
|
||||||
|
" ) ad \n",
|
||||||
|
" on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
|
||||||
|
" ) as cust_alert\n",
|
||||||
|
" on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
|
||||||
|
") as final\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"from tms_data_interface import SQLQueryInterface\n",
|
||||||
|
"\n",
|
||||||
|
"class Scenario:\n",
|
||||||
|
" seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||||
|
"\n",
|
||||||
|
" def logic(self, **kwargs):\n",
|
||||||
|
" row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||||
|
" cust_data=\"customer_data_v1\",\n",
|
||||||
|
" acc_data=\"account_data_v1\",\n",
|
||||||
|
" alert_data=\"alert_data_v1\")\n",
|
||||||
|
" )\n",
|
||||||
|
" cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
|
||||||
|
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||||
|
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||||
|
" df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
"\n",
|
||||||
|
" # Step 1: Compute the 98th percentile per Segment for both metrics (total and count)\n",
|
||||||
|
" percentiles = (\n",
|
||||||
|
" df.groupby(\"Segment\")[[\"Cash_deposit_total\",\n",
|
||||||
|
" \"Cash_deposit_count\"]]\n",
|
||||||
|
" .quantile(0.98)\n",
|
||||||
|
" .reset_index()\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Rename columns for clarity\n",
|
||||||
|
" percentiles = percentiles.rename(columns={\n",
|
||||||
|
" \"Cash_deposit_total\": \"P90_Credit\",\n",
|
||||||
|
" \"Cash_deposit_count\": \"P90_Credit_count\"\n",
|
||||||
|
" })\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 2: Merge back to main df\n",
|
||||||
|
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 3: Identify customers above the 98th percentile in BOTH metrics\n",
|
||||||
|
" high_pop = (\n",
|
||||||
|
" (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
|
||||||
|
" (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 4: Randomly select a 10% sample (frac=0.1) from high-risk population\n",
|
||||||
|
" sample_fraction = 0.1 # fraction of rows, i.e. 10%; use 0.001 if 0.1% was intended\n",
|
||||||
|
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 5: Set SAR_FLAG values\n",
|
||||||
|
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||||
|
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to the randomly sampled high-risk rows\n",
|
||||||
|
"\n",
|
||||||
|
" return df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sen = Scenario()\n",
|
||||||
|
"# a = sen.logic()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
176
main.ipynb
Normal file
176
main.ipynb
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"query = \"\"\"\n",
|
||||||
|
" select final.CUSTOMER_NUMBER_main as Focal_id,\n",
|
||||||
|
" CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
|
||||||
|
" final.Cash_deposit_count,\n",
|
||||||
|
" final.SEGMENT,\n",
|
||||||
|
" final.RISK,\n",
|
||||||
|
" final.SAR_FLAG\n",
|
||||||
|
"from \n",
|
||||||
|
"(\n",
|
||||||
|
" (\n",
|
||||||
|
" select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
|
||||||
|
" subquery.Cash_deposit_total,\n",
|
||||||
|
" subquery.Cash_deposit_count\n",
|
||||||
|
" from \n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_1, \n",
|
||||||
|
" sum(transaction_amount) as Cash_deposit_total, \n",
|
||||||
|
" count(*) as Cash_deposit_count\n",
|
||||||
|
" from \n",
|
||||||
|
" (\n",
|
||||||
|
" select * \n",
|
||||||
|
" from {trans_data} trans_table \n",
|
||||||
|
" left join {acc_data} acc_table\n",
|
||||||
|
" on trans_table.benef_account_number = acc_table.account_number\n",
|
||||||
|
" ) trans\n",
|
||||||
|
" where account_number not in ('None')\n",
|
||||||
|
" and transaction_desc = 'CASH RELATED TRANSACTION'\n",
|
||||||
|
" group by customer_number\n",
|
||||||
|
" ) subquery\n",
|
||||||
|
" ) main \n",
|
||||||
|
" left join \n",
|
||||||
|
" (\n",
|
||||||
|
" select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
|
||||||
|
" cd.SEGMENT,\n",
|
||||||
|
" cd.RISK,\n",
|
||||||
|
" case\n",
|
||||||
|
" when ad.SAR_FLAG is NULL then 'N'\n",
|
||||||
|
" else ad.SAR_FLAG\n",
|
||||||
|
" end as SAR_FLAG \n",
|
||||||
|
" from\n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_3, \n",
|
||||||
|
" business_segment as SEGMENT,\n",
|
||||||
|
" case\n",
|
||||||
|
" when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
|
||||||
|
" when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
|
||||||
|
" when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
|
||||||
|
" else 'Unknown Risk'\n",
|
||||||
|
" end AS RISK\n",
|
||||||
|
" from {cust_data}\n",
|
||||||
|
" ) cd \n",
|
||||||
|
" left join\n",
|
||||||
|
" (\n",
|
||||||
|
" select customer_number as CUSTOMER_NUMBER_4, \n",
|
||||||
|
" sar_flag as SAR_FLAG\n",
|
||||||
|
" from {alert_data}\n",
|
||||||
|
" ) ad \n",
|
||||||
|
" on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
|
||||||
|
" ) as cust_alert\n",
|
||||||
|
" on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
|
||||||
|
") as final\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"from tms_data_interface import SQLQueryInterface\n",
|
||||||
|
"\n",
|
||||||
|
"class Scenario:\n",
|
||||||
|
" seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||||
|
"\n",
|
||||||
|
" def logic(self, **kwargs):\n",
|
||||||
|
" row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||||
|
" cust_data=\"customer_data_v1\",\n",
|
||||||
|
" acc_data=\"account_data_v1\",\n",
|
||||||
|
" alert_data=\"alert_data_v1\")\n",
|
||||||
|
" )\n",
|
||||||
|
" cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
|
||||||
|
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||||
|
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||||
|
" df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
"\n",
|
||||||
|
" # Step 1: Compute the 98th percentile per Segment for both metrics (total and count)\n",
|
||||||
|
" percentiles = (\n",
|
||||||
|
" df.groupby(\"Segment\")[[\"Cash_deposit_total\",\n",
|
||||||
|
" \"Cash_deposit_count\"]]\n",
|
||||||
|
" .quantile(0.98)\n",
|
||||||
|
" .reset_index()\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Rename columns for clarity\n",
|
||||||
|
" percentiles = percentiles.rename(columns={\n",
|
||||||
|
" \"Cash_deposit_total\": \"P90_Credit\",\n",
|
||||||
|
" \"Cash_deposit_count\": \"P90_Credit_count\"\n",
|
||||||
|
" })\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 2: Merge back to main df\n",
|
||||||
|
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 3: Identify customers above the 98th percentile in BOTH metrics\n",
|
||||||
|
" high_pop = (\n",
|
||||||
|
" (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
|
||||||
|
" (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 4: Randomly select a 10% sample (frac=0.1) from high-risk population\n",
|
||||||
|
" sample_fraction = 0.1 # fraction of rows, i.e. 10%; use 0.001 if 0.1% was intended\n",
|
||||||
|
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||||
|
"\n",
|
||||||
|
" # Step 5: Set SAR_FLAG values\n",
|
||||||
|
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||||
|
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to the randomly sampled high-risk rows\n",
|
||||||
|
"\n",
|
||||||
|
" return df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sen = Scenario()\n",
|
||||||
|
"# a = sen.logic()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 10,
|
||||||
|
"id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
135
main.py
Normal file
135
main.py
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
# In[7]:
|
||||||
|
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
query = """
|
||||||
|
select final.CUSTOMER_NUMBER_main as Focal_id,
|
||||||
|
CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,
|
||||||
|
final.Cash_deposit_count,
|
||||||
|
final.SEGMENT,
|
||||||
|
final.RISK,
|
||||||
|
final.SAR_FLAG
|
||||||
|
from
|
||||||
|
(
|
||||||
|
(
|
||||||
|
select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
|
||||||
|
subquery.Cash_deposit_total,
|
||||||
|
subquery.Cash_deposit_count
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select customer_number as CUSTOMER_NUMBER_1,
|
||||||
|
sum(transaction_amount) as Cash_deposit_total,
|
||||||
|
count(*) as Cash_deposit_count
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select *
|
||||||
|
from {trans_data} trans_table
|
||||||
|
left join {acc_data} acc_table
|
||||||
|
on trans_table.benef_account_number = acc_table.account_number
|
||||||
|
) trans
|
||||||
|
where account_number not in ('None')
|
||||||
|
and transaction_desc = 'CASH RELATED TRANSACTION'
|
||||||
|
group by customer_number
|
||||||
|
) subquery
|
||||||
|
) main
|
||||||
|
left join
|
||||||
|
(
|
||||||
|
select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
|
||||||
|
cd.SEGMENT,
|
||||||
|
cd.RISK,
|
||||||
|
case
|
||||||
|
when ad.SAR_FLAG is NULL then 'N'
|
||||||
|
else ad.SAR_FLAG
|
||||||
|
end as SAR_FLAG
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select customer_number as CUSTOMER_NUMBER_3,
|
||||||
|
business_segment as SEGMENT,
|
||||||
|
case
|
||||||
|
when RISK_CLASSIFICATION = 1 then 'Low Risk'
|
||||||
|
when RISK_CLASSIFICATION = 2 then 'Medium Risk'
|
||||||
|
when RISK_CLASSIFICATION = 3 then 'High Risk'
|
||||||
|
else 'Unknown Risk'
|
||||||
|
end AS RISK
|
||||||
|
from {cust_data}
|
||||||
|
) cd
|
||||||
|
left join
|
||||||
|
(
|
||||||
|
select customer_number as CUSTOMER_NUMBER_4,
|
||||||
|
sar_flag as SAR_FLAG
|
||||||
|
from {alert_data}
|
||||||
|
) ad
|
||||||
|
on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
|
||||||
|
) as cust_alert
|
||||||
|
on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
|
||||||
|
) as final
|
||||||
|
"""
|
||||||
|
|
||||||
|
from tms_data_interface import SQLQueryInterface
|
||||||
|
|
||||||
|
class Scenario:
    """Per-customer cash-deposit scenario with a sampled SAR flag.

    ``logic`` runs the module-level ``query`` template, loads the rows into a
    DataFrame, and marks a reproducible random sample of the customers whose
    deposit total AND deposit count both exceed their segment's cut-off.
    """

    seq = SQLQueryInterface(schema="transactionschema")

    # Per-segment quantile used as the cut-off. This is the 98th percentile;
    # the ``P90_*`` column names below are historical and kept only so the
    # returned frame keeps the same columns.
    PERCENTILE = 0.98

    # Passed to ``DataFrame.sample(frac=...)``. ``frac`` is a fraction of the
    # rows, so 0.1 selects 10% of the qualifying rows (not 0.1%).
    # NOTE(review): if 0.1% was the intent, this should be 0.001 -- confirm.
    SAMPLE_FRACTION = 0.1

    # Fixed seed so the same rows are flagged on every run over the same data.
    RANDOM_STATE = 42

    def logic(self, **kwargs):
        """Build the scenario DataFrame and assign ``SAR_FLAG``.

        Args:
            **kwargs: Accepted for interface compatibility; not read here.

        Returns:
            pandas.DataFrame with columns ``Focal_id``, ``Cash_deposit_total``,
            ``Cash_deposit_count``, ``Segment``, ``Risk``, ``SAR_FLAG``,
            ``P90_Credit`` and ``P90_Credit_count``.
        """
        sql = query.format(
            trans_data="transaction10m",
            cust_data="customer_data_v1",
            acc_data="account_data_v1",
            alert_data="alert_data_v1",
        )
        row_list = self.seq.execute_raw(sql)

        # Column order mirrors the select list of ``query``; rows are assumed
        # to come back in that order -- TODO confirm against execute_raw.
        cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count",
                "Segment", "Risk", "SAR_FLAG"]
        df = pd.DataFrame(row_list, columns=cols)
        # The query casts the total to DECIMAL(18, 2); convert to float so
        # the quantile and comparisons below operate on a numeric dtype.
        df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float)

        # Step 1: per-Segment cut-off (98th percentile) for both metrics.
        percentiles = (
            df.groupby("Segment")[["Cash_deposit_total",
                                   "Cash_deposit_count"]]
            .quantile(self.PERCENTILE)
            .reset_index()
        )

        # Rename so the thresholds do not collide with the metric columns.
        percentiles = percentiles.rename(columns={
            "Cash_deposit_total": "P90_Credit",
            "Cash_deposit_count": "P90_Credit_count",
        })

        # Step 2: attach each row's segment thresholds.
        df = df.merge(percentiles, on="Segment", how="left")

        # Step 3: rows strictly above the cut-off in BOTH metrics.
        high_pop = (
            (df["Cash_deposit_total"] > df["P90_Credit"]) &
            (df["Cash_deposit_count"] > df["P90_Credit_count"])
        )

        # Step 4: reproducible random sample of the qualifying rows.
        high_pop_indices = df[high_pop].sample(
            frac=self.SAMPLE_FRACTION,
            random_state=self.RANDOM_STATE,
        ).index

        # Step 5: the SAR_FLAG value returned by the query is discarded here;
        # every row is reset to "N" and only the sampled rows become "Y".
        df["SAR_FLAG"] = "N"
        df.loc[high_pop_indices, "SAR_FLAG"] = "Y"

        return df
|
||||||
|
|
||||||
|
|
||||||
|
# In[8]:
|
||||||
|
|
||||||
|
|
||||||
|
# sen = Scenario()
|
||||||
|
# a = sen.logic()
|
||||||
|
|
||||||
|
|
||||||
|
# In[10]:
|
||||||
|
|
||||||
|
|
||||||
|
# a[a["SAR_FLAG"] == "Y"]
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user