generated from user_client2024/78
System save at 28/11/2025 13:07 by user_client2024
This commit is contained in:
parent
245ad6d96f
commit
1e47342934
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -89,104 +89,46 @@
|
||||
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
|
||||
" percentiles = (\n",
|
||||
" df.groupby(\"Segment\")[[\"Cash_deposit_total\",\n",
|
||||
" \"Cash_deposit_count\"]]\n",
|
||||
" .quantile(0.98)\n",
|
||||
" .reset_index()\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Rename columns for clarity\n",
|
||||
" percentiles = percentiles.rename(columns={\n",
|
||||
" \"Cash_deposit_total\": \"P90_Credit\",\n",
|
||||
" \"Cash_deposit_count\": \"P90_Credit_count\"\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Step 2: Merge back to main df\n",
|
||||
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||
"\n",
|
||||
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
|
||||
" high_pop = (\n",
|
||||
" (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
|
||||
" (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
|
||||
" sample_fraction = 0.1 # 0.1%\n",
|
||||
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||
"\n",
|
||||
" # Step 5: Set SAR_FLAG values\n",
|
||||
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n",
|
||||
"\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b6c85de2-6a47-4109-8885-c138c289ec25",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import pandas as pd\n",
|
||||
"\n",
|
||||
"# query = \"\"\"\n",
|
||||
"# SELECT \n",
|
||||
"# t.transaction_id,\n",
|
||||
"# t.transaction_date,\n",
|
||||
"# t.transaction_amount,\n",
|
||||
"# t.transaction_desc,\n",
|
||||
"# t.benef_account_number,\n",
|
||||
"\n",
|
||||
"# -- Account data\n",
|
||||
"# a.account_number,\n",
|
||||
"# a.customer_number AS acc_customer_number,\n",
|
||||
"# a.account_type,\n",
|
||||
"# a.branch_code,\n",
|
||||
"\n",
|
||||
"# -- Party data\n",
|
||||
"# p.customer_number AS party_customer_number,\n",
|
||||
"# p.customer_name,\n",
|
||||
"# p.date_of_birth,\n",
|
||||
"# p.nationality,\n",
|
||||
"# p.business_segment,\n",
|
||||
"# CASE\n",
|
||||
"# WHEN p.risk_classification = 1 THEN 'Low Risk'\n",
|
||||
"# WHEN p.risk_classification = 2 THEN 'Medium Risk'\n",
|
||||
"# WHEN p.risk_classification = 3 THEN 'High Risk'\n",
|
||||
"# ELSE 'Unknown Risk'\n",
|
||||
"# END AS risk_level,\n",
|
||||
"\n",
|
||||
"# -- Alert data\n",
|
||||
"# COALESCE(al.sar_flag, 'N') AS sar_flag\n",
|
||||
"\n",
|
||||
"# FROM {trans_data} t\n",
|
||||
"\n",
|
||||
"# -- Join with account data on beneficiary account\n",
|
||||
"# LEFT JOIN {acc_data} a\n",
|
||||
"# ON t.benef_account_number = a.account_number\n",
|
||||
"\n",
|
||||
"# -- Join with party/customer data using account's customer number\n",
|
||||
"# LEFT JOIN {cust_data} p\n",
|
||||
"# ON a.customer_number = p.customer_number\n",
|
||||
"\n",
|
||||
"# -- Join with alert data using party's customer number\n",
|
||||
"# LEFT JOIN {alert_data} al\n",
|
||||
"# ON p.customer_number = al.customer_number\n",
|
||||
"\n",
|
||||
"# WHERE a.account_number IS NOT NULL\n",
|
||||
"# limit 100\n",
|
||||
"# \"\"\"\n",
|
||||
"\n",
|
||||
"# from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"# class Scenario:\n",
|
||||
"# seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
"# def logic(self, **kwargs):\n",
|
||||
"# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
"# cust_data=\"customer_data_v1\",\n",
|
||||
"# acc_data=\"account_data_v1\",\n",
|
||||
"# alert_data=\"alert_data_v1\")\n",
|
||||
"# )\n",
|
||||
"# cols = [\n",
|
||||
"# \"transaction_id\",\n",
|
||||
"# \"transaction_date\",\n",
|
||||
"# \"transaction_amount\",\n",
|
||||
"# \"transaction_desc\",\n",
|
||||
"# \"benef_account_number\",\n",
|
||||
"# \"account_number\",\n",
|
||||
"# \"acc_customer_number\",\n",
|
||||
"# \"account_type\",\n",
|
||||
"# \"branch_code\",\n",
|
||||
"# \"party_customer_number\",\n",
|
||||
"# \"customer_name\",\n",
|
||||
"# \"date_of_birth\",\n",
|
||||
"# \"nationality\",\n",
|
||||
"# \"business_segment\",\n",
|
||||
"# \"risk_level\",\n",
|
||||
"# \"sar_flag\"\n",
|
||||
"# ]\n",
|
||||
"# df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
"# return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -194,16 +136,20 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
"# a = sen.logic()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
142
main.ipynb
142
main.ipynb
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 7,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -89,104 +89,46 @@
|
||||
" \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
|
||||
" percentiles = (\n",
|
||||
" df.groupby(\"Segment\")[[\"Cash_deposit_total\",\n",
|
||||
" \"Cash_deposit_count\"]]\n",
|
||||
" .quantile(0.98)\n",
|
||||
" .reset_index()\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Rename columns for clarity\n",
|
||||
" percentiles = percentiles.rename(columns={\n",
|
||||
" \"Cash_deposit_total\": \"P90_Credit\",\n",
|
||||
" \"Cash_deposit_count\": \"P90_Credit_count\"\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Step 2: Merge back to main df\n",
|
||||
" df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||||
"\n",
|
||||
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
|
||||
" high_pop = (\n",
|
||||
" (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
|
||||
" (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
|
||||
" sample_fraction = 0.1 # 0.1%\n",
|
||||
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
|
||||
"\n",
|
||||
" # Step 5: Set SAR_FLAG values\n",
|
||||
" df[\"SAR_FLAG\"] = \"N\" # default for all\n",
|
||||
" df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n",
|
||||
"\n",
|
||||
" return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "b6c85de2-6a47-4109-8885-c138c289ec25",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import pandas as pd\n",
|
||||
"\n",
|
||||
"# query = \"\"\"\n",
|
||||
"# SELECT \n",
|
||||
"# t.transaction_id,\n",
|
||||
"# t.transaction_date,\n",
|
||||
"# t.transaction_amount,\n",
|
||||
"# t.transaction_desc,\n",
|
||||
"# t.benef_account_number,\n",
|
||||
"\n",
|
||||
"# -- Account data\n",
|
||||
"# a.account_number,\n",
|
||||
"# a.customer_number AS acc_customer_number,\n",
|
||||
"# a.account_type,\n",
|
||||
"# a.branch_code,\n",
|
||||
"\n",
|
||||
"# -- Party data\n",
|
||||
"# p.customer_number AS party_customer_number,\n",
|
||||
"# p.customer_name,\n",
|
||||
"# p.date_of_birth,\n",
|
||||
"# p.nationality,\n",
|
||||
"# p.business_segment,\n",
|
||||
"# CASE\n",
|
||||
"# WHEN p.risk_classification = 1 THEN 'Low Risk'\n",
|
||||
"# WHEN p.risk_classification = 2 THEN 'Medium Risk'\n",
|
||||
"# WHEN p.risk_classification = 3 THEN 'High Risk'\n",
|
||||
"# ELSE 'Unknown Risk'\n",
|
||||
"# END AS risk_level,\n",
|
||||
"\n",
|
||||
"# -- Alert data\n",
|
||||
"# COALESCE(al.sar_flag, 'N') AS sar_flag\n",
|
||||
"\n",
|
||||
"# FROM {trans_data} t\n",
|
||||
"\n",
|
||||
"# -- Join with account data on beneficiary account\n",
|
||||
"# LEFT JOIN {acc_data} a\n",
|
||||
"# ON t.benef_account_number = a.account_number\n",
|
||||
"\n",
|
||||
"# -- Join with party/customer data using account's customer number\n",
|
||||
"# LEFT JOIN {cust_data} p\n",
|
||||
"# ON a.customer_number = p.customer_number\n",
|
||||
"\n",
|
||||
"# -- Join with alert data using party's customer number\n",
|
||||
"# LEFT JOIN {alert_data} al\n",
|
||||
"# ON p.customer_number = al.customer_number\n",
|
||||
"\n",
|
||||
"# WHERE a.account_number IS NOT NULL\n",
|
||||
"# limit 100\n",
|
||||
"# \"\"\"\n",
|
||||
"\n",
|
||||
"# from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"# class Scenario:\n",
|
||||
"# seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
"# def logic(self, **kwargs):\n",
|
||||
"# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
"# cust_data=\"customer_data_v1\",\n",
|
||||
"# acc_data=\"account_data_v1\",\n",
|
||||
"# alert_data=\"alert_data_v1\")\n",
|
||||
"# )\n",
|
||||
"# cols = [\n",
|
||||
"# \"transaction_id\",\n",
|
||||
"# \"transaction_date\",\n",
|
||||
"# \"transaction_amount\",\n",
|
||||
"# \"transaction_desc\",\n",
|
||||
"# \"benef_account_number\",\n",
|
||||
"# \"account_number\",\n",
|
||||
"# \"acc_customer_number\",\n",
|
||||
"# \"account_type\",\n",
|
||||
"# \"branch_code\",\n",
|
||||
"# \"party_customer_number\",\n",
|
||||
"# \"customer_name\",\n",
|
||||
"# \"date_of_birth\",\n",
|
||||
"# \"nationality\",\n",
|
||||
"# \"business_segment\",\n",
|
||||
"# \"risk_level\",\n",
|
||||
"# \"sar_flag\"\n",
|
||||
"# ]\n",
|
||||
"# df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
"# return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -194,16 +136,20 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
"# a = sen.logic()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 10,
|
||||
"id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"# a[a[\"SAR_FLAG\"] == \"Y\"]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
130
main.py
130
main.py
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[3]:
|
||||
# In[7]:
|
||||
|
||||
|
||||
import pandas as pd
|
||||
@ -84,104 +84,52 @@ class Scenario:
|
||||
"Segment", "Risk", "SAR_FLAG"]
|
||||
df = pd.DataFrame(row_list, columns = cols)
|
||||
df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float)
|
||||
|
||||
|
||||
|
||||
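# Step 1: Compute the 98th percentile per Segment for Cash_deposit_total and Cash_deposit_count (the P90_* column names below are legacy; the quantile used is 0.98)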
|
||||
percentiles = (
|
||||
df.groupby("Segment")[["Cash_deposit_total",
|
||||
"Cash_deposit_count"]]
|
||||
.quantile(0.98)
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
# Rename columns for clarity
|
||||
percentiles = percentiles.rename(columns={
|
||||
"Cash_deposit_total": "P90_Credit",
|
||||
"Cash_deposit_count": "P90_Credit_count"
|
||||
})
|
||||
|
||||
# Step 2: Merge back to main df
|
||||
df = df.merge(percentiles, on="Segment", how="left")
|
||||
|
||||
# Step 3: Identify customers above their segment's threshold in BOTH metrics (deposit total AND deposit count)
|
||||
high_pop = (
|
||||
(df["Cash_deposit_total"] > df["P90_Credit"]) &
|
||||
(df["Cash_deposit_count"] > df["P90_Credit_count"])
|
||||
)
|
||||
|
||||
# Step 4: Randomly select a sample from the high-risk population (NOTE: frac=0.1 selects 10% of rows; use 0.001 if a 0.1% sample is intended)
|
||||
sample_fraction = 0.1 # 0.1%
|
||||
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
|
||||
|
||||
# Step 5: Set SAR_FLAG values
|
||||
df["SAR_FLAG"] = "N" # default for all
|
||||
df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to 0.1% random high-risk population
|
||||
|
||||
return df
|
||||
|
||||
|
||||
# In[6]:
|
||||
|
||||
|
||||
# import pandas as pd
|
||||
|
||||
# query = """
|
||||
# SELECT
|
||||
# t.transaction_id,
|
||||
# t.transaction_date,
|
||||
# t.transaction_amount,
|
||||
# t.transaction_desc,
|
||||
# t.benef_account_number,
|
||||
|
||||
# -- Account data
|
||||
# a.account_number,
|
||||
# a.customer_number AS acc_customer_number,
|
||||
# a.account_type,
|
||||
# a.branch_code,
|
||||
|
||||
# -- Party data
|
||||
# p.customer_number AS party_customer_number,
|
||||
# p.customer_name,
|
||||
# p.date_of_birth,
|
||||
# p.nationality,
|
||||
# p.business_segment,
|
||||
# CASE
|
||||
# WHEN p.risk_classification = 1 THEN 'Low Risk'
|
||||
# WHEN p.risk_classification = 2 THEN 'Medium Risk'
|
||||
# WHEN p.risk_classification = 3 THEN 'High Risk'
|
||||
# ELSE 'Unknown Risk'
|
||||
# END AS risk_level,
|
||||
|
||||
# -- Alert data
|
||||
# COALESCE(al.sar_flag, 'N') AS sar_flag
|
||||
|
||||
# FROM {trans_data} t
|
||||
|
||||
# -- Join with account data on beneficiary account
|
||||
# LEFT JOIN {acc_data} a
|
||||
# ON t.benef_account_number = a.account_number
|
||||
|
||||
# -- Join with party/customer data using account's customer number
|
||||
# LEFT JOIN {cust_data} p
|
||||
# ON a.customer_number = p.customer_number
|
||||
|
||||
# -- Join with alert data using party's customer number
|
||||
# LEFT JOIN {alert_data} al
|
||||
# ON p.customer_number = al.customer_number
|
||||
|
||||
# WHERE a.account_number IS NOT NULL
|
||||
# limit 100
|
||||
# """
|
||||
|
||||
# from tms_data_interface import SQLQueryInterface
|
||||
|
||||
# class Scenario:
|
||||
# seq = SQLQueryInterface(schema="transactionschema")
|
||||
|
||||
# def logic(self, **kwargs):
|
||||
# row_list = self.seq.execute_raw(query.format(trans_data="transaction10m",
|
||||
# cust_data="customer_data_v1",
|
||||
# acc_data="account_data_v1",
|
||||
# alert_data="alert_data_v1")
|
||||
# )
|
||||
# cols = [
|
||||
# "transaction_id",
|
||||
# "transaction_date",
|
||||
# "transaction_amount",
|
||||
# "transaction_desc",
|
||||
# "benef_account_number",
|
||||
# "account_number",
|
||||
# "acc_customer_number",
|
||||
# "account_type",
|
||||
# "branch_code",
|
||||
# "party_customer_number",
|
||||
# "customer_name",
|
||||
# "date_of_birth",
|
||||
# "nationality",
|
||||
# "business_segment",
|
||||
# "risk_level",
|
||||
# "sar_flag"
|
||||
# ]
|
||||
# df = pd.DataFrame(row_list, columns = cols)
|
||||
# return df
|
||||
|
||||
|
||||
# In[5]:
|
||||
# In[8]:
|
||||
|
||||
|
||||
# sen = Scenario()
|
||||
# sen.logic()
|
||||
# a = sen.logic()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
# In[10]:
|
||||
|
||||
|
||||
# a[a["SAR_FLAG"] == "Y"]
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user