201/.ipynb_checkpoints/main-checkpoint.ipynb
2025-11-27 07:09:21 +00:00

550 lines
21 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from tms_data_interface import SQLQueryInterface\n",
"# Query interface bound to the \"transactionschema\" schema; the next cell uses it to list tables.\n",
"seq = SQLQueryInterface(schema=\"transactionschema\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fc212ace-ca7a-45f2-8137-f436c6123652",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[['account_data_v1'],\n",
" ['account_data_v2'],\n",
" ['alert_data_v1'],\n",
" ['alert_data_v2'],\n",
" ['customer_data_v1'],\n",
" ['customer_data_v2'],\n",
" ['transaction10m'],\n",
" ['transaction60m']]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the tables reachable through `seq`; the bare expression is rendered as the cell output.\n",
"seq.execute_raw(\"show tables\")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# SQL template producing one row per customer with credit/debit aggregates and attributes.\n",
"# The {trans_data}, {acc_data}, {cust_data} and {alert_data} placeholders are table names\n",
"# that must be substituted with str.format() before the statement is executed.\n",
"#   - credit side: transactions joined to accounts on benef_account_number, then summed and\n",
"#     counted per customer_number (rows whose account_number is 'None' are filtered out)\n",
"#   - debit side: the same aggregation joined on orig_account_number; a customer with no\n",
"#     debit row gets 0 for both the debit amount and the debit count\n",
"#   - Wash_Ratio: credit amount / debit amount, or 0 when the debit amount is 0 or NULL\n",
"#   - attributes: business_segment as SEGMENT, RISK_CLASSIFICATION 1/2/3 mapped to\n",
"#     'Low Risk'/'Medium Risk'/'High Risk' (anything else 'Unknown Risk'), and sar_flag from\n",
"#     the alert table, defaulting to 'N' when NULL\n",
"# NOTE(review): the alert table is joined without aggregation, so a customer with several\n",
"# alert rows would presumably appear more than once in the result - confirm against the data.\n",
"query = \"\"\"\n",
" select final.CUSTOMER_NUMBER_main as Focal_id,\n",
" final.Credit_transaction_amount,\n",
" final.Total_no_of_credit_transactions,\n",
" final.Debit_transaction_amount,\n",
" final.Total_no_of_debit_transactions,\n",
" final.Wash_Ratio,\n",
" final.SEGMENT,\n",
" final.RISK,\n",
" final.SAR_FLAG\n",
" from \n",
" (\n",
" (\n",
" select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
" subquery.Credit_transaction_amount,\n",
" subquery.Total_no_of_credit_transactions,\n",
" case\n",
" when subquery.Debit_transaction_amount is NULL then 0\n",
" else Debit_transaction_amount\n",
" end as Debit_transaction_amount,\n",
" case\n",
" when subquery.Total_no_of_debit_transactions is NULL then 0\n",
" else Total_no_of_debit_transactions\n",
" end as Total_no_of_debit_transactions,\n",
" case\n",
" when subquery.Debit_transaction_amount = 0\n",
" or subquery.Debit_transaction_amount is NULL then 0\n",
" else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n",
" end as Wash_Ratio\n",
" from \n",
" (\n",
" (\n",
" select customer_number as CUSTOMER_NUMBER_1, \n",
" sum(transaction_amount) as Credit_transaction_amount, \n",
" count(*) as Total_no_of_credit_transactions\n",
" from \n",
" (\n",
" select * \n",
" from {trans_data} as trans_table left join {acc_data} as acc_table\n",
" on trans_table.benef_account_number = acc_table.account_number\n",
" )\n",
" where account_number not in ('None')\n",
" group by 1\n",
" ) credit left join\n",
" (\n",
" select customer_number as CUSTOMER_NUMBER_2, \n",
" sum(transaction_amount) as Debit_transaction_amount, \n",
" count(*) as Total_no_of_debit_transactions\n",
" from \n",
" (\n",
" select * \n",
" from {trans_data} as trans_table left join {acc_data} as acc_table\n",
" on trans_table.orig_account_number = acc_table.account_number\n",
" )\n",
" where account_number not in ('None')\n",
" group by 1\n",
" ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2 \n",
" ) subquery\n",
" ) main left join \n",
" (\n",
" select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
" subquery.SEGMENT,\n",
" subquery.RISK,\n",
" case\n",
" when subquery.SAR_FLAG is NULL then 'N'\n",
" else subquery.SAR_FLAG\n",
" end as SAR_FLAG \n",
" from\n",
" (\n",
" (\n",
" select customer_number as CUSTOMER_NUMBER_3, \n",
" business_segment as SEGMENT,\n",
" case\n",
" when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
" when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
" when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
" else 'Unknown Risk'\n",
" end AS RISK\n",
" from {cust_data}\n",
" ) cd left join\n",
" (\n",
" select customer_number as CUSTOMER_NUMBER_4, \n",
" sar_flag as SAR_FLAG\n",
" from {alert_data}\n",
" ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
" ) subquery\n",
" ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
" ) final\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from tms_data_interface import SQLQueryInterface\n",
"\n",
"class Scenario:\n",
"    \"\"\"Build the per-customer wash-ratio frame and assign synthetic SAR flags.\n",
"\n",
"    The values that drive a run (source tables, threshold quantile, sample\n",
"    fraction, random seed) are class attributes so they can be overridden on a\n",
"    subclass or instance without editing ``logic``.\n",
"    \"\"\"\n",
"\n",
"    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
"\n",
"    # Table names substituted into the placeholders of the module-level `query` template.\n",
"    TABLES = {\n",
"        \"trans_data\": \"transaction10m\",\n",
"        \"cust_data\": \"customer_data_v1\",\n",
"        \"acc_data\": \"account_data_v1\",\n",
"        \"alert_data\": \"alert_data_v1\",\n",
"    }\n",
"    # Per-segment quantile used as the \"high\" threshold (90th percentile).\n",
"    THRESHOLD_QUANTILE = 0.90\n",
"    # Fraction of the above-threshold rows that get SAR_FLAG = \"Y\" (0.3 means 30%).\n",
"    SAMPLE_FRACTION = 0.3\n",
"    # Fixed seed so the sampled rows are the same on every run.\n",
"    RANDOM_STATE = 42\n",
"\n",
"    def logic(self, **kwargs):\n",
"        \"\"\"Run the query, derive per-segment thresholds and flag a random sample.\n",
"\n",
"        Args:\n",
"            **kwargs: Accepted for interface compatibility; not read by this method.\n",
"\n",
"        Returns:\n",
"            pandas.DataFrame holding the nine query columns plus ``P90_Credit``,\n",
"            ``P90_Debit`` and ``P90_Wash``. The ``SAR_FLAG`` value returned by the\n",
"            query is discarded: the column is reset to ``N`` and then set to ``Y``\n",
"            for the randomly sampled above-threshold rows.\n",
"        \"\"\"\n",
"        row_list = self.seq.execute_raw(query.format(**self.TABLES))\n",
"        cols = [\n",
"            \"Focal_id\",\n",
"            \"Credit_transaction_amount\",\n",
"            \"Total_no_of_credit_transactions\",\n",
"            \"Debit_transaction_amount\",\n",
"            \"Total_no_of_debit_transactions\",\n",
"            \"Wash_Ratio\",\n",
"            \"Segment\",\n",
"            \"Risk\",\n",
"            \"SAR_FLAG\",\n",
"        ]\n",
"        df = pd.DataFrame(row_list, columns=cols)\n",
"\n",
"        amount_cols = [\"Credit_transaction_amount\", \"Debit_transaction_amount\"]\n",
"        # Pin the width to int64: plain 'int' maps to the platform C long on NumPy < 2\n",
"        # (32-bit on Windows), which cannot hold totals above ~2.1e9.\n",
"        df[amount_cols] = df[amount_cols].astype(\"int64\")\n",
"        df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype(\"float\")\n",
"\n",
"        # Step 1: per-segment threshold quantile for the two amounts and the wash ratio.\n",
"        percentiles = (\n",
"            df.groupby(\"Segment\")[amount_cols + [\"Wash_Ratio\"]]\n",
"            .quantile(self.THRESHOLD_QUANTILE)\n",
"            .reset_index()\n",
"            .rename(columns={\n",
"                \"Credit_transaction_amount\": \"P90_Credit\",\n",
"                \"Debit_transaction_amount\": \"P90_Debit\",\n",
"                \"Wash_Ratio\": \"P90_Wash\",\n",
"            })\n",
"        )\n",
"\n",
"        # Step 2: attach each row's segment thresholds.\n",
"        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
"\n",
"        # Step 3: rows above the segment threshold on BOTH debit amount and wash ratio.\n",
"        # P90_Credit stays in the output for reference but is not part of the filter.\n",
"        high_pop = (\n",
"            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"])\n",
"            & (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
"        )\n",
"\n",
"        # Step 4: reproducible random sample of the above-threshold rows.\n",
"        high_pop_indices = df[high_pop].sample(\n",
"            frac=self.SAMPLE_FRACTION, random_state=self.RANDOM_STATE\n",
"        ).index\n",
"\n",
"        # Step 5: overwrite SAR_FLAG - \"N\" everywhere, \"Y\" for the sampled rows only.\n",
"        df[\"SAR_FLAG\"] = \"N\"\n",
"        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"\n",
"        return df"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# sen = Scenario()\n",
"# a = sen.logic()"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Focal_id</th>\n",
" <th>Credit_transaction_amount</th>\n",
" <th>Total_no_of_credit_transactions</th>\n",
" <th>Debit_transaction_amount</th>\n",
" <th>Total_no_of_debit_transactions</th>\n",
" <th>Wash_Ratio</th>\n",
" <th>Segment</th>\n",
" <th>Risk</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>P90_Credit</th>\n",
" <th>P90_Debit</th>\n",
" <th>P90_Wash</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>PN489144</td>\n",
" <td>2830802741</td>\n",
" <td>2060</td>\n",
" <td>2847556186</td>\n",
" <td>1976</td>\n",
" <td>0.994117</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>PN394780</td>\n",
" <td>2872685364</td>\n",
" <td>2029</td>\n",
" <td>2743931855</td>\n",
" <td>1999</td>\n",
" <td>1.046923</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>PN195722</td>\n",
" <td>5604208368</td>\n",
" <td>3937</td>\n",
" <td>5557946505</td>\n",
" <td>4039</td>\n",
" <td>1.008324</td>\n",
" <td>SME</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.532321e+09</td>\n",
" <td>4.534860e+09</td>\n",
" <td>1.062759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>PN652566</td>\n",
" <td>1630905248</td>\n",
" <td>1152</td>\n",
" <td>1686713614</td>\n",
" <td>1169</td>\n",
" <td>0.966913</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>PN181960</td>\n",
" <td>2157634332</td>\n",
" <td>1613</td>\n",
" <td>2039953312</td>\n",
" <td>1552</td>\n",
" <td>1.057688</td>\n",
" <td>Corporate Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>5.021582e+09</td>\n",
" <td>5.003501e+09</td>\n",
" <td>1.063161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10009</th>\n",
" <td>PN479491</td>\n",
" <td>31124877</td>\n",
" <td>246</td>\n",
" <td>23590191</td>\n",
" <td>357</td>\n",
" <td>1.319399</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10010</th>\n",
" <td>PN267550</td>\n",
" <td>36558708</td>\n",
" <td>260</td>\n",
" <td>27361057</td>\n",
" <td>366</td>\n",
" <td>1.336158</td>\n",
" <td>Priority Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.410392e+07</td>\n",
" <td>3.076443e+07</td>\n",
" <td>1.729168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10011</th>\n",
" <td>PN293003</td>\n",
" <td>33990478</td>\n",
" <td>255</td>\n",
" <td>24465835</td>\n",
" <td>323</td>\n",
" <td>1.389304</td>\n",
" <td>Others</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10012</th>\n",
" <td>PN534105</td>\n",
" <td>39934813</td>\n",
" <td>278</td>\n",
" <td>28247858</td>\n",
" <td>403</td>\n",
" <td>1.413729</td>\n",
" <td>Others</td>\n",
" <td>High Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10013</th>\n",
" <td>PN390430</td>\n",
" <td>36894062</td>\n",
" <td>257</td>\n",
" <td>29162252</td>\n",
" <td>371</td>\n",
" <td>1.265131</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10014 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
"0 PN489144 2830802741 2060 \n",
"1 PN394780 2872685364 2029 \n",
"2 PN195722 5604208368 3937 \n",
"3 PN652566 1630905248 1152 \n",
"4 PN181960 2157634332 1613 \n",
"... ... ... ... \n",
"10009 PN479491 31124877 246 \n",
"10010 PN267550 36558708 260 \n",
"10011 PN293003 33990478 255 \n",
"10012 PN534105 39934813 278 \n",
"10013 PN390430 36894062 257 \n",
"\n",
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
"0 2847556186 1976 0.994117 \n",
"1 2743931855 1999 1.046923 \n",
"2 5557946505 4039 1.008324 \n",
"3 1686713614 1169 0.966913 \n",
"4 2039953312 1552 1.057688 \n",
"... ... ... ... \n",
"10009 23590191 357 1.319399 \n",
"10010 27361057 366 1.336158 \n",
"10011 24465835 323 1.389304 \n",
"10012 28247858 403 1.413729 \n",
"10013 29162252 371 1.265131 \n",
"\n",
" Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
"0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
"3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
"... ... ... ... ... ... \n",
"10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
"10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
"10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
"10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"\n",
" P90_Wash \n",
"0 1.058020 \n",
"1 1.058020 \n",
"2 1.062759 \n",
"3 1.058020 \n",
"4 1.063161 \n",
"... ... \n",
"10009 1.760285 \n",
"10010 1.729168 \n",
"10011 1.740112 \n",
"10012 1.740112 \n",
"10013 1.760285 \n",
"\n",
"[10014 rows x 12 columns]"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# a"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# a[a[\"SAR_FLAG\"] == \"Y\"]\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}