generated from user_client2024/77
550 lines
21 KiB
Plaintext
550 lines
21 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tms_data_interface import SQLQueryInterface\n",
|
||
"# Notebook-level query handle on the transaction schema; the next cell uses it to list tables.\n",
|
||
"seq = SQLQueryInterface(schema=\"transactionschema\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "fc212ace-ca7a-45f2-8137-f436c6123652",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[['account_data_v1'],\n",
|
||
" ['account_data_v2'],\n",
|
||
" ['alert_data_v1'],\n",
|
||
" ['alert_data_v2'],\n",
|
||
" ['customer_data_v1'],\n",
|
||
" ['customer_data_v2'],\n",
|
||
" ['transaction10m'],\n",
|
||
" ['transaction60m']]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Exploration only: list the tables reachable through the query handle.\n",
|
||
"seq.execute_raw(\"show tables\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# SQL template for the per-customer wash-ratio data set.\n",
|
||
"# The {trans_data}, {acc_data}, {cust_data} and {alert_data} placeholders are table\n",
|
||
"# names filled in with str.format before the statement is executed.\n",
|
||
"#   * credit side: transactions joined to accounts on benef_account_number, then\n",
|
||
"#     sum(transaction_amount) and count(*) per customer_number.\n",
|
||
"#   * debit side: the same aggregation with the join on orig_account_number.\n",
|
||
"#   * both sides drop rows whose account_number is the string 'None'.\n",
|
||
"#   * credit LEFT JOIN debit: customers without a debit row get 0 for the debit\n",
|
||
"#     amount and count; Wash_Ratio = credit / debit, or 0 when debit is 0 or NULL.\n",
|
||
"#   * customer attributes: business_segment as SEGMENT, RISK_CLASSIFICATION 1/2/3\n",
|
||
"#     mapped to 'Low Risk'/'Medium Risk'/'High Risk' (anything else 'Unknown Risk'),\n",
|
||
"#     and sar_flag from the alert table with NULL replaced by 'N'.\n",
|
||
"# NOTE(review): the customer/alert LEFT JOIN would repeat a customer once per alert\n",
|
||
"# row if the alert table holds several rows per customer_number -- confirm.\n",
|
||
"query = \"\"\"\n",
|
||
" select final.CUSTOMER_NUMBER_main as Focal_id,\n",
|
||
" final.Credit_transaction_amount,\n",
|
||
" final.Total_no_of_credit_transactions,\n",
|
||
" final.Debit_transaction_amount,\n",
|
||
" final.Total_no_of_debit_transactions,\n",
|
||
" final.Wash_Ratio,\n",
|
||
" final.SEGMENT,\n",
|
||
" final.RISK,\n",
|
||
" final.SAR_FLAG\n",
|
||
" from \n",
|
||
" (\n",
|
||
" (\n",
|
||
" select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
|
||
" subquery.Credit_transaction_amount,\n",
|
||
" subquery.Total_no_of_credit_transactions,\n",
|
||
" case\n",
|
||
" when subquery.Debit_transaction_amount is NULL then 0\n",
|
||
" else Debit_transaction_amount\n",
|
||
" end as Debit_transaction_amount,\n",
|
||
" case\n",
|
||
" when subquery.Total_no_of_debit_transactions is NULL then 0\n",
|
||
" else Total_no_of_debit_transactions\n",
|
||
" end as Total_no_of_debit_transactions,\n",
|
||
" case\n",
|
||
" when subquery.Debit_transaction_amount = 0\n",
|
||
" or subquery.Debit_transaction_amount is NULL then 0\n",
|
||
" else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n",
|
||
" end as Wash_Ratio\n",
|
||
" from \n",
|
||
" (\n",
|
||
" (\n",
|
||
" select customer_number as CUSTOMER_NUMBER_1, \n",
|
||
" sum(transaction_amount) as Credit_transaction_amount, \n",
|
||
" count(*) as Total_no_of_credit_transactions\n",
|
||
" from \n",
|
||
" (\n",
|
||
" select * \n",
|
||
" from {trans_data} as trans_table left join {acc_data} as acc_table\n",
|
||
" on trans_table.benef_account_number = acc_table.account_number\n",
|
||
" )\n",
|
||
" where account_number not in ('None')\n",
|
||
" group by 1\n",
|
||
" ) credit left join\n",
|
||
" (\n",
|
||
" select customer_number as CUSTOMER_NUMBER_2, \n",
|
||
" sum(transaction_amount) as Debit_transaction_amount, \n",
|
||
" count(*) as Total_no_of_debit_transactions\n",
|
||
" from \n",
|
||
" (\n",
|
||
" select * \n",
|
||
" from {trans_data} as trans_table left join {acc_data} as acc_table\n",
|
||
" on trans_table.orig_account_number = acc_table.account_number\n",
|
||
" )\n",
|
||
" where account_number not in ('None')\n",
|
||
" group by 1\n",
|
||
" ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2 \n",
|
||
" ) subquery\n",
|
||
" ) main left join \n",
|
||
" (\n",
|
||
" select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
|
||
" subquery.SEGMENT,\n",
|
||
" subquery.RISK,\n",
|
||
" case\n",
|
||
" when subquery.SAR_FLAG is NULL then 'N'\n",
|
||
" else subquery.SAR_FLAG\n",
|
||
" end as SAR_FLAG \n",
|
||
" from\n",
|
||
" (\n",
|
||
" (\n",
|
||
" select customer_number as CUSTOMER_NUMBER_3, \n",
|
||
" business_segment as SEGMENT,\n",
|
||
" case\n",
|
||
" when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
|
||
" when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
|
||
" when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
|
||
" else 'Unknown Risk'\n",
|
||
" end AS RISK\n",
|
||
" from {cust_data}\n",
|
||
" ) cd left join\n",
|
||
" (\n",
|
||
" select customer_number as CUSTOMER_NUMBER_4, \n",
|
||
" sar_flag as SAR_FLAG\n",
|
||
" from {alert_data}\n",
|
||
" ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
|
||
" ) subquery\n",
|
||
" ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
|
||
" ) final\n",
|
||
"\"\"\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tms_data_interface import SQLQueryInterface\n",
|
||
"\n",
|
||
"class Scenario:\n",
|
||
"    \"\"\"Wash-ratio scenario: per-customer credit/debit totals with a sampled SAR flag.\n",
|
||
"\n",
|
||
"    ``logic`` runs the notebook-level ``query`` template and post-processes the\n",
|
||
"    rows with pandas. The class constants replace what used to be inline magic\n",
|
||
"    numbers, so they can be tuned in one place.\n",
|
||
"    \"\"\"\n",
|
||
|
||
"\n",
|
||
"    # Query interface bound to the transaction schema; shared by all instances.\n",
|
||
"    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||
"\n",
|
||
"    PERCENTILE = 0.90  # per-segment quantile used as the 'high' cut-off\n",
|
||
"    SAMPLE_FRACTION = 0.3  # 30% of the high population is flagged 'Y'\n",
|
||
"    RANDOM_STATE = 42  # fixed seed so the sampled rows are reproducible\n",
|
||
"\n",
|
||
"    def logic(self, **kwargs):\n",
|
||
"        \"\"\"Run the query and return the rows with P90 thresholds and a sampled SAR_FLAG.\n",
|
||
"\n",
|
||
"        Args:\n",
|
||
"            **kwargs: accepted for interface compatibility; not read here.\n",
|
||
"\n",
|
||
"        Returns:\n",
|
||
"            pandas.DataFrame with the nine query columns plus ``P90_Credit``,\n",
|
||
"            ``P90_Debit`` and ``P90_Wash``. ``SAR_FLAG`` is 'Y' for a random\n",
|
||
"            ``SAMPLE_FRACTION`` of the rows that exceed their segment's percentile\n",
|
||
"            in both debit amount and wash ratio, and 'N' for every other row; the\n",
|
||
"            SAR_FLAG value returned by the query is overwritten.\n",
|
||
"        \"\"\"\n",
|
||
|
||
"        sql = query.format(\n",
|
||
"            trans_data=\"transaction10m\",\n",
|
||
"            cust_data=\"customer_data_v1\",\n",
|
||
"            acc_data=\"account_data_v1\",\n",
|
||
"            alert_data=\"alert_data_v1\",\n",
|
||
"        )\n",
|
||
"        row_list = self.seq.execute_raw(sql)\n",
|
||
"\n",
|
||
"        cols = [\n",
|
||
"            \"Focal_id\",\n",
|
||
"            \"Credit_transaction_amount\",\n",
|
||
"            \"Total_no_of_credit_transactions\",\n",
|
||
"            \"Debit_transaction_amount\",\n",
|
||
"            \"Total_no_of_debit_transactions\",\n",
|
||
"            \"Wash_Ratio\",\n",
|
||
"            \"Segment\",\n",
|
||
"            \"Risk\",\n",
|
||
"            \"SAR_FLAG\",\n",
|
||
"        ]\n",
|
||
"        df = pd.DataFrame(row_list, columns=cols)\n",
|
||
"\n",
|
||
"        # Fixed-width int64: totals run into the billions, which overflows 32 bits,\n",
|
||
"        # and a plain 'int' cast is only 32 bits wide on some platforms.\n",
|
||
"        amount_cols = [\"Credit_transaction_amount\", \"Debit_transaction_amount\"]\n",
|
||
"        df[amount_cols] = df[amount_cols].astype(\"int64\")\n",
|
||
"        df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype(\"float\")\n",
|
||
"\n",
|
||
"        # Step 1: per-segment percentile of the three metrics, renamed for clarity.\n",
|
||
"        percentiles = (\n",
|
||
"            df.groupby(\"Segment\")[amount_cols + [\"Wash_Ratio\"]]\n",
|
||
"            .quantile(self.PERCENTILE)\n",
|
||
"            .reset_index()\n",
|
||
"            .rename(columns={\n",
|
||
"                \"Credit_transaction_amount\": \"P90_Credit\",\n",
|
||
"                \"Debit_transaction_amount\": \"P90_Debit\",\n",
|
||
"                \"Wash_Ratio\": \"P90_Wash\",\n",
|
||
"            })\n",
|
||
"        )\n",
|
||
"\n",
|
||
"        # Step 2: attach each row's segment thresholds.\n",
|
||
"        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
|
||
"\n",
|
||
"        # Step 3: rows above the segment threshold in BOTH debit amount and wash\n",
|
||
"        # ratio. P90_Credit stays in the output but is not part of the filter.\n",
|
||
"        high_pop = (\n",
|
||
"            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"])\n",
|
||
"            & (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
|
||
"        )\n",
|
||
"\n",
|
||
"        # Step 4: reproducible random sample (SAMPLE_FRACTION) of that population.\n",
|
||
"        high_pop_indices = (\n",
|
||
"            df[high_pop]\n",
|
||
"            .sample(frac=self.SAMPLE_FRACTION, random_state=self.RANDOM_STATE)\n",
|
||
"            .index\n",
|
||
"        )\n",
|
||
"\n",
|
||
"        # Step 5: default every row to 'N', then flag only the sampled rows.\n",
|
||
"        df[\"SAR_FLAG\"] = \"N\"\n",
|
||
"        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"\n",
|
||
"        return df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# sen = Scenario()\n",
|
||
"# a = sen.logic()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Focal_id</th>\n",
|
||
" <th>Credit_transaction_amount</th>\n",
|
||
" <th>Total_no_of_credit_transactions</th>\n",
|
||
" <th>Debit_transaction_amount</th>\n",
|
||
" <th>Total_no_of_debit_transactions</th>\n",
|
||
" <th>Wash_Ratio</th>\n",
|
||
" <th>Segment</th>\n",
|
||
" <th>Risk</th>\n",
|
||
" <th>SAR_FLAG</th>\n",
|
||
" <th>P90_Credit</th>\n",
|
||
" <th>P90_Debit</th>\n",
|
||
" <th>P90_Wash</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>PN489144</td>\n",
|
||
" <td>2830802741</td>\n",
|
||
" <td>2060</td>\n",
|
||
" <td>2847556186</td>\n",
|
||
" <td>1976</td>\n",
|
||
" <td>0.994117</td>\n",
|
||
" <td>Whole Sale Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.400246e+09</td>\n",
|
||
" <td>4.332448e+09</td>\n",
|
||
" <td>1.058020</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>PN394780</td>\n",
|
||
" <td>2872685364</td>\n",
|
||
" <td>2029</td>\n",
|
||
" <td>2743931855</td>\n",
|
||
" <td>1999</td>\n",
|
||
" <td>1.046923</td>\n",
|
||
" <td>Whole Sale Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.400246e+09</td>\n",
|
||
" <td>4.332448e+09</td>\n",
|
||
" <td>1.058020</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>PN195722</td>\n",
|
||
" <td>5604208368</td>\n",
|
||
" <td>3937</td>\n",
|
||
" <td>5557946505</td>\n",
|
||
" <td>4039</td>\n",
|
||
" <td>1.008324</td>\n",
|
||
" <td>SME</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.532321e+09</td>\n",
|
||
" <td>4.534860e+09</td>\n",
|
||
" <td>1.062759</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>PN652566</td>\n",
|
||
" <td>1630905248</td>\n",
|
||
" <td>1152</td>\n",
|
||
" <td>1686713614</td>\n",
|
||
" <td>1169</td>\n",
|
||
" <td>0.966913</td>\n",
|
||
" <td>Whole Sale Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.400246e+09</td>\n",
|
||
" <td>4.332448e+09</td>\n",
|
||
" <td>1.058020</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>PN181960</td>\n",
|
||
" <td>2157634332</td>\n",
|
||
" <td>1613</td>\n",
|
||
" <td>2039953312</td>\n",
|
||
" <td>1552</td>\n",
|
||
" <td>1.057688</td>\n",
|
||
" <td>Corporate Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>5.021582e+09</td>\n",
|
||
" <td>5.003501e+09</td>\n",
|
||
" <td>1.063161</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10009</th>\n",
|
||
" <td>PN479491</td>\n",
|
||
" <td>31124877</td>\n",
|
||
" <td>246</td>\n",
|
||
" <td>23590191</td>\n",
|
||
" <td>357</td>\n",
|
||
" <td>1.319399</td>\n",
|
||
" <td>Private Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.461828e+07</td>\n",
|
||
" <td>3.176446e+07</td>\n",
|
||
" <td>1.760285</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10010</th>\n",
|
||
" <td>PN267550</td>\n",
|
||
" <td>36558708</td>\n",
|
||
" <td>260</td>\n",
|
||
" <td>27361057</td>\n",
|
||
" <td>366</td>\n",
|
||
" <td>1.336158</td>\n",
|
||
" <td>Priority Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.410392e+07</td>\n",
|
||
" <td>3.076443e+07</td>\n",
|
||
" <td>1.729168</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10011</th>\n",
|
||
" <td>PN293003</td>\n",
|
||
" <td>33990478</td>\n",
|
||
" <td>255</td>\n",
|
||
" <td>24465835</td>\n",
|
||
" <td>323</td>\n",
|
||
" <td>1.389304</td>\n",
|
||
" <td>Others</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>6.334963e+07</td>\n",
|
||
" <td>4.223903e+07</td>\n",
|
||
" <td>1.740112</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10012</th>\n",
|
||
" <td>PN534105</td>\n",
|
||
" <td>39934813</td>\n",
|
||
" <td>278</td>\n",
|
||
" <td>28247858</td>\n",
|
||
" <td>403</td>\n",
|
||
" <td>1.413729</td>\n",
|
||
" <td>Others</td>\n",
|
||
" <td>High Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>6.334963e+07</td>\n",
|
||
" <td>4.223903e+07</td>\n",
|
||
" <td>1.740112</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10013</th>\n",
|
||
" <td>PN390430</td>\n",
|
||
" <td>36894062</td>\n",
|
||
" <td>257</td>\n",
|
||
" <td>29162252</td>\n",
|
||
" <td>371</td>\n",
|
||
" <td>1.265131</td>\n",
|
||
" <td>Private Banking</td>\n",
|
||
" <td>Low Risk</td>\n",
|
||
" <td>N</td>\n",
|
||
" <td>4.461828e+07</td>\n",
|
||
" <td>3.176446e+07</td>\n",
|
||
" <td>1.760285</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>10014 rows × 12 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
|
||
"0 PN489144 2830802741 2060 \n",
|
||
"1 PN394780 2872685364 2029 \n",
|
||
"2 PN195722 5604208368 3937 \n",
|
||
"3 PN652566 1630905248 1152 \n",
|
||
"4 PN181960 2157634332 1613 \n",
|
||
"... ... ... ... \n",
|
||
"10009 PN479491 31124877 246 \n",
|
||
"10010 PN267550 36558708 260 \n",
|
||
"10011 PN293003 33990478 255 \n",
|
||
"10012 PN534105 39934813 278 \n",
|
||
"10013 PN390430 36894062 257 \n",
|
||
"\n",
|
||
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
|
||
"0 2847556186 1976 0.994117 \n",
|
||
"1 2743931855 1999 1.046923 \n",
|
||
"2 5557946505 4039 1.008324 \n",
|
||
"3 1686713614 1169 0.966913 \n",
|
||
"4 2039953312 1552 1.057688 \n",
|
||
"... ... ... ... \n",
|
||
"10009 23590191 357 1.319399 \n",
|
||
"10010 27361057 366 1.336158 \n",
|
||
"10011 24465835 323 1.389304 \n",
|
||
"10012 28247858 403 1.413729 \n",
|
||
"10013 29162252 371 1.265131 \n",
|
||
"\n",
|
||
" Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
|
||
"0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
|
||
"1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
|
||
"2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
|
||
"3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
|
||
"4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
|
||
"10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
|
||
"10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
|
||
"10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
|
||
"10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
|
||
"\n",
|
||
" P90_Wash \n",
|
||
"0 1.058020 \n",
|
||
"1 1.058020 \n",
|
||
"2 1.062759 \n",
|
||
"3 1.058020 \n",
|
||
"4 1.063161 \n",
|
||
"... ... \n",
|
||
"10009 1.760285 \n",
|
||
"10010 1.729168 \n",
|
||
"10011 1.740112 \n",
|
||
"10012 1.740112 \n",
|
||
"10013 1.760285 \n",
|
||
"\n",
|
||
"[10014 rows x 12 columns]"
|
||
]
|
||
},
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# a"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 88,
|
||
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# a[a[\"SAR_FLAG\"] == \"Y\"]\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|