# %% Imports (single cell: third-party first, then project-local)
import pandas as pd

from tms_data_interface import SQLQueryInterface

# %% Ad-hoc connection used for interactive exploration of the schema
seq = SQLQueryInterface(schema="transactionschema")

# %% List the tables available in the schema
seq.execute_raw("show tables")

# %% SQL template
# Per-customer wash-trading metrics. Placeholders filled in with str.format:
#   {trans_data}  transaction table
#   {acc_data}    account table (maps account_number -> customer_number)
#   {cust_data}   customer table (business_segment, RISK_CLASSIFICATION)
#   {alert_data}  alert table (sar_flag)
#
# Shape of the query:
#   credit     = sum/count of transactions joined on benef_account_number
#   debit      = sum/count of transactions joined on orig_account_number
#   main       = credit LEFT JOIN debit; missing debit values become 0 and
#                Wash_Ratio = credit / debit (0 when debit is 0 or NULL)
#   cust_alert = customer LEFT JOIN alert; a missing SAR_FLAG becomes 'N'
#   final      = main LEFT JOIN cust_alert
#
# NOTE(review): `cd LEFT JOIN ad` emits one row per matching alert row, so a
# customer with several alerts would appear several times in the result.
# Confirm alert_data holds at most one row per customer.
query = """
    select final.CUSTOMER_NUMBER_main as Focal_id,
           final.Credit_transaction_amount,
           final.Total_no_of_credit_transactions,
           final.Debit_transaction_amount,
           final.Total_no_of_debit_transactions,
           final.Wash_Ratio,
           final.SEGMENT,
           final.RISK,
           final.SAR_FLAG
    from
    (
        (
            select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
                   subquery.Credit_transaction_amount,
                   subquery.Total_no_of_credit_transactions,
                   case
                       when subquery.Debit_transaction_amount is NULL then 0
                       else Debit_transaction_amount
                   end as Debit_transaction_amount,
                   case
                       when subquery.Total_no_of_debit_transactions is NULL then 0
                       else Total_no_of_debit_transactions
                   end as Total_no_of_debit_transactions,
                   case
                       when subquery.Debit_transaction_amount = 0
                            or subquery.Debit_transaction_amount is NULL then 0
                       else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount
                   end as Wash_Ratio
            from
            (
                (
                    select customer_number as CUSTOMER_NUMBER_1,
                           sum(transaction_amount) as Credit_transaction_amount,
                           count(*) as Total_no_of_credit_transactions
                    from
                    (
                        select *
                        from {trans_data} as trans_table left join {acc_data} as acc_table
                        on trans_table.benef_account_number = acc_table.account_number
                    )
                    where account_number not in ('None')
                    group by 1
                ) credit left join
                (
                    select customer_number as CUSTOMER_NUMBER_2,
                           sum(transaction_amount) as Debit_transaction_amount,
                           count(*) as Total_no_of_debit_transactions
                    from
                    (
                        select *
                        from {trans_data} as trans_table left join {acc_data} as acc_table
                        on trans_table.orig_account_number = acc_table.account_number
                    )
                    where account_number not in ('None')
                    group by 1
                ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2
            ) subquery
        ) main left join
        (
            select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
                   subquery.SEGMENT,
                   subquery.RISK,
                   case
                       when subquery.SAR_FLAG is NULL then 'N'
                       else subquery.SAR_FLAG
                   end as SAR_FLAG
            from
            (
                (
                    select customer_number as CUSTOMER_NUMBER_3,
                           business_segment as SEGMENT,
                           case
                               when RISK_CLASSIFICATION = 1 then 'Low Risk'
                               when RISK_CLASSIFICATION = 2 then 'Medium Risk'
                               when RISK_CLASSIFICATION = 3 then 'High Risk'
                               else 'Unknown Risk'
                           end AS RISK
                    from {cust_data}
                ) cd left join
                (
                    select customer_number as CUSTOMER_NUMBER_4,
                           sar_flag as SAR_FLAG
                    from {alert_data}
                ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
            ) subquery
        ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
    ) final
"""


# %% Scenario definition
class Scenario:
    """Builds per-customer wash-trading metrics and assigns a sampled SAR flag.

    ``logic`` runs the module-level ``query`` template, loads the rows into a
    DataFrame, attaches per-segment 90th-percentile thresholds, and marks a
    reproducible random sample of the customers exceeding both the debit and
    the wash-ratio thresholds with ``SAR_FLAG == "Y"``.
    """

    seq = SQLQueryInterface(schema="transactionschema")

    # Table names substituted into the `query` template placeholders.
    TABLES = {
        "trans_data": "transaction10m",
        "cust_data": "customer_data_v1",
        "acc_data": "account_data_v1",
        "alert_data": "alert_data_v1",
    }

    # Column labels applied positionally to the rows returned by `query`
    # (the SQL aliases SEGMENT / RISK are relabelled Segment / Risk here).
    COLUMNS = [
        "Focal_id",
        "Credit_transaction_amount",
        "Total_no_of_credit_transactions",
        "Debit_transaction_amount",
        "Total_no_of_debit_transactions",
        "Wash_Ratio",
        "Segment",
        "Risk",
        "SAR_FLAG",
    ]

    # Metric column -> name of its per-segment percentile column in the output.
    P90_COLUMNS = {
        "Credit_transaction_amount": "P90_Credit",
        "Debit_transaction_amount": "P90_Debit",
        "Wash_Ratio": "P90_Wash",
    }

    PERCENTILE = 0.90       # quantile used as the per-segment threshold
    SAMPLE_FRACTION = 0.3   # 30% of the above-threshold population is flagged
    RANDOM_STATE = 42       # fixed seed so the flagged sample is reproducible

    def logic(self, **kwargs):
        """Return the customer metrics frame with thresholds and SAR flags.

        Parameters
        ----------
        **kwargs
            Accepted for interface compatibility; not read by this method.

        Returns
        -------
        pandas.DataFrame
            One row per query result row with the ``COLUMNS`` fields plus
            ``P90_Credit``, ``P90_Debit`` and ``P90_Wash``. ``SAR_FLAG`` is
            ``"Y"`` for the sampled rows and ``"N"`` for every other row.
        """
        metrics = self._load_customer_metrics()
        metrics = self._attach_segment_percentiles(metrics)
        return self._assign_sar_flags(metrics)

    def _load_customer_metrics(self):
        """Run the SQL template and return the rows as a typed DataFrame."""
        rows = self.seq.execute_raw(query.format(**self.TABLES))
        frame = pd.DataFrame(rows, columns=self.COLUMNS)
        # Explicit int64: the platform-width 'int' is 32-bit on some platforms
        # and the summed amounts exceed the int32 range. The integer cast drops
        # any fractional part of the amounts, as the original cast did.
        return frame.astype({
            "Credit_transaction_amount": "int64",
            "Debit_transaction_amount": "int64",
            "Wash_Ratio": "float",
        })

    def _attach_segment_percentiles(self, frame):
        """Left-merge the per-Segment percentile of each metric onto every row."""
        thresholds = (
            frame.groupby("Segment")[list(self.P90_COLUMNS)]
            .quantile(self.PERCENTILE)
            .rename(columns=self.P90_COLUMNS)
            .reset_index()
        )
        # Rows whose Segment is missing form no group, so their P90_* values are
        # NaN after the merge and they can never exceed a threshold.
        return frame.merge(thresholds, on="Segment", how="left")

    def _assign_sar_flags(self, frame):
        """Overwrite SAR_FLAG: 'Y' for a random sample of outliers, else 'N'."""
        # A customer is in the high population only when BOTH the debit amount
        # and the wash ratio exceed their segment thresholds. P90_Credit is
        # computed and returned but is not part of the filter (the credit
        # comparison was disabled in the original code).
        above_threshold = (
            (frame["Debit_transaction_amount"] > frame["P90_Debit"])
            & (frame["Wash_Ratio"] > frame["P90_Wash"])
        )
        sampled_index = (
            frame[above_threshold]
            .sample(frac=self.SAMPLE_FRACTION, random_state=self.RANDOM_STATE)
            .index
        )
        # The SAR_FLAG value returned by the SQL query is discarded here.
        frame["SAR_FLAG"] = "N"
        frame.loc[sampled_index, "SAR_FLAG"] = "Y"
        return frame


# %% Manual smoke run (left disabled, as in the original notebook)
# sen = Scenario()
# a = sen.logic()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN48914428308027412060284755618619760.994117Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
1PN39478028726853642029274393185519991.046923Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
2PN19572256042083683937555794650540391.008324SMELow RiskN4.532321e+094.534860e+091.062759
3PN65256616309052481152168671361411690.966913Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
4PN18196021576343321613203995331215521.057688Corporate BankingLow RiskN5.021582e+095.003501e+091.063161
.......................................
10009PN47949131124877246235901913571.319399Private BankingLow RiskN4.461828e+073.176446e+071.760285
10010PN26755036558708260273610573661.336158Priority BankingLow RiskN4.410392e+073.076443e+071.729168
10011PN29300333990478255244658353231.389304OthersLow RiskN6.334963e+074.223903e+071.740112
10012PN53410539934813278282478584031.413729OthersHigh RiskN6.334963e+074.223903e+071.740112
10013PN39043036894062257291622523711.265131Private BankingLow RiskN4.461828e+073.176446e+071.760285
\n", "

10014 rows × 12 columns

\n", "
" ], "text/plain": [ " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", "0 PN489144 2830802741 2060 \n", "1 PN394780 2872685364 2029 \n", "2 PN195722 5604208368 3937 \n", "3 PN652566 1630905248 1152 \n", "4 PN181960 2157634332 1613 \n", "... ... ... ... \n", "10009 PN479491 31124877 246 \n", "10010 PN267550 36558708 260 \n", "10011 PN293003 33990478 255 \n", "10012 PN534105 39934813 278 \n", "10013 PN390430 36894062 257 \n", "\n", " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", "0 2847556186 1976 0.994117 \n", "1 2743931855 1999 1.046923 \n", "2 5557946505 4039 1.008324 \n", "3 1686713614 1169 0.966913 \n", "4 2039953312 1552 1.057688 \n", "... ... ... ... \n", "10009 23590191 357 1.319399 \n", "10010 27361057 366 1.336158 \n", "10011 24465835 323 1.389304 \n", "10012 28247858 403 1.413729 \n", "10013 29162252 371 1.265131 \n", "\n", " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", "0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", "1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", "2 SME Low Risk N 4.532321e+09 4.534860e+09 \n", "3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", "4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n", "... ... ... ... ... ... \n", "10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", "10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n", "10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n", "10012 Others High Risk N 6.334963e+07 4.223903e+07 \n", "10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", "\n", " P90_Wash \n", "0 1.058020 \n", "1 1.058020 \n", "2 1.062759 \n", "3 1.058020 \n", "4 1.063161 \n", "... ... 
\n", "10009 1.760285 \n", "10010 1.729168 \n", "10011 1.740112 \n", "10012 1.740112 \n", "10013 1.760285 \n", "\n", "[10014 rows x 12 columns]" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# a" ] }, { "cell_type": "code", "execution_count": 88, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] }, "outputs": [], "source": [ "# a[a[\"SAR_FLAG\"] == \"Y\"]\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }