{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "seq = SQLQueryInterface(schema=\"transactionschema\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fc212ace-ca7a-45f2-8137-f436c6123652",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['account_data_v1'],\n",
       " ['account_data_v2'],\n",
       " ['alert_data_v1'],\n",
       " ['alert_data_v2'],\n",
       " ['customer_data_v1'],\n",
       " ['customer_data_v2'],\n",
       " ['transaction10m'],\n",
       " ['transaction60m']]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seq.execute_raw(\"show tables\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    " select final.CUSTOMER_NUMBER_main as Focal_id,\n",
    " final.Credit_transaction_amount,\n",
    " final.Total_no_of_credit_transactions,\n",
    " final.Debit_transaction_amount,\n",
    " final.Total_no_of_debit_transactions,\n",
    " final.Wash_Ratio,\n",
    " final.SEGMENT,\n",
    " final.RISK,\n",
    " final.SAR_FLAG\n",
    " from \n",
    " (\n",
    " (\n",
    " select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
    " subquery.Credit_transaction_amount,\n",
    " subquery.Total_no_of_credit_transactions,\n",
    " case\n",
    " when subquery.Debit_transaction_amount is NULL then 0\n",
    " else Debit_transaction_amount\n",
    " end as Debit_transaction_amount,\n",
    " case\n",
    " when subquery.Total_no_of_debit_transactions is NULL then 0\n",
    " else Total_no_of_debit_transactions\n",
    " end as Total_no_of_debit_transactions,\n",
    " case\n",
    " when subquery.Debit_transaction_amount = 0\n",
    " or subquery.Debit_transaction_amount is NULL then 0\n",
    " else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n",
    " end as Wash_Ratio\n",
    " from \n",
    " (\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_1, \n",
    " sum(transaction_amount) as Credit_transaction_amount, \n",
    " count(*) as Total_no_of_credit_transactions\n",
    " from \n",
    " (\n",
    " select * \n",
    " from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    " on trans_table.benef_account_number = acc_table.account_number\n",
    " )\n",
    " where account_number not in ('None')\n",
    " group by 1\n",
    " ) credit left join\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_2, \n",
    " sum(transaction_amount) as Debit_transaction_amount, \n",
    " count(*) as Total_no_of_debit_transactions\n",
    " from \n",
    " (\n",
    " select * \n",
    " from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    " on trans_table.orig_account_number = acc_table.account_number\n",
    " )\n",
    " where account_number not in ('None')\n",
    " group by 1\n",
    " ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2 \n",
    " ) subquery\n",
    " ) main left join \n",
    " (\n",
    " select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
    " subquery.SEGMENT,\n",
    " subquery.RISK,\n",
    " case\n",
    " when subquery.SAR_FLAG is NULL then 'N'\n",
    " else subquery.SAR_FLAG\n",
    " end as SAR_FLAG \n",
    " from\n",
    " (\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_3, \n",
    " business_segment as SEGMENT,\n",
    " case\n",
    " when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
    " when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
    " when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
    " else 'Unknown Risk'\n",
    " end AS RISK\n",
    " from {cust_data}\n",
    " ) cd left join\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_4, \n",
    " sar_flag as SAR_FLAG\n",
    " from {alert_data}\n",
    " ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
    " ) subquery\n",
    " ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
    " ) final\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "\n",
    "\n",
    "class Scenario:\n",
    "    \"\"\"Per-customer credit/debit aggregates with a sampled SAR flag.\n",
    "\n",
    "    `logic` fills the notebook-level `query` template with the table names\n",
    "    in `TABLES`, runs it through `seq`, loads the rows into a DataFrame,\n",
    "    attaches the per-Segment percentile of credit amount, debit amount and\n",
    "    wash ratio, and marks a seeded random sample of the high population\n",
    "    with SAR_FLAG = 'Y'.\n",
    "\n",
    "    Relies on `pd` and `query` being defined by earlier notebook cells.\n",
    "    \"\"\"\n",
    "\n",
    "    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
    "\n",
    "    # Table names substituted into the placeholders of `query`.\n",
    "    TABLES = {\n",
    "        \"trans_data\": \"transaction10m\",\n",
    "        \"cust_data\": \"customer_data_v1\",\n",
    "        \"acc_data\": \"account_data_v1\",\n",
    "        \"alert_data\": \"alert_data_v1\",\n",
    "    }\n",
    "\n",
    "    # Column labels, in the order of the query's outer SELECT list.\n",
    "    COLUMNS = [\n",
    "        \"Focal_id\", \"Credit_transaction_amount\",\n",
    "        \"Total_no_of_credit_transactions\",\n",
    "        \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
    "        \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\",\n",
    "    ]\n",
    "\n",
    "    PERCENTILE = 0.90      # per-Segment quantile used as the threshold\n",
    "    SAMPLE_FRACTION = 0.3  # 30% of the high population is flagged 'Y'\n",
    "    RANDOM_STATE = 42      # fixed seed so the sample is reproducible\n",
    "\n",
    "    def logic(self, **kwargs):\n",
    "        \"\"\"Run the scenario query and return the flagged customer frame.\n",
    "\n",
    "        Args:\n",
    "            **kwargs: accepted for interface compatibility; not read here.\n",
    "\n",
    "        Returns:\n",
    "            pandas.DataFrame with the columns in `COLUMNS` plus P90_Credit,\n",
    "            P90_Debit and P90_Wash. SAR_FLAG is 'Y' for the sampled rows\n",
    "            and 'N' for every other row.\n",
    "        \"\"\"\n",
    "        rows = self.seq.execute_raw(query.format(**self.TABLES))\n",
    "        df = pd.DataFrame(rows, columns=self.COLUMNS)\n",
    "        # NOTE(review): the query left-joins alert_data on customer_number;\n",
    "        # if a customer can have several alert rows they would repeat here\n",
    "        # and weigh on the percentiles - verify against the data.\n",
    "\n",
    "        amount_cols = [\"Credit_transaction_amount\", \"Debit_transaction_amount\"]\n",
    "        # Explicit int64: plain 'int' can map to a 32-bit integer on some\n",
    "        # platforms (e.g. NumPy 1.x on Windows) and these amounts exceed 2**31.\n",
    "        df[amount_cols] = df[amount_cols].astype(\"int64\")\n",
    "        df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype(\"float\")\n",
    "\n",
    "        # Step 1: per-Segment percentile thresholds for the three metrics.\n",
    "        percentiles = (\n",
    "            df.groupby(\"Segment\")[amount_cols + [\"Wash_Ratio\"]]\n",
    "            .quantile(self.PERCENTILE)\n",
    "            .reset_index()\n",
    "            .rename(columns={\n",
    "                \"Credit_transaction_amount\": \"P90_Credit\",\n",
    "                \"Debit_transaction_amount\": \"P90_Debit\",\n",
    "                \"Wash_Ratio\": \"P90_Wash\",\n",
    "            })\n",
    "        )\n",
    "\n",
    "        # Step 2: attach each row's Segment thresholds.\n",
    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 3: high population = above the Segment threshold on BOTH the\n",
    "        # debit amount and the wash ratio. P90_Credit is reported in the\n",
    "        # result but is not part of the filter.\n",
    "        high_pop = (\n",
    "            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"])\n",
    "            & (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
    "        )\n",
    "\n",
    "        # Step 4: seeded random sample (SAMPLE_FRACTION) of the high population.\n",
    "        flagged = df[high_pop].sample(\n",
    "            frac=self.SAMPLE_FRACTION, random_state=self.RANDOM_STATE\n",
    "        ).index\n",
    "\n",
    "        # Step 5: the SAR_FLAG returned by the query is discarded; only the\n",
    "        # sampled rows are marked 'Y'.\n",
    "        df[\"SAR_FLAG\"] = \"N\"\n",
    "        df.loc[flagged, \"SAR_FLAG\"] = \"Y\"\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# sen = Scenario()\n",
    "# a = sen.logic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | Focal_id | \n", "Credit_transaction_amount | \n", "Total_no_of_credit_transactions | \n", "Debit_transaction_amount | \n", "Total_no_of_debit_transactions | \n", "Wash_Ratio | \n", "Segment | \n", "Risk | \n", "SAR_FLAG | \n", "P90_Credit | \n", "P90_Debit | \n", "P90_Wash | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "PN489144 | \n", "2830802741 | \n", "2060 | \n", "2847556186 | \n", "1976 | \n", "0.994117 | \n", "Whole Sale Banking | \n", "Low Risk | \n", "N | \n", "4.400246e+09 | \n", "4.332448e+09 | \n", "1.058020 | \n", "
| 1 | \n", "PN394780 | \n", "2872685364 | \n", "2029 | \n", "2743931855 | \n", "1999 | \n", "1.046923 | \n", "Whole Sale Banking | \n", "Low Risk | \n", "N | \n", "4.400246e+09 | \n", "4.332448e+09 | \n", "1.058020 | \n", "
| 2 | \n", "PN195722 | \n", "5604208368 | \n", "3937 | \n", "5557946505 | \n", "4039 | \n", "1.008324 | \n", "SME | \n", "Low Risk | \n", "N | \n", "4.532321e+09 | \n", "4.534860e+09 | \n", "1.062759 | \n", "
| 3 | \n", "PN652566 | \n", "1630905248 | \n", "1152 | \n", "1686713614 | \n", "1169 | \n", "0.966913 | \n", "Whole Sale Banking | \n", "Low Risk | \n", "N | \n", "4.400246e+09 | \n", "4.332448e+09 | \n", "1.058020 | \n", "
| 4 | \n", "PN181960 | \n", "2157634332 | \n", "1613 | \n", "2039953312 | \n", "1552 | \n", "1.057688 | \n", "Corporate Banking | \n", "Low Risk | \n", "N | \n", "5.021582e+09 | \n", "5.003501e+09 | \n", "1.063161 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 10009 | \n", "PN479491 | \n", "31124877 | \n", "246 | \n", "23590191 | \n", "357 | \n", "1.319399 | \n", "Private Banking | \n", "Low Risk | \n", "N | \n", "4.461828e+07 | \n", "3.176446e+07 | \n", "1.760285 | \n", "
| 10010 | \n", "PN267550 | \n", "36558708 | \n", "260 | \n", "27361057 | \n", "366 | \n", "1.336158 | \n", "Priority Banking | \n", "Low Risk | \n", "N | \n", "4.410392e+07 | \n", "3.076443e+07 | \n", "1.729168 | \n", "
| 10011 | \n", "PN293003 | \n", "33990478 | \n", "255 | \n", "24465835 | \n", "323 | \n", "1.389304 | \n", "Others | \n", "Low Risk | \n", "N | \n", "6.334963e+07 | \n", "4.223903e+07 | \n", "1.740112 | \n", "
| 10012 | \n", "PN534105 | \n", "39934813 | \n", "278 | \n", "28247858 | \n", "403 | \n", "1.413729 | \n", "Others | \n", "High Risk | \n", "N | \n", "6.334963e+07 | \n", "4.223903e+07 | \n", "1.740112 | \n", "
| 10013 | \n", "PN390430 | \n", "36894062 | \n", "257 | \n", "29162252 | \n", "371 | \n", "1.265131 | \n", "Private Banking | \n", "Low Risk | \n", "N | \n", "4.461828e+07 | \n", "3.176446e+07 | \n", "1.760285 | \n", "
10014 rows × 12 columns
\n", "