import re

import pandas as pd
from tms_data_interface import SQLQueryInterface

# Module-level handle kept for interactive exploration of the schema.
seq = SQLQueryInterface(schema="transactionschema")

# List the tables available in the schema.
seq.execute_raw("show tables")

# Per-customer credit/debit profile query.
#
# Placeholders ({trans_data}, {acc_data}, {cust_data}, {alert_data}) are table
# names filled in with str.format() by Scenario.logic().
#
# Shape of the query:
#   * "credit": transactions joined to accounts on benef_account_number,
#     summed and counted per customer_number.
#   * "debit":  transactions joined to accounts on orig_account_number,
#     summed and counted per customer_number.
#   * "main":   credit LEFT JOIN debit; missing debit figures become 0 and
#     Wash_Ratio = credit amount / debit amount (0 when debit is 0 or NULL).
#   * "cust_alert": customer table (segment + risk label) LEFT JOIN alert
#     table (sar_flag, defaulting to 'N').
#
# NOTE(review): `account_number not in ('None')` also discards rows whose
# account_number is NULL (NULL NOT IN (...) is never true), so unmatched rows
# of the inner left joins are dropped - confirm this is intended.
# NOTE(review): the customer -> alert LEFT JOIN emits one row per matching
# alert row, so a customer with several alerts would appear more than once in
# the result - confirm whether the alert table is unique per customer.
query = """
    select final.CUSTOMER_NUMBER_main as Focal_id,
           final.Credit_transaction_amount,
           final.Total_no_of_credit_transactions,
           final.Debit_transaction_amount,
           final.Total_no_of_debit_transactions,
           final.Wash_Ratio,
           final.SEGMENT,
           final.RISK,
           final.SAR_FLAG
    from
    (
        (
            select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
                   subquery.Credit_transaction_amount,
                   subquery.Total_no_of_credit_transactions,
                   case
                       when subquery.Debit_transaction_amount is NULL then 0
                       else Debit_transaction_amount
                   end as Debit_transaction_amount,
                   case
                       when subquery.Total_no_of_debit_transactions is NULL then 0
                       else Total_no_of_debit_transactions
                   end as Total_no_of_debit_transactions,
                   case
                       when subquery.Debit_transaction_amount = 0
                            or subquery.Debit_transaction_amount is NULL then 0
                       else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount
                   end as Wash_Ratio
            from
            (
                (
                    select customer_number as CUSTOMER_NUMBER_1,
                           sum(transaction_amount) as Credit_transaction_amount,
                           count(*) as Total_no_of_credit_transactions
                    from
                    (
                        select *
                        from {trans_data} as trans_table left join {acc_data} as acc_table
                        on trans_table.benef_account_number = acc_table.account_number
                    )
                    where account_number not in ('None')
                    group by 1
                ) credit left join
                (
                    select customer_number as CUSTOMER_NUMBER_2,
                           sum(transaction_amount) as Debit_transaction_amount,
                           count(*) as Total_no_of_debit_transactions
                    from
                    (
                        select *
                        from {trans_data} as trans_table left join {acc_data} as acc_table
                        on trans_table.orig_account_number = acc_table.account_number
                    )
                    where account_number not in ('None')
                    group by 1
                ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2
            ) subquery
        ) main left join
        (
            select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
                   subquery.SEGMENT,
                   subquery.RISK,
                   case
                       when subquery.SAR_FLAG is NULL then 'N'
                       else subquery.SAR_FLAG
                   end as SAR_FLAG
            from
            (
                (
                    select customer_number as CUSTOMER_NUMBER_3,
                           business_segment as SEGMENT,
                           case
                               when RISK_CLASSIFICATION = 1 then 'Low Risk'
                               when RISK_CLASSIFICATION = 2 then 'Medium Risk'
                               when RISK_CLASSIFICATION = 3 then 'High Risk'
                               else 'Unknown Risk'
                           end AS RISK
                    from {cust_data}
                ) cd left join
                (
                    select customer_number as CUSTOMER_NUMBER_4,
                           sar_flag as SAR_FLAG
                    from {alert_data}
                ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
            ) subquery
        ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
    ) final
"""


class Scenario:
    """Build the per-customer credit/debit profile and assign a SAR_FLAG.

    ``logic()`` runs ``query`` against the configured tables, loads the rows
    into a DataFrame, computes per-Segment quantile thresholds for credit
    amount, debit amount and wash ratio, and marks a random sample of the
    customers exceeding any threshold with ``SAR_FLAG = "Y"``.
    """

    seq = SQLQueryInterface(schema="transactionschema")

    # Tables substituted into ``query`` when the caller does not override them.
    _DEFAULT_TABLES = {
        "trans_data": "transaction10m",
        "cust_data": "customer_data_v1",
        "acc_data": "account_data_v1",
        "alert_data": "alert_data_v1",
    }

    # Column labels assigned to the rows returned by ``query`` (same order as
    # the select list).
    _COLUMNS = [
        "Focal_id", "Credit_transaction_amount",
        "Total_no_of_credit_transactions",
        "Debit_transaction_amount", "Total_no_of_debit_transactions",
        "Wash_Ratio", "Segment", "Risk", "SAR_FLAG",
    ]

    # Metric column -> name of its per-Segment threshold column.
    # The "P90_" prefix is kept for backward compatibility with consumers of
    # the returned frame even though the default quantile is 0.98.
    _THRESHOLD_COLUMNS = {
        "Credit_transaction_amount": "P90_Credit",
        "Debit_transaction_amount": "P90_Debit",
        "Wash_Ratio": "P90_Wash",
    }

    # Table names are spliced into the SQL text, so only plain (optionally
    # dot-qualified) identifiers are accepted.
    _IDENTIFIER = re.compile(r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*")

    def logic(self, **kwargs):
        """Run the scenario and return the flagged customer DataFrame.

        Optional keyword arguments (all default to the previous hard-coded
        values, so ``logic()`` with no arguments behaves as before):

        trans_data, cust_data, acc_data, alert_data : str
            Table names substituted into ``query``.
        percentile : float
            Quantile in (0, 1) used for the per-Segment thresholds.
            Default 0.98.
        sample_fraction : float
            Fraction in [0, 1] of the above-threshold population that is
            flagged "Y". Default 0.1, i.e. 10%.
        random_state : int or None
            Seed passed to ``DataFrame.sample``. Default 42.

        Returns
        -------
        pandas.DataFrame
            The query columns plus ``P90_Credit``, ``P90_Debit`` and
            ``P90_Wash``; ``SAR_FLAG`` is "Y" for the sampled rows and "N"
            for every other row.

        Raises
        ------
        ValueError
            If a table name is not a plain identifier, or ``percentile`` /
            ``sample_fraction`` is out of range.
        """
        tables = {
            key: kwargs.get(key, default)
            for key, default in self._DEFAULT_TABLES.items()
        }
        for key, table in tables.items():
            if not isinstance(table, str) or not self._IDENTIFIER.fullmatch(table):
                raise ValueError(f"Invalid table name for {key}: {table!r}")

        percentile = kwargs.get("percentile", 0.98)
        sample_fraction = kwargs.get("sample_fraction", 0.1)
        random_state = kwargs.get("random_state", 42)
        if not 0 < percentile < 1:
            raise ValueError(f"percentile must be in (0, 1), got {percentile!r}")
        if not 0 <= sample_fraction <= 1:
            raise ValueError(
                f"sample_fraction must be in [0, 1], got {sample_fraction!r}"
            )

        row_list = self.seq.execute_raw(query.format(**tables))
        df = pd.DataFrame(row_list, columns=self._COLUMNS)

        # Explicit 64-bit width: the bare 'int' alias can resolve to a 32-bit
        # integer on some platforms, and the amounts exceed the int32 range.
        amount_cols = ["Credit_transaction_amount", "Debit_transaction_amount"]
        df[amount_cols] = df[amount_cols].astype("int64")
        df["Wash_Ratio"] = df["Wash_Ratio"].astype("float")

        return self._flag_high_population(
            df, percentile, sample_fraction, random_state
        )

    @classmethod
    def _flag_high_population(cls, df, percentile, sample_fraction, random_state):
        """Attach per-Segment thresholds to ``df`` and assign SAR_FLAG by sampling."""
        metrics = list(cls._THRESHOLD_COLUMNS)

        # Step 1: per-Segment quantile of each metric.
        thresholds = (
            df.groupby("Segment")[metrics]
            .quantile(percentile)
            .reset_index()
            .rename(columns=cls._THRESHOLD_COLUMNS)
        )

        # Step 2: bring the thresholds back onto every customer row.
        df = df.merge(thresholds, on="Segment", how="left")

        # Step 3: customers strictly above the threshold in ANY metric.
        high_pop = (
            (df["Credit_transaction_amount"] > df["P90_Credit"]) |
            (df["Debit_transaction_amount"] > df["P90_Debit"]) |
            (df["Wash_Ratio"] > df["P90_Wash"])
        )

        # Step 4: random sample of that population (fraction, not percent).
        sampled_index = (
            df[high_pop]
            .sample(frac=sample_fraction, random_state=random_state)
            .index
        )

        # Step 5: the SAR_FLAG returned by the query is discarded here; every
        # row is reset to "N" and only the sampled rows become "Y".
        df["SAR_FLAG"] = "N"
        df.loc[sampled_index, "SAR_FLAG"] = "Y"
        return df


sen = Scenario()
a = sen.logic()
| \n", " | Focal_id | \n", "Credit_transaction_amount | \n", "Total_no_of_credit_transactions | \n", "Debit_transaction_amount | \n", "Total_no_of_debit_transactions | \n", "Wash_Ratio | \n", "Segment | \n", "Risk | \n", "SAR_FLAG | \n", "P90_Credit | \n", "P90_Debit | \n", "P90_Wash | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "PN478710 | \n", "2805352312 | \n", "2020 | \n", "2787126309 | \n", "2025 | \n", "1.006539 | \n", "Corporate Banking | \n", "Low Risk | \n", "N | \n", "6.274828e+09 | \n", "6.259298e+09 | \n", "1.090121 | \n", "
| 1 | \n", "PN483125 | \n", "3890052135 | \n", "2797 | \n", "3968882113 | \n", "2850 | \n", "0.980138 | \n", "Govt. Entities | \n", "Low Risk | \n", "N | \n", "6.112897e+09 | \n", "6.072409e+09 | \n", "1.112059 | \n", "
| 2 | \n", "PN890403 | \n", "4136296083 | \n", "2937 | \n", "3999785063 | \n", "2824 | \n", "1.034130 | \n", "SME | \n", "Low Risk | \n", "N | \n", "5.709904e+09 | \n", "5.559419e+09 | \n", "1.118816 | \n", "
| 3 | \n", "PN531475 | \n", "4183673982 | \n", "2861 | \n", "3987068168 | \n", "2770 | \n", "1.049311 | \n", "Corporate Banking | \n", "Low Risk | \n", "N | \n", "6.274828e+09 | \n", "6.259298e+09 | \n", "1.090121 | \n", "
| 4 | \n", "PN147722 | \n", "1775594615 | \n", "1225 | \n", "1641559222 | \n", "1221 | \n", "1.081651 | \n", "SME | \n", "Low Risk | \n", "N | \n", "5.709904e+09 | \n", "5.559419e+09 | \n", "1.118816 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 10009 | \n", "PN955059 | \n", "31106290 | \n", "264 | \n", "25266130 | \n", "369 | \n", "1.231146 | \n", "Priority Banking | \n", "Low Risk | \n", "N | \n", "7.616620e+07 | \n", "5.263062e+07 | \n", "1.921224 | \n", "
| 10010 | \n", "PN602067 | \n", "29780658 | \n", "238 | \n", "27796448 | \n", "405 | \n", "1.071384 | \n", "Others | \n", "High Risk | \n", "N | \n", "7.897534e+07 | \n", "5.488447e+07 | \n", "1.931817 | \n", "
| 10011 | \n", "PN213487 | \n", "41410071 | \n", "274 | \n", "23896844 | \n", "368 | \n", "1.732868 | \n", "Others | \n", "Low Risk | \n", "N | \n", "7.897534e+07 | \n", "5.488447e+07 | \n", "1.931817 | \n", "
| 10012 | \n", "PN563065 | \n", "34009021 | \n", "251 | \n", "32563582 | \n", "375 | \n", "1.044388 | \n", "Others | \n", "Low Risk | \n", "N | \n", "7.897534e+07 | \n", "5.488447e+07 | \n", "1.931817 | \n", "
| 10013 | \n", "PN388875 | \n", "30904340 | \n", "236 | \n", "21938266 | \n", "344 | \n", "1.408696 | \n", "Mass Market | \n", "Medium Risk | \n", "N | \n", "7.921967e+07 | \n", "5.290545e+07 | \n", "1.915159 | \n", "
10014 rows × 12 columns
\n", "