{ "cells": [ { "cell_type": "code", "execution_count": 53, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 43, "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d", "metadata": { "tags": [] }, "outputs": [], "source": [ "from tms_data_interface import SQLQueryInterface\n", "seq = SQLQueryInterface(schema=\"transactionschema\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "fc212ace-ca7a-45f2-8137-f436c6123652", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[['account_data_v1'],\n", " ['account_data_v2'],\n", " ['alert_data_v1'],\n", " ['alert_data_v2'],\n", " ['customer_data_v1'],\n", " ['customer_data_v2'],\n", " ['transaction10m'],\n", " ['transaction60m']]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seq.execute_raw(\"show tables\")" ] }, { "cell_type": "code", "execution_count": 54, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] }, "outputs": [], "source": [ "query = \"\"\"\n", " select final.CUSTOMER_NUMBER_main as Focal_id,\n", " final.Credit_transaction_amount,\n", " final.Total_no_of_credit_transactions,\n", " final.Debit_transaction_amount,\n", " final.Total_no_of_debit_transactions,\n", " final.Wash_Ratio,\n", " final.SEGMENT,\n", " final.RISK,\n", " final.SAR_FLAG\n", " from \n", " (\n", " (\n", " select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", " subquery.Credit_transaction_amount,\n", " subquery.Total_no_of_credit_transactions,\n", " case\n", " when subquery.Debit_transaction_amount is NULL then 0\n", " else Debit_transaction_amount\n", " end as Debit_transaction_amount,\n", " case\n", " when subquery.Total_no_of_debit_transactions is NULL then 0\n", " else Total_no_of_debit_transactions\n", " end as Total_no_of_debit_transactions,\n", " case\n", " when subquery.Debit_transaction_amount = 0\n", " or subquery.Debit_transaction_amount is NULL then 0\n", " else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n", " end as Wash_Ratio\n", " from \n", " (\n", " (\n", " select customer_number as CUSTOMER_NUMBER_1, \n", " sum(transaction_amount) as Credit_transaction_amount, \n", " count(*) as Total_no_of_credit_transactions\n", " from \n", " (\n", " select * \n", " from {trans_data} as trans_table left join {acc_data} as acc_table\n", " on trans_table.benef_account_number = acc_table.account_number\n", " )\n", " where account_number not in ('None')\n", " group by 1\n", " ) credit left join\n", " (\n", " select customer_number as CUSTOMER_NUMBER_2, \n", " sum(transaction_amount) as Debit_transaction_amount, \n", " count(*) as Total_no_of_debit_transactions\n", " from \n", " (\n", " select * \n", " from {trans_data} as trans_table left join {acc_data} as acc_table\n", " on trans_table.orig_account_number = acc_table.account_number\n", " )\n", " where account_number not in ('None')\n", " group by 1\n", " ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2 \n", " ) subquery\n", " ) main left join \n", " (\n", " select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", " subquery.SEGMENT,\n", " subquery.RISK,\n", " case\n", " when subquery.SAR_FLAG is NULL then 'N'\n", " else subquery.SAR_FLAG\n", " end as SAR_FLAG \n", " from\n", " (\n", " (\n", " select customer_number as CUSTOMER_NUMBER_3, \n", " business_segment as SEGMENT,\n", " case\n", " when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", " when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", " when RISK_CLASSIFICATION = 3 then 'High Risk'\n", " else 'Unknown Risk'\n", " end AS RISK\n", " from {cust_data}\n", " ) cd left join\n", " (\n", " select customer_number as CUSTOMER_NUMBER_4, \n", " sar_flag as SAR_FLAG\n", " from {alert_data}\n", " ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", " ) subquery\n", " ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", " ) final\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 57, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] }, "outputs": [], "source": [ "from tms_data_interface import SQLQueryInterface\n", "\n", "class Scenario:\n", " seq = SQLQueryInterface(schema=\"transactionschema\")\n", "\n", " def logic(self, **kwargs):\n", " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", " cust_data=\"customer_data_v1\",\n", " acc_data=\"account_data_v1\",\n", " alert_data=\"alert_data_v1\")\n", " )\n", " cols = [\"Focal_id\", \"Credit_transaction_amount\",\n", " \"Total_no_of_credit_transactions\",\n", " \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n", " \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\"]] = df[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\"]].astype('int')\n", " df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype('float')\n", " # Step 1: Compute 90th percentiles per Segment for all 3 fields\n", " percentiles = (\n", " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\",\n", " \"Wash_Ratio\"]]\n", " .quantile(0.98)\n", " .reset_index()\n", " )\n", "\n", " # Rename columns for clarity\n", " percentiles = percentiles.rename(columns={\n", " \"Credit_transaction_amount\": \"P90_Credit\",\n", " \"Debit_transaction_amount\": \"P90_Debit\",\n", " \"Wash_Ratio\": \"P90_Wash\"\n", " })\n", "\n", " # Step 2: Merge back to main df\n", " df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n", "\n", " # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n", " high_pop = (\n", " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n", " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n", " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", " )\n", "\n", " # Step 4: Randomly select 0.1% sample from high-risk population\n", " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 5: Set SAR_FLAG values\n", " df[\"SAR_FLAG\"] = \"N\" # default for all\n", " df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n", " return df" ] }, { "cell_type": "code", "execution_count": 58, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, "outputs": [], "source": [ "sen = Scenario()\n", "a = sen.logic()" ] }, { "cell_type": "code", "execution_count": 59, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN47871028053523122020278712630920251.006539Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
1PN48312538900521352797396888211328500.980138Govt. EntitiesLow RiskN6.112897e+096.072409e+091.112059
2PN89040341362960832937399978506328241.034130SMELow RiskN5.709904e+095.559419e+091.118816
3PN53147541836739822861398706816827701.049311Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
4PN14772217755946151225164155922212211.081651SMELow RiskN5.709904e+095.559419e+091.118816
.......................................
10009PN95505931106290264252661303691.231146Priority BankingLow RiskN7.616620e+075.263062e+071.921224
10010PN60206729780658238277964484051.071384OthersHigh RiskN7.897534e+075.488447e+071.931817
10011PN21348741410071274238968443681.732868OthersLow RiskN7.897534e+075.488447e+071.931817
10012PN56306534009021251325635823751.044388OthersLow RiskN7.897534e+075.488447e+071.931817
10013PN38887530904340236219382663441.408696Mass MarketMedium RiskN7.921967e+075.290545e+071.915159
\n", "

10014 rows × 12 columns

\n", "
" ], "text/plain": [ " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", "0 PN478710 2805352312 2020 \n", "1 PN483125 3890052135 2797 \n", "2 PN890403 4136296083 2937 \n", "3 PN531475 4183673982 2861 \n", "4 PN147722 1775594615 1225 \n", "... ... ... ... \n", "10009 PN955059 31106290 264 \n", "10010 PN602067 29780658 238 \n", "10011 PN213487 41410071 274 \n", "10012 PN563065 34009021 251 \n", "10013 PN388875 30904340 236 \n", "\n", " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", "0 2787126309 2025 1.006539 \n", "1 3968882113 2850 0.980138 \n", "2 3999785063 2824 1.034130 \n", "3 3987068168 2770 1.049311 \n", "4 1641559222 1221 1.081651 \n", "... ... ... ... \n", "10009 25266130 369 1.231146 \n", "10010 27796448 405 1.071384 \n", "10011 23896844 368 1.732868 \n", "10012 32563582 375 1.044388 \n", "10013 21938266 344 1.408696 \n", "\n", " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n", "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n", "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n", "... ... ... ... ... ... \n", "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n", "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n", "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n", "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n", "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n", "\n", " P90_Wash \n", "0 1.090121 \n", "1 1.112059 \n", "2 1.118816 \n", "3 1.090121 \n", "4 1.118816 \n", "... ... \n", "10009 1.921224 \n", "10010 1.931817 \n", "10011 1.931817 \n", "10012 1.931817 \n", "10013 1.915159 \n", "\n", "[10014 rows x 12 columns]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": 49, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] }, "outputs": [], "source": [ "# a[a[\"SAR_FLAG\"] == \"Y\"]\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }