{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "seq = SQLQueryInterface(schema=\"transactionschema\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fc212ace-ca7a-45f2-8137-f436c6123652",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['account_data_v1'],\n",
       " ['account_data_v2'],\n",
       " ['alert_data_v1'],\n",
       " ['alert_data_v2'],\n",
       " ['customer_data_v1'],\n",
       " ['customer_data_v2'],\n",
       " ['transaction10m'],\n",
       " ['transaction60m']]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seq.execute_raw(\"show tables\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# SQL template that profiles customers by their credit and debit totals.\n",
    "# The placeholders {trans_data}, {acc_data}, {cust_data} and {alert_data} are\n",
    "# table names filled in with str.format() by Scenario.logic.\n",
    "#   * credit side: transactions joined to accounts on benef_account_number\n",
    "#   * debit side:  transactions joined to accounts on orig_account_number\n",
    "#   * Wash_Ratio = credit amount / debit amount (0 when debit is 0 or NULL)\n",
    "#   * SEGMENT, RISK label and SAR_FLAG come from the customer and alert tables\n",
    "# NOTE(review): the alert table is left-joined on customer_number without\n",
    "# aggregation, so a customer with several alert rows would presumably appear\n",
    "# several times in the result - confirm against the data.\n",
    "query = \"\"\"\n",
    "    select final.CUSTOMER_NUMBER_main as Focal_id,\n",
    "           final.Credit_transaction_amount,\n",
    "           final.Total_no_of_credit_transactions,\n",
    "           final.Debit_transaction_amount,\n",
    "           final.Total_no_of_debit_transactions,\n",
    "           final.Wash_Ratio,\n",
    "           final.SEGMENT,\n",
    "           final.RISK,\n",
    "           final.SAR_FLAG\n",
    "    from\n",
    "    (\n",
    "        (\n",
    "            select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
    "                   subquery.Credit_transaction_amount,\n",
    "                   subquery.Total_no_of_credit_transactions,\n",
    "                   case\n",
    "                       when subquery.Debit_transaction_amount is NULL then 0\n",
    "                       else Debit_transaction_amount\n",
    "                   end as Debit_transaction_amount,\n",
    "                   case\n",
    "                       when subquery.Total_no_of_debit_transactions is NULL then 0\n",
    "                       else Total_no_of_debit_transactions\n",
    "                   end as Total_no_of_debit_transactions,\n",
    "                   case\n",
    "                       when subquery.Debit_transaction_amount = 0\n",
    "                            or subquery.Debit_transaction_amount is NULL then 0\n",
    "                       else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n",
    "                   end as Wash_Ratio\n",
    "            from\n",
    "            (\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_1,\n",
    "                           sum(transaction_amount) as Credit_transaction_amount,\n",
    "                           count(*) as Total_no_of_credit_transactions\n",
    "                    from\n",
    "                    (\n",
    "                        select *\n",
    "                        from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    "                        on trans_table.benef_account_number = acc_table.account_number\n",
    "                    )\n",
    "                    where account_number not in ('None')\n",
    "                    group by 1\n",
    "                ) credit left join\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_2,\n",
    "                           sum(transaction_amount) as Debit_transaction_amount,\n",
    "                           count(*) as Total_no_of_debit_transactions\n",
    "                    from\n",
    "                    (\n",
    "                        select *\n",
    "                        from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    "                        on trans_table.orig_account_number = acc_table.account_number\n",
    "                    )\n",
    "                    where account_number not in ('None')\n",
    "                    group by 1\n",
    "                ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2\n",
    "            ) subquery\n",
    "        ) main left join\n",
    "        (\n",
    "            select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
    "                   subquery.SEGMENT,\n",
    "                   subquery.RISK,\n",
    "                   case\n",
    "                       when subquery.SAR_FLAG is NULL then 'N'\n",
    "                       else subquery.SAR_FLAG\n",
    "                   end as SAR_FLAG\n",
    "            from\n",
    "            (\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_3,\n",
    "                           business_segment as SEGMENT,\n",
    "                           case\n",
    "                               when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
    "                               when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
    "                               when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
    "                               else 'Unknown Risk'\n",
    "                           end AS RISK\n",
    "                    from {cust_data}\n",
    "                ) cd left join\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_4,\n",
    "                           sar_flag as SAR_FLAG\n",
    "                    from {alert_data}\n",
    "                ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
    "            ) subquery\n",
    "        ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
    "    ) final\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "\n",
    "\n",
    "class Scenario:\n",
    "    \"\"\"Profile customers by credit/debit activity and assign a SAR_FLAG.\n",
    "\n",
    "    Every tunable lives in a class attribute so it can be adjusted (or\n",
    "    overridden in a subclass) without editing ``logic``.\n",
    "    \"\"\"\n",
    "\n",
    "    # Query interface shared by all instances; created when the class is defined.\n",
    "    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
    "\n",
    "    # Table names substituted into the ``query`` template.\n",
    "    TABLES = {\n",
    "        \"trans_data\": \"transaction10m\",\n",
    "        \"cust_data\": \"customer_data_v1\",\n",
    "        \"acc_data\": \"account_data_v1\",\n",
    "        \"alert_data\": \"alert_data_v1\",\n",
    "    }\n",
    "\n",
    "    # Labels applied positionally to the columns selected by ``query``.\n",
    "    COLUMNS = (\n",
    "        \"Focal_id\",\n",
    "        \"Credit_transaction_amount\",\n",
    "        \"Total_no_of_credit_transactions\",\n",
    "        \"Debit_transaction_amount\",\n",
    "        \"Total_no_of_debit_transactions\",\n",
    "        \"Wash_Ratio\",\n",
    "        \"Segment\",\n",
    "        \"Risk\",\n",
    "        \"SAR_FLAG\",\n",
    "    )\n",
    "\n",
    "    # Per-segment quantile used as the cut-off. The output columns keep their\n",
    "    # historical P90_* names even though this is the 95th percentile.\n",
    "    SEGMENT_QUANTILE = 0.95\n",
    "    # Absolute (not percentile-based) lower bound on Wash_Ratio.\n",
    "    WASH_RATIO_THRESHOLD = 0.90\n",
    "    # Share of the selected population that gets flagged: 0.1 means 10%.\n",
    "    SAMPLE_FRACTION = 0.1\n",
    "    # Fixed seed so the same rows are flagged for the same input.\n",
    "    RANDOM_STATE = 42\n",
    "\n",
    "    def logic(self, **kwargs):\n",
    "        \"\"\"Run the scenario query and return the profiled customers.\n",
    "\n",
    "        Args:\n",
    "            **kwargs: Accepted for interface compatibility; not used.\n",
    "\n",
    "        Returns:\n",
    "            pandas.DataFrame: one row per row returned by ``query``, labelled\n",
    "            with ``COLUMNS`` plus ``P90_Credit``, ``P90_Debit`` and\n",
    "            ``P90_Wash``. ``SAR_FLAG`` is \"Y\" for a random ``SAMPLE_FRACTION``\n",
    "            of the rows that exceed both cut-offs and \"N\" for every other\n",
    "            row; the SAR_FLAG value returned by the query is overwritten.\n",
    "        \"\"\"\n",
    "        row_list = self.seq.execute_raw(query.format(**self.TABLES))\n",
    "        df = pd.DataFrame(row_list, columns=list(self.COLUMNS))\n",
    "\n",
    "        # Normalise dtypes before any comparison. Casting to int drops any\n",
    "        # fractional part of the amounts.\n",
    "        amount_cols = [\"Credit_transaction_amount\", \"Debit_transaction_amount\"]\n",
    "        df[amount_cols] = df[amount_cols].astype(\"int\")\n",
    "        df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype(\"float\")\n",
    "\n",
    "        # Step 1: per-segment quantile (SEGMENT_QUANTILE) of the three metrics.\n",
    "        percentiles = (\n",
    "            df.groupby(\"Segment\")[amount_cols + [\"Wash_Ratio\"]]\n",
    "            .quantile(self.SEGMENT_QUANTILE)\n",
    "            .reset_index()\n",
    "            .rename(columns={\n",
    "                \"Credit_transaction_amount\": \"P90_Credit\",\n",
    "                \"Debit_transaction_amount\": \"P90_Debit\",\n",
    "                \"Wash_Ratio\": \"P90_Wash\",\n",
    "            })\n",
    "        )\n",
    "\n",
    "        # Step 2: attach each row's segment cut-offs.\n",
    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 3: keep rows that exceed BOTH cut-offs: debit amount above the\n",
    "        # segment quantile AND Wash_Ratio above the absolute threshold.\n",
    "        # P90_Credit and P90_Wash are returned but not used for selection.\n",
    "        high_pop = (\n",
    "            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"])\n",
    "            & (df[\"Wash_Ratio\"] > self.WASH_RATIO_THRESHOLD)\n",
    "        )\n",
    "\n",
    "        # Step 4: draw a reproducible random sample of the selected rows.\n",
    "        flagged_index = (\n",
    "            df[high_pop]\n",
    "            .sample(frac=self.SAMPLE_FRACTION, random_state=self.RANDOM_STATE)\n",
    "            .index\n",
    "        )\n",
    "\n",
    "        # Step 5: reset SAR_FLAG for every row, then mark the sampled rows.\n",
    "        df[\"SAR_FLAG\"] = \"N\"\n",
    "        df.loc[flagged_index, \"SAR_FLAG\"] = \"Y\"\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# sen = Scenario()\n",
    "# a = sen.logic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a[a[\"SAR_FLAG\"] == \"Y\"]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}