{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from tms_data_interface import SQLQueryInterface\n",
    "\n",
    "# Quantile (0.98 = 98th percentile) that a customer must exceed within their\n",
    "# segment, on every metric, to enter the high-activity population.\n",
    "PERCENTILE_THRESHOLD = 0.98\n",
    "\n",
    "# Fraction (not percent) of the high-activity population flagged \"Y\": 0.1 = 10%.\n",
    "# NOTE(review): earlier comments described this as \"0.1%\", which would be\n",
    "# 0.001 - confirm the intended rate before changing the value.\n",
    "SAMPLE_FRACTION = 0.1\n",
    "\n",
    "# Fixed seed so that identical input yields the same sampled rows.\n",
    "RANDOM_STATE = 42\n",
    "\n",
    "# SQL template. The {trans_data}, {acc_data}, {cust_data} and {alert_data}\n",
    "# placeholders are replaced with table names via str.format() in Scenario.logic().\n",
    "# It aggregates 'CASH RELATED TRANSACTION' rows per customer (sum and count) and\n",
    "# left-joins segment, risk label and SAR flag onto that aggregate.\n",
    "query = \"\"\"\n",
    " select final.CUSTOMER_NUMBER_main as Focal_id,\n",
    " CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
    " final.Cash_deposit_count,\n",
    " final.SEGMENT,\n",
    " final.RISK,\n",
    " final.SAR_FLAG\n",
    "from \n",
    "(\n",
    " (\n",
    " select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
    " subquery.Cash_deposit_total,\n",
    " subquery.Cash_deposit_count\n",
    " from \n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_1, \n",
    " sum(transaction_amount) as Cash_deposit_total, \n",
    " count(*) as Cash_deposit_count\n",
    " from \n",
    " (\n",
    " select * \n",
    " from {trans_data} trans_table \n",
    " left join {acc_data} acc_table\n",
    " on trans_table.benef_account_number = acc_table.account_number\n",
    " ) trans\n",
    " where account_number not in ('None')\n",
    " and transaction_desc = 'CASH RELATED TRANSACTION'\n",
    " group by customer_number\n",
    " ) subquery\n",
    " ) main \n",
    " left join \n",
    " (\n",
    " select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
    " cd.SEGMENT,\n",
    " cd.RISK,\n",
    " case\n",
    " when ad.SAR_FLAG is NULL then 'N'\n",
    " else ad.SAR_FLAG\n",
    " end as SAR_FLAG \n",
    " from\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_3, \n",
    " business_segment as SEGMENT,\n",
    " case\n",
    " when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
    " when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
    " when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
    " else 'Unknown Risk'\n",
    " end AS RISK\n",
    " from {cust_data}\n",
    " ) cd \n",
    " left join\n",
    " (\n",
    " select customer_number as CUSTOMER_NUMBER_4, \n",
    " sar_flag as SAR_FLAG\n",
    " from {alert_data}\n",
    " ) ad \n",
    " on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
    " ) as cust_alert\n",
    " on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
    ") as final\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "class Scenario:\n",
    "    \"\"\"Flag a random subset of customers with unusually high cash deposits.\n",
    "\n",
    "    A customer belongs to the high-activity population when BOTH the cash\n",
    "    deposit total and the cash deposit count are strictly above the\n",
    "    PERCENTILE_THRESHOLD quantile of their segment. A SAMPLE_FRACTION share of\n",
    "    that population is drawn at random and given SAR_FLAG = 'Y'; every other\n",
    "    row gets 'N'.\n",
    "    \"\"\"\n",
    "\n",
    "    # Created once, when the class body is evaluated, and shared by all instances.\n",
    "    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
    "\n",
    "    def logic(self, **kwargs):\n",
    "        \"\"\"Run the cash-deposit query and return the labelled customer table.\n",
    "\n",
    "        Args:\n",
    "            **kwargs: Accepted but not read by this method.\n",
    "\n",
    "        Returns:\n",
    "            pandas.DataFrame: columns Focal_id, Cash_deposit_total,\n",
    "            Cash_deposit_count, Segment, Risk, SAR_FLAG, P90_Credit and\n",
    "            P90_Credit_count.\n",
    "        \"\"\"\n",
    "        sql = query.format(\n",
    "            trans_data=\"transaction10m\",\n",
    "            cust_data=\"customer_data_v1\",\n",
    "            acc_data=\"account_data_v1\",\n",
    "            alert_data=\"alert_data_v1\",\n",
    "        )\n",
    "        row_list = self.seq.execute_raw(sql)\n",
    "\n",
    "        # Labels are applied positionally, in the order of the select list.\n",
    "        cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
    "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
    "        df = pd.DataFrame(row_list, columns=cols)\n",
    "        # The query casts the total to DECIMAL(18, 2); convert it to float\n",
    "        # before the quantile and comparison steps.\n",
    "        df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
    "\n",
    "        # NOTE(review): the query left-joins alert data on customer number; if\n",
    "        # that table can hold several rows per customer, customers would appear\n",
    "        # more than once here - confirm.\n",
    "        return self._flag_high_activity(df)\n",
    "\n",
    "    @staticmethod\n",
    "    def _flag_high_activity(df):\n",
    "        \"\"\"Add per-segment thresholds to df and rewrite its SAR_FLAG column.\n",
    "\n",
    "        Args:\n",
    "            df: DataFrame with at least the columns Segment,\n",
    "                Cash_deposit_total, Cash_deposit_count and SAR_FLAG.\n",
    "\n",
    "        Returns:\n",
    "            pandas.DataFrame: the merged frame with P90_Credit and\n",
    "            P90_Credit_count added and SAR_FLAG set to 'Y' or 'N'.\n",
    "        \"\"\"\n",
    "        # Step 1: per-segment threshold for each of the two metrics. The\n",
    "        # \"P90_*\" names are kept because they are part of the returned frame,\n",
    "        # although they hold the PERCENTILE_THRESHOLD quantile (currently the\n",
    "        # 98th percentile), not the 90th.\n",
    "        percentiles = (\n",
    "            df.groupby(\"Segment\")[[\"Cash_deposit_total\", \"Cash_deposit_count\"]]\n",
    "            .quantile(PERCENTILE_THRESHOLD)\n",
    "            .reset_index()\n",
    "            .rename(columns={\n",
    "                \"Cash_deposit_total\": \"P90_Credit\",\n",
    "                \"Cash_deposit_count\": \"P90_Credit_count\",\n",
    "            })\n",
    "        )\n",
    "\n",
    "        # Step 2: attach each row's segment thresholds.\n",
    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 3: a row qualifies only when BOTH metrics are strictly above\n",
    "        # their thresholds. Rows without a threshold compare as False.\n",
    "        high_pop = (\n",
    "            (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
    "            (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
    "        )\n",
    "\n",
    "        # Step 4: seeded random draw from the qualifying rows.\n",
    "        high_pop_indices = (\n",
    "            df[high_pop]\n",
    "            .sample(frac=SAMPLE_FRACTION, random_state=RANDOM_STATE)\n",
    "            .index\n",
    "        )\n",
    "\n",
    "        # Step 5: the SAR_FLAG value returned by the query is discarded here;\n",
    "        # every row becomes 'N' except the sampled rows, which become 'Y'.\n",
    "        df[\"SAR_FLAG\"] = \"N\"\n",
    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"\n",
    "\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1f20337b-8116-47e5-8743-1ba41e2df819",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# sen = Scenario()\n",
    "# a = sen.logic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a[a[\"SAR_FLAG\"] == \"Y\"]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}