203/main.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "query = \"\"\"\n",
    "    select  final.CUSTOMER_NUMBER_main as Focal_id,\n",
    "        CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
    "        final.Cash_deposit_count,\n",
    "        final.SEGMENT,\n",
    "        final.RISK,\n",
    "        final.SAR_FLAG\n",
    "from \n",
    "(\n",
    "    (\n",
    "        select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
    "               subquery.Cash_deposit_total,\n",
    "               subquery.Cash_deposit_count\n",
    "        from \n",
    "        (\n",
    "            select customer_number as CUSTOMER_NUMBER_1, \n",
    "                   sum(transaction_amount) as Cash_deposit_total, \n",
    "                   count(*) as Cash_deposit_count\n",
    "            from \n",
    "            (\n",
    "                select * \n",
    "                from {trans_data} trans_table \n",
    "                left join {acc_data} acc_table\n",
    "                on trans_table.benef_account_number = acc_table.account_number\n",
    "            ) trans\n",
    "            where account_number not in ('None')\n",
    "            and transaction_desc = 'CASH RELATED TRANSACTION'\n",
    "            group by customer_number\n",
    "        ) subquery\n",
    "    ) main \n",
    "    left join \n",
    "   (\n",
    "        select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
    "               cd.SEGMENT,\n",
    "               cd.RISK,\n",
    "               case\n",
    "                    when ad.SAR_FLAG is NULL then 'N'\n",
    "                    else ad.SAR_FLAG\n",
    "               end as SAR_FLAG    \n",
    "        from\n",
    "        (\n",
    "            select customer_number as CUSTOMER_NUMBER_3, \n",
    "                   business_segment as SEGMENT,\n",
    "                   case\n",
    "                       when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
    "                       when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
    "                       when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
    "                       else 'Unknown Risk'\n",
    "                   end AS RISK\n",
    "            from {cust_data}\n",
    "        ) cd \n",
    "        left join\n",
    "        (\n",
    "            select customer_number as CUSTOMER_NUMBER_4, \n",
    "                   sar_flag as SAR_FLAG\n",
    "            from {alert_data}\n",
    "        ) ad \n",
    "        on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
    "    ) as cust_alert\n",
    "    on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
    ") as final\n",
    "\"\"\"\n",
    "\n",
    "from tms_data_interface import SQLQueryInterface\n",
    "\n",
    "class Scenario:\n",
    "    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
    "\n",
    "    def logic(self, **kwargs):\n",
    "        row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
    "                                                    cust_data=\"customer_data_v1\",\n",
    "                                                    acc_data=\"account_data_v1\",\n",
    "                                                    alert_data=\"alert_data_v1\")\n",
    "                                       )\n",
    "        cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
    "                \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
    "        df = pd.DataFrame(row_list, columns = cols)\n",
    "        df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
    "        \n",
    "        \n",
    "\n",
    "        # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
    "        percentiles = (\n",
    "            df.groupby(\"Segment\")[[\"Cash_deposit_total\",\n",
    "                                   \"Cash_deposit_count\"]]\n",
    "              .quantile(0.98)\n",
    "              .reset_index()\n",
    "        )\n",
    "\n",
    "        # Rename columns for clarity\n",
    "        percentiles = percentiles.rename(columns={\n",
    "            \"Cash_deposit_total\": \"P90_Credit\",\n",
    "            \"Cash_deposit_count\": \"P90_Credit_count\"\n",
    "        })\n",
    "\n",
    "        # Step 2: Merge back to main df\n",
    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
    "        high_pop = (\n",
    "            (df[\"Cash_deposit_total\"] > df[\"P90_Credit\"]) &\n",
    "            (df[\"Cash_deposit_count\"] > df[\"P90_Credit_count\"])\n",
    "        )\n",
    "\n",
    "        # Step 4: Randomly select 0.1% sample from high-risk population\n",
    "        sample_fraction = 0.1   # 0.1%\n",
    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
    "\n",
    "        # Step 5: Set SAR_FLAG values\n",
    "        df[\"SAR_FLAG\"] = \"N\"   # default for all\n",
    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to 0.1% random high-risk population\n",
    "\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1f20337b-8116-47e5-8743-1ba41e2df819",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# sen = Scenario()\n",
    "# a = sen.logic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6de62b37-00d1-4c88-b27b-9a70e05add91",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a[a[\"SAR_FLAG\"] == \"Y\"]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}