{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "seq = SQLQueryInterface(schema=\"transactionschema\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fc212ace-ca7a-45f2-8137-f436c6123652",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['account_data_v1'],\n",
       " ['account_data_v2'],\n",
       " ['alert_data_v1'],\n",
       " ['alert_data_v2'],\n",
       " ['customer_data_v1'],\n",
       " ['customer_data_v2'],\n",
       " ['transaction10m'],\n",
       " ['transaction60m']]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seq.execute_raw(\"show tables\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "    select  final.CUSTOMER_NUMBER_main as Focal_id,\n",
    "            final.Credit_transaction_amount,\n",
    "            final.Total_no_of_credit_transactions,\n",
    "            final.Debit_transaction_amount,\n",
    "            final.Total_no_of_debit_transactions,\n",
    "            final.Wash_Ratio,\n",
    "            final.SEGMENT,\n",
    "            final.RISK,\n",
    "            final.SAR_FLAG\n",
    "    from \n",
    "    (\n",
    "        (\n",
    "            select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
    "                    subquery.Credit_transaction_amount,\n",
    "                    subquery.Total_no_of_credit_transactions,\n",
    "                    case\n",
    "                         when subquery.Debit_transaction_amount is NULL then 0\n",
    "                         else Debit_transaction_amount\n",
    "                    end as Debit_transaction_amount,\n",
    "                    case\n",
    "                         when subquery.Total_no_of_debit_transactions is NULL then 0\n",
    "                         else Total_no_of_debit_transactions\n",
    "                    end as Total_no_of_debit_transactions,\n",
    "                    case\n",
    "                         when subquery.Debit_transaction_amount = 0\n",
    "                         or subquery.Debit_transaction_amount is NULL then 0\n",
    "                         else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount\n",
    "                    end as Wash_Ratio\n",
    "            from \n",
    "            (\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_1, \n",
    "                            sum(transaction_amount) as Credit_transaction_amount, \n",
    "                            count(*) as Total_no_of_credit_transactions\n",
    "                    from \n",
    "                    (\n",
    "                        select * \n",
    "                        from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    "                        on trans_table.benef_account_number = acc_table.account_number\n",
    "                    )\n",
    "                    where account_number not in ('None')\n",
    "                    group by 1\n",
    "                ) credit left join\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_2, \n",
    "                            sum(transaction_amount) as Debit_transaction_amount, \n",
    "                            count(*) as Total_no_of_debit_transactions\n",
    "                    from \n",
    "                    (\n",
    "                        select * \n",
    "                        from {trans_data} as trans_table left join {acc_data} as acc_table\n",
    "                        on trans_table.orig_account_number = acc_table.account_number\n",
    "                    )\n",
    "                    where account_number not in ('None')\n",
    "                    group by 1\n",
    "                ) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2 \n",
    "            ) subquery\n",
    "        ) main left join \n",
    "        (\n",
    "            select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
    "                    subquery.SEGMENT,\n",
    "                    subquery.RISK,\n",
    "                    case\n",
    "                         when subquery.SAR_FLAG is NULL then 'N'\n",
    "                         else subquery.SAR_FLAG\n",
    "                    end as SAR_FLAG    \n",
    "            from\n",
    "            (\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_3, \n",
    "                            business_segment as SEGMENT,\n",
    "                            case\n",
    "                                when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
    "                                when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
    "                                when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
    "                                else 'Unknown Risk'\n",
    "                            end AS RISK\n",
    "                    from {cust_data}\n",
    "                ) cd left join\n",
    "                (\n",
    "                    select customer_number as CUSTOMER_NUMBER_4, \n",
    "                            sar_flag as SAR_FLAG\n",
    "                    from {alert_data}\n",
    "                ) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
    "            ) subquery\n",
    "        ) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
    "    ) final\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from tms_data_interface import SQLQueryInterface\n",
    "\n",
    "class Scenario:\n",
    "    seq = SQLQueryInterface(schema=\"transactionschema\")\n",
    "\n",
    "    def logic(self, **kwargs):\n",
    "        row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
    "                                                    cust_data=\"customer_data_v1\",\n",
    "                                                    acc_data=\"account_data_v1\",\n",
    "                                                    alert_data=\"alert_data_v1\")\n",
    "                                       )\n",
    "        cols = [\"Focal_id\", \"Credit_transaction_amount\",\n",
    "                \"Total_no_of_credit_transactions\",\n",
    "                \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n",
    "                \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
    "        df = pd.DataFrame(row_list, columns = cols)\n",
    "        df[[\"Credit_transaction_amount\",\n",
    "                \"Debit_transaction_amount\"]] = df[[\"Credit_transaction_amount\",\n",
    "                \"Debit_transaction_amount\"]].astype('int')\n",
    "        df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype('float')\n",
    "        # Step 1: Compute 90th percentiles per Segment for all 3 fields\n",
    "        percentiles = (\n",
    "            df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
    "                                   \"Debit_transaction_amount\",\n",
    "                                   \"Wash_Ratio\"]]\n",
    "              .quantile(0.95)\n",
    "              .reset_index()\n",
    "        )\n",
    "\n",
    "        # Rename columns for clarity\n",
    "        percentiles = percentiles.rename(columns={\n",
    "            \"Credit_transaction_amount\": \"P90_Credit\",\n",
    "            \"Debit_transaction_amount\": \"P90_Debit\",\n",
    "            \"Wash_Ratio\": \"P90_Wash\"\n",
    "        })\n",
    "\n",
    "        # Step 2: Merge back to main df\n",
    "        df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n",
    "\n",
    "        # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
    "        high_pop = (\n",
    "            # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
    "            (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
    "            (df[\"Wash_Ratio\"] > 0.90)\n",
    "        )\n",
    "\n",
    "        # Step 4: Randomly select 0.1% sample from high-risk population\n",
    "        sample_fraction = 0.1  # 0.1%\n",
    "        high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
    "\n",
    "        # Step 5: Set SAR_FLAG values\n",
    "        df[\"SAR_FLAG\"] = \"N\"   # default for all\n",
    "        df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\"   # assign Y to 0.1% random high-risk population\n",
    "        return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# sen = Scenario()\n",
    "# a = sen.logic()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "150bb5ce-6be1-44fc-a606-6d375354626d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# a[a[\"SAR_FLAG\"] == \"Y\"]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}