{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] }, "outputs": [], "source": [ "# import pandas as pd\n", "\n", "# query = \"\"\"\n", "# select final.CUSTOMER_NUMBER_main as Focal_id,\n", "# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", "# final.Cash_deposit_count,\n", "# final.SEGMENT,\n", "# final.RISK,\n", "# final.SAR_FLAG\n", "# from \n", "# (\n", "# (\n", "# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", "# subquery.Cash_deposit_total,\n", "# subquery.Cash_deposit_count\n", "# from \n", "# (\n", "# select customer_number as CUSTOMER_NUMBER_1, \n", "# sum(transaction_amount) as Cash_deposit_total, \n", "# count(*) as Cash_deposit_count\n", "# from \n", "# (\n", "# select * \n", "# from {trans_data} trans_table \n", "# left join {acc_data} acc_table\n", "# on trans_table.benef_account_number = acc_table.account_number\n", "# ) trans\n", "# where account_number not in ('None')\n", "# and transaction_desc = 'CASH RELATED TRANSACTION'\n", "# group by customer_number\n", "# ) subquery\n", "# ) main \n", "# left join \n", "# (\n", "# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", "# cd.SEGMENT,\n", "# cd.RISK,\n", "# case\n", "# when ad.SAR_FLAG is NULL then 'N'\n", "# else ad.SAR_FLAG\n", "# end as SAR_FLAG \n", "# from\n", "# (\n", "# select customer_number as CUSTOMER_NUMBER_3, \n", "# business_segment as SEGMENT,\n", "# case\n", "# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", "# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", "# when RISK_CLASSIFICATION = 3 then 'High Risk'\n", "# else 'Unknown Risk'\n", "# end AS RISK\n", "# from {cust_data}\n", "# ) cd \n", "# left join\n", "# (\n", "# select customer_number as CUSTOMER_NUMBER_4, \n", "# sar_flag as SAR_FLAG\n", "# from {alert_data}\n", "# ) ad \n", "# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", "# ) as cust_alert\n", "# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", "# ) as final\n", "# \"\"\"\n", "\n", "# from tms_data_interface import SQLQueryInterface\n", "\n", "# class Scenario:\n", "# seq = SQLQueryInterface(schema=\"transactionschema\")\n", "\n", "# def logic(self, **kwargs):\n", "# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", "# cust_data=\"customer_data_v1\",\n", "# acc_data=\"account_data_v1\",\n", "# alert_data=\"alert_data_v1\")\n", "# )\n", "# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", "# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", "# df = pd.DataFrame(row_list, columns = cols)\n", "# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", "# return df" ] }, { "cell_type": "code", "execution_count": 41, "id": "87cf157c-3725-4d90-bfb6-91cba5028826", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from tms_data_interface import SQLQueryInterface" ] }, { "cell_type": "code", "execution_count": 42, "id": "b68368ea-9cbb-4060-b138-d8b7c3d8a193", "metadata": { "tags": [] }, "outputs": [], "source": [ "query2 = \"\"\"\n", " SELECT *\n", "\n", " FROM percentile_dist\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 43, "id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "query = \"\"\"\n", " SELECT \n", " t.transaction_key,\n", " t.transaction_date,\n", " t.transaction_amount,\n", " t.transaction_desc,\n", " t.benef_account_number,\n", "\n", " -- Account data\n", " a.account_number,\n", " a.customer_number AS acc_customer_number,\n", "\n", " -- Party data\n", " p.customer_number AS party_customer_number,\n", " p.customer_name,\n", " p.date_of_birth,\n", " p.nationality,\n", " p.business_segment,\n", " CASE\n", " WHEN p.risk_classification = 1 THEN 'Low Risk'\n", " WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", " WHEN p.risk_classification = 3 THEN 'High Risk'\n", " ELSE 'Unknown Risk'\n", " END AS risk_level,\n", "\n", " -- Alert data\n", " COALESCE(al.sar_flag, 'N') AS sar_flag\n", "\n", " FROM {trans_data} t\n", "\n", " -- Join with account data on beneficiary account\n", " LEFT JOIN {acc_data} a\n", " ON t.benef_account_number = a.account_number\n", "\n", " -- Join with party/customer data using account's customer number\n", " LEFT JOIN {cust_data} p\n", " ON a.customer_number = p.customer_number\n", "\n", " -- Join with alert data using party's customer number\n", " LEFT JOIN {alert_data} al\n", " ON p.customer_number = al.customer_number\n", "\n", " WHERE a.account_number IS NOT NULL\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 44, "id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897", "metadata": { "tags": [] }, "outputs": [], "source": [ "def trx_count_sum_groupwise(data_filt_partywise): \n", " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", " 'MIN_LIMIT', 'PCT_RANGE'])\n", " \n", " trxns = data_filt_partywise['transaction_amount'].values\n", " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", "\n", " trxns = trxns[trxns >= min_value]\n", " if len(trxns) > 0:\n", " min_value = trxns[0]\n", "\n", " group_count = 0\n", " while len(trxns) > 0:\n", " max_value = min_value + (pct_range * 0.01 * min_value)\n", " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", " group_filter_trx = trxns[mask]\n", " trx_count = len(group_filter_trx)\n", " trx_sum = np.sum(group_filter_trx)\n", " group_count += 1\n", " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", " min_value, pct_range]\n", " trxns = trxns[trxns > max_value]\n", " if len(trxns) > 0:\n", " min_value = trxns[0]\n", "\n", " return groupeddata.to_dict('list')\n", "\n", "# ---------------------------\n", "# Function 4: Run scenario 9\n", "# ---------------------------\n", "def scenario9_data(data1): \n", " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", " trx_count_sum_groupwise).reset_index()\n", "\n", " df_list = []\n", " for i in grouped.index:\n", " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", " df_list.append(df_party)\n", "\n", " final_df = pd.concat(df_list, ignore_index=True) \n", " Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", " Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", " SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", " \n", " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", " \n", " return final_df\n", " " ] }, { "cell_type": "code", "execution_count": 45, "id": "fdefde12-97ab-4545-9612-153f707b7bc9", "metadata": { "tags": [] }, "outputs": [], "source": [ "seq = SQLQueryInterface(schema=\"transactionschema\")\n", "data = seq.execute_raw(query2)\n", "Columns = ['point_of_percentile', 'value', 'total_event',\n", " 'true_positive', 'false_positive', 'tpsar']\n", "percent_dist = pd.DataFrame(data,columns = Columns)" ] }, { "cell_type": "code", "execution_count": 52, "id": "8cc4b541-75a6-4fa7-978a-12df50f94d32", "metadata": { "tags": [] }, "outputs": [], "source": [ "# round(int(percent_dist[percent_dist['point_of_percentile']\\\n", "# == 75]['value'].iloc[0])/100)*100" ] }, { "cell_type": "code", "execution_count": 53, "id": "b6c85de2-6a47-4109-8885-c138c289ec25", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "class Scenario:\n", " seq = SQLQueryInterface(schema=\"transactionschema\")\n", "\n", " def logic(self, **kwargs):\n", " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", " cust_data=\"customer_data_v1\",\n", " acc_data=\"account_data_v1\",\n", " alert_data=\"alert_data_v1\")\n", " )\n", " cols = [\n", " \"transaction_key\",\n", " \"transaction_date\",\n", " \"transaction_amount\",\n", " \"transaction_desc\",\n", " \"benef_account_number\",\n", " \"account_number\",\n", " \"acc_customer_number\",\n", " \"Focal_id\",\n", " \"customer_name\",\n", " \"date_of_birth\",\n", " \"nationality\",\n", " \"Segment\",\n", " \"Risk\", \n", " \"SAR_FLAG\"\n", " ]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df['Segment'] = 'SME'\n", " df['MIN_LIMIT'] = round(int(percent_dist[percent_dist['point_of_percentile']\\\n", " == 75]['value'].iloc[0])/100)*100\n", " df['PCT_RANGE'] = 20\n", " \n", " scenario_data = scenario9_data(df)\n", " \n", " return scenario_data" ] }, { "cell_type": "code", "execution_count": 55, "id": "1f20337b-8116-47e5-8743-1ba41e2df819", "metadata": { "tags": [] }, "outputs": [], "source": [ "# sen = Scenario()\n", "# sen.logic()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.8" } }, "nbformat": 4, "nbformat_minor": 5 }