commit 64ece05d230fbc6b9d5d85765bd9cfba1513a567 Author: user_client2024 Date: Fri May 23 15:12:51 2025 +0000 Initial commit diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb new file mode 100644 index 0000000..46ea052 --- /dev/null +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# import pandas as pd\n", + "\n", + "# query = \"\"\"\n", + "# select final.CUSTOMER_NUMBER_main as Focal_id,\n", + "# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", + "# final.Cash_deposit_count,\n", + "# final.SEGMENT,\n", + "# final.RISK,\n", + "# final.SAR_FLAG\n", + "# from \n", + "# (\n", + "# (\n", + "# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", + "# subquery.Cash_deposit_total,\n", + "# subquery.Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_1, \n", + "# sum(transaction_amount) as Cash_deposit_total, \n", + "# count(*) as Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select * \n", + "# from {trans_data} trans_table \n", + "# left join {acc_data} acc_table\n", + "# on trans_table.benef_account_number = acc_table.account_number\n", + "# ) trans\n", + "# where account_number not in ('None')\n", + "# and transaction_desc = 'CASH RELATED TRANSACTION'\n", + "# group by customer_number\n", + "# ) subquery\n", + "# ) main \n", + "# left join \n", + "# (\n", + "# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", + "# cd.SEGMENT,\n", + "# cd.RISK,\n", + "# case\n", + "# when ad.SAR_FLAG is NULL then 'N'\n", + "# else ad.SAR_FLAG\n", + "# end as SAR_FLAG \n", + "# from\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_3, \n", + "# business_segment as SEGMENT,\n", + "# case\n", + "# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", + "# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", + "# when RISK_CLASSIFICATION = 3 then 'High Risk'\n", + "# else 'Unknown Risk'\n", + "# end AS RISK\n", + "# from {cust_data}\n", + "# ) cd \n", + "# left join\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_4, \n", + "# sar_flag as SAR_FLAG\n", + "# from {alert_data}\n", + "# ) ad \n", + "# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", + "# ) as cust_alert\n", + "# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", + "# ) as final\n", + "# \"\"\"\n", + "\n", + "# from tms_data_interface import SQLQueryInterface\n", + "\n", + "# class Scenario:\n", + "# seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + "# def logic(self, **kwargs):\n", + "# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + "# cust_data=\"customer_data_v1\",\n", + "# acc_data=\"account_data_v1\",\n", + "# alert_data=\"alert_data_v1\")\n", + "# )\n", + "# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", + "# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", + "# df = pd.DataFrame(row_list, columns = cols)\n", + "# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", + "# return df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "query = \"\"\"\n", + " SELECT \n", + " t.transaction_key,\n", + " t.transaction_date,\n", + " t.transaction_amount,\n", + " t.transaction_desc,\n", + " t.benef_account_number,\n", + "\n", + " -- Account data\n", + " a.account_number,\n", + " a.customer_number AS acc_customer_number,\n", + "\n", + " -- Party data\n", + " p.customer_number AS party_customer_number,\n", + " p.customer_name,\n", + " p.date_of_birth,\n", + " p.nationality,\n", + " p.business_segment,\n", + " CASE\n", + " WHEN p.risk_classification = 1 THEN 'Low Risk'\n", + " WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", + " WHEN p.risk_classification = 3 THEN 'High Risk'\n", + " ELSE 'Unknown Risk'\n", + " END AS risk_level,\n", + "\n", + " -- Alert data\n", + " COALESCE(al.sar_flag, 'N') AS sar_flag\n", + "\n", + " FROM {trans_data} t\n", + "\n", + " -- Join with account data on beneficiary account\n", + " LEFT JOIN {acc_data} a\n", + " ON t.benef_account_number = a.account_number\n", + "\n", + " -- Join with party/customer data using account's customer number\n", + " LEFT JOIN {cust_data} p\n", + " ON a.customer_number = p.customer_number\n", + "\n", + " -- Join with alert data using party's customer number\n", + " LEFT JOIN {alert_data} al\n", + " ON p.customer_number = al.customer_number\n", + "\n", + " WHERE a.account_number IS NOT NULL\n", + " limit 100\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def trx_count_sum_groupwise(data_filt_partywise): \n", + " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + " 'MIN_LIMIT', 'PCT_RANGE'])\n", + " \n", + " trxns = data_filt_partywise['transaction_amount'].values\n", + " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "\n", + " trxns = trxns[trxns >= min_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " group_count = 0\n", + " while len(trxns) > 0:\n", + " max_value = min_value + (pct_range * 0.01 * min_value)\n", + " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + " group_filter_trx = trxns[mask]\n", + " trx_count = len(group_filter_trx)\n", + " trx_sum = np.sum(group_filter_trx)\n", + " group_count += 1\n", + " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + " min_value, pct_range]\n", + " trxns = trxns[trxns > max_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " return groupeddata.to_dict('list')\n", + "\n", + "# ---------------------------\n", + "# Function 4: Run scenario 9\n", + "# ---------------------------\n", + "def scenario9_data(data1): \n", + " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + " trx_count_sum_groupwise).reset_index()\n", + "\n", + " df_list = []\n", + " for i in grouped.index:\n", + " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + " df_list.append(df_party)\n", + "\n", + " final_df = pd.concat(df_list, ignore_index=True) \n", + " Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + " Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + " SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + " \n", + " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + " \n", + " return final_df\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + " def logic(self, **kwargs):\n", + " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + " cust_data=\"customer_data_v1\",\n", + " acc_data=\"account_data_v1\",\n", + " alert_data=\"alert_data_v1\")\n", + " )\n", + " cols = [\n", + " \"transaction_key\",\n", + " \"transaction_date\",\n", + " \"transaction_amount\",\n", + " \"transaction_desc\",\n", + " \"benef_account_number\",\n", + " \"account_number\",\n", + " \"acc_customer_number\",\n", + " \"Focal_id\",\n", + " \"customer_name\",\n", + " \"date_of_birth\",\n", + " \"nationality\",\n", + " \"Segment\",\n", + " \"Risk\", \n", + " \"SAR_FLAG\"\n", + " ]\n", + " df = pd.DataFrame(row_list, columns = cols)\n", + " df['Segment'] = 'SME'\n", + " df['MIN_LIMIT'] = 50000\n", + " df['PCT_RANGE'] = 20\n", + " \n", + " scenario_data = scenario9_data(df)\n", + " \n", + " return scenario_data" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1f20337b-8116-47e5-8743-1ba41e2df819", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# sen = Scenario()\n", + "# sen.logic()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/main.ipynb b/main.ipynb new file mode 100644 index 0000000..46ea052 --- /dev/null +++ b/main.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# import pandas as pd\n", + "\n", + "# query = \"\"\"\n", + "# select final.CUSTOMER_NUMBER_main as Focal_id,\n", + "# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", + "# final.Cash_deposit_count,\n", + "# final.SEGMENT,\n", + "# final.RISK,\n", + "# final.SAR_FLAG\n", + "# from \n", + "# (\n", + "# (\n", + "# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", + "# subquery.Cash_deposit_total,\n", + "# subquery.Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_1, \n", + "# sum(transaction_amount) as Cash_deposit_total, \n", + "# count(*) as Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select * \n", + "# from {trans_data} trans_table \n", + "# left join {acc_data} acc_table\n", + "# on trans_table.benef_account_number = acc_table.account_number\n", + "# ) trans\n", + "# where account_number not in ('None')\n", + "# and transaction_desc = 'CASH RELATED TRANSACTION'\n", + "# group by customer_number\n", + "# ) subquery\n", + "# ) main \n", + "# left join \n", + "# (\n", + "# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", + "# cd.SEGMENT,\n", + "# cd.RISK,\n", + "# case\n", + "# when ad.SAR_FLAG is NULL then 'N'\n", + "# else ad.SAR_FLAG\n", + "# end as SAR_FLAG \n", + "# from\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_3, \n", + "# business_segment as SEGMENT,\n", + "# case\n", + "# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", + "# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", + "# when RISK_CLASSIFICATION = 3 then 'High Risk'\n", + "# else 'Unknown Risk'\n", + "# end AS RISK\n", + "# from {cust_data}\n", + "# ) cd \n", + "# left join\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_4, \n", + "# sar_flag as SAR_FLAG\n", + "# from {alert_data}\n", + "# ) ad \n", + "# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", + "# ) as cust_alert\n", + "# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", + "# ) as final\n", + "# \"\"\"\n", + "\n", + "# from tms_data_interface import SQLQueryInterface\n", + "\n", + "# class Scenario:\n", + "# seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + "# def logic(self, **kwargs):\n", + "# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + "# cust_data=\"customer_data_v1\",\n", + "# acc_data=\"account_data_v1\",\n", + "# alert_data=\"alert_data_v1\")\n", + "# )\n", + "# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", + "# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", + "# df = pd.DataFrame(row_list, columns = cols)\n", + "# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", + "# return df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "query = \"\"\"\n", + " SELECT \n", + " t.transaction_key,\n", + " t.transaction_date,\n", + " t.transaction_amount,\n", + " t.transaction_desc,\n", + " t.benef_account_number,\n", + "\n", + " -- Account data\n", + " a.account_number,\n", + " a.customer_number AS acc_customer_number,\n", + "\n", + " -- Party data\n", + " p.customer_number AS party_customer_number,\n", + " p.customer_name,\n", + " p.date_of_birth,\n", + " p.nationality,\n", + " p.business_segment,\n", + " CASE\n", + " WHEN p.risk_classification = 1 THEN 'Low Risk'\n", + " WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", + " WHEN p.risk_classification = 3 THEN 'High Risk'\n", + " ELSE 'Unknown Risk'\n", + " END AS risk_level,\n", + "\n", + " -- Alert data\n", + " COALESCE(al.sar_flag, 'N') AS sar_flag\n", + "\n", + " FROM {trans_data} t\n", + "\n", + " -- Join with account data on beneficiary account\n", + " LEFT JOIN {acc_data} a\n", + " ON t.benef_account_number = a.account_number\n", + "\n", + " -- Join with party/customer data using account's customer number\n", + " LEFT JOIN {cust_data} p\n", + " ON a.customer_number = p.customer_number\n", + "\n", + " -- Join with alert data using party's customer number\n", + " LEFT JOIN {alert_data} al\n", + " ON p.customer_number = al.customer_number\n", + "\n", + " WHERE a.account_number IS NOT NULL\n", + " limit 100\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def trx_count_sum_groupwise(data_filt_partywise): \n", + " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + " 'MIN_LIMIT', 'PCT_RANGE'])\n", + " \n", + " trxns = data_filt_partywise['transaction_amount'].values\n", + " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "\n", + " trxns = trxns[trxns >= min_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " group_count = 0\n", + " while len(trxns) > 0:\n", + " max_value = min_value + (pct_range * 0.01 * min_value)\n", + " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + " group_filter_trx = trxns[mask]\n", + " trx_count = len(group_filter_trx)\n", + " trx_sum = np.sum(group_filter_trx)\n", + " group_count += 1\n", + " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + " min_value, pct_range]\n", + " trxns = trxns[trxns > max_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " return groupeddata.to_dict('list')\n", + "\n", + "# ---------------------------\n", + "# Function 4: Run scenario 9\n", + "# ---------------------------\n", + "def scenario9_data(data1): \n", + " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + " trx_count_sum_groupwise).reset_index()\n", + "\n", + " df_list = []\n", + " for i in grouped.index:\n", + " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + " df_list.append(df_party)\n", + "\n", + " final_df = pd.concat(df_list, ignore_index=True) \n", + " Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + " Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + " SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + " \n", + " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + " \n", + " return final_df\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + " def logic(self, **kwargs):\n", + " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + " cust_data=\"customer_data_v1\",\n", + " acc_data=\"account_data_v1\",\n", + " alert_data=\"alert_data_v1\")\n", + " )\n", + " cols = [\n", + " \"transaction_key\",\n", + " \"transaction_date\",\n", + " \"transaction_amount\",\n", + " \"transaction_desc\",\n", + " \"benef_account_number\",\n", + " \"account_number\",\n", + " \"acc_customer_number\",\n", + " \"Focal_id\",\n", + " \"customer_name\",\n", + " \"date_of_birth\",\n", + " \"nationality\",\n", + " \"Segment\",\n", + " \"Risk\", \n", + " \"SAR_FLAG\"\n", + " ]\n", + " df = pd.DataFrame(row_list, columns = cols)\n", + " df['Segment'] = 'SME'\n", + " df['MIN_LIMIT'] = 50000\n", + " df['PCT_RANGE'] = 20\n", + " \n", + " scenario_data = scenario9_data(df)\n", + " \n", + " return scenario_data" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1f20337b-8116-47e5-8743-1ba41e2df819", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# sen = Scenario()\n", + "# sen.logic()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/main.py b/main.py new file mode 100644 index 0000000..b9f9b92 --- /dev/null +++ b/main.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# coding: utf-8 + +# In[1]: + + +# import pandas as pd + +# query = """ +# select final.CUSTOMER_NUMBER_main as Focal_id, +# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total, +# final.Cash_deposit_count, +# final.SEGMENT, +# final.RISK, +# final.SAR_FLAG +# from +# ( +# ( +# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main, +# subquery.Cash_deposit_total, +# subquery.Cash_deposit_count +# from +# ( +# select customer_number as CUSTOMER_NUMBER_1, +# sum(transaction_amount) as Cash_deposit_total, +# count(*) as Cash_deposit_count +# from +# ( +# select * +# from {trans_data} trans_table +# left join {acc_data} acc_table +# on trans_table.benef_account_number = acc_table.account_number +# ) trans +# where account_number not in ('None') +# and transaction_desc = 'CASH RELATED TRANSACTION' +# group by customer_number +# ) subquery +# ) main +# left join +# ( +# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust, +# cd.SEGMENT, +# cd.RISK, +# case +# when ad.SAR_FLAG is NULL then 'N' +# else ad.SAR_FLAG +# end as SAR_FLAG +# from +# ( +# select customer_number as CUSTOMER_NUMBER_3, +# business_segment as SEGMENT, +# case +# when RISK_CLASSIFICATION = 1 then 'Low Risk' +# when RISK_CLASSIFICATION = 2 then 'Medium Risk' +# when RISK_CLASSIFICATION = 3 then 'High Risk' +# else 'Unknown Risk' +# end AS RISK +# from {cust_data} +# ) cd +# left join +# ( +# select customer_number as CUSTOMER_NUMBER_4, +# sar_flag as SAR_FLAG +# from {alert_data} +# ) ad +# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4 +# ) as cust_alert +# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main +# ) as final +# """ + +# from tms_data_interface import SQLQueryInterface + +# class Scenario: +# seq = SQLQueryInterface(schema="transactionschema") + +# def logic(self, **kwargs): +# row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", +# cust_data="customer_data_v1", +# acc_data="account_data_v1", +# alert_data="alert_data_v1") +# ) +# cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count", +# "Segment", "Risk", "SAR_FLAG"] +# df = pd.DataFrame(row_list, columns = cols) +# df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float) +# return df + + +# In[15]: + + +import pandas as pd +import numpy as np +query = """ + SELECT + t.transaction_key, + t.transaction_date, + t.transaction_amount, + t.transaction_desc, + t.benef_account_number, + + -- Account data + a.account_number, + a.customer_number AS acc_customer_number, + + -- Party data + p.customer_number AS party_customer_number, + p.customer_name, + p.date_of_birth, + p.nationality, + p.business_segment, + CASE + WHEN p.risk_classification = 1 THEN 'Low Risk' + WHEN p.risk_classification = 2 THEN 'Medium Risk' + WHEN p.risk_classification = 3 THEN 'High Risk' + ELSE 'Unknown Risk' + END AS risk_level, + + -- Alert data + COALESCE(al.sar_flag, 'N') AS sar_flag + + FROM {trans_data} t + + -- Join with account data on beneficiary account + LEFT JOIN {acc_data} a + ON t.benef_account_number = a.account_number + + -- Join with party/customer data using account's customer number + LEFT JOIN {cust_data} p + ON a.customer_number = p.customer_number + + -- Join with alert data using party's customer number + LEFT JOIN {alert_data} al + ON p.customer_number = al.customer_number + + WHERE a.account_number IS NOT NULL + limit 100 +""" + + +# In[20]: + + +def trx_count_sum_groupwise(data_filt_partywise): + data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') + groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', + 'MIN_LIMIT', 'PCT_RANGE']) + + trxns = data_filt_partywise['transaction_amount'].values + pct_range = data_filt_partywise['PCT_RANGE'].max() + min_value = data_filt_partywise['MIN_LIMIT'].max() + + trxns = trxns[trxns >= min_value] + if len(trxns) > 0: + min_value = trxns[0] + + group_count = 0 + while len(trxns) > 0: + max_value = min_value + (pct_range * 0.01 * min_value) + mask = np.logical_and(trxns >= min_value, trxns <= max_value) + group_filter_trx = trxns[mask] + trx_count = len(group_filter_trx) + trx_sum = np.sum(group_filter_trx) + group_count += 1 + groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, + min_value, pct_range] + trxns = trxns[trxns > max_value] + if len(trxns) > 0: + min_value = trxns[0] + + return groupeddata.to_dict('list') + +# --------------------------- +# Function 4: Run scenario 9 +# --------------------------- +def scenario9_data(data1): + grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply( + trx_count_sum_groupwise).reset_index() + + df_list = [] + for i in grouped.index: + df_party = pd.DataFrame(grouped.iloc[i, -1]) + df_party['Focal_id'] = grouped.loc[i, 'Focal_id'] + df_list.append(df_party) + + final_df = pd.concat(df_list, ignore_index=True) + Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index() + Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index() + SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index() + + final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left') + final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left') + final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left') + + return final_df + + + +# In[17]: + + +from tms_data_interface import SQLQueryInterface + +class Scenario: + seq = SQLQueryInterface(schema="transactionschema") + + def logic(self, **kwargs): + row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", + cust_data="customer_data_v1", + acc_data="account_data_v1", + alert_data="alert_data_v1") + ) + cols = [ + "transaction_key", + "transaction_date", + "transaction_amount", + "transaction_desc", + "benef_account_number", + "account_number", + "acc_customer_number", + "Focal_id", + "customer_name", + "date_of_birth", + "nationality", + "Segment", + "Risk", + "SAR_FLAG" + ] + df = pd.DataFrame(row_list, columns = cols) + df['Segment'] = 'SME' + df['MIN_LIMIT'] = 50000 + df['PCT_RANGE'] = 20 + + scenario_data = scenario9_data(df) + + return scenario_data + + +# In[19]: + + +# sen = Scenario() +# sen.logic() +