generated from user_client2024/154
Initial commit
This commit is contained in:
commit
64ece05d23
299
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
299
.ipynb_checkpoints/main-checkpoint.ipynb
Normal file
@ -0,0 +1,299 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import pandas as pd\n",
|
||||
"\n",
|
||||
"# query = \"\"\"\n",
|
||||
"# select final.CUSTOMER_NUMBER_main as Focal_id,\n",
|
||||
"# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
|
||||
"# final.Cash_deposit_count,\n",
|
||||
"# final.SEGMENT,\n",
|
||||
"# final.RISK,\n",
|
||||
"# final.SAR_FLAG\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# (\n",
|
||||
"# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
|
||||
"# subquery.Cash_deposit_total,\n",
|
||||
"# subquery.Cash_deposit_count\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_1, \n",
|
||||
"# sum(transaction_amount) as Cash_deposit_total, \n",
|
||||
"# count(*) as Cash_deposit_count\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# select * \n",
|
||||
"# from {trans_data} trans_table \n",
|
||||
"# left join {acc_data} acc_table\n",
|
||||
"# on trans_table.benef_account_number = acc_table.account_number\n",
|
||||
"# ) trans\n",
|
||||
"# where account_number not in ('None')\n",
|
||||
"# and transaction_desc = 'CASH RELATED TRANSACTION'\n",
|
||||
"# group by customer_number\n",
|
||||
"# ) subquery\n",
|
||||
"# ) main \n",
|
||||
"# left join \n",
|
||||
"# (\n",
|
||||
"# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
|
||||
"# cd.SEGMENT,\n",
|
||||
"# cd.RISK,\n",
|
||||
"# case\n",
|
||||
"# when ad.SAR_FLAG is NULL then 'N'\n",
|
||||
"# else ad.SAR_FLAG\n",
|
||||
"# end as SAR_FLAG \n",
|
||||
"# from\n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_3, \n",
|
||||
"# business_segment as SEGMENT,\n",
|
||||
"# case\n",
|
||||
"# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
|
||||
"# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
|
||||
"# when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
|
||||
"# else 'Unknown Risk'\n",
|
||||
"# end AS RISK\n",
|
||||
"# from {cust_data}\n",
|
||||
"# ) cd \n",
|
||||
"# left join\n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_4, \n",
|
||||
"# sar_flag as SAR_FLAG\n",
|
||||
"# from {alert_data}\n",
|
||||
"# ) ad \n",
|
||||
"# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
|
||||
"# ) as cust_alert\n",
|
||||
"# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
|
||||
"# ) as final\n",
|
||||
"# \"\"\"\n",
|
||||
"\n",
|
||||
"# from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"# class Scenario:\n",
|
||||
"# seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
"# def logic(self, **kwargs):\n",
|
||||
"# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
"# cust_data=\"customer_data_v1\",\n",
|
||||
"# acc_data=\"account_data_v1\",\n",
|
||||
"# alert_data=\"alert_data_v1\")\n",
|
||||
"# )\n",
|
||||
"# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
|
||||
"# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
"# df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
"# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||
"# return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"query = \"\"\"\n",
|
||||
" SELECT \n",
|
||||
" t.transaction_key,\n",
|
||||
" t.transaction_date,\n",
|
||||
" t.transaction_amount,\n",
|
||||
" t.transaction_desc,\n",
|
||||
" t.benef_account_number,\n",
|
||||
"\n",
|
||||
" -- Account data\n",
|
||||
" a.account_number,\n",
|
||||
" a.customer_number AS acc_customer_number,\n",
|
||||
"\n",
|
||||
" -- Party data\n",
|
||||
" p.customer_number AS party_customer_number,\n",
|
||||
" p.customer_name,\n",
|
||||
" p.date_of_birth,\n",
|
||||
" p.nationality,\n",
|
||||
" p.business_segment,\n",
|
||||
" CASE\n",
|
||||
" WHEN p.risk_classification = 1 THEN 'Low Risk'\n",
|
||||
" WHEN p.risk_classification = 2 THEN 'Medium Risk'\n",
|
||||
" WHEN p.risk_classification = 3 THEN 'High Risk'\n",
|
||||
" ELSE 'Unknown Risk'\n",
|
||||
" END AS risk_level,\n",
|
||||
"\n",
|
||||
" -- Alert data\n",
|
||||
" COALESCE(al.sar_flag, 'N') AS sar_flag\n",
|
||||
"\n",
|
||||
" FROM {trans_data} t\n",
|
||||
"\n",
|
||||
" -- Join with account data on beneficiary account\n",
|
||||
" LEFT JOIN {acc_data} a\n",
|
||||
" ON t.benef_account_number = a.account_number\n",
|
||||
"\n",
|
||||
" -- Join with party/customer data using account's customer number\n",
|
||||
" LEFT JOIN {cust_data} p\n",
|
||||
" ON a.customer_number = p.customer_number\n",
|
||||
"\n",
|
||||
" -- Join with alert data using party's customer number\n",
|
||||
" LEFT JOIN {alert_data} al\n",
|
||||
" ON p.customer_number = al.customer_number\n",
|
||||
"\n",
|
||||
" WHERE a.account_number IS NOT NULL\n",
|
||||
" limit 100\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def trx_count_sum_groupwise(data_filt_partywise): \n",
|
||||
" data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n",
|
||||
" groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n",
|
||||
" 'MIN_LIMIT', 'PCT_RANGE'])\n",
|
||||
" \n",
|
||||
" trxns = data_filt_partywise['transaction_amount'].values\n",
|
||||
" pct_range = data_filt_partywise['PCT_RANGE'].max()\n",
|
||||
" min_value = data_filt_partywise['MIN_LIMIT'].max()\n",
|
||||
"\n",
|
||||
" trxns = trxns[trxns >= min_value]\n",
|
||||
" if len(trxns) > 0:\n",
|
||||
" min_value = trxns[0]\n",
|
||||
"\n",
|
||||
" group_count = 0\n",
|
||||
" while len(trxns) > 0:\n",
|
||||
" max_value = min_value + (pct_range * 0.01 * min_value)\n",
|
||||
" mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n",
|
||||
" group_filter_trx = trxns[mask]\n",
|
||||
" trx_count = len(group_filter_trx)\n",
|
||||
" trx_sum = np.sum(group_filter_trx)\n",
|
||||
" group_count += 1\n",
|
||||
" groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n",
|
||||
" min_value, pct_range]\n",
|
||||
" trxns = trxns[trxns > max_value]\n",
|
||||
" if len(trxns) > 0:\n",
|
||||
" min_value = trxns[0]\n",
|
||||
"\n",
|
||||
" return groupeddata.to_dict('list')\n",
|
||||
"\n",
|
||||
"# ---------------------------\n",
|
||||
"# Function 4: Run scenario 9\n",
|
||||
"# ---------------------------\n",
|
||||
"def scenario9_data(data1): \n",
|
||||
" grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n",
|
||||
" trx_count_sum_groupwise).reset_index()\n",
|
||||
"\n",
|
||||
" df_list = []\n",
|
||||
" for i in grouped.index:\n",
|
||||
" df_party = pd.DataFrame(grouped.iloc[i, -1])\n",
|
||||
" df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n",
|
||||
" df_list.append(df_party)\n",
|
||||
"\n",
|
||||
" final_df = pd.concat(df_list, ignore_index=True) \n",
|
||||
" Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n",
|
||||
" Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n",
|
||||
" SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n",
|
||||
" \n",
|
||||
" final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n",
|
||||
" final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n",
|
||||
" final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n",
|
||||
" \n",
|
||||
" return final_df\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b6c85de2-6a47-4109-8885-c138c289ec25",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"class Scenario:\n",
|
||||
" seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
" def logic(self, **kwargs):\n",
|
||||
" row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
" cust_data=\"customer_data_v1\",\n",
|
||||
" acc_data=\"account_data_v1\",\n",
|
||||
" alert_data=\"alert_data_v1\")\n",
|
||||
" )\n",
|
||||
" cols = [\n",
|
||||
" \"transaction_key\",\n",
|
||||
" \"transaction_date\",\n",
|
||||
" \"transaction_amount\",\n",
|
||||
" \"transaction_desc\",\n",
|
||||
" \"benef_account_number\",\n",
|
||||
" \"account_number\",\n",
|
||||
" \"acc_customer_number\",\n",
|
||||
" \"Focal_id\",\n",
|
||||
" \"customer_name\",\n",
|
||||
" \"date_of_birth\",\n",
|
||||
" \"nationality\",\n",
|
||||
" \"Segment\",\n",
|
||||
" \"Risk\", \n",
|
||||
" \"SAR_FLAG\"\n",
|
||||
" ]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" df['Segment'] = 'SME'\n",
|
||||
" df['MIN_LIMIT'] = 50000\n",
|
||||
" df['PCT_RANGE'] = 20\n",
|
||||
" \n",
|
||||
" scenario_data = scenario9_data(df)\n",
|
||||
" \n",
|
||||
" return scenario_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
299
main.ipynb
Normal file
299
main.ipynb
Normal file
@ -0,0 +1,299 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import pandas as pd\n",
|
||||
"\n",
|
||||
"# query = \"\"\"\n",
|
||||
"# select final.CUSTOMER_NUMBER_main as Focal_id,\n",
|
||||
"# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n",
|
||||
"# final.Cash_deposit_count,\n",
|
||||
"# final.SEGMENT,\n",
|
||||
"# final.RISK,\n",
|
||||
"# final.SAR_FLAG\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# (\n",
|
||||
"# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n",
|
||||
"# subquery.Cash_deposit_total,\n",
|
||||
"# subquery.Cash_deposit_count\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_1, \n",
|
||||
"# sum(transaction_amount) as Cash_deposit_total, \n",
|
||||
"# count(*) as Cash_deposit_count\n",
|
||||
"# from \n",
|
||||
"# (\n",
|
||||
"# select * \n",
|
||||
"# from {trans_data} trans_table \n",
|
||||
"# left join {acc_data} acc_table\n",
|
||||
"# on trans_table.benef_account_number = acc_table.account_number\n",
|
||||
"# ) trans\n",
|
||||
"# where account_number not in ('None')\n",
|
||||
"# and transaction_desc = 'CASH RELATED TRANSACTION'\n",
|
||||
"# group by customer_number\n",
|
||||
"# ) subquery\n",
|
||||
"# ) main \n",
|
||||
"# left join \n",
|
||||
"# (\n",
|
||||
"# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n",
|
||||
"# cd.SEGMENT,\n",
|
||||
"# cd.RISK,\n",
|
||||
"# case\n",
|
||||
"# when ad.SAR_FLAG is NULL then 'N'\n",
|
||||
"# else ad.SAR_FLAG\n",
|
||||
"# end as SAR_FLAG \n",
|
||||
"# from\n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_3, \n",
|
||||
"# business_segment as SEGMENT,\n",
|
||||
"# case\n",
|
||||
"# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n",
|
||||
"# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n",
|
||||
"# when RISK_CLASSIFICATION = 3 then 'High Risk'\n",
|
||||
"# else 'Unknown Risk'\n",
|
||||
"# end AS RISK\n",
|
||||
"# from {cust_data}\n",
|
||||
"# ) cd \n",
|
||||
"# left join\n",
|
||||
"# (\n",
|
||||
"# select customer_number as CUSTOMER_NUMBER_4, \n",
|
||||
"# sar_flag as SAR_FLAG\n",
|
||||
"# from {alert_data}\n",
|
||||
"# ) ad \n",
|
||||
"# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n",
|
||||
"# ) as cust_alert\n",
|
||||
"# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n",
|
||||
"# ) as final\n",
|
||||
"# \"\"\"\n",
|
||||
"\n",
|
||||
"# from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"# class Scenario:\n",
|
||||
"# seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
"# def logic(self, **kwargs):\n",
|
||||
"# row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
"# cust_data=\"customer_data_v1\",\n",
|
||||
"# acc_data=\"account_data_v1\",\n",
|
||||
"# alert_data=\"alert_data_v1\")\n",
|
||||
"# )\n",
|
||||
"# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n",
|
||||
"# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n",
|
||||
"# df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
"# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n",
|
||||
"# return df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"query = \"\"\"\n",
|
||||
" SELECT \n",
|
||||
" t.transaction_key,\n",
|
||||
" t.transaction_date,\n",
|
||||
" t.transaction_amount,\n",
|
||||
" t.transaction_desc,\n",
|
||||
" t.benef_account_number,\n",
|
||||
"\n",
|
||||
" -- Account data\n",
|
||||
" a.account_number,\n",
|
||||
" a.customer_number AS acc_customer_number,\n",
|
||||
"\n",
|
||||
" -- Party data\n",
|
||||
" p.customer_number AS party_customer_number,\n",
|
||||
" p.customer_name,\n",
|
||||
" p.date_of_birth,\n",
|
||||
" p.nationality,\n",
|
||||
" p.business_segment,\n",
|
||||
" CASE\n",
|
||||
" WHEN p.risk_classification = 1 THEN 'Low Risk'\n",
|
||||
" WHEN p.risk_classification = 2 THEN 'Medium Risk'\n",
|
||||
" WHEN p.risk_classification = 3 THEN 'High Risk'\n",
|
||||
" ELSE 'Unknown Risk'\n",
|
||||
" END AS risk_level,\n",
|
||||
"\n",
|
||||
" -- Alert data\n",
|
||||
" COALESCE(al.sar_flag, 'N') AS sar_flag\n",
|
||||
"\n",
|
||||
" FROM {trans_data} t\n",
|
||||
"\n",
|
||||
" -- Join with account data on beneficiary account\n",
|
||||
" LEFT JOIN {acc_data} a\n",
|
||||
" ON t.benef_account_number = a.account_number\n",
|
||||
"\n",
|
||||
" -- Join with party/customer data using account's customer number\n",
|
||||
" LEFT JOIN {cust_data} p\n",
|
||||
" ON a.customer_number = p.customer_number\n",
|
||||
"\n",
|
||||
" -- Join with alert data using party's customer number\n",
|
||||
" LEFT JOIN {alert_data} al\n",
|
||||
" ON p.customer_number = al.customer_number\n",
|
||||
"\n",
|
||||
" WHERE a.account_number IS NOT NULL\n",
|
||||
" limit 100\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def trx_count_sum_groupwise(data_filt_partywise): \n",
|
||||
" data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n",
|
||||
" groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n",
|
||||
" 'MIN_LIMIT', 'PCT_RANGE'])\n",
|
||||
" \n",
|
||||
" trxns = data_filt_partywise['transaction_amount'].values\n",
|
||||
" pct_range = data_filt_partywise['PCT_RANGE'].max()\n",
|
||||
" min_value = data_filt_partywise['MIN_LIMIT'].max()\n",
|
||||
"\n",
|
||||
" trxns = trxns[trxns >= min_value]\n",
|
||||
" if len(trxns) > 0:\n",
|
||||
" min_value = trxns[0]\n",
|
||||
"\n",
|
||||
" group_count = 0\n",
|
||||
" while len(trxns) > 0:\n",
|
||||
" max_value = min_value + (pct_range * 0.01 * min_value)\n",
|
||||
" mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n",
|
||||
" group_filter_trx = trxns[mask]\n",
|
||||
" trx_count = len(group_filter_trx)\n",
|
||||
" trx_sum = np.sum(group_filter_trx)\n",
|
||||
" group_count += 1\n",
|
||||
" groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n",
|
||||
" min_value, pct_range]\n",
|
||||
" trxns = trxns[trxns > max_value]\n",
|
||||
" if len(trxns) > 0:\n",
|
||||
" min_value = trxns[0]\n",
|
||||
"\n",
|
||||
" return groupeddata.to_dict('list')\n",
|
||||
"\n",
|
||||
"# ---------------------------\n",
|
||||
"# Function 4: Run scenario 9\n",
|
||||
"# ---------------------------\n",
|
||||
"def scenario9_data(data1): \n",
|
||||
" grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n",
|
||||
" trx_count_sum_groupwise).reset_index()\n",
|
||||
"\n",
|
||||
" df_list = []\n",
|
||||
" for i in grouped.index:\n",
|
||||
" df_party = pd.DataFrame(grouped.iloc[i, -1])\n",
|
||||
" df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n",
|
||||
" df_list.append(df_party)\n",
|
||||
"\n",
|
||||
" final_df = pd.concat(df_list, ignore_index=True) \n",
|
||||
" Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n",
|
||||
" Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n",
|
||||
" SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n",
|
||||
" \n",
|
||||
" final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n",
|
||||
" final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n",
|
||||
" final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n",
|
||||
" \n",
|
||||
" return final_df\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b6c85de2-6a47-4109-8885-c138c289ec25",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"from tms_data_interface import SQLQueryInterface\n",
|
||||
"\n",
|
||||
"class Scenario:\n",
|
||||
" seq = SQLQueryInterface(schema=\"transactionschema\")\n",
|
||||
"\n",
|
||||
" def logic(self, **kwargs):\n",
|
||||
" row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n",
|
||||
" cust_data=\"customer_data_v1\",\n",
|
||||
" acc_data=\"account_data_v1\",\n",
|
||||
" alert_data=\"alert_data_v1\")\n",
|
||||
" )\n",
|
||||
" cols = [\n",
|
||||
" \"transaction_key\",\n",
|
||||
" \"transaction_date\",\n",
|
||||
" \"transaction_amount\",\n",
|
||||
" \"transaction_desc\",\n",
|
||||
" \"benef_account_number\",\n",
|
||||
" \"account_number\",\n",
|
||||
" \"acc_customer_number\",\n",
|
||||
" \"Focal_id\",\n",
|
||||
" \"customer_name\",\n",
|
||||
" \"date_of_birth\",\n",
|
||||
" \"nationality\",\n",
|
||||
" \"Segment\",\n",
|
||||
" \"Risk\", \n",
|
||||
" \"SAR_FLAG\"\n",
|
||||
" ]\n",
|
||||
" df = pd.DataFrame(row_list, columns = cols)\n",
|
||||
" df['Segment'] = 'SME'\n",
|
||||
" df['MIN_LIMIT'] = 50000\n",
|
||||
" df['PCT_RANGE'] = 20\n",
|
||||
" \n",
|
||||
" scenario_data = scenario9_data(df)\n",
|
||||
" \n",
|
||||
" return scenario_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "1f20337b-8116-47e5-8743-1ba41e2df819",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sen = Scenario()\n",
|
||||
"# sen.logic()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
245
main.py
Normal file
245
main.py
Normal file
@ -0,0 +1,245 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[1]:
|
||||
|
||||
|
||||
# import pandas as pd
|
||||
|
||||
# query = """
|
||||
# select final.CUSTOMER_NUMBER_main as Focal_id,
|
||||
# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,
|
||||
# final.Cash_deposit_count,
|
||||
# final.SEGMENT,
|
||||
# final.RISK,
|
||||
# final.SAR_FLAG
|
||||
# from
|
||||
# (
|
||||
# (
|
||||
# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
|
||||
# subquery.Cash_deposit_total,
|
||||
# subquery.Cash_deposit_count
|
||||
# from
|
||||
# (
|
||||
# select customer_number as CUSTOMER_NUMBER_1,
|
||||
# sum(transaction_amount) as Cash_deposit_total,
|
||||
# count(*) as Cash_deposit_count
|
||||
# from
|
||||
# (
|
||||
# select *
|
||||
# from {trans_data} trans_table
|
||||
# left join {acc_data} acc_table
|
||||
# on trans_table.benef_account_number = acc_table.account_number
|
||||
# ) trans
|
||||
# where account_number not in ('None')
|
||||
# and transaction_desc = 'CASH RELATED TRANSACTION'
|
||||
# group by customer_number
|
||||
# ) subquery
|
||||
# ) main
|
||||
# left join
|
||||
# (
|
||||
# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
|
||||
# cd.SEGMENT,
|
||||
# cd.RISK,
|
||||
# case
|
||||
# when ad.SAR_FLAG is NULL then 'N'
|
||||
# else ad.SAR_FLAG
|
||||
# end as SAR_FLAG
|
||||
# from
|
||||
# (
|
||||
# select customer_number as CUSTOMER_NUMBER_3,
|
||||
# business_segment as SEGMENT,
|
||||
# case
|
||||
# when RISK_CLASSIFICATION = 1 then 'Low Risk'
|
||||
# when RISK_CLASSIFICATION = 2 then 'Medium Risk'
|
||||
# when RISK_CLASSIFICATION = 3 then 'High Risk'
|
||||
# else 'Unknown Risk'
|
||||
# end AS RISK
|
||||
# from {cust_data}
|
||||
# ) cd
|
||||
# left join
|
||||
# (
|
||||
# select customer_number as CUSTOMER_NUMBER_4,
|
||||
# sar_flag as SAR_FLAG
|
||||
# from {alert_data}
|
||||
# ) ad
|
||||
# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
|
||||
# ) as cust_alert
|
||||
# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
|
||||
# ) as final
|
||||
# """
|
||||
|
||||
# from tms_data_interface import SQLQueryInterface
|
||||
|
||||
# class Scenario:
|
||||
# seq = SQLQueryInterface(schema="transactionschema")
|
||||
|
||||
# def logic(self, **kwargs):
|
||||
# row_list = self.seq.execute_raw(query.format(trans_data="transaction10m",
|
||||
# cust_data="customer_data_v1",
|
||||
# acc_data="account_data_v1",
|
||||
# alert_data="alert_data_v1")
|
||||
# )
|
||||
# cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count",
|
||||
# "Segment", "Risk", "SAR_FLAG"]
|
||||
# df = pd.DataFrame(row_list, columns = cols)
|
||||
# df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float)
|
||||
# return df
|
||||
|
||||
|
||||
# In[15]:
|
||||
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
# SQL template returning one row per transaction whose beneficiary account is
# known, enriched with the account owner's customer attributes and SAR flag.
# Placeholders ({trans_data}, {acc_data}, {cust_data}, {alert_data}) are table
# names filled in with str.format by the caller.
#
# The order of the 14 selected columns matters: Scenario.logic labels the
# result columns by position, not by the SQL aliases used here.
#
# Alerts are collapsed to one row per customer before joining. Joining the raw
# alert table would repeat every transaction once per alert of that customer
# and inflate the per-band counts and sums computed downstream.
#
# NOTE(review): "limit 100" without an ORDER BY returns an arbitrary 100 rows;
# it looks like a development leftover -- confirm before running on real data.
query = """
    SELECT
        t.transaction_key,
        t.transaction_date,
        t.transaction_amount,
        t.transaction_desc,
        t.benef_account_number,

        -- Account data
        a.account_number,
        a.customer_number AS acc_customer_number,

        -- Party data
        p.customer_number AS party_customer_number,
        p.customer_name,
        p.date_of_birth,
        p.nationality,
        p.business_segment,
        CASE
            WHEN p.risk_classification = 1 THEN 'Low Risk'
            WHEN p.risk_classification = 2 THEN 'Medium Risk'
            WHEN p.risk_classification = 3 THEN 'High Risk'
            ELSE 'Unknown Risk'
        END AS risk_level,

        -- Alert data
        COALESCE(al.sar_flag, 'N') AS sar_flag

    FROM {trans_data} t

    -- Join with account data on beneficiary account
    LEFT JOIN {acc_data} a
        ON t.benef_account_number = a.account_number

    -- Join with party/customer data using account's customer number
    LEFT JOIN {cust_data} p
        ON a.customer_number = p.customer_number

    -- Join with alert data (one row per customer) using party's customer number
    LEFT JOIN (
        SELECT customer_number,
               MAX(sar_flag) AS sar_flag
        FROM {alert_data}
        GROUP BY customer_number
    ) al
        ON p.customer_number = al.customer_number

    WHERE a.account_number IS NOT NULL
    limit 100
"""
|
||||
|
||||
|
||||
# In[20]:
|
||||
|
||||
|
||||
def trx_count_sum_groupwise(data_filt_partywise):
    """Bucket one party's transaction amounts into percentage-width bands.

    Amounts below the frame's ``MIN_LIMIT`` are discarded. The remaining
    amounts are walked in ascending order: each band starts at the smallest
    amount not yet assigned and extends ``PCT_RANGE`` percent above that
    amount (inclusive). The next band starts at the first amount above the
    previous band's ceiling.

    Args:
        data_filt_partywise: DataFrame with ``transaction_amount``,
            ``MIN_LIMIT`` and ``PCT_RANGE`` columns. The largest
            ``MIN_LIMIT`` and ``PCT_RANGE`` values in the frame are used.

    Returns:
        dict of equal-length lists keyed by ``group_no`` (1-based band
        number), ``trxn_cnt``, ``trxn_sum_amt``, ``MIN_LIMIT`` (the band's
        lower bound, i.e. its smallest amount) and ``PCT_RANGE``. All lists
        are empty when no amount reaches ``MIN_LIMIT``.
    """
    amounts = data_filt_partywise['transaction_amount'].sort_values().to_numpy()
    pct_range = data_filt_partywise['PCT_RANGE'].max()
    min_limit = data_filt_partywise['MIN_LIMIT'].max()

    # Comparisons with NaN are False, so missing amounts are dropped here too.
    amounts = amounts[amounts >= min_limit]

    # Plain lists instead of appending rows to a DataFrame one at a time: the
    # result is returned as a dict of lists anyway.
    bands = {
        'group_no': [],
        'trxn_cnt': [],
        'trxn_sum_amt': [],
        'MIN_LIMIT': [],
        'PCT_RANGE': [],
    }

    while len(amounts) > 0:
        band_floor = amounts[0]  # smallest unassigned amount (array is sorted)
        # Clamp the ceiling to the floor. With a negative floor or a negative
        # percentage the raw ceiling falls below the floor, the band would be
        # empty, nothing would be consumed and the loop would never end.
        band_ceiling = max(band_floor,
                           band_floor + (pct_range * 0.01 * band_floor))

        # Every remaining amount is >= band_floor, so only the upper bound
        # needs checking.
        in_band = amounts[amounts <= band_ceiling]

        bands['group_no'].append(len(bands['group_no']) + 1)
        bands['trxn_cnt'].append(len(in_band))
        bands['trxn_sum_amt'].append(np.sum(in_band))
        bands['MIN_LIMIT'].append(band_floor)
        bands['PCT_RANGE'].append(pct_range)

        amounts = amounts[amounts > band_ceiling]

    return bands
|
||||
|
||||
# ---------------------------
|
||||
# Function 4: Run scenario 9
|
||||
# ---------------------------
|
||||
def scenario9_data(data1):
    """Build the per-party transaction-band table for scenario 9.

    Rows are grouped by ``Focal_id``; each party's amounts are banded by
    ``trx_count_sum_groupwise`` and the bands of all parties are stacked.
    The party's ``Segment``, ``Risk`` and ``SAR_FLAG`` (the maximum value
    seen for that party) are then attached to every band row.

    Args:
        data1: DataFrame with ``Focal_id``, ``transaction_amount``,
            ``MIN_LIMIT``, ``PCT_RANGE``, ``Segment``, ``Risk`` and
            ``SAR_FLAG`` columns.

    Returns:
        DataFrame with columns ``group_no``, ``trxn_cnt``, ``trxn_sum_amt``,
        ``MIN_LIMIT``, ``PCT_RANGE``, ``Focal_id``, ``Segment``, ``Risk``,
        ``SAR_FLAG``. Parties with no amount at or above their limit
        contribute no rows. An empty frame with these columns is returned
        when there is nothing to band.
    """
    result_columns = ['group_no', 'trxn_cnt', 'trxn_sum_amt', 'MIN_LIMIT',
                      'PCT_RANGE', 'Focal_id', 'Segment', 'Risk', 'SAR_FLAG']

    # pd.concat raises ValueError on an empty list, so bail out early when
    # there are no rows at all.
    if data1.empty:
        return pd.DataFrame(columns=result_columns)

    band_inputs = data1[['Focal_id', 'transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']]

    party_frames = []
    # groupby drops rows whose Focal_id is null (pandas default dropna=True).
    for focal_id, party_rows in band_inputs.groupby('Focal_id'):
        party_bands = pd.DataFrame(trx_count_sum_groupwise(party_rows))
        party_bands['Focal_id'] = focal_id
        party_frames.append(party_bands)

    # Non-empty input can still yield no groups when every Focal_id is null.
    if not party_frames:
        return pd.DataFrame(columns=result_columns)

    final_df = pd.concat(party_frames, ignore_index=True)

    # One pass for all three party attributes instead of three
    # groupby/merge rounds.
    party_attributes = (
        data1.groupby('Focal_id')[['Segment', 'Risk', 'SAR_FLAG']]
        .agg('max')
        .reset_index()
    )
    return final_df.merge(party_attributes, on='Focal_id', how='left')
|
||||
|
||||
|
||||
|
||||
# In[17]:
|
||||
|
||||
|
||||
from tms_data_interface import SQLQueryInterface
|
||||
|
||||
class Scenario:
    """Scenario 9 entry point: fetch enriched transactions and band them per party."""

    # Query interface bound to the transaction schema. Created when the class
    # body executes and shared by every instance.
    seq = SQLQueryInterface(schema="transactionschema")

    def logic(self, **kwargs):
        """Run the query and return the per-party transaction-band table.

        Args:
            **kwargs: Accepted but not used by this method.

        Returns:
            The DataFrame produced by ``scenario9_data``.
        """
        table_names = {
            "trans_data": "transaction10m",
            "cust_data": "customer_data_v1",
            "acc_data": "account_data_v1",
            "alert_data": "alert_data_v1",
        }
        # Names are applied by position, so this list must stay in the same
        # order as the SELECT list of the module-level query.
        result_columns = [
            "transaction_key",
            "transaction_date",
            "transaction_amount",
            "transaction_desc",
            "benef_account_number",
            "account_number",
            "acc_customer_number",
            "Focal_id",
            "customer_name",
            "date_of_birth",
            "nationality",
            "Segment",
            "Risk",
            "SAR_FLAG",
        ]

        rows = self.seq.execute_raw(query.format(**table_names))
        transactions = pd.DataFrame(rows, columns=result_columns)

        # NOTE(review): 'SME' replaces the Segment value returned by the
        # query for every row -- confirm this override is intentional.
        # MIN_LIMIT and PCT_RANGE are the banding parameters read by
        # trx_count_sum_groupwise.
        transactions = transactions.assign(
            Segment='SME',
            MIN_LIMIT=50000,
            PCT_RANGE=20,
        )

        return scenario9_data(transactions)
|
||||
|
||||
|
||||
# In[19]:
|
||||
|
||||
|
||||
# sen = Scenario()
|
||||
# sen.logic()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user