diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 90f3c1f..4bce2ae 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,153 +2,76 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] }, "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "query = \"\"\"\n", - " select final.CUSTOMER_NUMBER_main as Focal_id,\n", - " CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", - " final.Cash_deposit_count,\n", - " final.SEGMENT,\n", - " final.RISK,\n", - " final.SAR_FLAG\n", - "from \n", - "(\n", - " (\n", - " select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", - " subquery.Cash_deposit_total,\n", - " subquery.Cash_deposit_count\n", - " from \n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_1, \n", - " sum(transaction_amount) as Cash_deposit_total, \n", - " count(*) as Cash_deposit_count\n", - " from \n", - " (\n", - " select * \n", - " from {trans_data} trans_table \n", - " left join {acc_data} acc_table\n", - " on trans_table.benef_account_number = acc_table.account_number\n", - " ) trans\n", - " where account_number not in ('None')\n", - " and transaction_desc = 'CASH RELATED TRANSACTION'\n", - " group by customer_number\n", - " ) subquery\n", - " ) main \n", - " left join \n", - " (\n", - " select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", - " cd.SEGMENT,\n", - " cd.RISK,\n", - " case\n", - " when ad.SAR_FLAG is NULL then 'N'\n", - " else ad.SAR_FLAG\n", - " end as SAR_FLAG \n", - " from\n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_3, \n", - " business_segment as SEGMENT,\n", - " case\n", - " when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", - " when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", - " when RISK_CLASSIFICATION = 3 then 'High Risk'\n", - " else 'Unknown Risk'\n", - " end AS RISK\n", - " from {cust_data}\n", - " ) cd \n", - " left join\n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_4, \n", - " sar_flag as SAR_FLAG\n", - " from {alert_data}\n", - " ) ad \n", - " on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", - " ) as cust_alert\n", - " on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", - ") as final\n", - "\"\"\"\n", - "\n", - "from tms_data_interface import SQLQueryInterface\n", - "\n", - "class Scenario:\n", - " seq = SQLQueryInterface(schema=\"transactionschema\")\n", - "\n", - " def logic(self, **kwargs):\n", - " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", - " cust_data=\"customer_data_v1\",\n", - " acc_data=\"account_data_v1\",\n", - " alert_data=\"alert_data_v1\")\n", - " )\n", - " cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", - " \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", - " df = pd.DataFrame(row_list, columns = cols)\n", - " df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b6c85de2-6a47-4109-8885-c138c289ec25", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "# import pandas as pd\n", "\n", "# query = \"\"\"\n", - "# SELECT \n", - "# t.transaction_id,\n", - "# t.transaction_date,\n", - "# t.transaction_amount,\n", - "# t.transaction_desc,\n", - "# t.benef_account_number,\n", - "\n", - "# -- Account data\n", - "# a.account_number,\n", - "# a.customer_number AS acc_customer_number,\n", - "# a.account_type,\n", - "# a.branch_code,\n", - "\n", - "# -- Party data\n", - "# p.customer_number AS party_customer_number,\n", - "# p.customer_name,\n", - "# p.date_of_birth,\n", - "# p.nationality,\n", - "# p.business_segment,\n", - "# CASE\n", - "# WHEN p.risk_classification = 1 THEN 'Low Risk'\n", - "# WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", - "# WHEN p.risk_classification = 3 THEN 'High Risk'\n", - "# ELSE 'Unknown Risk'\n", - "# END AS risk_level,\n", - "\n", - "# -- Alert data\n", - "# COALESCE(al.sar_flag, 'N') AS sar_flag\n", - "\n", - "# FROM {trans_data} t\n", - "\n", - "# -- Join with account data on beneficiary account\n", - "# LEFT JOIN {acc_data} a\n", - "# ON t.benef_account_number = a.account_number\n", - "\n", - "# -- Join with party/customer data using account's customer number\n", - "# LEFT JOIN {cust_data} p\n", - "# ON a.customer_number = p.customer_number\n", - "\n", - "# -- Join with alert data using party's customer number\n", - "# LEFT JOIN {alert_data} al\n", - "# ON p.customer_number = al.customer_number\n", - "\n", - "# WHERE a.account_number IS NOT NULL\n", - "# limit 100\n", + "# select final.CUSTOMER_NUMBER_main as Focal_id,\n", + "# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", + "# final.Cash_deposit_count,\n", + "# final.SEGMENT,\n", + "# final.RISK,\n", + "# final.SAR_FLAG\n", + "# from \n", + "# (\n", + "# (\n", + "# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", + "# subquery.Cash_deposit_total,\n", + "# subquery.Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_1, \n", + "# sum(transaction_amount) as Cash_deposit_total, \n", + "# count(*) as Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select * \n", + "# from {trans_data} trans_table \n", + "# left join {acc_data} acc_table\n", + "# on trans_table.benef_account_number = acc_table.account_number\n", + "# ) trans\n", + "# where account_number not in ('None')\n", + "# and transaction_desc = 'CASH RELATED TRANSACTION'\n", + "# group by customer_number\n", + "# ) subquery\n", + "# ) main \n", + "# left join \n", + "# (\n", + "# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", + "# cd.SEGMENT,\n", + "# cd.RISK,\n", + "# case\n", + "# when ad.SAR_FLAG is NULL then 'N'\n", + "# else ad.SAR_FLAG\n", + "# end as SAR_FLAG \n", + "# from\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_3, \n", + "# business_segment as SEGMENT,\n", + "# case\n", + "# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", + "# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", + "# when RISK_CLASSIFICATION = 3 then 'High Risk'\n", + "# else 'Unknown Risk'\n", + "# end AS RISK\n", + "# from {cust_data}\n", + "# ) cd \n", + "# left join\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_4, \n", + "# sar_flag as SAR_FLAG\n", + "# from {alert_data}\n", + "# ) ad \n", + "# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", + "# ) as cust_alert\n", + "# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", + "# ) as final\n", "# \"\"\"\n", "\n", "# from tms_data_interface import SQLQueryInterface\n", @@ -162,31 +85,108 @@ "# acc_data=\"account_data_v1\",\n", "# alert_data=\"alert_data_v1\")\n", "# )\n", - "# cols = [\n", - "# \"transaction_id\",\n", - "# \"transaction_date\",\n", - "# \"transaction_amount\",\n", - "# \"transaction_desc\",\n", - "# \"benef_account_number\",\n", - "# \"account_number\",\n", - "# \"acc_customer_number\",\n", - "# \"account_type\",\n", - "# \"branch_code\",\n", - "# \"party_customer_number\",\n", - "# \"customer_name\",\n", - "# \"date_of_birth\",\n", - "# \"nationality\",\n", - "# \"business_segment\",\n", - "# \"risk_level\",\n", - "# \"sar_flag\"\n", - "# ]\n", + "# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", + "# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", "# df = pd.DataFrame(row_list, columns = cols)\n", + "# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", "# return df" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "query = \"\"\"\n", + " SELECT \n", + " t.transaction_id,\n", + " t.transaction_date,\n", + " t.transaction_amount,\n", + " t.transaction_desc,\n", + " t.benef_account_number,\n", + "\n", + " -- Account data\n", + " a.account_number,\n", + " a.customer_number AS acc_customer_number,\n", + " a.account_type,\n", + " a.branch_code,\n", + "\n", + " -- Party data\n", + " p.customer_number AS party_customer_number,\n", + " p.customer_name,\n", + " p.date_of_birth,\n", + " p.nationality,\n", + " p.business_segment,\n", + " CASE\n", + " WHEN p.risk_classification = 1 THEN 'Low Risk'\n", + " WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", + " WHEN p.risk_classification = 3 THEN 'High Risk'\n", + " ELSE 'Unknown Risk'\n", + " END AS risk_level,\n", + "\n", + " -- Alert data\n", + " COALESCE(al.sar_flag, 'N') AS sar_flag\n", + "\n", + " FROM {trans_data} t\n", + "\n", + " -- Join with account data on beneficiary account\n", + " LEFT JOIN {acc_data} a\n", + " ON t.benef_account_number = a.account_number\n", + "\n", + " -- Join with party/customer data using account's customer number\n", + " LEFT JOIN {cust_data} p\n", + " ON a.customer_number = p.customer_number\n", + "\n", + " -- Join with alert data using party's customer number\n", + " LEFT JOIN {alert_data} al\n", + " ON p.customer_number = al.customer_number\n", + "\n", + " WHERE a.account_number IS NOT NULL\n", + " limit 100\n", + "\"\"\"\n", + "\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + " def logic(self, **kwargs):\n", + " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + " cust_data=\"customer_data_v1\",\n", + " acc_data=\"account_data_v1\",\n", + " alert_data=\"alert_data_v1\")\n", + " )\n", + " cols = [\n", + " \"transaction_id\",\n", + " \"transaction_date\",\n", + " \"transaction_amount\",\n", + " \"transaction_desc\",\n", + " \"benef_account_number\",\n", + " \"account_number\",\n", + " \"acc_customer_number\",\n", + " \"account_type\",\n", + " \"branch_code\",\n", + " \"party_customer_number\",\n", + " \"customer_name\",\n", + " \"date_of_birth\",\n", + " \"nationality\",\n", + " \"business_segment\",\n", + " \"risk_level\",\n", + " \"sar_flag\"\n", + " ]\n", + " df = pd.DataFrame(row_list, columns = cols)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "1f20337b-8116-47e5-8743-1ba41e2df819", "metadata": { "tags": [] diff --git a/main.ipynb b/main.ipynb index 90f3c1f..4bce2ae 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,153 +2,76 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] }, "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "query = \"\"\"\n", - " select final.CUSTOMER_NUMBER_main as Focal_id,\n", - " CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", - " final.Cash_deposit_count,\n", - " final.SEGMENT,\n", - " final.RISK,\n", - " final.SAR_FLAG\n", - "from \n", - "(\n", - " (\n", - " select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", - " subquery.Cash_deposit_total,\n", - " subquery.Cash_deposit_count\n", - " from \n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_1, \n", - " sum(transaction_amount) as Cash_deposit_total, \n", - " count(*) as Cash_deposit_count\n", - " from \n", - " (\n", - " select * \n", - " from {trans_data} trans_table \n", - " left join {acc_data} acc_table\n", - " on trans_table.benef_account_number = acc_table.account_number\n", - " ) trans\n", - " where account_number not in ('None')\n", - " and transaction_desc = 'CASH RELATED TRANSACTION'\n", - " group by customer_number\n", - " ) subquery\n", - " ) main \n", - " left join \n", - " (\n", - " select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", - " cd.SEGMENT,\n", - " cd.RISK,\n", - " case\n", - " when ad.SAR_FLAG is NULL then 'N'\n", - " else ad.SAR_FLAG\n", - " end as SAR_FLAG \n", - " from\n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_3, \n", - " business_segment as SEGMENT,\n", - " case\n", - " when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", - " when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", - " when RISK_CLASSIFICATION = 3 then 'High Risk'\n", - " else 'Unknown Risk'\n", - " end AS RISK\n", - " from {cust_data}\n", - " ) cd \n", - " left join\n", - " (\n", - " select customer_number as CUSTOMER_NUMBER_4, \n", - " sar_flag as SAR_FLAG\n", - " from {alert_data}\n", - " ) ad \n", - " on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", - " ) as cust_alert\n", - " on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", - ") as final\n", - "\"\"\"\n", - "\n", - "from tms_data_interface import SQLQueryInterface\n", - "\n", - "class Scenario:\n", - " seq = SQLQueryInterface(schema=\"transactionschema\")\n", - "\n", - " def logic(self, **kwargs):\n", - " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", - " cust_data=\"customer_data_v1\",\n", - " acc_data=\"account_data_v1\",\n", - " alert_data=\"alert_data_v1\")\n", - " )\n", - " cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", - " \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", - " df = pd.DataFrame(row_list, columns = cols)\n", - " df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "b6c85de2-6a47-4109-8885-c138c289ec25", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "# import pandas as pd\n", "\n", "# query = \"\"\"\n", - "# SELECT \n", - "# t.transaction_id,\n", - "# t.transaction_date,\n", - "# t.transaction_amount,\n", - "# t.transaction_desc,\n", - "# t.benef_account_number,\n", - "\n", - "# -- Account data\n", - "# a.account_number,\n", - "# a.customer_number AS acc_customer_number,\n", - "# a.account_type,\n", - "# a.branch_code,\n", - "\n", - "# -- Party data\n", - "# p.customer_number AS party_customer_number,\n", - "# p.customer_name,\n", - "# p.date_of_birth,\n", - "# p.nationality,\n", - "# p.business_segment,\n", - "# CASE\n", - "# WHEN p.risk_classification = 1 THEN 'Low Risk'\n", - "# WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", - "# WHEN p.risk_classification = 3 THEN 'High Risk'\n", - "# ELSE 'Unknown Risk'\n", - "# END AS risk_level,\n", - "\n", - "# -- Alert data\n", - "# COALESCE(al.sar_flag, 'N') AS sar_flag\n", - "\n", - "# FROM {trans_data} t\n", - "\n", - "# -- Join with account data on beneficiary account\n", - "# LEFT JOIN {acc_data} a\n", - "# ON t.benef_account_number = a.account_number\n", - "\n", - "# -- Join with party/customer data using account's customer number\n", - "# LEFT JOIN {cust_data} p\n", - "# ON a.customer_number = p.customer_number\n", - "\n", - "# -- Join with alert data using party's customer number\n", - "# LEFT JOIN {alert_data} al\n", - "# ON p.customer_number = al.customer_number\n", - "\n", - "# WHERE a.account_number IS NOT NULL\n", - "# limit 100\n", + "# select final.CUSTOMER_NUMBER_main as Focal_id,\n", + "# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,\n", + "# final.Cash_deposit_count,\n", + "# final.SEGMENT,\n", + "# final.RISK,\n", + "# final.SAR_FLAG\n", + "# from \n", + "# (\n", + "# (\n", + "# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,\n", + "# subquery.Cash_deposit_total,\n", + "# subquery.Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_1, \n", + "# sum(transaction_amount) as Cash_deposit_total, \n", + "# count(*) as Cash_deposit_count\n", + "# from \n", + "# (\n", + "# select * \n", + "# from {trans_data} trans_table \n", + "# left join {acc_data} acc_table\n", + "# on trans_table.benef_account_number = acc_table.account_number\n", + "# ) trans\n", + "# where account_number not in ('None')\n", + "# and transaction_desc = 'CASH RELATED TRANSACTION'\n", + "# group by customer_number\n", + "# ) subquery\n", + "# ) main \n", + "# left join \n", + "# (\n", + "# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,\n", + "# cd.SEGMENT,\n", + "# cd.RISK,\n", + "# case\n", + "# when ad.SAR_FLAG is NULL then 'N'\n", + "# else ad.SAR_FLAG\n", + "# end as SAR_FLAG \n", + "# from\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_3, \n", + "# business_segment as SEGMENT,\n", + "# case\n", + "# when RISK_CLASSIFICATION = 1 then 'Low Risk'\n", + "# when RISK_CLASSIFICATION = 2 then 'Medium Risk'\n", + "# when RISK_CLASSIFICATION = 3 then 'High Risk'\n", + "# else 'Unknown Risk'\n", + "# end AS RISK\n", + "# from {cust_data}\n", + "# ) cd \n", + "# left join\n", + "# (\n", + "# select customer_number as CUSTOMER_NUMBER_4, \n", + "# sar_flag as SAR_FLAG\n", + "# from {alert_data}\n", + "# ) ad \n", + "# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4\n", + "# ) as cust_alert\n", + "# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main\n", + "# ) as final\n", "# \"\"\"\n", "\n", "# from tms_data_interface import SQLQueryInterface\n", @@ -162,31 +85,108 @@ "# acc_data=\"account_data_v1\",\n", "# alert_data=\"alert_data_v1\")\n", "# )\n", - "# cols = [\n", - "# \"transaction_id\",\n", - "# \"transaction_date\",\n", - "# \"transaction_amount\",\n", - "# \"transaction_desc\",\n", - "# \"benef_account_number\",\n", - "# \"account_number\",\n", - "# \"acc_customer_number\",\n", - "# \"account_type\",\n", - "# \"branch_code\",\n", - "# \"party_customer_number\",\n", - "# \"customer_name\",\n", - "# \"date_of_birth\",\n", - "# \"nationality\",\n", - "# \"business_segment\",\n", - "# \"risk_level\",\n", - "# \"sar_flag\"\n", - "# ]\n", + "# cols = [\"Focal_id\", \"Cash_deposit_total\", \"Cash_deposit_count\",\n", + "# \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", "# df = pd.DataFrame(row_list, columns = cols)\n", + "# df[\"Cash_deposit_total\"] = df[\"Cash_deposit_total\"].astype(float)\n", "# return df" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "query = \"\"\"\n", + " SELECT \n", + " t.transaction_id,\n", + " t.transaction_date,\n", + " t.transaction_amount,\n", + " t.transaction_desc,\n", + " t.benef_account_number,\n", + "\n", + " -- Account data\n", + " a.account_number,\n", + " a.customer_number AS acc_customer_number,\n", + " a.account_type,\n", + " a.branch_code,\n", + "\n", + " -- Party data\n", + " p.customer_number AS party_customer_number,\n", + " p.customer_name,\n", + " p.date_of_birth,\n", + " p.nationality,\n", + " p.business_segment,\n", + " CASE\n", + " WHEN p.risk_classification = 1 THEN 'Low Risk'\n", + " WHEN p.risk_classification = 2 THEN 'Medium Risk'\n", + " WHEN p.risk_classification = 3 THEN 'High Risk'\n", + " ELSE 'Unknown Risk'\n", + " END AS risk_level,\n", + "\n", + " -- Alert data\n", + " COALESCE(al.sar_flag, 'N') AS sar_flag\n", + "\n", + " FROM {trans_data} t\n", + "\n", + " -- Join with account data on beneficiary account\n", + " LEFT JOIN {acc_data} a\n", + " ON t.benef_account_number = a.account_number\n", + "\n", + " -- Join with party/customer data using account's customer number\n", + " LEFT JOIN {cust_data} p\n", + " ON a.customer_number = p.customer_number\n", + "\n", + " -- Join with alert data using party's customer number\n", + " LEFT JOIN {alert_data} al\n", + " ON p.customer_number = al.customer_number\n", + "\n", + " WHERE a.account_number IS NOT NULL\n", + " limit 100\n", + "\"\"\"\n", + "\n", + "from tms_data_interface import SQLQueryInterface\n", + "\n", + "class Scenario:\n", + " seq = SQLQueryInterface(schema=\"transactionschema\")\n", + "\n", + " def logic(self, **kwargs):\n", + " row_list = self.seq.execute_raw(query.format(trans_data=\"transaction10m\",\n", + " cust_data=\"customer_data_v1\",\n", + " acc_data=\"account_data_v1\",\n", + " alert_data=\"alert_data_v1\")\n", + " )\n", + " cols = [\n", + " \"transaction_id\",\n", + " \"transaction_date\",\n", + " \"transaction_amount\",\n", + " \"transaction_desc\",\n", + " \"benef_account_number\",\n", + " \"account_number\",\n", + " \"acc_customer_number\",\n", + " \"account_type\",\n", + " \"branch_code\",\n", + " \"party_customer_number\",\n", + " \"customer_name\",\n", + " \"date_of_birth\",\n", + " \"nationality\",\n", + " \"business_segment\",\n", + " \"risk_level\",\n", + " \"sar_flag\"\n", + " ]\n", + " df = pd.DataFrame(row_list, columns = cols)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "id": "1f20337b-8116-47e5-8743-1ba41e2df819", "metadata": { "tags": [] diff --git a/main.py b/main.py index 7d20451..81b6a29 100644 --- a/main.py +++ b/main.py @@ -1,143 +1,72 @@ #!/usr/bin/env python # coding: utf-8 -# In[3]: - - -import pandas as pd - -query = """ - select final.CUSTOMER_NUMBER_main as Focal_id, - CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total, - final.Cash_deposit_count, - final.SEGMENT, - final.RISK, - final.SAR_FLAG -from -( - ( - select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main, - subquery.Cash_deposit_total, - subquery.Cash_deposit_count - from - ( - select customer_number as CUSTOMER_NUMBER_1, - sum(transaction_amount) as Cash_deposit_total, - count(*) as Cash_deposit_count - from - ( - select * - from {trans_data} trans_table - left join {acc_data} acc_table - on trans_table.benef_account_number = acc_table.account_number - ) trans - where account_number not in ('None') - and transaction_desc = 'CASH RELATED TRANSACTION' - group by customer_number - ) subquery - ) main - left join - ( - select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust, - cd.SEGMENT, - cd.RISK, - case - when ad.SAR_FLAG is NULL then 'N' - else ad.SAR_FLAG - end as SAR_FLAG - from - ( - select customer_number as CUSTOMER_NUMBER_3, - business_segment as SEGMENT, - case - when RISK_CLASSIFICATION = 1 then 'Low Risk' - when RISK_CLASSIFICATION = 2 then 'Medium Risk' - when RISK_CLASSIFICATION = 3 then 'High Risk' - else 'Unknown Risk' - end AS RISK - from {cust_data} - ) cd - left join - ( - select customer_number as CUSTOMER_NUMBER_4, - sar_flag as SAR_FLAG - from {alert_data} - ) ad - on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4 - ) as cust_alert - on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main -) as final -""" - -from tms_data_interface import SQLQueryInterface - -class Scenario: - seq = SQLQueryInterface(schema="transactionschema") - - def logic(self, **kwargs): - row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", - cust_data="customer_data_v1", - acc_data="account_data_v1", - alert_data="alert_data_v1") - ) - cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count", - "Segment", "Risk", "SAR_FLAG"] - df = pd.DataFrame(row_list, columns = cols) - df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float) - return df - - -# In[6]: +# In[1]: # import pandas as pd # query = """ -# SELECT -# t.transaction_id, -# t.transaction_date, -# t.transaction_amount, -# t.transaction_desc, -# t.benef_account_number, - -# -- Account data -# a.account_number, -# a.customer_number AS acc_customer_number, -# a.account_type, -# a.branch_code, - -# -- Party data -# p.customer_number AS party_customer_number, -# p.customer_name, -# p.date_of_birth, -# p.nationality, -# p.business_segment, -# CASE -# WHEN p.risk_classification = 1 THEN 'Low Risk' -# WHEN p.risk_classification = 2 THEN 'Medium Risk' -# WHEN p.risk_classification = 3 THEN 'High Risk' -# ELSE 'Unknown Risk' -# END AS risk_level, - -# -- Alert data -# COALESCE(al.sar_flag, 'N') AS sar_flag - -# FROM {trans_data} t - -# -- Join with account data on beneficiary account -# LEFT JOIN {acc_data} a -# ON t.benef_account_number = a.account_number - -# -- Join with party/customer data using account's customer number -# LEFT JOIN {cust_data} p -# ON a.customer_number = p.customer_number - -# -- Join with alert data using party's customer number -# LEFT JOIN {alert_data} al -# ON p.customer_number = al.customer_number - -# WHERE a.account_number IS NOT NULL -# limit 100 +# select final.CUSTOMER_NUMBER_main as Focal_id, +# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total, +# final.Cash_deposit_count, +# final.SEGMENT, +# final.RISK, +# final.SAR_FLAG +# from +# ( +# ( +# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main, +# subquery.Cash_deposit_total, +# subquery.Cash_deposit_count +# from +# ( +# select customer_number as CUSTOMER_NUMBER_1, +# sum(transaction_amount) as Cash_deposit_total, +# count(*) as Cash_deposit_count +# from +# ( +# select * +# from {trans_data} trans_table +# left join {acc_data} acc_table +# on trans_table.benef_account_number = acc_table.account_number +# ) trans +# where account_number not in ('None') +# and transaction_desc = 'CASH RELATED TRANSACTION' +# group by customer_number +# ) subquery +# ) main +# left join +# ( +# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust, +# cd.SEGMENT, +# cd.RISK, +# case +# when ad.SAR_FLAG is NULL then 'N' +# else ad.SAR_FLAG +# end as SAR_FLAG +# from +# ( +# select customer_number as CUSTOMER_NUMBER_3, +# business_segment as SEGMENT, +# case +# when RISK_CLASSIFICATION = 1 then 'Low Risk' +# when RISK_CLASSIFICATION = 2 then 'Medium Risk' +# when RISK_CLASSIFICATION = 3 then 'High Risk' +# else 'Unknown Risk' +# end AS RISK +# from {cust_data} +# ) cd +# left join +# ( +# select customer_number as CUSTOMER_NUMBER_4, +# sar_flag as SAR_FLAG +# from {alert_data} +# ) ad +# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4 +# ) as cust_alert +# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main +# ) as final # """ # from tms_data_interface import SQLQueryInterface @@ -151,29 +80,100 @@ class Scenario: # acc_data="account_data_v1", # alert_data="alert_data_v1") # ) -# cols = [ -# "transaction_id", -# "transaction_date", -# "transaction_amount", -# "transaction_desc", -# "benef_account_number", -# "account_number", -# "acc_customer_number", -# "account_type", -# "branch_code", -# "party_customer_number", -# "customer_name", -# "date_of_birth", -# "nationality", -# "business_segment", -# "risk_level", -# "sar_flag" -# ] +# cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count", +# "Segment", "Risk", "SAR_FLAG"] # df = pd.DataFrame(row_list, columns = cols) +# df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float) # return df -# In[5]: +# In[2]: + + +import pandas as pd + +query = """ + SELECT + t.transaction_id, + t.transaction_date, + t.transaction_amount, + t.transaction_desc, + t.benef_account_number, + + -- Account data + a.account_number, + a.customer_number AS acc_customer_number, + a.account_type, + a.branch_code, + + -- Party data + p.customer_number AS party_customer_number, + p.customer_name, + p.date_of_birth, + p.nationality, + p.business_segment, + CASE + WHEN p.risk_classification = 1 THEN 'Low Risk' + WHEN p.risk_classification = 2 THEN 'Medium Risk' + WHEN p.risk_classification = 3 THEN 'High Risk' + ELSE 'Unknown Risk' + END AS risk_level, + + -- Alert data + COALESCE(al.sar_flag, 'N') AS sar_flag + + FROM {trans_data} t + + -- Join with account data on beneficiary account + LEFT JOIN {acc_data} a + ON t.benef_account_number = a.account_number + + -- Join with party/customer data using account's customer number + LEFT JOIN {cust_data} p + ON a.customer_number = p.customer_number + + -- Join with alert data using party's customer number + LEFT JOIN {alert_data} al + ON p.customer_number = al.customer_number + + WHERE a.account_number IS NOT NULL + limit 100 +""" + +from tms_data_interface import SQLQueryInterface + +class Scenario: + seq = SQLQueryInterface(schema="transactionschema") + + def logic(self, **kwargs): + row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", + cust_data="customer_data_v1", + acc_data="account_data_v1", + alert_data="alert_data_v1") + ) + cols = [ + "transaction_id", + "transaction_date", + "transaction_amount", + "transaction_desc", + "benef_account_number", + "account_number", + "acc_customer_number", + "account_type", + "branch_code", + "party_customer_number", + "customer_name", + "date_of_birth", + "nationality", + "business_segment", + "risk_level", + "sar_flag" + ] + df = pd.DataFrame(row_list, columns = cols) + return df + + +# In[4]: # sen = Scenario()