generated from user_client2024/154
277 lines
8.4 KiB
Python
277 lines
8.4 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[1]:
|
|
|
|
|
|
# import pandas as pd
|
|
|
|
# query = """
|
|
# select final.CUSTOMER_NUMBER_main as Focal_id,
|
|
# CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total,
|
|
# final.Cash_deposit_count,
|
|
# final.SEGMENT,
|
|
# final.RISK,
|
|
# final.SAR_FLAG
|
|
# from
|
|
# (
|
|
# (
|
|
# select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
|
|
# subquery.Cash_deposit_total,
|
|
# subquery.Cash_deposit_count
|
|
# from
|
|
# (
|
|
# select customer_number as CUSTOMER_NUMBER_1,
|
|
# sum(transaction_amount) as Cash_deposit_total,
|
|
# count(*) as Cash_deposit_count
|
|
# from
|
|
# (
|
|
# select *
|
|
# from {trans_data} trans_table
|
|
# left join {acc_data} acc_table
|
|
# on trans_table.benef_account_number = acc_table.account_number
|
|
# ) trans
|
|
# where account_number not in ('None')
|
|
# and transaction_desc = 'CASH RELATED TRANSACTION'
|
|
# group by customer_number
|
|
# ) subquery
|
|
# ) main
|
|
# left join
|
|
# (
|
|
# select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
|
|
# cd.SEGMENT,
|
|
# cd.RISK,
|
|
# case
|
|
# when ad.SAR_FLAG is NULL then 'N'
|
|
# else ad.SAR_FLAG
|
|
# end as SAR_FLAG
|
|
# from
|
|
# (
|
|
# select customer_number as CUSTOMER_NUMBER_3,
|
|
# business_segment as SEGMENT,
|
|
# case
|
|
# when RISK_CLASSIFICATION = 1 then 'Low Risk'
|
|
# when RISK_CLASSIFICATION = 2 then 'Medium Risk'
|
|
# when RISK_CLASSIFICATION = 3 then 'High Risk'
|
|
# else 'Unknown Risk'
|
|
# end AS RISK
|
|
# from {cust_data}
|
|
# ) cd
|
|
# left join
|
|
# (
|
|
# select customer_number as CUSTOMER_NUMBER_4,
|
|
# sar_flag as SAR_FLAG
|
|
# from {alert_data}
|
|
# ) ad
|
|
# on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
|
|
# ) as cust_alert
|
|
# on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
|
|
# ) as final
|
|
# """
|
|
|
|
# from tms_data_interface import SQLQueryInterface
|
|
|
|
# class Scenario:
|
|
# seq = SQLQueryInterface(schema="transactionschema")
|
|
|
|
# def logic(self, **kwargs):
|
|
# row_list = self.seq.execute_raw(query.format(trans_data="transaction10m",
|
|
# cust_data="customer_data_v1",
|
|
# acc_data="account_data_v1",
|
|
# alert_data="alert_data_v1")
|
|
# )
|
|
# cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count",
|
|
# "Segment", "Risk", "SAR_FLAG"]
|
|
# df = pd.DataFrame(row_list, columns = cols)
|
|
# df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float)
|
|
# return df
|
|
|
|
|
|
# In[41]:
|
|
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
from tms_data_interface import SQLQueryInterface
|
|
|
|
|
|
# In[42]:
|
|
|
|
|
|
query2 = """
|
|
SELECT *
|
|
|
|
FROM percentile_dist
|
|
"""
|
|
|
|
|
|
# In[43]:
|
|
|
|
|
|
query = """
|
|
SELECT
|
|
t.transaction_key,
|
|
t.transaction_date,
|
|
t.transaction_amount,
|
|
t.transaction_desc,
|
|
t.benef_account_number,
|
|
|
|
-- Account data
|
|
a.account_number,
|
|
a.customer_number AS acc_customer_number,
|
|
|
|
-- Party data
|
|
p.customer_number AS party_customer_number,
|
|
p.customer_name,
|
|
p.date_of_birth,
|
|
p.nationality,
|
|
p.business_segment,
|
|
CASE
|
|
WHEN p.risk_classification = 1 THEN 'Low Risk'
|
|
WHEN p.risk_classification = 2 THEN 'Medium Risk'
|
|
WHEN p.risk_classification = 3 THEN 'High Risk'
|
|
ELSE 'Unknown Risk'
|
|
END AS risk_level,
|
|
|
|
-- Alert data
|
|
COALESCE(al.sar_flag, 'N') AS sar_flag
|
|
|
|
FROM {trans_data} t
|
|
|
|
-- Join with account data on beneficiary account
|
|
LEFT JOIN {acc_data} a
|
|
ON t.benef_account_number = a.account_number
|
|
|
|
-- Join with party/customer data using account's customer number
|
|
LEFT JOIN {cust_data} p
|
|
ON a.customer_number = p.customer_number
|
|
|
|
-- Join with alert data using party's customer number
|
|
LEFT JOIN {alert_data} al
|
|
ON p.customer_number = al.customer_number
|
|
|
|
WHERE a.account_number IS NOT NULL
|
|
"""
|
|
|
|
|
|
# In[44]:
|
|
|
|
|
|
def trx_count_sum_groupwise(data_filt_partywise):
|
|
data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount')
|
|
groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt',
|
|
'MIN_LIMIT', 'PCT_RANGE'])
|
|
|
|
trxns = data_filt_partywise['transaction_amount'].values
|
|
pct_range = data_filt_partywise['PCT_RANGE'].max()
|
|
min_value = data_filt_partywise['MIN_LIMIT'].max()
|
|
|
|
trxns = trxns[trxns >= min_value]
|
|
if len(trxns) > 0:
|
|
min_value = trxns[0]
|
|
|
|
group_count = 0
|
|
while len(trxns) > 0:
|
|
max_value = min_value + (pct_range * 0.01 * min_value)
|
|
mask = np.logical_and(trxns >= min_value, trxns <= max_value)
|
|
group_filter_trx = trxns[mask]
|
|
trx_count = len(group_filter_trx)
|
|
trx_sum = np.sum(group_filter_trx)
|
|
group_count += 1
|
|
groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum,
|
|
min_value, pct_range]
|
|
trxns = trxns[trxns > max_value]
|
|
if len(trxns) > 0:
|
|
min_value = trxns[0]
|
|
|
|
return groupeddata.to_dict('list')
|
|
|
|
# ---------------------------
|
|
# Function 4: Run scenario 9
|
|
# ---------------------------
|
|
def scenario9_data(data1):
|
|
grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(
|
|
trx_count_sum_groupwise).reset_index()
|
|
|
|
df_list = []
|
|
for i in grouped.index:
|
|
df_party = pd.DataFrame(grouped.iloc[i, -1])
|
|
df_party['Focal_id'] = grouped.loc[i, 'Focal_id']
|
|
df_list.append(df_party)
|
|
|
|
final_df = pd.concat(df_list, ignore_index=True)
|
|
Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()
|
|
Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()
|
|
SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()
|
|
|
|
final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')
|
|
final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')
|
|
final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')
|
|
|
|
return final_df
|
|
|
|
|
|
|
|
# In[45]:
|
|
|
|
|
|
seq = SQLQueryInterface(schema="transactionschema")
|
|
data = seq.execute_raw(query2)
|
|
Columns = ['point_of_percentile', 'value', 'total_event',
|
|
'true_positive', 'false_positive', 'tpsar']
|
|
percent_dist = pd.DataFrame(data,columns = Columns)
|
|
|
|
|
|
# In[52]:
|
|
|
|
|
|
# round(int(percent_dist[percent_dist['point_of_percentile']\
|
|
# == 75]['value'].iloc[0])/100)*100
|
|
|
|
|
|
# In[53]:
|
|
|
|
|
|
class Scenario:
|
|
seq = SQLQueryInterface(schema="transactionschema")
|
|
|
|
def logic(self, **kwargs):
|
|
row_list = self.seq.execute_raw(query.format(trans_data="transaction10m",
|
|
cust_data="customer_data_v1",
|
|
acc_data="account_data_v1",
|
|
alert_data="alert_data_v1")
|
|
)
|
|
cols = [
|
|
"transaction_key",
|
|
"transaction_date",
|
|
"transaction_amount",
|
|
"transaction_desc",
|
|
"benef_account_number",
|
|
"account_number",
|
|
"acc_customer_number",
|
|
"Focal_id",
|
|
"customer_name",
|
|
"date_of_birth",
|
|
"nationality",
|
|
"Segment",
|
|
"Risk",
|
|
"SAR_FLAG"
|
|
]
|
|
df = pd.DataFrame(row_list, columns = cols)
|
|
df['Segment'] = 'SME'
|
|
df['MIN_LIMIT'] = round(int(percent_dist[percent_dist['point_of_percentile']\
|
|
== 75]['value'].iloc[0])/100)*100
|
|
df['PCT_RANGE'] = 20
|
|
|
|
scenario_data = scenario9_data(df)
|
|
|
|
return scenario_data
|
|
|
|
|
|
# In[55]:
|
|
|
|
|
|
# sen = Scenario()
|
|
# sen.logic()
|
|
|