#!/usr/bin/env python # coding: utf-8 # In[1]: # import pandas as pd # query = """ # select final.CUSTOMER_NUMBER_main as Focal_id, # CAST(final.Cash_deposit_total AS DECIMAL(18, 2)) AS Cash_deposit_total, # final.Cash_deposit_count, # final.SEGMENT, # final.RISK, # final.SAR_FLAG # from # ( # ( # select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main, # subquery.Cash_deposit_total, # subquery.Cash_deposit_count # from # ( # select customer_number as CUSTOMER_NUMBER_1, # sum(transaction_amount) as Cash_deposit_total, # count(*) as Cash_deposit_count # from # ( # select * # from {trans_data} trans_table # left join {acc_data} acc_table # on trans_table.benef_account_number = acc_table.account_number # ) trans # where account_number not in ('None') # and transaction_desc = 'CASH RELATED TRANSACTION' # group by customer_number # ) subquery # ) main # left join # ( # select cd.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust, # cd.SEGMENT, # cd.RISK, # case # when ad.SAR_FLAG is NULL then 'N' # else ad.SAR_FLAG # end as SAR_FLAG # from # ( # select customer_number as CUSTOMER_NUMBER_3, # business_segment as SEGMENT, # case # when RISK_CLASSIFICATION = 1 then 'Low Risk' # when RISK_CLASSIFICATION = 2 then 'Medium Risk' # when RISK_CLASSIFICATION = 3 then 'High Risk' # else 'Unknown Risk' # end AS RISK # from {cust_data} # ) cd # left join # ( # select customer_number as CUSTOMER_NUMBER_4, # sar_flag as SAR_FLAG # from {alert_data} # ) ad # on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4 # ) as cust_alert # on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main # ) as final # """ # from tms_data_interface import SQLQueryInterface # class Scenario: # seq = SQLQueryInterface(schema="transactionschema") # def logic(self, **kwargs): # row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", # cust_data="customer_data_v1", # acc_data="account_data_v1", # alert_data="alert_data_v1") # ) # cols = ["Focal_id", "Cash_deposit_total", "Cash_deposit_count", # "Segment", "Risk", "SAR_FLAG"] # df = pd.DataFrame(row_list, columns = cols) # df["Cash_deposit_total"] = df["Cash_deposit_total"].astype(float) # return df # In[15]: import pandas as pd import numpy as np query = """ SELECT t.transaction_key, t.transaction_date, t.transaction_amount, t.transaction_desc, t.benef_account_number, -- Account data a.account_number, a.customer_number AS acc_customer_number, -- Party data p.customer_number AS party_customer_number, p.customer_name, p.date_of_birth, p.nationality, p.business_segment, CASE WHEN p.risk_classification = 1 THEN 'Low Risk' WHEN p.risk_classification = 2 THEN 'Medium Risk' WHEN p.risk_classification = 3 THEN 'High Risk' ELSE 'Unknown Risk' END AS risk_level, -- Alert data COALESCE(al.sar_flag, 'N') AS sar_flag FROM {trans_data} t -- Join with account data on beneficiary account LEFT JOIN {acc_data} a ON t.benef_account_number = a.account_number -- Join with party/customer data using account's customer number LEFT JOIN {cust_data} p ON a.customer_number = p.customer_number -- Join with alert data using party's customer number LEFT JOIN {alert_data} al ON p.customer_number = al.customer_number WHERE a.account_number IS NOT NULL """ # In[20]: # def trx_count_sum_groupwise(data_filt_partywise): # data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') # groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', # 'MIN_LIMIT', 'PCT_RANGE']) # trxns = data_filt_partywise['transaction_amount'].values # pct_range = data_filt_partywise['PCT_RANGE'].max() # min_value = data_filt_partywise['MIN_LIMIT'].max() # trxns = trxns[trxns >= min_value] # if len(trxns) > 0: # min_value = trxns[0] # group_count = 0 # while len(trxns) > 0: # max_value = min_value + (pct_range * 0.01 * min_value) # mask = np.logical_and(trxns >= min_value, trxns <= max_value) # group_filter_trx = trxns[mask] # trx_count = len(group_filter_trx) # trx_sum = np.sum(group_filter_trx) # group_count += 1 # groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, # min_value, pct_range] # trxns = trxns[trxns > max_value] # if len(trxns) > 0: # min_value = trxns[0] # return groupeddata.to_dict('list') # # --------------------------- # # Function 4: Run scenario 9 # # --------------------------- # def scenario9_data(data1): # grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply( # trx_count_sum_groupwise).reset_index() # df_list = [] # for i in grouped.index: # df_party = pd.DataFrame(grouped.iloc[i, -1]) # df_party['Focal_id'] = grouped.loc[i, 'Focal_id'] # df_list.append(df_party) # final_df = pd.concat(df_list, ignore_index=True) # Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index() # Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index() # SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index() # final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left') # final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left') # final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left') # return final_df # In[17]: from tms_data_interface import SQLQueryInterface class Scenario: seq = SQLQueryInterface(schema="transactionschema") def logic(self, **kwargs): row_list = self.seq.execute_raw(query.format(trans_data="transaction10m", cust_data="customer_data_v1", acc_data="account_data_v1", alert_data="alert_data_v1") ) cols = [ "transaction_key", "transaction_date", "transaction_amount", "transaction_desc", "benef_account_number", "account_number", "acc_customer_number", "Focal_id", "customer_name", "date_of_birth", "nationality", "Segment", "Risk", "SAR_FLAG" ] df = pd.DataFrame(row_list, columns = cols) df['Segment'] = 'SME' # df['MIN_LIMIT'] = 50000 # df['PCT_RANGE'] = 20 # scenario_data = scenario9_data(df) return df # In[19]: # sen = Scenario() # sen.logic()