generated from user_client2024/77
194 lines
6.7 KiB
Python
194 lines
6.7 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[93]:
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
# In[94]:
|
|
|
|
|
|
from tms_data_interface import SQLQueryInterface
|
|
seq = SQLQueryInterface(schema="transactionschema")
|
|
|
|
|
|
# In[6]:
|
|
|
|
|
|
seq.execute_raw("show tables")
|
|
|
|
|
|
# In[95]:
|
|
|
|
|
|
query = """
|
|
select final.CUSTOMER_NUMBER_main as Focal_id,
|
|
final.Credit_transaction_amount,
|
|
final.Total_no_of_credit_transactions,
|
|
final.Debit_transaction_amount,
|
|
final.Total_no_of_debit_transactions,
|
|
final.Wash_Ratio,
|
|
final.SEGMENT,
|
|
final.RISK,
|
|
final.SAR_FLAG
|
|
from
|
|
(
|
|
(
|
|
select subquery.CUSTOMER_NUMBER_1 as CUSTOMER_NUMBER_main,
|
|
subquery.Credit_transaction_amount,
|
|
subquery.Total_no_of_credit_transactions,
|
|
case
|
|
when subquery.Debit_transaction_amount is NULL then 0
|
|
else Debit_transaction_amount
|
|
end as Debit_transaction_amount,
|
|
case
|
|
when subquery.Total_no_of_debit_transactions is NULL then 0
|
|
else Total_no_of_debit_transactions
|
|
end as Total_no_of_debit_transactions,
|
|
case
|
|
when subquery.Debit_transaction_amount = 0
|
|
or subquery.Debit_transaction_amount is NULL then 0
|
|
else subquery.Credit_transaction_amount / subquery.Debit_transaction_amount
|
|
end as Wash_Ratio
|
|
from
|
|
(
|
|
(
|
|
select customer_number as CUSTOMER_NUMBER_1,
|
|
sum(transaction_amount) as Credit_transaction_amount,
|
|
count(*) as Total_no_of_credit_transactions
|
|
from
|
|
(
|
|
select *
|
|
from {trans_data} as trans_table left join {acc_data} as acc_table
|
|
on trans_table.benef_account_number = acc_table.account_number
|
|
)
|
|
where account_number not in ('None')
|
|
group by 1
|
|
) credit left join
|
|
(
|
|
select customer_number as CUSTOMER_NUMBER_2,
|
|
sum(transaction_amount) as Debit_transaction_amount,
|
|
count(*) as Total_no_of_debit_transactions
|
|
from
|
|
(
|
|
select *
|
|
from {trans_data} as trans_table left join {acc_data} as acc_table
|
|
on trans_table.orig_account_number = acc_table.account_number
|
|
)
|
|
where account_number not in ('None')
|
|
group by 1
|
|
) debit on credit.CUSTOMER_NUMBER_1 = debit.CUSTOMER_NUMBER_2
|
|
) subquery
|
|
) main left join
|
|
(
|
|
select subquery.CUSTOMER_NUMBER_3 as CUSTOMER_NUMBER_cust,
|
|
subquery.SEGMENT,
|
|
subquery.RISK,
|
|
case
|
|
when subquery.SAR_FLAG is NULL then 'N'
|
|
else subquery.SAR_FLAG
|
|
end as SAR_FLAG
|
|
from
|
|
(
|
|
(
|
|
select customer_number as CUSTOMER_NUMBER_3,
|
|
business_segment as SEGMENT,
|
|
case
|
|
when RISK_CLASSIFICATION = 1 then 'Low Risk'
|
|
when RISK_CLASSIFICATION = 2 then 'Medium Risk'
|
|
when RISK_CLASSIFICATION = 3 then 'High Risk'
|
|
else 'Unknown Risk'
|
|
end AS RISK
|
|
from {cust_data}
|
|
) cd left join
|
|
(
|
|
select customer_number as CUSTOMER_NUMBER_4,
|
|
sar_flag as SAR_FLAG
|
|
from {alert_data}
|
|
) ad on cd.CUSTOMER_NUMBER_3 = ad.CUSTOMER_NUMBER_4
|
|
) subquery
|
|
) cust_alert on cust_alert.CUSTOMER_NUMBER_cust = main.CUSTOMER_NUMBER_main
|
|
) final
|
|
"""
|
|
|
|
|
|
# In[101]:
|
|
|
|
|
|
from tms_data_interface import SQLQueryInterface
|
|
|
|
class Scenario:
|
|
seq = SQLQueryInterface(schema="transactionschema")
|
|
|
|
def logic(self, **kwargs):
|
|
row_list = self.seq.execute_raw(query.format(trans_data="transaction10m",
|
|
cust_data="customer_data_v1",
|
|
acc_data="account_data_v1",
|
|
alert_data="alert_data_v1")
|
|
)
|
|
cols = ["Focal_id", "Credit_transaction_amount",
|
|
"Total_no_of_credit_transactions",
|
|
"Debit_transaction_amount", "Total_no_of_debit_transactions",
|
|
"Wash_Ratio", "Segment", "Risk", "SAR_FLAG"]
|
|
df = pd.DataFrame(row_list, columns = cols)
|
|
df[["Credit_transaction_amount",
|
|
"Debit_transaction_amount"]] = df[["Credit_transaction_amount",
|
|
"Debit_transaction_amount"]].astype('int')
|
|
df["Wash_Ratio"] = df["Wash_Ratio"].astype('float')
|
|
# Step 1: Compute 90th percentiles per Segment for all 3 fields
|
|
percentiles = (
|
|
df.groupby("Segment")[["Credit_transaction_amount",
|
|
"Debit_transaction_amount",
|
|
"Wash_Ratio"]]
|
|
.quantile(0.95)
|
|
.reset_index()
|
|
)
|
|
|
|
# Rename columns for clarity
|
|
percentiles = percentiles.rename(columns={
|
|
"Credit_transaction_amount": "P90_Credit",
|
|
"Debit_transaction_amount": "P90_Debit",
|
|
"Wash_Ratio": "P90_Wash"
|
|
})
|
|
|
|
# Step 2: Merge back to main df
|
|
df = df.merge(percentiles, on="Segment", how="left")
|
|
|
|
# Step 3: Identify customers above 90th percentile in ANY of the 3 metrics
|
|
high_pop = (
|
|
# (df["Credit_transaction_amount"] > df["P90_Credit"]) &
|
|
(df["Debit_transaction_amount"] > df["P90_Debit"]) &
|
|
(df["Wash_Ratio"] > 0.90)
|
|
)
|
|
|
|
# Step 4: Randomly select 0.1% sample from high-risk population
|
|
sample_fraction = 0.1 # 0.1%
|
|
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
|
|
|
|
# Step 5: Set SAR_FLAG values
|
|
df["SAR_FLAG"] = "N" # default for all
|
|
df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to 0.1% random high-risk population
|
|
return df
|
|
|
|
|
|
# In[107]:
|
|
|
|
|
|
# sen = Scenario()
|
|
# a = sen.logic()
|
|
|
|
|
|
# In[106]:
|
|
|
|
|
|
# a
|
|
|
|
|
|
# In[105]:
|
|
|
|
|
|
# a[a["SAR_FLAG"] == "Y"]
|
|
|