From b152e9fbc943af7e879090aac71275ce9ab42030 Mon Sep 17 00:00:00 2001 From: user_client2024 Date: Thu, 27 Nov 2025 05:57:34 +0000 Subject: [PATCH] System save at 27/11/2025 11:27 by user_client2024 --- .ipynb_checkpoints/main-checkpoint.ipynb | 288 +++++------------------ main.ipynb | 288 +++++------------------ main.py | 52 +++- 3 files changed, 166 insertions(+), 462 deletions(-) diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index d4445a1..d0ae983 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 34, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -180,247 +180,79 @@ " \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n", " \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", + " \n", + " # Step 1: Compute 90th percentiles per Segment for all 3 fields\n", + " percentiles = (\n", + " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", + " \"Debit_transaction_amount\",\n", + " \"Wash_Ratio\"]]\n", + " .quantile(0.98)\n", + " .reset_index()\n", + " )\n", + "\n", + " # Rename columns for clarity\n", + " percentiles = percentiles.rename(columns={\n", + " \"Credit_transaction_amount\": \"P90_Credit\",\n", + " \"Debit_transaction_amount\": \"P90_Debit\",\n", + " \"Wash_Ratio\": \"P90_Wash\"\n", + " })\n", + "\n", + " # Step 2: Merge back to main df\n", + " df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n", + "\n", + " # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n", + " high_pop = (\n", + " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n", + " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n", + " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", + " )\n", + "\n", + " # Step 4: Randomly select 0.1% sample from high-risk population\n", + " sample_fraction = 0.1 # 0.1%\n", + " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", + "\n", + " # Step 5: Set SAR_FLAG values\n", + " df[\"SAR_FLAG\"] = \"N\" # default for all\n", + " df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n", " return df" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 40, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAG
0PN8086244.601504e+0932394.461280e+0931291.031431Corporate BankingMedium RiskN
1PN6630412.106224e+0915732.281829e+0915630.923042Corporate BankingLow RiskN
2PN5259131.057799e+097761.223876e+098500.864302Whole Sale BankingLow RiskN
3PN4402744.806265e+0935064.972813e+0935990.966508Whole Sale BankingMedium RiskN
4PN2130263.982349e+0928094.122674e+0927830.965963Whole Sale BankingMedium RiskN
..............................
10009PN7747413.373466e+072502.443148e+073811.380787Priority BankingMedium RiskN
10010PN8683263.785344e+072592.408309e+073521.571785Ultra High NetWorthMedium RiskY
10011PN6678373.330357e+072562.676301e+073591.244388Mass MarketMedium RiskN
10012PN8095663.890076e+072762.554121e+074001.523059OthersLow RiskN
10013PN7396473.505184e+072232.232980e+073811.569734OthersLow RiskN
\n", - "

10014 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN808624 4.601504e+09 3239 \n", - "1 PN663041 2.106224e+09 1573 \n", - "2 PN525913 1.057799e+09 776 \n", - "3 PN440274 4.806265e+09 3506 \n", - "4 PN213026 3.982349e+09 2809 \n", - "... ... ... ... \n", - "10009 PN774741 3.373466e+07 250 \n", - "10010 PN868326 3.785344e+07 259 \n", - "10011 PN667837 3.330357e+07 256 \n", - "10012 PN809566 3.890076e+07 276 \n", - "10013 PN739647 3.505184e+07 223 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 4.461280e+09 3129 1.031431 \n", - "1 2.281829e+09 1563 0.923042 \n", - "2 1.223876e+09 850 0.864302 \n", - "3 4.972813e+09 3599 0.966508 \n", - "4 4.122674e+09 2783 0.965963 \n", - "... ... ... ... \n", - "10009 2.443148e+07 381 1.380787 \n", - "10010 2.408309e+07 352 1.571785 \n", - "10011 2.676301e+07 359 1.244388 \n", - "10012 2.554121e+07 400 1.523059 \n", - "10013 2.232980e+07 381 1.569734 \n", - "\n", - " Segment Risk SAR_FLAG \n", - "0 Corporate Banking Medium Risk N \n", - "1 Corporate Banking Low Risk N \n", - "2 Whole Sale Banking Low Risk N \n", - "3 Whole Sale Banking Medium Risk N \n", - "4 Whole Sale Banking Medium Risk N \n", - "... ... ... ... \n", - "10009 Priority Banking Medium Risk N \n", - "10010 Ultra High NetWorth Medium Risk Y \n", - "10011 Mass Market Medium Risk N \n", - "10012 Others Low Risk N \n", - "10013 Others Low Risk N \n", - "\n", - "[10014 rows x 9 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# sen = Scenario()\n", - "# sen.logic()" + "# a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "150bb5ce-6be1-44fc-a606-6d375354626d", - "metadata": {}, + "execution_count": 39, + "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", + "metadata": { + "tags": [] + }, "outputs": [], - "source": [] + "source": [ + "# a" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "150bb5ce-6be1-44fc-a606-6d375354626d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# a[a[\"SAR_FLAG\"] == \"Y\"]" + ] } ], "metadata": { diff --git a/main.ipynb b/main.ipynb index d4445a1..d0ae983 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 34, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -180,247 +180,79 @@ " \"Debit_transaction_amount\", \"Total_no_of_debit_transactions\",\n", " \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", + " \n", + " # Step 1: Compute 90th percentiles per Segment for all 3 fields\n", + " percentiles = (\n", + " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", + " \"Debit_transaction_amount\",\n", + " \"Wash_Ratio\"]]\n", + " .quantile(0.98)\n", + " .reset_index()\n", + " )\n", + "\n", + " # Rename columns for clarity\n", + " percentiles = percentiles.rename(columns={\n", + " \"Credit_transaction_amount\": \"P90_Credit\",\n", + " \"Debit_transaction_amount\": \"P90_Debit\",\n", + " \"Wash_Ratio\": \"P90_Wash\"\n", + " })\n", + "\n", + " # Step 2: Merge back to main df\n", + " df = df.merge(percentiles, on=\"Segment\", how=\"left\")\n", + "\n", + " # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n", + " high_pop = (\n", + " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n", + " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n", + " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", + " )\n", + "\n", + " # Step 4: Randomly select 0.1% sample from high-risk population\n", + " sample_fraction = 0.1 # 0.1%\n", + " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", + "\n", + " # Step 5: Set SAR_FLAG values\n", + " df[\"SAR_FLAG\"] = \"N\" # default for all\n", + " df.loc[high_pop_indices, \"SAR_FLAG\"] = \"Y\" # assign Y to 0.1% random high-risk population\n", " return df" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 40, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAG
0PN8086244.601504e+0932394.461280e+0931291.031431Corporate BankingMedium RiskN
1PN6630412.106224e+0915732.281829e+0915630.923042Corporate BankingLow RiskN
2PN5259131.057799e+097761.223876e+098500.864302Whole Sale BankingLow RiskN
3PN4402744.806265e+0935064.972813e+0935990.966508Whole Sale BankingMedium RiskN
4PN2130263.982349e+0928094.122674e+0927830.965963Whole Sale BankingMedium RiskN
..............................
10009PN7747413.373466e+072502.443148e+073811.380787Priority BankingMedium RiskN
10010PN8683263.785344e+072592.408309e+073521.571785Ultra High NetWorthMedium RiskY
10011PN6678373.330357e+072562.676301e+073591.244388Mass MarketMedium RiskN
10012PN8095663.890076e+072762.554121e+074001.523059OthersLow RiskN
10013PN7396473.505184e+072232.232980e+073811.569734OthersLow RiskN
\n", - "

10014 rows × 9 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN808624 4.601504e+09 3239 \n", - "1 PN663041 2.106224e+09 1573 \n", - "2 PN525913 1.057799e+09 776 \n", - "3 PN440274 4.806265e+09 3506 \n", - "4 PN213026 3.982349e+09 2809 \n", - "... ... ... ... \n", - "10009 PN774741 3.373466e+07 250 \n", - "10010 PN868326 3.785344e+07 259 \n", - "10011 PN667837 3.330357e+07 256 \n", - "10012 PN809566 3.890076e+07 276 \n", - "10013 PN739647 3.505184e+07 223 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 4.461280e+09 3129 1.031431 \n", - "1 2.281829e+09 1563 0.923042 \n", - "2 1.223876e+09 850 0.864302 \n", - "3 4.972813e+09 3599 0.966508 \n", - "4 4.122674e+09 2783 0.965963 \n", - "... ... ... ... \n", - "10009 2.443148e+07 381 1.380787 \n", - "10010 2.408309e+07 352 1.571785 \n", - "10011 2.676301e+07 359 1.244388 \n", - "10012 2.554121e+07 400 1.523059 \n", - "10013 2.232980e+07 381 1.569734 \n", - "\n", - " Segment Risk SAR_FLAG \n", - "0 Corporate Banking Medium Risk N \n", - "1 Corporate Banking Low Risk N \n", - "2 Whole Sale Banking Low Risk N \n", - "3 Whole Sale Banking Medium Risk N \n", - "4 Whole Sale Banking Medium Risk N \n", - "... ... ... ... \n", - "10009 Priority Banking Medium Risk N \n", - "10010 Ultra High NetWorth Medium Risk Y \n", - "10011 Mass Market Medium Risk N \n", - "10012 Others Low Risk N \n", - "10013 Others Low Risk N \n", - "\n", - "[10014 rows x 9 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# sen = Scenario()\n", - "# sen.logic()" + "# a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "150bb5ce-6be1-44fc-a606-6d375354626d", - "metadata": {}, + "execution_count": 39, + "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", + "metadata": { + "tags": [] + }, "outputs": [], - "source": [] + "source": [ + "# a" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "150bb5ce-6be1-44fc-a606-6d375354626d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# a[a[\"SAR_FLAG\"] == \"Y\"]" + ] } ], "metadata": { diff --git a/main.py b/main.py index 2f5e9af..98e9fe8 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# In[1]: +# In[4]: import pandas as pd @@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema") seq.execute_raw("show tables") -# In[7]: +# In[6]: query = """ @@ -114,7 +114,7 @@ query = """ """ -# In[8]: +# In[34]: from tms_data_interface import SQLQueryInterface @@ -133,18 +133,58 @@ class Scenario: "Debit_transaction_amount", "Total_no_of_debit_transactions", "Wash_Ratio", "Segment", "Risk", "SAR_FLAG"] df = pd.DataFrame(row_list, columns = cols) + + # Step 1: Compute 90th percentiles per Segment for all 3 fields + percentiles = ( + df.groupby("Segment")[["Credit_transaction_amount", + "Debit_transaction_amount", + "Wash_Ratio"]] + .quantile(0.98) + .reset_index() + ) + + # Rename columns for clarity + percentiles = percentiles.rename(columns={ + "Credit_transaction_amount": "P90_Credit", + "Debit_transaction_amount": "P90_Debit", + "Wash_Ratio": "P90_Wash" + }) + + # Step 2: Merge back to main df + df = df.merge(percentiles, on="Segment", how="left") + + # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics + high_pop = ( + (df["Credit_transaction_amount"] > df["P90_Credit"]) | + (df["Debit_transaction_amount"] > df["P90_Debit"]) | + (df["Wash_Ratio"] > df["P90_Wash"]) + ) + + # Step 4: Randomly select 0.1% sample from high-risk population + sample_fraction = 0.1 # 0.1% + high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index + + # Step 5: Set SAR_FLAG values + df["SAR_FLAG"] = "N" # default for all + df.loc[high_pop_indices, "SAR_FLAG"] = "Y" # assign Y to 0.1% random high-risk population return df -# In[9]: +# In[40]: # sen = Scenario() -# sen.logic() +# a = sen.logic() -# In[ ]: +# In[39]: +# a +# In[38]: + + +# a[a["SAR_FLAG"] == "Y"] +