diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb
index 774458a..a083585 100644
--- a/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 77,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
- " .quantile(0.98)\n",
+ " .quantile(0.90)\n",
" .reset_index()\n",
" )\n",
"\n",
@@ -205,13 +205,13 @@
"\n",
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
" high_pop = (\n",
- " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
- " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
+ " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
+ " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
" )\n",
"\n",
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
- " sample_fraction = 0.1 # 0.1%\n",
+ " sample_fraction = 0.1 # 10% (frac is a fraction of rows, not a percentage)\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@@ -222,299 +222,32 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 83,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "sen = Scenario()\n",
- "a = sen.logic()"
+ "# sen = Scenario()\n",
+ "# a = sen.logic()"
]
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 82,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Focal_id | \n",
- " Credit_transaction_amount | \n",
- " Total_no_of_credit_transactions | \n",
- " Debit_transaction_amount | \n",
- " Total_no_of_debit_transactions | \n",
- " Wash_Ratio | \n",
- " Segment | \n",
- " Risk | \n",
- " SAR_FLAG | \n",
- " P90_Credit | \n",
- " P90_Debit | \n",
- " P90_Wash | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " PN478710 | \n",
- " 2805352312 | \n",
- " 2020 | \n",
- " 2787126309 | \n",
- " 2025 | \n",
- " 1.006539 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.274828e+09 | \n",
- " 6.259298e+09 | \n",
- " 1.090121 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " PN483125 | \n",
- " 3890052135 | \n",
- " 2797 | \n",
- " 3968882113 | \n",
- " 2850 | \n",
- " 0.980138 | \n",
- " Govt. Entities | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.112897e+09 | \n",
- " 6.072409e+09 | \n",
- " 1.112059 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " PN890403 | \n",
- " 4136296083 | \n",
- " 2937 | \n",
- " 3999785063 | \n",
- " 2824 | \n",
- " 1.034130 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.709904e+09 | \n",
- " 5.559419e+09 | \n",
- " 1.118816 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PN531475 | \n",
- " 4183673982 | \n",
- " 2861 | \n",
- " 3987068168 | \n",
- " 2770 | \n",
- " 1.049311 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.274828e+09 | \n",
- " 6.259298e+09 | \n",
- " 1.090121 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " PN147722 | \n",
- " 1775594615 | \n",
- " 1225 | \n",
- " 1641559222 | \n",
- " 1221 | \n",
- " 1.081651 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.709904e+09 | \n",
- " 5.559419e+09 | \n",
- " 1.118816 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 10009 | \n",
- " PN955059 | \n",
- " 31106290 | \n",
- " 264 | \n",
- " 25266130 | \n",
- " 369 | \n",
- " 1.231146 | \n",
- " Priority Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.616620e+07 | \n",
- " 5.263062e+07 | \n",
- " 1.921224 | \n",
- "
\n",
- " \n",
- " | 10010 | \n",
- " PN602067 | \n",
- " 29780658 | \n",
- " 238 | \n",
- " 27796448 | \n",
- " 405 | \n",
- " 1.071384 | \n",
- " Others | \n",
- " High Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10011 | \n",
- " PN213487 | \n",
- " 41410071 | \n",
- " 274 | \n",
- " 23896844 | \n",
- " 368 | \n",
- " 1.732868 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10012 | \n",
- " PN563065 | \n",
- " 34009021 | \n",
- " 251 | \n",
- " 32563582 | \n",
- " 375 | \n",
- " 1.044388 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10013 | \n",
- " PN388875 | \n",
- " 30904340 | \n",
- " 236 | \n",
- " 21938266 | \n",
- " 344 | \n",
- " 1.408696 | \n",
- " Mass Market | \n",
- " Medium Risk | \n",
- " N | \n",
- " 7.921967e+07 | \n",
- " 5.290545e+07 | \n",
- " 1.915159 | \n",
- "
\n",
- " \n",
- "
\n",
- "
10014 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
- "0 PN478710 2805352312 2020 \n",
- "1 PN483125 3890052135 2797 \n",
- "2 PN890403 4136296083 2937 \n",
- "3 PN531475 4183673982 2861 \n",
- "4 PN147722 1775594615 1225 \n",
- "... ... ... ... \n",
- "10009 PN955059 31106290 264 \n",
- "10010 PN602067 29780658 238 \n",
- "10011 PN213487 41410071 274 \n",
- "10012 PN563065 34009021 251 \n",
- "10013 PN388875 30904340 236 \n",
- "\n",
- " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
- "0 2787126309 2025 1.006539 \n",
- "1 3968882113 2850 0.980138 \n",
- "2 3999785063 2824 1.034130 \n",
- "3 3987068168 2770 1.049311 \n",
- "4 1641559222 1221 1.081651 \n",
- "... ... ... ... \n",
- "10009 25266130 369 1.231146 \n",
- "10010 27796448 405 1.071384 \n",
- "10011 23896844 368 1.732868 \n",
- "10012 32563582 375 1.044388 \n",
- "10013 21938266 344 1.408696 \n",
- "\n",
- " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
- "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n",
- "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n",
- "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n",
- "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n",
- "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n",
- "... ... ... ... ... ... \n",
- "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n",
- "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n",
- "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n",
- "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n",
- "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n",
- "\n",
- " P90_Wash \n",
- "0 1.090121 \n",
- "1 1.112059 \n",
- "2 1.118816 \n",
- "3 1.090121 \n",
- "4 1.118816 \n",
- "... ... \n",
- "10009 1.921224 \n",
- "10010 1.931817 \n",
- "10011 1.931817 \n",
- "10012 1.931817 \n",
- "10013 1.915159 \n",
- "\n",
- "[10014 rows x 12 columns]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "a"
+ "# a"
]
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 81,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []
diff --git a/main.ipynb b/main.ipynb
index 774458a..a083585 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 77,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
- " .quantile(0.98)\n",
+ " .quantile(0.90)\n",
" .reset_index()\n",
" )\n",
"\n",
@@ -205,13 +205,13 @@
"\n",
" # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n",
" high_pop = (\n",
- " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n",
- " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n",
+ " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
+ " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
" )\n",
"\n",
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
- " sample_fraction = 0.1 # 0.1%\n",
+ " sample_fraction = 0.1 # 10% (frac is a fraction of rows, not a percentage)\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@@ -222,299 +222,32 @@
},
{
"cell_type": "code",
- "execution_count": 58,
+ "execution_count": 83,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
- "sen = Scenario()\n",
- "a = sen.logic()"
+ "# sen = Scenario()\n",
+ "# a = sen.logic()"
]
},
{
"cell_type": "code",
- "execution_count": 59,
+ "execution_count": 82,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Focal_id | \n",
- " Credit_transaction_amount | \n",
- " Total_no_of_credit_transactions | \n",
- " Debit_transaction_amount | \n",
- " Total_no_of_debit_transactions | \n",
- " Wash_Ratio | \n",
- " Segment | \n",
- " Risk | \n",
- " SAR_FLAG | \n",
- " P90_Credit | \n",
- " P90_Debit | \n",
- " P90_Wash | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " PN478710 | \n",
- " 2805352312 | \n",
- " 2020 | \n",
- " 2787126309 | \n",
- " 2025 | \n",
- " 1.006539 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.274828e+09 | \n",
- " 6.259298e+09 | \n",
- " 1.090121 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " PN483125 | \n",
- " 3890052135 | \n",
- " 2797 | \n",
- " 3968882113 | \n",
- " 2850 | \n",
- " 0.980138 | \n",
- " Govt. Entities | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.112897e+09 | \n",
- " 6.072409e+09 | \n",
- " 1.112059 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " PN890403 | \n",
- " 4136296083 | \n",
- " 2937 | \n",
- " 3999785063 | \n",
- " 2824 | \n",
- " 1.034130 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.709904e+09 | \n",
- " 5.559419e+09 | \n",
- " 1.118816 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PN531475 | \n",
- " 4183673982 | \n",
- " 2861 | \n",
- " 3987068168 | \n",
- " 2770 | \n",
- " 1.049311 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.274828e+09 | \n",
- " 6.259298e+09 | \n",
- " 1.090121 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " PN147722 | \n",
- " 1775594615 | \n",
- " 1225 | \n",
- " 1641559222 | \n",
- " 1221 | \n",
- " 1.081651 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.709904e+09 | \n",
- " 5.559419e+09 | \n",
- " 1.118816 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 10009 | \n",
- " PN955059 | \n",
- " 31106290 | \n",
- " 264 | \n",
- " 25266130 | \n",
- " 369 | \n",
- " 1.231146 | \n",
- " Priority Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.616620e+07 | \n",
- " 5.263062e+07 | \n",
- " 1.921224 | \n",
- "
\n",
- " \n",
- " | 10010 | \n",
- " PN602067 | \n",
- " 29780658 | \n",
- " 238 | \n",
- " 27796448 | \n",
- " 405 | \n",
- " 1.071384 | \n",
- " Others | \n",
- " High Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10011 | \n",
- " PN213487 | \n",
- " 41410071 | \n",
- " 274 | \n",
- " 23896844 | \n",
- " 368 | \n",
- " 1.732868 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10012 | \n",
- " PN563065 | \n",
- " 34009021 | \n",
- " 251 | \n",
- " 32563582 | \n",
- " 375 | \n",
- " 1.044388 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 7.897534e+07 | \n",
- " 5.488447e+07 | \n",
- " 1.931817 | \n",
- "
\n",
- " \n",
- " | 10013 | \n",
- " PN388875 | \n",
- " 30904340 | \n",
- " 236 | \n",
- " 21938266 | \n",
- " 344 | \n",
- " 1.408696 | \n",
- " Mass Market | \n",
- " Medium Risk | \n",
- " N | \n",
- " 7.921967e+07 | \n",
- " 5.290545e+07 | \n",
- " 1.915159 | \n",
- "
\n",
- " \n",
- "
\n",
- "
10014 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
- "0 PN478710 2805352312 2020 \n",
- "1 PN483125 3890052135 2797 \n",
- "2 PN890403 4136296083 2937 \n",
- "3 PN531475 4183673982 2861 \n",
- "4 PN147722 1775594615 1225 \n",
- "... ... ... ... \n",
- "10009 PN955059 31106290 264 \n",
- "10010 PN602067 29780658 238 \n",
- "10011 PN213487 41410071 274 \n",
- "10012 PN563065 34009021 251 \n",
- "10013 PN388875 30904340 236 \n",
- "\n",
- " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
- "0 2787126309 2025 1.006539 \n",
- "1 3968882113 2850 0.980138 \n",
- "2 3999785063 2824 1.034130 \n",
- "3 3987068168 2770 1.049311 \n",
- "4 1641559222 1221 1.081651 \n",
- "... ... ... ... \n",
- "10009 25266130 369 1.231146 \n",
- "10010 27796448 405 1.071384 \n",
- "10011 23896844 368 1.732868 \n",
- "10012 32563582 375 1.044388 \n",
- "10013 21938266 344 1.408696 \n",
- "\n",
- " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
- "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n",
- "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n",
- "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n",
- "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n",
- "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n",
- "... ... ... ... ... ... \n",
- "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n",
- "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n",
- "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n",
- "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n",
- "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n",
- "\n",
- " P90_Wash \n",
- "0 1.090121 \n",
- "1 1.112059 \n",
- "2 1.118816 \n",
- "3 1.090121 \n",
- "4 1.118816 \n",
- "... ... \n",
- "10009 1.921224 \n",
- "10010 1.931817 \n",
- "10011 1.931817 \n",
- "10012 1.931817 \n",
- "10013 1.915159 \n",
- "\n",
- "[10014 rows x 12 columns]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "a"
+ "# a"
]
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 81,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []
diff --git a/main.py b/main.py
index 5101e30..71fc2fc 100644
--- a/main.py
+++ b/main.py
@@ -114,7 +114,7 @@ query = """
"""
-# In[57]:
+# In[77]:
from tms_data_interface import SQLQueryInterface
@@ -142,7 +142,7 @@ class Scenario:
df.groupby("Segment")[["Credit_transaction_amount",
"Debit_transaction_amount",
"Wash_Ratio"]]
- .quantile(0.98)
+ .quantile(0.90)
.reset_index()
)
@@ -158,13 +158,13 @@ class Scenario:
# Step 3: Identify customers above 90th percentile in ANY of the 3 metrics
high_pop = (
- (df["Credit_transaction_amount"] > df["P90_Credit"]) |
- (df["Debit_transaction_amount"] > df["P90_Debit"]) |
+ # (df["Credit_transaction_amount"] > df["P90_Credit"]) &
+ (df["Debit_transaction_amount"] > df["P90_Debit"]) &
(df["Wash_Ratio"] > df["P90_Wash"])
)
# Step 4: Randomly select 0.1% sample from high-risk population
- sample_fraction = 0.1 # 0.1%
+ sample_fraction = 0.1 # 10% (frac is a fraction of rows, not a percentage)
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
# Step 5: Set SAR_FLAG values
@@ -173,20 +173,20 @@ class Scenario:
return df
-# In[58]:
+# In[83]:
-sen = Scenario()
-a = sen.logic()
+# sen = Scenario()
+# a = sen.logic()
-# In[59]:
+# In[82]:
-a
+# a
-# In[49]:
+# In[81]:
# a[a["SAR_FLAG"] == "Y"]