diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 0632273..da32eb3 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 53, + "execution_count": 93, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 94, "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 95, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 101, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -189,7 +189,7 @@ " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\",\n", " \"Wash_Ratio\"]]\n", - " .quantile(0.90)\n", + " .quantile(0.95)\n", " .reset_index()\n", " )\n", "\n", @@ -207,11 +207,11 @@ " high_pop = (\n", " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n", " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n", - " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", + " (df[\"Wash_Ratio\"] > 0.90)\n", " )\n", "\n", " # Step 4: Randomly select 0.1% sample from high-risk population\n", - " sample_fraction = 0.3 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 5: Set SAR_FLAG values\n", @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 107, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] @@ -235,286 +235,19 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 106, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN48914428308027412060284755618619760.994117Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
1PN39478028726853642029274393185519991.046923Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
2PN19572256042083683937555794650540391.008324SMELow RiskN4.532321e+094.534860e+091.062759
3PN65256616309052481152168671361411690.966913Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
4PN18196021576343321613203995331215521.057688Corporate BankingLow RiskN5.021582e+095.003501e+091.063161
.......................................
10009PN47949131124877246235901913571.319399Private BankingLow RiskN4.461828e+073.176446e+071.760285
10010PN26755036558708260273610573661.336158Priority BankingLow RiskN4.410392e+073.076443e+071.729168
10011PN29300333990478255244658353231.389304OthersLow RiskN6.334963e+074.223903e+071.740112
10012PN53410539934813278282478584031.413729OthersHigh RiskN6.334963e+074.223903e+071.740112
10013PN39043036894062257291622523711.265131Private BankingLow RiskN4.461828e+073.176446e+071.760285
\n", - "

10014 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN489144 2830802741 2060 \n", - "1 PN394780 2872685364 2029 \n", - "2 PN195722 5604208368 3937 \n", - "3 PN652566 1630905248 1152 \n", - "4 PN181960 2157634332 1613 \n", - "... ... ... ... \n", - "10009 PN479491 31124877 246 \n", - "10010 PN267550 36558708 260 \n", - "10011 PN293003 33990478 255 \n", - "10012 PN534105 39934813 278 \n", - "10013 PN390430 36894062 257 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 2847556186 1976 0.994117 \n", - "1 2743931855 1999 1.046923 \n", - "2 5557946505 4039 1.008324 \n", - "3 1686713614 1169 0.966913 \n", - "4 2039953312 1552 1.057688 \n", - "... ... ... ... \n", - "10009 23590191 357 1.319399 \n", - "10010 27361057 366 1.336158 \n", - "10011 24465835 323 1.389304 \n", - "10012 28247858 403 1.413729 \n", - "10013 29162252 371 1.265131 \n", - "\n", - " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", - "0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "2 SME Low Risk N 4.532321e+09 4.534860e+09 \n", - "3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n", - "... ... ... ... ... ... \n", - "10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", - "10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n", - "10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n", - "10012 Others High Risk N 6.334963e+07 4.223903e+07 \n", - "10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", - "\n", - " P90_Wash \n", - "0 1.058020 \n", - "1 1.058020 \n", - "2 1.062759 \n", - "3 1.058020 \n", - "4 1.063161 \n", - "... ... \n", - "10009 1.760285 \n", - "10010 1.729168 \n", - "10011 1.740112 \n", - "10012 1.740112 \n", - "10013 1.760285 \n", - "\n", - "[10014 rows x 12 columns]" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# a" ] }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 105, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] diff --git a/main.ipynb b/main.ipynb index 0632273..da32eb3 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 53, + "execution_count": 93, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 94, "id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 95, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 101, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -189,7 +189,7 @@ " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\",\n", " \"Wash_Ratio\"]]\n", - " .quantile(0.90)\n", + " .quantile(0.95)\n", " .reset_index()\n", " )\n", "\n", @@ -207,11 +207,11 @@ " high_pop = (\n", " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n", " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n", - " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", + " (df[\"Wash_Ratio\"] > 0.90)\n", " )\n", "\n", " # Step 4: Randomly select 0.1% sample from high-risk population\n", - " sample_fraction = 0.3 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 5: Set SAR_FLAG values\n", @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 107, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] @@ -235,286 +235,19 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 106, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN48914428308027412060284755618619760.994117Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
1PN39478028726853642029274393185519991.046923Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
2PN19572256042083683937555794650540391.008324SMELow RiskN4.532321e+094.534860e+091.062759
3PN65256616309052481152168671361411690.966913Whole Sale BankingLow RiskN4.400246e+094.332448e+091.058020
4PN18196021576343321613203995331215521.057688Corporate BankingLow RiskN5.021582e+095.003501e+091.063161
.......................................
10009PN47949131124877246235901913571.319399Private BankingLow RiskN4.461828e+073.176446e+071.760285
10010PN26755036558708260273610573661.336158Priority BankingLow RiskN4.410392e+073.076443e+071.729168
10011PN29300333990478255244658353231.389304OthersLow RiskN6.334963e+074.223903e+071.740112
10012PN53410539934813278282478584031.413729OthersHigh RiskN6.334963e+074.223903e+071.740112
10013PN39043036894062257291622523711.265131Private BankingLow RiskN4.461828e+073.176446e+071.760285
\n", - "

10014 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN489144 2830802741 2060 \n", - "1 PN394780 2872685364 2029 \n", - "2 PN195722 5604208368 3937 \n", - "3 PN652566 1630905248 1152 \n", - "4 PN181960 2157634332 1613 \n", - "... ... ... ... \n", - "10009 PN479491 31124877 246 \n", - "10010 PN267550 36558708 260 \n", - "10011 PN293003 33990478 255 \n", - "10012 PN534105 39934813 278 \n", - "10013 PN390430 36894062 257 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 2847556186 1976 0.994117 \n", - "1 2743931855 1999 1.046923 \n", - "2 5557946505 4039 1.008324 \n", - "3 1686713614 1169 0.966913 \n", - "4 2039953312 1552 1.057688 \n", - "... ... ... ... \n", - "10009 23590191 357 1.319399 \n", - "10010 27361057 366 1.336158 \n", - "10011 24465835 323 1.389304 \n", - "10012 28247858 403 1.413729 \n", - "10013 29162252 371 1.265131 \n", - "\n", - " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", - "0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "2 SME Low Risk N 4.532321e+09 4.534860e+09 \n", - "3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n", - "4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n", - "... ... ... ... ... ... \n", - "10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", - "10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n", - "10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n", - "10012 Others High Risk N 6.334963e+07 4.223903e+07 \n", - "10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n", - "\n", - " P90_Wash \n", - "0 1.058020 \n", - "1 1.058020 \n", - "2 1.062759 \n", - "3 1.058020 \n", - "4 1.063161 \n", - "... ... \n", - "10009 1.760285 \n", - "10010 1.729168 \n", - "10011 1.740112 \n", - "10012 1.740112 \n", - "10013 1.760285 \n", - "\n", - "[10014 rows x 12 columns]" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# a" ] }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 105, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] diff --git a/main.py b/main.py index 205b8b8..d7c13b2 100644 --- a/main.py +++ b/main.py @@ -1,13 +1,13 @@ #!/usr/bin/env python # coding: utf-8 -# In[53]: +# In[93]: import pandas as pd -# In[43]: +# In[94]: from tms_data_interface import SQLQueryInterface @@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema") seq.execute_raw("show tables") -# In[54]: +# In[95]: query = """ @@ -114,7 +114,7 @@ query = """ """ -# In[84]: +# In[101]: from tms_data_interface import SQLQueryInterface @@ -142,7 +142,7 @@ class Scenario: df.groupby("Segment")[["Credit_transaction_amount", "Debit_transaction_amount", "Wash_Ratio"]] - .quantile(0.90) + .quantile(0.95) .reset_index() ) @@ -160,11 +160,11 @@ class Scenario: high_pop = ( # (df["Credit_transaction_amount"] > df["P90_Credit"]) & (df["Debit_transaction_amount"] > df["P90_Debit"]) & - (df["Wash_Ratio"] > df["P90_Wash"]) + (df["Wash_Ratio"] > 0.90) ) # Step 4: Randomly select 0.1% sample from high-risk population - sample_fraction = 0.3 # 0.1% + sample_fraction = 0.1 # 0.1% high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index # Step 5: Set SAR_FLAG values @@ -173,20 +173,20 @@ class Scenario: return df -# In[85]: +# In[107]: # sen = Scenario() # a = sen.logic() -# In[86]: +# In[106]: # a -# In[88]: +# In[105]: # a[a["SAR_FLAG"] == "Y"]