diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 774458a..a083585 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 77, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -189,7 +189,7 @@ " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\",\n", " \"Wash_Ratio\"]]\n", - " .quantile(0.98)\n", + " .quantile(0.90)\n", " .reset_index()\n", " )\n", "\n", @@ -205,13 +205,13 @@ "\n", " # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n", " high_pop = (\n", - " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n", - " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n", + " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n", + " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n", " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", " )\n", "\n", " # Step 4: Randomly select 0.1% sample from high-risk population\n", - " sample_fraction = 0.1 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 5: Set SAR_FLAG values\n", @@ -222,299 +222,32 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 83, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, "outputs": [], "source": [ - "sen = Scenario()\n", - "a = sen.logic()" + "# sen = Scenario()\n", + "# a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 82, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN47871028053523122020278712630920251.006539Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
1PN48312538900521352797396888211328500.980138Govt. EntitiesLow RiskN6.112897e+096.072409e+091.112059
2PN89040341362960832937399978506328241.034130SMELow RiskN5.709904e+095.559419e+091.118816
3PN53147541836739822861398706816827701.049311Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
4PN14772217755946151225164155922212211.081651SMELow RiskN5.709904e+095.559419e+091.118816
.......................................
10009PN95505931106290264252661303691.231146Priority BankingLow RiskN7.616620e+075.263062e+071.921224
10010PN60206729780658238277964484051.071384OthersHigh RiskN7.897534e+075.488447e+071.931817
10011PN21348741410071274238968443681.732868OthersLow RiskN7.897534e+075.488447e+071.931817
10012PN56306534009021251325635823751.044388OthersLow RiskN7.897534e+075.488447e+071.931817
10013PN38887530904340236219382663441.408696Mass MarketMedium RiskN7.921967e+075.290545e+071.915159
\n", - "

10014 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN478710 2805352312 2020 \n", - "1 PN483125 3890052135 2797 \n", - "2 PN890403 4136296083 2937 \n", - "3 PN531475 4183673982 2861 \n", - "4 PN147722 1775594615 1225 \n", - "... ... ... ... \n", - "10009 PN955059 31106290 264 \n", - "10010 PN602067 29780658 238 \n", - "10011 PN213487 41410071 274 \n", - "10012 PN563065 34009021 251 \n", - "10013 PN388875 30904340 236 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 2787126309 2025 1.006539 \n", - "1 3968882113 2850 0.980138 \n", - "2 3999785063 2824 1.034130 \n", - "3 3987068168 2770 1.049311 \n", - "4 1641559222 1221 1.081651 \n", - "... ... ... ... \n", - "10009 25266130 369 1.231146 \n", - "10010 27796448 405 1.071384 \n", - "10011 23896844 368 1.732868 \n", - "10012 32563582 375 1.044388 \n", - "10013 21938266 344 1.408696 \n", - "\n", - " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", - "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", - "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n", - "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n", - "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", - "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n", - "... ... ... ... ... ... \n", - "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n", - "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n", - "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n", - "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n", - "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n", - "\n", - " P90_Wash \n", - "0 1.090121 \n", - "1 1.112059 \n", - "2 1.118816 \n", - "3 1.090121 \n", - "4 1.118816 \n", - "... ... \n", - "10009 1.921224 \n", - "10010 1.931817 \n", - "10011 1.931817 \n", - "10012 1.931817 \n", - "10013 1.915159 \n", - "\n", - "[10014 rows x 12 columns]" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "a" + "# a" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 81, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] diff --git a/main.ipynb b/main.ipynb index 774458a..a083585 100644 --- a/main.ipynb +++ b/main.ipynb @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 77, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -189,7 +189,7 @@ " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", " \"Debit_transaction_amount\",\n", " \"Wash_Ratio\"]]\n", - " .quantile(0.98)\n", + " .quantile(0.90)\n", " .reset_index()\n", " )\n", "\n", @@ -205,13 +205,13 @@ "\n", " # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics\n", " high_pop = (\n", - " (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) |\n", - " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) |\n", + " # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n", + " (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n", " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n", " )\n", "\n", " # Step 4: Randomly select 0.1% sample from high-risk population\n", - " sample_fraction = 0.1 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 5: Set SAR_FLAG values\n", @@ -222,299 +222,32 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 83, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, "outputs": [], "source": [ - "sen = Scenario()\n", - "a = sen.logic()" + "# sen = Scenario()\n", + "# a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 82, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN47871028053523122020278712630920251.006539Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
1PN48312538900521352797396888211328500.980138Govt. EntitiesLow RiskN6.112897e+096.072409e+091.112059
2PN89040341362960832937399978506328241.034130SMELow RiskN5.709904e+095.559419e+091.118816
3PN53147541836739822861398706816827701.049311Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
4PN14772217755946151225164155922212211.081651SMELow RiskN5.709904e+095.559419e+091.118816
.......................................
10009PN95505931106290264252661303691.231146Priority BankingLow RiskN7.616620e+075.263062e+071.921224
10010PN60206729780658238277964484051.071384OthersHigh RiskN7.897534e+075.488447e+071.931817
10011PN21348741410071274238968443681.732868OthersLow RiskN7.897534e+075.488447e+071.931817
10012PN56306534009021251325635823751.044388OthersLow RiskN7.897534e+075.488447e+071.931817
10013PN38887530904340236219382663441.408696Mass MarketMedium RiskN7.921967e+075.290545e+071.915159
\n", - "

10014 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", - "0 PN478710 2805352312 2020 \n", - "1 PN483125 3890052135 2797 \n", - "2 PN890403 4136296083 2937 \n", - "3 PN531475 4183673982 2861 \n", - "4 PN147722 1775594615 1225 \n", - "... ... ... ... \n", - "10009 PN955059 31106290 264 \n", - "10010 PN602067 29780658 238 \n", - "10011 PN213487 41410071 274 \n", - "10012 PN563065 34009021 251 \n", - "10013 PN388875 30904340 236 \n", - "\n", - " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", - "0 2787126309 2025 1.006539 \n", - "1 3968882113 2850 0.980138 \n", - "2 3999785063 2824 1.034130 \n", - "3 3987068168 2770 1.049311 \n", - "4 1641559222 1221 1.081651 \n", - "... ... ... ... \n", - "10009 25266130 369 1.231146 \n", - "10010 27796448 405 1.071384 \n", - "10011 23896844 368 1.732868 \n", - "10012 32563582 375 1.044388 \n", - "10013 21938266 344 1.408696 \n", - "\n", - " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", - "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", - "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n", - "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n", - "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", - "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n", - "... ... ... ... ... ... \n", - "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n", - "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n", - "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n", - "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n", - "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n", - "\n", - " P90_Wash \n", - "0 1.090121 \n", - "1 1.112059 \n", - "2 1.118816 \n", - "3 1.090121 \n", - "4 1.118816 \n", - "... ... \n", - "10009 1.921224 \n", - "10010 1.931817 \n", - "10011 1.931817 \n", - "10012 1.931817 \n", - "10013 1.915159 \n", - "\n", - "[10014 rows x 12 columns]" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "a" + "# a" ] }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 81, "id": "150bb5ce-6be1-44fc-a606-6d375354626d", "metadata": { "tags": [] diff --git a/main.py b/main.py index 5101e30..71fc2fc 100644 --- a/main.py +++ b/main.py @@ -114,7 +114,7 @@ query = """ """ -# In[57]: +# In[77]: from tms_data_interface import SQLQueryInterface @@ -142,7 +142,7 @@ class Scenario: df.groupby("Segment")[["Credit_transaction_amount", "Debit_transaction_amount", "Wash_Ratio"]] - .quantile(0.98) + .quantile(0.90) .reset_index() ) @@ -158,13 +158,13 @@ class Scenario: # Step 3: Identify customers above 90th percentile in ANY of the 3 metrics high_pop = ( - (df["Credit_transaction_amount"] > df["P90_Credit"]) | - (df["Debit_transaction_amount"] > df["P90_Debit"]) | + # (df["Credit_transaction_amount"] > df["P90_Credit"]) & + (df["Debit_transaction_amount"] > df["P90_Debit"]) & (df["Wash_Ratio"] > df["P90_Wash"]) ) # Step 4: Randomly select 0.1% sample from high-risk population - sample_fraction = 0.1 # 0.1% + sample_fraction = 0.1 # 0.1% high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index # Step 5: Set SAR_FLAG values @@ -173,20 +173,20 @@ class Scenario: return df -# In[58]: +# In[83]: -sen = Scenario() -a = sen.logic() +# sen = Scenario() +# a = sen.logic() -# In[59]: +# In[82]: -a +# a -# In[49]: +# In[81]: # a[a["SAR_FLAG"] == "Y"]