diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 10a09d0..774458a 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 42, + "execution_count": 53, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 54, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 57, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -181,10 +181,9 @@ " \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df[[\"Credit_transaction_amount\",\n", - " \"Debit_transaction_amount\",\n", - " \"Wash_Ratio\"]] = df[[\"Credit_transaction_amount\",\n", - " \"Debit_transaction_amount\",\n", - " \"Wash_Ratio\"]].astype('int')\n", + " \"Debit_transaction_amount\"]] = df[[\"Credit_transaction_amount\",\n", + " \"Debit_transaction_amount\"]].astype('int')\n", + " df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype('float')\n", " # Step 1: Compute 90th percentiles per Segment for all 3 fields\n", " percentiles = (\n", " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", @@ -223,27 +222,294 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 58, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# sen = Scenario()\n", - "# a = sen.logic()" + "sen = Scenario()\n", + "a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 59, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN47871028053523122020278712630920251.006539Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
1PN48312538900521352797396888211328500.980138Govt. EntitiesLow RiskN6.112897e+096.072409e+091.112059
2PN89040341362960832937399978506328241.034130SMELow RiskN5.709904e+095.559419e+091.118816
3PN53147541836739822861398706816827701.049311Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
4PN14772217755946151225164155922212211.081651SMELow RiskN5.709904e+095.559419e+091.118816
.......................................
10009PN95505931106290264252661303691.231146Priority BankingLow RiskN7.616620e+075.263062e+071.921224
10010PN60206729780658238277964484051.071384OthersHigh RiskN7.897534e+075.488447e+071.931817
10011PN21348741410071274238968443681.732868OthersLow RiskN7.897534e+075.488447e+071.931817
10012PN56306534009021251325635823751.044388OthersLow RiskN7.897534e+075.488447e+071.931817
10013PN38887530904340236219382663441.408696Mass MarketMedium RiskN7.921967e+075.290545e+071.915159
\n", + "

10014 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", + "0 PN478710 2805352312 2020 \n", + "1 PN483125 3890052135 2797 \n", + "2 PN890403 4136296083 2937 \n", + "3 PN531475 4183673982 2861 \n", + "4 PN147722 1775594615 1225 \n", + "... ... ... ... \n", + "10009 PN955059 31106290 264 \n", + "10010 PN602067 29780658 238 \n", + "10011 PN213487 41410071 274 \n", + "10012 PN563065 34009021 251 \n", + "10013 PN388875 30904340 236 \n", + "\n", + " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", + "0 2787126309 2025 1.006539 \n", + "1 3968882113 2850 0.980138 \n", + "2 3999785063 2824 1.034130 \n", + "3 3987068168 2770 1.049311 \n", + "4 1641559222 1221 1.081651 \n", + "... ... ... ... \n", + "10009 25266130 369 1.231146 \n", + "10010 27796448 405 1.071384 \n", + "10011 23896844 368 1.732868 \n", + "10012 32563582 375 1.044388 \n", + "10013 21938266 344 1.408696 \n", + "\n", + " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", + "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", + "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n", + "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n", + "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", + "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n", + "... ... ... ... ... ... \n", + "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n", + "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n", + "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n", + "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n", + "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n", + "\n", + " P90_Wash \n", + "0 1.090121 \n", + "1 1.112059 \n", + "2 1.118816 \n", + "3 1.090121 \n", + "4 1.118816 \n", + "... ... \n", + "10009 1.921224 \n", + "10010 1.931817 \n", + "10011 1.931817 \n", + "10012 1.931817 \n", + "10013 1.915159 \n", + "\n", + "[10014 rows x 12 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# a" + "a" ] }, { diff --git a/main.ipynb b/main.ipynb index 10a09d0..774458a 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 42, + "execution_count": 53, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 54, "id": "134d0b3d-5481-4975-af07-c80ab09d6dd2", "metadata": { "tags": [] @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 57, "id": "d220561a-34c9-48d2-8e2f-5d174a87540b", "metadata": { "tags": [] @@ -181,10 +181,9 @@ " \"Wash_Ratio\", \"Segment\", \"Risk\", \"SAR_FLAG\"]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df[[\"Credit_transaction_amount\",\n", - " \"Debit_transaction_amount\",\n", - " \"Wash_Ratio\"]] = df[[\"Credit_transaction_amount\",\n", - " \"Debit_transaction_amount\",\n", - " \"Wash_Ratio\"]].astype('int')\n", + " \"Debit_transaction_amount\"]] = df[[\"Credit_transaction_amount\",\n", + " \"Debit_transaction_amount\"]].astype('int')\n", + " df[\"Wash_Ratio\"] = df[\"Wash_Ratio\"].astype('float')\n", " # Step 1: Compute 90th percentiles per Segment for all 3 fields\n", " percentiles = (\n", " df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n", @@ -223,27 +222,294 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 58, "id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# sen = Scenario()\n", - "# a = sen.logic()" + "sen = Scenario()\n", + "a = sen.logic()" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 59, "id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Focal_idCredit_transaction_amountTotal_no_of_credit_transactionsDebit_transaction_amountTotal_no_of_debit_transactionsWash_RatioSegmentRiskSAR_FLAGP90_CreditP90_DebitP90_Wash
0PN47871028053523122020278712630920251.006539Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
1PN48312538900521352797396888211328500.980138Govt. EntitiesLow RiskN6.112897e+096.072409e+091.112059
2PN89040341362960832937399978506328241.034130SMELow RiskN5.709904e+095.559419e+091.118816
3PN53147541836739822861398706816827701.049311Corporate BankingLow RiskN6.274828e+096.259298e+091.090121
4PN14772217755946151225164155922212211.081651SMELow RiskN5.709904e+095.559419e+091.118816
.......................................
10009PN95505931106290264252661303691.231146Priority BankingLow RiskN7.616620e+075.263062e+071.921224
10010PN60206729780658238277964484051.071384OthersHigh RiskN7.897534e+075.488447e+071.931817
10011PN21348741410071274238968443681.732868OthersLow RiskN7.897534e+075.488447e+071.931817
10012PN56306534009021251325635823751.044388OthersLow RiskN7.897534e+075.488447e+071.931817
10013PN38887530904340236219382663441.408696Mass MarketMedium RiskN7.921967e+075.290545e+071.915159
\n", + "

10014 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n", + "0 PN478710 2805352312 2020 \n", + "1 PN483125 3890052135 2797 \n", + "2 PN890403 4136296083 2937 \n", + "3 PN531475 4183673982 2861 \n", + "4 PN147722 1775594615 1225 \n", + "... ... ... ... \n", + "10009 PN955059 31106290 264 \n", + "10010 PN602067 29780658 238 \n", + "10011 PN213487 41410071 274 \n", + "10012 PN563065 34009021 251 \n", + "10013 PN388875 30904340 236 \n", + "\n", + " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n", + "0 2787126309 2025 1.006539 \n", + "1 3968882113 2850 0.980138 \n", + "2 3999785063 2824 1.034130 \n", + "3 3987068168 2770 1.049311 \n", + "4 1641559222 1221 1.081651 \n", + "... ... ... ... \n", + "10009 25266130 369 1.231146 \n", + "10010 27796448 405 1.071384 \n", + "10011 23896844 368 1.732868 \n", + "10012 32563582 375 1.044388 \n", + "10013 21938266 344 1.408696 \n", + "\n", + " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n", + "0 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", + "1 Govt. Entities Low Risk N 6.112897e+09 6.072409e+09 \n", + "2 SME Low Risk N 5.709904e+09 5.559419e+09 \n", + "3 Corporate Banking Low Risk N 6.274828e+09 6.259298e+09 \n", + "4 SME Low Risk N 5.709904e+09 5.559419e+09 \n", + "... ... ... ... ... ... \n", + "10009 Priority Banking Low Risk N 7.616620e+07 5.263062e+07 \n", + "10010 Others High Risk N 7.897534e+07 5.488447e+07 \n", + "10011 Others Low Risk N 7.897534e+07 5.488447e+07 \n", + "10012 Others Low Risk N 7.897534e+07 5.488447e+07 \n", + "10013 Mass Market Medium Risk N 7.921967e+07 5.290545e+07 \n", + "\n", + " P90_Wash \n", + "0 1.090121 \n", + "1 1.112059 \n", + "2 1.118816 \n", + "3 1.090121 \n", + "4 1.118816 \n", + "... ... \n", + "10009 1.921224 \n", + "10010 1.931817 \n", + "10011 1.931817 \n", + "10012 1.931817 \n", + "10013 1.915159 \n", + "\n", + "[10014 rows x 12 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# a" + "a" ] }, { diff --git a/main.py b/main.py index 3e7f4ba..5101e30 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# In[42]: +# In[53]: import pandas as pd @@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema") seq.execute_raw("show tables") -# In[44]: +# In[54]: query = """ @@ -114,7 +114,7 @@ query = """ """ -# In[45]: +# In[57]: from tms_data_interface import SQLQueryInterface @@ -134,10 +134,9 @@ class Scenario: "Wash_Ratio", "Segment", "Risk", "SAR_FLAG"] df = pd.DataFrame(row_list, columns = cols) df[["Credit_transaction_amount", - "Debit_transaction_amount", - "Wash_Ratio"]] = df[["Credit_transaction_amount", - "Debit_transaction_amount", - "Wash_Ratio"]].astype('int') + "Debit_transaction_amount"]] = df[["Credit_transaction_amount", + "Debit_transaction_amount"]].astype('int') + df["Wash_Ratio"] = df["Wash_Ratio"].astype('float') # Step 1: Compute 90th percentiles per Segment for all 3 fields percentiles = ( df.groupby("Segment")[["Credit_transaction_amount", @@ -174,17 +173,17 @@ class Scenario: return df -# In[51]: +# In[58]: -# sen = Scenario() -# a = sen.logic() +sen = Scenario() +a = sen.logic() -# In[50]: +# In[59]: -# a +a # In[49]: