diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb
index 0632273..da32eb3 100644
--- a/.ipynb_checkpoints/main-checkpoint.ipynb
+++ b/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 93,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 94,
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
"metadata": {
"tags": []
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 95,
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
"metadata": {
"tags": []
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 101,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
- " .quantile(0.90)\n",
+ " .quantile(0.95)\n",
" .reset_index()\n",
" )\n",
"\n",
@@ -207,11 +207,11 @@
" high_pop = (\n",
" # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
- " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
+ " (df[\"Wash_Ratio\"] > 0.90)\n",
" )\n",
"\n",
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
- " sample_fraction = 0.3 # 0.1%\n",
+ " sample_fraction = 0.1 # 10% of the high-risk population\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@@ -222,7 +222,7 @@
},
{
"cell_type": "code",
- "execution_count": 85,
+ "execution_count": 107,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
@@ -235,286 +235,19 @@
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 106,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Focal_id | \n",
- " Credit_transaction_amount | \n",
- " Total_no_of_credit_transactions | \n",
- " Debit_transaction_amount | \n",
- " Total_no_of_debit_transactions | \n",
- " Wash_Ratio | \n",
- " Segment | \n",
- " Risk | \n",
- " SAR_FLAG | \n",
- " P90_Credit | \n",
- " P90_Debit | \n",
- " P90_Wash | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " PN489144 | \n",
- " 2830802741 | \n",
- " 2060 | \n",
- " 2847556186 | \n",
- " 1976 | \n",
- " 0.994117 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " PN394780 | \n",
- " 2872685364 | \n",
- " 2029 | \n",
- " 2743931855 | \n",
- " 1999 | \n",
- " 1.046923 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " PN195722 | \n",
- " 5604208368 | \n",
- " 3937 | \n",
- " 5557946505 | \n",
- " 4039 | \n",
- " 1.008324 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.532321e+09 | \n",
- " 4.534860e+09 | \n",
- " 1.062759 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PN652566 | \n",
- " 1630905248 | \n",
- " 1152 | \n",
- " 1686713614 | \n",
- " 1169 | \n",
- " 0.966913 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " PN181960 | \n",
- " 2157634332 | \n",
- " 1613 | \n",
- " 2039953312 | \n",
- " 1552 | \n",
- " 1.057688 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.021582e+09 | \n",
- " 5.003501e+09 | \n",
- " 1.063161 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 10009 | \n",
- " PN479491 | \n",
- " 31124877 | \n",
- " 246 | \n",
- " 23590191 | \n",
- " 357 | \n",
- " 1.319399 | \n",
- " Private Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.461828e+07 | \n",
- " 3.176446e+07 | \n",
- " 1.760285 | \n",
- "
\n",
- " \n",
- " | 10010 | \n",
- " PN267550 | \n",
- " 36558708 | \n",
- " 260 | \n",
- " 27361057 | \n",
- " 366 | \n",
- " 1.336158 | \n",
- " Priority Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.410392e+07 | \n",
- " 3.076443e+07 | \n",
- " 1.729168 | \n",
- "
\n",
- " \n",
- " | 10011 | \n",
- " PN293003 | \n",
- " 33990478 | \n",
- " 255 | \n",
- " 24465835 | \n",
- " 323 | \n",
- " 1.389304 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.334963e+07 | \n",
- " 4.223903e+07 | \n",
- " 1.740112 | \n",
- "
\n",
- " \n",
- " | 10012 | \n",
- " PN534105 | \n",
- " 39934813 | \n",
- " 278 | \n",
- " 28247858 | \n",
- " 403 | \n",
- " 1.413729 | \n",
- " Others | \n",
- " High Risk | \n",
- " N | \n",
- " 6.334963e+07 | \n",
- " 4.223903e+07 | \n",
- " 1.740112 | \n",
- "
\n",
- " \n",
- " | 10013 | \n",
- " PN390430 | \n",
- " 36894062 | \n",
- " 257 | \n",
- " 29162252 | \n",
- " 371 | \n",
- " 1.265131 | \n",
- " Private Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.461828e+07 | \n",
- " 3.176446e+07 | \n",
- " 1.760285 | \n",
- "
\n",
- " \n",
- "
\n",
- "
10014 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
- "0 PN489144 2830802741 2060 \n",
- "1 PN394780 2872685364 2029 \n",
- "2 PN195722 5604208368 3937 \n",
- "3 PN652566 1630905248 1152 \n",
- "4 PN181960 2157634332 1613 \n",
- "... ... ... ... \n",
- "10009 PN479491 31124877 246 \n",
- "10010 PN267550 36558708 260 \n",
- "10011 PN293003 33990478 255 \n",
- "10012 PN534105 39934813 278 \n",
- "10013 PN390430 36894062 257 \n",
- "\n",
- " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
- "0 2847556186 1976 0.994117 \n",
- "1 2743931855 1999 1.046923 \n",
- "2 5557946505 4039 1.008324 \n",
- "3 1686713614 1169 0.966913 \n",
- "4 2039953312 1552 1.057688 \n",
- "... ... ... ... \n",
- "10009 23590191 357 1.319399 \n",
- "10010 27361057 366 1.336158 \n",
- "10011 24465835 323 1.389304 \n",
- "10012 28247858 403 1.413729 \n",
- "10013 29162252 371 1.265131 \n",
- "\n",
- " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
- "0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
- "3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
- "... ... ... ... ... ... \n",
- "10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
- "10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
- "10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
- "10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
- "10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
- "\n",
- " P90_Wash \n",
- "0 1.058020 \n",
- "1 1.058020 \n",
- "2 1.062759 \n",
- "3 1.058020 \n",
- "4 1.063161 \n",
- "... ... \n",
- "10009 1.760285 \n",
- "10010 1.729168 \n",
- "10011 1.740112 \n",
- "10012 1.740112 \n",
- "10013 1.760285 \n",
- "\n",
- "[10014 rows x 12 columns]"
- ]
- },
- "execution_count": 86,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# a"
]
},
{
"cell_type": "code",
- "execution_count": 88,
+ "execution_count": 105,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []
diff --git a/main.ipynb b/main.ipynb
index 0632273..da32eb3 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 93,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
@@ -14,7 +14,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 94,
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
"metadata": {
"tags": []
@@ -57,7 +57,7 @@
},
{
"cell_type": "code",
- "execution_count": 54,
+ "execution_count": 95,
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
"metadata": {
"tags": []
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
- "execution_count": 84,
+ "execution_count": 101,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
- " .quantile(0.90)\n",
+ " .quantile(0.95)\n",
" .reset_index()\n",
" )\n",
"\n",
@@ -207,11 +207,11 @@
" high_pop = (\n",
" # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
- " (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
+ " (df[\"Wash_Ratio\"] > 0.90)\n",
" )\n",
"\n",
" # Step 4: Randomly select 0.1% sample from high-risk population\n",
- " sample_fraction = 0.3 # 0.1%\n",
+ " sample_fraction = 0.1 # 10% of the high-risk population\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@@ -222,7 +222,7 @@
},
{
"cell_type": "code",
- "execution_count": 85,
+ "execution_count": 107,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
@@ -235,286 +235,19 @@
},
{
"cell_type": "code",
- "execution_count": 86,
+ "execution_count": 106,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Focal_id | \n",
- " Credit_transaction_amount | \n",
- " Total_no_of_credit_transactions | \n",
- " Debit_transaction_amount | \n",
- " Total_no_of_debit_transactions | \n",
- " Wash_Ratio | \n",
- " Segment | \n",
- " Risk | \n",
- " SAR_FLAG | \n",
- " P90_Credit | \n",
- " P90_Debit | \n",
- " P90_Wash | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " PN489144 | \n",
- " 2830802741 | \n",
- " 2060 | \n",
- " 2847556186 | \n",
- " 1976 | \n",
- " 0.994117 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " PN394780 | \n",
- " 2872685364 | \n",
- " 2029 | \n",
- " 2743931855 | \n",
- " 1999 | \n",
- " 1.046923 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " PN195722 | \n",
- " 5604208368 | \n",
- " 3937 | \n",
- " 5557946505 | \n",
- " 4039 | \n",
- " 1.008324 | \n",
- " SME | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.532321e+09 | \n",
- " 4.534860e+09 | \n",
- " 1.062759 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PN652566 | \n",
- " 1630905248 | \n",
- " 1152 | \n",
- " 1686713614 | \n",
- " 1169 | \n",
- " 0.966913 | \n",
- " Whole Sale Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.400246e+09 | \n",
- " 4.332448e+09 | \n",
- " 1.058020 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " PN181960 | \n",
- " 2157634332 | \n",
- " 1613 | \n",
- " 2039953312 | \n",
- " 1552 | \n",
- " 1.057688 | \n",
- " Corporate Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 5.021582e+09 | \n",
- " 5.003501e+09 | \n",
- " 1.063161 | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 10009 | \n",
- " PN479491 | \n",
- " 31124877 | \n",
- " 246 | \n",
- " 23590191 | \n",
- " 357 | \n",
- " 1.319399 | \n",
- " Private Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.461828e+07 | \n",
- " 3.176446e+07 | \n",
- " 1.760285 | \n",
- "
\n",
- " \n",
- " | 10010 | \n",
- " PN267550 | \n",
- " 36558708 | \n",
- " 260 | \n",
- " 27361057 | \n",
- " 366 | \n",
- " 1.336158 | \n",
- " Priority Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.410392e+07 | \n",
- " 3.076443e+07 | \n",
- " 1.729168 | \n",
- "
\n",
- " \n",
- " | 10011 | \n",
- " PN293003 | \n",
- " 33990478 | \n",
- " 255 | \n",
- " 24465835 | \n",
- " 323 | \n",
- " 1.389304 | \n",
- " Others | \n",
- " Low Risk | \n",
- " N | \n",
- " 6.334963e+07 | \n",
- " 4.223903e+07 | \n",
- " 1.740112 | \n",
- "
\n",
- " \n",
- " | 10012 | \n",
- " PN534105 | \n",
- " 39934813 | \n",
- " 278 | \n",
- " 28247858 | \n",
- " 403 | \n",
- " 1.413729 | \n",
- " Others | \n",
- " High Risk | \n",
- " N | \n",
- " 6.334963e+07 | \n",
- " 4.223903e+07 | \n",
- " 1.740112 | \n",
- "
\n",
- " \n",
- " | 10013 | \n",
- " PN390430 | \n",
- " 36894062 | \n",
- " 257 | \n",
- " 29162252 | \n",
- " 371 | \n",
- " 1.265131 | \n",
- " Private Banking | \n",
- " Low Risk | \n",
- " N | \n",
- " 4.461828e+07 | \n",
- " 3.176446e+07 | \n",
- " 1.760285 | \n",
- "
\n",
- " \n",
- "
\n",
- "
10014 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
- "0 PN489144 2830802741 2060 \n",
- "1 PN394780 2872685364 2029 \n",
- "2 PN195722 5604208368 3937 \n",
- "3 PN652566 1630905248 1152 \n",
- "4 PN181960 2157634332 1613 \n",
- "... ... ... ... \n",
- "10009 PN479491 31124877 246 \n",
- "10010 PN267550 36558708 260 \n",
- "10011 PN293003 33990478 255 \n",
- "10012 PN534105 39934813 278 \n",
- "10013 PN390430 36894062 257 \n",
- "\n",
- " Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
- "0 2847556186 1976 0.994117 \n",
- "1 2743931855 1999 1.046923 \n",
- "2 5557946505 4039 1.008324 \n",
- "3 1686713614 1169 0.966913 \n",
- "4 2039953312 1552 1.057688 \n",
- "... ... ... ... \n",
- "10009 23590191 357 1.319399 \n",
- "10010 27361057 366 1.336158 \n",
- "10011 24465835 323 1.389304 \n",
- "10012 28247858 403 1.413729 \n",
- "10013 29162252 371 1.265131 \n",
- "\n",
- " Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
- "0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
- "3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
- "4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
- "... ... ... ... ... ... \n",
- "10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
- "10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
- "10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
- "10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
- "10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
- "\n",
- " P90_Wash \n",
- "0 1.058020 \n",
- "1 1.058020 \n",
- "2 1.062759 \n",
- "3 1.058020 \n",
- "4 1.063161 \n",
- "... ... \n",
- "10009 1.760285 \n",
- "10010 1.729168 \n",
- "10011 1.740112 \n",
- "10012 1.740112 \n",
- "10013 1.760285 \n",
- "\n",
- "[10014 rows x 12 columns]"
- ]
- },
- "execution_count": 86,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# a"
]
},
{
"cell_type": "code",
- "execution_count": 88,
+ "execution_count": 105,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []
diff --git a/main.py b/main.py
index 205b8b8..d7c13b2 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,13 @@
#!/usr/bin/env python
# coding: utf-8
-# In[53]:
+# In[93]:
import pandas as pd
-# In[43]:
+# In[94]:
from tms_data_interface import SQLQueryInterface
@@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema")
seq.execute_raw("show tables")
-# In[54]:
+# In[95]:
query = """
@@ -114,7 +114,7 @@ query = """
"""
-# In[84]:
+# In[101]:
from tms_data_interface import SQLQueryInterface
@@ -142,7 +142,7 @@ class Scenario:
df.groupby("Segment")[["Credit_transaction_amount",
"Debit_transaction_amount",
"Wash_Ratio"]]
- .quantile(0.90)
+ .quantile(0.95)
.reset_index()
)
@@ -160,11 +160,11 @@ class Scenario:
high_pop = (
# (df["Credit_transaction_amount"] > df["P90_Credit"]) &
(df["Debit_transaction_amount"] > df["P90_Debit"]) &
- (df["Wash_Ratio"] > df["P90_Wash"])
+ (df["Wash_Ratio"] > 0.90)
)
# Step 4: Randomly select 0.1% sample from high-risk population
- sample_fraction = 0.3 # 0.1%
+ sample_fraction = 0.1 # 10% of the high-risk population
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
# Step 5: Set SAR_FLAG values
@@ -173,20 +173,20 @@ class Scenario:
return df
-# In[85]:
+# In[107]:
# sen = Scenario()
# a = sen.logic()
-# In[86]:
+# In[106]:
# a
-# In[88]:
+# In[105]:
# a[a["SAR_FLAG"] == "Y"]