System save at 27/11/2025 12:45 by user_client2024

This commit is contained in:
user_client2024 2025-11-27 07:15:08 +00:00
parent 5f0be70e69
commit bc1de694e9
3 changed files with 32 additions and 566 deletions

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 93,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 94,
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
"metadata": {
"tags": []
@ -57,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 95,
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
"metadata": {
"tags": []
@ -157,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 101,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
" .quantile(0.90)\n",
" .quantile(0.95)\n",
" .reset_index()\n",
" )\n",
"\n",
@ -207,11 +207,11 @@
" high_pop = (\n",
" # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
" (df[\"Wash_Ratio\"] > 0.90)\n",
" )\n",
"\n",
" # Step 4: Randomly sample a fraction (sample_fraction) of the high-risk population\n",
" sample_fraction = 0.3 # 0.1%\n",
" sample_fraction = 0.1 # 10% of the high-risk population\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@ -222,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 107,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
@ -235,286 +235,19 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 106,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Focal_id</th>\n",
" <th>Credit_transaction_amount</th>\n",
" <th>Total_no_of_credit_transactions</th>\n",
" <th>Debit_transaction_amount</th>\n",
" <th>Total_no_of_debit_transactions</th>\n",
" <th>Wash_Ratio</th>\n",
" <th>Segment</th>\n",
" <th>Risk</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>P90_Credit</th>\n",
" <th>P90_Debit</th>\n",
" <th>P90_Wash</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>PN489144</td>\n",
" <td>2830802741</td>\n",
" <td>2060</td>\n",
" <td>2847556186</td>\n",
" <td>1976</td>\n",
" <td>0.994117</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>PN394780</td>\n",
" <td>2872685364</td>\n",
" <td>2029</td>\n",
" <td>2743931855</td>\n",
" <td>1999</td>\n",
" <td>1.046923</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>PN195722</td>\n",
" <td>5604208368</td>\n",
" <td>3937</td>\n",
" <td>5557946505</td>\n",
" <td>4039</td>\n",
" <td>1.008324</td>\n",
" <td>SME</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.532321e+09</td>\n",
" <td>4.534860e+09</td>\n",
" <td>1.062759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>PN652566</td>\n",
" <td>1630905248</td>\n",
" <td>1152</td>\n",
" <td>1686713614</td>\n",
" <td>1169</td>\n",
" <td>0.966913</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>PN181960</td>\n",
" <td>2157634332</td>\n",
" <td>1613</td>\n",
" <td>2039953312</td>\n",
" <td>1552</td>\n",
" <td>1.057688</td>\n",
" <td>Corporate Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>5.021582e+09</td>\n",
" <td>5.003501e+09</td>\n",
" <td>1.063161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10009</th>\n",
" <td>PN479491</td>\n",
" <td>31124877</td>\n",
" <td>246</td>\n",
" <td>23590191</td>\n",
" <td>357</td>\n",
" <td>1.319399</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10010</th>\n",
" <td>PN267550</td>\n",
" <td>36558708</td>\n",
" <td>260</td>\n",
" <td>27361057</td>\n",
" <td>366</td>\n",
" <td>1.336158</td>\n",
" <td>Priority Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.410392e+07</td>\n",
" <td>3.076443e+07</td>\n",
" <td>1.729168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10011</th>\n",
" <td>PN293003</td>\n",
" <td>33990478</td>\n",
" <td>255</td>\n",
" <td>24465835</td>\n",
" <td>323</td>\n",
" <td>1.389304</td>\n",
" <td>Others</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10012</th>\n",
" <td>PN534105</td>\n",
" <td>39934813</td>\n",
" <td>278</td>\n",
" <td>28247858</td>\n",
" <td>403</td>\n",
" <td>1.413729</td>\n",
" <td>Others</td>\n",
" <td>High Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10013</th>\n",
" <td>PN390430</td>\n",
" <td>36894062</td>\n",
" <td>257</td>\n",
" <td>29162252</td>\n",
" <td>371</td>\n",
" <td>1.265131</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10014 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
"0 PN489144 2830802741 2060 \n",
"1 PN394780 2872685364 2029 \n",
"2 PN195722 5604208368 3937 \n",
"3 PN652566 1630905248 1152 \n",
"4 PN181960 2157634332 1613 \n",
"... ... ... ... \n",
"10009 PN479491 31124877 246 \n",
"10010 PN267550 36558708 260 \n",
"10011 PN293003 33990478 255 \n",
"10012 PN534105 39934813 278 \n",
"10013 PN390430 36894062 257 \n",
"\n",
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
"0 2847556186 1976 0.994117 \n",
"1 2743931855 1999 1.046923 \n",
"2 5557946505 4039 1.008324 \n",
"3 1686713614 1169 0.966913 \n",
"4 2039953312 1552 1.057688 \n",
"... ... ... ... \n",
"10009 23590191 357 1.319399 \n",
"10010 27361057 366 1.336158 \n",
"10011 24465835 323 1.389304 \n",
"10012 28247858 403 1.413729 \n",
"10013 29162252 371 1.265131 \n",
"\n",
" Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
"0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
"3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
"... ... ... ... ... ... \n",
"10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
"10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
"10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
"10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"\n",
" P90_Wash \n",
"0 1.058020 \n",
"1 1.058020 \n",
"2 1.062759 \n",
"3 1.058020 \n",
"4 1.063161 \n",
"... ... \n",
"10009 1.760285 \n",
"10010 1.729168 \n",
"10011 1.740112 \n",
"10012 1.740112 \n",
"10013 1.760285 \n",
"\n",
"[10014 rows x 12 columns]"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# a"
]
},
{
"cell_type": "code",
"execution_count": 88,
"execution_count": 105,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 93,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": {
"tags": []
@ -14,7 +14,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 94,
"id": "2f9a4ca7-c066-4d93-9957-0d9145f9265d",
"metadata": {
"tags": []
@ -57,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 95,
"id": "134d0b3d-5481-4975-af07-c80ab09d6dd2",
"metadata": {
"tags": []
@ -157,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": 101,
"id": "d220561a-34c9-48d2-8e2f-5d174a87540b",
"metadata": {
"tags": []
@ -189,7 +189,7 @@
" df.groupby(\"Segment\")[[\"Credit_transaction_amount\",\n",
" \"Debit_transaction_amount\",\n",
" \"Wash_Ratio\"]]\n",
" .quantile(0.90)\n",
" .quantile(0.95)\n",
" .reset_index()\n",
" )\n",
"\n",
@ -207,11 +207,11 @@
" high_pop = (\n",
" # (df[\"Credit_transaction_amount\"] > df[\"P90_Credit\"]) &\n",
" (df[\"Debit_transaction_amount\"] > df[\"P90_Debit\"]) &\n",
" (df[\"Wash_Ratio\"] > df[\"P90_Wash\"])\n",
" (df[\"Wash_Ratio\"] > 0.90)\n",
" )\n",
"\n",
" # Step 4: Randomly sample a fraction (sample_fraction) of the high-risk population\n",
" sample_fraction = 0.3 # 0.1%\n",
" sample_fraction = 0.1 # 10% of the high-risk population\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n",
" # Step 5: Set SAR_FLAG values\n",
@ -222,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": 107,
"id": "2e5a0ea9-64cd-4a8d-9a5d-e5e7b36a401a",
"metadata": {
"tags": []
@ -235,286 +235,19 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": 106,
"id": "830c7ec3-9707-46db-9b27-ac4f9d46a03a",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Focal_id</th>\n",
" <th>Credit_transaction_amount</th>\n",
" <th>Total_no_of_credit_transactions</th>\n",
" <th>Debit_transaction_amount</th>\n",
" <th>Total_no_of_debit_transactions</th>\n",
" <th>Wash_Ratio</th>\n",
" <th>Segment</th>\n",
" <th>Risk</th>\n",
" <th>SAR_FLAG</th>\n",
" <th>P90_Credit</th>\n",
" <th>P90_Debit</th>\n",
" <th>P90_Wash</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>PN489144</td>\n",
" <td>2830802741</td>\n",
" <td>2060</td>\n",
" <td>2847556186</td>\n",
" <td>1976</td>\n",
" <td>0.994117</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>PN394780</td>\n",
" <td>2872685364</td>\n",
" <td>2029</td>\n",
" <td>2743931855</td>\n",
" <td>1999</td>\n",
" <td>1.046923</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>PN195722</td>\n",
" <td>5604208368</td>\n",
" <td>3937</td>\n",
" <td>5557946505</td>\n",
" <td>4039</td>\n",
" <td>1.008324</td>\n",
" <td>SME</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.532321e+09</td>\n",
" <td>4.534860e+09</td>\n",
" <td>1.062759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>PN652566</td>\n",
" <td>1630905248</td>\n",
" <td>1152</td>\n",
" <td>1686713614</td>\n",
" <td>1169</td>\n",
" <td>0.966913</td>\n",
" <td>Whole Sale Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.400246e+09</td>\n",
" <td>4.332448e+09</td>\n",
" <td>1.058020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>PN181960</td>\n",
" <td>2157634332</td>\n",
" <td>1613</td>\n",
" <td>2039953312</td>\n",
" <td>1552</td>\n",
" <td>1.057688</td>\n",
" <td>Corporate Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>5.021582e+09</td>\n",
" <td>5.003501e+09</td>\n",
" <td>1.063161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10009</th>\n",
" <td>PN479491</td>\n",
" <td>31124877</td>\n",
" <td>246</td>\n",
" <td>23590191</td>\n",
" <td>357</td>\n",
" <td>1.319399</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10010</th>\n",
" <td>PN267550</td>\n",
" <td>36558708</td>\n",
" <td>260</td>\n",
" <td>27361057</td>\n",
" <td>366</td>\n",
" <td>1.336158</td>\n",
" <td>Priority Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.410392e+07</td>\n",
" <td>3.076443e+07</td>\n",
" <td>1.729168</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10011</th>\n",
" <td>PN293003</td>\n",
" <td>33990478</td>\n",
" <td>255</td>\n",
" <td>24465835</td>\n",
" <td>323</td>\n",
" <td>1.389304</td>\n",
" <td>Others</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10012</th>\n",
" <td>PN534105</td>\n",
" <td>39934813</td>\n",
" <td>278</td>\n",
" <td>28247858</td>\n",
" <td>403</td>\n",
" <td>1.413729</td>\n",
" <td>Others</td>\n",
" <td>High Risk</td>\n",
" <td>N</td>\n",
" <td>6.334963e+07</td>\n",
" <td>4.223903e+07</td>\n",
" <td>1.740112</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10013</th>\n",
" <td>PN390430</td>\n",
" <td>36894062</td>\n",
" <td>257</td>\n",
" <td>29162252</td>\n",
" <td>371</td>\n",
" <td>1.265131</td>\n",
" <td>Private Banking</td>\n",
" <td>Low Risk</td>\n",
" <td>N</td>\n",
" <td>4.461828e+07</td>\n",
" <td>3.176446e+07</td>\n",
" <td>1.760285</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10014 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" Focal_id Credit_transaction_amount Total_no_of_credit_transactions \\\n",
"0 PN489144 2830802741 2060 \n",
"1 PN394780 2872685364 2029 \n",
"2 PN195722 5604208368 3937 \n",
"3 PN652566 1630905248 1152 \n",
"4 PN181960 2157634332 1613 \n",
"... ... ... ... \n",
"10009 PN479491 31124877 246 \n",
"10010 PN267550 36558708 260 \n",
"10011 PN293003 33990478 255 \n",
"10012 PN534105 39934813 278 \n",
"10013 PN390430 36894062 257 \n",
"\n",
" Debit_transaction_amount Total_no_of_debit_transactions Wash_Ratio \\\n",
"0 2847556186 1976 0.994117 \n",
"1 2743931855 1999 1.046923 \n",
"2 5557946505 4039 1.008324 \n",
"3 1686713614 1169 0.966913 \n",
"4 2039953312 1552 1.057688 \n",
"... ... ... ... \n",
"10009 23590191 357 1.319399 \n",
"10010 27361057 366 1.336158 \n",
"10011 24465835 323 1.389304 \n",
"10012 28247858 403 1.413729 \n",
"10013 29162252 371 1.265131 \n",
"\n",
" Segment Risk SAR_FLAG P90_Credit P90_Debit \\\n",
"0 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"1 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"2 SME Low Risk N 4.532321e+09 4.534860e+09 \n",
"3 Whole Sale Banking Low Risk N 4.400246e+09 4.332448e+09 \n",
"4 Corporate Banking Low Risk N 5.021582e+09 5.003501e+09 \n",
"... ... ... ... ... ... \n",
"10009 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"10010 Priority Banking Low Risk N 4.410392e+07 3.076443e+07 \n",
"10011 Others Low Risk N 6.334963e+07 4.223903e+07 \n",
"10012 Others High Risk N 6.334963e+07 4.223903e+07 \n",
"10013 Private Banking Low Risk N 4.461828e+07 3.176446e+07 \n",
"\n",
" P90_Wash \n",
"0 1.058020 \n",
"1 1.058020 \n",
"2 1.062759 \n",
"3 1.058020 \n",
"4 1.063161 \n",
"... ... \n",
"10009 1.760285 \n",
"10010 1.729168 \n",
"10011 1.740112 \n",
"10012 1.740112 \n",
"10013 1.760285 \n",
"\n",
"[10014 rows x 12 columns]"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"# a"
]
},
{
"cell_type": "code",
"execution_count": 88,
"execution_count": 105,
"id": "150bb5ce-6be1-44fc-a606-6d375354626d",
"metadata": {
"tags": []

20
main.py
View File

@ -1,13 +1,13 @@
#!/usr/bin/env python
# coding: utf-8
# In[53]:
# In[93]:
import pandas as pd
# In[43]:
# In[94]:
from tms_data_interface import SQLQueryInterface
@ -20,7 +20,7 @@ seq = SQLQueryInterface(schema="transactionschema")
seq.execute_raw("show tables")
# In[54]:
# In[95]:
query = """
@ -114,7 +114,7 @@ query = """
"""
# In[84]:
# In[101]:
from tms_data_interface import SQLQueryInterface
@ -142,7 +142,7 @@ class Scenario:
df.groupby("Segment")[["Credit_transaction_amount",
"Debit_transaction_amount",
"Wash_Ratio"]]
.quantile(0.90)
.quantile(0.95)
.reset_index()
)
@ -160,11 +160,11 @@ class Scenario:
high_pop = (
# (df["Credit_transaction_amount"] > df["P90_Credit"]) &
(df["Debit_transaction_amount"] > df["P90_Debit"]) &
(df["Wash_Ratio"] > df["P90_Wash"])
(df["Wash_Ratio"] > 0.90)
)
# Step 4: Randomly sample a fraction (sample_fraction) of the high-risk population
sample_fraction = 0.3 # 0.1%
sample_fraction = 0.1 # 10% of the high-risk population
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
# Step 5: Set SAR_FLAG values
@ -173,20 +173,20 @@ class Scenario:
return df
# In[85]:
# In[107]:
# sen = Scenario()
# a = sen.logic()
# In[86]:
# In[106]:
# a
# In[88]:
# In[105]:
# a[a["SAR_FLAG"] == "Y"]