System save at 27/11/2025 10:18 by user_client2024

This commit is contained in:
user_client2024 2025-11-27 04:48:20 +00:00
parent 1dc9b40c55
commit a088b1f19d
3 changed files with 70 additions and 28 deletions

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 3,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -14,7 +14,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 4,
"id": "f35b1262-3c20-44a6-bbd3-2679a15551e6", "id": "f35b1262-3c20-44a6-bbd3-2679a15551e6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -27,7 +27,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 5,
"id": "e52124e8-4f62-449d-8852-1e04f8c01ecc", "id": "e52124e8-4f62-449d-8852-1e04f8c01ecc",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -36,17 +36,20 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[['account_data_v1'],\n", "[['abhitesttable'],\n",
" ['abhitesttable22'],\n",
" ['account_data_v1'],\n",
" ['account_data_v2'],\n", " ['account_data_v2'],\n",
" ['alert_data_v1'],\n", " ['alert_data_v1'],\n",
" ['alert_data_v2'],\n", " ['alert_data_v2'],\n",
" ['customer_data_v1'],\n", " ['customer_data_v1'],\n",
" ['customer_data_v2'],\n", " ['customer_data_v2'],\n",
" ['percentile_dist'],\n",
" ['transaction10m'],\n", " ['transaction10m'],\n",
" ['transaction60m']]" " ['transaction60m']]"
] ]
}, },
"execution_count": 3, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -57,7 +60,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"id": "dda35e8d-8997-42d4-a472-844c208d0f49", "id": "dda35e8d-8997-42d4-a472-844c208d0f49",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -127,7 +130,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 25,
"id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4", "id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -158,15 +161,17 @@
" .reset_index()\n", " .reset_index()\n",
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
" )\n", " )\n",
" print(p98)\n",
"\n", "\n",
" # Merge percentile back to main dataframe\n", " # Merge percentile back to main dataframe\n",
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n", " df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
"\n", "\n",
" # Step 2: Identify population above 98th percentile\n", " # Step 2: Identify population above 98th percentile\n",
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
" print(high_pop)\n",
"\n", "\n",
" # Step 3: From this high-risk population, select 0.1% random sample\n", " # Step 3: From this high-risk population, select 0.1% random sample\n",
" sample_fraction = 0.001 # 0.1%\n", " sample_fraction = 0.1 # 0.1%\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n", "\n",
" # Step 4: Assign SAR_FLAG\n", " # Step 4: Assign SAR_FLAG\n",
@ -178,7 +183,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 28,
"id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6", "id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -186,7 +191,19 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# sen = Scenario()\n", "# sen = Scenario()\n",
"# sen.logic()" "# a = sen.logic()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "2eac1531-e4b9-4b51-a216-75013c4c4f15",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# a[a['SAR_FLAG'] == \"Y\"]"
] ]
}, },
{ {

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 3,
"id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -14,7 +14,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 4,
"id": "f35b1262-3c20-44a6-bbd3-2679a15551e6", "id": "f35b1262-3c20-44a6-bbd3-2679a15551e6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -27,7 +27,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 5,
"id": "e52124e8-4f62-449d-8852-1e04f8c01ecc", "id": "e52124e8-4f62-449d-8852-1e04f8c01ecc",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -36,17 +36,20 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[['account_data_v1'],\n", "[['abhitesttable'],\n",
" ['abhitesttable22'],\n",
" ['account_data_v1'],\n",
" ['account_data_v2'],\n", " ['account_data_v2'],\n",
" ['alert_data_v1'],\n", " ['alert_data_v1'],\n",
" ['alert_data_v2'],\n", " ['alert_data_v2'],\n",
" ['customer_data_v1'],\n", " ['customer_data_v1'],\n",
" ['customer_data_v2'],\n", " ['customer_data_v2'],\n",
" ['percentile_dist'],\n",
" ['transaction10m'],\n", " ['transaction10m'],\n",
" ['transaction60m']]" " ['transaction60m']]"
] ]
}, },
"execution_count": 3, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -57,7 +60,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"id": "dda35e8d-8997-42d4-a472-844c208d0f49", "id": "dda35e8d-8997-42d4-a472-844c208d0f49",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -127,7 +130,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 25,
"id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4", "id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -158,15 +161,17 @@
" .reset_index()\n", " .reset_index()\n",
" .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n",
" )\n", " )\n",
" print(p98)\n",
"\n", "\n",
" # Merge percentile back to main dataframe\n", " # Merge percentile back to main dataframe\n",
" df = df.merge(p98, on=\"Segment\", how=\"left\")\n", " df = df.merge(p98, on=\"Segment\", how=\"left\")\n",
"\n", "\n",
" # Step 2: Identify population above 98th percentile\n", " # Step 2: Identify population above 98th percentile\n",
" high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n",
" print(high_pop)\n",
"\n", "\n",
" # Step 3: From this high-risk population, select 0.1% random sample\n", " # Step 3: From this high-risk population, select 0.1% random sample\n",
" sample_fraction = 0.001 # 0.1%\n", " sample_fraction = 0.1 # 0.1%\n",
" high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n",
"\n", "\n",
" # Step 4: Assign SAR_FLAG\n", " # Step 4: Assign SAR_FLAG\n",
@ -178,7 +183,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 28,
"id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6", "id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -186,7 +191,19 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# sen = Scenario()\n", "# sen = Scenario()\n",
"# sen.logic()" "# a = sen.logic()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "2eac1531-e4b9-4b51-a216-75013c4c4f15",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# a[a['SAR_FLAG'] == \"Y\"]"
] ]
}, },
{ {

24
main.py
View File

@ -1,26 +1,26 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
# In[1]: # In[3]:
import pandas as pd import pandas as pd
# In[2]: # In[4]:
from tms_data_interface import SQLQueryInterface from tms_data_interface import SQLQueryInterface
seq = SQLQueryInterface(schema="transactionschema") seq = SQLQueryInterface(schema="transactionschema")
# In[3]: # In[5]:
seq.execute_raw("show tables") seq.execute_raw("show tables")
# In[4]: # In[6]:
query = """ query = """
@ -84,7 +84,7 @@ query = """
""" """
# In[5]: # In[25]:
from tms_data_interface import SQLQueryInterface from tms_data_interface import SQLQueryInterface
@ -111,15 +111,17 @@ class Scenario:
.reset_index() .reset_index()
.rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"}) .rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"})
) )
print(p98)
# Merge percentile back to main dataframe # Merge percentile back to main dataframe
df = df.merge(p98, on="Segment", how="left") df = df.merge(p98, on="Segment", how="left")
# Step 2: Identify population above 98th percentile # Step 2: Identify population above 98th percentile
high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"] high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"]
print(high_pop)
# Step 3: From this high-risk population, select 0.1% random sample # Step 3: From this high-risk population, select 10% random sample
sample_fraction = 0.001 # 0.1% sample_fraction = 0.1 # 10% (was 0.001 = 0.1%; confirm intended rate)
high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index
# Step 4: Assign SAR_FLAG # Step 4: Assign SAR_FLAG
@ -129,11 +131,17 @@ class Scenario:
return df return df
# In[7]: # In[28]:
# sen = Scenario() # sen = Scenario()
# sen.logic() # a = sen.logic()
# In[29]:
# a[a['SAR_FLAG'] == "Y"]
# In[ ]: # In[ ]: