From a088b1f19d14f160da18f73015ac13d3d90874dd Mon Sep 17 00:00:00 2001 From: user_client2024 Date: Thu, 27 Nov 2025 04:48:20 +0000 Subject: [PATCH] System save at 27/11/2025 10:18 by user_client2024 --- .ipynb_checkpoints/main-checkpoint.ipynb | 37 +++++++++++++++++------- main.ipynb | 37 +++++++++++++++++------- main.py | 24 ++++++++++----- 3 files changed, 70 insertions(+), 28 deletions(-) diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index a2888b8..107e3d3 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "f35b1262-3c20-44a6-bbd3-2679a15551e6", "metadata": { "tags": [] @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "e52124e8-4f62-449d-8852-1e04f8c01ecc", "metadata": { "tags": [] @@ -36,17 +36,20 @@ { "data": { "text/plain": [ - "[['account_data_v1'],\n", + "[['abhitesttable'],\n", + " ['abhitesttable22'],\n", + " ['account_data_v1'],\n", " ['account_data_v2'],\n", " ['alert_data_v1'],\n", " ['alert_data_v2'],\n", " ['customer_data_v1'],\n", " ['customer_data_v2'],\n", + " ['percentile_dist'],\n", " ['transaction10m'],\n", " ['transaction60m']]" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -57,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "dda35e8d-8997-42d4-a472-844c208d0f49", "metadata": { "tags": [] @@ -127,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 25, "id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4", "metadata": { "tags": [] @@ -158,15 +161,17 @@ " .reset_index()\n", " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", " )\n", + " print(p98)\n", "\n", " # Merge percentile back to main dataframe\n", " df = df.merge(p98, on=\"Segment\", how=\"left\")\n", "\n", " # Step 2: Identify population above 98th percentile\n", " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", + " print(high_pop)\n", "\n", " # Step 3: From this high-risk population, select 0.1% random sample\n", - " sample_fraction = 0.001 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 4: Assign SAR_FLAG\n", @@ -178,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 28, "id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6", "metadata": { "tags": [] @@ -186,7 +191,19 @@ "outputs": [], "source": [ "# sen = Scenario()\n", - "# sen.logic()" + "# a = sen.logic()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "2eac1531-e4b9-4b51-a216-75013c4c4f15", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# a[a['SAR_FLAG'] == \"Y\"]" ] }, { diff --git a/main.ipynb b/main.ipynb index a2888b8..107e3d3 100644 --- a/main.ipynb +++ b/main.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "e706cfb0-2234-4c4c-95d8-d1968f656aa0", "metadata": { "tags": [] @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "f35b1262-3c20-44a6-bbd3-2679a15551e6", "metadata": { "tags": [] @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "e52124e8-4f62-449d-8852-1e04f8c01ecc", "metadata": { "tags": [] @@ -36,17 +36,20 @@ { "data": { "text/plain": [ - "[['account_data_v1'],\n", + "[['abhitesttable'],\n", + " ['abhitesttable22'],\n", + " ['account_data_v1'],\n", " ['account_data_v2'],\n", " ['alert_data_v1'],\n", " ['alert_data_v2'],\n", " ['customer_data_v1'],\n", " ['customer_data_v2'],\n", + " ['percentile_dist'],\n", " ['transaction10m'],\n", " ['transaction60m']]" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -57,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "dda35e8d-8997-42d4-a472-844c208d0f49", "metadata": { "tags": [] @@ -127,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 25, "id": "fb0405fe-cd10-4da1-9f06-fe52cff942b4", "metadata": { "tags": [] @@ -158,15 +161,17 @@ " .reset_index()\n", " .rename(columns={\"Total_Wire_Deposit_Amt\": \"P98_Value\"})\n", " )\n", + " print(p98)\n", "\n", " # Merge percentile back to main dataframe\n", " df = df.merge(p98, on=\"Segment\", how=\"left\")\n", "\n", " # Step 2: Identify population above 98th percentile\n", " high_pop = df[\"Total_Wire_Deposit_Amt\"] > df[\"P98_Value\"]\n", + " print(high_pop)\n", "\n", " # Step 3: From this high-risk population, select 0.1% random sample\n", - " sample_fraction = 0.001 # 0.1%\n", + " sample_fraction = 0.1 # 0.1%\n", " high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index\n", "\n", " # Step 4: Assign SAR_FLAG\n", @@ -178,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 28, "id": "ddc11b42-6cbb-419b-9e26-73e7606e18a6", "metadata": { "tags": [] @@ -186,7 +191,19 @@ "outputs": [], "source": [ "# sen = Scenario()\n", - "# sen.logic()" + "# a = sen.logic()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "2eac1531-e4b9-4b51-a216-75013c4c4f15", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# a[a['SAR_FLAG'] == \"Y\"]" ] }, { diff --git a/main.py b/main.py index 8cbee45..5279646 100644 --- a/main.py +++ b/main.py @@ -1,26 +1,26 @@ #!/usr/bin/env python # coding: utf-8 -# In[1]: +# In[3]: import pandas as pd -# In[2]: +# In[4]: from tms_data_interface import SQLQueryInterface seq = SQLQueryInterface(schema="transactionschema") -# In[3]: +# In[5]: seq.execute_raw("show tables") -# In[4]: +# In[6]: query = """ @@ -84,7 +84,7 @@ query = """ """ -# In[5]: +# In[25]: from tms_data_interface import SQLQueryInterface @@ -111,15 +111,17 @@ class Scenario: .reset_index() .rename(columns={"Total_Wire_Deposit_Amt": "P98_Value"}) ) + print(p98) # Merge percentile back to main dataframe df = df.merge(p98, on="Segment", how="left") # Step 2: Identify population above 98th percentile high_pop = df["Total_Wire_Deposit_Amt"] > df["P98_Value"] + print(high_pop) # Step 3: From this high-risk population, select 0.1% random sample - sample_fraction = 0.001 # 0.1% + sample_fraction = 0.1 # 0.1% high_pop_indices = df[high_pop].sample(frac=sample_fraction, random_state=42).index # Step 4: Assign SAR_FLAG @@ -129,11 +131,17 @@ class Scenario: return df -# In[7]: +# In[28]: # sen = Scenario() -# sen.logic() +# a = sen.logic() + + +# In[29]: + + +# a[a['SAR_FLAG'] == "Y"] # In[ ]: