From 383071036a1db9b870ee9ec7eb96e6edf57e1b29 Mon Sep 17 00:00:00 2001 From: user_client2024 Date: Fri, 23 May 2025 15:08:56 +0000 Subject: [PATCH] System save at 23/05/2025 20:38 by user_client2024 --- .ipynb_checkpoints/main-checkpoint.ipynb | 99 ++++++++++++++++++++---- main.ipynb | 99 ++++++++++++++++++++---- main.py | 80 ++++++++++++++++--- 3 files changed, 243 insertions(+), 35 deletions(-) diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 21cba9a..e5d1926 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -94,15 +94,15 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "execution_count": 11, + "id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", - "\n", + "import numpy as np\n", "query = \"\"\"\n", " SELECT \n", " t.transaction_key,\n", @@ -146,7 +146,83 @@ " ON p.customer_number = al.customer_number\n", "\n", " WHERE a.account_number IS NOT NULL\n", - "\"\"\"\n", + " limit 100\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def trx_count_sum_groupwise(data_filt_partywise): \n", + " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + " 'MIN_LIMIT', 'PCT_RANGE'])\n", + " \n", + " trxns = data_filt_partywise['transaction_amount'].values\n", + " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "\n", + " trxns = trxns[trxns >= min_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " group_count = 0\n", + " while len(trxns) > 0:\n", + " max_value = min_value + (pct_range * 0.01 * min_value)\n", + " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + " group_filter_trx = trxns[mask]\n", + " trx_count = len(group_filter_trx)\n", + " trx_sum = np.sum(group_filter_trx)\n", + " group_count += 1\n", + " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + " min_value, pct_range]\n", + " trxns = trxns[trxns > max_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " return groupeddata.to_dict('list')\n", + "\n", + "# ---------------------------\n", + "# Function 4: Run scenario 9\n", + "# ---------------------------\n", + "def scenario9_data(data1): \n", + " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + " trx_count_sum_groupwise).reset_index()\n", + "\n", + " df_list = []\n", + " for i in grouped.index:\n", + " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + " df_list.append(df_party)\n", + "\n", + " final_df = pd.concat(df_list, ignore_index=True) \n", + " Segment = final_df.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + " Risk = final_df.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + " SAR_FLAG = final_df.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + " \n", + " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + " \n", + " return final_df\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ "\n", "from tms_data_interface import SQLQueryInterface\n", "\n", @@ -177,7 +253,12 @@ " ]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df['Segment'] = 'SME'\n", - " return df" + " df['MIN_LIMIT'] = 50000\n", + " df['PCT_RANGE'] = 20\n", + " \n", + " scenario_data = scenario9_data(df)\n", + " \n", + " return scenario_data" ] }, { @@ -192,14 +273,6 @@ "# sen = Scenario()\n", "# sen.logic()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6de62b37-00d1-4c88-b27b-9a70e05add91", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/main.ipynb b/main.ipynb index 21cba9a..e5d1926 100644 --- a/main.ipynb +++ b/main.ipynb @@ -94,15 +94,15 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "execution_count": 11, + "id": "69d6771d-be1c-4ae1-802a-3ba7b2e8c5fb", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", - "\n", + "import numpy as np\n", "query = \"\"\"\n", " SELECT \n", " t.transaction_key,\n", @@ -146,7 +146,83 @@ " ON p.customer_number = al.customer_number\n", "\n", " WHERE a.account_number IS NOT NULL\n", - "\"\"\"\n", + " limit 100\n", + "\"\"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "82c2152f-513c-4fde-a4a9-6ee3a01ef897", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def trx_count_sum_groupwise(data_filt_partywise): \n", + " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + " 'MIN_LIMIT', 'PCT_RANGE'])\n", + " \n", + " trxns = data_filt_partywise['transaction_amount'].values\n", + " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "\n", + " trxns = trxns[trxns >= min_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " group_count = 0\n", + " while len(trxns) > 0:\n", + " max_value = min_value + (pct_range * 0.01 * min_value)\n", + " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + " group_filter_trx = trxns[mask]\n", + " trx_count = len(group_filter_trx)\n", + " trx_sum = np.sum(group_filter_trx)\n", + " group_count += 1\n", + " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + " min_value, pct_range]\n", + " trxns = trxns[trxns > max_value]\n", + " if len(trxns) > 0:\n", + " min_value = trxns[0]\n", + "\n", + " return groupeddata.to_dict('list')\n", + "\n", + "# ---------------------------\n", + "# Function 4: Run scenario 9\n", + "# ---------------------------\n", + "def scenario9_data(data1): \n", + " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + " trx_count_sum_groupwise).reset_index()\n", + "\n", + " df_list = []\n", + " for i in grouped.index:\n", + " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + " df_list.append(df_party)\n", + "\n", + " final_df = pd.concat(df_list, ignore_index=True) \n", + " Segment = final_df.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + " Risk = final_df.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + " SAR_FLAG = final_df.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + " \n", + " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + " \n", + " return final_df\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b6c85de2-6a47-4109-8885-c138c289ec25", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ "\n", "from tms_data_interface import SQLQueryInterface\n", "\n", @@ -177,7 +253,12 @@ " ]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df['Segment'] = 'SME'\n", - " return df" + " df['MIN_LIMIT'] = 50000\n", + " df['PCT_RANGE'] = 20\n", + " \n", + " scenario_data = scenario9_data(df)\n", + " \n", + " return scenario_data" ] }, { @@ -192,14 +273,6 @@ "# sen = Scenario()\n", "# sen.logic()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6de62b37-00d1-4c88-b27b-9a70e05add91", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/main.py b/main.py index dc92076..91d14be 100644 --- a/main.py +++ b/main.py @@ -87,11 +87,11 @@ # return df -# In[5]: +# In[11]: import pandas as pd - +import numpy as np query = """ SELECT t.transaction_key, @@ -135,8 +135,71 @@ query = """ ON p.customer_number = al.customer_number WHERE a.account_number IS NOT NULL + limit 100 """ + +# In[12]: + + +def trx_count_sum_groupwise(data_filt_partywise): + data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') + groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', + 'MIN_LIMIT', 'PCT_RANGE']) + + trxns = data_filt_partywise['transaction_amount'].values + pct_range = data_filt_partywise['PCT_RANGE'].max() + min_value = data_filt_partywise['MIN_LIMIT'].max() + + trxns = trxns[trxns >= min_value] + if len(trxns) > 0: + min_value = trxns[0] + + group_count = 0 + while len(trxns) > 0: + max_value = min_value + (pct_range * 0.01 * min_value) + mask = np.logical_and(trxns >= min_value, trxns <= max_value) + group_filter_trx = trxns[mask] + trx_count = len(group_filter_trx) + trx_sum = np.sum(group_filter_trx) + group_count += 1 + groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, + min_value, pct_range] + trxns = trxns[trxns > max_value] + if len(trxns) > 0: + min_value = trxns[0] + + return groupeddata.to_dict('list') + +# --------------------------- +# Function 4: Run scenario 9 +# --------------------------- +def scenario9_data(data1): + grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply( + trx_count_sum_groupwise).reset_index() + + df_list = [] + for i in grouped.index: + df_party = pd.DataFrame(grouped.iloc[i, -1]) + df_party['Focal_id'] = grouped.loc[i, 'Focal_id'] + df_list.append(df_party) + + final_df = pd.concat(df_list, ignore_index=True) + Segment = final_df.groupby('Focal_id')['Segment'].agg('max').reset_index() + Risk = final_df.groupby('Focal_id')['Risk'].agg('max').reset_index() + SAR_FLAG = final_df.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index() + + final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left') + final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left') + final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left') + + return final_df + + + +# In[13]: + + from tms_data_interface import SQLQueryInterface class Scenario: @@ -166,7 +229,12 @@ class Scenario: ] df = pd.DataFrame(row_list, columns = cols) df['Segment'] = 'SME' - return df + df['MIN_LIMIT'] = 50000 + df['PCT_RANGE'] = 20 + + scenario_data = scenario9_data(df) + + return scenario_data # In[4]: @@ -175,9 +243,3 @@ class Scenario: # sen = Scenario() # sen.logic() - -# In[ ]: - - - -