diff --git a/.ipynb_checkpoints/main-checkpoint.ipynb b/.ipynb_checkpoints/main-checkpoint.ipynb index 46ea052..83b09a8 100644 --- a/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/.ipynb_checkpoints/main-checkpoint.ipynb @@ -159,58 +159,58 @@ }, "outputs": [], "source": [ - "def trx_count_sum_groupwise(data_filt_partywise): \n", - " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", - " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", - " 'MIN_LIMIT', 'PCT_RANGE'])\n", + "# def trx_count_sum_groupwise(data_filt_partywise): \n", + "# data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + "# groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + "# 'MIN_LIMIT', 'PCT_RANGE'])\n", " \n", - " trxns = data_filt_partywise['transaction_amount'].values\n", - " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", - " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "# trxns = data_filt_partywise['transaction_amount'].values\n", + "# pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + "# min_value = data_filt_partywise['MIN_LIMIT'].max()\n", "\n", - " trxns = trxns[trxns >= min_value]\n", - " if len(trxns) > 0:\n", - " min_value = trxns[0]\n", + "# trxns = trxns[trxns >= min_value]\n", + "# if len(trxns) > 0:\n", + "# min_value = trxns[0]\n", "\n", - " group_count = 0\n", - " while len(trxns) > 0:\n", - " max_value = min_value + (pct_range * 0.01 * min_value)\n", - " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", - " group_filter_trx = trxns[mask]\n", - " trx_count = len(group_filter_trx)\n", - " trx_sum = np.sum(group_filter_trx)\n", - " group_count += 1\n", - " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", - " min_value, pct_range]\n", - " trxns = trxns[trxns > max_value]\n", - " if len(trxns) > 0:\n", - " min_value = trxns[0]\n", + "# group_count = 0\n", + "# while len(trxns) > 0:\n", + "# max_value = min_value + (pct_range * 0.01 * min_value)\n", + "# mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + "# group_filter_trx = trxns[mask]\n", + "# trx_count = len(group_filter_trx)\n", + "# trx_sum = np.sum(group_filter_trx)\n", + "# group_count += 1\n", + "# groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + "# min_value, pct_range]\n", + "# trxns = trxns[trxns > max_value]\n", + "# if len(trxns) > 0:\n", + "# min_value = trxns[0]\n", "\n", - " return groupeddata.to_dict('list')\n", + "# return groupeddata.to_dict('list')\n", "\n", - "# ---------------------------\n", - "# Function 4: Run scenario 9\n", - "# ---------------------------\n", - "def scenario9_data(data1): \n", - " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", - " trx_count_sum_groupwise).reset_index()\n", + "# # ---------------------------\n", + "# # Function 4: Run scenario 9\n", + "# # ---------------------------\n", + "# def scenario9_data(data1): \n", + "# grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + "# trx_count_sum_groupwise).reset_index()\n", "\n", - " df_list = []\n", - " for i in grouped.index:\n", - " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", - " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", - " df_list.append(df_party)\n", + "# df_list = []\n", + "# for i in grouped.index:\n", + "# df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + "# df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + "# df_list.append(df_party)\n", "\n", - " final_df = pd.concat(df_list, ignore_index=True) \n", - " Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", - " Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", - " SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + "# final_df = pd.concat(df_list, ignore_index=True) \n", + "# Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + "# Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + "# SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", " \n", - " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", - " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", - " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", " \n", - " return final_df\n", + "# return final_df\n", " " ] }, @@ -253,10 +253,10 @@ " ]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df['Segment'] = 'SME'\n", - " df['MIN_LIMIT'] = 50000\n", - " df['PCT_RANGE'] = 20\n", + "# df['MIN_LIMIT'] = 50000\n", + "# df['PCT_RANGE'] = 20\n", " \n", - " scenario_data = scenario9_data(df)\n", + "# scenario_data = scenario9_data(df)\n", " \n", " return scenario_data" ] diff --git a/main.ipynb b/main.ipynb index 46ea052..83b09a8 100644 --- a/main.ipynb +++ b/main.ipynb @@ -159,58 +159,58 @@ }, "outputs": [], "source": [ - "def trx_count_sum_groupwise(data_filt_partywise): \n", - " data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", - " groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", - " 'MIN_LIMIT', 'PCT_RANGE'])\n", + "# def trx_count_sum_groupwise(data_filt_partywise): \n", + "# data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') \n", + "# groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', \n", + "# 'MIN_LIMIT', 'PCT_RANGE'])\n", " \n", - " trxns = data_filt_partywise['transaction_amount'].values\n", - " pct_range = data_filt_partywise['PCT_RANGE'].max()\n", - " min_value = data_filt_partywise['MIN_LIMIT'].max()\n", + "# trxns = data_filt_partywise['transaction_amount'].values\n", + "# pct_range = data_filt_partywise['PCT_RANGE'].max()\n", + "# min_value = data_filt_partywise['MIN_LIMIT'].max()\n", "\n", - " trxns = trxns[trxns >= min_value]\n", - " if len(trxns) > 0:\n", - " min_value = trxns[0]\n", + "# trxns = trxns[trxns >= min_value]\n", + "# if len(trxns) > 0:\n", + "# min_value = trxns[0]\n", "\n", - " group_count = 0\n", - " while len(trxns) > 0:\n", - " max_value = min_value + (pct_range * 0.01 * min_value)\n", - " mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", - " group_filter_trx = trxns[mask]\n", - " trx_count = len(group_filter_trx)\n", - " trx_sum = np.sum(group_filter_trx)\n", - " group_count += 1\n", - " groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", - " min_value, pct_range]\n", - " trxns = trxns[trxns > max_value]\n", - " if len(trxns) > 0:\n", - " min_value = trxns[0]\n", + "# group_count = 0\n", + "# while len(trxns) > 0:\n", + "# max_value = min_value + (pct_range * 0.01 * min_value)\n", + "# mask = np.logical_and(trxns >= min_value, trxns <= max_value)\n", + "# group_filter_trx = trxns[mask]\n", + "# trx_count = len(group_filter_trx)\n", + "# trx_sum = np.sum(group_filter_trx)\n", + "# group_count += 1\n", + "# groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, \n", + "# min_value, pct_range]\n", + "# trxns = trxns[trxns > max_value]\n", + "# if len(trxns) > 0:\n", + "# min_value = trxns[0]\n", "\n", - " return groupeddata.to_dict('list')\n", + "# return groupeddata.to_dict('list')\n", "\n", - "# ---------------------------\n", - "# Function 4: Run scenario 9\n", - "# ---------------------------\n", - "def scenario9_data(data1): \n", - " grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", - " trx_count_sum_groupwise).reset_index()\n", + "# # ---------------------------\n", + "# # Function 4: Run scenario 9\n", + "# # ---------------------------\n", + "# def scenario9_data(data1): \n", + "# grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply(\n", + "# trx_count_sum_groupwise).reset_index()\n", "\n", - " df_list = []\n", - " for i in grouped.index:\n", - " df_party = pd.DataFrame(grouped.iloc[i, -1])\n", - " df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", - " df_list.append(df_party)\n", + "# df_list = []\n", + "# for i in grouped.index:\n", + "# df_party = pd.DataFrame(grouped.iloc[i, -1])\n", + "# df_party['Focal_id'] = grouped.loc[i, 'Focal_id']\n", + "# df_list.append(df_party)\n", "\n", - " final_df = pd.concat(df_list, ignore_index=True) \n", - " Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", - " Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", - " SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", + "# final_df = pd.concat(df_list, ignore_index=True) \n", + "# Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index()\n", + "# Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index()\n", + "# SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index()\n", " \n", - " final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", - " final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", - " final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left')\n", + "# final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left')\n", " \n", - " return final_df\n", + "# return final_df\n", " " ] }, @@ -253,10 +253,10 @@ " ]\n", " df = pd.DataFrame(row_list, columns = cols)\n", " df['Segment'] = 'SME'\n", - " df['MIN_LIMIT'] = 50000\n", - " df['PCT_RANGE'] = 20\n", + "# df['MIN_LIMIT'] = 50000\n", + "# df['PCT_RANGE'] = 20\n", " \n", - " scenario_data = scenario9_data(df)\n", + "# scenario_data = scenario9_data(df)\n", " \n", " return scenario_data" ] diff --git a/main.py b/main.py index b9f9b92..47b7fca 100644 --- a/main.py +++ b/main.py @@ -142,58 +142,58 @@ query = """ # In[20]: -def trx_count_sum_groupwise(data_filt_partywise): - data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') - groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', - 'MIN_LIMIT', 'PCT_RANGE']) +# def trx_count_sum_groupwise(data_filt_partywise): +# data_filt_partywise = data_filt_partywise.sort_values(by='transaction_amount') +# groupeddata = pd.DataFrame(columns=['group_no', 'trxn_cnt', 'trxn_sum_amt', +# 'MIN_LIMIT', 'PCT_RANGE']) - trxns = data_filt_partywise['transaction_amount'].values - pct_range = data_filt_partywise['PCT_RANGE'].max() - min_value = data_filt_partywise['MIN_LIMIT'].max() +# trxns = data_filt_partywise['transaction_amount'].values +# pct_range = data_filt_partywise['PCT_RANGE'].max() +# min_value = data_filt_partywise['MIN_LIMIT'].max() - trxns = trxns[trxns >= min_value] - if len(trxns) > 0: - min_value = trxns[0] +# trxns = trxns[trxns >= min_value] +# if len(trxns) > 0: +# min_value = trxns[0] - group_count = 0 - while len(trxns) > 0: - max_value = min_value + (pct_range * 0.01 * min_value) - mask = np.logical_and(trxns >= min_value, trxns <= max_value) - group_filter_trx = trxns[mask] - trx_count = len(group_filter_trx) - trx_sum = np.sum(group_filter_trx) - group_count += 1 - groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, - min_value, pct_range] - trxns = trxns[trxns > max_value] - if len(trxns) > 0: - min_value = trxns[0] +# group_count = 0 +# while len(trxns) > 0: +# max_value = min_value + (pct_range * 0.01 * min_value) +# mask = np.logical_and(trxns >= min_value, trxns <= max_value) +# group_filter_trx = trxns[mask] +# trx_count = len(group_filter_trx) +# trx_sum = np.sum(group_filter_trx) +# group_count += 1 +# groupeddata.loc[len(groupeddata)] = [group_count, trx_count, trx_sum, +# min_value, pct_range] +# trxns = trxns[trxns > max_value] +# if len(trxns) > 0: +# min_value = trxns[0] - return groupeddata.to_dict('list') +# return groupeddata.to_dict('list') -# --------------------------- -# Function 4: Run scenario 9 -# --------------------------- -def scenario9_data(data1): - grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply( - trx_count_sum_groupwise).reset_index() +# # --------------------------- +# # Function 4: Run scenario 9 +# # --------------------------- +# def scenario9_data(data1): +# grouped = data1.groupby('Focal_id')[['transaction_amount', 'MIN_LIMIT', 'PCT_RANGE']].apply( +# trx_count_sum_groupwise).reset_index() - df_list = [] - for i in grouped.index: - df_party = pd.DataFrame(grouped.iloc[i, -1]) - df_party['Focal_id'] = grouped.loc[i, 'Focal_id'] - df_list.append(df_party) +# df_list = [] +# for i in grouped.index: +# df_party = pd.DataFrame(grouped.iloc[i, -1]) +# df_party['Focal_id'] = grouped.loc[i, 'Focal_id'] +# df_list.append(df_party) - final_df = pd.concat(df_list, ignore_index=True) - Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index() - Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index() - SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index() +# final_df = pd.concat(df_list, ignore_index=True) +# Segment = data1.groupby('Focal_id')['Segment'].agg('max').reset_index() +# Risk = data1.groupby('Focal_id')['Risk'].agg('max').reset_index() +# SAR_FLAG = data1.groupby('Focal_id')['SAR_FLAG'].agg('max').reset_index() - final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left') - final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left') - final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left') +# final_df = final_df.merge(Segment,on = 'Focal_id', how = 'left') +# final_df = final_df.merge(Risk,on = 'Focal_id', how = 'left') +# final_df = final_df.merge(SAR_FLAG,on = 'Focal_id', how = 'left') - return final_df +# return final_df @@ -229,10 +229,10 @@ class Scenario: ] df = pd.DataFrame(row_list, columns = cols) df['Segment'] = 'SME' - df['MIN_LIMIT'] = 50000 - df['PCT_RANGE'] = 20 +# df['MIN_LIMIT'] = 50000 +# df['PCT_RANGE'] = 20 - scenario_data = scenario9_data(df) +# scenario_data = scenario9_data(df) return scenario_data