{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Generating seriatim model points for cluster analysis example" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook is modified from *generate_model_points_with_duration.ipynb* in the *basiclife* library and\n", "generates the seriatim policies for the example performed by the *cluster_model_points.ipynb* notebook.\n", "The modifications are:\n", "\n", "* `policy_count` is set to 1 for all the model points.\n", "* `duration_mth` is modified to be positive, i.e. all model points are existing policies.\n", "\n", "**Columns:**\n", "\n", "* `policy_id`: Model point identifier (the table index)\n", "* `age_at_entry`: Issue age. The samples are distributed uniformly from 20 to 59.\n", "* `sex`: \"M\" or \"F\" to indicate the policyholder's sex. Not used.\n", "* `policy_term`: Policy term in years. The samples are evenly distributed among 10, 15 and 20.\n", "* `policy_count`: The number of policies. Always 1, since each model point is a single (seriatim) policy.\n", "* `sum_assured`: Sum assured. The samples are uniformly distributed from 10,000 to 1,000,000.\n", "* `duration_mth`: Months elapsed from issue until t=0. Uniformly distributed from 1 to `12 * policy_term - 1`. 
\n", "\n", "**Number of model points:**\n", "\n", "* 10000\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from numpy.random import default_rng # Requires NumPy 1.17 or newer\n", "\n", "rng = default_rng(12345)\n", "\n", "# Number of Model Points\n", "MPCount = 10000\n", "\n", "# Issue Age (Integer): 20 - 59 year old\n", "age_at_entry = rng.integers(low=20, high=60, size=MPCount)\n", "\n", "# Sex (Char)\n", "Sex = [\n", " \"M\",\n", " \"F\"\n", "]\n", "\n", "sex = np.fromiter(map(lambda i: Sex[i], rng.integers(low=0, high=len(Sex), size=MPCount)), np.dtype('\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
age_at_entrysexpolicy_termpolicy_countsum_assuredduration_mth
policy_id
147M101622000.028
229M201752000.0213
351F101799000.039
432F201422000.0140
528M151605000.076
.....................
999647M201827000.0168
999730M151826000.0169
999845F201783000.0158
999939M201302000.041
1000022F151576000.0167
\n", "

10000 rows × 6 columns

\n", "" ], "text/plain": [ " age_at_entry sex policy_term policy_count sum_assured \\\n", "policy_id \n", "1 47 M 10 1 622000.0 \n", "2 29 M 20 1 752000.0 \n", "3 51 F 10 1 799000.0 \n", "4 32 F 20 1 422000.0 \n", "5 28 M 15 1 605000.0 \n", "... ... .. ... ... ... \n", "9996 47 M 20 1 827000.0 \n", "9997 30 M 15 1 826000.0 \n", "9998 45 F 20 1 783000.0 \n", "9999 39 M 20 1 302000.0 \n", "10000 22 F 15 1 576000.0 \n", "\n", " duration_mth \n", "policy_id \n", "1 28 \n", "2 213 \n", "3 39 \n", "4 140 \n", "5 76 \n", "... ... \n", "9996 168 \n", "9997 169 \n", "9998 158 \n", "9999 41 \n", "10000 167 \n", "\n", "[10000 rows x 6 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "attrs = [\n", " \"age_at_entry\",\n", " \"sex\",\n", " \"policy_term\",\n", " \"policy_count\",\n", " \"sum_assured\",\n", " \"duration_mth\"\n", "]\n", "\n", "data = [\n", " age_at_entry,\n", " sex,\n", " policy_term,\n", " policy_count,\n", " sum_assured,\n", " duration_mth\n", "]\n", "\n", "model_point_table = pd.DataFrame(dict(zip(attrs, data)), index=range(1, MPCount+1))\n", "model_point_table.index.name = \"policy_id\"\n", "model_point_table" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 2 }