{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generating seriatim model points for cluster analysis example"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook is modified from *generate_model_points_with_duration.ipynb* in the *basiclife* library and\n",
    "generates the seriatim policies for the example performed by the *cluster_model_points.ipynb* notebook.\n",
    "The modifications are:\n",
    "\n",
    "* `policy_count` is set to 1 for all the model points.\n",
    "* `duration_mth` is modified to be positive, i.e. all model points are existing policies.\n",
    "\n",
    "**Columns:**\n",
    "\n",
    "* `policy_id`: Policy identifier, used as the index of the table. Runs from 1 to the number of model points.\n",
    "* `age_at_entry`: Issue age. The samples are distributed uniformly from 20 to 59.\n",
    "* `sex`: \"M\" or \"F\" to indicate the policyholder's sex. Not used.\n",
    "* `policy_term`: Policy term in years. The samples are evenly distributed among 10, 15 and 20.\n",
    "* `policy_count`: The number of policies. Always 1, because each model point represents a single policy.\n",
    "* `sum_assured`: Sum assured. The samples are uniformly distributed from 10,000 to 1,000,000 and rounded to the nearest 1,000.\n",
    "* `duration_mth`: Months elapsed from the issue until t=0. Uniformly distributed from 1 to `12 * policy_term - 1`.\n",
    "\n",
    "**Number of model points:**\n",
    "\n",
    "* 10000\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.random import default_rng  # Requires NumPy 1.17 or newer\n",
    "\n",
    "# Seeded generator so that every run reproduces the same sample.\n",
    "# The draws below must stay in this order to keep the random stream unchanged.\n",
    "rng = default_rng(12345)\n",
    "\n",
    "# Number of model points to generate\n",
    "MPCount = 10000\n",
    "\n",
    "# age_at_entry: integer issue age, 20 to 59 inclusive (`high` is exclusive)\n",
    "age_at_entry = rng.integers(low=20, high=60, size=MPCount)\n",
    "\n",
    "# sex: one-character code, picked per model point by a random position in `Sex`\n",
    "Sex = [\"M\", \"F\"]\n",
    "sex_codes = rng.integers(low=0, high=len(Sex), size=MPCount)\n",
    "sex = np.array(Sex, dtype='<U1')[sex_codes]\n",
    "\n",
    "# policy_term: 10, 15 or 20 years (draw 0, 1 or 2, scale by 5, shift by 10)\n",
    "policy_term = 10 + 5 * rng.integers(low=0, high=3, size=MPCount)\n",
    "\n",
    "# sum_assured: float between 10000 and 1000000, rounded to the nearest 1000\n",
    "sum_assured = np.round(10000 + (1000000 - 10000) * rng.random(size=MPCount), -3)\n",
    "\n",
    "# duration_mth: integer months in force, 0 < duration_mth < 12 * policy_term\n",
    "duration_mth = 1 + np.floor((policy_term * 12 - 1) * rng.random(size=MPCount)).astype(int)\n",
    "\n",
    "# policy_count: every seriatim model point stands for exactly one policy\n",
    "policy_count = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age_at_entry</th>\n",
       "      <th>sex</th>\n",
       "      <th>policy_term</th>\n",
       "      <th>policy_count</th>\n",
       "      <th>sum_assured</th>\n",
       "      <th>duration_mth</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>policy_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>47</td>\n",
       "      <td>M</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>622000.0</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>29</td>\n",
       "      <td>M</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>752000.0</td>\n",
       "      <td>213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>51</td>\n",
       "      <td>F</td>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>799000.0</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>F</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>422000.0</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>28</td>\n",
       "      <td>M</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>605000.0</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9996</th>\n",
       "      <td>47</td>\n",
       "      <td>M</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>827000.0</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9997</th>\n",
       "      <td>30</td>\n",
       "      <td>M</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>826000.0</td>\n",
       "      <td>169</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9998</th>\n",
       "      <td>45</td>\n",
       "      <td>F</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>783000.0</td>\n",
       "      <td>158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9999</th>\n",
       "      <td>39</td>\n",
       "      <td>M</td>\n",
       "      <td>20</td>\n",
       "      <td>1</td>\n",
       "      <td>302000.0</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10000</th>\n",
       "      <td>22</td>\n",
       "      <td>F</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>576000.0</td>\n",
       "      <td>167</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           age_at_entry sex  policy_term  policy_count  sum_assured  \\\n",
       "policy_id                                                             \n",
       "1                    47   M           10             1     622000.0   \n",
       "2                    29   M           20             1     752000.0   \n",
       "3                    51   F           10             1     799000.0   \n",
       "4                    32   F           20             1     422000.0   \n",
       "5                    28   M           15             1     605000.0   \n",
       "...                 ...  ..          ...           ...          ...   \n",
       "9996                 47   M           20             1     827000.0   \n",
       "9997                 30   M           15             1     826000.0   \n",
       "9998                 45   F           20             1     783000.0   \n",
       "9999                 39   M           20             1     302000.0   \n",
       "10000                22   F           15             1     576000.0   \n",
       "\n",
       "           duration_mth  \n",
       "policy_id                \n",
       "1                    28  \n",
       "2                   213  \n",
       "3                    39  \n",
       "4                   140  \n",
       "5                    76  \n",
       "...                 ...  \n",
       "9996                168  \n",
       "9997                169  \n",
       "9998                158  \n",
       "9999                 41  \n",
       "10000               167  \n",
       "\n",
       "[10000 rows x 6 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Column names, in the order they appear in the table\n",
    "attrs = [\"age_at_entry\", \"sex\", \"policy_term\", \"policy_count\", \"sum_assured\", \"duration_mth\"]\n",
    "\n",
    "# Column values, aligned position by position with `attrs`\n",
    "data = [age_at_entry, sex, policy_term, policy_count, sum_assured, duration_mth]\n",
    "\n",
    "# One row per model point, indexed 1..MPCount under the name \"policy_id\".\n",
    "# The scalar `policy_count` is broadcast to every row of the index.\n",
    "model_point_table = pd.DataFrame(\n",
    "    dict(zip(attrs, data)),\n",
    "    index=range(1, MPCount + 1),\n",
    ").rename_axis(\"policy_id\")\n",
    "model_point_table"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}