ch13

2021-10-28 17:41:16 +05:30
parent d4c7ab6c61
commit 03bb4d43c0
7 changed files with 2865 additions and 0 deletions
@@ -0,0 +1,372 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Chapter 13 - Data Science\n",
+    "## Data Preparation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0 - Setting up the notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import random\n",
+    "from datetime import date, timedelta\n",
+    "\n",
+    "import faker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1 - Preparing the Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# create the faker to populate the data\n",
+    "fake = faker.Faker()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "usernames = set()\n",
+    "usernames_no = 1000\n",
+    "\n",
+    "# populate the set with 1000 unique usernames; a set silently\n",
+    "# drops duplicates, so keep drawing until it is large enough\n",
+    "while len(usernames) < usernames_no:\n",
+    "    usernames.add(fake.user_name())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['{\"username\": \"susan42\", \"name\": \"Emily Smith\", \"gender\": \"F\", \"email\": \"vmckinney@leon.com\", \"age\": 53, \"address\": \"66537 Riley Mission Apt. 337\\\\nNorth Jennifer, NH 95781\"}',\n",
+       " '{\"username\": \"sarahcarpenter\", \"name\": \"Michael Kane\", \"gender\": \"M\", \"email\": \"tamara51@yahoo.com\", \"age\": 58, \"address\": \"7129 Patrick Walks Suite 215\\\\nLaurenside, LA 97179\"}',\n",
+       " '{\"username\": \"kevin37\", \"name\": \"Nathaniel Miller\", \"gender\": \"M\", \"email\": \"maria21@gmail.com\", \"age\": 36, \"address\": \"8247 Manning Burgs Suite 806\\\\nLopezshire, MS 06606\"}']"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def get_random_name_and_gender():\n",
+    "    \"\"\"Return a (name, gender) pair for one simulated user.\n",
+    "\n",
+    "    A uniform draw above the 0.6 threshold yields a male name\n",
+    "    tagged 'M'; otherwise a female name tagged 'F', so about 60%\n",
+    "    of users are female.\n",
+    "    \"\"\"\n",
+    "    female_share = .6\n",
+    "    if random.random() <= female_share:\n",
+    "        return fake.name_female(), 'F'\n",
+    "    return fake.name_male(), 'M'\n",
+    "\n",
+    "# for each username, build a complete user profile to simulate\n",
+    "# user data coming from an API: a list of JSON strings (users).\n",
+    "def get_users(usernames):\n",
+    "    \"\"\"Return a list with one JSON-encoded profile per username.\"\"\"\n",
+    "    def _build_profile(username):\n",
+    "        name, gender = get_random_name_and_gender()\n",
+    "        return json.dumps({\n",
+    "            'username': username,\n",
+    "            'name': name,\n",
+    "            'gender': gender,\n",
+    "            'email': fake.email(),\n",
+    "            'age': fake.random_int(min=18, max=90),\n",
+    "            'address': fake.address(),\n",
+    "        })\n",
+    "\n",
+    "    return [_build_profile(username) for username in usernames]\n",
+    "\n",
+    "users = get_users(usernames)\n",
+    "users[:3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# campaign name format:\n",
+    "# InternalType_StartDate_EndDate_TargetAge_TargetGender_Currency\n",
+    "def get_type():\n",
+    "    \"\"\"Pick one of the made-up internal campaign type codes at random.\"\"\"\n",
+    "    return random.choice(['AKX', 'BYU', 'GRZ', 'KTR'])\n",
+    "\n",
+    "def get_start_end_dates():\n",
+    "    \"\"\"Return random (start, end) campaign dates as 'YYYYMMDD' strings.\n",
+    "\n",
+    "    The campaign lasts between one day and two years and starts\n",
+    "    up to one year before or after today.\n",
+    "    \"\"\"\n",
+    "    length_days = random.randint(1, 2 * 365)\n",
+    "    shift_days = random.randint(-365, 365)\n",
+    "    first_day = date.today() - timedelta(days=shift_days)\n",
+    "    last_day = first_day + timedelta(days=length_days)\n",
+    "    return tuple(\n",
+    "        day.strftime(\"%Y%m%d\") for day in (first_day, last_day))\n",
+    "\n",
+    "def get_age():\n",
+    "    \"\"\"Return a random target age range formatted as 'low-high'.\n",
+    "\n",
+    "    The lower bound is a multiple of 5 between 20 and 45; the\n",
+    "    range spans 5 to 25 years, also in steps of 5.\n",
+    "    \"\"\"\n",
+    "    lower = random.randrange(20, 46, 5)\n",
+    "    span = random.randrange(5, 26, 5)\n",
+    "    return f'{lower}-{lower + span}'\n",
+    "\n",
+    "def get_gender():\n",
+    "    \"\"\"Return a random target gender code: 'M', 'F' or 'B'.\n",
+    "\n",
+    "    NOTE(review): 'B' presumably means both genders - confirm.\n",
+    "    \"\"\"\n",
+    "    genders = ('M', 'F', 'B')\n",
+    "    return random.choice(genders)\n",
+    "\n",
+    "def get_currency():\n",
+    "    \"\"\"Return a random currency code: 'GBP', 'EUR' or 'USD'.\"\"\"\n",
+    "    currencies = ('GBP', 'EUR', 'USD')\n",
+    "    return random.choice(currencies)\n",
+    "\n",
+    "def get_campaign_name():\n",
+    "    \"\"\"Assemble a random campaign name from underscore-separated parts.\n",
+    "\n",
+    "    Parts, in order: internal type, start date, end date,\n",
+    "    target age range, target gender, currency.\n",
+    "    \"\"\"\n",
+    "    # the tuple is evaluated left to right, so the random draws\n",
+    "    # happen in the same order as the parts appear in the name\n",
+    "    parts = (\n",
+    "        get_type(),\n",
+    "        *get_start_end_dates(),\n",
+    "        get_age(),\n",
+    "        get_gender(),\n",
+    "        get_currency(),\n",
+    "    )\n",
+    "    return '_'.join(parts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# campaign data:\n",
+    "# name, budget, spent, clicks, impressions\n",
+    "def get_campaign_data():\n",
+    "    \"\"\"Return a dict describing one randomly generated campaign.\n",
+    "\n",
+    "    Keys: 'cmp_name', 'cmp_bgt', 'cmp_spent', 'cmp_clicks' and\n",
+    "    'cmp_impr'. The amount spent never exceeds the budget.\n",
+    "    \"\"\"\n",
+    "    campaign = {'cmp_name': get_campaign_name()}\n",
+    "    campaign['cmp_bgt'] = random.randint(10**3, 10**6)\n",
+    "    # the upper bound is the budget just drawn\n",
+    "    campaign['cmp_spent'] = random.randint(10**2, campaign['cmp_bgt'])\n",
+    "    campaign['cmp_clicks'] = int(\n",
+    "        random.triangular(10**2, 10**5, 0.2 * 10**5))\n",
+    "    campaign['cmp_impr'] = int(random.gauss(0.5 * 10**6, 2))\n",
+    "    return campaign"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# assemble the final version of the rough data: a list of\n",
+    "# dictionaries, each one following this structure:\n",
+    "# {'user': user_json, 'campaigns': [c1, c2, ...]}\n",
+    "# where user_json is the JSON string version of a user data dict\n",
+    "# and c1, c2, ... are campaign dicts as returned by\n",
+    "# get_campaign_data\n",
+    "\n",
+    "def get_data(users):\n",
+    "    \"\"\"Pair every user with a list of 2 to 8 random campaigns.\"\"\"\n",
+    "    return [\n",
+    "        {\n",
+    "            'user': user,\n",
+    "            'campaigns': [get_campaign_data()\n",
+    "                          for _ in range(random.randint(2, 8))],\n",
+    "        }\n",
+    "        for user in users\n",
+    "    ]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2 - Cleaning the data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'user': '{\"username\": \"susan42\", \"name\": \"Emily Smith\", \"gender\": \"F\", \"email\": \"vmckinney@leon.com\", \"age\": 53, \"address\": \"66537 Riley Mission Apt. 337\\\\nNorth Jennifer, NH 95781\"}',\n",
+       "  'campaigns': [{'cmp_name': 'GRZ_20210131_20210411_30-40_F_GBP',\n",
+       "    'cmp_bgt': 253951,\n",
+       "    'cmp_spent': 17953,\n",
+       "    'cmp_clicks': 52573,\n",
+       "    'cmp_impr': 500001},\n",
+       "   {'cmp_name': 'BYU_20210109_20221204_30-35_M_GBP',\n",
+       "    'cmp_bgt': 150314,\n",
+       "    'cmp_spent': 125884,\n",
+       "    'cmp_clicks': 24575,\n",
+       "    'cmp_impr': 499999},\n",
+       "   {'cmp_name': 'GRZ_20211124_20220921_20-35_B_EUR',\n",
+       "    'cmp_bgt': 791397,\n",
+       "    'cmp_spent': 480963,\n",
+       "    'cmp_clicks': 39668,\n",
+       "    'cmp_impr': 499999},\n",
+       "   {'cmp_name': 'GRZ_20210727_20220211_35-45_B_EUR',\n",
+       "    'cmp_bgt': 910204,\n",
+       "    'cmp_spent': 339997,\n",
+       "    'cmp_clicks': 16698,\n",
+       "    'cmp_impr': 500000},\n",
+       "   {'cmp_name': 'BYU_20220216_20220407_20-25_F_EUR',\n",
+       "    'cmp_bgt': 393134,\n",
+       "    'cmp_spent': 158930,\n",
+       "    'cmp_clicks': 46631,\n",
+       "    'cmp_impr': 500000}]},\n",
+       " {'user': '{\"username\": \"sarahcarpenter\", \"name\": \"Michael Kane\", \"gender\": \"M\", \"email\": \"tamara51@yahoo.com\", \"age\": 58, \"address\": \"7129 Patrick Walks Suite 215\\\\nLaurenside, LA 97179\"}',\n",
+       "  'campaigns': [{'cmp_name': 'BYU_20220324_20221230_20-45_B_USD',\n",
+       "    'cmp_bgt': 819948,\n",
+       "    'cmp_spent': 105178,\n",
+       "    'cmp_clicks': 27755,\n",
+       "    'cmp_impr': 500004},\n",
+       "   {'cmp_name': 'GRZ_20201008_20210604_30-40_B_GBP',\n",
+       "    'cmp_bgt': 829698,\n",
+       "    'cmp_spent': 143193,\n",
+       "    'cmp_clicks': 88114,\n",
+       "    'cmp_impr': 499998},\n",
+       "   {'cmp_name': 'GRZ_20210710_20211130_25-30_B_USD',\n",
+       "    'cmp_bgt': 815470,\n",
+       "    'cmp_spent': 79377,\n",
+       "    'cmp_clicks': 28283,\n",
+       "    'cmp_impr': 500002},\n",
+       "   {'cmp_name': 'AKX_20211028_20220112_25-35_F_USD',\n",
+       "    'cmp_bgt': 944028,\n",
+       "    'cmp_spent': 657427,\n",
+       "    'cmp_clicks': 6668,\n",
+       "    'cmp_impr': 499999},\n",
+       "   {'cmp_name': 'AKX_20211025_20220314_25-35_M_EUR',\n",
+       "    'cmp_bgt': 39136,\n",
+       "    'cmp_spent': 29326,\n",
+       "    'cmp_clicks': 20927,\n",
+       "    'cmp_impr': 499998},\n",
+       "   {'cmp_name': 'BYU_20211227_20220615_20-35_F_USD',\n",
+       "    'cmp_bgt': 940412,\n",
+       "    'cmp_spent': 131757,\n",
+       "    'cmp_clicks': 57384,\n",
+       "    'cmp_impr': 500001},\n",
+       "   {'cmp_name': 'AKX_20220323_20230602_35-55_M_GBP',\n",
+       "    'cmp_bgt': 545483,\n",
+       "    'cmp_spent': 96427,\n",
+       "    'cmp_clicks': 43290,\n",
+       "    'cmp_impr': 499999},\n",
+       "   {'cmp_name': 'AKX_20210917_20220912_35-55_B_USD',\n",
+       "    'cmp_bgt': 129347,\n",
+       "    'cmp_spent': 4747,\n",
+       "    'cmp_clicks': 88217,\n",
+       "    'cmp_impr': 499999}]}]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# fetch simulated rough data\n",
+    "rough_data = get_data(users)\n",
+    "\n",
+    "rough_data[:2]  # let's take a peek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'cmp_name': 'GRZ_20210131_20210411_30-40_F_GBP',\n",
+       "  'cmp_bgt': 253951,\n",
+       "  'cmp_spent': 17953,\n",
+       "  'cmp_clicks': 52573,\n",
+       "  'cmp_impr': 500001,\n",
+       "  'user': '{\"username\": \"susan42\", \"name\": \"Emily Smith\", \"gender\": \"F\", \"email\": \"vmckinney@leon.com\", \"age\": 53, \"address\": \"66537 Riley Mission Apt. 337\\\\nNorth Jennifer, NH 95781\"}'},\n",
+       " {'cmp_name': 'BYU_20210109_20221204_30-35_M_GBP',\n",
+       "  'cmp_bgt': 150314,\n",
+       "  'cmp_spent': 125884,\n",
+       "  'cmp_clicks': 24575,\n",
+       "  'cmp_impr': 499999,\n",
+       "  'user': '{\"username\": \"susan42\", \"name\": \"Emily Smith\", \"gender\": \"F\", \"email\": \"vmckinney@leon.com\", \"age\": 53, \"address\": \"66537 Riley Mission Apt. 337\\\\nNorth Jennifer, NH 95781\"}'}]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Flatten the rough data into a list of dicts: each item is a\n",
+    "# copy of one campaign dict with the owning user's JSON string\n",
+    "# added under the 'user' key. Building new dicts (rather than\n",
+    "# calling campaign.update) leaves rough_data untouched, so the\n",
+    "# earlier peek at rough_data stays accurate.\n",
+    "\n",
+    "data = [\n",
+    "    {**campaign, 'user': datum['user']}\n",
+    "    for datum in rough_data\n",
+    "    for campaign in datum['campaigns']\n",
+    "]\n",
+    "data[:2]  # let's take another peek"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Warning: Uncommenting and executing this cell will overwrite data.json\n",
+    "#with open('data.json', 'w') as stream:\n",
+    "#     stream.write(json.dumps(data))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
@@ -0,0 +1 @@
+{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:ch13-dataprep.ipynb","notebook:ch13.ipynb"]},"current":"notebook:ch13-dataprep.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.12670368500757193,0.8732963149924281,0]},"workspace-ui:lastSave":"ch13.jupyterlab-workspace","@jupyterlab/settingeditor-extension:plugin":{"sizes":[0.12337371018393899,0.876626289816061],"container":{"plugin":"@jupyterlab/apputils-extension:themes","sizes":[0.48540706605222733,0.5145929339477726]}},"notebook:ch13.ipynb":{"data":{"path":"ch13.ipynb","factory":"Notebook"}},"notebook:ch13-dataprep.ipynb":{"data":{"path":"ch13-dataprep.ipynb","factory":"Notebook"}}},"metadata":{"id":"ch13","last_modified":"2021-08-15T15:16:14.928473+00:00","created":"2021-08-15T15:16:14.928473+00:00"}}
@@ -0,0 +1,75 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fibonacci(N):\n",
+    "    \"\"\"Yield the Fibonacci numbers smaller than N, starting from 0.\"\"\"\n",
+    "    current, following = 0, 1\n",
+    "    while True:\n",
+    "        if not current < N:\n",
+    "            return\n",
+    "        yield current\n",
+    "        current, following = following, current + following"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "list(fibonacci(100))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.35 µs ± 7.01 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit list(fibonacci(10**4))"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
@@ -0,0 +1,8 @@
+arrow
+faker
+jupyter
+jupyterlab
+matplotlib
+numpy
+openpyxl
+pandas
@@ -0,0 +1,292 @@
+#
+# This file is autogenerated by pip-compile with python 3.9
+# To update, run:
+#
+#    pip-compile requirements.in
+#
+anyio==3.3.0
+    # via jupyter-server
+argon2-cffi==20.1.0
+    # via
+    #   jupyter-server
+    #   notebook
+arrow==1.1.1
+    # via -r requirements.in
+async-generator==1.10
+    # via nbclient
+attrs==21.2.0
+    # via jsonschema
+babel==2.9.1
+    # via jupyterlab-server
+backcall==0.2.0
+    # via ipython
+bleach==4.0.0
+    # via nbconvert
+certifi==2021.5.30
+    # via requests
+cffi==1.14.6
+    # via argon2-cffi
+charset-normalizer==2.0.4
+    # via requests
+cycler==0.10.0
+    # via matplotlib
+debugpy==1.4.1
+    # via ipykernel
+decorator==5.0.9
+    # via ipython
+defusedxml==0.7.1
+    # via nbconvert
+entrypoints==0.3
+    # via
+    #   jupyterlab-server
+    #   nbconvert
+et-xmlfile==1.1.0
+    # via openpyxl
+faker==8.11.0
+    # via -r requirements.in
+idna==3.2
+    # via
+    #   anyio
+    #   requests
+ipykernel==6.1.0
+    # via
+    #   ipywidgets
+    #   jupyter
+    #   jupyter-console
+    #   notebook
+    #   qtconsole
+ipython==7.26.0
+    # via
+    #   ipykernel
+    #   ipywidgets
+    #   jupyter-console
+    #   jupyterlab
+ipython-genutils==0.2.0
+    # via
+    #   jupyter-server
+    #   nbformat
+    #   notebook
+    #   qtconsole
+    #   traitlets
+ipywidgets==7.6.3
+    # via jupyter
+jedi==0.18.0
+    # via ipython
+jinja2==3.0.1
+    # via
+    #   jupyter-server
+    #   jupyterlab
+    #   jupyterlab-server
+    #   nbconvert
+    #   notebook
+json5==0.9.6
+    # via jupyterlab-server
+jsonschema==3.2.0
+    # via
+    #   jupyterlab-server
+    #   nbformat
+jupyter==1.0.0
+    # via -r requirements.in
+jupyter-client==6.1.12
+    # via
+    #   ipykernel
+    #   jupyter-console
+    #   jupyter-server
+    #   nbclient
+    #   notebook
+    #   qtconsole
+jupyter-console==6.4.0
+    # via jupyter
+jupyter-core==4.7.1
+    # via
+    #   jupyter-client
+    #   jupyter-server
+    #   jupyterlab
+    #   nbconvert
+    #   nbformat
+    #   notebook
+    #   qtconsole
+jupyter-server==1.10.2
+    # via
+    #   jupyterlab
+    #   jupyterlab-server
+    #   nbclassic
+jupyterlab==3.1.6
+    # via -r requirements.in
+jupyterlab-pygments==0.1.2
+    # via nbconvert
+jupyterlab-server==2.7.0
+    # via jupyterlab
+jupyterlab-widgets==1.0.0
+    # via ipywidgets
+kiwisolver==1.3.1
+    # via matplotlib
+markupsafe==2.0.1
+    # via jinja2
+matplotlib==3.4.3
+    # via -r requirements.in
+matplotlib-inline==0.1.2
+    # via
+    #   ipykernel
+    #   ipython
+mistune==0.8.4
+    # via nbconvert
+nbclassic==0.3.1
+    # via jupyterlab
+nbclient==0.5.3
+    # via nbconvert
+nbconvert==6.1.0
+    # via
+    #   jupyter
+    #   jupyter-server
+    #   notebook
+nbformat==5.1.3
+    # via
+    #   ipywidgets
+    #   jupyter-server
+    #   nbclient
+    #   nbconvert
+    #   notebook
+nest-asyncio==1.5.1
+    # via nbclient
+notebook==6.4.3
+    # via
+    #   jupyter
+    #   nbclassic
+    #   widgetsnbextension
+numpy==1.21.1
+    # via
+    #   -r requirements.in
+    #   matplotlib
+    #   pandas
+openpyxl==3.0.7
+    # via -r requirements.in
+packaging==21.0
+    # via
+    #   bleach
+    #   jupyterlab
+    #   jupyterlab-server
+pandas==1.3.1
+    # via -r requirements.in
+pandocfilters==1.4.3
+    # via nbconvert
+parso==0.8.2
+    # via jedi
+pexpect==4.8.0
+    # via ipython
+pickleshare==0.7.5
+    # via ipython
+pillow==8.3.1
+    # via matplotlib
+prometheus-client==0.11.0
+    # via
+    #   jupyter-server
+    #   notebook
+prompt-toolkit==3.0.19
+    # via
+    #   ipython
+    #   jupyter-console
+ptyprocess==0.7.0
+    # via
+    #   pexpect
+    #   terminado
+pycparser==2.20
+    # via cffi
+pygments==2.9.0
+    # via
+    #   ipython
+    #   jupyter-console
+    #   jupyterlab-pygments
+    #   nbconvert
+    #   qtconsole
+pyparsing==2.4.7
+    # via
+    #   matplotlib
+    #   packaging
+pyrsistent==0.18.0
+    # via jsonschema
+python-dateutil==2.8.2
+    # via
+    #   arrow
+    #   faker
+    #   jupyter-client
+    #   matplotlib
+    #   pandas
+pytz==2021.1
+    # via
+    #   babel
+    #   pandas
+pyzmq==22.2.1
+    # via
+    #   jupyter-client
+    #   jupyter-server
+    #   notebook
+    #   qtconsole
+qtconsole==5.1.1
+    # via jupyter
+qtpy==1.9.0
+    # via qtconsole
+requests==2.26.0
+    # via
+    #   jupyterlab-server
+    #   requests-unixsocket
+requests-unixsocket==0.2.0
+    # via jupyter-server
+send2trash==1.8.0
+    # via
+    #   jupyter-server
+    #   notebook
+six==1.16.0
+    # via
+    #   argon2-cffi
+    #   bleach
+    #   cycler
+    #   jsonschema
+    #   python-dateutil
+sniffio==1.2.0
+    # via anyio
+terminado==0.11.0
+    # via
+    #   jupyter-server
+    #   notebook
+testpath==0.5.0
+    # via nbconvert
+text-unidecode==1.3
+    # via faker
+tornado==6.1
+    # via
+    #   ipykernel
+    #   jupyter-client
+    #   jupyter-server
+    #   jupyterlab
+    #   notebook
+    #   terminado
+traitlets==5.0.5
+    # via
+    #   ipykernel
+    #   ipython
+    #   ipywidgets
+    #   jupyter-client
+    #   jupyter-core
+    #   jupyter-server
+    #   matplotlib-inline
+    #   nbclient
+    #   nbconvert
+    #   nbformat
+    #   notebook
+    #   qtconsole
+urllib3==1.26.6
+    # via
+    #   requests
+    #   requests-unixsocket
+wcwidth==0.2.5
+    # via prompt-toolkit
+webencodings==0.5.1
+    # via bleach
+websocket-client==1.2.1
+    # via jupyter-server
+widgetsnbextension==3.5.1
+    # via ipywidgets
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools
				`@@ -0,0 +1 @@`
				{"data":{"layout-restorer:data":{"main":{"dock":{"type":"tab-area","currentIndex":0,"widgets":["notebook:ch13-dataprep.ipynb","notebook:ch13.ipynb"]},"current":"notebook:ch13-dataprep.ipynb"},"down":{"size":0,"widgets":[]},"left":{"collapsed":false,"current":"filebrowser","widgets":["filebrowser","running-sessions","@jupyterlab/toc:plugin","extensionmanager.main-view"]},"right":{"collapsed":true,"widgets":["jp-property-inspector","debugger-sidebar"]},"relativeSizes":[0.12670368500757193,0.8732963149924281,0]},"workspace-ui:lastSave":"ch13.jupyterlab-workspace","@jupyterlab/settingeditor-extension:plugin":{"sizes":[0.12337371018393899,0.876626289816061],"container":{"plugin":"@jupyterlab/apputils-extension:themes","sizes":[0.48540706605222733,0.5145929339477726]}},"notebook:ch13.ipynb":{"data":{"path":"ch13.ipynb","factory":"Notebook"}},"notebook:ch13-dataprep.ipynb":{"data":{"path":"ch13-dataprep.ipynb","factory":"Notebook"}}},"metadata":{"id":"ch13","last_modified":"2021-08-15T15:16:14.928473+00:00","created":"2021-08-15T15:16:14.928473+00:00"}}