diff --git a/Ch02/emails.ipynb b/Ch02/emails.ipynb
new file mode 100644
index 0000000..fc89588
--- /dev/null
+++ b/Ch02/emails.ipynb
@@ -0,0 +1,1483 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " emailsOpened | \n",
+ " member | \n",
+ " week | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 2015-06-29 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2.0 | \n",
+ " 1.0 | \n",
+ " 2015-07-13 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2.0 | \n",
+ " 1.0 | \n",
+ " 2015-07-20 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 2015-07-27 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 2015-08-03 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " emailsOpened member week\n",
+ "0 3.0 1.0 2015-06-29\n",
+ "1 2.0 1.0 2015-07-13\n",
+ "2 2.0 1.0 2015-07-20\n",
+ "3 3.0 1.0 2015-07-27\n",
+ "4 1.0 1.0 2015-08-03"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "emails = pd.read_csv('data/emails.csv',\n",
+ " header=0,\n",
+ " names=['emailsOpened', 'member', 'week'],\n",
+ " parse_dates=['week'])\n",
+ "\n",
+ "emails.head()\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " amount | \n",
+ " timestamp | \n",
+ " member | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 25.0 | \n",
+ " 2017-11-12 11:13:44 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 50.0 | \n",
+ " 2015-08-25 19:01:45 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 25.0 | \n",
+ " 2015-03-26 12:03:47 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 50.0 | \n",
+ " 2016-07-06 12:24:55 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 50.0 | \n",
+ " 2016-05-11 18:13:04 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2671 | \n",
+ " 25.0 | \n",
+ " 2016-09-02 11:20:00 | \n",
+ " 992.0 | \n",
+ "
\n",
+ " \n",
+ " | 2672 | \n",
+ " 50.0 | \n",
+ " 2017-11-02 12:17:06 | \n",
+ " 993.0 | \n",
+ "
\n",
+ " \n",
+ " | 2673 | \n",
+ " 1000.0 | \n",
+ " 2016-09-13 21:09:47 | \n",
+ " 995.0 | \n",
+ "
\n",
+ " \n",
+ " | 2674 | \n",
+ " 1000.0 | \n",
+ " 2017-09-29 20:03:01 | \n",
+ " 995.0 | \n",
+ "
\n",
+ " \n",
+ " | 2675 | \n",
+ " 50.0 | \n",
+ " 2018-01-03 19:24:24 | \n",
+ " 998.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2676 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " amount timestamp member\n",
+ "0 25.0 2017-11-12 11:13:44 0.0\n",
+ "1 50.0 2015-08-25 19:01:45 0.0\n",
+ "2 25.0 2015-03-26 12:03:47 0.0\n",
+ "3 50.0 2016-07-06 12:24:55 0.0\n",
+ "4 50.0 2016-05-11 18:13:04 1.0\n",
+ "... ... ... ...\n",
+ "2671 25.0 2016-09-02 11:20:00 992.0\n",
+ "2672 50.0 2017-11-02 12:17:06 993.0\n",
+ "2673 1000.0 2016-09-13 21:09:47 995.0\n",
+ "2674 1000.0 2017-09-29 20:03:01 995.0\n",
+ "2675 50.0 2018-01-03 19:24:24 998.0\n",
+ "\n",
+ "[2676 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "donations = pd.read_csv('data/donations.csv', parse_dates=['timestamp'])\n",
+ "donations.columns = ['amount', 'timestamp', 'member']\n",
+ "donations\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " memberId | \n",
+ " memberStats | \n",
+ " yearJoined | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " silver | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " silver | \n",
+ " 2015 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " silver | \n",
+ " 2016 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " bronze | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " silver | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 995 | \n",
+ " 995 | \n",
+ " bronze | \n",
+ " 2016 | \n",
+ "
\n",
+ " \n",
+ " | 996 | \n",
+ " 996 | \n",
+ " bronze | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " | 997 | \n",
+ " 997 | \n",
+ " bronze | \n",
+ " 2018 | \n",
+ "
\n",
+ " \n",
+ " | 998 | \n",
+ " 998 | \n",
+ " bronze | \n",
+ " 2017 | \n",
+ "
\n",
+ " \n",
+ " | 999 | \n",
+ " 999 | \n",
+ " silver | \n",
+ " 2014 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1000 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " memberId memberStats yearJoined\n",
+ "0 0 silver 2014\n",
+ "1 1 silver 2015\n",
+ "2 2 silver 2016\n",
+ "3 3 bronze 2018\n",
+ "4 4 silver 2018\n",
+ ".. ... ... ...\n",
+ "995 995 bronze 2016\n",
+ "996 996 bronze 2018\n",
+ "997 997 bronze 2018\n",
+ "998 998 bronze 2017\n",
+ "999 999 silver 2014\n",
+ "\n",
+ "[1000 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "YearJoined = pd.read_csv('data/year_joined.csv')\n",
+ "YearJoined.columns = ['memberId', 'memberStats', 'yearJoined']\n",
+ "YearJoined\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " week | \n",
+ " member | \n",
+ " emailsOpened | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 81388 | \n",
+ " 2015-02-09 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 58211 | \n",
+ " 2015-02-16 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 58750 | \n",
+ " 2015-02-23 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 65757 | \n",
+ " 2015-03-02 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 72225 | \n",
+ " 2015-03-09 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 15091 | \n",
+ " 2018-04-30 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 45275 | \n",
+ " 2018-05-07 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 23176 | \n",
+ " 2018-05-14 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 14552 | \n",
+ " 2018-05-21 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 2694 | \n",
+ " 2018-05-28 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
173 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " week member emailsOpened\n",
+ "81388 2015-02-09 998.0 0.0\n",
+ "58211 2015-02-16 998.0 0.0\n",
+ "58750 2015-02-23 998.0 0.0\n",
+ "65757 2015-03-02 998.0 0.0\n",
+ "72225 2015-03-09 998.0 0.0\n",
+ "... ... ... ...\n",
+ "15091 2018-04-30 998.0 3.0\n",
+ "45275 2018-05-07 998.0 3.0\n",
+ "23176 2018-05-14 998.0 3.0\n",
+ "14552 2018-05-21 998.0 3.0\n",
+ "2694 2018-05-28 998.0 3.0\n",
+ "\n",
+ "[173 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "complete_idx = pd.MultiIndex.from_product((set(emails.week), set(emails.member)))\n",
+ "\n",
+ "all_email = (emails.set_index(['week', 'member'])\n",
+ " .reindex(complete_idx, fill_value=0)\n",
+ " .reset_index())\n",
+ "all_email.columns = ['week', 'member', 'emailsOpened']\n",
+ "\n",
+ "all_email[all_email.member==998].sort_values(by='week')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " week | \n",
+ " member | \n",
+ " emailsOpened | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2016-12-05 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2016-12-05 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2016-12-05 | \n",
+ " 9.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 2016-12-05 | \n",
+ " 14.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2016-12-05 | \n",
+ " 20.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 93231 | \n",
+ " 2017-05-08 | \n",
+ " 959.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " | 93236 | \n",
+ " 2017-05-08 | \n",
+ " 970.0 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " | 93237 | \n",
+ " 2017-05-08 | \n",
+ " 973.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 93241 | \n",
+ " 2017-05-08 | \n",
+ " 987.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " | 93245 | \n",
+ " 2017-05-08 | \n",
+ " 995.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
31836 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " week member emailsOpened\n",
+ "0 2016-12-05 1.0 3.0\n",
+ "3 2016-12-05 6.0 1.0\n",
+ "4 2016-12-05 9.0 3.0\n",
+ "6 2016-12-05 14.0 0.0\n",
+ "8 2016-12-05 20.0 1.0\n",
+ "... ... ... ...\n",
+ "93231 2017-05-08 959.0 2.0\n",
+ "93236 2017-05-08 970.0 1.0\n",
+ "93237 2017-05-08 973.0 3.0\n",
+ "93241 2017-05-08 987.0 3.0\n",
+ "93245 2017-05-08 995.0 3.0\n",
+ "\n",
+ "[31836 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cutoff_dates = emails.groupby('member').week.agg(['min', 'max']).reset_index()\n",
+ "\n",
+ "for _, row in cutoff_dates.iterrows():\n",
+ " member = row['member']\n",
+ " start_date = row['min']\n",
+ " end_date = row['max']\n",
+ " query = 'member == @member and (week < @start_date or week > @end_date)'\n",
+ " all_email.drop(all_email.query(query).index, axis='index', inplace=True)\n",
+ "\n",
+ "all_email"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Constructing a found time series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " member | \n",
+ " amount | \n",
+ "
\n",
+ " \n",
+ " | timestamp | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2015-03-30 | \n",
+ " 0.0 | \n",
+ " 25.0 | \n",
+ "
\n",
+ " \n",
+ " | 2015-08-31 | \n",
+ " 0.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2016-07-11 | \n",
+ " 0.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-11-13 | \n",
+ " 0.0 | \n",
+ " 25.0 | \n",
+ "
\n",
+ " \n",
+ " | 2016-05-09 | \n",
+ " 1.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-21 | \n",
+ " 992.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-11-06 | \n",
+ " 993.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2016-09-19 | \n",
+ " 995.0 | \n",
+ " 1000.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-10-02 | \n",
+ " 995.0 | \n",
+ " 1000.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-08 | \n",
+ " 998.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2612 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " member amount\n",
+ "timestamp \n",
+ "2015-03-30 0.0 25.0\n",
+ "2015-08-31 0.0 50.0\n",
+ "2016-07-11 0.0 50.0\n",
+ "2017-11-13 0.0 25.0\n",
+ "2016-05-09 1.0 50.0\n",
+ "... ... ...\n",
+ "2018-05-21 992.0 50.0\n",
+ "2017-11-06 993.0 50.0\n",
+ "2016-09-19 995.0 1000.0\n",
+ "2017-10-02 995.0 1000.0\n",
+ "2018-01-08 998.0 50.0\n",
+ "\n",
+ "[2612 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Constructing a found time series\n",
+ "donations.set_index('timestamp', inplace=True)\n",
+ "agg_don = (donations.groupby('member')\n",
+ " .apply(lambda df: df.amount.resample('W-MON').sum()))\n",
+ "\n",
+ "agg_don = agg_don[agg_don != 0]\n",
+ "agg_don = agg_don.reset_index().set_index('timestamp') \n",
+ "agg_don"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " member | \n",
+ " emailsOpened | \n",
+ " amount | \n",
+ "
\n",
+ " \n",
+ " | week | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2015-06-29 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2015-07-06 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2015-07-13 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2015-07-20 | \n",
+ " 1.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2015-07-27 | \n",
+ " 1.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-30 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-07 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-14 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-21 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-28 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
31836 rows × 3 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " member emailsOpened amount\n",
+ "week \n",
+ "2015-06-29 1.0 3.0 0.0\n",
+ "2015-07-06 1.0 0.0 0.0\n",
+ "2015-07-13 1.0 2.0 0.0\n",
+ "2015-07-20 1.0 2.0 0.0\n",
+ "2015-07-27 1.0 3.0 0.0\n",
+ "... ... ... ...\n",
+ "2018-04-30 998.0 3.0 0.0\n",
+ "2018-05-07 998.0 3.0 0.0\n",
+ "2018-05-14 998.0 3.0 0.0\n",
+ "2018-05-21 998.0 3.0 0.0\n",
+ "2018-05-28 998.0 3.0 0.0\n",
+ "\n",
+ "[31836 rows x 3 columns]"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lst = [] \n",
+ "for member, member_email in all_email.groupby('member'): \n",
+ " member_donations = agg_don.query('member == @member') \n",
+ " \n",
+ " member_email.set_index('week', inplace=True) \n",
+ " member_email.sort_index(inplace=True) \n",
+ " \n",
+ " df = pd.merge(member_email, member_donations, \n",
+ " how='left', \n",
+ " left_index=True, right_index=True) \n",
+ " df.fillna(0, inplace=True) \n",
+ " df['member'] = df.member_x \n",
+ " lst.append(df.reset_index()[['member', 'week', 'emailsOpened', 'amount']]) \n",
+ " \n",
+ "merged_df = pd.concat(lst).set_index('week') \n",
+ "merged_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " member | \n",
+ " emailsOpened | \n",
+ " amount | \n",
+ "
\n",
+ " \n",
+ " | week | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-12-04 | \n",
+ " 998.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-11 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-18 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-25 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-01 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-08 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-15 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-22 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-29 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-05 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-12 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-19 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-26 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-05 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-12 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-19 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-26 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-02 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-09 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-16 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-23 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-30 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-07 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-14 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-21 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-28 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " member emailsOpened amount\n",
+ "week \n",
+ "2017-12-04 998.0 1.0 0.0\n",
+ "2017-12-11 998.0 3.0 0.0\n",
+ "2017-12-18 998.0 3.0 0.0\n",
+ "2017-12-25 998.0 0.0 0.0\n",
+ "2018-01-01 998.0 3.0 0.0\n",
+ "2018-01-08 998.0 3.0 50.0\n",
+ "2018-01-15 998.0 2.0 0.0\n",
+ "2018-01-22 998.0 3.0 0.0\n",
+ "2018-01-29 998.0 2.0 0.0\n",
+ "2018-02-05 998.0 3.0 0.0\n",
+ "2018-02-12 998.0 3.0 0.0\n",
+ "2018-02-19 998.0 3.0 0.0\n",
+ "2018-02-26 998.0 2.0 0.0\n",
+ "2018-03-05 998.0 2.0 0.0\n",
+ "2018-03-12 998.0 3.0 0.0\n",
+ "2018-03-19 998.0 2.0 0.0\n",
+ "2018-03-26 998.0 2.0 0.0\n",
+ "2018-04-02 998.0 3.0 0.0\n",
+ "2018-04-09 998.0 3.0 0.0\n",
+ "2018-04-16 998.0 3.0 0.0\n",
+ "2018-04-23 998.0 0.0 0.0\n",
+ "2018-04-30 998.0 3.0 0.0\n",
+ "2018-05-07 998.0 3.0 0.0\n",
+ "2018-05-14 998.0 3.0 0.0\n",
+ "2018-05-21 998.0 3.0 0.0\n",
+ "2018-05-28 998.0 3.0 0.0"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = merged_df.query('member == 998')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " \"\"\"Entry point for launching an IPython kernel.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " member | \n",
+ " emailsOpened | \n",
+ " amount | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " | week | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2017-12-04 | \n",
+ " 998.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-11 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-18 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2017-12-25 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-01 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-08 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 50.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-15 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 50.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-22 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-01-29 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-05 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-12 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-19 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-02-26 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-05 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-12 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-19 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-03-26 | \n",
+ " 998.0 | \n",
+ " 2.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-02 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-09 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-16 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-23 | \n",
+ " 998.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-04-30 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-07 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-14 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-21 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2018-05-28 | \n",
+ " 998.0 | \n",
+ " 3.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " member emailsOpened amount target\n",
+ "week \n",
+ "2017-12-04 998.0 1.0 0.0 NaN\n",
+ "2017-12-11 998.0 3.0 0.0 0.0\n",
+ "2017-12-18 998.0 3.0 0.0 0.0\n",
+ "2017-12-25 998.0 0.0 0.0 0.0\n",
+ "2018-01-01 998.0 3.0 0.0 0.0\n",
+ "2018-01-08 998.0 3.0 50.0 0.0\n",
+ "2018-01-15 998.0 2.0 0.0 50.0\n",
+ "2018-01-22 998.0 3.0 0.0 0.0\n",
+ "2018-01-29 998.0 2.0 0.0 0.0\n",
+ "2018-02-05 998.0 3.0 0.0 0.0\n",
+ "2018-02-12 998.0 3.0 0.0 0.0\n",
+ "2018-02-19 998.0 3.0 0.0 0.0\n",
+ "2018-02-26 998.0 2.0 0.0 0.0\n",
+ "2018-03-05 998.0 2.0 0.0 0.0\n",
+ "2018-03-12 998.0 3.0 0.0 0.0\n",
+ "2018-03-19 998.0 2.0 0.0 0.0\n",
+ "2018-03-26 998.0 2.0 0.0 0.0\n",
+ "2018-04-02 998.0 3.0 0.0 0.0\n",
+ "2018-04-09 998.0 3.0 0.0 0.0\n",
+ "2018-04-16 998.0 3.0 0.0 0.0\n",
+ "2018-04-23 998.0 0.0 0.0 0.0\n",
+ "2018-04-30 998.0 3.0 0.0 0.0\n",
+ "2018-05-07 998.0 3.0 0.0 0.0\n",
+ "2018-05-14 998.0 3.0 0.0 0.0\n",
+ "2018-05-21 998.0 3.0 0.0 0.0\n",
+ "2018-05-28 998.0 3.0 0.0 0.0"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['target'] = df.amount.shift(1) \n",
+ "df "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Ch02/myemails.py b/Ch02/myemails.py
new file mode 100644
index 0000000..7d2d135
--- /dev/null
+++ b/Ch02/myemails.py
@@ -0,0 +1,61 @@
import pandas as pd

# Weekly email-open counts: replace the CSV's own header row with our
# column names and parse 'week' as datetimes.
emails = pd.read_csv('data/emails.csv',
                     header=0,
                     names=['emailsOpened', 'member', 'week'],
                     parse_dates=['week'])

print(emails.head())

# Donation events: one row per donation with a full timestamp.
donations = pd.read_csv('data/donations.csv', parse_dates=['timestamp'])
donations.columns = ['amount', 'timestamp', 'member']

# Member metadata (status tier and year joined).
YearJoined = pd.read_csv('data/year_joined.csv')
YearJoined.columns = ['memberId', 'memberStats', 'yearJoined']

# Build the complete (week, member) cross-product so that weeks where a
# member opened no email appear explicitly as 0 rather than being absent.
complete_idx = pd.MultiIndex.from_product((set(emails.week), set(emails.member)))

all_email = (emails.set_index(['week', 'member'])
                   .reindex(complete_idx, fill_value=0)
                   .reset_index())
all_email.columns = ['week', 'member', 'emailsOpened']

print(all_email[all_email.member==998].sort_values(by='week'))

# Trim each member's series to their own observed range: drop the
# zero-filled weeks that fall before a member's first recorded email or
# after their last one.
cutoff_dates = emails.groupby('member').week.agg(['min', 'max']).reset_index()

for _, row in cutoff_dates.iterrows():
    member = row['member']
    start_date = row['min']
    end_date = row['max']
    query = 'member == @member and (week < @start_date or week > @end_date)'
    all_email.drop(all_email.query(query).index, axis='index', inplace=True)

# Constructing a found time series: aggregate donation events onto the
# same weekly (W-MON) grid as the email data.
donations.set_index('timestamp', inplace=True)
agg_don = (donations.groupby('member')
                    .apply(lambda df: df.amount.resample('W-MON').sum()))

agg_don = agg_don[agg_don != 0]
agg_don = agg_don.reset_index().set_index('timestamp')

# Merge each member's weekly email counts with their weekly donations;
# weeks with no donation become 0.
lst = []
for member, member_email in all_email.groupby('member'):
    member_donations = agg_don.query('member == @member')

    member_email.set_index('week', inplace=True)
    member_email.sort_index(inplace=True)

    df = pd.merge(member_email, member_donations,
                  how='left',
                  left_index=True, right_index=True)
    df.fillna(0, inplace=True)
    # The merge produces member_x/member_y; collapse back to one column.
    df['member'] = df.member_x
    lst.append(df.reset_index()[['member', 'week', 'emailsOpened', 'amount']])

merged_df = pd.concat(lst).set_index('week')
# .copy() so the column assignment below writes to an independent frame
# instead of a view of merged_df (avoids SettingWithCopyWarning).
df = merged_df.query('member == 998').copy()

# Lag the donation amount by one week to serve as a modelling target.
df['target'] = df.amount.shift(1)
print(df)
diff --git a/Ch02/unemp.py b/Ch02/unemp.py
new file mode 100644
index 0000000..eed6a14
--- /dev/null
+++ b/Ch02/unemp.py
@@ -0,0 +1,54 @@
import matplotlib.pyplot as plt
import pandas as pd

unemp = pd.read_csv('data/UNRATE.csv', parse_dates=['DATE'], index_col='DATE')

# generate a data set where data is randomly missing
rand_unemp = unemp.sample(frac=0.9).sort_index()

# generate a dataset where data is more likely to be missing when unemployment
# is high
high_unemp = unemp.query('UNRATE > 8').sample(frac=0.2)
bias_unemp = unemp.drop(high_unemp.index, axis='index')

# Put both sampled series back on a regular monthly grid; the months whose
# rows were dropped above become explicit NaNs.
rand_unemp = rand_unemp.resample('M').mean()
bias_unemp = bias_unemp.resample('M').mean()

# Flag the months that are missing so the plot at the bottom of this
# script can highlight them with red markers.
rand_unemp['rpt'] = rand_unemp['UNRATE'].isnull()

# Toy example: a handful of donations and three publicity campaigns.
data = {
    'amt': [99, 100, 5, 15, 11, 1200],
    'dt': ['2019-02-27', '2019-03-02', '2019-06-13', '2019-08-01', '2019-08-31', '2019-09-15',]
}

donations = pd.DataFrame(data, index=list(range(1,7)))
donations['dt'] = pd.to_datetime(donations['dt'])

identifier = ['q4q42', '4299hj', 'bbg2']
dt = [pd.Timestamp(x) for x in ['2019-1-1', '2019-4-1', '2019-7-1']]
publicity = pd.DataFrame({'identifier': identifier, 'dt': dt}, index=list(range(1,4)))

donations.set_index('dt', inplace=True)
publicity.set_index('dt', inplace=True)

# we wish to label each donation according to
# what publicity campaign most recently preceded it:
# reindex the campaigns onto a daily grid, forward-filling each campaign
# id until the next campaign starts, then join donations on their date.
new_index = pd.date_range('2019-01-01', periods=270)
df = donations.join(publicity.reindex(new_index, method='ffill'))
print(df)

# Forward-fill a window of the randomly-thinned unemployment series; the
# months flagged in 'rpt' above show up as flat (repeated) values.
idx = slice(350, 400)
unemp_idx = unemp.iloc[idx]
rand_unemp_ff = rand_unemp.iloc[idx].fillna(method='ffill')

# plot a sample graph showing the flat portions.
fig, ax = plt.subplots()
unemp_idx.plot(ax=ax, linestyle='-', marker='o', markerfacecolor='none')
rand_unemp_ff.plot(ax=ax)
rand_unemp_ff.loc[rand_unemp_ff.rpt, 'UNRATE'].plot(linestyle='none', marker='o', color='r')
ax.legend(['UNRATE', 'Missing', 'FFILL'])
# Without show() the figure is built but never displayed when the script
# is run outside an interactive/notebook session.
plt.show()