Update code listings

This commit is contained in:
Michael Hartl 2023-02-19 08:22:47 -08:00
parent e9030432e6
commit 9769a429df
28 changed files with 194 additions and 188 deletions

6
Listing_11.1.txt Normal file
View File

@ -0,0 +1,6 @@
$ cd ~/repo
$ mkdir python_data_science
$ cd python_data_science/
$ python3 -m venv venv
$ source venv/bin/activate
(venv) $

View File

@ -1,19 +1,5 @@
>>> from math import tau
>>> from numpy.random import default_rng
>>> rng = default_rng()
>>> df = pd.DataFrame(
... {
... "Number": 1.0,
... "String": "foo",
... "Angles": np.linspace(0, tau, 5),
... "Random": pd.Series(rng.standard_normal(5)),
... "Timestamp": pd.Timestamp("20221020"),
... "Size": pd.Categorical(["tiny", "small", "mid", "big", "huge"])
... })
>>> df
Number String Angles Random Timestamp Size
0 1.0 foo 0.000000 -1.954002 2022-10-20 tiny
1 1.0 foo 1.570796 0.967171 2022-10-20 small
2 1.0 foo 3.141593 -1.149739 2022-10-20 mid
3 1.0 foo 4.712389 -0.084962 2022-10-20 big
4 1.0 foo 6.283185 0.310634 2022-10-20 huge
>>> x = np.linspace(0, tau, 100)
>>> fig, (ax1, ax2) = plt.subplots(2)
>>> fig.suptitle(r"Vertically stacked plots of $\cos\theta$ and $\sin\theta$.")
>>> ax1.plot(x, np.cos(x))
>>> ax2.plot(x, np.sin(x))

View File

@ -1,7 +1,19 @@
>>> sizes = {"tiny": 4, "small": 8, "mid": 12, "big": 16, "huge": 24}
>>> df["Size"].map(sizes)
0 4
1 8
2 12
3 16
4 24
>>> from math import tau
>>> from numpy.random import default_rng
>>> rng = default_rng()
>>> df = pd.DataFrame(
... {
... "Number": 1.0,
... "String": "foo",
... "Angles": np.linspace(0, tau, 5),
... "Random": pd.Series(rng.standard_normal(5)),
... "Timestamp": pd.Timestamp("20221020"),
... "Size": pd.Categorical(["tiny", "small", "mid", "big", "huge"])
... })
>>> df
Number String Angles Random Timestamp Size
0 1.0 foo 0.000000 -1.954002 2022-10-20 tiny
1 1.0 foo 1.570796 0.967171 2022-10-20 small
2 1.0 foo 3.141593 -1.149739 2022-10-20 mid
3 1.0 foo 4.712389 -0.084962 2022-10-20 big
4 1.0 foo 6.283185 0.310634 2022-10-20 huge

View File

@ -1,9 +1,7 @@
>>> nobel.head()
id firstname ... city country
0 1 Wilhelm Conrad ... Munich Germany
1 2 Hendrik A. ... Leiden the Netherlands
2 3 Pieter ... Amsterdam the Netherlands
3 4 Henri ... Paris France
4 5 Pierre ... Paris France
[5 rows x 20 columns]
>>> sizes = {"tiny": 4, "small": 8, "mid": 12, "big": 16, "huge": 24}
>>> df["Size"].map(sizes)
0 4
1 8
2 12
3 16
4 24

View File

@ -1,3 +1,9 @@
>>> nobel.loc[nobel["firstname"].str.contains("Kip")]
id firstname surname ... name city country
916 943 Kip S. Thorne ... LIGO/VIRGO Collaboration NaN NaN
>>> nobel.head()
id firstname ... city country
0 1 Wilhelm Conrad ... Munich Germany
1 2 Hendrik A. ... Leiden the Netherlands
2 3 Pieter ... Amsterdam the Netherlands
3 4 Henri ... Paris France
4 5 Pierre ... Paris France
[5 rows x 20 columns]

View File

@ -1,7 +1,3 @@
>>> curies = nobel.loc[nobel["surname"].str.contains("Curie", na=False)]
>>> curies
id firstname ... city country
4 5 Pierre ... Paris France
5 6 Marie ... NaN NaN
6 6 Marie ... Paris France
191 194 Irène ... Paris France
>>> nobel.loc[nobel["firstname"].str.contains("Kip")]
id firstname surname ... name city country
916 943 Kip S. Thorne ... LIGO/VIRGO Collaboration NaN NaN

View File

@ -1,8 +1,7 @@
>>> laureates = nobel.groupby(["id", "firstname", "surname"])
>>> sizes = laureates.size()
>>> sizes[sizes > 1]
id firstname surname
6 Marie Curie 2
66 John Bardeen 2
217 Linus Pauling 2
222 Frederick Sanger 2
>>> curies = nobel.loc[nobel["surname"].str.contains("Curie", na=False)]
>>> curies
id firstname ... city country
4 5 Pierre ... Paris France
5 6 Marie ... NaN NaN
6 6 Marie ... Paris France
191 194 Irène ... Paris France

View File

@ -1,3 +1,8 @@
>>> nobel.hist(column="lifespan")
array([[<AxesSubplot:title={'center':'lifespan'}>]], dtype=object)
>>> plt.show()
>>> laureates = nobel.groupby(["id", "firstname", "surname"])
>>> sizes = laureates.size()
>>> sizes[sizes > 1]
id firstname surname
6 Marie Curie 2
66 John Bardeen 2
217 Linus Pauling 2
222 Frederick Sanger 2

View File

@ -1,2 +1,3 @@
>>> URL = "https://learnenough.s3.amazonaws.com/titanic.csv"
>>> titanic = pd.read_csv(URL)
>>> nobel.hist(column="lifespan")
array([[<AxesSubplot:title={'center':'lifespan'}>]], dtype=object)
>>> plt.show()

View File

@ -1,9 +1,2 @@
>>> titanic = pd.read_csv(URL, index_col="Name")
>>> titanic.head()
PassengerId ... Embarked
Name ...
Braund, Mr. Owen Harris 1 ... S
Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 ... C
Heikkinen, Miss. Laina 3 ... S
Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 ... S
Allen, Mr. William Henry 5 ... S
>>> URL = "https://learnenough.s3.amazonaws.com/titanic.csv"
>>> titanic = pd.read_csv(URL)

View File

@ -1,15 +1,9 @@
>>> titanic["Age"].notna()
Name
Braund, Mr. Owen Harris True
Cumings, Mrs. John Bradley (Florence Briggs Thayer) True
Heikkinen, Miss. Laina True
Futrelle, Mrs. Jacques Heath (Lily May Peel) True
Allen, Mr. William Henry True
...
Montvila, Rev. Juozas True
Graham, Miss. Margaret Edith True
Johnston, Miss. Catherine Helen "Carrie" False
Behr, Mr. Karl Howell True
Dooley, Mr. Patrick True
Name: Age, Length: 891, dtype: bool
>>> valid_ages = titanic[titanic["Age"].notna()]
>>> titanic = pd.read_csv(URL, index_col="Name")
>>> titanic.head()
PassengerId ... Embarked
Name ...
Braund, Mr. Owen Harris 1 ... S
Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 ... C
Heikkinen, Miss. Laina 3 ... S
Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 ... S
Allen, Mr. William Henry 5 ... S

View File

@ -1,2 +1,15 @@
# Mean of the "Survived" column over the female passengers in passenger
# class 3: pick the matching rows and the column in one .loc selection.
titanic.loc[
    (titanic["Pclass"] == 3) & (titanic["Sex"] == "female"),
    "Survived",
].mean()
>>> titanic["Age"].notna()
Name
Braund, Mr. Owen Harris True
Cumings, Mrs. John Bradley (Florence Briggs Thayer) True
Heikkinen, Miss. Laina True
Futrelle, Mrs. Jacques Heath (Lily May Peel) True
Allen, Mr. William Henry True
...
Montvila, Rev. Juozas True
Graham, Miss. Margaret Edith True
Johnston, Miss. Catherine Helen "Carrie" False
Behr, Mr. Karl Howell True
Dooley, Mr. Patrick True
Name: Age, Length: 891, dtype: bool
>>> valid_ages = titanic[titanic["Age"].notna()]

View File

@ -1,4 +1,2 @@
# Split the passengers by sex, then keep only the rows with a recorded age.
male_passengers = titanic[titanic["Sex"] == "male"]
female_passengers = titanic[titanic["Sex"] == "female"]
# Build each age mask from the subset being filtered. A mask computed on the
# full `titanic` frame has a different index from the subset, which makes
# pandas reindex it and emit a "Boolean Series key will be reindexed to match
# DataFrame index" warning. The selected rows are the same either way.
valid_male_ages = male_passengers[male_passengers["Age"].notna()]
valid_female_ages = female_passengers[female_passengers["Age"].notna()]
# Mean of the "Survived" column over the female passengers in passenger
# class 3: pick the matching rows and the column in one .loc selection.
titanic.loc[
    (titanic["Pclass"] == 3) & (titanic["Sex"] == "female"),
    "Survived",
].mean()

View File

@ -1 +1,4 @@
>>> from sklearn.linear_model import LinearRegression
# Split the passengers by sex, then keep only the rows with a recorded age.
male_passengers = titanic[titanic["Sex"] == "male"]
female_passengers = titanic[titanic["Sex"] == "female"]
# Build each age mask from the subset being filtered. A mask computed on the
# full `titanic` frame has a different index from the subset, which makes
# pandas reindex it and emit a "Boolean Series key will be reindexed to match
# DataFrame index" warning. The selected rows are the same either way.
valid_male_ages = male_passengers[male_passengers["Age"].notna()]
valid_female_ages = female_passengers[female_passengers["Age"].notna()]

View File

@ -1,5 +1 @@
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.linear_model import LinearRegression

View File

@ -1,7 +1,5 @@
Model
Score
0.854749 Decision Tree
0.854749 Random Forest
0.787709 Logistic Regression
0.770950 Naive Bayes
0.743017 Perceptron
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

7
Listing_11.25.py Normal file
View File

@ -0,0 +1,7 @@
Model
Score
0.854749 Decision Tree
0.854749 Random Forest
0.787709 Logistic Regression
0.770950 Naive Bayes
0.743017 Perceptron

View File

@ -1,17 +0,0 @@
>>> a.reshape((-1, 1))
array([[ 0],
[ 1],
[ 2],
[ 3],
[ 4],
[ 5],
[ 6],
[ 7],
[ 8],
[ 9],
[10],
[11],
[12],
[13],
[14],
[15]])

View File

@ -1,5 +1,17 @@
>>> np.arange(5)
array([0, 1, 2, 3, 4])
>>> angles = math.tau * np.arange(5) / 4
>>> angles
array([0. , 1.57079633, 3.14159265, 4.71238898, 6.28318531])
>>> a.reshape((-1, 1))
array([[ 0],
[ 1],
[ 2],
[ 3],
[ 4],
[ 5],
[ 6],
[ 7],
[ 8],
[ 9],
[10],
[11],
[12],
[13],
[14],
[15]])

View File

@ -1,8 +1,5 @@
>>> math.cos(angles)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: only size-1 arrays can be converted to Python scalars
>>> a = np.cos(angles)
>>> a
array([ 1.0000000e+00, 6.1232340e-17, -1.0000000e+00, -1.8369702e-16,
1.0000000e+00])
>>> np.arange(5)
array([0, 1, 2, 3, 4])
>>> angles = math.tau * np.arange(5) / 4
>>> angles
array([0. , 1.57079633, 3.14159265, 4.71238898, 6.28318531])

View File

@ -1,5 +1,8 @@
>>> a[np.isclose(a, 0)]
array([ 6.1232340e-17, -1.8369702e-16])
>>> a[np.isclose(a, 0)] = 0
>>> math.cos(angles)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: only size-1 arrays can be converted to Python scalars
>>> a = np.cos(angles)
>>> a
array([ 1., 0., -1., 0., 1.])
array([ 1.0000000e+00, 6.1232340e-17, -1.0000000e+00, -1.8369702e-16,
1.0000000e+00])

View File

@ -1,31 +1,5 @@
# Plot cosine and sine over the interval [0, tau] on one set of labeled,
# annotated axes and display the result.
from math import tau
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points from 0 to tau (math.tau is 2*pi).
x = np.linspace(0, tau, 100)
fig, ax = plt.subplots()
# Ticks at every quarter of tau on the x-axis and every half unit on the y-axis.
ax.set_xticks([0, tau/4, tau/2, 3*tau/4, tau])
ax.set_yticks([-1, -1/2, 0, 1/2, 1])
# Turn on grid lines at the tick positions.
plt.grid(True)
# Replace the numeric tick labels with math-text strings; the raw strings keep
# the backslashes intact for matplotlib's math renderer.
ax.set_xticklabels([r"$0$", r"$\tau/4$", r"$\tau/2$", r"$3\tau/4$", r"$\tau$"])
ax.set_yticklabels([r"$-1$", r"$-1/2$", r"$0$", r"$1/2$", r"$1$"])
ax.set_title("One period of cosine and sine", fontsize=16)
ax.set_xlabel(r"$\theta$", fontsize=16)
ax.set_ylabel(r"$f(\theta)$", fontsize=16)
# Label each curve with an arrow: xy is the point the arrow points at,
# xytext is where the label text is placed (both in data coordinates).
ax.annotate(r"$\cos\theta$", xy=(1.75, -0.3), xytext=(0.5, -0.75),
arrowprops={"facecolor": "black", "width": 1}, fontsize=16)
ax.annotate(r"$\sin\theta$", xy=(2.75, 0.5), xytext=(3.5, 0.75),
arrowprops={"facecolor": "black", "width": 1}, fontsize=16)
# Set the figure resolution to 150 dots per inch.
fig.set_dpi(150)
# Draw cosine as a red dashed line and sine as a blue dotted line.
ax.plot(x, np.cos(x), color="red", linestyle="dashed")
ax.plot(x, np.sin(x), color="blue", linestyle="dotted")
# Display the finished figure.
plt.show()
>>> a[np.isclose(a, 0)]
array([ 6.1232340e-17, -1.8369702e-16])
>>> a[np.isclose(a, 0)] = 0
>>> a
array([ 1., 0., -1., 0., 1.])

View File

@ -1,15 +1,31 @@
>>> from numpy.random import default_rng
>>> rng = default_rng()
>>> n_pts = 50
>>> x = rng.standard_normal(n_pts)
>>> x
array([ 0.41256003, 0.67594205, 1.264653 , 1.16351491, -0.41594407,
-0.60157015, 0.84889823, -0.59984223, 0.24374326, 0.06055498,
-0.48512829, 1.02253594, -1.10982933, -0.40609179, 0.55076245,
0.13046238, 0.86712869, 0.06139358, -2.26538163, 1.45785923,
-0.56220574, -1.38775239, -2.39643977, -0.77498392, 1.16794796,
-0.6588802 , 1.66343434, 1.57475219, -0.03374501, -0.62757059,
-0.99378175, 0.69259747, -1.04555996, 0.62653116, -0.9042063 ,
-0.32565268, -0.99762804, -0.4270288 , 0.69940045, -0.46574267,
1.82225132, 0.23925201, -1.0443741 , -0.54779683, 1.17466477,
-2.54906663, -0.31495622, 0.25224765, -1.20869217, -1.02737145])
# Plot cosine and sine over the interval [0, tau] on one set of labeled,
# annotated axes and display the result.
from math import tau
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points from 0 to tau (math.tau is 2*pi).
x = np.linspace(0, tau, 100)
fig, ax = plt.subplots()
# Ticks at every quarter of tau on the x-axis and every half unit on the y-axis.
ax.set_xticks([0, tau/4, tau/2, 3*tau/4, tau])
ax.set_yticks([-1, -1/2, 0, 1/2, 1])
# Turn on grid lines at the tick positions.
plt.grid(True)
# Replace the numeric tick labels with math-text strings; the raw strings keep
# the backslashes intact for matplotlib's math renderer.
ax.set_xticklabels([r"$0$", r"$\tau/4$", r"$\tau/2$", r"$3\tau/4$", r"$\tau$"])
ax.set_yticklabels([r"$-1$", r"$-1/2$", r"$0$", r"$1/2$", r"$1$"])
ax.set_title("One period of cosine and sine", fontsize=16)
ax.set_xlabel(r"$\theta$", fontsize=16)
ax.set_ylabel(r"$f(\theta)$", fontsize=16)
# Label each curve with an arrow: xy is the point the arrow points at,
# xytext is where the label text is placed (both in data coordinates).
ax.annotate(r"$\cos\theta$", xy=(1.75, -0.3), xytext=(0.5, -0.75),
arrowprops={"facecolor": "black", "width": 1}, fontsize=16)
ax.annotate(r"$\sin\theta$", xy=(2.75, 0.5), xytext=(3.5, 0.75),
arrowprops={"facecolor": "black", "width": 1}, fontsize=16)
# Set the figure resolution to 150 dots per inch.
fig.set_dpi(150)
# Draw cosine as a red dashed line and sine as a blue dotted line.
ax.plot(x, np.cos(x), color="red", linestyle="dashed")
ax.plot(x, np.sin(x), color="blue", linestyle="dotted")
# Display the finished figure.
plt.show()

View File

@ -1,5 +1,15 @@
>>> x = np.linspace(0, tau, 100)
>>> fig, (ax1, ax2) = plt.subplots(2)
>>> fig.suptitle(r"Vertically stacked plots of $\cos\theta$ and $\sin\theta$.")
>>> ax1.plot(x, np.cos(x))
>>> ax2.plot(x, np.sin(x))
>>> from numpy.random import default_rng
>>> rng = default_rng()
>>> n_pts = 50
>>> x = rng.standard_normal(n_pts)
>>> x
array([ 0.41256003, 0.67594205, 1.264653 , 1.16351491, -0.41594407,
-0.60157015, 0.84889823, -0.59984223, 0.24374326, 0.06055498,
-0.48512829, 1.02253594, -1.10982933, -0.40609179, 0.55076245,
0.13046238, 0.86712869, 0.06139358, -2.26538163, 1.45785923,
-0.56220574, -1.38775239, -2.39643977, -0.77498392, 1.16794796,
-0.6588802 , 1.66343434, 1.57475219, -0.03374501, -0.62757059,
-0.99378175, 0.69259747, -1.04555996, 0.62653116, -0.9042063 ,
-0.32565268, -0.99762804, -0.4270288 , 0.69940045, -0.46574267,
1.82225132, 0.23925201, -1.0443741 , -0.54779683, 1.17466477,
-2.54906663, -0.31495622, 0.25224765, -1.20869217, -1.02737145])

View File

@ -1,4 +1,4 @@
>>> reload(palindrome)
>>> frase = palindrome.TranslatedPhrase("recognize", "reconocer")
>>> frase.ispalidrome()
>>> frase.ispalindrome()
False

View File

@ -1,4 +1,4 @@
>>> reload(palindrome)
>>> frase = palindrome.TranslatedPhrase("recognize", "reconocer")
>>> frase.ispalidrome()
>>> frase.ispalindrome()
True