diff --git a/Listing_11.1.txt b/Listing_11.1.txt new file mode 100644 index 0000000..3559282 --- /dev/null +++ b/Listing_11.1.txt @@ -0,0 +1,6 @@ +$ cd ~/repo +$ mkdir python_data_science +$ cd python_data_science/ +$ python3 -m venv venv +$ source venv/bin/activate +(venv) $ diff --git a/Listing_11.10.py b/Listing_11.10.py index 262e03d..e8cf971 100644 --- a/Listing_11.10.py +++ b/Listing_11.10.py @@ -1,19 +1,5 @@ ->>> from math import tau ->>> from numpy.random import default_rng ->>> rng = default_rng() ->>> df = pd.DataFrame( -... { -... "Number": 1.0, -... "String": "foo", -... "Angles": np.linspace(0, tau, 5), -... "Random": pd.Series(rng.standard_normal(5)), -... "Timestamp": pd.Timestamp("20221020"), -... "Size": pd.Categorical(["tiny", "small", "mid", "big", "huge"]) -... }) ->>> df - Number String Angles Random Timestamp Size -0 1.0 foo 0.000000 -1.954002 2022-10-20 tiny -1 1.0 foo 1.570796 0.967171 2022-10-20 small -2 1.0 foo 3.141593 -1.149739 2022-10-20 mid -3 1.0 foo 4.712389 -0.084962 2022-10-20 big -4 1.0 foo 6.283185 0.310634 2022-10-20 huge +>>> x = np.linspace(0, tau, 100) +>>> fig, (ax1, ax2) = plt.subplots(2) +>>> fig.suptitle(r"Vertically stacked plots of $\cos\theta$ and $\sin\theta$.") +>>> ax1.plot(x, np.cos(x)) +>>> ax2.plot(x, np.sin(x)) diff --git a/Listing_11.11.py b/Listing_11.11.py index ef68edd..262e03d 100644 --- a/Listing_11.11.py +++ b/Listing_11.11.py @@ -1,7 +1,19 @@ ->>> sizes = {"tiny": 4, "small": 8, "mid": 12, "big": 16, "huge": 24} ->>> df["Size"].map(sizes) -0 4 -1 8 -2 12 -3 16 -4 24 +>>> from math import tau +>>> from numpy.random import default_rng +>>> rng = default_rng() +>>> df = pd.DataFrame( +... { +... "Number": 1.0, +... "String": "foo", +... "Angles": np.linspace(0, tau, 5), +... "Random": pd.Series(rng.standard_normal(5)), +... "Timestamp": pd.Timestamp("20221020"), +... "Size": pd.Categorical(["tiny", "small", "mid", "big", "huge"]) +... }) +>>> df + Number String Angles Random Timestamp Size +0 1.0 foo 0.000000 -1.954002 2022-10-20 tiny +1 1.0 foo 1.570796 0.967171 2022-10-20 small +2 1.0 foo 3.141593 -1.149739 2022-10-20 mid +3 1.0 foo 4.712389 -0.084962 2022-10-20 big +4 1.0 foo 6.283185 0.310634 2022-10-20 huge diff --git a/Listing_11.12.py b/Listing_11.12.py index d7b9780..ef68edd 100644 --- a/Listing_11.12.py +++ b/Listing_11.12.py @@ -1,9 +1,7 @@ - ->>> nobel.head() - id firstname ... city country -0 1 Wilhelm Conrad ... Munich Germany -1 2 Hendrik A. ... Leiden the Netherlands -2 3 Pieter ... Amsterdam the Netherlands -3 4 Henri ... Paris France -4 5 Pierre ... Paris France -[5 rows x 20 columns] +>>> sizes = {"tiny": 4, "small": 8, "mid": 12, "big": 16, "huge": 24} +>>> df["Size"].map(sizes) +0 4 +1 8 +2 12 +3 16 +4 24 diff --git a/Listing_11.13.py b/Listing_11.13.py index a9bec60..d7b9780 100644 --- a/Listing_11.13.py +++ b/Listing_11.13.py @@ -1,3 +1,9 @@ ->>> nobel.loc[nobel["firstname"].str.contains("Kip")] - id firstname surname ... name city country -916 943 Kip S. Thorne ... LIGO/VIRGO Collaboration NaN NaN + +>>> nobel.head() + id firstname ... city country +0 1 Wilhelm Conrad ... Munich Germany +1 2 Hendrik A. ... Leiden the Netherlands +2 3 Pieter ... Amsterdam the Netherlands +3 4 Henri ... Paris France +4 5 Pierre ... Paris France +[5 rows x 20 columns] diff --git a/Listing_11.14.py b/Listing_11.14.py index b992b19..a9bec60 100644 --- a/Listing_11.14.py +++ b/Listing_11.14.py @@ -1,7 +1,3 @@ ->>> curies = nobel.loc[nobel["surname"].str.contains("Curie", na=False)] ->>> curies - id firstname ... city country -4 5 Pierre ... Paris France -5 6 Marie ... NaN NaN -6 6 Marie ... Paris France -191 194 Irène ... Paris France +>>> nobel.loc[nobel["firstname"].str.contains("Kip")] + id firstname surname ... name city country +916 943 Kip S. Thorne ... LIGO/VIRGO Collaboration NaN NaN diff --git a/Listing_11.15.py b/Listing_11.15.py index d47fc1b..b992b19 100644 --- a/Listing_11.15.py +++ b/Listing_11.15.py @@ -1,8 +1,7 @@ ->>> laureates = nobel.groupby(["id", "firstname", "surname"]) ->>> sizes = laureates.size() ->>> sizes[sizes > 1] -id firstname surname -6 Marie Curie 2 -66 John Bardeen 2 -217 Linus Pauling 2 -222 Frederick Sanger 2 +>>> curies = nobel.loc[nobel["surname"].str.contains("Curie", na=False)] +>>> curies + id firstname ... city country +4 5 Pierre ... Paris France +5 6 Marie ... NaN NaN +6 6 Marie ... Paris France +191 194 Irène ... Paris France diff --git a/Listing_11.16.py b/Listing_11.16.py index 01023f1..d47fc1b 100644 --- a/Listing_11.16.py +++ b/Listing_11.16.py @@ -1,3 +1,8 @@ ->>> nobel.hist(column="lifespan") -array([[]], dtype=object) ->>> plt.show() +>>> laureates = nobel.groupby(["id", "firstname", "surname"]) +>>> sizes = laureates.size() +>>> sizes[sizes > 1] +id firstname surname +6 Marie Curie 2 +66 John Bardeen 2 +217 Linus Pauling 2 +222 Frederick Sanger 2 diff --git a/Listing_11.17.py b/Listing_11.17.py index 64fb983..01023f1 100644 --- a/Listing_11.17.py +++ b/Listing_11.17.py @@ -1,2 +1,3 @@ ->>> URL = "https://learnenough.s3.amazonaws.com/titanic.csv" ->>> titanic = pd.read_csv(URL) +>>> nobel.hist(column="lifespan") +array([[]], dtype=object) +>>> plt.show() diff --git a/Listing_11.18.py b/Listing_11.18.py index f6a4463..64fb983 100644 --- a/Listing_11.18.py +++ b/Listing_11.18.py @@ -1,9 +1,2 @@ ->>> titanic = pd.read_csv(URL, index_col="Name") ->>> titanic.head() - PassengerId ... Embarked -Name ... -Braund, Mr. Owen Harris 1 ... S -Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 ... C -Heikkinen, Miss. Laina 3 ... S -Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 ... S -Allen, Mr. William Henry 5 ... S +>>> URL = "https://learnenough.s3.amazonaws.com/titanic.csv" +>>> titanic = pd.read_csv(URL) diff --git a/Listing_11.19.py b/Listing_11.19.py index d54b37e..f6a4463 100644 --- a/Listing_11.19.py +++ b/Listing_11.19.py @@ -1,15 +1,9 @@ ->>> titanic["Age"].notna() -Name -Braund, Mr. Owen Harris True -Cumings, Mrs. John Bradley (Florence Briggs Thayer) True -Heikkinen, Miss. Laina True -Futrelle, Mrs. Jacques Heath (Lily May Peel) True -Allen, Mr. William Henry True - ... -Montvila, Rev. Juozas True -Graham, Miss. Margaret Edith True -Johnston, Miss. Catherine Helen "Carrie" False -Behr, Mr. Karl Howell True -Dooley, Mr. Patrick True -Name: Age, Length: 891, dtype: bool ->>> valid_ages = titanic[titanic["Age"].notna()] +>>> titanic = pd.read_csv(URL, index_col="Name") +>>> titanic.head() + PassengerId ... Embarked +Name ... +Braund, Mr. Owen Harris 1 ... S +Cumings, Mrs. John Bradley (Florence Briggs Tha... 2 ... C +Heikkinen, Miss. Laina 3 ... S +Futrelle, Mrs. Jacques Heath (Lily May Peel) 4 ... S +Allen, Mr. William Henry 5 ... S diff --git a/Listing_11.1.bash b/Listing_11.2.bash similarity index 100% rename from Listing_11.1.bash rename to Listing_11.2.bash diff --git a/Listing_11.20.py b/Listing_11.20.py index e8c7a7a..d54b37e 100644 --- a/Listing_11.20.py +++ b/Listing_11.20.py @@ -1,2 +1,15 @@ -titanic[(titanic["Sex"] == "female") & - (titanic["Pclass"] == 3)]["Survived"].mean() +>>> titanic["Age"].notna() +Name +Braund, Mr. Owen Harris True +Cumings, Mrs. John Bradley (Florence Briggs Thayer) True +Heikkinen, Miss. Laina True +Futrelle, Mrs. Jacques Heath (Lily May Peel) True +Allen, Mr. William Henry True + ... +Montvila, Rev. Juozas True +Graham, Miss. Margaret Edith True +Johnston, Miss. Catherine Helen "Carrie" False +Behr, Mr. Karl Howell True +Dooley, Mr. Patrick True +Name: Age, Length: 891, dtype: bool +>>> valid_ages = titanic[titanic["Age"].notna()] diff --git a/Listing_11.21.py b/Listing_11.21.py index c2ad9e7..e8c7a7a 100644 --- a/Listing_11.21.py +++ b/Listing_11.21.py @@ -1,4 +1,2 @@ -male_passengers = titanic[titanic["Sex"] == "male"] -female_passengers = titanic[titanic["Sex"] == "female"] -valid_male_ages = male_passengers[titanic["Age"].notna()] -valid_female_ages = female_passengers[titanic["Age"].notna()] +titanic[(titanic["Sex"] == "female") & + (titanic["Pclass"] == 3)]["Survived"].mean() diff --git a/Listing_11.22.py b/Listing_11.22.py index a99ce28..c2ad9e7 100644 --- a/Listing_11.22.py +++ b/Listing_11.22.py @@ -1 +1,4 @@ ->>> from sklearn.linear_model import LinearRegression +male_passengers = titanic[titanic["Sex"] == "male"] +female_passengers = titanic[titanic["Sex"] == "female"] +valid_male_ages = male_passengers[titanic["Age"].notna()] +valid_female_ages = female_passengers[titanic["Age"].notna()] diff --git a/Listing_11.23.py b/Listing_11.23.py index a9e70fd..a99ce28 100644 --- a/Listing_11.23.py +++ b/Listing_11.23.py @@ -1,5 +1 @@ -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB -from sklearn.linear_model import Perceptron -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier +>>> from sklearn.linear_model import LinearRegression diff --git a/Listing_11.24.py b/Listing_11.24.py index f5075e6..a9e70fd 100644 --- a/Listing_11.24.py +++ b/Listing_11.24.py @@ -1,7 +1,5 @@ - Model -Score -0.854749 Decision Tree -0.854749 Random Forest -0.787709 Logistic Regression -0.770950 Naive Bayes -0.743017 Perceptron +from sklearn.linear_model import LogisticRegression +from sklearn.naive_bayes import GaussianNB +from sklearn.linear_model import Perceptron +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier diff --git a/Listing_11.25.py b/Listing_11.25.py new file mode 100644 index 0000000..f5075e6 --- /dev/null +++ b/Listing_11.25.py @@ -0,0 +1,7 @@ + Model +Score +0.854749 Decision Tree +0.854749 Random Forest +0.787709 Logistic Regression +0.770950 Naive Bayes +0.743017 Perceptron diff --git a/Listing_11.3.py b/Listing_11.3.py deleted file mode 100644 index f8b7c1b..0000000 --- a/Listing_11.3.py +++ /dev/null @@ -1,17 +0,0 @@ ->>> a.reshape((-1, 1)) -array([[ 0], - [ 1], - [ 2], - [ 3], - [ 4], - [ 5], - [ 6], - [ 7], - [ 8], - [ 9], - [10], - [11], - [12], - [13], - [14], - [15]]) diff --git a/Listing_11.2.txt b/Listing_11.3.txt similarity index 100% rename from Listing_11.2.txt rename to Listing_11.3.txt diff --git a/Listing_11.4.py b/Listing_11.4.py index abdb3d7..f8b7c1b 100644 --- a/Listing_11.4.py +++ b/Listing_11.4.py @@ -1,5 +1,17 @@ ->>> np.arange(5) -array([0, 1, 2, 3, 4]) ->>> angles = math.tau * np.arange(5) / 4 ->>> angles -array([0. , 1.57079633, 3.14159265, 4.71238898, 6.28318531]) +>>> a.reshape((-1, 1)) +array([[ 0], + [ 1], + [ 2], + [ 3], + [ 4], + [ 5], + [ 6], + [ 7], + [ 8], + [ 9], + [10], + [11], + [12], + [13], + [14], + [15]]) diff --git a/Listing_11.5.py b/Listing_11.5.py index 174e993..abdb3d7 100644 --- a/Listing_11.5.py +++ b/Listing_11.5.py @@ -1,8 +1,5 @@ ->>> math.cos(angles) -Traceback (most recent call last): - File "", line 1, in -TypeError: only size-1 arrays can be converted to Python scalars ->>> a = np.cos(angles) ->>> a -array([ 1.0000000e+00, 6.1232340e-17, -1.0000000e+00, -1.8369702e-16, - 1.0000000e+00]) +>>> np.arange(5) +array([0, 1, 2, 3, 4]) +>>> angles = math.tau * np.arange(5) / 4 +>>> angles +array([0. , 1.57079633, 3.14159265, 4.71238898, 6.28318531]) diff --git a/Listing_11.6.py b/Listing_11.6.py index 0c4d477..174e993 100644 --- a/Listing_11.6.py +++ b/Listing_11.6.py @@ -1,5 +1,8 @@ ->>> a[np.isclose(a, 0)] -array([ 6.1232340e-17, -1.8369702e-16]) ->>> a[np.isclose(a, 0)] = 0 +>>> math.cos(angles) +Traceback (most recent call last): + File "", line 1, in +TypeError: only size-1 arrays can be converted to Python scalars +>>> a = np.cos(angles) >>> a -array([ 1., 0., -1., 0., 1.]) +array([ 1.0000000e+00, 6.1232340e-17, -1.0000000e+00, -1.8369702e-16, + 1.0000000e+00]) diff --git a/Listing_11.7.py b/Listing_11.7.py index 42cb4a7..0c4d477 100644 --- a/Listing_11.7.py +++ b/Listing_11.7.py @@ -1,31 +1,5 @@ -from math import tau - -import numpy as np -import matplotlib.pyplot as plt - - -x = np.linspace(0, tau, 100) - -fig, ax = plt.subplots() - -ax.set_xticks([0, tau/4, tau/2, 3*tau/4, tau]) -ax.set_yticks([-1, -1/2, 0, 1/2, 1]) -plt.grid(True) - -ax.set_xticklabels([r"$0$", r"$\tau/4$", r"$\tau/2$", r"$3\tau/4$", r"$\tau$"]) -ax.set_yticklabels([r"$-1$", r"$-1/2$", r"$0$", r"$1/2$", r"$1$"]) - -ax.set_title("One period of cosine and sine", fontsize=16) -ax.set_xlabel(r"$\theta$", fontsize=16) -ax.set_ylabel(r"$f(\theta)$", fontsize=16) - -ax.annotate(r"$\cos\theta$", xy=(1.75, -0.3), xytext=(0.5, -0.75), - arrowprops={"facecolor": "black", "width": 1}, fontsize=16) -ax.annotate(r"$\sin\theta$", xy=(2.75, 0.5), xytext=(3.5, 0.75), - arrowprops={"facecolor": "black", "width": 1}, fontsize=16) - -fig.set_dpi(150) - -ax.plot(x, np.cos(x), color="red", linestyle="dashed") -ax.plot(x, np.sin(x), color="blue", linestyle="dotted") -plt.show() +>>> a[np.isclose(a, 0)] +array([ 6.1232340e-17, -1.8369702e-16]) +>>> a[np.isclose(a, 0)] = 0 +>>> a +array([ 1., 0., -1., 0., 1.]) diff --git a/Listing_11.8.py b/Listing_11.8.py index 1a05ae8..42cb4a7 100644 --- a/Listing_11.8.py +++ b/Listing_11.8.py @@ -1,15 +1,31 @@ ->>> from numpy.random import default_rng ->>> rng = default_rng() ->>> n_pts = 50 ->>> x = rng.standard_normal(n_pts) ->>> x -array([ 0.41256003, 0.67594205, 1.264653 , 1.16351491, -0.41594407, - -0.60157015, 0.84889823, -0.59984223, 0.24374326, 0.06055498, - -0.48512829, 1.02253594, -1.10982933, -0.40609179, 0.55076245, - 0.13046238, 0.86712869, 0.06139358, -2.26538163, 1.45785923, - -0.56220574, -1.38775239, -2.39643977, -0.77498392, 1.16794796, - -0.6588802 , 1.66343434, 1.57475219, -0.03374501, -0.62757059, - -0.99378175, 0.69259747, -1.04555996, 0.62653116, -0.9042063 , - -0.32565268, -0.99762804, -0.4270288 , 0.69940045, -0.46574267, - 1.82225132, 0.23925201, -1.0443741 , -0.54779683, 1.17466477, - -2.54906663, -0.31495622, 0.25224765, -1.20869217, -1.02737145]) +from math import tau + +import numpy as np +import matplotlib.pyplot as plt + + +x = np.linspace(0, tau, 100) + +fig, ax = plt.subplots() + +ax.set_xticks([0, tau/4, tau/2, 3*tau/4, tau]) +ax.set_yticks([-1, -1/2, 0, 1/2, 1]) +plt.grid(True) + +ax.set_xticklabels([r"$0$", r"$\tau/4$", r"$\tau/2$", r"$3\tau/4$", r"$\tau$"]) +ax.set_yticklabels([r"$-1$", r"$-1/2$", r"$0$", r"$1/2$", r"$1$"]) + +ax.set_title("One period of cosine and sine", fontsize=16) +ax.set_xlabel(r"$\theta$", fontsize=16) +ax.set_ylabel(r"$f(\theta)$", fontsize=16) + +ax.annotate(r"$\cos\theta$", xy=(1.75, -0.3), xytext=(0.5, -0.75), + arrowprops={"facecolor": "black", "width": 1}, fontsize=16) +ax.annotate(r"$\sin\theta$", xy=(2.75, 0.5), xytext=(3.5, 0.75), + arrowprops={"facecolor": "black", "width": 1}, fontsize=16) + +fig.set_dpi(150) + +ax.plot(x, np.cos(x), color="red", linestyle="dashed") +ax.plot(x, np.sin(x), color="blue", linestyle="dotted") +plt.show() diff --git a/Listing_11.9.py b/Listing_11.9.py index e8cf971..1a05ae8 100644 --- a/Listing_11.9.py +++ b/Listing_11.9.py @@ -1,5 +1,15 @@ ->>> x = np.linspace(0, tau, 100) ->>> fig, (ax1, ax2) = plt.subplots(2) ->>> fig.suptitle(r"Vertically stacked plots of $\cos\theta$ and $\sin\theta$.") ->>> ax1.plot(x, np.cos(x)) ->>> ax2.plot(x, np.sin(x)) +>>> from numpy.random import default_rng +>>> rng = default_rng() +>>> n_pts = 50 +>>> x = rng.standard_normal(n_pts) +>>> x +array([ 0.41256003, 0.67594205, 1.264653 , 1.16351491, -0.41594407, + -0.60157015, 0.84889823, -0.59984223, 0.24374326, 0.06055498, + -0.48512829, 1.02253594, -1.10982933, -0.40609179, 0.55076245, + 0.13046238, 0.86712869, 0.06139358, -2.26538163, 1.45785923, + -0.56220574, -1.38775239, -2.39643977, -0.77498392, 1.16794796, + -0.6588802 , 1.66343434, 1.57475219, -0.03374501, -0.62757059, + -0.99378175, 0.69259747, -1.04555996, 0.62653116, -0.9042063 , + -0.32565268, -0.99762804, -0.4270288 , 0.69940045, -0.46574267, + 1.82225132, 0.23925201, -1.0443741 , -0.54779683, 1.17466477, + -2.54906663, -0.31495622, 0.25224765, -1.20869217, -1.02737145]) diff --git a/Listing_7.12.py b/Listing_7.12.py index 8384982..f7e00a9 100644 --- a/Listing_7.12.py +++ b/Listing_7.12.py @@ -1,4 +1,4 @@ >>> reload(palindrome) >>> frase = palindrome.TranslatedPhrase("recognize", "reconocer") ->>> frase.ispalidrome() +>>> frase.ispalindrome() False diff --git a/Listing_7.14.py b/Listing_7.14.py index 63d136e..945986b 100644 --- a/Listing_7.14.py +++ b/Listing_7.14.py @@ -1,4 +1,4 @@ >>> reload(palindrome) >>> frase = palindrome.TranslatedPhrase("recognize", "reconocer") ->>> frase.ispalidrome() +>>> frase.ispalindrome() True