Uploading source code for chapter 13 (ML)

2021-08-11 17:30:39 +04:00 · 2021-08-11 17:30:39 +04:00 · 4159ea8b58
commit 4159ea8b58
parent ab204b305a
10 changed files with 404 additions and 0 deletions
--- a/Chapter13/gcp/input.json
+++ b/Chapter13/gcp/input.json
@ -0,0 +1,2 @@
+[5.6, 2.5, 3.9, 1.1]
+[3.2, 1.4, 3.0, 1.8]
--- a/Chapter13/gcp/model.joblib
+++ b/Chapter13/gcp/model.joblib
--- a/Chapter13/iris_eval_svc_model/iris.data
+++ b/Chapter13/iris_eval_svc_model/iris.data
@ -0,0 +1,151 @@
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
+
--- a/Chapter13/iris_eval_svc_model/iris.names
+++ b/Chapter13/iris_eval_svc_model/iris.names
@ -0,0 +1,69 @@
+1. Title: Iris Plants Database
+	Updated Sept 21 by C.Blake - Added discrepancy information
+
+2. Sources:
+     (a) Creator: R.A. Fisher
+     (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
+     (c) Date: July, 1988
+
+3. Past Usage:
+   - Publications: too many to mention!!!  Here are a few.
+   1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
+      Annals of Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
+      to Mathematical Statistics" (John Wiley, NY, 1950).
+   2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
+      (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
+   3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
+      Structure and Classification Rule for Recognition in Partially Exposed
+      Environments".  IEEE Transactions on Pattern Analysis and Machine
+      Intelligence, Vol. PAMI-2, No. 1, 67-71.
+      -- Results:
+         -- very low misclassification rates (0% for the setosa class)
+   4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE 
+      Transactions on Information Theory, May 1972, 431-433.
+      -- Results:
+         -- very low misclassification rates again
+   5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II
+      conceptual clustering system finds 3 classes in the data.
+
+4. Relevant Information:
+   --- This is perhaps the best known database to be found in the pattern
+       recognition literature.  Fisher's paper is a classic in the field
+       and is referenced frequently to this day.  (See Duda & Hart, for
+       example.)  The data set contains 3 classes of 50 instances each,
+       where each class refers to a type of iris plant.  One class is
+       linearly separable from the other 2; the latter are NOT linearly
+       separable from each other.
+   --- Predicted attribute: class of iris plant.
+   --- This is an exceedingly simple domain.
+   --- This data differs from the data presented in Fisher's article
+	(identified by Steve Chadwick,  spchadwick@espeedaz.net )
+	The 35th sample should be: 4.9,3.1,1.5,0.2,"Iris-setosa"
+	where the error is in the fourth feature.
+	The 38th sample: 4.9,3.6,1.4,0.1,"Iris-setosa"
+	where the errors are in the second and third features.  
+
+5. Number of Instances: 150 (50 in each of three classes)
+
+6. Number of Attributes: 4 numeric, predictive attributes and the class
+
+7. Attribute Information:
+   1. sepal length in cm
+   2. sepal width in cm
+   3. petal length in cm
+   4. petal width in cm
+   5. class: 
+      -- Iris Setosa
+      -- Iris Versicolour
+      -- Iris Virginica
+
+8. Missing Attribute Values: None
+
+Summary Statistics:
+	         Min  Max   Mean    SD   Class Correlation
+   sepal length: 4.3  7.9   5.84  0.83    0.7826   
+    sepal width: 2.0  4.4   3.05  0.43   -0.4194
+   petal length: 1.0  6.9   3.76  1.76    0.9490  (high!)
+    petal width: 0.1  2.5   1.20  0.76    0.9565  (high!)
+
+9. Class Distribution: 33.3% for each of 3 classes.
--- a/Chapter13/iris_eval_svc_model/iris_build_svm_model.py
+++ b/Chapter13/iris_eval_svc_model/iris_build_svm_model.py
@ -0,0 +1,24 @@
+#iris_build_svm_model.py
+from pandas import read_csv
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report
+from sklearn.metrics import accuracy_score
+from sklearn.svm import SVC
+
+data_file = "iris/iris.data"
+iris_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
+df = read_csv(data_file, names=iris_names)
+
+X = df.drop('class', axis =1)
+y = df['class']
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+				test_size=0.20, random_state=1, shuffle=True)
+
+# Make predictions
+model = SVC(gamma='auto')
+model.fit(X_train, y_train)
+predictions = model.predict(X_test)
+
+print(accuracy_score(y_test, predictions))
+print(classification_report(y_test, predictions))
+
--- a/Chapter13/iris_eval_svc_model/iris_data_analysis.py
+++ b/Chapter13/iris_eval_svc_model/iris_data_analysis.py
@ -0,0 +1,25 @@
+#iris_data_analysis.py
+from pandas import read_csv
+from matplotlib import pyplot
+from pandas.plotting import scatter_matrix
+
+data_file = "iris/iris.data"
+iris_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
+df = read_csv(data_file, names=iris_names)
+
+print(df.shape)
+print(df.head(20))
+print(df.describe())
+print(df.groupby('class').size())
+
+# box and whisker plots
+df.plot(kind='box', subplots=True, layout=(2,2), sharex=False, sharey=False)
+pyplot.show()
+
+# check the histograms
+df.hist()
+pyplot.show()
+
+# scatter plot matrix
+scatter_matrix(df)
+pyplot.show()
--- a/Chapter13/iris_eval_svc_model/iris_eval_svc_model.py
+++ b/Chapter13/iris_eval_svc_model/iris_eval_svc_model.py
@ -0,0 +1,34 @@
+#iris_eval_svc_model.py
+from pandas import read_csv
+from sklearn.metrics import classification_report
+from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
+
+from sklearn.datasets import load_iris
+from sklearn.svm import SVC
+
+data_file = "iris.data"
+iris_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
+df = read_csv(data_file, names=iris_names)
+
+X = df.drop('class', axis =1)
+y = df['class']
+X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.2,random_state=1, shuffle=True)
+
+params = {"C":[0.001, 0.01, 1, 5, 10, 100],
+              "gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
+
+model1=SVC()
+rand_cv=RandomizedSearchCV(model1,params, n_iter = 5, cv=5)
+rand_cv.fit(X_train,y_train)
+print(f"RandomizedSearch - best parameter: {rand_cv.best_params_}")
+print(f"RandomizedSearch - accuracy: {rand_cv.best_score_}")
+print(classification_report(y_test,rand_cv.best_estimator_.predict(X_test)))
+
+model2=SVC()
+grid_cv=GridSearchCV(model2,params,  cv=5)
+grid_cv.fit(X_train,y_train)
+print(f"GridSearch- best parameter: {grid_cv.best_params_}")
+print(f"GridSearch- accuracy: {grid_cv.best_score_}")
+print(classification_report(y_test,grid_cv.best_estimator_.predict(X_test)))
+
+
--- a/Chapter13/iris_eval_svc_model/iris_save_load_grid_svm_modelv2.py
+++ b/Chapter13/iris_eval_svc_model/iris_save_load_grid_svm_modelv2.py
@ -0,0 +1,41 @@
+import pickle
+
+from sklearn.metrics import accuracy_score, classification_report
+from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
+
+from sklearn.datasets import load_iris
+from sklearn.svm import SVC
+
+iris= load_iris()
+
+X = iris.data
+y = iris.target
+x_train, x_test, y_train, y_test=train_test_split(X,y,test_size=0.2)
+
+#the numpy. logspace() function returns number spaces evenly w.r.t interval on a log scale
+params = {"C":[0.001, 0.01, 1, 5, 10, 100],
+              "gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
+model=SVC()
+grid_cv=GridSearchCV(model,params,  cv=5)
+grid_cv.fit(x_train,y_train)
+
+with open("svm_grid.pkl", 'wb') as file:
+	pickle.dump(grid_cv, file)
+
+with open("svm_grid.pkl", 'rb') as file:
+	loaded_grid_cv = pickle.load(file)
+
+print(f"GridSearch- best parameter: {loaded_grid_cv.best_params_}")
+print(f"GridSearch- accuracy: {loaded_grid_cv.best_score_}")
+
+
+with open("svm_grid_best.pkl", 'wb') as file:
+	pickle.dump(grid_cv.best_estimator_, file)
+
+with open("svm_grid_best.pkl", 'rb') as file:
+		loaded_model = pickle.load(file)
+		predictions = loaded_model.predict(x_test)
+
+		# Evaluate predictions
+		print(accuracy_score(y_test, predictions))
+		print(classification_report(y_test, predictions))
--- a/Chapter13/iris_eval_svc_model/iris_save_load_predict_gridmodel.py
+++ b/Chapter13/iris_eval_svc_model/iris_save_load_predict_gridmodel.py
@ -0,0 +1,31 @@
+#iris_save_load_predict_gridmodel.py
+
+import joblib
+from pandas import read_csv
+from sklearn.metrics import accuracy_score, classification_report
+from sklearn.model_selection import train_test_split, GridSearchCV,RandomizedSearchCV
+
+from sklearn.datasets import load_iris
+from sklearn.svm import SVC
+
+data_file = "iris.data"
+iris_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
+df = read_csv(data_file, names=iris_names)
+
+X = df.drop('class', axis =1)
+y = df['class']
+x_train, x_test, y_train, y_test \
+	= train_test_split(X, y, test_size=0.20,
+					   random_state=1, shuffle=True)
+
+params = {"C":[0.001, 0.01, 1, 5, 10, 100],
+              "gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
+model=SVC()
+grid_cv=GridSearchCV(model,params,  cv=5)
+grid_cv.fit(x_train,y_train)
+
+joblib.dump(grid_cv.best_estimator_, "model.joblib")
+loaded_model = joblib.load("model.joblib")
+x_new = [[5.6, 2.5, 3.9, 1.1]]
+y_new = loaded_model.predict(x_new)
+print("X=%s, Predicted=%s" % (x_new[0], y_new[0]))
--- a/Chapter13/iris_eval_svc_model/iris_save_load_predict_model.py
+++ b/Chapter13/iris_eval_svc_model/iris_save_load_predict_model.py
@ -0,0 +1,27 @@
+#iris_save_load_predict_model.py
+from pandas import read_csv
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+import pickle
+
+data_file = "iris.data"
+iris_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
+df = read_csv(data_file, names=iris_names)
+
+X = df.drop('class', axis =1)
+y = df['class']
+x_train, x_test, y_train, y_test \
+	= train_test_split(X, y, test_size=0.20,
+					   random_state=1, shuffle=True)
+
+model = SVC(gamma='auto')
+model.fit(x_train, y_train)
+
+with open("model.pkl", 'wb') as file:
+	pickle.dump(model, file)
+
+with open("model.pkl", 'rb') as file:
+	loaded_model = pickle.load(file)
+	x_new = [[5.6, 2.5, 3.9, 1.1]]
+	y_new = loaded_model.predict(x_new)
+	print("X=%s, Predicted=%s" % (x_new[0], y_new[0]))