# Train a simple linear regression on one feature and report its quality
# (mean squared error and R^2) on the training data and on held-out test data.
# Expects the DataFrames `df_train` and `df_test` to be defined beforehand.

# Import the estimator and the quality metrics.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Instantiate the model.
model = LinearRegression()

# Prepare training data.
# The feature label is wrapped in a list so that .loc returns a 2-D DataFrame;
# scikit-learn estimators reject a 1-D Series as the feature matrix.
features_train = df_train.loc[:, ['feature_name']]
target_train = df_train.loc[:, 'target_name']

# Fit (train) the model and print coefficient and intercept.
model.fit(features_train, target_train)
print(model.coef_)
print(model.intercept_)

# Calculate model quality on the training data.
target_prediction = model.predict(features_train)
print(mean_squared_error(target_train, target_prediction))
print(r2_score(target_train, target_prediction))

# Test predictions on data the model has not seen during fitting.
# NOTE(review): the original read from df_train here, which only repeats the
# training metrics. This assumes the held-out DataFrame is named df_test with
# the same columns -- adjust the name if yours differs.
features_test = df_test.loc[:, ['feature_name']]
target_test = df_test.loc[:, 'target_name']
target_prediction_test = model.predict(features_test)
print(mean_squared_error(target_test, target_prediction_test))
print(r2_score(target_test, target_prediction_test))
Confusion Matrix
Too confused by the confusion matrix?
Let me bring some clarity to this topic!
Data Science Datasets: Iris flower data set
The Iris flower data set or Fisher’s Iris data set became a typical test case for many statistical classification techniques in machine learning such as support vector machines.
It is sometimes called Anderson’s Iris data set because Edgar Anderson collected the data to quantify the morphological variation of Iris flowers of three related species.
This data set can be loaded from scikit-learn as follows:
# Load the Iris data set that ships with scikit-learn and show its dimensions.
from sklearn import datasets

iris = datasets.load_iris()

# iris.data holds the feature matrix, iris.target the class labels.
# Printed explicitly so the shapes are also shown outside an interactive
# session; expected output is ((150, 4), (150,)).
print((iris.data.shape, iris.target.shape))