import numpy as np # numerical arrays and linear algebra
import matplotlib.pyplot as plt # plotting library
import matplotlib.patches as mpatches # legend patches
from mpl_toolkits.mplot3d import Axes3D # enables 3D projections
from sklearn.datasets import make_regression # synthetic regression dataset generation
from sklearn.linear_model import LinearRegression # linear regression model
from sklearn.preprocessing import PolynomialFeatures # polynomial feature expansion
from sklearn.metrics import mean_squared_error, r2_score # evaluation metrics (RMSE, R^2)
An artificially generated 1D dataset of the form $y = mx + \epsilon$, where $\epsilon$ is Gaussian noise with standard deviation 6 (the `noise` argument below).
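For intuition, the same kind of data could be generated by hand; a rough sketch (the slope and the sampling range below are arbitrary illustration choices, whereas make_regression draws its own random coefficient):

# Hand-rolled equivalent of make_regression for a 1D linear dataset (illustration only)
rng = np.random.default_rng(0)
m_demo = 40.0                                # illustrative slope
X_demo = rng.uniform(-3, 3, size=(100, 1))   # 100 samples, 1 feature
eps = rng.normal(0, 6, size=100)             # Gaussian noise, std dev 6
y_demo = m_demo * X_demo[:, 0] + eps         # y = m*x + eps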
X, y = make_regression(n_samples=100, n_features=1, noise=6)
# plot regression dataset
fig, ax = plt.subplots()
ax.set_title('The plot for the whole dataset')
scatter = ax.scatter(X,y,color='#4224eb')
data_patch = mpatches.Patch(color='#4224eb', label='1D dataset')
plt.legend(handles=[data_patch], loc="lower right", title="Legend")
ax.grid(True)
plt.show()
# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]
# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]
# Create linear regression object
regr = LinearRegression()
# Train the model using the training sets
regr.fit(X_train, y_train)
# Make predictions using the testing set
y_pred = regr.predict(X_test)
print(y_pred)
# The coefficients
print('Coefficients: \n', regr.coef_)
# The root mean squared error
print('Root Mean Squared Error [RMSE]: %.2f'
      % np.sqrt(mean_squared_error(y_test, y_pred)))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f'
% r2_score(y_test, y_pred))
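As a sanity check, the same numbers can be reproduced from the fitted parameters: the model predicts $\hat{y} = \theta_1 x + \theta_0$ (regr.coef_ and regr.intercept_), RMSE is the square root of the mean squared residual, and $R^2 = 1 - \sum_i (y_i - \hat{y}_i)^2 / \sum_i (y_i - \bar{y})^2$. A minimal sketch, assuming the variables defined above:

# Manual reconstruction of the predictions and metrics (sanity check only)
y_hat = X_test[:, 0] * regr.coef_[0] + regr.intercept_   # equivalent to regr.predict(X_test)
rmse_manual = np.sqrt(np.mean((y_test - y_hat) ** 2))    # root mean squared error
ss_res = np.sum((y_test - y_hat) ** 2)                   # residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)         # total sum of squares
r2_manual = 1 - ss_res / ss_tot                          # coefficient of determination
print('Manual RMSE: %.2f, manual R^2: %.2f' % (rmse_manual, r2_manual))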
# Plot outputs
fig, ax = plt.subplots()
ax.set_title('The plot for the test set')
scatter = ax.scatter(X_test, y_test,color='#4224eb')
test_patch = mpatches.Patch(color='#4224eb', label='Test set')
plt.legend(handles=[test_patch], loc="lower right", title="Legend")
plt.plot(X_test, y_pred, color='#f90909', linewidth=3)
# plt.xticks(())
# plt.yticks(())
ax.grid(True)
plt.show()
In polynomial regression, the hypothesis is a polynomial in the input feature: $h_\theta(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_1^2 + \theta_3 x_1^3 + \dots + \theta_n x_1^n$
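In scikit-learn this expansion is performed by PolynomialFeatures, which maps each sample to the monomial terms of the hypothesis above. A minimal sketch on a single feature (with several input features, as in the next cell, the degree-4 expansion also contains cross-terms such as $x_1 x_2^2$):

# Degree-4 expansion of one feature: columns are [1, x, x^2, x^3, x^4]
x_single = np.array([[2.0], [3.0]])
poly_demo = PolynomialFeatures(degree=4)
print(poly_demo.fit_transform(x_single))
# [[ 1.  2.  4.  8. 16.]
#  [ 1.  3.  9. 27. 81.]]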
X, y = make_regression(n_samples=100, n_features=4, noise=6)
X
X[:,0]
# Split the data into training/testing sets
X_train = X[:-20]
X_test = X[-20:]
# Split the targets into training/testing sets
y_train = y[:-20]
y_test = y[-20:]
polynomial_features = PolynomialFeatures(degree=4)
x_poly = polynomial_features.fit_transform(X_train)
x_test_poly = polynomial_features.transform(X_test)  # reuse the expansion fitted on the training set
model = LinearRegression()
model.fit(x_poly, y_train)
y_poly_pred = model.predict(x_test_poly)
rmse = np.sqrt(mean_squared_error(y_test,y_poly_pred))
r2 = r2_score(y_test,y_poly_pred)
print("Root Mean Squared Error [RMSE] : ",rmse)
print("R-2 score : ",r2)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
img = ax.scatter(X_test[:,0], X_test[:,1], X_test[:,2], c=X_test[:,3], cmap='hot')
fig.colorbar(img)
plt.show()
import matplotlib
from scipy.interpolate import griddata

name_color_map = 'seismic'
list_name_variables = ['x', 'y', 'z', 'c']
index_x, index_y, index_z, index_c = 0, 1, 2, 3
# X and y are resampled onto regular 1D grids and combined into a 2D mesh.
x_lin = np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), len(np.unique(X_test[:, 0])))
y_lin = np.linspace(y_test.min(), y_test.max(), len(np.unique(y_test)))
x2, y2 = np.meshgrid(x_lin, y_lin)
# Interpolation of Z from the scattered (x, y) points onto the new X-Y grid.
# Note: interpolated values can fall below z.min(), so values that are too low
# are clipped to the true minimum.
z2 = griddata((X_test[:, 0], y_test), X_test[:, 1], (x2, y2), method='cubic', fill_value=0)
z2[z2 < X_test[:, 1].min()] = X_test[:, 1].min()
# Interpolation of C onto the new X-Y grid (as for Z).
# Caveat: the interpolation of C does not take Z into account, so this
# representation is less faithful than the previous plots.
c2 = griddata((X_test[:, 0], y_test), X_test[:, 2], (x2, y2), method='cubic', fill_value=0)
c2[c2 < X_test[:, 2].min()] = X_test[:, 2].min()
#--------
color_dimension = c2  # must be 2D, just like X, Y and Z
minn, maxx = color_dimension.min(), color_dimension.max()
norm = matplotlib.colors.Normalize(minn, maxx)
m = plt.cm.ScalarMappable(norm=norm, cmap=name_color_map)
m.set_array([])
fcolors = m.to_rgba(color_dimension)
# Now that X, Y, Z and C are all 2D, we can use plot_surface.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(x2, y2, z2, facecolors=fcolors, linewidth=0, rstride=1, cstride=1,
                       antialiased=False)
cbar = fig.colorbar(m, ax=ax, shrink=0.5, aspect=5)
cbar.ax.get_yaxis().labelpad = 15
cbar.ax.set_ylabel(list_name_variables[index_c], rotation=270)
ax.set_xlabel(list_name_variables[index_x])
ax.set_ylabel(list_name_variables[index_y])
ax.set_zlabel(list_name_variables[index_z])
plt.title('%s as a function of %s, %s and %s' % (list_name_variables[index_c],
          list_name_variables[index_x], list_name_variables[index_y], list_name_variables[index_z]))
plt.show()