from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# %matplotlib inline  # IPython-only magic: invalid syntax in a plain .py file,
#                     # so it is kept here as a comment (re-enable in a notebook).
sns.set()  # apply seaborn's default figure styling globally
# Class 1: 200 points from a 2-D Gaussian centred at (5, 5) with diagonal
# covariance diag(4, 1). multivariate_normal already returns shape (200, 2),
# so the original trailing .reshape(200, 2) was redundant and is dropped.
# NOTE(review): no random seed is set anywhere, so the dataset differs on
# every run — seed np.random for reproducible results.
X1 = np.random.multivariate_normal(np.array([5, 5]), np.array([[4, 0], [0, 1]]), 200)
y1 = np.ones((200, 1))  # label 1.0 as a column vector
print(X1[:10])
# Class 2: 200 points centred at (5, 10), covariance diag(3, 1).
# The redundant .reshape(200, 2) and commented-out dead code are removed.
X2 = np.random.multivariate_normal(np.array([5, 10]), np.array([[3, 0], [0, 1]]), 200)
y2 = np.full((200, 1), 2.0)  # label 2.0 as a column vector
# Class 3: 200 points centred at (10, 15), covariance diag(4, 4).
# The redundant .reshape(200, 2) and commented-out dead code are removed.
X3 = np.random.multivariate_normal(np.array([10, 15]), np.array([[4, 0], [0, 4]]), 200)
y3 = np.full((200, 1), 3.0)  # label 3.0 as a column vector
# Class 4: 200 points centred at (16, 5), covariance diag(2, 2).
# The redundant .reshape(200, 2) and commented-out dead code are removed.
X4 = np.random.multivariate_normal(np.array([16, 5]), np.array([[2, 0], [0, 2]]), 200)
y4 = np.full((200, 1), 4.0)  # label 4.0 as a column vector
# Visualise the four raw clusters, one marker/colour per class.
# plt.scatter accepts NumPy arrays directly, so the original list(...)
# wrappers were needless copies and are removed; the commented-out 3-D
# plotting experiment is dropped as dead code.
fig = plt.figure(figsize=(16, 9))
plt.scatter(X1[:, 0], X1[:, 1], marker='^', color='red')
plt.scatter(X2[:, 0], X2[:, 1], marker='o', color='green')
plt.scatter(X3[:, 0], X3[:, 1], marker='.', color='yellow')
plt.scatter(X4[:, 0], X4[:, 1], marker='+', color='blue')
plt.show()
# Split the data into training/testing sets: hold out the last 40 samples
# of each class for testing -> 640 training and 160 test points.
# A single np.concatenate per array replaces the chains of np.append calls
# (np.append copies its inputs on every call).
n_test = 40
X_train = np.concatenate([X1[:-n_test], X2[:-n_test], X3[:-n_test], X4[:-n_test]], axis=0)
X_test = np.concatenate([X1[-n_test:], X2[-n_test:], X3[-n_test:], X4[-n_test:]], axis=0)
# Labels are raveled to 1-D: sklearn raises DataConversionWarning when fit
# receives an (n, 1) column vector, and the downstream loop handles either shape.
y_train = np.concatenate([y1[:-n_test], y2[:-n_test], y3[:-n_test], y4[:-n_test]], axis=0).ravel()
y_test = np.concatenate([y1[-n_test:], y2[-n_test:], y3[-n_test:], y4[-n_test:]], axis=0).ravel()
# The original bare `.shape` expressions were notebook cell outputs and are
# no-ops in a script; report the shapes explicitly instead.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# Create an instance of Logistic Regression Classifier and fit the data.
# C=1e5 makes the L2 regularisation effectively negligible.
logreg = LogisticRegression(C=1e5)
# np.ravel keeps this working (and warning-free) whether y_train is a
# 1-D vector or an (n, 1) column vector.
logreg.fit(X_train, np.ravel(y_train))
prediction = logreg.predict(X_test)
print(prediction)
# Accuracy = (number of correct predictions) / (total predictions)
# Count correct predictions and report each misclassified test point.
# np.ravel tolerates y_test being either 1-D or an (n, 1) column vector;
# the original compared a scalar against a size-1 array, which works only
# via implicit size-1 truthiness.
y_true = np.ravel(y_test)
wrong = prediction != y_true
for pred_cls, true_cls in zip(prediction[wrong], y_true[wrong]):
    print("prediction was class : ", int(pred_cls), " but actual class is : ", int(true_cls))
cp = int(np.count_nonzero(~wrong))  # number of correct predictions
print("\n\nNo of correct predictions = ", cp)
accuracy = cp / prediction.size
print("accuracy : ", accuracy)
# Plot the test points styled by their PREDICTED class (large markers) on
# top of the training points in their true-class colours, so mistakes show
# up as a large marker of the wrong style inside a cluster.
fig = plt.figure(figsize=(16, 9))
# Index 0 is a dummy entry so class labels 1..4 index the tables directly.
c = ['null', '#fc6b03', '#03fc28', '#a7ab2e', '#03e8fc']
m = ['null', '^', 'o', '.', '+']
for point, label in zip(X_test, prediction):
    k = int(label)
    plt.scatter(point[0], point[1], marker=m[k], color=c[k], s=150)
# Training points (first 160 of each class); plt.scatter takes arrays
# directly, so the original list(...) wrappers are dropped.
plt.scatter(X1[:-40, 0], X1[:-40, 1], marker='^', color='red')
plt.scatter(X2[:-40, 0], X2[:-40, 1], marker='o', color='green')
plt.scatter(X3[:-40, 0], X3[:-40, 1], marker='.', color='yellow')
plt.scatter(X4[:-40, 0], X4[:-40, 1], marker='+', color='blue')
plt.show()