20MIS1025 - Regression - Ipynb - Colaboratory
20MIS1025 - Regression - Ipynb - Colaboratory
ipynb - Colaboratory
Overview
from IPython.display import Image
%matplotlib inline
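Source: https://archive.ics.uci.edu/ml/datasets/Housing (UCI Housing; this is the dataset referenced by the commented-out loader below — the code in this notebook actually reads the local file `KDD_Train.csv`)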
Attributes:
import pandas as pd

# Disabled alternative loader (UCI Housing), kept for reference:
# df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data', header=None, sep='\s+')
df = pd.read_csv('KDD_Train.csv')

# Positional column selection: columns 22..25 form the feature matrix and
# column 27 is the target. NOTE(review): which named columns these positions
# correspond to depends on the CSV layout — confirm against the file header.
feature_positions = slice(22, 26)
target_position = 27
X = df.iloc[:, feature_positions].values
y = df.iloc[:, target_position].values
print(X)
[[ 2. 2. 0. 0.]
[ 13. 1. 0. 0.]
[123. 6. 1. 1.]
...
[ 1. 1. 0. 0.]
https://colab.research.google.com/drive/12StZd_gIxuO71hxieKEPhxYWmM4cpSxr#scrollTo=kbWX24mXbhQz&printMode=true 1/5
8/23/23, 11:42 PM 20MIS1025_Regression.ipynb - Colaboratory
[144. 8. 1. 1.]
[ 1. 1. 0. 0.]]
import matplotlib.pyplot as plt
import seaborn as sns

# Pairwise scatter plots (with per-variable distributions on the diagonal)
# for the three columns of interest.
sns.set(style='whitegrid', context='notebook')
cols = ['count', 'srv_count', 'serror_rate']
selected = df[cols]
pair_grid = sns.pairplot(selected, height=2.5)
plt.tight_layout()
# plt.savefig('./figures/scatter.png', dpi=300)
plt.show()
import numpy as np

# Correlation-coefficient matrix of the selected columns; rowvar=False treats
# each column of the array as one variable (same as transposing first).
cm = np.corrcoef(df[cols].values, rowvar=False)

sns.set(font_scale=1.5)
heatmap_options = dict(
    cbar=True,
    annot=True,               # write the coefficient inside each cell
    square=True,
    fmt='.2f',
    annot_kws={'size': 15},
    yticklabels=cols,
    xticklabels=cols,
)
hm = sns.heatmap(cm, **heatmap_options)

# plt.tight_layout()
# plt.savefig('./figures/corr_mat.png', dpi=300)
plt.show()
https://colab.research.google.com/drive/12StZd_gIxuO71hxieKEPhxYWmM4cpSxr#scrollTo=kbWX24mXbhQz&printMode=true 2/5
8/23/23, 11:42 PM 20MIS1025_Regression.ipynb - Colaboratory
sns.reset_orig()
%matplotlib inline
from sklearn.linear_model import LinearRegression
X = df[['count']].values
y = df['srv_count'].values
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
sc_y = StandardScaler()
X_std = sc_x.fit_transform(X)
y_std = sc_y.fit_transform(y[:, np.newaxis]).flatten()
y_std
# Fit on the raw (unstandardised) X and y, then report the fitted line.
slr = LinearRegression().fit(X, y)
y_pred = slr.predict(X)

slope = slr.coef_[0]          # single feature, so exactly one coefficient
intercept = slr.intercept_
print('Slope: %.3f' % slope)
print('Intercept: %.3f' % intercept)
Slope: 0.299
Intercept: 2.605
# Display the in-sample predictions returned by slr.predict(X).
y_pred
def lin_regplot(X, y, model):
    """Draw the observations and the model's fitted line on the current axes.

    Parameters
    ----------
    X : feature values; passed to ``plt.scatter``/``plt.plot`` as the
        x-coordinates and to ``model.predict`` as its input.
    y : observed target values, plotted against ``X``.
    model : any object exposing ``predict(X)``.

    Returns
    -------
    None. The function only draws; labelling and ``plt.show()`` are left to
    the caller.
    """
    # Raw observations.
    plt.scatter(X, y, c='lightblue')
    # Model predictions over the same x-values.
    plt.plot(X, model.predict(X), color='red', linewidth=2)
    return
# Scatter of the data with the fitted regression line, labelled by column name.
lin_regplot(X, y, slr)
ax = plt.gca()
ax.set_xlabel('[count]')
ax.set_ylabel('[srv_count]')
plt.tight_layout()
# plt.savefig('./figures/scikit_lr_fit.png', dpi=300)
plt.show()
https://colab.research.google.com/drive/12StZd_gIxuO71hxieKEPhxYWmM4cpSxr#scrollTo=kbWX24mXbhQz&printMode=true 3/5
8/23/23, 11:42 PM 20MIS1025_Regression.ipynb - Colaboratory
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state fixes the split so the
# numbers below are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

slr = LinearRegression()
slr.fit(X_train, y_train)
y_train_pred = slr.predict(X_train)
y_test_pred = slr.predict(X_test)

# Residual plot: residual (prediction - truth) against the predicted value.
plt.scatter(y_train_pred, y_train_pred - y_train,
            c='blue', marker='o', label='Training data')
plt.scatter(y_test_pred, y_test_pred - y_test,
            c='lightgreen', marker='s', label='Test data')
plt.xlabel('Predicted values')
plt.ylabel('Residuals')
plt.legend(loc='upper left')

# Derive the x-range from the predictions themselves instead of a fixed
# window, so that no points are clipped and the zero-residual reference line
# spans the whole plot regardless of the target's scale.
x_low = min(y_train_pred.min(), y_test_pred.min())
x_high = max(y_train_pred.max(), y_test_pred.max())
span = x_high - x_low
margin = 0.05 * span if span > 0 else 1.0  # fallback for constant predictions
plt.hlines(y=0, xmin=x_low - margin, xmax=x_high + margin, lw=2, color='red')
plt.xlim([x_low - margin, x_high + margin])
plt.tight_layout()
# plt.savefig('./figures/slr_residuals.png', dpi=300)
plt.show()
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
https://colab.research.google.com/drive/12StZd_gIxuO71hxieKEPhxYWmM4cpSxr#scrollTo=kbWX24mXbhQz&printMode=true 4/5
8/23/23, 11:42 PM 20MIS1025_Regression.ipynb - Colaboratory
# Mean squared error and coefficient of determination for the current
# y_train_pred / y_test_pred, reported for both splits.
mse_train = mean_squared_error(y_train, y_train_pred)
mse_test = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (mse_train, mse_test))

r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (r2_train, r2_test))
from sklearn.linear_model import Lasso

# L1-regularised linear regression on the same training split (alpha=0.1).
# Note: y_train_pred / y_test_pred are overwritten with the Lasso predictions.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_train_pred, y_test_pred = lasso.predict(X_train), lasso.predict(X_test)
print(lasso.coef_)
[0.29923578]
# Same two metrics as before, now evaluated on whatever y_train_pred /
# y_test_pred currently hold.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.3f, test: %.3f' % (train_mse, test_mse))

train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)
print('R^2 train: %.3f, test: %.3f' % (train_r2, test_r2))
Colab paid products - Cancel contracts here
check 0s completed at 11:41 PM
https://colab.research.google.com/drive/12StZd_gIxuO71hxieKEPhxYWmM4cpSxr#scrollTo=kbWX24mXbhQz&printMode=true 5/5