Regression Prac 9
Department of Mathematics
Practical 9
Regression Analysis
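The cells that load the data are not reproduced in this transcript; the session starts with a DataFrame df already in memory. A minimal sketch of that setup, assuming the Graduate Admissions data is read from a CSV file (the file name Admission_Predict.csv is an assumption, not shown in the practical):

# Sketch of the setup the transcript assumes (not part of the original cells).
import pandas as pd

# File name is an assumption; only the resulting DataFrame appears below.
df = pd.read_csv('Admission_Predict.csv')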
In [3]: df.head(5)
Out[3]:
[first five rows displayed; columns: Serial No., GRE Score, TOEFL Score, University Rating, SOP, LOR, CGPA, Research, Chance of Admit]
In [4]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
Serial No. 400 non-null int64
GRE Score 400 non-null int64
TOEFL Score 400 non-null int64
University Rating 400 non-null int64
SOP 400 non-null float64
LOR 400 non-null float64
CGPA 400 non-null float64
Research 400 non-null int64
Chance of Admit 400 non-null float64
dtypes: float64(4), int64(5)
memory usage: 28.2 KB
In [5]: df.describe()
Out[5]:
       Serial No.   GRE Score  TOEFL Score  University Rating         SOP         LOR        CGPA    Research  Chance of Admit
count 400.000000 400.000000 400.000000 400.000000 400.000000 400.000000 400.000000 400.000000 400.000000
mean 200.500000 316.807500 107.410000 3.087500 3.400000 3.452500 8.598925 0.547500 0.724350
std 115.614301 11.473646 6.069514 1.143728 1.006869 0.898478 0.596317 0.498362 0.142609
min 1.000000 290.000000 92.000000 1.000000 1.000000 1.000000 6.800000 0.000000 0.340000
25% 100.750000 308.000000 103.000000 2.000000 2.500000 3.000000 8.170000 0.000000 0.640000
50% 200.500000 317.000000 107.000000 3.000000 3.500000 3.500000 8.610000 1.000000 0.730000
75% 300.250000 325.000000 112.000000 4.000000 4.000000 4.000000 9.062500 1.000000 0.830000
max 400.000000 340.000000 120.000000 5.000000 5.000000 5.000000 9.920000 1.000000 0.970000
In [6]: df.columns
Out[6]: Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',
'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
dtype='object')
In [8]: x=df[['GRE Score', 'TOEFL Score', 'University Rating', 'SOP','LOR ', 'CGPA', 'Research']]
In [10]: df.shape
Out[10]: (400, 9)
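Model 1 LinearRegression
The cells between In [10] and In [17] (defining the target, splitting the data, and constructing the model) are not reproduced. The sketch below is consistent with the outputs that follow: y_test contains 120 of the 400 rows, which matches a 70/30 split, and the target column keeps the trailing space shown by df.columns. The variable name model1, the test_size and the random_state are assumptions.

# Sketch of the omitted cells (names and split parameters assumed).
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

y = df['Chance of Admit ']        # note the trailing space in the column name

# 120 of 400 observations end up in the test set, i.e. a 30% test split;
# the random_state used in the practical is not shown.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

model1 = LinearRegression()       # variable name assumed
model1.fit(x_train, y_train)      # this call returns the Out[17] shown below
y_pred = model1.predict(x_test)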
Out[17]: LinearRegression()
In [19]: y_pred
In [20]: y_test
Out[20]: ...
342 0.58
83 0.92
131 0.77
101 0.64
320 0.75
323 0.62
333 0.71
72 0.93
380 0.78
319 0.80
62 0.54
304 0.62
200 0.73
13 0.62
291 0.56
262 0.70
376 0.34
283 0.80
264 0.75
Name: Chance of Admit , Length: 120, dtype: float64
In [22]: mean_absolute_error(y_test,y_pred)
Out[22]: 0.04666971599519059
In [24]: per_e
Out[24]: 0.07354722873305462
In [26]: accuracy
Out[26]: 92.64527712669454
Out[27]: 0.003699173479770781
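Only some of the metric cells are shown. per_e behaves like a mean absolute percentage error, and accuracy is evidently 100 * (1 - per_e), since 100 * (1 - 0.07355) ≈ 92.645. Out[27] is consistent with a mean squared error on the same predictions, although its input cell is missing. A sketch of the presumed computations:

# Sketch of the metric cells, assuming per_e is the mean absolute percentage
# error and accuracy is defined as 100 * (1 - per_e).
from sklearn.metrics import (mean_absolute_error,
                             mean_absolute_percentage_error,
                             mean_squared_error)

mae = mean_absolute_error(y_test, y_pred)               # Out[22]: ~0.0467
per_e = mean_absolute_percentage_error(y_test, y_pred)  # Out[24]: ~0.0735
accuracy = 100 * (1 - per_e)                            # Out[26]: ~92.645
mse = mean_squared_error(y_test, y_pred)                # presumed source of Out[27]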
Model 2 KNeighborsRegressor
In [30]: model2.fit(x_train,y_train)
Out[30]: KNeighborsRegressor()
In [32]: y_pred2
Out[32]: array([0.804, 0.708, 0.796, 0.8 , 0.736, 0.64 , 0.644, 0.702, 0.57 ,
0.848, 0.786, 0.746, 0.684, 0.662, 0.734, 0.674, 0.526, 0.746,
0.716, 0.574, 0.542, 0.578, 0.91 , 0.702, 0.594, 0.642, 0.656,
0.942, 0.892, 0.92 , 0.756, 0.626, 0.91 , 0.83 , 0.72 , 0.95 ,
0.458, 0.528, 0.626, 0.92 , 0.542, 0.778, 0.826, 0.626, 0.768,
0.71 , 0.92 , 0.648, 0.94 , 0.514, 0.708, 0.714, 0.596, 0.92 ,
0.806, 0.532, 0.7 , 0.618, 0.686, 0.646, 0.812, 0.74 , 0.946,
0.796, 0.688, 0.682, 0.624, 0.86 , 0.916, 0.534, 0.708, 0.72 ,
0.94 , 0.552, 0.94 , 0.608, 0.74 , 0.65 , 0.652, 0.924, 0.83 ,
0.666, 0.68 , 0.694, 0.598, 0.746, 0.694, 0.592, 0.64 , 0.884,
0.516, 0.642, 0.462, 0.796, 0.928, 0.532, 0.63 , 0.744, 0.658,
0.66 , 0.658, 0.652, 0.86 , 0.646, 0.618, 0.674, 0.594, 0.738,
0.882, 0.73 , 0.776, 0.684, 0.628, 0.672, 0.592, 0.606, 0.676,
0.484, 0.784, 0.762])
In [33]: y_test
Out[33]: 26 0.76
258 0.77
128 0.84
126 0.85
6 0.75
293 0.64
110 0.61
20 0.64
57 0.46
133 0.79
48 0.82
53 0.72
85 0.76
38 0.52
261 0.71
181 0.71
30 0.65
398 0.67
281 0.80
159    0.52
In [34]: error2 = mean_absolute_percentage_error(y_test,y_pred2)
In [35]: error2
Out[35]: 0.09038456485463613
In [37]: accuracy2
Out[37]: 90.96154351453639
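Several KNN cells (creating model2, predicting, and converting error2 into accuracy2) are not reproduced. A sketch of the presumed sequence, with default parameters assumed:

# Sketch of the omitted KNN cells; default settings are an assumption.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_percentage_error  # already imported above

model2 = KNeighborsRegressor()           # default n_neighbors=5 assumed
model2.fit(x_train, y_train)             # In [30] above
y_pred2 = model2.predict(x_test)
error2 = mean_absolute_percentage_error(y_test, y_pred2)    # In [34] above
accuracy2 = 100 * (1 - error2)           # reproduces Out[37] from Out[35]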
Model 3 DecisionTreeRegressor
In [38]: from sklearn.tree import DecisionTreeRegressor
Out[40]: DecisionTreeRegressor()
In [42]: y_pred3
Out[42]: array([0.78, 0.76, 0.79, 0.91, 0.79, 0.71, 0.64, 0.66, 0.44, 0.76, 0.69,
0.7 , 0.64, 0.57, 0.48, 0.64, 0.44, 0.74, 0.86, 0.59, 0.57, 0.63,
0.93, 0.71, 0.68, 0.64, 0.57, 0.9 , 0.86, 0.97, 0.72, 0.61, 0.93,
0.78, 0.81, 0.89, 0.47, 0.63, 0.72, 0.93, 0.54, 0.79, 0.87, 0.61,
0.86, 0.42, 0.89, 0.65, 0.94, 0.54, 0.7 , 0.73, 0.54, 0.89, 0.79,
0.62, 0.74, 0.65, 0.73, 0.65, 0.8 , 0.72, 0.95, 0.82, 0.72, 0.73,
0.65, 0.88, 0.91, 0.57, 0.65, 0.64, 0.89, 0.44, 0.91, 0.65, 0.82,
0.73, 0.61, 0.94, 0.86, 0.64, 0.67, 0.66, 0.61, 0.92, 0.69, 0.63,
0.62, 0.89, 0.49, 0.47, 0.47, 0.69, 0.95, 0.58, 0.71, 0.68, 0.66,
0.54, 0.47, 0.61, 0.93, 0.64, 0.72, 0.66, 0.59, 0.71, 0.86, 0.77,
0.8 , 0.72, 0.53, 0.71, 0.46, 0.54, 0.69, 0.44, 0.77, 0.79])
In [43]: y_test
Out[43]: 26 0.76
258 0.77
128 0.84
126 0.85
6 0.75
293 0.64
110 0.61
20 0.64
57 0.46
133 0.79
48 0.82
53 0.72
85 0.76
38 0.52
261 0.71
181 0.71
30 0.65
398 0.67
281 0.80
159 0.52
118 0.47
179 0.73
399 0.95
340 0.75
256 0.76
86 0.72
137 0.71
286 0.92
69 0.78
143 0.97
...
168 0.64
109 0.68
79 0.46
360 0.85
212 0.95
8 0.50
226 0.63
334 0.73
393 0.77
387 0.53
269 0.77
342 0.58
83 0.92
131 0.77
101 0.64
320 0.75
323 0.62
333 0.71
72 0.93
380 0.78
319 0.80
62 0.54
304 0.62
200 0.73
13 0.62
291 0.56
262 0.70
376 0.34
283 0.80
264 0.75
Name: Chance of Admit , Length: 120, dtype: float64
In [45]: error3
Out[45]: 0.1074076957985312
In [47]: accuracy3
Out[47]: 89.25923042014688
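The decision-tree cells that are not shown presumably follow the same pattern; a sketch, with default parameters assumed:

# Sketch of the omitted decision-tree cells.
from sklearn.metrics import mean_absolute_percentage_error  # already imported above

model3 = DecisionTreeRegressor()         # default parameters assumed
model3.fit(x_train, y_train)             # the fit call behind Out[40]
y_pred3 = model3.predict(x_test)
error3 = mean_absolute_percentage_error(y_test, y_pred3)
accuracy3 = 100 * (1 - error3)           # reproduces Out[47] from Out[45]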
Model 4 RandomForestRegressor
In [48]: from sklearn.ensemble import RandomForestRegressor
Out[50]: RandomForestRegressor()
In [51]: y_pred4=model4.predict(x_test)
In [52]: y_pred4
In [53]: y_test
Out[53]: 26 0.76
258 0.77
128 0.84
126 0.85
6 0.75
293 0.64
110 0.61
20 0.64
57 0.46
133 0.79
48 0.82
53 0.72
85 0.76
38 0.52
261 0.71
181 0.71
30 0.65
398 0.67
281 0.80
159    0.52
In [54]: error4 = mean_absolute_percentage_error(y_test,y_pred4)
In [56]: accuracy4
Out[56]: 91.9966051693334
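Pulling together the cells of this section that are not reproduced, the random-forest model was presumably built and scored the same way; a sketch, with default parameters assumed:

# Sketch of the omitted random-forest cells.
from sklearn.metrics import mean_absolute_percentage_error  # already imported above

model4 = RandomForestRegressor()         # defaults (100 trees) assumed; the practical's settings are not shown
model4.fit(x_train, y_train)             # the fit call behind Out[50]
y_pred4 = model4.predict(x_test)
error4 = mean_absolute_percentage_error(y_test, y_pred4)    # In [54] above
accuracy4 = 100 * (1 - error4)           # Out[56]: ~92.0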
Linear Regression
In [57]: accuracy
Out[57]: 92.64527712669454
KNeighborsRegressor
In [58]: accuracy2
Out[58]: 90.96154351453639
DecisionTreeRegressor
In [59]: accuracy3
Out[59]: 89.25923042014688
RandomForestRegressor
In [60]: accuracy4
Out[60]: 91.9966051693334
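On the same 120-observation test set, linear regression gives the highest accuracy (about 92.6%), followed by the random forest (about 92.0%), the k-nearest-neighbours regressor (about 91.0%) and the single decision tree (about 89.3%). A short sketch for printing the comparison side by side (variable names as above):

# Collect the four accuracies for a side-by-side comparison.
results = {
    'LinearRegression': accuracy,
    'KNeighborsRegressor': accuracy2,
    'DecisionTreeRegressor': accuracy3,
    'RandomForestRegressor': accuracy4,
}
for name, acc in sorted(results.items(), key=lambda kv: kv[1], reverse=True):
    print(f'{name:>22s}: {acc:.2f}%')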