Data Preprocessing
import numpy as np
import pandas as pd  # required by pd.read_excel below; previously only imported in a later cell

# Load the raw spreadsheet into a DataFrame.
df = pd.read_excel(r"/content/Untitled spreadsheet.xlsx")
df

# Every column except the last is treated as the independent (feature) data;
# the last column is treated as the dependent (target) data.
print("Independent data")
print(df.iloc[:, :-1])
print("dependent data")
print(df.iloc[:, -1])
Independent data
roll no. attendance percentage CPI
0 2042000101 77 6.7
1 2042000102 61 7.4
2 2042000103 95 7.0
3 2042000104 85 7.6
4 2042000105 96 8.3
5 2042000106 70 8.4
6 2042000107 68 9.2
7 2042000108 95 6.2
8 2042000109 43 5.9
9 2042000110 75 7.8
dependent data
0 NO
1 NO
2 NO
3 YES
4 YES
5 YES
6 YES
7 YES
8 NO
9 YES
Name: PLACED, dtype: object
# Summary statistics for the two numeric columns.
# The pandas reductions skip missing values by default, so mean and median
# stay meaningful (and consistent with each other) when a column has NaNs.
# The column key 'attendance percentage' must sit on one line: a quoted
# string literal cannot span a line break.
print("Mean of cpi:", df['CPI'].mean())
print("Median of cpi:", df['CPI'].median())
print("Mean of attendance percantage:", df['attendance percentage'].mean())
print("Median of attendance percantage:", df['attendance percentage'].median())
# Missing-value imputation.
# NOTE(review): `df1` was not defined anywhere above in this notebook; it is
# assumed to be a working copy of `df`, so the raw frame stays intact.
df1 = df.copy()

# Numeric column: fill gaps with the column mean (NaNs are skipped when
# computing the mean).
mean_value = df['attendance percentage'].mean()
# Assign the result back instead of `fillna(..., inplace=True)` on a column
# selection, which is chained assignment and may not update `df1`.
df1['attendance percentage'] = df1['attendance percentage'].fillna(value=mean_value)
df1

# Numeric column: fill gaps with the column median. Series.median() ignores
# NaN, whereas np.median() would return NaN if any value were missing and
# the fill would then be a no-op.
median_value = df['CPI'].median()
df1['CPI'] = df1['CPI'].fillna(value=median_value)
df1

# Categorical column: fill gaps with the most frequent value. mode() returns
# a Series, and [0] picks its first entry.
mode_value = df['PLACED'].mode()[0]
df1['PLACED'] = df1['PLACED'].fillna(value=mode_value)
df1
df['PLACED'].mode()[0]
{"type":"string"}
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# One-hot encode every column of the spreadsheet and view the result as a
# dense NumPy array (the encoder's output is converted with .toarray()).
df2 = pd.read_excel(r"/content/Untitled spreadsheet.xlsx")
ohe = OneHotEncoder()
ohe.fit(df2)  # learn the categories present in each column
x = ohe.transform(df2).toarray()
x
array([[1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
[0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1.],
[0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0.],
[0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0.],
[0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0.]])
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Fit a MinMaxScaler on the spreadsheet's columns and print the scaled values.
df4 = pd.read_excel(r"/content/program6.xlsx")
s = MinMaxScaler()
s.fit(df4)  # fit the scaler on df4 before transforming it
print(s.transform(df4))