Missing Values - Weather Data
Missing Values - Weather Data
-----------------------------------------------------------------------------------
--------------------------------------------------
# Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)
# NOTE(review): `weather` is not defined anywhere in the visible source — presumably a
# data-loading step (e.g. pd.read_csv) was lost from this export; confirm and restore it.
# Keep a separate copy of the dataset before any of the cleaning steps below modify it.
weather_bk = weather.copy()
# Preview the first rows of the dataset.
weather.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the dataset information (column names, non-null counts and dtypes)
weather.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count the missing (NaN) entries in every column
weather.isna().sum()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Use KNNImputer to address missing values
# NOTE(review): `imputer_int` is not defined in the visible source — presumably a
# KNNImputer instance created in a step lost from this export; confirm and restore it.
# The imputer is fitted on the 'Sunshine' column alone (passed as a one-column
# DataFrame) and its output replaces that column.
weather['Sunshine'] = imputer_int.fit_transform(weather[['Sunshine']])
-----------------------------------------------------------------------------------
--------------------------------------------------
# Use SimpleImputer to address missing values
# NOTE(review): no SimpleImputer call is present in this cell — it only re-displays
# the dataset summary, so the imputation step itself appears to be missing; confirm.
weather.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Group the numeric weather measurements by 'RainToday' and take the mean of each,
# to compare the conditions that may or may not be associated with rain.
weather[[
    'Rainfall', 'Sunshine', 'Evaporation', 'WindGustSpeed',
    'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
    'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm',
    'Temp9am', 'Temp3pm',
]].groupby(weather['RainToday']).mean()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display how many rows fall into each 'WindGustDir' category
# (value_counts leaves missing values out by default)
weather['WindGustDir'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Re-check how many missing (NaN) entries remain in every column
weather.isna().sum()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the value counts of every column in the 'weather' dataset,
# one column at a time.
for column in weather.columns:
    print(weather[column].value_counts())
-----------------------------------------------------------------------------------
--------------------------------------------------
# Split the column names into numerical and categorical groups by dtype:
# columns stored as 'object' are treated as categorical, all others as numerical.
is_object_dtype = weather.dtypes == 'object'
num_vars = weather.columns[~is_object_dtype]
cat_vars = weather.columns[is_object_dtype]
print(num_vars)
print(cat_vars)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display how many rows fall into each 'WindGustDir' category
# (same check as earlier in the file, repeated just before the column is label-encoded)
weather['WindGustDir'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Use LabelEncoder to turn the wind-direction categories into integer codes.
# NOTE(review): LabelEncoder is not imported in the visible source — it is provided
# by sklearn.preprocessing; confirm the import exists where this runs.
LE = LabelEncoder()
# LabelEncoder expects a 1-D array-like, so each column is passed as a Series
# (weather[col]) rather than as a one-column DataFrame (weather[[col]]), which
# only works through an implicit flatten and raises a conversion warning.
# The encoder is refit for every column, so afterwards LE.classes_ describes only
# the last column encoded ('WindDir3pm').
for wind_col in ('WindGustDir', 'WindDir9am', 'WindDir3pm'):
    weather[wind_col] = LE.fit_transform(weather[wind_col])
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display how many rows fall into each 'RainToday' value
# (value_counts leaves missing values out by default)
weather['RainToday'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display how many rows fall into each 'RainTomorrow' value
# (value_counts leaves missing values out by default)
weather['RainTomorrow'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Binarize the two rain columns in place with a single LabelBinarizer, which is
# refit on each column in turn ('RainToday' first, then 'RainTomorrow').
# NOTE(review): LabelBinarizer is not imported in the visible source — it is
# provided by sklearn.preprocessing; confirm the import exists where this runs.
LB = LabelBinarizer()
for rain_col in ('RainToday', 'RainTomorrow'):
    weather[rain_col] = LB.fit_transform(weather[rain_col])
# Preview the first rows after encoding.
weather.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------