0% found this document useful (0 votes)
35 views4 pages

Missing Values - Weather Data

Uploaded by

rayachotiusa
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
35 views4 pages

Missing Values - Weather Data

Uploaded by

rayachotiusa
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 4

Missing Values - Weather Data:

-----------------------------------------------------------------------------------
--------------------------------------------------
# Import the data-handling (pandas, numpy) and plotting (matplotlib, seaborn) libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings for the rest of the session.
# NOTE(review): the "ignore" filter is installed without a category or module
# restriction, so it also hides deprecation and data-conversion warnings
# raised by the cells below.

import warnings
warnings.filterwarnings("ignore")

# Remove the limit on displayed columns so wide DataFrames are shown in full

pd.set_option("display.max_columns", None)

# Import pandasql (aliased as psql) to run queries; it is not used in the
# part of the script visible here

import pandasql as psql


-----------------------------------------------------------------------------------
--------------------------------------------------
# Load the weather dataset (first row of the CSV is the header).
# NOTE: the path is specific to the author's machine; point it at your own
# copy of weather.csv before running.

weather = pd.read_csv(
    r"E:\R3SPAnalytics\00 IIT KGP Hyd\00-LATA\Sessions\Session-07\weather.csv",
    header=0,
)

# Keep an independent back-up copy so the cells below can modify `weather`
# without losing the values as loaded

weather_bk = weather.copy()

# Display first 5 records

weather.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Print the DataFrame summary produced by DataFrame.info()

weather.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count the missing (null) values in every column

weather.isnull().sum()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Fill the missing 'Sunshine' values with a KNNImputer

from sklearn.impute import KNNImputer

# NOTE(review): only the 'Sunshine' column is handed to the imputer, so there
# are no other features to measure neighbour distance on. Per the
# scikit-learn documentation the imputer then falls back to the average of
# the column - confirm that this is the intended behaviour.
imputer_int = KNNImputer(
    missing_values=np.nan,
    n_neighbors=5,
    weights='uniform',
    metric='nan_euclidean',
    copy=True,
    add_indicator=False,
)

# Double brackets keep the column 2-D, which is the shape the imputer expects
sunshine_filled = imputer_int.fit_transform(weather[['Sunshine']])
weather['Sunshine'] = sunshine_filled
-----------------------------------------------------------------------------------
--------------------------------------------------
# Fill the categorical wind-direction columns with their most frequent value
# (SimpleImputer) and the numeric wind-speed columns with the KNN imputer

from sklearn.impute import SimpleImputer

# `verbose` is deliberately not passed: it was deprecated in scikit-learn 1.1
# and removed in 1.3, so supplying it raises TypeError on current releases.
imputer_str = SimpleImputer(
    missing_values=np.nan,
    strategy='most_frequent',
    fill_value=None,
    copy=True,
    add_indicator=False,
)

# Column -> imputer used to fill its missing values. `imputer_int` is the
# KNNImputer instance created earlier in this script.
column_imputers = {
    'WindGustDir': imputer_str,
    'WindGustSpeed': imputer_int,
    'WindDir9am': imputer_str,
    'WindDir3pm': imputer_str,
    'WindSpeed9am': imputer_int,
}

for column, imputer in column_imputers.items():
    # The imputers take a 2-D (n_rows, 1) array; [:, 0] flattens the result
    # back to one value per row before it is written to the column.
    column_values = weather[column].values.reshape(-1, 1)
    weather[column] = imputer.fit_transform(column_values)[:, 0]
-----------------------------------------------------------------------------------
--------------------------------------------------
# Remove the 'RISK_MM' column from `weather` in place, then preview the result

weather.drop(columns='RISK_MM', inplace=True)

weather.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Print the DataFrame summary again, now that the missing values above have
# been addressed

weather.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Average the numeric weather measurements separately for each 'RainToday'
# value, to compare the conditions that may or may not accompany rain

weather[[
    'Rainfall', 'Sunshine', 'Evaporation', 'WindGustSpeed',
    'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
    'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm',
    'Temp9am', 'Temp3pm',
]].groupby(weather['RainToday']).mean()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count how many rows fall into each 'WindGustDir' category

weather['WindGustDir'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Re-count the null values in every column

weather.isnull().sum()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Print the value counts of every column in the 'weather' dataset

for column in weather.columns:
    print(weather[column].value_counts())
-----------------------------------------------------------------------------------
--------------------------------------------------
# Split the column names into numerical and categorical variables:
# columns with dtype 'object' are treated as categorical, all others as numerical
is_categorical = weather.dtypes == 'object'
num_vars = weather.columns[~is_categorical]
cat_vars = weather.columns[is_categorical]
print(num_vars)
print(cat_vars)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count how many rows fall into each 'WindGustDir' category

weather['WindGustDir'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Use LabelEncoder to replace the categorical wind-direction labels with
# integer codes

from sklearn.preprocessing import LabelEncoder

LE = LabelEncoder()

# LabelEncoder works on a 1-D array of labels, so each column is passed as a
# Series (single brackets) rather than as a one-column DataFrame, which would
# only be accepted after an implicit flatten and a DataConversionWarning.
# NOTE: LE is re-fitted on every call, so afterwards LE.classes_ only holds
# the categories of the last column encoded ('WindDir3pm').
weather['WindGustDir'] = LE.fit_transform(weather['WindGustDir'])
weather['WindDir9am'] = LE.fit_transform(weather['WindDir9am'])
weather['WindDir3pm'] = LE.fit_transform(weather['WindDir3pm'])
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count how many rows fall into each 'RainToday' category

weather['RainToday'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Count how many rows fall into each 'RainTomorrow' category

weather['RainTomorrow'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Binarize the 'RainToday' and 'RainTomorrow' columns with LabelBinarizer
# (for a two-class column it yields a single 0/1 indicator column)

from sklearn.preprocessing import LabelBinarizer

LB = LabelBinarizer()

# The binarizer is re-fitted for each column in turn
for rain_column in ('RainToday', 'RainTomorrow'):
    weather[rain_column] = LB.fit_transform(weather[rain_column])

weather.head()
-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------
-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

-----------------------------------------------------------------------------------
--------------------------------------------------

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy