Cars Sales Dashboard
Cars Sales Dashboard
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [3]: cars_data.shape  # (rows, columns) of the loaded DataFrame
Data Cleaning
In [4]: cars_data.isna().sum()  # number of missing values in each column
Out[4]: Manufacturer 0
Model 0
Sales_in_thousands 0
__year_resale_value 36
Vehicle_type 0
Price_in_thousands 2
Engine_size 1
Horsepower 1
Wheelbase 1
Width 1
Length 1
Curb_weight 2
Fuel_capacity 1
Fuel_efficiency 3
Latest_Launch 0
Power_perf_factor 2
dtype: int64
In [6]: cars_data['__year_resale_value'].fillna(cars_data['__year_resale_value'].median(),i
cars_data.isna().sum()
Out[6]: Manufacturer 0
Model 0
Sales_in_thousands 0
__year_resale_value 0
Vehicle_type 0
Price_in_thousands 2
Engine_size 1
Horsepower 1
Wheelbase 1
Width 1
Length 1
Curb_weight 2
Fuel_capacity 1
Fuel_efficiency 3
Latest_Launch 0
Power_perf_factor 2
dtype: int64
threhold : 7.8500000000000005
Out[7]: Manufacturer 0
Model 0
Sales_in_thousands 0
__year_resale_value 0
Vehicle_type 0
Price_in_thousands 0
Engine_size 0
Horsepower 0
Wheelbase 0
Width 0
Length 0
Curb_weight 0
Fuel_capacity 0
Fuel_efficiency 0
Latest_Launch 0
Power_perf_factor 0
dtype: int64
In [8]: cars_data.describe()  # summary statistics for the numeric columns
Out[8]: Sales_in_thousands __year_resale_value Price_in_thousands Engine_size Horsepower
Distribution Analysis
In [14]: df_num = cars_data.select_dtypes(include = ['float64', 'int64'])
print(df_num.columns)
df_num = df_num.drop(columns=["__year_resale_value","Price_in_thousands"])
print(df_num.columns)