0% found this document useful (0 votes)

37 views10 pages

Boston House Prediction - Colab1

kaggle lab sheet boston house prediction

Uploaded by

Avinash Official

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

37 views10 pages

Boston House Prediction - Colab1

kaggle lab sheet boston house prediction

Uploaded by

Avinash Official

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 10

Avinash Shukla (27)

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os import sys

from tempfile import NamedTemporaryFile from urllib.request import urlopen
from urllib.parse import unquote, urlparse from urllib.error import HTTPError
from zipfile import ZipFile import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'boston-house-price-prediction:

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working' KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null

shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True) os.makedirs(KAGGLE_WORKING_PATH,
0o777, exist_ok=True

try:
os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'i except FileExistsError:
pass try:
os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", except FileExistsError:
pass
Avinash Shukla (27)
for data_source_mapping in DATA_SOURCE_MAPPING.split( directory, download_url_encoded =
data_source_map download_url = unquote(download_url_encoded)
filename = urlparse(download_url).path
destination_path = os.path.join(KAGGLE_INPUT_PATH try:
with urlopen(download_url) as fileres, NamedT total_length = fileres.headers['content-l
print(f'Downloading {directory}, {total_l dl = 0
data = fileres.read(CHUNK_SIZE) while len(data) > 0:
dl += len(data) tfile.write(data)
done = int(50 * dl / int(total_length sys.stdout.write(f"\r[{'=' * done}{'
sys.stdout.flush()
data = fileres.read(CHUNK_SIZE) if filename.endswith('.zip'):
with ZipFile(tfile) as zfile:
zfile.extractall(destination_path) else:
with tarfile.open(tfile.name) as tarfil tarfile.extractall(destination_path)
print(f'\nDownloaded and uncompressed: {d except HTTPError as e:
print(f'Failed to load (likely expired) {down continue
except OSError as e:
print(f'Failed to load {download_url} to path continue

print('Data source import complete.')

Avinash Shukla (27)
# Importing necessary libraries import pandas as pd
import numpy as np
import matplotlib.pyplot as plt import seaborn as sns

# Load the dataset

data = pd.read_csv('/kaggle/input/boston-house-price-

#Displaying the first few rows of the dataframe print("First 5 rows of the dataset:")
print(data.head())
First 5 rows of the dataset:

crim zn indus chas nox rm age dis rad tax ptratio \

0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7

b lstat medv
0 396.90 4.98 24.0
1 396.90 9.14 21.6
2 392.83 4.03 34.7
3 394.63 2.94 33.4
4 396.90 5.33 36.2

# Checking for any missing values in the dataset print("\nMissing values in the dataset:")
print(data.isnull().sum())

Missing values in the dataset:

crim 0

zn 0

indus 0

chas 0

nox 0

rm 5

age 0

dis 0

rad 0

tax 0

ptratio 0

b 0

lstat 0

medv 0
dtype: int64

# Displaying the summary statistics of the dataset print("\nSummary Statistics:")

print(data.describe())

Summary Statistics:
crim zn indus chas nox rm \
count 506.000000 506.000000 506.000000 506.000000 506.000000 501.000000
mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284341
std 8.601545 23.322453 6.860353 0.253994 0.115878 0.705587
min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000
25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.884000
50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208000
75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.625000
max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000
Avinash Shukla (27)
age dis rad tax ptratio b \

count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000

mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032
std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864
min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000
25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500
50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000
75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000
max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000

lstat medv
count 506.000000 506.000000
mean 12.653063 22.532806
std 7.141062 9.197104
min 1.730000 5.000000
25% 6.950000 17.025000
50% 11.360000 21.200000
75% 16.955000 25.000000
max 37.970000 50.000000

# Visualizing missing data (optional, for better unde plt.figure(figsize=(10, 6))

sns.heatmap(data.isnull(), cbar=False, cmap="viridis" plt.title('Missing Values Heatmap')
plt.show()

C C

# Visualizing the distribution of a few important fea plt.figure(figsize=(14, 10))

plt.subplot(2, 2, 1)
sns.histplot(data['crim'], kde=True) plt.title('Crime Rate Distribution')

plt.subplot(2, 2, 2)
sns.histplot(data['rm'], kde=True)
plt.title('Average Number of Rooms Distribution')
Avinash Shukla (27)
plt.subplot(2, 2, 3)
sns.histplot(data['lstat'], kde=True)
plt.title('Lower Status Population (%) Distribution')

plt.subplot(2, 2, 4)
sns.histplot(data['medv'], kde=True)
plt.title('Median Home Value Distribution')

plt.tight_layout() plt.show()

Question No.02
Avinash Shukla (27)
# Calculate the correlation matrix correlation_matrix = data.corr()

# Displaying the correlation matrix print("Correlation Matrix:")

print(correlation_matrix)
Correlation Matrix:

crim zn indus chas nox rm age \

crim 1.000000 -0.200469 0.406583 -0.055892 0.420972 -0.219433 0.352734

zn -0.200469 1.000000 -0.533828 -0.042697 -0.516604 0.311173 -0.569537
indus 0.406583 -0.533828 1.000000 0.062938 0.763651 -0.394193 0.644779

chas -0.055892 -0.042697 0.062938 1.000000 0.091203 0.091468 0.086518

nox 0.420972 -0.516604 0.763651 0.091203 1.000000 -0.302751 0.731470

rm -0.219433 0.311173 -0.394193 0.091468 -0.302751 1.000000 -0.240286

age 0.352734 -0.569537 0.644779 0.086518 0.731470 -0.240286 1.000000

dis -0.379670 0.664408 -0.708027 -0.099176 -0.769230 0.203507 -0.747881

rad 0.625505 -0.311948 0.595129 -0.007368 0.611441 -0.210718 0.456022

tax 0.582764 -0.314563 0.720760 -0.035587 0.668023 -0.292794 0.506456

ptratio 0.289946 -0.391679 0.383248 -0.121515 0.188933 -0.357612 0.261515
b -0.385064 0.175520 -0.356977 0.048788 -0.380051 0.128107 -0.273534
lstat 0.455621 -0.412995 0.603800 -0.053929 0.590879 -0.615721 0.602339
medv -0.388305 0.360445 -0.483725 0.175260 -0.427321 0.696169 -0.376955

dis rad tax ptratio b lstat medv

crim -0.379670 0.625505 0.582764 0.289946 -0.385064 0.455621 -0.388305

zn 0.664408 -0.311948 -0.314563 -0.391679 0.175520 -0.412995 0.360445

indus -0.708027 0.595129 0.720760 0.383248 -0.356977 0.603800 -0.483725

chas -0.099176 -0.007368 -0.035587 -0.121515 0.048788 -0.053929 0.175260

nox -0.769230 0.611441 0.668023 0.188933 -0.380051 0.590879 -0.427321

rm 0.203507 -0.210718 -0.292794 -0.357612 0.128107 -0.615721 0.696169

age -0.747881 0.456022 0.506456 0.261515 -0.273534 0.602339 -0.376955

dis 1.000000 -0.494588 -0.534432 -0.232471 0.291512 -0.496996 0.249929

rad -0.494588 1.000000 0.910228 0.464741 -0.444413 0.488676 -0.381626

tax -0.534432 0.910228 1.000000 0.460853 -0.441808 0.543993 -0.468536

ptratio -0.232471 0.464741 0.460853 1.000000 -0.177383 0.374044 -0.507787

b 0.291512 -0.444413 -0.441808 -0.177383 1.000000 -0.366087 0.333461

lstat -0.496996 0.488676 0.543993 0.374044 -0.366087 1.000000 -0.737663

medv 0.249929 -0.381626 -0.468536 -0.507787 0.333461 -0.737663 1.000000

# Visualize the correlation matrix using a heatmap plt.figure(figsize=(12, 8))

sns.heatmap(correlation_matrix, annot=True, cmap='coo plt.title('Correlation Matrix Heatmap')
plt.show()
Avinash Shukla (27)

C C

# Identify the features with the highest positive and # Assume 'medv' is the target variable (median
home v target_variable = 'medv'
correlation_with_target = correlation_matrix[target_v

# Display the features with highest positive and nega print("\nFeatures with highest positive correlation
w print(correlation_with_target[correlation_with_target

print("\nFeatures with highest negative correlation w

print(correlation_with_target[correlation_with_target

Features with highest positive correlation with house prices:

medv 1.000000
rm 0.696169
zn 0.360445
b 0.333461
dis 0.249929
Name: medv, dtype: float64

Features with highest negative correlation with house prices:

age -0.376955
rad -0.381626
crim -0.388305
nox -0.427321
tax -0.468536
Name: medv, dtype: float64
Avinash Shukla (27)
Question no.03
# Import necessary libraries
from sklearn.model_selection import train_test_split from sklearn.preprocessing import
StandardScaler

# Select the features and the target variable

X = data.drop(columns=['medv']) # Features (all colu y = data['medv'] # Target variable (house
prices)

# Split the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X

# Standardize the feature variables using StandardSca scaler = StandardScaler()

# Fit the scaler on the training data and transform b X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Print shapes to verify the splits

print("Training data shape:", X_train_scaled.shape) print("Testing data shape:", X_test_scaled.shape)
Training data shape: (404, 13)

Testing data shape: (102, 13)

Question No.04

from sklearn.linear_model import LinearRegression

# Impute missing values using the mean for each colum data.fillna(data.mean(), inplace=True)

# Re-select the features and target variable after im

X = data.drop(columns=['medv']) # Features (all colu y = data['medv'] # Target variable (house
prices)

# Split the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X

# Standardize the feature variables using StandardSca scaler = StandardScaler()

Avinash Shukla (27)
# Fit the scaler on the training data and transform b X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Linear Regression model model = LinearRegression()

model.fit(X_train_scaled, y_train)
▾ LinearRegression

C C

# Display the model's coefficients and intercept print("Model Coefficients:", model.coef_)

print("Model Intercept:", model.intercept_)
Model Coefficients: [-1.00208747 0.69855082 0.28733122 0.71955092 -2.02070833 3.13708935
-0.17081271 -3.06972351 2.25417948 -1.76697719 -2.04359481 1.12936985
-3.61451369]
Model Intercept: 22.796534653465343

Question No.05

# Import necessary libraries

from sklearn.metrics import mean_absolute_error, mean import numpy as np

# Predict the house prices using the testing data y_pred = model.predict(X_test_scaled)

# Calculate and display performance metrics # Mean Absolute Error (MAE)

mae = mean_absolute_error(y_test, y_pred) # Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred) # Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}") print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
Mean Absolute Error (MAE): 3.2064039639003856 Mean Squared Error (MSE): 24.40482518814648
Root Mean Squared Error (RMSE): 4.940124005341008

# Plot the predicted vs actual house prices
plt.figure(figsize=(8, 6))

Avinash Shukla (27)
plt.scatter(y_test, y_pred, color='blue', edgecolor=' plt.plot([min(y_test), max(y_test)], [min(y_test), ma
plt.title('Predicted vs Actual House Prices')
plt.xlabel('Actual House Prices')
plt.ylabel('Predicted House Prices') plt.grid(True)
plt.show()

Data Visualization
No ratings yet
Data Visualization
70 pages
Exp 1 A
No ratings yet
Exp 1 A
5 pages
Unit 1: Shobana T S Assistant Professor Dept. of ISE, BMSCE
No ratings yet
Unit 1: Shobana T S Assistant Professor Dept. of ISE, BMSCE
127 pages
Machine Learning (BCSL606) Lab Manual
No ratings yet
Machine Learning (BCSL606) Lab Manual
117 pages
ML 3
No ratings yet
ML 3
24 pages
ML Labmanual
No ratings yet
ML Labmanual
33 pages
Injecttive Blockchain
No ratings yet
Injecttive Blockchain
14 pages
Data Mining Practicals Complete
No ratings yet
Data Mining Practicals Complete
13 pages
ML Lab Manual
No ratings yet
ML Lab Manual
110 pages
DL (Pra 01)
No ratings yet
DL (Pra 01)
9 pages
CSA105-LinearRegression-HousePrice-Prediction - Ipynb - Colaboratory
No ratings yet
CSA105-LinearRegression-HousePrice-Prediction - Ipynb - Colaboratory
17 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
33 pages
A4 Dsbda Sana
No ratings yet
A4 Dsbda Sana
16 pages
Data Analytucs 1
No ratings yet
Data Analytucs 1
5 pages
ML - Datascience Manual
No ratings yet
ML - Datascience Manual
64 pages
Assignment 2
No ratings yet
Assignment 2
12 pages
The Boston Housing Dataset
100% (2)
The Boston Housing Dataset
4 pages
Data Science and Analtics Laboratory
No ratings yet
Data Science and Analtics Laboratory
21 pages
M PDF
No ratings yet
M PDF
13 pages
ML Lab Program 1& 2
No ratings yet
ML Lab Program 1& 2
6 pages
P04 The Regression Pipeline - Preprocessing Ans
No ratings yet
P04 The Regression Pipeline - Preprocessing Ans
19 pages
Xgboost
No ratings yet
Xgboost
12 pages
ML Lab Mannual1
No ratings yet
ML Lab Mannual1
37 pages
Machine Learning Laboratory
No ratings yet
Machine Learning Laboratory
23 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
42 pages
Comprehensive Data Exploration With Python
No ratings yet
Comprehensive Data Exploration With Python
20 pages
Data Visualization EDA-print
No ratings yet
Data Visualization EDA-print
18 pages
ML Final Prac
No ratings yet
ML Final Prac
47 pages
Analysis and Prediction of House Prices by Linear Regression Model
No ratings yet
Analysis and Prediction of House Prices by Linear Regression Model
91 pages
ModuleAr Merged
No ratings yet
ModuleAr Merged
42 pages
ML Manual
No ratings yet
ML Manual
30 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
26 pages
Machine Learning Lab Manaul BCSL606
No ratings yet
Machine Learning Lab Manaul BCSL606
27 pages
Lab Questionbank
No ratings yet
Lab Questionbank
3 pages
20MIS1025 - Regression - Ipynb - Colaboratory
No ratings yet
20MIS1025 - Regression - Ipynb - Colaboratory
5 pages
Exp - 2-EDA - CaliforniaData Set - HeatMap - PairPlot-checkpoint - Jupyter Notebook
No ratings yet
Exp - 2-EDA - CaliforniaData Set - HeatMap - PairPlot-checkpoint - Jupyter Notebook
12 pages
DALab Part-B BCU&BU
No ratings yet
DALab Part-B BCU&BU
12 pages
Exercise3 Solution
No ratings yet
Exercise3 Solution
19 pages
MLLab Manual
No ratings yet
MLLab Manual
24 pages
DA Manual - Part B
No ratings yet
DA Manual - Part B
13 pages
Week 1 Get Familier With Jupyter Notebook
No ratings yet
Week 1 Get Familier With Jupyter Notebook
4 pages
ML Observation
No ratings yet
ML Observation
29 pages
ML Lab - Exp1-10
No ratings yet
ML Lab - Exp1-10
4 pages
ML Merged
No ratings yet
ML Merged
28 pages
Ass 1 ML
No ratings yet
Ass 1 ML
21 pages
DSBDA Prac4 2
No ratings yet
DSBDA Prac4 2
1 page
Data Preprocessing 2
No ratings yet
Data Preprocessing 2
5 pages
2 Program
No ratings yet
2 Program
8 pages
Hint Sheet
No ratings yet
Hint Sheet
13 pages
Machine Learning Lab
No ratings yet
Machine Learning Lab
20 pages
Making Predictions
No ratings yet
Making Predictions
13 pages
Main - Py Text File
No ratings yet
Main - Py Text File
5 pages
West Rox
No ratings yet
West Rox
29 pages
Linear Regression Analysis - Polynomial Regression
No ratings yet
Linear Regression Analysis - Polynomial Regression
25 pages
ML Short Code - Under Updating
No ratings yet
ML Short Code - Under Updating
4 pages
Real Estate
No ratings yet
Real Estate
10 pages
Exp 12 and 15
No ratings yet
Exp 12 and 15
4 pages
Forecasting Questions With Answers
No ratings yet
Forecasting Questions With Answers
3 pages
Advanced Econometrics
No ratings yet
Advanced Econometrics
513 pages
5.1) Binary Logistic Regression
No ratings yet
5.1) Binary Logistic Regression
32 pages
Kinetic Theory of Gases
No ratings yet
Kinetic Theory of Gases
46 pages
(Ebook PDF) Introductory Econometrics: A Modern Approach 7th Edition by Jeffrey Download
100% (2)
(Ebook PDF) Introductory Econometrics: A Modern Approach 7th Edition by Jeffrey Download
57 pages
Chapter 3 Multiple Regression
No ratings yet
Chapter 3 Multiple Regression
49 pages
Chapter 3 Multiple Regression Analysis Estimation
No ratings yet
Chapter 3 Multiple Regression Analysis Estimation
9 pages
Newbold-Presentación Regresión Cap 11
No ratings yet
Newbold-Presentación Regresión Cap 11
43 pages
Sample MCQs
No ratings yet
Sample MCQs
67 pages
Rohini 73149042113
No ratings yet
Rohini 73149042113
11 pages
PROCESS Documentation Addendum
No ratings yet
PROCESS Documentation Addendum
26 pages
UnivariateRegression 2
No ratings yet
UnivariateRegression 2
72 pages
Intro Regression Modeling
No ratings yet
Intro Regression Modeling
11 pages
Introduction To Simple Linear Regression: - K.Tejashree (23H51A66F8)
No ratings yet
Introduction To Simple Linear Regression: - K.Tejashree (23H51A66F8)
10 pages
Doucet, de Freitas, Gordon - An Introduction To Sequential Monte Carlo Methods
No ratings yet
Doucet, de Freitas, Gordon - An Introduction To Sequential Monte Carlo Methods
12 pages
What Do Case-Control Studies Estimate? Survey of Methods and Assumptions in Published Case-Control Research
No ratings yet
What Do Case-Control Studies Estimate? Survey of Methods and Assumptions in Published Case-Control Research
9 pages
ReCell Project PDF
No ratings yet
ReCell Project PDF
21 pages
Employee Turnover Problem Statement
No ratings yet
Employee Turnover Problem Statement
5 pages
Linear Regression Skills Quiz
No ratings yet
Linear Regression Skills Quiz
13 pages
Genomic Quantitative Genetics To Study Evolution in The Wild
No ratings yet
Genomic Quantitative Genetics To Study Evolution in The Wild
12 pages
Q4-M1 Stat
No ratings yet
Q4-M1 Stat
18 pages
2 Multiple Linear Regression I
No ratings yet
2 Multiple Linear Regression I
9 pages
Midterm Exam Spring 2023 - Answers
No ratings yet
Midterm Exam Spring 2023 - Answers
6 pages
Panel Data Analysis
No ratings yet
Panel Data Analysis
9 pages
ECD202 Lec04 2023
No ratings yet
ECD202 Lec04 2023
9 pages
Exercises
No ratings yet
Exercises
2 pages
Quiz 2 Formula Sheet
No ratings yet
Quiz 2 Formula Sheet
2 pages
Forecasting - Muhammad Idzhar Faisa - 120310200084
No ratings yet
Forecasting - Muhammad Idzhar Faisa - 120310200084
10 pages
Contoh Analisa Kemajuan Peserta
No ratings yet
Contoh Analisa Kemajuan Peserta
3 pages
Study Guide Gases Student
No ratings yet
Study Guide Gases Student
6 pages
The Fibonacci Number Series
From Everand
The Fibonacci Number Series
Michael Husted
5/5 (1)
A List of Factorial Math Constants
From Everand
A List of Factorial Math Constants
Archive Classics
No ratings yet
Masters Among Us: An Exploration of Supernal Encounters and Miraculous Phenomena
From Everand
Masters Among Us: An Exploration of Supernal Encounters and Miraculous Phenomena
Thomas Curley
No ratings yet

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Boston House Prediction - Colab1

Uploaded by

Boston House Prediction - Colab1

Uploaded by

Avinash Shukla (27)

import os import sys

!umount /kaggle/input/ 2> /dev/null

print('Data source import complete.')

# Load the dataset

crim zn indus chas nox rm age dis rad tax ptratio \

Missing values in the dataset:

# Displaying the summary statistics of the dataset print("\nSummary Statistics:")

count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000

# Visualizing missing data (optional, for better unde plt.figure(figsize=(10, 6))

# Visualizing the distribution of a few important fea plt.figure(figsize=(14, 10))

# Displaying the correlation matrix print("Correlation Matrix:")

crim zn indus chas nox rm age \

crim 1.000000 -0.200469 0.406583 -0.055892 0.420972 -0.219433 0.352734

chas -0.055892 -0.042697 0.062938 1.000000 0.091203 0.091468 0.086518

nox 0.420972 -0.516604 0.763651 0.091203 1.000000 -0.302751 0.731470

rm -0.219433 0.311173 -0.394193 0.091468 -0.302751 1.000000 -0.240286

age 0.352734 -0.569537 0.644779 0.086518 0.731470 -0.240286 1.000000

dis -0.379670 0.664408 -0.708027 -0.099176 -0.769230 0.203507 -0.747881

rad 0.625505 -0.311948 0.595129 -0.007368 0.611441 -0.210718 0.456022

tax 0.582764 -0.314563 0.720760 -0.035587 0.668023 -0.292794 0.506456

dis rad tax ptratio b lstat medv

zn 0.664408 -0.311948 -0.314563 -0.391679 0.175520 -0.412995 0.360445

indus -0.708027 0.595129 0.720760 0.383248 -0.356977 0.603800 -0.483725

chas -0.099176 -0.007368 -0.035587 -0.121515 0.048788 -0.053929 0.175260

nox -0.769230 0.611441 0.668023 0.188933 -0.380051 0.590879 -0.427321

rm 0.203507 -0.210718 -0.292794 -0.357612 0.128107 -0.615721 0.696169

age -0.747881 0.456022 0.506456 0.261515 -0.273534 0.602339 -0.376955

dis 1.000000 -0.494588 -0.534432 -0.232471 0.291512 -0.496996 0.249929

tax -0.534432 0.910228 1.000000 0.460853 -0.441808 0.543993 -0.468536

ptratio -0.232471 0.464741 0.460853 1.000000 -0.177383 0.374044 -0.507787

lstat -0.496996 0.488676 0.543993 0.374044 -0.366087 1.000000 -0.737663

medv 0.249929 -0.381626 -0.468536 -0.507787 0.333461 -0.737663 1.000000

# Visualize the correlation matrix using a heatmap plt.figure(figsize=(12, 8))

print("\nFeatures with highest negative correlation w

Features with highest positive correlation with house prices:

Features with highest negative correlation with house prices:

# Select the features and the target variable

# Split the dataset into training and testing sets

# Standardize the feature variables using StandardSca scaler = StandardScaler()

# Print shapes to verify the splits

Testing data shape: (102, 13)

from sklearn.linear_model import LinearRegression

# Re-select the features and target variable after im

# Split the dataset into training and testing sets

# Standardize the feature variables using StandardSca scaler = StandardScaler()

# Train the Linear Regression model model = LinearRegression()

# Display the model's coefficients and intercept print("Model Coefficients:", model.coef_)

# Import necessary libraries

# Calculate and display performance metrics # Mean Absolute Error (MAE)

# Plot the predicted vs actual house prices
plt.figure(figsize=(8, 6))

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.