0% found this document useful (0 votes)

15 views11 pages

Sentimental

Uploaded by

trishhh3174

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

15 views11 pages

Sentimental

Uploaded by

trishhh3174

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 11

#Installation

!pip install wordcloud

!pip install matplotlib
!pip install panda
!pip install xgboost

#importing Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.stem.porter import PorterStemmer

nltk.download('stopwords')
from nltk.corpus import stopwords

STOPWORDS = set(stopwords.words('english'))

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from wordcloud import WordCloud
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
import pickle
import re

# Load the data
# The file is read as tab-separated; quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are kept as literal text.
DATASET_PATH = r"C:\Users\ASUS\Downloads\dataSetNew.tsv"
data = pd.read_csv(DATASET_PATH, delimiter='\t', quoting=3)

print(f"Dataset shape {data.shape}")

# Column names
print(f"Features name: {data.columns.values}")

# Number of null values per column
data.isnull().sum()

# Finding the null record(s): isna() already returns a boolean mask, so no
# comparison against True is needed.
data[data['verified_reviews'].isna()]
data.dropna(inplace=True)

print(f" Dataset shape after dropping null values : {data.shape}")

# Character length of every review, stored as a new column
data['length'] = data['verified_reviews'].apply(len)

data.head()

# Sanity check on one row: the stored 'length' should equal len() of the text.
# Original value
print(f"'verified_reviews' column value: {data.iloc[10]['verified_reviews']}")
# Length of the review computed with len()
print(f" Length of the review : {len(data.iloc[10]['verified_reviews'])}")
# Value of the 'length' column for the same row
print(f"'length' column value: {data.iloc[10]['length']}")

data.dtypes

len(data)

print(f"Rating value count: \n{data['rating'].value_counts()}")

data.dtypes

# Rating distribution count

data['rating'].value_counts().plot.bar(color='red')
plt.title('Rating distribution count')
plt.xlabel('Ratings')
plt.ylabel('Count')
plt.show()

# Share of each rating as a percentage of all rows, rounded to 2 decimals
print(f" PercentageDistribution: \n{round(data['rating'].value_counts()/data.shape[0]*100,2)}")

# Pie chart of the rating shares
fig = plt.figure(figsize=(7,7))

colors = ('red', 'green', 'blue','orange','yellow')

wp = {'linewidth':1, "edgecolor":'black'}

tags = data['rating'].value_counts()/data.shape[0]

# One offset per wedge; sized from the data so the chart does not depend on
# the dataset containing exactly five distinct ratings.
explode = (0.1,) * len(tags)

tags.plot(kind='pie', autopct="%1.1f%%", shadow=True, colors=colors, startangle=90,
          wedgeprops=wp, explode=explode, label='Percentage wise distribution of rating')

from io import BytesIO

# Render the figure into an in-memory PNG buffer
graph = BytesIO()

fig.savefig(graph, format="png")

# Feedback distribution
print(f"Feedback value count: \n{data['feedback'].value_counts()}")

# Second review (positional index 1) among the rows with feedback == 0
review_0 = data[data['feedback'] == 0].iloc[1]['verified_reviews']

print(review_0)

# Second review (positional index 1) among the rows with feedback == 1
review_1 = data[data['feedback'] == 1].iloc[1]['verified_reviews']

print(review_1)

data['feedback'].value_counts().plot.bar(color = 'blue')
plt.title('Feedback distribution count')
plt.xlabel('Feedback')
plt.ylabel('Count')
plt.show()

# Share of each feedback value as a percentage of all rows, rounded to 2 decimals
print(f"Feedback value count - percentage distribution: \n{round(data['feedback'].value_counts()/data.shape[0]*100,2)}")

# Pie chart of the feedback shares
fig = plt.figure(figsize=(7,7))
colors = ('red', 'green')
wp = {'linewidth':1, "edgecolor":'black'}
tags = data['feedback'].value_counts()/data.shape[0]
# One offset per wedge, sized from the data rather than hard-coded
explode = (0.1,) * len(tags)
tags.plot(kind='pie', autopct="%1.1f%%", shadow=True, colors=colors, startangle=90,
          wedgeprops=wp, explode=explode, label='Percentage wise distribution of feedback')

# Rating values that occur for each feedback value
data[data['feedback'] == 0]['rating'].value_counts()

data[data['feedback'] == 1]['rating'].value_counts()

# Variation distribution
print(f"Variation value count: \n{data['variation'].value_counts()}")

data['variation'].value_counts().plot.bar(color = 'orange')
plt.title('Variation distribution count')
plt.xlabel('Variation')
plt.ylabel('Count')
plt.show()

# Share of each variation as a percentage of all rows, rounded to 2 decimals
print(f"Variation value count - percentage distribution: \n{round(data['variation'].value_counts()/data.shape[0]*100,2)}")

# Mean rating per variation
data.groupby('variation')['rating'].mean()

data.groupby('variation')['rating'].mean().sort_values().plot.bar(color = 'brown',
                                                                  figsize=(11, 6))
plt.title("Mean rating according to variation")
plt.xlabel('Variation')
plt.ylabel('Mean rating')
plt.show()

# Review length analysis
data['length'].describe()

sns.histplot(data['length'],color='blue').set(title='Distribution of length of review ')

sns.histplot(data[data['feedback']==0]['length'],color='red').set(title='Distribution of length of review if feedback = 0')

sns.histplot(data[data['feedback']==1]['length'],color='green').set(title='Distribution of length of review if feedback = 1')

# Histogram of the per-length mean rating.
# NOTE(review): the histogram's y-axis is a bin count, yet it is labelled
# 'length' below -- confirm the intended label.
data.groupby('length')['rating'].mean().plot.hist(color = 'blue', figsize=(7, 6),
                                                  bins = 20)
plt.title(" Review length wise mean ratings")
plt.xlabel('ratings')
plt.ylabel('length')
plt.show()

# Preprocessing and Modelling

import os
import pickle

# Build the cleaned corpus. For every review: replace each non-letter
# character with a space, lowercase, split on whitespace, drop stopwords,
# stem the remaining words and join them back with single spaces.
# Iterating the column directly avoids a positional row lookup per review.
corpus = []
stemmer = PorterStemmer()
for raw_text in data['verified_reviews']:
    review = re.sub('[^a-zA-Z]', ' ', raw_text)
    review = review.lower().split()
    review = [stemmer.stem(word) for word in review if word not in STOPWORDS]
    review = ' '.join(review)
    corpus.append(review)

# Bag-of-words vectorizer limited to 2500 features
cv = CountVectorizer(max_features = 2500)

# Storing independent and dependent variables in X and y
X = cv.fit_transform(corpus).toarray()
y = data['feedback'].values

# Persist the fitted vectorizer; the context manager closes the file handle
# once the dump has finished.
os.makedirs('Models', exist_ok=True)

with open('Models/countVectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(cv, vectorizer_file)

print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Hold out 30% of the rows for testing; random_state fixes the shuffle seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,
                                                    random_state = 15)

print(f"X train: {X_train.shape}")
print(f"y train: {y_train.shape}")
print(f"X test: {X_test.shape}")
print(f"y test: {y_test.shape}")

# Keeping values between 0 and 1 by scaling.
# The scaler is fitted on the training split only and then applied to the
# test split.
scaler = MinMaxScaler()

X_train_scl = scaler.fit_transform(X_train)
X_test_scl = scaler.transform(X_test)

# Persist the fitted scaler; the context manager closes the file handle.
with open('Models/scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

# Random Forest

model_rf = RandomForestClassifier()
model_rf.fit(X_train_scl, y_train)

# Accuracy of the model on training and testing data
print("Training Accuracy :", model_rf.score(X_train_scl, y_train))
print("Testing Accuracy :", model_rf.score(X_test_scl, y_test))

# Predicting on the test set
y_preds = model_rf.predict(X_test_scl)

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_rf.classes_)
cm_display.plot()
plt.show()

# Hyper-parameter grid explored by the grid search below
params = {
    'bootstrap': [True],
    'max_depth': [80, 100],
    'min_samples_split': [8, 12],
    'n_estimators': [100, 300]
}

cv_object = StratifiedKFold(n_splits = 2)

grid_search = GridSearchCV(estimator = model_rf, param_grid = params, cv = cv_object,
                           verbose = 0, return_train_score = True)
grid_search.fit(X_train_scl, y_train.ravel())

# 10-fold cross-validation of the random forest on the training split
accuracies = cross_val_score(estimator = model_rf, X = X_train_scl, y = y_train, cv = 10)

print("Accuracy :", accuracies.mean())
# accuracies.std() is a standard deviation, so it is labelled as one
print("Standard Deviation :", accuracies.std())

# Getting the best parameters from the grid search
print("Best Parameter Combination : {}".format(grid_search.best_params_))

# Both figures below are averaged over every parameter combination in the grid
print("Cross validation mean accuracy on train set : {}".format(grid_search.cv_results_['mean_train_score'].mean()*100))
print("Cross validation mean accuracy on test set : {}".format(grid_search.cv_results_['mean_test_score'].mean()*100))
# y_preds still holds the predictions made above by model_rf
print("Accuracy score for test set :", accuracy_score(y_test, y_preds))

# XGBoost

model_xgb = XGBClassifier()
model_xgb.fit(X_train_scl, y_train)

# Accuracy of the model on training and testing data
print("Training Accuracy :", model_xgb.score(X_train_scl, y_train))
print("Testing Accuracy :", model_xgb.score(X_test_scl, y_test))

# The model was fitted and scored on the scaled features, so the predictions
# behind the confusion matrix must use the scaled test split as well
# (previously the unscaled X_test was passed here).
y_preds = model_xgb.predict(X_test_scl)

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)
print(cm)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_xgb.classes_)
cm_display.plot()
plt.show()

# Saving the XGBoost classifier; the context manager closes the file handle.
with open('Models/model_xgb.pkl', 'wb') as model_file:
    pickle.dump(model_xgb, model_file)

# SVM Model (Self made)

from sklearn.svm import SVC

# Fitting scaled X_train and y_train on SVM classifier (linear kernel, C=1)
model_svm = SVC(kernel='linear', C=1, random_state=42)
model_svm.fit(X_train_scl, y_train)

# Accuracy of the model on training and testing data
print("Training Accuracy :", model_svm.score(X_train_scl, y_train))
print("Testing Accuracy :", model_svm.score(X_test_scl, y_test))

# Predict with this model before building its confusion matrix; without this
# line y_preds would still hold the previous classifier's predictions.
y_preds = model_svm.predict(X_test_scl)

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)
print(cm)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_svm.classes_)
cm_display.plot()
plt.show()

# Decision Tree

model_dt = DecisionTreeClassifier()
model_dt.fit(X_train_scl, y_train)

# Accuracy of the model on training and testing data
print("Training Accuracy :", model_dt.score(X_train_scl, y_train))
print("Testing Accuracy :", model_dt.score(X_test_scl, y_test))

# The tree was fitted and scored on the scaled features, so predict on the
# scaled test split too (previously the unscaled X_test was passed here).
y_preds = model_dt.predict(X_test_scl)

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)
print(cm)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_dt.classes_)
cm_display.plot()
plt.show()

# SVM with the library's default hyper-parameters

from sklearn.svm import SVC

# fit() returns the estimator, so construction and training are chained.
# Note that this rebinds model_svm.
model_svm = SVC().fit(X_train_scl, y_train)

# Accuracy on the training split, then on the held-out test split
train_accuracy = model_svm.score(X_train_scl, y_train)
print("Training Accuracy :", train_accuracy)
test_accuracy = model_svm.score(X_test_scl, y_test)
print("Testing Accuracy :", test_accuracy)

# Predictions on the scaled test split feed the confusion matrix
y_preds = model_svm.predict(X_test_scl)
cm = confusion_matrix(y_test, y_preds)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model_svm.classes_)
cm_display.plot()
plt.show()

# KNeighbors, Naive Bayes, Logistic Regression and AdaBoost
#
# All four classifiers use their default settings and go through the same
# steps: fit on the scaled training split, then print the training and
# testing accuracy.

from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier

model_knn = KNeighborsClassifier()
model_nb = GaussianNB()
model_lr = LogisticRegression()
model_adaboost = AdaBoostClassifier()

# Processed in the same order as before: KNN, Naive Bayes, Logistic
# Regression, AdaBoost.
for classifier in (model_knn, model_nb, model_lr, model_adaboost):
    classifier.fit(X_train_scl, y_train)
    print("Training Accuracy :", classifier.score(X_train_scl, y_train))
    print("Testing Accuracy :", classifier.score(X_test_scl, y_test))

# Hybrid (Combining RandomForest and Xgboost) ------ DO NOT TRY (construction is going on)

from sklearn.ensemble import RandomForestClassifier, StackingClassifier

# Reuse the in-memory train/test matrices (X_train_scl, X_test_scl, y_train,
# y_test) and the models trained above (model_rf, model_xgb). Nothing in this
# file writes Models/X_train_scl.pkl, Models/X_test_scl.pkl, Models/y_train.pkl,
# Models/y_test.pkl or Models/model_rf.pkl, so loading them here failed with
# FileNotFoundError on a fresh run.

# Creating a stacking classifier: random forest and XGBoost as base
# estimators, a decision tree as the final estimator.
# NOTE(review): StackingClassifier.fit is expected to refit the base
# estimators, so passing already-fitted models should not matter -- confirm
# against the scikit-learn documentation.
estimators = [
    ('rf', model_rf),
    ('xgb', model_xgb)
]

stacking_clf = StackingClassifier(
    estimators=estimators,
    final_estimator=DecisionTreeClassifier()
)

# Training the stacking classifier
stacking_clf.fit(X_train_scl, y_train)

# Evaluating the stacking classifier
print("Training Accuracy :", stacking_clf.score(X_train_scl, y_train))
print("Testing Accuracy :", stacking_clf.score(X_test_scl, y_test))

# Predicting on the test set
y_preds = stacking_clf.predict(X_test_scl)

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm,
                                    display_labels=stacking_clf.classes_)
cm_display.plot()
plt.show()

# Saving the stacking classifier; the context manager closes the file handle.
with open('Models/stacking_clf.pkl', 'wb') as stacking_file:
    pickle.dump(stacking_clf, stacking_file)

AML Lab
No ratings yet
AML Lab
14 pages
Practicalpgm ML
No ratings yet
Practicalpgm ML
33 pages
PR
No ratings yet
PR
17 pages
Model Evaluation and Selection Cheatsheet 1708023215
No ratings yet
Model Evaluation and Selection Cheatsheet 1708023215
7 pages
22MCA1008 - Varun ML LAB ASSIGNMENTS
100% (1)
22MCA1008 - Varun ML LAB ASSIGNMENTS
41 pages
Code ExerciseModelSelection
100% (1)
Code ExerciseModelSelection
19 pages
F.M.L. Thompson - The Cambridge Social History of Britain, 1750-1950, Vol. 01. Regions and Communities
No ratings yet
F.M.L. Thompson - The Cambridge Social History of Britain, 1750-1950, Vol. 01. Regions and Communities
592 pages
ML Manual
No ratings yet
ML Manual
18 pages
ML5&6&7&8&9&10
No ratings yet
ML5&6&7&8&9&10
35 pages
Logcat
No ratings yet
Logcat
4,525 pages
Program
No ratings yet
Program
10 pages
Data Mining Practicals
No ratings yet
Data Mining Practicals
22 pages
Car Mock - ML Ans
No ratings yet
Car Mock - ML Ans
6 pages
ML Functions
No ratings yet
ML Functions
12 pages
ITERATORS
No ratings yet
ITERATORS
8 pages
ML W8 Merged
No ratings yet
ML W8 Merged
27 pages
S 10
No ratings yet
S 10
11 pages
SVM K NN MLP With Sklearn Jupyter NoteBo
No ratings yet
SVM K NN MLP With Sklearn Jupyter NoteBo
22 pages
IS 4308 Product Manual
No ratings yet
IS 4308 Product Manual
7 pages
Da Programs
No ratings yet
Da Programs
10 pages
Angular
No ratings yet
Angular
330 pages
Python For Data Science Cheat Sheet: Scikit-Learn Create Your Model Evaluate Your Model's Performance
100% (1)
Python For Data Science Cheat Sheet: Scikit-Learn Create Your Model Evaluate Your Model's Performance
1 page
PYHTONPRACT
No ratings yet
PYHTONPRACT
4 pages
Untitled 57
No ratings yet
Untitled 57
4 pages
ML Fat
No ratings yet
ML Fat
9 pages
Prathamesh KRAI
No ratings yet
Prathamesh KRAI
38 pages
Ann Experiential Learning
No ratings yet
Ann Experiential Learning
43 pages
ML PDF
No ratings yet
ML PDF
30 pages
Data Science Practical
No ratings yet
Data Science Practical
22 pages
ML 7
No ratings yet
ML 7
6 pages
BTVN5 Code
No ratings yet
BTVN5 Code
2 pages
Mercedes-Benz Greener Manufacturing Ai
0% (1)
Mercedes-Benz Greener Manufacturing Ai
16 pages
Detect Fake Profiles in Online Social Networks Using Support Vector Machine
No ratings yet
Detect Fake Profiles in Online Social Networks Using Support Vector Machine
8 pages
Multimedia SYsytem Unit 1
No ratings yet
Multimedia SYsytem Unit 1
20 pages
1
No ratings yet
1
13 pages
RCD3601
No ratings yet
RCD3601
20 pages
ML Complete Notes Hridoy
No ratings yet
ML Complete Notes Hridoy
5 pages
DM ML Practical
No ratings yet
DM ML Practical
13 pages
Reference Guide - Validation & Cross-Validation
No ratings yet
Reference Guide - Validation & Cross-Validation
7 pages
Ai Int-1
No ratings yet
Ai Int-1
6 pages
ML NEW Final Format
No ratings yet
ML NEW Final Format
37 pages
Aiml 5-8
No ratings yet
Aiml 5-8
19 pages
ML Lab Programs 2
No ratings yet
ML Lab Programs 2
16 pages
Supple Maximizing Performance in Cs CuBiCl
No ratings yet
Supple Maximizing Performance in Cs CuBiCl
5 pages
Waves Interference Remote Lab1
25% (4)
Waves Interference Remote Lab1
3 pages
Prakhar - Week 5
No ratings yet
Prakhar - Week 5
8 pages
AML Code For m2
No ratings yet
AML Code For m2
7 pages
AI ML - Cycle 2 Programs
No ratings yet
AI ML - Cycle 2 Programs
15 pages
Sma Exp 10 Code Print
No ratings yet
Sma Exp 10 Code Print
7 pages
Implementing Custom Randomsearchcv: 'Red' 'Blue'
No ratings yet
Implementing Custom Randomsearchcv: 'Red' 'Blue'
1 page
ML Lab Prgms Split
No ratings yet
ML Lab Prgms Split
3 pages
16BCB0126 VL2018195002535 Pe003
No ratings yet
16BCB0126 VL2018195002535 Pe003
40 pages
Slip
No ratings yet
Slip
5 pages
Experiment 1
No ratings yet
Experiment 1
19 pages
INFO1113 Assignment 2023 S2
No ratings yet
INFO1113 Assignment 2023 S2
11 pages
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 3
No ratings yet
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 3
30 pages
DB en Trio Ups 2g 1ac 1ac 120v 750va 107057 en 01
No ratings yet
DB en Trio Ups 2g 1ac 1ac 120v 750va 107057 en 01
24 pages
The Marketig Plan of Cocoon Viet Nam
No ratings yet
The Marketig Plan of Cocoon Viet Nam
36 pages
Data Preprocessing
No ratings yet
Data Preprocessing
9 pages
5) Randomforest - Ipynb - Colaboratory
No ratings yet
5) Randomforest - Ipynb - Colaboratory
12 pages
To Improve The Performance of Models Predicting Ba
No ratings yet
To Improve The Performance of Models Predicting Ba
6 pages
LG HG6 Datasheet
No ratings yet
LG HG6 Datasheet
9 pages
Hyperparameter Tuning
No ratings yet
Hyperparameter Tuning
7 pages
Python CA 4
No ratings yet
Python CA 4
9 pages
Hyper Parameter Tuning
No ratings yet
Hyper Parameter Tuning
4 pages
Python Essential Methods in Machine Learning
No ratings yet
Python Essential Methods in Machine Learning
6 pages
Aiml Ex 4-7
No ratings yet
Aiml Ex 4-7
8 pages
Sony kv-27fs13 27fs17 27fv17 29fv17-c 32fs13 32fs17 34fs13c 34fs17 CH Ba-5
No ratings yet
Sony kv-27fs13 27fs17 27fv17 29fv17-c 32fs13 32fs17 34fs13c 34fs17 CH Ba-5
299 pages
Unit2 ML Programs
No ratings yet
Unit2 ML Programs
7 pages
AIML Prograns
No ratings yet
AIML Prograns
6 pages
Linearregression SVM
No ratings yet
Linearregression SVM
3 pages
PEC1-Format Prak Corporate 2024 (Autosaved)
No ratings yet
PEC1-Format Prak Corporate 2024 (Autosaved)
25 pages
Table Morgan Sample Thesis
86% (7)
Table Morgan Sample Thesis
1 page
ITP AND Reports Only Approved
No ratings yet
ITP AND Reports Only Approved
18 pages
Schneider Electric - ComPacT-NSX-new-generation - LV432642
No ratings yet
Schneider Electric - ComPacT-NSX-new-generation - LV432642
3 pages
HBRI Brochure
0% (1)
HBRI Brochure
8 pages
Psychology Research Proposal Template
No ratings yet
Psychology Research Proposal Template
8 pages
BS en 50164-6-2009
No ratings yet
BS en 50164-6-2009
18 pages
Case Study
No ratings yet
Case Study
2 pages
Lesson Plan in Science 6
100% (1)
Lesson Plan in Science 6
6 pages
Sensors: Implementation of Parameter Observer For Capacitors
No ratings yet
Sensors: Implementation of Parameter Observer For Capacitors
19 pages
Bageshwori Civil Consult Pvt. LTD: Kathmandu, Nepal
No ratings yet
Bageshwori Civil Consult Pvt. LTD: Kathmandu, Nepal
7 pages
Final Trial Exam - 2021: Text One
No ratings yet
Final Trial Exam - 2021: Text One
7 pages
Printpix CX 400
No ratings yet
Printpix CX 400
53 pages
Data Structure - AVL Tree
No ratings yet
Data Structure - AVL Tree
6 pages
Development Length Tables
No ratings yet
Development Length Tables
1 page
Basfiber For Construction Market (US Customary Units) .
No ratings yet
Basfiber For Construction Market (US Customary Units) .
4 pages
Bok Seng Logistics Pte LTD: Chains Working Load Limits 6.00 T
No ratings yet
Bok Seng Logistics Pte LTD: Chains Working Load Limits 6.00 T
1 page
C# Tutorial - SoloLearn - Learn To Code For FREE!
No ratings yet
C# Tutorial - SoloLearn - Learn To Code For FREE!
1 page

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Sentimental

Uploaded by

Sentimental

Uploaded by

#Installation

!pip install wordcloud

from sklearn.model_selection import train_test_split

#Load the data I

data = pd.read_csv(r"C:\Users\ASUS\Downloads\dataSetNew.tsv", delimiter = '\t',

print(f"Dataset shape {data.shape}")

print(f"Features name: {data.columns.values}")

#finding null record

print(f" Dataset shape after dropping null values : {data.shape}")

print(f"'verified_reviews' column value: {data.iloc[10]['verified_reviews']}")

print(f"Rating value count: \n{data['rating'].value_counts()}")

#Rate Distribuiom Count

colors = ('red', 'green', 'blue','orange','yellow')

tags.plot(kind='pie', autopct="%1.1f%%", shadow=True, colors=colors, startangle=90,

from io import BytesIO

print(f"Feedback value count: \n{data['feedback'].value_counts()}")

review_0 = data[data['feedback'] == 0].iloc[1]['verified_reviews']

review_1 = data[data['feedback'] == 1].iloc[1]['verified_reviews']

print(f"Feedback value count - percentage distribution: \

print(f"Variation value count: \n{data['variation'].value_counts()}")

print(f"Variation value count - percentage distribution: \

data.groupby('length')['rating'].mean().plot.hist(color = 'blue', figsize=(7, 6),

#Preprocessing and Modelling

#Storing independent and dependent variables in X and y

pickle.dump(cv, open('Models/countVectorizer.pkl', 'wb'))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3,

print(f"X train: {X_train.shape}")

#keeping values between 0 and 1 by scaling

pickle.dump(scaler, open('Models/scaler.pkl', 'wb'))

#Accuracy of the model on training and testing data

print("Training Accuracy :", model_rf.score(X_train_scl, y_train))

#Predicting on the test set

grid_search = GridSearchCV(estimator = model_rf, param_grid = params, cv =

accuracies = cross_val_score(estimator = model_rf, X = X_train_scl, y = y_train, cv

print("Accuracy :", accuracies.mean())

#Getting the best parameters from the grid search

print("Best Parameter Combination : {}".format(grid_search.best_params_))

print("Cross validation mean accuracy on train set :

#Accuracy of the model on training and testing data

print("Training Accuracy :", model_xgb.score(X_train_scl, y_train))

#SVM Model (Self made)

from sklearn.svm import SVC

# Fitting scaled X_train and y_train on SVM classifier

# Accuracy of the model on training and testing data

#Accuracy of the model on training and testing data

print("Training Accuracy :", model_dt.score(X_train_scl, y_train))

from sklearn.svm import SVC

# Fitting scaled X_train and y_train on SVM classifier

#Accuracy of the model on training and testing data

from sklearn.neighbors import KNeighborsClassifier

# Fitting scaled X_train and y_train on KNN classifier

# Accuracy of the model on training and testing data

#Naive Bayes Model

from sklearn.naive_bayes import GaussianNB

# Fitting scaled X_train and y_train on Naive Bayes classifier

from sklearn.linear_model import LogisticRegression

# Fitting scaled X_train and y_train on Logistic Regression classifier

# Accuracy of the model on training and testing data

from sklearn.ensemble import AdaBoostClassifier

# Fitting scaled X_train and y_train on AdaBoost classifier

# Accuracy of the model on training and testing data

# Hybrid (Combining RandomForest and Xgboost)------DONOT TRY (Construction is going

from sklearn.ensemble import RandomForestClassifier, StackingClassifier

# Loading the preprocessed data and models

# Creating a stacking classifier

# Training the stacking classifier

# Evaluating the stacking classifier

# Predicting on the test set

# Saving the stacking classifier

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.