
Coding Notes Data Science

pd.read_csv
import pandas as pd

# reading a csv file into a DataFrame
df = pd.read_csv("people.csv")
df
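read_csv also accepts parameters for less tidy files. A minimal sketch (the usecols names and na_values strings below are hypothetical, not from the original notes):

# sep sets the delimiter, usecols limits which columns are read,
# and na_values lists strings pandas should treat as missing
df = pd.read_csv(
    "people.csv",
    sep=",",
    usecols=["name", "age"],  # hypothetical column names
    na_values=["NA", "?"],
)
df.head()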
pd.read_excel
pip install pandas
pip install openpyxl  # engine pandas uses for .xlsx files (xlrd only reads legacy .xls)
import pandas as pd
df = pd.read_excel("sample.xlsx")
print(df)
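Workbooks often hold more than one sheet. A small sketch, assuming a hypothetical sheet named 'Sheet2' in sample.xlsx, using read_excel's sheet_name parameter:

# sheet_name selects a specific sheet; sheet_name=None returns a dict of all sheets
df2 = pd.read_excel("sample.xlsx", sheet_name="Sheet2")
all_sheets = pd.read_excel("sample.xlsx", sheet_name=None)
print(list(all_sheets.keys()))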
pd.read_sql
# import the modules
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy connectable
cnx = create_engine('sqlite:///contacts.db').connect()

# the table named 'contacts' is returned as a DataFrame
df = pd.read_sql_table('contacts', cnx)
print(df)
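read_sql_table loads an entire table; pd.read_sql can run an arbitrary SQL query instead. A sketch against the same contacts.db connection (the name column in the query is hypothetical):

# run a query instead of loading the whole table
query = "SELECT * FROM contacts WHERE name LIKE 'A%'"  # hypothetical column
df_filtered = pd.read_sql(query, cnx)
print(df_filtered)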
pd.read_table
# importing pandas
import pandas as pd

# read_table defaults to tab-delimited; pass delimiter=',' to read a CSV
pd.read_table('people.csv', delimiter=',')
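read_table's default delimiter is a tab, so tab-separated files need no extra arguments. A minimal sketch, assuming a hypothetical people.tsv:

# no delimiter needed: read_table assumes tab-separated values by default
df_tab = pd.read_table('people.tsv')
df_tab.head()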
Clean a real-world messy dataset (e.g., from Kaggle)
# modules we'll use
import pandas as pd
import numpy as np

# read in all our data
nfl_data = pd.read_csv("../input/nflplaybyplay2009to2016/NFL Play by Play 2009-2017 (v4).csv")

# set seed for reproducibility
np.random.seed(0)
# look at the first five rows of the nfl_data file.
# I can see a handful of missing data already!
nfl_data.head()
# get the number of missing data points per column
missing_values_count = nfl_data.isnull().sum()

# look at the # of missing points in the first ten columns
missing_values_count[0:10]
# how many total missing values do we have?
total_cells = np.prod(nfl_data.shape)
total_missing = missing_values_count.sum()

# percent of data that is missing
percent_missing = (total_missing / total_cells) * 100
print(percent_missing)
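The same idea works per column, which is often more useful than one overall percentage. A short sketch built on the counts computed above:

# percent missing per column, sorted so the worst offenders come first
percent_missing_by_column = (missing_values_count / len(nfl_data)) * 100
percent_missing_by_column.sort_values(ascending=False)[0:10]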
# remove all the rows that contain a missing value
nfl_data.dropna()
# remove all columns with at least one missing value
columns_with_na_dropped = nfl_data.dropna(axis=1)
columns_with_na_dropped.head()
# just how much data did we lose?
print("Columns in original dataset: %d \n" % nfl_data.shape[1])
print("Columns with na's dropped: %d" % columns_with_na_dropped.shape[1])
# get a small subset of the NFL dataset
subset_nfl_data = nfl_data.loc[:, 'EPA':'Season'].head()
subset_nfl_data
# replace all NA's with 0
subset_nfl_data.fillna(0)
# replace each NA with the value that comes directly after it in the same column,
# then replace any remaining NA's with 0
subset_nfl_data.bfill(axis=0).fillna(0)
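Back-filling is only one strategy; numeric columns are often imputed with a summary statistic instead. A sketch on the same subset (numeric_only guards against any non-numeric columns):

# replace NA's in each column with that column's mean
subset_nfl_data.fillna(subset_nfl_data.mean(numeric_only=True))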
Apply EDA on a student performance dataset
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('/kaggle/input/students-performance-in-exams/StudentsPerformance.csv')
df.info()
df.describe()
df.shape
df.isnull().sum() #checks if there are any missing values
Let's start by plotting some graphs.
plt.rcParams['figure.figsize'] = (20, 10)
sns.countplot(x='math score', data=df, palette='dark')
plt.title('Math Score',fontsize = 20)
plt.show()
To analyse the data more deeply, let's add a few new columns: total marks, percentage, and grades.

df['total marks'] = df['math score'] + df['reading score'] + df['writing score']
df['percentage'] = df['total marks'] / 300 * 100

# assigning the grades

def determine_grade(scores):
    if scores >= 85 and scores <= 100:
        return 'Grade A'
    elif scores >= 70 and scores < 85:
        return 'Grade B'
    elif scores >= 55 and scores < 70:
        return 'Grade C'
    elif scores >= 35 and scores < 55:
        return 'Grade D'
    elif scores >= 0 and scores < 35:
        return 'Grade E'

df['grades']=df['percentage'].apply(determine_grade)
df.info()
df['grades'].value_counts().plot.pie(autopct="%1.1f%%")
plt.show()
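With the percentage and grade columns in place, groupby makes quick comparisons easy. A sketch assuming the dataset's 'gender' and 'test preparation course' columns (standard in this Kaggle file, but check df.columns first):

# average percentage by gender
print(df.groupby('gender')['percentage'].mean())

# average scores split by test preparation course
print(df.groupby('test preparation course')[['math score', 'reading score', 'writing score']].mean())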
Implementation of Linear Regression Model
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Sample data (replace with your actual data)
X = np.array([[1], [2], [3], [4], [5]])  # independent variable (features)
y = np.array([2, 4, 5, 4, 5])  # dependent variable (target)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Train the model using the training data
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Print the coefficients
print(f"Intercept: {model.intercept_}")
print(f"Coefficient: {model.coef_}")

Implementation of Random Forest Model


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# Load the data (replace 'your_data.csv' with your actual file)
data = pd.read_csv('your_data.csv')

# Separate features (X) and target (y)
X = data.drop('target_column_name', axis=1)  # replace 'target_column_name'
y = data['target_column_name']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest classifier
# n_estimators is the number of trees in the forest
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
