0% found this document useful (0 votes)

12 views5 pages

Part 1 - Import and Wrangling Codes

The document provides a comprehensive guide on data entry and importation into R, detailing methods for creating data frames, reading data from various formats (text, Excel, SPSS, Stata), and handling missing values. It also covers data wrangling techniques including creating new variables, extracting data, applying boolean logic, sorting, subsetting, merging data frames, and recoding variables. Additionally, it includes instructions for exporting data to different file formats.

Uploaded by

letadereje0

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

12 views5 pages

Part 1 - Import and Wrangling Codes

Uploaded by

letadereje0

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 5

# Data entry: typing values directly into R ----

# Empty, typed template: one zero-length vector per column.
infant_data <- data.frame(
  Age = numeric(0),
  gender = character(0),
  weight = numeric(0)
)

# edit() hands the template to the interactive editor; the result has to be
# assigned, otherwise the entered values are not kept.
mydata <- edit(infant_data)

mydata
# Example of entered values:
#   Age gender weight
#     2      m    3.4
#     3      f    2.8
#     2      m    4.0
#     5      m    3.8
#     4      f    3.0
#     6      f    2.5

# Re-open the editor to change existing rows or enter additional data.
mydata <- edit(mydata)

## Embedding data in the script ----

# (1) Whitespace-separated text: the default separator of read.table() fits.
mydatatxt <- "age gender weight
2 m 3.4
3 f 2.8
2 m 4.0"

# read.table() reads data from a file or, via `text =`, from a character vector.
mydata1 <- read.table(header = TRUE, text = mydatatxt)

# (2) Comma-separated text with a missing Science score for the second student.
studentgrades <- "StudentID, First, Last, Math, Science, Social
011, Bob, Smith, 90, 80, 67
012, Jane, Weary, 75, , 80 "

# The fields are comma-delimited, so the separator must be given explicitly;
# with the default (whitespace) the commas stay glued to the names and values.
# strip.white drops the padding around each field, which also lets the blank
# Science entry be read as NA. StudentID is kept as text so the leading zero
# in "011" is not lost.
grade <- read.table(
  header = TRUE,
  text = studentgrades,
  sep = ",",
  strip.white = TRUE,
  colClasses = c(StudentID = "character")
)

# Detection of missing values ----

# Rows that contain at least one NA.
airquality[rowSums(is.na(airquality)) > 0, ]

# Is anything missing in the Ozone column?
anyNA(airquality$Ozone)

# Cell-by-cell logical matrix: TRUE where a value is missing.
is.na(airquality)

# Number of missing observations in each column.
colSums(is.na(airquality))

# Importing data from Excel, SPSS, Stata and CSV ----

# Install only what is missing, and attach every package before its reader is
# called. The package is spelled "foreign"; readxl has to be attached before
# the first read_excel() call.
for (pkg in c("xlsx", "haven", "foreign", "Hmisc", "readxl")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(xlsx)
library(haven)
library(foreign)
library(Hmisc)
library(readxl)

# Stata (haven)
practice <- read_dta("practice.dta")
View(practice)

# SPSS (haven)
Rain <- read_sav("Rain.sav")
View(Rain)

# SPSS (Hmisc), with value labels applied
Rain1 <- spss.get("Rain.sav", use.value.labels = TRUE)
Rain1

# Excel (readxl): reads the first sheet by default
EastHar <- read_excel("EastHar.xlsx")
View(EastHar)

# CSV file: base read.csv() is used because no package providing read_csv()
# is attached in this script; the result is a plain data.frame.
exceldata <- read.csv("exceldata.csv")
View(exceldata)

# Import Excel data from the nth sheet (here the second one)
data <- read_excel("EastHar.xlsx", sheet = 2)

## Exporting data ----

# NOTE(review): the original exported `df`, which is never created in this
# script and therefore resolves to the stats::df() function. `mydata1` (built
# in the embedding section) is used instead -- substitute the data frame you
# actually want to export.

# Tab-delimited text. sep = "" would write the fields with nothing between
# them, so the file could not be read back column by column.
write.table(
  mydata1,
  file = "exported1.txt",
  col.names = TRUE,
  row.names = FALSE,
  sep = "\t"
)

# Comma-separated export of the same data frame.
write.table(
  mydata1,
  file = "exported2.csv",
  col.names = TRUE,
  row.names = FALSE,
  sep = ","
)

# Comma-separated export of the imported SPSS data.
write.table(
  Rain,
  file = "exported3.csv",
  col.names = TRUE,
  row.names = FALSE,
  sep = ","
)

## Data wrangling ----

# Create a new variable
quiz <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)
quiz

# Row-wise total of the two scores, stored as a new column named "SUM"
# inside the quiz data frame.
quiz$SUM <- with(quiz, x1 + x2)
quiz

# Extracting ----

str(USArrests)

USArrests[3, 2]        # row 3, column 2
USArrests$Assault[3]   # the same cell, addressed by column name

# A bare `Assault[3]` fails here because no object of that name exists;
# with() makes the columns visible for this one expression only.
with(USArrests, Assault[3])

# Rows selected by a condition on a column
USArrests[USArrests$Assault >= 200, ]
USArrests[USArrests$UrbanPop == 80, ]

# Same selections with bare column names. with() is used instead of
# attach()/detach() so nothing is left on the search path.
with(USArrests, USArrests[Assault >= 200, ])
with(USArrests, USArrests[UrbanPop == 80, ])

# Boolean operators: AND (&), OR (|) ----

# AND: both conditions must hold.
USArrests[USArrests$Assault > 50 & USArrests$UrbanPop < 70, ]

# OR: at least one condition must hold.
# NOTE(review): the original condition `UrbanPop > 50 | UrbanPop < 60` is TRUE
# for every non-missing value, so it returned the whole data frame. It is
# assumed the intent was "outside the 50-60 range" -- confirm.
USArrests[USArrests$UrbanPop < 50 | USArrests$UrbanPop > 60, ]

# Same filters with bare column names, scoped by with() rather than
# attach()/detach().
with(USArrests, USArrests[Assault > 50 & UrbanPop < 70, ])
with(USArrests, USArrests[UrbanPop < 50 | UrbanPop > 60, ])

# order() function to sort a data frame ----

USArrests[order(USArrests$Murder), ]                     # ascending
USArrests[order(-USArrests$Murder), ]                    # descending, or:
USArrests[order(USArrests$Murder, decreasing = TRUE), ]

# Same sorts with bare column names. with() limits the column lookup to the
# single expression, so no attach()/detach() pair is needed.
with(USArrests, USArrests[order(Murder), ])                     # ascending
with(USArrests, USArrests[order(-Murder), ])                    # descending, or:
with(USArrests, USArrests[order(Murder, decreasing = TRUE), ])

# subset() function to select parts of a data frame.
# The condition is evaluated inside the data frame, so column names can be
# written bare.
subset(
  USArrests,
  subset = UrbanPop > 50 & UrbanPop < 60
)

# merge() joins two data frames horizontally on shared key column(s).

DF1 <- data.frame(
  ID = 1:3,
  course1 = c("A", "B", "C"),
  course2 = c("B", "C", "A")
)

DF2 <- data.frame(
  ID = 1:4,
  course3 = c("A", "A", "B", "C"),
  course4 = c("A", "A", "A", "B")
)

# Keep only the IDs present in both data frames.
total <- merge(x = DF1, y = DF2, by = "ID")

# With more than one key column (not run: neither frame has a Region column):
## total <- merge(DF1, DF2, by = c("ID", "Region"))

# Keep every row from both data frames; cells with no match become NA.
total <- merge(x = DF1, y = DF2, all = TRUE)

str(cars)

colSums(is.na(cars)) # number of missing values in each column

cars # display the data in the console

## Recoding variables ----

# Approach 1: address every column through the data frame.
# 999 is treated as a missing-value code for dist.
cars$dist[cars$dist == 999] <- NA

# Bin stopping distance into three categories.
cars$distcat[cars$dist < 50] <- "Small"
cars$distcat[cars$dist >= 50 & cars$dist < 80] <- "Medium"
cars$distcat[cars$dist >= 80] <- "Long"

str(cars)

# Approach 2: the same recoding with bare column names.
# within() evaluates the block inside the data frame and returns the modified
# copy. It replaces attach(cars), which works on a snapshot that goes stale
# once cars$... is assigned, and under which the bare name `dist` only avoids
# the stats::dist() function while the data frame is attached.
cars <- within(cars, {
  dist[dist == 999] <- NA
  distcat[dist < 50] <- "Small"
  distcat[dist >= 50 & dist < 80] <- "Medium"
  distcat[dist >= 80] <- "Long"
})

str(cars)

Programming For Data Science Assignment-2
No ratings yet
Programming For Data Science Assignment-2
23 pages
Codes - Part 1
No ratings yet
Codes - Part 1
7 pages
Lecture 5 (Managing and Understanding Data)
No ratings yet
Lecture 5 (Managing and Understanding Data)
9 pages
R-Programming Lab Mannual
No ratings yet
R-Programming Lab Mannual
33 pages
R File Code
No ratings yet
R File Code
16 pages
R Code
No ratings yet
R Code
9 pages
Experiment 5
No ratings yet
Experiment 5
13 pages
Data Cleaning
No ratings yet
Data Cleaning
2 pages
RSTUDIO
No ratings yet
RSTUDIO
44 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
FE418 RLectureNotes1
No ratings yet
FE418 RLectureNotes1
15 pages
R Functions
No ratings yet
R Functions
8 pages
R Tutorial #1: Applied Econometrics (Econ3005)
No ratings yet
R Tutorial #1: Applied Econometrics (Econ3005)
21 pages
R Syntax Examples 1
No ratings yet
R Syntax Examples 1
6 pages
Unit 2
No ratings yet
Unit 2
76 pages
Engineering Data Analysis
No ratings yet
Engineering Data Analysis
5 pages
58.tidy Data in R For Linguists
No ratings yet
58.tidy Data in R For Linguists
14 pages
Unit - 3 Learning Notes
No ratings yet
Unit - 3 Learning Notes
8 pages
Practical 1 EDA
No ratings yet
Practical 1 EDA
14 pages
Lab 02 - Compound Data Structures
No ratings yet
Lab 02 - Compound Data Structures
12 pages
2.3 Data Frame
No ratings yet
2.3 Data Frame
3 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
Ds
No ratings yet
Ds
2 pages
Data Cleansing
No ratings yet
Data Cleansing
18 pages
R Sharing
No ratings yet
R Sharing
16 pages
Group 10A - GA2
No ratings yet
Group 10A - GA2
10 pages
UL2
No ratings yet
UL2
2 pages
Statistic and R Programming Lab Exercise
No ratings yet
Statistic and R Programming Lab Exercise
8 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
Simple Tutorial in R
No ratings yet
Simple Tutorial in R
15 pages
R Programming Cheat Sheet
No ratings yet
R Programming Cheat Sheet
7 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
Kids C ("Jack", "Jill") : 5.1 Creating Data Frames
No ratings yet
Kids C ("Jack", "Jill") : 5.1 Creating Data Frames
11 pages
First Machine Problem
No ratings yet
First Machine Problem
6 pages
Unit 2
No ratings yet
Unit 2
29 pages
BIO259 Note
No ratings yet
BIO259 Note
55 pages
Expt. No. Basic Math Date
No ratings yet
Expt. No. Basic Math Date
24 pages
BAN5
No ratings yet
BAN5
2 pages
Materi 4
No ratings yet
Materi 4
30 pages
20Sc02P-Statistical and Analytical: Name of Student: Branch: Sem: Register Number
No ratings yet
20Sc02P-Statistical and Analytical: Name of Student: Branch: Sem: Register Number
77 pages
Advanced R Data Analysis Training PDF
No ratings yet
Advanced R Data Analysis Training PDF
72 pages
DMPA Codes
No ratings yet
DMPA Codes
16 pages
Lab Book
No ratings yet
Lab Book
24 pages
Lab File AD PDF
No ratings yet
Lab File AD PDF
25 pages
Comp Lab 2 GunExample 2425
No ratings yet
Comp Lab 2 GunExample 2425
15 pages
Rtips. Revival 2012!: Paul E. Johnson June 8, 2012
No ratings yet
Rtips. Revival 2012!: Paul E. Johnson June 8, 2012
72 pages
Coding Self-Assessment 2023
No ratings yet
Coding Self-Assessment 2023
5 pages
Dofile - Quan Ly Va Lam Sach Du Lieu 2
No ratings yet
Dofile - Quan Ly Va Lam Sach Du Lieu 2
6 pages
DV Lab
No ratings yet
DV Lab
52 pages
All Values in The First Column
No ratings yet
All Values in The First Column
7 pages
Data Science Practicals
No ratings yet
Data Science Practicals
47 pages
Broomspatial
No ratings yet
Broomspatial
31 pages
Module 2.9
No ratings yet
Module 2.9
11 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
Group A Assignment No2 Writeup
No ratings yet
Group A Assignment No2 Writeup
9 pages
SML Practical 1to11
No ratings yet
SML Practical 1to11
23 pages
Data Tidying With Tidyr::: Cheat Sheet
No ratings yet
Data Tidying With Tidyr::: Cheat Sheet
2 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Analytic Geometry: Graphic Solutions Using Matlab Language
From Everand
Analytic Geometry: Graphic Solutions Using Matlab Language
Ing. Mario Castillo
No ratings yet
No Ph.D. Game Design With Three.js
From Everand
No Ph.D. Game Design With Three.js
Nikiforos Kontopoulos
No ratings yet

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Part 1 - Import and Wrangling Codes

Uploaded by

Part 1 - Import and Wrangling Codes

Uploaded by

# Data entry/ Import to R directly

# Age gender weight

# to edit or enter additional data

mydatatxt <-"age gender weight

mydata1<-read.table(header = TRUE, text = mydatatxt )

# read.table: reads data from a file or a character vector

studentgrades<- "StudentID, First, Last, Math, Science, Social

011, Bob, Smith, 90, 80, 67

012, Jane, Weary, 75, , 80 "

grade <-read.table(header = TRUE, text = studentgrades )

# DETECTION OF MISSING VALUES

airquality[!complete.cases(airquality),] # show each row with NA

any(is.na(airquality$Ozone)) # whether there is a missing under the column or not

colSums(is.na(airquality)) # sum of missing observation under each column

# importing Data from excel, spss and stata

practice <- read_dta("practice.dta")

Rain <- read_sav("Rain.sav")

Rain1 <- spss.get("Rain.sav",use.value.labels=TRUE)

EastHar <- read_excel("EastHar.xlsx")

exceldata <- read_csv("exceldata.csv")

# Import excel data from the nth sheet

data <- read_excel("EastHar.xlsx", sheet = 2)

write.table(df, file = "exported1.txt", col.names=TRUE, row.names=FALSE, sep = "")

write.table(df, file = "exported2.csv", col.names=TRUE, row.names=FALSE, sep = ",")

write.table(Rain, file = "exported3.csv", col.names=TRUE, row.names=FALSE, sep = ",")

# create a new variable

quiz<- data.frame(x1 = c(2, 2, 6, 4), x2 = c(3, 4, 2, 8))

USArrests[USArrests$Assault >= 200, ]

USArrests[Assault >= 200, ]

#BOOLEAN AND& , OR/

USArrests[USArrests$Assault > 50 & USArrests$UrbanPop < 70, ]

USArrests[USArrests$UrbanPop > 50 | USArrests$UrbanPop < 60, ]

USArrests[Assault > 50 & UrbanPop < 70, ]

USArrests[UrbanPop > 50 | UrbanPop < 60, ]

# order() function to sort a data frame

USArrests[order(USArrests$Murder, decreasing = TRUE), ]

USArrests[order(Murder, decreasing = TRUE), ]

# subset() function to to select parts of a data frame.

subset(USArrests, UrbanPop > 50& UrbanPop < 60)

DF1<- data.frame(ID=1:3, course1=c("A","B", "C"), course2=c("B", "C", "A"))

DF2<- data.frame(ID=1:4, course3=c("A","A", "B", "C"), course4=c("A", "A", "A", "B"))

total <- merge(DF1, DF2, by="ID") #by ID

## total <- merge(DF1, DF2, by=c("ID", "Region") # by Region

colSums(is.na(cars)) # sum of missing values under each column

force(cars)# to disply the data in the console

cars$dist[cars$dist == 999] <- NA

cars$distcat[cars$dist < 50] <- "Small"

cars$distcat[cars$dist >= 50 & cars$dist < 80] <- "Medium"

cars$distcat[cars$dist >= 80] <- "Long"

cars$dist[dist == 999] <- NA

cars$distcat[dist < 50] <- "Small"

cars$distcat[dist >= 50 & cars$dist < 80] <- "Medium"

cars$distcat[dist >= 80] <- "Long"

You might also like

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.