0% found this document useful (0 votes)
12 views5 pages

Part 1 - Import and Wrangling Codes

The document provides a comprehensive guide on data entry and importation into R, detailing methods for creating data frames, reading data from various formats (text, Excel, SPSS, Stata), and handling missing values. It also covers data wrangling techniques including creating new variables, extracting data, applying boolean logic, sorting, subsetting, merging data frames, and recoding variables. Additionally, it includes instructions for exporting data to different file formats.

Uploaded by

letadereje0
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views5 pages

Part 1 - Import and Wrangling Codes

The document provides a comprehensive guide on data entry and importation into R, detailing methods for creating data frames, reading data from various formats (text, Excel, SPSS, Stata), and handling missing values. It also covers data wrangling techniques including creating new variables, extracting data, applying boolean logic, sorting, subsetting, merging data frames, and recoding variables. Additionally, it includes instructions for exporting data to different file formats.

Uploaded by

letadereje0
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 5

# Interactive data entry: type the values directly into R ----

# Start from an empty template that fixes the column names and types.
infant_data <- data.frame(
  Age = numeric(0),
  gender = character(0),
  weight = numeric(0)
)

# Open the spreadsheet-style editor on the template and keep what was typed.
mydata <- edit(infant_data)
print(mydata)

# Example values to type into the editor:
#   Age gender weight
#     2      m    3.4
#     3      f    2.8
#     2      m    4.0
#     5      m    3.8
#     4      f    3.0
#     6      f    2.5

# Reopen the editor on the existing data to correct or append rows.
mydata <- edit(mydata)

## Embedding data in the script

# (1) Whitespace-delimited text; the first line supplies the column names.
mydatatxt <- "age gender weight

2 m 3.4

3 f 2.8

2 m 4.0"

# read.table() parses a character string passed as `text` exactly as it
# would parse a file with the same contents.
mydata1 <- read.table(text = mydatatxt, header = TRUE)

# (2) Comma-separated text with one missing score.
studentgrades <- "StudentID, First, Last, Math, Science, Social

011, Bob, Smith, 90, 80, 67

012, Jane, Weary, 75, , 80 "

# The fields are separated by commas, so `sep = ","` is required: with the
# default whitespace separator every value keeps its trailing comma and the
# score columns are read as text.
#   strip.white = TRUE  removes the padding spaces around each field, which
#                       also turns the blank Science score into NA.
#   colClasses          keeps StudentID as text so the leading zero in "011"
#                       is not lost.
grade <- read.table(
  text = studentgrades,
  header = TRUE,
  sep = ",",
  strip.white = TRUE,
  colClasses = c(StudentID = "character")
)

# Detecting missing values (built-in `airquality` data) ----

# Every row that contains at least one NA.
airquality[!complete.cases(airquality), ]

# Does the Ozone column contain any missing value at all?
anyNA(airquality$Ozone)

# Cell-by-cell logical matrix: TRUE where a value is missing.
is.na(airquality)

# Number of missing observations in each column.
colSums(is.na(airquality))

# Importing data from Stata, SPSS, Excel and CSV files ----

# Install only the packages that are not available yet, so re-running the
# script does not reinstall everything. The package is spelled "foreign".
# readr is listed because read_csv() below is a readr function.
needed_pkgs <- c("xlsx", "haven", "foreign", "Hmisc", "readxl", "readr")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach every package before the first call that needs it: read_excel()
# comes from readxl and read_csv() from readr.
library(xlsx)
library(haven)
library(foreign)
library(Hmisc)
library(readxl)
library(readr)

# Stata (.dta), read with haven
practice <- read_dta("practice.dta")
View(practice)

# SPSS (.sav), read with haven
Rain <- read_sav("Rain.sav")
View(Rain)

# SPSS (.sav), read with Hmisc
Rain1 <- spss.get("Rain.sav", use.value.labels = TRUE)
Rain1

# Excel: with no `sheet` argument the first sheet is read
EastHar <- read_excel("EastHar.xlsx")
View(EastHar)

# CSV file
exceldata <- read_csv("exceldata.csv")
View(exceldata)

# Excel: read the nth sheet (here the second one)
data <- read_excel("EastHar.xlsx", sheet = 2)

## Exporting data

# `df` has to be a data frame. Left undefined, the bare name resolves to the
# F-distribution density function stats::df and write.table() fails, so a
# small example data frame is created here.
df <- data.frame(
  age = c(2, 3, 2),
  gender = c("m", "f", "m"),
  weight = c(3.4, 2.8, 4.0)
)
df

# Tab-delimited text file. An empty separator (sep = "") would write the
# values of each row run together with nothing between them.
write.table(
  df,
  file = "exported1.txt",
  col.names = TRUE,
  row.names = FALSE,
  sep = "\t"
)

# Comma-separated file
write.table(
  df,
  file = "exported2.csv",
  col.names = TRUE,
  row.names = FALSE,
  sep = ","
)

# `Rain` is the data set read from "Rain.sav" earlier in this script.
write.table(
  Rain,
  file = "exported3.csv",
  col.names = TRUE,
  row.names = FALSE,
  sep = ","
)

## Data wrangling

# Creating a new variable ----
quiz <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)
print(quiz)

# Element-wise total of the two columns, stored in a new column named "SUM".
quiz[["SUM"]] <- quiz[["x1"]] + quiz[["x2"]]
print(quiz)

# Extracting values and rows (built-in `USArrests` data) ----

str(USArrests)

# Single value by position: row 3, column 2.
USArrests[3, 2]

# The same value through the column name.
USArrests$Assault[3]

# A bare `Assault[3]` fails with "object 'Assault' not found" because the
# column is not a variable of its own. with() evaluates the expression inside
# the data frame without changing the search path.
with(USArrests, Assault[3])

# Rows that satisfy a condition.
USArrests[USArrests$Assault >= 200, ]
USArrests[USArrests$UrbanPop == 80, ]

# The same filters with bare column names. with() replaces the
# attach()/detach() pair, which leaves the data frame on the search path if
# the script stops in between.
with(USArrests, USArrests[Assault >= 200, ])
with(USArrests, USArrests[UrbanPop == 80, ])

# Boolean operators: AND (&), OR (|) ----

# AND: both conditions must hold for a row to be kept.
USArrests[USArrests$Assault > 50 & USArrests$UrbanPop < 70, ]

# OR: at least one condition must hold. The thresholds select the states
# outside the 50-60 band; writing `UrbanPop > 50 | UrbanPop < 60` instead is
# TRUE for every possible value and would return the whole data frame.
USArrests[USArrests$UrbanPop < 50 | USArrests$UrbanPop > 60, ]

# The same filters with bare column names, using with() rather than
# attach()/detach() so the search path is left untouched.
with(USArrests, USArrests[Assault > 50 & UrbanPop < 70, ])
with(USArrests, USArrests[UrbanPop < 50 | UrbanPop > 60, ])

# Sorting a data frame with order() ----

# order() returns the row positions that put the column in sorted order;
# indexing the data frame with them reorders its rows.
USArrests[order(USArrests$Murder), ]                    # ascending
USArrests[order(-USArrests$Murder), ]                   # descending, or
USArrests[order(USArrests$Murder, decreasing = TRUE), ]

# The same sorts with bare column names. with() is used instead of
# attach()/detach() so nothing is left on the search path.
with(USArrests, USArrests[order(Murder), ])                    # ascending
with(USArrests, USArrests[order(-Murder), ])                   # descending, or
with(USArrests, USArrests[order(Murder, decreasing = TRUE), ])

# subset() selects the rows of a data frame that satisfy a condition.
subset(USArrests, subset = UrbanPop > 50 & UrbanPop < 60)

# merge() joins two data frames side by side on a key column.
DF1 <- data.frame(
  ID = 1:3,
  course1 = c("A", "B", "C"),
  course2 = c("B", "C", "A")
)
DF2 <- data.frame(
  ID = 1:4,
  course3 = c("A", "A", "B", "C"),
  course4 = c("A", "A", "A", "B")
)

# Join on ID.
total <- merge(x = DF1, y = DF2, by = "ID")

# To join on more than one key column:
#   total <- merge(DF1, DF2, by = c("ID", "Region"))

# all = TRUE includes all data from both data frames.
total <- merge(x = DF1, y = DF2, all = TRUE)

## Inspecting the built-in `cars` data

# Structure: column names, types and the first few values.
str(cars)

# Number of missing values in each column.
colSums(is.na(cars))

# Display the whole data set in the console.
print(cars)

## Recoding variables

# Treat the sentinel value 999 as missing.
cars$dist[cars$dist == 999] <- NA

# Variant 1: fill a new character column band by band with logical indexing.
# The column is initialised first so a re-run starts from a clean state;
# rows whose dist is NA keep NA as their category.
cars$distcat <- NA_character_
cars$distcat[cars$dist < 50] <- "Small"
cars$distcat[cars$dist >= 50 & cars$dist < 80] <- "Medium"
cars$distcat[cars$dist >= 80] <- "Long"

str(cars)

# Variant 2: the same recode in a single call to cut(). right = FALSE makes
# the intervals [-Inf, 50), [50, 80) and [80, Inf), matching the conditions
# used in variant 1. This replaces an attach(cars) version: an attached copy
# is a snapshot that does not see later changes made to cars$dist, and
# `dist` would also shadow the stats::dist() function while attached.
cars$distcat <- as.character(cut(
  cars$dist,
  breaks = c(-Inf, 50, 80, Inf),
  labels = c("Small", "Medium", "Long"),
  right = FALSE
))

str(cars)

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy