If
you do not see the menu on the left click here to see it
#Working directory
#NOTE: the path below is machine-specific; adjust it to a folder that exists
#on your computer before running.
setwd("H:/public_html/R")
#Identifying the working directory
getwd()
#Memory limit
#NOTE: memory.limit() is Windows-specific and, since R 4.2.0, only a stub
#that returns Inf with a warning.
memory.limit()
#Library --foreign-- to read Stata files
library(foreign)
#Getting data from web
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")
![]()
#Keeping only the variables of interest (optional step, shown for illustration)
library(foreign)
mydata <- read.dta(file = "http://dss.princeton.edu/training/students.dta")
#Select the two columns by name and display the reduced data frame
mydata1 <- mydata[, c("major", "sat")]
mydata1
![]()
#Sorting the data by group and value
library(foreign)
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")
#Sorting the data by major, then by sat within each major.
#Columns are referenced explicitly instead of using attach()/detach(), so the
#search path is never modified and no stale copy of the data can be picked up.
mydata <- mydata[order(mydata$major, mydata$sat), ]
mydata
#Same sort applied to the two-column subset
mydata1 <- mydata[c("major", "sat")]
mydata1
mydata1 <- mydata1[order(mydata1$major, mydata1$sat), ]
mydata1
![]()
##### Getting descriptive statistics per group
library(foreign)
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")
mydata1 <- mydata[c("major", "sat")]
mydata1
###Getting the max value by group, in this case by 'major'
#tapply() returns a vector named by the levels of 'major'
max.sat <- with(mydata1, tapply(sat, major, max))
#Adding max variable to the dataset
#Indexing the named vector by each row's major (as character) gives every
#row the maximum of its own group.
mydata1$max.sat <- max.sat[as.character(mydata1$major)]
mydata1
#Extracting only data that matches the max value
#Inside subset(), max.sat refers to the column just added to mydata1, so each
#row's sat is compared with the maximum of its own major.
mydata1.final <- subset(mydata1, sat == max.sat)
mydata1.final
###Getting the mean value by group
#tapply() returns a vector named by the levels of 'major'
mean.sat <- with(mydata1, tapply(sat, major, mean))
#Adding mean.sat variable to the dataset
#Indexing the named vector by each row's major (as character) gives every
#row the mean of its own group.
mydata1$mean.sat <- mean.sat[as.character(mydata1$major)]
mydata1
#Getting the sum value by group, in this case by 'major'
sum.sat <- with(mydata, tapply(sat, major, sum))
#Adding sums per group to the dataset
mydata$sum.sat <- sum.sat[as.character(mydata$major)]
#sum.sat.mis is the per-major sum of sat after scores of 1600 or less have
#been set to NA (the same rule the "Replacing values" section uses to build
#sat.miss). It is computed here because it was previously used before being
#defined. No na.rm is given, so a major with any NA gets an NA sum.
sum.sat.mis <- with(mydata, tapply(ifelse(sat > 1600, sat, NA), major, sum))
mydata$sum.sat.mis <- sum.sat.mis[as.character(mydata$major)]
![]()
#####Creating variables
#Conditional variables: take sat when it is above the group mean, otherwise
#take the group mean. Both forms below produce the same values.
mydata1$mean1 <- with(mydata1, ifelse(sat > mean.sat, sat, mean.sat))
mydata1$mean2 <- ifelse(mydata1$sat > mydata1$mean.sat,
                        mydata1$sat,
                        mydata1$mean.sat)
![]()
#####Replacing values within variables
#Replacing specific values using conditional
#Scores above 1600 are kept; everything else becomes NA.
mydata$sat.miss <- ifelse(mydata$sat > 1600, mydata$sat, NA)
#Replacing specific value directly
#Start from a copy of sat, then overwrite the values below 1601 with NA.
mydata$sat.miss2 <- mydata$sat
mydata$sat.miss2[mydata$sat.miss2 < 1601] <- NA
![]()
#Display the original score next to the derived columns
mydata[c("major", "sat", "sat.miss", "sat.miss2", "sum.sat", "sum.sat.mis")]
![]()
#Collapse per category
library(foreign)
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")
#Mean by gender (mean on all numeric variables)
#Only numeric columns are passed to aggregate(); taking the mean of factor or
#character columns would only produce warnings and NA columns.
mean.gender <- aggregate(mydata[vapply(mydata, is.numeric, logical(1))],
                         by = list(sex = mydata$gender),
                         FUN = mean, na.rm = TRUE)
#Sum by major (the result is only printed; mydata itself is not collapsed)
aggregate(mydata[c("sat")], mydata["major"], sum, na.rm = TRUE)
![]()
#Misc
#NOTE: this section needs the sat.miss and sat.miss2 columns created in the
#"Replacing values" section. Re-reading mydata from the web (as the
#"Collapse per category" section does) drops them, so recreate them first.
#Sum per major while sat.miss still contains NA: without na.rm, any major with
#an NA gets an NA sum.
sum.sat.mis <- with(mydata, tapply(sat.miss, major, sum))
#Alternative (disabled): replace missing values with the overall mean
#mydata$sat.miss[is.na(mydata$sat.miss)] <- mean(mydata$sat.miss, na.rm = TRUE)
#Replace missing values with 0
mydata$sat.miss[is.na(mydata$sat.miss)] <- 0
mydata$sat.miss2[is.na(mydata$sat.miss2)] <- 0
#Recompute the sums now that the missing values count as 0
sum.sat.mis <- with(mydata, tapply(sat.miss, major, sum))
sum.sat.mis2 <- with(mydata, tapply(sat.miss2, major, sum, na.rm = TRUE))
mydata <- as.data.frame(mydata)
#Sum of sat by gender
#NOTE(review): the result is stored in a variable named 'sum', which shadows
#base::sum wherever a non-function lookup is made; consider renaming it once
#it is confirmed that nothing else relies on this name.
sum <- tapply(mydata$sat, mydata$gender, sum)