# If you do not see the menu on the left, click here to see it.

 

# Working directory
# The path below is specific to one machine. Switch to it only when it exists,
# so the rest of the script (which reads its data from a URL) still runs on
# other machines instead of stopping at setwd().
work.dir <- "H:/public_html/R"
if (dir.exists(work.dir)) {
  setwd(work.dir)
} else {
  warning("Working directory not found, keeping the current one: ", work.dir,
          call. = FALSE)
}

# Identifying the working directory
getwd()

# Memory limit
# memory.limit() is only meaningful on Windows and is an unsupported stub that
# just warns from R 4.2.0 onwards, so query it only where it still works.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
  memory.limit()
}

 

# Load the 'foreign' package, which supplies read.dta() for Stata files
library("foreign")

# Read the example dataset directly from the web
mydata <- read.dta(file = "http://dss.princeton.edu/training/students.dta")

 

 

 


# Keeping only the variables of interest (optional, shown for illustration)
library("foreign")
mydata <- read.dta(file = "http://dss.princeton.edu/training/students.dta")

# Select the two columns by name, then display the reduced data frame
mydata1 <- mydata[, c("major", "sat")]
mydata1

 

 

 


# Sorting the data by group and value
library(foreign)
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")

# Sorting the data by major, and by sat within each major.
# with() resolves 'major' and 'sat' inside mydata itself. attach() would put
# the columns on the search path behind the global environment, where a global
# object with the same name would silently be used instead.
mydata <- mydata[with(mydata, order(major, sat)), ]
mydata

 

 

# Same sort on the two-column extract
mydata1 <- mydata[c("major", "sat")]
mydata1

# Order by major, then by sat. with() is used instead of attach()/detach() so
# the column lookup cannot be masked by global objects named 'major' or 'sat'.
mydata1 <- mydata1[with(mydata1, order(major, sat)), ]
mydata1

 

 

 

 

 

 


##### Getting descriptive statistics per group

# Start from a fresh copy of the data and keep only 'major' and 'sat'
library("foreign")
mydata <- read.dta(file = "http://dss.princeton.edu/training/students.dta")
mydata1 <- mydata[, c("major", "sat"), drop = FALSE]
mydata1

 

### Getting the max value by group, in this case by 'major'

# One maximum per level of 'major'; the result is indexed by the level names
max.sat <- tapply(mydata1$sat, mydata1$major, max)

# Adding max variable to the dataset: look up each row's group maximum by name
mydata1$max.sat <- max.sat[as.character(mydata1$major)]
mydata1

# Extracting only data that matches the max value
# which() drops rows where the comparison is NA, as subset() does
mydata1.final <- mydata1[which(mydata1$sat == mydata1$max.sat), ]
mydata1.final

 

 

### Getting the mean value by group

# One mean per level of 'major'; the result is indexed by the level names
mean.sat <- tapply(mydata1$sat, mydata1$major, mean)

# Adding mean.sat variable to the dataset: look up each row's group mean by name
mydata1$mean.sat <- mean.sat[as.character(mydata1$major)]
mydata1

 

 

 

# Getting the sum value by group, in this case by 'major'
sum.sat <- with(mydata, tapply(sat, major, sum))

# Adding sums per group to the dataset
mydata$sum.sat <- sum.sat[as.character(mydata$major)]

# Sum per group counting only scores above 1600.
# sum.sat.mis used to be referenced here before anything had defined it, so it
# is computed in place. Scores of 1600 or less are treated as missing and
# skipped, which gives the same totals as zero-filling them first.
# NOTE(review): assumes the zero-filled total is the one wanted here - confirm.
sum.sat.mis <- with(
  mydata,
  tapply(ifelse(sat > 1600, sat, NA), major, sum, na.rm = TRUE)
)
mydata$sum.sat.mis <- sum.sat.mis[as.character(mydata$major)]

 

 

 

 

 

 

 

 


##### Creating variables

# Creating variables conditionally: keep 'sat' where it exceeds the group
# mean, otherwise use the group mean. The same rule is written in two ways.

# Form 1: evaluate the expression inside the data frame
mydata1$mean1 <- with(mydata1, ifelse(sat > mean.sat, sat, mean.sat))

# Form 2: spell out every column reference
mydata1$mean2 <- ifelse(
  mydata1[["sat"]] > mydata1[["mean.sat"]],
  mydata1[["sat"]],
  mydata1[["mean.sat"]]
)

 

 

 

 


##### Replacing values within variables

# Replacing specific values using a conditional:
# keep scores above 1600 and set everything else to NA
mydata$sat.miss <- with(mydata, ifelse(sat > 1600, sat, NA))

# Replacing specific values directly:
# copy 'sat' and blank out every score below 1601
mydata$sat.miss2 <- replace(mydata$sat, mydata$sat < 1601, NA)

# Browsing selected variables
mydata[c("major", "sat", "sat.miss", "sat.miss2", "sum.sat", "sum.sat.mis")]

 

 

 

 

 

 


# Collapse per category
library(foreign)
mydata <- read.dta("http://dss.princeton.edu/training/students.dta")

# Mean by gender (mean on all numeric variables)
# Only numeric columns are passed to aggregate(): calling mean() on factor or
# character columns returns NA with a warning for every group.
numeric.cols <- vapply(mydata, is.numeric, logical(1))
mean.gender <- aggregate(
  mydata[numeric.cols],
  by = list(sex = mydata$gender),
  FUN = mean,
  na.rm = TRUE
)

# Sum of 'sat' by major (one row per major)
aggregate(mydata["sat"], by = mydata["major"], FUN = sum, na.rm = TRUE)

 

 

 

 

 

 

 


# Misc

# mydata is re-read from the source file earlier in this script, which drops
# the derived columns, so rebuild sat.miss and sat.miss2 before using them.
mydata$sat.miss <- ifelse(mydata$sat > 1600, mydata$sat, NA)
mydata$sat.miss2 <- mydata$sat
mydata$sat.miss2[mydata$sat.miss2 < 1601] <- NA

# Group sums while the NAs are still in place (any group containing an NA
# sums to NA)
sum.sat.mis <- with(mydata, tapply(sat.miss, major, sum))

# Alternative: impute the overall mean instead of zero
#mydata$sat.miss[is.na(mydata$sat.miss)] <- mean(mydata$sat.miss, na.rm = TRUE)

# Replace the missing values with 0 so they no longer turn the sums into NA
mydata$sat.miss[is.na(mydata$sat.miss)] <- 0
mydata$sat.miss2[is.na(mydata$sat.miss2)] <- 0

# Group sums after zero-filling
sum.sat.mis <- with(mydata, tapply(sat.miss, major, sum))
sum.sat.mis2 <- with(mydata, tapply(sat.miss2, major, sum, na.rm = TRUE))

mydata <- as.data.frame(mydata)

# Sum of sat by gender. Stored as sum.sat.gender rather than 'sum' so the
# result does not shadow base::sum().
sum.sat.gender <- tapply(mydata$sat, mydata$gender, sum)