library(rpart)#for running trees library(foreign)#for importing Stata datasets true.malice.split <- 6 true.falsity.split <- 4 n <- 300 #number of cases set.seed(1)#set seeds to get same values every time script is run falsity <- runif(n, 0, 10)#create uniform variable on 0:10 interger scale malice <- runif(n,0,10) # same set.seed(2) e1 <- rnorm(n,0,2)#1st error term set.seed(3) e2 <- rnorm(n,0,2)#2nd error term y <- ifelse((falsity+e1)>=4 & (malice+e2)>=6, 1,0) table(y) # 0 1 #222 78 #FIGURE 3: #LOGIT RESULTS logit.example <- glm(y~falsity + malice, family=binomial (link = "logit")) summary(logit.example) predicted.logit <- ifelse(logit.example$fitted.values>.5,1,0) correct.logit <- predicted.logit==y table(correct.logit) #FALSE TRUE # 54 246 =.82 #get proportional reduction in error #100 x [(% correct.logitly predicted.logit - % in modal category )/(100%-% in modal category.] modal.cat <- (length(y)-sum(y))/length(y)#get # of cases in modal category pred.logit.cor <- sum(correct.logit)/length(y)#percent predicted correctly pre.logit.libel <- 100*((100*pred.logit.cor - 100*modal.cat)/(100-100*modal.cat))#pre print(pre.logit.libel, digits = 3)#=39.8 ########################################## ########################################## #TREE ########################################## ########################################## #get line for 2-dimensional partition #set y = .5, and each coef = 0 to get y-intercept and slope, #then take logit of both sides of equation intercept.logit <- -logit.example$coef[1]/logit.example$coef[2] slope.logit <- -intercept.logit/ (-logit.example$coef[1]/logit.example$coef[3]) tree.libel<-rpart(y~falsity + malice, cp=.001, method = "class")#create tree post(tree.libel, file="")#plot tree in R graphics window; file command tells R not to create pdf printcp(tree.libel, digits = 2)#print cp table #Root node error: 83/300 = 07667 #n= 300 # CP nsplit rel error xerror xstd #1 0469880 0 1.00000 1.00000 0.093353 #2 0.0481928 2 0.50602 0.59036 0.077143 #3 0.0040161 4 0.40964 0.57831 0.076504 #4 0.0010000 7 0.39759 0.62651 0.078993 #prune tree based on cp results to 2 splits tree.libel <- prune(tree.libel, cp = .10) #PLOT TREE post(tree.libel, file="")#plot tree in R graphics window; file command tells R not to create pdf split.1.error <- 5.046 #1st branch split split.2.error <- 4.801 #2nd branch split resub.predicted <- ifelse(predict(tree.libel, type="vector") == 2, 1, 0) #create vector with resubsitution prediction #default is 2's for 1's and 1's for 0 - use ifelse to switch correct.resub <- resub.predicted==y table(correct.resub) #FALSE TRUE = .86 # 41 259 pred.tree.cor <- sum(correct.resub)/length(y)#percent predicted correctly for PRE pred.tree.cor pre.tree.libel <- 100*((100*pred.tree.cor - 100*modal.cat)/(100-100*modal.cat))#pre pre.tree.libel #Cross-Validated Prediction #simulate 1000 times and take mean n.sims <- 1000 pre.xv.libel <- rep(NA, length(n.sims)) pred.correct <- rep(NA, length(n.sims)) for (i in 1:n.sims){ xv.libel <- ifelse((xpred.rpart(tree.libel, xval=10, cp = .02)) == 2, 1, 0) #cross-validated prediction; correct.xv.libel <- ifelse(xv.libel==y,1,0) pred.cor.xv.libel <- sum(correct.xv.libel)/length(y) pred.correct[i] <- pred.cor.xv.libel pre.xv.libel[i] <- 100*((100*pred.cor.xv.libel - 100*modal.cat)/(100-100*modal.cat)) } print(summary(pred.correct)) #Cross-validation prediction rate print(summary(pre.xv.libel))#Cross-validation PRE cv.predicted.tree <- ifelse(xpred.rpart(tree.libel)[,2] == 2, 1,0) #cross-validated prediction, contained in 2nd column of matrix correct.cv.tree <- cv.predicted.tree==y table(correct.cv.tree) #FALSE TRUE = .83 # 47 253 ################################################################# ################################################################# #FIGURE 4 ################################################################# ################################################################# pdf("Figure_4.pdf", height = 5, width = 5) par(mfrow = c(1,1), mar=c(4,4,1,1), pty = "s")#make plot square plot(malice, falsity, xlab="",ylab="", type="n", axes = F, xaxs = "i", yaxs = "i", xlim = c(0,10), ylim = c(0,10)) # call empty plot axis(1, at = seq(0,10, by = 2), mgp = c(2,.5,0), cex.axis = 1.2) axis(2, at = seq(0,10, by = 2), las = 1, mgp = c(2,.5,0) , cex.axis = 1.2) mtext("Malice", 1, line = 2, cex = 1.5) mtext("Falsity", 2, line = 2, cex = 1.5) #add shading for incorrect regions of logit partition #draw first so it doesn't cancel out lines #first: get intersections where logit lines hits plot boundaries and true rule top.int <- (10-intercept.logit)/slope.logit#where line hits top of panel right.int <- intercept.logit + 10*slope.logit#where line hits right of panel top.rule.int <- intercept.logit + slope.logit*true.malice.split#where line crosses vertical split of rule right.rule.int <- (true.falsity.split - intercept.logit)/slope.logit# #shading -- first -- where logit is wrong polygon(x=c(top.int, true.malice.split, true.malice.split), y = c(10, 10,top.rule.int ), #left-hand block of "not liable" region col= "gray90", border=F) polygon(x=c(true.malice.split, true.malice.split, right.rule.int), y = c(top.rule.int, true.falsity.split, true.falsity.split), #left-hand block of "not liable" region col= "gray90", border=F) polygon(x=c(right.rule.int, 10,10), y = c( true.falsity.split, true.falsity.split, right.int), #left-hand block of "not liable" region col= "gray90", border=F) box() #draw box around plot -- this is the case-space region segments(true.malice.split,true.falsity.split,true.malice.split,10)#draw true rule -- solid line segments(true.malice.split,true.falsity.split,10,true.falsity.split) segments(split.1.error,split.2.error,split.1.error,10, lwd = 1.1, lty = 5)#vertical line marking estimates case partition segments(split.1.error,split.2.error,10,split.2.error, lwd = 1.1, lty = 5)#horizontal line marking case partition abline(intercept.logit,slope.logit,lty=2, lwd = 1.1) #Plot logit partition text(3,3,"Not Liable", cex = 1.5, , font = 2) text(8.5,7,"Liable", cex = 1.5, font = 2) text(2.5,9.3,"Logit partition", cex = 1.2) text(7.9,9, "True rule", cex = 1.2) text(2.9,6, "Tree partition", cex = 1.2) arrows(4,9.3,4.5,9.3, length =.05) #Logit arrow arrows(6.7,9,6.15,9, length =.05) # Rule arrow arrows(4.5,6,4.9,6, length =.05) #Tree arrow dev.off()