# PollingExample.r
#
# Supplementary materials for 
# `An assessment of the causes of the errors In the 2015 UK General Election opinion polls
# by Sturgis, P., Kuha, J., Baker, N., Callegaro, M., Fisher, S., Green, J., Jennings, W., Lauderdale, B. E., and Smith, P.   
# Journal of the Royal Statistical Society, Series A
#

source("PollingExampleDataAndFunctions.R")
# save(PollData,poll.estimation,poll.bootstrap,quota.resample,file="PollingExample.RData") 

library(survey)

###########################################################################
# Variables in the data frame PollData:

head(PollData)
table(PollData$SexbyAge)
table(PollData$Region)
table(PollData$PartyID)
table(PollData$LTV)
table(PollData$VoteIntention)

###########################################################################
# Quota variables: Dummy variables for each of their categories, 
# for use in bootstrap resampling later  

quota.cols <- NULL
j <- 0
for(i in levels(PollData$SexbyAge)){
	j <- j+1
	name.tmp <- paste("Dsexage",j,sep="") 
	PollData[,name.tmp] <- as.numeric(PollData$SexbyAge==i)
	quota.cols <- c(quota.cols, name.tmp)
}

j <- 0
for(i in levels(PollData$Region)){
	j <- j+1
	name.tmp <- paste("Dregion",j,sep="") 
	PollData[,name.tmp] <- as.numeric(PollData$Region==i)
	quota.cols <- c(quota.cols, name.tmp)
}

# Check:
t.tmp <- c(table(PollData$SexbyAge),table(PollData$Region))
t.tmp-colSums(PollData[,quota.cols]) # Should be all 0

###########################################################################
# Weighting targets

table(PollData$SexbyAge)
sex.age.dist <- data.frame(
	SexbyAge=levels(PollData$SexbyAge),
	Freq=c(6,12.6,16.9,13,5.9,12.7,17.3,15.6)/100	
)
sex.age.dist

table(PollData$Region)
region.dist <- data.frame(
	Region=levels(PollData$Region),
	Freq=c(24.6,16.4,9.6,12.8,22.9,5,8.7)/100	
)
region.dist

table(PollData$PartyID)
partyid.dist <- data.frame(
	PartyID=levels(PollData$PartyID),
	Freq=c(31.6,27.8,9.5,1.9,4.9,24.3)/100	
)
partyid.dist

##########################################################################
# Gathering everything in one object for convenience

levels(PollData$VoteIntention) # check, to inform the value of parties below

Poll.information <- list(data=PollData,
	est.function=poll.estimation,
	quota.columns=quota.cols, 
	weighting.margins=list(~SexbyAge,~Region,~PartyID),
	weighting.targets=list(sex.age.dist,region.dist,partyid.dist),
	LTV.variable="LTV",
	Vote.variable="VoteIntention",
	parties=levels(PollData$VoteIntention)[1:8]
)

#########################################################################
# Point estimation of vote shares:

Poll.information$est.function(Poll.information)

# Ignore the warning message "No weights or probabilities supplied, assuming equal probability"

#########################################################################
# Resampling from the data, to match quota targets
# ** An illustration only: This is done inside the poll.boostrap function, 
# and does not need to be carried out separately
#

resample.tmp <- quota.resample(Poll.information$data,Poll.information$quota.columns)

# Ideally these should be the same, but the sample size of the resample
# may also be smaller, if the resampling algorithm ran out of observations 
# to sample from before the resample was filled. 

c(nrow(Poll.information$data),nrow(resample.tmp)) 

# These should be all 0, unless the resample is smaller than the original sample 

colSums(PollData[,quota.cols])-colSums(resample.tmp[,quota.cols]) 

#########################################################################
# Bootstrap estimates of sampling variability
#
# Using the poll.bootstrap function just to calculate the point estimates

poll.bootstrap(Poll.information,n.bootstrap=0)

# Boostrap samples and confidence intervals, first excluding the (slower) BCA intervals
# Try first with a small number of boostrap samples (here 100), and then
# a larger one (we have used 10000)

Poll.bootstrap.res <- poll.bootstrap(Poll.information,n.bootstrap=100)
Poll.bootstrap.res[1:2]
head(Poll.bootstrap.res[[3]])

# Adding the BCA (adjusted percentile) intervals (this will also take some minutes)

Poll.bootstrap.res <- poll.bootstrap(Poll.information,bootstrap.samples=Poll.bootstrap.res,bca=T)
Poll.bootstrap.res[1:2]
round(head(Poll.bootstrap.res[[3]]),3)

########################################################################## 




