*************************************************************
*Practical 2: Discrete-Time Logit Models for Recurrent Events
*Entry (re-entry) into employment for women
*************************************************************

* Turn off the -more- pause so output scrolls without interruption
set more off

********************************
*2. Exploring the data structure
********************************

* Load the data and order the records by pid, then spell, then t
use bhps, clear
sort pid spell t

* Look at the first 30 records
list pid spell t employ event everjob lptime marstat in 1/30


** Total number of women **

* Option 1: codebook reports the number of unique values of pid
codebook pid

* Option 2: flag the first record of each woman (1 on that record, missing
* elsewhere) and summarize over the flagged records only, so that the
* number of observations reported equals the number of women
by pid: generate firstwom = 1 if _n == 1
summarize pid if firstwom == 1

* Keep only records where employ is not 1 (i.e. drop spells in employment)
drop if employ == 1

* The first record of a woman may have been dropped, so rebuild the flag
* on the restricted sample
drop firstwom
by pid: generate firstwom = 1 if _n == 1
summarize pid if firstwom == 1

** Total number of non-employment spells **

* Flag the last record of every (pid, spell) combination; there is exactly
* one flagged record per spell
by pid spell: generate lastsp = 1 if _n == _N
summarize pid if lastsp == 1

** Distribution of the number of non-employment spells per woman **

* count() counts the non-missing values of lastsp within pid, which is the
* number of spells for that woman; tabulate it once per woman
by pid: egen nspell = count(lastsp)
drop lastsp
tabulate nspell if firstwom == 1



****************************************
*3. Modelling recurrent events in Stata
****************************************

* Duration dummies (durations >=10 are grouped): tabulate with generate()
* creates one indicator per category of tgp, named tgp1, tgp2, ...
tabulate tgp, generate(tgp)

* Tell Stata that pid identifies the individual (panel) unit
xtset pid

* Random effects logit with duration only; tgp1 is omitted and therefore
* acts as the reference category
xtlogit event tgp2-tgp10, re

* Age-group dummies agegp1, agegp2, ... from the categories of ageg8
tabulate ageg8, generate(agegp)

* Declare the individual ID again before fitting the second model
xtset pid

* Random effects logit adding age group and everjob; tgp1 and agegp1 are
* omitted and act as the reference categories
xtlogit event tgp2-tgp10 agegp2-agegp8 everjob, re


********************************
*4. Prediction of individual discrete-time
*hazard probabilities
********************************

* Fit the full model once more and keep the results under the name m1 so
* that every prediction below can refer back to them
xtlogit event tgp2-tgp10 agegp2-agegp8 everjob, re
estimates store m1

****METHOD 1: PREDICTIONS WITH U FIXED AT 0 (cluster-specific probabilities)****

* Start from the reference duration category: every duration dummy at zero
forvalues k = 2/10 {
	replace tgp`k' = 0
}

* Outer loop: everjob fixed at 0, then at 1, for every record.
* Inner loop: duration category 1,...,10. Category 1 is the reference, so
* no dummy is switched on for it; for k>=2 the k-th dummy is switched on
* for the prediction and switched off again afterwards.
* Result: pt<k>e<job> = predicted probability with u=0.
forvalues job = 0/1 {
	replace everjob = `job'
	forvalues k = 1/10 {
		if `k' > 1 {
			replace tgp`k' = 1
		}
		* Linear prediction from m1 (random effect not included)
		tempvar linpred
		estimates for m1: predict `linpred', xb
		* Convert the linear prediction to a probability
		generate pt`k'e`job' = invlogit(`linpred')
		drop `linpred'
		if `k' > 1 {
			replace tgp`k' = 0
		}
	}
}


* Summarize the 20 predicted probabilities (no separator lines)
summarize pt1e0-pt10e1, separator(0)

codebook pt5e0

****METHOD 2: PREDICTIONS WITH SIMULATED VALUES OF U (population-averaged probabilities)****

* Indicator (0/1) for the first observation of each woman
sort pid
by pid: generate firstob = _n == 1

* Fix the random number seed so the simulated random effects are reproducible
set seed 121

* Start from the reference duration category: every duration dummy at zero
forvalues k = 2/10 {
	replace tgp`k' = 0
}

* Outer loop: everjob fixed at 0, then at 1, for every record.
* Inner loop: duration category 1,...,10 (category 1 is the reference, so
* no dummy is switched on for it).
* A new set of random effects is drawn for every (everjob, duration)
* combination.
* Result: pst<k>e<job> = predicted probability with a simulated u.
forvalues job = 0/1 {
	replace everjob = `job'
	forvalues k = 1/10 {
		if `k' > 1 {
			replace tgp`k' = 1
		}
		* Linear prediction from m1 (random effect not included)
		tempvar linpred ranef
		estimates for m1: predict `linpred', xb
		* One normal draw per woman, with the estimated random effect
		* standard deviation, placed on her first observation ...
		estimates for m1: generate `ranef' = rnormal(0, e(sigma_u)) if firstob == 1
		* ... and copied down to all of her remaining observations
		by pid: replace `ranef' = `ranef'[1] if _n > 1
		* Add the random effect to the linear predictor and convert to a probability
		generate pst`k'e`job' = invlogit(`linpred' + `ranef')
		drop `linpred' `ranef'
		if `k' > 1 {
			replace tgp`k' = 0
		}
	}
}

* Summarize the 20 simulated probabilities (no separator lines)
summarize pst1e0-pst10e1, separator(0)

codebook pst5e0

********************************
*5. Creating a dataset of average predictions
********************************

* Replace the data in memory by a single record holding the sample mean of
* every predicted probability from both methods
collapse (mean) pt1e0-pt10e1 pst1e0-pst10e1

* Make 20 copies of that record, one per combination of everjob (0/1) and
* tgp (1,...,10): rows 1-10 get everjob=0, rows 11-20 get everjob=1, and
* tgp numbers the rows within each value of everjob
expand 20
generate everjob = _n > 10
sort everjob
by everjob: generate tgp = _n

* Long-form hazard variables, one per prediction method, filled row by row
* from the matching wide-form mean; each wide variable is dropped as soon
* as it has been copied
generate pmethod1 = .
generate pmethod2 = .

forvalues job = 0/1 {
	forvalues k = 1/10 {
		replace pmethod1 = pt`k'e`job' if tgp == `k' & everjob == `job'
		replace pmethod2 = pst`k'e`job' if tgp == `k' & everjob == `job'
		drop pt`k'e`job' pst`k'e`job'
	}
}

list

*Plot the discrete-time hazard functions: one connected line per
*combination of everjob (0/1) and prediction method (1/2).
*The graph is given its own name: an unnamed graph is stored as "Graph",
*so the survival plot drawn further down would otherwise replace this one
*when the do-file is run straight through.
twoway (connected pmethod1 tgp if everjob==0) ///
(connected pmethod1 tgp if everjob==1) ///
(connected pmethod2 tgp if everjob==0) ///
(connected pmethod2 tgp if everjob==1), ///
legend(order(1 "everjob=0 (method 1)" 2 "everjob=1 (method 1)" ///
3 "everjob=0 (method 2)" 4 "everjob=1 (method 2)")) scheme(s1mono) ///
name(hazard, replace)

*Derive the associated survival functions.
*Within each value of everjob the survival value is 1 at tgp=1 and, for
*tgp>1, the previous row's survival value times (1 - previous row's hazard).
*replace works through the rows in order, so each row uses the already
*updated value of the row before it.
sort everjob tgp
gen smethod1=1
gen smethod2=1
by everjob: replace smethod1=smethod1[_n-1]*(1-pmethod1[_n-1]) if tgp>1
by everjob: replace smethod2=smethod2[_n-1]*(1-pmethod2[_n-1]) if tgp>1

list

*Plot the survival functions (same layout as the hazard plot), again under
*a separate graph name so that both plots remain available
twoway (connected smethod1 tgp if everjob==0) ///
(connected smethod1 tgp if everjob==1) ///
(connected smethod2 tgp if everjob==0) ///
(connected smethod2 tgp if everjob==1), ///
legend(order(1 "everjob=0 (method 1)" 2 "everjob=1 (method 1)" ///
3 "everjob=0 (method 2)" 4 "everjob=1 (method 2)")) scheme(s1mono) ///
name(survival, replace)


