*************************************************************
*Practical 3: Two-state duration model
*Transitions between employment and non-employment
*************************************************************

set more off

**************************************
*Import parameter estimates from Sabre
**************************************

*Read the edited Sabre log file: one row per parameter, holding the
*parameter name (par), its estimate (est) and its standard error (se)
infile str12 par est se using prac3_parests.txt, clear
*Standard errors are not used anywhere below
drop se
save parests, replace

*With the estimates still in memory, slice them into three column vectors
*whose rows are labelled by parameter name:
*   rows 1-18  -> br1
*   rows 19-35 -> br2
*   rows 36 on -> sig
mkmat est if _n<=18, matrix(br1) rownames(par)
mkmat est if _n>18 & _n<=35, matrix(br2) rownames(par)
mkmat est if _n>35, matrix(sig) rownames(par)

*Rows 36, 37 and 38 hold the two random effect standard deviations and
*their correlation; keep each one as a scalar
scalar sigr1=est[36]
scalar sigr2=est[37]
scalar corr=est[38]

*Echo what was stored
matrix list br1
matrix list br2
scalar list sigr1 sigr2 corr


***********************************************************
*Read BHPS data and recreate explanatory variables
*Most of this syntax has been copied directly from prac3.do
***********************************************************

use bhps, clear
sort pid spell t

* Indicator variables for every categorical covariate
* (category 1 is the omitted reference level in each case)

forvalues g = 2/10 {
  gen tgp`g' = tgp==`g'
}
forvalues a = 2/8 {
  gen age`a' = ageg8==`a'
}

*State indicators: r1 flags employ==0 and r2 flags employ==1
*r is the response index (1=non-employment, 2=employment); it is not
*among the variables retained at the end of this section
gen r1 = employ==0
gen r2 = employ==1
gen r=employ+1

*Interact each state indicator with the duration (tgp) and age dummies.
*While doing so, collect the variable names state by state in the order
*state dummy, duration interactions, age interactions
foreach s in r1 r2 {
  local vars_`s' `s'
  forvalues g = 2/10 {
    gen `s'_t`g' = `s'*tgp`g'
    local vars_`s' `vars_`s'' `s'_t`g'
  }
  forvalues a = 2/8 {
    gen `s'_age`a' = `s'*age`a'
    local vars_`s' `vars_`s'' `s'_age`a'
  }
}

*everjob is interacted with the r1 state only; it goes last in the r1 list
gen r1_ejob = r1*everjob
local vars_r1 `vars_r1' r1_ejob

compress

*Keep identifiers plus explanatory variables, and arrange them in the same
*order as the parameter estimates (all r1 terms, then all r2 terms)
local retained pid spell t `vars_r1' `vars_r2'
keep `retained'
order `retained'
   

******************************************************************************
*Prediction of individual discrete-time hazard probabilities
*For illustration we will compute probabilities for transitions
*out of non-employment (i.e. into employment).  The relevant coefficients
*and covariates have the 'r1' prefix (r1 equals 1 when employ==0)
*These predictions will be made only for years when a woman is not employed
******************************************************************************

*Select non-employment years: r1 equals 1 when employ==0, so these are the
*years in which a transition into employment can occur
keep if r1==1

*List of covariates (in same order as coefficients in Sabre output)
global xvar r1 r1_t2 r1_t3 r1_t4 r1_t5 r1_t6 r1_t7 r1_t8 r1_t9 r1_t10 /// 
   r1_age2 r1_age3 r1_age4 r1_age5 r1_age6 r1_age7 r1_age8 r1_ejob

*Copy the r1 coefficients from matrix br1 into scalars br11, br12, ...
*(one per covariate in $xvar, in the same order).  This is done once here
*rather than inside every prediction loop
local k=0
foreach x of global xvar {
	local k=`k'+1
	scalar br1`k'=br1[`k',1]
}


****METHOD 1: PREDICTIONS WITH U FIXED AT 0 (cluster-specific probabilities)****

*For everjob = 0 and then everjob = 1, and for each duration category
*tgp = 1,...,10, overwrite the everjob and tgp covariates with the chosen
*values (all other covariates keep their observed values) and compute the
*probability of a transition into employment with u=0.
*The result for tgp=i, everjob=e is stored in pt<i>e<e>
forvalues e = 0/1 {

	*Fix everjob at the chosen value (r1 is 1 for every retained row)
	replace r1_ejob=`e'

	*Set all tgp dummies to zero (this is the tgp=1 reference category)
	forvalues g = 2/10 {
		replace r1_t`g'=0
	}

	forvalues i = 1/10 {

		*Switch on the dummy for category i (none is needed for tgp=1)
		if `i'>1 {
			replace r1_t`i'=1
		}

		*Linear prediction xb = sum of covariate*coefficient.  scalar()
		*makes sure the stored coefficient is used even if a variable
		*name could be abbreviated to the same string
		local k=0
		gen xb=0
		foreach x of global xvar {
			local k=`k'+1
			replace xb=xb+`x'*scalar(br1`k')
		}
		gen pt`i'e`e'=invlogit(xb)
		drop xb

		*Switch the dummy off again before moving to the next category
		if `i'>1 {
			replace r1_t`i'=0
		}
	}
}

sum pt1e0-pt10e1, sep(0)


****METHOD 2: PREDICTIONS WITH SIMULATED VALUES OF U (population-averaged probabilities)****

*Set random number seed
set seed 121

*Draw individual random effect for one observation per individual
*Draw from bivariate normal distribution (using estimated correlation, stored in corr)
*bysort states the required ordering explicitly instead of relying on a
*sort performed earlier in the file
bysort pid (spell t): gen firstob=1 if _n==1
matrix C=(1, corr, 1)
drawnorm ur1 ur2, corr(C) cstorage(lower) 
*Rescale the standard normal draw by the estimated standard deviation sigr1.
*ur2 is drawn jointly with ur1 but is not used in the r1 predictions
replace ur1=ur1*scalar(sigr1)
drop ur2
replace ur1=. if firstob~=1
*Fill in the same random effect value for all years for a given individual
by pid: replace ur1=ur1[_n-1] if ur1==.

*Copy the r1 coefficients from matrix br1 into scalars br11, br12, ...
*(one per covariate in $xvar, in the same order)
local k=0
foreach x of global xvar {
	local k=`k'+1
	scalar br1`k'=br1[`k',1]
}

*For everjob = 0 and then everjob = 1, and for each duration category
*tgp = 1,...,10, overwrite the everjob and tgp covariates with the chosen
*values and compute the probability of a transition into employment,
*adding the simulated random effect ur1 to the linear prediction.
*The result for tgp=i, everjob=e is stored in pst<i>e<e>
forvalues e = 0/1 {

	*Fix everjob at the chosen value
	replace r1_ejob=`e'

	*Set all tgp dummies to zero (this is the tgp=1 reference category)
	forvalues g = 2/10 {
		replace r1_t`g'=0
	}

	forvalues i = 1/10 {

		*Switch on the dummy for category i (none is needed for tgp=1)
		if `i'>1 {
			replace r1_t`i'=1
		}

		*Generate linear prediction xb (fixed part only); the simulated u
		*is added when the probability is computed
		local k=0
		gen xb=0
		foreach x of global xvar {
			local k=`k'+1
			replace xb=xb+`x'*scalar(br1`k')
		}
		gen pst`i'e`e'=invlogit(xb+ur1)
		drop xb

		*Switch the dummy off again before moving to the next category
		if `i'>1 {
			replace r1_t`i'=0
		}
	}
}

sum pst1e0-pst10e1, sep(0)


*****************************************
*Creating a dataset of average predictions
*Copied directly from prac2.do
******************************************

*Reduce the data to a single row holding the mean of every prediction
collapse (mean) pt1e0-pt10e1 pst1e0-pst10e1 

*Replicate that row 20 times: rows 1-10 get everjob=0, rows 11-20 get
*everjob=1, and tgp counts 1,...,10 within each everjob value
expand 20
gen everjob=(_n>10)
bysort everjob: gen tgp=_n

*Long-form hazard variables: pick out, for each (everjob, tgp) row, the
*mean prediction computed for that combination
gen pmethod1=.
gen pmethod2=.

forvalues e = 0/1 {
	forvalues g = 1/10 {
		replace pmethod1=pt`g'e`e' if everjob==`e' & tgp==`g'
		replace pmethod2=pst`g'e`e' if everjob==`e' & tgp==`g'
	}
}

*The wide-form means are no longer needed
keep everjob tgp pmethod1 pmethod2

list

*Plot the discrete-time hazard functions
*(one connected line per method and everjob value, in legend order)
local hazplots
forvalues m = 1/2 {
	forvalues e = 0/1 {
		local hazplots `hazplots' (connected pmethod`m' tgp if everjob==`e')
	}
}
twoway `hazplots', ///
legend(order(1 "everjob=0 (method 1)" 2 "everjob=1 (method 1)" ///
3 "everjob=0 (method 2)" 4 "everjob=1 (method 2)")) scheme(s1mono)

*Derive the associated survival functions: the value is 1 at tgp=1 and, for
*later categories, the previous row's survival times (1 - previous hazard).
*replace works through the rows in order, so each row uses the updated
*value of the row before it
sort everjob tgp
forvalues m = 1/2 {
	gen smethod`m'=1
}
forvalues m = 1/2 {
	by everjob: replace smethod`m'=smethod`m'[_n-1]*(1-pmethod`m'[_n-1]) if tgp>1
}

list

*Plot the survival functions
local survplots
forvalues m = 1/2 {
	forvalues e = 0/1 {
		local survplots `survplots' (connected smethod`m' tgp if everjob==`e')
	}
}
twoway `survplots', ///
legend(order(1 "everjob=0 (method 1)" 2 "everjob=1 (method 1)" ///
3 "everjob=0 (method 2)" 4 "everjob=1 (method 2)")) scheme(s1mono)



